diff --git a/warehouse/dbt/macros/models/contract_invocation_events_with_l1.sql b/warehouse/dbt/macros/models/contract_invocation_events_with_l1.sql index aa825d3dd..70248334c 100644 --- a/warehouse/dbt/macros/models/contract_invocation_events_with_l1.sql +++ b/warehouse/dbt/macros/models/contract_invocation_events_with_l1.sql @@ -18,7 +18,7 @@ with blockchain_artifacts as ( when artifact_type = 'EOA' then 1 else 0 end as artifact_rank - from {{ ref('int_artifacts_by_project') }} + from {{ ref('int_all_artifacts') }} {# int_all_artifacts exposes artifact_type, which the artifact_rank case expression above reads #} where artifact_source = "{{ upper_network_name }}" ) group by artifact_source_id diff --git a/warehouse/dbt/macros/models/filtered_blockchain_events.sql b/warehouse/dbt/macros/models/filtered_blockchain_events.sql index 01196d3e5..6544a8bb1 100644 --- a/warehouse/dbt/macros/models/filtered_blockchain_events.sql +++ b/warehouse/dbt/macros/models/filtered_blockchain_events.sql @@ -1,7 +1,7 @@ {% macro filtered_blockchain_events(artifact_source, source_name, source_table) %} with known_addresses as ( select distinct `artifact_source_id` as `address` - from {{ ref("int_artifacts_by_project") }} + from {{ ref("int_all_artifacts") }} {# can hold several rows per address; the select distinct above collapses them #} where LOWER(artifact_source) = LOWER('{{ artifact_source }}') ), known_to as ( select events.* diff --git a/warehouse/dbt/models/intermediate/blockchain/int_optimism_contract_invocation_events.sql b/warehouse/dbt/models/intermediate/blockchain/int_optimism_contract_invocation_events.sql index f23a715a4..273c89c11 100644 --- a/warehouse/dbt/models/intermediate/blockchain/int_optimism_contract_invocation_events.sql +++ b/warehouse/dbt/models/intermediate/blockchain/int_optimism_contract_invocation_events.sql @@ -33,7 +33,7 @@ with blockchain_artifacts as ( when artifact_type = 'EOA' then 1 else 0 end as artifact_rank - from {{ ref('int_artifacts_by_project') }} + from {{ ref('int_all_artifacts') }} {# int_all_artifacts exposes artifact_type, which the artifact_rank case expression above reads #} where LOWER(artifact_source) = LOWER('OPTIMISM') ) group by artifact_source_id diff --git 
a/warehouse/dbt/models/intermediate/blockchain/int_optimism_transactions.sql b/warehouse/dbt/models/intermediate/blockchain/int_optimism_transactions.sql index 9a127a445..900935705 100644 --- a/warehouse/dbt/models/intermediate/blockchain/int_optimism_transactions.sql +++ b/warehouse/dbt/models/intermediate/blockchain/int_optimism_transactions.sql @@ -13,7 +13,7 @@ }} with known_addresses as ( select distinct `artifact_source_id` as `address` - from {{ ref("int_artifacts_by_project") }} + from {{ ref("int_all_artifacts") }} where `artifact_source` = 'OPTIMISM' ), {% if target.name == 'production' %} diff --git a/warehouse/dbt/models/intermediate/directory/int_all_artifacts.sql b/warehouse/dbt/models/intermediate/directory/int_all_artifacts.sql new file mode 100644 index 000000000..f88c9c63e --- /dev/null +++ b/warehouse/dbt/models/intermediate/directory/int_all_artifacts.sql @@ -0,0 +1,218 @@ +{# + This model is responsible for generating a list of all artifacts associated with a project. + This includes repositories, npm packages, blockchain addresses, and contracts. + + Note: This will create a separate row for each artifact_type, which is de-duplicated + in int_artifacts_by_project + Note: Currently, the source and namespace for blockchain artifacts are the same. This may change + in the future. +#} + +with all_repos as ( + {# + Currently this is just Github. 
+ oss-directory needs some refactoring to support multiple repository providers + #} + select + "GITHUB" as artifact_source, + "REPOSITORY" as artifact_type, + projects.project_id, + repos.owner as artifact_namespace, + repos.name as artifact_name, + repos.url as artifact_url, + CAST(repos.id as STRING) as artifact_source_id + from + {{ ref('stg_ossd__current_projects') }} as projects + cross join + UNNEST(JSON_QUERY_ARRAY(projects.github)) as github + inner join + {{ ref('stg_ossd__current_repositories') }} as repos + on {# match either an owner-level GitHub URL or the exact repository URL, case-insensitively #} + LOWER(CONCAT("https://github.com/", repos.owner)) + = LOWER(JSON_VALUE(github.url)) + or LOWER(repos.url) = LOWER(JSON_VALUE(github.url)) +), + +all_npm_raw as ( + select + "NPM" as artifact_source, + "PACKAGE" as artifact_type, + projects.project_id, + JSON_VALUE(npm.url) as artifact_source_id, + case + when + JSON_VALUE(npm.url) like "https://npmjs.com/package/%" + then SUBSTR(JSON_VALUE(npm.url), 27) {# prefix is 26 characters and SUBSTR is 1-based, so the package name starts at 27 #} + when + JSON_VALUE(npm.url) like "https://www.npmjs.com/package/%" + then SUBSTR(JSON_VALUE(npm.url), 31) {# prefix is 30 characters; URLs matching neither pattern yield a NULL name #} + end as artifact_name, + JSON_VALUE(npm.url) as artifact_url + from + {{ ref('stg_ossd__current_projects') }} as projects + cross join + UNNEST(JSON_QUERY_ARRAY(projects.npm)) as npm +), + +all_npm as ( + select + project_id, + artifact_source_id, + artifact_source, + artifact_type, + artifact_name, + artifact_url, + SPLIT(REPLACE(artifact_name, "@", ""), "/")[SAFE_OFFSET(0)] {# namespace = first slash-separated segment of the name with "@" removed #} + as artifact_namespace + from all_npm_raw +), + +ossd_blockchain as ( {# one row per (address, network, tag) combination listed on a project #} + select + projects.project_id, + tag as artifact_type, + network as artifact_namespace, + network as artifact_source, + JSON_VALUE(blockchains.address) as artifact_source_id, + JSON_VALUE(blockchains.address) as artifact_name, + JSON_VALUE(blockchains.address) as artifact_url + from + {{ ref('stg_ossd__current_projects') }} as projects + cross join + UNNEST(JSON_QUERY_ARRAY(projects.blockchain)) as blockchains + cross join + UNNEST(JSON_VALUE_ARRAY(blockchains.networks)) as network + cross join + 
UNNEST(JSON_VALUE_ARRAY(blockchains.tags)) as tag +), + +all_deployers as ( + select + *, {# NOTE(review): UNION ALL is positional, so select * relies on the staging model's column order matching the explicit lists below - confirm #} + "MAINNET" as artifact_namespace, + "ETHEREUM" as artifact_source + from {{ ref("stg_ethereum__deployers") }} + union all + select + *, + "ARBITRUM_ONE" as artifact_namespace, + "ARBITRUM_ONE" as artifact_source + from {{ ref("stg_arbitrum__deployers") }} + union all + {# Includes all deployers of a contract #} + select + block_timestamp, + transaction_hash, + deployer_address, + contract_address, + UPPER(network) as artifact_namespace, + UPPER(network) as artifact_source + from {{ ref("int_derived_contracts") }} + union all + {# Includes all factory deployers of a contract #} + select + block_timestamp, + transaction_hash, + factory_deployer_address as deployer_address, + contract_address, + UPPER(network) as artifact_namespace, + UPPER(network) as artifact_source + from {{ ref("int_derived_contracts") }} +), + +discovered_contracts as ( + select + "CONTRACT" as artifact_type, + ob.project_id, + ad.contract_address as artifact_source_id, + ob.artifact_source, + ob.artifact_namespace, + ad.contract_address as artifact_name, + ad.contract_address as artifact_url + from ossd_blockchain as ob + inner join all_deployers as ad + on + ob.artifact_source_id = ad.deployer_address {# NOTE(review): case-sensitive equality that runs before the LOWER() normalization further down - confirm both sides share a casing #} + {# + We currently do not really have a notion of namespace in + oss-directory. 
We may need to change this when that time comes + #} + and UPPER(ob.artifact_source) in (UPPER(ad.artifact_source), "ANY_EVM") + and UPPER(ob.artifact_namespace) in ( + UPPER(ad.artifact_namespace), "ANY_EVM" + ) + and UPPER(ob.artifact_type) in ("EOA", "DEPLOYER", "FACTORY") +), + +all_artifacts as ( + select + project_id, + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_name, + artifact_url + from + all_repos + union all + select + project_id, + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_name, + artifact_url + from + ossd_blockchain + union all + select + project_id, + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_name, + artifact_url + from + discovered_contracts + union all + select + project_id, + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_name, + artifact_url + from + all_npm +), + +all_normalized_artifacts as ( + select distinct + project_id, + LOWER(artifact_source_id) as artifact_source_id, + {# + artifact_source and artifact_type are considered internal constants hence + we apply an UPPER transform + #} + UPPER(artifact_source) as artifact_source, + UPPER(artifact_type) as artifact_type, + LOWER(artifact_namespace) as artifact_namespace, + LOWER(artifact_name) as artifact_name, + LOWER(artifact_url) as artifact_url + from all_artifacts +) + +select + project_id, + {{ oso_id("a.artifact_source", "a.artifact_source_id") }} as `artifact_id`, + artifact_source_id, + artifact_source, + artifact_namespace, + artifact_name, + artifact_url, + artifact_type +from all_normalized_artifacts as a diff --git a/warehouse/dbt/models/intermediate/directory/int_artifact_types.sql b/warehouse/dbt/models/intermediate/directory/int_artifact_types.sql new file mode 100644 index 000000000..9bb025e1f --- /dev/null +++ b/warehouse/dbt/models/intermediate/directory/int_artifact_types.sql @@ -0,0 +1,8 @@ 
+select distinct {# one row per distinct artifact and artifact_type pair; project_id and artifact_url are left out #} + artifact_id, + artifact_source_id, + artifact_source, + artifact_namespace, + artifact_name, + artifact_type +from {{ ref('int_all_artifacts') }} diff --git a/warehouse/dbt/models/intermediate/directory/int_artifacts.sql b/warehouse/dbt/models/intermediate/directory/int_artifacts.sql index ccf77d345..623729c0d 100644 --- a/warehouse/dbt/models/intermediate/directory/int_artifacts.sql +++ b/warehouse/dbt/models/intermediate/directory/int_artifacts.sql @@ -7,7 +7,6 @@ with all_artifacts as ( select artifact_source_id, artifact_source, - artifact_type, artifact_namespace, artifact_url, artifact_name @@ -16,7 +15,6 @@ with all_artifacts as ( select artifact_source_id, artifact_source, - artifact_type, artifact_namespace, artifact_url, MAX_BY(artifact_name, last_used) as artifact_name @@ -24,7 +22,6 @@ with all_artifacts as ( group by artifact_source_id, artifact_source, - artifact_type, artifact_namespace, artifact_url ) @@ -33,7 +30,6 @@ select distinct {{ oso_id("artifact_source", "artifact_source_id") }} as artifact_id, artifact_source_id, artifact_source, - artifact_type, artifact_namespace, artifact_name, artifact_url diff --git a/warehouse/dbt/models/intermediate/directory/int_artifacts_by_project.sql b/warehouse/dbt/models/intermediate/directory/int_artifacts_by_project.sql index d3718e2f0..e76433c40 100644 --- a/warehouse/dbt/models/intermediate/directory/int_artifacts_by_project.sql +++ b/warehouse/dbt/models/intermediate/directory/int_artifacts_by_project.sql @@ -1,216 +1,9 @@ -{# - This model is responsible for generating a list of all artifacts associated with a project. - This includes repositories, npm packages, blockchain addresses, and contracts. - - Currently, the source and namespace for blockchain artifacts are the same. This may change - in the future. -#} - -with all_repos as ( - {# - Currently this is just Github. 
- oss-directory needs some refactoring to support multiple repository providers - #} - select - "GITHUB" as artifact_source, - "REPOSITORY" as artifact_type, - projects.project_id, - repos.owner as artifact_namespace, - repos.name as artifact_name, - repos.url as artifact_url, - CAST(repos.id as STRING) as artifact_source_id - from - {{ ref('stg_ossd__current_projects') }} as projects - cross join - UNNEST(JSON_QUERY_ARRAY(projects.github)) as github - inner join - {{ ref('stg_ossd__current_repositories') }} as repos - on - LOWER(CONCAT("https://github.com/", repos.owner)) - = LOWER(JSON_VALUE(github.url)) - or LOWER(repos.url) = LOWER(JSON_VALUE(github.url)) -), - -all_npm_raw as ( - select - "NPM" as artifact_source, - "PACKAGE" as artifact_type, - projects.project_id, - JSON_VALUE(npm.url) as artifact_source_id, - case - when - JSON_VALUE(npm.url) like "https://npmjs.com/package/%" - then SUBSTR(JSON_VALUE(npm.url), 28) - when - JSON_VALUE(npm.url) like "https://www.npmjs.com/package/%" - then SUBSTR(JSON_VALUE(npm.url), 31) - end as artifact_name, - JSON_VALUE(npm.url) as artifact_url - from - {{ ref('stg_ossd__current_projects') }} as projects - cross join - UNNEST(JSON_QUERY_ARRAY(projects.npm)) as npm -), - -all_npm as ( - select - project_id, - artifact_source_id, - artifact_source, - artifact_type, - artifact_name, - artifact_url, - SPLIT(REPLACE(artifact_name, "@", ""), "/")[SAFE_OFFSET(0)] - as artifact_namespace - from all_npm_raw -), - -ossd_blockchain as ( - select - projects.project_id, - tag as artifact_type, - network as artifact_namespace, - network as artifact_source, - JSON_VALUE(blockchains.address) as artifact_source_id, - JSON_VALUE(blockchains.address) as artifact_name, - JSON_VALUE(blockchains.address) as artifact_url - from - {{ ref('stg_ossd__current_projects') }} as projects - cross join - UNNEST(JSON_QUERY_ARRAY(projects.blockchain)) as blockchains - cross join - UNNEST(JSON_VALUE_ARRAY(blockchains.networks)) as network - cross join - 
UNNEST(JSON_VALUE_ARRAY(blockchains.tags)) as tag -), - -all_deployers as ( - select - *, - "MAINNET" as artifact_namespace, - "ETHEREUM" as artifact_source - from {{ ref("stg_ethereum__deployers") }} - union all - select - *, - "ARBITRUM_ONE" as artifact_namespace, - "ARBITRUM_ONE" as artifact_source - from {{ ref("stg_arbitrum__deployers") }} - union all - {# Includes all deployers of a contract #} - select - block_timestamp, - transaction_hash, - deployer_address, - contract_address, - UPPER(network) as artifact_namespace, - UPPER(network) as artifact_source - from {{ ref("int_derived_contracts") }} - union all - {# Includes all factory deployers of a contract #} - select - block_timestamp, - transaction_hash, - factory_deployer_address as deployer_address, - contract_address, - UPPER(network) as artifact_namespace, - UPPER(network) as artifact_source - from {{ ref("int_derived_contracts") }} -), - -discovered_contracts as ( - select - "CONTRACT" as artifact_type, - ob.project_id, - ad.contract_address as artifact_source_id, - ob.artifact_source, - ob.artifact_namespace, - ad.contract_address as artifact_name, - ad.contract_address as artifact_url - from ossd_blockchain as ob - inner join all_deployers as ad - on - ob.artifact_source_id = ad.deployer_address - {# - We currently do not really have a notion of namespace in - oss-directory. 
We may need to change this when that time comes - #} - and UPPER(ob.artifact_source) in (UPPER(ad.artifact_source), "ANY_EVM") - and UPPER(ob.artifact_namespace) in ( - UPPER(ad.artifact_namespace), "ANY_EVM" - ) - and UPPER(ob.artifact_type) in ("EOA", "DEPLOYER", "FACTORY") -), - -all_artifacts as ( - select - project_id, - artifact_source_id, - artifact_source, - artifact_type, - artifact_namespace, - artifact_name, - artifact_url - from - all_repos - union all - select - project_id, - artifact_source_id, - artifact_source, - artifact_type, - artifact_namespace, - artifact_name, - artifact_url - from - ossd_blockchain - union all - select - project_id, - artifact_source_id, - artifact_source, - artifact_type, - artifact_namespace, - artifact_name, - artifact_url - from - discovered_contracts - union all - select - project_id, - artifact_source_id, - artifact_source, - artifact_type, - artifact_namespace, - artifact_name, - artifact_url - from - all_npm -), - -all_unique_artifacts as ( - select distinct - project_id, - LOWER(artifact_source_id) as artifact_source_id, - {# - artifact_source and artifact_type are considered internal constants hence - we apply an UPPER transform - #} - UPPER(artifact_source) as artifact_source, - UPPER(artifact_type) as artifact_type, - LOWER(artifact_namespace) as artifact_namespace, - LOWER(artifact_name) as artifact_name, - LOWER(artifact_url) as artifact_url - from all_artifacts -) - -select +select distinct {# artifact_type is not selected here, so distinct collapses the per-type rows of int_all_artifacts #} project_id, + artifact_id, artifact_source_id, artifact_source, - artifact_type, artifact_namespace, artifact_name, - artifact_url, - {{ oso_id("a.artifact_source", "a.artifact_source_id") }} as `artifact_id` -from all_unique_artifacts as a + artifact_url +from {{ ref('int_all_artifacts') }} diff --git a/warehouse/dbt/models/intermediate/directory/int_artifacts_by_user.sql b/warehouse/dbt/models/intermediate/directory/int_artifacts_by_user.sql index 69836a48b..1cbe14b83 100644 --- 
a/warehouse/dbt/models/intermediate/directory/int_artifacts_by_user.sql +++ b/warehouse/dbt/models/intermediate/directory/int_artifacts_by_user.sql @@ -11,9 +11,10 @@ with farcaster_users as ( int_users.user_source, int_users.user_source_id, int_users.display_name, - stg_farcaster__addresses.address as artifact_name, int_artifacts.artifact_id, - int_artifacts.artifact_source + int_artifacts.artifact_source, + int_artifacts.artifact_namespace, + stg_farcaster__addresses.address as artifact_name from {{ ref('int_users') }} inner join {{ ref('stg_farcaster__addresses') }} on int_users.user_source_id = stg_farcaster__addresses.fid @@ -27,7 +28,8 @@ select user_source, user_source_id, display_name, - artifact_name, artifact_id, - artifact_source + artifact_source, + artifact_namespace, + artifact_name from farcaster_users diff --git a/warehouse/dbt/models/intermediate/metrics/int_repo_metrics_by_project.sql b/warehouse/dbt/models/intermediate/metrics/int_repo_metrics_by_project.sql index eb6abb8bb..0be828205 100644 --- a/warehouse/dbt/models/intermediate/metrics/int_repo_metrics_by_project.sql +++ b/warehouse/dbt/models/intermediate/metrics/int_repo_metrics_by_project.sql @@ -45,11 +45,11 @@ repo_stats as ( select - int_artifacts_by_project.project_id, - int_artifacts_by_project.artifact_id, - int_artifacts_by_project.artifact_namespace, - int_artifacts_by_project.artifact_name, - int_artifacts_by_project.artifact_source, + int_all_artifacts.project_id, + int_all_artifacts.artifact_id, + int_all_artifacts.artifact_namespace, + int_all_artifacts.artifact_name, + int_all_artifacts.artifact_source, repo_snapshot.is_fork, repo_snapshot.fork_count, repo_snapshot.star_count, @@ -61,11 +61,11 @@ select repo_stats.days_with_commits_count, repo_stats.contributors_to_repo_count, repo_stats.commit_count -from {{ ref('int_artifacts_by_project') }} +from {{ ref('int_all_artifacts') }} {# int_all_artifacts carries artifact_type, which the where clause below filters on #} left join repo_snapshot - on int_artifacts_by_project.artifact_id = repo_snapshot.artifact_id + on 
int_all_artifacts.artifact_id = repo_snapshot.artifact_id left join repo_stats - on int_artifacts_by_project.artifact_id = repo_stats.artifact_id + on int_all_artifacts.artifact_id = repo_stats.artifact_id where - int_artifacts_by_project.artifact_source = 'GITHUB' - and UPPER(int_artifacts_by_project.artifact_type) = 'REPOSITORY' + int_all_artifacts.artifact_source = 'GITHUB' + and UPPER(int_all_artifacts.artifact_type) = 'REPOSITORY' diff --git a/warehouse/dbt/models/marts/directory/artifacts_by_project_v1.sql b/warehouse/dbt/models/marts/directory/artifacts_by_project_v1.sql index f18e7bda9..a3576c8a5 100644 --- a/warehouse/dbt/models/marts/directory/artifacts_by_project_v1.sql +++ b/warehouse/dbt/models/marts/directory/artifacts_by_project_v1.sql @@ -10,7 +10,6 @@ select artifacts_by_project.artifact_source, artifacts_by_project.artifact_namespace, artifacts_by_project.artifact_name, - artifacts_by_project.artifact_type, projects.project_id, projects.project_source, projects.project_namespace, diff --git a/warehouse/dbt/models/marts/directory/artifacts_v1.sql b/warehouse/dbt/models/marts/directory/artifacts_v1.sql index 74c59827b..b58ef983c 100644 --- a/warehouse/dbt/models/marts/directory/artifacts_v1.sql +++ b/warehouse/dbt/models/marts/directory/artifacts_v1.sql @@ -10,7 +10,6 @@ select artifact_id, artifact_source_id, artifact_source, - artifact_type, artifact_namespace, artifact_name, artifact_url diff --git a/warehouse/dbt/models/marts/superchain/rf4_events_daily_to_project.sql b/warehouse/dbt/models/marts/superchain/rf4_events_daily_to_project.sql index 29bb4bdf7..3b070926e 100644 --- a/warehouse/dbt/models/marts/superchain/rf4_events_daily_to_project.sql +++ b/warehouse/dbt/models/marts/superchain/rf4_events_daily_to_project.sql @@ -45,7 +45,7 @@ select then rf4_trusted_users.address end as trusted_user_id from events -left join {{ ref('artifacts_v1') }} as to_artifacts +left join {{ ref('int_artifact_types') }} as to_artifacts {# NOTE(review): int_artifact_types has one row per artifact and artifact_type pair, so an artifact with several types repeats the event row here - confirm the rest of this query filters or aggregates on artifact_type #} on events.to_artifact_id 
= to_artifacts.artifact_id left join {{ ref('artifacts_v1') }} as from_artifacts on events.from_artifact_id = from_artifacts.artifact_id