From f55f71c33571a0cde81589cd6cdfad0764af59d1 Mon Sep 17 00:00:00 2001 From: Carl Cervone <42869436+ccerv1@users.noreply.github.com> Date: Wed, 19 Jun 2024 14:33:28 -0400 Subject: [PATCH] fix: include blockchain EOA artifacts in the artifacts table (#1679) --- .../intermediate/directory/int_artifacts.sql | 44 ++++++++++++++++--- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/warehouse/dbt/models/intermediate/directory/int_artifacts.sql b/warehouse/dbt/models/intermediate/directory/int_artifacts.sql index 623729c0d..a400d8f7c 100644 --- a/warehouse/dbt/models/intermediate/directory/int_artifacts.sql +++ b/warehouse/dbt/models/intermediate/directory/int_artifacts.sql @@ -1,9 +1,9 @@ with all_artifacts as ( - {# - The `last_used` value is later used in this query to determine what the most - _current_ name is. However, oss-directory names are considered canonical so - `last_used` is only relevent for `git_user` artifacts. + + {# + This grabs all the artifacts we know about from OSSD and from the contract discovery process. #} + select artifact_source_id, artifact_source, @@ -11,13 +11,47 @@ with all_artifacts as ( artifact_url, artifact_name from {{ ref('int_artifacts_by_project') }} + union all + + {# + This grabs the universe of blockchain artifacts that have interacted with the contracts we care about from the events table. 
+ TODO: this should be refactored when we "index the universe" + #} + + select distinct + artifact_source_id, + artifact_source, + lower(artifact_source) as artifact_namespace, + artifact_source_id as artifact_name, + artifact_source_id as artifact_url + from ( + select + from_artifact_source_id as artifact_source_id, + event_source as artifact_source + from {{ ref('int_events') }} + where event_type = 'CONTRACT_INVOCATION_DAILY_COUNT' + union all + select + to_artifact_source_id as artifact_source_id, + event_source as artifact_source + from {{ ref('int_events') }} + where event_type = 'CONTRACT_INVOCATION_DAILY_COUNT' + ) + + union all + + {# + This grabs the universe of GitHub users that have interacted with the repos we care about. + The `last_used` value is later used in this query to determine what the most _current_ name is. However, oss-directory names are considered canonical so `last_used` is only relevant for `git_user` artifacts. + #} + select artifact_source_id, artifact_source, artifact_namespace, artifact_url, - MAX_BY(artifact_name, last_used) as artifact_name + max_by(artifact_name, last_used) as artifact_name from {{ ref('int_artifacts_history') }} group by artifact_source_id,