From cc513c0188027f6b60e6dc0cd0e7b0b82f517b3d Mon Sep 17 00:00:00 2001 From: Carl Cervone <42869436+ccerv1@users.noreply.github.com> Date: Tue, 18 Jun 2024 10:45:19 -0400 Subject: [PATCH] refactor: expand repo scan and returned fields (#1665) --- .../metrics/int_repo_metrics_by_project.sql | 6 ++ .../superchain/rf4_repo_stats_by_project.sql | 56 ++++++++++--------- .../verification/rf4_repos_by_app.sql | 44 +++++++++++++++ 3 files changed, 80 insertions(+), 26 deletions(-) create mode 100644 warehouse/dbt/models/marts/superchain/verification/rf4_repos_by_app.sql diff --git a/warehouse/dbt/models/intermediate/metrics/int_repo_metrics_by_project.sql b/warehouse/dbt/models/intermediate/metrics/int_repo_metrics_by_project.sql index e6a56f35f..b3c592e30 100644 --- a/warehouse/dbt/models/intermediate/metrics/int_repo_metrics_by_project.sql +++ b/warehouse/dbt/models/intermediate/metrics/int_repo_metrics_by_project.sql @@ -1,3 +1,9 @@ +{{ + config( + materialized='table' + ) +}} + with repo_artifact as ( select 'GITHUB' as artifact_source, diff --git a/warehouse/dbt/models/marts/superchain/rf4_repo_stats_by_project.sql b/warehouse/dbt/models/marts/superchain/rf4_repo_stats_by_project.sql index ebfb21e52..18b3bc0cb 100644 --- a/warehouse/dbt/models/marts/superchain/rf4_repo_stats_by_project.sql +++ b/warehouse/dbt/models/marts/superchain/rf4_repo_stats_by_project.sql @@ -33,43 +33,47 @@ with repo_snapshot as ( when ( commit_count >= 10 and days_with_commits_count >= 3 - and first_commit_time < '2024-05-01' and star_count >= 10 and language in ('Solidity', 'JavaScript', 'TypeScript') ) then 'OK' else 'Review' end as repo_activity_check, - concat(artifact_namespace, '/', artifact_name) as url + (first_commit_time < '2024-05-01') as repo_older_than_1_month, + concat(artifact_namespace, '/', artifact_name) as repo from {{ ref('int_repo_metrics_by_project') }} ), -rf4_repos as ( - select lower(replace(artifact, 'https://github.com/', '')) as url - from {{ source("static_data_sources", "agora_rf4_repos_with_contracts") }} -), - filtered_repos as ( - select * from repo_snapshot - where url in (select url from rf4_repos) + select + application_id, + project_name, + repo, + url, + has_contracts + from {{ ref('rf4_repos_by_app') }} + where scan = true ) select - filtered_repos.project_id, - projects_v1.project_name, - filtered_repos.artifact_namespace, - filtered_repos.artifact_name, + filtered_repos.application_id, + filtered_repos.project_name, filtered_repos.url, - filtered_repos.is_fork, - filtered_repos.fork_count, - filtered_repos.star_count, - filtered_repos.first_commit_time, - filtered_repos.last_commit_time, - filtered_repos.days_with_commits_count, - filtered_repos.commit_count, - filtered_repos.language, - filtered_repos.license_spdx_id, - filtered_repos.license_check, - filtered_repos.repo_activity_check + filtered_repos.has_contracts, + repo_snapshot.artifact_namespace, + repo_snapshot.artifact_name, + repo_snapshot.is_fork, + repo_snapshot.fork_count, + repo_snapshot.star_count, + repo_snapshot.first_commit_time, + repo_snapshot.last_commit_time, + repo_snapshot.days_with_commits_count, + repo_snapshot.commit_count, + repo_snapshot.language, + repo_snapshot.license_spdx_id, + repo_snapshot.license_check, + repo_snapshot.repo_older_than_1_month, + repo_snapshot.repo_activity_check, + repo_snapshot.project_id from filtered_repos -left join {{ ref('projects_v1') }} - on filtered_repos.project_id = projects_v1.project_id +left join repo_snapshot + on lower(filtered_repos.repo) = lower(repo_snapshot.repo) diff --git a/warehouse/dbt/models/marts/superchain/verification/rf4_repos_by_app.sql b/warehouse/dbt/models/marts/superchain/verification/rf4_repos_by_app.sql new file mode 100644 index 000000000..8e85dd34c --- /dev/null +++ b/warehouse/dbt/models/marts/superchain/verification/rf4_repos_by_app.sql @@ -0,0 +1,44 @@ +with repos as ( + select + a.application_id, + a.project_name, + a.artifact, + (r.application_id is not null) as has_contracts + from {{ source('static_data_sources', 'agora_rf4_artifacts_by_app') }} as a + left join + {{ source('static_data_sources', 'agora_rf4_repos_with_contracts') }} as r + on a.artifact = r.artifact + where a.artifact_source = 'GITHUB' +), + +repos_w_contracts_by_app as ( + select + application_id, + max(has_contracts) as has_contracts + from repos + group by application_id +), + +repos_by_app as ( + select + repos.application_id, + repos.project_name, + repos.artifact, + repos.has_contracts, + ( + repos.has_contracts = true + or repos_w_contracts_by_app.has_contracts = false + ) as scan + from repos + inner join repos_w_contracts_by_app + on repos.application_id = repos_w_contracts_by_app.application_id +) + +select distinct + application_id, + project_name, + artifact as url, + has_contracts, + scan, + lower(replace(artifact, 'https://github.com/', '')) as repo +from repos_by_app