Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(dbt): mart registry for dependency graph #2623

Merged
merged 13 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions warehouse/dbt/models/deps_dev_sources.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,5 @@ sources:
tables:
- name: dependencies
identifier: Dependencies
- name: package_versions_latest
identifier: PackageVersionsLatest

- name: package_version_to_project_latest
identifier: PackageVersionToProjectLatest
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
with latest_package as (
select distinct
package_artifact_source,
package_artifact_name,
package_github_owner,
package_github_repo
from {{ ref('int_packages') }}
where is_current_owner = true
)

select
github_repos.project_id,
github_repos.artifact_id,
github_repos.artifact_source_id,
github_repos.artifact_source,
latest_package.package_github_owner as artifact_namespace,
latest_package.package_github_repo as artifact_name,
all_packages.artifact_id as package_artifact_id,
latest_package.package_artifact_source as package_artifact_source,
latest_package.package_artifact_name as package_artifact_name
from latest_package
left outer join {{ ref('int_all_artifacts') }} as github_repos
on
latest_package.package_github_owner = github_repos.artifact_namespace
and latest_package.package_github_repo = github_repos.artifact_name
and github_repos.artifact_source = 'GITHUB'
left outer join {{ ref('int_all_artifacts') }} as all_packages
on
latest_package.package_artifact_name = all_packages.artifact_name
and latest_package.package_artifact_source = all_packages.artifact_source
where github_repos.project_id is not null
47 changes: 47 additions & 0 deletions warehouse/dbt/models/intermediate/directory/int_packages.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
with deps_dev as (
select
`Version` as package_version,
upper(`System`) as package_artifact_source,
lower(`Name`) as package_artifact_name,
lower(split(`ProjectName`, '/')[0]) as package_github_owner,
lower(split(`ProjectName`, '/')[1]) as package_github_repo
from {{ ref('stg_deps_dev__packages') }}
),

latest_versions as (
select
package_artifact_source,
package_artifact_name,
package_github_owner as current_owner,
package_github_repo as current_repo
from deps_dev
qualify row_number() over (
partition by package_artifact_name, package_artifact_source
order by package_version desc
) = 1
)

select
deps_dev.package_artifact_source,
deps_dev.package_artifact_name,
deps_dev.package_version,
deps_dev.package_github_owner,
deps_dev.package_github_repo,
case
when deps_dev.package_artifact_source = 'CARGO' then 'RUST'
when deps_dev.package_artifact_source = 'NPM' then 'NPM'
when deps_dev.package_artifact_source = 'PYPI' then 'PIP'
when deps_dev.package_artifact_source = 'GO' then 'GO'
when deps_dev.package_artifact_source = 'MAVEN' then 'MAVEN'
when deps_dev.package_artifact_source = 'NUGET' then 'NUGET'
else 'UNKNOWN'
end as sbom_artifact_source,
(
deps_dev.package_github_owner = latest_versions.current_owner
and deps_dev.package_github_repo = latest_versions.current_repo)
as is_current_owner
from deps_dev
left join latest_versions
on
deps_dev.package_artifact_source = latest_versions.package_artifact_source
and deps_dev.package_artifact_name = latest_versions.package_artifact_name
26 changes: 26 additions & 0 deletions warehouse/dbt/models/intermediate/directory/int_sbom_artifacts.sql
Original file line number Diff line number Diff line change
Expand Up @@ -31,23 +31,41 @@ sbom_artifacts as (
snapshot_at
from ranked_snapshots
where row_num = 1
),

deps_dev_packages as (
select distinct
sbom_artifact_source,
package_artifact_name,
package_github_owner,
package_github_repo
from {{ ref('int_packages') }}
where is_current_owner = true
)

select
{#
Because we use repo.id as the artifact_source_id for github, we need to lookup the artifact_id for the SBOM repo. If the artifact is not found, this will return null.
#}
all_repos.project_id,
all_repos.artifact_id,
sbom_artifacts.artifact_source,
sbom_artifacts.artifact_namespace,
sbom_artifacts.artifact_name,
{#
Because we only index packages that are found in OSSD, most of the time this will return a null package_artifact_id.
#}
all_packages.project_id as package_project_id,
all_packages.artifact_id as package_artifact_id,
sbom_artifacts.package_source as package_artifact_source,
'' as package_artifact_namespace,
sbom_artifacts.package as package_artifact_name,
'GITHUB' as package_owner_source,
sbom_artifacts.package_version as package_version,
deps_dev_packages.package_github_owner,
deps_dev_packages.package_github_repo,
deps_dev_repos.artifact_id as package_github_artifact_id,
deps_dev_repos.project_id as package_github_project_id,
sbom_artifacts.snapshot_at
from sbom_artifacts
left outer join {{ ref('int_all_artifacts') }} as all_repos
Expand All @@ -58,3 +76,11 @@ left outer join {{ ref('int_all_artifacts') }} as all_packages
on
sbom_artifacts.package = all_packages.artifact_name
and sbom_artifacts.package_source = all_packages.artifact_source
left outer join deps_dev_packages
on
sbom_artifacts.package_source = deps_dev_packages.sbom_artifact_source
and sbom_artifacts.package = deps_dev_packages.package_artifact_name
left outer join {{ ref('int_all_artifacts') }} as deps_dev_repos
on
deps_dev_packages.package_github_owner = deps_dev_repos.artifact_namespace
and deps_dev_packages.package_github_repo = deps_dev_repos.artifact_name
21 changes: 21 additions & 0 deletions warehouse/dbt/models/marts/directory/package_owners_v0.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{{
config(
materialized='table',
meta = {
'sync_to_db': True,
}
)
}}

select distinct
package_project_id,
package_artifact_id,
package_artifact_source,
package_artifact_namespace,
package_artifact_name,
package_github_project_id as package_owner_project_id,
package_github_artifact_id as package_owner_artifact_id,
package_owner_source,
package_github_owner as package_owner_artifact_namespace,
package_github_repo as package_owner_artifact_name
from {{ ref('int_sbom_artifacts') }}
21 changes: 21 additions & 0 deletions warehouse/dbt/models/marts/directory/sboms_v0.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{{
config(
materialized='table',
meta = {
'sync_to_db': True,
}
)
}}

select distinct
project_id as from_project_id,
artifact_id as from_artifact_id,
artifact_source as from_artifact_source,
artifact_namespace as from_artifact_namespace,
artifact_name as from_artifact_name,
package_project_id as to_package_project_id,
package_artifact_id as to_package_artifact_id,
package_artifact_source as to_package_artifact_source,
package_artifact_namespace as to_package_artifact_namespace,
package_artifact_name as to_package_artifact_name
from {{ ref('int_sbom_artifacts') }}
40 changes: 40 additions & 0 deletions warehouse/dbt/models/staging/deps-dev/stg_deps_dev__packages.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{{ config(
materialized='incremental',
partition_by={
'field': 'SnapshotAt',
'data_type': 'timestamp',
'granularity': 'day'
},
) }}

{% set is_production = target.name == 'production' %}

{% if is_production %}
with base as (
select
`SnapshotAt`,
`System`,
`ProjectName`,
`Name`,
`Version`
from {{ source("deps_dev", "package_version_to_project_latest") }}
where
`ProjectName` is not null
and `ProjectType` = 'GITHUB'
and `RelationType` = 'SOURCE_REPO_TYPE'
)
{% if is_incremental() %}
select * from base
where `SnapshotAt` > (select max(`SnapshotAt`) from {{ this }})
{% else %}
select * from base
{% endif %}
{% else %}
select
'NPM' as `System`,
'opensource-observer/oso' as `ProjectName`,
'@example/oso' as `Name`,
'0.0.16' as `Version`,
current_timestamp() as `SnapshotAt`
limit 1
{% endif %}
Loading