Skip to content

Commit

Permalink
feat(dbt): mart registry for dependency graph (#2623)
Browse files Browse the repository at this point in the history
* fix(dbt): point to better source for latest package owner in deps.dev

* feat(dbt): staging model for latest package owner

* feat(dbt): int model to see if we have a package's owner repo already

* fix(dbt): add model to resolve deps.dev and sbom source names

* feat(dbt): parse package lineage and determine if historic owner is current owner

* fix(dbt): refactor to use int_packages

* fix(dbt): use int_packages to join on sbom package sources

* fix(dbt): lookup deps.dev package owner artifact_id if it exists

* fix(dbt): rename field

* feat(dbt): add sbom mart model

* feat(dbt): add project id to int model

* refactor(dbt): break sboms_v0 into two models
  • Loading branch information
ccerv1 authored Dec 12, 2024
1 parent ca3f10c commit f7c15d8
Show file tree
Hide file tree
Showing 7 changed files with 188 additions and 3 deletions.
5 changes: 2 additions & 3 deletions warehouse/dbt/models/deps_dev_sources.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,5 @@ sources:
tables:
- name: dependencies
identifier: Dependencies
- name: package_versions_latest
identifier: PackageVersionsLatest

- name: package_version_to_project_latest
identifier: PackageVersionToProjectLatest
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
with latest_package as (
select distinct
package_artifact_source,
package_artifact_name,
package_github_owner,
package_github_repo
from {{ ref('int_packages') }}
where is_current_owner = true
)

select
github_repos.project_id,
github_repos.artifact_id,
github_repos.artifact_source_id,
github_repos.artifact_source,
latest_package.package_github_owner as artifact_namespace,
latest_package.package_github_repo as artifact_name,
all_packages.artifact_id as package_artifact_id,
latest_package.package_artifact_source as package_artifact_source,
latest_package.package_artifact_name as package_artifact_name
from latest_package
left outer join {{ ref('int_all_artifacts') }} as github_repos
on
latest_package.package_github_owner = github_repos.artifact_namespace
and latest_package.package_github_repo = github_repos.artifact_name
and github_repos.artifact_source = 'GITHUB'
left outer join {{ ref('int_all_artifacts') }} as all_packages
on
latest_package.package_artifact_name = all_packages.artifact_name
and latest_package.package_artifact_source = all_packages.artifact_source
where github_repos.project_id is not null
47 changes: 47 additions & 0 deletions warehouse/dbt/models/intermediate/directory/int_packages.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
with deps_dev as (
select
`Version` as package_version,
upper(`System`) as package_artifact_source,
lower(`Name`) as package_artifact_name,
lower(split(`ProjectName`, '/')[0]) as package_github_owner,
lower(split(`ProjectName`, '/')[1]) as package_github_repo
from {{ ref('stg_deps_dev__packages') }}
),

latest_versions as (
select
package_artifact_source,
package_artifact_name,
package_github_owner as current_owner,
package_github_repo as current_repo
from deps_dev
qualify row_number() over (
partition by package_artifact_name, package_artifact_source
order by package_version desc
) = 1
)

select
deps_dev.package_artifact_source,
deps_dev.package_artifact_name,
deps_dev.package_version,
deps_dev.package_github_owner,
deps_dev.package_github_repo,
case
when deps_dev.package_artifact_source = 'CARGO' then 'RUST'
when deps_dev.package_artifact_source = 'NPM' then 'NPM'
when deps_dev.package_artifact_source = 'PYPI' then 'PIP'
when deps_dev.package_artifact_source = 'GO' then 'GO'
when deps_dev.package_artifact_source = 'MAVEN' then 'MAVEN'
when deps_dev.package_artifact_source = 'NUGET' then 'NUGET'
else 'UNKNOWN'
end as sbom_artifact_source,
(
deps_dev.package_github_owner = latest_versions.current_owner
and deps_dev.package_github_repo = latest_versions.current_repo)
as is_current_owner
from deps_dev
left join latest_versions
on
deps_dev.package_artifact_source = latest_versions.package_artifact_source
and deps_dev.package_artifact_name = latest_versions.package_artifact_name
26 changes: 26 additions & 0 deletions warehouse/dbt/models/intermediate/directory/int_sbom_artifacts.sql
Original file line number Diff line number Diff line change
Expand Up @@ -31,23 +31,41 @@ sbom_artifacts as (
snapshot_at
from ranked_snapshots
where row_num = 1
),

deps_dev_packages as (
select distinct
sbom_artifact_source,
package_artifact_name,
package_github_owner,
package_github_repo
from {{ ref('int_packages') }}
where is_current_owner = true
)

select
{#
Because we use repo.id as the artifact_source_id for github, we need to lookup the artifact_id for the SBOM repo. If the artifact is not found, this will return null.
#}
all_repos.project_id,
all_repos.artifact_id,
sbom_artifacts.artifact_source,
sbom_artifacts.artifact_namespace,
sbom_artifacts.artifact_name,
{#
Because we only index packages that are found in OSSD, most of the time this will return a null package_artifact_id.
#}
all_packages.project_id as package_project_id,
all_packages.artifact_id as package_artifact_id,
sbom_artifacts.package_source as package_artifact_source,
'' as package_artifact_namespace,
sbom_artifacts.package as package_artifact_name,
'GITHUB' as package_owner_source,
sbom_artifacts.package_version as package_version,
deps_dev_packages.package_github_owner,
deps_dev_packages.package_github_repo,
deps_dev_repos.artifact_id as package_github_artifact_id,
deps_dev_repos.project_id as package_github_project_id,
sbom_artifacts.snapshot_at
from sbom_artifacts
left outer join {{ ref('int_all_artifacts') }} as all_repos
Expand All @@ -58,3 +76,11 @@ left outer join {{ ref('int_all_artifacts') }} as all_packages
on
sbom_artifacts.package = all_packages.artifact_name
and sbom_artifacts.package_source = all_packages.artifact_source
left outer join deps_dev_packages
on
sbom_artifacts.package_source = deps_dev_packages.sbom_artifact_source
and sbom_artifacts.package = deps_dev_packages.package_artifact_name
left outer join {{ ref('int_all_artifacts') }} as deps_dev_repos
on
deps_dev_packages.package_github_owner = deps_dev_repos.artifact_namespace
and deps_dev_packages.package_github_repo = deps_dev_repos.artifact_name
21 changes: 21 additions & 0 deletions warehouse/dbt/models/marts/directory/package_owners_v0.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{{
config(
materialized='table',
meta = {
'sync_to_db': True,
}
)
}}

select distinct
package_project_id,
package_artifact_id,
package_artifact_source,
package_artifact_namespace,
package_artifact_name,
package_github_project_id as package_owner_project_id,
package_github_artifact_id as package_owner_artifact_id,
package_owner_source,
package_github_owner as package_owner_artifact_namespace,
package_github_repo as package_owner_artifact_name
from {{ ref('int_sbom_artifacts') }}
21 changes: 21 additions & 0 deletions warehouse/dbt/models/marts/directory/sboms_v0.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{{
config(
materialized='table',
meta = {
'sync_to_db': True,
}
)
}}

select distinct
project_id as from_project_id,
artifact_id as from_artifact_id,
artifact_source as from_artifact_source,
artifact_namespace as from_artifact_namespace,
artifact_name as from_artifact_name,
package_project_id as to_package_project_id,
package_artifact_id as to_package_artifact_id,
package_artifact_source as to_package_artifact_source,
package_artifact_namespace as to_package_artifact_namespace,
package_artifact_name as to_package_artifact_name
from {{ ref('int_sbom_artifacts') }}
40 changes: 40 additions & 0 deletions warehouse/dbt/models/staging/deps-dev/stg_deps_dev__packages.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{{ config(
materialized='incremental',
partition_by={
'field': 'SnapshotAt',
'data_type': 'timestamp',
'granularity': 'day'
},
) }}

{% set is_production = target.name == 'production' %}

{% if is_production %}
with base as (
select
`SnapshotAt`,
`System`,
`ProjectName`,
`Name`,
`Version`
from {{ source("deps_dev", "package_version_to_project_latest") }}
where
`ProjectName` is not null
and `ProjectType` = 'GITHUB'
and `RelationType` = 'SOURCE_REPO_TYPE'
)
{% if is_incremental() %}
select * from base
where `SnapshotAt` > (select max(`SnapshotAt`) from {{ this }})
{% else %}
select * from base
{% endif %}
{% else %}
select
'NPM' as `System`,
'opensource-observer/oso' as `ProjectName`,
'@example/oso' as `Name`,
'0.0.16' as `Version`,
current_timestamp() as `SnapshotAt`
limit 1
{% endif %}

0 comments on commit f7c15d8

Please sign in to comment.