diff --git a/warehouse/dbt/macros/models/deps_dev_artifact_details.sql b/warehouse/dbt/macros/models/deps_dev_artifact_details.sql
new file mode 100644
index 000000000..d551630a6
--- /dev/null
+++ b/warehouse/dbt/macros/models/deps_dev_artifact_details.sql
@@ -0,0 +1,41 @@
+{#
+  Macro to parse the namespace from the artifact name based on the event source.
+  Arguments:
+    - event_source: The event source of the artifact.
+    - artifact_name: The name of the artifact.
+  Returns the namespace based on event source rules.
+#}
+{% macro parse_namespace(event_source, artifact_name) %}
+  case
+    when {{ event_source }} = 'NPM' and STRPOS({{ artifact_name }}, '/') > 0 then
+      SPLIT(SPLIT({{ artifact_name }}, '/')[SAFE_OFFSET(0)], '@')[SAFE_OFFSET(1)]
+    when {{ event_source }} = 'GO' and STRPOS({{ artifact_name }}, '/') > 0 then
+      SPLIT({{ artifact_name }}, '/')[SAFE_OFFSET(1)]
+    when {{ event_source }} = 'MAVEN' then
+      SPLIT({{ artifact_name }}, ':')[SAFE_OFFSET(0)]
+    when {{ event_source }} = 'NUGET' and STRPOS({{ artifact_name }}, '.') > 0 then
+      SPLIT({{ artifact_name }}, '.')[SAFE_OFFSET(0)]
+    else {{ artifact_name }}
+  end
+{% endmacro %}
+
+{#
+  Macro to parse the name from the artifact name based on the event source.
+  Arguments:
+    - event_source: The event source of the artifact.
+    - artifact_name: The name of the artifact.
+  Returns the name based on event source rules.
+#}
+{% macro parse_name(event_source, artifact_name) %}
+  case
+    when {{ event_source }} = 'NPM' and STRPOS({{ artifact_name }}, '/') > 0 then
+      SPLIT({{ artifact_name }}, '/')[SAFE_OFFSET(1)]
+    when {{ event_source }} = 'GO' and STRPOS({{ artifact_name }}, '/') > 0 then
+      SPLIT({{ artifact_name }}, '/')[SAFE_OFFSET(2)]
+    when {{ event_source }} = 'MAVEN' then
+      SPLIT({{ artifact_name }}, ':')[SAFE_OFFSET(1)]
+    when {{ event_source }} = 'NUGET' and STRPOS({{ artifact_name }}, '.') > 0 then
+      REGEXP_REPLACE({{ artifact_name }}, r'^[^.]+\.', '')
+    else {{ artifact_name }}
+  end
+{% endmacro %}
diff --git a/warehouse/dbt/models/intermediate/events/int_events.sql b/warehouse/dbt/models/intermediate/events/int_events.sql
index e9e960b50..4b6b3e23c 100644
--- a/warehouse/dbt/models/intermediate/events/int_events.sql
+++ b/warehouse/dbt/models/intermediate/events/int_events.sql
@@ -214,6 +214,8 @@ all_events as (
     select * from {{ ref('int_zora_contract_invocation_events') }}
     union all
     select * from {{ ref('int_arbitrum_one_contract_invocation_events') }}
+    union all
+    select * from {{ ref('int_events_dependencies') }}
   )
   union all
   select
diff --git a/warehouse/dbt/models/intermediate/events/int_events_dependencies.sql b/warehouse/dbt/models/intermediate/events/int_events_dependencies.sql
new file mode 100644
index 000000000..f0f2a061a
--- /dev/null
+++ b/warehouse/dbt/models/intermediate/events/int_events_dependencies.sql
@@ -0,0 +1,128 @@
+{#
+  Dependency events derived from deps.dev snapshots.
+  Reads stg_deps_dev__dependencies, keeps rows where MinimumDepth = 1, and
+  labels the earliest snapshot of each
+  (System, Name, Version, Dependency.Name, Dependency.Version) combination as
+  ADD_DEPENDENCY. Rows labelled NO_CHANGE are dropped before event ids are
+  computed.
+#}
+{% set event_source_name = '"DEPS_DEV"' %}
+
+with snapshots as (
+  select
+    `SnapshotAt` as `time`,
+    `System` as from_artifact_type,
+    `Name` as from_artifact_name,
+    `Version` as from_artifact_version,
+    `Dependency`.`Name` as to_artifact_name,
+    `Dependency`.`System` as to_artifact_type,
+    `Dependency`.`Version` as to_artifact_version,
+    -- Dependency name from the preceding snapshot in the same partition;
+    -- null on the first snapshot in which the combination appears.
+    LAG(`Dependency`.`Name`) over (
+      partition by `System`, `Name`, `Dependency`.`Name`, `Version`, `Dependency`.`Version`
+      order by `SnapshotAt`
+    ) as previous_to_artifact_name
+  from {{ ref('stg_deps_dev__dependencies') }}
+  where `MinimumDepth` = 1
+),
+
+intermediate as (
+  select
+    `time`,
+    case
+      when previous_to_artifact_name is null then 'ADD_DEPENDENCY'
+      -- NOTE(review): the window above partitions by `Dependency`.`Name`, so a
+      -- non-null previous_to_artifact_name always equals to_artifact_name and
+      -- this branch cannot match. Removal detection needs a different signal.
+      when
+        to_artifact_name is not null and to_artifact_name <> previous_to_artifact_name
+      then 'REMOVE_DEPENDENCY'
+      else 'NO_CHANGE'
+    end as event_type,
+    {{ event_source_name }} as event_source,
+    {{ parse_name(
+      'to_artifact_type',
+      'to_artifact_name')
+    }} as to_artifact_name,
+    {{ parse_namespace(
+      'to_artifact_type',
+      'to_artifact_name')
+    }} as to_artifact_namespace,
+    to_artifact_type,
+    {{ parse_name(
+      'from_artifact_type',
+      'from_artifact_name')
+    }} as from_artifact_name,
+    {{ parse_namespace(
+      'from_artifact_type',
+      'from_artifact_name')
+    }} as from_artifact_namespace,
+    from_artifact_type,
+    1.0 as amount
+  from snapshots
+),
+
+artifact_ids as (
+  select
+    `time`,
+    event_type,
+    event_source,
+    {{ oso_id(
+      'event_source',
+      'to_artifact_namespace',
+      'to_artifact_name')
+    }} as to_artifact_id,
+    to_artifact_name,
+    to_artifact_namespace,
+    to_artifact_type,
+    {{ oso_id(
+      'event_source',
+      'to_artifact_type')
+    }} as to_artifact_source_id,
+    {{ oso_id(
+      'event_source',
+      'from_artifact_namespace',
+      'from_artifact_name')
+    }} as from_artifact_id,
+    from_artifact_name,
+    from_artifact_namespace,
+    from_artifact_type,
+    {{ oso_id(
+      'event_source',
+      'from_artifact_type')
+    }} as from_artifact_source_id,
+    amount
+  from intermediate
+  where event_type <> 'NO_CHANGE'
+),
+
+changes as (
+  select
+    `time`,
+    event_type,
+    event_source,
+    to_artifact_id,
+    to_artifact_name,
+    to_artifact_namespace,
+    to_artifact_type,
+    to_artifact_source_id,
+    from_artifact_id,
+    from_artifact_name,
+    from_artifact_namespace,
+    from_artifact_type,
+    from_artifact_source_id,
+    amount,
+    {{ oso_id(
+      'event_source',
+      'time',
+      'to_artifact_id',
+      'to_artifact_type',
+      'from_artifact_id',
+      'from_artifact_type',
+      'event_type')
+    }} as event_source_id
+  from artifact_ids
+)
+
+select * from changes
diff --git a/warehouse/dbt/models/staging/deps-dev/stg_deps_dev__dependencies.sql b/warehouse/dbt/models/staging/deps-dev/stg_deps_dev__dependencies.sql
new file mode 100644
index 000000000..6be6db6c2
--- /dev/null
+++ b/warehouse/dbt/models/staging/deps-dev/stg_deps_dev__dependencies.sql
@@ -0,0 +1,54 @@
+{#
+  Staging model for deps.dev dependency snapshots.
+  On the production target it reads
+  bigquery-public-data.deps_dev_v1.Dependencies; on incremental runs only rows
+  with a SnapshotAt later than the newest one already in this model are loaded.
+  On every other target it returns one hard-coded NPM row.
+#}
+{{ config(
+  materialized='incremental',
+  partition_by={
+    'field': 'SnapshotAt',
+    'data_type': 'timestamp',
+    'granularity': 'day'
+  },
+) }}
+
+{% set is_production = target.name == 'production' %}
+
+{% if is_production %}
+with base as (
+  select
+    `SnapshotAt`,
+    `System`,
+    `Name`,
+    `Version`,
+    `Dependency`,
+    `MinimumDepth`
+  from `bigquery-public-data.deps_dev_v1.Dependencies`
+)
+{% if is_incremental() %}
+  select * from base
+  {# max() over an empty table is null and a comparison with null never
+     matches, so fall back to the epoch to keep an empty table loadable. #}
+  where `SnapshotAt` > (
+    select coalesce(max(`SnapshotAt`), timestamp_seconds(0))
+    from {{ this }}
+  )
+{% else %}
+  select * from base
+{% endif %}
+{% else %}
+  select
+    'NPM' as `System`,
+    '@example/oso' as `Name`,
+    '0.0.0' as `Version`,
+    1 as `MinimumDepth`,
+    current_timestamp() as `SnapshotAt`,
+    struct(
+      'NPM' as `System`,
+      '@example/oso-dep' as `Name`,
+      '0.0.0' as `Version`
+    ) as `Dependency`
+  limit 1
+{% endif %}