From 52147fe778aaef71ffd57b6d75e1aae5b4ddba3d Mon Sep 17 00:00:00 2001
From: Reuven Gonzales
Date: Sat, 23 Nov 2024 01:56:50 +0900
Subject: [PATCH] Auxiliary events table for issues (#2512)

* Auxiliary events table for issues

* Create mart for auxiliary table
---
Notes (free text after the three-dash line; not part of the commit message):

int_events_aux_issues
  - Builds four CTEs from stg_github__comments, stg_github__issues,
    stg_github__pull_requests and stg_github__pull_request_merge_events and
    stacks them with `select *` / `union all`. Set operations match columns
    by position, so the four CTEs must keep the same column list in the
    same order.
  - Every CTE carries the staging model's `number` column through as
    issue_number.
  - to_name, to_namespace, to_type, from_name, from_namespace and from_type
    are computed in every CTE but are not selected by issue_events, so they
    do not reach the model's output.
  - to_artifact_id / from_artifact_id are produced by the oso_id macro called
    with event_source and the matching *_artifact_source_id column.
  - The final select upper-cases event_type and event_source.
  - Materialized as a table partitioned on `time` (timestamp, day
    granularity); meta.sync_to_db is False.

timeseries_events_aux_issues_by_artifact_v0
  - Re-selects the seven output columns of int_events_aux_issues (in a
    different order) with no further transformation; meta.sync_to_db is True
    and meta.order_by is event_source, event_type, to_artifact_id, time.

 .../events/int_events_aux_issues.sql | 123 ++++++++++++++++++
 ...eries_events_aux_issues_by_artifact_v0.sql | 16 +++
 2 files changed, 139 insertions(+)
 create mode 100644 warehouse/dbt/models/intermediate/events/int_events_aux_issues.sql
 create mode 100644 warehouse/dbt/models/marts/events/timeseries_events_aux_issues_by_artifact_v0.sql

diff --git a/warehouse/dbt/models/intermediate/events/int_events_aux_issues.sql b/warehouse/dbt/models/intermediate/events/int_events_aux_issues.sql
new file mode 100644
index 000000000..b3e42817f
--- /dev/null
+++ b/warehouse/dbt/models/intermediate/events/int_events_aux_issues.sql
@@ -0,0 +1,123 @@
+{{
+  config(
+    materialized='table',
+    partition_by={
+      "field": "time",
+      "data_type": "timestamp",
+      "granularity": "day",
+    },
+    meta={
+      'sync_to_db': False
+    }
+  )
+}}
+
+with github_comments as (
+  select -- noqa: ST06
+    created_at as `time`,
+    type as event_type,
+    CAST(id as STRING) as event_source_id,
+    "GITHUB" as event_source,
+    SPLIT(REPLACE(repository_name, "@", ""), "/")[SAFE_OFFSET(1)]
+      as to_name,
+    SPLIT(REPLACE(repository_name, "@", ""), "/")[SAFE_OFFSET(0)]
+      as to_namespace,
+    "REPOSITORY" as to_type,
+    CAST(repository_id as STRING) as to_artifact_source_id,
+    actor_login as from_name,
+    actor_login as from_namespace,
+    "GIT_USER" as from_type,
+    CAST(actor_id as STRING) as from_artifact_source_id,
+    `number` as issue_number
+  from {{ ref('stg_github__comments') }}
+),
+
+github_issues as (
+  select -- noqa: ST06
+    created_at as `time`,
+    type as event_type,
+    CAST(id as STRING) as event_source_id,
+    "GITHUB" as event_source,
+    SPLIT(REPLACE(repository_name, "@", ""), "/")[SAFE_OFFSET(1)]
+      as to_name,
+    SPLIT(REPLACE(repository_name, "@", ""), "/")[SAFE_OFFSET(0)]
+      as to_namespace,
+    "REPOSITORY" as to_type,
+    CAST(repository_id as STRING) as to_artifact_source_id,
+    actor_login as from_name,
+    actor_login as from_namespace,
+    "GIT_USER" as from_type,
+    CAST(actor_id as STRING) as from_artifact_source_id,
+    `number` as issue_number
+  from {{ ref('stg_github__issues') }}
+),
+
+github_pull_requests as (
+  select -- noqa: ST06
+    created_at as `time`,
+    type as event_type,
+    CAST(id as STRING) as event_source_id,
+    "GITHUB" as event_source,
+    SPLIT(REPLACE(repository_name, "@", ""), "/")[SAFE_OFFSET(1)]
+      as to_name,
+    SPLIT(REPLACE(repository_name, "@", ""), "/")[SAFE_OFFSET(0)]
+      as to_namespace,
+    "REPOSITORY" as to_type,
+    CAST(repository_id as STRING) as to_artifact_source_id,
+    actor_login as from_name,
+    actor_login as from_namespace,
+    "GIT_USER" as from_type,
+    CAST(actor_id as STRING) as from_artifact_source_id,
+    `number` as issue_number
+  from {{ ref('stg_github__pull_requests') }}
+),
+
+github_pull_request_merge_events as (
+  select -- noqa: ST06
+    created_at as `time`,
+    type as event_type,
+    CAST(id as STRING) as event_source_id,
+    "GITHUB" as event_source,
+    SPLIT(REPLACE(repository_name, "@", ""), "/")[SAFE_OFFSET(1)]
+      as to_name,
+    SPLIT(REPLACE(repository_name, "@", ""), "/")[SAFE_OFFSET(0)]
+      as to_namespace,
+    "REPOSITORY" as to_type,
+    CAST(repository_id as STRING) as to_artifact_source_id,
+    actor_login as from_name,
+    actor_login as from_namespace,
+    "GIT_USER" as from_type,
+    CAST(actor_id as STRING) as from_artifact_source_id,
+    `number` as issue_number
+  from {{ ref('stg_github__pull_request_merge_events') }}
+),
+
+issue_events as (
+  select
+    time,
+    event_type,
+    event_source_id,
+    event_source,
+    {{ oso_id("event_source", "to_artifact_source_id") }} as to_artifact_id,
+    {{ oso_id("event_source", "from_artifact_source_id") }} as from_artifact_id,
+    issue_number
+  from (
+    select * from github_issues
+    union all
+    select * from github_pull_requests
+    union all
+    select * from github_pull_request_merge_events
+    union all
+    select * from github_comments
+  )
+)
+
+select
+  time,
+  to_artifact_id,
+  from_artifact_id,
+  issue_number,
+  UPPER(event_type) as event_type,
+  CAST(event_source_id as STRING) as event_source_id,
+  UPPER(event_source) as event_source
+from issue_events
diff --git a/warehouse/dbt/models/marts/events/timeseries_events_aux_issues_by_artifact_v0.sql b/warehouse/dbt/models/marts/events/timeseries_events_aux_issues_by_artifact_v0.sql
new file mode 100644
index 000000000..b8a708864
--- /dev/null
+++ b/warehouse/dbt/models/marts/events/timeseries_events_aux_issues_by_artifact_v0.sql
@@ -0,0 +1,16 @@
+{{
+  config(meta = {
+    'sync_to_db': True,
+    'order_by': [ 'event_source', 'event_type', 'to_artifact_id', 'time' ]
+  })
+}}
+
+select
+  time,
+  to_artifact_id,
+  from_artifact_id,
+  event_type,
+  event_source_id,
+  event_source,
+  issue_number
+from {{ ref('int_events_aux_issues') }}