Skip to content

Commit

Permalink
Add additional aux data for github issues (#2516)
Browse files Browse the repository at this point in the history
* Add additional aux data for github issues

* Update stg_github__comments.sql

Fix copy paste issues

* Update stg_github__issues.sql

* fix dbt error

* Parse dates

* Add the comments column

* fix

* fix
  • Loading branch information
ravenac95 authored Nov 25, 2024
1 parent 5dfdd17 commit 79771f1
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ with pr_events as (
`number`,
`type`,
actor_id,
created_at,
event_time as `time`,
LOWER(actor_login) as actor_login,
LOWER(repository_name) as repository_name,
CAST(repository_id as STRING) as to_artifact_source_id
Expand All @@ -16,7 +16,7 @@ merge_events as (
`number`,
`type`,
actor_id,
created_at,
created_at as `time`,
LOWER(actor_login) as actor_login,
LOWER(repository_name) as repository_name,
CAST(repository_id as STRING) as to_artifact_source_id
Expand All @@ -28,7 +28,7 @@ issue_events as (
`number`,
`type`,
actor_id,
created_at,
event_time as `time`,
LOWER(actor_login) as actor_login,
LOWER(repository_name) as repository_name,
CAST(repository_id as STRING) as to_artifact_source_id
Expand All @@ -40,7 +40,7 @@ comment_events as (
`number`,
`type`,
actor_id,
created_at,
event_time as `time`,
LOWER(actor_login) as actor_login,
LOWER(repository_name) as repository_name,
CAST(repository_id as STRING) as to_artifact_source_id
Expand All @@ -55,7 +55,7 @@ all_events as (
repository_name,
actor_id,
to_artifact_source_id,
created_at,
`time`,
'GITHUB' as event_source
from pr_events
union all
Expand All @@ -66,7 +66,7 @@ all_events as (
repository_name,
actor_id,
to_artifact_source_id,
created_at,
`time`,
'GITHUB' as event_source
from merge_events
union all
Expand All @@ -77,7 +77,7 @@ all_events as (
repository_name,
actor_id,
to_artifact_source_id,
created_at,
`time`,
'GITHUB' as event_source
from issue_events
union all
Expand All @@ -88,14 +88,14 @@ all_events as (
repository_name,
actor_id,
to_artifact_source_id,
created_at,
`time`,
'GITHUB' as event_source
from comment_events
)

select
'GITHUB' as event_source,
created_at as `time`,
`time`,
`number`,
`type`,
actor_login,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ github_releases as (

github_comments as (
select -- noqa: ST06
created_at as `time`,
event_time as `time`,
type as event_type,
CAST(id as STRING) as event_source_id,
"GITHUB" as event_source,
Expand All @@ -80,7 +80,7 @@ github_comments as (

github_issues as (
select -- noqa: ST06
created_at as `time`,
event_time as `time`,
type as event_type,
CAST(id as STRING) as event_source_id,
"GITHUB" as event_source,
Expand All @@ -100,7 +100,7 @@ github_issues as (

github_pull_requests as (
select -- noqa: ST06
created_at as `time`,
event_time as `time`,
type as event_type,
CAST(id as STRING) as event_source_id,
"GITHUB" as event_source,
Expand All @@ -120,7 +120,7 @@ github_pull_requests as (

github_pull_request_merge_events as (
select -- noqa: ST06
created_at as `time`,
event_time as `time`,
type as event_type,
CAST(id as STRING) as event_source_id,
"GITHUB" as event_source,
Expand Down
40 changes: 32 additions & 8 deletions warehouse/dbt/models/intermediate/events/int_events_aux_issues.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

with github_comments as (
select -- noqa: ST06
created_at as `time`,
`event_time` as `time`,
type as event_type,
CAST(id as STRING) as event_source_id,
"GITHUB" as event_source,
Expand All @@ -28,13 +28,17 @@ with github_comments as (
actor_login as from_namespace,
"GIT_USER" as from_type,
CAST(actor_id as STRING) as from_artifact_source_id,
`number` as issue_number
`number` as issue_number,
created_at,
merged_at,
closed_at,
comments
from {{ ref('stg_github__comments') }}
),

github_issues as (
select -- noqa: ST06
created_at as `time`,
event_time as `time`,
type as event_type,
CAST(id as STRING) as event_source_id,
"GITHUB" as event_source,
Expand All @@ -48,13 +52,17 @@ github_issues as (
actor_login as from_namespace,
"GIT_USER" as from_type,
CAST(actor_id as STRING) as from_artifact_source_id,
`number` as issue_number
`number` as issue_number,
created_at,
CAST(null as TIMESTAMP) as merged_at,
closed_at,
comments
from {{ ref('stg_github__issues') }}
),

github_pull_requests as (
select -- noqa: ST06
created_at as `time`,
event_time as `time`,
type as event_type,
CAST(id as STRING) as event_source_id,
"GITHUB" as event_source,
Expand All @@ -68,7 +76,11 @@ github_pull_requests as (
actor_login as from_namespace,
"GIT_USER" as from_type,
CAST(actor_id as STRING) as from_artifact_source_id,
`number` as issue_number
`number` as issue_number,
created_at,
merged_at,
closed_at,
comments
from {{ ref('stg_github__pull_requests') }}
),

Expand All @@ -88,7 +100,11 @@ github_pull_request_merge_events as (
actor_login as from_namespace,
"GIT_USER" as from_type,
CAST(actor_id as STRING) as from_artifact_source_id,
`number` as issue_number
`number` as issue_number,
created_at,
merged_at,
closed_at,
comments
from {{ ref('stg_github__pull_request_merge_events') }}
),

Expand All @@ -100,7 +116,11 @@ issue_events as (
event_source,
{{ oso_id("event_source", "to_artifact_source_id") }} as to_artifact_id,
{{ oso_id("event_source", "from_artifact_source_id") }} as from_artifact_id,
issue_number
issue_number,
created_at,
merged_at,
closed_at,
comments
from (
select * from github_issues
union all
Expand All @@ -117,6 +137,10 @@ select
to_artifact_id,
from_artifact_id,
issue_number,
created_at,
merged_at,
closed_at,
comments,
UPPER(event_type) as event_type,
CAST(event_source_id as STRING) as event_source_id,
UPPER(event_source) as event_source
Expand Down
53 changes: 45 additions & 8 deletions warehouse/dbt/models/staging/github/stg_github__comments.sql
Original file line number Diff line number Diff line change
@@ -1,29 +1,66 @@
-- One row per PullRequestReviewCommentEvent. Besides the event itself, each
-- row carries the parent pull request's lifecycle fields as they appear in the
-- event payload.
with pull_request_comment_events as (
  select
    ghe.id as id,
    -- Timestamp of the comment event. `created_at` further down is read from
    -- the pull request object in the payload, not from the event.
    ghe.created_at as `event_time`,
    ghe.repo.id as repository_id,
    ghe.repo.name as repository_name,
    ghe.actor.id as actor_id,
    ghe.actor.login as actor_login,
    "PULL_REQUEST_REVIEW_COMMENT" as `type`,
    -- JSON_VALUE returns STRING, so `number`, `state` and `comments` are
    -- strings in this model.
    JSON_VALUE(ghe.payload, "$.pull_request.number") as `number`,
    PARSE_TIMESTAMP(
      "%Y-%m-%dT%H:%M:%E*SZ",
      JSON_VALUE(ghe.payload, "$.pull_request.created_at")
    ) as created_at,
    PARSE_TIMESTAMP(
      "%Y-%m-%dT%H:%M:%E*SZ",
      JSON_VALUE(ghe.payload, "$.pull_request.merged_at")
    ) as merged_at,
    PARSE_TIMESTAMP(
      "%Y-%m-%dT%H:%M:%E*SZ",
      JSON_VALUE(ghe.payload, "$.pull_request.closed_at")
    ) as closed_at,
    JSON_VALUE(
      ghe.payload,
      "$.pull_request.state"
    ) as `state`,
    JSON_VALUE(
      ghe.payload,
      "$.pull_request.comments"
    ) as comments
  from {{ ref('stg_github__events') }} as ghe
  where ghe.type = "PullRequestReviewCommentEvent"
),

-- One row per IssueCommentEvent. Besides the event itself, each row carries the
-- parent issue's lifecycle fields as they appear in the event payload.
issue_comment_events as (
  select
    ghe.id as id,
    -- Timestamp of the comment event. `created_at` further down is read from
    -- the issue object in the payload, not from the event.
    ghe.created_at as `event_time`,
    ghe.repo.id as repository_id,
    ghe.repo.name as repository_name,
    ghe.actor.id as actor_id,
    ghe.actor.login as actor_login,
    "ISSUE_COMMENT" as `type`,
    -- JSON_VALUE returns STRING, so `number`, `state` and `comments` are
    -- strings in this model.
    JSON_VALUE(ghe.payload, "$.issue.number") as `number`,
    PARSE_TIMESTAMP(
      "%Y-%m-%dT%H:%M:%E*SZ",
      JSON_VALUE(ghe.payload, "$.issue.created_at")
    ) as created_at,
    -- No merge timestamp is read from the issue payload; the typed NULL keeps
    -- the column list identical to the other comment CTE in this file.
    CAST(null as TIMESTAMP) as merged_at,
    PARSE_TIMESTAMP(
      "%Y-%m-%dT%H:%M:%E*SZ",
      JSON_VALUE(ghe.payload, "$.issue.closed_at")
    ) as closed_at,
    JSON_VALUE(
      ghe.payload,
      "$.issue.state"
    ) as `state`,
    JSON_VALUE(
      ghe.payload,
      "$.issue.comments"
    ) as comments
  from {{ ref('stg_github__events') }} as ghe
  where ghe.type = "IssueCommentEvent"
)

select * from pull_request_comment_events
Expand Down
20 changes: 18 additions & 2 deletions warehouse/dbt/models/staging/github/stg_github__issues.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,27 @@ with issue_events as (

-- Final projection: one row per issue event, with the issue's own lifecycle
-- fields pulled out of the event payload.
select
  ie.id as id,
  -- Timestamp of the event. `created_at` below is the issue's creation time
  -- read from the payload.
  ie.created_at as event_time,
  ie.repo.id as repository_id,
  ie.repo.name as repository_name,
  ie.actor.id as actor_id,
  ie.actor.login as actor_login,
  -- Event type is "ISSUE_" followed by the upper-cased payload action.
  CONCAT("ISSUE_", UPPER(JSON_VALUE(ie.payload, "$.action"))) as `type`,
  -- JSON_VALUE returns STRING, so `number`, `state` and `comments` are strings.
  JSON_VALUE(ie.payload, "$.issue.number") as `number`,
  PARSE_TIMESTAMP(
    "%Y-%m-%dT%H:%M:%E*SZ",
    JSON_VALUE(ie.payload, "$.issue.created_at")
  ) as created_at,
  PARSE_TIMESTAMP(
    "%Y-%m-%dT%H:%M:%E*SZ",
    JSON_VALUE(ie.payload, "$.issue.closed_at")
  ) as closed_at,
  JSON_VALUE(
    ie.payload,
    "$.issue.state"
  ) as `state`,
  JSON_VALUE(
    ie.payload,
    "$.issue.comments"
  ) as comments
from issue_events as ie
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,19 @@ select distinct
PARSE_TIMESTAMP(
"%Y-%m-%dT%H:%M:%E*SZ",
JSON_VALUE(pre.payload, "$.pull_request.merged_at")
) as event_time,
PARSE_TIMESTAMP(
"%Y-%m-%dT%H:%M:%E*SZ",
JSON_VALUE(pre.payload, "$.pull_request.merged_at")
) as merged_at,
PARSE_TIMESTAMP(
"%Y-%m-%dT%H:%M:%E*SZ",
JSON_VALUE(pre.payload, "$.pull_request.created_at")
) as created_at,
PARSE_TIMESTAMP(
"%Y-%m-%dT%H:%M:%E*SZ",
JSON_VALUE(pre.payload, "$.pull_request.closed_at")
) as closed_at,
CAST(JSON_VALUE(pre.payload, "$.pull_request.user.id") as INTEGER)
as actor_id,
JSON_VALUE(
Expand All @@ -36,6 +48,9 @@ select distinct
JSON_VALUE(
pre.payload, "$.pull_request.review_comments"
) as review_comments,
JSON_VALUE(
pre.payload, "$.pull_request.comments"
) as comments,
JSON_VALUE(
pre.payload, "$.pull_request.author_association"
) as author_association,
Expand Down
24 changes: 22 additions & 2 deletions warehouse/dbt/models/staging/github/stg_github__pull_requests.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,32 @@ with pull_request_events as (

-- Final projection: one row per pull request event, with the pull request's
-- own lifecycle fields pulled out of the event payload.
select
  pre.id as id,
  -- Timestamp of the event. `created_at` below is the pull request's creation
  -- time read from the payload.
  pre.created_at as event_time,
  pre.repo.id as repository_id,
  pre.repo.name as repository_name,
  pre.actor.id as actor_id,
  pre.actor.login as actor_login,
  -- Event type is "PULL_REQUEST_" followed by the upper-cased payload action.
  CONCAT("PULL_REQUEST_", UPPER(JSON_VALUE(pre.payload, "$.action")))
    as `type`,
  -- JSON_VALUE returns STRING, so `number`, `state` and `comments` are strings.
  JSON_VALUE(pre.payload, "$.number") as `number`,
  PARSE_TIMESTAMP(
    "%Y-%m-%dT%H:%M:%E*SZ",
    JSON_VALUE(pre.payload, "$.pull_request.created_at")
  ) as created_at,
  PARSE_TIMESTAMP(
    "%Y-%m-%dT%H:%M:%E*SZ",
    JSON_VALUE(pre.payload, "$.pull_request.merged_at")
  ) as merged_at,
  PARSE_TIMESTAMP(
    "%Y-%m-%dT%H:%M:%E*SZ",
    JSON_VALUE(pre.payload, "$.pull_request.closed_at")
  ) as closed_at,
  JSON_VALUE(
    pre.payload,
    "$.pull_request.state"
  ) as `state`,
  JSON_VALUE(
    pre.payload,
    "$.pull_request.comments"
  ) as comments
from pull_request_events as pre

0 comments on commit 79771f1

Please sign in to comment.