Skip to content

Commit

Permalink
Attempt to fix timeseries model by materializing more intermediates (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
ravenac95 authored Aug 15, 2024
1 parent 2c15d48 commit 1c5c192
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 63 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,75 +6,17 @@

{% set fulltime_dev_days = 10 %}

with commits as (
select
from_artifact_id as developer_id,
to_artifact_id,
event_source,
bucket_day,
CAST(SUM(amount) > 0 as int64) as commit_count
from {{ ref('int_events_daily_to_artifact') }}
where event_type = 'COMMIT_CODE'
group by
from_artifact_id,
to_artifact_id,
event_source,
bucket_day
),

to_artifact_start_dates as (
select
to_artifact_id,
event_source,
MIN(bucket_day) as first_commit_date
from commits
group by
to_artifact_id,
event_source
),

calendar as (
select
to_artifact_id,
event_source,
TIMESTAMP_ADD(first_commit_date, interval day_offset day) as bucket_day
from
to_artifact_start_dates,
UNNEST(
GENERATE_ARRAY(
0,
TIMESTAMP_DIFF(
(select MAX(bucket_day) as last_commit_date from commits),
first_commit_date, day
)
)
) as day_offset
),

devs as (
select distinct developer_id
from commits
),

developer_to_artifact_dates as (
select
devs.developer_id,
calendar.to_artifact_id,
calendar.bucket_day,
calendar.event_source
from calendar
cross join devs
),

filled_data as (
with filled_data as (
select
dpd.bucket_day,
dpd.developer_id,
dpd.to_artifact_id,
dpd.event_source,
COALESCE(c.commit_count, 0) as commit_count
from developer_to_artifact_dates as dpd
left join commits as c
from {{
ref("int_timeseries_code_metrics_by_artifact_developer_days")
}} as dpd
left join {{ ref("int_timeseries_code_metrics_commits") }} as c
on
dpd.bucket_day = c.bucket_day
and dpd.developer_id = c.developer_id
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{{
  config(
    materialized='table'
  )
}}

-- Builds a dense (developer, artifact, event source, day) grid so that a
-- downstream model can left join the sparse commit rows onto it and fill the
-- missing days.
--
-- Output columns: developer_id, to_artifact_id, bucket_day, event_source.
--
-- NOTE(review): every developer is paired with every artifact/day row (plain
-- cross join), so the row count is |devs| x |calendar|. That is why this
-- model is materialized as a table.

with commits as (
  -- Single read of the upstream commits model. Every CTE below uses this
  -- alias, so no reference can point at a relation that does not exist in
  -- this file.
  select
    developer_id,
    to_artifact_id,
    event_source,
    bucket_day
  from {{ ref("int_timeseries_code_metrics_commits") }}
),

to_artifact_start_dates as (
  -- Earliest commit day seen for each artifact / event source pair.
  select
    to_artifact_id,
    event_source,
    MIN(bucket_day) as first_commit_date
  from commits
  group by
    to_artifact_id,
    event_source
),

last_commit as (
  -- Latest commit day across all artifacts; shared upper bound of the calendar.
  select MAX(bucket_day) as last_commit_date
  from commits
),

calendar as (
  -- One row per artifact / event source per day, running from that pair's
  -- first commit day through the global last commit day (both inclusive,
  -- because GENERATE_ARRAY includes both of its bounds).
  select
    starts.to_artifact_id,
    starts.event_source,
    TIMESTAMP_ADD(starts.first_commit_date, interval day_offset day)
      as bucket_day
  from to_artifact_start_dates as starts
  cross join
    UNNEST(
      GENERATE_ARRAY(
        0,
        TIMESTAMP_DIFF(
          (select last_commit_date from last_commit),
          starts.first_commit_date,
          day
        )
      )
    ) as day_offset
),

devs as (
  -- Every developer that appears in the commits model.
  select distinct developer_id
  from commits
)

select
  devs.developer_id,
  calendar.to_artifact_id,
  calendar.bucket_day,
  calendar.event_source
from calendar
cross join devs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{{
  config(
    materialized='ephemeral'
  )
}}

-- Daily commit activity per developer / artifact / event source.
-- `commit_count` is the boolean `SUM(amount) > 0` cast to int64, i.e. a
-- per-day flag rather than a raw number of commits.
select
  events.from_artifact_id as developer_id,
  events.to_artifact_id,
  events.event_source,
  events.bucket_day,
  CAST(SUM(events.amount) > 0 as int64) as commit_count
from {{ ref('int_events_daily_to_artifact') }} as events
-- Only commit events feed this model.
where events.event_type = 'COMMIT_CODE'
group by
  events.from_artifact_id,
  events.to_artifact_id,
  events.event_source,
  events.bucket_day

0 comments on commit 1c5c192

Please sign in to comment.