Skip to content

Commit

Permalink
feat: implement superchain metrics as timeseries (#1911)
Browse files Browse the repository at this point in the history
* add: pre-macro metrics for gas fees and transactions

* add: preprocessing macro

* add: DAAs model

* add: MAAs model

* feat: add trusted user model

* add: trusted transactions

* fix: windowing on monthly metrics

* fix: address reference

* join artifacts_by_address on trusted_user_model

* fix: simplify user model joins

* fix: filter out events before first of the month

* refactor: add windowing macro

* refactor: add project join to macro

* make metrics ephemeral and unify into single model

* fix: linting error
  • Loading branch information
ccerv1 authored Aug 9, 2024
1 parent 4702c16 commit 6f65f41
Show file tree
Hide file tree
Showing 11 changed files with 763 additions and 0 deletions.
145 changes: 145 additions & 0 deletions warehouse/dbt/macros/models/timeseries.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
{% macro to_sql_array(string_list) %}
{% set formatted_list = [] %}
{% for item in string_list %}
{% do formatted_list.append("'{}'".format(item)) %}
{% endfor %}
{{ return("(" ~ formatted_list | join(', ') ~ ")") }}
{% endmacro %}

{% macro timeseries_events(
event_sources,
event_types,
to_artifact_types,
from_artifact_types,
time_interval='DAY',
missing_dates='ignore',
event_model='int_events'
) %}

with filtered_events as (
select
TIMESTAMP_TRUNC(`time`, {{ time_interval }}) as sample_date,
from_artifact_id,
to_artifact_id,
event_source,
amount
from {{ ref(event_model) }}
where
event_type in {{ to_sql_array(event_types) }}
and event_source in {{ to_sql_array(event_sources) }}
and to_artifact_type in {{ to_sql_array(to_artifact_types) }}
and from_artifact_type in {{ to_sql_array(from_artifact_types) }}
and `time` < TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), {{ time_interval }})
),

grouped_events as (
select
sample_date,
from_artifact_id,
to_artifact_id,
event_source,
SUM(amount) as amount
from filtered_events
group by
sample_date,
from_artifact_id,
to_artifact_id,
event_source
),

{% if missing_dates == "fill_with_zero" %}

calendar as (
select distinct
TIMESTAMP_TRUNC(date_timestamp, {{ time_interval }}) as calendar_time
from {{ ref('stg_utility__calendar') }}
where TIMESTAMP(date_timestamp) <= CURRENT_TIMESTAMP()
),

first_event_times as (
select
to_artifact_id,
MIN(sample_date) as first_event_time
from grouped_events
group by to_artifact_id
),

timeseries_events as (
select
calendar.calendar_time as sample_date,
event_dates.from_artifact_id,
event_dates.to_artifact_id,
event_dates.event_source,
COALESCE(grouped_events.amount, 0) as amount
from
calendar
cross join (
select
from_artifact_id,
to_artifact_id,
event_source
from grouped_events
group by
from_artifact_id,
to_artifact_id,
event_source
) as event_dates
left join grouped_events
on calendar.calendar_time = grouped_events.sample_date
and event_dates.from_artifact_id = grouped_events.from_artifact_id
and event_dates.to_artifact_id = grouped_events.to_artifact_id
and event_dates.event_source = grouped_events.event_source
where
calendar.calendar_time >= (
select MIN(first_event_time)
from first_event_times
where first_event_times.to_artifact_id = event_dates.to_artifact_id
)
)

{% else %}

timeseries_events as (
select
sample_date,
from_artifact_id,
to_artifact_id,
event_source,
amount
from grouped_events
)

{% endif %}

select
timeseries_events.sample_date,
timeseries_events.from_artifact_id,
timeseries_events.to_artifact_id,
artifacts_by_project.project_id,
timeseries_events.event_source,
timeseries_events.amount
from timeseries_events
inner join {{ ref('artifacts_by_project_v1')}} as artifacts_by_project
on timeseries_events.to_artifact_id = artifacts_by_project.artifact_id

{% endmacro %}

{% macro window_events(
agg_events_table,
agg_func,
window_size,
entity_type
) %}

select
{{ entity_type }}_id,
sample_date,
event_source,
{{ agg_func }}(amount) over (
partition by {{entity_type}}_id, event_source
order by sample_date
rows between {{ window_size-1 }} preceding and current row
) as amount
from {{ agg_events_table }}

{% endmacro %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{% set models = [
'gas_fees_sum',
'transactions_sum',
'trusted_transactions_sum',
'trusted_daily_active_users_avg',
'trusted_monthly_active_users_avg',
'daily_active_addresses_avg',
'monthly_active_addresses_avg',
'trusted_users_onboarded_sum'
] %}

{% for model in models %}
select * from {{ ref('superchain_metric__%s_6_months' % model) }}
{% if not loop.last %}
union all
{% endif %}
{% endfor %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
{#
config(
materialized='ephemeral',
)
#}

{% set metric = {
"metric_name": "daily_active_addresses_avg_6_months",
"metric_unit": "addresses",
"event_sources": [
"OPTIMISM",
"BASE",
"FRAX",
"METAL",
"MODE",
"ZORA"
],
"event_types": ["CONTRACT_INVOCATION_SUCCESS_DAILY_COUNT"],
"to_artifact_types": ["CONTRACT"],
"from_artifact_types": ["EOA"],
"window_interval": "DAY",
"window_size": 180,
"window_missing_dates": "fill_with_zero",
"sampling_interval": "daily"
} %}

with events as (
{{ timeseries_events(
metric.event_sources,
metric.event_types,
metric.to_artifact_types,
metric.from_artifact_types,
time_interval=metric.window_interval,
missing_dates=metric.window_missing_dates
) }}
),

agg_events as (
select
events.sample_date,
events.event_source,
events.project_id,
COUNT(distinct artifacts_by_address.artifact_name) as amount
from events
inner join {{ ref('int_artifacts_by_address') }} as artifacts_by_address
on events.from_artifact_id = artifacts_by_address.artifact_id
group by
events.sample_date,
events.event_source,
events.project_id
),

windowed_events as ({{
window_events('agg_events', 'AVG', metric.window_size, 'project')
}})

select
project_id,
sample_date,
event_source,
amount,
'{{ metric.metric_name }}' as metric,
'{{ metric.metric_unit }}' as unit
from windowed_events
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{#
config(
materialized='ephemeral',
)
#}

{% set metric = {
"metric_name": "gas_fees_sum_6_months",
"metric_unit": "L2 ETH",
"event_sources": [
"OPTIMISM",
"BASE",
"FRAX",
"METAL",
"MODE",
"ZORA"
],
"event_types": ["CONTRACT_INVOCATION_DAILY_L2_GAS_USED"],
"to_artifact_types": ["CONTRACT"],
"from_artifact_types": ["EOA", "SAFE"],
"window_interval": "DAY",
"window_size": 180,
"window_missing_dates": "ignore",
"sampling_interval": "daily"
} %}

with events as (
{{ timeseries_events(
metric.event_sources,
metric.event_types,
metric.to_artifact_types,
metric.from_artifact_types,
time_interval=metric.window_interval,
missing_dates=metric.window_missing_dates
) }}
),

agg_events as (
select
events.sample_date,
events.event_source,
events.project_id,
SUM(events.amount / 1e18) as amount
from events
group by
events.sample_date,
events.event_source,
events.project_id
),

windowed_events as ({{
window_events('agg_events', 'SUM', metric.window_size, 'project')
}})

select
project_id,
sample_date,
event_source,
amount,
'{{ metric.metric_name }}' as metric,
'{{ metric.metric_unit }}' as unit
from windowed_events
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
{#
config(
materialized='ephemeral',
)
#}

{% set metric = {
"metric_name": "monthly_active_addresses_avg_6_months",
"metric_unit": "addresses",
"event_sources": [
"OPTIMISM",
"BASE",
"FRAX",
"METAL",
"MODE",
"ZORA"
],
"event_types": ["CONTRACT_INVOCATION_SUCCESS_DAILY_COUNT"],
"to_artifact_types": ["CONTRACT"],
"from_artifact_types": ["EOA"],
"window_interval": "MONTH",
"window_size": 6,
"window_missing_dates": "fill_with_zero",
"sampling_interval": "daily"
} %}

with events as (
{{ timeseries_events(
metric.event_sources,
metric.event_types,
metric.to_artifact_types,
metric.from_artifact_types,
time_interval=metric.window_interval,
missing_dates=metric.window_missing_dates
) }}
),

agg_events as (
select
events.sample_date,
events.event_source,
events.project_id,
COUNT(distinct artifacts_by_address.artifact_name) as amount
from events
inner join {{ ref('int_artifacts_by_address') }} as artifacts_by_address
on events.from_artifact_id = artifacts_by_address.artifact_id
group by
events.sample_date,
events.event_source,
events.project_id
),

windowed_events as ({{
window_events('agg_events', 'AVG', metric.window_size, 'project')
}})

select
project_id,
sample_date,
event_source,
amount,
'{{ metric.metric_name }}' as metric,
'{{ metric.metric_unit }}' as unit
from windowed_events
Loading

0 comments on commit 6f65f41

Please sign in to comment.