From c2ce4fdfcc14d89726665033440608bd1b84581e Mon Sep 17 00:00:00 2001 From: Carl Cervone <42869436+ccerv1@users.noreply.github.com> Date: Mon, 5 Aug 2024 13:02:10 -0400 Subject: [PATCH] feat: add bot filter and trusted user models to production (#1913) * feat: apply bot filter to addresses on OP mainnet * refactor: first_time_addresses macro to include OP mainnet * chore: rename fields * feat: consolidate first_time_addresses model * feat: addresses_by_user model * chore: remove deprecated user models --- .../macros/models/first_time_addresses.sql | 11 +++-- .../dbt/macros/models/potential_bots.sql | 8 +-- .../directory/int_artifacts_by_user.sql | 35 ------------- ...t_onchain_metric__trusted_transactions.sql | 18 ------- .../users/int_addresses_by_user.sql | 49 +++++++++++++++++++ .../users/int_first_time_addresses.sql | 22 +++++++++ .../int_potential_bots.sql | 2 +- .../intermediate/users/int_user_addresses.sql | 18 ------- .../{directory => users}/int_users.sql | 0 .../stg_optimism__first_time_addresses.sql | 14 ++++++ .../optimism/stg_optimism__potential_bots.sql | 12 +++++ 11 files changed, 110 insertions(+), 79 deletions(-) delete mode 100644 warehouse/dbt/models/intermediate/directory/int_artifacts_by_user.sql delete mode 100644 warehouse/dbt/models/intermediate/metrics/onchain/int_onchain_metric__trusted_transactions.sql create mode 100644 warehouse/dbt/models/intermediate/users/int_addresses_by_user.sql create mode 100644 warehouse/dbt/models/intermediate/users/int_first_time_addresses.sql rename warehouse/dbt/models/intermediate/{directory => users}/int_potential_bots.sql (96%) delete mode 100644 warehouse/dbt/models/intermediate/users/int_user_addresses.sql rename warehouse/dbt/models/intermediate/{directory => users}/int_users.sql (100%) create mode 100644 warehouse/dbt/models/staging/optimism/stg_optimism__first_time_addresses.sql create mode 100644 warehouse/dbt/models/staging/optimism/stg_optimism__potential_bots.sql diff --git a/warehouse/dbt/macros/models/first_time_addresses.sql b/warehouse/dbt/macros/models/first_time_addresses.sql index 3137c1052..9d736d721 100644 --- a/warehouse/dbt/macros/models/first_time_addresses.sql +++ b/warehouse/dbt/macros/models/first_time_addresses.sql @@ -1,4 +1,7 @@ {% macro first_time_addresses(network_name, block_timestamp_column="block_timestamp") %} + +{% set lower_network_name = network_name.lower() %} + -- todo: -- 1. add first funded by -- 2. most funded by @@ -15,15 +18,15 @@ select from ( select from_address as address - ,'{{ network_name }}' as chain_name + ,'{{ lower_network_name }}' as chain_name ,min(block_timestamp) as first_block_timestamp ,min(block_number) as first_block_number ,min_by(to_address, block_number) as first_tx_to - ,min_by(`hash`, block_number) as first_tx_hash + ,min_by(block_hash, block_number) as first_tx_hash ,min_by(substring(input, 1, 10), block_number) as first_method_id - from {{ oso_source(network_name, "transactions") }} + from {{ ref('int_%s_transactions' % lower_network_name) }} where - gas_price > 0 + receipt_effective_gas_price > 0 and receipt_status = 1 and receipt_gas_used > 0 {% if is_incremental() %} diff --git a/warehouse/dbt/macros/models/potential_bots.sql b/warehouse/dbt/macros/models/potential_bots.sql index f54b85ed7..c51514e3f 100644 --- a/warehouse/dbt/macros/models/potential_bots.sql +++ b/warehouse/dbt/macros/models/potential_bots.sql @@ -1,16 +1,18 @@ {% macro potential_bots(network_name) %} +{% set lower_network_name = network_name.lower() %} +{% set upper_network_name = network_name.upper() %} with sender_transfer_rates as ( select - '{{ network_name }}' as chain_name + '{{ lower_network_name }}' as chain_name ,date_trunc(block_timestamp, hour) as hr ,from_address as sender ,min(block_timestamp) as min_block_time ,max(block_timestamp) as max_block_time ,count(*) as hr_txs - from {{ oso_source(network_name, "transactions") }} + from {{ ref('int_%s_transactions' % lower_network_name) }} where - gas_price > 0 + receipt_effective_gas_price > 0 and receipt_gas_used > 0 group by 1, 2, 3 ) diff --git a/warehouse/dbt/models/intermediate/directory/int_artifacts_by_user.sql b/warehouse/dbt/models/intermediate/directory/int_artifacts_by_user.sql deleted file mode 100644 index 1cbe14b83..000000000 --- a/warehouse/dbt/models/intermediate/directory/int_artifacts_by_user.sql +++ /dev/null @@ -1,35 +0,0 @@ -{# - Many to many relationship table for users and artifacts - Note: Currently this does not make any assumptions about - whether the artifact is an EOA address. -#} - - -with farcaster_users as ( - select - int_users.user_id, - int_users.user_source, - int_users.user_source_id, - int_users.display_name, - int_artifacts.artifact_id, - int_artifacts.artifact_source, - int_artifacts.artifact_namespace, - stg_farcaster__addresses.address as artifact_name - from {{ ref('int_users') }} - inner join {{ ref('stg_farcaster__addresses') }} - on int_users.user_source_id = stg_farcaster__addresses.fid - inner join {{ ref('int_artifacts') }} - on stg_farcaster__addresses.address = int_artifacts.artifact_name - where int_users.user_source = 'FARCASTER' -) - -select - user_id, - user_source, - user_source_id, - display_name, - artifact_id, - artifact_source, - artifact_namespace, - artifact_name -from farcaster_users diff --git a/warehouse/dbt/models/intermediate/metrics/onchain/int_onchain_metric__trusted_transactions.sql b/warehouse/dbt/models/intermediate/metrics/onchain/int_onchain_metric__trusted_transactions.sql deleted file mode 100644 index 08a6b713e..000000000 --- a/warehouse/dbt/models/intermediate/metrics/onchain/int_onchain_metric__trusted_transactions.sql +++ /dev/null @@ -1,18 +0,0 @@ -select - events.project_id, - events.event_source, - time_intervals.time_interval, - 'transaction_count' as metric, - SUM(events.amount) as amount -from {{ ref('int_events_daily_to_project') }} as events -cross join {{ ref('int_time_intervals') }} as time_intervals -inner join {{ ref('int_artifacts_by_user') }} as artifacts_by_user - on events.from_artifact_id = artifacts_by_user.artifact_id -where - events.event_type = 'CONTRACT_INVOCATION_SUCCESS_DAILY_COUNT' - and events.bucket_day >= time_intervals.start_date - and artifacts_by_user.user_id is not null -group by - events.project_id, - events.event_source, - time_intervals.time_interval diff --git a/warehouse/dbt/models/intermediate/users/int_addresses_by_user.sql b/warehouse/dbt/models/intermediate/users/int_addresses_by_user.sql new file mode 100644 index 000000000..65a47b76b --- /dev/null +++ b/warehouse/dbt/models/intermediate/users/int_addresses_by_user.sql @@ -0,0 +1,49 @@ +with first_time_addresses as ( + select + address, + MIN(first_block_timestamp) as first_block_timestamp + from {{ ref('int_first_time_addresses') }} + group by address +), + +potential_bots as ( + select distinct address + from {{ ref('int_potential_bots') }} +), + +farcaster_users as ( + select + stg_farcaster__addresses.address, + stg_farcaster__profiles.username as farcaster_username, + CAST(stg_farcaster__addresses.fid as int64) as farcaster_id + from {{ ref('stg_farcaster__addresses') }} + inner join {{ ref('stg_farcaster__profiles') }} + on stg_farcaster__addresses.fid = stg_farcaster__profiles.farcaster_id + where LEFT(stg_farcaster__addresses.address, 2) = '0x' +), + +lens_users as ( + select + stg_lens__owners.profile_id as lens_id, + stg_lens__profiles.full_name as lens_username, + LOWER(stg_lens__owners.owned_by) as address + from {{ ref('stg_lens__owners') }} + inner join {{ ref('stg_lens__profiles') }} + on stg_lens__owners.profile_id = stg_lens__profiles.lens_profile_id +) + +select + first_time_addresses.address, + first_time_addresses.first_block_timestamp, + farcaster_users.farcaster_id, + farcaster_users.farcaster_username, + lens_users.lens_id, + lens_users.lens_username, + COALESCE(potential_bots.address is not null, false) as potential_bot +from first_time_addresses +left join potential_bots + on first_time_addresses.address = potential_bots.address +left join farcaster_users + on first_time_addresses.address = farcaster_users.address +left join lens_users + on first_time_addresses.address = lens_users.address diff --git a/warehouse/dbt/models/intermediate/users/int_first_time_addresses.sql b/warehouse/dbt/models/intermediate/users/int_first_time_addresses.sql new file mode 100644 index 000000000..62b4bdf1e --- /dev/null +++ b/warehouse/dbt/models/intermediate/users/int_first_time_addresses.sql @@ -0,0 +1,22 @@ +{% set network_names = [ + 'base', + 'frax', + 'metal', + 'mode', + 'optimism', + 'zora' +] %} -- +{% if target.name == 'production' %} +{# This is a temporary measure for now to cut costs on the playground #} +{% set network_names = network_names + ['ethereum'] %} +{% endif %} + +{% for network_name in network_names %} + + select * from {{ ref('stg_%s__first_time_addresses' % network_name) }} + + {% if not loop.last %} + union all + {% endif %} + +{% endfor %} diff --git a/warehouse/dbt/models/intermediate/directory/int_potential_bots.sql b/warehouse/dbt/models/intermediate/users/int_potential_bots.sql similarity index 96% rename from warehouse/dbt/models/intermediate/directory/int_potential_bots.sql rename to warehouse/dbt/models/intermediate/users/int_potential_bots.sql index e8007d960..289ea8827 100644 --- a/warehouse/dbt/models/intermediate/directory/int_potential_bots.sql +++ b/warehouse/dbt/models/intermediate/users/int_potential_bots.sql @@ -3,7 +3,7 @@ 'frax', 'metal', 'mode', - 'pgn', + 'optimism', 'zora' ] %} -- {% if target.name == 'production' %} diff --git a/warehouse/dbt/models/intermediate/users/int_user_addresses.sql b/warehouse/dbt/models/intermediate/users/int_user_addresses.sql deleted file mode 100644 index 4796a72ee..000000000 --- a/warehouse/dbt/models/intermediate/users/int_user_addresses.sql +++ /dev/null @@ -1,18 +0,0 @@ -{# - This model is a WIP and is not yet ready for production use. -#} - -with user_data as ( - select - artifact_id, - MAX(rfm_recency) as r, - MAX(rfm_frequency) as f, - MAX(rfm_ecosystem) as e - from {{ ref('int_address_rfm_segments_by_project') }} - group by artifact_id -) - -select - artifact_id as user_id, - (r > 2 and f > 2 and e > 2) as is_trusted -from user_data diff --git a/warehouse/dbt/models/intermediate/directory/int_users.sql b/warehouse/dbt/models/intermediate/users/int_users.sql similarity index 100% rename from warehouse/dbt/models/intermediate/directory/int_users.sql rename to warehouse/dbt/models/intermediate/users/int_users.sql diff --git a/warehouse/dbt/models/staging/optimism/stg_optimism__first_time_addresses.sql b/warehouse/dbt/models/staging/optimism/stg_optimism__first_time_addresses.sql new file mode 100644 index 000000000..81fe8f781 --- /dev/null +++ b/warehouse/dbt/models/staging/optimism/stg_optimism__first_time_addresses.sql @@ -0,0 +1,14 @@ +{{ + config( + materialized='incremental', + partition_by={ + "field": "first_block_timestamp", + "data_type": "timestamp", + "granularity": "day", + }, + unique_id="address", + on_schema_change="append_new_columns", + incremental_strategy="insert_overwrite" + ) +}} +{{ first_time_addresses("optimism") }} diff --git a/warehouse/dbt/models/staging/optimism/stg_optimism__potential_bots.sql b/warehouse/dbt/models/staging/optimism/stg_optimism__potential_bots.sql new file mode 100644 index 000000000..edffb3b0b --- /dev/null +++ b/warehouse/dbt/models/staging/optimism/stg_optimism__potential_bots.sql @@ -0,0 +1,12 @@ +{{ config( + materialized='table', + partition_by={ + "field": "min_block_time", + "data_type": "timestamp", + "granularity": "day", + }, +) }} + +{{ + potential_bots("optimism") +}}