Fix int_derived_contracts (#1690)
* Fix and clean up int_derived_contracts

* implements small fixes

* fix linting error

* Fix lint errors

* Add cloudquery as separate pipeline

---------

Co-authored-by: Carl Cervone <[email protected]>
ravenac95 and ccerv1 authored Jun 21, 2024
1 parent e984650 commit 4951ca4
Showing 2 changed files with 181 additions and 31 deletions.
128 changes: 128 additions & 0 deletions .github/workflows/warehouse-run-data-pipeline-cloudquery.yml
@@ -0,0 +1,128 @@
name: warehouse-run-data-pipeline-cloudquery
env:
  X_GITHUB_GRAPHQL_API: ${{ vars.X_GITHUB_GRAPHQL_API }}
  X_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  CLOUDQUERY_VERSION: 5.5.0
  CLOUDQUERY_FILE_DIRECTORY: /tmp/cloudquery
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  CLOUDSQL_DB_USER: ${{ secrets.CLOUDSQL_DB_USER }}
  CLOUDSQL_DB_PASSWORD: ${{ secrets.CLOUDSQL_DB_PASSWORD }}
  CLOUDSQL_DB_NAME: ${{ vars.CLOUDSQL_DB_NAME }}
  CLOUDSQL_REGION: ${{ vars.CLOUDSQL_REGION }}
  CLOUDSQL_INSTANCE_ID: ${{ vars.CLOUDSQL_INSTANCE_ID }}
  GOOGLE_PROJECT_ID: ${{ vars.GOOGLE_PROJECT_ID }}
  CLOUDSTORAGE_BUCKET_NAME: ${{ vars.CLOUDSTORAGE_BUCKET_NAME }}
  BIGQUERY_DATASET_ID: ${{ vars.BIGQUERY_DATASET_ID }}

# For now this only runs on a schedule once a day. Once we have made some of the
# plugin workflows more incremental we will run this on _every_ commit to main
on:
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:
    inputs:
      docker_tag:
        description: The docker tag to use for cloudquery plugins (only)
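  # A schedule trigger is not declared yet, despite the comment above; a
  # hypothetical daily run (cron times are UTC) would look like:
  # schedule:
  #   - cron: '0 8 * * *'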

jobs:
  warehouse-run-data-pipeline-cloudquery:
    name: warehouse-run-data-pipeline-cloudquery
    environment: indexer
    runs-on: ubuntu-latest

    permissions:
      contents: 'read'
      id-token: 'write'

    env:
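      # GitHub Actions expressions have no ternary operator; the
      # `cond && a || b` idiom below emulates one, so DOCKER_TAG falls back
      # to the commit SHA whenever no docker_tag input is supplied. (Safe
      # here because a non-empty tag string is always truthy.)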
      DOCKER_TAG: ${{ inputs.docker_tag != '' && inputs.docker_tag || github.sha }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: 'Login to GitHub Container Registry'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: "Setup Python, Poetry and Dependencies"
        uses: packetcoders/action-setup-cache-python-poetry@main
        with:
          python-version: 3.12
          poetry-version: 1.8.2

      - name: Run poetry install
        run: |
          poetry install

      # At this time this auth isn't working for dbt
      # - uses: 'google-github-actions/auth@v2'
      #   with:
      #     service_account: [email protected]
      #     workload_identity_provider: projects/1054148520225/locations/global/workloadIdentityPools/github/providers/oso-github-actions
      #     create_credentials_file: true
      #     access_token_lifetime: 3600s

      - uses: 'google-github-actions/auth@v2'
        with:
          credentials_json: '${{ secrets.GOOGLE_CREDENTIALS_JSON }}'
          create_credentials_file: true

      - name: 'Set up Cloud SDK'
        uses: 'google-github-actions/setup-gcloud@v2'
        with:
          version: '>= 363.0.0'

      - name: Download and install cloudquery
        if: ${{ inputs.skip_cloudquery_plugins != 'true' }}
        run: |
          curl -L https://github.com/cloudquery/cloudquery/releases/download/cli-v${CLOUDQUERY_VERSION}/cloudquery_linux_amd64 -o /tmp/cloudquery &&
          chmod a+x /tmp/cloudquery &&
          mv /tmp/cloudquery /usr/local/bin/cloudquery

      # For now this is a bit of a hack for the oss-directory plugins as the output from one plugin is the input to
      # another. Ideally we would simply tell whatever system to run and it will handle dependencies.
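      # Concretely: the oss-directory sync below writes JSON files under
      # ${CLOUDQUERY_FILE_DIRECTORY}/projects_ossd, the following step
      # concatenates them into a single projects.json, and the
      # github-resolve-repos plugin further down reads from that same
      # directory via a mounted volume.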
      - name: Run cloudquery for oss-directory
        if: ${{ inputs.skip_cloudquery_plugins != 'true' }}
        run: |
          cloudquery sync .github/workflows/cloudquery/oss-directory.yml --log-level debug --log-console

      - name: Concat the project jsonl files (if there are many)
        if: ${{ inputs.skip_cloudquery_plugins != 'true' }}
        run: |
          ls -laht ${CLOUDQUERY_FILE_DIRECTORY}/ &&
          find ${CLOUDQUERY_FILE_DIRECTORY}/projects_ossd -name "*.json" -type f -exec cat {} \; > ${CLOUDQUERY_FILE_DIRECTORY}/projects.json &&
          head -n 5 ${CLOUDQUERY_FILE_DIRECTORY}/projects.json

      - uses: actions/upload-artifact@v4
        if: ${{ inputs.skip_cloudquery_plugins != 'true' }}
        with:
          name: projects.json
          path: ${{ env.CLOUDQUERY_FILE_DIRECTORY }}/projects.json

      # This is currently a punt on how to run this properly because a previous cloudquery
      # plugin's output can't be used as input into a different one.
      # We start the github-resolve-repos container with a volume that can access the project
      # file, and use it as the source for the sync.
      # Ideally we'd either have a plugin that can act as both a destination/source (so we can
      # chain multiple plugins), or potentially we use something else that can achieve a similar thing.
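      # Note: the sync config used below (github-resolve-repos.yml) is assumed
      # to declare the locally served container as its source, i.e. a gRPC
      # registry pointing at localhost:7777, matching the
      # `serve --address 0.0.0.0:7777` invocation in the next step.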
      - name: Run cloudquery for github-resolve-directory
        if: ${{ inputs.skip_cloudquery_plugins != 'true' }}
        run: |
          docker run -d --rm -p 7777:7777 \
            -v ${CLOUDQUERY_FILE_DIRECTORY}:${CLOUDQUERY_FILE_DIRECTORY} \
            --name github-resolve-repos \
            ghcr.io/opensource-observer/cloudquery-github-resolve-repos:${DOCKER_TAG} \
            serve --address 0.0.0.0:7777 &&
          cloudquery sync .github/workflows/cloudquery/github-resolve-repos.yml --log-level debug --log-console &&
          docker logs github-resolve-repos 2>&1 | tee ${CLOUDQUERY_FILE_DIRECTORY}/github-resolve-repos.log &&
          docker stop github-resolve-repos

      - uses: actions/upload-artifact@v4
        if: ${{ inputs.skip_cloudquery_plugins != 'true' }}
        with:
          name: github-resolve-repos.log
          path: ${{ env.CLOUDQUERY_FILE_DIRECTORY }}/github-resolve-repos.log
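(Usage note: since the workflow is workflow_dispatch-only, it can be started from the Actions tab or, assuming the GitHub CLI is available, with `gh workflow run warehouse-run-data-pipeline-cloudquery.yml -f docker_tag=<tag>`; omitting the input makes DOCKER_TAG fall back to the commit SHA.)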
84 changes: 53 additions & 31 deletions int_derived_contracts.sql
@@ -1,51 +1,73 @@
-with factories_and_deployers as (
+with contracts_deployed_no_factory as (
+    {#
+        This gets all of the contracts that weren't deployed with a factory
+    #}
-    select
-        factories.block_timestamp,
-        factories.transaction_hash,
-        factories.network,
-        factories.originating_address as deployer_address,
-        factories.contract_address as contract_address
-    from {{ ref("int_factories") }} as factories
-    inner join {{ ref("int_deployers") }} as deployers
-        on
-            factories.factory_address = deployers.contract_address
-            and factories.network = deployers.network
-    union all
     select
-        block_timestamp,
-        transaction_hash,
         network,
         deployer_address,
         contract_address
     from {{ ref("int_deployers") }}
     where contract_address is not null
 ),
+contracts_deployed_via_factory as (
+    {#
+        This gets all of the contracts deployed by any factory.
+        Deployer Address is the EOA address that started the transaction
+    #}
+    select
+        network,
+        originating_address as deployer_address,
+        contract_address as contract_address
+    from {{ ref("int_factories") }}
+    where contract_address is not null
+),
-factories_and_proxies as (
+contracts_deployed_by_safe_or_known_proxy as (
+    {#
+        This gets all of the contracts deployed by a safe or other known proxy
+        Deployer address is a proxy (safe or other known proxy) that deployed the contract
+    #}
     select
-        factories.block_timestamp,
-        factories.transaction_hash,
         factories.network,
         proxies.address as deployer_address,
         factories.contract_address as contract_address
     from {{ ref("int_factories") }} as factories
     inner join {{ ref("int_proxies") }} as proxies
         on
-            factories.originating_address = proxies.address
+            factories.originating_contract = proxies.address
             and factories.network = proxies.network
     where contract_address is not null
 ),
+derived_contracts as (
+    select
+        network,
+        deployer_address,
+        contract_address
+    from contracts_deployed_no_factory
+    union all
+    select
+        network,
+        deployer_address,
+        contract_address
+    from contracts_deployed_via_factory
+    union all
+    select
+        network,
+        deployer_address,
+        contract_address
+    from contracts_deployed_by_safe_or_known_proxy
+)
-select
-    block_timestamp,
-    transaction_hash,
-    network,
-    deployer_address,
-    contract_address
-from factories_and_deployers
-union all
-select
-    block_timestamp,
-    transaction_hash,
+select distinct
     network,
     deployer_address,
     contract_address
-from factories_and_proxies
+from derived_contracts
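The substantive fix in this model is the proxy join: contracts deployed through a safe or known proxy are now matched on factories.originating_contract (presumably the contract invoked in the deploying transaction) rather than factories.originating_address, so the recorded deployer is the proxy itself instead of the originating EOA. Because the rewrite also unions three potentially overlapping CTEs, the final select gained a distinct to collapse tuples that arrive from more than one branch. A minimal sketch of that dedup behavior, using hypothetical literal values (plain SQL, no dbt refs required):

with derived_contracts as (
    select 'optimism' as network, '0xdead' as deployer_address, '0xbeef' as contract_address
    union all
    select 'optimism', '0xdead', '0xbeef'  -- same tuple arriving via a second branch
    union all
    select 'base', '0xcafe', '0xf00d'
)

select distinct
    network,
    deployer_address,
    contract_address
from derived_contracts
-- returns 2 rows rather than 3; without distinct the duplicate would flow downstream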
