Creates a generic meltano pipeline to bigquery (#1250)
* update meltano path

* fix

* wip mapping

* add additional user to psql db

* fix mapping

* start cloudsql proxy manually

* fix

* fix

* fix

* fix

* fix

* fix

* ignore clean up errors

* more fixes

* fix

* fix

* fix

* fix
ravenac95 authored Apr 19, 2024
1 parent 6a70e87 commit 7f0c96d
Showing 5 changed files with 81 additions and 10 deletions.
46 changes: 41 additions & 5 deletions .github/workflows/meltano/action.yml
@@ -13,26 +13,62 @@ inputs:
meltano_path:
description: The path to meltano
required: true
meltano_state_backend_uri:
description: The state backend uri for meltano
required: true
google_credentials_path:
description: The path to the application secrets
required: true
db_instance_id:
description: DB instance Id
required: true
db_user:
description: DB user
required: true
db_name:
description: DB name
required: true
db_password:
description: DB password
required: true

runs:
using: "composite"
steps:
- uses: actions/setup-python@v5
with:
python-version: '3.10.13'
python-version: '3.11.9'

- uses: snok/install-poetry@v1
with:
version: '1.7.1'

- name: Runner temp
shell: bash
run: |
mkdir -p ${{ runner.temp }}/meltano-tmp
- name: Run cloudsql proxy
shell: bash
run: |
curl -o cloud-sql-proxy https://storage.googleapis.com/cloud-sql-connectors/cloud-sql-proxy/v2.11.0/cloud-sql-proxy.linux.amd64 &&
chmod +x cloud-sql-proxy
./cloud-sql-proxy ${{ inputs.db_instance_id }} &
- name: Install poetry deps for meltano
shell: bash
run: |
cd ${{ inputs.meltano_path }} && poetry install && meltano install
cd ${{ inputs.meltano_path }} && poetry install --no-root && poetry run meltano install
- name: Run tap:${{ inputs.tap }} into target:${{ inputs.target }}
shell: bash
# This is for testing for now
run: |
env
env &&
cd ${{ inputs.meltano_path }} &&
poetry run meltano run ${{ inputs.tap }} ${{ inputs.target }}
env:
MELTANO_DATABASE_URI: postgresql+psycopg://${{ inputs.db_user }}:${{ inputs.db_password }}@127.0.0.1:5432/${{ inputs.db_name }}
MELTANO_STATE_BACKEND_URI: ${{ inputs.meltano_state_backend_uri }}
TMPDIR: ${{ runner.temp }}/meltano-tmp
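Taken together, the new steps start the Cloud SQL Auth Proxy and point Meltano's system database at it through MELTANO_DATABASE_URI. A rough local equivalent of what the action now does, assuming the inputs are exported as shell variables (the variable names below are placeholders, not part of the action):

# download and start the Cloud SQL Auth Proxy; for Postgres it listens on 127.0.0.1:5432
curl -o cloud-sql-proxy https://storage.googleapis.com/cloud-sql-connectors/cloud-sql-proxy/v2.11.0/cloud-sql-proxy.linux.amd64
chmod +x cloud-sql-proxy
./cloud-sql-proxy "$DB_INSTANCE_ID" &

# Meltano's system database and state backend, as set in the action's env block
export MELTANO_DATABASE_URI="postgresql+psycopg://$DB_USER:$DB_PASSWORD@127.0.0.1:5432/$DB_NAME"
export MELTANO_STATE_BACKEND_URI="$STATE_BACKEND_URI"

# install dependencies and run the pipeline
cd "$MELTANO_PATH"
poetry install --no-root
poetry run meltano install
poetry run meltano run "$TAP" "$TARGET"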

11 changes: 8 additions & 3 deletions .github/workflows/warehouse-meltano-ecosystems-ost.yml
@@ -8,7 +8,7 @@ env:
TAP_ECOSYSTEMS_OST_AIRBYTE_CONFIG_DATABASE: ${{ secrets.TAP_ECOSYSTEMS_OST_AIRBYTE_CONFIG_DATABASE }}
TAP_ECOSYSTEMS_OST_AIRBYTE_CONFIG_PORT: ${{ secrets.TAP_ECOSYSTEMS_OST_AIRBYTE_CONFIG_PORT }}
TAP_ECOSYSTEMS_OST_AIRBYTE_CONFIG_USERNAME: ${{ secrets.TAP_ECOSYSTEMS_OST_AIRBYTE_CONFIG_USERNAME }}
MELTANO_STATE_BACKEND_URI: ${{ secrets.MELTANO_STATE_BACKEND_URI }}
TARGET_BIGQUERY_PROJECT: ${{ vars.TARGET_BIGQUERY_PROJECT }}

on:
workflow_dispatch:
@@ -39,8 +39,13 @@ jobs:
with:
tap: tap-ecosystems-ost
target: target-bigquery
meltano_path: warehouse/meltano-setup
db_user: ${{ secrets.METADATA_DB_USER }}
db_name: ${{ secrets.METADATA_DB_NAME }}
db_password: ${{ secrets.METADATA_DB_PASSWORD }}
db_instance_id: ${{ secrets.METADATA_DB_INSTANCE_ID }}
meltano_state_backend_uri: ${{ secrets.MELTANO_STATE_BACKEND_URI }}
env:
TARGET_BIGQUERY_CREDENTIALS_PATH: ${{ env.GOOGLE_APPLICATION_CREDENTIALS }}
TARGET_BIGQUERY_BUCKET: oso-dataset-transfer-bucket
TARGET_BIGQUERY_PROJECT: opensource-observer
TARGET_BIGQUERY_DATASET: ecosystems-ost
TARGET_BIGQUERY_DATASET: ecosystems_ost
25 changes: 25 additions & 0 deletions ops/tf-modules/warehouse/main.tf
@@ -19,6 +19,8 @@ locals {
readonly_service_account_name = "${var.name}-readonly"
cloudsql_name = "${var.name}-psql"
cloudsql_db_user = "${var.name}-admin"
metadata_db_name = "${var.name}-metadata"
metadata_db_user = "${var.name}-metadata-admin"
dataset_id = replace(var.name, "-", "_")
raw_dataset_id = replace("${var.name}_raw_sources", "-", "_")
}
@@ -108,6 +110,11 @@ resource "google_storage_bucket" "dataset_transfer" {
uniform_bucket_level_access = true
}

resource "random_password" "metadata_user_temp_password" {
length = 24
special = true
}

###
# CloudSQL instance
###
@@ -125,6 +132,24 @@ module "warehouse_cloudsql" {
dw_name = var.name
}
ip_configuration = var.cloudsql_ip_configuration

additional_databases = [
{
name = local.metadata_db_name
charset = "UTF8"
collation = "en_US.UTF8"
},
]

# At the moment this user needs to be manually configured with permissions to
# the metadata database
additional_users = [
{
name = local.metadata_db_user,
password = random_password.metadata_user_temp_password.result
random_password = false
}
]
}

###
4 changes: 4 additions & 0 deletions ops/tf-modules/warehouse/outputs.tf
@@ -1,3 +1,7 @@
output "dataset_id" {
value = google_bigquery_dataset.dataset.id
}

output "metadata_user_temp_password" {
value = random_password.metadata_user_temp_password.result
}
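The Terraform comment above notes that the new metadata user still has to be granted access to the metadata database by hand. A minimal sketch of that manual step, assuming psql is run through the same Cloud SQL proxy and using the temporary password exposed by the new output (the GRANT statement is illustrative and not part of this commit; <name> stands for var.name):

# read the temporary password generated by random_password
terraform output -raw metadata_user_temp_password

# connect as an admin user through the Cloud SQL proxy and grant access
psql "host=127.0.0.1 port=5432 user=<name>-admin dbname=<name>-metadata" \
  -c 'GRANT ALL PRIVILEGES ON DATABASE "<name>-metadata" TO "<name>-metadata-admin";'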
5 changes: 3 additions & 2 deletions warehouse/meltano-setup/meltano.yml
@@ -15,7 +15,7 @@ plugins:
pip_url: git+https://github.com/opensource-observer/tap-airbyte-wrapper.git
config:
nullable_generated_fields:
- "*._ab_cdc_deleted_at"
- '*._ab_cdc_deleted_at'
airbyte_spec:
image: airbyte/source-postgres
tag: 3.3.26
@@ -31,6 +31,7 @@
publication: oso_publication
replication_slot: oso_slot
initial_waiting_seconds: 10
force_docker_as_current_user: true
select:
- projects.*
- issues.*
@@ -43,7 +44,7 @@
pip_url: git+https://github.com/opensource-observer/tap-airbyte-wrapper.git
config:
nullable_generated_fields:
- "*._ab_cdc_deleted_at"
- '*._ab_cdc_deleted_at'
airbyte_spec:
image: airbyte/source-postgres
tag: 3.3.26
