diff --git a/.github/scripts/publish-docker-containers.sh b/.github/scripts/publish-docker-containers.sh index d1be002fd..ce989fe27 100644 --- a/.github/scripts/publish-docker-containers.sh +++ b/.github/scripts/publish-docker-containers.sh @@ -1,5 +1,5 @@ #!/bin/bash -set -euo pipefail +set -euxo pipefail SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" cd "${SCRIPT_DIR}/../../" @@ -22,8 +22,8 @@ for path in $images_to_build; do -t ${latest_image} \ --label "org.opencontainers.image.source=https://github.com/opensource-observer/oso" \ --label "observer.opensource.oso.sha=${tag}" \ - --build-arg IMAGE_NAME=${image_name} - -f docker/images/${IMAGE_NAME} \ + --build-arg IMAGE_NAME=${image_name} \ + -f docker/images/${image_name}/Dockerfile \ . echo "Publishing the image to ${sha_image}" docker push "${sha_image}" diff --git a/.github/workflows/warehouse-publish-docker-containers.yml b/.github/workflows/warehouse-publish-docker-containers.yml index dc836f8d4..b499e2672 100644 --- a/.github/workflows/warehouse-publish-docker-containers.yml +++ b/.github/workflows/warehouse-publish-docker-containers.yml @@ -35,6 +35,6 @@ jobs: - name: Package and publish cloudquery plugins run: bash .github/scripts/publish-cloudquery-plugins.sh - # - name: Package and publish other docker containers - # run: bash .github/scripts/publish-docker-containers-plugins.sh + - name: Package and publish other docker containers + run: bash .github/scripts/publish-docker-containers.sh \ No newline at end of file diff --git a/docker/images/dagster-dask/Dockerfile b/docker/images/dagster-dask/Dockerfile index ee363fa25..63c545081 100644 --- a/docker/images/dagster-dask/Dockerfile +++ b/docker/images/dagster-dask/Dockerfile @@ -11,27 +11,31 @@ RUN apt-get install -y curl && \ python3.12 get-pip.py RUN pip3.12 install poetry - -RUN mkdir -p /usr/bin/app && \ - bash -c "mkdir -p /usr/bin/app/warehouse/{bq2cloudsql,oso_dagster,oso_lets_go,common}" && \ - touch /usr/bin/app/warehouse/bq2cloudsql/__init__.py && \ - touch /usr/bin/app/warehouse/bq2cloudsql/script.py && \ - touch /usr/bin/app/warehouse/oso_dagster/__init__.py && \ - touch /usr/bin/app/warehouse/oso_lets_go/__init__.py && \ - touch /usr/bin/app/warehouse/oso_lets_go/wizard.py && \ - touch /usr/bin/app/warehouse/common/__init__.py - -WORKDIR /usr/bin/app -COPY pyproject.toml poetry.lock /usr/bin/app/ -COPY warehouse/cloudquery-example-plugin /usr/bin/app/warehouse/cloudquery-example-plugin +ENV DAGSTER_DBT_PARSE_PROJECT_ON_LOAD=1 +ENV DAGSTER_DBT_GENERATE_AND_AUTH_GCP=1 + +RUN mkdir -p /usr/src/app && \ + bash -c "mkdir -p /usr/src/app/warehouse/{bq2cloudsql,oso_dagster,oso_lets_go,common}" && \ + touch /usr/src/app/warehouse/bq2cloudsql/__init__.py && \ + touch /usr/src/app/warehouse/bq2cloudsql/script.py && \ + touch /usr/src/app/warehouse/oso_dagster/__init__.py && \ + touch /usr/src/app/warehouse/oso_lets_go/__init__.py && \ + touch /usr/src/app/warehouse/oso_lets_go/wizard.py && \ + touch /usr/src/app/warehouse/common/__init__.py + +WORKDIR /usr/src/app +COPY pyproject.toml poetry.lock /usr/src/app/ +COPY warehouse/cloudquery-example-plugin /usr/src/app/warehouse/cloudquery-example-plugin # Install everything onto the system path RUN poetry config virtualenvs.create false && \ poetry install -RUN rm -r /usr/bin/app/warehouse +RUN rm -r /usr/src/app/warehouse -COPY . /usr/bin/app +COPY . /usr/src/app RUN poetry config virtualenvs.create false && \ - poetry install \ No newline at end of file + poetry install + +ENTRYPOINT [ "dagster", "api", "grpc", "-p", "3030"] \ No newline at end of file diff --git a/ops/clusters/warehouse/flux-system/gotk-sync.yaml b/ops/clusters/warehouse/flux-system/gotk-sync.yaml index 90a97f81a..86aa70a30 100644 --- a/ops/clusters/warehouse/flux-system/gotk-sync.yaml +++ b/ops/clusters/warehouse/flux-system/gotk-sync.yaml @@ -6,9 +6,9 @@ metadata: name: flux-system namespace: flux-system spec: - interval: 1m + interval: 15s ref: - branch: main + branch: ravenac95/test-flux-config secretRef: name: flux-system url: https://github.com/opensource-observer/oso.git diff --git a/ops/k8s-apps/base/cloudsql-proxy/proxy.yaml b/ops/k8s-apps/base/cloudsql-proxy/proxy.yaml index 3e5370204..b45047719 100644 --- a/ops/k8s-apps/base/cloudsql-proxy/proxy.yaml +++ b/ops/k8s-apps/base/cloudsql-proxy/proxy.yaml @@ -39,4 +39,5 @@ spec: - instance: oso-playground-psql region: us-central1 project: opensource-observer - port: 5432 \ No newline at end of file + port: 5432 + \ No newline at end of file diff --git a/ops/k8s-apps/base/dagster/dagster.yaml b/ops/k8s-apps/base/dagster/dagster.yaml index 7c45742a6..f2df1571f 100644 --- a/ops/k8s-apps/base/dagster/dagster.yaml +++ b/ops/k8s-apps/base/dagster/dagster.yaml @@ -41,26 +41,32 @@ spec: k8sRunLauncher: envConfigMaps: - name: dagster-oso-extra-env - # runK8sConfig: - # podSpecConfig: - # tolerations: - # - key: pool_type - # effect: NoSchedule - # operator: Equal - # value: spot - # nodeSelector: - # pool_type: spot + runK8sConfig: + podSpecConfig: + tolerations: + - key: pool_type + effect: NoSchedule + operator: Equal + value: spot + nodeSelector: + pool_type: spot dagsterWebserver: + env: + - name: DAGSTER_DBT_GENERATE_AND_AUTH_GCP + value: "1" envConfigMaps: - name: dagster-oso-extra-env resources: limits: - cpu: 150m - memory: 128Mi + cpu: 500m + memory: 768Mi requests: - cpu: 100m - memory: 128Mi + cpu: 250m + memory: 512Mi dagsterDaemon: + env: + - name: DAGSTER_DBT_GENERATE_AND_AUTH_GCP + value: "1" envConfigMaps: - name: dagster-oso-extra-env resources: @@ -70,6 +76,23 @@ spec: requests: cpu: 400m memory: 256Mi + dagster-user-deployments: + deployments: + - name: "oso-dagster" + image: + repository: "ghcr.io/opensource-observer/dagster-dask" + tag: latest + pullPolicy: Always + env: + - name: DAGSTER_DBT_GENERATE_AND_AUTH_GCP + value: "1" + envConfigMaps: + - name: dagster-oso-extra-env + port: 3030 + dagsterApiGrpcArgs: + - "-m" + - "oso_dagster.definitions" + postRenderers: - kustomize: patches: diff --git a/ops/tf-modules/warehouse-cluster/main.tf b/ops/tf-modules/warehouse-cluster/main.tf index 4daa8fd2c..e806a2145 100644 --- a/ops/tf-modules/warehouse-cluster/main.tf +++ b/ops/tf-modules/warehouse-cluster/main.tf @@ -23,7 +23,7 @@ locals { auto_upgrade = true service_account = local.node_service_account_email preemptible = false - initial_node_count = 1 + initial_node_count = 0 }, # The spot pool is for workloads that need spot { @@ -201,3 +201,20 @@ module "gke" { node_pools_tags = local.node_pool_tags } + +# Dagster bucket. In the future it would make more sense that this is managed at +# the application level (e.g. some kubernetes operator) +resource "google_storage_bucket" "dagster" { + name = "${var.dagster_bucket_prefix}-dagster-bucket" + location = var.dagster_bucket_location + force_destroy = true + + uniform_bucket_level_access = true +} + +resource "google_storage_bucket_iam_member" "dagster_bucket_admin" { + for_each = toset(var.dagster_bucket_rw_principals) + bucket = google_storage_bucket.dagster.name + role = "roles/storage.admin" + member = each.key +} diff --git a/ops/tf-modules/warehouse-cluster/variables.tf b/ops/tf-modules/warehouse-cluster/variables.tf index 50546b42b..26dffada9 100644 --- a/ops/tf-modules/warehouse-cluster/variables.tf +++ b/ops/tf-modules/warehouse-cluster/variables.tf @@ -8,6 +8,21 @@ variable "cluster_name" { description = "Name for the gke cluster" } +variable "dagster_bucket_prefix" { + type = string +} + +variable "dagster_bucket_location" { + type = string + default = "US" +} + +variable "dagster_bucket_rw_principals" { + type = list(string) + description = "List of principals to give rw on our data transfer bucket" + default = [] +} + variable "default_node_pool_cluster_zones" { type = list(string) description = "The default node pool is intended to be standard vms (non-volatile). This should be a smaller set of cluster zones" diff --git a/ops/tf-modules/warehouse/main.tf b/ops/tf-modules/warehouse/main.tf index 6421479f9..75517b26b 100644 --- a/ops/tf-modules/warehouse/main.tf +++ b/ops/tf-modules/warehouse/main.tf @@ -162,6 +162,19 @@ module "warehouse_cloudsql" { ] } +resource "google_project_iam_member" "project" { + for_each = toset(var.additional_cloudsql_client_principals) + project = data.google_project.project.project_id + role = "roles/cloudsql.client" + member = each.value + + condition { + title = "only_${local.dataset_id}_db_client" + description = "Restrict access to a database instance: ${local.cloudsql_name}" + expression = "resource.name.startsWith(\"projects/${data.google_project.project.project_id}/instances/${local.cloudsql_name}\")" + } +} + ### # Add permissions for the cloudsql user to read from the bucket ### diff --git a/ops/tf-modules/warehouse/variables.tf b/ops/tf-modules/warehouse/variables.tf index 0152c9da4..67dd232fa 100644 --- a/ops/tf-modules/warehouse/variables.tf +++ b/ops/tf-modules/warehouse/variables.tf @@ -31,6 +31,12 @@ variable "cloudsql_name" { description = "CloudSQL instance name" } +variable "additional_cloudsql_client_principals" { + type = list(string) + description = "List of principals to give client access to the cloudsql instance" + default = [] +} + variable "bucket_rw_principals" { type = list(string) description = "List of principals to give rw on our data transfer bucket" diff --git a/poetry.lock b/poetry.lock index b97421246..2ed10186a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -763,13 +763,13 @@ test-randomorder = ["pytest-randomly"] [[package]] name = "dagster" -version = "1.7.2" +version = "1.7.6" description = "Dagster is an orchestration platform for the development, production, and observation of data assets." optional = false python-versions = "<3.13,>=3.8" files = [ - {file = "dagster-1.7.2-py3-none-any.whl", hash = "sha256:14e58f9ff4aefc16957cff1ce9db2de936038f9ce96980acb243db60cbc8ba64"}, - {file = "dagster-1.7.2.tar.gz", hash = "sha256:f9ff01529cb8f7685b147c0adc4726568417afb59fc2e39c550ccf0d1c10b8f6"}, + {file = "dagster-1.7.6-py3-none-any.whl", hash = "sha256:051d37ccdf4e98b33691d80ec88e1ad71e1c0e1486215c82ce67d4ab9e4cfd2d"}, + {file = "dagster-1.7.6.tar.gz", hash = "sha256:1de28afd0e2e3908ff111a0a14b2fb9423e1c9700c3acc865728c5fcce86ee7e"}, ] [package.dependencies] @@ -777,8 +777,9 @@ alembic = ">=1.2.1,<1.6.3 || >1.6.3,<1.7.0 || >1.7.0,<1.11.0 || >1.11.0" click = ">=5.0" coloredlogs = ">=6.1,<=14.0" croniter = ">=0.3.34" -dagster-pipes = "1.7.2" +dagster-pipes = "1.7.6" docstring-parser = "*" +filelock = "*" grpcio = ">=1.44.0" grpcio-health-checking = ">=1.44.0" Jinja2 = "*" @@ -816,22 +817,22 @@ docker = ["docker"] mypy = ["mypy (==1.8.0)"] pyright = ["pandas-stubs", "pyright (==1.1.356)", "types-PyYAML", "types-backports", "types-certifi", "types-chardet", "types-croniter", "types-cryptography", "types-mock", "types-paramiko", "types-pkg-resources", "types-pyOpenSSL", "types-python-dateutil", "types-pytz", "types-requests", "types-simplejson", "types-six", "types-sqlalchemy (==1.4.53.34)", "types-tabulate", "types-toml", "types-tzlocal"] ruff = ["ruff (==0.3.4)"] -test = ["buildkite-test-collector", "docker", "grpcio-tools (>=1.44.0)", "mock (==3.0.5)", "morefs[asynclocal]", "mypy-protobuf", "objgraph", "pytest (>=7.0.1)", "pytest-cov (==2.10.1)", "pytest-mock (==3.3.1)", "pytest-rerunfailures (==10.0)", "pytest-xdist (==3.5.0)", "rapidfuzz", "responses (<=0.23.1)", "syrupy (>=4.0.0)", "tox (==3.25.0)"] +test = ["buildkite-test-collector", "docker", "fsspec (<2024.5.0)", "grpcio-tools (>=1.44.0)", "mock (==3.0.5)", "morefs[asynclocal]", "mypy-protobuf", "objgraph", "pytest (>=7.0.1)", "pytest-cov (==2.10.1)", "pytest-mock (==3.3.1)", "pytest-rerunfailures (==10.0)", "pytest-xdist (==3.5.0)", "rapidfuzz", "responses (<=0.23.1)", "syrupy (>=4.0.0)", "tox (==3.25.0)"] [[package]] name = "dagster-dbt" -version = "0.23.2" +version = "0.23.6" description = "A Dagster integration for dbt" optional = false python-versions = "<3.13,>=3.8" files = [ - {file = "dagster-dbt-0.23.2.tar.gz", hash = "sha256:c07483b04738eed4ddb7e908be1fd491fbaabd653ee00c0fd740cde84810010d"}, - {file = "dagster_dbt-0.23.2-py3-none-any.whl", hash = "sha256:c526efb8a07e779bde284cee6e26b92552acec2471c2ea1e2d88e1164d83a732"}, + {file = "dagster-dbt-0.23.6.tar.gz", hash = "sha256:daadedaa06b61111c05cfd36a2d70b24c4dabed95e8c8cf2edab6ddbff4c6378"}, + {file = "dagster_dbt-0.23.6-py3-none-any.whl", hash = "sha256:669a09346d117010e3d62181d9a2f05cfbe84d7b142903fe6032526fa99af5fd"}, ] [package.dependencies] -dagster = "1.7.2" -dbt-core = ">=1.5,<1.8" +dagster = "1.7.6" +dbt-core = ">=1.6,<1.8" Jinja2 = "*" networkx = "*" orjson = "*" @@ -846,18 +847,18 @@ test = ["dagster-duckdb", "dagster-duckdb-pandas", "dbt-duckdb"] [[package]] name = "dagster-gcp" -version = "0.23.2" +version = "0.23.6" description = "Package for GCP-specific Dagster framework op and resource components." optional = false python-versions = "<3.13,>=3.8" files = [ - {file = "dagster-gcp-0.23.2.tar.gz", hash = "sha256:bd138be64c14efa5eed2179a6164ad24bad40bae8b3b86d993f3e9e14e8f7fca"}, - {file = "dagster_gcp-0.23.2-py3-none-any.whl", hash = "sha256:96437cef295943b35f5140ffefdd424ba4f4080c3ad2299d4bddde70138bb8ab"}, + {file = "dagster-gcp-0.23.6.tar.gz", hash = "sha256:c62181c38292ada341fd68909a8a6a1992f320e8592a8930ec8b539e35cb5253"}, + {file = "dagster_gcp-0.23.6-py3-none-any.whl", hash = "sha256:22588b8a52953f048faf9992968808c64f852dbba05b35dc4d78bae0c444346e"}, ] [package.dependencies] -dagster = "1.7.2" -dagster-pandas = "0.23.2" +dagster = "1.7.6" +dagster-pandas = "0.23.6" db-dtypes = "*" google-api-python-client = "*" google-cloud-bigquery = "*" @@ -869,17 +870,17 @@ pyarrow = ["pyarrow"] [[package]] name = "dagster-graphql" -version = "1.7.2" +version = "1.7.6" description = "The GraphQL frontend to python dagster." optional = false python-versions = "<3.13,>=3.8" files = [ - {file = "dagster-graphql-1.7.2.tar.gz", hash = "sha256:174fc39f9dcd75354fe89d93eefca5d6864cd1a0342e52fad0ec041209f6bdcd"}, - {file = "dagster_graphql-1.7.2-py3-none-any.whl", hash = "sha256:f431a9f6b7f3c6543c0eb45b3ee3e65a2f046a4e5c68965b7a6133d4bc215fef"}, + {file = "dagster-graphql-1.7.6.tar.gz", hash = "sha256:e2d17dffa4baf3216ef2e66a92c5c52dd64eeac1f34bbb38277dbe54fee0eb51"}, + {file = "dagster_graphql-1.7.6-py3-none-any.whl", hash = "sha256:874398d5e26a3406df121613ae644026d84ff30a4a9f2fb980c34cd7fbb1d961"}, ] [package.dependencies] -dagster = "1.7.2" +dagster = "1.7.6" gql = {version = ">=3,<4", extras = ["requests"]} graphene = ">=3,<4" requests = "*" @@ -887,45 +888,60 @@ starlette = "*" [[package]] name = "dagster-pandas" -version = "0.23.2" +version = "0.23.6" description = "Utilities and examples for working with pandas and dagster, an opinionated framework for expressing data pipelines" optional = false python-versions = "<3.13,>=3.8" files = [ - {file = "dagster-pandas-0.23.2.tar.gz", hash = "sha256:cb2510103d1dbf74183fadd1878eba9a5a3968bcccffc27eae6a2862bd53464a"}, - {file = "dagster_pandas-0.23.2-py3-none-any.whl", hash = "sha256:e717173fdc37dc8b6c7adf4cf1e16abc3ee5137916eed8017aac2aace78ee265"}, + {file = "dagster-pandas-0.23.6.tar.gz", hash = "sha256:8385073e9d26a1858c965765ee722615291ee6bed1fe98ececf212af962c4158"}, + {file = "dagster_pandas-0.23.6-py3-none-any.whl", hash = "sha256:4ee6ad1133d3cbd812acdb0e97b1ab56ed321c7024f4a6bcc320e150d563da45"}, ] [package.dependencies] -dagster = "1.7.2" +dagster = "1.7.6" pandas = "*" [[package]] name = "dagster-pipes" -version = "1.7.2" +version = "1.7.6" description = "Toolkit for Dagster integrations with transform logic outside of Dagster" optional = false python-versions = "<3.13,>=3.8" files = [ - {file = "dagster-pipes-1.7.2.tar.gz", hash = "sha256:31fc3b833419389f4eeca85e91a0f27dec454cd822a4ffd6eb9cd3cf0e808e3b"}, - {file = "dagster_pipes-1.7.2-py3-none-any.whl", hash = "sha256:6393a21bcd61e602d3839dfbf9a7666b5928155d44168f87874f30d0733ac181"}, + {file = "dagster-pipes-1.7.6.tar.gz", hash = "sha256:25d05d95a6f6e79c0a932ce180c5891faf453f7299b4ff411e589b461dd3c315"}, + {file = "dagster_pipes-1.7.6-py3-none-any.whl", hash = "sha256:32bbff40d83727239727b98286ba989e3df1a478d571037c99f1f8e6683a039f"}, ] +[[package]] +name = "dagster-postgres" +version = "0.23.6" +description = "A Dagster integration for postgres" +optional = false +python-versions = "<3.13,>=3.8" +files = [ + {file = "dagster-postgres-0.23.6.tar.gz", hash = "sha256:c4d9a6fb764a8802955920554b630340d97ed33229f9d90e9185e6ddbe50355a"}, + {file = "dagster_postgres-0.23.6-py3-none-any.whl", hash = "sha256:94bab6247eee10ce3f6084e2a2928db48739c10c37cfeb3accba0ab18c15080b"}, +] + +[package.dependencies] +dagster = "1.7.6" +psycopg2-binary = "*" + [[package]] name = "dagster-webserver" -version = "1.7.2" +version = "1.7.6" description = "Web UI for dagster." optional = false python-versions = "<3.13,>=3.8" files = [ - {file = "dagster-webserver-1.7.2.tar.gz", hash = "sha256:c04ea9cebf72990e89a4f8f48f8c651c993fd940e539fb9d194a4bc2cbaefe17"}, - {file = "dagster_webserver-1.7.2-py3-none-any.whl", hash = "sha256:cf5442818a6c1729d2289d58a33ebcb6d598fc700c8044ecd466ae20871ce1d0"}, + {file = "dagster-webserver-1.7.6.tar.gz", hash = "sha256:dcbfe2a955d2ece9bbc2e62cd9aa271a028df465b36dac41354a6211deb207af"}, + {file = "dagster_webserver-1.7.6-py3-none-any.whl", hash = "sha256:7f44a39ad34308928b25b0ac5c4d98590ba21a8ae49dab4c0b8156f475e9b955"}, ] [package.dependencies] click = ">=7.0,<9.0" -dagster = "1.7.2" -dagster-graphql = "1.7.2" +dagster = "1.7.6" +dagster-graphql = "1.7.6" starlette = "!=0.36.0" uvicorn = {version = "*", extras = ["standard"]} @@ -1272,6 +1288,22 @@ files = [ [package.extras] tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"] +[[package]] +name = "filelock" +version = "3.14.0" +description = "A platform independent file lock." +optional = false +python-versions = ">=3.8" +files = [ + {file = "filelock-3.14.0-py3-none-any.whl", hash = "sha256:43339835842f110ca7ae60f1e1c160714c5a6afd15a2873419ab185334975c0f"}, + {file = "filelock-3.14.0.tar.gz", hash = "sha256:6ea72da3be9b8c82afd3edcf99f2fffbb5076335a5ae4d03248bb5b6c3eae78a"}, +] + +[package.extras] +docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] +typing = ["typing-extensions (>=4.8)"] + [[package]] name = "frozenlist" version = "1.4.1" @@ -3566,6 +3598,87 @@ files = [ [package.extras] test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] +[[package]] +name = "psycopg2-binary" +version = "2.9.9" +description = "psycopg2 - Python-PostgreSQL Database Adapter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "psycopg2-binary-2.9.9.tar.gz", hash = "sha256:7f01846810177d829c7692f1f5ada8096762d9172af1b1a28d4ab5b77c923c1c"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c2470da5418b76232f02a2fcd2229537bb2d5a7096674ce61859c3229f2eb202"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c6af2a6d4b7ee9615cbb162b0738f6e1fd1f5c3eda7e5da17861eacf4c717ea7"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75723c3c0fbbf34350b46a3199eb50638ab22a0228f93fb472ef4d9becc2382b"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:83791a65b51ad6ee6cf0845634859d69a038ea9b03d7b26e703f94c7e93dbcf9"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0ef4854e82c09e84cc63084a9e4ccd6d9b154f1dbdd283efb92ecd0b5e2b8c84"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed1184ab8f113e8d660ce49a56390ca181f2981066acc27cf637d5c1e10ce46e"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d2997c458c690ec2bc6b0b7ecbafd02b029b7b4283078d3b32a852a7ce3ddd98"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:b58b4710c7f4161b5e9dcbe73bb7c62d65670a87df7bcce9e1faaad43e715245"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:0c009475ee389757e6e34611d75f6e4f05f0cf5ebb76c6037508318e1a1e0d7e"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8dbf6d1bc73f1d04ec1734bae3b4fb0ee3cb2a493d35ede9badbeb901fb40f6f"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-win32.whl", hash = "sha256:3f78fd71c4f43a13d342be74ebbc0666fe1f555b8837eb113cb7416856c79682"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:876801744b0dee379e4e3c38b76fc89f88834bb15bf92ee07d94acd06ec890a0"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ee825e70b1a209475622f7f7b776785bd68f34af6e7a46e2e42f27b659b5bc26"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1ea665f8ce695bcc37a90ee52de7a7980be5161375d42a0b6c6abedbf0d81f0f"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:143072318f793f53819048fdfe30c321890af0c3ec7cb1dfc9cc87aa88241de2"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c332c8d69fb64979ebf76613c66b985414927a40f8defa16cf1bc028b7b0a7b0"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7fc5a5acafb7d6ccca13bfa8c90f8c51f13d8fb87d95656d3950f0158d3ce53"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:977646e05232579d2e7b9c59e21dbe5261f403a88417f6a6512e70d3f8a046be"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b6356793b84728d9d50ead16ab43c187673831e9d4019013f1402c41b1db9b27"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bc7bb56d04601d443f24094e9e31ae6deec9ccb23581f75343feebaf30423359"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:77853062a2c45be16fd6b8d6de2a99278ee1d985a7bd8b103e97e41c034006d2"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:78151aa3ec21dccd5cdef6c74c3e73386dcdfaf19bced944169697d7ac7482fc"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e6f98446430fdf41bd36d4faa6cb409f5140c1c2cf58ce0bbdaf16af7d3f119"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c77e3d1862452565875eb31bdb45ac62502feabbd53429fdc39a1cc341d681ba"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8359bf4791968c5a78c56103702000105501adb557f3cf772b2c207284273984"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:275ff571376626195ab95a746e6a04c7df8ea34638b99fc11160de91f2fef503"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f9b5571d33660d5009a8b3c25dc1db560206e2d2f89d3df1cb32d72c0d117d52"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:420f9bbf47a02616e8554e825208cb947969451978dceb77f95ad09c37791dae"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:4154ad09dac630a0f13f37b583eae260c6aa885d67dfbccb5b02c33f31a6d420"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a148c5d507bb9b4f2030a2025c545fccb0e1ef317393eaba42e7eabd28eb6041"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:68fc1f1ba168724771e38bee37d940d2865cb0f562380a1fb1ffb428b75cb692"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:281309265596e388ef483250db3640e5f414168c5a67e9c665cafce9492eda2f"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:60989127da422b74a04345096c10d416c2b41bd7bf2a380eb541059e4e999980"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:246b123cc54bb5361588acc54218c8c9fb73068bf227a4a531d8ed56fa3ca7d6"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34eccd14566f8fe14b2b95bb13b11572f7c7d5c36da61caf414d23b91fcc5d94"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18d0ef97766055fec15b5de2c06dd8e7654705ce3e5e5eed3b6651a1d2a9a152"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d3f82c171b4ccd83bbaf35aa05e44e690113bd4f3b7b6cc54d2219b132f3ae55"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ead20f7913a9c1e894aebe47cccf9dc834e1618b7aa96155d2091a626e59c972"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ca49a8119c6cbd77375ae303b0cfd8c11f011abbbd64601167ecca18a87e7cdd"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:323ba25b92454adb36fa425dc5cf6f8f19f78948cbad2e7bc6cdf7b0d7982e59"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:1236ed0952fbd919c100bc839eaa4a39ebc397ed1c08a97fc45fee2a595aa1b3"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:729177eaf0aefca0994ce4cffe96ad3c75e377c7b6f4efa59ebf003b6d398716"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-win32.whl", hash = "sha256:804d99b24ad523a1fe18cc707bf741670332f7c7412e9d49cb5eab67e886b9b5"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-win_amd64.whl", hash = "sha256:a6cdcc3ede532f4a4b96000b6362099591ab4a3e913d70bcbac2b56c872446f7"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:72dffbd8b4194858d0941062a9766f8297e8868e1dd07a7b36212aaa90f49472"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:30dcc86377618a4c8f3b72418df92e77be4254d8f89f14b8e8f57d6d43603c0f"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31a34c508c003a4347d389a9e6fcc2307cc2150eb516462a7a17512130de109e"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:15208be1c50b99203fe88d15695f22a5bed95ab3f84354c494bcb1d08557df67"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1873aade94b74715be2246321c8650cabf5a0d098a95bab81145ffffa4c13876"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a58c98a7e9c021f357348867f537017057c2ed7f77337fd914d0bedb35dace7"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4686818798f9194d03c9129a4d9a702d9e113a89cb03bffe08c6cf799e053291"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ebdc36bea43063116f0486869652cb2ed7032dbc59fbcb4445c4862b5c1ecf7f"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:ca08decd2697fdea0aea364b370b1249d47336aec935f87b8bbfd7da5b2ee9c1"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ac05fb791acf5e1a3e39402641827780fe44d27e72567a000412c648a85ba860"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-win32.whl", hash = "sha256:9dba73be7305b399924709b91682299794887cbbd88e38226ed9f6712eabee90"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-win_amd64.whl", hash = "sha256:f7ae5d65ccfbebdfa761585228eb4d0df3a8b15cfb53bd953e713e09fbb12957"}, +] + [[package]] name = "ptyprocess" version = "0.7.0" @@ -5545,4 +5658,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.11,<3.13" -content-hash = "8b50861b8b8d81dac306adfe82badc04b3098ac1cb54a944060f569c3bd672bc" +content-hash = "771a8960fe65329ff361dc66dd8535df2cfdbd57a9925ee8d3c9bc0caa2017ba" diff --git a/pyproject.toml b/pyproject.toml index 9ffffb00c..52970f57f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,8 @@ dask = { extras = ["distributed"], version = "^2024.4.2" } lz4 = "^4.3.3" arrow = "^1.3.0" polars = "^0.20.23" +requests = "^2.31.0" +dagster-postgres = "^0.23.6" [tool.poetry.scripts] diff --git a/warehouse/oso_dagster/constants.py b/warehouse/oso_dagster/constants.py index ba3dc3c9b..0f30a8f57 100644 --- a/warehouse/oso_dagster/constants.py +++ b/warehouse/oso_dagster/constants.py @@ -1,9 +1,9 @@ import os from pathlib import Path +import requests from dagster_dbt import DbtCliResource - main_dbt_project_dir = Path(__file__).joinpath("..", "..", "..").resolve() main_dbt = DbtCliResource(project_dir=os.fspath(main_dbt_project_dir)) @@ -11,9 +11,77 @@ # source_dbt_project_dir = Path(__file__).joinpath("..", "..", "source_dbt").resolve() # source_dbt = DbtCliResource(project_dir=os.fspath(source_dbt_project_dir)) +generated_profiles_yml = """ +opensource_observer: + target: production + outputs: + production: + type: bigquery + dataset: oso + job_execution_time_seconds: 300 + job_retries: 1 + location: US + method: service-account + keyfile: %(service_account_path)s + project: %(project_id)s + threads: 32 + base_playground: + type: bigquery + dataset: oso_base_playground + job_execution_time_seconds: 300 + job_retries: 1 + location: US + method: service-account + keyfile: %(service_account_path)s + project: %(project_id)s + threads: 32 + playground: + type: bigquery + dataset: oso_playground + job_execution_time_seconds: 300 + job_retries: 1 + location: US + method: service-account + keyfile: %(service_account_path)s + project: %(project_id)s + threads: 32 +""" + + +def generate_profile_and_auth(): + profiles_path = os.path.expanduser("~/.dbt/profiles.yml") + Path(os.path.dirname(profiles_path)).mkdir(parents=True, exist_ok=True) + + service_account_path = os.path.expanduser("~/service-account.json") + Path(os.path.dirname(service_account_path)).mkdir(parents=True, exist_ok=True) + + print(f"writing dbt profile to {profiles_path}") + + token_url = "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token" + r = requests.get( + token_url, allow_redirects=True, headers={"Metadata-Flavor": "Google"} + ) + open(service_account_path, "wb").write(r.content) + project_id_url = ( + "http://metadata.google.internal/computeMetadata/v1/project/project-id" + ) + project_id = requests.get( + project_id_url, allow_redirects=True, headers={"Metadata-Flavor": "Google"} + ).content + with open(profiles_path, "w") as f: + f.write( + generated_profiles_yml + % dict(service_account_path=service_account_path, project_id=project_id) + ) + + # If DAGSTER_DBT_PARSE_PROJECT_ON_LOAD is set, a manifest will be created at run time. # Otherwise, we expect a manifest to be present in the project's target directory. -if os.getenv("DAGSTER_DBT_PARSE_PROJECT_ON_LOAD"): +if os.getenv("DAGSTER_DBT_PARSE_PROJECT_ON_LOAD") or os.getenv( + "DAGSTER_DBT_GENERATE_AND_AUTH_GCP" +): + if os.getenv("DAGSTER_DBT_GENERATE_AND_AUTH_GCP"): + generate_profile_and_auth() main_dbt_manifest_path = ( main_dbt.cli( ["--quiet", "parse"], diff --git a/warehouse/oso_dagster/goldsky.py b/warehouse/oso_dagster/goldsky.py index f8eceb5a8..3e4a27e97 100644 --- a/warehouse/oso_dagster/goldsky.py +++ b/warehouse/oso_dagster/goldsky.py @@ -670,7 +670,7 @@ async def materialize( await self.merge_worker_tables(context, workers) - await self.clean_working_destintation(context, workers) + await self.clean_working_destination(context, workers) def load_schema(self, queues: GoldskyQueues): item = queues.peek() @@ -869,7 +869,7 @@ async def merge_worker_tables( timeout=self.config.transform_timeout_seconds, ) - async def clean_working_destintation( + async def clean_working_destination( self, context: AssetExecutionContext, workers: List[GoldskyWorker] ): # For now we just need to be careful not to run this in multiple processes