From 21f2044226bd192811e95f94582ccbebcf88cdf0 Mon Sep 17 00:00:00 2001 From: Reuven Gonzales Date: Thu, 12 Dec 2024 19:20:33 -0800 Subject: [PATCH] Deploy MCS on flux (#2631) * Fix deployment * Fix docker containers * reconnect to main --- .github/scripts/publish-docker-containers.sh | 12 +++- docker/images/000-oso-base/Dockerfile | 28 +++++++++ docker/images/000-oso-base/image_name | 1 + docker/images/debug/Dockerfile | 16 +++++ docker/images/debug/build.sh | 17 ++++++ .../images/{dagster-dask => oso}/Dockerfile | 26 +-------- .../metrics-calculation-service/Chart.yaml | 2 +- .../templates/app.yaml | 58 ++++++++++--------- .../templates/rbac.yaml | 4 +- .../templates/secrets.yaml | 8 +++ .../templates/service-account.yaml | 2 +- .../templates/service.yaml | 2 +- .../metrics-calculation-service/values.yaml | 5 +- ops/k8s-apps/base/dagster/dagster.yaml | 2 +- warehouse/metrics_tools/compute/constants.py | 2 +- 15 files changed, 125 insertions(+), 60 deletions(-) create mode 100644 docker/images/000-oso-base/Dockerfile create mode 100644 docker/images/000-oso-base/image_name create mode 100644 docker/images/debug/Dockerfile create mode 100644 docker/images/debug/build.sh rename docker/images/{dagster-dask => oso}/Dockerfile (51%) create mode 100644 ops/helm-charts/metrics-calculation-service/templates/secrets.yaml diff --git a/.github/scripts/publish-docker-containers.sh b/.github/scripts/publish-docker-containers.sh index ce989fe27..a8f725ed4 100644 --- a/.github/scripts/publish-docker-containers.sh +++ b/.github/scripts/publish-docker-containers.sh @@ -10,20 +10,28 @@ images_to_build="$(find ./docker/images/* -type f -name 'Dockerfile' -exec sh -c tag="$(git rev-parse HEAD)" for path in $images_to_build; do - image_name=$(basename $path) + # if directory has an image_name file use that for the image name + if [ -f "${path}/image_name" ]; then + image_name=$(cat "${path}/image_name") + else + image_name=$(basename "$path") + fi + image_dir_name=$(basename "$path") image_repo="ghcr.io/opensource-observer/${image_name}" sha_image="${image_repo}:${tag}" latest_image="${image_repo}:latest" + echo "Building ${image_name} plugin" docker build \ -t ${sha_image} \ -t ${latest_image} \ --label "org.opencontainers.image.source=https://github.com/opensource-observer/oso" \ --label "observer.opensource.oso.sha=${tag}" \ + --build-arg REPO_SHA=${tag} \ --build-arg IMAGE_NAME=${image_name} \ - -f docker/images/${image_name}/Dockerfile \ + -f docker/images/${image_dir_name}/Dockerfile \ . echo "Publishing the image to ${sha_image}" docker push "${sha_image}" diff --git a/docker/images/000-oso-base/Dockerfile b/docker/images/000-oso-base/Dockerfile new file mode 100644 index 000000000..eb8b29185 --- /dev/null +++ b/docker/images/000-oso-base/Dockerfile @@ -0,0 +1,28 @@ +FROM ghcr.io/opensource-observer/oso-public-vars:latest AS public_vars + + +FROM ubuntu:jammy + +ARG GCLOUD_VERSION=503.0.0 +ARG PYTHON_VERSION=3.12 +ARG PYTHON_PACKAGE=python3.12 + +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && \ + apt-get install -y software-properties-common && \ + add-apt-repository ppa:deadsnakes/ppa && \ + apt-get update && \ + apt-get install -y ${PYTHON_PACKAGE} && \ + apt-get install -y curl git && \ + curl -o get-pip.py https://bootstrap.pypa.io/get-pip.py && \ + python${PYTHON_VERSION} get-pip.py && \ + pip${PYTHON_VERSION} install poetry && \ + curl -o gcloud.tar.gz https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-${GCLOUD_VERSION}-linux-x86_64.tar.gz && \ + tar xvf gcloud.tar.gz && \ + bash ./google-cloud-sdk/install.sh && \ + rm gcloud.tar.gz && true +ENV PATH $PATH:/google-cloud-sdk/bin + +COPY --from=public_vars ./public/vars.env /public-vars.env + +ENTRYPOINT ["/bin/bash"] \ No newline at end of file diff --git a/docker/images/000-oso-base/image_name b/docker/images/000-oso-base/image_name new file mode 100644 index 000000000..d09bba09c --- /dev/null +++ b/docker/images/000-oso-base/image_name @@ -0,0 +1 @@ +oso-base \ No newline at end of file diff --git a/docker/images/debug/Dockerfile b/docker/images/debug/Dockerfile new file mode 100644 index 000000000..aa05f8a96 --- /dev/null +++ b/docker/images/debug/Dockerfile @@ -0,0 +1,16 @@ +ARG REPO_SHA=latest + +FROM ghcr.io/opensource-observer/oso-base:${REPO_SHA} + +ARG PYTHON_VERSION=3.12 + +RUN apt-get update \ + && apt-get install -y vim curl git htop postgresql-client && \ + curl -sL https://deb.nodesource.com/setup_20.x -o nodesource_setup.sh && \ + bash nodesource_setup.sh && \ + mkdir -p /usr/src/app && \ + apt-get install nodejs && \ + npm install -g pnpm + +# A docker image for debugging +ENTRYPOINT ["/bin/bash"] \ No newline at end of file diff --git a/docker/images/debug/build.sh b/docker/images/debug/build.sh new file mode 100644 index 000000000..7f8d44fed --- /dev/null +++ b/docker/images/debug/build.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -euxo pipefail + +mkdir -p /usr/src/app +cd /usr/src/app + +curl -sL https://deb.nodesource.com/setup_20.x -o nodesource_setup.sh +bash nodesource_setup.sh + +git clone https://github.com/opensource-observer/oso.git +cd oso + +poetry install +pnpm install + +while true; do sleep 300; done; \ No newline at end of file diff --git a/docker/images/dagster-dask/Dockerfile b/docker/images/oso/Dockerfile similarity index 51% rename from docker/images/dagster-dask/Dockerfile rename to docker/images/oso/Dockerfile index 23c1877e2..27dccbfce 100644 --- a/docker/images/dagster-dask/Dockerfile +++ b/docker/images/oso/Dockerfile @@ -1,27 +1,8 @@ -FROM ghcr.io/opensource-observer/oso-public-vars:latest AS public_vars +ARG REPO_SHA=latest +FROM ghcr.io/opensource-observer/oso-base:${REPO_SHA} -FROM ubuntu:jammy - -ARG GCLOUD_VERSION=478.0.0 ARG PYTHON_VERSION=3.12 -ARG PYTHON_PACKAGE=python3.12 - -ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && \ - apt-get install -y software-properties-common && \ - add-apt-repository ppa:deadsnakes/ppa && \ - apt-get update && \ - apt-get install -y ${PYTHON_PACKAGE} && \ - apt-get install -y curl git && \ - curl -o get-pip.py https://bootstrap.pypa.io/get-pip.py && \ - python${PYTHON_VERSION} get-pip.py && \ - pip${PYTHON_VERSION} install poetry && \ - curl -o gcloud.tar.gz https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-${GCLOUD_VERSION}-linux-x86_64.tar.gz && \ - tar xvf gcloud.tar.gz && \ - bash ./google-cloud-sdk/install.sh && \ - rm gcloud.tar.gz && true -ENV PATH $PATH:/google-cloud-sdk/bin RUN mkdir -p /usr/src/app && \ bash -c "mkdir -p /usr/src/app/warehouse/{bq2cloudsql,oso_dagster,oso_lets_go,common}" && \ @@ -48,8 +29,7 @@ RUN poetry config virtualenvs.create false && \ ENV DAGSTER_DBT_TARGET_BASE_DIR=/dbt_targets -COPY --from=public_vars ./public/vars.env /usr/src/app/vars.env RUN mkdir -p ${DAGSTER_DBT_TARGET_BASE_DIR} && \ - python${PYTHON_VERSION} -m oso_dagster.compile --additional-vars /usr/src/app/vars.env + python${PYTHON_VERSION} -m oso_dagster.compile --additional-vars /public-vars.env ENTRYPOINT [] \ No newline at end of file diff --git a/ops/helm-charts/metrics-calculation-service/Chart.yaml b/ops/helm-charts/metrics-calculation-service/Chart.yaml index 8a703b448..e9e5ef32e 100644 --- a/ops/helm-charts/metrics-calculation-service/Chart.yaml +++ b/ops/helm-charts/metrics-calculation-service/Chart.yaml @@ -3,5 +3,5 @@ name: metrics-calculation-service description: The metrics calculation service type: application -version: 0.1.1 +version: 0.1.11 appVersion: 0.1.0 \ No newline at end of file diff --git a/ops/helm-charts/metrics-calculation-service/templates/app.yaml b/ops/helm-charts/metrics-calculation-service/templates/app.yaml index 4efb2dd3b..3599950e0 100644 --- a/ops/helm-charts/metrics-calculation-service/templates/app.yaml +++ b/ops/helm-charts/metrics-calculation-service/templates/app.yaml @@ -16,11 +16,11 @@ spec: labels: app: mcs spec: - serviceAccountName: {{ include "mcs.fullname" . }}-sa + serviceAccountName: {{ include "mcs.fullname" . }} containers: - name: metrics-calculation-service # TODO: Remove latest tag and use a specific version - image: {{ .Values.image.repo }}:{{ .Values.image.tag }} + image: {{ .Values.mcs.image.repo }}:{{ .Values.mcs.image.tag }} command: ["uvicorn"] args: - "metrics_tools.compute.server:app" @@ -38,53 +38,59 @@ spec: mountPath: /config env: - name: METRICS_CLUSTER_NAMESPACE - value: {{ .Values.mcs.cluster.namespace }} + value: "{{ .Values.mcs.cluster.namespace }}" - name: METRICS_CLUSTER_NAME - value: {{ .Values.mcs.cluster.name }} + value: "{{ .Values.mcs.cluster.name }}" - name: METRICS_CLUSTER_IMAGE_REPO - value: {{ .Values.mcs.cluster.image.repo }} + value: "{{ .Values.mcs.cluster.image.repo }}" - name: METRICS_CLUSTER_IMAGE_TAG - value: {{ .Values.mcs.cluster.image.tag }} + value: "{{ .Values.mcs.cluster.image.tag }}" - name: METRICS_SCHEDULER_MEMORY_LIMIT - value: {{ .Values.mcs.cluster.scheduler.memory.limit }} + value: "{{ .Values.mcs.cluster.scheduler.memory.limit }}" - name: METRICS_SCHEDULER_MEMORY_REQUEST - value: {{ .Values.mcs.cluster.scheduler.memory.request }} + value: "{{ .Values.mcs.cluster.scheduler.memory.request }}" - name: METRICS_CLUSTER_WORKER_THREADS - value: {{ .Values.mcs.cluster.worker.threads }} + value: "{{ .Values.mcs.cluster.worker.threads }}" - name: METRICS_WORKER_MEMORY_LIMIT - value: {{ .Values.mcs.cluster.worker.memory.limit }} + value: "{{ .Values.mcs.cluster.worker.memory.limit }}" - name: METRICS_WORKER_MEMORY_REQUEST - value: {{ .Values.mcs.cluster.worker.memory.request }} + value: "{{ .Values.mcs.cluster.worker.memory.request }}" - name: METRICS_WORKER_DUCKDB_PATH - value: {{ .Values.mcs.cluster.worker.duckdb_path }} + value: "{{ .Values.mcs.cluster.worker.duckdb_path }}" - name: METRICS_GCS_BUCKET - value: {{ .Values.mcs.gcs.bucket }} + value: "{{ .Values.mcs.gcs.bucket }}" - name: METRICS_GCS_KEY_ID - value: {{ .Values.mcs.gcs.key_id }} + valueFrom: + secretKeyRef: + name: "{{ include "mcs.fullname" . }}-secrets" + key: gcs_key_id - name: METRICS_GCS_SECRET - value: {{ .Values.mcs.gcs.secret }} + valueFrom: + secretKeyRef: + name: "{{ include "mcs.fullname" . }}-secrets" + key: gcs_secret - name: METRICS_GCS_RESULTS_PATH_PREFIX - value: {{ .Values.mcs.gcs.results_path_prefix }} + value: "{{ .Values.mcs.gcs.results_path_prefix }}" - name: METRICS_TRINO_HOST - value: {{ .Values.mcs.trino.host }} + value: "{{ .Values.mcs.trino.host }}" - name: METRICS_TRINO_PORT - value: {{ .Values.mcs.trino.port }} + value: "{{ .Values.mcs.trino.port }}" - name: METRICS_TRINO_USER - value: {{ .Values.mcs.trino.user }} + value: "{{ .Values.mcs.trino.user }}" - name: METRICS_TRINO_CATALOG - value: {{ .Values.mcs.trino.catalog }} + value: "{{ .Values.mcs.trino.catalog }}" - name: METRICS_HIVE_CATALOG - value: {{ .Values.mcs.hive.catalog }} + value: "{{ .Values.mcs.hive.catalog }}" - name: METRICS_HIVE_SCHEMA - value: {{ .Values.mcs.hive.schema }} + value: "{{ .Values.mcs.hive.schema }}" - name: METRICS_DEBUG_ALL - value: {{ .Values.mcs.debug.all }} + value: "{{ .Values.mcs.debug.all }}" - name: METRICS_DEBUG_CACHE - value: {{ .Values.mcs.debug.cache }} + value: "{{ .Values.mcs.debug.cache }}" - name: METRICS_DEBUG_CLUSTER - value: {{ .Values.mcs.debug.cluster }} + value: "{{ .Values.mcs.debug.cluster }}" - name: METRICS_DEBUG_CLUSTER_NO_SHUTDOWN - value: {{ .Values.mcs.debug.cluster_no_shutdown }} + value: "{{ .Values.mcs.debug.cluster_no_shutdown }}" volumes: - name: config configMap: diff --git a/ops/helm-charts/metrics-calculation-service/templates/rbac.yaml b/ops/helm-charts/metrics-calculation-service/templates/rbac.yaml index 93764cd3e..6e838ca32 100644 --- a/ops/helm-charts/metrics-calculation-service/templates/rbac.yaml +++ b/ops/helm-charts/metrics-calculation-service/templates/rbac.yaml @@ -2,7 +2,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: {{ include "mcs.fullname" . }}-role + name: {{ include "mcs.fullname" . }} rules: - apiGroups: [""] resources: ["pods", "services"] @@ -15,7 +15,7 @@ rules: apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: - name: {{ include "mcs.fullname" . }}-role-binding + name: {{ include "mcs.fullname" . }} subjects: - kind: ServiceAccount name: {{ include "mcs.fullname" . }}-sa diff --git a/ops/helm-charts/metrics-calculation-service/templates/secrets.yaml b/ops/helm-charts/metrics-calculation-service/templates/secrets.yaml new file mode 100644 index 000000000..51511a9da --- /dev/null +++ b/ops/helm-charts/metrics-calculation-service/templates/secrets.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "mcs.fullname" . }}-secrets +type: Opaque +data: + gcs_key_id: {{ .Values.mcs.gcs.key_id | b64enc }} + gcs_secret: {{ .Values.mcs.gcs.secret | b64enc }} \ No newline at end of file diff --git a/ops/helm-charts/metrics-calculation-service/templates/service-account.yaml b/ops/helm-charts/metrics-calculation-service/templates/service-account.yaml index 505c7dafc..0c77a2dba 100644 --- a/ops/helm-charts/metrics-calculation-service/templates/service-account.yaml +++ b/ops/helm-charts/metrics-calculation-service/templates/service-account.yaml @@ -2,4 +2,4 @@ apiVersion: v1 kind: ServiceAccount metadata: - name: {{ include "mcs.fullname" . }}-sa \ No newline at end of file + name: {{ include "mcs.fullname" . }} \ No newline at end of file diff --git a/ops/helm-charts/metrics-calculation-service/templates/service.yaml b/ops/helm-charts/metrics-calculation-service/templates/service.yaml index 4a9b28c38..cab124ed4 100644 --- a/ops/helm-charts/metrics-calculation-service/templates/service.yaml +++ b/ops/helm-charts/metrics-calculation-service/templates/service.yaml @@ -8,5 +8,5 @@ spec: {{ include "mcs.selectorLabels" . | nindent 4 }} ports: - protocol: TCP - port: {{ .Values.service.port }} + port: {{ .Values.mcs.service.port }} targetPort: 8000 \ No newline at end of file diff --git a/ops/helm-charts/metrics-calculation-service/values.yaml b/ops/helm-charts/metrics-calculation-service/values.yaml index 652979615..a0a10b501 100644 --- a/ops/helm-charts/metrics-calculation-service/values.yaml +++ b/ops/helm-charts/metrics-calculation-service/values.yaml @@ -1,6 +1,6 @@ +global: + fullnameOverride: "" mcs: - global: - fullnameOverride: "" service: port: 8000 logging: @@ -34,6 +34,7 @@ mcs: bucket: "oso-playground-dataset-transfer-bucket" key_id: "gcp:secretmanager:mcs-gcs-key-id/versions/latest" secret: "gcp:secretmanager:mcs-gcs-secret/versions/latest" + results_path_prefix: "mcs-results" trino: host: "trino" port: "8080" diff --git a/ops/k8s-apps/base/dagster/dagster.yaml b/ops/k8s-apps/base/dagster/dagster.yaml index 63936696d..8b08b92fe 100644 --- a/ops/k8s-apps/base/dagster/dagster.yaml +++ b/ops/k8s-apps/base/dagster/dagster.yaml @@ -105,7 +105,7 @@ spec: deployments: - name: "oso-dagster" image: - repository: "ghcr.io/opensource-observer/dagster-dask" + repository: "ghcr.io/opensource-observer/oso" tag: latest pullPolicy: Always envConfigMaps: diff --git a/warehouse/metrics_tools/compute/constants.py b/warehouse/metrics_tools/compute/constants.py index 38ed37385..66b6e7c2b 100644 --- a/warehouse/metrics_tools/compute/constants.py +++ b/warehouse/metrics_tools/compute/constants.py @@ -6,7 +6,7 @@ cluster_namespace = env.required_str("METRICS_CLUSTER_NAMESPACE") cluster_name = env.required_str("METRICS_CLUSTER_NAME") cluster_image_repo = env.required_str( - "METRICS_CLUSTER_IMAGE_REPO", "ghcr.io/opensource-observer/dagster-dask" + "METRICS_CLUSTER_IMAGE_REPO", "ghcr.io/opensource-observer/oso" ) cluster_image_tag = env.required_str("METRICS_CLUSTER_IMAGE_TAG") scheduler_memory_limit = env.required_str("METRICS_SCHEDULER_MEMORY_LIMIT", "90000Mi")