Skip to content

Commit

Permalink
chore: merge dev
Browse files Browse the repository at this point in the history
  • Loading branch information
d0choa committed Sep 24, 2024
2 parents eaf7089 + df45a6c commit 0a42029
Show file tree
Hide file tree
Showing 51 changed files with 2,342 additions and 1,483 deletions.
32 changes: 30 additions & 2 deletions .github/workflows/artifact.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@ name: Build and Push to Artifact Registry

"on":
push:
branches: ["dev"]
tags: ["v*"]
branches: ["*"]
tags: ["*"]

env:
PROJECT_ID: open-targets-genetics-dev
REGION: europe-west1
GAR_LOCATION: europe-west1-docker.pkg.dev/open-targets-genetics-dev
REPOSITORY: gentropy-app
PYTHON_VERSION_DEFAULT: "3.10.8"

jobs:
build-push-artifact:
Expand Down Expand Up @@ -39,7 +40,18 @@ jobs:
run: |-
gcloud auth configure-docker ${{ env.REGION }}-docker.pkg.dev --quiet
- name: Quick Docker build (gentropy only, AMD64 only, with layer cache)
uses: docker/build-push-action@v6
with:
platforms: linux/amd64
push: true
tags: "${{ env.GAR_LOCATION }}/${{ env.REPOSITORY }}/gentropy:${{ github.ref_name }}"
context: .
cache-from: type=gha
cache-to: type=gha,mode=max

- name: Build and push gentropy image
if: github.ref == 'refs/heads/dev' || startsWith(github.ref, 'refs/tags/v')
uses: docker/build-push-action@v6
with:
platforms: linux/amd64,linux/arm64
Expand All @@ -48,10 +60,26 @@ jobs:
context: .

- name: Build and push VEP image
if: github.ref == 'refs/heads/dev' || startsWith(github.ref, 'refs/tags/v')
uses: docker/build-push-action@v6
with:
platforms: linux/amd64
push: true
tags: "${{ env.GAR_LOCATION }}/${{ env.REPOSITORY }}/custom_ensembl_vep:${{ github.ref_name }}"
context: .
file: "src/vep/Dockerfile"

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: ${{ env.PYTHON_VERSION_DEFAULT }}
- name: Install and configure Poetry
uses: snok/install-poetry@v1
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true

- name: Build and push spark cluster dependencies
run: |
make build
32 changes: 15 additions & 17 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# NOTE(review): this listing comes from a rendered diff ("15 additions & 17 deletions"),
# so superseded definitions (VERSION_NO, CLEAN_VERSION_NO, BUCKET_COMPOSER_DAGS, the first
# APP_NAME/BUCKET_NAME) and their replacements both appear below — confirm against the real file.
#
# GCP project and region used by the gcloud calls in the recipes; `?=` lets the
# environment or the command line override them.
PROJECT_ID ?= open-targets-genetics-dev
REGION ?= europe-west1
APP_NAME ?= $$(cat pyproject.toml| grep -m 1 "name" | cut -d" " -f3 | sed 's/"//g')
VERSION_NO ?= $$(poetry version --short)
CLEAN_VERSION_NO := $(shell echo "$(VERSION_NO)" | tr -cd '[:alnum:]')
BUCKET_NAME=gs://genetics_etl_python_playground/initialisation/${VERSION_NO}/
BUCKET_COMPOSER_DAGS=gs://europe-west1-ot-workflows-fe147745-bucket/dags/
# The `$$(...)` values below are stored as literal shell command substitutions; the
# shell evaluates them each time a recipe line (or the $(shell ...) below) uses them.
# APP_NAME: third space-separated field of the first line containing "name" in
# pyproject.toml, with double quotes removed.
APP_NAME ?= $$(cat pyproject.toml | grep -m 1 "name" | cut -d" " -f3 | sed 's/"//g')
# REF: abbreviated name of the currently checked-out git ref.
REF ?= $$(git rev-parse --abbrev-ref HEAD)
# PACKAGE_VERSION: version string reported by Poetry.
PACKAGE_VERSION ?= $$(poetry version --short)
# CLEAN_PACKAGE_VERSION: PACKAGE_VERSION with every non-alphanumeric character removed;
# computed once at parse time (`:=`) and used in the dev cluster name.
CLEAN_PACKAGE_VERSION := $(shell echo "$(PACKAGE_VERSION)" | tr -cd '[:alnum:]')
# BUCKET_NAME: GCS folder namespaced by application name and git ref. It has no trailing
# slash, so recipes append `/` or `/<file>` themselves.
BUCKET_NAME=gs://genetics_etl_python_playground/initialisation/${APP_NAME}/${REF}

# Declare targets phony automatically: for every blank line in the makefile(s), sed reads
# the following line and, if it looks like a rule header (does not start with space, `.`
# or `#`, and has no space before the first `:`), prints the target name.
# Consequence: only targets placed directly after a blank line are picked up.
.PHONY: $(shell sed -n -e '/^$$/ { n ; /^[^ .\#][^ ]*:/ { s/:.*$$// ; p ; } ; }' $(MAKEFILE_LIST))

Expand Down Expand Up @@ -38,35 +38,33 @@ build-documentation: ## Create local server with documentation
create-dev-cluster: build ## Spin up a simple dataproc cluster with all dependencies for development purposes
# Runs `build` first, then selects ${PROJECT_ID} and creates a Dataproc cluster named
# "ot-genetics-dev-<CLEAN_PACKAGE_VERSION>-<USER>" in ${REGION}. The initialization action
# and the PACKAGE metadata entry both point at files under $(BUCKET_NAME), i.e. the
# location the `build` recipe uploads to.
# NOTE(review): this listing comes from a rendered diff, so superseded lines (the ones
# using CLEAN_VERSION_NO / VERSION_NO and --max-idle=30m) appear next to their
# replacements — confirm against the real file before running.
# These comments sit below the rule header on purpose: the .PHONY auto-detection only
# recognises a target when its header directly follows a blank line.
@echo "Creating Dataproc Dev Cluster"
@gcloud config set project ${PROJECT_ID}
@gcloud dataproc clusters create "ot-genetics-dev-${CLEAN_VERSION_NO}-$(USER)" \
@gcloud dataproc clusters create "ot-genetics-dev-${CLEAN_PACKAGE_VERSION}-$(USER)" \
--image-version 2.1 \
--region ${REGION} \
--master-machine-type n1-standard-16 \
--initialization-actions=gs://genetics_etl_python_playground/initialisation/${VERSION_NO}/install_dependencies_on_cluster.sh \
--metadata="PACKAGE=gs://genetics_etl_python_playground/initialisation/${VERSION_NO}/gentropy-${VERSION_NO}-py3-none-any.whl,CONFIGTAR=gs://genetics_etl_python_playground/initialisation/${VERSION_NO}/config.tar.gz" \
--initialization-actions=$(BUCKET_NAME)/install_dependencies_on_cluster.sh \
--metadata="PACKAGE=$(BUCKET_NAME)/${APP_NAME}-${PACKAGE_VERSION}-py3-none-any.whl" \
--secondary-worker-type spot \
--worker-machine-type n1-standard-4 \
--worker-boot-disk-size 500 \
--autoscaling-policy="projects/${PROJECT_ID}/regions/${REGION}/autoscalingPolicies/otg-etl" \
--optional-components=JUPYTER \
--enable-component-gateway \
--max-idle=30m
--max-idle=60m

update-dev-cluster: build ## Reinstalls the package on the dev-cluster
# Rebuilds and re-uploads the package (prerequisite `build`), then submits a Pig job to
# the running dev cluster that makes the uploaded install_dependencies_on_cluster.sh
# executable and runs it.
#
# Fixes relative to the previous rule:
#   * The header used to read `make update-dev-cluster:`, which declares TWO targets
#     (`make` and `update-dev-cluster`) and, because of the space, was never matched by
#     the .PHONY auto-detection. The stray `make` is removed.
#   * The cluster name now carries the `-$(USER)` suffix, matching the name that
#     `create-dev-cluster` gives the cluster; without it the job targets a cluster
#     that target never creates.
#
# These comments sit below the rule header on purpose: the .PHONY auto-detection only
# recognises a target when its header directly follows a blank line.
	@echo "Updating Dataproc Dev Cluster"
	@gcloud config set project ${PROJECT_ID}
# `$${PWD}` is passed to gcloud as the literal text `${PWD}` (it is single-quoted), so it
# is not expanded on the local machine. NOTE(review): presumably resolved in the job's
# working directory on the cluster, where --jars stages the script — confirm.
	gcloud dataproc jobs submit pig --cluster="ot-genetics-dev-${CLEAN_PACKAGE_VERSION}-$(USER)" \
		--region ${REGION} \
		--jars=${BUCKET_NAME}/install_dependencies_on_cluster.sh \
		-e='sh chmod 750 $${PWD}/install_dependencies_on_cluster.sh; sh $${PWD}/install_dependencies_on_cluster.sh'

build: clean ## Build Python package with dependencies
# Runs `clean` first (defined outside this excerpt), selects ${PROJECT_ID}, builds the
# distribution with `poetry build`, and copies the artefacts to ${BUCKET_NAME} with gsutil.
# NOTE(review): this listing comes from a rendered diff, so the superseded steps (the
# config.tar.gz packaging and the uploads using VERSION_NO without a trailing `/`) appear
# next to their replacements — confirm against the real file before running.
# The replacement upload lines append `/` to ${BUCKET_NAME} themselves.
@gcloud config set project ${PROJECT_ID}
@echo "Packaging Code and Dependencies for ${APP_NAME}-${VERSION_NO}"
@echo "Packaging Code and Dependencies for ${APP_NAME}-${PACKAGE_VERSION}"
@poetry build
@tar -czf dist/config.tar.gz config/
@echo "Uploading to Dataproc"
@gsutil cp src/gentropy/cli.py ${BUCKET_NAME}
@gsutil cp ./dist/${APP_NAME}-${VERSION_NO}-py3-none-any.whl ${BUCKET_NAME}
@gsutil cp ./dist/config.tar.gz ${BUCKET_NAME}
@gsutil cp ./utils/install_dependencies_on_cluster.sh ${BUCKET_NAME}
@echo "Uploading to ${BUCKET_NAME}"
@gsutil cp src/${APP_NAME}/cli.py ${BUCKET_NAME}/
@gsutil cp ./dist/${APP_NAME}-${PACKAGE_VERSION}-py3-none-any.whl ${BUCKET_NAME}/
@gsutil cp ./utils/install_dependencies_on_cluster.sh ${BUCKET_NAME}/
16 changes: 16 additions & 0 deletions docs/development/troubleshooting.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,19 @@ Some functions on MacOS may throw a java error:
This can be resolved by adding the following line to your `~/.zshrc`:

`export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES`

## Creating development dataproc cluster (OT users only)

To start a Dataproc cluster in development mode, run:

```
make create-dev-cluster
```

The command above will prepare 3 different resources:

- gentropy package
- cli script
- cluster setup script

Based on the current branch ref (for example `dev`), the command then creates a namespaced folder in GCS (for example `gs://genetics_etl_python_playground/initialisation/gentropy/dev`) containing the three files described above. These files are then used to create the cluster environment.
22 changes: 21 additions & 1 deletion docs/python_api/datasets/l2g_feature.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,27 @@
title: L2G Feature
---

::: gentropy.method.l2g.feature_factory.L2GFeature
## Abstract Class

::: gentropy.dataset.l2g_feature.L2GFeature

## Feature Classes

### Derived from colocalisation

::: gentropy.dataset.l2g_feature.EQtlColocClppMaximumFeature
::: gentropy.dataset.l2g_feature.PQtlColocClppMaximumFeature
::: gentropy.dataset.l2g_feature.SQtlColocClppMaximumFeature
::: gentropy.dataset.l2g_feature.TuQtlColocClppMaximumFeature
::: gentropy.dataset.l2g_feature.EQtlColocH4MaximumFeature
::: gentropy.dataset.l2g_feature.PQtlColocH4MaximumFeature
::: gentropy.dataset.l2g_feature.SQtlColocH4MaximumFeature
::: gentropy.dataset.l2g_feature.TuQtlColocH4MaximumFeature

### Derived from distance

::: gentropy.dataset.l2g_feature.DistanceTssMinimumFeature
::: gentropy.dataset.l2g_feature.DistanceTssMeanFeature

## Schema

Expand Down
4 changes: 2 additions & 2 deletions docs/python_api/methods/l2g/feature_factory.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
title: L2G Feature Factory
---

::: gentropy.method.l2g.feature_factory.ColocalisationFactory
::: gentropy.method.l2g.feature_factory.FeatureFactory

::: gentropy.method.l2g.feature_factory.StudyLocusFactory
::: gentropy.method.l2g.feature_factory.L2GFeatureInputLoader
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ google = "^3.0.0"
omegaconf = "^2.3.0"
typing-extensions = "^4.9.0"
scikit-learn = "^1.3.2"
pandas = {extras = ["gcp", "parquet"], version = "^2.2.2"}
pandas = { extras = ["gcp", "parquet"], version = "^2.2.2" }
skops = ">=0.9,<0.11"
google-cloud-secret-manager = "^2.20.0"

Expand Down
6 changes: 6 additions & 0 deletions src/gentropy/assets/schemas/colocalisation.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@
"type": "long",
"metadata": {}
},
{
"name": "rightStudyType",
"nullable": false,
"type": "string",
"metadata": {}
},
{
"name": "chromosome",
"nullable": false,
Expand Down
155 changes: 0 additions & 155 deletions src/gentropy/assets/schemas/l2g_feature_matrix.json

This file was deleted.

6 changes: 6 additions & 0 deletions src/gentropy/assets/schemas/study_index.json
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,12 @@
"type": "boolean",
"nullable": true,
"metadata": {}
},
{
"name": "condition",
"type": "string",
"nullable": true,
"metadata": {}
}
]
}
6 changes: 6 additions & 0 deletions src/gentropy/assets/schemas/study_locus.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
"nullable": false,
"type": "long"
},
{
"metadata": {},
"name": "studyType",
"nullable": true,
"type": "string"
},
{
"metadata": {},
"name": "variantId",
Expand Down
Loading

0 comments on commit 0a42029

Please sign in to comment.