Skip to content

Commit

Permalink
Commit local_batch_job.sh #839
Browse files Browse the repository at this point in the history
  • Loading branch information
EmileSonneveld committed Sep 27, 2024
1 parent 8ab6805 commit f28b0d0
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 0 deletions.
48 changes: 48 additions & 0 deletions docker/local_batch_job/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Base image: a pinned openeo-geotrellis-kube build.
FROM vito-docker.artifactory.vgt.vito.be/openeo-geotrellis-kube:20240917-1919
# TODO: use :latest
# Build command:
# cd /home/emile/openeo/openeo-geopyspark-driver/docker/local_batch_job/ && sudo docker build --file Dockerfile -t openeo_docker_local .

# The build steps below install packages and write under /opt, so they run as root.
USER root
# TODO: fix yum installs
#RUN yum install -y sshfs

# Runtime environment, taken from openeo-deploy/mep/entrypont.sh
ENV JAVA_HOME=/usr/lib/jvm/jre \
    PYTHON_EGG_CACHE=./ \
    PYSPARK_PYTHON="/opt/venv/bin/python" \
    WMTS_BASE_URL_PATTERN=http://openeo.vgt.vito.be/openeo/services/%s \
    LD_LIBRARY_PATH="/opt/venv/lib64"

# Python search path, most specific first:
#   1. py4j and the PySpark sources under $SPARK_HOME
#      (from /home/emile/openeo/jenkinslib/resources/python/test.sh)
#   2. the /opt/venv, tensorflow and system site-packages
#      (from openeo-deploy/mep/entrypont.sh)
# $SPARK_HOME is not set in this file; it is taken from the environment the
# base image provides.
ENV PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.10.9.7-src.zip:$SPARK_HOME/python:/opt/venv/lib64/python3.8/site-packages:/opt/venv/lib/python3.8/site-packages:/opt/tensorflow/python38/2.8.0:/usr/lib/python3.8/site-packages:/usr/lib64/python3.8/site-packages"

# Extra Python packages needed to run the test code inside the container.
# One pip invocation resolves all packages together, and --no-cache-dir keeps
# the pip download cache out of the image layer.
# --root-user-action=ignore silences pip's warning about installing as root.
RUN python3 -m pip install --no-cache-dir --root-user-action=ignore \
        dirty_equals \
        junitparser \
        moto \
        pytest-runner \
        pytest-timeout \
        rioxarray

# Smoke test: fail the build early if any of these Python packages cannot be imported.
RUN python3 -c "import pyspark, openeo, otbApplication"


# Pull openeo-geopyspark-driver (master branch) into the image, just for the test code.
# - --fail makes curl exit non-zero on an HTTP error instead of saving the error page.
# - The archive is deleted in the same layer so it does not bloat the image.
# - The two static jars under /opt are symlinked into the checkout's jars/ folder.
RUN curl --fail --location --output /opt/master.zip \
        https://github.com/Open-EO/openeo-geopyspark-driver/archive/refs/heads/master.zip && \
    unzip -q /opt/master.zip -d /opt && \
    rm /opt/master.zip && \
    mv /opt/openeo-geopyspark-driver-master /opt/openeo-geopyspark-driver && \
    mkdir -p /opt/openeo-geopyspark-driver/jars/ && \
    ln -s /opt/geotrellis-extensions-static.jar /opt/openeo-geopyspark-driver/jars/ && \
    ln -s /opt/openeo-logging-static.jar /opt/openeo-geopyspark-driver/jars/

# Add the local runner script to the driver's test folder.
# The trailing slash marks the destination as a directory, so the file can never
# end up being written as a plain file called "tests".
COPY test_run_graph.py /opt/openeo-geopyspark-driver/tests/

# Write directly to the mount. If an error occurs, there is something to debug with.
# openeo.log will be written in the current directory, so change the current directory.
# WORKDIR creates /opt/docker_mount if it does not exist yet.
WORKDIR /opt/docker_mount

# No USER switch here: local_batch_job.sh starts the container with an explicit --user.
CMD ["python3", "-m", "pytest", "/opt/openeo-geopyspark-driver/tests/test_run_graph.py"]
9 changes: 9 additions & 0 deletions docker/local_batch_job/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Run openEO from Docker file

This method does not have access to the collections on Terrascope or dataspace.copernicus.eu.
External public STAC collections are usable.

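Run a process graph with `local_batch_job.sh path/to/process_graph.json`.
The file must be named `process_graph.json`, because that is the file name the container reads from the mounted folder.
The output files will be written to the same folder as process_graph.json.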

If the Docker container fails to connect to the internet, consider disabling your VPN.
26 changes: 26 additions & 0 deletions docker/local_batch_job/local_batch_job.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash
# Run an openEO process graph as a local batch job in the `openeo_docker_local` image.
#
# Usage: local_batch_job.sh path/to/process_graph.json
#
# The folder that contains the process graph is mounted in the container at
# /opt/docker_mount; the container reads the graph from there and writes its
# results back into the same folder.

if [ -z "$1" ]; then
    echo "First argument should be the path to the process graph." >&2
    echo "'local_batch_job.sh path/to/process_graph.json'" >&2
    exit 1
fi
if [ ! -f "$1" ]; then
    echo "File not found: $1" >&2
    exit 1
fi
# test_run_graph.py in the image looks for a file with exactly this name.
if [ "$(basename "$1")" != "process_graph.json" ]; then
    echo "Warning: the container reads 'process_graph.json' from the mounted folder, but got: $1" >&2
fi

# Resolve to an absolute path: a relative bind-mount source (e.g. '.') may be
# rejected or treated as a named volume by `docker run -v`.
parent_folder="$(cd "$(dirname "$1")" && pwd)" || exit 1

# --entrypoint /bin/bash
# Specify user (as uid:gid), otherwise output files are owned by root
# /etc/passwd to avoid "whoami: cannot find name for user ID"
# Opening a /vsi file with the netCDF driver requires Linux userfaultfd to be available. If running from Docker, --security-opt seccomp=unconfined might be needed.
# mount is used to read process_graph and write results
# Avoid -i, to avoid "the input device is not a TTY"
docker run -t \
    --user "$(id -u):$(id -g)" \
    -v /etc/passwd:/etc/passwd:ro \
    --security-opt seccomp=unconfined \
    -v "$parent_folder":/opt/docker_mount \
    openeo_docker_local
30 changes: 30 additions & 0 deletions docker/local_batch_job/test_run_graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import os
from pathlib import Path

import requests
from openeo.util import ensure_dir
from openeo_driver.utils import read_json
from openeogeotrellis.deploy.batch_job import run_job

# NOTE: this module defines no test function; everything below runs at import
# time, i.e. as soon as pytest collects this file.

# Disable IPv6, to avoid hanging on https://services.terrascope.be/catalogue//collections
requests.packages.urllib3.util.connection.HAS_IPV6 = False

# The current working directory is used for both the input and the outputs.
# Alternative (folder of this file):
# workdir = Path(os.path.dirname(os.path.abspath(__file__)))
workdir = Path.cwd()

process_graph_path = workdir.joinpath("process_graph.json")
print(f"process_graph_path: {process_graph_path}")
process_graph = read_json(process_graph_path)

# A bare process graph (neither "process_graph" nor "job_options" at top level)
# is wrapped in a {"process_graph": ...} dict before it is passed to run_job.
already_wrapped = "process_graph" in process_graph or "job_options" in process_graph
if not already_wrapped:
    print("Wrapping process graph")
    process_graph = dict(process_graph=process_graph)

job_settings = dict(
    output_file=workdir / "random_folder_name",
    metadata_file=workdir / "metadata.json",
    api_version="1.0.0",
    job_dir=ensure_dir(workdir / "job_dir"),
    dependencies=[],
    user_id="jenkins",
)
run_job(process_graph, **job_settings)

0 comments on commit f28b0d0

Please sign in to comment.