Merge pull request #8 from puckel/v1.5.2

v1.5.2

puckel committed Oct 28, 2015
2 parents ca7c38e + 7461a59 commit 203ea8a

Showing 5 changed files with 89 additions and 22 deletions.
3 changes: 2 additions & 1 deletion Dockerfile
@@ -13,7 +13,7 @@ ENV TERM linux
 # Work around initramfs-tools running on kernel 'upgrade': <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=594189>
 ENV INITRD No

-ENV AIRFLOW_VERSION 1.5.1
+ENV AIRFLOW_VERSION 1.5.2
 ENV AIRFLOW_HOME /usr/local/airflow
 ENV PYTHONLIBPATH /usr/lib/python2.7/dist-packages

@@ -34,6 +34,7 @@ RUN apt-get update -yqq \
     build-essential \
     && pip install --install-option="--install-purelib=$PYTHONLIBPATH" cryptography \
     && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow==${AIRFLOW_VERSION} \
+    && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow[celery]==${AIRFLOW_VERSION} \
     && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow[mysql]==${AIRFLOW_VERSION} \
     && apt-get clean \
     && rm -rf \
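
The added airflow[celery] extra is what actually installs Celery and its dependencies into the image; previously only the base package and the MySQL extra were pulled in. The equivalent standalone install would be roughly the following (a sketch reusing the Dockerfile's $PYTHONLIBPATH convention):

    pip install --install-option="--install-purelib=$PYTHONLIBPATH" "airflow[celery]==1.5.2"

Quoting the requirement spec keeps shells such as zsh from treating the square brackets as a glob pattern.
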
2 changes: 1 addition & 1 deletion circle.yml
@@ -10,4 +10,4 @@ test:
   pre:
     - sleep 5
   override:
-    - docker run puckel/docker-airflow version
+    - docker run puckel/docker-airflow version |grep '1.5.2'
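
The CI check is now a real assertion: grep exits non-zero when '1.5.2' is missing from the version output, which fails the CircleCI step. The same check can be reproduced locally (a sketch; assumes the image has been built from the repository root):

    docker build -t puckel/docker-airflow .
    docker run puckel/docker-airflow version | grep '1.5.2'
    echo $?   # 0 when the reported version matches
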
73 changes: 69 additions & 4 deletions config/airflow.cfg
@@ -1,46 +1,66 @@
 [core]
 # The home folder for airflow, default is ~/airflow
-airflow_home = /usr/local/airflow
+airflow_home = /usr/local/airflow

 # The folder where your airflow pipelines live, most likely a
 # subfolder in a code repository
 dags_folder = /usr/local/airflow/dags

 # The folder where airflow should store its log files
 base_log_folder = /usr/local/airflow/logs

 # The executor class that airflow should use. Choices include
 # SequentialExecutor, LocalExecutor, CeleryExecutor
-executor = CeleryExecutor
+executor = SequentialExecutor

 # The SqlAlchemy connection string to the metadata database.
 # SqlAlchemy supports many different database engines; more information
 # on their website
-sql_alchemy_conn = mysql://airflow:airflow@mysqldb/airflow
+sql_alchemy_conn = mysql://airflow:airflow@mysql/airflow

 # The amount of parallelism as a setting to the executor. This defines
 # the max number of task instances that should run simultaneously
 # on this airflow installation
 parallelism = 32

 # Whether to load the examples that ship with Airflow. It's good to
 # get started, but you probably want to set this to False in a production
 # environment
 load_examples = True

 # Where your Airflow plugins are stored
 plugins_folder = /usr/local/airflow/plugins

 # Secret key to save connection passwords in the db
-fernet_key = {FERNET_KEY}
+fernet_key = $FERNET_KEY

 # Whether to disable pickling dags
 donot_pickle = False

 [webserver]
 # The base url of your website, as airflow cannot guess what domain or
 # cname you are using. This is used in automated emails that
 # airflow sends to point links to the right web server
 base_url = http://localhost:8080

 # The ip specified when starting the web server
 web_server_host = 0.0.0.0

 # The port on which to run the web server
 web_server_port = 8080

 # Secret key used to run your flask app
 secret_key = temporary_key

 # Number of threads to run the Gunicorn web server
 thread = 4

 # Expose the configuration file in the web server
 expose_config = true

 # Set to true to turn on authentication: http://pythonhosted.org/airflow/installation.html#web-authentication
 authenticate = False

 # Filter the list of dags by owner name (requires authentication to be enabled)
 filter_by_owner = False

@@ -58,28 +78,35 @@ smtp_mail_from = [email protected]
 [celery]
 # This section only applies if you are using the CeleryExecutor in
 # the [core] section above

 # The app name that will be used by celery
 celery_app_name = airflow.executors.celery_executor

 # The concurrency that will be used when starting workers with the
 # "airflow worker" command. This defines the number of task instances that
 # a worker will take, so size up your workers based on the resources on
 # your worker box and the nature of your tasks
 celeryd_concurrency = 16

 # When you start an airflow worker, airflow starts a tiny web server
 # subprocess to serve the worker's local log files to the airflow main
 # web server, which then builds pages and sends them to users. This defines
 # the port on which the logs are served. It needs to be unused, and
 # visible from the main web server, which connects into the workers.
 worker_log_server_port = 8793

 # The Celery broker URL. Celery supports RabbitMQ, Redis and, experimentally,
 # a sqlalchemy database. Refer to the Celery documentation for more
 # information.
 broker_url = amqp://airflow:airflow@rabbitmq:5672/airflow

 # Another key Celery setting
 celery_result_backend = amqp://airflow:airflow@rabbitmq:5672/airflow

 # Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start
 # it: `airflow flower`. This defines the port that Celery Flower runs on
 flower_port = 5555

 # Default queue that tasks get assigned to and that workers listen on.
 default_queue = default

@@ -88,12 +115,50 @@ default_queue = default
 # from the CLI or the UI), this defines the frequency at which they should
 # listen (in seconds).
 job_heartbeat_sec = 5

 # The scheduler constantly tries to trigger new tasks (look at the
 # scheduler section in the docs for more information). This defines
 # how often the scheduler should run (in seconds).
 scheduler_heartbeat_sec = 5

+# Statsd (https://github.com/etsy/statsd) integration settings
+# statsd_on = False
+# statsd_host = localhost
+# statsd_port = 8125
+# statsd_prefix = airflow

+[mesos]
+# Mesos master address which MesosExecutor will connect to.
+master = localhost:5050

+# The framework name which Airflow scheduler will register itself as on mesos
+framework_name = Airflow

+# Number of cpu cores required for running one task instance using
+# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
+# command on a mesos slave
+task_cpu = 1

+# Memory in MB required for running one task instance using
+# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
+# command on a mesos slave
+task_memory = 256

+# Enable framework checkpointing for mesos
+# See http://mesos.apache.org/documentation/latest/slave-recovery/
+checkpoint = False

+# Failover timeout in milliseconds.
+# When checkpointing is enabled and this option is set, Mesos waits until the
+# configured timeout for the MesosExecutor framework to re-register after a
+# failover. Mesos shuts down running tasks if the MesosExecutor framework
+# fails to re-register within this timeframe.
+# failover_timeout = 604800

+# Enable framework authentication for mesos
+# See http://mesos.apache.org/documentation/latest/configuration/
+authenticate = False

+# Mesos credentials, if authentication is enabled
+# default_principal = admin
+# default_secret = admin
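
On the fernet_key change above: the build-time placeholder {FERNET_KEY} gives way to an environment-style reference, $FERNET_KEY. A valid key can be generated with the cryptography package that the Dockerfile already installs (a sketch; how FERNET_KEY actually reaches the container, e.g. docker run -e FERNET_KEY=..., depends on how it is launched):

    python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key())"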

19 changes: 8 additions & 11 deletions docker-compose.yml
@@ -1,5 +1,5 @@
-mysqldb:
-    container_name: mysqldb
+mysql:
+    container_name: mysql
     image: tutum/mysql
     restart: always
     ports:
@@ -11,15 +11,15 @@ mysqldb:

 rabbitmq:
     container_name: rabbitmq
-    image: puckel/docker-rabbitmq
+    image: rabbitmq:3-management
     restart: always
     ports:
         - "15672:15672"
         - "5672:5672"
     environment:
-        - RABBITMQ_USER=airflow
-        - RABBITMQ_PASSWORD=airflow
-        - RABBITMQ_VHOST=airflow
+        - RABBITMQ_DEFAULT_USER=airflow
+        - RABBITMQ_DEFAULT_PASS=airflow
+        - RABBITMQ_DEFAULT_VHOST=airflow

 webserver:
     container_name: webserver
@@ -32,9 +32,10 @@ webserver:
     ports:
         - "8080:8080"
     links:
-        - mysqldb:mysqldb
+        - mysql:mysql
         - rabbitmq:rabbitmq
         - worker:worker
+        - scheduler:scheduler
     command: webserver

 flower:
@@ -58,7 +59,6 @@ worker:
     ports:
         - "8793:8793"
     links:
-        - mysqldb:mysqldb
         - rabbitmq:rabbitmq
     command: worker

@@ -68,7 +68,4 @@ scheduler:
     restart: always
     environment:
         - AIRFLOW_HOME=/usr/local/airflow
-    links:
-        - mysqldb:mysqldb
-        - rabbitmq:rabbitmq
     command: scheduler
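
With the mysql service renamed to match the new sql_alchemy_conn host and the stock rabbitmq:3-management image in place, the stack can be brought up directly (a sketch, run from the repository root; the ports and credentials are the ones published above):

    docker-compose up -d
    # Airflow webserver       -> http://localhost:8080
    # RabbitMQ management UI  -> http://localhost:15672  (airflow / airflow)
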
14 changes: 9 additions & 5 deletions script/entrypoint.sh
@@ -2,7 +2,7 @@

 CMD="airflow"
 DB_LOOPS="10"
-MYSQL_HOST="mysqldb"
+MYSQL_HOST="mysql"
 MYSQL_PORT="3306"
 RABBITMQ_HOST="rabbitmq"
 RABBITMQ_CREDS="airflow:airflow"
@@ -20,10 +20,11 @@ if [ "$@" = "webserver" ] || [ "$@" = "worker" ] || [ "$@" = "scheduler" ] || [
       echo "$(date) - $RABBITMQ_HOST still not reachable, giving up"
       exit 1
     fi
-    echo "$(date) - waiting for RabbitMQ..."
+    echo "$(date) - waiting for RabbitMQ... $j/$DB_LOOPS"
     sleep 2
   done
 fi
+
 if [ "$@" = "flower" ]; then
   sleep 10
 fi
@@ -37,11 +38,14 @@ if [ "$@" = "webserver" ] || [ "$@" = "worker" ] || [ "$@" = "scheduler" ] ; the
       echo "$(date) - ${MYSQL_HOST}:${MYSQL_PORT} still not reachable, giving up"
       exit 1
     fi
-    echo "$(date) - waiting for ${MYSQL_HOST}:${MYSQL_PORT}..."
+    echo "$(date) - waiting for ${MYSQL_HOST}:${MYSQL_PORT}... $i/$DB_LOOPS"
     sleep 1
   done
   sleep 2
-  $CMD initdb
+  if [ "$@" = "webserver" ]; then
+    echo "Initialize database..."
+    $CMD initdb
+  fi
   sleep 5
 fi

 exec $CMD "$@"
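
Two behavioral changes here: the wait loops now print a retry counter, and initdb only runs for the webserver role, presumably so that worker and scheduler containers do not race to initialize the same metadata database. In isolation, the wait-loop pattern looks roughly like this sketch (the reachability test shown, nc -z, is an assumption: the lines performing the actual check are collapsed above):

    i=1
    while ! nc -z "$MYSQL_HOST" "$MYSQL_PORT" >/dev/null 2>&1; do
        if [ "$i" -ge "$DB_LOOPS" ]; then
            echo "$(date) - ${MYSQL_HOST}:${MYSQL_PORT} still not reachable, giving up"
            exit 1
        fi
        echo "$(date) - waiting for ${MYSQL_HOST}:${MYSQL_PORT}... $i/$DB_LOOPS"
        i=$((i + 1))
        sleep 1
    done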
