Skip to content

Commit

Permalink
Bump to Airflow 1.10.4 and Python 3.7
Browse files Browse the repository at this point in the history
  • Loading branch information
puckel committed Aug 7, 2019
1 parent c0edbbf commit 7336340
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 30 deletions.
10 changes: 7 additions & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,18 @@ jobs:
- checkout
- setup_remote_docker:
docker_layer_caching: true
- run:
- run:
name: Build docker image
command: |
docker build -t puckel/docker-airflow .
- run:
- run:
name: Test Python version
command: |
docker run puckel/docker-airflow python -V | grep '3.7'
- run:
name: Test docker image
command: |
docker run puckel/docker-airflow version |grep '1.10.3'
docker run puckel/docker-airflow version |grep '1.10.4'
workflows:
version: 2
build_and_test:
Expand Down
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
# VERSION 1.10.3
# VERSION 1.10.4
# AUTHOR: Matthieu "Puckel_" Roisil
# DESCRIPTION: Basic Airflow container
# BUILD: docker build --rm -t puckel/docker-airflow .
# SOURCE: https://github.com/puckel/docker-airflow

FROM python:3.6-slim-stretch
FROM python:3.7-slim-stretch
LABEL maintainer="Puckel_"

# Never prompts the user for choices on installation/configuration of packages
ENV DEBIAN_FRONTEND noninteractive
ENV TERM linux

# Airflow
ARG AIRFLOW_VERSION=1.10.3
ARG AIRFLOW_VERSION=1.10.4
ARG AIRFLOW_USER_HOME=/usr/local/airflow
ARG AIRFLOW_DEPS=""
ARG PYTHON_DEPS=""
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ This repository contains **Dockerfile** of [apache-airflow](https://github.com/a

## Information

* Based on Python (3.6-slim-stretch) official Image [python:3.6-slim-stretch](https://hub.docker.com/_/python/) and uses the official [Postgres](https://hub.docker.com/_/postgres/) as backend and [Redis](https://hub.docker.com/_/redis/) as queue
* Based on Python (3.7-slim-stretch) official Image [python:3.7-slim-stretch](https://hub.docker.com/_/python/) and uses the official [Postgres](https://hub.docker.com/_/postgres/) as backend and [Redis](https://hub.docker.com/_/redis/) as queue
* Install [Docker](https://www.docker.com/)
* Install [Docker Compose](https://docs.docker.com/compose/install/)
* Following the Airflow release from [Python Package Index](https://pypi.python.org/pypi/apache-airflow)
Expand Down
77 changes: 61 additions & 16 deletions config/airflow.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@ fab_logging_level = WARN
logging_config_class =

# Log format
# we need to escape the curly braces by adding an additional curly brace
# Colour the logs when the controlling terminal is a TTY.
colored_console_log = True
colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {{%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d}} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s
colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter

log_format = [%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s
simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s

Expand All @@ -54,16 +58,26 @@ executor = SequentialExecutor
# their website
# sql_alchemy_conn = sqlite:////tmp/airflow.db

# If SqlAlchemy should pool database connections.
sql_alchemy_pool_enabled = True

# The encoding for the databases
sql_engine_encoding = utf-8

# If SqlAlchemy should pool database connections.
sql_alchemy_pool_enabled = True

# The SqlAlchemy pool size is the maximum number of database connections
# in the pool. 0 indicates no limit.
sql_alchemy_pool_size = 5

# The maximum overflow size of the pool.
# When the number of checked-out connections reaches the size set in pool_size,
# additional connections will be returned up to this limit.
# When those additional connections are returned to the pool, they are disconnected and discarded.
# It follows then that the total number of simultaneous connections the pool will allow is pool_size + max_overflow,
# and the total number of "sleeping" connections the pool will allow is pool_size.
# max_overflow can be set to -1 to indicate no overflow limit;
# no limit will be placed on the total number of concurrent connections. Defaults to 10.
sql_alchemy_max_overflow = 10

# The SqlAlchemy pool recycle is the number of seconds a connection
# can be idle in the pool before it is invalidated. This config does
# not apply to sqlite. If the number of DB connections is ever exceeded,
Expand Down Expand Up @@ -182,7 +196,7 @@ password =
[operators]
# The default owner assigned to each new operator, unless
# provided explicitly or passed via `default_args`
default_owner = Airflow
default_owner = airflow
default_cpus = 1
default_ram = 512
default_disk = 512
Expand All @@ -191,9 +205,6 @@ default_gpus = 0
[hive]
# Default mapreduce queue for HiveOperator tasks
default_hive_mapred_queue =
# Template for mapred_job_name in HiveOperator, supports the following named parameters:
# hostname, dag_id, task_id, execution_date
mapred_job_name_template = Airflow HiveOperator task for {hostname}.{dag_id}.{task_id}.{execution_date}

[webserver]
# The base url of your website as airflow cannot guess what domain or
Expand Down Expand Up @@ -301,6 +312,9 @@ cookie_secure = False
# Set samesite policy on session cookie
cookie_samesite =

# Default setting for wrap toggle on DAG code and TI log views.
default_wrap = False

[email]
email_backend = airflow.utils.email.send_email_smtp

Expand Down Expand Up @@ -391,6 +405,13 @@ ssl_key =
ssl_cert =
ssl_cacert =

# Celery Pool implementation.
# Choices include: prefork (default), eventlet, gevent or solo.
# See:
# https://docs.celeryproject.org/en/latest/userguide/workers.html#concurrency
# https://docs.celeryproject.org/en/latest/userguide/concurrency/eventlet.html
pool = prefork

[celery_broker_transport_options]
# This section is for specifying options which can be passed to the
# underlying celery broker transport. See:
Expand Down Expand Up @@ -505,8 +526,8 @@ basedn = dc=example,dc=com
cacert = /etc/ca/ldap_ca.crt
search_scope = LEVEL

# This setting allows the use of LDAP servers that either return a
# broken schema, or do not return a schema.
# This setting allows the use of LDAP servers that either return a
# broken schema, or do not return a schema.
ignore_malformed_schema = False

[mesos]
Expand Down Expand Up @@ -567,10 +588,22 @@ api_rev = v3
hide_sensitive_variable_fields = True

[elasticsearch]
elasticsearch_host =
# we need to escape the curly braces by adding an additional curly brace
elasticsearch_log_id_template = {dag_id}-{task_id}-{execution_date}-{try_number}
elasticsearch_end_of_log_mark = end_of_log
# Elasticsearch host
host =
# Format of the log_id, which is used to query for a given task's logs
log_id_template = {{dag_id}}-{{task_id}}-{{execution_date}}-{{try_number}}
# Used to mark the end of a log stream for a task
end_of_log_mark = end_of_log
# Qualified URL for an elasticsearch frontend (like Kibana) with a template argument for log_id
# Code will construct log_id using the log_id template from the argument above.
# NOTE: The code prefixes https:// automatically; do not include it here.
frontend =
# Write the task logs to the stdout of the worker, rather than the default files
write_stdout = False
# Instead of the default log formatter, write the log lines as JSON
json_format = False
# Log fields to also attach to the json output, if enabled
json_fields = asctime, filename, lineno, levelname, message

[kubernetes]
# The repository, tag and imagePullPolicy of the Kubernetes Image for the Worker to Run
Expand Down Expand Up @@ -606,7 +639,6 @@ logs_volume_subpath =
# A shared volume claim for the logs
logs_volume_claim =


# For DAGs mounted via a hostPath volume (mutually exclusive with volume claim and git-sync)
# Useful in local environment, discouraged in production
dags_volume_host =
Expand Down Expand Up @@ -634,7 +666,7 @@ git_password =
git_sync_root = /git
git_sync_dest = repo
# Mount point of the volume if git-sync is being used.
# i.e. /root/airflow/dags
# i.e. {AIRFLOW_HOME}/dags
git_dags_folder_mount_point =

# To get Git-sync SSH authentication set up follow this format
Expand Down Expand Up @@ -705,6 +737,13 @@ affinity =
# https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.12/#toleration-v1-core
tolerations =

# Keyword arguments (**kwargs) to pass when calling Kubernetes client core_v1_api methods from the Kubernetes Executor,
# provided as a single-line JSON dictionary string.
# The supported **kwargs are similar for all core_v1_api methods, hence a single config variable for all of them.
# See:
# https://raw.githubusercontent.com/kubernetes-client/python/master/kubernetes/client/apis/core_v1_api.py
kube_client_request_args =

# Worker pods security context options
# See:
# https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
Expand Down Expand Up @@ -753,3 +792,9 @@ fs_group =
#
# Additionally you may override worker airflow settings with the AIRFLOW__<SECTION>__<KEY>
# formatting as supported by airflow normally.

[kubernetes_labels]
# The Key-value pairs to be given to worker pods.
# The worker pods will be given these static labels, as well as some additional dynamic labels
# to identify the task.
# Should be supplied in the format: key = value
10 changes: 5 additions & 5 deletions docker-compose-CeleryExecutor.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
version: '2.1'
services:
redis:
image: 'redis:3.2.7'
image: 'redis:5.0.5'
# command: redis-server --requirepass redispass

postgres:
Expand All @@ -16,7 +16,7 @@ services:
# - ./pgdata:/var/lib/postgresql/data/pgdata

webserver:
image: puckel/docker-airflow:1.10.3
image: puckel/docker-airflow:1.10.4
restart: always
depends_on:
- postgres
Expand All @@ -43,7 +43,7 @@ services:
retries: 3

flower:
image: puckel/docker-airflow:1.10.3
image: puckel/docker-airflow:1.10.4
restart: always
depends_on:
- redis
Expand All @@ -55,7 +55,7 @@ services:
command: flower

scheduler:
image: puckel/docker-airflow:1.10.3
image: puckel/docker-airflow:1.10.4
restart: always
depends_on:
- webserver
Expand All @@ -74,7 +74,7 @@ services:
command: scheduler

worker:
image: puckel/docker-airflow:1.10.3
image: puckel/docker-airflow:1.10.4
restart: always
depends_on:
- scheduler
Expand Down
2 changes: 1 addition & 1 deletion docker-compose-LocalExecutor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ services:
- POSTGRES_DB=airflow

webserver:
image: puckel/docker-airflow:1.10.3
image: puckel/docker-airflow:1.10.4
restart: always
depends_on:
- postgres
Expand Down
4 changes: 3 additions & 1 deletion script/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ TRY_LOOP="20"
: "${POSTGRES_DB:="airflow"}"

# Defaults and back-compat
: "${AIRFLOW_HOME:="/usr/local/airflow"}"
: "${AIRFLOW__CORE__FERNET_KEY:=${FERNET_KEY:=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)")}}"
: "${AIRFLOW__CORE__EXECUTOR:=${EXECUTOR:-Sequential}Executor}"

export \
AIRFLOW_HOME \
AIRFLOW__CELERY__BROKER_URL \
AIRFLOW__CELERY__RESULT_BACKEND \
AIRFLOW__CORE__EXECUTOR \
Expand All @@ -33,7 +35,7 @@ fi

# Install custom python package if requirements.txt is present
if [ -e "/requirements.txt" ]; then
$(which pip) install --user -r /requirements.txt
$(command -v pip) install --user -r /requirements.txt
fi

if [ -n "$REDIS_PASSWORD" ]; then
Expand Down

0 comments on commit 7336340

Please sign in to comment.