-
Notifications
You must be signed in to change notification settings - Fork 6
/
Dockerfile
224 lines (173 loc) · 6.83 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# Dockerfile for the KCRI CGE Bacterial Analysis Pipeline (BAP)
# ======================================================================
# For full reproducibility, pin the package versions installed by apt
# and conda when releasing to production, using 'package=version'.
# The 'apt-get' and 'conda list' commands output the versions in use.
# Load base Docker image
# ----------------------------------------------------------------------
# Use miniconda3 with Python 3.11 until Flye supports 3.12
# See: https://github.com/mikolmogorov/Flye/issues/669
# The base image tag is pinned to an exact release so rebuilds start
# from the same image. NOTE(review): that this tag ships Python 3.11 is
# taken from the comment above, not verified here - confirm when bumping.
FROM continuumio/miniconda3:23.10.0-1
# System dependencies
# ----------------------------------------------------------------------
# Debian packages
# - gcc and libz-dev for kma
# - g++ and gawk and libboost-iostreams for kcst
# - g++ and the libboost packages for SKESA
# - file for KCST
# - prodigal for cgMLST
# Keep apt/dpkg from prompting during the build. This is declared as ARG
# (not ENV) so that it applies to the RUN steps of this build only and
# is not baked into the environment of containers run from the image.
ARG DEBIAN_FRONTEND=noninteractive
# Install the Debian packages listed above in a single layer, then drop
# the apt caches and package lists in that same layer to keep it small.
# Note that apt-get's -qq implies -y, so no explicit --yes is needed.
RUN apt-get -qq update --fix-missing && \
    apt-get -qq install apt-utils && \
    dpkg --configure -a && \
    apt-get -qq install --no-install-recommends \
        make g++ gcc libc-dev libz-dev \
        gawk file \
        libboost-program-options-dev \
        libboost-iostreams-dev \
        libboost-regex-dev \
        libboost-timer-dev \
        libboost-chrono-dev \
        libboost-system-dev \
        prodigal \
    && \
    apt-get -qq clean && \
    rm -rf /var/lib/apt/lists/*
# Append two lines to the system-wide bashrc: one that unsets HISTFILE
# so bash leaves no .bash_history files behind, and one that defines
# ls aliases for convenience in interactive (debugging) sessions
RUN printf '%s\n' \
        "unset HISTFILE" \
        "alias ls='ls --color=auto' l='ls -CF' la='l -a' ll='l -l' lla='ll -a'" \
        >>/etc/bash.bashrc
# Python dependencies
# ----------------------------------------------------------------------
# Python dependencies via Conda:
# - Install nomkl to prevent MKL being installed; we don't currently
# use it, it's huge, and it is non-free (why does Conda pick it?)
# - Our jobcontrol module (picoline) requires psutil
# - Biopython and tabulate are used by all CGE services
# - ResFinder requires python-dateutil and gitpython
# - pandas required by cgelib required since ResFinder 4.2.1
# - cgMLST requires ete3 in its make_nj_tree.py, which we don't use,
# and spuriously in cgMLST.py, where we comment it out (see patch).
# Install the Conda packages motivated above, print the resulting
# package list to the build log (useful for pinning versions later),
# and remove the downloaded tarballs in the same layer
RUN conda install -q -y \
        nomkl \
        psutil \
        biopython \
        tabulate \
        python-dateutil \
        gitpython \
        pandas \
    && conda list \
    && conda clean --quiet --yes --tarballs
# Other dependencies
# ----------------------------------------------------------------------
# SKESA, BLAST, Quast, Flye are available in the 'bioconda' channel, but
# yield myriad dependency conflicts, hence we install them from source.
#RUN conda config --add channels bioconda && \
# conda config --add channels defaults && \
# conda config --set channel_priority strict && \
# conda update --all && \
# conda install blast skesa quast
# Install External Deps
#----------------------------------------------------------------------
# Installation root. WORKDIR creates the directory when it does not
# exist, so no separate 'RUN mkdir -p' layer is needed.
WORKDIR /usr/src
# Copy the contents of the ext directory in the build context to
# /usr/src/ext (the destination is relative to the WORKDIR).
# Note that .dockerignore filters out a lot of what is in ext.
COPY ext/ ext/
# BLAST and uf-stats need no build step: they are installed by putting
# their directories at the front of the PATH (unfasta first, then
# ncbi-blast/bin). BLAST_USAGE_REPORT=false opts out of usage reporting
# to prevent the BLAST 2.11.0 phone home bug, see
# https://github.com/ncbi/blast_plus_docs/issues/15
ENV PATH=/usr/src/ext/unfasta:/usr/src/ext/ncbi-blast/bin:$PATH \
    BLAST_USAGE_REPORT=false
# Build SKESA using Makefile.nongs, move the skesa and gfa_connector
# binaries to /usr/local/bin, then remove the source directory.
# NOTE(review): saute, if the makefile builds it, is not installed here.
RUN set -e; \
    cd ext/skesa; \
    make clean; \
    make -j 6 -f Makefile.nongs; \
    mv skesa gfa_connector /usr/local/bin/; \
    cd ..; \
    rm -rf skesa
# Install Flye through its setup.py (run from within its source
# directory), then remove the source directory
RUN (cd ext/flye && python3 setup.py install) && \
    rm -rf ext/flye
# Build kcst in its src directory, move the khc binary, the kcst script
# and make-kcst-db.sh to /usr/local/bin, then remove the source tree
RUN set -e; \
    cd ext/kcst/src; \
    make clean; \
    make -j 6; \
    mv khc ../bin/kcst ../data/make-kcst-db.sh /usr/local/bin/; \
    cd ../..; \
    rm -rf kcst
# Build kma, copy the kma, kma_index and kma_shm binaries to
# /usr/local/bin, then remove the source directory
RUN ( cd ext/kma && \
      make clean && \
      make -j 6 && \
      cp kma kma_index kma_shm /usr/local/bin/ \
    ) && \
    rm -rf ext/kma
# Copy kma-retrieve from odds-and-ends to /usr/local/bin, then remove
# the odds-and-ends directory
RUN cp ext/odds-and-ends/kma-retrieve /usr/local/bin/ && \
    rm -rf ext/odds-and-ends
# Build only the fastq-stats target of fastq-utils, copy the binary to
# /usr/local/bin, then remove the source directory
RUN set -e; \
    cd ext/fastq-utils; \
    make clean; \
    make fastq-stats; \
    cp fastq-stats /usr/local/bin/; \
    cd ..; \
    rm -rf fastq-utils
# Install the picoline Python module through its setup.py, then remove
# its source directory
RUN (cd ext/picoline && python3 setup.py install) && \
    rm -rf ext/picoline
# Install the cgecore Python module through its setup.py, then remove
# its source directory
RUN set -e; \
    cd ext/cgecore; \
    python3 setup.py install; \
    cd ..; \
    rm -rf cgecore
# Install the cgelib Python module through its setup.py, then remove
# its source directory
RUN ( cd ext/cgelib && \
      python3 setup.py install \
    ) && \
    rm -rf ext/cgelib
# Install CGE Services
#----------------------------------------------------------------------
# ResFinder since 4.2.1 recommends pip installation, but then pulls in
# old cgecore which breaks virulencefinder and others (no .gz support),
# so we install the dependencies ourselves (see above) and --no-deps.
# OVERRIDE for now: install from source
#RUN pip install --no-color --no-deps --no-cache-dir resfinder
# Install resfinder from source: byte-compile the package in place and
# create an executable /usr/local/bin/resfinder launcher that puts the
# source tree on PYTHONPATH and runs the package with 'python3 -m'
RUN set -e; \
    python3 -m compileall ext/resfinder/src/resfinder; \
    printf '#!/bin/sh\nexport PYTHONPATH=/usr/src/ext/resfinder/src\nexec python3 -m resfinder "$@"\n' \
        >/usr/local/bin/resfinder; \
    chmod +x /usr/local/bin/resfinder
# Install virulencefinder from source: create an executable
# /usr/local/bin/virulencefinder launcher that puts the source tree on
# PYTHONPATH and runs the package with 'python3 -m', and byte-compile
# the package in place
RUN printf '#!/bin/sh\nexport PYTHONPATH=/usr/src/ext/virulencefinder/src\nexec python3 -m virulencefinder "$@"\n' \
        >/usr/local/bin/virulencefinder && \
    chmod +x /usr/local/bin/virulencefinder && \
    python3 -m compileall ext/virulencefinder/src/virulencefinder
# Patch cgmlstfinder: comment out the 'from ete3 import' line in
# cgMLST.py so that the ete3 package is not needed at run time
RUN sed --in-place --regexp-extended \
        --expression='s@^from ete3 import@#from ete3 import@' \
        ext/cgmlstfinder/cgMLST.py
# Byte-compile the Python sources of the CGE services that are run
# straight from their directory under ext
RUN python3 -m compileall \
        ext/cgmlstfinder ext/choleraefinder ext/kmerfinder \
        ext/mlst ext/plasmidfinder ext/pmlst
# Append the service script directories to the PATH. This is written in
# the key=value form on one line: the legacy space-separated 'ENV key
# value' form is deprecated, and one quoted value cannot silently pick
# up whitespace from indented continuation lines.
ENV PATH="${PATH}:/usr/src/ext/cgmlstfinder:/usr/src/ext/choleraefinder:/usr/src/ext/kmerfinder:/usr/src/ext/mlst:/usr/src/ext/plasmidfinder:/usr/src/ext/pmlst"
# Install the BAP code
#----------------------------------------------------------------------
# Copy the contents of the src directory into the installation root
# /usr/src, which is the current WORKDIR
COPY src/ /usr/src/
# Install the KCRI BAP specific code through the setup.py now in the
# WORKDIR (presumably the one copied from src)
RUN python3 setup.py install
# Set up user and workdir
#----------------------------------------------------------------------
# Drop to user nobody (running containers as root is not a good idea).
# This comes after all installation steps, as those run as the default
# build user.
USER nobody:nogroup
# Change to the mounted workdir as initial PWD
# NOTE(review): presumably the caller bind-mounts a host directory on
# /workdir when running the container - confirm against the run scripts.
WORKDIR /workdir
# No ENTRYPOINT means that any binary on the PATH in the container can
# be run. Set CMD so that without arguments, user gets BAP --help.
# CMD is in exec (JSON array) form, so BAP is started directly and not
# through a shell.
CMD ["BAP", "--help"]