-
Notifications
You must be signed in to change notification settings - Fork 0
/
start.sh
414 lines (323 loc) · 13.1 KB
/
start.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
#!/usr/bin/env bash
# https://actuated.dev/blog/kvm-in-github-actions
# https://github.com/firecracker-microvm/firecracker/blob/main/docs/getting-started.md
# https://github.com/firecracker-microvm/firecracker/blob/main/docs/rootfs-and-kernel-setup.md
set -eu
#######################################
# Build the guest root filesystem image.
#
# Creates a fresh ext4 image at the destination path, mounts it on
# /tmp/rootfs, copies the source tree and then the overlay into it, records
# the current environment in /var/environment, appends the guest command to
# /sbin/init, unmounts the image and hands it to the firecracker user.
# Globals:
#   ROOTFS_SIZE (read) - size handed to `truncate -s`
#   overlay_src (read) - directory rsynced on top of the source rootfs
#   cmd_str     (read) - guest command appended to /sbin/init
# Arguments:
#   $1 - directory holding the source root filesystem
#   $2 - path of the ext4 image to (re)create
# Exits:
#   1 if the image cannot be mounted
#######################################
populate_rootfs() {
  echo "Populating rootfs..."

  local _src_rootfs="${1}"
  local _dst_rootfs="${2}"
  local _rootfs_mnt="/tmp/rootfs"
  local _dir

  mkdir -p "$(dirname "${_dst_rootfs}")"
  # Always start from an empty image
  rm -f "${_dst_rootfs}"
  mkdir -p "${_rootfs_mnt}"

  truncate -s "${ROOTFS_SIZE}" "${_dst_rootfs}"
  mkfs.ext4 "${_dst_rootfs}"

  mount -v -t ext4 -o defaults "${_dst_rootfs}" "${_rootfs_mnt}" || {
    # Show the most recent kernel messages to help explain the failure
    dmesg | tail -5
    exit 1
  }

  rsync -a "${_src_rootfs}"/ "${_rootfs_mnt}"/

  for _dir in dev proc run sys var; do
    mkdir -p "${_rootfs_mnt}/${_dir}"
  done

  # alpine already has /sbin/init that we should replace, otherwise
  # we would probably use --ignore-existing as well
  rsync -a --keep-dirlinks "${overlay_src}"/ "${_rootfs_mnt}"/

  # Write all environment variables (minus a few shell-specific ones) to a
  # file in the rootfs as NAME='value' lines. The line is split on the FIRST
  # '=' only, so values that themselves contain '=' are kept intact.
  # NOTE(review): values containing single quotes or newlines are still
  # written verbatim - confirm how the guest init consumes this file.
  printenv \
    | grep -vE "^(PWD|TERM|USER|SHLVL|PATH|HOME|_)=" \
    | awk -v q="'" '{
        eq = index($0, "=")
        if (eq == 0) { print $0 "=" q q; next }
        print substr($0, 1, eq - 1) "=" q substr($0, eq + 1) q
      }' >"${_rootfs_mnt}/var/environment"

  # write the guest command to the end of the init script
  echo "exec ${cmd_str}" >>"${_rootfs_mnt}/sbin/init"

  umount -v "${_rootfs_mnt}"

  chown firecracker:firecracker "${_dst_rootfs}"
}
#######################################
# Create the data drive image if it does not exist yet.
#
# An existing regular file at the given path is left untouched; otherwise a
# new image of DATAFS_SIZE is allocated, formatted as ext4 and handed to the
# firecracker user.
# Globals:
#   DATAFS_SIZE (read) - size handed to `truncate -s`
# Arguments:
#   $1 - path of the data image
#######################################
populate_datafs() {
  local _image="${1}"

  mkdir -p "$(dirname "${_image}")"

  # Nothing to do when the image is already there
  if [ -f "${_image}" ]; then
    return 0
  fi

  echo "Populating datafs..."
  truncate -s "${DATAFS_SIZE}" "${_image}"
  mkfs.ext4 -q "${_image}"
  chown firecracker:firecracker "${_image}"
}
#######################################
# Render the Firecracker JSON config.
#
# The template is first passed through envsubst, then the boot arguments,
# machine sizing and first network interface are set with jq. Values are
# handed to jq with --arg so quotes, backslashes or other JSON/jq
# metacharacters in them cannot break or alter the filter.
# Globals (read):
#   KERNEL_BOOT_ARGS, VCPU_COUNT, MEM_SIZE_MIB, GUEST_MAC, TAP_DEVICE
# Arguments:
#   $1 - path of the config template
#   $2 - path of the config file to write
#######################################
generate_config() {
  echo "Generating Firecracker config file..."

  local _src_config="${1}"
  local _dst_config="${2}"

  envsubst <"${_src_config}" >"${_dst_config}"

  # vcpu_count and mem_size_mib are converted with tonumber so they are
  # written as JSON numbers, as before.
  # It doesn't seem to matter what we call the interface, it always shows up
  # as 'eth0' in the guest.
  jq \
    --arg boot_args "${KERNEL_BOOT_ARGS}" \
    --arg vcpu_count "${VCPU_COUNT}" \
    --arg mem_size_mib "${MEM_SIZE_MIB}" \
    --arg guest_mac "${GUEST_MAC}" \
    --arg host_dev_name "${TAP_DEVICE}" \
    '
      ."boot-source".boot_args = $boot_args
      | ."machine-config".vcpu_count = ($vcpu_count | tonumber)
      | ."machine-config".mem_size_mib = ($mem_size_mib | tonumber)
      | ."network-interfaces"[0].iface_id = "net0"
      | ."network-interfaces"[0].guest_mac = $guest_mac
      | ."network-interfaces"[0].host_dev_name = $host_dev_name
    ' "${_dst_config}" >"${_dst_config}".tmp

  # Only replace the config once jq has succeeded
  mv "${_dst_config}".tmp "${_dst_config}"
}
#######################################
# Append the iptables rules that give the TAP device outbound connectivity.
#
# Every rule carries the TAP device name as an iptables comment.
# Arguments:
#   $1 - TAP device name
#   $2 - host (outbound) interface name
#######################################
add_rules() {
  local _tap="${1}"
  local _uplink="${2}"
  # Comment match shared by all three rules
  local -a _tag=(-m comment --comment "${_tap}")

  echo "Adding iptables rules..."

  # Masquerade traffic leaving through the host interface
  iptables-legacy -t nat -A POSTROUTING -o "${_uplink}" -j MASQUERADE "${_tag[@]}"
  # Accept forwarded packets of related/established connections
  iptables-legacy -A FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT "${_tag[@]}"
  # Accept forwarding from the TAP device to the host interface
  iptables-legacy -A FORWARD -i "${_tap}" -o "${_uplink}" -j ACCEPT "${_tag[@]}"
}
#######################################
# Remove the iptables rules added for a TAP device, if present.
#
# Best effort: error output of iptables is discarded and a failing delete
# (for example because the rule does not exist) is ignored.
# Arguments:
#   $1 - TAP device name (also the comment the rules were tagged with)
#   $2 - host (outbound) interface name
#######################################
delete_rules() {
  local _tap="${1}"
  local _uplink="${2}"
  # Comment match shared by all three rules
  local -a _tag=(-m comment --comment "${_tap}")

  echo "Deleting iptables rules..."

  # One stderr redirect for the whole group; each delete is allowed to fail
  {
    iptables-legacy -t nat -D POSTROUTING -o "${_uplink}" -j MASQUERADE "${_tag[@]}" || true
    iptables-legacy -D FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT "${_tag[@]}" || true
    iptables-legacy -D FORWARD -i "${_tap}" -o "${_uplink}" -j ACCEPT "${_tag[@]}" || true
  } 2>/dev/null
}
#######################################
# Create and configure the TAP device used by the guest.
#
# Aborts the script if the device already exists. Otherwise drops the
# /tmp/.do_cleanup marker, creates the TAP device owned by the firecracker
# user, assigns its address, brings it up, enables IPv4 forwarding and
# (re)installs the iptables rules.
# Arguments:
#   $1 - TAP device name
#   $2 - address in CIDR notation to assign to the TAP device
#   $3 - host (outbound) interface name
# Exits:
#   1 if the TAP device already exists
#######################################
setup_networking() {
  local _tap="${1}"
  local _cidr="${2}"
  local _uplink="${3}"

  # bail out if the tap device already exists
  if ip link show "${_tap}" >/dev/null 2>&1; then
    echo "TAP device ${_tap} already exists!"
    exit 1
  fi

  # From here on the cleanup handler is allowed to tear networking down;
  # the marker file tells it so.
  touch /tmp/.do_cleanup

  echo "Creating ${_tap} device..."
  ip tuntap add dev "${_tap}" mode tap user firecracker
  ip addr add "${_cidr}" dev "${_tap}"
  ip link set dev "${_tap}" up

  echo "Enabling IP forwarding..."
  sysctl -w net.ipv4.ip_forward=1

  # Drop any stale rules for this device before adding fresh ones
  delete_rules "${_tap}" "${_uplink}"
  add_rules "${_tap}" "${_uplink}"
}
#######################################
# Normalize an IPv4 address/CIDR to "<address>/<prefix length>".
#
# Runs ipcalc once and reads the address from its "Address:" line and the
# prefix length from the fourth field of its "Netmask:" line.
# Arguments:
#   $1 - IPv4 address, optionally with a netmask or prefix length
# Outputs:
#   the normalized CIDR on stdout; a diagnostic on stderr on failure
# Returns:
#   0 on success, 1 if ipcalc's output lacks the address or the prefix
#######################################
normalize_cidr() {
  local _cidr

  # awk exits non-zero when either field is missing so that a bad input
  # cannot silently turn into a bare "/".
  _cidr="$(ipcalc -nb "${1}" | awk '
    /^Address:/ { address = $2 }
    /^Netmask:/ { prefix = $4 }
    END {
      if (address == "" || prefix == "") exit 1
      print address "/" prefix
    }')" || {
    echo "Unable to parse '${1}' as an IPv4 address or CIDR." >&2
    return 1
  }

  echo "${_cidr}"
}
#######################################
# Build the kernel "ip=" boot parameter for the guest.
#
# Output format:
#   ip=<client>:<server>:<gateway>:<netmask>:<hostname>:<device>:<autoconf>
# with an empty server field and autoconf set to "off".
#
# The netmask is derived from an argument that still carries its CIDR
# suffix: the client address if it has one, otherwise the gateway address.
# (Deriving it after the suffix was stripped always produced ipcalc's
# default mask, regardless of the real prefix.) If neither has a suffix,
# ipcalc's default for the client address is used.
# Arguments:
#   $1 - guest IP, optionally in CIDR notation
#   $2 - gateway IP, optionally in CIDR notation
#   $3 - guest hostname
#   $4 - guest network device name
# Outputs:
#   the ip= parameter on stdout
#######################################
network_config() {
  local _client_ip="${1}"
  local _server_ip=""
  local _gw_ip="${2}"
  local _netmask=""
  local _hostname="${3}"
  local _device="${4}"
  local _autoconf=off
  local _netmask_src="${1}"

  # Choose the argument that actually tells us the prefix length
  if [[ "${1}" != */* && "${2}" == */* ]]; then
    _netmask_src="${2}"
  fi
  _netmask="$(ipcalc -nb "${_netmask_src}" | awk '/^Netmask:/ {print $2}')"

  # normalize addresses to remove cidr suffix
  _client_ip="$(ipcalc -nb "${_client_ip}" | awk '/^Address:/ {print $2}')"
  _gw_ip="$(ipcalc -nb "${_gw_ip}" | awk '/^Address:/ {print $2}')"

  echo "ip=${_client_ip}:${_server_ip}:${_gw_ip}:${_netmask}:${_hostname}:${_device}:${_autoconf}"
}
#######################################
# Derive a MAC address from an IPv4 address.
#
# The result is "52:54" followed by the four octets as two-digit uppercase
# hex. A trailing "/<prefix>" is ignored so the address may be given in
# CIDR notation.
# Arguments:
#   $1 - IPv4 address, optionally with a CIDR suffix
# Outputs:
#   the MAC address on stdout
#######################################
ip_to_mac() {
  # Drop an optional CIDR suffix; printf cannot parse e.g. "2/30"
  local _ip="${1%%/*}"
  local _o1 _o2 _o3 _o4

  IFS=. read -r _o1 _o2 _o3 _o4 <<<"${_ip}"
  printf '52:54:%02X:%02X:%02X:%02X\n' "${_o1}" "${_o2}" "${_o3}" "${_o4}"
}
#######################################
# Create a named pipe for logs and forward whatever is written to it.
#
# Any existing file at the pipe path is replaced. A background `cat` copies
# the pipe's content to the given output, and the pipe is then handed to
# the firecracker user.
# Arguments:
#   $1 - path of the named pipe to create
#   $2 - file or device the pipe's content is written to
#######################################
create_logs_fifo() {
  local _pipe="${1}"
  local _sink="${2}"
  local _pipe_dir

  _pipe_dir="$(dirname "${_pipe}")"
  mkdir -p "${_pipe_dir}"

  # Recreate the pipe from scratch
  rm -f "${_pipe}"
  mkfifo "${_pipe}"

  # Background reader: forwards the pipe to the requested output
  cat "${_pipe}" >"${_sink}" &

  # Take ownership of the named pipe
  chown firecracker:firecracker "${_pipe}"
}
#######################################
# Tear down the networking created for the guest.
#
# Does nothing unless the /tmp/.do_cleanup marker exists. Otherwise deletes
# the TAP device and its iptables rules (failures are ignored) and removes
# the marker.
# Globals:
#   TAP_DEVICE (read), HOST_IFACE (read)
#######################################
cleanup() {
  # No marker, nothing of ours to remove
  [ -f /tmp/.do_cleanup ] || return 0

  echo "Cleaning up..."

  # delete tap device, then the rules tagged with its name
  ip link del "${TAP_DEVICE}" || true
  delete_rules "${TAP_DEVICE}" "${HOST_IFACE}" || true

  rm -f /tmp/.do_cleanup
}
# Absolute directory containing this script; the overlay, source rootfs and
# config template are looked up next to it.
script_root="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
overlay_src="${script_root}/overlay"
rootfs_src="${script_root}/rootfs"
config_src="${script_root}/config.json"

# Check that at least one argument was passed
if [ $# -eq 0 ]; then
  echo "At least one COMMAND instruction is required. See the project README for usage."
  # Block forever instead of exiting
  sleep infinity
fi

# Store the script arguments as the guest command.
# Start from an empty string so that a cmd_str variable inherited from the
# environment cannot end up in front of the guest command.
cmd_str=""
for arg in "$@"; do
  # Remove existing quotes
  arg=${arg%\"}
  arg=${arg#\"}
  # Escape existing unescaped quotes
  arg=${arg//\"/\\\"}
  # Add quotes around arguments
  arg="\"$arg\""
  cmd_str+="$arg "
done
# Host capacity, used both as defaults and as upper bounds for the guest.
total_cpu_count=$(nproc --all)
# Sixth number printed by `free -m`, less 50 MiB
total_mem_mib=$(($(free -m | grep -oP '\d+' | head -6 | tail -1) - 50))
# Size column of `df` for the current directory, in bytes
total_disk_bytes=$(df -B1 . | awk 'NR==2 {print $2}')

# Anything not specified (or empty) defaults to the host value
: "${VCPU_COUNT:=${total_cpu_count}}"
: "${MEM_SIZE_MIB:=${total_mem_mib}}"
: "${ROOTFS_SIZE:=${total_disk_bytes}}"
: "${DATAFS_SIZE:=${total_disk_bytes}}"

# firecracker is limited to 32 vCPUs
if [ "${VCPU_COUNT}" -gt 32 ]; then
  echo "Maximum VCPU count is 32."
  VCPU_COUNT=32
fi

# Never ask for more vCPUs than the host has
if [ "${VCPU_COUNT}" -gt "${total_cpu_count}" ]; then
  echo "Using max available VCPUs: ${total_cpu_count}."
  VCPU_COUNT=${total_cpu_count}
fi

# Never ask for more memory than the host value
if [ "${MEM_SIZE_MIB}" -gt "${total_mem_mib}" ]; then
  echo "Using max available memory: ${total_mem_mib} MiB."
  MEM_SIZE_MIB=${total_mem_mib}
fi

# Cap the root drive at the host value
if [ "${ROOTFS_SIZE}" -gt "${total_disk_bytes}" ]; then
  echo "Using max available rootfs size: ${total_disk_bytes} bytes."
  ROOTFS_SIZE=${total_disk_bytes}
fi

# Cap the data drive at the host value
if [ "${DATAFS_SIZE}" -gt "${total_disk_bytes}" ]; then
  echo "Using max available datafs size: ${total_disk_bytes} bytes."
  DATAFS_SIZE=${total_disk_bytes}
fi
# Default the host interface to the device of the FIRST default route.
# The device is taken from the token following "dev" so that several default
# routes, or a default route without a "via" hop, still yield exactly one
# interface name.
if [ -z "${HOST_IFACE:-}" ]; then
  HOST_IFACE="$(ip route | awk '
    $1 == "default" {
      for (i = 2; i < NF; i++) {
        if ($i == "dev") { print $(i + 1); exit }
      }
    }')"
fi

if [ -z "${TAP_IP:-}" ]; then
  # 10.<r1>.<r2>.1/30 with r1 and r2 random numbers between 1 and 254
  TAP_IP=10.$((1 + RANDOM % 254)).$((1 + RANDOM % 254)).1/30
fi

TAP_IP="$(normalize_cidr "${TAP_IP}")"

if [ -z "${GUEST_IP:-}" ]; then
  # the default guest IP is the TAP IP + 1 (last octet), without CIDR suffix
  GUEST_IP="$(echo "${TAP_IP}" | awk -F'[./]' '{print $1"."$2"."$3"."$4+1}')"
fi

if [ -z "${TAP_DEVICE:-}" ]; then
  # "tap-" plus the first three octets of the TAP IP;
  # must be less than 16 characters
  TAP_DEVICE="$(echo "${TAP_IP}" | awk -F'[./]' '{print "tap-"$1"-"$2"-"$3}')"
fi

if [ -z "${GUEST_MAC:-}" ]; then
  # guest MAC is '52:54' followed by the hex encoded guest IP octets
  GUEST_MAC="$(ip_to_mac "${GUEST_IP}")"
fi

if [ -z "${KERNEL_BOOT_ARGS:-}" ]; then
  KERNEL_BOOT_ARGS="console=ttyS0 reboot=k panic=1 pci=off random.trust_cpu=on"

  # aarch64 hosts additionally get keep_bootcon
  if [ "$(uname -m)" = "aarch64" ]; then
    KERNEL_BOOT_ARGS="keep_bootcon ${KERNEL_BOOT_ARGS}"
  fi
fi

# User-supplied extras are appended to whichever base arguments are in use
if [ -n "${EXTRA_KERNEL_BOOT_ARGS:-}" ]; then
  KERNEL_BOOT_ARGS="${KERNEL_BOOT_ARGS} ${EXTRA_KERNEL_BOOT_ARGS}"
fi

# The guest network configuration (ip=...) always goes last
KERNEL_BOOT_ARGS="${KERNEL_BOOT_ARGS} $(network_config "${GUEST_IP}" "${TAP_IP}" "$(hostname)" eth0)"
# Print the effective configuration, one setting per line
printf '%s\n' \
  "Virtual CPUs: ${VCPU_COUNT}" \
  "Memory: ${MEM_SIZE_MIB}M" \
  "Root Drive (vda): ${ROOTFS_SIZE}B" \
  "Data Drive (vdb): ${DATAFS_SIZE}B" \
  "Host Interface: ${HOST_IFACE}" \
  "TAP Device: ${TAP_DEVICE}" \
  "TAP IP Address: ${TAP_IP}" \
  "Guest IP Address: ${GUEST_IP}" \
  "Guest MAC Address: ${GUEST_MAC}" \
  "Kernel Boot Args: ${KERNEL_BOOT_ARGS}" \
  "Guest Command: ${cmd_str}"

# Preflight: the source root filesystem must be present.
# On failure the script reports the problem and then blocks forever.
ls "${rootfs_src}" &>/dev/null || {
  echo "Root Filesystem not found in ${rootfs_src}. Did you forget to COPY it?"
  sleep infinity
}

# Preflight: /dev/kvm must be present for hardware acceleration
ls /dev/kvm &>/dev/null || {
  echo "KVM hardware acceleration unavailable. Pass --device /dev/kvm in your Docker run command."
  sleep infinity
}
# From here on, run the cleanup handler whenever the script exits
trap cleanup EXIT

# Make sure /tmp, /run and /srv exist; any of them that is a tmpfs mount is
# remounted read-write with the execute bit set.
for dir in /tmp /run /srv; do
  mkdir -p "${dir}"

  # Only tmpfs mounts need the remount
  [ "$(stat -f -c '%T' "${dir}")" = "tmpfs" ] || continue

  echo "Remounting ${dir} as rw,exec..."
  mount -o remount,rw,exec tmpfs "${dir}"
done
# Launch sequence: build the jailer chroot, prepare the drive images,
# networking, config and log pipe, bind mount them into the chroot and
# finally start firecracker through the jailer. The order of the steps
# below matters.

# The jailer will use this id to create a unique chroot directory for the MicroVM
# among other things.
id="$(uuidgen)"
# These directories will be bind mounted to the chroot and can
# optionally be replaced with volumes mounted by the user.
boot_jail="/jail/boot"
data_jail="/jail/data"
# The jailer will use this directory as the base for the chroot directory
chroot_base="/srv/jailer"
# Layout is <base>/firecracker/<id>/root
chroot_dir="${chroot_base}/firecracker/${id}/root"
echo "Creating jailer chroot..."
# Start from a clean chroot base on every run
rm -rf "${chroot_base}"
mkdir -p "${boot_jail}" "${chroot_dir}"/boot
mkdir -p "${data_jail}" "${chroot_dir}"/data
# Decompress the kernel only when the compressed image is present and the
# decompressed one is not.
if [ -f "${boot_jail}"/vmlinux.bin.lz4 ] && [ ! -f "${boot_jail}"/vmlinux.bin ]; then
echo "Decompressing ${boot_jail}/vmlinux.bin.lz4..."
lz4 -d "${boot_jail}"/vmlinux.bin.lz4 "${boot_jail}"/vmlinux.bin
fi
# Everything firecracker needs at runtime is written below /jail/boot and
# /jail/data: root image, data image, config and the log pipe.
populate_rootfs "${rootfs_src}" "${boot_jail}"/rootfs.ext4
populate_datafs "${data_jail}"/datafs.ext4
setup_networking "${TAP_DEVICE}" "${TAP_IP}" "${HOST_IFACE}"
generate_config "${config_src}" "${boot_jail}"/config.json
# Firecracker's log output is forwarded to this script's stdout
create_logs_fifo "${boot_jail}"/logs.fifo /dev/stdout
# Bind mount /jail/boot and /jail/data to /boot and /data in the chroot.
# This way users can mount their own volumes to /jail/boot and /jail/data
# without needing to know the exact path of the chroot.
mount --bind "${boot_jail}" "${chroot_dir}"/boot
mount --bind "${data_jail}" "${chroot_dir}"/data
# /usr/local/bin/firecracker --help
# /usr/local/bin/jailer --help
echo "Starting firecracker via jailer..."
# https://github.com/firecracker-microvm/firecracker/blob/main/docs/jailer.md
# Arguments before the bare "--" are for the jailer, those after it are
# passed to firecracker. The /boot/... paths refer to the bind mount made
# above, i.e. they are relative to the chroot.
# The jailer is not started with `exec`, so the EXIT trap still runs once it
# returns.
/usr/local/bin/jailer --id "${id}" \
--exec-file /usr/local/bin/firecracker \
--chroot-base-dir "${chroot_base}" \
--uid "$(id -u firecracker)" \
--gid "$(id -g firecracker)" \
-- \
--no-api \
--config-file /boot/config.json \
--log-path /boot/logs.fifo