forked from ArchiveBox/ArchiveBox
-
Notifications
You must be signed in to change notification settings - Fork 0
/
docker-compose.yml
164 lines (139 loc) · 7.44 KB
/
docker-compose.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# Usage:
# docker compose run archivebox init --setup
# docker compose up
# echo "https://example.com" | docker compose run archivebox archivebox add
# docker compose run archivebox add --depth=1 https://example.com/some/feed.rss
# docker compose run archivebox config --set MEDIA_MAX_SIZE=750m
# docker compose run archivebox help
# Documentation:
# https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose
version: '3.9'
services:
archivebox:
#image: ${DOCKER_IMAGE:-archivebox/archivebox:dev}
image: archivebox/archivebox:dev
command: server --quick-init 0.0.0.0:8000
ports:
- 8000:8000
volumes:
- ./data:/data
# - ./etc/crontabs:/var/spool/cron/crontabs # uncomment this and archivebox_scheduler below to set up automatic recurring archive jobs
# - ./archivebox:/app/archivebox # uncomment this to mount the ArchiveBox source code at runtime (for developers working on archivebox)
# build: . # uncomment this to build the image from source code at buildtime (for developers working on archivebox)
environment:
- ALLOWED_HOSTS=* # restrict this to only accept incoming traffic via specific domain name
# - PUBLIC_INDEX=True # set to False to prevent anonymous users from viewing snapshot list
# - PUBLIC_SNAPSHOTS=True # set to False to prevent anonymous users from viewing snapshot content
# - PUBLIC_ADD_VIEW=False # set to True to allow anonymous users to submit new URLs to archive
# - ADMIN_USERNAME=admin # create an admin user on first run with the given user/pass combo
# - ADMIN_PASSWORD=SomeSecretPassword
# - PUID=911 # set to your host user's UID & GID if you encounter permissions issues
# - PGID=911
# - SEARCH_BACKEND_ENGINE=sonic # uncomment these and sonic container below for better full-text search
# - SEARCH_BACKEND_HOST_NAME=sonic
# - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
# - MEDIA_MAX_SIZE=750m # increase this filesize limit to allow archiving larger audio/video files
# - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out
# - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs)
# - SAVE_ARCHIVE_DOT_ORG=True # set to False to disable submitting all URLs to Archive.org when archiving
# ...
# add further configuration options from archivebox/config.py as needed (to apply them only to this container)
# or set using `docker compose run archivebox config --set SOME_KEY=someval` (to persist config across all containers)
# For ad-blocking during archiving, uncomment this section and pihole service section below
# networks:
# - dns
# dns:
# - 172.20.0.53
######## Optional Addons: tweak examples below as needed for your specific use case ########
### Example: To run the Sonic full-text search backend, first download the config file to sonic.cfg
# $ curl -O https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/etc/sonic.cfg
# After starting, backfill any existing Snapshots into the full-text index:
# $ docker-compose run archivebox update --index-only
# sonic:
# image: valeriansaliou/sonic:latest
# expose:
# - 1491
# environment:
# - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
# volumes:
# - ./sonic.cfg:/etc/sonic.cfg:ro
# - ./data/sonic:/var/lib/sonic/store
### Example: To run pihole in order to block ad/tracker requests during archiving,
# uncomment this block and set up pihole using its admin interface
# pihole:
# image: pihole/pihole:latest
# ports:
# - 127.0.0.1:8090:80 # uncomment to access the admin HTTP interface on http://localhost:8090
# environment:
# - WEBPASSWORD=SET_THIS_TO_SOME_SECRET_PASSWORD_FOR_ADMIN_DASHBOARD
# - DNSMASQ_LISTENING=all
# dns:
# - 127.0.0.1
# - 1.1.1.1
# networks:
# dns:
# ipv4_address: 172.20.0.53
# volumes:
# - ./etc/pihole:/etc/pihole
# - ./etc/dnsmasq:/etc/dnsmasq.d
### Example: Enable ability to run regularly scheduled archiving tasks by uncommenting this container
# $ docker compose run archivebox schedule --every=day --depth=1 'https://example.com/some/rss/feed.xml'
# then restart the scheduler container to apply the changes to the schedule
# $ docker compose restart archivebox_scheduler
# archivebox_scheduler:
# image: ${DOCKER_IMAGE:-archivebox/archivebox:dev}
# command: schedule --foreground
# environment:
# - MEDIA_MAX_SIZE=750m # increase this number to allow archiving larger audio/video files
# # - TIMEOUT=60 # increase if you see timeouts often during archiving / on slow networks
# # - ONLY_NEW=True # set to False to retry previously failed URLs when re-adding instead of skipping them
# # - CHECK_SSL_VALIDITY=True # set to False to allow saving URLs w/ broken SSL certs
# # - SAVE_ARCHIVE_DOT_ORG=True # set to False to disable submitting URLs to Archive.org when archiving
# # - PUID=502 # set to your host user's UID & GID if you encounter permissions issues
# # - PGID=20
# volumes:
# - ./data:/data
# - ./etc/crontabs:/var/spool/cron/crontabs
# # cpus: 2 # uncomment / edit these values to limit container resource consumption
# # mem_limit: 2048m
# # shm_size: 1024m
### Example: Put Nginx in front of the ArchiveBox server for SSL termination
# nginx:
# image: nginx:alpine
# ports:
# - 443:443
# - 80:80
# volumes:
# - ./etc/nginx.conf:/etc/nginx/nginx.conf
# - ./data:/var/www
### Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel
# wireguard:
# image: linuxserver/wireguard:latest
# network_mode: 'service:archivebox'
# cap_add:
# - NET_ADMIN
# - SYS_MODULE
# sysctls:
# - net.ipv4.conf.all.rp_filter=2
# - net.ipv4.conf.all.src_valid_mark=1
# volumes:
# - /lib/modules:/lib/modules
# - ./wireguard.conf:/config/wg0.conf:ro
### Example: Run PYWB in parallel and auto-import WARCs from ArchiveBox
# pywb:
# image: webrecorder/pywb:latest
# entrypoint: /bin/sh -c '(wb-manager init default || test $$? -eq 2) && wb-manager add default /archivebox/archive/*/warc/*.warc.gz; wayback;'
# environment:
# - INIT_COLLECTION=archivebox
# ports:
# - 8080:8080
# volumes:
# - ./data:/archivebox
# - ./data/wayback:/webarchive
networks:
# network needed for pihole container to offer :53 dns resolving on fixed ip for archivebox container
dns:
ipam:
driver: default
config:
- subnet: 172.20.0.0/24