-
Notifications
You must be signed in to change notification settings - Fork 0
/
futabascraper
75 lines (60 loc) · 1.7 KB
/
futabascraper
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/bin/bash
#
# futabascraper
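# Download the media files (png, gif, jpg, jpeg, webm) linked from one or more
# threads. Each thread's files are written to a directory named after the
# thread's subject, created inside the current working directory, so navigate
# to where you want that directory to live. Run the script with the thread
# URL(s) as arg(s).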
#
# <Brody Rethy> https://rethy.xyz
#######################################
# Download the media files linked from each thread URL given as an argument.
#
# For every thread the page is fetched once. Its subject (spaces replaced by
# "_", "#" and "/" removed; "untitled" when empty) names a directory under the
# current working directory, and every link whose target carries one of the
# known media extensions is saved into that directory. Files that already
# exist are left untouched, so re-running the script resumes a download.
#
# Arguments: one or more thread URLs.
# Outputs:   "[MKDIR|EXISTS] <title>" and "[GET] > <file>" lines on stdout;
#            error messages and "[FAIL]" lines on stderr.
# Returns:   0 when everything succeeded, 1 when a page, directory or file
#            could not be fetched/created. Exits 1 when no argument is given.
#######################################
main() {
  local deps=("curl" "pup")
  local exts=("png" "gif" "jpg" "jpeg" "webm")
  local thread_url thread_html thread_title thread_hrefs thread_suburl
  local action ext fname target
  local rc=0

  checkdeps "${deps[@]}"

  if [[ -z "${1:-}" ]]; then
    printf "Error: No arguments provided.\n" >&2
    exit 1
  fi

  for thread_url in "$@"; do
    # Fetch the page a single time; the title and every extension pass reuse
    # it. -f turns HTTP errors into a non-zero status instead of an error page.
    if ! thread_html=$(curl -fs "$thread_url"); then
      printf "[FAIL] %s\n" "$thread_url" >&2
      rc=1
      continue
    fi

    thread_title=$(
      printf '%s\n' "$thread_html" \
        | pup 'span .subject text{}' \
        | sed -e 's| |_|g' -e 's|[#/]||g'
    )
    [[ -n "$thread_title" ]] || thread_title="untitled"

    # Explicit if/else so a failing mkdir is not reported as "EXISTS".
    if [[ -d "$thread_title" ]]; then
      action="EXISTS"
    elif mkdir -p -- "$thread_title"; then
      action="MKDIR"
    else
      printf "Error: cannot create directory %s\n" "$thread_title" >&2
      rc=1
      continue
    fi
    printf "[%s] %s\n" "$action" "$thread_title"

    thread_hrefs=$(printf '%s\n' "$thread_html" | pup "a attr{href}")

    for ext in "${exts[@]}"; do
      # Read line by line rather than word-splitting, so URLs containing
      # glob characters such as "?" are never expanded against the filesystem.
      while IFS= read -r thread_suburl; do
        fname="${thread_suburl##*/}"
        target="$thread_title/$fname"
        [[ -z "$fname" || -e "$target" ]] && continue

        # Protocol-relative links had their "//" stripped; give them a scheme.
        [[ "$thread_suburl" =~ ^https?:// ]] \
          || thread_suburl="https://$thread_suburl"

        # Download to a temporary name and rename on success, so a failed
        # transfer never leaves a file that later runs would skip as complete.
        if curl -fs "$thread_suburl" > "$target.part" 2> /dev/null < /dev/null \
          && mv -- "$target.part" "$target"; then
          printf "[GET] > %s\n" "$fname"
        else
          rm -f -- "$target.part"
          printf "[FAIL] > %s\n" "$fname" >&2
          rc=1
        fi
      done < <(
        # Match ".ext" as a real extension (end of URL or followed by a
        # non-alphanumeric such as "?"), not as a substring anywhere.
        printf '%s\n' "$thread_hrefs" \
          | grep -iE "\.${ext}([^[:alnum:]]|\$)" \
          | sed 's|^//||' \
          | sort -u
      )
    done
  done

  return "$rc"
}
#######################################
# Verify that every named command can be resolved by the shell.
#
# Arguments: names of the required commands.
# Outputs:   when something is missing, a "Missing dependency:" /
#            "Missing dependencies:" header followed by one tab-indented
#            name per line, written to stderr.
# Returns:   0 when every command is found; otherwise exits the shell with
#            status 1.
#######################################
checkdeps() {
  local dep
  local missingdeps=()

  for dep in "$@"; do
    command -v "$dep" > /dev/null 2>&1 || missingdeps+=("$dep")
  done

  # Test the element count, not the first element, so an empty name still
  # counts as missing.
  if (( ${#missingdeps[@]} == 0 )); then
    return 0
  fi

  if (( ${#missingdeps[@]} > 1 )); then
    printf "Missing dependencies:\n" >&2
  else
    printf "Missing dependency:\n" >&2
  fi
  # Names are passed as data, never as the format string; printf repeats the
  # format once per argument.
  printf "\t%s\n" "${missingdeps[@]}" >&2
  exit 1
}
# Entry point: run main with every command-line argument given to the script.
main "$@"