forked from mafm/zfs-snapshot-disk-usage-matrix
-
Notifications
You must be signed in to change notification settings - Fork 0
/
zfs-snapshot-disk-usage-matrix.py
executable file
·146 lines (118 loc) · 5.71 KB
/
zfs-snapshot-disk-usage-matrix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/env python3
"""Usage: zfs-snapshot-disk-usage-matrix.py [--human-readable|-h] <filesystem>
This script produces csv output giving useful details of the usage of
ZFS snapshots.
When you ask ZFS about the disk space used by snapshots, it really
only wants to tell you about how much space is being *uniquely* used
by individual snapshots. If two adjacent snapshots use a Gigabyte of
space between them, but have only 1 Megabyte of space unique to each
of them, "zfs list" will tell you that each snapshot is consuming only
*one* Megabyte of space. This is unhelpful if you're trying to work
out which snapshots to delete to free up diskspace.
If you use "zfs destroy -nv filesystem@snap1%snap2" ZFS *will*
actually tell you how much space the sequence of snapshots between
snap1 and snap2 is using. This is much more useful for working out
which snapshots are using all the space. In the example from the
previous paragraph, this would tell you that deleting both snapshots
together would free 1.002 Gigabytes - obviously much more useful
information.
This script runs "zfs destroy -nv" for all pairs of snapshots in a
filesystem, and shows how much space you can free up by deleting the
corresponding sequence of snapshots.
Output from this script:
(a) converts sizes shown into bytes, or, alternatively, human-readable sizes
(b) strips any common prefix from snapshot names (e.g. "zfs-auto-snap")
Options could be added to enable/disable this, but I can't be bothered.
Example:
zfs-snapshot-disk-usage-matrix.py local-fast-tank-machine0/Virtual-Machines/VirtualBox/vpn-linux-u14 | tee vpn-snapshot-usage.csv
"""
import math
import subprocess
import sys
from os.path import commonprefix
def convert_size(size_bytes):
    """Format a byte count as a human-readable string with binary units.

    The value is scaled by powers of 1024 and rounded to two decimal
    places, e.g. ``1536`` -> ``"1.5kiB"``.  Zero is rendered as ``"0B"``.

    Args:
        size_bytes: Non-negative size in bytes (int or float).

    Returns:
        The scaled value immediately followed by its unit, e.g. ``"1.0GiB"``.
        Values beyond the largest unit are expressed in that unit.

    Raises:
        ValueError: If ``size_bytes`` is negative.
    """
    if size_bytes < 0:
        raise ValueError("size must be non-negative, got {!r}".format(size_bytes))
    if size_bytes == 0:
        return "0B"
    size_name = ("B", "kiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB")
    last_unit = len(size_name) - 1
    # Repeated division by 1024 is exact in binary floating point, so the
    # unit is never mis-selected the way floor(log(x, 1024)) can be for
    # values below 1 or right at a unit boundary.
    value = float(size_bytes)
    unit = 0
    while value >= 1024 and unit < last_unit:
        value /= 1024
        unit += 1
    scaled = round(value, 2)
    # Rounding can push e.g. 1023.999 up to 1024.0; show that in the next unit.
    if scaled >= 1024 and unit < last_unit:
        scaled = round(scaled / 1024, 2)
        unit += 1
    return "%s%s" % (scaled, size_name[unit])
def strip_filesystem_name(snapshot_name):
    """Given the name of a snapshot, strip the filesystem part.

    We require (and check) that the snapshot name contains a single
    '@' separating filesystem name from the 'snapshot' part of the name.

    Args:
        snapshot_name: Full snapshot name of the form ``filesystem@snapshot``.

    Returns:
        The text after the '@'.

    Raises:
        ValueError: If the name does not contain exactly one '@'.
    """
    # An explicit raise (rather than ``assert``) keeps the check active
    # when Python runs with -O.
    if snapshot_name.count("@") != 1:
        raise ValueError(
            "expected exactly one '@' in snapshot name: {!r}".format(snapshot_name))
    return snapshot_name.split("@")[1]
def maybe_ssh(host):
    """Return the argv prefix needed to run a command on HOST.

    For 'localhost' the prefix is empty, so the command runs directly.
    For any other host the command is wrapped in ``ssh -C <host>``.
    """
    is_remote = host != 'localhost'
    return ['ssh', '-C', host] if is_remote else []
def snapshots_in_creation_order(filesystem, host='localhost', strip_filesystem=False):
    """Return list of snapshots on FILESYSTEM in order of creation.

    Runs ``zfs list`` (through ssh when HOST is not 'localhost') sorted by
    creation and keeps only the output lines that start with
    ``FILESYSTEM@``.  When STRIP_FILESYSTEM is true, only the part after
    the '@' is returned for each snapshot.
    """
    zfs_list = ['zfs', 'list', '-r', '-t', 'snapshot',
                '-s', 'creation', '-o', 'name', filesystem]
    output = subprocess.check_output(maybe_ssh(host) + zfs_list,
                                     stderr=subprocess.STDOUT,
                                     encoding='utf8')
    wanted_prefix = filesystem + "@"
    # The prefix filter also discards the header line and any other output
    # line that does not name a snapshot of FILESYSTEM itself.
    snapshots = [row for row in output.split('\n')
                 if row.startswith(wanted_prefix)]
    if not strip_filesystem:
        return snapshots
    return [strip_filesystem_name(name) for name in snapshots]
def space_between_snapshots(filesystem, first_snap, last_snap, host='localhost'):
    """Space used by a sequence of snapshots.

    Runs ``zfs destroy -nvp FILESYSTEM@FIRST_SNAP%LAST_SNAP`` (through ssh
    when HOST is not 'localhost') and returns the value reported on its
    tab-separated ``reclaim`` line.

    Args:
        filesystem: Name of the ZFS filesystem owning the snapshots.
        first_snap: Snapshot name (without filesystem) starting the range.
        last_snap: Snapshot name (without filesystem) ending the range.
        host: Host to run the command on; 'localhost' runs it directly.

    Returns:
        The reclaimable size as the string printed by zfs.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
        ValueError: If the output contains no ``reclaim`` line.
    """
    cmd = maybe_ssh(host) + ['zfs', 'destroy', '-nvp',
                             '{}@{}%{}'.format(filesystem, first_snap, last_snap)]
    output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, encoding='utf8')
    # stderr is merged into the captured text, so look the record up by its
    # label instead of trusting that it is the second-to-last line.
    for line in reversed(output.splitlines()):
        label, tab, value = line.partition('\t')
        if tab and label == 'reclaim':
            return value.split('\t')[-1].strip()
    raise ValueError(
        "no 'reclaim' line in output of {!r}: {!r}".format(' '.join(cmd), output))
def print_csv(lines):
    """Write out a list of lists as CSV on standard output.

    Each item is followed by a comma (so every row ends with a trailing
    comma) and ``None`` items produce an empty field.

    Not robust against odd input: items are not quoted or escaped.

    Args:
        lines: Iterable of rows, each an iterable of items to print.
    """
    for line in lines:
        # Build the whole row first so it is emitted with a single write.
        print("".join(("" if item is None else str(item)) + ","
                      for item in line))
def write_snapshot_disk_usage_matrix(filesystem, suppress_common_prefix=True,
                                     human_readable=False):
    """Print a CSV matrix of the space used by every snapshot range.

    The header row lists the snapshots of FILESYSTEM in creation order.
    Each following row is labelled with an end snapshot and holds, for
    every start snapshot that is not later than it, the space reported by
    ``space_between_snapshots`` for that range; later columns are empty.
    Rows are printed as soon as they are computed.

    Args:
        filesystem: Name of the ZFS filesystem to inspect.
        suppress_common_prefix: Strip the prefix shared by all snapshot
            names from the printed labels.
        human_readable: Print sizes via ``convert_size`` instead of the
            raw value returned by zfs.
    """
    snapshot_names = snapshots_in_creation_order(filesystem, strip_filesystem=True)

    prefix_len = 0
    if suppress_common_prefix:
        prefix = commonprefix(snapshot_names)
        # With a single snapshot (or a name that is a prefix of all others)
        # the "common prefix" is an entire name; stripping it would leave a
        # blank label, so only strip when every label stays non-empty.
        if all(len(name) > len(prefix) for name in snapshot_names):
            prefix_len = len(prefix)
    labels = [name[prefix_len:] for name in snapshot_names]

    print_csv([[None] + labels])  # Start with Column headers
    for end, end_snap in enumerate(snapshot_names):
        this_line = [labels[end]]
        for start, start_snap in enumerate(snapshot_names):
            if start > end:
                # A range cannot start after it ends; leave the cell empty.
                this_line.append(None)
                continue
            space_used = space_between_snapshots(filesystem, start_snap, end_snap)
            if human_readable:
                space_used = convert_size(float(space_used))
            this_line.append(space_used)
        ## Show line we've just done
        print_csv([this_line])
if __name__ == '__main__':
    # Command line: an optional human-readable flag followed by the filesystem.
    cli_args = sys.argv[1:]
    if len(cli_args) == 1:
        # A lone argument is always taken as the filesystem name.
        write_snapshot_disk_usage_matrix(cli_args[0])
    elif len(cli_args) == 2 and (cli_args[0] == '-h' or cli_args[0].startswith('--human')):
        write_snapshot_disk_usage_matrix(cli_args[1], human_readable=True)
    else:
        # Anything else is a usage error; exit with the message on stderr.
        sys.exit("Usage: {} [--human-readable|-h] <filesystem>".format(sys.argv[0]))
# Useful calls for trying the functions out interactively:
# snapshots_in_creation_order('local-fast-tank-machine0/Virtual-Machines/VirtualBox/vpn-linux-u14')
# space_between_snapshots('local-fast-tank-machine0/Virtual-Machines/VirtualBox/vpn-linux-u14',
# 'zfs-auto-snap_monthly-2015-03-18-2345',
# 'zfs-auto-snap_frequent-2015-09-28-0245')
# write_snapshot_disk_usage_matrix('local-fast-tank-machine0/Virtual-Machines/VirtualBox/vpn-linux-u14')