Skip to content

Commit

Permalink
Don't dump pages which only contain zero bytes
Browse files Browse the repository at this point in the history
Introduces a new command line option '--skip-zero-pages'
which detects pages that contain only zero bytes and
prevents them from being dumped into the process's image file.
It is a potentially expensive operation because it checks
every single process page for whether it contains only zeros, but
it can significantly decrease the image size and improve the
startup time if many such pages exist. It effectively
replaces such pages with the kernel's zero-page on restore.

Signed-off-by: Volker Simonis <[email protected]>
  • Loading branch information
simonis committed Jan 24, 2024
1 parent 50190ae commit b23ad22
Show file tree
Hide file tree
Showing 18 changed files with 347 additions and 7 deletions.
8 changes: 8 additions & 0 deletions Documentation/criu.txt
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,14 @@ mount -t cgroup -o devices,freezer none devices,freezer
Deduplicate "old" data in pages images of previous *dump*. This option
implies incremental *dump* mode (see the *pre-dump* command).

*--skip-zero-pages*::
Don't dump pages containing only zero bytes. This is a
potentially expensive operation because it checks
every single process page for whether it contains only zeros, but
it can significantly decrease the image size and improve the
startup time if many such pages exist. It effectively
replaces such pages with the kernel's zero-page on restore.

*-l*, *--file-locks*::
Dump file locks. It is necessary to make sure that all file lock users
are taken into dump, so it is only safe to use this for enclosed containers
Expand Down
1 change: 1 addition & 0 deletions criu/config.c
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
{ "ms", no_argument, 0, 1054 },
BOOL_OPT("track-mem", &opts.track_mem),
BOOL_OPT("auto-dedup", &opts.auto_dedup),
BOOL_OPT("skip-zero-pages", &opts.skip_zero_pages),
{ "libdir", required_argument, 0, 'L' },
{ "cpu-cap", optional_argument, 0, 1057 },
BOOL_OPT("force-irmap", &opts.force_irmap),
Expand Down
3 changes: 3 additions & 0 deletions criu/cr-service.c
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,9 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
if (req->has_auto_dedup)
opts.auto_dedup = req->auto_dedup;

if (req->has_skip_zero_pages)
opts.skip_zero_pages = req->skip_zero_pages;

if (req->has_force_irmap)
opts.force_irmap = req->force_irmap;

Expand Down
1 change: 1 addition & 0 deletions criu/crtools.c
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,7 @@ int main(int argc, char *argv[], char *envp[])
" pages images of previous dump\n"
" when used on restore, as soon as page is restored, it\n"
" will be punched from the image\n"
" --skip-zero-pages don't dump pages containing only zero bytes.\n"
" --pre-dump-mode splice - parasite based pre-dumping (default)\n"
" read - process_vm_readv syscall based pre-dumping\n"
"\n"
Expand Down
1 change: 1 addition & 0 deletions criu/include/cr_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ struct cr_options {
int track_mem;
char *img_parent;
int auto_dedup;
int skip_zero_pages;
unsigned int cpu_cap;
int force_irmap;
char **exec_cmd;
Expand Down
2 changes: 2 additions & 0 deletions criu/include/stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ enum {
CNT_SHPAGES_SKIPPED_PARENT,
CNT_SHPAGES_WRITTEN,

CNT_SKIPPED_ZERO_PAGES,

DUMP_CNT_NR_STATS,
};

Expand Down
53 changes: 48 additions & 5 deletions criu/mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
#include <sys/mman.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/prctl.h>
#include <sys/uio.h>

#include "types.h"
#include "cr_options.h"
Expand All @@ -31,6 +33,7 @@
#include "prctl.h"
#include "compel/infect-util.h"
#include "pidfd-store.h"
#include "xmalloc.h"

#include "protobuf.h"
#include "images/pagemap.pb-c.h"
Expand Down Expand Up @@ -191,11 +194,33 @@ static int generate_iovs(struct pstree_item *item, struct vma_area *vma, struct
bool has_parent)
{
unsigned long nr_scanned;
unsigned long pages[3] = {};
/* Counters for PAGES_SKIPPED_PARENT, PAGES_LAZY, PAGES_WRITTEN and SKIPPED_ZERO_PAGES */
unsigned long pages[4] = {};
unsigned long vaddr;
bool dump_all_pages;
int ret = 0;

static char *zero_page = NULL;
static char *remote_page = NULL;
int zero = 0;
struct iovec local[2];
struct iovec remote[1];
int nread = 0;
if (opts.skip_zero_pages && zero_page == NULL) {
zero_page = xmalloc(PAGE_SIZE);
remote_page = xmalloc(PAGE_SIZE);
if (zero_page == NULL || remote_page == NULL) {
pr_warn("Can't allocate memory - disabling --skip-zero-pages\n");
opts.skip_zero_pages = 0;
} else {
memzero(zero_page, PAGE_SIZE);
local[0].iov_base = remote_page;
local[0].iov_len = PAGE_SIZE;
remote[0].iov_base = (void *)0x0;
remote[0].iov_len = PAGE_SIZE;
}
}

dump_all_pages = should_dump_entire_vma(vma->e);

nr_scanned = 0;
Expand All @@ -207,9 +232,25 @@ static int generate_iovs(struct pstree_item *item, struct vma_area *vma, struct

/* If dump_all_pages is true, should_dump_page is called to get pme. */
next = should_dump_page(pmc, vma->e, vaddr, &softdirty);
if (!dump_all_pages && next != vaddr) {
vaddr = next - PAGE_SIZE;
continue;
if (!dump_all_pages) {
if (next != vaddr) {
vaddr = next - PAGE_SIZE;
continue;
} else if (opts.skip_zero_pages) {
remote[0].iov_base = (void *)vaddr;
nread = process_vm_readv(item->pid->real, local, 1, remote, 1, 0);
if (nread == PAGE_SIZE) {
zero = memcmp(zero_page, remote_page, PAGE_SIZE);
/*
* If the page contains just zeros we can treat it like the zero page and skip it.
* At restore it will be replaced by a reference to the zero page and COWed if accessed.
*/
if (zero == 0) {
pages[3]++;
continue;
}
}
}
}

if (vma_entry_can_be_lazy(vma->e) && !is_stack(item, vaddr))
Expand Down Expand Up @@ -247,8 +288,10 @@ static int generate_iovs(struct pstree_item *item, struct vma_area *vma, struct
cnt_add(CNT_PAGES_SKIPPED_PARENT, pages[0]);
cnt_add(CNT_PAGES_LAZY, pages[1]);
cnt_add(CNT_PAGES_WRITTEN, pages[2]);
cnt_add(CNT_SKIPPED_ZERO_PAGES, pages[3]);

pr_info("Pagemap generated: %lu pages (%lu lazy) %lu holes\n", pages[2] + pages[1], pages[1], pages[0]);
pr_info("Pagemap generated: %lu pages (%lu lazy) %lu holes %lu skipped zero\n",
pages[2] + pages[1], pages[1], pages[0], pages[3]);
return ret;
}

Expand Down
7 changes: 7 additions & 0 deletions criu/stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ static void display_stats(int what, StatsEntry *stats)
stats->dump->pages_skipped_parent, stats->dump->pages_skipped_parent);
pr_msg("Memory pages written: %" PRIu64 " (0x%" PRIx64 ")\n", stats->dump->pages_written,
stats->dump->pages_written);
if (stats->dump->has_skipped_zero_pages)
pr_msg("Memory pages skipped because zero: %" PRIu64 " (0x%" PRIx64 ")\n",
stats->dump->skipped_zero_pages, stats->dump->skipped_zero_pages);
pr_msg("Lazy memory pages: %" PRIu64 " (0x%" PRIx64 ")\n", stats->dump->pages_lazy,
stats->dump->pages_lazy);
} else if (what == RESTORE_STATS) {
Expand Down Expand Up @@ -178,6 +181,10 @@ void write_stats(int what)
ds_entry.has_page_pipes = true;
ds_entry.page_pipe_bufs = dstats->counts[CNT_PAGE_PIPE_BUFS];
ds_entry.has_page_pipe_bufs = true;
if (opts.skip_zero_pages) {
ds_entry.has_skipped_zero_pages = true;
ds_entry.skipped_zero_pages = dstats->counts[CNT_SKIPPED_ZERO_PAGES];
}

ds_entry.shpages_scanned = dstats->counts[CNT_SHPAGES_SCANNED];
ds_entry.has_shpages_scanned = true;
Expand Down
1 change: 1 addition & 0 deletions images/rpc.proto
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ message criu_opts {
optional bool leave_stopped = 69;
optional bool display_stats = 70;
optional bool log_to_stderr = 71;
optional bool skip_zero_pages = 72;
/* optional bool check_mounts = 128; */
}

Expand Down
2 changes: 2 additions & 0 deletions images/stats.proto
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ message dump_stats_entry {
optional uint64 shpages_scanned = 12;
optional uint64 shpages_skipped_parent = 13;
optional uint64 shpages_written = 14;

optional uint64 skipped_zero_pages = 15;
}

message restore_stats_entry {
Expand Down
11 changes: 11 additions & 0 deletions lib/c/criu.c
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,17 @@ void criu_set_auto_dedup(bool auto_dedup)
criu_local_set_auto_dedup(global_opts, auto_dedup);
}

/*
 * Record the skip-zero-pages setting on the given options object.
 *
 * The value is stored in the `rpc` member of @opts and the matching
 * `has_skip_zero_pages` presence flag is raised, so the field counts as
 * explicitly set even when @skip_zero_pages is false.
 */
void criu_local_set_skip_zero_pages(criu_opts *opts, bool skip_zero_pages)
{
	opts->rpc->skip_zero_pages = skip_zero_pages;
	opts->rpc->has_skip_zero_pages = true;
}

/*
 * Convenience wrapper around criu_local_set_skip_zero_pages() that applies
 * the skip-zero-pages setting to global_opts.
 */
void criu_set_skip_zero_pages(bool skip_zero_pages)
{
criu_local_set_skip_zero_pages(global_opts, skip_zero_pages);
}

void criu_local_set_force_irmap(criu_opts *opts, bool force_irmap)
{
opts->rpc->has_force_irmap = true;
Expand Down
1 change: 1 addition & 0 deletions test/javaTests/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
<!-- Suite testng xml file to consider for test execution -->
<suiteXmlFiles>
<suiteXmlFile>test.xml</suiteXmlFile>
<suiteXmlFile>test-zero.xml</suiteXmlFile>
</suiteXmlFiles>
</configuration>
</plugin>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ public void runtest(String testName, String checkpointOpt, String restoreOpt) th
String pid;
int exitCode;

System.out.println("======= Testing " + testName + " ========");
System.out.println("======= Testing " + testName + " " + checkpointOpt + " ========");

testSetup(testName);

Expand Down
89 changes: 89 additions & 0 deletions test/javaTests/test-zero.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
<?xml version = "1.0" encoding = "UTF-8"?>
<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd" >

<suite name = "Suite2">
<parameter name="checkpointOpt" value="--skip-zero-pages"/>
<parameter name="restoreOpt" value=""/>

<test name = "test1-FileRead">
<parameter name="testname" value="FileRead"/>
<classes>
<class name = "org.criu.java.tests.CheckpointRestore"/>
</classes>
</test>

<test name = "test2-ReadWrite">
<parameter name="testname" value="ReadWrite"/>
<classes>
<class name = "org.criu.java.tests.CheckpointRestore"/>
</classes>
</test>

<test name = "test3-MemoryMappings">
<parameter name="testname" value="MemoryMappings"/>
<classes>
<class name = "org.criu.java.tests.CheckpointRestore"/>
</classes>
</test>

<test name = "test4-MultipleFileRead">
<parameter name="testname" value="MultipleFileRead"/>
<classes>
<class name = "org.criu.java.tests.CheckpointRestore"/>
</classes>
</test>

<test name = "test5-MultipleFileWrite">
<parameter name="testname" value="MultipleFileWrite"/>
<classes>
<class name = "org.criu.java.tests.CheckpointRestore"/>
</classes>
</test>

<test name = "test6-Sockets">
<parameter name="testname" value="Sockets"/>
<parameter name="checkpointOpt" value="--tcp-established --skip-zero-pages"/>
<parameter name="restoreOpt" value="--tcp-established"/>
<classes>
<class name = "org.criu.java.tests.CheckpointRestore"/>
</classes>
</test>

<test name = "test7-SocketsListen">
<parameter name="testname" value="SocketsListen"/>
<parameter name="checkpointOpt" value="--tcp-established --skip-zero-pages"/>
<parameter name="restoreOpt" value="--tcp-established"/>
<classes>
<class name = "org.criu.java.tests.CheckpointRestore"/>
</classes>
</test>

<test name = "test8-SocketsConnect">
<parameter name="testname" value="SocketsConnect"/>
<parameter name="checkpointOpt" value="--tcp-established --skip-zero-pages"/>
<parameter name="restoreOpt" value="--tcp-established"/>
<classes>
<class name = "org.criu.java.tests.CheckpointRestore"/>
</classes>
</test>

<test name = "test9-SocketsMultiple">
<parameter name="testname" value="SocketsMultiple"/>
<parameter name="checkpointOpt" value="--tcp-established --skip-zero-pages"/>
<parameter name="restoreOpt" value="--tcp-established"/>
<classes>
<class name = "org.criu.java.tests.CheckpointRestore"/>
</classes>
</test>

<test name = "test10-SocketsData">
<parameter name="testname" value="SocketsData"/>
<parameter name="checkpointOpt" value="--tcp-established --skip-zero-pages"/>
<parameter name="restoreOpt" value="--tcp-established"/>
<classes>
<class name = "org.criu.java.tests.CheckpointRestore"/>
</classes>

</test>

</suite>
9 changes: 8 additions & 1 deletion test/zdtm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1052,6 +1052,7 @@ def __init__(self, opts):
self.__sat = bool(opts['sat'])
self.__dedup = bool(opts['dedup'])
self.__mdedup = bool(opts['noauto_dedup'])
self.__skip_zero_pages = bool(opts['skip_zero_pages'])
self.__user = bool(opts['user'])
self.__rootless = bool(opts['rootless'])
self.__leave_stopped = bool(opts['stop'])
Expand Down Expand Up @@ -1381,6 +1382,9 @@ def dump(self, action, opts=[]):
if self.__dedup:
a_opts += ["--auto-dedup"]

if self.__skip_zero_pages:
a_opts += ["--skip-zero-pages"]

a_opts += ["--timeout", "10"]

criu_dir = os.path.dirname(os.getcwd())
Expand Down Expand Up @@ -2083,7 +2087,7 @@ def run_test(self, name, desc, flavor):
'dedup', 'sbs', 'freezecg', 'user', 'dry_run', 'noauto_dedup',
'remote_lazy_pages', 'show_stats', 'lazy_migrate', 'stream',
'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode', 'mntns_compat_mode',
'rootless')
'rootless', 'skip_zero_pages')
arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd}))

if self.__use_log:
Expand Down Expand Up @@ -2697,6 +2701,9 @@ def get_cli_args():
rp.add_argument("--noauto-dedup",
help="Manual deduplicate images on iterations",
action='store_true')
rp.add_argument("--skip-zero-pages",
help="Don't dump pages containing only zero bytes",
action='store_true')
rp.add_argument("--nocr",
help="Do not CR anything, just check test works",
action='store_true')
Expand Down
1 change: 1 addition & 0 deletions test/zdtm/static/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ TST_NOFILE := \
sigtrap01 \
change_mnt_context \
fd_offset \
zero_pages \
# jobctl00 \
PKG_CONFIG ?= pkg-config
Expand Down
Loading

0 comments on commit b23ad22

Please sign in to comment.