Skip to content

Commit

Permalink
Updated/added scripts to plot more experiment data for technical report.
Browse files Browse the repository at this point in the history
Signed-off-by: timothytrippel <[email protected]>
  • Loading branch information
timothytrippel committed Feb 5, 2021
1 parent dc71847 commit 0fb4432
Show file tree
Hide file tree
Showing 3 changed files with 481 additions and 69 deletions.
39 changes: 20 additions & 19 deletions experiment_scripts/plots/exp004_plot_runtimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ class FuzzingData:

def __post_init__(self):
self.afl_data = self._load_afl_data()
self.rt = self._load_run_time()
self.runtime = self._load_run_time()

def _load_csv_data(self, csv_file):
return pd.read_csv(csv_file,
Expand All @@ -107,8 +107,18 @@ def _load_csv_data(self, csv_file):
engine='python')

def _load_run_time(self):
with open(log_file, "r") as lf:
line = line.strip()
run_time_path = "%s/logs/fuzz_time.log" % self.data_path
if not os.path.exists(run_time_path):
print(red("ERROR: run time data (%s) does not exist." % run_time_path))
sys.exit(1)
with open(run_time_path, "r") as lf:
for line in lf:
line = line.strip()
if line.startswith("real"):
line_list = line.split()
rt_min = float(line_list[1].split("m")[0])
rt_sec = float(line_list[1].split("m")[1].rstrip("s"))
return ((rt_min * 60) + rt_sec)

def _load_afl_data(self):
afl_glob_path = os.path.join(self.data_path, "out", "afl_*_interactive",
Expand All @@ -125,11 +135,6 @@ def _load_afl_data(self):
afl_df.loc[:, "# unix_time"] -= afl_df.loc[0, "# unix_time"]
return afl_df

@property
def runtime(self):
return float(self.afl_data["# unix_time"].max() -
self.afl_data["# unix_time"].min())


def _drop_outliers_in_range(values, lower_percentile=30, upper_percentile=70):
lower_bound, upper_bound = np.percentile(
Expand All @@ -141,14 +146,6 @@ def _drop_outliers_in_range(values, lower_percentile=30, upper_percentile=70):
return trimmed_values


# CURRENTLY UNUSED
def _winsorize_outliers_in_range(values,
lower_percentile=0.33,
upper_percentile=0.33):
marray = winsorize(values, limits=(lower_percentile, upper_percentile))
return list(marray)


def _aggregrate_instr_complex_rts(exp2data):
exp2rts = {}
for exp_name, fd_list in exp2data.items():
Expand Down Expand Up @@ -324,7 +321,7 @@ def compute_fs_opt_mann_whitney(instr_rts):

def plot_opt_strategies(instr_rts, fsopt_rts, plot_type="violin"):
print(yellow("Generating plots ..."))
LABEL_FONT_SIZE = 12
LABEL_FONT_SIZE = 14
sns.set()

# HW fuzzing instrumentation levels
Expand All @@ -342,6 +339,7 @@ def plot_opt_strategies(instr_rts, fsopt_rts, plot_type="violin"):
jitter=0.3,
size=MARKER_SIZE)
ax1.axhline(y=1.0, color='r', linestyle='-')
ax1.set_ylim(0.5, 3)
ax1.set_xlabel(NUM_STATES_LABEL, fontsize=LABEL_FONT_SIZE)
ax1.set_ylabel(RUN_TIME_LABEL, fontsize=LABEL_FONT_SIZE)
ax1.tick_params("x", labelsize=LABEL_FONT_SIZE)
Expand All @@ -350,7 +348,8 @@ def plot_opt_strategies(instr_rts, fsopt_rts, plot_type="violin"):
fontsize=LABEL_FONT_SIZE,
title_fontsize=LABEL_FONT_SIZE)
plt.tight_layout()
plt.savefig("hwf_instrumentation_levels.png", format="PNG")
# plt.savefig("hwf_instrumentation_levels.png", format="png")
plt.savefig("hwf_instrumentation_levels.pdf", format="pdf")
plt.close()

# HW fork server optimization
Expand All @@ -368,6 +367,7 @@ def plot_opt_strategies(instr_rts, fsopt_rts, plot_type="violin"):
jitter=0.3,
size=MARKER_SIZE)
ax2.axhline(y=1.0, color='r', linestyle='-')
ax1.set_ylim(0.5, 3)
ax2.set_xlabel(NUM_STATES_LABEL, fontsize=LABEL_FONT_SIZE)
ax2.set_ylabel(RUN_TIME_LABEL, fontsize=LABEL_FONT_SIZE)
ax2.tick_params("x", labelsize=LABEL_FONT_SIZE)
Expand All @@ -376,7 +376,8 @@ def plot_opt_strategies(instr_rts, fsopt_rts, plot_type="violin"):
fontsize=LABEL_FONT_SIZE,
title_fontsize=LABEL_FONT_SIZE)
plt.tight_layout()
plt.savefig("hwf_fs_opt.png", format="PNG")
# plt.savefig("hwf_fs_opt.png", format="png")
plt.savefig("hwf_fs_opt.pdf", format="pdf")

print(green("Done."))
print(LINE_SEP)
Expand Down
148 changes: 98 additions & 50 deletions experiment_scripts/plots/exp005_plot_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

import matplotlib as mpl
import matplotlib.pyplot as plt
# import numpy as np
import pandas as pd
import seaborn as sns
from hwfutils.string_color import color_str_green as green
Expand All @@ -39,8 +38,13 @@
TICK_FONT_SIZE = 8
LEGEND_FONT_SIZE = 8
LEGEND_TITLE_FONT_SIZE = 8
TIME_SCALE = "m"
SCALED_MAX_PLOT_TIME = 60
# TIME_SCALE = "m"
TIME_SCALE = "h"
# SCALED_MAX_PLOT_TIME = 60
SCALED_MAX_PLOT_TIME = 24
# PLOT_FILE_NAME = "hwf_grammar_cov_1hour_10trials_avg.pdf"
PLOT_FILE_NAME = "hwf_grammar_cov_24hours_10trials_avg_wline.pdf"
PLOT_FORMAT = "pdf"

# ------------------------------------------------------------------------------
# Plot labels
Expand Down Expand Up @@ -71,7 +75,7 @@
INSTR_TYPES = ["variable", "fixed"]
# TERMINATE_TYPES = ["invalidop", "never"]
TERMINATE_TYPES = ["never"]
TRIALS = range(0, 5)
TRIALS = range(0, 10)

# ------------------------------------------------------------------------------
# Other defines
Expand Down Expand Up @@ -138,6 +142,7 @@ def _load_cov_data(self, cov_type):
cov_data_path = "%s/logs/%s_cum.csv" % (self.cov_data_path, cov_type)
if not os.path.exists(cov_data_path):
print(red("ERROR: coverage data (%s) does not exist." % cov_data_path))
# return None
sys.exit(1)
# Load data into Pandas DataFrame
cov_df = self._load_csv_data(cov_data_path)
Expand Down Expand Up @@ -185,7 +190,10 @@ def get_vlt_cov_at_time(paths_total, vlt_cov_data):
return vlt_cov


def build_avg_coverage_df(exp2data, time_units="m", normalize_to_start=False):
def build_avg_coverage_df(exp2data,
time_units="m",
normalize_to_start=False,
consolidation="avg"):
print(yellow("Building average coverage dataframe ..."))
# Create empty dictionary that will be used to create a Pandas DataFrame that
# looks like the following:
Expand Down Expand Up @@ -219,54 +227,47 @@ def build_avg_coverage_df(exp2data, time_units="m", normalize_to_start=False):
kcov_avg = 0
llvm_cov_avg = 0
vlt_cov_avg = 0
i = 0
kcov_max = 0
llvm_cov_max = 0
vlt_cov_max = 0
for fd in fd_list:
# get the paths_total at the current time
paths_total = get_paths_total_at_time(time, fd.afl_data) - 1
# get coverage data
# print(exp_name, i)
kcov_avg += get_cov_at_time(paths_total, fd.kcov_data,
"Line-Coverage-(%)")
llvm_cov_avg += get_cov_at_time(paths_total, fd.llvm_cov_data,
"Region-Coverage-(%)")
vlt_cov_avg += get_vlt_cov_at_time(paths_total, fd.vlt_cov_data)
i += 1
kcov = get_cov_at_time(paths_total, fd.kcov_data, "Line-Coverage-(%)")
kcov_avg += kcov
kcov_max = max(kcov_max, kcov)
llvm_cov = get_cov_at_time(paths_total, fd.llvm_cov_data,
"Region-Coverage-(%)")
llvm_cov_avg += llvm_cov
llvm_cov_max = max(llvm_cov_max, llvm_cov)
vlt_cov = get_vlt_cov_at_time(paths_total, fd.vlt_cov_data)
vlt_cov_avg += vlt_cov
vlt_cov_max = max(vlt_cov_max, vlt_cov)
kcov_avg /= float(len(fd_list))
llvm_cov_avg /= float(len(fd_list))
vlt_cov_avg /= float(len(fd_list))
# save time 0 coverage to normalize
if time == 0:
kcov_avg_t0 = kcov_avg
llvm_cov_avg_t0 = llvm_cov_avg
vlt_cov_avg_t0 = vlt_cov_avg
if normalize_to_start:
kcov_avg /= kcov_avg_t0
llvm_cov_avg /= llvm_cov_avg_t0
vlt_cov_avg /= vlt_cov_avg_t0
# add kcov data to dataframe row
coverage_dict[COVERAGE_TYPE_LABEL].append(SW_LINE_COVERAGE_LABEL)
coverage_dict[COVERAGE_LABEL].append(kcov_avg)
# add llvm-cov data to dataframe row
coverage_dict[COVERAGE_TYPE_LABEL].append(SW_REGION_COVERAGE_LABEL)
coverage_dict[COVERAGE_LABEL].append(llvm_cov_avg)
# add vlt-cov data to dataframe row
coverage_dict[COVERAGE_TYPE_LABEL].append(HW_LINE_COVERAGE_LABEL)
coverage_dict[COVERAGE_LABEL].append(vlt_cov_avg)
if consolidation == "avg":
coverage_dict[COVERAGE_LABEL].append(kcov_avg)
coverage_dict[COVERAGE_LABEL].append(llvm_cov_avg)
coverage_dict[COVERAGE_LABEL].append(vlt_cov_avg)
else:
coverage_dict[COVERAGE_LABEL].append(kcov_max)
coverage_dict[COVERAGE_LABEL].append(llvm_cov_max)
coverage_dict[COVERAGE_LABEL].append(vlt_cov_max)
# extend lines to max time value
if coverage_dict[TIME_LABEL][-1] != 60.0:
if coverage_dict[TIME_LABEL][-1] != SCALED_MAX_PLOT_TIME:
for _ in range(3):
coverage_dict[TOPLEVEL_LABEL].append(anchor_fd.toplevel)
coverage_dict[GRAMMAR_LABEL].append(anchor_fd.grammar)
coverage_dict[TIME_LABEL].append(SCALED_MAX_PLOT_TIME)
# add kcov data to dataframe row
coverage_dict[COVERAGE_TYPE_LABEL].append(SW_LINE_COVERAGE_LABEL)
coverage_dict[COVERAGE_LABEL].append(kcov_avg)
# add llvm-cov data to dataframe row
coverage_dict[COVERAGE_TYPE_LABEL].append(SW_REGION_COVERAGE_LABEL)
coverage_dict[COVERAGE_LABEL].append(llvm_cov_avg)
# add vlt-cov data to dataframe row
coverage_dict[COVERAGE_TYPE_LABEL].append(HW_LINE_COVERAGE_LABEL)
coverage_dict[COVERAGE_LABEL].append(vlt_cov_avg)
coverage_dict[COVERAGE_LABEL].extend(coverage_dict[COVERAGE_LABEL][-3:])
print(green("Done."))
print(LINE_SEP)
return pd.DataFrame.from_dict(coverage_dict)
Expand Down Expand Up @@ -435,36 +436,77 @@ def _get_axis_limits():
SubplotAxisLimits(),
SubplotAxisLimits())

MIN_TIME = -5
MAX_TIME = 60
MIN_TIME = -2
MAX_TIME = SCALED_MAX_PLOT_TIME

# ------------------------------------------------
# AES axis limits
# ------------------------------------------------
# axis_limits["aes"].kcov_limits = SubplotAxisLimits(MIN_TIME, MAX_TIME, 89,
# 91)
# axis_limits["aes"].llvm_cov_limits = SubplotAxisLimits(
# MIN_TIME, MAX_TIME, 58, 63)
# axis_limits["aes"].vlt_cov_limits = SubplotAxisLimits(
# MIN_TIME, MAX_TIME, 80, 90)
# ------------------------------------------------
axis_limits["aes"].kcov_limits = SubplotAxisLimits(MIN_TIME, MAX_TIME, 89,
91)
axis_limits["aes"].llvm_cov_limits = SubplotAxisLimits(
MIN_TIME, MAX_TIME, 58, 63)
axis_limits["aes"].vlt_cov_limits = SubplotAxisLimits(
MIN_TIME, MAX_TIME, 83, 90)
MIN_TIME, MAX_TIME, 81, 91)
# ------------------------------------------------

# ------------------------------------------------
# HMAC axis limits
# ------------------------------------------------
# axis_limits["hmac"].kcov_limits = SubplotAxisLimits(MIN_TIME, MAX_TIME, 85,
# 89)
# axis_limits["hmac"].llvm_cov_limits = SubplotAxisLimits(
# MIN_TIME, MAX_TIME, 63, 68)
# axis_limits["hmac"].vlt_cov_limits = SubplotAxisLimits(
# MIN_TIME, MAX_TIME, 61, 95)
# ------------------------------------------------
axis_limits["hmac"].kcov_limits = SubplotAxisLimits(MIN_TIME, MAX_TIME, 85,
89)
axis_limits["hmac"].llvm_cov_limits = SubplotAxisLimits(
MIN_TIME, MAX_TIME, 65, 68)
MIN_TIME, MAX_TIME, 63, 68)
axis_limits["hmac"].vlt_cov_limits = SubplotAxisLimits(
MIN_TIME, MAX_TIME, 70, 95)
MIN_TIME, MAX_TIME, 60, 95)
# ------------------------------------------------

# ------------------------------------------------
# KMAC axis limits
# ------------------------------------------------
# axis_limits["kmac"].kcov_limits = SubplotAxisLimits(MIN_TIME, MAX_TIME, 94,
# 96)
# axis_limits["kmac"].llvm_cov_limits = SubplotAxisLimits(
# MIN_TIME, MAX_TIME, 67, 70)
# axis_limits["kmac"].vlt_cov_limits = SubplotAxisLimits(
# MIN_TIME, MAX_TIME, 50, 85)
# ------------------------------------------------
axis_limits["kmac"].kcov_limits = SubplotAxisLimits(MIN_TIME, MAX_TIME, 94,
96)
axis_limits["kmac"].llvm_cov_limits = SubplotAxisLimits(
MIN_TIME, MAX_TIME, 68, 71)
axis_limits["kmac"].vlt_cov_limits = SubplotAxisLimits(
MIN_TIME, MAX_TIME, 60, 85)
# ------------------------------------------------

# ------------------------------------------------
# RV-Timer axis limits
axis_limits["rv_timer"].kcov_limits = SubplotAxisLimits(-0.1, 3, 80, 90)
axis_limits["rv_timer"].llvm_cov_limits = SubplotAxisLimits(-0.1, 3, 50, 85)
axis_limits["rv_timer"].vlt_cov_limits = SubplotAxisLimits(-0.1, 3, 25, 88)
# ------------------------------------------------
# RV_TIMER_START_TIME = -0.1
# RV_TIMER_END_TIME = 3
# axis_limits["rv_timer"].kcov_limits = SubplotAxisLimits(RV_TIMER_START_TIME, RV_TIMER_END_TIME, 80, 90)
# axis_limits["rv_timer"].llvm_cov_limits = SubplotAxisLimits(RV_TIMER_START_TIME, RV_TIMER_END_TIME, 63, 73)
# axis_limits["rv_timer"].vlt_cov_limits = SubplotAxisLimits(RV_TIMER_START_TIME, RV_TIMER_END_TIME, 10, 88)
# ------------------------------------------------
RV_TIMER_START_TIME = -0.01
RV_TIMER_END_TIME = 0.1
axis_limits["rv_timer"].kcov_limits = SubplotAxisLimits(
RV_TIMER_START_TIME, RV_TIMER_END_TIME, 81, 90)
axis_limits["rv_timer"].llvm_cov_limits = SubplotAxisLimits(
RV_TIMER_START_TIME, RV_TIMER_END_TIME, 63, 74)
axis_limits["rv_timer"].vlt_cov_limits = SubplotAxisLimits(
RV_TIMER_START_TIME, RV_TIMER_END_TIME, 10, 95)
# ------------------------------------------------
return axis_limits


Expand Down Expand Up @@ -501,6 +543,7 @@ def plot_avg_coverage_vs_time(cov_df, time_units="m"):
hue=GRAMMAR_LABEL,
ax=axes[row][col],
legend=plot_legend)
ax.axhline(y=1.0, color='r', linestyle='-')
# get legend info if we are plotting the first plot
if plot_legend:
lines = ax.get_lines()
Expand Down Expand Up @@ -547,7 +590,7 @@ def plot_avg_coverage_vs_time(cov_df, time_units="m"):
plt.subplots_adjust(bottom=0.23, wspace=0.25, hspace=0.25)

# adjust figure layout and save to file
plt.savefig("hwf_grammar_cov.pdf", format="PDF")
plt.savefig(PLOT_FILE_NAME, format=PLOT_FORMAT)
print(green("Done."))
print(LINE_SEP)

Expand All @@ -564,8 +607,13 @@ def main(argv):
time_units=TIME_SCALE,
normalize_to_start=False)
# coverage_dfs = build_coverage_dfs(exp2data)

# Compute stats
# print("Dumping to CSV ...")
# avg_cov_df.to_csv("temp.csv", index=False)
# print("Reading from CSV ...")
# avg_cov_df = pd.read_csv("temp.csv",
# delimiter=',',
# index_col=None,
# engine='python')

# Plot data
plot_avg_coverage_vs_time(avg_cov_df, time_units=TIME_SCALE)
Expand Down
Loading

0 comments on commit 0fb4432

Please sign in to comment.