Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/bench' into bench
Browse files Browse the repository at this point in the history
  • Loading branch information
danlkv committed Apr 5, 2024
2 parents ab7bf85 + 53770f2 commit f515c1d
Show file tree
Hide file tree
Showing 146 changed files with 20,943 additions and 20,940 deletions.
556 changes: 278 additions & 278 deletions qtensor/compression/cusz/include/cli/analyzer.hh

Large diffs are not rendered by default.

544 changes: 272 additions & 272 deletions qtensor/compression/cusz/include/cli/document.hh

Large diffs are not rendered by default.

326 changes: 163 additions & 163 deletions qtensor/compression/cusz/include/cli/quality_viewer.hh
Original file line number Diff line number Diff line change
@@ -1,163 +1,163 @@
/**
* @file quality_viewer.hh
* @author Jiannan Tian
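* @brief Print compression-quality metrics (value range, max error, compression ratio, NRMSE, PSNR, cross- and auto-correlation).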
* @version 0.3
* @date 2022-04-09
* @deprecated 0.3.2
*
* (C) 2022 by Washington State University, Argonne National Laboratory
*
*/

#ifndef QUALITY_VIEWER_HH
#define QUALITY_VIEWER_HH

// 22-11-20 would fail in cxxapi.cu if deleted
#include <thrust/equal.h>

#include "../common/capsule.hh"
#include "../common/definition.hh"
#include "../header.h"
#include "../stat/compare_gpu.hh"
#include "verify.hh"

namespace cusz {

// Shorthands in namespace cusz for the cusz::LOC values of the same name.
static const auto HOST        = cusz::LOC::HOST;
static const auto DEVICE      = cusz::LOC::DEVICE;
static const auto HOST_DEVICE = cusz::LOC::HOST_DEVICE;

struct QualityViewer {
/**
 * @brief Print the cross-comparison quality table to stdout.
 *
 * Rows printed, all read from `s`: data length / element size / fp-type flag;
 * min, max, range and std for the "origin" and "eb-lossy" data; the maximum
 * error figures; and the summary row (compression ratio, NRMSE,
 * cross-correlation, PSNR).
 *
 * @tparam Data element type; only `sizeof(Data)` and whether it is `float` or
 *         `double` are used
 * @param s statistics record to display; dereferenced unconditionally
 * @param compressed_bytes divisor of the compression ratio; with the default 0
 *        the ratio is a floating-point division by zero and prints as inf/nan
 * @param gpu_checker selects the "(using GPU checker)" caption instead of
 *        "(using CPU checker)"
 */
template <typename Data>
static void print_metrics_cross(cusz_stats* s, size_t compressed_bytes = 0, bool gpu_checker = false)
{
    const char* checker = gpu_checker ? "(using GPU checker)" : "(using CPU checker)";
    auto        bytes   = (s->len * sizeof(Data) * 1.0);

    auto println = [](const char* label, double n1, double n2, double n3, double n4) {
        printf(" %-10s %16.8g %16.8g %16.8g %16.8g\n", label, n1, n2, n3, n4);
    };
    // "\033" is the standard spelling of ESC; "\e" is a compiler extension.
    auto printhead = [](const char* s1, const char* s2, const char* s3, const char* s4, const char* s5) {
        printf(" \033[1m\033[31m%-10s %16s %16s %16s %16s\033[0m\n", s1, s2, s3, s4, s5);
    };

    const char* is_fp = (std::is_same<Data, float>::value or std::is_same<Data, double>::value) ? "yes" : "no";
    printf("\nquality metrics %s:\n", checker);

    printhead("", "data-len", "data-byte", "fp-type?", "");
    // sizeof yields size_t, so %zu is the matching conversion (was %lu).
    printf(" %-10s %16zu %16zu %16s\n", "", s->len, sizeof(Data), is_fp);

    printhead("", "min", "max", "rng", "std");
    println("origin", s->odata.min, s->odata.max, s->odata.rng, s->odata.std);
    println("eb-lossy", s->xdata.min, s->xdata.max, s->xdata.rng, s->xdata.std);

    printhead("", "abs-val", "abs-idx", "pw-rel", "VS-RNG");
    println("max-error", s->max_err.abs, s->max_err.idx, s->max_err.pwrrel, s->max_err.rel);

    printhead("", "CR", "NRMSE", "cross-cor", "PSNR");
    println("metrics", bytes / compressed_bytes, s->reduced.NRMSE, s->reduced.coeff, s->reduced.PSNR);
}

/**
 * @brief Print the lag-1 / lag-2 autocorrelation row to stdout.
 *
 * @param lag1_cor pointer to the lag-1 coefficient; dereferenced unconditionally
 * @param lag2_cor pointer to the lag-2 coefficient; dereferenced unconditionally
 */
static void print_metrics_auto(double* lag1_cor, double* lag2_cor)
{
    // "\033" is the standard spelling of ESC; "\e" is a compiler extension.
    printf(" \033[1m\033[31m%-10s %16s %16s %16s %16s\033[0m\n", "", "lag1-cor", "lag2-cor", "", "");
    printf(" %-10s %16lf %16lf\n", "auto", *lag1_cor, *lag2_cor);
    printf("\n");
}

template <typename T>
static void echo_metric_gpu(T* reconstructed, T* origin, size_t len, size_t compressed_bytes = 0)
{
// cross
auto stat_x = new cusz_stats;
psz::thrustgpu_assess_quality<T>(stat_x, reconstructed, origin, len);
print_metrics_cross<T>(stat_x, compressed_bytes, true);

auto stat_auto_lag1 = new cusz_stats;
psz::thrustgpu_assess_quality<T>(stat_auto_lag1, origin, origin + 1, len - 1);
auto stat_auto_lag2 = new cusz_stats;
psz::thrustgpu_assess_quality<T>(stat_auto_lag2, origin, origin + 2, len - 2);

print_metrics_auto(&stat_auto_lag1->reduced.coeff, &stat_auto_lag2->reduced.coeff);
}

template <typename T>
static void echo_metric_cpu(T* _d1, T* _d2, size_t len, size_t compressed_bytes = 0, bool from_device = true)
{
auto stat = new cusz_stats;
T* reconstructed;
T* origin;
if (not from_device) {
reconstructed = _d1;
origin = _d2;
}
else {
printf("allocating tmp space for CPU verification\n");
auto bytes = sizeof(T) * len;
cudaMallocHost(&reconstructed, bytes);
cudaMallocHost(&origin, bytes);
cudaMemcpy(reconstructed, _d1, bytes, cudaMemcpyDeviceToHost);
cudaMemcpy(origin, _d2, bytes, cudaMemcpyDeviceToHost);
}
cusz::verify_data<T>(stat, reconstructed, origin, len);
print_metrics_cross<T>(stat, compressed_bytes, false);

auto stat_auto_lag1 = new cusz_stats;
verify_data<T>(stat_auto_lag1, origin, origin + 1, len - 1);
auto stat_auto_lag2 = new cusz_stats;
verify_data<T>(stat_auto_lag2, origin, origin + 2, len - 2);

print_metrics_auto(&stat_auto_lag1->reduced.coeff, &stat_auto_lag2->reduced.coeff);

if (from_device) {
if (reconstructed) cudaFreeHost(reconstructed);
if (origin) cudaFreeHost(origin);
}
}

/**
 * @brief Allocate the capsule and fill it from a file.
 *
 * Calls, in this order, `mallochost()`, `malloc()` and `fromfile(fname)`,
 * each on the result of the previous call.
 *
 * @param fname path handed to `fromfile`
 * @param origin capsule to allocate and populate
 */
template <typename T>
static void load_origin(string const& fname, Capsule<T>& origin)
{
    auto&& allocated = origin.mallochost().malloc();
    allocated.fromfile(fname);
}

/**
 * @brief Compare reconstructed data against a reference file and print the
 *        quality metrics.
 *
 * Does nothing when `compare` is empty. Otherwise the reference file is loaded
 * into `cmp`, and the comparison runs through echo_metric_gpu when the
 * uncompressed data is under 0.8 GB, through echo_metric_cpu otherwise.
 *
 * @tparam T element type
 * @param header supplies the uncompressed length and the compressed file size
 *        (via ConfigHelper)
 * @param xdata reconstructed data; its device side is read on the GPU path,
 *        and it is copied with `device2host()` on the CPU path
 * @param cmp scratch capsule for the reference data; allocated and freed here
 * @param compare path of the reference file; empty disables the comparison
 */
template <typename T>
static void view(header_t header, Capsule<T>& xdata, Capsule<T>& cmp, string const& compare)
{
    // Data at or above this size (in GB) is compared on the CPU instead of the GPU.
    constexpr double gpu_compare_limit_gb = 0.8;

    auto len              = ConfigHelper::get_uncompressed_len(header);
    auto compressed_bytes = ConfigHelper::get_filesize(header);

    if (compare.empty()) return;

    auto compare_on_gpu = [&]() {
        cmp.mallochost().malloc().fromfile(compare).host2device();
        echo_metric_gpu(xdata.dptr(), cmp.dptr(), len, compressed_bytes);
        cmp.freehost().free();
    };

    auto compare_on_cpu = [&]() {
        cmp.mallochost().fromfile(compare);
        xdata.device2host();
        // Both arguments are host pointers here, so from_device must be false:
        // the default (true) would copy them with cudaMemcpyDeviceToHost.
        echo_metric_cpu(xdata.hptr(), cmp.hptr(), len, compressed_bytes, false);
        cmp.freehost();
    };

    auto gb = 1.0 * sizeof(T) * len / 1e9;
    if (gb < gpu_compare_limit_gb)
        compare_on_gpu();
    else
        compare_on_cpu();
}
};

} // namespace cusz

#endif
/**
* @file quality_viewer.hh
* @author Jiannan Tian
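* @brief Print compression-quality metrics (value range, max error, compression ratio, NRMSE, PSNR, cross- and auto-correlation).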
* @version 0.3
* @date 2022-04-09
* @deprecated 0.3.2
*
* (C) 2022 by Washington State University, Argonne National Laboratory
*
*/

#ifndef QUALITY_VIEWER_HH
#define QUALITY_VIEWER_HH

// 22-11-20 would fail in cxxapi.cu if deleted
#include <thrust/equal.h>

#include "../common/capsule.hh"
#include "../common/definition.hh"
#include "../header.h"
#include "../stat/compare_gpu.hh"
#include "verify.hh"

namespace cusz {

// Shorthands in namespace cusz for the cusz::LOC values of the same name.
static const auto HOST        = cusz::LOC::HOST;
static const auto DEVICE      = cusz::LOC::DEVICE;
static const auto HOST_DEVICE = cusz::LOC::HOST_DEVICE;

struct QualityViewer {
/**
 * @brief Print the cross-comparison quality table to stdout.
 *
 * Rows printed, all read from `s`: data length / element size / fp-type flag;
 * min, max, range and std for the "origin" and "eb-lossy" data; the maximum
 * error figures; and the summary row (compression ratio, NRMSE,
 * cross-correlation, PSNR).
 *
 * @tparam Data element type; only `sizeof(Data)` and whether it is `float` or
 *         `double` are used
 * @param s statistics record to display; dereferenced unconditionally
 * @param compressed_bytes divisor of the compression ratio; with the default 0
 *        the ratio is a floating-point division by zero and prints as inf/nan
 * @param gpu_checker selects the "(using GPU checker)" caption instead of
 *        "(using CPU checker)"
 */
template <typename Data>
static void print_metrics_cross(cusz_stats* s, size_t compressed_bytes = 0, bool gpu_checker = false)
{
    const char* checker = gpu_checker ? "(using GPU checker)" : "(using CPU checker)";
    auto        bytes   = (s->len * sizeof(Data) * 1.0);

    auto println = [](const char* label, double n1, double n2, double n3, double n4) {
        printf(" %-10s %16.8g %16.8g %16.8g %16.8g\n", label, n1, n2, n3, n4);
    };
    // "\033" is the standard spelling of ESC; "\e" is a compiler extension.
    auto printhead = [](const char* s1, const char* s2, const char* s3, const char* s4, const char* s5) {
        printf(" \033[1m\033[31m%-10s %16s %16s %16s %16s\033[0m\n", s1, s2, s3, s4, s5);
    };

    const char* is_fp = (std::is_same<Data, float>::value or std::is_same<Data, double>::value) ? "yes" : "no";
    printf("\nquality metrics %s:\n", checker);

    printhead("", "data-len", "data-byte", "fp-type?", "");
    // sizeof yields size_t, so %zu is the matching conversion (was %lu).
    printf(" %-10s %16zu %16zu %16s\n", "", s->len, sizeof(Data), is_fp);

    printhead("", "min", "max", "rng", "std");
    println("origin", s->odata.min, s->odata.max, s->odata.rng, s->odata.std);
    println("eb-lossy", s->xdata.min, s->xdata.max, s->xdata.rng, s->xdata.std);

    printhead("", "abs-val", "abs-idx", "pw-rel", "VS-RNG");
    println("max-error", s->max_err.abs, s->max_err.idx, s->max_err.pwrrel, s->max_err.rel);

    printhead("", "CR", "NRMSE", "cross-cor", "PSNR");
    println("metrics", bytes / compressed_bytes, s->reduced.NRMSE, s->reduced.coeff, s->reduced.PSNR);
}

/**
 * @brief Print the lag-1 / lag-2 autocorrelation row to stdout.
 *
 * @param lag1_cor pointer to the lag-1 coefficient; dereferenced unconditionally
 * @param lag2_cor pointer to the lag-2 coefficient; dereferenced unconditionally
 */
static void print_metrics_auto(double* lag1_cor, double* lag2_cor)
{
    // "\033" is the standard spelling of ESC; "\e" is a compiler extension.
    printf(" \033[1m\033[31m%-10s %16s %16s %16s %16s\033[0m\n", "", "lag1-cor", "lag2-cor", "", "");
    printf(" %-10s %16lf %16lf\n", "auto", *lag1_cor, *lag2_cor);
    printf("\n");
}

template <typename T>
static void echo_metric_gpu(T* reconstructed, T* origin, size_t len, size_t compressed_bytes = 0)
{
// cross
auto stat_x = new cusz_stats;
psz::thrustgpu_assess_quality<T>(stat_x, reconstructed, origin, len);
print_metrics_cross<T>(stat_x, compressed_bytes, true);

auto stat_auto_lag1 = new cusz_stats;
psz::thrustgpu_assess_quality<T>(stat_auto_lag1, origin, origin + 1, len - 1);
auto stat_auto_lag2 = new cusz_stats;
psz::thrustgpu_assess_quality<T>(stat_auto_lag2, origin, origin + 2, len - 2);

print_metrics_auto(&stat_auto_lag1->reduced.coeff, &stat_auto_lag2->reduced.coeff);
}

template <typename T>
static void echo_metric_cpu(T* _d1, T* _d2, size_t len, size_t compressed_bytes = 0, bool from_device = true)
{
auto stat = new cusz_stats;
T* reconstructed;
T* origin;
if (not from_device) {
reconstructed = _d1;
origin = _d2;
}
else {
printf("allocating tmp space for CPU verification\n");
auto bytes = sizeof(T) * len;
cudaMallocHost(&reconstructed, bytes);
cudaMallocHost(&origin, bytes);
cudaMemcpy(reconstructed, _d1, bytes, cudaMemcpyDeviceToHost);
cudaMemcpy(origin, _d2, bytes, cudaMemcpyDeviceToHost);
}
cusz::verify_data<T>(stat, reconstructed, origin, len);
print_metrics_cross<T>(stat, compressed_bytes, false);

auto stat_auto_lag1 = new cusz_stats;
verify_data<T>(stat_auto_lag1, origin, origin + 1, len - 1);
auto stat_auto_lag2 = new cusz_stats;
verify_data<T>(stat_auto_lag2, origin, origin + 2, len - 2);

print_metrics_auto(&stat_auto_lag1->reduced.coeff, &stat_auto_lag2->reduced.coeff);

if (from_device) {
if (reconstructed) cudaFreeHost(reconstructed);
if (origin) cudaFreeHost(origin);
}
}

/**
 * @brief Allocate the capsule and fill it from a file.
 *
 * Calls, in this order, `mallochost()`, `malloc()` and `fromfile(fname)`,
 * each on the result of the previous call.
 *
 * @param fname path handed to `fromfile`
 * @param origin capsule to allocate and populate
 */
template <typename T>
static void load_origin(string const& fname, Capsule<T>& origin)
{
    auto&& allocated = origin.mallochost().malloc();
    allocated.fromfile(fname);
}

/**
 * @brief Compare reconstructed data against a reference file and print the
 *        quality metrics.
 *
 * Does nothing when `compare` is empty. Otherwise the reference file is loaded
 * into `cmp`, and the comparison runs through echo_metric_gpu when the
 * uncompressed data is under 0.8 GB, through echo_metric_cpu otherwise.
 *
 * @tparam T element type
 * @param header supplies the uncompressed length and the compressed file size
 *        (via ConfigHelper)
 * @param xdata reconstructed data; its device side is read on the GPU path,
 *        and it is copied with `device2host()` on the CPU path
 * @param cmp scratch capsule for the reference data; allocated and freed here
 * @param compare path of the reference file; empty disables the comparison
 */
template <typename T>
static void view(header_t header, Capsule<T>& xdata, Capsule<T>& cmp, string const& compare)
{
    // Data at or above this size (in GB) is compared on the CPU instead of the GPU.
    constexpr double gpu_compare_limit_gb = 0.8;

    auto len              = ConfigHelper::get_uncompressed_len(header);
    auto compressed_bytes = ConfigHelper::get_filesize(header);

    if (compare.empty()) return;

    auto compare_on_gpu = [&]() {
        cmp.mallochost().malloc().fromfile(compare).host2device();
        echo_metric_gpu(xdata.dptr(), cmp.dptr(), len, compressed_bytes);
        cmp.freehost().free();
    };

    auto compare_on_cpu = [&]() {
        cmp.mallochost().fromfile(compare);
        xdata.device2host();
        // Both arguments are host pointers here, so from_device must be false:
        // the default (true) would copy them with cudaMemcpyDeviceToHost.
        echo_metric_cpu(xdata.hptr(), cmp.hptr(), len, compressed_bytes, false);
        cmp.freehost();
    };

    auto gb = 1.0 * sizeof(T) * len / 1e9;
    if (gb < gpu_compare_limit_gb)
        compare_on_gpu();
    else
        compare_on_cpu();
}
};

} // namespace cusz

#endif
Loading

0 comments on commit f515c1d

Please sign in to comment.