From c4240f92fa7aa24dd48d40d5e34d95b38fe837f5 Mon Sep 17 00:00:00 2001 From: maekawatoshiki Date: Sun, 4 Aug 2024 21:57:04 +0900 Subject: [PATCH] [session-cpu] Subtle change for profiling result --- crates/session-cpu/examples/deit_cpu.rs | 7 ++----- crates/session-cpu/examples/mobilenet_cpu.rs | 11 ++++------- crates/session-cpu/src/session.rs | 11 +++++++++-- crates/session-cpu/src/translator.rs | 12 ++---------- 4 files changed, 17 insertions(+), 24 deletions(-) diff --git a/crates/session-cpu/examples/deit_cpu.rs b/crates/session-cpu/examples/deit_cpu.rs index 9e6f4748..f5874edd 100644 --- a/crates/session-cpu/examples/deit_cpu.rs +++ b/crates/session-cpu/examples/deit_cpu.rs @@ -1,6 +1,5 @@ use std::{cmp::Ordering, fs::read_to_string, path::Path, time::Instant}; -use altius_core::flops::compute_flops; use ndarray::CowArray; use ort::{Environment, GraphOptimizationLevel, SessionBuilder, Value}; use structopt::StructOpt; @@ -70,11 +69,9 @@ fn main() { let mut out = out[0].data::().iter().enumerate().collect::>(); out.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap_or(Ordering::Equal)); - log::info!("inference result: {}", classes[out[0].0]); - log::info!("top5: {:?}", &out[..5]); + println!("inference result: {}", classes[out[0].0]); + println!("top5: {:?}", &out[..5]); } - let flops = compute_flops(i.model()).unwrap(); - log::info!("Model FLOPs: {} ({}M)", flops, flops / 1_000_000); } fn run_on_ort(opt: &Opt) { diff --git a/crates/session-cpu/examples/mobilenet_cpu.rs b/crates/session-cpu/examples/mobilenet_cpu.rs index ee62d2da..1beb7956 100644 --- a/crates/session-cpu/examples/mobilenet_cpu.rs +++ b/crates/session-cpu/examples/mobilenet_cpu.rs @@ -1,4 +1,3 @@ -use altius_core::flops::compute_flops; use ndarray::CowArray; use ort::{Environment, ExecutionProvider, SessionBuilder, Value}; use structopt::StructOpt; @@ -77,8 +76,8 @@ fn main() { let mut out = out.iter().enumerate().collect::>(); out.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap_or(Ordering::Equal)); - log::info!("prediction: {}", classes[out[0].0]); - log::info!("top5: {:?}", &out[..5]); + println!("prediction: {}", classes[out[0].0]); + println!("top5: {:?}", &out[..5]); } } else { let model = load_onnx(root.join("mobilenetv3.onnx")).unwrap(); @@ -94,10 +93,8 @@ fn main() { let mut out = out[0].data::().iter().enumerate().collect::>(); out.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap_or(Ordering::Equal)); - log::info!("prediction: {}", classes[out[0].0]); - log::info!("top5: {:?}", &out[..5]); + println!("prediction: {}", classes[out[0].0]); + println!("top5: {:?}", &out[..5]); } - let flops = compute_flops(session.model()).unwrap(); - log::info!("Model FLOPs: {} ({}M)", flops, flops / 1_000_000); } } diff --git a/crates/session-cpu/src/session.rs b/crates/session-cpu/src/session.rs index bcda04c6..1eaf059e 100644 --- a/crates/session-cpu/src/session.rs +++ b/crates/session-cpu/src/session.rs @@ -1,4 +1,5 @@ use altius_core::{ + flops::compute_flops, model::Model, tensor::{Tensor, TypedFixedShape}, value::ValueId, @@ -65,12 +66,18 @@ impl CPUSession { }) .collect::>(); let sum_durations = durations.iter().map(|(_, d)| d).sum::(); - durations.push(("All", entire_duration)); durations.push(("All (Kernel)", sum_durations)); durations.sort_by(|(_, b), (_, a)| a.partial_cmp(b).unwrap()); let width = durations.iter().map(|(op, _)| op.len()).max().unwrap(); for (op, duration) in durations { - log::info!("| {op:width$}: {duration:.5} [ms]"); + log::info!("{op:width$}: {duration:.5} ms"); + } + if let Ok(flops) = compute_flops(&self.model) { + log::info!( + "[ {:.5} ms, {:.5} GFLOPS ]", + entire_duration, + flops as f32 / (entire_duration / 1000.0) / 1_000_000_000.0 + ); } } diff --git a/crates/session-cpu/src/translator.rs b/crates/session-cpu/src/translator.rs index e12abb3c..3a4d681f 100644 --- a/crates/session-cpu/src/translator.rs +++ b/crates/session-cpu/src/translator.rs @@ -234,8 +234,8 @@ impl<'a> Translator<'a> { )); } num_compilied_kernels.fetch_add(num_kernels, Ordering::SeqCst); - log::debug!( - "Compiled {}/{} kernels", + eprint!( + "[{:03}/{:03}] Compilation in progress\r", num_compilied_kernels.load(Ordering::SeqCst), num_kernels_to_compile ); @@ -726,8 +726,6 @@ elapsed_{opname} += end_in_sec - start_in_sec;", ) -> Result { let input_names = &args[..inputs.len()]; let output_names = &args[inputs.len()..]; - log::debug!("input names: {:?}", input_names); - log::debug!("output names: {:?}", output_names); let input = &inputs[Op::CONV2D_IN]; let _weight = &inputs[Op::CONV2D_WEIGHT]; @@ -764,8 +762,6 @@ elapsed_{opname} += end_in_sec - start_in_sec;", let _pad_b = padding[2]; let _pad_r = padding[3]; - log::debug!("kernel: {:?}", kernel); - let code_fill_bias = if let Some(bias) = input_names.get(Op::CONV2D_BIAS) { let output_name = &output_names[0]; format!( @@ -3358,10 +3354,6 @@ impl Regions { fn find_first_free_region(&self, size: usize) -> Range { // Sorted by start offset let regions = self.start_to_region.values().collect::>(); - log::debug!( - "regions: {regions:?} (count: {count})", - count = regions.len() - ); fn roundup(x: usize) -> usize { let mask = 32 - 1; (x + mask) & !mask