Skip to content

Commit

Permalink
[session-cpu] Subtle change for profiling result
Browse files (browse the repository at this point in the history)
  • Loading branch information
maekawatoshiki committed Aug 4, 2024
1 parent d48361d commit c4240f9
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 24 deletions.
7 changes: 2 additions & 5 deletions crates/session-cpu/examples/deit_cpu.rs
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
use std::{cmp::Ordering, fs::read_to_string, path::Path, time::Instant};

use altius_core::flops::compute_flops;
use ndarray::CowArray;
use ort::{Environment, GraphOptimizationLevel, SessionBuilder, Value};
use structopt::StructOpt;
Expand Down Expand Up @@ -70,11 +69,9 @@ fn main() {
let mut out = out[0].data::<f32>().iter().enumerate().collect::<Vec<_>>();
out.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap_or(Ordering::Equal));

log::info!("inference result: {}", classes[out[0].0]);
log::info!("top5: {:?}", &out[..5]);
println!("inference result: {}", classes[out[0].0]);
println!("top5: {:?}", &out[..5]);
}
let flops = compute_flops(i.model()).unwrap();
log::info!("Model FLOPs: {} ({}M)", flops, flops / 1_000_000);
}

fn run_on_ort(opt: &Opt) {
Expand Down
11 changes: 4 additions & 7 deletions crates/session-cpu/examples/mobilenet_cpu.rs
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
use altius_core::flops::compute_flops;
use ndarray::CowArray;
use ort::{Environment, ExecutionProvider, SessionBuilder, Value};
use structopt::StructOpt;
Expand Down Expand Up @@ -77,8 +76,8 @@ fn main() {
let mut out = out.iter().enumerate().collect::<Vec<_>>();
out.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap_or(Ordering::Equal));

log::info!("prediction: {}", classes[out[0].0]);
log::info!("top5: {:?}", &out[..5]);
println!("prediction: {}", classes[out[0].0]);
println!("top5: {:?}", &out[..5]);
}
} else {
let model = load_onnx(root.join("mobilenetv3.onnx")).unwrap();
Expand All @@ -94,10 +93,8 @@ fn main() {
let mut out = out[0].data::<f32>().iter().enumerate().collect::<Vec<_>>();
out.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap_or(Ordering::Equal));

log::info!("prediction: {}", classes[out[0].0]);
log::info!("top5: {:?}", &out[..5]);
println!("prediction: {}", classes[out[0].0]);
println!("top5: {:?}", &out[..5]);
}
let flops = compute_flops(session.model()).unwrap();
log::info!("Model FLOPs: {} ({}M)", flops, flops / 1_000_000);
}
}
11 changes: 9 additions & 2 deletions crates/session-cpu/src/session.rs
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
use altius_core::{
flops::compute_flops,
model::Model,
tensor::{Tensor, TypedFixedShape},
value::ValueId,
Expand Down Expand Up @@ -65,12 +66,18 @@ impl CPUSession {
})
.collect::<Vec<_>>();
let sum_durations = durations.iter().map(|(_, d)| d).sum::<f32>();
durations.push(("All", entire_duration));
durations.push(("All (Kernel)", sum_durations));
durations.sort_by(|(_, b), (_, a)| a.partial_cmp(b).unwrap());
let width = durations.iter().map(|(op, _)| op.len()).max().unwrap();
for (op, duration) in durations {
log::info!("| {op:width$}: {duration:.5} [ms]");
log::info!("{op:width$}: {duration:.5} ms");
}
if let Ok(flops) = compute_flops(&self.model) {
log::info!(
"[ {:.5} ms, {:.5} GFLOPS ]",
entire_duration,
flops as f32 / (entire_duration / 1000.0) / 1_000_000_000.0
);
}
}

Expand Down
12 changes: 2 additions & 10 deletions crates/session-cpu/src/translator.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -234,8 +234,8 @@ impl<'a> Translator<'a> {
));
}
num_compilied_kernels.fetch_add(num_kernels, Ordering::SeqCst);
log::debug!(
"Compiled {}/{} kernels",
eprint!(
"[{:03}/{:03}] Compilation in progress\r",
num_compilied_kernels.load(Ordering::SeqCst),
num_kernels_to_compile
);
Expand Down Expand Up @@ -726,8 +726,6 @@ elapsed_{opname} += end_in_sec - start_in_sec;",
) -> Result<String, SessionError> {
let input_names = &args[..inputs.len()];
let output_names = &args[inputs.len()..];
log::debug!("input names: {:?}", input_names);
log::debug!("output names: {:?}", output_names);

let input = &inputs[Op::CONV2D_IN];
let _weight = &inputs[Op::CONV2D_WEIGHT];
Expand Down Expand Up @@ -764,8 +762,6 @@ elapsed_{opname} += end_in_sec - start_in_sec;",
let _pad_b = padding[2];
let _pad_r = padding[3];

log::debug!("kernel: {:?}", kernel);

let code_fill_bias = if let Some(bias) = input_names.get(Op::CONV2D_BIAS) {
let output_name = &output_names[0];
format!(
Expand Down Expand Up @@ -3358,10 +3354,6 @@ impl Regions {
fn find_first_free_region(&self, size: usize) -> Range<usize> {
// Sorted by start offset
let regions = self.start_to_region.values().collect::<Vec<_>>();
log::debug!(
"regions: {regions:?} (count: {count})",
count = regions.len()
);
fn roundup(x: usize) -> usize {
let mask = 32 - 1;
(x + mask) & !mask
Expand Down

0 comments on commit c4240f9

Please sign in to comment.