Skip to content

Commit e1ef705

Browse files
committed
For Cachegrind, aggregate all jemalloc functions into a single entry.
Because a single entry like this: ``` 120,365,108 (8.7%) <all-jemalloc-files>:<all-jemalloc-functions> ``` is much more helpful than dozens of entries like this: ``` 11,900,760 (0.9%) /home/njn/dev/rust2/build/x86_64-unknown-linux-gnu/stage0-rustc/x86_64-unknown-linux-gnu/release/build/jemalloc-sys-1e20251078fe5355/out/build/include/jemalloc/internal/rtree.h:free 9,296,168 (0.7%) /home/njn/dev/rust2/build/x86_64-unknown-linux-gnu/stage0-rustc/x86_64-unknown-linux-gnu/release/build/jemalloc-sys-1e20251078fe5355/out/build/include/jemalloc/internal/jemalloc_internal_inlines_c.h:malloc 7,926,636 (0.6%) /home/njn/dev/rust2/build/x86_64-unknown-linux-gnu/stage0-rustc/x86_64-unknown-linux-gnu/release/build/jemalloc-sys-1e20251078fe5355/out/build/src/jemalloc.c:free 7,905,743 (0.6%) /home/njn/dev/rust2/build/x86_64-unknown-linux-gnu/stage0-rustc/x86_64-unknown-linux-gnu/release/build/jemalloc-sys-1e20251078fe5355/out/build/include/jemalloc/internal/cache_bin.h:free 7,834,577 (0.6%) /home/njn/dev/rust2/build/x86_64-unknown-linux-gnu/stage0-rustc/x86_64-unknown-linux-gnu/release/build/jemalloc-sys-1e20251078fe5355/out/build/include/jemalloc/internal/cache_bin.h:malloc 5,425,997 (0.4%) /home/njn/dev/rust2/build/x86_64-unknown-linux-gnu/stage0-rustc/x86_64-unknown-linux-gnu/release/build/jemalloc-sys-1e20251078fe5355/out/build/src/arena.c:_rjem_je_arena_ralloc 4,268,616 (0.3%) /home/njn/dev/rust2/build/x86_64-unknown-linux-gnu/stage0-rustc/x86_64-unknown-linux-gnu/release/build/jemalloc-sys-1e20251078fe5355/out/build/src/jemalloc.c:do_rallocx 3,539,679 (0.3%) /home/njn/dev/rust2/build/x86_64-unknown-linux-gnu/stage0-rustc/x86_64-unknown-linux-gnu/release/build/jemalloc-sys-1e20251078fe5355/out/build/src/arena.c:_rjem_je_arena_ralloc_no_move 3,093,308 (0.2%) /home/njn/dev/rust2/build/x86_64-unknown-linux-gnu/stage0-rustc/x86_64-unknown-linux-gnu/release/build/jemalloc-sys-1e20251078fe5355/out/build/include/jemalloc/internal/sz.h:malloc 2,393,337 (0.2%) 
/home/njn/dev/rust2/build/x86_64-unknown-linux-gnu/stage0-rustc/x86_64-unknown-linux-gnu/release/build/jemalloc-sys-1e20251078fe5355/out/build/src/arena.c:_rjem_je_arena_cache_bin_fill_small 2,371,694 (0.2%) /home/njn/dev/rust2/build/x86_64-unknown-linux-gnu/stage0-rustc/x86_64-unknown-linux-gnu/release/build/jemalloc-sys-1e20251078fe5355/out/build/include/jemalloc/internal/rtree.h:tcache_bin_flush_edatas_lookup.constprop.0 2,309,382 (0.2%) /home/njn/dev/rust2/build/x86_64-unknown-linux-gnu/stage0-rustc/x86_64-unknown-linux-gnu/release/build/jemalloc-sys-1e20251078fe5355/out/build/src/jemalloc.c:malloc ... ``` (And in reality, those jemalloc function entries are interleaved with many non-jemalloc entries, making them even harder to read.)
1 parent 3f38f44 commit e1ef705

File tree

2 files changed

+46
-8
lines changed

2 files changed

+46
-8
lines changed

ci/check-profiling.sh

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,8 @@ test -f results/cgout-Test-helloworld-Check-Full
5555
grep -q "events: Ir" results/cgout-Test-helloworld-Check-Full
5656
test -f results/cgann-Test-helloworld-Check-Full
5757
grep -q "PROGRAM TOTALS" results/cgann-Test-helloworld-Check-Full
58+
# Ensure the jemalloc file/function aggregation is working.
59+
grep -q "<all-jemalloc-files>:<all-jemalloc-functions>" results/cgann-Test-helloworld-Check-Full
5860

5961
# Callgrind.
6062
RUST_BACKTRACE=1 RUST_LOG=raw_cargo_messages=trace,collector=debug,rust_sysroot=debug \

collector/src/execute/profiler.rs

Lines changed: 44 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ use std::io::BufRead;
66
use std::io::Write;
77
use std::path::Path;
88
use std::process::Command;
9-
use std::{fs, process};
9+
use std::{fs, io, process};
1010

1111
// Tools usable with the profiling subcommands, and named on the command line.
1212
#[derive(Clone, Copy, Debug, PartialEq, clap::ValueEnum)]
@@ -226,7 +226,46 @@ impl<'a> Processor for ProfileProcessor<'a> {
226226
let cgout_file = filepath(self.output_dir, &out_file("cgout"));
227227
let cgann_file = filepath(self.output_dir, &out_file("cgann"));
228228

229-
fs::copy(&tmp_cgout_file, &cgout_file)?;
229+
// It's useful to merge all `file:function` entries from
230+
// jemalloc into a single fake
231+
// `<all-jemalloc-files>:<all-jemalloc-functions>` entry. That
232+
// way the cost of all allocations is visible in one line,
233+
// rather than spread across many small entries.
234+
//
235+
// The downside is that we don't get any annotations within
236+
// jemalloc source files, but this is no real loss, given that
237+
// jemalloc is basically a black box whose code we never look
238+
// at anyway. DHAT is the best way to profile allocations.
239+
let reader = io::BufReader::new(fs::File::open(&tmp_cgout_file)?);
240+
let mut writer = io::BufWriter::new(fs::File::create(&cgout_file)?);
241+
let mut in_jemalloc_file = false;
242+
243+
// A Cachegrind profile contains `fn=<function-name>` lines,
244+
// `fl=<filename>` lines, and everything else. We just need to
245+
// modify the `fn=` and `fl=` lines that refer to jemalloc
246+
// code.
247+
for line in reader.lines() {
248+
let line = line?;
249+
if line.starts_with("fl=") {
250+
// All jemalloc filenames have `/jemalloc/` or
251+
// something like `/jemalloc-sys-1e20251078fe5355/` in
252+
// them.
253+
in_jemalloc_file = line.contains("/jemalloc");
254+
if in_jemalloc_file {
255+
writeln!(writer, "fl=<all-jemalloc-files>")?;
256+
continue;
257+
}
258+
} else if line.starts_with("fn=") {
259+
// Any function within a jemalloc file is a jemalloc
260+
// function.
261+
if in_jemalloc_file {
262+
writeln!(writer, "fn=<all-jemalloc-functions>")?;
263+
continue;
264+
}
265+
}
266+
writeln!(writer, "{}", line)?;
267+
}
268+
writer.flush()?;
230269

231270
let mut cg_annotate_cmd = Command::new("cg_annotate");
232271
cg_annotate_cmd
@@ -301,11 +340,8 @@ impl<'a> Processor for ProfileProcessor<'a> {
301340
let tmp_eprintln_file = filepath(data.cwd.as_ref(), "eprintln");
302341
let eprintln_file = filepath(self.output_dir, &out_file("eprintln"));
303342

304-
let mut final_file =
305-
std::io::BufWriter::new(std::fs::File::create(&eprintln_file)?);
306-
for line in
307-
std::io::BufReader::new(std::fs::File::open(&tmp_eprintln_file)?).lines()
308-
{
343+
let mut final_file = io::BufWriter::new(std::fs::File::create(&eprintln_file)?);
344+
for line in io::BufReader::new(std::fs::File::open(&tmp_eprintln_file)?).lines() {
309345
let line = line?;
310346
// rustc under Cargo currently ~always emits artifact
311347
// messages -- which we don't want in final
@@ -355,7 +391,7 @@ impl<'a> Processor for ProfileProcessor<'a> {
355391

356392
for (cgu, items) in &by_cgu {
357393
let cgu_file = filepath(&out_dir, cgu);
358-
let mut file = std::io::BufWriter::new(
394+
let mut file = io::BufWriter::new(
359395
fs::File::create(&cgu_file).with_context(|| format!("{:?}", cgu_file))?,
360396
);
361397
for (name, linkage) in items {

0 commit comments

Comments
 (0)