Skip to content

Commit 4322b6e

Browse files
committed
coverage: Build the CGU's global file table as late as possible
1 parent b9856b6 commit 4322b6e

File tree

3 files changed

+110
-88
lines changed

3 files changed

+110
-88
lines changed

compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs

+83-69
Original file line numberDiff line numberDiff line change
@@ -53,31 +53,20 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
5353
None => return,
5454
};
5555

56-
// The order of entries in this global file table needs to be deterministic,
57-
// and ideally should also be independent of the details of stable-hashing,
58-
// because coverage tests snapshots (`.cov-map`) can observe the order and
59-
// would need to be re-blessed if it changes. As long as those requirements
60-
// are satisfied, the order can be arbitrary.
61-
let mut global_file_table = GlobalFileTable::new();
62-
6356
let mut covfun_records = instances_used
6457
.iter()
6558
.copied()
6659
// Sort by symbol name, so that the global file table is built in an
6760
// order that doesn't depend on the stable-hash-based order in which
6861
// instances were visited during codegen.
6962
.sorted_by_cached_key(|&instance| tcx.symbol_name(instance).name)
70-
.filter_map(|instance| prepare_covfun_record(tcx, &mut global_file_table, instance, true))
63+
.filter_map(|instance| prepare_covfun_record(tcx, instance, true))
7164
.collect::<Vec<_>>();
7265

7366
// In a single designated CGU, also prepare covfun records for functions
7467
// in this crate that were instrumented for coverage, but are unused.
7568
if cx.codegen_unit.is_code_coverage_dead_code_cgu() {
76-
unused::prepare_covfun_records_for_unused_functions(
77-
cx,
78-
&mut global_file_table,
79-
&mut covfun_records,
80-
);
69+
unused::prepare_covfun_records_for_unused_functions(cx, &mut covfun_records);
8170
}
8271

8372
// If there are no covfun records for this CGU, don't generate a covmap record.
@@ -89,68 +78,88 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
8978
return;
9079
}
9180

92-
// Encode all filenames referenced by coverage mappings in this CGU.
93-
let filenames_buffer = global_file_table.make_filenames_buffer(tcx);
94-
// The `llvm-cov` tool uses this hash to associate each covfun record with
95-
// its corresponding filenames table, since the final binary will typically
96-
// contain multiple covmap records from different compilation units.
97-
let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer);
81+
// Prepare the global file table for this CGU, containing all paths needed
82+
// by one or more covfun records.
83+
let global_file_table =
84+
GlobalFileTable::build(tcx, covfun_records.iter().flat_map(|c| c.all_source_files()));
9885

9986
for covfun in &covfun_records {
100-
covfun::generate_covfun_record(cx, filenames_hash, covfun)
87+
covfun::generate_covfun_record(cx, &global_file_table, covfun)
10188
}
10289

10390
// Generate the coverage map header, which contains the filenames used by
10491
// this CGU's coverage mappings, and store it in a well-known global.
10592
// (This is skipped if we returned early due to having no covfun records.)
106-
generate_covmap_record(cx, covmap_version, &filenames_buffer);
93+
generate_covmap_record(cx, covmap_version, &global_file_table.filenames_buffer);
10794
}
10895

109-
/// Maps "global" (per-CGU) file ID numbers to their underlying source files.
96+
/// Maps "global" (per-CGU) file ID numbers to their underlying source file paths.
97+
#[derive(Debug)]
11098
struct GlobalFileTable {
11199
/// This "raw" table doesn't include the working dir, so a file's
112100
/// global ID is its index in this set **plus one**.
113-
raw_file_table: FxIndexMap<StableSourceFileId, Arc<SourceFile>>,
101+
raw_file_table: FxIndexMap<StableSourceFileId, String>,
102+
103+
/// The file table in encoded form (possibly compressed), which can be
104+
/// included directly in this CGU's `__llvm_covmap` record.
105+
filenames_buffer: Vec<u8>,
106+
107+
/// Truncated hash of the bytes in `filenames_buffer`.
108+
///
109+
/// The `llvm-cov` tool uses this hash to associate each covfun record with
110+
/// its corresponding filenames table, since the final binary will typically
111+
/// contain multiple covmap records from different compilation units.
112+
filenames_hash: u64,
114113
}
115114

116115
impl GlobalFileTable {
117-
fn new() -> Self {
118-
Self { raw_file_table: FxIndexMap::default() }
119-
}
116+
/// Builds a "global file table" for this CGU, mapping numeric IDs to
117+
/// path strings.
118+
fn build<'a>(tcx: TyCtxt<'_>, all_files: impl Iterator<Item = &'a SourceFile>) -> Self {
119+
let mut raw_file_table = FxIndexMap::default();
120+
121+
for file in all_files {
122+
raw_file_table.entry(file.stable_id).or_insert_with(|| {
123+
file.name
124+
.for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
125+
.to_string_lossy()
126+
.into_owned()
127+
});
128+
}
129+
130+
// FIXME(Zalathar): Consider sorting the file table here, but maybe
131+
// only after adding filename support to coverage-dump, so that the
132+
// table order isn't directly visible in `.coverage-map` snapshots.
133+
134+
let mut table = Vec::with_capacity(raw_file_table.len() + 1);
135+
136+
// Since version 6 of the LLVM coverage mapping format, the first entry
137+
// in the global file table is treated as a base directory, used to
138+
// resolve any other entries that are stored as relative paths.
139+
let base_dir = tcx
140+
.sess
141+
.opts
142+
.working_dir
143+
.for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
144+
.to_string_lossy();
145+
table.push(base_dir.as_ref());
120146

121-
fn global_file_id_for_file(&mut self, file: &Arc<SourceFile>) -> GlobalFileId {
122-
// Ensure the given file has a table entry, and get its index.
123-
let entry = self.raw_file_table.entry(file.stable_id);
124-
let raw_id = entry.index();
125-
entry.or_insert_with(|| Arc::clone(file));
126-
127-
// The raw file table doesn't include an entry for the working dir
128-
// (which has ID 0), so add 1 to get the correct ID.
129-
GlobalFileId::from_usize(raw_id + 1)
130-
}
147+
// Add the regular entries after the base directory.
148+
table.extend(raw_file_table.values().map(|name| name.as_str()));
131149

132-
fn make_filenames_buffer(&self, tcx: TyCtxt<'_>) -> Vec<u8> {
133-
let mut table = Vec::with_capacity(self.raw_file_table.len() + 1);
134-
135-
// LLVM Coverage Mapping Format version 6 (zero-based encoded as 5)
136-
// requires setting the first filename to the compilation directory.
137-
// Since rustc generates coverage maps with relative paths, the
138-
// compilation directory can be combined with the relative paths
139-
// to get absolute paths, if needed.
140-
table.push(
141-
tcx.sess
142-
.opts
143-
.working_dir
144-
.for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
145-
.to_string_lossy(),
146-
);
150+
// Encode the file table into a buffer, and get the hash of its encoded
151+
// bytes, so that we can embed that hash in `__llvm_covfun` records.
152+
let filenames_buffer = llvm_cov::write_filenames_to_buffer(&table);
153+
let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer);
147154

148-
// Add the regular entries after the base directory.
149-
table.extend(self.raw_file_table.values().map(|file| {
150-
file.name.for_scope(tcx.sess, RemapPathScopeComponents::MACRO).to_string_lossy()
151-
}));
155+
Self { raw_file_table, filenames_buffer, filenames_hash }
156+
}
152157

153-
llvm_cov::write_filenames_to_buffer(&table)
158+
fn get_existing_id(&self, file: &SourceFile) -> Option<GlobalFileId> {
159+
let raw_id = self.raw_file_table.get_index_of(&file.stable_id)?;
160+
// The raw file table doesn't include an entry for the base dir
161+
// (which has ID 0), so add 1 to get the correct ID.
162+
Some(GlobalFileId::from_usize(raw_id + 1))
154163
}
155164
}
156165

@@ -166,26 +175,31 @@ rustc_index::newtype_index! {
166175
struct LocalFileId {}
167176
}
168177

169-
/// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU)
170-
/// file IDs.
178+
/// Holds a mapping from "local" (per-function) file IDs to their corresponding
179+
/// source files.
171180
#[derive(Debug, Default)]
172181
struct VirtualFileMapping {
173-
local_to_global: IndexVec<LocalFileId, GlobalFileId>,
174-
global_to_local: FxIndexMap<GlobalFileId, LocalFileId>,
182+
local_file_table: IndexVec<LocalFileId, Arc<SourceFile>>,
175183
}
176184

177185
impl VirtualFileMapping {
178-
fn local_id_for_global(&mut self, global_file_id: GlobalFileId) -> LocalFileId {
179-
*self
180-
.global_to_local
181-
.entry(global_file_id)
182-
.or_insert_with(|| self.local_to_global.push(global_file_id))
186+
fn push_file(&mut self, source_file: &Arc<SourceFile>) -> LocalFileId {
187+
self.local_file_table.push(Arc::clone(source_file))
183188
}
184189

185-
fn to_vec(&self) -> Vec<u32> {
186-
// This clone could be avoided by transmuting `&[GlobalFileId]` to `&[u32]`,
187-
// but it isn't hot or expensive enough to justify the extra unsafety.
188-
self.local_to_global.iter().map(|&global| GlobalFileId::as_u32(global)).collect()
190+
/// Resolves all of the filenames in this local file mapping to a list of
191+
/// global file IDs in its CGU, for inclusion in this function's
192+
/// `__llvm_covfun` record.
193+
///
194+
/// The global file IDs are returned as `u32` to make FFI easier.
195+
fn resolve_all(&self, global_file_table: &GlobalFileTable) -> Option<Vec<u32>> {
196+
self.local_file_table
197+
.iter()
198+
.map(|file| try {
199+
let id = global_file_table.get_existing_id(file)?;
200+
GlobalFileId::as_u32(id)
201+
})
202+
.collect::<Option<Vec<_>>>()
189203
}
190204
}
191205

compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs

+26-16
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
//! [^win]: On Windows the section name is `.lcovfun`.
66
77
use std::ffi::CString;
8+
use std::sync::Arc;
89

910
use rustc_abi::Align;
1011
use rustc_codegen_ssa::traits::{
@@ -15,7 +16,7 @@ use rustc_middle::mir::coverage::{
1516
MappingKind, Op,
1617
};
1718
use rustc_middle::ty::{Instance, TyCtxt};
18-
use rustc_span::Span;
19+
use rustc_span::{SourceFile, Span};
1920
use rustc_target::spec::HasTargetSpec;
2021
use tracing::debug;
2122

@@ -37,9 +38,16 @@ pub(crate) struct CovfunRecord<'tcx> {
3738
regions: ffi::Regions,
3839
}
3940

41+
impl<'tcx> CovfunRecord<'tcx> {
42+
/// Iterator that yields all source files referred to by this function's
43+
/// coverage mappings. Used to build the global file table for the CGU.
44+
pub(crate) fn all_source_files(&self) -> impl Iterator<Item = &SourceFile> {
45+
self.virtual_file_mapping.local_file_table.iter().map(Arc::as_ref)
46+
}
47+
}
48+
4049
pub(crate) fn prepare_covfun_record<'tcx>(
4150
tcx: TyCtxt<'tcx>,
42-
global_file_table: &mut GlobalFileTable,
4351
instance: Instance<'tcx>,
4452
is_used: bool,
4553
) -> Option<CovfunRecord<'tcx>> {
@@ -57,7 +65,7 @@ pub(crate) fn prepare_covfun_record<'tcx>(
5765
regions: ffi::Regions::default(),
5866
};
5967

60-
fill_region_tables(tcx, global_file_table, fn_cov_info, ids_info, &mut covfun);
68+
fill_region_tables(tcx, fn_cov_info, ids_info, &mut covfun);
6169

6270
if covfun.regions.has_no_regions() {
6371
debug!(?covfun, "function has no mappings to embed; skipping");
@@ -92,7 +100,6 @@ fn prepare_expressions(ids_info: &CoverageIdsInfo) -> Vec<ffi::CounterExpression
92100
/// Populates the mapping region tables in the current function's covfun record.
93101
fn fill_region_tables<'tcx>(
94102
tcx: TyCtxt<'tcx>,
95-
global_file_table: &mut GlobalFileTable,
96103
fn_cov_info: &'tcx FunctionCoverageInfo,
97104
ids_info: &'tcx CoverageIdsInfo,
98105
covfun: &mut CovfunRecord<'tcx>,
@@ -106,11 +113,7 @@ fn fill_region_tables<'tcx>(
106113
};
107114
let source_file = source_map.lookup_source_file(first_span.lo());
108115

109-
// Look up the global file ID for that file.
110-
let global_file_id = global_file_table.global_file_id_for_file(&source_file);
111-
112-
// Associate that global file ID with a local file ID for this function.
113-
let local_file_id = covfun.virtual_file_mapping.local_id_for_global(global_file_id);
116+
let local_file_id = covfun.virtual_file_mapping.push_file(&source_file);
114117

115118
// In rare cases, _all_ of a function's spans are discarded, and coverage
116119
// codegen needs to handle that gracefully to avoid #133606.
@@ -179,7 +182,7 @@ fn fill_region_tables<'tcx>(
179182
/// as a global variable in the `__llvm_covfun` section.
180183
pub(crate) fn generate_covfun_record<'tcx>(
181184
cx: &CodegenCx<'_, 'tcx>,
182-
filenames_hash: u64,
185+
global_file_table: &GlobalFileTable,
183186
covfun: &CovfunRecord<'tcx>,
184187
) {
185188
let &CovfunRecord {
@@ -191,12 +194,19 @@ pub(crate) fn generate_covfun_record<'tcx>(
191194
ref regions,
192195
} = covfun;
193196

197+
let Some(local_file_table) = virtual_file_mapping.resolve_all(global_file_table) else {
198+
debug_assert!(
199+
false,
200+
"all local files should be present in the global file table: \
201+
global_file_table = {global_file_table:?}, \
202+
virtual_file_mapping = {virtual_file_mapping:?}"
203+
);
204+
return;
205+
};
206+
194207
// Encode the function's coverage mappings into a buffer.
195-
let coverage_mapping_buffer = llvm_cov::write_function_mappings_to_buffer(
196-
&virtual_file_mapping.to_vec(),
197-
expressions,
198-
regions,
199-
);
208+
let coverage_mapping_buffer =
209+
llvm_cov::write_function_mappings_to_buffer(&local_file_table, expressions, regions);
200210

201211
// A covfun record consists of four target-endian integers, followed by the
202212
// encoded mapping data in bytes. Note that the length field is 32 bits.
@@ -209,7 +219,7 @@ pub(crate) fn generate_covfun_record<'tcx>(
209219
cx.const_u64(func_name_hash),
210220
cx.const_u32(coverage_mapping_buffer.len() as u32),
211221
cx.const_u64(source_hash),
212-
cx.const_u64(filenames_hash),
222+
cx.const_u64(global_file_table.filenames_hash),
213223
cx.const_bytes(&coverage_mapping_buffer),
214224
],
215225
// This struct needs to be packed, so that the 32-bit length field

compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ use rustc_middle::ty::{self, TyCtxt};
77
use rustc_span::def_id::DefIdSet;
88

99
use crate::common::CodegenCx;
10-
use crate::coverageinfo::mapgen::GlobalFileTable;
1110
use crate::coverageinfo::mapgen::covfun::{CovfunRecord, prepare_covfun_record};
1211
use crate::llvm;
1312

@@ -21,7 +20,6 @@ use crate::llvm;
2120
/// its embedded coverage data.
2221
pub(crate) fn prepare_covfun_records_for_unused_functions<'tcx>(
2322
cx: &CodegenCx<'_, 'tcx>,
24-
global_file_table: &mut GlobalFileTable,
2523
covfun_records: &mut Vec<CovfunRecord<'tcx>>,
2624
) {
2725
assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
@@ -33,7 +31,7 @@ pub(crate) fn prepare_covfun_records_for_unused_functions<'tcx>(
3331
// Try to create a covfun record for each unused function.
3432
let mut name_globals = Vec::with_capacity(unused_instances.len());
3533
covfun_records.extend(unused_instances.into_iter().filter_map(|unused| try {
36-
let record = prepare_covfun_record(cx.tcx, global_file_table, unused.instance, false)?;
34+
let record = prepare_covfun_record(cx.tcx, unused.instance, false)?;
3735
// If successful, also store its symbol name in a global constant.
3836
name_globals.push(cx.const_str(unused.symbol_name.name).0);
3937
record

0 commit comments

Comments
 (0)