diff --git a/Cargo.lock b/Cargo.lock index 232c9c9441fb4..35965d9cc48d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3595,6 +3595,7 @@ version = "0.0.0" dependencies = [ "bitflags 1.3.2", "cstr", + "itertools", "libc", "measureme", "object", diff --git a/compiler/rustc_codegen_llvm/Cargo.toml b/compiler/rustc_codegen_llvm/Cargo.toml index be09820d08da2..e864337e5dcb7 100644 --- a/compiler/rustc_codegen_llvm/Cargo.toml +++ b/compiler/rustc_codegen_llvm/Cargo.toml @@ -9,6 +9,7 @@ test = false [dependencies] bitflags = "1.0" cstr = "0.2" +itertools = "0.10.5" libc = "0.2" measureme = "10.0.0" object = { version = "0.32.0", default-features = false, features = [ diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs index 84319b4ba2d38..d210b5266e346 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs @@ -1,16 +1,18 @@ use crate::coverageinfo::ffi::{Counter, CounterExpression, ExprKind}; +use rustc_data_structures::captures::Captures; use rustc_data_structures::fx::FxIndexSet; use rustc_index::bit_set::BitSet; use rustc_middle::mir::coverage::{ CodeRegion, CounterId, CovTerm, Expression, ExpressionId, FunctionCoverageInfo, Mapping, Op, }; use rustc_middle::ty::Instance; +use rustc_span::Symbol; /// Holds all of the coverage mapping data associated with a function instance, /// collected during traversal of `Coverage` statements in the function's MIR. #[derive(Debug)] -pub struct FunctionCoverage<'tcx> { +pub struct FunctionCoverageCollector<'tcx> { /// Coverage info that was attached to this function by the instrumentor. function_coverage_info: &'tcx FunctionCoverageInfo, is_used: bool, @@ -26,7 +28,7 @@ pub struct FunctionCoverage<'tcx> { expressions_seen: BitSet, } -impl<'tcx> FunctionCoverage<'tcx> { +impl<'tcx> FunctionCoverageCollector<'tcx> { /// Creates a new set of coverage data for a used (called) function. pub fn new( instance: Instance<'tcx>, @@ -76,11 +78,6 @@ impl<'tcx> FunctionCoverage<'tcx> { } } - /// Returns true for a used (called) function, and false for an unused function. - pub fn is_used(&self) -> bool { - self.is_used - } - /// Marks a counter ID as having been seen in a counter-increment statement. #[instrument(level = "debug", skip(self))] pub(crate) fn mark_counter_id_seen(&mut self, id: CounterId) { @@ -165,64 +162,76 @@ impl<'tcx> FunctionCoverage<'tcx> { ZeroExpressions(zero_expressions) } + pub(crate) fn into_finished(self) -> FunctionCoverage<'tcx> { + FunctionCoverage::from_collector(self) + } +} + +pub(crate) struct FunctionCoverage<'tcx> { + function_coverage_info: &'tcx FunctionCoverageInfo, + is_used: bool, + + counters_seen: BitSet, + zero_expressions: ZeroExpressions, +} + +impl<'tcx> FunctionCoverage<'tcx> { + fn from_collector(collector: FunctionCoverageCollector<'tcx>) -> Self { + let zero_expressions = collector.identify_zero_expressions(); + let FunctionCoverageCollector { function_coverage_info, is_used, counters_seen, .. } = + collector; + + Self { function_coverage_info, is_used, counters_seen, zero_expressions } + } + + /// Returns true for a used (called) function, and false for an unused function. + pub(crate) fn is_used(&self) -> bool { + self.is_used + } + /// Return the source hash, generated from the HIR node structure, and used to indicate whether /// or not the source code structure changed between different compilations. pub fn source_hash(&self) -> u64 { if self.is_used { self.function_coverage_info.function_source_hash } else { 0 } } - /// Generate an array of CounterExpressions, and an iterator over all `Counter`s and their - /// associated `Regions` (from which the LLVM-specific `CoverageMapGenerator` will create - /// `CounterMappingRegion`s. - pub fn get_expressions_and_counter_regions( - &self, - ) -> (Vec, impl Iterator) { - let zero_expressions = self.identify_zero_expressions(); - - let counter_expressions = self.counter_expressions(&zero_expressions); - // Expression IDs are indices into `self.expressions`, and on the LLVM - // side they will be treated as indices into `counter_expressions`, so - // the two vectors should correspond 1:1. - assert_eq!(self.function_coverage_info.expressions.len(), counter_expressions.len()); - - let counter_regions = self.counter_regions(zero_expressions); - - (counter_expressions, counter_regions) + /// Returns an iterator over all filenames used by this function's mappings. + pub(crate) fn all_file_names(&self) -> impl Iterator + Captures<'_> { + self.function_coverage_info.mappings.iter().map(|mapping| mapping.code_region.file_name) } /// Convert this function's coverage expression data into a form that can be /// passed through FFI to LLVM. - fn counter_expressions(&self, zero_expressions: &ZeroExpressions) -> Vec { + pub(crate) fn counter_expressions( + &self, + ) -> impl Iterator + ExactSizeIterator + Captures<'_> { // We know that LLVM will optimize out any unused expressions before // producing the final coverage map, so there's no need to do the same // thing on the Rust side unless we're confident we can do much better. // (See `CounterExpressionsMinimizer` in `CoverageMappingWriter.cpp`.) let counter_from_operand = |operand: CovTerm| match operand { - CovTerm::Expression(id) if zero_expressions.contains(id) => Counter::ZERO, + CovTerm::Expression(id) if self.zero_expressions.contains(id) => Counter::ZERO, _ => Counter::from_term(operand), }; - self.function_coverage_info - .expressions - .iter() - .map(|&Expression { lhs, op, rhs }| CounterExpression { + self.function_coverage_info.expressions.iter().map(move |&Expression { lhs, op, rhs }| { + CounterExpression { lhs: counter_from_operand(lhs), kind: match op { Op::Add => ExprKind::Add, Op::Subtract => ExprKind::Subtract, }, rhs: counter_from_operand(rhs), - }) - .collect::>() + } + }) } /// Converts this function's coverage mappings into an intermediate form /// that will be used by `mapgen` when preparing for FFI. - fn counter_regions( + pub(crate) fn counter_regions( &self, - zero_expressions: ZeroExpressions, - ) -> impl Iterator { + ) -> impl Iterator + ExactSizeIterator { // Historically, mappings were stored directly in counter/expression // statements in MIR, and MIR optimizations would sometimes remove them. // That's mostly no longer true, so now we detect cases where that would @@ -230,7 +239,7 @@ impl<'tcx> FunctionCoverage<'tcx> { let counter_for_term = move |term: CovTerm| { let force_to_zero = match term { CovTerm::Counter(id) => !self.counters_seen.contains(id), - CovTerm::Expression(id) => zero_expressions.contains(id), + CovTerm::Expression(id) => self.zero_expressions.contains(id), CovTerm::Zero => false, }; if force_to_zero { Counter::ZERO } else { Counter::from_term(term) } diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs index f75add90aa2cf..ba750dea46052 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs @@ -1,17 +1,21 @@ use crate::common::CodegenCx; use crate::coverageinfo; use crate::coverageinfo::ffi::CounterMappingRegion; -use crate::coverageinfo::map_data::FunctionCoverage; +use crate::coverageinfo::map_data::{FunctionCoverage, FunctionCoverageCollector}; use crate::llvm; -use rustc_codegen_ssa::traits::ConstMethods; -use rustc_data_structures::fx::FxIndexSet; +use itertools::Itertools as _; +use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods, StaticMethods}; +use rustc_data_structures::fx::{FxIndexMap, FxIndexSet}; use rustc_hir::def::DefKind; use rustc_hir::def_id::DefId; use rustc_index::IndexVec; use rustc_middle::bug; +use rustc_middle::mir; use rustc_middle::mir::coverage::CodeRegion; use rustc_middle::ty::{self, TyCtxt}; +use rustc_session::RemapFileNameExt; +use rustc_span::def_id::DefIdSet; use rustc_span::Symbol; /// Generates and exports the Coverage Map. @@ -55,11 +59,25 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) { return; } - let mut global_file_table = GlobalFileTable::new(tcx); + let function_coverage_entries = function_coverage_map + .into_iter() + .map(|(instance, function_coverage)| (instance, function_coverage.into_finished())) + .collect::>(); + + let all_file_names = + function_coverage_entries.iter().flat_map(|(_, fn_cov)| fn_cov.all_file_names()); + let global_file_table = GlobalFileTable::new(tcx, all_file_names); + + // Encode all filenames referenced by counters/expressions in this module + let filenames_buffer = global_file_table.filenames_buffer(); + save_covmap_filenames_record(cx, version, filenames_buffer); + let filenames_ref = coverageinfo::hash_bytes(filenames_buffer); + + let mut unused_function_names = Vec::new(); + let covfun_section_name = coverageinfo::covfun_section_name(cx); // Encode coverage mappings and generate function records - let mut function_data = Vec::new(); - for (instance, function_coverage) in function_coverage_map { + for (instance, function_coverage) in function_coverage_entries { debug!("Generate function coverage for {}, {:?}", cx.codegen_unit.name(), instance); let mangled_function_name = tcx.symbol_name(instance).name; @@ -67,7 +85,7 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) { let is_used = function_coverage.is_used(); let coverage_mapping_buffer = - encode_mappings_for_function(&mut global_file_table, &function_coverage); + encode_mappings_for_function(&global_file_table, &function_coverage); if coverage_mapping_buffer.is_empty() { if function_coverage.is_used() { @@ -81,21 +99,10 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) { } } - function_data.push((mangled_function_name, source_hash, is_used, coverage_mapping_buffer)); - } - - // Encode all filenames referenced by counters/expressions in this module - let filenames_buffer = global_file_table.into_filenames_buffer(); - - let filenames_size = filenames_buffer.len(); - let filenames_val = cx.const_bytes(&filenames_buffer); - let filenames_ref = coverageinfo::hash_bytes(&filenames_buffer); - - // Generate the LLVM IR representation of the coverage map and store it in a well-known global - let cov_data_val = generate_coverage_map(cx, version, filenames_size, filenames_val); + if !is_used { + unused_function_names.push(mangled_function_name); + } - let covfun_section_name = coverageinfo::covfun_section_name(cx); - for (mangled_function_name, source_hash, is_used, coverage_mapping_buffer) in function_data { save_function_record( cx, &covfun_section_name, @@ -107,44 +114,111 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) { ); } - // Save the coverage data value to LLVM IR - coverageinfo::save_cov_data_to_mod(cx, cov_data_val); + // For unused functions, we need to take their mangled names and store them + // in a specially-named global array. LLVM's `InstrProfiling` pass will + // detect this global and include those names in its `__llvm_prf_names` + // section. (See `llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp`.) + if !unused_function_names.is_empty() { + assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu()); + + let name_globals = unused_function_names + .into_iter() + .map(|mangled_function_name| cx.const_str(mangled_function_name).0) + .collect::>(); + let initializer = cx.const_array(cx.type_ptr(), &name_globals); + + let array = llvm::add_global(cx.llmod, cx.val_ty(initializer), "__llvm_coverage_names"); + llvm::set_global_constant(array, true); + llvm::set_linkage(array, llvm::Linkage::InternalLinkage); + llvm::set_initializer(array, initializer); + } } +/// Maps "global" (per-CGU) file ID numbers to their underlying filenames. struct GlobalFileTable { - global_file_table: FxIndexSet, + /// This "raw" table doesn't include the working dir, so a filename's + /// global ID is its index in this set **plus one**. + raw_file_table: FxIndexSet, + filenames_buffer: Vec, } impl GlobalFileTable { - fn new(tcx: TyCtxt<'_>) -> Self { - let mut global_file_table = FxIndexSet::default(); + fn new(tcx: TyCtxt<'_>, all_file_names: impl IntoIterator) -> Self { // LLVM Coverage Mapping Format version 6 (zero-based encoded as 5) // requires setting the first filename to the compilation directory. // Since rustc generates coverage maps with relative paths, the // compilation directory can be combined with the relative paths // to get absolute paths, if needed. - use rustc_session::RemapFileNameExt; - let working_dir = - Symbol::intern(&tcx.sess.opts.working_dir.for_codegen(&tcx.sess).to_string_lossy()); - global_file_table.insert(working_dir); - Self { global_file_table } - } - - fn global_file_id_for_file_name(&mut self, file_name: Symbol) -> u32 { - let (global_file_id, _) = self.global_file_table.insert_full(file_name); - global_file_id as u32 - } + let working_dir: &str = &tcx.sess.opts.working_dir.for_codegen(&tcx.sess).to_string_lossy(); + + // Prepare a map from filename symbols to their underlying strings, so + // that we can sort by the strings. + let mut raw_file_table = FxIndexMap::::default(); + // Filenames usually come in contiguous runs, so dedup to save work. + let all_file_names = all_file_names.into_iter().dedup().collect::>(); + for file_name in &all_file_names { + raw_file_table.entry(*file_name).or_insert_with(|| file_name.as_str()); + } - fn into_filenames_buffer(self) -> Vec { - // This method takes `self` so that the caller can't accidentally - // modify the original file table after encoding it into a buffer. + // Sort the file table by its actual string values, not the arbitrary + // ordering of its symbols. + raw_file_table.sort_unstable_by(|_, a, _, b| str::cmp(a, b)); - llvm::build_byte_buffer(|buffer| { + // Build the LLVM filenames buffer ahead of time, so that we can discard + // the string references afterwards. + let filenames_buffer = llvm::build_byte_buffer(|buffer| { coverageinfo::write_filenames_section_to_buffer( - self.global_file_table.iter().map(Symbol::as_str), + // Insert the working dir at index 0, before the other filenames. + std::iter::once(working_dir).chain(raw_file_table.values().copied()), buffer, ); - }) + }); + + // Discard the string reference values, leaving only a set of symbols. + let raw_file_table = raw_file_table.into_keys().collect::>(); + + Self { raw_file_table, filenames_buffer } + } + + fn global_file_id_for_file_name(&self, file_name: Symbol) -> u32 { + let raw_id = self.raw_file_table.get_index_of(&file_name).unwrap_or_else(|| { + bug!("file name not found in prepared global file table: {file_name}"); + }); + // The raw file table doesn't include an entry for the working dir + // (which has ID 0), so add 1 to get the correct ID. + (raw_id + 1) as u32 + } + + fn filenames_buffer(&self) -> &[u8] { + &self.filenames_buffer + } +} + +rustc_index::newtype_index! { + // Tell the newtype macro to not generate `Encode`/`Decode` impls. + #[custom_encodable] + #[max = 0xFFFF_FFFF] + struct LocalFileId {} +} + +/// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU) +/// file IDs. +#[derive(Default)] +struct VirtualFileMapping { + local_to_global: IndexVec, + global_to_local: FxIndexMap, +} + +impl VirtualFileMapping { + fn local_id_for_global(&mut self, global_file_id: u32) -> LocalFileId { + *self + .global_to_local + .entry(global_file_id) + .or_insert_with(|| self.local_to_global.push(global_file_id)) + } + + fn into_vec(self) -> Vec { + self.local_to_global.raw } } @@ -154,44 +228,42 @@ impl GlobalFileTable { /// /// Newly-encountered filenames will be added to the global file table. fn encode_mappings_for_function( - global_file_table: &mut GlobalFileTable, + global_file_table: &GlobalFileTable, function_coverage: &FunctionCoverage<'_>, ) -> Vec { - let (expressions, counter_regions) = function_coverage.get_expressions_and_counter_regions(); - - let mut counter_regions = counter_regions.collect::>(); + let counter_regions = function_coverage.counter_regions(); if counter_regions.is_empty() { return Vec::new(); } - let mut virtual_file_mapping = IndexVec::::new(); + let expressions = function_coverage.counter_expressions().collect::>(); + + let mut virtual_file_mapping = VirtualFileMapping::default(); let mut mapping_regions = Vec::with_capacity(counter_regions.len()); - // Sort and group the list of (counter, region) mapping pairs by filename. - // (Preserve any further ordering imposed by `FunctionCoverage`.) + // Group mappings into runs with the same filename, preserving the order + // yielded by `FunctionCoverage`. // Prepare file IDs for each filename, and prepare the mapping data so that // we can pass it through FFI to LLVM. - counter_regions.sort_by_key(|(_counter, region)| region.file_name); - for counter_regions_for_file in - counter_regions.group_by(|(_, a), (_, b)| a.file_name == b.file_name) + for (file_name, counter_regions_for_file) in + &counter_regions.group_by(|(_counter, region)| region.file_name) { - // Look up (or allocate) the global file ID for this filename. - let file_name = counter_regions_for_file[0].1.file_name; + // Look up the global file ID for this filename. let global_file_id = global_file_table.global_file_id_for_file_name(file_name); // Associate that global file ID with a local file ID for this function. - let local_file_id: u32 = virtual_file_mapping.push(global_file_id); - debug!(" file id: local {local_file_id} => global {global_file_id} = '{file_name:?}'"); + let local_file_id = virtual_file_mapping.local_id_for_global(global_file_id); + debug!(" file id: {local_file_id:?} => global {global_file_id} = '{file_name:?}'"); // For each counter/region pair in this function+file, convert it to a // form suitable for FFI. - for &(counter, region) in counter_regions_for_file { + for (counter, region) in counter_regions_for_file { let CodeRegion { file_name: _, start_line, start_col, end_line, end_col } = *region; debug!("Adding counter {counter:?} to map for {region:?}"); mapping_regions.push(CounterMappingRegion::code_region( counter, - local_file_id, + local_file_id.as_u32(), start_line, start_col, end_line, @@ -203,7 +275,7 @@ fn encode_mappings_for_function( // Encode the function's coverage mappings into a buffer. llvm::build_byte_buffer(|buffer| { coverageinfo::write_mapping_to_buffer( - virtual_file_mapping.raw, + virtual_file_mapping.into_vec(), expressions, mapping_regions, buffer, @@ -211,15 +283,8 @@ fn encode_mappings_for_function( }) } -/// Construct coverage map header and the array of function records, and combine them into the -/// coverage map. Save the coverage map data into the LLVM IR as a static global using a -/// specific, well-known section and name. -fn generate_coverage_map<'ll>( - cx: &CodegenCx<'ll, '_>, - version: u32, - filenames_size: usize, - filenames_val: &'ll llvm::Value, -) -> &'ll llvm::Value { +fn save_covmap_filenames_record(cx: &CodegenCx<'_, '_>, version: u32, filenames_buffer: &[u8]) { + let filenames_size = filenames_buffer.len(); debug!("cov map: filenames_size = {}, 0-based version = {}", filenames_size, version); // Create the coverage data header (Note, fields 0 and 2 are now always zero, @@ -234,7 +299,30 @@ fn generate_coverage_map<'ll>( ); // Create the complete LLVM coverage data value to add to the LLVM IR - cx.const_struct(&[cov_data_header_val, filenames_val], /*packed=*/ false) + let covmap_val = cx.const_struct( + &[cov_data_header_val, cx.const_bytes(filenames_buffer)], + /*packed=*/ false, + ); + + let covmap_var_name = llvm::build_string(|s| unsafe { + llvm::LLVMRustCoverageWriteMappingVarNameToString(s); + }) + .expect("Rust Coverage Mapping var name failed UTF-8 conversion"); + debug!("covmap var name: {:?}", covmap_var_name); + + let covmap_section_name = llvm::build_string(|s| unsafe { + llvm::LLVMRustCoverageWriteMapSectionNameToString(cx.llmod, s); + }) + .expect("Rust Coverage section name failed UTF-8 conversion"); + debug!("covmap section name: {:?}", covmap_section_name); + + let llglobal = llvm::add_global(cx.llmod, cx.val_ty(covmap_val), &covmap_var_name); + llvm::set_initializer(llglobal, covmap_val); + llvm::set_global_constant(llglobal, true); + llvm::set_linkage(llglobal, llvm::Linkage::PrivateLinkage); + llvm::set_section(llglobal, &covmap_section_name); + llvm::set_alignment(llglobal, coverageinfo::VAR_ALIGN_BYTES); + cx.add_used_global(llglobal); } /// Construct a function record and combine it with the function's coverage mapping data. @@ -287,13 +375,12 @@ fn save_function_record( /// `-Clink-dead-code` will not generate code for unused generic functions.) /// /// We can find the unused functions (including generic functions) by the set difference of all MIR -/// `DefId`s (`tcx` query `mir_keys`) minus the codegenned `DefId`s (`tcx` query -/// `codegened_and_inlined_items`). +/// `DefId`s (`tcx` query `mir_keys`) minus the codegenned `DefId`s (`codegenned_and_inlined_items`). /// -/// These unused functions are then codegen'd in one of the CGUs which is marked as the -/// "code coverage dead code cgu" during the partitioning process. This prevents us from generating -/// code regions for the same function more than once which can lead to linker errors regarding -/// duplicate symbols. +/// These unused functions don't need to be codegenned, but we do need to add them to the function +/// coverage map (in a single designated CGU) so that we still emit coverage mappings for them. +/// We also end up adding their symbol names to a special global array that LLVM will include in +/// its embedded coverage data. fn add_unused_functions(cx: &CodegenCx<'_, '_>) { assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu()); @@ -324,19 +411,80 @@ fn add_unused_functions(cx: &CodegenCx<'_, '_>) { }) .collect(); - let codegenned_def_ids = tcx.codegened_and_inlined_items(()); + let codegenned_def_ids = codegenned_and_inlined_items(tcx); - for non_codegenned_def_id in - eligible_def_ids.into_iter().filter(|id| !codegenned_def_ids.contains(id)) - { + // For each `DefId` that should have coverage instrumentation but wasn't + // codegenned, add it to the function coverage map as an unused function. + for def_id in eligible_def_ids.into_iter().filter(|id| !codegenned_def_ids.contains(id)) { // Skip any function that didn't have coverage data added to it by the // coverage instrumentor. - let body = tcx.instance_mir(ty::InstanceDef::Item(non_codegenned_def_id)); + let body = tcx.instance_mir(ty::InstanceDef::Item(def_id)); let Some(function_coverage_info) = body.function_coverage_info.as_deref() else { continue; }; - debug!("generating unused fn: {:?}", non_codegenned_def_id); - cx.define_unused_fn(non_codegenned_def_id, function_coverage_info); + debug!("generating unused fn: {def_id:?}"); + let instance = declare_unused_fn(tcx, def_id); + add_unused_function_coverage(cx, instance, function_coverage_info); + } +} + +/// All items participating in code generation together with (instrumented) +/// items inlined into them. +fn codegenned_and_inlined_items(tcx: TyCtxt<'_>) -> DefIdSet { + let (items, cgus) = tcx.collect_and_partition_mono_items(()); + let mut visited = DefIdSet::default(); + let mut result = items.clone(); + + for cgu in cgus { + for item in cgu.items().keys() { + if let mir::mono::MonoItem::Fn(ref instance) = item { + let did = instance.def_id(); + if !visited.insert(did) { + continue; + } + let body = tcx.instance_mir(instance.def); + for block in body.basic_blocks.iter() { + for statement in &block.statements { + let mir::StatementKind::Coverage(_) = statement.kind else { continue }; + let scope = statement.source_info.scope; + if let Some(inlined) = scope.inlined_instance(&body.source_scopes) { + result.insert(inlined.def_id()); + } + } + } + } + } + } + + result +} + +fn declare_unused_fn<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> ty::Instance<'tcx> { + ty::Instance::new( + def_id, + ty::GenericArgs::for_item(tcx, def_id, |param, _| { + if let ty::GenericParamDefKind::Lifetime = param.kind { + tcx.lifetimes.re_erased.into() + } else { + tcx.mk_param_from_def(param) + } + }), + ) +} + +fn add_unused_function_coverage<'tcx>( + cx: &CodegenCx<'_, 'tcx>, + instance: ty::Instance<'tcx>, + function_coverage_info: &'tcx mir::coverage::FunctionCoverageInfo, +) { + // An unused function's mappings will automatically be rewritten to map to + // zero, because none of its counters/expressions are marked as seen. + let function_coverage = FunctionCoverageCollector::unused(instance, function_coverage_info); + + if let Some(coverage_context) = cx.coverage_context() { + coverage_context.function_coverage_map.borrow_mut().insert(instance, function_coverage); + } else { + bug!("Could not get the `coverage_context`"); } } diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs index 204a73b788a6d..122a672fcada3 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs @@ -1,10 +1,9 @@ use crate::llvm; -use crate::abi::Abi; use crate::builder::Builder; use crate::common::CodegenCx; use crate::coverageinfo::ffi::{CounterExpression, CounterMappingRegion}; -use crate::coverageinfo::map_data::FunctionCoverage; +use crate::coverageinfo::map_data::FunctionCoverageCollector; use libc::c_uint; use rustc_codegen_ssa::traits::{ @@ -12,17 +11,12 @@ use rustc_codegen_ssa::traits::{ StaticMethods, }; use rustc_data_structures::fx::FxHashMap; -use rustc_hir as hir; -use rustc_hir::def_id::DefId; use rustc_llvm::RustString; use rustc_middle::bug; -use rustc_middle::mir::coverage::{CounterId, CoverageKind, FunctionCoverageInfo}; +use rustc_middle::mir::coverage::CoverageKind; use rustc_middle::mir::Coverage; -use rustc_middle::ty; -use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt}; -use rustc_middle::ty::GenericArgs; +use rustc_middle::ty::layout::HasTyCtxt; use rustc_middle::ty::Instance; -use rustc_middle::ty::Ty; use std::cell::RefCell; @@ -30,14 +24,13 @@ pub(crate) mod ffi; pub(crate) mod map_data; pub mod mapgen; -const UNUSED_FUNCTION_COUNTER_ID: CounterId = CounterId::START; - const VAR_ALIGN_BYTES: usize = 8; /// A context object for maintaining all state needed by the coverageinfo module. pub struct CrateCoverageContext<'ll, 'tcx> { /// Coverage data for each instrumented function identified by DefId. - pub(crate) function_coverage_map: RefCell, FunctionCoverage<'tcx>>>, + pub(crate) function_coverage_map: + RefCell, FunctionCoverageCollector<'tcx>>>, pub(crate) pgo_func_name_var_map: RefCell, &'ll llvm::Value>>, } @@ -49,7 +42,9 @@ impl<'ll, 'tcx> CrateCoverageContext<'ll, 'tcx> { } } - pub fn take_function_coverage_map(&self) -> FxHashMap, FunctionCoverage<'tcx>> { + pub fn take_function_coverage_map( + &self, + ) -> FxHashMap, FunctionCoverageCollector<'tcx>> { self.function_coverage_map.replace(FxHashMap::default()) } } @@ -76,25 +71,6 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { bug!("Could not get the `coverage_context`"); } } - - /// Functions with MIR-based coverage are normally codegenned _only_ if - /// called. LLVM coverage tools typically expect every function to be - /// defined (even if unused), with at least one call to LLVM intrinsic - /// `instrprof.increment`. - /// - /// Codegen a small function that will never be called, with one counter - /// that will never be incremented. - /// - /// For used/called functions, the coverageinfo was already added to the - /// `function_coverage_map` (keyed by function `Instance`) during codegen. - /// But in this case, since the unused function was _not_ previously - /// codegenned, collect the function coverage info from MIR and add an - /// "unused" entry to the function coverage map. - fn define_unused_fn(&self, def_id: DefId, function_coverage_info: &'tcx FunctionCoverageInfo) { - let instance = declare_unused_fn(self, def_id); - codegen_unused_fn_and_counter(self, instance); - add_unused_function_coverage(self, instance, function_coverage_info); - } } impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { @@ -120,7 +96,7 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { let mut coverage_map = coverage_context.function_coverage_map.borrow_mut(); let func_coverage = coverage_map .entry(instance) - .or_insert_with(|| FunctionCoverage::new(instance, function_coverage_info)); + .or_insert_with(|| FunctionCoverageCollector::new(instance, function_coverage_info)); let Coverage { kind } = coverage; match *kind { @@ -159,76 +135,6 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { } } -fn declare_unused_fn<'tcx>(cx: &CodegenCx<'_, 'tcx>, def_id: DefId) -> Instance<'tcx> { - let tcx = cx.tcx; - - let instance = Instance::new( - def_id, - GenericArgs::for_item(tcx, def_id, |param, _| { - if let ty::GenericParamDefKind::Lifetime = param.kind { - tcx.lifetimes.re_erased.into() - } else { - tcx.mk_param_from_def(param) - } - }), - ); - - let llfn = cx.declare_fn( - tcx.symbol_name(instance).name, - cx.fn_abi_of_fn_ptr( - ty::Binder::dummy(tcx.mk_fn_sig( - [Ty::new_unit(tcx)], - Ty::new_unit(tcx), - false, - hir::Unsafety::Unsafe, - Abi::Rust, - )), - ty::List::empty(), - ), - None, - ); - - llvm::set_linkage(llfn, llvm::Linkage::PrivateLinkage); - llvm::set_visibility(llfn, llvm::Visibility::Default); - - assert!(cx.instances.borrow_mut().insert(instance, llfn).is_none()); - - instance -} - -fn codegen_unused_fn_and_counter<'tcx>(cx: &CodegenCx<'_, 'tcx>, instance: Instance<'tcx>) { - let llfn = cx.get_fn(instance); - let llbb = Builder::append_block(cx, llfn, "unused_function"); - let mut bx = Builder::build(cx, llbb); - let fn_name = bx.get_pgo_func_name_var(instance); - let hash = bx.const_u64(0); - let num_counters = bx.const_u32(1); - let index = bx.const_u32(u32::from(UNUSED_FUNCTION_COUNTER_ID)); - debug!( - "codegen intrinsic instrprof.increment(fn_name={:?}, hash={:?}, num_counters={:?}, - index={:?}) for unused function: {:?}", - fn_name, hash, num_counters, index, instance - ); - bx.instrprof_increment(fn_name, hash, num_counters, index); - bx.ret_void(); -} - -fn add_unused_function_coverage<'tcx>( - cx: &CodegenCx<'_, 'tcx>, - instance: Instance<'tcx>, - function_coverage_info: &'tcx FunctionCoverageInfo, -) { - // An unused function's mappings will automatically be rewritten to map to - // zero, because none of its counters/expressions are marked as seen. - let function_coverage = FunctionCoverage::unused(instance, function_coverage_info); - - if let Some(coverage_context) = cx.coverage_context() { - coverage_context.function_coverage_map.borrow_mut().insert(instance, function_coverage); - } else { - bug!("Could not get the `coverage_context`"); - } -} - /// Calls llvm::createPGOFuncNameVar() with the given function instance's /// mangled function name. The LLVM API returns an llvm::GlobalVariable /// containing the function name, with the specific variable name and linkage @@ -297,31 +203,6 @@ pub(crate) fn mapping_version() -> u32 { unsafe { llvm::LLVMRustCoverageMappingVersion() } } -pub(crate) fn save_cov_data_to_mod<'ll, 'tcx>( - cx: &CodegenCx<'ll, 'tcx>, - cov_data_val: &'ll llvm::Value, -) { - let covmap_var_name = llvm::build_string(|s| unsafe { - llvm::LLVMRustCoverageWriteMappingVarNameToString(s); - }) - .expect("Rust Coverage Mapping var name failed UTF-8 conversion"); - debug!("covmap var name: {:?}", covmap_var_name); - - let covmap_section_name = llvm::build_string(|s| unsafe { - llvm::LLVMRustCoverageWriteMapSectionNameToString(cx.llmod, s); - }) - .expect("Rust Coverage section name failed UTF-8 conversion"); - debug!("covmap section name: {:?}", covmap_section_name); - - let llglobal = llvm::add_global(cx.llmod, cx.val_ty(cov_data_val), &covmap_var_name); - llvm::set_initializer(llglobal, cov_data_val); - llvm::set_global_constant(llglobal, true); - llvm::set_linkage(llglobal, llvm::Linkage::PrivateLinkage); - llvm::set_section(llglobal, &covmap_section_name); - llvm::set_alignment(llglobal, VAR_ALIGN_BYTES); - cx.add_used_global(llglobal); -} - pub(crate) fn save_func_record_to_mod<'ll, 'tcx>( cx: &CodegenCx<'ll, 'tcx>, covfun_section_name: &str, diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs index 7a390d35a2b9f..8a6a5f79b3bb9 100644 --- a/compiler/rustc_codegen_llvm/src/lib.rs +++ b/compiler/rustc_codegen_llvm/src/lib.rs @@ -8,12 +8,13 @@ #![cfg_attr(not(bootstrap), feature(rustdoc_internals))] #![cfg_attr(not(bootstrap), doc(rust_logo))] #![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")] +#![feature(exact_size_is_empty)] #![feature(extern_types)] #![feature(hash_raw_entry)] #![feature(iter_intersperse)] #![feature(let_chains)] +#![feature(min_specialization)] #![feature(never_type)] -#![feature(slice_group_by)] #![feature(impl_trait_in_assoc_type)] #![recursion_limit = "256"] #![allow(rustc::potential_query_instability)] diff --git a/compiler/rustc_middle/src/query/mod.rs b/compiler/rustc_middle/src/query/mod.rs index b5f873c9257f0..cd5206a837ff7 100644 --- a/compiler/rustc_middle/src/query/mod.rs +++ b/compiler/rustc_middle/src/query/mod.rs @@ -1882,12 +1882,6 @@ rustc_queries! { desc { |tcx| "determining whether `{}` needs codegen", tcx.def_path_str(def_id) } } - /// All items participating in code generation together with items inlined into them. - query codegened_and_inlined_items(_: ()) -> &'tcx DefIdSet { - eval_always - desc { "collecting codegened and inlined items" } - } - query codegen_unit(sym: Symbol) -> &'tcx CodegenUnit<'tcx> { desc { "getting codegen unit `{sym}`" } } diff --git a/compiler/rustc_mir_transform/src/coverage/mod.rs b/compiler/rustc_mir_transform/src/coverage/mod.rs index c9b36ba25ac32..3ec21fe6890a2 100644 --- a/compiler/rustc_mir_transform/src/coverage/mod.rs +++ b/compiler/rustc_mir_transform/src/coverage/mod.rs @@ -1,9 +1,8 @@ -pub mod query; - mod counters; mod graph; +pub mod query; +pub(crate) mod repair; mod spans; - #[cfg(test)] mod tests; diff --git a/compiler/rustc_mir_transform/src/coverage/repair.rs b/compiler/rustc_mir_transform/src/coverage/repair.rs new file mode 100644 index 0000000000000..aa9a28d09fd36 --- /dev/null +++ b/compiler/rustc_mir_transform/src/coverage/repair.rs @@ -0,0 +1,58 @@ +use rustc_middle::mir::coverage::{CounterId, CoverageKind}; +use rustc_middle::mir::{ + self, Coverage, MirPass, SourceInfo, Statement, StatementKind, START_BLOCK, +}; +use rustc_middle::ty::TyCtxt; +use rustc_span::DUMMY_SP; + +/// If a function has been [instrumented for coverage](super::InstrumentCoverage), +/// but MIR optimizations subsequently remove all of its [`CoverageKind::CounterIncrement`] +/// statements (e.g. because bb0 is unreachable), then we won't generate any +/// `llvm.instrprof.increment` intrinsics. LLVM will think the function is not +/// instrumented, and it will disappear from coverage mappings and coverage reports. +/// +/// This pass detects when that has happened, and re-inserts a dummy counter-increment +/// statement so that LLVM knows to treat the function as instrumented. +pub struct RepairCoverage; + +impl<'tcx> MirPass<'tcx> for RepairCoverage { + fn is_enabled(&self, sess: &rustc_session::Session) -> bool { + sess.instrument_coverage() + } + + fn run_pass(&self, _tcx: TyCtxt<'tcx>, body: &mut mir::Body<'tcx>) { + // If a function wasn't instrumented for coverage in the first place, + // then there's no need to repair anything. + if body.function_coverage_info.is_none() { + return; + } + + // If the body still contains one or more counter-increment statements, + // there's no need to repair anything. + let has_counter = body + .basic_blocks + .iter() + .flat_map(|bb_data| &bb_data.statements) + .filter_map(|statement| match statement.kind { + StatementKind::Coverage(box ref coverage) => Some(coverage), + _ => None, + }) + .any(|coverage| matches!(coverage.kind, CoverageKind::CounterIncrement { .. })); + if has_counter { + return; + } + + debug!( + "all counters were removed after instrumentation; restoring one counter in {:?}", + body.source.def_id() + ); + + let statement = Statement { + source_info: SourceInfo::outermost(DUMMY_SP), + kind: StatementKind::Coverage(Box::new(Coverage { + kind: CoverageKind::CounterIncrement { id: CounterId::START }, + })), + }; + body[START_BLOCK].statements.insert(0, statement); + } +} diff --git a/compiler/rustc_mir_transform/src/lib.rs b/compiler/rustc_mir_transform/src/lib.rs index 6c0aa51795bdb..17d1f9d404abc 100644 --- a/compiler/rustc_mir_transform/src/lib.rs +++ b/compiler/rustc_mir_transform/src/lib.rs @@ -585,6 +585,8 @@ fn run_optimization_passes<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) { &large_enums::EnumSizeOpt { discrepancy: 128 }, // Some cleanup necessary at least for LLVM and potentially other codegen backends. &add_call_guards::CriticalCallEdges, + // If opts removed all coverage counters, ensure there is at least one present. + &coverage::repair::RepairCoverage, // Cleanup for human readability, off by default. &prettify::ReorderBasicBlocks, &prettify::ReorderLocals, diff --git a/compiler/rustc_mir_transform/src/unreachable_prop.rs b/compiler/rustc_mir_transform/src/unreachable_prop.rs index ea7aafd866b15..26398b36693d6 100644 --- a/compiler/rustc_mir_transform/src/unreachable_prop.rs +++ b/compiler/rustc_mir_transform/src/unreachable_prop.rs @@ -13,11 +13,7 @@ pub struct UnreachablePropagation; impl MirPass<'_> for UnreachablePropagation { fn is_enabled(&self, sess: &rustc_session::Session) -> bool { // Enable only under -Zmir-opt-level=2 as this can make programs less debuggable. - - // FIXME(#116171) Coverage gets confused by MIR passes that can remove all - // coverage statements from an instrumented function. This pass can be - // re-enabled when coverage codegen is robust against that happening. - sess.mir_opt_level() >= 2 && !sess.instrument_coverage() + sess.mir_opt_level() >= 2 } fn run_pass<'tcx>(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) { diff --git a/compiler/rustc_monomorphize/src/partitioning.rs b/compiler/rustc_monomorphize/src/partitioning.rs index 1d8cbe0e21b7d..4009e28924068 100644 --- a/compiler/rustc_monomorphize/src/partitioning.rs +++ b/compiler/rustc_monomorphize/src/partitioning.rs @@ -105,7 +105,6 @@ use rustc_hir::def_id::{DefId, DefIdSet, LOCAL_CRATE}; use rustc_hir::definitions::DefPathDataName; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; use rustc_middle::middle::exported_symbols::{SymbolExportInfo, SymbolExportLevel}; -use rustc_middle::mir; use rustc_middle::mir::mono::{ CodegenUnit, CodegenUnitNameBuilder, InstantiationMode, Linkage, MonoItem, MonoItemData, Visibility, @@ -1279,38 +1278,8 @@ fn dump_mono_items_stats<'tcx>( Ok(()) } -fn codegened_and_inlined_items(tcx: TyCtxt<'_>, (): ()) -> &DefIdSet { - let (items, cgus) = tcx.collect_and_partition_mono_items(()); - let mut visited = DefIdSet::default(); - let mut result = items.clone(); - - for cgu in cgus { - for item in cgu.items().keys() { - if let MonoItem::Fn(ref instance) = item { - let did = instance.def_id(); - if !visited.insert(did) { - continue; - } - let body = tcx.instance_mir(instance.def); - for block in body.basic_blocks.iter() { - for statement in &block.statements { - let mir::StatementKind::Coverage(_) = statement.kind else { continue }; - let scope = statement.source_info.scope; - if let Some(inlined) = scope.inlined_instance(&body.source_scopes) { - result.insert(inlined.def_id()); - } - } - } - } - } - } - - tcx.arena.alloc(result) -} - pub fn provide(providers: &mut Providers) { providers.collect_and_partition_mono_items = collect_and_partition_mono_items; - providers.codegened_and_inlined_items = codegened_and_inlined_items; providers.is_codegened_item = |tcx, def_id| { let (all_mono_items, _) = tcx.collect_and_partition_mono_items(()); diff --git a/tests/coverage-map/unreachable.rs b/tests/coverage-map/unreachable.rs index 6385bfa160d7d..61728a3c298e5 100644 --- a/tests/coverage-map/unreachable.rs +++ b/tests/coverage-map/unreachable.rs @@ -1,6 +1,6 @@ #![feature(core_intrinsics)] #![feature(coverage_attribute)] -// compile-flags: --edition=2021 +// compile-flags: --edition=2021 -Copt-level=2 // // If we instrument a function for coverage, but all of its counter-increment diff --git a/tests/run-coverage/unreachable.coverage b/tests/run-coverage/unreachable.coverage index fa0ac9ccfa1c8..da52f0522d4a3 100644 --- a/tests/run-coverage/unreachable.coverage +++ b/tests/run-coverage/unreachable.coverage @@ -1,6 +1,6 @@ LL| |#![feature(core_intrinsics)] LL| |#![feature(coverage_attribute)] - LL| |// compile-flags: --edition=2021 + LL| |// compile-flags: --edition=2021 -Copt-level=2 LL| | LL| |// LL| |// If we instrument a function for coverage, but all of its counter-increment diff --git a/tests/run-coverage/unreachable.rs b/tests/run-coverage/unreachable.rs index 6385bfa160d7d..61728a3c298e5 100644 --- a/tests/run-coverage/unreachable.rs +++ b/tests/run-coverage/unreachable.rs @@ -1,6 +1,6 @@ #![feature(core_intrinsics)] #![feature(coverage_attribute)] -// compile-flags: --edition=2021 +// compile-flags: --edition=2021 -Copt-level=2 // // If we instrument a function for coverage, but all of its counter-increment