Skip to content

Commit f54072b

Browse files
committed
Auto merge of #76830 - Artoria2e5:tune, r=nagisa
Pass tune-cpu to LLVM. I think this is how it should work... See https://internals.rust-lang.org/t/expose-tune-cpu-from-llvm/13088 for the background, or the documentation diff.
2 parents afb4514 + a35a93f commit f54072b

File tree

9 files changed

+80
-8
lines changed

9 files changed

+80
-8
lines changed

compiler/rustc_codegen_llvm/src/attributes.rs

+15
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,18 @@ pub fn apply_target_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
194194
);
195195
}
196196

197+
pub fn apply_tune_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
198+
if let Some(tune) = llvm_util::tune_cpu(cx.tcx.sess) {
199+
let tune_cpu = SmallCStr::new(tune);
200+
llvm::AddFunctionAttrStringValue(
201+
llfn,
202+
llvm::AttributePlace::Function,
203+
const_cstr!("tune-cpu"),
204+
tune_cpu.as_c_str(),
205+
);
206+
}
207+
}
208+
197209
/// Sets the `NonLazyBind` LLVM attribute on a given function,
198210
/// assuming the codegen options allow skipping the PLT.
199211
pub fn non_lazy_bind(sess: &Session, llfn: &'ll Value) {
@@ -303,6 +315,9 @@ pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty::
303315
// Without this, ThinLTO won't inline Rust functions into Clang generated
304316
// functions (because Clang annotates functions this way too).
305317
apply_target_cpu_attr(cx, llfn);
318+
// tune-cpu is only conveyed through the attribute for our purpose.
319+
// The target doesn't care; the subtarget reads our attribute.
320+
apply_tune_cpu_attr(cx, llfn);
306321

307322
let features = llvm_target_features(cx.tcx.sess)
308323
.map(|s| s.to_string())

compiler/rustc_codegen_llvm/src/context.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,8 @@ impl MiscMethods<'tcx> for CodegenCx<'ll, 'tcx> {
417417
}
418418

419419
fn apply_target_cpu_attr(&self, llfn: &'ll Value) {
420-
attributes::apply_target_cpu_attr(self, llfn)
420+
attributes::apply_target_cpu_attr(self, llfn);
421+
attributes::apply_tune_cpu_attr(self, llfn);
421422
}
422423

423424
fn create_used_variable(&self) {

compiler/rustc_codegen_llvm/src/lib.rs

+3
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,9 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
116116
fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str {
117117
llvm_util::target_cpu(sess)
118118
}
119+
fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str> {
120+
llvm_util::tune_cpu(sess)
121+
}
119122
}
120123

121124
impl WriteBackendMethods for LlvmCodegenBackend {

compiler/rustc_codegen_llvm/src/llvm_util.rs

+17-5
Original file line numberDiff line numberDiff line change
@@ -202,11 +202,7 @@ pub(crate) fn print(req: PrintRequest, sess: &Session) {
202202
}
203203
}
204204

205-
pub fn target_cpu(sess: &Session) -> &str {
206-
let name = match sess.opts.cg.target_cpu {
207-
Some(ref s) => &**s,
208-
None => &*sess.target.target.options.cpu,
209-
};
205+
fn handle_native(name: &str) -> &str {
210206
if name != "native" {
211207
return name;
212208
}
@@ -217,3 +213,19 @@ pub fn target_cpu(sess: &Session) -> &str {
217213
str::from_utf8(slice::from_raw_parts(ptr as *const u8, len)).unwrap()
218214
}
219215
}
216+
217+
pub fn target_cpu(sess: &Session) -> &str {
218+
let name = match sess.opts.cg.target_cpu {
219+
Some(ref s) => &**s,
220+
None => &*sess.target.target.options.cpu,
221+
};
222+
223+
handle_native(name)
224+
}
225+
226+
pub fn tune_cpu(sess: &Session) -> Option<&str> {
227+
match sess.opts.debugging_opts.tune_cpu {
228+
Some(ref s) => Some(handle_native(&**s)),
229+
None => None,
230+
}
231+
}

compiler/rustc_codegen_ssa/src/traits/backend.rs

+1
Original file line numberDiff line numberDiff line change
@@ -124,4 +124,5 @@ pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Se
124124
opt_level: config::OptLevel,
125125
) -> Arc<dyn Fn() -> Result<Self::TargetMachine, String> + Send + Sync>;
126126
fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str;
127+
fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str>;
127128
}

compiler/rustc_interface/src/tests.rs

+1
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,7 @@ fn test_debugging_options_tracking_hash() {
585585
tracked!(symbol_mangling_version, SymbolManglingVersion::V0);
586586
tracked!(teach, true);
587587
tracked!(thinlto, Some(true));
588+
tracked!(tune_cpu, Some(String::from("abc")));
588589
tracked!(tls_model, Some(TlsModel::GeneralDynamic));
589590
tracked!(treat_err_as_bug, Some(1));
590591
tracked!(unleash_the_miri_inside_of_you, true);

compiler/rustc_session/src/options.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1078,6 +1078,8 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
10781078
"show extended diagnostic help (default: no)"),
10791079
terminal_width: Option<usize> = (None, parse_opt_uint, [UNTRACKED],
10801080
"set the current terminal width"),
1081+
tune_cpu: Option<String> = (None, parse_opt_string, [TRACKED],
1082+
"select processor to schedule for (`rustc --print target-cpus` for details)"),
10811083
thinlto: Option<bool> = (None, parse_opt_bool, [TRACKED],
10821084
"enable ThinLTO when possible"),
10831085
// We default to 1 here since we want to behave like

src/doc/rustc/src/codegen-options/index.md

+18-2
Original file line numberDiff line numberDiff line change
@@ -497,8 +497,10 @@ point instructions in software. It takes one of the following values:
497497
This instructs `rustc` to generate code specifically for a particular processor.
498498

499499
You can run `rustc --print target-cpus` to see the valid options to pass
500-
here. Additionally, `native` can be passed to use the processor of the host
501-
machine. Each target has a default base CPU.
500+
here. Each target has a default base CPU. Special values include:
501+
502+
* `native` can be passed to use the processor of the host machine.
503+
* `generic` refers to an LLVM target with minimal features but modern tuning.
502504

503505
## target-feature
504506

@@ -530,6 +532,20 @@ This also supports the feature `+crt-static` and `-crt-static` to control
530532
Each target and [`target-cpu`](#target-cpu) has a default set of enabled
531533
features.
532534

535+
## tune-cpu
536+
537+
This instructs `rustc` to schedule code specifically for a particular
538+
processor. This does not affect the compatibility (instruction sets or ABI),
539+
but should make your code slightly more efficient on the selected CPU.
540+
541+
The valid options are the same as those for [`target-cpu`](#target-cpu).
542+
The default is `None`, in which case LLVM uses the `target-cpu` for tuning.
543+
544+
This is an unstable option. Use `-Z tune-cpu=machine` to specify a value.
545+
546+
Due to limitations in LLVM (12.0.0-git9218f92), this option is currently
547+
effective only for x86 targets.
548+
533549
[option-emit]: ../command-line-arguments.md#option-emit
534550
[option-o-optimize]: ../command-line-arguments.md#option-o-optimize
535551
[profile-guided optimization]: ../profile-guided-optimization.md
+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// This test makes sure that functions get annotated with the proper
2+
// "tune-cpu" attribute in LLVM.
3+
4+
// no-prefer-dynamic
5+
// ignore-tidy-linelength
6+
// compile-flags: -C no-prepopulate-passes -C panic=abort -C linker-plugin-lto -Cpasses=name-anon-globals -Z tune-cpu=generic
7+
8+
#![crate_type = "staticlib"]
9+
10+
// CHECK-LABEL: define {{.*}} @exported() {{.*}} #0
11+
#[no_mangle]
12+
pub extern fn exported() {
13+
not_exported();
14+
}
15+
16+
// CHECK-LABEL: ; tune_cpu_on_functions::not_exported
17+
// CHECK-NEXT: ; Function Attrs:
18+
// CHECK-NEXT: define {{.*}}() {{.*}} #0
19+
fn not_exported() {}
20+
21+
// CHECK: attributes #0 = {{.*}} "tune-cpu"="{{.*}}"

0 commit comments

Comments
 (0)