Skip to content

Commit 789baed

Browse files
committed
rustc_errors: use perfect hashing for character replacements
1 parent 8c7e0e1 commit 789baed

File tree

4 files changed

+67
-50
lines changed

4 files changed

+67
-50
lines changed

Cargo.lock

+15
Original file line numberDiff line numberDiff line change
@@ -2712,6 +2712,7 @@ version = "0.11.2"
27122712
source = "registry+https://github.com/rust-lang/crates.io-index"
27132713
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
27142714
dependencies = [
2715+
"phf_macros",
27152716
"phf_shared 0.11.2",
27162717
]
27172718

@@ -2745,6 +2746,19 @@ dependencies = [
27452746
"rand",
27462747
]
27472748

2749+
[[package]]
2750+
name = "phf_macros"
2751+
version = "0.11.2"
2752+
source = "registry+https://github.com/rust-lang/crates.io-index"
2753+
checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b"
2754+
dependencies = [
2755+
"phf_generator 0.11.2",
2756+
"phf_shared 0.11.2",
2757+
"proc-macro2",
2758+
"quote",
2759+
"syn 2.0.67",
2760+
]
2761+
27482762
[[package]]
27492763
name = "phf_shared"
27502764
version = "0.10.0"
@@ -3653,6 +3667,7 @@ version = "0.0.0"
36533667
dependencies = [
36543668
"annotate-snippets 0.10.2",
36553669
"derive_setters",
3670+
"phf",
36563671
"rustc_ast",
36573672
"rustc_ast_pretty",
36583673
"rustc_data_structures",

compiler/rustc_errors/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ edition = "2021"
77
# tidy-alphabetical-start
88
annotate-snippets = "0.10"
99
derive_setters = "0.1.6"
10+
phf = { version = "0.11.2", features = ["macros"] }
1011
rustc_ast = { path = "../rustc_ast" }
1112
rustc_ast_pretty = { path = "../rustc_ast_pretty" }
1213
rustc_data_structures = { path = "../rustc_data_structures" }

compiler/rustc_errors/src/emitter.rs

+46-50
Original file line numberDiff line numberDiff line change
@@ -2563,66 +2563,62 @@ fn num_decimal_digits(num: usize) -> usize {
25632563
}
25642564

25652565
// We replace some characters so the CLI output is always consistent and underlines aligned.
2566-
// Keep the following list in sync with `rustc_span::char_width`.
2567-
// ATTENTION: keep lexicographically sorted so that the binary search will work
2568-
const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
2569-
// tidy-alphabetical-start
2566+
const OUTPUT_REPLACEMENTS: phf::Map<char, &'static str> = phf::phf_map![
25702567
// In terminals without Unicode support the following will be garbled, but in *all* terminals
25712568
// the underlying codepoint will be as well. We could gate this replacement behind a "unicode
25722569
// support" gate.
2573-
('\0', "␀"),
2574-
('\u{0001}', "␁"),
2575-
('\u{0002}', "␂"),
2576-
('\u{0003}', "␃"),
2577-
('\u{0004}', "␄"),
2578-
('\u{0005}', "␅"),
2579-
('\u{0006}', "␆"),
2580-
('\u{0007}', "␇"),
2581-
('\u{0008}', "␈"),
2582-
('\u{0009}', " "), // We do our own tab replacement
2583-
('\u{000b}', "␋"),
2584-
('\u{000c}', "␌"),
2585-
('\u{000d}', "␍"),
2586-
('\u{000e}', "␎"),
2587-
('\u{000f}', "␏"),
2588-
('\u{0010}', "␐"),
2589-
('\u{0011}', "␑"),
2590-
('\u{0012}', "␒"),
2591-
('\u{0013}', "␓"),
2592-
('\u{0014}', "␔"),
2593-
('\u{0015}', "␕"),
2594-
('\u{0016}', "␖"),
2595-
('\u{0017}', "␗"),
2596-
('\u{0018}', "␘"),
2597-
('\u{0019}', "␙"),
2598-
('\u{001a}', "␚"),
2599-
('\u{001b}', "␛"),
2600-
('\u{001c}', "␜"),
2601-
('\u{001d}', "␝"),
2602-
('\u{001e}', "␞"),
2603-
('\u{001f}', "␟"),
2604-
('\u{007f}', "␡"),
2605-
('\u{200d}', ""), // Replace ZWJ for consistent terminal output of grapheme clusters.
2606-
('\u{202a}', "�"), // The following unicode text flow control characters are inconsistently
2607-
('\u{202b}', "�"), // supported across CLIs and can cause confusion due to the bytes on disk
2608-
('\u{202c}', "�"), // not corresponding to the visible source code, so we replace them always.
2609-
('\u{202d}', "�"),
2610-
('\u{202e}', "�"),
2611-
('\u{2066}', "�"),
2612-
('\u{2067}', "�"),
2613-
('\u{2068}', "�"),
2614-
('\u{2069}', "�"),
2615-
// tidy-alphabetical-end
2570+
'\0' => "␀",
2571+
'\t' => " ", // We do our own tab replacement
2572+
'\r' => "␍",
2573+
'\u{0001}' => "␁",
2574+
'\u{0002}' => "␂",
2575+
'\u{0003}' => "␃",
2576+
'\u{0004}' => "␄",
2577+
'\u{0005}' => "␅",
2578+
'\u{0006}' => "␆",
2579+
'\u{0007}' => "␇",
2580+
'\u{0008}' => "␈",
2581+
'\u{000b}' => "␋",
2582+
'\u{000c}' => "␌",
2583+
'\u{000e}' => "␎",
2584+
'\u{000f}' => "␏",
2585+
'\u{0010}' => "␐",
2586+
'\u{0011}' => "␑",
2587+
'\u{0012}' => "␒",
2588+
'\u{0013}' => "␓",
2589+
'\u{0014}' => "␔",
2590+
'\u{0015}' => "␕",
2591+
'\u{0016}' => "␖",
2592+
'\u{0017}' => "␗",
2593+
'\u{0018}' => "␘",
2594+
'\u{0019}' => "␙",
2595+
'\u{001a}' => "␚",
2596+
'\u{001b}' => "␛",
2597+
'\u{001c}' => "␜",
2598+
'\u{001d}' => "␝",
2599+
'\u{001e}' => "␞",
2600+
'\u{001f}' => "␟",
2601+
'\u{007f}' => "␡",
2602+
'\u{200d}' => "", // Replace ZWJ for consistent terminal output of grapheme clusters.
2603+
'\u{202a}' => "�", // The following unicode text flow control characters are inconsistently
2604+
'\u{202b}' => "�", // supported across CLIs and can cause confusion due to the bytes on disk
2605+
'\u{202c}' => "�", // not corresponding to the visible source code, so we replace them always.
2606+
'\u{202d}' => "�",
2607+
'\u{202e}' => "�",
2608+
'\u{2066}' => "�",
2609+
'\u{2067}' => "�",
2610+
'\u{2068}' => "�",
2611+
'\u{2069}' => "�",
26162612
];
26172613

26182614
fn normalize_whitespace(s: &str) -> String {
26192615
// Scan the input string for a character in the ordered table above. If it's present, replace
26202616
// it with its alternative string (it can be more than 1 char!). Otherwise, retain the input
26212617
// char. At the end, allocate all chars into a string in one operation.
26222618
s.chars().fold(String::with_capacity(s.len()), |mut s, c| {
2623-
match OUTPUT_REPLACEMENTS.binary_search_by_key(&c, |(k, _)| *k) {
2624-
Ok(i) => s.push_str(OUTPUT_REPLACEMENTS[i].1),
2625-
_ => s.push(c),
2619+
match OUTPUT_REPLACEMENTS.get(&c) {
2620+
Some(r) => s.push_str(r),
2621+
None => s.push(c),
26262622
}
26272623
s
26282624
})

src/tools/tidy/src/deps.rs

+5
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,10 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[
345345
"parking_lot_core",
346346
"pathdiff",
347347
"perf-event-open-sys",
348+
"phf",
349+
"phf_generator",
350+
"phf_macros",
351+
"phf_shared",
348352
"pin-project-lite",
349353
"polonius-engine",
350354
"portable-atomic", // dependency for platforms that don't support `AtomicU64` in std
@@ -386,6 +390,7 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[
386390
"sha2",
387391
"sharded-slab",
388392
"shlex",
393+
"siphasher",
389394
"smallvec",
390395
"snap",
391396
"stable_deref_trait",

0 commit comments

Comments
 (0)