-
Notifications
You must be signed in to change notification settings - Fork 13.4k
Change output normalization logic to be linear against size of output #128200
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -2559,60 +2559,66 @@ fn num_decimal_digits(num: usize) -> usize { | |||||
|
||||||
// We replace some characters so the CLI output is always consistent and underlines aligned. | ||||||
// Keep the following list in sync with `rustc_span::char_width`. | ||||||
// ATTENTION: keep lexicographically sorted so that the binary search will work | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[ | ||||||
('\t', " "), // We do our own tab replacement | ||||||
('\u{200D}', ""), // Replace ZWJ with nothing for consistent terminal output of grapheme clusters. | ||||||
('\u{202A}', "�"), // The following unicode text flow control characters are inconsistently | ||||||
('\u{202B}', "�"), // supported across CLIs and can cause confusion due to the bytes on disk | ||||||
('\u{202D}', "�"), // not corresponding to the visible source code, so we replace them always. | ||||||
('\u{202E}', "�"), | ||||||
// In terminals without Unicode support the following will be garbled, but in *all* terminals | ||||||
// the underlying codepoint will be as well. We could gate this replacement behind a "unicode | ||||||
// support" gate. | ||||||
('\0', "␀"), | ||||||
('\u{1}', "␁"), | ||||||
('\u{2}', "␂"), | ||||||
('\u{3}', "␃"), | ||||||
('\u{4}', "␄"), | ||||||
('\u{5}', "␅"), | ||||||
('\u{6}', "␆"), | ||||||
('\u{7}', "␇"), | ||||||
('\u{8}', "␈"), | ||||||
('\t', " "), // We do our own tab replacement | ||||||
('\u{b}', "␋"), | ||||||
('\u{c}', "␌"), | ||||||
('\r', "␍"), | ||||||
('\u{e}', "␎"), | ||||||
('\u{f}', "␏"), | ||||||
('\u{10}', "␐"), | ||||||
('\u{11}', "␑"), | ||||||
('\u{12}', "␒"), | ||||||
('\u{13}', "␓"), | ||||||
('\u{14}', "␔"), | ||||||
('\u{15}', "␕"), | ||||||
('\u{16}', "␖"), | ||||||
('\u{17}', "␗"), | ||||||
('\u{18}', "␘"), | ||||||
('\u{19}', "␙"), | ||||||
('\u{1a}', "␚"), | ||||||
('\u{1b}', "␛"), | ||||||
('\u{1c}', "␜"), | ||||||
('\u{1d}', "␝"), | ||||||
('\u{1e}', "␞"), | ||||||
('\u{1f}', "␟"), | ||||||
('\u{7f}', "␡"), | ||||||
('\u{200d}', ""), // Replace ZWJ for consistent terminal output of grapheme clusters. | ||||||
('\u{202a}', "�"), // The following unicode text flow control characters are inconsistently | ||||||
('\u{202b}', "�"), // supported across CLIs and can cause confusion due to the bytes on disk | ||||||
('\u{202c}', "�"), // not corresponding to the visible source code, so we replace them always. | ||||||
('\u{202d}', "�"), | ||||||
('\u{202e}', "�"), | ||||||
('\u{2066}', "�"), | ||||||
('\u{2067}', "�"), | ||||||
('\u{2068}', "�"), | ||||||
('\u{202C}', "�"), | ||||||
('\u{2069}', "�"), | ||||||
// In terminals without Unicode support the following will be garbled, but in *all* terminals | ||||||
// the underlying codepoint will be as well. We could gate this replacement behind a "unicode | ||||||
// support" gate. | ||||||
('\u{0000}', "␀"), | ||||||
('\u{0001}', "␁"), | ||||||
('\u{0002}', "␂"), | ||||||
('\u{0003}', "␃"), | ||||||
('\u{0004}', "␄"), | ||||||
('\u{0005}', "␅"), | ||||||
('\u{0006}', "␆"), | ||||||
('\u{0007}', "␇"), | ||||||
('\u{0008}', "␈"), | ||||||
('\u{000B}', "␋"), | ||||||
('\u{000C}', "␌"), | ||||||
('\u{000D}', "␍"), | ||||||
('\u{000E}', "␎"), | ||||||
('\u{000F}', "␏"), | ||||||
('\u{0010}', "␐"), | ||||||
('\u{0011}', "␑"), | ||||||
('\u{0012}', "␒"), | ||||||
('\u{0013}', "␓"), | ||||||
('\u{0014}', "␔"), | ||||||
('\u{0015}', "␕"), | ||||||
('\u{0016}', "␖"), | ||||||
('\u{0017}', "␗"), | ||||||
('\u{0018}', "␘"), | ||||||
('\u{0019}', "␙"), | ||||||
('\u{001A}', "␚"), | ||||||
('\u{001B}', "␛"), | ||||||
('\u{001C}', "␜"), | ||||||
('\u{001D}', "␝"), | ||||||
('\u{001E}', "␞"), | ||||||
('\u{001F}', "␟"), | ||||||
('\u{007F}', "␡"), | ||||||
]; | ||||||
|
||||||
fn normalize_whitespace(str: &str) -> String { | ||||||
let mut s = str.to_string(); | ||||||
for (c, replacement) in OUTPUT_REPLACEMENTS { | ||||||
s = s.replace(*c, replacement); | ||||||
} | ||||||
s | ||||||
fn normalize_whitespace(s: &str) -> String { | ||||||
// Scan the input string for a character in the ordered table above. If it's present, replace | ||||||
// it with its alternative string (it can be more than 1 char!). Otherwise, retain the input | ||||||
// char. The output is accumulated in a single `String` preallocated to the input's length. | ||||||
Comment on lines
+2614
to
+2616
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This comment seems to be a bit inaccurate. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, it was accurate, before I changed it to the current algo. Given this is already r+d, I'll do a follow up (unless you want to do that as part of your draft PR). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've already included it in the draft, yes, so I guess we can do it as a part of it then. There are two drafts actually. One adding const asserts (#128465) and one using perfect hashing (#128463). They both need this one to land first, after which I guess the former can be reviewed independently of the latter, which needs some additional benchmarking. |
||||||
s.chars().fold(String::with_capacity(s.len()), |mut s, c| { | ||||||
match OUTPUT_REPLACEMENTS.binary_search_by_key(&c, |(k, _)| *k) { | ||||||
Ok(i) => s.push_str(OUTPUT_REPLACEMENTS[i].1), | ||||||
_ => s.push(c), | ||||||
} | ||||||
s | ||||||
}) | ||||||
} | ||||||
|
||||||
fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) { | ||||||
|
Uh oh!
There was an error while loading. Please reload this page.