Skip to content

Commit f1c751b

Browse files
committed
Refactor emitter to better account for unicode chars when trimming
Change the way that underline positions are calculated by delaying using the "visual" column position until the last possible moment, instead using the "file"/byte position in the file, and then calculating visual positioning as late as possible. This should make the underlines more resilient to non-1-width unicode chars. Unfortunately, as part of this change (which fixes some visual bugs) comes with the loss of some eager tab codepoint handling, but the output remains legible despite some minor regression on the "margin trimming" logic.
1 parent 72326bf commit f1c751b

20 files changed

+462
-329
lines changed

compiler/rustc_errors/src/emitter.rs

Lines changed: 99 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -113,24 +113,11 @@ impl Margin {
113113
self.computed_left > 0
114114
}
115115

116-
fn was_cut_right(&self, line_len: usize) -> bool {
117-
let right =
118-
if self.computed_right == self.span_right || self.computed_right == self.label_right {
119-
// FIXME: This comment refers to the only callsite of this method.
120-
// Rephrase it or refactor it, so it can stand on its own.
121-
// Account for the "..." padding given above. Otherwise we end up with code lines
122-
// that do fit but end in "..." as if they were trimmed.
123-
// FIXME: Don't hard-code this offset. Is this meant to represent
124-
// `2 * str_width(self.margin())`?
125-
self.computed_right - 6
126-
} else {
127-
self.computed_right
128-
};
129-
right < line_len && self.computed_left + self.column_width < line_len
130-
}
131-
132116
fn compute(&mut self, max_line_len: usize) {
133117
// When there's a lot of whitespace (>20), we want to trim it as it is useless.
118+
// FIXME: this doesn't account for '\t', but to do so correctly we need to perform that
119+
// calculation later, right before printing in order to be accurate with both unicode
120+
// handling and trimming of long lines.
134121
self.computed_left = if self.whitespace_left > 20 {
135122
self.whitespace_left - 16 // We want some padding.
136123
} else {
@@ -668,43 +655,43 @@ impl HumanEmitter {
668655
width_offset: usize,
669656
code_offset: usize,
670657
margin: Margin,
671-
) {
672-
// Tabs are assumed to have been replaced by spaces in calling code.
673-
debug_assert!(!source_string.contains('\t'));
658+
) -> usize {
674659
let line_len = source_string.len();
675660
// Create the source line we will highlight.
676661
let left = margin.left(line_len);
677662
let right = margin.right(line_len);
678663
// FIXME: The following code looks fishy. See #132860.
679664
// On long lines, we strip the source line, accounting for unicode.
680-
let mut taken = 0;
681665
let code: String = source_string
682666
.chars()
683-
.skip(left)
684-
.take_while(|ch| {
685-
// Make sure that the trimming on the right will fall within the terminal width.
686-
let next = char_width(*ch);
687-
if taken + next > right - left {
688-
return false;
689-
}
690-
taken += next;
691-
true
692-
})
667+
.enumerate()
668+
.skip_while(|(i, _)| *i < left)
669+
.take_while(|(i, _)| *i < right)
670+
.map(|(_, c)| c)
693671
.collect();
672+
let code = normalize_whitespace(&code);
673+
let was_cut_right =
674+
source_string.chars().enumerate().skip_while(|(i, _)| *i < right).next().is_some();
694675
buffer.puts(line_offset, code_offset, &code, Style::Quotation);
695676
let placeholder = self.margin();
696677
if margin.was_cut_left() {
697678
// We have stripped some code/whitespace from the beginning, make it clear.
698679
buffer.puts(line_offset, code_offset, placeholder, Style::LineNumber);
699680
}
700-
if margin.was_cut_right(line_len) {
681+
if was_cut_right {
701682
let padding = str_width(placeholder);
702683
// We have stripped some code after the rightmost span end, make it clear we did so.
703-
buffer.puts(line_offset, code_offset + taken - padding, placeholder, Style::LineNumber);
684+
buffer.puts(
685+
line_offset,
686+
code_offset + str_width(&code) - padding,
687+
placeholder,
688+
Style::LineNumber,
689+
);
704690
}
705691
buffer.puts(line_offset, 0, &self.maybe_anonymized(line_index), Style::LineNumber);
706692

707693
self.draw_col_separator_no_space(buffer, line_offset, width_offset - 2);
694+
left
708695
}
709696

710697
#[instrument(level = "trace", skip(self), ret)]
@@ -736,22 +723,16 @@ impl HumanEmitter {
736723
return Vec::new();
737724
}
738725

739-
let source_string = match file.get_line(line.line_index - 1) {
740-
Some(s) => normalize_whitespace(&s),
741-
None => return Vec::new(),
726+
let Some(source_string) = file.get_line(line.line_index - 1) else {
727+
return Vec::new();
742728
};
743729
trace!(?source_string);
744730

745731
let line_offset = buffer.num_lines();
746732

747-
// Left trim
748-
let left = margin.left(source_string.len());
749-
733+
// Left trim.
750734
// FIXME: This looks fishy. See #132860.
751-
// Account for unicode characters of width !=0 that were removed.
752-
let left = source_string.chars().take(left).map(|ch| char_width(ch)).sum();
753-
754-
self.draw_line(
735+
let left = self.draw_line(
755736
buffer,
756737
&source_string,
757738
line.line_index,
@@ -1033,12 +1014,18 @@ impl HumanEmitter {
10331014
let pos = pos + 1;
10341015
match annotation.annotation_type {
10351016
AnnotationType::MultilineStart(depth) | AnnotationType::MultilineEnd(depth) => {
1017+
let pre: usize = source_string
1018+
.chars()
1019+
.take(annotation.start_col.file)
1020+
.skip(left)
1021+
.map(|c| char_width(c))
1022+
.sum();
10361023
self.draw_range(
10371024
buffer,
10381025
underline.multiline_horizontal,
10391026
line_offset + pos,
10401027
width_offset + depth,
1041-
(code_offset + annotation.start_col.display).saturating_sub(left),
1028+
code_offset + pre,
10421029
underline.style,
10431030
);
10441031
}
@@ -1061,11 +1048,18 @@ impl HumanEmitter {
10611048
let underline = self.underline(annotation.is_primary);
10621049
let pos = pos + 1;
10631050

1051+
let code_offset = code_offset
1052+
+ source_string
1053+
.chars()
1054+
.take(annotation.start_col.file)
1055+
.skip(left)
1056+
.map(|c| char_width(c))
1057+
.sum::<usize>();
10641058
if pos > 1 && (annotation.has_label() || annotation.takes_space()) {
10651059
for p in line_offset + 1..=line_offset + pos {
10661060
buffer.putc(
10671061
p,
1068-
(code_offset + annotation.start_col.display).saturating_sub(left),
1062+
code_offset,
10691063
match annotation.annotation_type {
10701064
AnnotationType::MultilineLine(_) => underline.multiline_vertical,
10711065
_ => underline.vertical_text_line,
@@ -1076,7 +1070,7 @@ impl HumanEmitter {
10761070
if let AnnotationType::MultilineStart(_) = annotation.annotation_type {
10771071
buffer.putc(
10781072
line_offset + pos,
1079-
(code_offset + annotation.start_col.display).saturating_sub(left),
1073+
code_offset,
10801074
underline.bottom_right,
10811075
underline.style,
10821076
);
@@ -1086,7 +1080,7 @@ impl HumanEmitter {
10861080
{
10871081
buffer.putc(
10881082
line_offset + pos,
1089-
(code_offset + annotation.start_col.display).saturating_sub(left),
1083+
code_offset,
10901084
underline.multiline_bottom_right_with_text,
10911085
underline.style,
10921086
);
@@ -1144,13 +1138,30 @@ impl HumanEmitter {
11441138
let style =
11451139
if annotation.is_primary { Style::LabelPrimary } else { Style::LabelSecondary };
11461140
let (pos, col) = if pos == 0 {
1147-
if annotation.end_col.display == 0 {
1148-
(pos + 1, (annotation.end_col.display + 2).saturating_sub(left))
1141+
let pre: usize = source_string
1142+
.chars()
1143+
.take(annotation.end_col.file)
1144+
.skip(left)
1145+
.map(|c| char_width(c))
1146+
.sum();
1147+
if annotation.end_col.file == 0 {
1148+
(pos + 1, (pre + 2))
11491149
} else {
1150-
(pos + 1, (annotation.end_col.display + 1).saturating_sub(left))
1150+
let pad = if annotation.end_col.file - annotation.start_col.file == 0 {
1151+
2
1152+
} else {
1153+
1
1154+
};
1155+
(pos + 1, (pre + pad))
11511156
}
11521157
} else {
1153-
(pos + 2, annotation.start_col.display.saturating_sub(left))
1158+
let pre: usize = source_string
1159+
.chars()
1160+
.take(annotation.start_col.file)
1161+
.skip(left)
1162+
.map(|c| char_width(c))
1163+
.sum();
1164+
(pos + 2, pre)
11541165
};
11551166
if let Some(ref label) = annotation.label {
11561167
buffer.puts(line_offset + pos, code_offset + col, label, style);
@@ -1183,14 +1194,35 @@ impl HumanEmitter {
11831194
// | _^ test
11841195
for &(pos, annotation) in &annotations_position {
11851196
let uline = self.underline(annotation.is_primary);
1186-
for p in annotation.start_col.display..annotation.end_col.display {
1197+
let width = annotation.end_col.file - annotation.start_col.file;
1198+
let previous: String =
1199+
source_string.chars().take(annotation.start_col.file).skip(left).collect();
1200+
let underlined: String =
1201+
source_string.chars().skip(annotation.start_col.file).take(width).collect();
1202+
debug!(?previous, ?underlined);
1203+
let code_offset = code_offset
1204+
+ source_string
1205+
.chars()
1206+
.take(annotation.start_col.file)
1207+
.skip(left)
1208+
.map(|c| char_width(c))
1209+
.sum::<usize>();
1210+
let ann_width: usize = source_string
1211+
.chars()
1212+
.skip(annotation.start_col.file)
1213+
.take(width)
1214+
.map(|c| char_width(c))
1215+
.sum();
1216+
let ann_width = if ann_width == 0
1217+
&& matches!(annotation.annotation_type, AnnotationType::Singleline)
1218+
{
1219+
1
1220+
} else {
1221+
ann_width
1222+
};
1223+
for p in 0..ann_width {
11871224
// The default span label underline.
1188-
buffer.putc(
1189-
line_offset + 1,
1190-
(code_offset + p).saturating_sub(left),
1191-
uline.underline,
1192-
uline.style,
1193-
);
1225+
buffer.putc(line_offset + 1, code_offset + p, uline.underline, uline.style);
11941226
}
11951227

11961228
if pos == 0
@@ -1202,7 +1234,7 @@ impl HumanEmitter {
12021234
// The beginning of a multiline span with its leftward moving line on the same line.
12031235
buffer.putc(
12041236
line_offset + 1,
1205-
(code_offset + annotation.start_col.display).saturating_sub(left),
1237+
code_offset,
12061238
match annotation.annotation_type {
12071239
AnnotationType::MultilineStart(_) => uline.top_right_flat,
12081240
AnnotationType::MultilineEnd(_) => uline.multiline_end_same_line,
@@ -1220,7 +1252,7 @@ impl HumanEmitter {
12201252
// so we start going down first.
12211253
buffer.putc(
12221254
line_offset + 1,
1223-
(code_offset + annotation.start_col.display).saturating_sub(left),
1255+
code_offset,
12241256
match annotation.annotation_type {
12251257
AnnotationType::MultilineStart(_) => uline.multiline_start_down,
12261258
AnnotationType::MultilineEnd(_) => uline.multiline_end_up,
@@ -1230,12 +1262,7 @@ impl HumanEmitter {
12301262
);
12311263
} else if pos != 0 && annotation.has_label() {
12321264
// The beginning of a span label with an actual label, we'll point down.
1233-
buffer.putc(
1234-
line_offset + 1,
1235-
(code_offset + annotation.start_col.display).saturating_sub(left),
1236-
uline.label_start,
1237-
uline.style,
1238-
);
1265+
buffer.putc(line_offset + 1, code_offset, uline.label_start, uline.style);
12391266
}
12401267
}
12411268

@@ -1718,17 +1745,11 @@ impl HumanEmitter {
17181745
// non-rustc_lexer::is_whitespace() chars are reported as an
17191746
// error (ex. no-break-spaces \u{a0}), and thus can't be considered
17201747
// for removal during error reporting.
1748+
// FIXME: doesn't account for '\t' properly.
17211749
let leading_whitespace = source_string
17221750
.chars()
17231751
.take_while(|c| rustc_lexer::is_whitespace(*c))
1724-
.map(|c| {
1725-
match c {
1726-
// Tabs are displayed as 4 spaces
1727-
'\t' => 4,
1728-
_ => 1,
1729-
}
1730-
})
1731-
.sum();
1752+
.count();
17321753
if source_string.chars().any(|c| !rustc_lexer::is_whitespace(c)) {
17331754
whitespace_margin = min(whitespace_margin, leading_whitespace);
17341755
}
@@ -1742,8 +1763,8 @@ impl HumanEmitter {
17421763
let mut span_left_margin = usize::MAX;
17431764
for line in &annotated_file.lines {
17441765
for ann in &line.annotations {
1745-
span_left_margin = min(span_left_margin, ann.start_col.display);
1746-
span_left_margin = min(span_left_margin, ann.end_col.display);
1766+
span_left_margin = min(span_left_margin, ann.start_col.file);
1767+
span_left_margin = min(span_left_margin, ann.end_col.file);
17471768
}
17481769
}
17491770
if span_left_margin == usize::MAX {
@@ -1763,12 +1784,12 @@ impl HumanEmitter {
17631784
.map_or(0, |s| s.len()),
17641785
);
17651786
for ann in &line.annotations {
1766-
span_right_margin = max(span_right_margin, ann.start_col.display);
1767-
span_right_margin = max(span_right_margin, ann.end_col.display);
1787+
span_right_margin = max(span_right_margin, ann.start_col.file);
1788+
span_right_margin = max(span_right_margin, ann.end_col.file);
17681789
// FIXME: account for labels not in the same line
17691790
let label_right = ann.label.as_ref().map_or(0, |l| l.len() + 1);
17701791
label_right_margin =
1771-
max(label_right_margin, ann.end_col.display + label_right);
1792+
max(label_right_margin, ann.end_col.file + label_right);
17721793
}
17731794
}
17741795

tests/ui/codemap_tests/tab_2.stderr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ error[E0765]: unterminated double quote string
44
LL | """;
55
| ___________________^
66
LL | | }
7-
| |_^
7+
| |__^
88

99
error: aborting due to 1 previous error
1010

tests/ui/diagnostic-width/long-span.long.stderr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
error[E0308]: mismatched types
22
╭▸ $DIR/long-span.rs:7:15
33
4-
LL │ …u8 = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
4+
LL │ …u8 = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
55
╰╴ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━…━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ expected `u8`, found `[{integer}; 1680]`
66

77
error: aborting due to 1 previous error

tests/ui/diagnostic-width/long-span.longest.stderr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
error[E0308]: mismatched types
22
--> $DIR/long-span.rs:7:15
33
|
4-
LL | ... = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
4+
LL | ... = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
55
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^...^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ expected `u8`, found `[{integer}; 1680]`
66

77
error: aborting due to 1 previous error

tests/ui/diagnostic-width/long-span.short.stderr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
error[E0308]: mismatched types
22
╭▸ $DIR/long-span.rs:7:15
33
4-
LL │ …u8 = [0, 0, 0…0]
4+
LL │ …u8 = [0, 0, 0…0];
55
╰╴ ━━━━━━━━…━━ expected `u8`, found `[{integer}; 1680]`
66

77
error: aborting due to 1 previous error

tests/ui/diagnostic-width/long-span.shortest.stderr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
error[E0308]: mismatched types
22
--> $DIR/long-span.rs:7:15
33
|
4-
LL | ... = [0, 0, 0......
4+
LL | ... = [0, 0, 0...0];
55
| ^^^^^^^^...^^ expected `u8`, found `[{integer}; 1680]`
66

77
error: aborting due to 1 previous error

0 commit comments

Comments
 (0)