Skip to content

Commit dbdfa6e

Browse files
committed
Fix byte offsets after normalization
1 parent fda0359 commit dbdfa6e

File tree

6 files changed

+102
-20
lines changed

6 files changed

+102
-20
lines changed

src/librustc/ich/impls_syntax.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,7 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
424424
ref lines,
425425
ref multibyte_chars,
426426
ref non_narrow_chars,
427+
ref normalized_pos,
427428
} = *self;
428429

429430
(name_hash as u64).hash_stable(hcx, hasher);
@@ -452,6 +453,12 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
452453
for &char_pos in non_narrow_chars.iter() {
453454
stable_non_narrow_char(char_pos, start_pos).hash_stable(hcx, hasher);
454455
}
456+
457+
normalized_pos.len().hash_stable(hcx, hasher);
458+
for &char_pos in normalized_pos.iter() {
459+
stable_normalized_pos(char_pos, start_pos).hash_stable(hcx, hasher);
460+
}
461+
455462
}
456463
}
457464

@@ -481,6 +488,18 @@ fn stable_non_narrow_char(swc: ::syntax_pos::NonNarrowChar,
481488
(pos.0 - source_file_start.0, width as u32)
482489
}
483490

491+
fn stable_normalized_pos(np: ::syntax_pos::NormalizedPos,
492+
source_file_start: ::syntax_pos::BytePos)
493+
-> (u32, u32) {
494+
let ::syntax_pos::NormalizedPos {
495+
pos,
496+
diff
497+
} = np;
498+
499+
(pos.0 - source_file_start.0, diff as u32)
500+
}
501+
502+
484503
impl<'tcx> HashStable<StableHashingContext<'tcx>> for feature_gate::Features {
485504
fn hash_stable(&self, hcx: &mut StableHashingContext<'tcx>, hasher: &mut StableHasher) {
486505
// Unfortunately we cannot exhaustively list fields here, since the

src/librustc_metadata/decoder.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1330,6 +1330,7 @@ impl<'a, 'tcx> CrateMetadata {
13301330
mut lines,
13311331
mut multibyte_chars,
13321332
mut non_narrow_chars,
1333+
mut normalized_pos,
13331334
name_hash,
13341335
.. } = source_file_to_import;
13351336

@@ -1349,6 +1350,9 @@ impl<'a, 'tcx> CrateMetadata {
13491350
for swc in &mut non_narrow_chars {
13501351
*swc = *swc - start_pos;
13511352
}
1353+
for np in &mut normalized_pos {
1354+
np.pos = np.pos - start_pos;
1355+
}
13521356

13531357
let local_version = local_source_map.new_imported_source_file(name,
13541358
name_was_remapped,
@@ -1358,7 +1362,8 @@ impl<'a, 'tcx> CrateMetadata {
13581362
source_length,
13591363
lines,
13601364
multibyte_chars,
1361-
non_narrow_chars);
1365+
non_narrow_chars,
1366+
normalized_pos);
13621367
debug!("CrateMetaData::imported_source_files alloc \
13631368
source_file {:?} original (start_pos {:?} end_pos {:?}) \
13641369
translated (start_pos {:?} end_pos {:?})",

src/libsyntax/json.rs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -333,10 +333,24 @@ impl DiagnosticSpan {
333333
})
334334
});
335335

336+
let file_np = &start.file.normalized_pos;
337+
let np_start_diff = match file_np.binary_search_by(
338+
|np| np.pos.cmp(&span.lo())) {
339+
Ok(i) => file_np[i].diff,
340+
Err(i) if i == 0 => 0,
341+
Err(i) => file_np[i-1].diff,
342+
};
343+
let np_end_diff = match file_np.binary_search_by(
344+
|np| np.pos.cmp(&span.hi())) {
345+
Ok(i) => file_np[i].diff,
346+
Err(i) if i == 0 => 0,
347+
Err(i) => file_np[i-1].diff,
348+
};
349+
336350
DiagnosticSpan {
337351
file_name: start.file.name.to_string(),
338-
byte_start: span.lo().0 - start.file.start_pos.0,
339-
byte_end: span.hi().0 - start.file.start_pos.0,
352+
byte_start: span.lo().0 - start.file.start_pos.0 + np_start_diff,
353+
byte_end: span.hi().0 - start.file.start_pos.0 + np_end_diff,
340354
line_start: start.line,
341355
line_end: end.line,
342356
column_start: start.col.0 + 1,

src/libsyntax/source_map.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,7 @@ impl SourceMap {
283283
mut file_local_lines: Vec<BytePos>,
284284
mut file_local_multibyte_chars: Vec<MultiByteChar>,
285285
mut file_local_non_narrow_chars: Vec<NonNarrowChar>,
286+
mut file_local_normalized_pos: Vec<NormalizedPos>,
286287
) -> Lrc<SourceFile> {
287288
let start_pos = self.next_start_pos();
288289

@@ -301,6 +302,10 @@ impl SourceMap {
301302
*swc = *swc + start_pos;
302303
}
303304

305+
for nc in &mut file_local_normalized_pos {
306+
nc.pos = nc.pos + start_pos;
307+
}
308+
304309
let source_file = Lrc::new(SourceFile {
305310
name: filename,
306311
name_was_remapped,
@@ -314,6 +319,7 @@ impl SourceMap {
314319
lines: file_local_lines,
315320
multibyte_chars: file_local_multibyte_chars,
316321
non_narrow_chars: file_local_non_narrow_chars,
322+
normalized_pos: file_local_normalized_pos,
317323
name_hash,
318324
});
319325

src/libsyntax_pos/lib.rs

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -849,6 +849,15 @@ impl Sub<BytePos> for NonNarrowChar {
849849
}
850850
}
851851

852+
/// Identifies the offset of a character that was removed from a `SourceFile` during normalization.
853+
#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq, Debug)]
854+
pub struct NormalizedPos {
855+
/// The absolute offset of the character in the `SourceMap`.
856+
pub pos: BytePos,
857+
/// The cumulative number of bytes removed from the original string before this position.
858+
pub diff: u32,
859+
}
860+
852861
/// The state of the lazy external source loading mechanism of a `SourceFile`.
853862
#[derive(PartialEq, Eq, Clone)]
854863
pub enum ExternalSource {
@@ -912,6 +921,8 @@ pub struct SourceFile {
912921
pub multibyte_chars: Vec<MultiByteChar>,
913922
/// Width of characters that are not narrow in the source code.
914923
pub non_narrow_chars: Vec<NonNarrowChar>,
924+
/// Locations of characters removed during normalization.
925+
pub normalized_pos: Vec<NormalizedPos>,
915926
/// A hash of the filename, used for speeding up hashing in incremental compilation.
916927
pub name_hash: u128,
917928
}
@@ -978,6 +989,9 @@ impl Encodable for SourceFile {
978989
})?;
979990
s.emit_struct_field("name_hash", 8, |s| {
980991
self.name_hash.encode(s)
992+
})?;
993+
s.emit_struct_field("normalized_pos", 9, |s| {
994+
self.normalized_pos.encode(s)
981995
})
982996
})
983997
}
@@ -1028,6 +1042,8 @@ impl Decodable for SourceFile {
10281042
d.read_struct_field("non_narrow_chars", 7, |d| Decodable::decode(d))?;
10291043
let name_hash: u128 =
10301044
d.read_struct_field("name_hash", 8, |d| Decodable::decode(d))?;
1045+
let normalized_pos: Vec<NormalizedPos> =
1046+
d.read_struct_field("normalized_pos", 9, |d| Decodable::decode(d))?;
10311047
Ok(SourceFile {
10321048
name,
10331049
name_was_remapped,
@@ -1044,6 +1060,7 @@ impl Decodable for SourceFile {
10441060
lines,
10451061
multibyte_chars,
10461062
non_narrow_chars,
1063+
normalized_pos,
10471064
name_hash,
10481065
})
10491066
})
@@ -1062,8 +1079,7 @@ impl SourceFile {
10621079
unmapped_path: FileName,
10631080
mut src: String,
10641081
start_pos: BytePos) -> Result<SourceFile, OffsetOverflowError> {
1065-
remove_bom(&mut src);
1066-
normalize_newlines(&mut src);
1082+
let normalized_pos = normalize_src(&mut src);
10671083

10681084
let src_hash = {
10691085
let mut hasher: StableHasher = StableHasher::new();
@@ -1096,6 +1112,7 @@ impl SourceFile {
10961112
lines,
10971113
multibyte_chars,
10981114
non_narrow_chars,
1115+
normalized_pos,
10991116
name_hash,
11001117
})
11011118
}
@@ -1224,18 +1241,27 @@ impl SourceFile {
12241241
}
12251242
}
12261243

1244+
/// Normalizes the source code and records the normalizations.
1245+
fn normalize_src(src: &mut String) -> Vec<NormalizedPos> {
1246+
let mut normalized_pos = vec![];
1247+
remove_bom(src, &mut normalized_pos);
1248+
normalize_newlines(src, &mut normalized_pos);
1249+
normalized_pos
1250+
}
1251+
12271252
/// Removes UTF-8 BOM, if any.
1228-
fn remove_bom(src: &mut String) {
1253+
fn remove_bom(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) {
12291254
if src.starts_with("\u{feff}") {
12301255
src.drain(..3);
1256+
normalized_pos.push(NormalizedPos { pos: BytePos(0), diff: 3 });
12311257
}
12321258
}
12331259

12341260

12351261
/// Replaces `\r\n` with `\n` in-place in `src`.
12361262
///
12371263
/// Returns error if there's a lone `\r` in the string
1238-
fn normalize_newlines(src: &mut String) {
1264+
fn normalize_newlines(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) {
12391265
if !src.as_bytes().contains(&b'\r') {
12401266
return;
12411267
}
@@ -1248,6 +1274,8 @@ fn normalize_newlines(src: &mut String) {
12481274
let mut buf = std::mem::replace(src, String::new()).into_bytes();
12491275
let mut gap_len = 0;
12501276
let mut tail = buf.as_mut_slice();
1277+
let mut cursor = 0;
1278+
let original_gap = normalized_pos.last().map_or(0, |l| l.diff);
12511279
loop {
12521280
let idx = match find_crlf(&tail[gap_len..]) {
12531281
None => tail.len(),
@@ -1258,7 +1286,12 @@ fn normalize_newlines(src: &mut String) {
12581286
if tail.len() == gap_len {
12591287
break;
12601288
}
1289+
cursor += idx - gap_len;
12611290
gap_len += 1;
1291+
normalized_pos.push(NormalizedPos {
1292+
pos: BytePos::from_usize(cursor + 1),
1293+
diff: original_gap + gap_len as u32,
1294+
});
12621295
}
12631296

12641297
// Account for removed `\r`.

src/libsyntax_pos/tests.rs

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,25 @@ fn test_lookup_line() {
1919

2020
#[test]
2121
fn test_normalize_newlines() {
22-
fn check(before: &str, after: &str) {
22+
fn check(before: &str, after: &str, expected_positions: &[u32]) {
2323
let mut actual = before.to_string();
24-
normalize_newlines(&mut actual);
24+
let mut actual_positions = vec![];
25+
normalize_newlines(&mut actual, &mut actual_positions);
26+
let actual_positions : Vec<_> = actual_positions
27+
.into_iter()
28+
.map(|nc| nc.pos.0).collect();
2529
assert_eq!(actual.as_str(), after);
30+
assert_eq!(actual_positions, expected_positions);
2631
}
27-
check("", "");
28-
check("\n", "\n");
29-
check("\r", "\r");
30-
check("\r\r", "\r\r");
31-
check("\r\n", "\n");
32-
check("hello world", "hello world");
33-
check("hello\nworld", "hello\nworld");
34-
check("hello\r\nworld", "hello\nworld");
35-
check("\r\nhello\r\nworld\r\n", "\nhello\nworld\n");
36-
check("\r\r\n", "\r\n");
37-
check("hello\rworld", "hello\rworld");
32+
check("", "", &[]);
33+
check("\n", "\n", &[]);
34+
check("\r", "\r", &[]);
35+
check("\r\r", "\r\r", &[]);
36+
check("\r\n", "\n", &[1]);
37+
check("hello world", "hello world", &[]);
38+
check("hello\nworld", "hello\nworld", &[]);
39+
check("hello\r\nworld", "hello\nworld", &[6]);
40+
check("\r\nhello\r\nworld\r\n", "\nhello\nworld\n", &[1, 7, 13]);
41+
check("\r\r\n", "\r\n", &[2]);
42+
check("hello\rworld", "hello\rworld", &[]);
3843
}

0 commit comments

Comments
 (0)