Skip to content

Commit 35af4e2

Browse files
committed
Normalize identifiers in librustc_parse.
1 parent c605199 commit 35af4e2

File tree

3 files changed

+22
-4
lines changed

3 files changed

+22
-4
lines changed

Cargo.lock

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3770,6 +3770,7 @@ dependencies = [
37703770
"smallvec 1.0.0",
37713771
"syntax",
37723772
"syntax_pos",
3773+
"unicode-normalization",
37733774
]
37743775

37753776
[[package]]
@@ -4976,9 +4977,12 @@ dependencies = [
49764977

49774978
[[package]]
49784979
name = "unicode-normalization"
4979-
version = "0.1.7"
4980+
version = "0.1.11"
49804981
source = "registry+https://github.com/rust-lang/crates.io-index"
4981-
checksum = "6a0180bc61fc5a987082bfa111f4cc95c4caff7f9799f3e46df09163a937aa25"
4982+
checksum = "b561e267b2326bb4cebfc0ef9e68355c7abe6c6f522aeac2f5bf95d56c59bdcf"
4983+
dependencies = [
4984+
"smallvec 1.0.0",
4985+
]
49824986

49834987
[[package]]
49844988
name = "unicode-segmentation"

src/librustc_parse/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,4 @@ rustc_error_codes = { path = "../librustc_error_codes" }
2020
smallvec = { version = "1.0", features = ["union", "may_dangle"] }
2121
syntax_pos = { path = "../libsyntax_pos" }
2222
syntax = { path = "../libsyntax" }
23+
unicode-normalization = "0.1.11"

src/librustc_parse/lexer/mod.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -219,8 +219,7 @@ impl<'a> StringReader<'a> {
219219
if is_raw_ident {
220220
ident_start = ident_start + BytePos(2);
221221
}
222-
// FIXME: perform NFKC normalization here. (Issue #2253)
223-
let sym = self.symbol_from(ident_start);
222+
let sym = self.nfc_symbol_from(ident_start);
224223
if is_raw_ident {
225224
let span = self.mk_sp(start, self.pos);
226225
if !sym.can_be_raw() {
@@ -465,6 +464,20 @@ impl<'a> StringReader<'a> {
465464
Symbol::intern(self.str_from_to(start, end))
466465
}
467466

467+
/// As `symbol_from`, with the text normalized into Unicode NFC form.
///
/// Takes the text obtained from `self.str_from(start)` (the debug message
/// below describes it as running from `start` to `self.pos`) and interns it
/// as a `Symbol`. Text that the quick check reports as already being in NFC
/// is interned directly; anything else is first rewritten into NFC, so that
/// differently-encoded spellings of the same identifier intern to the same
/// `Symbol`.
468+
fn nfc_symbol_from(&self, start: BytePos) -> Symbol {
469+
// Imported locally: these names are only needed by this helper.
use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization};
470+
debug!("taking an normalized ident from {:?} to {:?}", start, self.pos);
471+
// Despite the name, `sym` is still the raw, un-interned identifier text.
let sym = self.str_from(start);
472+
// `is_nfc_quick` is a cheap pre-check; only a definite `Yes` lets us skip
// the full normalization pass.
match is_nfc_quick(sym.chars()) {
473+
// Fast path: already NFC, so intern the text as-is without building
// a new `String`.
IsNormalized::Yes => Symbol::intern(sym),
474+
// Any other answer (per the crate docs, `No` or `Maybe`) is treated
// conservatively: normalize and intern the result.
_ => {
475+
let sym_str: String = sym.chars().nfc().collect();
476+
Symbol::intern(&sym_str)
477+
}
478+
}
479+
}
480+
468481
/// Slice of the source text spanning from `start` up to but excluding `end`.
469482
fn str_from_to(&self, start: BytePos, end: BytePos) -> &str
470483
{

0 commit comments

Comments
 (0)