Skip to content

Commit 67bd424

Browse files
committed
proc_macro: stop using a remote object handle for Ident
Validating and NFC-normalizing identifiers requires a dependency on `unicode-normalization` and `rustc_lexer`, which is currently not possible for `proc_macro`. Instead, a second `extern "C" fn` is provided by the compiler server to perform these steps from any thread. String values are interned in both the server and client, meaning that identifiers can be stringified without any RPC roundtrips and without substantially inflating their size. RPC messages passing symbols include the full un-interned value, and are re-interned on the receiving side. This could potentially be optimized in the future. The symbol infrastructure will also be used for literals in a following part.
1 parent ff17109 commit 67bd424

File tree

11 files changed

+326
-126
lines changed

11 files changed

+326
-126
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3886,6 +3886,7 @@ dependencies = [
38863886
"rustc_span",
38873887
"smallvec",
38883888
"tracing",
3889+
"unicode-normalization",
38893890
]
38903891

38913892
[[package]]

compiler/rustc_expand/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,4 @@ rustc_parse = { path = "../rustc_parse" }
2424
rustc_session = { path = "../rustc_session" }
2525
smallvec = { version = "1.6.1", features = ["union", "may_dangle"] }
2626
rustc_ast = { path = "../rustc_ast" }
27+
unicode-normalization = "0.1.11"

compiler/rustc_expand/src/proc_macro_server.rs

Lines changed: 34 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,16 @@ use rustc_ast_pretty::pprust;
77
use rustc_data_structures::fx::FxHashMap;
88
use rustc_data_structures::sync::Lrc;
99
use rustc_errors::{Diagnostic, MultiSpan, PResult};
10-
use rustc_parse::lexer::nfc_normalize;
1110
use rustc_parse::parse_stream_from_source_str;
1211
use rustc_session::parse::ParseSess;
1312
use rustc_span::def_id::CrateNum;
14-
use rustc_span::symbol::{self, kw, sym, Symbol};
13+
use rustc_span::symbol::{self, sym, Symbol};
1514
use rustc_span::{BytePos, FileName, Pos, SourceFile, Span};
1615

17-
use pm::bridge::{server, DelimSpan, Group, Punct, TokenTree};
16+
use pm::bridge::{server, DelimSpan, Group, Ident, Punct, TokenTree};
1817
use pm::{Delimiter, Level, LineColumn};
18+
use std::ascii;
1919
use std::ops::Bound;
20-
use std::{ascii, panic};
2120

2221
trait FromInternal<T> {
2322
fn from_internal(x: T) -> Self;
@@ -50,7 +49,7 @@ impl ToInternal<token::Delimiter> for Delimiter {
5049
}
5150

5251
impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)>
53-
for Vec<TokenTree<TokenStream, Span, Ident, Literal>>
52+
for Vec<TokenTree<TokenStream, Span, Symbol, Literal>>
5453
{
5554
fn from_internal((stream, rustc): (TokenStream, &mut Rustc<'_, '_>)) -> Self {
5655
use rustc_ast::token::*;
@@ -84,9 +83,6 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)>
8483
span,
8584
}))
8685
);
87-
($ty:ident::$method:ident($($value:expr),*)) => (
88-
trees.push(TokenTree::$ty(self::$ty::$method($($value,)* span)))
89-
);
9086
}
9187
macro_rules! op {
9288
($a:expr) => {{
@@ -152,12 +148,11 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)>
152148
Question => op!('?'),
153149
SingleQuote => op!('\''),
154150

155-
Ident(name, false) if name == kw::DollarCrate => tt!(Ident::dollar_crate()),
156-
Ident(name, is_raw) => tt!(Ident::new(rustc.sess(), name, is_raw)),
151+
Ident(sym, is_raw) => tt!(Ident { sym, is_raw }),
157152
Lifetime(name) => {
158153
let ident = symbol::Ident::new(name, span).without_first_quote();
159154
tt!(Punct { ch: '\'', joint: true });
160-
tt!(Ident::new(rustc.sess(), ident.name, false));
155+
tt!(Ident { sym: ident.name, is_raw: false });
161156
}
162157
Literal(lit) => tt!(Literal { lit }),
163158
DocComment(_, attr_style, data) => {
@@ -185,7 +180,7 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)>
185180
}
186181

187182
Interpolated(nt) if let NtIdent(ident, is_raw) = *nt => {
188-
trees.push(TokenTree::Ident(Ident::new(rustc.sess(), ident.name, is_raw, ident.span)))
183+
trees.push(TokenTree::Ident(Ident { sym: ident.name, is_raw, span: ident.span }))
189184
}
190185

191186
Interpolated(nt) => {
@@ -209,7 +204,7 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)>
209204
}
210205
}
211206

212-
impl ToInternal<TokenStream> for TokenTree<TokenStream, Span, Ident, Literal> {
207+
impl ToInternal<TokenStream> for TokenTree<TokenStream, Span, Symbol, Literal> {
213208
fn to_internal(self) -> TokenStream {
214209
use rustc_ast::token::*;
215210

@@ -298,32 +293,6 @@ impl ToInternal<rustc_errors::Level> for Level {
298293

299294
pub struct FreeFunctions;
300295

301-
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
302-
pub struct Ident {
303-
sym: Symbol,
304-
is_raw: bool,
305-
span: Span,
306-
}
307-
308-
impl Ident {
309-
fn new(sess: &ParseSess, sym: Symbol, is_raw: bool, span: Span) -> Ident {
310-
let sym = nfc_normalize(sym.as_str());
311-
let string = sym.as_str();
312-
if !rustc_lexer::is_ident(string) {
313-
panic!("`{:?}` is not a valid identifier", string)
314-
}
315-
if is_raw && !sym.can_be_raw() {
316-
panic!("`{}` cannot be a raw identifier", string);
317-
}
318-
sess.symbol_gallery.insert(sym, span);
319-
Ident { sym, is_raw, span }
320-
}
321-
fn dollar_crate(span: Span) -> Ident {
322-
// `$crate` is accepted as an ident only if it comes from the compiler.
323-
Ident { sym: kw::DollarCrate, is_raw: false, span }
324-
}
325-
}
326-
327296
// FIXME(eddyb) `Literal` should not expose internal `Debug` impls.
328297
#[derive(Clone, Debug)]
329298
pub struct Literal {
@@ -368,12 +337,12 @@ impl<'a, 'b> Rustc<'a, 'b> {
368337
impl server::Types for Rustc<'_, '_> {
369338
type FreeFunctions = FreeFunctions;
370339
type TokenStream = TokenStream;
371-
type Ident = Ident;
372340
type Literal = Literal;
373341
type SourceFile = Lrc<SourceFile>;
374342
type MultiSpan = Vec<Span>;
375343
type Diagnostic = Diagnostic;
376344
type Span = Span;
345+
type Symbol = Symbol;
377346
}
378347

379348
impl server::FreeFunctions for Rustc<'_, '_> {
@@ -456,14 +425,14 @@ impl server::TokenStream for Rustc<'_, '_> {
456425
}
457426
fn from_token_tree(
458427
&mut self,
459-
tree: TokenTree<Self::TokenStream, Self::Span, Self::Ident, Self::Literal>,
428+
tree: TokenTree<Self::TokenStream, Self::Span, Self::Symbol, Self::Literal>,
460429
) -> Self::TokenStream {
461430
tree.to_internal()
462431
}
463432
fn concat_trees(
464433
&mut self,
465434
base: Option<Self::TokenStream>,
466-
trees: Vec<TokenTree<Self::TokenStream, Self::Span, Self::Ident, Self::Literal>>,
435+
trees: Vec<TokenTree<Self::TokenStream, Self::Span, Self::Symbol, Self::Literal>>,
467436
) -> Self::TokenStream {
468437
let mut builder = tokenstream::TokenStreamBuilder::new();
469438
if let Some(base) = base {
@@ -491,23 +460,11 @@ impl server::TokenStream for Rustc<'_, '_> {
491460
fn into_iter(
492461
&mut self,
493462
stream: Self::TokenStream,
494-
) -> Vec<TokenTree<Self::TokenStream, Self::Span, Self::Ident, Self::Literal>> {
463+
) -> Vec<TokenTree<Self::TokenStream, Self::Span, Self::Symbol, Self::Literal>> {
495464
FromInternal::from_internal((stream, self))
496465
}
497466
}
498467

499-
impl server::Ident for Rustc<'_, '_> {
500-
fn new(&mut self, string: &str, span: Self::Span, is_raw: bool) -> Self::Ident {
501-
Ident::new(self.sess(), Symbol::intern(string), is_raw, span)
502-
}
503-
fn span(&mut self, ident: Self::Ident) -> Self::Span {
504-
ident.span
505-
}
506-
fn with_span(&mut self, ident: Self::Ident, span: Self::Span) -> Self::Ident {
507-
Ident { span, ..ident }
508-
}
509-
}
510-
511468
impl server::Literal for Rustc<'_, '_> {
512469
fn from_str(&mut self, s: &str) -> Result<Self::Literal, ()> {
513470
let name = FileName::proc_macro_source_code(s);
@@ -789,4 +746,26 @@ impl server::Context for Rustc<'_, '_> {
789746
fn mixed_site(&mut self) -> Self::Span {
790747
self.mixed_site
791748
}
749+
750+
// NOTE: May be run on any thread, so cannot use `nfc_normalize`
751+
fn validate_ident(s: &str) -> Result<Option<String>, ()> {
752+
use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization};
753+
let normalized: Option<String> = match is_nfc_quick(s.chars()) {
754+
IsNormalized::Yes => None,
755+
_ => Some(s.chars().nfc().collect()),
756+
};
757+
if rustc_lexer::is_ident(normalized.as_ref().map(|s| &s[..]).unwrap_or(s)) {
758+
Ok(normalized)
759+
} else {
760+
Err(())
761+
}
762+
}
763+
764+
fn intern_symbol(string: &str) -> Self::Symbol {
765+
Symbol::intern(string)
766+
}
767+
768+
fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)) {
769+
f(&symbol.as_str())
770+
}
792771
}

library/proc_macro/src/bridge/buffer.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,41 @@
11
//! Buffer management for same-process client<->server communication.
22
33
use std::io::{self, Write};
4+
use std::marker::PhantomData;
45
use std::mem;
56
use std::ops::{Deref, DerefMut};
67
use std::slice;
78

9+
#[repr(C)]
10+
pub struct Slice<'a> {
11+
data: *const u8,
12+
len: usize,
13+
_marker: PhantomData<&'a [u8]>,
14+
}
15+
16+
unsafe impl<'a> Send for Slice<'a> {}
17+
unsafe impl<'a> Sync for Slice<'a> {}
18+
19+
impl<'a> Copy for Slice<'a> {}
20+
impl<'a> Clone for Slice<'a> {
21+
fn clone(&self) -> Self {
22+
*self
23+
}
24+
}
25+
26+
impl<'a> From<&'a [u8]> for Slice<'a> {
27+
fn from(xs: &'a [u8]) -> Self {
28+
Slice { data: xs.as_ptr(), len: xs.len(), _marker: PhantomData }
29+
}
30+
}
31+
32+
impl<'a> Deref for Slice<'a> {
33+
type Target = [u8];
34+
fn deref(&self) -> &[u8] {
35+
unsafe { slice::from_raw_parts(self.data, self.len) }
36+
}
37+
}
38+
839
#[repr(C)]
940
pub struct Buffer {
1041
data: *mut u8,

0 commit comments

Comments
 (0)