Skip to content

Commit 491fccf

Browse files
committed
proc_macro: stop using a remote object handle for Ident
Validating all Unicode identifiers locally would require a dependency on `unicode-normalization` and `rustc_lexer`, which is currently not possible for `proc_macro` due to it being built concurrently with `std` and `core`. Instead, ASCII identifiers are validated locally, and an RPC message is used to validate Unicode identifiers when needed. String values are interned on both the server and client when deserializing, to avoid unnecessary copies and keep Ident cheap to copy and move. This appears to be important for performance. The client-side interner is based roughly on the one from rustc_span, and uses an arena inspired by rustc_arena. RPC messages passing symbols always include the full value. This could potentially be optimized in the future if it is revealed to be a performance bottleneck. Despite now having a relevant implementation of Display for Ident, ToString is still specialized, as it is a hot-path for this object. The symbol infrastructure will also be used for literals in the next part.
1 parent e0dce6e commit 491fccf

File tree

11 files changed

+441
-114
lines changed

11 files changed

+441
-114
lines changed

compiler/rustc_expand/src/proc_macro_server.rs

+34-57
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@ use rustc_parse::lexer::nfc_normalize;
1111
use rustc_parse::parse_stream_from_source_str;
1212
use rustc_session::parse::ParseSess;
1313
use rustc_span::def_id::CrateNum;
14-
use rustc_span::symbol::{self, kw, sym, Symbol};
14+
use rustc_span::symbol::{self, sym, Symbol};
1515
use rustc_span::{BytePos, FileName, Pos, SourceFile, Span};
1616

17-
use pm::bridge::{server, DelimSpan, ExpnGlobals, Group, Punct, TokenTree};
17+
use pm::bridge::{server, DelimSpan, ExpnGlobals, Group, Ident, Punct, TokenTree};
1818
use pm::{Delimiter, Level, LineColumn};
19+
use std::ascii;
1920
use std::ops::Bound;
20-
use std::{ascii, panic};
2121

2222
trait FromInternal<T> {
2323
fn from_internal(x: T) -> Self;
@@ -50,7 +50,7 @@ impl ToInternal<token::Delimiter> for Delimiter {
5050
}
5151

5252
impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)>
53-
for Vec<TokenTree<TokenStream, Span, Ident, Literal>>
53+
for Vec<TokenTree<TokenStream, Span, Symbol, Literal>>
5454
{
5555
fn from_internal((stream, rustc): (TokenStream, &mut Rustc<'_, '_>)) -> Self {
5656
use rustc_ast::token::*;
@@ -135,13 +135,12 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)>
135135
Question => op("?"),
136136
SingleQuote => op("'"),
137137

138-
Ident(name, false) if name == kw::DollarCrate => trees.push(TokenTree::Ident(Ident::dollar_crate(span))),
139-
Ident(name, is_raw) => trees.push(TokenTree::Ident(Ident::new(rustc.sess(), name, is_raw, span))),
138+
Ident(sym, is_raw) => trees.push(TokenTree::Ident(Ident { sym, is_raw, span })),
140139
Lifetime(name) => {
141140
let ident = symbol::Ident::new(name, span).without_first_quote();
142141
trees.extend([
143142
TokenTree::Punct(Punct { ch: b'\'', joint: true, span }),
144-
TokenTree::Ident(Ident::new(rustc.sess(), ident.name, false, span)),
143+
TokenTree::Ident(Ident { sym: ident.name, is_raw: false, span }),
145144
]);
146145
}
147146
Literal(lit) => trees.push(TokenTree::Literal(self::Literal { lit, span })),
@@ -170,7 +169,7 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)>
170169
}
171170

172171
Interpolated(nt) if let NtIdent(ident, is_raw) = *nt => {
173-
trees.push(TokenTree::Ident(Ident::new(rustc.sess(), ident.name, is_raw, ident.span)))
172+
trees.push(TokenTree::Ident(Ident { sym: ident.name, is_raw, span: ident.span }))
174173
}
175174

176175
Interpolated(nt) => {
@@ -200,11 +199,14 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)>
200199
}
201200
}
202201

203-
impl ToInternal<TokenStream> for TokenTree<TokenStream, Span, Ident, Literal> {
202+
impl ToInternal<TokenStream>
203+
for (TokenTree<TokenStream, Span, Symbol, Literal>, &mut Rustc<'_, '_>)
204+
{
204205
fn to_internal(self) -> TokenStream {
205206
use rustc_ast::token::*;
206207

207-
let (ch, joint, span) = match self {
208+
let (tree, rustc) = self;
209+
let (ch, joint, span) = match tree {
208210
TokenTree::Punct(Punct { ch, joint, span }) => (ch, joint, span),
209211
TokenTree::Group(Group { delimiter, stream, span: DelimSpan { open, close, .. } }) => {
210212
return tokenstream::TokenTree::Delimited(
@@ -215,6 +217,7 @@ impl ToInternal<TokenStream> for TokenTree<TokenStream, Span, Ident, Literal> {
215217
.into();
216218
}
217219
TokenTree::Ident(self::Ident { sym, is_raw, span }) => {
220+
rustc.sess().symbol_gallery.insert(sym, span);
218221
return tokenstream::TokenTree::token(Ident(sym, is_raw), span).into();
219222
}
220223
TokenTree::Literal(self::Literal {
@@ -289,33 +292,6 @@ impl ToInternal<rustc_errors::Level> for Level {
289292

290293
pub struct FreeFunctions;
291294

292-
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
293-
pub struct Ident {
294-
sym: Symbol,
295-
is_raw: bool,
296-
span: Span,
297-
}
298-
299-
impl Ident {
300-
fn new(sess: &ParseSess, sym: Symbol, is_raw: bool, span: Span) -> Ident {
301-
let sym = nfc_normalize(sym.as_str());
302-
let string = sym.as_str();
303-
if !rustc_lexer::is_ident(string) {
304-
panic!("`{:?}` is not a valid identifier", string)
305-
}
306-
if is_raw && !sym.can_be_raw() {
307-
panic!("`{}` cannot be a raw identifier", string);
308-
}
309-
sess.symbol_gallery.insert(sym, span);
310-
Ident { sym, is_raw, span }
311-
}
312-
313-
fn dollar_crate(span: Span) -> Ident {
314-
// `$crate` is accepted as an ident only if it comes from the compiler.
315-
Ident { sym: kw::DollarCrate, is_raw: false, span }
316-
}
317-
}
318-
319295
// FIXME(eddyb) `Literal` should not expose internal `Debug` impls.
320296
#[derive(Clone, Debug)]
321297
pub struct Literal {
@@ -357,12 +333,12 @@ impl<'a, 'b> Rustc<'a, 'b> {
357333
impl server::Types for Rustc<'_, '_> {
358334
type FreeFunctions = FreeFunctions;
359335
type TokenStream = TokenStream;
360-
type Ident = Ident;
361336
type Literal = Literal;
362337
type SourceFile = Lrc<SourceFile>;
363338
type MultiSpan = Vec<Span>;
364339
type Diagnostic = Diagnostic;
365340
type Span = Span;
341+
type Symbol = Symbol;
366342
}
367343

368344
impl server::FreeFunctions for Rustc<'_, '_> {
@@ -453,22 +429,22 @@ impl server::TokenStream for Rustc<'_, '_> {
453429

454430
fn from_token_tree(
455431
&mut self,
456-
tree: TokenTree<Self::TokenStream, Self::Span, Self::Ident, Self::Literal>,
432+
tree: TokenTree<Self::TokenStream, Self::Span, Self::Symbol, Self::Literal>,
457433
) -> Self::TokenStream {
458-
tree.to_internal()
434+
(tree, &mut *self).to_internal()
459435
}
460436

461437
fn concat_trees(
462438
&mut self,
463439
base: Option<Self::TokenStream>,
464-
trees: Vec<TokenTree<Self::TokenStream, Self::Span, Self::Ident, Self::Literal>>,
440+
trees: Vec<TokenTree<Self::TokenStream, Self::Span, Self::Symbol, Self::Literal>>,
465441
) -> Self::TokenStream {
466442
let mut builder = tokenstream::TokenStreamBuilder::new();
467443
if let Some(base) = base {
468444
builder.push(base);
469445
}
470446
for tree in trees {
471-
builder.push(tree.to_internal());
447+
builder.push((tree, &mut *self).to_internal());
472448
}
473449
builder.build()
474450
}
@@ -491,25 +467,11 @@ impl server::TokenStream for Rustc<'_, '_> {
491467
fn into_trees(
492468
&mut self,
493469
stream: Self::TokenStream,
494-
) -> Vec<TokenTree<Self::TokenStream, Self::Span, Self::Ident, Self::Literal>> {
470+
) -> Vec<TokenTree<Self::TokenStream, Self::Span, Self::Symbol, Self::Literal>> {
495471
FromInternal::from_internal((stream, self))
496472
}
497473
}
498474

499-
impl server::Ident for Rustc<'_, '_> {
500-
fn new(&mut self, string: &str, span: Self::Span, is_raw: bool) -> Self::Ident {
501-
Ident::new(self.sess(), Symbol::intern(string), is_raw, span)
502-
}
503-
504-
fn span(&mut self, ident: Self::Ident) -> Self::Span {
505-
ident.span
506-
}
507-
508-
fn with_span(&mut self, ident: Self::Ident, span: Self::Span) -> Self::Ident {
509-
Ident { span, ..ident }
510-
}
511-
}
512-
513475
impl server::Literal for Rustc<'_, '_> {
514476
fn from_str(&mut self, s: &str) -> Result<Self::Literal, ()> {
515477
let name = FileName::proc_macro_source_code(s);
@@ -812,6 +774,13 @@ impl server::Span for Rustc<'_, '_> {
812774
}
813775
}
814776

777+
impl server::Symbol for Rustc<'_, '_> {
778+
fn normalize_and_validate_ident(&mut self, string: &str) -> Result<Self::Symbol, ()> {
779+
let sym = nfc_normalize(string);
780+
if rustc_lexer::is_ident(sym.as_str()) { Ok(sym) } else { Err(()) }
781+
}
782+
}
783+
815784
impl server::Server for Rustc<'_, '_> {
816785
fn globals(&mut self) -> ExpnGlobals<Self::Span> {
817786
ExpnGlobals {
@@ -820,4 +789,12 @@ impl server::Server for Rustc<'_, '_> {
820789
mixed_site: self.mixed_site,
821790
}
822791
}
792+
793+
fn intern_symbol(string: &str) -> Self::Symbol {
794+
Symbol::intern(string)
795+
}
796+
797+
fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)) {
798+
f(&symbol.as_str())
799+
}
823800
}
+113
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
//! A minimal arena allocator inspired by `rustc_arena::DroplessArena`.
2+
//!
3+
//! This is unfortunately a minimal re-implementation rather than a dependency
4+
//! as it is difficult to depend on crates from within `proc_macro`, due to it
5+
//! being built at the same time as `std`.
6+
7+
use std::cell::{Cell, RefCell};
8+
use std::cmp;
9+
use std::mem::MaybeUninit;
10+
use std::ops::Range;
11+
use std::ptr;
12+
use std::slice;
13+
use std::str;
14+
15+
// The arenas start with PAGE-sized chunks, and then each new chunk is twice as
16+
// big as its predecessor, up until we reach HUGE_PAGE-sized chunks, whereupon
17+
// we stop growing. This scales well, from arenas that are barely used up to
18+
// arenas that are used for 100s of MiBs. Note also that the chosen sizes match
19+
// the usual sizes of pages and huge pages on Linux.
20+
const PAGE: usize = 4096;
21+
const HUGE_PAGE: usize = 2 * 1024 * 1024;
22+
23+
/// A minimal arena allocator inspired by `rustc_arena::DroplessArena`.
24+
///
25+
/// This is unfortunately a complete re-implementation rather than a dependency
26+
/// as it is difficult to depend on crates from within `proc_macro`, due to it
27+
/// being built at the same time as `std`.
28+
///
29+
/// This arena doesn't have support for allocating anything other than byte
30+
/// slices, as that is all that is necessary.
31+
pub(crate) struct Arena {
32+
start: Cell<*mut MaybeUninit<u8>>,
33+
end: Cell<*mut MaybeUninit<u8>>,
34+
chunks: RefCell<Vec<Box<[MaybeUninit<u8>]>>>,
35+
}
36+
37+
impl Arena {
38+
pub(crate) fn new() -> Self {
39+
Arena {
40+
start: Cell::new(ptr::null_mut()),
41+
end: Cell::new(ptr::null_mut()),
42+
chunks: RefCell::new(Vec::new()),
43+
}
44+
}
45+
46+
/// Add a new chunk with at least `additional` free bytes.
47+
#[inline(never)]
48+
#[cold]
49+
fn grow(&self, additional: usize) {
50+
let mut chunks = self.chunks.borrow_mut();
51+
let mut new_cap;
52+
if let Some(last_chunk) = chunks.last_mut() {
53+
// If the previous chunk's len is less than HUGE_PAGE
54+
// bytes, then this chunk will be at least double the previous
55+
// chunk's size.
56+
new_cap = last_chunk.len().min(HUGE_PAGE / 2);
57+
new_cap *= 2;
58+
} else {
59+
new_cap = PAGE;
60+
}
61+
// Also ensure that this chunk can fit `additional`.
62+
new_cap = cmp::max(additional, new_cap);
63+
64+
let mut chunk = Box::new_uninit_slice(new_cap);
65+
let Range { start, end } = chunk.as_mut_ptr_range();
66+
self.start.set(start);
67+
self.end.set(end);
68+
chunks.push(chunk);
69+
}
70+
71+
/// Allocates a byte slice with specified size from the current memory
72+
/// chunk. Returns `None` if there is no free space left to satisfy the
73+
/// request.
74+
fn alloc_raw_without_grow(&self, bytes: usize) -> Option<&mut [MaybeUninit<u8>]> {
75+
let start = self.start.get().addr();
76+
let old_end = self.end.get();
77+
let end = old_end.addr();
78+
79+
let new_end = end.checked_sub(bytes)?;
80+
if start <= new_end {
81+
let new_end = old_end.with_addr(new_end);
82+
self.end.set(new_end);
83+
// SAFETY: `bytes` bytes starting at `new_end` were just reserved.
84+
Some(unsafe { slice::from_raw_parts_mut(new_end, bytes) })
85+
} else {
86+
None
87+
}
88+
}
89+
90+
fn alloc_raw(&self, bytes: usize) -> &mut [MaybeUninit<u8>] {
91+
if bytes == 0 {
92+
return &mut [];
93+
}
94+
95+
loop {
96+
if let Some(a) = self.alloc_raw_without_grow(bytes) {
97+
break a;
98+
}
99+
// No free space left. Allocate a new chunk to satisfy the request.
100+
// On failure the grow will panic or abort.
101+
self.grow(bytes);
102+
}
103+
}
104+
105+
pub(crate) fn alloc_str<'a>(&'a self, string: &str) -> &'a mut str {
106+
let alloc = self.alloc_raw(string.len());
107+
let bytes = MaybeUninit::write_slice(alloc, string.as_bytes());
108+
109+
// SAFETY: we convert from `&str` to `&[u8]`, clone it into the arena,
110+
// and immediately convert the clone back to `&str`.
111+
unsafe { str::from_utf8_unchecked_mut(bytes) }
112+
}
113+
}

library/proc_macro/src/bridge/client.rs

+9-1
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,6 @@ define_handles! {
181181
Diagnostic,
182182

183183
'interned:
184-
Ident,
185184
Span,
186185
}
187186

@@ -242,6 +241,8 @@ impl fmt::Debug for Span {
242241
}
243242
}
244243

244+
pub(crate) use super::symbol::Symbol;
245+
245246
macro_rules! define_client_side {
246247
($($name:ident {
247248
$(fn $method:ident($($arg:ident: $arg_ty:ty),* $(,)?) $(-> $ret_ty:ty)*;)*
@@ -405,6 +406,9 @@ fn run_client<A: for<'a, 's> DecodeMut<'a, 's, ()>, R: Encode<()>>(
405406
panic::catch_unwind(panic::AssertUnwindSafe(|| {
406407
maybe_install_panic_hook(force_show_panics);
407408

409+
// Make sure the symbol store is empty before decoding inputs.
410+
Symbol::invalidate_all();
411+
408412
let reader = &mut &buf[..];
409413
let (globals, input) = <(ExpnGlobals<Span>, A)>::decode(reader, &mut ());
410414

@@ -438,6 +442,10 @@ fn run_client<A: for<'a, 's> DecodeMut<'a, 's, ()>, R: Encode<()>>(
438442
buf.clear();
439443
Err::<(), _>(e).encode(&mut buf, &mut ());
440444
});
445+
446+
// Now that a response has been serialized, invalidate all symbols
447+
// registered with the interner.
448+
Symbol::invalidate_all();
441449
buf
442450
}
443451

0 commit comments

Comments
 (0)