Commit 28236ab

Move various token stream things from rustc_parse to rustc_ast.
Specifically: `TokenCursor`, `TokenTreeCursor`, `LazyAttrTokenStreamImpl`, `FlatToken`, `make_attr_token_stream`, `ParserRange`, `NodeRange`, `ParserReplacement`, and `NodeReplacement`. These are all related to token streams, rather than actual parsing. This will facilitate the simplifications in the next commit.
1 parent 25cdf1f commit 28236ab
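Everything moved here now lives in `rustc_ast::tokenstream` (`FlatToken` and `make_attr_token_stream` stay private to the module). A rough sketch of the new import surface for downstream compiler code (the grouping is illustrative; the only real import change in this commit is the one shown in the attr.rs diff below):

    use rustc_ast::tokenstream::{
        LazyAttrTokenStreamImpl, NodeRange, NodeReplacement, ParserRange, ParserReplacement,
        TokenCursor, TokenTreeCursor,
    };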

File tree

6 files changed: +338 -339 lines

compiler/rustc_ast/src/lib.rs (+1)
@@ -12,6 +12,7 @@
     test(attr(deny(warnings)))
 )]
 #![doc(rust_logo)]
+#![feature(array_windows)]
 #![feature(associated_type_defaults)]
 #![feature(box_patterns)]
 #![feature(if_let_guard)]

compiler/rustc_ast/src/tokenstream.rs (+325 -1)
@@ -14,8 +14,9 @@
 //! ownership of the original.

 use std::borrow::Cow;
+use std::ops::Range;
 use std::sync::Arc;
-use std::{cmp, fmt, iter};
+use std::{cmp, fmt, iter, mem};

 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_data_structures::sync;
@@ -156,13 +157,238 @@ impl<CTX> HashStable<CTX> for LazyAttrTokenStream {
     }
 }

+/// A token range within a `Parser`'s full token stream.
+#[derive(Clone, Debug)]
+pub struct ParserRange(pub Range<u32>);
+
+/// A token range within an individual AST node's (lazy) token stream, i.e.
+/// relative to that node's first token. Distinct from `ParserRange` so the two
+/// kinds of range can't be mixed up.
+#[derive(Clone, Debug)]
+pub struct NodeRange(pub Range<u32>);
+
+/// Indicates a range of tokens that should be replaced by an `AttrsTarget`
+/// (replacement) or be replaced by nothing (deletion). This is used in two
+/// places during token collection.
+///
+/// 1. Replacement. During the parsing of an AST node that may have a
+///    `#[derive]` attribute, when we parse a nested AST node that has `#[cfg]`
+///    or `#[cfg_attr]`, we replace the entire inner AST node with
+///    `FlatToken::AttrsTarget`. This lets us perform eager cfg-expansion on an
+///    `AttrTokenStream`.
+///
+/// 2. Deletion. We delete inner attributes from all collected token streams,
+///    and instead track them through the `attrs` field on the AST node. This
+///    lets us manipulate them similarly to outer attributes. When we create a
+///    `TokenStream`, the inner attributes are inserted into the proper place
+///    in the token stream.
+///
+/// Each replacement starts off in `ParserReplacement` form but is converted to
+/// `NodeReplacement` form when it is attached to a single AST node, via
+/// `LazyAttrTokenStreamImpl`.
+pub type ParserReplacement = (ParserRange, Option<AttrsTarget>);
+
+/// See the comment on `ParserReplacement`.
+pub type NodeReplacement = (NodeRange, Option<AttrsTarget>);
+
+impl NodeRange {
+    // Converts a range within a parser's tokens to a range within a
+    // node's tokens beginning at `start_pos`.
+    //
+    // For example, imagine a parser with 50 tokens in its token stream, a
+    // function that spans `ParserRange(20..40)` and an inner attribute within
+    // that function that spans `ParserRange(30..35)`. We would find the inner
+    // attribute's range within the function's tokens by subtracting 20, which
+    // is the position of the function's start token. This gives
+    // `NodeRange(10..15)`.
+    pub fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange {
+        assert!(!parser_range.is_empty());
+        assert!(parser_range.start >= start_pos);
+        NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos))
+    }
+}
+
+// From a value of this type we can reconstruct the `TokenStream` seen by the
+// `f` callback passed to a call to `Parser::collect_tokens`, by
+// replaying the getting of the tokens. This saves us producing a `TokenStream`
+// if it is never needed, e.g. a captured `macro_rules!` argument that is never
+// passed to a proc macro. In practice, token stream creation happens rarely
+// compared to calls to `collect_tokens` (see some statistics in #78736) so we
+// are doing as little up-front work as possible.
+//
+// This also makes `Parser` very cheap to clone, since
+// there is no intermediate collection buffer to clone.
+pub struct LazyAttrTokenStreamImpl {
+    pub start_token: (Token, Spacing),
+    pub cursor_snapshot: TokenCursor,
+    pub num_calls: u32,
+    pub break_last_token: u32,
+    pub node_replacements: Box<[NodeReplacement]>,
+}
+
+impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
+    fn to_attr_token_stream(&self) -> AttrTokenStream {
+        // The token produced by the final call to `{,inlined_}next` was not
+        // actually consumed by the callback. The combination of chaining the
+        // initial token and using `take` produces the desired result - we
+        // produce an empty `TokenStream` if no calls were made, and omit the
+        // final token otherwise.
+        let mut cursor_snapshot = self.cursor_snapshot.clone();
+        let tokens = iter::once(FlatToken::Token(self.start_token))
+            .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next())))
+            .take(self.num_calls as usize);
+
+        if self.node_replacements.is_empty() {
+            make_attr_token_stream(tokens, self.break_last_token)
+        } else {
+            let mut tokens: Vec<_> = tokens.collect();
+            let mut node_replacements = self.node_replacements.to_vec();
+            node_replacements.sort_by_key(|(range, _)| range.0.start);
+
+            #[cfg(debug_assertions)]
+            for [(node_range, tokens), (next_node_range, next_tokens)] in
+                node_replacements.array_windows()
+            {
+                assert!(
+                    node_range.0.end <= next_node_range.0.start
+                        || node_range.0.end >= next_node_range.0.end,
+                    "Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
+                    node_range,
+                    tokens,
+                    next_node_range,
+                    next_tokens,
+                );
+            }
+
+            // Process the replace ranges, starting from the highest start
+            // position and working our way back. If we have tokens like:
+            //
+            // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
+            //
+            // Then we will generate replace ranges for both
+            // the `#[cfg(FALSE)] field: bool` and the entire
+            // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
+            //
+            // By starting processing from the replace range with the greatest
+            // start position, we ensure that any (outer) replace range which
+            // encloses another (inner) replace range will fully overwrite the
+            // inner range's replacement.
+            for (node_range, target) in node_replacements.into_iter().rev() {
+                assert!(
+                    !node_range.0.is_empty(),
+                    "Cannot replace an empty node range: {:?}",
+                    node_range.0
+                );
+
+                // Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s, plus
+                // enough `FlatToken::Empty`s to fill up the rest of the range. This keeps the
+                // total length of `tokens` constant throughout the replacement process, allowing
+                // us to do all replacements without adjusting indices.
+                let target_len = target.is_some() as usize;
+                tokens.splice(
+                    (node_range.0.start as usize)..(node_range.0.end as usize),
+                    target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain(
+                        iter::repeat(FlatToken::Empty).take(node_range.0.len() - target_len),
+                    ),
+                );
+            }
+            make_attr_token_stream(tokens.into_iter(), self.break_last_token)
+        }
+    }
+}
+
+/// A helper struct used when building an `AttrTokenStream` from
+/// a `LazyAttrTokenStream`. Both delimiter and non-delimited tokens
+/// are stored as `FlatToken::Token`. A vector of `FlatToken`s
+/// is then 'parsed' to build up an `AttrTokenStream` with nested
+/// `AttrTokenTree::Delimited` tokens.
+#[derive(Debug, Clone)]
+enum FlatToken {
+    /// A token - this holds both delimiter (e.g. '{' and '}')
+    /// and non-delimiter tokens
+    Token((Token, Spacing)),
+    /// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted
+    /// directly into the constructed `AttrTokenStream` as an
+    /// `AttrTokenTree::AttrsTarget`.
+    AttrsTarget(AttrsTarget),
+    /// A special 'empty' token that is ignored during the conversion
+    /// to an `AttrTokenStream`. This is used to simplify the
+    /// handling of replace ranges.
+    Empty,
+}
+
 /// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
 /// information about the tokens for attribute targets. This is used
 /// during expansion to perform early cfg-expansion, and to process attributes
 /// during proc-macro invocations.
 #[derive(Clone, Debug, Default, Encodable, Decodable)]
 pub struct AttrTokenStream(pub Arc<Vec<AttrTokenTree>>);

+/// Converts a flattened iterator of tokens (including open and close delimiter tokens) into an
+/// `AttrTokenStream`, creating an `AttrTokenTree::Delimited` for each matching pair of open and
+/// close delims.
+fn make_attr_token_stream(
+    iter: impl Iterator<Item = FlatToken>,
+    break_last_token: u32,
+) -> AttrTokenStream {
+    #[derive(Debug)]
+    struct FrameData {
+        // This is `None` for the first frame, `Some` for all others.
+        open_delim_sp: Option<(Delimiter, Span, Spacing)>,
+        inner: Vec<AttrTokenTree>,
+    }
+    // The stack always has at least one element. Storing it separately makes for shorter code.
+    let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] };
+    let mut stack_rest = vec![];
+    for flat_token in iter {
+        match flat_token {
+            FlatToken::Token((token @ Token { kind, span }, spacing)) => {
+                if let Some(delim) = kind.open_delim() {
+                    stack_rest.push(mem::replace(
+                        &mut stack_top,
+                        FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] },
+                    ));
+                } else if let Some(delim) = kind.close_delim() {
+                    let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap());
+                    let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap();
+                    assert!(
+                        open_delim.eq_ignoring_invisible_origin(&delim),
+                        "Mismatched open/close delims: open={open_delim:?} close={span:?}"
+                    );
+                    let dspan = DelimSpan::from_pair(open_sp, span);
+                    let dspacing = DelimSpacing::new(open_spacing, spacing);
+                    let stream = AttrTokenStream::new(frame_data.inner);
+                    let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream);
+                    stack_top.inner.push(delimited);
+                } else {
+                    stack_top.inner.push(AttrTokenTree::Token(token, spacing))
+                }
+            }
+            FlatToken::AttrsTarget(target) => {
+                stack_top.inner.push(AttrTokenTree::AttrsTarget(target))
+            }
+            FlatToken::Empty => {}
+        }
+    }
+
+    if break_last_token > 0 {
+        let last_token = stack_top.inner.pop().unwrap();
+        if let AttrTokenTree::Token(last_token, spacing) = last_token {
+            let (unglued, _) = last_token.kind.break_two_token_op(break_last_token).unwrap();
+
+            // Tokens are always ASCII chars, so we can use byte arithmetic here.
+            let mut first_span = last_token.span.shrink_to_lo();
+            first_span =
+                first_span.with_hi(first_span.lo() + rustc_span::BytePos(break_last_token));
+
+            stack_top.inner.push(AttrTokenTree::Token(Token::new(unglued, first_span), spacing));
+        } else {
+            panic!("Unexpected last token {last_token:?}")
+        }
+    }
+    AttrTokenStream::new(stack_top.inner)
+}
+
 /// Like `TokenTree`, but for `AttrTokenStream`.
 #[derive(Clone, Debug, Encodable, Decodable)]
 pub enum AttrTokenTree {
@@ -641,6 +867,104 @@ impl<'t> Iterator for TokenStreamIter<'t> {
     }
 }

+#[derive(Clone, Debug)]
+pub struct TokenTreeCursor {
+    stream: TokenStream,
+    /// Points to the current token tree in the stream. In `TokenCursor::curr`,
+    /// this can be any token tree. In `TokenCursor::stack`, this is always a
+    /// `TokenTree::Delimited`.
+    index: usize,
+}
+
+impl TokenTreeCursor {
+    #[inline]
+    pub fn new(stream: TokenStream) -> Self {
+        TokenTreeCursor { stream, index: 0 }
+    }
+
+    #[inline]
+    pub fn curr(&self) -> Option<&TokenTree> {
+        self.stream.get(self.index)
+    }
+
+    pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
+        self.stream.get(self.index + n)
+    }
+
+    #[inline]
+    pub fn bump(&mut self) {
+        self.index += 1;
+    }
+}
+
+/// A `TokenStream` cursor that produces `Token`s. It's a bit odd that
+/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
+/// use this type to emit them as a linear sequence. But a linear sequence is
+/// what the parser expects, for the most part.
+#[derive(Clone, Debug)]
+pub struct TokenCursor {
+    // Cursor for the current (innermost) token stream. The index within the
+    // cursor can point to any token tree in the stream (or one past the end).
+    // The delimiters for this token stream are found in `self.stack.last()`;
+    // if that is `None` we are in the outermost token stream which never has
+    // delimiters.
+    pub curr: TokenTreeCursor,
+
+    // Token streams surrounding the current one. The index within each cursor
+    // always points to a `TokenTree::Delimited`.
+    pub stack: Vec<TokenTreeCursor>,
+}
+
+impl TokenCursor {
+    pub fn next(&mut self) -> (Token, Spacing) {
+        self.inlined_next()
+    }
+
+    /// This always-inlined version should only be used on hot code paths.
+    #[inline(always)]
+    pub fn inlined_next(&mut self) -> (Token, Spacing) {
+        loop {
+            // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
+            // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
+            // below can be removed.
+            if let Some(tree) = self.curr.curr() {
+                match tree {
+                    &TokenTree::Token(token, spacing) => {
+                        debug_assert!(!token.kind.is_delim());
+                        let res = (token, spacing);
+                        self.curr.bump();
+                        return res;
+                    }
+                    &TokenTree::Delimited(sp, spacing, delim, ref tts) => {
+                        let trees = TokenTreeCursor::new(tts.clone());
+                        self.stack.push(mem::replace(&mut self.curr, trees));
+                        if !delim.skip() {
+                            return (Token::new(delim.as_open_token_kind(), sp.open), spacing.open);
+                        }
+                        // No open delimiter to return; continue on to the next iteration.
+                    }
+                };
+            } else if let Some(parent) = self.stack.pop() {
+                // We have exhausted this token stream. Move back to its parent token stream.
+                let Some(&TokenTree::Delimited(span, spacing, delim, _)) = parent.curr() else {
+                    panic!("parent should be Delimited")
+                };
+                self.curr = parent;
+                self.curr.bump(); // move past the `Delimited`
+                if !delim.skip() {
+                    return (Token::new(delim.as_close_token_kind(), span.close), spacing.close);
+                }
+                // No close delimiter to return; continue on to the next iteration.
+            } else {
+                // We have exhausted the outermost token stream. The use of
+                // `Spacing::Alone` is arbitrary and immaterial, because the
+                // `Eof` token's spacing is never used.
+                return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
+            }
+        }
+    }
+}
+
 #[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
 pub struct DelimSpan {
     pub open: Span,
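The range arithmetic in `NodeRange::new` is easy to check in isolation. A minimal standalone sketch, using stand-in tuple structs that mirror the definitions added above, reproduces the worked example from the doc comment:

    use std::ops::Range;

    #[derive(Debug, PartialEq)]
    struct ParserRange(Range<u32>);
    #[derive(Debug, PartialEq)]
    struct NodeRange(Range<u32>);

    impl NodeRange {
        fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange {
            assert!(!parser_range.is_empty());
            assert!(parser_range.start >= start_pos);
            NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos))
        }
    }

    fn main() {
        // A function spans parser tokens 20..40 and contains an inner
        // attribute at 30..35; relative to the function's first token the
        // attribute occupies 10..15.
        assert_eq!(NodeRange::new(ParserRange(30..35), 20), NodeRange(10..15));
    }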

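`make_attr_token_stream` pairs delimiters with a classic stack discipline: the innermost frame lives in `stack_top`, is pushed onto `stack_rest` when an open delimiter starts a new frame, and is popped back when the matching close delimiter arrives. A toy standalone version of the same idea over plain characters (all names here are illustrative, not compiler API):

    #[derive(Debug)]
    enum Tree {
        Token(char),
        Delimited(Vec<Tree>),
    }

    fn build(flat: impl Iterator<Item = char>) -> Vec<Tree> {
        // Keep the innermost frame separate, like `stack_top` in the real
        // function; enclosing frames wait on `stack_rest`.
        let mut stack_top: Vec<Tree> = Vec::new();
        let mut stack_rest: Vec<Vec<Tree>> = Vec::new();
        for c in flat {
            match c {
                '(' => stack_rest.push(std::mem::take(&mut stack_top)),
                ')' => {
                    let inner = std::mem::replace(&mut stack_top, stack_rest.pop().unwrap());
                    stack_top.push(Tree::Delimited(inner));
                }
                _ => stack_top.push(Tree::Token(c)),
            }
        }
        stack_top
    }

    fn main() {
        // Prints [Token('a'), Delimited([Token('b'), Token('c')]), Token('d')]
        println!("{:?}", build("a(bc)d".chars()));
    }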
compiler/rustc_parse/src/lib.rs (-1)
@@ -5,7 +5,6 @@
 #![allow(rustc::diagnostic_outside_of_impl)]
 #![allow(rustc::untranslatable_diagnostic)]
 #![cfg_attr(bootstrap, feature(let_chains))]
-#![feature(array_windows)]
 #![feature(assert_matches)]
 #![feature(box_patterns)]
 #![feature(debug_closure_helpers)]

compiler/rustc_parse/src/parser/attr.rs (+2 -2)
@@ -1,5 +1,6 @@
 use rustc_ast as ast;
 use rustc_ast::token::{self, MetaVarKind};
+use rustc_ast::tokenstream::ParserRange;
 use rustc_ast::{Attribute, attr};
 use rustc_errors::codes::*;
 use rustc_errors::{Diag, PResult};
@@ -8,8 +9,7 @@ use thin_vec::ThinVec;
 use tracing::debug;

 use super::{
-    AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, ParserRange, PathStyle, Trailing,
-    UsePreAttrPos,
+    AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, PathStyle, Trailing, UsePreAttrPos,
 };
 use crate::{errors, exp, fluent_generated as fluent};