From ccfc292999e133193f61feacd70031c90884636b Mon Sep 17 00:00:00 2001 From: Aaron Hill Date: Thu, 14 Jan 2021 10:42:01 -0500 Subject: [PATCH] Refactor token collection to capture trailing token immediately --- compiler/rustc_ast/src/tokenstream.rs | 11 -- compiler/rustc_parse/src/lib.rs | 2 +- compiler/rustc_parse/src/parser/item.rs | 4 +- compiler/rustc_parse/src/parser/mod.rs | 55 ++++---- compiler/rustc_parse/src/parser/stmt.rs | 170 +++++++++++++----------- 5 files changed, 124 insertions(+), 118 deletions(-) diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index 00354b42ebb7c..9ac05f316f034 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -127,14 +127,10 @@ where } pub trait CreateTokenStream: sync::Send + sync::Sync { - fn add_trailing_semi(&self) -> Box; fn create_token_stream(&self) -> TokenStream; } impl CreateTokenStream for TokenStream { - fn add_trailing_semi(&self) -> Box { - panic!("Cannot call `add_trailing_semi` on a `TokenStream`!"); - } fn create_token_stream(&self) -> TokenStream { self.clone() } @@ -151,13 +147,6 @@ impl LazyTokenStream { LazyTokenStream(Lrc::new(Box::new(inner))) } - /// Extends the captured stream by one token, - /// which must be a trailing semicolon. This - /// affects the `TokenStream` created by `make_tokenstream`. - pub fn add_trailing_semi(&self) -> LazyTokenStream { - LazyTokenStream(Lrc::new(self.0.add_trailing_semi())) - } - pub fn create_token_stream(&self) -> TokenStream { self.0.create_token_stream() } diff --git a/compiler/rustc_parse/src/lib.rs b/compiler/rustc_parse/src/lib.rs index 62fd6936d210d..c365acac625a4 100644 --- a/compiler/rustc_parse/src/lib.rs +++ b/compiler/rustc_parse/src/lib.rs @@ -292,7 +292,7 @@ pub fn nt_to_tokenstream( } else if matches!(synthesize_tokens, CanSynthesizeMissingTokens::Yes) { return fake_token_stream(sess, nt); } else { - panic!("Missing tokens for nt {:?}", pprust::nonterminal_to_string(nt)); + panic!("Missing tokens for nt at {:?}: {:?}", nt.span(), pprust::nonterminal_to_string(nt)); } } diff --git a/compiler/rustc_parse/src/parser/item.rs b/compiler/rustc_parse/src/parser/item.rs index 28067f0216c74..1ed4d39cd0539 100644 --- a/compiler/rustc_parse/src/parser/item.rs +++ b/compiler/rustc_parse/src/parser/item.rs @@ -1,6 +1,6 @@ use super::diagnostics::{dummy_arg, ConsumeClosingDelim, Error}; use super::ty::{AllowPlus, RecoverQPath, RecoverReturnSign}; -use super::{FollowedByType, ForceCollect, Parser, PathStyle}; +use super::{FollowedByType, ForceCollect, Parser, PathStyle, TrailingToken}; use crate::{maybe_collect_tokens, maybe_whole}; @@ -125,7 +125,7 @@ impl<'a> Parser<'a> { let item = maybe_collect_tokens!(self, force_collect, &attrs, |this: &mut Self| { let item = this.parse_item_common_(attrs, mac_allowed, attrs_allowed, req_name); unclosed_delims.append(&mut this.unclosed_delims); - item + Ok((item?, TrailingToken::None)) })?; self.unclosed_delims.append(&mut unclosed_delims); diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index c85b7a00732d3..b332005ca7424 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -61,6 +61,11 @@ pub enum ForceCollect { No, } +pub enum TrailingToken { + None, + Semi, +} + /// Like `maybe_whole_expr`, but for things other than expressions. #[macro_export] macro_rules! maybe_whole { @@ -1225,6 +1230,13 @@ impl<'a> Parser<'a> { } } + pub fn collect_tokens( + &mut self, + f: impl FnOnce(&mut Self) -> PResult<'a, R>, + ) -> PResult<'a, R> { + self.collect_tokens_trailing_token(|this| Ok((f(this)?, TrailingToken::None))) + } + /// Records all tokens consumed by the provided callback, /// including the current token. These tokens are collected /// into a `LazyTokenStream`, and returned along with the result @@ -1241,9 +1253,9 @@ impl<'a> Parser<'a> { /// This restriction shouldn't be an issue in practice, /// since this function is used to record the tokens for /// a parsed AST item, which always has matching delimiters. - pub fn collect_tokens( + pub fn collect_tokens_trailing_token( &mut self, - f: impl FnOnce(&mut Self) -> PResult<'a, R>, + f: impl FnOnce(&mut Self) -> PResult<'a, (R, TrailingToken)>, ) -> PResult<'a, R> { let start_token = (self.token.clone(), self.token_spacing); let cursor_snapshot = TokenCursor { @@ -1256,7 +1268,7 @@ impl<'a> Parser<'a> { append_unglued_token: self.token_cursor.append_unglued_token.clone(), }; - let mut ret = f(self)?; + let (mut ret, trailing_token) = f(self)?; // Produces a `TokenStream` on-demand. Using `cursor_snapshot` // and `num_calls`, we can reconstruct the `TokenStream` seen @@ -1275,15 +1287,10 @@ impl<'a> Parser<'a> { cursor_snapshot: TokenCursor, num_calls: usize, desugar_doc_comments: bool, - trailing_semi: bool, append_unglued_token: Option, } impl CreateTokenStream for LazyTokenStreamImpl { fn create_token_stream(&self) -> TokenStream { - let mut num_calls = self.num_calls; - if self.trailing_semi { - num_calls += 1; - } // The token produced by the final call to `next` or `next_desugared` // was not actually consumed by the callback. The combination // of chaining the initial token and using `take` produces the desired @@ -1291,39 +1298,33 @@ impl<'a> Parser<'a> { // and omit the final token otherwise. let mut cursor_snapshot = self.cursor_snapshot.clone(); let tokens = std::iter::once(self.start_token.clone()) - .chain((0..num_calls).map(|_| { + .chain((0..self.num_calls).map(|_| { if self.desugar_doc_comments { cursor_snapshot.next_desugared() } else { cursor_snapshot.next() } })) - .take(num_calls); + .take(self.num_calls); make_token_stream(tokens, self.append_unglued_token.clone()) } - fn add_trailing_semi(&self) -> Box { - if self.trailing_semi { - panic!("Called `add_trailing_semi` twice!"); - } - if self.append_unglued_token.is_some() { - panic!( - "Cannot call `add_trailing_semi` when we have an unglued token {:?}", - self.append_unglued_token - ); - } - let mut new = self.clone(); - new.trailing_semi = true; - Box::new(new) + } + + let mut num_calls = self.token_cursor.num_next_calls - cursor_snapshot.num_next_calls; + match trailing_token { + TrailingToken::None => {} + TrailingToken::Semi => { + assert_eq!(self.token.kind, token::Semi); + num_calls += 1; } } let lazy_impl = LazyTokenStreamImpl { start_token, - num_calls: self.token_cursor.num_next_calls - cursor_snapshot.num_next_calls, + num_calls, cursor_snapshot, desugar_doc_comments: self.desugar_doc_comments, - trailing_semi: false, append_unglued_token: self.token_cursor.append_unglued_token.clone(), }; ret.finalize_tokens(LazyTokenStream::new(lazy_impl)); @@ -1427,9 +1428,9 @@ macro_rules! maybe_collect_tokens { if matches!($force_collect, ForceCollect::Yes) || $crate::parser::attr::maybe_needs_tokens($attrs) { - $self.collect_tokens($f) + $self.collect_tokens_trailing_token($f) } else { - $f($self) + Ok($f($self)?.0) } }; } diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs index da60ba8472ba3..8373f6acd7e01 100644 --- a/compiler/rustc_parse/src/parser/stmt.rs +++ b/compiler/rustc_parse/src/parser/stmt.rs @@ -3,14 +3,13 @@ use super::diagnostics::{AttemptLocalParseRecovery, Error}; use super::expr::LhsExpr; use super::pat::{GateOr, RecoverComma}; use super::path::PathStyle; -use super::{BlockMode, ForceCollect, Parser, Restrictions, SemiColonMode}; +use super::{BlockMode, ForceCollect, Parser, Restrictions, SemiColonMode, TrailingToken}; use crate::{maybe_collect_tokens, maybe_whole}; use rustc_ast as ast; use rustc_ast::attr::HasAttrs; use rustc_ast::ptr::P; use rustc_ast::token::{self, TokenKind}; -use rustc_ast::tokenstream::LazyTokenStream; use rustc_ast::util::classify; use rustc_ast::{AttrStyle, AttrVec, Attribute, MacCall, MacCallStmt, MacStmtStyle}; use rustc_ast::{Block, BlockCheckMode, Expr, ExprKind, Local, Stmt, StmtKind, DUMMY_NODE_ID}; @@ -25,7 +24,7 @@ impl<'a> Parser<'a> { /// e.g., a `StmtKind::Semi` parses to a `StmtKind::Expr`, leaving the trailing `;` unconsumed. // Public for rustfmt usage. pub fn parse_stmt(&mut self, force_collect: ForceCollect) -> PResult<'a, Option> { - Ok(self.parse_stmt_without_recovery(force_collect).unwrap_or_else(|mut e| { + Ok(self.parse_stmt_without_recovery(false, force_collect).unwrap_or_else(|mut e| { e.emit(); self.recover_stmt_(SemiColonMode::Break, BlockMode::Ignore); None @@ -36,6 +35,7 @@ impl<'a> Parser<'a> { /// or not we have attributes fn parse_stmt_without_recovery( &mut self, + capture_semi: bool, force_collect: ForceCollect, ) -> PResult<'a, Option> { let mut attrs = self.parse_outer_attributes()?; @@ -50,68 +50,77 @@ impl<'a> Parser<'a> { Some(stmt) }); - maybe_collect_tokens!(self, force_collect, &attrs, |this: &mut Self| { - let stmt = if this.eat_keyword(kw::Let) { - this.parse_local_mk(lo, attrs.into())? - } else if this.is_kw_followed_by_ident(kw::Mut) { - this.recover_stmt_local(lo, attrs.into(), "missing keyword", "let mut")? - } else if this.is_kw_followed_by_ident(kw::Auto) { - this.bump(); // `auto` - let msg = "write `let` instead of `auto` to introduce a new variable"; - this.recover_stmt_local(lo, attrs.into(), msg, "let")? - } else if this.is_kw_followed_by_ident(sym::var) { - this.bump(); // `var` - let msg = "write `let` instead of `var` to introduce a new variable"; - this.recover_stmt_local(lo, attrs.into(), msg, "let")? - } else if this.check_path() - && !this.token.is_qpath_start() - && !this.is_path_start_item() - { - // We have avoided contextual keywords like `union`, items with `crate` visibility, - // or `auto trait` items. We aim to parse an arbitrary path `a::b` but not something - // that starts like a path (1 token), but it fact not a path. - // Also, we avoid stealing syntax from `parse_item_`. - this.parse_stmt_path_start(lo, attrs)? - } else if let Some(item) = - this.parse_item_common(attrs.clone(), false, true, |_| true, force_collect)? - { - // FIXME: Bad copy of attrs - this.mk_stmt(lo.to(item.span), StmtKind::Item(P(item))) - } else if this.eat(&token::Semi) { - // Do not attempt to parse an expression if we're done here. - this.error_outer_attrs(&attrs); - this.mk_stmt(lo, StmtKind::Empty) - } else if this.token != token::CloseDelim(token::Brace) { - // Remainder are line-expr stmts. - let e = this.parse_expr_res(Restrictions::STMT_EXPR, Some(attrs.into()))?; - this.mk_stmt(lo.to(e.span), StmtKind::Expr(e)) - } else { - this.error_outer_attrs(&attrs); - return Ok(None); - }; - Ok(Some(stmt)) - }) + Ok(Some(if self.token.is_keyword(kw::Let) { + self.parse_local_mk(lo, attrs.into(), capture_semi, force_collect)? + } else if self.is_kw_followed_by_ident(kw::Mut) { + self.recover_stmt_local(lo, attrs.into(), "missing keyword", "let mut")? + } else if self.is_kw_followed_by_ident(kw::Auto) { + self.bump(); // `auto` + let msg = "write `let` instead of `auto` to introduce a new variable"; + self.recover_stmt_local(lo, attrs.into(), msg, "let")? + } else if self.is_kw_followed_by_ident(sym::var) { + self.bump(); // `var` + let msg = "write `let` instead of `var` to introduce a new variable"; + self.recover_stmt_local(lo, attrs.into(), msg, "let")? + } else if self.check_path() && !self.token.is_qpath_start() && !self.is_path_start_item() { + // We have avoided contextual keywords like `union`, items with `crate` visibility, + // or `auto trait` items. We aim to parse an arbitrary path `a::b` but not something + // that starts like a path (1 token), but it fact not a path. + // Also, we avoid stealing syntax from `parse_item_`. + self.parse_stmt_path_start(lo, attrs, force_collect)? + } else if let Some(item) = + self.parse_item_common(attrs.clone(), false, true, |_| true, force_collect)? + { + // FIXME: Bad copy of attrs + self.mk_stmt(lo.to(item.span), StmtKind::Item(P(item))) + } else if self.eat(&token::Semi) { + // Do not attempt to parse an expression if we're done here. + self.error_outer_attrs(&attrs); + self.mk_stmt(lo, StmtKind::Empty) + } else if self.token != token::CloseDelim(token::Brace) { + // Remainder are line-expr stmts. + let e = self.parse_expr_res(Restrictions::STMT_EXPR, Some(attrs.into()))?; + self.mk_stmt(lo.to(e.span), StmtKind::Expr(e)) + } else { + self.error_outer_attrs(&attrs); + return Ok(None); + })) } - fn parse_stmt_path_start(&mut self, lo: Span, attrs: Vec) -> PResult<'a, Stmt> { - let path = self.parse_path(PathStyle::Expr)?; + fn parse_stmt_path_start( + &mut self, + lo: Span, + attrs: Vec, + force_collect: ForceCollect, + ) -> PResult<'a, Stmt> { + maybe_collect_tokens!(self, force_collect, &attrs, |this: &mut Parser<'a>| { + let path = this.parse_path(PathStyle::Expr)?; - if self.eat(&token::Not) { - return self.parse_stmt_mac(lo, attrs.into(), path); - } + if this.eat(&token::Not) { + let stmt_mac = this.parse_stmt_mac(lo, attrs.into(), path)?; + if this.token == token::Semi { + return Ok((stmt_mac, TrailingToken::Semi)); + } else { + return Ok((stmt_mac, TrailingToken::None)); + } + } - let expr = if self.eat(&token::OpenDelim(token::Brace)) { - self.parse_struct_expr(path, AttrVec::new(), true)? - } else { - let hi = self.prev_token.span; - self.mk_expr(lo.to(hi), ExprKind::Path(None, path), AttrVec::new()) - }; + let expr = if this.eat(&token::OpenDelim(token::Brace)) { + this.parse_struct_expr(path, AttrVec::new(), true)? + } else { + let hi = this.prev_token.span; + this.mk_expr(lo.to(hi), ExprKind::Path(None, path), AttrVec::new()) + }; - let expr = self.with_res(Restrictions::STMT_EXPR, |this| { - let expr = this.parse_dot_or_call_expr_with(expr, lo, attrs.into())?; - this.parse_assoc_expr_with(0, LhsExpr::AlreadyParsed(expr)) - })?; - Ok(self.mk_stmt(lo.to(self.prev_token.span), StmtKind::Expr(expr))) + let expr = this.with_res(Restrictions::STMT_EXPR, |this| { + let expr = this.parse_dot_or_call_expr_with(expr, lo, attrs.into())?; + this.parse_assoc_expr_with(0, LhsExpr::AlreadyParsed(expr)) + })?; + Ok(( + this.mk_stmt(lo.to(this.prev_token.span), StmtKind::Expr(expr)), + TrailingToken::None, + )) + }) } /// Parses a statement macro `mac!(args)` provided a `path` representing `mac`. @@ -159,15 +168,34 @@ impl<'a> Parser<'a> { msg: &str, sugg: &str, ) -> PResult<'a, Stmt> { - let stmt = self.parse_local_mk(lo, attrs)?; + let stmt = self.recover_local_after_let(lo, attrs)?; self.struct_span_err(lo, "invalid variable declaration") .span_suggestion(lo, msg, sugg.to_string(), Applicability::MachineApplicable) .emit(); Ok(stmt) } - fn parse_local_mk(&mut self, lo: Span, attrs: AttrVec) -> PResult<'a, Stmt> { - let local = self.parse_local(attrs)?; + fn parse_local_mk( + &mut self, + lo: Span, + attrs: AttrVec, + capture_semi: bool, + force_collect: ForceCollect, + ) -> PResult<'a, Stmt> { + maybe_collect_tokens!(self, force_collect, &attrs, |this: &mut Parser<'a>| { + this.expect_keyword(kw::Let)?; + let local = this.parse_local(attrs.into())?; + let trailing = if capture_semi && this.token.kind == token::Semi { + TrailingToken::Semi + } else { + TrailingToken::None + }; + Ok((this.mk_stmt(lo.to(this.prev_token.span), StmtKind::Local(local)), trailing)) + }) + } + + fn recover_local_after_let(&mut self, lo: Span, attrs: AttrVec) -> PResult<'a, Stmt> { + let local = self.parse_local(attrs.into())?; Ok(self.mk_stmt(lo.to(self.prev_token.span), StmtKind::Local(local))) } @@ -289,7 +317,7 @@ impl<'a> Parser<'a> { // bar; // // which is valid in other languages, but not Rust. - match self.parse_stmt_without_recovery(ForceCollect::No) { + match self.parse_stmt_without_recovery(false, ForceCollect::No) { // If the next token is an open brace (e.g., `if a b {`), the place- // inside-a-block suggestion would be more likely wrong than right. Ok(Some(_)) @@ -392,17 +420,11 @@ impl<'a> Parser<'a> { // Skip looking for a trailing semicolon when we have an interpolated statement. maybe_whole!(self, NtStmt, |x| Some(x)); - let mut stmt = match self.parse_stmt_without_recovery(ForceCollect::No)? { + let mut stmt = match self.parse_stmt_without_recovery(true, ForceCollect::No)? { Some(stmt) => stmt, None => return Ok(None), }; - let add_semi_token = |tokens: Option<&mut LazyTokenStream>| { - if let Some(tokens) = tokens { - *tokens = tokens.add_trailing_semi(); - } - }; - let mut eat_semi = true; match stmt.kind { // Expression without semicolon. @@ -458,18 +480,12 @@ impl<'a> Parser<'a> { } } eat_semi = false; - // We just checked that there's a semicolon in the tokenstream, - // so capture it - add_semi_token(local.tokens.as_mut()); } StmtKind::Empty | StmtKind::Item(_) | StmtKind::Semi(_) => eat_semi = false, } if eat_semi && self.eat(&token::Semi) { stmt = stmt.add_trailing_semicolon(); - // We just checked that we have a semicolon in the tokenstream, - // so capture it - add_semi_token(stmt.tokens_mut()); } stmt.span = stmt.span.to(self.prev_token.span); Ok(Some(stmt))