Skip to content

Commit 9ce9567

Browse files
committed
Auto merge of rust-lang#14837 - Veykril:rustc-lexer, r=Veykril
Support c string literals
2 parents 034d7c8 + 3e528b8 commit 9ce9567

File tree

24 files changed

+381
-162
lines changed

24 files changed

+381
-162
lines changed

crates/hir-def/src/body/pretty.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -611,6 +611,7 @@ impl<'a> Printer<'a> {
611611
match literal {
612612
Literal::String(it) => w!(self, "{:?}", it),
613613
Literal::ByteString(it) => w!(self, "\"{}\"", it.escape_ascii()),
614+
Literal::CString(it) => w!(self, "\"{}\\0\"", it),
614615
Literal::Char(it) => w!(self, "'{}'", it.escape_debug()),
615616
Literal::Bool(it) => w!(self, "{}", it),
616617
Literal::Int(i, suffix) => {

crates/hir-def/src/hir.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,7 @@ impl fmt::Display for FloatTypeWrapper {
8585
pub enum Literal {
8686
String(Box<str>),
8787
ByteString(Box<[u8]>),
88+
CString(Box<str>),
8889
Char(char),
8990
Bool(bool),
9091
Int(i128, Option<BuiltinInt>),
@@ -135,6 +136,10 @@ impl From<ast::LiteralKind> for Literal {
135136
let text = s.value().map(Box::from).unwrap_or_else(Default::default);
136137
Literal::String(text)
137138
}
139+
LiteralKind::CString(s) => {
140+
let text = s.value().map(Box::from).unwrap_or_else(Default::default);
141+
Literal::CString(text)
142+
}
138143
LiteralKind::Byte(b) => {
139144
Literal::Uint(b.value().unwrap_or_default() as u128, Some(BuiltinUint::U8))
140145
}

crates/hir-def/src/lang_item.rs

Lines changed: 142 additions & 128 deletions
Large diffs are not rendered by default.

crates/hir-ty/src/consteval/tests.rs

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1883,6 +1883,38 @@ fn byte_string() {
18831883
);
18841884
}
18851885

1886+
// Const-eval test for C string literals: a `#[lang = "CStr"]` struct wrapping `[u8]` is
// declared in the fixture and individual bytes of `c"hello"` are read through `a.inner[..]`.
#[test]
1887+
fn c_string() {
1888+
// Case 1: the first byte of the literal is expected to be 104 (ASCII 'h').
check_number(
1889+
r#"
1890+
//- minicore: index, slice
1891+
#[lang = "CStr"]
1892+
pub struct CStr {
1893+
inner: [u8]
1894+
}
1895+
const GOAL: u8 = {
1896+
let a = c"hello";
1897+
a.inner[0]
1898+
};
1899+
"#,
1900+
104,
1901+
);
1902+
// Case 2: reads index 6 and expects 0.
// NOTE(review): "hello" is 5 bytes, so if the literal is stored as those bytes plus a single
// trailing NUL, the terminator is at index 5 and index 6 is one past the end — confirm whether
// index 5 was intended here.
check_number(
1903+
r#"
1904+
//- minicore: index, slice
1905+
#[lang = "CStr"]
1906+
pub struct CStr {
1907+
inner: [u8]
1908+
}
1909+
const GOAL: u8 = {
1910+
let a = c"hello";
1911+
a.inner[6]
1912+
};
1913+
"#,
1914+
0,
1915+
);
1916+
}
1917+
18861918
#[test]
18871919
fn consts() {
18881920
check_number(

crates/hir-ty/src/infer/expr.rs

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ use hir_def::{
1313
hir::{
1414
ArithOp, Array, BinaryOp, ClosureKind, Expr, ExprId, LabelId, Literal, Statement, UnaryOp,
1515
},
16-
lang_item::LangItem,
16+
lang_item::{LangItem, LangItemTarget},
1717
path::{GenericArg, GenericArgs},
1818
BlockId, ConstParamId, FieldId, ItemContainerId, Lookup,
1919
};
@@ -832,6 +832,20 @@ impl<'a> InferenceContext<'a> {
832832
let array_type = TyKind::Array(byte_type, len).intern(Interner);
833833
TyKind::Ref(Mutability::Not, static_lifetime(), array_type).intern(Interner)
834834
}
835+
Literal::CString(..) => TyKind::Ref(
836+
Mutability::Not,
837+
static_lifetime(),
838+
self.resolve_lang_item(LangItem::CStr)
839+
.and_then(LangItemTarget::as_struct)
840+
.map_or_else(
841+
|| self.err_ty(),
842+
|strukt| {
843+
TyKind::Adt(AdtId(strukt.into()), Substitution::empty(Interner))
844+
.intern(Interner)
845+
},
846+
),
847+
)
848+
.intern(Interner),
835849
Literal::Char(..) => TyKind::Scalar(Scalar::Char).intern(Interner),
836850
Literal::Int(_v, ty) => match ty {
837851
Some(int_ty) => {

crates/hir-ty/src/infer/pat.rs

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -428,9 +428,10 @@ fn is_non_ref_pat(body: &hir_def::body::Body, pat: PatId) -> bool {
428428
// FIXME: ConstBlock/Path/Lit might actually evaluate to ref, but inference is unimplemented.
429429
Pat::Path(..) => true,
430430
Pat::ConstBlock(..) => true,
431-
Pat::Lit(expr) => {
432-
!matches!(body[*expr], Expr::Literal(Literal::String(..) | Literal::ByteString(..)))
433-
}
431+
Pat::Lit(expr) => !matches!(
432+
body[*expr],
433+
Expr::Literal(Literal::String(..) | Literal::CString(..) | Literal::ByteString(..))
434+
),
434435
Pat::Wild | Pat::Bind { .. } | Pat::Ref { .. } | Pat::Box { .. } | Pat::Missing => false,
435436
}
436437
}

crates/hir-ty/src/mir/lower.rs

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1133,15 +1133,26 @@ impl<'ctx> MirLowerCtx<'ctx> {
11331133
let bytes = match l {
11341134
hir_def::hir::Literal::String(b) => {
11351135
let b = b.as_bytes();
1136-
let mut data = vec![];
1136+
let mut data = Vec::with_capacity(mem::size_of::<usize>() * 2);
11371137
data.extend(0usize.to_le_bytes());
11381138
data.extend(b.len().to_le_bytes());
11391139
let mut mm = MemoryMap::default();
11401140
mm.insert(0, b.to_vec());
11411141
return Ok(Operand::from_concrete_const(data, mm, ty));
11421142
}
1143+
hir_def::hir::Literal::CString(b) => {
1144+
let b = b.as_bytes();
1145+
let bytes = b.iter().copied().chain(iter::once(0)).collect::<Vec<_>>();
1146+
1147+
let mut data = Vec::with_capacity(mem::size_of::<usize>() * 2);
1148+
data.extend(0usize.to_le_bytes());
1149+
data.extend(bytes.len().to_le_bytes());
1150+
let mut mm = MemoryMap::default();
1151+
mm.insert(0, bytes);
1152+
return Ok(Operand::from_concrete_const(data, mm, ty));
1153+
}
11431154
hir_def::hir::Literal::ByteString(b) => {
1144-
let mut data = vec![];
1155+
let mut data = Vec::with_capacity(mem::size_of::<usize>() * 2);
11451156
data.extend(0usize.to_le_bytes());
11461157
data.extend(b.len().to_le_bytes());
11471158
let mut mm = MemoryMap::default();

crates/hir-ty/src/tests/simple.rs

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3572,3 +3572,18 @@ fn main() {
35723572
"#,
35733573
);
35743574
}
3575+
3576+
// Type-inference test for C string literals: with a struct marked `#[lang = "CStr"]` in the
// fixture, the expression `c"ello"` is expected to be annotated with the type `&CStr`.
#[test]
3577+
fn cstring_literals() {
3578+
check_types(
3579+
r#"
3580+
#[lang = "CStr"]
3581+
pub struct CStr;
3582+
3583+
fn main() {
3584+
c"ello";
3585+
//^^^^^^^ &CStr
3586+
}
3587+
"#,
3588+
);
3589+
}

crates/ide-assists/src/handlers/raw_string.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ use crate::{utils::required_hashes, AssistContext, AssistId, AssistKind, Assists
2020
// }
2121
// ```
2222
pub(crate) fn make_raw_string(acc: &mut Assists, ctx: &AssistContext<'_>) -> Option<()> {
23+
// FIXME: This should support byte and c strings as well.
2324
let token = ctx.find_token_at_offset::<ast::String>()?;
2425
if token.is_raw() {
2526
return None;

crates/ide/src/extend_selection.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ fn try_extend_selection(
3939
) -> Option<TextRange> {
4040
let range = frange.range;
4141

42-
let string_kinds = [COMMENT, STRING, BYTE_STRING];
42+
let string_kinds = [COMMENT, STRING, BYTE_STRING, C_STRING];
4343
let list_kinds = [
4444
RECORD_PAT_FIELD_LIST,
4545
MATCH_ARM_LIST,

crates/ide/src/syntax_highlighting.rs

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,10 @@ mod tests;
1616
use hir::{Name, Semantics};
1717
use ide_db::{FxHashMap, RootDatabase, SymbolKind};
1818
use syntax::{
19-
ast, AstNode, AstToken, NodeOrToken, SyntaxKind::*, SyntaxNode, TextRange, WalkEvent, T,
19+
ast::{self, IsString},
20+
AstNode, AstToken, NodeOrToken,
21+
SyntaxKind::*,
22+
SyntaxNode, TextRange, WalkEvent, T,
2023
};
2124

2225
use crate::{
@@ -440,7 +443,17 @@ fn traverse(
440443
&& ast::ByteString::can_cast(descended_token.kind())
441444
{
442445
if let Some(byte_string) = ast::ByteString::cast(token) {
443-
highlight_escape_string(hl, &byte_string, range.start());
446+
if !byte_string.is_raw() {
447+
highlight_escape_string(hl, &byte_string, range.start());
448+
}
449+
}
450+
} else if ast::CString::can_cast(token.kind())
451+
&& ast::CString::can_cast(descended_token.kind())
452+
{
453+
if let Some(c_string) = ast::CString::cast(token) {
454+
if !c_string.is_raw() {
455+
highlight_escape_string(hl, &c_string, range.start());
456+
}
444457
}
445458
} else if ast::Char::can_cast(token.kind())
446459
&& ast::Char::can_cast(descended_token.kind())

crates/ide/src/syntax_highlighting/highlight.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ pub(super) fn token(sema: &Semantics<'_, RootDatabase>, token: SyntaxToken) -> O
2626
}
2727

2828
let highlight: Highlight = match token.kind() {
29-
STRING | BYTE_STRING => HlTag::StringLiteral.into(),
29+
STRING | BYTE_STRING | C_STRING => HlTag::StringLiteral.into(),
3030
INT_NUMBER if token.parent_ancestors().nth(1).map(|it| it.kind()) == Some(FIELD_EXPR) => {
3131
SymbolKind::Field.into()
3232
}

crates/ide/src/syntax_tree.rs

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
1-
use ide_db::base_db::{FileId, SourceDatabase};
2-
use ide_db::RootDatabase;
1+
use ide_db::{
2+
base_db::{FileId, SourceDatabase},
3+
RootDatabase,
4+
};
35
use syntax::{
46
AstNode, NodeOrToken, SourceFile, SyntaxKind::STRING, SyntaxToken, TextRange, TextSize,
57
};

crates/parser/src/grammar/expressions/atom.rs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,8 @@ use super::*;
1212
// let _ = r"d";
1313
// let _ = b"e";
1414
// let _ = br"f";
15+
// let _ = c"g";
16+
// let _ = cr"h";
1517
// }
1618
pub(crate) const LITERAL_FIRST: TokenSet = TokenSet::new(&[
1719
T![true],
@@ -22,6 +24,7 @@ pub(crate) const LITERAL_FIRST: TokenSet = TokenSet::new(&[
2224
CHAR,
2325
STRING,
2426
BYTE_STRING,
27+
C_STRING,
2528
]);
2629

2730
pub(crate) fn literal(p: &mut Parser<'_>) -> Option<CompletedMarker> {

crates/parser/src/grammar/generic_args.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ const GENERIC_ARG_FIRST: TokenSet = TokenSet::new(&[
2828
BYTE,
2929
STRING,
3030
BYTE_STRING,
31+
C_STRING,
3132
])
3233
.union(types::TYPE_FIRST);
3334

crates/parser/src/lexed_str.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -277,7 +277,7 @@ impl<'a> Converter<'a> {
277277
if !terminated {
278278
err = "Missing trailing `\"` symbol to terminate the string literal";
279279
}
280-
STRING
280+
C_STRING
281281
}
282282
rustc_lexer::LiteralKind::RawStr { n_hashes } => {
283283
if n_hashes.is_none() {
@@ -295,7 +295,7 @@ impl<'a> Converter<'a> {
295295
if n_hashes.is_none() {
296296
err = "Invalid raw string literal";
297297
}
298-
STRING
298+
C_STRING
299299
}
300300
};
301301

crates/parser/test_data/parser/inline/ok/0085_expr_literals.rast

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,30 @@ SOURCE_FILE
131131
LITERAL
132132
BYTE_STRING "br\"f\""
133133
SEMICOLON ";"
134+
WHITESPACE "\n "
135+
LET_STMT
136+
LET_KW "let"
137+
WHITESPACE " "
138+
WILDCARD_PAT
139+
UNDERSCORE "_"
140+
WHITESPACE " "
141+
EQ "="
142+
WHITESPACE " "
143+
LITERAL
144+
C_STRING "c\"g\""
145+
SEMICOLON ";"
146+
WHITESPACE "\n "
147+
LET_STMT
148+
LET_KW "let"
149+
WHITESPACE " "
150+
WILDCARD_PAT
151+
UNDERSCORE "_"
152+
WHITESPACE " "
153+
EQ "="
154+
WHITESPACE " "
155+
LITERAL
156+
C_STRING "cr\"h\""
157+
SEMICOLON ";"
134158
WHITESPACE "\n"
135159
R_CURLY "}"
136160
WHITESPACE "\n"

crates/parser/test_data/parser/inline/ok/0085_expr_literals.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,4 +9,6 @@ fn foo() {
99
let _ = r"d";
1010
let _ = b"e";
1111
let _ = br"f";
12+
let _ = c"g";
13+
let _ = cr"h";
1214
}

crates/syntax/src/ast/expr_ext.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -288,6 +288,7 @@ impl ast::ArrayExpr {
288288
pub enum LiteralKind {
289289
String(ast::String),
290290
ByteString(ast::ByteString),
291+
CString(ast::CString),
291292
IntNumber(ast::IntNumber),
292293
FloatNumber(ast::FloatNumber),
293294
Char(ast::Char),
@@ -319,6 +320,9 @@ impl ast::Literal {
319320
if let Some(t) = ast::ByteString::cast(token.clone()) {
320321
return LiteralKind::ByteString(t);
321322
}
323+
if let Some(t) = ast::CString::cast(token.clone()) {
324+
return LiteralKind::CString(t);
325+
}
322326
if let Some(t) = ast::Char::cast(token.clone()) {
323327
return LiteralKind::Char(t);
324328
}

crates/syntax/src/ast/generated/tokens.rs

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,27 @@ impl AstToken for ByteString {
9090
fn syntax(&self) -> &SyntaxToken { &self.syntax }
9191
}
9292

93+
// Typed wrapper around a `SyntaxToken` whose kind is `C_STRING` (see `can_cast` below).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
94+
pub struct CString {
95+
pub(crate) syntax: SyntaxToken,
96+
}
97+
// Displays the wrapper by delegating to the underlying syntax token's `Display` impl.
impl std::fmt::Display for CString {
98+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99+
std::fmt::Display::fmt(&self.syntax, f)
100+
}
101+
}
102+
impl AstToken for CString {
103+
// Only tokens of kind `C_STRING` can be viewed as a `CString`.
fn can_cast(kind: SyntaxKind) -> bool { kind == C_STRING }
104+
// Wraps `syntax` when its kind passes `can_cast`; returns `None` for any other kind.
fn cast(syntax: SyntaxToken) -> Option<Self> {
105+
if Self::can_cast(syntax.kind()) {
106+
Some(Self { syntax })
107+
} else {
108+
None
109+
}
110+
}
111+
// Borrows the wrapped token.
fn syntax(&self) -> &SyntaxToken { &self.syntax }
112+
}
113+
93114
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
94115
pub struct IntNumber {
95116
pub(crate) syntax: SyntaxToken,

0 commit comments

Comments
 (0)