-
Notifications
You must be signed in to change notification settings - Fork 180
Unicode check for crate_name attribute #2463
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -42,6 +42,7 @@ | |||||||
#include "rust-early-name-resolver.h" | ||||||||
#include "rust-cfg-strip.h" | ||||||||
#include "rust-expand-visitor.h" | ||||||||
#include "rust-unicode.h" | ||||||||
|
||||||||
#include "diagnostic.h" | ||||||||
#include "input.h" | ||||||||
|
@@ -107,30 +108,39 @@ infer_crate_name (const std::string &filename) | |||||||
return crate; | ||||||||
} | ||||||||
|
||||||||
/* Validate the crate name using the ASCII rules | ||||||||
TODO: Support Unicode version of the rules */ | ||||||||
/* Validate the crate name: Unicode alphabetic or numeric characters, or '_' */ | ||||||||
|
||||||||
static bool | ||||||||
validate_crate_name (const std::string &crate_name, Error &error) | ||||||||
Comment on lines
113
to
114
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this function would probably benefit from the new |
||||||||
{ | ||||||||
if (crate_name.empty ()) | ||||||||
Utf8String utf8_name = {crate_name}; | ||||||||
tl::optional<std::vector<Codepoint>> uchars_opt = utf8_name.get_chars (); | ||||||||
|
||||||||
if (!uchars_opt.has_value ()) | ||||||||
{ | ||||||||
error = Error (UNDEF_LOCATION, "crate name is not a valid UTF-8 string"); | ||||||||
return false; | ||||||||
} | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and then add this
Suggested change
|
||||||||
|
||||||||
std::vector<Codepoint> uchars = uchars_opt.value (); | ||||||||
if (uchars.empty ()) | ||||||||
{ | ||||||||
error = Error (UNDEF_LOCATION, "crate name cannot be empty"); | ||||||||
return false; | ||||||||
} | ||||||||
if (crate_name.length () > kMaxNameLength) | ||||||||
if (uchars.size () > kMaxNameLength) | ||||||||
{ | ||||||||
error = Error (UNDEF_LOCATION, "crate name cannot exceed %lu characters", | ||||||||
(unsigned long) kMaxNameLength); | ||||||||
return false; | ||||||||
} | ||||||||
for (auto &c : crate_name) | ||||||||
for (Codepoint &c : uchars) | ||||||||
{ | ||||||||
if (!(ISALNUM (c) || c == '_')) | ||||||||
if (!(is_alphabetic (c.value) || is_numeric (c.value) || c.value == '_')) | ||||||||
{ | ||||||||
error = Error (UNDEF_LOCATION, | ||||||||
"invalid character %<%c%> in crate name: %<%s%>", c, | ||||||||
crate_name.c_str ()); | ||||||||
"invalid character %<%s%> in crate name: %<%s%>", | ||||||||
c.as_string ().c_str (), crate_name.c_str ()); | ||||||||
return false; | ||||||||
} | ||||||||
} | ||||||||
|
@@ -1273,13 +1283,17 @@ rust_crate_name_validation_test (void) | |||||||
ASSERT_TRUE (Rust::validate_crate_name ("example", error)); | ||||||||
ASSERT_TRUE (Rust::validate_crate_name ("abcdefg_1234", error)); | ||||||||
ASSERT_TRUE (Rust::validate_crate_name ("1", error)); | ||||||||
// FIXME: The next test does not pass as of current implementation | ||||||||
// ASSERT_TRUE (Rust::CompileOptions::validate_crate_name ("惊吓")); | ||||||||
Comment on lines
-1276
to
-1277
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this original comment is probably wrong. In https://www.unicode.org/Public/14.0.0/ucd/DerivedCoreProperties.txt
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thanks for checking haha. did you try it with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh my god. i just checked and rustc compiles it (@_@) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I overlooked There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hahaha no worries it's all good :D |
||||||||
ASSERT_TRUE (Rust::validate_crate_name ("クレート", error)); | ||||||||
ASSERT_TRUE (Rust::validate_crate_name ("Sōkrátēs", error)); | ||||||||
ASSERT_TRUE (Rust::validate_crate_name ("惊吓", error)); | ||||||||
|
||||||||
// NOTE: - is not allowed in the crate name ... | ||||||||
|
||||||||
ASSERT_FALSE (Rust::validate_crate_name ("abcdefg-1234", error)); | ||||||||
ASSERT_FALSE (Rust::validate_crate_name ("a+b", error)); | ||||||||
ASSERT_FALSE (Rust::validate_crate_name ("/a+b/", error)); | ||||||||
ASSERT_FALSE (Rust::validate_crate_name ("😸++", error)); | ||||||||
ASSERT_FALSE (Rust::validate_crate_name ("∀", error)); | ||||||||
|
||||||||
/* Tests for crate name inference */ | ||||||||
ASSERT_EQ (Rust::infer_crate_name ("c.rs"), "c"); | ||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,10 +19,29 @@ | |
#ifndef RUST_UNICODE_H | ||
#define RUST_UNICODE_H | ||
|
||
#include "optional.h" | ||
#include "rust-system.h" | ||
#include "rust-lex.h" | ||
|
||
namespace Rust { | ||
|
||
class Utf8String | ||
{ | ||
private: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I was about to raise a nit here about moving the private fields to the bottom of the class declaration, but then I realised we use this style in the AST and HIR classes. What is the GCC style? I think having them at the top like this is probably best, now that I think about it. @dkm @CohenArthur @tschwinge any opinions on the style of putting private fields at the top of the class or bottom? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I like having private fields at the bottom of my classes personally. I never gave it much attention haha |
||
tl::optional<std::vector<Codepoint>> chars; | ||
|
||
public: | ||
Utf8String (const std::string &maybe_utf8) | ||
{ | ||
Lexer::BufferInputSource input_source = {maybe_utf8, 0}; | ||
chars = input_source.get_chars (); | ||
} | ||
|
||
// Returns the Unicode codepoints when the string is valid UTF-8; returns | ||
// nullopt otherwise. | ||
tl::optional<std::vector<Codepoint>> get_chars () const { return chars; } | ||
}; | ||
|
||
// TODO: add function nfc_normalize | ||
|
||
bool | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
#![crate_name = "😅"] // { dg-error "invalid character ...." "" } | ||
fn main() {} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this class should be moved to rust-unicode.h.
At the very least, it should not be placed under gcc/rust/lex/.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
that seems good to me