Skip to content

Commit 1266e97

Browse files
committed
gccrs: Add punycode encoding to v0 mangling
gcc/rust/ChangeLog:

	* backend/rust-mangle.cc (v0_add_identifier): Added punycode encoding
	(v0_mangle_item): Likewise.
	* lex/rust-lex.cc (assert_source_content): Change type
	(test_buffer_input_source): Change type
	(test_file_input_source): Change type
	* resolve/rust-ast-resolve-toplevel.h: fix typo
	* rust-session-manager.cc (Session::load_extern_crate): fix typo
	* util/rust-canonical-path.h: fix typo
	* util/rust-hir-map.cc (NodeMapping::get_error): fix typo
	(Mappings::Mappings): fix typo
	* util/rust-mapping-common.h (UNKNOWN_CREATENUM): fix typo
	(UNKNOWN_CRATENUM): Change 0 to UINT32_MAX

Signed-off-by: Raiki Tamura <[email protected]>
1 parent 74b27ac commit 1266e97

7 files changed

+55
-29
lines changed

gcc/rust/backend/rust-mangle.cc

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
#include "rust-mangle.h"
22
#include "fnv-hash.h"
3+
#include "optional.h"
34
#include "rust-base62.h"
45
#include "rust-unicode.h"
5-
#include "optional.h"
6+
#include "rust-diagnostics.h"
7+
#include "rust-unicode.h"
8+
#include "rust-punycode.h"
69

710
// FIXME: Rename those to legacy_*
811
static const std::string kMangledSymbolPrefix = "_ZN";
@@ -249,22 +252,43 @@ v0_add_disambiguator (std::string &mangled, uint64_t dis)
249252
static void
250253
v0_add_identifier (std::string &mangled, const std::string &identifier)
251254
{
252-
// FIXME: gccrs cannot handle unicode identifiers yet, so we never have to
253-
// create mangling for unicode values for now. However, this is handled
254-
// by the v0 mangling scheme. The grammar for unicode identifier is
255-
// contained in <undisambiguated-identifier>, right under the <identifier>
256-
// one. If the identifier contains unicode values, then an extra "u" needs
257-
// to be added to the mangling string and `punycode` must be used to encode
258-
// the characters.
259-
260-
mangled += std::to_string (identifier.size ());
255+
// The grammar for unicode identifier is contained in
256+
// <undisambiguated-identifier>, right under the <identifier> one. If the
257+
// identifier contains unicode values, then an extra "u" needs to be added to
258+
// the mangling string and `punycode` must be used to encode the characters.
259+
tl::optional<Utf8String> uident_opt
260+
= Utf8String::make_utf8_string (identifier);
261+
if (uident_opt == tl::nullopt)
262+
rust_unreachable ();
263+
tl::optional<std::string> punycode_opt
264+
= encode_punycode (uident_opt.value ());
265+
if (punycode_opt == tl::nullopt)
266+
rust_unreachable ();
261267

268+
bool is_ascii_ident = true;
269+
for (auto c : uident_opt.value ().get_chars ())
270+
if (c.value > 127)
271+
{
272+
is_ascii_ident = false;
273+
break;
274+
}
275+
276+
std::string punycode = punycode_opt.value ();
277+
// remove trailing hyphen
278+
if (punycode.back () == '-')
279+
punycode.pop_back ();
280+
// replace hyphens in punycode with underscores
281+
std::replace (punycode.begin (), punycode.end (), '-', '_');
282+
283+
if (!is_ascii_ident)
284+
mangled.append ("u");
285+
286+
mangled += std::to_string (punycode.size ());
262287
// If the first character of the identifier is a digit or an underscore, we
263288
// add an extra underscore
264-
if (identifier[0] == '_')
265-
mangled.append ("_");
266-
267-
mangled.append (identifier);
289+
if (punycode[0] == '_')
290+
mangled += "_";
291+
mangled += punycode;
268292
}
269293

270294
static std::string
@@ -300,9 +324,9 @@ v0_mangle_item (const TyTy::BaseType *ty, const Resolver::CanonicalPath &path)
300324

301325
std::string mangled;
302326
// FIXME: Add real algorithm once all pieces are implemented
303-
auto ty_prefix = v0_type_prefix (ty);
304327
v0_add_identifier (mangled, crate_name);
305328
v0_add_disambiguator (mangled, 62);
329+
auto ty_prefix = v0_type_prefix (ty);
306330

307331
rust_unreachable ();
308332
}

gcc/rust/lex/rust-lex.cc

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2533,8 +2533,9 @@ Lexer::start_line (int current_line, int current_column)
25332533
namespace selftest {
25342534

25352535
// Checks if `src` has the same contents as the given characters
2536-
void
2537-
assert_source_content (Rust::InputSource &src, std::vector<uint32_t> expected)
2536+
static void
2537+
assert_source_content (Rust::InputSource &src,
2538+
const std::vector<uint32_t> &expected)
25382539
{
25392540
Rust::Codepoint src_char = src.next ();
25402541
for (auto expected_char : expected)
@@ -2549,15 +2550,16 @@ assert_source_content (Rust::InputSource &src, std::vector<uint32_t> expected)
25492550
ASSERT_TRUE (src_char.is_eof ());
25502551
}
25512552

2552-
void
2553-
test_buffer_input_source (std::string str, std::vector<uint32_t> expected)
2553+
static void
2554+
test_buffer_input_source (std::string str,
2555+
const std::vector<uint32_t> &expected)
25542556
{
25552557
Rust::BufferInputSource source (str, 0);
25562558
assert_source_content (source, expected);
25572559
}
25582560

2559-
void
2560-
test_file_input_source (std::string str, std::vector<uint32_t> expected)
2561+
static void
2562+
test_file_input_source (std::string str, const std::vector<uint32_t> &expected)
25612563
{
25622564
FILE *tmpf = tmpfile ();
25632565
// Moves to the first character

gcc/rust/resolve/rust-ast-resolve-toplevel.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ class ResolveTopLevel : public ResolverBase
430430
}
431431
else
432432
{
433-
CrateNum found_crate_num = UNKNOWN_CREATENUM;
433+
CrateNum found_crate_num = UNKNOWN_CRATENUM;
434434
bool found
435435
= mappings->lookup_crate_name (extern_crate.get_referenced_crate (),
436436
found_crate_num);

gcc/rust/rust-session-manager.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -979,7 +979,7 @@ NodeId
979979
Session::load_extern_crate (const std::string &crate_name, location_t locus)
980980
{
981981
// has it already been loaded?
982-
CrateNum found_crate_num = UNKNOWN_CREATENUM;
982+
CrateNum found_crate_num = UNKNOWN_CRATENUM;
983983
bool found = mappings->lookup_crate_name (crate_name, found_crate_num);
984984
if (found)
985985
{

gcc/rust/util/rust-canonical-path.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ class CanonicalPath
5858
{
5959
rust_assert (!path.empty ());
6060
return CanonicalPath ({std::pair<NodeId, std::string> (id, path)},
61-
UNKNOWN_CREATENUM);
61+
UNKNOWN_CRATENUM);
6262
}
6363

6464
static CanonicalPath
@@ -88,7 +88,7 @@ class CanonicalPath
8888

8989
static CanonicalPath create_empty ()
9090
{
91-
return CanonicalPath ({}, UNKNOWN_CREATENUM);
91+
return CanonicalPath ({}, UNKNOWN_CRATENUM);
9292
}
9393

9494
bool is_empty () const { return segs.size () == 0; }
@@ -171,7 +171,7 @@ class CanonicalPath
171171

172172
CrateNum get_crate_num () const
173173
{
174-
rust_assert (crate_num != UNKNOWN_CREATENUM);
174+
rust_assert (crate_num != UNKNOWN_CRATENUM);
175175
return crate_num;
176176
}
177177

gcc/rust/util/rust-hir-map.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ namespace Analysis {
2929
NodeMapping
3030
NodeMapping::get_error ()
3131
{
32-
return NodeMapping (UNKNOWN_CREATENUM, UNKNOWN_NODEID, UNKNOWN_HIRID,
32+
return NodeMapping (UNKNOWN_CRATENUM, UNKNOWN_NODEID, UNKNOWN_HIRID,
3333
UNKNOWN_LOCAL_DEFID);
3434
}
3535

@@ -94,7 +94,7 @@ static const HirId kDefaultHirIdBegin = 1;
9494
static const HirId kDefaultCrateNumBegin = 0;
9595

9696
Mappings::Mappings ()
97-
: crateNumItr (kDefaultCrateNumBegin), currentCrateNum (UNKNOWN_CREATENUM),
97+
: crateNumItr (kDefaultCrateNumBegin), currentCrateNum (UNKNOWN_CRATENUM),
9898
hirIdIter (kDefaultHirIdBegin), nodeIdIter (kDefaultNodeIdBegin)
9999
{
100100
Analysis::NodeMapping node (0, 0, 0, 0);

gcc/rust/util/rust-mapping-common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ struct DefId
6161
}
6262
};
6363

64-
#define UNKNOWN_CREATENUM ((uint32_t) (0))
64+
#define UNKNOWN_CRATENUM ((uint32_t) (UINT32_MAX))
6565
#define UNKNOWN_NODEID ((uint32_t) (0))
6666
#define UNKNOWN_HIRID ((uint32_t) (0))
6767
#define UNKNOWN_LOCAL_DEFID ((uint32_t) (0))

0 commit comments

Comments
 (0)