Skip to content

Commit 00dea7e

Browse files
tamaroning authored and CohenArthur committed
libcpp: add function to check XID properties
This commit adds a new function for checking the XID properties of a character that may be a non-ASCII Unicode code point, together with the enum describing the possible properties. libcpp/ChangeLog: * charset.cc (cpp_check_xid_property): New. * include/cpplib.h (cpp_check_xid_property): New. (enum cpp_xid_property): New. Signed-off-by: Raiki Tamura <[email protected]>
1 parent 4ded42c commit 00dea7e

File tree

2 files changed

+43
-0
lines changed

2 files changed

+43
-0
lines changed

libcpp/charset.cc

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1332,6 +1332,42 @@ _cpp_uname2c_uax44_lm2 (const char *name, size_t len, char *canon_name)
13321332
return result;
13331333
}
13341334

1335+
/* Returns flags representing the XID properties of the given codepoint. */
1336+
unsigned int
1337+
cpp_check_xid_property (cppchar_t c)
1338+
{
1339+
// fast path for ASCII
1340+
if (c < 0x80)
1341+
{
1342+
if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'))
1343+
return CPP_XID_START | CPP_XID_CONTINUE;
1344+
if (('0' <= c && c <= '9') || c == '_')
1345+
return CPP_XID_CONTINUE;
1346+
}
1347+
1348+
if (c > UCS_LIMIT)
1349+
return 0;
1350+
1351+
int mn, mx, md;
1352+
mn = 0;
1353+
mx = ARRAY_SIZE (ucnranges) - 1;
1354+
while (mx != mn)
1355+
{
1356+
md = (mn + mx) / 2;
1357+
if (c <= ucnranges[md].end)
1358+
mx = md;
1359+
else
1360+
mn = md + 1;
1361+
}
1362+
1363+
unsigned short flags = ucnranges[mn].flags;
1364+
1365+
if (flags & CXX23)
1366+
return CPP_XID_START | CPP_XID_CONTINUE;
1367+
if (flags & NXX23)
1368+
return CPP_XID_CONTINUE;
1369+
return 0;
1370+
}
13351371

13361372
/* Returns 1 if C is valid in an identifier, 2 if C is valid except at
13371373
the start of an identifier, and 0 if C is not valid in an

libcpp/include/cpplib.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1631,4 +1631,11 @@ bool cpp_valid_utf8_p (const char *data, size_t num_bytes);
16311631
bool cpp_is_combining_char (cppchar_t c);
16321632
bool cpp_is_printable_char (cppchar_t c);
16331633

1634+
/* Bit flags describing the XID properties of a code point.  Each
   enumerator occupies its own bit so the values can be OR'ed together
   into the unsigned int mask returned by cpp_check_xid_property.  */
enum cpp_xid_property
{
  CPP_XID_START = 1 << 0,
  CPP_XID_CONTINUE = 1 << 1
};
1638+
1639+
/* Returns a bitmask of cpp_xid_property flags (CPP_XID_START and/or
   CPP_XID_CONTINUE) describing code point C, or 0 if neither applies.  */
unsigned int cpp_check_xid_property (cppchar_t c);
1640+
16341641
#endif /* ! LIBCPP_CPPLIB_H */

0 commit comments

Comments
 (0)