Skip to content

Commit ee3789b

Browse files
committed
auto merge of #6029 : Kimundi/rust/ascii-encoding, r=thestinger
Replaced {str, char, u8}::is_ascii. Replaced str::to_lower and str::to_upper.
2 parents e26f992 + 3759b57 commit ee3789b

File tree

13 files changed

+60
-95
lines changed

13 files changed

+60
-95
lines changed

doc/rust.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -802,7 +802,7 @@ An example of `use` declarations:
802802

803803
~~~~
804804
use core::float::sin;
805-
use core::str::{slice, to_upper};
805+
use core::str::{slice, contains};
806806
use core::option::Some;
807807
808808
fn main() {
@@ -813,8 +813,8 @@ fn main() {
813813
info!(Some(1.0));
814814
815815
// Equivalent to
816-
// 'info!(core::str::to_upper(core::str::slice("foo", 0, 1)));'
817-
info!(to_upper(slice("foo", 0, 1)));
816+
// 'info!(core::str::contains(core::str::slice("foo", 0, 1), "oo"));'
817+
info!(contains(slice("foo", 0, 1), "oo"));
818818
}
819819
~~~~
820820

src/compiletest/errors.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,11 @@ fn parse_expected(line_num: uint, line: ~str) -> ~[ExpectedError] {
5050
while idx < len && line[idx] == (' ' as u8) { idx += 1u; }
5151
let start_kind = idx;
5252
while idx < len && line[idx] != (' ' as u8) { idx += 1u; }
53-
let kind = str::to_lower(str::slice(line, start_kind, idx).to_owned());
53+
54+
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
55+
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
56+
let kind = str::slice(line, start_kind, idx);
57+
let kind = kind.to_ascii().to_lower().to_str_ascii();
5458

5559
// Extract msg:
5660
while idx < len && line[idx] == (' ' as u8) { idx += 1u; }

src/libcore/char.rs

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,6 @@ pub fn is_alphanumeric(c: char) -> bool {
100100
unicode::general_category::No(c);
101101
}
102102

103-
/// Indicates whether the character is an ASCII character
104-
#[inline(always)]
105-
pub fn is_ascii(c: char) -> bool {
106-
c - ('\x7F' & c) == '\x00'
107-
}
108-
109103
/// Indicates whether the character is numeric (Nd, Nl, or No)
110104
#[inline(always)]
111105
pub fn is_digit(c: char) -> bool {
@@ -116,7 +110,7 @@ pub fn is_digit(c: char) -> bool {
116110

117111
/**
118112
* Checks if a character parses as a numeric digit in the given radix.
119-
* Compared to `is_digit()`, this function only recognizes the ascii
113+
* Compared to `is_digit()`, this function only recognizes the
120114
* characters `0-9`, `a-z` and `A-Z`.
121115
*
122116
* Returns `true` if `c` is a valid digit under `radix`, and `false`
@@ -163,7 +157,7 @@ pub fn to_digit(c: char, radix: uint) -> Option<uint> {
163157
}
164158

165159
/**
166-
* Converts a number to the ascii character representing it.
160+
* Converts a number to the character representing it.
167161
*
168162
* Returns `Some(char)` if `num` represents one digit under `radix`,
169163
* using one character of `0-9` or `a-z`, or `None` if it doesn't.
@@ -316,12 +310,6 @@ fn test_to_digit() {
316310
assert!(to_digit('$', 36u).is_none());
317311
}
318312
319-
#[test]
320-
fn test_is_ascii() {
321-
assert!(str::all(~"banana", is_ascii));
322-
assert!(! str::all(~"ประเทศไทย中华Việt Nam", is_ascii));
323-
}
324-
325313
#[test]
326314
fn test_is_digit() {
327315
assert!(is_digit('2'));

src/libcore/num/uint-template/u8.rs

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,9 @@
1010

1111
//! Operations and constants for `u8`
1212
13-
pub use self::inst::is_ascii;
14-
1513
mod inst {
1614
pub type T = u8;
1715
#[allow(non_camel_case_types)]
1816
pub type T_SIGNED = i8;
1917
pub static bits: uint = 8;
20-
21-
// Type-specific functions here. These must be reexported by the
22-
// parent module so that they appear in core::u8 and not core::u8::u8;
23-
24-
pub fn is_ascii(x: T) -> bool { return 0 as T == x & 128 as T; }
2518
}

src/libcore/path.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use libc;
1919
use option::{None, Option, Some};
2020
use str;
2121
use to_str::ToStr;
22+
use ascii::{AsciiCast, AsciiStr};
2223

2324
#[deriving(Clone, Eq)]
2425
pub struct WindowsPath {
@@ -753,7 +754,9 @@ impl GenericPath for WindowsPath {
753754
fn is_restricted(&self) -> bool {
754755
match self.filestem() {
755756
Some(stem) => {
756-
match stem.to_lower() {
757+
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
758+
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
759+
match stem.to_ascii().to_lower().to_str_ascii() {
757760
~"con" | ~"aux" | ~"com1" | ~"com2" | ~"com3" | ~"com4" |
758761
~"lpt1" | ~"lpt2" | ~"lpt3" | ~"prn" | ~"nul" => true,
759762
_ => false
@@ -809,7 +812,10 @@ impl GenericPath for WindowsPath {
809812
host: copy self.host,
810813
device: match self.device {
811814
None => None,
812-
Some(ref device) => Some(device.to_upper())
815+
816+
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
817+
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
818+
Some(ref device) => Some(device.to_ascii().to_upper().to_str_ascii())
813819
},
814820
is_absolute: self.is_absolute,
815821
components: normalize(self.components)

src/libcore/str.rs

Lines changed: 0 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ use option::{None, Option, Some};
2727
use iterator::Iterator;
2828
use ptr;
2929
use str;
30-
use u8;
3130
use uint;
3231
use vec;
3332
use to_str::ToStr;
@@ -787,22 +786,6 @@ pub fn each_split_within<'a>(ss: &'a str,
787786
}
788787
}
789788

790-
/// Convert a string to lowercase. ASCII only
791-
pub fn to_lower(s: &str) -> ~str {
792-
do map(s) |c| {
793-
assert!(char::is_ascii(c));
794-
(unsafe{libc::tolower(c as libc::c_char)}) as char
795-
}
796-
}
797-
798-
/// Convert a string to uppercase. ASCII only
799-
pub fn to_upper(s: &str) -> ~str {
800-
do map(s) |c| {
801-
assert!(char::is_ascii(c));
802-
(unsafe{libc::toupper(c as libc::c_char)}) as char
803-
}
804-
}
805-
806789
/**
807790
* Replace all occurrences of one string with another
808791
*
@@ -1610,13 +1593,6 @@ pub fn ends_with<'a,'b>(haystack: &'a str, needle: &'b str) -> bool {
16101593
Section: String properties
16111594
*/
16121595

1613-
/// Determines if a string contains only ASCII characters
1614-
pub fn is_ascii(s: &str) -> bool {
1615-
let mut i: uint = len(s);
1616-
while i > 0u { i -= 1u; if !u8::is_ascii(s[i]) { return false; } }
1617-
return true;
1618-
}
1619-
16201596
/// Returns true if the string has length 0
16211597
pub fn is_empty(s: &str) -> bool { len(s) == 0u }
16221598

@@ -2403,8 +2379,6 @@ pub trait StrSlice<'self> {
24032379
fn each_split_str<'a>(&self, sep: &'a str, it: &fn(&'self str) -> bool);
24042380
fn starts_with<'a>(&self, needle: &'a str) -> bool;
24052381
fn substr(&self, begin: uint, n: uint) -> &'self str;
2406-
fn to_lower(&self) -> ~str;
2407-
fn to_upper(&self) -> ~str;
24082382
fn escape_default(&self) -> ~str;
24092383
fn escape_unicode(&self) -> ~str;
24102384
fn trim(&self) -> &'self str;
@@ -2565,12 +2539,6 @@ impl<'self> StrSlice<'self> for &'self str {
25652539
fn substr(&self, begin: uint, n: uint) -> &'self str {
25662540
substr(*self, begin, n)
25672541
}
2568-
/// Convert a string to lowercase
2569-
#[inline]
2570-
fn to_lower(&self) -> ~str { to_lower(*self) }
2571-
/// Convert a string to uppercase
2572-
#[inline]
2573-
fn to_upper(&self) -> ~str { to_upper(*self) }
25742542
/// Escape each char in `s` with char::escape_default.
25752543
#[inline]
25762544
fn escape_default(&self) -> ~str { escape_default(*self) }
@@ -3084,27 +3052,6 @@ mod tests {
30843052
assert!(repeat(~"hi", 0) == ~"");
30853053
}
30863054
3087-
#[test]
3088-
fn test_to_upper() {
3089-
// libc::toupper, and hence str::to_upper
3090-
// are culturally insensitive: they only work for ASCII
3091-
// (see Issue #1347)
3092-
let unicode = ~""; //"\u65e5\u672c"; // uncomment once non-ASCII works
3093-
let input = ~"abcDEF" + unicode + ~"xyz:.;";
3094-
let expected = ~"ABCDEF" + unicode + ~"XYZ:.;";
3095-
let actual = to_upper(input);
3096-
assert!(expected == actual);
3097-
}
3098-
3099-
#[test]
3100-
fn test_to_lower() {
3101-
// libc::tolower, and hence str::to_lower
3102-
// are culturally insensitive: they only work for ASCII
3103-
// (see Issue #1347)
3104-
assert!(~"" == to_lower(""));
3105-
assert!(~"ymca" == to_lower("YMCA"));
3106-
}
3107-
31083055
#[test]
31093056
fn test_unsafe_slice() {
31103057
assert!("ab" == unsafe {raw::slice_bytes("abc", 0, 2)});
@@ -3337,13 +3284,6 @@ mod tests {
33373284
assert!((!is_whitespace(~" _ ")));
33383285
}
33393286
3340-
#[test]
3341-
fn test_is_ascii() {
3342-
assert!((is_ascii(~"")));
3343-
assert!((is_ascii(~"a")));
3344-
assert!((!is_ascii(~"\u2009")));
3345-
}
3346-
33473287
#[test]
33483288
fn test_shift_byte() {
33493289
let mut s = ~"ABC";

src/libcore/str/ascii.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ impl ToStrConsume for ~[Ascii] {
199199
#[cfg(test)]
200200
mod tests {
201201
use super::*;
202+
use str;
202203

203204
macro_rules! v2ascii (
204205
( [$($e:expr),*]) => ( [$(Ascii{chr:$e}),*]);
@@ -221,6 +222,9 @@ mod tests {
221222
assert_eq!('['.to_ascii().to_lower().to_char(), '[');
222223
assert_eq!('`'.to_ascii().to_upper().to_char(), '`');
223224
assert_eq!('{'.to_ascii().to_upper().to_char(), '{');
225+
226+
assert!(str::all(~"banana", |c| c.is_ascii()));
227+
assert!(! str::all(~"ประเทศไทย中华Việt Nam", |c| c.is_ascii()));
224228
}
225229
226230
#[test]
@@ -234,6 +238,15 @@ mod tests {
234238
235239
assert_eq!("abCDef&?#".to_ascii().to_lower().to_str_ascii(), ~"abcdef&?#");
236240
assert_eq!("abCDef&?#".to_ascii().to_upper().to_str_ascii(), ~"ABCDEF&?#");
241+
242+
assert_eq!("".to_ascii().to_lower().to_str_ascii(), ~"");
243+
assert_eq!("YMCA".to_ascii().to_lower().to_str_ascii(), ~"ymca");
244+
assert_eq!("abcDEFxyz:.;".to_ascii().to_upper().to_str_ascii(), ~"ABCDEFXYZ:.;");
245+
246+
assert!("".is_ascii());
247+
assert!("a".is_ascii());
248+
assert!(!"\u2009".is_ascii());
249+
237250
}
238251

239252
#[test]

src/libcore/unstable/extfmt.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,13 @@ pub mod rt {
520520
match cv.ty {
521521
TyDefault => uint_to_str_prec(u, 10, prec),
522522
TyHexLower => uint_to_str_prec(u, 16, prec),
523-
TyHexUpper => str::to_upper(uint_to_str_prec(u, 16, prec)),
523+
524+
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
525+
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
526+
TyHexUpper => {
527+
let s = uint_to_str_prec(u, 16, prec);
528+
s.to_ascii().to_upper().to_str_ascii()
529+
}
524530
TyBits => uint_to_str_prec(u, 2, prec),
525531
TyOctal => uint_to_str_prec(u, 8, prec)
526532
};

src/librustc/driver/driver.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,11 @@ pub fn build_session_options(binary: @~str,
546546
let lint_dict = lint::get_lint_dict();
547547
for lint_levels.each |level| {
548548
let level_name = lint::level_to_str(*level);
549-
let level_short = level_name.substr(0,1).to_upper();
549+
550+
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
551+
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
552+
let level_short = level_name.substr(0,1);
553+
let level_short = level_short.to_ascii().to_upper().to_str_ascii();
550554
let flags = vec::append(getopts::opt_strs(matches, level_short),
551555
getopts::opt_strs(matches, level_name));
552556
for flags.each |lint_name| {

src/librustdoc/markdown_index_pass.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,9 @@ pub fn pandoc_header_id(header: &str) -> ~str {
157157
let s = str::replace(s, ~" ", ~"-");
158158
return s;
159159
}
160-
fn convert_to_lowercase(s: &str) -> ~str { str::to_lower(s) }
160+
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
161+
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
162+
fn convert_to_lowercase(s: &str) -> ~str { s.to_ascii().to_lower().to_str_ascii() }
161163
fn remove_up_to_first_letter(s: &str) -> ~str { s.to_str() }
162164
fn maybe_use_section_id(s: &str) -> ~str { s.to_str() }
163165
}

src/libstd/semver.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ fn parse_reader(rdr: @io::Reader) -> Version {
220220

221221

222222
pub fn parse(s: &str) -> Option<Version> {
223-
if ! str::is_ascii(s) {
223+
if !s.is_ascii() {
224224
return None;
225225
}
226226
let s = s.trim();

src/libstd/sort.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -885,8 +885,12 @@ mod tests {
885885
// tjc: funny that we have to use parens
886886
fn ile(x: &(&'static str), y: &(&'static str)) -> bool
887887
{
888-
let x = x.to_lower();
889-
let y = y.to_lower();
888+
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
889+
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
890+
// (Actually, could just remove the to_str_* call, but needs a deriving(Ord) on
891+
// Ascii)
892+
let x = x.to_ascii().to_lower().to_str_ascii();
893+
let y = y.to_ascii().to_lower().to_str_ascii();
890894
x <= y
891895
}
892896

src/test/bench/shootout-k-nucleotide-pipes.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,10 @@ fn sort_and_fmt(mm: &HashMap<~[u8], uint>, total: uint) -> ~str {
5959
for pairs_sorted.each |kv| {
6060
let (k,v) = copy *kv;
6161
unsafe {
62-
buffer += (fmt!("%s %0.3f\n", str::to_upper(str::raw::from_bytes(k)), v));
62+
let b = str::raw::from_bytes(k);
63+
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
64+
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
65+
buffer += (fmt!("%s %0.3f\n", b.to_ascii().to_upper().to_str_ascii(), v));
6366
}
6467
}
6568

@@ -68,7 +71,9 @@ fn sort_and_fmt(mm: &HashMap<~[u8], uint>, total: uint) -> ~str {
6871

6972
// given a map, search for the frequency of a pattern
7073
fn find(mm: &HashMap<~[u8], uint>, key: ~str) -> uint {
71-
match mm.find(&str::to_bytes(str::to_lower(key))) {
74+
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
75+
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
76+
match mm.find(&str::to_bytes(key.to_ascii().to_lower().to_str_ascii())) {
7277
option::None => { return 0u; }
7378
option::Some(&num) => { return num; }
7479
}

0 commit comments

Comments
 (0)