@@ -402,131 +402,10 @@ macro_rules! utf8_acc_cont_byte(
402402 ( $ch: expr, $byte: expr) => ( ( $ch << 6 ) | ( $byte & 63u8 ) as u32 )
403403)
404404
405- static TAG_CONT_U8 : u8 = 128u8 ;
406-
407- /// Converts a vector of bytes to a new utf-8 string.
408- /// Any invalid utf-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
409- ///
410- /// # Example
411- ///
412- /// ```rust
413- /// let input = b"Hello \xF0\x90\x80World";
414- /// let output = std::str::from_utf8_lossy(input);
415- /// assert_eq!(output.as_slice(), "Hello \uFFFDWorld");
416- /// ```
405+ /// Deprecated. Use `String::from_utf8_lossy`.
406+ #[ deprecated = "Replaced by String::from_utf8_lossy" ]
417407pub fn from_utf8_lossy < ' a > ( v : & ' a [ u8 ] ) -> MaybeOwned < ' a > {
418- if is_utf8 ( v) {
419- return Slice ( unsafe { mem:: transmute ( v) } )
420- }
421-
422- static REPLACEMENT : & ' static [ u8 ] = b"\xEF \xBF \xBD " ; // U+FFFD in UTF-8
423- let mut i = 0 ;
424- let total = v. len ( ) ;
425- fn unsafe_get ( xs : & [ u8 ] , i : uint ) -> u8 {
426- unsafe { * xs. unsafe_ref ( i) }
427- }
428- fn safe_get ( xs : & [ u8 ] , i : uint , total : uint ) -> u8 {
429- if i >= total {
430- 0
431- } else {
432- unsafe_get ( xs, i)
433- }
434- }
435-
436- let mut res = String :: with_capacity ( total) ;
437-
438- if i > 0 {
439- unsafe {
440- res. push_bytes ( v. slice_to ( i) )
441- } ;
442- }
443-
444- // subseqidx is the index of the first byte of the subsequence we're looking at.
445- // It's used to copy a bunch of contiguous good codepoints at once instead of copying
446- // them one by one.
447- let mut subseqidx = 0 ;
448-
449- while i < total {
450- let i_ = i;
451- let byte = unsafe_get ( v, i) ;
452- i += 1 ;
453-
454- macro_rules! error( ( ) => ( {
455- unsafe {
456- if subseqidx != i_ {
457- res. push_bytes( v. slice( subseqidx, i_) ) ;
458- }
459- subseqidx = i;
460- res. push_bytes( REPLACEMENT ) ;
461- }
462- } ) )
463-
464- if byte < 128u8 {
465- // subseqidx handles this
466- } else {
467- let w = utf8_char_width ( byte) ;
468-
469- match w {
470- 2 => {
471- if safe_get ( v, i, total) & 192u8 != TAG_CONT_U8 {
472- error ! ( ) ;
473- continue ;
474- }
475- i += 1 ;
476- }
477- 3 => {
478- match ( byte, safe_get ( v, i, total) ) {
479- ( 0xE0 , 0xA0 .. 0xBF ) => ( ) ,
480- ( 0xE1 .. 0xEC , 0x80 .. 0xBF ) => ( ) ,
481- ( 0xED , 0x80 .. 0x9F ) => ( ) ,
482- ( 0xEE .. 0xEF , 0x80 .. 0xBF ) => ( ) ,
483- _ => {
484- error ! ( ) ;
485- continue ;
486- }
487- }
488- i += 1 ;
489- if safe_get ( v, i, total) & 192u8 != TAG_CONT_U8 {
490- error ! ( ) ;
491- continue ;
492- }
493- i += 1 ;
494- }
495- 4 => {
496- match ( byte, safe_get ( v, i, total) ) {
497- ( 0xF0 , 0x90 .. 0xBF ) => ( ) ,
498- ( 0xF1 .. 0xF3 , 0x80 .. 0xBF ) => ( ) ,
499- ( 0xF4 , 0x80 .. 0x8F ) => ( ) ,
500- _ => {
501- error ! ( ) ;
502- continue ;
503- }
504- }
505- i += 1 ;
506- if safe_get ( v, i, total) & 192u8 != TAG_CONT_U8 {
507- error ! ( ) ;
508- continue ;
509- }
510- i += 1 ;
511- if safe_get ( v, i, total) & 192u8 != TAG_CONT_U8 {
512- error ! ( ) ;
513- continue ;
514- }
515- i += 1 ;
516- }
517- _ => {
518- error ! ( ) ;
519- continue ;
520- }
521- }
522- }
523- }
524- if subseqidx < total {
525- unsafe {
526- res. push_bytes ( v. slice ( subseqidx, total) )
527- } ;
528- }
529- Owned ( res. into_string ( ) )
408+ String :: from_utf8_lossy ( v)
530409}
531410
532411/*
@@ -2052,41 +1931,6 @@ String::from_str("\u1111\u1171\u11b6"));
20521931 assert_eq ! ( from_utf8( xs) , None ) ;
20531932 }
20541933
2055- #[ test]
2056- fn test_str_from_utf8_lossy ( ) {
2057- let xs = b"hello" ;
2058- assert_eq ! ( from_utf8_lossy( xs) , Slice ( "hello" ) ) ;
2059-
2060- let xs = "ศไทย中华Việt Nam" . as_bytes ( ) ;
2061- assert_eq ! ( from_utf8_lossy( xs) , Slice ( "ศไทย中华Việt Nam" ) ) ;
2062-
2063- let xs = b"Hello\xC2 There\xFF Goodbye" ;
2064- assert_eq ! ( from_utf8_lossy( xs) , Owned ( String :: from_str( "Hello\uFFFD There\uFFFD Goodbye" ) ) ) ;
2065-
2066- let xs = b"Hello\xC0 \x80 There\xE6 \x83 Goodbye" ;
2067- assert_eq ! ( from_utf8_lossy( xs) ,
2068- Owned ( String :: from_str( "Hello\uFFFD \uFFFD There\uFFFD Goodbye" ) ) ) ;
2069-
2070- let xs = b"\xF5 foo\xF5 \x80 bar" ;
2071- assert_eq ! ( from_utf8_lossy( xs) , Owned ( String :: from_str( "\uFFFD foo\uFFFD \uFFFD bar" ) ) ) ;
2072-
2073- let xs = b"\xF1 foo\xF1 \x80 bar\xF1 \x80 \x80 baz" ;
2074- assert_eq ! ( from_utf8_lossy( xs) , Owned ( String :: from_str( "\uFFFD foo\uFFFD bar\uFFFD baz" ) ) ) ;
2075-
2076- let xs = b"\xF4 foo\xF4 \x80 bar\xF4 \xBF baz" ;
2077- assert_eq ! ( from_utf8_lossy( xs) ,
2078- Owned ( String :: from_str( "\uFFFD foo\uFFFD bar\uFFFD \uFFFD baz" ) ) ) ;
2079-
2080- let xs = b"\xF0 \x80 \x80 \x80 foo\xF0 \x90 \x80 \x80 bar" ;
2081- assert_eq ! ( from_utf8_lossy( xs) , Owned ( String :: from_str( "\uFFFD \uFFFD \uFFFD \uFFFD \
2082- foo\U 00010000bar") ) ) ;
2083-
2084- // surrogates
2085- let xs = b"\xED \xA0 \x80 foo\xED \xBF \xBF bar" ;
2086- assert_eq ! ( from_utf8_lossy( xs) , Owned ( String :: from_str( "\uFFFD \uFFFD \uFFFD foo\
2087- \uFFFD \uFFFD \uFFFD bar") ) ) ;
2088- }
2089-
20901934 #[ test]
20911935 fn test_maybe_owned_traits ( ) {
20921936 let s = Slice ( "abcde" ) ;
@@ -2296,42 +2140,6 @@ mod bench {
22962140 } ) ;
22972141 }
22982142
2299- #[ bench]
2300- fn from_utf8_lossy_100_ascii ( b : & mut Bencher ) {
2301- let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
2302- Lorem ipsum dolor sit amet, consectetur. ";
2303-
2304- assert_eq ! ( 100 , s. len( ) ) ;
2305- b. iter ( || {
2306- let _ = from_utf8_lossy ( s) ;
2307- } ) ;
2308- }
2309-
2310- #[ bench]
2311- fn from_utf8_lossy_100_multibyte ( b : & mut Bencher ) {
2312- let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰" . as_bytes ( ) ;
2313- assert_eq ! ( 100 , s. len( ) ) ;
2314- b. iter ( || {
2315- let _ = from_utf8_lossy ( s) ;
2316- } ) ;
2317- }
2318-
2319- #[ bench]
2320- fn from_utf8_lossy_invalid ( b : & mut Bencher ) {
2321- let s = b"Hello\xC0 \x80 There\xE6 \x83 Goodbye" ;
2322- b. iter ( || {
2323- let _ = from_utf8_lossy ( s) ;
2324- } ) ;
2325- }
2326-
2327- #[ bench]
2328- fn from_utf8_lossy_100_invalid ( b : & mut Bencher ) {
2329- let s = Vec :: from_elem ( 100 , 0xF5u8 ) ;
2330- b. iter ( || {
2331- let _ = from_utf8_lossy ( s. as_slice ( ) ) ;
2332- } ) ;
2333- }
2334-
23352143 #[ bench]
23362144 fn bench_connect ( b : & mut Bencher ) {
23372145 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb" ;
0 commit comments