@@ -117,6 +117,23 @@ impl String {
117117 }
118118 Some ( s)
119119 }
120+
121+ /// Decode a UTF-16 encoded vector `v` into a string, replacing
122+ /// invalid data with the replacement character (U+FFFD).
123+ ///
124+ /// # Example
125+ /// ```rust
126+ /// // ð„žmus<invalid>ic<invalid>
127+ /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
128+ /// 0x0073, 0xDD1E, 0x0069, 0x0063,
129+ /// 0xD834];
130+ ///
131+ /// assert_eq!(String::from_utf16_lossy(v),
132+ /// "ð„žmus\uFFFDic\uFFFD".to_string());
133+ /// ```
134+ pub fn from_utf16_lossy ( v : & [ u16 ] ) -> String {
135+ str:: utf16_items ( v) . map ( |c| c. to_char_lossy ( ) ) . collect ( )
136+ }
120137
121138 /// Convert a vector of chars to a string
122139 ///
@@ -431,6 +448,7 @@ mod tests {
431448 use test:: Bencher ;
432449
433450 use Mutable ;
451+ use str;
434452 use str:: { Str , StrSlice } ;
435453 use super :: String ;
436454
@@ -439,6 +457,95 @@ mod tests {
439457 let owned: Option < :: std:: string:: String > = from_str ( "string" ) ;
440458 assert_eq ! ( owned. as_ref( ) . map( |s| s. as_slice( ) ) , Some ( "string" ) ) ;
441459 }
460+
461+ #[ test]
462+ fn test_from_utf16 ( ) {
463+ let pairs =
464+ [ ( String :: from_str ( "ð…ðŒ¿ðŒ»ð†ðŒ¹ðŒ»ðŒ°\n " ) ,
465+ vec ! [ 0xd800_u16 , 0xdf45_u16 , 0xd800_u16 , 0xdf3f_u16 ,
466+ 0xd800_u16 , 0xdf3b_u16 , 0xd800_u16 , 0xdf46_u16 ,
467+ 0xd800_u16 , 0xdf39_u16 , 0xd800_u16 , 0xdf3b_u16 ,
468+ 0xd800_u16 , 0xdf30_u16 , 0x000a_u16 ] ) ,
469+
470+ ( String :: from_str ( "ð’ð‘‰ð®ð‘€ð²ð‘‹ ðð²ð‘\n " ) ,
471+ vec ! [ 0xd801_u16 , 0xdc12_u16 , 0xd801_u16 ,
472+ 0xdc49_u16 , 0xd801_u16 , 0xdc2e_u16 , 0xd801_u16 ,
473+ 0xdc40_u16 , 0xd801_u16 , 0xdc32_u16 , 0xd801_u16 ,
474+ 0xdc4b_u16 , 0x0020_u16 , 0xd801_u16 , 0xdc0f_u16 ,
475+ 0xd801_u16 , 0xdc32_u16 , 0xd801_u16 , 0xdc4d_u16 ,
476+ 0x000a_u16 ] ) ,
477+
478+ ( String :: from_str ( "ðŒ€ðŒ–ðŒ‹ðŒ„ðŒ‘ðŒ‰Â·ðŒŒðŒ„ðŒ•ðŒ„ðŒ‹ðŒ‰ðŒ‘\n " ) ,
479+ vec ! [ 0xd800_u16 , 0xdf00_u16 , 0xd800_u16 , 0xdf16_u16 ,
480+ 0xd800_u16 , 0xdf0b_u16 , 0xd800_u16 , 0xdf04_u16 ,
481+ 0xd800_u16 , 0xdf11_u16 , 0xd800_u16 , 0xdf09_u16 ,
482+ 0x00b7_u16 , 0xd800_u16 , 0xdf0c_u16 , 0xd800_u16 ,
483+ 0xdf04_u16 , 0xd800_u16 , 0xdf15_u16 , 0xd800_u16 ,
484+ 0xdf04_u16 , 0xd800_u16 , 0xdf0b_u16 , 0xd800_u16 ,
485+ 0xdf09_u16 , 0xd800_u16 , 0xdf11_u16 , 0x000a_u16 ] ) ,
486+
487+ ( String :: from_str ( "ð’‹ð’˜ð’ˆð’‘ð’›ð’’ ð’•ð’“ ð’ˆð’šð’ ð’ð’œð’’ð’–ð’† ð’•ð’†\n " ) ,
488+ vec ! [ 0xd801_u16 , 0xdc8b_u16 , 0xd801_u16 , 0xdc98_u16 ,
489+ 0xd801_u16 , 0xdc88_u16 , 0xd801_u16 , 0xdc91_u16 ,
490+ 0xd801_u16 , 0xdc9b_u16 , 0xd801_u16 , 0xdc92_u16 ,
491+ 0x0020_u16 , 0xd801_u16 , 0xdc95_u16 , 0xd801_u16 ,
492+ 0xdc93_u16 , 0x0020_u16 , 0xd801_u16 , 0xdc88_u16 ,
493+ 0xd801_u16 , 0xdc9a_u16 , 0xd801_u16 , 0xdc8d_u16 ,
494+ 0x0020_u16 , 0xd801_u16 , 0xdc8f_u16 , 0xd801_u16 ,
495+ 0xdc9c_u16 , 0xd801_u16 , 0xdc92_u16 , 0xd801_u16 ,
496+ 0xdc96_u16 , 0xd801_u16 , 0xdc86_u16 , 0x0020_u16 ,
497+ 0xd801_u16 , 0xdc95_u16 , 0xd801_u16 , 0xdc86_u16 ,
498+ 0x000a_u16 ] ) ,
499+ // Issue #12318, even-numbered non-BMP planes
500+ ( String :: from_str ( "\U 00020000" ) ,
501+ vec ! [ 0xD840 , 0xDC00 ] ) ] ;
502+
503+ for p in pairs. iter ( ) {
504+ let ( s, u) = ( * p) . clone ( ) ;
505+ let s_as_utf16 = s. as_slice ( ) . utf16_units ( ) . collect :: < Vec < u16 > > ( ) ;
506+ let u_as_string = String :: from_utf16 ( u. as_slice ( ) ) . unwrap ( ) ;
507+
508+ assert ! ( str :: is_utf16( u. as_slice( ) ) ) ;
509+ assert_eq ! ( s_as_utf16, u) ;
510+
511+ assert_eq ! ( u_as_string, s) ;
512+ assert_eq ! ( String :: from_utf16_lossy( u. as_slice( ) ) , s) ;
513+
514+ assert_eq ! ( String :: from_utf16( s_as_utf16. as_slice( ) ) . unwrap( ) , s) ;
515+ assert_eq ! ( u_as_string. as_slice( ) . utf16_units( ) . collect:: <Vec <u16 >>( ) , u) ;
516+ }
517+ }
518+
/// `String::from_utf16` must reject every ill-formed unit sequence.
/// Well-formed input is exercised by `test_from_utf16`.
#[test]
fn test_utf16_invalid() {
    // A lead surrogate with nothing after it.
    assert!(String::from_utf16([0xD800]).is_none());

    // A lead surrogate followed by another lead instead of a trail.
    assert!(String::from_utf16([0xD800, 0xD800]).is_none());

    // A trail surrogate that no lead precedes.
    assert!(String::from_utf16([0x0061, 0xDC00]).is_none());

    // A valid pair sandwiched between two stray lead surrogates.
    assert!(String::from_utf16([0xD800, 0xd801, 0xdc8b, 0xD800]).is_none());
}
533+
/// `String::from_utf16_lossy` must substitute exactly one U+FFFD for each
/// unpaired surrogate and leave well-formed data untouched.
/// Fully valid input is exercised by `test_from_utf16`.
#[test]
fn test_from_utf16_lossy() {
    // lead + eof
    assert_eq!(String::from_utf16_lossy([0xD800]), String::from_str("\uFFFD"));
    // lead + lead
    assert_eq!(String::from_utf16_lossy([0xD800, 0xD800]), String::from_str("\uFFFD\uFFFD"));

    // isolated trail
    assert_eq!(String::from_utf16_lossy([0x0061, 0xDC00]), String::from_str("a\uFFFD"));

    // general: stray lead, valid pair (0xd801 0xdc8b = U+1048B), stray lead
    assert_eq!(String::from_utf16_lossy([0xD800, 0xd801, 0xdc8b, 0xD800]),
               String::from_str("\uFFFD𐒋\uFFFD"));
}
442549
443550 #[ bench]
444551 fn bench_with_capacity ( b : & mut Bencher ) {
0 commit comments