@@ -638,8 +638,7 @@ impl char {
638638 #[ rustc_const_stable( feature = "const_char_len_utf" , since = "1.52.0" ) ]
639639 #[ inline]
640640 pub const fn len_utf16 ( self ) -> usize {
641- let ch = self as u32 ;
642- if ( ch & 0xFFFF ) == ch { 1 } else { 2 }
641+ len_utf16 ( self as u32 )
643642 }
644643
645644 /// Encodes this character as UTF-8 into the provided byte buffer,
@@ -709,8 +708,9 @@ impl char {
709708 /// '𝕊'.encode_utf16(&mut b);
710709 /// ```
711710 #[ stable( feature = "unicode_encode_char" , since = "1.15.0" ) ]
711+ #[ rustc_const_unstable( feature = "const_char_encode_utf16" , issue = "130660" ) ]
712712 #[ inline]
713- pub fn encode_utf16 ( self , dst : & mut [ u16 ] ) -> & mut [ u16 ] {
713+ pub const fn encode_utf16 ( self , dst : & mut [ u16 ] ) -> & mut [ u16 ] {
714714 encode_utf16_raw ( self as u32 , dst)
715715 }
716716
@@ -1745,7 +1745,12 @@ const fn len_utf8(code: u32) -> usize {
17451745 }
17461746}
17471747
1748- /// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
1748+ #[ inline]
1749+ const fn len_utf16 ( code : u32 ) -> usize {
1750+ if ( code & 0xFFFF ) == code { 1 } else { 2 }
1751+ }
1752+
1753+ /// Encodes a raw `u32` value as UTF-8 into the provided byte buffer,
17491754/// and then returns the subslice of the buffer that contains the encoded character.
17501755///
17511756/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
@@ -1799,7 +1804,7 @@ pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
17991804 unsafe { slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , len) }
18001805}
18011806
1802- /// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
1807+ /// Encodes a raw `u32` value as UTF-16 into the provided `u16` buffer,
18031808/// and then returns the subslice of the buffer that contains the encoded character.
18041809///
18051810/// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range.
@@ -1810,28 +1815,33 @@ pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
18101815/// Panics if the buffer is not large enough.
18111816/// A buffer of length 2 is large enough to encode any `char`.
18121817#[ unstable( feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" ) ]
1818+ #[ rustc_const_unstable( feature = "const_char_encode_utf16" , issue = "130660" ) ]
18131819#[ doc( hidden) ]
18141820#[ inline]
1815- pub fn encode_utf16_raw ( mut code : u32 , dst : & mut [ u16 ] ) -> & mut [ u16 ] {
1816- // SAFETY: each arm checks whether there are enough bits to write into
1817- unsafe {
1818- if ( code & 0xFFFF ) == code && !dst. is_empty ( ) {
1819- // The BMP falls through
1820- * dst. get_unchecked_mut ( 0 ) = code as u16 ;
1821- slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , 1 )
1822- } else if dst. len ( ) >= 2 {
1823- // Supplementary planes break into surrogates.
1821+ pub const fn encode_utf16_raw ( mut code : u32 , dst : & mut [ u16 ] ) -> & mut [ u16 ] {
1822+ const fn panic_at_const ( _code : u32 , _len : usize , _dst_len : usize ) {
1823+ // Note that we cannot format in constant expressions.
1824+ panic ! ( "encode_utf16: buffer does not have enough bytes to encode code point" ) ;
1825+ }
1826+ fn panic_at_rt ( code : u32 , len : usize , dst_len : usize ) {
1827+ panic ! (
1828+ "encode_utf16: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}" ,
1829+ ) ;
1830+ }
1831+ let len = len_utf16 ( code) ;
1832+ match ( len, & mut * dst) {
1833+ ( 1 , [ a, ..] ) => {
1834+ * a = code as u16 ;
1835+ }
1836+ ( 2 , [ a, b, ..] ) => {
18241837 code -= 0x1_0000 ;
1825- * dst. get_unchecked_mut ( 0 ) = 0xD800 | ( ( code >> 10 ) as u16 ) ;
1826- * dst. get_unchecked_mut ( 1 ) = 0xDC00 | ( ( code as u16 ) & 0x3FF ) ;
1827- slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , 2 )
1828- } else {
1829- panic ! (
1830- "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}" ,
1831- char :: from_u32_unchecked( code) . len_utf16( ) ,
1832- code,
1833- dst. len( ) ,
1834- )
1838+
1839+ * a = ( code >> 10 ) as u16 | 0xD800 ;
1840+ * b = ( code & 0x3FF ) as u16 | 0xDC00 ;
18351841 }
1836- }
1842+ // FIXME(const-hack): We would prefer to have streamlined panics when formatters become const-friendly.
1843+ _ => const_eval_select ( ( code, len, dst. len ( ) ) , panic_at_const, panic_at_rt) ,
1844+ } ;
1845+ // SAFETY: `<&mut [u16]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
1846+ unsafe { slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , len) }
18371847}
0 commit comments