@@ -169,14 +169,27 @@ fn write(
169169}
170170
171171fn write_valid_utf8_to_console ( handle : c:: HANDLE , utf8 : & str ) -> io:: Result < usize > {
172+ debug_assert ! ( !utf8. is_empty( ) ) ;
173+
172174 let mut utf16 = [ MaybeUninit :: < u16 > :: uninit ( ) ; MAX_BUFFER_SIZE / 2 ] ;
173- let mut len_utf16 = 0 ;
174- for ( chr, dest) in utf8. encode_utf16 ( ) . zip ( utf16. iter_mut ( ) ) {
175- * dest = MaybeUninit :: new ( chr) ;
176- len_utf16 += 1 ;
177- }
178- // Safety: We've initialized `len_utf16` values.
179- let utf16: & [ u16 ] = unsafe { MaybeUninit :: slice_assume_init_ref ( & utf16[ ..len_utf16] ) } ;
175+ let utf8 = & utf8[ ..utf8. floor_char_boundary ( utf16. len ( ) ) ] ;
176+
177+ let utf16: & [ u16 ] = unsafe {
178+ // Note that this theoretically checks validity twice in the (most common) case
179+ // where the underlying byte sequence is valid utf-8 (given the check in `write()`).
180+ let result = c:: MultiByteToWideChar (
181+ c:: CP_UTF8 , // CodePage
182+ c:: MB_ERR_INVALID_CHARS , // dwFlags
183+ utf8. as_ptr ( ) as c:: LPCCH , // lpMultiByteStr
184+ utf8. len ( ) as c:: c_int , // cbMultiByte
185+ utf16. as_mut_ptr ( ) as c:: LPWSTR , // lpWideCharStr
186+ utf16. len ( ) as c:: c_int , // cchWideChar
187+ ) ;
188+ assert ! ( result != 0 , "Unexpected error in MultiByteToWideChar" ) ;
189+
190+ // Safety: MultiByteToWideChar initializes `result` values.
191+ MaybeUninit :: slice_assume_init_ref ( & utf16[ ..result as usize ] )
192+ } ;
180193
181194 let mut written = write_u16s ( handle, & utf16) ?;
182195
@@ -189,8 +202,8 @@ fn write_valid_utf8_to_console(handle: c::HANDLE, utf8: &str) -> io::Result<usiz
189202 // a missing surrogate can be produced (and also because of the UTF-8 validation above),
190203 // write the missing surrogate out now.
191204 // Buffering it would mean we have to lie about the number of bytes written.
192- let first_char_remaining = utf16[ written] ;
193- if first_char_remaining >= 0xDCEE && first_char_remaining <= 0xDFFF {
205+ let first_code_unit_remaining = utf16[ written] ;
206+ if first_code_unit_remaining >= 0xDCEE && first_code_unit_remaining <= 0xDFFF {
194207 // low surrogate
195208 // We just hope this works, and give up otherwise
196209 let _ = write_u16s ( handle, & utf16[ written..written + 1 ] ) ;
@@ -212,6 +225,7 @@ fn write_valid_utf8_to_console(handle: c::HANDLE, utf8: &str) -> io::Result<usiz
212225}
213226
214227fn write_u16s ( handle : c:: HANDLE , data : & [ u16 ] ) -> io:: Result < usize > {
228+ debug_assert ! ( data. len( ) < u32 :: MAX as usize ) ;
215229 let mut written = 0 ;
216230 cvt ( unsafe {
217231 c:: WriteConsoleW (
@@ -365,26 +379,32 @@ fn read_u16s(handle: c::HANDLE, buf: &mut [MaybeUninit<u16>]) -> io::Result<usiz
365379 Ok ( amount as usize )
366380}
367381
368- #[ allow( unused) ]
369382fn utf16_to_utf8 ( utf16 : & [ u16 ] , utf8 : & mut [ u8 ] ) -> io:: Result < usize > {
370- let mut written = 0 ;
371- for chr in char:: decode_utf16 ( utf16. iter ( ) . cloned ( ) ) {
372- match chr {
373- Ok ( chr) => {
374- chr. encode_utf8 ( & mut utf8[ written..] ) ;
375- written += chr. len_utf8 ( ) ;
376- }
377- Err ( _) => {
378- // We can't really do any better than forget all data and return an error.
379- return Err ( io:: const_io_error!(
380- io:: ErrorKind :: InvalidData ,
381- "Windows stdin in console mode does not support non-UTF-16 input; \
382- encountered unpaired surrogate",
383- ) ) ;
384- }
385- }
383+ debug_assert ! ( utf16. len( ) <= c:: c_int:: MAX as usize ) ;
384+ debug_assert ! ( utf8. len( ) <= c:: c_int:: MAX as usize ) ;
385+
386+ let result = unsafe {
387+ c:: WideCharToMultiByte (
388+ c:: CP_UTF8 , // CodePage
389+ c:: WC_ERR_INVALID_CHARS , // dwFlags
390+ utf16. as_ptr ( ) , // lpWideCharStr
391+ utf16. len ( ) as c:: c_int , // cchWideChar
392+ utf8. as_mut_ptr ( ) as c:: LPSTR , // lpMultiByteStr
393+ utf8. len ( ) as c:: c_int , // cbMultiByte
394+ ptr:: null ( ) , // lpDefaultChar
395+ ptr:: null_mut ( ) , // lpUsedDefaultChar
396+ )
397+ } ;
398+ if result == 0 {
399+ // We can't really do any better than forget all data and return an error.
400+ Err ( io:: const_io_error!(
401+ io:: ErrorKind :: InvalidData ,
402+ "Windows stdin in console mode does not support non-UTF-16 input; \
403+ encountered unpaired surrogate",
404+ ) )
405+ } else {
406+ Ok ( result as usize )
386407 }
387- Ok ( written)
388408}
389409
390410impl IncompleteUtf8 {
0 commit comments