@@ -221,6 +221,91 @@ impl<'a> TryFrom<&'a CStr> for &'a CStr8 {
221221 }
222222}
223223
/// Decode the Latin-1 character that starts at `offset` in a UTF-8 byte slice.
///
/// Returns a tuple of the decoded Latin-1 character and the number of bytes
/// (1 or 2) occupied by that character's UTF-8 encoding.
///
/// # Panics
///
/// Panics if the character at `offset` cannot be encoded in Latin-1. Slice
/// indexing also panics if `offset` (or the following byte of a two-byte
/// sequence) is out of bounds.
///
/// # Safety
///
/// The input `bytes` must be valid UTF-8.
const unsafe fn latin1_from_utf8_at_offset(bytes: &[u8], offset: usize) -> (u8, usize) {
    let lead = bytes[offset];
    match lead {
        // `0xxxxxxx`: a single-byte sequence; the byte is the character itself.
        0x00..=0x7f => (lead, 1),
        // `110xxxxx`: lead byte of a two-byte sequence. Combine its low five
        // bits with the low six bits of the following byte.
        0xc0..=0xdf => {
            let high = (lead & 0b0001_1111) as u16;
            let low = (bytes[offset + 1] & 0b0011_1111) as u16;
            let code_point = (high << 6) | low;
            if code_point > 0xff {
                panic!("input string cannot be encoded as Latin-1");
            }
            (code_point as u8, 2)
        }
        // Latin-1 code points only go up to 0xff, so any UTF-8 character
        // longer than two bytes cannot be converted to Latin-1.
        _ => panic!("input string cannot be encoded as Latin-1"),
    }
}
252+
253+ /// Count the number of Latin-1 characters in a string.
254+ ///
255+ /// Panics if the string cannot be encoded in Latin-1.
256+ ///
257+ /// This is public but hidden; it is used in the `cstr8` macro.
258+ pub const fn str_num_latin1_chars ( s : & str ) -> usize {
259+ let bytes = s. as_bytes ( ) ;
260+ let len = bytes. len ( ) ;
261+
262+ let mut offset = 0 ;
263+ let mut num_latin1_chars = 0 ;
264+
265+ while offset < len {
266+ // SAFETY: `bytes` is valid UTF-8.
267+ let ( _, num_utf8_bytes) = unsafe { latin1_from_utf8_at_offset ( bytes, offset) } ;
268+ offset += num_utf8_bytes as usize ;
269+ num_latin1_chars += 1 ;
270+ }
271+
272+ num_latin1_chars
273+ }
274+
275+ /// Convert a `str` into a null-terminated Latin-1 character array.
276+ ///
277+ /// Panics if the string cannot be encoded in Latin-1.
278+ ///
279+ /// This is public but hidden; it is used in the `cstr8` macro.
280+ pub const fn str_to_latin1 < const N : usize > ( s : & str ) -> [ u8 ; N ] {
281+ let bytes = s. as_bytes ( ) ;
282+ let len = bytes. len ( ) ;
283+
284+ let mut output = [ 0 ; N ] ;
285+
286+ let mut output_offset = 0 ;
287+ let mut input_offset = 0 ;
288+ while input_offset < len {
289+ // SAFETY: `bytes` is valid UTF-8.
290+ let ( ch, num_utf8_bytes) = unsafe { latin1_from_utf8_at_offset ( bytes, input_offset) } ;
291+ if ch == 0 {
292+ panic ! ( "interior null character" ) ;
293+ } else {
294+ output[ output_offset] = ch;
295+ output_offset += 1 ;
296+ input_offset += num_utf8_bytes;
297+ }
298+ }
299+
300+ // The output array must be one bigger than the converted string,
301+ // to leave room for the trailing null character.
302+ if output_offset + 1 != N {
303+ panic ! ( "incorrect array length" ) ;
304+ }
305+
306+ output
307+ }
308+
224309/// A UCS-2 null-terminated string slice.
225310///
226311/// This type is largely inspired by [`core::ffi::CStr`] with the exception that all characters are
0 commit comments