@@ -324,21 +324,20 @@ pub struct Chars<'a> {
324324 iter : slice:: Iter < ' a , u8 >
325325}
326326
327- // Return the initial codepoint accumulator for the first byte.
328- // The first byte is special, only want bottom 5 bits for width 2, 4 bits
329- // for width 3, and 3 bits for width 4
330- macro_rules! utf8_first_byte {
331- ( $byte: expr, $width: expr) => ( ( $byte & ( 0x7F >> $width) ) as u32 )
332- }
327+ /// Return the initial codepoint accumulator for the first byte.
328+ /// The first byte is special: only the bottom 5 bits are wanted for width 2, 4 bits
329+ /// for width 3, and 3 bits for width 4. The mask `0x7F >> width` keeps exactly those bits of `byte`, and the masked value is widened to `u32`.
330+ #[ inline]
331+ fn utf8_first_byte ( byte : u8 , width : u32 ) -> u32 { ( byte & ( 0x7F >> width) ) as u32 }
333332
334- // return the value of $ch updated with continuation byte $byte
335- macro_rules! utf8_acc_cont_byte {
336- ( $ch: expr, $byte: expr) => ( ( $ch << 6 ) | ( $byte & CONT_MASK ) as u32 )
337- }
333+ /// Return the value of `ch` updated with continuation byte `byte`: `ch` is shifted left by 6 bits and the bits of `byte` selected by `CONT_MASK` are OR-ed in. NOTE(review): `CONT_MASK` is defined outside this view; presumably it selects the low 6 payload bits -- confirm.
334+ #[ inline]
335+ fn utf8_acc_cont_byte ( ch : u32 , byte : u8 ) -> u32 { ( ch << 6 ) | ( byte & CONT_MASK ) as u32 }
338336
339- macro_rules! utf8_is_cont_byte {
340- ( $byte: expr) => ( ( $byte & !CONT_MASK ) == TAG_CONT_U8 )
341- }
337+ /// Checks whether the byte is a UTF-8 continuation byte (i.e. starts with the
338+ /// bits `10`). Returns `true` when the bits of `byte` outside `CONT_MASK` are equal to `TAG_CONT_U8`.
339+ #[ inline]
340+ fn utf8_is_cont_byte ( byte : u8 ) -> bool { ( byte & !CONT_MASK ) == TAG_CONT_U8 }
342341
343342#[ inline]
344343fn unwrap_or_0 ( opt : Option < & u8 > ) -> u8 {
@@ -363,20 +362,20 @@ pub fn next_code_point(bytes: &mut slice::Iter<u8>) -> Option<u32> {
363362 // Multibyte case follows
364363 // Decode from a byte combination out of: [[[x y] z] w]
365364 // NOTE: Performance is sensitive to the exact formulation here
366- let init = utf8_first_byte ! ( x, 2 ) ;
365+ let init = utf8_first_byte ( x, 2 ) ;
367366 let y = unwrap_or_0 ( bytes. next ( ) ) ;
368- let mut ch = utf8_acc_cont_byte ! ( init, y) ;
367+ let mut ch = utf8_acc_cont_byte ( init, y) ;
369368 if x >= 0xE0 {
370369 // [[x y z] w] case
371370 // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
372371 let z = unwrap_or_0 ( bytes. next ( ) ) ;
373- let y_z = utf8_acc_cont_byte ! ( ( y & CONT_MASK ) as u32 , z) ;
372+ let y_z = utf8_acc_cont_byte ( ( y & CONT_MASK ) as u32 , z) ;
374373 ch = init << 12 | y_z;
375374 if x >= 0xF0 {
376375 // [x y z w] case
377376 // use only the lower 3 bits of `init`
378377 let w = unwrap_or_0 ( bytes. next ( ) ) ;
379- ch = ( init & 7 ) << 18 | utf8_acc_cont_byte ! ( y_z, w) ;
378+ ch = ( init & 7 ) << 18 | utf8_acc_cont_byte ( y_z, w) ;
380379 }
381380 }
382381
@@ -399,18 +398,18 @@ pub fn next_code_point_reverse(bytes: &mut slice::Iter<u8>) -> Option<u32> {
399398 // Decode from a byte combination out of: [x [y [z w]]]
400399 let mut ch;
401400 let z = unwrap_or_0 ( bytes. next_back ( ) ) ;
402- ch = utf8_first_byte ! ( z, 2 ) ;
403- if utf8_is_cont_byte ! ( z) {
401+ ch = utf8_first_byte ( z, 2 ) ;
402+ if utf8_is_cont_byte ( z) {
404403 let y = unwrap_or_0 ( bytes. next_back ( ) ) ;
405- ch = utf8_first_byte ! ( y, 3 ) ;
406- if utf8_is_cont_byte ! ( y) {
404+ ch = utf8_first_byte ( y, 3 ) ;
405+ if utf8_is_cont_byte ( y) {
407406 let x = unwrap_or_0 ( bytes. next_back ( ) ) ;
408- ch = utf8_first_byte ! ( x, 4 ) ;
409- ch = utf8_acc_cont_byte ! ( ch, y) ;
407+ ch = utf8_first_byte ( x, 4 ) ;
408+ ch = utf8_acc_cont_byte ( ch, y) ;
410409 }
411- ch = utf8_acc_cont_byte ! ( ch, z) ;
410+ ch = utf8_acc_cont_byte ( ch, z) ;
412411 }
413- ch = utf8_acc_cont_byte ! ( ch, w) ;
412+ ch = utf8_acc_cont_byte ( ch, w) ;
414413
415414 Some ( ch)
416415}
@@ -1027,7 +1026,7 @@ fn run_utf8_validation_iterator(iter: &mut slice::Iter<u8>)
10271026 // ASCII characters are always valid, so only large
10281027 // bytes need more examination.
10291028 if first >= 128 {
1030- let w = UTF8_CHAR_WIDTH [ first as usize ] as usize ;
1029+ let w = UTF8_CHAR_WIDTH [ first as usize ] ;
10311030 let second = next ! ( ) ;
10321031 // 2-byte encoding is for codepoints \u{0080} to \u{07ff}
10331032 // first C2 80 last DF BF
@@ -1580,14 +1579,14 @@ impl StrExt for str {
15801579 i -= 1 ;
15811580 }
15821581
1583- let mut val = s. as_bytes ( ) [ i] as u32 ;
1584- let w = UTF8_CHAR_WIDTH [ val as usize ] as usize ;
1585- assert ! ( ( w != 0 ) ) ;
1582+ let first = s. as_bytes ( ) [ i] ;
1583+ let w = UTF8_CHAR_WIDTH [ first as usize ] ;
1584+ assert ! ( w != 0 ) ;
15861585
1587- val = utf8_first_byte ! ( val , w) ;
1588- val = utf8_acc_cont_byte ! ( val, s. as_bytes( ) [ i + 1 ] ) ;
1589- if w > 2 { val = utf8_acc_cont_byte ! ( val, s. as_bytes( ) [ i + 2 ] ) ; }
1590- if w > 3 { val = utf8_acc_cont_byte ! ( val, s. as_bytes( ) [ i + 3 ] ) ; }
1586+ let mut val = utf8_first_byte ( first , w as u32 ) ;
1587+ val = utf8_acc_cont_byte ( val, s. as_bytes ( ) [ i + 1 ] ) ;
1588+ if w > 2 { val = utf8_acc_cont_byte ( val, s. as_bytes ( ) [ i + 2 ] ) ; }
1589+ if w > 3 { val = utf8_acc_cont_byte ( val, s. as_bytes ( ) [ i + 3 ] ) ; }
15911590
15921591 return CharRange { ch : unsafe { mem:: transmute ( val) } , next : i} ;
15931592 }
@@ -1672,16 +1671,16 @@ pub fn char_range_at_raw(bytes: &[u8], i: usize) -> (u32, usize) {
16721671
16731672 // Multibyte case is a fn to allow char_range_at to inline cleanly
16741673 fn multibyte_char_range_at ( bytes : & [ u8 ] , i : usize ) -> ( u32 , usize ) {
1675- let mut val = bytes[ i] as u32 ;
1676- let w = UTF8_CHAR_WIDTH [ val as usize ] as usize ;
1677- assert ! ( ( w != 0 ) ) ;
1674+ let first = bytes[ i] ;
1675+ let w = UTF8_CHAR_WIDTH [ first as usize ] ;
1676+ assert ! ( w != 0 ) ;
16781677
1679- val = utf8_first_byte ! ( val , w) ;
1680- val = utf8_acc_cont_byte ! ( val, bytes[ i + 1 ] ) ;
1681- if w > 2 { val = utf8_acc_cont_byte ! ( val, bytes[ i + 2 ] ) ; }
1682- if w > 3 { val = utf8_acc_cont_byte ! ( val, bytes[ i + 3 ] ) ; }
1678+ let mut val = utf8_first_byte ( first , w as u32 ) ;
1679+ val = utf8_acc_cont_byte ( val, bytes[ i + 1 ] ) ;
1680+ if w > 2 { val = utf8_acc_cont_byte ( val, bytes[ i + 2 ] ) ; }
1681+ if w > 3 { val = utf8_acc_cont_byte ( val, bytes[ i + 3 ] ) ; }
16831682
1684- return ( val, i + w) ;
1683+ return ( val, i + w as usize ) ;
16851684 }
16861685
16871686 multibyte_char_range_at ( bytes, i)
0 commit comments