Skip to content

Commit fc9e190

Browse files
committed
Do not check UTF length twice
1 parent 79cda01 commit fc9e190

File tree

1 file changed

+52
-48
lines changed

1 file changed

+52
-48
lines changed

src/libcore/char/methods.rs

Lines changed: 52 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -436,34 +436,11 @@ impl char {
436436
#[stable(feature = "unicode_encode_char", since = "1.15.0")]
437437
#[inline]
438438
pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
439-
let code = self as u32;
440-
unsafe {
441-
let len =
442-
if code < MAX_ONE_B && !dst.is_empty() {
443-
*dst.get_unchecked_mut(0) = code as u8;
444-
1
445-
} else if code < MAX_TWO_B && dst.len() >= 2 {
446-
*dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
447-
*dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT;
448-
2
449-
} else if code < MAX_THREE_B && dst.len() >= 3 {
450-
*dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
451-
*dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
452-
*dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT;
453-
3
454-
} else if dst.len() >= 4 {
455-
*dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
456-
*dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
457-
*dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
458-
*dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT;
459-
4
460-
} else {
461-
panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
462-
from_u32_unchecked(code).len_utf8(),
463-
code,
464-
dst.len())
465-
};
466-
from_utf8_unchecked_mut(dst.get_unchecked_mut(..len))
439+
let l = dst.len();
440+
match self.try_encode_utf8(dst) {
441+
Some(s) => s,
442+
None => panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
443+
self.len_utf8(), self as u32, l),
467444
}
468445
}
469446

@@ -495,7 +472,32 @@ impl char {
495472
#[unstable(feature = "try_unicode_encode_char", issue = "52579")]
496473
#[inline]
497474
pub fn try_encode_utf8(self, dst: &mut [u8]) -> Option<&mut str> {
498-
if dst.len() < self.len_utf8() { None } else { Some(self.encode_utf8(dst)) }
475+
let code = self as u32;
476+
unsafe {
477+
let len =
478+
if code < MAX_ONE_B && !dst.is_empty() {
479+
*dst.get_unchecked_mut(0) = code as u8;
480+
1
481+
} else if code < MAX_TWO_B && dst.len() >= 2 {
482+
*dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
483+
*dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT;
484+
2
485+
} else if code < MAX_THREE_B && dst.len() >= 3 {
486+
*dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
487+
*dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
488+
*dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT;
489+
3
490+
} else if dst.len() >= 4 {
491+
*dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
492+
*dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
493+
*dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
494+
*dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT;
495+
4
496+
} else {
497+
return None;
498+
};
499+
Some(from_utf8_unchecked_mut(dst.get_unchecked_mut(..len)))
500+
}
499501
}
500502

501503
/// Encodes this character as UTF-16 into the provided `u16` buffer,
@@ -535,24 +537,11 @@ impl char {
535537
#[stable(feature = "unicode_encode_char", since = "1.15.0")]
536538
#[inline]
537539
pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
538-
let mut code = self as u32;
539-
unsafe {
540-
if (code & 0xFFFF) == code && !dst.is_empty() {
541-
// The BMP falls through (assuming non-surrogate, as it should)
542-
*dst.get_unchecked_mut(0) = code as u16;
543-
slice::from_raw_parts_mut(dst.as_mut_ptr(), 1)
544-
} else if dst.len() >= 2 {
545-
// Supplementary planes break into surrogates.
546-
code -= 0x1_0000;
547-
*dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
548-
*dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
549-
slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
550-
} else {
551-
panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
552-
from_u32_unchecked(code).len_utf16(),
553-
code,
554-
dst.len())
555-
}
540+
let l = dst.len();
541+
match self.try_encode_utf16(dst) {
542+
Some(s) => s,
543+
None => panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
544+
self.len_utf16(), self as u32, l),
556545
}
557546
}
558547

@@ -584,7 +573,22 @@ impl char {
584573
#[unstable(feature = "try_unicode_encode_char", issue = "52579")]
585574
#[inline]
586575
pub fn try_encode_utf16(self, dst: &mut [u16]) -> Option<&mut [u16]> {
587-
if dst.len() < self.len_utf16() { None } else { Some(self.encode_utf16(dst)) }
576+
let mut code = self as u32;
577+
unsafe {
578+
if (code & 0xFFFF) == code && !dst.is_empty() {
579+
// The BMP falls through (assuming non-surrogate, as it should)
580+
*dst.get_unchecked_mut(0) = code as u16;
581+
Some(slice::from_raw_parts_mut(dst.as_mut_ptr(), 1))
582+
} else if dst.len() >= 2 {
583+
// Supplementary planes break into surrogates.
584+
code -= 0x1_0000;
585+
*dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
586+
*dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
587+
Some(slice::from_raw_parts_mut(dst.as_mut_ptr(), 2))
588+
} else {
589+
None
590+
}
591+
}
588592
}
589593

590594
/// Returns true if this `char` is an alphabetic code point, and false if not.

0 commit comments

Comments
 (0)