|
18 | 18 | use char_private::is_printable; |
19 | 19 | use convert::TryFrom; |
20 | 20 | use fmt; |
| 21 | +use slice; |
21 | 22 | use iter::FusedIterator; |
22 | 23 | use mem::transmute; |
23 | 24 |
|
@@ -327,9 +328,9 @@ pub trait CharExt { |
327 | 328 | #[stable(feature = "core", since = "1.6.0")] |
328 | 329 | fn len_utf16(self) -> usize; |
329 | 330 | #[unstable(feature = "unicode", issue = "27784")] |
330 | | - fn encode_utf8(self) -> EncodeUtf8; |
| 331 | + fn encode_utf8(self, dst: &mut [u8]) -> &mut str; |
331 | 332 | #[unstable(feature = "unicode", issue = "27784")] |
332 | | - fn encode_utf16(self) -> EncodeUtf16; |
| 333 | + fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16]; |
333 | 334 | } |
334 | 335 |
|
335 | 336 | #[stable(feature = "core", since = "1.6.0")] |
@@ -419,47 +420,59 @@ impl CharExt for char { |
419 | 420 | } |
420 | 421 |
|
421 | 422 | #[inline] |
422 | | - fn encode_utf8(self) -> EncodeUtf8 { |
| 423 | + fn encode_utf8(self, dst: &mut [u8]) -> &mut str { |
423 | 424 | let code = self as u32; |
424 | | - let mut buf = [0; 4]; |
425 | | - let pos = if code < MAX_ONE_B { |
426 | | - buf[3] = code as u8; |
427 | | - 3 |
428 | | - } else if code < MAX_TWO_B { |
429 | | - buf[2] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; |
430 | | - buf[3] = (code & 0x3F) as u8 | TAG_CONT; |
431 | | - 2 |
432 | | - } else if code < MAX_THREE_B { |
433 | | - buf[1] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; |
434 | | - buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; |
435 | | - buf[3] = (code & 0x3F) as u8 | TAG_CONT; |
436 | | - 1 |
437 | | - } else { |
438 | | - buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; |
439 | | - buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT; |
440 | | - buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; |
441 | | - buf[3] = (code & 0x3F) as u8 | TAG_CONT; |
442 | | - 0 |
443 | | - }; |
444 | | - EncodeUtf8 { buf: buf, pos: pos } |
| 425 | + unsafe { |
| 426 | + let len = |
| 427 | + if code < MAX_ONE_B && !dst.is_empty() { |
| 428 | + *dst.get_unchecked_mut(0) = code as u8; |
| 429 | + 1 |
| 430 | + } else if code < MAX_TWO_B && dst.len() >= 2 { |
| 431 | + *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; |
| 432 | + *dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT; |
| 433 | + 2 |
| 434 | + } else if code < MAX_THREE_B && dst.len() >= 3 { |
| 435 | + *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; |
| 436 | + *dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT; |
| 437 | + *dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT; |
| 438 | + 3 |
| 439 | + } else if dst.len() >= 4 { |
| 440 | + *dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; |
| 441 | + *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT; |
| 442 | + *dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT; |
| 443 | + *dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT; |
| 444 | + 4 |
| 445 | + } else { |
| 446 | + panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}", |
| 447 | + from_u32_unchecked(code).len_utf8(), |
| 448 | + code, |
| 449 | + dst.len()) |
| 450 | + }; |
| 451 | + transmute(slice::from_raw_parts_mut(dst.as_mut_ptr(), len)) |
| 452 | + } |
445 | 453 | } |
446 | 454 |
|
447 | 455 | #[inline] |
448 | | - fn encode_utf16(self) -> EncodeUtf16 { |
449 | | - let mut buf = [0; 2]; |
| 456 | + fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] { |
450 | 457 | let mut code = self as u32; |
451 | | - let pos = if (code & 0xFFFF) == code { |
452 | | - // The BMP falls through (assuming non-surrogate, as it should) |
453 | | - buf[1] = code as u16; |
454 | | - 1 |
455 | | - } else { |
456 | | - // Supplementary planes break into surrogates. |
457 | | - code -= 0x1_0000; |
458 | | - buf[0] = 0xD800 | ((code >> 10) as u16); |
459 | | - buf[1] = 0xDC00 | ((code as u16) & 0x3FF); |
460 | | - 0 |
461 | | - }; |
462 | | - EncodeUtf16 { buf: buf, pos: pos } |
| 458 | + unsafe { |
| 459 | + if (code & 0xFFFF) == code && !dst.is_empty() { |
| 460 | + // The BMP falls through (assuming non-surrogate, as it should) |
| 461 | + *dst.get_unchecked_mut(0) = code as u16; |
| 462 | + slice::from_raw_parts_mut(dst.as_mut_ptr(), 1) |
| 463 | + } else if dst.len() >= 2 { |
| 464 | + // Supplementary planes break into surrogates. |
| 465 | + code -= 0x1_0000; |
| 466 | + *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16); |
| 467 | + *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF); |
| 468 | + slice::from_raw_parts_mut(dst.as_mut_ptr(), 2) |
| 469 | + } else { |
| 470 | + panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}", |
| 471 | + from_u32_unchecked(code).len_utf16(), |
| 472 | + code, |
| 473 | + dst.len()) |
| 474 | + } |
| 475 | + } |
463 | 476 | } |
464 | 477 | } |
465 | 478 |
|
@@ -702,88 +715,7 @@ impl ExactSizeIterator for EscapeDebug { } |
702 | 715 | #[unstable(feature = "fused", issue = "35602")] |
703 | 716 | impl FusedIterator for EscapeDebug {} |
704 | 717 |
|
705 | | -/// An iterator over `u8` entries representing the UTF-8 encoding of a `char` |
706 | | -/// value. |
707 | | -/// |
708 | | -/// Constructed via the `.encode_utf8()` method on `char`. |
709 | | -#[unstable(feature = "unicode", issue = "27784")] |
710 | | -#[derive(Debug)] |
711 | | -pub struct EncodeUtf8 { |
712 | | - buf: [u8; 4], |
713 | | - pos: usize, |
714 | | -} |
715 | | - |
716 | | -impl EncodeUtf8 { |
717 | | - /// Returns the remaining bytes of this iterator as a slice. |
718 | | - #[unstable(feature = "unicode", issue = "27784")] |
719 | | - pub fn as_slice(&self) -> &[u8] { |
720 | | - &self.buf[self.pos..] |
721 | | - } |
722 | | -} |
723 | | - |
724 | | -#[unstable(feature = "unicode", issue = "27784")] |
725 | | -impl Iterator for EncodeUtf8 { |
726 | | - type Item = u8; |
727 | | - |
728 | | - fn next(&mut self) -> Option<u8> { |
729 | | - if self.pos == self.buf.len() { |
730 | | - None |
731 | | - } else { |
732 | | - let ret = Some(self.buf[self.pos]); |
733 | | - self.pos += 1; |
734 | | - ret |
735 | | - } |
736 | | - } |
737 | | - |
738 | | - fn size_hint(&self) -> (usize, Option<usize>) { |
739 | | - self.as_slice().iter().size_hint() |
740 | | - } |
741 | | -} |
742 | | - |
743 | | -#[unstable(feature = "fused", issue = "35602")] |
744 | | -impl FusedIterator for EncodeUtf8 {} |
745 | | - |
746 | | -/// An iterator over `u16` entries representing the UTF-16 encoding of a `char` |
747 | | -/// value. |
748 | | -/// |
749 | | -/// Constructed via the `.encode_utf16()` method on `char`. |
750 | | -#[unstable(feature = "unicode", issue = "27784")] |
751 | | -#[derive(Debug)] |
752 | | -pub struct EncodeUtf16 { |
753 | | - buf: [u16; 2], |
754 | | - pos: usize, |
755 | | -} |
756 | | - |
757 | | -impl EncodeUtf16 { |
758 | | - /// Returns the remaining `u16` code units of this iterator as a slice. |
759 | | - #[unstable(feature = "unicode", issue = "27784")] |
760 | | - pub fn as_slice(&self) -> &[u16] { |
761 | | - &self.buf[self.pos..] |
762 | | - } |
763 | | -} |
764 | | - |
765 | | - |
766 | | -#[unstable(feature = "unicode", issue = "27784")] |
767 | | -impl Iterator for EncodeUtf16 { |
768 | | - type Item = u16; |
769 | | - |
770 | | - fn next(&mut self) -> Option<u16> { |
771 | | - if self.pos == self.buf.len() { |
772 | | - None |
773 | | - } else { |
774 | | - let ret = Some(self.buf[self.pos]); |
775 | | - self.pos += 1; |
776 | | - ret |
777 | | - } |
778 | | - } |
779 | | - |
780 | | - fn size_hint(&self) -> (usize, Option<usize>) { |
781 | | - self.as_slice().iter().size_hint() |
782 | | - } |
783 | | -} |
784 | 718 |
|
785 | | -#[unstable(feature = "fused", issue = "35602")] |
786 | | -impl FusedIterator for EncodeUtf16 {} |
787 | 719 |
|
788 | 720 | /// An iterator over an iterator of bytes of the characters the bytes represent |
789 | 721 | /// as UTF-8 |
|
0 commit comments