@@ -22,8 +22,11 @@ const BUF_SIZE: usize = 8192;
2222/// size of the buffer, rather than the full length of the encoded data, and
2323/// because it doesn't need to reallocate memory along the way.
2424pub struct FileEncoder {
25- /// The input buffer. For adequate performance, we need to be able to write
26- /// directly to the unwritten region of the buffer, without calling copy_from_slice.
25+ // The input buffer. For adequate performance, we need to be able to write
26+ // directly to the unwritten region of the buffer, without calling copy_from_slice.
27+ // Note that our buffer is always initialized so that we can do that direct access
28+ // without unsafe code. Users of this type write many more than BUF_SIZE bytes, so the
29+ // initialization is approximately free.
2730 buf : Box < [ u8 ; BUF_SIZE ] > ,
2831 buffered : usize ,
2932 flushed : usize ,
@@ -54,13 +57,12 @@ impl FileEncoder {
5457
5558 #[ cold]
5659 #[ inline( never) ]
57- pub fn flush ( & mut self ) -> & mut [ u8 ; BUF_SIZE ] {
60+ pub fn flush ( & mut self ) {
5861 if self . res . is_ok ( ) {
5962 self . res = self . file . write_all ( & self . buf [ ..self . buffered ] ) ;
6063 }
6164 self . flushed += self . buffered ;
6265 self . buffered = 0 ;
63- & mut self . buf
6466 }
6567
6668 pub fn file ( & self ) -> & File {
@@ -76,7 +78,8 @@ impl FileEncoder {
7678 #[ cold]
7779 #[ inline( never) ]
7880 fn write_all_cold_path ( & mut self , buf : & [ u8 ] ) {
79- if let Some ( dest) = self . flush ( ) . get_mut ( ..buf. len ( ) ) {
81+ self . flush ( ) ;
82+ if let Some ( dest) = self . buf . get_mut ( ..buf. len ( ) ) {
8083 dest. copy_from_slice ( buf) ;
8184 self . buffered += buf. len ( ) ;
8285 } else {
@@ -99,13 +102,20 @@ impl FileEncoder {
99102
100103 /// Write up to `N` bytes to this encoder.
101104 ///
102- /// Whenever possible, use this function to do writes whose length has a small and
103- /// compile-time constant upper bound.
105+ /// This function can be used to avoid the overhead of calling memcpy for writes that
106+ /// have runtime-variable length, but are small and have a small fixed upper bound.
107+ ///
108+ /// This can be used to do in-place encoding as is done for leb128 (without this function
109+ /// we would need to write to a temporary buffer then memcpy into the encoder), and it can
110+ /// also be used to implement the varint scheme we use for rmeta and dep graph encoding,
111+ /// where we only want to encode the first few bytes of an integer. Copying in the whole
112+ /// integer then only advancing the encoder state for the few bytes we care about is more
113+ /// efficient than calling [`FileEncoder::write_all`], because variable-size copies are
114+ /// always lowered to `memcpy`, which has overhead and contains a lot of logic we can bypass
115+ /// with this function. Note that common architectures support fixed-size writes up to 8 bytes
116+ /// with one instruction, so while this does in some sense do wasted work, we come out ahead.
104117 #[ inline]
105- pub fn write_with < const N : usize , V > ( & mut self , mut visitor : V )
106- where
107- V : FnMut ( & mut [ u8 ; N ] ) -> usize ,
108- {
118+ pub fn write_with < const N : usize > ( & mut self , visitor : impl FnOnce ( & mut [ u8 ; N ] ) -> usize ) {
109119 let flush_threshold = const { BUF_SIZE . checked_sub ( N ) . unwrap ( ) } ;
110120 if std:: intrinsics:: unlikely ( self . buffered > flush_threshold) {
111121 self . flush ( ) ;
@@ -115,26 +125,50 @@ impl FileEncoder {
115125 // We produce a post-mono error if N > BUF_SIZE.
116126 let buf = unsafe { self . buffer_empty ( ) . first_chunk_mut :: < N > ( ) . unwrap_unchecked ( ) } ;
117127 let written = visitor ( buf) ;
118- debug_assert ! ( written <= N ) ;
119128 // We have to ensure that an errant visitor cannot cause self.buffered to exceed BUF_SIZE.
120- self . buffered += written. min ( N ) ;
129+ if written > N {
130+ Self :: panic_invalid_write :: < N > ( written) ;
131+ }
132+ self . buffered += written;
133+ }
134+
135+ #[ cold]
136+ #[ inline( never) ]
137+ fn panic_invalid_write < const N : usize > ( written : usize ) {
138+ panic ! ( "FileEncoder::write_with::<{N}> cannot be used to write {written} bytes" ) ;
139+ }
140+
141+ /// Helper for calls where [`FileEncoder::write_with`] always writes the whole array.
142+ #[ inline]
143+ pub fn write_array < const N : usize > ( & mut self , buf : [ u8 ; N ] ) {
144+ self . write_with ( |dest| {
145+ * dest = buf;
146+ N
147+ } )
121148 }
122149
123150 pub fn finish ( mut self ) -> Result < usize , io:: Error > {
124151 self . flush ( ) ;
125- match self . res {
152+ match std :: mem :: replace ( & mut self . res , Ok ( ( ) ) ) {
126153 Ok ( ( ) ) => Ok ( self . position ( ) ) ,
127154 Err ( e) => Err ( e) ,
128155 }
129156 }
130157}
131158
159+ impl Drop for FileEncoder {
160+ fn drop ( & mut self ) {
161+ // Likely to be a no-op, because `finish` should have been called and
162+ // it also flushes. But do it just in case.
163+ self . flush ( ) ;
164+ }
165+ }
166+
132167macro_rules! write_leb128 {
133168 ( $this_fn: ident, $int_ty: ty, $write_leb_fn: ident) => {
134169 #[ inline]
135170 fn $this_fn( & mut self , v: $int_ty) {
136- const MAX_ENCODED_LEN : usize = $crate:: leb128:: max_leb128_len:: <$int_ty>( ) ;
137- self . write_with:: <MAX_ENCODED_LEN , _>( |buf| leb128:: $write_leb_fn( buf, v) )
171+ self . write_with( |buf| leb128:: $write_leb_fn( buf, v) )
138172 }
139173 } ;
140174}
@@ -147,18 +181,12 @@ impl Encoder for FileEncoder {
147181
148182 #[ inline]
149183 fn emit_u16 ( & mut self , v : u16 ) {
150- self . write_with ( |buf| {
151- * buf = v. to_le_bytes ( ) ;
152- 2
153- } ) ;
184+ self . write_array ( v. to_le_bytes ( ) ) ;
154185 }
155186
156187 #[ inline]
157188 fn emit_u8 ( & mut self , v : u8 ) {
158- self . write_with ( |buf : & mut [ u8 ; 1 ] | {
159- buf[ 0 ] = v;
160- 1
161- } ) ;
189+ self . write_array ( [ v] ) ;
162190 }
163191
164192 write_leb128 ! ( emit_isize, isize , write_isize_leb128) ;
@@ -168,10 +196,7 @@ impl Encoder for FileEncoder {
168196
169197 #[ inline]
170198 fn emit_i16 ( & mut self , v : i16 ) {
171- self . write_with ( |buf| {
172- * buf = v. to_le_bytes ( ) ;
173- 2
174- } ) ;
199+ self . write_array ( v. to_le_bytes ( ) ) ;
175200 }
176201
177202 #[ inline]
@@ -370,10 +395,7 @@ impl Encodable<FileEncoder> for IntEncodedWithFixedSize {
370395 #[ inline]
371396 fn encode ( & self , e : & mut FileEncoder ) {
372397 let _start_pos = e. position ( ) ;
373- e. write_with ( |buf| {
374- * buf = self . 0 . to_le_bytes ( ) ;
375- buf. len ( )
376- } ) ;
398+ e. write_array ( self . 0 . to_le_bytes ( ) ) ;
377399 let _end_pos = e. position ( ) ;
378400 debug_assert_eq ! ( ( _end_pos - _start_pos) , IntEncodedWithFixedSize :: ENCODED_SIZE ) ;
379401 }
0 commit comments