1- // Spans are encoded using 1-bit tag and 2 different encoding formats (one for each tag value).
2- // One format is used for keeping span data inline,
3- // another contains index into an out-of-line span interner.
4- // The encoding format for inline spans were obtained by optimizing over crates in rustc/libstd.
5- // See https://internals.rust-lang.org/t/rfc-compiler-refactoring-spans/1357/28
6-
71use crate :: def_id:: { DefIndex , LocalDefId } ;
82use crate :: hygiene:: SyntaxContext ;
93use crate :: SPAN_TRACK ;
@@ -13,59 +7,69 @@ use rustc_data_structures::fx::FxIndexSet;
137
148/// A compressed span.
159///
16- /// Whereas [`SpanData`] is 16 bytes, which is a bit too big to stick everywhere, `Span`
17- /// is a form that only takes up 8 bytes, with less space for the length, parent and
18- /// context. The vast majority (99.9%+) of `SpanData` instances will fit within
19- /// those 8 bytes; any `SpanData` whose fields don't fit into a `Span` are
10+ /// [`SpanData`] is 16 bytes, which is too big to stick everywhere. `Span` only
11+ /// takes up 8 bytes, with less space for the length, parent and context. The
12+ /// vast majority (99.9%+) of `SpanData` instances can be made to fit within
13+ /// those 8 bytes. Any `SpanData` whose fields don't fit into a `Span` are
2014/// stored in a separate interner table, and the `Span` will index into that
2115/// table. Interning is rare enough that the cost is low, but common enough
2216/// that the code is exercised regularly.
2317///
2418/// An earlier version of this code used only 4 bytes for `Span`, but that was
2519/// slower because only 80--90% of spans could be stored inline (even less in
26- /// very large crates) and so the interner was used a lot more.
20+ /// very large crates) and so the interner was used a lot more. That version of
21+ /// the code also predated the storage of parents.
22+ ///
23+ /// There are four different span forms.
2724///
28- /// Inline (compressed) format with no parent:
29- /// - `span.base_or_index == span_data.lo`
30- /// - `span.len_or_tag == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`)
31- /// - `span.ctxt_or_tag == span_data.ctxt` (must be `<= MAX_CTXT`)
25+ /// Inline-context format (requires non-huge length, non-huge context, and no parent):
26+ /// - `span.lo_or_index == span_data.lo`
27+ /// - `span.len_with_tag_or_marker == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`)
28+ /// - `span.ctxt_or_parent_or_marker == span_data.ctxt` (must be `<= MAX_CTXT`)
3229///
33- /// Interned format with inline `SyntaxContext`:
34- /// - `span.base_or_index == index` (indexes into the interner table)
35- /// - `span.len_or_tag == LEN_TAG` (high bit set, all other bits are zero)
36- /// - `span.ctxt_or_tag == span_data.ctxt` (must be `<= MAX_CTXT`)
30+ /// Inline-parent format (requires non-huge length, root context, and non-huge parent):
31+ /// - `span.lo_or_index == span_data.lo`
32+ /// - `span.len_with_tag_or_marker & !PARENT_TAG == len == span_data.hi - span_data.lo`
33+ /// (must be `<= MAX_LEN`)
34+ /// - `span.len_with_tag_or_marker` has top bit (`PARENT_TAG`) set
35+ /// - `span.ctxt_or_parent_or_marker == span_data.parent` (must be `<= MAX_CTXT`)
3736///
38- /// Inline (compressed) format with root context:
39- /// - `span.base_or_index == span_data.lo`
40- /// - `span.len_or_tag == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`)
41- /// - `span.len_or_tag` has top bit (`PARENT_MASK`) set
42- /// - `span.ctxt == span_data.parent` (must be `<= MAX_CTXT`)
37+ /// Partially-interned format (requires non-huge context):
38+ /// - `span.lo_or_index == index` (indexes into the interner table)
39+ /// - `span.len_with_tag_or_marker == BASE_LEN_INTERNED_MARKER`
40+ /// - `span.ctxt_or_parent_or_marker == span_data.ctxt` (must be `<= MAX_CTXT`)
4341///
44- /// Interned format:
45- /// - `span.base_or_index == index` (indexes into the interner table)
46- /// - `span.len_or_tag == LEN_TAG` (high bit set, all other bits are zero)
47- /// - `span.ctxt_or_tag == CTXT_TAG `
42+ /// Fully-interned format (all cases not covered above):
43+ /// - `span.lo_or_index == index` (indexes into the interner table)
44+ /// - `span.len_with_tag_or_marker == BASE_LEN_INTERNED_MARKER`
45+ /// - `span.ctxt_or_parent_or_marker == CTXT_INTERNED_MARKER`
4846///
49- /// The inline form uses 0 for the tag value (rather than 1) so that we don't
50- /// need to mask out the tag bit when getting the length, and so that the
51- /// dummy span can be all zeroes.
47+ /// The partially-interned form requires looking in the interning table for
48+ /// lo and length, but the context is stored inline as well as interned.
49+ /// This is useful because context lookups are often done in isolation, and
50+ /// inline lookups are quicker.
5251///
5352/// Notes about the choice of field sizes:
54- /// - `base ` is 32 bits in both `Span` and `SpanData`, which means that `base `
55- /// values never cause interning. The number of bits needed for `base `
53+ /// - `lo` is 32 bits in both `Span` and `SpanData`, which means that `lo`
54+ /// values never cause interning. The number of bits needed for `lo`
5655/// depends on the crate size. 32 bits allows up to 4 GiB of code in a crate.
57- /// - `len` is 15 bits in `Span` (a u16, minus 1 bit for the tag) and 32 bits
58- /// in `SpanData`, which means that large `len` values will cause interning.
59- /// The number of bits needed for `len` does not depend on the crate size.
60- /// The most common numbers of bits for `len` are from 0 to 7, with a peak usually
61- /// at 3 or 4, and then it drops off quickly from 8 onwards. 15 bits is enough
62- /// for 99.99%+ of cases, but larger values (sometimes 20+ bits) might occur
63- /// dozens of times in a typical crate.
64- /// - `ctxt_or_tag` is 16 bits in `Span` and 32 bits in `SpanData`, which means that
65- /// large `ctxt` values will cause interning. The number of bits needed for
66- /// `ctxt` values depend partly on the crate size and partly on the form of
67- /// the code. No crates in `rustc-perf` need more than 15 bits for `ctxt_or_tag`,
68- /// but larger crates might need more than 16 bits.
56+ /// Having no compression on this field means there is no performance cliff
57+ /// if a crate exceeds a particular size.
58+ /// - `len` is ~15 bits in `Span` (a u16, minus 1 bit for PARENT_TAG) and 32
59+ /// bits in `SpanData`, which means that large `len` values will cause
60+ /// interning. The number of bits needed for `len` does not depend on the
61+ /// crate size. The most common numbers of bits for `len` are from 0 to 7,
62+ /// with a peak usually at 3 or 4, and then it drops off quickly from 8
63+ /// onwards. 15 bits is enough for 99.99%+ of cases, but larger values
64+ /// (sometimes 20+ bits) might occur dozens of times in a typical crate.
65+ /// - `ctxt_or_parent_or_marker` is 16 bits in `Span` and two 32-bit fields in
66+ /// `SpanData`, which means interning will happen if `ctxt` is large, if
67+ /// `parent` is large, or if both values are non-zero. The number of bits
68+ /// needed for `ctxt` values depends partly on the crate size and partly on
69+ /// the form of the code. No crates in `rustc-perf` need more than 15 bits
70+ /// for `ctxt_or_parent_or_marker`, but larger crates might need more than 16
71+ /// bits. The number of bits needed for `parent` hasn't been measured,
72+ /// because `parent` isn't currently used by default.
6973///
7074/// In order to reliably use parented spans in incremental compilation,
7175/// a dependency on the parent definition's span must be tracked. This is performed
@@ -74,19 +78,22 @@ use rustc_data_structures::fx::FxIndexSet;
7478#[ derive( Clone , Copy , Eq , PartialEq , Hash ) ]
7579#[ rustc_pass_by_value]
7680pub struct Span {
77- base_or_index : u32 ,
78- len_or_tag : u16 ,
79- ctxt_or_tag : u16 ,
81+ lo_or_index : u32 ,
82+ len_with_tag_or_marker : u16 ,
83+ ctxt_or_parent_or_marker : u16 ,
8084}
8185
82- const LEN_TAG : u16 = 0b1111_1111_1111_1111 ;
83- const PARENT_MASK : u16 = 0b1000_0000_0000_0000 ;
84- const MAX_LEN : u32 = 0b0111_1111_1111_1111 ;
85- const CTXT_TAG : u32 = 0b1111_1111_1111_1111 ;
86- const MAX_CTXT : u32 = CTXT_TAG - 1 ;
86+ // `MAX_LEN` is chosen so that `PARENT_TAG | MAX_LEN` is distinct from
87+ // `BASE_LEN_INTERNED_MARKER`. (If `MAX_LEN` was 1 higher, this wouldn't be true.)
88+ const MAX_LEN : u32 = 0b0111_1111_1111_1110 ;
89+ const MAX_CTXT : u32 = 0b0111_1111_1111_1110 ;
90+ const PARENT_TAG : u16 = 0b1000_0000_0000_0000 ;
91+ const BASE_LEN_INTERNED_MARKER : u16 = 0b1111_1111_1111_1111 ;
92+ const CTXT_INTERNED_MARKER : u16 = 0b1111_1111_1111_1111 ;
8793
88- /// Dummy span, both position and length are zero, syntax context is zero as well.
89- pub const DUMMY_SP : Span = Span { base_or_index : 0 , len_or_tag : 0 , ctxt_or_tag : 0 } ;
94+ /// The dummy span has zero position, length, and context, and no parent.
95+ pub const DUMMY_SP : Span =
96+ Span { lo_or_index : 0 , len_with_tag_or_marker : 0 , ctxt_or_parent_or_marker : 0 } ;
9097
9198impl Span {
9299 #[ inline]
@@ -100,39 +107,43 @@ impl Span {
100107 std:: mem:: swap ( & mut lo, & mut hi) ;
101108 }
102109
103- let ( base, len, ctxt2) = ( lo. 0 , hi. 0 - lo. 0 , ctxt. as_u32 ( ) ) ;
104-
105- if len <= MAX_LEN && ctxt2 <= MAX_CTXT {
106- let len_or_tag = len as u16 ;
107- debug_assert_eq ! ( len_or_tag & PARENT_MASK , 0 ) ;
110+ let ( lo2, len, ctxt2) = ( lo. 0 , hi. 0 - lo. 0 , ctxt. as_u32 ( ) ) ;
108111
109- if let Some ( parent) = parent {
110- // Inline format with parent.
111- let len_or_tag = len_or_tag | PARENT_MASK ;
112- let parent2 = parent. local_def_index . as_u32 ( ) ;
113- if ctxt2 == SyntaxContext :: root ( ) . as_u32 ( )
114- && parent2 <= MAX_CTXT
115- && len_or_tag < LEN_TAG
116- {
117- debug_assert_ne ! ( len_or_tag, LEN_TAG ) ;
118- return Span { base_or_index : base, len_or_tag, ctxt_or_tag : parent2 as u16 } ;
119- }
120- } else {
121- // Inline format with ctxt.
122- debug_assert_ne ! ( len_or_tag, LEN_TAG ) ;
112+ if len <= MAX_LEN {
113+ if ctxt2 <= MAX_CTXT && parent. is_none ( ) {
114+ // Inline-context format.
123115 return Span {
124- base_or_index : base,
125- len_or_tag : len as u16 ,
126- ctxt_or_tag : ctxt2 as u16 ,
116+ lo_or_index : lo2,
117+ len_with_tag_or_marker : len as u16 ,
118+ ctxt_or_parent_or_marker : ctxt2 as u16 ,
119+ } ;
120+ } else if ctxt2 == SyntaxContext :: root ( ) . as_u32 ( )
121+ && let Some ( parent) = parent
122+ && let parent2 = parent. local_def_index . as_u32 ( )
123+ && parent2 <= MAX_CTXT
124+ {
125+ // Inline-parent format.
126+ return Span {
127+ lo_or_index : lo2,
128+ len_with_tag_or_marker : PARENT_TAG | len as u16 ,
129+ ctxt_or_parent_or_marker : parent2 as u16
127130 } ;
128131 }
129132 }
130133
131- // Interned format.
134+ // Partially-interned or fully-interned format.
132135 let index =
133136 with_span_interner ( |interner| interner. intern ( & SpanData { lo, hi, ctxt, parent } ) ) ;
134- let ctxt_or_tag = if ctxt2 <= MAX_CTXT { ctxt2 } else { CTXT_TAG } as u16 ;
135- Span { base_or_index : index, len_or_tag : LEN_TAG , ctxt_or_tag }
137+ let ctxt_or_parent_or_marker = if ctxt2 <= MAX_CTXT {
138+ ctxt2 as u16 // partially-interned
139+ } else {
140+ CTXT_INTERNED_MARKER // fully-interned
141+ } ;
142+ Span {
143+ lo_or_index : index,
144+ len_with_tag_or_marker : BASE_LEN_INTERNED_MARKER ,
145+ ctxt_or_parent_or_marker,
146+ }
136147 }
137148
138149 #[ inline]
@@ -148,56 +159,80 @@ impl Span {
148159 /// This function must not be used outside the incremental engine.
149160 #[ inline]
150161 pub fn data_untracked ( self ) -> SpanData {
151- if self . len_or_tag != LEN_TAG {
152- // Inline format.
153- if self . len_or_tag & PARENT_MASK == 0 {
154- debug_assert ! ( self . len_or_tag as u32 <= MAX_LEN ) ;
162+ if self . len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
163+ if self . len_with_tag_or_marker & PARENT_TAG == 0 {
164+ // Inline-context format.
165+ let len = self . len_with_tag_or_marker as u32 ;
166+ debug_assert ! ( len <= MAX_LEN ) ;
155167 SpanData {
156- lo : BytePos ( self . base_or_index ) ,
157- hi : BytePos ( self . base_or_index + self . len_or_tag as u32 ) ,
158- ctxt : SyntaxContext :: from_u32 ( self . ctxt_or_tag as u32 ) ,
168+ lo : BytePos ( self . lo_or_index ) ,
169+ hi : BytePos ( self . lo_or_index + len ) ,
170+ ctxt : SyntaxContext :: from_u32 ( self . ctxt_or_parent_or_marker as u32 ) ,
159171 parent : None ,
160172 }
161173 } else {
162- let len = self . len_or_tag & !PARENT_MASK ;
163- debug_assert ! ( len as u32 <= MAX_LEN ) ;
164- let parent =
165- LocalDefId { local_def_index : DefIndex :: from_u32 ( self . ctxt_or_tag as u32 ) } ;
174+ // Inline-parent format.
175+ let len = ( self . len_with_tag_or_marker & !PARENT_TAG ) as u32 ;
176+ debug_assert ! ( len <= MAX_LEN ) ;
177+ let parent = LocalDefId {
178+ local_def_index : DefIndex :: from_u32 ( self . ctxt_or_parent_or_marker as u32 ) ,
179+ } ;
166180 SpanData {
167- lo : BytePos ( self . base_or_index ) ,
168- hi : BytePos ( self . base_or_index + len as u32 ) ,
181+ lo : BytePos ( self . lo_or_index ) ,
182+ hi : BytePos ( self . lo_or_index + len) ,
169183 ctxt : SyntaxContext :: root ( ) ,
170184 parent : Some ( parent) ,
171185 }
172186 }
173187 } else {
174- // Interned format.
175- let index = self . base_or_index ;
188+ // Fully-interned or partially-interned format. In either case,
189+ // the interned value contains all the data, so we don't need to
190+ // distinguish them.
191+ let index = self . lo_or_index ;
176192 with_span_interner ( |interner| interner. spans [ index as usize ] )
177193 }
178194 }
179195
196+ /// Returns `true` if this is a dummy span with any hygienic context.
197+ #[ inline]
198+ pub fn is_dummy ( self ) -> bool {
199+ if self . len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
200+ // Inline-context or inline-parent format.
201+ let lo = self . lo_or_index ;
202+ let len = ( self . len_with_tag_or_marker & !PARENT_TAG ) as u32 ;
203+ debug_assert ! ( len <= MAX_LEN ) ;
204+ lo == 0 && len == 0
205+ } else {
206+ // Fully-interned or partially-interned format.
207+ let index = self . lo_or_index ;
208+ let data = with_span_interner ( |interner| interner. spans [ index as usize ] ) ;
209+ data. lo == BytePos ( 0 ) && data. hi == BytePos ( 0 )
210+ }
211+ }
212+
180213 /// This function is used as a fast path when decoding the full `SpanData` is not necessary.
214+ /// It's a cut-down version of `data_untracked`.
181215 #[ inline]
182216 pub fn ctxt ( self ) -> SyntaxContext {
183- let ctxt_or_tag = self . ctxt_or_tag as u32 ;
184- // Check for interned format.
185- if self . len_or_tag == LEN_TAG {
186- if ctxt_or_tag == CTXT_TAG {
187- // Fully interned format.
188- let index = self . base_or_index ;
189- with_span_interner ( |interner| interner. spans [ index as usize ] . ctxt )
217+ if self . len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
218+ if self . len_with_tag_or_marker & PARENT_TAG == 0 {
219+ // Inline-context format.
220+ SyntaxContext :: from_u32 ( self . ctxt_or_parent_or_marker as u32 )
190221 } else {
191- // Interned format with inline ctxt .
192- SyntaxContext :: from_u32 ( ctxt_or_tag )
222+ // Inline-parent format. We know that the SyntaxContext is root .
223+ SyntaxContext :: root ( )
193224 }
194- } else if self . len_or_tag & PARENT_MASK == 0 {
195- // Inline format with inline ctxt.
196- SyntaxContext :: from_u32 ( ctxt_or_tag)
197225 } else {
198- // Inline format with inline parent.
199- // We know that the SyntaxContext is root.
200- SyntaxContext :: root ( )
226+ if self . ctxt_or_parent_or_marker != CTXT_INTERNED_MARKER {
227+ // Partially-interned format. This path avoids looking up the
228+ // interned value, and is the whole point of the
229+ // partially-interned format.
230+ SyntaxContext :: from_u32 ( self . ctxt_or_parent_or_marker as u32 )
231+ } else {
232+ // Fully-interned format.
233+ let index = self . lo_or_index ;
234+ with_span_interner ( |interner| interner. spans [ index as usize ] . ctxt )
235+ }
201236 }
202237 }
203238}
0 commit comments