1414
1515use arena:: DroplessArena ;
1616use rustc_data_structures:: fx:: FxHashMap ;
17+ use rustc_data_structures:: indexed_vec:: Idx ;
1718use serialize:: { Decodable , Decoder , Encodable , Encoder } ;
1819
1920use std:: fmt;
@@ -143,9 +144,18 @@ impl Decodable for Ident {
143144 }
144145}
145146
146- /// A symbol is an interned or gensymed string.
147+ /// A symbol is an interned or gensymed string. The use of newtype_index! means
148+ /// that Option<Symbol> only takes up 4 bytes, because newtype_index! reserves
149+ /// the last 256 values for tagging purposes.
150+ ///
151+ /// Note that Symbol cannot be a newtype_index! directly because it implements
152+ /// fmt::Debug, Encodable, and Decodable in special ways.
147153#[ derive( Clone , Copy , PartialEq , Eq , PartialOrd , Ord , Hash ) ]
148- pub struct Symbol ( u32 ) ;
154+ pub struct Symbol ( SymbolIndex ) ;
155+
156+ newtype_index ! {
157+ pub struct SymbolIndex { .. }
158+ }
149159
150160// The interner is pointed to by a thread local value which is only set on the main thread
151161// when parallelization is disabled. So we don't allow `Symbol` to transfer between threads
@@ -156,6 +166,10 @@ impl !Send for Symbol { }
156166impl !Sync for Symbol { }
157167
158168impl Symbol {
169+ const fn new ( n : u32 ) -> Self {
170+ Symbol ( SymbolIndex :: from_u32_const ( n) )
171+ }
172+
159173 /// Maps a string to its interned representation.
160174 pub fn intern ( string : & str ) -> Self {
161175 with_interner ( |interner| interner. intern ( string) )
@@ -189,15 +203,15 @@ impl Symbol {
189203 }
190204
191205 pub fn as_u32 ( self ) -> u32 {
192- self . 0
206+ self . 0 . as_u32 ( )
193207 }
194208}
195209
196210impl fmt:: Debug for Symbol {
197211 fn fmt ( & self , f : & mut fmt:: Formatter ) -> fmt:: Result {
198212 let is_gensymed = with_interner ( |interner| interner. is_gensymed ( * self ) ) ;
199213 if is_gensymed {
200- write ! ( f, "{}({})" , self , self . 0 )
214+ write ! ( f, "{}({:? })" , self , self . 0 )
201215 } else {
202216 write ! ( f, "{}" , self )
203217 }
@@ -229,6 +243,9 @@ impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
229243}
230244
231245// The `&'static str`s in this type actually point into the arena.
246+ //
247+ // Note that normal symbols are indexed upward from 0, and gensyms are indexed
248+ // downward from SymbolIndex::MAX_AS_U32.
232249#[ derive( Default ) ]
233250pub struct Interner {
234251 arena : DroplessArena ,
@@ -243,7 +260,7 @@ impl Interner {
243260 for & string in init {
244261 if string == "" {
245262 // We can't allocate empty strings in the arena, so handle this here.
246- let name = Symbol ( this. strings . len ( ) as u32 ) ;
263+ let name = Symbol :: new ( this. strings . len ( ) as u32 ) ;
247264 this. names . insert ( "" , name) ;
248265 this. strings . push ( "" ) ;
249266 } else {
@@ -258,7 +275,7 @@ impl Interner {
258275 return name;
259276 }
260277
261- let name = Symbol ( self . strings . len ( ) as u32 ) ;
278+ let name = Symbol :: new ( self . strings . len ( ) as u32 ) ;
262279
263280 // `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
264281 // UTF-8.
@@ -276,10 +293,10 @@ impl Interner {
276293 }
277294
278295 pub fn interned ( & self , symbol : Symbol ) -> Symbol {
279- if ( symbol. 0 as usize ) < self . strings . len ( ) {
296+ if ( symbol. 0 . as_usize ( ) ) < self . strings . len ( ) {
280297 symbol
281298 } else {
282- self . interned ( self . gensyms [ ( ! 0 - symbol. 0 ) as usize ] )
299+ self . interned ( self . gensyms [ ( SymbolIndex :: MAX_AS_U32 - symbol. 0 . as_u32 ( ) ) as usize ] )
283300 }
284301 }
285302
@@ -290,17 +307,17 @@ impl Interner {
290307
291308 fn gensymed ( & mut self , symbol : Symbol ) -> Symbol {
292309 self . gensyms . push ( symbol) ;
293- Symbol ( ! 0 - self . gensyms . len ( ) as u32 + 1 )
310+ Symbol :: new ( SymbolIndex :: MAX_AS_U32 - self . gensyms . len ( ) as u32 + 1 )
294311 }
295312
296313 fn is_gensymed ( & mut self , symbol : Symbol ) -> bool {
297- symbol. 0 as usize >= self . strings . len ( )
314+ symbol. 0 . as_usize ( ) >= self . strings . len ( )
298315 }
299316
300317 pub fn get ( & self , symbol : Symbol ) -> & str {
301- match self . strings . get ( symbol. 0 as usize ) {
318+ match self . strings . get ( symbol. 0 . as_usize ( ) ) {
302319 Some ( string) => string,
303- None => self . get ( self . gensyms [ ( ! 0 - symbol. 0 ) as usize ] ) ,
320+ None => self . get ( self . gensyms [ ( SymbolIndex :: MAX_AS_U32 - symbol. 0 . as_u32 ( ) ) as usize ] ) ,
304321 }
305322 }
306323}
@@ -324,7 +341,7 @@ macro_rules! declare_keywords {(
324341 $(
325342 #[ allow( non_upper_case_globals) ]
326343 pub const $konst: Keyword = Keyword {
327- ident: Ident :: with_empty_ctxt( super :: Symbol ( $index) )
344+ ident: Ident :: with_empty_ctxt( super :: Symbol :: new ( $index) )
328345 } ;
329346 ) *
330347
@@ -709,19 +726,19 @@ mod tests {
709726 fn interner_tests ( ) {
710727 let mut i: Interner = Interner :: default ( ) ;
711728 // first one is zero:
712- assert_eq ! ( i. intern( "dog" ) , Symbol ( 0 ) ) ;
729+ assert_eq ! ( i. intern( "dog" ) , Symbol :: new ( 0 ) ) ;
713730 // re-use gets the same entry:
714- assert_eq ! ( i. intern( "dog" ) , Symbol ( 0 ) ) ;
731+ assert_eq ! ( i. intern( "dog" ) , Symbol :: new ( 0 ) ) ;
715732 // different string gets a different #:
716- assert_eq ! ( i. intern( "cat" ) , Symbol ( 1 ) ) ;
717- assert_eq ! ( i. intern( "cat" ) , Symbol ( 1 ) ) ;
733+ assert_eq ! ( i. intern( "cat" ) , Symbol :: new ( 1 ) ) ;
734+ assert_eq ! ( i. intern( "cat" ) , Symbol :: new ( 1 ) ) ;
718735 // dog is still at zero
719- assert_eq ! ( i. intern( "dog" ) , Symbol ( 0 ) ) ;
720- assert_eq ! ( i. gensym( "zebra" ) , Symbol ( 4294967295 ) ) ;
721- // gensym of same string gets new number :
722- assert_eq ! ( i. gensym( "zebra" ) , Symbol ( 4294967294 ) ) ;
736+ assert_eq ! ( i. intern( "dog" ) , Symbol :: new ( 0 ) ) ;
737+ assert_eq ! ( i. gensym( "zebra" ) , Symbol :: new ( SymbolIndex :: MAX_AS_U32 ) ) ;
738+ // gensym of same string gets new number:
739+ assert_eq ! ( i. gensym( "zebra" ) , Symbol :: new ( SymbolIndex :: MAX_AS_U32 - 1 ) ) ;
723740 // gensym of *existing* string gets new number:
724- assert_eq ! ( i. gensym( "dog" ) , Symbol ( 4294967293 ) ) ;
741+ assert_eq ! ( i. gensym( "dog" ) , Symbol :: new ( SymbolIndex :: MAX_AS_U32 - 2 ) ) ;
725742 }
726743
727744 #[ test]
0 commit comments