@@ -344,9 +344,18 @@ impl Decodable for Ident {
344344 }
345345}
346346
347- /// A symbol is an interned or gensymed string. The use of `newtype_index!` means
348- /// that `Option<Symbol>` only takes up 4 bytes, because `newtype_index!` reserves
349- /// the last 256 values for tagging purposes.
347+ /// A symbol is an interned or gensymed string. (A gensym is a special kind of
348+ /// symbol that is never equal to any other symbol. E.g.:
349+ /// - `Symbol::intern("x") == Symbol::intern("x")`
350+ /// - `Symbol::gensym("x") != Symbol::intern("x")`
351+ /// - `Symbol::gensym("x") != Symbol::gensym("x")`
352+ ///
353+ /// Gensyms are useful when creating new identifiers that must not match any
354+ /// existing identifiers during macro expansion and syntax desugaring.)
355+ ///
356+ /// The use of `newtype_index!` means that `Option<Symbol>` only takes up 4
357+ /// bytes, because `newtype_index!` reserves the last 256 values for tagging
358+ /// purposes.
350359///
351360/// Note that `Symbol` cannot directly be a `newtype_index!` because it implements
352361/// `fmt::Debug`, `Encodable`, and `Decodable` in special ways.
@@ -367,10 +376,6 @@ impl Symbol {
367376 with_interner ( |interner| interner. intern ( string) )
368377 }
369378
370- pub fn interned ( self ) -> Self {
371- with_interner ( |interner| interner. interned ( self ) )
372- }
373-
374379 /// Gensyms a new `usize`, using the current interner.
375380 pub fn gensym ( string : & str ) -> Self {
376381 with_interner ( |interner| interner. gensym ( string) )
@@ -387,7 +392,7 @@ impl Symbol {
387392 pub fn as_str ( self ) -> LocalInternedString {
388393 with_interner ( |interner| unsafe {
389394 LocalInternedString {
390- string : std:: mem:: transmute :: < & str , & str > ( interner. get ( self ) )
395+ string : std:: mem:: transmute :: < & str , & str > ( interner. symbol_or_gensym_str ( self ) )
391396 }
392397 } )
393398 }
@@ -488,11 +493,11 @@ impl Interner {
488493 name
489494 }
490495
491- pub fn interned ( & self , symbol : Symbol ) -> Symbol {
496+ fn interned ( & self , symbol : Symbol ) -> Symbol {
492497 if ( symbol. 0 . as_usize ( ) ) < self . strings . len ( ) {
493498 symbol
494499 } else {
495- self . interned ( self . gensyms [ ( SymbolIndex :: MAX_AS_U32 - symbol. 0 . as_u32 ( ) ) as usize ] )
500+ self . gensyms [ ( SymbolIndex :: MAX_AS_U32 - symbol. 0 . as_u32 ( ) ) as usize ]
496501 }
497502 }
498503
@@ -510,10 +515,19 @@ impl Interner {
510515 symbol. 0 . as_usize ( ) >= self . strings . len ( )
511516 }
512517
513- pub fn get ( & self , symbol : Symbol ) -> & str {
518+ /// Get the chars of a normal (non-gensym) symbol.
519+ pub fn symbol_str ( & self , symbol : Symbol ) -> & str {
520+ self . strings [ symbol. 0 . as_usize ( ) ]
521+ }
522+
523+ /// Get the chars of a normal or gensym symbol.
524+ fn symbol_or_gensym_str ( & self , symbol : Symbol ) -> & str {
514525 match self . strings . get ( symbol. 0 . as_usize ( ) ) {
515526 Some ( string) => string,
516- None => self . get ( self . gensyms [ ( SymbolIndex :: MAX_AS_U32 - symbol. 0 . as_u32 ( ) ) as usize ] ) ,
527+ None => {
528+ let symbol = self . gensyms [ ( SymbolIndex :: MAX_AS_U32 - symbol. 0 . as_u32 ( ) ) as usize ] ;
529+ self . strings [ symbol. 0 . as_usize ( ) ]
530+ }
517531 }
518532 }
519533}
@@ -611,11 +625,17 @@ fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
611625 GLOBALS . with ( |globals| f ( & mut * globals. symbol_interner . lock ( ) ) )
612626}
613627
614- /// Represents a string stored in the interner. Because the interner outlives any thread
615- /// which uses this type, we can safely treat `string` which points to interner data,
616- /// as an immortal string, as long as this type never crosses between threads.
617- // FIXME: ensure that the interner outlives any thread which uses `LocalInternedString`,
618- // by creating a new thread right after constructing the interner.
628+ /// An alternative to `Symbol` and `LocalInternedString`, useful when the chars
629+ /// within the symbol need to be accessed. It is best used for temporary
630+ /// values.
631+ ///
632+ /// Because the interner outlives any thread which uses this type, we can
633+ /// safely treat `string` which points to interner data, as an immortal string,
634+ /// as long as this type never crosses between threads.
635+ //
636+ // FIXME: ensure that the interner outlives any thread which uses
637+ // `LocalInternedString`, by creating a new thread right after constructing the
638+ // interner.
619639#[ derive( Clone , Copy , Hash , PartialOrd , Eq , Ord ) ]
620640pub struct LocalInternedString {
621641 string : & ' static str ,
@@ -708,7 +728,12 @@ impl Encodable for LocalInternedString {
708728 }
709729}
710730
711- /// Represents a string stored in the string interner.
731+ /// A thin wrapper around `Symbol`. It has two main differences to `Symbol`.
732+ /// - Its implementations of `Hash`, `PartialOrd` and `Ord` work with the
733+ /// string chars rather than the symbol integer. This is useful when
734+ /// hash stability is required across compile sessions, or a guaranteed sort
735+ /// ordering is required.
736+ /// - It is guaranteed to not be a gensym symbol.
712737#[ derive( Clone , Copy , Eq ) ]
713738pub struct InternedString {
714739 symbol : Symbol ,
@@ -717,14 +742,23 @@ pub struct InternedString {
717742impl InternedString {
718743 pub fn with < F : FnOnce ( & str ) -> R , R > ( self , f : F ) -> R {
719744 let str = with_interner ( |interner| {
720- interner. get ( self . symbol ) as * const str
745+ interner. symbol_str ( self . symbol ) as * const str
721746 } ) ;
722747 // This is safe because the interner keeps string alive until it is dropped.
723748 // We can access it because we know the interner is still alive since we use a
724749 // scoped thread local to access it, and it was alive at the beginning of this scope
725750 unsafe { f ( & * str) }
726751 }
727752
753+ pub fn with2 < F : FnOnce ( & str , & str ) -> R , R > ( self , other : & InternedString , f : F ) -> R {
754+ let ( self_str, other_str) = with_interner ( |interner| {
755+ ( interner. symbol_str ( self . symbol ) as * const str ,
756+ interner. symbol_str ( other. symbol ) as * const str )
757+ } ) ;
758+ // This is safe for the same reason that `with` is safe.
759+ unsafe { f ( & * self_str, & * other_str) }
760+ }
761+
728762 pub fn as_symbol ( self ) -> Symbol {
729763 self . symbol
730764 }
@@ -745,7 +779,7 @@ impl PartialOrd<InternedString> for InternedString {
745779 if self . symbol == other. symbol {
746780 return Some ( Ordering :: Equal ) ;
747781 }
748- self . with ( |self_str| other . with ( | other_str| self_str. partial_cmp ( other_str) ) )
782+ self . with2 ( other , |self_str, other_str| self_str. partial_cmp ( other_str) )
749783 }
750784}
751785
@@ -754,7 +788,7 @@ impl Ord for InternedString {
754788 if self . symbol == other. symbol {
755789 return Ordering :: Equal ;
756790 }
757- self . with ( |self_str| other . with ( | other_str| self_str. cmp ( & other_str) ) )
791+ self . with2 ( other , |self_str, other_str| self_str. cmp ( other_str) )
758792 }
759793}
760794
@@ -794,12 +828,6 @@ impl<'a> PartialEq<InternedString> for &'a String {
794828 }
795829}
796830
797- impl std:: convert:: From < InternedString > for String {
798- fn from ( val : InternedString ) -> String {
799- val. as_symbol ( ) . to_string ( )
800- }
801- }
802-
803831impl fmt:: Debug for InternedString {
804832 fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
805833 self . with ( |str| fmt:: Debug :: fmt ( & str, f) )
0 commit comments