2020
2121//! A regex parser yielding an AST.
2222
23+ use regex:: escape;
2324use bit_set:: BitSet ;
2425use std:: str:: FromStr ;
2526use std:: usize;
@@ -199,7 +200,13 @@ impl<'a> Parser<'a> {
199200 }
200201 ) ) ,
201202 b'(' => self . parse_group ( ix, depth) ,
202- b'\\' => self . parse_escape ( ix) ,
203+ b'\\' => {
204+ let ( next, expr) = try!( self . parse_escape ( ix) ) ;
205+ if let Expr :: Backref ( group) = expr {
206+ self . backrefs . insert ( group) ;
207+ }
208+ Ok ( ( next, expr) )
209+ } ,
203210 b'+' | b'*' | b'?' | b'|' | b')' =>
204211 Ok ( ( ix, Expr :: Empty ) ) ,
205212 b'[' => self . parse_class ( ix) ,
@@ -221,7 +228,7 @@ impl<'a> Parser<'a> {
221228 }
222229
223230 // ix points to \ character
224- fn parse_escape ( & mut self , ix : usize ) -> Result < ( usize , Expr ) > {
231+ fn parse_escape ( & self , ix : usize ) -> Result < ( usize , Expr ) > {
225232 if ix + 1 == self . re . len ( ) {
226233 return Err ( Error :: TrailingBackslash ) ;
227234 }
@@ -233,7 +240,6 @@ impl<'a> Parser<'a> {
233240 if let Some ( ( end, group) ) = parse_decimal ( self . re , ix + 1 ) {
234241 // protect BitSet against unreasonably large value
235242 if group < self . re . len ( ) / 2 {
236- self . backrefs . insert ( group) ;
237243 return Ok ( ( end, Expr :: Backref ( group) ) ) ;
238244 }
239245 }
@@ -331,9 +337,9 @@ impl<'a> Parser<'a> {
331337 fn parse_class ( & self , ix : usize ) -> Result < ( usize , Expr ) > {
332338 let bytes = self . re . as_bytes ( ) ;
333339 let mut ix = ix + 1 ; // skip opening '['
334- let mut inner = String :: new ( ) ;
340+ let mut class = String :: new ( ) ;
335341 let mut nest = 1 ;
336- inner . push ( '[' ) ;
342+ class . push ( '[' ) ;
337343 loop {
338344 ix = self . optional_whitespace ( ix) ;
339345 if ix == self . re . len ( ) {
@@ -344,27 +350,46 @@ impl<'a> Parser<'a> {
344350 if ix + 1 == self . re . len ( ) {
345351 return Err ( Error :: InvalidClass ) ;
346352 }
347- ix + 1 + codepoint_len ( bytes[ ix + 1 ] )
353+
354+ // We support more escapes than regex, so parse it ourselves before delegating.
355+ let ( end, expr) = try!( self . parse_escape ( ix) ) ;
356+ match expr {
357+ Expr :: Literal { val, .. } => {
358+ class. push_str ( & escape ( & val) ) ;
359+ }
360+ Expr :: Delegate { inner, .. } => {
361+ class. push_str ( & inner) ;
362+ }
363+ _ => {
364+ return Err ( Error :: InvalidClass ) ;
365+ }
366+ }
367+ end
348368 }
349369 b'[' => {
350370 nest += 1 ;
371+ class. push ( '[' ) ;
351372 ix + 1
352373 }
353374 b']' => {
354375 nest -= 1 ;
355376 if nest == 0 {
356377 break ;
357378 }
379+ class. push ( ']' ) ;
358380 ix + 1
359381 }
360- b => ix + codepoint_len ( b)
382+ b => {
383+ let end = ix + codepoint_len ( b) ;
384+ class. push_str ( & self . re [ ix..end] ) ;
385+ end
386+ }
361387 } ;
362- inner. push_str ( & self . re [ ix..end] ) ;
363388 ix = end;
364389 }
365- inner . push ( ']' ) ;
390+ class . push ( ']' ) ;
366391 let ix = ix + 1 ; // skip closing ']'
367- Ok ( ( ix, Expr :: Delegate { inner : inner , size : 1 } ) )
392+ Ok ( ( ix, Expr :: Delegate { inner : class , size : 1 } ) )
368393 }
369394
370395 fn parse_group ( & mut self , ix : usize , depth : usize ) -> Result < ( usize , Expr ) > {
0 commit comments