@@ -118,8 +118,8 @@ impl CharPos: to_bytes::IterBytes {
118118}
119119
120120pub struct span {
121- lo : CharPos ,
122- hi : CharPos ,
121+ lo : BytePos ,
122+ hi : BytePos ,
123123 expn_info : Option < @ExpnInfo >
124124}
125125
@@ -141,8 +141,10 @@ impl<D: Deserializer> span: Deserializable<D> {
141141 }
142142}
143143
144- pub struct Loc < A : Pos > {
145- file : @FileMap , line : uint , col : A
144+ // XXX col shouldn't be CharPos because col is not an absolute location in the
145+ // codemap, and BytePos and CharPos always represent absolute positions
146+ pub struct Loc {
147+ file : @FileMap , line : uint , col : CharPos
146148}
147149
148150/// An absolute offset within the CodeMap (not a relative offset within a
@@ -178,12 +180,24 @@ pub enum FileSubstr {
178180 pub FssExternal ( { filename: ~str , line: uint, col: CharPos } )
179181}
180182
183+ /// Identifies an offset of a multi-byte character in a FileMap
184+ pub struct MultiByteChar {
185+ /// The absolute offset of the character in the CodeMap
186+ pos: BytePos ,
187+ /// The number of bytes, >=2
188+ bytes: uint,
189+ /// The complete number of 'extra' bytes through this character in the
190+ /// FileMap
191+ sum: uint
192+ }
193+
181194pub struct FileMap {
182195 name: FileName ,
183196 substr: FileSubstr ,
184197 src: @~str ,
185198 start_pos: FilePos ,
186- mut lines: ~[ FilePos ]
199+ mut lines: ~[ FilePos ] ,
200+ multibyte_chars: DVec <MultiByteChar >
187201}
188202
189203pub impl FileMap {
@@ -194,7 +208,8 @@ pub impl FileMap {
194208 return FileMap {
195209 name: filename, substr: substr, src: src,
196210 start_pos: start_pos,
197- mut lines: ~[ ]
211+ mut lines: ~[ ] ,
212+ multibyte_chars: DVec ( )
198213 } ;
199214 }
200215
@@ -219,6 +234,21 @@ pub impl FileMap {
219234 str :: slice( * self . src, begin, end)
220235 }
221236
237+ pub fn record_multibyte_char( & self , pos: BytePos , bytes: uint) {
238+ assert bytes >=2 && bytes <= 4 ;
239+ let sum = if self . multibyte_chars. len( ) > 0 {
240+ self . multibyte_chars. last( ) . sum
241+ } else {
242+ 0
243+ } ;
244+ let sum = sum + bytes ;
245+ let mbc = MultiByteChar {
246+ pos: pos,
247+ bytes: bytes,
248+ sum: sum
249+ } ;
250+ self . multibyte_chars. push( mbc) ;
251+ }
222252}
223253
224254pub struct CodeMap {
@@ -254,12 +284,11 @@ pub impl CodeMap {
254284 pos.line, pos.col.to_uint());
255285 }
256286
257- pub fn lookup_char_pos(&self, +pos: CharPos) -> Loc<CharPos> {
258- pure fn lookup(pos: FilePos) -> uint { return pos.ch.to_uint(); }
259- return self.lookup_pos(pos, lookup);
287+ pub fn lookup_char_pos(&self, +pos: BytePos) -> Loc {
288+ return self.lookup_pos(pos);
260289 }
261290
262- pub fn lookup_char_pos_adj(&self, +pos: CharPos )
291+ pub fn lookup_char_pos_adj(&self, +pos: BytePos )
263292 -> {filename: ~str, line: uint, col: CharPos, file: Option<@FileMap>}
264293 {
265294 let loc = self.lookup_char_pos(pos);
@@ -272,7 +301,7 @@ pub impl CodeMap {
272301 }
273302 FssInternal(sp) => {
274303 self.lookup_char_pos_adj(
275- sp.lo + (pos - loc.file.start_pos.ch ))
304+ sp.lo + (pos - loc.file.start_pos.byte ))
276305 }
277306 FssExternal(eloc) => {
278307 {filename: /* FIXME (#2543) */ copy eloc.filename,
@@ -284,14 +313,13 @@ pub impl CodeMap {
284313 }
285314
286315 pub fn adjust_span(&self, sp: span) -> span {
287- pure fn lookup(pos: FilePos) -> uint { return pos.ch.to_uint(); }
288- let line = self.lookup_line(sp.lo, lookup);
316+ let line = self.lookup_line(sp.lo);
289317 match (line.fm.substr) {
290318 FssNone => sp,
291319 FssInternal(s) => {
292320 self.adjust_span(span {
293- lo: s.lo + (sp.lo - line.fm.start_pos.ch ),
294- hi: s.lo + (sp.hi - line.fm.start_pos.ch ),
321+ lo: s.lo + (sp.lo - line.fm.start_pos.byte ),
322+ hi: s.lo + (sp.hi - line.fm.start_pos.byte ),
295323 expn_info: sp.expn_info
296324 })
297325 }
@@ -321,18 +349,6 @@ pub impl CodeMap {
321349 return @FileLines { file : lo. file , lines : lines} ;
322350 }
323351
324- fn lookup_byte_offset ( & self , +chpos : CharPos )
325- -> { fm : @FileMap , pos : BytePos } {
326- pure fn lookup ( pos : FilePos ) -> uint { return pos. ch . to_uint ( ) ; }
327- let { fm, line} = self . lookup_line ( chpos, lookup) ;
328- let line_offset = fm. lines [ line] . byte - fm. start_pos . byte ;
329- let col = chpos - fm. lines [ line] . ch ;
330- let col_offset = str:: count_bytes ( * fm. src ,
331- line_offset. to_uint ( ) ,
332- col. to_uint ( ) ) ;
333- { fm: fm, pos: line_offset + BytePos ( col_offset) }
334- }
335-
336352 pub fn span_to_snippet ( & self , sp : span ) -> ~str {
337353 let begin = self . lookup_byte_offset ( sp. lo ) ;
338354 let end = self . lookup_byte_offset ( sp. hi ) ;
@@ -351,15 +367,14 @@ pub impl CodeMap {
351367}
352368
353369priv impl CodeMap {
354- fn lookup_line<A : Pos >( & self , pos: A , lookup : LookupFn )
355- -> { fm: @FileMap , line : uint}
356- {
370+
371+ fn lookup_filemap_idx( & self , +pos: BytePos ) -> uint {
357372 let len = self . files . len ( ) ;
358373 let mut a = 0 u;
359374 let mut b = len;
360375 while b - a > 1 u {
361376 let m = ( a + b) / 2 u;
362- if lookup ( self . files [ m] . start_pos ) > pos. to_uint ( ) {
377+ if self . files [ m] . start_pos . byte > pos {
363378 b = m;
364379 } else {
365380 a = m;
@@ -369,22 +384,40 @@ priv impl CodeMap {
369384 fail fmt ! ( "position %u does not resolve to a source location" ,
370385 pos. to_uint( ) )
371386 }
372- let f = self . files [ a] ;
373- a = 0 u;
374- b = vec:: len ( f. lines ) ;
387+
388+ return a;
389+ }
390+
391+ fn lookup_line ( & self , +pos : BytePos )
392+ -> { fm : @FileMap , line : uint }
393+ {
394+ let idx = self . lookup_filemap_idx ( pos) ;
395+ let f = self . files [ idx] ;
396+ let mut a = 0 u;
397+ let mut b = vec:: len ( f. lines ) ;
375398 while b - a > 1 u {
376399 let m = ( a + b) / 2 u;
377- if lookup ( f. lines [ m] ) > pos. to_uint ( ) { b = m; } else { a = m; }
400+ if f. lines [ m] . byte > pos { b = m; } else { a = m; }
378401 }
379402 return { fm: f, line: a} ;
380403 }
381404
382- fn lookup_pos < A : Pos Num > ( & self , pos : A , lookup : LookupFn ) -> Loc < A > {
383- let { fm: f , line : a } = self . lookup_line ( pos, lookup) ;
405+ fn lookup_pos ( & self , +pos : BytePos ) -> Loc {
406+ let { fm: f , line : a } = self . lookup_line ( pos) ;
407+ let line = a + 1 u; // Line numbers start at 1
408+ let chpos = self . bytepos_to_local_charpos ( pos) ;
409+ let linebpos = f. lines [ a] . byte ;
410+ let linechpos = self . bytepos_to_local_charpos ( linebpos) ;
411+ debug ! ( "codemap: byte pos %? is on the line at byte pos %?" ,
412+ pos, linebpos) ;
413+ debug ! ( "codemap: char pos %? is on the line at char pos %?" ,
414+ chpos, linechpos) ;
415+ debug ! ( "codemap: byte is on line: %?" , line) ;
416+ assert chpos >= linechpos;
384417 return Loc {
385418 file : f,
386- line : a + 1 u ,
387- col : pos - from_uint ( lookup ( f . lines [ a ] ) )
419+ line : line ,
420+ col : chpos - linechpos
388421 } ;
389422 }
390423
@@ -394,6 +427,40 @@ priv impl CodeMap {
394427 return fmt ! ( "%s:%u:%u: %u:%u" , lo. file. name,
395428 lo. line, lo. col. to_uint( ) , hi. line, hi. col. to_uint( ) )
396429 }
430+
431+ fn lookup_byte_offset ( & self , +bpos : BytePos )
432+ -> { fm : @FileMap , pos : BytePos } {
433+ let idx = self . lookup_filemap_idx ( bpos) ;
434+ let fm = self . files [ idx] ;
435+ let offset = bpos - fm. start_pos . byte ;
436+ return { fm: fm, pos: offset} ;
437+ }
438+
439+ // Converts an absolute BytePos to a CharPos relative to the file it is
440+ // located in
441+ fn bytepos_to_local_charpos ( & self , +bpos : BytePos ) -> CharPos {
442+ debug ! ( "codemap: converting %? to char pos" , bpos) ;
443+ let idx = self . lookup_filemap_idx ( bpos) ;
444+ let map = self . files [ idx] ;
445+
446+ // The number of extra bytes due to multibyte chars in the FileMap
447+ let mut total_extra_bytes = 0 ;
448+
449+ for map. multibyte_chars. each |mbc| {
450+ debug ! ( "codemap: %?-byte char at %?" , mbc. bytes, mbc. pos) ;
451+ if mbc. pos < bpos {
452+ total_extra_bytes += mbc. bytes ;
453+ // We should never see a byte position in the middle of a
454+ // character
455+ assert bpos == mbc. pos
456+ || bpos. to_uint ( ) >= mbc. pos . to_uint ( ) + mbc. bytes ;
457+ } else {
458+ break ;
459+ }
460+ }
461+
462+ CharPos ( bpos. to_uint ( ) - total_extra_bytes)
463+ }
397464}
398465
399466//
0 commit comments