Skip to content

Commit 7164325

Browse files
committed
fix: support hexadecimal numbers and words with underscores
1 parent 095cd2a commit 7164325

File tree

2 files changed

+66
-2
lines changed

2 files changed

+66
-2
lines changed

src/parse_to_ast.rs

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -573,4 +573,24 @@ mod tests {
573573
fn error_correct_line_column_unicode_width() {
574574
assert_has_strict_error(r#"["🧑‍🦰", ["#, "Unterminated array on line 1 column 10");
575575
}
576+
577+
// Regression test: an object whose only property has an unquoted key containing
// underscores and a hexadecimal number value, followed by a trailing comma and a
// line comment, must parse successfully. Both option arguments passed to
// `parse_to_ast` are `Default::default()`.
#[test]
578+
fn it_should_parse_unquoted_keys_with_hex_and_trailing_comma() {
579+
let parse_result = parse_to_ast(
580+
r#"{
581+
CP_CanFuncReqId: 0x7DF, // 2015
582+
}"#,
583+
&Default::default(),
584+
&Default::default(),
585+
)
586+
.unwrap();
587+
588+
// The root value must be an object holding exactly the one property from the input.
let value = parse_result.value.unwrap();
589+
let obj = value.as_object().unwrap();
590+
assert_eq!(obj.properties.len(), 1);
591+
assert_eq!(obj.properties[0].name.as_str(), "CP_CanFuncReqId");
592+
593+
// The number literal is compared against its source spelling ("0x7DF"), not a decoded integer.
let number_value = obj.properties[0].value.as_number_lit().unwrap();
594+
assert_eq!(number_value.value, "0x7DF");
595+
}
576596
}

src/scanner.rs

Lines changed: 46 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -160,6 +160,23 @@ impl<'a> Scanner<'a> {
160160

161161
if self.is_zero() {
162162
self.move_next_char();
163+
164+
// check for hexadecimal literal (0x or 0X)
165+
if matches!(self.current_char(), Some('x') | Some('X')) {
166+
self.move_next_char();
167+
168+
// must have at least one hex digit
169+
if !self.is_hex_digit() {
170+
return Err(self.create_error_for_current_char(ParseErrorKind::ExpectedDigit));
171+
}
172+
173+
while self.is_hex_digit() {
174+
self.move_next_char();
175+
}
176+
177+
let end_byte_index = self.byte_index;
178+
return Ok(Token::Number(&self.file_text[start_byte_index..end_byte_index]));
179+
}
163180
} else if self.is_one_nine() {
164181
self.move_next_char();
165182
while self.is_digit() {
@@ -288,10 +305,12 @@ impl<'a> Scanner<'a> {
288305
let start_byte_index = self.byte_index;
289306

290307
while let Some(current_char) = self.current_char() {
291-
if current_char.is_whitespace() || current_char == '\r' || current_char == '\n' || current_char == ':' {
308+
// check for word terminators
309+
if current_char.is_whitespace() || current_char == ':' {
292310
break;
293311
}
294-
if !current_char.is_alphanumeric() && current_char != '-' {
312+
// validate that the character is allowed in a word literal
313+
if !current_char.is_alphanumeric() && current_char != '-' && current_char != '_' {
295314
return Err(self.create_error_for_current_token(ParseErrorKind::UnexpectedToken));
296315
}
297316

@@ -382,6 +401,13 @@ impl<'a> Scanner<'a> {
382401
self.is_one_nine() || self.is_zero()
383402
}
384403

404+
fn is_hex_digit(&self) -> bool {
405+
match self.current_char() {
406+
Some(current_char) => current_char.is_ascii_hexdigit(),
407+
_ => false,
408+
}
409+
}
410+
385411
fn is_zero(&self) -> bool {
386412
self.current_char() == Some('0')
387413
}
@@ -496,6 +522,24 @@ mod tests {
496522
);
497523
}
498524

525+
// Hex literals with either prefix case (`0x` / `0X`) and upper-, lower- or
// mixed-case digits should each scan as a single number token that keeps the
// original source text, with commas between them scanned as separate tokens.
#[test]
fn it_tokenizes_hexadecimal_numbers() {
    let input = "0x7DF, 0xFF, 0x123ABC, 0xabc, 0X1F";
    let expected = vec![
        Token::Number("0x7DF"),
        Token::Comma,
        Token::Number("0xFF"),
        Token::Comma,
        Token::Number("0x123ABC"),
        Token::Comma,
        Token::Number("0xabc"),
        Token::Comma,
        Token::Number("0X1F"),
    ];

    assert_has_tokens(input, expected);
}
542+
499543
#[test]
500544
fn it_errors_invalid_exponent() {
501545
assert_has_error(

0 commit comments

Comments
 (0)