@@ -175,6 +175,8 @@ CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))
175175// https://infra.spec.whatwg.org/#ascii-digit
// True when ch lies in the inclusive range '0'..'9'.
176176CHAR_TEST (8 , IsASCIIDigit, (ch >= ' 0' && ch <= ' 9' ))
177177
// True when ch lies in the inclusive range '0'..'7' (an octal digit).
// IsIPv4NumberValid() uses this for numbers that start with a leading '0'.
178+ CHAR_TEST (8 , IsASCIIOcDigit, (ch >= ' 0' && ch <= ' 7' ))
179+
178180// https://infra.spec.whatwg.org/#ascii-hex-digit
179181CHAR_TEST (8 , IsASCIIHexDigit, (IsASCIIDigit(ch) ||
180182 (ch >= ' A' && ch <= ' F' ) ||
@@ -407,29 +409,67 @@ int64_t ParseIPv4Number(const char* start, const char* end) {
407409 return strtoll (start, nullptr , R);
408410}
409411
412+ // https://url.spec.whatwg.org/#ipv4-number-parser
413+ bool IsIPv4NumberValid (const std::string_view input) {
414+ if (input.empty ()) {
415+ return false ;
416+ }
417+
418+ // If a number starts with '0' it might be a number with base 8 or base
419+ // 16. If not, checking if all characters are digits proves that it is a
420+ // base 10 number.
421+ if (input.size () >= 2 && input[0 ] == ' 0' ) {
422+ if (input[1 ] == ' X' || input[1 ] == ' x' ) {
423+ if (input.size () == 2 ) {
424+ return true ;
425+ }
426+
427+ return std::all_of (input.begin () + 2 , input.end (), [](const char & c) {
428+ return IsASCIIHexDigit (c);
429+ });
430+ }
431+
432+ return std::all_of (input.begin () + 1 , input.end (), [](const char & c) {
433+ return IsASCIIOcDigit (c);
434+ });
435+ }
436+
437+ return std::all_of (input.begin (), input.end (), [](const char & c) {
438+ return IsASCIIDigit (c);
439+ });
440+ }
441+
410442// https://url.spec.whatwg.org/#ends-in-a-number-checker
411- bool EndsInANumber (const std::string& input) {
412- std::vector<std::string> parts = SplitString (input, ' .' , false );
443+ inline bool EndsInANumber (const std::string_view input) {
444+ if (input.empty ()) {
445+ return false ;
446+ }
413447
414- if (parts.empty ()) return false ;
448+ char delimiter = ' .' ;
449+ auto last_index = input.size () - 1 ;
450+ if (input.back () == delimiter) {
451+ --last_index;
452+ }
415453
416- if (parts.back ().empty ()) {
417- if (parts.size () == 1 ) return false ;
418- parts.pop_back ();
454+ std::string_view last{};
455+ auto pos = input.find_last_of (delimiter, last_index);
456+ if (pos == std::string_view::npos) {
457+ last = input.substr (0 , last_index);
458+ } else {
459+ last = input.substr (pos + 1 , last_index - pos);
419460 }
420461
421- const std::string& last = parts.back ();
462+ if (last.empty ()) {
463+ return false ;
464+ }
422465
423- // If last is non-empty and contains only ASCII digits, then return true
424- if (!last.empty () && std::all_of (last.begin (), last.end (), ::isdigit)) {
466+ if (std::all_of (last.begin (), last.end (), [](const char & c) {
467+ return IsASCIIDigit (c);
468+ })) {
425469 return true ;
426470 }
427471
428- const char * last_str = last.c_str ();
429- int64_t num = ParseIPv4Number (last_str, last_str + last.size ());
430- if (num >= 0 ) return true ;
431-
432- return false ;
472+ return IsIPv4NumberValid (last);
433473}
434474
435475void URLHost::ParseIPv4Host (const char * input, size_t length) {
0 commit comments