Skip to content

Commit cc812ef

Browse files
authored
Fixed a longstanding bug in the tokenizer that could result in incorrect behavior under certain circumstances involving characters that require two UTF-16 code units (a surrogate followed by a second code unit, i.e. a surrogate pair). (#10504)
1 parent d296a1a commit cc812ef

File tree

1 file changed

+12
-12
lines changed

1 file changed

+12
-12
lines changed

packages/pyright-internal/src/parser/characters.ts

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ const _surrogateCharMap: { [code: number]: CharCategoryMap } = {};
4242
// We do lazy initialization of this map because it's rarely used.
4343
let _identifierCharMapInitialized = false;
4444

45-
export function isIdentifierStartChar(char: number, nextChar?: number) {
45+
export function isIdentifierStartChar(char: number, nextChar?: number): boolean {
4646
if (char < _identifierCharFastTableSize) {
4747
return _identifierCharFastTable[char] === CharCategory.StartIdentifierChar;
4848
}
@@ -63,7 +63,7 @@ export function isIdentifierStartChar(char: number, nextChar?: number) {
6363
return charCategory === CharCategory.StartIdentifierChar;
6464
}
6565

66-
export function isIdentifierChar(char: number, nextChar?: number) {
66+
export function isIdentifierChar(char: number, nextChar?: number): boolean {
6767
if (char < _identifierCharFastTableSize) {
6868
return (
6969
_identifierCharFastTable[char] === CharCategory.StartIdentifierChar ||
@@ -77,17 +77,17 @@ export function isIdentifierChar(char: number, nextChar?: number) {
7777
_identifierCharMapInitialized = true;
7878
}
7979

80+
let charCategory: CharCategory;
8081
if (nextChar !== undefined) {
81-
return _lookUpSurrogate(char, nextChar);
82+
charCategory = _lookUpSurrogate(char, nextChar);
83+
} else {
84+
charCategory = _identifierCharMap[char];
8285
}
8386

84-
return (
85-
_identifierCharMap[char] === CharCategory.StartIdentifierChar ||
86-
_identifierCharMap[char] === CharCategory.IdentifierChar
87-
);
87+
return charCategory === CharCategory.StartIdentifierChar || charCategory === CharCategory.IdentifierChar;
8888
}
8989

90-
export function isSurrogateChar(char: number) {
90+
export function isSurrogateChar(char: number): boolean {
9191
if (char < _identifierCharFastTableSize) {
9292
return false;
9393
}
@@ -129,7 +129,7 @@ export function isBinary(ch: number): boolean {
129129
return ch === Char._0 || ch === Char._1 || ch === Char.Underscore;
130130
}
131131

132-
function _lookUpSurrogate(char: number, nextChar: number) {
132+
function _lookUpSurrogate(char: number, nextChar: number): CharCategory {
133133
if (_identifierCharMap[char] !== CharCategory.SurrogateChar) {
134134
return CharCategory.NotIdentifierChar;
135135
}
@@ -197,7 +197,7 @@ function _buildIdentifierLookupTableFromUnicodeRangeTable(
197197
fastTableOnly: boolean,
198198
fastTable: CharCategoryMap,
199199
fullTable: CharCategoryMap
200-
) {
200+
): void {
201201
for (let entryIndex = 0; entryIndex < table.length; entryIndex++) {
202202
const entry = table[entryIndex];
203203
let rangeStart: number;
@@ -227,7 +227,7 @@ function _buildIdentifierLookupTableFromUnicodeRangeTable(
227227
function _buildIdentifierLookupTableFromSurrogateRangeTable(
228228
surrogateTable: unicode.UnicodeSurrogateRangeTable,
229229
category: CharCategory
230-
) {
230+
): void {
231231
for (const surrogateChar in surrogateTable) {
232232
if (!_surrogateCharMap[surrogateChar]) {
233233
_surrogateCharMap[surrogateChar] = {};
@@ -245,7 +245,7 @@ function _buildIdentifierLookupTableFromSurrogateRangeTable(
245245
}
246246

247247
// Build a lookup table to speed up tokenization of identifiers.
248-
function _buildIdentifierLookupTable(fastTableOnly: boolean) {
248+
function _buildIdentifierLookupTable(fastTableOnly: boolean): void {
249249
_identifierCharFastTable.fill(CharCategory.NotIdentifierChar);
250250

251251
_identifierCharRanges.forEach((table) => {

0 commit comments

Comments
 (0)