 from test import support
 from test.support import os_helper
-from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
+from tokenize import (tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                      open as tokenize_open, Untokenizer, generate_tokens,
                      NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT, TokenInfo)
@@ -51,6 +51,25 @@ def check_tokenize(self, s, expected):
                          ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                          expected.rstrip().splitlines())
 
+    def test_invalid_readline(self):
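+        # tokenize() expects bytes and generate_tokens() expects str from readline;
+        # a mismatch should raise TypeError, and other exceptions should propagate.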
+        def gen():
+            yield "sdfosdg"
+            yield "sdfosdg"
+        with self.assertRaises(TypeError):
+            list(tokenize(gen().__next__))
+
+        def gen():
+            yield b"sdfosdg"
+            yield b"sdfosdg"
+        with self.assertRaises(TypeError):
+            list(generate_tokens(gen().__next__))
+
+        def gen():
+            yield "sdfosdg"
+            1/0
+        with self.assertRaises(ZeroDivisionError):
+            list(generate_tokens(gen().__next__))
+
     def test_implicit_newline(self):
         # Make sure that the tokenizer puts in an implicit NEWLINE
         # when the input lacks a trailing new line.
@@ -1161,7 +1180,8 @@ class TestTokenizerAdheresToPep0263(TestCase):
 
     def _testFile(self, filename):
         path = os.path.join(os.path.dirname(__file__), filename)
-        TestRoundtrip.check_roundtrip(self, open(path, 'rb'))
+        with open(path, 'rb') as f:
+            TestRoundtrip.check_roundtrip(self, f)
 
     def test_utf8_coding_cookie_and_no_utf8_bom(self):
         f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
@@ -1206,7 +1226,8 @@ def readline():
             yield b''
 
         # skip the initial encoding token and the end tokens
-        tokens = list(_tokenize(readline(), encoding='utf-8'))[:-2]
+        tokens = list(_generate_tokens_from_c_tokenizer(readline().__next__, encoding='utf-8',
+                                                        extra_tokens=True))[:-2]
         expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"\n')]
         self.assertEqual(tokens, expected_tokens,
                          "bytes not decoded with encoding")
@@ -1475,13 +1496,13 @@ def test_tokenize(self):
         def mock_detect_encoding(readline):
             return encoding, [b'first', b'second']
 
-        def mock__tokenize(readline, encoding):
+        def mock__tokenize(readline, encoding, **kwargs):
             nonlocal encoding_used
             encoding_used = encoding
             out = []
             while True:
                 try:
-                    next_line = next(readline)
+                    next_line = readline()
                 except StopIteration:
                     return out
                 if next_line:
@@ -1498,16 +1519,16 @@ def mock_readline():
             return str(counter).encode()
 
         orig_detect_encoding = tokenize_module.detect_encoding
-        orig__tokenize = tokenize_module._tokenize
+        orig_c_token = tokenize_module._generate_tokens_from_c_tokenizer
         tokenize_module.detect_encoding = mock_detect_encoding
-        tokenize_module._tokenize = mock__tokenize
+        tokenize_module._generate_tokens_from_c_tokenizer = mock__tokenize
         try:
             results = tokenize(mock_readline)
             self.assertEqual(list(results)[1:],
                              [b'first', b'second', b'1', b'2', b'3', b'4'])
         finally:
             tokenize_module.detect_encoding = orig_detect_encoding
-            tokenize_module._tokenize = orig__tokenize
+            tokenize_module._generate_tokens_from_c_tokenizer = orig_c_token
 
         self.assertEqual(encoding_used, encoding)
 
@@ -1834,12 +1855,33 @@ class CTokenizeTest(TestCase):
     def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
         # The ENDMARKER and final NEWLINE are omitted.
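+        # The C tokenizer reads the source through a readline callable, so wrap s in StringIO.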
+        f = StringIO(s)
         with self.subTest(source=s):
             result = stringify_tokens_from_source(
-                _generate_tokens_from_c_tokenizer(s), s
+                _generate_tokens_from_c_tokenizer(f.readline), s
             )
             self.assertEqual(result, expected.rstrip().splitlines())
 
+    def test_encoding(self):
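+        # Bytes yielded by readline should be decoded with the explicitly given encoding.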
+        def readline(encoding):
+            yield "1+1".encode(encoding)
+
+        expected = [
+            TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1\n'),
+            TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1\n'),
+            TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1\n'),
+            TokenInfo(type=NEWLINE, string='\n', start=(1, 3), end=(1, 4), line='1+1\n'),
+            TokenInfo(type=ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
+        ]
+        for encoding in ["utf-8", "latin-1", "utf-16"]:
+            with self.subTest(encoding=encoding):
+                tokens = list(_generate_tokens_from_c_tokenizer(
+                    readline(encoding).__next__,
+                    extra_tokens=True,
+                    encoding=encoding,
+                ))
+                self.assertEqual(tokens, expected)
+
     def test_int(self):
 
         self.check_tokenize('0xff <= 255', """\
@@ -2675,43 +2717,44 @@ def test_unicode(self):
 
     def test_invalid_syntax(self):
         def get_tokens(string):
-            return list(_generate_tokens_from_c_tokenizer(string))
-
-        self.assertRaises(SyntaxError, get_tokens, "(1+2]")
-        self.assertRaises(SyntaxError, get_tokens, "(1+2}")
-        self.assertRaises(SyntaxError, get_tokens, "{1+2]")
-
-        self.assertRaises(SyntaxError, get_tokens, "1_")
-        self.assertRaises(SyntaxError, get_tokens, "1.2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e+")
-
-        self.assertRaises(SyntaxError, get_tokens, "\xa0")
-        self.assertRaises(SyntaxError, get_tokens, "€")
-
-        self.assertRaises(SyntaxError, get_tokens, "0b12")
-        self.assertRaises(SyntaxError, get_tokens, "0b1_2")
-        self.assertRaises(SyntaxError, get_tokens, "0b2")
-        self.assertRaises(SyntaxError, get_tokens, "0b1_")
-        self.assertRaises(SyntaxError, get_tokens, "0b")
-        self.assertRaises(SyntaxError, get_tokens, "0o18")
-        self.assertRaises(SyntaxError, get_tokens, "0o1_8")
-        self.assertRaises(SyntaxError, get_tokens, "0o8")
-        self.assertRaises(SyntaxError, get_tokens, "0o1_")
-        self.assertRaises(SyntaxError, get_tokens, "0o")
-        self.assertRaises(SyntaxError, get_tokens, "0x1_")
-        self.assertRaises(SyntaxError, get_tokens, "0x")
-        self.assertRaises(SyntaxError, get_tokens, "1_")
-        self.assertRaises(SyntaxError, get_tokens, "012")
-        self.assertRaises(SyntaxError, get_tokens, "1.2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e+")
-
-        self.assertRaises(SyntaxError, get_tokens, "'sdfsdf")
-        self.assertRaises(SyntaxError, get_tokens, "'''sdfsdf''")
-
-        self.assertRaises(SyntaxError, get_tokens, "(" * 1000 + "a" + ")" * 1000)
-        self.assertRaises(SyntaxError, get_tokens, "]")
+            the_string = StringIO(string)
+            return list(_generate_tokens_from_c_tokenizer(the_string.readline))
+
+        for case in [
+            "(1+2]",
+            "(1+2}",
+            "{1+2]",
+            "1_",
+            "1.2_",
+            "1e2_",
+            "1e+",
+
+            "\xa0",
+            "€",
+            "0b12",
+            "0b1_2",
+            "0b2",
+            "0b1_",
+            "0b",
+            "0o18",
+            "0o1_8",
+            "0o8",
+            "0o1_",
+            "0o",
+            "0x1_",
+            "0x",
+            "1_",
+            "012",
+            "1.2_",
+            "1e2_",
+            "1e+",
+            "'sdfsdf",
+            "'''sdfsdf''",
+            "(" * 1000 + "a" + ")" * 1000,
+            "]",
+        ]:
+            with self.subTest(case=case):
+                self.assertRaises(SyntaxError, get_tokens, case)
 
     def test_max_indent(self):
         MAXINDENT = 100
@@ -2722,20 +2765,24 @@ def generate_source(indents):
             return source
 
         valid = generate_source(MAXINDENT - 1)
-        tokens = list(_generate_tokens_from_c_tokenizer(valid))
+        the_input = StringIO(valid)
+        tokens = list(_generate_tokens_from_c_tokenizer(the_input.readline))
         self.assertEqual(tokens[-2].type, DEDENT)
         self.assertEqual(tokens[-1].type, ENDMARKER)
         compile(valid, "<string>", "exec")
 
         invalid = generate_source(MAXINDENT)
-        self.assertRaises(SyntaxError, lambda: list(_generate_tokens_from_c_tokenizer(invalid)))
+        the_input = StringIO(invalid)
+        self.assertRaises(SyntaxError, lambda: list(_generate_tokens_from_c_tokenizer(the_input.readline)))
         self.assertRaises(
             IndentationError, compile, invalid, "<string>", "exec"
         )
 
     def test_continuation_lines_indentation(self):
         def get_tokens(string):
-            return [(kind, string) for (kind, string, *_) in _generate_tokens_from_c_tokenizer(string)]
+            the_string = StringIO(string)
+            return [(kind, string) for (kind, string, *_)
+                    in _generate_tokens_from_c_tokenizer(the_string.readline)]
 
         code = dedent("""
             def fib(n):