1- from _typeshed import Incomplete , SupportsRead
2- from codecs import CodecInfo
3- from typing import Protocol , overload , type_check_only
4- from typing_extensions import TypeAlias
1+ import re
2+ from _io import BytesIO , StringIO
3+ from _typeshed import Incomplete , ReadableBuffer , SupportsRead
4+ from collections .abc import Callable , Iterable
5+ from typing import Any , AnyStr , Generic , Literal , TypeVar , overload
6+ from typing_extensions import Self , TypeAlias
57
6- # Is actually webencodings.Encoding
7- @type_check_only
8- class _Encoding (Protocol ):
9- name : str
10- codec_info : CodecInfo
11- def __init__ (self , name : str , codec_info : CodecInfo ) -> None : ...
8+ from webencodings import Encoding
129
1310_UnicodeInputStream : TypeAlias = str | SupportsRead [str ]
1411_BinaryInputStream : TypeAlias = bytes | SupportsRead [bytes ]
1512_InputStream : TypeAlias = _UnicodeInputStream | _BinaryInputStream # noqa: Y047 # used in other files
13+ _SupportsReadT = TypeVar ("_SupportsReadT" , bound = SupportsRead [Any ])
14+ _SupportsReadBytesT = TypeVar ("_SupportsReadBytesT" , bound = SupportsRead [bytes ])
1615
17- spaceCharactersBytes : Incomplete
18- asciiLettersBytes : Incomplete
19- asciiUppercaseBytes : Incomplete
20- spacesAngleBrackets : Incomplete
16+ spaceCharactersBytes : frozenset [ bytes ]
17+ asciiLettersBytes : frozenset [ bytes ]
18+ asciiUppercaseBytes : frozenset [ bytes ]
19+ spacesAngleBrackets : frozenset [ bytes ]
2120invalid_unicode_no_surrogate : str
22- invalid_unicode_re : Incomplete
23- non_bmp_invalid_codepoints : Incomplete
24- ascii_punctuation_re : Incomplete
25- charsUntilRegEx : Incomplete
21+ invalid_unicode_re : re . Pattern [ str ]
22+ non_bmp_invalid_codepoints : set [ int ]
23+ ascii_punctuation_re : re . Pattern [ str ]
24+ charsUntilRegEx : dict [ tuple [ Iterable [ str | bytes | bytearray ], bool ], re . Pattern [ str ]]
2625
27- class BufferedStream :
28- stream : Incomplete
29- buffer : Incomplete
30- position : Incomplete
31- def __init__ (self , stream ) -> None : ...
32- def tell (self ): ...
33- def seek (self , pos ) -> None : ...
34- def read (self , bytes ) : ...
26+ class BufferedStream ( Generic [ AnyStr ]) :
27+ stream : SupportsRead [ AnyStr ]
28+ buffer : list [ AnyStr ]
29+ position : list [ int ]
30+ def __init__ (self , stream : SupportsRead [ AnyStr ] ) -> None : ...
31+ def tell (self ) -> int : ...
32+ def seek (self , pos : int ) -> None : ...
33+ def read (self , bytes : int ) -> AnyStr : ...
3534
3635@overload
3736def HTMLInputStream (source : _UnicodeInputStream ) -> HTMLUnicodeInputStream : ...
@@ -48,9 +47,9 @@ def HTMLInputStream(
4847) -> HTMLBinaryInputStream : ...
4948
5049class HTMLUnicodeInputStream :
51- reportCharacterErrors : Incomplete
52- newLines : Incomplete
53- charEncoding : tuple [_Encoding , str ]
50+ reportCharacterErrors : Callable [[ str ], None ]
51+ newLines : list [ int ]
52+ charEncoding : tuple [Encoding , str ]
5453 dataStream : Incomplete
5554 def __init__ (self , source : _UnicodeInputStream ) -> None : ...
5655 chunk : str
@@ -60,14 +59,17 @@ class HTMLUnicodeInputStream:
6059 prevNumLines : int
6160 prevNumCols : int
6261 def reset (self ) -> None : ...
63- def openStream (self , source ): ...
62+ @overload
63+ def openStream (self , source : _SupportsReadT ) -> _SupportsReadT : ...
64+ @overload
65+ def openStream (self , source : str | None ) -> StringIO : ...
6466 def position (self ) -> tuple [int , int ]: ...
65- def char (self ): ...
66- def readChunk (self , chunkSize = None ): ...
67- def characterErrorsUCS4 (self , data ) -> None : ...
68- def characterErrorsUCS2 (self , data ) -> None : ...
69- def charsUntil (self , characters , opposite : bool = False ): ...
70- def unget (self , char ) -> None : ...
67+ def char (self ) -> str | None : ...
68+ def readChunk (self , chunkSize : int | None = None ) -> bool : ...
69+ def characterErrorsUCS4 (self , data : str ) -> None : ...
70+ def characterErrorsUCS2 (self , data : str ) -> None : ...
71+ def charsUntil (self , characters : Iterable [ str | bytes | bytearray ] , opposite : bool = False ) -> str : ...
72+ def unget (self , char : str | None ) -> None : ...
7173
7274class HTMLBinaryInputStream (HTMLUnicodeInputStream ):
7375 rawStream : Incomplete
@@ -77,8 +79,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
7779 transport_encoding : Incomplete
7880 same_origin_parent_encoding : Incomplete
7981 likely_encoding : Incomplete
80- default_encoding : Incomplete
81- charEncoding : tuple [_Encoding , str ]
82+ default_encoding : str
83+ charEncoding : tuple [Encoding , str ]
8284 def __init__ (
8385 self ,
8486 source : _BinaryInputStream ,
@@ -91,46 +93,52 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
9193 ) -> None : ...
9294 dataStream : Incomplete
9395 def reset (self ) -> None : ...
94- def openStream (self , source ): ...
96+ @overload # type: ignore[override]
97+ def openStream (self , source : _SupportsReadBytesT ) -> _SupportsReadBytesT : ...
98+ @overload # type: ignore[override]
99+ def openStream (self , source : ReadableBuffer ) -> BytesIO : ...
95100 def determineEncoding (self , chardet : bool = True ): ...
96101 def changeEncoding (self , newEncoding : str | bytes | None ) -> None : ...
97- def detectBOM (self ): ...
98- def detectEncodingMeta (self ): ...
102+ def detectBOM (self ) -> Encoding | None : ...
103+ def detectEncodingMeta (self ) -> Encoding | None : ...
99104
100105class EncodingBytes (bytes ):
101- def __new__ (self , value ): ...
102- def __init__ (self , value ) -> None : ...
103- def __iter__ (self ): ...
104- def __next__ (self ): ...
105- def next (self ): ...
106- def previous (self ): ...
107- def setPosition (self , position ) -> None : ...
108- def getPosition (self ): ...
109- position : Incomplete
110- def getCurrentByte (self ): ...
106+ def __new__ (self , value : bytes ) -> Self : ...
107+ def __init__ (self , value : bytes ) -> None : ...
108+ def __iter__ (self ) -> Self : ... # type: ignore[override]
109+ def __next__ (self ) -> bytes : ...
110+ def next (self ) -> bytes : ...
111+ def previous (self ) -> bytes : ...
112+ def setPosition (self , position : int ) -> None : ...
113+ def getPosition (self ) -> int | None : ...
111114 @property
112- def currentByte (self ): ...
113- def skip (self , chars = ...): ...
114- def skipUntil (self , chars ): ...
115- def matchBytes (self , bytes ): ...
116- def jumpTo (self , bytes ): ...
115+ def position (self ) -> int | None : ...
116+ @position .setter
117+ def position (self , position : int ) -> None : ...
118+ def getCurrentByte (self ) -> bytes : ...
119+ @property
120+ def currentByte (self ) -> bytes : ...
121+ def skip (self , chars : bytes | bytearray | Iterable [bytes ] = ...) -> bytes | None : ...
122+ def skipUntil (self , chars : bytes | bytearray | Iterable [bytes ]) -> bytes | None : ...
123+ def matchBytes (self , bytes : bytes | bytearray ) -> bool : ...
124+ def jumpTo (self , bytes : bytes | bytearray ) -> Literal [True ]: ...
117125
118126class EncodingParser :
119- data : Incomplete
120- encoding : Incomplete
121- def __init__ (self , data ) -> None : ...
122- def getEncoding (self ): ...
123- def handleComment (self ): ...
124- def handleMeta (self ): ...
125- def handlePossibleStartTag (self ): ...
126- def handlePossibleEndTag (self ): ...
127- def handlePossibleTag (self , endTag ) : ...
128- def handleOther (self ): ...
129- def getAttribute (self ): ...
127+ data : EncodingBytes
128+ encoding : Encoding | None
129+ def __init__ (self , data : bytes ) -> None : ...
130+ def getEncoding (self ) -> Encoding | None : ...
131+ def handleComment (self ) -> bool : ...
132+ def handleMeta (self ) -> bool : ...
133+ def handlePossibleStartTag (self ) -> bool : ...
134+ def handlePossibleEndTag (self ) -> bool : ...
135+ def handlePossibleTag (self , endTag : bool | None ) -> bool : ...
136+ def handleOther (self ) -> bool : ...
137+ def getAttribute (self ) -> tuple [ bytes , bytes ] | None : ...
130138
131139class ContentAttrParser :
132- data : Incomplete
133- def __init__ (self , data ) -> None : ...
134- def parse (self ): ...
140+ data : EncodingBytes
141+ def __init__ (self , data : EncodingBytes ) -> None : ...
142+ def parse (self ) -> bytes | None : ...
135143
136- def lookupEncoding (encoding : str | bytes | None ) -> str | None : ...
144+ def lookupEncoding (encoding : str | bytes | None ) -> Encoding | None : ...
0 commit comments