88import posixpath
99import re
1010import string
11+ from inspect import getfullargspec
1112from typing import (
1213 cast ,
1314 Callable ,
2021 Union ,
2122)
2223from urllib .parse import (
23- parse_qs ,
24- parse_qsl ,
24+ parse_qs as _parse_qs ,
25+ parse_qsl as _parse_qsl ,
2526 ParseResult ,
2627 quote ,
2728 unquote_to_bytes ,
4142from ._url import _SPECIAL_SCHEMES
4243
4344
# True when urllib.parse.parse_qs() on the running interpreter has no
# positional-or-keyword parameter named ``separator``; in that case the
# keyword has to be stripped before delegating to the stdlib parsers.
_REMOVE_SEPARATOR = 'separator' not in getfullargspec(_parse_qs).args
46+
47+
def _handle_separator(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)``, hiding ``separator`` when unsupported.

    When the module-level ``_REMOVE_SEPARATOR`` flag is true, a ``separator``
    keyword argument (if present) is discarded before the call; every other
    positional and keyword argument is forwarded unchanged. The return value
    is whatever *func* returns.
    """
    if not _REMOVE_SEPARATOR:
        return func(*args, **kwargs)
    # Forward everything except the keyword the callee does not accept.
    forwarded = {key: value for key, value in kwargs.items() if key != 'separator'}
    return func(*args, **forwarded)
52+
53+
def parse_qs(*args, **kwargs):
    """Wrapper around :func:`urllib.parse.parse_qs`.

    All arguments are passed through ``_handle_separator``, which drops the
    ``separator`` keyword when ``_REMOVE_SEPARATOR`` is set, and the stdlib
    function's result is returned as-is.
    """
    parsed = _handle_separator(_parse_qs, *args, **kwargs)
    return parsed
56+
57+
def parse_qsl(*args, **kwargs):
    """Wrapper around :func:`urllib.parse.parse_qsl`.

    All arguments are passed through ``_handle_separator``, which drops the
    ``separator`` keyword when ``_REMOVE_SEPARATOR`` is set, and the stdlib
    function's result is returned as-is.
    """
    pairs = _handle_separator(_parse_qsl, *args, **kwargs)
    return pairs
60+
61+
4462# error handling function for bytes-to-Unicode decoding errors with URLs
4563def _quote_byte (error : UnicodeError ) -> Tuple [str , int ]:
4664 error = cast (AnyUnicodeError , error )
@@ -200,6 +218,8 @@ def url_query_parameter(
200218 parameter : str ,
201219 default : Optional [str ] = None ,
202220 keep_blank_values : Union [bool , int ] = 0 ,
221+ * ,
222+ separator : str = '&' ,
203223) -> Optional [str ]:
204224 """Return the value of a url parameter, given the url and parameter name
205225
@@ -230,7 +250,9 @@ def url_query_parameter(
230250 """
231251
232252 queryparams = parse_qs (
233- urlsplit (str (url ))[3 ], keep_blank_values = bool (keep_blank_values )
253+ urlsplit (str (url ))[3 ],
254+ keep_blank_values = bool (keep_blank_values ),
255+ separator = separator ,
234256 )
235257 if parameter in queryparams :
236258 return queryparams [parameter ][0 ]
@@ -305,9 +327,13 @@ def url_query_cleaner(
305327 return url
306328
307329
308- def _add_or_replace_parameters (url : str , params : Dict [str , str ]) -> str :
330+ def _add_or_replace_parameters (url : str , params : Dict [str , str ], * , separator : str = '&' ) -> str :
309331 parsed = urlsplit (url )
310- current_args = parse_qsl (parsed .query , keep_blank_values = True )
332+ current_args = parse_qsl (
333+ parsed .query ,
334+ keep_blank_values = True ,
335+ separator = separator ,
336+ )
311337
312338 new_args = []
313339 seen_params = set ()
@@ -327,7 +353,7 @@ def _add_or_replace_parameters(url: str, params: Dict[str, str]) -> str:
327353 return urlunsplit (parsed ._replace (query = query ))
328354
329355
330- def add_or_replace_parameter (url : str , name : str , new_value : str ) -> str :
356+ def add_or_replace_parameter (url : str , name : str , new_value : str , * , separator : str = '&' ) -> str :
331357 """Add or remove a parameter to a given url
332358
333359 >>> import w3lib.url
@@ -340,10 +366,10 @@ def add_or_replace_parameter(url: str, name: str, new_value: str) -> str:
340366 >>>
341367
342368 """
343- return _add_or_replace_parameters (url , {name : new_value })
369+ return _add_or_replace_parameters (url , {name : new_value }, separator = separator )
344370
345371
346- def add_or_replace_parameters (url : str , new_parameters : Dict [str , str ]) -> str :
372+ def add_or_replace_parameters (url : str , new_parameters : Dict [str , str ], * , separator : str = '&' ) -> str :
347373 """Add or remove a parameters to a given url
348374
349375 >>> import w3lib.url
@@ -355,7 +381,7 @@ def add_or_replace_parameters(url: str, new_parameters: Dict[str, str]) -> str:
355381 >>>
356382
357383 """
358- return _add_or_replace_parameters (url , new_parameters )
384+ return _add_or_replace_parameters (url , new_parameters , separator = separator )
359385
360386
361387def path_to_file_uri (path : str ) -> str :
@@ -528,6 +554,8 @@ def canonicalize_url(
528554 keep_blank_values : bool = True ,
529555 keep_fragments : bool = False ,
530556 encoding : Optional [str ] = None ,
557+ * ,
558+ query_separator : str = '&'
531559) -> str :
532560 r"""Canonicalize the given url by applying the following procedures:
533561
@@ -600,7 +628,11 @@ def canonicalize_url(
600628 # Similar considerations apply to query parts. The functionality of
601629 # IRIs (namely, to be able to include non-ASCII characters) can only be
602630 # used if the query part is encoded in UTF-8.
603- keyvals = parse_qsl_to_bytes (query , keep_blank_values )
631+ keyvals = parse_qsl_to_bytes (
632+ query ,
633+ keep_blank_values ,
634+ separator = query_separator ,
635+ )
604636
605637 keyvals .sort ()
606638 query = urlencode (keyvals )
@@ -642,7 +674,10 @@ def parse_url(
642674
643675
644676def parse_qsl_to_bytes (
645- qs : str , keep_blank_values : bool = False
677+ qs : str ,
678+ keep_blank_values : bool = False ,
679+ * ,
680+ separator : str = '&' ,
646681) -> List [Tuple [bytes , bytes ]]:
647682 """Parse a query given as a string argument.
648683
@@ -665,7 +700,7 @@ def parse_qsl_to_bytes(
665700 # with unquote_to_bytes(s)
666701 coerce_args = cast (Callable [..., Tuple [str , Callable [..., bytes ]]], _coerce_args )
667702 qs , _coerce_result = coerce_args (qs )
668- pairs = [ s2 for s1 in qs .split ("&" ) for s2 in s1 . split ( ";" )]
703+ pairs = qs .split (separator )
669704 r = []
670705 for name_value in pairs :
671706 if not name_value :
0 commit comments