Skip to content

Commit 7d013d2

Browse files
Add host_subcomponent property (#1159)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent ecb100d commit 7d013d2

File tree

5 files changed

+66
-7
lines changed

5 files changed

+66
-7
lines changed

CHANGES/1159.feature.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Added :attr:`~yarl.URL.host_subcomponent` which returns the :rfc:`3986#section-3.2.2` host subcomponent -- by :user:`bdraco`.
2+
3+
Currently, the only practical difference between :attr:`~yarl.URL.raw_host` and :attr:`~yarl.URL.host_subcomponent` is that :attr:`~yarl.URL.host_subcomponent` returns IPv6 addresses enclosed in square brackets.

docs/api.rst

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,22 @@ There are two kinds of properties: *decoded* and *encoded* (with
191191

192192
>>> URL('http://хост.домен').raw_host
193193
'xn--n1agdj.xn--d1acufc'
194+
>>> URL('http://[::1]').raw_host
195+
'::1'
196+
197+
.. attribute:: URL.host_subcomponent
198+
199+
:rfc:`3986#section-3.2.2` host subcomponent part of URL, ``None`` for relative URLs
200+
(:ref:`yarl-api-relative-urls`).
201+
202+
.. doctest::
203+
204+
>>> URL('http://хост.домен').host_subcomponent
205+
'xn--n1agdj.xn--d1acufc'
206+
>>> URL('http://[::1]').host_subcomponent
207+
'[::1]'
194208

209+
.. versionadded:: 1.13
195210

196211
.. attribute:: URL.port
197212

docs/spelling_wordlist.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ runtimes
4242
sdist
4343
subclass
4444
subclasses
45+
subcomponent
4546
svetlov
4647
uncompiled
4748
v1

tests/test_url.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,24 @@ def test_raw_host():
176176
assert url.raw_host == url._val.hostname
177177

178178

179+
@pytest.mark.parametrize(
180+
("host"),
181+
[
182+
("example.com"),
183+
("[::1]"),
184+
("xn--gnter-4ya.com"),
185+
],
186+
)
187+
def test_host_subcomponent(host: str):
188+
url = URL(f"http://{host}")
189+
assert url.host_subcomponent == host
190+
191+
192+
def test_host_subcomponent_return_idna_encoded_host():
193+
url = URL("http://оун-упа.укр")
194+
assert url.host_subcomponent == "xn----8sb1bdhvc.xn--j1amh"
195+
196+
179197
def test_raw_host_non_ascii():
180198
url = URL("http://оун-упа.укр")
181199
assert "xn----8sb1bdhvc.xn--j1amh" == url.raw_host

yarl/_url.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,8 @@ def raw_host(self) -> Union[str, None]:
647647
648648
None for relative URLs.
649649
650+
For IPv6 addresses this property returns the address without brackets; use the
651+
`host_subcomponent` property instead to get the bracketed host subcomponent.
650652
"""
651653
# Use host instead of hostname for sake of shortness
652654
# May add .hostname prop later
@@ -660,16 +662,35 @@ def host(self) -> Union[str, None]:
660662
None for relative URLs.
661663
662664
"""
663-
raw = self.raw_host
664-
if raw is None:
665+
if (raw := self.raw_host) is None:
665666
return None
666-
if "%" in raw:
667-
# Hack for scoped IPv6 addresses like
668-
# fe80::2%Перевірка
669-
# presence of '%' sign means only IPv6 address, so idna is useless.
667+
if raw and raw[-1].isdigit() or ":" in raw:
668+
# IP addresses are never IDNA encoded
670669
return raw
671670
return _idna_decode(raw)
672671

672+
@cached_property
673+
def host_subcomponent(self) -> Union[str, None]:
674+
"""Return the host subcomponent part of URL.
675+
676+
None for relative URLs.
677+
678+
https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2
679+
680+
`IP-literal = "[" ( IPv6address / IPvFuture ) "]"`
681+
682+
Examples:
683+
- `http://example.com:8080` -> `example.com`
684+
- `http://example.com:80` -> `example.com`
685+
- `https://127.0.0.1:8443` -> `127.0.0.1`
686+
- `https://[::1]:8443` -> `[::1]`
687+
- `http://[::1]` -> `[::1]`
688+
689+
"""
690+
if (raw := self.raw_host) is None:
691+
return None
692+
return f"[{raw}]" if ":" in raw else raw
693+
673694
@cached_property
674695
def port(self) -> Union[int, None]:
675696
"""Port part of URL, with scheme-based fallback.
@@ -953,7 +974,8 @@ def _encode_host(cls, host: str, human: bool = False) -> str:
953974
# - 127.0.0.1 (last character is a digit)
954975
# - 2001:db8::ff00:42:8329 (contains a colon)
955976
# - 2001:db8::ff00:42:8329%eth0 (contains a colon)
956-
# - [2001:db8::ff00:42:8329] (contains a colon)
977+
# - [2001:db8::ff00:42:8329] (contains a colon -- brackets should
978+
# have been removed before it gets here)
957979
# Rare IP Address formats are not supported per:
958980
# https://datatracker.ietf.org/doc/html/rfc3986#section-7.4
959981
#

0 commit comments

Comments
 (0)