Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 20 additions & 19 deletions src/uproot/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import glob
import numbers
import os
import platform
import re
import sys

Expand All @@ -18,17 +19,17 @@
except ImportError:
from collections import Iterable
try:
from urllib.parse import urlparse
from urllib.parse import unquote, urlparse
except ImportError:
from urlparse import urlparse
from urlparse import urlparse, unquote

import numpy

py2 = sys.version_info[0] <= 2
py26 = py2 and sys.version_info[1] <= 6
py27 = py2 and not py26
py35 = not py2 and sys.version_info[1] <= 5
win = os.name == "nt"
win = platform.system().lower().startswith("win")


if py2:
Expand Down Expand Up @@ -163,8 +164,8 @@ def regularize_path(path):


_windows_drive_letter_ending = re.compile(r".*\b[A-Za-z]$")
_windows_absolute_path_pattern = re.compile(r"^[A-Za-z]:\\")
_windows_absolute_path_pattern_slash = re.compile(r"^/[A-Za-z]:\\")
_windows_absolute_path_pattern = re.compile(r"^[A-Za-z]:[\\/]")
_windows_absolute_path_pattern_slash = re.compile(r"^[\\/][A-Za-z]:[\\/]")
_might_be_port = re.compile(r"^[0-9].*")


Expand Down Expand Up @@ -192,10 +193,7 @@ def file_object_path_split(path):

if file_path.upper() in ("FILE", "HTTP", "HTTPS", "ROOT"):
return path, None
elif (
os.name == "nt"
and _windows_drive_letter_ending.match(file_path) is not None
):
elif win and _windows_drive_letter_ending.match(file_path) is not None:
return path, None
else:
return file_path, object_path
Expand Down Expand Up @@ -227,29 +225,32 @@ def file_path_to_source_class(file_path, options):
return out, file_path

windows_absolute_path = None

if os.name == "nt":
if win:
if _windows_absolute_path_pattern.match(file_path) is not None:
windows_absolute_path = file_path

parsed_url = urlparse(file_path)
if parsed_url.scheme.upper() == "FILE":
parsed_url_path = unquote(parsed_url.path)
else:
parsed_url_path = parsed_url.path

if os.name == "nt" and windows_absolute_path is None:
if _windows_absolute_path_pattern.match(parsed_url.path) is not None:
windows_absolute_path = parsed_url.path
elif _windows_absolute_path_pattern_slash.match(parsed_url.path) is not None:
windows_absolute_path = parsed_url.path[1:]
if win and windows_absolute_path is None:
if _windows_absolute_path_pattern.match(parsed_url_path) is not None:
windows_absolute_path = parsed_url_path
elif _windows_absolute_path_pattern_slash.match(parsed_url_path) is not None:
windows_absolute_path = parsed_url_path[1:]

if (
parsed_url.scheme.upper() == "FILE"
or len(parsed_url.scheme) == 0
or windows_absolute_path
or windows_absolute_path is not None
):
if windows_absolute_path is None:
if parsed_url.netloc.upper() == "LOCALHOST":
file_path = parsed_url.path
file_path = parsed_url_path
else:
file_path = parsed_url.netloc + parsed_url.path
file_path = parsed_url.netloc + parsed_url_path
else:
file_path = windows_absolute_path

Expand Down
92 changes: 92 additions & 0 deletions tests/test_0325-fix-windows-file-uris.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/main/LICENSE

import numpy as np
import pytest

import uproot._util
import uproot.reading


@pytest.mark.skipif(
    not uproot._util.win, reason="Drive letters only parsed on Windows."
)
def test_windows_drive_letters():
    """Check the file path returned for Windows drive-letter inputs.

    Each case feeds one spelling of a drive-letter path (``file:`` URI with
    varying numbers of slashes, a path with a stray leading separator, or a
    plain absolute path) to ``file_path_to_source_class`` and compares the
    second element of its result against the expected bare path.
    """
    resolve = uproot._util.file_path_to_source_class
    options = uproot.reading.open.defaults

    # (input given to the resolver, file path expected back)
    cases = [
        # file: URIs with three, one, and zero slashes after the scheme
        ("file:///g:/mydir/file.root", "g:/mydir/file.root"),
        ("file:/g:/mydir/file.root", "g:/mydir/file.root"),
        ("file:g:/mydir/file.root", "g:/mydir/file.root"),
        # a leading separator in front of the drive letter is dropped
        ("/g:/mydir/file.root", "g:/mydir/file.root"),
        (r"\g:/mydir/file.root", "g:/mydir/file.root"),
        # backslash-separated paths keep their backslashes
        (r"g:\mydir\file.root", r"g:\mydir\file.root"),
        (r"\g:\mydir\file.root", r"g:\mydir\file.root"),
    ]

    for given, expected in cases:
        assert resolve(given, options)[1] == expected


def test_escaped_uri_codes():
    """Check when percent-escapes in a path are expected to be decoded.

    Paths given as ``file://`` URIs are expected back with their ``%XX``
    sequences unquoted; plain paths are expected back unchanged.
    """
    options = uproot.reading.open.defaults

    def resolved(path):
        # Only the file path (second element of the result) matters here.
        return uproot._util.file_path_to_source_class(path, options)[1]

    # file:// URIs: the % escapes should be unquoted.
    assert resolved("file:///my%20file.root") == "/my file.root"
    if not uproot._util.py2:
        # The multi-byte (UTF-8) escape is only checked outside Python 2.
        assert resolved("file:///my%E2%80%92file.root") == u"/my\u2012file.root"

    # Anything else: the % signs must be left exactly as given.
    for plain in ("/my%20file.root", "/my%E2%80%92file.root"):
        assert resolved(plain) == plain