scikit-hep · jpivarski · Apr 8, 2021 · Apr 8, 2021 · Apr 8, 2021
diff --git a/src/uproot/_util.py b/src/uproot/_util.py
@@ -10,6 +10,7 @@
 import glob
 import numbers
 import os
+import platform
 import re
 import sys
 
@@ -18,17 +19,17 @@
 except ImportError:
     from collections import Iterable
 try:
-    from urllib.parse import urlparse
+    from urllib.parse import unquote, urlparse
 except ImportError:
-    from urlparse import urlparse
+    from urlparse import urlparse, unquote
 
 import numpy
 
 py2 = sys.version_info[0] <= 2
 py26 = py2 and sys.version_info[1] <= 6
 py27 = py2 and not py26
 py35 = not py2 and sys.version_info[1] <= 5
-win = os.name == "nt"
+win = platform.system().lower().startswith("win")  # case-insensitive prefix test on the reported system name
 
 
 if py2:
@@ -163,8 +164,8 @@ def regularize_path(path):
 
 
 _windows_drive_letter_ending = re.compile(r".*\b[A-Za-z]$")
-_windows_absolute_path_pattern = re.compile(r"^[A-Za-z]:\\")
-_windows_absolute_path_pattern_slash = re.compile(r"^/[A-Za-z]:\\")
+_windows_absolute_path_pattern = re.compile(r"^[A-Za-z]:[\\/]")  # letter, colon, then "\" or "/"
+_windows_absolute_path_pattern_slash = re.compile(r"^[\\/][A-Za-z]:[\\/]")  # same, after one leading "\" or "/"
 _might_be_port = re.compile(r"^[0-9].*")
 
 
@@ -192,10 +193,7 @@ def file_object_path_split(path):
 
         if file_path.upper() in ("FILE", "HTTP", "HTTPS", "ROOT"):
             return path, None
-        elif (
-            os.name == "nt"
-            and _windows_drive_letter_ending.match(file_path) is not None
-        ):
+        elif win and _windows_drive_letter_ending.match(file_path) is not None:
             return path, None
         else:
             return file_path, object_path
@@ -227,29 +225,32 @@ def file_path_to_source_class(file_path, options):
         return out, file_path
 
     windows_absolute_path = None
-
-    if os.name == "nt":
+    if win:
         if _windows_absolute_path_pattern.match(file_path) is not None:
             windows_absolute_path = file_path
 
     parsed_url = urlparse(file_path)
+    if parsed_url.scheme.upper() == "FILE":
+        parsed_url_path = unquote(parsed_url.path)
+    else:
+        parsed_url_path = parsed_url.path
 
-    if os.name == "nt" and windows_absolute_path is None:
-        if _windows_absolute_path_pattern.match(parsed_url.path) is not None:
-            windows_absolute_path = parsed_url.path
-        elif _windows_absolute_path_pattern_slash.match(parsed_url.path) is not None:
-            windows_absolute_path = parsed_url.path[1:]
+    if win and windows_absolute_path is None:
+        if _windows_absolute_path_pattern.match(parsed_url_path) is not None:
+            windows_absolute_path = parsed_url_path
+        elif _windows_absolute_path_pattern_slash.match(parsed_url_path) is not None:
+            windows_absolute_path = parsed_url_path[1:]
 
     if (
         parsed_url.scheme.upper() == "FILE"
         or len(parsed_url.scheme) == 0
-        or windows_absolute_path
+        or windows_absolute_path is not None
     ):
         if windows_absolute_path is None:
             if parsed_url.netloc.upper() == "LOCALHOST":
-                file_path = parsed_url.path
+                file_path = parsed_url_path
             else:
-                file_path = parsed_url.netloc + parsed_url.path
+                file_path = parsed_url.netloc + parsed_url_path
         else:
             file_path = windows_absolute_path
 

diff --git a/tests/test_0325-fix-windows-file-uris.py b/tests/test_0325-fix-windows-file-uris.py
@@ -0,0 +1,92 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/main/LICENSE
+
+import numpy as np
+import pytest
+
+import uproot._util
+import uproot.reading
+
+
+@pytest.mark.skipif(
+    not uproot._util.win, reason="Drive letters only parsed on Windows."
+)
+def test_windows_drive_letters():  # each case compares element [1] of the result to the expected path
+    assert (  # file URI with three slashes: the slash before the drive letter is dropped
+        uproot._util.file_path_to_source_class(
+            "file:///g:/mydir/file.root", uproot.reading.open.defaults
+        )[1]
+        == "g:/mydir/file.root"
+    )
+
+    assert (  # file URI with a single slash: same expected path
+        uproot._util.file_path_to_source_class(
+            "file:/g:/mydir/file.root", uproot.reading.open.defaults
+        )[1]
+        == "g:/mydir/file.root"
+    )
+
+    assert (  # file URI with no slash between the scheme and the drive letter
+        uproot._util.file_path_to_source_class(
+            "file:g:/mydir/file.root", uproot.reading.open.defaults
+        )[1]
+        == "g:/mydir/file.root"
+    )
+
+    assert (  # no scheme, leading forward slash before the drive letter
+        uproot._util.file_path_to_source_class(
+            "/g:/mydir/file.root", uproot.reading.open.defaults
+        )[1]
+        == "g:/mydir/file.root"
+    )
+
+    assert (  # no scheme, leading backslash before the drive letter
+        uproot._util.file_path_to_source_class(
+            r"\g:/mydir/file.root", uproot.reading.open.defaults
+        )[1]
+        == "g:/mydir/file.root"
+    )
+
+    assert (  # plain backslash-separated path is expected back unchanged
+        uproot._util.file_path_to_source_class(
+            r"g:\mydir\file.root", uproot.reading.open.defaults
+        )[1]
+        == r"g:\mydir\file.root"
+    )
+
+    assert (  # leading backslash is dropped; the remaining backslashes are kept
+        uproot._util.file_path_to_source_class(
+            r"\g:\mydir\file.root", uproot.reading.open.defaults
+        )[1]
+        == r"g:\mydir\file.root"
+    )
+
+
+def test_escaped_uri_codes():
+    # Paths given as file:// URIs are percent-decoded (%20 becomes a space).
+    assert (
+        uproot._util.file_path_to_source_class(
+            "file:///my%20file.root", uproot.reading.open.defaults
+        )[1]
+        == "/my file.root"
+    )
+    if not uproot._util.py2:  # the multi-byte escape case is skipped when py2 is set
+        assert (
+            uproot._util.file_path_to_source_class(
+                "file:///my%E2%80%92file.root", uproot.reading.open.defaults
+            )[1]
+            == u"/my\u2012file.root"
+        )
+
+    # Paths without a file: scheme keep their percent sequences verbatim.
+    assert (
+        uproot._util.file_path_to_source_class(
+            "/my%20file.root", uproot.reading.open.defaults
+        )[1]
+        == "/my%20file.root"
+    )
+    assert (
+        uproot._util.file_path_to_source_class(
+            "/my%E2%80%92file.root", uproot.reading.open.defaults
+        )[1]
+        == "/my%E2%80%92file.root"
+    )