@@ -54,13 +54,30 @@ def _ignore_error(exception):
5454 getattr (exception , 'winerror' , None ) in _IGNORED_WINERRORS )
5555
5656
@functools.cache
def _is_case_sensitive(flavour):
    """Return True if *flavour* treats paths as case-sensitive.

    *flavour* is any hashable object exposing a ``normcase()`` function.
    The flavour counts as case-sensitive when ``normcase()`` leaves a
    mixed-case string unchanged. Results are memoized per flavour object
    by ``functools.cache``.
    """
    return flavour.normcase('Aa') == 'Aa'
5960
6061#
6162# Globbing helpers
6263#
6364
65+
# fnmatch.translate() returns a regular expression that includes a prefix and
# a suffix, which enable matching newlines and ensure the end of the string is
# matched, respectively. These features are undesirable for our implementation
# of PurePath.match(), which represents path separators as newlines and joins
# pattern segments together. As a workaround, we define a slice object that
# can remove the prefix and suffix from any translate() result. See the
# _compile_pattern_lines() function for more details.
_FNMATCH_PREFIX, _FNMATCH_SUFFIX = fnmatch.translate('_').split('_')
_FNMATCH_SLICE = slice(len(_FNMATCH_PREFIX), -len(_FNMATCH_SUFFIX))

# Translation tables, keyed by path separator, that swap that separator with
# the newline character (and vice versa). Every key passed to str.maketrans()
# must be exactly one character long.
_SWAP_SEP_AND_NEWLINE = {
    '/': str.maketrans({'/': '\n', '\n': '/'}),
    '\\': str.maketrans({'\\': '\n', '\n': '\\'}),
}
79+
80+
6481@functools .lru_cache ()
6582def _make_selector (pattern_parts , flavour , case_sensitive ):
6683 pat = pattern_parts [0 ]
@@ -92,6 +109,51 @@ def _compile_pattern(pat, case_sensitive):
92109 return re .compile (fnmatch .translate (pat ), flags ).match
93110
94111
@functools.lru_cache()
def _compile_pattern_lines(pattern_lines, case_sensitive):
    """Compile the given pattern lines to an `re.Pattern` object.

    The *pattern_lines* argument is a glob-style pattern (e.g. '**/*.py') with
    its path separators and newlines swapped (e.g. '**\\n*.py'). By using
    newlines to separate path components, and not setting `re.DOTALL`, we
    ensure that the `*` wildcard cannot match path separators.

    If *case_sensitive* is false, the pattern is compiled with
    `re.IGNORECASE`.

    The returned `re.Pattern` object may have its `match()` method called to
    match a complete pattern, or `search()` to match from the right. The
    argument supplied to these methods must also have its path separators and
    newlines swapped.

    Raises ValueError if '**' appears inside a path component together with
    other characters.
    """

    # Match the start of the path, or just after a path separator
    parts = ['^']
    # Split after each newline, keeping the newline attached to its component.
    # str.splitlines() is deliberately avoided: it also splits on '\r',
    # '\x0b', '\x0c', '\u2028' and other line boundaries, which are ordinary
    # filename characters here rather than (swapped) path separators.
    for part in re.findall(r'[^\n]*\n|[^\n]+', pattern_lines):
        if part == '**\n':
            # '**/' component: we use '[\s\S]' rather than '.' so that path
            # separators (i.e. newlines) are matched. The trailing '^' ensures
            # we terminate after a path separator (i.e. on a new line).
            part = r'[\s\S]*^'
        elif part == '**':
            # '**' component.
            part = r'[\s\S]*'
        elif '**' in part:
            raise ValueError("Invalid pattern: '**' can only be an entire path component")
        else:
            # Any other component: pass to fnmatch.translate(). We slice off
            # the common prefix and suffix added by translate() to ensure that
            # re.DOTALL is not set, and the end of the string not matched,
            # respectively. With DOTALL not set, '*' wildcards will not match
            # path separators, because the '.' characters in the pattern will
            # not match newlines.
            part = fnmatch.translate(part)[_FNMATCH_SLICE]
        parts.append(part)
    # Match the end of the path, always.
    parts.append(r'\Z')
    flags = re.MULTILINE
    if not case_sensitive:
        flags |= re.IGNORECASE
    return re.compile(''.join(parts), flags=flags)
155+
156+
95157class _Selector :
96158 """A selector matches a specific glob pattern part against the children
97159 of a given path."""
@@ -276,6 +338,10 @@ class PurePath:
276338 # to implement comparison methods like `__lt__()`.
277339 '_parts_normcase_cached' ,
278340
341+ # The `_lines_cached` slot stores the string path with path separators
342+ # and newlines swapped. This is used to implement `match()`.
343+ '_lines_cached' ,
344+
279345 # The `_hash` slot stores the hash of the case-normalized string
280346 # path. It's set when `__hash__()` is called for the first time.
281347 '_hash' ,
@@ -441,6 +507,16 @@ def _parts_normcase(self):
441507 self ._parts_normcase_cached = self ._str_normcase .split (self ._flavour .sep )
442508 return self ._parts_normcase_cached
443509
@property
def _lines(self):
    """Return ``str(self)`` with path separators and newlines swapped.

    The translation table is looked up by ``self._flavour.sep``. The result
    is used for pattern matching and is stored in ``_lines_cached`` on first
    access, so later accesses return the stored string.
    """
    try:
        return self._lines_cached
    except AttributeError:
        # First access: compute the swapped form and remember it.
        trans = _SWAP_SEP_AND_NEWLINE[self._flavour.sep]
        self._lines_cached = str(self).translate(trans)
        return self._lines_cached
519+
444520 def __eq__ (self , other ):
445521 if not isinstance (other , PurePath ):
446522 return NotImplemented
def match(self, path_pattern, *, case_sensitive=None):
    """
    Return True if this path matches the given pattern.

    *path_pattern* may be a PurePath instance; anything else is converted
    with ``self.with_segments()``. When *case_sensitive* is None, the
    default for this path's flavour is used.

    A pattern with a drive or root is matched against the whole path.
    A pattern with neither is matched from the right. A pattern with no
    drive, root or tail raises ValueError("empty pattern").
    """
    if not isinstance(path_pattern, PurePath):
        path_pattern = self.with_segments(path_pattern)
    if case_sensitive is None:
        case_sensitive = _is_case_sensitive(self._flavour)
    pattern = _compile_pattern_lines(path_pattern._lines, case_sensitive)
    if path_pattern.drive or path_pattern.root:
        # Anchored pattern: the regex must match from the very start.
        return pattern.match(self._lines) is not None
    elif path_pattern._tail:
        # Relative pattern: the regex may start at any component boundary
        # and always has to reach the end of the path.
        return pattern.search(self._lines) is not None
    else:
        raise ValueError("empty pattern")
717788
718789# Subclassing os.PathLike makes isinstance() checks slower,
719790# which in turn makes Path construction slower. Register instead!
0 commit comments