2026-02-01 09:31:38 +01:00

848 lines
24 KiB
Python

"""
This module provides utility methods for dealing with path-specs.
"""
import os
import os.path
import pathlib
import posixpath
import stat
from collections.abc import (
Collection,
Iterable,
Iterator,
Sequence)
from dataclasses import (
dataclass)
from typing import (
Any,
Callable, # Replaced by `collections.abc.Callable` in 3.9.2.
Generic,
Optional, # Replaced by `X | None` in 3.10.
TypeVar,
Union) # Replaced by `X | Y` in 3.10.
from .pattern import (
Pattern)
from ._typing import (
AnyStr, # Removed in 3.18.
deprecated) # Added in 3.13.
StrPath = Union[str, os.PathLike[str]]
"""
Type alias for a path given either as a :class:`str` or as an
:class:`os.PathLike` of :class:`str`.
"""

TStrPath = TypeVar("TStrPath", bound=StrPath)
"""
Type variable bound to :data:`StrPath` (:class:`str` or :class:`os.PathLike`).
"""

NORMALIZE_PATH_SEPS = [
    _native_sep
    for _native_sep in (os.sep, os.altsep)
    if _native_sep not in (None, "", posixpath.sep)
]
"""
*NORMALIZE_PATH_SEPS* (:class:`list` of :class:`str`) holds the path separators
of the current operating system that must be rewritten to the POSIX separator.
It is built from :data:`os.sep` and :data:`os.altsep`, skipping any that is
unset or already equal to the POSIX separator.
"""

_registered_patterns = {}
"""
*_registered_patterns* (:class:`dict`) maps each registered name (:class:`str`)
to its pattern factory (:class:`~collections.abc.Callable`).
"""
def append_dir_sep(path: pathlib.Path) -> str:
    """
    Converts the path to a string and, when the path is a directory, appends the
    operating system path separator. The trailing separator lets later code
    tell directories from files without querying the file-system again.

    *path* (:class:`pathlib.Path`) is the path to use.

    Returns the path (:class:`str`).
    """
    rendered = str(path)
    # Only directories receive the trailing separator.
    return rendered + os.sep if path.is_dir() else rendered
def check_match_file(
    patterns: Iterable[tuple[int, Pattern]],
    file: str,
    is_reversed: Optional[bool] = None,
) -> tuple[Optional[bool], Optional[int]]:
    """
    Check the file against the patterns.

    *patterns* (:class:`~collections.abc.Iterable`) yields each indexed pattern
    (:class:`tuple`) which contains the pattern index (:class:`int`) and actual
    pattern (:class:`.Pattern`).

    *file* (:class:`str`) is the normalized file path to be matched against
    *patterns*.

    *is_reversed* (:class:`bool` or :data:`None`) is whether the order of the
    patterns has been reversed. Default is :data:`None` for :data:`False`.
    Reversing the order of the patterns is an optimization.

    Returns a :class:`tuple` containing whether to include *file* (:class:`bool`
    or :data:`None`), and the index of the last matched pattern (:class:`int` or
    :data:`None`).
    """
    # Lazily produce one (include, index) pair per pattern that is not a
    # null-pattern and that matches the file.
    hits = (
        (candidate.include, position)
        for position, candidate in patterns
        if candidate.include is not None
        and candidate.match_file(file) is not None
    )

    if is_reversed:
        # The patterns arrive in reverse order, so the first hit wins and the
        # remaining patterns are never examined.
        return next(hits, (None, None))

    # The patterns arrive in natural order, so the last hit wins and every
    # pattern must be examined.
    outcome: tuple[Optional[bool], Optional[int]] = (None, None)
    for outcome in hits:
        pass
    return outcome
def detailed_match_files(
    patterns: Iterable[Pattern],
    files: Iterable[str],
    all_matches: Optional[bool] = None,
) -> dict[str, 'MatchDetail']:
    """
    Matches the files to the patterns, and returns which patterns matched the
    files.

    *patterns* (:class:`~collections.abc.Iterable` of :class:`.Pattern`) contains
    the patterns to use. Patterns whose *include* is :data:`None` are skipped.

    *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains the
    normalized file paths to be matched against *patterns*.

    *all_matches* (:class:`bool` or :data:`None`) is whether to return all
    matched patterns (:data:`True`), or only the last matched pattern
    (:data:`False`). Default is :data:`None` for :data:`False`.

    Returns the matched files (:class:`dict`) which maps each matched file
    (:class:`str`) to the patterns that matched in order (:class:`.MatchDetail`).
    A file matched by an excluding pattern is dropped from the result; if that
    file had not been included by an earlier pattern, the exclusion is a no-op.
    """
    # The files are matched once per pattern, so a one-shot iterable has to be
    # materialized first.
    all_files = files if isinstance(files, Collection) else list(files)
    return_files = {}
    for pattern in patterns:
        if pattern.include is None:
            # Null-patterns never affect the result.
            continue

        result_files = pattern.match(all_files)  # TODO: Replace with `.match_file()`.
        if pattern.include:
            # Add files and record pattern.
            for result_file in result_files:
                detail = return_files.get(result_file)
                if detail is None:
                    return_files[result_file] = MatchDetail([pattern])
                elif all_matches:
                    detail.patterns.append(pattern)
                else:
                    # Only the last matched pattern is kept.
                    detail.patterns[0] = pattern
        else:
            # Remove files. An excluding pattern may match a file that no
            # earlier pattern included, so a missing key must be tolerated
            # instead of raising KeyError.
            for file in result_files:
                return_files.pop(file, None)

    return return_files
def _filter_check_patterns(
    patterns: Iterable[Pattern],
) -> list[tuple[int, Pattern]]:
    """
    Pairs every pattern with its position and drops the null-patterns (those
    whose *include* is :data:`None`).

    *patterns* (:class:`~collections.abc.Iterable` of :class:`.Pattern`) contains
    the patterns.

    Returns a :class:`list` containing each indexed pattern (:class:`tuple`) which
    contains the pattern index (:class:`int`) and the actual pattern
    (:class:`.Pattern`).
    """
    indexed: list = []
    for position, candidate in enumerate(patterns):
        # The index is the position in the unfiltered input, so skipping a
        # null-pattern does not renumber the patterns after it.
        if candidate.include is None:
            continue
        indexed.append((position, candidate))
    return indexed
def _is_iterable(value: Any) -> bool:
    """
    Check whether the value is an iterable, treating :class:`str` and
    :class:`bytes` as non-iterable.

    *value* is the value to check.

    Returns whether *value* is an iterable (:class:`bool`).
    """
    # Strings and bytes are technically iterable but are deliberately rejected.
    if isinstance(value, (str, bytes)):
        return False
    return isinstance(value, Iterable)
@deprecated((
    "pathspec.util.iter_tree() is deprecated. Use iter_tree_files() instead."
))
def iter_tree(root, on_error=None, follow_links=None):
    """
    .. version-deprecated:: 0.10.0
        This is an alias for the :func:`.iter_tree_files` function.

    *root*, *on_error* and *follow_links* are passed unchanged to
    :func:`.iter_tree_files`; see that function for their meaning.

    Returns whatever :func:`.iter_tree_files` returns for those arguments.
    """
    # Pure delegation: no validation or processing happens here.
    return iter_tree_files(root, on_error=on_error, follow_links=follow_links)
def iter_tree_entries(
    root: StrPath,
    on_error: Optional[Callable[[OSError], None]] = None,
    follow_links: Optional[bool] = None,
) -> Iterator['TreeEntry']:
    """
    Walks the specified directory for all files and directories.

    *root* (:class:`str` or :class:`os.PathLike`) is the root directory to search.

    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally is
    the error handler for file-system exceptions. It will be called with the
    exception (:exc:`OSError`). Reraise the exception to abort the walk. Default
    is :data:`None` to ignore file-system exceptions.

    *follow_links* (:class:`bool` or :data:`None`) optionally is whether to walk
    symbolic links that resolve to directories. Default is :data:`None` for
    :data:`True`.

    Raises :exc:`TypeError` if *on_error* is neither :data:`None` nor callable.
    Because this is a generator, the check runs when iteration starts.

    Raises :exc:`.RecursionError` if recursion is detected.

    Returns an :class:`~collections.abc.Iterator` yielding each file or directory
    entry (:class:`.TreeEntry`) relative to *root*.
    """
    if not (on_error is None or callable(on_error)):
        raise TypeError(f"on_error:{on_error!r} is not callable.")

    walk_links = True if follow_links is None else follow_links

    # Start at the root itself (empty relative path) with a fresh ancestor memo.
    yield from _iter_tree_entries_next(
        os.path.abspath(root), '', {}, on_error, walk_links,
    )
def _iter_tree_entries_next(
    root_full: str,
    dir_rel: str,
    memo: dict[str, str],
    on_error: Callable[[OSError], None],
    follow_links: bool,
) -> Iterator['TreeEntry']:
    """
    Scan the directory and yield an entry for each descendant directory, file
    and unfollowed link.

    *root_full* (:class:`str`) the absolute path to the root directory.

    *dir_rel* (:class:`str`) the path to the directory to scan relative to
    *root_full*.

    *memo* (:class:`dict`) keeps track of ancestor directories encountered. Maps
    each ancestor real path (:class:`str`) to relative path (:class:`str`).

    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally is
    the error handler for file-system exceptions raised while inspecting a child
    node. The child is skipped after the handler returns.

    *follow_links* (:class:`bool`) is whether to walk symbolic links that resolve
    to directories.

    Raises :exc:`.RecursionError` if the real path of the directory is already
    recorded in *memo*.

    Yields each entry (:class:`.TreeEntry`).
    """
    here_full = os.path.join(root_full, dir_rel)
    here_real = os.path.realpath(here_full)

    # Every directory currently being walked is recorded under its canonical
    # (real) path. Meeting the same canonical path again means a link loops
    # back to one of its own ancestors.
    if here_real in memo:
        raise RecursionError(real_path=here_real, first_path=memo[here_real], second_path=dir_rel)
    memo[here_real] = dir_rel

    with os.scandir(here_full) as children:
        child: os.DirEntry
        for child in children:
            child_rel = os.path.join(dir_rel, child.name)

            # Stat the child itself, without following a link.
            try:
                child_lstat = child.stat(follow_symlinks=False)
            except OSError as err:
                if on_error is not None:
                    on_error(err)
                continue

            # For a link, additionally stat its target; otherwise both stat
            # results are the same object.
            child_stat = child_lstat
            if child.is_symlink():
                try:
                    child_stat = child.stat()
                except OSError as err:
                    if on_error is not None:
                        on_error(err)
                    continue

            descend = child.is_dir(follow_symlinks=follow_links)
            if descend or child.is_file() or child.is_symlink():
                # Directories, files and unfollowed links are all reported.
                yield TreeEntry(child.name, child_rel, child_lstat, child_stat)
            if descend:
                # A directory is reported first, then its descendants.
                yield from _iter_tree_entries_next(root_full, child_rel, memo, on_error, follow_links)

    # NOTE: The canonical (real) path must be forgotten once this directory is
    # finished so that the same directory may legitimately be reached again
    # through a different, non-recursive route. See
    # <https://github.com/cpburnz/python-path-specification/pull/7>.
    del memo[here_real]
def iter_tree_files(
    root: StrPath,
    on_error: Optional[Callable[[OSError], None]] = None,
    follow_links: Optional[bool] = None,
) -> Iterator[str]:
    """
    Walks the specified directory for all files.

    *root* (:class:`str` or :class:`os.PathLike`) is the root directory to search
    for files.

    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally is
    the error handler for file-system exceptions. It will be called with the
    exception (:exc:`OSError`). Reraise the exception to abort the walk. Default
    is :data:`None` to ignore file-system exceptions.

    *follow_links* (:class:`bool` or :data:`None`) optionally is whether to walk
    symbolic links that resolve to directories. Default is :data:`None` for
    :data:`True`.

    Raises :exc:`TypeError` if *on_error* is neither :data:`None` nor callable.
    Because this is a generator, the check runs when iteration starts.

    Raises :exc:`.RecursionError` if recursion is detected.

    Returns an :class:`~collections.abc.Iterator` yielding the path to each file
    (:class:`str`) relative to *root*.
    """
    if not (on_error is None or callable(on_error)):
        raise TypeError(f"on_error:{on_error!r} is not callable.")

    walk_links = True if follow_links is None else follow_links

    # Start at the root itself (empty relative path) with a fresh ancestor memo.
    yield from _iter_tree_files_next(
        os.path.abspath(root), '', {}, on_error, walk_links,
    )
def _iter_tree_files_next(
    root_full: str,
    dir_rel: str,
    memo: dict[str, str],
    on_error: Callable[[OSError], None],
    follow_links: bool,
) -> Iterator[str]:
    """
    Scan the directory for all descendant files.

    *root_full* (:class:`str`) the absolute path to the root directory.

    *dir_rel* (:class:`str`) the path to the directory to scan relative to
    *root_full*.

    *memo* (:class:`dict`) keeps track of ancestor directories encountered. Maps
    each ancestor real path (:class:`str`) to relative path (:class:`str`).

    *on_error* (:class:`~collections.abc.Callable` or :data:`None`) optionally is
    the error handler for file-system exceptions. Here it is only handed on to
    the recursive calls.

    *follow_links* (:class:`bool`) is whether to walk symbolic links that resolve
    to directories.

    Raises :exc:`.RecursionError` if the real path of the directory is already
    recorded in *memo*.

    Yields each file path (:class:`str`).
    """
    here_full = os.path.join(root_full, dir_rel)
    here_real = os.path.realpath(here_full)

    # Every directory currently being walked is recorded under its canonical
    # (real) path. Meeting the same canonical path again means a link loops
    # back to one of its own ancestors.
    if here_real in memo:
        raise RecursionError(real_path=here_real, first_path=memo[here_real], second_path=dir_rel)
    memo[here_real] = dir_rel

    with os.scandir(here_full) as children:
        child: os.DirEntry
        for child in children:
            child_rel = os.path.join(dir_rel, child.name)

            if child.is_dir(follow_symlinks=follow_links):
                # Directories are not yielded themselves, only their
                # descendant files.
                yield from _iter_tree_files_next(root_full, child_rel, memo, on_error, follow_links)
                continue

            # Yield regular files, and also links when links are not followed.
            if child.is_file() or (not follow_links and child.is_symlink()):
                yield child_rel

    # NOTE: The canonical (real) path must be forgotten once this directory is
    # finished so that the same directory may legitimately be reached again
    # through a different, non-recursive route. See
    # <https://github.com/cpburnz/python-path-specification/pull/7>.
    del memo[here_real]
def lookup_pattern(name: str) -> Callable[[AnyStr], Pattern]:
    """
    Looks up a registered pattern factory by name.

    *name* (:class:`str`) is the name of the pattern factory.

    Returns the registered pattern factory (:class:`~collections.abc.Callable`).

    Raises :exc:`KeyError` if no pattern factory is registered under *name*.
    """
    # Plain dictionary indexing: an unknown name propagates as KeyError.
    return _registered_patterns[name]
def match_file(patterns: Iterable[Pattern], file: str) -> bool:
    """
    Matches the file to the patterns. Every pattern is consulted in order and
    the last pattern that matches decides the outcome.

    *patterns* (:class:`~collections.abc.Iterable` of :class:`.Pattern`) contains
    the patterns to use.

    *file* (:class:`str`) is the normalized file path to be matched against
    *patterns*.

    Returns :data:`True` if *file* matched; otherwise, :data:`False`.
    """
    verdict = False
    for candidate in patterns:
        # Null-patterns never take part in matching.
        if candidate.include is None:
            continue
        if candidate.match_file(file) is None:
            continue
        # A later match overrides any earlier one.
        verdict = candidate.include
    return verdict
@deprecated((
    "pathspec.util.match_files() is deprecated. Use match_file() with a loop for "
    "better results."
))
def match_files(
    patterns: Iterable[Pattern],
    files: Iterable[str],
) -> set[str]:
    """
    .. version-deprecated:: 0.10.0
        This function is no longer used. Use the :func:`.match_file` function with a
        loop for better results.

    Matches the files to the patterns.

    *patterns* (:class:`~collections.abc.Iterable` of :class:`.Pattern`) contains
    the patterns to use.

    *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains the
    normalized file paths to be matched against *patterns*.

    Returns the matched files (:class:`set` of :class:`str`).
    """
    # Drop the null-patterns once up front; the remaining patterns are reused
    # for every file, so they must be held in a list.
    active = list(filter(lambda pat: pat.include is not None, patterns))
    return {path for path in files if match_file(active, path)}
def normalize_file(
    file: StrPath,
    separators: Optional[Collection[str]] = None,
) -> str:
    """
    Normalizes the file path to use the POSIX path separator (i.e., ``"/"``), and
    makes the path relative by removing one leading ``"/"`` or, failing that,
    one leading ``"./"``.

    *file* (:class:`str` or :class:`os.PathLike`) is the file path.

    *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
    :data:`None`) optionally contains the path separators to normalize. This does
    not need to include the POSIX path separator (``"/"``), but including it will
    not affect the results. Default is ``None`` for :data:`.NORMALIZE_PATH_SEPS`.
    To prevent normalization, pass an empty container (e.g., an empty tuple
    ``()``).

    Returns the normalized file path (:class:`str`).
    """
    active_seps = NORMALIZE_PATH_SEPS if separators is None else separators

    # Convert path object to string, then rewrite every native separator.
    result: str = os.fspath(file)
    for native_sep in active_seps:
        result = result.replace(native_sep, posixpath.sep)

    # Strip at most one prefix: the root slash takes priority over the current
    # directory prefix.
    for prefix in ('/', './'):
        if result.startswith(prefix):
            return result[len(prefix):]
    return result
@deprecated((
    "pathspec.util.normalize_files() is deprecated. Use normalize_file() with a "
    "loop for better results."
))
def normalize_files(
    files: Iterable[StrPath],
    separators: Optional[Collection[str]] = None,
) -> dict[str, list[StrPath]]:
    """
    .. version-deprecated:: 0.10.0
        This function is no longer used. Use the :func:`.normalize_file` function
        with a loop for better results.

    Normalizes the file paths to use the POSIX path separator.

    *files* (:class:`~collections.abc.Iterable` of :class:`str` or
    :class:`os.PathLike`) contains the file paths to be normalized.

    *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
    :data:`None`) optionally contains the path separators to normalize. See
    :func:`.normalize_file` for more information.

    Returns a :class:`dict` mapping each normalized file path (:class:`str`) to
    the original file paths (:class:`list` of :class:`str` or
    :class:`os.PathLike`).
    """
    grouped: dict = {}
    for original in files:
        key = normalize_file(original, separators=separators)
        # Several originals may normalize to the same path, so each key
        # collects a list in encounter order.
        grouped.setdefault(key, []).append(original)
    return grouped
def register_pattern(
    name: str,
    pattern_factory: Callable[[AnyStr], Pattern],
    override: Optional[bool] = None,
) -> None:
    """
    Registers the specified pattern factory.

    *name* (:class:`str`) is the name to register the pattern factory under.

    *pattern_factory* (:class:`~collections.abc.Callable`) is used to compile
    patterns. It must accept an uncompiled pattern (:class:`str`) and return the
    compiled pattern (:class:`.Pattern`).

    *override* (:class:`bool` or :data:`None`) optionally is whether to allow
    overriding an already registered pattern under the same name (:data:`True`),
    instead of raising an :exc:`.AlreadyRegisteredError` (:data:`False`). Default
    is :data:`None` for :data:`False`.

    Raises :exc:`TypeError` if *name* is not a string or *pattern_factory* is not
    callable.

    Raises :exc:`.AlreadyRegisteredError` if *name* is already registered and
    *override* is not set.
    """
    # Validate both arguments before touching the registry.
    if not isinstance(name, str):
        raise TypeError(f"name:{name!r} is not a string.")

    if not callable(pattern_factory):
        raise TypeError(f"pattern_factory:{pattern_factory!r} is not callable.")

    # Refuse to replace an existing registration unless explicitly allowed.
    if not override and name in _registered_patterns:
        raise AlreadyRegisteredError(name, _registered_patterns[name])

    _registered_patterns[name] = pattern_factory
class AlreadyRegisteredError(Exception):
    """
    The :exc:`AlreadyRegisteredError` exception is raised when a pattern factory
    is registered under a name already in use.
    """

    def __init__(
        self,
        name: str,
        pattern_factory: Callable[[AnyStr], Pattern],
    ) -> None:
        """
        Initializes the :exc:`AlreadyRegisteredError` instance.

        *name* (:class:`str`) is the name of the registered pattern.

        *pattern_factory* (:class:`~collections.abc.Callable`) is the registered
        pattern factory.
        """
        # Both values are stored in the exception arguments; the properties
        # below read them back by position.
        super().__init__(name, pattern_factory)

    @property
    def name(self) -> str:
        """
        *name* (:class:`str`) is the name of the registered pattern.
        """
        return self.args[0]

    @property
    def pattern_factory(self) -> Callable[[AnyStr], Pattern]:
        """
        *pattern_factory* (:class:`~collections.abc.Callable`) is the registered
        pattern factory.
        """
        return self.args[1]

    @property
    def message(self) -> str:
        """
        *message* (:class:`str`) is the error message.
        """
        text = (
            f"{self.name!r} is already registered for pattern factory="
            f"{self.pattern_factory!r}."
        )
        return text
class RecursionError(Exception):
    """
    The :exc:`RecursionError` exception is raised when recursion is detected.
    """

    def __init__(
        self,
        real_path: str,
        first_path: str,
        second_path: str,
    ) -> None:
        """
        Initializes the :exc:`RecursionError` instance.

        *real_path* (:class:`str`) is the real path that recursion was encountered
        on.

        *first_path* (:class:`str`) is the first path encountered for *real_path*.

        *second_path* (:class:`str`) is the second path encountered for *real_path*.
        """
        # All three paths are stored in the exception arguments; the
        # properties below read them back by position.
        super().__init__(real_path, first_path, second_path)

    @property
    def real_path(self) -> str:
        """
        *real_path* (:class:`str`) is the real path that recursion was
        encountered on.
        """
        return self.args[0]

    @property
    def first_path(self) -> str:
        """
        *first_path* (:class:`str`) is the first path encountered for
        :attr:`self.real_path <RecursionError.real_path>`.
        """
        return self.args[1]

    @property
    def second_path(self) -> str:
        """
        *second_path* (:class:`str`) is the second path encountered for
        :attr:`self.real_path <RecursionError.real_path>`.
        """
        return self.args[2]

    @property
    def message(self) -> str:
        """
        *message* (:class:`str`) is the error message.
        """
        text = (
            f"Real path {self.real_path!r} was encountered at {self.first_path!r} "
            f"and then {self.second_path!r}."
        )
        return text
@dataclass(frozen=True)
class CheckResult(Generic[TStrPath]):
    """
    The :class:`CheckResult` class records the outcome of checking one file: the
    file itself, whether it is included, and which pattern decided that.
    """

    # Declaring the slots explicitly keeps instances free of a per-instance
    # dictionary.
    __slots__ = ('file', 'include', 'index')

    file: TStrPath
    """
    *file* (:class:`str` or :class:`os.PathLike`) is the file path.
    """

    include: Optional[bool]
    """
    *include* (:class:`bool` or :data:`None`) is whether to include or exclude the
    file. If :data:`None`, no pattern matched.
    """

    index: Optional[int]
    """
    *index* (:class:`int` or :data:`None`) is the index of the last pattern that
    matched. If :data:`None`, no pattern matched.
    """
class MatchDetail:
    """
    The :class:`.MatchDetail` class contains information about which patterns
    matched a file.
    """

    # Declaring the slots explicitly keeps instances free of a per-instance
    # dictionary.
    __slots__ = ('patterns',)

    def __init__(self, patterns: Sequence[Pattern]) -> None:
        """
        Initialize the :class:`.MatchDetail` instance.

        *patterns* (:class:`~collections.abc.Sequence` of :class:`.Pattern`)
        contains the patterns that matched the file in the order they were
        encountered.
        """
        # The sequence is stored as given, not copied.
        self.patterns = patterns
        """
        *patterns* (:class:`~collections.abc.Sequence` of :class:`.Pattern`)
        contains the patterns that matched the file in the order they were
        encountered.
        """
class TreeEntry:
    """
    The :class:`TreeEntry` class contains information about a file-system entry.
    It caches two stat results: one for the entry itself and one for the entry
    after following a symbolic link.
    """

    # Declaring the slots explicitly keeps instances free of a per-instance
    # dictionary.
    __slots__ = ('_lstat', 'name', 'path', '_stat')

    def __init__(
        self,
        name: str,
        path: str,
        lstat: os.stat_result,
        stat: os.stat_result,
    ) -> None:
        """
        Initialize the :class:`TreeEntry` instance.

        *name* (:class:`str`) is the base name of the entry.

        *path* (:class:`str`) is the relative path of the entry.

        *lstat* (:class:`os.stat_result`) is the stat result of the direct entry.

        *stat* (:class:`os.stat_result`) is the stat result of the entry,
        potentially linked.
        """
        self._lstat: os.stat_result = lstat
        """
        *_lstat* (:class:`os.stat_result`) is the stat result of the direct entry.
        """

        self.name: str = name
        """
        *name* (:class:`str`) is the base name of the entry.
        """

        self.path: str = path
        """
        *path* (:class:`str`) is the path of the entry.
        """

        self._stat: os.stat_result = stat
        """
        *_stat* (:class:`os.stat_result`) is the stat result of the linked entry.
        """

    def is_dir(self, follow_links: Optional[bool] = None) -> bool:
        """
        Get whether the entry is a directory.

        *follow_links* (:class:`bool` or :data:`None`) is whether to follow symbolic
        links. If this is :data:`True`, a symlink to a directory will result in
        :data:`True`. Default is :data:`None` for :data:`True`.

        Returns whether the entry is a directory (:class:`bool`).
        """
        if follow_links is None or follow_links:
            return stat.S_ISDIR(self._stat.st_mode)
        return stat.S_ISDIR(self._lstat.st_mode)

    def is_file(self, follow_links: Optional[bool] = None) -> bool:
        """
        Get whether the entry is a regular file.

        *follow_links* (:class:`bool` or :data:`None`) is whether to follow symbolic
        links. If this is :data:`True`, a symlink to a regular file will result in
        :data:`True`. Default is :data:`None` for :data:`True`.

        Returns whether the entry is a regular file (:class:`bool`).
        """
        if follow_links is None or follow_links:
            return stat.S_ISREG(self._stat.st_mode)
        return stat.S_ISREG(self._lstat.st_mode)

    def is_symlink(self) -> bool:
        """
        Returns whether the entry is a symbolic link (:class:`bool`).
        """
        # Only the stat result of the entry itself can reveal a link.
        return stat.S_ISLNK(self._lstat.st_mode)

    def stat(self, follow_links: Optional[bool] = None) -> os.stat_result:
        """
        Get the cached stat result for the entry.

        *follow_links* (:class:`bool` or :data:`None`) is whether to follow symbolic
        links. If this is :data:`True`, the stat result of the linked file will be
        returned. Default is :data:`None` for :data:`True`.

        Returns that stat result (:class:`os.stat_result`).
        """
        if follow_links is None or follow_links:
            return self._stat
        return self._lstat