second commit
This commit is contained in:
463
env/lib/python3.11/site-packages/pathvalidate/_filename.py
vendored
Normal file
463
env/lib/python3.11/site-packages/pathvalidate/_filename.py
vendored
Normal file
@ -0,0 +1,463 @@
|
||||
"""
|
||||
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
|
||||
"""
|
||||
|
||||
import itertools
|
||||
import posixpath
|
||||
import re
|
||||
import warnings
|
||||
from pathlib import Path, PurePath
|
||||
from typing import Optional, Pattern, Sequence, Tuple
|
||||
|
||||
from ._base import AbstractSanitizer, AbstractValidator, BaseFile, BaseValidator
|
||||
from ._common import findall_to_str, is_nt_abspath, to_str, truncate_str, validate_pathtype
|
||||
from ._const import DEFAULT_MIN_LEN, INVALID_CHAR_ERR_MSG_TMPL, Platform
|
||||
from ._types import PathType, PlatformType
|
||||
from .error import ErrorAttrKey, ErrorReason, InvalidCharError, ValidationError
|
||||
from .handler import ReservedNameHandler, ValidationErrorHandler
|
||||
|
||||
|
||||
_DEFAULT_MAX_FILENAME_LEN = 255
|
||||
_RE_INVALID_FILENAME = re.compile(f"[{re.escape(BaseFile._INVALID_FILENAME_CHARS):s}]", re.UNICODE)
|
||||
_RE_INVALID_WIN_FILENAME = re.compile(
|
||||
f"[{re.escape(BaseFile._INVALID_WIN_FILENAME_CHARS):s}]", re.UNICODE
|
||||
)
|
||||
|
||||
|
||||
class FileNameSanitizer(AbstractSanitizer):
|
||||
def __init__(
|
||||
self,
|
||||
max_len: int = _DEFAULT_MAX_FILENAME_LEN,
|
||||
fs_encoding: Optional[str] = None,
|
||||
platform: Optional[PlatformType] = None,
|
||||
null_value_handler: Optional[ValidationErrorHandler] = None,
|
||||
reserved_name_handler: Optional[ValidationErrorHandler] = None,
|
||||
additional_reserved_names: Optional[Sequence[str]] = None,
|
||||
validate_after_sanitize: bool = False,
|
||||
validator: Optional[AbstractValidator] = None,
|
||||
) -> None:
|
||||
if validator:
|
||||
fname_validator = validator
|
||||
else:
|
||||
fname_validator = FileNameValidator(
|
||||
min_len=DEFAULT_MIN_LEN,
|
||||
max_len=max_len,
|
||||
fs_encoding=fs_encoding,
|
||||
check_reserved=True,
|
||||
additional_reserved_names=additional_reserved_names,
|
||||
platform=platform,
|
||||
)
|
||||
|
||||
super().__init__(
|
||||
max_len=max_len,
|
||||
fs_encoding=fs_encoding,
|
||||
null_value_handler=null_value_handler,
|
||||
reserved_name_handler=reserved_name_handler,
|
||||
additional_reserved_names=additional_reserved_names,
|
||||
platform=platform,
|
||||
validate_after_sanitize=validate_after_sanitize,
|
||||
validator=fname_validator,
|
||||
)
|
||||
|
||||
self._sanitize_regexp = self._get_sanitize_regexp()
|
||||
|
||||
def sanitize(self, value: PathType, replacement_text: str = "") -> PathType:
|
||||
try:
|
||||
validate_pathtype(value, allow_whitespaces=not self._is_windows(include_universal=True))
|
||||
except ValidationError as e:
|
||||
if e.reason == ErrorReason.NULL_NAME:
|
||||
if isinstance(value, PurePath):
|
||||
raise
|
||||
|
||||
return self._null_value_handler(e) # type: ignore
|
||||
raise
|
||||
|
||||
sanitized_filename = self._sanitize_regexp.sub(replacement_text, str(value))
|
||||
sanitized_filename = truncate_str(sanitized_filename, self._fs_encoding, self.max_len)
|
||||
|
||||
try:
|
||||
self._validator.validate(sanitized_filename)
|
||||
except ValidationError as e:
|
||||
if e.reason == ErrorReason.RESERVED_NAME:
|
||||
replacement_word = self._reserved_name_handler(e)
|
||||
if e.reserved_name != replacement_word:
|
||||
sanitized_filename = re.sub(
|
||||
re.escape(e.reserved_name), replacement_word, sanitized_filename
|
||||
)
|
||||
elif e.reason == ErrorReason.INVALID_CHARACTER and self._is_windows(
|
||||
include_universal=True
|
||||
):
|
||||
# Do not start a file or directory name with a space
|
||||
sanitized_filename = sanitized_filename.lstrip(" ")
|
||||
|
||||
# Do not end a file or directory name with a space or a period
|
||||
sanitized_filename = sanitized_filename.rstrip(" ")
|
||||
if sanitized_filename not in (".", ".."):
|
||||
sanitized_filename = sanitized_filename.rstrip(" .")
|
||||
elif e.reason == ErrorReason.NULL_NAME:
|
||||
sanitized_filename = self._null_value_handler(e)
|
||||
|
||||
if self._validate_after_sanitize:
|
||||
try:
|
||||
self._validator.validate(sanitized_filename)
|
||||
except ValidationError as e:
|
||||
raise ValidationError(
|
||||
description=str(e),
|
||||
reason=ErrorReason.INVALID_AFTER_SANITIZE,
|
||||
platform=self.platform,
|
||||
)
|
||||
|
||||
if isinstance(value, PurePath):
|
||||
return Path(sanitized_filename) # type: ignore
|
||||
|
||||
return sanitized_filename # type: ignore
|
||||
|
||||
def _get_sanitize_regexp(self) -> Pattern[str]:
|
||||
if self._is_windows(include_universal=True):
|
||||
return _RE_INVALID_WIN_FILENAME
|
||||
|
||||
return _RE_INVALID_FILENAME
|
||||
|
||||
|
||||
class FileNameValidator(BaseValidator):
|
||||
_WINDOWS_RESERVED_FILE_NAMES = ("CON", "PRN", "AUX", "CLOCK$", "NUL") + tuple(
|
||||
f"{name:s}{num:d}" for name, num in itertools.product(("COM", "LPT"), range(1, 10))
|
||||
)
|
||||
_MACOS_RESERVED_FILE_NAMES = (":",)
|
||||
|
||||
@property
|
||||
def reserved_keywords(self) -> Tuple[str, ...]:
|
||||
common_keywords = super().reserved_keywords
|
||||
|
||||
if self._is_universal():
|
||||
word_set = set(
|
||||
common_keywords
|
||||
+ self._WINDOWS_RESERVED_FILE_NAMES
|
||||
+ self._MACOS_RESERVED_FILE_NAMES
|
||||
)
|
||||
elif self._is_windows():
|
||||
word_set = set(common_keywords + self._WINDOWS_RESERVED_FILE_NAMES)
|
||||
elif self._is_posix() or self._is_macos():
|
||||
word_set = set(common_keywords + self._MACOS_RESERVED_FILE_NAMES)
|
||||
else:
|
||||
word_set = set(common_keywords)
|
||||
|
||||
return tuple(sorted(word_set))
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
min_len: int = DEFAULT_MIN_LEN,
|
||||
max_len: int = _DEFAULT_MAX_FILENAME_LEN,
|
||||
fs_encoding: Optional[str] = None,
|
||||
platform: Optional[PlatformType] = None,
|
||||
check_reserved: bool = True,
|
||||
additional_reserved_names: Optional[Sequence[str]] = None,
|
||||
) -> None:
|
||||
super().__init__(
|
||||
min_len=min_len,
|
||||
max_len=max_len,
|
||||
fs_encoding=fs_encoding,
|
||||
check_reserved=check_reserved,
|
||||
additional_reserved_names=additional_reserved_names,
|
||||
platform=platform,
|
||||
)
|
||||
|
||||
def validate(self, value: PathType) -> None:
|
||||
validate_pathtype(value, allow_whitespaces=not self._is_windows(include_universal=True))
|
||||
|
||||
unicode_filename = to_str(value)
|
||||
byte_ct = len(unicode_filename.encode(self._fs_encoding))
|
||||
|
||||
self.validate_abspath(unicode_filename)
|
||||
|
||||
err_kwargs = {
|
||||
ErrorAttrKey.REASON: ErrorReason.INVALID_LENGTH,
|
||||
ErrorAttrKey.PLATFORM: self.platform,
|
||||
ErrorAttrKey.FS_ENCODING: self._fs_encoding,
|
||||
ErrorAttrKey.BYTE_COUNT: byte_ct,
|
||||
}
|
||||
if byte_ct > self.max_len:
|
||||
raise ValidationError(
|
||||
[
|
||||
f"filename is too long: expected<={self.max_len:d} bytes, actual={byte_ct:d} bytes"
|
||||
],
|
||||
**err_kwargs,
|
||||
)
|
||||
if byte_ct < self.min_len:
|
||||
raise ValidationError(
|
||||
[
|
||||
f"filename is too short: expected>={self.min_len:d} bytes, actual={byte_ct:d} bytes"
|
||||
],
|
||||
**err_kwargs,
|
||||
)
|
||||
|
||||
self._validate_reserved_keywords(unicode_filename)
|
||||
self.__validate_universal_filename(unicode_filename)
|
||||
|
||||
if self._is_windows(include_universal=True):
|
||||
self.__validate_win_filename(unicode_filename)
|
||||
|
||||
def validate_abspath(self, value: str) -> None:
|
||||
err = ValidationError(
|
||||
description=f"found an absolute path ({value}), expected a filename",
|
||||
platform=self.platform,
|
||||
reason=ErrorReason.FOUND_ABS_PATH,
|
||||
)
|
||||
|
||||
if self._is_windows(include_universal=True):
|
||||
if is_nt_abspath(value):
|
||||
raise err
|
||||
|
||||
if posixpath.isabs(value):
|
||||
raise err
|
||||
|
||||
def __validate_universal_filename(self, unicode_filename: str) -> None:
|
||||
match = _RE_INVALID_FILENAME.findall(unicode_filename)
|
||||
if match:
|
||||
raise InvalidCharError(
|
||||
INVALID_CHAR_ERR_MSG_TMPL.format(
|
||||
invalid=findall_to_str(match), value=repr(unicode_filename)
|
||||
),
|
||||
platform=Platform.UNIVERSAL,
|
||||
)
|
||||
|
||||
def __validate_win_filename(self, unicode_filename: str) -> None:
|
||||
match = _RE_INVALID_WIN_FILENAME.findall(unicode_filename)
|
||||
if match:
|
||||
raise InvalidCharError(
|
||||
INVALID_CHAR_ERR_MSG_TMPL.format(
|
||||
invalid=findall_to_str(match), value=repr(unicode_filename)
|
||||
),
|
||||
platform=Platform.WINDOWS,
|
||||
)
|
||||
|
||||
if unicode_filename in (".", ".."):
|
||||
return
|
||||
|
||||
KB2829981_err_tmpl = "{}. Refer: https://learn.microsoft.com/en-us/troubleshoot/windows-client/shell-experience/file-folder-name-whitespace-characters" # noqa: E501
|
||||
|
||||
if unicode_filename[-1] in (" ", "."):
|
||||
raise InvalidCharError(
|
||||
INVALID_CHAR_ERR_MSG_TMPL.format(
|
||||
invalid=re.escape(unicode_filename[-1]), value=repr(unicode_filename)
|
||||
),
|
||||
platform=Platform.WINDOWS,
|
||||
description=KB2829981_err_tmpl.format(
|
||||
"Do not end a file or directory name with a space or a period"
|
||||
),
|
||||
)
|
||||
|
||||
if unicode_filename[0] in (" "):
|
||||
raise InvalidCharError(
|
||||
INVALID_CHAR_ERR_MSG_TMPL.format(
|
||||
invalid=re.escape(unicode_filename[0]), value=repr(unicode_filename)
|
||||
),
|
||||
platform=Platform.WINDOWS,
|
||||
description=KB2829981_err_tmpl.format(
|
||||
"Do not start a file or directory name with a space"
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def validate_filename(
|
||||
filename: PathType,
|
||||
platform: Optional[PlatformType] = None,
|
||||
min_len: int = DEFAULT_MIN_LEN,
|
||||
max_len: int = _DEFAULT_MAX_FILENAME_LEN,
|
||||
fs_encoding: Optional[str] = None,
|
||||
check_reserved: bool = True,
|
||||
additional_reserved_names: Optional[Sequence[str]] = None,
|
||||
) -> None:
|
||||
"""Verifying whether the ``filename`` is a valid file name or not.
|
||||
|
||||
Args:
|
||||
filename:
|
||||
Filename to validate.
|
||||
platform:
|
||||
Target platform name of the filename.
|
||||
|
||||
.. include:: platform.txt
|
||||
min_len:
|
||||
Minimum byte length of the ``filename``. The value must be greater or equal to one.
|
||||
Defaults to ``1``.
|
||||
max_len:
|
||||
Maximum byte length of the ``filename``. The value must be lower than:
|
||||
|
||||
- ``Linux``: 4096
|
||||
- ``macOS``: 1024
|
||||
- ``Windows``: 260
|
||||
- ``universal``: 260
|
||||
|
||||
Defaults to ``255``.
|
||||
fs_encoding:
|
||||
Filesystem encoding that used to calculate the byte length of the filename.
|
||||
If |None|, get the value from the execution environment.
|
||||
check_reserved:
|
||||
If |True|, check the reserved names of the ``platform``.
|
||||
additional_reserved_names:
|
||||
Additional reserved names to check.
|
||||
Case insensitive.
|
||||
|
||||
Raises:
|
||||
ValidationError (ErrorReason.INVALID_LENGTH):
|
||||
If the ``filename`` is longer than ``max_len`` characters.
|
||||
ValidationError (ErrorReason.INVALID_CHARACTER):
|
||||
If the ``filename`` includes invalid character(s) for a filename:
|
||||
|invalid_filename_chars|.
|
||||
The following characters are also invalid for Windows platforms:
|
||||
|invalid_win_filename_chars|.
|
||||
ValidationError (ErrorReason.RESERVED_NAME):
|
||||
If the ``filename`` equals the reserved name by OS.
|
||||
Windows reserved name is as follows:
|
||||
``"CON"``, ``"PRN"``, ``"AUX"``, ``"NUL"``, ``"COM[1-9]"``, ``"LPT[1-9]"``.
|
||||
|
||||
Example:
|
||||
:ref:`example-validate-filename`
|
||||
|
||||
See Also:
|
||||
`Naming Files, Paths, and Namespaces - Win32 apps | Microsoft Docs
|
||||
<https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file>`__
|
||||
"""
|
||||
|
||||
FileNameValidator(
|
||||
platform=platform,
|
||||
min_len=min_len,
|
||||
max_len=max_len,
|
||||
fs_encoding=fs_encoding,
|
||||
check_reserved=check_reserved,
|
||||
additional_reserved_names=additional_reserved_names,
|
||||
).validate(filename)
|
||||
|
||||
|
||||
def is_valid_filename(
|
||||
filename: PathType,
|
||||
platform: Optional[PlatformType] = None,
|
||||
min_len: int = DEFAULT_MIN_LEN,
|
||||
max_len: Optional[int] = None,
|
||||
fs_encoding: Optional[str] = None,
|
||||
check_reserved: bool = True,
|
||||
additional_reserved_names: Optional[Sequence[str]] = None,
|
||||
) -> bool:
|
||||
"""Check whether the ``filename`` is a valid name or not.
|
||||
|
||||
Args:
|
||||
filename:
|
||||
A filename to be checked.
|
||||
platform:
|
||||
Target platform name of the filename.
|
||||
|
||||
Example:
|
||||
:ref:`example-is-valid-filename`
|
||||
|
||||
See Also:
|
||||
:py:func:`.validate_filename()`
|
||||
"""
|
||||
|
||||
return FileNameValidator(
|
||||
platform=platform,
|
||||
min_len=min_len,
|
||||
max_len=-1 if max_len is None else max_len,
|
||||
fs_encoding=fs_encoding,
|
||||
check_reserved=check_reserved,
|
||||
additional_reserved_names=additional_reserved_names,
|
||||
).is_valid(filename)
|
||||
|
||||
|
||||
def sanitize_filename(
|
||||
filename: PathType,
|
||||
replacement_text: str = "",
|
||||
platform: Optional[PlatformType] = None,
|
||||
max_len: Optional[int] = _DEFAULT_MAX_FILENAME_LEN,
|
||||
fs_encoding: Optional[str] = None,
|
||||
check_reserved: Optional[bool] = None,
|
||||
null_value_handler: Optional[ValidationErrorHandler] = None,
|
||||
reserved_name_handler: Optional[ValidationErrorHandler] = None,
|
||||
additional_reserved_names: Optional[Sequence[str]] = None,
|
||||
validate_after_sanitize: bool = False,
|
||||
) -> PathType:
|
||||
"""Make a valid filename from a string.
|
||||
|
||||
To make a valid filename, the function does the following:
|
||||
|
||||
- Replace invalid characters as file names included in the ``filename``
|
||||
with the ``replacement_text``. Invalid characters are:
|
||||
|
||||
- unprintable characters
|
||||
- |invalid_filename_chars|
|
||||
- for Windows (or universal) only: |invalid_win_filename_chars|
|
||||
|
||||
- Replace a value if a sanitized value is a reserved name by operating systems
|
||||
with a specified handler by ``reserved_name_handler``.
|
||||
|
||||
Args:
|
||||
filename: Filename to sanitize.
|
||||
replacement_text:
|
||||
Replacement text for invalid characters. Defaults to ``""``.
|
||||
platform:
|
||||
Target platform name of the filename.
|
||||
|
||||
.. include:: platform.txt
|
||||
max_len:
|
||||
Maximum byte length of the ``filename``.
|
||||
Truncate the name length if the ``filename`` length exceeds this value.
|
||||
Defaults to ``255``.
|
||||
fs_encoding:
|
||||
Filesystem encoding that used to calculate the byte length of the filename.
|
||||
If |None|, get the value from the execution environment.
|
||||
check_reserved:
|
||||
[Deprecated] Use 'reserved_name_handler' instead.
|
||||
null_value_handler:
|
||||
Function called when a value after sanitization is an empty string.
|
||||
You can specify predefined handlers:
|
||||
|
||||
- :py:func:`~.handler.NullValueHandler.return_null_string`
|
||||
- :py:func:`~.handler.NullValueHandler.return_timestamp`
|
||||
- :py:func:`~.handler.raise_error`
|
||||
|
||||
Defaults to :py:func:`.handler.NullValueHandler.return_null_string` that just return ``""``.
|
||||
reserved_name_handler:
|
||||
Function called when a value after sanitization is a reserved name.
|
||||
You can specify predefined handlers:
|
||||
|
||||
- :py:meth:`~.handler.ReservedNameHandler.add_leading_underscore`
|
||||
- :py:meth:`~.handler.ReservedNameHandler.add_trailing_underscore`
|
||||
- :py:meth:`~.handler.ReservedNameHandler.as_is`
|
||||
- :py:func:`~.handler.raise_error`
|
||||
|
||||
Defaults to :py:func:`.handler.add_trailing_underscore`.
|
||||
additional_reserved_names:
|
||||
Additional reserved names to sanitize.
|
||||
Case insensitive.
|
||||
validate_after_sanitize:
|
||||
Execute validation after sanitization to the file name.
|
||||
|
||||
Returns:
|
||||
Same type as the ``filename`` (str or PathLike object):
|
||||
Sanitized filename.
|
||||
|
||||
Raises:
|
||||
ValueError:
|
||||
If the ``filename`` is an invalid filename.
|
||||
|
||||
Example:
|
||||
:ref:`example-sanitize-filename`
|
||||
"""
|
||||
|
||||
if check_reserved is not None:
|
||||
warnings.warn(
|
||||
"'check_reserved' is deprecated. Use 'reserved_name_handler' instead.",
|
||||
DeprecationWarning,
|
||||
)
|
||||
|
||||
if check_reserved is False:
|
||||
reserved_name_handler = ReservedNameHandler.as_is
|
||||
|
||||
return FileNameSanitizer(
|
||||
platform=platform,
|
||||
max_len=-1 if max_len is None else max_len,
|
||||
fs_encoding=fs_encoding,
|
||||
null_value_handler=null_value_handler,
|
||||
reserved_name_handler=reserved_name_handler,
|
||||
additional_reserved_names=additional_reserved_names,
|
||||
validate_after_sanitize=validate_after_sanitize,
|
||||
).sanitize(filename, replacement_text)
|
Reference in New Issue
Block a user