week06
8  env/bin/gitlab  vendored  Executable file
@@ -0,0 +1,8 @@
#!/home/dongho/netsec/env/bin/python
# -*- coding: utf-8 -*-
import re
import sys
from gitlab.cli import main
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
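This is the standard console-script shim that pip generates for the `gitlab` entry point: it normalizes argv[0] (on Windows, pip installs `gitlab-script.pyw`/`gitlab.exe` wrappers) and hands off to python-gitlab's CLI. A minimal sketch of what the argv[0] rewrite does, with an illustrative value::

    import re

    # Strip a trailing "-script.pyw" or ".exe" from argv[0] so the CLI
    # sees a plain "gitlab" program name on every platform:
    print(re.sub(r'(-script\.pyw|\.exe)?$', '', '/somewhere/bin/gitlab.exe'))
    # -> /somewhere/bin/gitlab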
8  env/bin/normalizer  vendored  Executable file
@@ -0,0 +1,8 @@
#!/home/dongho/netsec/env/bin/python
# -*- coding: utf-8 -*-
import re
import sys
from charset_normalizer.cli import cli_detect
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(cli_detect())
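The same shim pattern, here dispatching to charset-normalizer's encoding-detection CLI. Used as a library rather than a command, the package's documented `from_bytes` helper performs the same detection; a small sketch (the sample bytes are illustrative)::

    from charset_normalizer import from_bytes

    payload = "Bсеки човек има право".encode("cp1251")  # sample non-UTF-8 bytes
    best_guess = from_bytes(payload).best()
    print(best_guess.encoding)  # detected codec name
    print(str(best_guess))      # decoded text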
1  env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/INSTALLER  vendored  Normal file
@@ -0,0 +1 @@
pip
20  env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/LICENSE  vendored  Normal file
@@ -0,0 +1,20 @@
Copyright (c) 2017-2021 Ingy döt Net
Copyright (c) 2006-2016 Kirill Simonov

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
46  env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/METADATA  vendored  Normal file
@@ -0,0 +1,46 @@
Metadata-Version: 2.1
Name: PyYAML
Version: 6.0.2
Summary: YAML parser and emitter for Python
Home-page: https://pyyaml.org/
Download-URL: https://pypi.org/project/PyYAML/
Author: Kirill Simonov
Author-email: xi@resolvent.net
License: MIT
Project-URL: Bug Tracker, https://github.com/yaml/pyyaml/issues
Project-URL: CI, https://github.com/yaml/pyyaml/actions
Project-URL: Documentation, https://pyyaml.org/wiki/PyYAMLDocumentation
Project-URL: Mailing lists, http://lists.sourceforge.net/lists/listinfo/yaml-core
Project-URL: Source Code, https://github.com/yaml/pyyaml
Platform: Any
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Cython
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Text Processing :: Markup
Requires-Python: >=3.8
License-File: LICENSE

YAML is a data serialization format designed for human readability
and interaction with scripting languages. PyYAML is a YAML parser
and emitter for Python.

PyYAML features a complete YAML 1.1 parser, Unicode support, pickle
support, capable extension API, and sensible error messages. PyYAML
supports standard YAML tags and provides Python-specific tags that
allow representing an arbitrary Python object.

PyYAML is applicable for a broad range of tasks from complex
configuration files to object serialization and persistence.
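The description above summarizes the API; for reference, the most common entry points are `yaml.safe_load` for parsing and `yaml.dump` for emitting. A minimal sketch::

    import yaml

    # Parse a YAML document into native Python objects...
    config = yaml.safe_load("retries: 3\nhosts:\n  - alpha\n  - beta\n")
    print(config["hosts"])  # -> ['alpha', 'beta']
    # ...and emit it back out as YAML text.
    print(yaml.dump(config))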
44  env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/RECORD  vendored  Normal file
@@ -0,0 +1,44 @@
PyYAML-6.0.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
PyYAML-6.0.2.dist-info/LICENSE,sha256=jTko-dxEkP1jVwfLiOsmvXZBAqcoKVQwfT5RZ6V36KQ,1101
PyYAML-6.0.2.dist-info/METADATA,sha256=9-odFB5seu4pGPcEv7E8iyxNF51_uKnaNGjLAhz2lto,2060
PyYAML-6.0.2.dist-info/RECORD,,
PyYAML-6.0.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
PyYAML-6.0.2.dist-info/WHEEL,sha256=YM7r_UgTB_CA6ZLGHfbOA_dd7lb6fUn0DsfI9DvIHHE,154
PyYAML-6.0.2.dist-info/top_level.txt,sha256=rpj0IVMTisAjh_1vG3Ccf9v5jpCQwAz6cD1IVU5ZdhQ,11
_yaml/__init__.py,sha256=04Ae_5osxahpJHa3XBZUAf4wi6XX32gR8D6X6p64GEA,1402
_yaml/__pycache__/__init__.cpython-312.pyc,,
yaml/__init__.py,sha256=N35S01HMesFTe0aRRMWkPj0Pa8IEbHpE9FK7cr5Bdtw,12311
yaml/__pycache__/__init__.cpython-312.pyc,,
yaml/__pycache__/composer.cpython-312.pyc,,
yaml/__pycache__/constructor.cpython-312.pyc,,
yaml/__pycache__/cyaml.cpython-312.pyc,,
yaml/__pycache__/dumper.cpython-312.pyc,,
yaml/__pycache__/emitter.cpython-312.pyc,,
yaml/__pycache__/error.cpython-312.pyc,,
yaml/__pycache__/events.cpython-312.pyc,,
yaml/__pycache__/loader.cpython-312.pyc,,
yaml/__pycache__/nodes.cpython-312.pyc,,
yaml/__pycache__/parser.cpython-312.pyc,,
yaml/__pycache__/reader.cpython-312.pyc,,
yaml/__pycache__/representer.cpython-312.pyc,,
yaml/__pycache__/resolver.cpython-312.pyc,,
yaml/__pycache__/scanner.cpython-312.pyc,,
yaml/__pycache__/serializer.cpython-312.pyc,,
yaml/__pycache__/tokens.cpython-312.pyc,,
yaml/_yaml.cpython-312-aarch64-linux-gnu.so,sha256=kYQNF-yCT1TQJkdO87ihsv1jctF0lAaJ2wYRWZXqWRI,2456968
yaml/composer.py,sha256=_Ko30Wr6eDWUeUpauUGT3Lcg9QPBnOPVlTnIMRGJ9FM,4883
yaml/constructor.py,sha256=kNgkfaeLUkwQYY_Q6Ff1Tz2XVw_pG1xVE9Ak7z-viLA,28639
yaml/cyaml.py,sha256=6ZrAG9fAYvdVe2FK_w0hmXoG7ZYsoYUwapG8CiC72H0,3851
yaml/dumper.py,sha256=PLctZlYwZLp7XmeUdwRuv4nYOZ2UBnDIUy8-lKfLF-o,2837
yaml/emitter.py,sha256=jghtaU7eFwg31bG0B7RZea_29Adi9CKmXq_QjgQpCkQ,43006
yaml/error.py,sha256=Ah9z-toHJUbE9j-M8YpxgSRM5CgLCcwVzJgLLRF2Fxo,2533
yaml/events.py,sha256=50_TksgQiE4up-lKo_V-nBy-tAIxkIPQxY5qDhKCeHw,2445
yaml/loader.py,sha256=UVa-zIqmkFSCIYq_PgSGm4NSJttHY2Rf_zQ4_b1fHN0,2061
yaml/nodes.py,sha256=gPKNj8pKCdh2d4gr3gIYINnPOaOxGhJAUiYhGRnPE84,1440
yaml/parser.py,sha256=ilWp5vvgoHFGzvOZDItFoGjD6D42nhlZrZyjAwa0oJo,25495
yaml/reader.py,sha256=0dmzirOiDG4Xo41RnuQS7K9rkY3xjHiVasfDMNTqCNw,6794
yaml/representer.py,sha256=IuWP-cAW9sHKEnS0gCqSa894k1Bg4cgTxaDwIcbRQ-Y,14190
yaml/resolver.py,sha256=9L-VYfm4mWHxUD1Vg4X7rjDRK_7VZd6b92wzq7Y2IKY,9004
yaml/scanner.py,sha256=YEM3iLZSaQwXcQRg2l2R4MdT0zGP2F9eHkKGKnHyWQY,51279
yaml/serializer.py,sha256=ChuFgmhU01hj4xgI8GaKv6vfM2Bujwa9i7d2FAHj7cA,4165
yaml/tokens.py,sha256=lTQIzSVw8Mg9wv459-TjiOQe6wVziqaRlqX2_89rp54,2573
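Each RECORD row is `path,sha256=<digest>,size-in-bytes`, where the digest is an unpadded urlsafe-base64 SHA-256 per the wheel spec. A short sketch of re-checking an entry, using the 4-byte INSTALLER file above (`pip` plus a newline) as the example::

    import base64
    import hashlib

    # Wheel RECORD hashes are unpadded urlsafe-base64 SHA-256 digests.
    digest = hashlib.sha256(b"pip\n").digest()
    encoded = base64.urlsafe_b64encode(digest).rstrip(b"=").decode()
    print(encoded)  # should reproduce zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg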
0  env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/REQUESTED  vendored  Normal file
6  env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/WHEEL  vendored  Normal file
@@ -0,0 +1,6 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.44.0)
Root-Is-Purelib: false
Tag: cp312-cp312-manylinux_2_17_aarch64
Tag: cp312-cp312-manylinux2014_aarch64
2  env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/top_level.txt  vendored  Normal file
@@ -0,0 +1,2 @@
_yaml
yaml
BIN  env/lib/python3.12/site-packages/__pycache__/appdirs.cpython-312.pyc  vendored  Normal file
Binary file not shown.
33  env/lib/python3.12/site-packages/_yaml/__init__.py  vendored  Normal file
@@ -0,0 +1,33 @@
# This is a stub package designed to roughly emulate the _yaml
# extension module, which previously existed as a standalone module
# and has been moved into the `yaml` package namespace.
# It does not perfectly mimic its old counterpart, but should get
# close enough for anyone who's relying on it even when they shouldn't.
import yaml

# in some circumstances, the yaml module we imported may be from a different version, so we need
# to tread carefully when poking at it here (it may not have the attributes we expect)
if not getattr(yaml, '__with_libyaml__', False):
    from sys import version_info

    exc = ModuleNotFoundError if version_info >= (3, 6) else ImportError
    raise exc("No module named '_yaml'")
else:
    from yaml._yaml import *
    import warnings
    warnings.warn(
        'The _yaml extension module is now located at yaml._yaml'
        ' and its location is subject to change. To use the'
        ' LibYAML-based parser and emitter, import from `yaml`:'
        ' `from yaml import CLoader as Loader, CDumper as Dumper`.',
        DeprecationWarning
    )
    del warnings
    # Don't `del yaml` here because yaml is actually an existing
    # namespace member of _yaml.

__name__ = '_yaml'
# If the module is top-level (i.e. not a part of any specific package)
# then the attribute should be set to ''.
# https://docs.python.org/3.8/library/types.html
__package__ = ''
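The stub's DeprecationWarning spells out the supported replacement: import the LibYAML-backed classes from `yaml` itself. A minimal sketch of that pattern, with the pure-Python fallback for builds where the C extension is absent::

    import yaml

    try:
        # LibYAML-backed parser/emitter (fast C extension)
        from yaml import CLoader as Loader, CDumper as Dumper
    except ImportError:
        # Pure-Python fallback
        from yaml import Loader, Dumper

    data = yaml.load("a: 1\nb: [2, 3]\n", Loader=Loader)
    print(yaml.dump(data, Dumper=Dumper))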
BIN  env/lib/python3.12/site-packages/_yaml/__pycache__/__init__.cpython-312.pyc  vendored  Normal file
Binary file not shown.
1  env/lib/python3.12/site-packages/appdirs-1.4.4.dist-info/INSTALLER  vendored  Normal file
@@ -0,0 +1 @@
pip
23  env/lib/python3.12/site-packages/appdirs-1.4.4.dist-info/LICENSE.txt  vendored  Normal file
@@ -0,0 +1,23 @@
# This is the MIT license

Copyright (c) 2010 ActiveState Software Inc.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
264  env/lib/python3.12/site-packages/appdirs-1.4.4.dist-info/METADATA  vendored  Normal file
@@ -0,0 +1,264 @@
Metadata-Version: 2.1
Name: appdirs
Version: 1.4.4
Summary: A small Python module for determining appropriate platform-specific dirs, e.g. a "user data dir".
Home-page: http://github.com/ActiveState/appdirs
Author: Trent Mick
Author-email: trentm@gmail.com
Maintainer: Jeff Rouse
Maintainer-email: jr@its.to
License: MIT
Keywords: application directory log cache user
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.4
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Topic :: Software Development :: Libraries :: Python Modules


.. image:: https://secure.travis-ci.org/ActiveState/appdirs.png
    :target: http://travis-ci.org/ActiveState/appdirs

the problem
===========

What directory should your app use for storing user data? If running on Mac OS X, you
should use::

    ~/Library/Application Support/<AppName>

If on Windows (at least English Win XP) that should be::

    C:\Documents and Settings\<User>\Application Data\Local Settings\<AppAuthor>\<AppName>

or possibly::

    C:\Documents and Settings\<User>\Application Data\<AppAuthor>\<AppName>

for `roaming profiles <http://bit.ly/9yl3b6>`_ but that is another story.

On Linux (and other Unices) the dir, according to the `XDG
spec <http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html>`_, is::

    ~/.local/share/<AppName>


``appdirs`` to the rescue
=========================

This kind of thing is what the ``appdirs`` module is for. ``appdirs`` will
help you choose an appropriate:

- user data dir (``user_data_dir``)
- user config dir (``user_config_dir``)
- user cache dir (``user_cache_dir``)
- site data dir (``site_data_dir``)
- site config dir (``site_config_dir``)
- user log dir (``user_log_dir``)

and also:

- is a single module so other Python packages can include their own private copy
- is slightly opinionated on the directory names used. Look for "OPINION" in
  documentation and code for when an opinion is being applied.


some example output
===================

On Mac OS X::

    >>> from appdirs import *
    >>> appname = "SuperApp"
    >>> appauthor = "Acme"
    >>> user_data_dir(appname, appauthor)
    '/Users/trentm/Library/Application Support/SuperApp'
    >>> site_data_dir(appname, appauthor)
    '/Library/Application Support/SuperApp'
    >>> user_cache_dir(appname, appauthor)
    '/Users/trentm/Library/Caches/SuperApp'
    >>> user_log_dir(appname, appauthor)
    '/Users/trentm/Library/Logs/SuperApp'

On Windows 7::

    >>> from appdirs import *
    >>> appname = "SuperApp"
    >>> appauthor = "Acme"
    >>> user_data_dir(appname, appauthor)
    'C:\\Users\\trentm\\AppData\\Local\\Acme\\SuperApp'
    >>> user_data_dir(appname, appauthor, roaming=True)
    'C:\\Users\\trentm\\AppData\\Roaming\\Acme\\SuperApp'
    >>> user_cache_dir(appname, appauthor)
    'C:\\Users\\trentm\\AppData\\Local\\Acme\\SuperApp\\Cache'
    >>> user_log_dir(appname, appauthor)
    'C:\\Users\\trentm\\AppData\\Local\\Acme\\SuperApp\\Logs'

On Linux::

    >>> from appdirs import *
    >>> appname = "SuperApp"
    >>> appauthor = "Acme"
    >>> user_data_dir(appname, appauthor)
    '/home/trentm/.local/share/SuperApp'
    >>> site_data_dir(appname, appauthor)
    '/usr/local/share/SuperApp'
    >>> site_data_dir(appname, appauthor, multipath=True)
    '/usr/local/share/SuperApp:/usr/share/SuperApp'
    >>> user_cache_dir(appname, appauthor)
    '/home/trentm/.cache/SuperApp'
    >>> user_log_dir(appname, appauthor)
    '/home/trentm/.cache/SuperApp/log'
    >>> user_config_dir(appname)
    '/home/trentm/.config/SuperApp'
    >>> site_config_dir(appname)
    '/etc/xdg/SuperApp'
    >>> os.environ['XDG_CONFIG_DIRS'] = '/etc:/usr/local/etc'
    >>> site_config_dir(appname, multipath=True)
    '/etc/SuperApp:/usr/local/etc/SuperApp'


``AppDirs`` for convenience
===========================

::

    >>> from appdirs import AppDirs
    >>> dirs = AppDirs("SuperApp", "Acme")
    >>> dirs.user_data_dir
    '/Users/trentm/Library/Application Support/SuperApp'
    >>> dirs.site_data_dir
    '/Library/Application Support/SuperApp'
    >>> dirs.user_cache_dir
    '/Users/trentm/Library/Caches/SuperApp'
    >>> dirs.user_log_dir
    '/Users/trentm/Library/Logs/SuperApp'


Per-version isolation
=====================

If you have multiple versions of your app in use that you want to be
able to run side-by-side, then you may want version-isolation for these
dirs::

    >>> from appdirs import AppDirs
    >>> dirs = AppDirs("SuperApp", "Acme", version="1.0")
    >>> dirs.user_data_dir
    '/Users/trentm/Library/Application Support/SuperApp/1.0'
    >>> dirs.site_data_dir
    '/Library/Application Support/SuperApp/1.0'
    >>> dirs.user_cache_dir
    '/Users/trentm/Library/Caches/SuperApp/1.0'
    >>> dirs.user_log_dir
    '/Users/trentm/Library/Logs/SuperApp/1.0'


appdirs Changelog
=================

appdirs 1.4.4
-------------
- [PR #92] Don't import appdirs from setup.py

Project officially classified as Stable which is important
for inclusion in other distros such as ActivePython.

First of several incremental releases to catch up on maintenance.

appdirs 1.4.3
-------------
- [PR #76] Python 3.6 invalid escape sequence deprecation fixes
- Fix for Python 3.6 support

appdirs 1.4.2
-------------
- [PR #84] Allow installing without setuptools
- [PR #86] Fix string delimiters in setup.py description
- Add Python 3.6 support

appdirs 1.4.1
-------------
- [issue #38] Fix _winreg import on Windows Py3
- [issue #55] Make appname optional

appdirs 1.4.0
-------------
- [PR #42] AppAuthor is now optional on Windows
- [issue 41] Support Jython on Windows, Mac, and Unix-like platforms. Windows
  support requires `JNA <https://github.com/twall/jna>`_.
- [PR #44] Fix incorrect behaviour of the site_config_dir method

appdirs 1.3.0
-------------
- [Unix, issue 16] Conform to XDG standard, instead of breaking it for
  everybody
- [Unix] Removes gratuitous case mangling of the case, since \*nix-es are
  usually case sensitive, so mangling is not wise
- [Unix] Fixes the utterly wrong behaviour in ``site_data_dir``, return result
  based on XDG_DATA_DIRS and make room for respecting the standard which
  specifies XDG_DATA_DIRS is a multiple-value variable
- [Issue 6] Add ``*_config_dir`` which are distinct on nix-es, according to
  XDG specs; on Windows and Mac return the corresponding ``*_data_dir``

appdirs 1.2.0
-------------

- [Unix] Put ``user_log_dir`` under the *cache* dir on Unix. Seems to be more
  typical.
- [issue 9] Make ``unicode`` work on py3k.

appdirs 1.1.0
-------------

- [issue 4] Add ``AppDirs.user_log_dir``.
- [Unix, issue 2, issue 7] appdirs now conforms to `XDG base directory spec
  <http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html>`_.
- [Mac, issue 5] Fix ``site_data_dir()`` on Mac.
- [Mac] Drop use of 'Carbon' module in favour of hardcoded paths; supports
  Python3 now.
- [Windows] Append "Cache" to ``user_cache_dir`` on Windows by default. Use
  ``opinion=False`` option to disable this.
- Add ``appdirs.AppDirs`` convenience class. Usage:

        >>> dirs = AppDirs("SuperApp", "Acme", version="1.0")
        >>> dirs.user_data_dir
        '/Users/trentm/Library/Application Support/SuperApp/1.0'

- [Windows] Cherry-pick Komodo's change to downgrade paths to the Windows short
  paths if there are high bit chars.
- [Linux] Change default ``user_cache_dir()`` on Linux to be singular, e.g.
  "~/.superapp/cache".
- [Windows] Add ``roaming`` option to ``user_data_dir()`` (for use on Windows only)
  and change the default ``user_data_dir`` behaviour to use a *non*-roaming
  profile dir (``CSIDL_LOCAL_APPDATA`` instead of ``CSIDL_APPDATA``). Why? Because
  a large roaming profile can cause login speed issues. The "only syncs on
  logout" behaviour can cause surprises in appdata info.


appdirs 1.0.1 (never released)
------------------------------

Started this changelog 27 July 2010. Before that this module originated in the
`Komodo <http://www.activestate.com/komodo>`_ product as ``applib.py`` and then
as `applib/location.py
<http://github.com/ActiveState/applib/blob/master/applib/location.py>`_ (used by
`PyPM <http://code.activestate.com/pypm/>`_ in `ActivePython
<http://www.activestate.com/activepython>`_). This is basically a fork of
applib.py 1.0.1 and applib/location.py 1.0.1.
8  env/lib/python3.12/site-packages/appdirs-1.4.4.dist-info/RECORD  vendored  Normal file
@@ -0,0 +1,8 @@
__pycache__/appdirs.cpython-312.pyc,,
appdirs-1.4.4.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
appdirs-1.4.4.dist-info/LICENSE.txt,sha256=Nt200KdFqTqyAyA9cZCBSxuJcn0lTK_0jHp6-71HAAs,1097
appdirs-1.4.4.dist-info/METADATA,sha256=k5TVfXMNKGHTfp2wm6EJKTuGwGNuoQR5TqQgH8iwG8M,8981
appdirs-1.4.4.dist-info/RECORD,,
appdirs-1.4.4.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
appdirs-1.4.4.dist-info/top_level.txt,sha256=nKncE8CUqZERJ6VuQWL4_bkunSPDNfn7KZqb4Tr5YEM,8
appdirs.py,sha256=g99s2sXhnvTEm79oj4bWI0Toapc-_SmKKNXvOXHkVic,24720
6  env/lib/python3.12/site-packages/appdirs-1.4.4.dist-info/WHEEL  vendored  Normal file
@@ -0,0 +1,6 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.34.2)
Root-Is-Purelib: true
Tag: py2-none-any
Tag: py3-none-any
1  env/lib/python3.12/site-packages/appdirs-1.4.4.dist-info/top_level.txt  vendored  Normal file
@@ -0,0 +1 @@
appdirs
608  env/lib/python3.12/site-packages/appdirs.py  vendored  Normal file
@@ -0,0 +1,608 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2005-2010 ActiveState Software Inc.
# Copyright (c) 2013 Eddy Petrișor

"""Utilities for determining application-specific dirs.

See <http://github.com/ActiveState/appdirs> for details and usage.
"""
# Dev Notes:
# - MSDN on where to store app data files:
#   http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120
# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html
# - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html

__version__ = "1.4.4"
__version_info__ = tuple(int(segment) for segment in __version__.split("."))


import sys
import os

PY3 = sys.version_info[0] == 3

if PY3:
    unicode = str

if sys.platform.startswith('java'):
    import platform
    os_name = platform.java_ver()[3][0]
    if os_name.startswith('Windows'):  # "Windows XP", "Windows 7", etc.
        system = 'win32'
    elif os_name.startswith('Mac'):  # "Mac OS X", etc.
        system = 'darwin'
    else:  # "Linux", "SunOS", "FreeBSD", etc.
        # Setting this to "linux2" is not ideal, but only Windows or Mac
        # are actually checked for and the rest of the module expects
        # *sys.platform* style strings.
        system = 'linux2'
else:
    system = sys.platform


def user_data_dir(appname=None, appauthor=None, version=None, roaming=False):
    r"""Return full path to the user-specific data dir for this application.

        "appname" is the name of application.
            If None, just the system directory is returned.
        "appauthor" (only used on Windows) is the name of the
            appauthor or distributing body for this application. Typically
            it is the owning company name. This falls back to appname. You may
            pass False to disable it.
        "version" is an optional version path element to append to the
            path. You might want to use this if you want multiple versions
            of your app to be able to run independently. If used, this
            would typically be "<major>.<minor>".
            Only applied when appname is present.
        "roaming" (boolean, default False) can be set True to use the Windows
            roaming appdata directory. That means that for users on a Windows
            network setup for roaming profiles, this user data will be
            sync'd on login. See
            <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
            for a discussion of issues.

    Typical user data directories are:
        Mac OS X:               ~/Library/Application Support/<AppName>
        Unix:                   ~/.local/share/<AppName>    # or in $XDG_DATA_HOME, if defined
        Win XP (not roaming):   C:\Documents and Settings\<username>\Application Data\<AppAuthor>\<AppName>
        Win XP (roaming):       C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>
        Win 7  (not roaming):   C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>
        Win 7  (roaming):       C:\Users\<username>\AppData\Roaming\<AppAuthor>\<AppName>

    For Unix, we follow the XDG spec and support $XDG_DATA_HOME.
    That means, by default "~/.local/share/<AppName>".
    """
    if system == "win32":
        if appauthor is None:
            appauthor = appname
        const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA"
        path = os.path.normpath(_get_win_folder(const))
        if appname:
            if appauthor is not False:
                path = os.path.join(path, appauthor, appname)
            else:
                path = os.path.join(path, appname)
    elif system == 'darwin':
        path = os.path.expanduser('~/Library/Application Support/')
        if appname:
            path = os.path.join(path, appname)
    else:
        path = os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share"))
        if appname:
            path = os.path.join(path, appname)
    if appname and version:
        path = os.path.join(path, version)
    return path


def site_data_dir(appname=None, appauthor=None, version=None, multipath=False):
    r"""Return full path to the user-shared data dir for this application.

        "appname" is the name of application.
            If None, just the system directory is returned.
        "appauthor" (only used on Windows) is the name of the
            appauthor or distributing body for this application. Typically
            it is the owning company name. This falls back to appname. You may
            pass False to disable it.
        "version" is an optional version path element to append to the
            path. You might want to use this if you want multiple versions
            of your app to be able to run independently. If used, this
            would typically be "<major>.<minor>".
            Only applied when appname is present.
        "multipath" is an optional parameter only applicable to *nix
            which indicates that the entire list of data dirs should be
            returned. By default, the first item from XDG_DATA_DIRS is
            returned, or '/usr/local/share/<AppName>',
            if XDG_DATA_DIRS is not set

    Typical site data directories are:
        Mac OS X:   /Library/Application Support/<AppName>
        Unix:       /usr/local/share/<AppName> or /usr/share/<AppName>
        Win XP:     C:\Documents and Settings\All Users\Application Data\<AppAuthor>\<AppName>
        Vista:      (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
        Win 7:      C:\ProgramData\<AppAuthor>\<AppName>   # Hidden, but writeable on Win 7.

    For Unix, this is using the $XDG_DATA_DIRS[0] default.

    WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
    """
    if system == "win32":
        if appauthor is None:
            appauthor = appname
        path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA"))
        if appname:
            if appauthor is not False:
                path = os.path.join(path, appauthor, appname)
            else:
                path = os.path.join(path, appname)
    elif system == 'darwin':
        path = os.path.expanduser('/Library/Application Support')
        if appname:
            path = os.path.join(path, appname)
    else:
        # XDG default for $XDG_DATA_DIRS
        # only first, if multipath is False
        path = os.getenv('XDG_DATA_DIRS',
                         os.pathsep.join(['/usr/local/share', '/usr/share']))
        pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
        if appname:
            if version:
                appname = os.path.join(appname, version)
            pathlist = [os.sep.join([x, appname]) for x in pathlist]

        if multipath:
            path = os.pathsep.join(pathlist)
        else:
            path = pathlist[0]
        return path

    if appname and version:
        path = os.path.join(path, version)
    return path


def user_config_dir(appname=None, appauthor=None, version=None, roaming=False):
    r"""Return full path to the user-specific config dir for this application.

        "appname" is the name of application.
            If None, just the system directory is returned.
        "appauthor" (only used on Windows) is the name of the
            appauthor or distributing body for this application. Typically
            it is the owning company name. This falls back to appname. You may
            pass False to disable it.
        "version" is an optional version path element to append to the
            path. You might want to use this if you want multiple versions
            of your app to be able to run independently. If used, this
            would typically be "<major>.<minor>".
            Only applied when appname is present.
        "roaming" (boolean, default False) can be set True to use the Windows
            roaming appdata directory. That means that for users on a Windows
            network setup for roaming profiles, this user data will be
            sync'd on login. See
            <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
            for a discussion of issues.

    Typical user config directories are:
        Mac OS X:               same as user_data_dir
        Unix:                   ~/.config/<AppName>     # or in $XDG_CONFIG_HOME, if defined
        Win *:                  same as user_data_dir

    For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME.
    That means, by default "~/.config/<AppName>".
    """
    if system in ["win32", "darwin"]:
        path = user_data_dir(appname, appauthor, None, roaming)
    else:
        path = os.getenv('XDG_CONFIG_HOME', os.path.expanduser("~/.config"))
        if appname:
            path = os.path.join(path, appname)
    if appname and version:
        path = os.path.join(path, version)
    return path


def site_config_dir(appname=None, appauthor=None, version=None, multipath=False):
    r"""Return full path to the user-shared data dir for this application.

        "appname" is the name of application.
            If None, just the system directory is returned.
        "appauthor" (only used on Windows) is the name of the
            appauthor or distributing body for this application. Typically
            it is the owning company name. This falls back to appname. You may
            pass False to disable it.
        "version" is an optional version path element to append to the
            path. You might want to use this if you want multiple versions
            of your app to be able to run independently. If used, this
            would typically be "<major>.<minor>".
            Only applied when appname is present.
        "multipath" is an optional parameter only applicable to *nix
            which indicates that the entire list of config dirs should be
            returned. By default, the first item from XDG_CONFIG_DIRS is
            returned, or '/etc/xdg/<AppName>', if XDG_CONFIG_DIRS is not set

    Typical site config directories are:
        Mac OS X:   same as site_data_dir
        Unix:       /etc/xdg/<AppName> or $XDG_CONFIG_DIRS[i]/<AppName> for each value in
                    $XDG_CONFIG_DIRS
        Win *:      same as site_data_dir
        Vista:      (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)

    For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False

    WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
    """
    if system in ["win32", "darwin"]:
        path = site_data_dir(appname, appauthor)
        if appname and version:
            path = os.path.join(path, version)
    else:
        # XDG default for $XDG_CONFIG_DIRS
        # only first, if multipath is False
        path = os.getenv('XDG_CONFIG_DIRS', '/etc/xdg')
        pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
        if appname:
            if version:
                appname = os.path.join(appname, version)
            pathlist = [os.sep.join([x, appname]) for x in pathlist]

        if multipath:
            path = os.pathsep.join(pathlist)
        else:
            path = pathlist[0]
    return path


def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True):
    r"""Return full path to the user-specific cache dir for this application.

        "appname" is the name of application.
            If None, just the system directory is returned.
        "appauthor" (only used on Windows) is the name of the
            appauthor or distributing body for this application. Typically
            it is the owning company name. This falls back to appname. You may
            pass False to disable it.
        "version" is an optional version path element to append to the
            path. You might want to use this if you want multiple versions
            of your app to be able to run independently. If used, this
            would typically be "<major>.<minor>".
            Only applied when appname is present.
        "opinion" (boolean) can be False to disable the appending of
            "Cache" to the base app data dir for Windows. See
            discussion below.

    Typical user cache directories are:
        Mac OS X:   ~/Library/Caches/<AppName>
        Unix:       ~/.cache/<AppName> (XDG default)
        Win XP:     C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Cache
        Vista:      C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache

    On Windows the only suggestion in the MSDN docs is that local settings go in
    the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming
    app data dir (the default returned by `user_data_dir` above). Apps typically
    put cache data somewhere *under* the given dir here. Some examples:
        ...\Mozilla\Firefox\Profiles\<ProfileName>\Cache
        ...\Acme\SuperApp\Cache\1.0
    OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value.
    This can be disabled with the `opinion=False` option.
    """
    if system == "win32":
        if appauthor is None:
            appauthor = appname
        path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA"))
        if appname:
            if appauthor is not False:
                path = os.path.join(path, appauthor, appname)
            else:
                path = os.path.join(path, appname)
            if opinion:
                path = os.path.join(path, "Cache")
    elif system == 'darwin':
        path = os.path.expanduser('~/Library/Caches')
        if appname:
            path = os.path.join(path, appname)
    else:
        path = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache'))
        if appname:
            path = os.path.join(path, appname)
    if appname and version:
        path = os.path.join(path, version)
    return path


def user_state_dir(appname=None, appauthor=None, version=None, roaming=False):
    r"""Return full path to the user-specific state dir for this application.

        "appname" is the name of application.
            If None, just the system directory is returned.
        "appauthor" (only used on Windows) is the name of the
            appauthor or distributing body for this application. Typically
            it is the owning company name. This falls back to appname. You may
            pass False to disable it.
        "version" is an optional version path element to append to the
            path. You might want to use this if you want multiple versions
            of your app to be able to run independently. If used, this
            would typically be "<major>.<minor>".
            Only applied when appname is present.
        "roaming" (boolean, default False) can be set True to use the Windows
            roaming appdata directory. That means that for users on a Windows
            network setup for roaming profiles, this user data will be
            sync'd on login. See
            <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
            for a discussion of issues.

    Typical user state directories are:
        Mac OS X:   same as user_data_dir
        Unix:       ~/.local/state/<AppName>   # or in $XDG_STATE_HOME, if defined
        Win *:      same as user_data_dir

    For Unix, we follow this Debian proposal <https://wiki.debian.org/XDGBaseDirectorySpecification#state>
    to extend the XDG spec and support $XDG_STATE_HOME.

    That means, by default "~/.local/state/<AppName>".
    """
    if system in ["win32", "darwin"]:
        path = user_data_dir(appname, appauthor, None, roaming)
    else:
        path = os.getenv('XDG_STATE_HOME', os.path.expanduser("~/.local/state"))
        if appname:
            path = os.path.join(path, appname)
    if appname and version:
        path = os.path.join(path, version)
    return path


def user_log_dir(appname=None, appauthor=None, version=None, opinion=True):
    r"""Return full path to the user-specific log dir for this application.

        "appname" is the name of application.
            If None, just the system directory is returned.
        "appauthor" (only used on Windows) is the name of the
            appauthor or distributing body for this application. Typically
            it is the owning company name. This falls back to appname. You may
            pass False to disable it.
        "version" is an optional version path element to append to the
            path. You might want to use this if you want multiple versions
            of your app to be able to run independently. If used, this
            would typically be "<major>.<minor>".
            Only applied when appname is present.
        "opinion" (boolean) can be False to disable the appending of
            "Logs" to the base app data dir for Windows, and "log" to the
            base cache dir for Unix. See discussion below.

    Typical user log directories are:
        Mac OS X:   ~/Library/Logs/<AppName>
        Unix:       ~/.cache/<AppName>/log  # or under $XDG_CACHE_HOME if defined
        Win XP:     C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Logs
        Vista:      C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Logs

    On Windows the only suggestion in the MSDN docs is that local settings
    go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in
    examples of what some windows apps use for a logs dir.)

    OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA`
    value for Windows and appends "log" to the user cache dir for Unix.
    This can be disabled with the `opinion=False` option.
    """
    if system == "darwin":
        path = os.path.join(
            os.path.expanduser('~/Library/Logs'),
            appname)
    elif system == "win32":
        path = user_data_dir(appname, appauthor, version)
        version = False
        if opinion:
            path = os.path.join(path, "Logs")
    else:
        path = user_cache_dir(appname, appauthor, version)
        version = False
        if opinion:
            path = os.path.join(path, "log")
    if appname and version:
        path = os.path.join(path, version)
    return path


class AppDirs(object):
    """Convenience wrapper for getting application dirs."""
    def __init__(self, appname=None, appauthor=None, version=None,
                 roaming=False, multipath=False):
        self.appname = appname
        self.appauthor = appauthor
        self.version = version
        self.roaming = roaming
        self.multipath = multipath

    @property
    def user_data_dir(self):
        return user_data_dir(self.appname, self.appauthor,
                             version=self.version, roaming=self.roaming)

    @property
    def site_data_dir(self):
        return site_data_dir(self.appname, self.appauthor,
                             version=self.version, multipath=self.multipath)

    @property
    def user_config_dir(self):
        return user_config_dir(self.appname, self.appauthor,
                               version=self.version, roaming=self.roaming)

    @property
    def site_config_dir(self):
        return site_config_dir(self.appname, self.appauthor,
                               version=self.version, multipath=self.multipath)

    @property
    def user_cache_dir(self):
        return user_cache_dir(self.appname, self.appauthor,
                              version=self.version)

    @property
    def user_state_dir(self):
        return user_state_dir(self.appname, self.appauthor,
                              version=self.version)

    @property
    def user_log_dir(self):
        return user_log_dir(self.appname, self.appauthor,
                            version=self.version)


#---- internal support stuff

def _get_win_folder_from_registry(csidl_name):
    """This is a fallback technique at best. I'm not sure if using the
    registry for this guarantees us the correct answer for all CSIDL_*
    names.
    """
    if PY3:
        import winreg as _winreg
    else:
        import _winreg

    shell_folder_name = {
        "CSIDL_APPDATA": "AppData",
        "CSIDL_COMMON_APPDATA": "Common AppData",
        "CSIDL_LOCAL_APPDATA": "Local AppData",
    }[csidl_name]

    key = _winreg.OpenKey(
        _winreg.HKEY_CURRENT_USER,
        r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
    )
    dir, type = _winreg.QueryValueEx(key, shell_folder_name)
    return dir


def _get_win_folder_with_pywin32(csidl_name):
    from win32com.shell import shellcon, shell
    dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0)
    # Try to make this a unicode path because SHGetFolderPath does
    # not return unicode strings when there is unicode data in the
    # path.
    try:
        dir = unicode(dir)

        # Downgrade to short path name if have highbit chars. See
        # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
        has_high_char = False
        for c in dir:
            if ord(c) > 255:
                has_high_char = True
                break
        if has_high_char:
            try:
                import win32api
                dir = win32api.GetShortPathName(dir)
            except ImportError:
                pass
    except UnicodeError:
        pass
    return dir


def _get_win_folder_with_ctypes(csidl_name):
    import ctypes

    csidl_const = {
        "CSIDL_APPDATA": 26,
        "CSIDL_COMMON_APPDATA": 35,
        "CSIDL_LOCAL_APPDATA": 28,
    }[csidl_name]

    buf = ctypes.create_unicode_buffer(1024)
    ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf)

    # Downgrade to short path name if have highbit chars. See
    # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
    has_high_char = False
    for c in buf:
        if ord(c) > 255:
            has_high_char = True
            break
    if has_high_char:
        buf2 = ctypes.create_unicode_buffer(1024)
        if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024):
            buf = buf2

    return buf.value

def _get_win_folder_with_jna(csidl_name):
    import array
    from com.sun import jna
    from com.sun.jna.platform import win32

    buf_size = win32.WinDef.MAX_PATH * 2
    buf = array.zeros('c', buf_size)
    shell = win32.Shell32.INSTANCE
    shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None, win32.ShlObj.SHGFP_TYPE_CURRENT, buf)
    dir = jna.Native.toString(buf.tostring()).rstrip("\0")

    # Downgrade to short path name if have highbit chars. See
    # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
    has_high_char = False
    for c in dir:
        if ord(c) > 255:
            has_high_char = True
            break
    if has_high_char:
        buf = array.zeros('c', buf_size)
        kernel = win32.Kernel32.INSTANCE
        if kernel.GetShortPathName(dir, buf, buf_size):
            dir = jna.Native.toString(buf.tostring()).rstrip("\0")

    return dir

if system == "win32":
    try:
        import win32com.shell
        _get_win_folder = _get_win_folder_with_pywin32
    except ImportError:
        try:
            from ctypes import windll
            _get_win_folder = _get_win_folder_with_ctypes
        except ImportError:
            try:
                import com.sun.jna
                _get_win_folder = _get_win_folder_with_jna
            except ImportError:
                _get_win_folder = _get_win_folder_from_registry


#---- self test code

if __name__ == "__main__":
    appname = "MyApp"
    appauthor = "MyCompany"

    props = ("user_data_dir",
             "user_config_dir",
             "user_cache_dir",
             "user_state_dir",
             "user_log_dir",
             "site_data_dir",
             "site_config_dir")

    print("-- app dirs %s --" % __version__)

    print("-- app dirs (with optional 'version')")
    dirs = AppDirs(appname, appauthor, version="1.0")
    for prop in props:
        print("%s: %s" % (prop, getattr(dirs, prop)))

    print("\n-- app dirs (without optional 'version')")
    dirs = AppDirs(appname, appauthor)
    for prop in props:
        print("%s: %s" % (prop, getattr(dirs, prop)))

    print("\n-- app dirs (without optional 'appauthor')")
    dirs = AppDirs(appname)
    for prop in props:
        print("%s: %s" % (prop, getattr(dirs, prop)))

    print("\n-- app dirs (with disabled 'appauthor')")
    dirs = AppDirs(appname, appauthor=False)
    for prop in props:
        print("%s: %s" % (prop, getattr(dirs, prop)))
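As the source above shows, every `user_*` helper resolves the XDG environment variables at call time on Linux, falling back to the spec defaults when unset. A quick sketch of that behavior (the override value is illustrative)::

    import os
    from appdirs import user_data_dir

    os.environ["XDG_DATA_HOME"] = "/tmp/xdg-data"  # illustrative override
    print(user_data_dir("SuperApp", "Acme", version="1.0"))
    # -> /tmp/xdg-data/SuperApp/1.0 on Linux; macOS and Windows ignore XDG_DATA_HOME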
1  env/lib/python3.12/site-packages/certifi-2024.8.30.dist-info/INSTALLER  vendored  Normal file
@@ -0,0 +1 @@
pip
20  env/lib/python3.12/site-packages/certifi-2024.8.30.dist-info/LICENSE  vendored  Normal file
@@ -0,0 +1,20 @@
This package contains a modified version of ca-bundle.crt:

ca-bundle.crt -- Bundle of CA Root Certificates

This is a bundle of X.509 certificates of public Certificate Authorities
(CA). These were automatically extracted from Mozilla's root certificates
file (certdata.txt). This file can be found in the mozilla source tree:
https://hg.mozilla.org/mozilla-central/file/tip/security/nss/lib/ckfw/builtins/certdata.txt
It contains the certificates in PEM format and therefore
can be directly used with curl / libcurl / php_curl, or with
an Apache+mod_ssl webserver for SSL client authentication.
Just configure this file as the SSLCACertificateFile.#

***** BEGIN LICENSE BLOCK *****
This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain
one at http://mozilla.org/MPL/2.0/.

***** END LICENSE BLOCK *****
@(#) $RCSfile: certdata.txt,v $ $Revision: 1.80 $ $Date: 2011/11/03 15:11:58 $
67
env/lib/python3.12/site-packages/certifi-2024.8.30.dist-info/METADATA
vendored
Normal file
@ -0,0 +1,67 @@
Metadata-Version: 2.1
Name: certifi
Version: 2024.8.30
Summary: Python package for providing Mozilla's CA Bundle.
Home-page: https://github.com/certifi/python-certifi
Author: Kenneth Reitz
Author-email: me@kennethreitz.com
License: MPL-2.0
Project-URL: Source, https://github.com/certifi/python-certifi
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
Classifier: Natural Language :: English
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Requires-Python: >=3.6
License-File: LICENSE

Certifi: Python SSL Certificates
================================

Certifi provides Mozilla's carefully curated collection of Root Certificates for
validating the trustworthiness of SSL certificates while verifying the identity
of TLS hosts. It has been extracted from the `Requests`_ project.

Installation
------------

``certifi`` is available on PyPI. Simply install it with ``pip``::

    $ pip install certifi

Usage
-----

To reference the installed certificate authority (CA) bundle, you can use the
built-in function::

    >>> import certifi

    >>> certifi.where()
    '/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'

Or from the command line::

    $ python -m certifi
    /usr/local/lib/python3.7/site-packages/certifi/cacert.pem

Enjoy!

.. _`Requests`: https://requests.readthedocs.io/en/master/
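
For instance, to make an HTTPS request that validates against exactly this
bundle, you can hand the path to the standard library's ``ssl`` module (a
minimal sketch; the URL below is only a placeholder)::

    import ssl
    import urllib.request

    import certifi

    # Build an SSL context whose trust store is the certifi bundle.
    context = ssl.create_default_context(cafile=certifi.where())

    with urllib.request.urlopen("https://example.com", context=context) as response:
        print(response.status)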

Addition/Removal of Certificates
--------------------------------

Certifi does not support any addition/removal or other modification of the
CA trust store content. This project is intended to provide a reliable and
highly portable root of trust to python deployments. Look to upstream projects
for methods to use alternate trust.
14
env/lib/python3.12/site-packages/certifi-2024.8.30.dist-info/RECORD
vendored
Normal file
@ -0,0 +1,14 @@
certifi-2024.8.30.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
certifi-2024.8.30.dist-info/LICENSE,sha256=6TcW2mucDVpKHfYP5pWzcPBpVgPSH2-D8FPkLPwQyvc,989
certifi-2024.8.30.dist-info/METADATA,sha256=GhBHRVUN6a4ZdUgE_N5wmukJfyuoE-QyIl8Y3ifNQBM,2222
certifi-2024.8.30.dist-info/RECORD,,
certifi-2024.8.30.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
certifi-2024.8.30.dist-info/top_level.txt,sha256=KMu4vUCfsjLrkPbSNdgdekS-pVJzBAJFO__nI8NF6-U,8
certifi/__init__.py,sha256=p_GYZrjUwPBUhpLlCZoGb0miKBKSqDAyZC5DvIuqbHQ,94
certifi/__main__.py,sha256=xBBoj905TUWBLRGANOcf7oi6e-3dMP4cEoG9OyMs11g,243
certifi/__pycache__/__init__.cpython-312.pyc,,
certifi/__pycache__/__main__.cpython-312.pyc,,
certifi/__pycache__/core.cpython-312.pyc,,
certifi/cacert.pem,sha256=lO3rZukXdPyuk6BWUJFOKQliWaXH6HGh9l1GGrUgG0c,299427
certifi/core.py,sha256=qRDDFyXVJwTB_EmoGppaXU_R9qCZvhl-EzxPMuV3nTA,4426
certifi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
env/lib/python3.12/site-packages/certifi-2024.8.30.dist-info/WHEEL
vendored
Normal file
@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: setuptools (74.0.0)
Root-Is-Purelib: true
Tag: py3-none-any

1
env/lib/python3.12/site-packages/certifi-2024.8.30.dist-info/top_level.txt
vendored
Normal file
@ -0,0 +1 @@
certifi
4
env/lib/python3.12/site-packages/certifi/__init__.py
vendored
Normal file
@ -0,0 +1,4 @@
from .core import contents, where

__all__ = ["contents", "where"]
__version__ = "2024.08.30"
12
env/lib/python3.12/site-packages/certifi/__main__.py
vendored
Normal file
@ -0,0 +1,12 @@
import argparse

from certifi import contents, where

parser = argparse.ArgumentParser()
parser.add_argument("-c", "--contents", action="store_true")
args = parser.parse_args()

if args.contents:
    print(contents())
else:
    print(where())
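# Illustrative usage (not part of the vendored file): `python -m certifi`
# prints the bundle's path; `python -m certifi --contents` prints the PEM data.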
BIN
env/lib/python3.12/site-packages/certifi/__pycache__/__init__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/certifi/__pycache__/__main__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/certifi/__pycache__/core.cpython-312.pyc
vendored
Normal file
Binary file not shown.
4929
env/lib/python3.12/site-packages/certifi/cacert.pem
vendored
Normal file
File diff suppressed because it is too large
114
env/lib/python3.12/site-packages/certifi/core.py
vendored
Normal file
@ -0,0 +1,114 @@
"""
certifi.py
~~~~~~~~~~

This module returns the installation location of cacert.pem or its contents.
"""
import sys
import atexit

def exit_cacert_ctx() -> None:
    _CACERT_CTX.__exit__(None, None, None)  # type: ignore[union-attr]


if sys.version_info >= (3, 11):

    from importlib.resources import as_file, files

    _CACERT_CTX = None
    _CACERT_PATH = None

    def where() -> str:
        # This is slightly terrible, but we want to delay extracting the file
        # in cases where we're inside of a zipimport situation until someone
        # actually calls where(), but we don't want to re-extract the file
        # on every call of where(), so we'll do it once then store it in a
        # global variable.
        global _CACERT_CTX
        global _CACERT_PATH
        if _CACERT_PATH is None:
            # This is slightly janky, the importlib.resources API wants you to
            # manage the cleanup of this file, so it doesn't actually return a
            # path, it returns a context manager that will give you the path
            # when you enter it and will do any cleanup when you leave it. In
            # the common case of not needing a temporary file, it will just
            # return the file system location and the __exit__() is a no-op.
            #
            # We also have to hold onto the actual context manager, because
            # it will do the cleanup whenever it gets garbage collected, so
            # we will also store that at the global level as well.
            _CACERT_CTX = as_file(files("certifi").joinpath("cacert.pem"))
            _CACERT_PATH = str(_CACERT_CTX.__enter__())
            atexit.register(exit_cacert_ctx)

        return _CACERT_PATH

    def contents() -> str:
        return files("certifi").joinpath("cacert.pem").read_text(encoding="ascii")

elif sys.version_info >= (3, 7):

    from importlib.resources import path as get_path, read_text

    _CACERT_CTX = None
    _CACERT_PATH = None

    def where() -> str:
        # This is slightly terrible, but we want to delay extracting the
        # file in cases where we're inside of a zipimport situation until
        # someone actually calls where(), but we don't want to re-extract
        # the file on every call of where(), so we'll do it once then store
        # it in a global variable.
        global _CACERT_CTX
        global _CACERT_PATH
        if _CACERT_PATH is None:
            # This is slightly janky, the importlib.resources API wants you
            # to manage the cleanup of this file, so it doesn't actually
            # return a path, it returns a context manager that will give
            # you the path when you enter it and will do any cleanup when
            # you leave it. In the common case of not needing a temporary
            # file, it will just return the file system location and the
            # __exit__() is a no-op.
            #
            # We also have to hold onto the actual context manager, because
            # it will do the cleanup whenever it gets garbage collected, so
            # we will also store that at the global level as well.
            _CACERT_CTX = get_path("certifi", "cacert.pem")
            _CACERT_PATH = str(_CACERT_CTX.__enter__())
            atexit.register(exit_cacert_ctx)

        return _CACERT_PATH

    def contents() -> str:
        return read_text("certifi", "cacert.pem", encoding="ascii")

else:
    import os
    import types
    from typing import Union

    Package = Union[types.ModuleType, str]
    Resource = Union[str, "os.PathLike"]

    # This fallback will work for Python versions prior to 3.7 that lack the
    # importlib.resources module but relies on the existing `where` function
    # so won't address issues with environments like PyOxidizer that don't set
    # __file__ on modules.
    def read_text(
        package: Package,
        resource: Resource,
        encoding: str = 'utf-8',
        errors: str = 'strict'
    ) -> str:
        with open(where(), encoding=encoding) as data:
            return data.read()

    # If we don't have importlib.resources, then we will just do the old logic
    # of assuming we're on the filesystem and munge the path directly.
    def where() -> str:
        f = os.path.dirname(__file__)

        return os.path.join(f, "cacert.pem")

    def contents() -> str:
        return read_text("certifi", "cacert.pem", encoding="ascii")
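
The `where()` implementations above share one pattern worth isolating: resolve a
packaged resource to a real filesystem path exactly once, keep the context manager
alive globally, and defer cleanup to interpreter exit. A minimal sketch of the same
pattern, assuming a hypothetical package "mypkg" that ships a data file "data.txt"
(both names are illustrative, not part of certifi):

    import atexit
    from importlib.resources import as_file, files

    # as_file() may extract the resource to a temp file (e.g. under zipimport);
    # entering the context once and caching the path avoids re-extraction.
    _ctx = as_file(files("mypkg").joinpath("data.txt"))
    _path = str(_ctx.__enter__())
    # Defer the matching __exit__ so any temp file is removed at shutdown.
    atexit.register(_ctx.__exit__, None, None, None)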
0
env/lib/python3.12/site-packages/certifi/py.typed
vendored
Normal file
1
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/INSTALLER
vendored
Normal file
@ -0,0 +1 @@
pip
21
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/LICENSE
vendored
Normal file
@ -0,0 +1,21 @@
MIT License

Copyright (c) 2019 TAHRI Ahmed R.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
695
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/METADATA
vendored
Normal file
@ -0,0 +1,695 @@
Metadata-Version: 2.1
Name: charset-normalizer
Version: 3.4.0
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Home-page: https://github.com/Ousret/charset_normalizer
Author: Ahmed TAHRI
Author-email: tahri.ahmed@proton.me
License: MIT
Project-URL: Bug Reports, https://github.com/Ousret/charset_normalizer/issues
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/en/latest
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
Classifier: Development Status :: 5 - Production/Stable
Classifier: License :: OSI Approved :: MIT License
Classifier: Intended Audience :: Developers
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Typing :: Typed
Requires-Python: >=3.7.0
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode_backport

<h1 align="center">Charset Detection, for Everyone 👋</h1>

<p align="center">
  <sup>The Real First Universal Charset Detector</sup><br>
  <a href="https://pypi.org/project/charset-normalizer">
    <img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
  </a>
  <a href="https://pepy.tech/project/charset-normalizer/">
    <img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
  </a>
  <a href="https://bestpractices.coreinfrastructure.org/projects/7297">
    <img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
  </a>
</p>
<p align="center">
  <sup><i>Featured Packages</i></sup><br>
  <a href="https://github.com/jawah/niquests">
    <img alt="Static Badge" src="https://img.shields.io/badge/Niquests-HTTP_1.1%2C%202%2C_and_3_Client-cyan">
  </a>
  <a href="https://github.com/jawah/wassima">
    <img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Killer-cyan">
  </a>
</p>
<p align="center">
  <sup><i>In other language (unofficial port - by the community)</i></sup><br>
  <a href="https://github.com/nickspring/charset-normalizer-rs">
    <img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
  </a>
</p>

> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.

<p align="center">
  >>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
</p>

This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.

| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|--------------------------------------------------|:---------------------------------------------:|:------------------:|:-----------------------------------------------:|
| `Fast` | ❌ | ✅ | ✅ |
| `Universal**` | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
| `Native Python` | ✅ | ✅ | ❌ |
| `Detect spoken language` | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |

<p align="center">
  <img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
</p>

*\*\* : They are clearly using specific code for a specific encoding even if it covers most of the encodings in use*<br>
Did you get here because of the logs? See [https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html](https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html)

## ⚡ Performance

This package offers better performance than its counterpart Chardet. Here are some numbers.

| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) | 86 % | 200 ms | 5 file/sec |
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |

| Package | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) | 1200 ms | 287 ms | 23 ms |
| charset-normalizer | 100 ms | 50 ms | 5 ms |

Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.

> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays heavily depend on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet's accuracy versus ours is measured using Chardet's initial capability
> (eg. Supported Encoding). Challenge them if you want.

## ✨ Installation

Using pip:

```sh
pip install charset-normalizer -U
```

## 🚀 Basic Usage

### CLI
This package comes with a CLI.

```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
                  file [file ...]

The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.

positional arguments:
  files                 File(s) to be analysed

optional arguments:
  -h, --help            show this help message and exit
  -v, --verbose         Display complementary information about file if any.
                        Stdout will contain logs about the detection process.
  -a, --with-alternative
                        Output complementary possibilities if any. Top-level
                        JSON WILL be a list.
  -n, --normalize       Permit to normalize input file. If not set, program
                        does not write anything.
  -m, --minimal         Only output the charset detected to STDOUT. Disabling
                        JSON output.
  -r, --replace         Replace file when trying to normalize it instead of
                        creating a new one.
  -f, --force           Replace file without asking if you are sure, use this
                        flag with caution.
  -t THRESHOLD, --threshold THRESHOLD
                        Define a custom maximum amount of chaos allowed in
                        decoded content. 0. <= chaos <= 1.
  --version             Show version information and exit.
```

```bash
normalizer ./data/sample.1.fr.srt
```

or

```bash
python -m charset_normalizer ./data/sample.1.fr.srt
```

🎉 Since version 1.4.0 the CLI produces an easily usable stdout result in JSON format.

```json
{
    "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
    "encoding": "cp1252",
    "encoding_aliases": [
        "1252",
        "windows_1252"
    ],
    "alternative_encodings": [
        "cp1254",
        "cp1256",
        "cp1258",
        "iso8859_14",
        "iso8859_15",
        "iso8859_16",
        "iso8859_3",
        "iso8859_9",
        "latin_1",
        "mbcs"
    ],
    "language": "French",
    "alphabets": [
        "Basic Latin",
        "Latin-1 Supplement"
    ],
    "has_sig_or_bom": false,
    "chaos": 0.149,
    "coherence": 97.152,
    "unicode_path": null,
    "is_preferred": true
}
```

### Python
*Just print out normalized text*
```python
from charset_normalizer import from_path

results = from_path('./my_subtitle.srt')

print(str(results.best()))
```

*Upgrade your code without effort*
```python
from charset_normalizer import detect
```

The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
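
For a concrete drop-in call (a sketch; the byte payload below is only an example):

```python
from charset_normalizer import detect

# Same call shape and result keys as chardet.detect()
result = detect("Bonjour tout le monde".encode("cp1252"))
print(result["encoding"], result["confidence"])
```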

See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)

## 😇 Why

When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down on a good challenge!

I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered strings.**
What I want is to get readable text, the best I can.

In a way, **I'm brute forcing text decoding.** How cool is that ? 😎

Don't confuse the package **ftfy** with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer converts a raw file in an unknown encoding to Unicode.

## 🍰 How

- Discard all charset encoding tables that could not fit the binary content.
- Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.
- Extract matches with the lowest mess detected.
- Additionally, we measure coherence / probe for a language. (See the toy sketch below.)

**Wait a minute**, what is noise/mess and coherence according to **YOU ?**

*Noise :* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess.
I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to
improve or rewrite it.

*Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought
that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
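
To make those steps concrete, here is a deliberately naive sketch of the same idea (toy code, not the library's actual implementation):

```python
def naive_best_guess(payload: bytes, candidates=("utf_8", "cp1252", "latin_1")):
    """Toy version of the pipeline above: decode, score the mess, keep the lowest."""
    best = None
    for encoding in candidates:
        try:
            text = payload.decode(encoding)  # step 1: discard tables that cannot decode
        except UnicodeDecodeError:
            continue
        # step 2: a crude 'mess' ratio -- unprintable, non-whitespace characters
        # (latin_1 never fails to decode, so this score is what discriminates)
        mess = sum(not (ch.isprintable() or ch.isspace()) for ch in text) / max(len(text), 1)
        if best is None or mess < best[0]:  # step 3: keep the lowest mess
            best = (mess, encoding, text)
    return best
```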

## ⚡ Known limitations

- Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters))
- Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.

## ⚠️ About Python EOLs

**If you are running:**

- Python >=2.7,<3.5: Unsupported
- Python 3.5: charset-normalizer < 2.1
- Python 3.6: charset-normalizer < 3.1
- Python 3.7: charset-normalizer < 4.0

Upgrade your Python interpreter as soon as possible.

## 👤 Contributing

Contributions, issues and feature requests are very much welcome.<br />
Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.

## 📝 License

Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.

Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)

## 💼 For Enterprise

Professional support for charset-normalizer is available as part of the [Tidelift
Subscription][1]. Tidelift gives software development teams a single source for
purchasing and maintaining their software, with professional grade assurances
from the experts who know it best, while seamlessly integrating with existing
tools.

[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme

# Changelog
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)

### Added
- Argument `--no-preemptive` in the CLI to prevent the detector to search for hints.
- Support for Python 3.13 (#512)

### Fixed
- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch.
- Improved the general reliability of the detector based on user feedbacks. (#520) (#509) (#498) (#407) (#537)
- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)

## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)

### Fixed
- Unintentional memory usage regression when using large payload that match several encoding (#376)
- Regression on some detection case showcased in the documentation (#371)

### Added
- Noise (md) probe that identify malformed arabic representation due to the presence of letters in isolated form (credit to my wife)

## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)

### Changed
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
- Improved the general detection reliability based on reports from the community

## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)

### Added
- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
- Support for 9 forgotten encoding that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)

### Removed
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant

### Changed
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8

### Fixed
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)

## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)

### Changed
- Typehint for function `from_path` no longer enforce `PathLike` as its first argument
- Minor improvement over the global detection reliability

### Added
- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries
- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)
- Explicit support for Python 3.12

### Fixed
- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)

## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)

### Added
- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)

### Removed
- Support for Python 3.6 (PR #260)

### Changed
- Optional speedup provided by mypy/c 1.0.1

## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)

### Fixed
- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)

### Changed
- Speedup provided by mypy/c 0.990 on Python >= 3.7

## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)

### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)

### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1

### Fixed
- CLI with opt --normalize fail when using full path for files
- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
- Sphinx warnings when generating the documentation

### Removed
- Coherence detector no longer return 'Simple English' instead return 'English'
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
- Breaking: Method `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`

## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)

### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio

### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter

### Fixed
- CLI with opt --normalize fail when using full path for files
- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it

### Removed
- Coherence detector no longer return 'Simple English' instead return 'English'
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'

## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)

### Added
- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)

### Removed
- Breaking: Method `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)

### Fixed
- Sphinx warnings when generating the documentation

## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)

### Changed
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1

### Removed
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`

## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)

### Deprecated
- Function `normalize` scheduled for removal in 3.0

### Changed
- Removed useless call to decode in fn is_unprintable (#206)

### Fixed
- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)

## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)

### Added
- Output the Unicode table version when running the CLI with `--version` (PR #194)

### Changed
- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)

### Fixed
- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)

### Removed
- Support for Python 3.5 (PR #192)

### Deprecated
- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)

## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)

### Fixed
- ASCII miss-detection on rare cases (PR #170)

## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)

### Added
- Explicit support for Python 3.11 (PR #164)

### Changed
- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)

## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)

### Fixed
- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)

### Changed
- Skipping the language-detection (CD) on ASCII (PR #155)

## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)

### Changed
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)

### Fixed
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)

## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
### Changed
- Improvement over Vietnamese detection (PR #126)
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
- Code style as refactored by Sourcery-AI (PR #131)
- Minor adjustment on the MD around european words (PR #133)
- Remove and replace SRTs from assets / tests (PR #139)
- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)

### Fixed
- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
- Avoid using too insignificant chunk (PR #137)

### Added
- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)

## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
### Added
- Add support for Kazakh (Cyrillic) language detection (PR #109)

### Changed
- Further, improve inferring the language from a given single-byte code page (PR #112)
- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
- Various detection improvement (MD+CD) (PR #117)

### Removed
- Remove redundant logging entry about detected language(s) (PR #115)

### Fixed
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)

## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
### Fixed
- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)
- Fix CLI crash when using --minimal output in certain cases (PR #103)

### Changed
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)

## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
### Changed
- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)
- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)
- The Unicode detection is slightly improved (PR #93)
- Add syntax sugar \_\_bool\_\_ for results CharsetMatches list-container (PR #91)

### Removed
- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92)

### Fixed
- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)
- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
- The MANIFEST.in was not exhaustive (PR #78)

## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
### Fixed
- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70)
- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
- Submatch factoring could be wrong in rare edge cases (PR #72)
- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
- Fix line endings from CRLF to LF for certain project files (PR #67)

### Changed
- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
- Allow fallback on specified encoding if any (PR #71)

## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
### Changed
- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)

## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
### Fixed
- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)

### Changed
- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)

## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
### Fixed
- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
- Using explain=False permanently disable the verbose output in the current runtime (PR #47)
- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47)
- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)

### Changed
- Public function normalize default args values were not aligned with from_bytes (PR #53)

### Added
- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)

## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
### Changed
- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
- Accent has been made on UTF-8 detection, should perform rather instantaneous.
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)
- The program has been rewritten to ease the readability and maintainability. (+Using static typing)+
- utf_7 detection has been reinstated.

### Removed
- This package no longer require anything when used with Python 3.5 (Dropped cached_property)
- Removed support for these languages: Catalan, Esperanto, Kazakh, Baque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.
- The exception hook on UnicodeDecodeError has been removed.

### Deprecated
- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0

### Fixed
- The CLI output used the relative path of the file(s). Should be absolute.

## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
### Fixed
- Logger configuration/usage no longer conflict with others (PR #44)

## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
### Removed
- Using standard logging instead of using the package loguru.
- Dropping nose test framework in favor of the maintained pytest.
- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.
- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.
- Stop support for UTF-7 that does not contain a SIG.
- Dropping PrettyTable, replaced with pure JSON output in CLI.

### Fixed
- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.
- Not searching properly for the BOM when trying utf32/16 parent codec.

### Changed
- Improving the package final size by compressing frequencies.json.
- Huge improvement over the larges payload.

### Added
- CLI now produces JSON consumable output.
- Return ASCII if given sequences fit. Given reasonable confidence.

## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)

### Fixed
- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)

## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)

### Fixed
- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)

## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)

### Fixed
- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)

## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)

### Changed
- Amend the previous release to allow prettytable 2.0 (PR #35)

## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)

### Fixed
- Fix error while using the package with a python pre-release interpreter (PR #33)

### Changed
- Dependencies refactoring, constraints revised.

### Added
- Add python 3.9 and 3.10 to the supported interpreters

MIT License

Copyright (c) 2019 TAHRI Ahmed R.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
35
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/RECORD
vendored
Normal file
@ -0,0 +1,35 @@
../../../bin/normalizer,sha256=pWxmMYA_SquLIU6d0ASgK3copKj6QWxw28YimXUHlzw,251
charset_normalizer-3.4.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
charset_normalizer-3.4.0.dist-info/LICENSE,sha256=6zGgxaT7Cbik4yBV0lweX5w1iidS_vPNcgIT0cz-4kE,1070
charset_normalizer-3.4.0.dist-info/METADATA,sha256=WGbEW9ehh2spNJxo1M6sEGGZWmsQ-oj2DsMjV29zoms,34159
charset_normalizer-3.4.0.dist-info/RECORD,,
charset_normalizer-3.4.0.dist-info/WHEEL,sha256=Z868N0_Fq1ssfDKgnQWj75ig0pzypFewyov-H4g6Btc,153
charset_normalizer-3.4.0.dist-info/entry_points.txt,sha256=ADSTKrkXZ3hhdOVFi6DcUEHQRS0xfxDIE_pEz4wLIXA,65
charset_normalizer-3.4.0.dist-info/top_level.txt,sha256=7ASyzePr8_xuZWJsnqJjIBtyV8vhEo0wBCv1MPRRi3Q,19
charset_normalizer/__init__.py,sha256=UzI3xC8PhmcLRMzSgPb6minTmRq0kWznnCBJ8ZCc2XI,1577
charset_normalizer/__main__.py,sha256=JxY8bleaENOFlLRb9HfoeZCzAMnn2A1oGR5Xm2eyqg0,73
charset_normalizer/__pycache__/__init__.cpython-312.pyc,,
charset_normalizer/__pycache__/__main__.cpython-312.pyc,,
charset_normalizer/__pycache__/api.cpython-312.pyc,,
charset_normalizer/__pycache__/cd.cpython-312.pyc,,
charset_normalizer/__pycache__/constant.cpython-312.pyc,,
charset_normalizer/__pycache__/legacy.cpython-312.pyc,,
charset_normalizer/__pycache__/md.cpython-312.pyc,,
charset_normalizer/__pycache__/models.cpython-312.pyc,,
charset_normalizer/__pycache__/utils.cpython-312.pyc,,
charset_normalizer/__pycache__/version.cpython-312.pyc,,
charset_normalizer/api.py,sha256=kMyNUqrfBZU22PP0pYKrSldtYUGA24wsGlXGLAKra7c,22559
charset_normalizer/cd.py,sha256=xwZliZcTQFA3jU0c00PRiu9MNxXTFxQkFLWmMW24ZzI,12560
charset_normalizer/cli/__init__.py,sha256=D5ERp8P62llm2FuoMzydZ7d9rs8cvvLXqE-1_6oViPc,100
charset_normalizer/cli/__main__.py,sha256=zX9sV_ApU1d96Wb0cS04vulstdB4F0Eh7kLn-gevfw4,10411
charset_normalizer/cli/__pycache__/__init__.cpython-312.pyc,,
charset_normalizer/cli/__pycache__/__main__.cpython-312.pyc,,
charset_normalizer/constant.py,sha256=uwoW87NicWZDTLviX7le0wdoYBbhBQDA4n1JtJo77ts,40499
charset_normalizer/legacy.py,sha256=XJjkT0hejMH8qfAKz1ts8OUiBT18t2FJP3tJgLwUWwc,2327
charset_normalizer/md.cpython-312-aarch64-linux-gnu.so,sha256=medVy2qYxvmhqZLDgu6sOFWJ_3LJ2X3o-RJovGFelks,69800
charset_normalizer/md.py,sha256=SIIZcENrslI7h3v4GigbFN61fRyE_wiCN1z9Ii3fBRo,20138
charset_normalizer/md__mypyc.cpython-312-aarch64-linux-gnu.so,sha256=sxeTw_aoOZt6lM09TkDdRVjlOp1FyW8wJQWSCrj5ldc,322008
charset_normalizer/models.py,sha256=oAMAcBSEY7CngbUXJp34Wc4Rl9NKJJjGmUwW3EPtk6g,12425
charset_normalizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
charset_normalizer/utils.py,sha256=teiosMqzKjXyAHXnGdjSBOgnBZwx-SkBbCLrx0UXy8M,11894
charset_normalizer/version.py,sha256=AX66S4ytQFdd6F5jbVU2OPMqYwFS5M3BkMvyX-3BKF8,79
6
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/WHEEL
vendored
Normal file
@ -0,0 +1,6 @@
Wheel-Version: 1.0
Generator: setuptools (75.1.0)
Root-Is-Purelib: false
Tag: cp312-cp312-manylinux_2_17_aarch64
Tag: cp312-cp312-manylinux2014_aarch64

2
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/entry_points.txt
vendored
Normal file
@ -0,0 +1,2 @@
[console_scripts]
normalizer = charset_normalizer.cli:cli_detect
1
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/top_level.txt
vendored
Normal file
@ -0,0 +1 @@
charset_normalizer
46
env/lib/python3.12/site-packages/charset_normalizer/__init__.py
vendored
Normal file
@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
"""
Charset-Normalizer
~~~~~~~~~~~~~~
The Real First Universal Charset Detector.
A library that helps you read text from an unknown charset encoding.
Motivated by chardet, this package tries to resolve the issue by taking a new approach.
All IANA character set names for which the Python core library provides codecs are supported.

Basic usage:
   >>> from charset_normalizer import from_bytes
   >>> results = from_bytes('Bсеки човек има право на образование. Oбразованието!'.encode('utf_8'))
   >>> best_guess = results.best()
   >>> str(best_guess)
   'Bсеки човек има право на образование. Oбразованието!'

Other methods and usages are available - see the full documentation
at <https://github.com/Ousret/charset_normalizer>.
:copyright: (c) 2021 by Ahmed TAHRI
:license: MIT, see LICENSE for more details.
"""
import logging

from .api import from_bytes, from_fp, from_path, is_binary
from .legacy import detect
from .models import CharsetMatch, CharsetMatches
from .utils import set_logging_handler
from .version import VERSION, __version__

__all__ = (
    "from_fp",
    "from_path",
    "from_bytes",
    "is_binary",
    "detect",
    "CharsetMatch",
    "CharsetMatches",
    "__version__",
    "VERSION",
    "set_logging_handler",
)

# Attach a NullHandler to the top level logger by default
# https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library

logging.getLogger("charset_normalizer").addHandler(logging.NullHandler())
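Note: the NullHandler attached above follows the standard library-logging convention, so the package stays silent by default. A minimal sketch of opting into detection logs from consumer code (the logger name is taken from this module; the exported set_logging_handler helper wraps the same idea):

import logging

# The library logger is named "charset_normalizer"; attach a handler to see logs.
logger = logging.getLogger("charset_normalizer")
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.DEBUG)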
4
env/lib/python3.12/site-packages/charset_normalizer/__main__.py
vendored
Normal file
@ -0,0 +1,4 @@
from .cli import cli_detect

if __name__ == "__main__":
    cli_detect()
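Note: this __main__ module makes the package runnable with `python -m`. A hedged sketch of driving it from Python; "sample.txt" is a placeholder file name:

import subprocess
import sys

# Equivalent to the generated `normalizer` console script.
subprocess.run([sys.executable, "-m", "charset_normalizer", "--minimal", "sample.txt"])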
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/__init__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/__main__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/api.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/cd.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/constant.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/legacy.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/md.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/models.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/utils.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/version.cpython-312.pyc
vendored
Normal file
Binary file not shown.
668
env/lib/python3.12/site-packages/charset_normalizer/api.py
vendored
Normal file
@ -0,0 +1,668 @@
import logging
from os import PathLike
from typing import BinaryIO, List, Optional, Set, Union

from .cd import (
    coherence_ratio,
    encoding_languages,
    mb_encoding_languages,
    merge_coherence_ratios,
)
from .constant import IANA_SUPPORTED, TOO_BIG_SEQUENCE, TOO_SMALL_SEQUENCE, TRACE
from .md import mess_ratio
from .models import CharsetMatch, CharsetMatches
from .utils import (
    any_specified_encoding,
    cut_sequence_chunks,
    iana_name,
    identify_sig_or_bom,
    is_cp_similar,
    is_multi_byte_encoding,
    should_strip_sig_or_bom,
)

# Will most likely be controversial
# logging.addLevelName(TRACE, "TRACE")
logger = logging.getLogger("charset_normalizer")
explain_handler = logging.StreamHandler()
explain_handler.setFormatter(
    logging.Formatter("%(asctime)s | %(levelname)s | %(message)s")
)


def from_bytes(
    sequences: Union[bytes, bytearray],
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.2,
    cp_isolation: Optional[List[str]] = None,
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
    enable_fallback: bool = True,
) -> CharsetMatches:
    """
    Given a raw bytes sequence, return the best possible charsets usable to render str objects.
    If there are no results, it is a strong indicator that the source is binary/not text.
    By default, the process will extract 5 blocks of 512 bytes each to assess the mess and coherence of a given sequence,
    and will give up on a particular code page after 20% of measured mess. Those criteria are customizable at will.

    The preemptive behaviour DOES NOT replace the traditional detection workflow; it prioritizes a particular code page
    but never takes it for granted. It can improve performance.

    You may want to focus your attention on some code pages and/or exclude others; use cp_isolation and cp_exclusion for that
    purpose.

    This function will strip the SIG in the payload/sequence every time except on UTF-16, UTF-32.
    By default the library does not set up any handler other than the NullHandler. If you set the 'explain'
    toggle to True it will alter the logger configuration to add a StreamHandler that is suitable for debugging.
    Custom logging format and handler can be set manually.
    """

    if not isinstance(sequences, (bytearray, bytes)):
        raise TypeError(
            "Expected object of type bytes or bytearray, got: {0}".format(
                type(sequences)
            )
        )

    if explain:
        previous_logger_level: int = logger.level
        logger.addHandler(explain_handler)
        logger.setLevel(TRACE)

    length: int = len(sequences)

    if length == 0:
        logger.debug("Encoding detection on empty bytes, assuming utf_8 intention.")
        if explain:
            logger.removeHandler(explain_handler)
            logger.setLevel(previous_logger_level or logging.WARNING)
        return CharsetMatches([CharsetMatch(sequences, "utf_8", 0.0, False, [], "")])

    if cp_isolation is not None:
        logger.log(
            TRACE,
            "cp_isolation is set. use this flag for debugging purpose. "
            "limited list of encoding allowed : %s.",
            ", ".join(cp_isolation),
        )
        cp_isolation = [iana_name(cp, False) for cp in cp_isolation]
    else:
        cp_isolation = []

    if cp_exclusion is not None:
        logger.log(
            TRACE,
            "cp_exclusion is set. use this flag for debugging purpose. "
            "limited list of encoding excluded : %s.",
            ", ".join(cp_exclusion),
        )
        cp_exclusion = [iana_name(cp, False) for cp in cp_exclusion]
    else:
        cp_exclusion = []

    if length <= (chunk_size * steps):
        logger.log(
            TRACE,
            "override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.",
            steps,
            chunk_size,
            length,
        )
        steps = 1
        chunk_size = length

    if steps > 1 and length / steps < chunk_size:
        chunk_size = int(length / steps)

    is_too_small_sequence: bool = len(sequences) < TOO_SMALL_SEQUENCE
    is_too_large_sequence: bool = len(sequences) >= TOO_BIG_SEQUENCE

    if is_too_small_sequence:
        logger.log(
            TRACE,
            "Trying to detect encoding from a tiny portion of ({}) byte(s).".format(
                length
            ),
        )
    elif is_too_large_sequence:
        logger.log(
            TRACE,
            "Using lazy str decoding because the payload is quite large, ({}) byte(s).".format(
                length
            ),
        )

    prioritized_encodings: List[str] = []

    specified_encoding: Optional[str] = (
        any_specified_encoding(sequences) if preemptive_behaviour else None
    )

    if specified_encoding is not None:
        prioritized_encodings.append(specified_encoding)
        logger.log(
            TRACE,
            "Detected declarative mark in sequence. Priority +1 given for %s.",
            specified_encoding,
        )

    tested: Set[str] = set()
    tested_but_hard_failure: List[str] = []
    tested_but_soft_failure: List[str] = []

    fallback_ascii: Optional[CharsetMatch] = None
    fallback_u8: Optional[CharsetMatch] = None
    fallback_specified: Optional[CharsetMatch] = None

    results: CharsetMatches = CharsetMatches()

    early_stop_results: CharsetMatches = CharsetMatches()

    sig_encoding, sig_payload = identify_sig_or_bom(sequences)

    if sig_encoding is not None:
        prioritized_encodings.append(sig_encoding)
        logger.log(
            TRACE,
            "Detected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.",
            len(sig_payload),
            sig_encoding,
        )

    prioritized_encodings.append("ascii")

    if "utf_8" not in prioritized_encodings:
        prioritized_encodings.append("utf_8")

    for encoding_iana in prioritized_encodings + IANA_SUPPORTED:
        if cp_isolation and encoding_iana not in cp_isolation:
            continue

        if cp_exclusion and encoding_iana in cp_exclusion:
            continue

        if encoding_iana in tested:
            continue

        tested.add(encoding_iana)

        decoded_payload: Optional[str] = None
        bom_or_sig_available: bool = sig_encoding == encoding_iana
        strip_sig_or_bom: bool = bom_or_sig_available and should_strip_sig_or_bom(
            encoding_iana
        )

        if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available:
            logger.log(
                TRACE,
                "Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.",
                encoding_iana,
            )
            continue
        if encoding_iana in {"utf_7"} and not bom_or_sig_available:
            logger.log(
                TRACE,
                "Encoding %s won't be tested as-is because detection is unreliable without BOM/SIG.",
                encoding_iana,
            )
            continue

        try:
            is_multi_byte_decoder: bool = is_multi_byte_encoding(encoding_iana)
        except (ModuleNotFoundError, ImportError):
            logger.log(
                TRACE,
                "Encoding %s does not provide an IncrementalDecoder",
                encoding_iana,
            )
            continue

        try:
            if is_too_large_sequence and is_multi_byte_decoder is False:
                str(
                    (
                        sequences[: int(50e4)]
                        if strip_sig_or_bom is False
                        else sequences[len(sig_payload) : int(50e4)]
                    ),
                    encoding=encoding_iana,
                )
            else:
                decoded_payload = str(
                    (
                        sequences
                        if strip_sig_or_bom is False
                        else sequences[len(sig_payload) :]
                    ),
                    encoding=encoding_iana,
                )
        except (UnicodeDecodeError, LookupError) as e:
            if not isinstance(e, LookupError):
                logger.log(
                    TRACE,
                    "Code page %s does not fit given bytes sequence at ALL. %s",
                    encoding_iana,
                    str(e),
                )
            tested_but_hard_failure.append(encoding_iana)
            continue

        similar_soft_failure_test: bool = False

        for encoding_soft_failed in tested_but_soft_failure:
            if is_cp_similar(encoding_iana, encoding_soft_failed):
                similar_soft_failure_test = True
                break

        if similar_soft_failure_test:
            logger.log(
                TRACE,
                "%s is deemed too similar to code page %s and was consider unsuited already. Continuing!",
                encoding_iana,
                encoding_soft_failed,
            )
            continue

        r_ = range(
            0 if not bom_or_sig_available else len(sig_payload),
            length,
            int(length / steps),
        )

        multi_byte_bonus: bool = (
            is_multi_byte_decoder
            and decoded_payload is not None
            and len(decoded_payload) < length
        )

        if multi_byte_bonus:
            logger.log(
                TRACE,
                "Code page %s is a multi byte encoding table and it appear that at least one character "
                "was encoded using n-bytes.",
                encoding_iana,
            )

        max_chunk_gave_up: int = int(len(r_) / 4)

        max_chunk_gave_up = max(max_chunk_gave_up, 2)
        early_stop_count: int = 0
        lazy_str_hard_failure = False

        md_chunks: List[str] = []
        md_ratios = []

        try:
            for chunk in cut_sequence_chunks(
                sequences,
                encoding_iana,
                r_,
                chunk_size,
                bom_or_sig_available,
                strip_sig_or_bom,
                sig_payload,
                is_multi_byte_decoder,
                decoded_payload,
            ):
                md_chunks.append(chunk)

                md_ratios.append(
                    mess_ratio(
                        chunk,
                        threshold,
                        explain is True and 1 <= len(cp_isolation) <= 2,
                    )
                )

                if md_ratios[-1] >= threshold:
                    early_stop_count += 1

                if (early_stop_count >= max_chunk_gave_up) or (
                    bom_or_sig_available and strip_sig_or_bom is False
                ):
                    break
        except (
            UnicodeDecodeError
        ) as e:  # Lazy str loading may have missed something there
            logger.log(
                TRACE,
                "LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s",
                encoding_iana,
                str(e),
            )
            early_stop_count = max_chunk_gave_up
            lazy_str_hard_failure = True

        # We might want to check the sequence again with the whole content
        # Only if initial MD tests passes
        if (
            not lazy_str_hard_failure
            and is_too_large_sequence
            and not is_multi_byte_decoder
        ):
            try:
                sequences[int(50e3) :].decode(encoding_iana, errors="strict")
            except UnicodeDecodeError as e:
                logger.log(
                    TRACE,
                    "LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %s",
                    encoding_iana,
                    str(e),
                )
                tested_but_hard_failure.append(encoding_iana)
                continue

        mean_mess_ratio: float = sum(md_ratios) / len(md_ratios) if md_ratios else 0.0
        if mean_mess_ratio >= threshold or early_stop_count >= max_chunk_gave_up:
            tested_but_soft_failure.append(encoding_iana)
            logger.log(
                TRACE,
                "%s was excluded because of initial chaos probing. Gave up %i time(s). "
                "Computed mean chaos is %f %%.",
                encoding_iana,
                early_stop_count,
                round(mean_mess_ratio * 100, ndigits=3),
            )
            # Preparing those fallbacks in case we got nothing.
            if (
                enable_fallback
                and encoding_iana in ["ascii", "utf_8", specified_encoding]
                and not lazy_str_hard_failure
            ):
                fallback_entry = CharsetMatch(
                    sequences,
                    encoding_iana,
                    threshold,
                    False,
                    [],
                    decoded_payload,
                    preemptive_declaration=specified_encoding,
                )
                if encoding_iana == specified_encoding:
                    fallback_specified = fallback_entry
                elif encoding_iana == "ascii":
                    fallback_ascii = fallback_entry
                else:
                    fallback_u8 = fallback_entry
            continue

        logger.log(
            TRACE,
            "%s passed initial chaos probing. Mean measured chaos is %f %%",
            encoding_iana,
            round(mean_mess_ratio * 100, ndigits=3),
        )

        if not is_multi_byte_decoder:
            target_languages: List[str] = encoding_languages(encoding_iana)
        else:
            target_languages = mb_encoding_languages(encoding_iana)

        if target_languages:
            logger.log(
                TRACE,
                "{} should target any language(s) of {}".format(
                    encoding_iana, str(target_languages)
                ),
            )

        cd_ratios = []

        # We shall skip the CD when its about ASCII
        # Most of the time its not relevant to run "language-detection" on it.
        if encoding_iana != "ascii":
            for chunk in md_chunks:
                chunk_languages = coherence_ratio(
                    chunk,
                    language_threshold,
                    ",".join(target_languages) if target_languages else None,
                )

                cd_ratios.append(chunk_languages)

        cd_ratios_merged = merge_coherence_ratios(cd_ratios)

        if cd_ratios_merged:
            logger.log(
                TRACE,
                "We detected language {} using {}".format(
                    cd_ratios_merged, encoding_iana
                ),
            )

        current_match = CharsetMatch(
            sequences,
            encoding_iana,
            mean_mess_ratio,
            bom_or_sig_available,
            cd_ratios_merged,
            (
                decoded_payload
                if (
                    is_too_large_sequence is False
                    or encoding_iana in [specified_encoding, "ascii", "utf_8"]
                )
                else None
            ),
            preemptive_declaration=specified_encoding,
        )

        results.append(current_match)

        if (
            encoding_iana in [specified_encoding, "ascii", "utf_8"]
            and mean_mess_ratio < 0.1
        ):
            # If md says nothing to worry about, then... stop immediately!
            if mean_mess_ratio == 0.0:
                logger.debug(
                    "Encoding detection: %s is most likely the one.",
                    current_match.encoding,
                )
                if explain:
                    logger.removeHandler(explain_handler)
                    logger.setLevel(previous_logger_level)
                return CharsetMatches([current_match])

            early_stop_results.append(current_match)

        if (
            len(early_stop_results)
            and (specified_encoding is None or specified_encoding in tested)
            and "ascii" in tested
            and "utf_8" in tested
        ):
            probable_result: CharsetMatch = early_stop_results.best()  # type: ignore[assignment]
            logger.debug(
                "Encoding detection: %s is most likely the one.",
                probable_result.encoding,
            )
            if explain:
                logger.removeHandler(explain_handler)
                logger.setLevel(previous_logger_level)

            return CharsetMatches([probable_result])

        if encoding_iana == sig_encoding:
            logger.debug(
                "Encoding detection: %s is most likely the one as we detected a BOM or SIG within "
                "the beginning of the sequence.",
                encoding_iana,
            )
            if explain:
                logger.removeHandler(explain_handler)
                logger.setLevel(previous_logger_level)
            return CharsetMatches([results[encoding_iana]])

    if len(results) == 0:
        if fallback_u8 or fallback_ascii or fallback_specified:
            logger.log(
                TRACE,
                "Nothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.",
            )

        if fallback_specified:
            logger.debug(
                "Encoding detection: %s will be used as a fallback match",
                fallback_specified.encoding,
            )
            results.append(fallback_specified)
        elif (
            (fallback_u8 and fallback_ascii is None)
            or (
                fallback_u8
                and fallback_ascii
                and fallback_u8.fingerprint != fallback_ascii.fingerprint
            )
            or (fallback_u8 is not None)
        ):
            logger.debug("Encoding detection: utf_8 will be used as a fallback match")
            results.append(fallback_u8)
        elif fallback_ascii:
            logger.debug("Encoding detection: ascii will be used as a fallback match")
            results.append(fallback_ascii)

    if results:
        logger.debug(
            "Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.",
            results.best().encoding,  # type: ignore
            len(results) - 1,
        )
    else:
        logger.debug("Encoding detection: Unable to determine any suitable charset.")

    if explain:
        logger.removeHandler(explain_handler)
        logger.setLevel(previous_logger_level)

    return results


def from_fp(
    fp: BinaryIO,
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.20,
    cp_isolation: Optional[List[str]] = None,
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
    enable_fallback: bool = True,
) -> CharsetMatches:
    """
    Same as from_bytes but using a file pointer that is already ready.
    Will not close the file pointer.
    """
    return from_bytes(
        fp.read(),
        steps,
        chunk_size,
        threshold,
        cp_isolation,
        cp_exclusion,
        preemptive_behaviour,
        explain,
        language_threshold,
        enable_fallback,
    )


def from_path(
    path: Union[str, bytes, PathLike],  # type: ignore[type-arg]
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.20,
    cp_isolation: Optional[List[str]] = None,
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
    enable_fallback: bool = True,
) -> CharsetMatches:
    """
    Same as from_bytes but with one extra step: opening and reading the given file path in binary mode.
    Can raise IOError.
    """
    with open(path, "rb") as fp:
        return from_fp(
            fp,
            steps,
            chunk_size,
            threshold,
            cp_isolation,
            cp_exclusion,
            preemptive_behaviour,
            explain,
            language_threshold,
            enable_fallback,
        )


def is_binary(
    fp_or_path_or_payload: Union[PathLike, str, BinaryIO, bytes],  # type: ignore[type-arg]
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.20,
    cp_isolation: Optional[List[str]] = None,
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
    enable_fallback: bool = False,
) -> bool:
    """
    Detect if the given input (file, bytes, or path) points to a binary file, i.e. not text.
    Based on the same main heuristic algorithms and default kwargs, with the sole exception that fallback matches
    are disabled, to be stricter about ASCII-compatible content that is unlikely to be text.
    """
    if isinstance(fp_or_path_or_payload, (str, PathLike)):
        guesses = from_path(
            fp_or_path_or_payload,
            steps=steps,
            chunk_size=chunk_size,
            threshold=threshold,
            cp_isolation=cp_isolation,
            cp_exclusion=cp_exclusion,
            preemptive_behaviour=preemptive_behaviour,
            explain=explain,
            language_threshold=language_threshold,
            enable_fallback=enable_fallback,
        )
    elif isinstance(
        fp_or_path_or_payload,
        (
            bytes,
            bytearray,
        ),
    ):
        guesses = from_bytes(
            fp_or_path_or_payload,
            steps=steps,
            chunk_size=chunk_size,
            threshold=threshold,
            cp_isolation=cp_isolation,
            cp_exclusion=cp_exclusion,
            preemptive_behaviour=preemptive_behaviour,
            explain=explain,
            language_threshold=language_threshold,
            enable_fallback=enable_fallback,
        )
    else:
        guesses = from_fp(
            fp_or_path_or_payload,
            steps=steps,
            chunk_size=chunk_size,
            threshold=threshold,
            cp_isolation=cp_isolation,
            cp_exclusion=cp_exclusion,
            preemptive_behaviour=preemptive_behaviour,
            explain=explain,
            language_threshold=language_threshold,
            enable_fallback=enable_fallback,
        )

    return not guesses
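Note: a minimal usage sketch of the from_bytes API defined above; the payload and code-page choices are illustrative only:

from charset_normalizer import from_bytes

payload = "Comment ça va ?".encode("cp1252")

# cp_isolation narrows the tested code pages, as described in the docstring.
matches = from_bytes(payload, cp_isolation=["cp1252", "latin_1"])

best = matches.best()
if best is not None:
    print(best.encoding, str(best))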
395
env/lib/python3.12/site-packages/charset_normalizer/cd.py
vendored
Normal file
@ -0,0 +1,395 @@
import importlib
from codecs import IncrementalDecoder
from collections import Counter
from functools import lru_cache
from typing import Counter as TypeCounter, Dict, List, Optional, Tuple

from .constant import (
    FREQUENCIES,
    KO_NAMES,
    LANGUAGE_SUPPORTED_COUNT,
    TOO_SMALL_SEQUENCE,
    ZH_NAMES,
)
from .md import is_suspiciously_successive_range
from .models import CoherenceMatches
from .utils import (
    is_accentuated,
    is_latin,
    is_multi_byte_encoding,
    is_unicode_range_secondary,
    unicode_range,
)


def encoding_unicode_range(iana_name: str) -> List[str]:
    """
    Return associated unicode ranges in a single byte code page.
    """
    if is_multi_byte_encoding(iana_name):
        raise IOError("Function not supported on multi-byte code page")

    decoder = importlib.import_module(
        "encodings.{}".format(iana_name)
    ).IncrementalDecoder

    p: IncrementalDecoder = decoder(errors="ignore")
    seen_ranges: Dict[str, int] = {}
    character_count: int = 0

    for i in range(0x40, 0xFF):
        chunk: str = p.decode(bytes([i]))

        if chunk:
            character_range: Optional[str] = unicode_range(chunk)

            if character_range is None:
                continue

            if is_unicode_range_secondary(character_range) is False:
                if character_range not in seen_ranges:
                    seen_ranges[character_range] = 0
                seen_ranges[character_range] += 1
                character_count += 1

    return sorted(
        [
            character_range
            for character_range in seen_ranges
            if seen_ranges[character_range] / character_count >= 0.15
        ]
    )


def unicode_range_languages(primary_range: str) -> List[str]:
    """
    Return inferred languages used with a unicode range.
    """
    languages: List[str] = []

    for language, characters in FREQUENCIES.items():
        for character in characters:
            if unicode_range(character) == primary_range:
                languages.append(language)
                break

    return languages


@lru_cache()
def encoding_languages(iana_name: str) -> List[str]:
    """
    Single-byte encoding language association. Some code pages are heavily linked to particular language(s).
    This function does the correspondence.
    """
    unicode_ranges: List[str] = encoding_unicode_range(iana_name)
    primary_range: Optional[str] = None

    for specified_range in unicode_ranges:
        if "Latin" not in specified_range:
            primary_range = specified_range
            break

    if primary_range is None:
        return ["Latin Based"]

    return unicode_range_languages(primary_range)


@lru_cache()
def mb_encoding_languages(iana_name: str) -> List[str]:
    """
    Multi-byte encoding language association. Some code pages are heavily linked to particular language(s).
    This function does the correspondence.
    """
    if (
        iana_name.startswith("shift_")
        or iana_name.startswith("iso2022_jp")
        or iana_name.startswith("euc_j")
        or iana_name == "cp932"
    ):
        return ["Japanese"]
    if iana_name.startswith("gb") or iana_name in ZH_NAMES:
        return ["Chinese"]
    if iana_name.startswith("iso2022_kr") or iana_name in KO_NAMES:
        return ["Korean"]

    return []


@lru_cache(maxsize=LANGUAGE_SUPPORTED_COUNT)
def get_target_features(language: str) -> Tuple[bool, bool]:
    """
    Determine main aspects of a supported language: whether it contains accents and whether it is pure Latin.
    """
    target_have_accents: bool = False
    target_pure_latin: bool = True

    for character in FREQUENCIES[language]:
        if not target_have_accents and is_accentuated(character):
            target_have_accents = True
        if target_pure_latin and is_latin(character) is False:
            target_pure_latin = False

    return target_have_accents, target_pure_latin


def alphabet_languages(
    characters: List[str], ignore_non_latin: bool = False
) -> List[str]:
    """
    Return the languages associated with the given characters.
    """
    languages: List[Tuple[str, float]] = []

    source_have_accents = any(is_accentuated(character) for character in characters)

    for language, language_characters in FREQUENCIES.items():
        target_have_accents, target_pure_latin = get_target_features(language)

        if ignore_non_latin and target_pure_latin is False:
            continue

        if target_have_accents is False and source_have_accents:
            continue

        character_count: int = len(language_characters)

        character_match_count: int = len(
            [c for c in language_characters if c in characters]
        )

        ratio: float = character_match_count / character_count

        if ratio >= 0.2:
            languages.append((language, ratio))

    languages = sorted(languages, key=lambda x: x[1], reverse=True)

    return [compatible_language[0] for compatible_language in languages]


def characters_popularity_compare(
    language: str, ordered_characters: List[str]
) -> float:
    """
    Determine if an ordered characters list (by occurrence, from most frequent to rarest) matches a particular language.
    The result is a ratio between 0. (absolutely no correspondence) and 1. (near perfect fit).
    Beware that this function is not strict on the match in order to ease the detection. (Meaning a close match is 1.)
    """
    if language not in FREQUENCIES:
        raise ValueError("{} not available".format(language))

    character_approved_count: int = 0
    FREQUENCIES_language_set = set(FREQUENCIES[language])

    ordered_characters_count: int = len(ordered_characters)
    target_language_characters_count: int = len(FREQUENCIES[language])

    large_alphabet: bool = target_language_characters_count > 26

    for character, character_rank in zip(
        ordered_characters, range(0, ordered_characters_count)
    ):
        if character not in FREQUENCIES_language_set:
            continue

        character_rank_in_language: int = FREQUENCIES[language].index(character)
        expected_projection_ratio: float = (
            target_language_characters_count / ordered_characters_count
        )
        character_rank_projection: int = int(character_rank * expected_projection_ratio)

        if (
            large_alphabet is False
            and abs(character_rank_projection - character_rank_in_language) > 4
        ):
            continue

        if (
            large_alphabet is True
            and abs(character_rank_projection - character_rank_in_language)
            < target_language_characters_count / 3
        ):
            character_approved_count += 1
            continue

        characters_before_source: List[str] = FREQUENCIES[language][
            0:character_rank_in_language
        ]
        characters_after_source: List[str] = FREQUENCIES[language][
            character_rank_in_language:
        ]
        characters_before: List[str] = ordered_characters[0:character_rank]
        characters_after: List[str] = ordered_characters[character_rank:]

        before_match_count: int = len(
            set(characters_before) & set(characters_before_source)
        )

        after_match_count: int = len(
            set(characters_after) & set(characters_after_source)
        )

        if len(characters_before_source) == 0 and before_match_count <= 4:
            character_approved_count += 1
            continue

        if len(characters_after_source) == 0 and after_match_count <= 4:
            character_approved_count += 1
            continue

        if (
            before_match_count / len(characters_before_source) >= 0.4
            or after_match_count / len(characters_after_source) >= 0.4
        ):
            character_approved_count += 1
            continue

    return character_approved_count / len(ordered_characters)


def alpha_unicode_split(decoded_sequence: str) -> List[str]:
    """
    Given a decoded text sequence, return a list of str. Unicode range / alphabet separation.
    Ex. a text containing English/Latin with a bit of Hebrew will return two items in the resulting list;
    one containing the Latin letters and the other the Hebrew.
    """
    layers: Dict[str, str] = {}

    for character in decoded_sequence:
        if character.isalpha() is False:
            continue

        character_range: Optional[str] = unicode_range(character)

        if character_range is None:
            continue

        layer_target_range: Optional[str] = None

        for discovered_range in layers:
            if (
                is_suspiciously_successive_range(discovered_range, character_range)
                is False
            ):
                layer_target_range = discovered_range
                break

        if layer_target_range is None:
            layer_target_range = character_range

        if layer_target_range not in layers:
            layers[layer_target_range] = character.lower()
            continue

        layers[layer_target_range] += character.lower()

    return list(layers.values())


def merge_coherence_ratios(results: List[CoherenceMatches]) -> CoherenceMatches:
    """
    This function merges results previously given by the function coherence_ratio.
    The return type is the same as coherence_ratio.
    """
    per_language_ratios: Dict[str, List[float]] = {}
    for result in results:
        for sub_result in result:
            language, ratio = sub_result
            if language not in per_language_ratios:
                per_language_ratios[language] = [ratio]
                continue
            per_language_ratios[language].append(ratio)

    merge = [
        (
            language,
            round(
                sum(per_language_ratios[language]) / len(per_language_ratios[language]),
                4,
            ),
        )
        for language in per_language_ratios
    ]

    return sorted(merge, key=lambda x: x[1], reverse=True)


def filter_alt_coherence_matches(results: CoherenceMatches) -> CoherenceMatches:
    """
    We shall NOT return "English—" in CoherenceMatches because it is an alternative
    of "English". This function only keeps the best match and removes the em-dash in it.
    """
    index_results: Dict[str, List[float]] = dict()

    for result in results:
        language, ratio = result
        no_em_name: str = language.replace("—", "")

        if no_em_name not in index_results:
            index_results[no_em_name] = []

        index_results[no_em_name].append(ratio)

    if any(len(index_results[e]) > 1 for e in index_results):
        filtered_results: CoherenceMatches = []

        for language in index_results:
            filtered_results.append((language, max(index_results[language])))

        return filtered_results

    return results


@lru_cache(maxsize=2048)
def coherence_ratio(
    decoded_sequence: str, threshold: float = 0.1, lg_inclusion: Optional[str] = None
) -> CoherenceMatches:
    """
    Detect ANY language that can be identified in the given sequence. The sequence will be analysed by layers.
    A layer = character extraction by alphabets/ranges.
    """

    results: List[Tuple[str, float]] = []
    ignore_non_latin: bool = False

    sufficient_match_count: int = 0

    lg_inclusion_list = lg_inclusion.split(",") if lg_inclusion is not None else []
    if "Latin Based" in lg_inclusion_list:
        ignore_non_latin = True
        lg_inclusion_list.remove("Latin Based")

    for layer in alpha_unicode_split(decoded_sequence):
        sequence_frequencies: TypeCounter[str] = Counter(layer)
        most_common = sequence_frequencies.most_common()

        character_count: int = sum(o for c, o in most_common)

        if character_count <= TOO_SMALL_SEQUENCE:
            continue

        popular_character_ordered: List[str] = [c for c, o in most_common]

        for language in lg_inclusion_list or alphabet_languages(
            popular_character_ordered, ignore_non_latin
        ):
            ratio: float = characters_popularity_compare(
                language, popular_character_ordered
            )

            if ratio < threshold:
                continue
            elif ratio >= 0.8:
                sufficient_match_count += 1

            results.append((language, round(ratio, 4)))

            if sufficient_match_count >= 3:
                break

    return sorted(
        filter_alt_coherence_matches(results), key=lambda x: x[1], reverse=True
    )
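Note: a short sketch of the coherence_ratio entry point defined above; it returns (language, ratio) pairs sorted by descending coherence, and the sample string and expected ranking are illustrative only:

from charset_normalizer.cd import coherence_ratio

sample = "Всеки човек има право на образование."
print(coherence_ratio(sample))  # e.g. [('Bulgarian', ...), ('Russian', ...), ...]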
6
env/lib/python3.12/site-packages/charset_normalizer/cli/__init__.py
vendored
Normal file
@ -0,0 +1,6 @@
from .__main__ import cli_detect, query_yes_no

__all__ = (
    "cli_detect",
    "query_yes_no",
)
320
env/lib/python3.12/site-packages/charset_normalizer/cli/__main__.py
vendored
Normal file
@ -0,0 +1,320 @@
import argparse
import sys
from json import dumps
from os.path import abspath, basename, dirname, join, realpath
from platform import python_version
from typing import List, Optional
from unicodedata import unidata_version

import charset_normalizer.md as md_module
from charset_normalizer import from_fp
from charset_normalizer.models import CliDetectionResult
from charset_normalizer.version import __version__


def query_yes_no(question: str, default: str = "yes") -> bool:
    """Ask a yes/no question via input() and return the answer.

    "question" is a string that is presented to the user.
    "default" is the presumed answer if the user just hits <Enter>.
    It must be "yes" (the default), "no" or None (meaning
    an answer is required of the user).

    The "answer" return value is True for "yes" or False for "no".

    Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input
    """
    valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
    if default is None:
        prompt = " [y/n] "
    elif default == "yes":
        prompt = " [Y/n] "
    elif default == "no":
        prompt = " [y/N] "
    else:
        raise ValueError("invalid default answer: '%s'" % default)

    while True:
        sys.stdout.write(question + prompt)
        choice = input().lower()
        if default is not None and choice == "":
            return valid[default]
        elif choice in valid:
            return valid[choice]
        else:
            sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")


def cli_detect(argv: Optional[List[str]] = None) -> int:
    """
    CLI assistant using ARGV and ArgumentParser
    :param argv:
    :return: 0 if everything is fine, anything else equals trouble
    """
    parser = argparse.ArgumentParser(
        description="The Real First Universal Charset Detector. "
        "Discover originating encoding used on text file. "
        "Normalize text to unicode."
    )

    parser.add_argument(
        "files", type=argparse.FileType("rb"), nargs="+", help="File(s) to be analysed"
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        default=False,
        dest="verbose",
        help="Display complementary information about file if any. "
        "Stdout will contain logs about the detection process.",
    )
    parser.add_argument(
        "-a",
        "--with-alternative",
        action="store_true",
        default=False,
        dest="alternatives",
        help="Output complementary possibilities if any. Top-level JSON WILL be a list.",
    )
    parser.add_argument(
        "-n",
        "--normalize",
        action="store_true",
        default=False,
        dest="normalize",
        help="Permit to normalize input file. If not set, program does not write anything.",
    )
    parser.add_argument(
        "-m",
        "--minimal",
        action="store_true",
        default=False,
        dest="minimal",
        help="Only output the charset detected to STDOUT. Disabling JSON output.",
    )
    parser.add_argument(
        "-r",
        "--replace",
        action="store_true",
        default=False,
        dest="replace",
        help="Replace file when trying to normalize it instead of creating a new one.",
    )
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        default=False,
        dest="force",
        help="Replace file without asking if you are sure, use this flag with caution.",
    )
    parser.add_argument(
        "-i",
        "--no-preemptive",
        action="store_true",
        default=False,
        dest="no_preemptive",
        help="Disable looking at a charset declaration to hint the detector.",
    )
    parser.add_argument(
        "-t",
        "--threshold",
        action="store",
        default=0.2,
        type=float,
        dest="threshold",
        help="Define a custom maximum amount of chaos allowed in decoded content. 0. <= chaos <= 1.",
    )
    parser.add_argument(
        "--version",
        action="version",
        version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format(
            __version__,
            python_version(),
            unidata_version,
            "OFF" if md_module.__file__.lower().endswith(".py") else "ON",
        ),
        help="Show version information and exit.",
    )

    args = parser.parse_args(argv)

    if args.replace is True and args.normalize is False:
        if args.files:
            for my_file in args.files:
                my_file.close()
        print("Use --replace in addition of --normalize only.", file=sys.stderr)
        return 1

    if args.force is True and args.replace is False:
        if args.files:
            for my_file in args.files:
                my_file.close()
        print("Use --force in addition of --replace only.", file=sys.stderr)
        return 1

    if args.threshold < 0.0 or args.threshold > 1.0:
        if args.files:
            for my_file in args.files:
                my_file.close()
        print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr)
        return 1

    x_ = []

    for my_file in args.files:
        matches = from_fp(
            my_file,
            threshold=args.threshold,
            explain=args.verbose,
            preemptive_behaviour=args.no_preemptive is False,
        )

        best_guess = matches.best()

        if best_guess is None:
            print(
                'Unable to identify originating encoding for "{}". {}'.format(
                    my_file.name,
                    (
                        "Maybe try increasing maximum amount of chaos."
                        if args.threshold < 1.0
                        else ""
                    ),
                ),
                file=sys.stderr,
            )
            x_.append(
                CliDetectionResult(
                    abspath(my_file.name),
                    None,
                    [],
                    [],
                    "Unknown",
                    [],
                    False,
                    1.0,
                    0.0,
                    None,
                    True,
                )
            )
        else:
            x_.append(
                CliDetectionResult(
                    abspath(my_file.name),
                    best_guess.encoding,
                    best_guess.encoding_aliases,
                    [
                        cp
                        for cp in best_guess.could_be_from_charset
                        if cp != best_guess.encoding
                    ],
                    best_guess.language,
                    best_guess.alphabets,
                    best_guess.bom,
                    best_guess.percent_chaos,
                    best_guess.percent_coherence,
                    None,
                    True,
                )
            )

            if len(matches) > 1 and args.alternatives:
                for el in matches:
                    if el != best_guess:
                        x_.append(
                            CliDetectionResult(
                                abspath(my_file.name),
                                el.encoding,
                                el.encoding_aliases,
                                [
                                    cp
                                    for cp in el.could_be_from_charset
                                    if cp != el.encoding
                                ],
                                el.language,
                                el.alphabets,
                                el.bom,
                                el.percent_chaos,
                                el.percent_coherence,
                                None,
                                False,
                            )
                        )

            if args.normalize is True:
                if best_guess.encoding.startswith("utf") is True:
                    print(
                        '"{}" file does not need to be normalized, as it already came from unicode.'.format(
                            my_file.name
                        ),
                        file=sys.stderr,
                    )
                    if my_file.closed is False:
                        my_file.close()
                    continue

                dir_path = dirname(realpath(my_file.name))
                file_name = basename(realpath(my_file.name))

                o_: List[str] = file_name.split(".")

                if args.replace is False:
                    o_.insert(-1, best_guess.encoding)
                    if my_file.closed is False:
                        my_file.close()
                elif (
                    args.force is False
                    and query_yes_no(
                        'Are you sure to normalize "{}" by replacing it ?'.format(
                            my_file.name
                        ),
                        "no",
                    )
                    is False
                ):
                    if my_file.closed is False:
                        my_file.close()
                    continue

                try:
                    x_[0].unicode_path = join(dir_path, ".".join(o_))

                    with open(x_[0].unicode_path, "wb") as fp:
                        fp.write(best_guess.output())
                except IOError as e:
                    print(str(e), file=sys.stderr)
                    if my_file.closed is False:
                        my_file.close()
                    return 2

        if my_file.closed is False:
            my_file.close()

    if args.minimal is False:
        print(
            dumps(
                [el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__,
                ensure_ascii=True,
                indent=4,
            )
        )
    else:
        for my_file in args.files:
            print(
                ", ".join(
                    [
                        el.encoding or "undefined"
                        for el in x_
                        if el.path == abspath(my_file.name)
                    ]
                )
            )

    return 0


if __name__ == "__main__":
    cli_detect()
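Note: because cli_detect takes an explicit argv list and returns an exit code, it can be driven without a shell. A minimal sketch; "sample.txt" is a placeholder file name:

from charset_normalizer.cli import cli_detect

# 0 means success, anything else signals trouble (see the docstring above).
exit_code = cli_detect(["--minimal", "sample.txt"])
print(exit_code)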
BIN
env/lib/python3.12/site-packages/charset_normalizer/cli/__pycache__/__init__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/cli/__pycache__/__main__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
1997
env/lib/python3.12/site-packages/charset_normalizer/constant.py
vendored
Normal file
File diff suppressed because it is too large
65
env/lib/python3.12/site-packages/charset_normalizer/legacy.py
vendored
Normal file
65
env/lib/python3.12/site-packages/charset_normalizer/legacy.py
vendored
Normal file
@@ -0,0 +1,65 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Optional
from warnings import warn

from .api import from_bytes
from .constant import CHARDET_CORRESPONDENCE

# TODO: remove this check when dropping Python 3.7 support
if TYPE_CHECKING:
    from typing_extensions import TypedDict

    class ResultDict(TypedDict):
        encoding: Optional[str]
        language: str
        confidence: Optional[float]


def detect(
    byte_str: bytes, should_rename_legacy: bool = False, **kwargs: Any
) -> ResultDict:
    """
    chardet legacy method
    Detect the encoding of the given byte string. It should be mostly backward-compatible.
    Encoding name will match Chardet own writing whenever possible. (Not on encoding name unsupported by it)
    This function is deprecated and should be used to migrate your project easily, consult the documentation for
    further information. Not planned for removal.

    :param byte_str:     The byte sequence to examine.
    :param should_rename_legacy:  Should we rename legacy encodings
                                  to their more modern equivalents?
    """
    if len(kwargs):
        warn(
            f"charset-normalizer disregard arguments '{','.join(list(kwargs.keys()))}' in legacy function detect()"
        )

    if not isinstance(byte_str, (bytearray, bytes)):
        raise TypeError(  # pragma: nocover
            "Expected object of type bytes or bytearray, got: "
            "{0}".format(type(byte_str))
        )

    if isinstance(byte_str, bytearray):
        byte_str = bytes(byte_str)

    r = from_bytes(byte_str).best()

    encoding = r.encoding if r is not None else None
    language = r.language if r is not None and r.language != "Unknown" else ""
    confidence = 1.0 - r.chaos if r is not None else None

    # Note: CharsetNormalizer does not return 'UTF-8-SIG' as the sig get stripped in the detection/normalization process
    # but chardet does return 'utf-8-sig' and it is a valid codec name.
    if r is not None and encoding == "utf_8" and r.bom:
        encoding += "_sig"

    if should_rename_legacy is False and encoding in CHARDET_CORRESPONDENCE:
        encoding = CHARDET_CORRESPONDENCE[encoding]

    return {
        "encoding": encoding,
        "language": language,
        "confidence": confidence,
    }
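The shim above exists so chardet-style callers keep working against charset_normalizer; as a minimal usage sketch (the Cyrillic sample string is an illustration, not part of the commit):

# Hypothetical caller of the legacy detect() shim above.
from charset_normalizer import detect

payload = "Всеки човек има право на образование.".encode("cp1251")

result = detect(payload)  # chardet-style ResultDict defined above
print(result["encoding"], result["language"], result["confidence"])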
BIN
env/lib/python3.12/site-packages/charset_normalizer/md.cpython-312-aarch64-linux-gnu.so
vendored
Executable file
Binary file not shown.
628
env/lib/python3.12/site-packages/charset_normalizer/md.py
vendored
Normal file
@@ -0,0 +1,628 @@
from functools import lru_cache
from logging import getLogger
from typing import List, Optional

from .constant import (
    COMMON_SAFE_ASCII_CHARACTERS,
    TRACE,
    UNICODE_SECONDARY_RANGE_KEYWORD,
)
from .utils import (
    is_accentuated,
    is_arabic,
    is_arabic_isolated_form,
    is_case_variable,
    is_cjk,
    is_emoticon,
    is_hangul,
    is_hiragana,
    is_katakana,
    is_latin,
    is_punctuation,
    is_separator,
    is_symbol,
    is_thai,
    is_unprintable,
    remove_accent,
    unicode_range,
)


class MessDetectorPlugin:
    """
    Base abstract class used for mess detection plugins.
    All detectors MUST extend and implement given methods.
    """

    def eligible(self, character: str) -> bool:
        """
        Determine if given character should be fed in.
        """
        raise NotImplementedError  # pragma: nocover

    def feed(self, character: str) -> None:
        """
        The main routine to be executed upon character.
        Insert the logic in witch the text would be considered chaotic.
        """
        raise NotImplementedError  # pragma: nocover

    def reset(self) -> None:  # pragma: no cover
        """
        Permit to reset the plugin to the initial state.
        """
        raise NotImplementedError

    @property
    def ratio(self) -> float:
        """
        Compute the chaos ratio based on what your feed() has seen.
        Must NOT be lower than 0.; No restriction gt 0.
        """
        raise NotImplementedError  # pragma: nocover


class TooManySymbolOrPunctuationPlugin(MessDetectorPlugin):
    def __init__(self) -> None:
        self._punctuation_count: int = 0
        self._symbol_count: int = 0
        self._character_count: int = 0

        self._last_printable_char: Optional[str] = None
        self._frenzy_symbol_in_word: bool = False

    def eligible(self, character: str) -> bool:
        return character.isprintable()

    def feed(self, character: str) -> None:
        self._character_count += 1

        if (
            character != self._last_printable_char
            and character not in COMMON_SAFE_ASCII_CHARACTERS
        ):
            if is_punctuation(character):
                self._punctuation_count += 1
            elif (
                character.isdigit() is False
                and is_symbol(character)
                and is_emoticon(character) is False
            ):
                self._symbol_count += 2

        self._last_printable_char = character

    def reset(self) -> None:  # pragma: no cover
        self._punctuation_count = 0
        self._character_count = 0
        self._symbol_count = 0

    @property
    def ratio(self) -> float:
        if self._character_count == 0:
            return 0.0

        ratio_of_punctuation: float = (
            self._punctuation_count + self._symbol_count
        ) / self._character_count

        return ratio_of_punctuation if ratio_of_punctuation >= 0.3 else 0.0


class TooManyAccentuatedPlugin(MessDetectorPlugin):
    def __init__(self) -> None:
        self._character_count: int = 0
        self._accentuated_count: int = 0

    def eligible(self, character: str) -> bool:
        return character.isalpha()

    def feed(self, character: str) -> None:
        self._character_count += 1

        if is_accentuated(character):
            self._accentuated_count += 1

    def reset(self) -> None:  # pragma: no cover
        self._character_count = 0
        self._accentuated_count = 0

    @property
    def ratio(self) -> float:
        if self._character_count < 8:
            return 0.0

        ratio_of_accentuation: float = self._accentuated_count / self._character_count
        return ratio_of_accentuation if ratio_of_accentuation >= 0.35 else 0.0


class UnprintablePlugin(MessDetectorPlugin):
    def __init__(self) -> None:
        self._unprintable_count: int = 0
        self._character_count: int = 0

    def eligible(self, character: str) -> bool:
        return True

    def feed(self, character: str) -> None:
        if is_unprintable(character):
            self._unprintable_count += 1
        self._character_count += 1

    def reset(self) -> None:  # pragma: no cover
        self._unprintable_count = 0

    @property
    def ratio(self) -> float:
        if self._character_count == 0:
            return 0.0

        return (self._unprintable_count * 8) / self._character_count


class SuspiciousDuplicateAccentPlugin(MessDetectorPlugin):
    def __init__(self) -> None:
        self._successive_count: int = 0
        self._character_count: int = 0

        self._last_latin_character: Optional[str] = None

    def eligible(self, character: str) -> bool:
        return character.isalpha() and is_latin(character)

    def feed(self, character: str) -> None:
        self._character_count += 1
        if (
            self._last_latin_character is not None
            and is_accentuated(character)
            and is_accentuated(self._last_latin_character)
        ):
            if character.isupper() and self._last_latin_character.isupper():
                self._successive_count += 1
            # Worse if its the same char duplicated with different accent.
            if remove_accent(character) == remove_accent(self._last_latin_character):
                self._successive_count += 1
        self._last_latin_character = character

    def reset(self) -> None:  # pragma: no cover
        self._successive_count = 0
        self._character_count = 0
        self._last_latin_character = None

    @property
    def ratio(self) -> float:
        if self._character_count == 0:
            return 0.0

        return (self._successive_count * 2) / self._character_count


class SuspiciousRange(MessDetectorPlugin):
    def __init__(self) -> None:
        self._suspicious_successive_range_count: int = 0
        self._character_count: int = 0
        self._last_printable_seen: Optional[str] = None

    def eligible(self, character: str) -> bool:
        return character.isprintable()

    def feed(self, character: str) -> None:
        self._character_count += 1

        if (
            character.isspace()
            or is_punctuation(character)
            or character in COMMON_SAFE_ASCII_CHARACTERS
        ):
            self._last_printable_seen = None
            return

        if self._last_printable_seen is None:
            self._last_printable_seen = character
            return

        unicode_range_a: Optional[str] = unicode_range(self._last_printable_seen)
        unicode_range_b: Optional[str] = unicode_range(character)

        if is_suspiciously_successive_range(unicode_range_a, unicode_range_b):
            self._suspicious_successive_range_count += 1

        self._last_printable_seen = character

    def reset(self) -> None:  # pragma: no cover
        self._character_count = 0
        self._suspicious_successive_range_count = 0
        self._last_printable_seen = None

    @property
    def ratio(self) -> float:
        if self._character_count <= 13:
            return 0.0

        ratio_of_suspicious_range_usage: float = (
            self._suspicious_successive_range_count * 2
        ) / self._character_count

        return ratio_of_suspicious_range_usage


class SuperWeirdWordPlugin(MessDetectorPlugin):
    def __init__(self) -> None:
        self._word_count: int = 0
        self._bad_word_count: int = 0
        self._foreign_long_count: int = 0

        self._is_current_word_bad: bool = False
        self._foreign_long_watch: bool = False

        self._character_count: int = 0
        self._bad_character_count: int = 0

        self._buffer: str = ""
        self._buffer_accent_count: int = 0
        self._buffer_glyph_count: int = 0

    def eligible(self, character: str) -> bool:
        return True

    def feed(self, character: str) -> None:
        if character.isalpha():
            self._buffer += character
            if is_accentuated(character):
                self._buffer_accent_count += 1
            if (
                self._foreign_long_watch is False
                and (is_latin(character) is False or is_accentuated(character))
                and is_cjk(character) is False
                and is_hangul(character) is False
                and is_katakana(character) is False
                and is_hiragana(character) is False
                and is_thai(character) is False
            ):
                self._foreign_long_watch = True
            if (
                is_cjk(character)
                or is_hangul(character)
                or is_katakana(character)
                or is_hiragana(character)
                or is_thai(character)
            ):
                self._buffer_glyph_count += 1
            return
        if not self._buffer:
            return
        if (
            character.isspace() or is_punctuation(character) or is_separator(character)
        ) and self._buffer:
            self._word_count += 1
            buffer_length: int = len(self._buffer)

            self._character_count += buffer_length

            if buffer_length >= 4:
                if self._buffer_accent_count / buffer_length >= 0.5:
                    self._is_current_word_bad = True
                # Word/Buffer ending with an upper case accentuated letter are so rare,
                # that we will consider them all as suspicious. Same weight as foreign_long suspicious.
                elif (
                    is_accentuated(self._buffer[-1])
                    and self._buffer[-1].isupper()
                    and all(_.isupper() for _ in self._buffer) is False
                ):
                    self._foreign_long_count += 1
                    self._is_current_word_bad = True
                elif self._buffer_glyph_count == 1:
                    self._is_current_word_bad = True
                    self._foreign_long_count += 1
            if buffer_length >= 24 and self._foreign_long_watch:
                camel_case_dst = [
                    i
                    for c, i in zip(self._buffer, range(0, buffer_length))
                    if c.isupper()
                ]
                probable_camel_cased: bool = False

                if camel_case_dst and (len(camel_case_dst) / buffer_length <= 0.3):
                    probable_camel_cased = True

                if not probable_camel_cased:
                    self._foreign_long_count += 1
                    self._is_current_word_bad = True

            if self._is_current_word_bad:
                self._bad_word_count += 1
                self._bad_character_count += len(self._buffer)
                self._is_current_word_bad = False

            self._foreign_long_watch = False
            self._buffer = ""
            self._buffer_accent_count = 0
            self._buffer_glyph_count = 0
        elif (
            character not in {"<", ">", "-", "=", "~", "|", "_"}
            and character.isdigit() is False
            and is_symbol(character)
        ):
            self._is_current_word_bad = True
            self._buffer += character

    def reset(self) -> None:  # pragma: no cover
        self._buffer = ""
        self._is_current_word_bad = False
        self._foreign_long_watch = False
        self._bad_word_count = 0
        self._word_count = 0
        self._character_count = 0
        self._bad_character_count = 0
        self._foreign_long_count = 0

    @property
    def ratio(self) -> float:
        if self._word_count <= 10 and self._foreign_long_count == 0:
            return 0.0

        return self._bad_character_count / self._character_count


class CjkInvalidStopPlugin(MessDetectorPlugin):
    """
    GB(Chinese) based encoding often render the stop incorrectly when the content does not fit and
    can be easily detected. Searching for the overuse of '丅' and '丄'.
    """

    def __init__(self) -> None:
        self._wrong_stop_count: int = 0
        self._cjk_character_count: int = 0

    def eligible(self, character: str) -> bool:
        return True

    def feed(self, character: str) -> None:
        if character in {"丅", "丄"}:
            self._wrong_stop_count += 1
            return
        if is_cjk(character):
            self._cjk_character_count += 1

    def reset(self) -> None:  # pragma: no cover
        self._wrong_stop_count = 0
        self._cjk_character_count = 0

    @property
    def ratio(self) -> float:
        if self._cjk_character_count < 16:
            return 0.0
        return self._wrong_stop_count / self._cjk_character_count


class ArchaicUpperLowerPlugin(MessDetectorPlugin):
    def __init__(self) -> None:
        self._buf: bool = False

        self._character_count_since_last_sep: int = 0

        self._successive_upper_lower_count: int = 0
        self._successive_upper_lower_count_final: int = 0

        self._character_count: int = 0

        self._last_alpha_seen: Optional[str] = None
        self._current_ascii_only: bool = True

    def eligible(self, character: str) -> bool:
        return True

    def feed(self, character: str) -> None:
        is_concerned = character.isalpha() and is_case_variable(character)
        chunk_sep = is_concerned is False

        if chunk_sep and self._character_count_since_last_sep > 0:
            if (
                self._character_count_since_last_sep <= 64
                and character.isdigit() is False
                and self._current_ascii_only is False
            ):
                self._successive_upper_lower_count_final += (
                    self._successive_upper_lower_count
                )

            self._successive_upper_lower_count = 0
            self._character_count_since_last_sep = 0
            self._last_alpha_seen = None
            self._buf = False
            self._character_count += 1
            self._current_ascii_only = True

            return

        if self._current_ascii_only is True and character.isascii() is False:
            self._current_ascii_only = False

        if self._last_alpha_seen is not None:
            if (character.isupper() and self._last_alpha_seen.islower()) or (
                character.islower() and self._last_alpha_seen.isupper()
            ):
                if self._buf is True:
                    self._successive_upper_lower_count += 2
                    self._buf = False
                else:
                    self._buf = True
            else:
                self._buf = False

        self._character_count += 1
        self._character_count_since_last_sep += 1
        self._last_alpha_seen = character

    def reset(self) -> None:  # pragma: no cover
        self._character_count = 0
        self._character_count_since_last_sep = 0
        self._successive_upper_lower_count = 0
        self._successive_upper_lower_count_final = 0
        self._last_alpha_seen = None
        self._buf = False
        self._current_ascii_only = True

    @property
    def ratio(self) -> float:
        if self._character_count == 0:
            return 0.0

        return self._successive_upper_lower_count_final / self._character_count


class ArabicIsolatedFormPlugin(MessDetectorPlugin):
    def __init__(self) -> None:
        self._character_count: int = 0
        self._isolated_form_count: int = 0

    def reset(self) -> None:  # pragma: no cover
        self._character_count = 0
        self._isolated_form_count = 0

    def eligible(self, character: str) -> bool:
        return is_arabic(character)

    def feed(self, character: str) -> None:
        self._character_count += 1

        if is_arabic_isolated_form(character):
            self._isolated_form_count += 1

    @property
    def ratio(self) -> float:
        if self._character_count < 8:
            return 0.0

        isolated_form_usage: float = self._isolated_form_count / self._character_count

        return isolated_form_usage


@lru_cache(maxsize=1024)
def is_suspiciously_successive_range(
    unicode_range_a: Optional[str], unicode_range_b: Optional[str]
) -> bool:
    """
    Determine if two Unicode range seen next to each other can be considered as suspicious.
    """
    if unicode_range_a is None or unicode_range_b is None:
        return True

    if unicode_range_a == unicode_range_b:
        return False

    if "Latin" in unicode_range_a and "Latin" in unicode_range_b:
        return False

    if "Emoticons" in unicode_range_a or "Emoticons" in unicode_range_b:
        return False

    # Latin characters can be accompanied with a combining diacritical mark
    # eg. Vietnamese.
    if ("Latin" in unicode_range_a or "Latin" in unicode_range_b) and (
        "Combining" in unicode_range_a or "Combining" in unicode_range_b
    ):
        return False

    keywords_range_a, keywords_range_b = unicode_range_a.split(
        " "
    ), unicode_range_b.split(" ")

    for el in keywords_range_a:
        if el in UNICODE_SECONDARY_RANGE_KEYWORD:
            continue
        if el in keywords_range_b:
            return False

    # Japanese Exception
    range_a_jp_chars, range_b_jp_chars = (
        unicode_range_a
        in (
            "Hiragana",
            "Katakana",
        ),
        unicode_range_b in ("Hiragana", "Katakana"),
    )
    if (range_a_jp_chars or range_b_jp_chars) and (
        "CJK" in unicode_range_a or "CJK" in unicode_range_b
    ):
        return False
    if range_a_jp_chars and range_b_jp_chars:
        return False

    if "Hangul" in unicode_range_a or "Hangul" in unicode_range_b:
        if "CJK" in unicode_range_a or "CJK" in unicode_range_b:
            return False
        if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin":
            return False

    # Chinese/Japanese use dedicated range for punctuation and/or separators.
    if ("CJK" in unicode_range_a or "CJK" in unicode_range_b) or (
        unicode_range_a in ["Katakana", "Hiragana"]
        and unicode_range_b in ["Katakana", "Hiragana"]
    ):
        if "Punctuation" in unicode_range_a or "Punctuation" in unicode_range_b:
            return False
        if "Forms" in unicode_range_a or "Forms" in unicode_range_b:
            return False
        if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin":
            return False

    return True


@lru_cache(maxsize=2048)
def mess_ratio(
    decoded_sequence: str, maximum_threshold: float = 0.2, debug: bool = False
) -> float:
    """
    Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.
    """

    detectors: List[MessDetectorPlugin] = [
        md_class() for md_class in MessDetectorPlugin.__subclasses__()
    ]

    length: int = len(decoded_sequence) + 1

    mean_mess_ratio: float = 0.0

    if length < 512:
        intermediary_mean_mess_ratio_calc: int = 32
    elif length <= 1024:
        intermediary_mean_mess_ratio_calc = 64
    else:
        intermediary_mean_mess_ratio_calc = 128

    for character, index in zip(decoded_sequence + "\n", range(length)):
        for detector in detectors:
            if detector.eligible(character):
                detector.feed(character)

        if (
            index > 0 and index % intermediary_mean_mess_ratio_calc == 0
        ) or index == length - 1:
            mean_mess_ratio = sum(dt.ratio for dt in detectors)

            if mean_mess_ratio >= maximum_threshold:
                break

    if debug:
        logger = getLogger("charset_normalizer")

        logger.log(
            TRACE,
            "Mess-detector extended-analysis start. "
            f"intermediary_mean_mess_ratio_calc={intermediary_mean_mess_ratio_calc} mean_mess_ratio={mean_mess_ratio} "
            f"maximum_threshold={maximum_threshold}",
        )

        if len(decoded_sequence) > 16:
            logger.log(TRACE, f"Starting with: {decoded_sequence[:16]}")
            logger.log(TRACE, f"Ending with: {decoded_sequence[-16::]}")

        for dt in detectors:  # pragma: nocover
            logger.log(TRACE, f"{dt.__class__}: {dt.ratio}")

    return round(mean_mess_ratio, 3)
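mess_ratio() above is the aggregation point for every MessDetectorPlugin subclass; a minimal sketch of calling it directly follows (the sample strings are illustrative, and the exact scores depend on the plugin set):

# Illustrative check of the plugin-driven mess_ratio() defined above.
from charset_normalizer.md import mess_ratio

clean = "Small sentence. It is a very small sentence."
mangled = "ÃƒÂ©Ã¢â‚¬Å¡ mojibake Ã¢â‚¬Ëœ"

print(mess_ratio(clean))    # expected to stay near 0.0 for ordinary text
print(mess_ratio(mangled))  # expected to rise toward the 0.2 default threshold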
BIN
env/lib/python3.12/site-packages/charset_normalizer/md__mypyc.cpython-312-aarch64-linux-gnu.so
vendored
Executable file
Binary file not shown.
359
env/lib/python3.12/site-packages/charset_normalizer/models.py
vendored
Normal file
@@ -0,0 +1,359 @@
from encodings.aliases import aliases
from hashlib import sha256
from json import dumps
from re import sub
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union

from .constant import RE_POSSIBLE_ENCODING_INDICATION, TOO_BIG_SEQUENCE
from .utils import iana_name, is_multi_byte_encoding, unicode_range


class CharsetMatch:
    def __init__(
        self,
        payload: bytes,
        guessed_encoding: str,
        mean_mess_ratio: float,
        has_sig_or_bom: bool,
        languages: "CoherenceMatches",
        decoded_payload: Optional[str] = None,
        preemptive_declaration: Optional[str] = None,
    ):
        self._payload: bytes = payload

        self._encoding: str = guessed_encoding
        self._mean_mess_ratio: float = mean_mess_ratio
        self._languages: CoherenceMatches = languages
        self._has_sig_or_bom: bool = has_sig_or_bom
        self._unicode_ranges: Optional[List[str]] = None

        self._leaves: List[CharsetMatch] = []
        self._mean_coherence_ratio: float = 0.0

        self._output_payload: Optional[bytes] = None
        self._output_encoding: Optional[str] = None

        self._string: Optional[str] = decoded_payload

        self._preemptive_declaration: Optional[str] = preemptive_declaration

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, CharsetMatch):
            if isinstance(other, str):
                return iana_name(other) == self.encoding
            return False
        return self.encoding == other.encoding and self.fingerprint == other.fingerprint

    def __lt__(self, other: object) -> bool:
        """
        Implemented to make sorted available upon CharsetMatches items.
        """
        if not isinstance(other, CharsetMatch):
            raise ValueError

        chaos_difference: float = abs(self.chaos - other.chaos)
        coherence_difference: float = abs(self.coherence - other.coherence)

        # Below 1% difference --> Use Coherence
        if chaos_difference < 0.01 and coherence_difference > 0.02:
            return self.coherence > other.coherence
        elif chaos_difference < 0.01 and coherence_difference <= 0.02:
            # When having a difficult decision, use the result that decoded as many multi-byte as possible.
            # preserve RAM usage!
            if len(self._payload) >= TOO_BIG_SEQUENCE:
                return self.chaos < other.chaos
            return self.multi_byte_usage > other.multi_byte_usage

        return self.chaos < other.chaos

    @property
    def multi_byte_usage(self) -> float:
        return 1.0 - (len(str(self)) / len(self.raw))

    def __str__(self) -> str:
        # Lazy Str Loading
        if self._string is None:
            self._string = str(self._payload, self._encoding, "strict")
        return self._string

    def __repr__(self) -> str:
        return "<CharsetMatch '{}' bytes({})>".format(self.encoding, self.fingerprint)

    def add_submatch(self, other: "CharsetMatch") -> None:
        if not isinstance(other, CharsetMatch) or other == self:
            raise ValueError(
                "Unable to add instance <{}> as a submatch of a CharsetMatch".format(
                    other.__class__
                )
            )

        other._string = None  # Unload RAM usage; dirty trick.
        self._leaves.append(other)

    @property
    def encoding(self) -> str:
        return self._encoding

    @property
    def encoding_aliases(self) -> List[str]:
        """
        Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
        """
        also_known_as: List[str] = []
        for u, p in aliases.items():
            if self.encoding == u:
                also_known_as.append(p)
            elif self.encoding == p:
                also_known_as.append(u)
        return also_known_as

    @property
    def bom(self) -> bool:
        return self._has_sig_or_bom

    @property
    def byte_order_mark(self) -> bool:
        return self._has_sig_or_bom

    @property
    def languages(self) -> List[str]:
        """
        Return the complete list of possible languages found in decoded sequence.
        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
        """
        return [e[0] for e in self._languages]

    @property
    def language(self) -> str:
        """
        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
        "Unknown".
        """
        if not self._languages:
            # Trying to infer the language based on the given encoding
            # Its either English or we should not pronounce ourselves in certain cases.
            if "ascii" in self.could_be_from_charset:
                return "English"

            # doing it there to avoid circular import
            from charset_normalizer.cd import encoding_languages, mb_encoding_languages

            languages = (
                mb_encoding_languages(self.encoding)
                if is_multi_byte_encoding(self.encoding)
                else encoding_languages(self.encoding)
            )

            if len(languages) == 0 or "Latin Based" in languages:
                return "Unknown"

            return languages[0]

        return self._languages[0][0]

    @property
    def chaos(self) -> float:
        return self._mean_mess_ratio

    @property
    def coherence(self) -> float:
        if not self._languages:
            return 0.0
        return self._languages[0][1]

    @property
    def percent_chaos(self) -> float:
        return round(self.chaos * 100, ndigits=3)

    @property
    def percent_coherence(self) -> float:
        return round(self.coherence * 100, ndigits=3)

    @property
    def raw(self) -> bytes:
        """
        Original untouched bytes.
        """
        return self._payload

    @property
    def submatch(self) -> List["CharsetMatch"]:
        return self._leaves

    @property
    def has_submatch(self) -> bool:
        return len(self._leaves) > 0

    @property
    def alphabets(self) -> List[str]:
        if self._unicode_ranges is not None:
            return self._unicode_ranges
        # list detected ranges
        detected_ranges: List[Optional[str]] = [
            unicode_range(char) for char in str(self)
        ]
        # filter and sort
        self._unicode_ranges = sorted(list({r for r in detected_ranges if r}))
        return self._unicode_ranges

    @property
    def could_be_from_charset(self) -> List[str]:
        """
        The complete list of encoding that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in property 'encoding'.
        """
        return [self._encoding] + [m.encoding for m in self._leaves]

    def output(self, encoding: str = "utf_8") -> bytes:
        """
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
        Any errors will be simply ignored by the encoder NOT replaced.
        """
        if self._output_encoding is None or self._output_encoding != encoding:
            self._output_encoding = encoding
            decoded_string = str(self)
            if (
                self._preemptive_declaration is not None
                and self._preemptive_declaration.lower()
                not in ["utf-8", "utf8", "utf_8"]
            ):
                patched_header = sub(
                    RE_POSSIBLE_ENCODING_INDICATION,
                    lambda m: m.string[m.span()[0] : m.span()[1]].replace(
                        m.groups()[0], iana_name(self._output_encoding)  # type: ignore[arg-type]
                    ),
                    decoded_string[:8192],
                    1,
                )

                decoded_string = patched_header + decoded_string[8192:]

            self._output_payload = decoded_string.encode(encoding, "replace")

        return self._output_payload  # type: ignore

    @property
    def fingerprint(self) -> str:
        """
        Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
        """
        return sha256(self.output()).hexdigest()


class CharsetMatches:
    """
    Container with every CharsetMatch items ordered by default from most probable to the less one.
    Act like a list(iterable) but does not implements all related methods.
    """

    def __init__(self, results: Optional[List[CharsetMatch]] = None):
        self._results: List[CharsetMatch] = sorted(results) if results else []

    def __iter__(self) -> Iterator[CharsetMatch]:
        yield from self._results

    def __getitem__(self, item: Union[int, str]) -> CharsetMatch:
        """
        Retrieve a single item either by its position or encoding name (alias may be used here).
        Raise KeyError upon invalid index or encoding not present in results.
        """
        if isinstance(item, int):
            return self._results[item]
        if isinstance(item, str):
            item = iana_name(item, False)
            for result in self._results:
                if item in result.could_be_from_charset:
                    return result
        raise KeyError

    def __len__(self) -> int:
        return len(self._results)

    def __bool__(self) -> bool:
        return len(self._results) > 0

    def append(self, item: CharsetMatch) -> None:
        """
        Insert a single match. Will be inserted accordingly to preserve sort.
        Can be inserted as a submatch.
        """
        if not isinstance(item, CharsetMatch):
            raise ValueError(
                "Cannot append instance '{}' to CharsetMatches".format(
                    str(item.__class__)
                )
            )
        # We should disable the submatch factoring when the input file is too heavy (conserve RAM usage)
        if len(item.raw) < TOO_BIG_SEQUENCE:
            for match in self._results:
                if match.fingerprint == item.fingerprint and match.chaos == item.chaos:
                    match.add_submatch(item)
                    return
        self._results.append(item)
        self._results = sorted(self._results)

    def best(self) -> Optional["CharsetMatch"]:
        """
        Simply return the first match. Strict equivalent to matches[0].
        """
        if not self._results:
            return None
        return self._results[0]

    def first(self) -> Optional["CharsetMatch"]:
        """
        Redundant method, call the method best(). Kept for BC reasons.
        """
        return self.best()


CoherenceMatch = Tuple[str, float]
CoherenceMatches = List[CoherenceMatch]


class CliDetectionResult:
    def __init__(
        self,
        path: str,
        encoding: Optional[str],
        encoding_aliases: List[str],
        alternative_encodings: List[str],
        language: str,
        alphabets: List[str],
        has_sig_or_bom: bool,
        chaos: float,
        coherence: float,
        unicode_path: Optional[str],
        is_preferred: bool,
    ):
        self.path: str = path
        self.unicode_path: Optional[str] = unicode_path
        self.encoding: Optional[str] = encoding
        self.encoding_aliases: List[str] = encoding_aliases
        self.alternative_encodings: List[str] = alternative_encodings
        self.language: str = language
        self.alphabets: List[str] = alphabets
        self.has_sig_or_bom: bool = has_sig_or_bom
        self.chaos: float = chaos
        self.coherence: float = coherence
        self.is_preferred: bool = is_preferred

    @property
    def __dict__(self) -> Dict[str, Any]:  # type: ignore
        return {
            "path": self.path,
            "encoding": self.encoding,
            "encoding_aliases": self.encoding_aliases,
            "alternative_encodings": self.alternative_encodings,
            "language": self.language,
            "alphabets": self.alphabets,
            "has_sig_or_bom": self.has_sig_or_bom,
            "chaos": self.chaos,
            "coherence": self.coherence,
            "unicode_path": self.unicode_path,
            "is_preferred": self.is_preferred,
        }

    def to_json(self) -> str:
        return dumps(self.__dict__, ensure_ascii=True, indent=4)
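The CharsetMatch/CharsetMatches containers above are what from_bytes() hands back; a hedged sketch of the read side (the cp1252 sample is an assumption for illustration):

# Sketch: inspecting the CharsetMatch API defined above.
from charset_normalizer import from_bytes

matches = from_bytes("Bonjour, le café est prêt.".encode("cp1252"))

best = matches.best()  # most probable CharsetMatch, or None when nothing fit
if best is not None:
    print(best.encoding, best.language)
    print(best.percent_chaos, best.percent_coherence)
    print(best.could_be_from_charset)  # every encoding producing the same str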
0
env/lib/python3.12/site-packages/charset_normalizer/py.typed
vendored
Normal file
421
env/lib/python3.12/site-packages/charset_normalizer/utils.py
vendored
Normal file
@@ -0,0 +1,421 @@
import importlib
import logging
import unicodedata
from codecs import IncrementalDecoder
from encodings.aliases import aliases
from functools import lru_cache
from re import findall
from typing import Generator, List, Optional, Set, Tuple, Union

from _multibytecodec import MultibyteIncrementalDecoder

from .constant import (
    ENCODING_MARKS,
    IANA_SUPPORTED_SIMILAR,
    RE_POSSIBLE_ENCODING_INDICATION,
    UNICODE_RANGES_COMBINED,
    UNICODE_SECONDARY_RANGE_KEYWORD,
    UTF8_MAXIMAL_ALLOCATION,
)


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_accentuated(character: str) -> bool:
    try:
        description: str = unicodedata.name(character)
    except ValueError:
        return False
    return (
        "WITH GRAVE" in description
        or "WITH ACUTE" in description
        or "WITH CEDILLA" in description
        or "WITH DIAERESIS" in description
        or "WITH CIRCUMFLEX" in description
        or "WITH TILDE" in description
        or "WITH MACRON" in description
        or "WITH RING ABOVE" in description
    )


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def remove_accent(character: str) -> str:
    decomposed: str = unicodedata.decomposition(character)
    if not decomposed:
        return character

    codes: List[str] = decomposed.split(" ")

    return chr(int(codes[0], 16))


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def unicode_range(character: str) -> Optional[str]:
    """
    Retrieve the Unicode range official name from a single character.
    """
    character_ord: int = ord(character)

    for range_name, ord_range in UNICODE_RANGES_COMBINED.items():
        if character_ord in ord_range:
            return range_name

    return None


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_latin(character: str) -> bool:
    try:
        description: str = unicodedata.name(character)
    except ValueError:
        return False
    return "LATIN" in description


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_punctuation(character: str) -> bool:
    character_category: str = unicodedata.category(character)

    if "P" in character_category:
        return True

    character_range: Optional[str] = unicode_range(character)

    if character_range is None:
        return False

    return "Punctuation" in character_range


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_symbol(character: str) -> bool:
    character_category: str = unicodedata.category(character)

    if "S" in character_category or "N" in character_category:
        return True

    character_range: Optional[str] = unicode_range(character)

    if character_range is None:
        return False

    return "Forms" in character_range and character_category != "Lo"


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_emoticon(character: str) -> bool:
    character_range: Optional[str] = unicode_range(character)

    if character_range is None:
        return False

    return "Emoticons" in character_range or "Pictographs" in character_range


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_separator(character: str) -> bool:
    if character.isspace() or character in {"|", "+", "<", ">"}:
        return True

    character_category: str = unicodedata.category(character)

    return "Z" in character_category or character_category in {"Po", "Pd", "Pc"}


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_case_variable(character: str) -> bool:
    return character.islower() != character.isupper()


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_cjk(character: str) -> bool:
    try:
        character_name = unicodedata.name(character)
    except ValueError:
        return False

    return "CJK" in character_name


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_hiragana(character: str) -> bool:
    try:
        character_name = unicodedata.name(character)
    except ValueError:
        return False

    return "HIRAGANA" in character_name


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_katakana(character: str) -> bool:
    try:
        character_name = unicodedata.name(character)
    except ValueError:
        return False

    return "KATAKANA" in character_name


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_hangul(character: str) -> bool:
    try:
        character_name = unicodedata.name(character)
    except ValueError:
        return False

    return "HANGUL" in character_name


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_thai(character: str) -> bool:
    try:
        character_name = unicodedata.name(character)
    except ValueError:
        return False

    return "THAI" in character_name


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_arabic(character: str) -> bool:
    try:
        character_name = unicodedata.name(character)
    except ValueError:
        return False

    return "ARABIC" in character_name


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_arabic_isolated_form(character: str) -> bool:
    try:
        character_name = unicodedata.name(character)
    except ValueError:
        return False

    return "ARABIC" in character_name and "ISOLATED FORM" in character_name


@lru_cache(maxsize=len(UNICODE_RANGES_COMBINED))
def is_unicode_range_secondary(range_name: str) -> bool:
    return any(keyword in range_name for keyword in UNICODE_SECONDARY_RANGE_KEYWORD)


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_unprintable(character: str) -> bool:
    return (
        character.isspace() is False  # includes \n \t \r \v
        and character.isprintable() is False
        and character != "\x1A"  # Why? Its the ASCII substitute character.
        and character != "\ufeff"  # bug discovered in Python,
        # Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space.
    )


def any_specified_encoding(sequence: bytes, search_zone: int = 8192) -> Optional[str]:
    """
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    """
    if not isinstance(sequence, bytes):
        raise TypeError

    seq_len: int = len(sequence)

    results: List[str] = findall(
        RE_POSSIBLE_ENCODING_INDICATION,
        sequence[: min(seq_len, search_zone)].decode("ascii", errors="ignore"),
    )

    if len(results) == 0:
        return None

    for specified_encoding in results:
        specified_encoding = specified_encoding.lower().replace("-", "_")

        encoding_alias: str
        encoding_iana: str

        for encoding_alias, encoding_iana in aliases.items():
            if encoding_alias == specified_encoding:
                return encoding_iana
            if encoding_iana == specified_encoding:
                return encoding_iana

    return None


@lru_cache(maxsize=128)
def is_multi_byte_encoding(name: str) -> bool:
    """
    Verify is a specific encoding is a multi byte one based on it IANA name
    """
    return name in {
        "utf_8",
        "utf_8_sig",
        "utf_16",
        "utf_16_be",
        "utf_16_le",
        "utf_32",
        "utf_32_le",
        "utf_32_be",
        "utf_7",
    } or issubclass(
        importlib.import_module("encodings.{}".format(name)).IncrementalDecoder,
        MultibyteIncrementalDecoder,
    )


def identify_sig_or_bom(sequence: bytes) -> Tuple[Optional[str], bytes]:
    """
    Identify and extract SIG/BOM in given sequence.
    """

    for iana_encoding in ENCODING_MARKS:
        marks: Union[bytes, List[bytes]] = ENCODING_MARKS[iana_encoding]

        if isinstance(marks, bytes):
            marks = [marks]

        for mark in marks:
            if sequence.startswith(mark):
                return iana_encoding, mark

    return None, b""


def should_strip_sig_or_bom(iana_encoding: str) -> bool:
    return iana_encoding not in {"utf_16", "utf_32"}


def iana_name(cp_name: str, strict: bool = True) -> str:
    cp_name = cp_name.lower().replace("-", "_")

    encoding_alias: str
    encoding_iana: str

    for encoding_alias, encoding_iana in aliases.items():
        if cp_name in [encoding_alias, encoding_iana]:
            return encoding_iana

    if strict:
        raise ValueError("Unable to retrieve IANA for '{}'".format(cp_name))

    return cp_name


def range_scan(decoded_sequence: str) -> List[str]:
    ranges: Set[str] = set()

    for character in decoded_sequence:
        character_range: Optional[str] = unicode_range(character)

        if character_range is None:
            continue

        ranges.add(character_range)

    return list(ranges)


def cp_similarity(iana_name_a: str, iana_name_b: str) -> float:
    if is_multi_byte_encoding(iana_name_a) or is_multi_byte_encoding(iana_name_b):
        return 0.0

    decoder_a = importlib.import_module(
        "encodings.{}".format(iana_name_a)
    ).IncrementalDecoder
    decoder_b = importlib.import_module(
        "encodings.{}".format(iana_name_b)
    ).IncrementalDecoder

    id_a: IncrementalDecoder = decoder_a(errors="ignore")
    id_b: IncrementalDecoder = decoder_b(errors="ignore")

    character_match_count: int = 0

    for i in range(255):
        to_be_decoded: bytes = bytes([i])
        if id_a.decode(to_be_decoded) == id_b.decode(to_be_decoded):
            character_match_count += 1

    return character_match_count / 254


def is_cp_similar(iana_name_a: str, iana_name_b: str) -> bool:
    """
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    """
    return (
        iana_name_a in IANA_SUPPORTED_SIMILAR
        and iana_name_b in IANA_SUPPORTED_SIMILAR[iana_name_a]
    )


def set_logging_handler(
    name: str = "charset_normalizer",
    level: int = logging.INFO,
    format_string: str = "%(asctime)s | %(levelname)s | %(message)s",
) -> None:
    logger = logging.getLogger(name)
    logger.setLevel(level)

    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(format_string))
    logger.addHandler(handler)


def cut_sequence_chunks(
    sequences: bytes,
    encoding_iana: str,
    offsets: range,
    chunk_size: int,
    bom_or_sig_available: bool,
    strip_sig_or_bom: bool,
    sig_payload: bytes,
    is_multi_byte_decoder: bool,
    decoded_payload: Optional[str] = None,
) -> Generator[str, None, None]:
    if decoded_payload and is_multi_byte_decoder is False:
        for i in offsets:
            chunk = decoded_payload[i : i + chunk_size]
            if not chunk:
                break
            yield chunk
    else:
        for i in offsets:
            chunk_end = i + chunk_size
            if chunk_end > len(sequences) + 8:
                continue

            cut_sequence = sequences[i : i + chunk_size]

            if bom_or_sig_available and strip_sig_or_bom is False:
                cut_sequence = sig_payload + cut_sequence

            chunk = cut_sequence.decode(
                encoding_iana,
                errors="ignore" if is_multi_byte_decoder else "strict",
            )

            # multi-byte bad cutting detector and adjustment
            # not the cleanest way to perform that fix but clever enough for now.
            if is_multi_byte_decoder and i > 0:
                chunk_partial_size_chk: int = min(chunk_size, 16)

                if (
                    decoded_payload
                    and chunk[:chunk_partial_size_chk] not in decoded_payload
                ):
                    for j in range(i, i - 4, -1):
                        cut_sequence = sequences[j:chunk_end]

                        if bom_or_sig_available and strip_sig_or_bom is False:
                            cut_sequence = sig_payload + cut_sequence

                        chunk = cut_sequence.decode(encoding_iana, errors="ignore")

                        if chunk[:chunk_partial_size_chk] in decoded_payload:
                            break

            yield chunk
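Most helpers above are cached single-character classifiers plus a few IANA utilities; a quick sketch follows (the expected outputs in the comments are inferred from the code above):

# Quick exercise of a few helpers from utils.py above.
from charset_normalizer.utils import iana_name, is_accentuated, unicode_range

print(unicode_range("é"))       # 'Latin-1 Supplement'
print(is_accentuated("é"))      # True: 'WITH ACUTE' appears in its Unicode name
print(iana_name("ISO-8859-1"))  # 'latin_1', normalized through encodings.aliases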
6
env/lib/python3.12/site-packages/charset_normalizer/version.py
vendored
Normal file
@@ -0,0 +1,6 @@
"""
Expose version
"""

__version__ = "3.4.0"
VERSION = __version__.split(".")
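Since VERSION above is just the dotted string split apart, the two names stay in lockstep:

# Sketch: the version metadata above is plain strings/lists.
from charset_normalizer.version import VERSION, __version__

assert __version__ == ".".join(VERSION)  # "3.4.0" round-trips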
47
env/lib/python3.12/site-packages/gitlab/__init__.py
vendored
Normal file
@@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2019 Gauvain Pocentek, 2019-2023 python-gitlab team
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Wrapper for the GitLab API."""

import warnings

import gitlab.config  # noqa: F401
from gitlab._version import (  # noqa: F401
    __author__,
    __copyright__,
    __email__,
    __license__,
    __title__,
    __version__,
)
from gitlab.client import Gitlab, GitlabList, GraphQL  # noqa: F401
from gitlab.exceptions import *  # noqa: F401,F403

warnings.filterwarnings("default", category=DeprecationWarning, module="^gitlab")


__all__ = [
    "__author__",
    "__copyright__",
    "__email__",
    "__license__",
    "__title__",
    "__version__",
    "Gitlab",
    "GitlabList",
    "GraphQL",
]
__all__.extend(gitlab.exceptions.__all__)
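The package root above re-exports the Gitlab client and version metadata; a hedged connection sketch (the URL, token, and project path are placeholders, not part of the commit):

# Hypothetical use of the re-exported names above.
import gitlab

gl = gitlab.Gitlab("https://gitlab.example.com", private_token="<token>")
print(gitlab.__version__)                   # re-exported from gitlab._version
project = gl.projects.get("group/project")  # fetch by path or numeric ID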
4
env/lib/python3.12/site-packages/gitlab/__main__.py
vendored
Normal file
@@ -0,0 +1,4 @@
import gitlab.cli


if __name__ == "__main__":
    gitlab.cli.main()
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/__init__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/__main__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/_version.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/base.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/cli.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/client.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/config.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/const.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/exceptions.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/mixins.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/types.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/utils.cpython-312.pyc
vendored
Normal file
Binary file not shown.
22
env/lib/python3.12/site-packages/gitlab/_backends/__init__.py
vendored
Normal file
@@ -0,0 +1,22 @@
"""
Defines http backends for processing http requests
"""

from .requests_backend import (
    JobTokenAuth,
    OAuthTokenAuth,
    PrivateTokenAuth,
    RequestsBackend,
    RequestsResponse,
)

DefaultBackend = RequestsBackend
DefaultResponse = RequestsResponse

__all__ = [
    "DefaultBackend",
    "DefaultResponse",
    "JobTokenAuth",
    "OAuthTokenAuth",
    "PrivateTokenAuth",
]
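A short sketch of what the DefaultBackend alias enables: constructing the backend around a caller-owned requests.Session (the header tweak is illustrative only):

import requests

from gitlab._backends import DefaultBackend

session = requests.Session()
session.headers["X-Demo"] = "example"  # illustrative session customization
backend = DefaultBackend(session=session)  # RequestsBackend reuses this session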
BIN
env/lib/python3.12/site-packages/gitlab/_backends/__pycache__/__init__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/_backends/__pycache__/graphql.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/_backends/__pycache__/protocol.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/_backends/__pycache__/requests_backend.cpython-312.pyc
vendored
Normal file
Binary file not shown.
24
env/lib/python3.12/site-packages/gitlab/_backends/graphql.py
vendored
Normal file
@@ -0,0 +1,24 @@
from typing import Any

import httpx
from gql.transport.httpx import HTTPXTransport


class GitlabTransport(HTTPXTransport):
    """A gql httpx transport that reuses an existing httpx.Client.

    By default, gql's transports do not have a keep-alive session
    and do not enable providing your own session that's kept open.
    This transport lets us provide and close our session on our own
    and provide additional auth.
    For details, see https://github.com/graphql-python/gql/issues/91.
    """

    def __init__(self, *args: Any, client: httpx.Client, **kwargs: Any):
        super().__init__(*args, **kwargs)
        self.client = client

    def connect(self) -> None:
        pass

    def close(self) -> None:
        pass
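A hedged usage sketch, assuming the gql and httpx packages and a placeholder URL/token; it shows why connect() and close() are no-ops here, since the caller owns the httpx.Client lifecycle:

import httpx
from gql import Client, gql

http_client = httpx.Client(headers={"Authorization": "Bearer <token>"})
transport = GitlabTransport(
    "https://gitlab.example.com/api/graphql", client=http_client
)
gql_client = Client(transport=transport)
result = gql_client.execute(gql("query { currentUser { username } }"))
http_client.close()  # the transport deliberately leaves closing to us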
32
env/lib/python3.12/site-packages/gitlab/_backends/protocol.py
vendored
Normal file
@@ -0,0 +1,32 @@
import abc
import sys
from typing import Any, Dict, Optional, Union

import requests
from requests_toolbelt.multipart.encoder import MultipartEncoder  # type: ignore

if sys.version_info >= (3, 8):
    from typing import Protocol
else:
    from typing_extensions import Protocol


class BackendResponse(Protocol):
    @abc.abstractmethod
    def __init__(self, response: requests.Response) -> None: ...


class Backend(Protocol):
    @abc.abstractmethod
    def http_request(
        self,
        method: str,
        url: str,
        json: Optional[Union[Dict[str, Any], bytes]],
        data: Optional[Union[Dict[str, Any], MultipartEncoder]],
        params: Optional[Any],
        timeout: Optional[float],
        verify: Optional[Union[bool, str]],
        stream: Optional[bool],
        **kwargs: Any,
    ) -> BackendResponse: ...
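Since Backend and BackendResponse are Protocols, consumers rely on structural typing; a minimal sketch (the URL is a placeholder) of code that accepts any conforming backend:

def fetch_version(backend: Backend) -> BackendResponse:
    # Any object with a matching http_request() satisfies the Protocol;
    # no inheritance is required.
    return backend.http_request(
        method="get",
        url="https://gitlab.example.com/api/v4/version",
        json=None,
        data=None,
        params=None,
        timeout=10.0,
        verify=True,
        stream=False,
    )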
168
env/lib/python3.12/site-packages/gitlab/_backends/requests_backend.py
vendored
Normal file
@@ -0,0 +1,168 @@
from __future__ import annotations

import dataclasses
from typing import Any, BinaryIO, Dict, Optional, TYPE_CHECKING, Union

import requests
from requests import PreparedRequest
from requests.auth import AuthBase
from requests.structures import CaseInsensitiveDict
from requests_toolbelt.multipart.encoder import MultipartEncoder  # type: ignore

from . import protocol


class TokenAuth:
    def __init__(self, token: str):
        self.token = token


class OAuthTokenAuth(TokenAuth, AuthBase):
    def __call__(self, r: PreparedRequest) -> PreparedRequest:
        r.headers["Authorization"] = f"Bearer {self.token}"
        r.headers.pop("PRIVATE-TOKEN", None)
        r.headers.pop("JOB-TOKEN", None)
        return r


class PrivateTokenAuth(TokenAuth, AuthBase):
    def __call__(self, r: PreparedRequest) -> PreparedRequest:
        r.headers["PRIVATE-TOKEN"] = self.token
        r.headers.pop("JOB-TOKEN", None)
        r.headers.pop("Authorization", None)
        return r


class JobTokenAuth(TokenAuth, AuthBase):
    def __call__(self, r: PreparedRequest) -> PreparedRequest:
        r.headers["JOB-TOKEN"] = self.token
        r.headers.pop("PRIVATE-TOKEN", None)
        r.headers.pop("Authorization", None)
        return r


@dataclasses.dataclass
class SendData:
    content_type: str
    data: Optional[Union[Dict[str, Any], MultipartEncoder]] = None
    json: Optional[Union[Dict[str, Any], bytes]] = None

    def __post_init__(self) -> None:
        if self.json is not None and self.data is not None:
            raise ValueError(
                f"`json` and `data` are mutually exclusive. Only one can be set. "
                f"json={self.json!r} data={self.data!r}"
            )


class RequestsResponse(protocol.BackendResponse):
    def __init__(self, response: requests.Response) -> None:
        self._response: requests.Response = response

    @property
    def response(self) -> requests.Response:
        return self._response

    @property
    def status_code(self) -> int:
        return self._response.status_code

    @property
    def headers(self) -> CaseInsensitiveDict[str]:
        return self._response.headers

    @property
    def content(self) -> bytes:
        return self._response.content

    @property
    def reason(self) -> str:
        return self._response.reason

    def json(self) -> Any:
        return self._response.json()


class RequestsBackend(protocol.Backend):
    def __init__(self, session: Optional[requests.Session] = None) -> None:
        self._client: requests.Session = session or requests.Session()

    @property
    def client(self) -> requests.Session:
        return self._client

    @staticmethod
    def prepare_send_data(
        files: Optional[Dict[str, Any]] = None,
        post_data: Optional[Union[Dict[str, Any], bytes, BinaryIO]] = None,
        raw: bool = False,
    ) -> SendData:
        if files:
            if post_data is None:
                post_data = {}
            else:
                # When creating a `MultipartEncoder` instance with data-types
                # which don't have an `encode` method it will cause an error:
                #   object has no attribute 'encode'
                # So convert common non-string types into strings.
                if TYPE_CHECKING:
                    assert isinstance(post_data, dict)
                for k, v in post_data.items():
                    if isinstance(v, bool):
                        v = int(v)
                    if isinstance(v, (complex, float, int)):
                        post_data[k] = str(v)
            post_data["file"] = files.get("file")
            post_data["avatar"] = files.get("avatar")

            data = MultipartEncoder(fields=post_data)
            return SendData(data=data, content_type=data.content_type)

        if raw and post_data:
            return SendData(data=post_data, content_type="application/octet-stream")

        if TYPE_CHECKING:
            assert not isinstance(post_data, BinaryIO)

        return SendData(json=post_data, content_type="application/json")

    def http_request(
        self,
        method: str,
        url: str,
        json: Optional[Union[Dict[str, Any], bytes]] = None,
        data: Optional[Union[Dict[str, Any], MultipartEncoder]] = None,
        params: Optional[Any] = None,
        timeout: Optional[float] = None,
        verify: Optional[Union[bool, str]] = True,
        stream: Optional[bool] = False,
        **kwargs: Any,
    ) -> RequestsResponse:
        """Make HTTP request

        Args:
            method: The HTTP method to call ('get', 'post', 'put', 'delete', etc.)
            url: The full URL
            data: The data to send to the server in the body of the request
            json: Data to send in the body in json by default
            timeout: The timeout, in seconds, for the request
            verify: Whether SSL certificates should be validated. If
                the value is a string, it is the path to a CA file used for
                certificate validation.
            stream: Whether the data should be streamed

        Returns:
            A requests Response object.
        """
        response: requests.Response = self._client.request(
            method=method,
            url=url,
            params=params,
            data=data,
            timeout=timeout,
            stream=stream,
            verify=verify,
            json=json,
            **kwargs,
        )
        return RequestsResponse(response=response)
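The three AuthBase subclasses above also plug directly into plain requests, which makes the header juggling easy to observe (URL and tokens are placeholders):

import requests

session = requests.Session()
session.auth = PrivateTokenAuth("<token>")  # each request gets PRIVATE-TOKEN
resp = session.get("https://gitlab.example.com/api/v4/user")

session.auth = OAuthTokenAuth("<token>")  # now sends Authorization: Bearer ...
# and pops the competing PRIVATE-TOKEN / JOB-TOKEN headers, as defined above.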
6
env/lib/python3.12/site-packages/gitlab/_version.py
vendored
Normal file
@@ -0,0 +1,6 @@
__author__ = "Gauvain Pocentek, python-gitlab team"
__copyright__ = "Copyright 2013-2019 Gauvain Pocentek, 2019-2023 python-gitlab team"
__email__ = "gauvainpocentek@gmail.com"
__license__ = "LGPL3"
__title__ = "python-gitlab"
__version__ = "5.1.0"
394
env/lib/python3.12/site-packages/gitlab/base.py
vendored
Normal file
@@ -0,0 +1,394 @@
import copy
import importlib
import json
import pprint
import textwrap
from types import ModuleType
from typing import Any, Dict, Iterable, Optional, Type, TYPE_CHECKING, Union

import gitlab
from gitlab import types as g_types
from gitlab.exceptions import GitlabParsingError

from .client import Gitlab, GitlabList

__all__ = [
    "RESTObject",
    "RESTObjectList",
    "RESTManager",
]


_URL_ATTRIBUTE_ERROR = (
    f"https://python-gitlab.readthedocs.io/en/v{gitlab.__version__}/"
    f"faq.html#attribute-error-list"
)


class RESTObject:
    """Represents an object built from server data.

    It holds the attributes known from the server, and the updated attributes in
    another. This allows smart updates, if the object allows it.

    You can redefine ``_id_attr`` in child classes to specify which attribute
    must be used as the unique ID. ``None`` means that the object can be updated
    without ID in the url.

    Likewise, you can define a ``_repr_attr`` in subclasses to specify which
    attribute should be added as a human-readable identifier when called in the
    object's ``__repr__()`` method.
    """

    _id_attr: Optional[str] = "id"
    _attrs: Dict[str, Any]
    _created_from_list: bool  # Indicates if object was created from a list() action
    _module: ModuleType
    _parent_attrs: Dict[str, Any]
    _repr_attr: Optional[str] = None
    _updated_attrs: Dict[str, Any]
    _lazy: bool
    manager: "RESTManager"

    def __init__(
        self,
        manager: "RESTManager",
        attrs: Dict[str, Any],
        *,
        created_from_list: bool = False,
        lazy: bool = False,
    ) -> None:
        if not isinstance(attrs, dict):
            raise GitlabParsingError(
                f"Attempted to initialize RESTObject with a non-dictionary value: "
                f"{attrs!r}\nThis likely indicates an incorrect or malformed server "
                f"response."
            )
        self.__dict__.update(
            {
                "manager": manager,
                "_attrs": attrs,
                "_updated_attrs": {},
                "_module": importlib.import_module(self.__module__),
                "_created_from_list": created_from_list,
                "_lazy": lazy,
            }
        )
        self.__dict__["_parent_attrs"] = self.manager.parent_attrs
        self._create_managers()

    def __getstate__(self) -> Dict[str, Any]:
        state = self.__dict__.copy()
        module = state.pop("_module")
        state["_module_name"] = module.__name__
        return state

    def __setstate__(self, state: Dict[str, Any]) -> None:
        module_name = state.pop("_module_name")
        self.__dict__.update(state)
        self.__dict__["_module"] = importlib.import_module(module_name)

    def __getattr__(self, name: str) -> Any:
        if name in self.__dict__["_updated_attrs"]:
            return self.__dict__["_updated_attrs"][name]

        if name in self.__dict__["_attrs"]:
            value = self.__dict__["_attrs"][name]
            # If the value is a list, we copy it in the _updated_attrs dict
            # because we are not able to detect changes made on the object
            # (append, insert, pop, ...). Without forcing the attr
            # creation __setattr__ is never called, the list never ends up
            # in the _updated_attrs dict, and the update() and save()
            # method never push the new data to the server.
            # See https://github.com/python-gitlab/python-gitlab/issues/306
            #
            # note: _parent_attrs will only store simple values (int) so we
            # don't make this check in the next block.
            if isinstance(value, list):
                self.__dict__["_updated_attrs"][name] = value[:]
                return self.__dict__["_updated_attrs"][name]

            return value

        if name in self.__dict__["_parent_attrs"]:
            return self.__dict__["_parent_attrs"][name]

        message = f"{type(self).__name__!r} object has no attribute {name!r}"
        if self._created_from_list:
            message = (
                f"{message}\n\n"
                + textwrap.fill(
                    f"{self.__class__!r} was created via a list() call and "
                    f"only a subset of the data may be present. To ensure "
                    f"all data is present get the object using a "
                    f"get(object.id) call. For more details, see:"
                )
                + f"\n\n{_URL_ATTRIBUTE_ERROR}"
            )
        elif self._lazy:
            message = f"{message}\n\n" + textwrap.fill(
                f"If you tried to access object attributes returned from the server, "
                f"note that {self.__class__!r} was created as a `lazy` object and was "
                f"not initialized with any data."
            )
        raise AttributeError(message)

    def __setattr__(self, name: str, value: Any) -> None:
        self.__dict__["_updated_attrs"][name] = value

    def asdict(self, *, with_parent_attrs: bool = False) -> Dict[str, Any]:
        data = {}
        if with_parent_attrs:
            data.update(copy.deepcopy(self._parent_attrs))
        data.update(copy.deepcopy(self._attrs))
        data.update(copy.deepcopy(self._updated_attrs))
        return data

    @property
    def attributes(self) -> Dict[str, Any]:
        return self.asdict(with_parent_attrs=True)

    def to_json(self, *, with_parent_attrs: bool = False, **kwargs: Any) -> str:
        return json.dumps(self.asdict(with_parent_attrs=with_parent_attrs), **kwargs)

    def __str__(self) -> str:
        return f"{type(self)} => {self.asdict()}"

    def pformat(self) -> str:
        return f"{type(self)} => \n{pprint.pformat(self.asdict())}"

    def pprint(self) -> None:
        print(self.pformat())

    def __repr__(self) -> str:
        name = self.__class__.__name__

        if (self._id_attr and self._repr_value) and (self._id_attr != self._repr_attr):
            return (
                f"<{name} {self._id_attr}:{self.get_id()} "
                f"{self._repr_attr}:{self._repr_value}>"
            )
        if self._id_attr:
            return f"<{name} {self._id_attr}:{self.get_id()}>"
        if self._repr_value:
            return f"<{name} {self._repr_attr}:{self._repr_value}>"

        return f"<{name}>"

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, RESTObject):
            return NotImplemented
        if self.get_id() and other.get_id():
            return self.get_id() == other.get_id()
        return super() == other

    def __ne__(self, other: object) -> bool:
        if not isinstance(other, RESTObject):
            return NotImplemented
        if self.get_id() and other.get_id():
            return self.get_id() != other.get_id()
        return super() != other

    def __dir__(self) -> Iterable[str]:
        return set(self.attributes).union(super().__dir__())

    def __hash__(self) -> int:
        if not self.get_id():
            return super().__hash__()
        return hash(self.get_id())

    def _create_managers(self) -> None:
        # NOTE(jlvillal): We are creating our managers by looking at the class
        # annotations. If an attribute is annotated as being a *Manager type
        # then we create the manager and assign it to the attribute.
        for attr, annotation in sorted(self.__class__.__annotations__.items()):
            # We ignore creating a manager for the 'manager' attribute as that
            # is done in the self.__init__() method
            if attr in ("manager",):
                continue
            if not isinstance(annotation, (type, str)):  # pragma: no cover
                continue
            if isinstance(annotation, type):
                cls_name = annotation.__name__
            else:
                cls_name = annotation
            # All *Manager classes are used except for the base "RESTManager" class
            if cls_name == "RESTManager" or not cls_name.endswith("Manager"):
                continue
            cls = getattr(self._module, cls_name)
            manager = cls(self.manager.gitlab, parent=self)
            # Since we have our own __setattr__ method, we can't use setattr()
            self.__dict__[attr] = manager

    def _update_attrs(self, new_attrs: Dict[str, Any]) -> None:
        self.__dict__["_updated_attrs"] = {}
        self.__dict__["_attrs"] = new_attrs

    def get_id(self) -> Optional[Union[int, str]]:
        """Returns the id of the resource."""
        if self._id_attr is None or not hasattr(self, self._id_attr):
            return None
        id_val = getattr(self, self._id_attr)
        if TYPE_CHECKING:
            assert id_val is None or isinstance(id_val, (int, str))
        return id_val

    @property
    def _repr_value(self) -> Optional[str]:
        """Safely returns the human-readable resource name if present."""
        if self._repr_attr is None or not hasattr(self, self._repr_attr):
            return None
        repr_val = getattr(self, self._repr_attr)
        if TYPE_CHECKING:
            assert isinstance(repr_val, str)
        return repr_val

    @property
    def encoded_id(self) -> Optional[Union[int, str]]:
        """Ensure that the ID is url-encoded so that it can be safely used in a URL
        path"""
        obj_id = self.get_id()
        if isinstance(obj_id, str):
            obj_id = gitlab.utils.EncodedId(obj_id)
        return obj_id


class RESTObjectList:
    """Generator object representing a list of RESTObject's.

    This generator uses the Gitlab pagination system to fetch new data when
    required.

    Note: you should not instantiate such objects, they are returned by calls
    to RESTManager.list()

    Args:
        manager: Manager to attach to the created objects
        obj_cls: Type of objects to create from the json data
        _list: A GitlabList object
    """

    def __init__(
        self, manager: "RESTManager", obj_cls: Type[RESTObject], _list: GitlabList
    ) -> None:
        """Creates an objects list from a GitlabList.

        You should not create objects of this type, but use managers list()
        methods instead.

        Args:
            manager: the RESTManager to attach to the objects
            obj_cls: the class of the created objects
            _list: the GitlabList holding the data
        """
        self.manager = manager
        self._obj_cls = obj_cls
        self._list = _list

    def __iter__(self) -> "RESTObjectList":
        return self

    def __len__(self) -> int:
        return len(self._list)

    def __next__(self) -> RESTObject:
        return self.next()

    def next(self) -> RESTObject:
        data = self._list.next()
        return self._obj_cls(self.manager, data, created_from_list=True)

    @property
    def current_page(self) -> int:
        """The current page number."""
        return self._list.current_page

    @property
    def prev_page(self) -> Optional[int]:
        """The previous page number.

        If None, the current page is the first.
        """
        return self._list.prev_page

    @property
    def next_page(self) -> Optional[int]:
        """The next page number.

        If None, the current page is the last.
        """
        return self._list.next_page

    @property
    def per_page(self) -> Optional[int]:
        """The number of items per page."""
        return self._list.per_page

    @property
    def total_pages(self) -> Optional[int]:
        """The total number of pages."""
        return self._list.total_pages

    @property
    def total(self) -> Optional[int]:
        """The total number of items."""
        return self._list.total


class RESTManager:
    """Base class for CRUD operations on objects.

    Derived class must define ``_path`` and ``_obj_cls``.

    ``_path``: Base URL path on which requests will be sent (e.g. '/projects')
    ``_obj_cls``: The class of objects that will be created
    """

    _create_attrs: g_types.RequiredOptional = g_types.RequiredOptional()
    _update_attrs: g_types.RequiredOptional = g_types.RequiredOptional()
    _path: Optional[str] = None
    _obj_cls: Optional[Type[RESTObject]] = None
    _from_parent_attrs: Dict[str, Any] = {}
    _types: Dict[str, Type[g_types.GitlabAttribute]] = {}

    _computed_path: Optional[str]
    _parent: Optional[RESTObject]
    _parent_attrs: Dict[str, Any]
    gitlab: Gitlab

    def __init__(self, gl: Gitlab, parent: Optional[RESTObject] = None) -> None:
        """REST manager constructor.

        Args:
            gl: :class:`~gitlab.Gitlab` connection to use to make requests.
            parent: REST object to which the manager is attached.
        """
        self.gitlab = gl
        self._parent = parent  # for nested managers
        self._computed_path = self._compute_path()

    @property
    def parent_attrs(self) -> Optional[Dict[str, Any]]:
        return self._parent_attrs

    def _compute_path(self, path: Optional[str] = None) -> Optional[str]:
        self._parent_attrs = {}
        if path is None:
            path = self._path
        if path is None:
            return None
        if self._parent is None or not self._from_parent_attrs:
            return path

        data: Dict[str, Optional[gitlab.utils.EncodedId]] = {}
        for self_attr, parent_attr in self._from_parent_attrs.items():
            if not hasattr(self._parent, parent_attr):
                data[self_attr] = None
                continue
            data[self_attr] = gitlab.utils.EncodedId(getattr(self._parent, parent_attr))
        self._parent_attrs = data
        return path.format(**data)

    @property
    def path(self) -> Optional[str]:
        return self._computed_path
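A sketch of the attribute layering RESTObject's docstring describes; the Thing class and the manager wiring are hypothetical (real subclasses live in gitlab.v4.objects):

class Thing(RESTObject):
    _id_attr = "id"
    _repr_attr = "name"

# Given a manager instance:
#   obj = Thing(manager, {"id": 1, "name": "demo"})
#   obj.name = "renamed"   # stored in _updated_attrs only, _attrs untouched
#   obj.asdict()           # merged view: {"id": 1, "name": "renamed"}
#   repr(obj)              # <Thing id:1 name:renamed>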
420
env/lib/python3.12/site-packages/gitlab/cli.py
vendored
Normal file
@@ -0,0 +1,420 @@
import argparse
import dataclasses
import functools
import os
import pathlib
import re
import sys
from types import ModuleType
from typing import (
    Any,
    Callable,
    cast,
    Dict,
    NoReturn,
    Optional,
    Tuple,
    Type,
    TYPE_CHECKING,
    TypeVar,
    Union,
)

from requests.structures import CaseInsensitiveDict

import gitlab.config
from gitlab.base import RESTObject

# This regex is based on:
# https://github.com/jpvanhal/inflection/blob/master/inflection/__init__.py
camel_upperlower_regex = re.compile(r"([A-Z]+)([A-Z][a-z])")
camel_lowerupper_regex = re.compile(r"([a-z\d])([A-Z])")


@dataclasses.dataclass
class CustomAction:
    required: Tuple[str, ...]
    optional: Tuple[str, ...]
    in_object: bool
    requires_id: bool  # if the `_id_attr` value should be a required argument
    help: Optional[str]  # help text for the custom action


# custom_actions = {
#     cls: {
#         action: CustomAction,
#     },
# }
custom_actions: Dict[str, Dict[str, CustomAction]] = {}


# For an explanation of how these type-hints work see:
# https://mypy.readthedocs.io/en/stable/generics.html#declaring-decorators
#
# The goal here is that functions which get decorated will retain their types.
__F = TypeVar("__F", bound=Callable[..., Any])


def register_custom_action(
    *,
    cls_names: Union[str, Tuple[str, ...]],
    required: Tuple[str, ...] = (),
    optional: Tuple[str, ...] = (),
    custom_action: Optional[str] = None,
    requires_id: bool = True,  # if the `_id_attr` value should be a required argument
    help: Optional[str] = None,  # help text for the action
) -> Callable[[__F], __F]:
    def wrap(f: __F) -> __F:
        @functools.wraps(f)
        def wrapped_f(*args: Any, **kwargs: Any) -> Any:
            return f(*args, **kwargs)

        # in_obj defines whether the method belongs to the obj or the manager
        in_obj = True
        if isinstance(cls_names, tuple):
            classes = cls_names
        else:
            classes = (cls_names,)

        for cls_name in classes:
            final_name = cls_name
            if cls_name.endswith("Manager"):
                final_name = cls_name.replace("Manager", "")
                in_obj = False
            if final_name not in custom_actions:
                custom_actions[final_name] = {}

            action = custom_action or f.__name__.replace("_", "-")
            custom_actions[final_name][action] = CustomAction(
                required=required,
                optional=optional,
                in_object=in_obj,
                requires_id=requires_id,
                help=help,
            )

        return cast(__F, wrapped_f)

    return wrap


def die(msg: str, e: Optional[Exception] = None) -> NoReturn:
    if e:
        msg = f"{msg} ({e})"
    sys.stderr.write(f"{msg}\n")
    sys.exit(1)


def gitlab_resource_to_cls(
    gitlab_resource: str, namespace: ModuleType
) -> Type[RESTObject]:
    classes = CaseInsensitiveDict(namespace.__dict__)
    lowercase_class = gitlab_resource.replace("-", "")
    class_type = classes[lowercase_class]
    if TYPE_CHECKING:
        assert isinstance(class_type, type)
        assert issubclass(class_type, RESTObject)
    return class_type


def cls_to_gitlab_resource(cls: RESTObject) -> str:
    dasherized_uppercase = camel_upperlower_regex.sub(r"\1-\2", cls.__name__)
    dasherized_lowercase = camel_lowerupper_regex.sub(r"\1-\2", dasherized_uppercase)
    return dasherized_lowercase.lower()


def _get_base_parser(add_help: bool = True) -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        add_help=add_help,
        description="GitLab API Command Line Interface",
        allow_abbrev=False,
    )
    parser.add_argument("--version", help="Display the version.", action="store_true")
    parser.add_argument(
        "-v",
        "--verbose",
        "--fancy",
        help="Verbose mode (legacy format only) [env var: GITLAB_VERBOSE]",
        action="store_true",
        default=os.getenv("GITLAB_VERBOSE"),
    )
    parser.add_argument(
        "-d",
        "--debug",
        help="Debug mode (display HTTP requests) [env var: GITLAB_DEBUG]",
        action="store_true",
        default=os.getenv("GITLAB_DEBUG"),
    )
    parser.add_argument(
        "-c",
        "--config-file",
        action="append",
        help=(
            "Configuration file to use. Can be used multiple times. "
            "[env var: PYTHON_GITLAB_CFG]"
        ),
    )
    parser.add_argument(
        "-g",
        "--gitlab",
        help=(
            "Which configuration section should "
            "be used. If not defined, the default selection "
            "will be used."
        ),
        required=False,
    )
    parser.add_argument(
        "-o",
        "--output",
        help="Output format (v4 only): json|legacy|yaml",
        required=False,
        choices=["json", "legacy", "yaml"],
        default="legacy",
    )
    parser.add_argument(
        "-f",
        "--fields",
        help=(
            "Fields to display in the output (comma "
            "separated). Not used with legacy output"
        ),
        required=False,
    )
    parser.add_argument(
        "--server-url",
        help=("GitLab server URL [env var: GITLAB_URL]"),
        required=False,
        default=os.getenv("GITLAB_URL"),
    )

    ssl_verify_group = parser.add_mutually_exclusive_group()
    ssl_verify_group.add_argument(
        "--ssl-verify",
        help=(
            "Path to a CA_BUNDLE file or directory with certificates of trusted CAs. "
            "[env var: GITLAB_SSL_VERIFY]"
        ),
        required=False,
        default=os.getenv("GITLAB_SSL_VERIFY"),
    )
    ssl_verify_group.add_argument(
        "--no-ssl-verify",
        help="Disable SSL verification",
        required=False,
        dest="ssl_verify",
        action="store_false",
    )

    parser.add_argument(
        "--timeout",
        help=(
            "Timeout to use for requests to the GitLab server. "
            "[env var: GITLAB_TIMEOUT]"
        ),
        required=False,
        type=int,
        default=os.getenv("GITLAB_TIMEOUT"),
    )
    parser.add_argument(
        "--api-version",
        help=("GitLab API version [env var: GITLAB_API_VERSION]"),
        required=False,
        default=os.getenv("GITLAB_API_VERSION"),
    )
    parser.add_argument(
        "--per-page",
        help=(
            "Number of entries to return per page in the response. "
            "[env var: GITLAB_PER_PAGE]"
        ),
        required=False,
        type=int,
        default=os.getenv("GITLAB_PER_PAGE"),
    )
    parser.add_argument(
        "--pagination",
        help=(
            "Whether to use keyset or offset pagination [env var: GITLAB_PAGINATION]"
        ),
        required=False,
        default=os.getenv("GITLAB_PAGINATION"),
    )
    parser.add_argument(
        "--order-by",
        help=("Set order_by globally [env var: GITLAB_ORDER_BY]"),
        required=False,
        default=os.getenv("GITLAB_ORDER_BY"),
    )
    parser.add_argument(
        "--user-agent",
        help=(
            "The user agent to send to GitLab with the HTTP request. "
            "[env var: GITLAB_USER_AGENT]"
        ),
        required=False,
        default=os.getenv("GITLAB_USER_AGENT"),
    )

    tokens = parser.add_mutually_exclusive_group()
    tokens.add_argument(
        "--private-token",
        help=("GitLab private access token [env var: GITLAB_PRIVATE_TOKEN]"),
        required=False,
        default=os.getenv("GITLAB_PRIVATE_TOKEN"),
    )
    tokens.add_argument(
        "--oauth-token",
        help=("GitLab OAuth token [env var: GITLAB_OAUTH_TOKEN]"),
        required=False,
        default=os.getenv("GITLAB_OAUTH_TOKEN"),
    )
    tokens.add_argument(
        "--job-token",
        help=("GitLab CI job token [env var: CI_JOB_TOKEN]"),
        required=False,
    )
    parser.add_argument(
        "--skip-login",
        help=(
            "Skip initial authenticated API call to the current user endpoint. "
            "This may be useful when invoking the CLI in scripts. "
            "[env var: GITLAB_SKIP_LOGIN]"
        ),
        action="store_true",
        default=os.getenv("GITLAB_SKIP_LOGIN"),
    )
    parser.add_argument(
        "--no-mask-credentials",
        help="Don't mask credentials in debug mode",
        dest="mask_credentials",
        action="store_false",
    )
    return parser


def _get_parser() -> argparse.ArgumentParser:
    # NOTE: We must delay import of gitlab.v4.cli until now or
    # otherwise it will cause circular import errors
    from gitlab.v4 import cli as v4_cli

    parser = _get_base_parser()
    return v4_cli.extend_parser(parser)


def _parse_value(v: Any) -> Any:
    if isinstance(v, str) and v.startswith("@@"):
        return v[1:]
    if isinstance(v, str) and v.startswith("@"):
        # If the user-provided value starts with @, we try to read the file
        # path provided after @ as the real value.
        filepath = pathlib.Path(v[1:]).expanduser().resolve()
        try:
            with open(filepath, encoding="utf-8") as f:
                return f.read()
        except UnicodeDecodeError:
            with open(filepath, "rb") as f:
                return f.read()
        except OSError as exc:
            exc_name = type(exc).__name__
            sys.stderr.write(f"{exc_name}: {exc}\n")
            sys.exit(1)

    return v


def docs() -> argparse.ArgumentParser:  # pragma: no cover
    """
    Provide a statically generated parser for sphinx only, so we don't need
    to provide dummy gitlab config for readthedocs.
    """
    if "sphinx" not in sys.modules:
        sys.exit("Docs parser is only intended for build_sphinx")

    return _get_parser()


def main() -> None:
    if "--version" in sys.argv:
        print(gitlab.__version__)
        sys.exit(0)

    parser = _get_base_parser(add_help=False)

    # This first parsing step is used to find the gitlab config to use, and
    # load the proper module (v3 or v4) accordingly. At that point we don't have
    # any subparser setup
    (options, _) = parser.parse_known_args(sys.argv)
    try:
        config = gitlab.config.GitlabConfigParser(options.gitlab, options.config_file)
    except gitlab.config.ConfigError as e:
        if "--help" in sys.argv or "-h" in sys.argv:
            parser.print_help()
            sys.exit(0)
        sys.exit(str(e))
    # We only support v4 API at this time
    if config.api_version not in ("4",):  # dead code  # pragma: no cover
        raise ModuleNotFoundError(f"gitlab.v{config.api_version}.cli")

    # Now we build the entire set of subcommands and do the complete parsing
    parser = _get_parser()
    try:
        import argcomplete  # type: ignore

        argcomplete.autocomplete(parser)  # pragma: no cover
    except Exception:
        pass
    args = parser.parse_args()

    config_files = args.config_file
    gitlab_id = args.gitlab
    verbose = args.verbose
    output = args.output
    fields = []
    if args.fields:
        fields = [x.strip() for x in args.fields.split(",")]
    debug = args.debug
    gitlab_resource = args.gitlab_resource
    resource_action = args.resource_action
    skip_login = args.skip_login
    mask_credentials = args.mask_credentials

    args_dict = vars(args)
    # Remove CLI behavior-related args
    for item in (
        "api_version",
        "config_file",
        "debug",
        "fields",
        "gitlab",
        "gitlab_resource",
        "job_token",
        "mask_credentials",
        "oauth_token",
        "output",
        "pagination",
        "private_token",
        "resource_action",
        "server_url",
        "skip_login",
        "ssl_verify",
        "timeout",
        "user_agent",
        "verbose",
        "version",
    ):
        args_dict.pop(item)
    args_dict = {k: _parse_value(v) for k, v in args_dict.items() if v is not None}

    try:
        gl = gitlab.Gitlab.merge_config(vars(options), gitlab_id, config_files)
        if debug:
            gl.enable_debug(mask_credentials=mask_credentials)
        if not skip_login and (gl.private_token or gl.oauth_token):
            gl.auth()
    except Exception as e:
        die(str(e))

    gitlab.v4.cli.run(
        gl, gitlab_resource, resource_action, args_dict, verbose, output, fields
    )
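A hypothetical sketch of how register_custom_action wires a method into the CLI; the names are invented, and real registrations live in gitlab.v4.objects:

@register_custom_action(cls_names="ProjectManager", required=("query",))
def search_things(self, query: str) -> None:
    # Registered under the "Project" resource (the "Manager" suffix is
    # stripped, so in_object becomes False) as the action "search-things".
    ...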
1369
env/lib/python3.12/site-packages/gitlab/client.py
vendored
Normal file
File diff suppressed because it is too large
287
env/lib/python3.12/site-packages/gitlab/config.py
vendored
Normal file
@@ -0,0 +1,287 @@
import configparser
import os
import shlex
import subprocess
from os.path import expanduser, expandvars
from pathlib import Path
from typing import List, Optional, Union

from gitlab.const import USER_AGENT

_DEFAULT_FILES: List[str] = [
    "/etc/python-gitlab.cfg",
    str(Path.home() / ".python-gitlab.cfg"),
]

HELPER_PREFIX = "helper:"

HELPER_ATTRIBUTES = ["job_token", "http_password", "private_token", "oauth_token"]

_CONFIG_PARSER_ERRORS = (configparser.NoOptionError, configparser.NoSectionError)


def _resolve_file(filepath: Union[Path, str]) -> str:
    resolved = Path(filepath).resolve(strict=True)
    return str(resolved)


def _get_config_files(
    config_files: Optional[List[str]] = None,
) -> Union[str, List[str]]:
    """
    Return resolved path(s) to config files if they exist, with precedence:
    1. Files passed in config_files
    2. File defined in PYTHON_GITLAB_CFG
    3. User- and system-wide config files
    """
    resolved_files = []

    if config_files:
        for config_file in config_files:
            try:
                resolved = _resolve_file(config_file)
            except OSError as e:
                raise GitlabConfigMissingError(
                    f"Cannot read config from file: {e}"
                ) from e
            resolved_files.append(resolved)

        return resolved_files

    try:
        env_config = os.environ["PYTHON_GITLAB_CFG"]
        return _resolve_file(env_config)
    except KeyError:
        pass
    except OSError as e:
        raise GitlabConfigMissingError(
            f"Cannot read config from PYTHON_GITLAB_CFG: {e}"
        ) from e

    for config_file in _DEFAULT_FILES:
        try:
            resolved = _resolve_file(config_file)
        except OSError:
            continue
        resolved_files.append(resolved)

    return resolved_files


class ConfigError(Exception):
    pass


class GitlabIDError(ConfigError):
    pass


class GitlabDataError(ConfigError):
    pass


class GitlabConfigMissingError(ConfigError):
    pass


class GitlabConfigHelperError(ConfigError):
    pass


class GitlabConfigParser:
    def __init__(
        self, gitlab_id: Optional[str] = None, config_files: Optional[List[str]] = None
    ) -> None:
        self.gitlab_id = gitlab_id
        self.http_username: Optional[str] = None
        self.http_password: Optional[str] = None
        self.job_token: Optional[str] = None
        self.oauth_token: Optional[str] = None
        self.private_token: Optional[str] = None

        self.api_version: str = "4"
        self.order_by: Optional[str] = None
        self.pagination: Optional[str] = None
        self.per_page: Optional[int] = None
        self.retry_transient_errors: bool = False
        self.ssl_verify: Union[bool, str] = True
        self.timeout: int = 60
        self.url: Optional[str] = None
        self.user_agent: str = USER_AGENT
        self.keep_base_url: bool = False

        self._files = _get_config_files(config_files)
        if self._files:
            self._parse_config()

        if self.gitlab_id and not self._files:
            raise GitlabConfigMissingError(
                f"A gitlab id was provided ({self.gitlab_id}) but no config file found"
            )

    def _parse_config(self) -> None:
        _config = configparser.ConfigParser()
        _config.read(self._files, encoding="utf-8")

        if self.gitlab_id and not _config.has_section(self.gitlab_id):
            raise GitlabDataError(
                f"A gitlab id was provided ({self.gitlab_id}) "
                "but no config section found"
            )

        if self.gitlab_id is None:
            try:
                self.gitlab_id = _config.get("global", "default")
            except Exception as e:
                raise GitlabIDError(
                    "Impossible to get the gitlab id (not specified in config file)"
                ) from e

        try:
            self.url = _config.get(self.gitlab_id, "url")
        except Exception as e:
            raise GitlabDataError(
                "Impossible to get gitlab details from "
                f"configuration ({self.gitlab_id})"
            ) from e

        try:
            self.ssl_verify = _config.getboolean("global", "ssl_verify")
        except ValueError:
            # Value Error means the option exists but isn't a boolean.
            # Get as a string instead as it should then be a local path to a
            # CA bundle.
            self.ssl_verify = _config.get("global", "ssl_verify")
        except _CONFIG_PARSER_ERRORS:
            pass
        try:
            self.ssl_verify = _config.getboolean(self.gitlab_id, "ssl_verify")
        except ValueError:
            # Value Error means the option exists but isn't a boolean.
            # Get as a string instead as it should then be a local path to a
            # CA bundle.
            self.ssl_verify = _config.get(self.gitlab_id, "ssl_verify")
        except _CONFIG_PARSER_ERRORS:
            pass

        try:
            self.timeout = _config.getint("global", "timeout")
        except _CONFIG_PARSER_ERRORS:
            pass
        try:
            self.timeout = _config.getint(self.gitlab_id, "timeout")
        except _CONFIG_PARSER_ERRORS:
            pass

        try:
            self.private_token = _config.get(self.gitlab_id, "private_token")
        except _CONFIG_PARSER_ERRORS:
            pass

        try:
            self.oauth_token = _config.get(self.gitlab_id, "oauth_token")
        except _CONFIG_PARSER_ERRORS:
            pass

        try:
            self.job_token = _config.get(self.gitlab_id, "job_token")
        except _CONFIG_PARSER_ERRORS:
            pass

        try:
            self.http_username = _config.get(self.gitlab_id, "http_username")
            self.http_password = _config.get(
                self.gitlab_id, "http_password"
            )  # pragma: no cover
        except _CONFIG_PARSER_ERRORS:
            pass

        self._get_values_from_helper()

        try:
            self.api_version = _config.get("global", "api_version")
        except _CONFIG_PARSER_ERRORS:
            pass
        try:
            self.api_version = _config.get(self.gitlab_id, "api_version")
        except _CONFIG_PARSER_ERRORS:
            pass
        if self.api_version not in ("4",):
            raise GitlabDataError(f"Unsupported API version: {self.api_version}")

        for section in ["global", self.gitlab_id]:
            try:
                self.per_page = _config.getint(section, "per_page")
            except _CONFIG_PARSER_ERRORS:
                pass
        if self.per_page is not None and not 0 <= self.per_page <= 100:
            raise GitlabDataError(f"Unsupported per_page number: {self.per_page}")

        try:
            self.pagination = _config.get(self.gitlab_id, "pagination")
        except _CONFIG_PARSER_ERRORS:
            pass

        try:
            self.order_by = _config.get(self.gitlab_id, "order_by")
        except _CONFIG_PARSER_ERRORS:
            pass

        try:
            self.user_agent = _config.get("global", "user_agent")
        except _CONFIG_PARSER_ERRORS:
            pass
        try:
            self.user_agent = _config.get(self.gitlab_id, "user_agent")
        except _CONFIG_PARSER_ERRORS:
            pass

        try:
            self.keep_base_url = _config.getboolean("global", "keep_base_url")
        except _CONFIG_PARSER_ERRORS:
            pass
        try:
            self.keep_base_url = _config.getboolean(self.gitlab_id, "keep_base_url")
        except _CONFIG_PARSER_ERRORS:
            pass

        try:
            self.retry_transient_errors = _config.getboolean(
                "global", "retry_transient_errors"
            )
        except _CONFIG_PARSER_ERRORS:
            pass
        try:
            self.retry_transient_errors = _config.getboolean(
                self.gitlab_id, "retry_transient_errors"
            )
        except _CONFIG_PARSER_ERRORS:
            pass

    def _get_values_from_helper(self) -> None:
        """Update attributes that may get values from an external helper program"""
        for attr in HELPER_ATTRIBUTES:
            value = getattr(self, attr)
            if not isinstance(value, str):
                continue

            if not value.lower().strip().startswith(HELPER_PREFIX):
                continue

            helper = value[len(HELPER_PREFIX) :].strip()
            command = [expanduser(expandvars(token)) for token in shlex.split(helper)]

            try:
                value = (
                    subprocess.check_output(command, stderr=subprocess.PIPE)
                    .decode("utf-8")
                    .strip()
                )
            except subprocess.CalledProcessError as e:
                stderr = e.stderr.decode().strip()
                raise GitlabConfigHelperError(
                    f"Failed to read {attr} value from helper "
                    f"for {self.gitlab_id}:\n{stderr}"
                ) from e

            setattr(self, attr, value)
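For reference, a sketch of the ini layout this parser expects (shown as comments; paths and values are placeholders), followed by the corresponding parser call:

# [global]
# default = example
# ssl_verify = true
# timeout = 15
#
# [example]
# url = https://gitlab.example.com
# private_token = helper: /usr/local/bin/print-gitlab-token

cfg = GitlabConfigParser(gitlab_id="example", config_files=["/etc/python-gitlab.cfg"])
print(cfg.url, cfg.timeout, cfg.ssl_verify)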
169
env/lib/python3.12/site-packages/gitlab/const.py
vendored
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
from enum import Enum, IntEnum
|
||||||
|
|
||||||
|
from gitlab._version import __title__, __version__
|
||||||
|
|
||||||
|
|
||||||
|
class GitlabEnum(str, Enum):
|
||||||
|
"""An enum mixed in with str to make it JSON-serializable."""
|
||||||
|
|
||||||
|
|
||||||
|
# https://gitlab.com/gitlab-org/gitlab/-/blob/e97357824bedf007e75f8782259fe07435b64fbb/lib/gitlab/access.rb#L12-18
|
||||||
|
class AccessLevel(IntEnum):
|
    NO_ACCESS: int = 0
    MINIMAL_ACCESS: int = 5
    GUEST: int = 10
    PLANNER: int = 15
    REPORTER: int = 20
    DEVELOPER: int = 30
    MAINTAINER: int = 40
    OWNER: int = 50
    ADMIN: int = 60


# https://gitlab.com/gitlab-org/gitlab/-/blob/e97357824bedf007e75f8782259fe07435b64fbb/lib/gitlab/visibility_level.rb#L23-25
class Visibility(GitlabEnum):
    PRIVATE: str = "private"
    INTERNAL: str = "internal"
    PUBLIC: str = "public"


class NotificationLevel(GitlabEnum):
    DISABLED: str = "disabled"
    PARTICIPATING: str = "participating"
    WATCH: str = "watch"
    GLOBAL: str = "global"
    MENTION: str = "mention"
    CUSTOM: str = "custom"


# https://gitlab.com/gitlab-org/gitlab/-/blob/e97357824bedf007e75f8782259fe07435b64fbb/app/views/search/_category.html.haml#L10-37
class SearchScope(GitlabEnum):
    # all scopes (global, group and project)
    PROJECTS: str = "projects"
    ISSUES: str = "issues"
    MERGE_REQUESTS: str = "merge_requests"
    MILESTONES: str = "milestones"
    WIKI_BLOBS: str = "wiki_blobs"
    COMMITS: str = "commits"
    BLOBS: str = "blobs"
    USERS: str = "users"

    # specific global scope
    GLOBAL_SNIPPET_TITLES: str = "snippet_titles"

    # specific project scope
    PROJECT_NOTES: str = "notes"


# https://docs.gitlab.com/ee/api/merge_requests.html#merge-status
class DetailedMergeStatus(GitlabEnum):
    # possible values for the detailed_merge_status field of Merge Requests
    BLOCKED_STATUS: str = "blocked_status"
    BROKEN_STATUS: str = "broken_status"
    CHECKING: str = "checking"
    UNCHECKED: str = "unchecked"
    CI_MUST_PASS: str = "ci_must_pass"
    CI_STILL_RUNNING: str = "ci_still_running"
    DISCUSSIONS_NOT_RESOLVED: str = "discussions_not_resolved"
    DRAFT_STATUS: str = "draft_status"
    EXTERNAL_STATUS_CHECKS: str = "external_status_checks"
    MERGEABLE: str = "mergeable"
    NOT_APPROVED: str = "not_approved"
    NOT_OPEN: str = "not_open"
    POLICIES_DENIED: str = "policies_denied"


# https://docs.gitlab.com/ee/api/pipelines.html
class PipelineStatus(GitlabEnum):
    CREATED: str = "created"
    WAITING_FOR_RESOURCE: str = "waiting_for_resource"
    PREPARING: str = "preparing"
    PENDING: str = "pending"
    RUNNING: str = "running"
    SUCCESS: str = "success"
    FAILED: str = "failed"
    CANCELED: str = "canceled"
    SKIPPED: str = "skipped"
    MANUAL: str = "manual"
    SCHEDULED: str = "scheduled"


DEFAULT_URL: str = "https://gitlab.com"

NO_ACCESS = AccessLevel.NO_ACCESS.value
MINIMAL_ACCESS = AccessLevel.MINIMAL_ACCESS.value
GUEST_ACCESS = AccessLevel.GUEST.value
REPORTER_ACCESS = AccessLevel.REPORTER.value
DEVELOPER_ACCESS = AccessLevel.DEVELOPER.value
MAINTAINER_ACCESS = AccessLevel.MAINTAINER.value
OWNER_ACCESS = AccessLevel.OWNER.value
ADMIN_ACCESS = AccessLevel.ADMIN.value

VISIBILITY_PRIVATE = Visibility.PRIVATE.value
VISIBILITY_INTERNAL = Visibility.INTERNAL.value
VISIBILITY_PUBLIC = Visibility.PUBLIC.value

NOTIFICATION_LEVEL_DISABLED = NotificationLevel.DISABLED.value
NOTIFICATION_LEVEL_PARTICIPATING = NotificationLevel.PARTICIPATING.value
NOTIFICATION_LEVEL_WATCH = NotificationLevel.WATCH.value
NOTIFICATION_LEVEL_GLOBAL = NotificationLevel.GLOBAL.value
NOTIFICATION_LEVEL_MENTION = NotificationLevel.MENTION.value
NOTIFICATION_LEVEL_CUSTOM = NotificationLevel.CUSTOM.value

# Search scopes
# all scopes (global, group and project)
SEARCH_SCOPE_PROJECTS = SearchScope.PROJECTS.value
SEARCH_SCOPE_ISSUES = SearchScope.ISSUES.value
SEARCH_SCOPE_MERGE_REQUESTS = SearchScope.MERGE_REQUESTS.value
SEARCH_SCOPE_MILESTONES = SearchScope.MILESTONES.value
SEARCH_SCOPE_WIKI_BLOBS = SearchScope.WIKI_BLOBS.value
SEARCH_SCOPE_COMMITS = SearchScope.COMMITS.value
SEARCH_SCOPE_BLOBS = SearchScope.BLOBS.value
SEARCH_SCOPE_USERS = SearchScope.USERS.value

# specific global scope
SEARCH_SCOPE_GLOBAL_SNIPPET_TITLES = SearchScope.GLOBAL_SNIPPET_TITLES.value

# specific project scope
SEARCH_SCOPE_PROJECT_NOTES = SearchScope.PROJECT_NOTES.value

USER_AGENT: str = f"{__title__}/{__version__}"

NO_JSON_RESPONSE_CODES = [204]
RETRYABLE_TRANSIENT_ERROR_CODES = [500, 502, 503, 504] + list(range(520, 531))

__all__ = [
    "AccessLevel",
    "Visibility",
    "NotificationLevel",
    "SearchScope",
    "ADMIN_ACCESS",
    "DEFAULT_URL",
    "DEVELOPER_ACCESS",
    "GUEST_ACCESS",
    "MAINTAINER_ACCESS",
    "MINIMAL_ACCESS",
    "NO_ACCESS",
    "NOTIFICATION_LEVEL_CUSTOM",
    "NOTIFICATION_LEVEL_DISABLED",
    "NOTIFICATION_LEVEL_GLOBAL",
    "NOTIFICATION_LEVEL_MENTION",
    "NOTIFICATION_LEVEL_PARTICIPATING",
    "NOTIFICATION_LEVEL_WATCH",
    "OWNER_ACCESS",
    "REPORTER_ACCESS",
    "SEARCH_SCOPE_BLOBS",
    "SEARCH_SCOPE_COMMITS",
    "SEARCH_SCOPE_GLOBAL_SNIPPET_TITLES",
    "SEARCH_SCOPE_ISSUES",
    "SEARCH_SCOPE_MERGE_REQUESTS",
    "SEARCH_SCOPE_MILESTONES",
    "SEARCH_SCOPE_PROJECT_NOTES",
    "SEARCH_SCOPE_PROJECTS",
    "SEARCH_SCOPE_USERS",
    "SEARCH_SCOPE_WIKI_BLOBS",
    "USER_AGENT",
    "VISIBILITY_INTERNAL",
    "VISIBILITY_PRIVATE",
    "VISIBILITY_PUBLIC",
]
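# --- Illustrative usage sketch (not part of the vendored file): the enums
# above compare naturally against the raw values the GitLab API returns.
# `member_level` is a hypothetical value.
from gitlab.const import AccessLevel, Visibility

member_level = 40  # e.g. taken from a members API response
if member_level >= AccessLevel.MAINTAINER.value:
    print("member can maintain the project")
print(Visibility.PRIVATE.value)  # -> 'private'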
428
env/lib/python3.12/site-packages/gitlab/exceptions.py
vendored
Normal file
@ -0,0 +1,428 @@
import functools
from typing import Any, Callable, cast, Optional, Type, TYPE_CHECKING, TypeVar, Union


class GitlabError(Exception):
    def __init__(
        self,
        error_message: Union[str, bytes] = "",
        response_code: Optional[int] = None,
        response_body: Optional[bytes] = None,
    ) -> None:
        Exception.__init__(self, error_message)
        # Http status code
        self.response_code = response_code
        # Full http response
        self.response_body = response_body
        # Parsed error message from gitlab
        try:
            # if we receive str/bytes we try to convert to unicode/str to have
            # consistent message types (see #616)
            if TYPE_CHECKING:
                assert isinstance(error_message, bytes)
            self.error_message = error_message.decode()
        except Exception:
            if TYPE_CHECKING:
                assert isinstance(error_message, str)
            self.error_message = error_message

    def __str__(self) -> str:
        if self.response_code is not None:
            return f"{self.response_code}: {self.error_message}"
        return f"{self.error_message}"


class GitlabAuthenticationError(GitlabError):
    pass


class RedirectError(GitlabError):
    pass


class GitlabParsingError(GitlabError):
    pass


class GitlabCiLintError(GitlabError):
    pass


class GitlabConnectionError(GitlabError):
    pass


class GitlabOperationError(GitlabError):
    pass


class GitlabHttpError(GitlabError):
    pass


class GitlabListError(GitlabOperationError):
    pass


class GitlabGetError(GitlabOperationError):
    pass


class GitlabHeadError(GitlabOperationError):
    pass


class GitlabCreateError(GitlabOperationError):
    pass


class GitlabUpdateError(GitlabOperationError):
    pass


class GitlabDeleteError(GitlabOperationError):
    pass


class GitlabSetError(GitlabOperationError):
    pass


class GitlabProtectError(GitlabOperationError):
    pass


class GitlabTransferProjectError(GitlabOperationError):
    pass


class GitlabGroupTransferError(GitlabOperationError):
    pass


class GitlabProjectDeployKeyError(GitlabOperationError):
    pass


class GitlabPromoteError(GitlabOperationError):
    pass


class GitlabCancelError(GitlabOperationError):
    pass


class GitlabPipelineCancelError(GitlabCancelError):
    pass


class GitlabRetryError(GitlabOperationError):
    pass


class GitlabBuildCancelError(GitlabCancelError):
    pass


class GitlabBuildRetryError(GitlabRetryError):
    pass


class GitlabBuildPlayError(GitlabRetryError):
    pass


class GitlabBuildEraseError(GitlabRetryError):
    pass


class GitlabJobCancelError(GitlabCancelError):
    pass


class GitlabJobRetryError(GitlabRetryError):
    pass


class GitlabJobPlayError(GitlabRetryError):
    pass


class GitlabJobEraseError(GitlabRetryError):
    pass


class GitlabPipelinePlayError(GitlabRetryError):
    pass


class GitlabPipelineRetryError(GitlabRetryError):
    pass


class GitlabBlockError(GitlabOperationError):
    pass


class GitlabUnblockError(GitlabOperationError):
    pass


class GitlabDeactivateError(GitlabOperationError):
    pass


class GitlabActivateError(GitlabOperationError):
    pass


class GitlabBanError(GitlabOperationError):
    pass


class GitlabUnbanError(GitlabOperationError):
    pass


class GitlabSubscribeError(GitlabOperationError):
    pass


class GitlabUnsubscribeError(GitlabOperationError):
    pass


class GitlabMRForbiddenError(GitlabOperationError):
    pass


class GitlabMRApprovalError(GitlabOperationError):
    pass


class GitlabMRRebaseError(GitlabOperationError):
    pass


class GitlabMRResetApprovalError(GitlabOperationError):
    pass


class GitlabMRClosedError(GitlabOperationError):
    pass


class GitlabMROnBuildSuccessError(GitlabOperationError):
    pass


class GitlabTodoError(GitlabOperationError):
    pass


class GitlabTopicMergeError(GitlabOperationError):
    pass


class GitlabTimeTrackingError(GitlabOperationError):
    pass


class GitlabUploadError(GitlabOperationError):
    pass


class GitlabAttachFileError(GitlabOperationError):
    pass


class GitlabImportError(GitlabOperationError):
    pass


class GitlabInvitationError(GitlabOperationError):
    pass


class GitlabCherryPickError(GitlabOperationError):
    pass


class GitlabHousekeepingError(GitlabOperationError):
    pass


class GitlabOwnershipError(GitlabOperationError):
    pass


class GitlabSearchError(GitlabOperationError):
    pass


class GitlabStopError(GitlabOperationError):
    pass


class GitlabMarkdownError(GitlabOperationError):
    pass


class GitlabVerifyError(GitlabOperationError):
    pass


class GitlabRenderError(GitlabOperationError):
    pass


class GitlabRepairError(GitlabOperationError):
    pass


class GitlabRestoreError(GitlabOperationError):
    pass


class GitlabRevertError(GitlabOperationError):
    pass


class GitlabRotateError(GitlabOperationError):
    pass


class GitlabLicenseError(GitlabOperationError):
    pass


class GitlabFollowError(GitlabOperationError):
    pass


class GitlabUnfollowError(GitlabOperationError):
    pass


class GitlabUserApproveError(GitlabOperationError):
    pass


class GitlabUserRejectError(GitlabOperationError):
    pass


class GitlabDeploymentApprovalError(GitlabOperationError):
    pass


class GitlabHookTestError(GitlabOperationError):
    pass


# For an explanation of how these type-hints work see:
# https://mypy.readthedocs.io/en/stable/generics.html#declaring-decorators
#
# The goal here is that functions which get decorated will retain their types.
__F = TypeVar("__F", bound=Callable[..., Any])

def on_http_error(error: Type[Exception]) -> Callable[[__F], __F]:
    """Manage GitlabHttpError exceptions.

    This decorator function can be used to catch GitlabHttpError exceptions
    and raise specialized exceptions instead.

    Args:
        error: The exception type to raise -- must inherit from GitlabError
    """

    def wrap(f: __F) -> __F:
        @functools.wraps(f)
        def wrapped_f(*args: Any, **kwargs: Any) -> Any:
            try:
                return f(*args, **kwargs)
            except GitlabHttpError as e:
                raise error(e.error_message, e.response_code, e.response_body) from e

        return cast(__F, wrapped_f)

    return wrap


# Export manually to keep mypy happy
__all__ = [
    "GitlabActivateError",
    "GitlabAttachFileError",
    "GitlabAuthenticationError",
    "GitlabBanError",
    "GitlabBlockError",
    "GitlabBuildCancelError",
    "GitlabBuildEraseError",
    "GitlabBuildPlayError",
    "GitlabBuildRetryError",
    "GitlabCancelError",
    "GitlabCherryPickError",
    "GitlabCiLintError",
    "GitlabConnectionError",
    "GitlabCreateError",
    "GitlabDeactivateError",
    "GitlabDeleteError",
    "GitlabDeploymentApprovalError",
    "GitlabError",
    "GitlabFollowError",
    "GitlabGetError",
    "GitlabGroupTransferError",
    "GitlabHeadError",
    "GitlabHookTestError",
    "GitlabHousekeepingError",
    "GitlabHttpError",
    "GitlabImportError",
    "GitlabInvitationError",
    "GitlabJobCancelError",
    "GitlabJobEraseError",
    "GitlabJobPlayError",
    "GitlabJobRetryError",
    "GitlabLicenseError",
    "GitlabListError",
    "GitlabMRApprovalError",
    "GitlabMRClosedError",
    "GitlabMRForbiddenError",
    "GitlabMROnBuildSuccessError",
    "GitlabMRRebaseError",
    "GitlabMRResetApprovalError",
    "GitlabMarkdownError",
    "GitlabOperationError",
    "GitlabOwnershipError",
    "GitlabParsingError",
    "GitlabPipelineCancelError",
    "GitlabPipelinePlayError",
    "GitlabPipelineRetryError",
    "GitlabProjectDeployKeyError",
    "GitlabPromoteError",
    "GitlabProtectError",
    "GitlabRenderError",
    "GitlabRepairError",
    "GitlabRestoreError",
    "GitlabRetryError",
    "GitlabRevertError",
    "GitlabRotateError",
    "GitlabSearchError",
    "GitlabSetError",
    "GitlabStopError",
    "GitlabSubscribeError",
    "GitlabTimeTrackingError",
    "GitlabTodoError",
    "GitlabTopicMergeError",
    "GitlabTransferProjectError",
    "GitlabUnbanError",
    "GitlabUnblockError",
    "GitlabUnfollowError",
    "GitlabUnsubscribeError",
    "GitlabUpdateError",
    "GitlabUploadError",
    "GitlabUserApproveError",
    "GitlabUserRejectError",
    "GitlabVerifyError",
    "RedirectError",
]
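# --- Illustrative usage sketch (not part of the vendored file): any
# GitlabHttpError raised inside the decorated function is re-raised as the
# requested specialized exception, keeping code, message and body.
# `update_project` is a hypothetical stand-in for a real API call.
from gitlab.exceptions import GitlabHttpError, GitlabUpdateError, on_http_error


@on_http_error(GitlabUpdateError)
def update_project() -> None:
    raise GitlabHttpError("update failed", response_code=400)


try:
    update_project()
except GitlabUpdateError as e:
    print(e)  # -> 400: update failed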
1099
env/lib/python3.12/site-packages/gitlab/mixins.py
vendored
Normal file
File diff suppressed because it is too large
0
env/lib/python3.12/site-packages/gitlab/py.typed
vendored
Normal file
105
env/lib/python3.12/site-packages/gitlab/types.py
vendored
Normal file
@ -0,0 +1,105 @@
import dataclasses
from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING


@dataclasses.dataclass(frozen=True)
class RequiredOptional:
    required: Tuple[str, ...] = ()
    optional: Tuple[str, ...] = ()
    exclusive: Tuple[str, ...] = ()

    def validate_attrs(
        self,
        *,
        data: Dict[str, Any],
        excludes: Optional[List[str]] = None,
    ) -> None:
        if excludes is None:
            excludes = []

        if self.required:
            required = [k for k in self.required if k not in excludes]
            missing = [attr for attr in required if attr not in data]
            if missing:
                raise AttributeError(f"Missing attributes: {', '.join(missing)}")

        if self.exclusive:
            exclusives = [attr for attr in data if attr in self.exclusive]
            if len(exclusives) > 1:
                raise AttributeError(
                    f"Provide only one of these attributes: {', '.join(exclusives)}"
                )
            if not exclusives:
                raise AttributeError(
                    f"Must provide one of these attributes: "
                    f"{', '.join(self.exclusive)}"
                )

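# --- Illustrative sketch (not part of the vendored file): how an attribute
# spec validates user-supplied data. The field names here are hypothetical.
from gitlab.types import RequiredOptional

spec = RequiredOptional(required=("name",), exclusive=("user_id", "username"))
spec.validate_attrs(data={"name": "demo", "user_id": 1})  # passes silently
# spec.validate_attrs(data={"user_id": 1})  # raises: Missing attributes: name
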
class GitlabAttribute:
    def __init__(self, value: Any = None) -> None:
        self._value = value

    def get(self) -> Any:
        return self._value

    def set_from_cli(self, cli_value: Any) -> None:
        self._value = cli_value

    def get_for_api(self, *, key: str) -> Tuple[str, Any]:
        return (key, self._value)


class _ListArrayAttribute(GitlabAttribute):
    """Helper class to support `list` / `array` types."""

    def set_from_cli(self, cli_value: str) -> None:
        if not cli_value.strip():
            self._value = []
        else:
            self._value = [item.strip() for item in cli_value.split(",")]

    def get_for_api(self, *, key: str) -> Tuple[str, str]:
        # Do not comma-split single value passed as string
        if isinstance(self._value, str):
            return (key, self._value)

        if TYPE_CHECKING:
            assert isinstance(self._value, list)
        return (key, ",".join([str(x) for x in self._value]))


class ArrayAttribute(_ListArrayAttribute):
    """To support `array` types as documented in
    https://docs.gitlab.com/ee/api/#array"""

    def get_for_api(self, *, key: str) -> Tuple[str, Any]:
        if isinstance(self._value, str):
            return (f"{key}[]", self._value)

        if TYPE_CHECKING:
            assert isinstance(self._value, list)
        return (f"{key}[]", self._value)


class CommaSeparatedListAttribute(_ListArrayAttribute):
    """For values which are sent to the server as a Comma Separated Values
    (CSV) string. We allow them to be specified as a list and we convert it
    into a CSV"""


class LowercaseStringAttribute(GitlabAttribute):
    def get_for_api(self, *, key: str) -> Tuple[str, str]:
        return (key, str(self._value).lower())


class FileAttribute(GitlabAttribute):
    @staticmethod
    def get_file_name(attr_name: Optional[str] = None) -> Optional[str]:
        return attr_name


class ImageAttribute(FileAttribute):
    @staticmethod
    def get_file_name(attr_name: Optional[str] = None) -> str:
        return f"{attr_name}.png" if attr_name else "image.png"
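# --- Illustrative sketch (not part of the vendored file): the subclasses
# above differ only in how they serialize list values for the API; the keys
# used here are hypothetical.
from gitlab.types import ArrayAttribute, CommaSeparatedListAttribute

print(ArrayAttribute([1, 2]).get_for_api(key="iids"))
# -> ('iids[]', [1, 2])
print(CommaSeparatedListAttribute(["a", "b"]).get_for_api(key="labels"))
# -> ('labels', 'a,b')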
303
env/lib/python3.12/site-packages/gitlab/utils.py
vendored
Normal file
@ -0,0 +1,303 @@
import dataclasses
import email.message
import logging
import pathlib
import time
import traceback
import urllib.parse
import warnings
from typing import (
    Any,
    Callable,
    Dict,
    Iterator,
    Literal,
    MutableMapping,
    Optional,
    Tuple,
    Type,
    Union,
)

import requests

from gitlab import const, types


class _StdoutStream:
    def __call__(self, chunk: Any) -> None:
        print(chunk)


def get_base_url(url: Optional[str] = None) -> str:
    """Return the base URL with the trailing slash stripped.

    If the URL is a Falsy value, return the default URL.

    Returns:
        The base URL
    """
    if not url:
        return const.DEFAULT_URL

    return url.rstrip("/")


def get_content_type(content_type: Optional[str]) -> str:
    message = email.message.Message()
    if content_type is not None:
        message["content-type"] = content_type

    return message.get_content_type()


class MaskingFormatter(logging.Formatter):
    """A logging formatter that can mask credentials"""

    def __init__(
        self,
        fmt: Optional[str] = logging.BASIC_FORMAT,
        datefmt: Optional[str] = None,
        style: Literal["%", "{", "$"] = "%",
        validate: bool = True,
        masked: Optional[str] = None,
    ) -> None:
        super().__init__(fmt, datefmt, style, validate)
        self.masked = masked

    def _filter(self, entry: str) -> str:
        if not self.masked:
            return entry

        return entry.replace(self.masked, "[MASKED]")

    def format(self, record: logging.LogRecord) -> str:
        original = logging.Formatter.format(self, record)
        return self._filter(original)
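# --- Illustrative sketch (not part of the vendored file): attaching the
# formatter to a handler so a token never reaches the log output. The logger
# name and token value are hypothetical.
import logging
from gitlab.utils import MaskingFormatter

handler = logging.StreamHandler()
handler.setFormatter(MaskingFormatter("%(message)s", masked="s3cr3t-token"))
logger = logging.getLogger("demo")
logger.addHandler(handler)
logger.warning("authenticating with s3cr3t-token")
# logs: "authenticating with [MASKED]"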
def response_content(
    response: requests.Response,
    streamed: bool,
    action: Optional[Callable[[bytes], None]],
    chunk_size: int,
    *,
    iterator: bool,
) -> Optional[Union[bytes, Iterator[Any]]]:
    if iterator:
        return response.iter_content(chunk_size=chunk_size)

    if streamed is False:
        return response.content

    if action is None:
        action = _StdoutStream()

    for chunk in response.iter_content(chunk_size=chunk_size):
        if chunk:
            action(chunk)
    return None


class Retry:
    def __init__(
        self,
        max_retries: int,
        obey_rate_limit: Optional[bool] = True,
        retry_transient_errors: Optional[bool] = False,
    ) -> None:
        self.cur_retries = 0
        self.max_retries = max_retries
        self.obey_rate_limit = obey_rate_limit
        self.retry_transient_errors = retry_transient_errors

    def _retryable_status_code(
        self, status_code: Optional[int], reason: str = ""
    ) -> bool:
        if status_code == 429 and self.obey_rate_limit:
            return True

        if not self.retry_transient_errors:
            return False
        if status_code in const.RETRYABLE_TRANSIENT_ERROR_CODES:
            return True
        if status_code == 409 and "Resource lock" in reason:
            return True

        return False

    def handle_retry_on_status(
        self,
        status_code: Optional[int],
        headers: Optional[MutableMapping[str, str]] = None,
        reason: str = "",
    ) -> bool:
        if not self._retryable_status_code(status_code, reason):
            return False

        if headers is None:
            headers = {}

        # Response headers documentation:
        # https://docs.gitlab.com/ee/user/admin_area/settings/user_and_ip_rate_limits.html#response-headers
        if self.max_retries == -1 or self.cur_retries < self.max_retries:
            wait_time = 2**self.cur_retries * 0.1
            if "Retry-After" in headers:
                wait_time = int(headers["Retry-After"])
            elif "RateLimit-Reset" in headers:
                wait_time = int(headers["RateLimit-Reset"]) - time.time()
            self.cur_retries += 1
            time.sleep(wait_time)
            return True

        return False

    def handle_retry(self) -> bool:
        if self.retry_transient_errors and (
            self.max_retries == -1 or self.cur_retries < self.max_retries
        ):
            wait_time = 2**self.cur_retries * 0.1
            self.cur_retries += 1
            time.sleep(wait_time)
            return True

        return False

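# --- Illustrative sketch (not part of the vendored file): with
# retry_transient_errors enabled, a 502 (listed in
# RETRYABLE_TRANSIENT_ERROR_CODES) is retried with exponential backoff until
# max_retries is exhausted, at which point handle_retry_on_status returns False.
from gitlab.utils import Retry

retry = Retry(max_retries=3, retry_transient_errors=True)
while retry.handle_retry_on_status(502, headers={}):
    print("transient error, retrying...")  # printed 3 times, then the loop ends
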
def _transform_types(
    data: Dict[str, Any],
    custom_types: Dict[str, Any],
    *,
    transform_data: bool,
    transform_files: Optional[bool] = True,
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Copy the data dict with attributes that have custom types and transform them
    before being sent to the server.

    ``transform_files``: If ``True`` (default), also populates the ``files`` dict for
    FileAttribute types with tuples to prepare fields for requests' MultipartEncoder:
    https://toolbelt.readthedocs.io/en/latest/user.html#multipart-form-data-encoder

    ``transform_data``: If ``True`` transforms the ``data`` dict with fields
    suitable for encoding as query parameters for GitLab's API:
    https://docs.gitlab.com/ee/api/#encoding-api-parameters-of-array-and-hash-types

    Returns:
        A tuple of the transformed data dict and files dict"""

    # Duplicate data to avoid messing with what the user sent us
    data = data.copy()
    if not transform_files and not transform_data:
        return data, {}

    files = {}

    for attr_name, attr_class in custom_types.items():
        if attr_name not in data:
            continue

        gitlab_attribute = attr_class(data[attr_name])

        # if the type is FileAttribute we need to pass the data as file
        if isinstance(gitlab_attribute, types.FileAttribute) and transform_files:
            key = gitlab_attribute.get_file_name(attr_name)
            files[attr_name] = (key, data.pop(attr_name))
            continue

        if not transform_data:
            continue

        if isinstance(gitlab_attribute, types.GitlabAttribute):
            key, value = gitlab_attribute.get_for_api(key=attr_name)
            if key != attr_name:
                del data[attr_name]
            data[key] = value

    return data, files

def copy_dict(
    *,
    src: Dict[str, Any],
    dest: Dict[str, Any],
) -> None:
    for k, v in src.items():
        if isinstance(v, dict):
            # NOTE(jlvillal): This provides some support for the `hash` type
            # https://docs.gitlab.com/ee/api/#hash
            # Transform dict values to new attributes. For example:
            # custom_attributes: {'foo': 'bar'} =>
            #   "custom_attributes[foo]": "bar"
            for dict_k, dict_v in v.items():
                dest[f"{k}[{dict_k}]"] = dict_v
        else:
            dest[k] = v

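# --- Illustrative sketch (not part of the vendored file): nested dicts are
# flattened into GitLab's hash-parameter key syntax.
from gitlab.utils import copy_dict

dest: dict = {}
copy_dict(src={"name": "x", "custom_attributes": {"foo": "bar"}}, dest=dest)
print(dest)  # -> {'name': 'x', 'custom_attributes[foo]': 'bar'}
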
class EncodedId(str):
    """A custom `str` class that will return the URL-encoded value of the string.

    * Using it recursively will only url-encode the value once.
    * Can accept either `str` or `int` as input value.
    * Can be used in an f-string and output the URL-encoded string.

    Reference to documentation on why this is necessary.

    See::

        https://docs.gitlab.com/ee/api/index.html#namespaced-path-encoding
        https://docs.gitlab.com/ee/api/index.html#path-parameters
    """

    def __new__(cls, value: Union[str, int, "EncodedId"]) -> "EncodedId":
        if isinstance(value, EncodedId):
            return value

        if not isinstance(value, (int, str)):
            raise TypeError(f"Unsupported type received: {type(value)}")
        if isinstance(value, str):
            value = urllib.parse.quote(value, safe="")
        return super().__new__(cls, value)

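# --- Illustrative sketch (not part of the vendored file): path strings are
# percent-encoded exactly once; ints pass through unchanged.
from gitlab.utils import EncodedId

print(EncodedId("group/project"))             # -> group%2Fproject
print(EncodedId(EncodedId("group/project")))  # -> group%2Fproject (no double encoding)
print(EncodedId(42))                          # -> 42
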
def remove_none_from_dict(data: Dict[str, Any]) -> Dict[str, Any]:
    return {k: v for k, v in data.items() if v is not None}

def warn(
    message: str,
    *,
    category: Optional[Type[Warning]] = None,
    source: Optional[Any] = None,
    show_caller: bool = True,
) -> None:
    """This `warnings.warn` wrapper function attempts to show the location causing the
    warning in the user code that called the library.

    It does this by walking up the stack trace to find the first frame located outside
    the `gitlab/` directory. This is helpful to users as it shows them their code that
    is causing the warning.
    """
    # Get `stacklevel` for user code so we indicate where issue is in
    # their code.
    pg_dir = pathlib.Path(__file__).parent.resolve()
    stack = traceback.extract_stack()
    stacklevel = 1
    warning_from = ""
    for stacklevel, frame in enumerate(reversed(stack), start=1):
        warning_from = f" (python-gitlab: {frame.filename}:{frame.lineno})"
        frame_dir = str(pathlib.Path(frame.filename).parent.resolve())
        if not frame_dir.startswith(str(pg_dir)):
            break
    if show_caller:
        message += warning_from
    warnings.warn(
        message=message,
        category=category,
        stacklevel=stacklevel,
        source=source,
    )


@dataclasses.dataclass
class WarnMessageData:
    message: str
    show_caller: bool
Some files were not shown because too many files have changed in this diff