week06
This commit is contained in:
8
env/bin/gitlab
vendored
Executable file
8
env/bin/gitlab
vendored
Executable file
@ -0,0 +1,8 @@
|
||||
#!/home/dongho/netsec/env/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
import re
|
||||
import sys
|
||||
from gitlab.cli import main
|
||||
if __name__ == '__main__':
|
||||
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
||||
sys.exit(main())
|
8
env/bin/normalizer
vendored
Executable file
8
env/bin/normalizer
vendored
Executable file
@ -0,0 +1,8 @@
|
||||
#!/home/dongho/netsec/env/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
import re
|
||||
import sys
|
||||
from charset_normalizer.cli import cli_detect
|
||||
if __name__ == '__main__':
|
||||
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
||||
sys.exit(cli_detect())
|
1
env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/INSTALLER
vendored
Normal file
1
env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/INSTALLER
vendored
Normal file
@ -0,0 +1 @@
|
||||
pip
|
20
env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/LICENSE
vendored
Normal file
20
env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/LICENSE
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
Copyright (c) 2017-2021 Ingy döt Net
|
||||
Copyright (c) 2006-2016 Kirill Simonov
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
of the Software, and to permit persons to whom the Software is furnished to do
|
||||
so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
46
env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/METADATA
vendored
Normal file
46
env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/METADATA
vendored
Normal file
@ -0,0 +1,46 @@
|
||||
Metadata-Version: 2.1
|
||||
Name: PyYAML
|
||||
Version: 6.0.2
|
||||
Summary: YAML parser and emitter for Python
|
||||
Home-page: https://pyyaml.org/
|
||||
Download-URL: https://pypi.org/project/PyYAML/
|
||||
Author: Kirill Simonov
|
||||
Author-email: xi@resolvent.net
|
||||
License: MIT
|
||||
Project-URL: Bug Tracker, https://github.com/yaml/pyyaml/issues
|
||||
Project-URL: CI, https://github.com/yaml/pyyaml/actions
|
||||
Project-URL: Documentation, https://pyyaml.org/wiki/PyYAMLDocumentation
|
||||
Project-URL: Mailing lists, http://lists.sourceforge.net/lists/listinfo/yaml-core
|
||||
Project-URL: Source Code, https://github.com/yaml/pyyaml
|
||||
Platform: Any
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: License :: OSI Approved :: MIT License
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Programming Language :: Cython
|
||||
Classifier: Programming Language :: Python
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: 3.11
|
||||
Classifier: Programming Language :: Python :: 3.12
|
||||
Classifier: Programming Language :: Python :: 3.13
|
||||
Classifier: Programming Language :: Python :: Implementation :: CPython
|
||||
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||
Classifier: Topic :: Text Processing :: Markup
|
||||
Requires-Python: >=3.8
|
||||
License-File: LICENSE
|
||||
|
||||
YAML is a data serialization format designed for human readability
|
||||
and interaction with scripting languages. PyYAML is a YAML parser
|
||||
and emitter for Python.
|
||||
|
||||
PyYAML features a complete YAML 1.1 parser, Unicode support, pickle
|
||||
support, capable extension API, and sensible error messages. PyYAML
|
||||
supports standard YAML tags and provides Python-specific tags that
|
||||
allow to represent an arbitrary Python object.
|
||||
|
||||
PyYAML is applicable for a broad range of tasks from complex
|
||||
configuration files to object serialization and persistence.
|
44
env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/RECORD
vendored
Normal file
44
env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/RECORD
vendored
Normal file
@ -0,0 +1,44 @@
|
||||
PyYAML-6.0.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||
PyYAML-6.0.2.dist-info/LICENSE,sha256=jTko-dxEkP1jVwfLiOsmvXZBAqcoKVQwfT5RZ6V36KQ,1101
|
||||
PyYAML-6.0.2.dist-info/METADATA,sha256=9-odFB5seu4pGPcEv7E8iyxNF51_uKnaNGjLAhz2lto,2060
|
||||
PyYAML-6.0.2.dist-info/RECORD,,
|
||||
PyYAML-6.0.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
PyYAML-6.0.2.dist-info/WHEEL,sha256=YM7r_UgTB_CA6ZLGHfbOA_dd7lb6fUn0DsfI9DvIHHE,154
|
||||
PyYAML-6.0.2.dist-info/top_level.txt,sha256=rpj0IVMTisAjh_1vG3Ccf9v5jpCQwAz6cD1IVU5ZdhQ,11
|
||||
_yaml/__init__.py,sha256=04Ae_5osxahpJHa3XBZUAf4wi6XX32gR8D6X6p64GEA,1402
|
||||
_yaml/__pycache__/__init__.cpython-312.pyc,,
|
||||
yaml/__init__.py,sha256=N35S01HMesFTe0aRRMWkPj0Pa8IEbHpE9FK7cr5Bdtw,12311
|
||||
yaml/__pycache__/__init__.cpython-312.pyc,,
|
||||
yaml/__pycache__/composer.cpython-312.pyc,,
|
||||
yaml/__pycache__/constructor.cpython-312.pyc,,
|
||||
yaml/__pycache__/cyaml.cpython-312.pyc,,
|
||||
yaml/__pycache__/dumper.cpython-312.pyc,,
|
||||
yaml/__pycache__/emitter.cpython-312.pyc,,
|
||||
yaml/__pycache__/error.cpython-312.pyc,,
|
||||
yaml/__pycache__/events.cpython-312.pyc,,
|
||||
yaml/__pycache__/loader.cpython-312.pyc,,
|
||||
yaml/__pycache__/nodes.cpython-312.pyc,,
|
||||
yaml/__pycache__/parser.cpython-312.pyc,,
|
||||
yaml/__pycache__/reader.cpython-312.pyc,,
|
||||
yaml/__pycache__/representer.cpython-312.pyc,,
|
||||
yaml/__pycache__/resolver.cpython-312.pyc,,
|
||||
yaml/__pycache__/scanner.cpython-312.pyc,,
|
||||
yaml/__pycache__/serializer.cpython-312.pyc,,
|
||||
yaml/__pycache__/tokens.cpython-312.pyc,,
|
||||
yaml/_yaml.cpython-312-aarch64-linux-gnu.so,sha256=kYQNF-yCT1TQJkdO87ihsv1jctF0lAaJ2wYRWZXqWRI,2456968
|
||||
yaml/composer.py,sha256=_Ko30Wr6eDWUeUpauUGT3Lcg9QPBnOPVlTnIMRGJ9FM,4883
|
||||
yaml/constructor.py,sha256=kNgkfaeLUkwQYY_Q6Ff1Tz2XVw_pG1xVE9Ak7z-viLA,28639
|
||||
yaml/cyaml.py,sha256=6ZrAG9fAYvdVe2FK_w0hmXoG7ZYsoYUwapG8CiC72H0,3851
|
||||
yaml/dumper.py,sha256=PLctZlYwZLp7XmeUdwRuv4nYOZ2UBnDIUy8-lKfLF-o,2837
|
||||
yaml/emitter.py,sha256=jghtaU7eFwg31bG0B7RZea_29Adi9CKmXq_QjgQpCkQ,43006
|
||||
yaml/error.py,sha256=Ah9z-toHJUbE9j-M8YpxgSRM5CgLCcwVzJgLLRF2Fxo,2533
|
||||
yaml/events.py,sha256=50_TksgQiE4up-lKo_V-nBy-tAIxkIPQxY5qDhKCeHw,2445
|
||||
yaml/loader.py,sha256=UVa-zIqmkFSCIYq_PgSGm4NSJttHY2Rf_zQ4_b1fHN0,2061
|
||||
yaml/nodes.py,sha256=gPKNj8pKCdh2d4gr3gIYINnPOaOxGhJAUiYhGRnPE84,1440
|
||||
yaml/parser.py,sha256=ilWp5vvgoHFGzvOZDItFoGjD6D42nhlZrZyjAwa0oJo,25495
|
||||
yaml/reader.py,sha256=0dmzirOiDG4Xo41RnuQS7K9rkY3xjHiVasfDMNTqCNw,6794
|
||||
yaml/representer.py,sha256=IuWP-cAW9sHKEnS0gCqSa894k1Bg4cgTxaDwIcbRQ-Y,14190
|
||||
yaml/resolver.py,sha256=9L-VYfm4mWHxUD1Vg4X7rjDRK_7VZd6b92wzq7Y2IKY,9004
|
||||
yaml/scanner.py,sha256=YEM3iLZSaQwXcQRg2l2R4MdT0zGP2F9eHkKGKnHyWQY,51279
|
||||
yaml/serializer.py,sha256=ChuFgmhU01hj4xgI8GaKv6vfM2Bujwa9i7d2FAHj7cA,4165
|
||||
yaml/tokens.py,sha256=lTQIzSVw8Mg9wv459-TjiOQe6wVziqaRlqX2_89rp54,2573
|
0
env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/REQUESTED
vendored
Normal file
0
env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/REQUESTED
vendored
Normal file
6
env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/WHEEL
vendored
Normal file
6
env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/WHEEL
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
Wheel-Version: 1.0
|
||||
Generator: bdist_wheel (0.44.0)
|
||||
Root-Is-Purelib: false
|
||||
Tag: cp312-cp312-manylinux_2_17_aarch64
|
||||
Tag: cp312-cp312-manylinux2014_aarch64
|
||||
|
2
env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/top_level.txt
vendored
Normal file
2
env/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/top_level.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
_yaml
|
||||
yaml
|
BIN
env/lib/python3.12/site-packages/__pycache__/appdirs.cpython-312.pyc
vendored
Normal file
BIN
env/lib/python3.12/site-packages/__pycache__/appdirs.cpython-312.pyc
vendored
Normal file
Binary file not shown.
33
env/lib/python3.12/site-packages/_yaml/__init__.py
vendored
Normal file
33
env/lib/python3.12/site-packages/_yaml/__init__.py
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
# This is a stub package designed to roughly emulate the _yaml
|
||||
# extension module, which previously existed as a standalone module
|
||||
# and has been moved into the `yaml` package namespace.
|
||||
# It does not perfectly mimic its old counterpart, but should get
|
||||
# close enough for anyone who's relying on it even when they shouldn't.
|
||||
import yaml
|
||||
|
||||
# in some circumstances, the yaml module we imoprted may be from a different version, so we need
|
||||
# to tread carefully when poking at it here (it may not have the attributes we expect)
|
||||
if not getattr(yaml, '__with_libyaml__', False):
|
||||
from sys import version_info
|
||||
|
||||
exc = ModuleNotFoundError if version_info >= (3, 6) else ImportError
|
||||
raise exc("No module named '_yaml'")
|
||||
else:
|
||||
from yaml._yaml import *
|
||||
import warnings
|
||||
warnings.warn(
|
||||
'The _yaml extension module is now located at yaml._yaml'
|
||||
' and its location is subject to change. To use the'
|
||||
' LibYAML-based parser and emitter, import from `yaml`:'
|
||||
' `from yaml import CLoader as Loader, CDumper as Dumper`.',
|
||||
DeprecationWarning
|
||||
)
|
||||
del warnings
|
||||
# Don't `del yaml` here because yaml is actually an existing
|
||||
# namespace member of _yaml.
|
||||
|
||||
__name__ = '_yaml'
|
||||
# If the module is top-level (i.e. not a part of any specific package)
|
||||
# then the attribute should be set to ''.
|
||||
# https://docs.python.org/3.8/library/types.html
|
||||
__package__ = ''
|
BIN
env/lib/python3.12/site-packages/_yaml/__pycache__/__init__.cpython-312.pyc
vendored
Normal file
BIN
env/lib/python3.12/site-packages/_yaml/__pycache__/__init__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
1
env/lib/python3.12/site-packages/appdirs-1.4.4.dist-info/INSTALLER
vendored
Normal file
1
env/lib/python3.12/site-packages/appdirs-1.4.4.dist-info/INSTALLER
vendored
Normal file
@ -0,0 +1 @@
|
||||
pip
|
23
env/lib/python3.12/site-packages/appdirs-1.4.4.dist-info/LICENSE.txt
vendored
Normal file
23
env/lib/python3.12/site-packages/appdirs-1.4.4.dist-info/LICENSE.txt
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
# This is the MIT license
|
||||
|
||||
Copyright (c) 2010 ActiveState Software Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included
|
||||
in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
264
env/lib/python3.12/site-packages/appdirs-1.4.4.dist-info/METADATA
vendored
Normal file
264
env/lib/python3.12/site-packages/appdirs-1.4.4.dist-info/METADATA
vendored
Normal file
@ -0,0 +1,264 @@
|
||||
Metadata-Version: 2.1
|
||||
Name: appdirs
|
||||
Version: 1.4.4
|
||||
Summary: A small Python module for determining appropriate platform-specific dirs, e.g. a "user data dir".
|
||||
Home-page: http://github.com/ActiveState/appdirs
|
||||
Author: Trent Mick
|
||||
Author-email: trentm@gmail.com
|
||||
Maintainer: Jeff Rouse
|
||||
Maintainer-email: jr@its.to
|
||||
License: MIT
|
||||
Keywords: application directory log cache user
|
||||
Platform: UNKNOWN
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: License :: OSI Approved :: MIT License
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Programming Language :: Python :: 2
|
||||
Classifier: Programming Language :: Python :: 2.7
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3.4
|
||||
Classifier: Programming Language :: Python :: 3.5
|
||||
Classifier: Programming Language :: Python :: 3.6
|
||||
Classifier: Programming Language :: Python :: 3.7
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
||||
Classifier: Programming Language :: Python :: Implementation :: CPython
|
||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||
|
||||
|
||||
.. image:: https://secure.travis-ci.org/ActiveState/appdirs.png
|
||||
:target: http://travis-ci.org/ActiveState/appdirs
|
||||
|
||||
the problem
|
||||
===========
|
||||
|
||||
What directory should your app use for storing user data? If running on Mac OS X, you
|
||||
should use::
|
||||
|
||||
~/Library/Application Support/<AppName>
|
||||
|
||||
If on Windows (at least English Win XP) that should be::
|
||||
|
||||
C:\Documents and Settings\<User>\Application Data\Local Settings\<AppAuthor>\<AppName>
|
||||
|
||||
or possibly::
|
||||
|
||||
C:\Documents and Settings\<User>\Application Data\<AppAuthor>\<AppName>
|
||||
|
||||
for `roaming profiles <http://bit.ly/9yl3b6>`_ but that is another story.
|
||||
|
||||
On Linux (and other Unices) the dir, according to the `XDG
|
||||
spec <http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html>`_, is::
|
||||
|
||||
~/.local/share/<AppName>
|
||||
|
||||
|
||||
``appdirs`` to the rescue
|
||||
=========================
|
||||
|
||||
This kind of thing is what the ``appdirs`` module is for. ``appdirs`` will
|
||||
help you choose an appropriate:
|
||||
|
||||
- user data dir (``user_data_dir``)
|
||||
- user config dir (``user_config_dir``)
|
||||
- user cache dir (``user_cache_dir``)
|
||||
- site data dir (``site_data_dir``)
|
||||
- site config dir (``site_config_dir``)
|
||||
- user log dir (``user_log_dir``)
|
||||
|
||||
and also:
|
||||
|
||||
- is a single module so other Python packages can include their own private copy
|
||||
- is slightly opinionated on the directory names used. Look for "OPINION" in
|
||||
documentation and code for when an opinion is being applied.
|
||||
|
||||
|
||||
some example output
|
||||
===================
|
||||
|
||||
On Mac OS X::
|
||||
|
||||
>>> from appdirs import *
|
||||
>>> appname = "SuperApp"
|
||||
>>> appauthor = "Acme"
|
||||
>>> user_data_dir(appname, appauthor)
|
||||
'/Users/trentm/Library/Application Support/SuperApp'
|
||||
>>> site_data_dir(appname, appauthor)
|
||||
'/Library/Application Support/SuperApp'
|
||||
>>> user_cache_dir(appname, appauthor)
|
||||
'/Users/trentm/Library/Caches/SuperApp'
|
||||
>>> user_log_dir(appname, appauthor)
|
||||
'/Users/trentm/Library/Logs/SuperApp'
|
||||
|
||||
On Windows 7::
|
||||
|
||||
>>> from appdirs import *
|
||||
>>> appname = "SuperApp"
|
||||
>>> appauthor = "Acme"
|
||||
>>> user_data_dir(appname, appauthor)
|
||||
'C:\\Users\\trentm\\AppData\\Local\\Acme\\SuperApp'
|
||||
>>> user_data_dir(appname, appauthor, roaming=True)
|
||||
'C:\\Users\\trentm\\AppData\\Roaming\\Acme\\SuperApp'
|
||||
>>> user_cache_dir(appname, appauthor)
|
||||
'C:\\Users\\trentm\\AppData\\Local\\Acme\\SuperApp\\Cache'
|
||||
>>> user_log_dir(appname, appauthor)
|
||||
'C:\\Users\\trentm\\AppData\\Local\\Acme\\SuperApp\\Logs'
|
||||
|
||||
On Linux::
|
||||
|
||||
>>> from appdirs import *
|
||||
>>> appname = "SuperApp"
|
||||
>>> appauthor = "Acme"
|
||||
>>> user_data_dir(appname, appauthor)
|
||||
'/home/trentm/.local/share/SuperApp
|
||||
>>> site_data_dir(appname, appauthor)
|
||||
'/usr/local/share/SuperApp'
|
||||
>>> site_data_dir(appname, appauthor, multipath=True)
|
||||
'/usr/local/share/SuperApp:/usr/share/SuperApp'
|
||||
>>> user_cache_dir(appname, appauthor)
|
||||
'/home/trentm/.cache/SuperApp'
|
||||
>>> user_log_dir(appname, appauthor)
|
||||
'/home/trentm/.cache/SuperApp/log'
|
||||
>>> user_config_dir(appname)
|
||||
'/home/trentm/.config/SuperApp'
|
||||
>>> site_config_dir(appname)
|
||||
'/etc/xdg/SuperApp'
|
||||
>>> os.environ['XDG_CONFIG_DIRS'] = '/etc:/usr/local/etc'
|
||||
>>> site_config_dir(appname, multipath=True)
|
||||
'/etc/SuperApp:/usr/local/etc/SuperApp'
|
||||
|
||||
|
||||
``AppDirs`` for convenience
|
||||
===========================
|
||||
|
||||
::
|
||||
|
||||
>>> from appdirs import AppDirs
|
||||
>>> dirs = AppDirs("SuperApp", "Acme")
|
||||
>>> dirs.user_data_dir
|
||||
'/Users/trentm/Library/Application Support/SuperApp'
|
||||
>>> dirs.site_data_dir
|
||||
'/Library/Application Support/SuperApp'
|
||||
>>> dirs.user_cache_dir
|
||||
'/Users/trentm/Library/Caches/SuperApp'
|
||||
>>> dirs.user_log_dir
|
||||
'/Users/trentm/Library/Logs/SuperApp'
|
||||
|
||||
|
||||
|
||||
Per-version isolation
|
||||
=====================
|
||||
|
||||
If you have multiple versions of your app in use that you want to be
|
||||
able to run side-by-side, then you may want version-isolation for these
|
||||
dirs::
|
||||
|
||||
>>> from appdirs import AppDirs
|
||||
>>> dirs = AppDirs("SuperApp", "Acme", version="1.0")
|
||||
>>> dirs.user_data_dir
|
||||
'/Users/trentm/Library/Application Support/SuperApp/1.0'
|
||||
>>> dirs.site_data_dir
|
||||
'/Library/Application Support/SuperApp/1.0'
|
||||
>>> dirs.user_cache_dir
|
||||
'/Users/trentm/Library/Caches/SuperApp/1.0'
|
||||
>>> dirs.user_log_dir
|
||||
'/Users/trentm/Library/Logs/SuperApp/1.0'
|
||||
|
||||
|
||||
|
||||
appdirs Changelog
|
||||
=================
|
||||
|
||||
appdirs 1.4.4
|
||||
-------------
|
||||
- [PR #92] Don't import appdirs from setup.py
|
||||
|
||||
Project officially classified as Stable which is important
|
||||
for inclusion in other distros such as ActivePython.
|
||||
|
||||
First of several incremental releases to catch up on maintenance.
|
||||
|
||||
appdirs 1.4.3
|
||||
-------------
|
||||
- [PR #76] Python 3.6 invalid escape sequence deprecation fixes
|
||||
- Fix for Python 3.6 support
|
||||
|
||||
appdirs 1.4.2
|
||||
-------------
|
||||
- [PR #84] Allow installing without setuptools
|
||||
- [PR #86] Fix string delimiters in setup.py description
|
||||
- Add Python 3.6 support
|
||||
|
||||
appdirs 1.4.1
|
||||
-------------
|
||||
- [issue #38] Fix _winreg import on Windows Py3
|
||||
- [issue #55] Make appname optional
|
||||
|
||||
appdirs 1.4.0
|
||||
-------------
|
||||
- [PR #42] AppAuthor is now optional on Windows
|
||||
- [issue 41] Support Jython on Windows, Mac, and Unix-like platforms. Windows
|
||||
support requires `JNA <https://github.com/twall/jna>`_.
|
||||
- [PR #44] Fix incorrect behaviour of the site_config_dir method
|
||||
|
||||
appdirs 1.3.0
|
||||
-------------
|
||||
- [Unix, issue 16] Conform to XDG standard, instead of breaking it for
|
||||
everybody
|
||||
- [Unix] Removes gratuitous case mangling of the case, since \*nix-es are
|
||||
usually case sensitive, so mangling is not wise
|
||||
- [Unix] Fixes the utterly wrong behaviour in ``site_data_dir``, return result
|
||||
based on XDG_DATA_DIRS and make room for respecting the standard which
|
||||
specifies XDG_DATA_DIRS is a multiple-value variable
|
||||
- [Issue 6] Add ``*_config_dir`` which are distinct on nix-es, according to
|
||||
XDG specs; on Windows and Mac return the corresponding ``*_data_dir``
|
||||
|
||||
appdirs 1.2.0
|
||||
-------------
|
||||
|
||||
- [Unix] Put ``user_log_dir`` under the *cache* dir on Unix. Seems to be more
|
||||
typical.
|
||||
- [issue 9] Make ``unicode`` work on py3k.
|
||||
|
||||
appdirs 1.1.0
|
||||
-------------
|
||||
|
||||
- [issue 4] Add ``AppDirs.user_log_dir``.
|
||||
- [Unix, issue 2, issue 7] appdirs now conforms to `XDG base directory spec
|
||||
<http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html>`_.
|
||||
- [Mac, issue 5] Fix ``site_data_dir()`` on Mac.
|
||||
- [Mac] Drop use of 'Carbon' module in favour of hardcoded paths; supports
|
||||
Python3 now.
|
||||
- [Windows] Append "Cache" to ``user_cache_dir`` on Windows by default. Use
|
||||
``opinion=False`` option to disable this.
|
||||
- Add ``appdirs.AppDirs`` convenience class. Usage:
|
||||
|
||||
>>> dirs = AppDirs("SuperApp", "Acme", version="1.0")
|
||||
>>> dirs.user_data_dir
|
||||
'/Users/trentm/Library/Application Support/SuperApp/1.0'
|
||||
|
||||
- [Windows] Cherry-pick Komodo's change to downgrade paths to the Windows short
|
||||
paths if there are high bit chars.
|
||||
- [Linux] Change default ``user_cache_dir()`` on Linux to be singular, e.g.
|
||||
"~/.superapp/cache".
|
||||
- [Windows] Add ``roaming`` option to ``user_data_dir()`` (for use on Windows only)
|
||||
and change the default ``user_data_dir`` behaviour to use a *non*-roaming
|
||||
profile dir (``CSIDL_LOCAL_APPDATA`` instead of ``CSIDL_APPDATA``). Why? Because
|
||||
a large roaming profile can cause login speed issues. The "only syncs on
|
||||
logout" behaviour can cause surprises in appdata info.
|
||||
|
||||
|
||||
appdirs 1.0.1 (never released)
|
||||
------------------------------
|
||||
|
||||
Started this changelog 27 July 2010. Before that this module originated in the
|
||||
`Komodo <http://www.activestate.com/komodo>`_ product as ``applib.py`` and then
|
||||
as `applib/location.py
|
||||
<http://github.com/ActiveState/applib/blob/master/applib/location.py>`_ (used by
|
||||
`PyPM <http://code.activestate.com/pypm/>`_ in `ActivePython
|
||||
<http://www.activestate.com/activepython>`_). This is basically a fork of
|
||||
applib.py 1.0.1 and applib/location.py 1.0.1.
|
||||
|
||||
|
||||
|
8
env/lib/python3.12/site-packages/appdirs-1.4.4.dist-info/RECORD
vendored
Normal file
8
env/lib/python3.12/site-packages/appdirs-1.4.4.dist-info/RECORD
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
__pycache__/appdirs.cpython-312.pyc,,
|
||||
appdirs-1.4.4.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||
appdirs-1.4.4.dist-info/LICENSE.txt,sha256=Nt200KdFqTqyAyA9cZCBSxuJcn0lTK_0jHp6-71HAAs,1097
|
||||
appdirs-1.4.4.dist-info/METADATA,sha256=k5TVfXMNKGHTfp2wm6EJKTuGwGNuoQR5TqQgH8iwG8M,8981
|
||||
appdirs-1.4.4.dist-info/RECORD,,
|
||||
appdirs-1.4.4.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
|
||||
appdirs-1.4.4.dist-info/top_level.txt,sha256=nKncE8CUqZERJ6VuQWL4_bkunSPDNfn7KZqb4Tr5YEM,8
|
||||
appdirs.py,sha256=g99s2sXhnvTEm79oj4bWI0Toapc-_SmKKNXvOXHkVic,24720
|
6
env/lib/python3.12/site-packages/appdirs-1.4.4.dist-info/WHEEL
vendored
Normal file
6
env/lib/python3.12/site-packages/appdirs-1.4.4.dist-info/WHEEL
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
Wheel-Version: 1.0
|
||||
Generator: bdist_wheel (0.34.2)
|
||||
Root-Is-Purelib: true
|
||||
Tag: py2-none-any
|
||||
Tag: py3-none-any
|
||||
|
1
env/lib/python3.12/site-packages/appdirs-1.4.4.dist-info/top_level.txt
vendored
Normal file
1
env/lib/python3.12/site-packages/appdirs-1.4.4.dist-info/top_level.txt
vendored
Normal file
@ -0,0 +1 @@
|
||||
appdirs
|
608
env/lib/python3.12/site-packages/appdirs.py
vendored
Normal file
608
env/lib/python3.12/site-packages/appdirs.py
vendored
Normal file
@ -0,0 +1,608 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2005-2010 ActiveState Software Inc.
|
||||
# Copyright (c) 2013 Eddy Petrișor
|
||||
|
||||
"""Utilities for determining application-specific dirs.
|
||||
|
||||
See <http://github.com/ActiveState/appdirs> for details and usage.
|
||||
"""
|
||||
# Dev Notes:
|
||||
# - MSDN on where to store app data files:
|
||||
# http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120
|
||||
# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html
|
||||
# - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
|
||||
|
||||
__version__ = "1.4.4"
|
||||
__version_info__ = tuple(int(segment) for segment in __version__.split("."))
|
||||
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
PY3 = sys.version_info[0] == 3
|
||||
|
||||
if PY3:
|
||||
unicode = str
|
||||
|
||||
if sys.platform.startswith('java'):
|
||||
import platform
|
||||
os_name = platform.java_ver()[3][0]
|
||||
if os_name.startswith('Windows'): # "Windows XP", "Windows 7", etc.
|
||||
system = 'win32'
|
||||
elif os_name.startswith('Mac'): # "Mac OS X", etc.
|
||||
system = 'darwin'
|
||||
else: # "Linux", "SunOS", "FreeBSD", etc.
|
||||
# Setting this to "linux2" is not ideal, but only Windows or Mac
|
||||
# are actually checked for and the rest of the module expects
|
||||
# *sys.platform* style strings.
|
||||
system = 'linux2'
|
||||
else:
|
||||
system = sys.platform
|
||||
|
||||
|
||||
|
||||
def user_data_dir(appname=None, appauthor=None, version=None, roaming=False):
|
||||
r"""Return full path to the user-specific data dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"roaming" (boolean, default False) can be set True to use the Windows
|
||||
roaming appdata directory. That means that for users on a Windows
|
||||
network setup for roaming profiles, this user data will be
|
||||
sync'd on login. See
|
||||
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
|
||||
for a discussion of issues.
|
||||
|
||||
Typical user data directories are:
|
||||
Mac OS X: ~/Library/Application Support/<AppName>
|
||||
Unix: ~/.local/share/<AppName> # or in $XDG_DATA_HOME, if defined
|
||||
Win XP (not roaming): C:\Documents and Settings\<username>\Application Data\<AppAuthor>\<AppName>
|
||||
Win XP (roaming): C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>
|
||||
Win 7 (not roaming): C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>
|
||||
Win 7 (roaming): C:\Users\<username>\AppData\Roaming\<AppAuthor>\<AppName>
|
||||
|
||||
For Unix, we follow the XDG spec and support $XDG_DATA_HOME.
|
||||
That means, by default "~/.local/share/<AppName>".
|
||||
"""
|
||||
if system == "win32":
|
||||
if appauthor is None:
|
||||
appauthor = appname
|
||||
const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA"
|
||||
path = os.path.normpath(_get_win_folder(const))
|
||||
if appname:
|
||||
if appauthor is not False:
|
||||
path = os.path.join(path, appauthor, appname)
|
||||
else:
|
||||
path = os.path.join(path, appname)
|
||||
elif system == 'darwin':
|
||||
path = os.path.expanduser('~/Library/Application Support/')
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
else:
|
||||
path = os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share"))
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
return path
|
||||
|
||||
|
||||
def site_data_dir(appname=None, appauthor=None, version=None, multipath=False):
|
||||
r"""Return full path to the user-shared data dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"multipath" is an optional parameter only applicable to *nix
|
||||
which indicates that the entire list of data dirs should be
|
||||
returned. By default, the first item from XDG_DATA_DIRS is
|
||||
returned, or '/usr/local/share/<AppName>',
|
||||
if XDG_DATA_DIRS is not set
|
||||
|
||||
Typical site data directories are:
|
||||
Mac OS X: /Library/Application Support/<AppName>
|
||||
Unix: /usr/local/share/<AppName> or /usr/share/<AppName>
|
||||
Win XP: C:\Documents and Settings\All Users\Application Data\<AppAuthor>\<AppName>
|
||||
Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
|
||||
Win 7: C:\ProgramData\<AppAuthor>\<AppName> # Hidden, but writeable on Win 7.
|
||||
|
||||
For Unix, this is using the $XDG_DATA_DIRS[0] default.
|
||||
|
||||
WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
|
||||
"""
|
||||
if system == "win32":
|
||||
if appauthor is None:
|
||||
appauthor = appname
|
||||
path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA"))
|
||||
if appname:
|
||||
if appauthor is not False:
|
||||
path = os.path.join(path, appauthor, appname)
|
||||
else:
|
||||
path = os.path.join(path, appname)
|
||||
elif system == 'darwin':
|
||||
path = os.path.expanduser('/Library/Application Support')
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
else:
|
||||
# XDG default for $XDG_DATA_DIRS
|
||||
# only first, if multipath is False
|
||||
path = os.getenv('XDG_DATA_DIRS',
|
||||
os.pathsep.join(['/usr/local/share', '/usr/share']))
|
||||
pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
|
||||
if appname:
|
||||
if version:
|
||||
appname = os.path.join(appname, version)
|
||||
pathlist = [os.sep.join([x, appname]) for x in pathlist]
|
||||
|
||||
if multipath:
|
||||
path = os.pathsep.join(pathlist)
|
||||
else:
|
||||
path = pathlist[0]
|
||||
return path
|
||||
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
return path
|
||||
|
||||
|
||||
def user_config_dir(appname=None, appauthor=None, version=None, roaming=False):
|
||||
r"""Return full path to the user-specific config dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"roaming" (boolean, default False) can be set True to use the Windows
|
||||
roaming appdata directory. That means that for users on a Windows
|
||||
network setup for roaming profiles, this user data will be
|
||||
sync'd on login. See
|
||||
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
|
||||
for a discussion of issues.
|
||||
|
||||
Typical user config directories are:
|
||||
Mac OS X: same as user_data_dir
|
||||
Unix: ~/.config/<AppName> # or in $XDG_CONFIG_HOME, if defined
|
||||
Win *: same as user_data_dir
|
||||
|
||||
For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME.
|
||||
That means, by default "~/.config/<AppName>".
|
||||
"""
|
||||
if system in ["win32", "darwin"]:
|
||||
path = user_data_dir(appname, appauthor, None, roaming)
|
||||
else:
|
||||
path = os.getenv('XDG_CONFIG_HOME', os.path.expanduser("~/.config"))
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
return path
|
||||
|
||||
|
||||
def site_config_dir(appname=None, appauthor=None, version=None, multipath=False):
|
||||
r"""Return full path to the user-shared data dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"multipath" is an optional parameter only applicable to *nix
|
||||
which indicates that the entire list of config dirs should be
|
||||
returned. By default, the first item from XDG_CONFIG_DIRS is
|
||||
returned, or '/etc/xdg/<AppName>', if XDG_CONFIG_DIRS is not set
|
||||
|
||||
Typical site config directories are:
|
||||
Mac OS X: same as site_data_dir
|
||||
Unix: /etc/xdg/<AppName> or $XDG_CONFIG_DIRS[i]/<AppName> for each value in
|
||||
$XDG_CONFIG_DIRS
|
||||
Win *: same as site_data_dir
|
||||
Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
|
||||
|
||||
For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False
|
||||
|
||||
WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
|
||||
"""
|
||||
if system in ["win32", "darwin"]:
|
||||
path = site_data_dir(appname, appauthor)
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
else:
|
||||
# XDG default for $XDG_CONFIG_DIRS
|
||||
# only first, if multipath is False
|
||||
path = os.getenv('XDG_CONFIG_DIRS', '/etc/xdg')
|
||||
pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
|
||||
if appname:
|
||||
if version:
|
||||
appname = os.path.join(appname, version)
|
||||
pathlist = [os.sep.join([x, appname]) for x in pathlist]
|
||||
|
||||
if multipath:
|
||||
path = os.pathsep.join(pathlist)
|
||||
else:
|
||||
path = pathlist[0]
|
||||
return path
|
||||
|
||||
|
||||
def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True):
|
||||
r"""Return full path to the user-specific cache dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"opinion" (boolean) can be False to disable the appending of
|
||||
"Cache" to the base app data dir for Windows. See
|
||||
discussion below.
|
||||
|
||||
Typical user cache directories are:
|
||||
Mac OS X: ~/Library/Caches/<AppName>
|
||||
Unix: ~/.cache/<AppName> (XDG default)
|
||||
Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Cache
|
||||
Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache
|
||||
|
||||
On Windows the only suggestion in the MSDN docs is that local settings go in
|
||||
the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming
|
||||
app data dir (the default returned by `user_data_dir` above). Apps typically
|
||||
put cache data somewhere *under* the given dir here. Some examples:
|
||||
...\Mozilla\Firefox\Profiles\<ProfileName>\Cache
|
||||
...\Acme\SuperApp\Cache\1.0
|
||||
OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value.
|
||||
This can be disabled with the `opinion=False` option.
|
||||
"""
|
||||
if system == "win32":
|
||||
if appauthor is None:
|
||||
appauthor = appname
|
||||
path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA"))
|
||||
if appname:
|
||||
if appauthor is not False:
|
||||
path = os.path.join(path, appauthor, appname)
|
||||
else:
|
||||
path = os.path.join(path, appname)
|
||||
if opinion:
|
||||
path = os.path.join(path, "Cache")
|
||||
elif system == 'darwin':
|
||||
path = os.path.expanduser('~/Library/Caches')
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
else:
|
||||
path = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache'))
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
return path
|
||||
|
||||
|
||||
def user_state_dir(appname=None, appauthor=None, version=None, roaming=False):
|
||||
r"""Return full path to the user-specific state dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"roaming" (boolean, default False) can be set True to use the Windows
|
||||
roaming appdata directory. That means that for users on a Windows
|
||||
network setup for roaming profiles, this user data will be
|
||||
sync'd on login. See
|
||||
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
|
||||
for a discussion of issues.
|
||||
|
||||
Typical user state directories are:
|
||||
Mac OS X: same as user_data_dir
|
||||
Unix: ~/.local/state/<AppName> # or in $XDG_STATE_HOME, if defined
|
||||
Win *: same as user_data_dir
|
||||
|
||||
For Unix, we follow this Debian proposal <https://wiki.debian.org/XDGBaseDirectorySpecification#state>
|
||||
to extend the XDG spec and support $XDG_STATE_HOME.
|
||||
|
||||
That means, by default "~/.local/state/<AppName>".
|
||||
"""
|
||||
if system in ["win32", "darwin"]:
|
||||
path = user_data_dir(appname, appauthor, None, roaming)
|
||||
else:
|
||||
path = os.getenv('XDG_STATE_HOME', os.path.expanduser("~/.local/state"))
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
return path
|
||||
|
||||
|
||||
def user_log_dir(appname=None, appauthor=None, version=None, opinion=True):
|
||||
r"""Return full path to the user-specific log dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"opinion" (boolean) can be False to disable the appending of
|
||||
"Logs" to the base app data dir for Windows, and "log" to the
|
||||
base cache dir for Unix. See discussion below.
|
||||
|
||||
Typical user log directories are:
|
||||
Mac OS X: ~/Library/Logs/<AppName>
|
||||
Unix: ~/.cache/<AppName>/log # or under $XDG_CACHE_HOME if defined
|
||||
Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Logs
|
||||
Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Logs
|
||||
|
||||
On Windows the only suggestion in the MSDN docs is that local settings
|
||||
go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in
|
||||
examples of what some windows apps use for a logs dir.)
|
||||
|
||||
OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA`
|
||||
value for Windows and appends "log" to the user cache dir for Unix.
|
||||
This can be disabled with the `opinion=False` option.
|
||||
"""
|
||||
if system == "darwin":
|
||||
path = os.path.join(
|
||||
os.path.expanduser('~/Library/Logs'),
|
||||
appname)
|
||||
elif system == "win32":
|
||||
path = user_data_dir(appname, appauthor, version)
|
||||
version = False
|
||||
if opinion:
|
||||
path = os.path.join(path, "Logs")
|
||||
else:
|
||||
path = user_cache_dir(appname, appauthor, version)
|
||||
version = False
|
||||
if opinion:
|
||||
path = os.path.join(path, "log")
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
return path
|
||||
|
||||
|
||||
class AppDirs(object):
|
||||
"""Convenience wrapper for getting application dirs."""
|
||||
def __init__(self, appname=None, appauthor=None, version=None,
|
||||
roaming=False, multipath=False):
|
||||
self.appname = appname
|
||||
self.appauthor = appauthor
|
||||
self.version = version
|
||||
self.roaming = roaming
|
||||
self.multipath = multipath
|
||||
|
||||
@property
|
||||
def user_data_dir(self):
|
||||
return user_data_dir(self.appname, self.appauthor,
|
||||
version=self.version, roaming=self.roaming)
|
||||
|
||||
@property
|
||||
def site_data_dir(self):
|
||||
return site_data_dir(self.appname, self.appauthor,
|
||||
version=self.version, multipath=self.multipath)
|
||||
|
||||
@property
|
||||
def user_config_dir(self):
|
||||
return user_config_dir(self.appname, self.appauthor,
|
||||
version=self.version, roaming=self.roaming)
|
||||
|
||||
@property
|
||||
def site_config_dir(self):
|
||||
return site_config_dir(self.appname, self.appauthor,
|
||||
version=self.version, multipath=self.multipath)
|
||||
|
||||
@property
|
||||
def user_cache_dir(self):
|
||||
return user_cache_dir(self.appname, self.appauthor,
|
||||
version=self.version)
|
||||
|
||||
@property
|
||||
def user_state_dir(self):
|
||||
return user_state_dir(self.appname, self.appauthor,
|
||||
version=self.version)
|
||||
|
||||
@property
|
||||
def user_log_dir(self):
|
||||
return user_log_dir(self.appname, self.appauthor,
|
||||
version=self.version)
|
||||
|
||||
|
||||
#---- internal support stuff
|
||||
|
||||
def _get_win_folder_from_registry(csidl_name):
|
||||
"""This is a fallback technique at best. I'm not sure if using the
|
||||
registry for this guarantees us the correct answer for all CSIDL_*
|
||||
names.
|
||||
"""
|
||||
if PY3:
|
||||
import winreg as _winreg
|
||||
else:
|
||||
import _winreg
|
||||
|
||||
shell_folder_name = {
|
||||
"CSIDL_APPDATA": "AppData",
|
||||
"CSIDL_COMMON_APPDATA": "Common AppData",
|
||||
"CSIDL_LOCAL_APPDATA": "Local AppData",
|
||||
}[csidl_name]
|
||||
|
||||
key = _winreg.OpenKey(
|
||||
_winreg.HKEY_CURRENT_USER,
|
||||
r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
|
||||
)
|
||||
dir, type = _winreg.QueryValueEx(key, shell_folder_name)
|
||||
return dir
|
||||
|
||||
|
||||
def _get_win_folder_with_pywin32(csidl_name):
|
||||
from win32com.shell import shellcon, shell
|
||||
dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0)
|
||||
# Try to make this a unicode path because SHGetFolderPath does
|
||||
# not return unicode strings when there is unicode data in the
|
||||
# path.
|
||||
try:
|
||||
dir = unicode(dir)
|
||||
|
||||
# Downgrade to short path name if have highbit chars. See
|
||||
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
|
||||
has_high_char = False
|
||||
for c in dir:
|
||||
if ord(c) > 255:
|
||||
has_high_char = True
|
||||
break
|
||||
if has_high_char:
|
||||
try:
|
||||
import win32api
|
||||
dir = win32api.GetShortPathName(dir)
|
||||
except ImportError:
|
||||
pass
|
||||
except UnicodeError:
|
||||
pass
|
||||
return dir
|
||||
|
||||
|
||||
def _get_win_folder_with_ctypes(csidl_name):
|
||||
import ctypes
|
||||
|
||||
csidl_const = {
|
||||
"CSIDL_APPDATA": 26,
|
||||
"CSIDL_COMMON_APPDATA": 35,
|
||||
"CSIDL_LOCAL_APPDATA": 28,
|
||||
}[csidl_name]
|
||||
|
||||
buf = ctypes.create_unicode_buffer(1024)
|
||||
ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf)
|
||||
|
||||
# Downgrade to short path name if have highbit chars. See
|
||||
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
|
||||
has_high_char = False
|
||||
for c in buf:
|
||||
if ord(c) > 255:
|
||||
has_high_char = True
|
||||
break
|
||||
if has_high_char:
|
||||
buf2 = ctypes.create_unicode_buffer(1024)
|
||||
if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024):
|
||||
buf = buf2
|
||||
|
||||
return buf.value
|
||||
|
||||
def _get_win_folder_with_jna(csidl_name):
|
||||
import array
|
||||
from com.sun import jna
|
||||
from com.sun.jna.platform import win32
|
||||
|
||||
buf_size = win32.WinDef.MAX_PATH * 2
|
||||
buf = array.zeros('c', buf_size)
|
||||
shell = win32.Shell32.INSTANCE
|
||||
shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None, win32.ShlObj.SHGFP_TYPE_CURRENT, buf)
|
||||
dir = jna.Native.toString(buf.tostring()).rstrip("\0")
|
||||
|
||||
# Downgrade to short path name if have highbit chars. See
|
||||
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
|
||||
has_high_char = False
|
||||
for c in dir:
|
||||
if ord(c) > 255:
|
||||
has_high_char = True
|
||||
break
|
||||
if has_high_char:
|
||||
buf = array.zeros('c', buf_size)
|
||||
kernel = win32.Kernel32.INSTANCE
|
||||
if kernel.GetShortPathName(dir, buf, buf_size):
|
||||
dir = jna.Native.toString(buf.tostring()).rstrip("\0")
|
||||
|
||||
return dir
|
||||
|
||||
if system == "win32":
|
||||
try:
|
||||
import win32com.shell
|
||||
_get_win_folder = _get_win_folder_with_pywin32
|
||||
except ImportError:
|
||||
try:
|
||||
from ctypes import windll
|
||||
_get_win_folder = _get_win_folder_with_ctypes
|
||||
except ImportError:
|
||||
try:
|
||||
import com.sun.jna
|
||||
_get_win_folder = _get_win_folder_with_jna
|
||||
except ImportError:
|
||||
_get_win_folder = _get_win_folder_from_registry
|
||||
|
||||
|
||||
#---- self test code
|
||||
|
||||
if __name__ == "__main__":
|
||||
appname = "MyApp"
|
||||
appauthor = "MyCompany"
|
||||
|
||||
props = ("user_data_dir",
|
||||
"user_config_dir",
|
||||
"user_cache_dir",
|
||||
"user_state_dir",
|
||||
"user_log_dir",
|
||||
"site_data_dir",
|
||||
"site_config_dir")
|
||||
|
||||
print("-- app dirs %s --" % __version__)
|
||||
|
||||
print("-- app dirs (with optional 'version')")
|
||||
dirs = AppDirs(appname, appauthor, version="1.0")
|
||||
for prop in props:
|
||||
print("%s: %s" % (prop, getattr(dirs, prop)))
|
||||
|
||||
print("\n-- app dirs (without optional 'version')")
|
||||
dirs = AppDirs(appname, appauthor)
|
||||
for prop in props:
|
||||
print("%s: %s" % (prop, getattr(dirs, prop)))
|
||||
|
||||
print("\n-- app dirs (without optional 'appauthor')")
|
||||
dirs = AppDirs(appname)
|
||||
for prop in props:
|
||||
print("%s: %s" % (prop, getattr(dirs, prop)))
|
||||
|
||||
print("\n-- app dirs (with disabled 'appauthor')")
|
||||
dirs = AppDirs(appname, appauthor=False)
|
||||
for prop in props:
|
||||
print("%s: %s" % (prop, getattr(dirs, prop)))
|
1
env/lib/python3.12/site-packages/certifi-2024.8.30.dist-info/INSTALLER
vendored
Normal file
1
env/lib/python3.12/site-packages/certifi-2024.8.30.dist-info/INSTALLER
vendored
Normal file
@ -0,0 +1 @@
|
||||
pip
|
20
env/lib/python3.12/site-packages/certifi-2024.8.30.dist-info/LICENSE
vendored
Normal file
20
env/lib/python3.12/site-packages/certifi-2024.8.30.dist-info/LICENSE
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
This package contains a modified version of ca-bundle.crt:
|
||||
|
||||
ca-bundle.crt -- Bundle of CA Root Certificates
|
||||
|
||||
This is a bundle of X.509 certificates of public Certificate Authorities
|
||||
(CA). These were automatically extracted from Mozilla's root certificates
|
||||
file (certdata.txt). This file can be found in the mozilla source tree:
|
||||
https://hg.mozilla.org/mozilla-central/file/tip/security/nss/lib/ckfw/builtins/certdata.txt
|
||||
It contains the certificates in PEM format and therefore
|
||||
can be directly used with curl / libcurl / php_curl, or with
|
||||
an Apache+mod_ssl webserver for SSL client authentication.
|
||||
Just configure this file as the SSLCACertificateFile.#
|
||||
|
||||
***** BEGIN LICENSE BLOCK *****
|
||||
This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain
|
||||
one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
***** END LICENSE BLOCK *****
|
||||
@(#) $RCSfile: certdata.txt,v $ $Revision: 1.80 $ $Date: 2011/11/03 15:11:58 $
|
67
env/lib/python3.12/site-packages/certifi-2024.8.30.dist-info/METADATA
vendored
Normal file
67
env/lib/python3.12/site-packages/certifi-2024.8.30.dist-info/METADATA
vendored
Normal file
@ -0,0 +1,67 @@
|
||||
Metadata-Version: 2.1
|
||||
Name: certifi
|
||||
Version: 2024.8.30
|
||||
Summary: Python package for providing Mozilla's CA Bundle.
|
||||
Home-page: https://github.com/certifi/python-certifi
|
||||
Author: Kenneth Reitz
|
||||
Author-email: me@kennethreitz.com
|
||||
License: MPL-2.0
|
||||
Project-URL: Source, https://github.com/certifi/python-certifi
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
|
||||
Classifier: Natural Language :: English
|
||||
Classifier: Programming Language :: Python
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3 :: Only
|
||||
Classifier: Programming Language :: Python :: 3.6
|
||||
Classifier: Programming Language :: Python :: 3.7
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: 3.11
|
||||
Classifier: Programming Language :: Python :: 3.12
|
||||
Requires-Python: >=3.6
|
||||
License-File: LICENSE
|
||||
|
||||
Certifi: Python SSL Certificates
|
||||
================================
|
||||
|
||||
Certifi provides Mozilla's carefully curated collection of Root Certificates for
|
||||
validating the trustworthiness of SSL certificates while verifying the identity
|
||||
of TLS hosts. It has been extracted from the `Requests`_ project.
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
``certifi`` is available on PyPI. Simply install it with ``pip``::
|
||||
|
||||
$ pip install certifi
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
To reference the installed certificate authority (CA) bundle, you can use the
|
||||
built-in function::
|
||||
|
||||
>>> import certifi
|
||||
|
||||
>>> certifi.where()
|
||||
'/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'
|
||||
|
||||
Or from the command line::
|
||||
|
||||
$ python -m certifi
|
||||
/usr/local/lib/python3.7/site-packages/certifi/cacert.pem
|
||||
|
||||
Enjoy!
|
||||
|
||||
.. _`Requests`: https://requests.readthedocs.io/en/master/
|
||||
|
||||
Addition/Removal of Certificates
|
||||
--------------------------------
|
||||
|
||||
Certifi does not support any addition/removal or other modification of the
|
||||
CA trust store content. This project is intended to provide a reliable and
|
||||
highly portable root of trust to python deployments. Look to upstream projects
|
||||
for methods to use alternate trust.
|
14
env/lib/python3.12/site-packages/certifi-2024.8.30.dist-info/RECORD
vendored
Normal file
14
env/lib/python3.12/site-packages/certifi-2024.8.30.dist-info/RECORD
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
certifi-2024.8.30.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||
certifi-2024.8.30.dist-info/LICENSE,sha256=6TcW2mucDVpKHfYP5pWzcPBpVgPSH2-D8FPkLPwQyvc,989
|
||||
certifi-2024.8.30.dist-info/METADATA,sha256=GhBHRVUN6a4ZdUgE_N5wmukJfyuoE-QyIl8Y3ifNQBM,2222
|
||||
certifi-2024.8.30.dist-info/RECORD,,
|
||||
certifi-2024.8.30.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
|
||||
certifi-2024.8.30.dist-info/top_level.txt,sha256=KMu4vUCfsjLrkPbSNdgdekS-pVJzBAJFO__nI8NF6-U,8
|
||||
certifi/__init__.py,sha256=p_GYZrjUwPBUhpLlCZoGb0miKBKSqDAyZC5DvIuqbHQ,94
|
||||
certifi/__main__.py,sha256=xBBoj905TUWBLRGANOcf7oi6e-3dMP4cEoG9OyMs11g,243
|
||||
certifi/__pycache__/__init__.cpython-312.pyc,,
|
||||
certifi/__pycache__/__main__.cpython-312.pyc,,
|
||||
certifi/__pycache__/core.cpython-312.pyc,,
|
||||
certifi/cacert.pem,sha256=lO3rZukXdPyuk6BWUJFOKQliWaXH6HGh9l1GGrUgG0c,299427
|
||||
certifi/core.py,sha256=qRDDFyXVJwTB_EmoGppaXU_R9qCZvhl-EzxPMuV3nTA,4426
|
||||
certifi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
env/lib/python3.12/site-packages/certifi-2024.8.30.dist-info/WHEEL
vendored
Normal file
5
env/lib/python3.12/site-packages/certifi-2024.8.30.dist-info/WHEEL
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
Wheel-Version: 1.0
|
||||
Generator: setuptools (74.0.0)
|
||||
Root-Is-Purelib: true
|
||||
Tag: py3-none-any
|
||||
|
1
env/lib/python3.12/site-packages/certifi-2024.8.30.dist-info/top_level.txt
vendored
Normal file
1
env/lib/python3.12/site-packages/certifi-2024.8.30.dist-info/top_level.txt
vendored
Normal file
@ -0,0 +1 @@
|
||||
certifi
|
4
env/lib/python3.12/site-packages/certifi/__init__.py
vendored
Normal file
4
env/lib/python3.12/site-packages/certifi/__init__.py
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
from .core import contents, where
|
||||
|
||||
__all__ = ["contents", "where"]
|
||||
__version__ = "2024.08.30"
|
12
env/lib/python3.12/site-packages/certifi/__main__.py
vendored
Normal file
12
env/lib/python3.12/site-packages/certifi/__main__.py
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
import argparse
|
||||
|
||||
from certifi import contents, where
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("-c", "--contents", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.contents:
|
||||
print(contents())
|
||||
else:
|
||||
print(where())
|
BIN
env/lib/python3.12/site-packages/certifi/__pycache__/__init__.cpython-312.pyc
vendored
Normal file
BIN
env/lib/python3.12/site-packages/certifi/__pycache__/__init__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/certifi/__pycache__/__main__.cpython-312.pyc
vendored
Normal file
BIN
env/lib/python3.12/site-packages/certifi/__pycache__/__main__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/certifi/__pycache__/core.cpython-312.pyc
vendored
Normal file
BIN
env/lib/python3.12/site-packages/certifi/__pycache__/core.cpython-312.pyc
vendored
Normal file
Binary file not shown.
4929
env/lib/python3.12/site-packages/certifi/cacert.pem
vendored
Normal file
4929
env/lib/python3.12/site-packages/certifi/cacert.pem
vendored
Normal file
File diff suppressed because it is too large
114
env/lib/python3.12/site-packages/certifi/core.py
vendored
Normal file
114
env/lib/python3.12/site-packages/certifi/core.py
vendored
Normal file
@ -0,0 +1,114 @@
|
||||
"""
|
||||
certifi.py
|
||||
~~~~~~~~~~
|
||||
|
||||
This module returns the installation location of cacert.pem or its contents.
|
||||
"""
|
||||
import sys
|
||||
import atexit
|
||||
|
||||
def exit_cacert_ctx() -> None:
|
||||
_CACERT_CTX.__exit__(None, None, None) # type: ignore[union-attr]
|
||||
|
||||
|
||||
if sys.version_info >= (3, 11):
|
||||
|
||||
from importlib.resources import as_file, files
|
||||
|
||||
_CACERT_CTX = None
|
||||
_CACERT_PATH = None
|
||||
|
||||
def where() -> str:
|
||||
# This is slightly terrible, but we want to delay extracting the file
|
||||
# in cases where we're inside of a zipimport situation until someone
|
||||
# actually calls where(), but we don't want to re-extract the file
|
||||
# on every call of where(), so we'll do it once then store it in a
|
||||
# global variable.
|
||||
global _CACERT_CTX
|
||||
global _CACERT_PATH
|
||||
if _CACERT_PATH is None:
|
||||
# This is slightly janky, the importlib.resources API wants you to
|
||||
# manage the cleanup of this file, so it doesn't actually return a
|
||||
# path, it returns a context manager that will give you the path
|
||||
# when you enter it and will do any cleanup when you leave it. In
|
||||
# the common case of not needing a temporary file, it will just
|
||||
# return the file system location and the __exit__() is a no-op.
|
||||
#
|
||||
# We also have to hold onto the actual context manager, because
|
||||
# it will do the cleanup whenever it gets garbage collected, so
|
||||
# we will also store that at the global level as well.
|
||||
_CACERT_CTX = as_file(files("certifi").joinpath("cacert.pem"))
|
||||
_CACERT_PATH = str(_CACERT_CTX.__enter__())
|
||||
atexit.register(exit_cacert_ctx)
|
||||
|
||||
return _CACERT_PATH
|
||||
|
||||
def contents() -> str:
|
||||
return files("certifi").joinpath("cacert.pem").read_text(encoding="ascii")
|
||||
|
||||
elif sys.version_info >= (3, 7):
|
||||
|
||||
from importlib.resources import path as get_path, read_text
|
||||
|
||||
_CACERT_CTX = None
|
||||
_CACERT_PATH = None
|
||||
|
||||
def where() -> str:
|
||||
# This is slightly terrible, but we want to delay extracting the
|
||||
# file in cases where we're inside of a zipimport situation until
|
||||
# someone actually calls where(), but we don't want to re-extract
|
||||
# the file on every call of where(), so we'll do it once then store
|
||||
# it in a global variable.
|
||||
global _CACERT_CTX
|
||||
global _CACERT_PATH
|
||||
if _CACERT_PATH is None:
|
||||
# This is slightly janky, the importlib.resources API wants you
|
||||
# to manage the cleanup of this file, so it doesn't actually
|
||||
# return a path, it returns a context manager that will give
|
||||
# you the path when you enter it and will do any cleanup when
|
||||
# you leave it. In the common case of not needing a temporary
|
||||
# file, it will just return the file system location and the
|
||||
# __exit__() is a no-op.
|
||||
#
|
||||
# We also have to hold onto the actual context manager, because
|
||||
# it will do the cleanup whenever it gets garbage collected, so
|
||||
# we will also store that at the global level as well.
|
||||
_CACERT_CTX = get_path("certifi", "cacert.pem")
|
||||
_CACERT_PATH = str(_CACERT_CTX.__enter__())
|
||||
atexit.register(exit_cacert_ctx)
|
||||
|
||||
return _CACERT_PATH
|
||||
|
||||
def contents() -> str:
|
||||
return read_text("certifi", "cacert.pem", encoding="ascii")
|
||||
|
||||
else:
|
||||
import os
|
||||
import types
|
||||
from typing import Union
|
||||
|
||||
Package = Union[types.ModuleType, str]
|
||||
Resource = Union[str, "os.PathLike"]
|
||||
|
||||
# This fallback will work for Python versions prior to 3.7 that lack the
|
||||
# importlib.resources module but relies on the existing `where` function
|
||||
# so won't address issues with environments like PyOxidizer that don't set
|
||||
# __file__ on modules.
|
||||
def read_text(
|
||||
package: Package,
|
||||
resource: Resource,
|
||||
encoding: str = 'utf-8',
|
||||
errors: str = 'strict'
|
||||
) -> str:
|
||||
with open(where(), encoding=encoding) as data:
|
||||
return data.read()
|
||||
|
||||
# If we don't have importlib.resources, then we will just do the old logic
|
||||
# of assuming we're on the filesystem and munge the path directly.
|
||||
def where() -> str:
|
||||
f = os.path.dirname(__file__)
|
||||
|
||||
return os.path.join(f, "cacert.pem")
|
||||
|
||||
def contents() -> str:
|
||||
return read_text("certifi", "cacert.pem", encoding="ascii")
|
0
env/lib/python3.12/site-packages/certifi/py.typed
vendored
Normal file
0
env/lib/python3.12/site-packages/certifi/py.typed
vendored
Normal file
1
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/INSTALLER
vendored
Normal file
1
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/INSTALLER
vendored
Normal file
@ -0,0 +1 @@
|
||||
pip
|
21
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/LICENSE
vendored
Normal file
21
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/LICENSE
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 TAHRI Ahmed R.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
695
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/METADATA
vendored
Normal file
695
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/METADATA
vendored
Normal file
@ -0,0 +1,695 @@
|
||||
Metadata-Version: 2.1
|
||||
Name: charset-normalizer
|
||||
Version: 3.4.0
|
||||
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
|
||||
Home-page: https://github.com/Ousret/charset_normalizer
|
||||
Author: Ahmed TAHRI
|
||||
Author-email: tahri.ahmed@proton.me
|
||||
License: MIT
|
||||
Project-URL: Bug Reports, https://github.com/Ousret/charset_normalizer/issues
|
||||
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/en/latest
|
||||
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: License :: OSI Approved :: MIT License
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Programming Language :: Python
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3.7
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: 3.11
|
||||
Classifier: Programming Language :: Python :: 3.12
|
||||
Classifier: Programming Language :: Python :: 3.13
|
||||
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
||||
Classifier: Topic :: Text Processing :: Linguistic
|
||||
Classifier: Topic :: Utilities
|
||||
Classifier: Typing :: Typed
|
||||
Requires-Python: >=3.7.0
|
||||
Description-Content-Type: text/markdown
|
||||
License-File: LICENSE
|
||||
Provides-Extra: unicode_backport
|
||||
|
||||
<h1 align="center">Charset Detection, for Everyone 👋</h1>
|
||||
|
||||
<p align="center">
|
||||
<sup>The Real First Universal Charset Detector</sup><br>
|
||||
<a href="https://pypi.org/project/charset-normalizer">
|
||||
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
|
||||
</a>
|
||||
<a href="https://pepy.tech/project/charset-normalizer/">
|
||||
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
|
||||
</a>
|
||||
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
|
||||
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
|
||||
</a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<sup><i>Featured Packages</i></sup><br>
|
||||
<a href="https://github.com/jawah/niquests">
|
||||
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-HTTP_1.1%2C%202%2C_and_3_Client-cyan">
|
||||
</a>
|
||||
<a href="https://github.com/jawah/wassima">
|
||||
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Killer-cyan">
|
||||
</a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<sup><i>In other language (unofficial port - by the community)</i></sup><br>
|
||||
<a href="https://github.com/nickspring/charset-normalizer-rs">
|
||||
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
|
||||
> I'm trying to resolve the issue by taking a new approach.
|
||||
> All IANA character set names for which the Python core library provides codecs are supported.
|
||||
|
||||
<p align="center">
|
||||
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
|
||||
</p>
|
||||
|
||||
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
|
||||
|
||||
| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|
||||
|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
|
||||
| `Fast` | ❌ | ✅ | ✅ |
|
||||
| `Universal**` | ❌ | ✅ | ❌ |
|
||||
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
|
||||
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
|
||||
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
|
||||
| `Native Python` | ✅ | ✅ | ❌ |
|
||||
| `Detect spoken language` | ❌ | ✅ | N/A |
|
||||
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
|
||||
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
|
||||
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
|
||||
|
||||
<p align="center">
|
||||
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
|
||||
</p>
|
||||
|
||||
*\*\* : They are clearly using specific code for a specific encoding, even if it covers most of the ones in common use*<br>
|
||||
Did you get here because of the logs? See [https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html](https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html)
|
||||
|
||||
## ⚡ Performance
|
||||
|
||||
This package offers better performance than its counterpart, Chardet. Here are some numbers.
|
||||
|
||||
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|
||||
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
|
||||
| [chardet](https://github.com/chardet/chardet) | 86 % | 200 ms | 5 file/sec |
|
||||
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
|
||||
|
||||
| Package | 99th percentile | 95th percentile | 50th percentile |
|
||||
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
|
||||
| [chardet](https://github.com/chardet/chardet) | 1200 ms | 287 ms | 23 ms |
|
||||
| charset-normalizer | 100 ms | 50 ms | 5 ms |
|
||||
|
||||
Chardet's performance on larger files (1 MB+) is very poor. Expect a huge difference on large payloads.
|
||||
|
||||
> Stats are generated using 400+ files with default parameters. For more details on the files used, see the GHA workflows.
|
||||
> And yes, these results might change at any time. The dataset can be updated to include more files.
|
||||
> The actual delays depend heavily on your CPU capabilities. The relative factors should remain the same.
|
||||
> Keep in mind that the stats are generous and that Chardet's accuracy vs. ours is measured using Chardet's initial capability
|
||||
> (e.g. supported encodings). Challenge them if you want.
|
||||
|
||||
## ✨ Installation
|
||||
|
||||
Using pip:
|
||||
|
||||
```sh
|
||||
pip install charset-normalizer -U
|
||||
```
|
||||
|
||||
## 🚀 Basic Usage
|
||||
|
||||
### CLI
|
||||
This package comes with a CLI.
|
||||
|
||||
```
|
||||
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
|
||||
file [file ...]
|
||||
|
||||
The Real First Universal Charset Detector. Discover originating encoding used
|
||||
on text file. Normalize text to unicode.
|
||||
|
||||
positional arguments:
|
||||
files File(s) to be analysed
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-v, --verbose Display complementary information about file if any.
|
||||
Stdout will contain logs about the detection process.
|
||||
-a, --with-alternative
|
||||
Output complementary possibilities if any. Top-level
|
||||
JSON WILL be a list.
|
||||
-n, --normalize Permit to normalize input file. If not set, program
|
||||
does not write anything.
|
||||
-m, --minimal Only output the charset detected to STDOUT. Disabling
|
||||
JSON output.
|
||||
-r, --replace Replace file when trying to normalize it instead of
|
||||
creating a new one.
|
||||
-f, --force Replace file without asking if you are sure, use this
|
||||
flag with caution.
|
||||
-t THRESHOLD, --threshold THRESHOLD
|
||||
Define a custom maximum amount of chaos allowed in
|
||||
decoded content. 0. <= chaos <= 1.
|
||||
--version Show version information and exit.
|
||||
```
|
||||
|
||||
```bash
|
||||
normalizer ./data/sample.1.fr.srt
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```bash
|
||||
python -m charset_normalizer ./data/sample.1.fr.srt
|
||||
```
|
||||
|
||||
🎉 Since version 1.4.0, the CLI produces easily consumable stdout results in JSON format.
|
||||
|
||||
```json
|
||||
{
|
||||
"path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
|
||||
"encoding": "cp1252",
|
||||
"encoding_aliases": [
|
||||
"1252",
|
||||
"windows_1252"
|
||||
],
|
||||
"alternative_encodings": [
|
||||
"cp1254",
|
||||
"cp1256",
|
||||
"cp1258",
|
||||
"iso8859_14",
|
||||
"iso8859_15",
|
||||
"iso8859_16",
|
||||
"iso8859_3",
|
||||
"iso8859_9",
|
||||
"latin_1",
|
||||
"mbcs"
|
||||
],
|
||||
"language": "French",
|
||||
"alphabets": [
|
||||
"Basic Latin",
|
||||
"Latin-1 Supplement"
|
||||
],
|
||||
"has_sig_or_bom": false,
|
||||
"chaos": 0.149,
|
||||
"coherence": 97.152,
|
||||
"unicode_path": null,
|
||||
"is_preferred": true
|
||||
}
|
||||
```
|
||||
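Because the report is plain JSON on stdout, it is easy to consume from a script.
A minimal sketch (the file path is illustrative and error handling is omitted):

```python
import json
import subprocess

# Run the CLI on one file and parse its JSON report.
raw = subprocess.run(
    ["normalizer", "./data/sample.1.fr.srt"],
    capture_output=True,
    text=True,
    check=True,
).stdout

report = json.loads(raw)
print(report["encoding"], report["language"])
```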
|
||||
### Python
|
||||
*Just print out normalized text*
|
||||
```python
|
||||
from charset_normalizer import from_path
|
||||
|
||||
results = from_path('./my_subtitle.srt')
|
||||
|
||||
print(str(results.best()))
|
||||
```
|
||||
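The object returned by `best()` can also be inspected rather than only
stringified; a short sketch (the attribute mirrors the `encoding` field shown in
the JSON report above):

```python
from charset_normalizer import from_path

results = from_path('./my_subtitle.srt')
best_guess = results.best()

if best_guess is not None:
    # Codec name such as "cp1252"; str(best_guess) still gives the decoded text.
    print(best_guess.encoding)
```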
|
||||
*Upgrade your code without effort*
|
||||
```python
|
||||
from charset_normalizer import detect
|
||||
```
|
||||
|
||||
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) backward-compatible result possible.
|
||||
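A sketch of the drop-in call (the sample text is illustrative; the returned
mapping mirrors chardet's `encoding`/`confidence`/`language` keys):

```python
from charset_normalizer import detect

# Same call shape as chardet.detect(); the argument is raw bytes.
result = detect("Bonjour, où êtes-vous ?".encode("cp1252"))
print(result["encoding"], result["confidence"])
```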
|
||||
See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
|
||||
|
||||
## 😇 Why
|
||||
|
||||
When I started using Chardet, I noticed that it did not meet my expectations, and I wanted to propose a
|
||||
reliable alternative using a completely different method. Also! I never back down on a good challenge!
|
||||
|
||||
I **don't care** about the **originating charset** encoding, because **two different tables** can
|
||||
produce **two identical rendered strings.**
|
||||
What I want is to get readable text, the best I can.
|
||||
|
||||
In a way, **I'm brute-forcing text decoding.** How cool is that? 😎
|
||||
|
||||
Don't confuse the **ftfy** package with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer converts a raw file in an unknown encoding to Unicode.
|
||||
|
||||
## 🍰 How
|
||||
|
||||
- Discard all charset encoding tables that could not fit the binary content.
|
||||
- Measure the noise, or mess, once the content is opened (in chunks) with a candidate charset encoding.
|
||||
- Extract matches with the lowest mess detected.
|
||||
- Additionally, we measure coherence / probe for a language.
|
||||
|
||||
**Wait a minute**, what are noise/mess and coherence according to **YOU**?
|
||||
|
||||
*Noise:* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
|
||||
**I established** some ground rules about **what is obvious** when **it seems like** a mess.
|
||||
I know that my interpretation of what counts as noise is probably incomplete; feel free to contribute in order to
|
||||
improve or rewrite it.
|
||||
|
||||
*Coherence:* For each language on earth, we have computed ranked letter-appearance frequencies (as best we can). So I thought
|
||||
that this intel is worth something here, so I use those records against the decoded text to check whether I can detect intelligent design.
|
||||
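As a rough illustration of that idea (deliberately simplistic, not the actual
logic in `md.py`/`cd.py`), "brute forcing" plus a mess score can be sketched as:

```python
def naive_best_guess(payload: bytes, candidates=("utf_8", "cp1252", "latin_1")) -> str:
    """Toy version: try each candidate codec and keep the least messy result."""
    scored = []
    for codec in candidates:
        try:
            text = payload.decode(codec)
        except UnicodeDecodeError:
            continue  # this table cannot fit the binary content at all
        # Naive "mess" measure: share of unprintable characters in the decoded text.
        mess = sum(not c.isprintable() and c not in "\r\n\t" for c in text) / max(len(text), 1)
        scored.append((mess, codec))
    return min(scored)[1] if scored else "utf_8"


print(naive_best_guess("Hé ho !".encode("cp1252")))  # prints "cp1252" (utf_8 cannot decode it)
```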
|
||||
## ⚡ Known limitations
|
||||
|
||||
- Language detection is unreliable when the text contains two or more languages sharing identical letters (e.g. HTML (English tags) + Turkish content (sharing Latin characters)).
|
||||
- Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.
|
||||
|
||||
## ⚠️ About Python EOLs
|
||||
|
||||
**If you are running:**
|
||||
|
||||
- Python >=2.7,<3.5: Unsupported
|
||||
- Python 3.5: charset-normalizer < 2.1
|
||||
- Python 3.6: charset-normalizer < 3.1
|
||||
- Python 3.7: charset-normalizer < 4.0
|
||||
|
||||
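If you must stay on an older interpreter for a while, the support matrix above
can be expressed as environment-marker pins in a `requirements.txt` (a sketch
using standard PEP 508 markers):

```
charset-normalizer<2.1; python_version == "3.5"
charset-normalizer<3.1; python_version == "3.6"
charset-normalizer<4.0; python_version == "3.7"
```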
Upgrade your Python interpreter as soon as possible.
|
||||
|
||||
## 👤 Contributing
|
||||
|
||||
Contributions, issues and feature requests are very much welcome.<br />
|
||||
Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
|
||||
|
||||
## 📝 License
|
||||
|
||||
Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
|
||||
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
|
||||
|
||||
Character frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
|
||||
|
||||
## 💼 For Enterprise
|
||||
|
||||
Professional support for charset-normalizer is available as part of the [Tidelift
|
||||
Subscription][1]. Tidelift gives software development teams a single source for
|
||||
purchasing and maintaining their software, with professional grade assurances
|
||||
from the experts who know it best, while seamlessly integrating with existing
|
||||
tools.
|
||||
|
||||
[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
|
||||
|
||||
# Changelog
|
||||
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||
|
||||
## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
|
||||
|
||||
### Added
|
||||
- Argument `--no-preemptive` in the CLI to prevent the detector to search for hints.
|
||||
- Support for Python 3.13 (#512)
|
||||
|
||||
### Fixed
|
||||
- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch.
|
||||
- Improved the general reliability of the detector based on user feedbacks. (#520) (#509) (#498) (#407) (#537)
|
||||
- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
|
||||
|
||||
## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
|
||||
|
||||
### Fixed
|
||||
- Unintentional memory usage regression when using large payload that match several encoding (#376)
|
||||
- Regression on some detection case showcased in the documentation (#371)
|
||||
|
||||
### Added
|
||||
- Noise (md) probe that identify malformed arabic representation due to the presence of letters in isolated form (credit to my wife)
|
||||
|
||||
## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
|
||||
|
||||
### Changed
|
||||
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
|
||||
- Improved the general detection reliability based on reports from the community
|
||||
|
||||
## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
|
||||
|
||||
### Added
|
||||
- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
|
||||
- Support for 9 forgotten encoding that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)
|
||||
|
||||
### Removed
|
||||
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
|
||||
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
|
||||
|
||||
### Changed
|
||||
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
|
||||
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
|
||||
|
||||
### Fixed
|
||||
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
|
||||
|
||||
## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
|
||||
|
||||
### Changed
|
||||
- Typehint for function `from_path` no longer enforce `PathLike` as its first argument
|
||||
- Minor improvement over the global detection reliability
|
||||
|
||||
### Added
|
||||
- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries
|
||||
- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)
|
||||
- Explicit support for Python 3.12
|
||||
|
||||
### Fixed
|
||||
- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)
|
||||
|
||||
## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
|
||||
|
||||
### Added
|
||||
- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)
|
||||
|
||||
### Removed
|
||||
- Support for Python 3.6 (PR #260)
|
||||
|
||||
### Changed
|
||||
- Optional speedup provided by mypy/c 1.0.1
|
||||
|
||||
## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
|
||||
|
||||
### Fixed
|
||||
- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)
|
||||
|
||||
### Changed
|
||||
- Speedup provided by mypy/c 0.990 on Python >= 3.7
|
||||
|
||||
## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
|
||||
|
||||
### Added
|
||||
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
|
||||
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
|
||||
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
|
||||
- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
|
||||
|
||||
### Changed
|
||||
- Build with static metadata using 'build' frontend
|
||||
- Make the language detection stricter
|
||||
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
|
||||
|
||||
### Fixed
|
||||
- CLI with opt --normalize fail when using full path for files
|
||||
- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
|
||||
- Sphinx warnings when generating the documentation
|
||||
|
||||
### Removed
|
||||
- Coherence detector no longer return 'Simple English' instead return 'English'
|
||||
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
|
||||
- Breaking: Method `first()` and `best()` from CharsetMatch
|
||||
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
|
||||
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
|
||||
- Breaking: Top-level function `normalize`
|
||||
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
|
||||
- Support for the backport `unicodedata2`
|
||||
|
||||
## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
|
||||
|
||||
### Added
|
||||
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
|
||||
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
|
||||
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
|
||||
|
||||
### Changed
|
||||
- Build with static metadata using 'build' frontend
|
||||
- Make the language detection stricter
|
||||
|
||||
### Fixed
|
||||
- CLI with opt --normalize fail when using full path for files
|
||||
- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
|
||||
|
||||
### Removed
|
||||
- Coherence detector no longer return 'Simple English' instead return 'English'
|
||||
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
|
||||
|
||||
## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
|
||||
|
||||
### Added
|
||||
- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
|
||||
|
||||
### Removed
|
||||
- Breaking: Method `first()` and `best()` from CharsetMatch
|
||||
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
|
||||
|
||||
### Fixed
|
||||
- Sphinx warnings when generating the documentation
|
||||
|
||||
## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
|
||||
|
||||
### Changed
|
||||
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
|
||||
|
||||
### Removed
|
||||
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
|
||||
- Breaking: Top-level function `normalize`
|
||||
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
|
||||
- Support for the backport `unicodedata2`
|
||||
|
||||
## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
|
||||
|
||||
### Deprecated
|
||||
- Function `normalize` scheduled for removal in 3.0
|
||||
|
||||
### Changed
|
||||
- Removed useless call to decode in fn is_unprintable (#206)
|
||||
|
||||
### Fixed
|
||||
- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
|
||||
|
||||
## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
|
||||
|
||||
### Added
|
||||
- Output the Unicode table version when running the CLI with `--version` (PR #194)
|
||||
|
||||
### Changed
|
||||
- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
|
||||
- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
|
||||
|
||||
### Fixed
|
||||
- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
|
||||
- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
|
||||
|
||||
### Removed
|
||||
- Support for Python 3.5 (PR #192)
|
||||
|
||||
### Deprecated
|
||||
- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
|
||||
|
||||
## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
|
||||
|
||||
### Fixed
|
||||
- ASCII miss-detection on rare cases (PR #170)
|
||||
|
||||
## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
|
||||
|
||||
### Added
|
||||
- Explicit support for Python 3.11 (PR #164)
|
||||
|
||||
### Changed
|
||||
- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
|
||||
|
||||
## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
|
||||
|
||||
### Fixed
|
||||
- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)
|
||||
|
||||
### Changed
|
||||
- Skipping the language-detection (CD) on ASCII (PR #155)
|
||||
|
||||
## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
|
||||
|
||||
### Changed
|
||||
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
|
||||
|
||||
### Fixed
|
||||
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
|
||||
|
||||
## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
|
||||
### Changed
|
||||
- Improvement over Vietnamese detection (PR #126)
|
||||
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
|
||||
- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
|
||||
- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
|
||||
- Code style as refactored by Sourcery-AI (PR #131)
|
||||
- Minor adjustment on the MD around european words (PR #133)
|
||||
- Remove and replace SRTs from assets / tests (PR #139)
|
||||
- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
|
||||
- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)
|
||||
|
||||
### Fixed
|
||||
- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
|
||||
- Avoid using too insignificant chunk (PR #137)
|
||||
|
||||
### Added
|
||||
- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
|
||||
- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
|
||||
|
||||
## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
|
||||
### Added
|
||||
- Add support for Kazakh (Cyrillic) language detection (PR #109)
|
||||
|
||||
### Changed
|
||||
- Further, improve inferring the language from a given single-byte code page (PR #112)
|
||||
- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
|
||||
- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
|
||||
- Various detection improvement (MD+CD) (PR #117)
|
||||
|
||||
### Removed
|
||||
- Remove redundant logging entry about detected language(s) (PR #115)
|
||||
|
||||
### Fixed
|
||||
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
|
||||
|
||||
## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
|
||||
### Fixed
|
||||
- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)
|
||||
- Fix CLI crash when using --minimal output in certain cases (PR #103)
|
||||
|
||||
### Changed
|
||||
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
|
||||
|
||||
## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
|
||||
### Changed
|
||||
- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)
|
||||
- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)
|
||||
- The Unicode detection is slightly improved (PR #93)
|
||||
- Add syntax sugar \_\_bool\_\_ for results CharsetMatches list-container (PR #91)
|
||||
|
||||
### Removed
|
||||
- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92)
|
||||
|
||||
### Fixed
|
||||
- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)
|
||||
- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
|
||||
- The MANIFEST.in was not exhaustive (PR #78)
|
||||
|
||||
## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
|
||||
### Fixed
|
||||
- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70)
|
||||
- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
|
||||
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
|
||||
- Submatch factoring could be wrong in rare edge cases (PR #72)
|
||||
- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
|
||||
- Fix line endings from CRLF to LF for certain project files (PR #67)
|
||||
|
||||
### Changed
|
||||
- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
|
||||
- Allow fallback on specified encoding if any (PR #71)
|
||||
|
||||
## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
|
||||
### Changed
|
||||
- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
|
||||
- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)
|
||||
|
||||
## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
|
||||
### Fixed
|
||||
- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
|
||||
|
||||
### Changed
|
||||
- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
|
||||
|
||||
## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
|
||||
### Fixed
|
||||
- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
|
||||
- Using explain=False permanently disable the verbose output in the current runtime (PR #47)
|
||||
- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47)
|
||||
- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
|
||||
|
||||
### Changed
|
||||
- Public function normalize default args values were not aligned with from_bytes (PR #53)
|
||||
|
||||
### Added
|
||||
- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
|
||||
|
||||
## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
|
||||
### Changed
|
||||
- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
|
||||
- Accent has been made on UTF-8 detection, should perform rather instantaneous.
|
||||
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
|
||||
- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)
|
||||
- The program has been rewritten to improve readability and maintainability (+ using static typing).
|
||||
- utf_7 detection has been reinstated.
|
||||
|
||||
### Removed
|
||||
- This package no longer requires anything when used with Python 3.5 (dropped cached_property)
|
||||
- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.
|
||||
- The exception hook on UnicodeDecodeError has been removed.
|
||||
|
||||
### Deprecated
|
||||
- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
|
||||
|
||||
### Fixed
|
||||
- The CLI output used the relative path of the file(s). Should be absolute.
|
||||
|
||||
## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
|
||||
### Fixed
|
||||
- Logger configuration/usage no longer conflict with others (PR #44)
|
||||
|
||||
## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
|
||||
### Removed
|
||||
- Using standard logging instead of using the package loguru.
|
||||
- Dropping nose test framework in favor of the maintained pytest.
|
||||
- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.
|
||||
- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.
|
||||
- Stop support for UTF-7 that does not contain a SIG.
|
||||
- Dropping PrettyTable, replaced with pure JSON output in CLI.
|
||||
|
||||
### Fixed
|
||||
- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.
|
||||
- Not searching properly for the BOM when trying utf32/16 parent codec.
|
||||
|
||||
### Changed
|
||||
- Improving the package final size by compressing frequencies.json.
|
||||
- Huge improvement over the largest payloads.
|
||||
|
||||
### Added
|
||||
- CLI now produces JSON consumable output.
|
||||
- Return ASCII if given sequences fit. Given reasonable confidence.
|
||||
|
||||
## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
|
||||
|
||||
### Fixed
|
||||
- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
|
||||
|
||||
## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
|
||||
|
||||
### Fixed
|
||||
- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)
|
||||
|
||||
## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
|
||||
|
||||
### Fixed
|
||||
- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)
|
||||
|
||||
## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
|
||||
|
||||
### Changed
|
||||
- Amend the previous release to allow prettytable 2.0 (PR #35)
|
||||
|
||||
## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
|
||||
|
||||
### Fixed
|
||||
- Fix error while using the package with a python pre-release interpreter (PR #33)
|
||||
|
||||
### Changed
|
||||
- Dependencies refactoring, constraints revised.
|
||||
|
||||
### Added
|
||||
- Add python 3.9 and 3.10 to the supported interpreters
|
||||
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 TAHRI Ahmed R.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
35
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/RECORD
vendored
Normal file
35
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/RECORD
vendored
Normal file
@ -0,0 +1,35 @@
|
||||
../../../bin/normalizer,sha256=pWxmMYA_SquLIU6d0ASgK3copKj6QWxw28YimXUHlzw,251
|
||||
charset_normalizer-3.4.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||
charset_normalizer-3.4.0.dist-info/LICENSE,sha256=6zGgxaT7Cbik4yBV0lweX5w1iidS_vPNcgIT0cz-4kE,1070
|
||||
charset_normalizer-3.4.0.dist-info/METADATA,sha256=WGbEW9ehh2spNJxo1M6sEGGZWmsQ-oj2DsMjV29zoms,34159
|
||||
charset_normalizer-3.4.0.dist-info/RECORD,,
|
||||
charset_normalizer-3.4.0.dist-info/WHEEL,sha256=Z868N0_Fq1ssfDKgnQWj75ig0pzypFewyov-H4g6Btc,153
|
||||
charset_normalizer-3.4.0.dist-info/entry_points.txt,sha256=ADSTKrkXZ3hhdOVFi6DcUEHQRS0xfxDIE_pEz4wLIXA,65
|
||||
charset_normalizer-3.4.0.dist-info/top_level.txt,sha256=7ASyzePr8_xuZWJsnqJjIBtyV8vhEo0wBCv1MPRRi3Q,19
|
||||
charset_normalizer/__init__.py,sha256=UzI3xC8PhmcLRMzSgPb6minTmRq0kWznnCBJ8ZCc2XI,1577
|
||||
charset_normalizer/__main__.py,sha256=JxY8bleaENOFlLRb9HfoeZCzAMnn2A1oGR5Xm2eyqg0,73
|
||||
charset_normalizer/__pycache__/__init__.cpython-312.pyc,,
|
||||
charset_normalizer/__pycache__/__main__.cpython-312.pyc,,
|
||||
charset_normalizer/__pycache__/api.cpython-312.pyc,,
|
||||
charset_normalizer/__pycache__/cd.cpython-312.pyc,,
|
||||
charset_normalizer/__pycache__/constant.cpython-312.pyc,,
|
||||
charset_normalizer/__pycache__/legacy.cpython-312.pyc,,
|
||||
charset_normalizer/__pycache__/md.cpython-312.pyc,,
|
||||
charset_normalizer/__pycache__/models.cpython-312.pyc,,
|
||||
charset_normalizer/__pycache__/utils.cpython-312.pyc,,
|
||||
charset_normalizer/__pycache__/version.cpython-312.pyc,,
|
||||
charset_normalizer/api.py,sha256=kMyNUqrfBZU22PP0pYKrSldtYUGA24wsGlXGLAKra7c,22559
|
||||
charset_normalizer/cd.py,sha256=xwZliZcTQFA3jU0c00PRiu9MNxXTFxQkFLWmMW24ZzI,12560
|
||||
charset_normalizer/cli/__init__.py,sha256=D5ERp8P62llm2FuoMzydZ7d9rs8cvvLXqE-1_6oViPc,100
|
||||
charset_normalizer/cli/__main__.py,sha256=zX9sV_ApU1d96Wb0cS04vulstdB4F0Eh7kLn-gevfw4,10411
|
||||
charset_normalizer/cli/__pycache__/__init__.cpython-312.pyc,,
|
||||
charset_normalizer/cli/__pycache__/__main__.cpython-312.pyc,,
|
||||
charset_normalizer/constant.py,sha256=uwoW87NicWZDTLviX7le0wdoYBbhBQDA4n1JtJo77ts,40499
|
||||
charset_normalizer/legacy.py,sha256=XJjkT0hejMH8qfAKz1ts8OUiBT18t2FJP3tJgLwUWwc,2327
|
||||
charset_normalizer/md.cpython-312-aarch64-linux-gnu.so,sha256=medVy2qYxvmhqZLDgu6sOFWJ_3LJ2X3o-RJovGFelks,69800
|
||||
charset_normalizer/md.py,sha256=SIIZcENrslI7h3v4GigbFN61fRyE_wiCN1z9Ii3fBRo,20138
|
||||
charset_normalizer/md__mypyc.cpython-312-aarch64-linux-gnu.so,sha256=sxeTw_aoOZt6lM09TkDdRVjlOp1FyW8wJQWSCrj5ldc,322008
|
||||
charset_normalizer/models.py,sha256=oAMAcBSEY7CngbUXJp34Wc4Rl9NKJJjGmUwW3EPtk6g,12425
|
||||
charset_normalizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
charset_normalizer/utils.py,sha256=teiosMqzKjXyAHXnGdjSBOgnBZwx-SkBbCLrx0UXy8M,11894
|
||||
charset_normalizer/version.py,sha256=AX66S4ytQFdd6F5jbVU2OPMqYwFS5M3BkMvyX-3BKF8,79
|
6
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/WHEEL
vendored
Normal file
6
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/WHEEL
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
Wheel-Version: 1.0
|
||||
Generator: setuptools (75.1.0)
|
||||
Root-Is-Purelib: false
|
||||
Tag: cp312-cp312-manylinux_2_17_aarch64
|
||||
Tag: cp312-cp312-manylinux2014_aarch64
|
||||
|
2
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/entry_points.txt
vendored
Normal file
2
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/entry_points.txt
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
[console_scripts]
|
||||
normalizer = charset_normalizer.cli:cli_detect
|
1
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/top_level.txt
vendored
Normal file
1
env/lib/python3.12/site-packages/charset_normalizer-3.4.0.dist-info/top_level.txt
vendored
Normal file
@ -0,0 +1 @@
|
||||
charset_normalizer
|
46
env/lib/python3.12/site-packages/charset_normalizer/__init__.py
vendored
Normal file
46
env/lib/python3.12/site-packages/charset_normalizer/__init__.py
vendored
Normal file
@ -0,0 +1,46 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Charset-Normalizer
|
||||
~~~~~~~~~~~~~~
|
||||
The Real First Universal Charset Detector.
|
||||
A library that helps you read text from an unknown charset encoding.
|
||||
Motivated by chardet, This package is trying to resolve the issue by taking a new approach.
|
||||
All IANA character set names for which the Python core library provides codecs are supported.
|
||||
|
||||
Basic usage:
|
||||
>>> from charset_normalizer import from_bytes
|
||||
>>> results = from_bytes('Bсеки човек има право на образование. Oбразованието!'.encode('utf_8'))
|
||||
>>> best_guess = results.best()
|
||||
>>> str(best_guess)
|
||||
'Bсеки човек има право на образование. Oбразованието!'
|
||||
|
||||
Others methods and usages are available - see the full documentation
|
||||
at <https://github.com/Ousret/charset_normalizer>.
|
||||
:copyright: (c) 2021 by Ahmed TAHRI
|
||||
:license: MIT, see LICENSE for more details.
|
||||
"""
|
||||
import logging
|
||||
|
||||
from .api import from_bytes, from_fp, from_path, is_binary
|
||||
from .legacy import detect
|
||||
from .models import CharsetMatch, CharsetMatches
|
||||
from .utils import set_logging_handler
|
||||
from .version import VERSION, __version__
|
||||
|
||||
__all__ = (
|
||||
"from_fp",
|
||||
"from_path",
|
||||
"from_bytes",
|
||||
"is_binary",
|
||||
"detect",
|
||||
"CharsetMatch",
|
||||
"CharsetMatches",
|
||||
"__version__",
|
||||
"VERSION",
|
||||
"set_logging_handler",
|
||||
)
|
||||
|
||||
# Attach a NullHandler to the top level logger by default
|
||||
# https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library
|
||||
|
||||
logging.getLogger("charset_normalizer").addHandler(logging.NullHandler())
|
4
env/lib/python3.12/site-packages/charset_normalizer/__main__.py
vendored
Normal file
4
env/lib/python3.12/site-packages/charset_normalizer/__main__.py
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
from .cli import cli_detect
|
||||
|
||||
if __name__ == "__main__":
|
||||
cli_detect()
|
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/__init__.cpython-312.pyc
vendored
Normal file
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/__init__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/__main__.cpython-312.pyc
vendored
Normal file
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/__main__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/api.cpython-312.pyc
vendored
Normal file
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/api.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/cd.cpython-312.pyc
vendored
Normal file
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/cd.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/constant.cpython-312.pyc
vendored
Normal file
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/constant.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/legacy.cpython-312.pyc
vendored
Normal file
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/legacy.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/md.cpython-312.pyc
vendored
Normal file
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/md.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/models.cpython-312.pyc
vendored
Normal file
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/models.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/utils.cpython-312.pyc
vendored
Normal file
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/utils.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/version.cpython-312.pyc
vendored
Normal file
BIN
env/lib/python3.12/site-packages/charset_normalizer/__pycache__/version.cpython-312.pyc
vendored
Normal file
Binary file not shown.
668
env/lib/python3.12/site-packages/charset_normalizer/api.py
vendored
Normal file
668
env/lib/python3.12/site-packages/charset_normalizer/api.py
vendored
Normal file
@ -0,0 +1,668 @@
|
||||
import logging
|
||||
from os import PathLike
|
||||
from typing import BinaryIO, List, Optional, Set, Union
|
||||
|
||||
from .cd import (
|
||||
coherence_ratio,
|
||||
encoding_languages,
|
||||
mb_encoding_languages,
|
||||
merge_coherence_ratios,
|
||||
)
|
||||
from .constant import IANA_SUPPORTED, TOO_BIG_SEQUENCE, TOO_SMALL_SEQUENCE, TRACE
|
||||
from .md import mess_ratio
|
||||
from .models import CharsetMatch, CharsetMatches
|
||||
from .utils import (
|
||||
any_specified_encoding,
|
||||
cut_sequence_chunks,
|
||||
iana_name,
|
||||
identify_sig_or_bom,
|
||||
is_cp_similar,
|
||||
is_multi_byte_encoding,
|
||||
should_strip_sig_or_bom,
|
||||
)
|
||||
|
||||
# Will most likely be controversial
|
||||
# logging.addLevelName(TRACE, "TRACE")
|
||||
logger = logging.getLogger("charset_normalizer")
|
||||
explain_handler = logging.StreamHandler()
|
||||
explain_handler.setFormatter(
|
||||
logging.Formatter("%(asctime)s | %(levelname)s | %(message)s")
|
||||
)
|
||||
|
||||
|
||||
def from_bytes(
|
||||
sequences: Union[bytes, bytearray],
|
||||
steps: int = 5,
|
||||
chunk_size: int = 512,
|
||||
threshold: float = 0.2,
|
||||
cp_isolation: Optional[List[str]] = None,
|
||||
cp_exclusion: Optional[List[str]] = None,
|
||||
preemptive_behaviour: bool = True,
|
||||
explain: bool = False,
|
||||
language_threshold: float = 0.1,
|
||||
enable_fallback: bool = True,
|
||||
) -> CharsetMatches:
|
||||
"""
|
||||
Given a raw bytes sequence, return the best possibles charset usable to render str objects.
|
||||
If there is no results, it is a strong indicator that the source is binary/not text.
|
||||
By default, the process will extract 5 blocks of 512o each to assess the mess and coherence of a given sequence.
|
||||
And will give up a particular code page after 20% of measured mess. Those criteria are customizable at will.
|
||||
|
||||
The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritize a particular code page
|
||||
but never take it for granted. Can improve the performance.
|
||||
|
||||
You may want to focus your attention to some code page or/and not others, use cp_isolation and cp_exclusion for that
|
||||
purpose.
|
||||
|
||||
This function will strip the SIG in the payload/sequence every time except on UTF-16, UTF-32.
|
||||
By default the library does not setup any handler other than the NullHandler, if you choose to set the 'explain'
|
||||
toggle to True it will alter the logger configuration to add a StreamHandler that is suitable for debugging.
|
||||
Custom logging format and handler can be set manually.
|
||||
"""
|
||||
|
||||
if not isinstance(sequences, (bytearray, bytes)):
|
||||
raise TypeError(
|
||||
"Expected object of type bytes or bytearray, got: {0}".format(
|
||||
type(sequences)
|
||||
)
|
||||
)
|
||||
|
||||
if explain:
|
||||
previous_logger_level: int = logger.level
|
||||
logger.addHandler(explain_handler)
|
||||
logger.setLevel(TRACE)
|
||||
|
||||
length: int = len(sequences)
|
||||
|
||||
if length == 0:
|
||||
logger.debug("Encoding detection on empty bytes, assuming utf_8 intention.")
|
||||
if explain:
|
||||
logger.removeHandler(explain_handler)
|
||||
logger.setLevel(previous_logger_level or logging.WARNING)
|
||||
return CharsetMatches([CharsetMatch(sequences, "utf_8", 0.0, False, [], "")])
|
||||
|
||||
if cp_isolation is not None:
|
||||
logger.log(
|
||||
TRACE,
|
||||
"cp_isolation is set. use this flag for debugging purpose. "
|
||||
"limited list of encoding allowed : %s.",
|
||||
", ".join(cp_isolation),
|
||||
)
|
||||
cp_isolation = [iana_name(cp, False) for cp in cp_isolation]
|
||||
else:
|
||||
cp_isolation = []
|
||||
|
||||
if cp_exclusion is not None:
|
||||
logger.log(
|
||||
TRACE,
|
||||
"cp_exclusion is set. use this flag for debugging purpose. "
|
||||
"limited list of encoding excluded : %s.",
|
||||
", ".join(cp_exclusion),
|
||||
)
|
||||
cp_exclusion = [iana_name(cp, False) for cp in cp_exclusion]
|
||||
else:
|
||||
cp_exclusion = []
|
||||
|
||||
if length <= (chunk_size * steps):
|
||||
logger.log(
|
||||
TRACE,
|
||||
"override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.",
|
||||
steps,
|
||||
chunk_size,
|
||||
length,
|
||||
)
|
||||
steps = 1
|
||||
chunk_size = length
|
||||
|
||||
if steps > 1 and length / steps < chunk_size:
|
||||
chunk_size = int(length / steps)
|
||||
|
||||
is_too_small_sequence: bool = len(sequences) < TOO_SMALL_SEQUENCE
|
||||
is_too_large_sequence: bool = len(sequences) >= TOO_BIG_SEQUENCE
|
||||
|
||||
if is_too_small_sequence:
|
||||
logger.log(
|
||||
TRACE,
|
||||
"Trying to detect encoding from a tiny portion of ({}) byte(s).".format(
|
||||
length
|
||||
),
|
||||
)
|
||||
elif is_too_large_sequence:
|
||||
logger.log(
|
||||
TRACE,
|
||||
"Using lazy str decoding because the payload is quite large, ({}) byte(s).".format(
|
||||
length
|
||||
),
|
||||
)
|
||||
|
||||
prioritized_encodings: List[str] = []
|
||||
|
||||
specified_encoding: Optional[str] = (
|
||||
any_specified_encoding(sequences) if preemptive_behaviour else None
|
||||
)
|
||||
|
||||
if specified_encoding is not None:
|
||||
prioritized_encodings.append(specified_encoding)
|
||||
logger.log(
|
||||
TRACE,
|
||||
"Detected declarative mark in sequence. Priority +1 given for %s.",
|
||||
specified_encoding,
|
||||
)
|
||||
|
||||
tested: Set[str] = set()
|
||||
tested_but_hard_failure: List[str] = []
|
||||
tested_but_soft_failure: List[str] = []
|
||||
|
||||
fallback_ascii: Optional[CharsetMatch] = None
|
||||
fallback_u8: Optional[CharsetMatch] = None
|
||||
fallback_specified: Optional[CharsetMatch] = None
|
||||
|
||||
results: CharsetMatches = CharsetMatches()
|
||||
|
||||
early_stop_results: CharsetMatches = CharsetMatches()
|
||||
|
||||
sig_encoding, sig_payload = identify_sig_or_bom(sequences)
|
||||
|
||||
if sig_encoding is not None:
|
||||
prioritized_encodings.append(sig_encoding)
|
||||
logger.log(
|
||||
TRACE,
|
||||
"Detected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.",
|
||||
len(sig_payload),
|
||||
sig_encoding,
|
||||
)
|
||||
|
||||
prioritized_encodings.append("ascii")
|
||||
|
||||
if "utf_8" not in prioritized_encodings:
|
||||
prioritized_encodings.append("utf_8")
|
||||
|
||||
for encoding_iana in prioritized_encodings + IANA_SUPPORTED:
|
||||
if cp_isolation and encoding_iana not in cp_isolation:
|
||||
continue
|
||||
|
||||
if cp_exclusion and encoding_iana in cp_exclusion:
|
||||
continue
|
||||
|
||||
if encoding_iana in tested:
|
||||
continue
|
||||
|
||||
tested.add(encoding_iana)
|
||||
|
||||
decoded_payload: Optional[str] = None
|
||||
bom_or_sig_available: bool = sig_encoding == encoding_iana
|
||||
strip_sig_or_bom: bool = bom_or_sig_available and should_strip_sig_or_bom(
|
||||
encoding_iana
|
||||
)
|
||||
|
||||
if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available:
|
||||
logger.log(
|
||||
TRACE,
|
||||
"Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.",
|
||||
encoding_iana,
|
||||
)
|
||||
continue
|
||||
if encoding_iana in {"utf_7"} and not bom_or_sig_available:
|
||||
logger.log(
|
||||
TRACE,
|
||||
"Encoding %s won't be tested as-is because detection is unreliable without BOM/SIG.",
|
||||
encoding_iana,
|
||||
)
|
||||
continue
|
||||
|
||||
try:
|
||||
is_multi_byte_decoder: bool = is_multi_byte_encoding(encoding_iana)
|
||||
except (ModuleNotFoundError, ImportError):
|
||||
logger.log(
|
||||
TRACE,
|
||||
"Encoding %s does not provide an IncrementalDecoder",
|
||||
encoding_iana,
|
||||
)
|
||||
continue
|
||||
|
||||
try:
|
||||
if is_too_large_sequence and is_multi_byte_decoder is False:
|
||||
str(
|
||||
(
|
||||
sequences[: int(50e4)]
|
||||
if strip_sig_or_bom is False
|
||||
else sequences[len(sig_payload) : int(50e4)]
|
||||
),
|
||||
encoding=encoding_iana,
|
||||
)
|
||||
else:
|
||||
decoded_payload = str(
|
||||
(
|
||||
sequences
|
||||
if strip_sig_or_bom is False
|
||||
else sequences[len(sig_payload) :]
|
||||
),
|
||||
encoding=encoding_iana,
|
||||
)
|
||||
except (UnicodeDecodeError, LookupError) as e:
|
||||
if not isinstance(e, LookupError):
|
||||
logger.log(
|
||||
TRACE,
|
||||
"Code page %s does not fit given bytes sequence at ALL. %s",
|
||||
encoding_iana,
|
||||
str(e),
|
||||
)
|
||||
tested_but_hard_failure.append(encoding_iana)
|
||||
continue
|
||||
|
||||
similar_soft_failure_test: bool = False
|
||||
|
||||
for encoding_soft_failed in tested_but_soft_failure:
|
||||
if is_cp_similar(encoding_iana, encoding_soft_failed):
|
||||
similar_soft_failure_test = True
|
||||
break
|
||||
|
||||
if similar_soft_failure_test:
|
||||
logger.log(
|
||||
TRACE,
|
||||
"%s is deemed too similar to code page %s and was consider unsuited already. Continuing!",
|
||||
encoding_iana,
|
||||
encoding_soft_failed,
|
||||
)
|
||||
continue
|
||||
|
||||
r_ = range(
|
||||
0 if not bom_or_sig_available else len(sig_payload),
|
||||
length,
|
||||
int(length / steps),
|
||||
)
|
||||
|
||||
multi_byte_bonus: bool = (
|
||||
is_multi_byte_decoder
|
||||
and decoded_payload is not None
|
||||
and len(decoded_payload) < length
|
||||
)
|
||||
|
||||
if multi_byte_bonus:
|
||||
logger.log(
|
||||
TRACE,
|
||||
"Code page %s is a multi byte encoding table and it appear that at least one character "
|
||||
"was encoded using n-bytes.",
|
||||
encoding_iana,
|
||||
)
|
||||
|
||||
max_chunk_gave_up: int = int(len(r_) / 4)
|
||||
|
||||
max_chunk_gave_up = max(max_chunk_gave_up, 2)
|
||||
early_stop_count: int = 0
|
||||
lazy_str_hard_failure = False
|
||||
|
||||
md_chunks: List[str] = []
|
||||
md_ratios = []
|
||||
|
||||
try:
|
||||
for chunk in cut_sequence_chunks(
|
||||
sequences,
|
||||
encoding_iana,
|
||||
r_,
|
||||
chunk_size,
|
||||
bom_or_sig_available,
|
||||
strip_sig_or_bom,
|
||||
sig_payload,
|
||||
is_multi_byte_decoder,
|
||||
decoded_payload,
|
||||
):
|
||||
md_chunks.append(chunk)
|
||||
|
||||
md_ratios.append(
|
||||
mess_ratio(
|
||||
chunk,
|
||||
threshold,
|
||||
explain is True and 1 <= len(cp_isolation) <= 2,
|
||||
)
|
||||
)
|
||||
|
||||
if md_ratios[-1] >= threshold:
|
||||
early_stop_count += 1
|
||||
|
||||
if (early_stop_count >= max_chunk_gave_up) or (
|
||||
bom_or_sig_available and strip_sig_or_bom is False
|
||||
):
|
||||
break
|
||||
except (
|
||||
UnicodeDecodeError
|
||||
) as e: # Lazy str loading may have missed something there
|
||||
logger.log(
|
||||
TRACE,
|
||||
"LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s",
|
||||
encoding_iana,
|
||||
str(e),
|
||||
)
|
||||
early_stop_count = max_chunk_gave_up
|
||||
lazy_str_hard_failure = True
|
||||
|
||||
# We might want to check the sequence again with the whole content,
# but only if the initial MD tests pass.
|
||||
if (
|
||||
not lazy_str_hard_failure
|
||||
and is_too_large_sequence
|
||||
and not is_multi_byte_decoder
|
||||
):
|
||||
try:
|
||||
sequences[int(50e3) :].decode(encoding_iana, errors="strict")
|
||||
except UnicodeDecodeError as e:
|
||||
logger.log(
|
||||
TRACE,
|
||||
"LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %s",
|
||||
encoding_iana,
|
||||
str(e),
|
||||
)
|
||||
tested_but_hard_failure.append(encoding_iana)
|
||||
continue
|
||||
|
||||
mean_mess_ratio: float = sum(md_ratios) / len(md_ratios) if md_ratios else 0.0
|
||||
if mean_mess_ratio >= threshold or early_stop_count >= max_chunk_gave_up:
|
||||
tested_but_soft_failure.append(encoding_iana)
|
||||
logger.log(
|
||||
TRACE,
|
||||
"%s was excluded because of initial chaos probing. Gave up %i time(s). "
|
||||
"Computed mean chaos is %f %%.",
|
||||
encoding_iana,
|
||||
early_stop_count,
|
||||
round(mean_mess_ratio * 100, ndigits=3),
|
||||
)
|
||||
# Preparing those fallbacks in case we got nothing.
|
||||
if (
|
||||
enable_fallback
|
||||
and encoding_iana in ["ascii", "utf_8", specified_encoding]
|
||||
and not lazy_str_hard_failure
|
||||
):
|
||||
fallback_entry = CharsetMatch(
|
||||
sequences,
|
||||
encoding_iana,
|
||||
threshold,
|
||||
False,
|
||||
[],
|
||||
decoded_payload,
|
||||
preemptive_declaration=specified_encoding,
|
||||
)
|
||||
if encoding_iana == specified_encoding:
|
||||
fallback_specified = fallback_entry
|
||||
elif encoding_iana == "ascii":
|
||||
fallback_ascii = fallback_entry
|
||||
else:
|
||||
fallback_u8 = fallback_entry
|
||||
continue
|
||||
|
||||
logger.log(
|
||||
TRACE,
|
||||
"%s passed initial chaos probing. Mean measured chaos is %f %%",
|
||||
encoding_iana,
|
||||
round(mean_mess_ratio * 100, ndigits=3),
|
||||
)
|
||||
|
||||
if not is_multi_byte_decoder:
|
||||
target_languages: List[str] = encoding_languages(encoding_iana)
|
||||
else:
|
||||
target_languages = mb_encoding_languages(encoding_iana)
|
||||
|
||||
if target_languages:
|
||||
logger.log(
|
||||
TRACE,
|
||||
"{} should target any language(s) of {}".format(
|
||||
encoding_iana, str(target_languages)
|
||||
),
|
||||
)
|
||||
|
||||
cd_ratios = []
|
||||
|
||||
# We shall skip the CD (coherence detection) when it is about ASCII.
# Most of the time it is not relevant to run "language-detection" on it.
|
||||
if encoding_iana != "ascii":
|
||||
for chunk in md_chunks:
|
||||
chunk_languages = coherence_ratio(
|
||||
chunk,
|
||||
language_threshold,
|
||||
",".join(target_languages) if target_languages else None,
|
||||
)
|
||||
|
||||
cd_ratios.append(chunk_languages)
|
||||
|
||||
cd_ratios_merged = merge_coherence_ratios(cd_ratios)
|
||||
|
||||
if cd_ratios_merged:
|
||||
logger.log(
|
||||
TRACE,
|
||||
"We detected language {} using {}".format(
|
||||
cd_ratios_merged, encoding_iana
|
||||
),
|
||||
)
|
||||
|
||||
current_match = CharsetMatch(
|
||||
sequences,
|
||||
encoding_iana,
|
||||
mean_mess_ratio,
|
||||
bom_or_sig_available,
|
||||
cd_ratios_merged,
|
||||
(
|
||||
decoded_payload
|
||||
if (
|
||||
is_too_large_sequence is False
|
||||
or encoding_iana in [specified_encoding, "ascii", "utf_8"]
|
||||
)
|
||||
else None
|
||||
),
|
||||
preemptive_declaration=specified_encoding,
|
||||
)
|
||||
|
||||
results.append(current_match)
|
||||
|
||||
if (
|
||||
encoding_iana in [specified_encoding, "ascii", "utf_8"]
|
||||
and mean_mess_ratio < 0.1
|
||||
):
|
||||
# If md says nothing to worry about, then... stop immediately!
|
||||
if mean_mess_ratio == 0.0:
|
||||
logger.debug(
|
||||
"Encoding detection: %s is most likely the one.",
|
||||
current_match.encoding,
|
||||
)
|
||||
if explain:
|
||||
logger.removeHandler(explain_handler)
|
||||
logger.setLevel(previous_logger_level)
|
||||
return CharsetMatches([current_match])
|
||||
|
||||
early_stop_results.append(current_match)
|
||||
|
||||
if (
|
||||
len(early_stop_results)
|
||||
and (specified_encoding is None or specified_encoding in tested)
|
||||
and "ascii" in tested
|
||||
and "utf_8" in tested
|
||||
):
|
||||
probable_result: CharsetMatch = early_stop_results.best() # type: ignore[assignment]
|
||||
logger.debug(
|
||||
"Encoding detection: %s is most likely the one.",
|
||||
probable_result.encoding,
|
||||
)
|
||||
if explain:
|
||||
logger.removeHandler(explain_handler)
|
||||
logger.setLevel(previous_logger_level)
|
||||
|
||||
return CharsetMatches([probable_result])
|
||||
|
||||
if encoding_iana == sig_encoding:
|
||||
logger.debug(
|
||||
"Encoding detection: %s is most likely the one as we detected a BOM or SIG within "
|
||||
"the beginning of the sequence.",
|
||||
encoding_iana,
|
||||
)
|
||||
if explain:
|
||||
logger.removeHandler(explain_handler)
|
||||
logger.setLevel(previous_logger_level)
|
||||
return CharsetMatches([results[encoding_iana]])
|
||||
|
||||
if len(results) == 0:
|
||||
if fallback_u8 or fallback_ascii or fallback_specified:
|
||||
logger.log(
|
||||
TRACE,
|
||||
"Nothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.",
|
||||
)
|
||||
|
||||
if fallback_specified:
|
||||
logger.debug(
|
||||
"Encoding detection: %s will be used as a fallback match",
|
||||
fallback_specified.encoding,
|
||||
)
|
||||
results.append(fallback_specified)
|
||||
elif (
|
||||
(fallback_u8 and fallback_ascii is None)
|
||||
or (
|
||||
fallback_u8
|
||||
and fallback_ascii
|
||||
and fallback_u8.fingerprint != fallback_ascii.fingerprint
|
||||
)
|
||||
or (fallback_u8 is not None)
|
||||
):
|
||||
logger.debug("Encoding detection: utf_8 will be used as a fallback match")
|
||||
results.append(fallback_u8)
|
||||
elif fallback_ascii:
|
||||
logger.debug("Encoding detection: ascii will be used as a fallback match")
|
||||
results.append(fallback_ascii)
|
||||
|
||||
if results:
|
||||
logger.debug(
|
||||
"Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.",
|
||||
results.best().encoding, # type: ignore
|
||||
len(results) - 1,
|
||||
)
|
||||
else:
|
||||
logger.debug("Encoding detection: Unable to determine any suitable charset.")
|
||||
|
||||
if explain:
|
||||
logger.removeHandler(explain_handler)
|
||||
logger.setLevel(previous_logger_level)
|
||||
|
||||
return results
|
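For orientation, here is a minimal usage sketch of the from_bytes API defined above. It assumes the package re-exports from_bytes at the top level (as the CLI module later in this commit does for from_fp); the sample payload and the printed fields are illustrative only.

from charset_normalizer import from_bytes

# Hypothetical sample: Cyrillic text encoded with a single-byte code page.
payload = "Всеки човек има право на образование.".encode("cp1251")

matches = from_bytes(payload)   # CharsetMatches, ordered best first
best = matches.best()           # CharsetMatch, or None when nothing fits
if best is not None:
    print(best.encoding)        # a cp1251-compatible code page is expected here
    print(str(best))            # the decoded text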
||||
|
||||
|
||||
def from_fp(
|
||||
fp: BinaryIO,
|
||||
steps: int = 5,
|
||||
chunk_size: int = 512,
|
||||
threshold: float = 0.20,
|
||||
cp_isolation: Optional[List[str]] = None,
|
||||
cp_exclusion: Optional[List[str]] = None,
|
||||
preemptive_behaviour: bool = True,
|
||||
explain: bool = False,
|
||||
language_threshold: float = 0.1,
|
||||
enable_fallback: bool = True,
|
||||
) -> CharsetMatches:
|
||||
"""
|
||||
Same as the from_bytes function, but using a file pointer that is already ready.
Will not close the file pointer.
|
||||
"""
|
||||
return from_bytes(
|
||||
fp.read(),
|
||||
steps,
|
||||
chunk_size,
|
||||
threshold,
|
||||
cp_isolation,
|
||||
cp_exclusion,
|
||||
preemptive_behaviour,
|
||||
explain,
|
||||
language_threshold,
|
||||
enable_fallback,
|
||||
)
|
||||
|
||||
|
||||
def from_path(
|
||||
path: Union[str, bytes, PathLike], # type: ignore[type-arg]
|
||||
steps: int = 5,
|
||||
chunk_size: int = 512,
|
||||
threshold: float = 0.20,
|
||||
cp_isolation: Optional[List[str]] = None,
|
||||
cp_exclusion: Optional[List[str]] = None,
|
||||
preemptive_behaviour: bool = True,
|
||||
explain: bool = False,
|
||||
language_threshold: float = 0.1,
|
||||
enable_fallback: bool = True,
|
||||
) -> CharsetMatches:
|
||||
"""
|
||||
Same as the from_bytes function, but with one extra step: opening and reading the given file path in binary mode.
Can raise IOError.
|
||||
"""
|
||||
with open(path, "rb") as fp:
|
||||
return from_fp(
|
||||
fp,
|
||||
steps,
|
||||
chunk_size,
|
||||
threshold,
|
||||
cp_isolation,
|
||||
cp_exclusion,
|
||||
preemptive_behaviour,
|
||||
explain,
|
||||
language_threshold,
|
||||
enable_fallback,
|
||||
)
|
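A short, hedged sketch of the two file-based entry points defined above; './sample.txt' is a hypothetical path, and the top-level re-exports are assumed as before.

from charset_normalizer import from_fp, from_path

best = from_path("./sample.txt").best()            # opens and reads the file in binary mode itself
print(best.encoding if best else "undetermined")

with open("./sample.txt", "rb") as fp:
    best = from_fp(fp).best()                      # from_fp does not close the file pointer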
||||
|
||||
|
||||
def is_binary(
|
||||
fp_or_path_or_payload: Union[PathLike, str, BinaryIO, bytes], # type: ignore[type-arg]
|
||||
steps: int = 5,
|
||||
chunk_size: int = 512,
|
||||
threshold: float = 0.20,
|
||||
cp_isolation: Optional[List[str]] = None,
|
||||
cp_exclusion: Optional[List[str]] = None,
|
||||
preemptive_behaviour: bool = True,
|
||||
explain: bool = False,
|
||||
language_threshold: float = 0.1,
|
||||
enable_fallback: bool = False,
|
||||
) -> bool:
|
||||
"""
|
||||
Detect if the given input (file, bytes, or path) points to a binary file, i.e. not text.
Based on the same main heuristic algorithms and default kwargs, with the sole exception that fallback matches
are disabled, to be stricter with content that is ASCII-compatible but unlikely to be text.
|
||||
"""
|
||||
if isinstance(fp_or_path_or_payload, (str, PathLike)):
|
||||
guesses = from_path(
|
||||
fp_or_path_or_payload,
|
||||
steps=steps,
|
||||
chunk_size=chunk_size,
|
||||
threshold=threshold,
|
||||
cp_isolation=cp_isolation,
|
||||
cp_exclusion=cp_exclusion,
|
||||
preemptive_behaviour=preemptive_behaviour,
|
||||
explain=explain,
|
||||
language_threshold=language_threshold,
|
||||
enable_fallback=enable_fallback,
|
||||
)
|
||||
elif isinstance(
|
||||
fp_or_path_or_payload,
|
||||
(
|
||||
bytes,
|
||||
bytearray,
|
||||
),
|
||||
):
|
||||
guesses = from_bytes(
|
||||
fp_or_path_or_payload,
|
||||
steps=steps,
|
||||
chunk_size=chunk_size,
|
||||
threshold=threshold,
|
||||
cp_isolation=cp_isolation,
|
||||
cp_exclusion=cp_exclusion,
|
||||
preemptive_behaviour=preemptive_behaviour,
|
||||
explain=explain,
|
||||
language_threshold=language_threshold,
|
||||
enable_fallback=enable_fallback,
|
||||
)
|
||||
else:
|
||||
guesses = from_fp(
|
||||
fp_or_path_or_payload,
|
||||
steps=steps,
|
||||
chunk_size=chunk_size,
|
||||
threshold=threshold,
|
||||
cp_isolation=cp_isolation,
|
||||
cp_exclusion=cp_exclusion,
|
||||
preemptive_behaviour=preemptive_behaviour,
|
||||
explain=explain,
|
||||
language_threshold=language_threshold,
|
||||
enable_fallback=enable_fallback,
|
||||
)
|
||||
|
||||
return not guesses
|
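An illustrative use of is_binary as defined above, assuming it is re-exported at the top level like the other entry points; both inputs are made up, and the commented results follow from the heuristic rather than being guaranteed.

from charset_normalizer import is_binary

print(is_binary(b"\x00\x5f\x2f\xff" * 50))          # likely True: no charset renders this cleanly
print(is_binary("hello world".encode("utf_8")))     # likely False: plain text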
395
env/lib/python3.12/site-packages/charset_normalizer/cd.py
vendored
Normal file
@ -0,0 +1,395 @@
|
||||
import importlib
|
||||
from codecs import IncrementalDecoder
|
||||
from collections import Counter
|
||||
from functools import lru_cache
|
||||
from typing import Counter as TypeCounter, Dict, List, Optional, Tuple
|
||||
|
||||
from .constant import (
|
||||
FREQUENCIES,
|
||||
KO_NAMES,
|
||||
LANGUAGE_SUPPORTED_COUNT,
|
||||
TOO_SMALL_SEQUENCE,
|
||||
ZH_NAMES,
|
||||
)
|
||||
from .md import is_suspiciously_successive_range
|
||||
from .models import CoherenceMatches
|
||||
from .utils import (
|
||||
is_accentuated,
|
||||
is_latin,
|
||||
is_multi_byte_encoding,
|
||||
is_unicode_range_secondary,
|
||||
unicode_range,
|
||||
)
|
||||
|
||||
|
||||
def encoding_unicode_range(iana_name: str) -> List[str]:
|
||||
"""
|
||||
Return associated unicode ranges in a single byte code page.
|
||||
"""
|
||||
if is_multi_byte_encoding(iana_name):
|
||||
raise IOError("Function not supported on multi-byte code page")
|
||||
|
||||
decoder = importlib.import_module(
|
||||
"encodings.{}".format(iana_name)
|
||||
).IncrementalDecoder
|
||||
|
||||
p: IncrementalDecoder = decoder(errors="ignore")
|
||||
seen_ranges: Dict[str, int] = {}
|
||||
character_count: int = 0
|
||||
|
||||
for i in range(0x40, 0xFF):
|
||||
chunk: str = p.decode(bytes([i]))
|
||||
|
||||
if chunk:
|
||||
character_range: Optional[str] = unicode_range(chunk)
|
||||
|
||||
if character_range is None:
|
||||
continue
|
||||
|
||||
if is_unicode_range_secondary(character_range) is False:
|
||||
if character_range not in seen_ranges:
|
||||
seen_ranges[character_range] = 0
|
||||
seen_ranges[character_range] += 1
|
||||
character_count += 1
|
||||
|
||||
return sorted(
|
||||
[
|
||||
character_range
|
||||
for character_range in seen_ranges
|
||||
if seen_ranges[character_range] / character_count >= 0.15
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def unicode_range_languages(primary_range: str) -> List[str]:
|
||||
"""
|
||||
Return inferred languages used with a unicode range.
|
||||
"""
|
||||
languages: List[str] = []
|
||||
|
||||
for language, characters in FREQUENCIES.items():
|
||||
for character in characters:
|
||||
if unicode_range(character) == primary_range:
|
||||
languages.append(language)
|
||||
break
|
||||
|
||||
return languages
|
||||
|
||||
|
||||
@lru_cache()
|
||||
def encoding_languages(iana_name: str) -> List[str]:
|
||||
"""
|
||||
Single-byte encoding language association. Some code pages are heavily linked to particular language(s).
This function does the correspondence.
|
||||
"""
|
||||
unicode_ranges: List[str] = encoding_unicode_range(iana_name)
|
||||
primary_range: Optional[str] = None
|
||||
|
||||
for specified_range in unicode_ranges:
|
||||
if "Latin" not in specified_range:
|
||||
primary_range = specified_range
|
||||
break
|
||||
|
||||
if primary_range is None:
|
||||
return ["Latin Based"]
|
||||
|
||||
return unicode_range_languages(primary_range)
|
||||
|
||||
|
||||
@lru_cache()
|
||||
def mb_encoding_languages(iana_name: str) -> List[str]:
|
||||
"""
|
||||
Multi-byte encoding language association. Some code pages are heavily linked to particular language(s).
This function does the correspondence.
|
||||
"""
|
||||
if (
|
||||
iana_name.startswith("shift_")
|
||||
or iana_name.startswith("iso2022_jp")
|
||||
or iana_name.startswith("euc_j")
|
||||
or iana_name == "cp932"
|
||||
):
|
||||
return ["Japanese"]
|
||||
if iana_name.startswith("gb") or iana_name in ZH_NAMES:
|
||||
return ["Chinese"]
|
||||
if iana_name.startswith("iso2022_kr") or iana_name in KO_NAMES:
|
||||
return ["Korean"]
|
||||
|
||||
return []
|
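Illustrative calls for the two association helpers above; the commented outputs follow directly from the branches shown, but should be treated as a sketch.

print(mb_encoding_languages("cp932"))    # ['Japanese'], via the explicit branch above
print(mb_encoding_languages("utf_8"))    # [], no particular association
print(encoding_languages("cp1251"))      # languages inferred from the first non-Latin unicode range of the code page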
||||
|
||||
|
||||
@lru_cache(maxsize=LANGUAGE_SUPPORTED_COUNT)
|
||||
def get_target_features(language: str) -> Tuple[bool, bool]:
|
||||
"""
|
||||
Determine the main aspects of a supported language: whether it contains accents and whether it is pure Latin.
|
||||
"""
|
||||
target_have_accents: bool = False
|
||||
target_pure_latin: bool = True
|
||||
|
||||
for character in FREQUENCIES[language]:
|
||||
if not target_have_accents and is_accentuated(character):
|
||||
target_have_accents = True
|
||||
if target_pure_latin and is_latin(character) is False:
|
||||
target_pure_latin = False
|
||||
|
||||
return target_have_accents, target_pure_latin
|
||||
|
||||
|
||||
def alphabet_languages(
|
||||
characters: List[str], ignore_non_latin: bool = False
|
||||
) -> List[str]:
|
||||
"""
|
||||
Return the languages associated with the given characters.
|
||||
"""
|
||||
languages: List[Tuple[str, float]] = []
|
||||
|
||||
source_have_accents = any(is_accentuated(character) for character in characters)
|
||||
|
||||
for language, language_characters in FREQUENCIES.items():
|
||||
target_have_accents, target_pure_latin = get_target_features(language)
|
||||
|
||||
if ignore_non_latin and target_pure_latin is False:
|
||||
continue
|
||||
|
||||
if target_have_accents is False and source_have_accents:
|
||||
continue
|
||||
|
||||
character_count: int = len(language_characters)
|
||||
|
||||
character_match_count: int = len(
|
||||
[c for c in language_characters if c in characters]
|
||||
)
|
||||
|
||||
ratio: float = character_match_count / character_count
|
||||
|
||||
if ratio >= 0.2:
|
||||
languages.append((language, ratio))
|
||||
|
||||
languages = sorted(languages, key=lambda x: x[1], reverse=True)
|
||||
|
||||
return [compatible_language[0] for compatible_language in languages]
|
||||
|
||||
|
||||
def characters_popularity_compare(
|
||||
language: str, ordered_characters: List[str]
|
||||
) -> float:
|
||||
"""
|
||||
Determine if an ordered character list (by occurrence, from most frequent to rarest) matches a particular language.
The result is a ratio between 0. (absolutely no correspondence) and 1. (near perfect fit).
Beware that this function is not strict on the match, in order to ease the detection. (Meaning a close match counts as 1.)
|
||||
"""
|
||||
if language not in FREQUENCIES:
|
||||
raise ValueError("{} not available".format(language))
|
||||
|
||||
character_approved_count: int = 0
|
||||
FREQUENCIES_language_set = set(FREQUENCIES[language])
|
||||
|
||||
ordered_characters_count: int = len(ordered_characters)
|
||||
target_language_characters_count: int = len(FREQUENCIES[language])
|
||||
|
||||
large_alphabet: bool = target_language_characters_count > 26
|
||||
|
||||
for character, character_rank in zip(
|
||||
ordered_characters, range(0, ordered_characters_count)
|
||||
):
|
||||
if character not in FREQUENCIES_language_set:
|
||||
continue
|
||||
|
||||
character_rank_in_language: int = FREQUENCIES[language].index(character)
|
||||
expected_projection_ratio: float = (
|
||||
target_language_characters_count / ordered_characters_count
|
||||
)
|
||||
character_rank_projection: int = int(character_rank * expected_projection_ratio)
|
||||
|
||||
if (
|
||||
large_alphabet is False
|
||||
and abs(character_rank_projection - character_rank_in_language) > 4
|
||||
):
|
||||
continue
|
||||
|
||||
if (
|
||||
large_alphabet is True
|
||||
and abs(character_rank_projection - character_rank_in_language)
|
||||
< target_language_characters_count / 3
|
||||
):
|
||||
character_approved_count += 1
|
||||
continue
|
||||
|
||||
characters_before_source: List[str] = FREQUENCIES[language][
|
||||
0:character_rank_in_language
|
||||
]
|
||||
characters_after_source: List[str] = FREQUENCIES[language][
|
||||
character_rank_in_language:
|
||||
]
|
||||
characters_before: List[str] = ordered_characters[0:character_rank]
|
||||
characters_after: List[str] = ordered_characters[character_rank:]
|
||||
|
||||
before_match_count: int = len(
|
||||
set(characters_before) & set(characters_before_source)
|
||||
)
|
||||
|
||||
after_match_count: int = len(
|
||||
set(characters_after) & set(characters_after_source)
|
||||
)
|
||||
|
||||
if len(characters_before_source) == 0 and before_match_count <= 4:
|
||||
character_approved_count += 1
|
||||
continue
|
||||
|
||||
if len(characters_after_source) == 0 and after_match_count <= 4:
|
||||
character_approved_count += 1
|
||||
continue
|
||||
|
||||
if (
|
||||
before_match_count / len(characters_before_source) >= 0.4
|
||||
or after_match_count / len(characters_after_source) >= 0.4
|
||||
):
|
||||
character_approved_count += 1
|
||||
continue
|
||||
|
||||
return character_approved_count / len(ordered_characters)
|
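A toy call for the popularity comparison above. The exact ratio depends on the FREQUENCIES table, so only the rough magnitude is suggested in the comments.

ratio = characters_popularity_compare("English", ["e", "t", "a", "o", "i", "n", "s", "h", "r"])
print(ratio)   # expected to be high (close to 1.) for a plausible English frequency ordering

ratio = characters_popularity_compare("English", list("привет"))
print(ratio)   # 0.0, since none of these characters appear in the English frequency table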
||||
|
||||
|
||||
def alpha_unicode_split(decoded_sequence: str) -> List[str]:
|
||||
"""
|
||||
Given a decoded text sequence, return a list of str. Unicode range / alphabet separation.
Ex. a text containing English/Latin with a bit of Hebrew will return two items in the resulting list;
one containing the Latin letters and the other the Hebrew ones.
|
||||
"""
|
||||
layers: Dict[str, str] = {}
|
||||
|
||||
for character in decoded_sequence:
|
||||
if character.isalpha() is False:
|
||||
continue
|
||||
|
||||
character_range: Optional[str] = unicode_range(character)
|
||||
|
||||
if character_range is None:
|
||||
continue
|
||||
|
||||
layer_target_range: Optional[str] = None
|
||||
|
||||
for discovered_range in layers:
|
||||
if (
|
||||
is_suspiciously_successive_range(discovered_range, character_range)
|
||||
is False
|
||||
):
|
||||
layer_target_range = discovered_range
|
||||
break
|
||||
|
||||
if layer_target_range is None:
|
||||
layer_target_range = character_range
|
||||
|
||||
if layer_target_range not in layers:
|
||||
layers[layer_target_range] = character.lower()
|
||||
continue
|
||||
|
||||
layers[layer_target_range] += character.lower()
|
||||
|
||||
return list(layers.values())
|
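A small illustration of the split above; the layers come back in insertion order, lower-cased, with non-alphabetic characters dropped.

print(alpha_unicode_split("Hello Привет 123"))   # roughly ['hello', 'привет']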
||||
|
||||
|
||||
def merge_coherence_ratios(results: List[CoherenceMatches]) -> CoherenceMatches:
|
||||
"""
|
||||
This function merges results previously given by the coherence_ratio function.
The return type is the same as coherence_ratio.
|
||||
"""
|
||||
per_language_ratios: Dict[str, List[float]] = {}
|
||||
for result in results:
|
||||
for sub_result in result:
|
||||
language, ratio = sub_result
|
||||
if language not in per_language_ratios:
|
||||
per_language_ratios[language] = [ratio]
|
||||
continue
|
||||
per_language_ratios[language].append(ratio)
|
||||
|
||||
merge = [
|
||||
(
|
||||
language,
|
||||
round(
|
||||
sum(per_language_ratios[language]) / len(per_language_ratios[language]),
|
||||
4,
|
||||
),
|
||||
)
|
||||
for language in per_language_ratios
|
||||
]
|
||||
|
||||
return sorted(merge, key=lambda x: x[1], reverse=True)
|
||||
|
||||
|
||||
def filter_alt_coherence_matches(results: CoherenceMatches) -> CoherenceMatches:
|
||||
"""
|
||||
We shall NOT return "English—" in CoherenceMatches because it is an alternative
of "English". This function only keeps the best match and removes the em-dash from it.
|
||||
"""
|
||||
index_results: Dict[str, List[float]] = dict()
|
||||
|
||||
for result in results:
|
||||
language, ratio = result
|
||||
no_em_name: str = language.replace("—", "")
|
||||
|
||||
if no_em_name not in index_results:
|
||||
index_results[no_em_name] = []
|
||||
|
||||
index_results[no_em_name].append(ratio)
|
||||
|
||||
if any(len(index_results[e]) > 1 for e in index_results):
|
||||
filtered_results: CoherenceMatches = []
|
||||
|
||||
for language in index_results:
|
||||
filtered_results.append((language, max(index_results[language])))
|
||||
|
||||
return filtered_results
|
||||
|
||||
return results
|
||||
|
||||
|
||||
@lru_cache(maxsize=2048)
|
||||
def coherence_ratio(
|
||||
decoded_sequence: str, threshold: float = 0.1, lg_inclusion: Optional[str] = None
|
||||
) -> CoherenceMatches:
|
||||
"""
|
||||
Detect ANY language that can be identified in the given sequence. The sequence will be analysed by layers.
A layer = character extraction by alphabets/ranges.
|
||||
"""
|
||||
|
||||
results: List[Tuple[str, float]] = []
|
||||
ignore_non_latin: bool = False
|
||||
|
||||
sufficient_match_count: int = 0
|
||||
|
||||
lg_inclusion_list = lg_inclusion.split(",") if lg_inclusion is not None else []
|
||||
if "Latin Based" in lg_inclusion_list:
|
||||
ignore_non_latin = True
|
||||
lg_inclusion_list.remove("Latin Based")
|
||||
|
||||
for layer in alpha_unicode_split(decoded_sequence):
|
||||
sequence_frequencies: TypeCounter[str] = Counter(layer)
|
||||
most_common = sequence_frequencies.most_common()
|
||||
|
||||
character_count: int = sum(o for c, o in most_common)
|
||||
|
||||
if character_count <= TOO_SMALL_SEQUENCE:
|
||||
continue
|
||||
|
||||
popular_character_ordered: List[str] = [c for c, o in most_common]
|
||||
|
||||
for language in lg_inclusion_list or alphabet_languages(
|
||||
popular_character_ordered, ignore_non_latin
|
||||
):
|
||||
ratio: float = characters_popularity_compare(
|
||||
language, popular_character_ordered
|
||||
)
|
||||
|
||||
if ratio < threshold:
|
||||
continue
|
||||
elif ratio >= 0.8:
|
||||
sufficient_match_count += 1
|
||||
|
||||
results.append((language, round(ratio, 4)))
|
||||
|
||||
if sufficient_match_count >= 3:
|
||||
break
|
||||
|
||||
return sorted(
|
||||
filter_alt_coherence_matches(results), key=lambda x: x[1], reverse=True
|
||||
)
|
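A hedged example of coherence_ratio. The exact ratios depend on the frequency tables, so only the shape of the result is indicated.

matches = coherence_ratio("Der Vorteil, die Herausforderung und die Gelegenheit liegen nah beieinander")
print(matches)   # a list of (language, ratio) tuples sorted by ratio, e.g. [('German', 0.9...), ...]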
6
env/lib/python3.12/site-packages/charset_normalizer/cli/__init__.py
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
from .__main__ import cli_detect, query_yes_no
|
||||
|
||||
__all__ = (
|
||||
"cli_detect",
|
||||
"query_yes_no",
|
||||
)
|
320
env/lib/python3.12/site-packages/charset_normalizer/cli/__main__.py
vendored
Normal file
@ -0,0 +1,320 @@
|
||||
import argparse
|
||||
import sys
|
||||
from json import dumps
|
||||
from os.path import abspath, basename, dirname, join, realpath
|
||||
from platform import python_version
|
||||
from typing import List, Optional
|
||||
from unicodedata import unidata_version
|
||||
|
||||
import charset_normalizer.md as md_module
|
||||
from charset_normalizer import from_fp
|
||||
from charset_normalizer.models import CliDetectionResult
|
||||
from charset_normalizer.version import __version__
|
||||
|
||||
|
||||
def query_yes_no(question: str, default: str = "yes") -> bool:
|
||||
"""Ask a yes/no question via input() and return their answer.
|
||||
|
||||
"question" is a string that is presented to the user.
|
||||
"default" is the presumed answer if the user just hits <Enter>.
|
||||
It must be "yes" (the default), "no" or None (meaning
|
||||
an answer is required of the user).
|
||||
|
||||
The "answer" return value is True for "yes" or False for "no".
|
||||
|
||||
Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input
|
||||
"""
|
||||
valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
|
||||
if default is None:
|
||||
prompt = " [y/n] "
|
||||
elif default == "yes":
|
||||
prompt = " [Y/n] "
|
||||
elif default == "no":
|
||||
prompt = " [y/N] "
|
||||
else:
|
||||
raise ValueError("invalid default answer: '%s'" % default)
|
||||
|
||||
while True:
|
||||
sys.stdout.write(question + prompt)
|
||||
choice = input().lower()
|
||||
if default is not None and choice == "":
|
||||
return valid[default]
|
||||
elif choice in valid:
|
||||
return valid[choice]
|
||||
else:
|
||||
sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n")
|
||||
|
||||
|
||||
def cli_detect(argv: Optional[List[str]] = None) -> int:
|
||||
"""
|
||||
CLI assistant using ARGV and ArgumentParser
|
||||
:param argv:
|
||||
:return: 0 if everything is fine, anything else signals trouble
|
||||
"""
|
||||
parser = argparse.ArgumentParser(
|
||||
description="The Real First Universal Charset Detector. "
|
||||
"Discover originating encoding used on text file. "
|
||||
"Normalize text to unicode."
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"files", type=argparse.FileType("rb"), nargs="+", help="File(s) to be analysed"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-v",
|
||||
"--verbose",
|
||||
action="store_true",
|
||||
default=False,
|
||||
dest="verbose",
|
||||
help="Display complementary information about file if any. "
|
||||
"Stdout will contain logs about the detection process.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-a",
|
||||
"--with-alternative",
|
||||
action="store_true",
|
||||
default=False,
|
||||
dest="alternatives",
|
||||
help="Output complementary possibilities if any. Top-level JSON WILL be a list.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-n",
|
||||
"--normalize",
|
||||
action="store_true",
|
||||
default=False,
|
||||
dest="normalize",
|
||||
help="Permit to normalize input file. If not set, program does not write anything.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-m",
|
||||
"--minimal",
|
||||
action="store_true",
|
||||
default=False,
|
||||
dest="minimal",
|
||||
help="Only output the charset detected to STDOUT. Disabling JSON output.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-r",
|
||||
"--replace",
|
||||
action="store_true",
|
||||
default=False,
|
||||
dest="replace",
|
||||
help="Replace file when trying to normalize it instead of creating a new one.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-f",
|
||||
"--force",
|
||||
action="store_true",
|
||||
default=False,
|
||||
dest="force",
|
||||
help="Replace file without asking if you are sure, use this flag with caution.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-i",
|
||||
"--no-preemptive",
|
||||
action="store_true",
|
||||
default=False,
|
||||
dest="no_preemptive",
|
||||
help="Disable looking at a charset declaration to hint the detector.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-t",
|
||||
"--threshold",
|
||||
action="store",
|
||||
default=0.2,
|
||||
type=float,
|
||||
dest="threshold",
|
||||
help="Define a custom maximum amount of chaos allowed in decoded content. 0. <= chaos <= 1.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--version",
|
||||
action="version",
|
||||
version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format(
|
||||
__version__,
|
||||
python_version(),
|
||||
unidata_version,
|
||||
"OFF" if md_module.__file__.lower().endswith(".py") else "ON",
|
||||
),
|
||||
help="Show version information and exit.",
|
||||
)
|
||||
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
if args.replace is True and args.normalize is False:
|
||||
if args.files:
|
||||
for my_file in args.files:
|
||||
my_file.close()
|
||||
print("Use --replace in addition of --normalize only.", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if args.force is True and args.replace is False:
|
||||
if args.files:
|
||||
for my_file in args.files:
|
||||
my_file.close()
|
||||
print("Use --force in addition of --replace only.", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if args.threshold < 0.0 or args.threshold > 1.0:
|
||||
if args.files:
|
||||
for my_file in args.files:
|
||||
my_file.close()
|
||||
print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
x_ = []
|
||||
|
||||
for my_file in args.files:
|
||||
matches = from_fp(
|
||||
my_file,
|
||||
threshold=args.threshold,
|
||||
explain=args.verbose,
|
||||
preemptive_behaviour=args.no_preemptive is False,
|
||||
)
|
||||
|
||||
best_guess = matches.best()
|
||||
|
||||
if best_guess is None:
|
||||
print(
|
||||
'Unable to identify originating encoding for "{}". {}'.format(
|
||||
my_file.name,
|
||||
(
|
||||
"Maybe try increasing maximum amount of chaos."
|
||||
if args.threshold < 1.0
|
||||
else ""
|
||||
),
|
||||
),
|
||||
file=sys.stderr,
|
||||
)
|
||||
x_.append(
|
||||
CliDetectionResult(
|
||||
abspath(my_file.name),
|
||||
None,
|
||||
[],
|
||||
[],
|
||||
"Unknown",
|
||||
[],
|
||||
False,
|
||||
1.0,
|
||||
0.0,
|
||||
None,
|
||||
True,
|
||||
)
|
||||
)
|
||||
else:
|
||||
x_.append(
|
||||
CliDetectionResult(
|
||||
abspath(my_file.name),
|
||||
best_guess.encoding,
|
||||
best_guess.encoding_aliases,
|
||||
[
|
||||
cp
|
||||
for cp in best_guess.could_be_from_charset
|
||||
if cp != best_guess.encoding
|
||||
],
|
||||
best_guess.language,
|
||||
best_guess.alphabets,
|
||||
best_guess.bom,
|
||||
best_guess.percent_chaos,
|
||||
best_guess.percent_coherence,
|
||||
None,
|
||||
True,
|
||||
)
|
||||
)
|
||||
|
||||
if len(matches) > 1 and args.alternatives:
|
||||
for el in matches:
|
||||
if el != best_guess:
|
||||
x_.append(
|
||||
CliDetectionResult(
|
||||
abspath(my_file.name),
|
||||
el.encoding,
|
||||
el.encoding_aliases,
|
||||
[
|
||||
cp
|
||||
for cp in el.could_be_from_charset
|
||||
if cp != el.encoding
|
||||
],
|
||||
el.language,
|
||||
el.alphabets,
|
||||
el.bom,
|
||||
el.percent_chaos,
|
||||
el.percent_coherence,
|
||||
None,
|
||||
False,
|
||||
)
|
||||
)
|
||||
|
||||
if args.normalize is True:
|
||||
if best_guess.encoding.startswith("utf") is True:
|
||||
print(
|
||||
'"{}" file does not need to be normalized, as it already came from unicode.'.format(
|
||||
my_file.name
|
||||
),
|
||||
file=sys.stderr,
|
||||
)
|
||||
if my_file.closed is False:
|
||||
my_file.close()
|
||||
continue
|
||||
|
||||
dir_path = dirname(realpath(my_file.name))
|
||||
file_name = basename(realpath(my_file.name))
|
||||
|
||||
o_: List[str] = file_name.split(".")
|
||||
|
||||
if args.replace is False:
|
||||
o_.insert(-1, best_guess.encoding)
|
||||
if my_file.closed is False:
|
||||
my_file.close()
|
||||
elif (
|
||||
args.force is False
|
||||
and query_yes_no(
|
||||
'Are you sure to normalize "{}" by replacing it ?'.format(
|
||||
my_file.name
|
||||
),
|
||||
"no",
|
||||
)
|
||||
is False
|
||||
):
|
||||
if my_file.closed is False:
|
||||
my_file.close()
|
||||
continue
|
||||
|
||||
try:
|
||||
x_[0].unicode_path = join(dir_path, ".".join(o_))
|
||||
|
||||
with open(x_[0].unicode_path, "wb") as fp:
|
||||
fp.write(best_guess.output())
|
||||
except IOError as e:
|
||||
print(str(e), file=sys.stderr)
|
||||
if my_file.closed is False:
|
||||
my_file.close()
|
||||
return 2
|
||||
|
||||
if my_file.closed is False:
|
||||
my_file.close()
|
||||
|
||||
if args.minimal is False:
|
||||
print(
|
||||
dumps(
|
||||
[el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__,
|
||||
ensure_ascii=True,
|
||||
indent=4,
|
||||
)
|
||||
)
|
||||
else:
|
||||
for my_file in args.files:
|
||||
print(
|
||||
", ".join(
|
||||
[
|
||||
el.encoding or "undefined"
|
||||
for el in x_
|
||||
if el.path == abspath(my_file.name)
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
cli_detect()
|
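Because cli_detect accepts an argv list, the command-line interface above can also be exercised programmatically; 'sample.txt' is a hypothetical file.

exit_code = cli_detect(["sample.txt", "--minimal"])   # print only the detected charset
exit_code = cli_detect(["sample.txt", "-a"])          # full JSON report, including alternative guesses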
BIN
env/lib/python3.12/site-packages/charset_normalizer/cli/__pycache__/__init__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/charset_normalizer/cli/__pycache__/__main__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
1997
env/lib/python3.12/site-packages/charset_normalizer/constant.py
vendored
Normal file
File diff suppressed because it is too large
65
env/lib/python3.12/site-packages/charset_normalizer/legacy.py
vendored
Normal file
@ -0,0 +1,65 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Optional
|
||||
from warnings import warn
|
||||
|
||||
from .api import from_bytes
|
||||
from .constant import CHARDET_CORRESPONDENCE
|
||||
|
||||
# TODO: remove this check when dropping Python 3.7 support
|
||||
if TYPE_CHECKING:
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
class ResultDict(TypedDict):
|
||||
encoding: Optional[str]
|
||||
language: str
|
||||
confidence: Optional[float]
|
||||
|
||||
|
||||
def detect(
|
||||
byte_str: bytes, should_rename_legacy: bool = False, **kwargs: Any
|
||||
) -> ResultDict:
|
||||
"""
|
||||
chardet legacy method
|
||||
Detect the encoding of the given byte string. It should be mostly backward-compatible.
The encoding name will match Chardet's own spelling whenever possible (but not for encoding names it does not support).
This function is deprecated and should only be used to migrate your project easily; consult the documentation for
further information. Not planned for removal.
|
||||
|
||||
:param byte_str: The byte sequence to examine.
|
||||
:param should_rename_legacy: Should we rename legacy encodings
|
||||
to their more modern equivalents?
|
||||
"""
|
||||
if len(kwargs):
|
||||
warn(
|
||||
f"charset-normalizer disregard arguments '{','.join(list(kwargs.keys()))}' in legacy function detect()"
|
||||
)
|
||||
|
||||
if not isinstance(byte_str, (bytearray, bytes)):
|
||||
raise TypeError( # pragma: nocover
|
||||
"Expected object of type bytes or bytearray, got: "
|
||||
"{0}".format(type(byte_str))
|
||||
)
|
||||
|
||||
if isinstance(byte_str, bytearray):
|
||||
byte_str = bytes(byte_str)
|
||||
|
||||
r = from_bytes(byte_str).best()
|
||||
|
||||
encoding = r.encoding if r is not None else None
|
||||
language = r.language if r is not None and r.language != "Unknown" else ""
|
||||
confidence = 1.0 - r.chaos if r is not None else None
|
||||
|
||||
# Note: CharsetNormalizer does not return 'UTF-8-SIG' as the sig get stripped in the detection/normalization process
|
||||
# but chardet does return 'utf-8-sig' and it is a valid codec name.
|
||||
if r is not None and encoding == "utf_8" and r.bom:
|
||||
encoding += "_sig"
|
||||
|
||||
if should_rename_legacy is False and encoding in CHARDET_CORRESPONDENCE:
|
||||
encoding = CHARDET_CORRESPONDENCE[encoding]
|
||||
|
||||
return {
|
||||
"encoding": encoding,
|
||||
"language": language,
|
||||
"confidence": confidence,
|
||||
}
|
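The chardet-style return shape of the legacy detect() helper above, sketched with a hypothetical payload.

result = detect("Bonjour, où êtes-vous ?".encode("cp1252"))
print(result)   # {'encoding': ..., 'language': ..., 'confidence': ...}, mirroring chardet's API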
BIN
env/lib/python3.12/site-packages/charset_normalizer/md.cpython-312-aarch64-linux-gnu.so
vendored
Executable file
Binary file not shown.
628
env/lib/python3.12/site-packages/charset_normalizer/md.py
vendored
Normal file
@ -0,0 +1,628 @@
|
||||
from functools import lru_cache
|
||||
from logging import getLogger
|
||||
from typing import List, Optional
|
||||
|
||||
from .constant import (
|
||||
COMMON_SAFE_ASCII_CHARACTERS,
|
||||
TRACE,
|
||||
UNICODE_SECONDARY_RANGE_KEYWORD,
|
||||
)
|
||||
from .utils import (
|
||||
is_accentuated,
|
||||
is_arabic,
|
||||
is_arabic_isolated_form,
|
||||
is_case_variable,
|
||||
is_cjk,
|
||||
is_emoticon,
|
||||
is_hangul,
|
||||
is_hiragana,
|
||||
is_katakana,
|
||||
is_latin,
|
||||
is_punctuation,
|
||||
is_separator,
|
||||
is_symbol,
|
||||
is_thai,
|
||||
is_unprintable,
|
||||
remove_accent,
|
||||
unicode_range,
|
||||
)
|
||||
|
||||
|
||||
class MessDetectorPlugin:
|
||||
"""
|
||||
Base abstract class used for mess detection plugins.
|
||||
All detectors MUST extend and implement given methods.
|
||||
"""
|
||||
|
||||
def eligible(self, character: str) -> bool:
|
||||
"""
|
||||
Determine if given character should be fed in.
|
||||
"""
|
||||
raise NotImplementedError # pragma: nocover
|
||||
|
||||
def feed(self, character: str) -> None:
|
||||
"""
|
||||
The main routine to be executed upon each character.
Insert the logic by which the text would be considered chaotic.
|
||||
"""
|
||||
raise NotImplementedError # pragma: nocover
|
||||
|
||||
def reset(self) -> None: # pragma: no cover
|
||||
"""
|
||||
Permit to reset the plugin to the initial state.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@property
|
||||
def ratio(self) -> float:
|
||||
"""
|
||||
Compute the chaos ratio based on what your feed() has seen.
Must NOT be lower than 0.; there is no upper bound (values greater than 0. are unrestricted).
|
||||
"""
|
||||
raise NotImplementedError # pragma: nocover
|
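A minimal custom detector that follows the contract described above. It is purely illustrative: it is not part of the library, and it would still need to be wired into the mess-detection loop (which instantiates MessDetectorPlugin subclasses) to have any effect.

class TooManyReplacementCharPlugin(MessDetectorPlugin):
    """Hypothetical plugin: flags an overuse of the U+FFFD replacement character."""

    def __init__(self) -> None:
        self._character_count: int = 0
        self._replacement_count: int = 0

    def eligible(self, character: str) -> bool:
        return True

    def feed(self, character: str) -> None:
        self._character_count += 1
        if character == "\ufffd":
            self._replacement_count += 1

    def reset(self) -> None:
        self._character_count = 0
        self._replacement_count = 0

    @property
    def ratio(self) -> float:
        if self._character_count == 0:
            return 0.0
        return self._replacement_count / self._character_count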
||||
|
||||
|
||||
class TooManySymbolOrPunctuationPlugin(MessDetectorPlugin):
|
||||
def __init__(self) -> None:
|
||||
self._punctuation_count: int = 0
|
||||
self._symbol_count: int = 0
|
||||
self._character_count: int = 0
|
||||
|
||||
self._last_printable_char: Optional[str] = None
|
||||
self._frenzy_symbol_in_word: bool = False
|
||||
|
||||
def eligible(self, character: str) -> bool:
|
||||
return character.isprintable()
|
||||
|
||||
def feed(self, character: str) -> None:
|
||||
self._character_count += 1
|
||||
|
||||
if (
|
||||
character != self._last_printable_char
|
||||
and character not in COMMON_SAFE_ASCII_CHARACTERS
|
||||
):
|
||||
if is_punctuation(character):
|
||||
self._punctuation_count += 1
|
||||
elif (
|
||||
character.isdigit() is False
|
||||
and is_symbol(character)
|
||||
and is_emoticon(character) is False
|
||||
):
|
||||
self._symbol_count += 2
|
||||
|
||||
self._last_printable_char = character
|
||||
|
||||
def reset(self) -> None: # pragma: no cover
|
||||
self._punctuation_count = 0
|
||||
self._character_count = 0
|
||||
self._symbol_count = 0
|
||||
|
||||
@property
|
||||
def ratio(self) -> float:
|
||||
if self._character_count == 0:
|
||||
return 0.0
|
||||
|
||||
ratio_of_punctuation: float = (
|
||||
self._punctuation_count + self._symbol_count
|
||||
) / self._character_count
|
||||
|
||||
return ratio_of_punctuation if ratio_of_punctuation >= 0.3 else 0.0
|
||||
|
||||
|
||||
class TooManyAccentuatedPlugin(MessDetectorPlugin):
|
||||
def __init__(self) -> None:
|
||||
self._character_count: int = 0
|
||||
self._accentuated_count: int = 0
|
||||
|
||||
def eligible(self, character: str) -> bool:
|
||||
return character.isalpha()
|
||||
|
||||
def feed(self, character: str) -> None:
|
||||
self._character_count += 1
|
||||
|
||||
if is_accentuated(character):
|
||||
self._accentuated_count += 1
|
||||
|
||||
def reset(self) -> None: # pragma: no cover
|
||||
self._character_count = 0
|
||||
self._accentuated_count = 0
|
||||
|
||||
@property
|
||||
def ratio(self) -> float:
|
||||
if self._character_count < 8:
|
||||
return 0.0
|
||||
|
||||
ratio_of_accentuation: float = self._accentuated_count / self._character_count
|
||||
return ratio_of_accentuation if ratio_of_accentuation >= 0.35 else 0.0
|
||||
|
||||
|
||||
class UnprintablePlugin(MessDetectorPlugin):
|
||||
def __init__(self) -> None:
|
||||
self._unprintable_count: int = 0
|
||||
self._character_count: int = 0
|
||||
|
||||
def eligible(self, character: str) -> bool:
|
||||
return True
|
||||
|
||||
def feed(self, character: str) -> None:
|
||||
if is_unprintable(character):
|
||||
self._unprintable_count += 1
|
||||
self._character_count += 1
|
||||
|
||||
def reset(self) -> None: # pragma: no cover
|
||||
self._unprintable_count = 0
|
||||
|
||||
@property
|
||||
def ratio(self) -> float:
|
||||
if self._character_count == 0:
|
||||
return 0.0
|
||||
|
||||
return (self._unprintable_count * 8) / self._character_count
|
||||
|
||||
|
||||
class SuspiciousDuplicateAccentPlugin(MessDetectorPlugin):
|
||||
def __init__(self) -> None:
|
||||
self._successive_count: int = 0
|
||||
self._character_count: int = 0
|
||||
|
||||
self._last_latin_character: Optional[str] = None
|
||||
|
||||
def eligible(self, character: str) -> bool:
|
||||
return character.isalpha() and is_latin(character)
|
||||
|
||||
def feed(self, character: str) -> None:
|
||||
self._character_count += 1
|
||||
if (
|
||||
self._last_latin_character is not None
|
||||
and is_accentuated(character)
|
||||
and is_accentuated(self._last_latin_character)
|
||||
):
|
||||
if character.isupper() and self._last_latin_character.isupper():
|
||||
self._successive_count += 1
|
||||
# Worse if it is the same char duplicated with a different accent.
|
||||
if remove_accent(character) == remove_accent(self._last_latin_character):
|
||||
self._successive_count += 1
|
||||
self._last_latin_character = character
|
||||
|
||||
def reset(self) -> None: # pragma: no cover
|
||||
self._successive_count = 0
|
||||
self._character_count = 0
|
||||
self._last_latin_character = None
|
||||
|
||||
@property
|
||||
def ratio(self) -> float:
|
||||
if self._character_count == 0:
|
||||
return 0.0
|
||||
|
||||
return (self._successive_count * 2) / self._character_count
|
||||
|
||||
|
||||
class SuspiciousRange(MessDetectorPlugin):
|
||||
def __init__(self) -> None:
|
||||
self._suspicious_successive_range_count: int = 0
|
||||
self._character_count: int = 0
|
||||
self._last_printable_seen: Optional[str] = None
|
||||
|
||||
def eligible(self, character: str) -> bool:
|
||||
return character.isprintable()
|
||||
|
||||
def feed(self, character: str) -> None:
|
||||
self._character_count += 1
|
||||
|
||||
if (
|
||||
character.isspace()
|
||||
or is_punctuation(character)
|
||||
or character in COMMON_SAFE_ASCII_CHARACTERS
|
||||
):
|
||||
self._last_printable_seen = None
|
||||
return
|
||||
|
||||
if self._last_printable_seen is None:
|
||||
self._last_printable_seen = character
|
||||
return
|
||||
|
||||
unicode_range_a: Optional[str] = unicode_range(self._last_printable_seen)
|
||||
unicode_range_b: Optional[str] = unicode_range(character)
|
||||
|
||||
if is_suspiciously_successive_range(unicode_range_a, unicode_range_b):
|
||||
self._suspicious_successive_range_count += 1
|
||||
|
||||
self._last_printable_seen = character
|
||||
|
||||
def reset(self) -> None: # pragma: no cover
|
||||
self._character_count = 0
|
||||
self._suspicious_successive_range_count = 0
|
||||
self._last_printable_seen = None
|
||||
|
||||
@property
|
||||
def ratio(self) -> float:
|
||||
if self._character_count <= 13:
|
||||
return 0.0
|
||||
|
||||
ratio_of_suspicious_range_usage: float = (
|
||||
self._suspicious_successive_range_count * 2
|
||||
) / self._character_count
|
||||
|
||||
return ratio_of_suspicious_range_usage
|
||||
|
||||
|
||||
class SuperWeirdWordPlugin(MessDetectorPlugin):
|
||||
def __init__(self) -> None:
|
||||
self._word_count: int = 0
|
||||
self._bad_word_count: int = 0
|
||||
self._foreign_long_count: int = 0
|
||||
|
||||
self._is_current_word_bad: bool = False
|
||||
self._foreign_long_watch: bool = False
|
||||
|
||||
self._character_count: int = 0
|
||||
self._bad_character_count: int = 0
|
||||
|
||||
self._buffer: str = ""
|
||||
self._buffer_accent_count: int = 0
|
||||
self._buffer_glyph_count: int = 0
|
||||
|
||||
def eligible(self, character: str) -> bool:
|
||||
return True
|
||||
|
||||
def feed(self, character: str) -> None:
|
||||
if character.isalpha():
|
||||
self._buffer += character
|
||||
if is_accentuated(character):
|
||||
self._buffer_accent_count += 1
|
||||
if (
|
||||
self._foreign_long_watch is False
|
||||
and (is_latin(character) is False or is_accentuated(character))
|
||||
and is_cjk(character) is False
|
||||
and is_hangul(character) is False
|
||||
and is_katakana(character) is False
|
||||
and is_hiragana(character) is False
|
||||
and is_thai(character) is False
|
||||
):
|
||||
self._foreign_long_watch = True
|
||||
if (
|
||||
is_cjk(character)
|
||||
or is_hangul(character)
|
||||
or is_katakana(character)
|
||||
or is_hiragana(character)
|
||||
or is_thai(character)
|
||||
):
|
||||
self._buffer_glyph_count += 1
|
||||
return
|
||||
if not self._buffer:
|
||||
return
|
||||
if (
|
||||
character.isspace() or is_punctuation(character) or is_separator(character)
|
||||
) and self._buffer:
|
||||
self._word_count += 1
|
||||
buffer_length: int = len(self._buffer)
|
||||
|
||||
self._character_count += buffer_length
|
||||
|
||||
if buffer_length >= 4:
|
||||
if self._buffer_accent_count / buffer_length >= 0.5:
|
||||
self._is_current_word_bad = True
|
||||
# Words/buffers ending with an upper case accentuated letter are so rare
# that we will consider them all as suspicious. Same weight as a foreign_long suspicion.
|
||||
elif (
|
||||
is_accentuated(self._buffer[-1])
|
||||
and self._buffer[-1].isupper()
|
||||
and all(_.isupper() for _ in self._buffer) is False
|
||||
):
|
||||
self._foreign_long_count += 1
|
||||
self._is_current_word_bad = True
|
||||
elif self._buffer_glyph_count == 1:
|
||||
self._is_current_word_bad = True
|
||||
self._foreign_long_count += 1
|
||||
if buffer_length >= 24 and self._foreign_long_watch:
|
||||
camel_case_dst = [
|
||||
i
|
||||
for c, i in zip(self._buffer, range(0, buffer_length))
|
||||
if c.isupper()
|
||||
]
|
||||
probable_camel_cased: bool = False
|
||||
|
||||
if camel_case_dst and (len(camel_case_dst) / buffer_length <= 0.3):
|
||||
probable_camel_cased = True
|
||||
|
||||
if not probable_camel_cased:
|
||||
self._foreign_long_count += 1
|
||||
self._is_current_word_bad = True
|
||||
|
||||
if self._is_current_word_bad:
|
||||
self._bad_word_count += 1
|
||||
self._bad_character_count += len(self._buffer)
|
||||
self._is_current_word_bad = False
|
||||
|
||||
self._foreign_long_watch = False
|
||||
self._buffer = ""
|
||||
self._buffer_accent_count = 0
|
||||
self._buffer_glyph_count = 0
|
||||
elif (
|
||||
character not in {"<", ">", "-", "=", "~", "|", "_"}
|
||||
and character.isdigit() is False
|
||||
and is_symbol(character)
|
||||
):
|
||||
self._is_current_word_bad = True
|
||||
self._buffer += character
|
||||
|
||||
def reset(self) -> None: # pragma: no cover
|
||||
self._buffer = ""
|
||||
self._is_current_word_bad = False
|
||||
self._foreign_long_watch = False
|
||||
self._bad_word_count = 0
|
||||
self._word_count = 0
|
||||
self._character_count = 0
|
||||
self._bad_character_count = 0
|
||||
self._foreign_long_count = 0
|
||||
|
||||
@property
|
||||
def ratio(self) -> float:
|
||||
if self._word_count <= 10 and self._foreign_long_count == 0:
|
||||
return 0.0
|
||||
|
||||
return self._bad_character_count / self._character_count
|
||||
|
||||
|
||||
class CjkInvalidStopPlugin(MessDetectorPlugin):
|
||||
"""
|
||||
GB (Chinese) based encodings often render the full stop incorrectly when the content does not fit, and
this can be easily detected by searching for the overuse of '丅' and '丄'.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._wrong_stop_count: int = 0
|
||||
self._cjk_character_count: int = 0
|
||||
|
||||
def eligible(self, character: str) -> bool:
|
||||
return True
|
||||
|
||||
def feed(self, character: str) -> None:
|
||||
if character in {"丅", "丄"}:
|
||||
self._wrong_stop_count += 1
|
||||
return
|
||||
if is_cjk(character):
|
||||
self._cjk_character_count += 1
|
||||
|
||||
def reset(self) -> None: # pragma: no cover
|
||||
self._wrong_stop_count = 0
|
||||
self._cjk_character_count = 0
|
||||
|
||||
@property
|
||||
def ratio(self) -> float:
|
||||
if self._cjk_character_count < 16:
|
||||
return 0.0
|
||||
return self._wrong_stop_count / self._cjk_character_count
|
||||
|
||||
|
||||
class ArchaicUpperLowerPlugin(MessDetectorPlugin):
|
||||
def __init__(self) -> None:
|
||||
self._buf: bool = False
|
||||
|
||||
self._character_count_since_last_sep: int = 0
|
||||
|
||||
self._successive_upper_lower_count: int = 0
|
||||
self._successive_upper_lower_count_final: int = 0
|
||||
|
||||
self._character_count: int = 0
|
||||
|
||||
self._last_alpha_seen: Optional[str] = None
|
||||
self._current_ascii_only: bool = True
|
||||
|
||||
def eligible(self, character: str) -> bool:
|
||||
return True
|
||||
|
||||
def feed(self, character: str) -> None:
|
||||
is_concerned = character.isalpha() and is_case_variable(character)
|
||||
chunk_sep = is_concerned is False
|
||||
|
||||
if chunk_sep and self._character_count_since_last_sep > 0:
|
||||
if (
|
||||
self._character_count_since_last_sep <= 64
|
||||
and character.isdigit() is False
|
||||
and self._current_ascii_only is False
|
||||
):
|
||||
self._successive_upper_lower_count_final += (
|
||||
self._successive_upper_lower_count
|
||||
)
|
||||
|
||||
self._successive_upper_lower_count = 0
|
||||
self._character_count_since_last_sep = 0
|
||||
self._last_alpha_seen = None
|
||||
self._buf = False
|
||||
self._character_count += 1
|
||||
self._current_ascii_only = True
|
||||
|
||||
return
|
||||
|
||||
if self._current_ascii_only is True and character.isascii() is False:
|
||||
self._current_ascii_only = False
|
||||
|
||||
if self._last_alpha_seen is not None:
|
||||
if (character.isupper() and self._last_alpha_seen.islower()) or (
|
||||
character.islower() and self._last_alpha_seen.isupper()
|
||||
):
|
||||
if self._buf is True:
|
||||
self._successive_upper_lower_count += 2
|
||||
self._buf = False
|
||||
else:
|
||||
self._buf = True
|
||||
else:
|
||||
self._buf = False
|
||||
|
||||
self._character_count += 1
|
||||
self._character_count_since_last_sep += 1
|
||||
self._last_alpha_seen = character
|
||||
|
||||
def reset(self) -> None: # pragma: no cover
|
||||
self._character_count = 0
|
||||
self._character_count_since_last_sep = 0
|
||||
self._successive_upper_lower_count = 0
|
||||
self._successive_upper_lower_count_final = 0
|
||||
self._last_alpha_seen = None
|
||||
self._buf = False
|
||||
self._current_ascii_only = True
|
||||
|
||||
@property
|
||||
def ratio(self) -> float:
|
||||
if self._character_count == 0:
|
||||
return 0.0
|
||||
|
||||
return self._successive_upper_lower_count_final / self._character_count
|
||||
|
||||
|
||||
class ArabicIsolatedFormPlugin(MessDetectorPlugin):
|
||||
def __init__(self) -> None:
|
||||
self._character_count: int = 0
|
||||
self._isolated_form_count: int = 0
|
||||
|
||||
def reset(self) -> None: # pragma: no cover
|
||||
self._character_count = 0
|
||||
self._isolated_form_count = 0
|
||||
|
||||
def eligible(self, character: str) -> bool:
|
||||
return is_arabic(character)
|
||||
|
||||
def feed(self, character: str) -> None:
|
||||
self._character_count += 1
|
||||
|
||||
if is_arabic_isolated_form(character):
|
||||
self._isolated_form_count += 1
|
||||
|
||||
@property
|
||||
def ratio(self) -> float:
|
||||
if self._character_count < 8:
|
||||
return 0.0
|
||||
|
||||
isolated_form_usage: float = self._isolated_form_count / self._character_count
|
||||
|
||||
return isolated_form_usage
|
||||
|
||||
|
||||
@lru_cache(maxsize=1024)
|
||||
def is_suspiciously_successive_range(
|
||||
unicode_range_a: Optional[str], unicode_range_b: Optional[str]
|
||||
) -> bool:
|
||||
"""
|
||||
Determine whether two Unicode ranges seen next to each other should be considered suspicious.
|
||||
"""
|
||||
if unicode_range_a is None or unicode_range_b is None:
|
||||
return True
|
||||
|
||||
if unicode_range_a == unicode_range_b:
|
||||
return False
|
||||
|
||||
if "Latin" in unicode_range_a and "Latin" in unicode_range_b:
|
||||
return False
|
||||
|
||||
if "Emoticons" in unicode_range_a or "Emoticons" in unicode_range_b:
|
||||
return False
|
||||
|
||||
# Latin characters can be accompanied with a combining diacritical mark
|
||||
# eg. Vietnamese.
|
||||
if ("Latin" in unicode_range_a or "Latin" in unicode_range_b) and (
|
||||
"Combining" in unicode_range_a or "Combining" in unicode_range_b
|
||||
):
|
||||
return False
|
||||
|
||||
keywords_range_a, keywords_range_b = unicode_range_a.split(
|
||||
" "
|
||||
), unicode_range_b.split(" ")
|
||||
|
||||
for el in keywords_range_a:
|
||||
if el in UNICODE_SECONDARY_RANGE_KEYWORD:
|
||||
continue
|
||||
if el in keywords_range_b:
|
||||
return False
|
||||
|
||||
# Japanese Exception
|
||||
range_a_jp_chars, range_b_jp_chars = (
|
||||
unicode_range_a
|
||||
in (
|
||||
"Hiragana",
|
||||
"Katakana",
|
||||
),
|
||||
unicode_range_b in ("Hiragana", "Katakana"),
|
||||
)
|
||||
if (range_a_jp_chars or range_b_jp_chars) and (
|
||||
"CJK" in unicode_range_a or "CJK" in unicode_range_b
|
||||
):
|
||||
return False
|
||||
if range_a_jp_chars and range_b_jp_chars:
|
||||
return False
|
||||
|
||||
if "Hangul" in unicode_range_a or "Hangul" in unicode_range_b:
|
||||
if "CJK" in unicode_range_a or "CJK" in unicode_range_b:
|
||||
return False
|
||||
if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin":
|
||||
return False
|
||||
|
||||
# Chinese/Japanese use dedicated range for punctuation and/or separators.
|
||||
if ("CJK" in unicode_range_a or "CJK" in unicode_range_b) or (
|
||||
unicode_range_a in ["Katakana", "Hiragana"]
|
||||
and unicode_range_b in ["Katakana", "Hiragana"]
|
||||
):
|
||||
if "Punctuation" in unicode_range_a or "Punctuation" in unicode_range_b:
|
||||
return False
|
||||
if "Forms" in unicode_range_a or "Forms" in unicode_range_b:
|
||||
return False
|
||||
if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin":
|
||||
return False
|
||||
|
||||
return True
|
||||
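
# --- Editor's note: illustrative usage sketch, not part of the vendored md.py above. ---
# A quick check of is_suspiciously_successive_range(), assuming charset_normalizer is
# importable from this virtualenv; the expected results in the comments follow from the
# rules above and are indicative only.
from charset_normalizer.md import is_suspiciously_successive_range

print(is_suspiciously_successive_range("Basic Latin", "Latin Extended-A"))  # False: both ranges are Latin
print(is_suspiciously_successive_range("Basic Latin", "Cyrillic"))          # True expected: unrelated scripts side by side
print(is_suspiciously_successive_range(None, "Basic Latin"))                # True: an unknown range is always suspicious
# --- end of editor's sketch ---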
|
||||
|
||||
@lru_cache(maxsize=2048)
|
||||
def mess_ratio(
|
||||
decoded_sequence: str, maximum_threshold: float = 0.2, debug: bool = False
|
||||
) -> float:
|
||||
"""
|
||||
Compute a mess ratio given a decoded bytes sequence. The maximum threshold stops the computation early.
|
||||
"""
|
||||
|
||||
detectors: List[MessDetectorPlugin] = [
|
||||
md_class() for md_class in MessDetectorPlugin.__subclasses__()
|
||||
]
|
||||
|
||||
length: int = len(decoded_sequence) + 1
|
||||
|
||||
mean_mess_ratio: float = 0.0
|
||||
|
||||
if length < 512:
|
||||
intermediary_mean_mess_ratio_calc: int = 32
|
||||
elif length <= 1024:
|
||||
intermediary_mean_mess_ratio_calc = 64
|
||||
else:
|
||||
intermediary_mean_mess_ratio_calc = 128
|
||||
|
||||
for character, index in zip(decoded_sequence + "\n", range(length)):
|
||||
for detector in detectors:
|
||||
if detector.eligible(character):
|
||||
detector.feed(character)
|
||||
|
||||
if (
|
||||
index > 0 and index % intermediary_mean_mess_ratio_calc == 0
|
||||
) or index == length - 1:
|
||||
mean_mess_ratio = sum(dt.ratio for dt in detectors)
|
||||
|
||||
if mean_mess_ratio >= maximum_threshold:
|
||||
break
|
||||
|
||||
if debug:
|
||||
logger = getLogger("charset_normalizer")
|
||||
|
||||
logger.log(
|
||||
TRACE,
|
||||
"Mess-detector extended-analysis start. "
|
||||
f"intermediary_mean_mess_ratio_calc={intermediary_mean_mess_ratio_calc} mean_mess_ratio={mean_mess_ratio} "
|
||||
f"maximum_threshold={maximum_threshold}",
|
||||
)
|
||||
|
||||
if len(decoded_sequence) > 16:
|
||||
logger.log(TRACE, f"Starting with: {decoded_sequence[:16]}")
|
||||
logger.log(TRACE, f"Ending with: {decoded_sequence[-16::]}")
|
||||
|
||||
for dt in detectors: # pragma: nocover
|
||||
logger.log(TRACE, f"{dt.__class__}: {dt.ratio}")
|
||||
|
||||
return round(mean_mess_ratio, 3)
|
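
# --- Editor's note: illustrative usage sketch, not part of the vendored md.py above. ---
# Calling mess_ratio() directly; assumes charset_normalizer is importable from this
# virtualenv. Messier input should score higher, and computation stops early once the
# running ratio reaches maximum_threshold (0.2 by default).
from charset_normalizer.md import mess_ratio

print(mess_ratio("A perfectly ordinary ASCII sentence."))          # expected to be close to 0.0
print(mess_ratio("ÃÂ©Ã¢â‚¬Å“ m0j1b@ke ||| Ã¢â‚¬Â garbled text"))   # expected to be noticeably higher
# --- end of editor's sketch ---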
BIN
env/lib/python3.12/site-packages/charset_normalizer/md__mypyc.cpython-312-aarch64-linux-gnu.so
vendored
Executable file
Binary file not shown.
359
env/lib/python3.12/site-packages/charset_normalizer/models.py
vendored
Normal file
@ -0,0 +1,359 @@
|
||||
from encodings.aliases import aliases
|
||||
from hashlib import sha256
|
||||
from json import dumps
|
||||
from re import sub
|
||||
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
|
||||
|
||||
from .constant import RE_POSSIBLE_ENCODING_INDICATION, TOO_BIG_SEQUENCE
|
||||
from .utils import iana_name, is_multi_byte_encoding, unicode_range
|
||||
|
||||
|
||||
class CharsetMatch:
|
||||
def __init__(
|
||||
self,
|
||||
payload: bytes,
|
||||
guessed_encoding: str,
|
||||
mean_mess_ratio: float,
|
||||
has_sig_or_bom: bool,
|
||||
languages: "CoherenceMatches",
|
||||
decoded_payload: Optional[str] = None,
|
||||
preemptive_declaration: Optional[str] = None,
|
||||
):
|
||||
self._payload: bytes = payload
|
||||
|
||||
self._encoding: str = guessed_encoding
|
||||
self._mean_mess_ratio: float = mean_mess_ratio
|
||||
self._languages: CoherenceMatches = languages
|
||||
self._has_sig_or_bom: bool = has_sig_or_bom
|
||||
self._unicode_ranges: Optional[List[str]] = None
|
||||
|
||||
self._leaves: List[CharsetMatch] = []
|
||||
self._mean_coherence_ratio: float = 0.0
|
||||
|
||||
self._output_payload: Optional[bytes] = None
|
||||
self._output_encoding: Optional[str] = None
|
||||
|
||||
self._string: Optional[str] = decoded_payload
|
||||
|
||||
self._preemptive_declaration: Optional[str] = preemptive_declaration
|
||||
|
||||
def __eq__(self, other: object) -> bool:
|
||||
if not isinstance(other, CharsetMatch):
|
||||
if isinstance(other, str):
|
||||
return iana_name(other) == self.encoding
|
||||
return False
|
||||
return self.encoding == other.encoding and self.fingerprint == other.fingerprint
|
||||
|
||||
def __lt__(self, other: object) -> bool:
|
||||
"""
|
||||
Implemented so that sorted() can be used on CharsetMatches items.
|
||||
"""
|
||||
if not isinstance(other, CharsetMatch):
|
||||
raise ValueError
|
||||
|
||||
chaos_difference: float = abs(self.chaos - other.chaos)
|
||||
coherence_difference: float = abs(self.coherence - other.coherence)
|
||||
|
||||
# Below 1% difference --> Use Coherence
|
||||
if chaos_difference < 0.01 and coherence_difference > 0.02:
|
||||
return self.coherence > other.coherence
|
||||
elif chaos_difference < 0.01 and coherence_difference <= 0.02:
|
||||
# When the decision is difficult, prefer the result that decoded as many multi-byte characters as possible.
# For very large payloads, fall back to chaos to preserve RAM.
|
||||
if len(self._payload) >= TOO_BIG_SEQUENCE:
|
||||
return self.chaos < other.chaos
|
||||
return self.multi_byte_usage > other.multi_byte_usage
|
||||
|
||||
return self.chaos < other.chaos
|
||||
|
||||
@property
|
||||
def multi_byte_usage(self) -> float:
|
||||
return 1.0 - (len(str(self)) / len(self.raw))
|
||||
|
||||
def __str__(self) -> str:
|
||||
# Lazy Str Loading
|
||||
if self._string is None:
|
||||
self._string = str(self._payload, self._encoding, "strict")
|
||||
return self._string
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return "<CharsetMatch '{}' bytes({})>".format(self.encoding, self.fingerprint)
|
||||
|
||||
def add_submatch(self, other: "CharsetMatch") -> None:
|
||||
if not isinstance(other, CharsetMatch) or other == self:
|
||||
raise ValueError(
|
||||
"Unable to add instance <{}> as a submatch of a CharsetMatch".format(
|
||||
other.__class__
|
||||
)
|
||||
)
|
||||
|
||||
other._string = None # Unload RAM usage; dirty trick.
|
||||
self._leaves.append(other)
|
||||
|
||||
@property
|
||||
def encoding(self) -> str:
|
||||
return self._encoding
|
||||
|
||||
@property
|
||||
def encoding_aliases(self) -> List[str]:
|
||||
"""
|
||||
An encoding is known by many names; this can help, for example, when searching for IBM855 while it is listed as CP855.
|
||||
"""
|
||||
also_known_as: List[str] = []
|
||||
for u, p in aliases.items():
|
||||
if self.encoding == u:
|
||||
also_known_as.append(p)
|
||||
elif self.encoding == p:
|
||||
also_known_as.append(u)
|
||||
return also_known_as
|
||||
|
||||
@property
|
||||
def bom(self) -> bool:
|
||||
return self._has_sig_or_bom
|
||||
|
||||
@property
|
||||
def byte_order_mark(self) -> bool:
|
||||
return self._has_sig_or_bom
|
||||
|
||||
@property
|
||||
def languages(self) -> List[str]:
|
||||
"""
|
||||
Return the complete list of possible languages found in the decoded sequence.
Usually not very useful. The returned list may be empty even if the 'language' property returns something other than 'Unknown'.
|
||||
"""
|
||||
return [e[0] for e in self._languages]
|
||||
|
||||
@property
|
||||
def language(self) -> str:
|
||||
"""
|
||||
Most probable language found in decoded sequence. If none were detected or inferred, the property will return
|
||||
"Unknown".
|
||||
"""
|
||||
if not self._languages:
|
||||
# Trying to infer the language based on the given encoding
|
||||
# It's either English or we should not make a call in certain cases.
|
||||
if "ascii" in self.could_be_from_charset:
|
||||
return "English"
|
||||
|
||||
# doing it there to avoid circular import
|
||||
from charset_normalizer.cd import encoding_languages, mb_encoding_languages
|
||||
|
||||
languages = (
|
||||
mb_encoding_languages(self.encoding)
|
||||
if is_multi_byte_encoding(self.encoding)
|
||||
else encoding_languages(self.encoding)
|
||||
)
|
||||
|
||||
if len(languages) == 0 or "Latin Based" in languages:
|
||||
return "Unknown"
|
||||
|
||||
return languages[0]
|
||||
|
||||
return self._languages[0][0]
|
||||
|
||||
@property
|
||||
def chaos(self) -> float:
|
||||
return self._mean_mess_ratio
|
||||
|
||||
@property
|
||||
def coherence(self) -> float:
|
||||
if not self._languages:
|
||||
return 0.0
|
||||
return self._languages[0][1]
|
||||
|
||||
@property
|
||||
def percent_chaos(self) -> float:
|
||||
return round(self.chaos * 100, ndigits=3)
|
||||
|
||||
@property
|
||||
def percent_coherence(self) -> float:
|
||||
return round(self.coherence * 100, ndigits=3)
|
||||
|
||||
@property
|
||||
def raw(self) -> bytes:
|
||||
"""
|
||||
Original untouched bytes.
|
||||
"""
|
||||
return self._payload
|
||||
|
||||
@property
|
||||
def submatch(self) -> List["CharsetMatch"]:
|
||||
return self._leaves
|
||||
|
||||
@property
|
||||
def has_submatch(self) -> bool:
|
||||
return len(self._leaves) > 0
|
||||
|
||||
@property
|
||||
def alphabets(self) -> List[str]:
|
||||
if self._unicode_ranges is not None:
|
||||
return self._unicode_ranges
|
||||
# list detected ranges
|
||||
detected_ranges: List[Optional[str]] = [
|
||||
unicode_range(char) for char in str(self)
|
||||
]
|
||||
# filter and sort
|
||||
self._unicode_ranges = sorted(list({r for r in detected_ranges if r}))
|
||||
return self._unicode_ranges
|
||||
|
||||
@property
|
||||
def could_be_from_charset(self) -> List[str]:
|
||||
"""
|
||||
The complete list of encodings that output the exact SAME str result and therefore could be the originating encoding.
This list does include the encoding available in the 'encoding' property.
|
||||
"""
|
||||
return [self._encoding] + [m.encoding for m in self._leaves]
|
||||
|
||||
def output(self, encoding: str = "utf_8") -> bytes:
|
||||
"""
|
||||
Method to get the re-encoded bytes payload using the given target encoding. Defaults to UTF-8.
Any errors will simply be ignored by the encoder, NOT replaced.
|
||||
"""
|
||||
if self._output_encoding is None or self._output_encoding != encoding:
|
||||
self._output_encoding = encoding
|
||||
decoded_string = str(self)
|
||||
if (
|
||||
self._preemptive_declaration is not None
|
||||
and self._preemptive_declaration.lower()
|
||||
not in ["utf-8", "utf8", "utf_8"]
|
||||
):
|
||||
patched_header = sub(
|
||||
RE_POSSIBLE_ENCODING_INDICATION,
|
||||
lambda m: m.string[m.span()[0] : m.span()[1]].replace(
|
||||
m.groups()[0], iana_name(self._output_encoding) # type: ignore[arg-type]
|
||||
),
|
||||
decoded_string[:8192],
|
||||
1,
|
||||
)
|
||||
|
||||
decoded_string = patched_header + decoded_string[8192:]
|
||||
|
||||
self._output_payload = decoded_string.encode(encoding, "replace")
|
||||
|
||||
return self._output_payload # type: ignore
|
||||
|
||||
@property
|
||||
def fingerprint(self) -> str:
|
||||
"""
|
||||
Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
|
||||
"""
|
||||
return sha256(self.output()).hexdigest()
|
||||
|
||||
|
||||
class CharsetMatches:
|
||||
"""
|
||||
Container holding every CharsetMatch item, ordered by default from the most probable to the least probable.
Acts like a list (iterable) but does not implement all related methods.
|
||||
"""
|
||||
|
||||
def __init__(self, results: Optional[List[CharsetMatch]] = None):
|
||||
self._results: List[CharsetMatch] = sorted(results) if results else []
|
||||
|
||||
def __iter__(self) -> Iterator[CharsetMatch]:
|
||||
yield from self._results
|
||||
|
||||
def __getitem__(self, item: Union[int, str]) -> CharsetMatch:
|
||||
"""
|
||||
Retrieve a single item either by its position or by encoding name (an alias may be used here).
Raise KeyError upon an invalid index or an encoding not present in the results.
|
||||
"""
|
||||
if isinstance(item, int):
|
||||
return self._results[item]
|
||||
if isinstance(item, str):
|
||||
item = iana_name(item, False)
|
||||
for result in self._results:
|
||||
if item in result.could_be_from_charset:
|
||||
return result
|
||||
raise KeyError
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self._results)
|
||||
|
||||
def __bool__(self) -> bool:
|
||||
return len(self._results) > 0
|
||||
|
||||
def append(self, item: CharsetMatch) -> None:
|
||||
"""
|
||||
Insert a single match. It will be inserted in the right position to preserve sorting.
It can be inserted as a submatch.
|
||||
"""
|
||||
if not isinstance(item, CharsetMatch):
|
||||
raise ValueError(
|
||||
"Cannot append instance '{}' to CharsetMatches".format(
|
||||
str(item.__class__)
|
||||
)
|
||||
)
|
||||
# We should disable the submatch factoring when the input file is too heavy (conserve RAM usage)
|
||||
if len(item.raw) < TOO_BIG_SEQUENCE:
|
||||
for match in self._results:
|
||||
if match.fingerprint == item.fingerprint and match.chaos == item.chaos:
|
||||
match.add_submatch(item)
|
||||
return
|
||||
self._results.append(item)
|
||||
self._results = sorted(self._results)
|
||||
|
||||
def best(self) -> Optional["CharsetMatch"]:
|
||||
"""
|
||||
Simply return the first match. Strict equivalent to matches[0].
|
||||
"""
|
||||
if not self._results:
|
||||
return None
|
||||
return self._results[0]
|
||||
|
||||
def first(self) -> Optional["CharsetMatch"]:
|
||||
"""
|
||||
Redundant method that simply calls best(). Kept for backward-compatibility reasons.
|
||||
"""
|
||||
return self.best()
|
||||
|
||||
|
||||
CoherenceMatch = Tuple[str, float]
|
||||
CoherenceMatches = List[CoherenceMatch]
|
||||
|
||||
|
||||
class CliDetectionResult:
|
||||
def __init__(
|
||||
self,
|
||||
path: str,
|
||||
encoding: Optional[str],
|
||||
encoding_aliases: List[str],
|
||||
alternative_encodings: List[str],
|
||||
language: str,
|
||||
alphabets: List[str],
|
||||
has_sig_or_bom: bool,
|
||||
chaos: float,
|
||||
coherence: float,
|
||||
unicode_path: Optional[str],
|
||||
is_preferred: bool,
|
||||
):
|
||||
self.path: str = path
|
||||
self.unicode_path: Optional[str] = unicode_path
|
||||
self.encoding: Optional[str] = encoding
|
||||
self.encoding_aliases: List[str] = encoding_aliases
|
||||
self.alternative_encodings: List[str] = alternative_encodings
|
||||
self.language: str = language
|
||||
self.alphabets: List[str] = alphabets
|
||||
self.has_sig_or_bom: bool = has_sig_or_bom
|
||||
self.chaos: float = chaos
|
||||
self.coherence: float = coherence
|
||||
self.is_preferred: bool = is_preferred
|
||||
|
||||
@property
|
||||
def __dict__(self) -> Dict[str, Any]: # type: ignore
|
||||
return {
|
||||
"path": self.path,
|
||||
"encoding": self.encoding,
|
||||
"encoding_aliases": self.encoding_aliases,
|
||||
"alternative_encodings": self.alternative_encodings,
|
||||
"language": self.language,
|
||||
"alphabets": self.alphabets,
|
||||
"has_sig_or_bom": self.has_sig_or_bom,
|
||||
"chaos": self.chaos,
|
||||
"coherence": self.coherence,
|
||||
"unicode_path": self.unicode_path,
|
||||
"is_preferred": self.is_preferred,
|
||||
}
|
||||
|
||||
def to_json(self) -> str:
|
||||
return dumps(self.__dict__, ensure_ascii=True, indent=4)
|
0
env/lib/python3.12/site-packages/charset_normalizer/py.typed
vendored
Normal file
421
env/lib/python3.12/site-packages/charset_normalizer/utils.py
vendored
Normal file
@ -0,0 +1,421 @@
|
||||
import importlib
|
||||
import logging
|
||||
import unicodedata
|
||||
from codecs import IncrementalDecoder
|
||||
from encodings.aliases import aliases
|
||||
from functools import lru_cache
|
||||
from re import findall
|
||||
from typing import Generator, List, Optional, Set, Tuple, Union
|
||||
|
||||
from _multibytecodec import MultibyteIncrementalDecoder
|
||||
|
||||
from .constant import (
|
||||
ENCODING_MARKS,
|
||||
IANA_SUPPORTED_SIMILAR,
|
||||
RE_POSSIBLE_ENCODING_INDICATION,
|
||||
UNICODE_RANGES_COMBINED,
|
||||
UNICODE_SECONDARY_RANGE_KEYWORD,
|
||||
UTF8_MAXIMAL_ALLOCATION,
|
||||
)
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
||||
def is_accentuated(character: str) -> bool:
|
||||
try:
|
||||
description: str = unicodedata.name(character)
|
||||
except ValueError:
|
||||
return False
|
||||
return (
|
||||
"WITH GRAVE" in description
|
||||
or "WITH ACUTE" in description
|
||||
or "WITH CEDILLA" in description
|
||||
or "WITH DIAERESIS" in description
|
||||
or "WITH CIRCUMFLEX" in description
|
||||
or "WITH TILDE" in description
|
||||
or "WITH MACRON" in description
|
||||
or "WITH RING ABOVE" in description
|
||||
)
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
||||
def remove_accent(character: str) -> str:
|
||||
decomposed: str = unicodedata.decomposition(character)
|
||||
if not decomposed:
|
||||
return character
|
||||
|
||||
codes: List[str] = decomposed.split(" ")
|
||||
|
||||
return chr(int(codes[0], 16))
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
||||
def unicode_range(character: str) -> Optional[str]:
|
||||
"""
|
||||
Retrieve the Unicode range official name from a single character.
|
||||
"""
|
||||
character_ord: int = ord(character)
|
||||
|
||||
for range_name, ord_range in UNICODE_RANGES_COMBINED.items():
|
||||
if character_ord in ord_range:
|
||||
return range_name
|
||||
|
||||
return None
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
||||
def is_latin(character: str) -> bool:
|
||||
try:
|
||||
description: str = unicodedata.name(character)
|
||||
except ValueError:
|
||||
return False
|
||||
return "LATIN" in description
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
||||
def is_punctuation(character: str) -> bool:
|
||||
character_category: str = unicodedata.category(character)
|
||||
|
||||
if "P" in character_category:
|
||||
return True
|
||||
|
||||
character_range: Optional[str] = unicode_range(character)
|
||||
|
||||
if character_range is None:
|
||||
return False
|
||||
|
||||
return "Punctuation" in character_range
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
||||
def is_symbol(character: str) -> bool:
|
||||
character_category: str = unicodedata.category(character)
|
||||
|
||||
if "S" in character_category or "N" in character_category:
|
||||
return True
|
||||
|
||||
character_range: Optional[str] = unicode_range(character)
|
||||
|
||||
if character_range is None:
|
||||
return False
|
||||
|
||||
return "Forms" in character_range and character_category != "Lo"
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
||||
def is_emoticon(character: str) -> bool:
|
||||
character_range: Optional[str] = unicode_range(character)
|
||||
|
||||
if character_range is None:
|
||||
return False
|
||||
|
||||
return "Emoticons" in character_range or "Pictographs" in character_range
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
||||
def is_separator(character: str) -> bool:
|
||||
if character.isspace() or character in {"|", "+", "<", ">"}:
|
||||
return True
|
||||
|
||||
character_category: str = unicodedata.category(character)
|
||||
|
||||
return "Z" in character_category or character_category in {"Po", "Pd", "Pc"}
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
||||
def is_case_variable(character: str) -> bool:
|
||||
return character.islower() != character.isupper()
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
||||
def is_cjk(character: str) -> bool:
|
||||
try:
|
||||
character_name = unicodedata.name(character)
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
return "CJK" in character_name
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
||||
def is_hiragana(character: str) -> bool:
|
||||
try:
|
||||
character_name = unicodedata.name(character)
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
return "HIRAGANA" in character_name
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
||||
def is_katakana(character: str) -> bool:
|
||||
try:
|
||||
character_name = unicodedata.name(character)
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
return "KATAKANA" in character_name
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
||||
def is_hangul(character: str) -> bool:
|
||||
try:
|
||||
character_name = unicodedata.name(character)
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
return "HANGUL" in character_name
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
||||
def is_thai(character: str) -> bool:
|
||||
try:
|
||||
character_name = unicodedata.name(character)
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
return "THAI" in character_name
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
||||
def is_arabic(character: str) -> bool:
|
||||
try:
|
||||
character_name = unicodedata.name(character)
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
return "ARABIC" in character_name
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
||||
def is_arabic_isolated_form(character: str) -> bool:
|
||||
try:
|
||||
character_name = unicodedata.name(character)
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
return "ARABIC" in character_name and "ISOLATED FORM" in character_name
|
||||
|
||||
|
||||
@lru_cache(maxsize=len(UNICODE_RANGES_COMBINED))
|
||||
def is_unicode_range_secondary(range_name: str) -> bool:
|
||||
return any(keyword in range_name for keyword in UNICODE_SECONDARY_RANGE_KEYWORD)
|
||||
|
||||
|
||||
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
||||
def is_unprintable(character: str) -> bool:
|
||||
return (
|
||||
character.isspace() is False # includes \n \t \r \v
|
||||
and character.isprintable() is False
|
||||
and character != "\x1A" # Why? Its the ASCII substitute character.
|
||||
and character != "\ufeff" # bug discovered in Python,
|
||||
# Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space.
|
||||
)
|
||||
|
||||
|
||||
def any_specified_encoding(sequence: bytes, search_zone: int = 8192) -> Optional[str]:
|
||||
"""
|
||||
Extract any specified encoding from the first n bytes, using an ASCII-only decoder.
|
||||
"""
|
||||
if not isinstance(sequence, bytes):
|
||||
raise TypeError
|
||||
|
||||
seq_len: int = len(sequence)
|
||||
|
||||
results: List[str] = findall(
|
||||
RE_POSSIBLE_ENCODING_INDICATION,
|
||||
sequence[: min(seq_len, search_zone)].decode("ascii", errors="ignore"),
|
||||
)
|
||||
|
||||
if len(results) == 0:
|
||||
return None
|
||||
|
||||
for specified_encoding in results:
|
||||
specified_encoding = specified_encoding.lower().replace("-", "_")
|
||||
|
||||
encoding_alias: str
|
||||
encoding_iana: str
|
||||
|
||||
for encoding_alias, encoding_iana in aliases.items():
|
||||
if encoding_alias == specified_encoding:
|
||||
return encoding_iana
|
||||
if encoding_iana == specified_encoding:
|
||||
return encoding_iana
|
||||
|
||||
return None
|
||||
|
||||
|
||||
@lru_cache(maxsize=128)
|
||||
def is_multi_byte_encoding(name: str) -> bool:
|
||||
"""
|
||||
Verify whether a specific encoding is a multi-byte one, based on its IANA name.
|
||||
"""
|
||||
return name in {
|
||||
"utf_8",
|
||||
"utf_8_sig",
|
||||
"utf_16",
|
||||
"utf_16_be",
|
||||
"utf_16_le",
|
||||
"utf_32",
|
||||
"utf_32_le",
|
||||
"utf_32_be",
|
||||
"utf_7",
|
||||
} or issubclass(
|
||||
importlib.import_module("encodings.{}".format(name)).IncrementalDecoder,
|
||||
MultibyteIncrementalDecoder,
|
||||
)
|
||||
|
||||
|
||||
def identify_sig_or_bom(sequence: bytes) -> Tuple[Optional[str], bytes]:
|
||||
"""
|
||||
Identify and extract SIG/BOM in given sequence.
|
||||
"""
|
||||
|
||||
for iana_encoding in ENCODING_MARKS:
|
||||
marks: Union[bytes, List[bytes]] = ENCODING_MARKS[iana_encoding]
|
||||
|
||||
if isinstance(marks, bytes):
|
||||
marks = [marks]
|
||||
|
||||
for mark in marks:
|
||||
if sequence.startswith(mark):
|
||||
return iana_encoding, mark
|
||||
|
||||
return None, b""
|
||||
|
||||
|
||||
def should_strip_sig_or_bom(iana_encoding: str) -> bool:
|
||||
return iana_encoding not in {"utf_16", "utf_32"}
|
||||
|
||||
|
||||
def iana_name(cp_name: str, strict: bool = True) -> str:
|
||||
cp_name = cp_name.lower().replace("-", "_")
|
||||
|
||||
encoding_alias: str
|
||||
encoding_iana: str
|
||||
|
||||
for encoding_alias, encoding_iana in aliases.items():
|
||||
if cp_name in [encoding_alias, encoding_iana]:
|
||||
return encoding_iana
|
||||
|
||||
if strict:
|
||||
raise ValueError("Unable to retrieve IANA for '{}'".format(cp_name))
|
||||
|
||||
return cp_name
|
||||
|
||||
|
||||
def range_scan(decoded_sequence: str) -> List[str]:
|
||||
ranges: Set[str] = set()
|
||||
|
||||
for character in decoded_sequence:
|
||||
character_range: Optional[str] = unicode_range(character)
|
||||
|
||||
if character_range is None:
|
||||
continue
|
||||
|
||||
ranges.add(character_range)
|
||||
|
||||
return list(ranges)
|
||||
|
||||
|
||||
def cp_similarity(iana_name_a: str, iana_name_b: str) -> float:
|
||||
if is_multi_byte_encoding(iana_name_a) or is_multi_byte_encoding(iana_name_b):
|
||||
return 0.0
|
||||
|
||||
decoder_a = importlib.import_module(
|
||||
"encodings.{}".format(iana_name_a)
|
||||
).IncrementalDecoder
|
||||
decoder_b = importlib.import_module(
|
||||
"encodings.{}".format(iana_name_b)
|
||||
).IncrementalDecoder
|
||||
|
||||
id_a: IncrementalDecoder = decoder_a(errors="ignore")
|
||||
id_b: IncrementalDecoder = decoder_b(errors="ignore")
|
||||
|
||||
character_match_count: int = 0
|
||||
|
||||
for i in range(255):
|
||||
to_be_decoded: bytes = bytes([i])
|
||||
if id_a.decode(to_be_decoded) == id_b.decode(to_be_decoded):
|
||||
character_match_count += 1
|
||||
|
||||
return character_match_count / 254
|
||||
|
||||
|
||||
def is_cp_similar(iana_name_a: str, iana_name_b: str) -> bool:
|
||||
"""
|
||||
Determine if two code pages are at least 80% similar. The IANA_SUPPORTED_SIMILAR dict was generated using
the cp_similarity function.
|
||||
"""
|
||||
return (
|
||||
iana_name_a in IANA_SUPPORTED_SIMILAR
|
||||
and iana_name_b in IANA_SUPPORTED_SIMILAR[iana_name_a]
|
||||
)
|
||||
|
||||
|
||||
def set_logging_handler(
|
||||
name: str = "charset_normalizer",
|
||||
level: int = logging.INFO,
|
||||
format_string: str = "%(asctime)s | %(levelname)s | %(message)s",
|
||||
) -> None:
|
||||
logger = logging.getLogger(name)
|
||||
logger.setLevel(level)
|
||||
|
||||
handler = logging.StreamHandler()
|
||||
handler.setFormatter(logging.Formatter(format_string))
|
||||
logger.addHandler(handler)
|
||||
|
||||
|
||||
def cut_sequence_chunks(
|
||||
sequences: bytes,
|
||||
encoding_iana: str,
|
||||
offsets: range,
|
||||
chunk_size: int,
|
||||
bom_or_sig_available: bool,
|
||||
strip_sig_or_bom: bool,
|
||||
sig_payload: bytes,
|
||||
is_multi_byte_decoder: bool,
|
||||
decoded_payload: Optional[str] = None,
|
||||
) -> Generator[str, None, None]:
|
||||
if decoded_payload and is_multi_byte_decoder is False:
|
||||
for i in offsets:
|
||||
chunk = decoded_payload[i : i + chunk_size]
|
||||
if not chunk:
|
||||
break
|
||||
yield chunk
|
||||
else:
|
||||
for i in offsets:
|
||||
chunk_end = i + chunk_size
|
||||
if chunk_end > len(sequences) + 8:
|
||||
continue
|
||||
|
||||
cut_sequence = sequences[i : i + chunk_size]
|
||||
|
||||
if bom_or_sig_available and strip_sig_or_bom is False:
|
||||
cut_sequence = sig_payload + cut_sequence
|
||||
|
||||
chunk = cut_sequence.decode(
|
||||
encoding_iana,
|
||||
errors="ignore" if is_multi_byte_decoder else "strict",
|
||||
)
|
||||
|
||||
# multi-byte bad cutting detector and adjustment
|
||||
# not the cleanest way to perform that fix but clever enough for now.
|
||||
if is_multi_byte_decoder and i > 0:
|
||||
chunk_partial_size_chk: int = min(chunk_size, 16)
|
||||
|
||||
if (
|
||||
decoded_payload
|
||||
and chunk[:chunk_partial_size_chk] not in decoded_payload
|
||||
):
|
||||
for j in range(i, i - 4, -1):
|
||||
cut_sequence = sequences[j:chunk_end]
|
||||
|
||||
if bom_or_sig_available and strip_sig_or_bom is False:
|
||||
cut_sequence = sig_payload + cut_sequence
|
||||
|
||||
chunk = cut_sequence.decode(encoding_iana, errors="ignore")
|
||||
|
||||
if chunk[:chunk_partial_size_chk] in decoded_payload:
|
||||
break
|
||||
|
||||
yield chunk
|
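
# --- Editor's note: illustrative usage sketch, not part of the vendored utils.py above. ---
# Small demonstrations of the helpers defined above; assumes charset_normalizer is
# importable from this virtualenv. Expected values are indicative only.
from charset_normalizer.utils import (
    iana_name,
    identify_sig_or_bom,
    is_accentuated,
    unicode_range,
)

print(unicode_range("é"))                          # e.g. "Latin-1 Supplement"
print(is_accentuated("é"), is_accentuated("e"))    # True False
print(iana_name("UTF-8", strict=False))            # normalized to "utf_8"
print(identify_sig_or_bom(b"\xef\xbb\xbfhello"))   # ("utf_8", b"\xef\xbb\xbf") expected
# --- end of editor's sketch ---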
6
env/lib/python3.12/site-packages/charset_normalizer/version.py
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
"""
|
||||
Expose version
|
||||
"""
|
||||
|
||||
__version__ = "3.4.0"
|
||||
VERSION = __version__.split(".")
|
47
env/lib/python3.12/site-packages/gitlab/__init__.py
vendored
Normal file
@ -0,0 +1,47 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2013-2019 Gauvain Pocentek, 2019-2023 python-gitlab team
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Lesser General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Wrapper for the GitLab API."""
|
||||
|
||||
import warnings
|
||||
|
||||
import gitlab.config # noqa: F401
|
||||
from gitlab._version import ( # noqa: F401
|
||||
__author__,
|
||||
__copyright__,
|
||||
__email__,
|
||||
__license__,
|
||||
__title__,
|
||||
__version__,
|
||||
)
|
||||
from gitlab.client import Gitlab, GitlabList, GraphQL # noqa: F401
|
||||
from gitlab.exceptions import * # noqa: F401,F403
|
||||
|
||||
warnings.filterwarnings("default", category=DeprecationWarning, module="^gitlab")
|
||||
|
||||
|
||||
__all__ = [
|
||||
"__author__",
|
||||
"__copyright__",
|
||||
"__email__",
|
||||
"__license__",
|
||||
"__title__",
|
||||
"__version__",
|
||||
"Gitlab",
|
||||
"GitlabList",
|
||||
"GraphQL",
|
||||
]
|
||||
__all__.extend(gitlab.exceptions.__all__)
|
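
# --- Editor's note: illustrative usage sketch, not part of the vendored gitlab/__init__.py above. ---
# Typical entry point into python-gitlab, whose top-level exports are listed above.
# The URL and token below are placeholders, not values taken from this repository.
import gitlab

gl = gitlab.Gitlab("https://gitlab.example.com", private_token="<redacted>")
gl.auth()                                    # verifies the token and populates gl.user
for project in gl.projects.list(iterator=True):
    print(project.id, project.path_with_namespace)
# --- end of editor's sketch ---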
4
env/lib/python3.12/site-packages/gitlab/__main__.py
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
import gitlab.cli
|
||||
|
||||
if __name__ == "__main__":
|
||||
gitlab.cli.main()
|
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/__init__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/__main__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/_version.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/base.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/cli.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/client.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/config.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/const.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/exceptions.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/mixins.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/types.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/__pycache__/utils.cpython-312.pyc
vendored
Normal file
Binary file not shown.
22
env/lib/python3.12/site-packages/gitlab/_backends/__init__.py
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
"""
|
||||
Defines http backends for processing http requests
|
||||
"""
|
||||
|
||||
from .requests_backend import (
|
||||
JobTokenAuth,
|
||||
OAuthTokenAuth,
|
||||
PrivateTokenAuth,
|
||||
RequestsBackend,
|
||||
RequestsResponse,
|
||||
)
|
||||
|
||||
DefaultBackend = RequestsBackend
|
||||
DefaultResponse = RequestsResponse
|
||||
|
||||
__all__ = [
|
||||
"DefaultBackend",
|
||||
"DefaultResponse",
|
||||
"JobTokenAuth",
|
||||
"OAuthTokenAuth",
|
||||
"PrivateTokenAuth",
|
||||
]
|
BIN
env/lib/python3.12/site-packages/gitlab/_backends/__pycache__/__init__.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/_backends/__pycache__/graphql.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/_backends/__pycache__/protocol.cpython-312.pyc
vendored
Normal file
Binary file not shown.
BIN
env/lib/python3.12/site-packages/gitlab/_backends/__pycache__/requests_backend.cpython-312.pyc
vendored
Normal file
Binary file not shown.
24
env/lib/python3.12/site-packages/gitlab/_backends/graphql.py
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
from gql.transport.httpx import HTTPXTransport
|
||||
|
||||
|
||||
class GitlabTransport(HTTPXTransport):
|
||||
"""A gql httpx transport that reuses an existing httpx.Client.
|
||||
By default, gql's transports do not have a keep-alive session
|
||||
and do not enable providing your own session that's kept open.
|
||||
This transport lets us provide and close our session on our own
|
||||
and provide additional auth.
|
||||
For details, see https://github.com/graphql-python/gql/issues/91.
|
||||
"""
|
||||
|
||||
def __init__(self, *args: Any, client: httpx.Client, **kwargs: Any):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.client = client
|
||||
|
||||
def connect(self) -> None:
|
||||
pass
|
||||
|
||||
def close(self) -> None:
|
||||
pass
|
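
# --- Editor's note: illustrative usage sketch, not part of the vendored graphql.py above. ---
# Reusing a pre-configured httpx.Client through GitlabTransport, as the docstring above
# describes. The URL and token are placeholders; gql's Client comes from the gql package
# this backend builds on.
import httpx
from gql import Client
from gitlab._backends.graphql import GitlabTransport

http_client = httpx.Client(headers={"PRIVATE-TOKEN": "<redacted>"})
transport = GitlabTransport(url="https://gitlab.example.com/api/graphql", client=http_client)
gql_client = Client(transport=transport, fetch_schema_from_transport=False)
# ... execute queries through gql_client, then close the session we own ourselves:
http_client.close()
# --- end of editor's sketch ---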
32
env/lib/python3.12/site-packages/gitlab/_backends/protocol.py
vendored
Normal file
@ -0,0 +1,32 @@
|
||||
import abc
|
||||
import sys
|
||||
from typing import Any, Dict, Optional, Union
|
||||
|
||||
import requests
|
||||
from requests_toolbelt.multipart.encoder import MultipartEncoder # type: ignore
|
||||
|
||||
if sys.version_info >= (3, 8):
|
||||
from typing import Protocol
|
||||
else:
|
||||
from typing_extensions import Protocol
|
||||
|
||||
|
||||
class BackendResponse(Protocol):
|
||||
@abc.abstractmethod
|
||||
def __init__(self, response: requests.Response) -> None: ...
|
||||
|
||||
|
||||
class Backend(Protocol):
|
||||
@abc.abstractmethod
|
||||
def http_request(
|
||||
self,
|
||||
method: str,
|
||||
url: str,
|
||||
json: Optional[Union[Dict[str, Any], bytes]],
|
||||
data: Optional[Union[Dict[str, Any], MultipartEncoder]],
|
||||
params: Optional[Any],
|
||||
timeout: Optional[float],
|
||||
verify: Optional[Union[bool, str]],
|
||||
stream: Optional[bool],
|
||||
**kwargs: Any,
|
||||
) -> BackendResponse: ...
|
168
env/lib/python3.12/site-packages/gitlab/_backends/requests_backend.py
vendored
Normal file
@ -0,0 +1,168 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
from typing import Any, BinaryIO, Dict, Optional, TYPE_CHECKING, Union
|
||||
|
||||
import requests
|
||||
from requests import PreparedRequest
|
||||
from requests.auth import AuthBase
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
from requests_toolbelt.multipart.encoder import MultipartEncoder # type: ignore
|
||||
|
||||
from . import protocol
|
||||
|
||||
|
||||
class TokenAuth:
|
||||
def __init__(self, token: str):
|
||||
self.token = token
|
||||
|
||||
|
||||
class OAuthTokenAuth(TokenAuth, AuthBase):
|
||||
def __call__(self, r: PreparedRequest) -> PreparedRequest:
|
||||
r.headers["Authorization"] = f"Bearer {self.token}"
|
||||
r.headers.pop("PRIVATE-TOKEN", None)
|
||||
r.headers.pop("JOB-TOKEN", None)
|
||||
return r
|
||||
|
||||
|
||||
class PrivateTokenAuth(TokenAuth, AuthBase):
|
||||
def __call__(self, r: PreparedRequest) -> PreparedRequest:
|
||||
r.headers["PRIVATE-TOKEN"] = self.token
|
||||
r.headers.pop("JOB-TOKEN", None)
|
||||
r.headers.pop("Authorization", None)
|
||||
return r
|
||||
|
||||
|
||||
class JobTokenAuth(TokenAuth, AuthBase):
|
||||
def __call__(self, r: PreparedRequest) -> PreparedRequest:
|
||||
r.headers["JOB-TOKEN"] = self.token
|
||||
r.headers.pop("PRIVATE-TOKEN", None)
|
||||
r.headers.pop("Authorization", None)
|
||||
return r
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class SendData:
|
||||
content_type: str
|
||||
data: Optional[Union[Dict[str, Any], MultipartEncoder]] = None
|
||||
json: Optional[Union[Dict[str, Any], bytes]] = None
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if self.json is not None and self.data is not None:
|
||||
raise ValueError(
|
||||
f"`json` and `data` are mutually exclusive. Only one can be set. "
|
||||
f"json={self.json!r} data={self.data!r}"
|
||||
)
|
||||
|
||||
|
||||
class RequestsResponse(protocol.BackendResponse):
|
||||
def __init__(self, response: requests.Response) -> None:
|
||||
self._response: requests.Response = response
|
||||
|
||||
@property
|
||||
def response(self) -> requests.Response:
|
||||
return self._response
|
||||
|
||||
@property
|
||||
def status_code(self) -> int:
|
||||
return self._response.status_code
|
||||
|
||||
@property
|
||||
def headers(self) -> CaseInsensitiveDict[str]:
|
||||
return self._response.headers
|
||||
|
||||
@property
|
||||
def content(self) -> bytes:
|
||||
return self._response.content
|
||||
|
||||
@property
|
||||
def reason(self) -> str:
|
||||
return self._response.reason
|
||||
|
||||
def json(self) -> Any:
|
||||
return self._response.json()
|
||||
|
||||
|
||||
class RequestsBackend(protocol.Backend):
|
||||
def __init__(self, session: Optional[requests.Session] = None) -> None:
|
||||
self._client: requests.Session = session or requests.Session()
|
||||
|
||||
@property
|
||||
def client(self) -> requests.Session:
|
||||
return self._client
|
||||
|
||||
@staticmethod
|
||||
def prepare_send_data(
|
||||
files: Optional[Dict[str, Any]] = None,
|
||||
post_data: Optional[Union[Dict[str, Any], bytes, BinaryIO]] = None,
|
||||
raw: bool = False,
|
||||
) -> SendData:
|
||||
if files:
|
||||
if post_data is None:
|
||||
post_data = {}
|
||||
else:
|
||||
# When creating a `MultipartEncoder` instance with data-types
|
||||
# which don't have an `encode` method it will cause an error:
|
||||
# object has no attribute 'encode'
|
||||
# So convert common non-string types into strings.
|
||||
if TYPE_CHECKING:
|
||||
assert isinstance(post_data, dict)
|
||||
for k, v in post_data.items():
|
||||
if isinstance(v, bool):
|
||||
v = int(v)
|
||||
if isinstance(v, (complex, float, int)):
|
||||
post_data[k] = str(v)
|
||||
post_data["file"] = files.get("file")
|
||||
post_data["avatar"] = files.get("avatar")
|
||||
|
||||
data = MultipartEncoder(fields=post_data)
|
||||
return SendData(data=data, content_type=data.content_type)
|
||||
|
||||
if raw and post_data:
|
||||
return SendData(data=post_data, content_type="application/octet-stream")
|
||||
|
||||
if TYPE_CHECKING:
|
||||
assert not isinstance(post_data, BinaryIO)
|
||||
|
||||
return SendData(json=post_data, content_type="application/json")
|
||||
|
||||
def http_request(
|
||||
self,
|
||||
method: str,
|
||||
url: str,
|
||||
json: Optional[Union[Dict[str, Any], bytes]] = None,
|
||||
data: Optional[Union[Dict[str, Any], MultipartEncoder]] = None,
|
||||
params: Optional[Any] = None,
|
||||
timeout: Optional[float] = None,
|
||||
verify: Optional[Union[bool, str]] = True,
|
||||
stream: Optional[bool] = False,
|
||||
**kwargs: Any,
|
||||
) -> RequestsResponse:
|
||||
"""Make HTTP request
|
||||
|
||||
Args:
|
||||
method: The HTTP method to call ('get', 'post', 'put', 'delete', etc.)
|
||||
url: The full URL
|
||||
data: The data to send to the server in the body of the request
|
||||
json: Data to send in the body in json by default
|
||||
timeout: The timeout, in seconds, for the request
|
||||
verify: Whether SSL certificates should be validated. If
|
||||
the value is a string, it is the path to a CA file used for
|
||||
certificate validation.
|
||||
stream: Whether the data should be streamed
|
||||
|
||||
Returns:
|
||||
A requests Response object.
|
||||
"""
|
||||
response: requests.Response = self._client.request(
|
||||
method=method,
|
||||
url=url,
|
||||
params=params,
|
||||
data=data,
|
||||
timeout=timeout,
|
||||
stream=stream,
|
||||
verify=verify,
|
||||
json=json,
|
||||
**kwargs,
|
||||
)
|
||||
return RequestsResponse(response=response)
|
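
# --- Editor's note: illustrative usage sketch, not part of the vendored requests_backend.py above. ---
# Exercising the backend defined above directly. The URL is a public GitLab endpoint used
# as a placeholder; unauthenticated calls may simply return a 401.
from gitlab._backends.requests_backend import RequestsBackend

backend = RequestsBackend()
resp = backend.http_request(method="get", url="https://gitlab.com/api/v4/version", timeout=10.0)
print(resp.status_code, resp.reason)
if resp.status_code == 200:
    print(resp.json())
# --- end of editor's sketch ---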
6
env/lib/python3.12/site-packages/gitlab/_version.py
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
__author__ = "Gauvain Pocentek, python-gitlab team"
|
||||
__copyright__ = "Copyright 2013-2019 Gauvain Pocentek, 2019-2023 python-gitlab team"
|
||||
__email__ = "gauvainpocentek@gmail.com"
|
||||
__license__ = "LGPL3"
|
||||
__title__ = "python-gitlab"
|
||||
__version__ = "5.1.0"
|
394
env/lib/python3.12/site-packages/gitlab/base.py
vendored
Normal file
@ -0,0 +1,394 @@
|
||||
import copy
|
||||
import importlib
|
||||
import json
|
||||
import pprint
|
||||
import textwrap
|
||||
from types import ModuleType
|
||||
from typing import Any, Dict, Iterable, Optional, Type, TYPE_CHECKING, Union
|
||||
|
||||
import gitlab
|
||||
from gitlab import types as g_types
|
||||
from gitlab.exceptions import GitlabParsingError
|
||||
|
||||
from .client import Gitlab, GitlabList
|
||||
|
||||
__all__ = [
|
||||
"RESTObject",
|
||||
"RESTObjectList",
|
||||
"RESTManager",
|
||||
]
|
||||
|
||||
|
||||
_URL_ATTRIBUTE_ERROR = (
|
||||
f"https://python-gitlab.readthedocs.io/en/v{gitlab.__version__}/"
|
||||
f"faq.html#attribute-error-list"
|
||||
)
|
||||
|
||||
|
||||
class RESTObject:
|
||||
"""Represents an object built from server data.
|
||||
|
||||
It holds the attributes known from the server in one dict, and the updated attributes in
another. This allows smart updates, if the object allows it.
|
||||
|
||||
You can redefine ``_id_attr`` in child classes to specify which attribute
|
||||
must be used as the unique ID. ``None`` means that the object can be updated
|
||||
without ID in the url.
|
||||
|
||||
Likewise, you can define a ``_repr_attr`` in subclasses to specify which
|
||||
attribute should be added as a human-readable identifier when called in the
|
||||
object's ``__repr__()`` method.
|
||||
"""
|
||||
|
||||
_id_attr: Optional[str] = "id"
|
||||
_attrs: Dict[str, Any]
|
||||
_created_from_list: bool # Indicates if object was created from a list() action
|
||||
_module: ModuleType
|
||||
_parent_attrs: Dict[str, Any]
|
||||
_repr_attr: Optional[str] = None
|
||||
_updated_attrs: Dict[str, Any]
|
||||
_lazy: bool
|
||||
manager: "RESTManager"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
manager: "RESTManager",
|
||||
attrs: Dict[str, Any],
|
||||
*,
|
||||
created_from_list: bool = False,
|
||||
lazy: bool = False,
|
||||
) -> None:
|
||||
if not isinstance(attrs, dict):
|
||||
raise GitlabParsingError(
|
||||
f"Attempted to initialize RESTObject with a non-dictionary value: "
|
||||
f"{attrs!r}\nThis likely indicates an incorrect or malformed server "
|
||||
f"response."
|
||||
)
|
||||
self.__dict__.update(
|
||||
{
|
||||
"manager": manager,
|
||||
"_attrs": attrs,
|
||||
"_updated_attrs": {},
|
||||
"_module": importlib.import_module(self.__module__),
|
||||
"_created_from_list": created_from_list,
|
||||
"_lazy": lazy,
|
||||
}
|
||||
)
|
||||
self.__dict__["_parent_attrs"] = self.manager.parent_attrs
|
||||
self._create_managers()
|
||||
|
||||
def __getstate__(self) -> Dict[str, Any]:
|
||||
state = self.__dict__.copy()
|
||||
module = state.pop("_module")
|
||||
state["_module_name"] = module.__name__
|
||||
return state
|
||||
|
||||
def __setstate__(self, state: Dict[str, Any]) -> None:
|
||||
module_name = state.pop("_module_name")
|
||||
self.__dict__.update(state)
|
||||
self.__dict__["_module"] = importlib.import_module(module_name)
|
||||
|
||||
def __getattr__(self, name: str) -> Any:
|
||||
if name in self.__dict__["_updated_attrs"]:
|
||||
return self.__dict__["_updated_attrs"][name]
|
||||
|
||||
if name in self.__dict__["_attrs"]:
|
||||
value = self.__dict__["_attrs"][name]
|
||||
# If the value is a list, we copy it in the _updated_attrs dict
|
||||
# because we are not able to detect changes made on the object
|
||||
# (append, insert, pop, ...). Without forcing the attr
|
||||
# creation __setattr__ is never called, the list never ends up
|
||||
# in the _updated_attrs dict, and the update() and save()
|
||||
# method never push the new data to the server.
|
||||
# See https://github.com/python-gitlab/python-gitlab/issues/306
|
||||
#
|
||||
# note: _parent_attrs will only store simple values (int) so we
|
||||
# don't make this check in the next block.
|
||||
if isinstance(value, list):
|
||||
self.__dict__["_updated_attrs"][name] = value[:]
|
||||
return self.__dict__["_updated_attrs"][name]
|
||||
|
||||
return value
|
||||
|
||||
if name in self.__dict__["_parent_attrs"]:
|
||||
return self.__dict__["_parent_attrs"][name]
|
||||
|
||||
message = f"{type(self).__name__!r} object has no attribute {name!r}"
|
||||
if self._created_from_list:
|
||||
message = (
|
||||
f"{message}\n\n"
|
||||
+ textwrap.fill(
|
||||
f"{self.__class__!r} was created via a list() call and "
|
||||
f"only a subset of the data may be present. To ensure "
|
||||
f"all data is present get the object using a "
|
||||
f"get(object.id) call. For more details, see:"
|
||||
)
|
||||
+ f"\n\n{_URL_ATTRIBUTE_ERROR}"
|
||||
)
|
||||
elif self._lazy:
|
||||
message = f"{message}\n\n" + textwrap.fill(
|
||||
f"If you tried to access object attributes returned from the server, "
|
||||
f"note that {self.__class__!r} was created as a `lazy` object and was "
|
||||
f"not initialized with any data."
|
||||
)
|
||||
raise AttributeError(message)
|
||||
|
||||
def __setattr__(self, name: str, value: Any) -> None:
|
||||
self.__dict__["_updated_attrs"][name] = value
|
||||
|
||||
def asdict(self, *, with_parent_attrs: bool = False) -> Dict[str, Any]:
|
||||
data = {}
|
||||
if with_parent_attrs:
|
||||
data.update(copy.deepcopy(self._parent_attrs))
|
||||
data.update(copy.deepcopy(self._attrs))
|
||||
data.update(copy.deepcopy(self._updated_attrs))
|
||||
return data
|
||||
|
||||
@property
|
||||
def attributes(self) -> Dict[str, Any]:
|
||||
return self.asdict(with_parent_attrs=True)
|
||||
|
||||
def to_json(self, *, with_parent_attrs: bool = False, **kwargs: Any) -> str:
|
||||
return json.dumps(self.asdict(with_parent_attrs=with_parent_attrs), **kwargs)
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"{type(self)} => {self.asdict()}"
|
||||
|
||||
def pformat(self) -> str:
|
||||
return f"{type(self)} => \n{pprint.pformat(self.asdict())}"
|
||||
|
||||
def pprint(self) -> None:
|
||||
print(self.pformat())
|
||||
|
||||
def __repr__(self) -> str:
|
||||
name = self.__class__.__name__
|
||||
|
||||
if (self._id_attr and self._repr_value) and (self._id_attr != self._repr_attr):
|
||||
return (
|
||||
f"<{name} {self._id_attr}:{self.get_id()} "
|
||||
f"{self._repr_attr}:{self._repr_value}>"
|
||||
)
|
||||
if self._id_attr:
|
||||
return f"<{name} {self._id_attr}:{self.get_id()}>"
|
||||
if self._repr_value:
|
||||
return f"<{name} {self._repr_attr}:{self._repr_value}>"
|
||||
|
||||
return f"<{name}>"
|
||||
|
||||
def __eq__(self, other: object) -> bool:
|
||||
if not isinstance(other, RESTObject):
|
||||
return NotImplemented
|
||||
if self.get_id() and other.get_id():
|
||||
return self.get_id() == other.get_id()
|
||||
return super() == other
|
||||
|
||||
def __ne__(self, other: object) -> bool:
|
||||
if not isinstance(other, RESTObject):
|
||||
return NotImplemented
|
||||
if self.get_id() and other.get_id():
|
||||
return self.get_id() != other.get_id()
|
||||
return super() != other
|
||||
|
||||
def __dir__(self) -> Iterable[str]:
|
||||
return set(self.attributes).union(super().__dir__())
|
||||
|
||||
def __hash__(self) -> int:
|
||||
if not self.get_id():
|
||||
return super().__hash__()
|
||||
return hash(self.get_id())
|
||||
|
||||
def _create_managers(self) -> None:
|
||||
# NOTE(jlvillal): We are creating our managers by looking at the class
|
||||
# annotations. If an attribute is annotated as being a *Manager type
|
||||
# then we create the manager and assign it to the attribute.
|
||||
for attr, annotation in sorted(self.__class__.__annotations__.items()):
|
||||
# We ignore creating a manager for the 'manager' attribute as that
|
||||
# is done in the self.__init__() method
|
||||
if attr in ("manager",):
|
||||
continue
|
||||
if not isinstance(annotation, (type, str)): # pragma: no cover
|
||||
continue
|
||||
if isinstance(annotation, type):
|
||||
cls_name = annotation.__name__
|
||||
else:
|
||||
cls_name = annotation
|
||||
# All *Manager classes are used except for the base "RESTManager" class
|
||||
if cls_name == "RESTManager" or not cls_name.endswith("Manager"):
|
||||
continue
|
||||
cls = getattr(self._module, cls_name)
|
||||
manager = cls(self.manager.gitlab, parent=self)
|
||||
# Since we have our own __setattr__ method, we can't use setattr()
|
||||
self.__dict__[attr] = manager
|
||||
|
||||
def _update_attrs(self, new_attrs: Dict[str, Any]) -> None:
|
||||
self.__dict__["_updated_attrs"] = {}
|
||||
self.__dict__["_attrs"] = new_attrs
|
||||
|
||||
def get_id(self) -> Optional[Union[int, str]]:
|
||||
"""Returns the id of the resource."""
|
||||
if self._id_attr is None or not hasattr(self, self._id_attr):
|
||||
return None
|
||||
id_val = getattr(self, self._id_attr)
|
||||
if TYPE_CHECKING:
|
||||
assert id_val is None or isinstance(id_val, (int, str))
|
||||
return id_val
|
||||
|
||||
@property
|
||||
def _repr_value(self) -> Optional[str]:
|
||||
"""Safely returns the human-readable resource name if present."""
|
||||
if self._repr_attr is None or not hasattr(self, self._repr_attr):
|
||||
return None
|
||||
repr_val = getattr(self, self._repr_attr)
|
||||
if TYPE_CHECKING:
|
||||
assert isinstance(repr_val, str)
|
||||
return repr_val
|
||||
|
||||
@property
|
||||
def encoded_id(self) -> Optional[Union[int, str]]:
|
||||
"""Ensure that the ID is url-encoded so that it can be safely used in a URL
|
||||
path"""
|
||||
obj_id = self.get_id()
|
||||
if isinstance(obj_id, str):
|
||||
obj_id = gitlab.utils.EncodedId(obj_id)
|
||||
return obj_id
|
||||
|
||||
|
||||
class RESTObjectList:
|
||||
"""Generator object representing a list of RESTObject's.
|
||||
|
||||
This generator uses the Gitlab pagination system to fetch new data when
|
||||
required.
|
||||
|
||||
Note: you should not instantiate such objects, they are returned by calls
|
||||
to RESTManager.list()
|
||||
|
||||
Args:
|
||||
manager: Manager to attach to the created objects
|
||||
obj_cls: Type of objects to create from the json data
|
||||
_list: A GitlabList object
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, manager: "RESTManager", obj_cls: Type[RESTObject], _list: GitlabList
|
||||
) -> None:
|
||||
"""Creates an objects list from a GitlabList.
|
||||
|
||||
You should not create objects of this type, but use managers list()
|
||||
methods instead.
|
||||
|
||||
Args:
|
||||
manager: the RESTManager to attach to the objects
|
||||
obj_cls: the class of the created objects
|
||||
_list: the GitlabList holding the data
|
||||
"""
|
||||
self.manager = manager
|
||||
self._obj_cls = obj_cls
|
||||
self._list = _list
|
||||
|
||||
def __iter__(self) -> "RESTObjectList":
|
||||
return self
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self._list)
|
||||
|
||||
def __next__(self) -> RESTObject:
|
||||
return self.next()
|
||||
|
||||
def next(self) -> RESTObject:
|
||||
data = self._list.next()
|
||||
return self._obj_cls(self.manager, data, created_from_list=True)
|
||||
|
||||
@property
|
||||
def current_page(self) -> int:
|
||||
"""The current page number."""
|
||||
return self._list.current_page
|
||||
|
||||
@property
|
||||
def prev_page(self) -> Optional[int]:
|
||||
"""The previous page number.
|
||||
|
||||
If None, the current page is the first.
|
||||
"""
|
||||
return self._list.prev_page
|
||||
|
||||
@property
|
||||
def next_page(self) -> Optional[int]:
|
||||
"""The next page number.
|
||||
|
||||
If None, the current page is the last.
|
||||
"""
|
||||
return self._list.next_page
|
||||
|
||||
@property
|
||||
def per_page(self) -> Optional[int]:
|
||||
"""The number of items per page."""
|
||||
return self._list.per_page
|
||||
|
||||
@property
|
||||
def total_pages(self) -> Optional[int]:
|
||||
"""The total number of pages."""
|
||||
return self._list.total_pages
|
||||
|
||||
@property
|
||||
def total(self) -> Optional[int]:
|
||||
"""The total number of items."""
|
||||
return self._list.total
|
||||
|
||||
|
||||
class RESTManager:
|
||||
"""Base class for CRUD operations on objects.
|
||||
|
||||
Derived class must define ``_path`` and ``_obj_cls``.
|
||||
|
||||
``_path``: Base URL path on which requests will be sent (e.g. '/projects')
|
||||
``_obj_cls``: The class of objects that will be created
|
||||
"""
|
||||
|
||||
_create_attrs: g_types.RequiredOptional = g_types.RequiredOptional()
|
||||
_update_attrs: g_types.RequiredOptional = g_types.RequiredOptional()
|
||||
_path: Optional[str] = None
|
||||
_obj_cls: Optional[Type[RESTObject]] = None
|
||||
_from_parent_attrs: Dict[str, Any] = {}
|
||||
_types: Dict[str, Type[g_types.GitlabAttribute]] = {}
|
||||
|
||||
_computed_path: Optional[str]
|
||||
_parent: Optional[RESTObject]
|
||||
_parent_attrs: Dict[str, Any]
|
||||
gitlab: Gitlab
|
||||
|
||||
def __init__(self, gl: Gitlab, parent: Optional[RESTObject] = None) -> None:
|
||||
"""REST manager constructor.
|
||||
|
||||
Args:
|
||||
gl: :class:`~gitlab.Gitlab` connection to use to make requests.
|
||||
parent: REST object to which the manager is attached.
|
||||
"""
|
||||
self.gitlab = gl
|
||||
self._parent = parent # for nested managers
|
||||
self._computed_path = self._compute_path()
|
||||
|
||||
@property
|
||||
def parent_attrs(self) -> Optional[Dict[str, Any]]:
|
||||
return self._parent_attrs
|
||||
|
||||
def _compute_path(self, path: Optional[str] = None) -> Optional[str]:
|
||||
self._parent_attrs = {}
|
||||
if path is None:
|
||||
path = self._path
|
||||
if path is None:
|
||||
return None
|
||||
if self._parent is None or not self._from_parent_attrs:
|
||||
return path
|
||||
|
||||
data: Dict[str, Optional[gitlab.utils.EncodedId]] = {}
|
||||
for self_attr, parent_attr in self._from_parent_attrs.items():
|
||||
if not hasattr(self._parent, parent_attr):
|
||||
data[self_attr] = None
|
||||
continue
|
||||
data[self_attr] = gitlab.utils.EncodedId(getattr(self._parent, parent_attr))
|
||||
self._parent_attrs = data
|
||||
return path.format(**data)
|
||||
|
||||
@property
|
||||
def path(self) -> Optional[str]:
|
||||
return self._computed_path
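# Illustrative sketch (not part of the vendored file): a minimal derived manager.
# The resource names, path template and parent attribute below are assumptions made
# only to show the _path / _obj_cls / _from_parent_attrs contract described above.
class _Example(RESTObject):
    _id_attr = "id"


class _ExampleManager(RESTManager):
    _path = "/examples/{example_id}/items"
    _obj_cls = _Example
    _from_parent_attrs = {"example_id": "id"}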
|
420
env/lib/python3.12/site-packages/gitlab/cli.py
vendored
Normal file
@ -0,0 +1,420 @@
|
||||
import argparse
|
||||
import dataclasses
|
||||
import functools
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import sys
|
||||
from types import ModuleType
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
cast,
|
||||
Dict,
|
||||
NoReturn,
|
||||
Optional,
|
||||
Tuple,
|
||||
Type,
|
||||
TYPE_CHECKING,
|
||||
TypeVar,
|
||||
Union,
|
||||
)
|
||||
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
import gitlab.config
|
||||
from gitlab.base import RESTObject
|
||||
|
||||
# This regex is based on:
|
||||
# https://github.com/jpvanhal/inflection/blob/master/inflection/__init__.py
|
||||
camel_upperlower_regex = re.compile(r"([A-Z]+)([A-Z][a-z])")
|
||||
camel_lowerupper_regex = re.compile(r"([a-z\d])([A-Z])")
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class CustomAction:
|
||||
required: Tuple[str, ...]
|
||||
optional: Tuple[str, ...]
|
||||
in_object: bool
|
||||
requires_id: bool # if the `_id_attr` value should be a required argument
|
||||
help: Optional[str] # help text for the custom action
|
||||
|
||||
|
||||
# custom_actions = {
|
||||
# cls: {
|
||||
# action: CustomAction,
|
||||
# },
|
||||
# }
|
||||
custom_actions: Dict[str, Dict[str, CustomAction]] = {}
|
||||
|
||||
|
||||
# For an explanation of how these type-hints work see:
|
||||
# https://mypy.readthedocs.io/en/stable/generics.html#declaring-decorators
|
||||
#
|
||||
# The goal here is that functions which get decorated will retain their types.
|
||||
__F = TypeVar("__F", bound=Callable[..., Any])
|
||||
|
||||
|
||||
def register_custom_action(
|
||||
*,
|
||||
cls_names: Union[str, Tuple[str, ...]],
|
||||
required: Tuple[str, ...] = (),
|
||||
optional: Tuple[str, ...] = (),
|
||||
custom_action: Optional[str] = None,
|
||||
requires_id: bool = True, # if the `_id_attr` value should be a required argument
|
||||
help: Optional[str] = None, # help text for the action
|
||||
) -> Callable[[__F], __F]:
|
||||
def wrap(f: __F) -> __F:
|
||||
@functools.wraps(f)
|
||||
def wrapped_f(*args: Any, **kwargs: Any) -> Any:
|
||||
return f(*args, **kwargs)
|
||||
|
||||
# in_obj defines whether the method belongs to the obj or the manager
|
||||
in_obj = True
|
||||
if isinstance(cls_names, tuple):
|
||||
classes = cls_names
|
||||
else:
|
||||
classes = (cls_names,)
|
||||
|
||||
for cls_name in classes:
|
||||
final_name = cls_name
|
||||
if cls_name.endswith("Manager"):
|
||||
final_name = cls_name.replace("Manager", "")
|
||||
in_obj = False
|
||||
if final_name not in custom_actions:
|
||||
custom_actions[final_name] = {}
|
||||
|
||||
action = custom_action or f.__name__.replace("_", "-")
|
||||
custom_actions[final_name][action] = CustomAction(
|
||||
required=required,
|
||||
optional=optional,
|
||||
in_object=in_obj,
|
||||
requires_id=requires_id,
|
||||
help=help,
|
||||
)
|
||||
|
||||
return cast(__F, wrapped_f)
|
||||
|
||||
return wrap
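# Illustrative sketch (not part of the vendored file): registering a hypothetical
# "star" action for Project objects. The resource name and callback are assumptions
# made only to show how the custom_actions registry gets populated.
@register_custom_action(
    cls_names="Project", required=("id",), custom_action="star", help="Star a project"
)
def _example_star(obj: Any, *args: Any, **kwargs: Any) -> None:
    ...


assert "star" in custom_actions["Project"]
assert custom_actions["Project"]["star"].in_object is True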
|
||||
|
||||
|
||||
def die(msg: str, e: Optional[Exception] = None) -> NoReturn:
|
||||
if e:
|
||||
msg = f"{msg} ({e})"
|
||||
sys.stderr.write(f"{msg}\n")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def gitlab_resource_to_cls(
|
||||
gitlab_resource: str, namespace: ModuleType
|
||||
) -> Type[RESTObject]:
|
||||
classes = CaseInsensitiveDict(namespace.__dict__)
|
||||
lowercase_class = gitlab_resource.replace("-", "")
|
||||
class_type = classes[lowercase_class]
|
||||
if TYPE_CHECKING:
|
||||
assert isinstance(class_type, type)
|
||||
assert issubclass(class_type, RESTObject)
|
||||
return class_type
|
||||
|
||||
|
||||
def cls_to_gitlab_resource(cls: RESTObject) -> str:
|
||||
dasherized_uppercase = camel_upperlower_regex.sub(r"\1-\2", cls.__name__)
|
||||
dasherized_lowercase = camel_lowerupper_regex.sub(r"\1-\2", dasherized_uppercase)
|
||||
return dasherized_lowercase.lower()
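# Illustrative sketch (not part of the vendored file): the two regexes above turn a
# CamelCase class name into the dashed resource name used by the CLI;
# "ProjectMergeRequest" is just an example input.
_example_name = "ProjectMergeRequest"
_example_dashed = camel_lowerupper_regex.sub(
    r"\1-\2", camel_upperlower_regex.sub(r"\1-\2", _example_name)
)
assert _example_dashed.lower() == "project-merge-request"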
|
||||
|
||||
|
||||
def _get_base_parser(add_help: bool = True) -> argparse.ArgumentParser:
|
||||
parser = argparse.ArgumentParser(
|
||||
add_help=add_help,
|
||||
description="GitLab API Command Line Interface",
|
||||
allow_abbrev=False,
|
||||
)
|
||||
parser.add_argument("--version", help="Display the version.", action="store_true")
|
||||
parser.add_argument(
|
||||
"-v",
|
||||
"--verbose",
|
||||
"--fancy",
|
||||
help="Verbose mode (legacy format only) [env var: GITLAB_VERBOSE]",
|
||||
action="store_true",
|
||||
default=os.getenv("GITLAB_VERBOSE"),
|
||||
)
|
||||
parser.add_argument(
|
||||
"-d",
|
||||
"--debug",
|
||||
help="Debug mode (display HTTP requests) [env var: GITLAB_DEBUG]",
|
||||
action="store_true",
|
||||
default=os.getenv("GITLAB_DEBUG"),
|
||||
)
|
||||
parser.add_argument(
|
||||
"-c",
|
||||
"--config-file",
|
||||
action="append",
|
||||
help=(
|
||||
"Configuration file to use. Can be used multiple times. "
|
||||
"[env var: PYTHON_GITLAB_CFG]"
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"-g",
|
||||
"--gitlab",
|
||||
help=(
|
||||
"Which configuration section should "
|
||||
"be used. If not defined, the default selection "
|
||||
"will be used."
|
||||
),
|
||||
required=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"-o",
|
||||
"--output",
|
||||
help="Output format (v4 only): json|legacy|yaml",
|
||||
required=False,
|
||||
choices=["json", "legacy", "yaml"],
|
||||
default="legacy",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-f",
|
||||
"--fields",
|
||||
help=(
|
||||
"Fields to display in the output (comma "
|
||||
"separated). Not used with legacy output"
|
||||
),
|
||||
required=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--server-url",
|
||||
help=("GitLab server URL [env var: GITLAB_URL]"),
|
||||
required=False,
|
||||
default=os.getenv("GITLAB_URL"),
|
||||
)
|
||||
|
||||
ssl_verify_group = parser.add_mutually_exclusive_group()
|
||||
ssl_verify_group.add_argument(
|
||||
"--ssl-verify",
|
||||
help=(
|
||||
"Path to a CA_BUNDLE file or directory with certificates of trusted CAs. "
|
||||
"[env var: GITLAB_SSL_VERIFY]"
|
||||
),
|
||||
required=False,
|
||||
default=os.getenv("GITLAB_SSL_VERIFY"),
|
||||
)
|
||||
ssl_verify_group.add_argument(
|
||||
"--no-ssl-verify",
|
||||
help="Disable SSL verification",
|
||||
required=False,
|
||||
dest="ssl_verify",
|
||||
action="store_false",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--timeout",
|
||||
help=(
|
||||
"Timeout to use for requests to the GitLab server. "
|
||||
"[env var: GITLAB_TIMEOUT]"
|
||||
),
|
||||
required=False,
|
||||
type=int,
|
||||
default=os.getenv("GITLAB_TIMEOUT"),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--api-version",
|
||||
help=("GitLab API version [env var: GITLAB_API_VERSION]"),
|
||||
required=False,
|
||||
default=os.getenv("GITLAB_API_VERSION"),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--per-page",
|
||||
help=(
|
||||
"Number of entries to return per page in the response. "
|
||||
"[env var: GITLAB_PER_PAGE]"
|
||||
),
|
||||
required=False,
|
||||
type=int,
|
||||
default=os.getenv("GITLAB_PER_PAGE"),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--pagination",
|
||||
help=(
|
||||
"Whether to use keyset or offset pagination [env var: GITLAB_PAGINATION]"
|
||||
),
|
||||
required=False,
|
||||
default=os.getenv("GITLAB_PAGINATION"),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--order-by",
|
||||
help=("Set order_by globally [env var: GITLAB_ORDER_BY]"),
|
||||
required=False,
|
||||
default=os.getenv("GITLAB_ORDER_BY"),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--user-agent",
|
||||
help=(
|
||||
"The user agent to send to GitLab with the HTTP request. "
|
||||
"[env var: GITLAB_USER_AGENT]"
|
||||
),
|
||||
required=False,
|
||||
default=os.getenv("GITLAB_USER_AGENT"),
|
||||
)
|
||||
|
||||
tokens = parser.add_mutually_exclusive_group()
|
||||
tokens.add_argument(
|
||||
"--private-token",
|
||||
help=("GitLab private access token [env var: GITLAB_PRIVATE_TOKEN]"),
|
||||
required=False,
|
||||
default=os.getenv("GITLAB_PRIVATE_TOKEN"),
|
||||
)
|
||||
tokens.add_argument(
|
||||
"--oauth-token",
|
||||
help=("GitLab OAuth token [env var: GITLAB_OAUTH_TOKEN]"),
|
||||
required=False,
|
||||
default=os.getenv("GITLAB_OAUTH_TOKEN"),
|
||||
)
|
||||
tokens.add_argument(
|
||||
"--job-token",
|
||||
help=("GitLab CI job token [env var: CI_JOB_TOKEN]"),
|
||||
required=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--skip-login",
|
||||
help=(
|
||||
"Skip initial authenticated API call to the current user endpoint. "
|
||||
"This may be useful when invoking the CLI in scripts. "
|
||||
"[env var: GITLAB_SKIP_LOGIN]"
|
||||
),
|
||||
action="store_true",
|
||||
default=os.getenv("GITLAB_SKIP_LOGIN"),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-mask-credentials",
|
||||
help="Don't mask credentials in debug mode",
|
||||
dest="mask_credentials",
|
||||
action="store_false",
|
||||
)
|
||||
return parser
|
||||
|
||||
|
||||
def _get_parser() -> argparse.ArgumentParser:
|
||||
# NOTE: We must delay import of gitlab.v4.cli until now or
|
||||
# otherwise it will cause circular import errors
|
||||
from gitlab.v4 import cli as v4_cli
|
||||
|
||||
parser = _get_base_parser()
|
||||
return v4_cli.extend_parser(parser)
|
||||
|
||||
|
||||
def _parse_value(v: Any) -> Any:
|
||||
if isinstance(v, str) and v.startswith("@@"):
|
||||
return v[1:]
|
||||
if isinstance(v, str) and v.startswith("@"):
|
||||
# If the user-provided value starts with @, we try to read the file
|
||||
# path provided after @ as the real value.
|
||||
filepath = pathlib.Path(v[1:]).expanduser().resolve()
|
||||
try:
|
||||
with open(filepath, encoding="utf-8") as f:
|
||||
return f.read()
|
||||
except UnicodeDecodeError:
|
||||
with open(filepath, "rb") as f:
|
||||
return f.read()
|
||||
except OSError as exc:
|
||||
exc_name = type(exc).__name__
|
||||
sys.stderr.write(f"{exc_name}: {exc}\n")
|
||||
sys.exit(1)
|
||||
|
||||
return v
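# Illustrative sketch (not part of the vendored file): non-string values pass through
# unchanged, and a leading "@@" escapes the file-reading behaviour of a single "@".
assert _parse_value(42) == 42
assert _parse_value("@@literal-at-sign") == "@literal-at-sign"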
|
||||
|
||||
|
||||
def docs() -> argparse.ArgumentParser: # pragma: no cover
|
||||
"""
|
||||
Provide a statically generated parser for sphinx only, so we don't need
|
||||
to provide dummy gitlab config for readthedocs.
|
||||
"""
|
||||
if "sphinx" not in sys.modules:
|
||||
sys.exit("Docs parser is only intended for build_sphinx")
|
||||
|
||||
return _get_parser()
|
||||
|
||||
|
||||
def main() -> None:
|
||||
if "--version" in sys.argv:
|
||||
print(gitlab.__version__)
|
||||
sys.exit(0)
|
||||
|
||||
parser = _get_base_parser(add_help=False)
|
||||
|
||||
# This first parsing step is used to find the gitlab config to use, and
|
||||
# load the proper module (v3 or v4) accordingly. At that point we don't have
|
||||
# any subparser setup
|
||||
(options, _) = parser.parse_known_args(sys.argv)
|
||||
try:
|
||||
config = gitlab.config.GitlabConfigParser(options.gitlab, options.config_file)
|
||||
except gitlab.config.ConfigError as e:
|
||||
if "--help" in sys.argv or "-h" in sys.argv:
|
||||
parser.print_help()
|
||||
sys.exit(0)
|
||||
sys.exit(str(e))
|
||||
# We only support v4 API at this time
|
||||
if config.api_version not in ("4",): # dead code # pragma: no cover
|
||||
raise ModuleNotFoundError(f"gitlab.v{config.api_version}.cli")
|
||||
|
||||
# Now we build the entire set of subcommands and do the complete parsing
|
||||
parser = _get_parser()
|
||||
try:
|
||||
import argcomplete # type: ignore
|
||||
|
||||
argcomplete.autocomplete(parser) # pragma: no cover
|
||||
except Exception:
|
||||
pass
|
||||
args = parser.parse_args()
|
||||
|
||||
config_files = args.config_file
|
||||
gitlab_id = args.gitlab
|
||||
verbose = args.verbose
|
||||
output = args.output
|
||||
fields = []
|
||||
if args.fields:
|
||||
fields = [x.strip() for x in args.fields.split(",")]
|
||||
debug = args.debug
|
||||
gitlab_resource = args.gitlab_resource
|
||||
resource_action = args.resource_action
|
||||
skip_login = args.skip_login
|
||||
mask_credentials = args.mask_credentials
|
||||
|
||||
args_dict = vars(args)
|
||||
# Remove CLI behavior-related args
|
||||
for item in (
|
||||
"api_version",
|
||||
"config_file",
|
||||
"debug",
|
||||
"fields",
|
||||
"gitlab",
|
||||
"gitlab_resource",
|
||||
"job_token",
|
||||
"mask_credentials",
|
||||
"oauth_token",
|
||||
"output",
|
||||
"pagination",
|
||||
"private_token",
|
||||
"resource_action",
|
||||
"server_url",
|
||||
"skip_login",
|
||||
"ssl_verify",
|
||||
"timeout",
|
||||
"user_agent",
|
||||
"verbose",
|
||||
"version",
|
||||
):
|
||||
args_dict.pop(item)
|
||||
args_dict = {k: _parse_value(v) for k, v in args_dict.items() if v is not None}
|
||||
|
||||
try:
|
||||
gl = gitlab.Gitlab.merge_config(vars(options), gitlab_id, config_files)
|
||||
if debug:
|
||||
gl.enable_debug(mask_credentials=mask_credentials)
|
||||
if not skip_login and (gl.private_token or gl.oauth_token):
|
||||
gl.auth()
|
||||
except Exception as e:
|
||||
die(str(e))
|
||||
|
||||
gitlab.v4.cli.run(
|
||||
gl, gitlab_resource, resource_action, args_dict, verbose, output, fields
|
||||
)
|
1369
env/lib/python3.12/site-packages/gitlab/client.py
vendored
Normal file
File diff suppressed because it is too large
287
env/lib/python3.12/site-packages/gitlab/config.py
vendored
Normal file
@ -0,0 +1,287 @@
|
||||
import configparser
|
||||
import os
|
||||
import shlex
|
||||
import subprocess
|
||||
from os.path import expanduser, expandvars
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from gitlab.const import USER_AGENT
|
||||
|
||||
_DEFAULT_FILES: List[str] = [
|
||||
"/etc/python-gitlab.cfg",
|
||||
str(Path.home() / ".python-gitlab.cfg"),
|
||||
]
|
||||
|
||||
HELPER_PREFIX = "helper:"
|
||||
|
||||
HELPER_ATTRIBUTES = ["job_token", "http_password", "private_token", "oauth_token"]
|
||||
|
||||
_CONFIG_PARSER_ERRORS = (configparser.NoOptionError, configparser.NoSectionError)
|
||||
|
||||
|
||||
def _resolve_file(filepath: Union[Path, str]) -> str:
|
||||
resolved = Path(filepath).resolve(strict=True)
|
||||
return str(resolved)
|
||||
|
||||
|
||||
def _get_config_files(
|
||||
config_files: Optional[List[str]] = None,
|
||||
) -> Union[str, List[str]]:
|
||||
"""
|
||||
Return resolved path(s) to config files if they exist, with precedence:
|
||||
1. Files passed in config_files
|
||||
2. File defined in PYTHON_GITLAB_CFG
|
||||
3. User- and system-wide config files
|
||||
"""
|
||||
resolved_files = []
|
||||
|
||||
if config_files:
|
||||
for config_file in config_files:
|
||||
try:
|
||||
resolved = _resolve_file(config_file)
|
||||
except OSError as e:
|
||||
raise GitlabConfigMissingError(
|
||||
f"Cannot read config from file: {e}"
|
||||
) from e
|
||||
resolved_files.append(resolved)
|
||||
|
||||
return resolved_files
|
||||
|
||||
try:
|
||||
env_config = os.environ["PYTHON_GITLAB_CFG"]
|
||||
return _resolve_file(env_config)
|
||||
except KeyError:
|
||||
pass
|
||||
except OSError as e:
|
||||
raise GitlabConfigMissingError(
|
||||
f"Cannot read config from PYTHON_GITLAB_CFG: {e}"
|
||||
) from e
|
||||
|
||||
for config_file in _DEFAULT_FILES:
|
||||
try:
|
||||
resolved = _resolve_file(config_file)
|
||||
except OSError:
|
||||
continue
|
||||
resolved_files.append(resolved)
|
||||
|
||||
return resolved_files
|
||||
|
||||
|
||||
class ConfigError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabIDError(ConfigError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabDataError(ConfigError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabConfigMissingError(ConfigError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabConfigHelperError(ConfigError):
|
||||
pass
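# Illustrative sketch (not part of the vendored file): an assumed ~/.python-gitlab.cfg
# that GitlabConfigParser below can read. Section names and the token are placeholders.
#
#   [global]
#   default = example
#   ssl_verify = true
#   timeout = 10
#
#   [example]
#   url = https://gitlab.example.com
#   private_token = <your-token>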
|
||||
|
||||
|
||||
class GitlabConfigParser:
|
||||
def __init__(
|
||||
self, gitlab_id: Optional[str] = None, config_files: Optional[List[str]] = None
|
||||
) -> None:
|
||||
self.gitlab_id = gitlab_id
|
||||
self.http_username: Optional[str] = None
|
||||
self.http_password: Optional[str] = None
|
||||
self.job_token: Optional[str] = None
|
||||
self.oauth_token: Optional[str] = None
|
||||
self.private_token: Optional[str] = None
|
||||
|
||||
self.api_version: str = "4"
|
||||
self.order_by: Optional[str] = None
|
||||
self.pagination: Optional[str] = None
|
||||
self.per_page: Optional[int] = None
|
||||
self.retry_transient_errors: bool = False
|
||||
self.ssl_verify: Union[bool, str] = True
|
||||
self.timeout: int = 60
|
||||
self.url: Optional[str] = None
|
||||
self.user_agent: str = USER_AGENT
|
||||
self.keep_base_url: bool = False
|
||||
|
||||
self._files = _get_config_files(config_files)
|
||||
if self._files:
|
||||
self._parse_config()
|
||||
|
||||
if self.gitlab_id and not self._files:
|
||||
raise GitlabConfigMissingError(
|
||||
f"A gitlab id was provided ({self.gitlab_id}) but no config file found"
|
||||
)
|
||||
|
||||
def _parse_config(self) -> None:
|
||||
_config = configparser.ConfigParser()
|
||||
_config.read(self._files, encoding="utf-8")
|
||||
|
||||
if self.gitlab_id and not _config.has_section(self.gitlab_id):
|
||||
raise GitlabDataError(
|
||||
f"A gitlab id was provided ({self.gitlab_id}) "
|
||||
"but no config section found"
|
||||
)
|
||||
|
||||
if self.gitlab_id is None:
|
||||
try:
|
||||
self.gitlab_id = _config.get("global", "default")
|
||||
except Exception as e:
|
||||
raise GitlabIDError(
|
||||
"Impossible to get the gitlab id (not specified in config file)"
|
||||
) from e
|
||||
|
||||
try:
|
||||
self.url = _config.get(self.gitlab_id, "url")
|
||||
except Exception as e:
|
||||
raise GitlabDataError(
|
||||
"Impossible to get gitlab details from "
|
||||
f"configuration ({self.gitlab_id})"
|
||||
) from e
|
||||
|
||||
try:
|
||||
self.ssl_verify = _config.getboolean("global", "ssl_verify")
|
||||
except ValueError:
|
||||
# Value Error means the option exists but isn't a boolean.
|
||||
# Get as a string instead as it should then be a local path to a
|
||||
# CA bundle.
|
||||
self.ssl_verify = _config.get("global", "ssl_verify")
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
try:
|
||||
self.ssl_verify = _config.getboolean(self.gitlab_id, "ssl_verify")
|
||||
except ValueError:
|
||||
# Value Error means the option exists but isn't a boolean.
|
||||
# Get as a string instead as it should then be a local path to a
|
||||
# CA bundle.
|
||||
self.ssl_verify = _config.get(self.gitlab_id, "ssl_verify")
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
|
||||
try:
|
||||
self.timeout = _config.getint("global", "timeout")
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
try:
|
||||
self.timeout = _config.getint(self.gitlab_id, "timeout")
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
|
||||
try:
|
||||
self.private_token = _config.get(self.gitlab_id, "private_token")
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
|
||||
try:
|
||||
self.oauth_token = _config.get(self.gitlab_id, "oauth_token")
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
|
||||
try:
|
||||
self.job_token = _config.get(self.gitlab_id, "job_token")
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
|
||||
try:
|
||||
self.http_username = _config.get(self.gitlab_id, "http_username")
|
||||
self.http_password = _config.get(
|
||||
self.gitlab_id, "http_password"
|
||||
) # pragma: no cover
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
|
||||
self._get_values_from_helper()
|
||||
|
||||
try:
|
||||
self.api_version = _config.get("global", "api_version")
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
try:
|
||||
self.api_version = _config.get(self.gitlab_id, "api_version")
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
if self.api_version not in ("4",):
|
||||
raise GitlabDataError(f"Unsupported API version: {self.api_version}")
|
||||
|
||||
for section in ["global", self.gitlab_id]:
|
||||
try:
|
||||
self.per_page = _config.getint(section, "per_page")
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
if self.per_page is not None and not 0 <= self.per_page <= 100:
|
||||
raise GitlabDataError(f"Unsupported per_page number: {self.per_page}")
|
||||
|
||||
try:
|
||||
self.pagination = _config.get(self.gitlab_id, "pagination")
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
|
||||
try:
|
||||
self.order_by = _config.get(self.gitlab_id, "order_by")
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
|
||||
try:
|
||||
self.user_agent = _config.get("global", "user_agent")
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
try:
|
||||
self.user_agent = _config.get(self.gitlab_id, "user_agent")
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
|
||||
try:
|
||||
self.keep_base_url = _config.getboolean("global", "keep_base_url")
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
try:
|
||||
self.keep_base_url = _config.getboolean(self.gitlab_id, "keep_base_url")
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
|
||||
try:
|
||||
self.retry_transient_errors = _config.getboolean(
|
||||
"global", "retry_transient_errors"
|
||||
)
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
try:
|
||||
self.retry_transient_errors = _config.getboolean(
|
||||
self.gitlab_id, "retry_transient_errors"
|
||||
)
|
||||
except _CONFIG_PARSER_ERRORS:
|
||||
pass
|
||||
|
||||
def _get_values_from_helper(self) -> None:
|
||||
"""Update attributes that may get values from an external helper program"""
|
||||
for attr in HELPER_ATTRIBUTES:
|
||||
value = getattr(self, attr)
|
||||
if not isinstance(value, str):
|
||||
continue
|
||||
|
||||
if not value.lower().strip().startswith(HELPER_PREFIX):
|
||||
continue
|
||||
|
||||
helper = value[len(HELPER_PREFIX) :].strip()
|
||||
command = [expanduser(expandvars(token)) for token in shlex.split(helper)]
|
||||
|
||||
try:
|
||||
value = (
|
||||
subprocess.check_output(command, stderr=subprocess.PIPE)
|
||||
.decode("utf-8")
|
||||
.strip()
|
||||
)
|
||||
except subprocess.CalledProcessError as e:
|
||||
stderr = e.stderr.decode().strip()
|
||||
raise GitlabConfigHelperError(
|
||||
f"Failed to read {attr} value from helper "
|
||||
f"for {self.gitlab_id}:\n{stderr}"
|
||||
) from e
|
||||
|
||||
setattr(self, attr, value)
|
169
env/lib/python3.12/site-packages/gitlab/const.py
vendored
Normal file
@ -0,0 +1,169 @@
|
||||
from enum import Enum, IntEnum
|
||||
|
||||
from gitlab._version import __title__, __version__
|
||||
|
||||
|
||||
class GitlabEnum(str, Enum):
|
||||
"""An enum mixed in with str to make it JSON-serializable."""
|
||||
|
||||
|
||||
# https://gitlab.com/gitlab-org/gitlab/-/blob/e97357824bedf007e75f8782259fe07435b64fbb/lib/gitlab/access.rb#L12-18
|
||||
class AccessLevel(IntEnum):
|
||||
NO_ACCESS: int = 0
|
||||
MINIMAL_ACCESS: int = 5
|
||||
GUEST: int = 10
|
||||
PLANNER: int = 15
|
||||
REPORTER: int = 20
|
||||
DEVELOPER: int = 30
|
||||
MAINTAINER: int = 40
|
||||
OWNER: int = 50
|
||||
ADMIN: int = 60
|
||||
|
||||
|
||||
# https://gitlab.com/gitlab-org/gitlab/-/blob/e97357824bedf007e75f8782259fe07435b64fbb/lib/gitlab/visibility_level.rb#L23-25
|
||||
class Visibility(GitlabEnum):
|
||||
PRIVATE: str = "private"
|
||||
INTERNAL: str = "internal"
|
||||
PUBLIC: str = "public"
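# Illustrative sketch (not part of the vendored file): because GitlabEnum mixes in str,
# members compare equal to their string values and serialize cleanly to JSON.
import json as _json

assert Visibility.PRIVATE == "private"
assert _json.dumps({"visibility": Visibility.PRIVATE}) == '{"visibility": "private"}'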
|
||||
|
||||
|
||||
class NotificationLevel(GitlabEnum):
|
||||
DISABLED: str = "disabled"
|
||||
PARTICIPATING: str = "participating"
|
||||
WATCH: str = "watch"
|
||||
GLOBAL: str = "global"
|
||||
MENTION: str = "mention"
|
||||
CUSTOM: str = "custom"
|
||||
|
||||
|
||||
# https://gitlab.com/gitlab-org/gitlab/-/blob/e97357824bedf007e75f8782259fe07435b64fbb/app/views/search/_category.html.haml#L10-37
|
||||
class SearchScope(GitlabEnum):
|
||||
# all scopes (global, group and project)
|
||||
PROJECTS: str = "projects"
|
||||
ISSUES: str = "issues"
|
||||
MERGE_REQUESTS: str = "merge_requests"
|
||||
MILESTONES: str = "milestones"
|
||||
WIKI_BLOBS: str = "wiki_blobs"
|
||||
COMMITS: str = "commits"
|
||||
BLOBS: str = "blobs"
|
||||
USERS: str = "users"
|
||||
|
||||
# specific global scope
|
||||
GLOBAL_SNIPPET_TITLES: str = "snippet_titles"
|
||||
|
||||
# specific project scope
|
||||
PROJECT_NOTES: str = "notes"
|
||||
|
||||
|
||||
# https://docs.gitlab.com/ee/api/merge_requests.html#merge-status
|
||||
class DetailedMergeStatus(GitlabEnum):
|
||||
# possible values for the detailed_merge_status field of Merge Requests
|
||||
BLOCKED_STATUS: str = "blocked_status"
|
||||
BROKEN_STATUS: str = "broken_status"
|
||||
CHECKING: str = "checking"
|
||||
UNCHECKED: str = "unchecked"
|
||||
CI_MUST_PASS: str = "ci_must_pass"
|
||||
CI_STILL_RUNNING: str = "ci_still_running"
|
||||
DISCUSSIONS_NOT_RESOLVED: str = "discussions_not_resolved"
|
||||
DRAFT_STATUS: str = "draft_status"
|
||||
EXTERNAL_STATUS_CHECKS: str = "external_status_checks"
|
||||
MERGEABLE: str = "mergeable"
|
||||
NOT_APPROVED: str = "not_approved"
|
||||
NOT_OPEN: str = "not_open"
|
||||
POLICIES_DENIED: str = "policies_denied"
|
||||
|
||||
|
||||
# https://docs.gitlab.com/ee/api/pipelines.html
|
||||
class PipelineStatus(GitlabEnum):
|
||||
CREATED: str = "created"
|
||||
WAITING_FOR_RESOURCE: str = "waiting_for_resource"
|
||||
PREPARING: str = "preparing"
|
||||
PENDING: str = "pending"
|
||||
RUNNING: str = "running"
|
||||
SUCCESS: str = "success"
|
||||
FAILED: str = "failed"
|
||||
CANCELED: str = "canceled"
|
||||
SKIPPED: str = "skipped"
|
||||
MANUAL: str = "manual"
|
||||
SCHEDULED: str = "scheduled"
|
||||
|
||||
|
||||
DEFAULT_URL: str = "https://gitlab.com"
|
||||
|
||||
NO_ACCESS = AccessLevel.NO_ACCESS.value
|
||||
MINIMAL_ACCESS = AccessLevel.MINIMAL_ACCESS.value
|
||||
GUEST_ACCESS = AccessLevel.GUEST.value
|
||||
REPORTER_ACCESS = AccessLevel.REPORTER.value
|
||||
DEVELOPER_ACCESS = AccessLevel.DEVELOPER.value
|
||||
MAINTAINER_ACCESS = AccessLevel.MAINTAINER.value
|
||||
OWNER_ACCESS = AccessLevel.OWNER.value
|
||||
ADMIN_ACCESS = AccessLevel.ADMIN.value
|
||||
|
||||
VISIBILITY_PRIVATE = Visibility.PRIVATE.value
|
||||
VISIBILITY_INTERNAL = Visibility.INTERNAL.value
|
||||
VISIBILITY_PUBLIC = Visibility.PUBLIC.value
|
||||
|
||||
NOTIFICATION_LEVEL_DISABLED = NotificationLevel.DISABLED.value
|
||||
NOTIFICATION_LEVEL_PARTICIPATING = NotificationLevel.PARTICIPATING.value
|
||||
NOTIFICATION_LEVEL_WATCH = NotificationLevel.WATCH.value
|
||||
NOTIFICATION_LEVEL_GLOBAL = NotificationLevel.GLOBAL.value
|
||||
NOTIFICATION_LEVEL_MENTION = NotificationLevel.MENTION.value
|
||||
NOTIFICATION_LEVEL_CUSTOM = NotificationLevel.CUSTOM.value
|
||||
|
||||
# Search scopes
|
||||
# all scopes (global, group and project)
|
||||
SEARCH_SCOPE_PROJECTS = SearchScope.PROJECTS.value
|
||||
SEARCH_SCOPE_ISSUES = SearchScope.ISSUES.value
|
||||
SEARCH_SCOPE_MERGE_REQUESTS = SearchScope.MERGE_REQUESTS.value
|
||||
SEARCH_SCOPE_MILESTONES = SearchScope.MILESTONES.value
|
||||
SEARCH_SCOPE_WIKI_BLOBS = SearchScope.WIKI_BLOBS.value
|
||||
SEARCH_SCOPE_COMMITS = SearchScope.COMMITS.value
|
||||
SEARCH_SCOPE_BLOBS = SearchScope.BLOBS.value
|
||||
SEARCH_SCOPE_USERS = SearchScope.USERS.value
|
||||
|
||||
# specific global scope
|
||||
SEARCH_SCOPE_GLOBAL_SNIPPET_TITLES = SearchScope.GLOBAL_SNIPPET_TITLES.value
|
||||
|
||||
# specific project scope
|
||||
SEARCH_SCOPE_PROJECT_NOTES = SearchScope.PROJECT_NOTES.value
|
||||
|
||||
USER_AGENT: str = f"{__title__}/{__version__}"
|
||||
|
||||
NO_JSON_RESPONSE_CODES = [204]
|
||||
RETRYABLE_TRANSIENT_ERROR_CODES = [500, 502, 503, 504] + list(range(520, 531))
|
||||
|
||||
__all__ = [
|
||||
"AccessLevel",
|
||||
"Visibility",
|
||||
"NotificationLevel",
|
||||
"SearchScope",
|
||||
"ADMIN_ACCESS",
|
||||
"DEFAULT_URL",
|
||||
"DEVELOPER_ACCESS",
|
||||
"GUEST_ACCESS",
|
||||
"MAINTAINER_ACCESS",
|
||||
"MINIMAL_ACCESS",
|
||||
"NO_ACCESS",
|
||||
"NOTIFICATION_LEVEL_CUSTOM",
|
||||
"NOTIFICATION_LEVEL_DISABLED",
|
||||
"NOTIFICATION_LEVEL_GLOBAL",
|
||||
"NOTIFICATION_LEVEL_MENTION",
|
||||
"NOTIFICATION_LEVEL_PARTICIPATING",
|
||||
"NOTIFICATION_LEVEL_WATCH",
|
||||
"OWNER_ACCESS",
|
||||
"REPORTER_ACCESS",
|
||||
"SEARCH_SCOPE_BLOBS",
|
||||
"SEARCH_SCOPE_COMMITS",
|
||||
"SEARCH_SCOPE_GLOBAL_SNIPPET_TITLES",
|
||||
"SEARCH_SCOPE_ISSUES",
|
||||
"SEARCH_SCOPE_MERGE_REQUESTS",
|
||||
"SEARCH_SCOPE_MILESTONES",
|
||||
"SEARCH_SCOPE_PROJECT_NOTES",
|
||||
"SEARCH_SCOPE_PROJECTS",
|
||||
"SEARCH_SCOPE_USERS",
|
||||
"SEARCH_SCOPE_WIKI_BLOBS",
|
||||
"USER_AGENT",
|
||||
"VISIBILITY_INTERNAL",
|
||||
"VISIBILITY_PRIVATE",
|
||||
"VISIBILITY_PUBLIC",
|
||||
]
|
428
env/lib/python3.12/site-packages/gitlab/exceptions.py
vendored
Normal file
@ -0,0 +1,428 @@
|
||||
import functools
|
||||
from typing import Any, Callable, cast, Optional, Type, TYPE_CHECKING, TypeVar, Union
|
||||
|
||||
|
||||
class GitlabError(Exception):
|
||||
def __init__(
|
||||
self,
|
||||
error_message: Union[str, bytes] = "",
|
||||
response_code: Optional[int] = None,
|
||||
response_body: Optional[bytes] = None,
|
||||
) -> None:
|
||||
Exception.__init__(self, error_message)
|
||||
# Http status code
|
||||
self.response_code = response_code
|
||||
# Full http response
|
||||
self.response_body = response_body
|
||||
# Parsed error message from gitlab
|
||||
try:
|
||||
# if we receive str/bytes we try to convert to unicode/str to have
|
||||
# consistent message types (see #616)
|
||||
if TYPE_CHECKING:
|
||||
assert isinstance(error_message, bytes)
|
||||
self.error_message = error_message.decode()
|
||||
except Exception:
|
||||
if TYPE_CHECKING:
|
||||
assert isinstance(error_message, str)
|
||||
self.error_message = error_message
|
||||
|
||||
def __str__(self) -> str:
|
||||
if self.response_code is not None:
|
||||
return f"{self.response_code}: {self.error_message}"
|
||||
return f"{self.error_message}"
|
||||
|
||||
|
||||
class GitlabAuthenticationError(GitlabError):
|
||||
pass
|
||||
|
||||
|
||||
class RedirectError(GitlabError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabParsingError(GitlabError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabCiLintError(GitlabError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabConnectionError(GitlabError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabOperationError(GitlabError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabHttpError(GitlabError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabListError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabGetError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabHeadError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabCreateError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabUpdateError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabDeleteError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabSetError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabProtectError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabTransferProjectError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabGroupTransferError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabProjectDeployKeyError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabPromoteError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabCancelError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabPipelineCancelError(GitlabCancelError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabRetryError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabBuildCancelError(GitlabCancelError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabBuildRetryError(GitlabRetryError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabBuildPlayError(GitlabRetryError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabBuildEraseError(GitlabRetryError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabJobCancelError(GitlabCancelError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabJobRetryError(GitlabRetryError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabJobPlayError(GitlabRetryError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabJobEraseError(GitlabRetryError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabPipelinePlayError(GitlabRetryError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabPipelineRetryError(GitlabRetryError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabBlockError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabUnblockError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabDeactivateError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabActivateError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabBanError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabUnbanError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabSubscribeError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabUnsubscribeError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabMRForbiddenError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabMRApprovalError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabMRRebaseError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabMRResetApprovalError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabMRClosedError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabMROnBuildSuccessError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabTodoError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabTopicMergeError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabTimeTrackingError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabUploadError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabAttachFileError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabImportError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabInvitationError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabCherryPickError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabHousekeepingError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabOwnershipError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabSearchError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabStopError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabMarkdownError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabVerifyError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabRenderError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabRepairError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabRestoreError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabRevertError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabRotateError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabLicenseError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabFollowError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabUnfollowError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabUserApproveError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabUserRejectError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabDeploymentApprovalError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
class GitlabHookTestError(GitlabOperationError):
|
||||
pass
|
||||
|
||||
|
||||
# For an explanation of how these type-hints work see:
|
||||
# https://mypy.readthedocs.io/en/stable/generics.html#declaring-decorators
|
||||
#
|
||||
# The goal here is that functions which get decorated will retain their types.
|
||||
__F = TypeVar("__F", bound=Callable[..., Any])
|
||||
|
||||
|
||||
def on_http_error(error: Type[Exception]) -> Callable[[__F], __F]:
|
||||
"""Manage GitlabHttpError exceptions.
|
||||
|
||||
This decorator function can be used to catch GitlabHttpError exceptions
|
||||
and raise specialized exceptions instead.
|
||||
|
||||
Args:
|
||||
error: The exception type to raise -- must inherit from GitlabError
|
||||
"""
|
||||
|
||||
def wrap(f: __F) -> __F:
|
||||
@functools.wraps(f)
|
||||
def wrapped_f(*args: Any, **kwargs: Any) -> Any:
|
||||
try:
|
||||
return f(*args, **kwargs)
|
||||
except GitlabHttpError as e:
|
||||
raise error(e.error_message, e.response_code, e.response_body) from e
|
||||
|
||||
return cast(__F, wrapped_f)
|
||||
|
||||
return wrap
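# Illustrative sketch (not part of the vendored file): a decorated helper translates the
# generic GitlabHttpError into the specific class passed to on_http_error. The function
# below is a stand-in, not a real API call.
@on_http_error(GitlabGetError)
def _example_fetch() -> None:
    raise GitlabHttpError("not found", 404)


try:
    _example_fetch()
except GitlabGetError as exc:
    assert exc.response_code == 404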
|
||||
|
||||
|
||||
# Export manually to keep mypy happy
|
||||
__all__ = [
|
||||
"GitlabActivateError",
|
||||
"GitlabAttachFileError",
|
||||
"GitlabAuthenticationError",
|
||||
"GitlabBanError",
|
||||
"GitlabBlockError",
|
||||
"GitlabBuildCancelError",
|
||||
"GitlabBuildEraseError",
|
||||
"GitlabBuildPlayError",
|
||||
"GitlabBuildRetryError",
|
||||
"GitlabCancelError",
|
||||
"GitlabCherryPickError",
|
||||
"GitlabCiLintError",
|
||||
"GitlabConnectionError",
|
||||
"GitlabCreateError",
|
||||
"GitlabDeactivateError",
|
||||
"GitlabDeleteError",
|
||||
"GitlabDeploymentApprovalError",
|
||||
"GitlabError",
|
||||
"GitlabFollowError",
|
||||
"GitlabGetError",
|
||||
"GitlabGroupTransferError",
|
||||
"GitlabHeadError",
|
||||
"GitlabHookTestError",
|
||||
"GitlabHousekeepingError",
|
||||
"GitlabHttpError",
|
||||
"GitlabImportError",
|
||||
"GitlabInvitationError",
|
||||
"GitlabJobCancelError",
|
||||
"GitlabJobEraseError",
|
||||
"GitlabJobPlayError",
|
||||
"GitlabJobRetryError",
|
||||
"GitlabLicenseError",
|
||||
"GitlabListError",
|
||||
"GitlabMRApprovalError",
|
||||
"GitlabMRClosedError",
|
||||
"GitlabMRForbiddenError",
|
||||
"GitlabMROnBuildSuccessError",
|
||||
"GitlabMRRebaseError",
|
||||
"GitlabMRResetApprovalError",
|
||||
"GitlabMarkdownError",
|
||||
"GitlabOperationError",
|
||||
"GitlabOwnershipError",
|
||||
"GitlabParsingError",
|
||||
"GitlabPipelineCancelError",
|
||||
"GitlabPipelinePlayError",
|
||||
"GitlabPipelineRetryError",
|
||||
"GitlabProjectDeployKeyError",
|
||||
"GitlabPromoteError",
|
||||
"GitlabProtectError",
|
||||
"GitlabRenderError",
|
||||
"GitlabRepairError",
|
||||
"GitlabRestoreError",
|
||||
"GitlabRetryError",
|
||||
"GitlabRevertError",
|
||||
"GitlabRotateError",
|
||||
"GitlabSearchError",
|
||||
"GitlabSetError",
|
||||
"GitlabStopError",
|
||||
"GitlabSubscribeError",
|
||||
"GitlabTimeTrackingError",
|
||||
"GitlabTodoError",
|
||||
"GitlabTopicMergeError",
|
||||
"GitlabTransferProjectError",
|
||||
"GitlabUnbanError",
|
||||
"GitlabUnblockError",
|
||||
"GitlabUnfollowError",
|
||||
"GitlabUnsubscribeError",
|
||||
"GitlabUpdateError",
|
||||
"GitlabUploadError",
|
||||
"GitlabUserApproveError",
|
||||
"GitlabUserRejectError",
|
||||
"GitlabVerifyError",
|
||||
"RedirectError",
|
||||
]
|
1099
env/lib/python3.12/site-packages/gitlab/mixins.py
vendored
Normal file
File diff suppressed because it is too large
0
env/lib/python3.12/site-packages/gitlab/py.typed
vendored
Normal file
105
env/lib/python3.12/site-packages/gitlab/types.py
vendored
Normal file
@ -0,0 +1,105 @@
|
||||
import dataclasses
|
||||
from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class RequiredOptional:
|
||||
required: Tuple[str, ...] = ()
|
||||
optional: Tuple[str, ...] = ()
|
||||
exclusive: Tuple[str, ...] = ()
|
||||
|
||||
def validate_attrs(
|
||||
self,
|
||||
*,
|
||||
data: Dict[str, Any],
|
||||
excludes: Optional[List[str]] = None,
|
||||
) -> None:
|
||||
if excludes is None:
|
||||
excludes = []
|
||||
|
||||
if self.required:
|
||||
required = [k for k in self.required if k not in excludes]
|
||||
missing = [attr for attr in required if attr not in data]
|
||||
if missing:
|
||||
raise AttributeError(f"Missing attributes: {', '.join(missing)}")
|
||||
|
||||
if self.exclusive:
|
||||
exclusives = [attr for attr in data if attr in self.exclusive]
|
||||
if len(exclusives) > 1:
|
||||
raise AttributeError(
|
||||
f"Provide only one of these attributes: {', '.join(exclusives)}"
|
||||
)
|
||||
if not exclusives:
|
||||
raise AttributeError(
|
||||
f"Must provide one of these attributes: "
|
||||
f"{', '.join(self.exclusive)}"
|
||||
)
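# Illustrative sketch (not part of the vendored file): a manager would describe its
# accepted attributes with RequiredOptional and validate user input before a request.
_example_spec = RequiredOptional(required=("name",), optional=("description",))
_example_spec.validate_attrs(data={"name": "demo"})  # passes silently
try:
    _example_spec.validate_attrs(data={"description": "missing the name"})
except AttributeError as exc:
    assert "Missing attributes: name" in str(exc)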
|
||||
|
||||
|
||||
class GitlabAttribute:
|
||||
def __init__(self, value: Any = None) -> None:
|
||||
self._value = value
|
||||
|
||||
def get(self) -> Any:
|
||||
return self._value
|
||||
|
||||
def set_from_cli(self, cli_value: Any) -> None:
|
||||
self._value = cli_value
|
||||
|
||||
def get_for_api(self, *, key: str) -> Tuple[str, Any]:
|
||||
return (key, self._value)
|
||||
|
||||
|
||||
class _ListArrayAttribute(GitlabAttribute):
|
||||
"""Helper class to support `list` / `array` types."""
|
||||
|
||||
def set_from_cli(self, cli_value: str) -> None:
|
||||
if not cli_value.strip():
|
||||
self._value = []
|
||||
else:
|
||||
self._value = [item.strip() for item in cli_value.split(",")]
|
||||
|
||||
def get_for_api(self, *, key: str) -> Tuple[str, str]:
|
||||
# Do not comma-split single value passed as string
|
||||
if isinstance(self._value, str):
|
||||
return (key, self._value)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
assert isinstance(self._value, list)
|
||||
return (key, ",".join([str(x) for x in self._value]))
|
||||
|
||||
|
||||
class ArrayAttribute(_ListArrayAttribute):
|
||||
"""To support `array` types as documented in
|
||||
https://docs.gitlab.com/ee/api/#array"""
|
||||
|
||||
def get_for_api(self, *, key: str) -> Tuple[str, Any]:
|
||||
if isinstance(self._value, str):
|
||||
return (f"{key}[]", self._value)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
assert isinstance(self._value, list)
|
||||
return (f"{key}[]", self._value)
|
||||
|
||||
|
||||
class CommaSeparatedListAttribute(_ListArrayAttribute):
|
||||
"""For values which are sent to the server as a Comma Separated Values
|
||||
(CSV) string. We allow them to be specified as a list and we convert them
|
||||
into a CSV string."""
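# Illustrative sketch (not part of the vendored file): a CLI value such as
# "bug, critical , backend" becomes a list internally and is re-joined into a CSV
# string for the API.
_example_labels = CommaSeparatedListAttribute()
_example_labels.set_from_cli("bug, critical , backend")
assert _example_labels.get_for_api(key="labels") == ("labels", "bug,critical,backend")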
|
||||
|
||||
|
||||
class LowercaseStringAttribute(GitlabAttribute):
|
||||
def get_for_api(self, *, key: str) -> Tuple[str, str]:
|
||||
return (key, str(self._value).lower())
|
||||
|
||||
|
||||
class FileAttribute(GitlabAttribute):
|
||||
@staticmethod
|
||||
def get_file_name(attr_name: Optional[str] = None) -> Optional[str]:
|
||||
return attr_name
|
||||
|
||||
|
||||
class ImageAttribute(FileAttribute):
|
||||
@staticmethod
|
||||
def get_file_name(attr_name: Optional[str] = None) -> str:
|
||||
return f"{attr_name}.png" if attr_name else "image.png"
|
303
env/lib/python3.12/site-packages/gitlab/utils.py
vendored
Normal file
@ -0,0 +1,303 @@
|
||||
import dataclasses
|
||||
import email.message
|
||||
import logging
|
||||
import pathlib
|
||||
import time
|
||||
import traceback
|
||||
import urllib.parse
|
||||
import warnings
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
Dict,
|
||||
Iterator,
|
||||
Literal,
|
||||
MutableMapping,
|
||||
Optional,
|
||||
Tuple,
|
||||
Type,
|
||||
Union,
|
||||
)
|
||||
|
||||
import requests
|
||||
|
||||
from gitlab import const, types
|
||||
|
||||
|
||||
class _StdoutStream:
|
||||
def __call__(self, chunk: Any) -> None:
|
||||
print(chunk)
|
||||
|
||||
|
||||
def get_base_url(url: Optional[str] = None) -> str:
|
||||
"""Return the base URL with the trailing slash stripped.
|
||||
If the URL is a Falsy value, return the default URL.
|
||||
Returns:
|
||||
The base URL
|
||||
"""
|
||||
if not url:
|
||||
return const.DEFAULT_URL
|
||||
|
||||
return url.rstrip("/")
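# Illustrative sketch (not part of the vendored file): the hostname is a placeholder.
assert get_base_url("https://gitlab.example.com/") == "https://gitlab.example.com"
assert get_base_url(None) == const.DEFAULT_URL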
|
||||
|
||||
|
||||
def get_content_type(content_type: Optional[str]) -> str:
|
||||
message = email.message.Message()
|
||||
if content_type is not None:
|
||||
message["content-type"] = content_type
|
||||
|
||||
return message.get_content_type()
|
||||
|
||||
|
||||
class MaskingFormatter(logging.Formatter):
|
||||
"""A logging formatter that can mask credentials"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
fmt: Optional[str] = logging.BASIC_FORMAT,
|
||||
datefmt: Optional[str] = None,
|
||||
style: Literal["%", "{", "$"] = "%",
|
||||
validate: bool = True,
|
||||
masked: Optional[str] = None,
|
||||
) -> None:
|
||||
super().__init__(fmt, datefmt, style, validate)
|
||||
self.masked = masked
|
||||
|
||||
def _filter(self, entry: str) -> str:
|
||||
if not self.masked:
|
||||
return entry
|
||||
|
||||
return entry.replace(self.masked, "[MASKED]")
|
||||
|
||||
def format(self, record: logging.LogRecord) -> str:
|
||||
original = logging.Formatter.format(self, record)
|
||||
return self._filter(original)
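# Illustrative sketch (not part of the vendored file): a formatter built with a secret
# replaces that secret in every rendered log line; the token value is a placeholder.
_example_fmt = MaskingFormatter(masked="glpat-secret")
_example_record = logging.LogRecord(
    "gitlab", logging.DEBUG, __file__, 0, "Authorization: Bearer glpat-secret", None, None
)
assert "[MASKED]" in _example_fmt.format(_example_record)
assert "glpat-secret" not in _example_fmt.format(_example_record)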
|
||||
|
||||
|
||||
def response_content(
|
||||
response: requests.Response,
|
||||
streamed: bool,
|
||||
action: Optional[Callable[[bytes], None]],
|
||||
chunk_size: int,
|
||||
*,
|
||||
iterator: bool,
|
||||
) -> Optional[Union[bytes, Iterator[Any]]]:
|
||||
if iterator:
|
||||
return response.iter_content(chunk_size=chunk_size)
|
||||
|
||||
if streamed is False:
|
||||
return response.content
|
||||
|
||||
if action is None:
|
||||
action = _StdoutStream()
|
||||
|
||||
for chunk in response.iter_content(chunk_size=chunk_size):
|
||||
if chunk:
|
||||
action(chunk)
|
||||
return None
|
||||
|
||||
|
||||
class Retry:
|
||||
def __init__(
|
||||
self,
|
||||
max_retries: int,
|
||||
obey_rate_limit: Optional[bool] = True,
|
||||
retry_transient_errors: Optional[bool] = False,
|
||||
) -> None:
|
||||
self.cur_retries = 0
|
||||
self.max_retries = max_retries
|
||||
self.obey_rate_limit = obey_rate_limit
|
||||
self.retry_transient_errors = retry_transient_errors
|
||||
|
||||
def _retryable_status_code(
|
||||
self, status_code: Optional[int], reason: str = ""
|
||||
) -> bool:
|
||||
if status_code == 429 and self.obey_rate_limit:
|
||||
return True
|
||||
|
||||
if not self.retry_transient_errors:
|
||||
return False
|
||||
if status_code in const.RETRYABLE_TRANSIENT_ERROR_CODES:
|
||||
return True
|
||||
if status_code == 409 and "Resource lock" in reason:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def handle_retry_on_status(
|
||||
self,
|
||||
status_code: Optional[int],
|
||||
headers: Optional[MutableMapping[str, str]] = None,
|
||||
reason: str = "",
|
||||
) -> bool:
|
||||
if not self._retryable_status_code(status_code, reason):
|
||||
return False
|
||||
|
||||
if headers is None:
|
||||
headers = {}
|
||||
|
||||
# Response headers documentation:
|
||||
# https://docs.gitlab.com/ee/user/admin_area/settings/user_and_ip_rate_limits.html#response-headers
|
||||
if self.max_retries == -1 or self.cur_retries < self.max_retries:
|
||||
wait_time = 2**self.cur_retries * 0.1
|
||||
if "Retry-After" in headers:
|
||||
wait_time = int(headers["Retry-After"])
|
||||
elif "RateLimit-Reset" in headers:
|
||||
wait_time = int(headers["RateLimit-Reset"]) - time.time()
|
||||
self.cur_retries += 1
|
||||
time.sleep(wait_time)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def handle_retry(self) -> bool:
|
||||
if self.retry_transient_errors and (
|
||||
self.max_retries == -1 or self.cur_retries < self.max_retries
|
||||
):
|
||||
wait_time = 2**self.cur_retries * 0.1
|
||||
self.cur_retries += 1
|
||||
time.sleep(wait_time)
|
||||
return True
|
||||
|
||||
return False
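# Illustrative sketch (not part of the vendored file): which status codes a Retry
# instance treats as retryable once transient-error retries are enabled.
_example_retry = Retry(max_retries=3, retry_transient_errors=True)
assert _example_retry._retryable_status_code(429) is True   # rate limited
assert _example_retry._retryable_status_code(502) is True   # transient server error
assert _example_retry._retryable_status_code(404) is False  # real error, do not retry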
|
||||
|
||||
|
||||
def _transform_types(
|
||||
data: Dict[str, Any],
|
||||
custom_types: Dict[str, Any],
|
||||
*,
|
||||
transform_data: bool,
|
||||
transform_files: Optional[bool] = True,
|
||||
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
||||
"""Copy the data dict with attributes that have custom types and transform them
|
||||
before being sent to the server.
|
||||
|
||||
``transform_files``: If ``True`` (default), also populates the ``files`` dict for
|
||||
FileAttribute types with tuples to prepare fields for requests' MultipartEncoder:
|
||||
https://toolbelt.readthedocs.io/en/latest/user.html#multipart-form-data-encoder
|
||||
|
||||
``transform_data``: If ``True`` transforms the ``data`` dict with fields
|
||||
suitable for encoding as query parameters for GitLab's API:
|
||||
https://docs.gitlab.com/ee/api/#encoding-api-parameters-of-array-and-hash-types
|
||||
|
||||
Returns:
|
||||
A tuple of the transformed data dict and files dict"""
|
||||
|
||||
# Duplicate data to avoid messing with what the user sent us
|
||||
data = data.copy()
|
||||
if not transform_files and not transform_data:
|
||||
return data, {}
|
||||
|
||||
files = {}
|
||||
|
||||
for attr_name, attr_class in custom_types.items():
|
||||
if attr_name not in data:
|
||||
continue
|
||||
|
||||
gitlab_attribute = attr_class(data[attr_name])
|
||||
|
||||
# if the type is FileAttribute we need to pass the data as file
|
||||
if isinstance(gitlab_attribute, types.FileAttribute) and transform_files:
|
||||
key = gitlab_attribute.get_file_name(attr_name)
|
||||
files[attr_name] = (key, data.pop(attr_name))
|
||||
continue
|
||||
|
||||
if not transform_data:
|
||||
continue
|
||||
|
||||
if isinstance(gitlab_attribute, types.GitlabAttribute):
|
||||
key, value = gitlab_attribute.get_for_api(key=attr_name)
|
||||
if key != attr_name:
|
||||
del data[attr_name]
|
||||
data[key] = value
|
||||
|
||||
return data, files
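# Illustrative sketch (not part of the vendored file): an ArrayAttribute field is
# renamed to the "key[]" form GitLab expects for array query parameters.
_example_data, _example_files = _transform_types(
    {"iids": [1, 2]}, {"iids": types.ArrayAttribute}, transform_data=True
)
assert _example_data == {"iids[]": [1, 2]}
assert _example_files == {}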
|
||||
|
||||
|
||||
def copy_dict(
|
||||
*,
|
||||
src: Dict[str, Any],
|
||||
dest: Dict[str, Any],
|
||||
) -> None:
|
||||
for k, v in src.items():
|
||||
if isinstance(v, dict):
|
||||
# NOTE(jlvillal): This provides some support for the `hash` type
|
||||
# https://docs.gitlab.com/ee/api/#hash
|
||||
# Transform dict values to new attributes. For example:
|
||||
# custom_attributes: {'foo', 'bar'} =>
|
||||
# "custom_attributes['foo']": "bar"
|
||||
for dict_k, dict_v in v.items():
|
||||
dest[f"{k}[{dict_k}]"] = dict_v
|
||||
else:
|
||||
dest[k] = v
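# Illustrative sketch (not part of the vendored file): nested dicts are flattened into
# the "key[subkey]" form used for GitLab hash-type parameters.
_example_dest: Dict[str, Any] = {}
copy_dict(src={"name": "x", "custom_attributes": {"foo": "bar"}}, dest=_example_dest)
assert _example_dest == {"name": "x", "custom_attributes[foo]": "bar"}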
|
||||
|
||||
|
||||
class EncodedId(str):
|
||||
"""A custom `str` class that will return the URL-encoded value of the string.
|
||||
|
||||
* Using it recursively will only url-encode the value once.
|
||||
* Can accept either `str` or `int` as input value.
|
||||
* Can be used in an f-string and output the URL-encoded string.
|
||||
|
||||
Reference to documentation on why this is necessary.
|
||||
|
||||
See::
|
||||
|
||||
https://docs.gitlab.com/ee/api/index.html#namespaced-path-encoding
|
||||
https://docs.gitlab.com/ee/api/index.html#path-parameters
|
||||
"""
|
||||
|
||||
def __new__(cls, value: Union[str, int, "EncodedId"]) -> "EncodedId":
|
||||
if isinstance(value, EncodedId):
|
||||
return value
|
||||
|
||||
if not isinstance(value, (int, str)):
|
||||
raise TypeError(f"Unsupported type received: {type(value)}")
|
||||
if isinstance(value, str):
|
||||
value = urllib.parse.quote(value, safe="")
|
||||
return super().__new__(cls, value)
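# Illustrative sketch (not part of the vendored file): path components are encoded
# exactly once, and integers pass through untouched.
assert EncodedId("diaspora/diaspora") == "diaspora%2Fdiaspora"
assert EncodedId(EncodedId("diaspora/diaspora")) == "diaspora%2Fdiaspora"
assert f"/projects/{EncodedId(42)}" == "/projects/42"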
|
||||
|
||||
|
||||
def remove_none_from_dict(data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
return {k: v for k, v in data.items() if v is not None}
|
||||
|
||||
|
||||
def warn(
|
||||
message: str,
|
||||
*,
|
||||
category: Optional[Type[Warning]] = None,
|
||||
source: Optional[Any] = None,
|
||||
show_caller: bool = True,
|
||||
) -> None:
|
||||
"""This `warnings.warn` wrapper function attempts to show the location causing the
|
||||
warning in the user code that called the library.
|
||||
|
||||
It does this by walking up the stack trace to find the first frame located outside
|
||||
the `gitlab/` directory. This is helpful to users as it shows them their code that
|
||||
is causing the warning.
|
||||
"""
|
||||
# Get `stacklevel` for user code so we indicate where issue is in
|
||||
# their code.
|
||||
pg_dir = pathlib.Path(__file__).parent.resolve()
|
||||
stack = traceback.extract_stack()
|
||||
stacklevel = 1
|
||||
warning_from = ""
|
||||
for stacklevel, frame in enumerate(reversed(stack), start=1):
|
||||
warning_from = f" (python-gitlab: {frame.filename}:{frame.lineno})"
|
||||
frame_dir = str(pathlib.Path(frame.filename).parent.resolve())
|
||||
if not frame_dir.startswith(str(pg_dir)):
|
||||
break
|
||||
if show_caller:
|
||||
message += warning_from
|
||||
warnings.warn(
|
||||
message=message,
|
||||
category=category,
|
||||
stacklevel=stacklevel,
|
||||
source=source,
|
||||
)
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class WarnMessageData:
|
||||
message: str
|
||||
show_caller: bool
|
Some files were not shown because too many files have changed in this diff