diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 763432b529..32c2a23f1e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -314,7 +314,7 @@ Bug Fixes --------- - Fix a bug that ``pipenv run`` doesn't set environment variables correctly. `#4831 `_ -- Fix a bug that certifi can't be loaded within ``notpip``'s vendor library. This makes several objects of ``pip`` fail to be imported. `#4833 `_ +- Fix a bug that certifi can't be loaded within ``pip``'s vendor library. This makes several objects of ``pip`` fail to be imported. `#4833 `_ - Fix a bug that ``3.10.0`` can be found be python finder. `#4837 `_ Vendored Libraries diff --git a/MANIFEST.in b/MANIFEST.in index 2f302dfa1e..f59f03968d 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -12,7 +12,7 @@ recursive-include pipenv/vendor vendor.txt recursive-include pipenv *.json recursive-include pipenv *.rst -include pipenv/patched/notpip/_vendor/vendor.txt +include pipenv/patched/pip/_vendor/vendor.txt include pipenv/patched/patched.txt include pipenv/vendor/Makefile include pipenv/pipenv.1 diff --git a/Makefile b/Makefile index 810f8dbc3e..90579f7257 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ CLEAN_TARGETS := $(addprefix clean-py,$(PY_VERSIONS)) DATE_STRING := $(shell date +%Y.%m.%d) THIS_MONTH_DATE := $(shell date +%Y.%m.01) NEXT_MONTH_DATE := $(shell date -d "+1 month" +%Y.%m.01) -PATCHED_PIP_VERSION := $(shell awk '/__version__/{gsub(/"/,"",$$3); print $$3}' pipenv/patched/notpip/__init__.py) +PATCHED_PIP_VERSION := $(shell awk '/__version__/{gsub(/"/,"",$$3); print $$3}' pipenv/patched/pip/__init__.py) GITDIR_STAMPFILE := $(CURDIR)/.git-checkout-dir create_git_tmpdir = $(shell mktemp -dt pipenv-vendor-XXXXXXXX 2>/dev/null || mktemp -d 2>/dev/null) write_git_tmpdir = $(file > $(GITDIR_STAMPFILE),$(create_git_tmpdir)) diff --git a/news/5196.vendor.rst b/news/5196.vendor.rst new file mode 100644 index 0000000000..636116a0a5 --- /dev/null +++ b/news/5196.vendor.rst @@ -0,0 +1,5 @@ +* Rename patched ``notpip`` to ``pip`` in order to be clear that its a patched version of pip. +* Remove the part of _post_pip_import.patch that overrode the standalone pip to be the user installed pip, +now we fully rely on our vendored and patched ``pip``, even for all types of installs. +* Vendor in the next newest version of ``pip==22.2`` +* Modify patch for ``pipdeptree`` to not use ``pip-shims`` diff --git a/pipenv/__init__.py b/pipenv/__init__.py index eb5661fcb5..b793963fa5 100644 --- a/pipenv/__init__.py +++ b/pipenv/__init__.py @@ -24,7 +24,7 @@ warnings.filterwarnings("ignore", category=UserWarning) # Load patched pip instead of system pip -os.environ["PIP_SHIMS_BASE_MODULE"] = "pipenv.patched.notpip" +os.environ["PIP_SHIMS_BASE_MODULE"] = "pipenv.patched.pip" os.environ["PIP_DISABLE_PIP_VERSION_CHECK"] = "1" # Hack to make things work better. diff --git a/pipenv/core.py b/pipenv/core.py index f521a0979e..0413e481e9 100644 --- a/pipenv/core.py +++ b/pipenv/core.py @@ -84,7 +84,7 @@ def do_clear(project): - from pipenv.patched.notpip._internal import locations + from pipenv.patched.pip._internal import locations click.secho(fix_utf8("Clearing caches..."), bold=True) try: @@ -160,10 +160,10 @@ def cleanup_virtualenv(project, bare=True): def import_requirements(project, r=None, dev=False): - from pipenv.patched.notpip._internal.req.constructors import ( + from pipenv.patched.pip._internal.req.constructors import ( install_req_from_parsed_requirement, ) - from pipenv.patched.notpip._vendor import requests + from pipenv.patched.pip._vendor import requests from pipenv.vendor.pip_shims.shims import parse_requirements # Parse requirements.txt file with Pip's parser. @@ -1141,7 +1141,7 @@ def do_lock( # Support for --keep-outdated... if keep_outdated: - from pipenv.patched.notpip._vendor.packaging.utils import canonicalize_name + from pipenv.patched.pip._vendor.packaging.utils import canonicalize_name for section_name, section in ( ("default", project.packages), @@ -1372,7 +1372,7 @@ def get_pip_args( selective_upgrade: bool = False, src_dir: Optional[str] = None, ) -> List[str]: - from pipenv.patched.notpip._vendor.packaging.version import parse as parse_version + from pipenv.patched.pip._vendor.packaging.version import parse as parse_version arg_map = { "pre": ["--pre"], @@ -1472,7 +1472,7 @@ def pip_install( trusted_hosts=None, use_pep517=True, ): - piplogger = logging.getLogger("pipenv.patched.notpip._internal.commands.install") + piplogger = logging.getLogger("pipenv.patched.pip._internal.commands.install") if not trusted_hosts: trusted_hosts = [] trusted_hosts.extend(os.environ.get("PIP_TRUSTED_HOSTS", [])) @@ -1850,7 +1850,7 @@ def do_outdated(project, pypi_mirror=None, pre=False, clear=False): from collections import namedtuple from collections.abc import Mapping - from pipenv.patched.notpip._vendor.packaging.utils import canonicalize_name + from pipenv.patched.pip._vendor.packaging.utils import canonicalize_name from .vendor.requirementslib.models.requirements import Requirement from .vendor.requirementslib.models.utils import get_version @@ -2293,7 +2293,7 @@ def do_uninstall( pypi_mirror=None, ctx=None, ): - from pipenv.patched.notpip._vendor.packaging.utils import canonicalize_name + from pipenv.patched.pip._vendor.packaging.utils import canonicalize_name from .vendor.requirementslib.models.requirements import Requirement @@ -3010,7 +3010,7 @@ def do_clean( system=False, ): # Ensure that virtualenv is available. - from pipenv.patched.notpip._vendor.packaging.utils import canonicalize_name + from pipenv.patched.pip._vendor.packaging.utils import canonicalize_name ensure_project( project, three=three, python=python, validate=False, pypi_mirror=pypi_mirror diff --git a/pipenv/environment.py b/pipenv/environment.py index 50e0c9fedf..49dae5c23b 100644 --- a/pipenv/environment.py +++ b/pipenv/environment.py @@ -14,7 +14,7 @@ import pkg_resources import pipenv -from pipenv.patched.notpip._vendor.packaging.utils import canonicalize_name +from pipenv.patched.pip._vendor.packaging.utils import canonicalize_name from pipenv.utils.constants import is_type_checking from pipenv.utils.indexes import prepare_pip_source_args from pipenv.utils.processes import subprocess_run @@ -29,7 +29,7 @@ import pip_shims.shims import tomlkit - from pipenv.patched.notpip._vendor.packaging.version import Version + from pipenv.patched.pip._vendor.packaging.version import Version from pipenv.project import Project, TPipfile, TSource BASE_WORKING_SET = pkg_resources.WorkingSet(sys.path) @@ -545,9 +545,7 @@ def pip_version(self) -> Version: Get the pip version in the environment. Useful for knowing which args we can use when installing. """ - from pipenv.patched.notpip._vendor.packaging.version import ( - parse as parse_version, - ) + from pipenv.patched.pip._vendor.packaging.version import parse as parse_version pip = next( iter(pkg for pkg in self.get_installed_packages() if pkg.key == "pip"), None diff --git a/pipenv/patched/notpip/_internal/utils/distutils_args.py b/pipenv/patched/notpip/_internal/utils/distutils_args.py deleted file mode 100644 index e4aa5b827f..0000000000 --- a/pipenv/patched/notpip/_internal/utils/distutils_args.py +++ /dev/null @@ -1,42 +0,0 @@ -from distutils.errors import DistutilsArgError -from distutils.fancy_getopt import FancyGetopt -from typing import Dict, List - -_options = [ - ("exec-prefix=", None, ""), - ("home=", None, ""), - ("install-base=", None, ""), - ("install-data=", None, ""), - ("install-headers=", None, ""), - ("install-lib=", None, ""), - ("install-platlib=", None, ""), - ("install-purelib=", None, ""), - ("install-scripts=", None, ""), - ("prefix=", None, ""), - ("root=", None, ""), - ("user", None, ""), -] - - -# typeshed doesn't permit Tuple[str, None, str], see python/typeshed#3469. -_distutils_getopt = FancyGetopt(_options) # type: ignore - - -def parse_distutils_args(args: List[str]) -> Dict[str, str]: - """Parse provided arguments, returning an object that has the - matched arguments. - - Any unknown arguments are ignored. - """ - result = {} - for arg in args: - try: - _, match = _distutils_getopt.getopt(args=[arg]) - except DistutilsArgError: - # We don't care about any other options, which here may be - # considered unrecognized since our option list is not - # exhaustive. - pass - else: - result.update(match.__dict__) - return result diff --git a/pipenv/patched/notpip/_vendor/certifi/__init__.py b/pipenv/patched/notpip/_vendor/certifi/__init__.py deleted file mode 100644 index 8db1a0e554..0000000000 --- a/pipenv/patched/notpip/_vendor/certifi/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .core import contents, where - -__version__ = "2021.10.08" diff --git a/pipenv/patched/notpip/_vendor/chardet/__init__.py b/pipenv/patched/notpip/_vendor/chardet/__init__.py deleted file mode 100644 index 80ad2546d7..0000000000 --- a/pipenv/patched/notpip/_vendor/chardet/__init__.py +++ /dev/null @@ -1,83 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - - -from .universaldetector import UniversalDetector -from .enums import InputState -from .version import __version__, VERSION - - -__all__ = ['UniversalDetector', 'detect', 'detect_all', '__version__', 'VERSION'] - - -def detect(byte_str): - """ - Detect the encoding of the given byte string. - - :param byte_str: The byte sequence to examine. - :type byte_str: ``bytes`` or ``bytearray`` - """ - if not isinstance(byte_str, bytearray): - if not isinstance(byte_str, bytes): - raise TypeError('Expected object of type bytes or bytearray, got: ' - '{}'.format(type(byte_str))) - else: - byte_str = bytearray(byte_str) - detector = UniversalDetector() - detector.feed(byte_str) - return detector.close() - - -def detect_all(byte_str): - """ - Detect all the possible encodings of the given byte string. - - :param byte_str: The byte sequence to examine. - :type byte_str: ``bytes`` or ``bytearray`` - """ - if not isinstance(byte_str, bytearray): - if not isinstance(byte_str, bytes): - raise TypeError('Expected object of type bytes or bytearray, got: ' - '{}'.format(type(byte_str))) - else: - byte_str = bytearray(byte_str) - - detector = UniversalDetector() - detector.feed(byte_str) - detector.close() - - if detector._input_state == InputState.HIGH_BYTE: - results = [] - for prober in detector._charset_probers: - if prober.get_confidence() > detector.MINIMUM_THRESHOLD: - charset_name = prober.charset_name - lower_charset_name = prober.charset_name.lower() - # Use Windows encoding name instead of ISO-8859 if we saw any - # extra Windows-specific bytes - if lower_charset_name.startswith('iso-8859'): - if detector._has_win_bytes: - charset_name = detector.ISO_WIN_MAP.get(lower_charset_name, - charset_name) - results.append({ - 'encoding': charset_name, - 'confidence': prober.get_confidence(), - 'language': prober.language, - }) - if len(results) > 0: - return sorted(results, key=lambda result: -result['confidence']) - - return [detector.result] diff --git a/pipenv/patched/notpip/_vendor/chardet/cli/__init__.py b/pipenv/patched/notpip/_vendor/chardet/cli/__init__.py deleted file mode 100644 index 8b13789179..0000000000 --- a/pipenv/patched/notpip/_vendor/chardet/cli/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/pipenv/patched/notpip/_vendor/chardet/escsm.py b/pipenv/patched/notpip/_vendor/chardet/escsm.py deleted file mode 100644 index 0069523a04..0000000000 --- a/pipenv/patched/notpip/_vendor/chardet/escsm.py +++ /dev/null @@ -1,246 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .enums import MachineState - -HZ_CLS = ( -1,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,0,0, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,0,0,0,0, # 20 - 27 -0,0,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -0,0,0,0,0,0,0,0, # 40 - 47 -0,0,0,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,4,0,5,2,0, # 78 - 7f -1,1,1,1,1,1,1,1, # 80 - 87 -1,1,1,1,1,1,1,1, # 88 - 8f -1,1,1,1,1,1,1,1, # 90 - 97 -1,1,1,1,1,1,1,1, # 98 - 9f -1,1,1,1,1,1,1,1, # a0 - a7 -1,1,1,1,1,1,1,1, # a8 - af -1,1,1,1,1,1,1,1, # b0 - b7 -1,1,1,1,1,1,1,1, # b8 - bf -1,1,1,1,1,1,1,1, # c0 - c7 -1,1,1,1,1,1,1,1, # c8 - cf -1,1,1,1,1,1,1,1, # d0 - d7 -1,1,1,1,1,1,1,1, # d8 - df -1,1,1,1,1,1,1,1, # e0 - e7 -1,1,1,1,1,1,1,1, # e8 - ef -1,1,1,1,1,1,1,1, # f0 - f7 -1,1,1,1,1,1,1,1, # f8 - ff -) - -HZ_ST = ( -MachineState.START,MachineState.ERROR, 3,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07 -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f -MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START, 4,MachineState.ERROR,# 10-17 - 5,MachineState.ERROR, 6,MachineState.ERROR, 5, 5, 4,MachineState.ERROR,# 18-1f - 4,MachineState.ERROR, 4, 4, 4,MachineState.ERROR, 4,MachineState.ERROR,# 20-27 - 4,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 28-2f -) - -HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0) - -HZ_SM_MODEL = {'class_table': HZ_CLS, - 'class_factor': 6, - 'state_table': HZ_ST, - 'char_len_table': HZ_CHAR_LEN_TABLE, - 'name': "HZ-GB-2312", - 'language': 'Chinese'} - -ISO2022CN_CLS = ( -2,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,0,0, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,0,0,0,0, # 20 - 27 -0,3,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -0,0,0,4,0,0,0,0, # 40 - 47 -0,0,0,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,0,0,0,0,0, # 78 - 7f -2,2,2,2,2,2,2,2, # 80 - 87 -2,2,2,2,2,2,2,2, # 88 - 8f -2,2,2,2,2,2,2,2, # 90 - 97 -2,2,2,2,2,2,2,2, # 98 - 9f -2,2,2,2,2,2,2,2, # a0 - a7 -2,2,2,2,2,2,2,2, # a8 - af -2,2,2,2,2,2,2,2, # b0 - b7 -2,2,2,2,2,2,2,2, # b8 - bf -2,2,2,2,2,2,2,2, # c0 - c7 -2,2,2,2,2,2,2,2, # c8 - cf -2,2,2,2,2,2,2,2, # d0 - d7 -2,2,2,2,2,2,2,2, # d8 - df -2,2,2,2,2,2,2,2, # e0 - e7 -2,2,2,2,2,2,2,2, # e8 - ef -2,2,2,2,2,2,2,2, # f0 - f7 -2,2,2,2,2,2,2,2, # f8 - ff -) - -ISO2022CN_ST = ( -MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07 -MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f -MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17 -MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,# 18-1f -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 20-27 - 5, 6,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 28-2f -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 30-37 -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,# 38-3f -) - -ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0) - -ISO2022CN_SM_MODEL = {'class_table': ISO2022CN_CLS, - 'class_factor': 9, - 'state_table': ISO2022CN_ST, - 'char_len_table': ISO2022CN_CHAR_LEN_TABLE, - 'name': "ISO-2022-CN", - 'language': 'Chinese'} - -ISO2022JP_CLS = ( -2,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,2,2, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,7,0,0,0, # 20 - 27 -3,0,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -6,0,4,0,8,0,0,0, # 40 - 47 -0,9,5,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,0,0,0,0,0, # 78 - 7f -2,2,2,2,2,2,2,2, # 80 - 87 -2,2,2,2,2,2,2,2, # 88 - 8f -2,2,2,2,2,2,2,2, # 90 - 97 -2,2,2,2,2,2,2,2, # 98 - 9f -2,2,2,2,2,2,2,2, # a0 - a7 -2,2,2,2,2,2,2,2, # a8 - af -2,2,2,2,2,2,2,2, # b0 - b7 -2,2,2,2,2,2,2,2, # b8 - bf -2,2,2,2,2,2,2,2, # c0 - c7 -2,2,2,2,2,2,2,2, # c8 - cf -2,2,2,2,2,2,2,2, # d0 - d7 -2,2,2,2,2,2,2,2, # d8 - df -2,2,2,2,2,2,2,2, # e0 - e7 -2,2,2,2,2,2,2,2, # e8 - ef -2,2,2,2,2,2,2,2, # f0 - f7 -2,2,2,2,2,2,2,2, # f8 - ff -) - -ISO2022JP_ST = ( -MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07 -MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17 -MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,# 18-1f -MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 20-27 -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 6,MachineState.ITS_ME,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,# 28-2f -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,# 30-37 -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 38-3f -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.START,# 40-47 -) - -ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - -ISO2022JP_SM_MODEL = {'class_table': ISO2022JP_CLS, - 'class_factor': 10, - 'state_table': ISO2022JP_ST, - 'char_len_table': ISO2022JP_CHAR_LEN_TABLE, - 'name': "ISO-2022-JP", - 'language': 'Japanese'} - -ISO2022KR_CLS = ( -2,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,0,0, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,3,0,0,0, # 20 - 27 -0,4,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -0,0,0,5,0,0,0,0, # 40 - 47 -0,0,0,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,0,0,0,0,0, # 78 - 7f -2,2,2,2,2,2,2,2, # 80 - 87 -2,2,2,2,2,2,2,2, # 88 - 8f -2,2,2,2,2,2,2,2, # 90 - 97 -2,2,2,2,2,2,2,2, # 98 - 9f -2,2,2,2,2,2,2,2, # a0 - a7 -2,2,2,2,2,2,2,2, # a8 - af -2,2,2,2,2,2,2,2, # b0 - b7 -2,2,2,2,2,2,2,2, # b8 - bf -2,2,2,2,2,2,2,2, # c0 - c7 -2,2,2,2,2,2,2,2, # c8 - cf -2,2,2,2,2,2,2,2, # d0 - d7 -2,2,2,2,2,2,2,2, # d8 - df -2,2,2,2,2,2,2,2, # e0 - e7 -2,2,2,2,2,2,2,2, # e8 - ef -2,2,2,2,2,2,2,2, # f0 - f7 -2,2,2,2,2,2,2,2, # f8 - ff -) - -ISO2022KR_ST = ( -MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07 -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f -MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 10-17 -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 18-1f -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 20-27 -) - -ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0) - -ISO2022KR_SM_MODEL = {'class_table': ISO2022KR_CLS, - 'class_factor': 6, - 'state_table': ISO2022KR_ST, - 'char_len_table': ISO2022KR_CHAR_LEN_TABLE, - 'name': "ISO-2022-KR", - 'language': 'Korean'} - - diff --git a/pipenv/patched/notpip/_vendor/chardet/euctwfreq.py b/pipenv/patched/notpip/_vendor/chardet/euctwfreq.py deleted file mode 100644 index ed7a995a3a..0000000000 --- a/pipenv/patched/notpip/_vendor/chardet/euctwfreq.py +++ /dev/null @@ -1,387 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -# EUCTW frequency table -# Converted from big5 work -# by Taiwan's Mandarin Promotion Council -# - -# 128 --> 0.42261 -# 256 --> 0.57851 -# 512 --> 0.74851 -# 1024 --> 0.89384 -# 2048 --> 0.97583 -# -# Idea Distribution Ratio = 0.74851/(1-0.74851) =2.98 -# Random Distribution Ration = 512/(5401-512)=0.105 -# -# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR - -EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75 - -# Char to FreqOrder table , -EUCTW_TABLE_SIZE = 5376 - -EUCTW_CHAR_TO_FREQ_ORDER = ( - 1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742 -3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758 -1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774 - 63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790 -3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806 -4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822 -7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838 - 630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854 - 179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870 - 995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886 -2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902 -1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918 -3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934 - 706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950 -1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966 -3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982 -2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998 - 437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014 -3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030 -1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046 -7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062 - 266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078 -7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094 -1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110 - 32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126 - 188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142 -3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158 -3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174 - 324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190 -2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206 -2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222 - 314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238 - 287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254 -3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270 -1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286 -1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302 -1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318 -2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334 - 265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350 -4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366 -1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382 -7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398 -2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414 - 383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430 - 98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446 - 523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462 - 710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478 -7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494 - 379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510 -1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526 - 585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542 - 690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558 -7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574 -1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590 - 544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606 -3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622 -4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638 -3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654 - 279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670 - 610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686 -1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702 -4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718 -3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734 -3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750 -2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766 -7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782 -3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798 -7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814 -1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830 -2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846 -1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862 - 78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878 -1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894 -4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910 -3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926 - 534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942 - 165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958 - 626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974 -2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990 -7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006 -1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022 -2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038 -1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054 -1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070 -7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086 -7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102 -7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118 -3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134 -4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150 -1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166 -7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182 -2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198 -7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214 -3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230 -3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246 -7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262 -2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278 -7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294 - 862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310 -4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326 -2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342 -7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358 -3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374 -2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390 -2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406 - 294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422 -2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438 -1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454 -1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470 -2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486 -1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502 -7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518 -7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534 -2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550 -4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566 -1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582 -7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598 - 829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614 -4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630 - 375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646 -2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662 - 444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678 -1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694 -1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710 - 730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726 -3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742 -3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758 -1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774 -3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790 -7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806 -7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822 -1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838 -2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854 -1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870 -3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886 -2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902 -3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918 -2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934 -4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950 -4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966 -3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982 - 97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998 -3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014 - 424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030 -3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046 -3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062 -3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078 -1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094 -7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110 - 199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126 -7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142 -1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158 - 391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174 -4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190 -3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206 - 397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222 -2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238 -2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254 -3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270 -1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286 -4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302 -2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318 -1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334 -1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350 -2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366 -3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382 -1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398 -7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414 -1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430 -4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446 -1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462 - 135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478 -1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494 -3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510 -3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526 -2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542 -1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558 -4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574 - 660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590 -7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606 -2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622 -3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638 -4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654 - 790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670 -7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686 -7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702 -1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718 -4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734 -3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750 -2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766 -3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782 -3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798 -2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814 -1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830 -4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846 -3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862 -3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878 -2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894 -4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910 -7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926 -3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942 -2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958 -3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974 -1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990 -2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006 -3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022 -4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038 -2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054 -2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070 -7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086 -1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102 -2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118 -1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134 -3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150 -4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166 -2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182 -3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198 -3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214 -2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230 -4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246 -2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262 -3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278 -4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294 -7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310 -3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326 - 194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342 -1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358 -4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374 -1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390 -4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406 -7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422 - 510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438 -7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454 -2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470 -1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486 -1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502 -3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518 - 509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534 - 552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550 - 478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566 -3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582 -2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598 - 751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614 -7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630 -1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646 -3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662 -7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678 -1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694 -7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710 -4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726 -1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742 -2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758 -2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774 -4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790 - 802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806 - 809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822 -3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838 -3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854 -1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870 -2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886 -7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902 -1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918 -1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934 -3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950 - 919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966 -1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982 -4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998 -7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014 -2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030 -3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046 - 516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062 -1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078 -2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094 -2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110 -7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126 -7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142 -7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158 -2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174 -2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190 -1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206 -4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222 -3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238 -3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254 -4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270 -4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286 -2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302 -2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318 -7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334 -4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350 -7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366 -2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382 -1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398 -3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414 -4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430 -2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446 - 120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462 -2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478 -1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494 -2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510 -2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526 -4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542 -7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558 -1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574 -3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590 -7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606 -1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622 -8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638 -2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654 -8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670 -2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686 -2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702 -8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718 -8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734 -8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750 - 408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766 -8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782 -4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798 -3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814 -8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830 -1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846 -8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862 - 425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878 -1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894 - 479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910 -4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926 -1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942 -4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958 -1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974 - 433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990 -3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006 -4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022 -8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038 - 938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054 -3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070 - 890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086 -2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102 -) - diff --git a/pipenv/patched/notpip/_vendor/chardet/jpcntx.py b/pipenv/patched/notpip/_vendor/chardet/jpcntx.py deleted file mode 100644 index 20044e4bc8..0000000000 --- a/pipenv/patched/notpip/_vendor/chardet/jpcntx.py +++ /dev/null @@ -1,233 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - - -# This is hiragana 2-char sequence table, the number in each cell represents its frequency category -jp2CharContext = ( -(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1), -(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4), -(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2), -(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4), -(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4), -(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3), -(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3), -(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3), -(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4), -(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3), -(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4), -(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3), -(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5), -(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3), -(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5), -(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4), -(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4), -(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3), -(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3), -(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3), -(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5), -(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4), -(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5), -(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3), -(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4), -(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4), -(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4), -(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1), -(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0), -(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3), -(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0), -(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3), -(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3), -(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5), -(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4), -(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5), -(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3), -(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3), -(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3), -(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3), -(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4), -(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4), -(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2), -(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3), -(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3), -(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3), -(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3), -(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4), -(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3), -(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4), -(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3), -(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3), -(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4), -(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4), -(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3), -(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4), -(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4), -(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3), -(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4), -(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4), -(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4), -(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3), -(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2), -(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2), -(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3), -(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3), -(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5), -(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3), -(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4), -(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4), -(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1), -(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2), -(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3), -(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1), -) - -class JapaneseContextAnalysis(object): - NUM_OF_CATEGORY = 6 - DONT_KNOW = -1 - ENOUGH_REL_THRESHOLD = 100 - MAX_REL_THRESHOLD = 1000 - MINIMUM_DATA_THRESHOLD = 4 - - def __init__(self): - self._total_rel = None - self._rel_sample = None - self._need_to_skip_char_num = None - self._last_char_order = None - self._done = None - self.reset() - - def reset(self): - self._total_rel = 0 # total sequence received - # category counters, each integer counts sequence in its category - self._rel_sample = [0] * self.NUM_OF_CATEGORY - # if last byte in current buffer is not the last byte of a character, - # we need to know how many bytes to skip in next buffer - self._need_to_skip_char_num = 0 - self._last_char_order = -1 # The order of previous char - # If this flag is set to True, detection is done and conclusion has - # been made - self._done = False - - def feed(self, byte_str, num_bytes): - if self._done: - return - - # The buffer we got is byte oriented, and a character may span in more than one - # buffers. In case the last one or two byte in last buffer is not - # complete, we record how many byte needed to complete that character - # and skip these bytes here. We can choose to record those bytes as - # well and analyse the character once it is complete, but since a - # character will not make much difference, by simply skipping - # this character will simply our logic and improve performance. - i = self._need_to_skip_char_num - while i < num_bytes: - order, char_len = self.get_order(byte_str[i:i + 2]) - i += char_len - if i > num_bytes: - self._need_to_skip_char_num = i - num_bytes - self._last_char_order = -1 - else: - if (order != -1) and (self._last_char_order != -1): - self._total_rel += 1 - if self._total_rel > self.MAX_REL_THRESHOLD: - self._done = True - break - self._rel_sample[jp2CharContext[self._last_char_order][order]] += 1 - self._last_char_order = order - - def got_enough_data(self): - return self._total_rel > self.ENOUGH_REL_THRESHOLD - - def get_confidence(self): - # This is just one way to calculate confidence. It works well for me. - if self._total_rel > self.MINIMUM_DATA_THRESHOLD: - return (self._total_rel - self._rel_sample[0]) / self._total_rel - else: - return self.DONT_KNOW - - def get_order(self, byte_str): - return -1, 1 - -class SJISContextAnalysis(JapaneseContextAnalysis): - def __init__(self): - super(SJISContextAnalysis, self).__init__() - self._charset_name = "SHIFT_JIS" - - @property - def charset_name(self): - return self._charset_name - - def get_order(self, byte_str): - if not byte_str: - return -1, 1 - # find out current char's byte length - first_char = byte_str[0] - if (0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC): - char_len = 2 - if (first_char == 0x87) or (0xFA <= first_char <= 0xFC): - self._charset_name = "CP932" - else: - char_len = 1 - - # return its order if it is hiragana - if len(byte_str) > 1: - second_char = byte_str[1] - if (first_char == 202) and (0x9F <= second_char <= 0xF1): - return second_char - 0x9F, char_len - - return -1, char_len - -class EUCJPContextAnalysis(JapaneseContextAnalysis): - def get_order(self, byte_str): - if not byte_str: - return -1, 1 - # find out current char's byte length - first_char = byte_str[0] - if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE): - char_len = 2 - elif first_char == 0x8F: - char_len = 3 - else: - char_len = 1 - - # return its order if it is hiragana - if len(byte_str) > 1: - second_char = byte_str[1] - if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3): - return second_char - 0xA1, char_len - - return -1, char_len - - diff --git a/pipenv/patched/notpip/_vendor/chardet/mbcssm.py b/pipenv/patched/notpip/_vendor/chardet/mbcssm.py deleted file mode 100644 index 8360d0f284..0000000000 --- a/pipenv/patched/notpip/_vendor/chardet/mbcssm.py +++ /dev/null @@ -1,572 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .enums import MachineState - -# BIG5 - -BIG5_CLS = ( - 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,1, # 78 - 7f - 4,4,4,4,4,4,4,4, # 80 - 87 - 4,4,4,4,4,4,4,4, # 88 - 8f - 4,4,4,4,4,4,4,4, # 90 - 97 - 4,4,4,4,4,4,4,4, # 98 - 9f - 4,3,3,3,3,3,3,3, # a0 - a7 - 3,3,3,3,3,3,3,3, # a8 - af - 3,3,3,3,3,3,3,3, # b0 - b7 - 3,3,3,3,3,3,3,3, # b8 - bf - 3,3,3,3,3,3,3,3, # c0 - c7 - 3,3,3,3,3,3,3,3, # c8 - cf - 3,3,3,3,3,3,3,3, # d0 - d7 - 3,3,3,3,3,3,3,3, # d8 - df - 3,3,3,3,3,3,3,3, # e0 - e7 - 3,3,3,3,3,3,3,3, # e8 - ef - 3,3,3,3,3,3,3,3, # f0 - f7 - 3,3,3,3,3,3,3,0 # f8 - ff -) - -BIG5_ST = ( - MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 - MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,#08-0f - MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START#10-17 -) - -BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0) - -BIG5_SM_MODEL = {'class_table': BIG5_CLS, - 'class_factor': 5, - 'state_table': BIG5_ST, - 'char_len_table': BIG5_CHAR_LEN_TABLE, - 'name': 'Big5'} - -# CP949 - -CP949_CLS = ( - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f - 1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f - 1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f - 4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f - 1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f - 5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f - 0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f - 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f - 6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af - 7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf - 7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf - 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df - 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef - 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff -) - -CP949_ST = ( -#cls= 0 1 2 3 4 5 6 7 8 9 # previous state = - MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START, 4, 5,MachineState.ERROR, 6, # MachineState.START - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, # MachineState.ERROR - MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME - MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 3 - MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 4 - MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 5 - MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 6 -) - -CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2) - -CP949_SM_MODEL = {'class_table': CP949_CLS, - 'class_factor': 10, - 'state_table': CP949_ST, - 'char_len_table': CP949_CHAR_LEN_TABLE, - 'name': 'CP949'} - -# EUC-JP - -EUCJP_CLS = ( - 4,4,4,4,4,4,4,4, # 00 - 07 - 4,4,4,4,4,4,5,5, # 08 - 0f - 4,4,4,4,4,4,4,4, # 10 - 17 - 4,4,4,5,4,4,4,4, # 18 - 1f - 4,4,4,4,4,4,4,4, # 20 - 27 - 4,4,4,4,4,4,4,4, # 28 - 2f - 4,4,4,4,4,4,4,4, # 30 - 37 - 4,4,4,4,4,4,4,4, # 38 - 3f - 4,4,4,4,4,4,4,4, # 40 - 47 - 4,4,4,4,4,4,4,4, # 48 - 4f - 4,4,4,4,4,4,4,4, # 50 - 57 - 4,4,4,4,4,4,4,4, # 58 - 5f - 4,4,4,4,4,4,4,4, # 60 - 67 - 4,4,4,4,4,4,4,4, # 68 - 6f - 4,4,4,4,4,4,4,4, # 70 - 77 - 4,4,4,4,4,4,4,4, # 78 - 7f - 5,5,5,5,5,5,5,5, # 80 - 87 - 5,5,5,5,5,5,1,3, # 88 - 8f - 5,5,5,5,5,5,5,5, # 90 - 97 - 5,5,5,5,5,5,5,5, # 98 - 9f - 5,2,2,2,2,2,2,2, # a0 - a7 - 2,2,2,2,2,2,2,2, # a8 - af - 2,2,2,2,2,2,2,2, # b0 - b7 - 2,2,2,2,2,2,2,2, # b8 - bf - 2,2,2,2,2,2,2,2, # c0 - c7 - 2,2,2,2,2,2,2,2, # c8 - cf - 2,2,2,2,2,2,2,2, # d0 - d7 - 2,2,2,2,2,2,2,2, # d8 - df - 0,0,0,0,0,0,0,0, # e0 - e7 - 0,0,0,0,0,0,0,0, # e8 - ef - 0,0,0,0,0,0,0,0, # f0 - f7 - 0,0,0,0,0,0,0,5 # f8 - ff -) - -EUCJP_ST = ( - 3, 4, 3, 5,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f - MachineState.ITS_ME,MachineState.ITS_ME,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17 - MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 3,MachineState.ERROR,#18-1f - 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27 -) - -EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0) - -EUCJP_SM_MODEL = {'class_table': EUCJP_CLS, - 'class_factor': 6, - 'state_table': EUCJP_ST, - 'char_len_table': EUCJP_CHAR_LEN_TABLE, - 'name': 'EUC-JP'} - -# EUC-KR - -EUCKR_CLS = ( - 1,1,1,1,1,1,1,1, # 00 - 07 - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 1,1,1,1,1,1,1,1, # 40 - 47 - 1,1,1,1,1,1,1,1, # 48 - 4f - 1,1,1,1,1,1,1,1, # 50 - 57 - 1,1,1,1,1,1,1,1, # 58 - 5f - 1,1,1,1,1,1,1,1, # 60 - 67 - 1,1,1,1,1,1,1,1, # 68 - 6f - 1,1,1,1,1,1,1,1, # 70 - 77 - 1,1,1,1,1,1,1,1, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,0,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,2,2,2,2,2,2,2, # a0 - a7 - 2,2,2,2,2,3,3,3, # a8 - af - 2,2,2,2,2,2,2,2, # b0 - b7 - 2,2,2,2,2,2,2,2, # b8 - bf - 2,2,2,2,2,2,2,2, # c0 - c7 - 2,3,2,2,2,2,2,2, # c8 - cf - 2,2,2,2,2,2,2,2, # d0 - d7 - 2,2,2,2,2,2,2,2, # d8 - df - 2,2,2,2,2,2,2,2, # e0 - e7 - 2,2,2,2,2,2,2,2, # e8 - ef - 2,2,2,2,2,2,2,2, # f0 - f7 - 2,2,2,2,2,2,2,0 # f8 - ff -) - -EUCKR_ST = ( - MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 - MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f -) - -EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0) - -EUCKR_SM_MODEL = {'class_table': EUCKR_CLS, - 'class_factor': 4, - 'state_table': EUCKR_ST, - 'char_len_table': EUCKR_CHAR_LEN_TABLE, - 'name': 'EUC-KR'} - -# EUC-TW - -EUCTW_CLS = ( - 2,2,2,2,2,2,2,2, # 00 - 07 - 2,2,2,2,2,2,0,0, # 08 - 0f - 2,2,2,2,2,2,2,2, # 10 - 17 - 2,2,2,0,2,2,2,2, # 18 - 1f - 2,2,2,2,2,2,2,2, # 20 - 27 - 2,2,2,2,2,2,2,2, # 28 - 2f - 2,2,2,2,2,2,2,2, # 30 - 37 - 2,2,2,2,2,2,2,2, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,2, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,6,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,3,4,4,4,4,4,4, # a0 - a7 - 5,5,1,1,1,1,1,1, # a8 - af - 1,1,1,1,1,1,1,1, # b0 - b7 - 1,1,1,1,1,1,1,1, # b8 - bf - 1,1,3,1,3,3,3,3, # c0 - c7 - 3,3,3,3,3,3,3,3, # c8 - cf - 3,3,3,3,3,3,3,3, # d0 - d7 - 3,3,3,3,3,3,3,3, # d8 - df - 3,3,3,3,3,3,3,3, # e0 - e7 - 3,3,3,3,3,3,3,3, # e8 - ef - 3,3,3,3,3,3,3,3, # f0 - f7 - 3,3,3,3,3,3,3,0 # f8 - ff -) - -EUCTW_ST = ( - MachineState.ERROR,MachineState.ERROR,MachineState.START, 3, 3, 3, 4,MachineState.ERROR,#00-07 - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f - MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.ERROR,#10-17 - MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f - 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27 - MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f -) - -EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3) - -EUCTW_SM_MODEL = {'class_table': EUCTW_CLS, - 'class_factor': 7, - 'state_table': EUCTW_ST, - 'char_len_table': EUCTW_CHAR_LEN_TABLE, - 'name': 'x-euc-tw'} - -# GB2312 - -GB2312_CLS = ( - 1,1,1,1,1,1,1,1, # 00 - 07 - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 3,3,3,3,3,3,3,3, # 30 - 37 - 3,3,1,1,1,1,1,1, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,4, # 78 - 7f - 5,6,6,6,6,6,6,6, # 80 - 87 - 6,6,6,6,6,6,6,6, # 88 - 8f - 6,6,6,6,6,6,6,6, # 90 - 97 - 6,6,6,6,6,6,6,6, # 98 - 9f - 6,6,6,6,6,6,6,6, # a0 - a7 - 6,6,6,6,6,6,6,6, # a8 - af - 6,6,6,6,6,6,6,6, # b0 - b7 - 6,6,6,6,6,6,6,6, # b8 - bf - 6,6,6,6,6,6,6,6, # c0 - c7 - 6,6,6,6,6,6,6,6, # c8 - cf - 6,6,6,6,6,6,6,6, # d0 - d7 - 6,6,6,6,6,6,6,6, # d8 - df - 6,6,6,6,6,6,6,6, # e0 - e7 - 6,6,6,6,6,6,6,6, # e8 - ef - 6,6,6,6,6,6,6,6, # f0 - f7 - 6,6,6,6,6,6,6,0 # f8 - ff -) - -GB2312_ST = ( - MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, 3,MachineState.ERROR,#00-07 - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f - MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,#10-17 - 4,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f - MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27 - MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f -) - -# To be accurate, the length of class 6 can be either 2 or 4. -# But it is not necessary to discriminate between the two since -# it is used for frequency analysis only, and we are validating -# each code range there as well. So it is safe to set it to be -# 2 here. -GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2) - -GB2312_SM_MODEL = {'class_table': GB2312_CLS, - 'class_factor': 7, - 'state_table': GB2312_ST, - 'char_len_table': GB2312_CHAR_LEN_TABLE, - 'name': 'GB2312'} - -# Shift_JIS - -SJIS_CLS = ( - 1,1,1,1,1,1,1,1, # 00 - 07 - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,1, # 78 - 7f - 3,3,3,3,3,2,2,3, # 80 - 87 - 3,3,3,3,3,3,3,3, # 88 - 8f - 3,3,3,3,3,3,3,3, # 90 - 97 - 3,3,3,3,3,3,3,3, # 98 - 9f - #0xa0 is illegal in sjis encoding, but some pages does - #contain such byte. We need to be more error forgiven. - 2,2,2,2,2,2,2,2, # a0 - a7 - 2,2,2,2,2,2,2,2, # a8 - af - 2,2,2,2,2,2,2,2, # b0 - b7 - 2,2,2,2,2,2,2,2, # b8 - bf - 2,2,2,2,2,2,2,2, # c0 - c7 - 2,2,2,2,2,2,2,2, # c8 - cf - 2,2,2,2,2,2,2,2, # d0 - d7 - 2,2,2,2,2,2,2,2, # d8 - df - 3,3,3,3,3,3,3,3, # e0 - e7 - 3,3,3,3,3,4,4,4, # e8 - ef - 3,3,3,3,3,3,3,3, # f0 - f7 - 3,3,3,3,3,0,0,0) # f8 - ff - - -SJIS_ST = ( - MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f - MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17 -) - -SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0) - -SJIS_SM_MODEL = {'class_table': SJIS_CLS, - 'class_factor': 6, - 'state_table': SJIS_ST, - 'char_len_table': SJIS_CHAR_LEN_TABLE, - 'name': 'Shift_JIS'} - -# UCS2-BE - -UCS2BE_CLS = ( - 0,0,0,0,0,0,0,0, # 00 - 07 - 0,0,1,0,0,2,0,0, # 08 - 0f - 0,0,0,0,0,0,0,0, # 10 - 17 - 0,0,0,3,0,0,0,0, # 18 - 1f - 0,0,0,0,0,0,0,0, # 20 - 27 - 0,3,3,3,3,3,0,0, # 28 - 2f - 0,0,0,0,0,0,0,0, # 30 - 37 - 0,0,0,0,0,0,0,0, # 38 - 3f - 0,0,0,0,0,0,0,0, # 40 - 47 - 0,0,0,0,0,0,0,0, # 48 - 4f - 0,0,0,0,0,0,0,0, # 50 - 57 - 0,0,0,0,0,0,0,0, # 58 - 5f - 0,0,0,0,0,0,0,0, # 60 - 67 - 0,0,0,0,0,0,0,0, # 68 - 6f - 0,0,0,0,0,0,0,0, # 70 - 77 - 0,0,0,0,0,0,0,0, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,0,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,0,0,0,0,0,0,0, # a0 - a7 - 0,0,0,0,0,0,0,0, # a8 - af - 0,0,0,0,0,0,0,0, # b0 - b7 - 0,0,0,0,0,0,0,0, # b8 - bf - 0,0,0,0,0,0,0,0, # c0 - c7 - 0,0,0,0,0,0,0,0, # c8 - cf - 0,0,0,0,0,0,0,0, # d0 - d7 - 0,0,0,0,0,0,0,0, # d8 - df - 0,0,0,0,0,0,0,0, # e0 - e7 - 0,0,0,0,0,0,0,0, # e8 - ef - 0,0,0,0,0,0,0,0, # f0 - f7 - 0,0,0,0,0,0,4,5 # f8 - ff -) - -UCS2BE_ST = ( - 5, 7, 7,MachineState.ERROR, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07 - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f - MachineState.ITS_ME,MachineState.ITS_ME, 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,#10-17 - 6, 6, 6, 6, 6,MachineState.ITS_ME, 6, 6,#18-1f - 6, 6, 6, 6, 5, 7, 7,MachineState.ERROR,#20-27 - 5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f - 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37 -) - -UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2) - -UCS2BE_SM_MODEL = {'class_table': UCS2BE_CLS, - 'class_factor': 6, - 'state_table': UCS2BE_ST, - 'char_len_table': UCS2BE_CHAR_LEN_TABLE, - 'name': 'UTF-16BE'} - -# UCS2-LE - -UCS2LE_CLS = ( - 0,0,0,0,0,0,0,0, # 00 - 07 - 0,0,1,0,0,2,0,0, # 08 - 0f - 0,0,0,0,0,0,0,0, # 10 - 17 - 0,0,0,3,0,0,0,0, # 18 - 1f - 0,0,0,0,0,0,0,0, # 20 - 27 - 0,3,3,3,3,3,0,0, # 28 - 2f - 0,0,0,0,0,0,0,0, # 30 - 37 - 0,0,0,0,0,0,0,0, # 38 - 3f - 0,0,0,0,0,0,0,0, # 40 - 47 - 0,0,0,0,0,0,0,0, # 48 - 4f - 0,0,0,0,0,0,0,0, # 50 - 57 - 0,0,0,0,0,0,0,0, # 58 - 5f - 0,0,0,0,0,0,0,0, # 60 - 67 - 0,0,0,0,0,0,0,0, # 68 - 6f - 0,0,0,0,0,0,0,0, # 70 - 77 - 0,0,0,0,0,0,0,0, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,0,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,0,0,0,0,0,0,0, # a0 - a7 - 0,0,0,0,0,0,0,0, # a8 - af - 0,0,0,0,0,0,0,0, # b0 - b7 - 0,0,0,0,0,0,0,0, # b8 - bf - 0,0,0,0,0,0,0,0, # c0 - c7 - 0,0,0,0,0,0,0,0, # c8 - cf - 0,0,0,0,0,0,0,0, # d0 - d7 - 0,0,0,0,0,0,0,0, # d8 - df - 0,0,0,0,0,0,0,0, # e0 - e7 - 0,0,0,0,0,0,0,0, # e8 - ef - 0,0,0,0,0,0,0,0, # f0 - f7 - 0,0,0,0,0,0,4,5 # f8 - ff -) - -UCS2LE_ST = ( - 6, 6, 7, 6, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07 - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f - MachineState.ITS_ME,MachineState.ITS_ME, 5, 5, 5,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#10-17 - 5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR, 6, 6,#18-1f - 7, 6, 8, 8, 5, 5, 5,MachineState.ERROR,#20-27 - 5, 5, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5,#28-2f - 5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR,MachineState.START,MachineState.START #30-37 -) - -UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2) - -UCS2LE_SM_MODEL = {'class_table': UCS2LE_CLS, - 'class_factor': 6, - 'state_table': UCS2LE_ST, - 'char_len_table': UCS2LE_CHAR_LEN_TABLE, - 'name': 'UTF-16LE'} - -# UTF-8 - -UTF8_CLS = ( - 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 1,1,1,1,1,1,1,1, # 40 - 47 - 1,1,1,1,1,1,1,1, # 48 - 4f - 1,1,1,1,1,1,1,1, # 50 - 57 - 1,1,1,1,1,1,1,1, # 58 - 5f - 1,1,1,1,1,1,1,1, # 60 - 67 - 1,1,1,1,1,1,1,1, # 68 - 6f - 1,1,1,1,1,1,1,1, # 70 - 77 - 1,1,1,1,1,1,1,1, # 78 - 7f - 2,2,2,2,3,3,3,3, # 80 - 87 - 4,4,4,4,4,4,4,4, # 88 - 8f - 4,4,4,4,4,4,4,4, # 90 - 97 - 4,4,4,4,4,4,4,4, # 98 - 9f - 5,5,5,5,5,5,5,5, # a0 - a7 - 5,5,5,5,5,5,5,5, # a8 - af - 5,5,5,5,5,5,5,5, # b0 - b7 - 5,5,5,5,5,5,5,5, # b8 - bf - 0,0,6,6,6,6,6,6, # c0 - c7 - 6,6,6,6,6,6,6,6, # c8 - cf - 6,6,6,6,6,6,6,6, # d0 - d7 - 6,6,6,6,6,6,6,6, # d8 - df - 7,8,8,8,8,8,8,8, # e0 - e7 - 8,8,8,8,8,9,8,8, # e8 - ef - 10,11,11,11,11,11,11,11, # f0 - f7 - 12,13,13,13,14,15,0,0 # f8 - ff -) - -UTF8_ST = ( - MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12, 10,#00-07 - 9, 11, 8, 7, 6, 5, 4, 3,#08-0f - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17 - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f - MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#20-27 - MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#28-2f - MachineState.ERROR,MachineState.ERROR, 5, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#30-37 - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#38-3f - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#40-47 - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#48-4f - MachineState.ERROR,MachineState.ERROR, 7, 7, 7, 7,MachineState.ERROR,MachineState.ERROR,#50-57 - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#58-5f - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 7, 7,MachineState.ERROR,MachineState.ERROR,#60-67 - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#68-6f - MachineState.ERROR,MachineState.ERROR, 9, 9, 9, 9,MachineState.ERROR,MachineState.ERROR,#70-77 - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#78-7f - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 9,MachineState.ERROR,MachineState.ERROR,#80-87 - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#88-8f - MachineState.ERROR,MachineState.ERROR, 12, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,#90-97 - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#98-9f - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12,MachineState.ERROR,MachineState.ERROR,#a0-a7 - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#a8-af - MachineState.ERROR,MachineState.ERROR, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b0-b7 - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b8-bf - MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7 - MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf -) - -UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6) - -UTF8_SM_MODEL = {'class_table': UTF8_CLS, - 'class_factor': 16, - 'state_table': UTF8_ST, - 'char_len_table': UTF8_CHAR_LEN_TABLE, - 'name': 'UTF-8'} diff --git a/pipenv/patched/notpip/_vendor/chardet/metadata/languages.py b/pipenv/patched/notpip/_vendor/chardet/metadata/languages.py deleted file mode 100644 index 3237d5abf6..0000000000 --- a/pipenv/patched/notpip/_vendor/chardet/metadata/languages.py +++ /dev/null @@ -1,310 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" -Metadata about languages used by our model training code for our -SingleByteCharSetProbers. Could be used for other things in the future. - -This code is based on the language metadata from the uchardet project. -""" -from __future__ import absolute_import, print_function - -from string import ascii_letters - - -# TODO: Add Ukranian (KOI8-U) - -class Language(object): - """Metadata about a language useful for training models - - :ivar name: The human name for the language, in English. - :type name: str - :ivar iso_code: 2-letter ISO 639-1 if possible, 3-letter ISO code otherwise, - or use another catalog as a last resort. - :type iso_code: str - :ivar use_ascii: Whether or not ASCII letters should be included in trained - models. - :type use_ascii: bool - :ivar charsets: The charsets we want to support and create data for. - :type charsets: list of str - :ivar alphabet: The characters in the language's alphabet. If `use_ascii` is - `True`, you only need to add those not in the ASCII set. - :type alphabet: str - :ivar wiki_start_pages: The Wikipedia pages to start from if we're crawling - Wikipedia for training data. - :type wiki_start_pages: list of str - """ - def __init__(self, name=None, iso_code=None, use_ascii=True, charsets=None, - alphabet=None, wiki_start_pages=None): - super(Language, self).__init__() - self.name = name - self.iso_code = iso_code - self.use_ascii = use_ascii - self.charsets = charsets - if self.use_ascii: - if alphabet: - alphabet += ascii_letters - else: - alphabet = ascii_letters - elif not alphabet: - raise ValueError('Must supply alphabet if use_ascii is False') - self.alphabet = ''.join(sorted(set(alphabet))) if alphabet else None - self.wiki_start_pages = wiki_start_pages - - def __repr__(self): - return '{}({})'.format(self.__class__.__name__, - ', '.join('{}={!r}'.format(k, v) - for k, v in self.__dict__.items() - if not k.startswith('_'))) - - -LANGUAGES = {'Arabic': Language(name='Arabic', - iso_code='ar', - use_ascii=False, - # We only support encodings that use isolated - # forms, because the current recommendation is - # that the rendering system handles presentation - # forms. This means we purposefully skip IBM864. - charsets=['ISO-8859-6', 'WINDOWS-1256', - 'CP720', 'CP864'], - alphabet=u'ءآأؤإئابةتثجحخدذرزسشصضطظعغػؼؽؾؿـفقكلمنهوىيًٌٍَُِّ', - wiki_start_pages=[u'الصفحة_الرئيسية']), - 'Belarusian': Language(name='Belarusian', - iso_code='be', - use_ascii=False, - charsets=['ISO-8859-5', 'WINDOWS-1251', - 'IBM866', 'MacCyrillic'], - alphabet=(u'АБВГДЕЁЖЗІЙКЛМНОПРСТУЎФХЦЧШЫЬЭЮЯ' - u'абвгдеёжзійклмнопрстуўфхцчшыьэюяʼ'), - wiki_start_pages=[u'Галоўная_старонка']), - 'Bulgarian': Language(name='Bulgarian', - iso_code='bg', - use_ascii=False, - charsets=['ISO-8859-5', 'WINDOWS-1251', - 'IBM855'], - alphabet=(u'АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯ' - u'абвгдежзийклмнопрстуфхцчшщъьюя'), - wiki_start_pages=[u'Начална_страница']), - 'Czech': Language(name='Czech', - iso_code='cz', - use_ascii=True, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=u'áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ', - wiki_start_pages=[u'Hlavní_strana']), - 'Danish': Language(name='Danish', - iso_code='da', - use_ascii=True, - charsets=['ISO-8859-1', 'ISO-8859-15', - 'WINDOWS-1252'], - alphabet=u'æøåÆØÅ', - wiki_start_pages=[u'Forside']), - 'German': Language(name='German', - iso_code='de', - use_ascii=True, - charsets=['ISO-8859-1', 'WINDOWS-1252'], - alphabet=u'äöüßÄÖÜ', - wiki_start_pages=[u'Wikipedia:Hauptseite']), - 'Greek': Language(name='Greek', - iso_code='el', - use_ascii=False, - charsets=['ISO-8859-7', 'WINDOWS-1253'], - alphabet=(u'αβγδεζηθικλμνξοπρσςτυφχψωάέήίόύώ' - u'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΣΤΥΦΧΨΩΆΈΉΊΌΎΏ'), - wiki_start_pages=[u'Πύλη:Κύρια']), - 'English': Language(name='English', - iso_code='en', - use_ascii=True, - charsets=['ISO-8859-1', 'WINDOWS-1252'], - wiki_start_pages=[u'Main_Page']), - 'Esperanto': Language(name='Esperanto', - iso_code='eo', - # Q, W, X, and Y not used at all - use_ascii=False, - charsets=['ISO-8859-3'], - alphabet=(u'abcĉdefgĝhĥijĵklmnoprsŝtuŭvz' - u'ABCĈDEFGĜHĤIJĴKLMNOPRSŜTUŬVZ'), - wiki_start_pages=[u'Vikipedio:Ĉefpaĝo']), - 'Spanish': Language(name='Spanish', - iso_code='es', - use_ascii=True, - charsets=['ISO-8859-1', 'ISO-8859-15', - 'WINDOWS-1252'], - alphabet=u'ñáéíóúüÑÁÉÍÓÚÜ', - wiki_start_pages=[u'Wikipedia:Portada']), - 'Estonian': Language(name='Estonian', - iso_code='et', - use_ascii=False, - charsets=['ISO-8859-4', 'ISO-8859-13', - 'WINDOWS-1257'], - # C, F, Š, Q, W, X, Y, Z, Ž are only for - # loanwords - alphabet=(u'ABDEGHIJKLMNOPRSTUVÕÄÖÜ' - u'abdeghijklmnoprstuvõäöü'), - wiki_start_pages=[u'Esileht']), - 'Finnish': Language(name='Finnish', - iso_code='fi', - use_ascii=True, - charsets=['ISO-8859-1', 'ISO-8859-15', - 'WINDOWS-1252'], - alphabet=u'ÅÄÖŠŽåäöšž', - wiki_start_pages=[u'Wikipedia:Etusivu']), - 'French': Language(name='French', - iso_code='fr', - use_ascii=True, - charsets=['ISO-8859-1', 'ISO-8859-15', - 'WINDOWS-1252'], - alphabet=u'œàâçèéîïùûêŒÀÂÇÈÉÎÏÙÛÊ', - wiki_start_pages=[u'Wikipédia:Accueil_principal', - u'Bœuf (animal)']), - 'Hebrew': Language(name='Hebrew', - iso_code='he', - use_ascii=False, - charsets=['ISO-8859-8', 'WINDOWS-1255'], - alphabet=u'אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ', - wiki_start_pages=[u'עמוד_ראשי']), - 'Croatian': Language(name='Croatian', - iso_code='hr', - # Q, W, X, Y are only used for foreign words. - use_ascii=False, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=(u'abcčćdđefghijklmnoprsštuvzž' - u'ABCČĆDĐEFGHIJKLMNOPRSŠTUVZŽ'), - wiki_start_pages=[u'Glavna_stranica']), - 'Hungarian': Language(name='Hungarian', - iso_code='hu', - # Q, W, X, Y are only used for foreign words. - use_ascii=False, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=(u'abcdefghijklmnoprstuvzáéíóöőúüű' - u'ABCDEFGHIJKLMNOPRSTUVZÁÉÍÓÖŐÚÜŰ'), - wiki_start_pages=[u'Kezdőlap']), - 'Italian': Language(name='Italian', - iso_code='it', - use_ascii=True, - charsets=['ISO-8859-1', 'ISO-8859-15', - 'WINDOWS-1252'], - alphabet=u'ÀÈÉÌÒÓÙàèéìòóù', - wiki_start_pages=[u'Pagina_principale']), - 'Lithuanian': Language(name='Lithuanian', - iso_code='lt', - use_ascii=False, - charsets=['ISO-8859-13', 'WINDOWS-1257', - 'ISO-8859-4'], - # Q, W, and X not used at all - alphabet=(u'AĄBCČDEĘĖFGHIĮYJKLMNOPRSŠTUŲŪVZŽ' - u'aąbcčdeęėfghiįyjklmnoprsštuųūvzž'), - wiki_start_pages=[u'Pagrindinis_puslapis']), - 'Latvian': Language(name='Latvian', - iso_code='lv', - use_ascii=False, - charsets=['ISO-8859-13', 'WINDOWS-1257', - 'ISO-8859-4'], - # Q, W, X, Y are only for loanwords - alphabet=(u'AĀBCČDEĒFGĢHIĪJKĶLĻMNŅOPRSŠTUŪVZŽ' - u'aābcčdeēfgģhiījkķlļmnņoprsštuūvzž'), - wiki_start_pages=[u'Sākumlapa']), - 'Macedonian': Language(name='Macedonian', - iso_code='mk', - use_ascii=False, - charsets=['ISO-8859-5', 'WINDOWS-1251', - 'MacCyrillic', 'IBM855'], - alphabet=(u'АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШ' - u'абвгдѓежзѕијклљмнњопрстќуфхцчџш'), - wiki_start_pages=[u'Главна_страница']), - 'Dutch': Language(name='Dutch', - iso_code='nl', - use_ascii=True, - charsets=['ISO-8859-1', 'WINDOWS-1252'], - wiki_start_pages=[u'Hoofdpagina']), - 'Polish': Language(name='Polish', - iso_code='pl', - # Q and X are only used for foreign words. - use_ascii=False, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=(u'AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ' - u'aąbcćdeęfghijklłmnńoóprsśtuwyzźż'), - wiki_start_pages=[u'Wikipedia:Strona_główna']), - 'Portuguese': Language(name='Portuguese', - iso_code='pt', - use_ascii=True, - charsets=['ISO-8859-1', 'ISO-8859-15', - 'WINDOWS-1252'], - alphabet=u'ÁÂÃÀÇÉÊÍÓÔÕÚáâãàçéêíóôõú', - wiki_start_pages=[u'Wikipédia:Página_principal']), - 'Romanian': Language(name='Romanian', - iso_code='ro', - use_ascii=True, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=u'ăâîșțĂÂÎȘȚ', - wiki_start_pages=[u'Pagina_principală']), - 'Russian': Language(name='Russian', - iso_code='ru', - use_ascii=False, - charsets=['ISO-8859-5', 'WINDOWS-1251', - 'KOI8-R', 'MacCyrillic', 'IBM866', - 'IBM855'], - alphabet=(u'абвгдеёжзийклмнопрстуфхцчшщъыьэюя' - u'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'), - wiki_start_pages=[u'Заглавная_страница']), - 'Slovak': Language(name='Slovak', - iso_code='sk', - use_ascii=True, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=u'áäčďéíĺľňóôŕšťúýžÁÄČĎÉÍĹĽŇÓÔŔŠŤÚÝŽ', - wiki_start_pages=[u'Hlavná_stránka']), - 'Slovene': Language(name='Slovene', - iso_code='sl', - # Q, W, X, Y are only used for foreign words. - use_ascii=False, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=(u'abcčdefghijklmnoprsštuvzž' - u'ABCČDEFGHIJKLMNOPRSŠTUVZŽ'), - wiki_start_pages=[u'Glavna_stran']), - # Serbian can be written in both Latin and Cyrillic, but there's no - # simple way to get the Latin alphabet pages from Wikipedia through - # the API, so for now we just support Cyrillic. - 'Serbian': Language(name='Serbian', - iso_code='sr', - alphabet=(u'АБВГДЂЕЖЗИЈКЛЉМНЊОПРСТЋУФХЦЧЏШ' - u'абвгдђежзијклљмнњопрстћуфхцчџш'), - charsets=['ISO-8859-5', 'WINDOWS-1251', - 'MacCyrillic', 'IBM855'], - wiki_start_pages=[u'Главна_страна']), - 'Thai': Language(name='Thai', - iso_code='th', - use_ascii=False, - charsets=['ISO-8859-11', 'TIS-620', 'CP874'], - alphabet=u'กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛', - wiki_start_pages=[u'หน้าหลัก']), - 'Turkish': Language(name='Turkish', - iso_code='tr', - # Q, W, and X are not used by Turkish - use_ascii=False, - charsets=['ISO-8859-3', 'ISO-8859-9', - 'WINDOWS-1254'], - alphabet=(u'abcçdefgğhıijklmnoöprsştuüvyzâîû' - u'ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZÂÎÛ'), - wiki_start_pages=[u'Ana_Sayfa']), - 'Vietnamese': Language(name='Vietnamese', - iso_code='vi', - use_ascii=False, - # Windows-1258 is the only common 8-bit - # Vietnamese encoding supported by Python. - # From Wikipedia: - # For systems that lack support for Unicode, - # dozens of 8-bit Vietnamese code pages are - # available.[1] The most common are VISCII - # (TCVN 5712:1993), VPS, and Windows-1258.[3] - # Where ASCII is required, such as when - # ensuring readability in plain text e-mail, - # Vietnamese letters are often encoded - # according to Vietnamese Quoted-Readable - # (VIQR) or VSCII Mnemonic (VSCII-MNEM),[4] - # though usage of either variable-width - # scheme has declined dramatically following - # the adoption of Unicode on the World Wide - # Web. - charsets=['WINDOWS-1258'], - alphabet=(u'aăâbcdđeêghiklmnoôơpqrstuưvxy' - u'AĂÂBCDĐEÊGHIKLMNOÔƠPQRSTUƯVXY'), - wiki_start_pages=[u'Chữ_Quốc_ngữ']), - } diff --git a/pipenv/patched/notpip/_vendor/distlib/_backport/__init__.py b/pipenv/patched/notpip/_vendor/distlib/_backport/__init__.py deleted file mode 100644 index f7dbf4c9aa..0000000000 --- a/pipenv/patched/notpip/_vendor/distlib/_backport/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -"""Modules copied from Python 3 standard libraries, for internal use only. - -Individual classes and functions are found in d2._backport.misc. Intended -usage is to always import things missing from 3.1 from that module: the -built-in/stdlib objects will be used if found. -""" diff --git a/pipenv/patched/notpip/_vendor/distlib/_backport/misc.py b/pipenv/patched/notpip/_vendor/distlib/_backport/misc.py deleted file mode 100644 index cfb318d34f..0000000000 --- a/pipenv/patched/notpip/_vendor/distlib/_backport/misc.py +++ /dev/null @@ -1,41 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 The Python Software Foundation. -# See LICENSE.txt and CONTRIBUTORS.txt. -# -"""Backports for individual classes and functions.""" - -import os -import sys - -__all__ = ['cache_from_source', 'callable', 'fsencode'] - - -try: - from imp import cache_from_source -except ImportError: - def cache_from_source(py_file, debug=__debug__): - ext = debug and 'c' or 'o' - return py_file + ext - - -try: - callable = callable -except NameError: - from collections import Callable - - def callable(obj): - return isinstance(obj, Callable) - - -try: - fsencode = os.fsencode -except AttributeError: - def fsencode(filename): - if isinstance(filename, bytes): - return filename - elif isinstance(filename, str): - return filename.encode(sys.getfilesystemencoding()) - else: - raise TypeError("expect bytes or str, not %s" % - type(filename).__name__) diff --git a/pipenv/patched/notpip/_vendor/distlib/_backport/shutil.py b/pipenv/patched/notpip/_vendor/distlib/_backport/shutil.py deleted file mode 100644 index 10ed362539..0000000000 --- a/pipenv/patched/notpip/_vendor/distlib/_backport/shutil.py +++ /dev/null @@ -1,764 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 The Python Software Foundation. -# See LICENSE.txt and CONTRIBUTORS.txt. -# -"""Utility functions for copying and archiving files and directory trees. - -XXX The functions here don't copy the resource fork or other metadata on Mac. - -""" - -import os -import sys -import stat -from os.path import abspath -import fnmatch -try: - from collections.abc import Callable -except ImportError: - from collections import Callable -import errno -from . import tarfile - -try: - import bz2 - _BZ2_SUPPORTED = True -except ImportError: - _BZ2_SUPPORTED = False - -try: - from pwd import getpwnam -except ImportError: - getpwnam = None - -try: - from grp import getgrnam -except ImportError: - getgrnam = None - -__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2", - "copytree", "move", "rmtree", "Error", "SpecialFileError", - "ExecError", "make_archive", "get_archive_formats", - "register_archive_format", "unregister_archive_format", - "get_unpack_formats", "register_unpack_format", - "unregister_unpack_format", "unpack_archive", "ignore_patterns"] - -class Error(EnvironmentError): - pass - -class SpecialFileError(EnvironmentError): - """Raised when trying to do a kind of operation (e.g. copying) which is - not supported on a special file (e.g. a named pipe)""" - -class ExecError(EnvironmentError): - """Raised when a command could not be executed""" - -class ReadError(EnvironmentError): - """Raised when an archive cannot be read""" - -class RegistryError(Exception): - """Raised when a registry operation with the archiving - and unpacking registries fails""" - - -try: - WindowsError -except NameError: - WindowsError = None - -def copyfileobj(fsrc, fdst, length=16*1024): - """copy data from file-like object fsrc to file-like object fdst""" - while 1: - buf = fsrc.read(length) - if not buf: - break - fdst.write(buf) - -def _samefile(src, dst): - # Macintosh, Unix. - if hasattr(os.path, 'samefile'): - try: - return os.path.samefile(src, dst) - except OSError: - return False - - # All other platforms: check for same pathname. - return (os.path.normcase(os.path.abspath(src)) == - os.path.normcase(os.path.abspath(dst))) - -def copyfile(src, dst): - """Copy data from src to dst""" - if _samefile(src, dst): - raise Error("`%s` and `%s` are the same file" % (src, dst)) - - for fn in [src, dst]: - try: - st = os.stat(fn) - except OSError: - # File most likely does not exist - pass - else: - # XXX What about other special files? (sockets, devices...) - if stat.S_ISFIFO(st.st_mode): - raise SpecialFileError("`%s` is a named pipe" % fn) - - with open(src, 'rb') as fsrc: - with open(dst, 'wb') as fdst: - copyfileobj(fsrc, fdst) - -def copymode(src, dst): - """Copy mode bits from src to dst""" - if hasattr(os, 'chmod'): - st = os.stat(src) - mode = stat.S_IMODE(st.st_mode) - os.chmod(dst, mode) - -def copystat(src, dst): - """Copy all stat info (mode bits, atime, mtime, flags) from src to dst""" - st = os.stat(src) - mode = stat.S_IMODE(st.st_mode) - if hasattr(os, 'utime'): - os.utime(dst, (st.st_atime, st.st_mtime)) - if hasattr(os, 'chmod'): - os.chmod(dst, mode) - if hasattr(os, 'chflags') and hasattr(st, 'st_flags'): - try: - os.chflags(dst, st.st_flags) - except OSError as why: - if (not hasattr(errno, 'EOPNOTSUPP') or - why.errno != errno.EOPNOTSUPP): - raise - -def copy(src, dst): - """Copy data and mode bits ("cp src dst"). - - The destination may be a directory. - - """ - if os.path.isdir(dst): - dst = os.path.join(dst, os.path.basename(src)) - copyfile(src, dst) - copymode(src, dst) - -def copy2(src, dst): - """Copy data and all stat info ("cp -p src dst"). - - The destination may be a directory. - - """ - if os.path.isdir(dst): - dst = os.path.join(dst, os.path.basename(src)) - copyfile(src, dst) - copystat(src, dst) - -def ignore_patterns(*patterns): - """Function that can be used as copytree() ignore parameter. - - Patterns is a sequence of glob-style patterns - that are used to exclude files""" - def _ignore_patterns(path, names): - ignored_names = [] - for pattern in patterns: - ignored_names.extend(fnmatch.filter(names, pattern)) - return set(ignored_names) - return _ignore_patterns - -def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, - ignore_dangling_symlinks=False): - """Recursively copy a directory tree. - - The destination directory must not already exist. - If exception(s) occur, an Error is raised with a list of reasons. - - If the optional symlinks flag is true, symbolic links in the - source tree result in symbolic links in the destination tree; if - it is false, the contents of the files pointed to by symbolic - links are copied. If the file pointed by the symlink doesn't - exist, an exception will be added in the list of errors raised in - an Error exception at the end of the copy process. - - You can set the optional ignore_dangling_symlinks flag to true if you - want to silence this exception. Notice that this has no effect on - platforms that don't support os.symlink. - - The optional ignore argument is a callable. If given, it - is called with the `src` parameter, which is the directory - being visited by copytree(), and `names` which is the list of - `src` contents, as returned by os.listdir(): - - callable(src, names) -> ignored_names - - Since copytree() is called recursively, the callable will be - called once for each directory that is copied. It returns a - list of names relative to the `src` directory that should - not be copied. - - The optional copy_function argument is a callable that will be used - to copy each file. It will be called with the source path and the - destination path as arguments. By default, copy2() is used, but any - function that supports the same signature (like copy()) can be used. - - """ - names = os.listdir(src) - if ignore is not None: - ignored_names = ignore(src, names) - else: - ignored_names = set() - - os.makedirs(dst) - errors = [] - for name in names: - if name in ignored_names: - continue - srcname = os.path.join(src, name) - dstname = os.path.join(dst, name) - try: - if os.path.islink(srcname): - linkto = os.readlink(srcname) - if symlinks: - os.symlink(linkto, dstname) - else: - # ignore dangling symlink if the flag is on - if not os.path.exists(linkto) and ignore_dangling_symlinks: - continue - # otherwise let the copy occurs. copy2 will raise an error - copy_function(srcname, dstname) - elif os.path.isdir(srcname): - copytree(srcname, dstname, symlinks, ignore, copy_function) - else: - # Will raise a SpecialFileError for unsupported file types - copy_function(srcname, dstname) - # catch the Error from the recursive copytree so that we can - # continue with other files - except Error as err: - errors.extend(err.args[0]) - except EnvironmentError as why: - errors.append((srcname, dstname, str(why))) - try: - copystat(src, dst) - except OSError as why: - if WindowsError is not None and isinstance(why, WindowsError): - # Copying file access times may fail on Windows - pass - else: - errors.extend((src, dst, str(why))) - if errors: - raise Error(errors) - -def rmtree(path, ignore_errors=False, onerror=None): - """Recursively delete a directory tree. - - If ignore_errors is set, errors are ignored; otherwise, if onerror - is set, it is called to handle the error with arguments (func, - path, exc_info) where func is os.listdir, os.remove, or os.rmdir; - path is the argument to that function that caused it to fail; and - exc_info is a tuple returned by sys.exc_info(). If ignore_errors - is false and onerror is None, an exception is raised. - - """ - if ignore_errors: - def onerror(*args): - pass - elif onerror is None: - def onerror(*args): - raise - try: - if os.path.islink(path): - # symlinks to directories are forbidden, see bug #1669 - raise OSError("Cannot call rmtree on a symbolic link") - except OSError: - onerror(os.path.islink, path, sys.exc_info()) - # can't continue even if onerror hook returns - return - names = [] - try: - names = os.listdir(path) - except os.error: - onerror(os.listdir, path, sys.exc_info()) - for name in names: - fullname = os.path.join(path, name) - try: - mode = os.lstat(fullname).st_mode - except os.error: - mode = 0 - if stat.S_ISDIR(mode): - rmtree(fullname, ignore_errors, onerror) - else: - try: - os.remove(fullname) - except os.error: - onerror(os.remove, fullname, sys.exc_info()) - try: - os.rmdir(path) - except os.error: - onerror(os.rmdir, path, sys.exc_info()) - - -def _basename(path): - # A basename() variant which first strips the trailing slash, if present. - # Thus we always get the last component of the path, even for directories. - return os.path.basename(path.rstrip(os.path.sep)) - -def move(src, dst): - """Recursively move a file or directory to another location. This is - similar to the Unix "mv" command. - - If the destination is a directory or a symlink to a directory, the source - is moved inside the directory. The destination path must not already - exist. - - If the destination already exists but is not a directory, it may be - overwritten depending on os.rename() semantics. - - If the destination is on our current filesystem, then rename() is used. - Otherwise, src is copied to the destination and then removed. - A lot more could be done here... A look at a mv.c shows a lot of - the issues this implementation glosses over. - - """ - real_dst = dst - if os.path.isdir(dst): - if _samefile(src, dst): - # We might be on a case insensitive filesystem, - # perform the rename anyway. - os.rename(src, dst) - return - - real_dst = os.path.join(dst, _basename(src)) - if os.path.exists(real_dst): - raise Error("Destination path '%s' already exists" % real_dst) - try: - os.rename(src, real_dst) - except OSError: - if os.path.isdir(src): - if _destinsrc(src, dst): - raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst)) - copytree(src, real_dst, symlinks=True) - rmtree(src) - else: - copy2(src, real_dst) - os.unlink(src) - -def _destinsrc(src, dst): - src = abspath(src) - dst = abspath(dst) - if not src.endswith(os.path.sep): - src += os.path.sep - if not dst.endswith(os.path.sep): - dst += os.path.sep - return dst.startswith(src) - -def _get_gid(name): - """Returns a gid, given a group name.""" - if getgrnam is None or name is None: - return None - try: - result = getgrnam(name) - except KeyError: - result = None - if result is not None: - return result[2] - return None - -def _get_uid(name): - """Returns an uid, given a user name.""" - if getpwnam is None or name is None: - return None - try: - result = getpwnam(name) - except KeyError: - result = None - if result is not None: - return result[2] - return None - -def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0, - owner=None, group=None, logger=None): - """Create a (possibly compressed) tar file from all the files under - 'base_dir'. - - 'compress' must be "gzip" (the default), "bzip2", or None. - - 'owner' and 'group' can be used to define an owner and a group for the - archive that is being built. If not provided, the current owner and group - will be used. - - The output tar file will be named 'base_name' + ".tar", possibly plus - the appropriate compression extension (".gz", or ".bz2"). - - Returns the output filename. - """ - tar_compression = {'gzip': 'gz', None: ''} - compress_ext = {'gzip': '.gz'} - - if _BZ2_SUPPORTED: - tar_compression['bzip2'] = 'bz2' - compress_ext['bzip2'] = '.bz2' - - # flags for compression program, each element of list will be an argument - if compress is not None and compress not in compress_ext: - raise ValueError("bad value for 'compress', or compression format not " - "supported : {0}".format(compress)) - - archive_name = base_name + '.tar' + compress_ext.get(compress, '') - archive_dir = os.path.dirname(archive_name) - - if not os.path.exists(archive_dir): - if logger is not None: - logger.info("creating %s", archive_dir) - if not dry_run: - os.makedirs(archive_dir) - - # creating the tarball - if logger is not None: - logger.info('Creating tar archive') - - uid = _get_uid(owner) - gid = _get_gid(group) - - def _set_uid_gid(tarinfo): - if gid is not None: - tarinfo.gid = gid - tarinfo.gname = group - if uid is not None: - tarinfo.uid = uid - tarinfo.uname = owner - return tarinfo - - if not dry_run: - tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress]) - try: - tar.add(base_dir, filter=_set_uid_gid) - finally: - tar.close() - - return archive_name - -def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False): - # XXX see if we want to keep an external call here - if verbose: - zipoptions = "-r" - else: - zipoptions = "-rq" - from distutils.errors import DistutilsExecError - from distutils.spawn import spawn - try: - spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run) - except DistutilsExecError: - # XXX really should distinguish between "couldn't find - # external 'zip' command" and "zip failed". - raise ExecError("unable to create zip file '%s': " - "could neither import the 'zipfile' module nor " - "find a standalone zip utility") % zip_filename - -def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None): - """Create a zip file from all the files under 'base_dir'. - - The output zip file will be named 'base_name' + ".zip". Uses either the - "zipfile" Python module (if available) or the InfoZIP "zip" utility - (if installed and found on the default search path). If neither tool is - available, raises ExecError. Returns the name of the output zip - file. - """ - zip_filename = base_name + ".zip" - archive_dir = os.path.dirname(base_name) - - if not os.path.exists(archive_dir): - if logger is not None: - logger.info("creating %s", archive_dir) - if not dry_run: - os.makedirs(archive_dir) - - # If zipfile module is not available, try spawning an external 'zip' - # command. - try: - import zipfile - except ImportError: - zipfile = None - - if zipfile is None: - _call_external_zip(base_dir, zip_filename, verbose, dry_run) - else: - if logger is not None: - logger.info("creating '%s' and adding '%s' to it", - zip_filename, base_dir) - - if not dry_run: - zip = zipfile.ZipFile(zip_filename, "w", - compression=zipfile.ZIP_DEFLATED) - - for dirpath, dirnames, filenames in os.walk(base_dir): - for name in filenames: - path = os.path.normpath(os.path.join(dirpath, name)) - if os.path.isfile(path): - zip.write(path, path) - if logger is not None: - logger.info("adding '%s'", path) - zip.close() - - return zip_filename - -_ARCHIVE_FORMATS = { - 'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"), - 'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"), - 'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"), - 'zip': (_make_zipfile, [], "ZIP file"), - } - -if _BZ2_SUPPORTED: - _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')], - "bzip2'ed tar-file") - -def get_archive_formats(): - """Returns a list of supported formats for archiving and unarchiving. - - Each element of the returned sequence is a tuple (name, description) - """ - formats = [(name, registry[2]) for name, registry in - _ARCHIVE_FORMATS.items()] - formats.sort() - return formats - -def register_archive_format(name, function, extra_args=None, description=''): - """Registers an archive format. - - name is the name of the format. function is the callable that will be - used to create archives. If provided, extra_args is a sequence of - (name, value) tuples that will be passed as arguments to the callable. - description can be provided to describe the format, and will be returned - by the get_archive_formats() function. - """ - if extra_args is None: - extra_args = [] - if not isinstance(function, Callable): - raise TypeError('The %s object is not callable' % function) - if not isinstance(extra_args, (tuple, list)): - raise TypeError('extra_args needs to be a sequence') - for element in extra_args: - if not isinstance(element, (tuple, list)) or len(element) !=2: - raise TypeError('extra_args elements are : (arg_name, value)') - - _ARCHIVE_FORMATS[name] = (function, extra_args, description) - -def unregister_archive_format(name): - del _ARCHIVE_FORMATS[name] - -def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0, - dry_run=0, owner=None, group=None, logger=None): - """Create an archive file (eg. zip or tar). - - 'base_name' is the name of the file to create, minus any format-specific - extension; 'format' is the archive format: one of "zip", "tar", "bztar" - or "gztar". - - 'root_dir' is a directory that will be the root directory of the - archive; ie. we typically chdir into 'root_dir' before creating the - archive. 'base_dir' is the directory where we start archiving from; - ie. 'base_dir' will be the common prefix of all files and - directories in the archive. 'root_dir' and 'base_dir' both default - to the current directory. Returns the name of the archive file. - - 'owner' and 'group' are used when creating a tar archive. By default, - uses the current owner and group. - """ - save_cwd = os.getcwd() - if root_dir is not None: - if logger is not None: - logger.debug("changing into '%s'", root_dir) - base_name = os.path.abspath(base_name) - if not dry_run: - os.chdir(root_dir) - - if base_dir is None: - base_dir = os.curdir - - kwargs = {'dry_run': dry_run, 'logger': logger} - - try: - format_info = _ARCHIVE_FORMATS[format] - except KeyError: - raise ValueError("unknown archive format '%s'" % format) - - func = format_info[0] - for arg, val in format_info[1]: - kwargs[arg] = val - - if format != 'zip': - kwargs['owner'] = owner - kwargs['group'] = group - - try: - filename = func(base_name, base_dir, **kwargs) - finally: - if root_dir is not None: - if logger is not None: - logger.debug("changing back to '%s'", save_cwd) - os.chdir(save_cwd) - - return filename - - -def get_unpack_formats(): - """Returns a list of supported formats for unpacking. - - Each element of the returned sequence is a tuple - (name, extensions, description) - """ - formats = [(name, info[0], info[3]) for name, info in - _UNPACK_FORMATS.items()] - formats.sort() - return formats - -def _check_unpack_options(extensions, function, extra_args): - """Checks what gets registered as an unpacker.""" - # first make sure no other unpacker is registered for this extension - existing_extensions = {} - for name, info in _UNPACK_FORMATS.items(): - for ext in info[0]: - existing_extensions[ext] = name - - for extension in extensions: - if extension in existing_extensions: - msg = '%s is already registered for "%s"' - raise RegistryError(msg % (extension, - existing_extensions[extension])) - - if not isinstance(function, Callable): - raise TypeError('The registered function must be a callable') - - -def register_unpack_format(name, extensions, function, extra_args=None, - description=''): - """Registers an unpack format. - - `name` is the name of the format. `extensions` is a list of extensions - corresponding to the format. - - `function` is the callable that will be - used to unpack archives. The callable will receive archives to unpack. - If it's unable to handle an archive, it needs to raise a ReadError - exception. - - If provided, `extra_args` is a sequence of - (name, value) tuples that will be passed as arguments to the callable. - description can be provided to describe the format, and will be returned - by the get_unpack_formats() function. - """ - if extra_args is None: - extra_args = [] - _check_unpack_options(extensions, function, extra_args) - _UNPACK_FORMATS[name] = extensions, function, extra_args, description - -def unregister_unpack_format(name): - """Removes the pack format from the registry.""" - del _UNPACK_FORMATS[name] - -def _ensure_directory(path): - """Ensure that the parent directory of `path` exists""" - dirname = os.path.dirname(path) - if not os.path.isdir(dirname): - os.makedirs(dirname) - -def _unpack_zipfile(filename, extract_dir): - """Unpack zip `filename` to `extract_dir` - """ - try: - import zipfile - except ImportError: - raise ReadError('zlib not supported, cannot unpack this archive.') - - if not zipfile.is_zipfile(filename): - raise ReadError("%s is not a zip file" % filename) - - zip = zipfile.ZipFile(filename) - try: - for info in zip.infolist(): - name = info.filename - - # don't extract absolute paths or ones with .. in them - if name.startswith('/') or '..' in name: - continue - - target = os.path.join(extract_dir, *name.split('/')) - if not target: - continue - - _ensure_directory(target) - if not name.endswith('/'): - # file - data = zip.read(info.filename) - f = open(target, 'wb') - try: - f.write(data) - finally: - f.close() - del data - finally: - zip.close() - -def _unpack_tarfile(filename, extract_dir): - """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir` - """ - try: - tarobj = tarfile.open(filename) - except tarfile.TarError: - raise ReadError( - "%s is not a compressed or uncompressed tar file" % filename) - try: - tarobj.extractall(extract_dir) - finally: - tarobj.close() - -_UNPACK_FORMATS = { - 'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"), - 'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"), - 'zip': (['.zip'], _unpack_zipfile, [], "ZIP file") - } - -if _BZ2_SUPPORTED: - _UNPACK_FORMATS['bztar'] = (['.bz2'], _unpack_tarfile, [], - "bzip2'ed tar-file") - -def _find_unpack_format(filename): - for name, info in _UNPACK_FORMATS.items(): - for extension in info[0]: - if filename.endswith(extension): - return name - return None - -def unpack_archive(filename, extract_dir=None, format=None): - """Unpack an archive. - - `filename` is the name of the archive. - - `extract_dir` is the name of the target directory, where the archive - is unpacked. If not provided, the current working directory is used. - - `format` is the archive format: one of "zip", "tar", or "gztar". Or any - other registered format. If not provided, unpack_archive will use the - filename extension and see if an unpacker was registered for that - extension. - - In case none is found, a ValueError is raised. - """ - if extract_dir is None: - extract_dir = os.getcwd() - - if format is not None: - try: - format_info = _UNPACK_FORMATS[format] - except KeyError: - raise ValueError("Unknown unpack format '{0}'".format(format)) - - func = format_info[1] - func(filename, extract_dir, **dict(format_info[2])) - else: - # we need to look at the registered unpackers supported extensions - format = _find_unpack_format(filename) - if format is None: - raise ReadError("Unknown archive format '{0}'".format(filename)) - - func = _UNPACK_FORMATS[format][1] - kwargs = dict(_UNPACK_FORMATS[format][2]) - func(filename, extract_dir, **kwargs) diff --git a/pipenv/patched/notpip/_vendor/distlib/_backport/sysconfig.cfg b/pipenv/patched/notpip/_vendor/distlib/_backport/sysconfig.cfg deleted file mode 100644 index 1746bd01c1..0000000000 --- a/pipenv/patched/notpip/_vendor/distlib/_backport/sysconfig.cfg +++ /dev/null @@ -1,84 +0,0 @@ -[posix_prefix] -# Configuration directories. Some of these come straight out of the -# configure script. They are for implementing the other variables, not to -# be used directly in [resource_locations]. -confdir = /etc -datadir = /usr/share -libdir = /usr/lib -statedir = /var -# User resource directory -local = ~/.local/{distribution.name} - -stdlib = {base}/lib/python{py_version_short} -platstdlib = {platbase}/lib/python{py_version_short} -purelib = {base}/lib/python{py_version_short}/site-packages -platlib = {platbase}/lib/python{py_version_short}/site-packages -include = {base}/include/python{py_version_short}{abiflags} -platinclude = {platbase}/include/python{py_version_short}{abiflags} -data = {base} - -[posix_home] -stdlib = {base}/lib/python -platstdlib = {base}/lib/python -purelib = {base}/lib/python -platlib = {base}/lib/python -include = {base}/include/python -platinclude = {base}/include/python -scripts = {base}/bin -data = {base} - -[nt] -stdlib = {base}/Lib -platstdlib = {base}/Lib -purelib = {base}/Lib/site-packages -platlib = {base}/Lib/site-packages -include = {base}/Include -platinclude = {base}/Include -scripts = {base}/Scripts -data = {base} - -[os2] -stdlib = {base}/Lib -platstdlib = {base}/Lib -purelib = {base}/Lib/site-packages -platlib = {base}/Lib/site-packages -include = {base}/Include -platinclude = {base}/Include -scripts = {base}/Scripts -data = {base} - -[os2_home] -stdlib = {userbase}/lib/python{py_version_short} -platstdlib = {userbase}/lib/python{py_version_short} -purelib = {userbase}/lib/python{py_version_short}/site-packages -platlib = {userbase}/lib/python{py_version_short}/site-packages -include = {userbase}/include/python{py_version_short} -scripts = {userbase}/bin -data = {userbase} - -[nt_user] -stdlib = {userbase}/Python{py_version_nodot} -platstdlib = {userbase}/Python{py_version_nodot} -purelib = {userbase}/Python{py_version_nodot}/site-packages -platlib = {userbase}/Python{py_version_nodot}/site-packages -include = {userbase}/Python{py_version_nodot}/Include -scripts = {userbase}/Scripts -data = {userbase} - -[posix_user] -stdlib = {userbase}/lib/python{py_version_short} -platstdlib = {userbase}/lib/python{py_version_short} -purelib = {userbase}/lib/python{py_version_short}/site-packages -platlib = {userbase}/lib/python{py_version_short}/site-packages -include = {userbase}/include/python{py_version_short} -scripts = {userbase}/bin -data = {userbase} - -[osx_framework_user] -stdlib = {userbase}/lib/python -platstdlib = {userbase}/lib/python -purelib = {userbase}/lib/python/site-packages -platlib = {userbase}/lib/python/site-packages -include = {userbase}/include -scripts = {userbase}/bin -data = {userbase} diff --git a/pipenv/patched/notpip/_vendor/distlib/_backport/sysconfig.py b/pipenv/patched/notpip/_vendor/distlib/_backport/sysconfig.py deleted file mode 100644 index b470a373c8..0000000000 --- a/pipenv/patched/notpip/_vendor/distlib/_backport/sysconfig.py +++ /dev/null @@ -1,786 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 The Python Software Foundation. -# See LICENSE.txt and CONTRIBUTORS.txt. -# -"""Access to Python's configuration information.""" - -import codecs -import os -import re -import sys -from os.path import pardir, realpath -try: - import configparser -except ImportError: - import ConfigParser as configparser - - -__all__ = [ - 'get_config_h_filename', - 'get_config_var', - 'get_config_vars', - 'get_makefile_filename', - 'get_path', - 'get_path_names', - 'get_paths', - 'get_platform', - 'get_python_version', - 'get_scheme_names', - 'parse_config_h', -] - - -def _safe_realpath(path): - try: - return realpath(path) - except OSError: - return path - - -if sys.executable: - _PROJECT_BASE = os.path.dirname(_safe_realpath(sys.executable)) -else: - # sys.executable can be empty if argv[0] has been changed and Python is - # unable to retrieve the real program name - _PROJECT_BASE = _safe_realpath(os.getcwd()) - -if os.name == "nt" and "pcbuild" in _PROJECT_BASE[-8:].lower(): - _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir)) -# PC/VS7.1 -if os.name == "nt" and "\\pc\\v" in _PROJECT_BASE[-10:].lower(): - _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir)) -# PC/AMD64 -if os.name == "nt" and "\\pcbuild\\amd64" in _PROJECT_BASE[-14:].lower(): - _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir)) - - -def is_python_build(): - for fn in ("Setup.dist", "Setup.local"): - if os.path.isfile(os.path.join(_PROJECT_BASE, "Modules", fn)): - return True - return False - -_PYTHON_BUILD = is_python_build() - -_cfg_read = False - -def _ensure_cfg_read(): - global _cfg_read - if not _cfg_read: - from ..resources import finder - backport_package = __name__.rsplit('.', 1)[0] - _finder = finder(backport_package) - _cfgfile = _finder.find('sysconfig.cfg') - assert _cfgfile, 'sysconfig.cfg exists' - with _cfgfile.as_stream() as s: - _SCHEMES.readfp(s) - if _PYTHON_BUILD: - for scheme in ('posix_prefix', 'posix_home'): - _SCHEMES.set(scheme, 'include', '{srcdir}/Include') - _SCHEMES.set(scheme, 'platinclude', '{projectbase}/.') - - _cfg_read = True - - -_SCHEMES = configparser.RawConfigParser() -_VAR_REPL = re.compile(r'\{([^{]*?)\}') - -def _expand_globals(config): - _ensure_cfg_read() - if config.has_section('globals'): - globals = config.items('globals') - else: - globals = tuple() - - sections = config.sections() - for section in sections: - if section == 'globals': - continue - for option, value in globals: - if config.has_option(section, option): - continue - config.set(section, option, value) - config.remove_section('globals') - - # now expanding local variables defined in the cfg file - # - for section in config.sections(): - variables = dict(config.items(section)) - - def _replacer(matchobj): - name = matchobj.group(1) - if name in variables: - return variables[name] - return matchobj.group(0) - - for option, value in config.items(section): - config.set(section, option, _VAR_REPL.sub(_replacer, value)) - -#_expand_globals(_SCHEMES) - -_PY_VERSION = '%s.%s.%s' % sys.version_info[:3] -_PY_VERSION_SHORT = '%s.%s' % sys.version_info[:2] -_PY_VERSION_SHORT_NO_DOT = '%s%s' % sys.version_info[:2] -_PREFIX = os.path.normpath(sys.prefix) -_EXEC_PREFIX = os.path.normpath(sys.exec_prefix) -_CONFIG_VARS = None -_USER_BASE = None - - -def _subst_vars(path, local_vars): - """In the string `path`, replace tokens like {some.thing} with the - corresponding value from the map `local_vars`. - - If there is no corresponding value, leave the token unchanged. - """ - def _replacer(matchobj): - name = matchobj.group(1) - if name in local_vars: - return local_vars[name] - elif name in os.environ: - return os.environ[name] - return matchobj.group(0) - return _VAR_REPL.sub(_replacer, path) - - -def _extend_dict(target_dict, other_dict): - target_keys = target_dict.keys() - for key, value in other_dict.items(): - if key in target_keys: - continue - target_dict[key] = value - - -def _expand_vars(scheme, vars): - res = {} - if vars is None: - vars = {} - _extend_dict(vars, get_config_vars()) - - for key, value in _SCHEMES.items(scheme): - if os.name in ('posix', 'nt'): - value = os.path.expanduser(value) - res[key] = os.path.normpath(_subst_vars(value, vars)) - return res - - -def format_value(value, vars): - def _replacer(matchobj): - name = matchobj.group(1) - if name in vars: - return vars[name] - return matchobj.group(0) - return _VAR_REPL.sub(_replacer, value) - - -def _get_default_scheme(): - if os.name == 'posix': - # the default scheme for posix is posix_prefix - return 'posix_prefix' - return os.name - - -def _getuserbase(): - env_base = os.environ.get("PYTHONUSERBASE", None) - - def joinuser(*args): - return os.path.expanduser(os.path.join(*args)) - - # what about 'os2emx', 'riscos' ? - if os.name == "nt": - base = os.environ.get("APPDATA") or "~" - if env_base: - return env_base - else: - return joinuser(base, "Python") - - if sys.platform == "darwin": - framework = get_config_var("PYTHONFRAMEWORK") - if framework: - if env_base: - return env_base - else: - return joinuser("~", "Library", framework, "%d.%d" % - sys.version_info[:2]) - - if env_base: - return env_base - else: - return joinuser("~", ".local") - - -def _parse_makefile(filename, vars=None): - """Parse a Makefile-style file. - - A dictionary containing name/value pairs is returned. If an - optional dictionary is passed in as the second argument, it is - used instead of a new dictionary. - """ - # Regexes needed for parsing Makefile (and similar syntaxes, - # like old-style Setup files). - _variable_rx = re.compile(r"([a-zA-Z][a-zA-Z0-9_]+)\s*=\s*(.*)") - _findvar1_rx = re.compile(r"\$\(([A-Za-z][A-Za-z0-9_]*)\)") - _findvar2_rx = re.compile(r"\${([A-Za-z][A-Za-z0-9_]*)}") - - if vars is None: - vars = {} - done = {} - notdone = {} - - with codecs.open(filename, encoding='utf-8', errors="surrogateescape") as f: - lines = f.readlines() - - for line in lines: - if line.startswith('#') or line.strip() == '': - continue - m = _variable_rx.match(line) - if m: - n, v = m.group(1, 2) - v = v.strip() - # `$$' is a literal `$' in make - tmpv = v.replace('$$', '') - - if "$" in tmpv: - notdone[n] = v - else: - try: - v = int(v) - except ValueError: - # insert literal `$' - done[n] = v.replace('$$', '$') - else: - done[n] = v - - # do variable interpolation here - variables = list(notdone.keys()) - - # Variables with a 'PY_' prefix in the makefile. These need to - # be made available without that prefix through sysconfig. - # Special care is needed to ensure that variable expansion works, even - # if the expansion uses the name without a prefix. - renamed_variables = ('CFLAGS', 'LDFLAGS', 'CPPFLAGS') - - while len(variables) > 0: - for name in tuple(variables): - value = notdone[name] - m = _findvar1_rx.search(value) or _findvar2_rx.search(value) - if m is not None: - n = m.group(1) - found = True - if n in done: - item = str(done[n]) - elif n in notdone: - # get it on a subsequent round - found = False - elif n in os.environ: - # do it like make: fall back to environment - item = os.environ[n] - - elif n in renamed_variables: - if (name.startswith('PY_') and - name[3:] in renamed_variables): - item = "" - - elif 'PY_' + n in notdone: - found = False - - else: - item = str(done['PY_' + n]) - - else: - done[n] = item = "" - - if found: - after = value[m.end():] - value = value[:m.start()] + item + after - if "$" in after: - notdone[name] = value - else: - try: - value = int(value) - except ValueError: - done[name] = value.strip() - else: - done[name] = value - variables.remove(name) - - if (name.startswith('PY_') and - name[3:] in renamed_variables): - - name = name[3:] - if name not in done: - done[name] = value - - else: - # bogus variable reference (e.g. "prefix=$/opt/python"); - # just drop it since we can't deal - done[name] = value - variables.remove(name) - - # strip spurious spaces - for k, v in done.items(): - if isinstance(v, str): - done[k] = v.strip() - - # save the results in the global dictionary - vars.update(done) - return vars - - -def get_makefile_filename(): - """Return the path of the Makefile.""" - if _PYTHON_BUILD: - return os.path.join(_PROJECT_BASE, "Makefile") - if hasattr(sys, 'abiflags'): - config_dir_name = 'config-%s%s' % (_PY_VERSION_SHORT, sys.abiflags) - else: - config_dir_name = 'config' - return os.path.join(get_path('stdlib'), config_dir_name, 'Makefile') - - -def _init_posix(vars): - """Initialize the module as appropriate for POSIX systems.""" - # load the installed Makefile: - makefile = get_makefile_filename() - try: - _parse_makefile(makefile, vars) - except IOError as e: - msg = "invalid Python installation: unable to open %s" % makefile - if hasattr(e, "strerror"): - msg = msg + " (%s)" % e.strerror - raise IOError(msg) - # load the installed pyconfig.h: - config_h = get_config_h_filename() - try: - with open(config_h) as f: - parse_config_h(f, vars) - except IOError as e: - msg = "invalid Python installation: unable to open %s" % config_h - if hasattr(e, "strerror"): - msg = msg + " (%s)" % e.strerror - raise IOError(msg) - # On AIX, there are wrong paths to the linker scripts in the Makefile - # -- these paths are relative to the Python source, but when installed - # the scripts are in another directory. - if _PYTHON_BUILD: - vars['LDSHARED'] = vars['BLDSHARED'] - - -def _init_non_posix(vars): - """Initialize the module as appropriate for NT""" - # set basic install directories - vars['LIBDEST'] = get_path('stdlib') - vars['BINLIBDEST'] = get_path('platstdlib') - vars['INCLUDEPY'] = get_path('include') - vars['SO'] = '.pyd' - vars['EXE'] = '.exe' - vars['VERSION'] = _PY_VERSION_SHORT_NO_DOT - vars['BINDIR'] = os.path.dirname(_safe_realpath(sys.executable)) - -# -# public APIs -# - - -def parse_config_h(fp, vars=None): - """Parse a config.h-style file. - - A dictionary containing name/value pairs is returned. If an - optional dictionary is passed in as the second argument, it is - used instead of a new dictionary. - """ - if vars is None: - vars = {} - define_rx = re.compile("#define ([A-Z][A-Za-z0-9_]+) (.*)\n") - undef_rx = re.compile("/[*] #undef ([A-Z][A-Za-z0-9_]+) [*]/\n") - - while True: - line = fp.readline() - if not line: - break - m = define_rx.match(line) - if m: - n, v = m.group(1, 2) - try: - v = int(v) - except ValueError: - pass - vars[n] = v - else: - m = undef_rx.match(line) - if m: - vars[m.group(1)] = 0 - return vars - - -def get_config_h_filename(): - """Return the path of pyconfig.h.""" - if _PYTHON_BUILD: - if os.name == "nt": - inc_dir = os.path.join(_PROJECT_BASE, "PC") - else: - inc_dir = _PROJECT_BASE - else: - inc_dir = get_path('platinclude') - return os.path.join(inc_dir, 'pyconfig.h') - - -def get_scheme_names(): - """Return a tuple containing the schemes names.""" - return tuple(sorted(_SCHEMES.sections())) - - -def get_path_names(): - """Return a tuple containing the paths names.""" - # xxx see if we want a static list - return _SCHEMES.options('posix_prefix') - - -def get_paths(scheme=_get_default_scheme(), vars=None, expand=True): - """Return a mapping containing an install scheme. - - ``scheme`` is the install scheme name. If not provided, it will - return the default scheme for the current platform. - """ - _ensure_cfg_read() - if expand: - return _expand_vars(scheme, vars) - else: - return dict(_SCHEMES.items(scheme)) - - -def get_path(name, scheme=_get_default_scheme(), vars=None, expand=True): - """Return a path corresponding to the scheme. - - ``scheme`` is the install scheme name. - """ - return get_paths(scheme, vars, expand)[name] - - -def get_config_vars(*args): - """With no arguments, return a dictionary of all configuration - variables relevant for the current platform. - - On Unix, this means every variable defined in Python's installed Makefile; - On Windows and Mac OS it's a much smaller set. - - With arguments, return a list of values that result from looking up - each argument in the configuration variable dictionary. - """ - global _CONFIG_VARS - if _CONFIG_VARS is None: - _CONFIG_VARS = {} - # Normalized versions of prefix and exec_prefix are handy to have; - # in fact, these are the standard versions used most places in the - # distutils2 module. - _CONFIG_VARS['prefix'] = _PREFIX - _CONFIG_VARS['exec_prefix'] = _EXEC_PREFIX - _CONFIG_VARS['py_version'] = _PY_VERSION - _CONFIG_VARS['py_version_short'] = _PY_VERSION_SHORT - _CONFIG_VARS['py_version_nodot'] = _PY_VERSION[0] + _PY_VERSION[2] - _CONFIG_VARS['base'] = _PREFIX - _CONFIG_VARS['platbase'] = _EXEC_PREFIX - _CONFIG_VARS['projectbase'] = _PROJECT_BASE - try: - _CONFIG_VARS['abiflags'] = sys.abiflags - except AttributeError: - # sys.abiflags may not be defined on all platforms. - _CONFIG_VARS['abiflags'] = '' - - if os.name in ('nt', 'os2'): - _init_non_posix(_CONFIG_VARS) - if os.name == 'posix': - _init_posix(_CONFIG_VARS) - # Setting 'userbase' is done below the call to the - # init function to enable using 'get_config_var' in - # the init-function. - if sys.version >= '2.6': - _CONFIG_VARS['userbase'] = _getuserbase() - - if 'srcdir' not in _CONFIG_VARS: - _CONFIG_VARS['srcdir'] = _PROJECT_BASE - else: - _CONFIG_VARS['srcdir'] = _safe_realpath(_CONFIG_VARS['srcdir']) - - # Convert srcdir into an absolute path if it appears necessary. - # Normally it is relative to the build directory. However, during - # testing, for example, we might be running a non-installed python - # from a different directory. - if _PYTHON_BUILD and os.name == "posix": - base = _PROJECT_BASE - try: - cwd = os.getcwd() - except OSError: - cwd = None - if (not os.path.isabs(_CONFIG_VARS['srcdir']) and - base != cwd): - # srcdir is relative and we are not in the same directory - # as the executable. Assume executable is in the build - # directory and make srcdir absolute. - srcdir = os.path.join(base, _CONFIG_VARS['srcdir']) - _CONFIG_VARS['srcdir'] = os.path.normpath(srcdir) - - if sys.platform == 'darwin': - kernel_version = os.uname()[2] # Kernel version (8.4.3) - major_version = int(kernel_version.split('.')[0]) - - if major_version < 8: - # On Mac OS X before 10.4, check if -arch and -isysroot - # are in CFLAGS or LDFLAGS and remove them if they are. - # This is needed when building extensions on a 10.3 system - # using a universal build of python. - for key in ('LDFLAGS', 'BASECFLAGS', - # a number of derived variables. These need to be - # patched up as well. - 'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'): - flags = _CONFIG_VARS[key] - flags = re.sub(r'-arch\s+\w+\s', ' ', flags) - flags = re.sub('-isysroot [^ \t]*', ' ', flags) - _CONFIG_VARS[key] = flags - else: - # Allow the user to override the architecture flags using - # an environment variable. - # NOTE: This name was introduced by Apple in OSX 10.5 and - # is used by several scripting languages distributed with - # that OS release. - if 'ARCHFLAGS' in os.environ: - arch = os.environ['ARCHFLAGS'] - for key in ('LDFLAGS', 'BASECFLAGS', - # a number of derived variables. These need to be - # patched up as well. - 'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'): - - flags = _CONFIG_VARS[key] - flags = re.sub(r'-arch\s+\w+\s', ' ', flags) - flags = flags + ' ' + arch - _CONFIG_VARS[key] = flags - - # If we're on OSX 10.5 or later and the user tries to - # compiles an extension using an SDK that is not present - # on the current machine it is better to not use an SDK - # than to fail. - # - # The major usecase for this is users using a Python.org - # binary installer on OSX 10.6: that installer uses - # the 10.4u SDK, but that SDK is not installed by default - # when you install Xcode. - # - CFLAGS = _CONFIG_VARS.get('CFLAGS', '') - m = re.search(r'-isysroot\s+(\S+)', CFLAGS) - if m is not None: - sdk = m.group(1) - if not os.path.exists(sdk): - for key in ('LDFLAGS', 'BASECFLAGS', - # a number of derived variables. These need to be - # patched up as well. - 'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'): - - flags = _CONFIG_VARS[key] - flags = re.sub(r'-isysroot\s+\S+(\s|$)', ' ', flags) - _CONFIG_VARS[key] = flags - - if args: - vals = [] - for name in args: - vals.append(_CONFIG_VARS.get(name)) - return vals - else: - return _CONFIG_VARS - - -def get_config_var(name): - """Return the value of a single variable using the dictionary returned by - 'get_config_vars()'. - - Equivalent to get_config_vars().get(name) - """ - return get_config_vars().get(name) - - -def get_platform(): - """Return a string that identifies the current platform. - - This is used mainly to distinguish platform-specific build directories and - platform-specific built distributions. Typically includes the OS name - and version and the architecture (as supplied by 'os.uname()'), - although the exact information included depends on the OS; eg. for IRIX - the architecture isn't particularly important (IRIX only runs on SGI - hardware), but for Linux the kernel version isn't particularly - important. - - Examples of returned values: - linux-i586 - linux-alpha (?) - solaris-2.6-sun4u - irix-5.3 - irix64-6.2 - - Windows will return one of: - win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc) - win-ia64 (64bit Windows on Itanium) - win32 (all others - specifically, sys.platform is returned) - - For other non-POSIX platforms, currently just returns 'sys.platform'. - """ - if os.name == 'nt': - # sniff sys.version for architecture. - prefix = " bit (" - i = sys.version.find(prefix) - if i == -1: - return sys.platform - j = sys.version.find(")", i) - look = sys.version[i+len(prefix):j].lower() - if look == 'amd64': - return 'win-amd64' - if look == 'itanium': - return 'win-ia64' - return sys.platform - - if os.name != "posix" or not hasattr(os, 'uname'): - # XXX what about the architecture? NT is Intel or Alpha, - # Mac OS is M68k or PPC, etc. - return sys.platform - - # Try to distinguish various flavours of Unix - osname, host, release, version, machine = os.uname() - - # Convert the OS name to lowercase, remove '/' characters - # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh") - osname = osname.lower().replace('/', '') - machine = machine.replace(' ', '_') - machine = machine.replace('/', '-') - - if osname[:5] == "linux": - # At least on Linux/Intel, 'machine' is the processor -- - # i386, etc. - # XXX what about Alpha, SPARC, etc? - return "%s-%s" % (osname, machine) - elif osname[:5] == "sunos": - if release[0] >= "5": # SunOS 5 == Solaris 2 - osname = "solaris" - release = "%d.%s" % (int(release[0]) - 3, release[2:]) - # fall through to standard osname-release-machine representation - elif osname[:4] == "irix": # could be "irix64"! - return "%s-%s" % (osname, release) - elif osname[:3] == "aix": - return "%s-%s.%s" % (osname, version, release) - elif osname[:6] == "cygwin": - osname = "cygwin" - rel_re = re.compile(r'[\d.]+') - m = rel_re.match(release) - if m: - release = m.group() - elif osname[:6] == "darwin": - # - # For our purposes, we'll assume that the system version from - # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set - # to. This makes the compatibility story a bit more sane because the - # machine is going to compile and link as if it were - # MACOSX_DEPLOYMENT_TARGET. - cfgvars = get_config_vars() - macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET') - - if True: - # Always calculate the release of the running machine, - # needed to determine if we can build fat binaries or not. - - macrelease = macver - # Get the system version. Reading this plist is a documented - # way to get the system version (see the documentation for - # the Gestalt Manager) - try: - f = open('/System/Library/CoreServices/SystemVersion.plist') - except IOError: - # We're on a plain darwin box, fall back to the default - # behaviour. - pass - else: - try: - m = re.search(r'ProductUserVisibleVersion\s*' - r'(.*?)', f.read()) - finally: - f.close() - if m is not None: - macrelease = '.'.join(m.group(1).split('.')[:2]) - # else: fall back to the default behaviour - - if not macver: - macver = macrelease - - if macver: - release = macver - osname = "macosx" - - if ((macrelease + '.') >= '10.4.' and - '-arch' in get_config_vars().get('CFLAGS', '').strip()): - # The universal build will build fat binaries, but not on - # systems before 10.4 - # - # Try to detect 4-way universal builds, those have machine-type - # 'universal' instead of 'fat'. - - machine = 'fat' - cflags = get_config_vars().get('CFLAGS') - - archs = re.findall(r'-arch\s+(\S+)', cflags) - archs = tuple(sorted(set(archs))) - - if len(archs) == 1: - machine = archs[0] - elif archs == ('i386', 'ppc'): - machine = 'fat' - elif archs == ('i386', 'x86_64'): - machine = 'intel' - elif archs == ('i386', 'ppc', 'x86_64'): - machine = 'fat3' - elif archs == ('ppc64', 'x86_64'): - machine = 'fat64' - elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'): - machine = 'universal' - else: - raise ValueError( - "Don't know machine value for archs=%r" % (archs,)) - - elif machine == 'i386': - # On OSX the machine type returned by uname is always the - # 32-bit variant, even if the executable architecture is - # the 64-bit variant - if sys.maxsize >= 2**32: - machine = 'x86_64' - - elif machine in ('PowerPC', 'Power_Macintosh'): - # Pick a sane name for the PPC architecture. - # See 'i386' case - if sys.maxsize >= 2**32: - machine = 'ppc64' - else: - machine = 'ppc' - - return "%s-%s-%s" % (osname, release, machine) - - -def get_python_version(): - return _PY_VERSION_SHORT - - -def _print_dict(title, data): - for index, (key, value) in enumerate(sorted(data.items())): - if index == 0: - print('%s: ' % (title)) - print('\t%s = "%s"' % (key, value)) - - -def _main(): - """Display all information sysconfig detains.""" - print('Platform: "%s"' % get_platform()) - print('Python version: "%s"' % get_python_version()) - print('Current installation scheme: "%s"' % _get_default_scheme()) - print() - _print_dict('Paths', get_paths()) - print() - _print_dict('Variables', get_config_vars()) - - -if __name__ == '__main__': - _main() diff --git a/pipenv/patched/notpip/_vendor/distlib/_backport/tarfile.py b/pipenv/patched/notpip/_vendor/distlib/_backport/tarfile.py deleted file mode 100644 index d66d856637..0000000000 --- a/pipenv/patched/notpip/_vendor/distlib/_backport/tarfile.py +++ /dev/null @@ -1,2607 +0,0 @@ -#------------------------------------------------------------------- -# tarfile.py -#------------------------------------------------------------------- -# Copyright (C) 2002 Lars Gustaebel -# All rights reserved. -# -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation -# files (the "Software"), to deal in the Software without -# restriction, including without limitation the rights to use, -# copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following -# conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -from __future__ import print_function - -"""Read from and write to tar format archives. -""" - -__version__ = "$Revision$" - -version = "0.9.0" -__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)" -__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $" -__cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $" -__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend." - -#--------- -# Imports -#--------- -import sys -import os -import stat -import errno -import time -import struct -import copy -import re - -try: - import grp, pwd -except ImportError: - grp = pwd = None - -# os.symlink on Windows prior to 6.0 raises NotImplementedError -symlink_exception = (AttributeError, NotImplementedError) -try: - # WindowsError (1314) will be raised if the caller does not hold the - # SeCreateSymbolicLinkPrivilege privilege - symlink_exception += (WindowsError,) -except NameError: - pass - -# from tarfile import * -__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"] - -if sys.version_info[0] < 3: - import __builtin__ as builtins -else: - import builtins - -_open = builtins.open # Since 'open' is TarFile.open - -#--------------------------------------------------------- -# tar constants -#--------------------------------------------------------- -NUL = b"\0" # the null character -BLOCKSIZE = 512 # length of processing blocks -RECORDSIZE = BLOCKSIZE * 20 # length of records -GNU_MAGIC = b"ustar \0" # magic gnu tar string -POSIX_MAGIC = b"ustar\x0000" # magic posix tar string - -LENGTH_NAME = 100 # maximum length of a filename -LENGTH_LINK = 100 # maximum length of a linkname -LENGTH_PREFIX = 155 # maximum length of the prefix field - -REGTYPE = b"0" # regular file -AREGTYPE = b"\0" # regular file -LNKTYPE = b"1" # link (inside tarfile) -SYMTYPE = b"2" # symbolic link -CHRTYPE = b"3" # character special device -BLKTYPE = b"4" # block special device -DIRTYPE = b"5" # directory -FIFOTYPE = b"6" # fifo special device -CONTTYPE = b"7" # contiguous file - -GNUTYPE_LONGNAME = b"L" # GNU tar longname -GNUTYPE_LONGLINK = b"K" # GNU tar longlink -GNUTYPE_SPARSE = b"S" # GNU tar sparse file - -XHDTYPE = b"x" # POSIX.1-2001 extended header -XGLTYPE = b"g" # POSIX.1-2001 global header -SOLARIS_XHDTYPE = b"X" # Solaris extended header - -USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format -GNU_FORMAT = 1 # GNU tar format -PAX_FORMAT = 2 # POSIX.1-2001 (pax) format -DEFAULT_FORMAT = GNU_FORMAT - -#--------------------------------------------------------- -# tarfile constants -#--------------------------------------------------------- -# File types that tarfile supports: -SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, - SYMTYPE, DIRTYPE, FIFOTYPE, - CONTTYPE, CHRTYPE, BLKTYPE, - GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, - GNUTYPE_SPARSE) - -# File types that will be treated as a regular file. -REGULAR_TYPES = (REGTYPE, AREGTYPE, - CONTTYPE, GNUTYPE_SPARSE) - -# File types that are part of the GNU tar format. -GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, - GNUTYPE_SPARSE) - -# Fields from a pax header that override a TarInfo attribute. -PAX_FIELDS = ("path", "linkpath", "size", "mtime", - "uid", "gid", "uname", "gname") - -# Fields from a pax header that are affected by hdrcharset. -PAX_NAME_FIELDS = set(("path", "linkpath", "uname", "gname")) - -# Fields in a pax header that are numbers, all other fields -# are treated as strings. -PAX_NUMBER_FIELDS = { - "atime": float, - "ctime": float, - "mtime": float, - "uid": int, - "gid": int, - "size": int -} - -#--------------------------------------------------------- -# Bits used in the mode field, values in octal. -#--------------------------------------------------------- -S_IFLNK = 0o120000 # symbolic link -S_IFREG = 0o100000 # regular file -S_IFBLK = 0o060000 # block device -S_IFDIR = 0o040000 # directory -S_IFCHR = 0o020000 # character device -S_IFIFO = 0o010000 # fifo - -TSUID = 0o4000 # set UID on execution -TSGID = 0o2000 # set GID on execution -TSVTX = 0o1000 # reserved - -TUREAD = 0o400 # read by owner -TUWRITE = 0o200 # write by owner -TUEXEC = 0o100 # execute/search by owner -TGREAD = 0o040 # read by group -TGWRITE = 0o020 # write by group -TGEXEC = 0o010 # execute/search by group -TOREAD = 0o004 # read by other -TOWRITE = 0o002 # write by other -TOEXEC = 0o001 # execute/search by other - -#--------------------------------------------------------- -# initialization -#--------------------------------------------------------- -if os.name in ("nt", "ce"): - ENCODING = "utf-8" -else: - ENCODING = sys.getfilesystemencoding() - -#--------------------------------------------------------- -# Some useful functions -#--------------------------------------------------------- - -def stn(s, length, encoding, errors): - """Convert a string to a null-terminated bytes object. - """ - s = s.encode(encoding, errors) - return s[:length] + (length - len(s)) * NUL - -def nts(s, encoding, errors): - """Convert a null-terminated bytes object to a string. - """ - p = s.find(b"\0") - if p != -1: - s = s[:p] - return s.decode(encoding, errors) - -def nti(s): - """Convert a number field to a python number. - """ - # There are two possible encodings for a number field, see - # itn() below. - if s[0] != chr(0o200): - try: - n = int(nts(s, "ascii", "strict") or "0", 8) - except ValueError: - raise InvalidHeaderError("invalid header") - else: - n = 0 - for i in range(len(s) - 1): - n <<= 8 - n += ord(s[i + 1]) - return n - -def itn(n, digits=8, format=DEFAULT_FORMAT): - """Convert a python number to a number field. - """ - # POSIX 1003.1-1988 requires numbers to be encoded as a string of - # octal digits followed by a null-byte, this allows values up to - # (8**(digits-1))-1. GNU tar allows storing numbers greater than - # that if necessary. A leading 0o200 byte indicates this particular - # encoding, the following digits-1 bytes are a big-endian - # representation. This allows values up to (256**(digits-1))-1. - if 0 <= n < 8 ** (digits - 1): - s = ("%0*o" % (digits - 1, n)).encode("ascii") + NUL - else: - if format != GNU_FORMAT or n >= 256 ** (digits - 1): - raise ValueError("overflow in number field") - - if n < 0: - # XXX We mimic GNU tar's behaviour with negative numbers, - # this could raise OverflowError. - n = struct.unpack("L", struct.pack("l", n))[0] - - s = bytearray() - for i in range(digits - 1): - s.insert(0, n & 0o377) - n >>= 8 - s.insert(0, 0o200) - return s - -def calc_chksums(buf): - """Calculate the checksum for a member's header by summing up all - characters except for the chksum field which is treated as if - it was filled with spaces. According to the GNU tar sources, - some tars (Sun and NeXT) calculate chksum with signed char, - which will be different if there are chars in the buffer with - the high bit set. So we calculate two checksums, unsigned and - signed. - """ - unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512])) - signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512])) - return unsigned_chksum, signed_chksum - -def copyfileobj(src, dst, length=None): - """Copy length bytes from fileobj src to fileobj dst. - If length is None, copy the entire content. - """ - if length == 0: - return - if length is None: - while True: - buf = src.read(16*1024) - if not buf: - break - dst.write(buf) - return - - BUFSIZE = 16 * 1024 - blocks, remainder = divmod(length, BUFSIZE) - for b in range(blocks): - buf = src.read(BUFSIZE) - if len(buf) < BUFSIZE: - raise IOError("end of file reached") - dst.write(buf) - - if remainder != 0: - buf = src.read(remainder) - if len(buf) < remainder: - raise IOError("end of file reached") - dst.write(buf) - return - -filemode_table = ( - ((S_IFLNK, "l"), - (S_IFREG, "-"), - (S_IFBLK, "b"), - (S_IFDIR, "d"), - (S_IFCHR, "c"), - (S_IFIFO, "p")), - - ((TUREAD, "r"),), - ((TUWRITE, "w"),), - ((TUEXEC|TSUID, "s"), - (TSUID, "S"), - (TUEXEC, "x")), - - ((TGREAD, "r"),), - ((TGWRITE, "w"),), - ((TGEXEC|TSGID, "s"), - (TSGID, "S"), - (TGEXEC, "x")), - - ((TOREAD, "r"),), - ((TOWRITE, "w"),), - ((TOEXEC|TSVTX, "t"), - (TSVTX, "T"), - (TOEXEC, "x")) -) - -def filemode(mode): - """Convert a file's mode to a string of the form - -rwxrwxrwx. - Used by TarFile.list() - """ - perm = [] - for table in filemode_table: - for bit, char in table: - if mode & bit == bit: - perm.append(char) - break - else: - perm.append("-") - return "".join(perm) - -class TarError(Exception): - """Base exception.""" - pass -class ExtractError(TarError): - """General exception for extract errors.""" - pass -class ReadError(TarError): - """Exception for unreadable tar archives.""" - pass -class CompressionError(TarError): - """Exception for unavailable compression methods.""" - pass -class StreamError(TarError): - """Exception for unsupported operations on stream-like TarFiles.""" - pass -class HeaderError(TarError): - """Base exception for header errors.""" - pass -class EmptyHeaderError(HeaderError): - """Exception for empty headers.""" - pass -class TruncatedHeaderError(HeaderError): - """Exception for truncated headers.""" - pass -class EOFHeaderError(HeaderError): - """Exception for end of file headers.""" - pass -class InvalidHeaderError(HeaderError): - """Exception for invalid headers.""" - pass -class SubsequentHeaderError(HeaderError): - """Exception for missing and invalid extended headers.""" - pass - -#--------------------------- -# internal stream interface -#--------------------------- -class _LowLevelFile(object): - """Low-level file object. Supports reading and writing. - It is used instead of a regular file object for streaming - access. - """ - - def __init__(self, name, mode): - mode = { - "r": os.O_RDONLY, - "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC, - }[mode] - if hasattr(os, "O_BINARY"): - mode |= os.O_BINARY - self.fd = os.open(name, mode, 0o666) - - def close(self): - os.close(self.fd) - - def read(self, size): - return os.read(self.fd, size) - - def write(self, s): - os.write(self.fd, s) - -class _Stream(object): - """Class that serves as an adapter between TarFile and - a stream-like object. The stream-like object only - needs to have a read() or write() method and is accessed - blockwise. Use of gzip or bzip2 compression is possible. - A stream-like object could be for example: sys.stdin, - sys.stdout, a socket, a tape device etc. - - _Stream is intended to be used only internally. - """ - - def __init__(self, name, mode, comptype, fileobj, bufsize): - """Construct a _Stream object. - """ - self._extfileobj = True - if fileobj is None: - fileobj = _LowLevelFile(name, mode) - self._extfileobj = False - - if comptype == '*': - # Enable transparent compression detection for the - # stream interface - fileobj = _StreamProxy(fileobj) - comptype = fileobj.getcomptype() - - self.name = name or "" - self.mode = mode - self.comptype = comptype - self.fileobj = fileobj - self.bufsize = bufsize - self.buf = b"" - self.pos = 0 - self.closed = False - - try: - if comptype == "gz": - try: - import zlib - except ImportError: - raise CompressionError("zlib module is not available") - self.zlib = zlib - self.crc = zlib.crc32(b"") - if mode == "r": - self._init_read_gz() - else: - self._init_write_gz() - - if comptype == "bz2": - try: - import bz2 - except ImportError: - raise CompressionError("bz2 module is not available") - if mode == "r": - self.dbuf = b"" - self.cmp = bz2.BZ2Decompressor() - else: - self.cmp = bz2.BZ2Compressor() - except: - if not self._extfileobj: - self.fileobj.close() - self.closed = True - raise - - def __del__(self): - if hasattr(self, "closed") and not self.closed: - self.close() - - def _init_write_gz(self): - """Initialize for writing with gzip compression. - """ - self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED, - -self.zlib.MAX_WBITS, - self.zlib.DEF_MEM_LEVEL, - 0) - timestamp = struct.pack(" self.bufsize: - self.fileobj.write(self.buf[:self.bufsize]) - self.buf = self.buf[self.bufsize:] - - def close(self): - """Close the _Stream object. No operation should be - done on it afterwards. - """ - if self.closed: - return - - if self.mode == "w" and self.comptype != "tar": - self.buf += self.cmp.flush() - - if self.mode == "w" and self.buf: - self.fileobj.write(self.buf) - self.buf = b"" - if self.comptype == "gz": - # The native zlib crc is an unsigned 32-bit integer, but - # the Python wrapper implicitly casts that to a signed C - # long. So, on a 32-bit box self.crc may "look negative", - # while the same crc on a 64-bit box may "look positive". - # To avoid irksome warnings from the `struct` module, force - # it to look positive on all boxes. - self.fileobj.write(struct.pack("= 0: - blocks, remainder = divmod(pos - self.pos, self.bufsize) - for i in range(blocks): - self.read(self.bufsize) - self.read(remainder) - else: - raise StreamError("seeking backwards is not allowed") - return self.pos - - def read(self, size=None): - """Return the next size number of bytes from the stream. - If size is not defined, return all bytes of the stream - up to EOF. - """ - if size is None: - t = [] - while True: - buf = self._read(self.bufsize) - if not buf: - break - t.append(buf) - buf = "".join(t) - else: - buf = self._read(size) - self.pos += len(buf) - return buf - - def _read(self, size): - """Return size bytes from the stream. - """ - if self.comptype == "tar": - return self.__read(size) - - c = len(self.dbuf) - while c < size: - buf = self.__read(self.bufsize) - if not buf: - break - try: - buf = self.cmp.decompress(buf) - except IOError: - raise ReadError("invalid compressed data") - self.dbuf += buf - c += len(buf) - buf = self.dbuf[:size] - self.dbuf = self.dbuf[size:] - return buf - - def __read(self, size): - """Return size bytes from stream. If internal buffer is empty, - read another block from the stream. - """ - c = len(self.buf) - while c < size: - buf = self.fileobj.read(self.bufsize) - if not buf: - break - self.buf += buf - c += len(buf) - buf = self.buf[:size] - self.buf = self.buf[size:] - return buf -# class _Stream - -class _StreamProxy(object): - """Small proxy class that enables transparent compression - detection for the Stream interface (mode 'r|*'). - """ - - def __init__(self, fileobj): - self.fileobj = fileobj - self.buf = self.fileobj.read(BLOCKSIZE) - - def read(self, size): - self.read = self.fileobj.read - return self.buf - - def getcomptype(self): - if self.buf.startswith(b"\037\213\010"): - return "gz" - if self.buf.startswith(b"BZh91"): - return "bz2" - return "tar" - - def close(self): - self.fileobj.close() -# class StreamProxy - -class _BZ2Proxy(object): - """Small proxy class that enables external file object - support for "r:bz2" and "w:bz2" modes. This is actually - a workaround for a limitation in bz2 module's BZ2File - class which (unlike gzip.GzipFile) has no support for - a file object argument. - """ - - blocksize = 16 * 1024 - - def __init__(self, fileobj, mode): - self.fileobj = fileobj - self.mode = mode - self.name = getattr(self.fileobj, "name", None) - self.init() - - def init(self): - import bz2 - self.pos = 0 - if self.mode == "r": - self.bz2obj = bz2.BZ2Decompressor() - self.fileobj.seek(0) - self.buf = b"" - else: - self.bz2obj = bz2.BZ2Compressor() - - def read(self, size): - x = len(self.buf) - while x < size: - raw = self.fileobj.read(self.blocksize) - if not raw: - break - data = self.bz2obj.decompress(raw) - self.buf += data - x += len(data) - - buf = self.buf[:size] - self.buf = self.buf[size:] - self.pos += len(buf) - return buf - - def seek(self, pos): - if pos < self.pos: - self.init() - self.read(pos - self.pos) - - def tell(self): - return self.pos - - def write(self, data): - self.pos += len(data) - raw = self.bz2obj.compress(data) - self.fileobj.write(raw) - - def close(self): - if self.mode == "w": - raw = self.bz2obj.flush() - self.fileobj.write(raw) -# class _BZ2Proxy - -#------------------------ -# Extraction file object -#------------------------ -class _FileInFile(object): - """A thin wrapper around an existing file object that - provides a part of its data as an individual file - object. - """ - - def __init__(self, fileobj, offset, size, blockinfo=None): - self.fileobj = fileobj - self.offset = offset - self.size = size - self.position = 0 - - if blockinfo is None: - blockinfo = [(0, size)] - - # Construct a map with data and zero blocks. - self.map_index = 0 - self.map = [] - lastpos = 0 - realpos = self.offset - for offset, size in blockinfo: - if offset > lastpos: - self.map.append((False, lastpos, offset, None)) - self.map.append((True, offset, offset + size, realpos)) - realpos += size - lastpos = offset + size - if lastpos < self.size: - self.map.append((False, lastpos, self.size, None)) - - def seekable(self): - if not hasattr(self.fileobj, "seekable"): - # XXX gzip.GzipFile and bz2.BZ2File - return True - return self.fileobj.seekable() - - def tell(self): - """Return the current file position. - """ - return self.position - - def seek(self, position): - """Seek to a position in the file. - """ - self.position = position - - def read(self, size=None): - """Read data from the file. - """ - if size is None: - size = self.size - self.position - else: - size = min(size, self.size - self.position) - - buf = b"" - while size > 0: - while True: - data, start, stop, offset = self.map[self.map_index] - if start <= self.position < stop: - break - else: - self.map_index += 1 - if self.map_index == len(self.map): - self.map_index = 0 - length = min(size, stop - self.position) - if data: - self.fileobj.seek(offset + (self.position - start)) - buf += self.fileobj.read(length) - else: - buf += NUL * length - size -= length - self.position += length - return buf -#class _FileInFile - - -class ExFileObject(object): - """File-like object for reading an archive member. - Is returned by TarFile.extractfile(). - """ - blocksize = 1024 - - def __init__(self, tarfile, tarinfo): - self.fileobj = _FileInFile(tarfile.fileobj, - tarinfo.offset_data, - tarinfo.size, - tarinfo.sparse) - self.name = tarinfo.name - self.mode = "r" - self.closed = False - self.size = tarinfo.size - - self.position = 0 - self.buffer = b"" - - def readable(self): - return True - - def writable(self): - return False - - def seekable(self): - return self.fileobj.seekable() - - def read(self, size=None): - """Read at most size bytes from the file. If size is not - present or None, read all data until EOF is reached. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - buf = b"" - if self.buffer: - if size is None: - buf = self.buffer - self.buffer = b"" - else: - buf = self.buffer[:size] - self.buffer = self.buffer[size:] - - if size is None: - buf += self.fileobj.read() - else: - buf += self.fileobj.read(size - len(buf)) - - self.position += len(buf) - return buf - - # XXX TextIOWrapper uses the read1() method. - read1 = read - - def readline(self, size=-1): - """Read one entire line from the file. If size is present - and non-negative, return a string with at most that - size, which may be an incomplete line. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - pos = self.buffer.find(b"\n") + 1 - if pos == 0: - # no newline found. - while True: - buf = self.fileobj.read(self.blocksize) - self.buffer += buf - if not buf or b"\n" in buf: - pos = self.buffer.find(b"\n") + 1 - if pos == 0: - # no newline found. - pos = len(self.buffer) - break - - if size != -1: - pos = min(size, pos) - - buf = self.buffer[:pos] - self.buffer = self.buffer[pos:] - self.position += len(buf) - return buf - - def readlines(self): - """Return a list with all remaining lines. - """ - result = [] - while True: - line = self.readline() - if not line: break - result.append(line) - return result - - def tell(self): - """Return the current file position. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - return self.position - - def seek(self, pos, whence=os.SEEK_SET): - """Seek to a position in the file. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - if whence == os.SEEK_SET: - self.position = min(max(pos, 0), self.size) - elif whence == os.SEEK_CUR: - if pos < 0: - self.position = max(self.position + pos, 0) - else: - self.position = min(self.position + pos, self.size) - elif whence == os.SEEK_END: - self.position = max(min(self.size + pos, self.size), 0) - else: - raise ValueError("Invalid argument") - - self.buffer = b"" - self.fileobj.seek(self.position) - - def close(self): - """Close the file object. - """ - self.closed = True - - def __iter__(self): - """Get an iterator over the file's lines. - """ - while True: - line = self.readline() - if not line: - break - yield line -#class ExFileObject - -#------------------ -# Exported Classes -#------------------ -class TarInfo(object): - """Informational class which holds the details about an - archive member given by a tar header block. - TarInfo objects are returned by TarFile.getmember(), - TarFile.getmembers() and TarFile.gettarinfo() and are - usually created internally. - """ - - __slots__ = ("name", "mode", "uid", "gid", "size", "mtime", - "chksum", "type", "linkname", "uname", "gname", - "devmajor", "devminor", - "offset", "offset_data", "pax_headers", "sparse", - "tarfile", "_sparse_structs", "_link_target") - - def __init__(self, name=""): - """Construct a TarInfo object. name is the optional name - of the member. - """ - self.name = name # member name - self.mode = 0o644 # file permissions - self.uid = 0 # user id - self.gid = 0 # group id - self.size = 0 # file size - self.mtime = 0 # modification time - self.chksum = 0 # header checksum - self.type = REGTYPE # member type - self.linkname = "" # link name - self.uname = "" # user name - self.gname = "" # group name - self.devmajor = 0 # device major number - self.devminor = 0 # device minor number - - self.offset = 0 # the tar header starts here - self.offset_data = 0 # the file's data starts here - - self.sparse = None # sparse member information - self.pax_headers = {} # pax header information - - # In pax headers the "name" and "linkname" field are called - # "path" and "linkpath". - def _getpath(self): - return self.name - def _setpath(self, name): - self.name = name - path = property(_getpath, _setpath) - - def _getlinkpath(self): - return self.linkname - def _setlinkpath(self, linkname): - self.linkname = linkname - linkpath = property(_getlinkpath, _setlinkpath) - - def __repr__(self): - return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) - - def get_info(self): - """Return the TarInfo's attributes as a dictionary. - """ - info = { - "name": self.name, - "mode": self.mode & 0o7777, - "uid": self.uid, - "gid": self.gid, - "size": self.size, - "mtime": self.mtime, - "chksum": self.chksum, - "type": self.type, - "linkname": self.linkname, - "uname": self.uname, - "gname": self.gname, - "devmajor": self.devmajor, - "devminor": self.devminor - } - - if info["type"] == DIRTYPE and not info["name"].endswith("/"): - info["name"] += "/" - - return info - - def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"): - """Return a tar header as a string of 512 byte blocks. - """ - info = self.get_info() - - if format == USTAR_FORMAT: - return self.create_ustar_header(info, encoding, errors) - elif format == GNU_FORMAT: - return self.create_gnu_header(info, encoding, errors) - elif format == PAX_FORMAT: - return self.create_pax_header(info, encoding) - else: - raise ValueError("invalid format") - - def create_ustar_header(self, info, encoding, errors): - """Return the object as a ustar header block. - """ - info["magic"] = POSIX_MAGIC - - if len(info["linkname"]) > LENGTH_LINK: - raise ValueError("linkname is too long") - - if len(info["name"]) > LENGTH_NAME: - info["prefix"], info["name"] = self._posix_split_name(info["name"]) - - return self._create_header(info, USTAR_FORMAT, encoding, errors) - - def create_gnu_header(self, info, encoding, errors): - """Return the object as a GNU header block sequence. - """ - info["magic"] = GNU_MAGIC - - buf = b"" - if len(info["linkname"]) > LENGTH_LINK: - buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors) - - if len(info["name"]) > LENGTH_NAME: - buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors) - - return buf + self._create_header(info, GNU_FORMAT, encoding, errors) - - def create_pax_header(self, info, encoding): - """Return the object as a ustar header block. If it cannot be - represented this way, prepend a pax extended header sequence - with supplement information. - """ - info["magic"] = POSIX_MAGIC - pax_headers = self.pax_headers.copy() - - # Test string fields for values that exceed the field length or cannot - # be represented in ASCII encoding. - for name, hname, length in ( - ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK), - ("uname", "uname", 32), ("gname", "gname", 32)): - - if hname in pax_headers: - # The pax header has priority. - continue - - # Try to encode the string as ASCII. - try: - info[name].encode("ascii", "strict") - except UnicodeEncodeError: - pax_headers[hname] = info[name] - continue - - if len(info[name]) > length: - pax_headers[hname] = info[name] - - # Test number fields for values that exceed the field limit or values - # that like to be stored as float. - for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)): - if name in pax_headers: - # The pax header has priority. Avoid overflow. - info[name] = 0 - continue - - val = info[name] - if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float): - pax_headers[name] = str(val) - info[name] = 0 - - # Create a pax extended header if necessary. - if pax_headers: - buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding) - else: - buf = b"" - - return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace") - - @classmethod - def create_pax_global_header(cls, pax_headers): - """Return the object as a pax global header block sequence. - """ - return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf8") - - def _posix_split_name(self, name): - """Split a name longer than 100 chars into a prefix - and a name part. - """ - prefix = name[:LENGTH_PREFIX + 1] - while prefix and prefix[-1] != "/": - prefix = prefix[:-1] - - name = name[len(prefix):] - prefix = prefix[:-1] - - if not prefix or len(name) > LENGTH_NAME: - raise ValueError("name is too long") - return prefix, name - - @staticmethod - def _create_header(info, format, encoding, errors): - """Return a header block. info is a dictionary with file - information, format must be one of the *_FORMAT constants. - """ - parts = [ - stn(info.get("name", ""), 100, encoding, errors), - itn(info.get("mode", 0) & 0o7777, 8, format), - itn(info.get("uid", 0), 8, format), - itn(info.get("gid", 0), 8, format), - itn(info.get("size", 0), 12, format), - itn(info.get("mtime", 0), 12, format), - b" ", # checksum field - info.get("type", REGTYPE), - stn(info.get("linkname", ""), 100, encoding, errors), - info.get("magic", POSIX_MAGIC), - stn(info.get("uname", ""), 32, encoding, errors), - stn(info.get("gname", ""), 32, encoding, errors), - itn(info.get("devmajor", 0), 8, format), - itn(info.get("devminor", 0), 8, format), - stn(info.get("prefix", ""), 155, encoding, errors) - ] - - buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts)) - chksum = calc_chksums(buf[-BLOCKSIZE:])[0] - buf = buf[:-364] + ("%06o\0" % chksum).encode("ascii") + buf[-357:] - return buf - - @staticmethod - def _create_payload(payload): - """Return the string payload filled with zero bytes - up to the next 512 byte border. - """ - blocks, remainder = divmod(len(payload), BLOCKSIZE) - if remainder > 0: - payload += (BLOCKSIZE - remainder) * NUL - return payload - - @classmethod - def _create_gnu_long_header(cls, name, type, encoding, errors): - """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence - for name. - """ - name = name.encode(encoding, errors) + NUL - - info = {} - info["name"] = "././@LongLink" - info["type"] = type - info["size"] = len(name) - info["magic"] = GNU_MAGIC - - # create extended header + name blocks. - return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \ - cls._create_payload(name) - - @classmethod - def _create_pax_generic_header(cls, pax_headers, type, encoding): - """Return a POSIX.1-2008 extended or global header sequence - that contains a list of keyword, value pairs. The values - must be strings. - """ - # Check if one of the fields contains surrogate characters and thereby - # forces hdrcharset=BINARY, see _proc_pax() for more information. - binary = False - for keyword, value in pax_headers.items(): - try: - value.encode("utf8", "strict") - except UnicodeEncodeError: - binary = True - break - - records = b"" - if binary: - # Put the hdrcharset field at the beginning of the header. - records += b"21 hdrcharset=BINARY\n" - - for keyword, value in pax_headers.items(): - keyword = keyword.encode("utf8") - if binary: - # Try to restore the original byte representation of `value'. - # Needless to say, that the encoding must match the string. - value = value.encode(encoding, "surrogateescape") - else: - value = value.encode("utf8") - - l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n' - n = p = 0 - while True: - n = l + len(str(p)) - if n == p: - break - p = n - records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n" - - # We use a hardcoded "././@PaxHeader" name like star does - # instead of the one that POSIX recommends. - info = {} - info["name"] = "././@PaxHeader" - info["type"] = type - info["size"] = len(records) - info["magic"] = POSIX_MAGIC - - # Create pax header + record blocks. - return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \ - cls._create_payload(records) - - @classmethod - def frombuf(cls, buf, encoding, errors): - """Construct a TarInfo object from a 512 byte bytes object. - """ - if len(buf) == 0: - raise EmptyHeaderError("empty header") - if len(buf) != BLOCKSIZE: - raise TruncatedHeaderError("truncated header") - if buf.count(NUL) == BLOCKSIZE: - raise EOFHeaderError("end of file header") - - chksum = nti(buf[148:156]) - if chksum not in calc_chksums(buf): - raise InvalidHeaderError("bad checksum") - - obj = cls() - obj.name = nts(buf[0:100], encoding, errors) - obj.mode = nti(buf[100:108]) - obj.uid = nti(buf[108:116]) - obj.gid = nti(buf[116:124]) - obj.size = nti(buf[124:136]) - obj.mtime = nti(buf[136:148]) - obj.chksum = chksum - obj.type = buf[156:157] - obj.linkname = nts(buf[157:257], encoding, errors) - obj.uname = nts(buf[265:297], encoding, errors) - obj.gname = nts(buf[297:329], encoding, errors) - obj.devmajor = nti(buf[329:337]) - obj.devminor = nti(buf[337:345]) - prefix = nts(buf[345:500], encoding, errors) - - # Old V7 tar format represents a directory as a regular - # file with a trailing slash. - if obj.type == AREGTYPE and obj.name.endswith("/"): - obj.type = DIRTYPE - - # The old GNU sparse format occupies some of the unused - # space in the buffer for up to 4 sparse structures. - # Save the them for later processing in _proc_sparse(). - if obj.type == GNUTYPE_SPARSE: - pos = 386 - structs = [] - for i in range(4): - try: - offset = nti(buf[pos:pos + 12]) - numbytes = nti(buf[pos + 12:pos + 24]) - except ValueError: - break - structs.append((offset, numbytes)) - pos += 24 - isextended = bool(buf[482]) - origsize = nti(buf[483:495]) - obj._sparse_structs = (structs, isextended, origsize) - - # Remove redundant slashes from directories. - if obj.isdir(): - obj.name = obj.name.rstrip("/") - - # Reconstruct a ustar longname. - if prefix and obj.type not in GNU_TYPES: - obj.name = prefix + "/" + obj.name - return obj - - @classmethod - def fromtarfile(cls, tarfile): - """Return the next TarInfo object from TarFile object - tarfile. - """ - buf = tarfile.fileobj.read(BLOCKSIZE) - obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors) - obj.offset = tarfile.fileobj.tell() - BLOCKSIZE - return obj._proc_member(tarfile) - - #-------------------------------------------------------------------------- - # The following are methods that are called depending on the type of a - # member. The entry point is _proc_member() which can be overridden in a - # subclass to add custom _proc_*() methods. A _proc_*() method MUST - # implement the following - # operations: - # 1. Set self.offset_data to the position where the data blocks begin, - # if there is data that follows. - # 2. Set tarfile.offset to the position where the next member's header will - # begin. - # 3. Return self or another valid TarInfo object. - def _proc_member(self, tarfile): - """Choose the right processing method depending on - the type and call it. - """ - if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK): - return self._proc_gnulong(tarfile) - elif self.type == GNUTYPE_SPARSE: - return self._proc_sparse(tarfile) - elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE): - return self._proc_pax(tarfile) - else: - return self._proc_builtin(tarfile) - - def _proc_builtin(self, tarfile): - """Process a builtin type or an unknown type which - will be treated as a regular file. - """ - self.offset_data = tarfile.fileobj.tell() - offset = self.offset_data - if self.isreg() or self.type not in SUPPORTED_TYPES: - # Skip the following data blocks. - offset += self._block(self.size) - tarfile.offset = offset - - # Patch the TarInfo object with saved global - # header information. - self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors) - - return self - - def _proc_gnulong(self, tarfile): - """Process the blocks that hold a GNU longname - or longlink member. - """ - buf = tarfile.fileobj.read(self._block(self.size)) - - # Fetch the next header and process it. - try: - next = self.fromtarfile(tarfile) - except HeaderError: - raise SubsequentHeaderError("missing or bad subsequent header") - - # Patch the TarInfo object from the next header with - # the longname information. - next.offset = self.offset - if self.type == GNUTYPE_LONGNAME: - next.name = nts(buf, tarfile.encoding, tarfile.errors) - elif self.type == GNUTYPE_LONGLINK: - next.linkname = nts(buf, tarfile.encoding, tarfile.errors) - - return next - - def _proc_sparse(self, tarfile): - """Process a GNU sparse header plus extra headers. - """ - # We already collected some sparse structures in frombuf(). - structs, isextended, origsize = self._sparse_structs - del self._sparse_structs - - # Collect sparse structures from extended header blocks. - while isextended: - buf = tarfile.fileobj.read(BLOCKSIZE) - pos = 0 - for i in range(21): - try: - offset = nti(buf[pos:pos + 12]) - numbytes = nti(buf[pos + 12:pos + 24]) - except ValueError: - break - if offset and numbytes: - structs.append((offset, numbytes)) - pos += 24 - isextended = bool(buf[504]) - self.sparse = structs - - self.offset_data = tarfile.fileobj.tell() - tarfile.offset = self.offset_data + self._block(self.size) - self.size = origsize - return self - - def _proc_pax(self, tarfile): - """Process an extended or global header as described in - POSIX.1-2008. - """ - # Read the header information. - buf = tarfile.fileobj.read(self._block(self.size)) - - # A pax header stores supplemental information for either - # the following file (extended) or all following files - # (global). - if self.type == XGLTYPE: - pax_headers = tarfile.pax_headers - else: - pax_headers = tarfile.pax_headers.copy() - - # Check if the pax header contains a hdrcharset field. This tells us - # the encoding of the path, linkpath, uname and gname fields. Normally, - # these fields are UTF-8 encoded but since POSIX.1-2008 tar - # implementations are allowed to store them as raw binary strings if - # the translation to UTF-8 fails. - match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf) - if match is not None: - pax_headers["hdrcharset"] = match.group(1).decode("utf8") - - # For the time being, we don't care about anything other than "BINARY". - # The only other value that is currently allowed by the standard is - # "ISO-IR 10646 2000 UTF-8" in other words UTF-8. - hdrcharset = pax_headers.get("hdrcharset") - if hdrcharset == "BINARY": - encoding = tarfile.encoding - else: - encoding = "utf8" - - # Parse pax header information. A record looks like that: - # "%d %s=%s\n" % (length, keyword, value). length is the size - # of the complete record including the length field itself and - # the newline. keyword and value are both UTF-8 encoded strings. - regex = re.compile(br"(\d+) ([^=]+)=") - pos = 0 - while True: - match = regex.match(buf, pos) - if not match: - break - - length, keyword = match.groups() - length = int(length) - value = buf[match.end(2) + 1:match.start(1) + length - 1] - - # Normally, we could just use "utf8" as the encoding and "strict" - # as the error handler, but we better not take the risk. For - # example, GNU tar <= 1.23 is known to store filenames it cannot - # translate to UTF-8 as raw strings (unfortunately without a - # hdrcharset=BINARY header). - # We first try the strict standard encoding, and if that fails we - # fall back on the user's encoding and error handler. - keyword = self._decode_pax_field(keyword, "utf8", "utf8", - tarfile.errors) - if keyword in PAX_NAME_FIELDS: - value = self._decode_pax_field(value, encoding, tarfile.encoding, - tarfile.errors) - else: - value = self._decode_pax_field(value, "utf8", "utf8", - tarfile.errors) - - pax_headers[keyword] = value - pos += length - - # Fetch the next header. - try: - next = self.fromtarfile(tarfile) - except HeaderError: - raise SubsequentHeaderError("missing or bad subsequent header") - - # Process GNU sparse information. - if "GNU.sparse.map" in pax_headers: - # GNU extended sparse format version 0.1. - self._proc_gnusparse_01(next, pax_headers) - - elif "GNU.sparse.size" in pax_headers: - # GNU extended sparse format version 0.0. - self._proc_gnusparse_00(next, pax_headers, buf) - - elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0": - # GNU extended sparse format version 1.0. - self._proc_gnusparse_10(next, pax_headers, tarfile) - - if self.type in (XHDTYPE, SOLARIS_XHDTYPE): - # Patch the TarInfo object with the extended header info. - next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors) - next.offset = self.offset - - if "size" in pax_headers: - # If the extended header replaces the size field, - # we need to recalculate the offset where the next - # header starts. - offset = next.offset_data - if next.isreg() or next.type not in SUPPORTED_TYPES: - offset += next._block(next.size) - tarfile.offset = offset - - return next - - def _proc_gnusparse_00(self, next, pax_headers, buf): - """Process a GNU tar extended sparse header, version 0.0. - """ - offsets = [] - for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf): - offsets.append(int(match.group(1))) - numbytes = [] - for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf): - numbytes.append(int(match.group(1))) - next.sparse = list(zip(offsets, numbytes)) - - def _proc_gnusparse_01(self, next, pax_headers): - """Process a GNU tar extended sparse header, version 0.1. - """ - sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")] - next.sparse = list(zip(sparse[::2], sparse[1::2])) - - def _proc_gnusparse_10(self, next, pax_headers, tarfile): - """Process a GNU tar extended sparse header, version 1.0. - """ - fields = None - sparse = [] - buf = tarfile.fileobj.read(BLOCKSIZE) - fields, buf = buf.split(b"\n", 1) - fields = int(fields) - while len(sparse) < fields * 2: - if b"\n" not in buf: - buf += tarfile.fileobj.read(BLOCKSIZE) - number, buf = buf.split(b"\n", 1) - sparse.append(int(number)) - next.offset_data = tarfile.fileobj.tell() - next.sparse = list(zip(sparse[::2], sparse[1::2])) - - def _apply_pax_info(self, pax_headers, encoding, errors): - """Replace fields with supplemental information from a previous - pax extended or global header. - """ - for keyword, value in pax_headers.items(): - if keyword == "GNU.sparse.name": - setattr(self, "path", value) - elif keyword == "GNU.sparse.size": - setattr(self, "size", int(value)) - elif keyword == "GNU.sparse.realsize": - setattr(self, "size", int(value)) - elif keyword in PAX_FIELDS: - if keyword in PAX_NUMBER_FIELDS: - try: - value = PAX_NUMBER_FIELDS[keyword](value) - except ValueError: - value = 0 - if keyword == "path": - value = value.rstrip("/") - setattr(self, keyword, value) - - self.pax_headers = pax_headers.copy() - - def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors): - """Decode a single field from a pax record. - """ - try: - return value.decode(encoding, "strict") - except UnicodeDecodeError: - return value.decode(fallback_encoding, fallback_errors) - - def _block(self, count): - """Round up a byte count by BLOCKSIZE and return it, - e.g. _block(834) => 1024. - """ - blocks, remainder = divmod(count, BLOCKSIZE) - if remainder: - blocks += 1 - return blocks * BLOCKSIZE - - def isreg(self): - return self.type in REGULAR_TYPES - def isfile(self): - return self.isreg() - def isdir(self): - return self.type == DIRTYPE - def issym(self): - return self.type == SYMTYPE - def islnk(self): - return self.type == LNKTYPE - def ischr(self): - return self.type == CHRTYPE - def isblk(self): - return self.type == BLKTYPE - def isfifo(self): - return self.type == FIFOTYPE - def issparse(self): - return self.sparse is not None - def isdev(self): - return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE) -# class TarInfo - -class TarFile(object): - """The TarFile Class provides an interface to tar archives. - """ - - debug = 0 # May be set from 0 (no msgs) to 3 (all msgs) - - dereference = False # If true, add content of linked file to the - # tar file, else the link. - - ignore_zeros = False # If true, skips empty or invalid blocks and - # continues processing. - - errorlevel = 1 # If 0, fatal errors only appear in debug - # messages (if debug >= 0). If > 0, errors - # are passed to the caller as exceptions. - - format = DEFAULT_FORMAT # The format to use when creating an archive. - - encoding = ENCODING # Encoding for 8-bit character strings. - - errors = None # Error handler for unicode conversion. - - tarinfo = TarInfo # The default TarInfo class to use. - - fileobject = ExFileObject # The default ExFileObject class to use. - - def __init__(self, name=None, mode="r", fileobj=None, format=None, - tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, - errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None): - """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to - read from an existing archive, 'a' to append data to an existing - file or 'w' to create a new file overwriting an existing one. `mode' - defaults to 'r'. - If `fileobj' is given, it is used for reading or writing data. If it - can be determined, `mode' is overridden by `fileobj's mode. - `fileobj' is not closed, when TarFile is closed. - """ - if len(mode) > 1 or mode not in "raw": - raise ValueError("mode must be 'r', 'a' or 'w'") - self.mode = mode - self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode] - - if not fileobj: - if self.mode == "a" and not os.path.exists(name): - # Create nonexistent files in append mode. - self.mode = "w" - self._mode = "wb" - fileobj = bltn_open(name, self._mode) - self._extfileobj = False - else: - if name is None and hasattr(fileobj, "name"): - name = fileobj.name - if hasattr(fileobj, "mode"): - self._mode = fileobj.mode - self._extfileobj = True - self.name = os.path.abspath(name) if name else None - self.fileobj = fileobj - - # Init attributes. - if format is not None: - self.format = format - if tarinfo is not None: - self.tarinfo = tarinfo - if dereference is not None: - self.dereference = dereference - if ignore_zeros is not None: - self.ignore_zeros = ignore_zeros - if encoding is not None: - self.encoding = encoding - self.errors = errors - - if pax_headers is not None and self.format == PAX_FORMAT: - self.pax_headers = pax_headers - else: - self.pax_headers = {} - - if debug is not None: - self.debug = debug - if errorlevel is not None: - self.errorlevel = errorlevel - - # Init datastructures. - self.closed = False - self.members = [] # list of members as TarInfo objects - self._loaded = False # flag if all members have been read - self.offset = self.fileobj.tell() - # current position in the archive file - self.inodes = {} # dictionary caching the inodes of - # archive members already added - - try: - if self.mode == "r": - self.firstmember = None - self.firstmember = self.next() - - if self.mode == "a": - # Move to the end of the archive, - # before the first empty block. - while True: - self.fileobj.seek(self.offset) - try: - tarinfo = self.tarinfo.fromtarfile(self) - self.members.append(tarinfo) - except EOFHeaderError: - self.fileobj.seek(self.offset) - break - except HeaderError as e: - raise ReadError(str(e)) - - if self.mode in "aw": - self._loaded = True - - if self.pax_headers: - buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy()) - self.fileobj.write(buf) - self.offset += len(buf) - except: - if not self._extfileobj: - self.fileobj.close() - self.closed = True - raise - - #-------------------------------------------------------------------------- - # Below are the classmethods which act as alternate constructors to the - # TarFile class. The open() method is the only one that is needed for - # public use; it is the "super"-constructor and is able to select an - # adequate "sub"-constructor for a particular compression using the mapping - # from OPEN_METH. - # - # This concept allows one to subclass TarFile without losing the comfort of - # the super-constructor. A sub-constructor is registered and made available - # by adding it to the mapping in OPEN_METH. - - @classmethod - def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs): - """Open a tar archive for reading, writing or appending. Return - an appropriate TarFile class. - - mode: - 'r' or 'r:*' open for reading with transparent compression - 'r:' open for reading exclusively uncompressed - 'r:gz' open for reading with gzip compression - 'r:bz2' open for reading with bzip2 compression - 'a' or 'a:' open for appending, creating the file if necessary - 'w' or 'w:' open for writing without compression - 'w:gz' open for writing with gzip compression - 'w:bz2' open for writing with bzip2 compression - - 'r|*' open a stream of tar blocks with transparent compression - 'r|' open an uncompressed stream of tar blocks for reading - 'r|gz' open a gzip compressed stream of tar blocks - 'r|bz2' open a bzip2 compressed stream of tar blocks - 'w|' open an uncompressed stream for writing - 'w|gz' open a gzip compressed stream for writing - 'w|bz2' open a bzip2 compressed stream for writing - """ - - if not name and not fileobj: - raise ValueError("nothing to open") - - if mode in ("r", "r:*"): - # Find out which *open() is appropriate for opening the file. - for comptype in cls.OPEN_METH: - func = getattr(cls, cls.OPEN_METH[comptype]) - if fileobj is not None: - saved_pos = fileobj.tell() - try: - return func(name, "r", fileobj, **kwargs) - except (ReadError, CompressionError) as e: - if fileobj is not None: - fileobj.seek(saved_pos) - continue - raise ReadError("file could not be opened successfully") - - elif ":" in mode: - filemode, comptype = mode.split(":", 1) - filemode = filemode or "r" - comptype = comptype or "tar" - - # Select the *open() function according to - # given compression. - if comptype in cls.OPEN_METH: - func = getattr(cls, cls.OPEN_METH[comptype]) - else: - raise CompressionError("unknown compression type %r" % comptype) - return func(name, filemode, fileobj, **kwargs) - - elif "|" in mode: - filemode, comptype = mode.split("|", 1) - filemode = filemode or "r" - comptype = comptype or "tar" - - if filemode not in "rw": - raise ValueError("mode must be 'r' or 'w'") - - stream = _Stream(name, filemode, comptype, fileobj, bufsize) - try: - t = cls(name, filemode, stream, **kwargs) - except: - stream.close() - raise - t._extfileobj = False - return t - - elif mode in "aw": - return cls.taropen(name, mode, fileobj, **kwargs) - - raise ValueError("undiscernible mode") - - @classmethod - def taropen(cls, name, mode="r", fileobj=None, **kwargs): - """Open uncompressed tar archive name for reading or writing. - """ - if len(mode) > 1 or mode not in "raw": - raise ValueError("mode must be 'r', 'a' or 'w'") - return cls(name, mode, fileobj, **kwargs) - - @classmethod - def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): - """Open gzip compressed tar archive name for reading or writing. - Appending is not allowed. - """ - if len(mode) > 1 or mode not in "rw": - raise ValueError("mode must be 'r' or 'w'") - - try: - import gzip - gzip.GzipFile - except (ImportError, AttributeError): - raise CompressionError("gzip module is not available") - - extfileobj = fileobj is not None - try: - fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj) - t = cls.taropen(name, mode, fileobj, **kwargs) - except IOError: - if not extfileobj and fileobj is not None: - fileobj.close() - if fileobj is None: - raise - raise ReadError("not a gzip file") - except: - if not extfileobj and fileobj is not None: - fileobj.close() - raise - t._extfileobj = extfileobj - return t - - @classmethod - def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): - """Open bzip2 compressed tar archive name for reading or writing. - Appending is not allowed. - """ - if len(mode) > 1 or mode not in "rw": - raise ValueError("mode must be 'r' or 'w'.") - - try: - import bz2 - except ImportError: - raise CompressionError("bz2 module is not available") - - if fileobj is not None: - fileobj = _BZ2Proxy(fileobj, mode) - else: - fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel) - - try: - t = cls.taropen(name, mode, fileobj, **kwargs) - except (IOError, EOFError): - fileobj.close() - raise ReadError("not a bzip2 file") - t._extfileobj = False - return t - - # All *open() methods are registered here. - OPEN_METH = { - "tar": "taropen", # uncompressed tar - "gz": "gzopen", # gzip compressed tar - "bz2": "bz2open" # bzip2 compressed tar - } - - #-------------------------------------------------------------------------- - # The public methods which TarFile provides: - - def close(self): - """Close the TarFile. In write-mode, two finishing zero blocks are - appended to the archive. - """ - if self.closed: - return - - if self.mode in "aw": - self.fileobj.write(NUL * (BLOCKSIZE * 2)) - self.offset += (BLOCKSIZE * 2) - # fill up the end with zero-blocks - # (like option -b20 for tar does) - blocks, remainder = divmod(self.offset, RECORDSIZE) - if remainder > 0: - self.fileobj.write(NUL * (RECORDSIZE - remainder)) - - if not self._extfileobj: - self.fileobj.close() - self.closed = True - - def getmember(self, name): - """Return a TarInfo object for member `name'. If `name' can not be - found in the archive, KeyError is raised. If a member occurs more - than once in the archive, its last occurrence is assumed to be the - most up-to-date version. - """ - tarinfo = self._getmember(name) - if tarinfo is None: - raise KeyError("filename %r not found" % name) - return tarinfo - - def getmembers(self): - """Return the members of the archive as a list of TarInfo objects. The - list has the same order as the members in the archive. - """ - self._check() - if not self._loaded: # if we want to obtain a list of - self._load() # all members, we first have to - # scan the whole archive. - return self.members - - def getnames(self): - """Return the members of the archive as a list of their names. It has - the same order as the list returned by getmembers(). - """ - return [tarinfo.name for tarinfo in self.getmembers()] - - def gettarinfo(self, name=None, arcname=None, fileobj=None): - """Create a TarInfo object for either the file `name' or the file - object `fileobj' (using os.fstat on its file descriptor). You can - modify some of the TarInfo's attributes before you add it using - addfile(). If given, `arcname' specifies an alternative name for the - file in the archive. - """ - self._check("aw") - - # When fileobj is given, replace name by - # fileobj's real name. - if fileobj is not None: - name = fileobj.name - - # Building the name of the member in the archive. - # Backward slashes are converted to forward slashes, - # Absolute paths are turned to relative paths. - if arcname is None: - arcname = name - drv, arcname = os.path.splitdrive(arcname) - arcname = arcname.replace(os.sep, "/") - arcname = arcname.lstrip("/") - - # Now, fill the TarInfo object with - # information specific for the file. - tarinfo = self.tarinfo() - tarinfo.tarfile = self - - # Use os.stat or os.lstat, depending on platform - # and if symlinks shall be resolved. - if fileobj is None: - if hasattr(os, "lstat") and not self.dereference: - statres = os.lstat(name) - else: - statres = os.stat(name) - else: - statres = os.fstat(fileobj.fileno()) - linkname = "" - - stmd = statres.st_mode - if stat.S_ISREG(stmd): - inode = (statres.st_ino, statres.st_dev) - if not self.dereference and statres.st_nlink > 1 and \ - inode in self.inodes and arcname != self.inodes[inode]: - # Is it a hardlink to an already - # archived file? - type = LNKTYPE - linkname = self.inodes[inode] - else: - # The inode is added only if its valid. - # For win32 it is always 0. - type = REGTYPE - if inode[0]: - self.inodes[inode] = arcname - elif stat.S_ISDIR(stmd): - type = DIRTYPE - elif stat.S_ISFIFO(stmd): - type = FIFOTYPE - elif stat.S_ISLNK(stmd): - type = SYMTYPE - linkname = os.readlink(name) - elif stat.S_ISCHR(stmd): - type = CHRTYPE - elif stat.S_ISBLK(stmd): - type = BLKTYPE - else: - return None - - # Fill the TarInfo object with all - # information we can get. - tarinfo.name = arcname - tarinfo.mode = stmd - tarinfo.uid = statres.st_uid - tarinfo.gid = statres.st_gid - if type == REGTYPE: - tarinfo.size = statres.st_size - else: - tarinfo.size = 0 - tarinfo.mtime = statres.st_mtime - tarinfo.type = type - tarinfo.linkname = linkname - if pwd: - try: - tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0] - except KeyError: - pass - if grp: - try: - tarinfo.gname = grp.getgrgid(tarinfo.gid)[0] - except KeyError: - pass - - if type in (CHRTYPE, BLKTYPE): - if hasattr(os, "major") and hasattr(os, "minor"): - tarinfo.devmajor = os.major(statres.st_rdev) - tarinfo.devminor = os.minor(statres.st_rdev) - return tarinfo - - def list(self, verbose=True): - """Print a table of contents to sys.stdout. If `verbose' is False, only - the names of the members are printed. If it is True, an `ls -l'-like - output is produced. - """ - self._check() - - for tarinfo in self: - if verbose: - print(filemode(tarinfo.mode), end=' ') - print("%s/%s" % (tarinfo.uname or tarinfo.uid, - tarinfo.gname or tarinfo.gid), end=' ') - if tarinfo.ischr() or tarinfo.isblk(): - print("%10s" % ("%d,%d" \ - % (tarinfo.devmajor, tarinfo.devminor)), end=' ') - else: - print("%10d" % tarinfo.size, end=' ') - print("%d-%02d-%02d %02d:%02d:%02d" \ - % time.localtime(tarinfo.mtime)[:6], end=' ') - - print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=' ') - - if verbose: - if tarinfo.issym(): - print("->", tarinfo.linkname, end=' ') - if tarinfo.islnk(): - print("link to", tarinfo.linkname, end=' ') - print() - - def add(self, name, arcname=None, recursive=True, exclude=None, filter=None): - """Add the file `name' to the archive. `name' may be any type of file - (directory, fifo, symbolic link, etc.). If given, `arcname' - specifies an alternative name for the file in the archive. - Directories are added recursively by default. This can be avoided by - setting `recursive' to False. `exclude' is a function that should - return True for each filename to be excluded. `filter' is a function - that expects a TarInfo object argument and returns the changed - TarInfo object, if it returns None the TarInfo object will be - excluded from the archive. - """ - self._check("aw") - - if arcname is None: - arcname = name - - # Exclude pathnames. - if exclude is not None: - import warnings - warnings.warn("use the filter argument instead", - DeprecationWarning, 2) - if exclude(name): - self._dbg(2, "tarfile: Excluded %r" % name) - return - - # Skip if somebody tries to archive the archive... - if self.name is not None and os.path.abspath(name) == self.name: - self._dbg(2, "tarfile: Skipped %r" % name) - return - - self._dbg(1, name) - - # Create a TarInfo object from the file. - tarinfo = self.gettarinfo(name, arcname) - - if tarinfo is None: - self._dbg(1, "tarfile: Unsupported type %r" % name) - return - - # Change or exclude the TarInfo object. - if filter is not None: - tarinfo = filter(tarinfo) - if tarinfo is None: - self._dbg(2, "tarfile: Excluded %r" % name) - return - - # Append the tar header and data to the archive. - if tarinfo.isreg(): - f = bltn_open(name, "rb") - self.addfile(tarinfo, f) - f.close() - - elif tarinfo.isdir(): - self.addfile(tarinfo) - if recursive: - for f in os.listdir(name): - self.add(os.path.join(name, f), os.path.join(arcname, f), - recursive, exclude, filter=filter) - - else: - self.addfile(tarinfo) - - def addfile(self, tarinfo, fileobj=None): - """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is - given, tarinfo.size bytes are read from it and added to the archive. - You can create TarInfo objects using gettarinfo(). - On Windows platforms, `fileobj' should always be opened with mode - 'rb' to avoid irritation about the file size. - """ - self._check("aw") - - tarinfo = copy.copy(tarinfo) - - buf = tarinfo.tobuf(self.format, self.encoding, self.errors) - self.fileobj.write(buf) - self.offset += len(buf) - - # If there's data to follow, append it. - if fileobj is not None: - copyfileobj(fileobj, self.fileobj, tarinfo.size) - blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) - if remainder > 0: - self.fileobj.write(NUL * (BLOCKSIZE - remainder)) - blocks += 1 - self.offset += blocks * BLOCKSIZE - - self.members.append(tarinfo) - - def extractall(self, path=".", members=None): - """Extract all members from the archive to the current working - directory and set owner, modification time and permissions on - directories afterwards. `path' specifies a different directory - to extract to. `members' is optional and must be a subset of the - list returned by getmembers(). - """ - directories = [] - - if members is None: - members = self - - for tarinfo in members: - if tarinfo.isdir(): - # Extract directories with a safe mode. - directories.append(tarinfo) - tarinfo = copy.copy(tarinfo) - tarinfo.mode = 0o700 - # Do not set_attrs directories, as we will do that further down - self.extract(tarinfo, path, set_attrs=not tarinfo.isdir()) - - # Reverse sort directories. - directories.sort(key=lambda a: a.name) - directories.reverse() - - # Set correct owner, mtime and filemode on directories. - for tarinfo in directories: - dirpath = os.path.join(path, tarinfo.name) - try: - self.chown(tarinfo, dirpath) - self.utime(tarinfo, dirpath) - self.chmod(tarinfo, dirpath) - except ExtractError as e: - if self.errorlevel > 1: - raise - else: - self._dbg(1, "tarfile: %s" % e) - - def extract(self, member, path="", set_attrs=True): - """Extract a member from the archive to the current working directory, - using its full name. Its file information is extracted as accurately - as possible. `member' may be a filename or a TarInfo object. You can - specify a different directory using `path'. File attributes (owner, - mtime, mode) are set unless `set_attrs' is False. - """ - self._check("r") - - if isinstance(member, str): - tarinfo = self.getmember(member) - else: - tarinfo = member - - # Prepare the link target for makelink(). - if tarinfo.islnk(): - tarinfo._link_target = os.path.join(path, tarinfo.linkname) - - try: - self._extract_member(tarinfo, os.path.join(path, tarinfo.name), - set_attrs=set_attrs) - except EnvironmentError as e: - if self.errorlevel > 0: - raise - else: - if e.filename is None: - self._dbg(1, "tarfile: %s" % e.strerror) - else: - self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) - except ExtractError as e: - if self.errorlevel > 1: - raise - else: - self._dbg(1, "tarfile: %s" % e) - - def extractfile(self, member): - """Extract a member from the archive as a file object. `member' may be - a filename or a TarInfo object. If `member' is a regular file, a - file-like object is returned. If `member' is a link, a file-like - object is constructed from the link's target. If `member' is none of - the above, None is returned. - The file-like object is read-only and provides the following - methods: read(), readline(), readlines(), seek() and tell() - """ - self._check("r") - - if isinstance(member, str): - tarinfo = self.getmember(member) - else: - tarinfo = member - - if tarinfo.isreg(): - return self.fileobject(self, tarinfo) - - elif tarinfo.type not in SUPPORTED_TYPES: - # If a member's type is unknown, it is treated as a - # regular file. - return self.fileobject(self, tarinfo) - - elif tarinfo.islnk() or tarinfo.issym(): - if isinstance(self.fileobj, _Stream): - # A small but ugly workaround for the case that someone tries - # to extract a (sym)link as a file-object from a non-seekable - # stream of tar blocks. - raise StreamError("cannot extract (sym)link as file object") - else: - # A (sym)link's file object is its target's file object. - return self.extractfile(self._find_link_target(tarinfo)) - else: - # If there's no data associated with the member (directory, chrdev, - # blkdev, etc.), return None instead of a file object. - return None - - def _extract_member(self, tarinfo, targetpath, set_attrs=True): - """Extract the TarInfo object tarinfo to a physical - file called targetpath. - """ - # Fetch the TarInfo object for the given name - # and build the destination pathname, replacing - # forward slashes to platform specific separators. - targetpath = targetpath.rstrip("/") - targetpath = targetpath.replace("/", os.sep) - - # Create all upper directories. - upperdirs = os.path.dirname(targetpath) - if upperdirs and not os.path.exists(upperdirs): - # Create directories that are not part of the archive with - # default permissions. - os.makedirs(upperdirs) - - if tarinfo.islnk() or tarinfo.issym(): - self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname)) - else: - self._dbg(1, tarinfo.name) - - if tarinfo.isreg(): - self.makefile(tarinfo, targetpath) - elif tarinfo.isdir(): - self.makedir(tarinfo, targetpath) - elif tarinfo.isfifo(): - self.makefifo(tarinfo, targetpath) - elif tarinfo.ischr() or tarinfo.isblk(): - self.makedev(tarinfo, targetpath) - elif tarinfo.islnk() or tarinfo.issym(): - self.makelink(tarinfo, targetpath) - elif tarinfo.type not in SUPPORTED_TYPES: - self.makeunknown(tarinfo, targetpath) - else: - self.makefile(tarinfo, targetpath) - - if set_attrs: - self.chown(tarinfo, targetpath) - if not tarinfo.issym(): - self.chmod(tarinfo, targetpath) - self.utime(tarinfo, targetpath) - - #-------------------------------------------------------------------------- - # Below are the different file methods. They are called via - # _extract_member() when extract() is called. They can be replaced in a - # subclass to implement other functionality. - - def makedir(self, tarinfo, targetpath): - """Make a directory called targetpath. - """ - try: - # Use a safe mode for the directory, the real mode is set - # later in _extract_member(). - os.mkdir(targetpath, 0o700) - except EnvironmentError as e: - if e.errno != errno.EEXIST: - raise - - def makefile(self, tarinfo, targetpath): - """Make a file called targetpath. - """ - source = self.fileobj - source.seek(tarinfo.offset_data) - target = bltn_open(targetpath, "wb") - if tarinfo.sparse is not None: - for offset, size in tarinfo.sparse: - target.seek(offset) - copyfileobj(source, target, size) - else: - copyfileobj(source, target, tarinfo.size) - target.seek(tarinfo.size) - target.truncate() - target.close() - - def makeunknown(self, tarinfo, targetpath): - """Make a file from a TarInfo object with an unknown type - at targetpath. - """ - self.makefile(tarinfo, targetpath) - self._dbg(1, "tarfile: Unknown file type %r, " \ - "extracted as regular file." % tarinfo.type) - - def makefifo(self, tarinfo, targetpath): - """Make a fifo called targetpath. - """ - if hasattr(os, "mkfifo"): - os.mkfifo(targetpath) - else: - raise ExtractError("fifo not supported by system") - - def makedev(self, tarinfo, targetpath): - """Make a character or block device called targetpath. - """ - if not hasattr(os, "mknod") or not hasattr(os, "makedev"): - raise ExtractError("special devices not supported by system") - - mode = tarinfo.mode - if tarinfo.isblk(): - mode |= stat.S_IFBLK - else: - mode |= stat.S_IFCHR - - os.mknod(targetpath, mode, - os.makedev(tarinfo.devmajor, tarinfo.devminor)) - - def makelink(self, tarinfo, targetpath): - """Make a (symbolic) link called targetpath. If it cannot be created - (platform limitation), we try to make a copy of the referenced file - instead of a link. - """ - try: - # For systems that support symbolic and hard links. - if tarinfo.issym(): - os.symlink(tarinfo.linkname, targetpath) - else: - # See extract(). - if os.path.exists(tarinfo._link_target): - os.link(tarinfo._link_target, targetpath) - else: - self._extract_member(self._find_link_target(tarinfo), - targetpath) - except symlink_exception: - if tarinfo.issym(): - linkpath = os.path.join(os.path.dirname(tarinfo.name), - tarinfo.linkname) - else: - linkpath = tarinfo.linkname - else: - try: - self._extract_member(self._find_link_target(tarinfo), - targetpath) - except KeyError: - raise ExtractError("unable to resolve link inside archive") - - def chown(self, tarinfo, targetpath): - """Set owner of targetpath according to tarinfo. - """ - if pwd and hasattr(os, "geteuid") and os.geteuid() == 0: - # We have to be root to do so. - try: - g = grp.getgrnam(tarinfo.gname)[2] - except KeyError: - g = tarinfo.gid - try: - u = pwd.getpwnam(tarinfo.uname)[2] - except KeyError: - u = tarinfo.uid - try: - if tarinfo.issym() and hasattr(os, "lchown"): - os.lchown(targetpath, u, g) - else: - if sys.platform != "os2emx": - os.chown(targetpath, u, g) - except EnvironmentError as e: - raise ExtractError("could not change owner") - - def chmod(self, tarinfo, targetpath): - """Set file permissions of targetpath according to tarinfo. - """ - if hasattr(os, 'chmod'): - try: - os.chmod(targetpath, tarinfo.mode) - except EnvironmentError as e: - raise ExtractError("could not change mode") - - def utime(self, tarinfo, targetpath): - """Set modification time of targetpath according to tarinfo. - """ - if not hasattr(os, 'utime'): - return - try: - os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) - except EnvironmentError as e: - raise ExtractError("could not change modification time") - - #-------------------------------------------------------------------------- - def next(self): - """Return the next member of the archive as a TarInfo object, when - TarFile is opened for reading. Return None if there is no more - available. - """ - self._check("ra") - if self.firstmember is not None: - m = self.firstmember - self.firstmember = None - return m - - # Read the next block. - self.fileobj.seek(self.offset) - tarinfo = None - while True: - try: - tarinfo = self.tarinfo.fromtarfile(self) - except EOFHeaderError as e: - if self.ignore_zeros: - self._dbg(2, "0x%X: %s" % (self.offset, e)) - self.offset += BLOCKSIZE - continue - except InvalidHeaderError as e: - if self.ignore_zeros: - self._dbg(2, "0x%X: %s" % (self.offset, e)) - self.offset += BLOCKSIZE - continue - elif self.offset == 0: - raise ReadError(str(e)) - except EmptyHeaderError: - if self.offset == 0: - raise ReadError("empty file") - except TruncatedHeaderError as e: - if self.offset == 0: - raise ReadError(str(e)) - except SubsequentHeaderError as e: - raise ReadError(str(e)) - break - - if tarinfo is not None: - self.members.append(tarinfo) - else: - self._loaded = True - - return tarinfo - - #-------------------------------------------------------------------------- - # Little helper methods: - - def _getmember(self, name, tarinfo=None, normalize=False): - """Find an archive member by name from bottom to top. - If tarinfo is given, it is used as the starting point. - """ - # Ensure that all members have been loaded. - members = self.getmembers() - - # Limit the member search list up to tarinfo. - if tarinfo is not None: - members = members[:members.index(tarinfo)] - - if normalize: - name = os.path.normpath(name) - - for member in reversed(members): - if normalize: - member_name = os.path.normpath(member.name) - else: - member_name = member.name - - if name == member_name: - return member - - def _load(self): - """Read through the entire archive file and look for readable - members. - """ - while True: - tarinfo = self.next() - if tarinfo is None: - break - self._loaded = True - - def _check(self, mode=None): - """Check if TarFile is still open, and if the operation's mode - corresponds to TarFile's mode. - """ - if self.closed: - raise IOError("%s is closed" % self.__class__.__name__) - if mode is not None and self.mode not in mode: - raise IOError("bad operation for mode %r" % self.mode) - - def _find_link_target(self, tarinfo): - """Find the target member of a symlink or hardlink member in the - archive. - """ - if tarinfo.issym(): - # Always search the entire archive. - linkname = os.path.dirname(tarinfo.name) + "/" + tarinfo.linkname - limit = None - else: - # Search the archive before the link, because a hard link is - # just a reference to an already archived file. - linkname = tarinfo.linkname - limit = tarinfo - - member = self._getmember(linkname, tarinfo=limit, normalize=True) - if member is None: - raise KeyError("linkname %r not found" % linkname) - return member - - def __iter__(self): - """Provide an iterator object. - """ - if self._loaded: - return iter(self.members) - else: - return TarIter(self) - - def _dbg(self, level, msg): - """Write debugging output to sys.stderr. - """ - if level <= self.debug: - print(msg, file=sys.stderr) - - def __enter__(self): - self._check() - return self - - def __exit__(self, type, value, traceback): - if type is None: - self.close() - else: - # An exception occurred. We must not call close() because - # it would try to write end-of-archive blocks and padding. - if not self._extfileobj: - self.fileobj.close() - self.closed = True -# class TarFile - -class TarIter(object): - """Iterator Class. - - for tarinfo in TarFile(...): - suite... - """ - - def __init__(self, tarfile): - """Construct a TarIter object. - """ - self.tarfile = tarfile - self.index = 0 - def __iter__(self): - """Return iterator object. - """ - return self - - def __next__(self): - """Return the next item using TarFile's next() method. - When all members have been read, set TarFile as _loaded. - """ - # Fix for SF #1100429: Under rare circumstances it can - # happen that getmembers() is called during iteration, - # which will cause TarIter to stop prematurely. - if not self.tarfile._loaded: - tarinfo = self.tarfile.next() - if not tarinfo: - self.tarfile._loaded = True - raise StopIteration - else: - try: - tarinfo = self.tarfile.members[self.index] - except IndexError: - raise StopIteration - self.index += 1 - return tarinfo - - next = __next__ # for Python 2.x - -#-------------------- -# exported functions -#-------------------- -def is_tarfile(name): - """Return True if name points to a tar archive that we - are able to handle, else return False. - """ - try: - t = open(name) - t.close() - return True - except TarError: - return False - -bltn_open = open -open = TarFile.open diff --git a/pipenv/patched/notpip/_vendor/distlib/t32.exe b/pipenv/patched/notpip/_vendor/distlib/t32.exe deleted file mode 100644 index 8932a18e45..0000000000 Binary files a/pipenv/patched/notpip/_vendor/distlib/t32.exe and /dev/null differ diff --git a/pipenv/patched/notpip/_vendor/distlib/t64.exe b/pipenv/patched/notpip/_vendor/distlib/t64.exe deleted file mode 100644 index 325b8057c0..0000000000 Binary files a/pipenv/patched/notpip/_vendor/distlib/t64.exe and /dev/null differ diff --git a/pipenv/patched/notpip/_vendor/distlib/w32.exe b/pipenv/patched/notpip/_vendor/distlib/w32.exe deleted file mode 100644 index e6439e9e45..0000000000 Binary files a/pipenv/patched/notpip/_vendor/distlib/w32.exe and /dev/null differ diff --git a/pipenv/patched/notpip/_vendor/distlib/w64-arm.exe b/pipenv/patched/notpip/_vendor/distlib/w64-arm.exe deleted file mode 100644 index 70a2ec2685..0000000000 Binary files a/pipenv/patched/notpip/_vendor/distlib/w64-arm.exe and /dev/null differ diff --git a/pipenv/patched/notpip/_vendor/distlib/w64.exe b/pipenv/patched/notpip/_vendor/distlib/w64.exe deleted file mode 100644 index 46139dbf94..0000000000 Binary files a/pipenv/patched/notpip/_vendor/distlib/w64.exe and /dev/null differ diff --git a/pipenv/patched/notpip/_vendor/html5lib/LICENSE b/pipenv/patched/notpip/_vendor/html5lib/LICENSE deleted file mode 100644 index c87fa7a000..0000000000 --- a/pipenv/patched/notpip/_vendor/html5lib/LICENSE +++ /dev/null @@ -1,20 +0,0 @@ -Copyright (c) 2006-2013 James Graham and other contributors - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/pipenv/patched/notpip/_vendor/html5lib/__init__.py b/pipenv/patched/notpip/_vendor/html5lib/__init__.py deleted file mode 100644 index 3cf6cecc3e..0000000000 --- a/pipenv/patched/notpip/_vendor/html5lib/__init__.py +++ /dev/null @@ -1,35 +0,0 @@ -""" -HTML parsing library based on the `WHATWG HTML specification -`_. The parser is designed to be compatible with -existing HTML found in the wild and implements well-defined error recovery that -is largely compatible with modern desktop web browsers. - -Example usage:: - - from pipenv.patched.notpip._vendor import html5lib - with open("my_document.html", "rb") as f: - tree = html5lib.parse(f) - -For convenience, this module re-exports the following names: - -* :func:`~.html5parser.parse` -* :func:`~.html5parser.parseFragment` -* :class:`~.html5parser.HTMLParser` -* :func:`~.treebuilders.getTreeBuilder` -* :func:`~.treewalkers.getTreeWalker` -* :func:`~.serializer.serialize` -""" - -from __future__ import absolute_import, division, unicode_literals - -from .html5parser import HTMLParser, parse, parseFragment -from .treebuilders import getTreeBuilder -from .treewalkers import getTreeWalker -from .serializer import serialize - -__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", - "getTreeWalker", "serialize"] - -# this has to be at the top level, see how setup.py parses this -#: Distribution version number. -__version__ = "1.1" diff --git a/pipenv/patched/notpip/_vendor/html5lib/_ihatexml.py b/pipenv/patched/notpip/_vendor/html5lib/_ihatexml.py deleted file mode 100644 index 3ff803c195..0000000000 --- a/pipenv/patched/notpip/_vendor/html5lib/_ihatexml.py +++ /dev/null @@ -1,289 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import re -import warnings - -from .constants import DataLossWarning - -baseChar = """ -[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | -[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | -[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | -[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | -[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | -[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | -[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | -[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | -[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | -[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | -[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | -[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | -[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | -[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | -[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | -[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | -[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | -[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | -[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | -[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | -[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | -[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | -[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | -[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | -[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | -[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | -[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | -[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | -[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | -[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | -#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | -#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | -#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | -[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | -[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | -#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | -[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | -[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | -[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | -[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | -[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | -#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | -[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | -[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | -[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | -[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]""" - -ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]""" - -combiningCharacter = """ -[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | -[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | -[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | -[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | -#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | -[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | -[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | -#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | -[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | -[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | -#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | -[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | -[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | -[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | -[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | -[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | -#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | -[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | -#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | -[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | -[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | -#x3099 | #x309A""" - -digit = """ -[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | -[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | -[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | -[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]""" - -extender = """ -#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | -#[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]""" - -letter = " | ".join([baseChar, ideographic]) - -# Without the -name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter, - extender]) -nameFirst = " | ".join([letter, "_"]) - -reChar = re.compile(r"#x([\d|A-F]{4,4})") -reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]") - - -def charStringToList(chars): - charRanges = [item.strip() for item in chars.split(" | ")] - rv = [] - for item in charRanges: - foundMatch = False - for regexp in (reChar, reCharRange): - match = regexp.match(item) - if match is not None: - rv.append([hexToInt(item) for item in match.groups()]) - if len(rv[-1]) == 1: - rv[-1] = rv[-1] * 2 - foundMatch = True - break - if not foundMatch: - assert len(item) == 1 - - rv.append([ord(item)] * 2) - rv = normaliseCharList(rv) - return rv - - -def normaliseCharList(charList): - charList = sorted(charList) - for item in charList: - assert item[1] >= item[0] - rv = [] - i = 0 - while i < len(charList): - j = 1 - rv.append(charList[i]) - while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1: - rv[-1][1] = charList[i + j][1] - j += 1 - i += j - return rv - - -# We don't really support characters above the BMP :( -max_unicode = int("FFFF", 16) - - -def missingRanges(charList): - rv = [] - if charList[0] != 0: - rv.append([0, charList[0][0] - 1]) - for i, item in enumerate(charList[:-1]): - rv.append([item[1] + 1, charList[i + 1][0] - 1]) - if charList[-1][1] != max_unicode: - rv.append([charList[-1][1] + 1, max_unicode]) - return rv - - -def listToRegexpStr(charList): - rv = [] - for item in charList: - if item[0] == item[1]: - rv.append(escapeRegexp(chr(item[0]))) - else: - rv.append(escapeRegexp(chr(item[0])) + "-" + - escapeRegexp(chr(item[1]))) - return "[%s]" % "".join(rv) - - -def hexToInt(hex_str): - return int(hex_str, 16) - - -def escapeRegexp(string): - specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}", - "[", "]", "|", "(", ")", "-") - for char in specialCharacters: - string = string.replace(char, "\\" + char) - - return string - -# output from the above -nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa - -nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa - -# Simpler things -nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]") - - -class InfosetFilter(object): - replacementRegexp = re.compile(r"U[\dA-F]{5,5}") - - def __init__(self, - dropXmlnsLocalName=False, - dropXmlnsAttrNs=False, - preventDoubleDashComments=False, - preventDashAtCommentEnd=False, - replaceFormFeedCharacters=True, - preventSingleQuotePubid=False): - - self.dropXmlnsLocalName = dropXmlnsLocalName - self.dropXmlnsAttrNs = dropXmlnsAttrNs - - self.preventDoubleDashComments = preventDoubleDashComments - self.preventDashAtCommentEnd = preventDashAtCommentEnd - - self.replaceFormFeedCharacters = replaceFormFeedCharacters - - self.preventSingleQuotePubid = preventSingleQuotePubid - - self.replaceCache = {} - - def coerceAttribute(self, name, namespace=None): - if self.dropXmlnsLocalName and name.startswith("xmlns:"): - warnings.warn("Attributes cannot begin with xmlns", DataLossWarning) - return None - elif (self.dropXmlnsAttrNs and - namespace == "http://www.w3.org/2000/xmlns/"): - warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning) - return None - else: - return self.toXmlName(name) - - def coerceElement(self, name): - return self.toXmlName(name) - - def coerceComment(self, data): - if self.preventDoubleDashComments: - while "--" in data: - warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) - data = data.replace("--", "- -") - if data.endswith("-"): - warnings.warn("Comments cannot end in a dash", DataLossWarning) - data += " " - return data - - def coerceCharacters(self, data): - if self.replaceFormFeedCharacters: - for _ in range(data.count("\x0C")): - warnings.warn("Text cannot contain U+000C", DataLossWarning) - data = data.replace("\x0C", " ") - # Other non-xml characters - return data - - def coercePubid(self, data): - dataOutput = data - for char in nonPubidCharRegexp.findall(data): - warnings.warn("Coercing non-XML pubid", DataLossWarning) - replacement = self.getReplacementCharacter(char) - dataOutput = dataOutput.replace(char, replacement) - if self.preventSingleQuotePubid and dataOutput.find("'") >= 0: - warnings.warn("Pubid cannot contain single quote", DataLossWarning) - dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'")) - return dataOutput - - def toXmlName(self, name): - nameFirst = name[0] - nameRest = name[1:] - m = nonXmlNameFirstBMPRegexp.match(nameFirst) - if m: - warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning) - nameFirstOutput = self.getReplacementCharacter(nameFirst) - else: - nameFirstOutput = nameFirst - - nameRestOutput = nameRest - replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest)) - for char in replaceChars: - warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning) - replacement = self.getReplacementCharacter(char) - nameRestOutput = nameRestOutput.replace(char, replacement) - return nameFirstOutput + nameRestOutput - - def getReplacementCharacter(self, char): - if char in self.replaceCache: - replacement = self.replaceCache[char] - else: - replacement = self.escapeChar(char) - return replacement - - def fromXmlName(self, name): - for item in set(self.replacementRegexp.findall(name)): - name = name.replace(item, self.unescapeChar(item)) - return name - - def escapeChar(self, char): - replacement = "U%05X" % ord(char) - self.replaceCache[char] = replacement - return replacement - - def unescapeChar(self, charcode): - return chr(int(charcode[1:], 16)) diff --git a/pipenv/patched/notpip/_vendor/html5lib/_inputstream.py b/pipenv/patched/notpip/_vendor/html5lib/_inputstream.py deleted file mode 100644 index 43d272efb2..0000000000 --- a/pipenv/patched/notpip/_vendor/html5lib/_inputstream.py +++ /dev/null @@ -1,918 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from pipenv.patched.notpip._vendor.six import text_type -from pipenv.patched.notpip._vendor.six.moves import http_client, urllib - -import codecs -import re -from io import BytesIO, StringIO - -from pipenv.patched.notpip._vendor import webencodings - -from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase -from .constants import _ReparseException -from . import _utils - -# Non-unicode versions of constants for use in the pre-parser -spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters]) -asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters]) -asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase]) -spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) - - -invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa - -if _utils.supports_lone_surrogates: - # Use one extra step of indirection and create surrogates with - # eval. Not using this indirection would introduce an illegal - # unicode literal on platforms not supporting such lone - # surrogates. - assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1 - invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] + - eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used - "]") -else: - invalid_unicode_re = re.compile(invalid_unicode_no_surrogate) - -non_bmp_invalid_codepoints = {0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, - 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, - 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, - 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, - 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, - 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, - 0x10FFFE, 0x10FFFF} - -ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]") - -# Cache for charsUntil() -charsUntilRegEx = {} - - -class BufferedStream(object): - """Buffering for streams that do not have buffering of their own - - The buffer is implemented as a list of chunks on the assumption that - joining many strings will be slow since it is O(n**2) - """ - - def __init__(self, stream): - self.stream = stream - self.buffer = [] - self.position = [-1, 0] # chunk number, offset - - def tell(self): - pos = 0 - for chunk in self.buffer[:self.position[0]]: - pos += len(chunk) - pos += self.position[1] - return pos - - def seek(self, pos): - assert pos <= self._bufferedBytes() - offset = pos - i = 0 - while len(self.buffer[i]) < offset: - offset -= len(self.buffer[i]) - i += 1 - self.position = [i, offset] - - def read(self, bytes): - if not self.buffer: - return self._readStream(bytes) - elif (self.position[0] == len(self.buffer) and - self.position[1] == len(self.buffer[-1])): - return self._readStream(bytes) - else: - return self._readFromBuffer(bytes) - - def _bufferedBytes(self): - return sum([len(item) for item in self.buffer]) - - def _readStream(self, bytes): - data = self.stream.read(bytes) - self.buffer.append(data) - self.position[0] += 1 - self.position[1] = len(data) - return data - - def _readFromBuffer(self, bytes): - remainingBytes = bytes - rv = [] - bufferIndex = self.position[0] - bufferOffset = self.position[1] - while bufferIndex < len(self.buffer) and remainingBytes != 0: - assert remainingBytes > 0 - bufferedData = self.buffer[bufferIndex] - - if remainingBytes <= len(bufferedData) - bufferOffset: - bytesToRead = remainingBytes - self.position = [bufferIndex, bufferOffset + bytesToRead] - else: - bytesToRead = len(bufferedData) - bufferOffset - self.position = [bufferIndex, len(bufferedData)] - bufferIndex += 1 - rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead]) - remainingBytes -= bytesToRead - - bufferOffset = 0 - - if remainingBytes: - rv.append(self._readStream(remainingBytes)) - - return b"".join(rv) - - -def HTMLInputStream(source, **kwargs): - # Work around Python bug #20007: read(0) closes the connection. - # http://bugs.python.org/issue20007 - if (isinstance(source, http_client.HTTPResponse) or - # Also check for addinfourl wrapping HTTPResponse - (isinstance(source, urllib.response.addbase) and - isinstance(source.fp, http_client.HTTPResponse))): - isUnicode = False - elif hasattr(source, "read"): - isUnicode = isinstance(source.read(0), text_type) - else: - isUnicode = isinstance(source, text_type) - - if isUnicode: - encodings = [x for x in kwargs if x.endswith("_encoding")] - if encodings: - raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings) - - return HTMLUnicodeInputStream(source, **kwargs) - else: - return HTMLBinaryInputStream(source, **kwargs) - - -class HTMLUnicodeInputStream(object): - """Provides a unicode stream of characters to the HTMLTokenizer. - - This class takes care of character encoding and removing or replacing - incorrect byte-sequences and also provides column and line tracking. - - """ - - _defaultChunkSize = 10240 - - def __init__(self, source): - """Initialises the HTMLInputStream. - - HTMLInputStream(source, [encoding]) -> Normalized stream from source - for use by html5lib. - - source can be either a file-object, local filename or a string. - - The optional encoding parameter must be a string that indicates - the encoding. If specified, that encoding will be used, - regardless of any BOM or later declaration (such as in a meta - element) - - """ - - if not _utils.supports_lone_surrogates: - # Such platforms will have already checked for such - # surrogate errors, so no need to do this checking. - self.reportCharacterErrors = None - elif len("\U0010FFFF") == 1: - self.reportCharacterErrors = self.characterErrorsUCS4 - else: - self.reportCharacterErrors = self.characterErrorsUCS2 - - # List of where new lines occur - self.newLines = [0] - - self.charEncoding = (lookupEncoding("utf-8"), "certain") - self.dataStream = self.openStream(source) - - self.reset() - - def reset(self): - self.chunk = "" - self.chunkSize = 0 - self.chunkOffset = 0 - self.errors = [] - - # number of (complete) lines in previous chunks - self.prevNumLines = 0 - # number of columns in the last line of the previous chunk - self.prevNumCols = 0 - - # Deal with CR LF and surrogates split over chunk boundaries - self._bufferedCharacter = None - - def openStream(self, source): - """Produces a file object from source. - - source can be either a file object, local filename or a string. - - """ - # Already a file object - if hasattr(source, 'read'): - stream = source - else: - stream = StringIO(source) - - return stream - - def _position(self, offset): - chunk = self.chunk - nLines = chunk.count('\n', 0, offset) - positionLine = self.prevNumLines + nLines - lastLinePos = chunk.rfind('\n', 0, offset) - if lastLinePos == -1: - positionColumn = self.prevNumCols + offset - else: - positionColumn = offset - (lastLinePos + 1) - return (positionLine, positionColumn) - - def position(self): - """Returns (line, col) of the current position in the stream.""" - line, col = self._position(self.chunkOffset) - return (line + 1, col) - - def char(self): - """ Read one character from the stream or queue if available. Return - EOF when EOF is reached. - """ - # Read a new chunk from the input stream if necessary - if self.chunkOffset >= self.chunkSize: - if not self.readChunk(): - return EOF - - chunkOffset = self.chunkOffset - char = self.chunk[chunkOffset] - self.chunkOffset = chunkOffset + 1 - - return char - - def readChunk(self, chunkSize=None): - if chunkSize is None: - chunkSize = self._defaultChunkSize - - self.prevNumLines, self.prevNumCols = self._position(self.chunkSize) - - self.chunk = "" - self.chunkSize = 0 - self.chunkOffset = 0 - - data = self.dataStream.read(chunkSize) - - # Deal with CR LF and surrogates broken across chunks - if self._bufferedCharacter: - data = self._bufferedCharacter + data - self._bufferedCharacter = None - elif not data: - # We have no more data, bye-bye stream - return False - - if len(data) > 1: - lastv = ord(data[-1]) - if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF: - self._bufferedCharacter = data[-1] - data = data[:-1] - - if self.reportCharacterErrors: - self.reportCharacterErrors(data) - - # Replace invalid characters - data = data.replace("\r\n", "\n") - data = data.replace("\r", "\n") - - self.chunk = data - self.chunkSize = len(data) - - return True - - def characterErrorsUCS4(self, data): - for _ in range(len(invalid_unicode_re.findall(data))): - self.errors.append("invalid-codepoint") - - def characterErrorsUCS2(self, data): - # Someone picked the wrong compile option - # You lose - skip = False - for match in invalid_unicode_re.finditer(data): - if skip: - continue - codepoint = ord(match.group()) - pos = match.start() - # Pretty sure there should be endianness issues here - if _utils.isSurrogatePair(data[pos:pos + 2]): - # We have a surrogate pair! - char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2]) - if char_val in non_bmp_invalid_codepoints: - self.errors.append("invalid-codepoint") - skip = True - elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and - pos == len(data) - 1): - self.errors.append("invalid-codepoint") - else: - skip = False - self.errors.append("invalid-codepoint") - - def charsUntil(self, characters, opposite=False): - """ Returns a string of characters from the stream up to but not - including any character in 'characters' or EOF. 'characters' must be - a container that supports the 'in' method and iteration over its - characters. - """ - - # Use a cache of regexps to find the required characters - try: - chars = charsUntilRegEx[(characters, opposite)] - except KeyError: - if __debug__: - for c in characters: - assert(ord(c) < 128) - regex = "".join(["\\x%02x" % ord(c) for c in characters]) - if not opposite: - regex = "^%s" % regex - chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex) - - rv = [] - - while True: - # Find the longest matching prefix - m = chars.match(self.chunk, self.chunkOffset) - if m is None: - # If nothing matched, and it wasn't because we ran out of chunk, - # then stop - if self.chunkOffset != self.chunkSize: - break - else: - end = m.end() - # If not the whole chunk matched, return everything - # up to the part that didn't match - if end != self.chunkSize: - rv.append(self.chunk[self.chunkOffset:end]) - self.chunkOffset = end - break - # If the whole remainder of the chunk matched, - # use it all and read the next chunk - rv.append(self.chunk[self.chunkOffset:]) - if not self.readChunk(): - # Reached EOF - break - - r = "".join(rv) - return r - - def unget(self, char): - # Only one character is allowed to be ungotten at once - it must - # be consumed again before any further call to unget - if char is not EOF: - if self.chunkOffset == 0: - # unget is called quite rarely, so it's a good idea to do - # more work here if it saves a bit of work in the frequently - # called char and charsUntil. - # So, just prepend the ungotten character onto the current - # chunk: - self.chunk = char + self.chunk - self.chunkSize += 1 - else: - self.chunkOffset -= 1 - assert self.chunk[self.chunkOffset] == char - - -class HTMLBinaryInputStream(HTMLUnicodeInputStream): - """Provides a unicode stream of characters to the HTMLTokenizer. - - This class takes care of character encoding and removing or replacing - incorrect byte-sequences and also provides column and line tracking. - - """ - - def __init__(self, source, override_encoding=None, transport_encoding=None, - same_origin_parent_encoding=None, likely_encoding=None, - default_encoding="windows-1252", useChardet=True): - """Initialises the HTMLInputStream. - - HTMLInputStream(source, [encoding]) -> Normalized stream from source - for use by html5lib. - - source can be either a file-object, local filename or a string. - - The optional encoding parameter must be a string that indicates - the encoding. If specified, that encoding will be used, - regardless of any BOM or later declaration (such as in a meta - element) - - """ - # Raw Stream - for unicode objects this will encode to utf-8 and set - # self.charEncoding as appropriate - self.rawStream = self.openStream(source) - - HTMLUnicodeInputStream.__init__(self, self.rawStream) - - # Encoding Information - # Number of bytes to use when looking for a meta element with - # encoding information - self.numBytesMeta = 1024 - # Number of bytes to use when using detecting encoding using chardet - self.numBytesChardet = 100 - # Things from args - self.override_encoding = override_encoding - self.transport_encoding = transport_encoding - self.same_origin_parent_encoding = same_origin_parent_encoding - self.likely_encoding = likely_encoding - self.default_encoding = default_encoding - - # Determine encoding - self.charEncoding = self.determineEncoding(useChardet) - assert self.charEncoding[0] is not None - - # Call superclass - self.reset() - - def reset(self): - self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace') - HTMLUnicodeInputStream.reset(self) - - def openStream(self, source): - """Produces a file object from source. - - source can be either a file object, local filename or a string. - - """ - # Already a file object - if hasattr(source, 'read'): - stream = source - else: - stream = BytesIO(source) - - try: - stream.seek(stream.tell()) - except Exception: - stream = BufferedStream(stream) - - return stream - - def determineEncoding(self, chardet=True): - # BOMs take precedence over everything - # This will also read past the BOM if present - charEncoding = self.detectBOM(), "certain" - if charEncoding[0] is not None: - return charEncoding - - # If we've been overridden, we've been overridden - charEncoding = lookupEncoding(self.override_encoding), "certain" - if charEncoding[0] is not None: - return charEncoding - - # Now check the transport layer - charEncoding = lookupEncoding(self.transport_encoding), "certain" - if charEncoding[0] is not None: - return charEncoding - - # Look for meta elements with encoding information - charEncoding = self.detectEncodingMeta(), "tentative" - if charEncoding[0] is not None: - return charEncoding - - # Parent document encoding - charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative" - if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"): - return charEncoding - - # "likely" encoding - charEncoding = lookupEncoding(self.likely_encoding), "tentative" - if charEncoding[0] is not None: - return charEncoding - - # Guess with chardet, if available - if chardet: - try: - from pipenv.patched.notpip._vendor.chardet.universaldetector import UniversalDetector - except ImportError: - pass - else: - buffers = [] - detector = UniversalDetector() - while not detector.done: - buffer = self.rawStream.read(self.numBytesChardet) - assert isinstance(buffer, bytes) - if not buffer: - break - buffers.append(buffer) - detector.feed(buffer) - detector.close() - encoding = lookupEncoding(detector.result['encoding']) - self.rawStream.seek(0) - if encoding is not None: - return encoding, "tentative" - - # Try the default encoding - charEncoding = lookupEncoding(self.default_encoding), "tentative" - if charEncoding[0] is not None: - return charEncoding - - # Fallback to html5lib's default if even that hasn't worked - return lookupEncoding("windows-1252"), "tentative" - - def changeEncoding(self, newEncoding): - assert self.charEncoding[1] != "certain" - newEncoding = lookupEncoding(newEncoding) - if newEncoding is None: - return - if newEncoding.name in ("utf-16be", "utf-16le"): - newEncoding = lookupEncoding("utf-8") - assert newEncoding is not None - elif newEncoding == self.charEncoding[0]: - self.charEncoding = (self.charEncoding[0], "certain") - else: - self.rawStream.seek(0) - self.charEncoding = (newEncoding, "certain") - self.reset() - raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding)) - - def detectBOM(self): - """Attempts to detect at BOM at the start of the stream. If - an encoding can be determined from the BOM return the name of the - encoding otherwise return None""" - bomDict = { - codecs.BOM_UTF8: 'utf-8', - codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be', - codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be' - } - - # Go to beginning of file and read in 4 bytes - string = self.rawStream.read(4) - assert isinstance(string, bytes) - - # Try detecting the BOM using bytes from the string - encoding = bomDict.get(string[:3]) # UTF-8 - seek = 3 - if not encoding: - # Need to detect UTF-32 before UTF-16 - encoding = bomDict.get(string) # UTF-32 - seek = 4 - if not encoding: - encoding = bomDict.get(string[:2]) # UTF-16 - seek = 2 - - # Set the read position past the BOM if one was found, otherwise - # set it to the start of the stream - if encoding: - self.rawStream.seek(seek) - return lookupEncoding(encoding) - else: - self.rawStream.seek(0) - return None - - def detectEncodingMeta(self): - """Report the encoding declared by the meta element - """ - buffer = self.rawStream.read(self.numBytesMeta) - assert isinstance(buffer, bytes) - parser = EncodingParser(buffer) - self.rawStream.seek(0) - encoding = parser.getEncoding() - - if encoding is not None and encoding.name in ("utf-16be", "utf-16le"): - encoding = lookupEncoding("utf-8") - - return encoding - - -class EncodingBytes(bytes): - """String-like object with an associated position and various extra methods - If the position is ever greater than the string length then an exception is - raised""" - def __new__(self, value): - assert isinstance(value, bytes) - return bytes.__new__(self, value.lower()) - - def __init__(self, value): - # pylint:disable=unused-argument - self._position = -1 - - def __iter__(self): - return self - - def __next__(self): - p = self._position = self._position + 1 - if p >= len(self): - raise StopIteration - elif p < 0: - raise TypeError - return self[p:p + 1] - - def next(self): - # Py2 compat - return self.__next__() - - def previous(self): - p = self._position - if p >= len(self): - raise StopIteration - elif p < 0: - raise TypeError - self._position = p = p - 1 - return self[p:p + 1] - - def setPosition(self, position): - if self._position >= len(self): - raise StopIteration - self._position = position - - def getPosition(self): - if self._position >= len(self): - raise StopIteration - if self._position >= 0: - return self._position - else: - return None - - position = property(getPosition, setPosition) - - def getCurrentByte(self): - return self[self.position:self.position + 1] - - currentByte = property(getCurrentByte) - - def skip(self, chars=spaceCharactersBytes): - """Skip past a list of characters""" - p = self.position # use property for the error-checking - while p < len(self): - c = self[p:p + 1] - if c not in chars: - self._position = p - return c - p += 1 - self._position = p - return None - - def skipUntil(self, chars): - p = self.position - while p < len(self): - c = self[p:p + 1] - if c in chars: - self._position = p - return c - p += 1 - self._position = p - return None - - def matchBytes(self, bytes): - """Look for a sequence of bytes at the start of a string. If the bytes - are found return True and advance the position to the byte after the - match. Otherwise return False and leave the position alone""" - rv = self.startswith(bytes, self.position) - if rv: - self.position += len(bytes) - return rv - - def jumpTo(self, bytes): - """Look for the next sequence of bytes matching a given sequence. If - a match is found advance the position to the last byte of the match""" - try: - self._position = self.index(bytes, self.position) + len(bytes) - 1 - except ValueError: - raise StopIteration - return True - - -class EncodingParser(object): - """Mini parser for detecting character encoding from meta elements""" - - def __init__(self, data): - """string - the data to work on for encoding detection""" - self.data = EncodingBytes(data) - self.encoding = None - - def getEncoding(self): - if b"") - - def handleMeta(self): - if self.data.currentByte not in spaceCharactersBytes: - # if we have ") - - def getAttribute(self): - """Return a name,value pair for the next attribute in the stream, - if one is found, or None""" - data = self.data - # Step 1 (skip chars) - c = data.skip(spaceCharactersBytes | frozenset([b"/"])) - assert c is None or len(c) == 1 - # Step 2 - if c in (b">", None): - return None - # Step 3 - attrName = [] - attrValue = [] - # Step 4 attribute name - while True: - if c == b"=" and attrName: - break - elif c in spaceCharactersBytes: - # Step 6! - c = data.skip() - break - elif c in (b"/", b">"): - return b"".join(attrName), b"" - elif c in asciiUppercaseBytes: - attrName.append(c.lower()) - elif c is None: - return None - else: - attrName.append(c) - # Step 5 - c = next(data) - # Step 7 - if c != b"=": - data.previous() - return b"".join(attrName), b"" - # Step 8 - next(data) - # Step 9 - c = data.skip() - # Step 10 - if c in (b"'", b'"'): - # 10.1 - quoteChar = c - while True: - # 10.2 - c = next(data) - # 10.3 - if c == quoteChar: - next(data) - return b"".join(attrName), b"".join(attrValue) - # 10.4 - elif c in asciiUppercaseBytes: - attrValue.append(c.lower()) - # 10.5 - else: - attrValue.append(c) - elif c == b">": - return b"".join(attrName), b"" - elif c in asciiUppercaseBytes: - attrValue.append(c.lower()) - elif c is None: - return None - else: - attrValue.append(c) - # Step 11 - while True: - c = next(data) - if c in spacesAngleBrackets: - return b"".join(attrName), b"".join(attrValue) - elif c in asciiUppercaseBytes: - attrValue.append(c.lower()) - elif c is None: - return None - else: - attrValue.append(c) - - -class ContentAttrParser(object): - def __init__(self, data): - assert isinstance(data, bytes) - self.data = data - - def parse(self): - try: - # Check if the attr name is charset - # otherwise return - self.data.jumpTo(b"charset") - self.data.position += 1 - self.data.skip() - if not self.data.currentByte == b"=": - # If there is no = sign keep looking for attrs - return None - self.data.position += 1 - self.data.skip() - # Look for an encoding between matching quote marks - if self.data.currentByte in (b'"', b"'"): - quoteMark = self.data.currentByte - self.data.position += 1 - oldPosition = self.data.position - if self.data.jumpTo(quoteMark): - return self.data[oldPosition:self.data.position] - else: - return None - else: - # Unquoted value - oldPosition = self.data.position - try: - self.data.skipUntil(spaceCharactersBytes) - return self.data[oldPosition:self.data.position] - except StopIteration: - # Return the whole remaining value - return self.data[oldPosition:] - except StopIteration: - return None - - -def lookupEncoding(encoding): - """Return the python codec name corresponding to an encoding or None if the - string doesn't correspond to a valid encoding.""" - if isinstance(encoding, bytes): - try: - encoding = encoding.decode("ascii") - except UnicodeDecodeError: - return None - - if encoding is not None: - try: - return webencodings.lookup(encoding) - except AttributeError: - return None - else: - return None diff --git a/pipenv/patched/notpip/_vendor/html5lib/_tokenizer.py b/pipenv/patched/notpip/_vendor/html5lib/_tokenizer.py deleted file mode 100644 index 8740f4c2f4..0000000000 --- a/pipenv/patched/notpip/_vendor/html5lib/_tokenizer.py +++ /dev/null @@ -1,1735 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from pipenv.patched.notpip._vendor.six import unichr as chr - -from collections import deque, OrderedDict -from sys import version_info - -from .constants import spaceCharacters -from .constants import entities -from .constants import asciiLetters, asciiUpper2Lower -from .constants import digits, hexDigits, EOF -from .constants import tokenTypes, tagTokenTypes -from .constants import replacementCharacters - -from ._inputstream import HTMLInputStream - -from ._trie import Trie - -entitiesTrie = Trie(entities) - -if version_info >= (3, 7): - attributeMap = dict -else: - attributeMap = OrderedDict - - -class HTMLTokenizer(object): - """ This class takes care of tokenizing HTML. - - * self.currentToken - Holds the token that is currently being processed. - - * self.state - Holds a reference to the method to be invoked... XXX - - * self.stream - Points to HTMLInputStream object. - """ - - def __init__(self, stream, parser=None, **kwargs): - - self.stream = HTMLInputStream(stream, **kwargs) - self.parser = parser - - # Setup the initial tokenizer state - self.escapeFlag = False - self.lastFourChars = [] - self.state = self.dataState - self.escape = False - - # The current token being created - self.currentToken = None - super(HTMLTokenizer, self).__init__() - - def __iter__(self): - """ This is where the magic happens. - - We do our usually processing through the states and when we have a token - to return we yield the token which pauses processing until the next token - is requested. - """ - self.tokenQueue = deque([]) - # Start processing. When EOF is reached self.state will return False - # instead of True and the loop will terminate. - while self.state(): - while self.stream.errors: - yield {"type": tokenTypes["ParseError"], "data": self.stream.errors.pop(0)} - while self.tokenQueue: - yield self.tokenQueue.popleft() - - def consumeNumberEntity(self, isHex): - """This function returns either U+FFFD or the character based on the - decimal or hexadecimal representation. It also discards ";" if present. - If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked. - """ - - allowed = digits - radix = 10 - if isHex: - allowed = hexDigits - radix = 16 - - charStack = [] - - # Consume all the characters that are in range while making sure we - # don't hit an EOF. - c = self.stream.char() - while c in allowed and c is not EOF: - charStack.append(c) - c = self.stream.char() - - # Convert the set of characters consumed to an int. - charAsInt = int("".join(charStack), radix) - - # Certain characters get replaced with others - if charAsInt in replacementCharacters: - char = replacementCharacters[charAsInt] - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "illegal-codepoint-for-numeric-entity", - "datavars": {"charAsInt": charAsInt}}) - elif ((0xD800 <= charAsInt <= 0xDFFF) or - (charAsInt > 0x10FFFF)): - char = "\uFFFD" - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "illegal-codepoint-for-numeric-entity", - "datavars": {"charAsInt": charAsInt}}) - else: - # Should speed up this check somehow (e.g. move the set to a constant) - if ((0x0001 <= charAsInt <= 0x0008) or - (0x000E <= charAsInt <= 0x001F) or - (0x007F <= charAsInt <= 0x009F) or - (0xFDD0 <= charAsInt <= 0xFDEF) or - charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, - 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, - 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, - 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, - 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, - 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, - 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, - 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, - 0xFFFFF, 0x10FFFE, 0x10FFFF])): - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": - "illegal-codepoint-for-numeric-entity", - "datavars": {"charAsInt": charAsInt}}) - try: - # Try/except needed as UCS-2 Python builds' unichar only works - # within the BMP. - char = chr(charAsInt) - except ValueError: - v = charAsInt - 0x10000 - char = chr(0xD800 | (v >> 10)) + chr(0xDC00 | (v & 0x3FF)) - - # Discard the ; if present. Otherwise, put it back on the queue and - # invoke parseError on parser. - if c != ";": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "numeric-entity-without-semicolon"}) - self.stream.unget(c) - - return char - - def consumeEntity(self, allowedChar=None, fromAttribute=False): - # Initialise to the default output for when no entity is matched - output = "&" - - charStack = [self.stream.char()] - if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or - (allowedChar is not None and allowedChar == charStack[0])): - self.stream.unget(charStack[0]) - - elif charStack[0] == "#": - # Read the next character to see if it's hex or decimal - hex = False - charStack.append(self.stream.char()) - if charStack[-1] in ("x", "X"): - hex = True - charStack.append(self.stream.char()) - - # charStack[-1] should be the first digit - if (hex and charStack[-1] in hexDigits) \ - or (not hex and charStack[-1] in digits): - # At least one digit found, so consume the whole number - self.stream.unget(charStack[-1]) - output = self.consumeNumberEntity(hex) - else: - # No digits found - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "expected-numeric-entity"}) - self.stream.unget(charStack.pop()) - output = "&" + "".join(charStack) - - else: - # At this point in the process might have named entity. Entities - # are stored in the global variable "entities". - # - # Consume characters and compare to these to a substring of the - # entity names in the list until the substring no longer matches. - while (charStack[-1] is not EOF): - if not entitiesTrie.has_keys_with_prefix("".join(charStack)): - break - charStack.append(self.stream.char()) - - # At this point we have a string that starts with some characters - # that may match an entity - # Try to find the longest entity the string will match to take care - # of ¬i for instance. - try: - entityName = entitiesTrie.longest_prefix("".join(charStack[:-1])) - entityLength = len(entityName) - except KeyError: - entityName = None - - if entityName is not None: - if entityName[-1] != ";": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "named-entity-without-semicolon"}) - if (entityName[-1] != ";" and fromAttribute and - (charStack[entityLength] in asciiLetters or - charStack[entityLength] in digits or - charStack[entityLength] == "=")): - self.stream.unget(charStack.pop()) - output = "&" + "".join(charStack) - else: - output = entities[entityName] - self.stream.unget(charStack.pop()) - output += "".join(charStack[entityLength:]) - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-named-entity"}) - self.stream.unget(charStack.pop()) - output = "&" + "".join(charStack) - - if fromAttribute: - self.currentToken["data"][-1][1] += output - else: - if output in spaceCharacters: - tokenType = "SpaceCharacters" - else: - tokenType = "Characters" - self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output}) - - def processEntityInAttribute(self, allowedChar): - """This method replaces the need for "entityInAttributeValueState". - """ - self.consumeEntity(allowedChar=allowedChar, fromAttribute=True) - - def emitCurrentToken(self): - """This method is a generic handler for emitting the tags. It also sets - the state to "data" because that's what's needed after a token has been - emitted. - """ - token = self.currentToken - # Add token to the queue to be yielded - if (token["type"] in tagTokenTypes): - token["name"] = token["name"].translate(asciiUpper2Lower) - if token["type"] == tokenTypes["StartTag"]: - raw = token["data"] - data = attributeMap(raw) - if len(raw) > len(data): - # we had some duplicated attribute, fix so first wins - data.update(raw[::-1]) - token["data"] = data - - if token["type"] == tokenTypes["EndTag"]: - if token["data"]: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "attributes-in-end-tag"}) - if token["selfClosing"]: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "self-closing-flag-on-end-tag"}) - self.tokenQueue.append(token) - self.state = self.dataState - - # Below are the various tokenizer states worked out. - def dataState(self): - data = self.stream.char() - if data == "&": - self.state = self.entityDataState - elif data == "<": - self.state = self.tagOpenState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\u0000"}) - elif data is EOF: - # Tokenization ends. - return False - elif data in spaceCharacters: - # Directly after emitting a token you switch back to the "data - # state". At that point spaceCharacters are important so they are - # emitted separately. - self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": - data + self.stream.charsUntil(spaceCharacters, True)}) - # No need to update lastFourChars here, since the first space will - # have already been appended to lastFourChars and will have broken - # any sequences - else: - chars = self.stream.charsUntil(("&", "<", "\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) - return True - - def entityDataState(self): - self.consumeEntity() - self.state = self.dataState - return True - - def rcdataState(self): - data = self.stream.char() - if data == "&": - self.state = self.characterReferenceInRcdata - elif data == "<": - self.state = self.rcdataLessThanSignState - elif data == EOF: - # Tokenization ends. - return False - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - elif data in spaceCharacters: - # Directly after emitting a token you switch back to the "data - # state". At that point spaceCharacters are important so they are - # emitted separately. - self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": - data + self.stream.charsUntil(spaceCharacters, True)}) - # No need to update lastFourChars here, since the first space will - # have already been appended to lastFourChars and will have broken - # any sequences - else: - chars = self.stream.charsUntil(("&", "<", "\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) - return True - - def characterReferenceInRcdata(self): - self.consumeEntity() - self.state = self.rcdataState - return True - - def rawtextState(self): - data = self.stream.char() - if data == "<": - self.state = self.rawtextLessThanSignState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - elif data == EOF: - # Tokenization ends. - return False - else: - chars = self.stream.charsUntil(("<", "\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) - return True - - def scriptDataState(self): - data = self.stream.char() - if data == "<": - self.state = self.scriptDataLessThanSignState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - elif data == EOF: - # Tokenization ends. - return False - else: - chars = self.stream.charsUntil(("<", "\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) - return True - - def plaintextState(self): - data = self.stream.char() - if data == EOF: - # Tokenization ends. - return False - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + self.stream.charsUntil("\u0000")}) - return True - - def tagOpenState(self): - data = self.stream.char() - if data == "!": - self.state = self.markupDeclarationOpenState - elif data == "/": - self.state = self.closeTagOpenState - elif data in asciiLetters: - self.currentToken = {"type": tokenTypes["StartTag"], - "name": data, "data": [], - "selfClosing": False, - "selfClosingAcknowledged": False} - self.state = self.tagNameState - elif data == ">": - # XXX In theory it could be something besides a tag name. But - # do we really care? - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-tag-name-but-got-right-bracket"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<>"}) - self.state = self.dataState - elif data == "?": - # XXX In theory it could be something besides a tag name. But - # do we really care? - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-tag-name-but-got-question-mark"}) - self.stream.unget(data) - self.state = self.bogusCommentState - else: - # XXX - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-tag-name"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.stream.unget(data) - self.state = self.dataState - return True - - def closeTagOpenState(self): - data = self.stream.char() - if data in asciiLetters: - self.currentToken = {"type": tokenTypes["EndTag"], "name": data, - "data": [], "selfClosing": False} - self.state = self.tagNameState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-closing-tag-but-got-right-bracket"}) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-closing-tag-but-got-eof"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "": - self.emitCurrentToken() - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-tag-name"}) - self.state = self.dataState - elif data == "/": - self.state = self.selfClosingStartTagState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["name"] += "\uFFFD" - else: - self.currentToken["name"] += data - # (Don't use charsUntil here, because tag names are - # very short and it's faster to not do anything fancy) - return True - - def rcdataLessThanSignState(self): - data = self.stream.char() - if data == "/": - self.temporaryBuffer = "" - self.state = self.rcdataEndTagOpenState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.stream.unget(data) - self.state = self.rcdataState - return True - - def rcdataEndTagOpenState(self): - data = self.stream.char() - if data in asciiLetters: - self.temporaryBuffer += data - self.state = self.rcdataEndTagNameState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "" and appropriate: - self.currentToken = {"type": tokenTypes["EndTag"], - "name": self.temporaryBuffer, - "data": [], "selfClosing": False} - self.emitCurrentToken() - self.state = self.dataState - elif data in asciiLetters: - self.temporaryBuffer += data - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "" and appropriate: - self.currentToken = {"type": tokenTypes["EndTag"], - "name": self.temporaryBuffer, - "data": [], "selfClosing": False} - self.emitCurrentToken() - self.state = self.dataState - elif data in asciiLetters: - self.temporaryBuffer += data - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "" and appropriate: - self.currentToken = {"type": tokenTypes["EndTag"], - "name": self.temporaryBuffer, - "data": [], "selfClosing": False} - self.emitCurrentToken() - self.state = self.dataState - elif data in asciiLetters: - self.temporaryBuffer += data - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) - self.state = self.scriptDataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - self.state = self.scriptDataEscapedState - elif data == EOF: - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.state = self.scriptDataEscapedState - return True - - def scriptDataEscapedLessThanSignState(self): - data = self.stream.char() - if data == "/": - self.temporaryBuffer = "" - self.state = self.scriptDataEscapedEndTagOpenState - elif data in asciiLetters: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<" + data}) - self.temporaryBuffer = data - self.state = self.scriptDataDoubleEscapeStartState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.stream.unget(data) - self.state = self.scriptDataEscapedState - return True - - def scriptDataEscapedEndTagOpenState(self): - data = self.stream.char() - if data in asciiLetters: - self.temporaryBuffer = data - self.state = self.scriptDataEscapedEndTagNameState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "" and appropriate: - self.currentToken = {"type": tokenTypes["EndTag"], - "name": self.temporaryBuffer, - "data": [], "selfClosing": False} - self.emitCurrentToken() - self.state = self.dataState - elif data in asciiLetters: - self.temporaryBuffer += data - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": ""))): - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - if self.temporaryBuffer.lower() == "script": - self.state = self.scriptDataDoubleEscapedState - else: - self.state = self.scriptDataEscapedState - elif data in asciiLetters: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.temporaryBuffer += data - else: - self.stream.unget(data) - self.state = self.scriptDataEscapedState - return True - - def scriptDataDoubleEscapedState(self): - data = self.stream.char() - if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) - self.state = self.scriptDataDoubleEscapedDashState - elif data == "<": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.state = self.scriptDataDoubleEscapedLessThanSignState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - elif data == EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-script-in-script"}) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - return True - - def scriptDataDoubleEscapedDashState(self): - data = self.stream.char() - if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) - self.state = self.scriptDataDoubleEscapedDashDashState - elif data == "<": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.state = self.scriptDataDoubleEscapedLessThanSignState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - self.state = self.scriptDataDoubleEscapedState - elif data == EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-script-in-script"}) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.state = self.scriptDataDoubleEscapedState - return True - - def scriptDataDoubleEscapedDashDashState(self): - data = self.stream.char() - if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) - elif data == "<": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.state = self.scriptDataDoubleEscapedLessThanSignState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) - self.state = self.scriptDataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - self.state = self.scriptDataDoubleEscapedState - elif data == EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-script-in-script"}) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.state = self.scriptDataDoubleEscapedState - return True - - def scriptDataDoubleEscapedLessThanSignState(self): - data = self.stream.char() - if data == "/": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "/"}) - self.temporaryBuffer = "" - self.state = self.scriptDataDoubleEscapeEndState - else: - self.stream.unget(data) - self.state = self.scriptDataDoubleEscapedState - return True - - def scriptDataDoubleEscapeEndState(self): - data = self.stream.char() - if data in (spaceCharacters | frozenset(("/", ">"))): - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - if self.temporaryBuffer.lower() == "script": - self.state = self.scriptDataEscapedState - else: - self.state = self.scriptDataDoubleEscapedState - elif data in asciiLetters: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.temporaryBuffer += data - else: - self.stream.unget(data) - self.state = self.scriptDataDoubleEscapedState - return True - - def beforeAttributeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - self.stream.charsUntil(spaceCharacters, True) - elif data in asciiLetters: - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - elif data == ">": - self.emitCurrentToken() - elif data == "/": - self.state = self.selfClosingStartTagState - elif data in ("'", '"', "=", "<"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "invalid-character-in-attribute-name"}) - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"].append(["\uFFFD", ""]) - self.state = self.attributeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-attribute-name-but-got-eof"}) - self.state = self.dataState - else: - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - return True - - def attributeNameState(self): - data = self.stream.char() - leavingThisState = True - emitToken = False - if data == "=": - self.state = self.beforeAttributeValueState - elif data in asciiLetters: - self.currentToken["data"][-1][0] += data +\ - self.stream.charsUntil(asciiLetters, True) - leavingThisState = False - elif data == ">": - # XXX If we emit here the attributes are converted to a dict - # without being checked and when the code below runs we error - # because data is a dict not a list - emitToken = True - elif data in spaceCharacters: - self.state = self.afterAttributeNameState - elif data == "/": - self.state = self.selfClosingStartTagState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][0] += "\uFFFD" - leavingThisState = False - elif data in ("'", '"', "<"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": - "invalid-character-in-attribute-name"}) - self.currentToken["data"][-1][0] += data - leavingThisState = False - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "eof-in-attribute-name"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][0] += data - leavingThisState = False - - if leavingThisState: - # Attributes are not dropped at this stage. That happens when the - # start tag token is emitted so values can still be safely appended - # to attributes, but we do want to report the parse error in time. - self.currentToken["data"][-1][0] = ( - self.currentToken["data"][-1][0].translate(asciiUpper2Lower)) - for name, _ in self.currentToken["data"][:-1]: - if self.currentToken["data"][-1][0] == name: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "duplicate-attribute"}) - break - # XXX Fix for above XXX - if emitToken: - self.emitCurrentToken() - return True - - def afterAttributeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - self.stream.charsUntil(spaceCharacters, True) - elif data == "=": - self.state = self.beforeAttributeValueState - elif data == ">": - self.emitCurrentToken() - elif data in asciiLetters: - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - elif data == "/": - self.state = self.selfClosingStartTagState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"].append(["\uFFFD", ""]) - self.state = self.attributeNameState - elif data in ("'", '"', "<"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "invalid-character-after-attribute-name"}) - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-end-of-tag-but-got-eof"}) - self.state = self.dataState - else: - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - return True - - def beforeAttributeValueState(self): - data = self.stream.char() - if data in spaceCharacters: - self.stream.charsUntil(spaceCharacters, True) - elif data == "\"": - self.state = self.attributeValueDoubleQuotedState - elif data == "&": - self.state = self.attributeValueUnQuotedState - self.stream.unget(data) - elif data == "'": - self.state = self.attributeValueSingleQuotedState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-attribute-value-but-got-right-bracket"}) - self.emitCurrentToken() - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += "\uFFFD" - self.state = self.attributeValueUnQuotedState - elif data in ("=", "<", "`"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "equals-in-unquoted-attribute-value"}) - self.currentToken["data"][-1][1] += data - self.state = self.attributeValueUnQuotedState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-attribute-value-but-got-eof"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][1] += data - self.state = self.attributeValueUnQuotedState - return True - - def attributeValueDoubleQuotedState(self): - data = self.stream.char() - if data == "\"": - self.state = self.afterAttributeValueState - elif data == "&": - self.processEntityInAttribute('"') - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += "\uFFFD" - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-attribute-value-double-quote"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][1] += data +\ - self.stream.charsUntil(("\"", "&", "\u0000")) - return True - - def attributeValueSingleQuotedState(self): - data = self.stream.char() - if data == "'": - self.state = self.afterAttributeValueState - elif data == "&": - self.processEntityInAttribute("'") - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += "\uFFFD" - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-attribute-value-single-quote"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][1] += data +\ - self.stream.charsUntil(("'", "&", "\u0000")) - return True - - def attributeValueUnQuotedState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeAttributeNameState - elif data == "&": - self.processEntityInAttribute(">") - elif data == ">": - self.emitCurrentToken() - elif data in ('"', "'", "=", "<", "`"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-character-in-unquoted-attribute-value"}) - self.currentToken["data"][-1][1] += data - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += "\uFFFD" - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-attribute-value-no-quotes"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][1] += data + self.stream.charsUntil( - frozenset(("&", ">", '"', "'", "=", "<", "`", "\u0000")) | spaceCharacters) - return True - - def afterAttributeValueState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeAttributeNameState - elif data == ">": - self.emitCurrentToken() - elif data == "/": - self.state = self.selfClosingStartTagState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-EOF-after-attribute-value"}) - self.stream.unget(data) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-character-after-attribute-value"}) - self.stream.unget(data) - self.state = self.beforeAttributeNameState - return True - - def selfClosingStartTagState(self): - data = self.stream.char() - if data == ">": - self.currentToken["selfClosing"] = True - self.emitCurrentToken() - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": - "unexpected-EOF-after-solidus-in-tag"}) - self.stream.unget(data) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-character-after-solidus-in-tag"}) - self.stream.unget(data) - self.state = self.beforeAttributeNameState - return True - - def bogusCommentState(self): - # Make a new comment token and give it as value all the characters - # until the first > or EOF (charsUntil checks for EOF automatically) - # and emit it. - data = self.stream.charsUntil(">") - data = data.replace("\u0000", "\uFFFD") - self.tokenQueue.append( - {"type": tokenTypes["Comment"], "data": data}) - - # Eat the character directly after the bogus comment which is either a - # ">" or an EOF. - self.stream.char() - self.state = self.dataState - return True - - def markupDeclarationOpenState(self): - charStack = [self.stream.char()] - if charStack[-1] == "-": - charStack.append(self.stream.char()) - if charStack[-1] == "-": - self.currentToken = {"type": tokenTypes["Comment"], "data": ""} - self.state = self.commentStartState - return True - elif charStack[-1] in ('d', 'D'): - matched = True - for expected in (('o', 'O'), ('c', 'C'), ('t', 'T'), - ('y', 'Y'), ('p', 'P'), ('e', 'E')): - charStack.append(self.stream.char()) - if charStack[-1] not in expected: - matched = False - break - if matched: - self.currentToken = {"type": tokenTypes["Doctype"], - "name": "", - "publicId": None, "systemId": None, - "correct": True} - self.state = self.doctypeState - return True - elif (charStack[-1] == "[" and - self.parser is not None and - self.parser.tree.openElements and - self.parser.tree.openElements[-1].namespace != self.parser.tree.defaultNamespace): - matched = True - for expected in ["C", "D", "A", "T", "A", "["]: - charStack.append(self.stream.char()) - if charStack[-1] != expected: - matched = False - break - if matched: - self.state = self.cdataSectionState - return True - - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-dashes-or-doctype"}) - - while charStack: - self.stream.unget(charStack.pop()) - self.state = self.bogusCommentState - return True - - def commentStartState(self): - data = self.stream.char() - if data == "-": - self.state = self.commentStartDashState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "incorrect-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += data - self.state = self.commentState - return True - - def commentStartDashState(self): - data = self.stream.char() - if data == "-": - self.state = self.commentEndState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "-\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "incorrect-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += "-" + data - self.state = self.commentState - return True - - def commentState(self): - data = self.stream.char() - if data == "-": - self.state = self.commentEndDashState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "\uFFFD" - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "eof-in-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += data + \ - self.stream.charsUntil(("-", "\u0000")) - return True - - def commentEndDashState(self): - data = self.stream.char() - if data == "-": - self.state = self.commentEndState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "-\uFFFD" - self.state = self.commentState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment-end-dash"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += "-" + data - self.state = self.commentState - return True - - def commentEndState(self): - data = self.stream.char() - if data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "--\uFFFD" - self.state = self.commentState - elif data == "!": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-bang-after-double-dash-in-comment"}) - self.state = self.commentEndBangState - elif data == "-": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-dash-after-double-dash-in-comment"}) - self.currentToken["data"] += data - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment-double-dash"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - # XXX - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-comment"}) - self.currentToken["data"] += "--" + data - self.state = self.commentState - return True - - def commentEndBangState(self): - data = self.stream.char() - if data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == "-": - self.currentToken["data"] += "--!" - self.state = self.commentEndDashState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "--!\uFFFD" - self.state = self.commentState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment-end-bang-state"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += "--!" + data - self.state = self.commentState - return True - - def doctypeState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeDoctypeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-doctype-name-but-got-eof"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "need-space-after-doctype"}) - self.stream.unget(data) - self.state = self.beforeDoctypeNameState - return True - - def beforeDoctypeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-doctype-name-but-got-right-bracket"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["name"] = "\uFFFD" - self.state = self.doctypeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-doctype-name-but-got-eof"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["name"] = data - self.state = self.doctypeNameState - return True - - def doctypeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) - self.state = self.afterDoctypeNameState - elif data == ">": - self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["name"] += "\uFFFD" - self.state = self.doctypeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype-name"}) - self.currentToken["correct"] = False - self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["name"] += data - return True - - def afterDoctypeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.currentToken["correct"] = False - self.stream.unget(data) - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - if data in ("p", "P"): - matched = True - for expected in (("u", "U"), ("b", "B"), ("l", "L"), - ("i", "I"), ("c", "C")): - data = self.stream.char() - if data not in expected: - matched = False - break - if matched: - self.state = self.afterDoctypePublicKeywordState - return True - elif data in ("s", "S"): - matched = True - for expected in (("y", "Y"), ("s", "S"), ("t", "T"), - ("e", "E"), ("m", "M")): - data = self.stream.char() - if data not in expected: - matched = False - break - if matched: - self.state = self.afterDoctypeSystemKeywordState - return True - - # All the characters read before the current 'data' will be - # [a-zA-Z], so they're garbage in the bogus doctype and can be - # discarded; only the latest character might be '>' or EOF - # and needs to be ungetted - self.stream.unget(data) - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-space-or-right-bracket-in-doctype", "datavars": - {"data": data}}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - - return True - - def afterDoctypePublicKeywordState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeDoctypePublicIdentifierState - elif data in ("'", '"'): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.stream.unget(data) - self.state = self.beforeDoctypePublicIdentifierState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.stream.unget(data) - self.state = self.beforeDoctypePublicIdentifierState - return True - - def beforeDoctypePublicIdentifierState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == "\"": - self.currentToken["publicId"] = "" - self.state = self.doctypePublicIdentifierDoubleQuotedState - elif data == "'": - self.currentToken["publicId"] = "" - self.state = self.doctypePublicIdentifierSingleQuotedState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - return True - - def doctypePublicIdentifierDoubleQuotedState(self): - data = self.stream.char() - if data == "\"": - self.state = self.afterDoctypePublicIdentifierState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["publicId"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["publicId"] += data - return True - - def doctypePublicIdentifierSingleQuotedState(self): - data = self.stream.char() - if data == "'": - self.state = self.afterDoctypePublicIdentifierState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["publicId"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["publicId"] += data - return True - - def afterDoctypePublicIdentifierState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.betweenDoctypePublicAndSystemIdentifiersState - elif data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == '"': - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierDoubleQuotedState - elif data == "'": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierSingleQuotedState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - return True - - def betweenDoctypePublicAndSystemIdentifiersState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == '"': - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierDoubleQuotedState - elif data == "'": - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierSingleQuotedState - elif data == EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - return True - - def afterDoctypeSystemKeywordState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeDoctypeSystemIdentifierState - elif data in ("'", '"'): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.stream.unget(data) - self.state = self.beforeDoctypeSystemIdentifierState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.stream.unget(data) - self.state = self.beforeDoctypeSystemIdentifierState - return True - - def beforeDoctypeSystemIdentifierState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == "\"": - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierDoubleQuotedState - elif data == "'": - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierSingleQuotedState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - return True - - def doctypeSystemIdentifierDoubleQuotedState(self): - data = self.stream.char() - if data == "\"": - self.state = self.afterDoctypeSystemIdentifierState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["systemId"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["systemId"] += data - return True - - def doctypeSystemIdentifierSingleQuotedState(self): - data = self.stream.char() - if data == "'": - self.state = self.afterDoctypeSystemIdentifierState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["systemId"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["systemId"] += data - return True - - def afterDoctypeSystemIdentifierState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.state = self.bogusDoctypeState - return True - - def bogusDoctypeState(self): - data = self.stream.char() - if data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - # XXX EMIT - self.stream.unget(data) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - pass - return True - - def cdataSectionState(self): - data = [] - while True: - data.append(self.stream.charsUntil("]")) - data.append(self.stream.charsUntil(">")) - char = self.stream.char() - if char == EOF: - break - else: - assert char == ">" - if data[-1][-2:] == "]]": - data[-1] = data[-1][:-2] - break - else: - data.append(char) - - data = "".join(data) # pylint:disable=redefined-variable-type - # Deal with null here rather than in the parser - nullCount = data.count("\u0000") - if nullCount > 0: - for _ in range(nullCount): - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - data = data.replace("\u0000", "\uFFFD") - if data: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": data}) - self.state = self.dataState - return True diff --git a/pipenv/patched/notpip/_vendor/html5lib/_trie/__init__.py b/pipenv/patched/notpip/_vendor/html5lib/_trie/__init__.py deleted file mode 100644 index 07bad5d31c..0000000000 --- a/pipenv/patched/notpip/_vendor/html5lib/_trie/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from .py import Trie - -__all__ = ["Trie"] diff --git a/pipenv/patched/notpip/_vendor/html5lib/_trie/_base.py b/pipenv/patched/notpip/_vendor/html5lib/_trie/_base.py deleted file mode 100644 index 6b71975f08..0000000000 --- a/pipenv/patched/notpip/_vendor/html5lib/_trie/_base.py +++ /dev/null @@ -1,40 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -try: - from collections.abc import Mapping -except ImportError: # Python 2.7 - from collections import Mapping - - -class Trie(Mapping): - """Abstract base class for tries""" - - def keys(self, prefix=None): - # pylint:disable=arguments-differ - keys = super(Trie, self).keys() - - if prefix is None: - return set(keys) - - return {x for x in keys if x.startswith(prefix)} - - def has_keys_with_prefix(self, prefix): - for key in self.keys(): - if key.startswith(prefix): - return True - - return False - - def longest_prefix(self, prefix): - if prefix in self: - return prefix - - for i in range(1, len(prefix) + 1): - if prefix[:-i] in self: - return prefix[:-i] - - raise KeyError(prefix) - - def longest_prefix_item(self, prefix): - lprefix = self.longest_prefix(prefix) - return (lprefix, self[lprefix]) diff --git a/pipenv/patched/notpip/_vendor/html5lib/_trie/py.py b/pipenv/patched/notpip/_vendor/html5lib/_trie/py.py deleted file mode 100644 index bb62a43f4b..0000000000 --- a/pipenv/patched/notpip/_vendor/html5lib/_trie/py.py +++ /dev/null @@ -1,67 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from pipenv.patched.notpip._vendor.six import text_type - -from bisect import bisect_left - -from ._base import Trie as ABCTrie - - -class Trie(ABCTrie): - def __init__(self, data): - if not all(isinstance(x, text_type) for x in data.keys()): - raise TypeError("All keys must be strings") - - self._data = data - self._keys = sorted(data.keys()) - self._cachestr = "" - self._cachepoints = (0, len(data)) - - def __contains__(self, key): - return key in self._data - - def __len__(self): - return len(self._data) - - def __iter__(self): - return iter(self._data) - - def __getitem__(self, key): - return self._data[key] - - def keys(self, prefix=None): - if prefix is None or prefix == "" or not self._keys: - return set(self._keys) - - if prefix.startswith(self._cachestr): - lo, hi = self._cachepoints - start = i = bisect_left(self._keys, prefix, lo, hi) - else: - start = i = bisect_left(self._keys, prefix) - - keys = set() - if start == len(self._keys): - return keys - - while self._keys[i].startswith(prefix): - keys.add(self._keys[i]) - i += 1 - - self._cachestr = prefix - self._cachepoints = (start, i) - - return keys - - def has_keys_with_prefix(self, prefix): - if prefix in self._data: - return True - - if prefix.startswith(self._cachestr): - lo, hi = self._cachepoints - i = bisect_left(self._keys, prefix, lo, hi) - else: - i = bisect_left(self._keys, prefix) - - if i == len(self._keys): - return False - - return self._keys[i].startswith(prefix) diff --git a/pipenv/patched/notpip/_vendor/html5lib/_utils.py b/pipenv/patched/notpip/_vendor/html5lib/_utils.py deleted file mode 100644 index 25042f8264..0000000000 --- a/pipenv/patched/notpip/_vendor/html5lib/_utils.py +++ /dev/null @@ -1,159 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from types import ModuleType - -try: - from collections.abc import Mapping -except ImportError: - from collections import Mapping - -from pipenv.patched.notpip._vendor.six import text_type, PY3 - -if PY3: - import xml.etree.ElementTree as default_etree -else: - try: - import xml.etree.cElementTree as default_etree - except ImportError: - import xml.etree.ElementTree as default_etree - - -__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", - "surrogatePairToCodepoint", "moduleFactoryFactory", - "supports_lone_surrogates"] - - -# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be -# caught by the below test. In general this would be any platform -# using UTF-16 as its encoding of unicode strings, such as -# Jython. This is because UTF-16 itself is based on the use of such -# surrogates, and there is no mechanism to further escape such -# escapes. -try: - _x = eval('"\\uD800"') # pylint:disable=eval-used - if not isinstance(_x, text_type): - # We need this with u"" because of http://bugs.jython.org/issue2039 - _x = eval('u"\\uD800"') # pylint:disable=eval-used - assert isinstance(_x, text_type) -except Exception: - supports_lone_surrogates = False -else: - supports_lone_surrogates = True - - -class MethodDispatcher(dict): - """Dict with 2 special properties: - - On initiation, keys that are lists, sets or tuples are converted to - multiple keys so accessing any one of the items in the original - list-like object returns the matching value - - md = MethodDispatcher({("foo", "bar"):"baz"}) - md["foo"] == "baz" - - A default value which can be set through the default attribute. - """ - - def __init__(self, items=()): - _dictEntries = [] - for name, value in items: - if isinstance(name, (list, tuple, frozenset, set)): - for item in name: - _dictEntries.append((item, value)) - else: - _dictEntries.append((name, value)) - dict.__init__(self, _dictEntries) - assert len(self) == len(_dictEntries) - self.default = None - - def __getitem__(self, key): - return dict.get(self, key, self.default) - - def __get__(self, instance, owner=None): - return BoundMethodDispatcher(instance, self) - - -class BoundMethodDispatcher(Mapping): - """Wraps a MethodDispatcher, binding its return values to `instance`""" - def __init__(self, instance, dispatcher): - self.instance = instance - self.dispatcher = dispatcher - - def __getitem__(self, key): - # see https://docs.python.org/3/reference/datamodel.html#object.__get__ - # on a function, __get__ is used to bind a function to an instance as a bound method - return self.dispatcher[key].__get__(self.instance) - - def get(self, key, default): - if key in self.dispatcher: - return self[key] - else: - return default - - def __iter__(self): - return iter(self.dispatcher) - - def __len__(self): - return len(self.dispatcher) - - def __contains__(self, key): - return key in self.dispatcher - - -# Some utility functions to deal with weirdness around UCS2 vs UCS4 -# python builds - -def isSurrogatePair(data): - return (len(data) == 2 and - ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and - ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF) - - -def surrogatePairToCodepoint(data): - char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 + - (ord(data[1]) - 0xDC00)) - return char_val - -# Module Factory Factory (no, this isn't Java, I know) -# Here to stop this being duplicated all over the place. - - -def moduleFactoryFactory(factory): - moduleCache = {} - - def moduleFactory(baseModule, *args, **kwargs): - if isinstance(ModuleType.__name__, type("")): - name = "_%s_factory" % baseModule.__name__ - else: - name = b"_%s_factory" % baseModule.__name__ - - kwargs_tuple = tuple(kwargs.items()) - - try: - return moduleCache[name][args][kwargs_tuple] - except KeyError: - mod = ModuleType(name) - objs = factory(baseModule, *args, **kwargs) - mod.__dict__.update(objs) - if "name" not in moduleCache: - moduleCache[name] = {} - if "args" not in moduleCache[name]: - moduleCache[name][args] = {} - if "kwargs" not in moduleCache[name][args]: - moduleCache[name][args][kwargs_tuple] = {} - moduleCache[name][args][kwargs_tuple] = mod - return mod - - return moduleFactory - - -def memoize(func): - cache = {} - - def wrapped(*args, **kwargs): - key = (tuple(args), tuple(kwargs.items())) - if key not in cache: - cache[key] = func(*args, **kwargs) - return cache[key] - - return wrapped diff --git a/pipenv/patched/notpip/_vendor/html5lib/constants.py b/pipenv/patched/notpip/_vendor/html5lib/constants.py deleted file mode 100644 index fe3e237cd8..0000000000 --- a/pipenv/patched/notpip/_vendor/html5lib/constants.py +++ /dev/null @@ -1,2946 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import string - -EOF = None - -E = { - "null-character": - "Null character in input stream, replaced with U+FFFD.", - "invalid-codepoint": - "Invalid codepoint in stream.", - "incorrectly-placed-solidus": - "Solidus (/) incorrectly placed in tag.", - "incorrect-cr-newline-entity": - "Incorrect CR newline entity, replaced with LF.", - "illegal-windows-1252-entity": - "Entity used with illegal number (windows-1252 reference).", - "cant-convert-numeric-entity": - "Numeric entity couldn't be converted to character " - "(codepoint U+%(charAsInt)08x).", - "illegal-codepoint-for-numeric-entity": - "Numeric entity represents an illegal codepoint: " - "U+%(charAsInt)08x.", - "numeric-entity-without-semicolon": - "Numeric entity didn't end with ';'.", - "expected-numeric-entity-but-got-eof": - "Numeric entity expected. Got end of file instead.", - "expected-numeric-entity": - "Numeric entity expected but none found.", - "named-entity-without-semicolon": - "Named entity didn't end with ';'.", - "expected-named-entity": - "Named entity expected. Got none.", - "attributes-in-end-tag": - "End tag contains unexpected attributes.", - 'self-closing-flag-on-end-tag': - "End tag contains unexpected self-closing flag.", - "expected-tag-name-but-got-right-bracket": - "Expected tag name. Got '>' instead.", - "expected-tag-name-but-got-question-mark": - "Expected tag name. Got '?' instead. (HTML doesn't " - "support processing instructions.)", - "expected-tag-name": - "Expected tag name. Got something else instead", - "expected-closing-tag-but-got-right-bracket": - "Expected closing tag. Got '>' instead. Ignoring ''.", - "expected-closing-tag-but-got-eof": - "Expected closing tag. Unexpected end of file.", - "expected-closing-tag-but-got-char": - "Expected closing tag. Unexpected character '%(data)s' found.", - "eof-in-tag-name": - "Unexpected end of file in the tag name.", - "expected-attribute-name-but-got-eof": - "Unexpected end of file. Expected attribute name instead.", - "eof-in-attribute-name": - "Unexpected end of file in attribute name.", - "invalid-character-in-attribute-name": - "Invalid character in attribute name", - "duplicate-attribute": - "Dropped duplicate attribute on tag.", - "expected-end-of-tag-name-but-got-eof": - "Unexpected end of file. Expected = or end of tag.", - "expected-attribute-value-but-got-eof": - "Unexpected end of file. Expected attribute value.", - "expected-attribute-value-but-got-right-bracket": - "Expected attribute value. Got '>' instead.", - 'equals-in-unquoted-attribute-value': - "Unexpected = in unquoted attribute", - 'unexpected-character-in-unquoted-attribute-value': - "Unexpected character in unquoted attribute", - "invalid-character-after-attribute-name": - "Unexpected character after attribute name.", - "unexpected-character-after-attribute-value": - "Unexpected character after attribute value.", - "eof-in-attribute-value-double-quote": - "Unexpected end of file in attribute value (\").", - "eof-in-attribute-value-single-quote": - "Unexpected end of file in attribute value (').", - "eof-in-attribute-value-no-quotes": - "Unexpected end of file in attribute value.", - "unexpected-EOF-after-solidus-in-tag": - "Unexpected end of file in tag. Expected >", - "unexpected-character-after-solidus-in-tag": - "Unexpected character after / in tag. Expected >", - "expected-dashes-or-doctype": - "Expected '--' or 'DOCTYPE'. Not found.", - "unexpected-bang-after-double-dash-in-comment": - "Unexpected ! after -- in comment", - "unexpected-space-after-double-dash-in-comment": - "Unexpected space after -- in comment", - "incorrect-comment": - "Incorrect comment.", - "eof-in-comment": - "Unexpected end of file in comment.", - "eof-in-comment-end-dash": - "Unexpected end of file in comment (-)", - "unexpected-dash-after-double-dash-in-comment": - "Unexpected '-' after '--' found in comment.", - "eof-in-comment-double-dash": - "Unexpected end of file in comment (--).", - "eof-in-comment-end-space-state": - "Unexpected end of file in comment.", - "eof-in-comment-end-bang-state": - "Unexpected end of file in comment.", - "unexpected-char-in-comment": - "Unexpected character in comment found.", - "need-space-after-doctype": - "No space after literal string 'DOCTYPE'.", - "expected-doctype-name-but-got-right-bracket": - "Unexpected > character. Expected DOCTYPE name.", - "expected-doctype-name-but-got-eof": - "Unexpected end of file. Expected DOCTYPE name.", - "eof-in-doctype-name": - "Unexpected end of file in DOCTYPE name.", - "eof-in-doctype": - "Unexpected end of file in DOCTYPE.", - "expected-space-or-right-bracket-in-doctype": - "Expected space or '>'. Got '%(data)s'", - "unexpected-end-of-doctype": - "Unexpected end of DOCTYPE.", - "unexpected-char-in-doctype": - "Unexpected character in DOCTYPE.", - "eof-in-innerhtml": - "XXX innerHTML EOF", - "unexpected-doctype": - "Unexpected DOCTYPE. Ignored.", - "non-html-root": - "html needs to be the first start tag.", - "expected-doctype-but-got-eof": - "Unexpected End of file. Expected DOCTYPE.", - "unknown-doctype": - "Erroneous DOCTYPE.", - "expected-doctype-but-got-chars": - "Unexpected non-space characters. Expected DOCTYPE.", - "expected-doctype-but-got-start-tag": - "Unexpected start tag (%(name)s). Expected DOCTYPE.", - "expected-doctype-but-got-end-tag": - "Unexpected end tag (%(name)s). Expected DOCTYPE.", - "end-tag-after-implied-root": - "Unexpected end tag (%(name)s) after the (implied) root element.", - "expected-named-closing-tag-but-got-eof": - "Unexpected end of file. Expected end tag (%(name)s).", - "two-heads-are-not-better-than-one": - "Unexpected start tag head in existing head. Ignored.", - "unexpected-end-tag": - "Unexpected end tag (%(name)s). Ignored.", - "unexpected-start-tag-out-of-my-head": - "Unexpected start tag (%(name)s) that can be in head. Moved.", - "unexpected-start-tag": - "Unexpected start tag (%(name)s).", - "missing-end-tag": - "Missing end tag (%(name)s).", - "missing-end-tags": - "Missing end tags (%(name)s).", - "unexpected-start-tag-implies-end-tag": - "Unexpected start tag (%(startName)s) " - "implies end tag (%(endName)s).", - "unexpected-start-tag-treated-as": - "Unexpected start tag (%(originalName)s). Treated as %(newName)s.", - "deprecated-tag": - "Unexpected start tag %(name)s. Don't use it!", - "unexpected-start-tag-ignored": - "Unexpected start tag %(name)s. Ignored.", - "expected-one-end-tag-but-got-another": - "Unexpected end tag (%(gotName)s). " - "Missing end tag (%(expectedName)s).", - "end-tag-too-early": - "End tag (%(name)s) seen too early. Expected other end tag.", - "end-tag-too-early-named": - "Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).", - "end-tag-too-early-ignored": - "End tag (%(name)s) seen too early. Ignored.", - "adoption-agency-1.1": - "End tag (%(name)s) violates step 1, " - "paragraph 1 of the adoption agency algorithm.", - "adoption-agency-1.2": - "End tag (%(name)s) violates step 1, " - "paragraph 2 of the adoption agency algorithm.", - "adoption-agency-1.3": - "End tag (%(name)s) violates step 1, " - "paragraph 3 of the adoption agency algorithm.", - "adoption-agency-4.4": - "End tag (%(name)s) violates step 4, " - "paragraph 4 of the adoption agency algorithm.", - "unexpected-end-tag-treated-as": - "Unexpected end tag (%(originalName)s). Treated as %(newName)s.", - "no-end-tag": - "This element (%(name)s) has no end tag.", - "unexpected-implied-end-tag-in-table": - "Unexpected implied end tag (%(name)s) in the table phase.", - "unexpected-implied-end-tag-in-table-body": - "Unexpected implied end tag (%(name)s) in the table body phase.", - "unexpected-char-implies-table-voodoo": - "Unexpected non-space characters in " - "table context caused voodoo mode.", - "unexpected-hidden-input-in-table": - "Unexpected input with type hidden in table context.", - "unexpected-form-in-table": - "Unexpected form in table context.", - "unexpected-start-tag-implies-table-voodoo": - "Unexpected start tag (%(name)s) in " - "table context caused voodoo mode.", - "unexpected-end-tag-implies-table-voodoo": - "Unexpected end tag (%(name)s) in " - "table context caused voodoo mode.", - "unexpected-cell-in-table-body": - "Unexpected table cell start tag (%(name)s) " - "in the table body phase.", - "unexpected-cell-end-tag": - "Got table cell end tag (%(name)s) " - "while required end tags are missing.", - "unexpected-end-tag-in-table-body": - "Unexpected end tag (%(name)s) in the table body phase. Ignored.", - "unexpected-implied-end-tag-in-table-row": - "Unexpected implied end tag (%(name)s) in the table row phase.", - "unexpected-end-tag-in-table-row": - "Unexpected end tag (%(name)s) in the table row phase. Ignored.", - "unexpected-select-in-select": - "Unexpected select start tag in the select phase " - "treated as select end tag.", - "unexpected-input-in-select": - "Unexpected input start tag in the select phase.", - "unexpected-start-tag-in-select": - "Unexpected start tag token (%(name)s in the select phase. " - "Ignored.", - "unexpected-end-tag-in-select": - "Unexpected end tag (%(name)s) in the select phase. Ignored.", - "unexpected-table-element-start-tag-in-select-in-table": - "Unexpected table element start tag (%(name)s) in the select in table phase.", - "unexpected-table-element-end-tag-in-select-in-table": - "Unexpected table element end tag (%(name)s) in the select in table phase.", - "unexpected-char-after-body": - "Unexpected non-space characters in the after body phase.", - "unexpected-start-tag-after-body": - "Unexpected start tag token (%(name)s)" - " in the after body phase.", - "unexpected-end-tag-after-body": - "Unexpected end tag token (%(name)s)" - " in the after body phase.", - "unexpected-char-in-frameset": - "Unexpected characters in the frameset phase. Characters ignored.", - "unexpected-start-tag-in-frameset": - "Unexpected start tag token (%(name)s)" - " in the frameset phase. Ignored.", - "unexpected-frameset-in-frameset-innerhtml": - "Unexpected end tag token (frameset) " - "in the frameset phase (innerHTML).", - "unexpected-end-tag-in-frameset": - "Unexpected end tag token (%(name)s)" - " in the frameset phase. Ignored.", - "unexpected-char-after-frameset": - "Unexpected non-space characters in the " - "after frameset phase. Ignored.", - "unexpected-start-tag-after-frameset": - "Unexpected start tag (%(name)s)" - " in the after frameset phase. Ignored.", - "unexpected-end-tag-after-frameset": - "Unexpected end tag (%(name)s)" - " in the after frameset phase. Ignored.", - "unexpected-end-tag-after-body-innerhtml": - "Unexpected end tag after body(innerHtml)", - "expected-eof-but-got-char": - "Unexpected non-space characters. Expected end of file.", - "expected-eof-but-got-start-tag": - "Unexpected start tag (%(name)s)" - ". Expected end of file.", - "expected-eof-but-got-end-tag": - "Unexpected end tag (%(name)s)" - ". Expected end of file.", - "eof-in-table": - "Unexpected end of file. Expected table content.", - "eof-in-select": - "Unexpected end of file. Expected select content.", - "eof-in-frameset": - "Unexpected end of file. Expected frameset content.", - "eof-in-script-in-script": - "Unexpected end of file. Expected script content.", - "eof-in-foreign-lands": - "Unexpected end of file. Expected foreign content", - "non-void-element-with-trailing-solidus": - "Trailing solidus not allowed on element %(name)s", - "unexpected-html-element-in-foreign-content": - "Element %(name)s not allowed in a non-html context", - "unexpected-end-tag-before-html": - "Unexpected end tag (%(name)s) before html.", - "unexpected-inhead-noscript-tag": - "Element %(name)s not allowed in a inhead-noscript context", - "eof-in-head-noscript": - "Unexpected end of file. Expected inhead-noscript content", - "char-in-head-noscript": - "Unexpected non-space character. Expected inhead-noscript content", - "XXX-undefined-error": - "Undefined error (this sucks and should be fixed)", -} - -namespaces = { - "html": "http://www.w3.org/1999/xhtml", - "mathml": "http://www.w3.org/1998/Math/MathML", - "svg": "http://www.w3.org/2000/svg", - "xlink": "http://www.w3.org/1999/xlink", - "xml": "http://www.w3.org/XML/1998/namespace", - "xmlns": "http://www.w3.org/2000/xmlns/" -} - -scopingElements = frozenset([ - (namespaces["html"], "applet"), - (namespaces["html"], "caption"), - (namespaces["html"], "html"), - (namespaces["html"], "marquee"), - (namespaces["html"], "object"), - (namespaces["html"], "table"), - (namespaces["html"], "td"), - (namespaces["html"], "th"), - (namespaces["mathml"], "mi"), - (namespaces["mathml"], "mo"), - (namespaces["mathml"], "mn"), - (namespaces["mathml"], "ms"), - (namespaces["mathml"], "mtext"), - (namespaces["mathml"], "annotation-xml"), - (namespaces["svg"], "foreignObject"), - (namespaces["svg"], "desc"), - (namespaces["svg"], "title"), -]) - -formattingElements = frozenset([ - (namespaces["html"], "a"), - (namespaces["html"], "b"), - (namespaces["html"], "big"), - (namespaces["html"], "code"), - (namespaces["html"], "em"), - (namespaces["html"], "font"), - (namespaces["html"], "i"), - (namespaces["html"], "nobr"), - (namespaces["html"], "s"), - (namespaces["html"], "small"), - (namespaces["html"], "strike"), - (namespaces["html"], "strong"), - (namespaces["html"], "tt"), - (namespaces["html"], "u") -]) - -specialElements = frozenset([ - (namespaces["html"], "address"), - (namespaces["html"], "applet"), - (namespaces["html"], "area"), - (namespaces["html"], "article"), - (namespaces["html"], "aside"), - (namespaces["html"], "base"), - (namespaces["html"], "basefont"), - (namespaces["html"], "bgsound"), - (namespaces["html"], "blockquote"), - (namespaces["html"], "body"), - (namespaces["html"], "br"), - (namespaces["html"], "button"), - (namespaces["html"], "caption"), - (namespaces["html"], "center"), - (namespaces["html"], "col"), - (namespaces["html"], "colgroup"), - (namespaces["html"], "command"), - (namespaces["html"], "dd"), - (namespaces["html"], "details"), - (namespaces["html"], "dir"), - (namespaces["html"], "div"), - (namespaces["html"], "dl"), - (namespaces["html"], "dt"), - (namespaces["html"], "embed"), - (namespaces["html"], "fieldset"), - (namespaces["html"], "figure"), - (namespaces["html"], "footer"), - (namespaces["html"], "form"), - (namespaces["html"], "frame"), - (namespaces["html"], "frameset"), - (namespaces["html"], "h1"), - (namespaces["html"], "h2"), - (namespaces["html"], "h3"), - (namespaces["html"], "h4"), - (namespaces["html"], "h5"), - (namespaces["html"], "h6"), - (namespaces["html"], "head"), - (namespaces["html"], "header"), - (namespaces["html"], "hr"), - (namespaces["html"], "html"), - (namespaces["html"], "iframe"), - # Note that image is commented out in the spec as "this isn't an - # element that can end up on the stack, so it doesn't matter," - (namespaces["html"], "image"), - (namespaces["html"], "img"), - (namespaces["html"], "input"), - (namespaces["html"], "isindex"), - (namespaces["html"], "li"), - (namespaces["html"], "link"), - (namespaces["html"], "listing"), - (namespaces["html"], "marquee"), - (namespaces["html"], "menu"), - (namespaces["html"], "meta"), - (namespaces["html"], "nav"), - (namespaces["html"], "noembed"), - (namespaces["html"], "noframes"), - (namespaces["html"], "noscript"), - (namespaces["html"], "object"), - (namespaces["html"], "ol"), - (namespaces["html"], "p"), - (namespaces["html"], "param"), - (namespaces["html"], "plaintext"), - (namespaces["html"], "pre"), - (namespaces["html"], "script"), - (namespaces["html"], "section"), - (namespaces["html"], "select"), - (namespaces["html"], "style"), - (namespaces["html"], "table"), - (namespaces["html"], "tbody"), - (namespaces["html"], "td"), - (namespaces["html"], "textarea"), - (namespaces["html"], "tfoot"), - (namespaces["html"], "th"), - (namespaces["html"], "thead"), - (namespaces["html"], "title"), - (namespaces["html"], "tr"), - (namespaces["html"], "ul"), - (namespaces["html"], "wbr"), - (namespaces["html"], "xmp"), - (namespaces["svg"], "foreignObject") -]) - -htmlIntegrationPointElements = frozenset([ - (namespaces["mathml"], "annotation-xml"), - (namespaces["svg"], "foreignObject"), - (namespaces["svg"], "desc"), - (namespaces["svg"], "title") -]) - -mathmlTextIntegrationPointElements = frozenset([ - (namespaces["mathml"], "mi"), - (namespaces["mathml"], "mo"), - (namespaces["mathml"], "mn"), - (namespaces["mathml"], "ms"), - (namespaces["mathml"], "mtext") -]) - -adjustSVGAttributes = { - "attributename": "attributeName", - "attributetype": "attributeType", - "basefrequency": "baseFrequency", - "baseprofile": "baseProfile", - "calcmode": "calcMode", - "clippathunits": "clipPathUnits", - "contentscripttype": "contentScriptType", - "contentstyletype": "contentStyleType", - "diffuseconstant": "diffuseConstant", - "edgemode": "edgeMode", - "externalresourcesrequired": "externalResourcesRequired", - "filterres": "filterRes", - "filterunits": "filterUnits", - "glyphref": "glyphRef", - "gradienttransform": "gradientTransform", - "gradientunits": "gradientUnits", - "kernelmatrix": "kernelMatrix", - "kernelunitlength": "kernelUnitLength", - "keypoints": "keyPoints", - "keysplines": "keySplines", - "keytimes": "keyTimes", - "lengthadjust": "lengthAdjust", - "limitingconeangle": "limitingConeAngle", - "markerheight": "markerHeight", - "markerunits": "markerUnits", - "markerwidth": "markerWidth", - "maskcontentunits": "maskContentUnits", - "maskunits": "maskUnits", - "numoctaves": "numOctaves", - "pathlength": "pathLength", - "patterncontentunits": "patternContentUnits", - "patterntransform": "patternTransform", - "patternunits": "patternUnits", - "pointsatx": "pointsAtX", - "pointsaty": "pointsAtY", - "pointsatz": "pointsAtZ", - "preservealpha": "preserveAlpha", - "preserveaspectratio": "preserveAspectRatio", - "primitiveunits": "primitiveUnits", - "refx": "refX", - "refy": "refY", - "repeatcount": "repeatCount", - "repeatdur": "repeatDur", - "requiredextensions": "requiredExtensions", - "requiredfeatures": "requiredFeatures", - "specularconstant": "specularConstant", - "specularexponent": "specularExponent", - "spreadmethod": "spreadMethod", - "startoffset": "startOffset", - "stddeviation": "stdDeviation", - "stitchtiles": "stitchTiles", - "surfacescale": "surfaceScale", - "systemlanguage": "systemLanguage", - "tablevalues": "tableValues", - "targetx": "targetX", - "targety": "targetY", - "textlength": "textLength", - "viewbox": "viewBox", - "viewtarget": "viewTarget", - "xchannelselector": "xChannelSelector", - "ychannelselector": "yChannelSelector", - "zoomandpan": "zoomAndPan" -} - -adjustMathMLAttributes = {"definitionurl": "definitionURL"} - -adjustForeignAttributes = { - "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]), - "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]), - "xlink:href": ("xlink", "href", namespaces["xlink"]), - "xlink:role": ("xlink", "role", namespaces["xlink"]), - "xlink:show": ("xlink", "show", namespaces["xlink"]), - "xlink:title": ("xlink", "title", namespaces["xlink"]), - "xlink:type": ("xlink", "type", namespaces["xlink"]), - "xml:base": ("xml", "base", namespaces["xml"]), - "xml:lang": ("xml", "lang", namespaces["xml"]), - "xml:space": ("xml", "space", namespaces["xml"]), - "xmlns": (None, "xmlns", namespaces["xmlns"]), - "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"]) -} - -unadjustForeignAttributes = {(ns, local): qname for qname, (prefix, local, ns) in - adjustForeignAttributes.items()} - -spaceCharacters = frozenset([ - "\t", - "\n", - "\u000C", - " ", - "\r" -]) - -tableInsertModeElements = frozenset([ - "table", - "tbody", - "tfoot", - "thead", - "tr" -]) - -asciiLowercase = frozenset(string.ascii_lowercase) -asciiUppercase = frozenset(string.ascii_uppercase) -asciiLetters = frozenset(string.ascii_letters) -digits = frozenset(string.digits) -hexDigits = frozenset(string.hexdigits) - -asciiUpper2Lower = {ord(c): ord(c.lower()) for c in string.ascii_uppercase} - -# Heading elements need to be ordered -headingElements = ( - "h1", - "h2", - "h3", - "h4", - "h5", - "h6" -) - -voidElements = frozenset([ - "base", - "command", - "event-source", - "link", - "meta", - "hr", - "br", - "img", - "embed", - "param", - "area", - "col", - "input", - "source", - "track" -]) - -cdataElements = frozenset(['title', 'textarea']) - -rcdataElements = frozenset([ - 'style', - 'script', - 'xmp', - 'iframe', - 'noembed', - 'noframes', - 'noscript' -]) - -booleanAttributes = { - "": frozenset(["irrelevant", "itemscope"]), - "style": frozenset(["scoped"]), - "img": frozenset(["ismap"]), - "audio": frozenset(["autoplay", "controls"]), - "video": frozenset(["autoplay", "controls"]), - "script": frozenset(["defer", "async"]), - "details": frozenset(["open"]), - "datagrid": frozenset(["multiple", "disabled"]), - "command": frozenset(["hidden", "disabled", "checked", "default"]), - "hr": frozenset(["noshade"]), - "menu": frozenset(["autosubmit"]), - "fieldset": frozenset(["disabled", "readonly"]), - "option": frozenset(["disabled", "readonly", "selected"]), - "optgroup": frozenset(["disabled", "readonly"]), - "button": frozenset(["disabled", "autofocus"]), - "input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]), - "select": frozenset(["disabled", "readonly", "autofocus", "multiple"]), - "output": frozenset(["disabled", "readonly"]), - "iframe": frozenset(["seamless"]), -} - -# entitiesWindows1252 has to be _ordered_ and needs to have an index. It -# therefore can't be a frozenset. -entitiesWindows1252 = ( - 8364, # 0x80 0x20AC EURO SIGN - 65533, # 0x81 UNDEFINED - 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK - 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK - 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK - 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS - 8224, # 0x86 0x2020 DAGGER - 8225, # 0x87 0x2021 DOUBLE DAGGER - 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT - 8240, # 0x89 0x2030 PER MILLE SIGN - 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON - 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE - 65533, # 0x8D UNDEFINED - 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON - 65533, # 0x8F UNDEFINED - 65533, # 0x90 UNDEFINED - 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK - 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK - 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK - 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK - 8226, # 0x95 0x2022 BULLET - 8211, # 0x96 0x2013 EN DASH - 8212, # 0x97 0x2014 EM DASH - 732, # 0x98 0x02DC SMALL TILDE - 8482, # 0x99 0x2122 TRADE MARK SIGN - 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON - 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE - 65533, # 0x9D UNDEFINED - 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON - 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS -) - -xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;']) - -entities = { - "AElig": "\xc6", - "AElig;": "\xc6", - "AMP": "&", - "AMP;": "&", - "Aacute": "\xc1", - "Aacute;": "\xc1", - "Abreve;": "\u0102", - "Acirc": "\xc2", - "Acirc;": "\xc2", - "Acy;": "\u0410", - "Afr;": "\U0001d504", - "Agrave": "\xc0", - "Agrave;": "\xc0", - "Alpha;": "\u0391", - "Amacr;": "\u0100", - "And;": "\u2a53", - "Aogon;": "\u0104", - "Aopf;": "\U0001d538", - "ApplyFunction;": "\u2061", - "Aring": "\xc5", - "Aring;": "\xc5", - "Ascr;": "\U0001d49c", - "Assign;": "\u2254", - "Atilde": "\xc3", - "Atilde;": "\xc3", - "Auml": "\xc4", - "Auml;": "\xc4", - "Backslash;": "\u2216", - "Barv;": "\u2ae7", - "Barwed;": "\u2306", - "Bcy;": "\u0411", - "Because;": "\u2235", - "Bernoullis;": "\u212c", - "Beta;": "\u0392", - "Bfr;": "\U0001d505", - "Bopf;": "\U0001d539", - "Breve;": "\u02d8", - "Bscr;": "\u212c", - "Bumpeq;": "\u224e", - "CHcy;": "\u0427", - "COPY": "\xa9", - "COPY;": "\xa9", - "Cacute;": "\u0106", - "Cap;": "\u22d2", - "CapitalDifferentialD;": "\u2145", - "Cayleys;": "\u212d", - "Ccaron;": "\u010c", - "Ccedil": "\xc7", - "Ccedil;": "\xc7", - "Ccirc;": "\u0108", - "Cconint;": "\u2230", - "Cdot;": "\u010a", - "Cedilla;": "\xb8", - "CenterDot;": "\xb7", - "Cfr;": "\u212d", - "Chi;": "\u03a7", - "CircleDot;": "\u2299", - "CircleMinus;": "\u2296", - "CirclePlus;": "\u2295", - "CircleTimes;": "\u2297", - "ClockwiseContourIntegral;": "\u2232", - "CloseCurlyDoubleQuote;": "\u201d", - "CloseCurlyQuote;": "\u2019", - "Colon;": "\u2237", - "Colone;": "\u2a74", - "Congruent;": "\u2261", - "Conint;": "\u222f", - "ContourIntegral;": "\u222e", - "Copf;": "\u2102", - "Coproduct;": "\u2210", - "CounterClockwiseContourIntegral;": "\u2233", - "Cross;": "\u2a2f", - "Cscr;": "\U0001d49e", - "Cup;": "\u22d3", - "CupCap;": "\u224d", - "DD;": "\u2145", - "DDotrahd;": "\u2911", - "DJcy;": "\u0402", - "DScy;": "\u0405", - "DZcy;": "\u040f", - "Dagger;": "\u2021", - "Darr;": "\u21a1", - "Dashv;": "\u2ae4", - "Dcaron;": "\u010e", - "Dcy;": "\u0414", - "Del;": "\u2207", - "Delta;": "\u0394", - "Dfr;": "\U0001d507", - "DiacriticalAcute;": "\xb4", - "DiacriticalDot;": "\u02d9", - "DiacriticalDoubleAcute;": "\u02dd", - "DiacriticalGrave;": "`", - "DiacriticalTilde;": "\u02dc", - "Diamond;": "\u22c4", - "DifferentialD;": "\u2146", - "Dopf;": "\U0001d53b", - "Dot;": "\xa8", - "DotDot;": "\u20dc", - "DotEqual;": "\u2250", - "DoubleContourIntegral;": "\u222f", - "DoubleDot;": "\xa8", - "DoubleDownArrow;": "\u21d3", - "DoubleLeftArrow;": "\u21d0", - "DoubleLeftRightArrow;": "\u21d4", - "DoubleLeftTee;": "\u2ae4", - "DoubleLongLeftArrow;": "\u27f8", - "DoubleLongLeftRightArrow;": "\u27fa", - "DoubleLongRightArrow;": "\u27f9", - "DoubleRightArrow;": "\u21d2", - "DoubleRightTee;": "\u22a8", - "DoubleUpArrow;": "\u21d1", - "DoubleUpDownArrow;": "\u21d5", - "DoubleVerticalBar;": "\u2225", - "DownArrow;": "\u2193", - "DownArrowBar;": "\u2913", - "DownArrowUpArrow;": "\u21f5", - "DownBreve;": "\u0311", - "DownLeftRightVector;": "\u2950", - "DownLeftTeeVector;": "\u295e", - "DownLeftVector;": "\u21bd", - "DownLeftVectorBar;": "\u2956", - "DownRightTeeVector;": "\u295f", - "DownRightVector;": "\u21c1", - "DownRightVectorBar;": "\u2957", - "DownTee;": "\u22a4", - "DownTeeArrow;": "\u21a7", - "Downarrow;": "\u21d3", - "Dscr;": "\U0001d49f", - "Dstrok;": "\u0110", - "ENG;": "\u014a", - "ETH": "\xd0", - "ETH;": "\xd0", - "Eacute": "\xc9", - "Eacute;": "\xc9", - "Ecaron;": "\u011a", - "Ecirc": "\xca", - "Ecirc;": "\xca", - "Ecy;": "\u042d", - "Edot;": "\u0116", - "Efr;": "\U0001d508", - "Egrave": "\xc8", - "Egrave;": "\xc8", - "Element;": "\u2208", - "Emacr;": "\u0112", - "EmptySmallSquare;": "\u25fb", - "EmptyVerySmallSquare;": "\u25ab", - "Eogon;": "\u0118", - "Eopf;": "\U0001d53c", - "Epsilon;": "\u0395", - "Equal;": "\u2a75", - "EqualTilde;": "\u2242", - "Equilibrium;": "\u21cc", - "Escr;": "\u2130", - "Esim;": "\u2a73", - "Eta;": "\u0397", - "Euml": "\xcb", - "Euml;": "\xcb", - "Exists;": "\u2203", - "ExponentialE;": "\u2147", - "Fcy;": "\u0424", - "Ffr;": "\U0001d509", - "FilledSmallSquare;": "\u25fc", - "FilledVerySmallSquare;": "\u25aa", - "Fopf;": "\U0001d53d", - "ForAll;": "\u2200", - "Fouriertrf;": "\u2131", - "Fscr;": "\u2131", - "GJcy;": "\u0403", - "GT": ">", - "GT;": ">", - "Gamma;": "\u0393", - "Gammad;": "\u03dc", - "Gbreve;": "\u011e", - "Gcedil;": "\u0122", - "Gcirc;": "\u011c", - "Gcy;": "\u0413", - "Gdot;": "\u0120", - "Gfr;": "\U0001d50a", - "Gg;": "\u22d9", - "Gopf;": "\U0001d53e", - "GreaterEqual;": "\u2265", - "GreaterEqualLess;": "\u22db", - "GreaterFullEqual;": "\u2267", - "GreaterGreater;": "\u2aa2", - "GreaterLess;": "\u2277", - "GreaterSlantEqual;": "\u2a7e", - "GreaterTilde;": "\u2273", - "Gscr;": "\U0001d4a2", - "Gt;": "\u226b", - "HARDcy;": "\u042a", - "Hacek;": "\u02c7", - "Hat;": "^", - "Hcirc;": "\u0124", - "Hfr;": "\u210c", - "HilbertSpace;": "\u210b", - "Hopf;": "\u210d", - "HorizontalLine;": "\u2500", - "Hscr;": "\u210b", - "Hstrok;": "\u0126", - "HumpDownHump;": "\u224e", - "HumpEqual;": "\u224f", - "IEcy;": "\u0415", - "IJlig;": "\u0132", - "IOcy;": "\u0401", - "Iacute": "\xcd", - "Iacute;": "\xcd", - "Icirc": "\xce", - "Icirc;": "\xce", - "Icy;": "\u0418", - "Idot;": "\u0130", - "Ifr;": "\u2111", - "Igrave": "\xcc", - "Igrave;": "\xcc", - "Im;": "\u2111", - "Imacr;": "\u012a", - "ImaginaryI;": "\u2148", - "Implies;": "\u21d2", - "Int;": "\u222c", - "Integral;": "\u222b", - "Intersection;": "\u22c2", - "InvisibleComma;": "\u2063", - "InvisibleTimes;": "\u2062", - "Iogon;": "\u012e", - "Iopf;": "\U0001d540", - "Iota;": "\u0399", - "Iscr;": "\u2110", - "Itilde;": "\u0128", - "Iukcy;": "\u0406", - "Iuml": "\xcf", - "Iuml;": "\xcf", - "Jcirc;": "\u0134", - "Jcy;": "\u0419", - "Jfr;": "\U0001d50d", - "Jopf;": "\U0001d541", - "Jscr;": "\U0001d4a5", - "Jsercy;": "\u0408", - "Jukcy;": "\u0404", - "KHcy;": "\u0425", - "KJcy;": "\u040c", - "Kappa;": "\u039a", - "Kcedil;": "\u0136", - "Kcy;": "\u041a", - "Kfr;": "\U0001d50e", - "Kopf;": "\U0001d542", - "Kscr;": "\U0001d4a6", - "LJcy;": "\u0409", - "LT": "<", - "LT;": "<", - "Lacute;": "\u0139", - "Lambda;": "\u039b", - "Lang;": "\u27ea", - "Laplacetrf;": "\u2112", - "Larr;": "\u219e", - "Lcaron;": "\u013d", - "Lcedil;": "\u013b", - "Lcy;": "\u041b", - "LeftAngleBracket;": "\u27e8", - "LeftArrow;": "\u2190", - "LeftArrowBar;": "\u21e4", - "LeftArrowRightArrow;": "\u21c6", - "LeftCeiling;": "\u2308", - "LeftDoubleBracket;": "\u27e6", - "LeftDownTeeVector;": "\u2961", - "LeftDownVector;": "\u21c3", - "LeftDownVectorBar;": "\u2959", - "LeftFloor;": "\u230a", - "LeftRightArrow;": "\u2194", - "LeftRightVector;": "\u294e", - "LeftTee;": "\u22a3", - "LeftTeeArrow;": "\u21a4", - "LeftTeeVector;": "\u295a", - "LeftTriangle;": "\u22b2", - "LeftTriangleBar;": "\u29cf", - "LeftTriangleEqual;": "\u22b4", - "LeftUpDownVector;": "\u2951", - "LeftUpTeeVector;": "\u2960", - "LeftUpVector;": "\u21bf", - "LeftUpVectorBar;": "\u2958", - "LeftVector;": "\u21bc", - "LeftVectorBar;": "\u2952", - "Leftarrow;": "\u21d0", - "Leftrightarrow;": "\u21d4", - "LessEqualGreater;": "\u22da", - "LessFullEqual;": "\u2266", - "LessGreater;": "\u2276", - "LessLess;": "\u2aa1", - "LessSlantEqual;": "\u2a7d", - "LessTilde;": "\u2272", - "Lfr;": "\U0001d50f", - "Ll;": "\u22d8", - "Lleftarrow;": "\u21da", - "Lmidot;": "\u013f", - "LongLeftArrow;": "\u27f5", - "LongLeftRightArrow;": "\u27f7", - "LongRightArrow;": "\u27f6", - "Longleftarrow;": "\u27f8", - "Longleftrightarrow;": "\u27fa", - "Longrightarrow;": "\u27f9", - "Lopf;": "\U0001d543", - "LowerLeftArrow;": "\u2199", - "LowerRightArrow;": "\u2198", - "Lscr;": "\u2112", - "Lsh;": "\u21b0", - "Lstrok;": "\u0141", - "Lt;": "\u226a", - "Map;": "\u2905", - "Mcy;": "\u041c", - "MediumSpace;": "\u205f", - "Mellintrf;": "\u2133", - "Mfr;": "\U0001d510", - "MinusPlus;": "\u2213", - "Mopf;": "\U0001d544", - "Mscr;": "\u2133", - "Mu;": "\u039c", - "NJcy;": "\u040a", - "Nacute;": "\u0143", - "Ncaron;": "\u0147", - "Ncedil;": "\u0145", - "Ncy;": "\u041d", - "NegativeMediumSpace;": "\u200b", - "NegativeThickSpace;": "\u200b", - "NegativeThinSpace;": "\u200b", - "NegativeVeryThinSpace;": "\u200b", - "NestedGreaterGreater;": "\u226b", - "NestedLessLess;": "\u226a", - "NewLine;": "\n", - "Nfr;": "\U0001d511", - "NoBreak;": "\u2060", - "NonBreakingSpace;": "\xa0", - "Nopf;": "\u2115", - "Not;": "\u2aec", - "NotCongruent;": "\u2262", - "NotCupCap;": "\u226d", - "NotDoubleVerticalBar;": "\u2226", - "NotElement;": "\u2209", - "NotEqual;": "\u2260", - "NotEqualTilde;": "\u2242\u0338", - "NotExists;": "\u2204", - "NotGreater;": "\u226f", - "NotGreaterEqual;": "\u2271", - "NotGreaterFullEqual;": "\u2267\u0338", - "NotGreaterGreater;": "\u226b\u0338", - "NotGreaterLess;": "\u2279", - "NotGreaterSlantEqual;": "\u2a7e\u0338", - "NotGreaterTilde;": "\u2275", - "NotHumpDownHump;": "\u224e\u0338", - "NotHumpEqual;": "\u224f\u0338", - "NotLeftTriangle;": "\u22ea", - "NotLeftTriangleBar;": "\u29cf\u0338", - "NotLeftTriangleEqual;": "\u22ec", - "NotLess;": "\u226e", - "NotLessEqual;": "\u2270", - "NotLessGreater;": "\u2278", - "NotLessLess;": "\u226a\u0338", - "NotLessSlantEqual;": "\u2a7d\u0338", - "NotLessTilde;": "\u2274", - "NotNestedGreaterGreater;": "\u2aa2\u0338", - "NotNestedLessLess;": "\u2aa1\u0338", - "NotPrecedes;": "\u2280", - "NotPrecedesEqual;": "\u2aaf\u0338", - "NotPrecedesSlantEqual;": "\u22e0", - "NotReverseElement;": "\u220c", - "NotRightTriangle;": "\u22eb", - "NotRightTriangleBar;": "\u29d0\u0338", - "NotRightTriangleEqual;": "\u22ed", - "NotSquareSubset;": "\u228f\u0338", - "NotSquareSubsetEqual;": "\u22e2", - "NotSquareSuperset;": "\u2290\u0338", - "NotSquareSupersetEqual;": "\u22e3", - "NotSubset;": "\u2282\u20d2", - "NotSubsetEqual;": "\u2288", - "NotSucceeds;": "\u2281", - "NotSucceedsEqual;": "\u2ab0\u0338", - "NotSucceedsSlantEqual;": "\u22e1", - "NotSucceedsTilde;": "\u227f\u0338", - "NotSuperset;": "\u2283\u20d2", - "NotSupersetEqual;": "\u2289", - "NotTilde;": "\u2241", - "NotTildeEqual;": "\u2244", - "NotTildeFullEqual;": "\u2247", - "NotTildeTilde;": "\u2249", - "NotVerticalBar;": "\u2224", - "Nscr;": "\U0001d4a9", - "Ntilde": "\xd1", - "Ntilde;": "\xd1", - "Nu;": "\u039d", - "OElig;": "\u0152", - "Oacute": "\xd3", - "Oacute;": "\xd3", - "Ocirc": "\xd4", - "Ocirc;": "\xd4", - "Ocy;": "\u041e", - "Odblac;": "\u0150", - "Ofr;": "\U0001d512", - "Ograve": "\xd2", - "Ograve;": "\xd2", - "Omacr;": "\u014c", - "Omega;": "\u03a9", - "Omicron;": "\u039f", - "Oopf;": "\U0001d546", - "OpenCurlyDoubleQuote;": "\u201c", - "OpenCurlyQuote;": "\u2018", - "Or;": "\u2a54", - "Oscr;": "\U0001d4aa", - "Oslash": "\xd8", - "Oslash;": "\xd8", - "Otilde": "\xd5", - "Otilde;": "\xd5", - "Otimes;": "\u2a37", - "Ouml": "\xd6", - "Ouml;": "\xd6", - "OverBar;": "\u203e", - "OverBrace;": "\u23de", - "OverBracket;": "\u23b4", - "OverParenthesis;": "\u23dc", - "PartialD;": "\u2202", - "Pcy;": "\u041f", - "Pfr;": "\U0001d513", - "Phi;": "\u03a6", - "Pi;": "\u03a0", - "PlusMinus;": "\xb1", - "Poincareplane;": "\u210c", - "Popf;": "\u2119", - "Pr;": "\u2abb", - "Precedes;": "\u227a", - "PrecedesEqual;": "\u2aaf", - "PrecedesSlantEqual;": "\u227c", - "PrecedesTilde;": "\u227e", - "Prime;": "\u2033", - "Product;": "\u220f", - "Proportion;": "\u2237", - "Proportional;": "\u221d", - "Pscr;": "\U0001d4ab", - "Psi;": "\u03a8", - "QUOT": "\"", - "QUOT;": "\"", - "Qfr;": "\U0001d514", - "Qopf;": "\u211a", - "Qscr;": "\U0001d4ac", - "RBarr;": "\u2910", - "REG": "\xae", - "REG;": "\xae", - "Racute;": "\u0154", - "Rang;": "\u27eb", - "Rarr;": "\u21a0", - "Rarrtl;": "\u2916", - "Rcaron;": "\u0158", - "Rcedil;": "\u0156", - "Rcy;": "\u0420", - "Re;": "\u211c", - "ReverseElement;": "\u220b", - "ReverseEquilibrium;": "\u21cb", - "ReverseUpEquilibrium;": "\u296f", - "Rfr;": "\u211c", - "Rho;": "\u03a1", - "RightAngleBracket;": "\u27e9", - "RightArrow;": "\u2192", - "RightArrowBar;": "\u21e5", - "RightArrowLeftArrow;": "\u21c4", - "RightCeiling;": "\u2309", - "RightDoubleBracket;": "\u27e7", - "RightDownTeeVector;": "\u295d", - "RightDownVector;": "\u21c2", - "RightDownVectorBar;": "\u2955", - "RightFloor;": "\u230b", - "RightTee;": "\u22a2", - "RightTeeArrow;": "\u21a6", - "RightTeeVector;": "\u295b", - "RightTriangle;": "\u22b3", - "RightTriangleBar;": "\u29d0", - "RightTriangleEqual;": "\u22b5", - "RightUpDownVector;": "\u294f", - "RightUpTeeVector;": "\u295c", - "RightUpVector;": "\u21be", - "RightUpVectorBar;": "\u2954", - "RightVector;": "\u21c0", - "RightVectorBar;": "\u2953", - "Rightarrow;": "\u21d2", - "Ropf;": "\u211d", - "RoundImplies;": "\u2970", - "Rrightarrow;": "\u21db", - "Rscr;": "\u211b", - "Rsh;": "\u21b1", - "RuleDelayed;": "\u29f4", - "SHCHcy;": "\u0429", - "SHcy;": "\u0428", - "SOFTcy;": "\u042c", - "Sacute;": "\u015a", - "Sc;": "\u2abc", - "Scaron;": "\u0160", - "Scedil;": "\u015e", - "Scirc;": "\u015c", - "Scy;": "\u0421", - "Sfr;": "\U0001d516", - "ShortDownArrow;": "\u2193", - "ShortLeftArrow;": "\u2190", - "ShortRightArrow;": "\u2192", - "ShortUpArrow;": "\u2191", - "Sigma;": "\u03a3", - "SmallCircle;": "\u2218", - "Sopf;": "\U0001d54a", - "Sqrt;": "\u221a", - "Square;": "\u25a1", - "SquareIntersection;": "\u2293", - "SquareSubset;": "\u228f", - "SquareSubsetEqual;": "\u2291", - "SquareSuperset;": "\u2290", - "SquareSupersetEqual;": "\u2292", - "SquareUnion;": "\u2294", - "Sscr;": "\U0001d4ae", - "Star;": "\u22c6", - "Sub;": "\u22d0", - "Subset;": "\u22d0", - "SubsetEqual;": "\u2286", - "Succeeds;": "\u227b", - "SucceedsEqual;": "\u2ab0", - "SucceedsSlantEqual;": "\u227d", - "SucceedsTilde;": "\u227f", - "SuchThat;": "\u220b", - "Sum;": "\u2211", - "Sup;": "\u22d1", - "Superset;": "\u2283", - "SupersetEqual;": "\u2287", - "Supset;": "\u22d1", - "THORN": "\xde", - "THORN;": "\xde", - "TRADE;": "\u2122", - "TSHcy;": "\u040b", - "TScy;": "\u0426", - "Tab;": "\t", - "Tau;": "\u03a4", - "Tcaron;": "\u0164", - "Tcedil;": "\u0162", - "Tcy;": "\u0422", - "Tfr;": "\U0001d517", - "Therefore;": "\u2234", - "Theta;": "\u0398", - "ThickSpace;": "\u205f\u200a", - "ThinSpace;": "\u2009", - "Tilde;": "\u223c", - "TildeEqual;": "\u2243", - "TildeFullEqual;": "\u2245", - "TildeTilde;": "\u2248", - "Topf;": "\U0001d54b", - "TripleDot;": "\u20db", - "Tscr;": "\U0001d4af", - "Tstrok;": "\u0166", - "Uacute": "\xda", - "Uacute;": "\xda", - "Uarr;": "\u219f", - "Uarrocir;": "\u2949", - "Ubrcy;": "\u040e", - "Ubreve;": "\u016c", - "Ucirc": "\xdb", - "Ucirc;": "\xdb", - "Ucy;": "\u0423", - "Udblac;": "\u0170", - "Ufr;": "\U0001d518", - "Ugrave": "\xd9", - "Ugrave;": "\xd9", - "Umacr;": "\u016a", - "UnderBar;": "_", - "UnderBrace;": "\u23df", - "UnderBracket;": "\u23b5", - "UnderParenthesis;": "\u23dd", - "Union;": "\u22c3", - "UnionPlus;": "\u228e", - "Uogon;": "\u0172", - "Uopf;": "\U0001d54c", - "UpArrow;": "\u2191", - "UpArrowBar;": "\u2912", - "UpArrowDownArrow;": "\u21c5", - "UpDownArrow;": "\u2195", - "UpEquilibrium;": "\u296e", - "UpTee;": "\u22a5", - "UpTeeArrow;": "\u21a5", - "Uparrow;": "\u21d1", - "Updownarrow;": "\u21d5", - "UpperLeftArrow;": "\u2196", - "UpperRightArrow;": "\u2197", - "Upsi;": "\u03d2", - "Upsilon;": "\u03a5", - "Uring;": "\u016e", - "Uscr;": "\U0001d4b0", - "Utilde;": "\u0168", - "Uuml": "\xdc", - "Uuml;": "\xdc", - "VDash;": "\u22ab", - "Vbar;": "\u2aeb", - "Vcy;": "\u0412", - "Vdash;": "\u22a9", - "Vdashl;": "\u2ae6", - "Vee;": "\u22c1", - "Verbar;": "\u2016", - "Vert;": "\u2016", - "VerticalBar;": "\u2223", - "VerticalLine;": "|", - "VerticalSeparator;": "\u2758", - "VerticalTilde;": "\u2240", - "VeryThinSpace;": "\u200a", - "Vfr;": "\U0001d519", - "Vopf;": "\U0001d54d", - "Vscr;": "\U0001d4b1", - "Vvdash;": "\u22aa", - "Wcirc;": "\u0174", - "Wedge;": "\u22c0", - "Wfr;": "\U0001d51a", - "Wopf;": "\U0001d54e", - "Wscr;": "\U0001d4b2", - "Xfr;": "\U0001d51b", - "Xi;": "\u039e", - "Xopf;": "\U0001d54f", - "Xscr;": "\U0001d4b3", - "YAcy;": "\u042f", - "YIcy;": "\u0407", - "YUcy;": "\u042e", - "Yacute": "\xdd", - "Yacute;": "\xdd", - "Ycirc;": "\u0176", - "Ycy;": "\u042b", - "Yfr;": "\U0001d51c", - "Yopf;": "\U0001d550", - "Yscr;": "\U0001d4b4", - "Yuml;": "\u0178", - "ZHcy;": "\u0416", - "Zacute;": "\u0179", - "Zcaron;": "\u017d", - "Zcy;": "\u0417", - "Zdot;": "\u017b", - "ZeroWidthSpace;": "\u200b", - "Zeta;": "\u0396", - "Zfr;": "\u2128", - "Zopf;": "\u2124", - "Zscr;": "\U0001d4b5", - "aacute": "\xe1", - "aacute;": "\xe1", - "abreve;": "\u0103", - "ac;": "\u223e", - "acE;": "\u223e\u0333", - "acd;": "\u223f", - "acirc": "\xe2", - "acirc;": "\xe2", - "acute": "\xb4", - "acute;": "\xb4", - "acy;": "\u0430", - "aelig": "\xe6", - "aelig;": "\xe6", - "af;": "\u2061", - "afr;": "\U0001d51e", - "agrave": "\xe0", - "agrave;": "\xe0", - "alefsym;": "\u2135", - "aleph;": "\u2135", - "alpha;": "\u03b1", - "amacr;": "\u0101", - "amalg;": "\u2a3f", - "amp": "&", - "amp;": "&", - "and;": "\u2227", - "andand;": "\u2a55", - "andd;": "\u2a5c", - "andslope;": "\u2a58", - "andv;": "\u2a5a", - "ang;": "\u2220", - "ange;": "\u29a4", - "angle;": "\u2220", - "angmsd;": "\u2221", - "angmsdaa;": "\u29a8", - "angmsdab;": "\u29a9", - "angmsdac;": "\u29aa", - "angmsdad;": "\u29ab", - "angmsdae;": "\u29ac", - "angmsdaf;": "\u29ad", - "angmsdag;": "\u29ae", - "angmsdah;": "\u29af", - "angrt;": "\u221f", - "angrtvb;": "\u22be", - "angrtvbd;": "\u299d", - "angsph;": "\u2222", - "angst;": "\xc5", - "angzarr;": "\u237c", - "aogon;": "\u0105", - "aopf;": "\U0001d552", - "ap;": "\u2248", - "apE;": "\u2a70", - "apacir;": "\u2a6f", - "ape;": "\u224a", - "apid;": "\u224b", - "apos;": "'", - "approx;": "\u2248", - "approxeq;": "\u224a", - "aring": "\xe5", - "aring;": "\xe5", - "ascr;": "\U0001d4b6", - "ast;": "*", - "asymp;": "\u2248", - "asympeq;": "\u224d", - "atilde": "\xe3", - "atilde;": "\xe3", - "auml": "\xe4", - "auml;": "\xe4", - "awconint;": "\u2233", - "awint;": "\u2a11", - "bNot;": "\u2aed", - "backcong;": "\u224c", - "backepsilon;": "\u03f6", - "backprime;": "\u2035", - "backsim;": "\u223d", - "backsimeq;": "\u22cd", - "barvee;": "\u22bd", - "barwed;": "\u2305", - "barwedge;": "\u2305", - "bbrk;": "\u23b5", - "bbrktbrk;": "\u23b6", - "bcong;": "\u224c", - "bcy;": "\u0431", - "bdquo;": "\u201e", - "becaus;": "\u2235", - "because;": "\u2235", - "bemptyv;": "\u29b0", - "bepsi;": "\u03f6", - "bernou;": "\u212c", - "beta;": "\u03b2", - "beth;": "\u2136", - "between;": "\u226c", - "bfr;": "\U0001d51f", - "bigcap;": "\u22c2", - "bigcirc;": "\u25ef", - "bigcup;": "\u22c3", - "bigodot;": "\u2a00", - "bigoplus;": "\u2a01", - "bigotimes;": "\u2a02", - "bigsqcup;": "\u2a06", - "bigstar;": "\u2605", - "bigtriangledown;": "\u25bd", - "bigtriangleup;": "\u25b3", - "biguplus;": "\u2a04", - "bigvee;": "\u22c1", - "bigwedge;": "\u22c0", - "bkarow;": "\u290d", - "blacklozenge;": "\u29eb", - "blacksquare;": "\u25aa", - "blacktriangle;": "\u25b4", - "blacktriangledown;": "\u25be", - "blacktriangleleft;": "\u25c2", - "blacktriangleright;": "\u25b8", - "blank;": "\u2423", - "blk12;": "\u2592", - "blk14;": "\u2591", - "blk34;": "\u2593", - "block;": "\u2588", - "bne;": "=\u20e5", - "bnequiv;": "\u2261\u20e5", - "bnot;": "\u2310", - "bopf;": "\U0001d553", - "bot;": "\u22a5", - "bottom;": "\u22a5", - "bowtie;": "\u22c8", - "boxDL;": "\u2557", - "boxDR;": "\u2554", - "boxDl;": "\u2556", - "boxDr;": "\u2553", - "boxH;": "\u2550", - "boxHD;": "\u2566", - "boxHU;": "\u2569", - "boxHd;": "\u2564", - "boxHu;": "\u2567", - "boxUL;": "\u255d", - "boxUR;": "\u255a", - "boxUl;": "\u255c", - "boxUr;": "\u2559", - "boxV;": "\u2551", - "boxVH;": "\u256c", - "boxVL;": "\u2563", - "boxVR;": "\u2560", - "boxVh;": "\u256b", - "boxVl;": "\u2562", - "boxVr;": "\u255f", - "boxbox;": "\u29c9", - "boxdL;": "\u2555", - "boxdR;": "\u2552", - "boxdl;": "\u2510", - "boxdr;": "\u250c", - "boxh;": "\u2500", - "boxhD;": "\u2565", - "boxhU;": "\u2568", - "boxhd;": "\u252c", - "boxhu;": "\u2534", - "boxminus;": "\u229f", - "boxplus;": "\u229e", - "boxtimes;": "\u22a0", - "boxuL;": "\u255b", - "boxuR;": "\u2558", - "boxul;": "\u2518", - "boxur;": "\u2514", - "boxv;": "\u2502", - "boxvH;": "\u256a", - "boxvL;": "\u2561", - "boxvR;": "\u255e", - "boxvh;": "\u253c", - "boxvl;": "\u2524", - "boxvr;": "\u251c", - "bprime;": "\u2035", - "breve;": "\u02d8", - "brvbar": "\xa6", - "brvbar;": "\xa6", - "bscr;": "\U0001d4b7", - "bsemi;": "\u204f", - "bsim;": "\u223d", - "bsime;": "\u22cd", - "bsol;": "\\", - "bsolb;": "\u29c5", - "bsolhsub;": "\u27c8", - "bull;": "\u2022", - "bullet;": "\u2022", - "bump;": "\u224e", - "bumpE;": "\u2aae", - "bumpe;": "\u224f", - "bumpeq;": "\u224f", - "cacute;": "\u0107", - "cap;": "\u2229", - "capand;": "\u2a44", - "capbrcup;": "\u2a49", - "capcap;": "\u2a4b", - "capcup;": "\u2a47", - "capdot;": "\u2a40", - "caps;": "\u2229\ufe00", - "caret;": "\u2041", - "caron;": "\u02c7", - "ccaps;": "\u2a4d", - "ccaron;": "\u010d", - "ccedil": "\xe7", - "ccedil;": "\xe7", - "ccirc;": "\u0109", - "ccups;": "\u2a4c", - "ccupssm;": "\u2a50", - "cdot;": "\u010b", - "cedil": "\xb8", - "cedil;": "\xb8", - "cemptyv;": "\u29b2", - "cent": "\xa2", - "cent;": "\xa2", - "centerdot;": "\xb7", - "cfr;": "\U0001d520", - "chcy;": "\u0447", - "check;": "\u2713", - "checkmark;": "\u2713", - "chi;": "\u03c7", - "cir;": "\u25cb", - "cirE;": "\u29c3", - "circ;": "\u02c6", - "circeq;": "\u2257", - "circlearrowleft;": "\u21ba", - "circlearrowright;": "\u21bb", - "circledR;": "\xae", - "circledS;": "\u24c8", - "circledast;": "\u229b", - "circledcirc;": "\u229a", - "circleddash;": "\u229d", - "cire;": "\u2257", - "cirfnint;": "\u2a10", - "cirmid;": "\u2aef", - "cirscir;": "\u29c2", - "clubs;": "\u2663", - "clubsuit;": "\u2663", - "colon;": ":", - "colone;": "\u2254", - "coloneq;": "\u2254", - "comma;": ",", - "commat;": "@", - "comp;": "\u2201", - "compfn;": "\u2218", - "complement;": "\u2201", - "complexes;": "\u2102", - "cong;": "\u2245", - "congdot;": "\u2a6d", - "conint;": "\u222e", - "copf;": "\U0001d554", - "coprod;": "\u2210", - "copy": "\xa9", - "copy;": "\xa9", - "copysr;": "\u2117", - "crarr;": "\u21b5", - "cross;": "\u2717", - "cscr;": "\U0001d4b8", - "csub;": "\u2acf", - "csube;": "\u2ad1", - "csup;": "\u2ad0", - "csupe;": "\u2ad2", - "ctdot;": "\u22ef", - "cudarrl;": "\u2938", - "cudarrr;": "\u2935", - "cuepr;": "\u22de", - "cuesc;": "\u22df", - "cularr;": "\u21b6", - "cularrp;": "\u293d", - "cup;": "\u222a", - "cupbrcap;": "\u2a48", - "cupcap;": "\u2a46", - "cupcup;": "\u2a4a", - "cupdot;": "\u228d", - "cupor;": "\u2a45", - "cups;": "\u222a\ufe00", - "curarr;": "\u21b7", - "curarrm;": "\u293c", - "curlyeqprec;": "\u22de", - "curlyeqsucc;": "\u22df", - "curlyvee;": "\u22ce", - "curlywedge;": "\u22cf", - "curren": "\xa4", - "curren;": "\xa4", - "curvearrowleft;": "\u21b6", - "curvearrowright;": "\u21b7", - "cuvee;": "\u22ce", - "cuwed;": "\u22cf", - "cwconint;": "\u2232", - "cwint;": "\u2231", - "cylcty;": "\u232d", - "dArr;": "\u21d3", - "dHar;": "\u2965", - "dagger;": "\u2020", - "daleth;": "\u2138", - "darr;": "\u2193", - "dash;": "\u2010", - "dashv;": "\u22a3", - "dbkarow;": "\u290f", - "dblac;": "\u02dd", - "dcaron;": "\u010f", - "dcy;": "\u0434", - "dd;": "\u2146", - "ddagger;": "\u2021", - "ddarr;": "\u21ca", - "ddotseq;": "\u2a77", - "deg": "\xb0", - "deg;": "\xb0", - "delta;": "\u03b4", - "demptyv;": "\u29b1", - "dfisht;": "\u297f", - "dfr;": "\U0001d521", - "dharl;": "\u21c3", - "dharr;": "\u21c2", - "diam;": "\u22c4", - "diamond;": "\u22c4", - "diamondsuit;": "\u2666", - "diams;": "\u2666", - "die;": "\xa8", - "digamma;": "\u03dd", - "disin;": "\u22f2", - "div;": "\xf7", - "divide": "\xf7", - "divide;": "\xf7", - "divideontimes;": "\u22c7", - "divonx;": "\u22c7", - "djcy;": "\u0452", - "dlcorn;": "\u231e", - "dlcrop;": "\u230d", - "dollar;": "$", - "dopf;": "\U0001d555", - "dot;": "\u02d9", - "doteq;": "\u2250", - "doteqdot;": "\u2251", - "dotminus;": "\u2238", - "dotplus;": "\u2214", - "dotsquare;": "\u22a1", - "doublebarwedge;": "\u2306", - "downarrow;": "\u2193", - "downdownarrows;": "\u21ca", - "downharpoonleft;": "\u21c3", - "downharpoonright;": "\u21c2", - "drbkarow;": "\u2910", - "drcorn;": "\u231f", - "drcrop;": "\u230c", - "dscr;": "\U0001d4b9", - "dscy;": "\u0455", - "dsol;": "\u29f6", - "dstrok;": "\u0111", - "dtdot;": "\u22f1", - "dtri;": "\u25bf", - "dtrif;": "\u25be", - "duarr;": "\u21f5", - "duhar;": "\u296f", - "dwangle;": "\u29a6", - "dzcy;": "\u045f", - "dzigrarr;": "\u27ff", - "eDDot;": "\u2a77", - "eDot;": "\u2251", - "eacute": "\xe9", - "eacute;": "\xe9", - "easter;": "\u2a6e", - "ecaron;": "\u011b", - "ecir;": "\u2256", - "ecirc": "\xea", - "ecirc;": "\xea", - "ecolon;": "\u2255", - "ecy;": "\u044d", - "edot;": "\u0117", - "ee;": "\u2147", - "efDot;": "\u2252", - "efr;": "\U0001d522", - "eg;": "\u2a9a", - "egrave": "\xe8", - "egrave;": "\xe8", - "egs;": "\u2a96", - "egsdot;": "\u2a98", - "el;": "\u2a99", - "elinters;": "\u23e7", - "ell;": "\u2113", - "els;": "\u2a95", - "elsdot;": "\u2a97", - "emacr;": "\u0113", - "empty;": "\u2205", - "emptyset;": "\u2205", - "emptyv;": "\u2205", - "emsp13;": "\u2004", - "emsp14;": "\u2005", - "emsp;": "\u2003", - "eng;": "\u014b", - "ensp;": "\u2002", - "eogon;": "\u0119", - "eopf;": "\U0001d556", - "epar;": "\u22d5", - "eparsl;": "\u29e3", - "eplus;": "\u2a71", - "epsi;": "\u03b5", - "epsilon;": "\u03b5", - "epsiv;": "\u03f5", - "eqcirc;": "\u2256", - "eqcolon;": "\u2255", - "eqsim;": "\u2242", - "eqslantgtr;": "\u2a96", - "eqslantless;": "\u2a95", - "equals;": "=", - "equest;": "\u225f", - "equiv;": "\u2261", - "equivDD;": "\u2a78", - "eqvparsl;": "\u29e5", - "erDot;": "\u2253", - "erarr;": "\u2971", - "escr;": "\u212f", - "esdot;": "\u2250", - "esim;": "\u2242", - "eta;": "\u03b7", - "eth": "\xf0", - "eth;": "\xf0", - "euml": "\xeb", - "euml;": "\xeb", - "euro;": "\u20ac", - "excl;": "!", - "exist;": "\u2203", - "expectation;": "\u2130", - "exponentiale;": "\u2147", - "fallingdotseq;": "\u2252", - "fcy;": "\u0444", - "female;": "\u2640", - "ffilig;": "\ufb03", - "fflig;": "\ufb00", - "ffllig;": "\ufb04", - "ffr;": "\U0001d523", - "filig;": "\ufb01", - "fjlig;": "fj", - "flat;": "\u266d", - "fllig;": "\ufb02", - "fltns;": "\u25b1", - "fnof;": "\u0192", - "fopf;": "\U0001d557", - "forall;": "\u2200", - "fork;": "\u22d4", - "forkv;": "\u2ad9", - "fpartint;": "\u2a0d", - "frac12": "\xbd", - "frac12;": "\xbd", - "frac13;": "\u2153", - "frac14": "\xbc", - "frac14;": "\xbc", - "frac15;": "\u2155", - "frac16;": "\u2159", - "frac18;": "\u215b", - "frac23;": "\u2154", - "frac25;": "\u2156", - "frac34": "\xbe", - "frac34;": "\xbe", - "frac35;": "\u2157", - "frac38;": "\u215c", - "frac45;": "\u2158", - "frac56;": "\u215a", - "frac58;": "\u215d", - "frac78;": "\u215e", - "frasl;": "\u2044", - "frown;": "\u2322", - "fscr;": "\U0001d4bb", - "gE;": "\u2267", - "gEl;": "\u2a8c", - "gacute;": "\u01f5", - "gamma;": "\u03b3", - "gammad;": "\u03dd", - "gap;": "\u2a86", - "gbreve;": "\u011f", - "gcirc;": "\u011d", - "gcy;": "\u0433", - "gdot;": "\u0121", - "ge;": "\u2265", - "gel;": "\u22db", - "geq;": "\u2265", - "geqq;": "\u2267", - "geqslant;": "\u2a7e", - "ges;": "\u2a7e", - "gescc;": "\u2aa9", - "gesdot;": "\u2a80", - "gesdoto;": "\u2a82", - "gesdotol;": "\u2a84", - "gesl;": "\u22db\ufe00", - "gesles;": "\u2a94", - "gfr;": "\U0001d524", - "gg;": "\u226b", - "ggg;": "\u22d9", - "gimel;": "\u2137", - "gjcy;": "\u0453", - "gl;": "\u2277", - "glE;": "\u2a92", - "gla;": "\u2aa5", - "glj;": "\u2aa4", - "gnE;": "\u2269", - "gnap;": "\u2a8a", - "gnapprox;": "\u2a8a", - "gne;": "\u2a88", - "gneq;": "\u2a88", - "gneqq;": "\u2269", - "gnsim;": "\u22e7", - "gopf;": "\U0001d558", - "grave;": "`", - "gscr;": "\u210a", - "gsim;": "\u2273", - "gsime;": "\u2a8e", - "gsiml;": "\u2a90", - "gt": ">", - "gt;": ">", - "gtcc;": "\u2aa7", - "gtcir;": "\u2a7a", - "gtdot;": "\u22d7", - "gtlPar;": "\u2995", - "gtquest;": "\u2a7c", - "gtrapprox;": "\u2a86", - "gtrarr;": "\u2978", - "gtrdot;": "\u22d7", - "gtreqless;": "\u22db", - "gtreqqless;": "\u2a8c", - "gtrless;": "\u2277", - "gtrsim;": "\u2273", - "gvertneqq;": "\u2269\ufe00", - "gvnE;": "\u2269\ufe00", - "hArr;": "\u21d4", - "hairsp;": "\u200a", - "half;": "\xbd", - "hamilt;": "\u210b", - "hardcy;": "\u044a", - "harr;": "\u2194", - "harrcir;": "\u2948", - "harrw;": "\u21ad", - "hbar;": "\u210f", - "hcirc;": "\u0125", - "hearts;": "\u2665", - "heartsuit;": "\u2665", - "hellip;": "\u2026", - "hercon;": "\u22b9", - "hfr;": "\U0001d525", - "hksearow;": "\u2925", - "hkswarow;": "\u2926", - "hoarr;": "\u21ff", - "homtht;": "\u223b", - "hookleftarrow;": "\u21a9", - "hookrightarrow;": "\u21aa", - "hopf;": "\U0001d559", - "horbar;": "\u2015", - "hscr;": "\U0001d4bd", - "hslash;": "\u210f", - "hstrok;": "\u0127", - "hybull;": "\u2043", - "hyphen;": "\u2010", - "iacute": "\xed", - "iacute;": "\xed", - "ic;": "\u2063", - "icirc": "\xee", - "icirc;": "\xee", - "icy;": "\u0438", - "iecy;": "\u0435", - "iexcl": "\xa1", - "iexcl;": "\xa1", - "iff;": "\u21d4", - "ifr;": "\U0001d526", - "igrave": "\xec", - "igrave;": "\xec", - "ii;": "\u2148", - "iiiint;": "\u2a0c", - "iiint;": "\u222d", - "iinfin;": "\u29dc", - "iiota;": "\u2129", - "ijlig;": "\u0133", - "imacr;": "\u012b", - "image;": "\u2111", - "imagline;": "\u2110", - "imagpart;": "\u2111", - "imath;": "\u0131", - "imof;": "\u22b7", - "imped;": "\u01b5", - "in;": "\u2208", - "incare;": "\u2105", - "infin;": "\u221e", - "infintie;": "\u29dd", - "inodot;": "\u0131", - "int;": "\u222b", - "intcal;": "\u22ba", - "integers;": "\u2124", - "intercal;": "\u22ba", - "intlarhk;": "\u2a17", - "intprod;": "\u2a3c", - "iocy;": "\u0451", - "iogon;": "\u012f", - "iopf;": "\U0001d55a", - "iota;": "\u03b9", - "iprod;": "\u2a3c", - "iquest": "\xbf", - "iquest;": "\xbf", - "iscr;": "\U0001d4be", - "isin;": "\u2208", - "isinE;": "\u22f9", - "isindot;": "\u22f5", - "isins;": "\u22f4", - "isinsv;": "\u22f3", - "isinv;": "\u2208", - "it;": "\u2062", - "itilde;": "\u0129", - "iukcy;": "\u0456", - "iuml": "\xef", - "iuml;": "\xef", - "jcirc;": "\u0135", - "jcy;": "\u0439", - "jfr;": "\U0001d527", - "jmath;": "\u0237", - "jopf;": "\U0001d55b", - "jscr;": "\U0001d4bf", - "jsercy;": "\u0458", - "jukcy;": "\u0454", - "kappa;": "\u03ba", - "kappav;": "\u03f0", - "kcedil;": "\u0137", - "kcy;": "\u043a", - "kfr;": "\U0001d528", - "kgreen;": "\u0138", - "khcy;": "\u0445", - "kjcy;": "\u045c", - "kopf;": "\U0001d55c", - "kscr;": "\U0001d4c0", - "lAarr;": "\u21da", - "lArr;": "\u21d0", - "lAtail;": "\u291b", - "lBarr;": "\u290e", - "lE;": "\u2266", - "lEg;": "\u2a8b", - "lHar;": "\u2962", - "lacute;": "\u013a", - "laemptyv;": "\u29b4", - "lagran;": "\u2112", - "lambda;": "\u03bb", - "lang;": "\u27e8", - "langd;": "\u2991", - "langle;": "\u27e8", - "lap;": "\u2a85", - "laquo": "\xab", - "laquo;": "\xab", - "larr;": "\u2190", - "larrb;": "\u21e4", - "larrbfs;": "\u291f", - "larrfs;": "\u291d", - "larrhk;": "\u21a9", - "larrlp;": "\u21ab", - "larrpl;": "\u2939", - "larrsim;": "\u2973", - "larrtl;": "\u21a2", - "lat;": "\u2aab", - "latail;": "\u2919", - "late;": "\u2aad", - "lates;": "\u2aad\ufe00", - "lbarr;": "\u290c", - "lbbrk;": "\u2772", - "lbrace;": "{", - "lbrack;": "[", - "lbrke;": "\u298b", - "lbrksld;": "\u298f", - "lbrkslu;": "\u298d", - "lcaron;": "\u013e", - "lcedil;": "\u013c", - "lceil;": "\u2308", - "lcub;": "{", - "lcy;": "\u043b", - "ldca;": "\u2936", - "ldquo;": "\u201c", - "ldquor;": "\u201e", - "ldrdhar;": "\u2967", - "ldrushar;": "\u294b", - "ldsh;": "\u21b2", - "le;": "\u2264", - "leftarrow;": "\u2190", - "leftarrowtail;": "\u21a2", - "leftharpoondown;": "\u21bd", - "leftharpoonup;": "\u21bc", - "leftleftarrows;": "\u21c7", - "leftrightarrow;": "\u2194", - "leftrightarrows;": "\u21c6", - "leftrightharpoons;": "\u21cb", - "leftrightsquigarrow;": "\u21ad", - "leftthreetimes;": "\u22cb", - "leg;": "\u22da", - "leq;": "\u2264", - "leqq;": "\u2266", - "leqslant;": "\u2a7d", - "les;": "\u2a7d", - "lescc;": "\u2aa8", - "lesdot;": "\u2a7f", - "lesdoto;": "\u2a81", - "lesdotor;": "\u2a83", - "lesg;": "\u22da\ufe00", - "lesges;": "\u2a93", - "lessapprox;": "\u2a85", - "lessdot;": "\u22d6", - "lesseqgtr;": "\u22da", - "lesseqqgtr;": "\u2a8b", - "lessgtr;": "\u2276", - "lesssim;": "\u2272", - "lfisht;": "\u297c", - "lfloor;": "\u230a", - "lfr;": "\U0001d529", - "lg;": "\u2276", - "lgE;": "\u2a91", - "lhard;": "\u21bd", - "lharu;": "\u21bc", - "lharul;": "\u296a", - "lhblk;": "\u2584", - "ljcy;": "\u0459", - "ll;": "\u226a", - "llarr;": "\u21c7", - "llcorner;": "\u231e", - "llhard;": "\u296b", - "lltri;": "\u25fa", - "lmidot;": "\u0140", - "lmoust;": "\u23b0", - "lmoustache;": "\u23b0", - "lnE;": "\u2268", - "lnap;": "\u2a89", - "lnapprox;": "\u2a89", - "lne;": "\u2a87", - "lneq;": "\u2a87", - "lneqq;": "\u2268", - "lnsim;": "\u22e6", - "loang;": "\u27ec", - "loarr;": "\u21fd", - "lobrk;": "\u27e6", - "longleftarrow;": "\u27f5", - "longleftrightarrow;": "\u27f7", - "longmapsto;": "\u27fc", - "longrightarrow;": "\u27f6", - "looparrowleft;": "\u21ab", - "looparrowright;": "\u21ac", - "lopar;": "\u2985", - "lopf;": "\U0001d55d", - "loplus;": "\u2a2d", - "lotimes;": "\u2a34", - "lowast;": "\u2217", - "lowbar;": "_", - "loz;": "\u25ca", - "lozenge;": "\u25ca", - "lozf;": "\u29eb", - "lpar;": "(", - "lparlt;": "\u2993", - "lrarr;": "\u21c6", - "lrcorner;": "\u231f", - "lrhar;": "\u21cb", - "lrhard;": "\u296d", - "lrm;": "\u200e", - "lrtri;": "\u22bf", - "lsaquo;": "\u2039", - "lscr;": "\U0001d4c1", - "lsh;": "\u21b0", - "lsim;": "\u2272", - "lsime;": "\u2a8d", - "lsimg;": "\u2a8f", - "lsqb;": "[", - "lsquo;": "\u2018", - "lsquor;": "\u201a", - "lstrok;": "\u0142", - "lt": "<", - "lt;": "<", - "ltcc;": "\u2aa6", - "ltcir;": "\u2a79", - "ltdot;": "\u22d6", - "lthree;": "\u22cb", - "ltimes;": "\u22c9", - "ltlarr;": "\u2976", - "ltquest;": "\u2a7b", - "ltrPar;": "\u2996", - "ltri;": "\u25c3", - "ltrie;": "\u22b4", - "ltrif;": "\u25c2", - "lurdshar;": "\u294a", - "luruhar;": "\u2966", - "lvertneqq;": "\u2268\ufe00", - "lvnE;": "\u2268\ufe00", - "mDDot;": "\u223a", - "macr": "\xaf", - "macr;": "\xaf", - "male;": "\u2642", - "malt;": "\u2720", - "maltese;": "\u2720", - "map;": "\u21a6", - "mapsto;": "\u21a6", - "mapstodown;": "\u21a7", - "mapstoleft;": "\u21a4", - "mapstoup;": "\u21a5", - "marker;": "\u25ae", - "mcomma;": "\u2a29", - "mcy;": "\u043c", - "mdash;": "\u2014", - "measuredangle;": "\u2221", - "mfr;": "\U0001d52a", - "mho;": "\u2127", - "micro": "\xb5", - "micro;": "\xb5", - "mid;": "\u2223", - "midast;": "*", - "midcir;": "\u2af0", - "middot": "\xb7", - "middot;": "\xb7", - "minus;": "\u2212", - "minusb;": "\u229f", - "minusd;": "\u2238", - "minusdu;": "\u2a2a", - "mlcp;": "\u2adb", - "mldr;": "\u2026", - "mnplus;": "\u2213", - "models;": "\u22a7", - "mopf;": "\U0001d55e", - "mp;": "\u2213", - "mscr;": "\U0001d4c2", - "mstpos;": "\u223e", - "mu;": "\u03bc", - "multimap;": "\u22b8", - "mumap;": "\u22b8", - "nGg;": "\u22d9\u0338", - "nGt;": "\u226b\u20d2", - "nGtv;": "\u226b\u0338", - "nLeftarrow;": "\u21cd", - "nLeftrightarrow;": "\u21ce", - "nLl;": "\u22d8\u0338", - "nLt;": "\u226a\u20d2", - "nLtv;": "\u226a\u0338", - "nRightarrow;": "\u21cf", - "nVDash;": "\u22af", - "nVdash;": "\u22ae", - "nabla;": "\u2207", - "nacute;": "\u0144", - "nang;": "\u2220\u20d2", - "nap;": "\u2249", - "napE;": "\u2a70\u0338", - "napid;": "\u224b\u0338", - "napos;": "\u0149", - "napprox;": "\u2249", - "natur;": "\u266e", - "natural;": "\u266e", - "naturals;": "\u2115", - "nbsp": "\xa0", - "nbsp;": "\xa0", - "nbump;": "\u224e\u0338", - "nbumpe;": "\u224f\u0338", - "ncap;": "\u2a43", - "ncaron;": "\u0148", - "ncedil;": "\u0146", - "ncong;": "\u2247", - "ncongdot;": "\u2a6d\u0338", - "ncup;": "\u2a42", - "ncy;": "\u043d", - "ndash;": "\u2013", - "ne;": "\u2260", - "neArr;": "\u21d7", - "nearhk;": "\u2924", - "nearr;": "\u2197", - "nearrow;": "\u2197", - "nedot;": "\u2250\u0338", - "nequiv;": "\u2262", - "nesear;": "\u2928", - "nesim;": "\u2242\u0338", - "nexist;": "\u2204", - "nexists;": "\u2204", - "nfr;": "\U0001d52b", - "ngE;": "\u2267\u0338", - "nge;": "\u2271", - "ngeq;": "\u2271", - "ngeqq;": "\u2267\u0338", - "ngeqslant;": "\u2a7e\u0338", - "nges;": "\u2a7e\u0338", - "ngsim;": "\u2275", - "ngt;": "\u226f", - "ngtr;": "\u226f", - "nhArr;": "\u21ce", - "nharr;": "\u21ae", - "nhpar;": "\u2af2", - "ni;": "\u220b", - "nis;": "\u22fc", - "nisd;": "\u22fa", - "niv;": "\u220b", - "njcy;": "\u045a", - "nlArr;": "\u21cd", - "nlE;": "\u2266\u0338", - "nlarr;": "\u219a", - "nldr;": "\u2025", - "nle;": "\u2270", - "nleftarrow;": "\u219a", - "nleftrightarrow;": "\u21ae", - "nleq;": "\u2270", - "nleqq;": "\u2266\u0338", - "nleqslant;": "\u2a7d\u0338", - "nles;": "\u2a7d\u0338", - "nless;": "\u226e", - "nlsim;": "\u2274", - "nlt;": "\u226e", - "nltri;": "\u22ea", - "nltrie;": "\u22ec", - "nmid;": "\u2224", - "nopf;": "\U0001d55f", - "not": "\xac", - "not;": "\xac", - "notin;": "\u2209", - "notinE;": "\u22f9\u0338", - "notindot;": "\u22f5\u0338", - "notinva;": "\u2209", - "notinvb;": "\u22f7", - "notinvc;": "\u22f6", - "notni;": "\u220c", - "notniva;": "\u220c", - "notnivb;": "\u22fe", - "notnivc;": "\u22fd", - "npar;": "\u2226", - "nparallel;": "\u2226", - "nparsl;": "\u2afd\u20e5", - "npart;": "\u2202\u0338", - "npolint;": "\u2a14", - "npr;": "\u2280", - "nprcue;": "\u22e0", - "npre;": "\u2aaf\u0338", - "nprec;": "\u2280", - "npreceq;": "\u2aaf\u0338", - "nrArr;": "\u21cf", - "nrarr;": "\u219b", - "nrarrc;": "\u2933\u0338", - "nrarrw;": "\u219d\u0338", - "nrightarrow;": "\u219b", - "nrtri;": "\u22eb", - "nrtrie;": "\u22ed", - "nsc;": "\u2281", - "nsccue;": "\u22e1", - "nsce;": "\u2ab0\u0338", - "nscr;": "\U0001d4c3", - "nshortmid;": "\u2224", - "nshortparallel;": "\u2226", - "nsim;": "\u2241", - "nsime;": "\u2244", - "nsimeq;": "\u2244", - "nsmid;": "\u2224", - "nspar;": "\u2226", - "nsqsube;": "\u22e2", - "nsqsupe;": "\u22e3", - "nsub;": "\u2284", - "nsubE;": "\u2ac5\u0338", - "nsube;": "\u2288", - "nsubset;": "\u2282\u20d2", - "nsubseteq;": "\u2288", - "nsubseteqq;": "\u2ac5\u0338", - "nsucc;": "\u2281", - "nsucceq;": "\u2ab0\u0338", - "nsup;": "\u2285", - "nsupE;": "\u2ac6\u0338", - "nsupe;": "\u2289", - "nsupset;": "\u2283\u20d2", - "nsupseteq;": "\u2289", - "nsupseteqq;": "\u2ac6\u0338", - "ntgl;": "\u2279", - "ntilde": "\xf1", - "ntilde;": "\xf1", - "ntlg;": "\u2278", - "ntriangleleft;": "\u22ea", - "ntrianglelefteq;": "\u22ec", - "ntriangleright;": "\u22eb", - "ntrianglerighteq;": "\u22ed", - "nu;": "\u03bd", - "num;": "#", - "numero;": "\u2116", - "numsp;": "\u2007", - "nvDash;": "\u22ad", - "nvHarr;": "\u2904", - "nvap;": "\u224d\u20d2", - "nvdash;": "\u22ac", - "nvge;": "\u2265\u20d2", - "nvgt;": ">\u20d2", - "nvinfin;": "\u29de", - "nvlArr;": "\u2902", - "nvle;": "\u2264\u20d2", - "nvlt;": "<\u20d2", - "nvltrie;": "\u22b4\u20d2", - "nvrArr;": "\u2903", - "nvrtrie;": "\u22b5\u20d2", - "nvsim;": "\u223c\u20d2", - "nwArr;": "\u21d6", - "nwarhk;": "\u2923", - "nwarr;": "\u2196", - "nwarrow;": "\u2196", - "nwnear;": "\u2927", - "oS;": "\u24c8", - "oacute": "\xf3", - "oacute;": "\xf3", - "oast;": "\u229b", - "ocir;": "\u229a", - "ocirc": "\xf4", - "ocirc;": "\xf4", - "ocy;": "\u043e", - "odash;": "\u229d", - "odblac;": "\u0151", - "odiv;": "\u2a38", - "odot;": "\u2299", - "odsold;": "\u29bc", - "oelig;": "\u0153", - "ofcir;": "\u29bf", - "ofr;": "\U0001d52c", - "ogon;": "\u02db", - "ograve": "\xf2", - "ograve;": "\xf2", - "ogt;": "\u29c1", - "ohbar;": "\u29b5", - "ohm;": "\u03a9", - "oint;": "\u222e", - "olarr;": "\u21ba", - "olcir;": "\u29be", - "olcross;": "\u29bb", - "oline;": "\u203e", - "olt;": "\u29c0", - "omacr;": "\u014d", - "omega;": "\u03c9", - "omicron;": "\u03bf", - "omid;": "\u29b6", - "ominus;": "\u2296", - "oopf;": "\U0001d560", - "opar;": "\u29b7", - "operp;": "\u29b9", - "oplus;": "\u2295", - "or;": "\u2228", - "orarr;": "\u21bb", - "ord;": "\u2a5d", - "order;": "\u2134", - "orderof;": "\u2134", - "ordf": "\xaa", - "ordf;": "\xaa", - "ordm": "\xba", - "ordm;": "\xba", - "origof;": "\u22b6", - "oror;": "\u2a56", - "orslope;": "\u2a57", - "orv;": "\u2a5b", - "oscr;": "\u2134", - "oslash": "\xf8", - "oslash;": "\xf8", - "osol;": "\u2298", - "otilde": "\xf5", - "otilde;": "\xf5", - "otimes;": "\u2297", - "otimesas;": "\u2a36", - "ouml": "\xf6", - "ouml;": "\xf6", - "ovbar;": "\u233d", - "par;": "\u2225", - "para": "\xb6", - "para;": "\xb6", - "parallel;": "\u2225", - "parsim;": "\u2af3", - "parsl;": "\u2afd", - "part;": "\u2202", - "pcy;": "\u043f", - "percnt;": "%", - "period;": ".", - "permil;": "\u2030", - "perp;": "\u22a5", - "pertenk;": "\u2031", - "pfr;": "\U0001d52d", - "phi;": "\u03c6", - "phiv;": "\u03d5", - "phmmat;": "\u2133", - "phone;": "\u260e", - "pi;": "\u03c0", - "pitchfork;": "\u22d4", - "piv;": "\u03d6", - "planck;": "\u210f", - "planckh;": "\u210e", - "plankv;": "\u210f", - "plus;": "+", - "plusacir;": "\u2a23", - "plusb;": "\u229e", - "pluscir;": "\u2a22", - "plusdo;": "\u2214", - "plusdu;": "\u2a25", - "pluse;": "\u2a72", - "plusmn": "\xb1", - "plusmn;": "\xb1", - "plussim;": "\u2a26", - "plustwo;": "\u2a27", - "pm;": "\xb1", - "pointint;": "\u2a15", - "popf;": "\U0001d561", - "pound": "\xa3", - "pound;": "\xa3", - "pr;": "\u227a", - "prE;": "\u2ab3", - "prap;": "\u2ab7", - "prcue;": "\u227c", - "pre;": "\u2aaf", - "prec;": "\u227a", - "precapprox;": "\u2ab7", - "preccurlyeq;": "\u227c", - "preceq;": "\u2aaf", - "precnapprox;": "\u2ab9", - "precneqq;": "\u2ab5", - "precnsim;": "\u22e8", - "precsim;": "\u227e", - "prime;": "\u2032", - "primes;": "\u2119", - "prnE;": "\u2ab5", - "prnap;": "\u2ab9", - "prnsim;": "\u22e8", - "prod;": "\u220f", - "profalar;": "\u232e", - "profline;": "\u2312", - "profsurf;": "\u2313", - "prop;": "\u221d", - "propto;": "\u221d", - "prsim;": "\u227e", - "prurel;": "\u22b0", - "pscr;": "\U0001d4c5", - "psi;": "\u03c8", - "puncsp;": "\u2008", - "qfr;": "\U0001d52e", - "qint;": "\u2a0c", - "qopf;": "\U0001d562", - "qprime;": "\u2057", - "qscr;": "\U0001d4c6", - "quaternions;": "\u210d", - "quatint;": "\u2a16", - "quest;": "?", - "questeq;": "\u225f", - "quot": "\"", - "quot;": "\"", - "rAarr;": "\u21db", - "rArr;": "\u21d2", - "rAtail;": "\u291c", - "rBarr;": "\u290f", - "rHar;": "\u2964", - "race;": "\u223d\u0331", - "racute;": "\u0155", - "radic;": "\u221a", - "raemptyv;": "\u29b3", - "rang;": "\u27e9", - "rangd;": "\u2992", - "range;": "\u29a5", - "rangle;": "\u27e9", - "raquo": "\xbb", - "raquo;": "\xbb", - "rarr;": "\u2192", - "rarrap;": "\u2975", - "rarrb;": "\u21e5", - "rarrbfs;": "\u2920", - "rarrc;": "\u2933", - "rarrfs;": "\u291e", - "rarrhk;": "\u21aa", - "rarrlp;": "\u21ac", - "rarrpl;": "\u2945", - "rarrsim;": "\u2974", - "rarrtl;": "\u21a3", - "rarrw;": "\u219d", - "ratail;": "\u291a", - "ratio;": "\u2236", - "rationals;": "\u211a", - "rbarr;": "\u290d", - "rbbrk;": "\u2773", - "rbrace;": "}", - "rbrack;": "]", - "rbrke;": "\u298c", - "rbrksld;": "\u298e", - "rbrkslu;": "\u2990", - "rcaron;": "\u0159", - "rcedil;": "\u0157", - "rceil;": "\u2309", - "rcub;": "}", - "rcy;": "\u0440", - "rdca;": "\u2937", - "rdldhar;": "\u2969", - "rdquo;": "\u201d", - "rdquor;": "\u201d", - "rdsh;": "\u21b3", - "real;": "\u211c", - "realine;": "\u211b", - "realpart;": "\u211c", - "reals;": "\u211d", - "rect;": "\u25ad", - "reg": "\xae", - "reg;": "\xae", - "rfisht;": "\u297d", - "rfloor;": "\u230b", - "rfr;": "\U0001d52f", - "rhard;": "\u21c1", - "rharu;": "\u21c0", - "rharul;": "\u296c", - "rho;": "\u03c1", - "rhov;": "\u03f1", - "rightarrow;": "\u2192", - "rightarrowtail;": "\u21a3", - "rightharpoondown;": "\u21c1", - "rightharpoonup;": "\u21c0", - "rightleftarrows;": "\u21c4", - "rightleftharpoons;": "\u21cc", - "rightrightarrows;": "\u21c9", - "rightsquigarrow;": "\u219d", - "rightthreetimes;": "\u22cc", - "ring;": "\u02da", - "risingdotseq;": "\u2253", - "rlarr;": "\u21c4", - "rlhar;": "\u21cc", - "rlm;": "\u200f", - "rmoust;": "\u23b1", - "rmoustache;": "\u23b1", - "rnmid;": "\u2aee", - "roang;": "\u27ed", - "roarr;": "\u21fe", - "robrk;": "\u27e7", - "ropar;": "\u2986", - "ropf;": "\U0001d563", - "roplus;": "\u2a2e", - "rotimes;": "\u2a35", - "rpar;": ")", - "rpargt;": "\u2994", - "rppolint;": "\u2a12", - "rrarr;": "\u21c9", - "rsaquo;": "\u203a", - "rscr;": "\U0001d4c7", - "rsh;": "\u21b1", - "rsqb;": "]", - "rsquo;": "\u2019", - "rsquor;": "\u2019", - "rthree;": "\u22cc", - "rtimes;": "\u22ca", - "rtri;": "\u25b9", - "rtrie;": "\u22b5", - "rtrif;": "\u25b8", - "rtriltri;": "\u29ce", - "ruluhar;": "\u2968", - "rx;": "\u211e", - "sacute;": "\u015b", - "sbquo;": "\u201a", - "sc;": "\u227b", - "scE;": "\u2ab4", - "scap;": "\u2ab8", - "scaron;": "\u0161", - "sccue;": "\u227d", - "sce;": "\u2ab0", - "scedil;": "\u015f", - "scirc;": "\u015d", - "scnE;": "\u2ab6", - "scnap;": "\u2aba", - "scnsim;": "\u22e9", - "scpolint;": "\u2a13", - "scsim;": "\u227f", - "scy;": "\u0441", - "sdot;": "\u22c5", - "sdotb;": "\u22a1", - "sdote;": "\u2a66", - "seArr;": "\u21d8", - "searhk;": "\u2925", - "searr;": "\u2198", - "searrow;": "\u2198", - "sect": "\xa7", - "sect;": "\xa7", - "semi;": ";", - "seswar;": "\u2929", - "setminus;": "\u2216", - "setmn;": "\u2216", - "sext;": "\u2736", - "sfr;": "\U0001d530", - "sfrown;": "\u2322", - "sharp;": "\u266f", - "shchcy;": "\u0449", - "shcy;": "\u0448", - "shortmid;": "\u2223", - "shortparallel;": "\u2225", - "shy": "\xad", - "shy;": "\xad", - "sigma;": "\u03c3", - "sigmaf;": "\u03c2", - "sigmav;": "\u03c2", - "sim;": "\u223c", - "simdot;": "\u2a6a", - "sime;": "\u2243", - "simeq;": "\u2243", - "simg;": "\u2a9e", - "simgE;": "\u2aa0", - "siml;": "\u2a9d", - "simlE;": "\u2a9f", - "simne;": "\u2246", - "simplus;": "\u2a24", - "simrarr;": "\u2972", - "slarr;": "\u2190", - "smallsetminus;": "\u2216", - "smashp;": "\u2a33", - "smeparsl;": "\u29e4", - "smid;": "\u2223", - "smile;": "\u2323", - "smt;": "\u2aaa", - "smte;": "\u2aac", - "smtes;": "\u2aac\ufe00", - "softcy;": "\u044c", - "sol;": "/", - "solb;": "\u29c4", - "solbar;": "\u233f", - "sopf;": "\U0001d564", - "spades;": "\u2660", - "spadesuit;": "\u2660", - "spar;": "\u2225", - "sqcap;": "\u2293", - "sqcaps;": "\u2293\ufe00", - "sqcup;": "\u2294", - "sqcups;": "\u2294\ufe00", - "sqsub;": "\u228f", - "sqsube;": "\u2291", - "sqsubset;": "\u228f", - "sqsubseteq;": "\u2291", - "sqsup;": "\u2290", - "sqsupe;": "\u2292", - "sqsupset;": "\u2290", - "sqsupseteq;": "\u2292", - "squ;": "\u25a1", - "square;": "\u25a1", - "squarf;": "\u25aa", - "squf;": "\u25aa", - "srarr;": "\u2192", - "sscr;": "\U0001d4c8", - "ssetmn;": "\u2216", - "ssmile;": "\u2323", - "sstarf;": "\u22c6", - "star;": "\u2606", - "starf;": "\u2605", - "straightepsilon;": "\u03f5", - "straightphi;": "\u03d5", - "strns;": "\xaf", - "sub;": "\u2282", - "subE;": "\u2ac5", - "subdot;": "\u2abd", - "sube;": "\u2286", - "subedot;": "\u2ac3", - "submult;": "\u2ac1", - "subnE;": "\u2acb", - "subne;": "\u228a", - "subplus;": "\u2abf", - "subrarr;": "\u2979", - "subset;": "\u2282", - "subseteq;": "\u2286", - "subseteqq;": "\u2ac5", - "subsetneq;": "\u228a", - "subsetneqq;": "\u2acb", - "subsim;": "\u2ac7", - "subsub;": "\u2ad5", - "subsup;": "\u2ad3", - "succ;": "\u227b", - "succapprox;": "\u2ab8", - "succcurlyeq;": "\u227d", - "succeq;": "\u2ab0", - "succnapprox;": "\u2aba", - "succneqq;": "\u2ab6", - "succnsim;": "\u22e9", - "succsim;": "\u227f", - "sum;": "\u2211", - "sung;": "\u266a", - "sup1": "\xb9", - "sup1;": "\xb9", - "sup2": "\xb2", - "sup2;": "\xb2", - "sup3": "\xb3", - "sup3;": "\xb3", - "sup;": "\u2283", - "supE;": "\u2ac6", - "supdot;": "\u2abe", - "supdsub;": "\u2ad8", - "supe;": "\u2287", - "supedot;": "\u2ac4", - "suphsol;": "\u27c9", - "suphsub;": "\u2ad7", - "suplarr;": "\u297b", - "supmult;": "\u2ac2", - "supnE;": "\u2acc", - "supne;": "\u228b", - "supplus;": "\u2ac0", - "supset;": "\u2283", - "supseteq;": "\u2287", - "supseteqq;": "\u2ac6", - "supsetneq;": "\u228b", - "supsetneqq;": "\u2acc", - "supsim;": "\u2ac8", - "supsub;": "\u2ad4", - "supsup;": "\u2ad6", - "swArr;": "\u21d9", - "swarhk;": "\u2926", - "swarr;": "\u2199", - "swarrow;": "\u2199", - "swnwar;": "\u292a", - "szlig": "\xdf", - "szlig;": "\xdf", - "target;": "\u2316", - "tau;": "\u03c4", - "tbrk;": "\u23b4", - "tcaron;": "\u0165", - "tcedil;": "\u0163", - "tcy;": "\u0442", - "tdot;": "\u20db", - "telrec;": "\u2315", - "tfr;": "\U0001d531", - "there4;": "\u2234", - "therefore;": "\u2234", - "theta;": "\u03b8", - "thetasym;": "\u03d1", - "thetav;": "\u03d1", - "thickapprox;": "\u2248", - "thicksim;": "\u223c", - "thinsp;": "\u2009", - "thkap;": "\u2248", - "thksim;": "\u223c", - "thorn": "\xfe", - "thorn;": "\xfe", - "tilde;": "\u02dc", - "times": "\xd7", - "times;": "\xd7", - "timesb;": "\u22a0", - "timesbar;": "\u2a31", - "timesd;": "\u2a30", - "tint;": "\u222d", - "toea;": "\u2928", - "top;": "\u22a4", - "topbot;": "\u2336", - "topcir;": "\u2af1", - "topf;": "\U0001d565", - "topfork;": "\u2ada", - "tosa;": "\u2929", - "tprime;": "\u2034", - "trade;": "\u2122", - "triangle;": "\u25b5", - "triangledown;": "\u25bf", - "triangleleft;": "\u25c3", - "trianglelefteq;": "\u22b4", - "triangleq;": "\u225c", - "triangleright;": "\u25b9", - "trianglerighteq;": "\u22b5", - "tridot;": "\u25ec", - "trie;": "\u225c", - "triminus;": "\u2a3a", - "triplus;": "\u2a39", - "trisb;": "\u29cd", - "tritime;": "\u2a3b", - "trpezium;": "\u23e2", - "tscr;": "\U0001d4c9", - "tscy;": "\u0446", - "tshcy;": "\u045b", - "tstrok;": "\u0167", - "twixt;": "\u226c", - "twoheadleftarrow;": "\u219e", - "twoheadrightarrow;": "\u21a0", - "uArr;": "\u21d1", - "uHar;": "\u2963", - "uacute": "\xfa", - "uacute;": "\xfa", - "uarr;": "\u2191", - "ubrcy;": "\u045e", - "ubreve;": "\u016d", - "ucirc": "\xfb", - "ucirc;": "\xfb", - "ucy;": "\u0443", - "udarr;": "\u21c5", - "udblac;": "\u0171", - "udhar;": "\u296e", - "ufisht;": "\u297e", - "ufr;": "\U0001d532", - "ugrave": "\xf9", - "ugrave;": "\xf9", - "uharl;": "\u21bf", - "uharr;": "\u21be", - "uhblk;": "\u2580", - "ulcorn;": "\u231c", - "ulcorner;": "\u231c", - "ulcrop;": "\u230f", - "ultri;": "\u25f8", - "umacr;": "\u016b", - "uml": "\xa8", - "uml;": "\xa8", - "uogon;": "\u0173", - "uopf;": "\U0001d566", - "uparrow;": "\u2191", - "updownarrow;": "\u2195", - "upharpoonleft;": "\u21bf", - "upharpoonright;": "\u21be", - "uplus;": "\u228e", - "upsi;": "\u03c5", - "upsih;": "\u03d2", - "upsilon;": "\u03c5", - "upuparrows;": "\u21c8", - "urcorn;": "\u231d", - "urcorner;": "\u231d", - "urcrop;": "\u230e", - "uring;": "\u016f", - "urtri;": "\u25f9", - "uscr;": "\U0001d4ca", - "utdot;": "\u22f0", - "utilde;": "\u0169", - "utri;": "\u25b5", - "utrif;": "\u25b4", - "uuarr;": "\u21c8", - "uuml": "\xfc", - "uuml;": "\xfc", - "uwangle;": "\u29a7", - "vArr;": "\u21d5", - "vBar;": "\u2ae8", - "vBarv;": "\u2ae9", - "vDash;": "\u22a8", - "vangrt;": "\u299c", - "varepsilon;": "\u03f5", - "varkappa;": "\u03f0", - "varnothing;": "\u2205", - "varphi;": "\u03d5", - "varpi;": "\u03d6", - "varpropto;": "\u221d", - "varr;": "\u2195", - "varrho;": "\u03f1", - "varsigma;": "\u03c2", - "varsubsetneq;": "\u228a\ufe00", - "varsubsetneqq;": "\u2acb\ufe00", - "varsupsetneq;": "\u228b\ufe00", - "varsupsetneqq;": "\u2acc\ufe00", - "vartheta;": "\u03d1", - "vartriangleleft;": "\u22b2", - "vartriangleright;": "\u22b3", - "vcy;": "\u0432", - "vdash;": "\u22a2", - "vee;": "\u2228", - "veebar;": "\u22bb", - "veeeq;": "\u225a", - "vellip;": "\u22ee", - "verbar;": "|", - "vert;": "|", - "vfr;": "\U0001d533", - "vltri;": "\u22b2", - "vnsub;": "\u2282\u20d2", - "vnsup;": "\u2283\u20d2", - "vopf;": "\U0001d567", - "vprop;": "\u221d", - "vrtri;": "\u22b3", - "vscr;": "\U0001d4cb", - "vsubnE;": "\u2acb\ufe00", - "vsubne;": "\u228a\ufe00", - "vsupnE;": "\u2acc\ufe00", - "vsupne;": "\u228b\ufe00", - "vzigzag;": "\u299a", - "wcirc;": "\u0175", - "wedbar;": "\u2a5f", - "wedge;": "\u2227", - "wedgeq;": "\u2259", - "weierp;": "\u2118", - "wfr;": "\U0001d534", - "wopf;": "\U0001d568", - "wp;": "\u2118", - "wr;": "\u2240", - "wreath;": "\u2240", - "wscr;": "\U0001d4cc", - "xcap;": "\u22c2", - "xcirc;": "\u25ef", - "xcup;": "\u22c3", - "xdtri;": "\u25bd", - "xfr;": "\U0001d535", - "xhArr;": "\u27fa", - "xharr;": "\u27f7", - "xi;": "\u03be", - "xlArr;": "\u27f8", - "xlarr;": "\u27f5", - "xmap;": "\u27fc", - "xnis;": "\u22fb", - "xodot;": "\u2a00", - "xopf;": "\U0001d569", - "xoplus;": "\u2a01", - "xotime;": "\u2a02", - "xrArr;": "\u27f9", - "xrarr;": "\u27f6", - "xscr;": "\U0001d4cd", - "xsqcup;": "\u2a06", - "xuplus;": "\u2a04", - "xutri;": "\u25b3", - "xvee;": "\u22c1", - "xwedge;": "\u22c0", - "yacute": "\xfd", - "yacute;": "\xfd", - "yacy;": "\u044f", - "ycirc;": "\u0177", - "ycy;": "\u044b", - "yen": "\xa5", - "yen;": "\xa5", - "yfr;": "\U0001d536", - "yicy;": "\u0457", - "yopf;": "\U0001d56a", - "yscr;": "\U0001d4ce", - "yucy;": "\u044e", - "yuml": "\xff", - "yuml;": "\xff", - "zacute;": "\u017a", - "zcaron;": "\u017e", - "zcy;": "\u0437", - "zdot;": "\u017c", - "zeetrf;": "\u2128", - "zeta;": "\u03b6", - "zfr;": "\U0001d537", - "zhcy;": "\u0436", - "zigrarr;": "\u21dd", - "zopf;": "\U0001d56b", - "zscr;": "\U0001d4cf", - "zwj;": "\u200d", - "zwnj;": "\u200c", -} - -replacementCharacters = { - 0x0: "\uFFFD", - 0x0d: "\u000D", - 0x80: "\u20AC", - 0x81: "\u0081", - 0x82: "\u201A", - 0x83: "\u0192", - 0x84: "\u201E", - 0x85: "\u2026", - 0x86: "\u2020", - 0x87: "\u2021", - 0x88: "\u02C6", - 0x89: "\u2030", - 0x8A: "\u0160", - 0x8B: "\u2039", - 0x8C: "\u0152", - 0x8D: "\u008D", - 0x8E: "\u017D", - 0x8F: "\u008F", - 0x90: "\u0090", - 0x91: "\u2018", - 0x92: "\u2019", - 0x93: "\u201C", - 0x94: "\u201D", - 0x95: "\u2022", - 0x96: "\u2013", - 0x97: "\u2014", - 0x98: "\u02DC", - 0x99: "\u2122", - 0x9A: "\u0161", - 0x9B: "\u203A", - 0x9C: "\u0153", - 0x9D: "\u009D", - 0x9E: "\u017E", - 0x9F: "\u0178", -} - -tokenTypes = { - "Doctype": 0, - "Characters": 1, - "SpaceCharacters": 2, - "StartTag": 3, - "EndTag": 4, - "EmptyTag": 5, - "Comment": 6, - "ParseError": 7 -} - -tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"], - tokenTypes["EmptyTag"]]) - - -prefixes = {v: k for k, v in namespaces.items()} -prefixes["http://www.w3.org/1998/Math/MathML"] = "math" - - -class DataLossWarning(UserWarning): - """Raised when the current tree is unable to represent the input data""" - pass - - -class _ReparseException(Exception): - pass diff --git a/pipenv/patched/notpip/_vendor/html5lib/filters/alphabeticalattributes.py b/pipenv/patched/notpip/_vendor/html5lib/filters/alphabeticalattributes.py deleted file mode 100644 index 5ba926e3b0..0000000000 --- a/pipenv/patched/notpip/_vendor/html5lib/filters/alphabeticalattributes.py +++ /dev/null @@ -1,29 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import base - -from collections import OrderedDict - - -def _attr_key(attr): - """Return an appropriate key for an attribute for sorting - - Attributes have a namespace that can be either ``None`` or a string. We - can't compare the two because they're different types, so we convert - ``None`` to an empty string first. - - """ - return (attr[0][0] or ''), attr[0][1] - - -class Filter(base.Filter): - """Alphabetizes attributes for elements""" - def __iter__(self): - for token in base.Filter.__iter__(self): - if token["type"] in ("StartTag", "EmptyTag"): - attrs = OrderedDict() - for name, value in sorted(token["data"].items(), - key=_attr_key): - attrs[name] = value - token["data"] = attrs - yield token diff --git a/pipenv/patched/notpip/_vendor/html5lib/filters/base.py b/pipenv/patched/notpip/_vendor/html5lib/filters/base.py deleted file mode 100644 index c7dbaed0fa..0000000000 --- a/pipenv/patched/notpip/_vendor/html5lib/filters/base.py +++ /dev/null @@ -1,12 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - - -class Filter(object): - def __init__(self, source): - self.source = source - - def __iter__(self): - return iter(self.source) - - def __getattr__(self, name): - return getattr(self.source, name) diff --git a/pipenv/patched/notpip/_vendor/html5lib/filters/inject_meta_charset.py b/pipenv/patched/notpip/_vendor/html5lib/filters/inject_meta_charset.py deleted file mode 100644 index aefb5c842c..0000000000 --- a/pipenv/patched/notpip/_vendor/html5lib/filters/inject_meta_charset.py +++ /dev/null @@ -1,73 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import base - - -class Filter(base.Filter): - """Injects ```` tag into head of document""" - def __init__(self, source, encoding): - """Creates a Filter - - :arg source: the source token stream - - :arg encoding: the encoding to set - - """ - base.Filter.__init__(self, source) - self.encoding = encoding - - def __iter__(self): - state = "pre_head" - meta_found = (self.encoding is None) - pending = [] - - for token in base.Filter.__iter__(self): - type = token["type"] - if type == "StartTag": - if token["name"].lower() == "head": - state = "in_head" - - elif type == "EmptyTag": - if token["name"].lower() == "meta": - # replace charset with actual encoding - has_http_equiv_content_type = False - for (namespace, name), value in token["data"].items(): - if namespace is not None: - continue - elif name.lower() == 'charset': - token["data"][(namespace, name)] = self.encoding - meta_found = True - break - elif name == 'http-equiv' and value.lower() == 'content-type': - has_http_equiv_content_type = True - else: - if has_http_equiv_content_type and (None, "content") in token["data"]: - token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding - meta_found = True - - elif token["name"].lower() == "head" and not meta_found: - # insert meta into empty head - yield {"type": "StartTag", "name": "head", - "data": token["data"]} - yield {"type": "EmptyTag", "name": "meta", - "data": {(None, "charset"): self.encoding}} - yield {"type": "EndTag", "name": "head"} - meta_found = True - continue - - elif type == "EndTag": - if token["name"].lower() == "head" and pending: - # insert meta into head (if necessary) and flush pending queue - yield pending.pop(0) - if not meta_found: - yield {"type": "EmptyTag", "name": "meta", - "data": {(None, "charset"): self.encoding}} - while pending: - yield pending.pop(0) - meta_found = True - state = "post_head" - - if state == "in_head": - pending.append(token) - else: - yield token diff --git a/pipenv/patched/notpip/_vendor/html5lib/filters/lint.py b/pipenv/patched/notpip/_vendor/html5lib/filters/lint.py deleted file mode 100644 index 51144bf28c..0000000000 --- a/pipenv/patched/notpip/_vendor/html5lib/filters/lint.py +++ /dev/null @@ -1,93 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from pipenv.patched.notpip._vendor.six import text_type - -from . import base -from ..constants import namespaces, voidElements - -from ..constants import spaceCharacters -spaceCharacters = "".join(spaceCharacters) - - -class Filter(base.Filter): - """Lints the token stream for errors - - If it finds any errors, it'll raise an ``AssertionError``. - - """ - def __init__(self, source, require_matching_tags=True): - """Creates a Filter - - :arg source: the source token stream - - :arg require_matching_tags: whether or not to require matching tags - - """ - super(Filter, self).__init__(source) - self.require_matching_tags = require_matching_tags - - def __iter__(self): - open_elements = [] - for token in base.Filter.__iter__(self): - type = token["type"] - if type in ("StartTag", "EmptyTag"): - namespace = token["namespace"] - name = token["name"] - assert namespace is None or isinstance(namespace, text_type) - assert namespace != "" - assert isinstance(name, text_type) - assert name != "" - assert isinstance(token["data"], dict) - if (not namespace or namespace == namespaces["html"]) and name in voidElements: - assert type == "EmptyTag" - else: - assert type == "StartTag" - if type == "StartTag" and self.require_matching_tags: - open_elements.append((namespace, name)) - for (namespace, name), value in token["data"].items(): - assert namespace is None or isinstance(namespace, text_type) - assert namespace != "" - assert isinstance(name, text_type) - assert name != "" - assert isinstance(value, text_type) - - elif type == "EndTag": - namespace = token["namespace"] - name = token["name"] - assert namespace is None or isinstance(namespace, text_type) - assert namespace != "" - assert isinstance(name, text_type) - assert name != "" - if (not namespace or namespace == namespaces["html"]) and name in voidElements: - assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name} - elif self.require_matching_tags: - start = open_elements.pop() - assert start == (namespace, name) - - elif type == "Comment": - data = token["data"] - assert isinstance(data, text_type) - - elif type in ("Characters", "SpaceCharacters"): - data = token["data"] - assert isinstance(data, text_type) - assert data != "" - if type == "SpaceCharacters": - assert data.strip(spaceCharacters) == "" - - elif type == "Doctype": - name = token["name"] - assert name is None or isinstance(name, text_type) - assert token["publicId"] is None or isinstance(name, text_type) - assert token["systemId"] is None or isinstance(name, text_type) - - elif type == "Entity": - assert isinstance(token["name"], text_type) - - elif type == "SerializerError": - assert isinstance(token["data"], text_type) - - else: - assert False, "Unknown token type: %(type)s" % {"type": type} - - yield token diff --git a/pipenv/patched/notpip/_vendor/html5lib/filters/optionaltags.py b/pipenv/patched/notpip/_vendor/html5lib/filters/optionaltags.py deleted file mode 100644 index 4a865012c1..0000000000 --- a/pipenv/patched/notpip/_vendor/html5lib/filters/optionaltags.py +++ /dev/null @@ -1,207 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import base - - -class Filter(base.Filter): - """Removes optional tags from the token stream""" - def slider(self): - previous1 = previous2 = None - for token in self.source: - if previous1 is not None: - yield previous2, previous1, token - previous2 = previous1 - previous1 = token - if previous1 is not None: - yield previous2, previous1, None - - def __iter__(self): - for previous, token, next in self.slider(): - type = token["type"] - if type == "StartTag": - if (token["data"] or - not self.is_optional_start(token["name"], previous, next)): - yield token - elif type == "EndTag": - if not self.is_optional_end(token["name"], next): - yield token - else: - yield token - - def is_optional_start(self, tagname, previous, next): - type = next and next["type"] or None - if tagname in 'html': - # An html element's start tag may be omitted if the first thing - # inside the html element is not a space character or a comment. - return type not in ("Comment", "SpaceCharacters") - elif tagname == 'head': - # A head element's start tag may be omitted if the first thing - # inside the head element is an element. - # XXX: we also omit the start tag if the head element is empty - if type in ("StartTag", "EmptyTag"): - return True - elif type == "EndTag": - return next["name"] == "head" - elif tagname == 'body': - # A body element's start tag may be omitted if the first thing - # inside the body element is not a space character or a comment, - # except if the first thing inside the body element is a script - # or style element and the node immediately preceding the body - # element is a head element whose end tag has been omitted. - if type in ("Comment", "SpaceCharacters"): - return False - elif type == "StartTag": - # XXX: we do not look at the preceding event, so we never omit - # the body element's start tag if it's followed by a script or - # a style element. - return next["name"] not in ('script', 'style') - else: - return True - elif tagname == 'colgroup': - # A colgroup element's start tag may be omitted if the first thing - # inside the colgroup element is a col element, and if the element - # is not immediately preceded by another colgroup element whose - # end tag has been omitted. - if type in ("StartTag", "EmptyTag"): - # XXX: we do not look at the preceding event, so instead we never - # omit the colgroup element's end tag when it is immediately - # followed by another colgroup element. See is_optional_end. - return next["name"] == "col" - else: - return False - elif tagname == 'tbody': - # A tbody element's start tag may be omitted if the first thing - # inside the tbody element is a tr element, and if the element is - # not immediately preceded by a tbody, thead, or tfoot element - # whose end tag has been omitted. - if type == "StartTag": - # omit the thead and tfoot elements' end tag when they are - # immediately followed by a tbody element. See is_optional_end. - if previous and previous['type'] == 'EndTag' and \ - previous['name'] in ('tbody', 'thead', 'tfoot'): - return False - return next["name"] == 'tr' - else: - return False - return False - - def is_optional_end(self, tagname, next): - type = next and next["type"] or None - if tagname in ('html', 'head', 'body'): - # An html element's end tag may be omitted if the html element - # is not immediately followed by a space character or a comment. - return type not in ("Comment", "SpaceCharacters") - elif tagname in ('li', 'optgroup', 'tr'): - # A li element's end tag may be omitted if the li element is - # immediately followed by another li element or if there is - # no more content in the parent element. - # An optgroup element's end tag may be omitted if the optgroup - # element is immediately followed by another optgroup element, - # or if there is no more content in the parent element. - # A tr element's end tag may be omitted if the tr element is - # immediately followed by another tr element, or if there is - # no more content in the parent element. - if type == "StartTag": - return next["name"] == tagname - else: - return type == "EndTag" or type is None - elif tagname in ('dt', 'dd'): - # A dt element's end tag may be omitted if the dt element is - # immediately followed by another dt element or a dd element. - # A dd element's end tag may be omitted if the dd element is - # immediately followed by another dd element or a dt element, - # or if there is no more content in the parent element. - if type == "StartTag": - return next["name"] in ('dt', 'dd') - elif tagname == 'dd': - return type == "EndTag" or type is None - else: - return False - elif tagname == 'p': - # A p element's end tag may be omitted if the p element is - # immediately followed by an address, article, aside, - # blockquote, datagrid, dialog, dir, div, dl, fieldset, - # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu, - # nav, ol, p, pre, section, table, or ul, element, or if - # there is no more content in the parent element. - if type in ("StartTag", "EmptyTag"): - return next["name"] in ('address', 'article', 'aside', - 'blockquote', 'datagrid', 'dialog', - 'dir', 'div', 'dl', 'fieldset', 'footer', - 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', - 'header', 'hr', 'menu', 'nav', 'ol', - 'p', 'pre', 'section', 'table', 'ul') - else: - return type == "EndTag" or type is None - elif tagname == 'option': - # An option element's end tag may be omitted if the option - # element is immediately followed by another option element, - # or if it is immediately followed by an optgroup - # element, or if there is no more content in the parent - # element. - if type == "StartTag": - return next["name"] in ('option', 'optgroup') - else: - return type == "EndTag" or type is None - elif tagname in ('rt', 'rp'): - # An rt element's end tag may be omitted if the rt element is - # immediately followed by an rt or rp element, or if there is - # no more content in the parent element. - # An rp element's end tag may be omitted if the rp element is - # immediately followed by an rt or rp element, or if there is - # no more content in the parent element. - if type == "StartTag": - return next["name"] in ('rt', 'rp') - else: - return type == "EndTag" or type is None - elif tagname == 'colgroup': - # A colgroup element's end tag may be omitted if the colgroup - # element is not immediately followed by a space character or - # a comment. - if type in ("Comment", "SpaceCharacters"): - return False - elif type == "StartTag": - # XXX: we also look for an immediately following colgroup - # element. See is_optional_start. - return next["name"] != 'colgroup' - else: - return True - elif tagname in ('thead', 'tbody'): - # A thead element's end tag may be omitted if the thead element - # is immediately followed by a tbody or tfoot element. - # A tbody element's end tag may be omitted if the tbody element - # is immediately followed by a tbody or tfoot element, or if - # there is no more content in the parent element. - # A tfoot element's end tag may be omitted if the tfoot element - # is immediately followed by a tbody element, or if there is no - # more content in the parent element. - # XXX: we never omit the end tag when the following element is - # a tbody. See is_optional_start. - if type == "StartTag": - return next["name"] in ['tbody', 'tfoot'] - elif tagname == 'tbody': - return type == "EndTag" or type is None - else: - return False - elif tagname == 'tfoot': - # A tfoot element's end tag may be omitted if the tfoot element - # is immediately followed by a tbody element, or if there is no - # more content in the parent element. - # XXX: we never omit the end tag when the following element is - # a tbody. See is_optional_start. - if type == "StartTag": - return next["name"] == 'tbody' - else: - return type == "EndTag" or type is None - elif tagname in ('td', 'th'): - # A td element's end tag may be omitted if the td element is - # immediately followed by a td or th element, or if there is - # no more content in the parent element. - # A th element's end tag may be omitted if the th element is - # immediately followed by a td or th element, or if there is - # no more content in the parent element. - if type == "StartTag": - return next["name"] in ('td', 'th') - else: - return type == "EndTag" or type is None - return False diff --git a/pipenv/patched/notpip/_vendor/html5lib/filters/sanitizer.py b/pipenv/patched/notpip/_vendor/html5lib/filters/sanitizer.py deleted file mode 100644 index 788e73d25c..0000000000 --- a/pipenv/patched/notpip/_vendor/html5lib/filters/sanitizer.py +++ /dev/null @@ -1,916 +0,0 @@ -"""Deprecated from html5lib 1.1. - -See `here `_ for -information about its deprecation; `Bleach `_ -is recommended as a replacement. Please let us know in the aforementioned issue -if Bleach is unsuitable for your needs. - -""" -from __future__ import absolute_import, division, unicode_literals - -import re -import warnings -from xml.sax.saxutils import escape, unescape - -from pipenv.patched.notpip._vendor.six.moves import urllib_parse as urlparse - -from . import base -from ..constants import namespaces, prefixes - -__all__ = ["Filter"] - - -_deprecation_msg = ( - "html5lib's sanitizer is deprecated; see " + - "https://github.com/html5lib/html5lib-python/issues/443 and please let " + - "us know if Bleach is unsuitable for your needs" -) - -warnings.warn(_deprecation_msg, DeprecationWarning) - -allowed_elements = frozenset(( - (namespaces['html'], 'a'), - (namespaces['html'], 'abbr'), - (namespaces['html'], 'acronym'), - (namespaces['html'], 'address'), - (namespaces['html'], 'area'), - (namespaces['html'], 'article'), - (namespaces['html'], 'aside'), - (namespaces['html'], 'audio'), - (namespaces['html'], 'b'), - (namespaces['html'], 'big'), - (namespaces['html'], 'blockquote'), - (namespaces['html'], 'br'), - (namespaces['html'], 'button'), - (namespaces['html'], 'canvas'), - (namespaces['html'], 'caption'), - (namespaces['html'], 'center'), - (namespaces['html'], 'cite'), - (namespaces['html'], 'code'), - (namespaces['html'], 'col'), - (namespaces['html'], 'colgroup'), - (namespaces['html'], 'command'), - (namespaces['html'], 'datagrid'), - (namespaces['html'], 'datalist'), - (namespaces['html'], 'dd'), - (namespaces['html'], 'del'), - (namespaces['html'], 'details'), - (namespaces['html'], 'dfn'), - (namespaces['html'], 'dialog'), - (namespaces['html'], 'dir'), - (namespaces['html'], 'div'), - (namespaces['html'], 'dl'), - (namespaces['html'], 'dt'), - (namespaces['html'], 'em'), - (namespaces['html'], 'event-source'), - (namespaces['html'], 'fieldset'), - (namespaces['html'], 'figcaption'), - (namespaces['html'], 'figure'), - (namespaces['html'], 'footer'), - (namespaces['html'], 'font'), - (namespaces['html'], 'form'), - (namespaces['html'], 'header'), - (namespaces['html'], 'h1'), - (namespaces['html'], 'h2'), - (namespaces['html'], 'h3'), - (namespaces['html'], 'h4'), - (namespaces['html'], 'h5'), - (namespaces['html'], 'h6'), - (namespaces['html'], 'hr'), - (namespaces['html'], 'i'), - (namespaces['html'], 'img'), - (namespaces['html'], 'input'), - (namespaces['html'], 'ins'), - (namespaces['html'], 'keygen'), - (namespaces['html'], 'kbd'), - (namespaces['html'], 'label'), - (namespaces['html'], 'legend'), - (namespaces['html'], 'li'), - (namespaces['html'], 'm'), - (namespaces['html'], 'map'), - (namespaces['html'], 'menu'), - (namespaces['html'], 'meter'), - (namespaces['html'], 'multicol'), - (namespaces['html'], 'nav'), - (namespaces['html'], 'nextid'), - (namespaces['html'], 'ol'), - (namespaces['html'], 'output'), - (namespaces['html'], 'optgroup'), - (namespaces['html'], 'option'), - (namespaces['html'], 'p'), - (namespaces['html'], 'pre'), - (namespaces['html'], 'progress'), - (namespaces['html'], 'q'), - (namespaces['html'], 's'), - (namespaces['html'], 'samp'), - (namespaces['html'], 'section'), - (namespaces['html'], 'select'), - (namespaces['html'], 'small'), - (namespaces['html'], 'sound'), - (namespaces['html'], 'source'), - (namespaces['html'], 'spacer'), - (namespaces['html'], 'span'), - (namespaces['html'], 'strike'), - (namespaces['html'], 'strong'), - (namespaces['html'], 'sub'), - (namespaces['html'], 'sup'), - (namespaces['html'], 'table'), - (namespaces['html'], 'tbody'), - (namespaces['html'], 'td'), - (namespaces['html'], 'textarea'), - (namespaces['html'], 'time'), - (namespaces['html'], 'tfoot'), - (namespaces['html'], 'th'), - (namespaces['html'], 'thead'), - (namespaces['html'], 'tr'), - (namespaces['html'], 'tt'), - (namespaces['html'], 'u'), - (namespaces['html'], 'ul'), - (namespaces['html'], 'var'), - (namespaces['html'], 'video'), - (namespaces['mathml'], 'maction'), - (namespaces['mathml'], 'math'), - (namespaces['mathml'], 'merror'), - (namespaces['mathml'], 'mfrac'), - (namespaces['mathml'], 'mi'), - (namespaces['mathml'], 'mmultiscripts'), - (namespaces['mathml'], 'mn'), - (namespaces['mathml'], 'mo'), - (namespaces['mathml'], 'mover'), - (namespaces['mathml'], 'mpadded'), - (namespaces['mathml'], 'mphantom'), - (namespaces['mathml'], 'mprescripts'), - (namespaces['mathml'], 'mroot'), - (namespaces['mathml'], 'mrow'), - (namespaces['mathml'], 'mspace'), - (namespaces['mathml'], 'msqrt'), - (namespaces['mathml'], 'mstyle'), - (namespaces['mathml'], 'msub'), - (namespaces['mathml'], 'msubsup'), - (namespaces['mathml'], 'msup'), - (namespaces['mathml'], 'mtable'), - (namespaces['mathml'], 'mtd'), - (namespaces['mathml'], 'mtext'), - (namespaces['mathml'], 'mtr'), - (namespaces['mathml'], 'munder'), - (namespaces['mathml'], 'munderover'), - (namespaces['mathml'], 'none'), - (namespaces['svg'], 'a'), - (namespaces['svg'], 'animate'), - (namespaces['svg'], 'animateColor'), - (namespaces['svg'], 'animateMotion'), - (namespaces['svg'], 'animateTransform'), - (namespaces['svg'], 'clipPath'), - (namespaces['svg'], 'circle'), - (namespaces['svg'], 'defs'), - (namespaces['svg'], 'desc'), - (namespaces['svg'], 'ellipse'), - (namespaces['svg'], 'font-face'), - (namespaces['svg'], 'font-face-name'), - (namespaces['svg'], 'font-face-src'), - (namespaces['svg'], 'g'), - (namespaces['svg'], 'glyph'), - (namespaces['svg'], 'hkern'), - (namespaces['svg'], 'linearGradient'), - (namespaces['svg'], 'line'), - (namespaces['svg'], 'marker'), - (namespaces['svg'], 'metadata'), - (namespaces['svg'], 'missing-glyph'), - (namespaces['svg'], 'mpath'), - (namespaces['svg'], 'path'), - (namespaces['svg'], 'polygon'), - (namespaces['svg'], 'polyline'), - (namespaces['svg'], 'radialGradient'), - (namespaces['svg'], 'rect'), - (namespaces['svg'], 'set'), - (namespaces['svg'], 'stop'), - (namespaces['svg'], 'svg'), - (namespaces['svg'], 'switch'), - (namespaces['svg'], 'text'), - (namespaces['svg'], 'title'), - (namespaces['svg'], 'tspan'), - (namespaces['svg'], 'use'), -)) - -allowed_attributes = frozenset(( - # HTML attributes - (None, 'abbr'), - (None, 'accept'), - (None, 'accept-charset'), - (None, 'accesskey'), - (None, 'action'), - (None, 'align'), - (None, 'alt'), - (None, 'autocomplete'), - (None, 'autofocus'), - (None, 'axis'), - (None, 'background'), - (None, 'balance'), - (None, 'bgcolor'), - (None, 'bgproperties'), - (None, 'border'), - (None, 'bordercolor'), - (None, 'bordercolordark'), - (None, 'bordercolorlight'), - (None, 'bottompadding'), - (None, 'cellpadding'), - (None, 'cellspacing'), - (None, 'ch'), - (None, 'challenge'), - (None, 'char'), - (None, 'charoff'), - (None, 'choff'), - (None, 'charset'), - (None, 'checked'), - (None, 'cite'), - (None, 'class'), - (None, 'clear'), - (None, 'color'), - (None, 'cols'), - (None, 'colspan'), - (None, 'compact'), - (None, 'contenteditable'), - (None, 'controls'), - (None, 'coords'), - (None, 'data'), - (None, 'datafld'), - (None, 'datapagesize'), - (None, 'datasrc'), - (None, 'datetime'), - (None, 'default'), - (None, 'delay'), - (None, 'dir'), - (None, 'disabled'), - (None, 'draggable'), - (None, 'dynsrc'), - (None, 'enctype'), - (None, 'end'), - (None, 'face'), - (None, 'for'), - (None, 'form'), - (None, 'frame'), - (None, 'galleryimg'), - (None, 'gutter'), - (None, 'headers'), - (None, 'height'), - (None, 'hidefocus'), - (None, 'hidden'), - (None, 'high'), - (None, 'href'), - (None, 'hreflang'), - (None, 'hspace'), - (None, 'icon'), - (None, 'id'), - (None, 'inputmode'), - (None, 'ismap'), - (None, 'keytype'), - (None, 'label'), - (None, 'leftspacing'), - (None, 'lang'), - (None, 'list'), - (None, 'longdesc'), - (None, 'loop'), - (None, 'loopcount'), - (None, 'loopend'), - (None, 'loopstart'), - (None, 'low'), - (None, 'lowsrc'), - (None, 'max'), - (None, 'maxlength'), - (None, 'media'), - (None, 'method'), - (None, 'min'), - (None, 'multiple'), - (None, 'name'), - (None, 'nohref'), - (None, 'noshade'), - (None, 'nowrap'), - (None, 'open'), - (None, 'optimum'), - (None, 'pattern'), - (None, 'ping'), - (None, 'point-size'), - (None, 'poster'), - (None, 'pqg'), - (None, 'preload'), - (None, 'prompt'), - (None, 'radiogroup'), - (None, 'readonly'), - (None, 'rel'), - (None, 'repeat-max'), - (None, 'repeat-min'), - (None, 'replace'), - (None, 'required'), - (None, 'rev'), - (None, 'rightspacing'), - (None, 'rows'), - (None, 'rowspan'), - (None, 'rules'), - (None, 'scope'), - (None, 'selected'), - (None, 'shape'), - (None, 'size'), - (None, 'span'), - (None, 'src'), - (None, 'start'), - (None, 'step'), - (None, 'style'), - (None, 'summary'), - (None, 'suppress'), - (None, 'tabindex'), - (None, 'target'), - (None, 'template'), - (None, 'title'), - (None, 'toppadding'), - (None, 'type'), - (None, 'unselectable'), - (None, 'usemap'), - (None, 'urn'), - (None, 'valign'), - (None, 'value'), - (None, 'variable'), - (None, 'volume'), - (None, 'vspace'), - (None, 'vrml'), - (None, 'width'), - (None, 'wrap'), - (namespaces['xml'], 'lang'), - # MathML attributes - (None, 'actiontype'), - (None, 'align'), - (None, 'columnalign'), - (None, 'columnalign'), - (None, 'columnalign'), - (None, 'columnlines'), - (None, 'columnspacing'), - (None, 'columnspan'), - (None, 'depth'), - (None, 'display'), - (None, 'displaystyle'), - (None, 'equalcolumns'), - (None, 'equalrows'), - (None, 'fence'), - (None, 'fontstyle'), - (None, 'fontweight'), - (None, 'frame'), - (None, 'height'), - (None, 'linethickness'), - (None, 'lspace'), - (None, 'mathbackground'), - (None, 'mathcolor'), - (None, 'mathvariant'), - (None, 'mathvariant'), - (None, 'maxsize'), - (None, 'minsize'), - (None, 'other'), - (None, 'rowalign'), - (None, 'rowalign'), - (None, 'rowalign'), - (None, 'rowlines'), - (None, 'rowspacing'), - (None, 'rowspan'), - (None, 'rspace'), - (None, 'scriptlevel'), - (None, 'selection'), - (None, 'separator'), - (None, 'stretchy'), - (None, 'width'), - (None, 'width'), - (namespaces['xlink'], 'href'), - (namespaces['xlink'], 'show'), - (namespaces['xlink'], 'type'), - # SVG attributes - (None, 'accent-height'), - (None, 'accumulate'), - (None, 'additive'), - (None, 'alphabetic'), - (None, 'arabic-form'), - (None, 'ascent'), - (None, 'attributeName'), - (None, 'attributeType'), - (None, 'baseProfile'), - (None, 'bbox'), - (None, 'begin'), - (None, 'by'), - (None, 'calcMode'), - (None, 'cap-height'), - (None, 'class'), - (None, 'clip-path'), - (None, 'color'), - (None, 'color-rendering'), - (None, 'content'), - (None, 'cx'), - (None, 'cy'), - (None, 'd'), - (None, 'dx'), - (None, 'dy'), - (None, 'descent'), - (None, 'display'), - (None, 'dur'), - (None, 'end'), - (None, 'fill'), - (None, 'fill-opacity'), - (None, 'fill-rule'), - (None, 'font-family'), - (None, 'font-size'), - (None, 'font-stretch'), - (None, 'font-style'), - (None, 'font-variant'), - (None, 'font-weight'), - (None, 'from'), - (None, 'fx'), - (None, 'fy'), - (None, 'g1'), - (None, 'g2'), - (None, 'glyph-name'), - (None, 'gradientUnits'), - (None, 'hanging'), - (None, 'height'), - (None, 'horiz-adv-x'), - (None, 'horiz-origin-x'), - (None, 'id'), - (None, 'ideographic'), - (None, 'k'), - (None, 'keyPoints'), - (None, 'keySplines'), - (None, 'keyTimes'), - (None, 'lang'), - (None, 'marker-end'), - (None, 'marker-mid'), - (None, 'marker-start'), - (None, 'markerHeight'), - (None, 'markerUnits'), - (None, 'markerWidth'), - (None, 'mathematical'), - (None, 'max'), - (None, 'min'), - (None, 'name'), - (None, 'offset'), - (None, 'opacity'), - (None, 'orient'), - (None, 'origin'), - (None, 'overline-position'), - (None, 'overline-thickness'), - (None, 'panose-1'), - (None, 'path'), - (None, 'pathLength'), - (None, 'points'), - (None, 'preserveAspectRatio'), - (None, 'r'), - (None, 'refX'), - (None, 'refY'), - (None, 'repeatCount'), - (None, 'repeatDur'), - (None, 'requiredExtensions'), - (None, 'requiredFeatures'), - (None, 'restart'), - (None, 'rotate'), - (None, 'rx'), - (None, 'ry'), - (None, 'slope'), - (None, 'stemh'), - (None, 'stemv'), - (None, 'stop-color'), - (None, 'stop-opacity'), - (None, 'strikethrough-position'), - (None, 'strikethrough-thickness'), - (None, 'stroke'), - (None, 'stroke-dasharray'), - (None, 'stroke-dashoffset'), - (None, 'stroke-linecap'), - (None, 'stroke-linejoin'), - (None, 'stroke-miterlimit'), - (None, 'stroke-opacity'), - (None, 'stroke-width'), - (None, 'systemLanguage'), - (None, 'target'), - (None, 'text-anchor'), - (None, 'to'), - (None, 'transform'), - (None, 'type'), - (None, 'u1'), - (None, 'u2'), - (None, 'underline-position'), - (None, 'underline-thickness'), - (None, 'unicode'), - (None, 'unicode-range'), - (None, 'units-per-em'), - (None, 'values'), - (None, 'version'), - (None, 'viewBox'), - (None, 'visibility'), - (None, 'width'), - (None, 'widths'), - (None, 'x'), - (None, 'x-height'), - (None, 'x1'), - (None, 'x2'), - (namespaces['xlink'], 'actuate'), - (namespaces['xlink'], 'arcrole'), - (namespaces['xlink'], 'href'), - (namespaces['xlink'], 'role'), - (namespaces['xlink'], 'show'), - (namespaces['xlink'], 'title'), - (namespaces['xlink'], 'type'), - (namespaces['xml'], 'base'), - (namespaces['xml'], 'lang'), - (namespaces['xml'], 'space'), - (None, 'y'), - (None, 'y1'), - (None, 'y2'), - (None, 'zoomAndPan'), -)) - -attr_val_is_uri = frozenset(( - (None, 'href'), - (None, 'src'), - (None, 'cite'), - (None, 'action'), - (None, 'longdesc'), - (None, 'poster'), - (None, 'background'), - (None, 'datasrc'), - (None, 'dynsrc'), - (None, 'lowsrc'), - (None, 'ping'), - (namespaces['xlink'], 'href'), - (namespaces['xml'], 'base'), -)) - -svg_attr_val_allows_ref = frozenset(( - (None, 'clip-path'), - (None, 'color-profile'), - (None, 'cursor'), - (None, 'fill'), - (None, 'filter'), - (None, 'marker'), - (None, 'marker-start'), - (None, 'marker-mid'), - (None, 'marker-end'), - (None, 'mask'), - (None, 'stroke'), -)) - -svg_allow_local_href = frozenset(( - (None, 'altGlyph'), - (None, 'animate'), - (None, 'animateColor'), - (None, 'animateMotion'), - (None, 'animateTransform'), - (None, 'cursor'), - (None, 'feImage'), - (None, 'filter'), - (None, 'linearGradient'), - (None, 'pattern'), - (None, 'radialGradient'), - (None, 'textpath'), - (None, 'tref'), - (None, 'set'), - (None, 'use') -)) - -allowed_css_properties = frozenset(( - 'azimuth', - 'background-color', - 'border-bottom-color', - 'border-collapse', - 'border-color', - 'border-left-color', - 'border-right-color', - 'border-top-color', - 'clear', - 'color', - 'cursor', - 'direction', - 'display', - 'elevation', - 'float', - 'font', - 'font-family', - 'font-size', - 'font-style', - 'font-variant', - 'font-weight', - 'height', - 'letter-spacing', - 'line-height', - 'overflow', - 'pause', - 'pause-after', - 'pause-before', - 'pitch', - 'pitch-range', - 'richness', - 'speak', - 'speak-header', - 'speak-numeral', - 'speak-punctuation', - 'speech-rate', - 'stress', - 'text-align', - 'text-decoration', - 'text-indent', - 'unicode-bidi', - 'vertical-align', - 'voice-family', - 'volume', - 'white-space', - 'width', -)) - -allowed_css_keywords = frozenset(( - 'auto', - 'aqua', - 'black', - 'block', - 'blue', - 'bold', - 'both', - 'bottom', - 'brown', - 'center', - 'collapse', - 'dashed', - 'dotted', - 'fuchsia', - 'gray', - 'green', - '!important', - 'italic', - 'left', - 'lime', - 'maroon', - 'medium', - 'none', - 'navy', - 'normal', - 'nowrap', - 'olive', - 'pointer', - 'purple', - 'red', - 'right', - 'solid', - 'silver', - 'teal', - 'top', - 'transparent', - 'underline', - 'white', - 'yellow', -)) - -allowed_svg_properties = frozenset(( - 'fill', - 'fill-opacity', - 'fill-rule', - 'stroke', - 'stroke-width', - 'stroke-linecap', - 'stroke-linejoin', - 'stroke-opacity', -)) - -allowed_protocols = frozenset(( - 'ed2k', - 'ftp', - 'http', - 'https', - 'irc', - 'mailto', - 'news', - 'gopher', - 'nntp', - 'telnet', - 'webcal', - 'xmpp', - 'callto', - 'feed', - 'urn', - 'aim', - 'rsync', - 'tag', - 'ssh', - 'sftp', - 'rtsp', - 'afs', - 'data', -)) - -allowed_content_types = frozenset(( - 'image/png', - 'image/jpeg', - 'image/gif', - 'image/webp', - 'image/bmp', - 'text/plain', -)) - - -data_content_type = re.compile(r''' - ^ - # Match a content type / - (?P[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+) - # Match any character set and encoding - (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?) - |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?) - # Assume the rest is data - ,.* - $ - ''', - re.VERBOSE) - - -class Filter(base.Filter): - """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes""" - def __init__(self, - source, - allowed_elements=allowed_elements, - allowed_attributes=allowed_attributes, - allowed_css_properties=allowed_css_properties, - allowed_css_keywords=allowed_css_keywords, - allowed_svg_properties=allowed_svg_properties, - allowed_protocols=allowed_protocols, - allowed_content_types=allowed_content_types, - attr_val_is_uri=attr_val_is_uri, - svg_attr_val_allows_ref=svg_attr_val_allows_ref, - svg_allow_local_href=svg_allow_local_href): - """Creates a Filter - - :arg allowed_elements: set of elements to allow--everything else will - be escaped - - :arg allowed_attributes: set of attributes to allow in - elements--everything else will be stripped - - :arg allowed_css_properties: set of CSS properties to allow--everything - else will be stripped - - :arg allowed_css_keywords: set of CSS keywords to allow--everything - else will be stripped - - :arg allowed_svg_properties: set of SVG properties to allow--everything - else will be removed - - :arg allowed_protocols: set of allowed protocols for URIs - - :arg allowed_content_types: set of allowed content types for ``data`` URIs. - - :arg attr_val_is_uri: set of attributes that have URI values--values - that have a scheme not listed in ``allowed_protocols`` are removed - - :arg svg_attr_val_allows_ref: set of SVG attributes that can have - references - - :arg svg_allow_local_href: set of SVG elements that can have local - hrefs--these are removed - - """ - super(Filter, self).__init__(source) - - warnings.warn(_deprecation_msg, DeprecationWarning) - - self.allowed_elements = allowed_elements - self.allowed_attributes = allowed_attributes - self.allowed_css_properties = allowed_css_properties - self.allowed_css_keywords = allowed_css_keywords - self.allowed_svg_properties = allowed_svg_properties - self.allowed_protocols = allowed_protocols - self.allowed_content_types = allowed_content_types - self.attr_val_is_uri = attr_val_is_uri - self.svg_attr_val_allows_ref = svg_attr_val_allows_ref - self.svg_allow_local_href = svg_allow_local_href - - def __iter__(self): - for token in base.Filter.__iter__(self): - token = self.sanitize_token(token) - if token: - yield token - - # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and - # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes - # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and - # ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI - # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are - # allowed. - # - # sanitize_html('') - # => <script> do_nasty_stuff() </script> - # sanitize_html('Click here for $100') - # => Click here for $100 - def sanitize_token(self, token): - - # accommodate filters which use token_type differently - token_type = token["type"] - if token_type in ("StartTag", "EndTag", "EmptyTag"): - name = token["name"] - namespace = token["namespace"] - if ((namespace, name) in self.allowed_elements or - (namespace is None and - (namespaces["html"], name) in self.allowed_elements)): - return self.allowed_token(token) - else: - return self.disallowed_token(token) - elif token_type == "Comment": - pass - else: - return token - - def allowed_token(self, token): - if "data" in token: - attrs = token["data"] - attr_names = set(attrs.keys()) - - # Remove forbidden attributes - for to_remove in (attr_names - self.allowed_attributes): - del token["data"][to_remove] - attr_names.remove(to_remove) - - # Remove attributes with disallowed URL values - for attr in (attr_names & self.attr_val_is_uri): - assert attr in attrs - # I don't have a clue where this regexp comes from or why it matches those - # characters, nor why we call unescape. I just know it's always been here. - # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all - # this will do is remove *more* than it otherwise would. - val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '', - unescape(attrs[attr])).lower() - # remove replacement characters from unescaped characters - val_unescaped = val_unescaped.replace("\ufffd", "") - try: - uri = urlparse.urlparse(val_unescaped) - except ValueError: - uri = None - del attrs[attr] - if uri and uri.scheme: - if uri.scheme not in self.allowed_protocols: - del attrs[attr] - if uri.scheme == 'data': - m = data_content_type.match(uri.path) - if not m: - del attrs[attr] - elif m.group('content_type') not in self.allowed_content_types: - del attrs[attr] - - for attr in self.svg_attr_val_allows_ref: - if attr in attrs: - attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', - ' ', - unescape(attrs[attr])) - if (token["name"] in self.svg_allow_local_href and - (namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*', - attrs[(namespaces['xlink'], 'href')])): - del attrs[(namespaces['xlink'], 'href')] - if (None, 'style') in attrs: - attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')]) - token["data"] = attrs - return token - - def disallowed_token(self, token): - token_type = token["type"] - if token_type == "EndTag": - token["data"] = "" % token["name"] - elif token["data"]: - assert token_type in ("StartTag", "EmptyTag") - attrs = [] - for (ns, name), v in token["data"].items(): - attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v))) - token["data"] = "<%s%s>" % (token["name"], ''.join(attrs)) - else: - token["data"] = "<%s>" % token["name"] - if token.get("selfClosing"): - token["data"] = token["data"][:-1] + "/>" - - token["type"] = "Characters" - - del token["name"] - return token - - def sanitize_css(self, style): - # disallow urls - style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) - - # gauntlet - if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): - return '' - if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): - return '' - - clean = [] - for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style): - if not value: - continue - if prop.lower() in self.allowed_css_properties: - clean.append(prop + ': ' + value + ';') - elif prop.split('-')[0].lower() in ['background', 'border', 'margin', - 'padding']: - for keyword in value.split(): - if keyword not in self.allowed_css_keywords and \ - not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa - break - else: - clean.append(prop + ': ' + value + ';') - elif prop.lower() in self.allowed_svg_properties: - clean.append(prop + ': ' + value + ';') - - return ' '.join(clean) diff --git a/pipenv/patched/notpip/_vendor/html5lib/filters/whitespace.py b/pipenv/patched/notpip/_vendor/html5lib/filters/whitespace.py deleted file mode 100644 index 0d12584b45..0000000000 --- a/pipenv/patched/notpip/_vendor/html5lib/filters/whitespace.py +++ /dev/null @@ -1,38 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import re - -from . import base -from ..constants import rcdataElements, spaceCharacters -spaceCharacters = "".join(spaceCharacters) - -SPACES_REGEX = re.compile("[%s]+" % spaceCharacters) - - -class Filter(base.Filter): - """Collapses whitespace except in pre, textarea, and script elements""" - spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements)) - - def __iter__(self): - preserve = 0 - for token in base.Filter.__iter__(self): - type = token["type"] - if type == "StartTag" \ - and (preserve or token["name"] in self.spacePreserveElements): - preserve += 1 - - elif type == "EndTag" and preserve: - preserve -= 1 - - elif not preserve and type == "SpaceCharacters" and token["data"]: - # Test on token["data"] above to not introduce spaces where there were not - token["data"] = " " - - elif not preserve and type == "Characters": - token["data"] = collapse_spaces(token["data"]) - - yield token - - -def collapse_spaces(text): - return SPACES_REGEX.sub(' ', text) diff --git a/pipenv/patched/notpip/_vendor/html5lib/html5parser.py b/pipenv/patched/notpip/_vendor/html5lib/html5parser.py deleted file mode 100644 index 9c9a2ed5f8..0000000000 --- a/pipenv/patched/notpip/_vendor/html5lib/html5parser.py +++ /dev/null @@ -1,2795 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from pipenv.patched.notpip._vendor.six import with_metaclass, viewkeys - -import types - -from . import _inputstream -from . import _tokenizer - -from . import treebuilders -from .treebuilders.base import Marker - -from . import _utils -from .constants import ( - spaceCharacters, asciiUpper2Lower, - specialElements, headingElements, cdataElements, rcdataElements, - tokenTypes, tagTokenTypes, - namespaces, - htmlIntegrationPointElements, mathmlTextIntegrationPointElements, - adjustForeignAttributes as adjustForeignAttributesMap, - adjustMathMLAttributes, adjustSVGAttributes, - E, - _ReparseException -) - - -def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs): - """Parse an HTML document as a string or file-like object into a tree - - :arg doc: the document to parse as a string or file-like object - - :arg treebuilder: the treebuilder to use when parsing - - :arg namespaceHTMLElements: whether or not to namespace HTML elements - - :returns: parsed tree - - Example: - - >>> from html5lib.html5parser import parse - >>> parse('

This is a doc

') - - - """ - tb = treebuilders.getTreeBuilder(treebuilder) - p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) - return p.parse(doc, **kwargs) - - -def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs): - """Parse an HTML fragment as a string or file-like object into a tree - - :arg doc: the fragment to parse as a string or file-like object - - :arg container: the container context to parse the fragment in - - :arg treebuilder: the treebuilder to use when parsing - - :arg namespaceHTMLElements: whether or not to namespace HTML elements - - :returns: parsed tree - - Example: - - >>> from html5lib.html5libparser import parseFragment - >>> parseFragment('this is a fragment') - - - """ - tb = treebuilders.getTreeBuilder(treebuilder) - p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) - return p.parseFragment(doc, container=container, **kwargs) - - -def method_decorator_metaclass(function): - class Decorated(type): - def __new__(meta, classname, bases, classDict): - for attributeName, attribute in classDict.items(): - if isinstance(attribute, types.FunctionType): - attribute = function(attribute) - - classDict[attributeName] = attribute - return type.__new__(meta, classname, bases, classDict) - return Decorated - - -class HTMLParser(object): - """HTML parser - - Generates a tree structure from a stream of (possibly malformed) HTML. - - """ - - def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False): - """ - :arg tree: a treebuilder class controlling the type of tree that will be - returned. Built in treebuilders can be accessed through - html5lib.treebuilders.getTreeBuilder(treeType) - - :arg strict: raise an exception when a parse error is encountered - - :arg namespaceHTMLElements: whether or not to namespace HTML elements - - :arg debug: whether or not to enable debug mode which logs things - - Example: - - >>> from html5lib.html5parser import HTMLParser - >>> parser = HTMLParser() # generates parser with etree builder - >>> parser = HTMLParser('lxml', strict=True) # generates parser with lxml builder which is strict - - """ - - # Raise an exception on the first error encountered - self.strict = strict - - if tree is None: - tree = treebuilders.getTreeBuilder("etree") - self.tree = tree(namespaceHTMLElements) - self.errors = [] - - self.phases = {name: cls(self, self.tree) for name, cls in - getPhases(debug).items()} - - def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs): - - self.innerHTMLMode = innerHTML - self.container = container - self.scripting = scripting - self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs) - self.reset() - - try: - self.mainLoop() - except _ReparseException: - self.reset() - self.mainLoop() - - def reset(self): - self.tree.reset() - self.firstStartTag = False - self.errors = [] - self.log = [] # only used with debug mode - # "quirks" / "limited quirks" / "no quirks" - self.compatMode = "no quirks" - - if self.innerHTMLMode: - self.innerHTML = self.container.lower() - - if self.innerHTML in cdataElements: - self.tokenizer.state = self.tokenizer.rcdataState - elif self.innerHTML in rcdataElements: - self.tokenizer.state = self.tokenizer.rawtextState - elif self.innerHTML == 'plaintext': - self.tokenizer.state = self.tokenizer.plaintextState - else: - # state already is data state - # self.tokenizer.state = self.tokenizer.dataState - pass - self.phase = self.phases["beforeHtml"] - self.phase.insertHtmlElement() - self.resetInsertionMode() - else: - self.innerHTML = False # pylint:disable=redefined-variable-type - self.phase = self.phases["initial"] - - self.lastPhase = None - - self.beforeRCDataPhase = None - - self.framesetOK = True - - @property - def documentEncoding(self): - """Name of the character encoding that was used to decode the input stream, or - :obj:`None` if that is not determined yet - - """ - if not hasattr(self, 'tokenizer'): - return None - return self.tokenizer.stream.charEncoding[0].name - - def isHTMLIntegrationPoint(self, element): - if (element.name == "annotation-xml" and - element.namespace == namespaces["mathml"]): - return ("encoding" in element.attributes and - element.attributes["encoding"].translate( - asciiUpper2Lower) in - ("text/html", "application/xhtml+xml")) - else: - return (element.namespace, element.name) in htmlIntegrationPointElements - - def isMathMLTextIntegrationPoint(self, element): - return (element.namespace, element.name) in mathmlTextIntegrationPointElements - - def mainLoop(self): - CharactersToken = tokenTypes["Characters"] - SpaceCharactersToken = tokenTypes["SpaceCharacters"] - StartTagToken = tokenTypes["StartTag"] - EndTagToken = tokenTypes["EndTag"] - CommentToken = tokenTypes["Comment"] - DoctypeToken = tokenTypes["Doctype"] - ParseErrorToken = tokenTypes["ParseError"] - - for token in self.tokenizer: - prev_token = None - new_token = token - while new_token is not None: - prev_token = new_token - currentNode = self.tree.openElements[-1] if self.tree.openElements else None - currentNodeNamespace = currentNode.namespace if currentNode else None - currentNodeName = currentNode.name if currentNode else None - - type = new_token["type"] - - if type == ParseErrorToken: - self.parseError(new_token["data"], new_token.get("datavars", {})) - new_token = None - else: - if (len(self.tree.openElements) == 0 or - currentNodeNamespace == self.tree.defaultNamespace or - (self.isMathMLTextIntegrationPoint(currentNode) and - ((type == StartTagToken and - token["name"] not in frozenset(["mglyph", "malignmark"])) or - type in (CharactersToken, SpaceCharactersToken))) or - (currentNodeNamespace == namespaces["mathml"] and - currentNodeName == "annotation-xml" and - type == StartTagToken and - token["name"] == "svg") or - (self.isHTMLIntegrationPoint(currentNode) and - type in (StartTagToken, CharactersToken, SpaceCharactersToken))): - phase = self.phase - else: - phase = self.phases["inForeignContent"] - - if type == CharactersToken: - new_token = phase.processCharacters(new_token) - elif type == SpaceCharactersToken: - new_token = phase.processSpaceCharacters(new_token) - elif type == StartTagToken: - new_token = phase.processStartTag(new_token) - elif type == EndTagToken: - new_token = phase.processEndTag(new_token) - elif type == CommentToken: - new_token = phase.processComment(new_token) - elif type == DoctypeToken: - new_token = phase.processDoctype(new_token) - - if (type == StartTagToken and prev_token["selfClosing"] and - not prev_token["selfClosingAcknowledged"]): - self.parseError("non-void-element-with-trailing-solidus", - {"name": prev_token["name"]}) - - # When the loop finishes it's EOF - reprocess = True - phases = [] - while reprocess: - phases.append(self.phase) - reprocess = self.phase.processEOF() - if reprocess: - assert self.phase not in phases - - def parse(self, stream, *args, **kwargs): - """Parse a HTML document into a well-formed tree - - :arg stream: a file-like object or string containing the HTML to be parsed - - The optional encoding parameter must be a string that indicates - the encoding. If specified, that encoding will be used, - regardless of any BOM or later declaration (such as in a meta - element). - - :arg scripting: treat noscript elements as if JavaScript was turned on - - :returns: parsed tree - - Example: - - >>> from html5lib.html5parser import HTMLParser - >>> parser = HTMLParser() - >>> parser.parse('

This is a doc

') - - - """ - self._parse(stream, False, None, *args, **kwargs) - return self.tree.getDocument() - - def parseFragment(self, stream, *args, **kwargs): - """Parse a HTML fragment into a well-formed tree fragment - - :arg container: name of the element we're setting the innerHTML - property if set to None, default to 'div' - - :arg stream: a file-like object or string containing the HTML to be parsed - - The optional encoding parameter must be a string that indicates - the encoding. If specified, that encoding will be used, - regardless of any BOM or later declaration (such as in a meta - element) - - :arg scripting: treat noscript elements as if JavaScript was turned on - - :returns: parsed tree - - Example: - - >>> from html5lib.html5libparser import HTMLParser - >>> parser = HTMLParser() - >>> parser.parseFragment('this is a fragment') - - - """ - self._parse(stream, True, *args, **kwargs) - return self.tree.getFragment() - - def parseError(self, errorcode="XXX-undefined-error", datavars=None): - # XXX The idea is to make errorcode mandatory. - if datavars is None: - datavars = {} - self.errors.append((self.tokenizer.stream.position(), errorcode, datavars)) - if self.strict: - raise ParseError(E[errorcode] % datavars) - - def adjustMathMLAttributes(self, token): - adjust_attributes(token, adjustMathMLAttributes) - - def adjustSVGAttributes(self, token): - adjust_attributes(token, adjustSVGAttributes) - - def adjustForeignAttributes(self, token): - adjust_attributes(token, adjustForeignAttributesMap) - - def reparseTokenNormal(self, token): - # pylint:disable=unused-argument - self.parser.phase() - - def resetInsertionMode(self): - # The name of this method is mostly historical. (It's also used in the - # specification.) - last = False - newModes = { - "select": "inSelect", - "td": "inCell", - "th": "inCell", - "tr": "inRow", - "tbody": "inTableBody", - "thead": "inTableBody", - "tfoot": "inTableBody", - "caption": "inCaption", - "colgroup": "inColumnGroup", - "table": "inTable", - "head": "inBody", - "body": "inBody", - "frameset": "inFrameset", - "html": "beforeHead" - } - for node in self.tree.openElements[::-1]: - nodeName = node.name - new_phase = None - if node == self.tree.openElements[0]: - assert self.innerHTML - last = True - nodeName = self.innerHTML - # Check for conditions that should only happen in the innerHTML - # case - if nodeName in ("select", "colgroup", "head", "html"): - assert self.innerHTML - - if not last and node.namespace != self.tree.defaultNamespace: - continue - - if nodeName in newModes: - new_phase = self.phases[newModes[nodeName]] - break - elif last: - new_phase = self.phases["inBody"] - break - - self.phase = new_phase - - def parseRCDataRawtext(self, token, contentType): - # Generic RCDATA/RAWTEXT Parsing algorithm - assert contentType in ("RAWTEXT", "RCDATA") - - self.tree.insertElement(token) - - if contentType == "RAWTEXT": - self.tokenizer.state = self.tokenizer.rawtextState - else: - self.tokenizer.state = self.tokenizer.rcdataState - - self.originalPhase = self.phase - - self.phase = self.phases["text"] - - -@_utils.memoize -def getPhases(debug): - def log(function): - """Logger that records which phase processes each token""" - type_names = {value: key for key, value in tokenTypes.items()} - - def wrapped(self, *args, **kwargs): - if function.__name__.startswith("process") and len(args) > 0: - token = args[0] - info = {"type": type_names[token['type']]} - if token['type'] in tagTokenTypes: - info["name"] = token['name'] - - self.parser.log.append((self.parser.tokenizer.state.__name__, - self.parser.phase.__class__.__name__, - self.__class__.__name__, - function.__name__, - info)) - return function(self, *args, **kwargs) - else: - return function(self, *args, **kwargs) - return wrapped - - def getMetaclass(use_metaclass, metaclass_func): - if use_metaclass: - return method_decorator_metaclass(metaclass_func) - else: - return type - - # pylint:disable=unused-argument - class Phase(with_metaclass(getMetaclass(debug, log))): - """Base class for helper object that implements each phase of processing - """ - __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache") - - def __init__(self, parser, tree): - self.parser = parser - self.tree = tree - self.__startTagCache = {} - self.__endTagCache = {} - - def processEOF(self): - raise NotImplementedError - - def processComment(self, token): - # For most phases the following is correct. Where it's not it will be - # overridden. - self.tree.insertComment(token, self.tree.openElements[-1]) - - def processDoctype(self, token): - self.parser.parseError("unexpected-doctype") - - def processCharacters(self, token): - self.tree.insertText(token["data"]) - - def processSpaceCharacters(self, token): - self.tree.insertText(token["data"]) - - def processStartTag(self, token): - # Note the caching is done here rather than BoundMethodDispatcher as doing it there - # requires a circular reference to the Phase, and this ends up with a significant - # (CPython 2.7, 3.8) GC cost when parsing many short inputs - name = token["name"] - # In Py2, using `in` is quicker in general than try/except KeyError - # In Py3, `in` is quicker when there are few cache hits (typically short inputs) - if name in self.__startTagCache: - func = self.__startTagCache[name] - else: - func = self.__startTagCache[name] = self.startTagHandler[name] - # bound the cache size in case we get loads of unknown tags - while len(self.__startTagCache) > len(self.startTagHandler) * 1.1: - # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7 - self.__startTagCache.pop(next(iter(self.__startTagCache))) - return func(token) - - def startTagHtml(self, token): - if not self.parser.firstStartTag and token["name"] == "html": - self.parser.parseError("non-html-root") - # XXX Need a check here to see if the first start tag token emitted is - # this token... If it's not, invoke self.parser.parseError(). - for attr, value in token["data"].items(): - if attr not in self.tree.openElements[0].attributes: - self.tree.openElements[0].attributes[attr] = value - self.parser.firstStartTag = False - - def processEndTag(self, token): - # Note the caching is done here rather than BoundMethodDispatcher as doing it there - # requires a circular reference to the Phase, and this ends up with a significant - # (CPython 2.7, 3.8) GC cost when parsing many short inputs - name = token["name"] - # In Py2, using `in` is quicker in general than try/except KeyError - # In Py3, `in` is quicker when there are few cache hits (typically short inputs) - if name in self.__endTagCache: - func = self.__endTagCache[name] - else: - func = self.__endTagCache[name] = self.endTagHandler[name] - # bound the cache size in case we get loads of unknown tags - while len(self.__endTagCache) > len(self.endTagHandler) * 1.1: - # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7 - self.__endTagCache.pop(next(iter(self.__endTagCache))) - return func(token) - - class InitialPhase(Phase): - __slots__ = tuple() - - def processSpaceCharacters(self, token): - pass - - def processComment(self, token): - self.tree.insertComment(token, self.tree.document) - - def processDoctype(self, token): - name = token["name"] - publicId = token["publicId"] - systemId = token["systemId"] - correct = token["correct"] - - if (name != "html" or publicId is not None or - systemId is not None and systemId != "about:legacy-compat"): - self.parser.parseError("unknown-doctype") - - if publicId is None: - publicId = "" - - self.tree.insertDoctype(token) - - if publicId != "": - publicId = publicId.translate(asciiUpper2Lower) - - if (not correct or token["name"] != "html" or - publicId.startswith( - ("+//silmaril//dtd html pro v0r11 19970101//", - "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", - "-//as//dtd html 3.0 aswedit + extensions//", - "-//ietf//dtd html 2.0 level 1//", - "-//ietf//dtd html 2.0 level 2//", - "-//ietf//dtd html 2.0 strict level 1//", - "-//ietf//dtd html 2.0 strict level 2//", - "-//ietf//dtd html 2.0 strict//", - "-//ietf//dtd html 2.0//", - "-//ietf//dtd html 2.1e//", - "-//ietf//dtd html 3.0//", - "-//ietf//dtd html 3.2 final//", - "-//ietf//dtd html 3.2//", - "-//ietf//dtd html 3//", - "-//ietf//dtd html level 0//", - "-//ietf//dtd html level 1//", - "-//ietf//dtd html level 2//", - "-//ietf//dtd html level 3//", - "-//ietf//dtd html strict level 0//", - "-//ietf//dtd html strict level 1//", - "-//ietf//dtd html strict level 2//", - "-//ietf//dtd html strict level 3//", - "-//ietf//dtd html strict//", - "-//ietf//dtd html//", - "-//metrius//dtd metrius presentational//", - "-//microsoft//dtd internet explorer 2.0 html strict//", - "-//microsoft//dtd internet explorer 2.0 html//", - "-//microsoft//dtd internet explorer 2.0 tables//", - "-//microsoft//dtd internet explorer 3.0 html strict//", - "-//microsoft//dtd internet explorer 3.0 html//", - "-//microsoft//dtd internet explorer 3.0 tables//", - "-//netscape comm. corp.//dtd html//", - "-//netscape comm. corp.//dtd strict html//", - "-//o'reilly and associates//dtd html 2.0//", - "-//o'reilly and associates//dtd html extended 1.0//", - "-//o'reilly and associates//dtd html extended relaxed 1.0//", - "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", - "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", - "-//spyglass//dtd html 2.0 extended//", - "-//sq//dtd html 2.0 hotmetal + extensions//", - "-//sun microsystems corp.//dtd hotjava html//", - "-//sun microsystems corp.//dtd hotjava strict html//", - "-//w3c//dtd html 3 1995-03-24//", - "-//w3c//dtd html 3.2 draft//", - "-//w3c//dtd html 3.2 final//", - "-//w3c//dtd html 3.2//", - "-//w3c//dtd html 3.2s draft//", - "-//w3c//dtd html 4.0 frameset//", - "-//w3c//dtd html 4.0 transitional//", - "-//w3c//dtd html experimental 19960712//", - "-//w3c//dtd html experimental 970421//", - "-//w3c//dtd w3 html//", - "-//w3o//dtd w3 html 3.0//", - "-//webtechs//dtd mozilla html 2.0//", - "-//webtechs//dtd mozilla html//")) or - publicId in ("-//w3o//dtd w3 html strict 3.0//en//", - "-/w3c/dtd html 4.0 transitional/en", - "html") or - publicId.startswith( - ("-//w3c//dtd html 4.01 frameset//", - "-//w3c//dtd html 4.01 transitional//")) and - systemId is None or - systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"): - self.parser.compatMode = "quirks" - elif (publicId.startswith( - ("-//w3c//dtd xhtml 1.0 frameset//", - "-//w3c//dtd xhtml 1.0 transitional//")) or - publicId.startswith( - ("-//w3c//dtd html 4.01 frameset//", - "-//w3c//dtd html 4.01 transitional//")) and - systemId is not None): - self.parser.compatMode = "limited quirks" - - self.parser.phase = self.parser.phases["beforeHtml"] - - def anythingElse(self): - self.parser.compatMode = "quirks" - self.parser.phase = self.parser.phases["beforeHtml"] - - def processCharacters(self, token): - self.parser.parseError("expected-doctype-but-got-chars") - self.anythingElse() - return token - - def processStartTag(self, token): - self.parser.parseError("expected-doctype-but-got-start-tag", - {"name": token["name"]}) - self.anythingElse() - return token - - def processEndTag(self, token): - self.parser.parseError("expected-doctype-but-got-end-tag", - {"name": token["name"]}) - self.anythingElse() - return token - - def processEOF(self): - self.parser.parseError("expected-doctype-but-got-eof") - self.anythingElse() - return True - - class BeforeHtmlPhase(Phase): - __slots__ = tuple() - - # helper methods - def insertHtmlElement(self): - self.tree.insertRoot(impliedTagToken("html", "StartTag")) - self.parser.phase = self.parser.phases["beforeHead"] - - # other - def processEOF(self): - self.insertHtmlElement() - return True - - def processComment(self, token): - self.tree.insertComment(token, self.tree.document) - - def processSpaceCharacters(self, token): - pass - - def processCharacters(self, token): - self.insertHtmlElement() - return token - - def processStartTag(self, token): - if token["name"] == "html": - self.parser.firstStartTag = True - self.insertHtmlElement() - return token - - def processEndTag(self, token): - if token["name"] not in ("head", "body", "html", "br"): - self.parser.parseError("unexpected-end-tag-before-html", - {"name": token["name"]}) - else: - self.insertHtmlElement() - return token - - class BeforeHeadPhase(Phase): - __slots__ = tuple() - - def processEOF(self): - self.startTagHead(impliedTagToken("head", "StartTag")) - return True - - def processSpaceCharacters(self, token): - pass - - def processCharacters(self, token): - self.startTagHead(impliedTagToken("head", "StartTag")) - return token - - def startTagHtml(self, token): - return self.parser.phases["inBody"].processStartTag(token) - - def startTagHead(self, token): - self.tree.insertElement(token) - self.tree.headPointer = self.tree.openElements[-1] - self.parser.phase = self.parser.phases["inHead"] - - def startTagOther(self, token): - self.startTagHead(impliedTagToken("head", "StartTag")) - return token - - def endTagImplyHead(self, token): - self.startTagHead(impliedTagToken("head", "StartTag")) - return token - - def endTagOther(self, token): - self.parser.parseError("end-tag-after-implied-root", - {"name": token["name"]}) - - startTagHandler = _utils.MethodDispatcher([ - ("html", startTagHtml), - ("head", startTagHead) - ]) - startTagHandler.default = startTagOther - - endTagHandler = _utils.MethodDispatcher([ - (("head", "body", "html", "br"), endTagImplyHead) - ]) - endTagHandler.default = endTagOther - - class InHeadPhase(Phase): - __slots__ = tuple() - - # the real thing - def processEOF(self): - self.anythingElse() - return True - - def processCharacters(self, token): - self.anythingElse() - return token - - def startTagHtml(self, token): - return self.parser.phases["inBody"].processStartTag(token) - - def startTagHead(self, token): - self.parser.parseError("two-heads-are-not-better-than-one") - - def startTagBaseLinkCommand(self, token): - self.tree.insertElement(token) - self.tree.openElements.pop() - token["selfClosingAcknowledged"] = True - - def startTagMeta(self, token): - self.tree.insertElement(token) - self.tree.openElements.pop() - token["selfClosingAcknowledged"] = True - - attributes = token["data"] - if self.parser.tokenizer.stream.charEncoding[1] == "tentative": - if "charset" in attributes: - self.parser.tokenizer.stream.changeEncoding(attributes["charset"]) - elif ("content" in attributes and - "http-equiv" in attributes and - attributes["http-equiv"].lower() == "content-type"): - # Encoding it as UTF-8 here is a hack, as really we should pass - # the abstract Unicode string, and just use the - # ContentAttrParser on that, but using UTF-8 allows all chars - # to be encoded and as a ASCII-superset works. - data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8")) - parser = _inputstream.ContentAttrParser(data) - codec = parser.parse() - self.parser.tokenizer.stream.changeEncoding(codec) - - def startTagTitle(self, token): - self.parser.parseRCDataRawtext(token, "RCDATA") - - def startTagNoFramesStyle(self, token): - # Need to decide whether to implement the scripting-disabled case - self.parser.parseRCDataRawtext(token, "RAWTEXT") - - def startTagNoscript(self, token): - if self.parser.scripting: - self.parser.parseRCDataRawtext(token, "RAWTEXT") - else: - self.tree.insertElement(token) - self.parser.phase = self.parser.phases["inHeadNoscript"] - - def startTagScript(self, token): - self.tree.insertElement(token) - self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState - self.parser.originalPhase = self.parser.phase - self.parser.phase = self.parser.phases["text"] - - def startTagOther(self, token): - self.anythingElse() - return token - - def endTagHead(self, token): - node = self.parser.tree.openElements.pop() - assert node.name == "head", "Expected head got %s" % node.name - self.parser.phase = self.parser.phases["afterHead"] - - def endTagHtmlBodyBr(self, token): - self.anythingElse() - return token - - def endTagOther(self, token): - self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) - - def anythingElse(self): - self.endTagHead(impliedTagToken("head")) - - startTagHandler = _utils.MethodDispatcher([ - ("html", startTagHtml), - ("title", startTagTitle), - (("noframes", "style"), startTagNoFramesStyle), - ("noscript", startTagNoscript), - ("script", startTagScript), - (("base", "basefont", "bgsound", "command", "link"), - startTagBaseLinkCommand), - ("meta", startTagMeta), - ("head", startTagHead) - ]) - startTagHandler.default = startTagOther - - endTagHandler = _utils.MethodDispatcher([ - ("head", endTagHead), - (("br", "html", "body"), endTagHtmlBodyBr) - ]) - endTagHandler.default = endTagOther - - class InHeadNoscriptPhase(Phase): - __slots__ = tuple() - - def processEOF(self): - self.parser.parseError("eof-in-head-noscript") - self.anythingElse() - return True - - def processComment(self, token): - return self.parser.phases["inHead"].processComment(token) - - def processCharacters(self, token): - self.parser.parseError("char-in-head-noscript") - self.anythingElse() - return token - - def processSpaceCharacters(self, token): - return self.parser.phases["inHead"].processSpaceCharacters(token) - - def startTagHtml(self, token): - return self.parser.phases["inBody"].processStartTag(token) - - def startTagBaseLinkCommand(self, token): - return self.parser.phases["inHead"].processStartTag(token) - - def startTagHeadNoscript(self, token): - self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) - - def startTagOther(self, token): - self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) - self.anythingElse() - return token - - def endTagNoscript(self, token): - node = self.parser.tree.openElements.pop() - assert node.name == "noscript", "Expected noscript got %s" % node.name - self.parser.phase = self.parser.phases["inHead"] - - def endTagBr(self, token): - self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) - self.anythingElse() - return token - - def endTagOther(self, token): - self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) - - def anythingElse(self): - # Caller must raise parse error first! - self.endTagNoscript(impliedTagToken("noscript")) - - startTagHandler = _utils.MethodDispatcher([ - ("html", startTagHtml), - (("basefont", "bgsound", "link", "meta", "noframes", "style"), startTagBaseLinkCommand), - (("head", "noscript"), startTagHeadNoscript), - ]) - startTagHandler.default = startTagOther - - endTagHandler = _utils.MethodDispatcher([ - ("noscript", endTagNoscript), - ("br", endTagBr), - ]) - endTagHandler.default = endTagOther - - class AfterHeadPhase(Phase): - __slots__ = tuple() - - def processEOF(self): - self.anythingElse() - return True - - def processCharacters(self, token): - self.anythingElse() - return token - - def startTagHtml(self, token): - return self.parser.phases["inBody"].processStartTag(token) - - def startTagBody(self, token): - self.parser.framesetOK = False - self.tree.insertElement(token) - self.parser.phase = self.parser.phases["inBody"] - - def startTagFrameset(self, token): - self.tree.insertElement(token) - self.parser.phase = self.parser.phases["inFrameset"] - - def startTagFromHead(self, token): - self.parser.parseError("unexpected-start-tag-out-of-my-head", - {"name": token["name"]}) - self.tree.openElements.append(self.tree.headPointer) - self.parser.phases["inHead"].processStartTag(token) - for node in self.tree.openElements[::-1]: - if node.name == "head": - self.tree.openElements.remove(node) - break - - def startTagHead(self, token): - self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) - - def startTagOther(self, token): - self.anythingElse() - return token - - def endTagHtmlBodyBr(self, token): - self.anythingElse() - return token - - def endTagOther(self, token): - self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) - - def anythingElse(self): - self.tree.insertElement(impliedTagToken("body", "StartTag")) - self.parser.phase = self.parser.phases["inBody"] - self.parser.framesetOK = True - - startTagHandler = _utils.MethodDispatcher([ - ("html", startTagHtml), - ("body", startTagBody), - ("frameset", startTagFrameset), - (("base", "basefont", "bgsound", "link", "meta", "noframes", "script", - "style", "title"), - startTagFromHead), - ("head", startTagHead) - ]) - startTagHandler.default = startTagOther - endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"), - endTagHtmlBodyBr)]) - endTagHandler.default = endTagOther - - class InBodyPhase(Phase): - # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody - # the really-really-really-very crazy mode - __slots__ = ("processSpaceCharacters",) - - def __init__(self, *args, **kwargs): - super(InBodyPhase, self).__init__(*args, **kwargs) - # Set this to the default handler - self.processSpaceCharacters = self.processSpaceCharactersNonPre - - def isMatchingFormattingElement(self, node1, node2): - return (node1.name == node2.name and - node1.namespace == node2.namespace and - node1.attributes == node2.attributes) - - # helper - def addFormattingElement(self, token): - self.tree.insertElement(token) - element = self.tree.openElements[-1] - - matchingElements = [] - for node in self.tree.activeFormattingElements[::-1]: - if node is Marker: - break - elif self.isMatchingFormattingElement(node, element): - matchingElements.append(node) - - assert len(matchingElements) <= 3 - if len(matchingElements) == 3: - self.tree.activeFormattingElements.remove(matchingElements[-1]) - self.tree.activeFormattingElements.append(element) - - # the real deal - def processEOF(self): - allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td", - "tfoot", "th", "thead", "tr", "body", - "html")) - for node in self.tree.openElements[::-1]: - if node.name not in allowed_elements: - self.parser.parseError("expected-closing-tag-but-got-eof") - break - # Stop parsing - - def processSpaceCharactersDropNewline(self, token): - # Sometimes (start of
, , and