diff --git a/.github/workflows/docs-ci.yml b/.github/workflows/docs-ci.yml
index 18a44aa0..511b7c28 100644
--- a/.github/workflows/docs-ci.yml
+++ b/.github/workflows/docs-ci.yml
@@ -13,10 +13,10 @@ jobs:
     steps:
       - name: Checkout code
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3

       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml
index 22315ff0..95857301 100644
--- a/.github/workflows/pypi-release.yml
+++ b/.github/workflows/pypi-release.yml
@@ -24,9 +24,9 @@ jobs:
     runs-on: ubuntu-20.04
     steps:
-      - uses: actions/checkout@master
+      - uses: actions/checkout@v3
      - name: Set up Python
-        uses: actions/setup-python@v1
+        uses: actions/setup-python@v4
        with:
          python-version: 3.9
@@ -78,6 +78,6 @@ jobs:
      - name: Publish to PyPI
        if: startsWith(github.ref, 'refs/tags')
-        uses: pypa/gh-action-pypi-publish@master
+        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/.gitignore b/.gitignore
index 339dca50..2d48196f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 *.py[cod]

 # virtualenv and other misc bits
+/src/*.egg-info
 *.egg-info
 /dist
 /build
diff --git a/.readthedocs.yml b/.readthedocs.yml
index 1b71cd9e..8ab23688 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -5,6 +5,17 @@
 # Required
 version: 2

+# Build in latest ubuntu/python
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.11"
+
+# Build PDF & ePub
+formats:
+  - epub
+  - pdf
+
 # Where the Sphinx conf.py file is located
 sphinx:
   configuration: docs/source/conf.py
diff --git a/Makefile b/Makefile
index 7d6ec8b8..a3da02ef 100644
--- a/Makefile
+++ b/Makefile
@@ -9,11 +9,12 @@
 #
 # Python version can be specified with `$ PYTHON_EXE=python3.x make conf`

+PYTHON_EXE?=python3
 VENV=venv
 ACTIVATE?=. ${VENV}/bin/activate;

 dev:
-	@echo "-> Configure the dev envt."
+	@echo "-> Configure the development envt."
	./configure --dev

isort:
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index f9ccf5e2..0dfc01c3 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -9,32 +9,40 @@
 jobs:

    - template: etc/ci/azure-posix.yml
      parameters:
-          job_name: ubuntu18_cpython
-          image_name: ubuntu-18.04
+          job_name: ubuntu20_cpython
+          image_name: ubuntu-20.04
          python_versions: ['3.7', '3.8', '3.9', '3.10', '3.11']
          test_suites:
              all: venv/bin/pytest -n 2 -vvs --reruns 2

    - template: etc/ci/azure-posix.yml
      parameters:
-          job_name: ubuntu20_cpython
-          image_name: ubuntu-20.04
+          job_name: ubuntu22_cpython
+          image_name: ubuntu-22.04
          python_versions: ['3.7', '3.8', '3.9', '3.10', '3.11']
          test_suites:
              all: venv/bin/pytest -n 2 -vvs --reruns 2

    - template: etc/ci/azure-posix.yml
      parameters:
-          job_name: ubuntu22_cpython
-          image_name: ubuntu-22.04
+          job_name: macos11_cpython
+          image_name: macOS-11
          python_versions: ['3.7', '3.8', '3.9', '3.10', '3.11']
          test_suites:
              all: venv/bin/pytest -n 2 -vvs --reruns 2

    - template: etc/ci/azure-posix.yml
      parameters:
-          job_name: macos11_cpython
-          image_name: macos-11
+          job_name: macos12_cpython
+          image_name: macOS-12
+          python_versions: ['3.7', '3.8', '3.9', '3.10', '3.11']
+          test_suites:
+              all: venv/bin/pytest -n 2 -vvs
+
+    - template: etc/ci/azure-posix.yml
+      parameters:
+          job_name: macos13_cpython
+          image_name: macOS-13
          python_versions: ['3.7', '3.8', '3.9', '3.10', '3.11']
          test_suites:
              all: venv/bin/pytest -n 2 -vvs --reruns 2
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 94428966..184453bd 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -29,8 +29,14 @@
 # ones.
 extensions = [
     "sphinx.ext.intersphinx",
+    "sphinx_reredirects",
 ]
+
+# Redirects for old pages
+# See https://documatt.gitlab.io/sphinx-reredirects/usage.html
+redirects = {}
+
 # This points to aboutcode.readthedocs.io
 # In case of "undefined label" ERRORS check docs on intersphinx to troubleshoot
 # Link was created at commit - https://github.com/nexB/aboutcode/commit/faea9fcf3248f8f198844fe34d43833224ac4a83
@@ -95,3 +101,9 @@
 .. role:: img-title-para

 """
+
+# -- Options for LaTeX output -------------------------------------------------
+
+latex_elements = {
+    'classoptions': ',openany,oneside'
+}
\ No newline at end of file
diff --git a/etc/scripts/README.rst b/etc/scripts/README.rst
index edf82e44..5e54a2cc 100755
--- a/etc/scripts/README.rst
+++ b/etc/scripts/README.rst
@@ -21,7 +21,7 @@ Pre-requisites
 virtualenv or in the main configured development virtualenv.
 These requirements need to be installed::

-    pip install --requirement etc/release/requirements.txt
+    pip install --requirement etc/scripts/requirements.txt

 TODO: we need to pin the versions of these tools

@@ -34,7 +34,7 @@ Scripts
 ~~~~~~~

 **gen_requirements.py**: create/update requirements files from currently
-  installed requirements.
+  installed requirements.

 **gen_requirements_dev.py** does the same but can subtract the main
 requirements to get extra requirements used in only development.

@@ -50,7 +50,7 @@ The sequence of commands to run are:

     ./configure --clean
     ./configure
-    python etc/release/gen_requirements.py --site-packages-dir
+    python etc/scripts/gen_requirements.py --site-packages-dir

* You can optionally install or update extra main requirements after the
  ./configure step such that these are included in the generated main
  requirements.
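For context on the gen_requirements.py usage documented above: a minimal sketch of
what "create/update requirements files from currently installed requirements"
amounts to, using only the standard library (Python 3.8+). This is an illustration
of the idea, not the actual implementation of etc/scripts/gen_requirements.py::

    # List one "name==version" pin per distribution installed in the current
    # environment (i.e. the configured virtualenv), sorted for stable diffs.
    from importlib.metadata import distributions

    def frozen_pins():
        return sorted(
            f"{dist.metadata['Name']}=={dist.version}" for dist in distributions()
        )

    if __name__ == "__main__":
        print("\n".join(frozen_pins()))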
@@ -59,7 +59,7 @@ The sequence of commands to run are:

     ./configure --clean
     ./configure --dev
-    python etc/release/gen_requirements_dev.py --site-packages-dir
+    python etc/scripts/gen_requirements_dev.py --site-packages-dir

* You can optionally install or update extra dev requirements after the
  ./configure step such that these are included in the generated dev
  requirements.
diff --git a/etc/scripts/fetch_thirdparty.py b/etc/scripts/fetch_thirdparty.py
index 89d17ded..eedf05c6 100644
--- a/etc/scripts/fetch_thirdparty.py
+++ b/etc/scripts/fetch_thirdparty.py
@@ -12,6 +12,7 @@
 import itertools
 import os
 import sys
+from collections import defaultdict

 import click

@@ -110,6 +111,39 @@
     is_flag=True,
     help="Use on disk cached PyPI indexes list of packages and versions and do not refetch if present.",
 )
+@click.option(
+    "--sdist-only",
+    "sdist_only",
+    type=str,
+    metavar="SDIST",
+    default=tuple(),
+    show_default=False,
+    multiple=True,
+    help="Package name(s) that come only in sdist format (no wheels). "
+    "The command will not fail and exit if no wheel exists for these names",
+)
+@click.option(
+    "--wheel-only",
+    "wheel_only",
+    type=str,
+    metavar="WHEEL",
+    default=tuple(),
+    show_default=False,
+    multiple=True,
+    help="Package name(s) that come only in wheel format (no sdist). "
+    "The command will not fail and exit if no sdist exists for these names",
+)
+@click.option(
+    "--no-dist",
+    "no_dist",
+    type=str,
+    metavar="DIST",
+    default=tuple(),
+    show_default=False,
+    multiple=True,
+    help="Package name(s) that do not come either in wheel or sdist format. "
+    "The command will not fail and exit if no distribution exists for these names",
+)
 @click.help_option("-h", "--help")
 def fetch_thirdparty(
     requirements_files,
@@ -122,6 +156,9 @@ def fetch_thirdparty(
     sdists,
     index_urls,
     use_cached_index,
+    sdist_only,
+    wheel_only,
+    no_dist,
 ):
     """
     Download to --dest THIRDPARTY_DIR the PyPI wheels, source distributions,
@@ -204,58 +241,62 @@ def fetch_thirdparty(
     )
     repos.append(repo)

-    wheels_fetched = []
-    wheels_not_found = []
-
-    sdists_fetched = []
-    sdists_not_found = []
+    wheels_or_sdist_not_found = defaultdict(list)

     for name, version in sorted(required_name_versions):
         nv = name, version
         print(f"Processing: {name} @ {version}")
         if wheels:
             for environment in environments:
+
                 if TRACE:
                     print(f"  ==> Fetching wheel for envt: {environment}")
-                fwfns = utils_thirdparty.download_wheel(
+
+                fetched = utils_thirdparty.download_wheel(
                     name=name,
                     version=version,
                     environment=environment,
                     dest_dir=dest_dir,
                     repos=repos,
                 )
-                if fwfns:
-                    wheels_fetched.extend(fwfns)
-                else:
-                    wheels_not_found.append(f"{name}=={version} for: {environment}")
+                if not fetched:
+                    wheels_or_sdist_not_found[f"{name}=={version}"].append(environment)
                     if TRACE:
                         print(f"    NOT FOUND")

-        if sdists:
+        if (sdists or
+            (f"{name}=={version}" in wheels_or_sdist_not_found and name in sdist_only)
+        ):
             if TRACE:
                 print(f"  ==> Fetching sdist: {name}=={version}")
+
             fetched = utils_thirdparty.download_sdist(
                 name=name,
                 version=version,
                 dest_dir=dest_dir,
                 repos=repos,
             )
-            if fetched:
-                sdists_fetched.append(fetched)
-            else:
-                sdists_not_found.append(f"{name}=={version}")
+            if not fetched:
+                wheels_or_sdist_not_found[f"{name}=={version}"].append("sdist")
                 if TRACE:
                     print(f"    NOT FOUND")

-    if wheels and wheels_not_found:
-        print(f"==> MISSING WHEELS")
-        for wh in wheels_not_found:
-            print(f" {wh}")
+    mia = []
+    for nv, dists in wheels_or_sdist_not_found.items():
+        name, _, version = nv.partition("==")
+        if name in no_dist:
+            continue
"sdist" in dists and not name in wheel_only + if sdist_missing: + mia.append(f"SDist missing: {nv} {dists}") + wheels_missing = wheels and any(d for d in dists if d != "sdist") and not name in sdist_only + if wheels_missing: + mia.append(f"Wheels missing: {nv} {dists}") - if sdists and sdists_not_found: - print(f"==> MISSING SDISTS") - for sd in sdists_not_found: - print(f" {sd}") + if mia: + for m in mia: + print(m) + raise Exception(mia) print(f"==> FETCHING OR CREATING ABOUT AND LICENSE FILES") utils_thirdparty.fetch_abouts_and_licenses(dest_dir=dest_dir, use_cached_index=use_cached_index) diff --git a/etc/scripts/gen_pypi_simple.py b/etc/scripts/gen_pypi_simple.py index 03312ab3..214d90dc 100644 --- a/etc/scripts/gen_pypi_simple.py +++ b/etc/scripts/gen_pypi_simple.py @@ -118,7 +118,7 @@ def build_per_package_index(pkg_name, packages, base_url): """ document.append(header) - for package in packages: + for package in sorted(packages, key=lambda p: p.archive_file): document.append(package.simple_index_entry(base_url)) footer = """ @@ -141,8 +141,8 @@ def build_links_package_index(packages_by_package_name, base_url): """ document.append(header) - for _name, packages in packages_by_package_name.items(): - for package in packages: + for _name, packages in sorted(packages_by_package_name.items(), key=lambda i: i[0]): + for package in sorted(packages, key=lambda p: p.archive_file): document.append(package.simple_index_entry(base_url)) footer = """ diff --git a/etc/scripts/requirements.txt b/etc/scripts/requirements.txt index ebb404b7..7c514da9 100644 --- a/etc/scripts/requirements.txt +++ b/etc/scripts/requirements.txt @@ -8,4 +8,5 @@ pip setuptools twine wheel -build \ No newline at end of file +build +packvers diff --git a/etc/scripts/utils_dejacode.py b/etc/scripts/utils_dejacode.py index f28e2479..c42e6c93 100644 --- a/etc/scripts/utils_dejacode.py +++ b/etc/scripts/utils_dejacode.py @@ -15,7 +15,7 @@ import requests import saneyaml -from packaging import version as packaging_version +from packvers import version as packaging_version """ Utility to create and retrieve package and ABOUT file data from DejaCode. 
diff --git a/etc/scripts/utils_pip_compatibility_tags.py b/etc/scripts/utils_pip_compatibility_tags.py
index 5d5eb34c..af42a0cd 100644
--- a/etc/scripts/utils_pip_compatibility_tags.py
+++ b/etc/scripts/utils_pip_compatibility_tags.py
@@ -27,7 +27,7 @@

 import re

-from packaging.tags import (
+from packvers.tags import (
     compatible_tags,
     cpython_tags,
     generic_tags,
diff --git a/etc/scripts/utils_thirdparty.py b/etc/scripts/utils_thirdparty.py
index 53f2d33c..addf8e5e 100644
--- a/etc/scripts/utils_thirdparty.py
+++ b/etc/scripts/utils_thirdparty.py
@@ -28,8 +28,8 @@
 from commoncode import fileutils
 from commoncode.hash import multi_checksums
 from commoncode.text import python_safe_name
-from packaging import tags as packaging_tags
-from packaging import version as packaging_version
+from packvers import tags as packaging_tags
+from packvers import version as packaging_version

 import utils_pip_compatibility_tags

@@ -115,10 +115,9 @@
 TRACE_ULTRA_DEEP = False

 # Supported environments
-PYTHON_VERSIONS = "36", "37", "38", "39", "310"
+PYTHON_VERSIONS = "37", "38", "39", "310"

 PYTHON_DOT_VERSIONS_BY_VER = {
-    "36": "3.6",
     "37": "3.7",
     "38": "3.8",
     "39": "3.9",
@@ -134,7 +133,6 @@ def get_python_dot_version(version):

 ABIS_BY_PYTHON_VERSION = {
-    "36": ["cp36", "cp36m", "abi3"],
     "37": ["cp37", "cp37m", "abi3"],
     "38": ["cp38", "cp38m", "abi3"],
     "39": ["cp39", "cp39m", "abi3"],
@@ -912,7 +910,7 @@ def load_pkginfo_data(self, dest_dir=THIRDPARTY_DIR):
         declared_license = [raw_data["License"]] + [
             c for c in classifiers if c.startswith("License")
         ]
-        license_expression = compute_normalized_license_expression(declared_license)
+        license_expression = get_license_expression(declared_license)
         other_classifiers = [c for c in classifiers if not c.startswith("License")]

         holder = raw_data["Author"]
@@ -1337,10 +1335,10 @@ def package_from_dists(cls, dists):
         For example:
         >>> w1 = Wheel(name='bitarray', version='0.8.1', build='',
-        ...    python_versions=['cp36'], abis=['cp36m'],
+        ...    python_versions=['cp38'], abis=['cp38m'],
         ...    platforms=['linux_x86_64'])
         >>> w2 = Wheel(name='bitarray', version='0.8.1', build='',
-        ...    python_versions=['cp36'], abis=['cp36m'],
+        ...    python_versions=['cp38'], abis=['cp38m'],
         ...    platforms=['macosx_10_9_x86_64', 'macosx_10_10_x86_64'])
         >>> sd = Sdist(name='bitarray', version='0.8.1')
         >>> package = PypiPackage.package_from_dists(dists=[w1, w2, sd])
@@ -2274,16 +2272,16 @@ def find_problems(
     check_about(dest_dir=dest_dir)


-def compute_normalized_license_expression(declared_licenses):
+def get_license_expression(declared_licenses):
     """
     Return a normalized license expression or None.
""" if not declared_licenses: return try: - from packagedcode import pypi + from packagedcode.licensing import get_only_expression_from_extracted_license - return pypi.compute_normalized_license(declared_licenses) + return get_only_expression_from_extracted_license(declared_licenses) except ImportError: # Scancode is not installed, clean and join all the licenses lics = [python_safe_name(l).lower() for l in declared_licenses] diff --git a/setup.cfg b/setup.cfg index ec86a674..40e295d8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -81,13 +81,14 @@ testing = pytest >= 6, != 7.0.0 pytest-xdist >= 2 aboutcode-toolkit >= 7.0.2 + pycodestyle >= 2.8.0 twine black isort - pycodestyle - pytest-rerunfailures docs = - Sphinx >= 3.3.1,<7.0.0 - sphinx-rtd-theme >= 0.5.0 - doc8 >= 0.8.1 + Sphinx>=5.0.2 + sphinx-rtd-theme>=1.0.0 + sphinx-reredirects >= 0.1.2 + doc8>=0.11.2 + diff --git a/tests/data/azure-devops.req-310-expected.json b/tests/data/azure-devops.req-310-expected.json index 49323ed4..e5e3c3c0 100644 --- a/tests/data/azure-devops.req-310-expected.json +++ b/tests/data/azure-devops.req-310-expected.json @@ -4,7 +4,7 @@ "tool_homepageurl": "https://github.com/nexB/python-inspector", "tool_version": "0.9.8", "options": [ - "--requirement /home/tg1999/Desktop/example/tools/python-inspector/tests/data/azure-devops.req.txt", + "--requirement /home/tg1999/Desktop/python-inspector-1/tests/data/azure-devops.req.txt", "--index-url https://pypi.org/simple", "--python-version 310", "--operating-system linux", @@ -17,7 +17,7 @@ "files": [ { "type": "file", - "path": "/home/tg1999/Desktop/example/tools/python-inspector/tests/data/azure-devops.req.txt", + "path": "/home/tg1999/Desktop/python-inspector-1/tests/data/azure-devops.req.txt", "package_data": [ { "type": "pypi", @@ -627,12 +627,12 @@ "type": "pypi", "namespace": null, "name": "cffi", - "version": "1.15.1", + "version": "1.16.0", "qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "CFFI\n====\n\nForeign Function Interface for Python calling C code.\nPlease see the `Documentation `_.\n\nContact\n-------\n\n`Mailing list `_", - "release_date": "2022-06-30T18:15:50", + "release_date": "2023-09-28T18:00:37", "parties": [ { "type": "person", @@ -644,26 +644,24 @@ ], "keywords": [ "Programming Language :: Python", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy" ], "homepage_url": "http://cffi.readthedocs.org", - "download_url": "https://files.pythonhosted.org/packages/88/89/c34caf63029fb7628ec2ebd5c88ae0c9bd17db98c812e4065a4d020ca41f/cffi-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", - "size": 441830, + "download_url": "https://files.pythonhosted.org/packages/c9/7c/43d81bdd5a915923c3bad5bb4bff401ea00ccc8e28433fb6083d2e3bf58e/cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + "size": 443865, "sha1": null, - "md5": "a29947fc15af734d35f4d1e537373898", - "sha256": "dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4", + "md5": "107b486471232e528f26e6f13f5141e5", + "sha256": 
"e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614", "sha512": null, - "bug_tracking_url": null, - "code_view_url": null, + "bug_tracking_url": "https://github.com/python-cffi/cffi/issues", + "code_view_url": "https://github.com/python-cffi/cffi", "vcs_url": null, "copyright": null, "license_expression": null, @@ -680,20 +678,20 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/cffi/1.15.1/json", + "api_data_url": "https://pypi.org/pypi/cffi/1.16.0/json", "datasource_id": null, - "purl": "pkg:pypi/cffi@1.15.1" + "purl": "pkg:pypi/cffi@1.16.0" }, { "type": "pypi", "namespace": null, "name": "cffi", - "version": "1.15.1", + "version": "1.16.0", "qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "CFFI\n====\n\nForeign Function Interface for Python calling C code.\nPlease see the `Documentation `_.\n\nContact\n-------\n\n`Mailing list `_", - "release_date": "2022-06-30T18:18:32", + "release_date": "2023-09-28T18:02:04", "parties": [ { "type": "person", @@ -705,26 +703,24 @@ ], "keywords": [ "Programming Language :: Python", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy" ], "homepage_url": "http://cffi.readthedocs.org", - "download_url": "https://files.pythonhosted.org/packages/2b/a8/050ab4f0c3d4c1b8aaa805f70e26e84d0e27004907c5b8ecc1d31815f92a/cffi-1.15.1.tar.gz", - "size": 508501, + "download_url": "https://files.pythonhosted.org/packages/68/ce/95b0bae7968c65473e1298efb042e10cafc7bafc14d9e4f154008241c91d/cffi-1.16.0.tar.gz", + "size": 512873, "sha1": null, - "md5": "f493860a6e98cd0c4178149568a6b4f6", - "sha256": "d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9", + "md5": "0bcaed453da3004d0bea103038345c1e", + "sha256": "bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0", "sha512": null, - "bug_tracking_url": null, - "code_view_url": null, + "bug_tracking_url": "https://github.com/python-cffi/cffi/issues", + "code_view_url": "https://github.com/python-cffi/cffi", "vcs_url": null, "copyright": null, "license_expression": null, @@ -741,20 +737,20 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/cffi/1.15.1/json", + "api_data_url": "https://pypi.org/pypi/cffi/1.16.0/json", "datasource_id": null, - "purl": "pkg:pypi/cffi@1.15.1" + "purl": "pkg:pypi/cffi@1.16.0" }, { "type": "pypi", "namespace": null, "name": "charset-normalizer", - "version": "3.2.0", + "version": "3.3.0", "qualifiers": {}, "subpath": null, "primary_language": "Python", - "description": "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.\n
-      "release_date": "2023-07-07T20:19:07",
+      "description": "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet. [… full package README and changelog text omitted …]",
library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)\n\n## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)\n\n### Added\n- Output the Unicode table version when running the CLI with `--version` (PR #194)\n\n### Changed\n- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)\n- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)\n\n### Fixed\n- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)\n- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)\n\n### Removed\n- Support for Python 3.5 (PR #192)\n\n### Deprecated\n- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)\n\n## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)\n\n### Fixed\n- ASCII miss-detection on rare cases (PR #170) \n\n## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)\n\n### Added\n- Explicit support for Python 3.11 (PR #164)\n\n### Changed\n- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)\n\n## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)\n\n### Fixed\n- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)\n\n### Changed\n- Skipping the language-detection (CD) on ASCII (PR #155)\n\n## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)\n\n### Changed\n- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)\n\n### Fixed\n- Wrong logging level applied when setting kwarg `explain` to True (PR #146)\n\n## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)\n### Changed\n- Improvement over Vietnamese detection (PR #126)\n- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)\n- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)\n- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)\n- Code style as refactored by Sourcery-AI (PR #131) \n- Minor adjustment on the MD around european words (PR #133)\n- Remove and replace SRTs from assets / tests (PR #139)\n- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)\n- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)\n\n### Fixed\n- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)\n- Avoid using too insignificant chunk (PR #137)\n\n### Added\n- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)\n- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)\n\n## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)\n### Added\n- Add support for Kazakh (Cyrillic) language detection (PR #109)\n\n### Changed\n- Further, improve 
inferring the language from a given single-byte code page (PR #112)\n- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)\n- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)\n- Various detection improvement (MD+CD) (PR #117)\n\n### Removed\n- Remove redundant logging entry about detected language(s) (PR #115)\n\n### Fixed\n- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)\n\n## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)\n### Fixed\n- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)\n- Fix CLI crash when using --minimal output in certain cases (PR #103)\n\n### Changed\n- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)\n\n## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)\n### Changed\n- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)\n- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)\n- The Unicode detection is slightly improved (PR #93)\n- Add syntax sugar \\_\\_bool\\_\\_ for results CharsetMatches list-container (PR #91)\n\n### Removed\n- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92)\n\n### Fixed\n- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)\n- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)\n- The MANIFEST.in was not exhaustive (PR #78)\n\n## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)\n### Fixed\n- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70)\n- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)\n- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)\n- Submatch factoring could be wrong in rare edge cases (PR #72)\n- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)\n- Fix line endings from CRLF to LF for certain project files (PR #67)\n\n### Changed\n- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)\n- Allow fallback on specified encoding if any (PR #71)\n\n## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)\n### Changed\n- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)\n- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)\n\n## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)\n### Fixed\n- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59) \n\n### Changed\n- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)\n\n## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)\n### Fixed\n- Make it work where there isn't a filesystem available, dropping assets frequencies.json. 
Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)\n- Using explain=False permanently disable the verbose output in the current runtime (PR #47)\n- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47)\n- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)\n\n### Changed\n- Public function normalize default args values were not aligned with from_bytes (PR #53)\n\n### Added\n- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)\n\n## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)\n### Changed\n- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.\n- Accent has been made on UTF-8 detection, should perform rather instantaneous.\n- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.\n- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)\n- The program has been rewritten to ease the readability and maintainability. (+Using static typing)+\n- utf_7 detection has been reinstated.\n\n### Removed\n- This package no longer require anything when used with Python 3.5 (Dropped cached_property)\n- Removed support for these languages: Catalan, Esperanto, Kazakh, Baque, Volap\u00fck, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.\n- The exception hook on UnicodeDecodeError has been removed.\n\n### Deprecated\n- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0\n\n### Fixed\n- The CLI output used the relative path of the file(s). Should be absolute.\n\n## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)\n### Fixed\n- Logger configuration/usage no longer conflict with others (PR #44)\n\n## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)\n### Removed\n- Using standard logging instead of using the package loguru.\n- Dropping nose test framework in favor of the maintained pytest.\n- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.\n- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.\n- Stop support for UTF-7 that does not contain a SIG.\n- Dropping PrettyTable, replaced with pure JSON output in CLI.\n\n### Fixed\n- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.\n- Not searching properly for the BOM when trying utf32/16 parent codec.\n\n### Changed\n- Improving the package final size by compressing frequencies.json.\n- Huge improvement over the larges payload.\n\n### Added\n- CLI now produces JSON consumable output.\n- Return ASCII if given sequences fit. Given reasonable confidence.\n\n## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)\n\n### Fixed\n- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)\n\n## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)\n\n### Fixed\n- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. 
(PR #39)\n\n## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)\n\n### Fixed\n- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)\n\n## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)\n\n### Changed\n- Amend the previous release to allow prettytable 2.0 (PR #35)\n\n## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)\n\n### Fixed\n- Fix error while using the package with a python pre-release interpreter (PR #33)\n\n### Changed\n- Dependencies refactoring, constraints revised.\n\n### Added\n- Add python 3.9 and 3.10 to the supported interpreters\n\nMIT License\n\nCopyright (c) 2019 TAHRI Ahmed R.\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.", + "release_date": "2023-09-30T09:12:42", "parties": [ { "type": "person", @@ -791,11 +787,11 @@ "Typing :: Typed" ], "homepage_url": "https://github.com/Ousret/charset_normalizer", - "download_url": "https://files.pythonhosted.org/packages/bf/a0/188f223c7d8b924fb9b554b9d27e0e7506fd5bf9cfb6dbacb2dfd5832b53/charset_normalizer-3.2.0-py3-none-any.whl", - "size": 46668, + "download_url": "https://files.pythonhosted.org/packages/a3/dc/efab5b27839f04be4b8058c1eb85b7ab7dbc55ef8067250bea0518392756/charset_normalizer-3.3.0-py3-none-any.whl", + "size": 48155, "sha1": null, - "md5": "24d43cfdd131f63edf655456709eb904", - "sha256": "8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6", + "md5": "b8d8dcceed572fa80a875f6e8a4f5916", + "sha256": "e46cd37076971c1040fc8c41273a8b3e2c624ce4f2be3f5dfcb7a430c1d3acc2", "sha512": null, "bug_tracking_url": null, "code_view_url": null, @@ -815,20 +811,20 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/charset-normalizer/3.2.0/json", + "api_data_url": "https://pypi.org/pypi/charset-normalizer/3.3.0/json", "datasource_id": null, - "purl": "pkg:pypi/charset-normalizer@3.2.0" + "purl": "pkg:pypi/charset-normalizer@3.3.0" }, { "type": "pypi", "namespace": null, "name": "charset-normalizer", - "version": "3.2.0", + "version": "3.3.0", "qualifiers": {}, "subpath": null, "primary_language": "Python", - "description": "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.\n

Charset Detection, for Everyone 👋

The Real First Universal Charset Detector
[header badges removed]

> A library that helps you read text from an unknown charset encoding. Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.

>>>>> 👉 Try Me Online Now, Then Adopt Me 👈 <<<<<
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.

| Feature                                          | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|--------------------------------------------------|:---------------------------------------------:|:------------------:|:-----------------------------------------------:|
| `Fast`                                           | ❌ | ✅ | ✅ |
| `Universal**`                                    | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards    | ✅ | ✅ | ✅ |
| `License`                                        | LGPL-2.1 _restrictive_ | MIT | MPL-1.1 _restrictive_ |
| `Native Python`                                  | ✅ | ✅ | ❌ |
| `Detect spoken language`                         | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety`                      | ❌ | ✅ | ❌ |
| `Whl Size`                                       | 193.6 kB | 40 kB | ~200 kB |
| `Supported Encoding`                             | 33 | 🎉 [90](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |

[reading-normalized-text and cat-reading-text illustrations removed]

*\*\* : They are clearly using specific code for a specific encoding even if covering most of used one*
Did you get there because of the logs? See [https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html](https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html)

## ⚡ Performance

This package offers better performance than its counterpart Chardet. Here are some numbers.

| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) | 86 % | 200 ms | 5 file/sec |
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |

| Package | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) | 1200 ms | 287 ms | 23 ms |
| charset-normalizer | 100 ms | 50 ms | 5 ms |

Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.

> Stats are generated from 400+ files using default parameters. For more details on the files used, see the GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays heavily depend on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet's accuracy versus ours is measured using Chardet's initial
> capability (e.g. supported encodings). Challenge them if you want.

## ✨ Installation

Using pip:

```sh
pip install charset-normalizer -U
```

## 🚀 Basic Usage

### CLI
This package comes with a CLI.

```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
                  file [file ...]

The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.

positional arguments:
  files                 File(s) to be analysed

optional arguments:
  -h, --help            show this help message and exit
  -v, --verbose         Display complementary information about file if any.
                        Stdout will contain logs about the detection process.
  -a, --with-alternative
                        Output complementary possibilities if any. Top-level
                        JSON WILL be a list.
  -n, --normalize       Permit to normalize input file. If not set, program
                        does not write anything.
  -m, --minimal         Only output the charset detected to STDOUT. Disabling
                        JSON output.
  -r, --replace         Replace file when trying to normalize it instead of
                        creating a new one.
  -f, --force           Replace file without asking if you are sure, use this
                        flag with caution.
  -t THRESHOLD, --threshold THRESHOLD
                        Define a custom maximum amount of chaos allowed in
                        decoded content. 0. <= chaos <= 1.
  --version             Show version information and exit.
```

```bash
normalizer ./data/sample.1.fr.srt
```

🎉 Since version 1.4.0 the CLI produces an easily usable stdout result in JSON format.

```json
{
    "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
    "encoding": "cp1252",
    "encoding_aliases": [
        "1252",
        "windows_1252"
    ],
    "alternative_encodings": [
        "cp1254",
        "cp1256",
        "cp1258",
        "iso8859_14",
        "iso8859_15",
        "iso8859_16",
        "iso8859_3",
        "iso8859_9",
        "latin_1",
        "mbcs"
    ],
    "language": "French",
    "alphabets": [
        "Basic Latin",
        "Latin-1 Supplement"
    ],
    "has_sig_or_bom": false,
    "chaos": 0.149,
    "coherence": 97.152,
    "unicode_path": null,
    "is_preferred": true
}
```

### Python
*Just print out normalized text*
```python
from charset_normalizer import from_path

results = from_path('./my_subtitle.srt')

print(str(results.best()))
```

*Upgrade your code without effort*
```python
from charset_normalizer import detect
```

The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.

See the docs for advanced usage: [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
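*A hedged sketch of the drop-in `detect` API (the sample bytes are invented; `should_rename_legacy` is the keyword described in the 3.1.0 changelog entry further down):*

```python
from charset_normalizer import detect

# Hypothetical sample payload; any bytes object works here.
payload = "Comment ça va ?".encode("cp1252")

# chardet-style result: a dict exposing "encoding", "language" and "confidence".
result = detect(payload)
print(result["encoding"], result["confidence"])

# Assumption based on the 3.1.0 notes below: legacy charset names can be
# renamed to their modern equivalents via this keyword.
result = detect(payload, should_rename_legacy=True)
print(result["encoding"])
```

Since the result is a plain dict, existing chardet call sites should keep working unchanged.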
## 😇 Why

When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down from a good challenge!

I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered strings.**
What I want is to get readable text, the best I can.

In a way, **I'm brute forcing text decoding.** How cool is that? 😎

Don't confuse the package **ftfy** with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer's is to convert a raw file in an unknown encoding to Unicode.

## 🍰 How

 - Discard all charset encoding tables that could not fit the binary content.
 - Measure the noise, or the mess, once opened (by chunks) with a corresponding charset encoding.
 - Extract the matches with the lowest mess detected.
 - Additionally, we measure coherence / probe for a language.

**Wait a minute**, what are noise/mess and coherence according to **YOU?**

*Noise:* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess.
I know that my interpretation of what is noise is probably incomplete; feel free to contribute in order to
improve or rewrite it.

*Coherence:* For each language on earth, we have computed ranked letter-appearance occurrences (the best we can). So I thought
that intel is worth something here. I use those records against decoded text to check if I can detect intelligent design.
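*To watch those noise/coherence decisions as they happen, here is a minimal sketch, assuming the `explain` kwarg and the `set_logging_handler` helper from the 2.0.8 changelog entry below behave as documented:*

```python
import logging
from charset_normalizer import from_bytes, set_logging_handler

# Assumption from the 2.0.8 notes below: this helper attaches a StreamHandler
# to the library logger at the chosen level.
set_logging_handler(level=logging.DEBUG)

# explain=True temporarily adds a verbose handler for this single call and
# logs how each candidate encoding is kept or discarded.
best_guess = from_bytes("Bonjour tout le monde !".encode("utf_8"), explain=True).best()
print(best_guess.encoding if best_guess else "no match")
```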
## ⚡ Known limitations

 - Language detection is unreliable when the text contains two or more languages sharing identical letters. (e.g. HTML (English tags) + Turkish content (sharing Latin characters))
 - Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.

## ⚠️ About Python EOLs

**If you are running:**

- Python >=2.7,<3.5: Unsupported
- Python 3.5: charset-normalizer < 2.1
- Python 3.6: charset-normalizer < 3.1

Upgrade your Python interpreter as soon as possible.

## 👤 Contributing

Contributions, issues and feature requests are very much welcome.
Feel free to check the [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.

## 📝 License

Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).
\nThis project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.\n\nCharacters frequencies used in this project \u00a9 2012 [Denny Vrande\u010di\u0107](http://simia.net/letters/)\n\n## \ud83d\udcbc For Enterprise\n\nProfessional support for charset-normalizer is available as part of the [Tidelift\nSubscription][1]. Tidelift gives software development teams a single source for\npurchasing and maintaining their software, with professional grade assurances\nfrom the experts who know it best, while seamlessly integrating with existing\ntools.\n\n[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme\n\n# Changelog\nAll notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\nThe format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).\n\n## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)\n\n### Changed\n- Typehint for function `from_path` no longer enforce `PathLike` as its first argument\n- Minor improvement over the global detection reliability\n\n### Added\n- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries\n- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)\n- Explicit support for Python 3.12\n\n### Fixed\n- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)\n\n## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)\n\n### Added\n- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)\n\n### Removed\n- Support for Python 3.6 (PR #260)\n\n### Changed\n- Optional speedup provided by mypy/c 1.0.1\n\n## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)\n\n### Fixed\n- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)\n\n### Changed\n- Speedup provided by mypy/c 0.990 on Python >= 3.7\n\n## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)\n\n### Added\n- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results\n- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES\n- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio\n- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)\n\n### Changed\n- Build with static metadata using 'build' frontend\n- Make the language detection stricter\n- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1\n\n### Fixed\n- CLI with opt --normalize fail when using full path for files\n- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it\n- Sphinx warnings when generating the documentation\n\n### Removed\n- Coherence detector no longer return 'Simple English' instead return 'English'\n- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'\n- Breaking: Method `first()` and `best()` from CharsetMatch\n- UTF-7 will no longer appear 
as \"detected\" without a recognized SIG/mark (is unreliable/conflict with ASCII)\n- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches\n- Breaking: Top-level function `normalize`\n- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch\n- Support for the backport `unicodedata2`\n\n## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)\n\n### Added\n- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results\n- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES\n- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio\n\n### Changed\n- Build with static metadata using 'build' frontend\n- Make the language detection stricter\n\n### Fixed\n- CLI with opt --normalize fail when using full path for files\n- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it\n\n### Removed\n- Coherence detector no longer return 'Simple English' instead return 'English'\n- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'\n\n## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)\n\n### Added\n- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)\n\n### Removed\n- Breaking: Method `first()` and `best()` from CharsetMatch\n- UTF-7 will no longer appear as \"detected\" without a recognized SIG/mark (is unreliable/conflict with ASCII)\n\n### Fixed\n- Sphinx warnings when generating the documentation\n\n## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)\n\n### Changed\n- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1\n\n### Removed\n- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches\n- Breaking: Top-level function `normalize`\n- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch\n- Support for the backport `unicodedata2`\n\n## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)\n\n### Deprecated\n- Function `normalize` scheduled for removal in 3.0\n\n### Changed\n- Removed useless call to decode in fn is_unprintable (#206)\n\n### Fixed\n- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)\n\n## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)\n\n### Added\n- Output the Unicode table version when running the CLI with `--version` (PR #194)\n\n### Changed\n- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)\n- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)\n\n### Fixed\n- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)\n- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)\n\n### Removed\n- Support for Python 3.5 (PR 
#192)\n\n### Deprecated\n- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)\n\n## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)\n\n### Fixed\n- ASCII miss-detection on rare cases (PR #170) \n\n## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)\n\n### Added\n- Explicit support for Python 3.11 (PR #164)\n\n### Changed\n- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)\n\n## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)\n\n### Fixed\n- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)\n\n### Changed\n- Skipping the language-detection (CD) on ASCII (PR #155)\n\n## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)\n\n### Changed\n- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)\n\n### Fixed\n- Wrong logging level applied when setting kwarg `explain` to True (PR #146)\n\n## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)\n### Changed\n- Improvement over Vietnamese detection (PR #126)\n- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)\n- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)\n- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)\n- Code style as refactored by Sourcery-AI (PR #131) \n- Minor adjustment on the MD around european words (PR #133)\n- Remove and replace SRTs from assets / tests (PR #139)\n- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)\n- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)\n\n### Fixed\n- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)\n- Avoid using too insignificant chunk (PR #137)\n\n### Added\n- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)\n- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)\n\n## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)\n### Added\n- Add support for Kazakh (Cyrillic) language detection (PR #109)\n\n### Changed\n- Further, improve inferring the language from a given single-byte code page (PR #112)\n- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)\n- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)\n- Various detection improvement (MD+CD) (PR #117)\n\n### Removed\n- Remove redundant logging entry about detected language(s) (PR #115)\n\n### Fixed\n- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)\n\n## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)\n### Fixed\n- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)\n- Fix CLI crash when using --minimal output in certain cases (PR #103)\n\n### Changed\n- Minor improvement to the detection efficiency (less than 1%) (PR #106 
#101)\n\n## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)\n### Changed\n- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)\n- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)\n- The Unicode detection is slightly improved (PR #93)\n- Add syntax sugar \\_\\_bool\\_\\_ for results CharsetMatches list-container (PR #91)\n\n### Removed\n- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92)\n\n### Fixed\n- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)\n- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)\n- The MANIFEST.in was not exhaustive (PR #78)\n\n## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)\n### Fixed\n- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70)\n- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)\n- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)\n- Submatch factoring could be wrong in rare edge cases (PR #72)\n- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)\n- Fix line endings from CRLF to LF for certain project files (PR #67)\n\n### Changed\n- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)\n- Allow fallback on specified encoding if any (PR #71)\n\n## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)\n### Changed\n- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)\n- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)\n\n## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)\n### Fixed\n- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59) \n\n### Changed\n- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)\n\n## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)\n### Fixed\n- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)\n- Using explain=False permanently disable the verbose output in the current runtime (PR #47)\n- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47)\n- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)\n\n### Changed\n- Public function normalize default args values were not aligned with from_bytes (PR #53)\n\n### Added\n- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)\n\n## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)\n### Changed\n- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.\n- Accent has been made on UTF-8 detection, should perform rather instantaneous.\n- The backward compatibility with Chardet has been greatly improved. 
The legacy detect function returns an identical charset name whenever possible.\n- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)\n- The program has been rewritten to ease the readability and maintainability. (+Using static typing)+\n- utf_7 detection has been reinstated.\n\n### Removed\n- This package no longer require anything when used with Python 3.5 (Dropped cached_property)\n- Removed support for these languages: Catalan, Esperanto, Kazakh, Baque, Volap\u00fck, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.\n- The exception hook on UnicodeDecodeError has been removed.\n\n### Deprecated\n- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0\n\n### Fixed\n- The CLI output used the relative path of the file(s). Should be absolute.\n\n## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)\n### Fixed\n- Logger configuration/usage no longer conflict with others (PR #44)\n\n## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)\n### Removed\n- Using standard logging instead of using the package loguru.\n- Dropping nose test framework in favor of the maintained pytest.\n- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.\n- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.\n- Stop support for UTF-7 that does not contain a SIG.\n- Dropping PrettyTable, replaced with pure JSON output in CLI.\n\n### Fixed\n- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.\n- Not searching properly for the BOM when trying utf32/16 parent codec.\n\n### Changed\n- Improving the package final size by compressing frequencies.json.\n- Huge improvement over the larges payload.\n\n### Added\n- CLI now produces JSON consumable output.\n- Return ASCII if given sequences fit. Given reasonable confidence.\n\n## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)\n\n### Fixed\n- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)\n\n## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)\n\n### Fixed\n- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)\n\n## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)\n\n### Fixed\n- The legacy detect function should return UTF-8-SIG if sig is present in the payload. 
(PR #38)\n\n## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)\n\n### Changed\n- Amend the previous release to allow prettytable 2.0 (PR #35)\n\n## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)\n\n### Fixed\n- Fix error while using the package with a python pre-release interpreter (PR #33)\n\n### Changed\n- Dependencies refactoring, constraints revised.\n\n### Added\n- Add python 3.9 and 3.10 to the supported interpreters\n\nMIT License\n\nCopyright (c) 2019 TAHRI Ahmed R.\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.", - "release_date": "2023-07-07T20:19:09", + "description": "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.\n

Charset Detection, for Everyone 👋

The Real First Universal Charset Detector
[header badges removed]

Featured Packages
[badges removed]

In other language (unofficial port - by the community)
[badges removed]

> A library that helps you read text from an unknown charset encoding. Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.

>>>>> 👉 Try Me Online Now, Then Adopt Me 👈 <<<<<
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.

| Feature                                          | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|--------------------------------------------------|:---------------------------------------------:|:------------------:|:-----------------------------------------------:|
| `Fast`                                           | ❌ | ✅ | ✅ |
| `Universal**`                                    | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards    | ✅ | ✅ | ✅ |
| `License`                                        | LGPL-2.1 _restrictive_ | MIT | MPL-1.1 _restrictive_ |
| `Native Python`                                  | ✅ | ✅ | ❌ |
| `Detect spoken language`                         | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety`                      | ❌ | ✅ | ❌ |
| `Whl Size (min)`                                 | 193.6 kB | 42 kB | ~200 kB |
| `Supported Encoding`                             | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |

[reading-normalized-text and cat-reading-text illustrations removed]

*\*\* : They are clearly using specific code for a specific encoding even if covering most of used one*
Did you get there because of the logs? See [https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html](https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html)

## ⚡ Performance

This package offers better performance than its counterpart Chardet. Here are some numbers.

| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) | 86 % | 200 ms | 5 file/sec |
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |

| Package | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) | 1200 ms | 287 ms | 23 ms |
| charset-normalizer | 100 ms | 50 ms | 5 ms |

Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.

> Stats are generated from 400+ files using default parameters. For more details on the files used, see the GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays heavily depend on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet's accuracy versus ours is measured using Chardet's initial
> capability (e.g. supported encodings). Challenge them if you want.

## ✨ Installation

Using pip:

```sh
pip install charset-normalizer -U
```

## 🚀 Basic Usage

### CLI
This package comes with a CLI.

```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
                  file [file ...]

The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.

positional arguments:
  files                 File(s) to be analysed

optional arguments:
  -h, --help            show this help message and exit
  -v, --verbose         Display complementary information about file if any.
                        Stdout will contain logs about the detection process.
  -a, --with-alternative
                        Output complementary possibilities if any. Top-level
                        JSON WILL be a list.
  -n, --normalize       Permit to normalize input file. If not set, program
                        does not write anything.
  -m, --minimal         Only output the charset detected to STDOUT. Disabling
                        JSON output.
  -r, --replace         Replace file when trying to normalize it instead of
                        creating a new one.
  -f, --force           Replace file without asking if you are sure, use this
                        flag with caution.
  -t THRESHOLD, --threshold THRESHOLD
                        Define a custom maximum amount of chaos allowed in
                        decoded content. 0. <= chaos <= 1.
  --version             Show version information and exit.
```

```bash
normalizer ./data/sample.1.fr.srt
```

or

```bash
python -m charset_normalizer ./data/sample.1.fr.srt
```

🎉 Since version 1.4.0 the CLI produces an easily usable stdout result in JSON format.

```json
{
    "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
    "encoding": "cp1252",
    "encoding_aliases": [
        "1252",
        "windows_1252"
    ],
    "alternative_encodings": [
        "cp1254",
        "cp1256",
        "cp1258",
        "iso8859_14",
        "iso8859_15",
        "iso8859_16",
        "iso8859_3",
        "iso8859_9",
        "latin_1",
        "mbcs"
    ],
    "language": "French",
    "alphabets": [
        "Basic Latin",
        "Latin-1 Supplement"
    ],
    "has_sig_or_bom": false,
    "chaos": 0.149,
    "coherence": 97.152,
    "unicode_path": null,
    "is_preferred": true
}
```

### Python
*Just print out normalized text*
```python
from charset_normalizer import from_path

results = from_path('./my_subtitle.srt')

print(str(results.best()))
```

*Upgrade your code without effort*
```python
from charset_normalizer import detect
```

The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.

See the docs for advanced usage: [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
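*For in-memory payloads, a minimal sketch using `from_bytes` together with the `is_binary` helper introduced in 3.2.0 (see the changelog below); the sample bytes are invented:*

```python
from charset_normalizer import from_bytes, is_binary

# Invented in-memory payload; from_bytes mirrors from_path for raw bytes.
matches = from_bytes("Déjà vu, señor!".encode("cp1252"))
best_guess = matches.best()
if best_guess is not None:
    print(best_guess.encoding)  # a plausible single-byte table, e.g. cp1252
    print(str(best_guess))      # the payload decoded to Unicode

# is_binary (see the 3.2.0 entry below) flags payloads that are not text.
print(is_binary(b"\x00\xff\x00\xff"))           # expected: True
print(is_binary("plain text".encode("utf_8")))  # expected: False
```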
## 😇 Why

When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down from a good challenge!

I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered strings.**
What I want is to get readable text, the best I can.

In a way, **I'm brute forcing text decoding.** How cool is that? 😎

Don't confuse the package **ftfy** with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer's is to convert a raw file in an unknown encoding to Unicode.

## 🍰 How

 - Discard all charset encoding tables that could not fit the binary content.
 - Measure the noise, or the mess, once opened (by chunks) with a corresponding charset encoding.
 - Extract the matches with the lowest mess detected.
 - Additionally, we measure coherence / probe for a language.

**Wait a minute**, what are noise/mess and coherence according to **YOU?**

*Noise:* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess.
I know that my interpretation of what is noise is probably incomplete; feel free to contribute in order to
improve or rewrite it.

*Coherence:* For each language on earth, we have computed ranked letter-appearance occurrences (the best we can). So I thought
that intel is worth something here. I use those records against decoded text to check if I can detect intelligent design.
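*To make those steps concrete, a toy re-enactment in pure Python (explicitly not the library's actual mess detector):*

```python
# Toy re-enactment of the steps above -- NOT the library's actual mess
# detector, which is far more nuanced than this printable-ratio check.
CANDIDATES = ("ascii", "utf_8", "cp1252", "utf_16")

def toy_noise(text: str) -> float:
    """Fraction of characters that look unprintable (a crude 'mess' score)."""
    bad = sum(1 for ch in text if not ch.isprintable() and ch not in "\r\n\t")
    return bad / max(len(text), 1)

payload = "café".encode("utf_8")  # five bytes: b"caf\xc3\xa9"

for name in CANDIDATES:
    try:
        decoded = payload.decode(name)
    except UnicodeDecodeError:
        print(f"{name}: discarded (this table cannot fit the binary content)")
        continue
    print(f"{name}: noise={toy_noise(decoded):.2f} -> {decoded!r}")
```

Running it, `ascii` and `utf_16` are discarded outright, while the surviving tables are ranked; the library then adds the coherence probe on top to break ties such as `utf_8` versus `cp1252`.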
## ⚡ Known limitations

 - Language detection is unreliable when the text contains two or more languages sharing identical letters. (e.g. HTML (English tags) + Turkish content (sharing Latin characters))
 - Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.

## ⚠️ About Python EOLs

**If you are running:**

- Python >=2.7,<3.5: Unsupported
- Python 3.5: charset-normalizer < 2.1
- Python 3.6: charset-normalizer < 3.1
- Python 3.7: charset-normalizer < 4.0

Upgrade your Python interpreter as soon as possible.

## 👤 Contributing

Contributions, issues and feature requests are very much welcome.
Feel free to check the [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.

## 📝 License

Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).
\nThis project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.\n\nCharacters frequencies used in this project \u00a9 2012 [Denny Vrande\u010di\u0107](http://simia.net/letters/)\n\n## \ud83d\udcbc For Enterprise\n\nProfessional support for charset-normalizer is available as part of the [Tidelift\nSubscription][1]. Tidelift gives software development teams a single source for\npurchasing and maintaining their software, with professional grade assurances\nfrom the experts who know it best, while seamlessly integrating with existing\ntools.\n\n[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme\n\n# Changelog\nAll notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\nThe format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).\n\n## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)\n\n### Added\n- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`\n- Support for 9 forgotten encoding that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)\n\n### Removed\n- (internal) Redundant utils.is_ascii function and unused function is_private_use_only\n- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant\n\n### Changed\n- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection\n- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.7\n\n### Fixed\n- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \\_\\_lt\\_\\_ (#350)\n\n## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)\n\n### Changed\n- Typehint for function `from_path` no longer enforce `PathLike` as its first argument\n- Minor improvement over the global detection reliability\n\n### Added\n- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries\n- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)\n- Explicit support for Python 3.12\n\n### Fixed\n- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)\n\n## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)\n\n### Added\n- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)\n\n### Removed\n- Support for Python 3.6 (PR #260)\n\n### Changed\n- Optional speedup provided by mypy/c 1.0.1\n\n## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)\n\n### Fixed\n- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)\n\n### Changed\n- Speedup provided by mypy/c 0.990 on Python >= 3.7\n\n## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)\n\n### Added\n- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results\n- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES\n- Add parameter `language_threshold` in `from_bytes`, `from_path` and 
`from_fp` to adjust the minimum expected coherence ratio\n- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)\n\n### Changed\n- Build with static metadata using 'build' frontend\n- Make the language detection stricter\n- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1\n\n### Fixed\n- CLI with opt --normalize fail when using full path for files\n- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it\n- Sphinx warnings when generating the documentation\n\n### Removed\n- Coherence detector no longer return 'Simple English' instead return 'English'\n- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'\n- Breaking: Method `first()` and `best()` from CharsetMatch\n- UTF-7 will no longer appear as \"detected\" without a recognized SIG/mark (is unreliable/conflict with ASCII)\n- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches\n- Breaking: Top-level function `normalize`\n- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch\n- Support for the backport `unicodedata2`\n\n## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)\n\n### Added\n- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results\n- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES\n- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio\n\n### Changed\n- Build with static metadata using 'build' frontend\n- Make the language detection stricter\n\n### Fixed\n- CLI with opt --normalize fail when using full path for files\n- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it\n\n### Removed\n- Coherence detector no longer return 'Simple English' instead return 'English'\n- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'\n\n## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)\n\n### Added\n- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)\n\n### Removed\n- Breaking: Method `first()` and `best()` from CharsetMatch\n- UTF-7 will no longer appear as \"detected\" without a recognized SIG/mark (is unreliable/conflict with ASCII)\n\n### Fixed\n- Sphinx warnings when generating the documentation\n\n## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)\n\n### Changed\n- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1\n\n### Removed\n- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches\n- Breaking: Top-level function `normalize`\n- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch\n- Support for the backport `unicodedata2`\n\n## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)\n\n### Deprecated\n- Function `normalize` scheduled for removal in 3.0\n\n### Changed\n- Removed useless call to decode in fn is_unprintable (#206)\n\n### Fixed\n- Third-party 
library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)\n\n## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)\n\n### Added\n- Output the Unicode table version when running the CLI with `--version` (PR #194)\n\n### Changed\n- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)\n- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)\n\n### Fixed\n- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)\n- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)\n\n### Removed\n- Support for Python 3.5 (PR #192)\n\n### Deprecated\n- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)\n\n## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)\n\n### Fixed\n- ASCII miss-detection on rare cases (PR #170) \n\n## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)\n\n### Added\n- Explicit support for Python 3.11 (PR #164)\n\n### Changed\n- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)\n\n## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)\n\n### Fixed\n- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)\n\n### Changed\n- Skipping the language-detection (CD) on ASCII (PR #155)\n\n## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)\n\n### Changed\n- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)\n\n### Fixed\n- Wrong logging level applied when setting kwarg `explain` to True (PR #146)\n\n## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)\n### Changed\n- Improvement over Vietnamese detection (PR #126)\n- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)\n- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)\n- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)\n- Code style as refactored by Sourcery-AI (PR #131) \n- Minor adjustment on the MD around european words (PR #133)\n- Remove and replace SRTs from assets / tests (PR #139)\n- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)\n- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)\n\n### Fixed\n- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)\n- Avoid using too insignificant chunk (PR #137)\n\n### Added\n- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)\n- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)\n\n## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)\n### Added\n- Add support for Kazakh (Cyrillic) language detection (PR #109)\n\n### Changed\n- Further, improve 
inferring the language from a given single-byte code page (PR #112)\n- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)\n- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)\n- Various detection improvement (MD+CD) (PR #117)\n\n### Removed\n- Remove redundant logging entry about detected language(s) (PR #115)\n\n### Fixed\n- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)\n\n## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)\n### Fixed\n- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)\n- Fix CLI crash when using --minimal output in certain cases (PR #103)\n\n### Changed\n- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)\n\n## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)\n### Changed\n- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)\n- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)\n- The Unicode detection is slightly improved (PR #93)\n- Add syntax sugar \\_\\_bool\\_\\_ for results CharsetMatches list-container (PR #91)\n\n### Removed\n- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92)\n\n### Fixed\n- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)\n- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)\n- The MANIFEST.in was not exhaustive (PR #78)\n\n## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)\n### Fixed\n- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70)\n- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)\n- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)\n- Submatch factoring could be wrong in rare edge cases (PR #72)\n- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)\n- Fix line endings from CRLF to LF for certain project files (PR #67)\n\n### Changed\n- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)\n- Allow fallback on specified encoding if any (PR #71)\n\n## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)\n### Changed\n- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)\n- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)\n\n## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)\n### Fixed\n- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59) \n\n### Changed\n- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)\n\n## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)\n### Fixed\n- Make it work where there isn't a filesystem available, dropping assets frequencies.json. 
Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)\n- Using explain=False permanently disable the verbose output in the current runtime (PR #47)\n- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47)\n- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)\n\n### Changed\n- Public function normalize default args values were not aligned with from_bytes (PR #53)\n\n### Added\n- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)\n\n## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)\n### Changed\n- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.\n- Accent has been made on UTF-8 detection, should perform rather instantaneous.\n- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.\n- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)\n- The program has been rewritten to ease the readability and maintainability. (+Using static typing)+\n- utf_7 detection has been reinstated.\n\n### Removed\n- This package no longer require anything when used with Python 3.5 (Dropped cached_property)\n- Removed support for these languages: Catalan, Esperanto, Kazakh, Baque, Volap\u00fck, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.\n- The exception hook on UnicodeDecodeError has been removed.\n\n### Deprecated\n- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0\n\n### Fixed\n- The CLI output used the relative path of the file(s). Should be absolute.\n\n## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)\n### Fixed\n- Logger configuration/usage no longer conflict with others (PR #44)\n\n## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)\n### Removed\n- Using standard logging instead of using the package loguru.\n- Dropping nose test framework in favor of the maintained pytest.\n- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.\n- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.\n- Stop support for UTF-7 that does not contain a SIG.\n- Dropping PrettyTable, replaced with pure JSON output in CLI.\n\n### Fixed\n- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.\n- Not searching properly for the BOM when trying utf32/16 parent codec.\n\n### Changed\n- Improving the package final size by compressing frequencies.json.\n- Huge improvement over the larges payload.\n\n### Added\n- CLI now produces JSON consumable output.\n- Return ASCII if given sequences fit. Given reasonable confidence.\n\n## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)\n\n### Fixed\n- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)\n\n## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)\n\n### Fixed\n- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. 
(PR #39)\n\n## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)\n\n### Fixed\n- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)\n\n## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)\n\n### Changed\n- Amend the previous release to allow prettytable 2.0 (PR #35)\n\n## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)\n\n### Fixed\n- Fix error while using the package with a python pre-release interpreter (PR #33)\n\n### Changed\n- Dependencies refactoring, constraints revised.\n\n### Added\n- Add python 3.9 and 3.10 to the supported interpreters\n\nMIT License\n\nCopyright (c) 2019 TAHRI Ahmed R.\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.", + "release_date": "2023-09-30T09:12:43", "parties": [ { "type": "person", @@ -865,11 +861,11 @@ "Typing :: Typed" ], "homepage_url": "https://github.com/Ousret/charset_normalizer", - "download_url": "https://files.pythonhosted.org/packages/2a/53/cf0a48de1bdcf6ff6e1c9a023f5f523dfe303e4024f216feac64b6eb7f67/charset-normalizer-3.2.0.tar.gz", - "size": 97063, + "download_url": "https://files.pythonhosted.org/packages/cf/ac/e89b2f2f75f51e9859979b56d2ec162f7f893221975d244d8d5277aa9489/charset-normalizer-3.3.0.tar.gz", + "size": 103776, "sha1": null, - "md5": "dbb8c5b745beddbaae67d06dce0b7c29", - "sha256": "3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace", + "md5": "c18756e76247680573aa6ca30fc915a7", + "sha256": "63563193aec44bce707e0c5ca64ff69fa72ed7cf34ce6e11d5127555756fd2f6", "sha512": null, "bug_tracking_url": null, "code_view_url": null, @@ -889,9 +885,9 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/charset-normalizer/3.2.0/json", + "api_data_url": "https://pypi.org/pypi/charset-normalizer/3.3.0/json", "datasource_id": null, - "purl": "pkg:pypi/charset-normalizer@3.2.0" + "purl": "pkg:pypi/charset-normalizer@3.3.0" }, { "type": "pypi", @@ -2327,12 +2323,12 @@ "type": "pypi", "namespace": null, "name": "urllib3", - "version": "2.0.5", + "version": "2.0.6", "qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "HTTP library with thread-safe connection pooling, file post, and more.\n

\n\n![urllib3](https://github.com/urllib3/urllib3/raw/main/docs/_static/banner_github.svg)\n\n

\n\n

\n \"PyPI\n \"Python\n \"Join\n \"Coverage\n \"Build\n \"Documentation
\n \"OpenSSF\n \"SLSA\n \"CII\n

\n\nurllib3 is a powerful, *user-friendly* HTTP client for Python. Much of the\nPython ecosystem already uses urllib3 and you should too.\nurllib3 brings many critical features that are missing from the Python\nstandard libraries:\n\n- Thread safety.\n- Connection pooling.\n- Client-side SSL/TLS verification.\n- File uploads with multipart encoding.\n- Helpers for retrying requests and dealing with HTTP redirects.\n- Support for gzip, deflate, brotli, and zstd encoding.\n- Proxy support for HTTP and SOCKS.\n- 100% test coverage.\n\nurllib3 is powerful and easy to use:\n\n```python3\n>>> import urllib3\n>>> resp = urllib3.request(\"GET\", \"http://httpbin.org/robots.txt\")\n>>> resp.status\n200\n>>> resp.data\nb\"User-agent: *\\nDisallow: /deny\\n\"\n```\n\n## Installing\n\nurllib3 can be installed with [pip](https://pip.pypa.io):\n\n```bash\n$ python -m pip install urllib3\n```\n\nAlternatively, you can grab the latest source code from [GitHub](https://github.com/urllib3/urllib3):\n\n```bash\n$ git clone https://github.com/urllib3/urllib3.git\n$ cd urllib3\n$ pip install .\n```\n\n\n## Documentation\n\nurllib3 has usage and reference documentation at [urllib3.readthedocs.io](https://urllib3.readthedocs.io).\n\n\n## Community\n\nurllib3 has a [community Discord channel](https://discord.gg/urllib3) for asking questions and\ncollaborating with other contributors. Drop by and say hello \ud83d\udc4b\n\n\n## Contributing\n\nurllib3 happily accepts contributions. Please see our\n[contributing documentation](https://urllib3.readthedocs.io/en/latest/contributing.html)\nfor some tips on getting started.\n\n\n## Security Disclosures\n\nTo report a security vulnerability, please use the\n[Tidelift security contact](https://tidelift.com/security).\nTidelift will coordinate the fix and disclosure with maintainers.\n\n\n## Maintainers\n\n- [@sethmlarson](https://github.com/sethmlarson) (Seth M. Larson)\n- [@pquentin](https://github.com/pquentin) (Quentin Pradet)\n- [@theacodes](https://github.com/theacodes) (Thea Flowers)\n- [@haikuginger](https://github.com/haikuginger) (Jess Shapiro)\n- [@lukasa](https://github.com/lukasa) (Cory Benfield)\n- [@sigmavirus24](https://github.com/sigmavirus24) (Ian Stapleton Cordasco)\n- [@shazow](https://github.com/shazow) (Andrey Petrov)\n\n\ud83d\udc4b\n\n\n## Sponsorship\n\nIf your company benefits from this library, please consider [sponsoring its\ndevelopment](https://urllib3.readthedocs.io/en/latest/sponsors.html).\n\n\n## For Enterprise\n\nProfessional support for urllib3 is available as part of the [Tidelift\nSubscription][1]. 
Tidelift gives software development teams a single source for\npurchasing and maintaining their software, with professional grade assurances\nfrom the experts who know it best, while seamlessly integrating with existing\ntools.\n\n[1]: https://tidelift.com/subscription/pkg/pypi-urllib3?utm_source=pypi-urllib3&utm_medium=referral&utm_campaign=readme", - "release_date": "2023-09-20T07:30:33", + "release_date": "2023-10-02T17:22:34", "parties": [ { "type": "person", @@ -2376,11 +2372,11 @@ "Topic :: Software Development :: Libraries" ], "homepage_url": "", - "download_url": "https://files.pythonhosted.org/packages/37/dc/399e63f5d1d96bb643404ee830657f4dfcf8503f5ba8fa3c6d465d0c57fe/urllib3-2.0.5-py3-none-any.whl", - "size": 123792, + "download_url": "https://files.pythonhosted.org/packages/26/40/9957270221b6d3e9a3b92fdfba80dd5c9661ff45a664b47edd5d00f707f5/urllib3-2.0.6-py3-none-any.whl", + "size": 123799, "sha1": null, - "md5": "d7a2bde43ed2ebf36b6e09e01cce0411", - "sha256": "ef16afa8ba34a1f989db38e1dbbe0c302e4289a47856990d0682e374563ce35e", + "md5": "b39061189ea3bd9378aaf58a1b485b8a", + "sha256": "7a7c7003b000adf9e7ca2a377c9688bbc54ed41b985789ed576570342a375cd2", "sha512": null, "bug_tracking_url": null, "code_view_url": "https://github.com/urllib3/urllib3", @@ -2399,20 +2395,20 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/urllib3/2.0.5/json", + "api_data_url": "https://pypi.org/pypi/urllib3/2.0.6/json", "datasource_id": null, - "purl": "pkg:pypi/urllib3@2.0.5" + "purl": "pkg:pypi/urllib3@2.0.6" }, { "type": "pypi", "namespace": null, "name": "urllib3", - "version": "2.0.5", + "version": "2.0.6", "qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "HTTP library with thread-safe connection pooling, file post, and more.\n
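The hunks above bump urllib3 from 2.0.5 to 2.0.6 and refresh the recorded `size`, `md5`, and `sha256` of each artifact. As a minimal sketch (not part of the fixture), assuming the wheel has already been downloaded to a local path, the fingerprint fields can be recomputed like this; the filename below is a placeholder:

```python
# Sketch: recompute the "size" and "sha256" fields recorded in this fixture
# for a locally downloaded PyPI artifact. The path below is hypothetical.
import hashlib
from pathlib import Path

def artifact_fingerprint(path: str) -> tuple[int, str]:
    data = Path(path).read_bytes()
    return len(data), hashlib.sha256(data).hexdigest()

size, sha256 = artifact_fingerprint("urllib3-2.0.6-py3-none-any.whl")
print(size, sha256)  # expected to match the updated "size"/"sha256" above
```

The same check applies to the sdist entry that follows; `hashlib.md5` would cover the `md5` field in the same way.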

\n\n![urllib3](https://github.com/urllib3/urllib3/raw/main/docs/_static/banner_github.svg)\n\n

\n\n

\n \"PyPI\n \"Python\n \"Join\n \"Coverage\n \"Build\n \"Documentation
\n \"OpenSSF\n \"SLSA\n \"CII\n

\n\nurllib3 is a powerful, *user-friendly* HTTP client for Python. Much of the\nPython ecosystem already uses urllib3 and you should too.\nurllib3 brings many critical features that are missing from the Python\nstandard libraries:\n\n- Thread safety.\n- Connection pooling.\n- Client-side SSL/TLS verification.\n- File uploads with multipart encoding.\n- Helpers for retrying requests and dealing with HTTP redirects.\n- Support for gzip, deflate, brotli, and zstd encoding.\n- Proxy support for HTTP and SOCKS.\n- 100% test coverage.\n\nurllib3 is powerful and easy to use:\n\n```python3\n>>> import urllib3\n>>> resp = urllib3.request(\"GET\", \"http://httpbin.org/robots.txt\")\n>>> resp.status\n200\n>>> resp.data\nb\"User-agent: *\\nDisallow: /deny\\n\"\n```\n\n## Installing\n\nurllib3 can be installed with [pip](https://pip.pypa.io):\n\n```bash\n$ python -m pip install urllib3\n```\n\nAlternatively, you can grab the latest source code from [GitHub](https://github.com/urllib3/urllib3):\n\n```bash\n$ git clone https://github.com/urllib3/urllib3.git\n$ cd urllib3\n$ pip install .\n```\n\n\n## Documentation\n\nurllib3 has usage and reference documentation at [urllib3.readthedocs.io](https://urllib3.readthedocs.io).\n\n\n## Community\n\nurllib3 has a [community Discord channel](https://discord.gg/urllib3) for asking questions and\ncollaborating with other contributors. Drop by and say hello \ud83d\udc4b\n\n\n## Contributing\n\nurllib3 happily accepts contributions. Please see our\n[contributing documentation](https://urllib3.readthedocs.io/en/latest/contributing.html)\nfor some tips on getting started.\n\n\n## Security Disclosures\n\nTo report a security vulnerability, please use the\n[Tidelift security contact](https://tidelift.com/security).\nTidelift will coordinate the fix and disclosure with maintainers.\n\n\n## Maintainers\n\n- [@sethmlarson](https://github.com/sethmlarson) (Seth M. Larson)\n- [@pquentin](https://github.com/pquentin) (Quentin Pradet)\n- [@theacodes](https://github.com/theacodes) (Thea Flowers)\n- [@haikuginger](https://github.com/haikuginger) (Jess Shapiro)\n- [@lukasa](https://github.com/lukasa) (Cory Benfield)\n- [@sigmavirus24](https://github.com/sigmavirus24) (Ian Stapleton Cordasco)\n- [@shazow](https://github.com/shazow) (Andrey Petrov)\n\n\ud83d\udc4b\n\n\n## Sponsorship\n\nIf your company benefits from this library, please consider [sponsoring its\ndevelopment](https://urllib3.readthedocs.io/en/latest/sponsors.html).\n\n\n## For Enterprise\n\nProfessional support for urllib3 is available as part of the [Tidelift\nSubscription][1]. 
Tidelift gives software development teams a single source for\npurchasing and maintaining their software, with professional grade assurances\nfrom the experts who know it best, while seamlessly integrating with existing\ntools.\n\n[1]: https://tidelift.com/subscription/pkg/pypi-urllib3?utm_source=pypi-urllib3&utm_medium=referral&utm_campaign=readme", - "release_date": "2023-09-20T07:30:34", + "release_date": "2023-10-02T17:22:36", "parties": [ { "type": "person", @@ -2456,11 +2452,11 @@ "Topic :: Software Development :: Libraries" ], "homepage_url": "", - "download_url": "https://files.pythonhosted.org/packages/51/13/62cb4a0af89fdf72db4a0ead8026e724c7f3cbf69706d84a4eff439be853/urllib3-2.0.5.tar.gz", - "size": 281697, + "download_url": "https://files.pythonhosted.org/packages/8b/00/db794bb94bf09cadb4ecd031c4295dd4e3536db4da958e20331d95f1edb7/urllib3-2.0.6.tar.gz", + "size": 281846, "sha1": null, - "md5": "4d824b7bba1976591fc05fad02fb258d", - "sha256": "13abf37382ea2ce6fb744d4dad67838eec857c9f4f57009891805e0b5e123594", + "md5": "6ca3ae8220bf1df8697038b94fb6ea48", + "sha256": "b19e1a85d206b56d7df1d5e683df4a7725252a964e3993648dd0fb5a1c157564", "sha512": null, "bug_tracking_url": null, "code_view_url": "https://github.com/urllib3/urllib3", @@ -2479,9 +2475,9 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/urllib3/2.0.5/json", + "api_data_url": "https://pypi.org/pypi/urllib3/2.0.6/json", "datasource_id": null, - "purl": "pkg:pypi/urllib3@2.0.5" + "purl": "pkg:pypi/urllib3@2.0.6" } ], "resolved_dependencies_graph": [ @@ -2513,13 +2509,13 @@ "dependencies": [] }, { - "package": "pkg:pypi/cffi@1.15.1", + "package": "pkg:pypi/cffi@1.16.0", "dependencies": [ "pkg:pypi/pycparser@2.21" ] }, { - "package": "pkg:pypi/charset-normalizer@3.2.0", + "package": "pkg:pypi/charset-normalizer@3.3.0", "dependencies": [] }, { @@ -2529,7 +2525,7 @@ { "package": "pkg:pypi/cryptography@41.0.4", "dependencies": [ - "pkg:pypi/cffi@1.15.1" + "pkg:pypi/cffi@1.16.0" ] }, { @@ -2571,9 +2567,9 @@ "package": "pkg:pypi/requests@2.31.0", "dependencies": [ "pkg:pypi/certifi@2023.7.22", - "pkg:pypi/charset-normalizer@3.2.0", + "pkg:pypi/charset-normalizer@3.3.0", "pkg:pypi/idna@3.4", - "pkg:pypi/urllib3@2.0.5" + "pkg:pypi/urllib3@2.0.6" ] }, { @@ -2585,7 +2581,7 @@ "dependencies": [] }, { - "package": "pkg:pypi/urllib3@2.0.5", + "package": "pkg:pypi/urllib3@2.0.6", "dependencies": [] } ] diff --git a/tests/data/azure-devops.req-38-expected.json b/tests/data/azure-devops.req-38-expected.json index 919c75f0..d4d573fb 100644 --- a/tests/data/azure-devops.req-38-expected.json +++ b/tests/data/azure-devops.req-38-expected.json @@ -4,7 +4,7 @@ "tool_homepageurl": "https://github.com/nexB/python-inspector", "tool_version": "0.9.8", "options": [ - "--requirement /home/tg1999/Desktop/example/tools/python-inspector/tests/data/azure-devops.req.txt", + "--requirement /home/tg1999/Desktop/python-inspector-1/tests/data/azure-devops.req.txt", "--index-url https://pypi.org/simple", "--python-version 38", "--operating-system linux", @@ -17,7 +17,7 @@ "files": [ { "type": "file", - "path": "/home/tg1999/Desktop/example/tools/python-inspector/tests/data/azure-devops.req.txt", + "path": "/home/tg1999/Desktop/python-inspector-1/tests/data/azure-devops.req.txt", "package_data": [ { "type": "pypi", @@ -627,12 +627,12 @@ "type": "pypi", "namespace": null, "name": "cffi", - "version": "1.15.1", + "version": "1.16.0", "qualifiers": {}, "subpath": null, "primary_language": 
"Python", "description": "CFFI\n====\n\nForeign Function Interface for Python calling C code.\nPlease see the `Documentation `_.\n\nContact\n-------\n\n`Mailing list `_", - "release_date": "2022-06-30T18:17:48", + "release_date": "2023-09-28T18:01:36", "parties": [ { "type": "person", @@ -644,26 +644,24 @@ ], "keywords": [ "Programming Language :: Python", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy" ], "homepage_url": "http://cffi.readthedocs.org", - "download_url": "https://files.pythonhosted.org/packages/b7/8b/06f30caa03b5b3ac006de4f93478dbd0239e2a16566d81a106c322dc4f79/cffi-1.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", - "size": 442655, + "download_url": "https://files.pythonhosted.org/packages/f1/c9/326611aa83e16b13b6db4dbb73b5455c668159a003c4c2f0c3bcb2ddabaf/cffi-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + "size": 444654, "sha1": null, - "md5": "2719a841c9d7ea47de38f83ca2487966", - "sha256": "4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192", + "md5": "4937e765ddb1a294320d3f05954eab34", + "sha256": "6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324", "sha512": null, - "bug_tracking_url": null, - "code_view_url": null, + "bug_tracking_url": "https://github.com/python-cffi/cffi/issues", + "code_view_url": "https://github.com/python-cffi/cffi", "vcs_url": null, "copyright": null, "license_expression": null, @@ -680,20 +678,20 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/cffi/1.15.1/json", + "api_data_url": "https://pypi.org/pypi/cffi/1.16.0/json", "datasource_id": null, - "purl": "pkg:pypi/cffi@1.15.1" + "purl": "pkg:pypi/cffi@1.16.0" }, { "type": "pypi", "namespace": null, "name": "cffi", - "version": "1.15.1", + "version": "1.16.0", "qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "CFFI\n====\n\nForeign Function Interface for Python calling C code.\nPlease see the `Documentation `_.\n\nContact\n-------\n\n`Mailing list `_", - "release_date": "2022-06-30T18:18:32", + "release_date": "2023-09-28T18:02:04", "parties": [ { "type": "person", @@ -705,26 +703,24 @@ ], "keywords": [ "Programming Language :: Python", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy" ], "homepage_url": "http://cffi.readthedocs.org", - "download_url": "https://files.pythonhosted.org/packages/2b/a8/050ab4f0c3d4c1b8aaa805f70e26e84d0e27004907c5b8ecc1d31815f92a/cffi-1.15.1.tar.gz", - "size": 508501, + "download_url": 
"https://files.pythonhosted.org/packages/68/ce/95b0bae7968c65473e1298efb042e10cafc7bafc14d9e4f154008241c91d/cffi-1.16.0.tar.gz", + "size": 512873, "sha1": null, - "md5": "f493860a6e98cd0c4178149568a6b4f6", - "sha256": "d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9", + "md5": "0bcaed453da3004d0bea103038345c1e", + "sha256": "bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0", "sha512": null, - "bug_tracking_url": null, - "code_view_url": null, + "bug_tracking_url": "https://github.com/python-cffi/cffi/issues", + "code_view_url": "https://github.com/python-cffi/cffi", "vcs_url": null, "copyright": null, "license_expression": null, @@ -741,20 +737,20 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/cffi/1.15.1/json", + "api_data_url": "https://pypi.org/pypi/cffi/1.16.0/json", "datasource_id": null, - "purl": "pkg:pypi/cffi@1.15.1" + "purl": "pkg:pypi/cffi@1.16.0" }, { "type": "pypi", "namespace": null, "name": "charset-normalizer", - "version": "3.2.0", + "version": "3.3.0", "qualifiers": {}, "subpath": null, "primary_language": "Python", - "description": "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.\n

Charset Detection, for Everyone \ud83d\udc4b

\n\n

\n The Real First Universal Charset Detector
\n \n \n \n \n \"Download\n \n \n \n \n

\n\n> A library that helps you read text from an unknown charset encoding.
Motivated by `chardet`,\n> I'm trying to resolve the issue by taking a new approach.\n> All IANA character set names for which the Python core library provides codecs are supported.\n\n >>>>> \ud83d\udc49 Try Me Online Now, Then Adopt Me \ud83d\udc48 <<<<<\n
\n\nThis project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.\n\n| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |\n|--------------------------------------------------|:---------------------------------------------:|:------------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|\n| `Fast` | \u274c
| \u2705 | \u2705
|\n| `Universal**` | \u274c | \u2705 | \u274c |\n| `Reliable` **without** distinguishable standards | \u274c | \u2705 | \u2705 |\n| `Reliable` **with** distinguishable standards | \u2705 | \u2705 | \u2705 |\n| `License` | LGPL-2.1
_restrictive_ | MIT | MPL-1.1
_restrictive_ |\n| `Native Python` | \u2705 | \u2705 | \u274c |\n| `Detect spoken language` | \u274c | \u2705 | N/A |\n| `UnicodeDecodeError Safety` | \u274c | \u2705 | \u274c |\n| `Whl Size` | 193.6 kB | 40 kB | ~200 kB |\n| `Supported Encoding` | 33 | \ud83c\udf89 [90](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |\n\n

\n\"Reading\"Cat\n\n*\\*\\* : They are clearly using specific code for a specific encoding even if covering most of used one*
\nDid you got there because of the logs? See [https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html](https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html)\n\n## \u26a1 Performance\n\nThis package offer better performance than its counterpart Chardet. Here are some numbers.\n\n| Package | Accuracy | Mean per file (ms) | File per sec (est) |\n|-----------------------------------------------|:--------:|:------------------:|:------------------:|\n| [chardet](https://github.com/chardet/chardet) | 86 % | 200 ms | 5 file/sec |\n| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |\n\n| Package | 99th percentile | 95th percentile | 50th percentile |\n|-----------------------------------------------|:---------------:|:---------------:|:---------------:|\n| [chardet](https://github.com/chardet/chardet) | 1200 ms | 287 ms | 23 ms |\n| charset-normalizer | 100 ms | 50 ms | 5 ms |\n\nChardet's performance on larger file (1MB+) are very poor. Expect huge difference on large payload.\n\n> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.\n> And yes, these results might change at any time. The dataset can be updated to include more files.\n> The actual delays heavily depends on your CPU capabilities. The factors should remain the same.\n> Keep in mind that the stats are generous and that Chardet accuracy vs our is measured using Chardet initial capability\n> (eg. Supported Encoding) Challenge-them if you want.\n\n## \u2728 Installation\n\nUsing pip:\n\n```sh\npip install charset-normalizer -U\n```\n\n## \ud83d\ude80 Basic Usage\n\n### CLI\nThis package comes with a CLI.\n\n```\nusage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]\n file [file ...]\n\nThe Real First Universal Charset Detector. Discover originating encoding used\non text file. Normalize text to unicode.\n\npositional arguments:\n files File(s) to be analysed\n\noptional arguments:\n -h, --help show this help message and exit\n -v, --verbose Display complementary information about file if any.\n Stdout will contain logs about the detection process.\n -a, --with-alternative\n Output complementary possibilities if any. Top-level\n JSON WILL be a list.\n -n, --normalize Permit to normalize input file. If not set, program\n does not write anything.\n -m, --minimal Only output the charset detected to STDOUT. Disabling\n JSON output.\n -r, --replace Replace file when trying to normalize it instead of\n creating a new one.\n -f, --force Replace file without asking if you are sure, use this\n flag with caution.\n -t THRESHOLD, --threshold THRESHOLD\n Define a custom maximum amount of chaos allowed in\n decoded content. 0. 
<= chaos <= 1.\n --version Show version information and exit.\n```\n\n```bash\nnormalizer ./data/sample.1.fr.srt\n```\n\n\ud83c\udf89 Since version 1.4.0 the CLI produce easily usable stdout result in JSON format.\n\n```json\n{\n \"path\": \"/home/default/projects/charset_normalizer/data/sample.1.fr.srt\",\n \"encoding\": \"cp1252\",\n \"encoding_aliases\": [\n \"1252\",\n \"windows_1252\"\n ],\n \"alternative_encodings\": [\n \"cp1254\",\n \"cp1256\",\n \"cp1258\",\n \"iso8859_14\",\n \"iso8859_15\",\n \"iso8859_16\",\n \"iso8859_3\",\n \"iso8859_9\",\n \"latin_1\",\n \"mbcs\"\n ],\n \"language\": \"French\",\n \"alphabets\": [\n \"Basic Latin\",\n \"Latin-1 Supplement\"\n ],\n \"has_sig_or_bom\": false,\n \"chaos\": 0.149,\n \"coherence\": 97.152,\n \"unicode_path\": null,\n \"is_preferred\": true\n}\n```\n\n### Python\n*Just print out normalized text*\n```python\nfrom charset_normalizer import from_path\n\nresults = from_path('./my_subtitle.srt')\n\nprint(str(results.best()))\n```\n\n*Upgrade your code without effort*\n```python\nfrom charset_normalizer import detect\n```\n\nThe above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.\n\nSee the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)\n\n## \ud83d\ude07 Why\n\nWhen I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a\nreliable alternative using a completely different method. Also! I never back down on a good challenge!\n\nI **don't care** about the **originating charset** encoding, because **two different tables** can\nproduce **two identical rendered string.**\nWhat I want is to get readable text, the best I can. \n\nIn a way, **I'm brute forcing text decoding.** How cool is that ? \ud83d\ude0e\n\nDon't confuse package **ftfy** with charset-normalizer or chardet. ftfy goal is to repair unicode string whereas charset-normalizer to convert raw file in unknown encoding to unicode.\n\n## \ud83c\udf70 How\n\n - Discard all charset encoding table that could not fit the binary content.\n - Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.\n - Extract matches with the lowest mess detected.\n - Additionally, we measure coherence / probe for a language.\n\n**Wait a minute**, what is noise/mess and coherence according to **YOU ?**\n\n*Noise :* I opened hundred of text files, **written by humans**, with the wrong encoding table. **I observed**, then\n**I established** some ground rules about **what is obvious** when **it seems like** a mess.\n I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to\n improve or rewrite it.\n\n*Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought\nthat intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.\n\n## \u26a1 Known limitations\n\n - Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters))\n - Every charset detector heavily depends on sufficient content. 
In common cases, do not bother run detection on very tiny content.\n\n## \u26a0\ufe0f About Python EOLs\n\n**If you are running:**\n\n- Python >=2.7,<3.5: Unsupported\n- Python 3.5: charset-normalizer < 2.1\n- Python 3.6: charset-normalizer < 3.1\n\nUpgrade your Python interpreter as soon as possible.\n\n## \ud83d\udc64 Contributing\n\nContributions, issues and feature requests are very much welcome.
\nFeel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.\n\n## \ud83d\udcdd License\n\nCopyright \u00a9 [Ahmed TAHRI @Ousret](https://github.com/Ousret).
\nThis project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.\n\nCharacters frequencies used in this project \u00a9 2012 [Denny Vrande\u010di\u0107](http://simia.net/letters/)\n\n## \ud83d\udcbc For Enterprise\n\nProfessional support for charset-normalizer is available as part of the [Tidelift\nSubscription][1]. Tidelift gives software development teams a single source for\npurchasing and maintaining their software, with professional grade assurances\nfrom the experts who know it best, while seamlessly integrating with existing\ntools.\n\n[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme\n\n# Changelog\nAll notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\nThe format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).\n\n## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)\n\n### Changed\n- Typehint for function `from_path` no longer enforce `PathLike` as its first argument\n- Minor improvement over the global detection reliability\n\n### Added\n- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries\n- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)\n- Explicit support for Python 3.12\n\n### Fixed\n- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)\n\n## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)\n\n### Added\n- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)\n\n### Removed\n- Support for Python 3.6 (PR #260)\n\n### Changed\n- Optional speedup provided by mypy/c 1.0.1\n\n## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)\n\n### Fixed\n- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)\n\n### Changed\n- Speedup provided by mypy/c 0.990 on Python >= 3.7\n\n## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)\n\n### Added\n- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results\n- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES\n- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio\n- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)\n\n### Changed\n- Build with static metadata using 'build' frontend\n- Make the language detection stricter\n- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1\n\n### Fixed\n- CLI with opt --normalize fail when using full path for files\n- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it\n- Sphinx warnings when generating the documentation\n\n### Removed\n- Coherence detector no longer return 'Simple English' instead return 'English'\n- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'\n- Breaking: Method `first()` and `best()` from CharsetMatch\n- UTF-7 will no longer appear 
as \"detected\" without a recognized SIG/mark (is unreliable/conflict with ASCII)\n- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches\n- Breaking: Top-level function `normalize`\n- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch\n- Support for the backport `unicodedata2`\n\n## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)\n\n### Added\n- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results\n- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES\n- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio\n\n### Changed\n- Build with static metadata using 'build' frontend\n- Make the language detection stricter\n\n### Fixed\n- CLI with opt --normalize fail when using full path for files\n- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it\n\n### Removed\n- Coherence detector no longer return 'Simple English' instead return 'English'\n- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'\n\n## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)\n\n### Added\n- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)\n\n### Removed\n- Breaking: Method `first()` and `best()` from CharsetMatch\n- UTF-7 will no longer appear as \"detected\" without a recognized SIG/mark (is unreliable/conflict with ASCII)\n\n### Fixed\n- Sphinx warnings when generating the documentation\n\n## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)\n\n### Changed\n- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1\n\n### Removed\n- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches\n- Breaking: Top-level function `normalize`\n- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch\n- Support for the backport `unicodedata2`\n\n## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)\n\n### Deprecated\n- Function `normalize` scheduled for removal in 3.0\n\n### Changed\n- Removed useless call to decode in fn is_unprintable (#206)\n\n### Fixed\n- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)\n\n## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)\n\n### Added\n- Output the Unicode table version when running the CLI with `--version` (PR #194)\n\n### Changed\n- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)\n- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)\n\n### Fixed\n- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)\n- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)\n\n### Removed\n- Support for Python 3.5 (PR 
#192)\n\n### Deprecated\n- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)\n\n## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)\n\n### Fixed\n- ASCII miss-detection on rare cases (PR #170) \n\n## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)\n\n### Added\n- Explicit support for Python 3.11 (PR #164)\n\n### Changed\n- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)\n\n## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)\n\n### Fixed\n- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)\n\n### Changed\n- Skipping the language-detection (CD) on ASCII (PR #155)\n\n## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)\n\n### Changed\n- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)\n\n### Fixed\n- Wrong logging level applied when setting kwarg `explain` to True (PR #146)\n\n## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)\n### Changed\n- Improvement over Vietnamese detection (PR #126)\n- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)\n- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)\n- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)\n- Code style as refactored by Sourcery-AI (PR #131) \n- Minor adjustment on the MD around european words (PR #133)\n- Remove and replace SRTs from assets / tests (PR #139)\n- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)\n- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)\n\n### Fixed\n- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)\n- Avoid using too insignificant chunk (PR #137)\n\n### Added\n- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)\n- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)\n\n## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)\n### Added\n- Add support for Kazakh (Cyrillic) language detection (PR #109)\n\n### Changed\n- Further, improve inferring the language from a given single-byte code page (PR #112)\n- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)\n- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)\n- Various detection improvement (MD+CD) (PR #117)\n\n### Removed\n- Remove redundant logging entry about detected language(s) (PR #115)\n\n### Fixed\n- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)\n\n## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)\n### Fixed\n- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)\n- Fix CLI crash when using --minimal output in certain cases (PR #103)\n\n### Changed\n- Minor improvement to the detection efficiency (less than 1%) (PR #106 
#101)\n\n## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)\n### Changed\n- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)\n- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)\n- The Unicode detection is slightly improved (PR #93)\n- Add syntax sugar \\_\\_bool\\_\\_ for results CharsetMatches list-container (PR #91)\n\n### Removed\n- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92)\n\n### Fixed\n- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)\n- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)\n- The MANIFEST.in was not exhaustive (PR #78)\n\n## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)\n### Fixed\n- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70)\n- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)\n- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)\n- Submatch factoring could be wrong in rare edge cases (PR #72)\n- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)\n- Fix line endings from CRLF to LF for certain project files (PR #67)\n\n### Changed\n- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)\n- Allow fallback on specified encoding if any (PR #71)\n\n## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)\n### Changed\n- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)\n- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)\n\n## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)\n### Fixed\n- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59) \n\n### Changed\n- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)\n\n## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)\n### Fixed\n- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)\n- Using explain=False permanently disable the verbose output in the current runtime (PR #47)\n- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47)\n- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)\n\n### Changed\n- Public function normalize default args values were not aligned with from_bytes (PR #53)\n\n### Added\n- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)\n\n## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)\n### Changed\n- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.\n- Accent has been made on UTF-8 detection, should perform rather instantaneous.\n- The backward compatibility with Chardet has been greatly improved. 
The legacy detect function returns an identical charset name whenever possible.\n- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)\n- The program has been rewritten to ease the readability and maintainability. (+Using static typing)+\n- utf_7 detection has been reinstated.\n\n### Removed\n- This package no longer require anything when used with Python 3.5 (Dropped cached_property)\n- Removed support for these languages: Catalan, Esperanto, Kazakh, Baque, Volap\u00fck, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.\n- The exception hook on UnicodeDecodeError has been removed.\n\n### Deprecated\n- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0\n\n### Fixed\n- The CLI output used the relative path of the file(s). Should be absolute.\n\n## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)\n### Fixed\n- Logger configuration/usage no longer conflict with others (PR #44)\n\n## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)\n### Removed\n- Using standard logging instead of using the package loguru.\n- Dropping nose test framework in favor of the maintained pytest.\n- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.\n- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.\n- Stop support for UTF-7 that does not contain a SIG.\n- Dropping PrettyTable, replaced with pure JSON output in CLI.\n\n### Fixed\n- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.\n- Not searching properly for the BOM when trying utf32/16 parent codec.\n\n### Changed\n- Improving the package final size by compressing frequencies.json.\n- Huge improvement over the larges payload.\n\n### Added\n- CLI now produces JSON consumable output.\n- Return ASCII if given sequences fit. Given reasonable confidence.\n\n## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)\n\n### Fixed\n- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)\n\n## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)\n\n### Fixed\n- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)\n\n## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)\n\n### Fixed\n- The legacy detect function should return UTF-8-SIG if sig is present in the payload. 
(PR #38)\n\n## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)\n\n### Changed\n- Amend the previous release to allow prettytable 2.0 (PR #35)\n\n## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)\n\n### Fixed\n- Fix error while using the package with a python pre-release interpreter (PR #33)\n\n### Changed\n- Dependencies refactoring, constraints revised.\n\n### Added\n- Add python 3.9 and 3.10 to the supported interpreters\n\nMIT License\n\nCopyright (c) 2019 TAHRI Ahmed R.\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.", - "release_date": "2023-07-07T20:18:23", + "description": "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.\n
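The changelog quoted above introduces `language_threshold` on `from_bytes`, `from_path`, and `from_fp` to adjust the minimum expected coherence ratio. A minimal sketch of such a call, assuming charset-normalizer 3.x is installed; the payload and the threshold value are illustrative, not taken from this fixture:

```python
# Sketch of the language_threshold parameter described in the changelog
# above; the payload and the 0.2 threshold are illustrative values.
from charset_normalizer import from_bytes

payload = "Bonjour tout le monde".encode("cp1252")
results = from_bytes(payload, language_threshold=0.2)
best = results.best()  # best() lives on the CharsetMatches container
if best is not None:
    print(best.encoding, best.language)
```

Note that per the same changelog, `first()`/`best()` were removed from individual `CharsetMatch` objects; `best()` here is called on the returned `CharsetMatches` container, as the README's own example does.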

Charset Detection, for Everyone \ud83d\udc4b

The Real First Universal Charset Detector
[badge images removed: download count, "Featured Packages", community ports]
\n\n> A library that helps you read text from an unknown charset encoding. Motivated by `chardet`,\n> I'm trying to resolve the issue by taking a new approach.\n> All IANA character set names for which the Python core library provides codecs are supported.\n\n
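As a minimal sketch of that promise (the byte payload below is a made-up sample, not taken from the upstream docs), `from_bytes` returns ranked matches and `best()` picks the most plausible one:

```python
# Minimal sketch: turn bytes of unknown encoding into readable text.
# The sample payload is hypothetical; from_bytes() and best() are the
# package's primary entry points.
from charset_normalizer import from_bytes

raw = "Comment ça va ?".encode("cp1252")  # pretend the codec is unknown
match = from_bytes(raw).best()

if match is not None:
    print(match.encoding)  # guessed codec name, e.g. "cp1252"
    print(str(match))      # payload decoded to a Unicode str
```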

\n >>>>> \ud83d\udc49 Try Me Online Now, Then Adopt Me \ud83d\udc48 <<<<<\n

\n\nThis project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.\n\n| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |\n|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|\n| `Fast` | \u274c | \u2705 | \u2705 |\n| `Universal**` | \u274c | \u2705 | \u274c |\n| `Reliable` **without** distinguishable standards | \u274c | \u2705 | \u2705 |\n| `Reliable` **with** distinguishable standards | \u2705 | \u2705 | \u2705 |\n| `License` | LGPL-2.1
_restrictive_ | MIT | MPL-1.1
_restrictive_ |\n| `Native Python` | \u2705 | \u2705 | \u274c |\n| `Detect spoken language` | \u274c | \u2705 | N/A |\n| `UnicodeDecodeError Safety` | \u274c | \u2705 | \u274c |\n| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |\n| `Supported Encoding` | 33 | \ud83c\udf89 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |\n\n

\n\"Reading\"Cat\n

\n\n*\\*\\* : They are clearly using specific code for a specific encoding even if covering most of used one*
\nDid you got there because of the logs? See [https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html](https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html)\n\n## \u26a1 Performance\n\nThis package offer better performance than its counterpart Chardet. Here are some numbers.\n\n| Package | Accuracy | Mean per file (ms) | File per sec (est) |\n|-----------------------------------------------|:--------:|:------------------:|:------------------:|\n| [chardet](https://github.com/chardet/chardet) | 86 % | 200 ms | 5 file/sec |\n| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |\n\n| Package | 99th percentile | 95th percentile | 50th percentile |\n|-----------------------------------------------|:---------------:|:---------------:|:---------------:|\n| [chardet](https://github.com/chardet/chardet) | 1200 ms | 287 ms | 23 ms |\n| charset-normalizer | 100 ms | 50 ms | 5 ms |\n\nChardet's performance on larger file (1MB+) are very poor. Expect huge difference on large payload.\n\n> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.\n> And yes, these results might change at any time. The dataset can be updated to include more files.\n> The actual delays heavily depends on your CPU capabilities. The factors should remain the same.\n> Keep in mind that the stats are generous and that Chardet accuracy vs our is measured using Chardet initial capability\n> (eg. Supported Encoding) Challenge-them if you want.\n\n## \u2728 Installation\n\nUsing pip:\n\n```sh\npip install charset-normalizer -U\n```\n\n## \ud83d\ude80 Basic Usage\n\n### CLI\nThis package comes with a CLI.\n\n```\nusage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]\n file [file ...]\n\nThe Real First Universal Charset Detector. Discover originating encoding used\non text file. Normalize text to unicode.\n\npositional arguments:\n files File(s) to be analysed\n\noptional arguments:\n -h, --help show this help message and exit\n -v, --verbose Display complementary information about file if any.\n Stdout will contain logs about the detection process.\n -a, --with-alternative\n Output complementary possibilities if any. Top-level\n JSON WILL be a list.\n -n, --normalize Permit to normalize input file. If not set, program\n does not write anything.\n -m, --minimal Only output the charset detected to STDOUT. Disabling\n JSON output.\n -r, --replace Replace file when trying to normalize it instead of\n creating a new one.\n -f, --force Replace file without asking if you are sure, use this\n flag with caution.\n -t THRESHOLD, --threshold THRESHOLD\n Define a custom maximum amount of chaos allowed in\n decoded content. 0. 
<= chaos <= 1.\n --version Show version information and exit.\n```\n\n```bash\nnormalizer ./data/sample.1.fr.srt\n```\n\nor\n\n```bash\npython -m charset_normalizer ./data/sample.1.fr.srt\n```\n\n\ud83c\udf89 Since version 1.4.0 the CLI produce easily usable stdout result in JSON format.\n\n```json\n{\n \"path\": \"/home/default/projects/charset_normalizer/data/sample.1.fr.srt\",\n \"encoding\": \"cp1252\",\n \"encoding_aliases\": [\n \"1252\",\n \"windows_1252\"\n ],\n \"alternative_encodings\": [\n \"cp1254\",\n \"cp1256\",\n \"cp1258\",\n \"iso8859_14\",\n \"iso8859_15\",\n \"iso8859_16\",\n \"iso8859_3\",\n \"iso8859_9\",\n \"latin_1\",\n \"mbcs\"\n ],\n \"language\": \"French\",\n \"alphabets\": [\n \"Basic Latin\",\n \"Latin-1 Supplement\"\n ],\n \"has_sig_or_bom\": false,\n \"chaos\": 0.149,\n \"coherence\": 97.152,\n \"unicode_path\": null,\n \"is_preferred\": true\n}\n```\n\n### Python\n*Just print out normalized text*\n```python\nfrom charset_normalizer import from_path\n\nresults = from_path('./my_subtitle.srt')\n\nprint(str(results.best()))\n```\n\n*Upgrade your code without effort*\n```python\nfrom charset_normalizer import detect\n```\n\nThe above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.\n\nSee the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)\n\n## \ud83d\ude07 Why\n\nWhen I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a\nreliable alternative using a completely different method. Also! I never back down on a good challenge!\n\nI **don't care** about the **originating charset** encoding, because **two different tables** can\nproduce **two identical rendered string.**\nWhat I want is to get readable text, the best I can. \n\nIn a way, **I'm brute forcing text decoding.** How cool is that ? \ud83d\ude0e\n\nDon't confuse package **ftfy** with charset-normalizer or chardet. ftfy goal is to repair unicode string whereas charset-normalizer to convert raw file in unknown encoding to unicode.\n\n## \ud83c\udf70 How\n\n - Discard all charset encoding table that could not fit the binary content.\n - Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.\n - Extract matches with the lowest mess detected.\n - Additionally, we measure coherence / probe for a language.\n\n**Wait a minute**, what is noise/mess and coherence according to **YOU ?**\n\n*Noise :* I opened hundred of text files, **written by humans**, with the wrong encoding table. **I observed**, then\n**I established** some ground rules about **what is obvious** when **it seems like** a mess.\n I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to\n improve or rewrite it.\n\n*Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought\nthat intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.\n\n## \u26a1 Known limitations\n\n - Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters))\n - Every charset detector heavily depends on sufficient content. 
In common cases, do not bother run detection on very tiny content.\n\n## \u26a0\ufe0f About Python EOLs\n\n**If you are running:**\n\n- Python >=2.7,<3.5: Unsupported\n- Python 3.5: charset-normalizer < 2.1\n- Python 3.6: charset-normalizer < 3.1\n- Python 3.7: charset-normalizer < 4.0\n\nUpgrade your Python interpreter as soon as possible.\n\n## \ud83d\udc64 Contributing\n\nContributions, issues and feature requests are very much welcome.
\nFeel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.\n\n## \ud83d\udcdd License\n\nCopyright \u00a9 [Ahmed TAHRI @Ousret](https://github.com/Ousret).
\nThis project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.\n\nCharacters frequencies used in this project \u00a9 2012 [Denny Vrande\u010di\u0107](http://simia.net/letters/)\n\n## \ud83d\udcbc For Enterprise\n\nProfessional support for charset-normalizer is available as part of the [Tidelift\nSubscription][1]. Tidelift gives software development teams a single source for\npurchasing and maintaining their software, with professional grade assurances\nfrom the experts who know it best, while seamlessly integrating with existing\ntools.\n\n[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme\n\n# Changelog\nAll notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\nThe format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).\n\n## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)\n\n### Added\n- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`\n- Support for 9 forgotten encoding that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)\n\n### Removed\n- (internal) Redundant utils.is_ascii function and unused function is_private_use_only\n- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant\n\n### Changed\n- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection\n- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.7\n\n### Fixed\n- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \\_\\_lt\\_\\_ (#350)\n\n## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)\n\n### Changed\n- Typehint for function `from_path` no longer enforce `PathLike` as its first argument\n- Minor improvement over the global detection reliability\n\n### Added\n- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries\n- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)\n- Explicit support for Python 3.12\n\n### Fixed\n- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)\n\n## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)\n\n### Added\n- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)\n\n### Removed\n- Support for Python 3.6 (PR #260)\n\n### Changed\n- Optional speedup provided by mypy/c 1.0.1\n\n## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)\n\n### Fixed\n- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)\n\n### Changed\n- Speedup provided by mypy/c 0.990 on Python >= 3.7\n\n## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)\n\n### Added\n- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results\n- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES\n- Add parameter `language_threshold` in `from_bytes`, `from_path` and 
`from_fp` to adjust the minimum expected coherence ratio\n- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)\n\n### Changed\n- Build with static metadata using 'build' frontend\n- Make the language detection stricter\n- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1\n\n### Fixed\n- CLI with opt --normalize fail when using full path for files\n- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it\n- Sphinx warnings when generating the documentation\n\n### Removed\n- Coherence detector no longer return 'Simple English' instead return 'English'\n- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'\n- Breaking: Method `first()` and `best()` from CharsetMatch\n- UTF-7 will no longer appear as \"detected\" without a recognized SIG/mark (is unreliable/conflict with ASCII)\n- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches\n- Breaking: Top-level function `normalize`\n- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch\n- Support for the backport `unicodedata2`\n\n## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)\n\n### Added\n- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results\n- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES\n- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio\n\n### Changed\n- Build with static metadata using 'build' frontend\n- Make the language detection stricter\n\n### Fixed\n- CLI with opt --normalize fail when using full path for files\n- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it\n\n### Removed\n- Coherence detector no longer return 'Simple English' instead return 'English'\n- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'\n\n## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)\n\n### Added\n- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)\n\n### Removed\n- Breaking: Method `first()` and `best()` from CharsetMatch\n- UTF-7 will no longer appear as \"detected\" without a recognized SIG/mark (is unreliable/conflict with ASCII)\n\n### Fixed\n- Sphinx warnings when generating the documentation\n\n## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)\n\n### Changed\n- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1\n\n### Removed\n- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches\n- Breaking: Top-level function `normalize`\n- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch\n- Support for the backport `unicodedata2`\n\n## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)\n\n### Deprecated\n- Function `normalize` scheduled for removal in 3.0\n\n### Changed\n- Removed useless call to decode in fn is_unprintable (#206)\n\n### Fixed\n- Third-party 
library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)\n\n## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)\n\n### Added\n- Output the Unicode table version when running the CLI with `--version` (PR #194)\n\n### Changed\n- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)\n- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)\n\n### Fixed\n- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)\n- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)\n\n### Removed\n- Support for Python 3.5 (PR #192)\n\n### Deprecated\n- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)\n\n## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)\n\n### Fixed\n- ASCII miss-detection on rare cases (PR #170) \n\n## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)\n\n### Added\n- Explicit support for Python 3.11 (PR #164)\n\n### Changed\n- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)\n\n## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)\n\n### Fixed\n- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)\n\n### Changed\n- Skipping the language-detection (CD) on ASCII (PR #155)\n\n## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)\n\n### Changed\n- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)\n\n### Fixed\n- Wrong logging level applied when setting kwarg `explain` to True (PR #146)\n\n## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)\n### Changed\n- Improvement over Vietnamese detection (PR #126)\n- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)\n- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)\n- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)\n- Code style as refactored by Sourcery-AI (PR #131) \n- Minor adjustment on the MD around european words (PR #133)\n- Remove and replace SRTs from assets / tests (PR #139)\n- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)\n- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)\n\n### Fixed\n- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)\n- Avoid using too insignificant chunk (PR #137)\n\n### Added\n- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)\n- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)\n\n## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)\n### Added\n- Add support for Kazakh (Cyrillic) language detection (PR #109)\n\n### Changed\n- Further, improve 
inferring the language from a given single-byte code page (PR #112)\n- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)\n- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)\n- Various detection improvement (MD+CD) (PR #117)\n\n### Removed\n- Remove redundant logging entry about detected language(s) (PR #115)\n\n### Fixed\n- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)\n\n## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)\n### Fixed\n- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)\n- Fix CLI crash when using --minimal output in certain cases (PR #103)\n\n### Changed\n- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)\n\n## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)\n### Changed\n- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)\n- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)\n- The Unicode detection is slightly improved (PR #93)\n- Add syntax sugar \\_\\_bool\\_\\_ for results CharsetMatches list-container (PR #91)\n\n### Removed\n- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92)\n\n### Fixed\n- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)\n- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)\n- The MANIFEST.in was not exhaustive (PR #78)\n\n## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)\n### Fixed\n- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70)\n- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)\n- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)\n- Submatch factoring could be wrong in rare edge cases (PR #72)\n- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)\n- Fix line endings from CRLF to LF for certain project files (PR #67)\n\n### Changed\n- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)\n- Allow fallback on specified encoding if any (PR #71)\n\n## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)\n### Changed\n- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)\n- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)\n\n## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)\n### Fixed\n- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59) \n\n### Changed\n- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)\n\n## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)\n### Fixed\n- Make it work where there isn't a filesystem available, dropping assets frequencies.json. 
Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)\n- Using explain=False permanently disable the verbose output in the current runtime (PR #47)\n- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47)\n- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)\n\n### Changed\n- Public function normalize default args values were not aligned with from_bytes (PR #53)\n\n### Added\n- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)\n\n## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)\n### Changed\n- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.\n- Accent has been made on UTF-8 detection, should perform rather instantaneous.\n- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.\n- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)\n- The program has been rewritten to ease the readability and maintainability. (+Using static typing)+\n- utf_7 detection has been reinstated.\n\n### Removed\n- This package no longer require anything when used with Python 3.5 (Dropped cached_property)\n- Removed support for these languages: Catalan, Esperanto, Kazakh, Baque, Volap\u00fck, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.\n- The exception hook on UnicodeDecodeError has been removed.\n\n### Deprecated\n- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0\n\n### Fixed\n- The CLI output used the relative path of the file(s). Should be absolute.\n\n## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)\n### Fixed\n- Logger configuration/usage no longer conflict with others (PR #44)\n\n## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)\n### Removed\n- Using standard logging instead of using the package loguru.\n- Dropping nose test framework in favor of the maintained pytest.\n- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.\n- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.\n- Stop support for UTF-7 that does not contain a SIG.\n- Dropping PrettyTable, replaced with pure JSON output in CLI.\n\n### Fixed\n- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.\n- Not searching properly for the BOM when trying utf32/16 parent codec.\n\n### Changed\n- Improving the package final size by compressing frequencies.json.\n- Huge improvement over the larges payload.\n\n### Added\n- CLI now produces JSON consumable output.\n- Return ASCII if given sequences fit. Given reasonable confidence.\n\n## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)\n\n### Fixed\n- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)\n\n## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)\n\n### Fixed\n- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. 
(PR #39)\n\n## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)\n\n### Fixed\n- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)\n\n## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)\n\n### Changed\n- Amend the previous release to allow prettytable 2.0 (PR #35)\n\n## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)\n\n### Fixed\n- Fix error while using the package with a python pre-release interpreter (PR #33)\n\n### Changed\n- Dependencies refactoring, constraints revised.\n\n### Added\n- Add python 3.9 and 3.10 to the supported interpreters\n\nMIT License\n\nCopyright (c) 2019 TAHRI Ahmed R.\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.", + "release_date": "2023-09-30T09:12:42", "parties": [ { "type": "person", @@ -791,11 +787,11 @@ "Typing :: Typed" ], "homepage_url": "https://github.com/Ousret/charset_normalizer", - "download_url": "https://files.pythonhosted.org/packages/cb/e7/5e43745003bf1f90668c7be23fc5952b3a2b9c2558f16749411c18039b36/charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", - "size": 199117, + "download_url": "https://files.pythonhosted.org/packages/a3/dc/efab5b27839f04be4b8058c1eb85b7ab7dbc55ef8067250bea0518392756/charset_normalizer-3.3.0-py3-none-any.whl", + "size": 48155, "sha1": null, - "md5": "ffb7e21381f7f634bfccc82a66adc739", - "sha256": "89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a", + "md5": "b8d8dcceed572fa80a875f6e8a4f5916", + "sha256": "e46cd37076971c1040fc8c41273a8b3e2c624ce4f2be3f5dfcb7a430c1d3acc2", "sha512": null, "bug_tracking_url": null, "code_view_url": null, @@ -815,20 +811,20 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/charset-normalizer/3.2.0/json", + "api_data_url": "https://pypi.org/pypi/charset-normalizer/3.3.0/json", "datasource_id": null, - "purl": "pkg:pypi/charset-normalizer@3.2.0" + "purl": "pkg:pypi/charset-normalizer@3.3.0" }, { "type": "pypi", "namespace": null, "name": "charset-normalizer", - "version": "3.2.0", + "version": "3.3.0", "qualifiers": {}, "subpath": null, "primary_language": "Python", - "description": "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.\n

Charset Detection, for Everyone \ud83d\udc4b

The Real First Universal Charset Detector
[badge images removed: download count]
\n\n> A library that helps you read text from an unknown charset encoding. Motivated by `chardet`,\n> I'm trying to resolve the issue by taking a new approach.\n> All IANA character set names for which the Python core library provides codecs are supported.\n\n

\n >>>>> \ud83d\udc49 Try Me Online Now, Then Adopt Me \ud83d\udc48 <<<<<\n

\n\nThis project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.\n\n| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |\n|--------------------------------------------------|:---------------------------------------------:|:------------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|\n| `Fast` | \u274c
| \u2705 | \u2705
|\n| `Universal**` | \u274c | \u2705 | \u274c |\n| `Reliable` **without** distinguishable standards | \u274c | \u2705 | \u2705 |\n| `Reliable` **with** distinguishable standards | \u2705 | \u2705 | \u2705 |\n| `License` | LGPL-2.1
_restrictive_ | MIT | MPL-1.1
_restrictive_ |\n| `Native Python` | \u2705 | \u2705 | \u274c |\n| `Detect spoken language` | \u274c | \u2705 | N/A |\n| `UnicodeDecodeError Safety` | \u274c | \u2705 | \u274c |\n| `Whl Size` | 193.6 kB | 40 kB | ~200 kB |\n| `Supported Encoding` | 33 | \ud83c\udf89 [90](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |\n\n

\n\"Reading\"Cat\n\n*\\*\\* : They are clearly using specific code for a specific encoding even if covering most of used one*
\nDid you got there because of the logs? See [https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html](https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html)\n\n## \u26a1 Performance\n\nThis package offer better performance than its counterpart Chardet. Here are some numbers.\n\n| Package | Accuracy | Mean per file (ms) | File per sec (est) |\n|-----------------------------------------------|:--------:|:------------------:|:------------------:|\n| [chardet](https://github.com/chardet/chardet) | 86 % | 200 ms | 5 file/sec |\n| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |\n\n| Package | 99th percentile | 95th percentile | 50th percentile |\n|-----------------------------------------------|:---------------:|:---------------:|:---------------:|\n| [chardet](https://github.com/chardet/chardet) | 1200 ms | 287 ms | 23 ms |\n| charset-normalizer | 100 ms | 50 ms | 5 ms |\n\nChardet's performance on larger file (1MB+) are very poor. Expect huge difference on large payload.\n\n> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.\n> And yes, these results might change at any time. The dataset can be updated to include more files.\n> The actual delays heavily depends on your CPU capabilities. The factors should remain the same.\n> Keep in mind that the stats are generous and that Chardet accuracy vs our is measured using Chardet initial capability\n> (eg. Supported Encoding) Challenge-them if you want.\n\n## \u2728 Installation\n\nUsing pip:\n\n```sh\npip install charset-normalizer -U\n```\n\n## \ud83d\ude80 Basic Usage\n\n### CLI\nThis package comes with a CLI.\n\n```\nusage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]\n file [file ...]\n\nThe Real First Universal Charset Detector. Discover originating encoding used\non text file. Normalize text to unicode.\n\npositional arguments:\n files File(s) to be analysed\n\noptional arguments:\n -h, --help show this help message and exit\n -v, --verbose Display complementary information about file if any.\n Stdout will contain logs about the detection process.\n -a, --with-alternative\n Output complementary possibilities if any. Top-level\n JSON WILL be a list.\n -n, --normalize Permit to normalize input file. If not set, program\n does not write anything.\n -m, --minimal Only output the charset detected to STDOUT. Disabling\n JSON output.\n -r, --replace Replace file when trying to normalize it instead of\n creating a new one.\n -f, --force Replace file without asking if you are sure, use this\n flag with caution.\n -t THRESHOLD, --threshold THRESHOLD\n Define a custom maximum amount of chaos allowed in\n decoded content. 0. 
<= chaos <= 1.\n --version Show version information and exit.\n```\n\n```bash\nnormalizer ./data/sample.1.fr.srt\n```\n\n\ud83c\udf89 Since version 1.4.0 the CLI produce easily usable stdout result in JSON format.\n\n```json\n{\n \"path\": \"/home/default/projects/charset_normalizer/data/sample.1.fr.srt\",\n \"encoding\": \"cp1252\",\n \"encoding_aliases\": [\n \"1252\",\n \"windows_1252\"\n ],\n \"alternative_encodings\": [\n \"cp1254\",\n \"cp1256\",\n \"cp1258\",\n \"iso8859_14\",\n \"iso8859_15\",\n \"iso8859_16\",\n \"iso8859_3\",\n \"iso8859_9\",\n \"latin_1\",\n \"mbcs\"\n ],\n \"language\": \"French\",\n \"alphabets\": [\n \"Basic Latin\",\n \"Latin-1 Supplement\"\n ],\n \"has_sig_or_bom\": false,\n \"chaos\": 0.149,\n \"coherence\": 97.152,\n \"unicode_path\": null,\n \"is_preferred\": true\n}\n```\n\n### Python\n*Just print out normalized text*\n```python\nfrom charset_normalizer import from_path\n\nresults = from_path('./my_subtitle.srt')\n\nprint(str(results.best()))\n```\n\n*Upgrade your code without effort*\n```python\nfrom charset_normalizer import detect\n```\n\nThe above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.\n\nSee the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)\n\n## \ud83d\ude07 Why\n\nWhen I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a\nreliable alternative using a completely different method. Also! I never back down on a good challenge!\n\nI **don't care** about the **originating charset** encoding, because **two different tables** can\nproduce **two identical rendered string.**\nWhat I want is to get readable text, the best I can. \n\nIn a way, **I'm brute forcing text decoding.** How cool is that ? \ud83d\ude0e\n\nDon't confuse package **ftfy** with charset-normalizer or chardet. ftfy goal is to repair unicode string whereas charset-normalizer to convert raw file in unknown encoding to unicode.\n\n## \ud83c\udf70 How\n\n - Discard all charset encoding table that could not fit the binary content.\n - Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.\n - Extract matches with the lowest mess detected.\n - Additionally, we measure coherence / probe for a language.\n\n**Wait a minute**, what is noise/mess and coherence according to **YOU ?**\n\n*Noise :* I opened hundred of text files, **written by humans**, with the wrong encoding table. **I observed**, then\n**I established** some ground rules about **what is obvious** when **it seems like** a mess.\n I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to\n improve or rewrite it.\n\n*Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought\nthat intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.\n\n## \u26a1 Known limitations\n\n - Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters))\n - Every charset detector heavily depends on sufficient content. 
In common cases, do not bother run detection on very tiny content.\n\n## \u26a0\ufe0f About Python EOLs\n\n**If you are running:**\n\n- Python >=2.7,<3.5: Unsupported\n- Python 3.5: charset-normalizer < 2.1\n- Python 3.6: charset-normalizer < 3.1\n\nUpgrade your Python interpreter as soon as possible.\n\n## \ud83d\udc64 Contributing\n\nContributions, issues and feature requests are very much welcome.
\nFeel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.\n\n## \ud83d\udcdd License\n\nCopyright \u00a9 [Ahmed TAHRI @Ousret](https://github.com/Ousret).
\nThis project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.\n\nCharacters frequencies used in this project \u00a9 2012 [Denny Vrande\u010di\u0107](http://simia.net/letters/)\n\n## \ud83d\udcbc For Enterprise\n\nProfessional support for charset-normalizer is available as part of the [Tidelift\nSubscription][1]. Tidelift gives software development teams a single source for\npurchasing and maintaining their software, with professional grade assurances\nfrom the experts who know it best, while seamlessly integrating with existing\ntools.\n\n[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme\n\n# Changelog\nAll notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\nThe format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).\n\n## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)\n\n### Changed\n- Typehint for function `from_path` no longer enforce `PathLike` as its first argument\n- Minor improvement over the global detection reliability\n\n### Added\n- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries\n- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)\n- Explicit support for Python 3.12\n\n### Fixed\n- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)\n\n## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)\n\n### Added\n- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)\n\n### Removed\n- Support for Python 3.6 (PR #260)\n\n### Changed\n- Optional speedup provided by mypy/c 1.0.1\n\n## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)\n\n### Fixed\n- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)\n\n### Changed\n- Speedup provided by mypy/c 0.990 on Python >= 3.7\n\n## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)\n\n### Added\n- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results\n- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES\n- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio\n- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)\n\n### Changed\n- Build with static metadata using 'build' frontend\n- Make the language detection stricter\n- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1\n\n### Fixed\n- CLI with opt --normalize fail when using full path for files\n- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it\n- Sphinx warnings when generating the documentation\n\n### Removed\n- Coherence detector no longer return 'Simple English' instead return 'English'\n- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'\n- Breaking: Method `first()` and `best()` from CharsetMatch\n- UTF-7 will no longer appear 
as \"detected\" without a recognized SIG/mark (is unreliable/conflict with ASCII)\n- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches\n- Breaking: Top-level function `normalize`\n- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch\n- Support for the backport `unicodedata2`\n\n## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)\n\n### Added\n- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results\n- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES\n- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio\n\n### Changed\n- Build with static metadata using 'build' frontend\n- Make the language detection stricter\n\n### Fixed\n- CLI with opt --normalize fail when using full path for files\n- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it\n\n### Removed\n- Coherence detector no longer return 'Simple English' instead return 'English'\n- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'\n\n## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)\n\n### Added\n- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)\n\n### Removed\n- Breaking: Method `first()` and `best()` from CharsetMatch\n- UTF-7 will no longer appear as \"detected\" without a recognized SIG/mark (is unreliable/conflict with ASCII)\n\n### Fixed\n- Sphinx warnings when generating the documentation\n\n## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)\n\n### Changed\n- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1\n\n### Removed\n- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches\n- Breaking: Top-level function `normalize`\n- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch\n- Support for the backport `unicodedata2`\n\n## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)\n\n### Deprecated\n- Function `normalize` scheduled for removal in 3.0\n\n### Changed\n- Removed useless call to decode in fn is_unprintable (#206)\n\n### Fixed\n- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)\n\n## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)\n\n### Added\n- Output the Unicode table version when running the CLI with `--version` (PR #194)\n\n### Changed\n- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)\n- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)\n\n### Fixed\n- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)\n- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)\n\n### Removed\n- Support for Python 3.5 (PR 
#192)\n\n### Deprecated\n- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)\n\n## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)\n\n### Fixed\n- ASCII miss-detection on rare cases (PR #170) \n\n## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)\n\n### Added\n- Explicit support for Python 3.11 (PR #164)\n\n### Changed\n- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)\n\n## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)\n\n### Fixed\n- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)\n\n### Changed\n- Skipping the language-detection (CD) on ASCII (PR #155)\n\n## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)\n\n### Changed\n- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)\n\n### Fixed\n- Wrong logging level applied when setting kwarg `explain` to True (PR #146)\n\n## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)\n### Changed\n- Improvement over Vietnamese detection (PR #126)\n- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)\n- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)\n- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)\n- Code style as refactored by Sourcery-AI (PR #131) \n- Minor adjustment on the MD around european words (PR #133)\n- Remove and replace SRTs from assets / tests (PR #139)\n- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)\n- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)\n\n### Fixed\n- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)\n- Avoid using too insignificant chunk (PR #137)\n\n### Added\n- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)\n- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)\n\n## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)\n### Added\n- Add support for Kazakh (Cyrillic) language detection (PR #109)\n\n### Changed\n- Further, improve inferring the language from a given single-byte code page (PR #112)\n- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)\n- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)\n- Various detection improvement (MD+CD) (PR #117)\n\n### Removed\n- Remove redundant logging entry about detected language(s) (PR #115)\n\n### Fixed\n- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)\n\n## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)\n### Fixed\n- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)\n- Fix CLI crash when using --minimal output in certain cases (PR #103)\n\n### Changed\n- Minor improvement to the detection efficiency (less than 1%) (PR #106 
#101)\n\n## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)\n### Changed\n- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)\n- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)\n- The Unicode detection is slightly improved (PR #93)\n- Add syntax sugar \\_\\_bool\\_\\_ for results CharsetMatches list-container (PR #91)\n\n### Removed\n- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92)\n\n### Fixed\n- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)\n- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)\n- The MANIFEST.in was not exhaustive (PR #78)\n\n## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)\n### Fixed\n- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70)\n- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)\n- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)\n- Submatch factoring could be wrong in rare edge cases (PR #72)\n- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)\n- Fix line endings from CRLF to LF for certain project files (PR #67)\n\n### Changed\n- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)\n- Allow fallback on specified encoding if any (PR #71)\n\n## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)\n### Changed\n- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)\n- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)\n\n## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)\n### Fixed\n- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59) \n\n### Changed\n- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)\n\n## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)\n### Fixed\n- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)\n- Using explain=False permanently disable the verbose output in the current runtime (PR #47)\n- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47)\n- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)\n\n### Changed\n- Public function normalize default args values were not aligned with from_bytes (PR #53)\n\n### Added\n- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)\n\n## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)\n### Changed\n- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.\n- Accent has been made on UTF-8 detection, should perform rather instantaneous.\n- The backward compatibility with Chardet has been greatly improved. 
The legacy detect function returns an identical charset name whenever possible.\n- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)\n- The program has been rewritten to ease the readability and maintainability. (+Using static typing)+\n- utf_7 detection has been reinstated.\n\n### Removed\n- This package no longer require anything when used with Python 3.5 (Dropped cached_property)\n- Removed support for these languages: Catalan, Esperanto, Kazakh, Baque, Volap\u00fck, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.\n- The exception hook on UnicodeDecodeError has been removed.\n\n### Deprecated\n- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0\n\n### Fixed\n- The CLI output used the relative path of the file(s). Should be absolute.\n\n## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)\n### Fixed\n- Logger configuration/usage no longer conflict with others (PR #44)\n\n## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)\n### Removed\n- Using standard logging instead of using the package loguru.\n- Dropping nose test framework in favor of the maintained pytest.\n- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.\n- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.\n- Stop support for UTF-7 that does not contain a SIG.\n- Dropping PrettyTable, replaced with pure JSON output in CLI.\n\n### Fixed\n- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.\n- Not searching properly for the BOM when trying utf32/16 parent codec.\n\n### Changed\n- Improving the package final size by compressing frequencies.json.\n- Huge improvement over the larges payload.\n\n### Added\n- CLI now produces JSON consumable output.\n- Return ASCII if given sequences fit. Given reasonable confidence.\n\n## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)\n\n### Fixed\n- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)\n\n## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)\n\n### Fixed\n- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)\n\n## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)\n\n### Fixed\n- The legacy detect function should return UTF-8-SIG if sig is present in the payload. 
(PR #38)\n\n## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)\n\n### Changed\n- Amend the previous release to allow prettytable 2.0 (PR #35)\n\n## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)\n\n### Fixed\n- Fix error while using the package with a python pre-release interpreter (PR #33)\n\n### Changed\n- Dependencies refactoring, constraints revised.\n\n### Added\n- Add python 3.9 and 3.10 to the supported interpreters\n\nMIT License\n\nCopyright (c) 2019 TAHRI Ahmed R.\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.", - "release_date": "2023-07-07T20:19:09", + "description": "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.\n

Charset Detection, for Everyone \ud83d\udc4b

The Real First Universal Charset Detector
[badge images removed: download count, "Featured Packages", community ports]
\n\n> A library that helps you read text from an unknown charset encoding. Motivated by `chardet`,\n> I'm trying to resolve the issue by taking a new approach.\n> All IANA character set names for which the Python core library provides codecs are supported.\n\n

\n >>>>> \ud83d\udc49 Try Me Online Now, Then Adopt Me \ud83d\udc48 <<<<<\n

\n\nThis project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.\n\n| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |\n|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|\n| `Fast` | \u274c | \u2705 | \u2705 |\n| `Universal**` | \u274c | \u2705 | \u274c |\n| `Reliable` **without** distinguishable standards | \u274c | \u2705 | \u2705 |\n| `Reliable` **with** distinguishable standards | \u2705 | \u2705 | \u2705 |\n| `License` | LGPL-2.1 _restrictive_ | MIT | MPL-1.1 _restrictive_ |\n| `Native Python` | \u2705 | \u2705 | \u274c |\n| `Detect spoken language` | \u274c | \u2705 | N/A |\n| `UnicodeDecodeError Safety` | \u274c | \u2705 | \u274c |\n| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |\n| `Supported Encoding` | 33 | \ud83c\udf89 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |\n

\n\"Reading\"Cat\n

\n\n*\\*\\* : They clearly use encoding-specific code, even if it covers most of the encodings in common use*
\nDid you get here because of the logs? See [https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html](https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html)\n\n## \u26a1 Performance\n\nThis package offers better performance than its counterpart Chardet. Here are some numbers.\n\n| Package | Accuracy | Mean per file (ms) | File per sec (est) |\n|-----------------------------------------------|:--------:|:------------------:|:------------------:|\n| [chardet](https://github.com/chardet/chardet) | 86 % | 200 ms | 5 file/sec |\n| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |\n\n| Package | 99th percentile | 95th percentile | 50th percentile |\n|-----------------------------------------------|:---------------:|:---------------:|:---------------:|\n| [chardet](https://github.com/chardet/chardet) | 1200 ms | 287 ms | 23 ms |\n| charset-normalizer | 100 ms | 50 ms | 5 ms |\n\nChardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.\n\n> Stats are generated using 400+ files using default parameters. For more details on the files used, see the GHA workflows.\n> And yes, these results might change at any time. The dataset can be updated to include more files.\n> The actual delays depend heavily on your CPU capabilities. The factors should remain the same.\n> Keep in mind that the stats are generous and that Chardet's accuracy versus ours is measured using Chardet's initial capability\n> (e.g. supported encodings). Challenge them if you want.\n\n## \u2728 Installation\n\nUsing pip:\n\n```sh\npip install charset-normalizer -U\n```\n\n## \ud83d\ude80 Basic Usage\n\n### CLI\nThis package comes with a CLI.\n\n```\nusage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]\n file [file ...]\n\nThe Real First Universal Charset Detector. Discover originating encoding used\non text file. Normalize text to unicode.\n\npositional arguments:\n files File(s) to be analysed\n\noptional arguments:\n -h, --help show this help message and exit\n -v, --verbose Display complementary information about file if any.\n Stdout will contain logs about the detection process.\n -a, --with-alternative\n Output complementary possibilities if any. Top-level\n JSON WILL be a list.\n -n, --normalize Permit to normalize input file. If not set, program\n does not write anything.\n -m, --minimal Only output the charset detected to STDOUT. Disabling\n JSON output.\n -r, --replace Replace file when trying to normalize it instead of\n creating a new one.\n -f, --force Replace file without asking if you are sure, use this\n flag with caution.\n -t THRESHOLD, --threshold THRESHOLD\n Define a custom maximum amount of chaos allowed in\n decoded content. 0. <= chaos <= 1.\n --version Show version information and exit.\n```\n\n```bash\nnormalizer ./data/sample.1.fr.srt\n```\n\nor\n\n```bash\npython -m charset_normalizer ./data/sample.1.fr.srt\n```\n\n\ud83c\udf89 Since version 1.4.0 the CLI produces easily usable JSON output on stdout.\n\n```json\n{\n \"path\": \"/home/default/projects/charset_normalizer/data/sample.1.fr.srt\",\n \"encoding\": \"cp1252\",\n \"encoding_aliases\": [\n \"1252\",\n \"windows_1252\"\n ],\n \"alternative_encodings\": [\n \"cp1254\",\n \"cp1256\",\n \"cp1258\",\n \"iso8859_14\",\n \"iso8859_15\",\n \"iso8859_16\",\n \"iso8859_3\",\n \"iso8859_9\",\n \"latin_1\",\n \"mbcs\"\n ],\n \"language\": \"French\",\n \"alphabets\": [\n \"Basic Latin\",\n \"Latin-1 Supplement\"\n ],\n \"has_sig_or_bom\": false,\n \"chaos\": 0.149,\n \"coherence\": 97.152,\n \"unicode_path\": null,\n \"is_preferred\": true\n}\n```\n\n
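The JSON fields shown above largely mirror attributes on the match object returned by the Python API. A small hedged sketch (the attribute names are assumed to match the JSON keys, and the path is illustrative):

```python
# Hedged sketch: read programmatically what the CLI prints as JSON.
# Assumes the best match exposes `encoding`, `language`, `chaos` and
# `coherence` attributes mirroring the JSON keys shown above.
from charset_normalizer import from_path

best = from_path("./data/sample.1.fr.srt").best()
if best is not None:
    print(best.encoding, best.language, best.chaos, best.coherence)
```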
### Python\n*Just print out normalized text*\n```python\nfrom charset_normalizer import from_path\n\nresults = from_path('./my_subtitle.srt')\n\nprint(str(results.best()))\n```\n\n*Upgrade your code without effort*\n```python\nfrom charset_normalizer import detect\n```\n\nThe above code behaves the same as **chardet**. We ensure that we offer the best (reasonable) backward-compatible result possible, as shown in the sketch below.\n\nSee the docs for advanced usage: [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)\n\n
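A minimal sketch of that drop-in usage, assuming charset-normalizer >= 2.0; the payload is a hypothetical cp1252-encoded sample, not from the project's test data:

```python
# Hedged sketch of the drop-in replacement: like chardet, `detect` is
# expected to return a dict with "encoding", "language" and
# "confidence" keys. The payload below is a hypothetical example.
from charset_normalizer import detect

payload = b"Bonjour, le caf\xe9 est servi !"  # cp1252-encoded French text
result = detect(payload)
print(result["encoding"], result["confidence"])
```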
## \ud83d\ude07 Why\n\nWhen I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a\nreliable alternative using a completely different method. Also! I never back down on a good challenge!\n\nI **don't care** about the **originating charset** encoding, because **two different tables** can\nproduce **two identical rendered strings.**\nWhat I want is to get readable text, the best I can.\n\nIn a way, **I'm brute forcing text decoding.** How cool is that? \ud83d\ude0e\n\nDon't confuse the package **ftfy** with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer converts a raw file in an unknown encoding to Unicode.\n\n## \ud83c\udf70 How\n\n - Discard all charset encoding tables that could not fit the binary content.\n - Measure the noise, or mess, once the content is opened (in chunks) with a corresponding charset encoding.\n - Extract the matches with the lowest mess detected.\n - Additionally, we measure coherence / probe for a language.\n\n**Wait a minute**, what are noise/mess and coherence according to **you**?\n\n*Noise:* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then\n**I established** some ground rules about **what is obvious** when **it seems like** a mess.\n I know that my interpretation of what is noise is probably incomplete; feel free to contribute in order to\n improve or rewrite it.\n\n*Coherence:* For each language on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought\nthat intel is worth something here, so I use those records against decoded text to check whether I can detect intelligent design.\n\n
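To make the steps above concrete, here is a deliberately naive, illustrative sketch of the brute-force idea using only the standard library; it is not charset-normalizer's actual mess/coherence model:

```python
# Illustrative only: decode with every candidate codec and keep the
# result with the highest share of printable characters (the least
# "noisy" one). Real detection is far more nuanced than this.
CANDIDATE_CODECS = ["utf_8", "cp1252", "latin_1", "cp1251"]

def naive_best_guess(payload: bytes) -> str:
    best_name, best_score = "", -1.0
    for name in CANDIDATE_CODECS:
        try:
            text = payload.decode(name)
        except UnicodeDecodeError:
            continue  # this codec table cannot fit the binary content
        printable = sum(ch.isprintable() or ch.isspace() for ch in text)
        score = printable / max(len(text), 1)
        if score > best_score:
            best_name, best_score = name, score
    return best_name

# cp1252-encoded bytes ("h\xe9llo w\xf6rld"); utf_8 fails to decode,
# so cp1252 wins (ties are broken by candidate order).
print(naive_best_guess(b"h\xe9llo w\xf6rld"))
```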
## \u26a1 Known limitations\n\n - Language detection is unreliable when the text contains two or more languages sharing identical letters. (e.g. HTML (English tags) + Turkish content (sharing Latin characters))\n - Every charset detector heavily depends on sufficient content. In most cases, do not bother running detection on very tiny content.\n\n## \u26a0\ufe0f About Python EOLs\n\n**If you are running:**\n\n- Python >=2.7,<3.5: Unsupported\n- Python 3.5: charset-normalizer < 2.1\n- Python 3.6: charset-normalizer < 3.1\n- Python 3.7: charset-normalizer < 4.0\n\nUpgrade your Python interpreter as soon as possible.\n\n## \ud83d\udc64 Contributing\n\nContributions, issues and feature requests are very much welcome.\nFeel free to check the [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.\n\n## \ud83d\udcdd License\n\nCopyright \u00a9 [Ahmed TAHRI @Ousret](https://github.com/Ousret).
\nThis project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.\n\nCharacters frequencies used in this project \u00a9 2012 [Denny Vrande\u010di\u0107](http://simia.net/letters/)\n\n## \ud83d\udcbc For Enterprise\n\nProfessional support for charset-normalizer is available as part of the [Tidelift\nSubscription][1]. Tidelift gives software development teams a single source for\npurchasing and maintaining their software, with professional grade assurances\nfrom the experts who know it best, while seamlessly integrating with existing\ntools.\n\n[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme\n\n# Changelog\nAll notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\nThe format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).\n\n## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)\n\n### Added\n- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`\n- Support for 9 forgotten encoding that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)\n\n### Removed\n- (internal) Redundant utils.is_ascii function and unused function is_private_use_only\n- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant\n\n### Changed\n- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection\n- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.7\n\n### Fixed\n- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \\_\\_lt\\_\\_ (#350)\n\n## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)\n\n### Changed\n- Typehint for function `from_path` no longer enforce `PathLike` as its first argument\n- Minor improvement over the global detection reliability\n\n### Added\n- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries\n- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)\n- Explicit support for Python 3.12\n\n### Fixed\n- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)\n\n## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)\n\n### Added\n- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)\n\n### Removed\n- Support for Python 3.6 (PR #260)\n\n### Changed\n- Optional speedup provided by mypy/c 1.0.1\n\n## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)\n\n### Fixed\n- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)\n\n### Changed\n- Speedup provided by mypy/c 0.990 on Python >= 3.7\n\n## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)\n\n### Added\n- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results\n- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES\n- Add parameter `language_threshold` in `from_bytes`, `from_path` and 
`from_fp` to adjust the minimum expected coherence ratio\n- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)\n\n### Changed\n- Build with static metadata using 'build' frontend\n- Make the language detection stricter\n- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1\n\n### Fixed\n- CLI with opt --normalize fail when using full path for files\n- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it\n- Sphinx warnings when generating the documentation\n\n### Removed\n- Coherence detector no longer return 'Simple English' instead return 'English'\n- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'\n- Breaking: Method `first()` and `best()` from CharsetMatch\n- UTF-7 will no longer appear as \"detected\" without a recognized SIG/mark (is unreliable/conflict with ASCII)\n- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches\n- Breaking: Top-level function `normalize`\n- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch\n- Support for the backport `unicodedata2`\n\n## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)\n\n### Added\n- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results\n- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES\n- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio\n\n### Changed\n- Build with static metadata using 'build' frontend\n- Make the language detection stricter\n\n### Fixed\n- CLI with opt --normalize fail when using full path for files\n- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it\n\n### Removed\n- Coherence detector no longer return 'Simple English' instead return 'English'\n- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'\n\n## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)\n\n### Added\n- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)\n\n### Removed\n- Breaking: Method `first()` and `best()` from CharsetMatch\n- UTF-7 will no longer appear as \"detected\" without a recognized SIG/mark (is unreliable/conflict with ASCII)\n\n### Fixed\n- Sphinx warnings when generating the documentation\n\n## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)\n\n### Changed\n- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1\n\n### Removed\n- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches\n- Breaking: Top-level function `normalize`\n- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch\n- Support for the backport `unicodedata2`\n\n## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)\n\n### Deprecated\n- Function `normalize` scheduled for removal in 3.0\n\n### Changed\n- Removed useless call to decode in fn is_unprintable (#206)\n\n### Fixed\n- Third-party 
library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)\n\n## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)\n\n### Added\n- Output the Unicode table version when running the CLI with `--version` (PR #194)\n\n### Changed\n- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)\n- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)\n\n### Fixed\n- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)\n- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)\n\n### Removed\n- Support for Python 3.5 (PR #192)\n\n### Deprecated\n- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)\n\n## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)\n\n### Fixed\n- ASCII miss-detection on rare cases (PR #170) \n\n## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)\n\n### Added\n- Explicit support for Python 3.11 (PR #164)\n\n### Changed\n- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)\n\n## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)\n\n### Fixed\n- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)\n\n### Changed\n- Skipping the language-detection (CD) on ASCII (PR #155)\n\n## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)\n\n### Changed\n- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)\n\n### Fixed\n- Wrong logging level applied when setting kwarg `explain` to True (PR #146)\n\n## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)\n### Changed\n- Improvement over Vietnamese detection (PR #126)\n- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)\n- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)\n- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)\n- Code style as refactored by Sourcery-AI (PR #131) \n- Minor adjustment on the MD around european words (PR #133)\n- Remove and replace SRTs from assets / tests (PR #139)\n- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)\n- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)\n\n### Fixed\n- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)\n- Avoid using too insignificant chunk (PR #137)\n\n### Added\n- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)\n- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)\n\n## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)\n### Added\n- Add support for Kazakh (Cyrillic) language detection (PR #109)\n\n### Changed\n- Further, improve 
inferring the language from a given single-byte code page (PR #112)\n- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)\n- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)\n- Various detection improvement (MD+CD) (PR #117)\n\n### Removed\n- Remove redundant logging entry about detected language(s) (PR #115)\n\n### Fixed\n- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)\n\n## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)\n### Fixed\n- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)\n- Fix CLI crash when using --minimal output in certain cases (PR #103)\n\n### Changed\n- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)\n\n## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)\n### Changed\n- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)\n- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)\n- The Unicode detection is slightly improved (PR #93)\n- Add syntax sugar \\_\\_bool\\_\\_ for results CharsetMatches list-container (PR #91)\n\n### Removed\n- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92)\n\n### Fixed\n- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)\n- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)\n- The MANIFEST.in was not exhaustive (PR #78)\n\n## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)\n### Fixed\n- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70)\n- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)\n- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)\n- Submatch factoring could be wrong in rare edge cases (PR #72)\n- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)\n- Fix line endings from CRLF to LF for certain project files (PR #67)\n\n### Changed\n- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)\n- Allow fallback on specified encoding if any (PR #71)\n\n## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)\n### Changed\n- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)\n- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)\n\n## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)\n### Fixed\n- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59) \n\n### Changed\n- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)\n\n## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)\n### Fixed\n- Make it work where there isn't a filesystem available, dropping assets frequencies.json. 
Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)\n- Using explain=False permanently disable the verbose output in the current runtime (PR #47)\n- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47)\n- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)\n\n### Changed\n- Public function normalize default args values were not aligned with from_bytes (PR #53)\n\n### Added\n- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)\n\n## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)\n### Changed\n- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.\n- Accent has been made on UTF-8 detection, should perform rather instantaneous.\n- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.\n- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)\n- The program has been rewritten to ease the readability and maintainability. (+Using static typing)+\n- utf_7 detection has been reinstated.\n\n### Removed\n- This package no longer require anything when used with Python 3.5 (Dropped cached_property)\n- Removed support for these languages: Catalan, Esperanto, Kazakh, Baque, Volap\u00fck, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.\n- The exception hook on UnicodeDecodeError has been removed.\n\n### Deprecated\n- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0\n\n### Fixed\n- The CLI output used the relative path of the file(s). Should be absolute.\n\n## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)\n### Fixed\n- Logger configuration/usage no longer conflict with others (PR #44)\n\n## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)\n### Removed\n- Using standard logging instead of using the package loguru.\n- Dropping nose test framework in favor of the maintained pytest.\n- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.\n- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.\n- Stop support for UTF-7 that does not contain a SIG.\n- Dropping PrettyTable, replaced with pure JSON output in CLI.\n\n### Fixed\n- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.\n- Not searching properly for the BOM when trying utf32/16 parent codec.\n\n### Changed\n- Improving the package final size by compressing frequencies.json.\n- Huge improvement over the larges payload.\n\n### Added\n- CLI now produces JSON consumable output.\n- Return ASCII if given sequences fit. Given reasonable confidence.\n\n## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)\n\n### Fixed\n- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)\n\n## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)\n\n### Fixed\n- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. 
(PR #39)\n\n## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)\n\n### Fixed\n- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)\n\n## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)\n\n### Changed\n- Amend the previous release to allow prettytable 2.0 (PR #35)\n\n## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)\n\n### Fixed\n- Fix error while using the package with a python pre-release interpreter (PR #33)\n\n### Changed\n- Dependencies refactoring, constraints revised.\n\n### Added\n- Add python 3.9 and 3.10 to the supported interpreters\n\nMIT License\n\nCopyright (c) 2019 TAHRI Ahmed R.\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.", + "release_date": "2023-09-30T09:12:43", "parties": [ { "type": "person", @@ -865,11 +861,11 @@ "Typing :: Typed" ], "homepage_url": "https://github.com/Ousret/charset_normalizer", - "download_url": "https://files.pythonhosted.org/packages/2a/53/cf0a48de1bdcf6ff6e1c9a023f5f523dfe303e4024f216feac64b6eb7f67/charset-normalizer-3.2.0.tar.gz", - "size": 97063, + "download_url": "https://files.pythonhosted.org/packages/cf/ac/e89b2f2f75f51e9859979b56d2ec162f7f893221975d244d8d5277aa9489/charset-normalizer-3.3.0.tar.gz", + "size": 103776, "sha1": null, - "md5": "dbb8c5b745beddbaae67d06dce0b7c29", - "sha256": "3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace", + "md5": "c18756e76247680573aa6ca30fc915a7", + "sha256": "63563193aec44bce707e0c5ca64ff69fa72ed7cf34ce6e11d5127555756fd2f6", "sha512": null, "bug_tracking_url": null, "code_view_url": null, @@ -889,9 +885,9 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/charset-normalizer/3.2.0/json", + "api_data_url": "https://pypi.org/pypi/charset-normalizer/3.3.0/json", "datasource_id": null, - "purl": "pkg:pypi/charset-normalizer@3.2.0" + "purl": "pkg:pypi/charset-normalizer@3.3.0" }, { "type": "pypi", @@ -2327,12 +2323,12 @@ "type": "pypi", "namespace": null, "name": "urllib3", - "version": "2.0.5", + "version": "2.0.6", "qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "HTTP library with thread-safe connection pooling, file post, and more.\n

\n\n![urllib3](https://github.com/urllib3/urllib3/raw/main/docs/_static/banner_github.svg)\n\n

\n\n

\n \"PyPI\n \"Python\n \"Join\n \"Coverage\n \"Build\n \"Documentation
\n \"OpenSSF\n \"SLSA\n \"CII\n

\n\nurllib3 is a powerful, *user-friendly* HTTP client for Python. Much of the\nPython ecosystem already uses urllib3 and you should too.\nurllib3 brings many critical features that are missing from the Python\nstandard libraries:\n\n- Thread safety.\n- Connection pooling.\n- Client-side SSL/TLS verification.\n- File uploads with multipart encoding.\n- Helpers for retrying requests and dealing with HTTP redirects.\n- Support for gzip, deflate, brotli, and zstd encoding.\n- Proxy support for HTTP and SOCKS.\n- 100% test coverage.\n\nurllib3 is powerful and easy to use:\n\n```python3\n>>> import urllib3\n>>> resp = urllib3.request(\"GET\", \"http://httpbin.org/robots.txt\")\n>>> resp.status\n200\n>>> resp.data\nb\"User-agent: *\\nDisallow: /deny\\n\"\n```\n\n## Installing\n\nurllib3 can be installed with [pip](https://pip.pypa.io):\n\n```bash\n$ python -m pip install urllib3\n```\n\nAlternatively, you can grab the latest source code from [GitHub](https://github.com/urllib3/urllib3):\n\n```bash\n$ git clone https://github.com/urllib3/urllib3.git\n$ cd urllib3\n$ pip install .\n```\n\n\n## Documentation\n\nurllib3 has usage and reference documentation at [urllib3.readthedocs.io](https://urllib3.readthedocs.io).\n\n\n## Community\n\nurllib3 has a [community Discord channel](https://discord.gg/urllib3) for asking questions and\ncollaborating with other contributors. Drop by and say hello \ud83d\udc4b\n\n\n## Contributing\n\nurllib3 happily accepts contributions. Please see our\n[contributing documentation](https://urllib3.readthedocs.io/en/latest/contributing.html)\nfor some tips on getting started.\n\n\n## Security Disclosures\n\nTo report a security vulnerability, please use the\n[Tidelift security contact](https://tidelift.com/security).\nTidelift will coordinate the fix and disclosure with maintainers.\n\n\n## Maintainers\n\n- [@sethmlarson](https://github.com/sethmlarson) (Seth M. Larson)\n- [@pquentin](https://github.com/pquentin) (Quentin Pradet)\n- [@theacodes](https://github.com/theacodes) (Thea Flowers)\n- [@haikuginger](https://github.com/haikuginger) (Jess Shapiro)\n- [@lukasa](https://github.com/lukasa) (Cory Benfield)\n- [@sigmavirus24](https://github.com/sigmavirus24) (Ian Stapleton Cordasco)\n- [@shazow](https://github.com/shazow) (Andrey Petrov)\n\n\ud83d\udc4b\n\n\n## Sponsorship\n\nIf your company benefits from this library, please consider [sponsoring its\ndevelopment](https://urllib3.readthedocs.io/en/latest/sponsors.html).\n\n\n## For Enterprise\n\nProfessional support for urllib3 is available as part of the [Tidelift\nSubscription][1]. 
Tidelift gives software development teams a single source for\npurchasing and maintaining their software, with professional grade assurances\nfrom the experts who know it best, while seamlessly integrating with existing\ntools.\n\n[1]: https://tidelift.com/subscription/pkg/pypi-urllib3?utm_source=pypi-urllib3&utm_medium=referral&utm_campaign=readme", - "release_date": "2023-09-20T07:30:33", + "release_date": "2023-10-02T17:22:34", "parties": [ { "type": "person", @@ -2376,11 +2372,11 @@ "Topic :: Software Development :: Libraries" ], "homepage_url": "", - "download_url": "https://files.pythonhosted.org/packages/37/dc/399e63f5d1d96bb643404ee830657f4dfcf8503f5ba8fa3c6d465d0c57fe/urllib3-2.0.5-py3-none-any.whl", - "size": 123792, + "download_url": "https://files.pythonhosted.org/packages/26/40/9957270221b6d3e9a3b92fdfba80dd5c9661ff45a664b47edd5d00f707f5/urllib3-2.0.6-py3-none-any.whl", + "size": 123799, "sha1": null, - "md5": "d7a2bde43ed2ebf36b6e09e01cce0411", - "sha256": "ef16afa8ba34a1f989db38e1dbbe0c302e4289a47856990d0682e374563ce35e", + "md5": "b39061189ea3bd9378aaf58a1b485b8a", + "sha256": "7a7c7003b000adf9e7ca2a377c9688bbc54ed41b985789ed576570342a375cd2", "sha512": null, "bug_tracking_url": null, "code_view_url": "https://github.com/urllib3/urllib3", @@ -2399,20 +2395,20 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/urllib3/2.0.5/json", + "api_data_url": "https://pypi.org/pypi/urllib3/2.0.6/json", "datasource_id": null, - "purl": "pkg:pypi/urllib3@2.0.5" + "purl": "pkg:pypi/urllib3@2.0.6" }, { "type": "pypi", "namespace": null, "name": "urllib3", - "version": "2.0.5", + "version": "2.0.6", "qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "HTTP library with thread-safe connection pooling, file post, and more.\n

\n\n![urllib3](https://github.com/urllib3/urllib3/raw/main/docs/_static/banner_github.svg)\n\n

\n\n

\n \"PyPI\n \"Python\n \"Join\n \"Coverage\n \"Build\n \"Documentation
\n \"OpenSSF\n \"SLSA\n \"CII\n

\n\nurllib3 is a powerful, *user-friendly* HTTP client for Python. Much of the\nPython ecosystem already uses urllib3 and you should too.\nurllib3 brings many critical features that are missing from the Python\nstandard libraries:\n\n- Thread safety.\n- Connection pooling.\n- Client-side SSL/TLS verification.\n- File uploads with multipart encoding.\n- Helpers for retrying requests and dealing with HTTP redirects.\n- Support for gzip, deflate, brotli, and zstd encoding.\n- Proxy support for HTTP and SOCKS.\n- 100% test coverage.\n\nurllib3 is powerful and easy to use:\n\n```python3\n>>> import urllib3\n>>> resp = urllib3.request(\"GET\", \"http://httpbin.org/robots.txt\")\n>>> resp.status\n200\n>>> resp.data\nb\"User-agent: *\\nDisallow: /deny\\n\"\n```\n\n## Installing\n\nurllib3 can be installed with [pip](https://pip.pypa.io):\n\n```bash\n$ python -m pip install urllib3\n```\n\nAlternatively, you can grab the latest source code from [GitHub](https://github.com/urllib3/urllib3):\n\n```bash\n$ git clone https://github.com/urllib3/urllib3.git\n$ cd urllib3\n$ pip install .\n```\n\n\n## Documentation\n\nurllib3 has usage and reference documentation at [urllib3.readthedocs.io](https://urllib3.readthedocs.io).\n\n\n## Community\n\nurllib3 has a [community Discord channel](https://discord.gg/urllib3) for asking questions and\ncollaborating with other contributors. Drop by and say hello \ud83d\udc4b\n\n\n## Contributing\n\nurllib3 happily accepts contributions. Please see our\n[contributing documentation](https://urllib3.readthedocs.io/en/latest/contributing.html)\nfor some tips on getting started.\n\n\n## Security Disclosures\n\nTo report a security vulnerability, please use the\n[Tidelift security contact](https://tidelift.com/security).\nTidelift will coordinate the fix and disclosure with maintainers.\n\n\n## Maintainers\n\n- [@sethmlarson](https://github.com/sethmlarson) (Seth M. Larson)\n- [@pquentin](https://github.com/pquentin) (Quentin Pradet)\n- [@theacodes](https://github.com/theacodes) (Thea Flowers)\n- [@haikuginger](https://github.com/haikuginger) (Jess Shapiro)\n- [@lukasa](https://github.com/lukasa) (Cory Benfield)\n- [@sigmavirus24](https://github.com/sigmavirus24) (Ian Stapleton Cordasco)\n- [@shazow](https://github.com/shazow) (Andrey Petrov)\n\n\ud83d\udc4b\n\n\n## Sponsorship\n\nIf your company benefits from this library, please consider [sponsoring its\ndevelopment](https://urllib3.readthedocs.io/en/latest/sponsors.html).\n\n\n## For Enterprise\n\nProfessional support for urllib3 is available as part of the [Tidelift\nSubscription][1]. 
Tidelift gives software development teams a single source for\npurchasing and maintaining their software, with professional grade assurances\nfrom the experts who know it best, while seamlessly integrating with existing\ntools.\n\n[1]: https://tidelift.com/subscription/pkg/pypi-urllib3?utm_source=pypi-urllib3&utm_medium=referral&utm_campaign=readme", - "release_date": "2023-09-20T07:30:34", + "release_date": "2023-10-02T17:22:36", "parties": [ { "type": "person", @@ -2456,11 +2452,11 @@ "Topic :: Software Development :: Libraries" ], "homepage_url": "", - "download_url": "https://files.pythonhosted.org/packages/51/13/62cb4a0af89fdf72db4a0ead8026e724c7f3cbf69706d84a4eff439be853/urllib3-2.0.5.tar.gz", - "size": 281697, + "download_url": "https://files.pythonhosted.org/packages/8b/00/db794bb94bf09cadb4ecd031c4295dd4e3536db4da958e20331d95f1edb7/urllib3-2.0.6.tar.gz", + "size": 281846, "sha1": null, - "md5": "4d824b7bba1976591fc05fad02fb258d", - "sha256": "13abf37382ea2ce6fb744d4dad67838eec857c9f4f57009891805e0b5e123594", + "md5": "6ca3ae8220bf1df8697038b94fb6ea48", + "sha256": "b19e1a85d206b56d7df1d5e683df4a7725252a964e3993648dd0fb5a1c157564", "sha512": null, "bug_tracking_url": null, "code_view_url": "https://github.com/urllib3/urllib3", @@ -2479,9 +2475,9 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/urllib3/2.0.5/json", + "api_data_url": "https://pypi.org/pypi/urllib3/2.0.6/json", "datasource_id": null, - "purl": "pkg:pypi/urllib3@2.0.5" + "purl": "pkg:pypi/urllib3@2.0.6" } ], "resolved_dependencies_graph": [ @@ -2513,13 +2509,13 @@ "dependencies": [] }, { - "package": "pkg:pypi/cffi@1.15.1", + "package": "pkg:pypi/cffi@1.16.0", "dependencies": [ "pkg:pypi/pycparser@2.21" ] }, { - "package": "pkg:pypi/charset-normalizer@3.2.0", + "package": "pkg:pypi/charset-normalizer@3.3.0", "dependencies": [] }, { @@ -2529,7 +2525,7 @@ { "package": "pkg:pypi/cryptography@41.0.4", "dependencies": [ - "pkg:pypi/cffi@1.15.1" + "pkg:pypi/cffi@1.16.0" ] }, { @@ -2571,9 +2567,9 @@ "package": "pkg:pypi/requests@2.31.0", "dependencies": [ "pkg:pypi/certifi@2023.7.22", - "pkg:pypi/charset-normalizer@3.2.0", + "pkg:pypi/charset-normalizer@3.3.0", "pkg:pypi/idna@3.4", - "pkg:pypi/urllib3@2.0.5" + "pkg:pypi/urllib3@2.0.6" ] }, { @@ -2585,7 +2581,7 @@ "dependencies": [] }, { - "package": "pkg:pypi/urllib3@2.0.5", + "package": "pkg:pypi/urllib3@2.0.6", "dependencies": [] } ] diff --git a/tests/data/environment-marker-test-requirements.txt-expected.json b/tests/data/environment-marker-test-requirements.txt-expected.json index 9ea777a2..bb6bb75e 100644 --- a/tests/data/environment-marker-test-requirements.txt-expected.json +++ b/tests/data/environment-marker-test-requirements.txt-expected.json @@ -4,7 +4,7 @@ "tool_homepageurl": "https://github.com/nexB/python-inspector", "tool_version": "0.9.8", "options": [ - "--requirement /home/tg1999/Desktop/example/tools/python-inspector/tests/data/environment-marker-test-requirements.txt", + "--requirement /home/tg1999/Desktop/python-inspector-1/tests/data/environment-marker-test-requirements.txt", "--index-url https://pypi.org/simple", "--python-version 37", "--operating-system linux", @@ -17,7 +17,7 @@ "files": [ { "type": "file", - "path": "/home/tg1999/Desktop/example/tools/python-inspector/tests/data/environment-marker-test-requirements.txt", + "path": "/home/tg1999/Desktop/python-inspector-1/tests/data/environment-marker-test-requirements.txt", "package_data": [ { "type": "pypi", diff --git 
a/tests/data/frozen-requirements.txt-expected.json b/tests/data/frozen-requirements.txt-expected.json index 0dded3b6..3de03568 100644 --- a/tests/data/frozen-requirements.txt-expected.json +++ b/tests/data/frozen-requirements.txt-expected.json @@ -4,7 +4,7 @@ "tool_homepageurl": "https://github.com/nexB/python-inspector", "tool_version": "0.9.8", "options": [ - "--requirement /home/tg1999/Desktop/example/tools/python-inspector/tests/data/frozen-requirements.txt", + "--requirement /home/tg1999/Desktop/python-inspector-1/tests/data/frozen-requirements.txt", "--index-url https://pypi.org/simple", "--python-version 38", "--operating-system linux", @@ -17,7 +17,7 @@ "files": [ { "type": "file", - "path": "/home/tg1999/Desktop/example/tools/python-inspector/tests/data/frozen-requirements.txt", + "path": "/home/tg1999/Desktop/python-inspector-1/tests/data/frozen-requirements.txt", "package_data": [ { "type": "pypi", diff --git a/tests/data/insecure-setup-2/setup.py-expected.json b/tests/data/insecure-setup-2/setup.py-expected.json index 6b871e69..210c0a6e 100644 --- a/tests/data/insecure-setup-2/setup.py-expected.json +++ b/tests/data/insecure-setup-2/setup.py-expected.json @@ -16,7 +16,7 @@ "files": [ { "type": "file", - "path": "/home/tg1999/Desktop/example/tools/python-inspector/tests/data/insecure-setup-2/setup.py", + "path": "/home/tg1999/Desktop/python-inspector-1/tests/data/insecure-setup-2/setup.py", "package_data": [ { "type": "pypi", @@ -7792,12 +7792,12 @@ "type": "pypi", "namespace": null, "name": "wcwidth", - "version": "0.2.6", + "version": "0.2.8", "qualifiers": {}, "subpath": null, "primary_language": "Python", - "description": "Measures the displayed width of unicode strings in a terminal\n|pypi_downloads| |codecov| |license|\n\n============\nIntroduction\n============\n\nThis library is mainly for CLI programs that carefully produce output for\nTerminals, or make pretend to be an emulator.\n\n**Problem Statement**: The printable length of *most* strings are equal to the\nnumber of cells they occupy on the screen ``1 character : 1 cell``. However,\nthere are categories of characters that *occupy 2 cells* (full-wide), and\nothers that *occupy 0* cells (zero-width).\n\n**Solution**: POSIX.1-2001 and POSIX.1-2008 conforming systems provide\n`wcwidth(3)`_ and `wcswidth(3)`_ C functions of which this python module's\nfunctions precisely copy. *These functions return the number of cells a\nunicode string is expected to occupy.*\n\nInstallation\n------------\n\nThe stable version of this package is maintained on pypi, install using pip::\n\n pip install wcwidth\n\nExample\n-------\n\n**Problem**: given the following phrase (Japanese),\n\n >>> text = u'\u30b3\u30f3\u30cb\u30c1\u30cf'\n\nPython **incorrectly** uses the *string length* of 5 codepoints rather than the\n*printible length* of 10 cells, so that when using the `rjust` function, the\noutput length is wrong::\n\n >>> print(len('\u30b3\u30f3\u30cb\u30c1\u30cf'))\n 5\n\n >>> print('\u30b3\u30f3\u30cb\u30c1\u30cf'.rjust(20, '_'))\n _______________\u30b3\u30f3\u30cb\u30c1\u30cf\n\nBy defining our own \"rjust\" function that uses wcwidth, we can correct this::\n\n >>> def wc_rjust(text, length, padding=' '):\n ... from wcwidth import wcswidth\n ... 
return padding * max(0, (length - wcswidth(text))) + text\n ...\n\nOur **Solution** uses wcswidth to determine the string length correctly::\n\n >>> from wcwidth import wcswidth\n >>> print(wcswidth('\u30b3\u30f3\u30cb\u30c1\u30cf'))\n 10\n\n >>> print(wc_rjust('\u30b3\u30f3\u30cb\u30c1\u30cf', 20, '_'))\n __________\u30b3\u30f3\u30cb\u30c1\u30cf\n\n\nChoosing a Version\n------------------\n\nExport an environment variable, ``UNICODE_VERSION``. This should be done by\n*terminal emulators* or those developers experimenting with authoring one of\ntheir own, from shell::\n\n $ export UNICODE_VERSION=13.0\n\nIf unspecified, the latest version is used. If your Terminal Emulator does not\nexport this variable, you can use the `jquast/ucs-detect`_ utility to\nautomatically detect and export it to your shell.\n\nwcwidth, wcswidth\n-----------------\nUse function ``wcwidth()`` to determine the length of a *single unicode\ncharacter*, and ``wcswidth()`` to determine the length of many, a *string\nof unicode characters*.\n\nBriefly, return values of function ``wcwidth()`` are:\n\n``-1``\n Indeterminate (not printable).\n\n``0``\n Does not advance the cursor, such as NULL or Combining.\n\n``2``\n Characters of category East Asian Wide (W) or East Asian\n Full-width (F) which are displayed using two terminal cells.\n\n``1``\n All others.\n\nFunction ``wcswidth()`` simply returns the sum of all values for each character\nalong a string, or ``-1`` when it occurs anywhere along a string.\n\nFull API Documentation at http://wcwidth.readthedocs.org\n\n==========\nDeveloping\n==========\n\nInstall wcwidth in editable mode::\n\n pip install -e.\n\nExecute unit tests using tox_::\n\n tox\n\nRegenerate python code tables from latest Unicode Specification data files::\n\n tox -e update\n\nSupplementary tools for browsing and testing terminals for wide unicode\ncharacters are found in the `bin/`_ of this project's source code. Just ensure\nto first ``pip install -erequirements-develop.txt`` from this projects main\nfolder. 
For example, an interactive browser for testing::\n\n python ./bin/wcwidth-browser.py\n\nUses\n----\n\nThis library is used in:\n\n- `jquast/blessed`_: a thin, practical wrapper around terminal capabilities in\n Python.\n\n- `jonathanslenders/python-prompt-toolkit`_: a Library for building powerful\n interactive command lines in Python.\n\n- `dbcli/pgcli`_: Postgres CLI with autocompletion and syntax highlighting.\n\n- `thomasballinger/curtsies`_: a Curses-like terminal wrapper with a display\n based on compositing 2d arrays of text.\n\n- `selectel/pyte`_: Simple VTXXX-compatible linux terminal emulator.\n\n- `astanin/python-tabulate`_: Pretty-print tabular data in Python, a library\n and a command-line utility.\n\n- `LuminosoInsight/python-ftfy`_: Fixes mojibake and other glitches in Unicode\n text.\n\n- `nbedos/termtosvg`_: Terminal recorder that renders sessions as SVG\n animations.\n\n- `peterbrittain/asciimatics`_: Package to help people create full-screen text\n UIs.\n\nOther Languages\n---------------\n\n- `timoxley/wcwidth`_: JavaScript\n- `janlelis/unicode-display_width`_: Ruby\n- `alecrabbit/php-wcwidth`_: PHP\n- `Text::CharWidth`_: Perl\n- `bluebear94/Terminal-WCWidth`: Perl 6\n- `mattn/go-runewidth`_: Go\n- `emugel/wcwidth`_: Haxe\n- `aperezdc/lua-wcwidth`: Lua\n- `joachimschmidt557/zig-wcwidth`: Zig\n- `fumiyas/wcwidth-cjk`: `LD_PRELOAD` override\n- `joshuarubin/wcwidth9`: Unicode version 9 in C\n\nHistory\n-------\n\n0.2.6 *2023-01-14*\n * **Updated** tables to include Unicode Specification 14.0.0 and 15.0.0.\n * **Changed** developer tools to use pip-compile, and to use jinja2 templates\n for code generation in `bin/update-tables.py` to prepare for possible\n compiler optimization release.\n\n0.2.1 .. 0.2.5 *2020-06-23*\n * **Repository** changes to update tests and packaging issues, and\n begin tagging repository with matching release versions.\n\n0.2.0 *2020-06-01*\n * **Enhancement**: Unicode version may be selected by exporting the\n Environment variable ``UNICODE_VERSION``, such as ``13.0``, or ``6.3.0``.\n See the `jquast/ucs-detect`_ CLI utility for automatic detection.\n * **Enhancement**:\n API Documentation is published to readthedocs.org.\n * **Updated** tables for *all* Unicode Specifications with files\n published in a programmatically consumable format, versions 4.1.0\n through 13.0\n\n0.1.9 *2020-03-22*\n * **Performance** optimization by `Avram Lubkin`_, `PR #35`_.\n * **Updated** tables to Unicode Specification 13.0.0.\n\n0.1.8 *2020-01-01*\n * **Updated** tables to Unicode Specification 12.0.0. (`PR #30`_).\n\n0.1.7 *2016-07-01*\n * **Updated** tables to Unicode Specification 9.0.0. (`PR #18`_).\n\n0.1.6 *2016-01-08 Production/Stable*\n * ``LICENSE`` file now included with distribution.\n\n0.1.5 *2015-09-13 Alpha*\n * **Bugfix**:\n Resolution of \"combining_ character width\" issue, most especially\n those that previously returned -1 now often (correctly) return 0.\n resolved by `Philip Craig`_ via `PR #11`_.\n * **Deprecated**:\n The module path ``wcwidth.table_comb`` is no longer available,\n it has been superseded by module path ``wcwidth.table_zero``.\n\n0.1.4 *2014-11-20 Pre-Alpha*\n * **Feature**: ``wcswidth()`` now determines printable length\n for (most) combining_ characters. 
The developer's tool\n `bin/wcwidth-browser.py`_ is improved to display combining_\n characters when provided the ``--combining`` option\n (`Thomas Ballinger`_ and `Leta Montopoli`_ `PR #5`_).\n * **Feature**: added static analysis (prospector_) to testing\n framework.\n\n0.1.3 *2014-10-29 Pre-Alpha*\n * **Bugfix**: 2nd parameter of wcswidth was not honored.\n (`Thomas Ballinger`_, `PR #4`_).\n\n0.1.2 *2014-10-28 Pre-Alpha*\n * **Updated** tables to Unicode Specification 7.0.0.\n (`Thomas Ballinger`_, `PR #3`_).\n\n0.1.1 *2014-05-14 Pre-Alpha*\n * Initial release to pypi, Based on Unicode Specification 6.3.0\n\nThis code was originally derived directly from C code of the same name,\nwhose latest version is available at\nhttp://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::\n\n * Markus Kuhn -- 2007-05-26 (Unicode 5.0)\n *\n * Permission to use, copy, modify, and distribute this software\n * for any purpose and without fee is hereby granted. The author\n * disclaims all warranties with regard to this software.\n\n.. _`tox`: https://testrun.org/tox/latest/install.html\n.. _`prospector`: https://github.com/landscapeio/prospector\n.. _`combining`: https://en.wikipedia.org/wiki/Combining_character\n.. _`bin/`: https://github.com/jquast/wcwidth/tree/master/bin\n.. _`bin/wcwidth-browser.py`: https://github.com/jquast/wcwidth/tree/master/bin/wcwidth-browser.py\n.. _`Thomas Ballinger`: https://github.com/thomasballinger\n.. _`Leta Montopoli`: https://github.com/lmontopo\n.. _`Philip Craig`: https://github.com/philipc\n.. _`PR #3`: https://github.com/jquast/wcwidth/pull/3\n.. _`PR #4`: https://github.com/jquast/wcwidth/pull/4\n.. _`PR #5`: https://github.com/jquast/wcwidth/pull/5\n.. _`PR #11`: https://github.com/jquast/wcwidth/pull/11\n.. _`PR #18`: https://github.com/jquast/wcwidth/pull/18\n.. _`PR #30`: https://github.com/jquast/wcwidth/pull/30\n.. _`PR #35`: https://github.com/jquast/wcwidth/pull/35\n.. _`jquast/blessed`: https://github.com/jquast/blessed\n.. _`selectel/pyte`: https://github.com/selectel/pyte\n.. _`thomasballinger/curtsies`: https://github.com/thomasballinger/curtsies\n.. _`dbcli/pgcli`: https://github.com/dbcli/pgcli\n.. _`jonathanslenders/python-prompt-toolkit`: https://github.com/jonathanslenders/python-prompt-toolkit\n.. _`timoxley/wcwidth`: https://github.com/timoxley/wcwidth\n.. _`wcwidth(3)`: http://man7.org/linux/man-pages/man3/wcwidth.3.html\n.. _`wcswidth(3)`: http://man7.org/linux/man-pages/man3/wcswidth.3.html\n.. _`astanin/python-tabulate`: https://github.com/astanin/python-tabulate\n.. _`janlelis/unicode-display_width`: https://github.com/janlelis/unicode-display_width\n.. _`LuminosoInsight/python-ftfy`: https://github.com/LuminosoInsight/python-ftfy\n.. _`alecrabbit/php-wcwidth`: https://github.com/alecrabbit/php-wcwidth\n.. _`Text::CharWidth`: https://metacpan.org/pod/Text::CharWidth\n.. _`bluebear94/Terminal-WCWidth`: https://github.com/bluebear94/Terminal-WCWidth\n.. _`mattn/go-runewidth`: https://github.com/mattn/go-runewidth\n.. _`emugel/wcwidth`: https://github.com/emugel/wcwidth\n.. _`jquast/ucs-detect`: https://github.com/jquast/ucs-detect\n.. _`Avram Lubkin`: https://github.com/avylove\n.. _`nbedos/termtosvg`: https://github.com/nbedos/termtosvg\n.. _`peterbrittain/asciimatics`: https://github.com/peterbrittain/asciimatics\n.. _`aperezdc/lua-wcwidth`: https://github.com/aperezdc/lua-wcwidth\n.. _`fumiyas/wcwidth-cjk`: https://github.com/fumiyas/wcwidth-cjk\n.. 
|pypi_downloads| image:: https://img.shields.io/pypi/dm/wcwidth.svg?logo=pypi\n :alt: Downloads\n :target: https://pypi.org/project/wcwidth/\n.. |codecov| image:: https://codecov.io/gh/jquast/wcwidth/branch/master/graph/badge.svg\n :alt: codecov.io Code Coverage\n :target: https://codecov.io/gh/jquast/wcwidth/\n.. |license| image:: https://img.shields.io/github/license/jquast/wcwidth.svg\n :target: https://pypi.python.org/pypi/wcwidth/\n :alt: MIT License", - "release_date": "2023-01-16T15:09:15", + "description": "Measures the displayed width of unicode strings in a terminal\n|pypi_downloads| |codecov| |license|\n\n============\nIntroduction\n============\n\nThis library is mainly for CLI programs that carefully produce output for\nTerminals, or make pretend to be an emulator.\n\n**Problem Statement**: The printable length of *most* strings are equal to the\nnumber of cells they occupy on the screen ``1 character : 1 cell``. However,\nthere are categories of characters that *occupy 2 cells* (full-wide), and\nothers that *occupy 0* cells (zero-width).\n\n**Solution**: POSIX.1-2001 and POSIX.1-2008 conforming systems provide\n`wcwidth(3)`_ and `wcswidth(3)`_ C functions of which this python module's\nfunctions precisely copy. *These functions return the number of cells a\nunicode string is expected to occupy.*\n\nInstallation\n------------\n\nThe stable version of this package is maintained on pypi, install using pip::\n\n pip install wcwidth\n\nExample\n-------\n\n**Problem**: given the following phrase (Japanese),\n\n >>> text = u'\u30b3\u30f3\u30cb\u30c1\u30cf'\n\nPython **incorrectly** uses the *string length* of 5 codepoints rather than the\n*printible length* of 10 cells, so that when using the `rjust` function, the\noutput length is wrong::\n\n >>> print(len('\u30b3\u30f3\u30cb\u30c1\u30cf'))\n 5\n\n >>> print('\u30b3\u30f3\u30cb\u30c1\u30cf'.rjust(20, '_'))\n _______________\u30b3\u30f3\u30cb\u30c1\u30cf\n\nBy defining our own \"rjust\" function that uses wcwidth, we can correct this::\n\n >>> def wc_rjust(text, length, padding=' '):\n ... from wcwidth import wcswidth\n ... return padding * max(0, (length - wcswidth(text))) + text\n ...\n\nOur **Solution** uses wcswidth to determine the string length correctly::\n\n >>> from wcwidth import wcswidth\n >>> print(wcswidth('\u30b3\u30f3\u30cb\u30c1\u30cf'))\n 10\n\n >>> print(wc_rjust('\u30b3\u30f3\u30cb\u30c1\u30cf', 20, '_'))\n __________\u30b3\u30f3\u30cb\u30c1\u30cf\n\n\nChoosing a Version\n------------------\n\nExport an environment variable, ``UNICODE_VERSION``. This should be done by\n*terminal emulators* or those developers experimenting with authoring one of\ntheir own, from shell::\n\n $ export UNICODE_VERSION=13.0\n\nIf unspecified, the latest version is used. 
If your Terminal Emulator does not\nexport this variable, you can use the `jquast/ucs-detect`_ utility to\nautomatically detect and export it to your shell.\n\nwcwidth, wcswidth\n-----------------\nUse function ``wcwidth()`` to determine the length of a *single unicode\ncharacter*, and ``wcswidth()`` to determine the length of many, a *string\nof unicode characters*.\n\nBriefly, return values of function ``wcwidth()`` are:\n\n``-1``\n Indeterminate (not printable).\n\n``0``\n Does not advance the cursor, such as NULL or Combining.\n\n``2``\n Characters of category East Asian Wide (W) or East Asian\n Full-width (F) which are displayed using two terminal cells.\n\n``1``\n All others.\n\nFunction ``wcswidth()`` simply returns the sum of all values for each character\nalong a string, or ``-1`` when it occurs anywhere along a string.\n\nFull API Documentation at https://wcwidth.readthedocs.org\n\n==========\nDeveloping\n==========\n\nInstall wcwidth in editable mode::\n\n pip install -e .\n\nExecute unit tests using tox_::\n\n tox -e py27,py35,py36,py37,py38,py39,py310,py311,py312\n\nUpdating Unicode Version\n------------------------\n\nRegenerate python code tables from latest Unicode Specification data files::\n\n tox -e update\n\nThe script is located at ``bin/update-tables.py``, requires Python 3.9 or\nlater. It is recommended but not necessary to run this script with the newest\nPython, because the newest Python has the latest ``unicodedata`` for generating\ncomments.\n\nBuilding Documentation\n----------------------\n\nThis project is using `sphinx`_ 4.5 to build documentation::\n\n tox -e sphinx\n\nThe output will be in ``docs/_build/html/``.\n\nUpdating Requirements\n---------------------\n\nThis project is using `pip-tools`_ to manage requirements.\n\nTo upgrade requirements for updating unicode version, run::\n\n tox -e update_requirements_update\n\nTo upgrade requirements for testing, run::\n\n tox -e update_requirements37,update_requirements39\n\nTo upgrade requirements for building documentation, run::\n\n tox -e update_requirements_docs\n\nUtilities\n---------\n\nSupplementary tools for browsing and testing terminals for wide unicode\ncharacters are found in the `bin/`_ of this project's source code. Just ensure\nto first ``pip install -r requirements-develop.txt`` from this projects main\nfolder. 
For example, an interactive browser for testing::\n\n python ./bin/wcwidth-browser.py\n\n====\nUses\n====\n\nThis library is used in:\n\n- `jquast/blessed`_: a thin, practical wrapper around terminal capabilities in\n Python.\n\n- `prompt-toolkit/python-prompt-toolkit`_: a Library for building powerful\n interactive command lines in Python.\n\n- `dbcli/pgcli`_: Postgres CLI with autocompletion and syntax highlighting.\n\n- `thomasballinger/curtsies`_: a Curses-like terminal wrapper with a display\n based on compositing 2d arrays of text.\n\n- `selectel/pyte`_: Simple VTXXX-compatible linux terminal emulator.\n\n- `astanin/python-tabulate`_: Pretty-print tabular data in Python, a library\n and a command-line utility.\n\n- `rspeer/python-ftfy`_: Fixes mojibake and other glitches in Unicode\n text.\n\n- `nbedos/termtosvg`_: Terminal recorder that renders sessions as SVG\n animations.\n\n- `peterbrittain/asciimatics`_: Package to help people create full-screen text\n UIs.\n\n- `python-cmd2/cmd2`_: A tool for building interactive command line apps\n\n- `stratis-storage/stratis-cli`_: CLI for the Stratis project\n\n- `ihabunek/toot`_: A Mastodon CLI/TUI client\n\n- `saulpw/visidata`_: Terminal spreadsheet multitool for discovering and\n arranging data\n\n===============\nOther Languages\n===============\n\n- `timoxley/wcwidth`_: JavaScript\n- `janlelis/unicode-display_width`_: Ruby\n- `alecrabbit/php-wcwidth`_: PHP\n- `Text::CharWidth`_: Perl\n- `bluebear94/Terminal-WCWidth`_: Perl 6\n- `mattn/go-runewidth`_: Go\n- `grepsuzette/wcwidth`_: Haxe\n- `aperezdc/lua-wcwidth`_: Lua\n- `joachimschmidt557/zig-wcwidth`_: Zig\n- `fumiyas/wcwidth-cjk`_: `LD_PRELOAD` override\n- `joshuarubin/wcwidth9`_: Unicode version 9 in C\n\n=======\nHistory\n=======\n\n0.2.8 *2023-09-30*\n * Include requirements files in the source distibution (`PR #82`).\n\n0.2.7 *2023-09-28*\n * **Updated** tables to include Unicode Specification 15.1.0.\n * Include ``bin``, ``docs``, and ``tox.ini`` in the source distribution\n\n0.2.6 *2023-01-14*\n * **Updated** tables to include Unicode Specification 14.0.0 and 15.0.0.\n * **Changed** developer tools to use pip-compile, and to use jinja2 templates\n for code generation in `bin/update-tables.py` to prepare for possible\n compiler optimization release.\n\n0.2.1 .. 0.2.5 *2020-06-23*\n * **Repository** changes to update tests and packaging issues, and\n begin tagging repository with matching release versions.\n\n0.2.0 *2020-06-01*\n * **Enhancement**: Unicode version may be selected by exporting the\n Environment variable ``UNICODE_VERSION``, such as ``13.0``, or ``6.3.0``.\n See the `jquast/ucs-detect`_ CLI utility for automatic detection.\n * **Enhancement**:\n API Documentation is published to readthedocs.org.\n * **Updated** tables for *all* Unicode Specifications with files\n published in a programmatically consumable format, versions 4.1.0\n through 13.0\n\n0.1.9 *2020-03-22*\n * **Performance** optimization by `Avram Lubkin`_, `PR #35`_.\n * **Updated** tables to Unicode Specification 13.0.0.\n\n0.1.8 *2020-01-01*\n * **Updated** tables to Unicode Specification 12.0.0. (`PR #30`_).\n\n0.1.7 *2016-07-01*\n * **Updated** tables to Unicode Specification 9.0.0. 
(`PR #18`_).\n\n0.1.6 *2016-01-08 Production/Stable*\n * ``LICENSE`` file now included with distribution.\n\n0.1.5 *2015-09-13 Alpha*\n * **Bugfix**:\n Resolution of \"combining_ character width\" issue, most especially\n those that previously returned -1 now often (correctly) return 0.\n resolved by `Philip Craig`_ via `PR #11`_.\n * **Deprecated**:\n The module path ``wcwidth.table_comb`` is no longer available,\n it has been superseded by module path ``wcwidth.table_zero``.\n\n0.1.4 *2014-11-20 Pre-Alpha*\n * **Feature**: ``wcswidth()`` now determines printable length\n for (most) combining_ characters. The developer's tool\n `bin/wcwidth-browser.py`_ is improved to display combining_\n characters when provided the ``--combining`` option\n (`Thomas Ballinger`_ and `Leta Montopoli`_ `PR #5`_).\n * **Feature**: added static analysis (prospector_) to testing\n framework.\n\n0.1.3 *2014-10-29 Pre-Alpha*\n * **Bugfix**: 2nd parameter of wcswidth was not honored.\n (`Thomas Ballinger`_, `PR #4`_).\n\n0.1.2 *2014-10-28 Pre-Alpha*\n * **Updated** tables to Unicode Specification 7.0.0.\n (`Thomas Ballinger`_, `PR #3`_).\n\n0.1.1 *2014-05-14 Pre-Alpha*\n * Initial release to pypi, Based on Unicode Specification 6.3.0\n\nThis code was originally derived directly from C code of the same name,\nwhose latest version is available at\nhttps://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::\n\n * Markus Kuhn -- 2007-05-26 (Unicode 5.0)\n *\n * Permission to use, copy, modify, and distribute this software\n * for any purpose and without fee is hereby granted. The author\n * disclaims all warranties with regard to this software.\n\n.. _`tox`: https://tox.wiki/en/latest/\n.. _`prospector`: https://github.com/landscapeio/prospector\n.. _`combining`: https://en.wikipedia.org/wiki/Combining_character\n.. _`bin/`: https://github.com/jquast/wcwidth/tree/master/bin\n.. _`bin/wcwidth-browser.py`: https://github.com/jquast/wcwidth/blob/master/bin/wcwidth-browser.py\n.. _`Thomas Ballinger`: https://github.com/thomasballinger\n.. _`Leta Montopoli`: https://github.com/lmontopo\n.. _`Philip Craig`: https://github.com/philipc\n.. _`PR #3`: https://github.com/jquast/wcwidth/pull/3\n.. _`PR #4`: https://github.com/jquast/wcwidth/pull/4\n.. _`PR #5`: https://github.com/jquast/wcwidth/pull/5\n.. _`PR #11`: https://github.com/jquast/wcwidth/pull/11\n.. _`PR #18`: https://github.com/jquast/wcwidth/pull/18\n.. _`PR #30`: https://github.com/jquast/wcwidth/pull/30\n.. _`PR #35`: https://github.com/jquast/wcwidth/pull/35\n.. _`jquast/blessed`: https://github.com/jquast/blessed\n.. _`selectel/pyte`: https://github.com/selectel/pyte\n.. _`thomasballinger/curtsies`: https://github.com/thomasballinger/curtsies\n.. _`dbcli/pgcli`: https://github.com/dbcli/pgcli\n.. _`prompt-toolkit/python-prompt-toolkit`: https://github.com/prompt-toolkit/python-prompt-toolkit\n.. _`timoxley/wcwidth`: https://github.com/timoxley/wcwidth\n.. _`wcwidth(3)`: https://man7.org/linux/man-pages/man3/wcwidth.3.html\n.. _`wcswidth(3)`: https://man7.org/linux/man-pages/man3/wcswidth.3.html\n.. _`astanin/python-tabulate`: https://github.com/astanin/python-tabulate\n.. _`janlelis/unicode-display_width`: https://github.com/janlelis/unicode-display_width\n.. _`rspeer/python-ftfy`: https://github.com/rspeer/python-ftfy\n.. _`alecrabbit/php-wcwidth`: https://github.com/alecrabbit/php-wcwidth\n.. _`Text::CharWidth`: https://metacpan.org/pod/Text::CharWidth\n.. _`bluebear94/Terminal-WCWidth`: https://github.com/bluebear94/Terminal-WCWidth\n.. 
_`mattn/go-runewidth`: https://github.com/mattn/go-runewidth\n.. _`grepsuzette/wcwidth`: https://github.com/grepsuzette/wcwidth\n.. _`jquast/ucs-detect`: https://github.com/jquast/ucs-detect\n.. _`Avram Lubkin`: https://github.com/avylove\n.. _`nbedos/termtosvg`: https://github.com/nbedos/termtosvg\n.. _`peterbrittain/asciimatics`: https://github.com/peterbrittain/asciimatics\n.. _`aperezdc/lua-wcwidth`: https://github.com/aperezdc/lua-wcwidth\n.. _`joachimschmidt557/zig-wcwidth`: https://github.com/joachimschmidt557/zig-wcwidth\n.. _`fumiyas/wcwidth-cjk`: https://github.com/fumiyas/wcwidth-cjk\n.. _`joshuarubin/wcwidth9`: https://github.com/joshuarubin/wcwidth9\n.. _`python-cmd2/cmd2`: https://github.com/python-cmd2/cmd2\n.. _`stratis-storage/stratis-cli`: https://github.com/stratis-storage/stratis-cli\n.. _`ihabunek/toot`: https://github.com/ihabunek/toot\n.. _`saulpw/visidata`: https://github.com/saulpw/visidata\n.. _`pip-tools`: https://pip-tools.readthedocs.io/\n.. _`sphinx`: https://www.sphinx-doc.org/\n.. |pypi_downloads| image:: https://img.shields.io/pypi/dm/wcwidth.svg?logo=pypi\n :alt: Downloads\n :target: https://pypi.org/project/wcwidth/\n.. |codecov| image:: https://codecov.io/gh/jquast/wcwidth/branch/master/graph/badge.svg\n :alt: codecov.io Code Coverage\n :target: https://app.codecov.io/gh/jquast/wcwidth/\n.. |license| image:: https://img.shields.io/pypi/l/wcwidth.svg\n :target: https://pypi.org/project/wcwidth/\n :alt: MIT License", + "release_date": "2023-09-30T05:29:57", "parties": [ { "type": "person", @@ -7825,21 +7825,25 @@ "Natural Language :: English", "Operating System :: POSIX", "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", "Topic :: Software Development :: Internationalization", "Topic :: Software Development :: Libraries", "Topic :: Software Development :: Localization", "Topic :: Terminals" ], "homepage_url": "https://github.com/jquast/wcwidth", - "download_url": "https://files.pythonhosted.org/packages/20/f4/c0584a25144ce20bfcf1aecd041768b8c762c1eb0aa77502a3f0baa83f11/wcwidth-0.2.6-py2.py3-none-any.whl", - "size": 29995, + "download_url": "https://files.pythonhosted.org/packages/58/19/a9ce39f89cf58cf1e7ce01c8bb76ab7e2c7aadbc5a2136c3e192097344f5/wcwidth-0.2.8-py2.py3-none-any.whl", + "size": 31296, "sha1": null, - "md5": "d7c8ae889093a1cdb6f4e00d2196ecdc", - "sha256": "795b138f6875577cd91bba52baf9e445cd5118fd32723b460e30a0af30ea230e", + "md5": "54d60959ddd3e99278c7b8c0ccbbe6e1", + "sha256": "77f719e01648ed600dfa5402c347481c0992263b81a027344f3e1ba25493a704", "sha512": null, "bug_tracking_url": null, "code_view_url": null, @@ -7859,20 +7863,20 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/wcwidth/0.2.6/json", + "api_data_url": "https://pypi.org/pypi/wcwidth/0.2.8/json", "datasource_id": null, - "purl": "pkg:pypi/wcwidth@0.2.6" + "purl": "pkg:pypi/wcwidth@0.2.8" }, { "type": "pypi", "namespace": null, "name": "wcwidth", - "version": "0.2.6", + "version": "0.2.8", "qualifiers": {}, "subpath": null, "primary_language": "Python", - "description": "Measures the displayed width of unicode strings in a terminal\n|pypi_downloads| |codecov| 
|license|\n\n============\nIntroduction\n============\n\nThis library is mainly for CLI programs that carefully produce output for\nTerminals, or make pretend to be an emulator.\n\n**Problem Statement**: The printable length of *most* strings are equal to the\nnumber of cells they occupy on the screen ``1 character : 1 cell``. However,\nthere are categories of characters that *occupy 2 cells* (full-wide), and\nothers that *occupy 0* cells (zero-width).\n\n**Solution**: POSIX.1-2001 and POSIX.1-2008 conforming systems provide\n`wcwidth(3)`_ and `wcswidth(3)`_ C functions of which this python module's\nfunctions precisely copy. *These functions return the number of cells a\nunicode string is expected to occupy.*\n\nInstallation\n------------\n\nThe stable version of this package is maintained on pypi, install using pip::\n\n pip install wcwidth\n\nExample\n-------\n\n**Problem**: given the following phrase (Japanese),\n\n >>> text = u'\u30b3\u30f3\u30cb\u30c1\u30cf'\n\nPython **incorrectly** uses the *string length* of 5 codepoints rather than the\n*printible length* of 10 cells, so that when using the `rjust` function, the\noutput length is wrong::\n\n >>> print(len('\u30b3\u30f3\u30cb\u30c1\u30cf'))\n 5\n\n >>> print('\u30b3\u30f3\u30cb\u30c1\u30cf'.rjust(20, '_'))\n _______________\u30b3\u30f3\u30cb\u30c1\u30cf\n\nBy defining our own \"rjust\" function that uses wcwidth, we can correct this::\n\n >>> def wc_rjust(text, length, padding=' '):\n ... from wcwidth import wcswidth\n ... return padding * max(0, (length - wcswidth(text))) + text\n ...\n\nOur **Solution** uses wcswidth to determine the string length correctly::\n\n >>> from wcwidth import wcswidth\n >>> print(wcswidth('\u30b3\u30f3\u30cb\u30c1\u30cf'))\n 10\n\n >>> print(wc_rjust('\u30b3\u30f3\u30cb\u30c1\u30cf', 20, '_'))\n __________\u30b3\u30f3\u30cb\u30c1\u30cf\n\n\nChoosing a Version\n------------------\n\nExport an environment variable, ``UNICODE_VERSION``. This should be done by\n*terminal emulators* or those developers experimenting with authoring one of\ntheir own, from shell::\n\n $ export UNICODE_VERSION=13.0\n\nIf unspecified, the latest version is used. If your Terminal Emulator does not\nexport this variable, you can use the `jquast/ucs-detect`_ utility to\nautomatically detect and export it to your shell.\n\nwcwidth, wcswidth\n-----------------\nUse function ``wcwidth()`` to determine the length of a *single unicode\ncharacter*, and ``wcswidth()`` to determine the length of many, a *string\nof unicode characters*.\n\nBriefly, return values of function ``wcwidth()`` are:\n\n``-1``\n Indeterminate (not printable).\n\n``0``\n Does not advance the cursor, such as NULL or Combining.\n\n``2``\n Characters of category East Asian Wide (W) or East Asian\n Full-width (F) which are displayed using two terminal cells.\n\n``1``\n All others.\n\nFunction ``wcswidth()`` simply returns the sum of all values for each character\nalong a string, or ``-1`` when it occurs anywhere along a string.\n\nFull API Documentation at http://wcwidth.readthedocs.org\n\n==========\nDeveloping\n==========\n\nInstall wcwidth in editable mode::\n\n pip install -e.\n\nExecute unit tests using tox_::\n\n tox\n\nRegenerate python code tables from latest Unicode Specification data files::\n\n tox -e update\n\nSupplementary tools for browsing and testing terminals for wide unicode\ncharacters are found in the `bin/`_ of this project's source code. Just ensure\nto first ``pip install -erequirements-develop.txt`` from this projects main\nfolder. 
For example, an interactive browser for testing::\n\n python ./bin/wcwidth-browser.py\n\nUses\n----\n\nThis library is used in:\n\n- `jquast/blessed`_: a thin, practical wrapper around terminal capabilities in\n Python.\n\n- `jonathanslenders/python-prompt-toolkit`_: a Library for building powerful\n interactive command lines in Python.\n\n- `dbcli/pgcli`_: Postgres CLI with autocompletion and syntax highlighting.\n\n- `thomasballinger/curtsies`_: a Curses-like terminal wrapper with a display\n based on compositing 2d arrays of text.\n\n- `selectel/pyte`_: Simple VTXXX-compatible linux terminal emulator.\n\n- `astanin/python-tabulate`_: Pretty-print tabular data in Python, a library\n and a command-line utility.\n\n- `LuminosoInsight/python-ftfy`_: Fixes mojibake and other glitches in Unicode\n text.\n\n- `nbedos/termtosvg`_: Terminal recorder that renders sessions as SVG\n animations.\n\n- `peterbrittain/asciimatics`_: Package to help people create full-screen text\n UIs.\n\nOther Languages\n---------------\n\n- `timoxley/wcwidth`_: JavaScript\n- `janlelis/unicode-display_width`_: Ruby\n- `alecrabbit/php-wcwidth`_: PHP\n- `Text::CharWidth`_: Perl\n- `bluebear94/Terminal-WCWidth`: Perl 6\n- `mattn/go-runewidth`_: Go\n- `emugel/wcwidth`_: Haxe\n- `aperezdc/lua-wcwidth`: Lua\n- `joachimschmidt557/zig-wcwidth`: Zig\n- `fumiyas/wcwidth-cjk`: `LD_PRELOAD` override\n- `joshuarubin/wcwidth9`: Unicode version 9 in C\n\nHistory\n-------\n\n0.2.6 *2023-01-14*\n * **Updated** tables to include Unicode Specification 14.0.0 and 15.0.0.\n * **Changed** developer tools to use pip-compile, and to use jinja2 templates\n for code generation in `bin/update-tables.py` to prepare for possible\n compiler optimization release.\n\n0.2.1 .. 0.2.5 *2020-06-23*\n * **Repository** changes to update tests and packaging issues, and\n begin tagging repository with matching release versions.\n\n0.2.0 *2020-06-01*\n * **Enhancement**: Unicode version may be selected by exporting the\n Environment variable ``UNICODE_VERSION``, such as ``13.0``, or ``6.3.0``.\n See the `jquast/ucs-detect`_ CLI utility for automatic detection.\n * **Enhancement**:\n API Documentation is published to readthedocs.org.\n * **Updated** tables for *all* Unicode Specifications with files\n published in a programmatically consumable format, versions 4.1.0\n through 13.0\n\n0.1.9 *2020-03-22*\n * **Performance** optimization by `Avram Lubkin`_, `PR #35`_.\n * **Updated** tables to Unicode Specification 13.0.0.\n\n0.1.8 *2020-01-01*\n * **Updated** tables to Unicode Specification 12.0.0. (`PR #30`_).\n\n0.1.7 *2016-07-01*\n * **Updated** tables to Unicode Specification 9.0.0. (`PR #18`_).\n\n0.1.6 *2016-01-08 Production/Stable*\n * ``LICENSE`` file now included with distribution.\n\n0.1.5 *2015-09-13 Alpha*\n * **Bugfix**:\n Resolution of \"combining_ character width\" issue, most especially\n those that previously returned -1 now often (correctly) return 0.\n resolved by `Philip Craig`_ via `PR #11`_.\n * **Deprecated**:\n The module path ``wcwidth.table_comb`` is no longer available,\n it has been superseded by module path ``wcwidth.table_zero``.\n\n0.1.4 *2014-11-20 Pre-Alpha*\n * **Feature**: ``wcswidth()`` now determines printable length\n for (most) combining_ characters. 
The developer's tool\n `bin/wcwidth-browser.py`_ is improved to display combining_\n characters when provided the ``--combining`` option\n (`Thomas Ballinger`_ and `Leta Montopoli`_ `PR #5`_).\n * **Feature**: added static analysis (prospector_) to testing\n framework.\n\n0.1.3 *2014-10-29 Pre-Alpha*\n * **Bugfix**: 2nd parameter of wcswidth was not honored.\n (`Thomas Ballinger`_, `PR #4`_).\n\n0.1.2 *2014-10-28 Pre-Alpha*\n * **Updated** tables to Unicode Specification 7.0.0.\n (`Thomas Ballinger`_, `PR #3`_).\n\n0.1.1 *2014-05-14 Pre-Alpha*\n * Initial release to pypi, Based on Unicode Specification 6.3.0\n\nThis code was originally derived directly from C code of the same name,\nwhose latest version is available at\nhttp://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::\n\n * Markus Kuhn -- 2007-05-26 (Unicode 5.0)\n *\n * Permission to use, copy, modify, and distribute this software\n * for any purpose and without fee is hereby granted. The author\n * disclaims all warranties with regard to this software.\n\n.. _`tox`: https://testrun.org/tox/latest/install.html\n.. _`prospector`: https://github.com/landscapeio/prospector\n.. _`combining`: https://en.wikipedia.org/wiki/Combining_character\n.. _`bin/`: https://github.com/jquast/wcwidth/tree/master/bin\n.. _`bin/wcwidth-browser.py`: https://github.com/jquast/wcwidth/tree/master/bin/wcwidth-browser.py\n.. _`Thomas Ballinger`: https://github.com/thomasballinger\n.. _`Leta Montopoli`: https://github.com/lmontopo\n.. _`Philip Craig`: https://github.com/philipc\n.. _`PR #3`: https://github.com/jquast/wcwidth/pull/3\n.. _`PR #4`: https://github.com/jquast/wcwidth/pull/4\n.. _`PR #5`: https://github.com/jquast/wcwidth/pull/5\n.. _`PR #11`: https://github.com/jquast/wcwidth/pull/11\n.. _`PR #18`: https://github.com/jquast/wcwidth/pull/18\n.. _`PR #30`: https://github.com/jquast/wcwidth/pull/30\n.. _`PR #35`: https://github.com/jquast/wcwidth/pull/35\n.. _`jquast/blessed`: https://github.com/jquast/blessed\n.. _`selectel/pyte`: https://github.com/selectel/pyte\n.. _`thomasballinger/curtsies`: https://github.com/thomasballinger/curtsies\n.. _`dbcli/pgcli`: https://github.com/dbcli/pgcli\n.. _`jonathanslenders/python-prompt-toolkit`: https://github.com/jonathanslenders/python-prompt-toolkit\n.. _`timoxley/wcwidth`: https://github.com/timoxley/wcwidth\n.. _`wcwidth(3)`: http://man7.org/linux/man-pages/man3/wcwidth.3.html\n.. _`wcswidth(3)`: http://man7.org/linux/man-pages/man3/wcswidth.3.html\n.. _`astanin/python-tabulate`: https://github.com/astanin/python-tabulate\n.. _`janlelis/unicode-display_width`: https://github.com/janlelis/unicode-display_width\n.. _`LuminosoInsight/python-ftfy`: https://github.com/LuminosoInsight/python-ftfy\n.. _`alecrabbit/php-wcwidth`: https://github.com/alecrabbit/php-wcwidth\n.. _`Text::CharWidth`: https://metacpan.org/pod/Text::CharWidth\n.. _`bluebear94/Terminal-WCWidth`: https://github.com/bluebear94/Terminal-WCWidth\n.. _`mattn/go-runewidth`: https://github.com/mattn/go-runewidth\n.. _`emugel/wcwidth`: https://github.com/emugel/wcwidth\n.. _`jquast/ucs-detect`: https://github.com/jquast/ucs-detect\n.. _`Avram Lubkin`: https://github.com/avylove\n.. _`nbedos/termtosvg`: https://github.com/nbedos/termtosvg\n.. _`peterbrittain/asciimatics`: https://github.com/peterbrittain/asciimatics\n.. _`aperezdc/lua-wcwidth`: https://github.com/aperezdc/lua-wcwidth\n.. _`fumiyas/wcwidth-cjk`: https://github.com/fumiyas/wcwidth-cjk\n.. 
|pypi_downloads| image:: https://img.shields.io/pypi/dm/wcwidth.svg?logo=pypi\n :alt: Downloads\n :target: https://pypi.org/project/wcwidth/\n.. |codecov| image:: https://codecov.io/gh/jquast/wcwidth/branch/master/graph/badge.svg\n :alt: codecov.io Code Coverage\n :target: https://codecov.io/gh/jquast/wcwidth/\n.. |license| image:: https://img.shields.io/github/license/jquast/wcwidth.svg\n :target: https://pypi.python.org/pypi/wcwidth/\n :alt: MIT License", - "release_date": "2023-01-15T04:07:25", + "description": "Measures the displayed width of unicode strings in a terminal\n|pypi_downloads| |codecov| |license|\n\n============\nIntroduction\n============\n\nThis library is mainly for CLI programs that carefully produce output for\nTerminals, or make pretend to be an emulator.\n\n**Problem Statement**: The printable length of *most* strings are equal to the\nnumber of cells they occupy on the screen ``1 character : 1 cell``. However,\nthere are categories of characters that *occupy 2 cells* (full-wide), and\nothers that *occupy 0* cells (zero-width).\n\n**Solution**: POSIX.1-2001 and POSIX.1-2008 conforming systems provide\n`wcwidth(3)`_ and `wcswidth(3)`_ C functions of which this python module's\nfunctions precisely copy. *These functions return the number of cells a\nunicode string is expected to occupy.*\n\nInstallation\n------------\n\nThe stable version of this package is maintained on pypi, install using pip::\n\n pip install wcwidth\n\nExample\n-------\n\n**Problem**: given the following phrase (Japanese),\n\n >>> text = u'\u30b3\u30f3\u30cb\u30c1\u30cf'\n\nPython **incorrectly** uses the *string length* of 5 codepoints rather than the\n*printible length* of 10 cells, so that when using the `rjust` function, the\noutput length is wrong::\n\n >>> print(len('\u30b3\u30f3\u30cb\u30c1\u30cf'))\n 5\n\n >>> print('\u30b3\u30f3\u30cb\u30c1\u30cf'.rjust(20, '_'))\n _______________\u30b3\u30f3\u30cb\u30c1\u30cf\n\nBy defining our own \"rjust\" function that uses wcwidth, we can correct this::\n\n >>> def wc_rjust(text, length, padding=' '):\n ... from wcwidth import wcswidth\n ... return padding * max(0, (length - wcswidth(text))) + text\n ...\n\nOur **Solution** uses wcswidth to determine the string length correctly::\n\n >>> from wcwidth import wcswidth\n >>> print(wcswidth('\u30b3\u30f3\u30cb\u30c1\u30cf'))\n 10\n\n >>> print(wc_rjust('\u30b3\u30f3\u30cb\u30c1\u30cf', 20, '_'))\n __________\u30b3\u30f3\u30cb\u30c1\u30cf\n\n\nChoosing a Version\n------------------\n\nExport an environment variable, ``UNICODE_VERSION``. This should be done by\n*terminal emulators* or those developers experimenting with authoring one of\ntheir own, from shell::\n\n $ export UNICODE_VERSION=13.0\n\nIf unspecified, the latest version is used. 
If your Terminal Emulator does not\nexport this variable, you can use the `jquast/ucs-detect`_ utility to\nautomatically detect and export it to your shell.\n\nwcwidth, wcswidth\n-----------------\nUse function ``wcwidth()`` to determine the length of a *single unicode\ncharacter*, and ``wcswidth()`` to determine the length of many, a *string\nof unicode characters*.\n\nBriefly, return values of function ``wcwidth()`` are:\n\n``-1``\n Indeterminate (not printable).\n\n``0``\n Does not advance the cursor, such as NULL or Combining.\n\n``2``\n Characters of category East Asian Wide (W) or East Asian\n Full-width (F) which are displayed using two terminal cells.\n\n``1``\n All others.\n\nFunction ``wcswidth()`` simply returns the sum of all values for each character\nalong a string, or ``-1`` when it occurs anywhere along a string.\n\nFull API Documentation at https://wcwidth.readthedocs.org\n\n==========\nDeveloping\n==========\n\nInstall wcwidth in editable mode::\n\n pip install -e .\n\nExecute unit tests using tox_::\n\n tox -e py27,py35,py36,py37,py38,py39,py310,py311,py312\n\nUpdating Unicode Version\n------------------------\n\nRegenerate python code tables from latest Unicode Specification data files::\n\n tox -e update\n\nThe script is located at ``bin/update-tables.py``, requires Python 3.9 or\nlater. It is recommended but not necessary to run this script with the newest\nPython, because the newest Python has the latest ``unicodedata`` for generating\ncomments.\n\nBuilding Documentation\n----------------------\n\nThis project is using `sphinx`_ 4.5 to build documentation::\n\n tox -e sphinx\n\nThe output will be in ``docs/_build/html/``.\n\nUpdating Requirements\n---------------------\n\nThis project is using `pip-tools`_ to manage requirements.\n\nTo upgrade requirements for updating unicode version, run::\n\n tox -e update_requirements_update\n\nTo upgrade requirements for testing, run::\n\n tox -e update_requirements37,update_requirements39\n\nTo upgrade requirements for building documentation, run::\n\n tox -e update_requirements_docs\n\nUtilities\n---------\n\nSupplementary tools for browsing and testing terminals for wide unicode\ncharacters are found in the `bin/`_ of this project's source code. Just ensure\nto first ``pip install -r requirements-develop.txt`` from this projects main\nfolder. 
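The same two public calls are enough to build small layout helpers of your own. A minimal illustrative sketch, not part of the package: truncating text to a cell budget with only the documented ``wcwidth()`` function::

    from wcwidth import wcwidth

    def wc_truncate(text, max_cells):
        """Return the longest prefix of ``text`` that fits in ``max_cells`` cells."""
        cells, out = 0, []
        for ch in text:
            width = max(wcwidth(ch), 0)  # count non-printable (-1) as zero here
            if cells + width > max_cells:
                break
            out.append(ch)
            cells += width
        return "".join(out)

    print(wc_truncate("コンニチハ", 7))  # 'コンニ': 6 cells; a fourth wide char would exceed 7

The bundled tools wrap these same calls interactively.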
For example, an interactive browser for testing::\n\n python ./bin/wcwidth-browser.py\n\n====\nUses\n====\n\nThis library is used in:\n\n- `jquast/blessed`_: a thin, practical wrapper around terminal capabilities in\n Python.\n\n- `prompt-toolkit/python-prompt-toolkit`_: a Library for building powerful\n interactive command lines in Python.\n\n- `dbcli/pgcli`_: Postgres CLI with autocompletion and syntax highlighting.\n\n- `thomasballinger/curtsies`_: a Curses-like terminal wrapper with a display\n based on compositing 2d arrays of text.\n\n- `selectel/pyte`_: Simple VTXXX-compatible linux terminal emulator.\n\n- `astanin/python-tabulate`_: Pretty-print tabular data in Python, a library\n and a command-line utility.\n\n- `rspeer/python-ftfy`_: Fixes mojibake and other glitches in Unicode\n text.\n\n- `nbedos/termtosvg`_: Terminal recorder that renders sessions as SVG\n animations.\n\n- `peterbrittain/asciimatics`_: Package to help people create full-screen text\n UIs.\n\n- `python-cmd2/cmd2`_: A tool for building interactive command line apps\n\n- `stratis-storage/stratis-cli`_: CLI for the Stratis project\n\n- `ihabunek/toot`_: A Mastodon CLI/TUI client\n\n- `saulpw/visidata`_: Terminal spreadsheet multitool for discovering and\n arranging data\n\n===============\nOther Languages\n===============\n\n- `timoxley/wcwidth`_: JavaScript\n- `janlelis/unicode-display_width`_: Ruby\n- `alecrabbit/php-wcwidth`_: PHP\n- `Text::CharWidth`_: Perl\n- `bluebear94/Terminal-WCWidth`_: Perl 6\n- `mattn/go-runewidth`_: Go\n- `grepsuzette/wcwidth`_: Haxe\n- `aperezdc/lua-wcwidth`_: Lua\n- `joachimschmidt557/zig-wcwidth`_: Zig\n- `fumiyas/wcwidth-cjk`_: `LD_PRELOAD` override\n- `joshuarubin/wcwidth9`_: Unicode version 9 in C\n\n=======\nHistory\n=======\n\n0.2.8 *2023-09-30*\n * Include requirements files in the source distibution (`PR #82`).\n\n0.2.7 *2023-09-28*\n * **Updated** tables to include Unicode Specification 15.1.0.\n * Include ``bin``, ``docs``, and ``tox.ini`` in the source distribution\n\n0.2.6 *2023-01-14*\n * **Updated** tables to include Unicode Specification 14.0.0 and 15.0.0.\n * **Changed** developer tools to use pip-compile, and to use jinja2 templates\n for code generation in `bin/update-tables.py` to prepare for possible\n compiler optimization release.\n\n0.2.1 .. 0.2.5 *2020-06-23*\n * **Repository** changes to update tests and packaging issues, and\n begin tagging repository with matching release versions.\n\n0.2.0 *2020-06-01*\n * **Enhancement**: Unicode version may be selected by exporting the\n Environment variable ``UNICODE_VERSION``, such as ``13.0``, or ``6.3.0``.\n See the `jquast/ucs-detect`_ CLI utility for automatic detection.\n * **Enhancement**:\n API Documentation is published to readthedocs.org.\n * **Updated** tables for *all* Unicode Specifications with files\n published in a programmatically consumable format, versions 4.1.0\n through 13.0\n\n0.1.9 *2020-03-22*\n * **Performance** optimization by `Avram Lubkin`_, `PR #35`_.\n * **Updated** tables to Unicode Specification 13.0.0.\n\n0.1.8 *2020-01-01*\n * **Updated** tables to Unicode Specification 12.0.0. (`PR #30`_).\n\n0.1.7 *2016-07-01*\n * **Updated** tables to Unicode Specification 9.0.0. 
(`PR #18`_).\n\n0.1.6 *2016-01-08 Production/Stable*\n * ``LICENSE`` file now included with distribution.\n\n0.1.5 *2015-09-13 Alpha*\n * **Bugfix**:\n Resolution of \"combining_ character width\" issue, most especially\n those that previously returned -1 now often (correctly) return 0.\n resolved by `Philip Craig`_ via `PR #11`_.\n * **Deprecated**:\n The module path ``wcwidth.table_comb`` is no longer available,\n it has been superseded by module path ``wcwidth.table_zero``.\n\n0.1.4 *2014-11-20 Pre-Alpha*\n * **Feature**: ``wcswidth()`` now determines printable length\n for (most) combining_ characters. The developer's tool\n `bin/wcwidth-browser.py`_ is improved to display combining_\n characters when provided the ``--combining`` option\n (`Thomas Ballinger`_ and `Leta Montopoli`_ `PR #5`_).\n * **Feature**: added static analysis (prospector_) to testing\n framework.\n\n0.1.3 *2014-10-29 Pre-Alpha*\n * **Bugfix**: 2nd parameter of wcswidth was not honored.\n (`Thomas Ballinger`_, `PR #4`_).\n\n0.1.2 *2014-10-28 Pre-Alpha*\n * **Updated** tables to Unicode Specification 7.0.0.\n (`Thomas Ballinger`_, `PR #3`_).\n\n0.1.1 *2014-05-14 Pre-Alpha*\n * Initial release to pypi, Based on Unicode Specification 6.3.0\n\nThis code was originally derived directly from C code of the same name,\nwhose latest version is available at\nhttps://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::\n\n * Markus Kuhn -- 2007-05-26 (Unicode 5.0)\n *\n * Permission to use, copy, modify, and distribute this software\n * for any purpose and without fee is hereby granted. The author\n * disclaims all warranties with regard to this software.\n\n.. _`tox`: https://tox.wiki/en/latest/\n.. _`prospector`: https://github.com/landscapeio/prospector\n.. _`combining`: https://en.wikipedia.org/wiki/Combining_character\n.. _`bin/`: https://github.com/jquast/wcwidth/tree/master/bin\n.. _`bin/wcwidth-browser.py`: https://github.com/jquast/wcwidth/blob/master/bin/wcwidth-browser.py\n.. _`Thomas Ballinger`: https://github.com/thomasballinger\n.. _`Leta Montopoli`: https://github.com/lmontopo\n.. _`Philip Craig`: https://github.com/philipc\n.. _`PR #3`: https://github.com/jquast/wcwidth/pull/3\n.. _`PR #4`: https://github.com/jquast/wcwidth/pull/4\n.. _`PR #5`: https://github.com/jquast/wcwidth/pull/5\n.. _`PR #11`: https://github.com/jquast/wcwidth/pull/11\n.. _`PR #18`: https://github.com/jquast/wcwidth/pull/18\n.. _`PR #30`: https://github.com/jquast/wcwidth/pull/30\n.. _`PR #35`: https://github.com/jquast/wcwidth/pull/35\n.. _`jquast/blessed`: https://github.com/jquast/blessed\n.. _`selectel/pyte`: https://github.com/selectel/pyte\n.. _`thomasballinger/curtsies`: https://github.com/thomasballinger/curtsies\n.. _`dbcli/pgcli`: https://github.com/dbcli/pgcli\n.. _`prompt-toolkit/python-prompt-toolkit`: https://github.com/prompt-toolkit/python-prompt-toolkit\n.. _`timoxley/wcwidth`: https://github.com/timoxley/wcwidth\n.. _`wcwidth(3)`: https://man7.org/linux/man-pages/man3/wcwidth.3.html\n.. _`wcswidth(3)`: https://man7.org/linux/man-pages/man3/wcswidth.3.html\n.. _`astanin/python-tabulate`: https://github.com/astanin/python-tabulate\n.. _`janlelis/unicode-display_width`: https://github.com/janlelis/unicode-display_width\n.. _`rspeer/python-ftfy`: https://github.com/rspeer/python-ftfy\n.. _`alecrabbit/php-wcwidth`: https://github.com/alecrabbit/php-wcwidth\n.. _`Text::CharWidth`: https://metacpan.org/pod/Text::CharWidth\n.. _`bluebear94/Terminal-WCWidth`: https://github.com/bluebear94/Terminal-WCWidth\n.. 
_`mattn/go-runewidth`: https://github.com/mattn/go-runewidth\n.. _`grepsuzette/wcwidth`: https://github.com/grepsuzette/wcwidth\n.. _`jquast/ucs-detect`: https://github.com/jquast/ucs-detect\n.. _`Avram Lubkin`: https://github.com/avylove\n.. _`nbedos/termtosvg`: https://github.com/nbedos/termtosvg\n.. _`peterbrittain/asciimatics`: https://github.com/peterbrittain/asciimatics\n.. _`aperezdc/lua-wcwidth`: https://github.com/aperezdc/lua-wcwidth\n.. _`joachimschmidt557/zig-wcwidth`: https://github.com/joachimschmidt557/zig-wcwidth\n.. _`fumiyas/wcwidth-cjk`: https://github.com/fumiyas/wcwidth-cjk\n.. _`joshuarubin/wcwidth9`: https://github.com/joshuarubin/wcwidth9\n.. _`python-cmd2/cmd2`: https://github.com/python-cmd2/cmd2\n.. _`stratis-storage/stratis-cli`: https://github.com/stratis-storage/stratis-cli\n.. _`ihabunek/toot`: https://github.com/ihabunek/toot\n.. _`saulpw/visidata`: https://github.com/saulpw/visidata\n.. _`pip-tools`: https://pip-tools.readthedocs.io/\n.. _`sphinx`: https://www.sphinx-doc.org/\n.. |pypi_downloads| image:: https://img.shields.io/pypi/dm/wcwidth.svg?logo=pypi\n :alt: Downloads\n :target: https://pypi.org/project/wcwidth/\n.. |codecov| image:: https://codecov.io/gh/jquast/wcwidth/branch/master/graph/badge.svg\n :alt: codecov.io Code Coverage\n :target: https://app.codecov.io/gh/jquast/wcwidth/\n.. |license| image:: https://img.shields.io/pypi/l/wcwidth.svg\n :target: https://pypi.org/project/wcwidth/\n :alt: MIT License", + "release_date": "2023-09-30T05:29:59", "parties": [ { "type": "person", @@ -7900,21 +7904,25 @@ "Natural Language :: English", "Operating System :: POSIX", "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", "Topic :: Software Development :: Internationalization", "Topic :: Software Development :: Libraries", "Topic :: Software Development :: Localization", "Topic :: Terminals" ], "homepage_url": "https://github.com/jquast/wcwidth", - "download_url": "https://files.pythonhosted.org/packages/5e/5f/1e4bd82a9cc1f17b2c2361a2d876d4c38973a997003ba5eb400e8a932b6c/wcwidth-0.2.6.tar.gz", - "size": 35452, + "download_url": "https://files.pythonhosted.org/packages/cb/ee/20850e9f388d8b52b481726d41234f67bc89a85eeade6e2d6e2965be04ba/wcwidth-0.2.8.tar.gz", + "size": 61713, "sha1": null, - "md5": "976b997f2ed155b5c2e9a4d50e528d90", - "sha256": "a5220780a404dbe3353789870978e472cfe477761f06ee55077256e509b156d0", + "md5": "b79ba96d19fa108980cf28d05ea2101c", + "sha256": "8705c569999ffbb4f6a87c6d1b80f324bd6db952f5eb0b95bc07517f4c1813d4", "sha512": null, "bug_tracking_url": null, "code_view_url": null, @@ -7934,9 +7942,9 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/wcwidth/0.2.6/json", + "api_data_url": "https://pypi.org/pypi/wcwidth/0.2.8/json", "datasource_id": null, - "purl": "pkg:pypi/wcwidth@0.2.6" + "purl": "pkg:pypi/wcwidth@0.2.8" }, { "type": "pypi", @@ -8546,7 +8554,7 @@ "package": "pkg:pypi/prompt-toolkit@1.0.18", "dependencies": [ "pkg:pypi/six@1.16.0", - "pkg:pypi/wcwidth@0.2.6" + "pkg:pypi/wcwidth@0.2.8" ] }, { @@ -8613,7 +8621,7 @@ "dependencies": [] }, { - "package": "pkg:pypi/wcwidth@0.2.6", + "package": "pkg:pypi/wcwidth@0.2.8", 
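The ``pkg:pypi/...`` identifiers in these dependency entries are Package URLs (purls). A minimal sketch of reading one programmatically, assuming the ``packageurl-python`` library (import name ``packageurl``) is installed::

    from packageurl import PackageURL

    purl = PackageURL.from_string("pkg:pypi/wcwidth@0.2.8")
    print(purl.type, purl.name, purl.version)  # pypi wcwidth 0.2.8
    print(purl.to_string())                    # round-trips to the original string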
"dependencies": [ "pkg:pypi/backports-functools-lru-cache@1.6.6" ] diff --git a/tests/data/insecure-setup/setup.py-expected.json b/tests/data/insecure-setup/setup.py-expected.json index 91ce7d1a..0cc5ce58 100644 --- a/tests/data/insecure-setup/setup.py-expected.json +++ b/tests/data/insecure-setup/setup.py-expected.json @@ -16,7 +16,7 @@ "files": [ { "type": "file", - "path": "/home/tg1999/Desktop/example/tools/python-inspector/tests/data/insecure-setup/setup.py", + "path": "/home/tg1999/Desktop/python-inspector-1/tests/data/insecure-setup/setup.py", "package_data": [ { "type": "pypi", diff --git a/tests/data/pdt-requirements.txt-expected.json b/tests/data/pdt-requirements.txt-expected.json index 759729f7..3162b9e0 100644 --- a/tests/data/pdt-requirements.txt-expected.json +++ b/tests/data/pdt-requirements.txt-expected.json @@ -4,7 +4,7 @@ "tool_homepageurl": "https://github.com/nexB/python-inspector", "tool_version": "0.9.8", "options": [ - "--requirement /home/tg1999/Desktop/example/tools/python-inspector/tests/data/pdt-requirements.txt", + "--requirement /home/tg1999/Desktop/python-inspector-1/tests/data/pdt-requirements.txt", "--index-url https://pypi.org/simple", "--python-version 38", "--operating-system linux", @@ -17,7 +17,7 @@ "files": [ { "type": "file", - "path": "/home/tg1999/Desktop/example/tools/python-inspector/tests/data/pdt-requirements.txt", + "path": "/home/tg1999/Desktop/python-inspector-1/tests/data/pdt-requirements.txt", "package_data": [ { "type": "pypi", diff --git a/tests/data/pinned-pdt-requirements.txt-expected.json b/tests/data/pinned-pdt-requirements.txt-expected.json index bf65322c..360f5e44 100644 --- a/tests/data/pinned-pdt-requirements.txt-expected.json +++ b/tests/data/pinned-pdt-requirements.txt-expected.json @@ -4,7 +4,7 @@ "tool_homepageurl": "https://github.com/nexB/python-inspector", "tool_version": "0.9.8", "options": [ - "--requirement /home/tg1999/Desktop/example/tools/python-inspector/tests/data/pinned-pdt-requirements.txt", + "--requirement /home/tg1999/Desktop/python-inspector-1/tests/data/pinned-pdt-requirements.txt", "--index-url https://pypi.org/simple", "--python-version 38", "--operating-system linux", @@ -17,7 +17,7 @@ "files": [ { "type": "file", - "path": "/home/tg1999/Desktop/example/tools/python-inspector/tests/data/pinned-pdt-requirements.txt", + "path": "/home/tg1999/Desktop/python-inspector-1/tests/data/pinned-pdt-requirements.txt", "package_data": [ { "type": "pypi", diff --git a/tests/data/pinned-requirements.txt-expected.json b/tests/data/pinned-requirements.txt-expected.json index 94e3456f..30e2972c 100644 --- a/tests/data/pinned-requirements.txt-expected.json +++ b/tests/data/pinned-requirements.txt-expected.json @@ -4,7 +4,7 @@ "tool_homepageurl": "https://github.com/nexB/python-inspector", "tool_version": "0.9.8", "options": [ - "--requirement /home/tg1999/Desktop/example/tools/python-inspector/tests/data/pinned-requirements.txt", + "--requirement /home/tg1999/Desktop/python-inspector-1/tests/data/pinned-requirements.txt", "--index-url https://pypi.org/simple", "--python-version 38", "--operating-system linux", @@ -17,7 +17,7 @@ "files": [ { "type": "file", - "path": "/home/tg1999/Desktop/example/tools/python-inspector/tests/data/pinned-requirements.txt", + "path": "/home/tg1999/Desktop/python-inspector-1/tests/data/pinned-requirements.txt", "package_data": [ { "type": "pypi", diff --git a/tests/data/setup/no-direct-dependencies-setup.py-expected.json b/tests/data/setup/no-direct-dependencies-setup.py-expected.json index 
23268a97..534544f3 100644 --- a/tests/data/setup/no-direct-dependencies-setup.py-expected.json +++ b/tests/data/setup/no-direct-dependencies-setup.py-expected.json @@ -16,7 +16,7 @@ "files": [ { "type": "file", - "path": "/home/tg1999/Desktop/example/tools/python-inspector/tests/data/setup/no-direct-dependencies-setup.py", + "path": "/home/tg1999/Desktop/python-inspector-1/tests/data/setup/no-direct-dependencies-setup.py", "package_data": [ { "type": "pypi", diff --git a/tests/data/setup/simple-setup.py-expected.json b/tests/data/setup/simple-setup.py-expected.json index 9d6ef619..6189b662 100644 --- a/tests/data/setup/simple-setup.py-expected.json +++ b/tests/data/setup/simple-setup.py-expected.json @@ -16,7 +16,7 @@ "files": [ { "type": "file", - "path": "/home/tg1999/Desktop/example/tools/python-inspector/tests/data/setup/simple-setup.py", + "path": "/home/tg1999/Desktop/python-inspector-1/tests/data/setup/simple-setup.py", "package_data": [ { "type": "pypi", diff --git a/tests/data/setup/spdx-setup.py-expected.json b/tests/data/setup/spdx-setup.py-expected.json index 0dd39ee7..f894abc3 100644 --- a/tests/data/setup/spdx-setup.py-expected.json +++ b/tests/data/setup/spdx-setup.py-expected.json @@ -16,7 +16,7 @@ "files": [ { "type": "file", - "path": "/home/tg1999/Desktop/example/tools/python-inspector/tests/data/setup/spdx-setup.py", + "path": "/home/tg1999/Desktop/python-inspector-1/tests/data/setup/spdx-setup.py", "package_data": [ { "type": "pypi", diff --git a/tests/data/single-url-except-simple-expected.json b/tests/data/single-url-except-simple-expected.json index a701e958..404fe732 100644 --- a/tests/data/single-url-except-simple-expected.json +++ b/tests/data/single-url-except-simple-expected.json @@ -20,12 +20,12 @@ "type": "pypi", "namespace": null, "name": "blinker", - "version": "1.6.2", + "version": "1.6.3", "qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "Fast, simple object-to-object and broadcast signaling\nBlinker\n=======\n\nBlinker provides a fast dispatching system that allows any number of\ninterested parties to subscribe to events, or \"signals\".\n\nSignal receivers can subscribe to specific senders or receive signals\nsent by any sender.\n\n.. code-block:: pycon\n\n >>> from blinker import signal\n >>> started = signal('round-started')\n >>> def each(round):\n ... print(f\"Round {round}\")\n ...\n >>> started.connect(each)\n\n >>> def round_two(round):\n ... print(\"This is round two.\")\n ...\n >>> started.connect(round_two, sender=2)\n\n >>> for round in range(1, 4):\n ... 
started.send(round)\n ...\n Round 1!\n Round 2!\n This is round two.\n Round 3!\n\n\nLinks\n-----\n\n- Documentation: https://blinker.readthedocs.io/\n- Changes: https://blinker.readthedocs.io/#changes\n- PyPI Releases: https://pypi.org/project/blinker/\n- Source Code: https://github.com/pallets-eco/blinker/\n- Issue Tracker: https://github.com/pallets-eco/blinker/issues/", - "release_date": "2023-04-12T21:45:23", + "release_date": "2023-10-07T14:16:19", "parties": [ { "type": "person", @@ -54,11 +54,11 @@ "Topic :: Software Development :: Libraries" ], "homepage_url": "", - "download_url": "https://files.pythonhosted.org/packages/0d/f1/5f39e771cd730d347539bb74c6d496737b9d5f0a53bc9fdbf3e170f1ee48/blinker-1.6.2-py3-none-any.whl", - "size": 13665, + "download_url": "https://files.pythonhosted.org/packages/bf/2b/11bcedb7dee4923253a4a21bae3be854bcc4f06295bd827756352016d97c/blinker-1.6.3-py3-none-any.whl", + "size": 13398, "sha1": null, - "md5": "c7fad677855c3843630a1e47d6dc379c", - "sha256": "c3d739772abb7bc2860abf5f2ec284223d9ad5c76da018234f6f50d6f31ab1f0", + "md5": "4ab7b0e126ce1c1b364e41807d477d7e", + "sha256": "296320d6c28b006eb5e32d4712202dbcdcbf5dc482da298c2f44881c43884aaa", "sha512": null, "bug_tracking_url": "https://github.com/pallets-eco/blinker/issues/", "code_view_url": "https://github.com/pallets-eco/blinker/", @@ -66,7 +66,6 @@ "copyright": null, "license_expression": null, "declared_license": { - "license": "MIT License", "classifiers": [ "License :: OSI Approved :: MIT License" ] @@ -78,20 +77,20 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/blinker/1.6.2/json", + "api_data_url": "https://pypi.org/pypi/blinker/1.6.3/json", "datasource_id": null, - "purl": "pkg:pypi/blinker@1.6.2" + "purl": "pkg:pypi/blinker@1.6.3" }, { "type": "pypi", "namespace": null, "name": "blinker", - "version": "1.6.2", + "version": "1.6.3", "qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "Fast, simple object-to-object and broadcast signaling\nBlinker\n=======\n\nBlinker provides a fast dispatching system that allows any number of\ninterested parties to subscribe to events, or \"signals\".\n\nSignal receivers can subscribe to specific senders or receive signals\nsent by any sender.\n\n.. code-block:: pycon\n\n >>> from blinker import signal\n >>> started = signal('round-started')\n >>> def each(round):\n ... print(f\"Round {round}\")\n ...\n >>> started.connect(each)\n\n >>> def round_two(round):\n ... print(\"This is round two.\")\n ...\n >>> started.connect(round_two, sender=2)\n\n >>> for round in range(1, 4):\n ... 
started.send(round)\n ...\n Round 1!\n Round 2!\n This is round two.\n Round 3!\n\n\nLinks\n-----\n\n- Documentation: https://blinker.readthedocs.io/\n- Changes: https://blinker.readthedocs.io/#changes\n- PyPI Releases: https://pypi.org/project/blinker/\n- Source Code: https://github.com/pallets-eco/blinker/\n- Issue Tracker: https://github.com/pallets-eco/blinker/issues/", - "release_date": "2023-04-12T21:45:25", + "release_date": "2023-10-07T14:16:21", "parties": [ { "type": "person", @@ -120,11 +119,11 @@ "Topic :: Software Development :: Libraries" ], "homepage_url": "", - "download_url": "https://files.pythonhosted.org/packages/e8/f9/a05287f3d5c54d20f51a235ace01f50620984bc7ca5ceee781dc645211c5/blinker-1.6.2.tar.gz", - "size": 28699, + "download_url": "https://files.pythonhosted.org/packages/ea/96/ed1420a974540da7419094f2553bc198c454cee5f72576e7c7629dd12d6e/blinker-1.6.3.tar.gz", + "size": 28092, "sha1": null, - "md5": "1c7375d100a67ba368d9cde0ab2d8cfa", - "sha256": "4afd3de66ef3a9f8067559fb7a1cbe555c17dcbe15971b05d1b625c3e7abe213", + "md5": "911d92a757f609b4344c04c207affea4", + "sha256": "152090d27c1c5c722ee7e48504b02d76502811ce02e1523553b4cf8c8b3d3a8d", "sha512": null, "bug_tracking_url": "https://github.com/pallets-eco/blinker/issues/", "code_view_url": "https://github.com/pallets-eco/blinker/", @@ -132,7 +131,6 @@ "copyright": null, "license_expression": null, "declared_license": { - "license": "MIT License", "classifiers": [ "License :: OSI Approved :: MIT License" ] @@ -144,9 +142,9 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/blinker/1.6.2/json", + "api_data_url": "https://pypi.org/pypi/blinker/1.6.3/json", "datasource_id": null, - "purl": "pkg:pypi/blinker@1.6.2" + "purl": "pkg:pypi/blinker@1.6.3" }, { "type": "pypi", @@ -260,12 +258,12 @@ "type": "pypi", "namespace": null, "name": "flask", - "version": "2.3.3", + "version": "3.0.0", "qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "A simple framework for building complex web applications.\nFlask\n=====\n\nFlask is a lightweight `WSGI`_ web application framework. It is designed\nto make getting started quick and easy, with the ability to scale up to\ncomplex applications. It began as a simple wrapper around `Werkzeug`_\nand `Jinja`_ and has become one of the most popular Python web\napplication frameworks.\n\nFlask offers suggestions, but doesn't enforce any dependencies or\nproject layout. It is up to the developer to choose the tools and\nlibraries they want to use. There are many extensions provided by the\ncommunity that make adding new functionality easy.\n\n.. _WSGI: https://wsgi.readthedocs.io/\n.. _Werkzeug: https://werkzeug.palletsprojects.com/\n.. _Jinja: https://jinja.palletsprojects.com/\n\n\nInstalling\n----------\n\nInstall and update using `pip`_:\n\n.. code-block:: text\n\n $ pip install -U Flask\n\n.. _pip: https://pip.pypa.io/en/stable/getting-started/\n\n\nA Simple Example\n----------------\n\n.. code-block:: python\n\n # save this as app.py\n from flask import Flask\n\n app = Flask(__name__)\n\n @app.route(\"/\")\n def hello():\n return \"Hello, World!\"\n\n.. code-block:: text\n\n $ flask run\n * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)\n\n\nContributing\n------------\n\nFor guidance on setting up a development environment and how to make a\ncontribution to Flask, see the `contributing guidelines`_.\n\n.. 
_contributing guidelines: https://github.com/pallets/flask/blob/main/CONTRIBUTING.rst\n\n\nDonate\n------\n\nThe Pallets organization develops and supports Flask and the libraries\nit uses. In order to grow the community of contributors and users, and\nallow the maintainers to devote more time to the projects, `please\ndonate today`_.\n\n.. _please donate today: https://palletsprojects.com/donate\n\n\nLinks\n-----\n\n- Documentation: https://flask.palletsprojects.com/\n- Changes: https://flask.palletsprojects.com/changes/\n- PyPI Releases: https://pypi.org/project/Flask/\n- Source Code: https://github.com/pallets/flask/\n- Issue Tracker: https://github.com/pallets/flask/issues/\n- Chat: https://discord.gg/pallets", - "release_date": "2023-08-21T19:52:33", + "release_date": "2023-09-30T14:36:10", "parties": [ { "type": "person", @@ -288,11 +286,11 @@ "Topic :: Software Development :: Libraries :: Application Frameworks" ], "homepage_url": "", - "download_url": "https://files.pythonhosted.org/packages/fd/56/26f0be8adc2b4257df20c1c4260ddd0aa396cf8e75d90ab2f7ff99bc34f9/flask-2.3.3-py3-none-any.whl", - "size": 96112, + "download_url": "https://files.pythonhosted.org/packages/36/42/015c23096649b908c809c69388a805a571a3bea44362fe87e33fc3afa01f/flask-3.0.0-py3-none-any.whl", + "size": 99724, "sha1": null, - "md5": "acbf286236b8040816042af736f2adb5", - "sha256": "f69fcd559dc907ed196ab9df0e48471709175e696d6e698dd4dbe940f96ce66b", + "md5": "a89da9cc87440ebd82366d605eb866f2", + "sha256": "21128f47e4e3b9d597a3e8521a329bf56909b690fcc3fa3e477725aa81367638", "sha512": null, "bug_tracking_url": "https://github.com/pallets/flask/issues/", "code_view_url": "https://github.com/pallets/flask/", @@ -311,20 +309,20 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/flask/2.3.3/json", + "api_data_url": "https://pypi.org/pypi/flask/3.0.0/json", "datasource_id": null, - "purl": "pkg:pypi/flask@2.3.3" + "purl": "pkg:pypi/flask@3.0.0" }, { "type": "pypi", "namespace": null, "name": "flask", - "version": "2.3.3", + "version": "3.0.0", "qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "A simple framework for building complex web applications.\nFlask\n=====\n\nFlask is a lightweight `WSGI`_ web application framework. It is designed\nto make getting started quick and easy, with the ability to scale up to\ncomplex applications. It began as a simple wrapper around `Werkzeug`_\nand `Jinja`_ and has become one of the most popular Python web\napplication frameworks.\n\nFlask offers suggestions, but doesn't enforce any dependencies or\nproject layout. It is up to the developer to choose the tools and\nlibraries they want to use. There are many extensions provided by the\ncommunity that make adding new functionality easy.\n\n.. _WSGI: https://wsgi.readthedocs.io/\n.. _Werkzeug: https://werkzeug.palletsprojects.com/\n.. _Jinja: https://jinja.palletsprojects.com/\n\n\nInstalling\n----------\n\nInstall and update using `pip`_:\n\n.. code-block:: text\n\n $ pip install -U Flask\n\n.. _pip: https://pip.pypa.io/en/stable/getting-started/\n\n\nA Simple Example\n----------------\n\n.. code-block:: python\n\n # save this as app.py\n from flask import Flask\n\n app = Flask(__name__)\n\n @app.route(\"/\")\n def hello():\n return \"Hello, World!\"\n\n.. 
code-block:: text\n\n $ flask run\n * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)\n\n\nContributing\n------------\n\nFor guidance on setting up a development environment and how to make a\ncontribution to Flask, see the `contributing guidelines`_.\n\n.. _contributing guidelines: https://github.com/pallets/flask/blob/main/CONTRIBUTING.rst\n\n\nDonate\n------\n\nThe Pallets organization develops and supports Flask and the libraries\nit uses. In order to grow the community of contributors and users, and\nallow the maintainers to devote more time to the projects, `please\ndonate today`_.\n\n.. _please donate today: https://palletsprojects.com/donate\n\n\nLinks\n-----\n\n- Documentation: https://flask.palletsprojects.com/\n- Changes: https://flask.palletsprojects.com/changes/\n- PyPI Releases: https://pypi.org/project/Flask/\n- Source Code: https://github.com/pallets/flask/\n- Issue Tracker: https://github.com/pallets/flask/issues/\n- Chat: https://discord.gg/pallets", - "release_date": "2023-08-21T19:52:35", + "release_date": "2023-09-30T14:36:12", "parties": [ { "type": "person", @@ -347,11 +345,11 @@ "Topic :: Software Development :: Libraries :: Application Frameworks" ], "homepage_url": "", - "download_url": "https://files.pythonhosted.org/packages/46/b7/4ace17e37abd9c21715dea5ee11774a25e404c486a7893fa18e764326ead/flask-2.3.3.tar.gz", - "size": 672756, + "download_url": "https://files.pythonhosted.org/packages/d8/09/c1a7354d3925a3c6c8cfdebf4245bae67d633ffda1ba415add06ffc839c5/flask-3.0.0.tar.gz", + "size": 674171, "sha1": null, - "md5": "87c2f9544380d251e7054b960547ee7f", - "sha256": "09c347a92aa7ff4a8e7f3206795f30d826654baf38b873d0744cd571ca609efc", + "md5": "4848c9d5305197822b5b00c8e9a6d9aa", + "sha256": "cfadcdb638b609361d29ec22360d6070a77d7463dcb3ab08d2c2f2f168845f58", "sha512": null, "bug_tracking_url": "https://github.com/pallets/flask/issues/", "code_view_url": "https://github.com/pallets/flask/", @@ -370,9 +368,9 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/flask/2.3.3/json", + "api_data_url": "https://pypi.org/pypi/flask/3.0.0/json", "datasource_id": null, - "purl": "pkg:pypi/flask@2.3.3" + "purl": "pkg:pypi/flask@3.0.0" }, { "type": "pypi", @@ -848,12 +846,12 @@ "type": "pypi", "namespace": null, "name": "werkzeug", - "version": "2.3.7", + "version": "3.0.0", "qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "The comprehensive WSGI web application library.\nWerkzeug\n========\n\n*werkzeug* German noun: \"tool\". Etymology: *werk* (\"work\"), *zeug* (\"stuff\")\n\nWerkzeug is a comprehensive `WSGI`_ web application library. 
It began as\na simple collection of various utilities for WSGI applications and has\nbecome one of the most advanced WSGI utility libraries.\n\nIt includes:\n\n- An interactive debugger that allows inspecting stack traces and\n source code in the browser with an interactive interpreter for any\n frame in the stack.\n- A full-featured request object with objects to interact with\n headers, query args, form data, files, and cookies.\n- A response object that can wrap other WSGI applications and handle\n streaming data.\n- A routing system for matching URLs to endpoints and generating URLs\n for endpoints, with an extensible system for capturing variables\n from URLs.\n- HTTP utilities to handle entity tags, cache control, dates, user\n agents, cookies, files, and more.\n- A threaded WSGI server for use while developing applications\n locally.\n- A test client for simulating HTTP requests during testing without\n requiring running a server.\n\nWerkzeug doesn't enforce any dependencies. It is up to the developer to\nchoose a template engine, database adapter, and even how to handle\nrequests. It can be used to build all sorts of end user applications\nsuch as blogs, wikis, or bulletin boards.\n\n`Flask`_ wraps Werkzeug, using it to handle the details of WSGI while\nproviding more structure and patterns for defining powerful\napplications.\n\n.. _WSGI: https://wsgi.readthedocs.io/en/latest/\n.. _Flask: https://www.palletsprojects.com/p/flask/\n\n\nInstalling\n----------\n\nInstall and update using `pip`_:\n\n.. code-block:: text\n\n pip install -U Werkzeug\n\n.. _pip: https://pip.pypa.io/en/stable/getting-started/\n\n\nA Simple Example\n----------------\n\n.. code-block:: python\n\n from werkzeug.wrappers import Request, Response\n\n @Request.application\n def application(request):\n return Response('Hello, World!')\n\n if __name__ == '__main__':\n from werkzeug.serving import run_simple\n run_simple('localhost', 4000, application)\n\n\nDonate\n------\n\nThe Pallets organization develops and supports Werkzeug and other\npopular packages. In order to grow the community of contributors and\nusers, and allow the maintainers to devote more time to the projects,\n`please donate today`_.\n\n.. 
_please donate today: https://palletsprojects.com/donate\n\n\nLinks\n-----\n\n- Documentation: https://werkzeug.palletsprojects.com/\n- Changes: https://werkzeug.palletsprojects.com/changes/\n- PyPI Releases: https://pypi.org/project/Werkzeug/\n- Source Code: https://github.com/pallets/werkzeug/\n- Issue Tracker: https://github.com/pallets/werkzeug/issues/\n- Chat: https://discord.gg/pallets", - "release_date": "2023-08-14T09:30:45", + "release_date": "2023-09-30T13:58:46", "parties": [ { "type": "person", @@ -876,11 +874,11 @@ "Topic :: Software Development :: Libraries :: Application Frameworks" ], "homepage_url": "", - "download_url": "https://files.pythonhosted.org/packages/9b/59/a7c32e3d8d0e546a206e0552a2c04444544f15c1da4a01df8938d20c6ffc/werkzeug-2.3.7-py3-none-any.whl", - "size": 242216, + "download_url": "https://files.pythonhosted.org/packages/b6/a5/54b01f663d60d5334f6c9c87c26274e94617a4fd463d812463626423b10d/werkzeug-3.0.0-py3-none-any.whl", + "size": 226556, "sha1": null, - "md5": "201b5bf3fa4dd3eb572704a6e61e69de", - "sha256": "effc12dba7f3bd72e605ce49807bbe692bd729c3bb122a3b91747a6ae77df528", + "md5": "ceb6681d9b633493a4621914aaf95037", + "sha256": "cbb2600f7eabe51dbc0502f58be0b3e1b96b893b05695ea2b35b43d4de2d9962", "sha512": null, "bug_tracking_url": "https://github.com/pallets/werkzeug/issues/", "code_view_url": "https://github.com/pallets/werkzeug/", @@ -899,20 +897,20 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/werkzeug/2.3.7/json", + "api_data_url": "https://pypi.org/pypi/werkzeug/3.0.0/json", "datasource_id": null, - "purl": "pkg:pypi/werkzeug@2.3.7" + "purl": "pkg:pypi/werkzeug@3.0.0" }, { "type": "pypi", "namespace": null, "name": "werkzeug", - "version": "2.3.7", + "version": "3.0.0", "qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "The comprehensive WSGI web application library.\nWerkzeug\n========\n\n*werkzeug* German noun: \"tool\". Etymology: *werk* (\"work\"), *zeug* (\"stuff\")\n\nWerkzeug is a comprehensive `WSGI`_ web application library. It began as\na simple collection of various utilities for WSGI applications and has\nbecome one of the most advanced WSGI utility libraries.\n\nIt includes:\n\n- An interactive debugger that allows inspecting stack traces and\n source code in the browser with an interactive interpreter for any\n frame in the stack.\n- A full-featured request object with objects to interact with\n headers, query args, form data, files, and cookies.\n- A response object that can wrap other WSGI applications and handle\n streaming data.\n- A routing system for matching URLs to endpoints and generating URLs\n for endpoints, with an extensible system for capturing variables\n from URLs.\n- HTTP utilities to handle entity tags, cache control, dates, user\n agents, cookies, files, and more.\n- A threaded WSGI server for use while developing applications\n locally.\n- A test client for simulating HTTP requests during testing without\n requiring running a server.\n\nWerkzeug doesn't enforce any dependencies. It is up to the developer to\nchoose a template engine, database adapter, and even how to handle\nrequests. It can be used to build all sorts of end user applications\nsuch as blogs, wikis, or bulletin boards.\n\n`Flask`_ wraps Werkzeug, using it to handle the details of WSGI while\nproviding more structure and patterns for defining powerful\napplications.\n\n.. _WSGI: https://wsgi.readthedocs.io/en/latest/\n.. 
_Flask: https://www.palletsprojects.com/p/flask/\n\n\nInstalling\n----------\n\nInstall and update using `pip`_:\n\n.. code-block:: text\n\n pip install -U Werkzeug\n\n.. _pip: https://pip.pypa.io/en/stable/getting-started/\n\n\nA Simple Example\n----------------\n\n.. code-block:: python\n\n from werkzeug.wrappers import Request, Response\n\n @Request.application\n def application(request):\n return Response('Hello, World!')\n\n if __name__ == '__main__':\n from werkzeug.serving import run_simple\n run_simple('localhost', 4000, application)\n\n\nDonate\n------\n\nThe Pallets organization develops and supports Werkzeug and other\npopular packages. In order to grow the community of contributors and\nusers, and allow the maintainers to devote more time to the projects,\n`please donate today`_.\n\n.. _please donate today: https://palletsprojects.com/donate\n\n\nLinks\n-----\n\n- Documentation: https://werkzeug.palletsprojects.com/\n- Changes: https://werkzeug.palletsprojects.com/changes/\n- PyPI Releases: https://pypi.org/project/Werkzeug/\n- Source Code: https://github.com/pallets/werkzeug/\n- Issue Tracker: https://github.com/pallets/werkzeug/issues/\n- Chat: https://discord.gg/pallets", - "release_date": "2023-08-14T09:30:46", + "release_date": "2023-09-30T13:58:47", "parties": [ { "type": "person", @@ -935,11 +933,11 @@ "Topic :: Software Development :: Libraries :: Application Frameworks" ], "homepage_url": "", - "download_url": "https://files.pythonhosted.org/packages/ef/56/0acc9f560053478a4987fa35c95d904f04b6915f6b5c4d1c14dc8862ba0a/werkzeug-2.3.7.tar.gz", - "size": 819625, + "download_url": "https://files.pythonhosted.org/packages/8c/47/75c7099c78dc207486e30cdb2b16059ca6d5c6cdcf9290f4621368bd06e4/werkzeug-3.0.0.tar.gz", + "size": 801219, "sha1": null, - "md5": "31bb8f7f0be8724e6da89df0f6fc5d12", - "sha256": "2b8c0e447b4b9dbcc85dd97b6eeb4dcbaf6c8b6c3be0bd654e25553e0a2157d8", + "md5": "efae4055e3c354a6772d12d4d5aeed44", + "sha256": "3ffff4dcc32db52ef3cc94dff3000a3c2846890f3a5a51800a27b909c5e770f0", "sha512": null, "bug_tracking_url": "https://github.com/pallets/werkzeug/issues/", "code_view_url": "https://github.com/pallets/werkzeug/", @@ -958,9 +956,9 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/werkzeug/2.3.7/json", + "api_data_url": "https://pypi.org/pypi/werkzeug/3.0.0/json", "datasource_id": null, - "purl": "pkg:pypi/werkzeug@2.3.7" + "purl": "pkg:pypi/werkzeug@3.0.0" }, { "type": "pypi", @@ -1071,7 +1069,7 @@ ], "resolved_dependencies_graph": [ { - "package": "pkg:pypi/blinker@1.6.2", + "package": "pkg:pypi/blinker@1.6.3", "dependencies": [] }, { @@ -1079,14 +1077,14 @@ "dependencies": [] }, { - "package": "pkg:pypi/flask@2.3.3", + "package": "pkg:pypi/flask@3.0.0", "dependencies": [ - "pkg:pypi/blinker@1.6.2", + "pkg:pypi/blinker@1.6.3", "pkg:pypi/click@8.1.7", "pkg:pypi/importlib-metadata@6.8.0", "pkg:pypi/itsdangerous@2.1.2", "pkg:pypi/jinja2@3.1.2", - "pkg:pypi/werkzeug@2.3.7" + "pkg:pypi/werkzeug@3.0.0" ] }, { @@ -1110,7 +1108,7 @@ "dependencies": [] }, { - "package": "pkg:pypi/werkzeug@2.3.7", + "package": "pkg:pypi/werkzeug@3.0.0", "dependencies": [ "pkg:pypi/markupsafe@2.1.3" ] diff --git a/tests/data/test-api-expected.json b/tests/data/test-api-expected.json index 5f7fbed9..fe120dec 100644 --- a/tests/data/test-api-expected.json +++ b/tests/data/test-api-expected.json @@ -611,12 +611,12 @@ "type": "pypi", "namespace": null, "name": "werkzeug", - "version": "2.3.7", + "version": "3.0.0", 
"qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "The comprehensive WSGI web application library.\nWerkzeug\n========\n\n*werkzeug* German noun: \"tool\". Etymology: *werk* (\"work\"), *zeug* (\"stuff\")\n\nWerkzeug is a comprehensive `WSGI`_ web application library. It began as\na simple collection of various utilities for WSGI applications and has\nbecome one of the most advanced WSGI utility libraries.\n\nIt includes:\n\n- An interactive debugger that allows inspecting stack traces and\n source code in the browser with an interactive interpreter for any\n frame in the stack.\n- A full-featured request object with objects to interact with\n headers, query args, form data, files, and cookies.\n- A response object that can wrap other WSGI applications and handle\n streaming data.\n- A routing system for matching URLs to endpoints and generating URLs\n for endpoints, with an extensible system for capturing variables\n from URLs.\n- HTTP utilities to handle entity tags, cache control, dates, user\n agents, cookies, files, and more.\n- A threaded WSGI server for use while developing applications\n locally.\n- A test client for simulating HTTP requests during testing without\n requiring running a server.\n\nWerkzeug doesn't enforce any dependencies. It is up to the developer to\nchoose a template engine, database adapter, and even how to handle\nrequests. It can be used to build all sorts of end user applications\nsuch as blogs, wikis, or bulletin boards.\n\n`Flask`_ wraps Werkzeug, using it to handle the details of WSGI while\nproviding more structure and patterns for defining powerful\napplications.\n\n.. _WSGI: https://wsgi.readthedocs.io/en/latest/\n.. _Flask: https://www.palletsprojects.com/p/flask/\n\n\nInstalling\n----------\n\nInstall and update using `pip`_:\n\n.. code-block:: text\n\n pip install -U Werkzeug\n\n.. _pip: https://pip.pypa.io/en/stable/getting-started/\n\n\nA Simple Example\n----------------\n\n.. code-block:: python\n\n from werkzeug.wrappers import Request, Response\n\n @Request.application\n def application(request):\n return Response('Hello, World!')\n\n if __name__ == '__main__':\n from werkzeug.serving import run_simple\n run_simple('localhost', 4000, application)\n\n\nDonate\n------\n\nThe Pallets organization develops and supports Werkzeug and other\npopular packages. In order to grow the community of contributors and\nusers, and allow the maintainers to devote more time to the projects,\n`please donate today`_.\n\n.. 
_please donate today: https://palletsprojects.com/donate\n\n\nLinks\n-----\n\n- Documentation: https://werkzeug.palletsprojects.com/\n- Changes: https://werkzeug.palletsprojects.com/changes/\n- PyPI Releases: https://pypi.org/project/Werkzeug/\n- Source Code: https://github.com/pallets/werkzeug/\n- Issue Tracker: https://github.com/pallets/werkzeug/issues/\n- Chat: https://discord.gg/pallets", - "release_date": "2023-08-14T09:30:45", + "release_date": "2023-09-30T13:58:46", "parties": [ { "type": "person", @@ -639,11 +639,11 @@ "Topic :: Software Development :: Libraries :: Application Frameworks" ], "homepage_url": "", - "download_url": "https://files.pythonhosted.org/packages/9b/59/a7c32e3d8d0e546a206e0552a2c04444544f15c1da4a01df8938d20c6ffc/werkzeug-2.3.7-py3-none-any.whl", - "size": 242216, + "download_url": "https://files.pythonhosted.org/packages/b6/a5/54b01f663d60d5334f6c9c87c26274e94617a4fd463d812463626423b10d/werkzeug-3.0.0-py3-none-any.whl", + "size": 226556, "sha1": null, - "md5": "201b5bf3fa4dd3eb572704a6e61e69de", - "sha256": "effc12dba7f3bd72e605ce49807bbe692bd729c3bb122a3b91747a6ae77df528", + "md5": "ceb6681d9b633493a4621914aaf95037", + "sha256": "cbb2600f7eabe51dbc0502f58be0b3e1b96b893b05695ea2b35b43d4de2d9962", "sha512": null, "bug_tracking_url": "https://github.com/pallets/werkzeug/issues/", "code_view_url": "https://github.com/pallets/werkzeug/", @@ -662,20 +662,20 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/werkzeug/2.3.7/json", + "api_data_url": "https://pypi.org/pypi/werkzeug/3.0.0/json", "datasource_id": null, - "purl": "pkg:pypi/werkzeug@2.3.7" + "purl": "pkg:pypi/werkzeug@3.0.0" }, { "type": "pypi", "namespace": null, "name": "werkzeug", - "version": "2.3.7", + "version": "3.0.0", "qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "The comprehensive WSGI web application library.\nWerkzeug\n========\n\n*werkzeug* German noun: \"tool\". Etymology: *werk* (\"work\"), *zeug* (\"stuff\")\n\nWerkzeug is a comprehensive `WSGI`_ web application library. It began as\na simple collection of various utilities for WSGI applications and has\nbecome one of the most advanced WSGI utility libraries.\n\nIt includes:\n\n- An interactive debugger that allows inspecting stack traces and\n source code in the browser with an interactive interpreter for any\n frame in the stack.\n- A full-featured request object with objects to interact with\n headers, query args, form data, files, and cookies.\n- A response object that can wrap other WSGI applications and handle\n streaming data.\n- A routing system for matching URLs to endpoints and generating URLs\n for endpoints, with an extensible system for capturing variables\n from URLs.\n- HTTP utilities to handle entity tags, cache control, dates, user\n agents, cookies, files, and more.\n- A threaded WSGI server for use while developing applications\n locally.\n- A test client for simulating HTTP requests during testing without\n requiring running a server.\n\nWerkzeug doesn't enforce any dependencies. It is up to the developer to\nchoose a template engine, database adapter, and even how to handle\nrequests. It can be used to build all sorts of end user applications\nsuch as blogs, wikis, or bulletin boards.\n\n`Flask`_ wraps Werkzeug, using it to handle the details of WSGI while\nproviding more structure and patterns for defining powerful\napplications.\n\n.. _WSGI: https://wsgi.readthedocs.io/en/latest/\n.. 
_Flask: https://www.palletsprojects.com/p/flask/\n\n\nInstalling\n----------\n\nInstall and update using `pip`_:\n\n.. code-block:: text\n\n pip install -U Werkzeug\n\n.. _pip: https://pip.pypa.io/en/stable/getting-started/\n\n\nA Simple Example\n----------------\n\n.. code-block:: python\n\n from werkzeug.wrappers import Request, Response\n\n @Request.application\n def application(request):\n return Response('Hello, World!')\n\n if __name__ == '__main__':\n from werkzeug.serving import run_simple\n run_simple('localhost', 4000, application)\n\n\nDonate\n------\n\nThe Pallets organization develops and supports Werkzeug and other\npopular packages. In order to grow the community of contributors and\nusers, and allow the maintainers to devote more time to the projects,\n`please donate today`_.\n\n.. _please donate today: https://palletsprojects.com/donate\n\n\nLinks\n-----\n\n- Documentation: https://werkzeug.palletsprojects.com/\n- Changes: https://werkzeug.palletsprojects.com/changes/\n- PyPI Releases: https://pypi.org/project/Werkzeug/\n- Source Code: https://github.com/pallets/werkzeug/\n- Issue Tracker: https://github.com/pallets/werkzeug/issues/\n- Chat: https://discord.gg/pallets", - "release_date": "2023-08-14T09:30:46", + "release_date": "2023-09-30T13:58:47", "parties": [ { "type": "person", @@ -698,11 +698,11 @@ "Topic :: Software Development :: Libraries :: Application Frameworks" ], "homepage_url": "", - "download_url": "https://files.pythonhosted.org/packages/ef/56/0acc9f560053478a4987fa35c95d904f04b6915f6b5c4d1c14dc8862ba0a/werkzeug-2.3.7.tar.gz", - "size": 819625, + "download_url": "https://files.pythonhosted.org/packages/8c/47/75c7099c78dc207486e30cdb2b16059ca6d5c6cdcf9290f4621368bd06e4/werkzeug-3.0.0.tar.gz", + "size": 801219, "sha1": null, - "md5": "31bb8f7f0be8724e6da89df0f6fc5d12", - "sha256": "2b8c0e447b4b9dbcc85dd97b6eeb4dcbaf6c8b6c3be0bd654e25553e0a2157d8", + "md5": "efae4055e3c354a6772d12d4d5aeed44", + "sha256": "3ffff4dcc32db52ef3cc94dff3000a3c2846890f3a5a51800a27b909c5e770f0", "sha512": null, "bug_tracking_url": "https://github.com/pallets/werkzeug/issues/", "code_view_url": "https://github.com/pallets/werkzeug/", @@ -721,9 +721,9 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/werkzeug/2.3.7/json", + "api_data_url": "https://pypi.org/pypi/werkzeug/3.0.0/json", "datasource_id": null, - "purl": "pkg:pypi/werkzeug@2.3.7" + "purl": "pkg:pypi/werkzeug@3.0.0" } ], "resolution": [ @@ -737,7 +737,7 @@ "pkg:pypi/click@8.1.7", "pkg:pypi/itsdangerous@2.1.2", "pkg:pypi/jinja2@3.1.2", - "pkg:pypi/werkzeug@2.3.7" + "pkg:pypi/werkzeug@3.0.0" ] }, { @@ -755,7 +755,7 @@ "dependencies": [] }, { - "package": "pkg:pypi/werkzeug@2.3.7", + "package": "pkg:pypi/werkzeug@3.0.0", "dependencies": [ "pkg:pypi/markupsafe@2.1.3" ] diff --git a/tests/data/test-api-pdt-expected.json b/tests/data/test-api-pdt-expected.json index 428ca1a2..12307330 100644 --- a/tests/data/test-api-pdt-expected.json +++ b/tests/data/test-api-pdt-expected.json @@ -611,12 +611,12 @@ "type": "pypi", "namespace": null, "name": "werkzeug", - "version": "2.3.7", + "version": "3.0.0", "qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "The comprehensive WSGI web application library.\nWerkzeug\n========\n\n*werkzeug* German noun: \"tool\". Etymology: *werk* (\"work\"), *zeug* (\"stuff\")\n\nWerkzeug is a comprehensive `WSGI`_ web application library. 
It began as\na simple collection of various utilities for WSGI applications and has\nbecome one of the most advanced WSGI utility libraries.\n\nIt includes:\n\n- An interactive debugger that allows inspecting stack traces and\n source code in the browser with an interactive interpreter for any\n frame in the stack.\n- A full-featured request object with objects to interact with\n headers, query args, form data, files, and cookies.\n- A response object that can wrap other WSGI applications and handle\n streaming data.\n- A routing system for matching URLs to endpoints and generating URLs\n for endpoints, with an extensible system for capturing variables\n from URLs.\n- HTTP utilities to handle entity tags, cache control, dates, user\n agents, cookies, files, and more.\n- A threaded WSGI server for use while developing applications\n locally.\n- A test client for simulating HTTP requests during testing without\n requiring running a server.\n\nWerkzeug doesn't enforce any dependencies. It is up to the developer to\nchoose a template engine, database adapter, and even how to handle\nrequests. It can be used to build all sorts of end user applications\nsuch as blogs, wikis, or bulletin boards.\n\n`Flask`_ wraps Werkzeug, using it to handle the details of WSGI while\nproviding more structure and patterns for defining powerful\napplications.\n\n.. _WSGI: https://wsgi.readthedocs.io/en/latest/\n.. _Flask: https://www.palletsprojects.com/p/flask/\n\n\nInstalling\n----------\n\nInstall and update using `pip`_:\n\n.. code-block:: text\n\n pip install -U Werkzeug\n\n.. _pip: https://pip.pypa.io/en/stable/getting-started/\n\n\nA Simple Example\n----------------\n\n.. code-block:: python\n\n from werkzeug.wrappers import Request, Response\n\n @Request.application\n def application(request):\n return Response('Hello, World!')\n\n if __name__ == '__main__':\n from werkzeug.serving import run_simple\n run_simple('localhost', 4000, application)\n\n\nDonate\n------\n\nThe Pallets organization develops and supports Werkzeug and other\npopular packages. In order to grow the community of contributors and\nusers, and allow the maintainers to devote more time to the projects,\n`please donate today`_.\n\n.. 
_please donate today: https://palletsprojects.com/donate\n\n\nLinks\n-----\n\n- Documentation: https://werkzeug.palletsprojects.com/\n- Changes: https://werkzeug.palletsprojects.com/changes/\n- PyPI Releases: https://pypi.org/project/Werkzeug/\n- Source Code: https://github.com/pallets/werkzeug/\n- Issue Tracker: https://github.com/pallets/werkzeug/issues/\n- Chat: https://discord.gg/pallets", - "release_date": "2023-08-14T09:30:45", + "release_date": "2023-09-30T13:58:46", "parties": [ { "type": "person", @@ -639,11 +639,11 @@ "Topic :: Software Development :: Libraries :: Application Frameworks" ], "homepage_url": "", - "download_url": "https://files.pythonhosted.org/packages/9b/59/a7c32e3d8d0e546a206e0552a2c04444544f15c1da4a01df8938d20c6ffc/werkzeug-2.3.7-py3-none-any.whl", - "size": 242216, + "download_url": "https://files.pythonhosted.org/packages/b6/a5/54b01f663d60d5334f6c9c87c26274e94617a4fd463d812463626423b10d/werkzeug-3.0.0-py3-none-any.whl", + "size": 226556, "sha1": null, - "md5": "201b5bf3fa4dd3eb572704a6e61e69de", - "sha256": "effc12dba7f3bd72e605ce49807bbe692bd729c3bb122a3b91747a6ae77df528", + "md5": "ceb6681d9b633493a4621914aaf95037", + "sha256": "cbb2600f7eabe51dbc0502f58be0b3e1b96b893b05695ea2b35b43d4de2d9962", "sha512": null, "bug_tracking_url": "https://github.com/pallets/werkzeug/issues/", "code_view_url": "https://github.com/pallets/werkzeug/", @@ -662,20 +662,20 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/werkzeug/2.3.7/json", + "api_data_url": "https://pypi.org/pypi/werkzeug/3.0.0/json", "datasource_id": null, - "purl": "pkg:pypi/werkzeug@2.3.7" + "purl": "pkg:pypi/werkzeug@3.0.0" }, { "type": "pypi", "namespace": null, "name": "werkzeug", - "version": "2.3.7", + "version": "3.0.0", "qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "The comprehensive WSGI web application library.\nWerkzeug\n========\n\n*werkzeug* German noun: \"tool\". Etymology: *werk* (\"work\"), *zeug* (\"stuff\")\n\nWerkzeug is a comprehensive `WSGI`_ web application library. It began as\na simple collection of various utilities for WSGI applications and has\nbecome one of the most advanced WSGI utility libraries.\n\nIt includes:\n\n- An interactive debugger that allows inspecting stack traces and\n source code in the browser with an interactive interpreter for any\n frame in the stack.\n- A full-featured request object with objects to interact with\n headers, query args, form data, files, and cookies.\n- A response object that can wrap other WSGI applications and handle\n streaming data.\n- A routing system for matching URLs to endpoints and generating URLs\n for endpoints, with an extensible system for capturing variables\n from URLs.\n- HTTP utilities to handle entity tags, cache control, dates, user\n agents, cookies, files, and more.\n- A threaded WSGI server for use while developing applications\n locally.\n- A test client for simulating HTTP requests during testing without\n requiring running a server.\n\nWerkzeug doesn't enforce any dependencies. It is up to the developer to\nchoose a template engine, database adapter, and even how to handle\nrequests. It can be used to build all sorts of end user applications\nsuch as blogs, wikis, or bulletin boards.\n\n`Flask`_ wraps Werkzeug, using it to handle the details of WSGI while\nproviding more structure and patterns for defining powerful\napplications.\n\n.. _WSGI: https://wsgi.readthedocs.io/en/latest/\n.. 
_Flask: https://www.palletsprojects.com/p/flask/\n\n\nInstalling\n----------\n\nInstall and update using `pip`_:\n\n.. code-block:: text\n\n pip install -U Werkzeug\n\n.. _pip: https://pip.pypa.io/en/stable/getting-started/\n\n\nA Simple Example\n----------------\n\n.. code-block:: python\n\n from werkzeug.wrappers import Request, Response\n\n @Request.application\n def application(request):\n return Response('Hello, World!')\n\n if __name__ == '__main__':\n from werkzeug.serving import run_simple\n run_simple('localhost', 4000, application)\n\n\nDonate\n------\n\nThe Pallets organization develops and supports Werkzeug and other\npopular packages. In order to grow the community of contributors and\nusers, and allow the maintainers to devote more time to the projects,\n`please donate today`_.\n\n.. _please donate today: https://palletsprojects.com/donate\n\n\nLinks\n-----\n\n- Documentation: https://werkzeug.palletsprojects.com/\n- Changes: https://werkzeug.palletsprojects.com/changes/\n- PyPI Releases: https://pypi.org/project/Werkzeug/\n- Source Code: https://github.com/pallets/werkzeug/\n- Issue Tracker: https://github.com/pallets/werkzeug/issues/\n- Chat: https://discord.gg/pallets", - "release_date": "2023-08-14T09:30:46", + "release_date": "2023-09-30T13:58:47", "parties": [ { "type": "person", @@ -698,11 +698,11 @@ "Topic :: Software Development :: Libraries :: Application Frameworks" ], "homepage_url": "", - "download_url": "https://files.pythonhosted.org/packages/ef/56/0acc9f560053478a4987fa35c95d904f04b6915f6b5c4d1c14dc8862ba0a/werkzeug-2.3.7.tar.gz", - "size": 819625, + "download_url": "https://files.pythonhosted.org/packages/8c/47/75c7099c78dc207486e30cdb2b16059ca6d5c6cdcf9290f4621368bd06e4/werkzeug-3.0.0.tar.gz", + "size": 801219, "sha1": null, - "md5": "31bb8f7f0be8724e6da89df0f6fc5d12", - "sha256": "2b8c0e447b4b9dbcc85dd97b6eeb4dcbaf6c8b6c3be0bd654e25553e0a2157d8", + "md5": "efae4055e3c354a6772d12d4d5aeed44", + "sha256": "3ffff4dcc32db52ef3cc94dff3000a3c2846890f3a5a51800a27b909c5e770f0", "sha512": null, "bug_tracking_url": "https://github.com/pallets/werkzeug/issues/", "code_view_url": "https://github.com/pallets/werkzeug/", @@ -721,9 +721,9 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/werkzeug/2.3.7/json", + "api_data_url": "https://pypi.org/pypi/werkzeug/3.0.0/json", "datasource_id": null, - "purl": "pkg:pypi/werkzeug@2.3.7" + "purl": "pkg:pypi/werkzeug@3.0.0" } ], "resolution": [ @@ -760,7 +760,7 @@ { "key": "werkzeug", "package_name": "werkzeug", - "installed_version": "2.3.7", + "installed_version": "3.0.0", "dependencies": [ { "key": "markupsafe", diff --git a/tests/data/test-api-with-partial-setup-py.json b/tests/data/test-api-with-partial-setup-py.json index 20f9403c..8494c8a1 100644 --- a/tests/data/test-api-with-partial-setup-py.json +++ b/tests/data/test-api-with-partial-setup-py.json @@ -2,7 +2,7 @@ "files": [ { "type": "file", - "path": "/home/tg1999/Desktop/example/tools/python-inspector/tests/data/partial-setup.py", + "path": "/home/tg1999/Desktop/python-inspector-1/tests/data/partial-setup.py", "package_data": [ { "type": "pypi", @@ -59,12 +59,12 @@ "type": "pypi", "namespace": null, "name": "semver", - "version": "3.0.1", + "version": "3.0.2", "qualifiers": {}, "subpath": null, "primary_language": "Python", - "description": "Python helper for Semantic Versioning (https://semver.org)\nQuickstart\n==========\n\n.. teaser-begin\n\nA Python module for `semantic versioning`_. 
Simplifies comparing versions.\n\n|GHAction| |python-support| |downloads| |license| |docs| |black|\n|openissues| |GHDiscussion|\n\n.. teaser-end\n\n.. note::\n\n This project works for Python 3.7 and greater only. If you are\n looking for a compatible version for Python 2, use the\n maintenance branch |MAINT|_.\n\n The last version of semver which supports Python 2.7 to 3.5 will be\n 2.x.y However, keep in mind, the major 2 release is frozen: no new\n features nor backports will be integrated.\n\n We recommend to upgrade your workflow to Python 3 to gain support,\n bugfixes, and new features.\n\n.. |MAINT| replace:: ``maint/v2``\n.. _MAINT: https://github.com/python-semver/python-semver/tree/maint/v2\n\n\nThe module follows the ``MAJOR.MINOR.PATCH`` style:\n\n* ``MAJOR`` version when you make incompatible API changes,\n* ``MINOR`` version when you add functionality in a backwards compatible manner, and\n* ``PATCH`` version when you make backwards compatible bug fixes.\n\nAdditional labels for pre-release and build metadata are supported.\n\nTo import this library, use:\n\n.. code-block:: python\n\n >>> import semver\n\nWorking with the library is quite straightforward. To turn a version string into the\ndifferent parts, use the ``semver.Version.parse`` function:\n\n.. code-block:: python\n\n >>> ver = semver.Version.parse('1.2.3-pre.2+build.4')\n >>> ver.major\n 1\n >>> ver.minor\n 2\n >>> ver.patch\n 3\n >>> ver.prerelease\n 'pre.2'\n >>> ver.build\n 'build.4'\n\nTo raise parts of a version, there are a couple of functions available for\nyou. The function ``semver.Version.bump_major`` leaves the original object untouched, but\nreturns a new ``semver.Version`` instance with the raised major part:\n\n.. code-block:: python\n\n >>> ver = semver.Version.parse(\"3.4.5\")\n >>> ver.bump_major()\n Version(major=4, minor=0, patch=0, prerelease=None, build=None)\n\nIt is allowed to concatenate different \"bump functions\":\n\n.. code-block:: python\n\n >>> ver.bump_major().bump_minor()\n Version(major=4, minor=1, patch=0, prerelease=None, build=None)\n\nTo compare two versions, semver provides the ``semver.compare`` function.\nThe return value indicates the relationship between the first and second\nversion:\n\n.. code-block:: python\n\n >>> semver.compare(\"1.0.0\", \"2.0.0\")\n -1\n >>> semver.compare(\"2.0.0\", \"1.0.0\")\n 1\n >>> semver.compare(\"2.0.0\", \"2.0.0\")\n 0\n\n\nThere are other functions to discover. Read on!\n\n\n.. |latest-version| image:: https://img.shields.io/pypi/v/semver.svg\n :alt: Latest version on PyPI\n :target: https://pypi.org/project/semver\n.. |python-support| image:: https://img.shields.io/pypi/pyversions/semver.svg\n :target: https://pypi.org/project/semver\n :alt: Python versions\n.. |downloads| image:: https://img.shields.io/pypi/dm/semver.svg\n :alt: Monthly downloads from PyPI\n :target: https://pypi.org/project/semver\n.. |license| image:: https://img.shields.io/pypi/l/semver.svg\n :alt: Software license\n :target: https://github.com/python-semver/python-semver/blob/master/LICENSE.txt\n.. |docs| image:: https://readthedocs.org/projects/python-semver/badge/?version=latest\n :target: http://python-semver.readthedocs.io/en/latest/?badge=latest\n :alt: Documentation Status\n.. _semantic versioning: https://semver.org/\n.. |black| image:: https://img.shields.io/badge/code%20style-black-000000.svg\n :target: https://github.com/psf/black\n :alt: Black Formatter\n.. 
|Gitter| image:: https://badges.gitter.im/python-semver/community.svg\n :target: https://gitter.im/python-semver/community\n :alt: Gitter\n.. |openissues| image:: http://isitmaintained.com/badge/open/python-semver/python-semver.svg\n :target: http://isitmaintained.com/project/python-semver/python-semver\n :alt: Percentage of open issues\n.. |GHAction| image:: https://github.com/python-semver/python-semver/workflows/Python/badge.svg\n :alt: Python\n.. |GHDiscussion| image:: https://shields.io/badge/GitHub-%20Discussions-green?logo=github\n :target: https://github.com/python-semver/python-semver/discussions\n :alt: GitHub Discussion", - "release_date": "2023-06-14T11:43:22", + "description": "Python helper for Semantic Versioning (https://semver.org)\nQuickstart\n==========\n\n.. teaser-begin\n\nA Python module to simplify `semantic versioning`_.\n\n|GHAction| |python-support| |downloads| |license| |docs| |black|\n|openissues| |GHDiscussion|\n\n.. teaser-end\n\nThe module follows the ``MAJOR.MINOR.PATCH`` style:\n\n* ``MAJOR`` version when you make incompatible API changes,\n* ``MINOR`` version when you add functionality in a backwards compatible manner, and\n* ``PATCH`` version when you make backwards compatible bug fixes.\n\nAdditional labels for pre-release and build metadata are supported.\n\nTo import this library, use:\n\n.. code-block:: python\n\n >>> import semver\n\nWorking with the library is quite straightforward. To turn a version string into the\ndifferent parts, use the ``semver.Version.parse`` function:\n\n.. code-block:: python\n\n >>> ver = semver.Version.parse('1.2.3-pre.2+build.4')\n >>> ver.major\n 1\n >>> ver.minor\n 2\n >>> ver.patch\n 3\n >>> ver.prerelease\n 'pre.2'\n >>> ver.build\n 'build.4'\n\nTo raise parts of a version, there are a couple of functions available for\nyou. The function ``semver.Version.bump_major`` leaves the original object untouched, but\nreturns a new ``semver.Version`` instance with the raised major part:\n\n.. code-block:: python\n\n >>> ver = semver.Version.parse(\"3.4.5\")\n >>> ver.bump_major()\n Version(major=4, minor=0, patch=0, prerelease=None, build=None)\n\nIt is allowed to concatenate different \"bump functions\":\n\n.. code-block:: python\n\n >>> ver.bump_major().bump_minor()\n Version(major=4, minor=1, patch=0, prerelease=None, build=None)\n\nTo compare two versions, semver provides the ``semver.compare`` function.\nThe return value indicates the relationship between the first and second\nversion:\n\n.. code-block:: python\n\n >>> semver.compare(\"1.0.0\", \"2.0.0\")\n -1\n >>> semver.compare(\"2.0.0\", \"1.0.0\")\n 1\n >>> semver.compare(\"2.0.0\", \"2.0.0\")\n 0\n\n\nThere are other functions to discover. Read on!\n\n\n.. |latest-version| image:: https://img.shields.io/pypi/v/semver.svg\n :alt: Latest version on PyPI\n :target: https://pypi.org/project/semver\n.. |python-support| image:: https://img.shields.io/pypi/pyversions/semver.svg\n :target: https://pypi.org/project/semver\n :alt: Python versions\n.. |downloads| image:: https://img.shields.io/pypi/dm/semver.svg\n :alt: Monthly downloads from PyPI\n :target: https://pypi.org/project/semver\n.. |license| image:: https://img.shields.io/pypi/l/semver.svg\n :alt: Software license\n :target: https://github.com/python-semver/python-semver/blob/master/LICENSE.txt\n.. |docs| image:: https://readthedocs.org/projects/python-semver/badge/?version=latest\n :target: http://python-semver.readthedocs.io/en/latest/?badge=latest\n :alt: Documentation Status\n.. 
_semantic versioning: https://semver.org/\n.. |black| image:: https://img.shields.io/badge/code%20style-black-000000.svg\n :target: https://github.com/psf/black\n :alt: Black Formatter\n.. |Gitter| image:: https://badges.gitter.im/python-semver/community.svg\n :target: https://gitter.im/python-semver/community\n :alt: Gitter\n.. |openissues| image:: http://isitmaintained.com/badge/open/python-semver/python-semver.svg\n :target: http://isitmaintained.com/project/python-semver/python-semver\n :alt: Percentage of open issues\n.. |GHAction| image:: https://github.com/python-semver/python-semver/workflows/Python/badge.svg\n :alt: Python\n.. |GHDiscussion| image:: https://shields.io/badge/GitHub-%20Discussions-green?logo=github\n :target: https://github.com/python-semver/python-semver/discussions\n :alt: GitHub Discussion", + "release_date": "2023-10-09T11:58:25", "parties": [ { "type": "person", @@ -96,11 +96,11 @@ "Topic :: Software Development :: Libraries :: Python Modules" ], "homepage_url": "https://github.com/python-semver/python-semver", - "download_url": "https://files.pythonhosted.org/packages/46/30/a14b56e500e8eabf8c349edd0583d736b231e652b7dce776e85df11e9e0b/semver-3.0.1.tar.gz", - "size": 205762, + "download_url": "https://files.pythonhosted.org/packages/41/6c/a536cc008f38fd83b3c1b98ce19ead13b746b5588c9a0cb9dd9f6ea434bc/semver-3.0.2.tar.gz", + "size": 214988, "sha1": null, - "md5": "b7502c12ce325ffffeab694fed52f6f5", - "sha256": "9ec78c5447883c67b97f98c3b6212796708191d22e4ad30f4570f840171cbce1", + "md5": "0bbc4d74959c9f1522f75ffb4ae0934d", + "sha256": "6253adb39c70f6e51afed2fa7152bcd414c411286088fb4b9effb133885ab4cc", "sha512": null, "bug_tracking_url": "https://github.com/python-semver/python-semver/issues", "code_view_url": null, @@ -120,14 +120,14 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/semver/3.0.1/json", + "api_data_url": "https://pypi.org/pypi/semver/3.0.2/json", "datasource_id": null, - "purl": "pkg:pypi/semver@3.0.1" + "purl": "pkg:pypi/semver@3.0.2" } ], "resolution": [ { - "package": "pkg:pypi/semver@3.0.1", + "package": "pkg:pypi/semver@3.0.2", "dependencies": [] } ] diff --git a/tests/data/test-api-with-prefer-source.json b/tests/data/test-api-with-prefer-source.json index ca1d9283..b58615fc 100644 --- a/tests/data/test-api-with-prefer-source.json +++ b/tests/data/test-api-with-prefer-source.json @@ -308,12 +308,12 @@ "type": "pypi", "namespace": null, "name": "werkzeug", - "version": "2.3.7", + "version": "3.0.0", "qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "The comprehensive WSGI web application library.\nWerkzeug\n========\n\n*werkzeug* German noun: \"tool\". Etymology: *werk* (\"work\"), *zeug* (\"stuff\")\n\nWerkzeug is a comprehensive `WSGI`_ web application library. 
It began as\na simple collection of various utilities for WSGI applications and has\nbecome one of the most advanced WSGI utility libraries.\n\nIt includes:\n\n- An interactive debugger that allows inspecting stack traces and\n source code in the browser with an interactive interpreter for any\n frame in the stack.\n- A full-featured request object with objects to interact with\n headers, query args, form data, files, and cookies.\n- A response object that can wrap other WSGI applications and handle\n streaming data.\n- A routing system for matching URLs to endpoints and generating URLs\n for endpoints, with an extensible system for capturing variables\n from URLs.\n- HTTP utilities to handle entity tags, cache control, dates, user\n agents, cookies, files, and more.\n- A threaded WSGI server for use while developing applications\n locally.\n- A test client for simulating HTTP requests during testing without\n requiring running a server.\n\nWerkzeug doesn't enforce any dependencies. It is up to the developer to\nchoose a template engine, database adapter, and even how to handle\nrequests. It can be used to build all sorts of end user applications\nsuch as blogs, wikis, or bulletin boards.\n\n`Flask`_ wraps Werkzeug, using it to handle the details of WSGI while\nproviding more structure and patterns for defining powerful\napplications.\n\n.. _WSGI: https://wsgi.readthedocs.io/en/latest/\n.. _Flask: https://www.palletsprojects.com/p/flask/\n\n\nInstalling\n----------\n\nInstall and update using `pip`_:\n\n.. code-block:: text\n\n pip install -U Werkzeug\n\n.. _pip: https://pip.pypa.io/en/stable/getting-started/\n\n\nA Simple Example\n----------------\n\n.. code-block:: python\n\n from werkzeug.wrappers import Request, Response\n\n @Request.application\n def application(request):\n return Response('Hello, World!')\n\n if __name__ == '__main__':\n from werkzeug.serving import run_simple\n run_simple('localhost', 4000, application)\n\n\nDonate\n------\n\nThe Pallets organization develops and supports Werkzeug and other\npopular packages. In order to grow the community of contributors and\nusers, and allow the maintainers to devote more time to the projects,\n`please donate today`_.\n\n.. 
_please donate today: https://palletsprojects.com/donate\n\n\nLinks\n-----\n\n- Documentation: https://werkzeug.palletsprojects.com/\n- Changes: https://werkzeug.palletsprojects.com/changes/\n- PyPI Releases: https://pypi.org/project/Werkzeug/\n- Source Code: https://github.com/pallets/werkzeug/\n- Issue Tracker: https://github.com/pallets/werkzeug/issues/\n- Chat: https://discord.gg/pallets", - "release_date": "2023-08-14T09:30:46", + "release_date": "2023-09-30T13:58:47", "parties": [ { "type": "person", @@ -336,11 +336,11 @@ "Topic :: Software Development :: Libraries :: Application Frameworks" ], "homepage_url": "", - "download_url": "https://files.pythonhosted.org/packages/ef/56/0acc9f560053478a4987fa35c95d904f04b6915f6b5c4d1c14dc8862ba0a/werkzeug-2.3.7.tar.gz", - "size": 819625, + "download_url": "https://files.pythonhosted.org/packages/8c/47/75c7099c78dc207486e30cdb2b16059ca6d5c6cdcf9290f4621368bd06e4/werkzeug-3.0.0.tar.gz", + "size": 801219, "sha1": null, - "md5": "31bb8f7f0be8724e6da89df0f6fc5d12", - "sha256": "2b8c0e447b4b9dbcc85dd97b6eeb4dcbaf6c8b6c3be0bd654e25553e0a2157d8", + "md5": "efae4055e3c354a6772d12d4d5aeed44", + "sha256": "3ffff4dcc32db52ef3cc94dff3000a3c2846890f3a5a51800a27b909c5e770f0", "sha512": null, "bug_tracking_url": "https://github.com/pallets/werkzeug/issues/", "code_view_url": "https://github.com/pallets/werkzeug/", @@ -359,9 +359,9 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/werkzeug/2.3.7/json", + "api_data_url": "https://pypi.org/pypi/werkzeug/3.0.0/json", "datasource_id": null, - "purl": "pkg:pypi/werkzeug@2.3.7" + "purl": "pkg:pypi/werkzeug@3.0.0" } ], "resolution": [ @@ -375,7 +375,7 @@ "pkg:pypi/click@8.1.7", "pkg:pypi/itsdangerous@2.1.2", "pkg:pypi/jinja2@3.1.2", - "pkg:pypi/werkzeug@2.3.7" + "pkg:pypi/werkzeug@3.0.0" ] }, { @@ -393,7 +393,7 @@ "dependencies": [] }, { - "package": "pkg:pypi/werkzeug@2.3.7", + "package": "pkg:pypi/werkzeug@3.0.0", "dependencies": [ "pkg:pypi/markupsafe@2.1.3" ] diff --git a/tests/data/test-api-with-python-311.json b/tests/data/test-api-with-python-311.json index ca1d9283..b58615fc 100644 --- a/tests/data/test-api-with-python-311.json +++ b/tests/data/test-api-with-python-311.json @@ -308,12 +308,12 @@ "type": "pypi", "namespace": null, "name": "werkzeug", - "version": "2.3.7", + "version": "3.0.0", "qualifiers": {}, "subpath": null, "primary_language": "Python", "description": "The comprehensive WSGI web application library.\nWerkzeug\n========\n\n*werkzeug* German noun: \"tool\". Etymology: *werk* (\"work\"), *zeug* (\"stuff\")\n\nWerkzeug is a comprehensive `WSGI`_ web application library. 
It began as\na simple collection of various utilities for WSGI applications and has\nbecome one of the most advanced WSGI utility libraries.\n\nIt includes:\n\n- An interactive debugger that allows inspecting stack traces and\n source code in the browser with an interactive interpreter for any\n frame in the stack.\n- A full-featured request object with objects to interact with\n headers, query args, form data, files, and cookies.\n- A response object that can wrap other WSGI applications and handle\n streaming data.\n- A routing system for matching URLs to endpoints and generating URLs\n for endpoints, with an extensible system for capturing variables\n from URLs.\n- HTTP utilities to handle entity tags, cache control, dates, user\n agents, cookies, files, and more.\n- A threaded WSGI server for use while developing applications\n locally.\n- A test client for simulating HTTP requests during testing without\n requiring running a server.\n\nWerkzeug doesn't enforce any dependencies. It is up to the developer to\nchoose a template engine, database adapter, and even how to handle\nrequests. It can be used to build all sorts of end user applications\nsuch as blogs, wikis, or bulletin boards.\n\n`Flask`_ wraps Werkzeug, using it to handle the details of WSGI while\nproviding more structure and patterns for defining powerful\napplications.\n\n.. _WSGI: https://wsgi.readthedocs.io/en/latest/\n.. _Flask: https://www.palletsprojects.com/p/flask/\n\n\nInstalling\n----------\n\nInstall and update using `pip`_:\n\n.. code-block:: text\n\n pip install -U Werkzeug\n\n.. _pip: https://pip.pypa.io/en/stable/getting-started/\n\n\nA Simple Example\n----------------\n\n.. code-block:: python\n\n from werkzeug.wrappers import Request, Response\n\n @Request.application\n def application(request):\n return Response('Hello, World!')\n\n if __name__ == '__main__':\n from werkzeug.serving import run_simple\n run_simple('localhost', 4000, application)\n\n\nDonate\n------\n\nThe Pallets organization develops and supports Werkzeug and other\npopular packages. In order to grow the community of contributors and\nusers, and allow the maintainers to devote more time to the projects,\n`please donate today`_.\n\n.. 
_please donate today: https://palletsprojects.com/donate\n\n\nLinks\n-----\n\n- Documentation: https://werkzeug.palletsprojects.com/\n- Changes: https://werkzeug.palletsprojects.com/changes/\n- PyPI Releases: https://pypi.org/project/Werkzeug/\n- Source Code: https://github.com/pallets/werkzeug/\n- Issue Tracker: https://github.com/pallets/werkzeug/issues/\n- Chat: https://discord.gg/pallets", - "release_date": "2023-08-14T09:30:46", + "release_date": "2023-09-30T13:58:47", "parties": [ { "type": "person", @@ -336,11 +336,11 @@ "Topic :: Software Development :: Libraries :: Application Frameworks" ], "homepage_url": "", - "download_url": "https://files.pythonhosted.org/packages/ef/56/0acc9f560053478a4987fa35c95d904f04b6915f6b5c4d1c14dc8862ba0a/werkzeug-2.3.7.tar.gz", - "size": 819625, + "download_url": "https://files.pythonhosted.org/packages/8c/47/75c7099c78dc207486e30cdb2b16059ca6d5c6cdcf9290f4621368bd06e4/werkzeug-3.0.0.tar.gz", + "size": 801219, "sha1": null, - "md5": "31bb8f7f0be8724e6da89df0f6fc5d12", - "sha256": "2b8c0e447b4b9dbcc85dd97b6eeb4dcbaf6c8b6c3be0bd654e25553e0a2157d8", + "md5": "efae4055e3c354a6772d12d4d5aeed44", + "sha256": "3ffff4dcc32db52ef3cc94dff3000a3c2846890f3a5a51800a27b909c5e770f0", "sha512": null, "bug_tracking_url": "https://github.com/pallets/werkzeug/issues/", "code_view_url": "https://github.com/pallets/werkzeug/", @@ -359,9 +359,9 @@ "dependencies": [], "repository_homepage_url": null, "repository_download_url": null, - "api_data_url": "https://pypi.org/pypi/werkzeug/2.3.7/json", + "api_data_url": "https://pypi.org/pypi/werkzeug/3.0.0/json", "datasource_id": null, - "purl": "pkg:pypi/werkzeug@2.3.7" + "purl": "pkg:pypi/werkzeug@3.0.0" } ], "resolution": [ @@ -375,7 +375,7 @@ "pkg:pypi/click@8.1.7", "pkg:pypi/itsdangerous@2.1.2", "pkg:pypi/jinja2@3.1.2", - "pkg:pypi/werkzeug@2.3.7" + "pkg:pypi/werkzeug@3.0.0" ] }, { @@ -393,7 +393,7 @@ "dependencies": [] }, { - "package": "pkg:pypi/werkzeug@2.3.7", + "package": "pkg:pypi/werkzeug@3.0.0", "dependencies": [ "pkg:pypi/markupsafe@2.1.3" ] diff --git a/tests/data/test-api-with-recursive-requirement-file.json b/tests/data/test-api-with-recursive-requirement-file.json index 8dd599d0..dff2d6e9 100644 --- a/tests/data/test-api-with-recursive-requirement-file.json +++ b/tests/data/test-api-with-recursive-requirement-file.json @@ -2,7 +2,7 @@ "files": [ { "type": "file", - "path": "/home/tg1999/Desktop/example/tools/python-inspector/tests/data/recursive_requirements/r.txt", + "path": "/home/tg1999/Desktop/python-inspector-1/tests/data/recursive_requirements/r.txt", "package_data": [ { "type": "pypi", diff --git a/tests/data/test-api-with-requirement-file.json b/tests/data/test-api-with-requirement-file.json index f9df7724..57807842 100644 --- a/tests/data/test-api-with-requirement-file.json +++ b/tests/data/test-api-with-requirement-file.json @@ -2,7 +2,7 @@ "files": [ { "type": "file", - "path": "/home/tg1999/Desktop/example/tools/python-inspector/tests/data/frozen-requirements.txt", + "path": "/home/tg1999/Desktop/python-inspector-1/tests/data/frozen-requirements.txt", "package_data": [ { "type": "pypi", diff --git a/tests/test_resolution.py b/tests/test_resolution.py index 46771c0e..351146c8 100644 --- a/tests/test_resolution.py +++ b/tests/test_resolution.py @@ -50,7 +50,7 @@ def test_get_resolved_dependencies_with_flask_and_python_310(): "pkg:pypi/itsdangerous@2.1.2", "pkg:pypi/jinja2@3.1.2", "pkg:pypi/markupsafe@2.1.3", - "pkg:pypi/werkzeug@2.3.7", + "pkg:pypi/werkzeug@3.0.0", ] @@ -74,7 +74,7 @@ def 
test_get_resolved_dependencies_with_flask_and_python_310_windows(): "pkg:pypi/itsdangerous@2.1.2", "pkg:pypi/jinja2@3.1.2", "pkg:pypi/markupsafe@2.1.3", - "pkg:pypi/werkzeug@2.3.7", + "pkg:pypi/werkzeug@3.0.0", ] @@ -125,7 +125,7 @@ def test_get_resolved_dependencies_with_tilde_requirement_using_json_api(): "pkg:pypi/itsdangerous@2.1.2", "pkg:pypi/jinja2@3.1.2", "pkg:pypi/markupsafe@2.1.3", - "pkg:pypi/werkzeug@2.3.7", + "pkg:pypi/werkzeug@3.0.0", "pkg:pypi/zipp@3.17.0", ] @@ -146,7 +146,7 @@ def test_without_supported_wheels(): assert plist == [ "pkg:pypi/autobahn@22.3.2", - "pkg:pypi/cffi@1.15.1", + "pkg:pypi/cffi@1.16.0", "pkg:pypi/cryptography@41.0.4", "pkg:pypi/hyperlink@21.0.0", "pkg:pypi/idna@3.4",