Skip to content

Commit

Permalink
Merge pull request #2257 from Kozea/html5
Browse files Browse the repository at this point in the history
Use tinyhtml5 instead of html5lib
  • Loading branch information
liZe committed Sep 21, 2024
2 parents 41c818d + 3ad8c23 commit e6de621
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 10 deletions.
4 changes: 2 additions & 2 deletions docs/first_steps.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ WeasyPrint |version| depends on:
* Pango_ ≥ 1.44.0
* pydyf_ ≥ 0.10.0
* CFFI_ ≥ 0.6
* html5lib_ ≥ 1.1
* tinyhtml5_ ≥ 2.0.0b1
* tinycss2_ ≥ 1.3.0
* cssselect2_ ≥ 0.1
* Pyphen_ ≥ 0.9.1
Expand All @@ -23,8 +23,8 @@ WeasyPrint |version| depends on:
.. _Python: https://www.python.org/
.. _Pango: https://pango.gnome.org/
.. _CFFI: https://cffi.readthedocs.io/
.. _html5lib: https://html5lib.readthedocs.io/
.. _pydyf: https://doc.courtbouillon.org/pydyf/
.. _tinyhtml5: https://doc.courtbouillon.org/tinyhtml5/
.. _tinycss2: https://doc.courtbouillon.org/tinycss2/
.. _cssselect2: https://doc.courtbouillon.org/cssselect2/
.. _Pyphen: https://pyphen.org/
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ license = {file = 'LICENSE'}
dependencies = [
'pydyf >=0.11.0,<0.12',
'cffi >=0.6,<2',
'html5lib >=1.1,<2',
'tinyhtml5 >=2.0.0b1,<3',
'tinycss2 >=1.3.0,<2',
'cssselect2 >=0.1,<0.8',
'Pyphen >=0.9.1,<0.16',
Expand Down
10 changes: 5 additions & 5 deletions weasyprint/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
from urllib.parse import urljoin

import cssselect2
import html5lib
import tinycss2
import tinyhtml5

VERSION = __version__ = '62.3'

Expand Down Expand Up @@ -111,7 +111,7 @@ def _find_base_url(html_document, fallback_base_url):


class HTML:
"""HTML document parsed by html5lib.
"""HTML document parsed by tinyhtml5.
You can just create an instance with a positional argument:
``doc = HTML(something)``
Expand Down Expand Up @@ -169,14 +169,14 @@ def __init__(self, guess=None, filename=None, url=None, file_obj=None,
guess, filename, url, file_obj, string, base_url, url_fetcher)
with result as (source_type, source, base_url, protocol_encoding):
if isinstance(source, str):
result = html5lib.parse(source, namespaceHTMLElements=False)
result = tinyhtml5.parse(source, namespace_html_elements=False)
else:
kwargs = {'namespaceHTMLElements': False}
kwargs = {'namespace_html_elements': False}
if protocol_encoding is not None:
kwargs['transport_encoding'] = protocol_encoding
if encoding is not None:
kwargs['override_encoding'] = encoding
result = html5lib.parse(source, **kwargs)
result = tinyhtml5.parse(source, **kwargs)
self.base_url = _find_base_url(result, base_url)
self.url_fetcher = url_fetcher
self.media_type = media_type
Expand Down
4 changes: 2 additions & 2 deletions weasyprint/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def close(self):
GzipFile.close(self)
self.fileobj_to_close.close()

# Inform html5lib to not rely on these:
seek = tell = None
def seekable(self):
return False


def iri_to_uri(url):
Expand Down

0 comments on commit e6de621

Please sign in to comment.