From c4dd6771836841ee796af8035dbf8239074ed5ec Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Tue, 12 Jul 2016 02:52:37 +0100 Subject: [PATCH] Move a whole bunch of private modules to be underscore prefixed This moves: html5lib.ihatexml -> html5lib._ihatexml html5lib.inputstream -> html5lib._inputstream html5lib.tokenizer -> html5lib._tokenizer html5lib.trie -> html5lib._trie html5lib.utils -> html5lib._utils --- html5lib/{ihatexml.py => _ihatexml.py} | 0 html5lib/{inputstream.py => _inputstream.py} | 10 +-- html5lib/{tokenizer.py => _tokenizer.py} | 4 +- html5lib/{trie => _trie}/__init__.py | 0 html5lib/{trie => _trie}/_base.py | 0 html5lib/{trie => _trie}/datrie.py | 0 html5lib/{trie => _trie}/py.py | 0 html5lib/{utils.py => _utils.py} | 0 html5lib/html5parser.py | 90 ++++++++++---------- html5lib/serializer.py | 8 +- html5lib/tests/test_encoding.py | 12 +-- html5lib/tests/test_stream.py | 6 +- html5lib/tests/tokenizer.py | 6 +- html5lib/treebuilders/__init__.py | 2 +- html5lib/treebuilders/dom.py | 2 +- html5lib/treebuilders/etree.py | 6 +- html5lib/treebuilders/etree_lxml.py | 6 +- html5lib/treewalkers/__init__.py | 2 +- html5lib/treewalkers/etree.py | 2 +- html5lib/treewalkers/etree_lxml.py | 4 +- parse.py | 4 +- 21 files changed, 82 insertions(+), 82 deletions(-) rename html5lib/{ihatexml.py => _ihatexml.py} (100%) rename html5lib/{inputstream.py => _inputstream.py} (99%) rename html5lib/{tokenizer.py => _tokenizer.py} (99%) rename html5lib/{trie => _trie}/__init__.py (100%) rename html5lib/{trie => _trie}/_base.py (100%) rename html5lib/{trie => _trie}/datrie.py (100%) rename html5lib/{trie => _trie}/py.py (100%) rename html5lib/{utils.py => _utils.py} (100%) diff --git a/html5lib/ihatexml.py b/html5lib/_ihatexml.py similarity index 100% rename from html5lib/ihatexml.py rename to html5lib/_ihatexml.py diff --git a/html5lib/inputstream.py b/html5lib/_inputstream.py similarity index 99% rename from html5lib/inputstream.py rename to html5lib/_inputstream.py index dafe33ca..79f2331e 100644 --- a/html5lib/inputstream.py +++ b/html5lib/_inputstream.py @@ -10,7 +10,7 @@ from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase from .constants import ReparseException -from . import utils +from . import _utils from io import StringIO @@ -28,7 +28,7 @@ invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa -if utils.supports_lone_surrogates: +if _utils.supports_lone_surrogates: # Use one extra step of indirection and create surrogates with # eval. Not using this indirection would introduce an illegal # unicode literal on platforms not supporting such lone @@ -176,7 +176,7 @@ def __init__(self, source): """ - if not utils.supports_lone_surrogates: + if not _utils.supports_lone_surrogates: # Such platforms will have already checked for such # surrogate errors, so no need to do this checking. self.reportCharacterErrors = None @@ -304,9 +304,9 @@ def characterErrorsUCS2(self, data): codepoint = ord(match.group()) pos = match.start() # Pretty sure there should be endianness issues here - if utils.isSurrogatePair(data[pos:pos + 2]): + if _utils.isSurrogatePair(data[pos:pos + 2]): # We have a surrogate pair! - char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2]) + char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2]) if char_val in non_bmp_invalid_codepoints: self.errors.append("invalid-codepoint") skip = True diff --git a/html5lib/tokenizer.py b/html5lib/_tokenizer.py similarity index 99% rename from html5lib/tokenizer.py rename to html5lib/_tokenizer.py index 3f10c01f..6078f66a 100644 --- a/html5lib/tokenizer.py +++ b/html5lib/_tokenizer.py @@ -11,9 +11,9 @@ from .constants import tokenTypes, tagTokenTypes from .constants import replacementCharacters -from .inputstream import HTMLInputStream +from ._inputstream import HTMLInputStream -from .trie import Trie +from ._trie import Trie entitiesTrie = Trie(entities) diff --git a/html5lib/trie/__init__.py b/html5lib/_trie/__init__.py similarity index 100% rename from html5lib/trie/__init__.py rename to html5lib/_trie/__init__.py diff --git a/html5lib/trie/_base.py b/html5lib/_trie/_base.py similarity index 100% rename from html5lib/trie/_base.py rename to html5lib/_trie/_base.py diff --git a/html5lib/trie/datrie.py b/html5lib/_trie/datrie.py similarity index 100% rename from html5lib/trie/datrie.py rename to html5lib/_trie/datrie.py diff --git a/html5lib/trie/py.py b/html5lib/_trie/py.py similarity index 100% rename from html5lib/trie/py.py rename to html5lib/_trie/py.py diff --git a/html5lib/utils.py b/html5lib/_utils.py similarity index 100% rename from html5lib/utils.py rename to html5lib/_utils.py diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index c51f73b1..470c8a7d 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -8,13 +8,13 @@ except ImportError: from ordereddict import OrderedDict -from . import inputstream -from . import tokenizer +from . import _inputstream +from . import _tokenizer from . import treebuilders from .treebuilders.base import Marker -from . import utils +from . import _utils from .constants import ( spaceCharacters, asciiUpper2Lower, specialElements, headingElements, cdataElements, rcdataElements, @@ -82,7 +82,7 @@ def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kw self.innerHTMLMode = innerHTML self.container = container self.scripting = scripting - self.tokenizer = tokenizer.HTMLTokenizer(stream, parser=self, **kwargs) + self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs) self.reset() try: @@ -344,7 +344,7 @@ def parseRCDataRawtext(self, token, contentType): self.phase = self.phases["text"] -@utils.memoize +@_utils.memoize def getPhases(debug): def log(function): """Logger that records which phase processes each token""" @@ -586,13 +586,13 @@ class BeforeHeadPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("head", self.startTagHead) ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ (("head", "body", "html", "br"), self.endTagImplyHead) ]) self.endTagHandler.default = self.endTagOther @@ -632,7 +632,7 @@ class InHeadPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("title", self.startTagTitle), (("noframes", "style"), self.startTagNoFramesStyle), @@ -645,7 +645,7 @@ def __init__(self, parser, tree): ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("head", self.endTagHead), (("br", "html", "body"), self.endTagHtmlBodyBr) ]) @@ -687,8 +687,8 @@ def startTagMeta(self, token): # the abstract Unicode string, and just use the # ContentAttrParser on that, but using UTF-8 allows all chars # to be encoded and as a ASCII-superset works. - data = inputstream.EncodingBytes(attributes["content"].encode("utf-8")) - parser = inputstream.ContentAttrParser(data) + data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8")) + parser = _inputstream.ContentAttrParser(data) codec = parser.parse() self.parser.tokenizer.stream.changeEncoding(codec) @@ -735,14 +735,14 @@ class InHeadNoscriptPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), (("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand), (("head", "noscript"), self.startTagHeadNoscript), ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("noscript", self.endTagNoscript), ("br", self.endTagBr), ]) @@ -799,7 +799,7 @@ class AfterHeadPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("body", self.startTagBody), ("frameset", self.startTagFrameset), @@ -809,8 +809,8 @@ def __init__(self, parser, tree): ("head", self.startTagHead) ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([(("body", "html", "br"), - self.endTagHtmlBodyBr)]) + self.endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"), + self.endTagHtmlBodyBr)]) self.endTagHandler.default = self.endTagOther def processEOF(self): @@ -871,7 +871,7 @@ def __init__(self, parser, tree): # Set this to the default handler self.processSpaceCharacters = self.processSpaceCharactersNonPre - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), (("base", "basefont", "bgsound", "command", "link", "meta", "script", "style", "title"), @@ -918,7 +918,7 @@ def __init__(self, parser, tree): ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("body", self.endTagBody), ("html", self.endTagHtml), (("address", "article", "aside", "blockquote", "button", "center", @@ -1588,9 +1588,9 @@ def endTagOther(self, token): class TextPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([]) + self.startTagHandler = _utils.MethodDispatcher([]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("script", self.endTagScript)]) self.endTagHandler.default = self.endTagOther @@ -1622,7 +1622,7 @@ class InTablePhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#in-table def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("caption", self.startTagCaption), ("colgroup", self.startTagColgroup), @@ -1636,7 +1636,7 @@ def __init__(self, parser, tree): ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("table", self.endTagTable), (("body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"), self.endTagIgnore) @@ -1813,14 +1813,14 @@ class InCaptionPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"), self.startTagTableElement) ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("caption", self.endTagCaption), ("table", self.endTagTable), (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", @@ -1885,13 +1885,13 @@ class InColumnGroupPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("col", self.startTagCol) ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("colgroup", self.endTagColgroup), ("col", self.endTagCol) ]) @@ -1949,7 +1949,7 @@ class InTableBodyPhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#in-table0 def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("tr", self.startTagTr), (("td", "th"), self.startTagTableCell), @@ -1958,7 +1958,7 @@ def __init__(self, parser, tree): ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ (("tbody", "tfoot", "thead"), self.endTagTableRowGroup), ("table", self.endTagTable), (("body", "caption", "col", "colgroup", "html", "td", "th", @@ -2047,7 +2047,7 @@ class InRowPhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#in-row def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), (("td", "th"), self.startTagTableCell), (("caption", "col", "colgroup", "tbody", "tfoot", "thead", @@ -2055,7 +2055,7 @@ def __init__(self, parser, tree): ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("tr", self.endTagTr), ("table", self.endTagTable), (("tbody", "tfoot", "thead"), self.endTagTableRowGroup), @@ -2136,14 +2136,14 @@ class InCellPhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#in-cell def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"), self.startTagTableOther) ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ (("td", "th"), self.endTagTableCell), (("body", "caption", "col", "colgroup", "html"), self.endTagIgnore), (("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply) @@ -2212,7 +2212,7 @@ class InSelectPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("option", self.startTagOption), ("optgroup", self.startTagOptgroup), @@ -2222,7 +2222,7 @@ def __init__(self, parser, tree): ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("option", self.endTagOption), ("optgroup", self.endTagOptgroup), ("select", self.endTagSelect) @@ -2312,13 +2312,13 @@ class InSelectInTablePhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"), self.startTagTable) ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"), self.endTagTable) ]) @@ -2466,12 +2466,12 @@ class AfterBodyPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml) ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([("html", self.endTagHtml)]) + self.endTagHandler = _utils.MethodDispatcher([("html", self.endTagHtml)]) self.endTagHandler.default = self.endTagOther def processEOF(self): @@ -2514,7 +2514,7 @@ class InFramesetPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("frameset", self.startTagFrameset), ("frame", self.startTagFrame), @@ -2522,7 +2522,7 @@ def __init__(self, parser, tree): ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("frameset", self.endTagFrameset) ]) self.endTagHandler.default = self.endTagOther @@ -2571,13 +2571,13 @@ class AfterFramesetPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("noframes", self.startTagNoframes) ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([ + self.endTagHandler = _utils.MethodDispatcher([ ("html", self.endTagHtml) ]) self.endTagHandler.default = self.endTagOther @@ -2607,7 +2607,7 @@ class AfterAfterBodyPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml) ]) self.startTagHandler.default = self.startTagOther @@ -2645,7 +2645,7 @@ class AfterAfterFramesetPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("noframes", self.startTagNoFrames) ]) @@ -2707,7 +2707,7 @@ def processEndTag(self, token): def adjust_attributes(token, replacements): - if PY3 or utils.PY27: + if PY3 or _utils.PY27: needs_adjustment = viewkeys(token['data']) & viewkeys(replacements) else: needs_adjustment = frozenset(token['data']) & frozenset(replacements) diff --git a/html5lib/serializer.py b/html5lib/serializer.py index d58a6857..8a780c58 100644 --- a/html5lib/serializer.py +++ b/html5lib/serializer.py @@ -7,7 +7,7 @@ from .constants import voidElements, booleanAttributes, spaceCharacters from .constants import rcdataElements, entities, xmlEntities -from . import treewalkers, utils +from . import treewalkers, _utils from xml.sax.saxutils import escape spaceCharacters = "".join(spaceCharacters) @@ -33,7 +33,7 @@ continue if v != "&": if len(v) == 2: - v = utils.surrogatePairToCodepoint(v) + v = _utils.surrogatePairToCodepoint(v) else: v = ord(v) if v not in encode_entity_map or k.islower(): @@ -51,8 +51,8 @@ def htmlentityreplace_errors(exc): skip = False continue index = i + exc.start - if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]): - codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2]) + if _utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]): + codepoint = _utils.surrogatePairToCodepoint(exc.object[index:index + 2]) skip = True else: codepoint = ord(c) diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py index b6d20f24..9a411c77 100644 --- a/html5lib/tests/test_encoding.py +++ b/html5lib/tests/test_encoding.py @@ -5,7 +5,7 @@ import pytest from .support import get_data_files, test_dir, errorMessage, TestData as _TestData -from html5lib import HTMLParser, inputstream +from html5lib import HTMLParser, _inputstream def test_basic_prescan_length(): @@ -13,7 +13,7 @@ def test_basic_prescan_length(): pad = 1024 - len(data) + 1 data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-") assert len(data) == 1024 # Sanity - stream = inputstream.HTMLBinaryInputStream(data, useChardet=False) + stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False) assert 'utf-8' == stream.charEncoding[0].name @@ -22,7 +22,7 @@ def test_parser_reparse(): pad = 10240 - len(data) + 1 data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-") assert len(data) == 10240 # Sanity - stream = inputstream.HTMLBinaryInputStream(data, useChardet=False) + stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False) assert 'windows-1252' == stream.charEncoding[0].name p = HTMLParser(namespaceHTMLElements=False) doc = p.parse(data, useChardet=False) @@ -47,7 +47,7 @@ def test_parser_reparse(): ("windows-1252", b"", {}), ]) def test_parser_args(expected, data, kwargs): - stream = inputstream.HTMLBinaryInputStream(data, useChardet=False, **kwargs) + stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False, **kwargs) assert expected == stream.charEncoding[0].name p = HTMLParser() p.parse(data, useChardet=False, **kwargs) @@ -85,7 +85,7 @@ def runParserEncodingTest(data, encoding): def runPreScanEncodingTest(data, encoding): - stream = inputstream.HTMLBinaryInputStream(data, useChardet=False) + stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False) encoding = encoding.lower().decode("ascii") # Very crude way to ignore irrelevant tests @@ -111,6 +111,6 @@ def test_encoding(): else: def test_chardet(): with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp: - encoding = inputstream.HTMLInputStream(fp.read()).charEncoding + encoding = _inputstream.HTMLInputStream(fp.read()).charEncoding assert encoding[0].name == "big5" # pylint:enable=wrong-import-position diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py index e8d9fd86..27c39538 100644 --- a/html5lib/tests/test_stream.py +++ b/html5lib/tests/test_stream.py @@ -11,9 +11,9 @@ import six from six.moves import http_client, urllib -from html5lib.inputstream import (BufferedStream, HTMLInputStream, - HTMLUnicodeInputStream, HTMLBinaryInputStream) -from html5lib.utils import supports_lone_surrogates +from html5lib._inputstream import (BufferedStream, HTMLInputStream, + HTMLUnicodeInputStream, HTMLBinaryInputStream) +from html5lib._utils import supports_lone_surrogates def test_basic(): diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py index 255c1859..1440a722 100644 --- a/html5lib/tests/tokenizer.py +++ b/html5lib/tests/tokenizer.py @@ -8,8 +8,8 @@ import pytest from six import unichr -from html5lib.tokenizer import HTMLTokenizer -from html5lib import constants, utils +from html5lib._tokenizer import HTMLTokenizer +from html5lib import constants, _utils class TokenizerTestParser(object): @@ -156,7 +156,7 @@ def repl(m): except ValueError: # This occurs when unichr throws ValueError, which should # only be for a lone-surrogate. - if utils.supports_lone_surrogates: + if _utils.supports_lone_surrogates: raise return None diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py index 6a6b2a4c..e2328847 100644 --- a/html5lib/treebuilders/__init__.py +++ b/html5lib/treebuilders/__init__.py @@ -28,7 +28,7 @@ from __future__ import absolute_import, division, unicode_literals -from ..utils import default_etree +from .._utils import default_etree treeBuilderCache = {} diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py index 461dc04f..dcfac220 100644 --- a/html5lib/treebuilders/dom.py +++ b/html5lib/treebuilders/dom.py @@ -8,7 +8,7 @@ from . import base from .. import constants from ..constants import namespaces -from ..utils import moduleFactoryFactory +from .._utils import moduleFactoryFactory def getDomBuilder(DomImplementation): diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py index 956a717b..cb1d4aef 100644 --- a/html5lib/treebuilders/etree.py +++ b/html5lib/treebuilders/etree.py @@ -6,10 +6,10 @@ import re from . import base -from .. import ihatexml +from .. import _ihatexml from .. import constants from ..constants import namespaces -from ..utils import moduleFactoryFactory +from .._utils import moduleFactoryFactory tag_regexp = re.compile("{([^}]*)}(.*)") @@ -259,7 +259,7 @@ def serializeElement(element, indent=0): def tostring(element): # pylint:disable=unused-variable """Serialize an element and its child nodes to a string""" rv = [] - filter = ihatexml.InfosetFilter() + filter = _ihatexml.InfosetFilter() def serializeElement(element): if isinstance(element, ElementTree.ElementTree): diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py index 15e22a57..908820c0 100644 --- a/html5lib/treebuilders/etree_lxml.py +++ b/html5lib/treebuilders/etree_lxml.py @@ -20,7 +20,7 @@ from ..constants import DataLossWarning from .. import constants from . import etree as etree_builders -from .. import ihatexml +from .. import _ihatexml import lxml.etree as etree @@ -54,7 +54,7 @@ def _getChildNodes(self): def testSerializer(element): rv = [] - infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True) + infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True) def serializeElement(element, indent=0): if not hasattr(element, "tag"): @@ -182,7 +182,7 @@ class TreeBuilder(base.TreeBuilder): def __init__(self, namespaceHTMLElements, fullTree=False): builder = etree_builders.getETreeModule(etree, fullTree=fullTree) - infosetFilter = self.infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True) + infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True) self.namespaceHTMLElements = namespaceHTMLElements class Attributes(dict): diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py index 61172656..9e19a559 100644 --- a/html5lib/treewalkers/__init__.py +++ b/html5lib/treewalkers/__init__.py @@ -11,7 +11,7 @@ from __future__ import absolute_import, division, unicode_literals from .. import constants -from ..utils import default_etree +from .._utils import default_etree __all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshi", "etree_lxml"] diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py index 41c652a9..8f30f078 100644 --- a/html5lib/treewalkers/etree.py +++ b/html5lib/treewalkers/etree.py @@ -13,7 +13,7 @@ from six import string_types from . import base -from ..utils import moduleFactoryFactory +from .._utils import moduleFactoryFactory tag_regexp = re.compile("{([^}]*)}(.*)") diff --git a/html5lib/treewalkers/etree_lxml.py b/html5lib/treewalkers/etree_lxml.py index d456e3b9..fb236311 100644 --- a/html5lib/treewalkers/etree_lxml.py +++ b/html5lib/treewalkers/etree_lxml.py @@ -6,7 +6,7 @@ from . import base -from .. import ihatexml +from .. import _ihatexml def ensure_str(s): @@ -132,7 +132,7 @@ def __init__(self, tree): self.fragmentChildren = set() tree = Root(tree) base.NonRecursiveTreeWalker.__init__(self, tree) - self.filter = ihatexml.InfosetFilter() + self.filter = _ihatexml.InfosetFilter() def getNodeDetails(self, node): if isinstance(node, tuple): # Text node diff --git a/parse.py b/parse.py index d5087fb8..3e65c330 100755 --- a/parse.py +++ b/parse.py @@ -11,7 +11,7 @@ from html5lib import html5parser from html5lib import treebuilders, serializer, treewalkers from html5lib import constants -from html5lib import utils +from html5lib import _utils def parse(): @@ -116,7 +116,7 @@ def printOutput(parser, document, opts): import lxml.etree sys.stdout.write(lxml.etree.tostring(document, encoding="unicode")) elif tb == "etree": - sys.stdout.write(utils.default_etree.tostring(document, encoding="unicode")) + sys.stdout.write(_utils.default_etree.tostring(document, encoding="unicode")) elif opts.tree: if not hasattr(document, '__getitem__'): document = [document]