diff --git a/gensim/corpora/wikicorpus.py b/gensim/corpora/wikicorpus.py index f9963b0b27..7214d6b2b0 100644 --- a/gensim/corpora/wikicorpus.py +++ b/gensim/corpora/wikicorpus.py @@ -26,10 +26,7 @@ import signal from pickle import PicklingError # LXML isn't faster, so let's go with the built-in solution -try: - from xml.etree.cElementTree import iterparse -except ImportError: - from xml.etree.ElementTree import iterparse +from xml.etree.ElementTree import iterparse from gensim import utils diff --git a/gensim/scripts/segment_wiki.py b/gensim/scripts/segment_wiki.py index 3e812d715c..ce9c3398c0 100644 --- a/gensim/scripts/segment_wiki.py +++ b/gensim/scripts/segment_wiki.py @@ -61,10 +61,7 @@ import multiprocessing import re import sys -try: - from xml.etree import cElementTree as ET -except ImportError: - from xml.etree import ElementTree as ET +from xml.etree import ElementTree from functools import partial from gensim.corpora.wikicorpus import IGNORED_NAMESPACES, WikiCorpus, filter_wiki, find_interlinks, get_namespace, utils @@ -186,7 +183,7 @@ def extract_page_xmls(f): XML strings for page tags. """ - elems = (elem for _, elem in ET.iterparse(f, events=("end",))) + elems = (elem for _, elem in ElementTree.iterparse(f, events=("end",))) elem = next(elems) namespace = get_namespace(elem.tag) @@ -195,7 +192,7 @@ def extract_page_xmls(f): for elem in elems: if elem.tag == page_tag: - yield ET.tostring(elem) + yield ElementTree.tostring(elem) # Prune the element tree, as per # http://www.ibm.com/developerworks/xml/library/x-hiperfparse/ # except that we don't need to prune backlinks from the parent @@ -224,7 +221,7 @@ def segment(page_xml, include_interlinks=False): (Optionally) [(interlink_article, interlink_text), ...]). """ - elem = ET.fromstring(page_xml) + elem = ElementTree.fromstring(page_xml) filter_namespaces = ('0',) namespace = get_namespace(elem.tag) ns_mapping = {"ns": namespace} diff --git a/setup.py b/setup.py index 09607d951a..38b7319f96 100644 --- a/setup.py +++ b/setup.py @@ -371,11 +371,11 @@ def run(self): 'Intended Audience :: Science/Research', 'License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)', 'Operating System :: OS Independent', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3 :: Only', 'Topic :: Scientific/Engineering :: Artificial Intelligence', 'Topic :: Scientific/Engineering :: Information Analysis', 'Topic :: Text Processing :: Linguistic',