Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor by pep8 #1550

Merged
merged 21 commits into from
Sep 5, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 27 additions & 4 deletions ez_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,25 @@
DEFAULT_VERSION = "1.3.2"
DEFAULT_URL = "https://pypi.python.org/packages/source/s/setuptools/"


def _python_cmd(*args):
args = (sys.executable,) + args
return subprocess.call(args) == 0


def _check_call_py24(cmd, *args, **kwargs):
res = subprocess.call(cmd, *args, **kwargs)

class CalledProcessError(Exception):
pass
if not res == 0:
msg = "Command '%s' return non-zero exit status %d" % (cmd, res)
raise CalledProcessError(msg)


vars(subprocess).setdefault('check_call', _check_call_py24)


def _install(tarball, install_args=()):
# extracting the tarball
tmpdir = tempfile.mkdtemp()
Expand Down Expand Up @@ -151,6 +157,7 @@ def use_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL,
return _do_download(version, download_base, to_dir,
download_delay)


def _clean_check(cmd, target):
"""
Run the command to download target. If the command fails, clean up before
Expand All @@ -163,6 +170,7 @@ def _clean_check(cmd, target):
os.unlink(target)
raise


def download_file_powershell(url, target):
"""
Download the file at url to target using Powershell (which will validate
Expand All @@ -176,6 +184,7 @@ def download_file_powershell(url, target):
]
_clean_check(cmd, target)


def has_powershell():
if platform.system() != 'Windows':
return False
Expand All @@ -184,50 +193,58 @@ def has_powershell():
try:
try:
subprocess.check_call(cmd, stdout=devnull, stderr=devnull)
except:
except Exception:
return False
finally:
devnull.close()
return True


download_file_powershell.viable = has_powershell


def download_file_curl(url, target):
cmd = ['curl', url, '--silent', '--output', target]
_clean_check(cmd, target)


def has_curl():
cmd = ['curl', '--version']
devnull = open(os.path.devnull, 'wb')
try:
try:
subprocess.check_call(cmd, stdout=devnull, stderr=devnull)
except:
except Exception:
return False
finally:
devnull.close()
return True


download_file_curl.viable = has_curl


def download_file_wget(url, target):
cmd = ['wget', url, '--quiet', '--output-document', target]
_clean_check(cmd, target)


def has_wget():
cmd = ['wget', '--version']
devnull = open(os.path.devnull, 'wb')
try:
try:
subprocess.check_call(cmd, stdout=devnull, stderr=devnull)
except:
except Exception:
return False
finally:
devnull.close()
return True


download_file_wget.viable = has_wget


def download_file_insecure(url, target):
"""
Use Python to download the file, even though it cannot authenticate the
Expand All @@ -251,8 +268,10 @@ def download_file_insecure(url, target):
if dst:
dst.close()


download_file_insecure.viable = lambda: True


def get_best_downloader():
downloaders = [
download_file_powershell,
Expand All @@ -265,6 +284,7 @@ def get_best_downloader():
if dl.viable():
return dl


def download_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL,
to_dir=os.curdir, delay=15,
downloader_factory=get_best_downloader):
Expand Down Expand Up @@ -317,7 +337,7 @@ def _extractall(self, path=".", members=None):
# Reverse sort directories.
if sys.version_info < (2, 4):
def sorter(dir1, dir2):
return cmp(dir1.name, dir2.name)
return cmp(dir1.name, dir2.name) # noqa:F821
directories.sort(sorter)
directories.reverse()
else:
Expand Down Expand Up @@ -350,6 +370,7 @@ def _build_install_args(options):
install_args.append('--user')
return install_args


def _parse_args():
"""
Parse the command line for options
Expand All @@ -371,12 +392,14 @@ def _parse_args():
# positional arguments are ignored
return options


def main(version=DEFAULT_VERSION):
"""Install or upgrade setuptools and EasyInstall"""
options = _parse_args()
tarball = download_setuptools(download_base=options.download_base,
downloader_factory=options.downloader_factory)
return _install(tarball, _build_install_args(options))


if __name__ == '__main__':
sys.exit(main())
7 changes: 5 additions & 2 deletions gensim/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,19 @@
similarities within a corpus of documents.
"""

from gensim import parsing, matutils, interfaces, corpora, models, similarities, summarization
from gensim import parsing, matutils, interfaces, corpora, models, similarities, summarization # noqa:F401
import logging

__version__ = '2.3.0'


class NullHandler(logging.Handler):
"""For python versions <= 2.6; same as `logging.NullHandler` in 2.7."""

def emit(self, record):
pass


logger = logging.getLogger('gensim')
if len(logger.handlers) == 0: # To ensure reload() doesn't add another one
if len(logger.handlers) == 0: # To ensure reload() doesn't add another one
logger.addHandler(NullHandler())
22 changes: 11 additions & 11 deletions gensim/corpora/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
"""

# bring corpus classes directly into package namespace, to save some typing
from .indexedcorpus import IndexedCorpus # must appear before the other classes
from .indexedcorpus import IndexedCorpus # noqa:F401 must appear before the other classes

from .mmcorpus import MmCorpus
from .bleicorpus import BleiCorpus
from .svmlightcorpus import SvmLightCorpus
from .lowcorpus import LowCorpus
from .dictionary import Dictionary
from .hashdictionary import HashDictionary
from .wikicorpus import WikiCorpus
from .textcorpus import TextCorpus
from .ucicorpus import UciCorpus
from .malletcorpus import MalletCorpus
from .mmcorpus import MmCorpus # noqa:F401
Copy link
Owner

@piskvorky piskvorky Aug 27, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

-1: we don't want to litter the code with some tool-specific constructs.

Instead, open an issue with the tool that requires this, it's a bug on their side if they cannot handle this line.

If you find such comments in other parts of gensim, please remove them too.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@piskvorky it's not a tool bug, it's really "unused import" by PEP

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But it is used -- it makes the classes available directly from the package. Without it, you won't be able to from corpora import MmCorpus.

Or how can this be done in other way, that doesn't trigger F401?

Copy link
Contributor

@menshikh-iv menshikh-iv Aug 28, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it makes the classes available directly from the package

You are right, but now in the code, it's unused import.

I know another way - use this imports (like your example) in gensim internally, but I don't think that this is a good idea. For my opinion, noqa suppress is OK.

from .bleicorpus import BleiCorpus # noqa:F401
from .svmlightcorpus import SvmLightCorpus # noqa:F401
from .lowcorpus import LowCorpus # noqa:F401
from .dictionary import Dictionary # noqa:F401
from .hashdictionary import HashDictionary # noqa:F401
from .wikicorpus import WikiCorpus # noqa:F401
from .textcorpus import TextCorpus # noqa:F401
from .ucicorpus import UciCorpus # noqa:F401
from .malletcorpus import MalletCorpus # noqa:F401
2 changes: 1 addition & 1 deletion gensim/corpora/bleicorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from os import path
import logging

from gensim import interfaces, utils
from gensim import utils
from gensim.corpora import IndexedCorpus
from six.moves import xrange

Expand Down
4 changes: 2 additions & 2 deletions gensim/corpora/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,14 +333,14 @@ def merge_with(self, other):
old2new[other_id] = new_id
try:
self.dfs[new_id] += other.dfs[other_id]
except:
except Exception:
# `other` isn't a Dictionary (probably just a dict) => ignore dfs, keep going
pass
try:
self.num_docs += other.num_docs
self.num_nnz += other.num_nnz
self.num_pos += other.num_pos
except:
except Exception:
pass

import gensim.models
Expand Down
2 changes: 1 addition & 1 deletion gensim/corpora/hashdictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def add_documents(self, documents):
for docno, document in enumerate(documents):
if docno % 10000 == 0:
logger.info("adding document #%i to %s" % (docno, self))
_ = self.doc2bow(document, allow_update=True) # ignore the result, here we only care about updating token ids
self.doc2bow(document, allow_update=True) # ignore the result, here we only care about updating token ids
logger.info(
"built %s from %i documents (total %i corpus positions)",
self, self.num_docs, self.num_pos)
Expand Down
3 changes: 1 addition & 2 deletions gensim/corpora/indexedcorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def __init__(self, fname, index_fname=None):
# change self.index into a numpy.ndarray to support fancy indexing
self.index = numpy.asarray(self.index)
logger.info("loaded corpus index from %s" % index_fname)
except:
except Exception:
self.index = None
self.length = None

Expand Down Expand Up @@ -130,5 +130,4 @@ def __getitem__(self, docno):
raise ValueError('Unrecognised value for docno, use either a single integer, a slice or a numpy.ndarray')



# endclass IndexedCorpus
14 changes: 7 additions & 7 deletions gensim/corpora/lowcorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,24 +65,24 @@ def __init__(self, fname, id2word=None, line2words=split_on_space):
IndexedCorpus.__init__(self, fname)
logger.info("loading corpus from %s" % fname)

self.fname = fname # input file, see class doc for format
self.line2words = line2words # how to translate lines into words (simply split on space by default)
self.fname = fname # input file, see class doc for format
self.line2words = line2words # how to translate lines into words (simply split on space by default)
self.num_docs = self._calculate_num_docs()

if not id2word:
# build a list of all word types in the corpus (distinct words)
logger.info("extracting vocabulary from the corpus")
all_terms = set()
self.use_wordids = False # return documents as (word, wordCount) 2-tuples
self.use_wordids = False # return documents as (word, wordCount) 2-tuples
for doc in self:
all_terms.update(word for word, wordCnt in doc)
all_terms = sorted(all_terms) # sort the list of all words; rank in that list = word's integer id
self.id2word = dict(izip(xrange(len(all_terms)), all_terms)) # build a mapping of word id(int) -> word (string)
all_terms = sorted(all_terms) # sort the list of all words; rank in that list = word's integer id
self.id2word = dict(izip(xrange(len(all_terms)), all_terms)) # build a mapping of word id(int) -> word (string)
else:
logger.info("using provided word mapping (%i ids)" % len(id2word))
self.id2word = id2word
self.num_terms = len(self.word2id)
self.use_wordids = True # return documents as (wordIndex, wordCount) 2-tuples
self.use_wordids = True # return documents as (wordIndex, wordCount) 2-tuples

logger.info("loaded corpus with %i documents and %i terms from %s" %
(self.num_docs, self.num_terms, fname))
Expand Down Expand Up @@ -135,7 +135,7 @@ def __iter__(self):
"""
with utils.smart_open(self.fname) as fin:
for lineno, line in enumerate(fin):
if lineno > 0: # ignore the first line = number of documents
if lineno > 0: # ignore the first line = number of documents
yield self.line2doc(line)

@staticmethod
Expand Down
4 changes: 2 additions & 2 deletions gensim/corpora/malletcorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ def __iter__(self):
yield self.line2doc(line)

def line2doc(self, line):
l = [word for word in utils.to_unicode(line).strip().split(' ') if word]
docid, doclang, words = l[0], l[1], l[2:]
splited_line = [word for word in utils.to_unicode(line).strip().split(' ') if word]
docid, doclang, words = splited_line[0], splited_line[1], splited_line[2:]

doc = super(MalletCorpus, self).line2doc(' '.join(words))

Expand Down
3 changes: 2 additions & 1 deletion gensim/corpora/mmcorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import logging

from gensim import interfaces, matutils
from gensim import matutils
from gensim.corpora import IndexedCorpus


Expand All @@ -23,6 +23,7 @@ class MmCorpus(matutils.MmReader, IndexedCorpus):
"""
Corpus in the Matrix Market format.
"""

def __init__(self, fname):
# avoid calling super(), too confusing
IndexedCorpus.__init__(self, fname)
Expand Down
Loading