diff --git a/.gitignore b/.gitignore
index 7fc43c278..1bc5b4a87 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@ venv
 *.pyc
 *.swp
 *.egg-info
+*.egg/*
 
 # Database files
 *.sqlite3
diff --git a/chatterbot/utils/stop_words.py b/chatterbot/utils/stop_words.py
index 0aff12c49..cf15481ad 100644
--- a/chatterbot/utils/stop_words.py
+++ b/chatterbot/utils/stop_words.py
@@ -11,10 +11,19 @@ class StopWordsManager(object):
     def __init__(self):
         from nltk.data import find
         from nltk import download
+        import os
 
         # Download the stopwords data only if it is not already downloaded
+        # Probe the per-user nltk_data directory directly. On Windows use
+        # %APPDATA% when it is set; fall back to the home directory
+        # otherwise so os.path.join() is never passed None.
+        data_root = os.getenv('APPDATA') if os.name == 'nt' else None
+        stopwords_path = os.path.join(
+            data_root or os.path.expanduser('~'),
+            'nltk_data', 'corpora', 'stopwords.zip')
         try:
-            find('stopwords.zip')
+            if not os.path.isfile(stopwords_path):
+                find('stopwords.zip')
         except LookupError:
             download('stopwords')
 
diff --git a/chatterbot/utils/tokenizer.py b/chatterbot/utils/tokenizer.py
index 525fb3c6c..236e4528f 100644
--- a/chatterbot/utils/tokenizer.py
+++ b/chatterbot/utils/tokenizer.py
@@ -6,10 +6,19 @@ class Tokenizer(object):
     def __init__(self):
         from nltk.data import find
         from nltk import download
+        import os
 
         # Download the punkt data only if it is not already downloaded
+        # Probe the per-user nltk_data directory directly. On Windows use
+        # %APPDATA% when it is set; fall back to the home directory
+        # otherwise so os.path.join() is never passed None.
+        data_root = os.getenv('APPDATA') if os.name == 'nt' else None
+        punkt_path = os.path.join(
+            data_root or os.path.expanduser('~'),
+            'nltk_data', 'tokenizers', 'punkt.zip')
         try:
-            find('punkt.zip')
+            if not os.path.isfile(punkt_path):
+                find('punkt.zip')
         except LookupError:
             download('punkt')
 
diff --git a/chatterbot/utils/wordnet.py b/chatterbot/utils/wordnet.py
index 10a42e0d5..96fb84d17 100644
--- a/chatterbot/utils/wordnet.py
+++ b/chatterbot/utils/wordnet.py
@@ -11,9 +11,19 @@ class Wordnet(object):
     def __init__(self):
         from nltk.data import find
         from nltk import download
-
+        import os
+
+        # Download the wordnet data only if it is not already downloaded
+        # Probe the per-user nltk_data directory directly. On Windows use
+        # %APPDATA% when it is set; fall back to the home directory
+        # otherwise so os.path.join() is never passed None.
+        data_root = os.getenv('APPDATA') if os.name == 'nt' else None
+        wordnet_path = os.path.join(
+            data_root or os.path.expanduser('~'),
+            'nltk_data', 'corpora', 'wordnet.zip')
         try:
-            find('wordnet.zip')
+            if not os.path.isfile(wordnet_path):
+                find('wordnet.zip')
         except LookupError:
             download('wordnet')
 