Skip to content

Commit

Permalink
Remove indexing from Ubuntu corpus trainer
Browse files Browse the repository at this point in the history
The indexing step in the Ubuntu corpus trainer seems to be failing on some systems.
  • Loading branch information
gunthercox committed Apr 6, 2019
1 parent a9d7cb3 commit 8e651cd
Showing 1 changed file with 2 additions and 8 deletions.
10 changes: 2 additions & 8 deletions chatterbot/trainers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from multiprocessing import Pool, Manager
from dateutil import parser as date_parser
from chatterbot.conversation import Statement
from chatterbot.tagging import PosLemmaTagger
from chatterbot import utils


Expand Down Expand Up @@ -176,7 +175,7 @@ def train(self, *corpus_paths):
self.chatbot.storage.create_many(statements_to_create)


def read_file(files, queue, preprocessors, tagger):
def read_file(files, queue, preprocessors):

statements_from_file = []

Expand All @@ -200,11 +199,9 @@ def read_file(files, queue, preprocessors, tagger):
for preprocessor in preprocessors:
statement = preprocessor(statement)

statement.search_text = tagger.get_bigram_pair_string(statement.text)
statement.search_in_response_to = previous_statement_search_text

previous_statement_text = statement.text
previous_statement_search_text = statement.search_text

statements_from_file.append(statement)

Expand Down Expand Up @@ -325,8 +322,6 @@ def track_progress(members):
def train(self):
import glob

tagger = PosLemmaTagger(language=self.chatbot.storage.tagger.language)

# Download and extract the Ubuntu dialog corpus if needed
corpus_download_path = self.download(self.data_download_url)

Expand Down Expand Up @@ -355,8 +350,7 @@ def chunks(items, items_per_chunk):
(
file_names,
queue,
self.chatbot.preprocessors,
tagger,
self.chatbot.preprocessors
) for file_names in file_groups
)

Expand Down

0 comments on commit 8e651cd

Please sign in to comment.