Skip to content

Commit

Permalink
Browse files: browse the repository at this point in the history
  • Loading branch information
gavishpoddar authored Jul 26, 2021
1 parent ad50964 commit 2412f66
Showing 1 changed file with 19 additions and 0 deletions.
19 changes: 19 additions & 0 deletions dateparser/languages/locale.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ def _generate_relative_translations(self, normalize=False):

def translate_search(self, search_string, settings=None):
dashes = ['-', '——', '—', '~']
word_joint_unsupported_laguage = ["zh", "ja"]
sentences = self._sentence_split(search_string, settings=settings)
dictionary = self._get_dictionary(settings=settings)
translated = []
Expand All @@ -184,10 +185,28 @@ def translate_search(self, search_string, settings=None):
original_tokens, simplified_tokens = self._simplify_split_align(sentence, settings=settings)
translated_chunk = []
original_chunk = []
simplified_tokens_length = len(simplified_tokens)
skip_next_token = False
for i, word in enumerate(simplified_tokens):
next_word = simplified_tokens[i + 1] if (simplified_tokens_length - 1) > i else ""
current_and_next_joined = self._join_chunk([word, next_word], settings=settings)
if skip_next_token:
skip_next_token = False
continue

if word == '' or word == ' ':
translated_chunk.append(word)
original_chunk.append(original_tokens[i])
elif (
current_and_next_joined in dictionary
and word not in dashes
and self.shortname not in word_joint_unsupported_laguage
):
translated_chunk.append(dictionary[current_and_next_joined])
original_chunk.append(
self._join_chunk([original_tokens[i], original_tokens[i + 1]], settings=settings)
)
skip_next_token = True
elif word in dictionary and word not in dashes:
translated_chunk.append(dictionary[word])
original_chunk.append(original_tokens[i])
Expand Down

0 comments on commit 2412f66

Please sign in to comment.