-
Notifications
You must be signed in to change notification settings - Fork 6
/
translate.py
137 lines (112 loc) · 4.21 KB
/
translate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# -*- coding: utf-8 -*-
"""
Translator module that uses the Google Translate API.
Adapted from Terry Yin's google-translate-python.
Language detection added by Steven Loria.
"""
from __future__ import absolute_import
import sys
PY2 = int(sys.version[0]) == 2
import codecs
import ctypes
import json
import re
if PY2:
import urllib2 as request
from urllib import urlencode
else: # PY3
from urllib import request
from urllib.parse import urlencode
class Translator(object):
"""A language translator and detector.
Usage:
::
>>> from translate import Translator
>>> t = Translator()
>>> t.translate('hello', from_lang='en', to_lang='fr')
u'bonjour'
>>> t.detect("hola")
u'es'
"""
url = "https://translate.google.se/translate_a/t?client=webapp&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&dt=at&ie=UTF-8&oe=UTF-8&otf=2&ssel=0&tsel=0&kc=1"
headers = {
'Accept': '*/*',
'Connection': 'keep-alive',
'User-Agent': (
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36')
}
def translate(self, source, from_lang='auto', to_lang='en', host=None, type_=None):
"""Translate the source text from one language to another."""
if PY2:
source = source.encode('utf-8')
data = {"q": source}
url = self.url + "&sl=" + from_lang + "&tl=" + to_lang + "&hl=" + to_lang + "&tk=" + _calculate_tk(source)
response = self._request(url, host=host, type_=type_, data=data)
result = json.loads(response)
if isinstance(result, list):
try:
result = result[0] # ignore detected language
except IndexError:
pass
self._validate_translation(source, result)
return result
def detect(self, source, host=None, type_=None):
"""Detect the source text's language."""
if PY2:
source = source.encode('utf-8')
if len(source) < 3:
raise Exception('Must provide a string with at least 3 characters.')
data = {"q": source}
url = self.url + "&sl=auto&tk=" + _calculate_tk(source)
response = self._request(url, host=host, type_=type_, data=data)
result, language = json.loads(response)
return language
def _validate_translation(self, source, result):
"""Validate API returned expected schema, and that the translated text
is different than the original string.
"""
if not result:
raise Exception('Translation API returned and empty response.')
if PY2:
result = result.encode('utf-8')
def _request(self, url, host=None, type_=None, data=None):
encoded_data = urlencode(data).encode('utf-8')
req = request.Request(url=url, headers=self.headers, data=encoded_data)
if host or type_:
req.set_proxy(host=host, type=type_)
resp = request.urlopen(req)
content = resp.read()
return content.decode('utf-8')
def _unescape(text):
"""Unescape unicode character codes within a string.
"""
pattern = r'\\{1,2}u[0-9a-fA-F]{4}'
decode = lambda x: codecs.getdecoder('unicode_escape')(x.group())[0]
return re.sub(pattern, decode, text)
def _calculate_tk(source):
"""Reverse engineered cross-site request protection."""
# Source: https://github.com/soimort/translate-shell/issues/94#issuecomment-165433715
# Source: http://www.liuxiatool.com/t.php
tkk = [406398, 561666268 + 1526272306]
b = tkk[0]
if PY2:
d = map(ord, source)
else:
d = source.encode('utf-8')
def RL(a, b):
for c in range(0, len(b) - 2, 3):
d = b[c + 2]
d = ord(d) - 87 if d >= 'a' else int(d)
xa = ctypes.c_uint32(a).value
d = xa >> d if b[c + 1] == '+' else xa << d
a = a + d & 4294967295 if b[c] == '+' else a ^ d
return ctypes.c_int32(a).value
a = b
for di in d:
a = RL(a + di, "+-a^+6")
a = RL(a, "+-3^+b+-f")
a ^= tkk[1]
a = a if a >= 0 else ((a & 2147483647) + 2147483648)
a %= pow(10, 6)
tk = '{0:d}.{1:d}'.format(a, a ^ b)
return tk