From b5b49a55d00ad8077612fba027b15e3afbbccd31 Mon Sep 17 00:00:00 2001 From: wd Date: Wed, 13 Nov 2019 14:41:43 +0800 Subject: [PATCH 1/3] Add eudict query api --- addon/queryApi/__init__.py | 4 +- addon/queryApi/eudict.py | 162 +++++++++++++++++++++++++++++++++++++ test/test_queryapi.py | 26 ++++++ 3 files changed, 190 insertions(+), 2 deletions(-) create mode 100644 addon/queryApi/eudict.py create mode 100644 test/test_queryapi.py diff --git a/addon/queryApi/__init__.py b/addon/queryApi/__init__.py index 1e7202b..8f441a8 100644 --- a/addon/queryApi/__init__.py +++ b/addon/queryApi/__init__.py @@ -1,3 +1,3 @@ -from . import youdao, bing +from . import youdao, bing, eudict -apis = [youdao.API, bing.API] +apis = [youdao.API, bing.API, eudict.API] diff --git a/addon/queryApi/eudict.py b/addon/queryApi/eudict.py new file mode 100644 index 0000000..04b40d7 --- /dev/null +++ b/addon/queryApi/eudict.py @@ -0,0 +1,162 @@ +import logging +import requests +from urllib3 import Retry +from urllib.parse import urlencode +from requests.adapters import HTTPAdapter +from ..misc import AbstractQueryAPI +from bs4 import BeautifulSoup +from bs4.element import Comment +logger = logging.getLogger('dict2Anki.queryApi.youdao') +__all__ = ['API'] + + +class Parser: + def __init__(self, html, term): + self._soap= BeautifulSoup(html, 'html.parser') + self.term = term + + @property + def definition(self) -> list: + els = self._soap.select('div #ExpFCChild li') + ret = [] + for el in els: + ret.append(el.get_text(strip=True)) + return ret + + @property + def pronunciations(self) -> dict: + url = 'https://api.frdic.com/api/v2/speech/speakweb?' + pron = { + 'AmEPhonetic': None, + 'AmEUrl': None, + 'BrEPhonetic': None, + 'BrEUrl': None + } + + els = self._soap.select('.phonitic-line') + if els: + el = els[0] + links = el.select('a') + phons = el.select('.Phonitic') + + try: + pron['BrEPhonetic'] = phons[0].get_text(strip=True) + except KeyError: + pass + + try: + pron['BrEUrl'] = url + links[0]['data-rel'] + except (TypeError, KeyError): + pass + + + try: + pron['AmEPhonetic'] = phons[1].get_text(strip=True) + except KeyError: + pass + + try: + pron['AmEUrl'] = url + links[0]['data-rel'] + except (TypeError, KeyError): + pass + + return pron + + @property + def BrEPhonetic(self)->str: + """英式音标""" + return self.pronunciations['BrEPhonetic'] + + @property + def AmEPhonetic(self)->str: + """美式音标""" + return self.pronunciations['AmEPhonetic'] + + @property + def BrEPron(self)->str: + """英式发音url""" + return self.pronunciations['BrEUrl'] + + @property + def AmEPron(self)->str: + """美式发音url""" + return self.pronunciations['AmEUrl'] + + @property + def sentence(self) -> list: + els = self._soap.select('div #ExpLJChild .lj_item') + ret = [] + for el in els: + try: + line = el.select('p') + sentence = line[0].get_text(strip=True) + sentence_translation = line[1].get_text(strip=True) + ret.append((sentence, sentence_translation)) + except KeyError as e: + pass + return ret + + @property + def image(self)->str: + els = self._soap.select('div .word-thumbnail-container img') + ret = None + if els: + try: + img = els[0] + if 'title' not in img.attrs: + ret = img['src'] + except KeyError: + pass + return ret + + @property + def phrase(self) -> list: + els = self._soap.select('div #ExpSPECChild #phrase') + ret = [] + for el in els: + try: + phrase = el.find('i').get_text(strip=True) + exp = el.find('div').get_text(strip=True) + ret.append((phrase, exp)) + except AttributeError: + pass + return ret + + @property + def result(self): + return { + 'term': self.term, + 'definition': self.definition, + 'phrase': self.phrase, + 'image': self.image, + 'sentence': self.sentence, + 'BrEPhonetic': self.BrEPhonetic, + 'AmEPhonetic': self.AmEPhonetic, + 'BrEPron': self.BrEPron, + 'AmEPron': self.AmEPron + } + + +class API(AbstractQueryAPI): + name = '欧陆词典 API' + timeout = 10 + headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'} + retries = Retry(total=5, backoff_factor=1, status_forcelist=[500, 502, 503, 504]) + session = requests.Session() + session.mount('http://', HTTPAdapter(max_retries=retries)) + session.mount('https://', HTTPAdapter(max_retries=retries)) + url = 'https://dict.eudic.net/dicts/en/{}' + parser = Parser + + @classmethod + def query(cls, word) -> dict: + queryResult = None + try: + rsp = cls.session.get(cls.url.format(word), timeout=cls.timeout) + logger.debug(f'code:{rsp.status_code}- word:{word} text:{rsp.text}') + queryResult = cls.parser(rsp.text, word).result + except Exception as e: + logger.exception(e) + finally: + logger.debug(queryResult) + return queryResult diff --git a/test/test_queryapi.py b/test/test_queryapi.py new file mode 100644 index 0000000..0cee6d2 --- /dev/null +++ b/test/test_queryapi.py @@ -0,0 +1,26 @@ +from addon.queryApi.eudict import API + +api = API() + +keys = ('term', 'definition', 'phrase', 'image', 'sentence', 'BrEPhonetic', 'AmEPhonetic', 'BrEPron', 'AmEPron') +def check_result(res): + ret = [] + for key in keys: + if not res.get(key): + ret.append(key) + return ret + +def test_eudict_no_phrase_and_image(): + res = api.query('stint') + ret = check_result(res) + assert set(ret) - set(['image', 'phrase']) == set() + +def test_eudict_with_all(): + res = api.query('flower') + ret = check_result(res) + assert set(ret) == set() + +def test_eudict_with_none(): + res = api.query('asafesdf') + ret = check_result(res) + assert set(ret) - set(keys) == set() From b51945843e0fd9b92236a8867a8926c5427c57ba Mon Sep 17 00:00:00 2001 From: wd Date: Wed, 13 Nov 2019 15:11:54 +0800 Subject: [PATCH 2/3] fix query definition failed for implication --- addon/queryApi/eudict.py | 3 ++- test/test_queryapi.py | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/addon/queryApi/eudict.py b/addon/queryApi/eudict.py index 04b40d7..530ff6b 100644 --- a/addon/queryApi/eudict.py +++ b/addon/queryApi/eudict.py @@ -17,7 +17,8 @@ def __init__(self, html, term): @property def definition(self) -> list: - els = self._soap.select('div #ExpFCChild li') + els = self._soap.select('div #ExpFCChild li') # 多词性 + els = self._soap.select('div #ExpFCChild .exp') if not els else els # 单一词性 ret = [] for el in els: ret.append(el.get_text(strip=True)) diff --git a/test/test_queryapi.py b/test/test_queryapi.py index 0cee6d2..6b06883 100644 --- a/test/test_queryapi.py +++ b/test/test_queryapi.py @@ -24,3 +24,8 @@ def test_eudict_with_none(): res = api.query('asafesdf') ret = check_result(res) assert set(ret) - set(keys) == set() + +def test_eudict_implication_all(): + res = api.query('implication') + ret = check_result(res) + assert set(ret) - set(['image']) == set() From dc8abf9e67ecb063adbb714e8655c0f3b39f9065 Mon Sep 17 00:00:00 2001 From: megachweng Date: Wed, 13 Nov 2019 15:51:32 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=AC=A7=E9=99=86?= =?UTF-8?q?=E8=AF=8D=E5=85=B8=E6=9F=A5=E8=AF=A2API?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- addon/constants.py | 2 +- addon/queryApi/eudict.py | 20 +++++++++----------- anki_addon_page.tpl | 5 +++++ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/addon/constants.py b/addon/constants.py index d307132..703ebc4 100644 --- a/addon/constants.py +++ b/addon/constants.py @@ -1,4 +1,4 @@ -VERSION = 'v6.1.0' +VERSION = 'v6.1.1' RELEASE_URL = 'https://github.com/megachweng/Dict2Anki' VERSION_CHECK_API = 'https://api.github.com/repos/megachweng/Dict2Anki/releases/latest' MODEL_NAME = f'Dict2Anki-{VERSION}' diff --git a/addon/queryApi/eudict.py b/addon/queryApi/eudict.py index 530ff6b..8c913ed 100644 --- a/addon/queryApi/eudict.py +++ b/addon/queryApi/eudict.py @@ -1,24 +1,23 @@ import logging import requests from urllib3 import Retry -from urllib.parse import urlencode from requests.adapters import HTTPAdapter from ..misc import AbstractQueryAPI from bs4 import BeautifulSoup -from bs4.element import Comment + logger = logging.getLogger('dict2Anki.queryApi.youdao') __all__ = ['API'] class Parser: def __init__(self, html, term): - self._soap= BeautifulSoup(html, 'html.parser') + self._soap = BeautifulSoup(html, 'html.parser') self.term = term @property def definition(self) -> list: - els = self._soap.select('div #ExpFCChild li') # 多词性 - els = self._soap.select('div #ExpFCChild .exp') if not els else els # 单一词性 + els = self._soap.select('div #ExpFCChild li') # 多词性 + els = self._soap.select('div #ExpFCChild .exp') if not els else els # 单一词性 ret = [] for el in els: ret.append(el.get_text(strip=True)) @@ -50,7 +49,6 @@ def pronunciations(self) -> dict: except (TypeError, KeyError): pass - try: pron['AmEPhonetic'] = phons[1].get_text(strip=True) except KeyError: @@ -64,22 +62,22 @@ def pronunciations(self) -> dict: return pron @property - def BrEPhonetic(self)->str: + def BrEPhonetic(self) -> str: """英式音标""" return self.pronunciations['BrEPhonetic'] @property - def AmEPhonetic(self)->str: + def AmEPhonetic(self) -> str: """美式音标""" return self.pronunciations['AmEPhonetic'] @property - def BrEPron(self)->str: + def BrEPron(self) -> str: """英式发音url""" return self.pronunciations['BrEUrl'] @property - def AmEPron(self)->str: + def AmEPron(self) -> str: """美式发音url""" return self.pronunciations['AmEUrl'] @@ -98,7 +96,7 @@ def sentence(self) -> list: return ret @property - def image(self)->str: + def image(self) -> str: els = self._soap.select('div .word-thumbnail-container img') ret = None if els: diff --git a/anki_addon_page.tpl b/anki_addon_page.tpl index 0c57503..17b8f0d 100644 --- a/anki_addon_page.tpl +++ b/anki_addon_page.tpl @@ -2,6 +2,11 @@ Dict2Anki 是一款方便有道词典欧陆词典用户同步生成单词本卡片至Anki的插件 Change Logs: +v6.1.1: + 添加欧陆词典查询API THX to wd +v6.1.0: + 支持第三方登陆 + 加入模版字段检查 v6.0.2: 添加英英注释 THX to deluser8 v6.0.1: