From c8f2f52f507dfc4766141157243bd42e7a1e7abb Mon Sep 17 00:00:00 2001 From: wd Date: Fri, 15 Nov 2019 15:03:39 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=9C=89=E7=9A=84=E8=AF=8D?= =?UTF-8?q?=E8=8E=B7=E5=8F=96=E4=B8=8D=E5=88=B0=20defination=20=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98=20(#75)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 修复有的词获取不到 defination 的问题 * fix lost spaces in sentences --- addon/queryApi/eudict.py | 26 ++++++++++++++++++++++---- test/test_queryapi.py | 15 ++++++++++++++- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/addon/queryApi/eudict.py b/addon/queryApi/eudict.py index 8c913ed..c7889bf 100644 --- a/addon/queryApi/eudict.py +++ b/addon/queryApi/eudict.py @@ -16,11 +16,29 @@ def __init__(self, html, term): @property def definition(self) -> list: - els = self._soap.select('div #ExpFCChild li') # 多词性 - els = self._soap.select('div #ExpFCChild .exp') if not els else els # 单一词性 ret = [] + div = self._soap.select('div #ExpFCChild') + if not div: + return ret + + div = div[0] + els = div.select('li') # 多词性 + if not els: # 单一词性 + els = div.select('.exp') + if not els: # 还有一奇怪的情况,不在任何的标签里面 + trans = div.find(id='trans') + trans.replace_with('') if trans else '' + + script = div.find('script') + script.replace_with('') if script else '' + + for atag in div.find_all('a'): # 赞踩这些字样 + atag.replace_with('') + els = [div] + for el in els: ret.append(el.get_text(strip=True)) + return ret @property @@ -88,7 +106,7 @@ def sentence(self) -> list: for el in els: try: line = el.select('p') - sentence = line[0].get_text(strip=True) + sentence = "".join([ str(c) for c in line[0].contents]) sentence_translation = line[1].get_text(strip=True) ret.append((sentence, sentence_translation)) except KeyError as e: @@ -152,7 +170,7 @@ def query(cls, word) -> dict: queryResult = None try: rsp = cls.session.get(cls.url.format(word), timeout=cls.timeout) - logger.debug(f'code:{rsp.status_code}- word:{word} text:{rsp.text}') + logger.debug(f'code:{rsp.status_code}- word:{word} text:{rsp.text[:100]}') queryResult = cls.parser(rsp.text, word).result except Exception as e: logger.exception(e) diff --git a/test/test_queryapi.py b/test/test_queryapi.py index 6b06883..e6d9d00 100644 --- a/test/test_queryapi.py +++ b/test/test_queryapi.py @@ -25,7 +25,20 @@ def test_eudict_with_none(): ret = check_result(res) assert set(ret) - set(keys) == set() -def test_eudict_implication_all(): +def test_eudict_implication(): + # 不包含图片,定义不在正常规则内,包含 trans res = api.query('implication') ret = check_result(res) assert set(ret) - set(['image']) == set() + +def test_eudict_epitomize(): + # 不包含图片,定义不在正常规则内 + res = api.query('epitomize') + ret = check_result(res) + assert set(ret) - set(['image', 'phrase']) == set() + +def test_eudict_periodical(): + # 包含图片,定义不在正常规则内 + res = api.query('periodical') + ret = check_result(res) + assert set(ret) - set(['image', 'phrase']) == set()