From af73c1ec738229d1bc3f1b0486d9b5196d237d9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20H=C3=B6rist?=
Date: Mon, 29 Apr 2024 18:43:34 +0200
Subject: [PATCH 1/2] Tests: Add type hints

---
 pyproject.toml              |  3 --
 tests/test_analyze.py       | 30 +++++++-------
 tests/test_core.py          | 78 ++++++++++++++++++++++---------------
 tests/test_dict.py          |  6 +--
 tests/test_nfkc.py          |  7 +++-
 tests/test_unicode_codes.py | 14 +++----
 tests/test_versions.py      | 17 ++++----
 tests/test_zwj_keep.py      |  5 ++-
 tests/test_zwj_remove.py    |  5 ++-
 9 files changed, 89 insertions(+), 76 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 1256ba90..24d65046 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -67,8 +67,5 @@ exclude = [
     ".git",
     ".venv",
     "build",
-    "docs",
-    "example",
-    "tests",
     "utils",
 ]
diff --git a/tests/test_analyze.py b/tests/test_analyze.py
index d892d55f..724dadc3 100644
--- a/tests/test_analyze.py
+++ b/tests/test_analyze.py
@@ -15,44 +15,57 @@ def test_analyze():
 def test_analyze():
     result = list(emoji.analyze('abc\U0001F472'))
     assert len(result) == 1
+    assert not isinstance(result[0].value, str)
     assert result[0].value.emoji == '\U0001F472'
 
     result = list(emoji.analyze('abc\U0001F472', non_emoji=True))
     assert result[0].value == 'a'
+    assert not isinstance(result[3].value, str)
     assert result[3].value.emoji == '\U0001F472'
 
     result = list(emoji.analyze('\U0001F477\U0001F3FB\U0000200D\U00002640'))
     assert len(result) == 1
+    assert not isinstance(result[0].value, str)
     assert result[0].value.emoji == '\U0001F477\U0001F3FB\U0000200D\U00002640'
 
     result = list(emoji.analyze('\U0001F477\U0001F3FC\U0001F477\U0001F3FB\U0000200D\U00002640'))
     assert len(result) == 2
+    assert not isinstance(result[0].value, str)
     assert result[0].value.emoji == '\U0001F477\U0001F3FC'
+    assert not isinstance(result[1].value, str)
     assert result[1].value.emoji == '\U0001F477\U0001F3FB\U0000200D\U00002640'
 
 
 def test_analyze_non_rgi_zwj():
     result = list(emoji.analyze('\U0001F468\U0001F3FF\U0000200D\U0001F469\U0001F3FB\U0000200D\U0001F467\U0001F3FD'))
     assert len(result) == 1
+    assert not isinstance(result[0].value, str)
     assert result[0].value.emoji == '\U0001F468\U0001F3FF\U0000200D\U0001F469\U0001F3FB\U0000200D\U0001F467\U0001F3FD'
 
     result = list(emoji.analyze('\U0001F468\U0001F3FF\U0000200D\U0001F469\U0001F3FB\U0000200D\U0001F467\U0001F3FD', join_emoji=False))
     assert len(result) == 3
+    assert not isinstance(result[0].value, str)
     assert result[0].value.emoji == '\U0001F468\U0001F3FF'
+    assert not isinstance(result[1].value, str)
     assert result[1].value.emoji == '\U0001F469\U0001F3FB'
+    assert not isinstance(result[2].value, str)
     assert result[2].value.emoji == '\U0001F467\U0001F3FD'
 
     result = list(emoji.analyze('\U0001F468\U0001F3FF\U0000200D\U0001F469\U0001F3FB\U0000200D\U0001F467\U0001F3FDx', join_emoji=False, non_emoji=True))
     assert len(result) == 6
+    assert not isinstance(result[0].value, str)
     assert result[0].value.emoji == '\U0001F468\U0001F3FF'
     assert result[1].value == '\U0000200D'
+    assert not isinstance(result[2].value, str)
     assert result[2].value.emoji == '\U0001F469\U0001F3FB'
     assert result[3].value == '\U0000200D'
+    assert not isinstance(result[4].value, str)
     assert result[4].value.emoji == '\U0001F467\U0001F3FD'
     assert result[5].value == 'x'
 
     result = list(emoji.analyze('\U0001F468\U0001F3FF\U0000200D\U0001F469\U0001F3FB\U0000200D\U0001F467\U0001F3FDx', join_emoji=True, non_emoji=True))
     assert len(result) == 2
+    assert not isinstance(result[0].value, str)
     assert result[0].value.emoji == '\U0001F468\U0001F3FF\U0000200D\U0001F469\U0001F3FB\U0000200D\U0001F467\U0001F3FD'
     assert result[1].value == 'x'
 
@@ -72,23 +85,6 @@ def test_analyze_non_rgi_zwj():
     assert isinstance(result[3].value, emoji.EmojiMatch)
 
 
-def test_emoji_match():
-    s = 'a\U0001F309b'
-    token = next(emoji.analyze(s))
-    assert isinstance(token, emoji.Token)
-
-    assert token.chars == s[1:-1]
-
-    match = token.value
-
-    assert isinstance(match, emoji.EmojiMatch)
-    assert match.emoji == s[1:-1]
-    assert match.start == 1
-    assert match.end == 2
-    assert match.is_zwj() == False
-    assert str(match).startswith('EmojiMatch(')
-
-
 def test_emoji_match():
     s = 'a\U0001F309b'
     token = next(emoji.analyze(s))
diff --git a/tests/test_core.py b/tests/test_core.py
index 2845f2ee..76e58d71 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -2,17 +2,21 @@
 import random
 import re
 
-import emoji
+from typing import Any, Callable, Dict, List, Tuple, Union
+from typing_extensions import Literal
+import emoji.unicode_codes
 import pytest
 import unicodedata
 
+_NormalizationForm = Literal['NFC', 'NFD', 'NFKC', 'NFKD']
+
 
 # Build all language packs (i.e. fill the cache):
 emoji.emojize("", language="alias")
 for lang_code in emoji.LANGUAGES:
     emoji.emojize("", language=lang_code)
 
 
-def ascii(s):
+def ascii(s: str) -> str:
     # return escaped Code points \U000AB123
     return s.encode("unicode-escape").decode()
@@ -24,13 +28,13 @@ def all_language_and_alias_packs():
         yield (lang_code, emoji.unicode_codes.get_emoji_unicode_dict(lang_code))
 
 
-def normalize(form, s):
+def normalize(form: _NormalizationForm, s: str) -> str:
     return unicodedata.normalize(form, s)
 
 
 def test_emojize_name_only():
     # Check that the regular expression emoji.core._EMOJI_NAME_PATTERN contains all the necesseary characters
-    from emoji.core import _EMOJI_NAME_PATTERN
+    from emoji.core import _EMOJI_NAME_PATTERN  # pyright: ignore [reportPrivateUsage]
 
     pattern = re.compile('[^%s]' % (_EMOJI_NAME_PATTERN, ))
 
@@ -59,7 +63,7 @@ def test_emojize_name_only():
 
 def test_regular_expression_minimal():
     # Check that the regular expression emoji.core._EMOJI_NAME_PATTERN only contains the necesseary characters
-    from emoji.core import _EMOJI_NAME_PATTERN
+    from emoji.core import _EMOJI_NAME_PATTERN  # pyright: ignore [reportPrivateUsage]
 
     pattern_str = '[^%s]' % (_EMOJI_NAME_PATTERN, )
     i = 2
@@ -70,7 +74,7 @@ def test_regular_expression_minimal():
             continue
         pattern = re.compile(pattern_str.replace(c, ''))
         failed = False
-        for lang_code, emoji_pack in all_language_and_alias_packs():
+        for _, emoji_pack in all_language_and_alias_packs():
             for name_in_db in emoji_pack.keys():
                 name_in_db = name_in_db[1:-1]
                 names = [
@@ -112,45 +116,46 @@ def test_emojize_complicated_string():
 
 
 def test_emojize_languages():
-    for lang_code, emoji_pack in emoji.unicode_codes._EMOJI_UNICODE.items():
+    for lang_code, emoji_pack in emoji.unicode_codes._EMOJI_UNICODE.items():  # pyright: ignore [reportPrivateUsage]
         for name, emj in emoji_pack.items():
            assert emoji.emojize(name, language=lang_code) == emj
 
 
 def test_demojize_languages():
-    for lang_code, emoji_pack in emoji.unicode_codes._EMOJI_UNICODE.items():
+    for lang_code, emoji_pack in emoji.unicode_codes._EMOJI_UNICODE.items():  # pyright: ignore [reportPrivateUsage]
         for name, emj in emoji_pack.items():
             assert emoji.demojize(emj, language=lang_code) == name
 
 
 def test_emojize_variant():
-    def remove_variant(s): return re.sub('[\ufe0e\ufe0f]$', '', s)
+    def remove_variant(s: str) -> str:
+        return re.sub('[\ufe0e\ufe0f]$', '', s)
 
     assert emoji.emojize(
-        ':Taurus:', variant=None) == emoji.unicode_codes._EMOJI_UNICODE['en'][':Taurus:']
+        ':Taurus:', variant=None) == emoji.unicode_codes._EMOJI_UNICODE['en'][':Taurus:']  # pyright: ignore [reportPrivateUsage]
     assert emoji.emojize(':Taurus:', variant=None) == emoji.emojize(':Taurus:')
     assert emoji.emojize(':Taurus:', variant='text_type') == remove_variant(
-        emoji.unicode_codes._EMOJI_UNICODE['en'][':Taurus:']) + '\ufe0e'
+        emoji.unicode_codes._EMOJI_UNICODE['en'][':Taurus:']) + '\ufe0e'  # pyright: ignore [reportPrivateUsage]
     assert emoji.emojize(':Taurus:', variant='emoji_type') == remove_variant(
-        emoji.unicode_codes._EMOJI_UNICODE['en'][':Taurus:']) + '\ufe0f'
+        emoji.unicode_codes._EMOJI_UNICODE['en'][':Taurus:']) + '\ufe0f'  # pyright: ignore [reportPrivateUsage]
 
     assert emoji.emojize(
-        ':admission_tickets:', variant=None) == emoji.unicode_codes._EMOJI_UNICODE['en'][':admission_tickets:']
+        ':admission_tickets:', variant=None) == emoji.unicode_codes._EMOJI_UNICODE['en'][':admission_tickets:']  # pyright: ignore [reportPrivateUsage]
     assert emoji.emojize(':admission_tickets:', variant=None) == emoji.emojize(
         ':admission_tickets:')
     assert emoji.emojize(':admission_tickets:', variant='text_type') == remove_variant(
-        emoji.unicode_codes._EMOJI_UNICODE['en'][':admission_tickets:']) + '\ufe0e'
+        emoji.unicode_codes._EMOJI_UNICODE['en'][':admission_tickets:']) + '\ufe0e'  # pyright: ignore [reportPrivateUsage]
     assert emoji.emojize(':admission_tickets:', variant='emoji_type') == remove_variant(
-        emoji.unicode_codes._EMOJI_UNICODE['en'][':admission_tickets:']) + '\ufe0f'
+        emoji.unicode_codes._EMOJI_UNICODE['en'][':admission_tickets:']) + '\ufe0f'  # pyright: ignore [reportPrivateUsage]
 
     with pytest.raises(ValueError):
-        emoji.emojize(':admission_tickets:', variant=False)
+        emoji.emojize(':admission_tickets:', variant=False)  # pyright: ignore [reportArgumentType]
 
     with pytest.raises(ValueError):
-        emoji.emojize(':admission_tickets:', variant=True)
+        emoji.emojize(':admission_tickets:', variant=True)  # pyright: ignore [reportArgumentType]
 
     with pytest.raises(ValueError):
-        emoji.emojize(':admission_tickets:', variant='wrong')
+        emoji.emojize(':admission_tickets:', variant='wrong')  # pyright: ignore [reportArgumentType]
 
     assert emoji.emojize(":football:") == ':football:'
     assert emoji.emojize(":football:", variant="text_type") == ':football:'
@@ -198,11 +203,11 @@ def test_emojize_version():
     assert emoji.emojize("Biking :man_biking: is in 4.0", version=3.0, handle_version=lambda e, data: '') == "Biking  is in 4.0"
     assert emoji.emojize("Biking :man_biking: is in 4.0", version=3.0, handle_version=lambda e, data: data["fr"]) == "Biking :cycliste_homme: is in 4.0"
 
-    def f(emj, data):
+    def f(emj: str, data: Dict[str, str]) -> str:
         assert data['E'] == 5
+        return ''
 
-    assert emoji.emojize(':bowl_with_spoon:', version=-
-                         1, handle_version=f) == ''
+    assert emoji.emojize(':bowl_with_spoon:', version=-1, handle_version=f) == ''
     assert emoji.emojize(':bowl_with_spoon:') == '\U0001F963'
     assert emoji.emojize(':bowl_with_spoon:', version=4) == ''
     assert emoji.emojize(':bowl_with_spoon:', version=4.9) == ''
@@ -348,7 +353,7 @@ def test_replace_emoji():
     assert emoji.replace_emoji('Hello 🇫🇷👌') == 'Hello '
     assert emoji.replace_emoji('Hello 🇫🇷👌', 'x') == 'Hello xx'
 
-    def replace(emj, data):
+    def replace(emj: str, data: Dict[str, str]) -> str:
         assert emj in ["🇫🇷", "👌"]
         return 'x'
     assert emoji.replace_emoji('Hello 🇫🇷👌', replace) == 'Hello xx'
@@ -373,7 +378,7 @@ def test_long_emoji():
 
 
 def test_untranslated():
-    for emj, item in emoji.EMOJI_DATA.items():
+    for item in emoji.EMOJI_DATA.values():
         if item['status'] != emoji.STATUS['fully_qualified']:
             continue
         if 'es' not in item:
@@ -408,9 +413,16 @@ def test_text():
     Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia
     deserunt mollit anim id est laborum.
     """
-    def add_random_emoji(text, lst, select=lambda emj_data: emj_data['en']):
+    def default_select(emj_data: Dict[str, Any]) -> str:
+        return emj_data['en']
+
+    def add_random_emoji(
+        text: str,
+        lst: List[Tuple[str, Dict[str, Any]]],
+        select: Callable[[Dict[str, Any]], Union[str, Literal[False]]] = default_select
+    ) -> Tuple[str, str, List[str]]:
 
-        emoji_list = []
+        emoji_list: List[str] = []
         text_with_unicode = ""
         text_with_placeholder = ""
         for i in range(0, len(text), 10):
@@ -439,7 +451,7 @@ def add_random_emoji(text, lst, select=lambda emj_data: emj_data['en']):
 
         return text_with_unicode, text_with_placeholder, emoji_list
 
-    def clean(s):
+    def clean(s: str) -> str:
         return s.replace('\u200d', '').replace('\ufe0f', '')
 
     all_emoji_list = list(emoji.EMOJI_DATA.items())
@@ -456,8 +468,10 @@ def clean(s):
             assert lis['emoji'] == emoji_list[i]
 
     # qualified emoji from "es"
-    selector = lambda emoji_data: emoji_data["es"] if "es" in emoji_data else False
-    text_with_unicode, text_with_placeholder, emoji_list = add_random_emoji(text, qualified_emoji_list, selector)
+    def select_es(emj_data: Dict[str, Any]) -> Union[str, Literal[False]]:
+        return emj_data["es"] if "es" in emj_data else False
+
+    text_with_unicode, text_with_placeholder, emoji_list = add_random_emoji(text, qualified_emoji_list, select=select_es)
     assert emoji.demojize(text_with_unicode, language="es") == text_with_placeholder
     assert emoji.emojize(text_with_placeholder, language="es") == text_with_unicode
     if not UCS2:
@@ -467,8 +481,10 @@ def clean(s):
             assert lis['emoji'] == emoji_list[i]
 
     # qualified emoji from "alias"
-    selector = lambda emoji_data: emoji_data["alias"][0] if "alias" in emoji_data else False
-    text_with_unicode, text_with_placeholder, emoji_list = add_random_emoji(text, qualified_emoji_list, selector)
+    def select_alias(emj_data: Dict[str, Any]) -> Union[str, Literal[False]]:
+        return emj_data["alias"][0] if "alias" in emj_data else False
+
+    text_with_unicode, text_with_placeholder, emoji_list = add_random_emoji(text, qualified_emoji_list, select=select_alias)
     assert emoji.demojize(text_with_unicode, language="alias") == text_with_placeholder
     assert emoji.emojize(text_with_placeholder, language="alias") == text_with_unicode
     if not UCS2:
@@ -490,7 +506,7 @@ def clean(s):
 
 def test_text_multiple_times():
     # Run test_text() multiple times because it relies on a random text
-    for i in range(100):
+    for _ in range(100):
         test_text()
 
 
diff --git a/tests/test_dict.py b/tests/test_dict.py
index 08919c87..38a5840b 100644
--- a/tests/test_dict.py
+++ b/tests/test_dict.py
@@ -8,7 +8,7 @@
 def test_all_languages_list():
     """Compare all language keys in EMOJI_DATA with the emoji.LANGUAGES list"""
-    langs = set()
+    langs: set[str] = set()
     for item in emoji.EMOJI_DATA.values():
         langs.update(item.keys())
     all_languages = {lang for lang in langs if len(lang) == 2 and lang.lower() == lang}
@@ -25,10 +25,10 @@ def test_emoji_versions():
         assert v >= 0.6
 
 
-def check_duplicate_names(lang):
+def check_duplicate_names(lang: str):
    """Check that there are no duplicate names in the fully_qualified except for different variants"""
     seen = {}
-    for emj, item in emoji.EMOJI_DATA.items():
+    for item in emoji.EMOJI_DATA.values():
         if item["status"] > emoji.STATUS["fully_qualified"]:
             continue
 
diff --git a/tests/test_nfkc.py b/tests/test_nfkc.py
index 094ed8fe..93a6968a 100644
--- a/tests/test_nfkc.py
+++ b/tests/test_nfkc.py
@@ -1,11 +1,14 @@
 """Unittests for canonically equivalent Unicode sequences"""
 
 import sys
-import emoji
 import unicodedata
+import emoji
+from typing_extensions import Literal
+
+_NormalizationForm = Literal['NFC', 'NFD', 'NFKC', 'NFKD']
 
 
-def is_normalized(form, s):
+def is_normalized(form: _NormalizationForm, s: str) -> bool:
     if sys.version_info >= (3, 8):
         return unicodedata.is_normalized(form, s)
     else:
diff --git a/tests/test_unicode_codes.py b/tests/test_unicode_codes.py
index 43c8f506..d506d62d 100644
--- a/tests/test_unicode_codes.py
+++ b/tests/test_unicode_codes.py
@@ -1,8 +1,6 @@
 """Unittests for emoji.unicode_codes."""
-
-import emoji
-
+import emoji.unicode_codes
 
 
 # Build all language packs (i.e. fill the cache):
 emoji.emojize("", language="alias")
@@ -13,8 +11,8 @@ def test_emoji_english_names():
 
     for language, group in (
-        ('en', emoji.unicode_codes._EMOJI_UNICODE['en']),
-        ('alias', emoji.unicode_codes._ALIASES_UNICODE)
+        ('en', emoji.unicode_codes._EMOJI_UNICODE['en']),  # pyright: ignore [reportPrivateUsage]
+        ('alias', emoji.unicode_codes._ALIASES_UNICODE)  # pyright: ignore [reportPrivateUsage]
     ):
         for name, ucode in group.items():
             assert name.startswith(':') and name.endswith(':') and len(name) >= 3
@@ -26,14 +24,14 @@ def test_compare_normal_and_aliases():
     # There should always be more aliases than normal codes
     # since the aliases contain the normal codes
 
-    assert len(emoji.unicode_codes._EMOJI_UNICODE['en']) < len(
-        emoji.unicode_codes._ALIASES_UNICODE)
+    assert len(emoji.unicode_codes._EMOJI_UNICODE['en']) < len(  # pyright: ignore [reportPrivateUsage]
+        emoji.unicode_codes._ALIASES_UNICODE)  # pyright: ignore [reportPrivateUsage]
 
 
 def test_no_alias_duplicates():
     # There should not be two emoji with the same alias
     # (aliases still can be the same as another 'en'-name)
-    all_aliases = set()
+    all_aliases: set[str] = set()
     for data in emoji.EMOJI_DATA.values():
         if data['status'] <= emoji.STATUS['fully_qualified'] and 'alias' in data:
             for alias in data['alias']:
diff --git a/tests/test_versions.py b/tests/test_versions.py
index 03d7cc01..68337dec 100644
--- a/tests/test_versions.py
+++ b/tests/test_versions.py
@@ -1,17 +1,18 @@
 """Unittests for versions in EMOJI_DATA"""
 
-import emoji
+from typing import Any, Dict, List
+import emoji.unicode_codes
 import pytest
 
 
 def test_emoji_versions_complete_emojize():
     # Check that every emoji has a valid version
     replacement = "<3"
-    for lang_code, emoji_pack in emoji.unicode_codes._EMOJI_UNICODE.items():
+    for lang_code, emoji_pack in emoji.unicode_codes._EMOJI_UNICODE.items():  # pyright: ignore [reportPrivateUsage]
         for name in emoji_pack.keys():
-            version = []
+            version: List[float] = []
 
-            def f(e, d):
+            def f(e: str, d: Dict[str, Any]) -> str:
                 v = d['E']
                 n = d[lang_code]
                 assert n == name
@@ -27,11 +28,11 @@ def f(e, d):
 
 def test_emoji_versions_complete_demojize():
     # Check that every emoji has a valid version
-    for lang_code, emoji_pack in emoji.unicode_codes._EMOJI_UNICODE.items():
+    for lang_code, emoji_pack in emoji.unicode_codes._EMOJI_UNICODE.items():  # pyright: ignore [reportPrivateUsage]
         for name in emoji_pack.keys():
-            version = []
+            version: List[float] = []
 
-            def f(e, d):
+            def f(e: str, d: Dict[str, Any]) -> str:
                 v = d['E']
                 assert isinstance(v, (int, float))
                 assert v >= 0.6
@@ -86,7 +87,7 @@ def test_method_replace_version():
     assert emoji.replace_emoji('Hello 🇫🇷👌', 'x', version=0,) == 'Hello xx'
     assert emoji.replace_emoji('Hello 🇫🇷👌', 'x', version=1,) == 'Hello 🇫🇷👌'
 
-    def replace(emj, data):
+    def replace(emj: str, data: Dict[str, Any]) -> str:
         assert emj in ["🇫🇷", "👌"]
         return 'x'
     assert emoji.replace_emoji('Hello 🇫🇷👌', replace, version=0.1) == 'Hello xx'
diff --git a/tests/test_zwj_keep.py b/tests/test_zwj_keep.py
index 7db19efd..4871ffde 100644
--- a/tests/test_zwj_keep.py
+++ b/tests/test_zwj_keep.py
@@ -3,10 +3,11 @@
 See test_zwj_remove.py for tests when the ZWJ is removed.
 """
 
+from typing import Any, Dict
 import emoji
 
 
-def ascii(s):
+def ascii(s: str) -> str:
     # return escaped Code points \U000AB123
     return s.encode("unicode-escape").decode()
 
@@ -95,7 +96,7 @@ def test_non_rgi_zwj_replace():
     # Replace with different length
     index = [0]
 
-    def replace_f(e, emoji_data):
+    def replace_f(e: str, emoji_data: Dict[str, Any]) -> str:
         index[0] += 1
         if index[0] % 2 == 0:
             return 'X'
diff --git a/tests/test_zwj_remove.py b/tests/test_zwj_remove.py
index da94a822..51ca7b6c 100644
--- a/tests/test_zwj_remove.py
+++ b/tests/test_zwj_remove.py
@@ -3,10 +3,11 @@
 See test_zwj_keep.py for tests when the ZWJ is kept.
 """
 
+from typing import Any, Dict
 import emoji
 
 
-def ascii(s):
+def ascii(s: str) -> str:
     # return escaped Code points \U000AB123
     return s.encode("unicode-escape").decode()
 
@@ -97,7 +98,7 @@ def test_non_rgi_zwj_replace():
     # Replace with different length
     index = [0]
 
-    def replace_f(e, emoji_data):
+    def replace_f(e: str, emoji_data: Dict[str, Any]) -> str:
         index[0] += 1
         if index[0] % 2 == 0:
             return 'X'

From ff78b93183bcf6c01bb316aeb887cbcbc7566835 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20H=C3=B6rist?=
Date: Mon, 29 Apr 2024 18:52:36 +0200
Subject: [PATCH 2/2] Install test dependencies for lint job

---
 .github/workflows/pythonTests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pythonTests.yml b/.github/workflows/pythonTests.yml
index 3dc81ed2..e419f777 100644
--- a/.github/workflows/pythonTests.yml
+++ b/.github/workflows/pythonTests.yml
@@ -42,7 +42,7 @@ jobs:
 
       - name: Install Dependencies
         run: |
-          python -m pip install .
+          python -m pip install .[dev]
 
       - name: Pyright
         uses: jakebailey/pyright-action@v2