From 025ccac6616869c1d8e6f736f5bc74d370d5a38c Mon Sep 17 00:00:00 2001 From: Jeff Knaus Date: Sat, 31 Aug 2024 21:21:20 -0600 Subject: [PATCH 01/19] move remaining CSVs over to Talon's resource.watch functionality --- apps/emacs/emacs_commands.py | 13 +++---- core/abbreviate/abbreviate.py | 43 +++++++++++------------ core/create_spoken_forms.py | 45 +++++++++++++++--------- core/user_settings.py | 66 +++++++++++++++++++++-------------- core/vocabulary/vocabulary.py | 46 +++++++++++++----------- 5 files changed, 119 insertions(+), 94 deletions(-) diff --git a/apps/emacs/emacs_commands.py b/apps/emacs/emacs_commands.py index 9618b7c388..12529f4a37 100644 --- a/apps/emacs/emacs_commands.py +++ b/apps/emacs/emacs_commands.py @@ -32,11 +32,9 @@ def emacs_command_short_form(command_name: str) -> Optional[str]: "Looks up the short form for command_name in emacs_commands.csv." return emacs_commands.get(command_name, Command(command_name)).short - -def load_csv(): - filepath = Path(__file__).parents[0] / "emacs_commands.csv" - with resource.open(filepath) as f: - rows = list(csv.reader(f)) +@resource.watch("emacs_commands.csv") +def load_commands(f): + rows = list(csv.reader(f)) # Check headers assert rows[0] == ["Command", " Key binding", " Short form", " Spoken form"] @@ -46,7 +44,7 @@ def load_csv(): continue if len(row) > 4: print( - f'"{filepath}": More than four values in row: {row}. ' + f'emacs_commands.csv: More than four values in row: {row}. ' + " Ignoring the extras" ) name, keys, short, spoken = ( @@ -71,6 +69,3 @@ def load_csv(): command_list[c.spoken] = c.name ctx.lists["self.emacs_command"] = command_list - -# TODO: register on change to file! 
-app.register("ready", load_csv) diff --git a/core/abbreviate/abbreviate.py b/core/abbreviate/abbreviate.py index 2544dbf93c..5d5cc0406f 100644 --- a/core/abbreviate/abbreviate.py +++ b/core/abbreviate/abbreviate.py @@ -2,12 +2,13 @@ from talon import Context, Module -from ..user_settings import get_list_from_csv +from ..user_settings import track_csv_list mod = Module() +ctx = Context() mod.list("abbreviation", desc="Common abbreviation") - +abbreviations_list = {} abbreviations = { "J peg": "jpg", "abbreviate": "abbr", @@ -447,24 +448,22 @@ "work in progress": "wip", } -# This variable is also considered exported for the create_spoken_forms module -abbreviations_list = get_list_from_csv( - "abbreviations.csv", - headers=("Abbreviation", "Spoken Form"), - default=abbreviations, -) - -# Matches letters and spaces, as currently, Talon doesn't accept other characters in spoken forms. -PATTERN = re.compile(r"^[a-zA-Z ]+$") -abbreviation_values = { - v: v for v in abbreviations_list.values() if PATTERN.match(v) is not None -} +@track_csv_list("abbreviations.csv", headers=("Abbreviation", "Spoken Form"), default=abbreviations) +def on_abbreviations(values): + global abbreviations_list + + # Matches letters and spaces, as currently, Talon doesn't accept other characters in spoken forms. + PATTERN = re.compile(r"^[a-zA-Z ]+$") + abbreviation_values = { + v: v for v in values.values() if PATTERN.match(v) is not None + } -# Allows the abbreviated/short form to be used as spoken phrase. eg "brief app" -> app -abbreviations_list_with_values = { - **abbreviation_values, - **abbreviations_list, -} - -ctx = Context() -ctx.lists["user.abbreviation"] = abbreviations_list_with_values + # Allows the abbreviated/short form to be used as spoken phrase. 
eg "brief app" -> app + abbreviations_list_with_values = { + **{v: v for v in abbreviation_values.values()}, + **abbreviations_list, + } + + # abbreviations_list is also imported by the create_spoken_forms module + abbreviations_list = abbreviations_list_with_values + ctx.lists["user.abbreviation"] = abbreviations_list_with_values diff --git a/core/create_spoken_forms.py b/core/create_spoken_forms.py index 02fe77b098..ab731ab06d 100644 --- a/core/create_spoken_forms.py +++ b/core/create_spoken_forms.py @@ -7,33 +7,46 @@ from talon import Module, actions from .abbreviate.abbreviate import abbreviations_list -from .file_extension.file_extension import file_extensions from .keys.keys import symbol_key_words from .numbers.numbers import digits_map, scales, teens, tens +from .user_settings import track_csv_list mod = Module() - DEFAULT_MINIMUM_TERM_LENGTH = 2 EXPLODE_MAX_LEN = 3 FANCY_REGULAR_EXPRESSION = r"[A-Z]?[a-z]+|[A-Z]+(?![a-z])|[0-9]+" -FILE_EXTENSIONS_REGEX = "|".join( - re.escape(file_extension.strip()) + "$" - for file_extension in file_extensions.values() -) SYMBOLS_REGEX = "|".join(re.escape(symbol) for symbol in set(symbol_key_words.values())) -REGEX_NO_SYMBOLS = re.compile( - "|".join( - [ - FANCY_REGULAR_EXPRESSION, - FILE_EXTENSIONS_REGEX, - ] +FILE_EXTENSIONS_REGEX = r'^\b$' +file_extensions = {} + +def update_regex(): + global REGEX_NO_SYMBOLS + global REGEX_WITH_SYMBOLS + REGEX_NO_SYMBOLS = re.compile( + "|".join( + [ + FANCY_REGULAR_EXPRESSION, + FILE_EXTENSIONS_REGEX, + ] + ) + ) + REGEX_WITH_SYMBOLS = re.compile( + "|".join([FANCY_REGULAR_EXPRESSION, FILE_EXTENSIONS_REGEX, SYMBOLS_REGEX]) ) -) -REGEX_WITH_SYMBOLS = re.compile( - "|".join([FANCY_REGULAR_EXPRESSION, FILE_EXTENSIONS_REGEX, SYMBOLS_REGEX]) -) +update_regex() + +@track_csv_list("file_extensions.csv", headers=("File extension", "Name")) +def on_extensions(values): + global FILE_EXTENSIONS_REGEX + global file_extensions + file_extensions = values + FILE_EXTENSIONS_REGEX = "|".join( + 
re.escape(file_extension.strip()) + "$" + for file_extension in values.values() + ) + update_regex() REVERSE_PRONUNCIATION_MAP = { **{str(value): key for key, value in digits_map.items()}, diff --git a/core/user_settings.py b/core/user_settings.py index 2630f2c518..e662edb21f 100644 --- a/core/user_settings.py +++ b/core/user_settings.py @@ -1,35 +1,20 @@ +from pathlib import Path +from typing import Callable, IO import csv import os -from pathlib import Path from talon import resource # NOTE: This method requires this module to be one folder below the top-level # community/knausj folder. SETTINGS_DIR = Path(__file__).parents[1] / "settings" +SETTINGS_DIR.mkdir(exist_ok=True) -if not SETTINGS_DIR.is_dir(): - os.mkdir(SETTINGS_DIR) - - -def get_list_from_csv( - filename: str, headers: tuple[str, str], default: dict[str, str] = {} -): - """Retrieves list from CSV""" - path = SETTINGS_DIR / filename - assert filename.endswith(".csv") - - if not path.is_file(): - with open(path, "w", encoding="utf-8", newline="") as file: - writer = csv.writer(file) - writer.writerow(headers) - for key, value in default.items(): - writer.writerow([key] if key == value else [value, key]) +CallbackT = Callable[[dict[str, str]], None] +DecoratorT = Callable[[CallbackT], CallbackT] - # Now read via resource to take advantage of talon's - # ability to reload this script for us when the resource changes - with resource.open(str(path), "r") as f: - rows = list(csv.reader(f)) +def read_csv_list(f: IO, headers: tuple[str, str], is_spoken_form_first: bool = False) -> dict[str, str]: + rows = list(csv.reader(f)) # print(str(rows)) mapping = {} @@ -37,7 +22,7 @@ def get_list_from_csv( actual_headers = rows[0] if not actual_headers == list(headers): print( - f'"{filename}": Malformed headers - {actual_headers}.' + f'"{f}": Malformed headers - {actual_headers}.' + f" Should be {list(headers)}. Ignoring row." 
) for row in rows[1:]: @@ -47,10 +32,14 @@ def get_list_from_csv( if len(row) == 1: output = spoken_form = row[0] else: - output, spoken_form = row[:2] + if is_spoken_form_first: + spoken_form, output = row[:2] + else: + output, spoken_form = row[:2] + if len(row) > 2: print( - f'"{filename}": More than two values in row: {row}.' + f'"{f}": More than two values in row: {row}.' + " Ignoring the extras." ) # Leading/trailing whitespace in spoken form can prevent recognition. @@ -59,6 +48,31 @@ def get_list_from_csv( return mapping +def write_csv_defaults(path: Path, headers: tuple[str, str], default: dict[str, str]=None, is_spoken_form_first: bool = False) -> None: + if not path.is_file() and default is not None: + with open(path, "w", encoding="utf-8", newline="") as file: + writer = csv.writer(file) + writer.writerow(headers) + for key, value in default.items(): + if key == value: + writer.writerow([key]) + elif is_spoken_form_first: + writer.writerow([key, value]) + else: + writer.writerow([value, key]) + +def track_csv_list(filename: str, headers: tuple[str, str], default: dict[str, str]=None, is_spoken_form_first: bool = False) -> DecoratorT: + assert filename.endswith(".csv") + path = SETTINGS_DIR / filename + write_csv_defaults(path, headers, default, is_spoken_form_first) + + def decorator(fn: CallbackT) -> CallbackT: + @resource.watch(str(path)) + def on_update(f): + data = read_csv_list(f, headers, is_spoken_form_first) + fn(data) + + return decorator def append_to_csv(filename: str, rows: dict[str, str]): path = SETTINGS_DIR / filename @@ -74,4 +88,4 @@ def append_to_csv(filename: str, rows: dict[str, str]): if needs_newline: writer.writerow([]) for key, value in rows.items(): - writer.writerow([key] if key == value else [value, key]) + writer.writerow([key] if key == value else [value, key]) \ No newline at end of file diff --git a/core/vocabulary/vocabulary.py b/core/vocabulary/vocabulary.py index 2b49703ce2..27f8948472 100644 --- 
a/core/vocabulary/vocabulary.py +++ b/core/vocabulary/vocabulary.py @@ -6,7 +6,7 @@ from talon import Context, Module, actions from talon.grammar import Phrase -from ..user_settings import append_to_csv, get_list_from_csv +from ..user_settings import append_to_csv, track_csv_list mod = Module() ctx = Context() @@ -43,23 +43,7 @@ # This is the opposite ordering to words_to_replace.csv (the latter has the target word first) } _word_map_defaults.update({word.lower(): word for word in _capitalize_defaults}) - - -# phrases_to_replace is a spoken form -> written form map, used by our -# implementation of `dictate.replace_words` (at bottom of file) to rewrite words -# and phrases Talon recognized. This does not change the priority with which -# Talon recognizes particular phrases over others. -phrases_to_replace = get_list_from_csv( - "words_to_replace.csv", - headers=("Replacement", "Original"), - default=_word_map_defaults, -) - -# "dictate.word_map" is used by Talon's built-in default implementation of -# `dictate.replace_words`, but supports only single-word replacements. -# Multi-word phrases are ignored. -ctx.settings["dictate.word_map"] = phrases_to_replace - +phrases_to_replace = {} class PhraseReplacer: """Utility for replacing phrases by other phrases inside text or word lists. 
@@ -70,7 +54,10 @@ class PhraseReplacer: - phrase_dict: dictionary mapping recognized/spoken forms to written forms """ - def __init__(self, phrase_dict: dict[str, str]): + def __init__(self): + self.phrase_index = {} + + def update(self, phrase_dict: dict[str, str]): # Index phrases by first word, then number of subsequent words n_next phrase_index = dict() for spoken_form, written_form in phrase_dict.items(): @@ -120,7 +107,8 @@ def replace_string(self, text: str) -> str: # Unit tests for PhraseReplacer -rep = PhraseReplacer( +rep = PhraseReplacer() +rep.update( { "this": "foo", "that": "bar", @@ -136,7 +124,23 @@ def replace_string(self, text: str) -> str: assert rep.replace_string("try this is too") == "try stopping early too" assert rep.replace_string("this is a tricky one") == "stopping early a tricky one" -phrase_replacer = PhraseReplacer(phrases_to_replace) +phrase_replacer = PhraseReplacer() + +# phrases_to_replace is a spoken form -> written form map, used by our +# implementation of `dictate.replace_words` (at bottom of file) to rewrite words +# and phrases Talon recognized. This does not change the priority with which +# Talon recognizes particular phrases over others. +@track_csv_list("words_to_replace.csv", headers=("Replacement", "Original"), default=_word_map_defaults) +def on_word_map(values): + global phrases_to_replace + phrases_to_replace = values + phrase_replacer.update(values) + + # "dictate.word_map" is used by Talon's built-in default implementation of + # `dictate.replace_words`, but supports only single-word replacements. + # Multi-word phrases are ignored. 
+ ctx.settings["dictate.word_map"] = values + @ctx.action_class("dictate") From 2c88fb415b6c2085a43ade2d94bebbc0be032145 Mon Sep 17 00:00:00 2001 From: Jeff Knaus Date: Sat, 31 Aug 2024 21:24:02 -0600 Subject: [PATCH 02/19] Update keys.py --- core/keys/keys.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/keys/keys.py b/core/keys/keys.py index 831a9f68c9..e9591161a2 100644 --- a/core/keys/keys.py +++ b/core/keys/keys.py @@ -1,7 +1,5 @@ from talon import Context, Module, app -from ..user_settings import get_list_from_csv - # used for number keys & function keys respectively digits = "zero one two three four five six seven eight nine".split() f_digits = "one two three four five six seven eight nine ten eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen twenty".split() From a07d9da2899c477f6942a063ad2b37790eaabd01 Mon Sep 17 00:00:00 2001 From: Jeff Knaus Date: Sat, 31 Aug 2024 21:29:28 -0600 Subject: [PATCH 03/19] continue migrating --- core/file_extension/file_extension.py | 13 +++++-------- core/system_paths.py | 5 +---- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/core/file_extension/file_extension.py b/core/file_extension/file_extension.py index 04a1c23ca2..a548cad26d 100644 --- a/core/file_extension/file_extension.py +++ b/core/file_extension/file_extension.py @@ -1,6 +1,6 @@ from talon import Context, Module -from ..user_settings import get_list_from_csv +from ..user_settings import track_csv_list mod = Module() mod.list("file_extension", desc="A file extension, such as .py") @@ -55,11 +55,8 @@ "dot log": ".log", } -file_extensions = get_list_from_csv( - "file_extensions.csv", - headers=("File extension", "Name"), - default=_file_extensions_defaults, -) - ctx = Context() -ctx.lists["self.file_extension"] = file_extensions + +@track_csv_list("file_extensions.csv", headers=("File extension", "Name"), default=_file_extensions_defaults) +def on_update(values): + ctx.lists["self.file_extension"] = values \ No 
newline at end of file diff --git a/core/system_paths.py b/core/system_paths.py index 8ebf773b55..d0245e5279 100644 --- a/core/system_paths.py +++ b/core/system_paths.py @@ -1,15 +1,12 @@ """ This module gives us the list {user.system_paths} and the capture that wraps -the list to easily refer to system paths in talon and python files. It also creates a file -system_paths.csv in the settings folder so they user can easily add their own custom paths. +the list to easily refer to system paths in talon and python files. """ import os from talon import Context, Module, actions, app -from .user_settings import get_list_from_csv - mod = Module() ctx = Context() From 6747b94ed25d65a8c7b767abf7ed4097bd893cbd Mon Sep 17 00:00:00 2001 From: Jeff Knaus Date: Sat, 31 Aug 2024 21:42:03 -0600 Subject: [PATCH 04/19] Update abbreviate.py --- core/abbreviate/abbreviate.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/core/abbreviate/abbreviate.py b/core/abbreviate/abbreviate.py index 5d5cc0406f..50bb3a7403 100644 --- a/core/abbreviate/abbreviate.py +++ b/core/abbreviate/abbreviate.py @@ -451,11 +451,14 @@ @track_csv_list("abbreviations.csv", headers=("Abbreviation", "Spoken Form"), default=abbreviations) def on_abbreviations(values): global abbreviations_list - + + # note: abbreviations_list is imported by the create_spoken_forms module + abbreviations_list = values + # Matches letters and spaces, as currently, Talon doesn't accept other characters in spoken forms. PATTERN = re.compile(r"^[a-zA-Z ]+$") abbreviation_values = { - v: v for v in values.values() if PATTERN.match(v) is not None + v: v for v in abbreviations_list.values() if PATTERN.match(v) is not None } # Allows the abbreviated/short form to be used as spoken phrase. 
eg "brief app" -> app @@ -464,6 +467,4 @@ def on_abbreviations(values): **abbreviations_list, } - # abbreviations_list is also imported by the create_spoken_forms module - abbreviations_list = abbreviations_list_with_values ctx.lists["user.abbreviation"] = abbreviations_list_with_values From aef46e4bda0544f5ca00d60d3da934f3120cbd45 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 1 Sep 2024 03:48:27 +0000 Subject: [PATCH 05/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- apps/emacs/emacs_commands.py | 4 ++-- core/abbreviate/abbreviate.py | 9 ++++++--- core/create_spoken_forms.py | 9 ++++++--- core/file_extension/file_extension.py | 9 +++++++-- core/system_paths.py | 2 +- core/user_settings.py | 28 +++++++++++++++++++++------ core/vocabulary/vocabulary.py | 9 +++++++-- 7 files changed, 51 insertions(+), 19 deletions(-) diff --git a/apps/emacs/emacs_commands.py b/apps/emacs/emacs_commands.py index 12529f4a37..12fd817285 100644 --- a/apps/emacs/emacs_commands.py +++ b/apps/emacs/emacs_commands.py @@ -32,6 +32,7 @@ def emacs_command_short_form(command_name: str) -> Optional[str]: "Looks up the short form for command_name in emacs_commands.csv." return emacs_commands.get(command_name, Command(command_name)).short + @resource.watch("emacs_commands.csv") def load_commands(f): rows = list(csv.reader(f)) @@ -44,7 +45,7 @@ def load_commands(f): continue if len(row) > 4: print( - f'emacs_commands.csv: More than four values in row: {row}. ' + f"emacs_commands.csv: More than four values in row: {row}. 
" + " Ignoring the extras" ) name, keys, short, spoken = ( @@ -68,4 +69,3 @@ def load_commands(f): if c.spoken: command_list[c.spoken] = c.name ctx.lists["self.emacs_command"] = command_list - diff --git a/core/abbreviate/abbreviate.py b/core/abbreviate/abbreviate.py index 50bb3a7403..cb43d5c192 100644 --- a/core/abbreviate/abbreviate.py +++ b/core/abbreviate/abbreviate.py @@ -448,12 +448,15 @@ "work in progress": "wip", } -@track_csv_list("abbreviations.csv", headers=("Abbreviation", "Spoken Form"), default=abbreviations) + +@track_csv_list( + "abbreviations.csv", headers=("Abbreviation", "Spoken Form"), default=abbreviations +) def on_abbreviations(values): global abbreviations_list # note: abbreviations_list is imported by the create_spoken_forms module - abbreviations_list = values + abbreviations_list = values # Matches letters and spaces, as currently, Talon doesn't accept other characters in spoken forms. PATTERN = re.compile(r"^[a-zA-Z ]+$") @@ -466,5 +469,5 @@ def on_abbreviations(values): **{v: v for v in abbreviation_values.values()}, **abbreviations_list, } - + ctx.lists["user.abbreviation"] = abbreviations_list_with_values diff --git a/core/create_spoken_forms.py b/core/create_spoken_forms.py index ab731ab06d..e6029414a3 100644 --- a/core/create_spoken_forms.py +++ b/core/create_spoken_forms.py @@ -17,9 +17,10 @@ EXPLODE_MAX_LEN = 3 FANCY_REGULAR_EXPRESSION = r"[A-Z]?[a-z]+|[A-Z]+(?![a-z])|[0-9]+" SYMBOLS_REGEX = "|".join(re.escape(symbol) for symbol in set(symbol_key_words.values())) -FILE_EXTENSIONS_REGEX = r'^\b$' +FILE_EXTENSIONS_REGEX = r"^\b$" file_extensions = {} + def update_regex(): global REGEX_NO_SYMBOLS global REGEX_WITH_SYMBOLS @@ -35,19 +36,21 @@ def update_regex(): "|".join([FANCY_REGULAR_EXPRESSION, FILE_EXTENSIONS_REGEX, SYMBOLS_REGEX]) ) + update_regex() + @track_csv_list("file_extensions.csv", headers=("File extension", "Name")) def on_extensions(values): global FILE_EXTENSIONS_REGEX global file_extensions file_extensions = values 
FILE_EXTENSIONS_REGEX = "|".join( - re.escape(file_extension.strip()) + "$" - for file_extension in values.values() + re.escape(file_extension.strip()) + "$" for file_extension in values.values() ) update_regex() + REVERSE_PRONUNCIATION_MAP = { **{str(value): key for key, value in digits_map.items()}, **{value: key for key, value in symbol_key_words.items()}, diff --git a/core/file_extension/file_extension.py b/core/file_extension/file_extension.py index a548cad26d..1a75e76c42 100644 --- a/core/file_extension/file_extension.py +++ b/core/file_extension/file_extension.py @@ -57,6 +57,11 @@ ctx = Context() -@track_csv_list("file_extensions.csv", headers=("File extension", "Name"), default=_file_extensions_defaults) + +@track_csv_list( + "file_extensions.csv", + headers=("File extension", "Name"), + default=_file_extensions_defaults, +) def on_update(values): - ctx.lists["self.file_extension"] = values \ No newline at end of file + ctx.lists["self.file_extension"] = values diff --git a/core/system_paths.py b/core/system_paths.py index d0245e5279..e4a718c66c 100644 --- a/core/system_paths.py +++ b/core/system_paths.py @@ -1,6 +1,6 @@ """ This module gives us the list {user.system_paths} and the capture that wraps -the list to easily refer to system paths in talon and python files. +the list to easily refer to system paths in talon and python files. 
""" import os diff --git a/core/user_settings.py b/core/user_settings.py index e662edb21f..70077d0116 100644 --- a/core/user_settings.py +++ b/core/user_settings.py @@ -1,7 +1,7 @@ -from pathlib import Path -from typing import Callable, IO import csv import os +from pathlib import Path +from typing import IO, Callable from talon import resource @@ -13,7 +13,10 @@ CallbackT = Callable[[dict[str, str]], None] DecoratorT = Callable[[CallbackT], CallbackT] -def read_csv_list(f: IO, headers: tuple[str, str], is_spoken_form_first: bool = False) -> dict[str, str]: + +def read_csv_list( + f: IO, headers: tuple[str, str], is_spoken_form_first: bool = False +) -> dict[str, str]: rows = list(csv.reader(f)) # print(str(rows)) @@ -48,7 +51,13 @@ def read_csv_list(f: IO, headers: tuple[str, str], is_spoken_form_first: bool = return mapping -def write_csv_defaults(path: Path, headers: tuple[str, str], default: dict[str, str]=None, is_spoken_form_first: bool = False) -> None: + +def write_csv_defaults( + path: Path, + headers: tuple[str, str], + default: dict[str, str] = None, + is_spoken_form_first: bool = False, +) -> None: if not path.is_file() and default is not None: with open(path, "w", encoding="utf-8", newline="") as file: writer = csv.writer(file) @@ -61,7 +70,13 @@ def write_csv_defaults(path: Path, headers: tuple[str, str], default: dict[str, else: writer.writerow([value, key]) -def track_csv_list(filename: str, headers: tuple[str, str], default: dict[str, str]=None, is_spoken_form_first: bool = False) -> DecoratorT: + +def track_csv_list( + filename: str, + headers: tuple[str, str], + default: dict[str, str] = None, + is_spoken_form_first: bool = False, +) -> DecoratorT: assert filename.endswith(".csv") path = SETTINGS_DIR / filename write_csv_defaults(path, headers, default, is_spoken_form_first) @@ -74,6 +89,7 @@ def on_update(f): return decorator + def append_to_csv(filename: str, rows: dict[str, str]): path = SETTINGS_DIR / filename assert filename.endswith(".csv") 
@@ -88,4 +104,4 @@ def append_to_csv(filename: str, rows: dict[str, str]): if needs_newline: writer.writerow([]) for key, value in rows.items(): - writer.writerow([key] if key == value else [value, key]) \ No newline at end of file + writer.writerow([key] if key == value else [value, key]) diff --git a/core/vocabulary/vocabulary.py b/core/vocabulary/vocabulary.py index 27f8948472..3792138ed1 100644 --- a/core/vocabulary/vocabulary.py +++ b/core/vocabulary/vocabulary.py @@ -45,6 +45,7 @@ _word_map_defaults.update({word.lower(): word for word in _capitalize_defaults}) phrases_to_replace = {} + class PhraseReplacer: """Utility for replacing phrases by other phrases inside text or word lists. @@ -126,11 +127,16 @@ def replace_string(self, text: str) -> str: phrase_replacer = PhraseReplacer() + # phrases_to_replace is a spoken form -> written form map, used by our # implementation of `dictate.replace_words` (at bottom of file) to rewrite words # and phrases Talon recognized. This does not change the priority with which # Talon recognizes particular phrases over others. 
-@track_csv_list("words_to_replace.csv", headers=("Replacement", "Original"), default=_word_map_defaults) +@track_csv_list( + "words_to_replace.csv", + headers=("Replacement", "Original"), + default=_word_map_defaults, +) def on_word_map(values): global phrases_to_replace phrases_to_replace = values @@ -142,7 +148,6 @@ def on_word_map(values): ctx.settings["dictate.word_map"] = values - @ctx.action_class("dictate") class OverwrittenActions: def replace_words(words: Sequence[str]) -> Sequence[str]: From 6480a802850bcb96e8a18829801376924f41bbee Mon Sep 17 00:00:00 2001 From: Jeff Knaus Date: Sat, 31 Aug 2024 22:07:29 -0600 Subject: [PATCH 06/19] Update __init__.py --- test/stubs/talon/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/stubs/talon/__init__.py b/test/stubs/talon/__init__.py index 35501260c4..f407ac2d6b 100644 --- a/test/stubs/talon/__init__.py +++ b/test/stubs/talon/__init__.py @@ -184,7 +184,9 @@ class Resource: def open(self, path: str, mode: str = "r"): return open(path, mode, encoding="utf-8") - + def watch(self, path: str): + return lambda f: f + class App: """ Implements something like the talon app variable From f432b3da5423ec7b1a6f54c7e36a1fc06b8a913b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 1 Sep 2024 04:07:44 +0000 Subject: [PATCH 07/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- test/stubs/talon/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/stubs/talon/__init__.py b/test/stubs/talon/__init__.py index f407ac2d6b..f7001e4910 100644 --- a/test/stubs/talon/__init__.py +++ b/test/stubs/talon/__init__.py @@ -186,7 +186,8 @@ def open(self, path: str, mode: str = "r"): def watch(self, path: str): return lambda f: f - + + class App: """ Implements something like the talon app variable From 9f7cac652923ae1cf0df7621172b78d2322b2c2d Mon Sep 17 
00:00:00 2001 From: Jeff Knaus Date: Sun, 1 Sep 2024 06:35:16 -0600 Subject: [PATCH 08/19] updates - use track_csv_list for abbreviations - fix unit tests --- core/create_spoken_forms.py | 5 ++++- test/test_create_spoken_forms.py | 34 +++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/core/create_spoken_forms.py b/core/create_spoken_forms.py index e6029414a3..97d3368bc0 100644 --- a/core/create_spoken_forms.py +++ b/core/create_spoken_forms.py @@ -6,7 +6,6 @@ from talon import Module, actions -from .abbreviate.abbreviate import abbreviations_list from .keys.keys import symbol_key_words from .numbers.numbers import digits_map, scales, teens, tens from .user_settings import track_csv_list @@ -50,6 +49,10 @@ def on_extensions(values): ) update_regex() +@track_csv_list("abbreviations.csv", headers=("Abbreviation", "Spoken Form")) +def on_abbreviations(values): + global abbreviations_list + abbreviations_list = values REVERSE_PRONUNCIATION_MAP = { **{str(value): key for key, value in digits_map.items()}, diff --git a/test/test_create_spoken_forms.py b/test/test_create_spoken_forms.py index b921be0cc2..bf7cc654eb 100644 --- a/test/test_create_spoken_forms.py +++ b/test/test_create_spoken_forms.py @@ -4,11 +4,41 @@ # Only include this when we're running tests import itertools + import core.user_settings from talon import actions + from typing import IO, Callable + + + # we need to replace the track_csv_list decorator for unit tests. 
+ CallbackT = Callable[[dict[str, str]], None] + DecoratorT = Callable[[CallbackT], CallbackT] + def track_csv_list_test( + filename: str, + headers: tuple[str, str], + default: dict[str, str] = None, + is_spoken_form_first: bool = False, + ) -> DecoratorT: + def decorator(fn: CallbackT) -> CallbackT: + extensions = { + "dot see sharp": ".cs", + } + abbreviations = { + "source": "src", + "whats app": "WhatsApp" + } + if filename == "abbreviations.csv": + fn(abbreviations) + elif filename == "file_extensions.csv": + fn(extensions) + + return decorator + + # replace track_csv_list before importing create_spoken_forms + core.user_settings.track_csv_list = track_csv_list_test import core.create_spoken_forms - + def test_excludes_words(): result = actions.user.create_spoken_forms("hi world", ["world"], 0, True) @@ -43,6 +73,8 @@ def test_expands_file_extensions(): assert "hi dot see sharp" in result def test_expands_abbreviations(): + + result = actions.user.create_spoken_forms("src", None, 0, True) assert "source" in result From a535ca63f37305d1a84b9a0214828adc9aa05ed1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 1 Sep 2024 12:35:31 +0000 Subject: [PATCH 09/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- core/create_spoken_forms.py | 2 ++ test/test_create_spoken_forms.py | 17 +++++++---------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/core/create_spoken_forms.py b/core/create_spoken_forms.py index 97d3368bc0..3f9c2a26e3 100644 --- a/core/create_spoken_forms.py +++ b/core/create_spoken_forms.py @@ -49,11 +49,13 @@ def on_extensions(values): ) update_regex() + @track_csv_list("abbreviations.csv", headers=("Abbreviation", "Spoken Form")) def on_abbreviations(values): global abbreviations_list abbreviations_list = values + REVERSE_PRONUNCIATION_MAP = { **{str(value): key for key, value in digits_map.items()}, **{value: 
key for key, value in symbol_key_words.items()}, diff --git a/test/test_create_spoken_forms.py b/test/test_create_spoken_forms.py index bf7cc654eb..ec15ff64c8 100644 --- a/test/test_create_spoken_forms.py +++ b/test/test_create_spoken_forms.py @@ -4,15 +4,16 @@ # Only include this when we're running tests import itertools - import core.user_settings + from typing import IO, Callable from talon import actions - from typing import IO, Callable + import core.user_settings # we need to replace the track_csv_list decorator for unit tests. CallbackT = Callable[[dict[str, str]], None] DecoratorT = Callable[[CallbackT], CallbackT] + def track_csv_list_test( filename: str, headers: tuple[str, str], @@ -23,22 +24,19 @@ def decorator(fn: CallbackT) -> CallbackT: extensions = { "dot see sharp": ".cs", } - abbreviations = { - "source": "src", - "whats app": "WhatsApp" - } + abbreviations = {"source": "src", "whats app": "WhatsApp"} if filename == "abbreviations.csv": fn(abbreviations) elif filename == "file_extensions.csv": - fn(extensions) + fn(extensions) return decorator - + # replace track_csv_list before importing create_spoken_forms core.user_settings.track_csv_list = track_csv_list_test import core.create_spoken_forms - + def test_excludes_words(): result = actions.user.create_spoken_forms("hi world", ["world"], 0, True) @@ -74,7 +72,6 @@ def test_expands_file_extensions(): def test_expands_abbreviations(): - result = actions.user.create_spoken_forms("src", None, 0, True) assert "source" in result From d4359a8c9a0057c20102abbb3650e25ba3eee139 Mon Sep 17 00:00:00 2001 From: Jeff Knaus Date: Sun, 1 Sep 2024 06:46:08 -0600 Subject: [PATCH 10/19] Revert usage of track_csv_list for abbreviations + fix unit test again --- core/create_spoken_forms.py | 8 +------- test/test_create_spoken_forms.py | 3 +++ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/core/create_spoken_forms.py b/core/create_spoken_forms.py index 3f9c2a26e3..056426db40 100644 --- 
a/core/create_spoken_forms.py +++ b/core/create_spoken_forms.py @@ -6,6 +6,7 @@ from talon import Module, actions +from .abbreviate.abbreviate import abbreviations_list from .keys.keys import symbol_key_words from .numbers.numbers import digits_map, scales, teens, tens from .user_settings import track_csv_list @@ -49,13 +50,6 @@ def on_extensions(values): ) update_regex() - -@track_csv_list("abbreviations.csv", headers=("Abbreviation", "Spoken Form")) -def on_abbreviations(values): - global abbreviations_list - abbreviations_list = values - - REVERSE_PRONUNCIATION_MAP = { **{str(value): key for key, value in digits_map.items()}, **{value: key for key, value in symbol_key_words.items()}, diff --git a/test/test_create_spoken_forms.py b/test/test_create_spoken_forms.py index ec15ff64c8..139460057d 100644 --- a/test/test_create_spoken_forms.py +++ b/test/test_create_spoken_forms.py @@ -9,6 +9,9 @@ from talon import actions import core.user_settings + import core.abbreviate + + core.abbreviate.abbreviations_list = {"source": "src", "whats app": "WhatsApp"} # we need to replace the track_csv_list decorator for unit tests. 
CallbackT = Callable[[dict[str, str]], None] From 38bd0289f71cc94fc6de904a4189b2f454c40306 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 1 Sep 2024 12:46:23 +0000 Subject: [PATCH 11/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- core/create_spoken_forms.py | 1 + test/test_create_spoken_forms.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/core/create_spoken_forms.py b/core/create_spoken_forms.py index 056426db40..e6029414a3 100644 --- a/core/create_spoken_forms.py +++ b/core/create_spoken_forms.py @@ -50,6 +50,7 @@ def on_extensions(values): ) update_regex() + REVERSE_PRONUNCIATION_MAP = { **{str(value): key for key, value in digits_map.items()}, **{value: key for key, value in symbol_key_words.items()}, diff --git a/test/test_create_spoken_forms.py b/test/test_create_spoken_forms.py index 139460057d..f5754a1573 100644 --- a/test/test_create_spoken_forms.py +++ b/test/test_create_spoken_forms.py @@ -8,8 +8,8 @@ from talon import actions - import core.user_settings import core.abbreviate + import core.user_settings core.abbreviate.abbreviations_list = {"source": "src", "whats app": "WhatsApp"} From 3b22380eb5d24f2ca2b800bbf2ad08b3766ec67d Mon Sep 17 00:00:00 2001 From: Jeff Knaus Date: Sun, 1 Sep 2024 11:45:52 -0600 Subject: [PATCH 12/19] switch back to track_csv_list for abbreviations --- core/create_spoken_forms.py | 6 +++++- test/test_create_spoken_forms.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/core/create_spoken_forms.py b/core/create_spoken_forms.py index e6029414a3..578b8ff555 100644 --- a/core/create_spoken_forms.py +++ b/core/create_spoken_forms.py @@ -6,7 +6,6 @@ from talon import Module, actions -from .abbreviate.abbreviate import abbreviations_list from .keys.keys import symbol_key_words from .numbers.numbers import digits_map, scales, teens, tens from .user_settings 
import track_csv_list @@ -50,6 +49,11 @@ def on_extensions(values): ) update_regex() +abbreviations_list = {} +@track_csv_list("abbreviations.csv", headers=("Abbreviation", "Spoken Form")) +def on_abbreviations(values): + global abbreviations_list + abbreviations_list = values REVERSE_PRONUNCIATION_MAP = { **{str(value): key for key, value in digits_map.items()}, diff --git a/test/test_create_spoken_forms.py b/test/test_create_spoken_forms.py index f5754a1573..404992cb3e 100644 --- a/test/test_create_spoken_forms.py +++ b/test/test_create_spoken_forms.py @@ -11,7 +11,7 @@ import core.abbreviate import core.user_settings - core.abbreviate.abbreviations_list = {"source": "src", "whats app": "WhatsApp"} + # we need to replace the track_csv_list decorator for unit tests. CallbackT = Callable[[dict[str, str]], None] From 2c47768d8f21c06140dff06af02c0cf9579a7a27 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 1 Sep 2024 17:46:08 +0000 Subject: [PATCH 13/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- core/create_spoken_forms.py | 4 ++++ test/test_create_spoken_forms.py | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/core/create_spoken_forms.py b/core/create_spoken_forms.py index 578b8ff555..508e8a93ff 100644 --- a/core/create_spoken_forms.py +++ b/core/create_spoken_forms.py @@ -49,12 +49,16 @@ def on_extensions(values): ) update_regex() + abbreviations_list = {} + + @track_csv_list("abbreviations.csv", headers=("Abbreviation", "Spoken Form")) def on_abbreviations(values): global abbreviations_list abbreviations_list = values + REVERSE_PRONUNCIATION_MAP = { **{str(value): key for key, value in digits_map.items()}, **{value: key for key, value in symbol_key_words.items()}, diff --git a/test/test_create_spoken_forms.py b/test/test_create_spoken_forms.py index 404992cb3e..609c01b17d 100644 --- 
a/test/test_create_spoken_forms.py +++ b/test/test_create_spoken_forms.py @@ -11,8 +11,6 @@ import core.abbreviate import core.user_settings - - # we need to replace the track_csv_list decorator for unit tests. CallbackT = Callable[[dict[str, str]], None] DecoratorT = Callable[[CallbackT], CallbackT] From b5ef78ca21005293bfbe7964ec4ab24042963fe3 Mon Sep 17 00:00:00 2001 From: Jeff Knaus Date: Sat, 7 Sep 2024 11:43:27 -0600 Subject: [PATCH 14/19] Remove new_line parameter --- core/user_settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/user_settings.py b/core/user_settings.py index 70077d0116..b5e2cde8ba 100644 --- a/core/user_settings.py +++ b/core/user_settings.py @@ -59,7 +59,7 @@ def write_csv_defaults( is_spoken_form_first: bool = False, ) -> None: if not path.is_file() and default is not None: - with open(path, "w", encoding="utf-8", newline="") as file: + with open(path, "w", encoding="utf-8") as file: writer = csv.writer(file) writer.writerow(headers) for key, value in default.items(): From 8067fb082c039b3e1a9c110803ce653dbc92d225 Mon Sep 17 00:00:00 2001 From: Jeff Knaus Date: Sat, 7 Sep 2024 11:49:56 -0600 Subject: [PATCH 15/19] Add a note to breaking_changes.txt --- BREAKING_CHANGES.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/BREAKING_CHANGES.txt b/BREAKING_CHANGES.txt index 48be87043d..4a53dceee1 100644 --- a/BREAKING_CHANGES.txt +++ b/BREAKING_CHANGES.txt @@ -6,6 +6,7 @@ applied given the delay between changes being submitted and the time they were r and merged. --- +* 2024-09-07 Removed get_list_from_csv from `user_settings.py`. Please use the new `track_csv_list` decorator, which leverages Talon's `resource.watch` API for robustness on Talon launch. * 2024-07-31 Remove commands `"command mode"`, `"dictation mode"` from custom user modes.
Note that if you have any custom modes where you want these commands you could add that mode to the context of `command_and_dictation_mode.talon` or copying the command to one of your custom files. * 2024-07-30 Deprecate `lend` and `bend` commands in favor of `go line end | tail` and `go line start | head`. * 2024-07-28 Removed the following user namespace actions in favor of the new action/modifier grammar. From dfb7b5e6d61633c69693d8b36c649e06cc6cb58c Mon Sep 17 00:00:00 2001 From: Jeff Knaus Date: Sat, 7 Sep 2024 11:56:06 -0600 Subject: [PATCH 16/19] Add a link to the wiki for alternative spoken forms --- core/keys/letter.talon-list | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/keys/letter.talon-list b/core/keys/letter.talon-list index 3dc3418e33..1f84217bbe 100644 --- a/core/keys/letter.talon-list +++ b/core/keys/letter.talon-list @@ -1,5 +1,7 @@ list: user.letter - +# for common alternative spoken forms for letters, visit +# https://talon.wiki/quickstart/improving_recognition_accuracy/#collected-alternatives-to-the-default-alphabet air: a bat: b cap: c From 087b769e9d9b82e7bb91120f74d9719b268af465 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 7 Sep 2024 17:56:21 +0000 Subject: [PATCH 17/19] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- core/keys/letter.talon-list | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/keys/letter.talon-list b/core/keys/letter.talon-list index 1f84217bbe..824a2939fc 100644 --- a/core/keys/letter.talon-list +++ b/core/keys/letter.talon-list @@ -1,6 +1,6 @@ list: user.letter - -# for common alternative spoken forms for letters, visit +# for common alternative spoken forms for letters, visit # https://talon.wiki/quickstart/improving_recognition_accuracy/#collected-alternatives-to-the-default-alphabet air: a bat: b From 1f9474f47eb66924ab51ff405cd6181e2fcbbcf9 Mon Sep 17 
00:00:00 2001 From: Jeff Knaus Date: Sat, 7 Sep 2024 12:03:45 -0600 Subject: [PATCH 18/19] oops, wrong branch. --- core/keys/letter.talon-list | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/keys/letter.talon-list b/core/keys/letter.talon-list index 824a2939fc..3dc3418e33 100644 --- a/core/keys/letter.talon-list +++ b/core/keys/letter.talon-list @@ -1,7 +1,5 @@ list: user.letter - -# for common alternative spoken forms for letters, visit -# https://talon.wiki/quickstart/improving_recognition_accuracy/#collected-alternatives-to-the-default-alphabet air: a bat: b cap: c From bbd324eb5a20a7351a6c73a6f2092cba77ad2c55 Mon Sep 17 00:00:00 2001 From: Jeff Knaus Date: Sat, 7 Sep 2024 15:21:53 -0600 Subject: [PATCH 19/19] Improve malformed header error - just include file name --- core/user_settings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/user_settings.py b/core/user_settings.py index b5e2cde8ba..6e08cfe035 100644 --- a/core/user_settings.py +++ b/core/user_settings.py @@ -25,7 +25,7 @@ def read_csv_list( actual_headers = rows[0] if not actual_headers == list(headers): print( - f'"{f}": Malformed headers - {actual_headers}.' + f'"{f.name}": Malformed headers - {actual_headers}.' + f" Should be {list(headers)}. Ignoring row." ) for row in rows[1:]: @@ -42,7 +42,7 @@ def read_csv_list( if len(row) > 2: print( - f'"{f}": More than two values in row: {row}.' + f'"{f.name}": More than two values in row: {row}.' + " Ignoring the extras." ) # Leading/trailing whitespace in spoken form can prevent recognition.