talonhub · knausj85 · Dec 14, 2024 · Dec 14, 2024 · Dec 14, 2024 · Dec 14, 2024
diff --git a/core/create_spoken_forms.py b/core/create_spoken_forms.py
@@ -6,7 +6,6 @@
 
 from talon import Module, actions
 
-from .keys.keys import symbol_key_words
 from .numbers.numbers import digits_map, scales, teens, tens
 from .user_settings import track_csv_list
 
@@ -15,14 +14,12 @@
 DEFAULT_MINIMUM_TERM_LENGTH = 2
 EXPLODE_MAX_LEN = 3
 FANCY_REGULAR_EXPRESSION = r"[A-Z]?[a-z]+|[A-Z]+(?![a-z])|[0-9]+"
-SYMBOLS_REGEX = "|".join(re.escape(symbol) for symbol in set(symbol_key_words.values()))
 FILE_EXTENSIONS_REGEX = r"^\b$"
 file_extensions = {}
 
 
 def update_regex():
     global REGEX_NO_SYMBOLS
-    global REGEX_WITH_SYMBOLS
     REGEX_NO_SYMBOLS = re.compile(
         "|".join(
             [
@@ -31,9 +28,6 @@ def update_regex():
             ]
         )
     )
-    REGEX_WITH_SYMBOLS = re.compile(
-        "|".join([FANCY_REGULAR_EXPRESSION, FILE_EXTENSIONS_REGEX, SYMBOLS_REGEX])
-    )
 
 
 update_regex()
@@ -61,9 +55,11 @@ def on_abbreviations(values):
 
 REVERSE_PRONUNCIATION_MAP = {
     **{str(value): key for key, value in digits_map.items()},
-    **{value: key for key, value in symbol_key_words.items()},
 }
 
+# For now, keep "dot" as the spoken form of "."; create_extension_forms still looks it up for dotted file extensions.
+REVERSE_PRONUNCIATION_MAP["."] = "dot"
+
 # begin: create the lists etc necessary for create_spoken_word_for_number
 # by convention, each entry in the list has an append space... until I clean up the function
 # the algorithm's expectation is slightly different from numbers.py
@@ -265,9 +261,11 @@ def create_extension_forms(spoken_forms: List[str]):
 
             if substring in file_extensions_map.keys():
                 file_extension_forms.append(file_extensions_map[substring])
+
                 dotted_extension_form.append(REVERSE_PRONUNCIATION_MAP["."])
                 dotted_extension_form.append(file_extensions_map[substring])
                 have_file_extension = True
+
                 # purposefully down update truncated
             else:
                 file_extension_forms.append(substring)
@@ -466,32 +464,24 @@ def create_spoken_forms(
     ) -> list[str]:
         """Create spoken forms for a given source"""
 
-        spoken_forms_without_symbols = create_spoken_forms_from_regex(
-            source, REGEX_NO_SYMBOLS
-        )
-
-        # todo: this could probably be optimized out if there's no symbols
-        spoken_forms_with_symbols = create_spoken_forms_from_regex(
-            source, REGEX_WITH_SYMBOLS
-        )
+        spoken_forms = create_spoken_forms_from_regex(source, REGEX_NO_SYMBOLS)
 
-        # some may be identical, so ensure the list is reduced
-        spoken_forms = set(spoken_forms_with_symbols + spoken_forms_without_symbols)
+        spoken_forms_set = set(spoken_forms)
 
         # only generate the subsequences if requested
         if generate_subsequences:
             # todo: do we care about the subsequences that are excluded.
             # the only one that seems relevant are the full spoken form for
-            spoken_forms.update(
+            spoken_forms_set.update(
                 generate_string_subsequences(
-                    spoken_forms_without_symbols[-1],
+                    spoken_forms[-1],
                     words_to_exclude or [],
                     minimum_term_length,
                 )
             )
 
         # Avoid empty spoken forms.
-        return [x for x in spoken_forms if x]
+        return [x for x in spoken_forms_set if x]
 
     def create_spoken_forms_from_list(
         sources: list[str],

diff --git a/core/keys/keys.py b/core/keys/keys.py
@@ -101,103 +101,3 @@ def keys(m) -> str:
 def letters(m) -> str:
     "Multiple letter keys"
     return "".join(m.letter_list)
-
-
-ctx = Context()
-
-# `punctuation_words` is for words you want available BOTH in dictation and as key names in command mode.
-# `symbol_key_words` is for key names that should be available in command mode, but NOT during dictation.
-punctuation_words = {
-    # TODO: I'm not sure why we need these, I think it has something to do with
-    # Dragon. Possibly it has been fixed by later improvements to talon? -rntz
-    "`": "`",
-    ",": ",",  # <== these things
-    "back tick": "`",
-    "comma": ",",
-    # Workaround for issue with conformer b-series; see #946
-    "coma": ",",
-    "period": ".",
-    "full stop": ".",
-    "semicolon": ";",
-    "colon": ":",
-    "forward slash": "/",
-    "question mark": "?",
-    "exclamation mark": "!",
-    "exclamation point": "!",
-    "asterisk": "*",
-    "hash sign": "#",
-    "number sign": "#",
-    "percent sign": "%",
-    "at sign": "@",
-    "and sign": "&",
-    "ampersand": "&",
-    # Currencies
-    "dollar sign": "$",
-    "pound sign": "£",
-    "hyphen": "-",
-    "L paren": "(",
-    "left paren": "(",
-    "R paren": ")",
-    "right paren": ")",
-}
-symbol_key_words = {
-    "dot": ".",
-    "point": ".",
-    "quote": "'",
-    "question": "?",
-    "apostrophe": "'",
-    "L square": "[",
-    "left square": "[",
-    "brack": "[",
-    "bracket": "[",
-    "left bracket": "[",
-    "square": "[",
-    "R square": "]",
-    "right square": "]",
-    "r brack": "]",
-    "r bracket": "]",
-    "right bracket": "]",
-    "slash": "/",
-    "backslash": "\\",
-    "minus": "-",
-    "dash": "-",
-    "equals": "=",
-    "plus": "+",
-    "grave": "`",
-    "tilde": "~",
-    "bang": "!",
-    "down score": "_",
-    "underscore": "_",
-    "paren": "(",
-    "brace": "{",
-    "left brace": "{",
-    "curly bracket": "{",
-    "left curly bracket": "{",
-    "r brace": "}",
-    "right brace": "}",
-    "r curly bracket": "}",
-    "right curly bracket": "}",
-    "angle": "<",
-    "left angle": "<",
-    "less than": "<",
-    "rangle": ">",
-    "R angle": ">",
-    "right angle": ">",
-    "greater than": ">",
-    "star": "*",
-    "hash": "#",
-    "percent": "%",
-    "caret": "^",
-    "amper": "&",
-    "pipe": "|",
-    "dub quote": '"',
-    "double quote": '"',
-    # Currencies
-    "dollar": "$",
-    "pound": "£",
-}
-
-# make punctuation words also included in {user.symbol_keys}
-symbol_key_words.update(punctuation_words)
-ctx.lists["self.punctuation"] = punctuation_words
-ctx.lists["self.symbol_key"] = symbol_key_words
diff --git a/core/keys/punctuation.talon-list b/core/keys/punctuation.talon-list
@@ -0,0 +1,28 @@
+# `punctuation` is for words you want available BOTH in dictation and as key names in command mode.
+list: user.punctuation
+-
+back tick: `
+comma: ,
+coma: ,
+period: .
+full stop: .
+semicolon: ;
+colon: :
+forward slash: /
+question mark: ?
+exclamation mark: !
+exclamation point: !
+asterisk: *
+hash sign: #
+number sign: #
+percent sign: %
+at sign: @
+and sign: &
+ampersand: &
+dollar sign: $
+pound sign: £
+hyphen: -
+L paren: (
+left paren: (
+R paren: )
+right paren: )
diff --git a/core/keys/symbol_key.talon-list b/core/keys/symbol_key.talon-list
@@ -0,0 +1,56 @@
+# `symbol_key` is for key names that should be available in command mode, but NOT during dictation.
+list: user.symbol_key
+-
+dot: .
+point: .
+quote: "'"
+question: ?
+apostrophe: "'"
+L square: [
+left square: [
+brack: [
+bracket: [
+left bracket: [
+square: [
+R square: ]
+right square: ]
+r brack: ]
+r bracket: ]
+right bracket: ]
+slash: /
+backslash: \
+minus: -
+dash: -
+equals: =
+plus: +
+grave: `
+tilde: ~
+bang: !
+down score: _
+underscore: _
+paren: (
+brace: {
+left brace: {
+curly bracket: {
+left curly bracket: {
+r brace: }
+right brace: }
+r curly bracket: }
+right curly bracket: }
+angle: <
+left angle: <
+less than: <
+rangle: >
+R angle: >
+right angle: >
+greater than: >
+star: *
+hash: #
+percent: %
+caret: ^
+amper: &
+pipe: |
+dub quote: '"'
+double quote: '"'
+dollar: $
+pound: £
diff --git a/test/test_create_spoken_forms.py b/test/test_create_spoken_forms.py
@@ -64,7 +64,7 @@ def test_handles_generate_subsequences():
     def test_expands_special_chars():
         result = actions.user.create_spoken_forms("hi $world", None, 0, True)
 
-        assert "hi dollar sign world" in result
+        assert "hi world" in result
 
     def test_expands_file_extensions():
         result = actions.user.create_spoken_forms("hi .cs", None, 0, True)
@@ -104,7 +104,7 @@ def test_properties():
         """
 
         def _example_generator():
-            pieces = ["hi", "world", "$", ".cs", "1900"]
+            pieces = ["hi", "world", "dollar", ".cs", "1900"]
             params = list(
                 itertools.product(
                     [None, ["world"], ["dot"]],  # Dot is from the expanded ".cs"