Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove symbols from create spoken forms & migrate to *.talon-list #1638

Open
wants to merge 24 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
3028ae3
Remove symbols from create spoken forms & migrate remaining keys list…
knausj85 Dec 14, 2024
057eb2d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 14, 2024
6bce77c
Update create_spoken_forms.py
knausj85 Dec 14, 2024
f291cbf
Merge branch 'create-spoken-form-remote-symbols' of https://github.co…
knausj85 Dec 14, 2024
ee47ffb
Update test_create_spoken_forms.py
knausj85 Dec 14, 2024
eadc481
Update create_spoken_forms.py
knausj85 Dec 14, 2024
47e2ddf
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 14, 2024
d01743f
Update create_spoken_forms.py
knausj85 Dec 14, 2024
1e30068
preserve set functionality to remove duplicates, fix unit tests.
knausj85 Dec 14, 2024
0762dfa
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 14, 2024
052047c
Remove duplicates
knausj85 Dec 15, 2024
f8db161
Merge branch 'main' into create-spoken-form-remote-symbols
nriley Dec 15, 2024
3559a73
Merge branch 'main' into create-spoken-form-remote-symbols
nriley Dec 15, 2024
8848939
Update create_spoken_forms.py
knausj85 Jan 19, 2025
2e270a7
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 19, 2025
cebcb50
Update test_create_spoken_forms.py
knausj85 Jan 19, 2025
b134361
Merge branch 'create-spoken-form-remote-symbols' of https://github.co…
knausj85 Jan 19, 2025
61d69b3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 19, 2025
775115d
Update create_spoken_forms.py
knausj85 Jan 19, 2025
bf534e5
Merge branch 'create-spoken-form-remote-symbols' of https://github.co…
knausj85 Jan 19, 2025
cb7829e
Merge branch 'main' into create-spoken-form-remote-symbols
knausj85 Jan 19, 2025
7ec878a
Update symbol_key.talon-list
knausj85 Jan 25, 2025
99c2f7f
Merge branch 'main' into create-spoken-form-remote-symbols
knausj85 Jan 25, 2025
78fbfcc
Merge branch 'main' into create-spoken-form-remote-symbols
nriley Jan 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 10 additions & 20 deletions core/create_spoken_forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from talon import Module, actions

from .keys.keys import symbol_key_words
from .numbers.numbers import digits_map, scales, teens, tens
from .user_settings import track_csv_list

Expand All @@ -15,14 +14,12 @@
DEFAULT_MINIMUM_TERM_LENGTH = 2
EXPLODE_MAX_LEN = 3
FANCY_REGULAR_EXPRESSION = r"[A-Z]?[a-z]+|[A-Z]+(?![a-z])|[0-9]+"
knausj85 marked this conversation as resolved.
Show resolved Hide resolved
SYMBOLS_REGEX = "|".join(re.escape(symbol) for symbol in set(symbol_key_words.values()))
FILE_EXTENSIONS_REGEX = r"^\b$"
file_extensions = {}


def update_regex():
global REGEX_NO_SYMBOLS
global REGEX_WITH_SYMBOLS
REGEX_NO_SYMBOLS = re.compile(
"|".join(
[
Expand All @@ -31,9 +28,6 @@ def update_regex():
]
)
)
REGEX_WITH_SYMBOLS = re.compile(
"|".join([FANCY_REGULAR_EXPRESSION, FILE_EXTENSIONS_REGEX, SYMBOLS_REGEX])
)


update_regex()
Expand Down Expand Up @@ -61,9 +55,11 @@ def on_abbreviations(values):

REVERSE_PRONUNCIATION_MAP = {
**{str(value): key for key, value in digits_map.items()},
**{value: key for key, value in symbol_key_words.items()},
}

# for the moment, keep the dot spoken form
REVERSE_PRONUNCIATION_MAP["."] = "dot"

# begin: create the lists etc necessary for create_spoken_word_for_number
# by convention, each entry in the list has an appended space... until I clean up the function
# the algorithm's expectation is slightly different from numbers.py
Expand Down Expand Up @@ -265,9 +261,11 @@ def create_extension_forms(spoken_forms: List[str]):

if substring in file_extensions_map.keys():
file_extension_forms.append(file_extensions_map[substring])

dotted_extension_form.append(REVERSE_PRONUNCIATION_MAP["."])
dotted_extension_form.append(file_extensions_map[substring])
have_file_extension = True

# purposefully don't update truncated
else:
file_extension_forms.append(substring)
Expand Down Expand Up @@ -466,32 +464,24 @@ def create_spoken_forms(
) -> list[str]:
"""Create spoken forms for a given source"""

spoken_forms_without_symbols = create_spoken_forms_from_regex(
source, REGEX_NO_SYMBOLS
)

# todo: this could probably be optimized out if there's no symbols
spoken_forms_with_symbols = create_spoken_forms_from_regex(
source, REGEX_WITH_SYMBOLS
)
spoken_forms = create_spoken_forms_from_regex(source, REGEX_NO_SYMBOLS)

# some may be identical, so ensure the list is reduced
spoken_forms = set(spoken_forms_with_symbols + spoken_forms_without_symbols)
spoken_forms_set = set(spoken_forms)

# only generate the subsequences if requested
if generate_subsequences:
# todo: do we care about the subsequences that are excluded.
# the only one that seems relevant are the full spoken form for
spoken_forms.update(
spoken_forms_set.update(
generate_string_subsequences(
spoken_forms_without_symbols[-1],
spoken_forms[-1],
words_to_exclude or [],
minimum_term_length,
)
)

# Avoid empty spoken forms.
return [x for x in spoken_forms if x]
return [x for x in spoken_forms_set if x]

def create_spoken_forms_from_list(
sources: list[str],
Expand Down
100 changes: 0 additions & 100 deletions core/keys/keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,103 +101,3 @@ def keys(m) -> str:
def letters(m) -> str:
"Multiple letter keys"
return "".join(m.letter_list)


ctx = Context()

# `punctuation_words` is for words you want available BOTH in dictation and as key names in command mode.
# `symbol_key_words` is for key names that should be available in command mode, but NOT during dictation.
punctuation_words = {
# TODO: I'm not sure why we need these, I think it has something to do with
# Dragon. Possibly it has been fixed by later improvements to talon? -rntz
"`": "`",
",": ",", # <== these things
"back tick": "`",
"comma": ",",
# Workaround for issue with conformer b-series; see #946
"coma": ",",
"period": ".",
"full stop": ".",
"semicolon": ";",
"colon": ":",
"forward slash": "/",
"question mark": "?",
"exclamation mark": "!",
"exclamation point": "!",
"asterisk": "*",
"hash sign": "#",
"number sign": "#",
"percent sign": "%",
"at sign": "@",
"and sign": "&",
"ampersand": "&",
# Currencies
"dollar sign": "$",
"pound sign": "£",
"hyphen": "-",
"L paren": "(",
"left paren": "(",
"R paren": ")",
"right paren": ")",
}
symbol_key_words = {
"dot": ".",
"point": ".",
"quote": "'",
"question": "?",
"apostrophe": "'",
"L square": "[",
"left square": "[",
"brack": "[",
"bracket": "[",
"left bracket": "[",
"square": "[",
"R square": "]",
"right square": "]",
"r brack": "]",
"r bracket": "]",
"right bracket": "]",
"slash": "/",
"backslash": "\\",
"minus": "-",
"dash": "-",
"equals": "=",
"plus": "+",
"grave": "`",
"tilde": "~",
"bang": "!",
"down score": "_",
"underscore": "_",
"paren": "(",
"brace": "{",
"left brace": "{",
"curly bracket": "{",
"left curly bracket": "{",
"r brace": "}",
"right brace": "}",
"r curly bracket": "}",
"right curly bracket": "}",
"angle": "<",
"left angle": "<",
"less than": "<",
"rangle": ">",
"R angle": ">",
"right angle": ">",
"greater than": ">",
"star": "*",
"hash": "#",
"percent": "%",
"caret": "^",
"amper": "&",
"pipe": "|",
"dub quote": '"',
"double quote": '"',
# Currencies
"dollar": "$",
"pound": "£",
}

# make punctuation words also included in {user.symbol_key}
symbol_key_words.update(punctuation_words)
ctx.lists["self.punctuation"] = punctuation_words
ctx.lists["self.symbol_key"] = symbol_key_words
28 changes: 28 additions & 0 deletions core/keys/punctuation.talon-list
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# `punctuation` is for words you want available BOTH in dictation and as key names in command mode.
list: user.punctuation
-
back tick: `
comma: ,
coma: ,
period: .
full stop: .
semicolon: ;
colon: :
forward slash: /
question mark: ?
exclamation mark: !
exclamation point: !
asterisk: *
hash sign: #
number sign: #
percent sign: %
at sign: @
and sign: &
ampersand: &
dollar sign: $
pound sign: £
hyphen: -
L paren: (
left paren: (
R paren: )
right paren: )
56 changes: 56 additions & 0 deletions core/keys/symbol_key.talon-list
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# symbol_key is for key names that should be available in command mode, but NOT during dictation.
list: user.symbol_key
-
dot: .
point: .
quote: "'"
question: ?
apostrophe: "'"
L square: [
left square: [
brack: [
bracket: [
left bracket: [
square: [
R square: ]
knausj85 marked this conversation as resolved.
Show resolved Hide resolved
right square: ]
r brack: ]
r bracket: ]
right bracket: ]
slash: /
backslash: \
minus: -
dash: -
equals: =
plus: +
grave: `
tilde: ~
bang: !
down score: _
underscore: _
paren: (
brace: {
left brace: {
curly bracket: {
left curly bracket: {
r brace: }
knausj85 marked this conversation as resolved.
Show resolved Hide resolved
right brace: }
r curly bracket: }
right curly bracket: }
angle: <
left angle: <
less than: <
rangle: >
R angle: >
right angle: >
greater than: >
star: *
hash: #
percent: %
caret: ^
amper: &
pipe: |
dub quote: '"'
double quote: '"'
dollar: $
pound: £
4 changes: 2 additions & 2 deletions test/test_create_spoken_forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def test_handles_generate_subsequences():
def test_expands_special_chars():
result = actions.user.create_spoken_forms("hi $world", None, 0, True)

assert "hi dollar sign world" in result
assert "hi world" in result

def test_expands_file_extensions():
result = actions.user.create_spoken_forms("hi .cs", None, 0, True)
Expand Down Expand Up @@ -104,7 +104,7 @@ def test_properties():
"""

def _example_generator():
pieces = ["hi", "world", "$", ".cs", "1900"]
pieces = ["hi", "world", "dollar", ".cs", "1900"]
params = list(
itertools.product(
[None, ["world"], ["dot"]], # Dot is from the expanded ".cs"
Expand Down
Loading