From c05f727ae471dbcf5b01130464a93c47b6f40187 Mon Sep 17 00:00:00 2001
From: da3dsoul
Date: Tue, 4 Apr 2023 00:09:50 -0400
Subject: [PATCH] Improve Silero's Preprocessor to Handle Negative Numbers Better

---
 extensions/silero_tts/tts_preprocessor.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/extensions/silero_tts/tts_preprocessor.py b/extensions/silero_tts/tts_preprocessor.py
index 3b3146f..11cf7c6 100644
--- a/extensions/silero_tts/tts_preprocessor.py
+++ b/extensions/silero_tts/tts_preprocessor.py
@@ -38,6 +38,7 @@ def preprocess(string):
     string = string.replace('“', '')
     string = string.replace('\n', ' ')
     string = remove_commas(string)
+    string = replace_negative(string)
     string = replace_roman(string)
     string = hyphen_range_to(string)
     string = num_to_words(string)
@@ -62,8 +63,13 @@ def remove_surrounded_chars(string):
     return re.sub(r'\*[^*]*?(\*|$)', '', string)
 
 
+def replace_negative(string):
+    return re.sub(r'(\s)(-)(\d+)([\s,.?!)"\'\]>])', r'\1negative \3\4', string)
+
+
 def replace_roman(string):
-    pattern = re.compile(r'\s[IVXLCDM]+[\s,.?!)"\'\]>]')
+    # find a string of roman numerals. Only 2 or more, to avoid capturing I
+    pattern = re.compile(r'\s[IVXLCDM]{2,}[\s,.?!)"\'\]>]')
     result = string
     while True:
         match = pattern.search(result)