From 7795e087a766ed9a293edc5f75ee2eaf3323d8c4 Mon Sep 17 00:00:00 2001
From: da3dsoul <da3dsoul@gmail.com>
Date: Thu, 6 Apr 2023 21:48:28 -0400
Subject: [PATCH] Fix P, V, and E sounding odd. Add slash to the punctuation
 list. Also add torch and torchaudio back to the requirements, as Silero needs
 them. Silero's requirements.txt should be everything needed to run the tests.

---
 extensions/silero_tts/requirements.txt    |  2 ++
 extensions/silero_tts/tts_preprocessor.py | 16 ++++++++--------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/extensions/silero_tts/requirements.txt b/extensions/silero_tts/requirements.txt
index 1017bf0..ac2785a 100644
--- a/extensions/silero_tts/requirements.txt
+++ b/extensions/silero_tts/requirements.txt
@@ -3,3 +3,5 @@ num2words
 omegaconf
 pydub
 PyYAML
+torch
+torchaudio
diff --git a/extensions/silero_tts/tts_preprocessor.py b/extensions/silero_tts/tts_preprocessor.py
index 50606c0..9dc3fd5 100644
--- a/extensions/silero_tts/tts_preprocessor.py
+++ b/extensions/silero_tts/tts_preprocessor.py
@@ -2,13 +2,13 @@ import re
 
 from num2words import num2words
 
-
+punctuation = r'[\s,.?!/)"\'\]>]'
 alphabet_map = {
     "A": " Ei ",
     "B": " Bee ",
     "C": " See ",
     "D": " Dee ",
-    "E": " Ii ",
+    "E": " Eee ",
     "F": " Eff ",
     "G": " Jee ",
     "H": " Eich ",
@@ -19,13 +19,13 @@ alphabet_map = {
     "M": " Emm ",
     "N": " Enn ",
     "O": " Ohh ",
-    "P": " Pii ",
+    "P": " Pee ",
     "Q": " Queue ",
     "R": " Are ",
     "S": " Ess ",
     "T": " Tee ",
     "U": " You ",
-    "V": " Vii ",
+    "V": " Vee ",
     "W": " Double You ",
     "X": " Ex ",
     "Y": " Why ",
@@ -55,7 +55,7 @@ def preprocess(string):
 
     # cleanup whitespaces
     # remove whitespace before punctuation
-    string = re.sub(r'\s+([,.?!\'])', r'\1', string)
+    string = re.sub(rf'\s+({punctuation})', r'\1', string)
     string = string.strip()
     # compact whitespace
     string = ' '.join(string.split())
@@ -71,13 +71,13 @@ def remove_surrounded_chars(string):
 
 def replace_negative(string):
     # handles situations like -5. -5 would become negative 5, which would then be expanded to negative five
-    return re.sub(r'(\s)(-)(\d+)([\s,.?!)"\'\]>])', r'\1negative \3\4', string)
+    return re.sub(rf'(\s)(-)(\d+)({punctuation})', r'\1negative \3\4', string)
 
 
 def replace_roman(string):
     # find a string of roman numerals.
     # Only 2 or more, to avoid capturing I and single character abbreviations, like names
-    pattern = re.compile(r'\s[IVXLCDM]{2,}[\s,.?!)"\'\]>]')
+    pattern = re.compile(rf'\s[IVXLCDM]{{2,}}{punctuation}')
     result = string
     while True:
         match = pattern.search(result)
@@ -117,7 +117,7 @@ def num_to_words(text):
 
 def replace_abbreviations(string):
     # abbreviations 1 to 4 characters long. It will get things like A and I, but those are pronounced with their letter
-    pattern = re.compile(r'(^|[\s("\'\[<])([A-Z]{1,4})([\s,.?!)"\'\]>]|$)')
+    pattern = re.compile(rf'(^|[\s("\'\[<])([A-Z]{{1,4}})({punctuation}|$)')
     result = string
     while True:
         match = pattern.search(result)