2 lat temu · 7795e087a7
--- a/extensions/silero_tts/requirements.txt
+++ b/extensions/silero_tts/requirements.txt
@@ -3,3 +3,5 @@ num2words
 
				 omegaconf
			
 
				 pydub
			
 
				 PyYAML
			
 
				+torch
			
 
				+torchaudio
			
--- a/extensions/silero_tts/tts_preprocessor.py
+++ b/extensions/silero_tts/tts_preprocessor.py
@@ -2,13 +2,13 @@ import re
 
				 
			
 
				 from num2words import num2words
			
 
				 
			
 
				-
			
 
				+punctuation = r'[\s,.?!/)"\'\]>]'
			
 
				 alphabet_map = {
			
 
				     "A": " Ei ",
			
 
				     "B": " Bee ",
			
 
				     "C": " See ",
			
 
				     "D": " Dee ",
			
 
				-    "E": " Ii ",
			
 
				+    "E": " Eee ",
			
 
				     "F": " Eff ",
			
 
				     "G": " Jee ",
			
 
				     "H": " Eich ",
			
@@ -19,13 +19,13 @@ alphabet_map = {
 
				     "M": " Emm ",
			
 
				     "N": " Enn ",
			
 
				     "O": " Ohh ",
			
 
				-    "P": " Pii ",
			
 
				+    "P": " Pee ",
			
 
				     "Q": " Queue ",
			
 
				     "R": " Are ",
			
 
				     "S": " Ess ",
			
 
				     "T": " Tee ",
			
 
				     "U": " You ",
			
 
				-    "V": " Vii ",
			
 
				+    "V": " Vee ",
			
 
				     "W": " Double You ",
			
 
				     "X": " Ex ",
			
 
				     "Y": " Why ",
			
@@ -55,7 +55,7 @@ def preprocess(string):
 
				 
			
 
				     # cleanup whitespaces
			
 
				     # remove whitespace before punctuation
			
 
				-    string = re.sub(r'\s+([,.?!\'])', r'\1', string)
			
 
				+    string = re.sub(rf'\s+({punctuation})', r'\1', string)
			
 
				     string = string.strip()
			
 
				     # compact whitespace
			
 
				     string = ' '.join(string.split())
			
@@ -71,13 +71,13 @@ def remove_surrounded_chars(string):
 
				 
			
 
				 def replace_negative(string):
			
 
				     # handles situations like -5. -5 would become negative 5, which would then be expanded to negative five
			
 
				-    return re.sub(r'(\s)(-)(\d+)([\s,.?!)"\'\]>])', r'\1negative \3\4', string)
			
 
				+    return re.sub(rf'(\s)(-)(\d+)({punctuation})', r'\1negative \3\4', string)
			
 
				 
			
 
				 
			
 
				 def replace_roman(string):
			
 
				     # find a string of roman numerals.
			
 
				     # Only 2 or more, to avoid capturing I and single character abbreviations, like names
			
 
				-    pattern = re.compile(r'\s[IVXLCDM]{2,}[\s,.?!)"\'\]>]')
			
 
				+    pattern = re.compile(rf'\s[IVXLCDM]{{2,}}{punctuation}')
			
 
				     result = string
			
 
				     while True:
			
 
				         match = pattern.search(result)
			
@@ -117,7 +117,7 @@ def num_to_words(text):
 
				 
			
 
				 def replace_abbreviations(string):
			
 
				     # abbreviations 1 to 4 characters long. It will get things like A and I, but those are pronounced with their letter
			
 
				-    pattern = re.compile(r'(^|[\s("\'\[<])([A-Z]{1,4})([\s,.?!)"\'\]>]|$)')
			
 
				+    pattern = re.compile(rf'(^|[\s("\'\[<])([A-Z]{{1,4}})({punctuation}|$)')
			
 
				     result = string
			
 
				     while True:
			
 
				         match = pattern.search(result)