瀏覽代碼

Fix the letters P, V, and E sounding odd. Add slash to the punctuation list.
Also add torch and torchaudio back to the requirements, as Silero needs them. Silero's requirements.txt should list everything needed to run the tests.

da3dsoul 2 年之前
父節點
當前提交
7795e087a7
共有 2 個文件被更改,包括 10 次插入8 次删除
  1. 2 0
      extensions/silero_tts/requirements.txt
  2. 8 8
      extensions/silero_tts/tts_preprocessor.py

+ 2 - 0
extensions/silero_tts/requirements.txt

@@ -3,3 +3,5 @@ num2words
 omegaconf
 omegaconf
 pydub
 pydub
 PyYAML
 PyYAML
+torch
+torchaudio

+ 8 - 8
extensions/silero_tts/tts_preprocessor.py

@@ -2,13 +2,13 @@ import re
 
 
 from num2words import num2words
 from num2words import num2words
 
 
-
+punctuation = r'[\s,.?!/)"\'\]>]'
 alphabet_map = {
 alphabet_map = {
     "A": " Ei ",
     "A": " Ei ",
     "B": " Bee ",
     "B": " Bee ",
     "C": " See ",
     "C": " See ",
     "D": " Dee ",
     "D": " Dee ",
-    "E": " Ii ",
+    "E": " Eee ",
     "F": " Eff ",
     "F": " Eff ",
     "G": " Jee ",
     "G": " Jee ",
     "H": " Eich ",
     "H": " Eich ",
@@ -19,13 +19,13 @@ alphabet_map = {
     "M": " Emm ",
     "M": " Emm ",
     "N": " Enn ",
     "N": " Enn ",
     "O": " Ohh ",
     "O": " Ohh ",
-    "P": " Pii ",
+    "P": " Pee ",
     "Q": " Queue ",
     "Q": " Queue ",
     "R": " Are ",
     "R": " Are ",
     "S": " Ess ",
     "S": " Ess ",
     "T": " Tee ",
     "T": " Tee ",
     "U": " You ",
     "U": " You ",
-    "V": " Vii ",
+    "V": " Vee ",
     "W": " Double You ",
     "W": " Double You ",
     "X": " Ex ",
     "X": " Ex ",
     "Y": " Why ",
     "Y": " Why ",
@@ -55,7 +55,7 @@ def preprocess(string):
 
 
     # cleanup whitespaces
     # cleanup whitespaces
     # remove whitespace before punctuation
     # remove whitespace before punctuation
-    string = re.sub(r'\s+([,.?!\'])', r'\1', string)
+    string = re.sub(rf'\s+({punctuation})', r'\1', string)
     string = string.strip()
     string = string.strip()
     # compact whitespace
     # compact whitespace
     string = ' '.join(string.split())
     string = ' '.join(string.split())
@@ -71,13 +71,13 @@ def remove_surrounded_chars(string):
 
 
 def replace_negative(string):
 def replace_negative(string):
     # handles situations like -5. -5 would become negative 5, which would then be expanded to negative five
     # handles situations like -5. -5 would become negative 5, which would then be expanded to negative five
-    return re.sub(r'(\s)(-)(\d+)([\s,.?!)"\'\]>])', r'\1negative \3\4', string)
+    return re.sub(rf'(\s)(-)(\d+)({punctuation})', r'\1negative \3\4', string)
 
 
 
 
 def replace_roman(string):
 def replace_roman(string):
     # find a string of roman numerals.
     # find a string of roman numerals.
     # Only 2 or more, to avoid capturing I and single character abbreviations, like names
     # Only 2 or more, to avoid capturing I and single character abbreviations, like names
-    pattern = re.compile(r'\s[IVXLCDM]{2,}[\s,.?!)"\'\]>]')
+    pattern = re.compile(rf'\s[IVXLCDM]{{2,}}{punctuation}')
     result = string
     result = string
     while True:
     while True:
         match = pattern.search(result)
         match = pattern.search(result)
@@ -117,7 +117,7 @@ def num_to_words(text):
 
 
 def replace_abbreviations(string):
 def replace_abbreviations(string):
     # abbreviations 1 to 4 characters long. It will get things like A and I, but those are pronounced with their letter
     # abbreviations 1 to 4 characters long. It will get things like A and I, but those are pronounced with their letter
-    pattern = re.compile(r'(^|[\s("\'\[<])([A-Z]{1,4})([\s,.?!)"\'\]>]|$)')
+    pattern = re.compile(rf'(^|[\s("\'\[<])([A-Z]{{1,4}})({punctuation}|$)')
     result = string
     result = string
     while True:
     while True:
         match = pattern.search(result)
         match = pattern.search(result)