Просмотр исходного кода

Improve Silero's Preprocessor to Handle Negative Numbers Better

da3dsoul 2 года назад
Родитель
Commit
c05f727ae4
1 измененный файл: 7 добавлено и 1 удалено
  1. 7 1
      extensions/silero_tts/tts_preprocessor.py

+ 7 - 1
extensions/silero_tts/tts_preprocessor.py

@@ -38,6 +38,7 @@ def preprocess(string):
     string = string.replace('“', '')
     string = string.replace('\n', ' ')
     string = remove_commas(string)
+    string = replace_negative(string)
     string = replace_roman(string)
     string = hyphen_range_to(string)
     string = num_to_words(string)
@@ -62,8 +63,13 @@ def remove_surrounded_chars(string):
     return re.sub(r'\*[^*]*?(\*|$)', '', string)
 
 
+def replace_negative(string):
+    """Spell out the minus sign of standalone negative integers.
+
+    A hyphen immediately followed by digits is rewritten as
+    ``negative <digits>`` when the hyphen is preceded by whitespace and the
+    digits are followed by whitespace or one of ``, . ? ! ) " ' ] >``.
+    Hyphens inside ranges such as ``1-5`` are left untouched.
+
+    Args:
+        string: Text to preprocess.
+
+    Returns:
+        The text with each matching ``-<digits>`` replaced.
+    """
+    # Lookarounds check the neighbouring characters without consuming them.
+    # A consuming pattern swallows the separator after one number, which the
+    # next match needs as its leading whitespace, so "-1 -2" would only have
+    # its first value converted.
+    return re.sub(r'(?<=\s)-(\d+)(?=[\s,.?!)"\'\]>])', r'negative \1', string)
+
+
 def replace_roman(string):
-    pattern = re.compile(r'\s[IVXLCDM]+[\s,.?!)"\'\]>]')
+    # find a string of roman numerals. Only 2 or more, to avoid capturing I
+    pattern = re.compile(r'\s[IVXLCDM]{2,}[\s,.?!)"\'\]>]')
     result = string
     while True:
         match = pattern.search(result)