Improve Silero's Preprocessor to Handle Numbers and Abbreviations Better

This commit is contained in:
da3dsoul
2023-04-03 17:58:21 -04:00
parent 4c9ed09270
commit b2022d0869
3 changed files with 119 additions and 11 deletions

View File

@@ -1,4 +1,3 @@
import re
import time
from pathlib import Path
@@ -7,6 +6,8 @@ import modules.chat as chat
import modules.shared as shared
import torch
from extensions.silero_tts import tts_preprocessor
torch._C._jit_set_profiling_mode(False)
params = {
@@ -46,11 +47,6 @@ def load_model():
return model
model = load_model()
def remove_surrounded_chars(string):
    """Remove asterisk-delimited spans from ``string``.

    Every span that starts at an asterisk and runs up to the next asterisk
    is deleted, delimiters included. An asterisk with no closing partner
    deletes everything from that asterisk to the end of the string.

    Args:
        string: The text to clean.

    Returns:
        The text with all such spans removed. Whitespace around a removed
        span is left as-is.
    """
    # Raw string so that ``\*`` reaches the regex engine as an escaped
    # asterisk instead of being an invalid escape sequence in a normal
    # string literal (a SyntaxWarning on Python 3.12+).
    # Pattern: an asterisk, as few non-asterisk characters as possible
    # (zero or more), then either a closing asterisk or the end of the string.
    return re.sub(r'\*[^*]*?(\*|$)', '', string)
def remove_tts_from_history(name1, name2):
for i, entry in enumerate(shared.history['internal']):
shared.history['visible'][i] = [shared.history['visible'][i][0], entry[1]]
@@ -98,11 +94,7 @@ def output_modifier(string):
return string
original_string = string
string = remove_surrounded_chars(string)
string = string.replace('"', '')
string = string.replace('', '')
string = string.replace('\n', ' ')
string = string.strip()
string = tts_preprocessor.preprocess(string)
if string == '':
string = '*Empty reply, try regenerating*'