فهرست منبع

Minor style changes to silero_tts

oobabooga 2 سال پیش
والد
کامیت
8f8da6707d
1 فایل تغییر یافته به همراه 17 افزوده شده و 14 حذف شده
  1. 17 14
      extensions/silero_tts/script.py

+ 17 - 14
extensions/silero_tts/script.py

@@ -14,18 +14,19 @@ params = {
     'model_id': 'v3_en',
     'model_id': 'v3_en',
     'sample_rate': 48000,
     'sample_rate': 48000,
     'device': 'cpu',
     'device': 'cpu',
-    'show_text': True,
+    'show_text': False,
     'autoplay': True,
     'autoplay': True,
     'voice_pitch': 'medium',
     'voice_pitch': 'medium',
     'voice_speed': 'medium',
     'voice_speed': 'medium',
 }
 }
+
 current_params = params.copy()
 current_params = params.copy()
 voices_by_gender = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115']
 voices_by_gender = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115']
 voice_pitches = ['x-low', 'low', 'medium', 'high', 'x-high']
 voice_pitches = ['x-low', 'low', 'medium', 'high', 'x-high']
 voice_speeds = ['x-slow', 'slow', 'medium', 'fast', 'x-fast']
 voice_speeds = ['x-slow', 'slow', 'medium', 'fast', 'x-fast']
 last_msg_id = 0
 last_msg_id = 0
 
 
-#Used for making text xml compatible, needed for voice pitch and speed control
+# Used for making text xml compatible, needed for voice pitch and speed control
 table = str.maketrans({
 table = str.maketrans({
     "<": "&lt;",
     "<": "&lt;",
     ">": "&gt;",
     ">": "&gt;",
@@ -33,6 +34,7 @@ table = str.maketrans({
     "'": "&apos;",
     "'": "&apos;",
     '"': "&quot;",
     '"': "&quot;",
 })
 })
+
 def xmlesc(txt):
 def xmlesc(txt):
     return txt.translate(table)
     return txt.translate(table)
 
 
@@ -57,7 +59,8 @@ def input_modifier(string):
     This function is applied to your text inputs before
     This function is applied to your text inputs before
     they are fed into the model.
     they are fed into the model.
     """
     """
-    #remove autoplay from previous
+
+    # Remove autoplay from previous
     if len(shared.history['internal'])>0:
     if len(shared.history['internal'])>0:
         [text, reply] = shared.history['internal'][-1]
         [text, reply] = shared.history['internal'][-1]
         [visible_text, visible_reply] = shared.history['visible'][-1]
         [visible_text, visible_reply] = shared.history['visible'][-1]
@@ -91,30 +94,30 @@ def output_modifier(string):
     string = string.replace('\n', ' ')
     string = string.replace('\n', ' ')
     string = string.strip()
     string = string.strip()
 
 
-    silent_string = False #Used to prevent unnecessary audio file generation
+    silent_string = False # Used to prevent unnecessary audio file generation
     if string == '':
     if string == '':
-            string = 'empty reply, try regenerating'
-            silent_string = True
+        string = 'empty reply, try regenerating'
+        silent_string = True
 
 
-    #x-slow, slow, medium, fast, x-fast
-    #x-low, low, medium, high, x-high
+    # x-slow, slow, medium, fast, x-fast
+    # x-low, low, medium, high, x-high
     pitch = params['voice_pitch']
     pitch = params['voice_pitch']
     speed = params['voice_speed']
     speed = params['voice_speed']
     prosody=f'<prosody rate="{speed}" pitch="{pitch}">'
     prosody=f'<prosody rate="{speed}" pitch="{pitch}">'
-    string ='<speak>'+prosody+xmlesc(string)+'</prosody></speak>'
+    string = '<speak>'+prosody+xmlesc(string)+'</prosody></speak>'
 
 
-    current_msg_id=len(shared.history['visible'])#check length here, since output_modifier can run many times on the same message
+    current_msg_id = len(shared.history['visible']) # Check length here, since output_modifier can run many times on the same message
     output_file = Path(f'extensions/silero_tts/outputs/{shared.character}_{current_msg_id:06d}.wav')
     output_file = Path(f'extensions/silero_tts/outputs/{shared.character}_{current_msg_id:06d}.wav')
     if not shared.still_streaming and not silent_string:
     if not shared.still_streaming and not silent_string:
         model.save_wav(ssml_text=string, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
         model.save_wav(ssml_text=string, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
         string = f'<audio id="audio_{current_msg_id:06d}" src="file/{output_file.as_posix()}" controls autoplay></audio>\n\n'
         string = f'<audio id="audio_{current_msg_id:06d}" src="file/{output_file.as_posix()}" controls autoplay></audio>\n\n'
     else:
     else:
-        #placeholder so text doesn't shift around so much
-        string ='<audio controls></audio>\n\n'
+        # Placeholder so text doesn't shift around so much
+        string = '<audio controls></audio>\n\n'
 
 
     if params['show_text']:
     if params['show_text']:
-        #string+=f'*[{current_msg_id}]:*'+orig_string #Debug, looks like there is a delay in "current_msg_id" being updated when switching characters (updates after new message sent). Can't find the source. "shared.character" is updating properly.
-        string+=orig_string
+        #string += f'*[{current_msg_id}]:*'+orig_string #Debug, looks like there is a delay in "current_msg_id" being updated when switching characters (updates after new message sent). Can't find the source. "shared.character" is updating properly.
+        string += orig_string
 
 
     return string
     return string