script.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. import time
  2. from pathlib import Path
  3. import gradio as gr
  4. import torch
  5. import modules.chat as chat
  6. import modules.shared as shared
  7. torch._C._jit_set_profiling_mode(False)
  8. params = {
  9. 'activate': True,
  10. 'speaker': 'en_56',
  11. 'language': 'en',
  12. 'model_id': 'v3_en',
  13. 'sample_rate': 48000,
  14. 'device': 'cpu',
  15. 'show_text': False,
  16. 'autoplay': True,
  17. 'voice_pitch': 'medium',
  18. 'voice_speed': 'medium',
  19. }
  20. current_params = params.copy()
  21. voices_by_gender = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115']
  22. voice_pitches = ['x-low', 'low', 'medium', 'high', 'x-high']
  23. voice_speeds = ['x-slow', 'slow', 'medium', 'fast', 'x-fast']
  24. # Used for making text xml compatible, needed for voice pitch and speed control
  25. table = str.maketrans({
  26. "<": "&lt;",
  27. ">": "&gt;",
  28. "&": "&amp;",
  29. "'": "&apos;",
  30. '"': "&quot;",
  31. })
  32. def xmlesc(txt):
  33. return txt.translate(table)
  34. def load_model():
  35. model, example_text = torch.hub.load(repo_or_dir='snakers4/silero-models', model='silero_tts', language=params['language'], speaker=params['model_id'])
  36. model.to(params['device'])
  37. return model
  38. model = load_model()
  39. def remove_surrounded_chars(string):
  40. new_string = ""
  41. in_star = False
  42. for char in string:
  43. if char == '*':
  44. in_star = not in_star
  45. elif not in_star:
  46. new_string += char
  47. return new_string
  48. def remove_tts_from_history(name1, name2):
  49. for i, entry in enumerate(shared.history['internal']):
  50. shared.history['visible'][i] = [shared.history['visible'][i][0], entry[1]]
  51. return chat.generate_chat_output(shared.history['visible'], name1, name2, shared.character)
  52. def toggle_text_in_history(name1, name2):
  53. for i, entry in enumerate(shared.history['visible']):
  54. visible_reply = entry[1]
  55. if visible_reply.startswith('<audio'):
  56. if params['show_text']:
  57. reply = shared.history['internal'][i][1]
  58. shared.history['visible'][i] = [shared.history['visible'][i][0], f"{visible_reply.split('</audio>')[0]}</audio>\n\n{reply}"]
  59. else:
  60. shared.history['visible'][i] = [shared.history['visible'][i][0], f"{visible_reply.split('</audio>')[0]}</audio>"]
  61. return chat.generate_chat_output(shared.history['visible'], name1, name2, shared.character)
  62. def input_modifier(string):
  63. """
  64. This function is applied to your text inputs before
  65. they are fed into the model.
  66. """
  67. # Remove autoplay from the last reply
  68. if (shared.args.chat or shared.args.cai_chat) and len(shared.history['internal']) > 0:
  69. shared.history['visible'][-1] = [shared.history['visible'][-1][0], shared.history['visible'][-1][1].replace('controls autoplay>','controls>')]
  70. shared.processing_message = "*Is recording a voice message...*"
  71. return string
  72. def output_modifier(string):
  73. """
  74. This function is applied to the model outputs.
  75. """
  76. global model, current_params
  77. for i in params:
  78. if params[i] != current_params[i]:
  79. model = load_model()
  80. current_params = params.copy()
  81. break
  82. if params['activate'] == False:
  83. return string
  84. original_string = string
  85. string = remove_surrounded_chars(string)
  86. string = string.replace('"', '')
  87. string = string.replace('“', '')
  88. string = string.replace('\n', ' ')
  89. string = string.strip()
  90. if string == '':
  91. string = '*Empty reply, try regenerating*'
  92. else:
  93. output_file = Path(f'extensions/silero_tts/outputs/{shared.character}_{int(time.time())}.wav')
  94. prosody = '<prosody rate="{}" pitch="{}">'.format(params['voice_speed'], params['voice_pitch'])
  95. silero_input = f'<speak>{prosody}{xmlesc(string)}</prosody></speak>'
  96. model.save_wav(ssml_text=silero_input, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
  97. autoplay = 'autoplay' if params['autoplay'] else ''
  98. string = f'<audio src="file/{output_file.as_posix()}" controls {autoplay}></audio>'
  99. if params['show_text']:
  100. string += f'\n\n{original_string}'
  101. shared.processing_message = "*Is typing...*"
  102. return string
  103. def bot_prefix_modifier(string):
  104. """
  105. This function is only applied in chat mode. It modifies
  106. the prefix text for the Bot and can be used to bias its
  107. behavior.
  108. """
  109. return string
  110. def ui():
  111. # Gradio elements
  112. with gr.Accordion("Silero TTS"):
  113. with gr.Row():
  114. activate = gr.Checkbox(value=params['activate'], label='Activate TTS')
  115. autoplay = gr.Checkbox(value=params['autoplay'], label='Play TTS automatically')
  116. show_text = gr.Checkbox(value=params['show_text'], label='Show message text under audio player')
  117. voice = gr.Dropdown(value=params['speaker'], choices=voices_by_gender, label='TTS voice')
  118. with gr.Row():
  119. v_pitch = gr.Dropdown(value=params['voice_pitch'], choices=voice_pitches, label='Voice pitch')
  120. v_speed = gr.Dropdown(value=params['voice_speed'], choices=voice_speeds, label='Voice speed')
  121. with gr.Row():
  122. convert = gr.Button('Permanently replace audios with the message texts')
  123. convert_cancel = gr.Button('Cancel', visible=False)
  124. convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False)
  125. # Convert history with confirmation
  126. convert_arr = [convert_confirm, convert, convert_cancel]
  127. convert.click(lambda :[gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr)
  128. convert_confirm.click(lambda :[gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr)
  129. convert_confirm.click(remove_tts_from_history, [shared.gradio['name1'], shared.gradio['name2']], shared.gradio['display'])
  130. convert_confirm.click(lambda : chat.save_history(timestamp=False), [], [], show_progress=False)
  131. convert_cancel.click(lambda :[gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr)
  132. # Toggle message text in history
  133. show_text.change(lambda x: params.update({"show_text": x}), show_text, None)
  134. show_text.change(toggle_text_in_history, [shared.gradio['name1'], shared.gradio['name2']], shared.gradio['display'])
  135. show_text.change(lambda : chat.save_history(timestamp=False), [], [], show_progress=False)
  136. # Event functions to update the parameters in the backend
  137. activate.change(lambda x: params.update({"activate": x}), activate, None)
  138. autoplay.change(lambda x: params.update({"autoplay": x}), autoplay, None)
  139. voice.change(lambda x: params.update({"speaker": x}), voice, None)
  140. v_pitch.change(lambda x: params.update({"voice_pitch": x}), v_pitch, None)
  141. v_speed.change(lambda x: params.update({"voice_speed": x}), v_speed, None)