script.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. from pathlib import Path
  2. import gradio as gr
  3. import torch
  4. import modules.shared as shared
  5. import simpleaudio as sa
  6. torch._C._jit_set_profiling_mode(False)
  7. params = {
  8. 'activate': True,
  9. 'speaker': 'en_5',
  10. 'language': 'en',
  11. 'model_id': 'v3_en',
  12. 'sample_rate': 48000,
  13. 'device': 'cpu',
  14. 'max_wavs': -1,
  15. 'autoplay': True,
  16. 'show_text': True,
  17. }
  18. current_params = params.copy()
  19. voices_by_gender = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115']
  20. wav_idx = 0
  21. #Used for making text xml compatible, needed for voice pitch and speed control
  22. table = str.maketrans({
  23. "<": "&lt;",
  24. ">": "&gt;",
  25. "&": "&amp;",
  26. "'": "&apos;",
  27. '"': "&quot;",
  28. })
  29. def xmlesc(txt):
  30. return txt.translate(table)
  31. def load_model():
  32. model, example_text = torch.hub.load(repo_or_dir='snakers4/silero-models', model='silero_tts', language=params['language'], speaker=params['model_id'])
  33. model.to(params['device'])
  34. return model
  35. model = load_model()
  36. def remove_surrounded_chars(string):
  37. new_string = ""
  38. in_star = False
  39. for char in string:
  40. if char == '*':
  41. in_star = not in_star
  42. elif not in_star:
  43. new_string += char
  44. return new_string
  45. def input_modifier(string):
  46. """
  47. This function is applied to your text inputs before
  48. they are fed into the model.
  49. """
  50. return string
  51. def output_modifier(string):
  52. """
  53. This function is applied to the model outputs.
  54. """
  55. global wav_idx, model, current_params
  56. for i in params:
  57. if params[i] != current_params[i]:
  58. model = load_model()
  59. current_params = params.copy()
  60. break
  61. if params['activate'] == False:
  62. return string
  63. orig_string = string
  64. string = remove_surrounded_chars(string)
  65. string = string.replace('"', '')
  66. string = string.replace('“', '')
  67. string = string.replace('\n', ' ')
  68. string = string.strip()
  69. auto_playable=True
  70. if string == '':
  71. string = 'empty reply, try regenerating'
  72. auto_playable=False
  73. #x-slow, slow, medium, fast, x-fast
  74. #x-low, low, medium, high, x-high
  75. prosody='<prosody rate="medium" pitch="medium">'
  76. string ='<speak>'+prosody+xmlesc(string)+'</prosody></speak>'
  77. output_file = Path(f'extensions/silero_tts/outputs/{wav_idx:06d}.wav')
  78. autoplay_str = ''
  79. if not shared.still_streaming:
  80. model.save_wav(ssml_text=string, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
  81. #diabled until autoplay doesn't run on previous messages
  82. #autoplay = 'autoplay' if (params['autoplay'] and auto_playable) else ''
  83. string = f'<audio src="file/{output_file.as_posix()}" controls {autoplay_str}></audio>\n\n'
  84. else:
  85. #placeholder so text doesnt shift around so much
  86. string =f'<audio controls {autoplay_str}></audio>\n\n'
  87. #reset if too many wavs. set max to -1 for unlimited.
  88. if wav_idx < params['max_wavs'] or params['max_wavs'] < 0:
  89. #only increment if starting a new stream, else replace during streaming.
  90. if not shared.still_streaming:
  91. wav_idx += 1
  92. else:
  93. wav_idx = 0
  94. if params['show_text']:
  95. string+=orig_string
  96. if params['autoplay'] == True and auto_playable and not shared.still_streaming:
  97. stop_autoplay()
  98. wave_obj = sa.WaveObject.from_wave_file(output_file.as_posix())
  99. wave_obj.play()
  100. return string
  101. def bot_prefix_modifier(string):
  102. """
  103. This function is only applied in chat mode. It modifies
  104. the prefix text for the Bot and can be used to bias its
  105. behavior.
  106. """
  107. return string
  108. def stop_autoplay():
  109. sa.stop_all()
  110. def ui():
  111. # Gradio elements
  112. activate = gr.Checkbox(value=params['activate'], label='Activate TTS')
  113. show_text = gr.Checkbox(value=params['show_text'], label='Show message text under audio player')
  114. autoplay = gr.Checkbox(value=params['autoplay'], label='Play TTS automatically')
  115. stop_audio = gr.Button("Stop Auto-Play")
  116. voice = gr.Dropdown(value=params['speaker'], choices=voices_by_gender, label='TTS voice')
  117. # Event functions to update the parameters in the backend
  118. activate.change(lambda x: params.update({"activate": x}), activate, None)
  119. autoplay.change(lambda x: params.update({"autoplay": x}), autoplay, None)
  120. show_text.change(lambda x: params.update({"show_text": x}), show_text, None)
  121. stop_audio.click(stop_autoplay)
  122. voice.change(lambda x: params.update({"speaker": x}), voice, None)