EliasVincent 2 лет назад
Родитель
Сommit
7a03d0bda3
1 измененных файлов с 4 добавлено и 8 удалено
  1. 4 8
      extensions/whisper_stt/script.py

+ 4 - 8
extensions/whisper_stt/script.py

@@ -1,6 +1,6 @@
 import gradio as gr
 import gradio as gr
 import speech_recognition as sr
 import speech_recognition as sr
-import modules.shared as shared
+
 
 
 input_hijack = {
 input_hijack = {
     'state': False,
     'state': False,
@@ -16,25 +16,21 @@ def do_stt():
     transcription = ""
     transcription = ""
     r = sr.Recognizer()
     r = sr.Recognizer()
     with sr.Microphone() as source:
     with sr.Microphone() as source:
-        print("Say something!")
         r.adjust_for_ambient_noise(source)
         r.adjust_for_ambient_noise(source)
         audio = r.listen(source)
         audio = r.listen(source)
 
 
-    # recognize speech using whisper
     try:
     try:
         transcription = r.recognize_whisper(audio, language="english", model="tiny.en")
         transcription = r.recognize_whisper(audio, language="english", model="tiny.en")
-        print("Whisper thinks you said " + transcription)
     except sr.UnknownValueError:
     except sr.UnknownValueError:
         print("Whisper could not understand audio")
         print("Whisper could not understand audio")
     except sr.RequestError as e:
     except sr.RequestError as e:
-        print("Could not request results from Whisper")
+        print("Could not request results from Whisper", e)
 
 
-    # input_modifier(transcription)
     input_hijack.update({"state": True, "value": [transcription, transcription]})
     input_hijack.update({"state": True, "value": [transcription, transcription]})
     return transcription
     return transcription
 
 
 
 
 def ui():
 def ui():
-    speech_button = gr.Button(value="STT")
-    output_transcription = gr.Textbox(label="Speech Preview")
+    speech_button = gr.Button(value="🎙️")
+    output_transcription = gr.Textbox(label="STT-Preview", placeholder="Speech Preview. Click \"Generate\" to send")
     speech_button.click(do_stt, outputs=[output_transcription])
     speech_button.click(do_stt, outputs=[output_transcription])