script.py 1.1 KB

12345678910111213141516171819202122232425262728293031323334353637
  1. import gradio as gr
  2. import speech_recognition as sr
  3. input_hijack = {
  4. 'state': False,
  5. 'value': ["", ""]
  6. }
  7. def do_stt():
  8. transcription = ""
  9. r = sr.Recognizer()
  10. with sr.Microphone() as source:
  11. r.adjust_for_ambient_noise(source, 0.2)
  12. audio = r.listen(source)
  13. try:
  14. transcription = r.recognize_whisper(audio, language="english", model="base.en")
  15. except sr.UnknownValueError:
  16. print("Whisper could not understand audio")
  17. except sr.RequestError as e:
  18. print("Could not request results from Whisper", e)
  19. input_hijack.update({"state": True, "value": [transcription, transcription]})
  20. return transcription
  21. def update_hijack(val):
  22. input_hijack.update({"state": True, "value": [val, val]})
  23. return val
  24. def ui():
  25. speech_button = gr.Button(value="🎙️")
  26. output_transcription = gr.Textbox(label="STT-Input", placeholder="Speech Preview. Click \"Generate\" to send", interactive=True)
  27. output_transcription.change(fn=update_hijack, inputs=[output_transcription])
  28. speech_button.click(do_stt, outputs=[output_transcription])