script.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. import gradio as gr
  2. import speech_recognition as sr
  3. from modules import shared
  4. input_hijack = {
  5. 'state': False,
  6. 'value': ["", ""]
  7. }
  8. def do_stt(audio):
  9. transcription = ""
  10. r = sr.Recognizer()
  11. # Convert to AudioData
  12. audio_data = sr.AudioData(sample_rate=audio[0], frame_data=audio[1], sample_width=4)
  13. try:
  14. transcription = r.recognize_whisper(audio_data, language="english", model="base.en")
  15. except sr.UnknownValueError:
  16. print("Whisper could not understand audio")
  17. except sr.RequestError as e:
  18. print("Could not request results from Whisper", e)
  19. return transcription
  20. def auto_transcribe(audio, auto_submit):
  21. if audio is None:
  22. return "", ""
  23. transcription = do_stt(audio)
  24. if auto_submit:
  25. input_hijack.update({"state": True, "value": [transcription, transcription]})
  26. return transcription, None
  27. def ui():
  28. with gr.Row():
  29. audio = gr.Audio(source="microphone")
  30. auto_submit = gr.Checkbox(label='Submit the transcribed audio automatically', value=True)
  31. audio.change(fn=auto_transcribe, inputs=[audio, auto_submit], outputs=[shared.gradio['textbox'], audio])
  32. audio.change(None, auto_submit, None, _js="(check) => {if (check) { document.getElementById('Generate').click() }}")