Skip to content

Commit 48aa528

Browse files
committed
use Gradio microphone input instead
1 parent 3b41459 commit 48aa528

File tree

3 files changed

+17
-50
lines changed

3 files changed

+17
-50
lines changed

extensions/whisper_stt/README.md

Lines changed: 0 additions & 39 deletions
This file was deleted.
Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
 git+https://github.com/Uberi/speech_recognition.git@010382b
-PyAudio
 openai-whisper
 soundfile
 ffmpeg

extensions/whisper_stt/script.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,22 +7,24 @@
 }
 
 
-def do_stt():
+def do_stt(audio, text_state=""):
     transcription = ""
     r = sr.Recognizer()
-    with sr.Microphone() as source:
-        r.adjust_for_ambient_noise(source, 0.2)
-        audio = r.listen(source)
+
+    # Convert to AudioData
+    audio_data = sr.AudioData(sample_rate=audio[0], frame_data=audio[1], sample_width=4)
 
     try:
-        transcription = r.recognize_whisper(audio, language="english", model="base.en")
+        transcription = r.recognize_whisper(audio_data, language="english", model="base.en")
     except sr.UnknownValueError:
         print("Whisper could not understand audio")
     except sr.RequestError as e:
         print("Could not request results from Whisper", e)
 
     input_hijack.update({"state": True, "value": [transcription, transcription]})
-    return transcription
+
+    text_state += transcription + " "
+    return text_state, text_state
 
 
 def update_hijack(val):
@@ -31,7 +33,12 @@ def update_hijack(val):
 
 
 def ui():
-    speech_button = gr.Button(value="🎙️")
-    output_transcription = gr.Textbox(label="STT-Input", placeholder="Speech Preview. Click \"Generate\" to send", interactive=True)
-    output_transcription.change(fn=update_hijack, inputs=[output_transcription])
-    speech_button.click(do_stt, outputs=[output_transcription])
+    tr_state = gr.State(value="")
+    output_transcription = gr.Textbox(label="STT-Input",
+                                      placeholder="Speech Preview. Click \"Generate\" to send",
+                                      interactive=True)
+    output_transcription.change(fn=update_hijack, inputs=[output_transcription], outputs=[tr_state])
+    with gr.Row():
+        audio = gr.Audio(source="microphone")
+        transcribe_button = gr.Button(value="Transcribe")
+    transcribe_button.click(do_stt, inputs=[audio, tr_state], outputs=[output_transcription, tr_state])

0 commit comments

Comments
 (0)