Skip to content

Commit 3d4508f

Browse files
committed
feat: fix chunk splitting and chat message
1 parent 11df7d8 commit 3d4508f

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

realtime_agent/agent.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from agora_realtime_ai_api.rtc import Channel, ChatMessage, RtcEngine, RtcOptions
1212

1313
from .logger import setup_logger
14-
from .realtime.struct import InputAudioBufferCommitted, InputAudioBufferSpeechStarted, InputAudioBufferSpeechStopped, ItemCreated, RateLimitsUpdated, ResponseAudioDelta, ResponseAudioDone, ResponseAudioTranscriptDelta, ResponseAudioTranscriptDone, ResponseContentPartAdded, ResponseContentPartDone, ResponseCreated, ResponseDone, ResponseOutputItemAdded, ResponseOutputItemDone, ServerVADUpdateParams, SessionUpdate, SessionUpdateParams, SessionUpdated, Voices, to_json
14+
from .realtime.struct import InputAudioBufferCommitted, InputAudioBufferSpeechStarted, InputAudioBufferSpeechStopped, InputAudioTranscription, ItemCreated, ItemInputAudioTranscriptionCompleted, RateLimitsUpdated, ResponseAudioDelta, ResponseAudioDone, ResponseAudioTranscriptDelta, ResponseAudioTranscriptDone, ResponseContentPartAdded, ResponseContentPartDone, ResponseCreated, ResponseDone, ResponseOutputItemAdded, ResponseOutputItemDone, ServerVADUpdateParams, SessionUpdate, SessionUpdateParams, SessionUpdated, Voices, to_json
1515
from .realtime.connection import RealtimeApiConnection
1616
from .tools import ClientToolCallResponse, ToolContext
1717
from .utils import PCMWriter
@@ -102,6 +102,7 @@ async def setup_and_run_agent(
102102
modalities=["text", "audio"],
103103
temperature=0.8,
104104
max_response_output_tokens="inf",
105+
input_audio_transcription=InputAudioTranscription(model="whisper-1")
105106
)
106107
)
107108
)
@@ -242,7 +243,7 @@ async def _process_model_messages(self) -> None:
242243
# logger.info("Received audio message")
243244
self.audio_queue.put_nowait(base64.b64decode(message.delta))
244245
# loop.call_soon_threadsafe(self.audio_queue.put_nowait, base64.b64decode(message.delta))
245-
logger.info(f"TMS:ResponseAudioDelta: response_id:{message.response_id},item_id: {message.item_id}")
246+
logger.debug(f"TMS:ResponseAudioDelta: response_id:{message.response_id},item_id: {message.item_id}")
246247
case ResponseAudioTranscriptDelta():
247248
# logger.info(f"Received text message {message=}")
248249
asyncio.create_task(self.channel.chat.send_message(
@@ -267,6 +268,13 @@ async def _process_model_messages(self) -> None:
267268
case InputAudioBufferSpeechStopped():
268269
logger.info(f"TMS:InputAudioBufferSpeechStopped: item_id: {message.item_id}")
269270
pass
271+
case ItemInputAudioTranscriptionCompleted():
272+
logger.info(f"ItemInputAudioTranscriptionCompleted: {message=}")
273+
asyncio.create_task(self.channel.chat.send_message(
274+
ChatMessage(
275+
message=to_json(message), msg_id=message.item_id
276+
)
277+
))
270278
# InputAudioBufferCommitted
271279
case InputAudioBufferCommitted():
272280
pass

0 commit comments

Comments
 (0)