Skip to content

Commit eb3bef0

Browse files
xitzhang (Xiting Zhang) and Copilot
authored
[Voice Live] Update models related to voice and conversation (#42718)
* Update models related to voice and conversation

* Update sdk/ai/azure-ai-voicelive/azure/ai/voicelive/_types.py

Co-authored-by: Copilot <[email protected]>

* use pyproject and deprecate setup.py

* update change log

---------

Co-authored-by: Xiting Zhang <[email protected]>
Co-authored-by: Copilot <[email protected]>
1 parent 4eeba58 commit eb3bef0

File tree

12 files changed

+1916
-1080
lines changed

12 files changed

+1916
-1080
lines changed
Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
11
# Release History
22

3-
## 1.0.0b1 (Unreleased)
3+
## 1.0.0b1 (2025-08-28)
44

55
### Features Added
66

77
- Added WebSocket connection support through `connect()`.
8-
- Added `WebsocketConnectionOptions` to configure WebSocket connections.
98
- Added `VoiceLiveConnection` for managing WebSocket connections.
9+
- Added models of Voice Live preview.
1010
- Added WebSocket-based examples in the samples directory.
1111

1212
### Other Changes
1313

14-
- Added `websockets` extra in `setup.py` to install WebSocket dependencies.
1514
- Initial preview release.

sdk/ai/azure-ai-voicelive/apiview-properties.json

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,21 @@
33
"CrossLanguageDefinitionId": {
44
"azure.ai.voicelive.models.AgentConfig": "VoiceLive.AgentConfig",
55
"azure.ai.voicelive.models.Animation": "VoiceLive.Animation",
6+
"azure.ai.voicelive.models.ConversationRequestItem": "VoiceLive.ConversationRequestItem",
7+
"azure.ai.voicelive.models.MessageItem": "VoiceLive.MessageItem",
8+
"azure.ai.voicelive.models.AssistantMessageItem": "VoiceLive.AssistantMessageItem",
69
"azure.ai.voicelive.models.AudioEchoCancellation": "VoiceLive.AudioEchoCancellation",
710
"azure.ai.voicelive.models.AudioInputTranscriptionSettings": "VoiceLive.AudioInputTranscriptionSettings",
811
"azure.ai.voicelive.models.AudioNoiseReduction": "VoiceLive.AudioNoiseReduction",
912
"azure.ai.voicelive.models.AvatarConfig": "VoiceLive.AvatarConfig",
13+
"azure.ai.voicelive.models.AzureVoice": "VoiceLive.AzureVoice",
1014
"azure.ai.voicelive.models.AzureCustomVoice": "VoiceLive.AzureCustomVoice",
11-
"azure.ai.voicelive.models.AzurePersonalVoice": "VoiceLive.AzurePersonalVoice",
1215
"azure.ai.voicelive.models.TurnDetection": "VoiceLive.TurnDetection",
16+
"azure.ai.voicelive.models.AzureMultilingualSemanticVad": "VoiceLive.AzureMultilingualSemanticVad",
17+
"azure.ai.voicelive.models.AzurePersonalVoice": "VoiceLive.AzurePersonalVoice",
18+
"azure.ai.voicelive.models.EOUDetection": "VoiceLive.EOUDetection",
19+
"azure.ai.voicelive.models.AzureSemanticDetection": "VoiceLive.AzureSemanticDetection",
20+
"azure.ai.voicelive.models.AzureSemanticDetectionMultilingual": "VoiceLive.AzureSemanticDetectionMultilingual",
1321
"azure.ai.voicelive.models.AzureSemanticVad": "VoiceLive.AzureSemanticVad",
1422
"azure.ai.voicelive.models.AzureStandardVoice": "VoiceLive.AzureStandardVoice",
1523
"azure.ai.voicelive.models.ClientEvent": "VoiceLive.ClientEvent",
@@ -30,38 +38,37 @@
3038
"azure.ai.voicelive.models.ClientEventSessionAvatarConnect": "VoiceLive.ClientEventSessionAvatarConnect",
3139
"azure.ai.voicelive.models.ClientEventSessionUpdate": "VoiceLive.ClientEventSessionUpdate",
3240
"azure.ai.voicelive.models.ContentPart": "VoiceLive.ContentPart",
33-
"azure.ai.voicelive.models.ConversationItemWithReference": "VoiceLive.ConversationItemWithReference",
34-
"azure.ai.voicelive.models.ConversationItemWithReferenceContent": "VoiceLive.ConversationItemWithReference.content.anonymous",
35-
"azure.ai.voicelive.models.ConversationRequestItem": "VoiceLive.ConversationRequestItem",
36-
"azure.ai.voicelive.models.ConversationResponseItem": "VoiceLive.ConversationResponseItem",
41+
"azure.ai.voicelive.models.FunctionCallItem": "VoiceLive.FunctionCallItem",
42+
"azure.ai.voicelive.models.FunctionCallOutputItem": "VoiceLive.FunctionCallOutputItem",
3743
"azure.ai.voicelive.models.Tool": "VoiceLive.Tool",
3844
"azure.ai.voicelive.models.FunctionTool": "VoiceLive.FunctionTool",
3945
"azure.ai.voicelive.models.IceServer": "VoiceLive.IceServer",
4046
"azure.ai.voicelive.models.InputAudio": "VoiceLive.InputAudio",
47+
"azure.ai.voicelive.models.UserContentPart": "VoiceLive.UserContentPart",
48+
"azure.ai.voicelive.models.InputAudioContentPart": "VoiceLive.InputAudioContentPart",
49+
"azure.ai.voicelive.models.InputTextContentPart": "VoiceLive.InputTextContentPart",
50+
"azure.ai.voicelive.models.InputTokenDetails": "VoiceLive.InputTokenDetails",
4151
"azure.ai.voicelive.models.LogProbProperties": "VoiceLive.LogProbProperties",
4252
"azure.ai.voicelive.models.NoTurnDetection": "VoiceLive.NoTurnDetection",
43-
"azure.ai.voicelive.models.RequestMessageItem": "VoiceLive.RequestMessageItem",
44-
"azure.ai.voicelive.models.RequestAssistantMessageItem": "VoiceLive.RequestAssistantMessageItem",
53+
"azure.ai.voicelive.models.OpenAIVoice": "VoiceLive.OpenAIVoice",
54+
"azure.ai.voicelive.models.OutputTextContentPart": "VoiceLive.OutputTextContentPart",
55+
"azure.ai.voicelive.models.OutputTokenDetails": "VoiceLive.OutputTokenDetails",
4556
"azure.ai.voicelive.models.RequestAudioContentPart": "VoiceLive.RequestAudioContentPart",
46-
"azure.ai.voicelive.models.RequestFunctionCallItem": "VoiceLive.RequestFunctionCallItem",
47-
"azure.ai.voicelive.models.RequestFunctionCallOutputItem": "VoiceLive.RequestFunctionCallOutputItem",
4857
"azure.ai.voicelive.models.RequestSession": "VoiceLive.RequestSession",
49-
"azure.ai.voicelive.models.RequestSystemMessageItem": "VoiceLive.RequestSystemMessageItem",
5058
"azure.ai.voicelive.models.RequestTextContentPart": "VoiceLive.RequestTextContentPart",
51-
"azure.ai.voicelive.models.RequestUserMessageItem": "VoiceLive.RequestUserMessageItem",
5259
"azure.ai.voicelive.models.Response": "VoiceLive.Response",
5360
"azure.ai.voicelive.models.ResponseAudioContentPart": "VoiceLive.ResponseAudioContentPart",
61+
"azure.ai.voicelive.models.ResponseStatusDetails": "VoiceLive.ResponseStatusDetails",
62+
"azure.ai.voicelive.models.ResponseCancelledDetails": "VoiceLive.ResponseCancelledDetails",
5463
"azure.ai.voicelive.models.ResponseCreateParams": "VoiceLive.ResponseCreateParams",
64+
"azure.ai.voicelive.models.ResponseFailedDetails": "VoiceLive.ResponseFailedDetails",
65+
"azure.ai.voicelive.models.ResponseItem": "VoiceLive.ResponseItem",
5566
"azure.ai.voicelive.models.ResponseFunctionCallItem": "VoiceLive.ResponseFunctionCallItem",
5667
"azure.ai.voicelive.models.ResponseFunctionCallOutputItem": "VoiceLive.ResponseFunctionCallOutputItem",
68+
"azure.ai.voicelive.models.ResponseIncompleteDetails": "VoiceLive.ResponseIncompleteDetails",
5769
"azure.ai.voicelive.models.ResponseMessageItem": "VoiceLive.ResponseMessageItem",
5870
"azure.ai.voicelive.models.ResponseSession": "VoiceLive.ResponseSession",
59-
"azure.ai.voicelive.models.ResponseStatusDetails": "VoiceLive.Response.status_details.anonymous",
60-
"azure.ai.voicelive.models.ResponseStatusDetailsError": "VoiceLive.Response.status_details.error.anonymous",
6171
"azure.ai.voicelive.models.ResponseTextContentPart": "VoiceLive.ResponseTextContentPart",
62-
"azure.ai.voicelive.models.ResponseUsage": "VoiceLive.Response.usage.anonymous",
63-
"azure.ai.voicelive.models.ResponseUsageInputTokenDetails": "VoiceLive.Response.usage.input_token_details.anonymous",
64-
"azure.ai.voicelive.models.ResponseUsageOutputTokenDetails": "VoiceLive.Response.usage.output_token_details.anonymous",
6572
"azure.ai.voicelive.models.ServerEvent": "VoiceLive.ServerEvent",
6673
"azure.ai.voicelive.models.ServerEventConversationItemCreated": "VoiceLive.ServerEventConversationItemCreated",
6774
"azure.ai.voicelive.models.ServerEventConversationItemDeleted": "VoiceLive.ServerEventConversationItemDeleted",
@@ -71,7 +78,7 @@
7178
"azure.ai.voicelive.models.ServerEventConversationItemRetrieved": "VoiceLive.ServerEventConversationItemRetrieved",
7279
"azure.ai.voicelive.models.ServerEventConversationItemTruncated": "VoiceLive.ServerEventConversationItemTruncated",
7380
"azure.ai.voicelive.models.ServerEventError": "VoiceLive.ServerEventError",
74-
"azure.ai.voicelive.models.ServerEventErrorError": "VoiceLive.ServerEventError.error.anonymous",
81+
"azure.ai.voicelive.models.ServerEventErrorDetails": "VoiceLive.ServerEventError.error.anonymous",
7582
"azure.ai.voicelive.models.ServerEventInputAudioBufferCleared": "VoiceLive.ServerEventInputAudioBufferCleared",
7683
"azure.ai.voicelive.models.ServerEventInputAudioBufferCommitted": "VoiceLive.ServerEventInputAudioBufferCommitted",
7784
"azure.ai.voicelive.models.ServerEventInputAudioBufferSpeechStarted": "VoiceLive.ServerEventInputAudioBufferSpeechStarted",
@@ -92,18 +99,22 @@
9299
"azure.ai.voicelive.models.ServerEventSessionCreated": "VoiceLive.ServerEventSessionCreated",
93100
"azure.ai.voicelive.models.ServerEventSessionUpdated": "VoiceLive.ServerEventSessionUpdated",
94101
"azure.ai.voicelive.models.ServerVad": "VoiceLive.ServerVad",
102+
"azure.ai.voicelive.models.SystemMessageItem": "VoiceLive.SystemMessageItem",
95103
"azure.ai.voicelive.models.ToolChoiceObject": "VoiceLive.ToolChoiceObject",
96104
"azure.ai.voicelive.models.ToolChoiceFunctionObject": "VoiceLive.ToolChoiceFunctionObject",
97105
"azure.ai.voicelive.models.ToolChoiceFunctionObjectFunction": "VoiceLive.ToolChoiceFunctionObject.function.anonymous",
106+
"azure.ai.voicelive.models.Usage": "VoiceLive.Usage",
107+
"azure.ai.voicelive.models.UserMessageItem": "VoiceLive.UserMessageItem",
98108
"azure.ai.voicelive.models.VideoCrop": "VoiceLive.VideoCrop",
99109
"azure.ai.voicelive.models.VideoParams": "VoiceLive.VideoParams",
100110
"azure.ai.voicelive.models.VideoResolution": "VoiceLive.VideoResolution",
101111
"azure.ai.voicelive.models.VoiceLiveErrorDetails": "VoiceLive.VoiceLiveErrorDetails",
102112
"azure.ai.voicelive.models.ServerEventType": "VoiceLive.ServerEventType",
103-
"azure.ai.voicelive.models.ContentPartType": "VoiceLive.ContentPartType",
104113
"azure.ai.voicelive.models.ItemType": "VoiceLive.ItemType",
105-
"azure.ai.voicelive.models.ItemStatus": "VoiceLive.ItemStatus",
114+
"azure.ai.voicelive.models.ResponseItemStatus": "VoiceLive.ResponseItemStatus",
106115
"azure.ai.voicelive.models.MessageRole": "VoiceLive.MessageRole",
116+
"azure.ai.voicelive.models.ContentPartType": "VoiceLive.ContentPartType",
117+
"azure.ai.voicelive.models.ResponseStatus": "VoiceLive.ResponseStatus",
107118
"azure.ai.voicelive.models.OAIVoice": "VoiceLive.OAIVoice",
108119
"azure.ai.voicelive.models.Phi4mmVoice": "VoiceLive.Phi4mmVoice",
109120
"azure.ai.voicelive.models.Modality": "VoiceLive.Modality",
@@ -112,6 +123,7 @@
112123
"azure.ai.voicelive.models.AudioTimestampType": "VoiceLive.AudioTimestampType",
113124
"azure.ai.voicelive.models.ToolType": "VoiceLive.ToolType",
114125
"azure.ai.voicelive.models.ToolChoiceLiteral": "VoiceLive.ToolChoiceLiteral",
115-
"azure.ai.voicelive.models.ClientEventType": "VoiceLive.ClientEventType"
126+
"azure.ai.voicelive.models.ClientEventType": "VoiceLive.ClientEventType",
127+
"azure.ai.voicelive.models.ItemParamStatus": "VoiceLive.ItemParamStatus"
116128
}
117129
}

sdk/ai/azure-ai-voicelive/azure/ai/voicelive/_patch.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import logging
1313
from contextlib import AbstractContextManager
1414
from urllib.parse import parse_qs, urlencode, urlparse, urlunparse
15+
1516
try: # Python 3.11+
1617
from typing import NotRequired # type: ignore[attr-defined]
1718
except Exception: # Python <=3.10
@@ -41,9 +42,11 @@
4142
try:
4243
from websockets.typing import Subprotocol as WSSubprotocol # runtime if available
4344
except Exception:
45+
4446
class WSSubprotocol(str): # fallback, keeps runtime simple
4547
pass
4648

49+
4750
__all__: List[str] = [
4851
"connect",
4952
"WebsocketConnectionOptions",
@@ -61,6 +64,7 @@ class WSSubprotocol(str): # fallback, keeps runtime simple
6164

6265
log = logging.getLogger(__name__)
6366

67+
6468
def _json_default(o: Any) -> Any:
6569
"""
6670
Fallback JSON serializer for generated SDK models and other custom objects.
@@ -89,13 +93,14 @@ def _json_default(o: Any) -> Any:
8993
return {k: v for k, v in vars(o).items() if not k.startswith("_")}
9094
raise TypeError(f"{type(o).__name__} is not JSON serializable")
9195

96+
9297
class WebsocketConnectionOptions(TypedDict, total=False):
9398
"""
9499
Advanced WebSocket connection options for the synchronous VoiceLive API.
95100
96101
These options are passed directly to :func:`websockets.sync.client.connect`
97-
and control low-level WebSocket behavior.
98-
All keys are optional — if omitted, the `websockets` library's defaults apply.
102+
and control low-level WebSocket behavior.
103+
All keys are optional — if omitted, the `websockets` library's defaults apply.
99104
Unsupported or unknown keys are ignored.
100105
101106
:keyword extensions: WebSocket extensions to negotiate with the server.
@@ -649,7 +654,7 @@ def __enter__(self) -> VoiceLiveConnection:
649654
def __exit__(self, exc_type, exc, exc_tb) -> None:
650655
"""
651656
Close the connection when exiting the context.
652-
657+
653658
:param exc_type: Exception type if an error occurred.
654659
:type exc_type: type | None
655660
:param exc: Exception instance if an error occurred.
@@ -664,7 +669,7 @@ def __exit__(self, exc_type, exc, exc_tb) -> None:
664669

665670
def _get_auth_headers(self) -> Dict[str, str]:
666671
"""Get authentication headers for WebSocket connection.
667-
672+
668673
:return: A dictionary containing authentication headers.
669674
:rtype: dict[str, str]
670675
"""
@@ -740,7 +745,7 @@ def connect(
740745
:paramtype connection_options: ~azure.ai.voicelive.WebsocketConnectionOptions or None
741746
:return: A context manager that yields a connected :class:`~azure.ai.voicelive.VoiceLiveConnection`.
742747
:rtype: contextlib.AbstractContextManager[~azure.ai.voicelive.VoiceLiveConnection]
743-
748+
744749
.. note::
745750
Additional keyword arguments can be passed and will be forwarded to the underlying connection.
746751
"""

sdk/ai/azure-ai-voicelive/azure/ai/voicelive/_types.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,9 @@
66
# Changes may cause incorrect behavior and will be lost if the code is regenerated.
77
# --------------------------------------------------------------------------
88

9-
from typing import TYPE_CHECKING, Literal, Union
9+
from typing import TYPE_CHECKING, Union
1010

1111
if TYPE_CHECKING:
1212
from . import models as _models
13-
Voice = Union[
14-
str,
15-
"_models.OAIVoice",
16-
"_models.AzureStandardVoice",
17-
"_models.AzureCustomVoice",
18-
"_models.AzurePersonalVoice",
19-
str,
20-
"_models.Phi4mmVoice",
21-
]
13+
Voice = Union[str, "_models.OAIVoice", "_models.OpenAIVoice", "_models.AzureVoice", "_models.Phi4mmVoice"]
2214
ToolChoice = Union[str, "_models.ToolChoiceLiteral", "_models.ToolChoiceObject"]
23-
ReasoningEffort = Literal["minimal", "low", "medium", "high"]

sdk/ai/azure-ai-voicelive/azure/ai/voicelive/aio/_patch.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import logging
1212
from contextlib import AbstractAsyncContextManager
1313
from urllib.parse import urlparse, urlunparse, urlencode, parse_qs
14+
1415
# pylint: disable=ungrouped-imports
1516
try: # Python 3.11+
1617
from typing import NotRequired # type: ignore[attr-defined]
@@ -77,6 +78,7 @@ def _json_default(o: Any) -> Any:
7778
return {k: v for k, v in vars(o).items() if not k.startswith("_")}
7879
raise TypeError(f"{type(o).__name__} is not JSON serializable")
7980

81+
8082
class SessionResource:
8183
"""Resource for session management."""
8284

@@ -493,9 +495,7 @@ async def send(self, event: Union[Mapping[str, Any], ClientEvent]) -> None:
493495
payload = json.dumps(event, default=_json_default)
494496

495497
# Ensure we pass a str to send_str
496-
data: str = (
497-
payload if isinstance(payload, str) else json.dumps(payload, default=_json_default)
498-
)
498+
data: str = payload if isinstance(payload, str) else json.dumps(payload, default=_json_default)
499499

500500
await self._connection.send_str(data)
501501
except (TypeError, ValueError, aiohttp.ClientError, RuntimeError) as e:
@@ -645,9 +645,7 @@ async def __aenter__(self) -> VoiceLiveConnection:
645645

646646
session = aiohttp.ClientSession()
647647
try:
648-
connection_obj = await session.ws_connect(
649-
str(url), headers=headers, **self.__connection_options
650-
)
648+
connection_obj = await session.ws_connect(str(url), headers=headers, **self.__connection_options)
651649
self.__connection = VoiceLiveConnection(session, connection_obj)
652650
return self.__connection
653651
except aiohttp.ClientError as e:
@@ -746,7 +744,7 @@ def connect(
746744
:paramtype connection_options: ~azure.ai.voicelive.aio.WebsocketConnectionOptions
747745
:return: An async context manager yielding a connected :class:`~azure.ai.voicelive.aio.VoiceLiveConnection`.
748746
:rtype: collections.abc.AsyncContextManager[~azure.ai.voicelive.aio.VoiceLiveConnection]
749-
747+
750748
.. note::
751749
Additional keyword arguments can be passed and will be forwarded to the underlying connection.
752750
"""

0 commit comments

Comments
 (0)