Skip to content

Commit 3a200f0

Browse files
committed
Live API v2
1 parent 3bb3f36 commit 3a200f0

File tree

11 files changed

+1060
-0
lines changed

11 files changed

+1060
-0
lines changed

common/api-review/ai.api.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@ export class ArraySchema extends Schema {
8585
toJSON(): SchemaRequest;
8686
}
8787

88+
// @beta
89+
export interface AudioConversationController {
90+
stop: () => Promise<void>;
91+
}
92+
8893
// @public
8994
export abstract class Backend {
9095
protected constructor(type: BackendType);
@@ -787,6 +792,8 @@ export class LiveSession {
787792
// @internal
788793
constructor(webSocketHandler: WebSocketHandler, serverMessages: AsyncGenerator<unknown>);
789794
close(): Promise<void>;
795+
// @public
796+
inConversation: boolean;
790797
isClosed: boolean;
791798
receive(): AsyncGenerator<LiveServerContent | LiveServerToolCall | LiveServerToolCallCancellation>;
792799
send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
@@ -1031,6 +1038,14 @@ export interface SpeechConfig {
10311038
voiceConfig?: VoiceConfig;
10321039
}
10331040

1041+
// @beta
1042+
export function startAudioConversation(liveSession: LiveSession, options?: StartAudioConversationOptions): Promise<AudioConversationController>;
1043+
1044+
// @beta
1045+
export interface StartAudioConversationOptions {
1046+
functionCallingHandler?: (functionCalls: LiveServerToolCall['functionCalls']) => Promise<Part>;
1047+
}
1048+
10341049
// @public
10351050
export interface StartChatParams extends BaseParams {
10361051
// (undocumented)

docs-devsite/_toc.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ toc:
1616
path: /docs/reference/js/ai.anyofschema.md
1717
- title: ArraySchema
1818
path: /docs/reference/js/ai.arrayschema.md
19+
- title: AudioConversationController
20+
path: /docs/reference/js/ai.audioconversationcontroller.md
1921
- title: Backend
2022
path: /docs/reference/js/ai.backend.md
2123
- title: BaseParams
@@ -160,6 +162,8 @@ toc:
160162
path: /docs/reference/js/ai.segment.md
161163
- title: SpeechConfig
162164
path: /docs/reference/js/ai.speechconfig.md
165+
- title: StartAudioConversationOptions
166+
path: /docs/reference/js/ai.startaudioconversationoptions.md
163167
- title: StartChatParams
164168
path: /docs/reference/js/ai.startchatparams.md
165169
- title: StringSchema
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
Project: /docs/reference/js/_project.yaml
2+
Book: /docs/reference/_book.yaml
3+
page_type: reference
4+
5+
{% comment %}
6+
DO NOT EDIT THIS FILE!
7+
This is generated by the JS SDK team, and any local changes will be
8+
overwritten. Changes should be made in the source code at
9+
https://github.com/firebase/firebase-js-sdk
10+
{% endcomment %}
11+
12+
# AudioConversationController interface
13+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
14+
>
15+
16+
A controller for managing an active audio conversation.
17+
18+
<b>Signature:</b>
19+
20+
```typescript
21+
export interface AudioConversationController
22+
```
23+
24+
## Properties
25+
26+
| Property | Type | Description |
27+
| --- | --- | --- |
28+
| [stop](./ai.audioconversationcontroller.md#audioconversationcontrollerstop) | () =&gt; Promise&lt;void&gt; | <b><i>(Public Preview)</i></b> Stops the audio conversation, closes the microphone connection, and cleans up resources. Returns a promise that resolves when cleanup is complete. |
29+
30+
## AudioConversationController.stop
31+
32+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
33+
>
34+
35+
Stops the audio conversation, closes the microphone connection, and cleans up resources. Returns a promise that resolves when cleanup is complete.
36+
37+
<b>Signature:</b>
38+
39+
```typescript
40+
stop: () => Promise<void>;
41+
```

docs-devsite/ai.livesession.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ export declare class LiveSession
2929

3030
| Property | Modifiers | Type | Description |
3131
| --- | --- | --- | --- |
32+
| [inConversation](./ai.livesession.md#livesessioninconversation) | | boolean | Indicates whether this Live session is being controlled by an <code>AudioConversationController</code>. |
3233
| [isClosed](./ai.livesession.md#livesessionisclosed) | | boolean | <b><i>(Public Preview)</i></b> Indicates whether this Live session is closed. |
3334

3435
## Methods
@@ -41,6 +42,16 @@ export declare class LiveSession
4142
| [sendMediaChunks(mediaChunks)](./ai.livesession.md#livesessionsendmediachunks) | | <b><i>(Public Preview)</i></b> Sends realtime input to the server. |
4243
| [sendMediaStream(mediaChunkStream)](./ai.livesession.md#livesessionsendmediastream) | | <b><i>(Public Preview)</i></b> Sends a stream of [GenerativeContentBlob](./ai.generativecontentblob.md#generativecontentblob_interface)<!-- -->. |
4344

45+
## LiveSession.inConversation
46+
47+
Indicates whether this Live session is being controlled by an `AudioConversationController`<!-- -->.
48+
49+
<b>Signature:</b>
50+
51+
```typescript
52+
inConversation: boolean;
53+
```
54+
4455
## LiveSession.isClosed
4556

4657
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.

docs-devsite/ai.md

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ The Firebase AI Web SDK.
2222
| [getGenerativeModel(ai, modelParams, requestOptions)](./ai.md#getgenerativemodel_80bd839) | Returns a [GenerativeModel](./ai.generativemodel.md#generativemodel_class) class with methods for inference and other functionality. |
2323
| [getImagenModel(ai, modelParams, requestOptions)](./ai.md#getimagenmodel_e1f6645) | <b><i>(Public Preview)</i></b> Returns an [ImagenModel](./ai.imagenmodel.md#imagenmodel_class) class with methods for using Imagen.<!-- -->Only Imagen 3 models (named <code>imagen-3.0-*</code>) are supported. |
2424
| [getLiveGenerativeModel(ai, modelParams)](./ai.md#getlivegenerativemodel_f2099ac) | <b><i>(Public Preview)</i></b> Returns a [LiveGenerativeModel](./ai.livegenerativemodel.md#livegenerativemodel_class) class for real-time, bidirectional communication.<!-- -->The Live API is only supported in modern browser windows and Node &gt;<!-- -->= 22. |
25+
| <b>function(liveSession, ...)</b> |
26+
| [startAudioConversation(liveSession, options)](./ai.md#startaudioconversation_01c8e7f) | <b><i>(Public Preview)</i></b> Starts a real-time, bidirectional audio conversation with the model.<!-- -->This helper function manages the complexities of microphone access, audio recording, playback, and message sequencing.<!-- -->Important: This function must be called in response to a user gesture (for example, a button click) to comply with [browser autoplay policies](https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API/Best_practices#autoplay_policy)<!-- -->. |
2527

2628
## Classes
2729

@@ -53,6 +55,7 @@ The Firebase AI Web SDK.
5355
| --- | --- |
5456
| [AI](./ai.ai.md#ai_interface) | An instance of the Firebase AI SDK.<!-- -->Do not create this instance directly. Instead, use [getAI()](./ai.md#getai_a94a413)<!-- -->. |
5557
| [AIOptions](./ai.aioptions.md#aioptions_interface) | Options for initializing the AI service using [getAI()](./ai.md#getai_a94a413)<!-- -->. This allows specifying which backend to use (Vertex AI Gemini API or Gemini Developer API) and configuring its specific options (like location for Vertex AI). |
58+
| [AudioConversationController](./ai.audioconversationcontroller.md#audioconversationcontroller_interface) | <b><i>(Public Preview)</i></b> A controller for managing an active audio conversation. |
5659
| [BaseParams](./ai.baseparams.md#baseparams_interface) | Base parameters for a number of methods. |
5760
| [Citation](./ai.citation.md#citation_interface) | A single citation. |
5861
| [CitationMetadata](./ai.citationmetadata.md#citationmetadata_interface) | Citation metadata that may be found on a [GenerateContentCandidate](./ai.generatecontentcandidate.md#generatecontentcandidate_interface)<!-- -->. |
@@ -112,6 +115,7 @@ The Firebase AI Web SDK.
112115
| [SearchEntrypoint](./ai.searchentrypoint.md#searchentrypoint_interface) | Google search entry point. |
113116
| [Segment](./ai.segment.md#segment_interface) | Represents a specific segment within a [Content](./ai.content.md#content_interface) object, often used to pinpoint the exact location of text or data that grounding information refers to. |
114117
| [SpeechConfig](./ai.speechconfig.md#speechconfig_interface) | <b><i>(Public Preview)</i></b> Configures speech synthesis. |
118+
| [StartAudioConversationOptions](./ai.startaudioconversationoptions.md#startaudioconversationoptions_interface) | <b><i>(Public Preview)</i></b> Options for [startAudioConversation()](./ai.md#startaudioconversation_01c8e7f)<!-- -->. |
115119
| [StartChatParams](./ai.startchatparams.md#startchatparams_interface) | Params for [GenerativeModel.startChat()](./ai.generativemodel.md#generativemodelstartchat)<!-- -->. |
116120
| [TextPart](./ai.textpart.md#textpart_interface) | Content part interface if the part represents a text string. |
117121
| [ThinkingConfig](./ai.thinkingconfig.md#thinkingconfig_interface) | Configuration for "thinking" behavior of compatible Gemini models.<!-- -->Certain models utilize a thinking process before generating a response. This allows them to reason through complex problems and plan a more coherent and accurate answer. |
@@ -307,6 +311,78 @@ export declare function getLiveGenerativeModel(ai: AI, modelParams: LiveModelPar
307311

308312
If the `apiKey` or `projectId` fields are missing in your Firebase config.
309313

314+
## function(liveSession, ...)
315+
316+
### startAudioConversation(liveSession, options) {:#startaudioconversation_01c8e7f}
317+
318+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
319+
>
320+
321+
Starts a real-time, bidirectional audio conversation with the model.
322+
323+
This helper function manages the complexities of microphone access, audio recording, playback, and message sequencing.
324+
325+
Important: This function must be called in response to a user gesture (for example, a button click) to comply with [browser autoplay policies](https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API/Best_practices#autoplay_policy)<!-- -->.
326+
327+
<b>Signature:</b>
328+
329+
```typescript
330+
export declare function startAudioConversation(liveSession: LiveSession, options?: StartAudioConversationOptions): Promise<AudioConversationController>;
331+
```
332+
333+
#### Parameters
334+
335+
| Parameter | Type | Description |
336+
| --- | --- | --- |
337+
| liveSession | [LiveSession](./ai.livesession.md#livesession_class) | An active [LiveSession](./ai.livesession.md#livesession_class) instance. |
338+
| options | [StartAudioConversationOptions](./ai.startaudioconversationoptions.md#startaudioconversationoptions_interface) | Configuration options for the audio conversation. |
339+
340+
<b>Returns:</b>
341+
342+
Promise&lt;[AudioConversationController](./ai.audioconversationcontroller.md#audioconversationcontroller_interface)<!-- -->&gt;
343+
344+
A `Promise` that resolves with an [AudioConversationController](./ai.audioconversationcontroller.md#audioconversationcontroller_interface)<!-- -->.
345+
346+
#### Exceptions
347+
348+
`AIError` if the environment does not support required Web APIs (`UNSUPPORTED`<!-- -->), if a conversation is already active (`REQUEST_ERROR`<!-- -->), if the session is closed (`SESSION_CLOSED`<!-- -->), or if an unexpected initialization error occurs (`ERROR`<!-- -->).
349+
350+
`DOMException` Thrown by `navigator.mediaDevices.getUserMedia()` if issues occur with microphone access, such as permissions being denied (`NotAllowedError`<!-- -->) or no compatible hardware being found (`NotFoundError`<!-- -->). See the [MDN documentation](https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/getUserMedia#exceptions) for a full list of exceptions.
351+
352+
### Example
353+
354+
355+
```javascript
356+
const liveSession = await model.connect();
357+
let conversationController;
358+
359+
// This function must be called from within a click handler.
360+
async function startConversation() {
361+
try {
362+
conversationController = await startAudioConversation(liveSession);
363+
} catch (e) {
364+
// Handle AI-specific errors
365+
if (e instanceof AIError) {
366+
console.error("AI Error:", e.message);
367+
}
368+
// Handle microphone permission and hardware errors
369+
else if (e instanceof DOMException) {
370+
console.error("Microphone Error:", e.message);
371+
}
372+
// Handle other unexpected errors
373+
else {
374+
console.error("An unexpected error occurred:", e);
375+
}
376+
}
377+
}
378+
379+
// Later, to stop the conversation:
380+
// if (conversationController) {
381+
// await conversationController.stop();
382+
// }
383+
384+
```
385+
310386
## AIErrorCode
311387

312388
Standardized error codes that [AIError](./ai.aierror.md#aierror_class) can have.
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
Project: /docs/reference/js/_project.yaml
2+
Book: /docs/reference/_book.yaml
3+
page_type: reference
4+
5+
{% comment %}
6+
DO NOT EDIT THIS FILE!
7+
This is generated by the JS SDK team, and any local changes will be
8+
overwritten. Changes should be made in the source code at
9+
https://github.com/firebase/firebase-js-sdk
10+
{% endcomment %}
11+
12+
# StartAudioConversationOptions interface
13+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
14+
>
15+
16+
Options for [startAudioConversation()](./ai.md#startaudioconversation_01c8e7f)<!-- -->.
17+
18+
<b>Signature:</b>
19+
20+
```typescript
21+
export interface StartAudioConversationOptions
22+
```
23+
24+
## Properties
25+
26+
| Property | Type | Description |
27+
| --- | --- | --- |
28+
| [functionCallingHandler](./ai.startaudioconversationoptions.md#startaudioconversationoptionsfunctioncallinghandler) | (functionCalls: [LiveServerToolCall](./ai.liveservertoolcall.md#liveservertoolcall_interface)<!-- -->\['functionCalls'\]) =&gt; Promise&lt;[Part](./ai.md#part)<!-- -->&gt; | <b><i>(Public Preview)</i></b> An async handler that is called when the model requests a function to be executed. The handler should perform the function call and return the result as a <code>Part</code>, which will then be sent back to the model. |
29+
30+
## StartAudioConversationOptions.functionCallingHandler
31+
32+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
33+
>
34+
35+
An async handler that is called when the model requests a function to be executed. The handler should perform the function call and return the result as a `Part`<!-- -->, which will then be sent back to the model.
36+
37+
<b>Signature:</b>
38+
39+
```typescript
40+
functionCallingHandler?: (functionCalls: LiveServerToolCall['functionCalls']) => Promise<Part>;
41+
```

packages/ai/integration/live.test.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,5 +323,9 @@ describe('Live', function () {
323323
});
324324
*/
325325
});
326+
327+
describe('startAudioConversation', () => {
328+
it('');
329+
});
326330
});
327331
});

packages/ai/src/api.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ export * from './requests/schema-builder';
4545
export { ImagenImageFormat } from './requests/imagen-image-format';
4646
export { AIModel, GenerativeModel, LiveGenerativeModel, ImagenModel, AIError };
4747
export { Backend, VertexAIBackend, GoogleAIBackend } from './backend';
48+
export {
49+
startAudioConversation,
50+
AudioConversationController,
51+
StartAudioConversationOptions
52+
} from './methods/live-session-helpers';
4853

4954
declare module '@firebase/component' {
5055
interface NameServiceMapping {

0 commit comments

Comments
 (0)