Skip to content

Commit 21a9c42

Browse files
authored
Merge cb546f5 into 3bb3f36
2 parents 3bb3f36 + cb546f5 commit 21a9c42

14 files changed

+1069
-10
lines changed

common/api-review/ai.api.md

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@ export class ArraySchema extends Schema {
8585
toJSON(): SchemaRequest;
8686
}
8787

88+
// @beta
89+
export interface AudioConversationController {
90+
stop: () => Promise<void>;
91+
}
92+
8893
// @public
8994
export abstract class Backend {
9095
protected constructor(type: BackendType);
@@ -710,7 +715,7 @@ export interface LiveGenerationConfig {
710715
frequencyPenalty?: number;
711716
maxOutputTokens?: number;
712717
presencePenalty?: number;
713-
responseModalities?: [ResponseModality];
718+
responseModalities?: ResponseModality[];
714719
speechConfig?: SpeechConfig;
715720
temperature?: number;
716721
topK?: number;
@@ -787,6 +792,7 @@ export class LiveSession {
787792
// @internal
788793
constructor(webSocketHandler: WebSocketHandler, serverMessages: AsyncGenerator<unknown>);
789794
close(): Promise<void>;
795+
inConversation: boolean;
790796
isClosed: boolean;
791797
receive(): AsyncGenerator<LiveServerContent | LiveServerToolCall | LiveServerToolCallCancellation>;
792798
send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
@@ -860,7 +866,7 @@ export const POSSIBLE_ROLES: readonly ["user", "model", "function", "system"];
860866

861867
// @beta
862868
export interface PrebuiltVoiceConfig {
863-
voiceConfig?: string;
869+
voiceName?: string;
864870
}
865871

866872
// @public
@@ -882,6 +888,7 @@ export interface RequestOptions {
882888
export const ResponseModality: {
883889
readonly TEXT: "TEXT";
884890
readonly IMAGE: "IMAGE";
891+
readonly AUDIO: "AUDIO";
885892
};
886893

887894
// @beta
@@ -1031,6 +1038,14 @@ export interface SpeechConfig {
10311038
voiceConfig?: VoiceConfig;
10321039
}
10331040

1041+
// @beta
1042+
export function startAudioConversation(liveSession: LiveSession, options?: StartAudioConversationOptions): Promise<AudioConversationController>;
1043+
1044+
// @beta
1045+
export interface StartAudioConversationOptions {
1046+
functionCallingHandler?: (functionCalls: LiveServerToolCall['functionCalls']) => Promise<Part>;
1047+
}
1048+
10341049
// @public
10351050
export interface StartChatParams extends BaseParams {
10361051
// (undocumented)

docs-devsite/_toc.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ toc:
1616
path: /docs/reference/js/ai.anyofschema.md
1717
- title: ArraySchema
1818
path: /docs/reference/js/ai.arrayschema.md
19+
- title: AudioConversationController
20+
path: /docs/reference/js/ai.audioconversationcontroller.md
1921
- title: Backend
2022
path: /docs/reference/js/ai.backend.md
2123
- title: BaseParams
@@ -160,6 +162,8 @@ toc:
160162
path: /docs/reference/js/ai.segment.md
161163
- title: SpeechConfig
162164
path: /docs/reference/js/ai.speechconfig.md
165+
- title: StartAudioConversationOptions
166+
path: /docs/reference/js/ai.startaudioconversationoptions.md
163167
- title: StartChatParams
164168
path: /docs/reference/js/ai.startchatparams.md
165169
- title: StringSchema
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
Project: /docs/reference/js/_project.yaml
2+
Book: /docs/reference/_book.yaml
3+
page_type: reference
4+
5+
{% comment %}
6+
DO NOT EDIT THIS FILE!
7+
This is generated by the JS SDK team, and any local changes will be
8+
overwritten. Changes should be made in the source code at
9+
https://github.com/firebase/firebase-js-sdk
10+
{% endcomment %}
11+
12+
# AudioConversationController interface
13+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
14+
>
15+
16+
A controller for managing an active audio conversation.
17+
18+
<b>Signature:</b>
19+
20+
```typescript
21+
export interface AudioConversationController
22+
```
23+
24+
## Properties
25+
26+
| Property | Type | Description |
27+
| --- | --- | --- |
28+
| [stop](./ai.audioconversationcontroller.md#audioconversationcontrollerstop) | () =&gt; Promise&lt;void&gt; | <b><i>(Public Preview)</i></b> Stops the audio conversation, closes the microphone connection, and cleans up resources. Returns a promise that resolves when cleanup is complete. |
29+
30+
## AudioConversationController.stop
31+
32+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
33+
>
34+
35+
Stops the audio conversation, closes the microphone connection, and cleans up resources. Returns a promise that resolves when cleanup is complete.
36+
37+
<b>Signature:</b>
38+
39+
```typescript
40+
stop: () => Promise<void>;
41+
```

docs-devsite/ai.livegenerationconfig.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ export interface LiveGenerationConfig
2828
| [frequencyPenalty](./ai.livegenerationconfig.md#livegenerationconfigfrequencypenalty) | number | <b><i>(Public Preview)</i></b> Frequency penalties. |
2929
| [maxOutputTokens](./ai.livegenerationconfig.md#livegenerationconfigmaxoutputtokens) | number | <b><i>(Public Preview)</i></b> Specifies the maximum number of tokens that can be generated in the response. The number of tokens per word varies depending on the language outputted. Is unbounded by default. |
3030
| [presencePenalty](./ai.livegenerationconfig.md#livegenerationconfigpresencepenalty) | number | <b><i>(Public Preview)</i></b> Positive penalties. |
31-
| [responseModalities](./ai.livegenerationconfig.md#livegenerationconfigresponsemodalities) | \[[ResponseModality](./ai.md#responsemodality)<!-- -->\] | <b><i>(Public Preview)</i></b> The modalities of the response. |
31+
| [responseModalities](./ai.livegenerationconfig.md#livegenerationconfigresponsemodalities) | [ResponseModality](./ai.md#responsemodality)<!-- -->\[\] | <b><i>(Public Preview)</i></b> The modalities of the response. |
3232
| [speechConfig](./ai.livegenerationconfig.md#livegenerationconfigspeechconfig) | [SpeechConfig](./ai.speechconfig.md#speechconfig_interface) | <b><i>(Public Preview)</i></b> Configuration for speech synthesis. |
3333
| [temperature](./ai.livegenerationconfig.md#livegenerationconfigtemperature) | number | <b><i>(Public Preview)</i></b> Controls the degree of randomness in token selection. A <code>temperature</code> value of 0 means that the highest probability tokens are always selected. In this case, responses for a given prompt are mostly deterministic, but a small amount of variation is still possible. |
3434
| [topK](./ai.livegenerationconfig.md#livegenerationconfigtopk) | number | <b><i>(Public Preview)</i></b> Changes how the model selects tokens for output. A <code>topK</code> value of 1 means the selected token is the most probable among all tokens in the model's vocabulary, while a <code>topK</code> value of 3 means that the next token is selected from among the 3 most probable, using probabilities sampled. Tokens are then further filtered with the highest selected <code>temperature</code> sampling. Defaults to 40 if unspecified. |
@@ -83,7 +83,7 @@ The modalities of the response.
8383
<b>Signature:</b>
8484

8585
```typescript
86-
responseModalities?: [ResponseModality];
86+
responseModalities?: ResponseModality[];
8787
```
8888

8989
## LiveGenerationConfig.speechConfig

docs-devsite/ai.livesession.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ export declare class LiveSession
2929

3030
| Property | Modifiers | Type | Description |
3131
| --- | --- | --- | --- |
32+
| [inConversation](./ai.livesession.md#livesessioninconversation) | | boolean | <b><i>(Public Preview)</i></b> Indicates whether this Live session is being controlled by an <code>AudioConversationController</code>. |
3233
| [isClosed](./ai.livesession.md#livesessionisclosed) | | boolean | <b><i>(Public Preview)</i></b> Indicates whether this Live session is closed. |
3334

3435
## Methods
@@ -41,6 +42,19 @@ export declare class LiveSession
4142
| [sendMediaChunks(mediaChunks)](./ai.livesession.md#livesessionsendmediachunks) | | <b><i>(Public Preview)</i></b> Sends realtime input to the server. |
4243
| [sendMediaStream(mediaChunkStream)](./ai.livesession.md#livesessionsendmediastream) | | <b><i>(Public Preview)</i></b> Sends a stream of [GenerativeContentBlob](./ai.generativecontentblob.md#generativecontentblob_interface)<!-- -->. |
4344

45+
## LiveSession.inConversation
46+
47+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
48+
>
49+
50+
Indicates whether this Live session is being controlled by an `AudioConversationController`<!-- -->.
51+
52+
<b>Signature:</b>
53+
54+
```typescript
55+
inConversation: boolean;
56+
```
57+
4458
## LiveSession.isClosed
4559

4660
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.

docs-devsite/ai.md

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ The Firebase AI Web SDK.
2222
| [getGenerativeModel(ai, modelParams, requestOptions)](./ai.md#getgenerativemodel_80bd839) | Returns a [GenerativeModel](./ai.generativemodel.md#generativemodel_class) class with methods for inference and other functionality. |
2323
| [getImagenModel(ai, modelParams, requestOptions)](./ai.md#getimagenmodel_e1f6645) | <b><i>(Public Preview)</i></b> Returns an [ImagenModel](./ai.imagenmodel.md#imagenmodel_class) class with methods for using Imagen.<!-- -->Only Imagen 3 models (named <code>imagen-3.0-*</code>) are supported. |
2424
| [getLiveGenerativeModel(ai, modelParams)](./ai.md#getlivegenerativemodel_f2099ac) | <b><i>(Public Preview)</i></b> Returns a [LiveGenerativeModel](./ai.livegenerativemodel.md#livegenerativemodel_class) class for real-time, bidirectional communication.<!-- -->The Live API is only supported in modern browser windows and Node &gt;<!-- -->= 22. |
25+
| <b>function(liveSession, ...)</b> |
26+
| [startAudioConversation(liveSession, options)](./ai.md#startaudioconversation_01c8e7f) | <b><i>(Public Preview)</i></b> Starts a real-time, bidirectional audio conversation with the model. This helper function manages the complexities of microphone access, audio recording, playback, and interruptions. |
2527

2628
## Classes
2729

@@ -53,6 +55,7 @@ The Firebase AI Web SDK.
5355
| --- | --- |
5456
| [AI](./ai.ai.md#ai_interface) | An instance of the Firebase AI SDK.<!-- -->Do not create this instance directly. Instead, use [getAI()](./ai.md#getai_a94a413)<!-- -->. |
5557
| [AIOptions](./ai.aioptions.md#aioptions_interface) | Options for initializing the AI service using [getAI()](./ai.md#getai_a94a413)<!-- -->. This allows specifying which backend to use (Vertex AI Gemini API or Gemini Developer API) and configuring its specific options (like location for Vertex AI). |
58+
| [AudioConversationController](./ai.audioconversationcontroller.md#audioconversationcontroller_interface) | <b><i>(Public Preview)</i></b> A controller for managing an active audio conversation. |
5659
| [BaseParams](./ai.baseparams.md#baseparams_interface) | Base parameters for a number of methods. |
5760
| [Citation](./ai.citation.md#citation_interface) | A single citation. |
5861
| [CitationMetadata](./ai.citationmetadata.md#citationmetadata_interface) | Citation metadata that may be found on a [GenerateContentCandidate](./ai.generatecontentcandidate.md#generatecontentcandidate_interface)<!-- -->. |
@@ -112,6 +115,7 @@ The Firebase AI Web SDK.
112115
| [SearchEntrypoint](./ai.searchentrypoint.md#searchentrypoint_interface) | Google search entry point. |
113116
| [Segment](./ai.segment.md#segment_interface) | Represents a specific segment within a [Content](./ai.content.md#content_interface) object, often used to pinpoint the exact location of text or data that grounding information refers to. |
114117
| [SpeechConfig](./ai.speechconfig.md#speechconfig_interface) | <b><i>(Public Preview)</i></b> Configures speech synthesis. |
118+
| [StartAudioConversationOptions](./ai.startaudioconversationoptions.md#startaudioconversationoptions_interface) | <b><i>(Public Preview)</i></b> Options for [startAudioConversation()](./ai.md#startaudioconversation_01c8e7f)<!-- -->. |
115119
| [StartChatParams](./ai.startchatparams.md#startchatparams_interface) | Params for [GenerativeModel.startChat()](./ai.generativemodel.md#generativemodelstartchat)<!-- -->. |
116120
| [TextPart](./ai.textpart.md#textpart_interface) | Content part interface if the part represents a text string. |
117121
| [ThinkingConfig](./ai.thinkingconfig.md#thinkingconfig_interface) | Configuration for "thinking" behavior of compatible Gemini models.<!-- -->Certain models utilize a thinking process before generating a response. This allows them to reason through complex problems and plan a more coherent and accurate answer. |
@@ -307,6 +311,76 @@ export declare function getLiveGenerativeModel(ai: AI, modelParams: LiveModelPar
307311

308312
If the `apiKey` or `projectId` fields are missing in your Firebase config.
309313

314+
## function(liveSession, ...)
315+
316+
### startAudioConversation(liveSession, options) {:#startaudioconversation_01c8e7f}
317+
318+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
319+
>
320+
321+
Starts a real-time, bidirectional audio conversation with the model. This helper function manages the complexities of microphone access, audio recording, playback, and interruptions.
322+
323+
Important: This function must be called in response to a user gesture (for example, a button click) to comply with [browser autoplay policies](https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API/Best_practices#autoplay_policy)<!-- -->.
324+
325+
<b>Signature:</b>
326+
327+
```typescript
328+
export declare function startAudioConversation(liveSession: LiveSession, options?: StartAudioConversationOptions): Promise<AudioConversationController>;
329+
```
330+
331+
#### Parameters
332+
333+
| Parameter | Type | Description |
334+
| --- | --- | --- |
335+
| liveSession | [LiveSession](./ai.livesession.md#livesession_class) | An active [LiveSession](./ai.livesession.md#livesession_class) instance. |
336+
| options | [StartAudioConversationOptions](./ai.startaudioconversationoptions.md#startaudioconversationoptions_interface) | Configuration options for the audio conversation. |
337+
338+
<b>Returns:</b>
339+
340+
Promise&lt;[AudioConversationController](./ai.audioconversationcontroller.md#audioconversationcontroller_interface)<!-- -->&gt;
341+
342+
A `Promise` that resolves with an [AudioConversationController](./ai.audioconversationcontroller.md#audioconversationcontroller_interface)<!-- -->.
343+
344+
#### Exceptions
345+
346+
`AIError` if the environment does not support required Web APIs (`UNSUPPORTED`<!-- -->), if a conversation is already active (`REQUEST_ERROR`<!-- -->), the session is closed (`SESSION_CLOSED`<!-- -->), or if an unexpected initialization error occurs (`ERROR`<!-- -->).
347+
348+
`DOMException` Thrown by `navigator.mediaDevices.getUserMedia()` if issues occur with microphone access, such as permissions being denied (`NotAllowedError`<!-- -->) or no compatible hardware being found (`NotFoundError`<!-- -->). See the [MDN documentation](https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/getUserMedia#exceptions) for a full list of exceptions.
349+
350+
### Example
351+
352+
353+
```javascript
354+
const liveSession = await model.connect();
355+
let conversationController;
356+
357+
// This function must be called from within a click handler.
358+
async function startConversation() {
359+
try {
360+
conversationController = await startAudioConversation(liveSession);
361+
} catch (e) {
362+
// Handle AI-specific errors
363+
if (e instanceof AIError) {
364+
console.error("AI Error:", e.message);
365+
}
366+
// Handle microphone permission and hardware errors
367+
else if (e instanceof DOMException) {
368+
console.error("Microphone Error:", e.message);
369+
}
370+
// Handle other unexpected errors
371+
else {
372+
console.error("An unexpected error occurred:", e);
373+
}
374+
}
375+
}
376+
377+
// Later, to stop the conversation:
378+
// if (conversationController) {
379+
// await conversationController.stop();
380+
// }
381+
382+
```
383+
310384
## AIErrorCode
311385

312386
Standardized error codes that [AIError](./ai.aierror.md#aierror_class) can have.
@@ -589,6 +663,7 @@ Generation modalities to be returned in generation responses.
589663
ResponseModality: {
590664
readonly TEXT: "TEXT";
591665
readonly IMAGE: "IMAGE";
666+
readonly AUDIO: "AUDIO";
592667
}
593668
```
594669

docs-devsite/ai.prebuiltvoiceconfig.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@ export interface PrebuiltVoiceConfig
2525

2626
| Property | Type | Description |
2727
| --- | --- | --- |
28-
| [voiceConfig](./ai.prebuiltvoiceconfig.md#prebuiltvoiceconfigvoiceconfig) | string | <b><i>(Public Preview)</i></b> The voice name to use for speech synthesis.<!-- -->For a full list of names and demos of what each voice sounds like, see [Chirp 3: HD Voices](https://cloud.google.com/text-to-speech/docs/chirp3-hd)<!-- -->. |
28+
| [voiceName](./ai.prebuiltvoiceconfig.md#prebuiltvoiceconfigvoicename) | string | <b><i>(Public Preview)</i></b> The voice name to use for speech synthesis.<!-- -->For a full list of names and demos of what each voice sounds like, see [Chirp 3: HD Voices](https://cloud.google.com/text-to-speech/docs/chirp3-hd)<!-- -->. |
2929

30-
## PrebuiltVoiceConfig.voiceConfig
30+
## PrebuiltVoiceConfig.voiceName
3131

3232
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
3333
>
@@ -39,5 +39,5 @@ For a full list of names and demos of what each voice sounds like, see [Chirp 3:
3939
<b>Signature:</b>
4040

4141
```typescript
42-
voiceConfig?: string;
42+
voiceName?: string;
4343
```
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
Project: /docs/reference/js/_project.yaml
2+
Book: /docs/reference/_book.yaml
3+
page_type: reference
4+
5+
{% comment %}
6+
DO NOT EDIT THIS FILE!
7+
This is generated by the JS SDK team, and any local changes will be
8+
overwritten. Changes should be made in the source code at
9+
https://github.com/firebase/firebase-js-sdk
10+
{% endcomment %}
11+
12+
# StartAudioConversationOptions interface
13+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
14+
>
15+
16+
Options for [startAudioConversation()](./ai.md#startaudioconversation_01c8e7f)<!-- -->.
17+
18+
<b>Signature:</b>
19+
20+
```typescript
21+
export interface StartAudioConversationOptions
22+
```
23+
24+
## Properties
25+
26+
| Property | Type | Description |
27+
| --- | --- | --- |
28+
| [functionCallingHandler](./ai.startaudioconversationoptions.md#startaudioconversationoptionsfunctioncallinghandler) | (functionCalls: [LiveServerToolCall](./ai.liveservertoolcall.md#liveservertoolcall_interface)<!-- -->\['functionCalls'\]) =&gt; Promise&lt;[Part](./ai.md#part)<!-- -->&gt; | <b><i>(Public Preview)</i></b> An async handler that is called when the model requests a function to be executed. The handler should perform the function call and return the result as a <code>Part</code>, which will then be sent back to the model. |
29+
30+
## StartAudioConversationOptions.functionCallingHandler
31+
32+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
33+
>
34+
35+
An async handler that is called when the model requests a function to be executed. The handler should perform the function call and return the result as a `Part`<!-- -->, which will then be sent back to the model.
36+
37+
<b>Signature:</b>
38+
39+
```typescript
40+
functionCallingHandler?: (functionCalls: LiveServerToolCall['functionCalls']) => Promise<Part>;
41+
```

packages/ai/src/api.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ export * from './requests/schema-builder';
4545
export { ImagenImageFormat } from './requests/imagen-image-format';
4646
export { AIModel, GenerativeModel, LiveGenerativeModel, ImagenModel, AIError };
4747
export { Backend, VertexAIBackend, GoogleAIBackend } from './backend';
48+
export {
49+
startAudioConversation,
50+
AudioConversationController,
51+
StartAudioConversationOptions
52+
} from './methods/live-session-helpers';
4853

4954
declare module '@firebase/component' {
5055
interface NameServiceMapping {

0 commit comments

Comments
 (0)