Skip to content

Commit 43f0a68

Browse files
authored
Merge 5adbd54 into cbef6c6
2 parents cbef6c6 + 5adbd54 commit 43f0a68

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+4197
-8
lines changed

.changeset/blue-pets-sin.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
'firebase': minor
3+
'@firebase/ai': minor
4+
---
5+
6+
Add support for the Gemini Live API.

common/api-review/ai.api.md

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ export const AIErrorCode: {
4141
readonly REQUEST_ERROR: "request-error";
4242
readonly RESPONSE_ERROR: "response-error";
4343
readonly FETCH_ERROR: "fetch-error";
44+
readonly SESSION_CLOSED: "session-closed";
4445
readonly INVALID_CONTENT: "invalid-content";
4546
readonly API_NOT_ENABLED: "api-not-enabled";
4647
readonly INVALID_SCHEMA: "invalid-schema";
@@ -94,6 +95,11 @@ export class ArraySchema extends Schema {
9495
toJSON(): SchemaRequest;
9596
}
9697

98+
// @beta
99+
export interface AudioConversationController {
100+
stop: () => Promise<void>;
101+
}
102+
97103
// @public
98104
export abstract class Backend {
99105
protected constructor(type: BackendType);
@@ -290,6 +296,7 @@ export type FinishReason = (typeof FinishReason)[keyof typeof FinishReason];
290296
export interface FunctionCall {
291297
// (undocumented)
292298
args: object;
299+
id?: string;
293300
// (undocumented)
294301
name: string;
295302
}
@@ -342,6 +349,7 @@ export interface FunctionDeclarationsTool {
342349

343350
// @public
344351
export interface FunctionResponse {
352+
id?: string;
345353
// (undocumented)
346354
name: string;
347355
// (undocumented)
@@ -480,6 +488,9 @@ export function getGenerativeModel(ai: AI, modelParams: ModelParams | HybridPara
480488
// @beta
481489
export function getImagenModel(ai: AI, modelParams: ImagenModelParams, requestOptions?: RequestOptions): ImagenModel;
482490

491+
// @beta
492+
export function getLiveGenerativeModel(ai: AI, modelParams: LiveModelParams): LiveGenerativeModel;
493+
483494
// @public
484495
export class GoogleAIBackend extends Backend {
485496
constructor();
@@ -813,6 +824,96 @@ export interface LanguageModelPromptOptions {
813824
responseConstraint?: object;
814825
}
815826

827+
// @beta
828+
export interface LiveGenerationConfig {
829+
frequencyPenalty?: number;
830+
maxOutputTokens?: number;
831+
presencePenalty?: number;
832+
responseModalities?: ResponseModality[];
833+
speechConfig?: SpeechConfig;
834+
temperature?: number;
835+
topK?: number;
836+
topP?: number;
837+
}
838+
839+
// @beta
840+
export class LiveGenerativeModel extends AIModel {
841+
// Warning: (ae-forgotten-export) The symbol "WebSocketHandler" needs to be exported by the entry point index.d.ts
842+
//
843+
// @internal
844+
constructor(ai: AI, modelParams: LiveModelParams,
845+
_webSocketHandler: WebSocketHandler);
846+
connect(): Promise<LiveSession>;
847+
// (undocumented)
848+
generationConfig: LiveGenerationConfig;
849+
// (undocumented)
850+
systemInstruction?: Content;
851+
// (undocumented)
852+
toolConfig?: ToolConfig;
853+
// (undocumented)
854+
tools?: Tool[];
855+
}
856+
857+
// @beta
858+
export interface LiveModelParams {
859+
// (undocumented)
860+
generationConfig?: LiveGenerationConfig;
861+
// (undocumented)
862+
model: string;
863+
// (undocumented)
864+
systemInstruction?: string | Part | Content;
865+
// (undocumented)
866+
toolConfig?: ToolConfig;
867+
// (undocumented)
868+
tools?: Tool[];
869+
}
870+
871+
// @beta
872+
export const LiveResponseType: {
873+
SERVER_CONTENT: string;
874+
TOOL_CALL: string;
875+
TOOL_CALL_CANCELLATION: string;
876+
};
877+
878+
// @beta
879+
export type LiveResponseType = (typeof LiveResponseType)[keyof typeof LiveResponseType];
880+
881+
// @beta
882+
export interface LiveServerContent {
883+
interrupted?: boolean;
884+
modelTurn?: Content;
885+
turnComplete?: boolean;
886+
// (undocumented)
887+
type: 'serverContent';
888+
}
889+
890+
// @beta
891+
export interface LiveServerToolCall {
892+
functionCalls: FunctionCall[];
893+
// (undocumented)
894+
type: 'toolCall';
895+
}
896+
897+
// @beta
898+
export interface LiveServerToolCallCancellation {
899+
functionIds: string[];
900+
// (undocumented)
901+
type: 'toolCallCancellation';
902+
}
903+
904+
// @beta
905+
export class LiveSession {
906+
// @internal
907+
constructor(webSocketHandler: WebSocketHandler, serverMessages: AsyncGenerator<unknown>);
908+
close(): Promise<void>;
909+
inConversation: boolean;
910+
isClosed: boolean;
911+
receive(): AsyncGenerator<LiveServerContent | LiveServerToolCall | LiveServerToolCallCancellation>;
912+
send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
913+
sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
914+
sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
915+
}
916+
816917
// @public
817918
export const Modality: {
818919
readonly MODALITY_UNSPECIFIED: "MODALITY_UNSPECIFIED";
@@ -885,6 +986,11 @@ export type Part = TextPart | InlineDataPart | FunctionCallPart | FunctionRespon
885986
// @public
886987
export const POSSIBLE_ROLES: readonly ["user", "model", "function", "system"];
887988

989+
// @beta
990+
export interface PrebuiltVoiceConfig {
991+
voiceName?: string;
992+
}
993+
888994
// @public
889995
export interface PromptFeedback {
890996
// (undocumented)
@@ -904,6 +1010,7 @@ export interface RequestOptions {
9041010
export const ResponseModality: {
9051011
readonly TEXT: "TEXT";
9061012
readonly IMAGE: "IMAGE";
1013+
readonly AUDIO: "AUDIO";
9071014
};
9081015

9091016
// @beta
@@ -1048,6 +1155,19 @@ export interface Segment {
10481155
text: string;
10491156
}
10501157

1158+
// @beta
1159+
export interface SpeechConfig {
1160+
voiceConfig?: VoiceConfig;
1161+
}
1162+
1163+
// @beta
1164+
export function startAudioConversation(liveSession: LiveSession, options?: StartAudioConversationOptions): Promise<AudioConversationController>;
1165+
1166+
// @beta
1167+
export interface StartAudioConversationOptions {
1168+
functionCallingHandler?: (functionCalls: LiveServerToolCall['functionCalls']) => Promise<Part>;
1169+
}
1170+
10511171
// @public
10521172
export interface StartChatParams extends BaseParams {
10531173
// (undocumented)
@@ -1130,6 +1250,11 @@ export interface VideoMetadata {
11301250
startOffset: string;
11311251
}
11321252

1253+
// @beta
1254+
export interface VoiceConfig {
1255+
prebuiltVoiceConfig?: PrebuiltVoiceConfig;
1256+
}
1257+
11331258
// @public (undocumented)
11341259
export interface WebAttribution {
11351260
// (undocumented)

docs-devsite/_toc.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ toc:
1616
path: /docs/reference/js/ai.anyofschema.md
1717
- title: ArraySchema
1818
path: /docs/reference/js/ai.arrayschema.md
19+
- title: AudioConversationController
20+
path: /docs/reference/js/ai.audioconversationcontroller.md
1921
- title: Backend
2022
path: /docs/reference/js/ai.backend.md
2123
- title: BaseParams
@@ -124,6 +126,20 @@ toc:
124126
path: /docs/reference/js/ai.languagemodelmessagecontent.md
125127
- title: LanguageModelPromptOptions
126128
path: /docs/reference/js/ai.languagemodelpromptoptions.md
129+
- title: LiveGenerationConfig
130+
path: /docs/reference/js/ai.livegenerationconfig.md
131+
- title: LiveGenerativeModel
132+
path: /docs/reference/js/ai.livegenerativemodel.md
133+
- title: LiveModelParams
134+
path: /docs/reference/js/ai.livemodelparams.md
135+
- title: LiveServerContent
136+
path: /docs/reference/js/ai.liveservercontent.md
137+
- title: LiveServerToolCall
138+
path: /docs/reference/js/ai.liveservertoolcall.md
139+
- title: LiveServerToolCallCancellation
140+
path: /docs/reference/js/ai.liveservertoolcallcancellation.md
141+
- title: LiveSession
142+
path: /docs/reference/js/ai.livesession.md
127143
- title: ModalityTokenCount
128144
path: /docs/reference/js/ai.modalitytokencount.md
129145
- title: ModelParams
@@ -136,6 +152,8 @@ toc:
136152
path: /docs/reference/js/ai.objectschemarequest.md
137153
- title: OnDeviceParams
138154
path: /docs/reference/js/ai.ondeviceparams.md
155+
- title: PrebuiltVoiceConfig
156+
path: /docs/reference/js/ai.prebuiltvoiceconfig.md
139157
- title: PromptFeedback
140158
path: /docs/reference/js/ai.promptfeedback.md
141159
- title: RequestOptions
@@ -160,6 +178,10 @@ toc:
160178
path: /docs/reference/js/ai.searchentrypoint.md
161179
- title: Segment
162180
path: /docs/reference/js/ai.segment.md
181+
- title: SpeechConfig
182+
path: /docs/reference/js/ai.speechconfig.md
183+
- title: StartAudioConversationOptions
184+
path: /docs/reference/js/ai.startaudioconversationoptions.md
163185
- title: StartChatParams
164186
path: /docs/reference/js/ai.startchatparams.md
165187
- title: StringSchema
@@ -176,6 +198,8 @@ toc:
176198
path: /docs/reference/js/ai.vertexaibackend.md
177199
- title: VideoMetadata
178200
path: /docs/reference/js/ai.videometadata.md
201+
- title: VoiceConfig
202+
path: /docs/reference/js/ai.voiceconfig.md
179203
- title: WebAttribution
180204
path: /docs/reference/js/ai.webattribution.md
181205
- title: WebGroundingChunk
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
Project: /docs/reference/js/_project.yaml
2+
Book: /docs/reference/_book.yaml
3+
page_type: reference
4+
5+
{% comment %}
6+
DO NOT EDIT THIS FILE!
7+
This is generated by the JS SDK team, and any local changes will be
8+
overwritten. Changes should be made in the source code at
9+
https://github.com/firebase/firebase-js-sdk
10+
{% endcomment %}
11+
12+
# AudioConversationController interface
13+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
14+
>
15+
16+
A controller for managing an active audio conversation.
17+
18+
<b>Signature:</b>
19+
20+
```typescript
21+
export interface AudioConversationController
22+
```
23+
24+
## Properties
25+
26+
| Property | Type | Description |
27+
| --- | --- | --- |
28+
| [stop](./ai.audioconversationcontroller.md#audioconversationcontrollerstop) | () =&gt; Promise&lt;void&gt; | <b><i>(Public Preview)</i></b> Stops the audio conversation, closes the microphone connection, and cleans up resources. Returns a promise that resolves when cleanup is complete. |
29+
30+
## AudioConversationController.stop
31+
32+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
33+
>
34+
35+
Stops the audio conversation, closes the microphone connection, and cleans up resources. Returns a promise that resolves when cleanup is complete.
36+
37+
<b>Signature:</b>
38+
39+
```typescript
40+
stop: () => Promise<void>;
41+
```

docs-devsite/ai.functioncall.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ export interface FunctionCall
2323
| Property | Type | Description |
2424
| --- | --- | --- |
2525
| [args](./ai.functioncall.md#functioncallargs) | object | |
26+
| [id](./ai.functioncall.md#functioncallid) | string | The id of the function call. This must be sent back in the associated [FunctionResponse](./ai.functionresponse.md#functionresponse_interface)<!-- -->. |
2627
| [name](./ai.functioncall.md#functioncallname) | string | |
2728

2829
## FunctionCall.args
@@ -33,6 +34,18 @@ export interface FunctionCall
3334
args: object;
3435
```
3536

37+
## FunctionCall.id
38+
39+
The id of the function call. This must be sent back in the associated [FunctionResponse](./ai.functionresponse.md#functionresponse_interface)<!-- -->.
40+
41+
This property is only supported in the Gemini Developer API ([GoogleAIBackend](./ai.googleaibackend.md#googleaibackend_class)<!-- -->). When using the Vertex AI Gemini API ([VertexAIBackend](./ai.vertexaibackend.md#vertexaibackend_class)<!-- -->), this property will be `undefined`<!-- -->.
42+
43+
<b>Signature:</b>
44+
45+
```typescript
46+
id?: string;
47+
```
48+
3649
## FunctionCall.name
3750

3851
<b>Signature:</b>

docs-devsite/ai.functionresponse.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,22 @@ export interface FunctionResponse
2222

2323
| Property | Type | Description |
2424
| --- | --- | --- |
25+
| [id](./ai.functionresponse.md#functionresponseid) | string | The id of the [FunctionCall](./ai.functioncall.md#functioncall_interface)<!-- -->. |
2526
| [name](./ai.functionresponse.md#functionresponsename) | string | |
2627
| [response](./ai.functionresponse.md#functionresponseresponse) | object | |
2728

29+
## FunctionResponse.id
30+
31+
The id of the [FunctionCall](./ai.functioncall.md#functioncall_interface)<!-- -->.
32+
33+
This property is only supported in the Gemini Developer API ([GoogleAIBackend](./ai.googleaibackend.md#googleaibackend_class)<!-- -->). When using the Vertex AI Gemini API ([VertexAIBackend](./ai.vertexaibackend.md#vertexaibackend_class)<!-- -->), this property will be `undefined`<!-- -->.
34+
35+
<b>Signature:</b>
36+
37+
```typescript
38+
id?: string;
39+
```
40+
2841
## FunctionResponse.name
2942

3043
<b>Signature:</b>

0 commit comments

Comments
 (0)