Commit a1e7e86

Merge remote-tracking branch 'origin/main' into SqliteSequenceNumber
2 parents: 22911d5 + 264ebad

File tree

23 files changed (+412 −93 lines)

firebase-ai/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
@@ -1,5 +1,6 @@
 # Unreleased
 
+- [feature] Added support for sending realtime audio and video in a `LiveSession`.
 - [changed] Removed redundant internal exception types. (#7475)
 
 # 17.4.0

firebase-ai/api.txt

Lines changed: 22 additions & 8 deletions
@@ -145,8 +145,11 @@ package com.google.firebase.ai.java {
     method public abstract org.reactivestreams.Publisher<com.google.firebase.ai.type.LiveServerMessage> receive();
     method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> send(com.google.firebase.ai.type.Content content);
     method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> send(String text);
+    method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendAudioRealtime(com.google.firebase.ai.type.InlineData audio);
     method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendFunctionResponse(java.util.List<com.google.firebase.ai.type.FunctionResponsePart> functionList);
-    method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendMediaStream(java.util.List<com.google.firebase.ai.type.MediaData> mediaChunks);
+    method @Deprecated public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendMediaStream(java.util.List<com.google.firebase.ai.type.MediaData> mediaChunks);
+    method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendTextRealtime(String text);
+    method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendVideoRealtime(com.google.firebase.ai.type.InlineData video);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation();
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(boolean enableInterruptions);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler);
@@ -801,6 +804,14 @@ package com.google.firebase.ai.type {
   public static final class ImagenSubjectReferenceType.Companion {
   }
 
+  public final class InlineData {
+    ctor public InlineData(byte[] data, String mimeType);
+    method public byte[] getData();
+    method public String getMimeType();
+    property public final byte[] data;
+    property public final String mimeType;
+  }
+
   public final class InlineDataPart implements com.google.firebase.ai.type.Part {
     ctor public InlineDataPart(byte[] inlineData, String mimeType);
     method public byte[] getInlineData();
@@ -891,20 +902,23 @@ package com.google.firebase.ai.type {
     method public kotlinx.coroutines.flow.Flow<com.google.firebase.ai.type.LiveServerMessage> receive();
     method public suspend Object? send(com.google.firebase.ai.type.Content content, kotlin.coroutines.Continuation<? super kotlin.Unit>);
     method public suspend Object? send(String text, kotlin.coroutines.Continuation<? super kotlin.Unit>);
+    method public suspend Object? sendAudioRealtime(com.google.firebase.ai.type.InlineData audio, kotlin.coroutines.Continuation<? super kotlin.Unit>);
     method public suspend Object? sendFunctionResponse(java.util.List<com.google.firebase.ai.type.FunctionResponsePart> functionList, kotlin.coroutines.Continuation<? super kotlin.Unit>);
-    method public suspend Object? sendMediaStream(java.util.List<com.google.firebase.ai.type.MediaData> mediaChunks, kotlin.coroutines.Continuation<? super kotlin.Unit>);
+    method @Deprecated public suspend Object? sendMediaStream(java.util.List<com.google.firebase.ai.type.MediaData> mediaChunks, kotlin.coroutines.Continuation<? super kotlin.Unit>);
+    method public suspend Object? sendTextRealtime(String text, kotlin.coroutines.Continuation<? super kotlin.Unit>);
+    method public suspend Object? sendVideoRealtime(com.google.firebase.ai.type.InlineData video, kotlin.coroutines.Continuation<? super kotlin.Unit>);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation<? super kotlin.Unit>);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, kotlin.coroutines.Continuation<? super kotlin.Unit>);
     method public void stopAudioConversation();
     method public void stopReceiving();
   }
 
-  @com.google.firebase.ai.type.PublicPreviewAPI public final class MediaData {
-    ctor public MediaData(byte[] data, String mimeType);
-    method public byte[] getData();
-    method public String getMimeType();
-    property public final byte[] data;
-    property public final String mimeType;
+  @Deprecated @com.google.firebase.ai.type.PublicPreviewAPI public final class MediaData {
+    ctor @Deprecated public MediaData(byte[] data, String mimeType);
+    method @Deprecated public byte[] getData();
+    method @Deprecated public String getMimeType();
+    property @Deprecated public final byte[] data;
+    property @Deprecated public final String mimeType;
   }
 
   public final class ModalityTokenCount {

firebase-ai/gradle.properties

Lines changed: 1 addition & 1 deletion
@@ -12,5 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-version=17.4.1
+version=17.5.0
 latestReleasedVersion=17.4.0

firebase-ai/src/main/kotlin/com/google/firebase/ai/common/APIController.kt

Lines changed: 2 additions & 0 deletions
@@ -77,6 +77,7 @@ import kotlinx.coroutines.flow.map
 import kotlinx.coroutines.launch
 import kotlinx.coroutines.withTimeout
 import kotlinx.serialization.ExperimentalSerializationApi
+import kotlinx.serialization.json.ClassDiscriminatorMode
 import kotlinx.serialization.json.Json
 
 @OptIn(ExperimentalSerializationApi::class)
@@ -85,6 +86,7 @@ internal val JSON = Json {
   prettyPrint = false
   isLenient = true
   explicitNulls = false
+  classDiscriminatorMode = ClassDiscriminatorMode.NONE
 }
 
 /**
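
kotlinx.serialization's default polymorphic encoding writes a class-discriminator field (named "type" by default) next to the payload's own properties; `ClassDiscriminatorMode.NONE` suppresses it, keeping the wire JSON free of a field the backend schema does not define. A minimal sketch of the effect, using hypothetical `Shape`/`Circle` types rather than anything from this SDK:

import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
import kotlinx.serialization.encodeToString
import kotlinx.serialization.json.ClassDiscriminatorMode
import kotlinx.serialization.json.Json

@Serializable sealed class Shape

@Serializable @SerialName("circle") data class Circle(val radius: Double) : Shape()

@OptIn(ExperimentalSerializationApi::class)
fun main() {
  val withDiscriminator = Json {}
  val withoutDiscriminator = Json { classDiscriminatorMode = ClassDiscriminatorMode.NONE }

  val shape: Shape = Circle(1.0)
  // Default polymorphic output carries the discriminator:
  println(withDiscriminator.encodeToString(shape))    // {"type":"circle","radius":1.0}
  // With ClassDiscriminatorMode.NONE the discriminator is omitted:
  println(withoutDiscriminator.encodeToString(shape)) // {"radius":1.0}
}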

firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt

Lines changed: 35 additions & 0 deletions
@@ -23,6 +23,7 @@ import com.google.common.util.concurrent.ListenableFuture
 import com.google.firebase.ai.type.Content
 import com.google.firebase.ai.type.FunctionCallPart
 import com.google.firebase.ai.type.FunctionResponsePart
+import com.google.firebase.ai.type.InlineData
 import com.google.firebase.ai.type.LiveServerMessage
 import com.google.firebase.ai.type.LiveSession
 import com.google.firebase.ai.type.MediaData
@@ -126,13 +127,38 @@ public abstract class LiveSessionFutures internal constructor() {
     functionList: List<FunctionResponsePart>
   ): ListenableFuture<Unit>
 
+  /**
+   * Sends audio data to the server in realtime. See
+   * https://ai.google.dev/api/live#bidigeneratecontentrealtimeinput for details about realtime
+   * input usage.
+   * @param audio The audio data to send.
+   */
+  public abstract fun sendAudioRealtime(audio: InlineData): ListenableFuture<Unit>
+
+  /**
+   * Sends video data to the server in realtime. See
+   * https://ai.google.dev/api/live#bidigeneratecontentrealtimeinput for details about realtime
+   * input usage.
+   * @param video The video data to send. The MIME type can be either a video or an image format.
+   */
+  public abstract fun sendVideoRealtime(video: InlineData): ListenableFuture<Unit>
+
+  /**
+   * Sends text data to the server in realtime. See
+   * https://ai.google.dev/api/live#bidigeneratecontentrealtimeinput for details about realtime
+   * input usage.
+   * @param text The text data to send.
+   */
+  public abstract fun sendTextRealtime(text: String): ListenableFuture<Unit>
+
   /**
    * Streams client data to the model.
    *
    * Calling this after [startAudioConversation] will play the response audio immediately.
    *
    * @param mediaChunks The list of [MediaData] instances representing the media data to be sent.
    */
+  @Deprecated("Use sendAudioRealtime, sendVideoRealtime, or sendTextRealtime instead")
   public abstract fun sendMediaStream(mediaChunks: List<MediaData>): ListenableFuture<Unit>
 
   /**
@@ -190,6 +216,15 @@ public abstract class LiveSessionFutures internal constructor() {
     override fun sendFunctionResponse(functionList: List<FunctionResponsePart>) =
       SuspendToFutureAdapter.launchFuture { session.sendFunctionResponse(functionList) }
 
+    override fun sendAudioRealtime(audio: InlineData): ListenableFuture<Unit> =
+      SuspendToFutureAdapter.launchFuture { session.sendAudioRealtime(audio) }
+
+    override fun sendVideoRealtime(video: InlineData): ListenableFuture<Unit> =
+      SuspendToFutureAdapter.launchFuture { session.sendVideoRealtime(video) }
+
+    override fun sendTextRealtime(text: String): ListenableFuture<Unit> =
+      SuspendToFutureAdapter.launchFuture { session.sendTextRealtime(text) }
+
     override fun sendMediaStream(mediaChunks: List<MediaData>) =
       SuspendToFutureAdapter.launchFuture { session.sendMediaStream(mediaChunks) }
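
From the futures surface, each new method returns a `ListenableFuture<Unit>` bridged through `SuspendToFutureAdapter`. A hedged usage sketch for Java-style callers; the `session`, `executor`, and PCM chunk are assumptions for illustration, and the `@OptIn` reflects the Live API's public-preview status:

import com.google.common.util.concurrent.FutureCallback
import com.google.common.util.concurrent.Futures
import com.google.firebase.ai.java.LiveSessionFutures
import com.google.firebase.ai.type.InlineData
import com.google.firebase.ai.type.PublicPreviewAPI
import java.util.concurrent.Executor

@OptIn(PublicPreviewAPI::class)
fun sendChunk(session: LiveSessionFutures, pcmChunk: ByteArray, executor: Executor) {
  // The future completes once the frame has been handed to the websocket.
  val future = session.sendAudioRealtime(InlineData(pcmChunk, "audio/pcm"))
  Futures.addCallback(
    future,
    object : FutureCallback<Unit> {
      override fun onSuccess(result: Unit?) { /* chunk sent */ }
      override fun onFailure(t: Throwable) { /* handle the FirebaseAIException */ }
    },
    executor,
  )
}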

firebase-ai/src/main/kotlin/com/google/firebase/ai/type/FunctionDeclaration.kt

Lines changed: 2 additions & 2 deletions
@@ -61,12 +61,12 @@ public class FunctionDeclaration(
   internal val schema: Schema =
     Schema.obj(properties = parameters, optionalProperties = optionalParameters, nullable = false)
 
-  internal fun toInternal() = Internal(name, description, schema.toInternal())
+  internal fun toInternal() = Internal(name, description, schema.toInternalOpenApi())
 
   @Serializable
   internal data class Internal(
     val name: String,
     val description: String,
-    val parameters: Schema.Internal
+    val parameters: Schema.InternalOpenAPI
   )
 }

firebase-ai/src/main/kotlin/com/google/firebase/ai/type/GenerationConfig.kt

Lines changed: 2 additions & 2 deletions
@@ -200,7 +200,7 @@ private constructor(
       frequencyPenalty = frequencyPenalty,
       presencePenalty = presencePenalty,
       responseMimeType = responseMimeType,
-      responseSchema = responseSchema?.toInternal(),
+      responseSchema = responseSchema?.toInternalOpenApi(),
       responseModalities = responseModalities?.map { it.toInternal() },
       thinkingConfig = thinkingConfig?.toInternal()
     )
@@ -216,7 +216,7 @@ private constructor(
     @SerialName("response_mime_type") val responseMimeType: String? = null,
     @SerialName("presence_penalty") val presencePenalty: Float? = null,
     @SerialName("frequency_penalty") val frequencyPenalty: Float? = null,
-    @SerialName("response_schema") val responseSchema: Schema.Internal? = null,
+    @SerialName("response_schema") val responseSchema: Schema.InternalOpenAPI? = null,
     @SerialName("response_modalities") val responseModalities: List<String>? = null,
     @SerialName("thinking_config") val thinkingConfig: ThinkingConfig.Internal? = null
   )
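
The `toInternal()` to `toInternalOpenApi()` rename (here and in FunctionDeclaration above) suggests `Schema` is moving toward one named internal projection per wire format. A purely illustrative sketch of that pattern; apart from `toInternalOpenApi`/`InternalOpenAPI`, every name below is invented and not taken from the SDK:

import kotlinx.serialization.Serializable

// Invented stand-in for the real Schema class, showing only the shape of the
// pattern: one public type, one serializable projection per wire format.
public class SchemaSketch(val type: String, val description: String? = null) {
  @Serializable
  internal data class InternalOpenAPI(val type: String, val description: String? = null)

  // A hypothetical toInternalJsonSchema() could sit alongside without ambiguity.
  internal fun toInternalOpenApi() = InternalOpenAPI(type, description)
}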

firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt

Lines changed: 71 additions & 5 deletions
@@ -28,6 +28,7 @@ import com.google.firebase.ai.common.JSON
 import com.google.firebase.ai.common.util.CancelledCoroutineScope
 import com.google.firebase.ai.common.util.accumulateUntil
 import com.google.firebase.ai.common.util.childJob
+import com.google.firebase.ai.type.MediaData.Internal
 import com.google.firebase.annotations.concurrent.Blocking
 import io.ktor.client.plugins.websocket.DefaultClientWebSocketSession
 import io.ktor.websocket.Frame
@@ -255,20 +256,69 @@ internal constructor(
     }
   }
 
+  /**
+   * Sends an audio input stream to the model, using the realtime API.
+   *
+   * To learn more about the supported audio formats, and the form they should be provided in, see
+   * the docs on
+   * [Supported audio formats](https://cloud.google.com/vertex-ai/generative-ai/docs/live-api#supported-audio-formats)
+   *
+   * @param audio Raw audio data used to update the model on the client's conversation. For best
+   * results, send 16-bit PCM audio at 24kHz.
+   */
+  public suspend fun sendAudioRealtime(audio: InlineData) {
+    FirebaseAIException.catchAsync {
+      val jsonString =
+        Json.encodeToString(BidiGenerateContentRealtimeInputSetup(audio = audio).toInternal())
+      session.send(Frame.Text(jsonString))
+    }
+  }
+
+  /**
+   * Sends a video input stream to the model, using the realtime API.
+   *
+   * @param video Encoded video data, used to update the model on the client's conversation. The
+   * MIME type can be a video format (e.g., `video/webm`) or an image format (e.g., `image/jpeg`).
+   */
+  public suspend fun sendVideoRealtime(video: InlineData) {
+    FirebaseAIException.catchAsync {
+      val jsonString =
+        Json.encodeToString(BidiGenerateContentRealtimeInputSetup(video = video).toInternal())
+      session.send(Frame.Text(jsonString))
+    }
+  }
+
+  /**
+   * Sends a text input stream to the model, using the realtime API.
+   *
+   * @param text Text content to append to the current client's conversation.
+   */
+  public suspend fun sendTextRealtime(text: String) {
+    FirebaseAIException.catchAsync {
+      val jsonString =
+        Json.encodeToString(BidiGenerateContentRealtimeInputSetup(text = text).toInternal())
+      session.send(Frame.Text(jsonString))
+    }
+  }
+
   /**
    * Streams client data to the model.
    *
    * Calling this after [startAudioConversation] will play the response audio immediately.
    *
    * @param mediaChunks The list of [MediaData] instances representing the media data to be sent.
    */
+  @Deprecated("Use sendAudioRealtime, sendVideoRealtime, or sendTextRealtime instead")
   public suspend fun sendMediaStream(
     mediaChunks: List<MediaData>,
   ) {
     FirebaseAIException.catchAsync {
       val jsonString =
         Json.encodeToString(
-          BidiGenerateContentRealtimeInputSetup(mediaChunks.map { (it.toInternal()) }).toInternal()
+          BidiGenerateContentRealtimeInputSetup(
+              mediaChunks.map { InlineData(it.data, it.mimeType) }
+            )
+            .toInternal()
         )
       session.send(Frame.Text(jsonString))
     }
@@ -324,7 +374,7 @@ internal constructor(
       ?.listenToRecording()
       ?.buffer(UNLIMITED)
       ?.accumulateUntil(MIN_BUFFER_SIZE)
-      ?.onEach { sendMediaStream(listOf(MediaData(it, "audio/pcm"))) }
+      ?.onEach { sendAudioRealtime(InlineData(it, "audio/pcm")) }
      ?.catch { throw FirebaseAIException.from(it) }
       ?.launchIn(scope)
   }
@@ -464,15 +514,31 @@ internal constructor(
  *
  * End of turn is derived from user activity (e.g., end of speech).
  */
-internal class BidiGenerateContentRealtimeInputSetup(val mediaChunks: List<MediaData.Internal>) {
+internal class BidiGenerateContentRealtimeInputSetup(
+  val mediaChunks: List<InlineData>? = null,
+  val audio: InlineData? = null,
+  val video: InlineData? = null,
+  val text: String? = null
+) {
   @Serializable
   internal class Internal(val realtimeInput: BidiGenerateContentRealtimeInput) {
     @Serializable
     internal data class BidiGenerateContentRealtimeInput(
-      val mediaChunks: List<MediaData.Internal>
+      val mediaChunks: List<InlineData.Internal>?,
+      val audio: InlineData.Internal?,
+      val video: InlineData.Internal?,
+      val text: String?
     )
   }
-  fun toInternal() = Internal(Internal.BidiGenerateContentRealtimeInput(mediaChunks))
+  fun toInternal() =
+    Internal(
+      Internal.BidiGenerateContentRealtimeInput(
+        mediaChunks?.map { it.toInternal() },
+        audio?.toInternal(),
+        video?.toInternal(),
+        text
+      )
+    )
 }
 
 private companion object {
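
Each new field maps one-to-one onto `BidiGenerateContentRealtimeInput`, and with `explicitNulls = false` (set in APIController.kt above) the unused fields are omitted, so a `sendAudioRealtime` call should produce a frame shaped roughly like `{"realtimeInput":{"audio":{"mimeType":"audio/pcm","data":"<base64>"}}}`. A hedged call-site sketch; the `micChunks` flow and `frame` bytes are illustrative assumptions, not code from this change:

import com.google.firebase.ai.type.InlineData
import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.PublicPreviewAPI
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.launch

@OptIn(PublicPreviewAPI::class)
suspend fun streamInputs(session: LiveSession, micChunks: Flow<ByteArray>, frame: ByteArray) =
  coroutineScope {
    // Audio: raw 16-bit PCM at 24kHz, streamed as it is captured.
    launch {
      micChunks.collect { chunk -> session.sendAudioRealtime(InlineData(chunk, "audio/pcm")) }
    }
    // Video: one encoded frame; an image MIME type such as image/jpeg also works.
    session.sendVideoRealtime(InlineData(frame, "image/jpeg"))
    // Text: appended to the client's side of the conversation.
    session.sendTextRealtime("Describe what you can see.")
  }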

firebase-ai/src/main/kotlin/com/google/firebase/ai/type/MediaData.kt

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@ import kotlinx.serialization.Serializable
  * [Firebase documentation](https://firebase.google.com/docs/vertex-ai/input-file-requirements).
  */
 @PublicPreviewAPI
+@Deprecated("Use InlineData instead", ReplaceWith("InlineData"))
 public class MediaData(public val data: ByteArray, public val mimeType: String) {
   @Serializable
   internal class Internal(
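
`InlineData` has the same `(data, mimeType)` shape as the deprecated class, so migration is a rename at construction sites plus a move to the modality-specific send methods. A sketch, with `session` and `pcmBytes` assumed:

import com.google.firebase.ai.type.InlineData
import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.MediaData
import com.google.firebase.ai.type.PublicPreviewAPI

@OptIn(PublicPreviewAPI::class)
@Suppress("DEPRECATION")
suspend fun migrate(session: LiveSession, pcmBytes: ByteArray) {
  // Before: an untyped chunk list sent through the deprecated method.
  session.sendMediaStream(listOf(MediaData(pcmBytes, "audio/pcm")))

  // After: the payload type and the method name the modality explicitly.
  session.sendAudioRealtime(InlineData(pcmBytes, "audio/pcm"))
}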

firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt

Lines changed: 16 additions & 8 deletions
@@ -19,11 +19,11 @@ package com.google.firebase.ai.type
 import android.graphics.Bitmap
 import android.graphics.BitmapFactory
 import android.util.Log
+import com.google.firebase.ai.type.ImagenImageFormat.Internal
 import java.io.ByteArrayOutputStream
 import kotlinx.serialization.DeserializationStrategy
 import kotlinx.serialization.SerialName
 import kotlinx.serialization.Serializable
-import kotlinx.serialization.SerializationException
 import kotlinx.serialization.json.JsonContentPolymorphicSerializer
 import kotlinx.serialization.json.JsonElement
 import kotlinx.serialization.json.JsonNull
@@ -161,14 +161,22 @@ internal constructor(
 
   @Serializable
   internal data class Internal(
-    @SerialName("inlineData") val inlineData: InlineData,
+    @SerialName("inlineData") val inlineData: InlineData.Internal,
     val thought: Boolean? = null,
     val thoughtSignature: String? = null
-  ) : InternalPart {
+  ) : InternalPart
+}
 
-  @Serializable
-  internal data class InlineData(@SerialName("mimeType") val mimeType: String, val data: Base64)
-}
+/**
+ * Represents binary data with an associated MIME type.
+ * @property data the binary data as a [ByteArray]
+ * @property mimeType an IANA standard MIME type.
+ */
+public class InlineData(public val data: ByteArray, public val mimeType: String) {
+  @Serializable internal data class Internal(val mimeType: String, val data: Base64)
+
+  internal fun toInternal() =
+    Internal(mimeType, android.util.Base64.encodeToString(data, BASE_64_FLAGS))
 }
 
 /** Represents function call name and params received from requests. */
@@ -334,13 +342,13 @@ internal fun Part.toInternal(): InternalPart {
     is TextPart -> TextPart.Internal(text, isThought, thoughtSignature)
     is ImagePart ->
       InlineDataPart.Internal(
-        InlineDataPart.Internal.InlineData("image/jpeg", encodeBitmapToBase64Jpeg(image)),
+        InlineData.Internal("image/jpeg", encodeBitmapToBase64Jpeg(image)),
         isThought,
         thoughtSignature
       )
     is InlineDataPart ->
       InlineDataPart.Internal(
-        InlineDataPart.Internal.InlineData(
+        InlineData.Internal(
           mimeType,
           android.util.Base64.encodeToString(inlineData, BASE_64_FLAGS)
        ),
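
For reference, `InlineData.toInternal()` pairs the MIME type with the Base64-encoded bytes. A rough, runnable stand-in for the serialized shape (the SDK itself uses `android.util.Base64` with `BASE_64_FLAGS`; `java.util.Base64` is substituted here only so the sketch runs off-device):

import java.util.Base64

fun encodeInline(data: ByteArray, mimeType: String): String {
  // Mirrors the Internal(mimeType, base64) pairing introduced above.
  val b64 = Base64.getEncoder().encodeToString(data)
  return """{"mimeType":"$mimeType","data":"$b64"}"""
}

fun main() {
  // Prints: {"mimeType":"text/plain","data":"aGVsbG8="}
  println(encodeInline("hello".toByteArray(), "text/plain"))
}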
