Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions core/src/main/java/ai/z/openapi/api/audio/AudioApi.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,20 @@ public interface AudioApi {
@POST("audio/speech")
Single<ResponseBody> audioSpeech(@Body AudioSpeechRequest request);

/**
 * Streaming Text-to-Speech (TTS) conversion using GLM-4-Voice. Converts text input
 * into natural-sounding speech audio with emotion and tone control. Supports
 * multiple voices, languages, speed adjustment, and various audio formats. Features
 * advanced voice synthesis with customizable emotional expressions and dialects.
 * <p>
 * Posts to the same {@code audio/speech} endpoint as
 * {@link #audioSpeech(AudioSpeechRequest)}, but is annotated with
 * {@code @Streaming} and returns a {@code Call} instead of a {@code Single}, so the
 * caller decides when the request is executed and how the body is consumed.
 * @param request TTS parameters including text, voice selection, emotion, speed,
 * tone, and output format
 * @return a call whose response body carries the generated audio, streamed in the
 * specified format
 */
@Streaming
@POST("audio/speech")
Call<ResponseBody> audioSpeechStreaming(@Body AudioSpeechRequest request);

/**
* Voice cloning and customization using advanced neural models Creates custom voice
* models from provided audio samples with high fidelity Enables personalized speech
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ public interface AudioService {
*/
AudioSpeechResponse createSpeech(AudioSpeechRequest request);

/**
 * Creates speech from text using text-to-speech, returning the result as a
 * streaming response rather than a single completed response.
 * @param request the speech generation request
 * @return an {@link AudioSpeechStreamingResponse} giving access to the streamed
 * speech output
 */
AudioSpeechStreamingResponse createStreamingSpeechStreaming(AudioSpeechRequest request);

/**
* Creates customized speech with specific voice characteristics.
* @param request the speech customization request
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import ai.z.openapi.utils.FlowableRequestSupplier;
import ai.z.openapi.utils.RequestSupplier;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import io.reactivex.rxjava3.core.Single;
import lombok.extern.slf4j.Slf4j;
import okhttp3.MediaType;
Expand Down Expand Up @@ -57,6 +58,13 @@ public AudioSpeechResponse createSpeech(AudioSpeechRequest request) {
return this.zAiClient.executeRequest(request, supplier, AudioSpeechResponse.class);
}

/**
 * Streaming variant of speech generation: validates the request, then hands it to
 * the client's streaming pipeline together with the streaming endpoint.
 * @param request the speech generation request
 * @return the streaming response produced by the client, with chunks typed as
 * {@link ObjectNode}
 */
@Override
public AudioSpeechStreamingResponse createStreamingSpeechStreaming(AudioSpeechRequest request) {
    // Validation runs first, before the endpoint supplier is even created.
    validateSpeechParams(request);

    // Explicitly typed so the method reference resolves to the streaming Retrofit call.
    FlowableRequestSupplier<AudioSpeechRequest, retrofit2.Call<ResponseBody>> streamingCall = audioApi::audioSpeechStreaming;

    return this.zAiClient.streamRequest(
            request,
            streamingCall,
            AudioSpeechStreamingResponse.class,
            ObjectNode.class);
}

@Override
public AudioCustomizationResponse createCustomSpeech(AudioCustomizationRequest request) {
validateCustomSpeechParams(request);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package ai.z.openapi.service.audio;

import ai.z.openapi.core.model.ClientResponse;
import ai.z.openapi.core.model.FlowableClientResponse;
import ai.z.openapi.service.model.ChatError;
import java.io.File;

import com.fasterxml.jackson.databind.node.ObjectNode;
import io.reactivex.rxjava3.core.Flowable;
import lombok.Data;

/**
 * Response holder for streaming text-to-speech requests.
 * <p>
 * Implements {@link FlowableClientResponse} with {@link ObjectNode} as the element
 * type. Accessors for every field are generated by Lombok's {@code @Data}.
 */
@Data
public class AudioSpeechStreamingResponse implements FlowableClientResponse<ObjectNode> {

// Numeric result code. NOTE(review): value semantics are defined by the client, not visible here.
private int code;

// Message accompanying the result code.
private String msg;

// Success flag for the request. NOTE(review): who sets it is not visible in this class.
private boolean success;

// Response payload as a JSON object node. NOTE(review): presumably unused while streaming - confirm.
private ObjectNode data;

// Error details. NOTE(review): presumably populated only when the request fails - confirm.
private ChatError error;

// Stream of JSON chunks; callers subscribe to this to receive the speech output.
private Flowable<ObjectNode> flowable;

}
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,29 @@ void shouldGenerateSpeechFromTextSuccessfully() throws JsonProcessingException {
logger.info("Text-to-speech response: {}", mapper.writeValueAsString(response));
}

/**
 * Live smoke test for streaming text-to-speech. Runs only when {@code ZAI_API_KEY}
 * matches the expected two-part key pattern.
 * <p>
 * NOTE(review): the test only logs each received chunk and makes no assertions
 * about their content.
 */
@Test
@DisplayName("Should generate speech streaming from text successfully")
@EnabledIfEnvironmentVariable(named = "ZAI_API_KEY", matches = "^[^.]+\\.[^.]+$")
void testAudioSpeechStreaming() {
    // Timestamp-based id so each run sends a distinct request id.
    String requestId = String.format(REQUEST_ID_TEMPLATE, System.currentTimeMillis());

    AudioSpeechRequest request = AudioSpeechRequest.builder()
        .model(Constants.ModelTTS)
        .encodeFormat("base64")
        .input("Hello, this is a test for text-to-speech functionality.")
        .voice("female")
        .speed(1.0f)
        .volume(1.0f)
        .stream(Boolean.TRUE)
        .responseFormat("wav")
        .requestId(requestId)
        .build();

    AudioSpeechStreamingResponse streamingResponse = audioService.createStreamingSpeechStreaming(request);

    // Block the test thread until the stream terminates, logging every chunk on the way.
    streamingResponse.getFlowable()
        .doOnNext(chunk -> logger.info("speechPro: {}", chunk.toString()))
        .blockingSubscribe();
}

@Test
@DisplayName("Should generate custom speech with voice cloning successfully")
@EnabledIfEnvironmentVariable(named = "ZAI_API_KEY", matches = "^[^.]+\\.[^.]+$")
Expand Down