Skip to content

Commit a5aa713

Browse files
iceAndFireisFailed, yuhongxiao, 赵嘉琦
authored
feat: audio speech streaming (#81)
* 新增流式音频生成接口api * 测试流程优化 * 优化 * 测试代码删除个人apiKey * feat:/add,/list,SDK开发 * feat:/add,/list,SDK开发 * Revert "feat:/add,/list,SDK开发" This reverts commit c992af4. * Revert "feat:/add,/list,SDK开发" This reverts commit e977b06. --------- Co-authored-by: yuhongxiao <[email protected]> Co-authored-by: 赵嘉琦 <[email protected]>
1 parent b4a8eb3 commit a5aa713

13 files changed

+510
-19
lines changed

src/main/java/com/zhipu/oapi/ClientV4.java

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
package com.zhipu.oapi;
22

3+
import static com.zhipu.oapi.Constants.BASE_URL;
4+
35
import com.fasterxml.jackson.core.JsonProcessingException;
46
import com.fasterxml.jackson.databind.ObjectMapper;
7+
import com.fasterxml.jackson.databind.node.ObjectNode;
58
import com.zhipu.oapi.core.ConfigV4;
69
import com.zhipu.oapi.core.cache.ICache;
710
import com.zhipu.oapi.core.cache.LocalCache;
@@ -12,20 +15,20 @@
1215
import com.zhipu.oapi.core.token.GlobalTokenManager;
1316
import com.zhipu.oapi.core.token.TokenManagerV4;
1417
import com.zhipu.oapi.service.v4.agents.AgentsCompletionRequest;
18+
import com.zhipu.oapi.service.v4.api.ClientApiService;
19+
import com.zhipu.oapi.service.v4.audio.*;
1520
import com.zhipu.oapi.service.v4.audio.AudioTranscriptionsRequest;
1621
import com.zhipu.oapi.service.v4.batchs.*;
1722
import com.zhipu.oapi.service.v4.deserialize.MessageDeserializeFactory;
18-
import com.zhipu.oapi.service.v4.fine_turning.*;
19-
import com.zhipu.oapi.service.v4.model.*;
20-
import com.zhipu.oapi.service.v4.api.ClientApiService;
2123
import com.zhipu.oapi.service.v4.embedding.EmbeddingApiResponse;
2224
import com.zhipu.oapi.service.v4.embedding.EmbeddingRequest;
2325
import com.zhipu.oapi.service.v4.embedding.EmbeddingResult;
2426
import com.zhipu.oapi.service.v4.file.*;
25-
import com.zhipu.oapi.service.v4.audio.*;
27+
import com.zhipu.oapi.service.v4.fine_turning.*;
2628
import com.zhipu.oapi.service.v4.image.CreateImageRequest;
2729
import com.zhipu.oapi.service.v4.image.ImageApiResponse;
2830
import com.zhipu.oapi.service.v4.image.ImageResult;
31+
import com.zhipu.oapi.service.v4.model.*;
2932
import com.zhipu.oapi.service.v4.tools.WebSearchApiResponse;
3033
import com.zhipu.oapi.service.v4.tools.WebSearchParamsRequest;
3134
import com.zhipu.oapi.service.v4.tools.WebSearchPro;
@@ -36,16 +39,21 @@
3639
import com.zhipu.oapi.utils.OkHttps;
3740
import com.zhipu.oapi.utils.RequestSupplier;
3841
import com.zhipu.oapi.utils.StringUtils;
42+
3943
import io.reactivex.BackpressureStrategy;
4044
import io.reactivex.Flowable;
4145
import io.reactivex.Single;
46+
4247
import lombok.Getter;
4348
import lombok.Setter;
49+
4450
import okhttp3.ConnectionPool;
4551
import okhttp3.OkHttpClient;
4652
import okhttp3.ResponseBody;
53+
4754
import org.slf4j.Logger;
4855
import org.slf4j.LoggerFactory;
56+
4957
import retrofit2.Response;
5058
import retrofit2.adapter.rxjava2.HttpException;
5159

@@ -54,8 +62,6 @@
5462
import java.util.Map;
5563
import java.util.concurrent.TimeUnit;
5664

57-
import static com.zhipu.oapi.Constants.BASE_URL;
58-
5965
// 抽象类
6066
abstract class AbstractClientBaseService {
6167

@@ -461,6 +467,20 @@ public AudioSpeechApiResponse speech(AudioSpeechRequest request){
461467
return this.executeRequest(request, supplier, AudioSpeechApiResponse.class);
462468
}
463469

470+
/**
471+
* tts接口(Text to speech streaming)
472+
*
473+
* @param request
474+
* @return
475+
*/
476+
public AudioSpeechStreamingApiResponse speechStreaming(AudioSpeechRequest request) {
477+
FlowableRequestSupplier<Map<String, Object>, retrofit2.Call<ResponseBody>> supplier =
478+
params -> chatApiService.audioSpeechStreaming(params);
479+
480+
return this.streamRequest(
481+
request, supplier, AudioSpeechStreamingApiResponse.class, ObjectNode.class);
482+
}
483+
464484
/**
465485
* tts接口(语音克隆)
466486
* @param request

src/main/java/com/zhipu/oapi/service/v4/api/ClientApiService.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,21 @@
1515
import com.zhipu.oapi.service.v4.batchs.Batch;
1616
import com.zhipu.oapi.service.v4.batchs.BatchCreateParams;
1717
import com.zhipu.oapi.service.v4.batchs.BatchPage;
18+
import com.zhipu.oapi.service.v4.embedding.EmbeddingResult;
1819
import com.zhipu.oapi.service.v4.file.*;
1920
import com.zhipu.oapi.service.v4.fine_turning.*;
20-
import com.zhipu.oapi.service.v4.model.*;
21-
import com.zhipu.oapi.service.v4.embedding.EmbeddingResult;
2221
import com.zhipu.oapi.service.v4.image.ImageResult;
22+
import com.zhipu.oapi.service.v4.model.*;
2323
import com.zhipu.oapi.service.v4.tools.WebSearchPro;
2424
import com.zhipu.oapi.service.v4.web_search.WebSearchDTO;
2525
import com.zhipu.oapi.service.v4.web_search.WebSearchRequest;
26+
2627
import io.reactivex.Single;
28+
2729
import okhttp3.*;
30+
2831
import org.apache.tika.Tika;
32+
2933
import retrofit2.Call;
3034
import retrofit2.Response;
3135

@@ -36,7 +40,6 @@
3640
import java.nio.file.Path;
3741
import java.util.*;
3842

39-
4043
public class ClientApiService extends ClientBaseService {
4144

4245
private final ChatApi chatApi;
@@ -225,6 +228,10 @@ public Single<java.io.File> audioSpeech(Map<String,Object> request) throws IOExc
225228
return Single.just(file);
226229
}
227230

231+
public Call<ResponseBody> audioSpeechStreaming(Map<String, Object> request) {
232+
return audioApi.audioSpeechStreaming(request);
233+
}
234+
228235
public Single<java.io.File> audioCustomization(Map<String,Object> request) throws IOException {
229236
java.io.File voiceFile = (java.io.File)request.get("voice_data");
230237
RequestBody requestFile = RequestBody.create(MediaType.parse("multipart/form-data"), voiceFile);

src/main/java/com/zhipu/oapi/service/v4/api/audio/AudioApi.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33

44
import com.zhipu.oapi.service.v4.model.ModelData;
5+
56
import io.reactivex.Single;
7+
68
import okhttp3.MultipartBody;
79
import okhttp3.RequestBody;
810
import okhttp3.ResponseBody;
@@ -12,7 +14,6 @@
1214

1315
import java.util.Map;
1416

15-
1617
public interface AudioApi {
1718

1819
/**
@@ -23,6 +24,15 @@ public interface AudioApi {
2324
@POST("audio/speech")
2425
Single<ResponseBody> audioSpeech(@Body Map<String,Object> request);
2526

27+
/**
28+
* tts接口(streaming Text to speech)
29+
*
30+
* @param request
31+
* @return
32+
*/
33+
@POST("audio/speech")
34+
@Streaming
35+
Call<ResponseBody> audioSpeechStreaming(@Body Map<String, Object> request);
2636

2737
/**
2838
* 语音克隆接口
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
package com.zhipu.oapi.service.v4.audio;
2+
3+
import com.fasterxml.jackson.annotation.JsonProperty;
4+
import com.fasterxml.jackson.databind.JsonNode;
5+
import com.fasterxml.jackson.databind.ObjectMapper;
6+
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
7+
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
8+
import com.fasterxml.jackson.databind.node.ObjectNode;
9+
import com.zhipu.oapi.service.v4.deserialize.MessageDeserializeFactory;
10+
import com.zhipu.oapi.service.v4.deserialize.audio.AudioSpeechChoiceDeserializer;
11+
12+
import lombok.Getter;
13+
14+
import java.util.Iterator;
15+
16+
@Getter
17+
@JsonDeserialize(using = AudioSpeechChoiceDeserializer.class)
18+
public class AudioSpeechChoice extends ObjectNode {
19+
20+
/** 索引 */
21+
@JsonProperty("index")
22+
private int index;
23+
24+
/** 完成原因 */
25+
@JsonProperty("finish_reason")
26+
private String finishReason;
27+
28+
/** delta */
29+
@JsonProperty("delta")
30+
private AudioSpeechDelta delta;
31+
32+
public AudioSpeechChoice() {
33+
super(JsonNodeFactory.instance);
34+
}
35+
36+
public AudioSpeechChoice(ObjectNode objectNode) {
37+
super(JsonNodeFactory.instance);
38+
ObjectMapper objectMapper = MessageDeserializeFactory.defaultObjectMapper();
39+
40+
if (objectNode.get("index") != null) {
41+
this.setIndex(objectNode.get("index").asInt());
42+
} else {
43+
this.setIndex(0);
44+
}
45+
if (objectNode.get("finish_reason") != null) {
46+
this.setFinishReason(objectNode.get("finish_reason").asText());
47+
} else {
48+
this.setFinishReason(null);
49+
}
50+
51+
if (objectNode.get("delta") != null) {
52+
this.setDelta(
53+
objectMapper.convertValue(objectNode.get("delta"), AudioSpeechDelta.class));
54+
} else {
55+
this.setDelta(null);
56+
}
57+
Iterator<String> fieldNames = objectNode.fieldNames();
58+
while (fieldNames.hasNext()) {
59+
String fieldName = fieldNames.next();
60+
JsonNode field = objectNode.get(fieldName);
61+
this.set(fieldName, field);
62+
}
63+
}
64+
65+
// Setters
66+
67+
public void setIndex(int index) {
68+
this.index = index;
69+
this.put("index", index);
70+
}
71+
72+
public void setFinishReason(String finishReason) {
73+
this.finishReason = finishReason;
74+
this.put("finish_reason", finishReason);
75+
}
76+
77+
public void setDelta(AudioSpeechDelta delta) {
78+
this.delta = delta;
79+
this.set("delta", delta);
80+
}
81+
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package com.zhipu.oapi.service.v4.audio;
2+
3+
import com.fasterxml.jackson.annotation.JsonProperty;
4+
import com.fasterxml.jackson.databind.JsonNode;
5+
import com.fasterxml.jackson.databind.ObjectMapper;
6+
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
7+
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
8+
import com.fasterxml.jackson.databind.node.ObjectNode;
9+
import com.zhipu.oapi.service.v4.deserialize.MessageDeserializeFactory;
10+
import com.zhipu.oapi.service.v4.deserialize.audio.AudioSpeechDeltaDeserializer;
11+
12+
import lombok.Getter;
13+
14+
import java.util.Iterator;
15+
16+
@Getter
17+
@JsonDeserialize(using = AudioSpeechDeltaDeserializer.class)
18+
public class AudioSpeechDelta extends ObjectNode {
19+
20+
/** 角色 */
21+
@JsonProperty("role")
22+
private String role;
23+
24+
/** 工具调用列表 */
25+
@JsonProperty("content")
26+
private String content;
27+
28+
public AudioSpeechDelta() {
29+
super(JsonNodeFactory.instance);
30+
}
31+
32+
public AudioSpeechDelta(ObjectNode objectNode) {
33+
super(JsonNodeFactory.instance);
34+
ObjectMapper objectMapper = MessageDeserializeFactory.defaultObjectMapper();
35+
if (objectNode.has("role")) {
36+
this.setRole(objectNode.get("role").asText());
37+
} else {
38+
this.setRole(null);
39+
}
40+
if (objectNode.has("content")) {
41+
this.setContent(objectNode.get("content").asText());
42+
} else {
43+
this.setContent(null);
44+
}
45+
46+
Iterator<String> fieldNames = objectNode.fieldNames();
47+
while (fieldNames.hasNext()) {
48+
String fieldName = fieldNames.next();
49+
JsonNode field = objectNode.get(fieldName);
50+
this.set(fieldName, field);
51+
}
52+
}
53+
54+
// Getters and Setters
55+
56+
public void setRole(String role) {
57+
this.role = role;
58+
this.put("role", role);
59+
}
60+
61+
public void setContent(String content) {
62+
this.content = content;
63+
this.put("content", content);
64+
}
65+
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
package com.zhipu.oapi.service.v4.audio;
2+
3+
import com.fasterxml.jackson.annotation.JsonProperty;
4+
import com.fasterxml.jackson.core.type.TypeReference;
5+
import com.fasterxml.jackson.databind.JsonNode;
6+
import com.fasterxml.jackson.databind.ObjectMapper;
7+
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
8+
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
9+
import com.fasterxml.jackson.databind.node.ObjectNode;
10+
import com.zhipu.oapi.service.v4.deserialize.MessageDeserializeFactory;
11+
import com.zhipu.oapi.service.v4.deserialize.audio.AudioSpeechProDeserializer;
12+
import lombok.Getter;
13+
14+
import java.util.Iterator;
15+
import java.util.List;
16+
17+
@JsonDeserialize(using = AudioSpeechProDeserializer.class)
18+
@Getter
19+
public final class AudioSpeechPro extends ObjectNode {
20+
21+
/** 创建时间 */
22+
@JsonProperty("created")
23+
private Integer created;
24+
25+
/** 选择项 */
26+
@JsonProperty("choices")
27+
private List<AudioSpeechChoice> choices;
28+
29+
/** 请求ID */
30+
@JsonProperty("request_id")
31+
private String requestId;
32+
33+
34+
public AudioSpeechPro() {
35+
super(JsonNodeFactory.instance);
36+
}
37+
38+
public AudioSpeechPro(ObjectNode objectNode) {
39+
super(JsonNodeFactory.instance);
40+
ObjectMapper objectMapper = MessageDeserializeFactory.defaultObjectMapper();
41+
if (objectNode.get("created") != null) {
42+
this.setCreated(objectNode.get("created").asInt());
43+
} else {
44+
this.setCreated(null);
45+
}
46+
47+
if (objectNode.get("choices") != null) {
48+
this.setChoices(
49+
objectMapper.convertValue(
50+
objectNode.get("choices"),
51+
new TypeReference<List<AudioSpeechChoice>>() {}));
52+
} else {
53+
this.setChoices(null);
54+
}
55+
if (objectNode.get("request_id") != null) {
56+
this.setRequestId(objectNode.get("request_id").asText());
57+
} else {
58+
this.setRequestId(null);
59+
}
60+
61+
Iterator<String> fieldNames = objectNode.fieldNames();
62+
while (fieldNames.hasNext()) {
63+
String fieldName = fieldNames.next();
64+
JsonNode field = objectNode.get(fieldName);
65+
this.set(fieldName, field);
66+
}
67+
}
68+
69+
// Setters
70+
public void setCreated(Integer created) {
71+
this.created = created;
72+
this.put("created", created);
73+
}
74+
75+
76+
public void setChoices(List<AudioSpeechChoice> choices) {
77+
this.choices = choices;
78+
this.putPOJO("choices", choices);
79+
}
80+
81+
public void setRequestId(String requestId) {
82+
this.requestId = requestId;
83+
this.put("request_id", requestId);
84+
}
85+
}

0 commit comments

Comments
 (0)