Skip to content

Commit bd9bcfd

Browse files
feat(api): Realtime API token_limits, Hybrid searching ranking options
1 parent 630fecf commit bd9bcfd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1772
-319
lines changed

.stats.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 135
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-f68f718cd45ac3f9336603601bccc38a718af44d0b26601031de3d0a71b7ce2f.yml
3-
openapi_spec_hash: 1560717860bba4105936647dde8f618d
4-
config_hash: 50ee3382a63c021a9f821a935950e926
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-3c5d1593d7c6f2b38a7d78d7906041465ee9d6e9022f0651e1da194654488108.yml
3+
openapi_spec_hash: 0a4d8ad2469823ce24a3fd94f23f1c2b
4+
config_hash: 032995825500a503a76da119f5354905

openai-java-core/src/main/kotlin/com/openai/models/CustomToolInputFormat.kt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,16 +38,20 @@ private constructor(
3838
private val _json: JsonValue? = null,
3939
) {
4040

41+
/** Unconstrained free-form text. */
4142
fun text(): Optional<JsonValue> = Optional.ofNullable(text)
4243

44+
/** A grammar defined by the user. */
4345
fun grammar(): Optional<Grammar> = Optional.ofNullable(grammar)
4446

4547
fun isText(): Boolean = text != null
4648

4749
fun isGrammar(): Boolean = grammar != null
4850

51+
/** Unconstrained free-form text. */
4952
fun asText(): JsonValue = text.getOrThrow("text")
5053

54+
/** A grammar defined by the user. */
5155
fun asGrammar(): Grammar = grammar.getOrThrow("grammar")
5256

5357
fun _json(): Optional<JsonValue> = Optional.ofNullable(_json)
@@ -130,9 +134,11 @@ private constructor(
130134

131135
companion object {
132136

137+
/** Unconstrained free-form text. */
133138
@JvmStatic
134139
fun ofText() = CustomToolInputFormat(text = JsonValue.from(mapOf("type" to "text")))
135140

141+
/** A grammar defined by the user. */
136142
@JvmStatic fun ofGrammar(grammar: Grammar) = CustomToolInputFormat(grammar = grammar)
137143
}
138144

@@ -142,8 +148,10 @@ private constructor(
142148
*/
143149
interface Visitor<out T> {
144150

151+
/** Unconstrained free-form text. */
145152
fun visitText(text: JsonValue): T
146153

154+
/** A grammar defined by the user. */
147155
fun visitGrammar(grammar: Grammar): T
148156

149157
/**
@@ -202,6 +210,7 @@ private constructor(
202210
}
203211
}
204212

213+
/** A grammar defined by the user. */
205214
class Grammar
206215
@JsonCreator(mode = JsonCreator.Mode.DISABLED)
207216
private constructor(

openai-java-core/src/main/kotlin/com/openai/models/images/ImageEditParams.kt

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,9 @@ private constructor(
8080
fun background(): Optional<Background> = body.background()
8181

8282
/**
83-
* Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
83+
* Control how much effort the model will exert to match the style and features, especially
84+
* facial features, of input images. This parameter is only supported for `gpt-image-1`.
85+
* Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
8486
*
8587
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
8688
* server responded with an unexpected value).
@@ -429,7 +431,9 @@ private constructor(
429431
}
430432

431433
/**
432-
* Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
434+
* Control how much effort the model will exert to match the style and features, especially
435+
* facial features, of input images. This parameter is only supported for `gpt-image-1`.
436+
* Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
433437
*/
434438
fun inputFidelity(inputFidelity: InputFidelity?) = apply {
435439
body.inputFidelity(inputFidelity)
@@ -903,7 +907,9 @@ private constructor(
903907
fun background(): Optional<Background> = background.value.getOptional("background")
904908

905909
/**
906-
* Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
910+
* Control how much effort the model will exert to match the style and features, especially
911+
* facial features, of input images. This parameter is only supported for `gpt-image-1`.
912+
* Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
907913
*
908914
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
909915
* server responded with an unexpected value).
@@ -1297,7 +1303,10 @@ private constructor(
12971303
}
12981304

12991305
/**
1300-
* Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
1306+
* Control how much effort the model will exert to match the style and features,
1307+
* especially facial features, of input images. This parameter is only supported for
1308+
* `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`.
1309+
* Defaults to `low`.
13011310
*/
13021311
fun inputFidelity(inputFidelity: InputFidelity?) =
13031312
inputFidelity(MultipartField.of(inputFidelity))
@@ -1981,7 +1990,9 @@ private constructor(
19811990
}
19821991

19831992
/**
1984-
* Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
1993+
* Control how much effort the model will exert to match the style and features, especially
1994+
* facial features, of input images. This parameter is only supported for `gpt-image-1`.
1995+
* Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
19851996
*/
19861997
class InputFidelity @JsonCreator private constructor(private val value: JsonField<String>) :
19871998
Enum {

openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeSessionCreateRequest.kt

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,17 @@ private constructor(
221221
fun tracing(): Optional<RealtimeTracingConfig> = tracing.getOptional("tracing")
222222

223223
/**
224-
* Controls how the realtime conversation is truncated prior to model inference. The default is
225-
* `auto`.
224+
* When the number of tokens in a conversation exceeds the model's input token limit, the
225+
* conversation will be truncated, meaning messages (starting from the oldest) will not be included
226+
* in the model's context. A 32k context model with 4,096 max output tokens can only include
227+
* 28,224 tokens in the context before truncation occurs. Clients can configure truncation
228+
* behavior to truncate with a lower max token limit, which is an effective way to control token
229+
* usage and cost. Truncation will reduce the number of cached tokens on the next turn (busting
230+
* the cache), since messages are dropped from the beginning of the context. However, clients
231+
* can also configure truncation to retain messages up to a fraction of the maximum context
232+
* size, which will reduce the need for future truncations and thus improve the cache rate.
233+
* Truncation can be disabled entirely, which means the server will never truncate but would
234+
* instead return an error if the conversation exceeds the model's input token limit.
226235
*
227236
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
228237
* server responded with an unexpected value).
@@ -666,8 +675,18 @@ private constructor(
666675
tracing(RealtimeTracingConfig.ofTracingConfiguration(tracingConfiguration))
667676

668677
/**
669-
* Controls how the realtime conversation is truncated prior to model inference. The default
670-
* is `auto`.
678+
* When the number of tokens in a conversation exceeds the model's input token limit, the
679+
* conversation be truncated, meaning messages (starting from the oldest) will not be
680+
* included in the model's context. A 32k context model with 4,096 max output tokens can
681+
* only include 28,224 tokens in the context before truncation occurs. Clients can configure
682+
* truncation behavior to truncate with a lower max token limit, which is an effective way
683+
* to control token usage and cost. Truncation will reduce the number of cached tokens on
684+
* the next turn (busting the cache), since messages are dropped from the beginning of the
685+
* context. However, clients can also configure truncation to retain messages up to a
686+
* fraction of the maximum context size, which will reduce the need for future truncations
687+
* and thus improve the cache rate. Truncation can be disabled entirely, which means the
688+
* server will never truncate but would instead return an error if the conversation exceeds
689+
* the model's input token limit.
671690
*/
672691
fun truncation(truncation: RealtimeTruncation) = truncation(JsonField.of(truncation))
673692

openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeTruncation.kt

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,17 @@ import java.util.Objects
2222
import java.util.Optional
2323

2424
/**
25-
* Controls how the realtime conversation is truncated prior to model inference. The default is
26-
* `auto`.
25+
* When the number of tokens in a conversation exceeds the model's input token limit, the
26+
* conversation will be truncated, meaning messages (starting from the oldest) will not be included in
27+
* the model's context. A 32k context model with 4,096 max output tokens can only include 28,224
28+
* tokens in the context before truncation occurs. Clients can configure truncation behavior to
29+
* truncate with a lower max token limit, which is an effective way to control token usage and cost.
30+
* Truncation will reduce the number of cached tokens on the next turn (busting the cache), since
31+
* messages are dropped from the beginning of the context. However, clients can also configure
32+
* truncation to retain messages up to a fraction of the maximum context size, which will reduce the
33+
* need for future truncations and thus improve the cache rate. Truncation can be disabled entirely,
34+
* which means the server will never truncate but would instead return an error if the conversation
35+
* exceeds the model's input token limit.
2736
*/
2837
@JsonDeserialize(using = RealtimeTruncation.Deserializer::class)
2938
@JsonSerialize(using = RealtimeTruncation.Serializer::class)

0 commit comments

Comments
 (0)