Skip to content

Commit 4aa74ad

Browse files
authored
Agent derived states (#840)
- Adds more computed `is_` props to the (private for now) agent state - JS livekit/components-js#1231
- `buffering` state was already handled inside the `.connecting` enum case (associated value is less error-prone)
- Adds disconnected == failed concept - JS livekit/components-js#1228
- Checks actual dispatch response when waiting for the agent - JS livekit/components-js#1226
1 parent 4284f4d commit 4aa74ad

File tree

6 files changed

+140
-27
lines changed

6 files changed

+140
-27
lines changed

Sources/LiveKit/Agent/Agent.swift

Lines changed: 83 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,14 @@ public struct Agent: Loggable {
3535

3636
public enum Error: LocalizedError {
3737
case timeout
38+
case left
3839

3940
public var errorDescription: String? {
4041
switch self {
4142
case .timeout:
42-
"Agent did not connect"
43+
"Agent did not connect to the room"
44+
case .left:
45+
"Agent left the room unexpectedly"
4346
}
4447
}
4548
}
@@ -90,50 +93,120 @@ public struct Agent: Loggable {
9093
log("Invalid transition from \(state) to connected", .warning)
9194
}
9295
}
96+
}
97+
98+
// MARK: - Derived State
99+
100+
public extension Agent {
101+
/// A boolean value indicating whether the agent is connected to the client.
102+
///
103+
/// Returns `true` when the agent is actively connected and in a conversational state
104+
/// (listening, thinking, or speaking).
105+
var isConnected: Bool {
106+
switch state {
107+
case let .connected(agentState, _, _):
108+
switch agentState {
109+
case .listening, .thinking, .speaking:
110+
true
111+
default:
112+
false
113+
}
114+
default:
115+
false
116+
}
117+
}
118+
119+
/// A boolean value indicating whether the client could be listening for user speech.
120+
///
121+
/// Returns `true` when the agent is in a state where it can receive user input,
122+
/// either through pre-connect buffering or active conversation states.
123+
///
124+
/// - Note: This may not mean that the agent is actually connected. The audio pre-connect
125+
/// buffer could be active and recording user input before the agent actually connects.
126+
var canListen: Bool {
127+
switch state {
128+
case let .connecting(buffering):
129+
buffering
130+
case let .connected(agentState, _, _):
131+
switch agentState {
132+
case .listening, .thinking, .speaking:
133+
true
134+
default:
135+
false
136+
}
137+
default:
138+
false
139+
}
140+
}
93141

94-
// MARK: - Public
142+
/// A boolean value indicating whether the agent is currently connecting or setting itself up.
143+
///
144+
/// Returns `true` while connecting without an active pre-connect audio buffer, or
145+
/// once connected, while the agent is still initializing or idle.
146+
var isPending: Bool {
147+
switch state {
148+
case let .connecting(buffering):
149+
!buffering
150+
case let .connected(agentState, _, _):
151+
switch agentState {
152+
case .initializing, .idle:
153+
true
154+
default:
155+
false
156+
}
157+
default:
158+
false
159+
}
160+
}
95161

96-
/// A boolean value indicating whether the agent is connected.
97-
public var isConnected: Bool {
162+
/// A boolean value indicating whether the client has disconnected from the agent.
163+
///
164+
/// Returns `true` when the agent session has ended, either for an expected or unexpected reason
165+
/// (including failures).
166+
var isFinished: Bool {
98167
switch state {
99-
case .connected: true
100-
default: false
168+
case .disconnected, .failed:
169+
true
170+
default:
171+
false
101172
}
102173
}
103174

104175
/// The current conversational state of the agent.
105-
public var agentState: AgentState? {
176+
var agentState: AgentState? {
106177
switch state {
107178
case let .connected(agentState, _, _): agentState
108179
default: nil
109180
}
110181
}
111182

112183
/// The agent's audio track.
113-
public var audioTrack: (any AudioTrack)? {
184+
var audioTrack: (any AudioTrack)? {
114185
switch state {
115186
case let .connected(_, audioTrack, _): audioTrack
116187
default: nil
117188
}
118189
}
119190

120191
/// The agent's avatar video track.
121-
public var avatarVideoTrack: (any VideoTrack)? {
192+
var avatarVideoTrack: (any VideoTrack)? {
122193
switch state {
123194
case let .connected(_, _, avatarVideoTrack): avatarVideoTrack
124195
default: nil
125196
}
126197
}
127198

128199
/// The last error that occurred.
129-
public var error: Error? {
200+
var error: Error? {
130201
switch state {
131202
case let .failed(error): error
132203
default: nil
133204
}
134205
}
135206
}
136207

208+
// MARK: - Extension
209+
137210
private extension Participant {
138211
var agentAudioTrack: (any AudioTrack)? {
139212
audioTracks.first(where: { $0.source == .microphone })?.track as? AudioTrack

Sources/LiveKit/Agent/Session.swift

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,8 @@ open class Session: ObservableObject {
213213
agent.disconnected()
214214
} else if let firstAgent = room.agentParticipants.values.first {
215215
agent.connected(participant: firstAgent)
216+
} else if agent.isConnected {
217+
agent.failed(error: .left)
216218
} else {
217219
agent.connecting(buffering: options.preConnectAudio)
218220
}
@@ -248,30 +250,34 @@ open class Session: ObservableObject {
248250
let response = try await self.tokenSourceConfiguration.fetch()
249251
try await self.room.connect(url: response.serverURL.absoluteString,
250252
token: response.participantToken)
253+
return response.dispatchesAgent()
251254
}
252255

253256
do {
257+
let dispatchesAgent: Bool
254258
if options.preConnectAudio {
255-
try await room.withPreConnectAudio(timeout: timeout) {
259+
dispatchesAgent = try await room.withPreConnectAudio(timeout: timeout) {
256260
await MainActor.run {
257261
self.connectionState = .connecting
258262
self.agent.connecting(buffering: true)
259263
}
260-
try await connect()
264+
return try await connect()
261265
}
262266
} else {
263267
connectionState = .connecting
264268
agent.connecting(buffering: false)
265-
try await connect()
269+
dispatchesAgent = try await connect()
266270
try await room.localParticipant.setMicrophone(enabled: true)
267271
}
268272

269-
waitForAgentTask = Task { [weak self] in
270-
try await Task.sleep(nanoseconds: UInt64(timeout * Double(NSEC_PER_SEC)))
271-
try Task.checkCancellation()
272-
guard let self else { return }
273-
if isConnected, !agent.isConnected {
274-
agent.failed(error: .timeout)
273+
if dispatchesAgent {
274+
waitForAgentTask = Task { [weak self] in
275+
try await Task.sleep(nanoseconds: UInt64(timeout * Double(NSEC_PER_SEC)))
276+
try Task.checkCancellation()
277+
guard let self else { return }
278+
if isConnected, !agent.isConnected {
279+
agent.failed(error: .timeout)
280+
}
275281
}
276282
}
277283
} catch {

Sources/LiveKit/Token/CachingTokenSource.swift

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -157,11 +157,4 @@ public extension TokenSourceResponse {
157157

158158
return true
159159
}
160-
161-
/// Extracts the JWT payload from the participant token.
162-
///
163-
/// - Returns: The JWT payload if successfully parsed, nil otherwise
164-
internal func jwt() -> LiveKitJWTPayload? {
165-
LiveKitJWTPayload.fromUnverified(token: participantToken)
166-
}
167160
}

Sources/LiveKit/Token/JWT.swift

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,17 @@ internal import JWTKit
1919

2020
/// JWT payload structure for LiveKit authentication tokens.
2121
struct LiveKitJWTPayload: JWTPayload, Codable, Equatable {
22+
/// Room configuration embedded in the JWT token.
23+
struct RoomConfiguration: Codable, Equatable {
24+
/// Agent dispatch configuration.
25+
struct AgentDispatch: Codable, Equatable {
26+
let agentName: String?
27+
let metadata: String?
28+
}
29+
30+
let agents: [AgentDispatch]?
31+
}
32+
2233
/// Video-specific permissions and room access grants for the participant.
2334
struct VideoGrant: Codable, Equatable {
2435
/// Name of the room. Required for admin or join permissions.
@@ -90,6 +101,14 @@ struct LiveKitJWTPayload: JWTPayload, Codable, Equatable {
90101
let metadata: String?
91102
/// Video-specific permissions and room access grants.
92103
let video: VideoGrant?
104+
/// Room configuration, including agent dispatch information.
105+
let roomConfiguration: RoomConfiguration?
106+
107+
enum CodingKeys: String, CodingKey {
108+
case exp, iss, nbf, sub
109+
case name, metadata, video
110+
case roomConfiguration = "roomConfig"
111+
}
93112

94113
/// Verifies the JWT token's validity by checking expiration and not-before claims.
95114
func verify(using _: JWTSigner) throws {

Sources/LiveKit/Token/TokenSource.swift

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,3 +139,23 @@ public struct TokenSourceResponse: Decodable, Sendable {
139139
self.roomName = roomName
140140
}
141141
}
142+
143+
public extension TokenSourceResponse {
144+
/// Extracts the JWT payload from the participant token.
145+
///
146+
/// - Returns: The JWT payload if successfully parsed, nil otherwise
147+
internal func jwt() -> LiveKitJWTPayload? {
148+
LiveKitJWTPayload.fromUnverified(token: participantToken)
149+
}
150+
151+
/// Checks if the JWT token contains agent dispatch configuration.
152+
///
153+
/// - Returns: `true` if the token is configured to dispatch one or more agents, `false` otherwise
154+
func dispatchesAgent() -> Bool {
155+
guard let jwt = jwt(), let agents = jwt.roomConfiguration?.agents else {
156+
return false
157+
}
158+
159+
return !agents.isEmpty
160+
}
161+
}

Tests/LiveKitTestSupport/TokenGenerator.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ public class TokenGenerator {
3030
public var name: String?
3131
public var metadata: String?
3232
public var videoGrant: LiveKitJWTPayload.VideoGrant?
33+
public var roomConfiguration: LiveKitJWTPayload.RoomConfiguration?
3334

3435
// MARK: - Private
3536

@@ -58,7 +59,8 @@ public class TokenGenerator {
5859
sub: .init(stringLiteral: identity),
5960
name: name,
6061
metadata: metadata,
61-
video: videoGrant)
62+
video: videoGrant,
63+
roomConfiguration: roomConfiguration)
6264

6365
return try signers.sign(p)
6466
}

0 commit comments

Comments
 (0)