Skip to content

Commit 4b80ebd

Browse files
Merge pull request #7 from elevenlabs/feat/add-client-tools
2 parents 66716f8 + 312d0c1 commit 4b80ebd

File tree

2 files changed

+126
-6
lines changed

2 files changed

+126
-6
lines changed

README.md

+36-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
# Elevenlabs Conversational AI Swift SDK (experimental)
32

43
![convai222](https://github.com/user-attachments/assets/ca4fa726-5e98-4bbc-91b2-d055e957df7d)
@@ -8,6 +7,8 @@
87
98
ElevenLabs Conversational AI Swift SDK is a framework designed to integrate ElevenLabs' powerful conversational AI capabilities into your Swift applications. Leverage advanced audio processing and seamless WebSocket communication to create interactive and intelligent conversational voice experiences.
109

10+
For detailed documentation, visit the [ElevenLabs Swift SDK documentation](https://elevenlabs.io/docs/conversational-ai/libraries/conversational-ai-sdk-swift).
11+
1112
> [!NOTE]
1213
> This library is launching to primarily support Conversational AI. The support for speech synthesis and other more generic use cases is planned for the future.
1314
@@ -75,6 +76,40 @@ Add the Elevenlabs Conversational AI Swift SDK to your project using Swift Packa
7576
}
7677
```
7778

79+
### Advanced Configuration
80+
81+
1. Using Client Tools
82+
83+
```swift
84+
var clientTools = ElevenLabsSDK.ClientTools()
85+
clientTools.register("weather", handler: { parameters async throws -> String? in
86+
print("Weather parameters received:", parameters)
87+
...
88+
})
89+
90+
let conversation = try await ElevenLabsSDK.Conversation.startSession(
91+
config: config,
92+
callbacks: callbacks,
93+
clientTools: clientTools
94+
)
95+
```
96+
97+
2. Using Overrides
98+
99+
```swift
100+
let overrides = ElevenLabsSDK.ConversationConfigOverride(
101+
agent: ElevenLabsSDK.AgentConfig(
102+
prompt: ElevenLabsSDK.AgentPrompt(prompt: "You are a helpful assistant"),
103+
language: .en
104+
)
105+
)
106+
107+
let config = ElevenLabsSDK.SessionConfig(
108+
agentId: "your-agent-id",
109+
overrides: overrides
110+
)
111+
```
112+
78113
### Manage the Session
79114

80115
- End Session

Sources/ElevenLabsSwift/ElevenLabsSwift.swift

+90-5
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,39 @@ public class ElevenLabsSDK {
102102
Data(base64Encoded: base64)
103103
}
104104

105+
// MARK: - Client Tools
106+
107+
/// A `@Sendable`, asynchronous, throwing closure that implements one client tool.
/// It receives the tool call's `Parameters` and returns an optional string result.
public typealias ClientToolHandler = @Sendable (Parameters) async throws -> String?
108+
109+
/// The arguments of a client tool call: a dictionary from parameter name to an untyped value.
public typealias Parameters = [String: Any]
110+
111+
/// A registry that maps client tool names to the handlers that implement them.
public struct ClientTools: Sendable {
    private var tools: [String: ClientToolHandler] = [:]
    private let lock = NSLock() // Guards every access to `tools`

    /// Creates a registry with no tools registered.
    public init() {}

    /// Stores `handler` under `name`, replacing any handler already stored for that name.
    /// - Parameters:
    ///   - name: The tool name used to look the handler up in `handle(_:parameters:)`.
    ///   - handler: The closure to run when the tool is invoked.
    public mutating func register(_ name: String, handler: @escaping @Sendable ClientToolHandler) {
        lock.lock()
        defer { lock.unlock() }
        tools[name] = handler
    }

    /// Runs the handler registered for `name` and returns its result.
    /// - Parameters:
    ///   - name: The name of the tool to invoke.
    ///   - parameters: The arguments forwarded to the handler.
    /// - Returns: Whatever the handler returns.
    /// - Throws: `ClientToolError.handlerNotFound` when no handler is registered for `name`,
    ///   or any error thrown by the handler itself.
    public func handle(_ name: String, parameters: Parameters) async throws -> String? {
        // Only the dictionary lookup happens under the lock; the handler runs after it is released.
        let registered: ClientToolHandler? = lock.withLock { tools[name] }
        if let tool = registered {
            return try await tool(parameters)
        }
        throw ClientToolError.handlerNotFound(name)
    }
}
131+
132+
/// Errors raised while dispatching a client tool call.
///
/// Conforms to `LocalizedError` so that `localizedDescription` yields a message
/// that names the tool or the failure reason instead of a generic Foundation string.
public enum ClientToolError: Error, LocalizedError {
    /// No handler is registered under the associated tool name.
    case handlerNotFound(String)
    /// The parameters supplied for a tool call were not usable.
    case invalidParameters
    /// A tool handler failed; the associated value describes the failure.
    case executionFailed(String)

    /// A human-readable description of the error.
    public var errorDescription: String? {
        switch self {
        case let .handlerNotFound(name):
            return "No client tool handler registered for \"\(name)\""
        case .invalidParameters:
            return "Invalid client tool parameters"
        case let .executionFailed(reason):
            return "Client tool execution failed: \(reason)"
        }
    }
}
137+
105138
// MARK: - Audio Processing
106139

107140
public class AudioConcatProcessor {
@@ -190,14 +223,14 @@ public class ElevenLabsSDK {
190223
public let overrides: ConversationConfigOverride?
191224
public let customLlmExtraBody: [String: LlmExtraBodyValue]?
192225

193-
public init(signedUrl: String, overrides: ConversationConfigOverride? = nil, customLlmExtraBody: [String: LlmExtraBodyValue]? = nil) {
226+
/// Creates a session configuration that is identified by a signed URL.
/// - Parameters:
///   - signedUrl: The signed URL for the session; `agentId` is set to `nil`.
///   - overrides: Optional conversation configuration overrides.
///   - customLlmExtraBody: Optional extra body values for a custom LLM.
///   - clientTools: Accepted but discarded — the parameter is bound to `_` and nothing is stored.
public init(signedUrl: String, overrides: ConversationConfigOverride? = nil, customLlmExtraBody: [String: LlmExtraBodyValue]? = nil, clientTools _: ClientTools = ClientTools()) {
194227
self.signedUrl = signedUrl
195228
agentId = nil
196229
self.overrides = overrides
197230
self.customLlmExtraBody = customLlmExtraBody
198231
}
199232

200-
public init(agentId: String, overrides: ConversationConfigOverride? = nil, customLlmExtraBody: [String: LlmExtraBodyValue]? = nil) {
233+
public init(agentId: String, overrides: ConversationConfigOverride? = nil, customLlmExtraBody: [String: LlmExtraBodyValue]? = nil, clientTools _: ClientTools = ClientTools()) {
201234
self.agentId = agentId
202235
signedUrl = nil
203236
self.overrides = overrides
@@ -559,6 +592,7 @@ public class ElevenLabsSDK {
559592
private let input: Input
560593
private let output: Output
561594
private let callbacks: Callbacks
595+
private let clientTools: ClientTools?
562596

563597
private let modeLock = NSLock()
564598
private let statusLock = NSLock()
@@ -649,11 +683,12 @@ public class ElevenLabsSDK {
649683
}
650684
}
651685

652-
private init(connection: Connection, input: Input, output: Output, callbacks: Callbacks) {
686+
private init(connection: Connection, input: Input, output: Output, callbacks: Callbacks, clientTools: ClientTools?) {
653687
self.connection = connection
654688
self.input = input
655689
self.output = output
656690
self.callbacks = callbacks
691+
self.clientTools = clientTools
657692

658693
// Set the onProcess callback
659694
audioConcatProcessor.onProcess = { [weak self] finished in
@@ -672,8 +707,9 @@ public class ElevenLabsSDK {
672707
/// - Parameters:
673708
/// - config: Session configuration
674709
/// - callbacks: Callbacks for conversation events
710+
/// - clientTools: Client tools callbacks (optional)
675711
/// - Returns: A started `Conversation` instance
676-
public static func startSession(config: SessionConfig, callbacks: Callbacks = Callbacks()) async throws -> Conversation {
712+
public static func startSession(config: SessionConfig, callbacks: Callbacks = Callbacks(), clientTools: ClientTools? = nil) async throws -> Conversation {
677713
// Step 1: Configure the audio session
678714
try ElevenLabsSDK.configureAudioSession()
679715

@@ -687,7 +723,7 @@ public class ElevenLabsSDK {
687723
let output = try await Output.create(sampleRate: Double(connection.sampleRate))
688724

689725
// Step 5: Initialize the Conversation
690-
let conversation = Conversation(connection: connection, input: input, output: output, callbacks: callbacks)
726+
let conversation = Conversation(connection: connection, input: input, output: output, callbacks: callbacks, clientTools: clientTools)
691727

692728
// Step 6: Start the AVAudioEngine
693729
try output.engine.start()
@@ -740,6 +776,9 @@ public class ElevenLabsSDK {
740776
}
741777

742778
switch type {
779+
case "client_tool_call":
780+
handleClientToolCall(json)
781+
743782
case "interruption":
744783
handleInterruptionEvent(json)
745784

@@ -776,6 +815,52 @@ public class ElevenLabsSDK {
776815
}
777816
}
778817

818+
/// Handles a `client_tool_call` event by running the matching client tool and
/// sending a `client_tool_result` message back over the WebSocket.
///
/// If the event is malformed or its parameters cannot be serialized, the failure is
/// reported through `callbacks.onError` and no result message is sent.
/// Otherwise exactly one result message is sent: `is_error` is `false` with the tool's
/// return value (or `""` when it returns `nil`), or `true` with the error description.
/// - Parameter json: The decoded event payload.
private func handleClientToolCall(_ json: [String: Any]) {
    guard let toolCall = json["client_tool_call"] as? [String: Any],
          let toolName = toolCall["tool_name"] as? String,
          let toolCallId = toolCall["tool_call_id"] as? String,
          let parameters = toolCall["parameters"] as? [String: Any]
    else {
        callbacks.onError("Invalid client tool call format", json)
        return
    }

    // `[String: Any]` is not Sendable, so the parameters cross into the Task as JSON `Data`.
    let serializedParameters: Data
    do {
        serializedParameters = try JSONSerialization.data(withJSONObject: parameters, options: [])
    } catch {
        callbacks.onError("Failed to serialize parameters", error)
        return
    }

    Task { [toolName, toolCallId, serializedParameters] in
        var response: [String: Any] = [
            "type": "client_tool_result",
            "tool_call_id": toolCallId,
        ]

        do {
            // With no registry there is no handler for any tool: report that as an error
            // rather than answering with an empty "successful" result.
            guard let registeredTools = clientTools else {
                throw ClientToolError.handlerNotFound(toolName)
            }

            // Rebuild the dictionary inside the Task before handing it to the tool handler.
            let deserializedParameters = try JSONSerialization.jsonObject(with: serializedParameters) as? [String: Any] ?? [:]

            let result = try await registeredTools.handle(toolName, parameters: deserializedParameters)
            response["result"] = result ?? ""
            response["is_error"] = false
        } catch {
            response["result"] = error.localizedDescription
            response["is_error"] = true
        }

        sendWebSocketMessage(response)
    }
}
863+
779864
private func handleInterruptionEvent(_ json: [String: Any]) {
780865
guard let event = json["interruption_event"] as? [String: Any],
781866
let eventId = event["event_id"] as? Int else { return }

0 commit comments

Comments
 (0)