@@ -10,10 +10,10 @@ import SwiftUI
 import Tokenizers

 struct ContentView: View {
+    @Environment(DeviceStat.self) private var deviceStat

-    @State var prompt = ""
     @State var llm = LLMEvaluator()
-    @Environment(DeviceStat.self) private var deviceStat
+    @State var prompt = "What's the current weather in Paris?"

     enum displayStyle: String, CaseIterable, Identifiable {
         case plain, markdown
@@ -34,6 +34,9 @@ struct ContentView: View {
                     Text(llm.stat)
                 }
                 HStack {
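+                    // Let the user choose whether the weather tool is offered to the model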
+                    Toggle(isOn: $llm.includeWeatherTool) {
+                        Text("Include \"get current weather\" tool")
+                    }
                     Spacer()
                     if llm.running {
                         ProgressView()
@@ -126,8 +129,6 @@ struct ContentView: View {

         }
         .task {
-            self.prompt = llm.modelConfiguration.defaultPrompt
-
             // pre-load the weights on launch to speed up the first generation
             _ = try? await llm.load()
         }
@@ -154,13 +155,19 @@ class LLMEvaluator {

     var running = false

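+    // Set from the UI toggle; checked below before attaching the tool spec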
+    var includeWeatherTool = false
+
     var output = ""
     var modelInfo = ""
     var stat = ""

     /// This controls which model loads. `phi3_5_4bit` is one of the smaller ones, so this will fit on
     /// more devices.
-    let modelConfiguration = ModelRegistry.phi3_5_4bit
+    // let modelConfiguration = ModelRegistry.phi3_5_4bit
+    // let modelConfiguration = ModelRegistry.llama3_2_3B_4bit
+    // let modelConfiguration = ModelRegistry.llama3_1_8B_4bit
+    // let modelConfiguration = ModelRegistry.mistral7B4bit
+    let modelConfiguration = ModelRegistry.qwen2_5_7b
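+    // presumably chosen because Qwen 2.5's chat template supports tool calling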

     /// parameters controlling the output
     let generateParameters = GenerateParameters(temperature: 0.6)
@@ -178,6 +185,29 @@ class LLMEvaluator {

     var loadState = LoadState.idle

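+    // A tool description in the OpenAI function-calling JSON schema shape:
+    // one `get_current_weather` function with a required `location` and an
+    // optional `unit` ("celsius" or "fahrenheit")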
+    let currentWeatherToolSpec: [String: any Sendable] =
+        [
+            "type": "function",
+            "function": [
+                "name": "get_current_weather",
+                "description": "Get the current weather in a given location",
+                "parameters": [
+                    "type": "object",
+                    "properties": [
+                        "location": [
+                            "type": "string",
+                            "description": "The city and state, e.g. San Francisco, CA",
+                        ] as [String: String],
+                        "unit": [
+                            "type": "string",
+                            "enum": ["celsius", "fahrenheit"],
+                        ] as [String: any Sendable],
+                    ] as [String: [String: any Sendable]],
+                    "required": ["location"],
+                ] as [String: any Sendable],
+            ] as [String: any Sendable],
+        ] as [String: any Sendable]
+
     /// load and return the model -- can be called multiple times, subsequent calls will
     /// just return the loaded model
     func load() async throws -> ModelContainer {
@@ -222,18 +252,22 @@ class LLMEvaluator {
         MLXRandom.seed(UInt64(Date.timeIntervalSinceReferenceDate * 1000))

         let result = try await modelContainer.perform { context in
-            let input = try await context.processor.prepare(input: .init(prompt: prompt))
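+            // Build a chat-style input; the tool spec is attached only when the toggle is on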
+            let input = try await context.processor.prepare(
+                input: .init(
+                    messages: [
+                        ["role": "system", "content": "You are a helpful assistant."],
+                        ["role": "user", "content": prompt],
+                    ], tools: includeWeatherTool ? [currentWeatherToolSpec] : nil))

             return try MLXLMCommon.generate(
                 input: input, parameters: generateParameters, context: context
             ) { tokens in
-                // update the output -- this will make the view show the text as it generates
+                // Show the text in the view as it generates
                 if tokens.count % displayEveryNTokens == 0 {
                     let text = context.tokenizer.decode(tokens: tokens)
                     Task { @MainActor in
                         self.output = text
                     }
                 }
-
                 if tokens.count >= maxTokens {
                     return .stop
                 } else {