@@ -29,7 +29,7 @@ import mlx_lm
29
29
30
30
# Create the model
31
31
model = outlines.from_mlxlm(
32
- * mlx_lm.load(" mlx-community/SmolLM-135M-Instruct -4bit" )
32
+ *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit")
33
33
)
34
34
```
35
35
@@ -45,14 +45,43 @@ import mlx_lm
45
45
46
46
# Load the model
47
47
model = outlines.from_mlxlm(
48
- * mlx_lm.load(" mlx-community/SmolLM-135M-Instruct -4bit" )
48
+ *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit")
49
49
)
50
50
51
51
# Call it to generate text
52
52
result = model(" What's the capital of Latvia?" , max_tokens = 20 )
53
53
print (result) # 'Riga'
54
54
```
55
55
56
+ #### Chat
57
+
58
+ You can use chat inputs with the ` MLXLM ` model. To do so, call the model with a ` Chat ` instance.
59
+
60
+ For instance:
61
+
62
+ ``` python
63
+ import outlines
64
+ import mlx_lm
65
+ from outlines.inputs import Chat
66
+
67
+ # Load the model
68
+ model = outlines.from_mlxlm(
69
+ *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit")
70
+ )
71
+
72
+ # Create the chat prompt
73
+ prompt = Chat([
74
+ {" role" : " system" , " content" : " You are a helpful assistant." },
75
+     {"role": "user", "content": "What's the capital of Latvia?"},
76
+ ])
77
+
78
+ # Call the model to generate a response
79
+ response = model(prompt, max_tokens = 50 )
80
+ print (response) # 'Riga.'
81
+ ```
82
+
83
+ #### Streaming
84
+
56
85
The ` MLXLM ` model also supports streaming. For instance:
57
86
58
87
``` python
@@ -61,7 +90,7 @@ import mlx_lm
61
90
62
91
# Load the model
63
92
model = outlines.from_mlxlm(
64
- * mlx_lm.load(" mlx-community/SmolLM-135M-Instruct -4bit" )
93
+ *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit")
65
94
)
66
95
67
96
# Stream text
@@ -73,7 +102,7 @@ for chunk in model.stream("Write a short story about a cat.", max_tokens=100):
73
102
74
103
As a local model, ` MLXLM ` supports all forms of structured generation available in Outlines.
75
104
76
- ### Basic Type
105
+ #### Basic Type
77
106
78
107
``` python
79
108
import outlines
@@ -82,14 +111,14 @@ import mlx_lm
82
111
output_type = int
83
112
84
113
model = outlines.from_mlxlm(
85
- * mlx_lm.load(" mlx-community/SmolLM-135M-Instruct -4bit" )
114
+ *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit")
86
115
)
87
116
88
117
result = model(" How many countries are there in the world?" , output_type)
89
118
print (result) # '200'
90
119
```
91
120
92
- ### JSON Schema
121
+ #### JSON Schema
93
122
94
123
``` python
95
124
from pydantic import BaseModel
@@ -103,15 +132,15 @@ class Character(BaseModel):
103
132
skills: List[str ]
104
133
105
134
model = outlines.from_mlxlm(
106
- * mlx_lm.load(" mlx-community/SmolLM-135M-Instruct -4bit" )
135
+ *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit")
107
136
)
108
137
109
138
result = model(" Create a character." , output_type = Character)
110
139
print (result) # '{"name": "Evelyn", "age": 34, "skills": ["archery", "stealth", "alchemy"]}'
111
140
print (Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy']
112
141
```
113
142
114
- ### Multiple Choice
143
+ #### Multiple Choice
115
144
116
145
``` python
117
146
from typing import Literal
@@ -121,14 +150,14 @@ import mlx_lm
121
150
output_type = Literal[" Paris" , " London" , " Rome" , " Berlin" ]
122
151
123
152
model = outlines.from_mlxlm(
124
- * mlx_lm.load(" mlx-community/SmolLM-135M-Instruct -4bit" )
153
+ *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit")
125
154
)
126
155
127
156
result = model(" What is the capital of France?" , output_type)
128
157
print (result) # 'Paris'
129
158
```
130
159
131
- ### Regex
160
+ #### Regex
132
161
133
162
``` python
134
163
from outlines.types import Regex
@@ -138,14 +167,14 @@ import mlx_lm
138
167
output_type = Regex(r"\d{3}-\d{2}-\d{4}")
139
168
140
169
model = outlines.from_mlxlm(
141
- * mlx_lm.load(" mlx-community/SmolLM-135M-Instruct -4bit" )
170
+ *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit")
142
171
)
143
172
144
173
result = model(" Generate a fake social security number." , output_type)
145
174
print (result) # '782-32-3789'
146
175
```
147
176
148
- ### Context-Free Grammar
177
+ #### Context-Free Grammar
149
178
150
179
``` python
151
180
from outlines.types import CFG
@@ -175,7 +204,7 @@ arithmetic_grammar = """
175
204
output_type = CFG(arithmetic_grammar)
176
205
177
206
model = outlines.from_mlxlm(
178
- * mlx_lm.load(" mlx-community/SmolLM-135M-Instruct -4bit" )
207
+ *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit")
179
208
)
180
209
181
210
result = model(" Write an addition." , output_type, max_tokens = 20 )
0 commit comments