
Commit bf9ba15

Merge pull request #7508 from fbricon/ollama-support-reasoning-flag
feat: enable thinking/reasoning toggle for ollama models
2 parents 2742056 + fe9295b commit bf9ba15

File tree

11 files changed: +123, -40 lines

core/llm/autodetect.ts

Lines changed: 27 additions & 1 deletion

@@ -1,4 +1,9 @@
-import { ChatMessage, ModelCapability, TemplateType } from "../index.js";
+import {
+  ChatMessage,
+  ModelCapability,
+  ModelDescription,
+  TemplateType,
+} from "../index.js";
 import { NEXT_EDIT_MODELS } from "./constants.js";
 
 import {
@@ -126,6 +131,26 @@ function modelSupportsImages(
 
   return false;
 }
+
+function modelSupportsReasoning(
+  model: ModelDescription | null | undefined,
+): boolean {
+  if (!model) {
+    return false;
+  }
+  if ("anthropic" === model.underlyingProviderName) {
+    return true;
+  }
+  if (model.model.includes("deepseek-r")) {
+    return true;
+  }
+  if (model.completionOptions?.reasoning) {
+    // Reasoning support is forced at the config level. Model might not necessarily support it though!
+    return true;
+  }
+  return false;
+}
+
 const PARALLEL_PROVIDERS: string[] = [
   "anthropic",
   "bedrock",
@@ -421,4 +446,5 @@ export {
  llmCanGenerateInParallel,
  modelSupportsImages,
  modelSupportsNextEdit,
+  modelSupportsReasoning,
 };
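A quick sketch of how the new helper resolves, using hypothetical model descriptions (the field values below are illustrative assumptions, not taken from the repo):

```typescript
import { ModelDescription } from "core";
import { modelSupportsReasoning } from "core/llm/autodetect";

// Hypothetical models for illustration only.
const r1 = {
  title: "DeepSeek R1 8B",
  provider: "ollama",
  underlyingProviderName: "ollama",
  model: "deepseek-r1:8b",
} as ModelDescription;

const forced = {
  title: "Local model with reasoning forced in config",
  provider: "ollama",
  underlyingProviderName: "ollama",
  model: "llama3.1:8b",
  completionOptions: { reasoning: true },
} as ModelDescription;

modelSupportsReasoning(r1); // true: model name contains "deepseek-r"
modelSupportsReasoning(forced); // true: forced at the config level
modelSupportsReasoning(undefined); // false: no model selected
```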

core/llm/llms/Ollama.ts

Lines changed: 2 additions & 1 deletion

@@ -87,6 +87,7 @@ interface OllamaChatOptions extends OllamaBaseOptions {
   tools?: OllamaTool[]; // the tools of the chat, this can be used to keep a tool memory
   // Not supported yet - tools: tools for the model to use if supported. Requires stream to be set to false
   // And correspondingly, tool calls in OllamaChatMessage
+  think?: boolean; // if true the model will be prompted to think about the response before generating it
 }
 
 type OllamaBaseResponse = {
@@ -146,7 +147,6 @@ class Ollama extends BaseLLM implements ModelInstaller {
   private static modelsBeingInstalledMutex = new Mutex();
 
   private fimSupported: boolean = false;
-
   constructor(options: LLMOptions) {
     super(options);
 
@@ -393,6 +393,7 @@ class Ollama extends BaseLLM implements ModelInstaller {
       model: this._getModel(),
       messages: ollamaMessages,
       options: this._getModelFileParams(options),
+      think: options.reasoning,
      keep_alive: options.keepAlive ?? 60 * 30, // 30 minutes
      stream: options.stream,
      // format: options.format, // Not currently in base completion options
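Concretely, when the toggle is on, the body Continue streams to Ollama's /api/chat endpoint now carries a top-level think flag. A minimal sketch of the resulting request, with illustrative model and option values:

```typescript
// Illustrative request body; the model name and option values are assumptions.
const chatRequestBody = {
  model: "deepseek-r1:8b",
  messages: [{ role: "user", content: "Why is the sky blue?" }],
  options: { temperature: 0.7 }, // from this._getModelFileParams(options)
  think: true, // mapped from options.reasoning
  keep_alive: 60 * 30, // 30 minutes
  stream: true,
};
```

Per Ollama's API documentation, thinking-capable models then return their reasoning in a separate field of the response message rather than interleaved with the answer text.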

docs/reference.mdx

Lines changed: 1 addition & 1 deletion

@@ -219,7 +219,7 @@ The `models` section defines the language models used in your configuration. Mod
 - `topP`: The cumulative probability for nucleus sampling.
 - `topK`: Maximum number of tokens considered at each step.
 - `stop`: An array of stop tokens that will terminate the completion.
-- `reasoning`: Boolean to enable thinking/reasoning for Anthropic Claude 3.7+ models.
+- `reasoning`: Boolean to enable thinking/reasoning for Anthropic Claude 3.7+ and some Ollama models.
 - `reasoningBudgetTokens`: Budget tokens for thinking/reasoning in Anthropic Claude 3.7+ models.
 
 - `requestOptions`: HTTP request options specific to the model.
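As an illustration, here is what those two options look like when forced on, expressed with the core LLMFullCompletionOptions type rather than config syntax (a sketch; the budget value is the default used elsewhere in this PR):

```typescript
import { LLMFullCompletionOptions } from "core";

// Illustrative only: forces the reasoning toggle on for a model.
const completionOptions: LLMFullCompletionOptions = {
  reasoning: true,
  // Honored by Anthropic; ignored for Ollama, which cannot cap
  // thinking tokens yet (see buildReasoningCompletionOptions below).
  reasoningBudgetTokens: 2048,
};
```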

docs/reference/json-reference.mdx

Lines changed: 1 addition & 5 deletions

@@ -191,7 +191,7 @@ Parameters that control the behavior of text generation and completion settings.
 - `keepAlive`: For Ollama, this parameter sets the number of seconds to keep the model loaded after the last request, unloading it from memory if inactive (default: `1800` seconds, or 30 minutes).
 - `numGpu`: For Ollama, this parameter overrides the number of gpu layers that will be used to load the model into VRAM.
 - `useMmap`: For Ollama, this parameter allows the model to be mapped into memory. If disabled can enhance response time on low end devices but will slow down the stream.
-- `reasoning`: Enables thinking/reasoning for Anthropic Claude 3.7+ models.
+- `reasoning`: Enables thinking/reasoning for Anthropic Claude 3.7+ and some Ollama models.
 - `reasoningBudgetTokens`: Sets budget tokens for thinking/reasoning in Anthropic Claude 3.7+ models.
 
 Example
@@ -485,8 +485,6 @@ Several experimental config parameters are available, as described below:
 - `applyCodeBlock`: Model title for applying code blocks.
 - `repoMapFileSelection`: Model title for repo map selections.
 
-
-
 - `modelContextProtocolServers`: See [Model Context Protocol](/customize/deep-dives/mcp)
 
 Example
@@ -540,5 +538,3 @@ Some deprecated `config.json` settings are no longer stored in config and have b
 - `codeWrap`
 - `displayRawMarkdown`
 - `showChatScrollbar`
-
-

extensions/vscode/config_schema.json

Lines changed: 1 addition & 1 deletion

@@ -78,7 +78,7 @@
     },
     "reasoning": {
       "title": "Reasoning",
-      "description": "Enable thinking/reasoning for Anthropic Claude 3.7+ models",
+      "description": "Enable thinking/reasoning for Anthropic Claude 3.7+ and some Ollama models",
       "type": "boolean"
     },
     "reasoningBudgetTokens": {

gui/src/components/mainInput/InputToolbar.tsx

Lines changed: 7 additions & 2 deletions

@@ -5,7 +5,10 @@ import {
 } from "@heroicons/react/24/outline";
 import { LightBulbIcon as LightBulbIconSolid } from "@heroicons/react/24/solid";
 import { InputModifiers } from "core";
-import { modelSupportsImages } from "core/llm/autodetect";
+import {
+  modelSupportsImages,
+  modelSupportsReasoning,
+} from "core/llm/autodetect";
 import { useContext, useRef } from "react";
 import { IdeMessengerContext } from "../../context/IdeMessenger";
 import { useAppDispatch, useAppSelector } from "../../redux/hooks";
@@ -66,6 +69,8 @@ function InputToolbar(props: InputToolbarProps) {
     defaultModel.capabilities,
   );
 
+  const supportsReasoning = modelSupportsReasoning(defaultModel);
+
   const smallFont = useFontSize(-2);
   const tinyFont = useFontSize(-3);
 
@@ -130,7 +135,7 @@ function InputToolbar(props: InputToolbarProps) {
           </HoverItem>
         </ToolTip>
       )}
-      {defaultModel?.underlyingProviderName === "anthropic" && (
+      {supportsReasoning && (
        <HoverItem
          onClick={() =>
            dispatch(setHasReasoningEnabled(!hasReasoningEnabled))
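The net effect is that the lightbulb toggle now renders for any model that passes modelSupportsReasoning rather than only Anthropic ones. A condensed sketch of the render path (the icon choice is inferred from the imports above; the real markup carries more props):

```tsx
{supportsReasoning && (
  <HoverItem
    onClick={() => dispatch(setHasReasoningEnabled(!hasReasoningEnabled))}
  >
    {/* assumed: solid bulb when reasoning is on, outline bulb when off */}
    {hasReasoningEnabled ? <LightBulbIconSolid /> : <LightBulbIcon />}
  </HoverItem>
)}
```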

gui/src/hooks/ParallelListeners.tsx

Lines changed: 8 additions & 6 deletions

@@ -19,6 +19,7 @@ import {
 } from "../redux/slices/sessionSlice";
 import { setTTSActive } from "../redux/slices/uiSlice";
 
+import { modelSupportsReasoning } from "core/llm/autodetect";
 import { cancelStream } from "../redux/thunks/cancelStream";
 import { handleApplyStateUpdate } from "../redux/thunks/handleApplyStateUpdate";
 import { refreshSessionMetadata } from "../redux/thunks/session";
@@ -79,12 +80,13 @@ function ParallelListeners() {
        document.body.style.fontSize = `${configResult.config.ui.fontSize}px`;
      }
 
-      if (
-        configResult.config?.selectedModelByRole.chat?.completionOptions
-          ?.reasoning
-      ) {
-        dispatch(setHasReasoningEnabled(true));
-      }
+      const chatModel = configResult.config?.selectedModelByRole.chat;
+      const supportsReasoning = modelSupportsReasoning(chatModel);
+      const isReasoningDisabled =
+        chatModel?.completionOptions?.reasoning === false;
+      dispatch(
+        setHasReasoningEnabled(supportsReasoning && !isReasoningDisabled),
+      );
    },
    [dispatch, hasDoneInitialConfigLoad],
  );
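The dispatched initial state collapses to a single expression: reasoning defaults to on for capable models unless the config explicitly opts out.

```typescript
// Equivalent condensed form of the new logic:
const initialReasoningState =
  modelSupportsReasoning(chatModel) &&
  chatModel?.completionOptions?.reasoning !== false;

// supportsReasoning | completionOptions.reasoning | initial toggle
// ------------------+-----------------------------+----------------
// false             | anything                    | off
// true              | undefined or true           | on (default)
// true              | false                       | off (opt-out)
```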

gui/src/redux/thunks/streamNormalInput.ts

Lines changed: 40 additions & 11 deletions

@@ -1,13 +1,14 @@
+import { ToolPolicy } from "@continuedev/terminal-security";
 import { createAsyncThunk, unwrapResult } from "@reduxjs/toolkit";
 import {
-  ContextItem,
   LLMFullCompletionOptions,
+  ModelDescription,
   Tool,
   ToolCallState,
 } from "core";
-import { ToolPolicy } from "@continuedev/terminal-security";
 import { getRuleId } from "core/llm/rules/getSystemMessageWithRules";
 import { ToCoreProtocol } from "core/protocol";
+import { IIdeMessenger } from "../../context/IdeMessenger";
 import { selectActiveTools } from "../selectors/selectActiveTools";
 import { selectSelectedChatModel } from "../slices/configSlice";
 import {
@@ -25,7 +26,6 @@ import {
   updateToolCallOutput,
 } from "../slices/sessionSlice";
 import { AppThunkDispatch, RootState, ThunkApiType } from "../store";
-import { IIdeMessenger } from "../../context/IdeMessenger";
 import { constructMessages } from "../util/constructMessages";
 
 import { modelSupportsNativeTools } from "core/llm/toolSupport";
@@ -192,6 +192,38 @@ async function handleToolCallExecution(
   return allAutoApproved;
 }
 
+/**
+ * Builds completion options with reasoning configuration based on session state and model capabilities.
+ *
+ * @param baseOptions - Base completion options to extend
+ * @param hasReasoningEnabled - Whether reasoning is enabled in the session
+ * @param model - The selected model with provider and completion options
+ * @returns Completion options with reasoning configuration
+ */
+function buildReasoningCompletionOptions(
+  baseOptions: LLMFullCompletionOptions,
+  hasReasoningEnabled: boolean | undefined,
+  model: ModelDescription,
+): LLMFullCompletionOptions {
+  if (hasReasoningEnabled === undefined) {
+    return baseOptions;
+  }
+
+  const reasoningOptions: LLMFullCompletionOptions = {
+    ...baseOptions,
+    reasoning: !!hasReasoningEnabled,
+  };
+
+  // Add reasoning budget tokens if reasoning is enabled and provider supports it
+  if (hasReasoningEnabled && model.underlyingProviderName !== "ollama") {
+    // Ollama doesn't support limiting reasoning tokens at this point
+    reasoningOptions.reasoningBudgetTokens =
+      model.completionOptions?.reasoningBudgetTokens ?? 2048;
+  }
+
+  return reasoningOptions;
+}
+
 export const streamNormalInput = createAsyncThunk<
   void,
   {
@@ -228,14 +260,11 @@ export const streamNormalInput = createAsyncThunk<
      };
    }
 
-    if (state.session.hasReasoningEnabled) {
-      completionOptions = {
-        ...completionOptions,
-        reasoning: true,
-        reasoningBudgetTokens:
-          selectedChatModel.completionOptions?.reasoningBudgetTokens ?? 2048,
-      };
-    }
+    completionOptions = buildReasoningCompletionOptions(
+      completionOptions,
+      state.session.hasReasoningEnabled,
+      selectedChatModel,
+    );
 
    // Construct messages (excluding system message)
    const baseSystemMessage = getBaseSystemMessage(
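A sketch of the helper's behavior in the interesting cases; anthropicModel and ollamaModel stand for hypothetical ModelDescription values whose underlyingProviderName is "anthropic" and "ollama" respectively:

```typescript
const base: LLMFullCompletionOptions = { temperature: 0.7 };

buildReasoningCompletionOptions(base, undefined, anthropicModel);
// => { temperature: 0.7 }, toggle never set so options pass through

buildReasoningCompletionOptions(base, true, anthropicModel);
// => { temperature: 0.7, reasoning: true, reasoningBudgetTokens: 2048 }

buildReasoningCompletionOptions(base, true, ollamaModel);
// => { temperature: 0.7, reasoning: true }, no budget since Ollama can't cap it

buildReasoningCompletionOptions(base, false, ollamaModel);
// => { temperature: 0.7, reasoning: false }, explicit off is forwarded
```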

gui/src/redux/thunks/streamResponse.test.ts

Lines changed: 15 additions & 5 deletions

@@ -435,12 +435,16 @@ describe("streamResponseThunk", () => {
         ],
       },
     ],
-    options: {},
+    options: {
+      reasoning: false,
+    },
   });
 
   expect(mockIdeMessenger.llmStreamChat).toHaveBeenCalledWith(
     {
-      completionOptions: {},
+      completionOptions: {
+        reasoning: false,
+      },
       legacySlashCommandData: undefined,
       messageOptions: { precompiled: true },
       messages: [
@@ -918,7 +922,9 @@ describe("streamResponseThunk", () => {
         ],
       },
     ],
-    options: {},
+    options: {
+      reasoning: false,
+    },
    },
  );
 
@@ -1527,13 +1533,17 @@ describe("streamResponseThunk", () => {
        ],
      },
    ],
-    options: {},
+    options: {
+      reasoning: false,
+    },
    },
  );
 
  expect(mockIdeMessengerAbort.llmStreamChat).toHaveBeenCalledWith(
    {
-      completionOptions: {},
+      completionOptions: {
+        reasoning: false,
+      },
      legacySlashCommandData: undefined,
      messageOptions: { precompiled: true },
      messages: [

gui/src/redux/thunks/streamResponse_errorHandling.test.ts

Lines changed: 6 additions & 2 deletions

@@ -677,7 +677,9 @@ describe("streamResponseThunk", () => {
        ],
      },
    ],
-    options: {},
+    options: {
+      reasoning: false,
+    },
  });
  expect(mockIdeMessenger.llmStreamChat).not.toHaveBeenCalled();
 
@@ -1027,7 +1029,9 @@ describe("streamResponseThunk", () => {
        ],
      },
    ],
-    options: {},
+    options: {
+      reasoning: false,
+    },
  });
  expect(mockIdeMessenger.llmStreamChat).not.toHaveBeenCalled();
