
Commit bf9ba15

Merge pull request #7508 from fbricon/ollama-support-reasoning-flag
feat: enable thinking/reasoning toggle for ollama models
2 parents 2742056 + fe9295b commit bf9ba15

File tree

11 files changed: +123, -40 lines

core/llm/autodetect.ts

Lines changed: 27 additions & 1 deletion

@@ -1,4 +1,9 @@
-import { ChatMessage, ModelCapability, TemplateType } from "../index.js";
+import {
+  ChatMessage,
+  ModelCapability,
+  ModelDescription,
+  TemplateType,
+} from "../index.js";
 import { NEXT_EDIT_MODELS } from "./constants.js";
 
 import {
@@ -126,6 +131,26 @@ function modelSupportsImages(
 
   return false;
 }
+
+function modelSupportsReasoning(
+  model: ModelDescription | null | undefined,
+): boolean {
+  if (!model) {
+    return false;
+  }
+  if ("anthropic" === model.underlyingProviderName) {
+    return true;
+  }
+  if (model.model.includes("deepseek-r")) {
+    return true;
+  }
+  if (model.completionOptions?.reasoning) {
+    // Reasoning support is forced at the config level. Model might not necessarily support it though!
+    return true;
+  }
+  return false;
+}
+
 const PARALLEL_PROVIDERS: string[] = [
   "anthropic",
   "bedrock",
@@ -421,4 +446,5 @@ export {
  llmCanGenerateInParallel,
  modelSupportsImages,
  modelSupportsNextEdit,
+  modelSupportsReasoning,
 };
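A quick sketch of how the new helper resolves, using hypothetical model descriptions (the field values below are illustrative assumptions, not taken from the repo):

```typescript
import { ModelDescription } from "core";
import { modelSupportsReasoning } from "core/llm/autodetect";

// Hypothetical models for illustration only.
const r1 = {
  title: "DeepSeek R1 8B",
  provider: "ollama",
  underlyingProviderName: "ollama",
  model: "deepseek-r1:8b",
} as ModelDescription;

const forced = {
  title: "Local model with reasoning forced in config",
  provider: "ollama",
  underlyingProviderName: "ollama",
  model: "llama3.1:8b",
  completionOptions: { reasoning: true },
} as ModelDescription;

modelSupportsReasoning(r1); // true: model name contains "deepseek-r"
modelSupportsReasoning(forced); // true: forced at the config level
modelSupportsReasoning(undefined); // false: no model selected
```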

core/llm/llms/Ollama.ts

Lines changed: 2 additions & 1 deletion

@@ -87,6 +87,7 @@ interface OllamaChatOptions extends OllamaBaseOptions {
   tools?: OllamaTool[]; // the tools of the chat, this can be used to keep a tool memory
   // Not supported yet - tools: tools for the model to use if supported. Requires stream to be set to false
   // And correspondingly, tool calls in OllamaChatMessage
+  think?: boolean; // if true the model will be prompted to think about the response before generating it
 }
 
 type OllamaBaseResponse = {
@@ -146,7 +147,6 @@ class Ollama extends BaseLLM implements ModelInstaller {
   private static modelsBeingInstalledMutex = new Mutex();
 
   private fimSupported: boolean = false;
-
   constructor(options: LLMOptions) {
     super(options);
 
@@ -393,6 +393,7 @@ class Ollama extends BaseLLM implements ModelInstaller {
       model: this._getModel(),
       messages: ollamaMessages,
       options: this._getModelFileParams(options),
+      think: options.reasoning,
      keep_alive: options.keepAlive ?? 60 * 30, // 30 minutes
      stream: options.stream,
      // format: options.format, // Not currently in base completion options
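Concretely, when the toggle is on, the body Continue streams to Ollama's /api/chat endpoint now carries a top-level think flag. A minimal sketch of the resulting request, with illustrative model and option values:

```typescript
// Illustrative request body; the model name and option values are assumptions.
const chatRequestBody = {
  model: "deepseek-r1:8b",
  messages: [{ role: "user", content: "Why is the sky blue?" }],
  options: { temperature: 0.7 }, // from this._getModelFileParams(options)
  think: true, // mapped from options.reasoning
  keep_alive: 60 * 30, // 30 minutes
  stream: true,
};
```

Per Ollama's API documentation, thinking-capable models then return their reasoning in a separate field of the response message rather than interleaved with the answer text.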

docs/reference.mdx

Lines changed: 1 addition & 1 deletion

@@ -219,7 +219,7 @@ The `models` section defines the language models used in your configuration. Mod
 - `topP`: The cumulative probability for nucleus sampling.
 - `topK`: Maximum number of tokens considered at each step.
 - `stop`: An array of stop tokens that will terminate the completion.
-- `reasoning`: Boolean to enable thinking/reasoning for Anthropic Claude 3.7+ models.
+- `reasoning`: Boolean to enable thinking/reasoning for Anthropic Claude 3.7+ and some Ollama models.
 - `reasoningBudgetTokens`: Budget tokens for thinking/reasoning in Anthropic Claude 3.7+ models.
 
 - `requestOptions`: HTTP request options specific to the model.
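As an illustration, here is what those two options look like when forced on, expressed with the core LLMFullCompletionOptions type rather than config syntax (a sketch; the budget value is the default used elsewhere in this PR):

```typescript
import { LLMFullCompletionOptions } from "core";

// Illustrative only: forces the reasoning toggle on for a model.
const completionOptions: LLMFullCompletionOptions = {
  reasoning: true,
  // Honored by Anthropic; ignored for Ollama, which cannot cap
  // thinking tokens yet (see buildReasoningCompletionOptions below).
  reasoningBudgetTokens: 2048,
};
```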

docs/reference/json-reference.mdx

Lines changed: 1 addition & 5 deletions

@@ -191,7 +191,7 @@ Parameters that control the behavior of text generation and completion settings.
 - `keepAlive`: For Ollama, this parameter sets the number of seconds to keep the model loaded after the last request, unloading it from memory if inactive (default: `1800` seconds, or 30 minutes).
 - `numGpu`: For Ollama, this parameter overrides the number of gpu layers that will be used to load the model into VRAM.
 - `useMmap`: For Ollama, this parameter allows the model to be mapped into memory. If disabled can enhance response time on low end devices but will slow down the stream.
-- `reasoning`: Enables thinking/reasoning for Anthropic Claude 3.7+ models.
+- `reasoning`: Enables thinking/reasoning for Anthropic Claude 3.7+ and some Ollama models.
 - `reasoningBudgetTokens`: Sets budget tokens for thinking/reasoning in Anthropic Claude 3.7+ models.
 
 Example
@@ -485,8 +485,6 @@ Several experimental config parameters are available, as described below:
 - `applyCodeBlock`: Model title for applying code blocks.
 - `repoMapFileSelection`: Model title for repo map selections.
 
-
-
 - `modelContextProtocolServers`: See [Model Context Protocol](/customize/deep-dives/mcp)
 
 Example
@@ -540,5 +538,3 @@ Some deprecated `config.json` settings are no longer stored in config and have b
 - `codeWrap`
 - `displayRawMarkdown`
 - `showChatScrollbar`
-
-

extensions/vscode/config_schema.json

Lines changed: 1 addition & 1 deletion

@@ -78,7 +78,7 @@
     },
     "reasoning": {
       "title": "Reasoning",
-      "description": "Enable thinking/reasoning for Anthropic Claude 3.7+ models",
+      "description": "Enable thinking/reasoning for Anthropic Claude 3.7+ and some Ollama models",
       "type": "boolean"
     },
     "reasoningBudgetTokens": {

gui/src/components/mainInput/InputToolbar.tsx

Lines changed: 7 additions & 2 deletions

@@ -5,7 +5,10 @@ import {
 } from "@heroicons/react/24/outline";
 import { LightBulbIcon as LightBulbIconSolid } from "@heroicons/react/24/solid";
 import { InputModifiers } from "core";
-import { modelSupportsImages } from "core/llm/autodetect";
+import {
+  modelSupportsImages,
+  modelSupportsReasoning,
+} from "core/llm/autodetect";
 import { useContext, useRef } from "react";
 import { IdeMessengerContext } from "../../context/IdeMessenger";
 import { useAppDispatch, useAppSelector } from "../../redux/hooks";
@@ -66,6 +69,8 @@ function InputToolbar(props: InputToolbarProps) {
     defaultModel.capabilities,
   );
 
+  const supportsReasoning = modelSupportsReasoning(defaultModel);
+
   const smallFont = useFontSize(-2);
   const tinyFont = useFontSize(-3);
 
@@ -130,7 +135,7 @@ function InputToolbar(props: InputToolbarProps) {
           </HoverItem>
         </ToolTip>
       )}
-      {defaultModel?.underlyingProviderName === "anthropic" && (
+      {supportsReasoning && (
        <HoverItem
          onClick={() =>
            dispatch(setHasReasoningEnabled(!hasReasoningEnabled))
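The net effect is that the lightbulb toggle now renders for any model that passes modelSupportsReasoning rather than only Anthropic ones. A condensed sketch of the render path (the icon choice is inferred from the imports above; the real markup carries more props):

```tsx
{supportsReasoning && (
  <HoverItem
    onClick={() => dispatch(setHasReasoningEnabled(!hasReasoningEnabled))}
  >
    {/* assumed: solid bulb when reasoning is on, outline bulb when off */}
    {hasReasoningEnabled ? <LightBulbIconSolid /> : <LightBulbIcon />}
  </HoverItem>
)}
```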

gui/src/hooks/ParallelListeners.tsx

Lines changed: 8 additions & 6 deletions

@@ -19,6 +19,7 @@ import {
 } from "../redux/slices/sessionSlice";
 import { setTTSActive } from "../redux/slices/uiSlice";
 
+import { modelSupportsReasoning } from "core/llm/autodetect";
 import { cancelStream } from "../redux/thunks/cancelStream";
 import { handleApplyStateUpdate } from "../redux/thunks/handleApplyStateUpdate";
 import { refreshSessionMetadata } from "../redux/thunks/session";
@@ -79,12 +80,13 @@ function ParallelListeners() {
        document.body.style.fontSize = `${configResult.config.ui.fontSize}px`;
      }
 
-      if (
-        configResult.config?.selectedModelByRole.chat?.completionOptions
-          ?.reasoning
-      ) {
-        dispatch(setHasReasoningEnabled(true));
-      }
+      const chatModel = configResult.config?.selectedModelByRole.chat;
+      const supportsReasoning = modelSupportsReasoning(chatModel);
+      const isReasoningDisabled =
+        chatModel?.completionOptions?.reasoning === false;
+      dispatch(
+        setHasReasoningEnabled(supportsReasoning && !isReasoningDisabled),
+      );
    },
    [dispatch, hasDoneInitialConfigLoad],
  );
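The dispatched initial state collapses to a single expression: reasoning defaults to on for capable models unless the config explicitly opts out.

```typescript
// Equivalent condensed form of the new logic:
const initialReasoningState =
  modelSupportsReasoning(chatModel) &&
  chatModel?.completionOptions?.reasoning !== false;

// supportsReasoning | completionOptions.reasoning | initial toggle
// ------------------+-----------------------------+----------------
// false             | anything                    | off
// true              | undefined or true           | on (default)
// true              | false                       | off (opt-out)
```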

gui/src/redux/thunks/streamNormalInput.ts

Lines changed: 40 additions & 11 deletions

@@ -1,13 +1,14 @@
+import { ToolPolicy } from "@continuedev/terminal-security";
 import { createAsyncThunk, unwrapResult } from "@reduxjs/toolkit";
 import {
-  ContextItem,
   LLMFullCompletionOptions,
+  ModelDescription,
   Tool,
   ToolCallState,
 } from "core";
-import { ToolPolicy } from "@continuedev/terminal-security";
 import { getRuleId } from "core/llm/rules/getSystemMessageWithRules";
 import { ToCoreProtocol } from "core/protocol";
+import { IIdeMessenger } from "../../context/IdeMessenger";
 import { selectActiveTools } from "../selectors/selectActiveTools";
 import { selectSelectedChatModel } from "../slices/configSlice";
 import {
@@ -25,7 +26,6 @@ import {
   updateToolCallOutput,
 } from "../slices/sessionSlice";
 import { AppThunkDispatch, RootState, ThunkApiType } from "../store";
-import { IIdeMessenger } from "../../context/IdeMessenger";
 import { constructMessages } from "../util/constructMessages";
 
 import { modelSupportsNativeTools } from "core/llm/toolSupport";
@@ -192,6 +192,38 @@ async function handleToolCallExecution(
   return allAutoApproved;
 }
 
+/**
+ * Builds completion options with reasoning configuration based on session state and model capabilities.
+ *
+ * @param baseOptions - Base completion options to extend
+ * @param hasReasoningEnabled - Whether reasoning is enabled in the session
+ * @param model - The selected model with provider and completion options
+ * @returns Completion options with reasoning configuration
+ */
+function buildReasoningCompletionOptions(
+  baseOptions: LLMFullCompletionOptions,
+  hasReasoningEnabled: boolean | undefined,
+  model: ModelDescription,
+): LLMFullCompletionOptions {
+  if (hasReasoningEnabled === undefined) {
+    return baseOptions;
+  }
+
+  const reasoningOptions: LLMFullCompletionOptions = {
+    ...baseOptions,
+    reasoning: !!hasReasoningEnabled,
+  };
+
+  // Add reasoning budget tokens if reasoning is enabled and provider supports it
+  if (hasReasoningEnabled && model.underlyingProviderName !== "ollama") {
+    // Ollama doesn't support limiting reasoning tokens at this point
+    reasoningOptions.reasoningBudgetTokens =
+      model.completionOptions?.reasoningBudgetTokens ?? 2048;
+  }
+
+  return reasoningOptions;
+}
+
 export const streamNormalInput = createAsyncThunk<
   void,
   {
@@ -228,14 +260,11 @@ export const streamNormalInput = createAsyncThunk<
      };
    }
 
-    if (state.session.hasReasoningEnabled) {
-      completionOptions = {
-        ...completionOptions,
-        reasoning: true,
-        reasoningBudgetTokens:
-          selectedChatModel.completionOptions?.reasoningBudgetTokens ?? 2048,
-      };
-    }
+    completionOptions = buildReasoningCompletionOptions(
+      completionOptions,
+      state.session.hasReasoningEnabled,
+      selectedChatModel,
+    );
 
    // Construct messages (excluding system message)
    const baseSystemMessage = getBaseSystemMessage(
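A sketch of the helper's behavior in the interesting cases; anthropicModel and ollamaModel stand for hypothetical ModelDescription values whose underlyingProviderName is "anthropic" and "ollama" respectively:

```typescript
const base: LLMFullCompletionOptions = { temperature: 0.7 };

buildReasoningCompletionOptions(base, undefined, anthropicModel);
// => { temperature: 0.7 }, toggle never set so options pass through

buildReasoningCompletionOptions(base, true, anthropicModel);
// => { temperature: 0.7, reasoning: true, reasoningBudgetTokens: 2048 }

buildReasoningCompletionOptions(base, true, ollamaModel);
// => { temperature: 0.7, reasoning: true }, no budget since Ollama can't cap it

buildReasoningCompletionOptions(base, false, ollamaModel);
// => { temperature: 0.7, reasoning: false }, explicit off is forwarded
```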

gui/src/redux/thunks/streamResponse.test.ts

Lines changed: 15 additions & 5 deletions

@@ -435,12 +435,16 @@ describe("streamResponseThunk", () => {
         ],
       },
     ],
-    options: {},
+    options: {
+      reasoning: false,
+    },
   });
 
   expect(mockIdeMessenger.llmStreamChat).toHaveBeenCalledWith(
     {
-      completionOptions: {},
+      completionOptions: {
+        reasoning: false,
+      },
       legacySlashCommandData: undefined,
       messageOptions: { precompiled: true },
       messages: [
@@ -918,7 +922,9 @@ describe("streamResponseThunk", () => {
         ],
       },
     ],
-    options: {},
+    options: {
+      reasoning: false,
+    },
    },
  );
 
@@ -1527,13 +1533,17 @@ describe("streamResponseThunk", () => {
        ],
      },
    ],
-    options: {},
+    options: {
+      reasoning: false,
+    },
    },
  );
 
  expect(mockIdeMessengerAbort.llmStreamChat).toHaveBeenCalledWith(
    {
-      completionOptions: {},
+      completionOptions: {
+        reasoning: false,
+      },
      legacySlashCommandData: undefined,
      messageOptions: { precompiled: true },
      messages: [

gui/src/redux/thunks/streamResponse_errorHandling.test.ts

Lines changed: 6 additions & 2 deletions

@@ -677,7 +677,9 @@ describe("streamResponseThunk", () => {
        ],
      },
    ],
-    options: {},
+    options: {
+      reasoning: false,
+    },
  });
  expect(mockIdeMessenger.llmStreamChat).not.toHaveBeenCalled();
 
@@ -1027,7 +1029,9 @@ describe("streamResponseThunk", () => {
        ],
      },
    ],
-    options: {},
+    options: {
+      reasoning: false,
+    },
  });
  expect(mockIdeMessenger.llmStreamChat).not.toHaveBeenCalled();
