@@ -77,22 +77,8 @@ def extract_reasoning_content_streaming(
                 # reasoning content continues
                 return DeltaMessage(reasoning_content=delta_text)
         else:
-            # No <think> in previous or delta, also need to check for </think>.
-            # Because the model may have generated </think> without <think>
-            # Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f
-            if self.think_end_token in delta_text:
-                # </think> in delta with more tokens,
-                # extract reasoning content and content
-                end_index = delta_text.find(self.think_end_token)
-                reasoning_content = delta_text[:end_index]
-                content = delta_text[end_index + len(self.think_end_token):]
-                return DeltaMessage(reasoning_content=reasoning_content, content=content if content else None)
-            elif self.think_end_token in previous_text:
-                # </think> in previous, thinking content ends
-                return DeltaMessage(content=delta_text)
-            else:
-                # no </think> in previous or delta, reasoning content continues
-                return DeltaMessage(reasoning_content=delta_text)
+            # no <think> in previous or delta, all content
+            return DeltaMessage(content=delta_text)
 
     def extract_reasoning_content(self, model_output: str, request: ChatCompletionRequest,
                                   **kwargs) -> Tuple[Optional[str], Optional[str]]:
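As a quick illustration of the streaming change in the hunk above (not part of the diff): with the bare-</think> fallback removed, any delta that arrives before a <think> tag is emitted as plain content. The sketch below is a hypothetical, standalone reduction of just that branch, assuming think tokens '<think>'/'</think>' and a minimal stand-in for DeltaMessage; it is not the parser class itself.

# Hypothetical stand-in types, only for demonstrating the branch above.
from dataclasses import dataclass
from typing import Optional


@dataclass
class Delta:  # minimal stand-in for DeltaMessage
    reasoning_content: Optional[str] = None
    content: Optional[str] = None


THINK_START = '<think>'


def stream_delta(previous_text: str, delta_text: str) -> Delta:
    # Simplified mirror of the branch shown in the hunk: once the fallback is
    # gone, text outside an opened <think> block is always plain content.
    if THINK_START in previous_text:
        # reasoning content continues
        return Delta(reasoning_content=delta_text)
    # no <think> in previous or delta, all content
    return Delta(content=delta_text)


assert stream_delta('<think>step one,', ' step two').reasoning_content == ' step two'
assert stream_delta('The answer', ' is 42.').content == ' is 42.'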
@@ -109,26 +95,35 @@ def extract_reasoning_content(self, model_output: str, request: ChatCompletionRe
             reasoning_content (str | None): The reasoning content.
             final_output (str | None): The content.
         """
-        # DeepSeek R1 doesn't generate <think> now.
+        start_index = model_output.find(self.think_start_token)
+        end_index = model_output.find(self.think_end_token)
         # Thus we assume the reasoning content is always at the start.
-        # Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f
-        if self.think_end_token not in model_output:
+        if end_index < 0:
             # for qwen3 model, the reasoning content is wrapped by <think> </think> xml tags
-            return None, model_output
-        # Add a start token if it's missing to keep compatibility.
-        if self.think_start_token not in model_output:
-            model_output = f'{self.think_start_token}{model_output}'
-        # Use a regex to find the reasoning content
-        reasoning_content = self.reasoning_regex.findall(model_output)[0]
-
-        end_index = len(f'{self.think_start_token}{reasoning_content}{self.think_end_token}')
-        final_output = model_output[end_index:]
-        if reasoning_content.startswith('\n'):
-            reasoning_content = reasoning_content[1:]
-        if reasoning_content.endswith('\n'):
-            reasoning_content = reasoning_content[:-1]
+            if start_index < 0:
+                return None, model_output
+            reasoning_content = model_output[start_index + len(self.think_start_token):]
+            reasoning_content = self._trim_newlines(reasoning_content)
+            return reasoning_content, None
+
+        if start_index >= 0 and start_index < end_index:
+            reasoning_content = model_output[start_index + len(self.think_start_token):end_index]
+        else:
+            reasoning_content = model_output[:end_index]
+        reasoning_content = self._trim_newlines(reasoning_content)
+
+        final_output = model_output[end_index + len(self.think_end_token):]
+        final_output = self._trim_newlines(final_output)
 
         if len(final_output) == 0:
             return reasoning_content, None
-
         return reasoning_content, final_output
+
+    @classmethod
+    def _trim_newlines(cls, text: str):
+        """Trim newlines from the start and end of a string."""
+        while text.startswith('\n'):
+            text = text[1:]
+        while text.endswith('\n'):
+            text = text[:-1]
+        return text
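For reference, a standalone sketch of the rewritten non-streaming path (again hypothetical, not the class method itself): the new index-based logic covers a properly wrapped <think>...</think> block, a bare </think> with no opening tag, an unterminated <think>, and output with no think tags at all, trimming surrounding newlines the same way _trim_newlines does. It assumes think tokens '<think>'/'</think>'.

# Standalone sketch of the index-based extraction added above,
# assuming think tokens '<think>' and '</think>'.
from typing import Optional, Tuple

THINK_START, THINK_END = '<think>', '</think>'


def trim_newlines(text: str) -> str:
    """Trim newlines from the start and end of a string (mirrors _trim_newlines)."""
    while text.startswith('\n'):
        text = text[1:]
    while text.endswith('\n'):
        text = text[:-1]
    return text


def extract(model_output: str) -> Tuple[Optional[str], Optional[str]]:
    start_index = model_output.find(THINK_START)
    end_index = model_output.find(THINK_END)
    if end_index < 0:
        # No closing tag: either no reasoning at all, or an unterminated block.
        if start_index < 0:
            return None, model_output
        return trim_newlines(model_output[start_index + len(THINK_START):]), None
    if 0 <= start_index < end_index:
        reasoning = model_output[start_index + len(THINK_START):end_index]
    else:
        # Bare </think> without <think>: everything before it is reasoning.
        reasoning = model_output[:end_index]
    reasoning = trim_newlines(reasoning)
    final = trim_newlines(model_output[end_index + len(THINK_END):])
    return reasoning, (final if final else None)


# The shapes the diff covers:
print(extract('<think>\nplan\n</think>\nanswer'))  # ('plan', 'answer')
print(extract('plan</think>answer'))               # ('plan', 'answer')
print(extract('just an answer'))                   # (None, 'just an answer')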