Skip to content

Commit 793c699

Browse files
committed
fix: qwen3 non-stream parsing when think content is missing or incomplete
1 parent 3c7c83c commit 793c699

File tree

2 files changed

+73
-33
lines changed

2 files changed

+73
-33
lines changed

lmdeploy/serve/openai/reasoning_parser/qwen_qwq_reasoning_parser.py

Lines changed: 28 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -77,22 +77,8 @@ def extract_reasoning_content_streaming(
7777
# reasoning content continues
7878
return DeltaMessage(reasoning_content=delta_text)
7979
else:
80-
# No <think> in previous or delta, also need to check for </think>.
81-
# Because the model may have generated </think> without <think>
82-
# Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f
83-
if self.think_end_token in delta_text:
84-
# </think> in delta with more tokens,
85-
# extract reasoning content and content
86-
end_index = delta_text.find(self.think_end_token)
87-
reasoning_content = delta_text[:end_index]
88-
content = delta_text[end_index + len(self.think_end_token):]
89-
return DeltaMessage(reasoning_content=reasoning_content, content=content if content else None)
90-
elif self.think_end_token in previous_text:
91-
# </think> in previous, thinking content ends
92-
return DeltaMessage(content=delta_text)
93-
else:
94-
# no </think> in previous or delta, reasoning content continues
95-
return DeltaMessage(reasoning_content=delta_text)
80+
# no <think> in previous or delta, all content
81+
return DeltaMessage(content=delta_text)
9682

9783
def extract_reasoning_content(self, model_output: str, request: 'ChatCompletionRequest',
                              **kwargs) -> Tuple[Optional[str], Optional[str]]:
    """Extract reasoning ("think") content from a complete, non-streaming output.

    For qwen3-style models the reasoning is wrapped in <think> </think> xml
    tags, but the model may emit the text without the opening tag, without the
    closing tag, or with neither.

    Args:
        model_output (str): The full model output to parse.
        request (ChatCompletionRequest): The originating request (not used
            here; kept for interface compatibility).

    Returns:
        reasoning_content (str | None): The reasoning content.
        final_output (str | None): The content.
    """
    start_index = model_output.find(self.think_start_token)
    end_index = model_output.find(self.think_end_token)

    if end_index < 0:
        # No </think>: either the output has no think block at all, or the
        # reasoning was truncated before the closing tag was generated.
        if start_index < 0:
            # no <think> either -- the whole output is plain content
            return None, model_output
        # <think> without </think>: everything after the tag is reasoning
        reasoning_content = model_output[start_index + len(self.think_start_token):]
        reasoning_content = self._trim_newlines(reasoning_content)
        return reasoning_content, None

    if 0 <= start_index < end_index:
        # well-formed <think> ... </think> pair
        reasoning_content = model_output[start_index + len(self.think_start_token):end_index]
    else:
        # </think> without a leading <think>: everything before the closing
        # tag is reasoning (some models skip the opening tag)
        reasoning_content = model_output[:end_index]
    reasoning_content = self._trim_newlines(reasoning_content)

    final_output = model_output[end_index + len(self.think_end_token):]
    final_output = self._trim_newlines(final_output)

    if len(final_output) == 0:
        return reasoning_content, None
    return reasoning_content, final_output
121+
122+
@classmethod
def _trim_newlines(cls, text: str) -> str:
    """Trim newline characters from the start and end of a string.

    Only newlines are removed; other leading/trailing whitespace is kept.
    str.strip('\\n') is the exact stdlib equivalent of the original
    character-by-character while loops.
    """
    return text.strip('\n')

tests/test_lmdeploy/test_qwen3_parser.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,3 +358,48 @@ def test_no_think_nonstream():
358358
first_message = resp.choices[0].message
359359
assert first_message.content == '你好呀!✨ 很高兴见到你!'
360360
assert first_message.reasoning_content is None
361+
362+
363+
THINK_START_SEQUENCE = ["<think>", "\n"]
TRUNCATED_SEQUENCE = ["OK", ", ", "user", " ", "sends"]


@pytest.mark.parametrize(
    "sequence, expected_content, expected_reasoning_content",
    [
        # without think start token: the whole output is plain content
        (TRUNCATED_SEQUENCE, "".join(TRUNCATED_SEQUENCE), None),
        # with think start token but no </think>: the whole output is reasoning
        (THINK_START_SEQUENCE + TRUNCATED_SEQUENCE, None, "".join(TRUNCATED_SEQUENCE)),
    ])
def test_truncated_think_nonstream(sequence, expected_content, expected_reasoning_content):
    """Non-stream parse of an output whose <think> block is absent or truncated."""
    tokenizer = DummyTokenizer()
    VariableInterface.tool_parser = Qwen3ToolParser(tokenizer=tokenizer)
    VariableInterface.reasoning_parser = QwenQwQReasoningParser(tokenizer=tokenizer)
    req = ChatCompletionRequest(model="qwen", messages=[], stream=False)
    resp: ChatCompletionResponse = _chat_completion_v1(req, sequence)

    assert len(resp.choices) == 1
    first_message = resp.choices[0].message
    assert first_message.content == expected_content
    assert first_message.reasoning_content == expected_reasoning_content
387+
388+
389+
@pytest.mark.parametrize(
    "sequence, expected_content, expected_reasoning_content",
    [
        # without think start token: the whole output is plain content
        (TRUNCATED_SEQUENCE, "".join(TRUNCATED_SEQUENCE), ""),
        # with think start token but no </think>: the whole output is reasoning
        (THINK_START_SEQUENCE + TRUNCATED_SEQUENCE, "", "".join(TRUNCATED_SEQUENCE)),
    ])
def test_truncated_think_stream(sequence, expected_content, expected_reasoning_content):
    """Streaming parse of an output whose <think> block is absent or truncated."""
    tokenizer = DummyTokenizer()
    VariableInterface.tool_parser = Qwen3ToolParser(tokenizer=tokenizer)
    VariableInterface.reasoning_parser = QwenQwQReasoningParser(tokenizer=tokenizer)
    req = ChatCompletionRequest(model="qwen", messages=[], stream=True)
    content, reasoning_content, tool_calls = _stream_parse(req, sequence)

    assert content == expected_content
    # streaming emits the leading newline after <think>; ignore it here
    assert reasoning_content.lstrip() == expected_reasoning_content

0 commit comments

Comments
 (0)