diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json
index b17b3d17ec..f4d0fc51e2 100644
--- a/xinference/model/llm/llm_family.json
+++ b/xinference/model/llm/llm_family.json
@@ -4767,6 +4767,7 @@
{
"model_format": "pytorch",
"model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
"model_src": {
"huggingface": {
"quantizations": [
@@ -4846,6 +4847,7 @@
{
"model_format": "pytorch",
"model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
"model_src": {
"huggingface": {
"quantizations": [
@@ -4866,6 +4868,7 @@
{
"model_format": "awq",
"model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
"model_src": {
"huggingface": {
"quantizations": [
@@ -4885,6 +4888,7 @@
{
"model_format": "ggufv2",
"model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
"model_src": {
"huggingface": {
"quantizations": [
@@ -5215,6 +5219,7 @@
{
"model_format": "mlx",
"model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
"model_src": {
"huggingface": {
"quantizations": [
@@ -5263,6 +5268,7 @@
{
"model_format": "pytorch",
"model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
"model_src": {
"huggingface": {
"quantizations": [
@@ -5281,6 +5287,7 @@
{
"model_format": "gptq",
"model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
"model_src": {
"huggingface": {
"quantizations": [
@@ -5311,6 +5318,116 @@
"reasoning_start_tag": "<think>",
"reasoning_end_tag": "</think>"
},
+ {
+ "version": 2,
+ "context_length": 131072,
+ "model_name": "Deepseek-V3.1",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "reasoning",
+ "hybrid",
+ "tools"
+ ],
+ "model_description": "DeepSeek-V3.1 is a hybrid model that supports both thinking mode and non-thinking mode.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
+ "model_src": {
+ "huggingface": {
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-V3.1"
+ },
+ "modelscope": {
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-V3.1"
+ }
+ }
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
+ "model_src": {
+ "huggingface": {
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "cpatonn/DeepSeek-V3.1-GPTQ-4bit"
+ },
+ "modelscope": {
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "cpatonn/DeepSeek-V3.1-GPTQ-4bit"
+ }
+ }
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
+ "model_src": {
+ "huggingface": {
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "QuantTrio/DeepSeek-V3.1-AWQ"
+ },
+ "modelscope": {
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "tclf90/DeepSeek-V3.1-AWQ"
+ }
+ }
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
+ "model_src": {
+ "huggingface": {
+ "quantizations": [
+ "8bit",
+ "4bit"
+ ],
+ "model_id": "mlx-community/DeepSeek-V3.1-{quantization}"
+ },
+ "modelscope": {
+ "quantizations": [
+ "8bit",
+ "4bit"
+ ],
+ "model_id": "mlx-community/DeepSeek-V3.1-{quantization}"
+ }
+ }
+ }
+ ],
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if not thinking is defined %}{% set thinking = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- if ns.is_last_user %}{{'<|Assistant|>'}}{%- endif %}{%- set ns.is_last_user = false -%}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}{%- if ns.is_last_user %}{{'<|Assistant|>'}}{%- if message['prefix'] is defined and message['prefix'] and thinking %}{{'<think>'}}{%- else %}{{'</think>'}}{%- endif %}{%- endif %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{%- set content = message['content'] -%}{%- if '</think>' in content %}{%- set content = content.split('</think>', 1)[1] -%}{%- endif %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endfor -%}{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{%- if not thinking %}{{'</think>'}}{%- else %}{{'<think>'}}{%- endif %}{% endif %}",
+ "stop_token_ids": [
+ 1
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ],
+ "reasoning_start_tag": "<think>",
+ "reasoning_end_tag": "</think>",
+ "virtualenv": {
+ "packages": [
+ "transformers==4.53.0"
+ ]
+ }
+ },
{
"version": 2,
"context_length": 131072,
@@ -6242,6 +6359,7 @@
{
"model_format": "pytorch",
"model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
"model_src": {
"huggingface": {
"quantizations": [
@@ -6262,6 +6380,7 @@
{
"model_format": "awq",
"model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
"model_src": {
"huggingface": {
"quantizations": [
@@ -6281,6 +6400,7 @@
{
"model_format": "ggufv2",
"model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
"model_src": {
"huggingface": {
"quantizations": [
@@ -6475,6 +6595,7 @@
{
"model_format": "mlx",
"model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
"model_src": {
"huggingface": {
"quantizations": [
@@ -6517,6 +6638,7 @@
{
"model_format": "pytorch",
"model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
"model_src": {
"huggingface": {
"quantizations": [
@@ -6535,6 +6657,7 @@
{
"model_format": "awq",
"model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
"model_src": {
"huggingface": {
"quantizations": [
@@ -6553,6 +6676,7 @@
{
"model_format": "mlx",
"model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
"model_src": {
"huggingface": {
"quantizations": [
diff --git a/xinference/model/llm/utils.py b/xinference/model/llm/utils.py
index d6e8dd0efb..15bf3e07d8 100644
--- a/xinference/model/llm/utils.py
+++ b/xinference/model/llm/utils.py
@@ -82,7 +82,7 @@
"HuatuoGPT-o1-LLaMA-3.1",
]
-DEEPSEEK_TOOL_CALL_FAMILY = ["deepseek-v3", "deepseek-r1-0528"]
+DEEPSEEK_TOOL_CALL_FAMILY = ["deepseek-v3", "deepseek-r1-0528", "Deepseek-V3.1"]
TOOL_CALL_FAMILY = (
QWEN_TOOL_CALL_FAMILY
diff --git a/xinference/model/llm/vllm/core.py b/xinference/model/llm/vllm/core.py
index c531d34972..df9411c8cd 100644
--- a/xinference/model/llm/vllm/core.py
+++ b/xinference/model/llm/vllm/core.py
@@ -273,6 +273,7 @@ class VLLMGenerateConfig(TypedDict, total=False):
VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Instruct")
VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Thinking")
VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Coder")
+ VLLM_SUPPORTED_CHAT_MODELS.append("Deepseek-V3.1")
if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.0"):
VLLM_SUPPORTED_CHAT_MODELS.append("glm-4.5")