diff --git a/docs/zh_cn/supported_models/supported_models.md b/docs/zh_cn/supported_models/supported_models.md index d31b566953..0e56fe320d 100644 --- a/docs/zh_cn/supported_models/supported_models.md +++ b/docs/zh_cn/supported_models/supported_models.md @@ -133,7 +133,7 @@ | QWen2.5-VL | 3B - 72B | MLLM | Yes | Yes | - | - | Yes | | QWen2-MoE | A14.57B | LLM | Yes | - | No | No | - | | QWen3 | 0.6B-235B | LLM | Yes | Yes | No | No | Yes | -| DeepSeek-V2 | 16B | LLM | No | Yes | No | No | - | +| DeepSeek-V2 | 16B | LLM | Yes | Yes | No | No | - | | InternVL(v1.5) | 2B-26B | MLLM | Yes | - | Yes | Yes | - | | InternVL2 | 1B-40B | MLLM | Yes | Yes | Yes | Yes | Yes | | InternVL2.5 | 1B-78B | MLLM | Yes | Yes | Yes | Yes | Yes | diff --git a/lmdeploy/pytorch/backends/dlinfer/ascend/op_backend.py b/lmdeploy/pytorch/backends/dlinfer/ascend/op_backend.py index dfa4370ef5..fcd955bcb1 100644 --- a/lmdeploy/pytorch/backends/dlinfer/ascend/op_backend.py +++ b/lmdeploy/pytorch/backends/dlinfer/ascend/op_backend.py @@ -260,11 +260,12 @@ def get_total_slots(): kv_seqlens = kv_seqlens.repeat_interleave(step_context.q_seqlens, 0) else: if step_context.is_decoding: - kv_seqlens_cpu = step_context.kv_seqlens.cpu() + kv_seqlens_cpu = step_context.kv_seqlens.cpu().to(torch.int32) elif is_unpaged_prefill: pass else: - kv_seqlens_cpu = step_context.kv_seqlens.repeat_interleave(step_context.q_seqlens, 0).cpu() + kv_seqlens_cpu = step_context.kv_seqlens.repeat_interleave(step_context.q_seqlens, + 0).cpu().to(torch.int32) block_offsets_int32 = step_context.block_offsets.to(torch.int32) step_context.block_offsets = block_offsets_int32\ .repeat_interleave(step_context.q_seqlens, 0)