diff --git a/README.md b/README.md
index 6e46573..85fb643 100644
--- a/README.md
+++ b/README.md
@@ -147,6 +147,7 @@ The following is the list of models supported by MCore-Bridge:
| Qwen | qwen2_vl, qwen2_5_vl, qwen2_5_omni
qwen3_vl, qwen3_vl_moe, qwen3_omni_moe, qwen3_asr
qwen3_5, qwen3_5_moe |
| Gemma | gemma4, gemma4_unified |
| GLM | glm4v, glm4v_moe |
+| MiniMax | minimax_m3_vl |
| Kimi | kimi_vl, kimi_k25 |
| InternVL | internvl_chat, internvl |
| Ovis | ovis2_5 |
diff --git a/README_zh.md b/README_zh.md
index 7e03aa2..56980c1 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -144,6 +144,7 @@ uv pip install -e . --torch-backend=auto
| Qwen | qwen2_vl, qwen2_5_vl, qwen2_5_omni
qwen3_vl, qwen3_vl_moe, qwen3_omni_moe, qwen3_asr
qwen3_5, qwen3_5_moe |
| Gemma | gemma4, gemma4_unified |
| GLM | glm4v, glm4v_moe |
+| MiniMax | minimax_m3_vl |
| Kimi | kimi_vl, kimi_k25 |
| InternVL | internvl_chat, internvl |
| Ovis | ovis2_5 |
diff --git a/src/mcore_bridge/config/parser.py b/src/mcore_bridge/config/parser.py
index a97ba70..56f8143 100644
--- a/src/mcore_bridge/config/parser.py
+++ b/src/mcore_bridge/config/parser.py
@@ -26,7 +26,7 @@
'hf_model_type': ['model_type'],
# moe
'moe_ffn_hidden_size': ['moe_intermediate_size'],
- 'moe_shared_expert_intermediate_size': ['shared_expert_intermediate_size'],
+ 'moe_shared_expert_intermediate_size': ['shared_expert_intermediate_size', 'shared_intermediate_size'],
'moe_router_topk': ['num_experts_per_tok', 'moe_topk', 'moe_k', 'top_k_experts'],
'moe_router_num_groups': ['n_group'],
'moe_router_group_topk': ['topk_group'],
@@ -229,6 +229,20 @@ def hf_to_mcore_config(hf_config: PretrainedConfig) -> Dict[str, Any]:
res.setdefault('linear_attention_freq', 4)
elif llm_model_type == 'minimax_m2':
res['add_qkv_bias'] = False
+ elif hf_model_type == 'minimax_m3_vl':
+ text_config = hf_config.text_config
+ res['add_qkv_bias'] = False
+ # Fix intermediate sizes: intermediate_size is MoE expert size, dense_intermediate_size is for dense MLP
+ res['moe_ffn_hidden_size'] = res['ffn_hidden_size']
+ res['ffn_hidden_size'] = text_config.dense_intermediate_size
+ moe_layer_freq_list = text_config.mlp_layer_types
+ if isinstance(moe_layer_freq_list, list):
+ res['moe_layer_freq'] = f"[{','.join('0' if x == 'dense' else '1' for x in moe_layer_freq_list)}]"
+ res['swiglu'] = False
+ res['quick_geglu'] = True
+ res['activation_func_clamp_value'] = 7
+ res['glu_linear_offset'] = 1
+ res['layernorm_zero_centered_gamma'] = True
elif llm_model_type == 'olmoe':
res['qk_layernorm'] = True
elif hf_model_type == 'llama4':
diff --git a/src/mcore_bridge/model/constant.py b/src/mcore_bridge/model/constant.py
index 1c3ae97..7de7672 100644
--- a/src/mcore_bridge/model/constant.py
+++ b/src/mcore_bridge/model/constant.py
@@ -7,6 +7,7 @@ class LLMModelType:
olmoe = 'olmoe'
glm4 = 'glm4'
minimax_m2 = 'minimax_m2'
+ minimax_m3_vl = 'minimax_m3_vl'
hy_v3 = 'hy_v3'
bailing_moe = 'bailing_moe'
bailing_hybrid = 'bailing_hybrid'
diff --git a/src/mcore_bridge/model/mm_gpts/__init__.py b/src/mcore_bridge/model/mm_gpts/__init__.py
index b862ec6..67298dc 100644
--- a/src/mcore_bridge/model/mm_gpts/__init__.py
+++ b/src/mcore_bridge/model/mm_gpts/__init__.py
@@ -1,2 +1,3 @@
# Copyright (c) ModelScope Contributors. All rights reserved.
-from . import gemma4, glm, internvl, kimi_vl, llama4, llava, qwen, qwen3_5, qwen3_5_gdn, qwen3_asr, qwen3_omni, qwen3_vl
+from . import (gemma4, glm, internvl, kimi_vl, llama4, llava, minimax_m3_vl, qwen, qwen3_5, qwen3_5_gdn, qwen3_asr,
+ qwen3_omni, qwen3_vl)
diff --git a/src/mcore_bridge/model/mm_gpts/minimax_m3_vl.py b/src/mcore_bridge/model/mm_gpts/minimax_m3_vl.py
new file mode 100644
index 0000000..69814dc
--- /dev/null
+++ b/src/mcore_bridge/model/mm_gpts/minimax_m3_vl.py
@@ -0,0 +1,89 @@
+# Copyright (c) ModelScope Contributors. All rights reserved.
+# NOTE(review): `torch` and `dist` are not referenced anywhere in this module; confirm they are still needed.
+import torch
+import torch.distributed as dist
+from transformers import PretrainedConfig
+
+from mcore_bridge.bridge import GPTBridge
+
+from ..constant import ModelType
+from ..register import ModelMeta, register_model
+from .utils import HuggingFaceVit
+
+
+class MinimaxM3Vit(HuggingFaceVit):
+    """Vision tower and multimodal projector of MiniMax-M3-VL, built from the transformers implementation.
+
+    The values of ``module_mapping`` are the attribute names assigned in ``prepare_model``; the keys are
+    presumably the matching module paths on the HF model (verify against ``HuggingFaceVit``).
+    """
+    module_mapping = {'model.vision_tower': 'vision_tower', 'model.multi_modal_projector': 'multi_modal_projector'}
+    _vision_tower = ['vision_tower']
+    _aligner = ['multi_modal_projector']
+
+    def prepare_model(self, hf_config: PretrainedConfig):
+        """Create ``vision_tower`` and ``multi_modal_projector`` and cast both to ``hf_config.dtype``.
+
+        Args:
+            hf_config: HF config. Its ``vision_config`` is passed to the vision model; the full config
+                is passed to the projector.
+        """
+        # Function-scope import: the transformers MiniMax-M3-VL module is only loaded when this method runs.
+        from transformers.models.minimax_m3_vl.modeling_minimax_m3_vl import (MiniMaxM3VLMultiModalProjector,
+                                                                              MiniMaxM3VLVisionModel)
+        self.vision_tower = MiniMaxM3VLVisionModel(hf_config.vision_config).to(hf_config.dtype)
+        self.multi_modal_projector = MiniMaxM3VLMultiModalProjector(hf_config).to(hf_config.dtype)
+
+    def get_inputs_embeds(self, inputs_embeds, **kwargs):
+        """Forward ``inputs_embeds`` and the keyword arguments (as a dict) to ``_hf_get_inputs_embeds``."""
+        # NOTE(review): this class never assigns `self.visual` (prepare_model sets `vision_tower` and
+        # `multi_modal_projector`); confirm that HuggingFaceVit provides it.
+        return self._hf_get_inputs_embeds(inputs_embeds, kwargs, self.visual, self.hf_config)
+
+
+class MinimaxM3Bridge(GPTBridge):
+    """Weight bridge for MiniMax-M3-VL: sets the HF key names and renames expert projections in MoE layers."""
+    hf_layers_prefix = 'language_model.model.layers'
+    hf_mtp_prefix = 'language_model.model.layers'
+    hf_embed_key = 'language_model.model.embed_tokens.weight'
+    hf_final_layernorm_key = 'language_model.model.norm.weight'
+    hf_lm_head_key = 'language_model.lm_head.weight'
+    hf_expert_bias_key = 'e_score_correction_bias'
+
+    def _set_moe_state(
+        self,
+        mg_mlp,
+        hf_state_dict,
+        hf_prefix: str,
+        layer_idx: int,
+        to_mcore: bool,
+        is_mtp: bool = False,
+    ):
+        """Run the parent MoE conversion, mapping ``w1/w3/w2`` keys to/from ``gate_proj/up_proj/down_proj``.
+
+        With ``to_mcore`` set, keys are renamed to the ``*_proj`` form before the parent call; otherwise the
+        parent's result is renamed back to the ``w*`` form before it is returned.
+        """
+        # NOTE(review): the renames are plain substring replacements applied to every key in the dict;
+        # confirm no other key (e.g. a shared-expert projection) contains these substrings.
+        if to_mcore:
+            hf_state_dict = {
+                k.replace('.w1.', '.gate_proj.').replace('.w3.', '.up_proj.').replace('.w2.', '.down_proj.'): v
+                for k, v in hf_state_dict.items()
+            }
+        hf_state_dict = super()._set_moe_state(mg_mlp, hf_state_dict, hf_prefix, layer_idx, to_mcore, is_mtp)
+        if not to_mcore:
+            hf_state_dict = {
+                k.replace('.gate_proj.', '.w1.').replace('.up_proj.', '.w3.').replace('.down_proj.', '.w2.'): v
+                for k, v in hf_state_dict.items()
+            }
+        return hf_state_dict
+
+
+# Register the bridge and vision classes under ModelType.minimax_m3_vl.
+register_model(
+    ModelMeta(
+        ModelType.minimax_m3_vl,
+        ['minimax_m3_vl'],
+        bridge_cls=MinimaxM3Bridge,
+        visual_cls=MinimaxM3Vit,
+    ))