Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ The following is the list of models supported by MCore-Bridge:
| Qwen | qwen2_vl, qwen2_5_vl, qwen2_5_omni<br />qwen3_vl, qwen3_vl_moe, qwen3_omni_moe, qwen3_asr<br />qwen3_5, qwen3_5_moe |
| Gemma | gemma4, gemma4_unified |
| GLM | glm4v, glm4v_moe |
| MiniMax | minimax_m3_vl |
| Kimi | kimi_vl, kimi_k25 |
| InternVL | internvl_chat, internvl |
| Ovis | ovis2_5 |
Expand Down
1 change: 1 addition & 0 deletions README_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ uv pip install -e . --torch-backend=auto
| Qwen | qwen2_vl, qwen2_5_vl, qwen2_5_omni<br />qwen3_vl, qwen3_vl_moe, qwen3_omni_moe, qwen3_asr<br />qwen3_5, qwen3_5_moe |
| Gemma | gemma4, gemma4_unified |
| GLM | glm4v, glm4v_moe |
| MiniMax | minimax_m3_vl |
| Kimi | kimi_vl, kimi_k25 |
| InternVL | internvl_chat, internvl |
| Ovis | ovis2_5 |
Expand Down
16 changes: 15 additions & 1 deletion src/mcore_bridge/config/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
'hf_model_type': ['model_type'],
# moe
'moe_ffn_hidden_size': ['moe_intermediate_size'],
'moe_shared_expert_intermediate_size': ['shared_expert_intermediate_size'],
'moe_shared_expert_intermediate_size': ['shared_expert_intermediate_size', 'shared_intermediate_size'],
'moe_router_topk': ['num_experts_per_tok', 'moe_topk', 'moe_k', 'top_k_experts'],
'moe_router_num_groups': ['n_group'],
'moe_router_group_topk': ['topk_group'],
Expand Down Expand Up @@ -229,6 +229,20 @@ def hf_to_mcore_config(hf_config: PretrainedConfig) -> Dict[str, Any]:
res.setdefault('linear_attention_freq', 4)
elif llm_model_type == 'minimax_m2':
res['add_qkv_bias'] = False
elif hf_model_type == 'minimax_m3_vl':
text_config = hf_config.text_config
res['add_qkv_bias'] = False
# Fix intermediate sizes: intermediate_size is MoE expert size, dense_intermediate_size is for dense MLP
res['moe_ffn_hidden_size'] = res['ffn_hidden_size']
res['ffn_hidden_size'] = text_config.dense_intermediate_size
moe_layer_freq_list = text_config.mlp_layer_types
if isinstance(moe_layer_freq_list, list):
res['moe_layer_freq'] = f"[{','.join('0' if x == 'dense' else '1' for x in moe_layer_freq_list)}]"
res['swiglu'] = False
res['quick_geglu'] = True
res['activation_func_clamp_value'] = 7
res['glu_linear_offset'] = 1
res['layernorm_zero_centered_gamma'] = True
elif llm_model_type == 'olmoe':
res['qk_layernorm'] = True
elif hf_model_type == 'llama4':
Expand Down
1 change: 1 addition & 0 deletions src/mcore_bridge/model/constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class LLMModelType:
olmoe = 'olmoe'
glm4 = 'glm4'
minimax_m2 = 'minimax_m2'
minimax_m3_vl = 'minimax_m3_vl'
hy_v3 = 'hy_v3'
bailing_moe = 'bailing_moe'
bailing_hybrid = 'bailing_hybrid'
Expand Down
3 changes: 2 additions & 1 deletion src/mcore_bridge/model/mm_gpts/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# Copyright (c) ModelScope Contributors. All rights reserved.
from . import gemma4, glm, internvl, kimi_vl, llama4, llava, qwen, qwen3_5, qwen3_5_gdn, qwen3_asr, qwen3_omni, qwen3_vl
from . import (gemma4, glm, internvl, kimi_vl, llama4, llava, minimax_m3_vl, qwen, qwen3_5, qwen3_5_gdn, qwen3_asr,
qwen3_omni, qwen3_vl)
65 changes: 65 additions & 0 deletions src/mcore_bridge/model/mm_gpts/minimax_m3_vl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Copyright (c) ModelScope Contributors. All rights reserved.
import torch
import torch.distributed as dist
from transformers import PretrainedConfig

from mcore_bridge.bridge import GPTBridge

from ..constant import ModelType
from ..register import ModelMeta, register_model
from .utils import HuggingFaceVit


class MinimaxM3Vit(HuggingFaceVit):
    """Vision-side module for MiniMax-M3-VL, assembled from the HuggingFace implementation.

    Holds the HuggingFace vision tower and multi-modal projector as
    ``vision_tower`` and ``multi_modal_projector``.
    """

    # Presumably HF module path -> attribute name on this module; confirm against HuggingFaceVit.
    module_mapping = {
        'model.vision_tower': 'vision_tower',
        'model.multi_modal_projector': 'multi_modal_projector',
    }
    # Attribute names of the vision encoder and of the aligner (projector) submodules.
    _vision_tower = ['vision_tower']
    _aligner = ['multi_modal_projector']

    def prepare_model(self, hf_config: PretrainedConfig):
        """Instantiate the vision tower and the projector, both cast to ``hf_config.dtype``.

        Args:
            hf_config: HuggingFace config; ``vision_config`` builds the vision tower,
                the full config builds the projector.
        """
        # Imported lazily so that the module can be loaded without this transformers model.
        from transformers.models.minimax_m3_vl.modeling_minimax_m3_vl import (MiniMaxM3VLMultiModalProjector,
                                                                              MiniMaxM3VLVisionModel)
        tower = MiniMaxM3VLVisionModel(hf_config.vision_config)
        self.vision_tower = tower.to(hf_config.dtype)
        projector = MiniMaxM3VLMultiModalProjector(hf_config)
        self.multi_modal_projector = projector.to(hf_config.dtype)

    def get_inputs_embeds(self, inputs_embeds, **kwargs):
        """Delegate to ``_hf_get_inputs_embeds`` with the collected keyword arguments.

        Args:
            inputs_embeds: Embeddings forwarded as-is to the helper.
            **kwargs: Extra inputs, passed to the helper as a single dict.

        Returns:
            Whatever ``_hf_get_inputs_embeds`` returns.
        """
        # NOTE(review): ``self.visual`` is not assigned in this class (only ``vision_tower`` and
        # ``multi_modal_projector`` are) -- presumably provided by HuggingFaceVit; confirm.
        return self._hf_get_inputs_embeds(inputs_embeds, kwargs, self.visual, self.hf_config)


class MinimaxM3Bridge(GPTBridge):
    """GPT bridge for MiniMax-M3-VL with expert-weight key renaming around the parent MoE logic."""

    # HF-side key names/prefixes; the language model lives under ``language_model.``.
    hf_layers_prefix = 'language_model.model.layers'
    # The MTP prefix is set to the same value as the regular layers prefix.
    hf_mtp_prefix = 'language_model.model.layers'
    hf_embed_key = 'language_model.model.embed_tokens.weight'
    hf_final_layernorm_key = 'language_model.model.norm.weight'
    hf_lm_head_key = 'language_model.lm_head.weight'
    hf_expert_bias_key = 'e_score_correction_bias'

    def _set_moe_state(self, mg_mlp, hf_state_dict, hf_prefix: str, layer_idx: int, to_mcore: bool,
                       is_mtp: bool = False):
        """Run the parent MoE state conversion with ``w1/w3/w2`` <-> ``gate_proj/up_proj/down_proj`` renaming.

        Args:
            mg_mlp: Forwarded unchanged to the parent implementation.
            hf_state_dict: Mapping of HF keys to values; keys are renamed before the
                parent call when ``to_mcore`` is true.
            hf_prefix: Forwarded unchanged to the parent implementation.
            layer_idx: Forwarded unchanged to the parent implementation.
            to_mcore: Selects the direction; true renames input keys, false renames output keys.
            is_mtp: Forwarded unchanged to the parent implementation.

        Returns:
            The parent's result; when ``to_mcore`` is false its keys are renamed back
            to the ``w1/w3/w2`` form.
        """
        if to_mcore:
            # Present w1/w3/w2 keys to the parent under gate_proj/up_proj/down_proj names.
            renamed = {}
            for key, value in hf_state_dict.items():
                key = key.replace('.w1.', '.gate_proj.')
                key = key.replace('.w3.', '.up_proj.')
                key = key.replace('.w2.', '.down_proj.')
                renamed[key] = value
            hf_state_dict = renamed

        hf_state_dict = super()._set_moe_state(mg_mlp, hf_state_dict, hf_prefix, layer_idx, to_mcore, is_mtp)
        if to_mcore:
            return hf_state_dict

        # Exporting: map the parent's gate_proj/up_proj/down_proj keys back to w1/w3/w2.
        restored = {}
        for key, value in hf_state_dict.items():
            key = key.replace('.gate_proj.', '.w1.')
            key = key.replace('.up_proj.', '.w3.')
            key = key.replace('.down_proj.', '.w2.')
            restored[key] = value
        return restored


# Register the MiniMax-M3-VL model type with its bridge class and its visual module class.
register_model(
    ModelMeta(
        ModelType.minimax_m3_vl,
        ['minimax_m3_vl'],
        bridge_cls=MinimaxM3Bridge,
        visual_cls=MinimaxM3Vit,
    ))
Loading