From 9706de6c92d43268cbe00a9881999f40cf4b1e89 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 18 Jun 2026 17:09:13 +0800 Subject: [PATCH] fix get_logger --- src/mcore_bridge/model/gpts/deepseek_v4.py | 2 +- src/mcore_bridge/model/mm_gpts/gemma4.py | 2 +- src/mcore_bridge/utils/dequantizer.py | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mcore_bridge/model/gpts/deepseek_v4.py b/src/mcore_bridge/model/gpts/deepseek_v4.py index 9a957d1..65c9270 100644 --- a/src/mcore_bridge/model/gpts/deepseek_v4.py +++ b/src/mcore_bridge/model/gpts/deepseek_v4.py @@ -503,7 +503,7 @@ def _set_param(self, param, tensor, scale_inv): tensor = fp4_to_fp8(tensor) tensor = tensor.reshape(*param.shape) scale_inv = scale_inv.reshape(-1, scale_inv.shape[-1]) - tensor = Fp8Dequantizer().convert(tensor, scale_inv) + tensor = Fp8Dequantizer(block_size='auto').convert(tensor, scale_inv) if self._is_fp8_param(param): param._high_precision_init_val.copy_(tensor) param.data.copy_(tensor) diff --git a/src/mcore_bridge/model/mm_gpts/gemma4.py b/src/mcore_bridge/model/mm_gpts/gemma4.py index 94b6e12..21624ee 100644 --- a/src/mcore_bridge/model/mm_gpts/gemma4.py +++ b/src/mcore_bridge/model/mm_gpts/gemma4.py @@ -21,7 +21,6 @@ from megatron.core.transformer.moe.moe_layer import MoELayer from megatron.core.transformer.spec_utils import build_module from megatron.core.utils import make_viewless_tensor, nvtx_range_pop, nvtx_range_push -from swift.utils import get_logger from torch import Tensor, nn from transformers import AutoModel, PretrainedConfig from transformers.utils.versions import require_version @@ -29,6 +28,7 @@ from mcore_bridge.bridge import MultimodalGPTBridge from mcore_bridge.config import ModelConfig +from mcore_bridge.utils import get_logger from ..constant import ModelType from ..gpt_model import GPTModel diff --git a/src/mcore_bridge/utils/dequantizer.py b/src/mcore_bridge/utils/dequantizer.py index 96b06c6..1a9e3df 100644 --- a/src/mcore_bridge/utils/dequantizer.py +++ b/src/mcore_bridge/utils/dequantizer.py @@ -5,8 +5,10 @@ class Fp8Dequantizer: - def __init__(self, block_size: Tuple[int, int] = (None, None)): + def __init__(self, block_size: Tuple[int, int] = (128, 128)): # Set to None to enable automatic selection. + if block_size in {None, 'auto'}: + block_size = (None, None) self.block_size = block_size def convert(