16 changes: 8 additions & 8 deletions install_dev.py
@@ -5,22 +5,22 @@
 
 def install_torch_nightly_deps():
     """Install torch related dependencies from pinned nightly"""
-    EXECUTORCH_NIGHTLY_VERSION = "dev20260104"
-    TORCHAO_NIGHTLY_VERSION = "dev20251222"
+    EXECUTORCH_NIGHTLY_VERSION = "dev20260317"
+    TORCHAO_NIGHTLY_VERSION = "dev20260317"
     # Torch nightly is aligned with pinned nightly in https://github.com/pytorch/executorch/blob/main/torch_pin.py#L2
-    TORCH_NIGHTLY_VERSION = "dev20251222"
+    TORCH_NIGHTLY_VERSION = "dev20260317"
     subprocess.check_call(
         [
             sys.executable,
             "-m",
             "pip",
             "install",
             "--no-cache-dir",  # Prevent cached CUDA packages
-            f"executorch==1.1.0.{EXECUTORCH_NIGHTLY_VERSION}",
-            f"torch==2.11.0.{TORCH_NIGHTLY_VERSION}",
-            f"torchvision==0.25.0.{TORCH_NIGHTLY_VERSION}",
-            f"torchaudio==2.10.0.{TORCH_NIGHTLY_VERSION}",
-            f"torchao==0.16.0.{TORCHAO_NIGHTLY_VERSION}",
+            f"executorch==1.3.0.{EXECUTORCH_NIGHTLY_VERSION}",
+            f"torch==2.12.0.{TORCH_NIGHTLY_VERSION}",
+            f"torchvision==0.26.0.{TORCH_NIGHTLY_VERSION}",
+            f"torchaudio==2.11.0.{TORCH_NIGHTLY_VERSION}",
+            f"torchao==0.17.0.{TORCHAO_NIGHTLY_VERSION}",
             "--extra-index-url",
             "https://download.pytorch.org/whl/nightly/cpu",
         ]
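To sanity-check that the new pins actually resolved after running install_dev.py, here is a minimal sketch. It assumes only the standard __version__ attributes on these packages; the local build suffix (e.g. +cpu) varies by platform.

import torch
import torchao

# Nightly wheels embed the pinned date in the version string,
# e.g. "2.12.0.dev20260317+cpu".
assert torch.__version__.startswith("2.12.0.dev20260317"), torch.__version__
assert torchao.__version__.startswith("0.17.0.dev20260317"), torchao.__version__
print("Pinned nightlies resolved:", torch.__version__, torchao.__version__)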
12 changes: 12 additions & 0 deletions optimum/commands/export/executorch.py
@@ -35,6 +35,16 @@ def parse_args_executorch(parser):
         required=True,
         help="Model ID on huggingface.co or path on disk to load model from.",
     )
+    required_group.add_argument(
+        "--gguf_file",
+        type=str,
+        required=False,
+        help="GGUF filename (in a Hub repo) or local path to a GGUF file. "
+        "When set, the model weights and config are loaded from this GGUF file "
+        "instead of safetensors/bin checkpoints. Note: GGUF weights are "
+        "dequantized to float32 before export, which increases peak memory. "
+        "Use --dtype float16 to halve memory usage. Requires: pip install gguf",
+    )
     required_group.add_argument(
         "-o",
         "--output_dir",
@@ -271,6 +281,8 @@ def run(self):
             kwargs["device"] = self.args.device
         if hasattr(self.args, "image_size") and self.args.image_size:
             kwargs["image_size"] = self.args.image_size
+        if hasattr(self.args, "gguf_file") and self.args.gguf_file:
+            kwargs["gguf_file"] = self.args.gguf_file
 
         main_export(
             model_name_or_path=self.args.model,
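For reviewers, a minimal sketch of what the new flag amounts to programmatically: run() simply forwards gguf_file into main_export. The repo ID and GGUF filename below are hypothetical placeholders, and the task/recipe values are assumed from typical optimum-executorch usage, not taken from this diff.

from optimum.exporters.executorch import main_export

main_export(
    model_name_or_path="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",  # hypothetical repo
    task="text-generation",
    recipe="xnnpack",
    output_dir="exported_model",
    gguf_file="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",  # hypothetical filename
)

On the CLI side, the same export would pair --gguf_file with --dtype float16 to keep peak memory down, per the new help text.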
12 changes: 11 additions & 1 deletion optimum/exporters/executorch/tasks/causal_lm.py
@@ -61,7 +61,13 @@ def load_causal_lm_model(model_name_or_path: str, **kwargs) -> CausalLMExportabl
     use_custom_sdpa = use_custom_sdpa or attn_implementation == "custom_sdpa"
     max_seq_len = kwargs.get("max_seq_len", None)
     max_length = max_seq_len if max_seq_len is not None else kwargs.get("max_length", 2048)
-    config = kwargs.get("config") or AutoConfig.from_pretrained(model_name_or_path)
+    gguf_file = kwargs.get("gguf_file", None)
+    if gguf_file:
+        logging.warning(
+            "GGUF weights are dequantized to float32 before export, which increases "
+            "peak memory. Use --dtype float16 to halve memory usage."
+        )
+    config = kwargs.get("config") or AutoConfig.from_pretrained(model_name_or_path, gguf_file=gguf_file)
 
     if hasattr(config, "rope_scaling") and config.rope_scaling is not None:
         # NOTE: To make the model exportable we need to set the rope scaling to default to avoid hitting
@@ -81,13 +87,15 @@ def _load_eager_pretrained(
     cache_implementation,
     batch_size,
     max_length,
+    gguf_file=None,
 ):
     eager_model = AutoModelForCausalLM.from_pretrained(
         model_name_or_path,
         device_map=device,
         dtype=dtype,
         config=config,
         attn_implementation=attn_implementation,
+        gguf_file=gguf_file,
         generation_config=GenerationConfig(
             use_cache=True,
             cache_implementation=cache_implementation,
@@ -110,6 +118,7 @@
             cache_implementation,
             batch_size,
             max_length,
+            gguf_file,
         )
     except ValueError as e:
         if "torch.nn.functional.scaled_dot_product_attention" in str(e):
@@ -124,6 +133,7 @@
                 cache_implementation,
                 batch_size,
                 max_length,
+                gguf_file,
             )
         else:
             raise e
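The gguf_file plumbing above bottoms out in transformers' from_pretrained, which dequantizes GGUF tensors to float32 before any dtype downcast. A standalone sketch of that load path, with a hypothetical repo and filename (needs pip install gguf):

import torch
from transformers import AutoConfig, AutoModelForCausalLM

repo_id = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"  # hypothetical repo
gguf_file = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"  # hypothetical filename

# Config is derived from the GGUF metadata, mirroring the diff's AutoConfig call.
config = AutoConfig.from_pretrained(repo_id, gguf_file=gguf_file)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    gguf_file=gguf_file,
    config=config,
    dtype=torch.float16,  # downcast after the float32 dequant to halve peak memory
)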