Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
26f7512
add lora test
Edenzzzz Jun 24, 2025
3dc90b4
add comments
Edenzzzz Jun 24, 2025
a7544d1
fix vae precision; unify param name mapping; reload weights after unm…
Edenzzzz Jun 30, 2025
518ad09
fix fsdp lora merging bug
Edenzzzz Jun 30, 2025
3c24e62
fix
Edenzzzz Jun 30, 2025
e6607da
add weight check
Edenzzzz Jul 1, 2025
301c337
fix default text precision
Edenzzzz Jul 3, 2025
4f8d699
fixes
Edenzzzz Jul 4, 2025
bbd388e
weight test passed
Edenzzzz Jul 4, 2025
649ec92
fix
Edenzzzz Jul 4, 2025
6493f17
add modal
Edenzzzz Jul 4, 2025
8b43e1b
add ssim test
Edenzzzz Jul 4, 2025
e58ab73
ref videos
Edenzzzz Jul 4, 2025
0396998
fix
Edenzzzz Jul 4, 2025
b20d629
add threshold
Edenzzzz Jul 4, 2025
bfb1df1
add to buildkite
Edenzzzz Jul 5, 2025
1dcc5df
add back mypy
Edenzzzz Jul 5, 2025
150913e
fix pr test
Edenzzzz Jul 5, 2025
a611c08
fixes
Edenzzzz Jul 5, 2025
9359b3e
fixes
Edenzzzz Jul 5, 2025
0276749
fix
Edenzzzz Jul 5, 2025
de9a8de
Merge main
Edenzzzz Jul 5, 2025
924cb72
fix
Edenzzzz Jul 5, 2025
e94f104
fix
Edenzzzz Jul 5, 2025
b6f3af1
revert dtype
Edenzzzz Jul 5, 2025
d0519ac
fix
Edenzzzz Jul 5, 2025
462cfd9
fix
Edenzzzz Jul 5, 2025
a2d2d4c
Merge branch 'main' into lora_tests
Edenzzzz Jul 6, 2025
c6c23c4
Merge branch 'main' into lora_tests
Edenzzzz Jul 8, 2025
d5af439
rename test
Edenzzzz Jul 8, 2025
36d6f69
revert pre-commit
Edenzzzz Jul 9, 2025
d8f2ead
Merge branch 'main' into lora_tests
Edenzzzz Jul 9, 2025
c367d8e
add back ref videos
Edenzzzz Jul 9, 2025
a1d13a1
revert mypy in pyproject.toml
Edenzzzz Jul 9, 2025
e0a5a4d
pre-commit
Edenzzzz Jul 9, 2025
2d1b2d8
fix
Edenzzzz Jul 9, 2025
59d2a91
fix vsa test
Edenzzzz Jul 9, 2025
df87747
fix rpc and lora pipeline device
Edenzzzz Jul 11, 2025
855112a
fix fsdp param re-register bug...
Edenzzzz Jul 11, 2025
cb66430
add comments
Edenzzzz Jul 11, 2025
c218951
pre-commit
Edenzzzz Jul 11, 2025
f3e6b33
use fully_shard to re-register
Edenzzzz Jul 12, 2025
4fefab9
use unshard
Edenzzzz Jul 12, 2025
a85aa2d
use single lora in tests
Edenzzzz Jul 15, 2025
36dddba
add comments
Edenzzzz Jul 15, 2025
e8feddd
fix test precision
Edenzzzz Jul 15, 2025
289b578
revert
Edenzzzz Jul 15, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,21 @@ steps:
- TEST_TYPE=ssim
agents:
queue: "default"
- path:
- "fastvideo/v1/tests/lora/**"
- "fastvideo/v1/models/loader/**"
- "fastvideo/v1/tests/transformers/**"
- "fastvideo/v1/pipelines/**"
- "fastvideo/v1/layers/lora/**"
- "pyproject.toml"
- "docker/Dockerfile.python3.12"
config:
command: "timeout 15m .buildkite/scripts/pr_test.sh"
label: "LoRA Inference Tests"
env:
- TEST_TYPE=inference_lora
agents:
queue: "default"
- path:
- "fastvideo/v1/**"
- "pyproject.toml"
Expand Down
4 changes: 4 additions & 0 deletions .buildkite/scripts/pr_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ case "$TEST_TYPE" in
log "Running precision VSA tests..."
MODAL_COMMAND="$MODAL_ENV python3 -m modal run $MODAL_TEST_FILE::run_precision_tests_VSA"
;;
"inference_lora")
log "Running LoRA tests..."
MODAL_COMMAND="$MODAL_ENV python3 -m modal run $MODAL_TEST_FILE::run_inference_lora_tests"
;;
*)
log "Error: Unknown test type: $TEST_TYPE"
exit 1
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/matchers/mypy.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
]
}
]
}
}
2 changes: 1 addition & 1 deletion .github/workflows/pr-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -372,4 +372,4 @@ jobs:
JOB_IDS: '["encoder-test", "vae-test", "transformer-test", "ssim-test-py3.10", "ssim-test-py3.11", "ssim-test-py3.12", "training-test", "training-test-VSA", "inference-test-STA", "precision-test-STA", "precision-test-VSA"]'
RUNPOD_API_KEY: ${{ secrets.RUNPOD_API_KEY }}
GITHUB_RUN_ID: ${{ github.run_id }}
run: python .github/scripts/runpod_cleanup.py
run: python .github/scripts/runpod_cleanup.py
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ repos:
rev: v1.15.0
hooks:
- id: mypy
args: [--python-version, '3.10', --follow-imports, "skip", ]
args: [--python-version, '3.10', --follow-imports, "skip" ]
additional_dependencies: [types-cachetools, types-setuptools, types-PyYAML, types-requests]
- repo: local
hooks:
Expand All @@ -69,7 +69,7 @@ repos:
entry: bash
args:
- -c
- 'git ls-files | grep -v "^fastvideo/v1/tests/ssim/" | grep " " && echo "Filenames should not contain spaces!" && exit 1 || exit 0'
- 'git ls-files | grep -v "^fastvideo/v1/tests/ssim/" | grep -v "^fastvideo/v1/tests/inference/lora/L40S_reference_videos/" | grep " " && echo "Filenames should not contain spaces!" && exit 1 || exit 0'
language: system
always_run: true
pass_filenames: false
Expand Down
16 changes: 13 additions & 3 deletions examples/inference/lora/wan_lora_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ def main():
# Initialize VideoGenerator with the Wan model
generator = VideoGenerator.from_pretrained(
"Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
num_gpus=2,
num_gpus=1,
lora_path="benjamin-paine/steamboat-willie-1.3b",
lora_nickname="steamboat"
)
Expand All @@ -16,6 +16,7 @@ def main():
"num_frames": 81,
"guidance_scale": 5.0,
"num_inference_steps": 32,
"seed": 42,
}
# Generate video with LoRA style
prompt = "steamboat willie style, golden era animation, close-up of a short fluffy monster kneeling beside a melting red candle. the mood is one of wonder and curiosity, as the monster gazes at the flame with wide eyes and open mouth. Its pose and expression convey a sense of innocence and playfulness, as if it is exploring the world around it for the first time. The use of warm colors and dramatic lighting further enhances the cozy atmosphere of the image."
Expand All @@ -29,8 +30,17 @@ def main():
negative_prompt=negative_prompt,
**kwargs
)

generator.set_lora_adapter(lora_nickname="flat_color", lora_path="motimalu/wan-flat-color-1.3b-v2")
del generator

# Until the FSDP resharding bug is fixed, multi-LoRA usage requires reloading the model;
# see https://github.com/pytorch/pytorch/issues/157209
generator = VideoGenerator.from_pretrained(
"Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
num_gpus=1,
lora_path="motimalu/wan-flat-color-1.3b-v2",
lora_nickname="flat_color"
)
# generator.set_lora_adapter(lora_nickname="flat_color", lora_path="motimalu/wan-flat-color-1.3b-v2")
prompt = "flat color, no lineart, blending, negative space, artist:[john kafka|ponsuke kaikai|hara id 21|yoneyama mai|fuzichoco], 1girl, sakura miko, pink hair, cowboy shot, white shirt, floral print, off shoulder, outdoors, cherry blossom, tree shade, wariza, looking up, falling petals, half-closed eyes, white sky, clouds, live2d animation, upper body, high quality cinematic video of a woman sitting under a sakura tree. Dreamy and lonely, the camera close-ups on the face of the woman as she turns towards the viewer. The Camera is steady, This is a cowboy shot. The animation is smooth and fluid."
negative_prompt = "bad quality video,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
video = generator.generate_video(
Expand Down
2 changes: 1 addition & 1 deletion fastvideo/utils/collect_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
DEFAULT_CONDA_PATTERNS = {
"torch",
"numpy",
"mypy"
"cudatoolkit",
"soumith",
"mkl",
Expand All @@ -80,7 +81,6 @@
DEFAULT_PIP_PATTERNS = {
"torch",
"numpy",
"mypy",
Comment thread
Edenzzzz marked this conversation as resolved.
"flake8",
"triton",
"optree",
Expand Down
2 changes: 1 addition & 1 deletion fastvideo/v1/configs/fasthunyuan_t2v.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"use_cpu_offload": false,
"disable_autocast": false,
"precision": "bf16",
"vae_precision": "fp16",
"vae_precision": "fp32",
"vae_tiling": true,
"vae_sp": true,
"vae_config": {
Expand Down
6 changes: 3 additions & 3 deletions fastvideo/v1/configs/models/dits/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
class DiTArchConfig(ArchConfig):
_fsdp_shard_conditions: list = field(default_factory=list)
_compile_conditions: list = field(default_factory=list)
_param_names_mapping: dict = field(default_factory=dict)
_reverse_param_names_mapping: dict = field(default_factory=dict)
_lora_param_names_mapping: dict = field(default_factory=dict)
param_names_mapping: dict = field(default_factory=dict)
reverse_param_names_mapping: dict = field(default_factory=dict)
lora_param_names_mapping: dict = field(default_factory=dict)
_supported_attention_backends: tuple[AttentionBackendEnum, ...] = (
AttentionBackendEnum.SLIDING_TILE_ATTN, AttentionBackendEnum.SAGE_ATTN,
AttentionBackendEnum.FLASH_ATTN, AttentionBackendEnum.TORCH_SDPA,
Expand Down
6 changes: 3 additions & 3 deletions fastvideo/v1/configs/models/dits/hunyuanvideo.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class HunyuanVideoArchConfig(DiTArchConfig):
_compile_conditions: list = field(
default_factory=lambda: [is_double_block, is_single_block, is_txt_in])

_param_names_mapping: dict = field(
param_names_mapping: dict = field(
default_factory=lambda: {
# 1. context_embedder.time_text_embed submodules (specific rules, applied first):
r"^context_embedder\.time_text_embed\.timestep_embedder\.linear_1\.(.*)$":
Expand Down Expand Up @@ -146,8 +146,8 @@ class HunyuanVideoArchConfig(DiTArchConfig):
r"final_layer.linear.\1",
})

# Reverse mapping for saving checkpoints: training -> diffusers
_reverse_param_names_mapping: dict = field(default_factory=lambda: {})
# Reverse mapping for saving checkpoints: custom -> hf
reverse_param_names_mapping: dict = field(default_factory=lambda: {})

patch_size: int = 2
patch_size_t: int = 1
Expand Down
2 changes: 1 addition & 1 deletion fastvideo/v1/configs/models/dits/stepvideo.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class StepVideoArchConfig(DiTArchConfig):
default_factory=lambda:
[lambda n, m: "transformer_blocks" in n and n.split(".")[-1].isdigit()])

_param_names_mapping: dict = field(
param_names_mapping: dict = field(
default_factory=lambda: {
# transformer block
r"^transformer_blocks\.(\d+)\.norm1\.(weight|bias)$":
Expand Down
8 changes: 4 additions & 4 deletions fastvideo/v1/configs/models/dits/wanvideo.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def is_blocks(n: str, m) -> bool:
class WanVideoArchConfig(DiTArchConfig):
_fsdp_shard_conditions: list = field(default_factory=lambda: [is_blocks])

_param_names_mapping: dict = field(
param_names_mapping: dict = field(
default_factory=lambda: {
r"^patch_embedding\.(.*)$":
r"patch_embedding.proj.\1",
Expand Down Expand Up @@ -52,12 +52,12 @@ class WanVideoArchConfig(DiTArchConfig):
r"blocks.\1.self_attn_residual_norm.norm.\2",
})

# Reverse mapping for saving checkpoints: training -> diffusers
_reverse_param_names_mapping: dict = field(default_factory=lambda: {})
# Reverse mapping for saving checkpoints: custom -> hf
reverse_param_names_mapping: dict = field(default_factory=lambda: {})

# Some LoRA adapters use the original official layer names instead of hf layer names,
# so apply this before the param_names_mapping
_lora_param_names_mapping: dict = field(
lora_param_names_mapping: dict = field(
default_factory=lambda: {
r"^blocks\.(\d+)\.self_attn\.q\.(.*)$": r"blocks.\1.attn1.to_q.\2",
r"^blocks\.(\d+)\.self_attn\.k\.(.*)$": r"blocks.\1.attn1.to_k.\2",
Expand Down
4 changes: 2 additions & 2 deletions fastvideo/v1/configs/pipelines/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ class PipelineConfig:
image_encoder_precision: str = "fp32"

# Text encoder configuration
DEFAULT_TEXT_ENCODER_PRECISIONS = ("fp16", )
DEFAULT_TEXT_ENCODER_PRECISIONS = ("fp32", )
text_encoder_configs: tuple[EncoderConfig, ...] = field(
default_factory=lambda: (EncoderConfig(), ))
text_encoder_precisions: tuple[str, ...] = field(
default_factory=lambda: ("fp16", ))
default_factory=lambda: ("fp32", ))
preprocess_text_funcs: tuple[Callable[[str], str], ...] = field(
default_factory=lambda: (preprocess_text, ))
postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], torch.tensor],
Expand Down
11 changes: 8 additions & 3 deletions fastvideo/v1/entrypoints/video_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def from_pretrained(cls,
"""
# If users also provide kwargs, they override the values in FastVideoArgs and PipelineConfig.
kwargs['model_path'] = model_path
fastvideo_args = FastVideoArgs.from_kwargs(kwargs)
fastvideo_args = FastVideoArgs.from_kwargs(**kwargs)

return cls.from_fastvideo_args(fastvideo_args)

Expand Down Expand Up @@ -109,6 +109,7 @@ def generate_video(
prompt: The prompt to use for generation
negative_prompt: The negative prompt to use (overrides the one in fastvideo_args)
output_path: Path to save the video (overrides the one in fastvideo_args)
output_video_name: Name of the video file to save. Default is the first 100 characters of the prompt.
save_video: Whether to save the video to disk
return_frames: Whether to return the raw frames
num_inference_steps: Number of denoising steps (overrides fastvideo_args)
Expand Down Expand Up @@ -228,6 +229,7 @@ def generate_video(
n_tokens=n_tokens,
VSA_sparsity=fastvideo_args.VSA_sparsity,
extra={},
output_video_name=kwargs.get("output_video_name", prompt[:100]),
)

# Run inference
Expand All @@ -251,7 +253,8 @@ def generate_video(
output_path = batch.output_path
if output_path:
os.makedirs(output_path, exist_ok=True)
video_path = os.path.join(output_path, f"{prompt[:100]}.mp4")
video_path = os.path.join(output_path,
f"{batch.output_video_name}.mp4")
imageio.mimsave(video_path, frames, fps=batch.fps, format="mp4")
logger.info("Saved video to %s", video_path)
else:
Expand All @@ -267,7 +270,9 @@ def generate_video(
"generation_time": gen_time
}

def set_lora_adapter(self, lora_nickname: str, lora_path: str) -> None:
def set_lora_adapter(self,
lora_nickname: str,
lora_path: str | None = None) -> None:
self.executor.set_lora_adapter(lora_nickname, lora_path)

def shutdown(self):
Expand Down
2 changes: 1 addition & 1 deletion fastvideo/v1/fastvideo_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def from_cli_args(cls, args: argparse.Namespace) -> "FastVideoArgs":
return cls(**kwargs) # type: ignore

@classmethod
def from_kwargs(cls, kwargs: dict[str, Any]) -> "FastVideoArgs":
def from_kwargs(cls, **kwargs: Any) -> "FastVideoArgs":
kwargs['pipeline_config'] = PipelineConfig.from_kwargs(kwargs)
return cls(**kwargs)

Expand Down
Loading