forked from KlingAIResearch/MultiShotMaster
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrain_14B_single_node.sh
More file actions
29 lines (24 loc) · 1008 Bytes
/
train_14B_single_node.sh
File metadata and controls
29 lines (24 loc) · 1008 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Launches full fine-tuning of the 14B T2V DiT on a single node, with one
# accelerate process per GPU (8 GPUs, ids 0-7).
#
# We have tested this code on 5-second multi-shot data with a single GPU.
# If you want to train the 14B model on longer video data, you need to
# implement sequence parallelism on top of our code.
# NOTE(review): the launch below uses 8 GPUs; the "single GPU" remark above
# presumably means one sample fits on one GPU without sequence parallelism —
# confirm.

# Clear out leftover Python processes from earlier runs so the GPUs are free.
# WARNING: this signals every process whose name matches "python" that the
# current user may signal, not only previous training jobs.
pkill python

# Activate the conda environment that provides accelerate and the training
# dependencies. Abort if activation fails, so training never starts with
# whatever interpreter happens to be on PATH.
source /m2v_intern/wangqinghe/miniconda3/bin/activate wan_multishot || {
  printf 'error: failed to activate conda env "wan_multishot"\n' >&2
  exit 1
}

# Checkpoint to resume from. The literal string "None" is forwarded to
# --resume_ckpt as-is; presumably train.py treats it as "do not resume" —
# confirm against train.py. To resume from the released weights, use e.g.:
#   RESUME_CKPT="checkpoints/download_models/14B/MultiShotMaster_model_14B.safetensors"
RESUME_CKPT=None

# Output directory: one timestamped folder per run (minute resolution).
# The suffix names the model actually being trained (14B).
TIMESTAMP=$(date +%Y%m%d_%H%M)
OUTPUT_DIR="models/train/14B/${TIMESTAMP}-T2V-14B_full"

# --num_processes must equal the number of GPU ids in CUDA_VISIBLE_DEVICES.
# This is the last command, so the script exits with the launcher's status.
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 accelerate launch --num_processes=8 --multi_gpu train.py \
  --dataset_metadata_path "toy_cases/train_multishot_5s.csv" \
  --height 480 \
  --width 832 \
  --dataset_repeat 1 \
  --learning_rate 1e-5 \
  --num_epochs 100 \
  --remove_prefix_in_ckpt "pipe.dit." \
  --output_path "${OUTPUT_DIR}" \
  --trainable_models "dit" \
  --save_steps 500 \
  --wan_version "t2v-14B" \
  --dataset_num_workers 8 \
  --resume_ckpt "${RESUME_CKPT}" \
  --load_path_json "checkpoints/model_configs/model_path_14B.json"