-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.yaml
More file actions
87 lines (81 loc) · 1.76 KB
/
config.yaml
File metadata and controls
87 lines (81 loc) · 1.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Default configuration values (edit as needed).
# NOTE(review): presumably the global RNG seed for reproducible runs; confirm against the training code.
seed: 42
# Data
# NOTE(review): nesting was reconstructed from a flattened copy. All keys down
# to `pitch_aug` are assumed to belong to `data`, and `enabled` / `semitones` /
# `prob` to `data.pitch_aug`; confirm against the config loader.
data:
  musdb_root: ./data/musdb18
  musdbhq_root: ./data/musdb18hq
  segment_seconds: 2.0
  mono: true
  dataset: musdbhq  # musdb | musdbhq
  pitch_aug:
    enabled: false
    semitones: [-2.0, 2.0]
    prob: 0.3
# Audio / STFT
audio:
  # MSST-style defaults (adjust as needed)
  sample_rate: 44100
  n_fft: 8192
  hop_length: 1024
  win_length: 8192
  window: hann
  center: true
# Training
train:
  batch_size: 1
  epochs: 100
  # Keep the decimal point and the signed exponent on the floats below: some
  # YAML 1.1 loaders (e.g. PyYAML) read a bare `2e-4` as a string, not a float.
  lr: 2.0e-4
  weight_decay: 1.0e-2
  grad_clip: 1.0
  amp: true
  log_interval: 50
  val_every_steps: 75
  val_batches: 8
  loss_cf_weight: 1.0
  loss_voc_weight: 0.5
# Diffusion
diffusion:
  timesteps: 200
  beta_start: 1.0e-4
  beta_end: 2.0e-2
  beta_schedule: linear  # linear | cosine
  # NOTE(review): `use_ddim` and `sampler` both name DDIM; which one takes
  # precedence is not visible in this file. Confirm against the sampler code.
  use_ddim: true
  ddim_steps: 50
  val_ddim_steps: 50
  eta: 0.0
  # sampler: ddpm | ddim | (future: dpm-solver, unipc)
  sampler: ddim
  # Validation shallow diffusion options
  # NOTE(review): assumed to live under `diffusion`; confirm against the loader.
  validate_use_shallow: true
  shallow_k: 50
  add_forward_noise: true
# Model
model:
  in_channels: 2  # [noisy, mixture]
  out_channels: 1  # instruments magnitude
  base_channels: 64
  channels_mult: [1, 2, 4]
  attn_resolutions: [4, 8]
  model_type: conformer  # unet | conformer
  # NOTE(review): nesting was reconstructed from a flattened copy. The keys
  # below, including the memory helpers, are assumed to be `model_kwargs`
  # entries; confirm against the model constructor.
  model_kwargs:
    d_model: 128
    n_heads: 4
    d_ff: 256
    num_layers: 4
    kernel_size: 31
    dropout: 0.0
    axis: time  # time | freq | mixed
    # Memory helpers (safe defaults)
    # Process freq bins in chunks when axis='time' to reduce memory.
    freq_chunk: 64
    time_chunk: null  # set e.g., 128 if axis='freq'
    use_checkpoint: false
# Logging / checkpoints
log:
  out_dir: ./logs
  use_wandb: false
  use_tensorboard: true
  tb_log_dir: ./logs/tb
  # NOTE(review): `project` / `entity` look like Weights & Biases settings
  # (only relevant when `use_wandb` is true); confirm against the logger setup.
  project: diff-msst
  entity: null
  save_best: true
  save_last: true