diff --git a/data/unet_samples/512px-001_2015_04_19_Das_Gelb_der_Natur.jpg_ddpm100_2024-11-17T11_34_41.294060.mp4 b/data/unet_samples/512px-001_2015_04_19_Das_Gelb_der_Natur.jpg_ddpm100_2024-11-17T11_34_41.294060.mp4 new file mode 100644 index 0000000..757fb77 Binary files /dev/null and b/data/unet_samples/512px-001_2015_04_19_Das_Gelb_der_Natur.jpg_ddpm100_2024-11-17T11_34_41.294060.mp4 differ diff --git a/data/unet_samples/512px-001_2015_04_19_Das_Gelb_der_Natur.jpg_ddpm100_2024-11-17T11_34_41.294060.npy b/data/unet_samples/512px-001_2015_04_19_Das_Gelb_der_Natur.jpg_ddpm100_2024-11-17T11_34_41.294060.npy new file mode 100644 index 0000000..b2c0cfe Binary files /dev/null and b/data/unet_samples/512px-001_2015_04_19_Das_Gelb_der_Natur.jpg_ddpm100_2024-11-17T11_34_41.294060.npy differ diff --git a/data/unet_samples/512px-001_2015_04_19_Das_Gelb_der_Natur.jpg_ddpm100_2024-11-17T11_34_41.294060.png b/data/unet_samples/512px-001_2015_04_19_Das_Gelb_der_Natur.jpg_ddpm100_2024-11-17T11_34_41.294060.png new file mode 100644 index 0000000..ff36fb5 Binary files /dev/null and b/data/unet_samples/512px-001_2015_04_19_Das_Gelb_der_Natur.jpg_ddpm100_2024-11-17T11_34_41.294060.png differ diff --git a/data/unet_samples/candles.jpg_ddpm500_2025-06-03T14_42_03.770349.mp4 b/data/unet_samples/candles.jpg_ddpm500_2025-06-03T14_42_03.770349.mp4 new file mode 100644 index 0000000..abc2f9e Binary files /dev/null and b/data/unet_samples/candles.jpg_ddpm500_2025-06-03T14_42_03.770349.mp4 differ diff --git a/data/unet_samples/candles.jpg_ddpm500_2025-06-03T14_42_03.770349.npy b/data/unet_samples/candles.jpg_ddpm500_2025-06-03T14_42_03.770349.npy new file mode 100644 index 0000000..009cdc1 Binary files /dev/null and b/data/unet_samples/candles.jpg_ddpm500_2025-06-03T14_42_03.770349.npy differ diff --git a/data/unet_samples/candles.jpg_ddpm500_2025-06-03T14_42_03.770349.png b/data/unet_samples/candles.jpg_ddpm500_2025-06-03T14_42_03.770349.png new file mode 100644 index 0000000..0f9d029 Binary files /dev/null and b/data/unet_samples/candles.jpg_ddpm500_2025-06-03T14_42_03.770349.png differ diff --git a/data/unet_samples/first_frame.png_ddpm100_2025-05-27T09_14_24.425748.mp4 b/data/unet_samples/first_frame.png_ddpm100_2025-05-27T09_14_24.425748.mp4 new file mode 100644 index 0000000..8ab9daf Binary files /dev/null and b/data/unet_samples/first_frame.png_ddpm100_2025-05-27T09_14_24.425748.mp4 differ diff --git a/data/unet_samples/first_frame.png_ddpm100_2025-05-27T09_14_24.425748.npy b/data/unet_samples/first_frame.png_ddpm100_2025-05-27T09_14_24.425748.npy new file mode 100644 index 0000000..540a8bb Binary files /dev/null and b/data/unet_samples/first_frame.png_ddpm100_2025-05-27T09_14_24.425748.npy differ diff --git a/data/unet_samples/first_frame.png_ddpm100_2025-05-27T09_14_24.425748.png b/data/unet_samples/first_frame.png_ddpm100_2025-05-27T09_14_24.425748.png new file mode 100644 index 0000000..ab87185 Binary files /dev/null and b/data/unet_samples/first_frame.png_ddpm100_2025-05-27T09_14_24.425748.png differ diff --git a/data/unet_samples/first_frame1.png_ddpm100_2025-05-27T09_21_19.936094.mp4 b/data/unet_samples/first_frame1.png_ddpm100_2025-05-27T09_21_19.936094.mp4 new file mode 100644 index 0000000..d7aa334 Binary files /dev/null and b/data/unet_samples/first_frame1.png_ddpm100_2025-05-27T09_21_19.936094.mp4 differ diff --git a/data/unet_samples/first_frame1.png_ddpm100_2025-05-27T09_21_19.936094.npy b/data/unet_samples/first_frame1.png_ddpm100_2025-05-27T09_21_19.936094.npy new file mode 100644 index 0000000..5c2c888 Binary files /dev/null and b/data/unet_samples/first_frame1.png_ddpm100_2025-05-27T09_21_19.936094.npy differ diff --git a/data/unet_samples/first_frame1.png_ddpm100_2025-05-27T09_21_19.936094.png b/data/unet_samples/first_frame1.png_ddpm100_2025-05-27T09_21_19.936094.png new file mode 100644 index 0000000..058a475 Binary files /dev/null and b/data/unet_samples/first_frame1.png_ddpm100_2025-05-27T09_21_19.936094.png differ diff --git a/data/unet_samples/first_frame2.png_ddpm100_2025-05-27T10_33_35.418203.mp4 b/data/unet_samples/first_frame2.png_ddpm100_2025-05-27T10_33_35.418203.mp4 new file mode 100644 index 0000000..ca71a44 Binary files /dev/null and b/data/unet_samples/first_frame2.png_ddpm100_2025-05-27T10_33_35.418203.mp4 differ diff --git a/data/unet_samples/first_frame2.png_ddpm100_2025-05-27T10_33_35.418203.npy b/data/unet_samples/first_frame2.png_ddpm100_2025-05-27T10_33_35.418203.npy new file mode 100644 index 0000000..308c25a Binary files /dev/null and b/data/unet_samples/first_frame2.png_ddpm100_2025-05-27T10_33_35.418203.npy differ diff --git a/data/unet_samples/first_frame2.png_ddpm100_2025-05-27T10_33_35.418203.png b/data/unet_samples/first_frame2.png_ddpm100_2025-05-27T10_33_35.418203.png new file mode 100644 index 0000000..4b06d74 Binary files /dev/null and b/data/unet_samples/first_frame2.png_ddpm100_2025-05-27T10_33_35.418203.png differ diff --git a/data/unet_samples/first_frame2.png_ddpm100_2025-05-27T12_44_24.659274.npy b/data/unet_samples/first_frame2.png_ddpm100_2025-05-27T12_44_24.659274.npy new file mode 100644 index 0000000..4945005 Binary files /dev/null and b/data/unet_samples/first_frame2.png_ddpm100_2025-05-27T12_44_24.659274.npy differ diff --git a/data/unet_samples/first_frame2.png_ddpm100_2025-05-27T13_11_44.238432.npy b/data/unet_samples/first_frame2.png_ddpm100_2025-05-27T13_11_44.238432.npy new file mode 100644 index 0000000..d82f839 Binary files /dev/null and b/data/unet_samples/first_frame2.png_ddpm100_2025-05-27T13_11_44.238432.npy differ diff --git a/img.png b/img.png new file mode 100644 index 0000000..b9999d4 Binary files /dev/null and b/img.png differ diff --git a/img2.jpg b/img2.jpg new file mode 100644 index 0000000..cde271d Binary files /dev/null and b/img2.jpg differ diff --git a/models/frame_synthesis.py b/models/frame_synthesis.py index ae24961..c43b4a3 100644 --- a/models/frame_synthesis.py +++ b/models/frame_synthesis.py @@ -3,10 +3,10 @@ from typing import Union from .softsplat import softsplat -def get_vgg16_feature_extractor(layers): - from torchvision.models import vgg16, VGG16_Weights +def get_resnet34_feature_extractor(layers): + from torchvision.models import resnet34, ResNet34_Weights from torchvision.models.feature_extraction import create_feature_extractor - m = vgg16(weights=VGG16_Weights.IMAGENET1K_V1) + m = resnet34(weights=ResNet34_Weights.IMAGENET1K_V1) return_nodes = [f"features.{l}" for l in layers] return create_feature_extractor(m, return_nodes) @@ -15,7 +15,7 @@ class VGGLoss(nn.Module): def __init__(self, layers=[3, 8, 15, 22]): super().__init__() - self.feature_extractor = get_vgg16_feature_extractor(layers).eval() + self.feature_extractor = get_resnet34_feature_extractor(layers).eval() for p in self.parameters(): p.requires_grad = False @@ -268,7 +268,7 @@ def forward(self, tenEncone, tenMetricone, tenForward): def forward(self, tenOne, tenForward): tenEncone = self.netEncode(tenOne) - tenMetricone = self.netSoftmetric(tenEncone, tenForward) * 2.0 + tenMetricone = torch.norm(tenForward, p=2, dim=1, keepdim=True) tenWarp = self.netWarp(tenEncone, tenMetricone, tenForward) @@ -338,12 +338,25 @@ def forward(self, tenOne, tenForward): # end @torch.no_grad() -def predict_tensor(src_frame: torch.Tensor, flow: torch.Tensor, model: Synthesis, transforms, batch_size: int = 32, return_tensor: bool = True): - # src_frame and flow should be normalized tensors - out_frames = [] - for i in range(0, flow.shape[0], batch_size): - bs = min(batch_size, flow.shape[0] - i) - out_frames.append(model(src_frame.repeat(bs, 1, 1, 1), flow[i:i + bs])) +def predict_sequence(src_frame: torch.Tensor, flow: torch.Tensor, model: Synthesis, transforms, return_tensor: bool = True): + """ + Sequentially predict frames using the model by updating src_frame every step. + src_frame: initial frame tensor (1, C, H, W) + flow: (N, 2, H, W) optical flow between frames + """ + out_frames = [src_frame] + + current_frame = src_frame + for i in range(flow.shape[0]): + current_flow = flow[i:i+1] # batch size 1 + + pred_frame = model(current_frame, current_flow) + + # Optionally denormalize or deprocess here, or after loop + current_frame = pred_frame + + out_frames.append(current_frame) + out_frames = torch.cat(out_frames, dim=0) if return_tensor: diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..14dd903 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +torch==2.3.0 +torchvision==0.18.0 +diffusers==0.28.0 +torchmetrics==1.4.0 +opencv-python==4.8.0.76 +scipy==1.11.4 +matplotlib==3.7.1 +moviepy==1.0.3 +cupy==12.2.0 + diff --git a/utils/flow.py b/utils/flow.py index 5df111c..6245ee9 100644 --- a/utils/flow.py +++ b/utils/flow.py @@ -65,4 +65,4 @@ def optical_flow_raft(src, tgt, model, transforms, batch_size=1): src_, tgt_ = transforms(src.repeat(e - s, 1, 1, 1), tgt[s:e]) out = model(src_.to(device), tgt_.to(device))[-1] flow.append(out.cpu()) - return torch.cat(flow).permute(0, 2, 3, 1).numpy() + return torch.cat(flow).permute(0, 2, 3, 1).numpy() \ No newline at end of file