From a91365f0363c2cc61148c4194b169b0b27bae49e Mon Sep 17 00:00:00 2001 From: Cincinnati Podcast Studio Date: Sat, 16 May 2026 11:08:36 -0400 Subject: [PATCH] rsv-ben: fix fps detection for CFR reference clips repairRsvBen() read the video frame duration only from Track::times_, but getSampleTimes() collapses a constant-rate stts into constant_duration_ and leaves times_ empty. Every Sony camera records CFR, so times_ was empty and video_duration_per_sample kept its 1000 default. fps was therefore computed as 24 instead of 23.976 (24000/1001), making the per-GOP audio chunk 96000 bytes instead of 96096. That ~96-byte/GOP error appended PCM to each GOP's last video frame (one corrupt frame per GOP -> ~0.5s judder) and truncated audio by the same amount (~4.5s short over 75 min) for NTSC-fractional footage (23.976/29.97/59.94). Fix: use constant_duration_ when times_ is empty. Also compute audio_samples_per_chunk with exact integer rounding so float truncation can't drop a sample at other rates. Verified on a 58GB Sony FX30 XAVC S .RSV: audio/video durations now match exactly and the file decodes with zero per-GOP errors. --- src/rsv.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/rsv.cpp b/src/rsv.cpp index 2840326..be44e06 100644 --- a/src/rsv.cpp +++ b/src/rsv.cpp @@ -103,7 +103,14 @@ void Mp4::repairRsvBen(const string& filename) { if (video_track_idx >= 0) { Track& video_track = tracks_[video_track_idx]; video_timescale = video_track.timescale_; - if (!video_track.times_.empty()) { + // CFR reference clips (all Sony cameras) collapse stts into + // constant_duration_ and leave times_ empty. Must read it, otherwise + // video_duration_per_sample keeps its 1000 default -> fps wrongly 24 + // instead of 23.976 (24000/1001) -> audio chunk size off by ~96 + // bytes/GOP -> 1 corrupt video frame/GOP + short audio. + if (video_track.constant_duration_ != -1) { + video_duration_per_sample = video_track.constant_duration_; + } else if (!video_track.times_.empty()) { video_duration_per_sample = video_track.times_[0]; } logg(V, "video timescale: ", video_timescale, ", duration per sample: ", video_duration_per_sample, "\n"); @@ -216,7 +223,12 @@ void Mp4::repairRsvBen(const string& filename) { // Audio samples per chunk = GOP duration * audio_sample_rate double fps = (double)video_timescale / video_duration_per_sample; double gop_duration_sec = (double)frames_per_gop / fps; - int audio_samples_per_chunk = (int)(gop_duration_sec * audio_sample_rate); + // Exact integer rational: samples = frames_per_gop * sample_rate * + // dur_per_sample / timescale (rounded). Avoids float truncation that + // would drop a sample and re-introduce the boundary error. + int audio_samples_per_chunk = (int)( + ((long long)frames_per_gop * audio_sample_rate * video_duration_per_sample + + video_timescale / 2) / video_timescale); int audio_chunk_size_per_track = audio_samples_per_chunk * audio_sample_size; // Total audio size in RSV is for all tracks combined int total_audio_chunk_size = audio_chunk_size_per_track * max(1, num_audio_tracks);