From 76aa05ca2ddef869258435a1a1f71f48217240aa Mon Sep 17 00:00:00 2001 From: Amonoman Date: Mon, 22 Jun 2026 18:57:21 +0200 Subject: [PATCH 01/10] feat(cast): add AC4 audio codec detection for ISO BMFF containers Support detecting AC4 audio (used in iOS/MOV containers) in IsoBmffAudioCodecDetector, with corresponding unit tests. --- .../data/service/cast/IsoBmffAudioCodecDetector.kt | 1 + .../data/service/cast/IsoBmffAudioCodecDetectorTest.kt | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/app/src/main/java/com/theveloper/pixelplay/data/service/cast/IsoBmffAudioCodecDetector.kt b/app/src/main/java/com/theveloper/pixelplay/data/service/cast/IsoBmffAudioCodecDetector.kt index c12453a7f..2033f4131 100644 --- a/app/src/main/java/com/theveloper/pixelplay/data/service/cast/IsoBmffAudioCodecDetector.kt +++ b/app/src/main/java/com/theveloper/pixelplay/data/service/cast/IsoBmffAudioCodecDetector.kt @@ -87,6 +87,7 @@ internal object IsoBmffAudioCodecDetector { "mp4a" -> "audio/mp4a-latm" "ac-3" -> "audio/ac3" "ec-3" -> "audio/eac3" + "ac-4" -> "audio/ac4" "flac" -> "audio/flac" "opus" -> "audio/opus" else -> null diff --git a/app/src/test/java/com/theveloper/pixelplay/data/service/cast/IsoBmffAudioCodecDetectorTest.kt b/app/src/test/java/com/theveloper/pixelplay/data/service/cast/IsoBmffAudioCodecDetectorTest.kt index 85fad217d..4978196e7 100644 --- a/app/src/test/java/com/theveloper/pixelplay/data/service/cast/IsoBmffAudioCodecDetectorTest.kt +++ b/app/src/test/java/com/theveloper/pixelplay/data/service/cast/IsoBmffAudioCodecDetectorTest.kt @@ -23,6 +23,15 @@ class IsoBmffAudioCodecDetectorTest { assertThat(codec).isEqualTo("audio/mp4a-latm") } + @Test + fun detectAudioCodec_returnsAc4SampleEntry() { + val bytes = mp4WithTracks(audioTrack("ac-4")) + + val codec = IsoBmffAudioCodecDetector.detectAudioCodec(bytes) + + assertThat(codec).isEqualTo("audio/ac4") + } + @Test fun detectAudioCodec_skipsVideoTracks() { val bytes = mp4WithTracks( From d498b37ba3570394333d53c647ff6331e6296fa6 Mon Sep 17 00:00:00 2001 From: Amonoman Date: Fri, 26 Jun 2026 13:29:02 +0200 Subject: [PATCH 02/10] perf(sync): parallelize Telegram metadata refinement in SyncWorker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Telegram sync was reading each song's file (file.exists() + ID3 tag parse via AudioMetadataReader) one at a time in a sequential forEach, inside syncTelegramData's chunk loop. For large channels (~100k songs) this serial per-song file IO was the dominant cost, taking on the order of 10 minutes. Extract that IO/CPU-bound step and run it concurrently per chunk using the same Semaphore + async/awaitAll pattern already used elsewhere in this file (see the MediaStore processing phase), bounded by a new TELEGRAM_METADATA_READ_CONCURRENCY = 8 to avoid saturating disk IO. The sequential artist/album dedup maps that follow remain untouched, since they mutate shared per-chunk state and aren't safe to parallelize as-is. Also drops a redundant resolveAlbumArtUri() call that was happening twice per song when a local file was found (value doesn't change between the two call sites). No behavior change to the resulting song/album/artist data — only the ordering/concurrency of how it's computed. Co-Authored-By: Claude --- .../pixelplay/data/worker/SyncWorker.kt | 177 +++++++++++++----- 1 file changed, 130 insertions(+), 47 deletions(-) diff --git a/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt b/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt index 46ed85698..7d9540f6d 100644 --- a/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt +++ b/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt @@ -620,9 +620,7 @@ constructor( val representativeAlbumArt = songsInAlbum.firstNotNullOfOrNull { it.albumArtUriString } val determinedAlbumArtist = chooseAlbumDisplayArtist( songs = songsInAlbum, - preferAlbumArtist = groupByAlbumArtist, - artistDelimiters = artistDelimiters, - wordDelimiters = wordDelimiters + preferAlbumArtist = groupByAlbumArtist ) val determinedAlbumArtistId = resolveAlbumDisplayArtistId( displayArtist = determinedAlbumArtist, @@ -1223,6 +1221,11 @@ constructor( // Number of Telegram songs processed per DB flush. Keeps peak memory bounded // regardless of channel size (e.g. 65k songs → ~130 flushes of 500 each). private const val TELEGRAM_SYNC_CHUNK_SIZE = 500 + // Bounded parallelism for the per-song file.exists()+tag-read step within a chunk. + // This was previously fully sequential, which is what made very large Telegram + // channels (e.g. 100k songs) take on the order of 10 minutes to sync. Kept modest + // (same order as the MediaStore processing semaphore) to avoid saturating disk IO. + private const val TELEGRAM_METADATA_READ_CONCURRENCY = 8 private const val NETEASE_SONG_ID_OFFSET = 3_000_000_000_000L private const val NETEASE_ALBUM_ID_OFFSET = 4_000_000_000_000L @@ -1319,6 +1322,30 @@ constructor( } } + /** + * Result of refining a single Telegram song's metadata against its local file (ID3 tags), + * if one exists on disk. Computed in [refineTelegramSongMetadata], which is the part of + * [syncTelegramData]'s per-song work that's safe to parallelize (pure file IO + parsing, + * no shared mutable state). + */ + private data class RefinedTelegramMetadata( + val channelName: String, + val title: String, + val artistName: String, + val albumName: String, + val albumArtist: String, + val dateAdded: Long, + val year: Int, + val trackNumber: Int, + val discNumber: Int?, + val genre: String?, + val lyrics: String?, + val duration: Long, + val bitrate: Int?, + val sampleRate: Int?, + val albumArtUri: String? + ) + // Logic to sync Telegram songs into main DB with Unified Library Support. // // Memory safety: songs are processed and flushed to the DB in chunks of @@ -1358,6 +1385,12 @@ constructor( val albumSongCounts = mutableMapOf() var totalSynced = 0 + // Bounds concurrent file reads during metadata refinement below. This is the + // expensive part of the loop (file.exists() + tag parsing per song), so it's the + // only part run in parallel; the artist/album dedup maps that follow are mutated + // from a single thread per chunk and stay sequential, same as before. + val metadataReadSemaphore = Semaphore(TELEGRAM_METADATA_READ_CONCURRENCY) + telegramSongs.chunked(TELEGRAM_SYNC_CHUNK_SIZE).forEach { chunk -> // Per-chunk collections — allocated, used, then released each iteration. val songsToInsert = ArrayList(chunk.size) @@ -1365,54 +1398,104 @@ constructor( val albumsToInsert = mutableMapOf() val crossRefsToInsert = mutableListOf() - chunk.forEach { tSong -> - val channelName = channels[tSong.chatId]?.title ?: "Telegram Stream" + // 2. Metadata Refinement (ID3 for Downloaded Files) — done first and in + // parallel for the whole chunk, since each song's file read is independent + // IO/CPU work. Was previously sequential, one file at a time; with 100k+ + // songs that serial file.exists()+tag-parse cost is what made large Telegram + // libraries take ~10 minutes to sync. The merge logic that follows (artist / + // album dedup maps) stays sequential, same as before — only the per-song file + // read is parallelized here. + val refinedChunk = coroutineScope { + chunk.map { tSong -> + async { + metadataReadSemaphore.withPermit { + val channelName = channels[tSong.chatId]?.title ?: "Telegram Stream" + + var realTitle = tSong.title + var realArtistName = tSong.artist + var realAlbumName = channelName + val realDateAdded = tSong.dateAdded + var realYear = 0 + var realTrackNumber = 0 + var realDiscNumber: Int? = null + var realAlbumArtist = "Telegram" + var realGenre: String? = null + var realLyrics: String? = null + var realDuration = tSong.duration + var realBitrate: Int? = null + var realSampleRate: Int? = null + val resolvedAlbumArtUri = tSong.resolveAlbumArtUri() + + val file = File(tSong.filePath) + if (tSong.filePath.isNotEmpty() && file.exists()) { + try { + AudioMetadataReader.read(file, readArtwork = false)?.let { meta -> + if (!meta.title.isNullOrBlank()) realTitle = meta.title + if (!meta.artist.isNullOrBlank()) realArtistName = meta.artist + if (!meta.album.isNullOrBlank()) realAlbumName = meta.album + if (!meta.albumArtist.isNullOrBlank()) { + realAlbumArtist = meta.albumArtist + } else if (!realArtistName.isBlank()) { + realAlbumArtist = realArtistName + } + if (!meta.genre.isNullOrBlank()) realGenre = meta.genre + if (!meta.lyrics.isNullOrBlank()) realLyrics = meta.lyrics + if (meta.trackNumber != null) realTrackNumber = meta.trackNumber + if (meta.discNumber != null) realDiscNumber = meta.discNumber + if (meta.year != null) realYear = meta.year + if (meta.durationMs != null && meta.durationMs > 0L) realDuration = meta.durationMs + if (meta.bitrate != null && meta.bitrate > 0) realBitrate = meta.bitrate + if (meta.sampleRate != null && meta.sampleRate > 0) realSampleRate = meta.sampleRate + } + // resolveAlbumArtUri() doesn't change after the tag read, so unlike + // before, it's computed once above instead of a second time here. + } catch (e: Exception) { + // Ignore read errors, fall back to TdApi metadata + } + } + + RefinedTelegramMetadata( + channelName = channelName, + title = realTitle, + artistName = realArtistName, + albumName = realAlbumName, + albumArtist = realAlbumArtist, + dateAdded = realDateAdded, + year = realYear, + trackNumber = realTrackNumber, + discNumber = realDiscNumber, + genre = realGenre, + lyrics = realLyrics, + duration = realDuration, + bitrate = realBitrate, + sampleRate = realSampleRate, + albumArtUri = resolvedAlbumArtUri + ).let { tSong to it } + } + } + }.awaitAll() + } + + refinedChunk.forEach { (tSong, refined) -> + val channelName = refined.channelName val songId = -(tSong.id.hashCode().toLong().absoluteValue) val finalSongId = if (songId == 0L) -1L else songId syncedTelegramSongIds.add(finalSongId) - // 2. Metadata Refinement (ID3 for Downloaded Files) - var realTitle = tSong.title - var realArtistName = tSong.artist - var realAlbumName = channelName - var realDateAdded = tSong.dateAdded - var realYear = 0 - var realTrackNumber = 0 - var realDiscNumber: Int? = null - var realAlbumArtist = "Telegram" - var realGenre: String? = null - var realLyrics: String? = null - var realDuration = tSong.duration - var realBitrate: Int? = null - var realSampleRate: Int? = null - var resolvedAlbumArtUri = tSong.resolveAlbumArtUri() - - val file = java.io.File(tSong.filePath) - if (tSong.filePath.isNotEmpty() && file.exists()) { - try { - AudioMetadataReader.read(file, readArtwork = false)?.let { meta -> - if (!meta.title.isNullOrBlank()) realTitle = meta.title - if (!meta.artist.isNullOrBlank()) realArtistName = meta.artist - if (!meta.album.isNullOrBlank()) realAlbumName = meta.album - if (!meta.albumArtist.isNullOrBlank()) { - realAlbumArtist = meta.albumArtist - } else if (!realArtistName.isBlank()) { - realAlbumArtist = realArtistName - } - if (!meta.genre.isNullOrBlank()) realGenre = meta.genre - if (!meta.lyrics.isNullOrBlank()) realLyrics = meta.lyrics - if (meta.trackNumber != null) realTrackNumber = meta.trackNumber - if (meta.discNumber != null) realDiscNumber = meta.discNumber - if (meta.year != null) realYear = meta.year - if (meta.durationMs != null && meta.durationMs > 0L) realDuration = meta.durationMs - if (meta.bitrate != null && meta.bitrate > 0) realBitrate = meta.bitrate - if (meta.sampleRate != null && meta.sampleRate > 0) realSampleRate = meta.sampleRate - } - resolvedAlbumArtUri = tSong.resolveAlbumArtUri() - } catch (e: Exception) { - // Ignore read errors, fall back to TdApi metadata - } - } + val realTitle = refined.title + val realArtistName = refined.artistName + val realAlbumName = refined.albumName + val realDateAdded = refined.dateAdded + val realYear = refined.year + val realTrackNumber = refined.trackNumber + val realDiscNumber = refined.discNumber + val realAlbumArtist = refined.albumArtist + val realGenre = refined.genre + val realLyrics = refined.lyrics + val realDuration = refined.duration + val realBitrate = refined.bitrate + val realSampleRate = refined.sampleRate + val resolvedAlbumArtUri = refined.albumArtUri // 3. Multi-Artist Processing val rawArtistName = if (realArtistName.isBlank()) "Unknown Artist" else realArtistName From 738068dbea09a617d63666b4ad370b845c0d195b Mon Sep 17 00:00:00 2001 From: Amonoman Date: Fri, 26 Jun 2026 14:00:13 +0200 Subject: [PATCH 03/10] perf(db): skip per-chunk orphan cleanup scans in Telegram sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit incrementalSyncMusicData() ran deleteOrphanedAlbums() and deleteOrphanedArtists() unconditionally at the end of every call. Both are full-table NOT EXISTS scans against songs / cross-refs. syncTelegramData() calls incrementalSyncMusicData() once per chunk (~200 times for a 100k-song channel at the current chunk size), so these scans ran ~200 times per sync even though pure-insert chunks can never produce an orphaned album or artist — only the dedicated deletion path can. Add an optional cleanupOrphans parameter (default true, preserving existing behavior for every other caller) and a new cleanupOrphanedMusicData() convenience method. syncTelegramData() now passes cleanupOrphans = false on every chunk flush and the batched deletion loop, then calls cleanupOrphanedMusicData() once after both finish. Other incrementalSyncMusicData call sites (MediaStore sync, Netease sync) are single-shot, not chunked loops, so they're unaffected and keep the default. Co-Authored-By: Claude --- .../pixelplay/data/database/MusicDao.kt | 31 ++++++++++++++++--- .../pixelplay/data/worker/SyncWorker.kt | 12 +++++-- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/app/src/main/java/com/theveloper/pixelplay/data/database/MusicDao.kt b/app/src/main/java/com/theveloper/pixelplay/data/database/MusicDao.kt index 8a141e350..ef4de1652 100644 --- a/app/src/main/java/com/theveloper/pixelplay/data/database/MusicDao.kt +++ b/app/src/main/java/com/theveloper/pixelplay/data/database/MusicDao.kt @@ -345,6 +345,15 @@ interface MusicDao { /** * Incrementally sync music data: upsert new/modified songs and remove deleted ones. * More efficient than clear-and-replace for large libraries with few changes. + * + * @param cleanupOrphans Whether to run the orphaned-album/artist cleanup scans at the end + * of this call. These are full-table NOT EXISTS scans against songs / cross-refs, so they + * get expensive when this function is called many times in a row for chunked inserts (e.g. + * syncing 100k+ songs in batches of a few hundred). Callers that flush several chunks of + * pure inserts in a loop should pass `false` for every chunk and run cleanup once after the + * loop finishes instead, since inserting songs/albums/artists can never create an orphan — + * only the deletedSongIds path below can. Defaults to true to preserve existing behavior + * for callers that sync once per call (e.g. single-pass deletions, single-batch syncs). */ @Transaction suspend fun incrementalSyncMusicData( @@ -352,7 +361,8 @@ interface MusicDao { albums: List, artists: List, crossRefs: List, - deletedSongIds: List + deletedSongIds: List, + cleanupOrphans: Boolean = true ) { // Protect cloud songs from deletion during generic media scan // Only allow explicit deletions if the list is non-empty. @@ -384,9 +394,11 @@ interface MusicDao { insertSongArtistCrossRefs(chunk) } - // Clean up orphaned albums and artists - deleteOrphanedAlbums() - deleteOrphanedArtists() + // Clean up orphaned albums and artists. Skippable via cleanupOrphans — see kdoc above. + if (cleanupOrphans) { + deleteOrphanedAlbums() + deleteOrphanedArtists() + } } // --- Directory Helper --- @@ -1630,6 +1642,17 @@ interface MusicDao { @Query("DELETE FROM artists WHERE NOT EXISTS (SELECT 1 FROM song_artist_cross_ref WHERE song_artist_cross_ref.artist_id = artists.id)") suspend fun deleteOrphanedArtists() + /** + * Runs both orphan-cleanup scans once. Call this after a loop of + * incrementalSyncMusicData(..., cleanupOrphans = false) calls, instead of paying for the + * cleanup scan on every chunk. + */ + @Transaction + suspend fun cleanupOrphanedMusicData() { + deleteOrphanedAlbums() + deleteOrphanedArtists() + } + // --- Favorite Operations --- @Query("UPDATE songs SET is_favorite = :isFavorite WHERE id = :songId") suspend fun setFavoriteStatus(songId: Long, isFavorite: Boolean) diff --git a/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt b/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt index 7d9540f6d..ddf0296d4 100644 --- a/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt +++ b/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt @@ -1600,6 +1600,9 @@ constructor( // across chunks, so the count may be updated again in a later chunk upsert — that // is fine because incrementalSyncMusicData uses upsert (INSERT OR REPLACE), so // the last chunk to touch an album wins with the final correct count. + // cleanupOrphans=false: this is a pure-insert chunk (no deletions), so it can't + // create orphans. Cleanup runs once after all chunks (including the deletion + // loop below) instead of on every one of the ~200 chunk flushes for 100k songs. val finalAlbums = albumsToInsert.values.map { album -> album.copy(songCount = albumSongCounts[album.id] ?: 0) } @@ -1608,7 +1611,8 @@ constructor( albums = finalAlbums, artists = artistsToInsert.values.toList(), crossRefs = crossRefsToInsert, - deletedSongIds = emptyList() // Deletions handled after all chunks + deletedSongIds = emptyList(), // Deletions handled after all chunks + cleanupOrphans = false ) totalSynced += songsToInsert.size Log.d(TAG, "Telegram sync: flushed chunk of ${songsToInsert.size} songs ($totalSynced / ${telegramSongs.size} total)") @@ -1624,12 +1628,16 @@ constructor( albums = emptyList(), artists = emptyList(), crossRefs = emptyList(), - deletedSongIds = batch + deletedSongIds = batch, + cleanupOrphans = false ) } Log.i(TAG, "Telegram sync: removed ${deletedUnifiedSongIds.size} deleted songs.") } + // Single orphan cleanup for the whole sync, instead of one per chunk above. + musicDao.cleanupOrphanedMusicData() + Log.i(TAG, "Synced $totalSynced Telegram songs with Unified Metadata.") } catch (e: Exception) { Log.e(TAG, "Failed to sync Telegram data", e) From ac9eb30d3566f03d8a8181c388af38ad61061189 Mon Sep 17 00:00:00 2001 From: Amonoman Date: Fri, 26 Jun 2026 14:03:07 +0200 Subject: [PATCH 04/10] perf(media): hoist replay-gain regex out of the per-song hot path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit parseReplayGainDb() constructed a new Regex("(?i)[dD][bB]") on every invocation. It's called via extractReplayGainDb() up to twice per song read (track gain + album gain, each checking up to 3 property key fallbacks), so a 100k-song sync could compile this same pattern hundreds of thousands of times. Hoist it to a private val compiled once at class load. Investigated whether the two separate TagLib calls in read() (getAudioProperties() + getMetadata(), each on a dup()'d fd) were redundant and could be merged — confirmed against the upstream Kyant0/taglib test suite that this is the library's required usage pattern, not duplicated work, so left unchanged. --- .../theveloper/pixelplay/data/media/AudioMetadataReader.kt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/app/src/main/java/com/theveloper/pixelplay/data/media/AudioMetadataReader.kt b/app/src/main/java/com/theveloper/pixelplay/data/media/AudioMetadataReader.kt index a2c94b0c2..0249972eb 100644 --- a/app/src/main/java/com/theveloper/pixelplay/data/media/AudioMetadataReader.kt +++ b/app/src/main/java/com/theveloper/pixelplay/data/media/AudioMetadataReader.kt @@ -48,6 +48,11 @@ object AudioMetadataReader { */ private const val VERBOSE = false + // Compiled once instead of on every parseReplayGainDb() call. This function runs up to + // twice per song (track gain + album gain), each checking up to 3 property keys, so a + // per-call Regex(...) construction adds up across a large library sync. + private val DB_SUFFIX_REGEX = Regex("(?i)[dD][bB]") + fun read(context: Context, uri: Uri): AudioMetadata? { val tempFile = createTempAudioFileFromUri(context, uri) ?: run { Timber.tag(TAG).w("Unable to create temp file for uri: $uri") @@ -257,7 +262,7 @@ object AudioMetadataReader { val cleanedValue = rawValue ?.trim() ?.replace(',', '.') - ?.replace(Regex("(?i)[dD][bB]"), "") + ?.replace(DB_SUFFIX_REGEX, "") ?.trim() ?: return null return cleanedValue.toFloatOrNull() From ab21695c4d0104fff9afea53adc742d4bb54bf17 Mon Sep 17 00:00:00 2001 From: Amonoman Date: Sat, 27 Jun 2026 15:17:37 +0200 Subject: [PATCH 05/10] fix(telegram): use field assignment instead of guessed flat constructor for SetTdlibParameters TdApi.SetTdlibParameters was being constructed with a 15-argument positional constructor, with a comment admitting the argument order was guessed ("the order varies by version... I will try the most common flat signature"). Verified against TDLib's official Java example and documentation: SetTdlibParameters has no flat multi-arg constructor at all, only a default constructor and one taking a single TdlibParameters object. The previous code was very likely silently passing values into the wrong fields on every app launch (e.g. apiId potentially landing in systemLanguageCode's slot), since boolean/string parameters of the wrong type would not fail at compile time given how the bindings are laid out. Replace with the documented pattern: construct TdApi.TdlibParameters() and assign each field by name, which cannot misalign the way a wrong guessed positional order can. Also explicitly sets enableStorageOptimizer = true, which was not reliably set by the old code (its effective value depended on exactly how the broken positional mapping landed). This matches TDLib's documented recommended default; flag for review if a different value is preferred. --- .../data/telegram/TelegramClientManager.kt | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/app/src/main/java/com/theveloper/pixelplay/data/telegram/TelegramClientManager.kt b/app/src/main/java/com/theveloper/pixelplay/data/telegram/TelegramClientManager.kt index d85ac81ab..b3c356b92 100644 --- a/app/src/main/java/com/theveloper/pixelplay/data/telegram/TelegramClientManager.kt +++ b/app/src/main/java/com/theveloper/pixelplay/data/telegram/TelegramClientManager.kt @@ -89,33 +89,33 @@ class TelegramClientManager @Inject constructor( is TdApi.AuthorizationStateWaitTdlibParameters -> { val databaseDirectory = File(context.filesDir, "tdlib").absolutePath val filesDirectory = File(context.filesDir, "tdlib_files").absolutePath - - // Based on error message and typical TDLib params structure for flat constructors: - // useTestDc, databaseDir, filesDir, encryptionKey, useFileDatabase, useChatInfoDatabase, useMessageDatabase, useSecretChats, apiId, apiHash, systemLanguage, deviceModel, systemVersion, applicationVersion, enableStorageOptimizer, ignoreFileNames - - // Note: The order varies by version. I will try the most common flat signature. - // If this fails, I might need to revert to using the object but finding why the object constructor failed. - // Actually, often in Java bindings, you have to set fields on the object passed to SetTdlibParameters. - // But if SetTdlibParameters ONLY has a multi-arg constructor, I must use it. - - // Let's assume the error message `constructor(p0: Boolean, p1: String!, ...)` matches the fields. - - client?.send(TdApi.SetTdlibParameters( - false, // useTestDc - databaseDirectory, - filesDirectory, - null, // databaseEncryptionKey - true, // useFileDatabase - true, // useChatInfoDatabase - true, // useMessageDatabase - false, // useSecretChats - BuildConfig.TELEGRAM_API_ID, - BuildConfig.TELEGRAM_API_HASH, - "en", // systemLanguageCode - "PixelPlayer Instance", // deviceModel - android.os.Build.VERSION.RELEASE, // systemVersion - BuildConfig.VERSION_NAME - ), defaultHandler) + + // Field assignment (not the flat positional constructor) on purpose: the + // flat TdApi.SetTdlibParameters(...) constructor's argument order isn't + // documented for this tdlibx build and was previously guessed, which risks + // silently shifting values into the wrong field (e.g. apiId landing where + // systemLanguageCode is expected) with no compile-time or runtime error. + // Named field assignment, as used in TDLib's own official Java example, + // can't misalign like that. + val parameters = TdApi.TdlibParameters().apply { + useTestDc = false + this.databaseDirectory = databaseDirectory + this.filesDirectory = filesDirectory + useFileDatabase = true + useChatInfoDatabase = true + useMessageDatabase = true + useSecretChats = false + apiId = BuildConfig.TELEGRAM_API_ID + apiHash = BuildConfig.TELEGRAM_API_HASH + systemLanguageCode = "en" + deviceModel = "PixelPlayer Instance" + systemVersion = android.os.Build.VERSION.RELEASE + applicationVersion = BuildConfig.VERSION_NAME + enableStorageOptimizer = true + ignoreFileNames = false + } + + client?.send(TdApi.SetTdlibParameters(parameters), defaultHandler) } is TdApi.AuthorizationStateWaitPhoneNumber -> { // UI should prompt for phone number From 43606bfbad4cc38643e13e0a82d185f06c68fc25 Mon Sep 17 00:00:00 2001 From: Amonoman Date: Sat, 27 Jun 2026 15:20:47 +0200 Subject: [PATCH 06/10] chore(telegram): remove per-topic debug logging in forum thread ID resolution ForumTopicInfo's thread ID is resolved via reflection (field name isn't confirmed for this tdlibx fork from available documentation). Two debug logs ran on every topic during every forum sync: one dumping all reflected field names, one logging the resolved field name and value. Both were one-time discovery aids per their own comments, not meant to run indefinitely. Removed both. Kept the Timber.w fallback-path warning and Timber.e failure log, since those flag situations actually worth knowing about. Did not change the reflection logic itself or attempt to replace it with direct property access: multiple TDLib Java bindings (official tdlib/td, the tdlight fork, and TDLib's own GetForumTopic/ SearchChatMessages fields) consistently use "messageThreadId", which is already the first name this code tries, but this exact tdlibx 1.8.56 fork's ForumTopicInfo source wasn't directly inspectable to confirm a direct property reference would compile. If a past Logcat capture from this debug log is available, it would let this be replaced with a confirmed direct field reference instead. --- .../pixelplay/data/telegram/TelegramRepository.kt | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/app/src/main/java/com/theveloper/pixelplay/data/telegram/TelegramRepository.kt b/app/src/main/java/com/theveloper/pixelplay/data/telegram/TelegramRepository.kt index c38cd4875..9dcee04ad 100644 --- a/app/src/main/java/com/theveloper/pixelplay/data/telegram/TelegramRepository.kt +++ b/app/src/main/java/com/theveloper/pixelplay/data/telegram/TelegramRepository.kt @@ -177,10 +177,17 @@ class TelegramRepository @Inject constructor( // looks like a thread/message identifier. We skip fields named // exactly "id" because in many TDLib Java builds that field is a // String composite key, not the numeric thread ID. + // + // "messageThreadId" is checked first and is expected to resolve on the + // first pass: it's the field name used consistently across every TDLib + // Java binding checked (official tdlib/td, tdlight fork, and TDLib's own + // GetForumTopic/SearchChatMessages fields), so the broader scan below is + // a defensive fallback rather than the expected path. Reflection (rather + // than direct property access) is kept because this exact tdlibx 1.8.56 + // fork's ForumTopicInfo source wasn't directly inspectable to confirm the + // field compiles as a typed property here. val threadId: Long = run { - // Log all fields once so we can confirm the correct name in Logcat val allFields = info.javaClass.declaredFields - Timber.d("ForumTopicInfo fields: ${allFields.map { "${it.name}:${it.type.simpleName}" }}") var resolved = 0L // Prefer the most specific name first, skip bare "id" (likely String) @@ -201,7 +208,6 @@ class TelegramRepository @Inject constructor( else -> 0L } if (candidate != 0L) { - Timber.d("ForumTopicInfo: resolved threadId via field '$name' = $candidate") resolved = candidate break } From aba5ee467fb2765cc7c6e02aa1615d6a3b6aa5f1 Mon Sep 17 00:00:00 2001 From: Amonoman Date: Sat, 27 Jun 2026 18:57:39 +0200 Subject: [PATCH 07/10] perf(sync): scale Telegram metadata-read concurrency to device cores TELEGRAM_METADATA_READ_CONCURRENCY was a flat constant (8) regardless of device. Each concurrent read does a real native TagLib JNI call, which is genuine CPU-bound work, not just blocking IO wait. Kotlin dispatchers schedule threads, they don't reserve CPU time, so heavy parallel CPU-bound work here can still starve the Main thread and cause visible UI jank on a device with few cores, while being needlessly conservative on a high-core device. Replace the flat constant with telegramMetadataReadConcurrency(), which scales with Runtime.getRuntime().availableProcessors() / 2, capped to [2, 4]. This is an engineering default reasoned from how coroutine dispatchers and native JNI calls interact with real CPU scheduling, not a profiled or measured-optimal value; it has not been verified against real device traces. Prompted by a field report of heavy UI lag during a large Telegram sync on a device that did not OOM (separate from the earlier OOM investigation). Lowering concurrency trades some of the previously measured sync-time improvement for reduced peak CPU contention; on the device used for the 50-second/100k-song benchmark, this will likely land at concurrency 4 (down from 8) and increase sync time accordingly. Also tightens the artist/album preload in syncTelegramData to build both derived maps in a single pass with pre-sized capacity, instead of two separate default-capacity .associate{} calls; this does not change the preload's fundamental scaling with existing-library size, which remains a known, separate limitation. --- .../pixelplay/data/worker/SyncWorker.kt | 51 ++++++++++++++++--- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt b/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt index ddf0296d4..72dae1f8e 100644 --- a/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt +++ b/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt @@ -1221,11 +1221,25 @@ constructor( // Number of Telegram songs processed per DB flush. Keeps peak memory bounded // regardless of channel size (e.g. 65k songs → ~130 flushes of 500 each). private const val TELEGRAM_SYNC_CHUNK_SIZE = 500 + // Bounded parallelism for the per-song file.exists()+tag-read step within a chunk. - // This was previously fully sequential, which is what made very large Telegram - // channels (e.g. 100k songs) take on the order of 10 minutes to sync. Kept modest - // (same order as the MediaStore processing semaphore) to avoid saturating disk IO. - private const val TELEGRAM_METADATA_READ_CONCURRENCY = 8 + // This work was previously fully sequential, which is what made very large Telegram + // channels (e.g. 100k songs) take on the order of 10 minutes to sync. + // + // This used to be a flat constant (8) regardless of device. A flat number can't be + // right for both ends of the device spectrum: it leaves a low-core device with very + // little CPU headroom for the UI thread during a background sync (each of these + // concurrent reads does a real native TagLib JNI call, which is genuine CPU-bound work, + // not just blocking IO wait — Kotlin dispatchers schedule threads, they don't reserve + // CPU time, so heavy parallel CPU work here can still starve the Main thread on a + // device with few cores), while being needlessly conservative on a high-core device. + // + // Scaling with availableProcessors(), capped to [2, 4], is an engineering default + // based on that reasoning, not a profiled/measured-optimal value — it has not been + // verified against real device traces. If real-world telemetry or further testing + // shows a different range is better, this is the one place to adjust it. + private fun telegramMetadataReadConcurrency(): Int = + (Runtime.getRuntime().availableProcessors() / 2).coerceIn(2, 4) private const val NETEASE_SONG_ID_OFFSET = 3_000_000_000_000L private const val NETEASE_ALBUM_ID_OFFSET = 4_000_000_000_000L @@ -1370,12 +1384,33 @@ constructor( } // 1. Pre-load local data for merging — loaded once, shared across all chunks. - // getAllArtistsListRaw() called once only (was called twice before). + // + // NOTE: getAllArtistsListRaw()/getAllAlbumsList() load every artist/album in the + // entire unified library (not just Telegram-sourced ones), unconditionally. This + // preload's memory cost scales with the size of the user's *existing* library, not + // with the size of the channel being synced — on a library with many thousands of + // distinct artists already present (e.g. a prior sync of a similarly large channel), + // this can be a genuinely large amount of memory held for the whole sync. Fully + // avoiding that would mean replacing this preload with batched per-chunk lookups + // (e.g. SELECT ... WHERE name IN (:chunkNames)) instead of one upfront full-library + // load; that's a real, separate redesign, not done here. + // + // What IS done here: build both derived maps in a single pass over the artist list, + // pre-sized to the known count, instead of two separate .associate{} calls (each of + // which allocates its own default-capacity HashMap that has to resize/reallocate + // repeatedly as it grows for a large list). This avoids some transient duplicate + // allocation during the preload, though it does not change the preload's fundamental + // scaling with existing-library size. val allExistingArtists = musicDao.getAllArtistsListRaw() - val existingArtists = allExistingArtists.associate { it.name.trim().lowercase() to it.id } + val initialCapacity = ((allExistingArtists.size / 0.75f).toInt() + 1).coerceAtLeast(16) + val existingArtists = HashMap(initialCapacity) + val existingArtistImageUrls = HashMap(initialCapacity) + for (artist in allExistingArtists) { + existingArtists[artist.name.trim().lowercase()] = artist.id + existingArtistImageUrls[artist.id] = artist.imageUrl + } val existingAlbums = musicDao.getAllAlbumsList(emptyList(), false, 0) .associate { "${it.title.trim().lowercase()}_${it.artistName.trim().lowercase()}" to it.id } - val existingArtistImageUrls = allExistingArtists.associate { it.id to it.imageUrl } val delimiters = userPreferencesRepository.artistDelimitersFlow.first() val wordDelims = userPreferencesRepository.artistWordDelimitersFlow.first() @@ -1389,7 +1424,7 @@ constructor( // expensive part of the loop (file.exists() + tag parsing per song), so it's the // only part run in parallel; the artist/album dedup maps that follow are mutated // from a single thread per chunk and stay sequential, same as before. - val metadataReadSemaphore = Semaphore(TELEGRAM_METADATA_READ_CONCURRENCY) + val metadataReadSemaphore = Semaphore(telegramMetadataReadConcurrency()) telegramSongs.chunked(TELEGRAM_SYNC_CHUNK_SIZE).forEach { chunk -> // Per-chunk collections — allocated, used, then released each iteration. From 7435d62b844554f0d7468d8729740fe4160a0c3b Mon Sep 17 00:00:00 2001 From: Amonoman Date: Sat, 27 Jun 2026 19:04:18 +0200 Subject: [PATCH 08/10] perf(sync): eliminate redundant per-row cursor reads and hash calls Three small, repeated costs found during a broader pass over SyncWorker.kt, each scaling with library size: - fetchMusicFromMediaStore: cursor.getString(dataCol) and cursor.getInt(trackCol) were each read twice per row (once for the directory filter / disc-vs-track split, once for the RawSongData fields). Cursor getters cross into native CursorWindow code, so this was a real, avoidable JNI call per row. Read once, reuse. - fetchMediaStoreIds: allocated two File objects per row (File(data).parent, then File(parentPath).absolutePath) to arrive back at a string MediaStore's DATA column already provides as an absolute path. Replaced with the same string-slicing approach already used for the identical check in fetchMusicFromMediaStore. - syncNeteaseData: toUnifiedNeteaseArtistId(name), a lowercase() + hashCode() computation, ran up to three times for the same artist name within the same song iteration (primary artist, the cross-ref loop, and the JSON artist refs). Computed once per song into artistIds and reused across all three. No behavior change in any of the three; all are pure redundant-work elimination, verified by re-deriving the same values via the same inputs. --- .../pixelplay/data/worker/SyncWorker.kt | 42 ++++++++++++++----- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt b/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt index 72dae1f8e..8c9f3e847 100644 --- a/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt +++ b/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt @@ -893,8 +893,8 @@ constructor( } else -1 while (cursor.moveToNext()) { + val data = cursor.getString(dataCol) ?: continue try { - val data = cursor.getString(dataCol) ?: continue val lastSlash = data.lastIndexOf('/') if (lastSlash > 0) { val normalizedParent = data.substring(0, lastSlash) @@ -906,12 +906,20 @@ constructor( // Proceed on error } + // Cursor getters cross into native CursorWindow code; read each column + // once per row and reuse the value, rather than calling the same + // getter twice (trackCol previously fed both trackNumber and + // discNumber via two separate cursor.getInt(trackCol) calls, and + // dataCol was read once above for the directory filter and again + // here for filePath). + val rawTrack = cursor.getInt(trackCol) + rawDataList.add( RawSongData( id = cursor.getLong(idCol), albumId = cursor.getLong(albumIdCol), artistId = cursor.getLong(artistIdCol), - filePath = cursor.getString(dataCol) ?: "", + filePath = data, mimeType = if (mimeTypeCol >= 0) cursor.getString(mimeTypeCol) else null, title = cursor.getString(titleCol) @@ -932,8 +940,8 @@ constructor( ?.takeIf { it.isNotBlank() } else null, duration = cursor.getLong(durationCol), - trackNumber = cursor.getInt(trackCol) % 1000, - discNumber = (cursor.getInt(trackCol) / 1000).takeIf { it > 0 }, + trackNumber = rawTrack % 1000, + discNumber = (rawTrack / 1000).takeIf { it > 0 }, year = cursor.getInt(yearCol), dateModified = cursor.getLong(dateModifiedCol), genre = if (genreCol >= 0) cursor.getString(genreCol) else null @@ -1187,9 +1195,18 @@ constructor( while (cursor.moveToNext()) { val data = cursor.getString(dataCol) if (data != null) { - val parentPath = File(data).parent - if (parentPath != null && directoryResolver.isBlocked(File(parentPath).absolutePath)) { - continue + // MediaStore's DATA column is always an absolute path, so the parent + // path derived from it is already absolute — no need to re-wrap it in a + // second File just to call .absolutePath, which was allocating two File + // objects per row (one for .parent, one for .absolutePath) purely to + // arrive back at the same string. String slicing matches the leaner + // approach already used for this same check in fetchMusicFromMediaStore. + val lastSlash = data.lastIndexOf('/') + if (lastSlash > 0) { + val parentPath = data.substring(0, lastSlash) + if (directoryResolver.isBlocked(parentPath)) { + continue + } } } ids.add(cursor.getLong(idCol)) @@ -1701,11 +1718,16 @@ constructor( neteaseSongs.forEach { nSong -> val songId = toUnifiedNeteaseSongId(nSong.neteaseId) val artistNames = parseNeteaseArtistNames(nSong.artist) + // Computed once per song and reused below for the cross-ref loop, the + // primary artist ID, and the JSON artist refs — previously this called + // toUnifiedNeteaseArtistId (a lowercase()+hashCode() per call) up to three + // separate times for the same artist name within the same song iteration. + val artistIds = artistNames.map { toUnifiedNeteaseArtistId(it) } val primaryArtistName = artistNames.firstOrNull() ?: "Unknown Artist" - val primaryArtistId = toUnifiedNeteaseArtistId(primaryArtistName) + val primaryArtistId = artistIds.firstOrNull() ?: toUnifiedNeteaseArtistId(primaryArtistName) artistNames.forEachIndexed { index, artistName -> - val artistId = toUnifiedNeteaseArtistId(artistName) + val artistId = artistIds[index] artistsToInsert.putIfAbsent( artistId, ArtistEntity( @@ -1743,7 +1765,7 @@ constructor( // Build artists JSON val neteaseArtistRefs = artistNames.mapIndexed { idx, name -> ArtistRef( - id = toUnifiedNeteaseArtistId(name), + id = artistIds[idx], name = name, isPrimary = idx == 0 ) From 5b837665a0703e869703574b5097983f0b922a81 Mon Sep 17 00:00:00 2001 From: Amonoman Date: Sat, 27 Jun 2026 19:23:23 +0200 Subject: [PATCH 09/10] fix(telegram): revert SetTdlibParameters to its actual flat constructor A previous commit replaced this call with TdApi.TdlibParameters() field assignment, based on TDLib's official docs/example. That approach does not compile against this exact tdlibx:td:1.8.56 build: the real compiler error confirms TdApi.SetTdlibParameters here has only a no-arg constructor and a flat 14-argument one, no constructor taking a TdlibParameters object exists in this build at all. Compared the original (pre-revert) flat-constructor call against the compiler-confirmed real signature, argument type and position both match exactly, so the original code was correct for this build. The "argument order was guessed" comment that motivated the previous change was based on uncertainty that, in retrospect, wasn't actually a problem here. Reverted to the flat constructor, now with the real, compiler- verified signature documented in a comment instead of assumed from docs for a different TDLib binding, and named local variables in place of bare positional literals for review clarity. --- .../data/telegram/TelegramClientManager.kt | 71 ++++++++++++------- 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/app/src/main/java/com/theveloper/pixelplay/data/telegram/TelegramClientManager.kt b/app/src/main/java/com/theveloper/pixelplay/data/telegram/TelegramClientManager.kt index b3c356b92..5851014d2 100644 --- a/app/src/main/java/com/theveloper/pixelplay/data/telegram/TelegramClientManager.kt +++ b/app/src/main/java/com/theveloper/pixelplay/data/telegram/TelegramClientManager.kt @@ -90,32 +90,53 @@ class TelegramClientManager @Inject constructor( val databaseDirectory = File(context.filesDir, "tdlib").absolutePath val filesDirectory = File(context.filesDir, "tdlib_files").absolutePath - // Field assignment (not the flat positional constructor) on purpose: the - // flat TdApi.SetTdlibParameters(...) constructor's argument order isn't - // documented for this tdlibx build and was previously guessed, which risks - // silently shifting values into the wrong field (e.g. apiId landing where - // systemLanguageCode is expected) with no compile-time or runtime error. - // Named field assignment, as used in TDLib's own official Java example, - // can't misalign like that. - val parameters = TdApi.TdlibParameters().apply { - useTestDc = false - this.databaseDirectory = databaseDirectory - this.filesDirectory = filesDirectory - useFileDatabase = true - useChatInfoDatabase = true - useMessageDatabase = true - useSecretChats = false - apiId = BuildConfig.TELEGRAM_API_ID - apiHash = BuildConfig.TELEGRAM_API_HASH - systemLanguageCode = "en" - deviceModel = "PixelPlayer Instance" - systemVersion = android.os.Build.VERSION.RELEASE - applicationVersion = BuildConfig.VERSION_NAME - enableStorageOptimizer = true - ignoreFileNames = false - } + // Flat positional constructor, confirmed against this exact tdlibx 1.8.56 + // build's actual compiled signature (via a real compile error, not assumed + // from docs — see commit message). This build's TdApi.SetTdlibParameters has + // no constructor that takes a TdlibParameters object, only this flat one and + // a no-arg one, contrary to the official TDLib docs/example for a different + // binding. Confirmed signature: + // (useTestDc: Boolean, databaseDirectory: String, filesDirectory: String, + // databaseEncryptionKey: ByteArray?, useFileDatabase: Boolean, + // useChatInfoDatabase: Boolean, useMessageDatabase: Boolean, + // useSecretChats: Boolean, apiId: Int, apiHash: String, + // systemLanguageCode: String, deviceModel: String, systemVersion: String, + // applicationVersion: String) + // Named locals here instead of bare positional literals so a misordering is + // easier to spot on review, without reintroducing the ambiguity of guessing + // at field names that don't apply to this build's API shape. + val useTestDc = false + val databaseEncryptionKey: ByteArray? = null + val useFileDatabase = true + val useChatInfoDatabase = true + val useMessageDatabase = true + val useSecretChats = false + val apiId = BuildConfig.TELEGRAM_API_ID + val apiHash = BuildConfig.TELEGRAM_API_HASH + val systemLanguageCode = "en" + val deviceModel = "PixelPlayer Instance" + val systemVersion = android.os.Build.VERSION.RELEASE + val applicationVersion = BuildConfig.VERSION_NAME - client?.send(TdApi.SetTdlibParameters(parameters), defaultHandler) + client?.send( + TdApi.SetTdlibParameters( + useTestDc, + databaseDirectory, + filesDirectory, + databaseEncryptionKey, + useFileDatabase, + useChatInfoDatabase, + useMessageDatabase, + useSecretChats, + apiId, + apiHash, + systemLanguageCode, + deviceModel, + systemVersion, + applicationVersion + ), + defaultHandler + ) } is TdApi.AuthorizationStateWaitPhoneNumber -> { // UI should prompt for phone number From c7e60d281e52a9d7f82d4b6f707e049322712634 Mon Sep 17 00:00:00 2001 From: Amonoman Date: Sat, 27 Jun 2026 19:24:39 +0200 Subject: [PATCH 10/10] perf(sync): stop double-querying the DB for changed MediaStore songs fetchMusicFromMediaStore's incremental path queried musicDao.getSongsByIdsListSimple() twice for every song that turned out to have changed: once in Phase 2 to fetch its existing entity and decide whether it changed, and once again in Phase 3 for the exact same IDs to get that same entity for the user-edit-preserving merge step. Phase 2 already has the existing entity in hand right before discarding it. Carry it forward as part of songsToProcess (now List> instead of List) instead of re-fetching it in Phase 3. Removes one DB round-trip per batch of changed songs; impact scales with how many songs actually changed in a given sync (near-zero for a quiet incremental sync, real for a deep/force-metadata rescan or any sync touching many files at once). No behavior change: same merge logic, same data, just sourced from the pair instead of a second query. --- .../pixelplay/data/worker/SyncWorker.kt | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt b/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt index 8c9f3e847..c7897878e 100644 --- a/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt +++ b/app/src/main/java/com/theveloper/pixelplay/data/worker/SyncWorker.kt @@ -960,12 +960,17 @@ constructor( val artistDelimiters = userPreferencesRepository.artistDelimitersFlow.first() val artistWordDelimiters = userPreferencesRepository.artistWordDelimitersFlow.first() val rawSongCount = rawDataList.size - val songsToProcess = if (isRebuild) { - rawDataList.toList() + // Pairs of (raw MediaStore data, existing local entity if any). Phase 2 already has + // to fetch each song's existing entity to decide whether it changed; carrying that + // entity forward here means Phase 3 doesn't have to issue the same + // getSongsByIdsListSimple query a second time for the same IDs just to get the same + // data again for the merge step below. + val songsToProcess: List> = if (isRebuild) { + rawDataList.map { it to null } } else { // Find existing data for these songs to avoid unnecessary reprocessing // and to preserve user edits. - val results = mutableListOf() + val results = mutableListOf>() rawDataList.chunked(500).forEach { batch -> val ids = batch.map { it.id } @@ -974,7 +979,7 @@ constructor( batch.forEach { raw -> val existing = existingMap[raw.id] if (!isSongUnchanged(raw, existing)) { - results.add(raw) + results.add(raw to existing) } } } @@ -1001,17 +1006,15 @@ constructor( val concurrencyLimit = 4 // Reduced concurrency to save memory val semaphore = Semaphore(concurrencyLimit) - // Process batches sequentially so each batch's existingMap can be GC'd before the next - // batch is loaded. The semaphore still limits concurrency within each batch. + // Process batches sequentially to keep peak memory bounded, same as before. The + // per-batch existingMap query from the original version is gone — localSong now comes + // from the pair carried over from Phase 2 instead of a second DB round-trip. val songs = mutableListOf() for (batch in songsToProcess.chunked(200)) { - val ids = batch.map { it.id } - val existingMap = if (isRebuild) emptyMap() else musicDao.getSongsByIdsListSimple(ids).associateBy { it.id } val batchResults = coroutineScope { - batch.map { raw -> + batch.map { (raw, localSong) -> async { semaphore.withPermit { - val localSong = existingMap[raw.id] val mediaStoreSong = processSongData( raw = raw,