diff --git a/build.gradle.kts b/build.gradle.kts index 0df180429..e6ff76c0d 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -211,6 +211,7 @@ tasks.withType { "com/epam/brn/exception/**", "com/epam/brn/Application*", "com/epam/brn/service/azure/tts/config/**", + "com/epam/brn/service/yandex/tts/config/**", "com/epam/brn/webclient/customizer/**", "com/epam/brn/webclient/model/**", ) @@ -251,6 +252,7 @@ sonarqube { "**/com/epam/brn/service/load/FirebaseUserDataLoader*," + "**/com/epam/brn/service/azure/tts/AzureVoiceLoader*," + "**/com/epam/brn/service/azure/tts/config/**," + + "**/com/epam/brn/service/yandex/tts/config/**," + "**/com/epam/brn/webclient/customizer/**," + "**/com/epam/brn/webclient/model/**", ) diff --git a/frontend/make-words.js b/frontend/make-words.js index 6c9fb8f29..b7c380417 100644 --- a/frontend/make-words.js +++ b/frontend/make-words.js @@ -1,8 +1,7 @@ /* eslint-disable @typescript-eslint/no-var-requires */ /* global require */ const fs = require('fs'); -const request = require('request'); -const qs = require('querystring'); +const https = require('https'); const words = `бам,сам,дам,зал,бум`; const token = ''; @@ -12,31 +11,77 @@ const folderId = ''; // install ffmpeg // yc iam create-token -// https://cloud.yandex.ru/docs/speechkit/tts/request +// https://cloud.yandex.ru/docs/speechkit/tts/v3/api-ref/grpc/ -const yandex_tts_url = - 'https://tts.api.cloud.yandex.net/speech/v1/tts:synthesize?'; +const yandex_tts_url = '/tts/v3/utteranceSynthesis'; function YandexTTS(options, callback) { - var params = {}; - - params['text'] = options['text']; - params['folderId'] = folderId; - params['format'] = 'oggopus'; - params['lang'] = 'ru-RU'; - params['voice'] = 'filipp'; - params['emotion'] = 'good'; - - var full_url = yandex_tts_url + qs.stringify(params); + const body = JSON.stringify({ + text: options['text'], + outputAudioSpec: { + containerAudio: { + containerAudioType: 'OGG_OPUS', + }, + }, + hints: [ + { voice: 'filipp' }, + { role: 
'neutral' }, + ], + loudnessNormalizationType: 'LUFS', + }); - var file = fs.createWriteStream(options['file']); - file.on('finish', callback); - request({ - url: full_url, + const reqOptions = { + hostname: 'tts.api.cloud.yandex.net', + port: 443, + path: yandex_tts_url, + method: 'POST', headers: { - Authorization: `Bearer ${token}`, + 'Authorization': `Bearer ${token}`, + 'x-folder-id': folderId, + 'Content-Type': 'application/json', + 'Content-Length': Buffer.byteLength(body), }, - }).pipe(file); + }; + + const file = fs.createWriteStream(options['file']); + const req = https.request(reqOptions, (res) => { + let responseData = ''; + res.on('data', (chunk) => { + responseData += chunk; + }); + res.on('end', () => { + if (res.statusCode !== 200) { + console.error(`HTTP ${res.statusCode}: ${responseData}`); + file.end(); + callback(); + return; + } + + const lines = responseData.split('\n').filter((line) => line.trim()); + for (const line of lines) { + try { + const parsed = JSON.parse(line); + if (parsed.result && parsed.result.audioChunk && parsed.result.audioChunk.data) { + const audioBuffer = Buffer.from(parsed.result.audioChunk.data, 'base64'); + file.write(audioBuffer); + } + } catch (e) { + // Ignore non-JSON transport lines. + } + } + + file.end(callback); + }); + }); + + req.on('error', (e) => { + console.error(`Request error: ${e.message}`); + file.end(); + callback(); + }); + + req.write(body); + req.end(); } const execSync = require('child_process').execSync; @@ -72,8 +117,3 @@ async function makeFiles() { } makeFiles(); -// stack.forEach((word)=>{ -// let file = word.trim(); -// execSync(`gtts-cli "${word}." 
-lang_check --lang ru --output ${file}.mp3`); -// execSync(`ffmpeg-normalize ${file}.mp3 --normalization-type peak --target-level 0 -c:a libmp3lame -b:a 320k -o ${file}_n.mp3`) -// }); diff --git a/src/main/kotlin/com/epam/brn/controller/YandexAudioSettingsController.kt b/src/main/kotlin/com/epam/brn/controller/YandexAudioSettingsController.kt new file mode 100644 index 000000000..507f79d09 --- /dev/null +++ b/src/main/kotlin/com/epam/brn/controller/YandexAudioSettingsController.kt @@ -0,0 +1,66 @@ +package com.epam.brn.controller + +import com.epam.brn.dto.request.audio.AudioVoiceOverrideRequest +import com.epam.brn.dto.response.BrnResponse +import com.epam.brn.dto.response.audio.AudioVoiceOptionResponse +import com.epam.brn.dto.response.audio.AudioVoiceSettingsResponse +import com.epam.brn.enums.BrnRole +import com.epam.brn.service.WordsService +import io.swagger.v3.oas.annotations.Operation +import io.swagger.v3.oas.annotations.tags.Tag +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty +import org.springframework.http.ResponseEntity +import org.springframework.web.bind.annotation.GetMapping +import org.springframework.web.bind.annotation.PostMapping +import org.springframework.web.bind.annotation.RequestBody +import org.springframework.web.bind.annotation.RequestMapping +import org.springframework.web.bind.annotation.RequestParam +import org.springframework.web.bind.annotation.RestController +import javax.annotation.security.RolesAllowed + +@RestController +@RequestMapping("/audio") +@Tag(name = "Audio", description = "Contains actions for getting audio file for words") +@ConditionalOnProperty(name = ["default.tts.provider"], havingValue = "yandex") +@RolesAllowed(BrnRole.USER) +class YandexAudioSettingsController( + private val wordsService: WordsService, +) { + @GetMapping("/voices") + @Operation(summary = "Get available Yandex voices and the current runtime default for a locale") + fun getVoices( + @RequestParam(required = false, 
defaultValue = "ru-ru") locale: String, + ): ResponseEntity> = ResponseEntity + .ok() + .body(BrnResponse(data = buildVoiceSettingsResponse(locale))) + + @PostMapping("/default-voice") + @Operation(summary = "Set the runtime default Yandex voice for a locale until the server restarts") + @RolesAllowed(BrnRole.ADMIN) + fun setDefaultVoice( + @RequestBody request: AudioVoiceOverrideRequest, + ): ResponseEntity> { + wordsService.setDefaultVoiceForLocale(request.locale, request.voice) + return ResponseEntity.ok().body(BrnResponse(data = buildVoiceSettingsResponse(request.locale))) + } + + private fun buildVoiceSettingsResponse(locale: String): AudioVoiceSettingsResponse { + val defaultVoice = wordsService.getDefaultVoiceForLocale(locale) + val voiceOptions = + wordsService.getAvailableVoicesForLocale(locale).map { voice -> + AudioVoiceOptionResponse( + name = voice.name, + apiValue = voice.apiValue, + gender = voice.gender.name.lowercase(), + roles = voice.supportedRoles.map { it.apiValue }, + isDefault = voice.name == defaultVoice, + ) + } + + return AudioVoiceSettingsResponse( + locale = locale.lowercase(), + defaultVoice = defaultVoice, + voices = voiceOptions, + ) + } +} diff --git a/src/main/kotlin/com/epam/brn/dto/request/audio/AudioVoiceOverrideRequest.kt b/src/main/kotlin/com/epam/brn/dto/request/audio/AudioVoiceOverrideRequest.kt new file mode 100644 index 000000000..69b9b3067 --- /dev/null +++ b/src/main/kotlin/com/epam/brn/dto/request/audio/AudioVoiceOverrideRequest.kt @@ -0,0 +1,6 @@ +package com.epam.brn.dto.request.audio + +data class AudioVoiceOverrideRequest( + val locale: String, + val voice: String, +) diff --git a/src/main/kotlin/com/epam/brn/dto/response/audio/AudioVoiceSettingsResponse.kt b/src/main/kotlin/com/epam/brn/dto/response/audio/AudioVoiceSettingsResponse.kt new file mode 100644 index 000000000..c6b52f060 --- /dev/null +++ b/src/main/kotlin/com/epam/brn/dto/response/audio/AudioVoiceSettingsResponse.kt @@ -0,0 +1,15 @@ +package 
com.epam.brn.dto.response.audio + +data class AudioVoiceSettingsResponse( + val locale: String, + val defaultVoice: String, + val voices: List, +) + +data class AudioVoiceOptionResponse( + val name: String, + val apiValue: String, + val gender: String, + val roles: List, + val isDefault: Boolean, +) diff --git a/src/main/kotlin/com/epam/brn/dto/yandex/tts/YandexTtsRequest.kt b/src/main/kotlin/com/epam/brn/dto/yandex/tts/YandexTtsRequest.kt new file mode 100644 index 000000000..119c9b15c --- /dev/null +++ b/src/main/kotlin/com/epam/brn/dto/yandex/tts/YandexTtsRequest.kt @@ -0,0 +1,25 @@ +package com.epam.brn.dto.yandex.tts + +import com.fasterxml.jackson.annotation.JsonInclude + +data class YandexTtsRequest( + val text: String, + val outputAudioSpec: OutputAudioSpec, + val hints: List, + val loudnessNormalizationType: String = "LUFS", +) + +data class OutputAudioSpec( + val containerAudio: ContainerAudio, +) + +data class ContainerAudio( + val containerAudioType: String = "OGG_OPUS", +) + +@JsonInclude(JsonInclude.Include.NON_NULL) +data class Hint( + val voice: String? = null, + val speed: String? = null, + val role: String? = null, +) diff --git a/src/main/kotlin/com/epam/brn/dto/yandex/tts/YandexTtsResponse.kt b/src/main/kotlin/com/epam/brn/dto/yandex/tts/YandexTtsResponse.kt new file mode 100644 index 000000000..3ad087de0 --- /dev/null +++ b/src/main/kotlin/com/epam/brn/dto/yandex/tts/YandexTtsResponse.kt @@ -0,0 +1,18 @@ +package com.epam.brn.dto.yandex.tts + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties + +@JsonIgnoreProperties(ignoreUnknown = true) +data class YandexTtsResponse( + val result: YandexTtsResult? = null, +) + +@JsonIgnoreProperties(ignoreUnknown = true) +data class YandexTtsResult( + val audioChunk: AudioChunk? = null, +) + +@JsonIgnoreProperties(ignoreUnknown = true) +data class AudioChunk( + val data: String? 
= null, +) diff --git a/src/main/kotlin/com/epam/brn/enums/BrnLocale.kt b/src/main/kotlin/com/epam/brn/enums/BrnLocale.kt index 1316fe86b..427bbb569 100644 --- a/src/main/kotlin/com/epam/brn/enums/BrnLocale.kt +++ b/src/main/kotlin/com/epam/brn/enums/BrnLocale.kt @@ -5,5 +5,4 @@ enum class BrnLocale( ) { RU("ru-ru"), EN("en-us"), - TR("tr-tr"), } diff --git a/src/main/kotlin/com/epam/brn/enums/Voice.kt b/src/main/kotlin/com/epam/brn/enums/Voice.kt index 41606c3e4..c9340c426 100644 --- a/src/main/kotlin/com/epam/brn/enums/Voice.kt +++ b/src/main/kotlin/com/epam/brn/enums/Voice.kt @@ -1,20 +1,61 @@ package com.epam.brn.enums -// from docs: https://cloud.yandex.ru/docs/speechkit/tts/voices -enum class Voice { - FILIPP, // old Russian man voice - ALEXANDER, // new Russian man voice: neutral, good - KIRILL, // new Russian man voice: neutral, strict, good - - OKSANA, // old Russian woman voice - MARINA, // new Russian woman voice - LERA, // new Russian woman voice - DASHA, // new Russian woman voice - - NICK, // old English man voice - JOHN, // new English man voice - ALYSS, // old English woman voice - - ERKANYAVAS, // old Turkish man voice - SILAERKAN, // old Turkish voice +// Based on the current Yandex SpeechKit TTS voices docs. 
+enum class Voice( + val locale: String, + val gender: VoiceGender, + vararg supportedRoles: VoiceRole, +) { + FILIPP(BrnLocale.RU.locale, VoiceGender.MALE, VoiceRole.NEUTRAL), + ERMIL(BrnLocale.RU.locale, VoiceGender.MALE, VoiceRole.NEUTRAL, VoiceRole.GOOD), + ZAHAR(BrnLocale.RU.locale, VoiceGender.MALE), + ALEXANDER(BrnLocale.RU.locale, VoiceGender.MALE, VoiceRole.NEUTRAL, VoiceRole.GOOD), + KIRILL(BrnLocale.RU.locale, VoiceGender.MALE, VoiceRole.NEUTRAL, VoiceRole.STRICT, VoiceRole.GOOD), + + ALENA(BrnLocale.RU.locale, VoiceGender.FEMALE), + OKSANA(BrnLocale.RU.locale, VoiceGender.FEMALE), + MARINA(BrnLocale.RU.locale, VoiceGender.FEMALE, VoiceRole.FRIENDLY), + DASHA(BrnLocale.RU.locale, VoiceGender.FEMALE), + LERA(BrnLocale.RU.locale, VoiceGender.FEMALE), + JULIA(BrnLocale.RU.locale, VoiceGender.FEMALE), + MASHA(BrnLocale.RU.locale, VoiceGender.FEMALE), + MADI_RU(BrnLocale.RU.locale, VoiceGender.FEMALE), + OMAZH(BrnLocale.RU.locale, VoiceGender.FEMALE), + + JOHN(BrnLocale.EN.locale, VoiceGender.MALE), + NICK(BrnLocale.EN.locale, VoiceGender.MALE), + JANE(BrnLocale.EN.locale, VoiceGender.FEMALE), + ALYSS(BrnLocale.EN.locale, VoiceGender.FEMALE), + ; + + val supportedRoles: List = supportedRoles.toList() + + val apiValue: String + get() = name.lowercase() + + companion object { + fun getVoicesForLocale(locale: String): List = values().filter { it.locale == locale.lowercase() } + + fun findByValue(value: String): Voice? = values().firstOrNull { it.name.equals(value, ignoreCase = true) } + } +} + +enum class VoiceGender { + MALE, + FEMALE, +} + +enum class VoiceRole { + NEUTRAL, + GOOD, + FRIENDLY, + STRICT, + ; + + val apiValue: String + get() = name.lowercase() + + companion object { + fun findByValue(value: String): VoiceRole? 
= values().firstOrNull { it.name.equals(value, ignoreCase = true) } + } } diff --git a/src/main/kotlin/com/epam/brn/service/WordsService.kt b/src/main/kotlin/com/epam/brn/service/WordsService.kt index ce83c8070..edfb71596 100644 --- a/src/main/kotlin/com/epam/brn/service/WordsService.kt +++ b/src/main/kotlin/com/epam/brn/service/WordsService.kt @@ -1,41 +1,75 @@ package com.epam.brn.service import com.epam.brn.dto.AudioFileMetaData -import com.epam.brn.enums.BrnLocale import com.epam.brn.enums.Voice +import com.epam.brn.enums.VoiceGender import org.apache.commons.codec.digest.DigestUtils import org.springframework.beans.factory.annotation.Value import org.springframework.stereotype.Service +import java.util.concurrent.ConcurrentHashMap @Service class WordsService { @Value(value = "\${yandex.folderForFiles}") private lateinit var localFolderForFiles: String - private val mapYandexLocaleManVoice = - mapOf( - BrnLocale.RU.locale to listOf(Voice.FILIPP, Voice.ALEXANDER, Voice.KIRILL), - BrnLocale.EN.locale to listOf(Voice.JOHN, Voice.NICK), - BrnLocale.TR.locale to listOf(Voice.ERKANYAVAS), - ) + private val runtimeDefaultVoices = ConcurrentHashMap() - private val mapYandexLocaleWomanVoice = - mapOf( - BrnLocale.RU.locale to listOf(Voice.MARINA, Voice.DASHA, Voice.OKSANA, Voice.LERA), - BrnLocale.EN.locale to listOf(Voice.JOHN, Voice.ALYSS), - BrnLocale.TR.locale to listOf(Voice.SILAERKAN), - ) + fun getDefaultManVoiceForLocale(locale: String): String = getVoiceEnumsForLocale(locale, VoiceGender.MALE).first().name - fun getDefaultManVoiceForLocale(locale: String): String = mapYandexLocaleManVoice[locale]!!.first().name + fun getDefaultWomanVoiceForLocale(locale: String): String = getVoiceEnumsForLocale(locale, VoiceGender.FEMALE) + .firstOrNull() + ?.name + ?: getDefaultManVoiceForLocale(locale) - fun getDefaultWomanVoiceForLocale(locale: String): String = mapYandexLocaleWomanVoice[locale]!!.first().name + fun getDefaultVoiceForLocale(locale: String): String = 
getDefaultVoiceEnumForLocale(locale).name - fun getVoicesForLocale(locale: String): List = - mapYandexLocaleManVoice[locale]!!.map { it.name }.plus(mapYandexLocaleWomanVoice[locale]!!.map { it.name }) + fun getVoicesForLocale(locale: String): List = getVoiceEnumsForLocale(locale).map { it.name } + + fun getAvailableVoicesForLocale(locale: String): List = getVoiceEnumsForLocale(locale) + + fun getVoiceForLocale( + locale: String, + voice: String, + ): Voice? = Voice.findByValue(voice)?.takeIf { it.locale == locale.lowercase() } + + fun setDefaultVoiceForLocale( + locale: String, + voice: String, + ): Voice { + val localeVoices = getVoiceEnumsForLocale(locale) + if (localeVoices.isEmpty()) + throw IllegalArgumentException("Locale $locale does not support yet for generation audio files.") + + val selectedVoice = + getVoiceForLocale(locale, voice) + ?: throw IllegalArgumentException( + "Locale $locale does not support voice $voice, only ${localeVoices.map { it.name }}.", + ) + + runtimeDefaultVoices[locale.lowercase()] = selectedVoice + return selectedVoice + } fun getLocalFilePathForWord(audioFileMetaData: AudioFileMetaData) = "$localFolderForFiles${getSubFilePathForWord(audioFileMetaData)}" fun getSubFilePathForWord(meta: AudioFileMetaData) = "${getSubPathForWord(meta)}/${DigestUtils.md5Hex(meta.text)}.ogg" fun getSubPathForWord(meta: AudioFileMetaData) = "/audio/${meta.locale}/${meta.voice.lowercase()}/${meta.speedFloat}" + + private fun getVoiceEnumsForLocale( + locale: String, + gender: VoiceGender? 
= null, + ): List = Voice + .getVoicesForLocale(locale) + .filter { gender == null || it.gender == gender } + + private fun getDefaultVoiceEnumForLocale(locale: String): Voice { + val normalizedLocale = locale.lowercase() + + return runtimeDefaultVoices[normalizedLocale] + ?: getVoiceEnumsForLocale(normalizedLocale, VoiceGender.MALE).firstOrNull() + ?: getVoiceEnumsForLocale(normalizedLocale).firstOrNull() + ?: throw IllegalArgumentException("Locale $locale does not support yet for generation audio files.") + } } diff --git a/src/main/kotlin/com/epam/brn/service/YandexSpeechKitService.kt b/src/main/kotlin/com/epam/brn/service/YandexSpeechKitService.kt index ff6d5e23c..26cef699b 100644 --- a/src/main/kotlin/com/epam/brn/service/YandexSpeechKitService.kt +++ b/src/main/kotlin/com/epam/brn/service/YandexSpeechKitService.kt @@ -1,131 +1,169 @@ package com.epam.brn.service +import com.epam.brn.config.ExcludeFromJacocoGeneratedReport import com.epam.brn.dto.AudioFileMetaData -import com.epam.brn.enums.BrnLocale +import com.epam.brn.dto.YandexIamTokenDto +import com.epam.brn.dto.yandex.tts.ContainerAudio +import com.epam.brn.dto.yandex.tts.Hint +import com.epam.brn.dto.yandex.tts.OutputAudioSpec +import com.epam.brn.dto.yandex.tts.YandexTtsRequest +import com.epam.brn.dto.yandex.tts.YandexTtsResponse +import com.epam.brn.enums.Voice +import com.epam.brn.enums.VoiceRole import com.epam.brn.exception.YandexServiceException -import org.apache.http.NameValuePair -import org.apache.http.client.methods.CloseableHttpResponse -import org.apache.http.client.methods.HttpPost -import org.apache.http.client.utils.URIBuilder -import org.apache.http.impl.client.HttpClientBuilder -import org.apache.http.message.BasicNameValuePair -import org.apache.http.util.EntityUtils +import com.epam.brn.service.yandex.tts.config.YandexTtsProperties +import com.fasterxml.jackson.databind.DeserializationFeature +import com.fasterxml.jackson.databind.ObjectMapper import 
org.apache.logging.log4j.kotlin.logger -import org.json.JSONObject -import org.springframework.beans.factory.annotation.Value +import org.springframework.beans.factory.annotation.Qualifier import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty -import org.springframework.context.annotation.Primary import org.springframework.http.HttpStatus import org.springframework.stereotype.Service +import org.springframework.web.reactive.function.client.WebClient +import java.io.ByteArrayInputStream +import java.io.ByteArrayOutputStream import java.io.InputStream import java.time.LocalDateTime import java.time.ZoneOffset +import java.time.ZonedDateTime +import java.util.Base64 @Service -@Primary @ConditionalOnProperty(name = ["default.tts.provider"], havingValue = "yandex") class YandexSpeechKitService( private val wordsService: WordsService, private val timeService: TimeService, + private val yandexTtsProperties: YandexTtsProperties, + @Qualifier("yandexTtsWebClient") private val yandexTtsWebClient: WebClient, + @Qualifier("yandexIamTokenWebClient") private val yandexIamTokenWebClient: WebClient, ) : TextToSpeechService { - @Value("\${yandex.getTokenLink}") - lateinit var uriGetIamToken: String - - @Value("\${yandex.authToken}") - lateinit var authToken: String - - @Value("\${yandex.generationAudioLink}") - lateinit var uriGenerationAudioFile: String - - @Value("\${yandex.folderId}") - lateinit var folderId: String - - @Value("\${yandex.format}") - lateinit var format: String - - @Value("\${yandex.emotions}") - lateinit var emotions: List - var iamToken: String = "" var iamTokenExpiresTime: LocalDateTime = LocalDateTime.now(ZoneOffset.UTC) private val log = logger() + private val objectMapper = + ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) fun getYandexIamTokenForAudioGeneration(): String { if (iamToken.isNotEmpty() && iamTokenExpiresTime.isAfter(timeService.now())) return iamToken - val parameters = ArrayList() 
- parameters.add(BasicNameValuePair("yandexPassportOauthToken", authToken)) - val uriBuilder = URIBuilder(uriGetIamToken) - uriBuilder.addParameters(parameters) - val postRequest = HttpPost(uriBuilder.build()) - val httpClient = HttpClientBuilder.create().build() - val response: CloseableHttpResponse = httpClient.execute(postRequest) - val statusCode = response.statusLine.statusCode - if (statusCode != HttpStatus.OK.value()) - throw YandexServiceException("Can't get yandex iam token, httpStatus={$statusCode}") - val entity = EntityUtils.toString(response.entity) - val jsonObject = JSONObject(entity) - iamToken = jsonObject.getString("iamToken") - val tokenExpiresTimeValue = jsonObject.getString("expiresAt") - iamTokenExpiresTime = timeService.now() - log.info("Get iam token from yandex cloud successfully, it will expire at $tokenExpiresTimeValue") + + val tokenDto = requestIamToken() + iamToken = tokenDto.iamToken + iamTokenExpiresTime = + ZonedDateTime + .parse(tokenDto.expiresAt) + .withZoneSameInstant(ZoneOffset.UTC) + .toLocalDateTime() + log.info("Got IAM token from Yandex Cloud, expires at ${tokenDto.expiresAt}") return iamToken } - /** - * Generate stream of .ogg audio file from yandex cloud speech kit service - */ + @ExcludeFromJacocoGeneratedReport + private fun requestIamToken(): YandexIamTokenDto = yandexIamTokenWebClient + .post() + .bodyValue(mapOf("yandexPassportOauthToken" to yandexTtsProperties.authToken)) + .retrieve() + .onStatus(HttpStatus::isError) { response -> + response + .bodyToMono(String::class.java) + .defaultIfEmpty("no body") + .map { body -> + YandexServiceException( + "Can't get Yandex IAM token, status=${response.statusCode()}: $body", + ) + } + }.bodyToMono(YandexIamTokenDto::class.java) + .block() ?: throw YandexServiceException("Failed to get IAM token from Yandex Cloud") + fun generateAudioStream(audioFileMetaData: AudioFileMetaData): InputStream { val token = getYandexIamTokenForAudioGeneration() - val emotion = emotions.first() 
- val parameters = - ArrayList().apply { - add(BasicNameValuePair("folderId", folderId)) - add(BasicNameValuePair("lang", audioFileMetaData.locale)) - add(BasicNameValuePair("format", format)) - add(BasicNameValuePair("voice", audioFileMetaData.voice.lowercase())) - add(BasicNameValuePair("emotion", emotion)) - add(BasicNameValuePair("speed", audioFileMetaData.speedFloat)) - add(BasicNameValuePair("text", audioFileMetaData.text)) + val voice = resolveVoice(audioFileMetaData.locale, audioFileMetaData.voice) + val request = + YandexTtsRequest( + text = audioFileMetaData.text, + outputAudioSpec = OutputAudioSpec(containerAudio = ContainerAudio()), + hints = buildHints(voice, audioFileMetaData.speedFloat), + ) + val responseBody = requestAudioSynthesis(token, request) + val outputStream = ByteArrayOutputStream() + + parseAudioChunks(responseBody).forEach { outputStream.write(it) } + + val audioBytes = outputStream.toByteArray() + if (audioBytes.isEmpty()) + throw YandexServiceException("Yandex Cloud returned empty audio for $audioFileMetaData") + + log.info("Ogg audio file for $audioFileMetaData was successfully generated by Yandex!") + return ByteArrayInputStream(audioBytes) + } + + @ExcludeFromJacocoGeneratedReport + private fun requestAudioSynthesis( + token: String, + request: YandexTtsRequest, + ): String = yandexTtsWebClient + .post() + .header("Authorization", "Bearer $token") + .header("x-folder-id", yandexTtsProperties.folderId) + .bodyValue(request) + .retrieve() + .onStatus(HttpStatus::isError) { response -> + response + .bodyToMono(String::class.java) + .defaultIfEmpty("no body") + .map { body -> YandexServiceException("Yandex TTS error ${response.statusCode()}: $body") } + }.bodyToMono(String::class.java) + .block() ?: throw YandexServiceException("Yandex Cloud did not return audio response") + + internal fun parseAudioChunks(responseBody: String): List { + val chunks = mutableListOf() + + responseBody + .lines() + .filter { it.isNotBlank() } + 
.forEachIndexed { index, line -> + val parsed = + try { + objectMapper.readValue(line, YandexTtsResponse::class.java) + } catch (e: Exception) { + throw YandexServiceException("Failed to parse Yandex audio chunk at line ${index + 1}.") + } + + val audioData = parsed.result?.audioChunk?.data ?: return@forEachIndexed + + try { + chunks.add(Base64.getDecoder().decode(audioData)) + } catch (e: IllegalArgumentException) { + throw YandexServiceException("Yandex audio chunk at line ${index + 1} has invalid base64 content.") + } } - val uriBuilder = URIBuilder(uriGenerationAudioFile) - uriBuilder.addParameters(parameters) - - val postRequest = HttpPost(uriBuilder.build()) - postRequest.setHeader("Authorization", "Bearer $token") - - val httpClient = HttpClientBuilder.create().build() - val response = httpClient.execute(postRequest) - var count = 10 - var success = false - var statusCode = 0 - while (!success && count != 0) { - count-- - statusCode = response.statusLine.statusCode - if (statusCode != HttpStatus.OK.value()) - log.error("====== for $audioFileMetaData, httpStatus={$statusCode}, count=$count ======") - else - success = true + return chunks + } + + internal fun resolvePreferredRole(voice: Voice): VoiceRole? 
{ + val preferredRole = + yandexTtsProperties.preferredRole + ?.takeIf { it.isNotBlank() } + ?.let(VoiceRole::findByValue) + + return when { + preferredRole != null && voice.supportedRoles.contains(preferredRole) -> preferredRole + preferredRole == null -> voice.supportedRoles.firstOrNull() + else -> null } - if (statusCode != HttpStatus.OK.value()) - throw YandexServiceException( - "Yandex cloud does not provide audio file for $audioFileMetaData, httpStatus={$statusCode}, content=${response.entity.content}", - ) - log.info("Ogg audio file for $audioFileMetaData was successfully generated by yandex!") - val httpEntity = response.entity - return httpEntity.content } fun validateLocaleAndVoice( locale: String, voice: String, ) { - if (!BrnLocale.values().map { it.locale }.contains(locale.lowercase())) - throw IllegalArgumentException("Locale $locale does not support yet for generation audio files.") val localeVoices = wordsService.getVoicesForLocale(locale) - if (voice.isNotEmpty() && !localeVoices.contains(voice)) + if (localeVoices.isEmpty()) + throw IllegalArgumentException("Locale $locale does not support yet for generation audio files.") + if (voice.isNotBlank() && wordsService.getVoiceForLocale(locale, voice) == null) throw IllegalArgumentException("Locale $locale does not support voice $voice, only $localeVoices.") } @@ -135,9 +173,34 @@ class YandexSpeechKitService( AudioFileMetaData( audioFileMetaData.text, audioFileMetaData.locale, - audioFileMetaData.voice.ifEmpty { wordsService.getDefaultWomanVoiceForLocale(audioFileMetaData.locale) }, + audioFileMetaData.voice.ifBlank { wordsService.getDefaultVoiceForLocale(audioFileMetaData.locale) }, audioFileMetaData.speedFloat, ), ) } + + private fun resolveVoice( + locale: String, + voice: String, + ): Voice = if (voice.isBlank()) + wordsService + .getVoiceForLocale(locale, wordsService.getDefaultVoiceForLocale(locale)) + ?: throw IllegalArgumentException("Locale $locale does not support yet for generation audio 
files.") + else + wordsService + .getVoiceForLocale(locale, voice) + ?: throw IllegalArgumentException("Locale $locale does not support voice $voice.") + + private fun buildHints( + voice: Voice, + speed: String, + ): List { + val hints = mutableListOf(Hint(voice = voice.apiValue)) + + if (speed.isNotBlank()) + hints.add(Hint(speed = speed)) + resolvePreferredRole(voice)?.let { hints.add(Hint(role = it.apiValue)) } + + return hints + } } diff --git a/src/main/kotlin/com/epam/brn/service/impl/UserAnalyticsServiceImpl.kt b/src/main/kotlin/com/epam/brn/service/impl/UserAnalyticsServiceImpl.kt index 2d74d7a02..4544ad1e9 100644 --- a/src/main/kotlin/com/epam/brn/service/impl/UserAnalyticsServiceImpl.kt +++ b/src/main/kotlin/com/epam/brn/service/impl/UserAnalyticsServiceImpl.kt @@ -93,8 +93,8 @@ class UserAnalyticsServiceImpl( if (!listTextExercises.contains(seriesType)) audioFileMetaData.text = text.replace(" ", ", ") val currentUser = userAccountService.getCurrentUser() - // todo use choseVoiceForUser(currentUser) after moving to yandex speechKit v3 - audioFileMetaData.voice = wordsService.getDefaultWomanVoiceForLocale(audioFileMetaData.locale) + audioFileMetaData.voice = + audioFileMetaData.voice.ifBlank { wordsService.getDefaultVoiceForLocale(audioFileMetaData.locale) } setSpeedForUser(currentUser, exerciseId, audioFileMetaData) return audioFileMetaData } diff --git a/src/main/kotlin/com/epam/brn/service/yandex/tts/config/YandexTtsProperties.kt b/src/main/kotlin/com/epam/brn/service/yandex/tts/config/YandexTtsProperties.kt new file mode 100644 index 000000000..7491b1c1d --- /dev/null +++ b/src/main/kotlin/com/epam/brn/service/yandex/tts/config/YandexTtsProperties.kt @@ -0,0 +1,18 @@ +package com.epam.brn.service.yandex.tts.config + +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty +import org.springframework.boot.context.properties.ConfigurationProperties +import org.springframework.stereotype.Component + +@Component 
+@ConfigurationProperties("yandex") +@ConditionalOnProperty(name = ["default.tts.provider"], havingValue = "yandex") +class YandexTtsProperties { + lateinit var authToken: String + lateinit var getTokenLink: String + lateinit var generationAudioLink: String + lateinit var folderId: String + var preferredRole: String? = null + var enableWiretap: Boolean = false + lateinit var folderForFiles: String +} diff --git a/src/main/kotlin/com/epam/brn/service/yandex/tts/config/YandexTtsWebClientConfig.kt b/src/main/kotlin/com/epam/brn/service/yandex/tts/config/YandexTtsWebClientConfig.kt new file mode 100644 index 000000000..bb9473415 --- /dev/null +++ b/src/main/kotlin/com/epam/brn/service/yandex/tts/config/YandexTtsWebClientConfig.kt @@ -0,0 +1,48 @@ +package com.epam.brn.service.yandex.tts.config + +import io.netty.handler.logging.LogLevel.DEBUG +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty +import org.springframework.context.annotation.Bean +import org.springframework.context.annotation.Configuration +import org.springframework.http.client.reactive.ReactorClientHttpConnector +import org.springframework.web.reactive.function.client.ExchangeStrategies +import org.springframework.web.reactive.function.client.WebClient +import reactor.netty.http.client.HttpClient +import reactor.netty.transport.logging.AdvancedByteBufFormat.TEXTUAL +import java.time.Duration + +@Configuration +@ConditionalOnProperty(name = ["default.tts.provider"], havingValue = "yandex") +class YandexTtsWebClientConfig( + private val yandexTtsProperties: YandexTtsProperties, +) { + @Bean("yandexTtsWebClient") + fun yandexTtsWebClient() = WebClient + .builder() + .baseUrl(yandexTtsProperties.generationAudioLink) + .clientConnector(reactorClientHttpConnector()) + .exchangeStrategies( + ExchangeStrategies + .builder() + .codecs { configurer -> configurer.defaultCodecs().maxInMemorySize(2 * 1024 * 1024) } + .build(), + ).build() + + @Bean("yandexIamTokenWebClient") + fun 
yandexIamTokenWebClient() = WebClient + .builder() + .baseUrl(yandexTtsProperties.getTokenLink) + .clientConnector(reactorClientHttpConnector()) + .build() + + private fun reactorClientHttpConnector() = ReactorClientHttpConnector(httpClient()) + + private fun httpClient(): HttpClient { + val client = HttpClient.create().responseTimeout(Duration.ofSeconds(15)) + + return if (yandexTtsProperties.enableWiretap) + client.wiretap("reactor.netty.client.HttpClient", DEBUG, TEXTUAL) + else + client + } +} diff --git a/src/main/kotlin/com/epam/brn/upload/CsvUploadService.kt b/src/main/kotlin/com/epam/brn/upload/CsvUploadService.kt index 258adff8c..09dd94184 100644 --- a/src/main/kotlin/com/epam/brn/upload/CsvUploadService.kt +++ b/src/main/kotlin/com/epam/brn/upload/CsvUploadService.kt @@ -22,7 +22,7 @@ class CsvUploadService( private val recordProcessors: List>, private val seriesRepository: SeriesRepository, ) { - val localeSuffixMap = mapOf("ru" to BrnLocale.RU, "en" to BrnLocale.EN, "tr" to BrnLocale.TR) + val localeSuffixMap = mapOf("ru" to BrnLocale.RU, "en" to BrnLocale.EN) companion object { private val csvContentTypes = diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 361bbfe76..803849baa 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -59,11 +59,10 @@ lopotkoFileName=lopotko.txt yandex.authToken=${YANDEX_AUTH_TOKEN:askLenasForTesting} yandex.getTokenLink=https://iam.api.cloud.yandex.net/iam/v1/tokens -yandex.generationAudioLink=https://tts.api.cloud.yandex.net/speech/v1/tts:synthesize +yandex.generationAudioLink=https://tts.api.cloud.yandex.net:443/tts/v3/utteranceSynthesis yandex.folderId=${YANDEX_FOLDER_ID:b1g0m877l9r22ngujisu} -yandex.format=oggopus -#yandex.emotion=good -yandex.emotions=friendly +yandex.preferredRole=neutral +yandex.enableWiretap=false yandex.folderForFiles=frontend/public brn.picture.file.default.path=pictures/%s.jpg diff --git 
a/src/test/kotlin/com/epam/brn/controller/YandexAudioSettingsControllerTest.kt b/src/test/kotlin/com/epam/brn/controller/YandexAudioSettingsControllerTest.kt new file mode 100644 index 000000000..954fe101b --- /dev/null +++ b/src/test/kotlin/com/epam/brn/controller/YandexAudioSettingsControllerTest.kt @@ -0,0 +1,56 @@ +package com.epam.brn.controller + +import com.epam.brn.dto.request.audio.AudioVoiceOverrideRequest +import com.epam.brn.enums.Voice +import com.epam.brn.service.WordsService +import io.kotest.matchers.shouldBe +import io.mockk.every +import io.mockk.impl.annotations.InjectMockKs +import io.mockk.impl.annotations.MockK +import io.mockk.junit5.MockKExtension +import io.mockk.verify +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.extension.ExtendWith + +@ExtendWith(MockKExtension::class) +internal class YandexAudioSettingsControllerTest { /* Unit tests for YandexAudioSettingsController with WordsService replaced by a MockK mock. */ + @InjectMockKs + lateinit var controller: YandexAudioSettingsController + + @MockK + lateinit var wordsService: WordsService + + @Test + fun `should return available voices with current default`() { /* stubs the default and available voices, then checks status, voice names and the isDefault flag */ + every { wordsService.getDefaultVoiceForLocale("ru-ru") } returns Voice.FILIPP.name + every { wordsService.getAvailableVoicesForLocale("ru-ru") } returns listOf(Voice.FILIPP, Voice.MARINA) + + val response = controller.getVoices("ru-ru") + + response.statusCode.value() shouldBe 200 + response.body?.data?.defaultVoice shouldBe Voice.FILIPP.name + response.body + ?.data + ?.voices + ?.map { it.name } shouldBe listOf(Voice.FILIPP.name, Voice.MARINA.name) + response.body + ?.data + ?.voices + ?.first { it.name == Voice.FILIPP.name } + ?.isDefault shouldBe true + } + + @Test + fun `should update runtime default voice`() { /* the request passes the voice by its apiValue */ + val request = AudioVoiceOverrideRequest(locale = "ru-ru", voice = Voice.MARINA.apiValue) + every { wordsService.setDefaultVoiceForLocale("ru-ru", Voice.MARINA.apiValue) } returns Voice.MARINA + every { wordsService.getDefaultVoiceForLocale("ru-ru") } returns Voice.MARINA.name + every { 
wordsService.getAvailableVoicesForLocale("ru-ru") } returns listOf(Voice.FILIPP, Voice.MARINA) + + val response = controller.setDefaultVoice(request) + + response.statusCode.value() shouldBe 200 + response.body?.data?.defaultVoice shouldBe Voice.MARINA.name + verify(exactly = 1) { wordsService.setDefaultVoiceForLocale("ru-ru", Voice.MARINA.apiValue) } /* the override must reach WordsService exactly once */ + } +} diff --git a/src/test/kotlin/com/epam/brn/service/UserAnalyticsServiceTest.kt b/src/test/kotlin/com/epam/brn/service/UserAnalyticsServiceTest.kt index b4d17dc8e..ff19a2217 100644 --- a/src/test/kotlin/com/epam/brn/service/UserAnalyticsServiceTest.kt +++ b/src/test/kotlin/com/epam/brn/service/UserAnalyticsServiceTest.kt @@ -22,6 +22,7 @@ import io.mockk.impl.annotations.InjectMockKs import io.mockk.impl.annotations.MockK import io.mockk.junit5.MockKExtension import io.mockk.mockk +import io.mockk.verify import org.junit.jupiter.api.DisplayName import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertThrows @@ -122,7 +123,7 @@ internal class UserAnalyticsServiceTest { // GIVEN val studyHistory = mockk() val currentUser = mockk() - every { wordsService.getDefaultWomanVoiceForLocale(any()) } returns Voice.FILIPP.name + every { wordsService.getDefaultVoiceForLocale(any()) } returns Voice.FILIPP.name every { userAccountService.getCurrentUser() } returns currentUser every { currentUser.bornYear } returns 2023 every { currentUser.id } returns currentUserId @@ -155,7 +156,7 @@ internal class UserAnalyticsServiceTest { } returns studyHistory every { exerciseService.isDoneWell(studyHistory) } returns true every { exerciseRepository.findTypeByExerciseId(exerciseId) } returns ExerciseType.PHRASES.name - every { wordsService.getDefaultWomanVoiceForLocale(any()) } returns Voice.FILIPP.name + every { wordsService.getDefaultVoiceForLocale(any()) } returns Voice.FILIPP.name val audioFileMetaData = AudioFileMetaData("мама папа", BrnLocale.RU.locale, Voice.FILIPP.name, "1", AzureRates.DEFAULT) // WHEN @@ -180,7 +181,7 
@@ internal class UserAnalyticsServiceTest { } returns studyHistory every { exerciseService.isDoneWell(studyHistory) } returns false every { exerciseRepository.findTypeByExerciseId(exerciseId) } returns ExerciseType.SINGLE_SIMPLE_WORDS.name - every { wordsService.getDefaultWomanVoiceForLocale(any()) } returns Voice.FILIPP.name + every { wordsService.getDefaultVoiceForLocale(any()) } returns Voice.FILIPP.name val audioFileMetaData = AudioFileMetaData("мама папа", BrnLocale.RU.locale, Voice.FILIPP.name, "1", AzureRates.DEFAULT) @@ -195,7 +196,7 @@ internal class UserAnalyticsServiceTest { } @Test - fun `should prepareAudioFileMetaData with lera voice up to 18 years old user`() { + fun `should use configured default voice when request voice is blank`() { /* the metadata built below carries an empty voice string */ // GIVEN val studyHistory = mockk() val currentUser = mockk() @@ -209,13 +210,37 @@ internal class UserAnalyticsServiceTest { every { exerciseRepository.findTypeByExerciseId(exerciseId) } returns ExerciseType.SINGLE_SIMPLE_WORDS.name val audioFileMetaData = AudioFileMetaData("мама папа", BrnLocale.RU.locale, "", "1", AzureRates.DEFAULT) - every { wordsService.getDefaultWomanVoiceForLocale(any()) } returns Voice.FILIPP.name + every { wordsService.getDefaultVoiceForLocale(any()) } returns Voice.FILIPP.name // WHEN val metaDataResult = userAnalyticsService.prepareAudioFileMetaData(exerciseId, audioFileMetaData) // THEN metaDataResult.voice shouldBe Voice.FILIPP.name } + @Test + fun `should keep requested voice when it is already set`() { /* the request already names a voice; the test ends by verifying the default-voice lookup is never called */ + // GIVEN + val studyHistory = mockk() + val currentUser = mockk() + every { userAccountService.getCurrentUser() } returns currentUser + every { currentUser.bornYear } returns 2000 + every { currentUser.id } returns currentUserId + every { + studyHistoryRepository.findLastByUserAccountIdAndExerciseId(currentUserId, exerciseId) + } returns studyHistory + every { exerciseService.isDoneWell(studyHistory) } returns true + every { exerciseRepository.findTypeByExerciseId(exerciseId) } returns 
ExerciseType.SINGLE_SIMPLE_WORDS.name + val audioFileMetaData = + AudioFileMetaData("мама папа", BrnLocale.RU.locale, Voice.MARINA.name, "1", AzureRates.DEFAULT) + + // WHEN + val metaDataResult = userAnalyticsService.prepareAudioFileMetaData(exerciseId, audioFileMetaData) + + // THEN + metaDataResult.voice shouldBe Voice.MARINA.name + verify(exactly = 0) { wordsService.getDefaultVoiceForLocale(any()) } /* an explicit voice must not trigger the default-voice lookup */ + } + @Test fun `should prepareAudioFileMetaData without adding comma and slowest speed for words with bad stat PHRASES`() { // GIVEN @@ -229,7 +254,7 @@ internal class UserAnalyticsServiceTest { } returns studyHistory every { exerciseService.isDoneWell(studyHistory) } returns false every { exerciseRepository.findTypeByExerciseId(exerciseId) } returns ExerciseType.PHRASES.name - every { wordsService.getDefaultWomanVoiceForLocale(any()) } returns Voice.FILIPP.name + every { wordsService.getDefaultVoiceForLocale(any()) } returns Voice.FILIPP.name val audioFileMetaData = AudioFileMetaData("мама папа", BrnLocale.RU.locale, Voice.FILIPP.name, "1", AzureRates.DEFAULT) @@ -255,7 +280,7 @@ internal class UserAnalyticsServiceTest { } returns studyHistory every { exerciseService.isDoneWell(studyHistory) } returns true every { exerciseRepository.findTypeByExerciseId(exerciseId) } returns ExerciseType.SINGLE_SIMPLE_WORDS.name - every { wordsService.getDefaultWomanVoiceForLocale(any()) } returns Voice.FILIPP.name + every { wordsService.getDefaultVoiceForLocale(any()) } returns Voice.FILIPP.name val audioFileMetaData = AudioFileMetaData("мама", BrnLocale.RU.locale, Voice.FILIPP.name, "1", AzureRates.DEFAULT) @@ -281,7 +306,7 @@ internal class UserAnalyticsServiceTest { } returns studyHistory every { exerciseService.isDoneWell(studyHistory) } returns false every { exerciseRepository.findTypeByExerciseId(exerciseId) } returns ExerciseType.SINGLE_SIMPLE_WORDS.name - every { wordsService.getDefaultWomanVoiceForLocale(any()) } returns Voice.FILIPP.name + every { 
wordsService.getDefaultVoiceForLocale(any()) } returns Voice.FILIPP.name val audioFileMetaData = AudioFileMetaData("text", BrnLocale.RU.locale, Voice.FILIPP.name, "1", AzureRates.DEFAULT) @@ -304,7 +329,7 @@ internal class UserAnalyticsServiceTest { studyHistoryRepository.findLastByUserAccountIdAndExerciseId(currentUserId, exerciseId) } returns null every { exerciseRepository.findTypeByExerciseId(exerciseId) } returns ExerciseType.SINGLE_SIMPLE_WORDS.name - every { wordsService.getDefaultWomanVoiceForLocale(any()) } returns Voice.FILIPP.name + every { wordsService.getDefaultVoiceForLocale(any()) } returns Voice.FILIPP.name val audioFileMetaData = AudioFileMetaData("text", BrnLocale.RU.locale, Voice.FILIPP.name, "1", AzureRates.DEFAULT) // WHEN @@ -346,7 +371,7 @@ internal class UserAnalyticsServiceTest { val audioFileMetaData = AudioFileMetaData("text", BrnLocale.RU.locale, Voice.FILIPP.name, "1", AzureRates.DEFAULT) every { textToSpeechService.generateAudioOggStreamWithValidation(audioFileMetaData) } returns audioStreamMock - every { wordsService.getDefaultWomanVoiceForLocale(any()) } returns Voice.FILIPP.name + every { wordsService.getDefaultVoiceForLocale(any()) } returns Voice.FILIPP.name // WHEN val audioStreamResult = userAnalyticsService.prepareAudioStreamForUser(exerciseId, audioFileMetaData) diff --git a/src/test/kotlin/com/epam/brn/service/WordsServiceTest.kt b/src/test/kotlin/com/epam/brn/service/WordsServiceTest.kt index ce7e75ebf..243dec132 100644 --- a/src/test/kotlin/com/epam/brn/service/WordsServiceTest.kt +++ b/src/test/kotlin/com/epam/brn/service/WordsServiceTest.kt @@ -1,8 +1,15 @@ package com.epam.brn.service +import com.epam.brn.dto.AudioFileMetaData +import com.epam.brn.enums.Voice +import io.kotest.matchers.collections.shouldContain +import io.kotest.matchers.shouldBe +import io.kotest.matchers.string.shouldContain +import io.kotest.matchers.string.shouldEndWith import io.mockk.impl.annotations.InjectMockKs import 
io.mockk.junit5.MockKExtension import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows import org.junit.jupiter.api.extension.ExtendWith @ExtendWith(MockKExtension::class) @@ -11,16 +18,112 @@ internal class WordsServiceTest { lateinit var wordsService: WordsService @Test - fun `should create file with words`() { - // GIVEN -// val words = hashMapOf("girl" to "girlHex", "boy" to "boyHex", "man" to "manHex") -// val fileName = "testWordsFile.txt" -// // WHEN -// val fileResult = wordsService.createTxtFilesWithExerciseWordsMap(words, fileName) -// // THAN -// assertTrue(fileResult.exists()) -// val expected = "man=manHex${System.lineSeparator()}girl=girlHex${System.lineSeparator()}boy=boyHex${System.lineSeparator()}" -// assertEquals(expected, fileResult.readText()) -// fileResult.deleteOnExit() + fun `should return first male voice by default`() { + wordsService.getDefaultVoiceForLocale("ru-ru") shouldBe Voice.FILIPP.name + wordsService.getDefaultVoiceForLocale("en-us") shouldBe Voice.JOHN.name + } + + @Test + fun `should allow overriding default voice at runtime`() { + wordsService.setDefaultVoiceForLocale("ru-ru", Voice.MARINA.name) + + wordsService.getDefaultVoiceForLocale("ru-ru") shouldBe Voice.MARINA.name + } + + @Test + fun `should expose voices directly from enum`() { + val voices = wordsService.getAvailableVoicesForLocale("ru-ru") + + voices.first() shouldBe Voice.FILIPP + voices shouldContain Voice.MARINA + } + + @Test + fun `should return default man voice for locale`() { + wordsService.getDefaultManVoiceForLocale("ru-ru") shouldBe Voice.FILIPP.name + wordsService.getDefaultManVoiceForLocale("en-us") shouldBe Voice.JOHN.name + } + + @Test + fun `should return default woman voice for locale`() { + wordsService.getDefaultWomanVoiceForLocale("ru-ru") shouldBe Voice.ALENA.name + wordsService.getDefaultWomanVoiceForLocale("en-us") shouldBe Voice.JANE.name + } + + @Test + fun `should return voice names for locale`() { + val voices = 
wordsService.getVoicesForLocale("ru-ru") + + voices shouldContain Voice.FILIPP.name + voices shouldContain Voice.MARINA.name + } + + @Test + fun `should find voice for matching locale`() { + wordsService.getVoiceForLocale("ru-ru", "FILIPP") shouldBe Voice.FILIPP + } + + @Test + fun `should return null for voice with wrong locale`() { /* FILIPP is looked up under en-us and must not be found */ + wordsService.getVoiceForLocale("en-us", "FILIPP") shouldBe null + } + + @Test + fun `should return null for non-existent voice`() { + wordsService.getVoiceForLocale("ru-ru", "NONEXISTENT") shouldBe null + } + + @Test + fun `should throw on setDefaultVoice for unsupported locale`() { + val ex = + assertThrows { /* NOTE(review): no exception type argument is visible on this call - confirm the expected type in the real file */ + wordsService.setDefaultVoiceForLocale("xx-xx", "FILIPP") + } + ex.message shouldContain "xx-xx" /* the error message must name the rejected locale */ + } + + @Test + fun `should throw on setDefaultVoice for invalid voice`() { + val ex = + assertThrows { + wordsService.setDefaultVoiceForLocale("ru-ru", "NONEXISTENT") + } + ex.message shouldContain "NONEXISTENT" /* the error message must name the rejected voice */ + } + + @Test + fun `should build sub path for word`() { + val meta = AudioFileMetaData(text = "hello", locale = "ru-ru", voice = "FILIPP", speedFloat = "1.0") + + wordsService.getSubPathForWord(meta) shouldBe "/audio/ru-ru/filipp/1.0" /* the voice segment is expected in lower case */ + } + + @Test + fun `should build sub file path for word`() { + val meta = AudioFileMetaData(text = "hello", locale = "ru-ru", voice = "FILIPP", speedFloat = "1.0") + + val path = wordsService.getSubFilePathForWord(meta) + path shouldEndWith ".ogg" + path shouldContain "/audio/ru-ru/filipp/1.0/" + } + + @Test + fun `should throw on getDefaultVoiceForLocale for unsupported locale`() { + assertThrows { + wordsService.getDefaultVoiceForLocale("xx-xx") + } + } + + @Test + fun `should build local file path for word`() { /* localFolderForFiles is assigned through reflection before the path is built */ + val field = WordsService::class.java.getDeclaredField("localFolderForFiles") + field.isAccessible = true + field.set(wordsService, "/tmp/test-files") + + val meta = AudioFileMetaData(text = "hello", locale = "ru-ru", voice = "FILIPP", speedFloat = "1.0") + + val path = 
wordsService.getLocalFilePathForWord(meta) + path shouldContain "/tmp/test-files" + path shouldEndWith ".ogg" } } diff --git a/src/test/kotlin/com/epam/brn/service/YandexSpeechKitServiceTest.kt b/src/test/kotlin/com/epam/brn/service/YandexSpeechKitServiceTest.kt index 1c04c86d5..c9679a907 100644 --- a/src/test/kotlin/com/epam/brn/service/YandexSpeechKitServiceTest.kt +++ b/src/test/kotlin/com/epam/brn/service/YandexSpeechKitServiceTest.kt @@ -1,72 +1,147 @@ package com.epam.brn.service +import com.epam.brn.dto.AudioFileMetaData +import com.epam.brn.dto.YandexIamTokenDto +import com.epam.brn.dto.yandex.tts.AudioChunk +import com.epam.brn.dto.yandex.tts.YandexTtsRequest +import com.epam.brn.dto.yandex.tts.YandexTtsResponse +import com.epam.brn.dto.yandex.tts.YandexTtsResult +import com.epam.brn.enums.Voice +import com.epam.brn.enums.VoiceRole import com.epam.brn.exception.YandexServiceException +import com.epam.brn.service.yandex.tts.config.YandexTtsProperties +import com.fasterxml.jackson.databind.ObjectMapper import io.kotest.matchers.shouldBe import io.mockk.every -import io.mockk.impl.annotations.InjectMockKs -import io.mockk.impl.annotations.MockK -import io.mockk.junit5.MockKExtension import io.mockk.mockk -import io.mockk.mockkStatic -import io.mockk.unmockkStatic -import org.apache.http.HttpEntity -import org.apache.http.client.methods.CloseableHttpResponse -import org.apache.http.impl.client.CloseableHttpClient -import org.apache.http.impl.client.HttpClientBuilder -import org.apache.http.util.EntityUtils +import io.mockk.slot +import io.mockk.verify +import org.junit.jupiter.api.BeforeEach import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertThrows -import org.junit.jupiter.api.extension.ExtendWith import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.ValueSource -import java.io.InputStream +import org.springframework.web.reactive.function.client.WebClient +import reactor.core.publisher.Mono import 
java.time.LocalDateTime +import java.time.ZoneOffset +import java.time.ZonedDateTime +import java.util.Base64 -@ExtendWith(MockKExtension::class) internal class YandexSpeechKitServiceTest { - @InjectMockKs - lateinit var yandexSpeechKitService: YandexSpeechKitService + private lateinit var yandexSpeechKitService: YandexSpeechKitService + private lateinit var wordsService: WordsService + private lateinit var timeService: TimeService + private lateinit var yandexTtsProperties: YandexTtsProperties + private lateinit var yandexTtsWebClient: WebClient + private lateinit var yandexIamTokenWebClient: WebClient + private val objectMapper = ObjectMapper() - @MockK - lateinit var wordsService: WordsService + @BeforeEach + fun setUp() { /* creates fresh MockK mocks before every test, stubs folderId, authToken and preferredRole, then builds the service through its constructor */ + wordsService = mockk() + timeService = mockk() + yandexTtsProperties = mockk() + yandexTtsWebClient = mockk() + yandexIamTokenWebClient = mockk() - @MockK - lateinit var timeService: TimeService + every { yandexTtsProperties.folderId } returns "test-folder-id" + every { yandexTtsProperties.authToken } returns "test-auth-token" + every { yandexTtsProperties.preferredRole } returns "neutral" - @ParameterizedTest - @ValueSource(strings = ["ru-ru", "en-us", "tr-tr"]) - fun `should success pass locale validation without Exceptions`(locale: String) { + yandexSpeechKitService = + YandexSpeechKitService( + wordsService = wordsService, + timeService = timeService, + yandexTtsProperties = yandexTtsProperties, + yandexTtsWebClient = yandexTtsWebClient, + yandexIamTokenWebClient = yandexIamTokenWebClient, + ) + } + + private fun mockIamTokenWebClient(mono: Mono): WebClient.ResponseSpec { /* stubs the post -> bodyValue -> retrieve -> onStatus -> bodyToMono chain of the IAM token client so that it yields [mono] */ + val requestBodyUriSpec = mockk() + val requestHeadersSpec = mockk>() + val responseSpec = mockk() + + every { yandexIamTokenWebClient.post() } returns requestBodyUriSpec + every { requestBodyUriSpec.bodyValue(any()) } returns requestHeadersSpec + every { requestHeadersSpec.retrieve() } returns responseSpec + every { responseSpec.onStatus(any(), any()) } returns 
responseSpec + every { responseSpec.bodyToMono(YandexIamTokenDto::class.java) } returns mono + + return responseSpec + } + + private fun mockTtsWebClient(mono: Mono): WebClient.RequestBodyUriSpec { /* stubs the TTS client call chain (post, header, bodyValue, retrieve, onStatus, bodyToMono) to yield [mono]; returns the request spec so header calls can be verified */ + val requestBodyUriSpec = mockk() + val requestHeadersSpec = mockk>() + val responseSpec = mockk() + + every { yandexTtsWebClient.post() } returns requestBodyUriSpec + every { requestBodyUriSpec.header(any(), any()) } returns requestBodyUriSpec + every { requestBodyUriSpec.bodyValue(any()) } returns requestHeadersSpec + every { requestHeadersSpec.retrieve() } returns responseSpec + every { responseSpec.onStatus(any(), any()) } returns responseSpec + every { responseSpec.bodyToMono(String::class.java) } returns mono + + return requestBodyUriSpec + } + + private fun setValidToken() { /* seeds the service with the token "valid-token" expiring one hour from now */ + yandexSpeechKitService.iamToken = "valid-token" + yandexSpeechKitService.iamTokenExpiresTime = LocalDateTime.now().plusHours(1) every { timeService.now() } returns LocalDateTime.now() - every { wordsService.getVoicesForLocale(locale) } returns emptyList() - // WHENv + } + + private fun stubVoice( /* stubs the voice list for the locale and the voice lookup for both the given and the lower-cased voice name */ + locale: String = "ru-ru", + voiceName: String = Voice.FILIPP.name, + voice: Voice = Voice.FILIPP, + ) { + every { wordsService.getVoicesForLocale(locale) } returns Voice.getVoicesForLocale(locale).map { it.name } + every { wordsService.getVoiceForLocale(locale, voiceName) } returns voice + every { wordsService.getVoiceForLocale(locale, voiceName.lowercase()) } returns voice + } + + private fun buildNdjsonResponse(vararg chunks: String): String = chunks.joinToString("\n") { base64Data -> /* one serialized YandexTtsResponse per base64 chunk, joined with newlines */ + objectMapper.writeValueAsString( + YandexTtsResponse( + result = YandexTtsResult(audioChunk = AudioChunk(data = base64Data)), + ), + ) + } + + @ParameterizedTest + @ValueSource(strings = ["ru-ru", "en-us"]) + fun `should pass locale validation for supported v3 locales`(locale: String) { /* the voice list comes from the Voice enum; an empty voice string is validated */ + every { wordsService.getVoicesForLocale(locale) } returns Voice.getVoicesForLocale(locale).map { it.name } + 
yandexSpeechKitService.validateLocaleAndVoice(locale, "") } @ParameterizedTest - @ValueSource(strings = ["ruru", "en-en", "tr"]) - fun `should failed on locale validation`(locale: String) { - // WHEN + @ValueSource(strings = ["ruru", "en-en", "fr-fr"]) + fun `should fail on locale validation when v3 has no voices for locale`(locale: String) { /* an empty voice list for the locale is the failure condition under test */ + every { wordsService.getVoicesForLocale(locale) } returns emptyList() + assertThrows { yandexSpeechKitService.validateLocaleAndVoice(locale, "") } } - @ParameterizedTest - @ValueSource(strings = ["FILIPP", "NICK"]) - fun `should success pass voice validation without Exceptions`(voice: String) { - val yandexVoices = listOf("FILIPP", "NICK") - every { timeService.now() } returns LocalDateTime.now() - every { wordsService.getVoicesForLocale("ru-ru") } returns yandexVoices - // WHEN - yandexSpeechKitService.validateLocaleAndVoice("ru-ru", voice) + @Test + fun `should pass voice validation case-insensitively`() { /* NOTE(review): the lower-case lookup is stubbed on the mock, so case handling inside WordsService itself is not exercised here */ + every { wordsService.getVoicesForLocale("ru-ru") } returns Voice.getVoicesForLocale("ru-ru").map { it.name } + every { wordsService.getVoiceForLocale("ru-ru", "filipp") } returns Voice.FILIPP + + yandexSpeechKitService.validateLocaleAndVoice("ru-ru", "filipp") } - @ParameterizedTest - @ValueSource(strings = ["ddd", "rrr"]) - fun `should failed on voice validation`(voice: String) { - val yandexVoices = listOf("FILIPP", "NICK") - every { timeService.now() } returns LocalDateTime.now() - every { wordsService.getVoicesForLocale("ru-ru") } returns yandexVoices - // WHEN - assertThrows { yandexSpeechKitService.validateLocaleAndVoice("ru-ru", voice) } + @Test + fun `should fail on unsupported voice validation`() { /* the locale has voices, but the lookup for "ddd" yields null */ + every { wordsService.getVoicesForLocale("ru-ru") } returns Voice.getVoicesForLocale("ru-ru").map { it.name } + every { wordsService.getVoiceForLocale("ru-ru", "ddd") } returns null + + assertThrows { yandexSpeechKitService.validateLocaleAndVoice("ru-ru", "ddd") } } @Test @@ -74,63 +149,435 @@ internal class 
YandexSpeechKitServiceTest { yandexSpeechKitService.iamToken = "current token" yandexSpeechKitService.iamTokenExpiresTime = LocalDateTime.now().plusHours(1) every { timeService.now() } returns LocalDateTime.now() - // WHEN + val resultToken = yandexSpeechKitService.getYandexIamTokenForAudioGeneration() - // THEN + resultToken shouldBe "current token" } @Test - fun `should return new token in getYandexIamTokenForAudioGeneration`() { + fun `should return new token and parse expiry correctly`() { /* starts from an empty cached token and captures the IAM request body for inspection */ yandexSpeechKitService.iamToken = "" - yandexSpeechKitService.authToken = "authToken" - yandexSpeechKitService.uriGetIamToken = "uriGetIamToken" - - val httpClientBuilder = mockk() - val httpClient = mockk() - val httpResponse = mockk() - val httpEntity = mockk() - val inputStream = mockk() - mockkStatic(HttpClientBuilder::class) - every { HttpClientBuilder.create() } returns httpClientBuilder - mockkStatic(EntityUtils::class) - every { EntityUtils.toString(any()) } returns "{\n" + - " \"iamToken\": \"iamTokenValue\",\n" + - " \"expiresAt\": \"2040-11-24T11:48:38.503511+03:00\"\n" + - "}" - - every { httpClientBuilder.build() } returns httpClient - every { httpClient.execute(any()) } returns httpResponse - every { httpResponse.statusLine.statusCode } returns 200 - every { httpResponse.entity } returns httpEntity - every { httpEntity.content } returns inputStream every { timeService.now() } returns LocalDateTime.now() - // WHEN + + val bodySlot = slot>() + val requestBodyUriSpec = mockk() + val requestHeadersSpec = mockk>() + val responseSpec = mockk() + + every { yandexIamTokenWebClient.post() } returns requestBodyUriSpec + every { requestBodyUriSpec.bodyValue(capture(bodySlot)) } returns requestHeadersSpec /* same chain as mockIamTokenWebClient, but with the body captured */ + every { requestHeadersSpec.retrieve() } returns responseSpec + every { responseSpec.onStatus(any(), any()) } returns responseSpec + every { responseSpec.bodyToMono(YandexIamTokenDto::class.java) } returns + Mono.just( + YandexIamTokenDto( + iamToken = "newIamToken", + 
expiresAt = "2040-11-24T11:48:38.503511+03:00", + ), + ) + val resultToken = yandexSpeechKitService.getYandexIamTokenForAudioGeneration() - // THEN - resultToken shouldBe "iamTokenValue" - httpResponse.statusLine.statusCode shouldBe 200 - unmockkStatic(HttpClientBuilder::class) - unmockkStatic(EntityUtils::class) + resultToken shouldBe "newIamToken" + bodySlot.captured["yandexPassportOauthToken"] shouldBe "test-auth-token" /* the configured authToken must be sent under this key */ + val expectedExpiry = + ZonedDateTime + .parse("2040-11-24T11:48:38.503511+03:00") + .withZoneSameInstant(ZoneOffset.UTC) + .toLocalDateTime() + yandexSpeechKitService.iamTokenExpiresTime shouldBe expectedExpiry /* expiry is expected as the UTC local date-time of the returned offset timestamp */ } @Test - fun `should throw Exception if status code is not 200`() { + fun `should throw exception if token request returns empty`() { /* the IAM client completes without emitting a token */ yandexSpeechKitService.iamToken = "" - yandexSpeechKitService.authToken = "authToken" - yandexSpeechKitService.uriGetIamToken = "uriGetIamToken" - - val httpClientBuilder = mockk() - val httpClient = mockk() - val httpResponse = mockk() - - mockkStatic(HttpClientBuilder::class) - every { HttpClientBuilder.create() } returns httpClientBuilder - every { httpClientBuilder.build() } returns httpClient - every { httpClient.execute(any()) } returns httpResponse - every { httpResponse.statusLine.statusCode } returns 100 - // WHEN & THEN + every { timeService.now() } returns LocalDateTime.now() + mockIamTokenWebClient(Mono.empty()) + assertThrows { yandexSpeechKitService.getYandexIamTokenForAudioGeneration() } } + + @Test + fun `should propagate exception when token request fails with error`() { /* the IAM client Mono errors with a RuntimeException */ + yandexSpeechKitService.iamToken = "" + every { timeService.now() } returns LocalDateTime.now() + mockIamTokenWebClient(Mono.error(RuntimeException("Connection refused"))) + + assertThrows { yandexSpeechKitService.getYandexIamTokenForAudioGeneration() } + } + + @Test + fun `parseAudioChunks should decode base64 audio from NDJSON`() { + val audioContent = "test audio content".toByteArray() + val base64Audio = 
Base64.getEncoder().encodeToString(audioContent) + val ndjson = buildNdjsonResponse(base64Audio) + + val chunks = yandexSpeechKitService.parseAudioChunks(ndjson) + + chunks.size shouldBe 1 + chunks[0] shouldBe audioContent + } + + @Test + fun `parseAudioChunks should return multiple chunks separately in order`() { /* renamed: the assertions expect two separate decoded chunks, not a concatenation */ + val chunk1 = "chunk1".toByteArray() + val chunk2 = "chunk2".toByteArray() + val ndjson = + buildNdjsonResponse( + Base64.getEncoder().encodeToString(chunk1), + Base64.getEncoder().encodeToString(chunk2), + ) + + val chunks = yandexSpeechKitService.parseAudioChunks(ndjson) + + chunks.size shouldBe 2 + chunks[0] shouldBe chunk1 + chunks[1] shouldBe chunk2 + } + + @Test + fun `parseAudioChunks should throw on malformed lines`() { /* a line that is not JSON must fail the parse */ + val ndjson = "not-json" + + assertThrows { yandexSpeechKitService.parseAudioChunks(ndjson) } + } + + @Test + fun `parseAudioChunks should throw on invalid base64 audio chunk`() { /* valid JSON envelope whose data field is not base64 */ + val ndjson = + objectMapper.writeValueAsString( + YandexTtsResponse( + result = YandexTtsResult(audioChunk = AudioChunk(data = "###not-base64###")), + ), + ) + + assertThrows { yandexSpeechKitService.parseAudioChunks(ndjson) } + } + + @Test + fun `parseAudioChunks should return empty list for blank input`() { + val chunks = yandexSpeechKitService.parseAudioChunks("") + + chunks.size shouldBe 0 + } + + @Test + fun `should resolve preferred role when voice supports it`() { /* "neutral" is expected to resolve to VoiceRole.NEUTRAL for FILIPP */ + every { yandexTtsProperties.preferredRole } returns "neutral" + + val result = yandexSpeechKitService.resolvePreferredRole(Voice.FILIPP) + + result shouldBe VoiceRole.NEUTRAL + } + + @Test + fun `should skip preferred role when voice does not support it`() { /* "friendly" is expected to resolve to no role for FILIPP */ + every { yandexTtsProperties.preferredRole } returns "friendly" + + val result = yandexSpeechKitService.resolvePreferredRole(Voice.FILIPP) + + result shouldBe null + } + + @Test + fun `should generate audio stream with correct headers`() { + val audioContent = "test audio content".toByteArray() + val base64Audio = 
Base64.getEncoder().encodeToString(audioContent) + val ndjson = buildNdjsonResponse(base64Audio) + setValidToken() + stubVoice() + val requestBodyUriSpec = mockTtsWebClient(Mono.just(ndjson)) + + val result = + yandexSpeechKitService.generateAudioStream( + AudioFileMetaData(text = "test", locale = "ru-ru", voice = "filipp", speedFloat = "1.0"), + ) + + result.readBytes() shouldBe audioContent + verify { requestBodyUriSpec.header("Authorization", "Bearer valid-token") } /* the token seeded by setValidToken() must be sent as a Bearer header */ + verify { requestBodyUriSpec.header("x-folder-id", "test-folder-id") } /* folderId comes from the stubbed properties */ + } + + @Test + fun `should build correct TTS request with safe role hints`() { /* captures the request body passed to bodyValue and inspects its fields */ + val audioContent = "audio".toByteArray() + val ndjson = buildNdjsonResponse(Base64.getEncoder().encodeToString(audioContent)) + setValidToken() + stubVoice() + + val requestBodyUriSpec = mockk() + val requestHeadersSpec = mockk>() + val responseSpec = mockk() + val bodySlot = slot() + + every { yandexTtsWebClient.post() } returns requestBodyUriSpec + every { requestBodyUriSpec.header(any(), any()) } returns requestBodyUriSpec + every { requestBodyUriSpec.bodyValue(capture(bodySlot)) } returns requestHeadersSpec + every { requestHeadersSpec.retrieve() } returns responseSpec + every { responseSpec.onStatus(any(), any()) } returns responseSpec + every { responseSpec.bodyToMono(String::class.java) } returns Mono.just(ndjson) + + yandexSpeechKitService.generateAudioStream( + AudioFileMetaData(text = "hello", locale = "ru-ru", voice = "FILIPP", speedFloat = "0.8"), + ) + + val captured = bodySlot.captured + captured.text shouldBe "hello" + captured.outputAudioSpec.containerAudio.containerAudioType shouldBe "OGG_OPUS" + captured.hints.size shouldBe 3 /* expected hint order: voice, speed, role */ + captured.hints[0].voice shouldBe "filipp" + captured.hints[1].speed shouldBe "0.8" + captured.hints[2].role shouldBe "neutral" + } + + @Test + fun `should omit unsupported configured role from TTS request`() { + val audioContent = "audio".toByteArray() + val ndjson = 
buildNdjsonResponse(Base64.getEncoder().encodeToString(audioContent)) + setValidToken() + every { yandexTtsProperties.preferredRole } returns "friendly" + stubVoice() + + val requestBodyUriSpec = mockk() + val requestHeadersSpec = mockk>() + val responseSpec = mockk() + val bodySlot = slot() + + every { yandexTtsWebClient.post() } returns requestBodyUriSpec + every { requestBodyUriSpec.header(any(), any()) } returns requestBodyUriSpec + every { requestBodyUriSpec.bodyValue(capture(bodySlot)) } returns requestHeadersSpec + every { requestHeadersSpec.retrieve() } returns responseSpec + every { responseSpec.onStatus(any(), any()) } returns responseSpec + every { responseSpec.bodyToMono(String::class.java) } returns Mono.just(ndjson) + + yandexSpeechKitService.generateAudioStream( + AudioFileMetaData(text = "hello", locale = "ru-ru", voice = "FILIPP", speedFloat = "0.8"), + ) + + bodySlot.captured.hints.size shouldBe 2 /* one hint fewer than with the "neutral" role: the "friendly" role is expected to be left out */ + } + + @Test + fun `should concatenate multiple audio chunks from NDJSON response`() { + val chunk1 = "chunk1".toByteArray() + val chunk2 = "chunk2".toByteArray() + val ndjson = + buildNdjsonResponse( + Base64.getEncoder().encodeToString(chunk1), + Base64.getEncoder().encodeToString(chunk2), + ) + setValidToken() + stubVoice() + mockTtsWebClient(Mono.just(ndjson)) + + val result = + yandexSpeechKitService.generateAudioStream( + AudioFileMetaData(text = "test", locale = "ru-ru", voice = "filipp", speedFloat = "1.0"), + ) + + result.readBytes() shouldBe chunk1 + chunk2 /* the stream must carry the decoded chunks back to back */ + } + + @Test + fun `should throw exception when audio response is empty`() { /* the TTS client returns an empty string body */ + setValidToken() + stubVoice() + mockTtsWebClient(Mono.just("")) + + assertThrows { + yandexSpeechKitService.generateAudioStream( + AudioFileMetaData(text = "test", locale = "ru-ru", voice = "filipp", speedFloat = "1.0"), + ) + } + } + + @Test + fun `should throw exception when WebClient returns null`() { /* modelled with Mono.empty(), i.e. no body is emitted */ + setValidToken() + stubVoice() + mockTtsWebClient(Mono.empty()) + + assertThrows { + 
yandexSpeechKitService.generateAudioStream( + AudioFileMetaData(text = "test", locale = "ru-ru", voice = "filipp", speedFloat = "1.0"), + ) + } + } + + @Test + fun `should propagate exception when TTS request fails`() { /* the TTS client Mono errors with a RuntimeException */ + setValidToken() + stubVoice() + mockTtsWebClient(Mono.error(RuntimeException("Connection refused"))) + + assertThrows { + yandexSpeechKitService.generateAudioStream( + AudioFileMetaData(text = "test", locale = "ru-ru", voice = "filipp", speedFloat = "1.0"), + ) + } + } + + @Test + fun `should generate audio with explicit voice`() { /* goes through generateAudioOggStreamWithValidation rather than generateAudioStream */ + val audioContent = "audio".toByteArray() + val ndjson = buildNdjsonResponse(Base64.getEncoder().encodeToString(audioContent)) + setValidToken() + stubVoice() + mockTtsWebClient(Mono.just(ndjson)) + + val result = + yandexSpeechKitService.generateAudioOggStreamWithValidation( + AudioFileMetaData(text = "test", locale = "ru-ru", voice = "filipp", speedFloat = "1.0"), + ) + + result.readBytes() shouldBe audioContent + } + + @Test + fun `should fall back to first male voice when voice is empty`() { /* the stubbed locale default is FILIPP */ + val audioContent = "audio".toByteArray() + val ndjson = buildNdjsonResponse(Base64.getEncoder().encodeToString(audioContent)) + setValidToken() + every { wordsService.getVoicesForLocale("ru-ru") } returns Voice.getVoicesForLocale("ru-ru").map { it.name } + every { wordsService.getDefaultVoiceForLocale("ru-ru") } returns Voice.FILIPP.name + every { wordsService.getVoiceForLocale("ru-ru", Voice.FILIPP.name) } returns Voice.FILIPP + mockTtsWebClient(Mono.just(ndjson)) + + val result = + yandexSpeechKitService.generateAudioOggStreamWithValidation( + AudioFileMetaData(text = "test", locale = "ru-ru", voice = "", speedFloat = "1.0"), + ) + + result.readBytes() shouldBe audioContent + verify { wordsService.getDefaultVoiceForLocale("ru-ru") } /* an empty voice must be resolved through the locale default */ + } + + @Test + fun `should throw on invalid locale in generateAudioOggStreamWithValidation`() { /* fr-fr is stubbed with an empty voice list */ + every { wordsService.getVoicesForLocale("fr-fr") } returns emptyList() + + assertThrows { + 
yandexSpeechKitService.generateAudioOggStreamWithValidation( + AudioFileMetaData(text = "test", locale = "fr-fr", voice = "", speedFloat = "1.0"), + ) + } + } + + @Test + fun `should resolve voice when blank voice falls back to default`() { + val audioContent = "audio".toByteArray() + val ndjson = buildNdjsonResponse(Base64.getEncoder().encodeToString(audioContent)) + setValidToken() + every { wordsService.getDefaultVoiceForLocale("ru-ru") } returns Voice.FILIPP.name + every { wordsService.getVoiceForLocale("ru-ru", Voice.FILIPP.name) } returns Voice.FILIPP + mockTtsWebClient(Mono.just(ndjson)) + + val result = + yandexSpeechKitService.generateAudioStream( + AudioFileMetaData(text = "test", locale = "ru-ru", voice = "", speedFloat = "1.0"), + ) + + result.readBytes() shouldBe audioContent + verify { wordsService.getDefaultVoiceForLocale("ru-ru") } + } + + @Test + fun `should throw when blank voice default lookup returns null`() { + setValidToken() + every { wordsService.getDefaultVoiceForLocale("ru-ru") } returns Voice.FILIPP.name + every { wordsService.getVoiceForLocale("ru-ru", Voice.FILIPP.name) } returns null + + assertThrows { + yandexSpeechKitService.generateAudioStream( + AudioFileMetaData(text = "test", locale = "ru-ru", voice = "", speedFloat = "1.0"), + ) + } + } + + @Test + fun `should throw when explicit voice lookup returns null`() { + setValidToken() + every { wordsService.getVoiceForLocale("ru-ru", "unknown") } returns null + + assertThrows { + yandexSpeechKitService.generateAudioStream( + AudioFileMetaData(text = "test", locale = "ru-ru", voice = "unknown", speedFloat = "1.0"), + ) + } + } + + @Test + fun `should build hints without speed when speed is blank`() { + val audioContent = "audio".toByteArray() + val ndjson = buildNdjsonResponse(Base64.getEncoder().encodeToString(audioContent)) + setValidToken() + every { wordsService.getVoicesForLocale("ru-ru") } returns Voice.getVoicesForLocale("ru-ru").map { it.name } + every { 
wordsService.getVoiceForLocale("ru-ru", Voice.ZAHAR.name) } returns Voice.ZAHAR + every { wordsService.getVoiceForLocale("ru-ru", Voice.ZAHAR.name.lowercase()) } returns Voice.ZAHAR + every { yandexTtsProperties.preferredRole } returns null + + val requestBodyUriSpec = mockk() + val requestHeadersSpec = mockk>() + val responseSpec = mockk() + val bodySlot = slot() + + every { yandexTtsWebClient.post() } returns requestBodyUriSpec + every { requestBodyUriSpec.header(any(), any()) } returns requestBodyUriSpec + every { requestBodyUriSpec.bodyValue(capture(bodySlot)) } returns requestHeadersSpec + every { requestHeadersSpec.retrieve() } returns responseSpec + every { responseSpec.onStatus(any(), any()) } returns responseSpec + every { responseSpec.bodyToMono(String::class.java) } returns Mono.just(ndjson) + + yandexSpeechKitService.generateAudioStream( + AudioFileMetaData(text = "test", locale = "ru-ru", voice = Voice.ZAHAR.name, speedFloat = ""), + ) + + bodySlot.captured.hints.size shouldBe 1 + bodySlot.captured.hints[0].voice shouldBe "zahar" + } + + @Test + fun `should return first supported role when preferredRole is null`() { + every { yandexTtsProperties.preferredRole } returns null + + val result = yandexSpeechKitService.resolvePreferredRole(Voice.FILIPP) + + result shouldBe VoiceRole.NEUTRAL + } + + @Test + fun `should return first supported role when preferredRole is blank`() { + every { yandexTtsProperties.preferredRole } returns " " + + val result = yandexSpeechKitService.resolvePreferredRole(Voice.FILIPP) + + result shouldBe VoiceRole.NEUTRAL + } + + @Test + fun `should return null when voice has no roles and preferredRole is null`() { + every { yandexTtsProperties.preferredRole } returns null + + val result = yandexSpeechKitService.resolvePreferredRole(Voice.ZAHAR) + + result shouldBe null + } + + @Test + fun `should skip chunks with null result in NDJSON response`() { + val audioContent = "audio".toByteArray() + val base64Audio = 
Base64.getEncoder().encodeToString(audioContent) + val lineWithData = + objectMapper.writeValueAsString( + YandexTtsResponse(result = YandexTtsResult(audioChunk = AudioChunk(data = base64Audio))), + ) + val lineWithoutData = objectMapper.writeValueAsString(YandexTtsResponse(result = null)) + val ndjson = "$lineWithoutData\n$lineWithData" + + val chunks = yandexSpeechKitService.parseAudioChunks(ndjson) + + chunks.size shouldBe 1 + chunks[0] shouldBe audioContent + } } diff --git a/src/test/resources/application.properties b/src/test/resources/application.properties index c90744a63..6dc72afd6 100644 --- a/src/test/resources/application.properties +++ b/src/test/resources/application.properties @@ -45,10 +45,10 @@ lopotkoFileName=lopotko.txt yandex.authToken=${YANDEX_AUTH_TOKEN:askLenasForLocalTest} yandex.getTokenLink=https://iam.api.cloud.yandex.net/iam/v1/tokens -yandex.generationAudioLink=https://tts.api.cloud.yandex.net/speech/v1/tts:synthesize +yandex.generationAudioLink=https://tts.api.cloud.yandex.net:443/tts/v3/utteranceSynthesis yandex.folderId=b1gqn2760f5ongt82lm3 -yandex.format=oggopus -yandex.emotions=friendly +yandex.preferredRole=neutral +yandex.enableWiretap=false yandex.folderForFiles=audioTest brn.resources.default-pictures.path=pictures/ @@ -126,4 +126,4 @@ github.api.url.base=https://api.github.com github.api.url.path.contributors=/repos/{OWNER}/{REPO}/contributors github.api.url.path.users=/users/{username} -autotest.users.deletion.prefix=autotest \ No newline at end of file +autotest.users.deletion.prefix=autotest