/** Default OpenAI transcription model for each quality preset. */
const OPENAI_MODELS_BY_QUALITY = {
  easy: 'whisper-1',
  medium: 'gpt-4o-mini-transcribe',
  hard: 'gpt-4o-transcribe',
};

/** `length_scale` value sent to the Piper HTTP endpoint for each quality preset. */
const PIPER_LENGTH_SCALE_BY_QUALITY = {
  easy: '1.15',
  medium: '1.0',
  hard: '0.9',
};

/**
 * Looks up a quality preset in one of the tables above.
 *
 * Only own properties are considered, so a quality such as "constructor" or
 * "toString" cannot resolve to an inherited Object.prototype member.
 *
 * @param {Object<string, string>} table - Preset table that has a `medium` entry.
 * @param {*} quality - Requested quality; empty values mean "medium".
 * @returns {string} The preset value, or the `medium` value for unknown qualities.
 */
function pickByQuality(table, quality) {
  const key = String(quality || 'medium').toLowerCase();
  const isKnown = Object.prototype.hasOwnProperty.call(table, key);
  return (isKnown && table[key]) || table.medium;
}

/**
 * Normalizes an OpenAI-style base URL.
 *
 * @param {*} url - Configured base URL; may be empty.
 * @returns {string} The trimmed URL without trailing slashes, or the public
 *   OpenAI endpoint when nothing is configured.
 */
function normalizeOpenAiBaseUrl(url) {
  const value = String(url || '').trim();
  if (!value) return 'https://api.openai.com/v1';
  return value.replace(/\/+$/, '');
}

/**
 * Chooses the transcription model: an explicit `config.model` wins, otherwise
 * the model mapped to `config.quality` (falling back to the medium preset).
 *
 * @param {{ model?: string, quality?: string }} [config]
 * @returns {string} Model name.
 */
function resolveSttModel(config) {
  const customModel = String(config?.model || '').trim();
  return customModel || pickByQuality(OPENAI_MODELS_BY_QUALITY, config?.quality);
}

/**
 * Reports whether speech-to-text can be used with the given settings.
 *
 * @param {object} [entrySettings] - Settings object; reads `tools.speechToText`.
 * @returns {boolean} True when the provider is OpenAI (the default) and an API
 *   key is present.
 */
export function isSpeechToTextConfigured(entrySettings) {
  const cfg = entrySettings?.tools?.speechToText || {};
  if (String(cfg.provider || 'openai') !== 'openai') return false;
  return !!String(cfg.apiKey || '').trim();
}

/**
 * Reports whether text-to-speech can be used with the given settings.
 *
 * @param {object} [entrySettings] - Settings object; reads `tools.textToSpeech`.
 * @returns {boolean} True for the browser provider (the default), for
 *   "piper-http" with a base URL, and for "openai" with an API key.
 */
export function isTextToSpeechConfigured(entrySettings) {
  const cfg = entrySettings?.tools?.textToSpeech || {};
  const provider = String(cfg.provider || 'browser');
  if (provider === 'browser') return true;
  if (provider === 'piper-http') return !!String(cfg.piperBaseUrl || '').trim();
  if (provider === 'openai') return !!String(cfg.apiKey || '').trim();
  return false;
}

/**
 * Sends recorded audio to the OpenAI-compatible transcription endpoint.
 *
 * @param {Blob} audioBlob - Recorded audio, uploaded as "voice-input.webm".
 * @param {object} [entrySettings] - Settings object; reads `tools.speechToText`
 *   (`provider`, `apiKey`, `model`, `quality`, `baseUrl`).
 * @returns {Promise<string>} The trimmed transcription text.
 * @throws {Error} When the provider is not OpenAI, the API key is missing, the
 *   HTTP response is not OK, or the returned text is empty.
 */
export async function transcribeAudioBySettings(audioBlob, entrySettings) {
  const cfg = entrySettings?.tools?.speechToText || {};
  const provider = String(cfg.provider || 'openai');
  if (provider !== 'openai') {
    throw new Error('Поддерживается только провайдер OpenAI для распознавания.');
  }
  const apiKey = String(cfg.apiKey || '').trim();
  if (!apiKey) throw new Error('Не заполнен OpenAI API key.');

  const model = resolveSttModel(cfg);
  const baseUrl = normalizeOpenAiBaseUrl(cfg.baseUrl);

  const form = new FormData();
  form.append('model', model);
  form.append('language', 'ru');
  form.append('response_format', 'json');
  form.append('file', audioBlob, 'voice-input.webm');

  const response = await fetch(`${baseUrl}/audio/transcriptions`, {
    method: 'POST',
    // The header value must stay on one line: a line break inside it is invalid.
    headers: { Authorization: `Bearer ${apiKey}` },
    body: form,
  });
  if (!response.ok) {
    const body = await response.text().catch(() => '');
    throw new Error(`Ошибка STT API (${response.status}): ${body || 'unknown error'}`);
  }

  const payload = await response.json();
  const text = String(payload?.text || '').trim();
  if (!text) throw new Error('Пустой ответ распознавания.');
  return text;
}

/**
 * Creates a microphone recorder built on MediaRecorder, with an optional
 * input-level meter built on the Web Audio API when it is available.
 *
 * @returns {{
 *   start: (onTick?: (info: { elapsedMs: number, level: number }) => void) => Promise<void>,
 *   stop: () => Promise<Blob|null>,
 *   cancel: () => void,
 * }} `start` begins recording and calls `onTick` every 120 ms with the elapsed
 *   time and a 0..1 level; `stop` resolves with the recorded "audio/webm" blob
 *   (or null when nothing is being recorded); `cancel` discards the recording.
 */
export function createMicrophoneRecorder() {
  const Ctx = window.AudioContext || window.webkitAudioContext;
  let stream = null;
  let recorder = null;
  let audioCtx = null;
  let analyser = null;
  let chunks = [];
  let startedAtMs = 0;
  let timerId = 0;
  let rafId = 0;
  let level = 0;

  /** Stops timers, the level meter and the microphone tracks; safe to call repeatedly. */
  function cleanup() {
    if (timerId) window.clearInterval(timerId);
    if (rafId) window.cancelAnimationFrame(rafId);
    timerId = 0;
    rafId = 0;
    analyser = null;
    if (audioCtx) {
      // Release the audio context created for the level meter. Closing is
      // best-effort: a failure here must not prevent the tracks from stopping.
      try {
        Promise.resolve(audioCtx.close?.()).catch(() => {});
      } catch {
        // ignore
      }
      audioCtx = null;
    }
    if (stream) {
      stream.getTracks().forEach((track) => {
        try {
          track.stop();
        } catch {
          // ignore
        }
      });
    }
    stream = null;
    recorder = null;
  }

  /** Stops the recorder (ignoring errors) and releases every resource. */
  function cancel() {
    try {
      recorder?.stop();
    } catch {
      // ignore
    }
    cleanup();
  }

  /** Starts the level meter; `level` is the mean frequency-bin magnitude scaled to 0..1. */
  function startLevelMeter() {
    audioCtx = new Ctx();
    const source = audioCtx.createMediaStreamSource(stream);
    analyser = audioCtx.createAnalyser();
    analyser.fftSize = 256;
    source.connect(analyser);
    const data = new Uint8Array(analyser.frequencyBinCount);
    const read = () => {
      if (!analyser) return;
      analyser.getByteFrequencyData(data);
      let sum = 0;
      for (let i = 0; i < data.length; i += 1) sum += data[i];
      level = data.length > 0 ? Math.max(0, Math.min(1, sum / data.length / 255)) : 0;
      rafId = window.requestAnimationFrame(read);
    };
    read();
  }

  async function start(onTick) {
    // A second start() without stop()/cancel() would otherwise leave the
    // previous stream, interval and animation-frame loop running.
    if (recorder || stream) cancel();

    stream = await navigator.mediaDevices.getUserMedia({ audio: true, video: false });
    try {
      const rec = new MediaRecorder(stream, { mimeType: 'audio/webm' });
      // Each recording gets its own array so a late event from an earlier,
      // cancelled recorder cannot leak data into this one.
      const recorded = [];
      recorder = rec;
      chunks = recorded;
      startedAtMs = Date.now();
      level = 0;
      rec.ondataavailable = (event) => {
        if (event?.data?.size > 0) recorded.push(event.data);
      };
      rec.start(250);

      timerId = window.setInterval(() => {
        if (typeof onTick === 'function') {
          onTick({ elapsedMs: Date.now() - startedAtMs, level });
        }
      }, 120);

      if (Ctx) startLevelMeter();
    } catch (err) {
      // Do not keep the microphone open when the recorder could not be set up.
      cancel();
      throw err;
    }
  }

  async function stop() {
    const rec = recorder;
    if (!rec) return null;
    const recorded = chunks;
    try {
      // An already inactive recorder never fires another "stop" event, so
      // waiting for one would hang; return what has been collected so far.
      if (rec.state === 'inactive') {
        return new Blob(recorded, { type: 'audio/webm' });
      }
      return await new Promise((resolve) => {
        rec.onstop = () => resolve(new Blob(recorded, { type: 'audio/webm' }));
        rec.stop();
      });
    } finally {
      cleanup();
    }
  }

  return { start, stop, cancel };
}

/**
 * Plays an audio blob through an `Audio` element. The temporary object URL is
 * revoked when playback ends or fails, and also when `play()` itself rejects.
 *
 * @param {Blob} blob - Audio data to play.
 * @returns {Promise<void>} Resolves once playback has started.
 */
async function playAudioBlob(blob) {
  const audioUrl = URL.createObjectURL(blob);
  const release = () => URL.revokeObjectURL(audioUrl);
  const audio = new Audio(audioUrl);
  audio.addEventListener('ended', release, { once: true });
  audio.addEventListener('error', release, { once: true });
  try {
    await audio.play();
  } catch (err) {
    release();
    throw err;
  }
}

/**
 * Speaks the text with the provider selected in `tools.textToSpeech`:
 * "browser" (default, SpeechSynthesis), "piper-http" or "openai".
 * Empty or whitespace-only text is ignored.
 *
 * @param {*} text - Text to speak.
 * @param {object} [entrySettings] - Settings object; reads `tools.textToSpeech`.
 * @returns {Promise<void>} For the HTTP providers, resolves once playback has
 *   started; for the browser provider, once the utterance has been queued.
 * @throws {Error} When required settings are missing, the HTTP request fails,
 *   playback cannot start, or the provider is unknown.
 */
export async function speakTextBySettings(text, entrySettings) {
  const value = String(text || '').trim();
  if (!value) return;

  const cfg = entrySettings?.tools?.textToSpeech || {};
  const provider = String(cfg.provider || 'browser');

  if (provider === 'browser') {
    const utt = new SpeechSynthesisUtterance(value);
    utt.lang = 'ru-RU';
    const selected = String(cfg.voice || '').trim();
    if (selected) {
      const voice = window.speechSynthesis.getVoices().find((v) => v.name === selected);
      if (voice) utt.voice = voice;
    }
    window.speechSynthesis.speak(utt);
    return;
  }

  if (provider === 'piper-http') {
    const baseUrl = String(cfg.piperBaseUrl || '').trim().replace(/\/+$/, '');
    if (!baseUrl) throw new Error('Не указан адрес Piper HTTP.');
    const quality = String(cfg.quality || 'medium').toLowerCase();
    const voice = String(cfg.voice || '').trim();
    const lengthScale = pickByQuality(PIPER_LENGTH_SCALE_BY_QUALITY, quality);
    const resp = await fetch(`${baseUrl}/api/tts`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        text: value,
        voice,
        quality,
        length_scale: lengthScale,
      }),
    });
    if (!resp.ok) throw new Error(`Piper HTTP недоступен (${resp.status}).`);
    await playAudioBlob(await resp.blob());
    return;
  }

  if (provider === 'openai') {
    const apiKey = String(cfg.apiKey || '').trim();
    if (!apiKey) throw new Error('Не заполнен API key для OpenAI TTS.');
    const model = String(cfg.model || '').trim() || 'gpt-4o-mini-tts';
    const baseUrl = normalizeOpenAiBaseUrl(cfg.externalBaseUrl || cfg.baseUrl);
    const voice = String(cfg.voice || '').trim() || 'alloy';
    const resp = await fetch(`${baseUrl}/audio/speech`, {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model,
        voice,
        input: value,
        // The speech endpoint names its output-format field `response_format`.
        response_format: 'mp3',
      }),
    });
    if (!resp.ok) throw new Error(`OpenAI TTS недоступен (${resp.status}).`);
    await playAudioBlob(await resp.blob());
    return;
  }

  throw new Error('Неизвестный провайдер озвучки.');
}