SHiNE-server/shine-UI/js/services/speech-tools-service.js

243 lines
7.4 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
 * OpenAI transcription model used for each speech-to-text quality preset.
 *
 * The table is indexed with a string taken from user settings, so it is built
 * on a null prototype: a lookup can only ever hit one of the presets listed
 * here and never an inherited member such as `constructor`. It is frozen so
 * the presets cannot be modified at runtime.
 */
const OPENAI_MODELS_BY_QUALITY = Object.freeze(
  Object.assign(Object.create(null), {
    easy: 'whisper-1',
    medium: 'gpt-4o-mini-transcribe',
    hard: 'gpt-4o-transcribe',
  }),
);
/**
 * Value sent as `length_scale` to the Piper HTTP endpoint for each
 * text-to-speech quality preset.
 *
 * The table is indexed with a string taken from user settings, so it is built
 * on a null prototype: a lookup can only ever hit one of the presets listed
 * here and never an inherited member such as `constructor`. It is frozen so
 * the presets cannot be modified at runtime.
 */
const PIPER_LENGTH_SCALE_BY_QUALITY = Object.freeze(
  Object.assign(Object.create(null), {
    easy: '1.15',
    medium: '1.0',
    hard: '0.9',
  }),
);
/**
 * Normalizes a configured OpenAI-style base URL.
 *
 * @param {*} url - Configured base URL; any falsy value counts as "not set".
 * @returns {string} The public OpenAI v1 endpoint when nothing usable is
 *   configured, otherwise the trimmed URL with all trailing slashes removed.
 */
function normalizeOpenAiBaseUrl(url) {
  const trimmed = String(url || '').trim();
  return trimmed === ''
    ? 'https://api.openai.com/v1'
    : trimmed.replace(/\/+$/, '');
}
/**
 * Picks the transcription model for a speech-to-text configuration.
 *
 * An explicitly configured `model` always wins. Otherwise the `quality`
 * preset (case-insensitive, default "medium") is looked up in
 * OPENAI_MODELS_BY_QUALITY; unknown presets fall back to the "medium" model.
 *
 * @param {{quality?: *, model?: *}|null|undefined} config - Speech-to-text settings.
 * @returns {string} Model identifier to send to the transcription endpoint.
 */
function resolveSttModel(config) {
  const quality = String(config?.quality || 'medium').toLowerCase();
  const customModel = String(config?.model || '').trim();
  if (customModel) return customModel;
  // Only own keys count as presets: a quality such as "constructor" must not
  // resolve to an inherited Object.prototype member.
  const isKnownPreset = Object.prototype.hasOwnProperty.call(OPENAI_MODELS_BY_QUALITY, quality);
  return isKnownPreset
    ? OPENAI_MODELS_BY_QUALITY[quality]
    : OPENAI_MODELS_BY_QUALITY.medium;
}
/**
 * Reports whether speech-to-text can be used with the given settings.
 *
 * Reads `entrySettings.tools.speechToText`. The provider defaults to
 * "openai" and must be exactly "openai"; the API key must be non-blank.
 *
 * @param {object|null|undefined} entrySettings - Settings object of the entry.
 * @returns {boolean} True when the provider is "openai" and an API key is present.
 */
export function isSpeechToTextConfigured(entrySettings) {
  const sttSettings = entrySettings?.tools?.speechToText || {};
  const providerName = String(sttSettings.provider || 'openai');
  return providerName === 'openai'
    && String(sttSettings.apiKey || '').trim().length > 0;
}
/**
 * Reports whether text-to-speech can be used with the given settings.
 *
 * Reads `entrySettings.tools.textToSpeech`. The provider defaults to
 * "browser", which needs no further configuration. "piper-http" needs a
 * non-blank `piperBaseUrl`, "openai" needs a non-blank `apiKey`, and any
 * other provider is reported as not configured.
 *
 * @param {object|null|undefined} entrySettings - Settings object of the entry.
 * @returns {boolean} True when the selected provider has what it needs.
 */
export function isTextToSpeechConfigured(entrySettings) {
  const ttsSettings = entrySettings?.tools?.textToSpeech || {};
  const isFilled = (raw) => String(raw || '').trim() !== '';

  switch (String(ttsSettings.provider || 'browser')) {
    case 'browser':
      return true;
    case 'piper-http':
      return isFilled(ttsSettings.piperBaseUrl);
    case 'openai':
      return isFilled(ttsSettings.apiKey);
    default:
      return false;
  }
}
/**
 * Sends recorded audio to an OpenAI-style transcription endpoint and returns
 * the recognized text.
 *
 * Reads `entrySettings.tools.speechToText`. The audio is uploaded as a
 * multipart form to `<baseUrl>/audio/transcriptions` under the file name
 * "voice-input.webm", with the language fixed to "ru" and a JSON response
 * requested.
 *
 * @param {Blob} audioBlob - Recorded audio to transcribe.
 * @param {object|null|undefined} entrySettings - Settings object of the entry.
 * @returns {Promise<string>} Trimmed transcription text.
 * @throws {Error} When the provider is not "openai", the API key is missing,
 *   the HTTP response is not OK, or the recognized text is empty.
 */
export async function transcribeAudioBySettings(audioBlob, entrySettings) {
  const sttSettings = entrySettings?.tools?.speechToText || {};
  if (String(sttSettings.provider || 'openai') !== 'openai') {
    throw new Error('Поддерживается только провайдер OpenAI для распознавания.');
  }

  const token = String(sttSettings.apiKey || '').trim();
  if (token === '') {
    throw new Error('Не заполнен OpenAI API key.');
  }

  const sttModel = resolveSttModel(sttSettings);
  const endpoint = `${normalizeOpenAiBaseUrl(sttSettings.baseUrl)}/audio/transcriptions`;

  // Plain text fields first, then the audio part.
  const multipart = new FormData();
  const textFields = [
    ['model', sttModel],
    ['language', 'ru'],
    ['response_format', 'json'],
  ];
  for (const [fieldName, fieldValue] of textFields) {
    multipart.append(fieldName, fieldValue);
  }
  multipart.append('file', audioBlob, 'voice-input.webm');

  const reply = await fetch(endpoint, {
    method: 'POST',
    headers: { Authorization: `Bearer ${token}` },
    body: multipart,
  });

  if (!reply.ok) {
    // The error body is best-effort: an unreadable body is reported as unknown.
    const details = await reply.text().catch(() => '');
    throw new Error(`Ошибка STT API (${reply.status}): ${details || 'unknown error'}`);
  }

  const parsed = await reply.json();
  const recognized = String(parsed?.text || '').trim();
  if (recognized === '') {
    throw new Error('Пустой ответ распознавания.');
  }
  return recognized;
}
/**
 * Creates a microphone recorder that captures audio as `audio/webm` and,
 * when the Web Audio API is available, measures a rough input level.
 *
 * The returned object exposes:
 * - `start(onTick)`: asks for microphone access and starts recording.
 *   `onTick({ elapsedMs, level })` is called every 120 ms, where `level` is
 *   the mean of the analyser's byte frequency data scaled to the 0..1 range.
 *   Rejects with the underlying error if access or setup fails; everything
 *   acquired up to that point is released first.
 * - `stop()`: stops recording and resolves with the recorded Blob, or with
 *   `null` when nothing is being recorded.
 * - `cancel()`: stops recording and discards the result.
 *
 * All resources (timer, animation frame, audio context, microphone tracks)
 * are released on stop, cancel, and failed start.
 *
 * @returns {{start: function(Function=): Promise<void>, stop: function(): Promise<Blob|null>, cancel: function(): void}}
 */
export function createMicrophoneRecorder() {
  const Ctx = window.AudioContext || window.webkitAudioContext;
  let stream = null;
  let recorder = null;
  let audioCtx = null;
  let startedAtMs = 0;
  let chunks = [];
  let timerId = 0;
  let level = 0;
  let analyser = null;
  let rafId = 0;

  async function start(onTick) {
    // A previous session that was never stopped would otherwise leak its
    // microphone stream, timer and audio context.
    if (recorder || stream) cancel();
    try {
      stream = await navigator.mediaDevices.getUserMedia({ audio: true, video: false });
      recorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });
      startedAtMs = Date.now();
      chunks = [];
      recorder.ondataavailable = (event) => {
        if (event?.data?.size > 0) chunks.push(event.data);
      };
      recorder.start(250);
      timerId = window.setInterval(() => {
        if (typeof onTick === 'function') {
          onTick({
            elapsedMs: Date.now() - startedAtMs,
            level,
          });
        }
      }, 120);
      if (Ctx) {
        // Kept in the closure so cleanup() can close it.
        audioCtx = new Ctx();
        const source = audioCtx.createMediaStreamSource(stream);
        analyser = audioCtx.createAnalyser();
        analyser.fftSize = 256;
        source.connect(analyser);
        const data = new Uint8Array(analyser.frequencyBinCount);
        const read = () => {
          if (!analyser) return;
          analyser.getByteFrequencyData(data);
          let sum = 0;
          for (let i = 0; i < data.length; i += 1) sum += data[i];
          level = data.length > 0 ? Math.max(0, Math.min(1, (sum / data.length) / 255)) : 0;
          rafId = window.requestAnimationFrame(read);
        };
        read();
      }
    } catch (err) {
      // Setup failed part-way (e.g. unsupported mime type): release the
      // microphone and anything else already acquired, then report the error.
      cancel();
      throw err;
    }
  }

  async function stop() {
    if (!recorder) return null;
    const active = recorder;
    try {
      // An already inactive recorder will not fire "stop" again, so waiting
      // for the event would never finish; return what was collected.
      if (active.state === 'inactive') {
        return new Blob(chunks, { type: 'audio/webm' });
      }
      return await new Promise((resolve) => {
        active.onstop = () => resolve(new Blob(chunks, { type: 'audio/webm' }));
        active.stop();
      });
    } finally {
      // Runs on success and on failure so the microphone is always released.
      cleanup();
    }
  }

  function cancel() {
    try {
      recorder?.stop();
    } catch {
      // ignore: the recorder may already be inactive
    }
    cleanup();
  }

  function cleanup() {
    if (timerId) window.clearInterval(timerId);
    if (rafId) window.cancelAnimationFrame(rafId);
    timerId = 0;
    rafId = 0;
    analyser = null;
    if (audioCtx) {
      try {
        const closing = audioCtx.close();
        // close() is asynchronous where supported; a failed close is not actionable.
        if (closing && typeof closing.catch === 'function') closing.catch(() => {});
      } catch {
        // ignore: close() may be missing or the context may already be closed
      }
    }
    audioCtx = null;
    if (stream) {
      stream.getTracks().forEach((track) => {
        try {
          track.stop();
        } catch {
          // ignore
        }
      });
    }
    stream = null;
    recorder = null;
  }

  return { start, stop, cancel };
}
/**
 * Starts playback of an audio blob through a temporary object URL.
 *
 * The URL is released when playback ends or errors, and immediately when
 * playback cannot be started, instead of on a fixed timer that could fire
 * while long audio is still playing.
 *
 * @param {Blob} blob - Audio data to play.
 * @returns {Promise<void>} Resolves once playback has started.
 */
async function playAudioBlob(blob) {
  const audioUrl = URL.createObjectURL(blob);
  const release = () => URL.revokeObjectURL(audioUrl);
  const audio = new Audio(audioUrl);
  audio.addEventListener('ended', release, { once: true });
  audio.addEventListener('error', release, { once: true });
  try {
    await audio.play();
  } catch (err) {
    release();
    throw err;
  }
}

/**
 * Speaks `text` aloud using the text-to-speech provider selected in
 * `entrySettings.tools.textToSpeech` (default provider: "browser").
 *
 * - "browser": uses the Web Speech API with language "ru-RU" and, when
 *   `voice` matches an installed voice name, that voice.
 * - "piper-http": POSTs to `<piperBaseUrl>/api/tts` and plays the returned audio.
 * - "openai": POSTs to `<baseUrl>/audio/speech` and plays the returned audio.
 *
 * Blank text is ignored. For the HTTP providers the promise resolves once
 * playback has started, not when it has finished.
 *
 * @param {*} text - Text to speak; it is converted to a string and trimmed.
 * @param {object|null|undefined} entrySettings - Settings object of the entry.
 * @returns {Promise<void>}
 * @throws {Error} When required settings are missing, the HTTP request
 *   fails, playback cannot start, or the provider is unknown.
 */
export async function speakTextBySettings(text, entrySettings) {
  const value = String(text || '').trim();
  if (!value) return;
  const cfg = entrySettings?.tools?.textToSpeech || {};
  const provider = String(cfg.provider || 'browser');

  if (provider === 'browser') {
    const utt = new SpeechSynthesisUtterance(value);
    utt.lang = 'ru-RU';
    const selected = String(cfg.voice || '').trim();
    if (selected) {
      const voice = window.speechSynthesis.getVoices().find((v) => v.name === selected);
      if (voice) utt.voice = voice;
    }
    window.speechSynthesis.speak(utt);
    return;
  }

  if (provider === 'piper-http') {
    const baseUrl = String(cfg.piperBaseUrl || '').trim().replace(/\/+$/, '');
    if (!baseUrl) throw new Error('Не указан адрес Piper HTTP.');
    const quality = String(cfg.quality || 'medium').toLowerCase();
    const voice = String(cfg.voice || '').trim();
    // Only own keys count as presets: a quality such as "constructor" must
    // not resolve to an inherited Object.prototype member.
    const isKnownPreset = Object.prototype.hasOwnProperty.call(PIPER_LENGTH_SCALE_BY_QUALITY, quality);
    const lengthScale = isKnownPreset
      ? PIPER_LENGTH_SCALE_BY_QUALITY[quality]
      : PIPER_LENGTH_SCALE_BY_QUALITY.medium;
    const resp = await fetch(`${baseUrl}/api/tts`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        text: value,
        voice,
        quality,
        length_scale: lengthScale,
      }),
    });
    if (!resp.ok) throw new Error(`Piper HTTP недоступен (${resp.status}).`);
    await playAudioBlob(await resp.blob());
    return;
  }

  if (provider === 'openai') {
    const apiKey = String(cfg.apiKey || '').trim();
    if (!apiKey) throw new Error('Не заполнен API key для OpenAI TTS.');
    const model = String(cfg.model || '').trim() || 'gpt-4o-mini-tts';
    const baseUrl = normalizeOpenAiBaseUrl(cfg.externalBaseUrl || cfg.baseUrl || 'https://api.openai.com/v1');
    const voice = String(cfg.voice || '').trim() || 'alloy';
    const resp = await fetch(`${baseUrl}/audio/speech`, {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model,
        voice,
        input: value,
        // The speech endpoint names this field `response_format`.
        response_format: 'mp3',
      }),
    });
    if (!resp.ok) throw new Error(`OpenAI TTS недоступен (${resp.status}).`);
    await playAudioBlob(await resp.blob());
    return;
  }

  throw new Error('Неизвестный провайдер озвучки.');
}