// VoiceOutput — speaker button that reads text aloud via the browser's
// SpeechSynthesis API. Same shape as main project's voice-output.tsx,
// ported to the MVP's no-Tailwind / no-framer-motion stack.
//
// Props:
//   text      — string to speak. Markdown is stripped before TTS.
//   autoPlay? — speak the text once when it first appears (used for the
//               voice-in → voice-out chat flow).

// NOTE(review): the SVG markup of the three icon components below was empty
// in the reviewed source. The attributes and shapes here are a conventional
// 24×24 stroke-icon reconstruction (speaker with two sound waves, rounded
// stop square) — confirm against the original artwork before shipping.

// Shared <svg> wrapper: `d` is the icon's child element(s), `size` is the
// rendered width/height in pixels.
const _VOIcon = ({ d, size = 14 }) =>
  React.createElement(
    'svg',
    {
      width: size,
      height: size,
      viewBox: '0 0 24 24',
      fill: 'none',
      stroke: 'currentColor',
      strokeWidth: 2,
      strokeLinecap: 'round',
      strokeLinejoin: 'round',
    },
    d,
  );

// Speaker icon, shown while idle.
const IconSpeaker = (p) =>
  React.createElement(_VOIcon, {
    ...p,
    d: React.createElement(
      React.Fragment,
      null,
      React.createElement('polygon', { points: '11 5 6 9 2 9 2 15 6 15 11 19 11 5' }),
      React.createElement('path', { d: 'M15.54 8.46a5 5 0 0 1 0 7.07' }),
      React.createElement('path', { d: 'M19.07 4.93a10 10 0 0 1 0 14.14' }),
    ),
  });

// Stop icon, shown while speech is in progress.
const IconStop2 = (p) =>
  React.createElement(_VOIcon, {
    ...p,
    d: React.createElement('rect', { x: 6, y: 6, width: 12, height: 12, rx: 2 }),
  });

/**
 * Strip markdown so speech sounds natural (no "asterisk asterisk bold").
 *
 * The replacements run in a fixed order: code first, then images before
 * links (an image is a link with a leading "!"), then inline emphasis, then
 * line-level markers, and finally whitespace is collapsed.
 *
 * @param {*} md - Markdown source; any falsy value is treated as empty text.
 * @returns {string} Plain text on a single line, trimmed.
 */
function _stripMd(md) {
  return String(md || '')
    .replace(/```[\s\S]*?```/g, ' ')            // fenced code blocks are dropped entirely
    .replace(/`([^`]+)`/g, '$1')                // inline code keeps its text
    .replace(/!\[[^\]]*\]\([^)]+\)/g, ' ')      // images: ![alt](url) is removed
    .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')    // links: [label](url) keeps the label
    .replace(/^#{1,6}\s+/gm, '')                // heading markers
    .replace(/\*\*([^*]+)\*\*/g, '$1')          // **bold**
    .replace(/\*([^*]+)\*/g, '$1')              // *italic*
    .replace(/__([^_]+)__/g, '$1')              // __bold__
    .replace(/_([^_]+)_/g, '$1')                // _italic_
    .replace(/^\s*[-*+]\s+/gm, '')              // bullet markers
    .replace(/^\s*\d+\.\s+/gm, '')              // numbered-list markers
    .replace(/\|/g, ' ')                        // table pipes
    .replace(/---+/g, ' ')                      // horizontal rules / table separators
    .replace(/\s+/g, ' ')
    .trim();
}

// Pick the best installed voice for a BCP-47 code (exact match first, then
// base language, e.g. "ta-IN" → fall back to any "ta-*" voice).
/**
 * Looks up an installed speech-synthesis voice for a language tag.
 *
 * Tags are compared case-insensitively with "_" treated as "-", so a voice
 * reported as "en_US" still matches "en-US". The base-language fallback
 * requires a subtag boundary: "fi-FI" may fall back to "fi" or "fi-*", but
 * never to "fil-PH".
 *
 * @param {string} code - BCP-47 language tag, e.g. "ta-IN".
 * @returns {?SpeechSynthesisVoice} The matching voice, or null when there is
 *   no window / no speechSynthesis, the code is empty, no voices are loaded,
 *   or nothing matches.
 */
function _pickVoice(code) {
  if (typeof window === 'undefined' || !window.speechSynthesis) return null;

  const wanted = _normLangTag(code);
  if (!wanted) return null;

  const voices = window.speechSynthesis.getVoices() || [];
  if (!voices.length) return null;

  const tagged = voices
    .map((voice) => ({ voice, tag: _normLangTag(voice.lang) }))
    .filter((entry) => entry.tag);

  const exact = tagged.find((entry) => entry.tag === wanted);
  if (exact) return exact.voice;

  const base = wanted.split('-')[0];
  const related = tagged.find(
    (entry) => entry.tag === base || entry.tag.startsWith(`${base}-`),
  );
  return related ? related.voice : null;
}

// Canonical form used when comparing language tags: lower-case, "-" separated.
function _normLangTag(tag) {
  return String(tag || '').replace(/_/g, '-').toLowerCase();
}

// Clears the timeout whose id is stored in `ref.current`, if there is one.
function _clearTimerRef(ref) {
  if (ref.current !== null) {
    clearTimeout(ref.current);
    ref.current = null;
  }
}

/**
 * Speaker button that reads `text` aloud through window.speechSynthesis.
 *
 * @param {object} props
 * @param {string} props.text - Text to speak; passed through _stripMd first.
 * @param {boolean} [props.autoPlay=false] - Speak once after mount, as soon
 *   as `text` has been stable for 200 ms.
 * @returns The button element, or null when speechSynthesis is unavailable.
 */
const VoiceOutput = ({ text, autoPlay = false }) => {
  const lang = window.useLanguage();
  // Translator from the language context; the original button markup used it
  // (see the NOTE(review) above the return statement).
  const t = lang.t;
  const [speaking, setSpeaking] = React.useState(false);
  const utterRef = React.useRef(null);        // utterance this instance currently owns
  const autoPlayedRef = React.useRef(false);  // latched once auto-play has actually fired
  const pendingSpeakRef = React.useRef(null); // id of the delayed "voices not loaded" start
  const supported = typeof window !== 'undefined' && 'speechSynthesis' in window;

  // Voices load asynchronously in some browsers (Chrome). Force load on mount
  // and refresh on voiceschanged. A listener is used instead of assigning
  // onvoiceschanged so that several mounted instances do not overwrite or
  // null out each other's handler.
  React.useEffect(() => {
    if (!supported) return undefined;
    const synth = window.speechSynthesis;
    const warmUp = () => { synth.getVoices(); };
    warmUp();
    if (typeof synth.addEventListener !== 'function') return undefined;
    synth.addEventListener('voiceschanged', warmUp);
    return () => synth.removeEventListener('voiceschanged', warmUp);
  }, [supported]);

  const stop = () => {
    if (!supported) return;
    _clearTimerRef(pendingSpeakRef);
    // Drop ownership before cancelling so the cancelled utterance's
    // end/error handlers recognise themselves as stale.
    utterRef.current = null;
    window.speechSynthesis.cancel();
    setSpeaking(false);
  };

  const speak = React.useCallback(() => {
    if (!supported) return;
    const clean = _stripMd(text);
    if (!clean) return;

    const synth = window.speechSynthesis;
    _clearTimerRef(pendingSpeakRef);
    synth.cancel(); // never overlap

    const code = (lang.current && lang.current.speechCode) || 'en-US';
    const utter = new SpeechSynthesisUtterance(clean);
    utter.lang = code;
    utter.rate = 1.0;
    utter.pitch = 1.0;

    // Handlers act only while this utterance is the current one; otherwise
    // the end/error event of an utterance that was cancelled by a restart
    // would reset the state of its successor.
    const finish = () => {
      if (utterRef.current !== utter) return;
      utterRef.current = null;
      setSpeaking(false);
    };
    utter.onstart = () => {
      if (utterRef.current === utter) setSpeaking(true);
    };
    utter.onend = finish;
    utter.onerror = finish;
    utterRef.current = utter;

    const start = () => {
      const voice = _pickVoice(code);
      if (voice) utter.voice = voice;
      synth.speak(utter);
    };

    if (synth.getVoices().length) {
      start();
      return;
    }
    // Voices not loaded yet — wait briefly then speak. The timer id is kept
    // so stop(), a restart, or unmount can cancel the delayed start.
    pendingSpeakRef.current = setTimeout(() => {
      pendingSpeakRef.current = null;
      start();
    }, 250);
  }, [text, lang, supported]);

  const toggle = () => {
    if (speaking) stop();
    else speak();
  };

  // Auto-play once per mount when enabled. The latch is set only when the
  // timer actually fires: if `text` or `speak` changes inside the 200 ms
  // window the cleanup clears the timer and the effect re-arms it, instead
  // of being left latched without ever having spoken.
  React.useEffect(() => {
    if (!autoPlay || autoPlayedRef.current || !text) return undefined;
    const id = setTimeout(() => {
      autoPlayedRef.current = true;
      speak();
    }, 200);
    return () => clearTimeout(id);
  }, [autoPlay, text, speak]);

  // On unmount: drop any delayed start and cancel speech this instance owns.
  React.useEffect(() => () => {
    _clearTimerRef(pendingSpeakRef);
    if (!utterRef.current || !supported) return;
    utterRef.current = null;
    try {
      window.speechSynthesis.cancel();
    } catch {
      // Best effort: nothing useful can be done about a failure during teardown.
    }
  }, [supported]);

  if (!supported) return null;

  // NOTE(review): the button markup was empty in the reviewed source (class
  // names and the translated title/aria-label built with `t` are unknown).
  // The element below is a minimal stand-in wired to `toggle` and `speaking`;
  // restore the original attributes from version control before shipping.
  return React.createElement(
    'button',
    { type: 'button', onClick: toggle, 'aria-pressed': speaking },
    React.createElement(speaking ? IconStop2 : IconSpeaker, null),
  );
};

// Publish the component as a global; skipped where there is no window.
if (typeof window !== 'undefined') {
  window.VoiceOutput = VoiceOutput;
}