using System;
using System.Threading;
using System.Threading.Tasks;
using Agent.Core.Logging;
using KokoroSharp;
using KokoroSharp.Core;

namespace Agent.Core.TextToSpeech;

/// <summary>
/// Local text-to-speech via <a href="https://github.com/Lyrcaxis/KokoroSharp/">KokoroSharp</a> (ONNX Kokoro 82M).
/// </summary>
/// <remarks>
/// The model and voice are loaded lazily (see <see cref="InitializeAsync"/>) and every access to
/// them is serialized through a private lock.
/// </remarks>
public sealed class KokoroTts
{
    /// <summary>Voice used when the options do not specify one.</summary>
    private const string DefaultVoiceId = "af_heart";

    /// <summary>Environment variable that, when set to "1", replaces real playback with a timed simulation.</summary>
    private const string TesterEnvironmentVariable = "LILITH_TESTER";

    /// <summary>Number of progress steps reported by the simulated playback.</summary>
    private const int SimulationSteps = 10;

    /// <summary>Interval between spinner/progress refreshes while waiting for speech to finish.</summary>
    private const int ProgressPollIntervalMs = 100;

    private readonly Logger _logger;
    private readonly object _gate = new();
    private KokoroTTS? _model;
    private KokoroVoice? _voice;

    /// <summary>Identifier of the currently selected voice.</summary>
    public string VoiceId { get; private set; }

    /// <summary>
    /// When <c>true</c>, <see cref="SpeakAsync"/> waits (with progress output) until playback
    /// completes, is canceled, or times out; when <c>false</c> it returns right after starting playback.
    /// </summary>
    public bool WaitForTts { get; }

    /// <summary><c>true</c> once both the model and the voice have been loaded.</summary>
    public bool IsReady
    {
        get
        {
            lock (_gate)
            {
                return _model is not null && _voice is not null;
            }
        }
    }

    /// <summary>Creates an uninitialized instance; nothing is loaded until <see cref="InitializeAsync"/> runs.</summary>
    /// <param name="logger">Destination for status and progress messages.</param>
    /// <param name="options">Optional settings; missing values default to voice "af_heart" and no waiting.</param>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is <c>null</c>.</exception>
    public KokoroTts(Logger logger, KokoroTtsOptions? options = null)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        VoiceId = options?.VoiceId ?? DefaultVoiceId;
        WaitForTts = options?.WaitForTts ?? false;
    }

    /// <summary>Load model and voice (may download ~320MB on first run).</summary>
    /// <param name="logger">Destination for status and progress messages.</param>
    /// <param name="options">Optional settings forwarded to the constructor.</param>
    /// <param name="cancellationToken">Forwarded to <see cref="InitializeAsync"/>.</param>
    /// <returns>An instance on which <see cref="InitializeAsync"/> has completed.</returns>
    public static async Task<KokoroTts> CreateAsync(
        Logger logger,
        KokoroTtsOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        var service = new KokoroTts(logger, options);
        await service.InitializeAsync(cancellationToken).ConfigureAwait(false);
        return service;
    }

    /// <summary>
    /// Loads the Kokoro model and the configured voice on a thread-pool thread. Does nothing when
    /// the instance is already ready.
    /// </summary>
    /// <param name="cancellationToken">
    /// Passed to <see cref="Task.Run(Action, CancellationToken)"/>; it can prevent the load from
    /// starting but does not interrupt a load that is already in progress.
    /// </param>
    /// <exception cref="InvalidOperationException">The voice named by <see cref="VoiceId"/> was not found.</exception>
    public async Task InitializeAsync(CancellationToken cancellationToken = default)
    {
        if (IsReady)
        {
            return;
        }

        await Task.Run(
            () =>
            {
                lock (_gate)
                {
                    // Both parts must be present. Checking only the model would make a failed
                    // voice lookup permanent: the model field would already be set, so every
                    // later call would return here and the voice would never be loaded.
                    if (_model is not null && _voice is not null)
                    {
                        return;
                    }

                    if (_model is null)
                    {
                        _logger.WriteLine(
                            "TTS",
                            "Loading Kokoro model (first run may download ~320MB)...",
                            LogColors.Cyan);
                        _model = KokoroTTS.LoadModel();
                    }

                    _voice = KokoroVoiceManager.GetVoice(VoiceId)
                        ?? throw new InvalidOperationException($"Kokoro voice '{VoiceId}' was not found.");

                    _logger.WriteLine("TTS", $"Kokoro ready (voice: {VoiceId}).", LogColors.Cyan);
                }
            },
            cancellationToken).ConfigureAwait(false);
    }

    /// <summary>Switches to another voice; the current voice is kept if the lookup fails.</summary>
    /// <param name="voiceId">Identifier passed to <c>KokoroVoiceManager.GetVoice</c>.</param>
    /// <exception cref="InvalidOperationException">No voice was returned for <paramref name="voiceId"/>.</exception>
    public void SetVoice(string voiceId)
    {
        lock (_gate)
        {
            _voice = KokoroVoiceManager.GetVoice(voiceId)
                ?? throw new InvalidOperationException($"Kokoro voice '{voiceId}' was not found.");
            VoiceId = voiceId;
        }
    }

    /// <summary>Speak text using KokoroSharp playback queue.</summary>
    /// <remarks>
    /// Blank input, or input that is empty after sanitizing, is ignored. If playback cannot be
    /// started, or the environment variable <c>LILITH_TESTER</c> is "1", a short timed simulation
    /// stands in for real audio. When <see cref="WaitForTts"/> is <c>false</c> the method returns
    /// as soon as playback has been started.
    /// </remarks>
    /// <param name="text">Raw text; it is sanitized before being spoken.</param>
    /// <param name="cancellationToken">
    /// Stops the wait for completion. The visible code does not stop audio that has already been queued.
    /// </param>
    public async Task SpeakAsync(string text, CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return;
        }

        if (!IsReady)
        {
            await InitializeAsync(cancellationToken).ConfigureAwait(false);
        }

        string spoken = KokoroTtsSanitizer.Sanitize(text);
        if (spoken.Length == 0)
        {
            return;
        }

        SynthesisHandle? handle = null;
        bool useSimulation = Environment.GetEnvironmentVariable(TesterEnvironmentVariable) == "1";
        if (!useSimulation)
        {
            try
            {
                handle = StartPlayback(spoken);
            }
            catch (Exception ex)
            {
                // Deliberate best effort: any failure to start audio degrades to the simulation.
                _logger.WriteLine(
                    "TTS",
                    $"Playback initialization failed: {ex.Message}. Falling back to simulation.",
                    LogColors.Orange);
                useSimulation = true;
            }
        }

        if (!WaitForTts)
        {
            return;
        }

        // Continuations must not run inline on whichever thread raises the playback events.
        var completion = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);
        int percent = 0;

        using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);

        // Capture the token once. Background work must never touch cts itself, because
        // cts is disposed when this method returns and cts.Token throws after disposal.
        CancellationToken waitToken = cts.Token;

        if (useSimulation)
        {
            // Simulate speaking at a speed of ~50 characters per second
            // (min 1s, max 3s total to keep tests fast).
            int totalDurationMs = (int)Math.Clamp(spoken.Length * 20L, 1000L, 3000L);
            _ = SimulateSpeechAsync(
                totalDurationMs,
                completion,
                value => Volatile.Write(ref percent, value),
                waitToken);
        }
        else if (handle != null)
        {
            // NOTE(review): the handlers are attached after playback was started; if the events
            // can fire before this point, the wait below only ends through the timeout.
            handle.OnSpeechProgressed += p =>
            {
                double ratio = (double)p.SpokenText_BestGuess.Length / spoken.Length;

                // The best guess may be longer than the sanitized text, so cap the display at 100%.
                Volatile.Write(ref percent, Math.Clamp((int)Math.Round(ratio * 100), 0, 100));
            };
            handle.OnSpeechCompleted += _ => completion.TrySetResult(true);
            handle.OnSpeechCanceled += _ => completion.TrySetResult(false);
        }

        cts.CancelAfter(ComputeTimeoutMs(spoken.Length));

        string[] spinner = { "|", "/", "-", "\\" };
        int spinIndex = 0;

        // Task.WhenAny never throws for the canceled delay, so no exception handling is needed here.
        while (!completion.Task.IsCompleted && !waitToken.IsCancellationRequested)
        {
            string spinChar = spinner[spinIndex % spinner.Length];
            spinIndex++;

            int shownPercent = Volatile.Read(ref percent);
            _logger.WriteProgress("TTS", $"Speaking... [{spinChar}] {shownPercent}%");

            await Task.WhenAny(completion.Task, Task.Delay(ProgressPollIntervalMs, waitToken))
                .ConfigureAwait(false);
        }

        if (cancellationToken.IsCancellationRequested)
        {
            _logger.WriteLine("TTS", "[Cancelled] Speech was interrupted.", LogColors.Orange);
        }
        else if (completion.Task.IsCompletedSuccessfully && completion.Task.Result)
        {
            _logger.WriteProgress("TTS", "Completed [100%]");
            _logger.WriteLine("TTS", "", LogColors.Green);
        }
        else if (cts.IsCancellationRequested && !completion.Task.IsCompleted)
        {
            _logger.WriteLine(
                "TTS",
                "[Timeout] Speech completion timed out (possibly no audio device).",
                LogColors.Orange);
        }
        else
        {
            _logger.WriteLine("TTS", "[Cancelled] Speech was interrupted.", LogColors.Orange);
        }
    }

    /// <summary>Starts playback of already sanitized text while holding the lock.</summary>
    /// <exception cref="InvalidOperationException">The model or the voice has not been loaded.</exception>
    private SynthesisHandle StartPlayback(string spoken)
    {
        lock (_gate)
        {
            if (_model is null || _voice is null)
            {
                throw new InvalidOperationException("Kokoro TTS is not initialized.");
            }

            return _model.SpeakFast(spoken, _voice);
        }
    }

    /// <summary>
    /// Wait budget in milliseconds: one second per 15 characters plus five seconds, times a
    /// safety factor of three. The effective floor is therefore 15 seconds.
    /// </summary>
    private static int ComputeTimeoutMs(int spokenLength)
    {
        double timeoutMs = ((spokenLength / 15.0) + 5.0) * 3.0 * 1000.0;

        // Guard the conversion; an out-of-range double-to-int cast is unspecified.
        return (int)Math.Min(int.MaxValue, timeoutMs);
    }

    /// <summary>
    /// Reports progress in equal steps over <paramref name="totalDurationMs"/> and then marks the
    /// speech as completed. On cancellation it simply stops: the waiter observes the same token
    /// and decides between "cancelled" and "timeout" on its own.
    /// </summary>
    private static async Task SimulateSpeechAsync(
        int totalDurationMs,
        TaskCompletionSource<bool> completion,
        Action<int> reportPercent,
        CancellationToken token)
    {
        int stepDurationMs = totalDurationMs / SimulationSteps;
        try
        {
            for (int step = 1; step <= SimulationSteps; step++)
            {
                token.ThrowIfCancellationRequested();
                reportPercent(step * 100 / SimulationSteps);
                await Task.Delay(stepDurationMs, token).ConfigureAwait(false);
            }

            completion.TrySetResult(true);
        }
        catch (OperationCanceledException)
        {
            // Nothing to report; the wait loop has already been released by the same token.
        }
    }
}
Documentation
KokoroTts
Local Kokoro text-to-speech engine.
KokoroTts.cs
KokoroTts — loads the Kokoro ONNX model and speaks sanitized text (default voice: af_heart).