BMAD-METHOD/.claude/hooks/play-tts-piper.sh

339 lines
12 KiB
Bash
Executable File

#!/bin/bash
#
# File: .claude/hooks/play-tts-piper.sh
#
# AgentVibes - Finally, your AI Agents can Talk Back! Text-to-Speech WITH personality for AI Assistants!
# Website: https://agentvibes.org
# Repository: https://github.com/paulpreibisch/AgentVibes
#
# Co-created by Paul Preibisch with Claude AI
# Copyright (c) 2025 Paul Preibisch
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# DISCLAIMER: This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND,
# express or implied. Use at your own risk. See the Apache License for details.
#
# ---
#
# @fileoverview Piper TTS Provider Implementation - Free, offline neural TTS
# @context Provides local, privacy-first TTS alternative to cloud services for WSL/Linux
# @architecture Implements provider interface contract for Piper binary integration
# @dependencies piper (pipx), piper-voice-manager.sh, mpv/aplay, ffmpeg (optional padding)
# @entrypoints Called by play-tts.sh router when provider=piper
# @patterns Provider contract: text/voice → audio file path, voice auto-download, language-aware synthesis
# @related play-tts.sh, piper-voice-manager.sh, language-manager.sh, GitHub Issue #25
#
# Fix locale warnings
export LC_ALL=C
TEXT="$1"
VOICE_OVERRIDE="$2" # Optional: voice model name
# Source voice manager and language manager
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/piper-voice-manager.sh"
source "$SCRIPT_DIR/language-manager.sh"
# Default voice for Piper
DEFAULT_VOICE="en_US-lessac-medium"
# @function determine_voice_model
# @intent Resolve voice name to Piper model name with language support
# @why Support voice override, language-specific voices, and default fallback
# @param Uses global: $VOICE_OVERRIDE
# @returns Sets $VOICE_MODEL global variable
# @sideeffects None
VOICE_MODEL=""
# Get current language setting
CURRENT_LANGUAGE=$(get_language_code)
if [[ -n "$VOICE_OVERRIDE" ]]; then
# Use override if provided
VOICE_MODEL="$VOICE_OVERRIDE"
echo "🎤 Using voice: $VOICE_OVERRIDE (session-specific)"
else
# Try to get voice from voice file (check CLAUDE_PROJECT_DIR first for MCP context)
VOICE_FILE=""
# Priority order:
# 1. CLAUDE_PROJECT_DIR env var (set by MCP for project-specific settings)
# 2. Script location (for direct slash command usage)
# 3. Global ~/.claude (fallback)
if [[ -n "$CLAUDE_PROJECT_DIR" ]] && [[ -f "$CLAUDE_PROJECT_DIR/.claude/tts-voice.txt" ]]; then
# MCP context: Use the project directory where MCP was invoked
VOICE_FILE="$CLAUDE_PROJECT_DIR/.claude/tts-voice.txt"
elif [[ -f "$SCRIPT_DIR/../tts-voice.txt" ]]; then
# Direct usage: Use script location
VOICE_FILE="$SCRIPT_DIR/../tts-voice.txt"
elif [[ -f "$HOME/.claude/tts-voice.txt" ]]; then
# Fallback: Use global
VOICE_FILE="$HOME/.claude/tts-voice.txt"
fi
if [[ -n "$VOICE_FILE" ]]; then
FILE_VOICE=$(cat "$VOICE_FILE" 2>/dev/null)
# Check for multi-speaker voice (model + speaker ID stored separately)
# Use same directory as VOICE_FILE for consistency
VOICE_DIR=$(dirname "$VOICE_FILE")
MODEL_FILE="$VOICE_DIR/tts-piper-model.txt"
SPEAKER_ID_FILE="$VOICE_DIR/tts-piper-speaker-id.txt"
if [[ -f "$MODEL_FILE" ]] && [[ -f "$SPEAKER_ID_FILE" ]]; then
# Multi-speaker voice
VOICE_MODEL=$(cat "$MODEL_FILE" 2>/dev/null)
SPEAKER_ID=$(cat "$SPEAKER_ID_FILE" 2>/dev/null)
echo "🎭 Using multi-speaker voice: $FILE_VOICE (Model: $VOICE_MODEL, Speaker ID: $SPEAKER_ID)"
# Check if it's a standard Piper model name or custom voice (just use as-is)
elif [[ -n "$FILE_VOICE" ]]; then
VOICE_MODEL="$FILE_VOICE"
fi
fi
# If no Piper voice from file, try language-specific voice
if [[ -z "$VOICE_MODEL" ]]; then
LANG_VOICE=$(get_voice_for_language "$CURRENT_LANGUAGE" "piper" 2>/dev/null)
if [[ -n "$LANG_VOICE" ]]; then
VOICE_MODEL="$LANG_VOICE"
echo "🌍 Using $CURRENT_LANGUAGE voice: $LANG_VOICE (Piper)"
else
# Use default voice
VOICE_MODEL="$DEFAULT_VOICE"
fi
fi
fi
# @function validate_inputs
# @intent Check required parameters
# @why Fail fast with clear errors if inputs missing
# @exitcode 1=missing text, 2=missing piper binary
if [[ -z "$TEXT" ]]; then
echo "Usage: $0 \"text to speak\" [voice_model_name]"
exit 1
fi
# Check if Piper is installed
if ! command -v piper &> /dev/null; then
echo "❌ Error: Piper TTS not installed"
echo "Install with: pipx install piper-tts"
echo "Or run: .claude/hooks/piper-installer.sh"
exit 2
fi
# @function ensure_voice_downloaded
# @intent Download voice model if not cached
# @why Provide seamless experience with automatic downloads
# @param Uses global: $VOICE_MODEL
# @sideeffects Downloads voice model files
# @edgecases Prompts user for consent before downloading
if ! verify_voice "$VOICE_MODEL"; then
echo "📥 Voice model not found: $VOICE_MODEL"
echo " File size: ~25MB"
echo " Preview: https://huggingface.co/rhasspy/piper-voices"
echo ""
read -p " Download this voice model? [y/N]: " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
if ! download_voice "$VOICE_MODEL"; then
echo "❌ Failed to download voice model"
echo "Fix: Download manually or choose different voice"
exit 3
fi
else
echo "❌ Voice download cancelled"
exit 3
fi
fi
# Get voice model path
VOICE_PATH=$(get_voice_path "$VOICE_MODEL")
if [[ $? -ne 0 ]]; then
echo "❌ Voice model path not found: $VOICE_MODEL"
exit 3
fi
# @function determine_audio_directory
# @intent Find appropriate directory for audio file storage
# @why Supports project-local and global storage
# @returns Sets $AUDIO_DIR global variable
if [[ -n "$CLAUDE_PROJECT_DIR" ]]; then
AUDIO_DIR="$CLAUDE_PROJECT_DIR/.claude/audio"
else
# Fallback: try to find .claude directory in current path
CURRENT_DIR="$PWD"
while [[ "$CURRENT_DIR" != "/" ]]; do
if [[ -d "$CURRENT_DIR/.claude" ]]; then
AUDIO_DIR="$CURRENT_DIR/.claude/audio"
break
fi
CURRENT_DIR=$(dirname "$CURRENT_DIR")
done
# Final fallback to global if no project .claude found
if [[ -z "$AUDIO_DIR" ]]; then
AUDIO_DIR="$HOME/.claude/audio"
fi
fi
mkdir -p "$AUDIO_DIR"
TEMP_FILE="$AUDIO_DIR/tts-$(date +%s).wav"
# @function get_speech_rate
# @intent Determine speech rate for Piper synthesis
# @why Convert user-facing speed (0.5=slower, 2.0=faster) to Piper length-scale (inverted)
# @returns Piper length-scale value (inverted from user scale)
# @note Piper uses length-scale where higher=slower, opposite of user expectation
get_speech_rate() {
local target_config=""
local main_config=""
# Check for target-specific config first (new and legacy paths)
if [[ -f "$SCRIPT_DIR/../config/tts-target-speech-rate.txt" ]]; then
target_config="$SCRIPT_DIR/../config/tts-target-speech-rate.txt"
elif [[ -f "$HOME/.claude/config/tts-target-speech-rate.txt" ]]; then
target_config="$HOME/.claude/config/tts-target-speech-rate.txt"
elif [[ -f "$SCRIPT_DIR/../config/piper-target-speech-rate.txt" ]]; then
target_config="$SCRIPT_DIR/../config/piper-target-speech-rate.txt"
elif [[ -f "$HOME/.claude/config/piper-target-speech-rate.txt" ]]; then
target_config="$HOME/.claude/config/piper-target-speech-rate.txt"
fi
# Check for main config (new and legacy paths)
if [[ -f "$SCRIPT_DIR/../config/tts-speech-rate.txt" ]]; then
main_config="$SCRIPT_DIR/../config/tts-speech-rate.txt"
elif [[ -f "$HOME/.claude/config/tts-speech-rate.txt" ]]; then
main_config="$HOME/.claude/config/tts-speech-rate.txt"
elif [[ -f "$SCRIPT_DIR/../config/piper-speech-rate.txt" ]]; then
main_config="$SCRIPT_DIR/../config/piper-speech-rate.txt"
elif [[ -f "$HOME/.claude/config/piper-speech-rate.txt" ]]; then
main_config="$HOME/.claude/config/piper-speech-rate.txt"
fi
# If this is a non-English voice and target config exists, use it
if [[ "$CURRENT_LANGUAGE" != "english" ]] && [[ -n "$target_config" ]]; then
local user_speed=$(cat "$target_config" 2>/dev/null)
# Convert user speed to Piper length-scale (invert)
# User: 0.5=slower, 1.0=normal, 2.0=faster
# Piper: 2.0=slower, 1.0=normal, 0.5=faster
# Formula: piper_length_scale = 1.0 / user_speed
echo "scale=2; 1.0 / $user_speed" | bc -l 2>/dev/null || echo "1.0"
return
fi
# Otherwise use main config if available
if [[ -n "$main_config" ]]; then
local user_speed=$(grep -v '^#' "$main_config" 2>/dev/null | grep -v '^$' | tail -1)
echo "scale=2; 1.0 / $user_speed" | bc -l 2>/dev/null || echo "1.0"
return
fi
# Default: 1.0 (normal) for English, 2.0 (slower) for learning
if [[ "$CURRENT_LANGUAGE" != "english" ]]; then
echo "2.0"
else
echo "1.0"
fi
}
SPEECH_RATE=$(get_speech_rate)
# @function synthesize_with_piper
# @intent Generate speech using Piper TTS
# @why Provides free, offline TTS alternative
# @param Uses globals: $TEXT, $VOICE_PATH, $SPEECH_RATE, $SPEAKER_ID (optional)
# @returns Creates WAV file at $TEMP_FILE
# @exitcode 0=success, 4=synthesis error
# @sideeffects Creates audio file
# @edgecases Handles piper errors, invalid models, multi-speaker voices
if [[ -n "$SPEAKER_ID" ]]; then
# Multi-speaker voice: Pass speaker ID
echo "$TEXT" | piper --model "$VOICE_PATH" --speaker "$SPEAKER_ID" --length-scale "$SPEECH_RATE" --output_file "$TEMP_FILE" 2>/dev/null
else
# Single-speaker voice
echo "$TEXT" | piper --model "$VOICE_PATH" --length-scale "$SPEECH_RATE" --output_file "$TEMP_FILE" 2>/dev/null
fi
if [[ ! -f "$TEMP_FILE" ]] || [[ ! -s "$TEMP_FILE" ]]; then
echo "❌ Failed to synthesize speech with Piper"
echo "Voice model: $VOICE_MODEL"
echo "Check that voice model is valid"
exit 4
fi
# @function add_silence_padding
# @intent Add silence to prevent WSL audio static
# @why WSL audio subsystem cuts off first ~200ms
# @param Uses global: $TEMP_FILE
# @returns Updates $TEMP_FILE to padded version
# @sideeffects Modifies audio file
# AI NOTE: Use ffmpeg if available, otherwise skip padding (degraded experience)
if command -v ffmpeg &> /dev/null; then
PADDED_FILE="$AUDIO_DIR/tts-padded-$(date +%s).wav"
# Add 200ms of silence at the beginning
ffmpeg -f lavfi -i anullsrc=r=44100:cl=stereo:d=0.2 -i "$TEMP_FILE" \
-filter_complex "[0:a][1:a]concat=n=2:v=0:a=1[out]" \
-map "[out]" -y "$PADDED_FILE" 2>/dev/null
if [[ -f "$PADDED_FILE" ]]; then
rm -f "$TEMP_FILE"
TEMP_FILE="$PADDED_FILE"
fi
fi
# @function play_audio
# @intent Play generated audio using available player with sequential playback
# @why Support multiple audio players and prevent overlapping audio in learning mode
# @param Uses global: $TEMP_FILE, $CURRENT_LANGUAGE
# @sideeffects Plays audio with lock mechanism for sequential playback
LOCK_FILE="/tmp/agentvibes-audio.lock"
# Wait for previous audio to finish (max 30 seconds)
for i in {1..60}; do
if [ ! -f "$LOCK_FILE" ]; then
break
fi
sleep 0.5
done
# Track last target language audio for replay command
if [[ "$CURRENT_LANGUAGE" != "english" ]]; then
TARGET_AUDIO_FILE="${CLAUDE_PROJECT_DIR:-.}/.claude/last-target-audio.txt"
echo "$TEMP_FILE" > "$TARGET_AUDIO_FILE"
fi
# Create lock and play audio
touch "$LOCK_FILE"
# Get audio duration for proper lock timing
DURATION=$(ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$TEMP_FILE" 2>/dev/null)
DURATION=${DURATION%.*} # Round to integer
DURATION=${DURATION:-1} # Default to 1 second if detection fails
# Play audio in background (skip if in test mode)
if [[ "${AGENTVIBES_TEST_MODE:-false}" != "true" ]]; then
(mpv "$TEMP_FILE" || aplay "$TEMP_FILE" || paplay "$TEMP_FILE") >/dev/null 2>&1 &
PLAYER_PID=$!
fi
# Wait for audio to finish, then release lock
(sleep $DURATION; rm -f "$LOCK_FILE") &
disown
echo "🎵 Saved to: $TEMP_FILE"
echo "🎤 Voice used: $VOICE_MODEL (Piper TTS)"