feat: Adjust VAD sensitivity, enable global auto-stop, update docs

This commit is contained in:
michael.borak
2026-01-21 11:09:54 +01:00
parent 79db6adf45
commit b848154942
5 changed files with 110 additions and 58 deletions

View File

@@ -159,10 +159,10 @@ impl AudioProcessor {
let sq_sum: f32 = vad_chunk.iter().map(|x| x * x).sum();
let rms = (sq_sum / vad_chunk.len() as f32).sqrt();
// Hybrid VAD: Probability > 0.8 OR RMS > 0.015
// INCREASED THRESHOLDS (v1.9.0):
// Now that routing works, we must filter out system notifications (beeps) and noise floor.
let is_speech = probability > 0.8 || rms > 0.015;
// Hybrid VAD: Probability > 0.9 OR RMS > 0.025
// INCREASED THRESHOLDS (v1.1.1):
// Reduced sensitivity to avoid background noise triggering recording.
let is_speech = probability > 0.9 || rms > 0.025;
if is_speech {
self.is_speech_active = true;

View File

@@ -579,23 +579,42 @@ struct AudioMetadata {
format: String,
}
/// Resolves an external tool name (e.g. `ffmpeg` or `ffprobe`) to a concrete path.
///
/// The following locations are probed in order and the first one that holds a
/// file named `binary_name` wins:
///
/// 1. `/opt/homebrew/bin`
/// 2. `/usr/local/bin`
/// 3. `/usr/bin`
///
/// If none of them matches, `binary_name` is returned unchanged so that the
/// caller ends up relying on the tool being reachable through `PATH`.
fn resolve_binary_path(binary_name: &str) -> String {
    const SEARCH_DIRS: [&str; 3] = ["/opt/homebrew/bin", "/usr/local/bin", "/usr/bin"];

    SEARCH_DIRS
        .iter()
        .map(|dir| format!("{}/{}", dir, binary_name))
        // `is_file` rather than `exists`: a directory at the candidate location
        // (or an empty `binary_name`, which makes the candidate the search
        // directory itself) must not be reported as a usable binary.
        .find(|candidate| std::path::Path::new(candidate).is_file())
        // Nothing found in the well-known locations: hand back the bare name.
        .unwrap_or_else(|| binary_name.to_string())
}
#[tauri::command]
fn get_audio_metadata(app: AppHandle, file_path: String) -> Result<AudioMetadata, String> {
emit_log(&app, "INFO", &format!("Getting metadata for: {}", file_path));
// Get file size
let metadata = std::fs::metadata(&file_path).map_err(|e| e.to_string())?;
let size = metadata.len();
// Extract format from extension
let path = std::path::Path::new(&file_path);
let format = path.extension()
.and_then(|e| e.to_str())
.unwrap_or("unknown")
.to_string();
if !path.exists() {
return Err(format!("File not found: {}", file_path));
}
let size = std::fs::metadata(&file_path)
.map_err(|e| e.to_string())?
.len();
// Use ffprobe to get duration
// Try resolved path first
let ffprobe_cmd = resolve_binary_path("ffprobe");
// Get duration using ffprobe (requires ffmpeg to be installed)
let duration = match Command::new("ffprobe")
let output = Command::new(&ffprobe_cmd)
.args([
"-v", "error",
"-show_entries", "format=duration",
@@ -603,24 +622,17 @@ fn get_audio_metadata(app: AppHandle, file_path: String) -> Result<AudioMetadata
&file_path
])
.output()
{
Ok(output) => {
if output.status.success() {
let duration_str = String::from_utf8_lossy(&output.stdout);
duration_str.trim().parse::<f64>().unwrap_or(0.0)
} else {
emit_log(&app, "WARN", "ffprobe failed, duration = 0");
0.0
}
},
Err(_) => {
emit_log(&app, "WARN", "ffprobe not found, duration = 0");
0.0
}
};
emit_log(&app, "SUCCESS", &format!("Metadata: {}s, {} bytes", duration, size));
.map_err(|e| format!("Failed to execute ffprobe at '{}': {}", ffprobe_cmd, e))?;
let duration_str = String::from_utf8_lossy(&output.stdout);
let duration: f64 = duration_str.trim().parse().unwrap_or(0.0);
// Extension as format
let format = path.extension()
.and_then(|e| e.to_str())
.unwrap_or("unknown")
.to_string();
Ok(AudioMetadata {
duration,
size,
@@ -633,8 +645,9 @@ fn convert_to_mp3(app: AppHandle, wav_path: String) -> Result<String, String> {
emit_log(&app, "INFO", &format!("Converting to MP3: {}", wav_path));
let mp3_path = wav_path.replace(".wav", ".mp3");
let ffmpeg_cmd = resolve_binary_path("ffmpeg");
let output = Command::new("ffmpeg")
let output = Command::new(&ffmpeg_cmd)
.args([
"-i", &wav_path,
"-codec:a", "libmp3lame",
@@ -643,7 +656,7 @@ fn convert_to_mp3(app: AppHandle, wav_path: String) -> Result<String, String> {
&mp3_path
])
.output()
.map_err(|e| format!("Failed to execute ffmpeg: {}", e))?;
.map_err(|e| format!("Failed to execute ffmpeg at '{}': {}", ffmpeg_cmd, e))?;
if output.status.success() {
emit_log(&app, "SUCCESS", &format!("MP3 created: {}", mp3_path));
@@ -660,9 +673,11 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result<
emit_log(&app, "INFO", &format!("Chunking audio: {} ({}min chunks)", file_path, chunk_minutes));
let chunk_seconds = chunk_minutes * 60;
let ffprobe_cmd = resolve_binary_path("ffprobe");
let ffmpeg_cmd = resolve_binary_path("ffmpeg");
// Get total duration using ffprobe
let duration_output = Command::new("ffprobe")
let duration_output = Command::new(&ffprobe_cmd)
.args([
"-v", "error",
"-show_entries", "format=duration",
@@ -670,7 +685,7 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result<
&file_path
])
.output()
.map_err(|e| format!("Failed to get duration: {}", e))?;
.map_err(|e| format!("Failed to get duration with '{}': {}", ffprobe_cmd, e))?;
let duration_str = String::from_utf8_lossy(&duration_output.stdout);
let duration: f64 = duration_str.trim().parse()
@@ -686,7 +701,7 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result<
let start_time = i as u32 * chunk_seconds;
let chunk_path = format!("{}_chunk_{}.mp3", base_path, i);
let output = Command::new("ffmpeg")
let output = Command::new(&ffmpeg_cmd)
.args([
"-i", &file_path,
"-ss", &start_time.to_string(),
@@ -696,7 +711,7 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result<
&chunk_path
])
.output()
.map_err(|e| format!("Failed to create chunk {}: {}", i, e))?;
.map_err(|e| format!("Failed to create chunk {} with '{}': {}", i, ffmpeg_cmd, e))?;
if !output.status.success() {
let error = String::from_utf8_lossy(&output.stderr);