feat: meeting mode auto-start with sustained speech fallback

Meeting mode trigger now has two paths:
- Primary: system audio energy > 0.005 (immediate, catches most apps)
- Fallback: ~3 seconds sustained speech detection via VAD counter (catches Electron/WebRTC apps like Nextcloud Talk where ScreenCaptureKit may not capture audio)

Brief sounds or momentary speech won't trigger - only a real conversation lasting ~3s will activate the fallback. The counter decays during silence to prevent accumulation of brief detections.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-06 13:13:31 +01:00
parent 2c93afce9e
commit 6584136cb0
1 changed files with 31 additions and 11 deletions
--- a/src-tauri/src/audio_processor.rs
+++ b/src-tauri/src/audio_processor.rs
@@ -46,6 +46,10 @@ pub struct AudioProcessor {
    // Recording Mode (voice or meeting)
    recording_mode: String,
    // Meeting mode: sustained-activity counter (incremented while speech or system audio is active, decayed otherwise).
    // Prevents false triggers from brief sounds; requires sustained speech.
    meeting_speech_frames: u32,
 }
 impl AudioProcessor {
@@ -106,6 +110,7 @@ impl AudioProcessor {
            last_event_time: std::time::Instant::now(),
            system_queue: Arc::new(Mutex::new(std::collections::VecDeque::new())),
            recording_mode,
            meeting_speech_frames: 0,
        })
    }
@@ -247,29 +252,44 @@ impl AudioProcessor {
            if self.last_event_time.elapsed().as_millis() > 2000 && self.recording_mode == "meeting" {
                if let Some(app) = &self.app_handle {
                    emit_log(app, "DEBUG", &format!(
-                        "Waiting for Meeting... SysEnergy: {:.4} (thr: 0.005), VAD Speech: {} | SysQueue empty: {}",
+                        "Waiting... SysEnergy: {:.4} (thr: 0.005), Speech: {}, SpeechFrames: {}/90 | SysQ empty: {}",
-                        max_system_energy, self.is_speech_active,
+                        max_system_energy, self.is_speech_active, self.meeting_speech_frames,
                        if let Ok(q) = self.system_queue.lock() { q.is_empty() } else { true }
                    ));
                }
            }
            // MODE-SPECIFIC TRIGGER LOGIC:
-            // "voice"   -> Trigger if user speaks (VAD)
+            // "voice"   -> Trigger if user speaks (VAD) - immediate
-            // "meeting" -> Trigger ONLY on system audio energy above threshold.
+            // "meeting" -> Primary: system audio energy above threshold (immediate)
-            //              Speech alone NEVER triggers meeting mode to prevent
+            //              Fallback: sustained speech for ~3 seconds (catches apps like
-            //              false starts when user speaks near mic without a call.
+            //              Nextcloud Talk where ScreenCaptureKit may not capture audio).
-            //              The threshold (0.005) is low enough to catch ringtones,
+            //              Silence decays the counter, so brief speech cannot accumulate into a trigger.
-            //              call audio, and notification sounds from any app.
+            //
            // The sustained speech counter uses VAD chunks (~32ms each at 16kHz/512).
            // A net count of ~90 chunks (silence decays the count) ≈ ~3 seconds of sustained speech.
            const MEETING_SPEECH_THRESHOLD: u32 = 90;
            if self.recording_mode == "meeting" {
                if self.is_speech_active || system_active {
                    self.meeting_speech_frames += 1;
                } else {
                    // Decay by 3 (saturating at 0) when silent, rather than a hard reset - prevents brief sounds from accumulating
                    self.meeting_speech_frames = self.meeting_speech_frames.saturating_sub(3);
                }
            }
            let trigger = if self.recording_mode == "voice" {
                self.is_speech_active
            } else {
-                system_active
+                // Immediate trigger on system audio energy
                // OR sustained speech (~3s) as fallback for Electron/WebRTC apps
                system_active || self.meeting_speech_frames >= MEETING_SPEECH_THRESHOLD
            };
            if trigger {
-                // Trigger Detected!
+                let reason = if system_active { "SystemAudio" } else { "SustainedSpeech" };
-                println!("Auto-Start: Trigger! (Mode: {}, SysEnergy: {:.4}, Speech: {})", self.recording_mode, max_system_energy, self.is_speech_active);
+                println!("Auto-Start: Trigger! (Mode: {}, Reason: {}, SysEnergy: {:.4}, SpeechFrames: {})", self.recording_mode, reason, max_system_energy, self.meeting_speech_frames);
                self.waiting_for_speech = false;
                // Flush Ring Buffer (Orderly: from ring_pos to end, then 0 to ring_pos)