feat: meeting mode auto-start with sustained speech fallback
Meeting mode trigger now has two paths:
- Primary: system audio energy > 0.005 (immediate, catches most apps)
- Fallback: ~3 seconds of sustained speech detection via VAD counter (catches Electron/WebRTC apps like Nextcloud Talk where ScreenCaptureKit may not capture audio)

Brief sounds or momentary speech won't trigger the fallback; only a real conversation lasting ~3s will activate it. The counter decays during silence to prevent accumulation of brief detections.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -46,6 +46,10 @@ pub struct AudioProcessor {
|
||||
|
||||
// Recording Mode (voice or meeting)
|
||||
recording_mode: String,
|
||||
|
||||
// Meeting mode: consecutive speech detection counter.
|
||||
// Prevents false triggers from brief sounds; requires sustained speech.
|
||||
meeting_speech_frames: u32,
|
||||
}
|
||||
|
||||
impl AudioProcessor {
|
||||
@@ -106,6 +110,7 @@ impl AudioProcessor {
|
||||
last_event_time: std::time::Instant::now(),
|
||||
system_queue: Arc::new(Mutex::new(std::collections::VecDeque::new())),
|
||||
recording_mode,
|
||||
meeting_speech_frames: 0,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -247,29 +252,44 @@ impl AudioProcessor {
|
||||
if self.last_event_time.elapsed().as_millis() > 2000 && self.recording_mode == "meeting" {
|
||||
if let Some(app) = &self.app_handle {
|
||||
emit_log(app, "DEBUG", &format!(
|
||||
"Waiting for Meeting... SysEnergy: {:.4} (thr: 0.005), VAD Speech: {} | SysQueue empty: {}",
|
||||
max_system_energy, self.is_speech_active,
|
||||
"Waiting... SysEnergy: {:.4} (thr: 0.005), Speech: {}, SpeechFrames: {}/90 | SysQ empty: {}",
|
||||
max_system_energy, self.is_speech_active, self.meeting_speech_frames,
|
||||
if let Ok(q) = self.system_queue.lock() { q.is_empty() } else { true }
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// MODE-SPECIFIC TRIGGER LOGIC:
|
||||
// "voice" -> Trigger if user speaks (VAD)
|
||||
// "meeting" -> Trigger ONLY on system audio energy above threshold.
|
||||
// Speech alone NEVER triggers meeting mode to prevent
|
||||
// false starts when user speaks near mic without a call.
|
||||
// The threshold (0.005) is low enough to catch ringtones,
|
||||
// call audio, and notification sounds from any app.
|
||||
// "voice" -> Trigger if user speaks (VAD) - immediate
|
||||
// "meeting" -> Primary: system audio energy above threshold (immediate)
|
||||
// Fallback: sustained speech for ~3 seconds (catches apps like
|
||||
// Nextcloud Talk where ScreenCaptureKit may not capture audio).
|
||||
// Silence decays the counter, so brief speech cannot accumulate into a false trigger.
|
||||
//
|
||||
// The sustained speech counter uses VAD chunks (~32ms each at 16kHz/512).
|
||||
// ~90 consecutive chunks ≈ ~3 seconds of sustained speech.
|
||||
const MEETING_SPEECH_THRESHOLD: u32 = 90;
|
||||
|
||||
if self.recording_mode == "meeting" {
|
||||
if self.is_speech_active || system_active {
|
||||
self.meeting_speech_frames += 1;
|
||||
} else {
|
||||
// Decay (not a hard reset) when silence is detected - prevents brief sounds from accumulating
|
||||
self.meeting_speech_frames = self.meeting_speech_frames.saturating_sub(3);
|
||||
}
|
||||
}
|
||||
|
||||
let trigger = if self.recording_mode == "voice" {
|
||||
self.is_speech_active
|
||||
} else {
|
||||
system_active
|
||||
// Immediate trigger on system audio energy
|
||||
// OR sustained speech (~3s) as fallback for Electron/WebRTC apps
|
||||
system_active || self.meeting_speech_frames >= MEETING_SPEECH_THRESHOLD
|
||||
};
|
||||
|
||||
if trigger {
|
||||
// Trigger Detected!
|
||||
println!("Auto-Start: Trigger! (Mode: {}, SysEnergy: {:.4}, Speech: {})", self.recording_mode, max_system_energy, self.is_speech_active);
|
||||
let reason = if system_active { "SystemAudio" } else { "SustainedSpeech" };
|
||||
println!("Auto-Start: Trigger! (Mode: {}, Reason: {}, SysEnergy: {:.4}, SpeechFrames: {})", self.recording_mode, reason, max_system_energy, self.meeting_speech_frames);
|
||||
self.waiting_for_speech = false;
|
||||
|
||||
// Flush Ring Buffer (Orderly: from ring_pos to end, then 0 to ring_pos)
|
||||
|
||||
Reference in New Issue
Block a user