feat: meeting mode auto-start with sustained speech fallback
Meeting mode trigger now has two paths:
- Primary: system audio energy > 0.005 (immediate, catches most apps)
- Fallback: ~3 seconds of sustained speech detected via a VAD counter (catches Electron/WebRTC apps like Nextcloud Talk, where ScreenCaptureKit may not capture audio)

Brief sounds or momentary speech won't trigger the fallback; only a real conversation lasting ~3s will activate it. The counter decays during silence to prevent accumulation of brief detections.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -46,6 +46,10 @@ pub struct AudioProcessor {
|
|||||||
|
|
||||||
// Recording Mode (voice or meeting)
|
// Recording Mode (voice or meeting)
|
||||||
recording_mode: String,
|
recording_mode: String,
|
||||||
|
|
||||||
|
// Meeting mode: consecutive speech detection counter.
|
||||||
|
// Prevents false triggers from brief sounds; requires sustained speech.
|
||||||
|
meeting_speech_frames: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AudioProcessor {
|
impl AudioProcessor {
|
||||||
@@ -106,6 +110,7 @@ impl AudioProcessor {
|
|||||||
last_event_time: std::time::Instant::now(),
|
last_event_time: std::time::Instant::now(),
|
||||||
system_queue: Arc::new(Mutex::new(std::collections::VecDeque::new())),
|
system_queue: Arc::new(Mutex::new(std::collections::VecDeque::new())),
|
||||||
recording_mode,
|
recording_mode,
|
||||||
|
meeting_speech_frames: 0,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -247,29 +252,44 @@ impl AudioProcessor {
|
|||||||
if self.last_event_time.elapsed().as_millis() > 2000 && self.recording_mode == "meeting" {
|
if self.last_event_time.elapsed().as_millis() > 2000 && self.recording_mode == "meeting" {
|
||||||
if let Some(app) = &self.app_handle {
|
if let Some(app) = &self.app_handle {
|
||||||
emit_log(app, "DEBUG", &format!(
|
emit_log(app, "DEBUG", &format!(
|
||||||
"Waiting for Meeting... SysEnergy: {:.4} (thr: 0.005), VAD Speech: {} | SysQueue empty: {}",
|
"Waiting... SysEnergy: {:.4} (thr: 0.005), Speech: {}, SpeechFrames: {}/90 | SysQ empty: {}",
|
||||||
max_system_energy, self.is_speech_active,
|
max_system_energy, self.is_speech_active, self.meeting_speech_frames,
|
||||||
if let Ok(q) = self.system_queue.lock() { q.is_empty() } else { true }
|
if let Ok(q) = self.system_queue.lock() { q.is_empty() } else { true }
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// MODE-SPECIFIC TRIGGER LOGIC:
|
// MODE-SPECIFIC TRIGGER LOGIC:
|
||||||
// "voice" -> Trigger if user speaks (VAD)
|
// "voice" -> Trigger if user speaks (VAD) - immediate
|
||||||
// "meeting" -> Trigger ONLY on system audio energy above threshold.
|
// "meeting" -> Primary: system audio energy above threshold (immediate)
|
||||||
// Speech alone NEVER triggers meeting mode to prevent
|
// Fallback: sustained speech for ~3 seconds (catches apps like
|
||||||
// false starts when user speaks near mic without a call.
|
// Nextcloud Talk where ScreenCaptureKit may not capture audio).
|
||||||
// The threshold (0.005) is low enough to catch ringtones,
|
// Silence decays the counter, so brief speech cannot accumulate into a false trigger.
|
||||||
// call audio, and notification sounds from any app.
|
//
|
||||||
|
// The sustained speech counter uses VAD chunks (~32ms each at 16kHz/512).
|
||||||
|
// ~90 consecutive chunks ≈ ~3 seconds of sustained speech.
|
||||||
|
const MEETING_SPEECH_THRESHOLD: u32 = 90;
|
||||||
|
|
||||||
|
if self.recording_mode == "meeting" {
|
||||||
|
if self.is_speech_active || system_active {
|
||||||
|
self.meeting_speech_frames += 1;
|
||||||
|
} else {
|
||||||
|
// Decay (by 3 per silent chunk, saturating at 0) rather than hard-reset - prevents brief sounds from accumulating
|
||||||
|
self.meeting_speech_frames = self.meeting_speech_frames.saturating_sub(3);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let trigger = if self.recording_mode == "voice" {
|
let trigger = if self.recording_mode == "voice" {
|
||||||
self.is_speech_active
|
self.is_speech_active
|
||||||
} else {
|
} else {
|
||||||
system_active
|
// Immediate trigger on system audio energy
|
||||||
|
// OR sustained speech (~3s) as fallback for Electron/WebRTC apps
|
||||||
|
system_active || self.meeting_speech_frames >= MEETING_SPEECH_THRESHOLD
|
||||||
};
|
};
|
||||||
|
|
||||||
if trigger {
|
if trigger {
|
||||||
// Trigger Detected!
|
let reason = if system_active { "SystemAudio" } else { "SustainedSpeech" };
|
||||||
println!("Auto-Start: Trigger! (Mode: {}, SysEnergy: {:.4}, Speech: {})", self.recording_mode, max_system_energy, self.is_speech_active);
|
println!("Auto-Start: Trigger! (Mode: {}, Reason: {}, SysEnergy: {:.4}, SpeechFrames: {})", self.recording_mode, reason, max_system_energy, self.meeting_speech_frames);
|
||||||
self.waiting_for_speech = false;
|
self.waiting_for_speech = false;
|
||||||
|
|
||||||
// Flush Ring Buffer (Orderly: from ring_pos to end, then 0 to ring_pos)
|
// Flush Ring Buffer (Orderly: from ring_pos to end, then 0 to ring_pos)
|
||||||
|
|||||||
Reference in New Issue
Block a user