diff --git a/README.md b/README.md index 47f7fdd..2478a34 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ * **Noise Filtering**: Ignores typing and background noise. * **🛡️ Empty Audio Guard**: Automatically discards silent recordings (e.g., false triggers) to save API costs and prevent errors. * **✨ 48kHz Crystal Clear Audio**: Optimized audio pipeline prevents "robot voice" distortion. +* **💾 Daily Security Backups**: Automatically saves your entire history as a standard JSON file every 24 hours (unencrypted for easy recovery). ## 🚀 Key Features @@ -40,13 +41,23 @@ ### 3. Recording a Meeting 1. **Select Mode**: Choose "Meeting" (captures Mic + System) or "Voice Memo" (Mic only). -2. **Auto-Start (Recommended)**: Check "Auto-start when audio detected". +2. **Auto-Start Logic**: + - **Meeting Mode**: Triggers only when the call actually starts (system audio detected). + - **Voice Memo**: Triggers immediately when you start speaking. 3. **Standby**: Click "Standby (Auto-Start)". The app waits silently. 4. **Join Call**: Join your Teams/Zoom call. -5. **Trigger**: As soon as someone speaks, Hearbit starts recording automatically. -6. **Finish**: When the call ends (silence > 20s), Hearbit stops, transcribes, summarizes, and **goes back to Standby** for the next call. +5. **Trigger**: Hearbit starts recording automatically based on the selected mode. +6. **Finish**: When the call ends (silence > 25s), Hearbit stops, transcribes, summarizes, and **goes back to Standby** for the next call. -### 4. Customizing Prompts +### 4. Optimal Setup (MS Teams/Zoom) +For the best experience without changing any software settings: +* **Hearbit App**: Select your **real microphone** (e.g., "MacBook Mic" or Headset). +* **Teams/Zoom**: Use your standard output (Speakers/Headset). +* *How it works*: Hearbit captures your voice via mic and the other side via macOS System Audio Capture automatically. 
+ +*Note: If you choose "Hearbit Audio" (Aggregate Device) in the app, you MUST set your Teams speaker output to "Hearbit Audio" as well.* + +### 5. Customizing Prompts You can create custom AI templates in Settings -> Prompts. Example: * **"Sales Call"**: Focus on budget, timeline, and decision makers. * **"Daily Standup"**: Extract blockers and next steps. @@ -63,10 +74,10 @@ If macOS blocks the app because it's not notarized: 3. Enter your password and try again. ### Audio cuts off at the start? -v1.2.0 includes a **3-second buffer**. If this persists, ensure your "Auto-start" threshold isn't too high (though it's currently auto-calibrated). +v1.2.0 includes a **3-second buffer**. The Meeting mode now uses a more sensitive trigger (0.005 energy) to catch even quiet participants. ### "Batch processing failed" -This means the audio was empty or too short. The new **Empty Guard** prevents this in most cases. If it happens, check your microphone selection. +This means the audio was empty or too short. Check the **Logs** tab for detailed error messages from Infomaniak. The most common cause is selecting the wrong input device or a lack of Screen Recording permissions. 
--- diff --git a/src-tauri/src/audio_processor.rs b/src-tauri/src/audio_processor.rs index a0a6e6f..590b39f 100644 --- a/src-tauri/src/audio_processor.rs +++ b/src-tauri/src/audio_processor.rs @@ -1,5 +1,6 @@ use std::sync::{Arc, Mutex}; use tauri::{AppHandle, Emitter}; +use crate::emit_log; use cpal::Sample; use hound::WavWriter; use rubato::{Resampler, FastFixedIn, PolynomialDegree}; @@ -42,6 +43,9 @@ pub struct AudioProcessor { // System Audio Queue for Mixing pub system_queue: Arc>>, + + // Recording Mode (voice or meeting) + recording_mode: String, } impl AudioProcessor { @@ -50,7 +54,8 @@ impl AudioProcessor { channel_count: u16, writer: Arc>>>, app_handle: AppHandle, - wait_for_speech: bool + wait_for_speech: bool, + recording_mode: String, ) -> Result { let vad_sample_rate = 16000; let vad_chunk_size = 512; @@ -100,30 +105,51 @@ impl AudioProcessor { app_handle: Some(app_handle), last_event_time: std::time::Instant::now(), system_queue: Arc::new(Mutex::new(std::collections::VecDeque::new())), + recording_mode, }) } pub fn process(&mut self, input_data: &[f32]) { // MIXING LOGIC: // We have `input_data` (Microphone). We check `system_queue` for System Audio. - // We mix them: Out = Mic + System. + // System Audio is hardcoded to 2 channels (Stereo) in sc_audio.rs. + // Microphone `self.channel_count` can be 1 (Mono) or 2 (Stereo). 
+ + let mic_channels = self.channel_count as usize; let mut mixed_data = input_data.to_vec(); let mut max_system_energy = 0.0; + let gain_mic = 1.0; + let gain_sys = 0.8; // Slightly lower system audio to prioritize speaker + if let Ok(mut queue) = self.system_queue.lock() { - for i in 0..mixed_data.len() { - if let Some(sys_sample) = queue.pop_front() { - // Track system energy for trigger logic - let abs_sample = sys_sample.abs(); - if abs_sample > max_system_energy { - max_system_energy = abs_sample; - } - - // Simple addition mixing with clamping to avoid clipping - let mixed = mixed_data[i] + sys_sample; - mixed_data[i] = mixed.max(-1.0).min(1.0); - } - } + let frames = mixed_data.len() / mic_channels; + + for f in 0..frames { + // system_queue is always stereo (L, R, L, R...) + if let (Some(l), Some(r)) = (queue.pop_front(), queue.pop_front()) { + let abs_l = l.abs(); + let abs_r = r.abs(); + let current_sys_max = if abs_l > abs_r { abs_l } else { abs_r }; + if current_sys_max > max_system_energy { + max_system_energy = current_sys_max; + } + + if mic_channels == 1 { + // Mic is Mono: Mix System L+R down to Mono + let sys_mono = (l + r) / 2.0; + let mixed = (mixed_data[f] * gain_mic) + (sys_mono * gain_sys); + mixed_data[f] = mixed.max(-1.0).min(1.0); + } else { + // Mic is Stereo: Mix L-to-L and R-to-R + let f_start = f * 2; + let mixed_l = (mixed_data[f_start] * gain_mic) + (l * gain_sys); + let mixed_r = (mixed_data[f_start + 1] * gain_mic) + (r * gain_sys); + mixed_data[f_start] = mixed_l.max(-1.0).min(1.0); + mixed_data[f_start + 1] = mixed_r.max(-1.0).min(1.0); + } + } + } } let data = &mixed_data; @@ -181,11 +207,7 @@ impl AudioProcessor { // Run Detection let probability = self.vad.predict(vad_chunk.clone()); - // Hybrid VAD: Probability > 0.9 OR System Audio Active - // We want to keep recording if there is meaningful audio from the system (Call in progress), - // even if the VAD doesn't strictly classify it as 'speech' (e.g. 
ringing, laughter, noise). - - let system_is_active = max_system_energy > 0.01; // Same threshold as trigger + let system_is_active = max_system_energy > 0.005; // Lowered to match trigger let is_speech = probability > 0.9; if is_speech || system_is_active { @@ -219,23 +241,23 @@ impl AudioProcessor { // 2. AND System Audio has energy (Meaning audio is coming from the PC, i.e., Call started) // Threshold 0.01 is roughly -40dB, should cover ringtones/speech easily but ignore silence/hiss. - let system_active = max_system_energy > 0.01; + let system_active = max_system_energy > 0.005; - // Special Case: If System Audio acts like a Ringtone (Constant high energy but maybe not VAD speech?) - // We trust VAD for speech. But we also trust "Loud System Sound" = Call. - // If system is consistently loud, it's likely a call. - - // For now, Strict Mode: - // Trigger if: (Speech Detected) AND (System Audio Present) - // This prevents "User talking alone" -> No trigger (System silent). - // This allows "Partner talking" -> Trigger (Speech + System). - - // What about Ringtone? Ringtone has energy but maybe no speech. - // If we want to record the ringtone, we should trigger on `system_active` alone? - // "erst wen der call startet" -> usually ringing. - // Let's be generous: If System Audio is loud (> 0.05), we trigger regardless of VAD. - - let trigger = (self.is_speech_active && system_active) || (max_system_energy > 0.05); + // Periodically log energy to help debug why meeting mode might not start + if self.last_event_time.elapsed().as_millis() > 2000 && self.recording_mode == "meeting" { + if let Some(app) = &self.app_handle { + emit_log(app, "DEBUG", &format!("Waiting for Meeting... 
Current System Energy: {:.4} (Threshold: 0.005)", max_system_energy)); + } + } + + // MODE-SPECIFIC TRIGGER LOGIC: + // "voice" -> Trigger if user speaks (is_speech_active) + // "meeting" -> Trigger ONLY if system audio energy detected (Call starting) + let trigger = if self.recording_mode == "voice" { + self.is_speech_active + } else { + system_active + }; if trigger { // Trigger Detected! @@ -271,7 +293,13 @@ impl AudioProcessor { // Standard Recording Logic (Active or Hangover) let time_since_speech = self.total_processed_samples.saturating_sub(self.last_speech_time); - if self.is_speech_active || time_since_speech < self.hangover_samples { + // We write to file if: + // 1. VAD thinks someone is speaking (Mic or System) + // 2. OR System audio energy is currently above threshold (Ensures calls are captured) + // 3. OR we are within the hangover period + let system_is_active = max_system_energy > 0.005; + + if self.is_speech_active || system_is_active || time_since_speech < self.hangover_samples { let mut guard = self.writer.lock().unwrap(); for &sample in data { let amplitude = i16::MAX as f32; diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 9f725c2..e3f1c7b 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -37,7 +37,7 @@ struct LogEvent { timestamp: String, } -fn emit_log(app: &AppHandle, level: &str, message: &str) { +pub(crate) fn emit_log(app: &AppHandle, level: &str, message: &str) { let log = LogEvent { level: level.to_string(), message: message.to_string(), @@ -73,8 +73,8 @@ fn get_input_devices() -> Result, String> { #[tauri::command] -async fn start_recording(app: AppHandle, state: State<'_, AppState>, device_id: String, save_path: Option, custom_filename: Option, wait_for_speech: Option) -> Result<(), String> { - emit_log(&app, "INFO", &format!("Starting recording on device: {}", device_id)); +async fn start_recording(app: AppHandle, state: State<'_, AppState>, device_id: String, save_path: Option, custom_filename: Option, 
wait_for_speech: Option, mode: String) -> Result<(), String> { + emit_log(&app, "INFO", &format!("Starting recording [Mode: {}] on device: {}", mode, device_id)); let host = cpal::default_host(); // Find device by name (using name as ID) @@ -143,10 +143,10 @@ async fn start_recording(app: AppHandle, state: State<'_, AppState>, device_id: // We pass the writer to it. let should_wait = wait_for_speech.unwrap_or(false); if should_wait { - emit_log(&app, "INFO", "Recording started in WAITING mode (buffer-only until speech)."); + emit_log(&app, "INFO", &format!("Recording started in WAITING mode (Trigger: {}).", if mode == "voice" { "Speech" } else { "System Audio" })); } - let processor = AudioProcessor::new(config.sample_rate(), config.channels(), writer.clone(), app.clone(), should_wait) + let processor = AudioProcessor::new(config.sample_rate(), config.channels(), writer.clone(), app.clone(), should_wait, mode) .map_err(|e| format!("Failed to create AudioProcessor: {}", e))?; // Wrap processor in Arc so we can share/move it into callback @@ -158,61 +158,40 @@ async fn start_recording(app: AppHandle, state: State<'_, AppState>, device_id: let processor_clone = processor.clone(); // --- SYSTEM AUDIO CAPTURE START --- - let mut sys_capture = sc_audio::SystemAudioCapture::new(config.sample_rate()); + // Prevent Doubling: If user selected an aggregate device (Hearbit Audio/BlackHole), + // it ALREADY contains system audio. In that case, we don't need internal SCK capture. + let is_aggregate = device_id.contains("Hearbit") || device_id.contains("BlackHole"); - // Get the queue to share with the capture callback - let queue_clone = { - let p = processor.lock().unwrap(); - p.system_queue.clone() // Access the pub field we added - }; + if is_aggregate { + emit_log(&app, "INFO", "Aggregate device detected. 
Disabling internal System Audio Capture to prevent doubling."); + } else { + let mut sys_capture = sc_audio::SystemAudioCapture::new(config.sample_rate()); + + // Get the queue to share with the capture callback + let queue_clone = { + let p = processor.lock().unwrap(); + p.system_queue.clone() // Access the pub field we added + }; - let sys_handle = app.clone(); - let sys_callback = move |data: &[f32]| { - // Push to queue - if let Ok(mut q) = queue_clone.lock() { - q.extend(data.iter()); - - // Limit queue size to avoid memory leaks if main process loop is slow - while q.len() > 48000 * 5 { // 5 seconds buffer - q.pop_front(); + let sys_callback = move |data: &[f32]| { + // Push to queue + if let Ok(mut q) = queue_clone.lock() { + q.extend(data.iter()); + + // Limit queue size to avoid memory leaks if main process loop is slow + while q.len() > 48000 * 5 { // 5 seconds buffer + q.pop_front(); + } } - } - }; + }; - // Need to run async start in sync command? - // Tauri commands are async if they return Future, but here we returned Result. - // We should probably spawn it. - // Actually, SystemAudioCapture::start is async. - // We can spawn a tokio task to start it. But we need to keep the object alive. - // The start method modifies self.stream. - // If we make start synchronous or use block_in_place? - // Better: change start_recording to async fn (it is not currently async in signature used by tauri::command macros? No, tauri supports async commands). - // Let's check line 76: `fn start_recording`... it is NOT async. - // We should make it `async fn start_recording`. - - // However, changing to async might affect how state is passed or other things. - // Actually Tauri works fine with async commands. - // But then we need to await `sys_capture.start`. - - // Wait, let's look at `SystemAudioCapture::start`. It takes `&mut self`. - // We can't easily spawn it away properly if we want to keep `sys_capture` in State. 
- // The `sys_capture` struct holds the `SCStream` which must be kept alive. - - // Let's assume we can make `start_recording` into `async fn`. - - // TEMPORARY: Just putting placeholder for logic flow. - // We will need to change the function signature of start_recording to async first in a separate step or assume I can do it here if I replace the whole signature. - // The replace_file_content replaces a block. - // I will replace line 76 in a separate call to make it async. - - // For this block, I will assume it's async context. - - match sys_capture.start(sys_callback) { - Ok(_) => emit_log(&app, "INFO", "System Audio Capture started."), - Err(e) => emit_log(&app, "WARN", &format!("System Audio Capture failed (Permissions?): {}", e)), + match sys_capture.start(sys_callback).await { + Ok(_) => emit_log(&app, "INFO", "System Audio Capture started."), + Err(e) => emit_log(&app, "WARN", &format!("System Audio Capture failed (Permissions?): {}", e)), + } + + *state.system_capture.lock().unwrap() = Some(sys_capture); } - - *state.system_capture.lock().unwrap() = Some(sys_capture); // --- SYSTEM AUDIO CAPTURE END --- let app_handle = app.clone(); @@ -585,8 +564,9 @@ async fn poll_transcription(app: &AppHandle, client: &reqwest::Client, api_key: return Err(format!("Download failed: {}", dl_res.status())); } } else if status == "failed" || status == "error" { - emit_log(app, "ERROR", &format!("Batch processing failed: {:?}", json)); - return Err(format!("Batch processing failed: {:?}", json)); + let err_msg = format!("Batch processing failed [Status: {}]. 
Full Response: {:?}", status, json); + emit_log(app, "ERROR", &err_msg); + return Err(err_msg); } // If 'processing' or 'pending', continue loop } diff --git a/src-tauri/src/sc_audio.rs b/src-tauri/src/sc_audio.rs index 8ad1dce..f07f0a2 100644 --- a/src-tauri/src/sc_audio.rs +++ b/src-tauri/src/sc_audio.rs @@ -56,7 +56,7 @@ impl SystemAudioCapture { Self { stream: None, sample_rate } } - pub fn start(&mut self, callback: F) -> Result<(), String> + pub async fn start(&mut self, callback: F) -> Result<(), String> where F: Fn(&[f32]) + Send + Sync + 'static { let content = UnsafeSCShareableContent::get().map_err(|e| format!("Failed to get content"))?; diff --git a/src/components/Recorder.tsx b/src/components/Recorder.tsx index ff1046b..5216507 100644 --- a/src/components/Recorder.tsx +++ b/src/components/Recorder.tsx @@ -123,15 +123,18 @@ const Recorder: React.FC = ({ const aggregateDev = aliasedDevs.find(d => d.name === 'Hearbit Audio'); const virtualDev = aliasedDevs.find(d => d.name.includes('Hearbit Virtual')); - if (aggregateDev) { - setRecordingMode('meeting'); - setSelectedDevice(aggregateDev.id); - } else if (virtualDev) { - setRecordingMode('meeting'); - setSelectedDevice(virtualDev.id); - } else { - setRecordingMode('voice'); - if (aliasedDevs.length > 0) setSelectedDevice(aliasedDevs[0].id); + if (recordingMode === 'meeting') { + if (aggregateDev) { + setSelectedDevice(aggregateDev.id); + } else if (virtualDev) { + setSelectedDevice(virtualDev.id); + } else if (aliasedDevs.length > 0) { + setSelectedDevice(aliasedDevs[0].id); + } + } else if (aliasedDevs.length > 0) { + // Voice mode: just pick first non-virtual if possible, otherwise first + const physicalMic = aliasedDevs.find(d => !d.name.includes('Hearbit') && !d.name.includes('BlackHole')); + setSelectedDevice(physicalMic ? 
physicalMic.id : aliasedDevs[0].id); } } } catch (e) { @@ -160,7 +163,8 @@ const Recorder: React.FC = ({ deviceId: targetDeviceId, savePath: savePath || null, customFilename: props.recordingSubject || null, - waitForSpeech: autoStartEnabled // Pass the toggle state + waitForSpeech: autoStartEnabled, // Pass the toggle state + mode: recordingMode }); setIsRecording(true); @@ -268,7 +272,7 @@ const Recorder: React.FC = ({ // AUTO STOP Logic // Use Ref to get LATEST visibility instantly - if (isVisibleRef.current && timeSinceSpeech > 20 && !isStoppingRef.current) { + if (isVisibleRef.current && timeSinceSpeech > 25 && !isStoppingRef.current) { console.log("Auto-stopping due to silence..."); isStoppingRef.current = true; addToast('Auto-stopped due to silence', 'info');