From b84815494233ab4d0e1206a9b2211aa9b4f60b82 Mon Sep 17 00:00:00 2001 From: "michael.borak" Date: Wed, 21 Jan 2026 11:09:54 +0100 Subject: [PATCH] feat: Adjust VAD sensitivity, enable global auto-stop, update docs --- README.md | 36 +++++++++++- src-tauri/src/audio_processor.rs | 8 +-- src-tauri/src/lib.rs | 83 ++++++++++++++++------------ src/components/EmailPreviewModal.tsx | 24 +++++--- src/components/Recorder.tsx | 17 +++--- 5 files changed, 110 insertions(+), 58 deletions(-) diff --git a/README.md b/README.md index fe4b921..9a2ba77 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,8 @@ * **📁 Import Audio Files**: Upload existing recordings (MP3, MP4, WAV, M4A, FLAC, OGG, AAC, WMA) for transcription and summarization. * **⏱️ Long Meeting Support**: Record meetings up to 2+ hours with automatic MP3 conversion and chunking. * **🎵 Smart Auto-Stop**: - * **Voice Memo Mode**: Automatically stops after 20 seconds of silence - * **Meeting Mode**: No auto-stop to capture full discussions + * **Universal Auto-Stop**: Automatically stops recording after **20 seconds of silence** in ALL modes (Voice Memo & Meeting). + * **Noise Filtering**: Enhanced VAD (Voice Activity Detection) ignores background noise and keyboard typing, only triggering on clear speech. * **📅 Microsoft 365 Integration**: * **Upcoming Meetings**: View your daily schedule and join with **one click**. * **Meeting Details**: View full agenda and **invited attendee status** (Accepted/Declined). @@ -21,6 +21,7 @@ * **Precision Transcription**: Standard-compliant formatting with **second-by-second timestamps**. * **Smart Summaries**: Uses **Smart Templates** to automatically select the best format (Business Protocol vs. 1:1) based on meeting content. * **🔇 Smart VAD**: Automatically filters out silence and stops recording when you stop talking. +* **🎨 White-Labeling**: Upload your **custom company logo** in Settings to brand the application. 
* **🔒 Privacy-First**: Data is processed securely via your own Infomaniak API keys. --- @@ -91,6 +92,37 @@ We've made this easy! **Note: You must have the BlackHole driver installed.** --- +## 🎨 Custom Branding (White-Labeling) + +You can replace the default Livtec logo with your own company branding: + +1. Go to **Settings** (gear icon) → **Branding**. +2. Click **Upload Logo**. +3. Select your file (PNG, JPG, SVG). +4. The new logo is applied immediately across the app. +5. *Tip*: Use a transparent PNG for best results. + +--- + +## 📧 Advanced Email Templates + +The email system supports **full HTML & JavaScript** templates. This allows for dynamic dashboards, charts, and interactive reports. + +**How to use:** +1. Go to **Settings** → **Email**. +2. Create a new template. +3. Use `{{summary}}` as a placeholder for the raw AI JSON output. +4. In your template's script, assign it to a variable (the placeholder is replaced with the JSON text before the script runs): + ```javascript + const reportData = {{summary}}; + // Now you can use reportData.todos, reportData.updates, etc. + ``` +5. Use `{{date}}` for the current date and `{{subject}}` for the meeting title (currently filled with the generic text "Meeting"). + +*Example*: Create a "Daily Standup Dashboard" that visualizes Blockers/Updates/Todos in a grid layout. + +--- + ## ❓ Troubleshooting ### "Hearbit AI is damaged and can't be opened" diff --git a/src-tauri/src/audio_processor.rs b/src-tauri/src/audio_processor.rs index bf0c8da..d6bfa8f 100644 --- a/src-tauri/src/audio_processor.rs +++ b/src-tauri/src/audio_processor.rs @@ -159,10 +159,10 @@ impl AudioProcessor { let sq_sum: f32 = vad_chunk.iter().map(|x| x * x).sum(); let rms = (sq_sum / vad_chunk.len() as f32).sqrt(); - // Hybrid VAD: Probability > 0.8 OR RMS > 0.015 - // INCREASED THRESHOLDS (v1.9.0): - // Now that routing works, we must filter out system notifications (beeps) and noise floor. 
- let is_speech = probability > 0.8 || rms > 0.015; + // Hybrid VAD: Probability > 0.9 OR RMS > 0.025 + // INCREASED THRESHOLDS (v1.1.1): + // Reduced sensitivity to avoid background noise triggering recording. + let is_speech = probability > 0.9 || rms > 0.025; if is_speech { self.is_speech_active = true; diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index cdfc876..6350a81 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -579,23 +579,42 @@ struct AudioMetadata { format: String, } +// Helper to find ffmpeg/ffprobe in common paths +fn resolve_binary_path(binary_name: &str) -> String { + let common_paths = [ + format!("/opt/homebrew/bin/{}", binary_name), + format!("/usr/local/bin/{}", binary_name), + format!("/usr/bin/{}", binary_name), + ]; + + for path in common_paths.iter() { + if std::path::Path::new(path).exists() { + return path.clone(); + } + } + + // Fallback to expecting it in PATH + binary_name.to_string() +} + #[tauri::command] fn get_audio_metadata(app: AppHandle, file_path: String) -> Result { emit_log(&app, "INFO", &format!("Getting metadata for: {}", file_path)); - // Get file size - let metadata = std::fs::metadata(&file_path).map_err(|e| e.to_string())?; - let size = metadata.len(); - - // Extract format from extension let path = std::path::Path::new(&file_path); - let format = path.extension() - .and_then(|e| e.to_str()) - .unwrap_or("unknown") - .to_string(); + if !path.exists() { + return Err(format!("File not found: {}", file_path)); + } + + let size = std::fs::metadata(&file_path) + .map_err(|e| e.to_string())? 
+ .len(); + + // Use ffprobe to get duration + // Try resolved path first + let ffprobe_cmd = resolve_binary_path("ffprobe"); - // Get duration using ffprobe (requires ffmpeg to be installed) - let duration = match Command::new("ffprobe") + let output = Command::new(&ffprobe_cmd) .args([ "-v", "error", "-show_entries", "format=duration", @@ -603,24 +622,17 @@ fn get_audio_metadata(app: AppHandle, file_path: String) -> Result { - if output.status.success() { - let duration_str = String::from_utf8_lossy(&output.stdout); - duration_str.trim().parse::().unwrap_or(0.0) - } else { - emit_log(&app, "WARN", "ffprobe failed, duration = 0"); - 0.0 - } - }, - Err(_) => { - emit_log(&app, "WARN", "ffprobe not found, duration = 0"); - 0.0 - } - }; - - emit_log(&app, "SUCCESS", &format!("Metadata: {}s, {} bytes", duration, size)); + .map_err(|e| format!("Failed to execute ffprobe at '{}': {}", ffprobe_cmd, e))?; + + let duration_str = String::from_utf8_lossy(&output.stdout); + let duration: f64 = duration_str.trim().parse().unwrap_or(0.0); + // Extension as format + let format = path.extension() + .and_then(|e| e.to_str()) + .unwrap_or("unknown") + .to_string(); + Ok(AudioMetadata { duration, size, @@ -633,8 +645,9 @@ fn convert_to_mp3(app: AppHandle, wav_path: String) -> Result { emit_log(&app, "INFO", &format!("Converting to MP3: {}", wav_path)); let mp3_path = wav_path.replace(".wav", ".mp3"); + let ffmpeg_cmd = resolve_binary_path("ffmpeg"); - let output = Command::new("ffmpeg") + let output = Command::new(&ffmpeg_cmd) .args([ "-i", &wav_path, "-codec:a", "libmp3lame", @@ -643,7 +656,7 @@ fn convert_to_mp3(app: AppHandle, wav_path: String) -> Result { &mp3_path ]) .output() - .map_err(|e| format!("Failed to execute ffmpeg: {}", e))?; + .map_err(|e| format!("Failed to execute ffmpeg at '{}': {}", ffmpeg_cmd, e))?; if output.status.success() { emit_log(&app, "SUCCESS", &format!("MP3 created: {}", mp3_path)); @@ -660,9 +673,11 @@ fn chunk_audio(app: AppHandle, file_path: 
String, chunk_minutes: u32) -> Result< emit_log(&app, "INFO", &format!("Chunking audio: {} ({}min chunks)", file_path, chunk_minutes)); let chunk_seconds = chunk_minutes * 60; + let ffprobe_cmd = resolve_binary_path("ffprobe"); + let ffmpeg_cmd = resolve_binary_path("ffmpeg"); // Get total duration using ffprobe - let duration_output = Command::new("ffprobe") + let duration_output = Command::new(&ffprobe_cmd) .args([ "-v", "error", "-show_entries", "format=duration", @@ -670,7 +685,7 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result< &file_path ]) .output() - .map_err(|e| format!("Failed to get duration: {}", e))?; + .map_err(|e| format!("Failed to get duration with '{}': {}", ffprobe_cmd, e))?; let duration_str = String::from_utf8_lossy(&duration_output.stdout); let duration: f64 = duration_str.trim().parse() @@ -686,7 +701,7 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result< let start_time = i as u32 * chunk_seconds; let chunk_path = format!("{}_chunk_{}.mp3", base_path, i); - let output = Command::new("ffmpeg") + let output = Command::new(&ffmpeg_cmd) .args([ "-i", &file_path, "-ss", &start_time.to_string(), @@ -696,7 +711,7 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result< &chunk_path ]) .output() - .map_err(|e| format!("Failed to create chunk {}: {}", i, e))?; + .map_err(|e| format!("Failed to create chunk {} with '{}': {}", i, ffmpeg_cmd, e))?; if !output.status.success() { let error = String::from_utf8_lossy(&output.stderr); diff --git a/src/components/EmailPreviewModal.tsx b/src/components/EmailPreviewModal.tsx index 929b3cd..4ebe148 100644 --- a/src/components/EmailPreviewModal.tsx +++ b/src/components/EmailPreviewModal.tsx @@ -64,9 +64,14 @@ const EmailPreviewModal: React.FC = ({ const [activeTab, setActiveTab] = useState<'preview' | 'source'>('preview'); const generateHtmlBody = (content: string, title: string) => { - // Simple heuristic: if it looks like 
HTML, treat as HTML. Otherwise, markdown. - const isHtml = /^\s*<(!DOCTYPE|html|div|p|table)/i.test(content); - const formattedBody = isHtml ? content : formatMarkdownToHtml(content); + // Check if it's a full HTML document + if (/^\s* @@ -111,14 +116,17 @@ const EmailPreviewModal: React.FC = ({ // Replace placeholders const dateStr = new Date().toLocaleDateString(); let newSub = tmpl.subject.replace(/{{date}}/g, dateStr).replace(/{{subject}}/g, "Meeting"); - // Note: We don't have the original 'recordingSubject' here easily without more prop drilling, - // so we default to "Meeting" or user can edit. - // Actually, initialSubject usually contains "Meeting Summary", so we could parse it, but for now date/summary is most important. + + // Clean up JSON if necessary (e.g. remove markdown code blocks ```json ... ```) + let cleanSummary = initialBody; + if (initialBody.trim().startsWith('```')) { + cleanSummary = initialBody.replace(/^```(json)?/i, '').replace(/```$/, '').trim(); + } let newBody = tmpl.body .replace(/{{date}}/g, dateStr) .replace(/{{subject}}/g, "the meeting") - .replace(/{{summary}}/g, initialBody); + .replace(/{{summary}}/g, cleanSummary); setSubject(newSub); setBody(generateHtmlBody(newBody, newSub)); @@ -242,7 +250,7 @@ const EmailPreviewModal: React.FC = ({ srcDoc={body} className="w-full h-full border-none" title="Email Preview" - sandbox="allow-same-origin" + sandbox="allow-same-origin allow-scripts" /> ) : ( diff --git a/src/components/Recorder.tsx b/src/components/Recorder.tsx index c669f45..d670e7b 100644 --- a/src/components/Recorder.tsx +++ b/src/components/Recorder.tsx @@ -252,17 +252,14 @@ const Recorder: React.FC = ({ const interval = setInterval(() => { const now = Date.now(); - const diff = (now - lastSpeechTimeRef.current) / 1000; - setSilenceDuration(diff); + const timeSinceSpeech = (now - lastSpeechTimeRef.current) / 1000; + setSilenceDuration(timeSinceSpeech); - // Different timeouts based on mode: - // Voice Memo: 20 seconds of 
silence - // Meeting: Disabled (no auto-stop to avoid cutting off long meetings) - const timeoutSeconds = recordingMode === 'voice' ? 20 : 9999; // 9999 = effectively disabled - - if (diff > timeoutSeconds && !isStoppingRef.current) { - console.log(`Auto-stopping (${recordingMode} mode) due to ${timeoutSeconds}s silence`); - addToast(`Auto-stopping (${Math.floor(diff)}s silence detected)`, "info", 3000); + // AUTO STOP after 20 seconds of silence (ALL MODES) + if (timeSinceSpeech > 20 && !isStoppingRef.current) { + console.log("Auto-stopping due to silence..."); + isStoppingRef.current = true; + addToast('Auto-stopped due to silence', 'info'); stopRecording(); } }, 1000);