feat: Adjust VAD sensitivity, enable global auto-stop, update docs

This commit is contained in:
michael.borak
2026-01-21 11:09:54 +01:00
parent 79db6adf45
commit b848154942
5 changed files with 110 additions and 58 deletions

View File

@@ -10,8 +10,8 @@
* **📁 Import Audio Files**: Upload existing recordings (MP3, MP4, WAV, M4A, FLAC, OGG, AAC, WMA) for transcription and summarization.
* **⏱️ Long Meeting Support**: Record meetings up to 2+ hours with automatic MP3 conversion and chunking.
* **🎵 Smart Auto-Stop**:
* **Voice Memo Mode**: Automatically stops after 20 seconds of silence
* **Meeting Mode**: No auto-stop to capture full discussions
* **Universal Auto-Stop**: Automatically stops recording after **20 seconds of silence** in ALL modes (Voice Memo & Meeting).
* **Noise Filtering**: Enhanced VAD (Voice Activity Detection) ignores background noise and keyboard typing, only triggering on clear speech.
* **📅 Microsoft 365 Integration**:
* **Upcoming Meetings**: View your daily schedule and join with **one click**.
* **Meeting Details**: View full agenda and **invited attendee status** (Accepted/Declined).
@@ -21,6 +21,7 @@
* **Precision Transcription**: Standard-compliant formatting with **second-by-second timestamps**.
* **Smart Summaries**: Uses **Smart Templates** to automatically select the best format (Business Protocol vs. 1:1) based on meeting content.
* **🔇 Smart VAD**: Automatically filters out silence and stops recording when you stop talking.
* **🎨 White-Labeling**: Upload your **custom company logo** in Settings to brand the application.
* **🔒 Privacy-First**: Data is processed securely via your own Infomaniak API keys.
---
@@ -91,6 +92,37 @@ We've made this easy! **Note: You must have the BlackHole driver installed.**
---
## 🎨 Custom Branding (White-Labeling)
You can replace the default Livtec logo with your own company branding:
1. Go to **Settings** (gear icon) → **Branding**.
2. Click **Upload Logo**.
3. Select your file (PNG, JPG, SVG).
4. The logo updates immediately across the app.
5. *Tip*: Use a transparent PNG for best results.
---
## 📧 Advanced Email Templates
The email system supports **full HTML & JavaScript** templates. This allows for dynamic dashboards, charts, and interactive reports.
**How to use:**
1. Go to **Settings** → **Email**.
2. Create a new template.
3. Use `{{summary}}` as a placeholder for the raw AI JSON output.
4. In your HTML/Script, parse it:
```javascript
const reportData = {{summary}};
// Now you can use reportData.todos, reportData.updates, etc.
```
5. Use `{{date}}` for the current date and `{{subject}}` for the meeting title.
*Example*: Create a "Daily Standup Dashboard" that visualizes Blocker/Updates/Todos in a grid layout.
---
## ❓ Troubleshooting
### "Hearbit AI is damaged and can't be opened"

View File

@@ -159,10 +159,10 @@ impl AudioProcessor {
let sq_sum: f32 = vad_chunk.iter().map(|x| x * x).sum();
let rms = (sq_sum / vad_chunk.len() as f32).sqrt();
// Hybrid VAD: Probability > 0.8 OR RMS > 0.015
// INCREASED THRESHOLDS (v1.9.0):
// Now that routing works, we must filter out system notifications (beeps) and noise floor.
let is_speech = probability > 0.8 || rms > 0.015;
// Hybrid VAD: Probability > 0.9 OR RMS > 0.025
// INCREASED THRESHOLDS (v1.1.1):
// Reduced sensitivity to avoid background noise triggering recording.
let is_speech = probability > 0.9 || rms > 0.025;
if is_speech {
self.is_speech_active = true;

View File

@@ -579,23 +579,42 @@ struct AudioMetadata {
format: String,
}
// Helper to find ffmpeg/ffprobe in common paths
// Helper to find ffmpeg/ffprobe in common install locations.
// Probes each well-known directory in order and returns the first existing
// absolute path; otherwise returns the bare name so the OS PATH lookup applies.
fn resolve_binary_path(binary_name: &str) -> String {
    let candidate_dirs = ["/opt/homebrew/bin", "/usr/local/bin", "/usr/bin"];
    candidate_dirs
        .iter()
        .map(|dir| format!("{}/{}", dir, binary_name))
        .find(|candidate| std::path::Path::new(candidate).exists())
        // Not found in any common location: fall back to expecting it in PATH.
        .unwrap_or_else(|| binary_name.to_string())
}
#[tauri::command]
fn get_audio_metadata(app: AppHandle, file_path: String) -> Result<AudioMetadata, String> {
emit_log(&app, "INFO", &format!("Getting metadata for: {}", file_path));
// Get file size
let metadata = std::fs::metadata(&file_path).map_err(|e| e.to_string())?;
let size = metadata.len();
// Extract format from extension
let path = std::path::Path::new(&file_path);
let format = path.extension()
.and_then(|e| e.to_str())
.unwrap_or("unknown")
.to_string();
if !path.exists() {
return Err(format!("File not found: {}", file_path));
}
// Get duration using ffprobe (requires ffmpeg to be installed)
let duration = match Command::new("ffprobe")
let size = std::fs::metadata(&file_path)
.map_err(|e| e.to_string())?
.len();
// Use ffprobe to get duration
// Try resolved path first
let ffprobe_cmd = resolve_binary_path("ffprobe");
let output = Command::new(&ffprobe_cmd)
.args([
"-v", "error",
"-show_entries", "format=duration",
@@ -603,23 +622,16 @@ fn get_audio_metadata(app: AppHandle, file_path: String) -> Result<AudioMetadata
&file_path
])
.output()
{
Ok(output) => {
if output.status.success() {
let duration_str = String::from_utf8_lossy(&output.stdout);
duration_str.trim().parse::<f64>().unwrap_or(0.0)
} else {
emit_log(&app, "WARN", "ffprobe failed, duration = 0");
0.0
}
},
Err(_) => {
emit_log(&app, "WARN", "ffprobe not found, duration = 0");
0.0
}
};
.map_err(|e| format!("Failed to execute ffprobe at '{}': {}", ffprobe_cmd, e))?;
emit_log(&app, "SUCCESS", &format!("Metadata: {}s, {} bytes", duration, size));
let duration_str = String::from_utf8_lossy(&output.stdout);
let duration: f64 = duration_str.trim().parse().unwrap_or(0.0);
// Extension as format
let format = path.extension()
.and_then(|e| e.to_str())
.unwrap_or("unknown")
.to_string();
Ok(AudioMetadata {
duration,
@@ -633,8 +645,9 @@ fn convert_to_mp3(app: AppHandle, wav_path: String) -> Result<String, String> {
emit_log(&app, "INFO", &format!("Converting to MP3: {}", wav_path));
let mp3_path = wav_path.replace(".wav", ".mp3");
let ffmpeg_cmd = resolve_binary_path("ffmpeg");
let output = Command::new("ffmpeg")
let output = Command::new(&ffmpeg_cmd)
.args([
"-i", &wav_path,
"-codec:a", "libmp3lame",
@@ -643,7 +656,7 @@ fn convert_to_mp3(app: AppHandle, wav_path: String) -> Result<String, String> {
&mp3_path
])
.output()
.map_err(|e| format!("Failed to execute ffmpeg: {}", e))?;
.map_err(|e| format!("Failed to execute ffmpeg at '{}': {}", ffmpeg_cmd, e))?;
if output.status.success() {
emit_log(&app, "SUCCESS", &format!("MP3 created: {}", mp3_path));
@@ -660,9 +673,11 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result<
emit_log(&app, "INFO", &format!("Chunking audio: {} ({}min chunks)", file_path, chunk_minutes));
let chunk_seconds = chunk_minutes * 60;
let ffprobe_cmd = resolve_binary_path("ffprobe");
let ffmpeg_cmd = resolve_binary_path("ffmpeg");
// Get total duration using ffprobe
let duration_output = Command::new("ffprobe")
let duration_output = Command::new(&ffprobe_cmd)
.args([
"-v", "error",
"-show_entries", "format=duration",
@@ -670,7 +685,7 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result<
&file_path
])
.output()
.map_err(|e| format!("Failed to get duration: {}", e))?;
.map_err(|e| format!("Failed to get duration with '{}': {}", ffprobe_cmd, e))?;
let duration_str = String::from_utf8_lossy(&duration_output.stdout);
let duration: f64 = duration_str.trim().parse()
@@ -686,7 +701,7 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result<
let start_time = i as u32 * chunk_seconds;
let chunk_path = format!("{}_chunk_{}.mp3", base_path, i);
let output = Command::new("ffmpeg")
let output = Command::new(&ffmpeg_cmd)
.args([
"-i", &file_path,
"-ss", &start_time.to_string(),
@@ -696,7 +711,7 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result<
&chunk_path
])
.output()
.map_err(|e| format!("Failed to create chunk {}: {}", i, e))?;
.map_err(|e| format!("Failed to create chunk {} with '{}': {}", i, ffmpeg_cmd, e))?;
if !output.status.success() {
let error = String::from_utf8_lossy(&output.stderr);

View File

@@ -64,9 +64,14 @@ const EmailPreviewModal: React.FC<EmailPreviewModalProps> = ({
const [activeTab, setActiveTab] = useState<'preview' | 'source'>('preview');
const generateHtmlBody = (content: string, title: string) => {
// Simple heuristic: if it looks like HTML, treat as HTML. Otherwise, markdown.
const isHtml = /^\s*<(!DOCTYPE|html|div|p|table)/i.test(content);
const formattedBody = isHtml ? content : formatMarkdownToHtml(content);
// Check if it's a full HTML document
if (/^\s*<!DOCTYPE html/i.test(content) || /^\s*<html/i.test(content)) {
return content;
}
// Simple heuristic: if it looks like HTML fragment (div, p, table), treat as HTML. Otherwise, markdown.
const isHtmlFragment = /^\s*<(div|p|table|section|header|footer)/i.test(content);
const formattedBody = isHtmlFragment ? content : formatMarkdownToHtml(content);
return `
<!DOCTYPE html>
@@ -111,14 +116,17 @@ const EmailPreviewModal: React.FC<EmailPreviewModalProps> = ({
// Replace placeholders
const dateStr = new Date().toLocaleDateString();
let newSub = tmpl.subject.replace(/{{date}}/g, dateStr).replace(/{{subject}}/g, "Meeting");
// Note: We don't have the original 'recordingSubject' here easily without more prop drilling,
// so we default to "Meeting" or user can edit.
// Actually, initialSubject usually contains "Meeting Summary", so we could parse it, but for now date/summary is most important.
// Clean up JSON if necessary (e.g. remove markdown code blocks ```json ... ```)
let cleanSummary = initialBody;
if (initialBody.trim().startsWith('```')) {
cleanSummary = initialBody.replace(/^```(json)?/i, '').replace(/```$/, '').trim();
}
let newBody = tmpl.body
.replace(/{{date}}/g, dateStr)
.replace(/{{subject}}/g, "the meeting")
.replace(/{{summary}}/g, initialBody);
.replace(/{{summary}}/g, cleanSummary);
setSubject(newSub);
setBody(generateHtmlBody(newBody, newSub));
@@ -242,7 +250,7 @@ const EmailPreviewModal: React.FC<EmailPreviewModalProps> = ({
srcDoc={body}
className="w-full h-full border-none"
title="Email Preview"
sandbox="allow-same-origin"
sandbox="allow-same-origin allow-scripts"
/>
</div>
) : (

View File

@@ -252,17 +252,14 @@ const Recorder: React.FC<RecorderProps> = ({
const interval = setInterval(() => {
const now = Date.now();
const diff = (now - lastSpeechTimeRef.current) / 1000;
setSilenceDuration(diff);
const timeSinceSpeech = (now - lastSpeechTimeRef.current) / 1000;
setSilenceDuration(timeSinceSpeech);
// Different timeouts based on mode:
// Voice Memo: 20 seconds of silence
// Meeting: Disabled (no auto-stop to avoid cutting off long meetings)
const timeoutSeconds = recordingMode === 'voice' ? 20 : 9999; // 9999 = effectively disabled
if (diff > timeoutSeconds && !isStoppingRef.current) {
console.log(`Auto-stopping (${recordingMode} mode) due to ${timeoutSeconds}s silence`);
addToast(`Auto-stopping (${Math.floor(diff)}s silence detected)`, "info", 3000);
// AUTO STOP after 20 seconds of silence (ALL MODES)
if (timeSinceSpeech > 20 && !isStoppingRef.current) {
console.log("Auto-stopping due to silence...");
isStoppingRef.current = true;
addToast('Auto-stopped due to silence', 'info');
stopRecording();
}
}, 1000);