feat: Adjust VAD sensitivity, enable global auto-stop, update docs

This commit is contained in:
michael.borak
2026-01-21 11:09:54 +01:00
parent 79db6adf45
commit b848154942
5 changed files with 110 additions and 58 deletions

View File

@@ -10,8 +10,8 @@
* **📁 Import Audio Files**: Upload existing recordings (MP3, MP4, WAV, M4A, FLAC, OGG, AAC, WMA) for transcription and summarization.
* **⏱️ Long Meeting Support**: Record meetings up to 2+ hours with automatic MP3 conversion and chunking.
* **🎵 Smart Auto-Stop**:
* **Voice Memo Mode**: Automatically stops after 20 seconds of silence
* **Meeting Mode**: No auto-stop to capture full discussions
* **Universal Auto-Stop**: Automatically stops recording after **20 seconds of silence** in ALL modes (Voice Memo & Meeting).
* **Noise Filtering**: Enhanced VAD (Voice Activity Detection) ignores background noise and keyboard typing, only triggering on clear speech.
* **📅 Microsoft 365 Integration**:
* **Upcoming Meetings**: View your daily schedule and join with **one click**.
* **Meeting Details**: View full agenda and **invited attendee status** (Accepted/Declined).
@@ -21,6 +21,7 @@
* **Precision Transcription**: Standard-compliant formatting with **second-by-second timestamps**.
* **Smart Summaries**: Uses **Smart Templates** to automatically select the best format (Business Protocol vs. 1:1) based on meeting content.
* **🔇 Smart VAD**: Automatically filters out silence and stops recording when you stop talking.
* **🎨 White-Labeling**: Upload your **custom company logo** in Settings to brand the application.
* **🔒 Privacy-First**: Data is processed securely via your own Infomaniak API keys.
---
@@ -91,6 +92,37 @@ We've made this easy! **Note: You must have the BlackHole driver installed.**
---
## 🎨 Custom Branding (White-Labeling)
You can replace the default Livtec logo with your own company branding:
1. Go to **Settings** (gear icon) → **Branding**.
2. Click **Upload Logo**.
3. Select your file (PNG, JPG, SVG).
4. The logo updates immediately across the app.
5. *Tip*: Use a transparent PNG for best results.
---
## 📧 Advanced Email Templates
The email system supports **full HTML & JavaScript** templates. This allows for dynamic dashboards, charts, and interactive reports.
**How to use:**
1. Go to **Settings** → **Email**.
2. Create a new template.
3. Use `{{summary}}` as a placeholder for the raw AI JSON output.
4. In your HTML/Script, parse it:
```javascript
const reportData = {{summary}};
// Now you can use reportData.todos, reportData.updates, etc.
```
5. Use `{{date}}` for the current date and `{{subject}}` for the meeting title.
*Example*: Create a "Daily Standup Dashboard" that visualizes Blocker/Updates/Todos in a grid layout.
---
## ❓ Troubleshooting
### "Hearbit AI is damaged and can't be opened"

View File

@@ -159,10 +159,10 @@ impl AudioProcessor {
let sq_sum: f32 = vad_chunk.iter().map(|x| x * x).sum();
let rms = (sq_sum / vad_chunk.len() as f32).sqrt();
// Hybrid VAD: Probability > 0.8 OR RMS > 0.015
// INCREASED THRESHOLDS (v1.9.0):
// Now that routing works, we must filter out system notifications (beeps) and noise floor.
let is_speech = probability > 0.8 || rms > 0.015;
// Hybrid VAD: Probability > 0.9 OR RMS > 0.025
// INCREASED THRESHOLDS (v1.1.1):
// Reduced sensitivity to avoid background noise triggering recording.
let is_speech = probability > 0.9 || rms > 0.025;
if is_speech {
self.is_speech_active = true;

View File

@@ -579,23 +579,42 @@ struct AudioMetadata {
format: String,
}
// Helper to find ffmpeg/ffprobe in common paths
// Helper to find ffmpeg/ffprobe in common install locations.
// Probes each well-known directory in order and returns the first existing
// absolute path; otherwise returns the bare name so the OS PATH lookup applies.
fn resolve_binary_path(binary_name: &str) -> String {
    let candidate_dirs = ["/opt/homebrew/bin", "/usr/local/bin", "/usr/bin"];
    candidate_dirs
        .iter()
        .map(|dir| format!("{}/{}", dir, binary_name))
        .find(|candidate| std::path::Path::new(candidate).exists())
        // Not found in any common location: fall back to expecting it in PATH.
        .unwrap_or_else(|| binary_name.to_string())
}
#[tauri::command]
fn get_audio_metadata(app: AppHandle, file_path: String) -> Result<AudioMetadata, String> {
emit_log(&app, "INFO", &format!("Getting metadata for: {}", file_path));
// Get file size
let metadata = std::fs::metadata(&file_path).map_err(|e| e.to_string())?;
let size = metadata.len();
// Extract format from extension
let path = std::path::Path::new(&file_path);
let format = path.extension()
.and_then(|e| e.to_str())
.unwrap_or("unknown")
.to_string();
if !path.exists() {
return Err(format!("File not found: {}", file_path));
}
// Get duration using ffprobe (requires ffmpeg to be installed)
let duration = match Command::new("ffprobe")
let size = std::fs::metadata(&file_path)
.map_err(|e| e.to_string())?
.len();
// Use ffprobe to get duration
// Try resolved path first
let ffprobe_cmd = resolve_binary_path("ffprobe");
let output = Command::new(&ffprobe_cmd)
.args([
"-v", "error",
"-show_entries", "format=duration",
@@ -603,23 +622,16 @@ fn get_audio_metadata(app: AppHandle, file_path: String) -> Result<AudioMetadata
&file_path
])
.output()
{
Ok(output) => {
if output.status.success() {
let duration_str = String::from_utf8_lossy(&output.stdout);
duration_str.trim().parse::<f64>().unwrap_or(0.0)
} else {
emit_log(&app, "WARN", "ffprobe failed, duration = 0");
0.0
}
},
Err(_) => {
emit_log(&app, "WARN", "ffprobe not found, duration = 0");
0.0
}
};
.map_err(|e| format!("Failed to execute ffprobe at '{}': {}", ffprobe_cmd, e))?;
emit_log(&app, "SUCCESS", &format!("Metadata: {}s, {} bytes", duration, size));
let duration_str = String::from_utf8_lossy(&output.stdout);
let duration: f64 = duration_str.trim().parse().unwrap_or(0.0);
// Extension as format
let format = path.extension()
.and_then(|e| e.to_str())
.unwrap_or("unknown")
.to_string();
Ok(AudioMetadata {
duration,
@@ -633,8 +645,9 @@ fn convert_to_mp3(app: AppHandle, wav_path: String) -> Result<String, String> {
emit_log(&app, "INFO", &format!("Converting to MP3: {}", wav_path));
let mp3_path = wav_path.replace(".wav", ".mp3");
let ffmpeg_cmd = resolve_binary_path("ffmpeg");
let output = Command::new("ffmpeg")
let output = Command::new(&ffmpeg_cmd)
.args([
"-i", &wav_path,
"-codec:a", "libmp3lame",
@@ -643,7 +656,7 @@ fn convert_to_mp3(app: AppHandle, wav_path: String) -> Result<String, String> {
&mp3_path
])
.output()
.map_err(|e| format!("Failed to execute ffmpeg: {}", e))?;
.map_err(|e| format!("Failed to execute ffmpeg at '{}': {}", ffmpeg_cmd, e))?;
if output.status.success() {
emit_log(&app, "SUCCESS", &format!("MP3 created: {}", mp3_path));
@@ -660,9 +673,11 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result<
emit_log(&app, "INFO", &format!("Chunking audio: {} ({}min chunks)", file_path, chunk_minutes));
let chunk_seconds = chunk_minutes * 60;
let ffprobe_cmd = resolve_binary_path("ffprobe");
let ffmpeg_cmd = resolve_binary_path("ffmpeg");
// Get total duration using ffprobe
let duration_output = Command::new("ffprobe")
let duration_output = Command::new(&ffprobe_cmd)
.args([
"-v", "error",
"-show_entries", "format=duration",
@@ -670,7 +685,7 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result<
&file_path
])
.output()
.map_err(|e| format!("Failed to get duration: {}", e))?;
.map_err(|e| format!("Failed to get duration with '{}': {}", ffprobe_cmd, e))?;
let duration_str = String::from_utf8_lossy(&duration_output.stdout);
let duration: f64 = duration_str.trim().parse()
@@ -686,7 +701,7 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result<
let start_time = i as u32 * chunk_seconds;
let chunk_path = format!("{}_chunk_{}.mp3", base_path, i);
let output = Command::new("ffmpeg")
let output = Command::new(&ffmpeg_cmd)
.args([
"-i", &file_path,
"-ss", &start_time.to_string(),
@@ -696,7 +711,7 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result<
&chunk_path
])
.output()
.map_err(|e| format!("Failed to create chunk {}: {}", i, e))?;
.map_err(|e| format!("Failed to create chunk {} with '{}': {}", i, ffmpeg_cmd, e))?;
if !output.status.success() {
let error = String::from_utf8_lossy(&output.stderr);

View File

@@ -64,9 +64,14 @@ const EmailPreviewModal: React.FC<EmailPreviewModalProps> = ({
const [activeTab, setActiveTab] = useState<'preview' | 'source'>('preview');
const generateHtmlBody = (content: string, title: string) => {
// Simple heuristic: if it looks like HTML, treat as HTML. Otherwise, markdown.
const isHtml = /^\s*<(!DOCTYPE|html|div|p|table)/i.test(content);
const formattedBody = isHtml ? content : formatMarkdownToHtml(content);
// Check if it's a full HTML document
if (/^\s*<!DOCTYPE html/i.test(content) || /^\s*<html/i.test(content)) {
return content;
}
// Simple heuristic: if it looks like HTML fragment (div, p, table), treat as HTML. Otherwise, markdown.
const isHtmlFragment = /^\s*<(div|p|table|section|header|footer)/i.test(content);
const formattedBody = isHtmlFragment ? content : formatMarkdownToHtml(content);
return `
<!DOCTYPE html>
@@ -111,14 +116,17 @@ const EmailPreviewModal: React.FC<EmailPreviewModalProps> = ({
// Replace placeholders
const dateStr = new Date().toLocaleDateString();
let newSub = tmpl.subject.replace(/{{date}}/g, dateStr).replace(/{{subject}}/g, "Meeting");
// Note: We don't have the original 'recordingSubject' here easily without more prop drilling,
// so we default to "Meeting" or user can edit.
// Actually, initialSubject usually contains "Meeting Summary", so we could parse it, but for now date/summary is most important.
// Clean up JSON if necessary (e.g. remove markdown code blocks ```json ... ```)
let cleanSummary = initialBody;
if (initialBody.trim().startsWith('```')) {
cleanSummary = initialBody.replace(/^```(json)?/i, '').replace(/```$/, '').trim();
}
let newBody = tmpl.body
.replace(/{{date}}/g, dateStr)
.replace(/{{subject}}/g, "the meeting")
.replace(/{{summary}}/g, initialBody);
.replace(/{{summary}}/g, cleanSummary);
setSubject(newSub);
setBody(generateHtmlBody(newBody, newSub));
@@ -242,7 +250,7 @@ const EmailPreviewModal: React.FC<EmailPreviewModalProps> = ({
srcDoc={body}
className="w-full h-full border-none"
title="Email Preview"
sandbox="allow-same-origin"
sandbox="allow-same-origin allow-scripts"
/>
</div>
) : (

View File

@@ -252,17 +252,14 @@ const Recorder: React.FC<RecorderProps> = ({
const interval = setInterval(() => {
const now = Date.now();
const diff = (now - lastSpeechTimeRef.current) / 1000;
setSilenceDuration(diff);
const timeSinceSpeech = (now - lastSpeechTimeRef.current) / 1000;
setSilenceDuration(timeSinceSpeech);
// Different timeouts based on mode:
// Voice Memo: 20 seconds of silence
// Meeting: Disabled (no auto-stop to avoid cutting off long meetings)
const timeoutSeconds = recordingMode === 'voice' ? 20 : 9999; // 9999 = effectively disabled
if (diff > timeoutSeconds && !isStoppingRef.current) {
console.log(`Auto-stopping (${recordingMode} mode) due to ${timeoutSeconds}s silence`);
addToast(`Auto-stopping (${Math.floor(diff)}s silence detected)`, "info", 3000);
// AUTO STOP after 20 seconds of silence (ALL MODES)
if (timeSinceSpeech > 20 && !isStoppingRef.current) {
console.log("Auto-stopping due to silence...");
isStoppingRef.current = true;
addToast('Auto-stopped due to silence', 'info');
stopRecording();
}
}, 1000);