feat: Adjust VAD sensitivity, enable global auto-stop, update docs
This commit is contained in:
36
README.md
36
README.md
@@ -10,8 +10,8 @@
|
||||
* **📁 Import Audio Files**: Upload existing recordings (MP3, MP4, WAV, M4A, FLAC, OGG, AAC, WMA) for transcription and summarization.
|
||||
* **⏱️ Long Meeting Support**: Record meetings up to 2+ hours with automatic MP3 conversion and chunking.
|
||||
* **🎵 Smart Auto-Stop**:
|
||||
* **Voice Memo Mode**: Automatically stops after 20 seconds of silence
|
||||
* **Meeting Mode**: No auto-stop to capture full discussions
|
||||
* **Universal Auto-Stop**: Automatically stops recording after **20 seconds of silence** in ALL modes (Voice Memo & Meeting).
|
||||
* **Noise Filtering**: Enhanced VAD (Voice Activity Detection) ignores background noise and keyboard typing, only triggering on clear speech.
|
||||
* **📅 Microsoft 365 Integration**:
|
||||
* **Upcoming Meetings**: View your daily schedule and join with **one click**.
|
||||
* **Meeting Details**: View full agenda and **invited attendee status** (Accepted/Declined).
|
||||
@@ -21,6 +21,7 @@
|
||||
* **Precision Transcription**: Standard-compliant formatting with **second-by-second timestamps**.
|
||||
* **Smart Summaries**: Uses **Smart Templates** to automatically select the best format (Business Protocol vs. 1:1) based on meeting content.
|
||||
* **🔇 Smart VAD**: Automatically filters out silence and stops recording when you stop talking.
|
||||
* **🎨 White-Labeling**: Upload your **custom company logo** in Settings to brand the application.
|
||||
* **🔒 Privacy-First**: Data is processed securely via your own Infomaniak API keys.
|
||||
|
||||
---
|
||||
@@ -91,6 +92,37 @@ We've made this easy! **Note: You must have the BlackHole driver installed.**
|
||||
|
||||
---
|
||||
|
||||
## 🎨 Custom Branding (White-Labeling)
|
||||
|
||||
You can replace the default Livtec logo with your own company branding:
|
||||
|
||||
1. Go to **Settings** (gear icon) → **Branding**.
|
||||
2. Click **Upload Logo**.
|
||||
3. Select your file (PNG, JPG, SVG).
|
||||
4. The logo updates immediately across the app.
|
||||
5. *Tip*: Use a transparent PNG for best results.
|
||||
|
||||
---
|
||||
|
||||
## 📧 Advanced Email Templates
|
||||
|
||||
The email system supports **full HTML & JavaScript** templates. This allows for dynamic dashboards, charts, and interactive reports.
|
||||
|
||||
**How to use:**
|
||||
1. Go to **Settings** → **Email**.
|
||||
2. Create a new template.
|
||||
3. Use `{{summary}}` as a placeholder for the raw AI JSON output.
|
||||
4. In your HTML/Script, parse it:
|
||||
```javascript
|
||||
const reportData = {{summary}};
|
||||
// Now you can use reportData.todos, reportData.updates, etc.
|
||||
```
|
||||
5. Use `{{date}}` for the current date and `{{subject}}` for the meeting title.
|
||||
|
||||
*Example*: Create a "Daily Standup Dashboard" that visualizes Blocker/Updates/Todos in a grid layout.
|
||||
|
||||
---
|
||||
|
||||
## ❓ Troubleshooting
|
||||
|
||||
### "Hearbit AI is damaged and can't be opened"
|
||||
|
||||
@@ -159,10 +159,10 @@ impl AudioProcessor {
|
||||
let sq_sum: f32 = vad_chunk.iter().map(|x| x * x).sum();
|
||||
let rms = (sq_sum / vad_chunk.len() as f32).sqrt();
|
||||
|
||||
// Hybrid VAD: Probability > 0.8 OR RMS > 0.015
|
||||
// INCREASED THRESHOLDS (v1.9.0):
|
||||
// Now that routing works, we must filter out system notifications (beeps) and noise floor.
|
||||
let is_speech = probability > 0.8 || rms > 0.015;
|
||||
// Hybrid VAD: Probability > 0.9 OR RMS > 0.025
|
||||
// INCREASED THRESHOLDS (v1.1.1):
|
||||
// Reduced sensitivity to avoid background noise triggering recording.
|
||||
let is_speech = probability > 0.9 || rms > 0.025;
|
||||
|
||||
if is_speech {
|
||||
self.is_speech_active = true;
|
||||
|
||||
@@ -579,23 +579,42 @@ struct AudioMetadata {
|
||||
format: String,
|
||||
}
|
||||
|
||||
// Locates an ffmpeg/ffprobe binary by probing well-known install locations.
//
// Returns the first absolute candidate path that exists on disk; if none of
// the common locations contain the binary, falls back to the bare name so
// `Command::new` resolves it through the PATH environment variable.
fn resolve_binary_path(binary_name: &str) -> String {
    // Candidate locations, ordered by likelihood (Homebrew on Apple Silicon,
    // Homebrew on Intel / manual installs, then the system directory).
    let candidates = [
        format!("/opt/homebrew/bin/{}", binary_name),
        format!("/usr/local/bin/{}", binary_name),
        format!("/usr/bin/{}", binary_name),
    ];

    candidates
        .into_iter()
        .find(|candidate| std::path::Path::new(candidate).exists())
        // Fallback: rely on PATH lookup at spawn time.
        .unwrap_or_else(|| binary_name.to_string())
}
|
||||
|
||||
#[tauri::command]
|
||||
fn get_audio_metadata(app: AppHandle, file_path: String) -> Result<AudioMetadata, String> {
|
||||
emit_log(&app, "INFO", &format!("Getting metadata for: {}", file_path));
|
||||
|
||||
// Get file size
|
||||
let metadata = std::fs::metadata(&file_path).map_err(|e| e.to_string())?;
|
||||
let size = metadata.len();
|
||||
|
||||
// Extract format from extension
|
||||
let path = std::path::Path::new(&file_path);
|
||||
let format = path.extension()
|
||||
.and_then(|e| e.to_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
if !path.exists() {
|
||||
return Err(format!("File not found: {}", file_path));
|
||||
}
|
||||
|
||||
// Get duration using ffprobe (requires ffmpeg to be installed)
|
||||
let duration = match Command::new("ffprobe")
|
||||
let size = std::fs::metadata(&file_path)
|
||||
.map_err(|e| e.to_string())?
|
||||
.len();
|
||||
|
||||
// Use ffprobe to get duration
|
||||
// Try resolved path first
|
||||
let ffprobe_cmd = resolve_binary_path("ffprobe");
|
||||
|
||||
let output = Command::new(&ffprobe_cmd)
|
||||
.args([
|
||||
"-v", "error",
|
||||
"-show_entries", "format=duration",
|
||||
@@ -603,23 +622,16 @@ fn get_audio_metadata(app: AppHandle, file_path: String) -> Result<AudioMetadata
|
||||
&file_path
|
||||
])
|
||||
.output()
|
||||
{
|
||||
Ok(output) => {
|
||||
if output.status.success() {
|
||||
let duration_str = String::from_utf8_lossy(&output.stdout);
|
||||
duration_str.trim().parse::<f64>().unwrap_or(0.0)
|
||||
} else {
|
||||
emit_log(&app, "WARN", "ffprobe failed, duration = 0");
|
||||
0.0
|
||||
}
|
||||
},
|
||||
Err(_) => {
|
||||
emit_log(&app, "WARN", "ffprobe not found, duration = 0");
|
||||
0.0
|
||||
}
|
||||
};
|
||||
.map_err(|e| format!("Failed to execute ffprobe at '{}': {}", ffprobe_cmd, e))?;
|
||||
|
||||
emit_log(&app, "SUCCESS", &format!("Metadata: {}s, {} bytes", duration, size));
|
||||
let duration_str = String::from_utf8_lossy(&output.stdout);
|
||||
let duration: f64 = duration_str.trim().parse().unwrap_or(0.0);
|
||||
|
||||
// Extension as format
|
||||
let format = path.extension()
|
||||
.and_then(|e| e.to_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
|
||||
Ok(AudioMetadata {
|
||||
duration,
|
||||
@@ -633,8 +645,9 @@ fn convert_to_mp3(app: AppHandle, wav_path: String) -> Result<String, String> {
|
||||
emit_log(&app, "INFO", &format!("Converting to MP3: {}", wav_path));
|
||||
|
||||
let mp3_path = wav_path.replace(".wav", ".mp3");
|
||||
let ffmpeg_cmd = resolve_binary_path("ffmpeg");
|
||||
|
||||
let output = Command::new("ffmpeg")
|
||||
let output = Command::new(&ffmpeg_cmd)
|
||||
.args([
|
||||
"-i", &wav_path,
|
||||
"-codec:a", "libmp3lame",
|
||||
@@ -643,7 +656,7 @@ fn convert_to_mp3(app: AppHandle, wav_path: String) -> Result<String, String> {
|
||||
&mp3_path
|
||||
])
|
||||
.output()
|
||||
.map_err(|e| format!("Failed to execute ffmpeg: {}", e))?;
|
||||
.map_err(|e| format!("Failed to execute ffmpeg at '{}': {}", ffmpeg_cmd, e))?;
|
||||
|
||||
if output.status.success() {
|
||||
emit_log(&app, "SUCCESS", &format!("MP3 created: {}", mp3_path));
|
||||
@@ -660,9 +673,11 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result<
|
||||
emit_log(&app, "INFO", &format!("Chunking audio: {} ({}min chunks)", file_path, chunk_minutes));
|
||||
|
||||
let chunk_seconds = chunk_minutes * 60;
|
||||
let ffprobe_cmd = resolve_binary_path("ffprobe");
|
||||
let ffmpeg_cmd = resolve_binary_path("ffmpeg");
|
||||
|
||||
// Get total duration using ffprobe
|
||||
let duration_output = Command::new("ffprobe")
|
||||
let duration_output = Command::new(&ffprobe_cmd)
|
||||
.args([
|
||||
"-v", "error",
|
||||
"-show_entries", "format=duration",
|
||||
@@ -670,7 +685,7 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result<
|
||||
&file_path
|
||||
])
|
||||
.output()
|
||||
.map_err(|e| format!("Failed to get duration: {}", e))?;
|
||||
.map_err(|e| format!("Failed to get duration with '{}': {}", ffprobe_cmd, e))?;
|
||||
|
||||
let duration_str = String::from_utf8_lossy(&duration_output.stdout);
|
||||
let duration: f64 = duration_str.trim().parse()
|
||||
@@ -686,7 +701,7 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result<
|
||||
let start_time = i as u32 * chunk_seconds;
|
||||
let chunk_path = format!("{}_chunk_{}.mp3", base_path, i);
|
||||
|
||||
let output = Command::new("ffmpeg")
|
||||
let output = Command::new(&ffmpeg_cmd)
|
||||
.args([
|
||||
"-i", &file_path,
|
||||
"-ss", &start_time.to_string(),
|
||||
@@ -696,7 +711,7 @@ fn chunk_audio(app: AppHandle, file_path: String, chunk_minutes: u32) -> Result<
|
||||
&chunk_path
|
||||
])
|
||||
.output()
|
||||
.map_err(|e| format!("Failed to create chunk {}: {}", i, e))?;
|
||||
.map_err(|e| format!("Failed to create chunk {} with '{}': {}", i, ffmpeg_cmd, e))?;
|
||||
|
||||
if !output.status.success() {
|
||||
let error = String::from_utf8_lossy(&output.stderr);
|
||||
|
||||
@@ -64,9 +64,14 @@ const EmailPreviewModal: React.FC<EmailPreviewModalProps> = ({
|
||||
const [activeTab, setActiveTab] = useState<'preview' | 'source'>('preview');
|
||||
|
||||
const generateHtmlBody = (content: string, title: string) => {
|
||||
// Simple heuristic: if it looks like HTML, treat as HTML. Otherwise, markdown.
|
||||
const isHtml = /^\s*<(!DOCTYPE|html|div|p|table)/i.test(content);
|
||||
const formattedBody = isHtml ? content : formatMarkdownToHtml(content);
|
||||
// Check if it's a full HTML document
|
||||
if (/^\s*<!DOCTYPE html/i.test(content) || /^\s*<html/i.test(content)) {
|
||||
return content;
|
||||
}
|
||||
|
||||
// Simple heuristic: if it looks like HTML fragment (div, p, table), treat as HTML. Otherwise, markdown.
|
||||
const isHtmlFragment = /^\s*<(div|p|table|section|header|footer)/i.test(content);
|
||||
const formattedBody = isHtmlFragment ? content : formatMarkdownToHtml(content);
|
||||
|
||||
return `
|
||||
<!DOCTYPE html>
|
||||
@@ -111,14 +116,17 @@ const EmailPreviewModal: React.FC<EmailPreviewModalProps> = ({
|
||||
// Replace placeholders
|
||||
const dateStr = new Date().toLocaleDateString();
|
||||
let newSub = tmpl.subject.replace(/{{date}}/g, dateStr).replace(/{{subject}}/g, "Meeting");
|
||||
// Note: We don't have the original 'recordingSubject' here easily without more prop drilling,
|
||||
// so we default to "Meeting" or user can edit.
|
||||
// Actually, initialSubject usually contains "Meeting Summary", so we could parse it, but for now date/summary is most important.
|
||||
|
||||
// Clean up JSON if necessary (e.g. remove markdown code blocks ```json ... ```)
|
||||
let cleanSummary = initialBody;
|
||||
if (initialBody.trim().startsWith('```')) {
|
||||
cleanSummary = initialBody.replace(/^```(json)?/i, '').replace(/```$/, '').trim();
|
||||
}
|
||||
|
||||
let newBody = tmpl.body
|
||||
.replace(/{{date}}/g, dateStr)
|
||||
.replace(/{{subject}}/g, "the meeting")
|
||||
.replace(/{{summary}}/g, initialBody);
|
||||
.replace(/{{summary}}/g, cleanSummary);
|
||||
|
||||
setSubject(newSub);
|
||||
setBody(generateHtmlBody(newBody, newSub));
|
||||
@@ -242,7 +250,7 @@ const EmailPreviewModal: React.FC<EmailPreviewModalProps> = ({
|
||||
srcDoc={body}
|
||||
className="w-full h-full border-none"
|
||||
title="Email Preview"
|
||||
sandbox="allow-same-origin"
|
||||
sandbox="allow-same-origin allow-scripts"
|
||||
/>
|
||||
</div>
|
||||
) : (
|
||||
|
||||
@@ -252,17 +252,14 @@ const Recorder: React.FC<RecorderProps> = ({
|
||||
|
||||
const interval = setInterval(() => {
|
||||
const now = Date.now();
|
||||
const diff = (now - lastSpeechTimeRef.current) / 1000;
|
||||
setSilenceDuration(diff);
|
||||
const timeSinceSpeech = (now - lastSpeechTimeRef.current) / 1000;
|
||||
setSilenceDuration(timeSinceSpeech);
|
||||
|
||||
// Different timeouts based on mode:
|
||||
// Voice Memo: 20 seconds of silence
|
||||
// Meeting: Disabled (no auto-stop to avoid cutting off long meetings)
|
||||
const timeoutSeconds = recordingMode === 'voice' ? 20 : 9999; // 9999 = effectively disabled
|
||||
|
||||
if (diff > timeoutSeconds && !isStoppingRef.current) {
|
||||
console.log(`Auto-stopping (${recordingMode} mode) due to ${timeoutSeconds}s silence`);
|
||||
addToast(`Auto-stopping (${Math.floor(diff)}s silence detected)`, "info", 3000);
|
||||
// AUTO STOP after 20 seconds of silence (ALL MODES)
|
||||
if (timeSinceSpeech > 20 && !isStoppingRef.current) {
|
||||
console.log("Auto-stopping due to silence...");
|
||||
isStoppingRef.current = true;
|
||||
addToast('Auto-stopped due to silence', 'info');
|
||||
stopRecording();
|
||||
}
|
||||
}, 1000);
|
||||
|
||||
Reference in New Issue
Block a user