feat(v1.2.0): Final Release - Native Audio, Smart VAD, Auto-Loop & Quality Fixes

- Implemented standard 48kHz audio pipeline to fix sample rate mismatch/distortion
- Added Native System Audio (ScreenCaptureKit) support
- Implemented Smart VAD (Voice Activity Detection) with Auto-Start on valid audio
- Added Auto-Loop: Automatically re-arms recording after stop
- Added Empty Guard: Skips transcription of silent recordings (e.g. a ~20 s auto-stopped session with no detected speech)
- Increased Pre-Roll buffer to 3.0s to prevent cut-off speech
- Fixed clipping with clamped audio mixing
This commit is contained in:
michael.borak
2026-01-24 01:35:09 +01:00
parent 31f59ba4a2
commit 4e9a1fd038
10 changed files with 513 additions and 145 deletions

View File

@@ -60,9 +60,9 @@ const Recorder: React.FC<RecorderProps> = ({
const [isStopping, setIsStopping] = useState(false); // New lock state
const [isPaused, setIsPaused] = useState(false);
const [isWaiting, setIsWaiting] = useState(false); // New state for Auto-Start
const [hasSpeechDetected, setHasSpeechDetected] = useState(false); // New tracking state
const [autoStartEnabled, setAutoStartEnabled] = useState(false); // Toggle state
const [status, setStatus] = useState<string>('Ready to record');
const [selectedDevice, setSelectedDevice] = useState<string>('');
const [selectedPromptId, setSelectedPromptId] = useState<string>('');
@@ -73,11 +73,8 @@ const Recorder: React.FC<RecorderProps> = ({
const [lastSpeechTime, setLastSpeechTime] = useState<number>(Date.now());
const [silenceDuration, setSilenceDuration] = useState(0);
// Filtered devices based on mode
const filteredDevices = devices.filter(d => {
const isVirtual = d.name.toLowerCase().includes('hearbit') || d.name.toLowerCase().includes('blackhole');
return recordingMode === 'meeting' ? isVirtual : !isVirtual;
});
// Show all devices for both modes now (System Audio is captured natively)
const filteredDevices = devices;
useEffect(() => {
loadDevices();
@@ -170,6 +167,7 @@ const Recorder: React.FC<RecorderProps> = ({
setIsPaused(false);
setTranscription('');
setSummary('');
setHasSpeechDetected(false); // Reset check for new session
if (autoStartEnabled) {
setIsWaiting(true);
@@ -215,15 +213,16 @@ const Recorder: React.FC<RecorderProps> = ({
unlistenVAD = await listen<{ is_speech: boolean, probability: number }>('vad-event', (event) => {
if (event.payload.is_speech) {
setLastSpeechTime(Date.now());
lastSpeechTimeRef.current = Date.now(); // Update ref immediately
lastSpeechTimeRef.current = Date.now();
setSilenceDuration(0);
setHasSpeechDetected(true); // Track positive speech
}
});
// Auto-Start Trigger Listener
unlistenTrigger = await listen('auto-recording-triggered', () => {
console.log("Auto-Start Triggered from Backend!");
// Only trigger if we are actually waiting
setHasSpeechDetected(true); // Trigger counts as speech
setIsWaiting((prev) => {
if (prev) {
addToast("Audio detected! Recording started.", 'success', 4000);
@@ -341,134 +340,162 @@ const Recorder: React.FC<RecorderProps> = ({
setIsRecording(false);
setIsPaused(false);
setIsWaiting(false); // Reset waiting state
setTranscription('');
setSummary('');
setHasSpeechDetected(false); // Reset checkiting state
setStatus('Saving recording...');
const filePath = await invoke<string>('stop_recording');
// Wait a moment for file flush (safety)
await new Promise(r => setTimeout(r, 500));
// NEW: Check if speech was actually detected during the session
// If we recorded 20s of silence (Auto-Stop), we shouldn't transcribe.
// If we recorded 20s of silence (Auto-Stop), we shouldn't transcribe.
if (!hasSpeechDetected && recordingMode === 'voice') {
// Note: For 'meeting' mode, system audio might have happened without VAD triggering?
// But our updated backend VAD logic includes System Audio in 'is_speech' event.
// So we can trust hasSpeechDetected for both modes now.
// Confirm recording saved
addToast(`Recording saved locally: ${filePath.split('/').pop()}`, 'success', 3000);
setStatus('Converting to MP3...');
console.log("No speech detected during recording. Skipping transcription.");
addToast("Recording discarded (No speech/audio detected)", 'info');
// Small delay to show the "saved" message
await new Promise(r => setTimeout(r, 500));
// If auto-start is on, we just loop back.
// skip the rest.
} else {
// Convert WAV to MP3 for smaller size
const mp3Path = await invoke<string>('convert_to_mp3', { wavPath: filePath });
// Wait a moment for file flush (safety)
await new Promise(r => setTimeout(r, 500));
// Get file size to check if chunking needed
interface AudioMetadata { duration: number; size: number; format: string; }
const metadata = await invoke<AudioMetadata>('get_audio_metadata', { filePath: mp3Path });
const sizeMB = metadata.size / (1024 * 1024);
// Confirm recording saved
addToast(`Recording saved locally: ${filePath.split('/').pop()}`, 'success', 3000);
setStatus('Converting to MP3...');
let transText = '';
// Small delay to show the "saved" message
await new Promise(r => setTimeout(r, 500));
// Check if chunking needed (only for Meeting mode and large files)
if (recordingMode === 'meeting' && sizeMB >= 18) {
// CHUNKING PATH for large meetings
setStatus(`Large file (${sizeMB.toFixed(1)}MB). Splitting into chunks...`);
const chunks = await invoke<string[]>('chunk_audio', {
filePath: mp3Path,
chunkMinutes: 10
});
// Convert WAV to MP3 for smaller size
const mp3Path = await invoke<string>('convert_to_mp3', { wavPath: filePath });
addToast(`Processing ${chunks.length} chunks...`, 'info', 4000);
// Get file size to check if chunking needed
interface AudioMetadata { duration: number; size: number; format: string; }
const metadata = await invoke<AudioMetadata>('get_audio_metadata', { filePath: mp3Path });
const sizeMB = metadata.size / (1024 * 1024);
let allTranscriptions: string[] = [];
let transText = '';
for (let i = 0; i < chunks.length; i++) {
setStatus(`Transcribing chunk ${i + 1}/${chunks.length}...`);
const chunkText = await invoke<string>('transcribe_audio', {
filePath: chunks[i],
// Check if chunking needed (only for Meeting mode and large files)
if (recordingMode === 'meeting' && sizeMB >= 18) {
// CHUNKING PATH for large meetings
setStatus(`Large file (${sizeMB.toFixed(1)}MB). Splitting into chunks...`);
const chunks = await invoke<string[]>('chunk_audio', {
filePath: mp3Path,
chunkMinutes: 10
});
addToast(`Processing ${chunks.length} chunks...`, 'info', 4000);
let allTranscriptions: string[] = [];
for (let i = 0; i < chunks.length; i++) {
setStatus(`Transcribing chunk ${i + 1}/${chunks.length}...`);
const chunkText = await invoke<string>('transcribe_audio', {
filePath: chunks[i],
apiKey,
productId
});
allTranscriptions.push(chunkText);
}
// Merge transcriptions
transText = allTranscriptions.join('\n\n--- Next Segment ---\n\n');
addToast('All chunks transcribed successfully!', 'success', 3000);
} else {
// NORMAL PATH for small files
setStatus('Transcribing (Infomaniak Whisper)...');
transText = await invoke<string>('transcribe_audio', {
filePath: mp3Path,
apiKey,
productId
});
allTranscriptions.push(chunkText);
}
// Merge transcriptions
transText = allTranscriptions.join('\n\n--- Next Segment ---\n\n');
addToast('All chunks transcribed successfully!', 'success', 3000);
} else {
// NORMAL PATH for small files
setStatus('Transcribing (Infomaniak Whisper)...');
transText = await invoke<string>('transcribe_audio', {
filePath: mp3Path,
apiKey,
productId
});
}
setTranscription(transText);
setTranscription(transText);
// Check if transcription is empty or just whitespace
if (!transText || transText.trim().length === 0) {
setStatus('Done (No speech detected)');
setTranscription('(No speech detected. Check your microphone settings.)');
setTimeout(() => setStatus('Ready to record'), 3000);
// allow finally block to restart loop
} else {
// Logic continues...
// Check if transcription is empty or just whitespace
if (!transText || transText.trim().length === 0) {
setStatus('Done (No speech detected)');
setTranscription('(No speech detected. Check your microphone settings.)');
setTimeout(() => setStatus('Ready to record'), 3000);
return;
}
// Find selected prompt content - SMART SELECTION
let activePrompt = prompts.find(p => p.id === selectedPromptId);
// Find selected prompt content - SMART SELECTION
let activePrompt = prompts.find(p => p.id === selectedPromptId);
// Smart Auto-Select based on keywords
const lowerText = transText.toLowerCase();
let bestMatchId = selectedPromptId;
let maxMatches = 0;
// Smart Auto-Select based on keywords
const lowerText = transText.toLowerCase();
let bestMatchId = selectedPromptId;
let maxMatches = 0;
for (const p of prompts) {
if (!p.keywords) continue;
let matches = 0;
for (const kw of p.keywords) {
if (lowerText.includes(kw.toLowerCase())) {
matches++;
for (const p of prompts) {
if (!p.keywords) continue;
let matches = 0;
for (const kw of p.keywords) {
if (lowerText.includes(kw.toLowerCase())) {
matches++;
}
}
if (matches > maxMatches) {
maxMatches = matches;
bestMatchId = p.id;
}
}
}
if (matches > maxMatches) {
maxMatches = matches;
bestMatchId = p.id;
if (bestMatchId !== selectedPromptId) {
const newPrompt = prompts.find(p => p.id === bestMatchId);
if (newPrompt) {
console.log(`Smart Select: Switched to '${newPrompt.name}' with ${maxMatches} matches.`);
setStatus(`Smart Select: Using "${newPrompt.name}"...`);
addToast(`Smart Select: Switched to "${newPrompt.name}"`, 'success', 4000);
activePrompt = newPrompt;
}
}
const promptContent = activePrompt ? activePrompt.content : "Summarize this.";
setStatus(`Summarizing (${selectedModel})...`);
const sumText = await invoke<string>('summarize_text', {
text: transText,
apiKey,
productId,
prompt: promptContent,
model: selectedModel
});
setSummary(sumText);
// Auto-save to history
onSaveToHistory(transText, sumText);
setStatus('Done!');
addToast('Transcription & Summary complete!', 'success', 4000);
onRecordingComplete(); // Auto-switch tab
setTimeout(() => setStatus('Ready to record'), 3000);
}
}
if (bestMatchId !== selectedPromptId) {
const newPrompt = prompts.find(p => p.id === bestMatchId);
if (newPrompt) {
console.log(`Smart Select: Switched to '${newPrompt.name}' with ${maxMatches} matches.`);
setStatus(`Smart Select: Using "${newPrompt.name}"...`);
addToast(`Smart Select: Switched to "${newPrompt.name}"`, 'success', 4000);
activePrompt = newPrompt;
// Optional: Update UI selection? setSelectedPromptId(bestMatchId);
// Let's verify with user preference? For now, we override as "Magic".
}
}
const promptContent = activePrompt ? activePrompt.content : "Summarize this.";
setStatus(`Summarizing (${selectedModel})...`);
const sumText = await invoke<string>('summarize_text', {
text: transText,
apiKey,
productId,
prompt: promptContent,
model: selectedModel
});
setSummary(sumText);
// Auto-save to history
onSaveToHistory(transText, sumText);
setStatus('Done!');
addToast('Transcription & Summary complete!', 'success', 4000);
onRecordingComplete(); // Auto-switch tab
setTimeout(() => setStatus('Ready to record'), 3000);
} catch (e) {
console.error(e);
setStatus(`Error: ${e}`);
addToast(`Error processing: ${e}`, 'error');
} finally {
setIsStopping(false);
// AUTO-RESTART LOGIC
if (autoStartEnabled) {
console.log("Auto-Start enabled: Restarting listener loop...");
// Short delay to ensure backend cleanup
setTimeout(() => {
startRecording();
}, 1000);
}
}
};
@@ -634,12 +661,20 @@ const Recorder: React.FC<RecorderProps> = ({
</div>
<div className="flex flex-col gap-2 mt-2 w-full">
{recordingMode === 'meeting' && filteredDevices.length === 0 && (
{recordingMode === 'meeting' && (
<button
onClick={onOpenSettings}
onClick={async () => {
const allowed = await invoke<boolean>('check_screen_recording_permission');
if (allowed) {
addToast('System Audio Permission: GRANTED ✅', 'success');
} else {
addToast('System Audio Permission: MISSING ❌. Please enable in System Settings -> Privacy -> Screen Recording', 'error', 5000);
// Open Settings?
}
}}
className="text-xs bg-primary/10 text-primary hover:bg-primary/20 w-full text-center border border-primary/20 rounded p-2 mb-2 font-semibold"
>
🪄 Create "Hearbit Audio" Device
🔒 Check Audio Permission
</button>
)}
<button