feat(v1.2.0): Final Release - Native Audio, Smart VAD, Auto-Loop & Quality Fixes

- Implemented standard 48kHz audio pipeline to fix sample rate mismatch/distortion
- Added Native System Audio (ScreenCaptureKit) support
- Implemented Smart VAD (Voice Activity Detection) with Auto-Start on valid audio
- Added Auto-Loop: Automatically re-arms recording after stop
- Added Empty Guard: Skips transcription of silent recordings (e.g. a ~20 s auto-stopped session with no detected speech)
- Increased Pre-Roll buffer to 3.0s to prevent cut-off speech
- Fixed clipping with clamped audio mixing
This commit is contained in:
michael.borak
2026-01-24 01:35:09 +01:00
parent 31f59ba4a2
commit 4e9a1fd038
10 changed files with 513 additions and 145 deletions

View File

@@ -60,9 +60,9 @@ const Recorder: React.FC<RecorderProps> = ({
const [isStopping, setIsStopping] = useState(false); // New lock state
const [isPaused, setIsPaused] = useState(false);
const [isWaiting, setIsWaiting] = useState(false); // New state for Auto-Start
const [hasSpeechDetected, setHasSpeechDetected] = useState(false); // New tracking state
const [autoStartEnabled, setAutoStartEnabled] = useState(false); // Toggle state
const [status, setStatus] = useState<string>('Ready to record');
const [selectedDevice, setSelectedDevice] = useState<string>('');
const [selectedPromptId, setSelectedPromptId] = useState<string>('');
@@ -73,11 +73,8 @@ const Recorder: React.FC<RecorderProps> = ({
const [lastSpeechTime, setLastSpeechTime] = useState<number>(Date.now());
const [silenceDuration, setSilenceDuration] = useState(0);
// Filtered devices based on mode
const filteredDevices = devices.filter(d => {
const isVirtual = d.name.toLowerCase().includes('hearbit') || d.name.toLowerCase().includes('blackhole');
return recordingMode === 'meeting' ? isVirtual : !isVirtual;
});
// Show all devices for both modes now (System Audio is captured natively)
const filteredDevices = devices;
useEffect(() => {
loadDevices();
@@ -170,6 +167,7 @@ const Recorder: React.FC<RecorderProps> = ({
setIsPaused(false);
setTranscription('');
setSummary('');
setHasSpeechDetected(false); // Reset check for new session
if (autoStartEnabled) {
setIsWaiting(true);
@@ -215,15 +213,16 @@ const Recorder: React.FC<RecorderProps> = ({
unlistenVAD = await listen<{ is_speech: boolean, probability: number }>('vad-event', (event) => {
if (event.payload.is_speech) {
setLastSpeechTime(Date.now());
lastSpeechTimeRef.current = Date.now(); // Update ref immediately
lastSpeechTimeRef.current = Date.now();
setSilenceDuration(0);
setHasSpeechDetected(true); // Track positive speech
}
});
// Auto-Start Trigger Listener
unlistenTrigger = await listen('auto-recording-triggered', () => {
console.log("Auto-Start Triggered from Backend!");
// Only trigger if we are actually waiting
setHasSpeechDetected(true); // Trigger counts as speech
setIsWaiting((prev) => {
if (prev) {
addToast("Audio detected! Recording started.", 'success', 4000);
@@ -341,134 +340,162 @@ const Recorder: React.FC<RecorderProps> = ({
setIsRecording(false);
setIsPaused(false);
setIsWaiting(false); // Reset waiting state
setTranscription('');
setSummary('');
setHasSpeechDetected(false); // Reset checkiting state
setStatus('Saving recording...');
const filePath = await invoke<string>('stop_recording');
// Wait a moment for file flush (safety)
await new Promise(r => setTimeout(r, 500));
// NEW: Check if speech was actually detected during the session
// If we recorded 20s of silence (Auto-Stop), we shouldn't transcribe.
// If we recorded 20s of silence (Auto-Stop), we shouldn't transcribe.
if (!hasSpeechDetected && recordingMode === 'voice') {
// Note: For 'meeting' mode, system audio might have happened without VAD triggering?
// But our updated backend VAD logic includes System Audio in 'is_speech' event.
// So we can trust hasSpeechDetected for both modes now.
// Confirm recording saved
addToast(`Recording saved locally: ${filePath.split('/').pop()}`, 'success', 3000);
setStatus('Converting to MP3...');
console.log("No speech detected during recording. Skipping transcription.");
addToast("Recording discarded (No speech/audio detected)", 'info');
// Small delay to show the "saved" message
await new Promise(r => setTimeout(r, 500));
// If auto-start is on, we just loop back.
// skip the rest.
} else {
// Convert WAV to MP3 for smaller size
const mp3Path = await invoke<string>('convert_to_mp3', { wavPath: filePath });
// Wait a moment for file flush (safety)
await new Promise(r => setTimeout(r, 500));
// Get file size to check if chunking needed
interface AudioMetadata { duration: number; size: number; format: string; }
const metadata = await invoke<AudioMetadata>('get_audio_metadata', { filePath: mp3Path });
const sizeMB = metadata.size / (1024 * 1024);
// Confirm recording saved
addToast(`Recording saved locally: ${filePath.split('/').pop()}`, 'success', 3000);
setStatus('Converting to MP3...');
let transText = '';
// Small delay to show the "saved" message
await new Promise(r => setTimeout(r, 500));
// Check if chunking needed (only for Meeting mode and large files)
if (recordingMode === 'meeting' && sizeMB >= 18) {
// CHUNKING PATH for large meetings
setStatus(`Large file (${sizeMB.toFixed(1)}MB). Splitting into chunks...`);
const chunks = await invoke<string[]>('chunk_audio', {
filePath: mp3Path,
chunkMinutes: 10
});
// Convert WAV to MP3 for smaller size
const mp3Path = await invoke<string>('convert_to_mp3', { wavPath: filePath });
addToast(`Processing ${chunks.length} chunks...`, 'info', 4000);
// Get file size to check if chunking needed
interface AudioMetadata { duration: number; size: number; format: string; }
const metadata = await invoke<AudioMetadata>('get_audio_metadata', { filePath: mp3Path });
const sizeMB = metadata.size / (1024 * 1024);
let allTranscriptions: string[] = [];
let transText = '';
for (let i = 0; i < chunks.length; i++) {
setStatus(`Transcribing chunk ${i + 1}/${chunks.length}...`);
const chunkText = await invoke<string>('transcribe_audio', {
filePath: chunks[i],
// Check if chunking needed (only for Meeting mode and large files)
if (recordingMode === 'meeting' && sizeMB >= 18) {
// CHUNKING PATH for large meetings
setStatus(`Large file (${sizeMB.toFixed(1)}MB). Splitting into chunks...`);
const chunks = await invoke<string[]>('chunk_audio', {
filePath: mp3Path,
chunkMinutes: 10
});
addToast(`Processing ${chunks.length} chunks...`, 'info', 4000);
let allTranscriptions: string[] = [];
for (let i = 0; i < chunks.length; i++) {
setStatus(`Transcribing chunk ${i + 1}/${chunks.length}...`);
const chunkText = await invoke<string>('transcribe_audio', {
filePath: chunks[i],
apiKey,
productId
});
allTranscriptions.push(chunkText);
}
// Merge transcriptions
transText = allTranscriptions.join('\n\n--- Next Segment ---\n\n');
addToast('All chunks transcribed successfully!', 'success', 3000);
} else {
// NORMAL PATH for small files
setStatus('Transcribing (Infomaniak Whisper)...');
transText = await invoke<string>('transcribe_audio', {
filePath: mp3Path,
apiKey,
productId
});
allTranscriptions.push(chunkText);
}
// Merge transcriptions
transText = allTranscriptions.join('\n\n--- Next Segment ---\n\n');
addToast('All chunks transcribed successfully!', 'success', 3000);
} else {
// NORMAL PATH for small files
setStatus('Transcribing (Infomaniak Whisper)...');
transText = await invoke<string>('transcribe_audio', {
filePath: mp3Path,
apiKey,
productId
});
}
setTranscription(transText);
setTranscription(transText);
// Check if transcription is empty or just whitespace
if (!transText || transText.trim().length === 0) {
setStatus('Done (No speech detected)');
setTranscription('(No speech detected. Check your microphone settings.)');
setTimeout(() => setStatus('Ready to record'), 3000);
// allow finally block to restart loop
} else {
// Logic continues...
// Check if transcription is empty or just whitespace
if (!transText || transText.trim().length === 0) {
setStatus('Done (No speech detected)');
setTranscription('(No speech detected. Check your microphone settings.)');
setTimeout(() => setStatus('Ready to record'), 3000);
return;
}
// Find selected prompt content - SMART SELECTION
let activePrompt = prompts.find(p => p.id === selectedPromptId);
// Find selected prompt content - SMART SELECTION
let activePrompt = prompts.find(p => p.id === selectedPromptId);
// Smart Auto-Select based on keywords
const lowerText = transText.toLowerCase();
let bestMatchId = selectedPromptId;
let maxMatches = 0;
// Smart Auto-Select based on keywords
const lowerText = transText.toLowerCase();
let bestMatchId = selectedPromptId;
let maxMatches = 0;
for (const p of prompts) {
if (!p.keywords) continue;
let matches = 0;
for (const kw of p.keywords) {
if (lowerText.includes(kw.toLowerCase())) {
matches++;
for (const p of prompts) {
if (!p.keywords) continue;
let matches = 0;
for (const kw of p.keywords) {
if (lowerText.includes(kw.toLowerCase())) {
matches++;
}
}
if (matches > maxMatches) {
maxMatches = matches;
bestMatchId = p.id;
}
}
}
if (matches > maxMatches) {
maxMatches = matches;
bestMatchId = p.id;
if (bestMatchId !== selectedPromptId) {
const newPrompt = prompts.find(p => p.id === bestMatchId);
if (newPrompt) {
console.log(`Smart Select: Switched to '${newPrompt.name}' with ${maxMatches} matches.`);
setStatus(`Smart Select: Using "${newPrompt.name}"...`);
addToast(`Smart Select: Switched to "${newPrompt.name}"`, 'success', 4000);
activePrompt = newPrompt;
}
}
const promptContent = activePrompt ? activePrompt.content : "Summarize this.";
setStatus(`Summarizing (${selectedModel})...`);
const sumText = await invoke<string>('summarize_text', {
text: transText,
apiKey,
productId,
prompt: promptContent,
model: selectedModel
});
setSummary(sumText);
// Auto-save to history
onSaveToHistory(transText, sumText);
setStatus('Done!');
addToast('Transcription & Summary complete!', 'success', 4000);
onRecordingComplete(); // Auto-switch tab
setTimeout(() => setStatus('Ready to record'), 3000);
}
}
if (bestMatchId !== selectedPromptId) {
const newPrompt = prompts.find(p => p.id === bestMatchId);
if (newPrompt) {
console.log(`Smart Select: Switched to '${newPrompt.name}' with ${maxMatches} matches.`);
setStatus(`Smart Select: Using "${newPrompt.name}"...`);
addToast(`Smart Select: Switched to "${newPrompt.name}"`, 'success', 4000);
activePrompt = newPrompt;
// Optional: Update UI selection? setSelectedPromptId(bestMatchId);
// Let's verify with user preference? For now, we override as "Magic".
}
}
const promptContent = activePrompt ? activePrompt.content : "Summarize this.";
setStatus(`Summarizing (${selectedModel})...`);
const sumText = await invoke<string>('summarize_text', {
text: transText,
apiKey,
productId,
prompt: promptContent,
model: selectedModel
});
setSummary(sumText);
// Auto-save to history
onSaveToHistory(transText, sumText);
setStatus('Done!');
addToast('Transcription & Summary complete!', 'success', 4000);
onRecordingComplete(); // Auto-switch tab
setTimeout(() => setStatus('Ready to record'), 3000);
} catch (e) {
console.error(e);
setStatus(`Error: ${e}`);
addToast(`Error processing: ${e}`, 'error');
} finally {
setIsStopping(false);
// AUTO-RESTART LOGIC
if (autoStartEnabled) {
console.log("Auto-Start enabled: Restarting listener loop...");
// Short delay to ensure backend cleanup
setTimeout(() => {
startRecording();
}, 1000);
}
}
};
@@ -634,12 +661,20 @@ const Recorder: React.FC<RecorderProps> = ({
</div>
<div className="flex flex-col gap-2 mt-2 w-full">
{recordingMode === 'meeting' && filteredDevices.length === 0 && (
{recordingMode === 'meeting' && (
<button
onClick={onOpenSettings}
onClick={async () => {
const allowed = await invoke<boolean>('check_screen_recording_permission');
if (allowed) {
addToast('System Audio Permission: GRANTED ✅', 'success');
} else {
addToast('System Audio Permission: MISSING ❌. Please enable in System Settings -> Privacy -> Screen Recording', 'error', 5000);
// Open Settings?
}
}}
className="text-xs bg-primary/10 text-primary hover:bg-primary/20 w-full text-center border border-primary/20 rounded p-2 mb-2 font-semibold"
>
🪄 Create "Hearbit Audio" Device
🔒 Check Audio Permission
</button>
)}
<button