feat(v1.2.0): Final Release - Native Audio, Smart VAD, Auto-Loop & Quality Fixes
- Implemented standard 48kHz audio pipeline to fix sample rate mismatch/distortion - Added Native System Audio (ScreenCaptureKit) support - Implemented Smart VAD (Voice Activity Detection) with Auto-Start on valid audio - Added Auto-Loop: Automatically re-arms recording after stop - Added Empty Guard: Prevents transcribing silent recordings (< 20s empty) - Increased Pre-Roll buffer to 3.0s to prevent cut-off speech - Fixed clipping with clamped audio mixing
This commit is contained in:
@@ -60,9 +60,9 @@ const Recorder: React.FC<RecorderProps> = ({
|
||||
const [isStopping, setIsStopping] = useState(false); // New lock state
|
||||
const [isPaused, setIsPaused] = useState(false);
|
||||
const [isWaiting, setIsWaiting] = useState(false); // New state for Auto-Start
|
||||
const [hasSpeechDetected, setHasSpeechDetected] = useState(false); // New tracking state
|
||||
const [autoStartEnabled, setAutoStartEnabled] = useState(false); // Toggle state
|
||||
|
||||
|
||||
const [status, setStatus] = useState<string>('Ready to record');
|
||||
const [selectedDevice, setSelectedDevice] = useState<string>('');
|
||||
const [selectedPromptId, setSelectedPromptId] = useState<string>('');
|
||||
@@ -73,11 +73,8 @@ const Recorder: React.FC<RecorderProps> = ({
|
||||
const [lastSpeechTime, setLastSpeechTime] = useState<number>(Date.now());
|
||||
const [silenceDuration, setSilenceDuration] = useState(0);
|
||||
|
||||
// Filtered devices based on mode
|
||||
const filteredDevices = devices.filter(d => {
|
||||
const isVirtual = d.name.toLowerCase().includes('hearbit') || d.name.toLowerCase().includes('blackhole');
|
||||
return recordingMode === 'meeting' ? isVirtual : !isVirtual;
|
||||
});
|
||||
// Show all devices for both modes now (System Audio is captured natively)
|
||||
const filteredDevices = devices;
|
||||
|
||||
useEffect(() => {
|
||||
loadDevices();
|
||||
@@ -170,6 +167,7 @@ const Recorder: React.FC<RecorderProps> = ({
|
||||
setIsPaused(false);
|
||||
setTranscription('');
|
||||
setSummary('');
|
||||
setHasSpeechDetected(false); // Reset check for new session
|
||||
|
||||
if (autoStartEnabled) {
|
||||
setIsWaiting(true);
|
||||
@@ -215,15 +213,16 @@ const Recorder: React.FC<RecorderProps> = ({
|
||||
unlistenVAD = await listen<{ is_speech: boolean, probability: number }>('vad-event', (event) => {
|
||||
if (event.payload.is_speech) {
|
||||
setLastSpeechTime(Date.now());
|
||||
lastSpeechTimeRef.current = Date.now(); // Update ref immediately
|
||||
lastSpeechTimeRef.current = Date.now();
|
||||
setSilenceDuration(0);
|
||||
setHasSpeechDetected(true); // Track positive speech
|
||||
}
|
||||
});
|
||||
|
||||
// Auto-Start Trigger Listener
|
||||
unlistenTrigger = await listen('auto-recording-triggered', () => {
|
||||
console.log("Auto-Start Triggered from Backend!");
|
||||
// Only trigger if we are actually waiting
|
||||
setHasSpeechDetected(true); // Trigger counts as speech
|
||||
setIsWaiting((prev) => {
|
||||
if (prev) {
|
||||
addToast("Audio detected! Recording started.", 'success', 4000);
|
||||
@@ -341,134 +340,162 @@ const Recorder: React.FC<RecorderProps> = ({
|
||||
setIsRecording(false);
|
||||
setIsPaused(false);
|
||||
setIsWaiting(false); // Reset waiting state
|
||||
setTranscription('');
|
||||
setSummary('');
|
||||
setHasSpeechDetected(false); // Reset speech-detection state
|
||||
setStatus('Saving recording...');
|
||||
const filePath = await invoke<string>('stop_recording');
|
||||
|
||||
// Wait a moment for file flush (safety)
|
||||
await new Promise(r => setTimeout(r, 500));
|
||||
// NEW: Check if speech was actually detected during the session
|
||||
// If we recorded 20s of silence (Auto-Stop), we shouldn't transcribe.
|
||||
// If we recorded 20s of silence (Auto-Stop), we shouldn't transcribe.
|
||||
if (!hasSpeechDetected && recordingMode === 'voice') {
|
||||
// Note: For 'meeting' mode, system audio might have happened without VAD triggering?
|
||||
// But our updated backend VAD logic includes System Audio in 'is_speech' event.
|
||||
// So we can trust hasSpeechDetected for both modes now.
|
||||
|
||||
// Confirm recording saved
|
||||
addToast(`Recording saved locally: ${filePath.split('/').pop()}`, 'success', 3000);
|
||||
setStatus('Converting to MP3...');
|
||||
console.log("No speech detected during recording. Skipping transcription.");
|
||||
addToast("Recording discarded (No speech/audio detected)", 'info');
|
||||
|
||||
// Small delay to show the "saved" message
|
||||
await new Promise(r => setTimeout(r, 500));
|
||||
// If auto-start is on, we just loop back.
|
||||
// skip the rest.
|
||||
} else {
|
||||
|
||||
// Convert WAV to MP3 for smaller size
|
||||
const mp3Path = await invoke<string>('convert_to_mp3', { wavPath: filePath });
|
||||
// Wait a moment for file flush (safety)
|
||||
await new Promise(r => setTimeout(r, 500));
|
||||
|
||||
// Get file size to check if chunking needed
|
||||
interface AudioMetadata { duration: number; size: number; format: string; }
|
||||
const metadata = await invoke<AudioMetadata>('get_audio_metadata', { filePath: mp3Path });
|
||||
const sizeMB = metadata.size / (1024 * 1024);
|
||||
// Confirm recording saved
|
||||
addToast(`Recording saved locally: ${filePath.split('/').pop()}`, 'success', 3000);
|
||||
setStatus('Converting to MP3...');
|
||||
|
||||
let transText = '';
|
||||
// Small delay to show the "saved" message
|
||||
await new Promise(r => setTimeout(r, 500));
|
||||
|
||||
// Check if chunking needed (only for Meeting mode and large files)
|
||||
if (recordingMode === 'meeting' && sizeMB >= 18) {
|
||||
// CHUNKING PATH for large meetings
|
||||
setStatus(`Large file (${sizeMB.toFixed(1)}MB). Splitting into chunks...`);
|
||||
const chunks = await invoke<string[]>('chunk_audio', {
|
||||
filePath: mp3Path,
|
||||
chunkMinutes: 10
|
||||
});
|
||||
// Convert WAV to MP3 for smaller size
|
||||
const mp3Path = await invoke<string>('convert_to_mp3', { wavPath: filePath });
|
||||
|
||||
addToast(`Processing ${chunks.length} chunks...`, 'info', 4000);
|
||||
// Get file size to check if chunking needed
|
||||
interface AudioMetadata { duration: number; size: number; format: string; }
|
||||
const metadata = await invoke<AudioMetadata>('get_audio_metadata', { filePath: mp3Path });
|
||||
const sizeMB = metadata.size / (1024 * 1024);
|
||||
|
||||
let allTranscriptions: string[] = [];
|
||||
let transText = '';
|
||||
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
setStatus(`Transcribing chunk ${i + 1}/${chunks.length}...`);
|
||||
const chunkText = await invoke<string>('transcribe_audio', {
|
||||
filePath: chunks[i],
|
||||
// Check if chunking needed (only for Meeting mode and large files)
|
||||
if (recordingMode === 'meeting' && sizeMB >= 18) {
|
||||
// CHUNKING PATH for large meetings
|
||||
setStatus(`Large file (${sizeMB.toFixed(1)}MB). Splitting into chunks...`);
|
||||
const chunks = await invoke<string[]>('chunk_audio', {
|
||||
filePath: mp3Path,
|
||||
chunkMinutes: 10
|
||||
});
|
||||
|
||||
addToast(`Processing ${chunks.length} chunks...`, 'info', 4000);
|
||||
|
||||
let allTranscriptions: string[] = [];
|
||||
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
setStatus(`Transcribing chunk ${i + 1}/${chunks.length}...`);
|
||||
const chunkText = await invoke<string>('transcribe_audio', {
|
||||
filePath: chunks[i],
|
||||
apiKey,
|
||||
productId
|
||||
});
|
||||
allTranscriptions.push(chunkText);
|
||||
}
|
||||
|
||||
// Merge transcriptions
|
||||
transText = allTranscriptions.join('\n\n--- Next Segment ---\n\n');
|
||||
addToast('All chunks transcribed successfully!', 'success', 3000);
|
||||
} else {
|
||||
// NORMAL PATH for small files
|
||||
setStatus('Transcribing (Infomaniak Whisper)...');
|
||||
transText = await invoke<string>('transcribe_audio', {
|
||||
filePath: mp3Path,
|
||||
apiKey,
|
||||
productId
|
||||
});
|
||||
allTranscriptions.push(chunkText);
|
||||
}
|
||||
|
||||
// Merge transcriptions
|
||||
transText = allTranscriptions.join('\n\n--- Next Segment ---\n\n');
|
||||
addToast('All chunks transcribed successfully!', 'success', 3000);
|
||||
} else {
|
||||
// NORMAL PATH for small files
|
||||
setStatus('Transcribing (Infomaniak Whisper)...');
|
||||
transText = await invoke<string>('transcribe_audio', {
|
||||
filePath: mp3Path,
|
||||
apiKey,
|
||||
productId
|
||||
});
|
||||
}
|
||||
setTranscription(transText);
|
||||
|
||||
setTranscription(transText);
|
||||
// Check if transcription is empty or just whitespace
|
||||
if (!transText || transText.trim().length === 0) {
|
||||
setStatus('Done (No speech detected)');
|
||||
setTranscription('(No speech detected. Check your microphone settings.)');
|
||||
setTimeout(() => setStatus('Ready to record'), 3000);
|
||||
// allow finally block to restart loop
|
||||
} else {
|
||||
// Logic continues...
|
||||
|
||||
// Check if transcription is empty or just whitespace
|
||||
if (!transText || transText.trim().length === 0) {
|
||||
setStatus('Done (No speech detected)');
|
||||
setTranscription('(No speech detected. Check your microphone settings.)');
|
||||
setTimeout(() => setStatus('Ready to record'), 3000);
|
||||
return;
|
||||
}
|
||||
// Find selected prompt content - SMART SELECTION
|
||||
let activePrompt = prompts.find(p => p.id === selectedPromptId);
|
||||
|
||||
// Find selected prompt content - SMART SELECTION
|
||||
let activePrompt = prompts.find(p => p.id === selectedPromptId);
|
||||
// Smart Auto-Select based on keywords
|
||||
const lowerText = transText.toLowerCase();
|
||||
let bestMatchId = selectedPromptId;
|
||||
let maxMatches = 0;
|
||||
|
||||
// Smart Auto-Select based on keywords
|
||||
const lowerText = transText.toLowerCase();
|
||||
let bestMatchId = selectedPromptId;
|
||||
let maxMatches = 0;
|
||||
|
||||
for (const p of prompts) {
|
||||
if (!p.keywords) continue;
|
||||
let matches = 0;
|
||||
for (const kw of p.keywords) {
|
||||
if (lowerText.includes(kw.toLowerCase())) {
|
||||
matches++;
|
||||
for (const p of prompts) {
|
||||
if (!p.keywords) continue;
|
||||
let matches = 0;
|
||||
for (const kw of p.keywords) {
|
||||
if (lowerText.includes(kw.toLowerCase())) {
|
||||
matches++;
|
||||
}
|
||||
}
|
||||
if (matches > maxMatches) {
|
||||
maxMatches = matches;
|
||||
bestMatchId = p.id;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (matches > maxMatches) {
|
||||
maxMatches = matches;
|
||||
bestMatchId = p.id;
|
||||
|
||||
if (bestMatchId !== selectedPromptId) {
|
||||
const newPrompt = prompts.find(p => p.id === bestMatchId);
|
||||
if (newPrompt) {
|
||||
console.log(`Smart Select: Switched to '${newPrompt.name}' with ${maxMatches} matches.`);
|
||||
setStatus(`Smart Select: Using "${newPrompt.name}"...`);
|
||||
addToast(`Smart Select: Switched to "${newPrompt.name}"`, 'success', 4000);
|
||||
activePrompt = newPrompt;
|
||||
}
|
||||
}
|
||||
|
||||
const promptContent = activePrompt ? activePrompt.content : "Summarize this.";
|
||||
|
||||
setStatus(`Summarizing (${selectedModel})...`);
|
||||
const sumText = await invoke<string>('summarize_text', {
|
||||
text: transText,
|
||||
apiKey,
|
||||
productId,
|
||||
prompt: promptContent,
|
||||
model: selectedModel
|
||||
});
|
||||
setSummary(sumText);
|
||||
|
||||
// Auto-save to history
|
||||
onSaveToHistory(transText, sumText);
|
||||
|
||||
setStatus('Done!');
|
||||
addToast('Transcription & Summary complete!', 'success', 4000);
|
||||
onRecordingComplete(); // Auto-switch tab
|
||||
setTimeout(() => setStatus('Ready to record'), 3000);
|
||||
}
|
||||
}
|
||||
|
||||
if (bestMatchId !== selectedPromptId) {
|
||||
const newPrompt = prompts.find(p => p.id === bestMatchId);
|
||||
if (newPrompt) {
|
||||
console.log(`Smart Select: Switched to '${newPrompt.name}' with ${maxMatches} matches.`);
|
||||
setStatus(`Smart Select: Using "${newPrompt.name}"...`);
|
||||
addToast(`Smart Select: Switched to "${newPrompt.name}"`, 'success', 4000);
|
||||
activePrompt = newPrompt;
|
||||
// Optional: Update UI selection? setSelectedPromptId(bestMatchId);
|
||||
// Let's verify with user preference? For now, we override as "Magic".
|
||||
}
|
||||
}
|
||||
|
||||
const promptContent = activePrompt ? activePrompt.content : "Summarize this.";
|
||||
|
||||
setStatus(`Summarizing (${selectedModel})...`);
|
||||
const sumText = await invoke<string>('summarize_text', {
|
||||
text: transText,
|
||||
apiKey,
|
||||
productId,
|
||||
prompt: promptContent,
|
||||
model: selectedModel
|
||||
});
|
||||
setSummary(sumText);
|
||||
|
||||
// Auto-save to history
|
||||
onSaveToHistory(transText, sumText);
|
||||
|
||||
setStatus('Done!');
|
||||
addToast('Transcription & Summary complete!', 'success', 4000);
|
||||
onRecordingComplete(); // Auto-switch tab
|
||||
setTimeout(() => setStatus('Ready to record'), 3000);
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
setStatus(`Error: ${e}`);
|
||||
addToast(`Error processing: ${e}`, 'error');
|
||||
} finally {
|
||||
setIsStopping(false);
|
||||
|
||||
// AUTO-RESTART LOGIC
|
||||
if (autoStartEnabled) {
|
||||
console.log("Auto-Start enabled: Restarting listener loop...");
|
||||
// Short delay to ensure backend cleanup
|
||||
setTimeout(() => {
|
||||
startRecording();
|
||||
}, 1000);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -634,12 +661,20 @@ const Recorder: React.FC<RecorderProps> = ({
|
||||
</div>
|
||||
|
||||
<div className="flex flex-col gap-2 mt-2 w-full">
|
||||
{recordingMode === 'meeting' && filteredDevices.length === 0 && (
|
||||
{recordingMode === 'meeting' && (
|
||||
<button
|
||||
onClick={onOpenSettings}
|
||||
onClick={async () => {
|
||||
const allowed = await invoke<boolean>('check_screen_recording_permission');
|
||||
if (allowed) {
|
||||
addToast('System Audio Permission: GRANTED ✅', 'success');
|
||||
} else {
|
||||
addToast('System Audio Permission: MISSING ❌. Please enable in System Settings -> Privacy -> Screen Recording', 'error', 5000);
|
||||
// Open Settings?
|
||||
}
|
||||
}}
|
||||
className="text-xs bg-primary/10 text-primary hover:bg-primary/20 w-full text-center border border-primary/20 rounded p-2 mb-2 font-semibold"
|
||||
>
|
||||
🪄 Create "Hearbit Audio" Device
|
||||
🔒 Check Audio Permission
|
||||
</button>
|
||||
)}
|
||||
<button
|
||||
|
||||
Reference in New Issue
Block a user