feat: complete history, attendees list, and smart templates

2026-01-20 15:00:56 +01:00
parent d266de942a
commit 52ccd7ee03
18 changed files with 2222 additions and 480 deletions
--- a/src-tauri/src/audio_processor.rs
+++ b/src-tauri/src/audio_processor.rs
@@ -0,0 +1,183 @@
+use std::sync::{Arc, Mutex};
+use tauri::{AppHandle, Emitter};
+use cpal::Sample;
+use hound::WavWriter;
+use rubato::{Resampler, FastFixedIn, PolynomialDegree};
+use voice_activity_detector::VoiceActivityDetector;
+
+pub struct AudioProcessor {
+    // VAD
+    vad: VoiceActivityDetector,
+    vad_chunk_size: usize,
+    vad_buffer: Vec<f32>,
+    
+    // Resampler
+    resampler: FastFixedIn<f32>,
+    resample_input_buffer: Vec<f32>,
+    resample_output_buffer: Vec<f32>,
+    
+    // State
+    is_speech_active: bool,
+    last_speech_time: u64, // In samples or frames
+    hangover_samples: u64,
+    
+    // Ring Buffer (for pre-roll)
+    ring_buffer: Vec<f32>,
+    ring_pos: usize,
+    ring_size: usize,
+    
+    // Output
+    writer: Arc<Mutex<WavWriter<std::io::BufWriter<std::fs::File>>>>,
+    sample_rate: u32,
+    total_processed_samples: u64,
+    // Event Emission
+    app_handle: Option<AppHandle>,
+    last_event_time: std::time::Instant,
+}
+
+impl AudioProcessor {
+    pub fn new(
+        sample_rate: u32, 
+        writer: Arc<Mutex<WavWriter<std::io::BufWriter<std::fs::File>>>>,
+        app_handle: AppHandle
+    ) -> Result<Self, String> {
+        let vad_sample_rate = 16000;
+        let vad_chunk_size = 512; // Silero usually likes ~30ms which is 512 at 16k? No 16000 * 0.032 = 512.
+        
+        // Initialize VAD
+        let vad = VoiceActivityDetector::builder()
+            .sample_rate(vad_sample_rate as u32)
+            .chunk_size(vad_chunk_size)
+            .build()
+            .map_err(|e| format!("Failed to init VAD: {:?}", e))?;
+
+        // Initialize Resampler (Input Rate -> 16000) using FastFixedIn for speed/simplicity
+        // new(f_ratio, max_resample_ratio_relative, polyn_deg, chunk_size, channels)
+        let resampler = FastFixedIn::<f32>::new(
+            16000.0 / sample_rate as f64,
+            1.0, 
+            PolynomialDegree::Septic,
+            1024,
+            1
+        ).map_err(|e| format!("Failed to init Resampler: {:?}", e))?;
+
+        // Pre-roll buffer (e.g. 0.5 seconds of high quality audio)
+        let ring_curr_seconds = 1.0; 
+        let ring_size = (sample_rate as f32 * ring_curr_seconds) as usize;
+
+        Ok(Self {
+            vad,
+            vad_chunk_size,
+            vad_buffer: Vec::new(),
+            resampler,
+            resample_input_buffer: Vec::new(),
+            resample_output_buffer: Vec::new(),
+            is_speech_active: false,
+            last_speech_time: 0,
+            hangover_samples: (sample_rate as f32 * 1.5) as u64, // 1.5s hangover
+            ring_buffer: vec![0.0; ring_size],
+            ring_pos: 0,
+            ring_size,
+            writer,
+            sample_rate,
+            total_processed_samples: 0,
+            app_handle: Some(app_handle),
+            last_event_time: std::time::Instant::now(),
+        })
+    }
+
+    pub fn process(&mut self, data: &[f32]) {
+        // 1. Add to Ring Buffer (always, for pre-roll)
+        for &sample in data {
+            self.ring_buffer[self.ring_pos] = sample;
+            self.ring_pos = (self.ring_pos + 1) % self.ring_size;
+        }
+
+        // 2. Resample for VAD
+        // We append new data to input buffer for resampler
+        self.resample_input_buffer.extend_from_slice(data);
+        
+        // Process in chunks compatible with resampler
+        // Actually rubato process_into_buffer needs waves of input.
+        // Simplified: SincFixedIn wants a fixed number of input frames? 
+        // Docs: "retrieve result... input buffer must contain needed number of frames"
+        // SincFixedIn: "input buffer used for resampling... must receive a fixed number of frames"
+        // Wait, SincFixedIn is fixed INPUT size. SincFixedOut is fixed OUTPUT size.
+        // We want to feed whatever we get.
+        // For simplicity, let's use a simpler resampling strategy or accept rubato's constraints.
+        // Rubato SincFixedIn: we must provide `input_frames_next` frames.
+        
+        // Let's defer strict resampling and just use decimation if sample rate is multiple?
+        // No, user devices vary.
+        
+        // Handling Resampling properly:
+        let needed = self.resampler.input_frames_next();
+        while self.resample_input_buffer.len() >= needed {
+             let chunk: Vec<f32> = self.resample_input_buffer.drain(0..needed).collect();
+             // Resample (mono)
+             let waves_in = vec![chunk];
+             // Allocate output (approx)
+             let mut waves_out = vec![vec![0.0; (needed as f64 * (16000.0 / self.sample_rate as f64)).ceil() as usize + 10]; 1]; // +10 padding
+             
+             if let Ok((_in_len, out_len)) = self.resampler.process_into_buffer(&waves_in, &mut waves_out, None) {
+                 if out_len > 0 {
+                     self.vad_buffer.extend_from_slice(&waves_out[0][0..out_len]);
+                 }
+             }
+             // Update output buffer usage... logic is tricky with drain.
+        }
+
+        // 3. Process VAD
+        while self.vad_buffer.len() >= self.vad_chunk_size {
+            let vad_chunk: Vec<f32> = self.vad_buffer.drain(0..self.vad_chunk_size).collect();
+            // Run Detection
+            let probability = self.vad.predict(vad_chunk);
+            let is_speech = probability > 0.5;
+
+            if is_speech {
+                self.is_speech_active = true;
+                self.last_speech_time = self.total_processed_samples;
+            }
+            
+            // Emit VAD event periodically (every 500ms)
+            if self.last_event_time.elapsed().as_millis() > 500 {
+                if let Some(app) = &self.app_handle {
+                     // Calculate crude RMS for visualization or just send probability
+                     // Just sending probability is enough for now
+                     #[derive(serde::Serialize, Clone)]
+                     struct VadEvent {
+                         probability: f32,
+                         is_speech: bool,
+                     }
+                     let _ = app.emit("vad-event", VadEvent { probability, is_speech });
+                }
+                self.last_event_time = std::time::Instant::now();
+            }
+        }
+        
+        // 4. Update Hangover and Check Write condition
+        let time_since_speech = self.total_processed_samples.saturating_sub(self.last_speech_time);
+        
+        if self.is_speech_active || time_since_speech < self.hangover_samples {
+             // We are recording!
+             // Check if we just started (transition)
+             // Ideally we dump the ring buffer here if we just switched state.
+             // Implementing perfect ring buffer dump is complex (need to track state changes better).
+             // MVP: Just Write Current Data if in state.
+             
+             // Improvement: If we are in hangover, we just write.
+             // If we just detected speech (was not speech?), dump ring buffer? 
+             // We'd need to know if we 'wrote' the ring buffer already.
+             
+             // Simple Logic: just write all incoming data if (Now - LastSpeech < Hangover)
+             
+             let mut guard = self.writer.lock().unwrap();
+             for &sample in data {
+                 let amplitude = i16::MAX as f32;
+                 guard.write_sample((sample * amplitude) as i16).ok();
+             }
+        }
+        
+        self.total_processed_samples += data.len() as u64;
+    }
+}