feat(v1.2.0): Final Release - Native Audio, Smart VAD, Auto-Loop & Quality Fixes
- Implemented standard 48 kHz audio pipeline to fix sample rate mismatch/distortion
- Added Native System Audio (ScreenCaptureKit) support
- Implemented Smart VAD (Voice Activity Detection) with Auto-Start on valid audio
- Added Auto-Loop: automatically re-arms recording after stop
- Added Empty Guard: prevents transcribing silent recordings (< 20s empty)
- Increased Pre-Roll buffer to 3.0s to prevent cut-off speech
- Fixed clipping with clamped audio mixing
This commit is contained in:
@@ -15,11 +15,13 @@ mod audio_processor;
|
||||
use audio_processor::AudioProcessor;
|
||||
mod auth;
|
||||
mod email;
|
||||
mod sc_audio;
|
||||
|
||||
// Shared application state for the active recording session.
// Each field sits behind its own Mutex and is initialised to None in `run`.
|
||||
struct AppState {
|
||||
// Active cpal input stream; `stop_recording` returns "Not recording" when this is None.
recording_stream: Mutex<Option<cpal::Stream>>,
|
||||
// NOTE(review): presumably the path of the file the current recording is written to — its use is not visible here; confirm.
recording_file_path: Mutex<Option<String>>,
|
||||
// System-audio capture object; stored by `start_recording`, stopped and reset to None by `stop_recording`.
system_capture: Mutex<Option<sc_audio::SystemAudioCapture>>,
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize)]
|
||||
@@ -71,7 +73,7 @@ fn get_input_devices() -> Result<Vec<AudioDevice>, String> {
|
||||
|
||||
|
||||
#[tauri::command]
|
||||
fn start_recording(app: AppHandle, state: State<'_, AppState>, device_id: String, save_path: Option<String>, custom_filename: Option<String>, wait_for_speech: Option<bool>) -> Result<(), String> {
|
||||
async fn start_recording(app: AppHandle, state: State<'_, AppState>, device_id: String, save_path: Option<String>, custom_filename: Option<String>, wait_for_speech: Option<bool>) -> Result<(), String> {
|
||||
emit_log(&app, "INFO", &format!("Starting recording on device: {}", device_id));
|
||||
let host = cpal::default_host();
|
||||
|
||||
@@ -85,13 +87,23 @@ fn start_recording(app: AppHandle, state: State<'_, AppState>, device_id: String
|
||||
|
||||
// Select the configuration with the MAXIMUM number of channels
|
||||
// This is crucial for "Hearbit Audio" (Aggregate) which lists 3 channels but might default to 2.
|
||||
// We want the raw 3 channels to separate Mic (Ch0) from System (Ch1+2).
|
||||
let supported_configs = device.supported_input_configs().map_err(|e| e.to_string())?;
|
||||
let config = supported_configs
|
||||
.max_by_key(|c| c.channels())
|
||||
.map(|c| c.with_max_sample_rate())
|
||||
// Select Audio Configuration
|
||||
// We prioritize 48 kHz because System Audio (ScreenCaptureKit) works best at 48 kHz.
|
||||
let supported_configs: Vec<_> = device.supported_input_configs().map_err(|e| e.to_string())?.collect();
|
||||
|
||||
// Try to find 48kHz specifically
|
||||
// Note: cpal::SampleRate is likely a type alias for u32 here, so we pass 48000 directly.
|
||||
let config = supported_configs.iter()
|
||||
.find(|c| c.min_sample_rate() <= 48000 && c.max_sample_rate() >= 48000)
|
||||
.map(|c| c.with_sample_rate(48000))
|
||||
.or_else(|| {
|
||||
// Fallback: Max sample rate
|
||||
supported_configs.iter()
|
||||
.max_by_key(|c| c.channels())
|
||||
.map(|c| c.with_max_sample_rate())
|
||||
})
|
||||
.ok_or("No supported input configurations found")?;
|
||||
|
||||
|
||||
emit_log(&app, "INFO", &format!("Selected Audio Config: {} Channels, {} Hz", config.channels(), config.sample_rate()));
|
||||
|
||||
let spec = hound::WavSpec {
|
||||
@@ -145,6 +157,64 @@ fn start_recording(app: AppHandle, state: State<'_, AppState>, device_id: String
|
||||
let processor = Arc::new(Mutex::new(processor));
|
||||
let processor_clone = processor.clone();
|
||||
|
||||
// --- SYSTEM AUDIO CAPTURE START ---
|
||||
let mut sys_capture = sc_audio::SystemAudioCapture::new(config.sample_rate());
|
||||
|
||||
// Get the queue to share with the capture callback
|
||||
let queue_clone = {
|
||||
let p = processor.lock().unwrap();
|
||||
p.system_queue.clone() // Access the pub field we added
|
||||
};
|
||||
|
||||
let sys_handle = app.clone();
|
||||
let sys_callback = move |data: &[f32]| {
|
||||
// Push to queue
|
||||
if let Ok(mut q) = queue_clone.lock() {
|
||||
q.extend(data.iter());
|
||||
|
||||
// Limit queue size to avoid memory leaks if main process loop is slow
|
||||
while q.len() > 48000 * 5 { // 5 seconds buffer
|
||||
q.pop_front();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// NOTE: `sys_capture` holds the SCStream, which must be kept alive for the whole
// recording, so it is stored in `AppState` below rather than moved into a spawned task.
// `SystemAudioCapture::start` takes `&mut self` (it modifies `self.stream`).
// `start_recording` is declared `async fn` (Tauri supports async commands), so an
// async `start` could be awaited here.
// NOTE(review): the call below is not awaited — confirm that `start` is synchronous.
|
||||
|
||||
match sys_capture.start(sys_callback) {
|
||||
Ok(_) => emit_log(&app, "INFO", "System Audio Capture started."),
|
||||
Err(e) => emit_log(&app, "WARN", &format!("System Audio Capture failed (Permissions?): {}", e)),
|
||||
}
|
||||
|
||||
*state.system_capture.lock().unwrap() = Some(sys_capture);
|
||||
// --- SYSTEM AUDIO CAPTURE END ---
|
||||
|
||||
let app_handle = app.clone();
|
||||
let err_fn = move |err| {
|
||||
eprintln!("an error occurred on stream: {}", err);
|
||||
@@ -206,6 +276,13 @@ fn stop_recording(app: AppHandle, state: State<'_, AppState>) -> Result<String,
|
||||
// Drop stream to stop recording
|
||||
{
|
||||
let mut stream_guard = state.recording_stream.lock().unwrap();
|
||||
// Also stop System Capture
|
||||
let mut sys_guard = state.system_capture.lock().unwrap();
|
||||
if let Some(sys) = sys_guard.as_mut() {
|
||||
sys.stop();
|
||||
}
|
||||
*sys_guard = None;
|
||||
|
||||
if stream_guard.is_none() {
|
||||
return Err("Not recording".to_string());
|
||||
}
|
||||
@@ -804,6 +881,12 @@ fn create_hearbit_audio_device(app: AppHandle) -> Result<String, String> {
|
||||
}
|
||||
}
|
||||
|
||||
// Tauri command that returns the result of `sc_audio::check_permissions()`.
// NOTE(review): presumably `true` means screen-recording permission is granted — confirm in `sc_audio`.
#[tauri::command]
|
||||
async fn check_screen_recording_permission() -> bool {
|
||||
sc_audio::check_permissions().await
|
||||
}
|
||||
|
||||
|
||||
#[tauri::command]
|
||||
async fn save_text_file(app: AppHandle, path: String, content: String) -> Result<(), String> {
|
||||
emit_log(&app, "INFO", &format!("Saving text file to: {}", path));
|
||||
@@ -891,6 +974,7 @@ pub fn run() {
|
||||
.manage(AppState {
|
||||
recording_stream: Mutex::new(None),
|
||||
recording_file_path: Mutex::new(None),
|
||||
system_capture: Mutex::new(None),
|
||||
})
|
||||
.invoke_handler(tauri::generate_handler![
|
||||
greet,
|
||||
@@ -904,6 +988,7 @@ pub fn run() {
|
||||
get_available_models,
|
||||
open_audio_midi_setup,
|
||||
create_hearbit_audio_device,
|
||||
check_screen_recording_permission,
|
||||
auth::start_auth_flow,
|
||||
auth::get_calendar_events,
|
||||
save_text_file,
|
||||
|
||||
Reference in New Issue
Block a user