diff --git a/README.md b/README.md index 915e37e..1946255 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ ## ✨ Features * **🎙️ Dual-Channel Recording**: seamlessly capture your voice and meeting audio from apps like Microsoft Teams, Zoom, or Google Meet. +* **📁 Import Audio Files**: Upload existing recordings (MP3, MP4, WAV, M4A, FLAC, OGG, AAC, WMA) for transcription and summarization. * **📅 Microsoft 365 Integration**: * **Upcoming Meetings**: View your daily schedule and join with **one click**. * **Meeting Details**: View full agenda and **invited attendee status** (Accepted/Declined). @@ -24,6 +25,7 @@ ### 1. Prerequisites * **macOS** (Apple Silicon or Intel). +* **BlackHole 2ch Driver** (Mandatory): Download from [existential.audio](https://existential.audio/blackhole/) or run `brew install blackhole-2ch`. * **Infomaniak AI Account**: You need an API Key and Product ID from the [Infomaniak Developer Portal](https://manager.infomaniak.com/). ### 2. Installation @@ -35,15 +37,21 @@ ## 🎧 Recording System Audio (Teams, Zoom, etc.) -We've made this easy! Hearbit AI includes a built-in helper to set up your audio devices. +We've made this easy! **Note: You must have the BlackHole driver installed.** -1. **Open Audio MIDI Setup**: Click the "Open Audio MIDI Setup" button in the recorder view. -2. **Create "Hearbit Audio" Device**: - * If you don't have a virtual device, click **"🪄 Create Hearbit Audio Device"** in the app (appears in Meeting mode if no device is found). - * This will automatically configure a Multi-Output Device so you can record and hear at the same time. -3. **Select "Hearbit Audio" in Teams/Zoom**: - * In your meeting app settings (Teams/Zoom), set your **Speaker** to **Hearbit Audio**. - * In Hearbit AI, select **Hearbit Audio** (or BlackHole) as your input. +1. **Create "Hearbit Audio" Device**: + * Open the app and select **Meeting** mode. + * If you don't have the device yet, click the **"🪄 Create Hearbit Audio Device"** button. 
+ * This creates a specialized "Multi-Output Device" that routes audio to both your headphones/speakers AND the app. + +2. **Configure Teams / Zoom / Webex**: + * **Speaker / Output**: Change this to **Hearbit Audio**. + * *Why?* This ensures the audio goes to the recording app *and* your ears. + * **Microphone / Input**: Leave this as your normal microphone (e.g., MacBook Pro Mic). + * *Note:* Do **not** select Hearbit Audio as your microphone in Teams. + +3. **Start Recording**: + * In Hearbit AI, ensure **Hearbit Audio** is selected as the input. --- diff --git a/RELEASE_NOTES_1.1.0.md b/RELEASE_NOTES_1.1.0.md new file mode 100644 index 0000000..732becd --- /dev/null +++ b/RELEASE_NOTES_1.1.0.md @@ -0,0 +1,81 @@ +# Release Notes - Version 1.1.0 + +**Release Date**: January 21, 2026 + +## 🎉 What's New + +### Import Audio Files Feature + +We've added a powerful new **Import** tab that allows you to upload and process existing audio/video files! + +**Key Features:** +- **Drag-and-Drop Upload**: Simply drag your audio files into the app +- **Support for 8 Formats**: MP3, MP4, WAV, M4A, FLAC, OGG, AAC, WMA +- **Smart Metadata Display**: See file duration, size, and format before processing +- **Editable Meeting Titles**: Customize the name (defaults to filename) +- **Progress Tracking**: Visual indicators for each stage (Validating → Transcribing → Summarizing) +- **Same AI Power**: Uses the same AI templates and Smart Select as live recordings +- **Auto-Navigation**: Seamlessly transition to Transcription view when complete + +**Use Cases:** +- Process meetings that were recorded elsewhere because you forgot to record them live +- Transcribe and summarize voice memos +- Import recordings from other devices +- Archive and transcribe old meeting recordings + +--- + +## 📝 Documentation Updates + +### README Enhancements +- Added mandatory **BlackHole 2ch Driver** requirement to Prerequisites +- Clarified **Teams/Zoom configuration** (Speaker vs. 
Microphone settings) +- Added detailed setup instructions for meeting audio capture + +--- + +## 🔧 Technical Improvements + +- Added `get_audio_metadata` Rust command for file metadata extraction +- Improved tab navigation with new Import tab +- Enhanced error handling for file validation +- Code optimizations and cleanup + +--- + +## 📦 Installation + +Download the DMG file: +``` +Hearbit_AI_1.1.0_aarch64.dmg +``` + +**Location**: `src-tauri/target/release/bundle/dmg/` + +### First-time Installation +If you see "Hearbit AI is damaged and can't be opened": +```bash +sudo xattr -cr /Applications/Hearbit\ AI.app +``` + +--- + +## 🐛 Known Issues + +None reported for this release. + +--- + +## 🙏 Credits + +Built with ❤️ by the Livtec team using Tauri, React, and TypeScript. + +--- + +## What's Next? + +Potential future enhancements: +- Meeting auto-stop when meeting ends (via M365 API) +- Batch file import +- Audio preview player +- More audio format conversions diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index aaabccf..261f789 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -1739,7 +1739,7 @@ checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" [[package]] name = "hearbit-ai" -version = "1.1.0" +version = "0.1.2" dependencies = [ "chrono", "cpal", @@ -1757,6 +1757,7 @@ dependencies = [ "tauri-plugin-log", "tauri-plugin-oauth", "tauri-plugin-opener", + "tauri-plugin-shell", "tokio", "url", "voice_activity_detector", @@ -3089,6 +3090,16 @@ dependencies = [ "ureq", ] +[[package]] +name = "os_pipe" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d8fae84b431384b68627d0f9b3b1245fcf9f46f6c0e3dc902e9dce64edd1967" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "pango" version = "0.18.3" @@ -4361,12 +4372,44 @@ dependencies = [ "digest", ] +[[package]] +name = "shared_child" +version = "1.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e362d9935bc50f019969e2f9ecd66786612daae13e8f277be7bfb66e8bed3f7" +dependencies = [ + "libc", + "sigchld", + "windows-sys 0.60.2", +] + [[package]] name = "shlex" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "sigchld" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47106eded3c154e70176fc83df9737335c94ce22f821c32d17ed1db1f83badb1" +dependencies = [ + "libc", + "os_pipe", + "signal-hook", +] + +[[package]] +name = "signal-hook" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d881a16cf4426aa584979d30bd82cb33429027e42122b169753d6ef1085ed6e2" +dependencies = [ + "libc", + "signal-hook-registry", +] + [[package]] name = "signal-hook-registry" version = "1.4.8" @@ -4951,6 +4994,27 @@ dependencies = [ "zbus", ] +[[package]] +name = "tauri-plugin-shell" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39b76f884a3937e04b631ffdc3be506088fa979369d25147361352f2f352e5ed" +dependencies = [ + "encoding_rs", + "log", + "open", + "os_pipe", + "regex", + "schemars 0.8.22", + "serde", + "serde_json", + "shared_child", + "tauri", + "tauri-plugin", + "thiserror 2.0.18", + "tokio", +] + [[package]] name = "tauri-runtime" version = "2.9.2" diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index 0b77826..9e2ac21 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "hearbit-ai" -version = "1.1.0" +version = "0.1.2" description = "A Tauri App" authors = ["you"] edition = "2021" @@ -18,7 +18,7 @@ crate-type = ["staticlib", "cdylib", "rlib"] tauri-build = { version = "2", features = [] } [dependencies] -tauri = { version = "2", features = [] } +tauri = { version = "2", features = ["tray-icon"] } 
tauri-plugin-opener = "2" tauri-plugin-dialog = "2" serde = { version = "1", features = ["derive"] } @@ -36,3 +36,4 @@ oauth2 = "4.4" url = "2.5" lettre = { version = "0.11", features = ["tokio1", "tokio1-native-tls", "builder"] } tauri-plugin-log = "2.0.0" +tauri-plugin-shell = "2.3.4" diff --git a/src-tauri/resources/create_hearbit_audio.swift b/src-tauri/resources/create_hearbit_audio.swift index 3891c91..94cf9a8 100644 --- a/src-tauri/resources/create_hearbit_audio.swift +++ b/src-tauri/resources/create_hearbit_audio.swift @@ -110,6 +110,9 @@ func createAggregateDevice() { } print("Found BlackHole 2ch (ID: \(blackHoleID))") + // --- PART 1: Hearbit Audio (Input: Mic + BlackHole) --- + print("\n--- Creating 'Hearbit Audio' (Input) ---") + // Default Input var defaultInputID: AudioObjectID = 0 var size = UInt32(MemoryLayout.size) @@ -125,19 +128,14 @@ func createAggregateDevice() { } print("Found Default Input (ID: \(defaultInputID))") - // Check for existing "Hearbit Audio" by UID - let targetUID = "hearbit_audio_aggregate_v1" - if let existingID = findDeviceByUID(targetUID) { - print("Found existing Hearbit Audio device (ID: \(existingID)). Destroying to recreate...") - if AudioHardwareDestroyAggregateDevice(existingID) != noErr { - print("Warning: Failed to destroy existing device.") - } else { - print("Existing device destroyed.") - } + // Check for existing "Hearbit Audio" + let inputUID = "hearbit_audio_aggregate_v1" + if let existingID = findDeviceByUID(inputUID) { + print("Found existing Hearbit Audio (ID: \(existingID)). 
Destroying...") + AudioHardwareDestroyAggregateDevice(existingID) Thread.sleep(forTimeInterval: 0.5) } - // Build SubDevice List guard let bhUID = getStringProperty(objectID: blackHoleID, selector: kAudioDevicePropertyDeviceUID) else { print("Error: Could not get BlackHole UID.") exit(1) @@ -147,36 +145,47 @@ func createAggregateDevice() { exit(1) } - // Dedup: if Mic IS BlackHole (user set BlackHole as default), don't duplicate var subDevicesUIDs = [bhUID] if micUID != bhUID { subDevicesUIDs.append(micUID) } - let subDevicesArray = subDevicesUIDs.map { - [kAudioSubDeviceUIDKey: $0] - } - - let desc: [String: Any] = [ + let subDevicesArray = subDevicesUIDs.map { [kAudioSubDeviceUIDKey: $0] } + let inputDesc: [String: Any] = [ kAudioAggregateDeviceNameKey: "Hearbit Audio", - kAudioAggregateDeviceUIDKey: targetUID, + kAudioAggregateDeviceUIDKey: inputUID, kAudioAggregateDeviceIsPrivateKey: Int(0), kAudioAggregateDeviceIsStackedKey: Int(0), kAudioAggregateDeviceSubDeviceListKey: subDevicesArray ] - print("Creating Aggregate Device with UIDs: \(subDevicesUIDs)") - - var outID: AudioObjectID = 0 - let err = AudioHardwareCreateAggregateDevice(desc as CFDictionary, &outID) - - if err == noErr { - print("Success! Created 'Hearbit Audio' with ID: \(outID)") - exit(0) + var outInputID: AudioObjectID = 0 + let errIn = AudioHardwareCreateAggregateDevice(inputDesc as CFDictionary, &outInputID) + if errIn == noErr { + print("Success! Created 'Hearbit Audio' with ID: \(outInputID)") } else { - print("Failed to create device. Error code: \(err) (\(err.fourCC))") - exit(1) + print("Failed to create 'Hearbit Audio'. Error: \(errIn)") } + + + // --- PART 2: Cleanup Unstable "Hearbit Speakers" --- + // The previous "Hearbit Speakers" device caused MS Teams to crash. + // We strictly remove it here to restore stability. 
+ print("\n--- Cleaning up Unstable Devices ---") + let stopOutputUID = "hearbit_speakers_aggregate_v1" + if let existingOutID = findDeviceByUID(stopOutputUID) { + print("Found unstable 'Hearbit Speakers' (ID: \(existingOutID)). Removing to fix Teams crash...") + let errDist = AudioHardwareDestroyAggregateDevice(existingOutID) + if errDist == noErr { + print("Successfully removed unstable device.") + } else { + print("Warning: Failed to remove device. Error: \(errDist)") + } + } else { + print("No unstable 'Hearbit Speakers' found. System is clean.") + } + + exit(0) } createAggregateDevice() diff --git a/src-tauri/src/audio_processor.rs b/src-tauri/src/audio_processor.rs index 263c15f..bf0c8da 100644 --- a/src-tauri/src/audio_processor.rs +++ b/src-tauri/src/audio_processor.rs @@ -11,6 +11,9 @@ pub struct AudioProcessor { vad_chunk_size: usize, vad_buffer: Vec, + // Audio Config + channel_count: u16, + // Resampler resampler: FastFixedIn, resample_input_buffer: Vec, @@ -21,6 +24,9 @@ pub struct AudioProcessor { last_speech_time: u64, // In samples or frames hangover_samples: u64, + // Waiting Mode + waiting_for_speech: bool, + // Ring Buffer (for pre-roll) ring_buffer: Vec, ring_pos: usize, @@ -37,12 +43,14 @@ pub struct AudioProcessor { impl AudioProcessor { pub fn new( - sample_rate: u32, + sample_rate: u32, + channel_count: u16, writer: Arc>>>, - app_handle: AppHandle + app_handle: AppHandle, + wait_for_speech: bool ) -> Result { let vad_sample_rate = 16000; - let vad_chunk_size = 512; // Silero usually likes ~30ms which is 512 at 16k? No 16000 * 0.032 = 512. 
+ let vad_chunk_size = 512; // Initialize VAD let vad = VoiceActivityDetector::builder() @@ -51,8 +59,7 @@ impl AudioProcessor { .build() .map_err(|e| format!("Failed to init VAD: {:?}", e))?; - // Initialize Resampler (Input Rate -> 16000) using FastFixedIn for speed/simplicity - // new(f_ratio, max_resample_ratio_relative, polyn_deg, chunk_size, channels) + // Initialize Resampler (Input Rate -> 16000) let resampler = FastFixedIn::::new( 16000.0 / sample_rate as f64, 1.0, @@ -61,20 +68,26 @@ impl AudioProcessor { 1 ).map_err(|e| format!("Failed to init Resampler: {:?}", e))?; - // Pre-roll buffer (e.g. 0.5 seconds of high quality audio) + // Pre-roll buffer (1.0 seconds) * Channels (interleaved store) let ring_curr_seconds = 1.0; - let ring_size = (sample_rate as f32 * ring_curr_seconds) as usize; + // WavWriter writes interleaved, so we store interleaved. + let ring_size = (sample_rate as f32 * ring_curr_seconds) as usize * channel_count as usize; Ok(Self { vad, vad_chunk_size, vad_buffer: Vec::new(), + channel_count, resampler, resample_input_buffer: Vec::new(), resample_output_buffer: Vec::new(), is_speech_active: false, last_speech_time: 0, - hangover_samples: (sample_rate as f32 * 1.5) as u64, // 1.5s hangover + // Hangover counts "processed samples" which are actually frames * channels in current logic? + // Actually total_processed_samples usually counts FRAMES in audio terminology, but here we count elements. + // Let's stick to elements to match existing logic. + hangover_samples: (sample_rate as f32 * 1.5 * channel_count as f32) as u64, + waiting_for_speech: wait_for_speech, ring_buffer: vec![0.0; ring_size], ring_pos: 0, ring_size, @@ -87,30 +100,39 @@ impl AudioProcessor { } pub fn process(&mut self, data: &[f32]) { - // 1. Add to Ring Buffer (always, for pre-roll) + // 1. 
Add to Ring Buffer (Interleaved data - Record EVERYTHING) for &sample in data { self.ring_buffer[self.ring_pos] = sample; self.ring_pos = (self.ring_pos + 1) % self.ring_size; } - // 2. Resample for VAD - // We append new data to input buffer for resampler - self.resample_input_buffer.extend_from_slice(data); + // 2. Prepare VAD Signal (Mono Mixdown) + // FRESH START LOGIC (v0.2.0): + // We expect standard Stereo Input (BlackHole 2ch). + // No magic 3-channel aggregate. - // Process in chunks compatible with resampler - // Actually rubato process_into_buffer needs waves of input. - // Simplified: SincFixedIn wants a fixed number of input frames? - // Docs: "retrieve result... input buffer must contain needed number of frames" - // SincFixedIn: "input buffer used for resampling... must receive a fixed number of frames" - // Wait, SincFixedIn is fixed INPUT size. SincFixedOut is fixed OUTPUT size. - // We want to feed whatever we get. - // For simplicity, let's use a simpler resampling strategy or accept rubato's constraints. - // Rubato SincFixedIn: we must provide `input_frames_next` frames. + let channels = self.channel_count as usize; + let frame_count = data.len() / channels; + let mut vad_input_chunk = Vec::with_capacity(frame_count); + + for i in 0..frame_count { + let frame_start = i * channels; + + let mix_sample = if channels >= 2 { + // Stereo -> Average L + R + (data[frame_start] + data[frame_start + 1]) / 2.0 + } else { + // Mono -> Take as is + data[frame_start] + }; + + vad_input_chunk.push(mix_sample); + } + + + // 3. Resample for VAD + self.resample_input_buffer.extend_from_slice(&vad_input_chunk); - // Let's defer strict resampling and just use decimation if sample rate is multiple? - // No, user devices vary. 
- - // Handling Resampling properly: let needed = self.resampler.input_frames_next(); while self.resample_input_buffer.len() >= needed { let chunk: Vec = self.resample_input_buffer.drain(0..needed).collect(); @@ -127,63 +149,87 @@ impl AudioProcessor { // Update output buffer usage... logic is tricky with drain. } - // 3. Process VAD + // 4. Process VAD while self.vad_buffer.len() >= self.vad_chunk_size { let vad_chunk: Vec = self.vad_buffer.drain(0..self.vad_chunk_size).collect(); // Run Detection - // Run Detection let probability = self.vad.predict(vad_chunk.clone()); // Calculate RMS for this chunk to use as fallback/hybrid detection let sq_sum: f32 = vad_chunk.iter().map(|x| x * x).sum(); let rms = (sq_sum / vad_chunk.len() as f32).sqrt(); - // Hybrid VAD: Probability > 0.4 OR RMS > 0.005 (approx -46dB) - let is_speech = probability > 0.4 || rms > 0.005; + // Hybrid VAD: Probability > 0.8 OR RMS > 0.015 + // INCREASED THRESHOLDS (v1.9.0): + // Now that routing works, we must filter out system notifications (beeps) and noise floor. 
+ let is_speech = probability > 0.8 || rms > 0.015; if is_speech { self.is_speech_active = true; self.last_speech_time = self.total_processed_samples; } - // Emit VAD event periodically (every 500ms) + // Emit VAD event periodically (every 500ms is enough for non-diagnostic mode) if self.last_event_time.elapsed().as_millis() > 500 { - // Calculate simple RMS of the current chunk for debugging - let sq_sum: f32 = vad_chunk.iter().map(|x| x * x).sum(); - let rms = (sq_sum / vad_chunk.len() as f32).sqrt(); - - // Print debug info to stdout (viewable in terminal) - println!("VAD Debug: Prob={:.4}, RMS={:.6}, Speech={}", probability, rms, is_speech); - if let Some(app) = &self.app_handle { - // Just sending probability is enough for now - #[derive(serde::Serialize, Clone)] + #[derive(Clone, serde::Serialize)] struct VadEvent { - probability: f32, is_speech: bool, + probability: f32, } - let _ = app.emit("vad-event", VadEvent { probability, is_speech }); + let _ = app.emit("vad-event", VadEvent { + probability, + is_speech: self.is_speech_active, + }); } self.last_event_time = std::time::Instant::now(); + + // IMPORTANT: We reset is_speech_active after emitting, + // so we don't latch it forever if the user stops talking. + // However, the main loop sets it to true if current chunk is speech. + // This logic is a bit of a "latch for X ms". + self.is_speech_active = false; } } + // 5. Update Hangover and Check Write condition + if self.waiting_for_speech { + if self.is_speech_active { + // Trigger Detected! + println!("Auto-Start: Speech detected. 
Flushing pre-roll..."); + self.waiting_for_speech = false; + + // Flush Ring Buffer (Orderly: from ring_pos to end, then 0 to ring_pos) + let mut guard = self.writer.lock().unwrap(); + let amplitude = i16::MAX as f32; + + // Part 1: ring_pos to end + for i in self.ring_pos..self.ring_size { + let sample = self.ring_buffer[i]; + guard.write_sample((sample * amplitude) as i16).ok(); + } + // Part 2: 0 to ring_pos + for i in 0..self.ring_pos { + let sample = self.ring_buffer[i]; + guard.write_sample((sample * amplitude) as i16).ok(); + } + + // Emit event to notify frontend that "real" recording started + if let Some(app) = &self.app_handle { + let _ = app.emit("auto-recording-triggered", ()); + } + + } else { + // Still waiting, do not write to file. + return; + } + } + + // Standard Recording Logic (Active or Hangover) let time_since_speech = self.total_processed_samples.saturating_sub(self.last_speech_time); if self.is_speech_active || time_since_speech < self.hangover_samples { - // We are recording! - // Check if we just started (transition) - // Ideally we dump the ring buffer here if we just switched state. - // Implementing perfect ring buffer dump is complex (need to track state changes better). - // MVP: Just Write Current Data if in state. - - // Improvement: If we are in hangover, we just write. - // If we just detected speech (was not speech?), dump ring buffer? - // We'd need to know if we 'wrote' the ring buffer already. 
- - // Simple Logic: just write all incoming data if (Now - LastSpeech < Hangover) - let mut guard = self.writer.lock().unwrap(); for &sample in data { let amplitude = i16::MAX as f32; diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index cadbbf9..a4f8698 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -1,4 +1,9 @@ -use tauri::{AppHandle, Manager, State, Emitter}; +use tauri::{ + AppHandle, Manager, State, Emitter, + menu::{Menu, MenuItem}, + tray::{TrayIconBuilder, TrayIconEvent}, + WindowEvent +}; use std::sync::{Arc, Mutex}; use std::process::Command; use cpal::traits::{DeviceTrait, HostTrait, StreamTrait}; @@ -65,7 +70,7 @@ fn get_input_devices() -> Result, String> { #[tauri::command] -fn start_recording(app: AppHandle, state: State<'_, AppState>, device_id: String, save_path: Option, custom_filename: Option) -> Result<(), String> { +fn start_recording(app: AppHandle, state: State<'_, AppState>, device_id: String, save_path: Option, custom_filename: Option, wait_for_speech: Option) -> Result<(), String> { emit_log(&app, "INFO", &format!("Starting recording on device: {}", device_id)); let host = cpal::default_host(); @@ -77,16 +82,17 @@ fn start_recording(app: AppHandle, state: State<'_, AppState>, device_id: String .or_else(|| host.default_input_device()) .ok_or("No input device found")?; - let config = device.default_input_config().map_err(|e| e.to_string())?; - - // VAD requires 16Hz or 8kHz, typically. Silero likes 16k. - // We might need to resample or just check if the device supports it. - // For MVP VAD, we will try to stick to standard rates. - // Actually, simple energy VAD is easier to start with if Silero is too heavy or requires ONNX runtime. - // Let's check the crate docs or usage first. - // Wait, the user wants to IGNORE music. Energy VAD will fail on music. - // voice_activity_detector crate usually uses Silero or similar. 
- + // Select the configuration with the MAXIMUM number of channels + // This is crucial for "Hearbit Audio" (Aggregate) which lists 3 channels but might default to 2. + // We want the raw 3 channels to separate Mic (Ch0) from System (Ch1+2). + let supported_configs = device.supported_input_configs().map_err(|e| e.to_string())?; + let config = supported_configs + .max_by_key(|c| c.channels()) + .map(|c| c.with_max_sample_rate()) + .ok_or("No supported input configurations found")?; + + emit_log(&app, "INFO", &format!("Selected Audio Config: {} Channels, {} Hz", config.channels(), config.sample_rate())); + let spec = hound::WavSpec { channels: config.channels(), sample_rate: config.sample_rate(), @@ -122,7 +128,12 @@ fn start_recording(app: AppHandle, state: State<'_, AppState>, device_id: String // Initialize AudioProcessor (VAD) // We pass the writer to it. - let processor = AudioProcessor::new(config.sample_rate(), writer.clone(), app.clone()) + let should_wait = wait_for_speech.unwrap_or(false); + if should_wait { + emit_log(&app, "INFO", "Recording started in WAITING mode (buffer-only until speech)."); + } + + let processor = AudioProcessor::new(config.sample_rate(), config.channels(), writer.clone(), app.clone(), should_wait) .map_err(|e| format!("Failed to create AudioProcessor: {}", e))?; // Wrap processor in Arc so we can share/move it into callback @@ -560,6 +571,62 @@ async fn summarize_text(app: AppHandle, text: String, api_key: String, product_i } } +#[derive(serde::Serialize)] +struct AudioMetadata { + duration: f64, + size: u64, + format: String, +} + +#[tauri::command] +fn get_audio_metadata(app: AppHandle, file_path: String) -> Result { + emit_log(&app, "INFO", &format!("Getting metadata for: {}", file_path)); + + // Get file size + let metadata = std::fs::metadata(&file_path).map_err(|e| e.to_string())?; + let size = metadata.len(); + + // Extract format from extension + let path = std::path::Path::new(&file_path); + let format = path.extension() + 
.and_then(|e| e.to_str()) + .unwrap_or("unknown") + .to_string(); + + // Get duration using ffprobe (requires ffmpeg to be installed) + let duration = match Command::new("ffprobe") + .args([ + "-v", "error", + "-show_entries", "format=duration", + "-of", "default=noprint_wrappers=1:nokey=1", + &file_path + ]) + .output() + { + Ok(output) => { + if output.status.success() { + let duration_str = String::from_utf8_lossy(&output.stdout); + duration_str.trim().parse::().unwrap_or(0.0) + } else { + emit_log(&app, "WARN", "ffprobe failed, duration = 0"); + 0.0 + } + }, + Err(_) => { + emit_log(&app, "WARN", "ffprobe not found, duration = 0"); + 0.0 + } + }; + + emit_log(&app, "SUCCESS", &format!("Metadata: {}s, {} bytes", duration, size)); + + Ok(AudioMetadata { + duration, + size, + format, + }) +} + #[tauri::command] fn open_audio_midi_setup() -> Result<(), String> { Command::new("open") @@ -640,6 +707,49 @@ async fn read_log_file(app: AppHandle) -> Result { #[cfg_attr(mobile, tauri::mobile_entry_point)] pub fn run() { tauri::Builder::default() + .setup(|app| { + // Setup Tray Icon + let quit_i = MenuItem::with_id(app, "quit", "Quit Hearbit AI", true, None::<&str>).unwrap(); + let show_i = MenuItem::with_id(app, "show", "Show Window", true, None::<&str>).unwrap(); + let menu = Menu::with_items(app, &[&show_i, &quit_i]).unwrap(); + + let _tray = TrayIconBuilder::new() + .icon(app.default_window_icon().unwrap().clone()) + .menu(&menu) + .show_menu_on_left_click(true) + .on_menu_event(|app, event| { + match event.id.as_ref() { + "quit" => app.exit(0), + "show" => { + if let Some(window) = app.get_webview_window("main") { + let _ = window.show(); + let _ = window.set_focus(); + } + } + _ => {} + } + }) + .on_tray_icon_event(|tray, event| { + if let TrayIconEvent::Click { .. 
} = event { + let app = tray.app_handle(); + if let Some(window) = app.get_webview_window("main") { + let _ = window.show(); + let _ = window.set_focus(); + } + } + }) + .build(app)?; + + Ok(()) + }) + .on_window_event(|window, event| { + if let WindowEvent::CloseRequested { api, .. } = event { + // Prevent window from closing, just hide it + window.hide().unwrap(); + api.prevent_close(); + } + }) + .plugin(tauri_plugin_shell::init()) .plugin(tauri_plugin_log::Builder::default() .targets([ tauri_plugin_log::Target::new(tauri_plugin_log::TargetKind::Stdout), @@ -670,6 +780,7 @@ pub fn run() { auth::get_calendar_events, save_text_file, read_log_file, + get_audio_metadata, email::send_smtp_email ]) .run(tauri::generate_context!()) diff --git a/src/App.tsx b/src/App.tsx index 2b48ed9..40ed9de 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -7,6 +7,7 @@ import TranscriptionView from "./components/TranscriptionView"; import Tabs from "./components/Tabs"; import MeetingsView from "./components/MeetingsView"; import HistoryView from "./components/HistoryView"; +import Import from "./components/Import"; import ToastContainer, { ToastMessage, ToastType } from "./components/ui/Toast"; export interface PromptTemplate { @@ -24,8 +25,8 @@ export interface EmailTemplate { } function App() { - const [view, setView] = useState<'recorder' | 'settings' | 'transcription' | 'meetings' | 'history'>('recorder'); - const [lastTab, setLastTab] = useState<'recorder' | 'transcription' | 'meetings' | 'history'>('recorder'); + const [view, setView] = useState<'recorder' | 'settings' | 'transcription' | 'meetings' | 'history' | 'import'>('recorder'); + const [lastTab, setLastTab] = useState<'recorder' | 'transcription' | 'meetings' | 'history' | 'import'>('recorder'); // Auto-start recording state to handle "Join & Record" transition @@ -311,6 +312,14 @@ Thanks!` } }; + const handleRenameHistory = (id: string, newSubject: string) => { + const newHistory = history.map(item => + item.id === id ? 
{ ...item, subject: newSubject } : item + ); + setHistory(newHistory); + localStorage.setItem('infomaniak_history', JSON.stringify(newHistory)); + }; + const handleDeleteHistory = (id: string) => { const newHistory = history.filter(item => item.id !== id); setHistory(newHistory); @@ -343,7 +352,7 @@ Thanks!` setView(t)} /> @@ -410,6 +419,7 @@ Thanks!` history={history} onLoad={handleLoadHistory} onDelete={handleDeleteHistory} + onRename={handleRenameHistory} /> )} @@ -429,6 +439,23 @@ Thanks!` /> )} + {view === 'import' && ( + setView('transcription')} + addToast={addToast} + setTranscription={setTranscription} + setSummary={setSummary} + /> + )} + + + + {view === 'settings' && ( diff --git a/src/components/HistoryView.tsx b/src/components/HistoryView.tsx index 59cac57..ef115ad 100644 --- a/src/components/HistoryView.tsx +++ b/src/components/HistoryView.tsx @@ -1,4 +1,5 @@ -import { FileText, Trash2, Calendar } from 'lucide-react'; +import { FileText, Trash2, Calendar, Pencil, Check, X } from 'lucide-react'; +import { useState } from 'react'; interface HistoryItem { id: string; @@ -13,9 +14,30 @@ interface HistoryViewProps { history: HistoryItem[]; onLoad: (item: HistoryItem) => void; onDelete: (id: string) => void; + onRename: (id: string, newSubject: string) => void; } -export default function HistoryView({ history, onLoad, onDelete }: HistoryViewProps) { +export default function HistoryView({ history, onLoad, onDelete, onRename }: HistoryViewProps) { + const [editingId, setEditingId] = useState(null); + const [editValue, setEditValue] = useState(""); + + const startEditing = (item: HistoryItem) => { + setEditingId(item.id); + setEditValue(item.subject || "Untitled Recording"); + }; + + const saveEdit = () => { + if (editingId && editValue.trim()) { + onRename(editingId, editValue.trim()); + setEditingId(null); + } + }; + + const cancelEdit = () => { + setEditingId(null); + setEditValue(""); + }; + return (

@@ -33,26 +55,58 @@ export default function HistoryView({ history, onLoad, onDelete }: HistoryViewPr {history.map(item => (
-
onLoad(item)} - > -

- {item.subject || "Untitled Recording"} -

-
+
+ {editingId === item.id ? ( +
e.stopPropagation()}> + setEditValue(e.target.value)} + onKeyDown={(e) => { + if (e.key === 'Enter') saveEdit(); + if (e.key === 'Escape') cancelEdit(); + }} + /> + + +
+ ) : ( +
onLoad(item)} + > +

+ {item.subject || "Untitled Recording"} + +

+
+ )} + +
!editingId && onLoad(item)}> {item.date} {item.filename && {item.filename}}
-

+

!editingId && onLoad(item)}> {item.summary ? item.summary.substring(0, 150) + "..." : "No summary available."}

+ + ) : ( + <> + +
+

Drag & Drop audio file

+

+ or click below to browse +

+
+ +

+ Supported: MP3, MP4, WAV, M4A, FLAC, OGG, AAC, WMA +

+ + )} +
+
+ + {/* Configuration Section */} + {selectedFile && ( +
+ {/* Meeting Title */} +
+ + setMeetingTitle(e.target.value)} + disabled={isProcessing} + className="w-full p-2 text-sm bg-secondary rounded border border-border outline-none focus:ring-2 focus:ring-primary disabled:opacity-50" + placeholder="Enter meeting title..." + /> +
+ + {/* AI Template */} +
+ + +
+ + {/* Process Button */} + + + {/* Progress Indicator */} + {stageInfo && ( +
+ + + {stageInfo.text} + +
+ )} +
+ )} +
+
+ ); +}; + +export default Import; diff --git a/src/components/Recorder.tsx b/src/components/Recorder.tsx index 6c3e5c8..b622b44 100644 --- a/src/components/Recorder.tsx +++ b/src/components/Recorder.tsx @@ -1,4 +1,4 @@ -import React, { useState, useEffect } from 'react'; +import React, { useState, useEffect, useRef } from 'react'; import { Mic, Square, Users, Headphones } from 'lucide-react'; import { invoke } from "@tauri-apps/api/core"; import { listen } from '@tauri-apps/api/event'; @@ -58,6 +58,10 @@ const Recorder: React.FC = ({ const [isRecording, setIsRecording] = useState(false); const [isStopping, setIsStopping] = useState(false); // New lock state const [isPaused, setIsPaused] = useState(false); + const [isWaiting, setIsWaiting] = useState(false); // New state for Auto-Start + const [autoStartEnabled, setAutoStartEnabled] = useState(false); // Toggle state + + const [status, setStatus] = useState('Ready to record'); const [selectedDevice, setSelectedDevice] = useState(''); const [selectedPromptId, setSelectedPromptId] = useState(''); @@ -149,19 +153,33 @@ const Recorder: React.FC = ({ const startRecording = async (deviceIdOverride?: string) => { try { - setStatus('Starting...'); setStatus('Starting...'); // Check override or state const targetDeviceId = deviceIdOverride || selectedDevice; // Pass customFilename (camelCase key maps to snake_case in Rust automatically or we need to check Tauri mapping, usually it maps camel to camel? Rust expects snake. 
Let's use snake_case in invoke args to be safe) - await invoke('start_recording', { deviceId: targetDeviceId, savePath: savePath || null, customFilename: props.recordingSubject || null }); + await invoke('start_recording', { + deviceId: targetDeviceId, + savePath: savePath || null, + customFilename: props.recordingSubject || null, + waitForSpeech: autoStartEnabled // Pass the toggle state + }); + setIsRecording(true); setIsPaused(false); setTranscription(''); setSummary(''); - setStatus('Recording...'); - addToast('Recording started', 'success', 2000); + + if (autoStartEnabled) { + setIsWaiting(true); + setStatus('Waiting for audio...'); + addToast('Standing by for audio...', 'info', 3000); + } else { + setIsWaiting(false); + setStatus('Recording...'); + addToast('Recording started', 'success', 2000); + } + } catch (e) { console.error(e); setStatus(`Error: ${e}`); @@ -170,43 +188,83 @@ const Recorder: React.FC = ({ } }; - // VAD & Auto-Stop Logic - useEffect(() => { - let unlisten: () => void; + // Refs for interval access to avoid dependency cycles + const lastSpeechTimeRef = useRef(Date.now()); + const isStoppingRef = useRef(false); - const setupListener = async () => { - unlisten = await listen<{ is_speech: boolean, probability: number }>('vad-event', (event) => { + // Update refs when state changes + useEffect(() => { + lastSpeechTimeRef.current = lastSpeechTime; + }, [lastSpeechTime]); + + useEffect(() => { + isStoppingRef.current = isStopping; + }, [isStopping]); + + // 1. 
Event Listeners Effect (Run ONCE when recording starts) + useEffect(() => { + let unlistenVAD: () => void; + let unlistenTrigger: () => void; + + const setupListeners = async () => { + if (!isRecording) return; + + console.log("Setting up VAD listeners..."); + // VAD Event Listener + unlistenVAD = await listen<{ is_speech: boolean, probability: number }>('vad-event', (event) => { if (event.payload.is_speech) { setLastSpeechTime(Date.now()); + lastSpeechTimeRef.current = Date.now(); // Update ref immediately setSilenceDuration(0); } }); + + // Auto-Start Trigger Listener + unlistenTrigger = await listen('auto-recording-triggered', () => { + console.log("Auto-Start Triggered from Backend!"); + // Only trigger if we are actually waiting + setIsWaiting((prev) => { + if (prev) { + addToast("Audio detected! Recording started.", 'success', 4000); + return false; + } + return prev; + }); + setStatus('Recording (Auto-Started)...'); + setLastSpeechTime(Date.now()); + }); }; - if (isRecording && !isPaused) { - setupListener(); - setLastSpeechTime(Date.now()); // Reset on start + if (isRecording) { + setupListeners(); } - const interval = setInterval(() => { - if (isRecording && !isPaused) { - const diff = (Date.now() - lastSpeechTime) / 1000; - setSilenceDuration(diff); + return () => { + // Cleanup listeners + if (unlistenVAD) unlistenVAD(); + if (unlistenTrigger) unlistenTrigger(); + }; + }, [isRecording, addToast]); // Dependencies for listener setup - // Auto-stop after 30 seconds of silence - if (diff > 30 && !isStopping) { // Check lock - console.log("Auto-stopping due to silence"); - addToast("Auto-stopping (Silence detected)", "info", 3000); - stopRecording(); - } + // Auto-Stop Interval Effect + useEffect(() => { + if (!isRecording || isPaused || isWaiting) return; + + const interval = setInterval(() => { + const now = Date.now(); + const diff = (now - lastSpeechTimeRef.current) / 1000; + setSilenceDuration(diff); + + // Auto-stop after 30 seconds of silence + if 
(diff > 30 && !isStoppingRef.current) { + console.log("Auto-stopping due to silence"); + addToast("Auto-stopping (Silence detected)", "info", 3000); + stopRecording(); } }, 1000); - return () => { - if (unlisten) unlisten(); - clearInterval(interval); - }; - }, [isRecording, isPaused, lastSpeechTime]); + return () => clearInterval(interval); + }, [isRecording, isPaused, isWaiting, addToast]); // Dependencies for interval lifecycle // Handle Auto Start Prop useEffect(() => { @@ -273,6 +331,7 @@ const Recorder: React.FC = ({ try { setIsRecording(false); setIsPaused(false); + setIsWaiting(false); // Reset waiting state setStatus('Processing...'); const filePath = await invoke('stop_recording'); @@ -357,6 +416,8 @@ const Recorder: React.FC = ({ } }; + + return (
{/* Fixed Header - Reduced padding */} @@ -367,9 +428,9 @@ const Recorder: React.FC = ({ {/* Scrollable Content - Reduced spacing */}
-
+
{isRecording ? ( -
+
) : ( @@ -381,12 +442,12 @@ const Recorder: React.FC = ({

- {isRecording ? (isPaused ? 'Paused' : 'Listening...') : 'Ready to Record'} + {isRecording ? (isWaiting ? 'Waiting for Audio...' : isPaused ? 'Paused' : 'Listening...') : 'Ready to Record'}

{status} - {isRecording && !isPaused && silenceDuration > 10 && ( + {isRecording && !isPaused && !isWaiting && silenceDuration > 10 && ( Silence detected: {Math.floor(silenceDuration)}s @@ -395,30 +456,46 @@ const Recorder: React.FC = ({

{!isRecording ? ( - + <> + +
+ +
+ ) : (
- + {/* In Waiting mode, we can only Stop (Cancel) */} + {!isWaiting && ( + + )}
)} diff --git a/src/components/Tabs.tsx b/src/components/Tabs.tsx index 5cdd190..f311b6e 100644 --- a/src/components/Tabs.tsx +++ b/src/components/Tabs.tsx @@ -1,9 +1,9 @@ import React from 'react'; -import { Mic, FileText, Calendar } from 'lucide-react'; +import { Mic, FileText, Calendar, Upload } from 'lucide-react'; interface TabsProps { - currentTab: 'recorder' | 'transcription' | 'settings' | 'meetings' | 'history'; - onTabChange: (tab: 'recorder' | 'transcription' | 'settings' | 'meetings' | 'history') => void; + currentTab: 'recorder' | 'transcription' | 'settings' | 'meetings' | 'history' | 'import'; + onTabChange: (tab: 'recorder' | 'transcription' | 'settings' | 'meetings' | 'history' | 'import') => void; } const Tabs: React.FC = ({ currentTab, onTabChange }) => { @@ -16,6 +16,13 @@ const Tabs: React.FC = ({ currentTab, onTabChange }) => { Recording +