feat: release 1.0 - rename to Hearbit AI, fix timestamps, update UI

This commit is contained in:
michael.borak
2026-01-20 10:14:07 +01:00
parent 768574709f
commit cd08e1c144
69 changed files with 1369 additions and 545 deletions

View File

@@ -1,4 +1,4 @@
use tauri::State;
use tauri::{AppHandle, Manager, State, Emitter};
use std::sync::{Arc, Mutex};
use std::process::Command;
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
@@ -17,6 +17,22 @@ struct AudioDevice {
name: String,
}
// Structured log entry pushed to the frontend via the "log-event" Tauri event
// (see `emit_log`). Serialized to JSON for the webview; Clone is needed so the
// event payload can be emitted by value.
#[derive(serde::Serialize, Clone)]
struct LogEvent {
    // Severity tag as a plain string, e.g. "INFO", "DEBUG", "WARN", "ERROR", "SUCCESS".
    level: String,
    // Human-readable log text.
    message: String,
    // Local wall-clock time formatted as HH:MM:SS (set at emit time).
    timestamp: String,
}
/// Broadcast a structured log entry to the frontend over the "log-event"
/// channel, stamped with the current local time (HH:MM:SS).
///
/// Emission failures are deliberately ignored (`let _ = …`): logging must
/// never make an audio/API command fail.
fn emit_log(app: &AppHandle, level: &str, message: &str) {
    let event = LogEvent {
        timestamp: chrono::Local::now().format("%H:%M:%S").to_string(),
        level: level.to_owned(),
        message: message.to_owned(),
    };
    let _ = app.emit("log-event", event);
}
#[tauri::command]
fn greet(name: &str) -> String {
format!("Hello, {}! You've been greeted from Rust!", name)
@@ -41,22 +57,11 @@ fn get_input_devices() -> Result<Vec<AudioDevice>, String> {
Ok(result)
}
#[tauri::command]
/// Install the BlackHole 2ch virtual audio driver via Homebrew.
///
/// Blocks until `brew install blackhole-2ch` finishes. Returns the command's
/// stdout on success, or stderr (lossily decoded) as the error string.
/// Errors if the `brew` binary cannot be launched at all.
fn install_driver() -> Result<String, String> {
    let output = Command::new("brew")
        .args(["install", "blackhole-2ch"])
        .output()
        .map_err(|e| format!("Failed to execute command: {}", e))?;
    // Guard clause: surface brew's stderr when the install fails.
    if !output.status.success() {
        return Err(String::from_utf8_lossy(&output.stderr).to_string());
    }
    Ok(String::from_utf8_lossy(&output.stdout).to_string())
}
#[tauri::command]
fn start_recording(state: State<'_, AppState>, device_id: String) -> Result<(), String> {
fn start_recording(app: AppHandle, state: State<'_, AppState>, device_id: String, save_path: Option<String>) -> Result<(), String> {
emit_log(&app, "INFO", &format!("Starting recording on device: {}", device_id));
let host = cpal::default_host();
// Find device by name (using name as ID)
@@ -75,16 +80,31 @@ fn start_recording(state: State<'_, AppState>, device_id: String) -> Result<(),
sample_format: hound::SampleFormat::Int,
};
// Create a temporary file
let temp_dir = std::env::temp_dir();
let file_path = temp_dir.join(format!("recording_{}.wav", std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_secs()));
// Determine file path: User provided or Temp
let file_path = if let Some(path) = save_path {
if path.trim().is_empty() {
std::env::temp_dir().join(format!("recording_{}.wav", std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_secs()))
} else {
// Check if directory exists, if not try to create it or error out?
// For now, assume user gives a valid directory. We'll append filename.
std::path::PathBuf::from(path).join(format!("recording_{}.wav", std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_secs()))
}
} else {
std::env::temp_dir().join(format!("recording_{}.wav", std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_secs()))
};
let file_path_str = file_path.to_string_lossy().to_string();
emit_log(&app, "INFO", &format!("Saving recording to: {}", file_path_str));
let writer = hound::WavWriter::create(&file_path, spec).map_err(|e| e.to_string())?;
let writer = Arc::new(Mutex::new(writer));
let writer_clone = writer.clone();
let err_fn = |err| eprintln!("an error occurred on stream: {}", err);
let app_handle = app.clone();
let err_fn = move |err| {
eprintln!("an error occurred on stream: {}", err);
emit_log(&app_handle, "ERROR", &format!("Stream error: {}", err));
};
let stream = match config.sample_format() {
cpal::SampleFormat::F32 => device.build_input_stream(
@@ -128,13 +148,15 @@ fn start_recording(state: State<'_, AppState>, device_id: String) -> Result<(),
// Store state
*state.recording_stream.lock().unwrap() = Some(stream);
*state.recording_file_path.lock().unwrap() = Some(file_path_str);
*state.recording_file_path.lock().unwrap() = Some(file_path_str.clone());
emit_log(&app, "SUCCESS", &format!("Recording started. File: {}", file_path_str));
Ok(())
}
#[tauri::command]
fn stop_recording(state: State<'_, AppState>) -> Result<String, String> {
fn stop_recording(app: AppHandle, state: State<'_, AppState>) -> Result<String, String> {
emit_log(&app, "INFO", "Stopping recording...");
// Drop stream to stop recording
{
let mut stream_guard = state.recording_stream.lock().unwrap();
@@ -146,7 +168,35 @@ fn stop_recording(state: State<'_, AppState>) -> Result<String, String> {
// Return file path
let mut path_guard = state.recording_file_path.lock().unwrap();
path_guard.take().ok_or("No recording path found".to_string())
let path = path_guard.take().ok_or("No recording path found".to_string())?;
emit_log(&app, "SUCCESS", &format!("Recording stopped. Saved to: {}", path));
Ok(path)
}
#[tauri::command]
/// Pause the active capture stream without dropping it, so a later
/// `resume_recording` can continue appending to the same WAV file.
///
/// Errors with "Not recording" when no stream is stored in state, or with
/// the cpal error text if the backend refuses to pause.
fn pause_recording(app: AppHandle, state: State<'_, AppState>) -> Result<(), String> {
    emit_log(&app, "INFO", "Pausing recording...");
    let guard = state.recording_stream.lock().unwrap();
    match guard.as_ref() {
        Some(stream) => {
            stream.pause().map_err(|e| e.to_string())?;
            emit_log(&app, "SUCCESS", "Recording paused.");
            Ok(())
        }
        None => Err("Not recording".to_string()),
    }
}
#[tauri::command]
/// Resume a previously paused capture stream (counterpart of
/// `pause_recording`); audio continues into the same WAV writer.
///
/// Errors with "Not recording" when no stream is stored in state, or with
/// the cpal error text if the backend cannot restart playback of the stream.
fn resume_recording(app: AppHandle, state: State<'_, AppState>) -> Result<(), String> {
    emit_log(&app, "INFO", "Resuming recording...");
    let guard = state.recording_stream.lock().unwrap();
    match guard.as_ref() {
        Some(stream) => {
            stream.play().map_err(|e| e.to_string())?;
            emit_log(&app, "SUCCESS", "Recording resumed.");
            Ok(())
        }
        None => Err("Not recording".to_string()),
    }
}
#[derive(serde::Deserialize)]
@@ -157,6 +207,7 @@ struct ModelListResponse {
// One model entry from the Infomaniak OpenAI-compatible `/models` listing.
#[derive(serde::Deserialize)]
struct ModelData {
    // Model identifier; also reused as the display name downstream.
    id: String,
    // Present in the API payload but unused by this app; kept so
    // deserialization tolerates the field, hence the dead_code allow.
    #[allow(dead_code)]
    owned_by: Option<String>,
}
@@ -177,6 +228,7 @@ struct Choice {
}
// Assistant message inside a chat-completion choice.
#[derive(serde::Deserialize)]
struct Message {
    // Generated text of the completion.
    // NOTE(review): the dead_code allow looks stale — `content` appears to be
    // read by the summarize path; confirm before removing the attribute.
    #[allow(dead_code)]
    content: String,
}
@@ -187,20 +239,27 @@ struct ModelInfo {
}
#[tauri::command]
async fn get_available_models(api_key: String, product_id: String) -> Result<Vec<ModelInfo>, String> {
async fn get_available_models(app: AppHandle, api_key: String, product_id: String) -> Result<Vec<ModelInfo>, String> {
emit_log(&app, "INFO", "Fetching available models from Infomaniak...");
let client = reqwest::Client::new();
// Use the v2/openai compliant endpoint as per docs
let url = format!("https://api.infomaniak.com/2/ai/{}/openai/v1/models", product_id);
emit_log(&app, "DEBUG", &format!("GET {}", url));
let res = client.get(&url)
.header("Authorization", format!("Bearer {}", api_key))
.send()
.await
.map_err(|e| e.to_string())?;
.map_err(|e| {
let msg = format!("Network error fetching models: {}", e);
emit_log(&app, "ERROR", &msg);
msg
})?;
if res.status().is_success() {
let raw_body = res.text().await.map_err(|e| e.to_string())?;
println!("Models Raw Response: {}", raw_body);
// println!("Models Raw Response: {}", raw_body);
let list: ModelListResponse = serde_json::from_str(&raw_body)
.map_err(|e| format!("Failed to parse models: {}. Body: {}", e, raw_body))?;
@@ -209,20 +268,34 @@ async fn get_available_models(api_key: String, product_id: String) -> Result<Vec
.map(|m| ModelInfo {
id: m.id.clone(),
name: m.id, // Use ID as name for now, or fetch more details if available
}).collect();
}).collect::<Vec<ModelInfo>>();
emit_log(&app, "SUCCESS", &format!("Loaded {} models.", models.len()));
Ok(models)
} else {
// Fallback to v1 if v2 fails or try another common path?
// For now just error out
let err = res.text().await.unwrap_or_default();
emit_log(&app, "ERROR", &format!("Failed to fetch models: {}", err));
Err(format!("Failed to fetch models: {}", err))
}
}
// Whisper `verbose_json` transcription payload: both fields are optional
// because the API may return only raw text, only segments, or neither.
#[derive(serde::Deserialize)]
struct WhisperVerboseResponse {
    // Full transcript as one string (fallback when segments are absent).
    text: Option<String>,
    // Timestamped segments used to build the "[MM:SS] text" transcript.
    segments: Option<Vec<Segment>>,
}
// A single timestamped span of the transcription.
#[derive(serde::Deserialize)]
struct Segment {
    // Segment start time in seconds from the beginning of the audio.
    start: f64,
    // Segment end time in seconds (deserialized but not used for formatting).
    end: f64,
    // Transcribed text for this span.
    text: String,
}
#[tauri::command]
async fn transcribe_audio(file_path: String, api_key: String, product_id: String) -> Result<String, String> {
async fn transcribe_audio(app: AppHandle, file_path: String, api_key: String, product_id: String) -> Result<String, String> {
emit_log(&app, "INFO", "Starting transcription with timestamps...");
let client = reqwest::Client::new();
// Prepare file part
@@ -235,44 +308,88 @@ async fn transcribe_audio(file_path: String, api_key: String, product_id: String
let form = reqwest::multipart::Form::new()
.part("file", file_part)
.text("model", "whisper");
.text("model", "whisper")
.text("response_format", "verbose_json")
.text("timestamp_granularities[]", "segment"); // Crucial for accurate segments
let url = format!("https://api.infomaniak.com/1/ai/{}/openai/audio/transcriptions", product_id);
emit_log(&app, "DEBUG", &format!("POST {}", url));
let res = client.post(&url)
.header("Authorization", format!("Bearer {}", api_key))
.multipart(form)
.send()
.await
.map_err(|e| e.to_string())?;
.map_err(|e| {
let msg = format!("Network error during transcription: {}", e);
emit_log(&app, "ERROR", &msg);
msg
})?;
if res.status().is_success() {
let raw_body = res.text().await.map_err(|e| e.to_string())?;
println!("Transcription Raw Response: {}", raw_body);
// Attempt to parse text or batch_id
// Attempt to parse text or batch_id
let response: WhisperResponse = serde_json::from_str(&raw_body)
// Check if we got a batch ID
#[derive(serde::Deserialize)]
struct BatchResponse {
batch_id: Option<String>,
}
// Try parsing as batch response first (Infomaniak specific behavior)
if let Ok(batch_res) = serde_json::from_str::<BatchResponse>(&raw_body) {
if let Some(batch_id) = batch_res.batch_id {
emit_log(&app, "INFO", &format!("Transcription queued. Batch ID: {}", batch_id));
return poll_transcription(&app, &client, &api_key, &product_id, &batch_id).await;
}
}
// If not batch, try parsing verbose response directly
// Log the raw body so we can see why it fails
emit_log(&app, "DEBUG", &format!("Direct Response (first 500 chars): {:.500}", raw_body));
let response: WhisperVerboseResponse = serde_json::from_str(&raw_body)
.map_err(|e| format!("Failed to decode JSON: {}. Body: {}", e, raw_body))?;
match (response.text, response.batch_id) {
(Some(text), _) => Ok(text),
(_, Some(batch_id)) => {
// Need to poll
poll_transcription(&client, &api_key, &product_id, &batch_id).await
},
_ => Err(format!("Response contained neither text nor batch_id. Body: {}", raw_body))
if let Some(segments) = response.segments {
emit_log(&app, "INFO", &format!("Found {} segments (Direct).", segments.len()));
for (i, seg) in segments.iter().take(3).enumerate() {
emit_log(&app, "DEBUG", &format!("Seg {}: start={}", i, seg.start));
}
// Format timestamps: [MM:SS] Text
let mut formatted_transcript = String::new();
for segment in segments {
let start_mins = (segment.start / 60.0).floor() as u64;
let start_secs = (segment.start % 60.0).floor() as u64;
formatted_transcript.push_str(&format!("[{:02}:{:02}] {}\n", start_mins, start_secs, segment.text.trim()));
}
// Fallback to raw text if segments empty
if formatted_transcript.trim().is_empty() {
if let Some(text) = response.text {
emit_log(&app, "SUCCESS", "Segments missing, using raw text.");
return Ok(text);
}
} else {
emit_log(&app, "SUCCESS", "Transcription received with timestamps.");
return Ok(formatted_transcript);
}
} else if let Some(text) = response.text {
emit_log(&app, "SUCCESS", "Segments missing, using raw text.");
return Ok(text);
}
emit_log(&app, "ERROR", "Response contained no recognized content.");
Err(format!("Response contained no recognized content. Body: {}", raw_body))
} else {
let error_text = res.text().await.unwrap_or_default();
emit_log(&app, "ERROR", &format!("Transcription failed: {}", error_text));
Err(format!("Transcription failed: {}", error_text))
}
}
async fn poll_transcription(client: &reqwest::Client, api_key: &str, product_id: &str, batch_id: &str) -> Result<String, String> {
// Polling URL: /1/ai/{product_id}/results/{batch_id} (or similar, verifying via trial)
// If that fails, we can try /openai/audio/transcriptions/{batch_id} but documentation suggests results endpoint.
// Let's assume the standard Infomaniak pattern for batches.
async fn poll_transcription(app: &AppHandle, client: &reqwest::Client, api_key: &str, product_id: &str, batch_id: &str) -> Result<String, String> {
let status_url = format!("https://api.infomaniak.com/1/ai/{}/results/{}", product_id, batch_id);
let mut attempts = 0;
@@ -280,6 +397,7 @@ async fn poll_transcription(client: &reqwest::Client, api_key: &str, product_id:
attempts += 1;
sleep(Duration::from_secs(2)).await;
emit_log(app, "DEBUG", &format!("Polling status... Attempt {}", attempts));
let res = client.get(&status_url)
.header("Authorization", format!("Bearer {}", api_key))
.send()
@@ -301,31 +419,63 @@ async fn poll_transcription(client: &reqwest::Client, api_key: &str, product_id:
if dl_res.status().is_success() {
let content = dl_res.text().await.map_err(|e| e.to_string())?;
// Try to parse the content as JSON to see if it's { "text": "..." }
if let Ok(json_val) = serde_json::from_str::<serde_json::Value>(&content) {
if let Some(text_content) = json_val.get("text").and_then(|t| t.as_str()) {
return Ok(text_content.to_string());
}
emit_log(app, "DEBUG", &format!("Poll Raw Content (first 500 chars): {:.500}", content));
// Try to parse as Verbose JSON to get timestamps
if let Ok(response) = serde_json::from_str::<WhisperVerboseResponse>(&content) {
if let Some(segments) = response.segments {
emit_log(app, "INFO", &format!("Found {} segments.", segments.len()));
// Log first 3 segments start times
for (i, seg) in segments.iter().take(3).enumerate() {
emit_log(app, "DEBUG", &format!("Seg {}: start={}", i, seg.start));
}
let mut formatted_transcript = String::new();
for segment in segments {
let start_mins = (segment.start / 60.0).floor() as u64;
let start_secs = (segment.start % 60.0).floor() as u64;
formatted_transcript.push_str(&format!("[{:02}:{:02}] {}\n", start_mins, start_secs, segment.text.trim()));
}
if !formatted_transcript.trim().is_empty() {
emit_log(app, "SUCCESS", "Transcription completed (async) with timestamps.");
return Ok(formatted_transcript);
} else {
emit_log(app, "WARN", "Segments found but empty content.");
}
} else {
emit_log(app, "WARN", "Verbose parsed but no segments found.");
}
if let Some(text) = response.text {
emit_log(app, "SUCCESS", "Transcription completed (async) - raw text (segments missing).");
return Ok(text);
}
} else {
emit_log(app, "WARN", "Failed to parse poll content as WhisperVerboseResponse");
}
emit_log(app, "SUCCESS", "Transcription completed - returning raw content.");
// If not JSON or no text field, return raw content
return Ok(content);
} else {
emit_log(app, "ERROR", "Failed to download transcription results.");
return Err(format!("Download failed: {}", dl_res.status()));
}
} else if status == "failed" || status == "error" {
emit_log(app, "ERROR", &format!("Batch processing failed: {:?}", json));
return Err(format!("Batch processing failed: {:?}", json));
}
// If 'processing' or 'pending', continue loop
}
}
}
emit_log(app, "ERROR", "Transcription timed out after 80s.");
Err("Transcription timed out".to_string())
}
#[tauri::command]
async fn summarize_text(text: String, api_key: String, product_id: String, prompt: String, model: String) -> Result<String, String> {
async fn summarize_text(app: AppHandle, text: String, api_key: String, product_id: String, prompt: String, model: String) -> Result<String, String> {
emit_log(&app, "INFO", "Starting summarization...");
let client = reqwest::Client::new();
let url = format!("https://api.infomaniak.com/2/ai/{}/openai/v1/chat/completions", product_id);
@@ -341,36 +491,58 @@ async fn summarize_text(text: String, api_key: String, product_id: String, promp
"messages": messages
});
emit_log(&app, "DEBUG", &format!("POST {}", url));
let res = client.post(&url)
.header("Authorization", format!("Bearer {}", api_key))
.header("Content-Type", "application/json")
.json(&body)
.send()
.await
.map_err(|e| e.to_string())?;
.map_err(|e| {
let msg = format!("Network error during summarization: {}", e);
emit_log(&app, "ERROR", &msg);
msg
})?;
if res.status().is_success() {
let raw_body = res.text().await.map_err(|e| e.to_string())?;
println!("Summarization Raw Response: {}", raw_body);
// println!("Summarization Raw Response: {}", raw_body);
let response_body: ChatCompletionResponse = serde_json::from_str(&raw_body)
.map_err(|e| format!("Failed to decode JSON: {}. Body: {}", e, raw_body))?;
if let Some(choice) = response_body.choices.first() {
emit_log(&app, "SUCCESS", "Summarization received.");
Ok(choice.message.content.clone())
} else {
emit_log(&app, "WARN", "No summary generated in response.");
Err("No summary generated".to_string())
}
} else {
let error_text = res.text().await.unwrap_or_default();
emit_log(&app, "ERROR", &format!("Summarization failed: {}", error_text));
Err(format!("Summarization failed: {}", error_text))
}
}
#[tauri::command]
/// Launch the macOS "Audio MIDI Setup" utility, fire-and-forget.
///
/// Uses `open -a` so the app is brought up by Launch Services; the spawned
/// child is not waited on. Errors only if `open` itself cannot be started.
fn open_audio_midi_setup() -> Result<(), String> {
    let mut launcher = Command::new("open");
    launcher.args(["-a", "Audio MIDI Setup"]);
    launcher.spawn().map_err(|e| e.to_string())?;
    Ok(())
}
#[cfg_attr(mobile, tauri::mobile_entry_point)]
pub fn run() {
tauri::Builder::default()
.plugin(tauri_plugin_opener::init())
.plugin(tauri_plugin_dialog::init())
.manage(AppState {
recording_stream: Mutex::new(None),
recording_file_path: Mutex::new(None),
@@ -378,12 +550,14 @@ pub fn run() {
.invoke_handler(tauri::generate_handler![
greet,
get_input_devices,
install_driver,
start_recording,
stop_recording,
pause_recording,
resume_recording,
transcribe_audio,
summarize_text,
get_available_models
get_available_models,
open_audio_midi_setup
])
.run(tauri::generate_context!())
.expect("error while running tauri application");