feat: release 1.0 - rename to Hearbit AI, fix timestamps, update UI

This commit is contained in:
michael.borak
2026-01-20 10:14:07 +01:00
parent 768574709f
commit cd08e1c144
69 changed files with 1369 additions and 545 deletions

View File

@@ -1,4 +1,4 @@
use tauri::State;
use tauri::{AppHandle, Manager, State, Emitter};
use std::sync::{Arc, Mutex};
use std::process::Command;
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
@@ -17,6 +17,22 @@ struct AudioDevice {
name: String,
}
// Structured log entry pushed to the frontend via the "log-event" Tauri event
// (see `emit_log`). Serialized to JSON for the webview; Clone is needed so the
// event payload can be emitted by value.
#[derive(serde::Serialize, Clone)]
struct LogEvent {
    // Severity tag as a plain string, e.g. "INFO", "DEBUG", "WARN", "ERROR", "SUCCESS".
    level: String,
    // Human-readable log text.
    message: String,
    // Local wall-clock time formatted as HH:MM:SS (set at emit time).
    timestamp: String,
}
/// Broadcast a structured log entry to the frontend over the "log-event"
/// channel, stamped with the current local time (HH:MM:SS).
///
/// Emission failures are deliberately ignored (`let _ = …`): logging must
/// never make an audio/API command fail.
fn emit_log(app: &AppHandle, level: &str, message: &str) {
    let event = LogEvent {
        timestamp: chrono::Local::now().format("%H:%M:%S").to_string(),
        level: level.to_owned(),
        message: message.to_owned(),
    };
    let _ = app.emit("log-event", event);
}
#[tauri::command]
fn greet(name: &str) -> String {
format!("Hello, {}! You've been greeted from Rust!", name)
@@ -41,22 +57,11 @@ fn get_input_devices() -> Result<Vec<AudioDevice>, String> {
Ok(result)
}
#[tauri::command]
/// Install the BlackHole 2ch virtual audio driver via Homebrew.
///
/// Blocks until `brew install blackhole-2ch` finishes. Returns the command's
/// stdout on success, or stderr (lossily decoded) as the error string.
/// Errors if the `brew` binary cannot be launched at all.
fn install_driver() -> Result<String, String> {
    let output = Command::new("brew")
        .args(["install", "blackhole-2ch"])
        .output()
        .map_err(|e| format!("Failed to execute command: {}", e))?;
    // Guard clause: surface brew's stderr when the install fails.
    if !output.status.success() {
        return Err(String::from_utf8_lossy(&output.stderr).to_string());
    }
    Ok(String::from_utf8_lossy(&output.stdout).to_string())
}
#[tauri::command]
fn start_recording(state: State<'_, AppState>, device_id: String) -> Result<(), String> {
fn start_recording(app: AppHandle, state: State<'_, AppState>, device_id: String, save_path: Option<String>) -> Result<(), String> {
emit_log(&app, "INFO", &format!("Starting recording on device: {}", device_id));
let host = cpal::default_host();
// Find device by name (using name as ID)
@@ -75,16 +80,31 @@ fn start_recording(state: State<'_, AppState>, device_id: String) -> Result<(),
sample_format: hound::SampleFormat::Int,
};
// Create a temporary file
let temp_dir = std::env::temp_dir();
let file_path = temp_dir.join(format!("recording_{}.wav", std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_secs()));
// Determine file path: User provided or Temp
let file_path = if let Some(path) = save_path {
if path.trim().is_empty() {
std::env::temp_dir().join(format!("recording_{}.wav", std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_secs()))
} else {
// Check if directory exists, if not try to create it or error out?
// For now, assume user gives a valid directory. We'll append filename.
std::path::PathBuf::from(path).join(format!("recording_{}.wav", std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_secs()))
}
} else {
std::env::temp_dir().join(format!("recording_{}.wav", std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_secs()))
};
let file_path_str = file_path.to_string_lossy().to_string();
emit_log(&app, "INFO", &format!("Saving recording to: {}", file_path_str));
let writer = hound::WavWriter::create(&file_path, spec).map_err(|e| e.to_string())?;
let writer = Arc::new(Mutex::new(writer));
let writer_clone = writer.clone();
let err_fn = |err| eprintln!("an error occurred on stream: {}", err);
let app_handle = app.clone();
let err_fn = move |err| {
eprintln!("an error occurred on stream: {}", err);
emit_log(&app_handle, "ERROR", &format!("Stream error: {}", err));
};
let stream = match config.sample_format() {
cpal::SampleFormat::F32 => device.build_input_stream(
@@ -128,13 +148,15 @@ fn start_recording(state: State<'_, AppState>, device_id: String) -> Result<(),
// Store state
*state.recording_stream.lock().unwrap() = Some(stream);
*state.recording_file_path.lock().unwrap() = Some(file_path_str);
*state.recording_file_path.lock().unwrap() = Some(file_path_str.clone());
emit_log(&app, "SUCCESS", &format!("Recording started. File: {}", file_path_str));
Ok(())
}
#[tauri::command]
fn stop_recording(state: State<'_, AppState>) -> Result<String, String> {
fn stop_recording(app: AppHandle, state: State<'_, AppState>) -> Result<String, String> {
emit_log(&app, "INFO", "Stopping recording...");
// Drop stream to stop recording
{
let mut stream_guard = state.recording_stream.lock().unwrap();
@@ -146,7 +168,35 @@ fn stop_recording(state: State<'_, AppState>) -> Result<String, String> {
// Return file path
let mut path_guard = state.recording_file_path.lock().unwrap();
path_guard.take().ok_or("No recording path found".to_string())
let path = path_guard.take().ok_or("No recording path found".to_string())?;
emit_log(&app, "SUCCESS", &format!("Recording stopped. Saved to: {}", path));
Ok(path)
}
#[tauri::command]
/// Pause the active capture stream without dropping it, so a later
/// `resume_recording` can continue appending to the same WAV file.
///
/// Errors with "Not recording" when no stream is stored in state, or with
/// the cpal error text if the backend refuses to pause.
fn pause_recording(app: AppHandle, state: State<'_, AppState>) -> Result<(), String> {
    emit_log(&app, "INFO", "Pausing recording...");
    let guard = state.recording_stream.lock().unwrap();
    match guard.as_ref() {
        Some(stream) => {
            stream.pause().map_err(|e| e.to_string())?;
            emit_log(&app, "SUCCESS", "Recording paused.");
            Ok(())
        }
        None => Err("Not recording".to_string()),
    }
}
#[tauri::command]
/// Resume a previously paused capture stream (counterpart of
/// `pause_recording`); audio continues into the same WAV writer.
///
/// Errors with "Not recording" when no stream is stored in state, or with
/// the cpal error text if the backend cannot restart playback of the stream.
fn resume_recording(app: AppHandle, state: State<'_, AppState>) -> Result<(), String> {
    emit_log(&app, "INFO", "Resuming recording...");
    let guard = state.recording_stream.lock().unwrap();
    match guard.as_ref() {
        Some(stream) => {
            stream.play().map_err(|e| e.to_string())?;
            emit_log(&app, "SUCCESS", "Recording resumed.");
            Ok(())
        }
        None => Err("Not recording".to_string()),
    }
}
#[derive(serde::Deserialize)]
@@ -157,6 +207,7 @@ struct ModelListResponse {
// One model entry from the Infomaniak OpenAI-compatible `/models` listing.
#[derive(serde::Deserialize)]
struct ModelData {
    // Model identifier; also reused as the display name downstream.
    id: String,
    // Present in the API payload but unused by this app; kept so
    // deserialization tolerates the field, hence the dead_code allow.
    #[allow(dead_code)]
    owned_by: Option<String>,
}
@@ -177,6 +228,7 @@ struct Choice {
}
// Assistant message inside a chat-completion choice.
#[derive(serde::Deserialize)]
struct Message {
    // Generated text of the completion.
    // NOTE(review): the dead_code allow looks stale — `content` appears to be
    // read by the summarize path; confirm before removing the attribute.
    #[allow(dead_code)]
    content: String,
}
@@ -187,20 +239,27 @@ struct ModelInfo {
}
#[tauri::command]
async fn get_available_models(api_key: String, product_id: String) -> Result<Vec<ModelInfo>, String> {
async fn get_available_models(app: AppHandle, api_key: String, product_id: String) -> Result<Vec<ModelInfo>, String> {
emit_log(&app, "INFO", "Fetching available models from Infomaniak...");
let client = reqwest::Client::new();
// Use the v2/openai compliant endpoint as per docs
let url = format!("https://api.infomaniak.com/2/ai/{}/openai/v1/models", product_id);
emit_log(&app, "DEBUG", &format!("GET {}", url));
let res = client.get(&url)
.header("Authorization", format!("Bearer {}", api_key))
.send()
.await
.map_err(|e| e.to_string())?;
.map_err(|e| {
let msg = format!("Network error fetching models: {}", e);
emit_log(&app, "ERROR", &msg);
msg
})?;
if res.status().is_success() {
let raw_body = res.text().await.map_err(|e| e.to_string())?;
println!("Models Raw Response: {}", raw_body);
// println!("Models Raw Response: {}", raw_body);
let list: ModelListResponse = serde_json::from_str(&raw_body)
.map_err(|e| format!("Failed to parse models: {}. Body: {}", e, raw_body))?;
@@ -209,20 +268,34 @@ async fn get_available_models(api_key: String, product_id: String) -> Result<Vec
.map(|m| ModelInfo {
id: m.id.clone(),
name: m.id, // Use ID as name for now, or fetch more details if available
}).collect();
}).collect::<Vec<ModelInfo>>();
emit_log(&app, "SUCCESS", &format!("Loaded {} models.", models.len()));
Ok(models)
} else {
// Fallback to v1 if v2 fails or try another common path?
// For now just error out
let err = res.text().await.unwrap_or_default();
emit_log(&app, "ERROR", &format!("Failed to fetch models: {}", err));
Err(format!("Failed to fetch models: {}", err))
}
}
// Whisper `verbose_json` transcription payload: both fields are optional
// because the API may return only raw text, only segments, or neither.
#[derive(serde::Deserialize)]
struct WhisperVerboseResponse {
    // Full transcript as one string (fallback when segments are absent).
    text: Option<String>,
    // Timestamped segments used to build the "[MM:SS] text" transcript.
    segments: Option<Vec<Segment>>,
}
// A single timestamped span of the transcription.
#[derive(serde::Deserialize)]
struct Segment {
    // Segment start time in seconds from the beginning of the audio.
    start: f64,
    // Segment end time in seconds (deserialized but not used for formatting).
    end: f64,
    // Transcribed text for this span.
    text: String,
}
#[tauri::command]
async fn transcribe_audio(file_path: String, api_key: String, product_id: String) -> Result<String, String> {
async fn transcribe_audio(app: AppHandle, file_path: String, api_key: String, product_id: String) -> Result<String, String> {
emit_log(&app, "INFO", "Starting transcription with timestamps...");
let client = reqwest::Client::new();
// Prepare file part
@@ -235,44 +308,88 @@ async fn transcribe_audio(file_path: String, api_key: String, product_id: String
let form = reqwest::multipart::Form::new()
.part("file", file_part)
.text("model", "whisper");
.text("model", "whisper")
.text("response_format", "verbose_json")
.text("timestamp_granularities[]", "segment"); // Crucial for accurate segments
let url = format!("https://api.infomaniak.com/1/ai/{}/openai/audio/transcriptions", product_id);
emit_log(&app, "DEBUG", &format!("POST {}", url));
let res = client.post(&url)
.header("Authorization", format!("Bearer {}", api_key))
.multipart(form)
.send()
.await
.map_err(|e| e.to_string())?;
.map_err(|e| {
let msg = format!("Network error during transcription: {}", e);
emit_log(&app, "ERROR", &msg);
msg
})?;
if res.status().is_success() {
let raw_body = res.text().await.map_err(|e| e.to_string())?;
println!("Transcription Raw Response: {}", raw_body);
// Attempt to parse text or batch_id
// Attempt to parse text or batch_id
let response: WhisperResponse = serde_json::from_str(&raw_body)
// Check if we got a batch ID
#[derive(serde::Deserialize)]
struct BatchResponse {
batch_id: Option<String>,
}
// Try parsing as batch response first (Infomaniak specific behavior)
if let Ok(batch_res) = serde_json::from_str::<BatchResponse>(&raw_body) {
if let Some(batch_id) = batch_res.batch_id {
emit_log(&app, "INFO", &format!("Transcription queued. Batch ID: {}", batch_id));
return poll_transcription(&app, &client, &api_key, &product_id, &batch_id).await;
}
}
// If not batch, try parsing verbose response directly
// Log the raw body so we can see why it fails
emit_log(&app, "DEBUG", &format!("Direct Response (first 500 chars): {:.500}", raw_body));
let response: WhisperVerboseResponse = serde_json::from_str(&raw_body)
.map_err(|e| format!("Failed to decode JSON: {}. Body: {}", e, raw_body))?;
match (response.text, response.batch_id) {
(Some(text), _) => Ok(text),
(_, Some(batch_id)) => {
// Need to poll
poll_transcription(&client, &api_key, &product_id, &batch_id).await
},
_ => Err(format!("Response contained neither text nor batch_id. Body: {}", raw_body))
if let Some(segments) = response.segments {
emit_log(&app, "INFO", &format!("Found {} segments (Direct).", segments.len()));
for (i, seg) in segments.iter().take(3).enumerate() {
emit_log(&app, "DEBUG", &format!("Seg {}: start={}", i, seg.start));
}
// Format timestamps: [MM:SS] Text
let mut formatted_transcript = String::new();
for segment in segments {
let start_mins = (segment.start / 60.0).floor() as u64;
let start_secs = (segment.start % 60.0).floor() as u64;
formatted_transcript.push_str(&format!("[{:02}:{:02}] {}\n", start_mins, start_secs, segment.text.trim()));
}
// Fallback to raw text if segments empty
if formatted_transcript.trim().is_empty() {
if let Some(text) = response.text {
emit_log(&app, "SUCCESS", "Segments missing, using raw text.");
return Ok(text);
}
} else {
emit_log(&app, "SUCCESS", "Transcription received with timestamps.");
return Ok(formatted_transcript);
}
} else if let Some(text) = response.text {
emit_log(&app, "SUCCESS", "Segments missing, using raw text.");
return Ok(text);
}
emit_log(&app, "ERROR", "Response contained no recognized content.");
Err(format!("Response contained no recognized content. Body: {}", raw_body))
} else {
let error_text = res.text().await.unwrap_or_default();
emit_log(&app, "ERROR", &format!("Transcription failed: {}", error_text));
Err(format!("Transcription failed: {}", error_text))
}
}
async fn poll_transcription(client: &reqwest::Client, api_key: &str, product_id: &str, batch_id: &str) -> Result<String, String> {
// Polling URL: /1/ai/{product_id}/results/{batch_id} (or similar, verifying via trial)
// If that fails, we can try /openai/audio/transcriptions/{batch_id} but documentation suggests results endpoint.
// Let's assume the standard Infomaniak pattern for batches.
async fn poll_transcription(app: &AppHandle, client: &reqwest::Client, api_key: &str, product_id: &str, batch_id: &str) -> Result<String, String> {
let status_url = format!("https://api.infomaniak.com/1/ai/{}/results/{}", product_id, batch_id);
let mut attempts = 0;
@@ -280,6 +397,7 @@ async fn poll_transcription(client: &reqwest::Client, api_key: &str, product_id:
attempts += 1;
sleep(Duration::from_secs(2)).await;
emit_log(app, "DEBUG", &format!("Polling status... Attempt {}", attempts));
let res = client.get(&status_url)
.header("Authorization", format!("Bearer {}", api_key))
.send()
@@ -301,31 +419,63 @@ async fn poll_transcription(client: &reqwest::Client, api_key: &str, product_id:
if dl_res.status().is_success() {
let content = dl_res.text().await.map_err(|e| e.to_string())?;
// Try to parse the content as JSON to see if it's { "text": "..." }
if let Ok(json_val) = serde_json::from_str::<serde_json::Value>(&content) {
if let Some(text_content) = json_val.get("text").and_then(|t| t.as_str()) {
return Ok(text_content.to_string());
}
emit_log(app, "DEBUG", &format!("Poll Raw Content (first 500 chars): {:.500}", content));
// Try to parse as Verbose JSON to get timestamps
if let Ok(response) = serde_json::from_str::<WhisperVerboseResponse>(&content) {
if let Some(segments) = response.segments {
emit_log(app, "INFO", &format!("Found {} segments.", segments.len()));
// Log first 3 segments start times
for (i, seg) in segments.iter().take(3).enumerate() {
emit_log(app, "DEBUG", &format!("Seg {}: start={}", i, seg.start));
}
let mut formatted_transcript = String::new();
for segment in segments {
let start_mins = (segment.start / 60.0).floor() as u64;
let start_secs = (segment.start % 60.0).floor() as u64;
formatted_transcript.push_str(&format!("[{:02}:{:02}] {}\n", start_mins, start_secs, segment.text.trim()));
}
if !formatted_transcript.trim().is_empty() {
emit_log(app, "SUCCESS", "Transcription completed (async) with timestamps.");
return Ok(formatted_transcript);
} else {
emit_log(app, "WARN", "Segments found but empty content.");
}
} else {
emit_log(app, "WARN", "Verbose parsed but no segments found.");
}
if let Some(text) = response.text {
emit_log(app, "SUCCESS", "Transcription completed (async) - raw text (segments missing).");
return Ok(text);
}
} else {
emit_log(app, "WARN", "Failed to parse poll content as WhisperVerboseResponse");
}
emit_log(app, "SUCCESS", "Transcription completed - returning raw content.");
// If not JSON or no text field, return raw content
return Ok(content);
} else {
emit_log(app, "ERROR", "Failed to download transcription results.");
return Err(format!("Download failed: {}", dl_res.status()));
}
} else if status == "failed" || status == "error" {
emit_log(app, "ERROR", &format!("Batch processing failed: {:?}", json));
return Err(format!("Batch processing failed: {:?}", json));
}
// If 'processing' or 'pending', continue loop
}
}
}
emit_log(app, "ERROR", "Transcription timed out after 80s.");
Err("Transcription timed out".to_string())
}
#[tauri::command]
async fn summarize_text(text: String, api_key: String, product_id: String, prompt: String, model: String) -> Result<String, String> {
async fn summarize_text(app: AppHandle, text: String, api_key: String, product_id: String, prompt: String, model: String) -> Result<String, String> {
emit_log(&app, "INFO", "Starting summarization...");
let client = reqwest::Client::new();
let url = format!("https://api.infomaniak.com/2/ai/{}/openai/v1/chat/completions", product_id);
@@ -341,36 +491,58 @@ async fn summarize_text(text: String, api_key: String, product_id: String, promp
"messages": messages
});
emit_log(&app, "DEBUG", &format!("POST {}", url));
let res = client.post(&url)
.header("Authorization", format!("Bearer {}", api_key))
.header("Content-Type", "application/json")
.json(&body)
.send()
.await
.map_err(|e| e.to_string())?;
.map_err(|e| {
let msg = format!("Network error during summarization: {}", e);
emit_log(&app, "ERROR", &msg);
msg
})?;
if res.status().is_success() {
let raw_body = res.text().await.map_err(|e| e.to_string())?;
println!("Summarization Raw Response: {}", raw_body);
// println!("Summarization Raw Response: {}", raw_body);
let response_body: ChatCompletionResponse = serde_json::from_str(&raw_body)
.map_err(|e| format!("Failed to decode JSON: {}. Body: {}", e, raw_body))?;
if let Some(choice) = response_body.choices.first() {
emit_log(&app, "SUCCESS", "Summarization received.");
Ok(choice.message.content.clone())
} else {
emit_log(&app, "WARN", "No summary generated in response.");
Err("No summary generated".to_string())
}
} else {
let error_text = res.text().await.unwrap_or_default();
emit_log(&app, "ERROR", &format!("Summarization failed: {}", error_text));
Err(format!("Summarization failed: {}", error_text))
}
}
#[tauri::command]
/// Launch the macOS "Audio MIDI Setup" utility, fire-and-forget.
///
/// Uses `open -a` so the app is brought up by Launch Services; the spawned
/// child is not waited on. Errors only if `open` itself cannot be started.
fn open_audio_midi_setup() -> Result<(), String> {
    let mut launcher = Command::new("open");
    launcher.args(["-a", "Audio MIDI Setup"]);
    launcher.spawn().map_err(|e| e.to_string())?;
    Ok(())
}
#[cfg_attr(mobile, tauri::mobile_entry_point)]
pub fn run() {
tauri::Builder::default()
.plugin(tauri_plugin_opener::init())
.plugin(tauri_plugin_dialog::init())
.manage(AppState {
recording_stream: Mutex::new(None),
recording_file_path: Mutex::new(None),
@@ -378,12 +550,14 @@ pub fn run() {
.invoke_handler(tauri::generate_handler![
greet,
get_input_devices,
install_driver,
start_recording,
stop_recording,
pause_recording,
resume_recording,
transcribe_audio,
summarize_text,
get_available_models
get_available_models,
open_audio_midi_setup
])
.run(tauri::generate_context!())
.expect("error while running tauri application");