1291 lines
41 KiB
Rust
1291 lines
41 KiB
Rust
use base64::Engine;
|
|
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
|
|
use std::process::Command;
|
|
use std::sync::{Arc, Mutex};
|
|
use std::time::Duration;
|
|
use tauri::{
|
|
menu::{Menu, MenuItem},
|
|
tray::{TrayIconBuilder, TrayIconEvent},
|
|
AppHandle, Emitter, Manager, State, WindowEvent,
|
|
};
|
|
use tokio::time::sleep;
|
|
|
|
mod audio_processor;
|
|
use audio_processor::AudioProcessor;
|
|
mod apps;
|
|
mod auth;
|
|
mod email;
|
|
mod sc_audio;
|
|
|
|
// State to hold the active recording stream
|
|
struct AppState {
|
|
recording_stream: Mutex<Option<cpal::Stream>>,
|
|
recording_file_path: Mutex<Option<String>>,
|
|
system_capture: tokio::sync::Mutex<Option<sc_audio::SystemAudioCapture>>,
|
|
}
|
|
|
|
#[derive(serde::Serialize)]
|
|
struct AudioDevice {
|
|
id: String,
|
|
name: String,
|
|
}
|
|
|
|
#[derive(serde::Serialize, Clone)]
|
|
struct LogEvent {
|
|
level: String,
|
|
message: String,
|
|
timestamp: String,
|
|
}
|
|
|
|
pub(crate) fn emit_log(app: &AppHandle, level: &str, message: &str) {
|
|
let log = LogEvent {
|
|
level: level.to_string(),
|
|
message: message.to_string(),
|
|
timestamp: chrono::Local::now().format("%H:%M:%S").to_string(),
|
|
};
|
|
let _ = app.emit("log-event", log);
|
|
}
|
|
|
|
#[tauri::command]
|
|
/// Builds the demo greeting shown for `name`.
fn greet(name: &str) -> String {
    let greeting = format!("Hello, {name}! You've been greeted from Rust!");
    greeting
}
|
|
|
|
#[tauri::command]
|
|
fn get_input_devices() -> Result<Vec<AudioDevice>, String> {
|
|
let host = cpal::default_host();
|
|
let devices = host.input_devices().map_err(|e| e.to_string())?;
|
|
|
|
let mut result = Vec::new();
|
|
for device in devices {
|
|
#[allow(deprecated)]
|
|
if let Ok(name) = device.name() {
|
|
// macOS often produces weird names, but let's just use what we get
|
|
result.push(AudioDevice {
|
|
id: name.clone(), // Using name as ID for simplicity in this MVP
|
|
name,
|
|
});
|
|
}
|
|
}
|
|
Ok(result)
|
|
}
|
|
|
|
#[tauri::command]
|
|
async fn start_recording(
|
|
app: AppHandle,
|
|
state: State<'_, AppState>,
|
|
device_id: String,
|
|
save_path: Option<String>,
|
|
custom_filename: Option<String>,
|
|
wait_for_speech: Option<bool>,
|
|
mode: String,
|
|
excluded_apps: Option<Vec<String>>,
|
|
) -> Result<(), String> {
|
|
emit_log(
|
|
&app,
|
|
"INFO",
|
|
&format!(
|
|
"Starting recording [Mode: {}] on device: {}",
|
|
mode, device_id
|
|
),
|
|
);
|
|
let host = cpal::default_host();
|
|
|
|
// Find device by name (using name as ID)
|
|
#[allow(deprecated)]
|
|
let device = host
|
|
.input_devices()
|
|
.map_err(|e| e.to_string())?
|
|
.find(|d| d.name().map(|n| n == device_id).unwrap_or(false))
|
|
.or_else(|| host.default_input_device())
|
|
.ok_or("No input device found")?;
|
|
|
|
// Select the configuration with the MAXIMUM number of channels
|
|
// This is crucial for "Hearbit Audio" (Aggregate) which lists 3 channels but might default to 2.
|
|
// Select Audio Configuration
|
|
// We prioritize 48kHz because System Audio (ScreenCaptureKit) acts best at 48k.
|
|
let supported_configs: Vec<_> = device
|
|
.supported_input_configs()
|
|
.map_err(|e| e.to_string())?
|
|
.collect();
|
|
|
|
// Try to find 48kHz specifically
|
|
// Note: cpal::SampleRate is likely a type alias for u32 here, so we pass 48000 directly.
|
|
let config = supported_configs
|
|
.iter()
|
|
.find(|c| c.min_sample_rate() <= 48000 && c.max_sample_rate() >= 48000)
|
|
.map(|c| c.with_sample_rate(48000))
|
|
.or_else(|| {
|
|
// Fallback: Max sample rate
|
|
supported_configs
|
|
.iter()
|
|
.max_by_key(|c| c.channels())
|
|
.map(|c| c.with_max_sample_rate())
|
|
})
|
|
.ok_or("No supported input configurations found")?;
|
|
|
|
emit_log(
|
|
&app,
|
|
"INFO",
|
|
&format!(
|
|
"Selected Audio Config: {} Channels, {} Hz",
|
|
config.channels(),
|
|
config.sample_rate()
|
|
),
|
|
);
|
|
|
|
let spec = hound::WavSpec {
|
|
channels: config.channels(),
|
|
sample_rate: config.sample_rate(),
|
|
bits_per_sample: 16,
|
|
sample_format: hound::SampleFormat::Int,
|
|
};
|
|
|
|
// Determine file path: User provided or Temp
|
|
let filename = if let Some(name) = custom_filename {
|
|
// Sanitize filename
|
|
let safe_name: String = name
|
|
.chars()
|
|
.map(|x| {
|
|
if x.is_alphanumeric() || x == ' ' || x == '-' || x == '_' {
|
|
x
|
|
} else {
|
|
'_'
|
|
}
|
|
})
|
|
.collect();
|
|
format!("{}.wav", safe_name)
|
|
} else {
|
|
format!(
|
|
"recording_{}.wav",
|
|
std::time::SystemTime::now()
|
|
.duration_since(std::time::UNIX_EPOCH)
|
|
.unwrap()
|
|
.as_secs()
|
|
)
|
|
};
|
|
|
|
let file_path = if let Some(path) = save_path {
|
|
if path.trim().is_empty() {
|
|
std::env::temp_dir().join(&filename)
|
|
} else {
|
|
std::path::PathBuf::from(path).join(&filename)
|
|
}
|
|
} else {
|
|
std::env::temp_dir().join(&filename)
|
|
};
|
|
|
|
let file_path_str = file_path.to_string_lossy().to_string();
|
|
emit_log(
|
|
&app,
|
|
"INFO",
|
|
&format!("Saving recording to: {}", file_path_str),
|
|
);
|
|
|
|
let writer = hound::WavWriter::create(&file_path, spec).map_err(|e| e.to_string())?;
|
|
let writer = Arc::new(Mutex::new(writer));
|
|
let writer_clone = writer.clone();
|
|
|
|
// Initialize AudioProcessor (VAD)
|
|
// We pass the writer to it.
|
|
let should_wait = wait_for_speech.unwrap_or(false);
|
|
if should_wait {
|
|
emit_log(
|
|
&app,
|
|
"INFO",
|
|
&format!(
|
|
"Recording started in WAITING mode (Trigger: {}).",
|
|
if mode == "voice" {
|
|
"Speech"
|
|
} else {
|
|
"System Audio"
|
|
}
|
|
),
|
|
);
|
|
}
|
|
|
|
let processor = AudioProcessor::new(
|
|
config.sample_rate(),
|
|
config.channels(),
|
|
writer.clone(),
|
|
app.clone(),
|
|
should_wait,
|
|
mode,
|
|
)
|
|
.map_err(|e| format!("Failed to create AudioProcessor: {}", e))?;
|
|
|
|
// Wrap processor in Arc<Mutex> so we can share/move it into callback
|
|
// Actually, cpal callback takes ownership of its closure state usually if 'move'.
|
|
// Since stream is on another thread, we need Send. AudioProcessor should be Send.
|
|
// However, the callback is called repeatedly. We need to keep state.
|
|
// The workaround is to wrap it in a Mutex.
|
|
let processor = Arc::new(Mutex::new(processor));
|
|
let processor_clone = processor.clone();
|
|
|
|
// --- SYSTEM AUDIO CAPTURE START ---
|
|
// Prevent Doubling: If user selected an aggregate device (Hearbit Audio/BlackHole),
|
|
// it ALREADY contains system audio. In that case, we don't need internal SCK capture.
|
|
let is_aggregate = device_id.contains("Hearbit") || device_id.contains("BlackHole");
|
|
|
|
if is_aggregate {
|
|
emit_log(&app, "INFO", "Aggregate device detected. Disabling internal System Audio Capture to prevent doubling.");
|
|
} else {
|
|
let excluded = excluded_apps.unwrap_or_default();
|
|
let mut sys_capture = sc_audio::SystemAudioCapture::new(config.sample_rate(), excluded);
|
|
|
|
// Get the queue to share with the capture callback
|
|
let queue_clone = {
|
|
let p = processor.lock().unwrap();
|
|
p.system_queue.clone() // Access the pub field we added
|
|
};
|
|
|
|
let sys_callback = move |data: &[f32]| {
|
|
// Push to queue
|
|
if let Ok(mut q) = queue_clone.lock() {
|
|
q.extend(data.iter());
|
|
|
|
// Limit queue size to avoid memory leaks if main process loop is slow
|
|
while q.len() > 48000 * 5 {
|
|
// 5 seconds buffer
|
|
q.pop_front();
|
|
}
|
|
}
|
|
};
|
|
|
|
match sys_capture.start(sys_callback).await {
|
|
Ok(_) => emit_log(&app, "INFO", "System Audio Capture started."),
|
|
Err(e) => emit_log(
|
|
&app,
|
|
"WARN",
|
|
&format!("System Audio Capture failed (Permissions?): {}", e),
|
|
),
|
|
}
|
|
|
|
*state.system_capture.lock().await = Some(sys_capture);
|
|
|
|
// Spawn background task to periodically refresh the filter (handles apps opened after recording starts)
|
|
let app_handle = app.clone();
|
|
tauri::async_runtime::spawn(async move {
|
|
loop {
|
|
tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
|
|
let state = app_handle.state::<AppState>();
|
|
let mut guard = state.system_capture.lock().await;
|
|
if let Some(sys) = guard.as_mut() {
|
|
// Try to refresh filter
|
|
if let Err(e) = sys.refresh_filter().await {
|
|
// Just log as debug/warn, not critical if it fails once
|
|
eprintln!("Failed to refresh audio filter: {}", e);
|
|
}
|
|
} else {
|
|
// Recording stopped
|
|
break;
|
|
}
|
|
}
|
|
});
|
|
}
|
|
// --- SYSTEM AUDIO CAPTURE END ---
|
|
|
|
let app_handle = app.clone();
|
|
let err_fn = move |err| {
|
|
eprintln!("an error occurred on stream: {}", err);
|
|
emit_log(&app_handle, "ERROR", &format!("Stream error: {}", err));
|
|
};
|
|
|
|
let stream = match config.sample_format() {
|
|
cpal::SampleFormat::F32 => device.build_input_stream(
|
|
&config.into(),
|
|
move |data: &[f32], _: &_| {
|
|
if let Ok(mut p) = processor_clone.lock() {
|
|
p.process(data);
|
|
}
|
|
},
|
|
err_fn,
|
|
None,
|
|
),
|
|
// For I16 and U16 we need to convert to F32 for our processor
|
|
cpal::SampleFormat::I16 => device.build_input_stream(
|
|
&config.into(),
|
|
move |data: &[i16], _: &_| {
|
|
// Convert i16 to f32
|
|
let f32_data: Vec<f32> = data.iter().map(|&s| s as f32 / i16::MAX as f32).collect();
|
|
if let Ok(mut p) = processor_clone.lock() {
|
|
p.process(&f32_data);
|
|
}
|
|
},
|
|
err_fn,
|
|
None,
|
|
),
|
|
cpal::SampleFormat::U16 => device.build_input_stream(
|
|
&config.into(),
|
|
move |data: &[u16], _: &_| {
|
|
// Convert u16 to f32
|
|
let f32_data: Vec<f32> = data
|
|
.iter()
|
|
.map(|&s| (s as i32 - 32768) as f32 / 32768.0)
|
|
.collect();
|
|
if let Ok(mut p) = processor_clone.lock() {
|
|
p.process(&f32_data);
|
|
}
|
|
},
|
|
err_fn,
|
|
None,
|
|
),
|
|
_ => return Err("Unsupported sample format".to_string()),
|
|
}
|
|
.map_err(|e| e.to_string())?;
|
|
|
|
stream.play().map_err(|e| e.to_string())?;
|
|
|
|
// Store state
|
|
*state.recording_stream.lock().unwrap() = Some(stream);
|
|
*state.recording_file_path.lock().unwrap() = Some(file_path_str.clone());
|
|
|
|
emit_log(
|
|
&app,
|
|
"SUCCESS",
|
|
&format!("Recording started. File: {}", file_path_str),
|
|
);
|
|
Ok(())
|
|
}
|
|
|
|
#[tauri::command]
|
|
async fn stop_recording(app: AppHandle, state: State<'_, AppState>) -> Result<String, String> {
|
|
emit_log(&app, "INFO", "Stopping recording...");
|
|
// Drop stream to stop recording
|
|
{
|
|
// Also stop System Capture
|
|
let mut sys_guard = state.system_capture.lock().await;
|
|
if let Some(mut sys) = sys_guard.take() {
|
|
sys.stop();
|
|
}
|
|
|
|
let mut stream_guard = state.recording_stream.lock().unwrap();
|
|
|
|
if stream_guard.is_none() {
|
|
return Err("Not recording".to_string());
|
|
}
|
|
*stream_guard = None; // This drops the stream and stops recording
|
|
}
|
|
|
|
// Return file path
|
|
let mut path_guard = state.recording_file_path.lock().unwrap();
|
|
let path = path_guard
|
|
.take()
|
|
.ok_or("No recording path found".to_string())?;
|
|
emit_log(
|
|
&app,
|
|
"SUCCESS",
|
|
&format!("Recording stopped. Saved to: {}", path),
|
|
);
|
|
Ok(path)
|
|
}
|
|
|
|
#[tauri::command]
|
|
fn pause_recording(app: AppHandle, state: State<'_, AppState>) -> Result<(), String> {
|
|
emit_log(&app, "INFO", "Pausing recording...");
|
|
let stream_guard = state.recording_stream.lock().unwrap();
|
|
if let Some(stream) = stream_guard.as_ref() {
|
|
stream.pause().map_err(|e| e.to_string())?;
|
|
emit_log(&app, "SUCCESS", "Recording paused.");
|
|
Ok(())
|
|
} else {
|
|
Err("Not recording".to_string())
|
|
}
|
|
}
|
|
|
|
#[tauri::command]
|
|
fn resume_recording(app: AppHandle, state: State<'_, AppState>) -> Result<(), String> {
|
|
emit_log(&app, "INFO", "Resuming recording...");
|
|
let stream_guard = state.recording_stream.lock().unwrap();
|
|
if let Some(stream) = stream_guard.as_ref() {
|
|
stream.play().map_err(|e| e.to_string())?;
|
|
emit_log(&app, "SUCCESS", "Recording resumed.");
|
|
Ok(())
|
|
} else {
|
|
Err("Not recording".to_string())
|
|
}
|
|
}
|
|
|
|
#[derive(serde::Deserialize)]
|
|
struct ModelListResponse {
|
|
data: Vec<ModelData>,
|
|
}
|
|
|
|
#[derive(serde::Deserialize)]
|
|
struct ModelData {
|
|
id: String,
|
|
#[allow(dead_code)]
|
|
owned_by: Option<String>,
|
|
}
|
|
|
|
// Structs for Infomaniak API responses
|
|
#[derive(serde::Deserialize)]
|
|
struct WhisperResponse {
|
|
text: Option<String>,
|
|
batch_id: Option<String>,
|
|
}
|
|
|
|
#[derive(serde::Deserialize)]
|
|
struct ChatCompletionResponse {
|
|
choices: Vec<Choice>,
|
|
}
|
|
#[derive(serde::Deserialize)]
|
|
struct Choice {
|
|
message: Message,
|
|
}
|
|
#[derive(serde::Deserialize)]
|
|
struct Message {
|
|
#[allow(dead_code)]
|
|
content: String,
|
|
}
|
|
|
|
#[derive(serde::Serialize)]
|
|
struct ModelInfo {
|
|
id: String,
|
|
name: String,
|
|
}
|
|
|
|
#[tauri::command]
|
|
async fn get_available_models(
|
|
app: AppHandle,
|
|
api_key: String,
|
|
product_id: String,
|
|
) -> Result<Vec<ModelInfo>, String> {
|
|
emit_log(&app, "INFO", "Fetching available models from Infomaniak...");
|
|
let client = reqwest::Client::new();
|
|
// Use the v2/openai compliant endpoint as per docs
|
|
let url = format!(
|
|
"https://api.infomaniak.com/2/ai/{}/openai/v1/models",
|
|
product_id
|
|
);
|
|
|
|
emit_log(&app, "DEBUG", &format!("GET {}", url));
|
|
|
|
let res = client
|
|
.get(&url)
|
|
.header("Authorization", format!("Bearer {}", api_key))
|
|
.send()
|
|
.await
|
|
.map_err(|e| {
|
|
let msg = format!("Network error fetching models: {}", e);
|
|
emit_log(&app, "ERROR", &msg);
|
|
msg
|
|
})?;
|
|
|
|
if res.status().is_success() {
|
|
let raw_body = res.text().await.map_err(|e| e.to_string())?;
|
|
// println!("Models Raw Response: {}", raw_body);
|
|
let list: ModelListResponse = serde_json::from_str(&raw_body)
|
|
.map_err(|e| format!("Failed to parse models: {}. Body: {}", e, raw_body))?;
|
|
|
|
let models = list
|
|
.data
|
|
.into_iter()
|
|
.filter(|m| {
|
|
!m.id.to_lowercase().contains("mini_lm")
|
|
&& !m.id.to_lowercase().contains("bert")
|
|
&& !m.id.to_lowercase().contains("embedding")
|
|
})
|
|
.map(|m| ModelInfo {
|
|
id: m.id.clone(),
|
|
name: m.id, // Use ID as name for now, or fetch more details if available
|
|
})
|
|
.collect::<Vec<ModelInfo>>();
|
|
|
|
emit_log(&app, "SUCCESS", &format!("Loaded {} models.", models.len()));
|
|
Ok(models)
|
|
} else {
|
|
let err = res.text().await.unwrap_or_default();
|
|
emit_log(&app, "ERROR", &format!("Failed to fetch models: {}", err));
|
|
Err(format!("Failed to fetch models: {}", err))
|
|
}
|
|
}
|
|
|
|
#[derive(serde::Deserialize)]
|
|
struct WhisperVerboseResponse {
|
|
text: Option<String>,
|
|
segments: Option<Vec<Segment>>,
|
|
}
|
|
|
|
#[derive(serde::Deserialize)]
|
|
struct Segment {
|
|
start: f64,
|
|
end: f64,
|
|
text: String,
|
|
}
|
|
|
|
#[tauri::command]
|
|
async fn transcribe_audio(
|
|
app: AppHandle,
|
|
file_path: String,
|
|
api_key: String,
|
|
product_id: String,
|
|
) -> Result<String, String> {
|
|
emit_log(&app, "INFO", "Starting transcription with timestamps...");
|
|
let client = reqwest::Client::new();
|
|
|
|
// Prepare file part
|
|
let file_bytes = std::fs::read(&file_path).map_err(|e| e.to_string())?;
|
|
// We must use a known file name for the part, Infomaniak might care, or not.
|
|
let file_part = reqwest::multipart::Part::bytes(file_bytes)
|
|
.file_name("recording.wav")
|
|
.mime_str("audio/wav")
|
|
.map_err(|e| e.to_string())?;
|
|
|
|
let form = reqwest::multipart::Form::new()
|
|
.part("file", file_part)
|
|
.text("model", "whisper")
|
|
.text("response_format", "verbose_json")
|
|
.text("timestamp_granularities[]", "segment"); // Crucial for accurate segments
|
|
|
|
let url = format!(
|
|
"https://api.infomaniak.com/1/ai/{}/openai/audio/transcriptions",
|
|
product_id
|
|
);
|
|
|
|
emit_log(&app, "DEBUG", &format!("POST {}", url));
|
|
|
|
let res = client
|
|
.post(&url)
|
|
.header("Authorization", format!("Bearer {}", api_key))
|
|
.multipart(form)
|
|
.send()
|
|
.await
|
|
.map_err(|e| {
|
|
let msg = format!("Network error during transcription: {}", e);
|
|
emit_log(&app, "ERROR", &msg);
|
|
msg
|
|
})?;
|
|
|
|
if res.status().is_success() {
|
|
let raw_body = res.text().await.map_err(|e| e.to_string())?;
|
|
|
|
// Check if we got a batch ID
|
|
#[derive(serde::Deserialize)]
|
|
struct BatchResponse {
|
|
batch_id: Option<String>,
|
|
}
|
|
|
|
// Try parsing as batch response first (Infomaniak specific behavior)
|
|
if let Ok(batch_res) = serde_json::from_str::<BatchResponse>(&raw_body) {
|
|
if let Some(batch_id) = batch_res.batch_id {
|
|
emit_log(
|
|
&app,
|
|
"INFO",
|
|
&format!("Transcription queued. Batch ID: {}", batch_id),
|
|
);
|
|
return poll_transcription(&app, &client, &api_key, &product_id, &batch_id).await;
|
|
}
|
|
}
|
|
|
|
// If not batch, try parsing verbose response directly
|
|
// Log the raw body so we can see why it fails
|
|
emit_log(
|
|
&app,
|
|
"DEBUG",
|
|
&format!("Direct Response (first 500 chars): {:.500}", raw_body),
|
|
);
|
|
|
|
let response: WhisperVerboseResponse = serde_json::from_str(&raw_body)
|
|
.map_err(|e| format!("Failed to decode JSON: {}. Body: {}", e, raw_body))?;
|
|
|
|
if let Some(segments) = response.segments {
|
|
emit_log(
|
|
&app,
|
|
"INFO",
|
|
&format!("Found {} segments (Direct).", segments.len()),
|
|
);
|
|
for (i, seg) in segments.iter().take(3).enumerate() {
|
|
emit_log(&app, "DEBUG", &format!("Seg {}: start={}", i, seg.start));
|
|
}
|
|
|
|
// Format timestamps: [MM:SS] Text
|
|
let mut formatted_transcript = String::new();
|
|
for segment in segments {
|
|
let start_mins = (segment.start / 60.0).floor() as u64;
|
|
let start_secs = (segment.start % 60.0).floor() as u64;
|
|
formatted_transcript.push_str(&format!(
|
|
"[{:02}:{:02}] {}\n",
|
|
start_mins,
|
|
start_secs,
|
|
segment.text.trim()
|
|
));
|
|
}
|
|
|
|
// Fallback to raw text if segments empty
|
|
if formatted_transcript.trim().is_empty() {
|
|
if let Some(text) = response.text {
|
|
emit_log(&app, "SUCCESS", "Segments missing, using raw text.");
|
|
return Ok(text);
|
|
}
|
|
} else {
|
|
emit_log(&app, "SUCCESS", "Transcription received with timestamps.");
|
|
return Ok(formatted_transcript);
|
|
}
|
|
} else if let Some(text) = response.text {
|
|
emit_log(&app, "SUCCESS", "Segments missing, using raw text.");
|
|
return Ok(text);
|
|
}
|
|
|
|
emit_log(&app, "ERROR", "Response contained no recognized content.");
|
|
Err(format!(
|
|
"Response contained no recognized content. Body: {}",
|
|
raw_body
|
|
))
|
|
} else {
|
|
let error_text = res.text().await.unwrap_or_default();
|
|
emit_log(
|
|
&app,
|
|
"ERROR",
|
|
&format!("Transcription failed: {}", error_text),
|
|
);
|
|
Err(format!("Transcription failed: {}", error_text))
|
|
}
|
|
}
|
|
|
|
async fn poll_transcription(
|
|
app: &AppHandle,
|
|
client: &reqwest::Client,
|
|
api_key: &str,
|
|
product_id: &str,
|
|
batch_id: &str,
|
|
) -> Result<String, String> {
|
|
let status_url = format!(
|
|
"https://api.infomaniak.com/1/ai/{}/results/{}",
|
|
product_id, batch_id
|
|
);
|
|
|
|
let mut attempts = 0;
|
|
while attempts < 40 {
|
|
// 40 * 2s = 80s timeout
|
|
attempts += 1;
|
|
sleep(Duration::from_secs(2)).await;
|
|
|
|
emit_log(
|
|
app,
|
|
"DEBUG",
|
|
&format!("Polling status... Attempt {}", attempts),
|
|
);
|
|
let res = client
|
|
.get(&status_url)
|
|
.header("Authorization", format!("Bearer {}", api_key))
|
|
.send()
|
|
.await
|
|
.map_err(|e| format!("Polling error: {}", e))?;
|
|
|
|
if res.status().is_success() {
|
|
let json: serde_json::Value = res.json().await.map_err(|e| e.to_string())?;
|
|
// Check 'status'
|
|
if let Some(status) = json.get("status").and_then(|s| s.as_str()) {
|
|
if status == "success" {
|
|
// Download the result
|
|
let download_url = format!(
|
|
"https://api.infomaniak.com/1/ai/{}/results/{}/download",
|
|
product_id, batch_id
|
|
);
|
|
let dl_res = client
|
|
.get(&download_url)
|
|
.header("Authorization", format!("Bearer {}", api_key))
|
|
.send()
|
|
.await
|
|
.map_err(|e| e.to_string())?;
|
|
|
|
if dl_res.status().is_success() {
|
|
let content = dl_res.text().await.map_err(|e| e.to_string())?;
|
|
emit_log(
|
|
app,
|
|
"DEBUG",
|
|
&format!("Poll Raw Content (first 500 chars): {:.500}", content),
|
|
);
|
|
|
|
// Try to parse as Verbose JSON to get timestamps
|
|
if let Ok(response) =
|
|
serde_json::from_str::<WhisperVerboseResponse>(&content)
|
|
{
|
|
if let Some(segments) = response.segments {
|
|
emit_log(
|
|
app,
|
|
"INFO",
|
|
&format!("Found {} segments.", segments.len()),
|
|
);
|
|
// Log first 3 segments start times
|
|
for (i, seg) in segments.iter().take(3).enumerate() {
|
|
emit_log(
|
|
app,
|
|
"DEBUG",
|
|
&format!("Seg {}: start={}", i, seg.start),
|
|
);
|
|
}
|
|
|
|
let mut formatted_transcript = String::new();
|
|
for segment in segments {
|
|
let start_mins = (segment.start / 60.0).floor() as u64;
|
|
let start_secs = (segment.start % 60.0).floor() as u64;
|
|
formatted_transcript.push_str(&format!(
|
|
"[{:02}:{:02}] {}\n",
|
|
start_mins,
|
|
start_secs,
|
|
segment.text.trim()
|
|
));
|
|
}
|
|
if !formatted_transcript.trim().is_empty() {
|
|
emit_log(
|
|
app,
|
|
"SUCCESS",
|
|
"Transcription completed (async) with timestamps.",
|
|
);
|
|
return Ok(formatted_transcript);
|
|
} else {
|
|
emit_log(app, "WARN", "Segments found but empty content.");
|
|
}
|
|
} else {
|
|
emit_log(app, "WARN", "Verbose parsed but no segments found.");
|
|
}
|
|
|
|
if let Some(text) = response.text {
|
|
emit_log(app, "SUCCESS", "Transcription completed (async) - raw text (segments missing).");
|
|
return Ok(text);
|
|
}
|
|
} else {
|
|
emit_log(
|
|
app,
|
|
"WARN",
|
|
"Failed to parse poll content as WhisperVerboseResponse",
|
|
);
|
|
}
|
|
|
|
emit_log(
|
|
app,
|
|
"SUCCESS",
|
|
"Transcription completed - returning raw content.",
|
|
);
|
|
// If not JSON or no text field, return raw content
|
|
return Ok(content);
|
|
} else {
|
|
emit_log(app, "ERROR", "Failed to download transcription results.");
|
|
return Err(format!("Download failed: {}", dl_res.status()));
|
|
}
|
|
} else if status == "failed" || status == "error" {
|
|
let err_msg = format!(
|
|
"Batch processing failed [Status: {}]. Full Response: {:?}",
|
|
status, json
|
|
);
|
|
emit_log(app, "ERROR", &err_msg);
|
|
return Err(err_msg);
|
|
}
|
|
// If 'processing' or 'pending', continue loop
|
|
}
|
|
}
|
|
}
|
|
emit_log(app, "ERROR", "Transcription timed out after 80s.");
|
|
Err("Transcription timed out".to_string())
|
|
}
|
|
|
|
#[tauri::command]
|
|
async fn summarize_text(
|
|
app: AppHandle,
|
|
text: String,
|
|
api_key: String,
|
|
product_id: String,
|
|
prompt: String,
|
|
model: String,
|
|
) -> Result<String, String> {
|
|
emit_log(&app, "INFO", "Starting summarization...");
|
|
let client = reqwest::Client::new();
|
|
let url = format!(
|
|
"https://api.infomaniak.com/2/ai/{}/openai/v1/chat/completions",
|
|
product_id
|
|
);
|
|
|
|
let messages = serde_json::json!([
|
|
{ "role": "system", "content": prompt },
|
|
{ "role": "user", "content": text }
|
|
]);
|
|
|
|
let model_to_use = if model.is_empty() {
|
|
"mixtral".to_string()
|
|
} else {
|
|
model
|
|
};
|
|
|
|
let body = serde_json::json!({
|
|
"model": model_to_use,
|
|
"messages": messages
|
|
});
|
|
|
|
emit_log(&app, "DEBUG", &format!("POST {}", url));
|
|
|
|
let res = client
|
|
.post(&url)
|
|
.header("Authorization", format!("Bearer {}", api_key))
|
|
.header("Content-Type", "application/json")
|
|
.json(&body)
|
|
.send()
|
|
.await
|
|
.map_err(|e| {
|
|
let msg = format!("Network error during summarization: {}", e);
|
|
emit_log(&app, "ERROR", &msg);
|
|
msg
|
|
})?;
|
|
|
|
if res.status().is_success() {
|
|
let raw_body = res.text().await.map_err(|e| e.to_string())?;
|
|
// println!("Summarization Raw Response: {}", raw_body);
|
|
|
|
let response_body: ChatCompletionResponse = serde_json::from_str(&raw_body)
|
|
.map_err(|e| format!("Failed to decode JSON: {}. Body: {}", e, raw_body))?;
|
|
|
|
if let Some(choice) = response_body.choices.first() {
|
|
emit_log(&app, "SUCCESS", "Summarization received.");
|
|
Ok(choice.message.content.clone())
|
|
} else {
|
|
emit_log(&app, "WARN", "No summary generated in response.");
|
|
Err("No summary generated".to_string())
|
|
}
|
|
} else {
|
|
let error_text = res.text().await.unwrap_or_default();
|
|
emit_log(
|
|
&app,
|
|
"ERROR",
|
|
&format!("Summarization failed: {}", error_text),
|
|
);
|
|
Err(format!("Summarization failed: {}", error_text))
|
|
}
|
|
}
|
|
|
|
#[derive(serde::Serialize)]
|
|
struct AudioMetadata {
|
|
duration: f64,
|
|
size: u64,
|
|
format: String,
|
|
}
|
|
|
|
/// Locates an external tool such as `ffmpeg` or `ffprobe`.
///
/// The common install directories `/opt/homebrew/bin`, `/usr/local/bin` and
/// `/usr/bin` are checked in that order and the first candidate that is a
/// regular file (symlinks are followed) is returned. When none matches, the
/// bare `binary_name` is returned so the lookup is left to `PATH`.
///
/// Only files are accepted: a directory with the same name, or the search
/// directory itself when `binary_name` is empty, is never returned.
fn resolve_binary_path(binary_name: &str) -> String {
    const SEARCH_DIRS: [&str; 3] = ["/opt/homebrew/bin", "/usr/local/bin", "/usr/bin"];

    SEARCH_DIRS
        .iter()
        .map(|dir| format!("{}/{}", dir, binary_name))
        .find(|candidate| std::path::Path::new(candidate).is_file())
        // Fallback: expect the binary to be reachable through PATH.
        .unwrap_or_else(|| binary_name.to_string())
}
|
|
|
|
#[tauri::command]
|
|
fn get_audio_metadata(app: AppHandle, file_path: String) -> Result<AudioMetadata, String> {
|
|
emit_log(
|
|
&app,
|
|
"INFO",
|
|
&format!("Getting metadata for: {}", file_path),
|
|
);
|
|
|
|
let path = std::path::Path::new(&file_path);
|
|
if !path.exists() {
|
|
return Err(format!("File not found: {}", file_path));
|
|
}
|
|
|
|
let size = std::fs::metadata(&file_path)
|
|
.map_err(|e| e.to_string())?
|
|
.len();
|
|
|
|
// Use ffprobe to get duration
|
|
// Try resolved path first
|
|
let ffprobe_cmd = resolve_binary_path("ffprobe");
|
|
|
|
let output = Command::new(&ffprobe_cmd)
|
|
.args([
|
|
"-v",
|
|
"error",
|
|
"-show_entries",
|
|
"format=duration",
|
|
"-of",
|
|
"default=noprint_wrappers=1:nokey=1",
|
|
&file_path,
|
|
])
|
|
.output()
|
|
.map_err(|e| format!("Failed to execute ffprobe at '{}': {}", ffprobe_cmd, e))?;
|
|
|
|
let duration_str = String::from_utf8_lossy(&output.stdout);
|
|
let duration: f64 = duration_str.trim().parse().unwrap_or(0.0);
|
|
|
|
// Extension as format
|
|
let format = path
|
|
.extension()
|
|
.and_then(|e| e.to_str())
|
|
.unwrap_or("unknown")
|
|
.to_string();
|
|
|
|
Ok(AudioMetadata {
|
|
duration,
|
|
size,
|
|
format,
|
|
})
|
|
}
|
|
|
|
#[tauri::command]
|
|
fn convert_to_mp3(app: AppHandle, wav_path: String) -> Result<String, String> {
|
|
emit_log(&app, "INFO", &format!("Converting to MP3: {}", wav_path));
|
|
|
|
let mp3_path = wav_path.replace(".wav", ".mp3");
|
|
let ffmpeg_cmd = resolve_binary_path("ffmpeg");
|
|
|
|
let output = Command::new(&ffmpeg_cmd)
|
|
.args([
|
|
"-i",
|
|
&wav_path,
|
|
"-codec:a",
|
|
"libmp3lame",
|
|
"-b:a",
|
|
"64k",
|
|
"-y", // overwrite
|
|
&mp3_path,
|
|
])
|
|
.output()
|
|
.map_err(|e| format!("Failed to execute ffmpeg at '{}': {}", ffmpeg_cmd, e))?;
|
|
|
|
if output.status.success() {
|
|
emit_log(&app, "SUCCESS", &format!("MP3 created: {}", mp3_path));
|
|
Ok(mp3_path)
|
|
} else {
|
|
let error = String::from_utf8_lossy(&output.stderr);
|
|
emit_log(&app, "ERROR", &format!("MP3 conversion failed: {}", error));
|
|
Err(format!("MP3 conversion failed: {}", error))
|
|
}
|
|
}
|
|
|
|
#[tauri::command]
|
|
fn chunk_audio(
|
|
app: AppHandle,
|
|
file_path: String,
|
|
chunk_minutes: u32,
|
|
) -> Result<Vec<String>, String> {
|
|
emit_log(
|
|
&app,
|
|
"INFO",
|
|
&format!(
|
|
"Chunking audio: {} ({}min chunks)",
|
|
file_path, chunk_minutes
|
|
),
|
|
);
|
|
|
|
let chunk_seconds = chunk_minutes * 60;
|
|
let ffprobe_cmd = resolve_binary_path("ffprobe");
|
|
let ffmpeg_cmd = resolve_binary_path("ffmpeg");
|
|
|
|
// Get total duration using ffprobe
|
|
let duration_output = Command::new(&ffprobe_cmd)
|
|
.args([
|
|
"-v",
|
|
"error",
|
|
"-show_entries",
|
|
"format=duration",
|
|
"-of",
|
|
"default=noprint_wrappers=1:nokey=1",
|
|
&file_path,
|
|
])
|
|
.output()
|
|
.map_err(|e| format!("Failed to get duration with '{}': {}", ffprobe_cmd, e))?;
|
|
|
|
let duration_str = String::from_utf8_lossy(&duration_output.stdout);
|
|
let duration: f64 = duration_str
|
|
.trim()
|
|
.parse()
|
|
.map_err(|_| "Failed to parse duration".to_string())?;
|
|
|
|
let num_chunks = (duration / chunk_seconds as f64).ceil() as usize;
|
|
emit_log(
|
|
&app,
|
|
"INFO",
|
|
&format!(
|
|
"Total duration: {}s, creating {} chunks",
|
|
duration, num_chunks
|
|
),
|
|
);
|
|
|
|
let mut chunk_paths = Vec::new();
|
|
let base_path = file_path.replace(".mp3", "");
|
|
|
|
for i in 0..num_chunks {
|
|
let start_time = i as u32 * chunk_seconds;
|
|
let chunk_path = format!("{}_chunk_{}.mp3", base_path, i);
|
|
|
|
let output = Command::new(&ffmpeg_cmd)
|
|
.args([
|
|
"-i",
|
|
&file_path,
|
|
"-ss",
|
|
&start_time.to_string(),
|
|
"-t",
|
|
&chunk_seconds.to_string(),
|
|
"-c",
|
|
"copy",
|
|
"-y",
|
|
&chunk_path,
|
|
])
|
|
.output()
|
|
.map_err(|e| format!("Failed to create chunk {} with '{}': {}", i, ffmpeg_cmd, e))?;
|
|
|
|
if !output.status.success() {
|
|
let error = String::from_utf8_lossy(&output.stderr);
|
|
return Err(format!("Chunk {} failed: {}", i, error));
|
|
}
|
|
|
|
chunk_paths.push(chunk_path);
|
|
}
|
|
|
|
emit_log(
|
|
&app,
|
|
"SUCCESS",
|
|
&format!("Created {} chunks", chunk_paths.len()),
|
|
);
|
|
Ok(chunk_paths)
|
|
}
|
|
|
|
#[tauri::command]
|
|
fn read_image_as_base64(app: AppHandle, file_path: String) -> Result<String, String> {
|
|
emit_log(
|
|
&app,
|
|
"INFO",
|
|
&format!("Reading image as base64: {}", file_path),
|
|
);
|
|
|
|
let bytes = std::fs::read(&file_path).map_err(|e| format!("Failed to read file: {}", e))?;
|
|
|
|
// Detect image type from extension
|
|
let extension = std::path::Path::new(&file_path)
|
|
.extension()
|
|
.and_then(|e| e.to_str())
|
|
.unwrap_or("png")
|
|
.to_lowercase();
|
|
|
|
let mime_type = match extension.as_str() {
|
|
"jpg" | "jpeg" => "image/jpeg",
|
|
"png" => "image/png",
|
|
"svg" => "image/svg+xml",
|
|
"gif" => "image/gif",
|
|
_ => "image/png",
|
|
};
|
|
|
|
// Use base64 encoding
|
|
let base64_str = base64::prelude::BASE64_STANDARD.encode(&bytes);
|
|
let data_url = format!("data:{};base64,{}", mime_type, base64_str);
|
|
|
|
emit_log(
|
|
&app,
|
|
"SUCCESS",
|
|
&format!("Image converted to base64 ({} bytes)", base64_str.len()),
|
|
);
|
|
Ok(data_url)
|
|
}
|
|
|
|
#[tauri::command]
|
|
fn get_running_apps(app: AppHandle) -> Result<Vec<apps::RunningApp>, String> {
|
|
emit_log(&app, "INFO", "Fetching running applications...");
|
|
Ok(apps::get_running_applications())
|
|
}
|
|
|
|
#[tauri::command]
|
|
fn open_audio_midi_setup() -> Result<(), String> {
|
|
Command::new("open")
|
|
.arg("-a")
|
|
.arg("Audio MIDI Setup")
|
|
.spawn()
|
|
.map_err(|e| e.to_string())?;
|
|
Ok(())
|
|
}
|
|
|
|
#[tauri::command]
|
|
fn create_hearbit_audio_device(app: AppHandle) -> Result<String, String> {
|
|
emit_log(&app, "INFO", "Attempting to create Hearbit Audio device...");
|
|
|
|
// Resolve resource path
|
|
let resource_path = app
|
|
.path()
|
|
.resource_dir()
|
|
.map_err(|e| e.to_string())?
|
|
.join("resources/create_hearbit_audio.swift");
|
|
|
|
if !resource_path.exists() {
|
|
// Fallback for dev environment where resources might not be bundled yet or different path
|
|
emit_log(
|
|
&app,
|
|
"WARN",
|
|
&format!(
|
|
"Resource script not found at {:?}. Trying local src-tauri path.",
|
|
resource_path
|
|
),
|
|
);
|
|
}
|
|
|
|
// For now, in dev mode, we might need to point to the source location if bundle isn't active
|
|
// But let's try running it.
|
|
|
|
let output = Command::new("swift")
|
|
.arg(resource_path)
|
|
.output()
|
|
.map_err(|e| e.to_string())?;
|
|
|
|
let stdout = String::from_utf8_lossy(&output.stdout).to_string();
|
|
let stderr = String::from_utf8_lossy(&output.stderr).to_string();
|
|
|
|
emit_log(&app, "DEBUG", &format!("Script Output: {}", stdout));
|
|
if !stderr.is_empty() {
|
|
emit_log(&app, "WARN", &format!("Script Stderr: {}", stderr));
|
|
}
|
|
|
|
if output.status.success() {
|
|
emit_log(
|
|
&app,
|
|
"SUCCESS",
|
|
"Hearbit Audio device created successfully.",
|
|
);
|
|
Ok("Device created successfully".to_string())
|
|
} else {
|
|
emit_log(&app, "ERROR", "Failed to create device.");
|
|
Err(format!("Failed to create device: {} {}", stdout, stderr))
|
|
}
|
|
}
|
|
|
|
#[tauri::command]
|
|
async fn check_screen_recording_permission() -> bool {
|
|
sc_audio::check_permissions().await
|
|
}
|
|
|
|
#[tauri::command]
|
|
async fn save_text_file(app: AppHandle, path: String, content: String) -> Result<(), String> {
|
|
emit_log(&app, "INFO", &format!("Saving text file to: {}", path));
|
|
match std::fs::write(&path, content) {
|
|
Ok(_) => {
|
|
emit_log(&app, "SUCCESS", "File saved successfully.");
|
|
Ok(())
|
|
}
|
|
Err(e) => {
|
|
emit_log(&app, "ERROR", &format!("Failed to save file: {}", e));
|
|
Err(e.to_string())
|
|
}
|
|
}
|
|
}
|
|
|
|
#[tauri::command]
|
|
async fn read_log_file(app: AppHandle) -> Result<String, String> {
|
|
let log_path = app
|
|
.path()
|
|
.app_log_dir()
|
|
.map_err(|e| e.to_string())?
|
|
.join("hearbit-ai.log");
|
|
if log_path.exists() {
|
|
let content = std::fs::read_to_string(&log_path).map_err(|e| e.to_string())?;
|
|
Ok(content)
|
|
} else {
|
|
Ok("No log file found yet.".to_string())
|
|
}
|
|
}
|
|
|
|
#[cfg_attr(mobile, tauri::mobile_entry_point)]
|
|
pub fn run() {
|
|
tauri::Builder::default()
|
|
.setup(|app| {
|
|
// Setup Tray Icon
|
|
let quit_i =
|
|
MenuItem::with_id(app, "quit", "Quit Hearbit AI", true, None::<&str>).unwrap();
|
|
let show_i = MenuItem::with_id(app, "show", "Show Window", true, None::<&str>).unwrap();
|
|
let menu = Menu::with_items(app, &[&show_i, &quit_i]).unwrap();
|
|
|
|
let _tray = TrayIconBuilder::new()
|
|
.icon(app.default_window_icon().unwrap().clone())
|
|
.menu(&menu)
|
|
.show_menu_on_left_click(true)
|
|
.on_menu_event(|app, event| match event.id.as_ref() {
|
|
"quit" => app.exit(0),
|
|
"show" => {
|
|
if let Some(window) = app.get_webview_window("main") {
|
|
let _ = window.show();
|
|
let _ = window.set_focus();
|
|
}
|
|
}
|
|
_ => {}
|
|
})
|
|
.on_tray_icon_event(|tray, event| {
|
|
if let TrayIconEvent::Click { .. } = event {
|
|
let app = tray.app_handle();
|
|
if let Some(window) = app.get_webview_window("main") {
|
|
let _ = window.show();
|
|
let _ = window.set_focus();
|
|
}
|
|
}
|
|
})
|
|
.build(app)?;
|
|
|
|
Ok(())
|
|
})
|
|
.on_window_event(|window, event| {
|
|
if let WindowEvent::CloseRequested { api, .. } = event {
|
|
// Prevent window from closing, just hide it
|
|
window.hide().unwrap();
|
|
api.prevent_close();
|
|
}
|
|
})
|
|
.plugin(tauri_plugin_shell::init())
|
|
.plugin(
|
|
tauri_plugin_log::Builder::default()
|
|
.targets([
|
|
tauri_plugin_log::Target::new(tauri_plugin_log::TargetKind::Stdout),
|
|
tauri_plugin_log::Target::new(tauri_plugin_log::TargetKind::LogDir {
|
|
file_name: Some("hearbit-ai.log".to_string()),
|
|
}),
|
|
])
|
|
.build(),
|
|
)
|
|
.plugin(tauri_plugin_opener::init())
|
|
.plugin(tauri_plugin_dialog::init())
|
|
.plugin(tauri_plugin_fs::init())
|
|
.plugin(tauri_plugin_oauth::init())
|
|
.manage(AppState {
|
|
recording_stream: Mutex::new(None),
|
|
recording_file_path: Mutex::new(None),
|
|
system_capture: tokio::sync::Mutex::new(None),
|
|
})
|
|
.invoke_handler(tauri::generate_handler![
|
|
greet,
|
|
get_input_devices,
|
|
start_recording,
|
|
stop_recording,
|
|
pause_recording,
|
|
resume_recording,
|
|
transcribe_audio,
|
|
summarize_text,
|
|
get_available_models,
|
|
open_audio_midi_setup,
|
|
create_hearbit_audio_device,
|
|
check_screen_recording_permission,
|
|
auth::start_auth_flow,
|
|
auth::get_calendar_events,
|
|
save_text_file,
|
|
read_log_file,
|
|
get_audio_metadata,
|
|
convert_to_mp3,
|
|
chunk_audio,
|
|
read_image_as_base64,
|
|
email::send_smtp_email,
|
|
get_running_apps
|
|
])
|
|
.build(tauri::generate_context!())
|
|
.expect("error while building tauri application")
|
|
.run(|app_handle, event| match event {
|
|
tauri::RunEvent::Reopen { .. } => {
|
|
if let Some(window) = app_handle.get_webview_window("main") {
|
|
let _ = window.show();
|
|
let _ = window.set_focus();
|
|
}
|
|
}
|
|
_ => {}
|
|
});
|
|
}
|