Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 129 additions & 62 deletions crates/kit/src/libvirt/ssh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,14 @@ use std::io::Write;
use std::os::unix::fs::PermissionsExt as _;
use std::os::unix::process::CommandExt;
use std::process::Command;
use std::time::{Duration, Instant};
use tempfile;
use tracing::debug;

// SSH retry configuration
const SSH_RETRY_TIMEOUT_SECS: u64 = 60; // Total time to retry SSH connections
const SSH_POLL_DELAY_SECS: u64 = 2; // Delay between SSH attempts

/// Configuration options for SSH connection to libvirt domain
#[derive(Debug, Parser)]
pub struct LibvirtSshOpts {
Expand All @@ -36,7 +41,7 @@ pub struct LibvirtSshOpts {
pub strict_host_keys: bool,

/// SSH connection timeout in seconds
#[clap(long, default_value = "30")]
#[clap(long, default_value = "5")]
pub timeout: u32,

/// SSH log level
Expand Down Expand Up @@ -236,8 +241,39 @@ impl LibvirtSshOpts {
Ok(temp_key)
}

/// Execute SSH connection to domain
fn connect_ssh(&self, ssh_config: &DomainSshConfig) -> Result<()> {
/// Build SSH command with configured options
fn build_ssh_command(
&self,
ssh_config: &DomainSshConfig,
temp_key: &tempfile::NamedTempFile,
parsed_extra_options: Vec<(String, String)>,
) -> Command {
let mut ssh_cmd = Command::new("ssh");
ssh_cmd
.arg("-i")
.arg(temp_key.path())
.arg("-p")
.arg(ssh_config.ssh_port.to_string());

let common_opts = crate::ssh::CommonSshOptions {
strict_host_keys: self.strict_host_keys,
connect_timeout: self.timeout,
server_alive_interval: 60,
log_level: self.log_level.clone(),
extra_options: parsed_extra_options,
};
common_opts.apply_to_command(&mut ssh_cmd);
ssh_cmd.arg(format!("{}@127.0.0.1", self.user));

ssh_cmd
}

/// Execute SSH connection to domain with retries
fn connect_ssh(
&self,
_global_opts: &crate::libvirt::LibvirtOptions,
ssh_config: &DomainSshConfig,
) -> Result<()> {
debug!(
"Connecting to domain '{}' via SSH on port {} (user: {})",
self.domain_name, ssh_config.ssh_port, self.user
Expand All @@ -250,17 +286,7 @@ impl LibvirtSshOpts {
// Create temporary SSH key file
let temp_key = self.create_temp_ssh_key(ssh_config)?;

// Build SSH command
let mut ssh_cmd = Command::new("ssh");

// Add SSH key and port
ssh_cmd
.arg("-i")
.arg(temp_key.path())
.arg("-p")
.arg(ssh_config.ssh_port.to_string());

// Parse extra options from key=value format
// Parse extra options
let mut parsed_extra_options = Vec::new();
for option in &self.extra_options {
if let Some((key, value)) = option.split_once('=') {
Expand All @@ -273,76 +299,117 @@ impl LibvirtSshOpts {
}
}

// Apply common SSH options
let common_opts = crate::ssh::CommonSshOptions {
strict_host_keys: self.strict_host_keys,
connect_timeout: self.timeout,
server_alive_interval: 60,
log_level: self.log_level.clone(),
extra_options: parsed_extra_options,
};
common_opts.apply_to_command(&mut ssh_cmd);
let start_time = Instant::now();
let timeout = Duration::from_secs(SSH_RETRY_TIMEOUT_SECS);

// Target host
ssh_cmd.arg(format!("{}@127.0.0.1", self.user));
// For interactive SSH, test connectivity first with retries, then exec
if self.command.is_empty() {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AI: Important: Interactive SSH sessions (empty command) now bypass the retry/console-feedback logic entirely. If the VM is slow to boot, interactive SSH will fail immediately after one 5s timeout with no feedback, which is worse UX than before. Consider adding retry logic for interactive sessions too, perhaps with a user-visible "waiting for SSH..." message.

debug!("Testing SSH connectivity before interactive session");

// Retry until timeout
loop {
let mut test_cmd =
self.build_ssh_command(ssh_config, &temp_key, parsed_extra_options.clone());
test_cmd.arg("--").arg("true"); // Simple test command

match test_cmd.output() {
Ok(output) if output.status.success() => {
// SSH is ready, now exec for interactive session
debug!("SSH ready, launching interactive session");
let mut ssh_cmd =
self.build_ssh_command(ssh_config, &temp_key, parsed_extra_options);
let error = ssh_cmd.exec();
return Err(eyre!("Failed to exec SSH command: {}", error));
}
Ok(output) => {
// Check if we've exceeded timeout
if start_time.elapsed() >= timeout {
if !self.suppress_output {
let stderr_str = String::from_utf8_lossy(&output.stderr);
eprint!("{}", stderr_str);
eprintln!(
"\nSSH connection failed after {:.1}s. To see VM console output, run: virsh console {}",
start_time.elapsed().as_secs_f64(),
self.domain_name
);
}
return Err(eyre!("SSH connection failed after timeout"));
}

// Retry all errors - show message on first attempt
if start_time.elapsed().as_secs() < SSH_POLL_DELAY_SECS
&& !self.suppress_output
{
eprintln!("Waiting for SSH to be ready...");
}

std::thread::sleep(Duration::from_secs(SSH_POLL_DELAY_SECS));
}
Err(e) => {
return Err(eyre!("Failed to spawn SSH command: {}", e));
}
}
}
}

// For command execution: retry with timeout
loop {
debug!(
"SSH connection attempt (elapsed: {:.1}s)",
start_time.elapsed().as_secs_f64()
);

// Add command if specified - use the same argument escaping logic as container SSH
if !self.command.is_empty() {
// Build SSH command
let mut ssh_cmd =
self.build_ssh_command(ssh_config, &temp_key, parsed_extra_options.clone());

// Add command
ssh_cmd.arg("--");
if self.command.len() > 1 {
// Multiple arguments need proper shell escaping
let combined_command = crate::ssh::shell_escape_command(&self.command)
.map_err(|e| eyre!("Failed to escape shell command: {}", e))?;
debug!("Combined escaped command: {}", combined_command);
ssh_cmd.arg(combined_command);
} else {
// Single argument can be passed directly
ssh_cmd.args(&self.command);
}
}

debug!("Executing SSH command: {:?}", ssh_cmd);

// For commands (non-interactive SSH), capture output
// For interactive SSH (no command), exec to replace current process
if self.command.is_empty() {
// Interactive SSH - exec to replace the current process
// This provides the cleanest terminal experience
debug!("Executing interactive SSH session via exec");

let error = ssh_cmd.exec();
// exec() only returns on error
return Err(eyre!("Failed to exec SSH command: {}", error));
} else {
// Command execution - capture and forward output
// Try SSH
let output = ssh_cmd
.output()
.map_err(|e| eyre!("Failed to execute SSH command: {}", e))?;

if !output.stdout.is_empty() {
if !self.suppress_output {
// Forward stdout to parent process
if output.status.success() {
if !output.stdout.is_empty() && !self.suppress_output {
print!("{}", String::from_utf8_lossy(&output.stdout));
}
debug!("SSH stdout: {}", String::from_utf8_lossy(&output.stdout));
debug!(
"SSH connected after {:.1}s",
start_time.elapsed().as_secs_f64()
);
return Ok(());
}
if !output.stderr.is_empty() {

// Check if we've exceeded timeout
if start_time.elapsed() >= timeout {
// Timeout - fail with helpful message
if !self.suppress_output {
// Forward stderr to parent process
eprint!("{}", String::from_utf8_lossy(&output.stderr));
let stderr_str = String::from_utf8_lossy(&output.stderr);
eprint!("{}", stderr_str);
eprintln!(
"\nSSH connection failed after {:.1}s. To see VM console output, run: virsh console {}",
start_time.elapsed().as_secs_f64(),
self.domain_name
);
}
debug!("SSH stderr: {}", String::from_utf8_lossy(&output.stderr));
}

if !output.status.success() {
return Err(eyre!(
"SSH connection failed with exit code: {}",
output.status.code().unwrap_or(-1)
"SSH connection failed after {:.1}s",
start_time.elapsed().as_secs_f64()
));
}
}

Ok(())
// Retry all errors
std::thread::sleep(Duration::from_secs(SSH_POLL_DELAY_SECS));
}
}
}

Expand Down Expand Up @@ -377,8 +444,8 @@ pub fn run_ssh_impl(
// Extract SSH configuration from domain metadata
let ssh_config = opts.extract_ssh_config(global_opts)?;

// Connect via SSH
opts.connect_ssh(&ssh_config)?;
// Connect via SSH with retries
opts.connect_ssh(global_opts, &ssh_config)?;

Ok(())
}
Expand Down
4 changes: 2 additions & 2 deletions crates/kit/src/ssh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ impl Default for CommonSshOptions {
fn default() -> Self {
Self {
strict_host_keys: false,
connect_timeout: 30,
connect_timeout: 5,
server_alive_interval: 60,
log_level: "ERROR".to_string(),
extra_options: vec![],
Expand Down Expand Up @@ -363,7 +363,7 @@ mod tests {
fn test_ssh_connection_options() {
// Test default options
let default_opts = SshConnectionOptions::default();
assert_eq!(default_opts.common.connect_timeout, 30);
assert_eq!(default_opts.common.connect_timeout, 5);
assert!(default_opts.allocate_tty);
assert_eq!(default_opts.common.log_level, "ERROR");
assert!(default_opts.common.extra_options.is_empty());
Expand Down
Loading