proxmox-cluster-network-cha.../ssh_manager.py

148 lines
4.9 KiB
Python

"""SSH connection manager for remote Proxmox nodes.
Uses sshpass + password from .env for authentication.
This is required because Proxmox stores SSH keys in /etc/pve/priv/authorized_keys,
which disappears when pve-cluster is stopped during migration.
"""
import os
import shlex
import subprocess
from typing import Optional
class SSHManager:
    """Run commands and read/write text files on Proxmox nodes.

    Remote operations shell out to ``sshpass`` + the system ``ssh`` client
    using password authentication; local operations use ``subprocess`` and
    ``open`` directly. Every public method reports failure through its return
    value instead of raising.
    """

    def __init__(self, ssh_user: str = "root", ssh_port: int = 22,
                 ssh_password: Optional[str] = None):
        """Store the connection settings; no connection is opened here.

        Args:
            ssh_user: Remote login name.
            ssh_port: Remote sshd port.
            ssh_password: Password handed to ``sshpass``. Remote operations
                fail with an error result while this is ``None``.
        """
        self.ssh_user = ssh_user
        self.ssh_port = ssh_port
        self.ssh_password = ssh_password

    def _build_ssh_cmd(self, host: str, command: str) -> list[str]:
        """Build the argv list that runs *command* on *host*.

        The password is deliberately not part of the returned list:
        ``sshpass -e`` reads it from the ``SSHPASS`` environment variable
        (see ``_ssh_env``), which keeps it out of the process list and out of
        any error message that echoes the command line.
        """
        return [
            "sshpass", "-e",
            "ssh",
            # Accept unknown host keys without an interactive prompt.
            "-o", "StrictHostKeyChecking=no",
            "-o", "ConnectTimeout=10",
            # Password prompting must stay enabled so sshpass can answer it.
            "-o", "BatchMode=no",
            # Go straight to password authentication.
            "-o", "PubkeyAuthentication=no",
            "-p", str(self.ssh_port),
            f"{self.ssh_user}@{host}",
            command,
        ]

    def _ssh_env(self) -> dict[str, str]:
        """Return a copy of the current environment with ``SSHPASS`` set.

        Raises:
            ValueError: If no password has been configured.
        """
        if self.ssh_password is None:
            raise ValueError("SSH password is not configured")
        env = os.environ.copy()
        env["SSHPASS"] = self.ssh_password
        return env

    def _run_ssh(self, host: str, command: str, timeout: int,
                 stdin_data: Optional[str] = None) -> tuple[int, str, str]:
        """Run *command* on *host*, optionally feeding *stdin_data* to it.

        Returns:
            ``(return_code, stdout, stderr)``. A timeout or any local failure
            (missing password, ``sshpass`` not installed, ...) is reported as
            return code ``-1`` with the reason in the stderr slot.
        """
        try:
            result = subprocess.run(
                self._build_ssh_cmd(host, command),
                # input=None leaves stdin untouched, exactly like omitting it.
                input=stdin_data,
                capture_output=True,
                text=True,
                timeout=timeout,
                env=self._ssh_env(),
            )
        except subprocess.TimeoutExpired:
            return -1, "", f"SSH command timed out after {timeout}s"
        except Exception as e:
            # Boundary handler: callers expect an error tuple, never a raise.
            return -1, "", str(e)
        return result.returncode, result.stdout, result.stderr

    def execute(self, host: str, command: str, timeout: int = 30) -> tuple[int, str, str]:
        """Execute a shell command on a remote host via SSH.

        Args:
            host: Host name or address to connect to.
            command: Command line passed to the remote shell.
            timeout: Seconds to wait before giving up.

        Returns:
            ``(return_code, stdout, stderr)``; return code ``-1`` signals a
            timeout or a failure to launch the SSH client.
        """
        return self._run_ssh(host, command, timeout)

    def read_file(self, host: str, path: str) -> tuple[bool, str]:
        """Read a file from a remote host with ``cat``.

        Returns:
            ``(True, content)`` on success, ``(False, stderr)`` otherwise.
        """
        # Quote the path so spaces and shell metacharacters are taken literally.
        rc, stdout, stderr = self.execute(host, f"cat {shlex.quote(path)}")
        if rc == 0:
            return True, stdout
        return False, stderr

    def write_file(self, host: str, path: str, content: str) -> tuple[bool, str]:
        """Write *content* to *path* on a remote host.

        The content is streamed over the SSH session's stdin into
        ``cat > path``. Compared with embedding it in the command line as a
        here-document this writes the text exactly as given (no extra trailing
        newline, matching ``write_local_file``), cannot be truncated by a line
        that equals the here-document delimiter, and is not subject to
        argument-size limits.

        Returns:
            ``(True, "OK")`` on success, ``(False, error_message)`` otherwise.
        """
        rc, _, stderr = self._run_ssh(
            host, f"cat > {shlex.quote(path)}", timeout=30, stdin_data=content
        )
        if rc == 0:
            return True, "OK"
        return False, stderr

    def is_reachable(self, host: str) -> bool:
        """Return True if a trivial command succeeds on *host* within 10s."""
        rc, _, _ = self.execute(host, "echo ok", timeout=10)
        return rc == 0

    def execute_local(self, command: str, timeout: int = 30) -> tuple[int, str, str]:
        """Execute a shell command on the local machine.

        Returns:
            ``(return_code, stdout, stderr)``; return code ``-1`` signals a
            timeout or a failure to launch the command.
        """
        try:
            # shell=True on purpose: *command* is a shell command line, the
            # same kind of string that execute() hands to the remote shell.
            result = subprocess.run(
                command,
                shell=True,
                capture_output=True,
                text=True,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired:
            return -1, "", f"Command timed out after {timeout}s"
        except Exception as e:
            return -1, "", str(e)
        return result.returncode, result.stdout, result.stderr

    def read_local_file(self, path: str) -> tuple[bool, str]:
        """Read a local file.

        Returns:
            ``(True, content)`` on success, ``(False, error_message)`` otherwise.
        """
        try:
            with open(path, 'r') as f:
                return True, f.read()
        except Exception as e:
            return False, str(e)

    def write_local_file(self, path: str, content: str) -> tuple[bool, str]:
        """Write *content* to a local file, replacing any existing content.

        Returns:
            ``(True, "OK")`` on success, ``(False, error_message)`` otherwise.
        """
        try:
            with open(path, 'w') as f:
                f.write(content)
        except Exception as e:
            return False, str(e)
        return True, "OK"

    def run_on_node(self, host: str, command: str, is_local: bool = False,
                    timeout: int = 30) -> tuple[int, str, str]:
        """Run a command on a node: locally if *is_local*, else over SSH."""
        if is_local:
            return self.execute_local(command, timeout)
        return self.execute(host, command, timeout)

    def read_node_file(self, host: str, path: str, is_local: bool = False) -> tuple[bool, str]:
        """Read a file from a node: locally if *is_local*, else over SSH."""
        if is_local:
            return self.read_local_file(path)
        return self.read_file(host, path)

    def write_node_file(self, host: str, path: str, content: str,
                        is_local: bool = False) -> tuple[bool, str]:
        """Write a file to a node: locally if *is_local*, else over SSH."""
        if is_local:
            return self.write_local_file(path, content)
        return self.write_file(host, path, content)