9 Commits

Author SHA1 Message Date
Bastian
8467bcd08e improvement: precise wording 2023-08-04 16:05:56 +02:00
Bastian
48eb3f840e fix: missing vm_id in log message 2023-08-04 15:59:07 +02:00
Bastian
514d19b9f6 added: retrieve ceph versions for compatibility checks 2023-08-04 15:54:37 +02:00
Bastian
a6e1f9342a added: support for EFI Disks 2023-08-04 15:36:18 +02:00
Bastian
59b8ab5ce2 added: default pool, feature: confirm --migrate, add: --noconfirm 2023-08-04 13:38:26 +02:00
Bastian
4bfd79e79e improved: retrieve source/destination cluster name for better insights 2023-07-13 15:18:24 +02:00
Bastian
6e8eb7ce2b fixed: preflight checks 2023-07-13 14:45:55 +02:00
Bastian
be88cb4d40 fixed: perf_vm_stopped++ never counted. 2023-07-13 13:54:20 +02:00
Bastian
1343dc6b51 fixed: Correct target host now displayed in log message, Add downtime counter 2023-07-13 13:51:58 +02:00

110
crossover
View File

@@ -1,5 +1,8 @@
#!/bin/bash
# Cross Pool Migration and incremental replication Tool for Proxmox VMs using Ceph.
# Author: Bastian Mäuser <bma@netz.org>
LC_ALL="en_US.UTF-8"
source rainbow.sh
@@ -13,14 +16,11 @@ declare opt_influx_jobname=''
declare opt_influx_job_metrics='crossover_xmit'
declare opt_influx_summary_metrics='crossover_jobs'
# Cross Pool Migration and incremental replication Tool for Proxmox VMs using Ceph.
# Author: Bastian Mäuser <bma@netz.org>
# File name of the running script without its directory part; NAME and
# PROGNAME below are derived from it.
name=$(basename "$0")
# readonly variables
declare -r NAME=$name
declare -r VERSION=0.8
# NAME with its last ".suffix" stripped (a name without a dot is unchanged).
declare -r PROGNAME=${NAME%.*}
# Base directory of the PVE configuration and its per-node subdirectory.
declare -r PVE_DIR="/etc/pve"
declare -r PVE_NODES="$PVE_DIR/nodes"
# Directory name below each node that holds the "<vmid>.conf" VM config files.
declare -r QEMU='qemu-server'
@@ -28,15 +28,21 @@ declare -r QEMU_CONF_CLUSTER="$PVE_NODES/*/$QEMU"
# Suffix appended to a VM id to form its config file name.
declare -r EXT_CONF='.conf'
# NOTE(review): presumably format strings for pv(1) progress output (full
# transfer vs. snapshot transfer) - confirm at the place they are used.
declare -r PVFORMAT_FULL='e:%t r:%e c:%r a:%a %b %p'
declare -r PVFORMAT_SNAP='e:%t c:%r a:%a %b'
# Per-run log file: a fresh temporary file whose path is frozen in LOG_FILE.
logfile=$(mktemp)
declare -r LOG_FILE=$logfile
# associative global arrays
# pvnode:    source VM id      -> node name (used in config paths and as ssh target)
# dstpvnode: destination VM id -> node name on the destination side
# svmids:    source VM ids that mirror() iterates over
# dvmids:    source VM id      -> destination VM id
declare -A -g pvnode
declare -A -g dstpvnode
declare -A -g svmids
declare -A -g dvmids
# Plain (non-array) globals: cluster names taken from the cluster_name entry in
# corosync.conf, and the Ceph versions filled in by get_ceph_version().
declare -g scluster
declare -g dcluster
declare -g scephversion
declare -g dcephversion
# global integers
# Counters with the integer attribute, all starting at zero.
declare -g -i perf_freeze_ok=0
declare -g -i perf_freeze_failed=0
declare -g -i perf_ss_ok=0
@@ -56,10 +62,12 @@ declare -g -i perf_snaps_removed=0
# Integer counters, starting at zero.
# NOTE(review): presumably VMs processed in total / successfully - confirm
# where they are incremented.
declare -g -i perf_vm_total=0
declare -g -i perf_vm_ok=0
# commandline parameters
# Defaults for the value-taking options; see the usage text for their meaning.
# Target PVE host (--destination); also used as the ssh target.
declare opt_destination
declare opt_vm_ids=''
declare opt_snapshot_prefix='mirror-'
# Regex used to rewrite the transferred config file (--rewrite).
declare opt_rewrite=''
# Ceph pool name on the target side (--pool); falls back to 'rbd'.
declare opt_pool='rbd'
# Prefix for VM ids on the target system (--prefixid); integer attribute, no default.
declare -i opt_prefix_id
declare opt_exclude_vmids=''
# Switch: 1 once --debug was given.
declare -i opt_debug=0
@@ -71,6 +79,7 @@ declare -i opt_keepdlock=0
# Boolean switches stored as integers: 0 = off, set to 1 by parse_opts when the
# matching flag (--overwrite, --online, --migrate, --noconfirm) is given.
declare -i opt_overwrite=0
declare -i opt_online=0
declare -i opt_migrate=0
# When 0, mirror() asks for an interactive yes/no before running in --migrate mode.
declare -i opt_noconfirm=0
# Snapshot retention defaults.
# NOTE(review): presumably set from --keeplocal / --keepremote (e.g. 2d, 7d) -
# confirm in parse_opts.
declare opt_keep_local='0s'
declare opt_keep_remote='0s'
@@ -113,7 +122,7 @@ Options:
--prefixid Prefix for VMID's on target System [optional]
--excludevmids Exclusde VM IDs when using --vmid==all
--destination Target PVE Host in target pool. e.g. --destination=pve04
--pool Ceph pool name in target pool. e.g. --pool=data
--pool Ceph pool name in target pool. e.g. --pool=data [default=rbd]
--keeplocal How many additional Snapshots to keep locally. e.g. --keeplocal=2d
--keepremote How many additional Snapshots to keep remote. e.g. --keepremote=7d
--rewrite PCRE Regex to rewrite the Config Files (eg. --rewrite='s/(net0:)(.*)tag=([0-9]+)/\1\2tag=1/g' would
@@ -130,6 +139,7 @@ Switches:
--keep-slock Keep source VM locked on Transfer
--keep-dlock Keep VM locked after transfer on Destination
--overwrite Overwrite Destination
--noconfirm Don't ask for confirmation before starting --migrate mode (use with care!)
--debug Show Debug Output
Report bugs to <mephisto@mephis.to>
@@ -144,7 +154,7 @@ function parse_opts(){
local args
args=$(getopt \
--options '' \
--longoptions=vmid:,prefixid:,excludevmids:,destination:,pool:,keeplocal:,keepremote:,rewrite:,influxurl:,influxorg:,influxtoken:,influxbucket:,jobname:,mail:,online,migrate,nolock,keep-slock,keep-dlock,overwrite,dry-run,debug,syslog \
--longoptions=vmid:,prefixid:,excludevmids:,destination:,pool:,keeplocal:,keepremote:,rewrite:,influxurl:,influxorg:,influxtoken:,influxbucket:,jobname:,mail:,online,migrate,nolock,keep-slock,keep-dlock,overwrite,dry-run,noconfirm,debug,syslog \
--name "$PROGNAME" \
-- "$@") \
|| end_process 128
@@ -170,6 +180,7 @@ function parse_opts(){
--online) opt_online=1; shift ;;
--migrate) opt_migrate=1; shift ;;
--dry-run) opt_dry_run=1; shift;;
--noconfirm) opt_noconfirm=1; shift;;
--debug) opt_debug=1; shift;;
--nolock) opt_lock=0; shift;;
--keep-slock) opt_keepslock=1; shift;;
@@ -309,20 +320,6 @@ function get_vm_ids(){
echo "$data"
}
# Print the path of the config file belonging to the VM id held in the global
# $vm_id, looked up below $QEMU_CONF_CLUSTER with the $EXT_CONF suffix.
# If exist_file rejects the path, an error is logged and end_process 128 is
# called; should that call return, an empty line is printed.
# Globals: vm_id, QEMU_CONF_CLUSTER, EXT_CONF (all read)
# Outputs: the resolved path(s) as listed by ls, or an empty line
function get_config_file(){
    local conf_glob="$QEMU_CONF_CLUSTER/$vm_id$EXT_CONF"
    local resolved=''

    if ! exist_file "$conf_glob"; then
        log error "VM $vm_id - Unknown technology or VMID not found: $QEMU_CONF_CLUSTER/$vm_id$EXT_CONF"
        end_process 128
    else
        # Deliberately unquoted: the shell has to expand any wildcard in the path.
        resolved=$(ls $conf_glob)
    fi

    echo "$resolved"
}
function get_disks_from_config(){
local disks;
local file_config=$1
@@ -334,7 +331,7 @@ function get_disks_from_config(){
[[ "$line" == "" ]] && break
echo "$line"
done < "$file_config" | \
grep -P '^(?:((?:virtio|ide|scsi|sata|mp)\d+)|rootfs): ' | \
grep -P '^(?:((?:efidisk|virtio|ide|scsi|sata|mp)\d+)|rootfs): ' | \
grep -v -P 'cdrom|none' | \
grep -v -P 'backup=0' | \
awk '{ split($0,a,","); split(a[1],b," "); print b[2]}')
@@ -415,6 +412,9 @@ function mirror() {
local -i endjob
local -i vmcount=0
local -i diskcount=0
local -i startdowntime
local -i enddowntime
local -i ga_ping
local disp_perf_freeze_failed
local disp_perf_ss_failed
@@ -425,6 +425,9 @@ function mirror() {
log info "Start mirror $(date "+%F %T")"
startjob=$(date +%s)
get_ceph_version
log info "Local Ceph Version: $scephversion, Remote Ceph Version: $dcephversion"
#create pid file
local pid_file="/var/run/$PROGNAME.pid"
if [[ -e "$pid_file" ]]; then
@@ -439,6 +442,23 @@ function mirror() {
end_process 1
fi
scluster=$(grep cluster_name /etc/pve/corosync.conf | cut -d " " -f 4)
dcluster=$(ssh "$opt_destination" grep cluster_name /etc/pve/corosync.conf | cut -d " " -f 4)
if [ $opt_migrate -eq 1 ] && [ $opt_noconfirm -eq 0 ]; then
echo "VM(s) $opt_vm_ids will subsequently be shutdown on [$scluster] and started on [$dcluster]"
read -p "Do you want to proceed? (yes/no) " yn
case $yn in
yes ) echo ok, we will proceed;;
no ) echo exiting...;
exit;;
* ) echo invalid response;
exit 1;;
esac
fi
map_source_to_destination_vmid
map_vmids_to_host
map_vmids_to_dsthost "$opt_destination"
@@ -449,9 +469,18 @@ function mirror() {
fi
for vm_id in $svmids; do
file_config="$PVE_NODES/${pvnode[$vm_id]}/$QEMU/$vm_id.conf"
if ! exist_file "$file_config"; then
log error "VM $vm_id - Preflight check: VM $vm_id does not exist on source cluster [$scluster] - skipping to next VM."
continue
fi
ga_ping=$(gaping "$vm_id")
log debug "ga_ping: $ga_ping"
if [ "$ga_ping" -eq 255 ] ; then #vm running but no qemu-guest-agent answering
log error "VM $vm_id - Preflight check: VM $vm_id on source cluster [$scluster] has no qemu-guest-agent running - skipping to next VM."
continue
fi
(( vmcount++ ))
local file_config; file_config=$(get_config_file)
[ -z "$file_config" ] && continue
local disk=''
dvmid=${dvmids[$vm_id]}
vmname=$(cat $PVE_NODES/"${pvnode[$vm_id]}"/$QEMU/"$vm_id".conf | sed -e ''$restripsnapshots'' | grep "name\:" | cut -d' ' -f 2)
@@ -467,7 +496,7 @@ function mirror() {
if [ $host_on_destination ]; then
dststatus=$(ssh root@${dstpvnode[$dvmid]} qm status $dvmid|cut -d' ' -f 2)
if [ $dststatus == "running" ]; then
log error "VM is running on Destination. bailing out"
log error "VM is running on Destination Cluster [$dcluster]. bailing out"
end_process 255
fi
fi
@@ -483,7 +512,7 @@ function mirror() {
log error "Source VM genid ($srcvmgenid) doesn't match destination VM genid ($dstvmgenid). This should not happen. Bailing out.."
end_process 255
fi
log info "VM $vm_id - Transmitting Config for to destination $opt_destination VMID $dvmid"
log info "VM $vm_id - Transmitting Config for VM $vm_id to destination $opt_destination VMID $dvmid"
rewriteconfig $PVE_NODES/"${pvnode[$vm_id]}"/$QEMU/"$vm_id".conf $opt_destination "$opt_pool" $PVE_NODES/"$opt_destination"/$QEMU/"$dvmid".conf "$dvmid"
map_vmids_to_dsthost "$opt_destination"
fi
@@ -496,6 +525,7 @@ function mirror() {
do_run "ha-manager remove $vm_id"
fi
do_run "ssh root@${pvnode[$vm_id]} qm shutdown $vm_id >/dev/null"
startdowntime=$(date +%s)
fi
#Lock on source + destination
@@ -505,8 +535,8 @@ function mirror() {
do_run "ssh root@""${dstpvnode[$dvmid]}"" qm set ""$dvmid"" --lock backup" >/dev/null
log info "VM $dvmid - locked $dvmid [rc:$?] on destination"
fi
#Freeze fs only if no migration running
if [ $opt_migrate -eq 0 ]; then
#Freeze fs only if no migration running and qemu-guest-agent okay.
if [ $opt_migrate -eq 0 ] && [ $ga_ping -eq 0 ]; then
vm_freeze "$vm_id" "${pvnode[$vm_id]}" >/dev/null
freezerc=$?
if [ $freezerc -gt 0 ]; then
@@ -638,8 +668,10 @@ function mirror() {
fi
#--migrate so start on destination?
if [ $opt_migrate -eq 1 ]; then
log info "VM $dvmid - Starting VM on ${pvnode[$vm_id]}"
log info "VM $dvmid - Starting VM on node ${dstpvnode[$dvmid]} in cluster [$dcluster]"
do_run "ssh root@""${dstpvnode[$dvmid]}"" qm start "$dvmid >/dev/null
enddowntime=$(date +%s)
log info "VM $dvmid - Downtime: $(( enddowntime - startdowntime )) Seconds"
fi
done
@@ -707,6 +739,15 @@ function do_housekeeping(){
done
}
# Probe the qemu-guest-agent of a VM by running "qm guest cmd <vmid> ping" over
# ssh on the node recorded for that VM in pvnode.
# Globals:   pvnode (read)
# Arguments: $1 - VM id
# Outputs:   the exit status of the ssh call, printed on stdout. mirror() reads
#            0 as "agent answers" and 255 as "no agent answering".
# Returns:   the status of the final echo; the probe result is on stdout only.
function gaping() {
    local vmid=$1
    local rc
    # Run the command directly instead of building a string for eval: nothing
    # is re-parsed by the local shell and no helper variable leaks into the
    # global scope.
    ssh "root@${pvnode[$vmid]}" qm guest cmd "$vmid" ping >/dev/null 2>&1
    rc=$?
    echo "$rc"
}
function create_snapshot(){
local snap="$1"
log info "VM $vm_id - Creating snapshot $snap"
@@ -722,8 +763,8 @@ function vm_freeze() {
status=$(ssh root@"$fhost" qm status "$fvm"|cut -d' ' -f 2)
if ! [[ "$status" == "running" ]]; then
log info "VM $fvm - Not running, skipping fsfreeze-freeze"
return
(( perf_vm_stopped++ ))
return
else
(( perf_vm_running++ ))
fi
@@ -763,7 +804,7 @@ function rewriteconfig(){
else
sedcmd='sed -e /^$/,$d'
fi
cat "$oldconfig" | sed -r -e "s/^(virtio|ide|scsi|sata|mp)([0-9]+):\s([a-zA-Z0-9]+):(.*)-([0-9]+)-disk-([0-9]+).*,(.*)$/\1\2: $newpool:\4-$newvmid-disk-\6-\3,\7/g" | $sedcmd | sed -e '/^$/,$d' | sed -e '/ide[0-9]:.*-cloudinit,media=cdrom.*/d' | grep -v "^parent:\s.*$" | ssh "$dst" "cat - >$newconfig"
cat "$oldconfig" | sed -r -e "s/^(efidisk|virtio|ide|scsi|sata|mp)([0-9]+):\s([a-zA-Z0-9]+):(.*)-([0-9]+)-disk-([0-9]+).*,(.*)$/\1\2: $newpool:\4-$newvmid-disk-\6-\3,\7/g" | $sedcmd | sed -e '/^$/,$d' | sed -e '/ide[0-9]:.*-cloudinit,media=cdrom.*/d' | grep -v "^parent:\s.*$" | ssh "$dst" "cat - >$newconfig"
}
function checkvmid(){
@@ -798,7 +839,7 @@ function end_process(){
local -i bps
local -i ss_total
local subject
if ! [[ -z "$startjob" && -z "$endjob" ]]; then
if ! [[ -z "$startjob" || -z "$endjob" ]]; then
runtime=$(expr $endjob - $startjob)
bps=$(expr $perf_bytes_total/$runtime)
fi
@@ -847,6 +888,11 @@ function check_pool_exist() {
echo $exists
}
# Record the Ceph version of the local host and of the destination host.
# The version is taken as the third space-separated field of "ceph -v".
# Globals:   opt_destination (read) - ssh target for the remote query
#            scephversion (written) - local version
#            dcephversion (written) - version reported by the destination
# Arguments: none
function get_ceph_version() {
    scephversion=$(ceph -v | cut -d " " -f 3)
    # Quoted so the destination reaches ssh as exactly one argument.
    dcephversion=$(ssh "$opt_destination" ceph -v | cut -d " " -f 3)
}
function main(){
[ $# = 0 ] && usage;