9 Commits

Author SHA1 Message Date
Bastian
8467bcd08e improvement: precise wording 2023-08-04 16:05:56 +02:00
Bastian
48eb3f840e fix: missing vm_id in log message 2023-08-04 15:59:07 +02:00
Bastian
514d19b9f6 added: retrieve ceph versions for compatibility checks 2023-08-04 15:54:37 +02:00
Bastian
a6e1f9342a added: support for EFI Disks 2023-08-04 15:36:18 +02:00
Bastian
59b8ab5ce2 added: default pool, feature: confirm --migrate, add: --noconfirm 2023-08-04 13:38:26 +02:00
Bastian
4bfd79e79e improved: retrieve source/destination cluster name for better insights 2023-07-13 15:18:24 +02:00
Bastian
6e8eb7ce2b fixed: preflight checks 2023-07-13 14:45:55 +02:00
Bastian
be88cb4d40 fixed: perf_vm_stopped++ never counted. 2023-07-13 13:54:20 +02:00
Bastian
1343dc6b51 fixed: Correct target host now displayed in log message, Add downtime counter 2023-07-13 13:51:58 +02:00

110
crossover
View File

@@ -1,5 +1,8 @@
#!/bin/bash #!/bin/bash
# Cross Pool Migration and incremental replication Tool for Proxmox VMs using Ceph.
# Author: Bastian Mäuser <bma@netz.org>
LC_ALL="en_US.UTF-8" LC_ALL="en_US.UTF-8"
source rainbow.sh source rainbow.sh
@@ -13,14 +16,11 @@ declare opt_influx_jobname=''
declare opt_influx_job_metrics='crossover_xmit' declare opt_influx_job_metrics='crossover_xmit'
declare opt_influx_summary_metrics='crossover_jobs' declare opt_influx_summary_metrics='crossover_jobs'
# Cross Pool Migration and incremental replication Tool for Proxmox VMs using Ceph.
# Author: Bastian Mäuser <bma@netz.org>
name=$(basename "$0") name=$(basename "$0")
# readonly variables
declare -r NAME=$name declare -r NAME=$name
declare -r VERSION=0.8 declare -r VERSION=0.8
declare -r PROGNAME=${NAME%.*} declare -r PROGNAME=${NAME%.*}
declare -r PVE_DIR="/etc/pve" declare -r PVE_DIR="/etc/pve"
declare -r PVE_NODES="$PVE_DIR/nodes" declare -r PVE_NODES="$PVE_DIR/nodes"
declare -r QEMU='qemu-server' declare -r QEMU='qemu-server'
@@ -28,15 +28,21 @@ declare -r QEMU_CONF_CLUSTER="$PVE_NODES/*/$QEMU"
declare -r EXT_CONF='.conf' declare -r EXT_CONF='.conf'
declare -r PVFORMAT_FULL='e:%t r:%e c:%r a:%a %b %p' declare -r PVFORMAT_FULL='e:%t r:%e c:%r a:%a %b %p'
declare -r PVFORMAT_SNAP='e:%t c:%r a:%a %b' declare -r PVFORMAT_SNAP='e:%t c:%r a:%a %b'
logfile=$(mktemp) logfile=$(mktemp)
declare -r LOG_FILE=$logfile declare -r LOG_FILE=$logfile
# associative global arrays
declare -A -g pvnode declare -A -g pvnode
declare -A -g dstpvnode declare -A -g dstpvnode
declare -A -g svmids declare -A -g svmids
declare -A -g dvmids declare -A -g dvmids
declare -g scluster
declare -g dcluster
declare -g scephversion
declare -g dcephversion
# global integers
declare -g -i perf_freeze_ok=0 declare -g -i perf_freeze_ok=0
declare -g -i perf_freeze_failed=0 declare -g -i perf_freeze_failed=0
declare -g -i perf_ss_ok=0 declare -g -i perf_ss_ok=0
@@ -56,10 +62,12 @@ declare -g -i perf_snaps_removed=0
declare -g -i perf_vm_total=0 declare -g -i perf_vm_total=0
declare -g -i perf_vm_ok=0 declare -g -i perf_vm_ok=0
# commandline parameters
declare opt_destination declare opt_destination
declare opt_vm_ids='' declare opt_vm_ids=''
declare opt_snapshot_prefix='mirror-' declare opt_snapshot_prefix='mirror-'
declare opt_rewrite='' declare opt_rewrite=''
declare opt_pool='rbd'
declare -i opt_prefix_id declare -i opt_prefix_id
declare opt_exclude_vmids='' declare opt_exclude_vmids=''
declare -i opt_debug=0 declare -i opt_debug=0
@@ -71,6 +79,7 @@ declare -i opt_keepdlock=0
declare -i opt_overwrite=0 declare -i opt_overwrite=0
declare -i opt_online=0 declare -i opt_online=0
declare -i opt_migrate=0 declare -i opt_migrate=0
declare -i opt_noconfirm=0
declare opt_keep_local='0s' declare opt_keep_local='0s'
declare opt_keep_remote='0s' declare opt_keep_remote='0s'
@@ -113,7 +122,7 @@ Options:
--prefixid Prefix for VMID's on target System [optional] --prefixid Prefix for VMID's on target System [optional]
--excludevmids Exclusde VM IDs when using --vmid==all --excludevmids Exclusde VM IDs when using --vmid==all
--destination Target PVE Host in target pool. e.g. --destination=pve04 --destination Target PVE Host in target pool. e.g. --destination=pve04
--pool Ceph pool name in target pool. e.g. --pool=data --pool Ceph pool name in target pool. e.g. --pool=data [default=rbd]
--keeplocal How many additional Snapshots to keep locally. e.g. --keeplocal=2d --keeplocal How many additional Snapshots to keep locally. e.g. --keeplocal=2d
--keepremote How many additional Snapshots to keep remote. e.g. --keepremote=7d --keepremote How many additional Snapshots to keep remote. e.g. --keepremote=7d
--rewrite PCRE Regex to rewrite the Config Files (eg. --rewrite='s/(net0:)(.*)tag=([0-9]+)/\1\2tag=1/g' would --rewrite PCRE Regex to rewrite the Config Files (eg. --rewrite='s/(net0:)(.*)tag=([0-9]+)/\1\2tag=1/g' would
@@ -130,6 +139,7 @@ Switches:
--keep-slock Keep source VM locked on Transfer --keep-slock Keep source VM locked on Transfer
--keep-dlock Keep VM locked after transfer on Destination --keep-dlock Keep VM locked after transfer on Destination
--overwrite Overwrite Destination --overwrite Overwrite Destination
--noconfirm Don't ask for confirmation before starting --migrate mode (use with care!)
--debug Show Debug Output --debug Show Debug Output
Report bugs to <mephisto@mephis.to> Report bugs to <mephisto@mephis.to>
@@ -144,7 +154,7 @@ function parse_opts(){
local args local args
args=$(getopt \ args=$(getopt \
--options '' \ --options '' \
--longoptions=vmid:,prefixid:,excludevmids:,destination:,pool:,keeplocal:,keepremote:,rewrite:,influxurl:,influxorg:,influxtoken:,influxbucket:,jobname:,mail:,online,migrate,nolock,keep-slock,keep-dlock,overwrite,dry-run,debug,syslog \ --longoptions=vmid:,prefixid:,excludevmids:,destination:,pool:,keeplocal:,keepremote:,rewrite:,influxurl:,influxorg:,influxtoken:,influxbucket:,jobname:,mail:,online,migrate,nolock,keep-slock,keep-dlock,overwrite,dry-run,noconfirm,debug,syslog \
--name "$PROGNAME" \ --name "$PROGNAME" \
-- "$@") \ -- "$@") \
|| end_process 128 || end_process 128
@@ -170,6 +180,7 @@ function parse_opts(){
--online) opt_online=1; shift ;; --online) opt_online=1; shift ;;
--migrate) opt_migrate=1; shift ;; --migrate) opt_migrate=1; shift ;;
--dry-run) opt_dry_run=1; shift;; --dry-run) opt_dry_run=1; shift;;
--noconfirm) opt_noconfirm=1; shift;;
--debug) opt_debug=1; shift;; --debug) opt_debug=1; shift;;
--nolock) opt_lock=0; shift;; --nolock) opt_lock=0; shift;;
--keep-slock) opt_keepslock=1; shift;; --keep-slock) opt_keepslock=1; shift;;
@@ -309,20 +320,6 @@ function get_vm_ids(){
echo "$data" echo "$data"
} }
# Look up the config file of the VM named by $vm_id across all cluster nodes.
# Globals:   vm_id             (read; not declared here, taken from the calling scope)
#            QEMU_CONF_CLUSTER (read; path pattern containing a '*' node wildcard)
#            EXT_CONF          (read; config file extension)
# Outputs:   whatever ls prints for the expanded pattern, on stdout
# On a miss: logs an error and calls end_process 128.
function get_config_file(){
local file_config=''
# The pattern is passed to exist_file as a single quoted string.
# NOTE(review): exist_file is defined elsewhere; presumably it expands the glob itself - confirm.
if exist_file "$QEMU_CONF_CLUSTER/$vm_id$EXT_CONF"; then
# Unquoted, so the '*' in QEMU_CONF_CLUSTER is glob-expanded by the shell before ls runs.
file_config=$(ls $QEMU_CONF_CLUSTER/$vm_id$EXT_CONF)
else
log error "VM $vm_id - Unknown technology or VMID not found: $QEMU_CONF_CLUSTER/$vm_id$EXT_CONF"
end_process 128
fi
echo "$file_config"
}
function get_disks_from_config(){ function get_disks_from_config(){
local disks; local disks;
local file_config=$1 local file_config=$1
@@ -334,7 +331,7 @@ function get_disks_from_config(){
[[ "$line" == "" ]] && break [[ "$line" == "" ]] && break
echo "$line" echo "$line"
done < "$file_config" | \ done < "$file_config" | \
grep -P '^(?:((?:virtio|ide|scsi|sata|mp)\d+)|rootfs): ' | \ grep -P '^(?:((?:efidisk|virtio|ide|scsi|sata|mp)\d+)|rootfs): ' | \
grep -v -P 'cdrom|none' | \ grep -v -P 'cdrom|none' | \
grep -v -P 'backup=0' | \ grep -v -P 'backup=0' | \
awk '{ split($0,a,","); split(a[1],b," "); print b[2]}') awk '{ split($0,a,","); split(a[1],b," "); print b[2]}')
@@ -415,6 +412,9 @@ function mirror() {
local -i endjob local -i endjob
local -i vmcount=0 local -i vmcount=0
local -i diskcount=0 local -i diskcount=0
local -i startdowntime
local -i enddowntime
local -i ga_ping
local disp_perf_freeze_failed local disp_perf_freeze_failed
local disp_perf_ss_failed local disp_perf_ss_failed
@@ -425,6 +425,9 @@ function mirror() {
log info "Start mirror $(date "+%F %T")" log info "Start mirror $(date "+%F %T")"
startjob=$(date +%s) startjob=$(date +%s)
get_ceph_version
log info "Local Ceph Version: $scephversion, Remote Ceph Version: $dcephversion"
#create pid file #create pid file
local pid_file="/var/run/$PROGNAME.pid" local pid_file="/var/run/$PROGNAME.pid"
if [[ -e "$pid_file" ]]; then if [[ -e "$pid_file" ]]; then
@@ -439,6 +442,23 @@ function mirror() {
end_process 1 end_process 1
fi fi
scluster=$(grep cluster_name /etc/pve/corosync.conf | cut -d " " -f 4)
dcluster=$(ssh "$opt_destination" grep cluster_name /etc/pve/corosync.conf | cut -d " " -f 4)
if [ $opt_migrate -eq 1 ] && [ $opt_noconfirm -eq 0 ]; then
echo "VM(s) $opt_vm_ids will subsequently be shutdown on [$scluster] and started on [$dcluster]"
read -p "Do you want to proceed? (yes/no) " yn
case $yn in
yes ) echo ok, we will proceed;;
no ) echo exiting...;
exit;;
* ) echo invalid response;
exit 1;;
esac
fi
map_source_to_destination_vmid map_source_to_destination_vmid
map_vmids_to_host map_vmids_to_host
map_vmids_to_dsthost "$opt_destination" map_vmids_to_dsthost "$opt_destination"
@@ -449,9 +469,18 @@ function mirror() {
fi fi
for vm_id in $svmids; do for vm_id in $svmids; do
file_config="$PVE_NODES/${pvnode[$vm_id]}/$QEMU/$vm_id.conf"
if ! exist_file "$file_config"; then
log error "VM $vm_id - Preflight check: VM $vm_id does not exist on source cluster [$scluster] - skipping to next VM."
continue
fi
ga_ping=$(gaping "$vm_id")
log debug "ga_ping: $ga_ping"
if [ "$ga_ping" -eq 255 ] ; then #vm running but no qemu-guest-agent answering
log error "VM $vm_id - Preflight check: VM $vm_id on source cluster [$scluster] has no qemu-guest-agent running - skipping to next VM."
continue
fi
(( vmcount++ )) (( vmcount++ ))
local file_config; file_config=$(get_config_file)
[ -z "$file_config" ] && continue
local disk='' local disk=''
dvmid=${dvmids[$vm_id]} dvmid=${dvmids[$vm_id]}
vmname=$(cat $PVE_NODES/"${pvnode[$vm_id]}"/$QEMU/"$vm_id".conf | sed -e ''$restripsnapshots'' | grep "name\:" | cut -d' ' -f 2) vmname=$(cat $PVE_NODES/"${pvnode[$vm_id]}"/$QEMU/"$vm_id".conf | sed -e ''$restripsnapshots'' | grep "name\:" | cut -d' ' -f 2)
@@ -467,7 +496,7 @@ function mirror() {
if [ $host_on_destination ]; then if [ $host_on_destination ]; then
dststatus=$(ssh root@${dstpvnode[$dvmid]} qm status $dvmid|cut -d' ' -f 2) dststatus=$(ssh root@${dstpvnode[$dvmid]} qm status $dvmid|cut -d' ' -f 2)
if [ $dststatus == "running" ]; then if [ $dststatus == "running" ]; then
log error "VM is running on Destination. bailing out" log error "VM is running on Destination Cluster [$dcluster]. bailing out"
end_process 255 end_process 255
fi fi
fi fi
@@ -483,7 +512,7 @@ function mirror() {
log error "Source VM genid ($srcvmgenid) doesn't match destination VM genid ($dstvmgenid). This should not happen. Bailing out.." log error "Source VM genid ($srcvmgenid) doesn't match destination VM genid ($dstvmgenid). This should not happen. Bailing out.."
end_process 255 end_process 255
fi fi
log info "VM $vm_id - Transmitting Config for to destination $opt_destination VMID $dvmid" log info "VM $vm_id - Transmitting Config for VM $vm_id to destination $opt_destination VMID $dvmid"
rewriteconfig $PVE_NODES/"${pvnode[$vm_id]}"/$QEMU/"$vm_id".conf $opt_destination "$opt_pool" $PVE_NODES/"$opt_destination"/$QEMU/"$dvmid".conf "$dvmid" rewriteconfig $PVE_NODES/"${pvnode[$vm_id]}"/$QEMU/"$vm_id".conf $opt_destination "$opt_pool" $PVE_NODES/"$opt_destination"/$QEMU/"$dvmid".conf "$dvmid"
map_vmids_to_dsthost "$opt_destination" map_vmids_to_dsthost "$opt_destination"
fi fi
@@ -496,6 +525,7 @@ function mirror() {
do_run "ha-manager remove $vm_id" do_run "ha-manager remove $vm_id"
fi fi
do_run "ssh root@${pvnode[$vm_id]} qm shutdown $vm_id >/dev/null" do_run "ssh root@${pvnode[$vm_id]} qm shutdown $vm_id >/dev/null"
startdowntime=$(date +%s)
fi fi
#Lock on source + destination #Lock on source + destination
@@ -505,8 +535,8 @@ function mirror() {
do_run "ssh root@""${dstpvnode[$dvmid]}"" qm set ""$dvmid"" --lock backup" >/dev/null do_run "ssh root@""${dstpvnode[$dvmid]}"" qm set ""$dvmid"" --lock backup" >/dev/null
log info "VM $dvmid - locked $dvmid [rc:$?] on destination" log info "VM $dvmid - locked $dvmid [rc:$?] on destination"
fi fi
#Freeze fs only if no migration running #Freeze fs only if no migration running and qemu-guest-agent okay.
if [ $opt_migrate -eq 0 ]; then if [ $opt_migrate -eq 0 ] && [ $ga_ping -eq 0 ]; then
vm_freeze "$vm_id" "${pvnode[$vm_id]}" >/dev/null vm_freeze "$vm_id" "${pvnode[$vm_id]}" >/dev/null
freezerc=$? freezerc=$?
if [ $freezerc -gt 0 ]; then if [ $freezerc -gt 0 ]; then
@@ -638,8 +668,10 @@ function mirror() {
fi fi
#--migrate so start on destination? #--migrate so start on destination?
if [ $opt_migrate -eq 1 ]; then if [ $opt_migrate -eq 1 ]; then
log info "VM $dvmid - Starting VM on ${pvnode[$vm_id]}" log info "VM $dvmid - Starting VM on node ${dstpvnode[$dvmid]} in cluster [$dcluster]"
do_run "ssh root@""${dstpvnode[$dvmid]}"" qm start "$dvmid >/dev/null do_run "ssh root@""${dstpvnode[$dvmid]}"" qm start "$dvmid >/dev/null
enddowntime=$(date +%s)
log info "VM $dvmid - Downtime: $(( enddowntime - startdowntime )) Seconds"
fi fi
done done
@@ -707,6 +739,15 @@ function do_housekeeping(){
done done
} }
# Ping the QEMU guest agent of a VM through the node that hosts it.
# Globals:   pvnode (read) - array indexed by VM id, supplies the ssh target host
# Arguments: $1 - VM id
# Outputs:   the exit status of the ssh/qm call, printed to stdout.
#            mirror() compares this value against 0 and 255.
function gaping() {
    local vmid=$1
    local rc
    # Invoke ssh directly instead of building a string and eval'ing it:
    # the arguments are no longer re-parsed by the shell, and no global
    # `cmd` variable is left behind in the caller's scope.
    ssh "root@${pvnode[$vmid]}" qm guest cmd "$vmid" ping >/dev/null 2>&1
    rc=$?
    echo "$rc"
}
function create_snapshot(){ function create_snapshot(){
local snap="$1" local snap="$1"
log info "VM $vm_id - Creating snapshot $snap" log info "VM $vm_id - Creating snapshot $snap"
@@ -722,8 +763,8 @@ function vm_freeze() {
status=$(ssh root@"$fhost" qm status "$fvm"|cut -d' ' -f 2) status=$(ssh root@"$fhost" qm status "$fvm"|cut -d' ' -f 2)
if ! [[ "$status" == "running" ]]; then if ! [[ "$status" == "running" ]]; then
log info "VM $fvm - Not running, skipping fsfreeze-freeze" log info "VM $fvm - Not running, skipping fsfreeze-freeze"
return
(( perf_vm_stopped++ )) (( perf_vm_stopped++ ))
return
else else
(( perf_vm_running++ )) (( perf_vm_running++ ))
fi fi
@@ -763,7 +804,7 @@ function rewriteconfig(){
else else
sedcmd='sed -e /^$/,$d' sedcmd='sed -e /^$/,$d'
fi fi
cat "$oldconfig" | sed -r -e "s/^(virtio|ide|scsi|sata|mp)([0-9]+):\s([a-zA-Z0-9]+):(.*)-([0-9]+)-disk-([0-9]+).*,(.*)$/\1\2: $newpool:\4-$newvmid-disk-\6-\3,\7/g" | $sedcmd | sed -e '/^$/,$d' | sed -e '/ide[0-9]:.*-cloudinit,media=cdrom.*/d' | grep -v "^parent:\s.*$" | ssh "$dst" "cat - >$newconfig" cat "$oldconfig" | sed -r -e "s/^(efidisk|virtio|ide|scsi|sata|mp)([0-9]+):\s([a-zA-Z0-9]+):(.*)-([0-9]+)-disk-([0-9]+).*,(.*)$/\1\2: $newpool:\4-$newvmid-disk-\6-\3,\7/g" | $sedcmd | sed -e '/^$/,$d' | sed -e '/ide[0-9]:.*-cloudinit,media=cdrom.*/d' | grep -v "^parent:\s.*$" | ssh "$dst" "cat - >$newconfig"
} }
function checkvmid(){ function checkvmid(){
@@ -798,7 +839,7 @@ function end_process(){
local -i bps local -i bps
local -i ss_total local -i ss_total
local subject local subject
if ! [[ -z "$startjob" && -z "$endjob" ]]; then if ! [[ -z "$startjob" || -z "$endjob" ]]; then
runtime=$(expr $endjob - $startjob) runtime=$(expr $endjob - $startjob)
bps=$(expr $perf_bytes_total/$runtime) bps=$(expr $perf_bytes_total/$runtime)
fi fi
@@ -847,6 +888,11 @@ function check_pool_exist() {
echo $exists echo $exists
} }
# Record the Ceph version of the local cluster and of the destination host.
# Globals:   opt_destination (read)    - host handed to ssh
#            scephversion    (written) - third space-separated field of local `ceph -v`
#            dcephversion    (written) - same field of `ceph -v` run over ssh
function get_ceph_version() {
    scephversion=$(ceph -v | cut -d " " -f 3)
    # Quoted so the destination is always ssh's host argument; unquoted, an
    # empty value would make ssh treat "ceph" as the hostname.
    dcephversion=$(ssh "$opt_destination" ceph -v | cut -d " " -f 3)
}
function main(){ function main(){
[ $# = 0 ] && usage; [ $# = 0 ] && usage;