9 Commits

Author SHA1 Message Date
Bastian
5b7fd4986b Add preflight check: pool_exists 2023-03-23 16:14:04 +01:00
Bastian
41abd0429a Fix Regex to exclude cloud-init drive 2023-03-23 15:46:58 +01:00
Bastian
890567ad05 Remove vm from ha group before shutting down on migration 2023-03-23 14:10:55 +01:00
Bastian
f5441f4c0b Sanitize cloud-init drives from the config 2023-03-22 15:22:36 +01:00
Bastian
fb5b3a6d09 Merge pull request #1 from lephisto/feature-move
Add --migrate feature: near-live migrate between clusters
2023-03-22 14:42:05 +01:00
Bastian
5bf37e886c Add --migrate feature: near-live migrate between clusters 2023-03-22 14:40:01 +01:00
Bastian
010f04c412 make --vmids=num vorking with --prefixids, bump version 2022-12-06 14:20:28 +01:00
Bastian
13245fdf5e Add --jobname as mandatory parameter 2022-11-16 12:47:47 +01:00
Bastian
ae641a3927 Add colors to central logging function 2022-11-15 16:21:36 +01:00
2 changed files with 101 additions and 31 deletions

View File

@@ -142,6 +142,10 @@ The use case is that you might want to keep a cold-standby copy of a certain VM
Another use case is migrating a VM from one cluster to another with the least possible downtime. The real live migration you are used to inside one cluster is hard to achieve cross-cluster, but you can easily make an initial migration while the VM is still running on the source cluster (fully transferring the block devices), shut it down on the source, run the mirror process again (which is much faster now because it only needs to transfer the diff since the initial snapshot) and start it up on the target cluster. This way the migration basically takes one boot plus a few seconds for transferring the incremental snapshot.
## Near-live Migration
To minimize downtime and achieve a near-live migration from one cluster to another, it's recommended to do an initial sync of a VM from the source to the destination cluster. After that, run the job again and add the --migrate switch. This causes the source VM to be shut down prior to snapshot + transfer, and to be restarted on the destination cluster as soon as the incremental transfer is complete. Using --migrate will always try to start the VM on the destination cluster.
## Things to check
From the Proxmox VE hosts you want to back up, you need to be able to ssh passwordless to all other cluster hosts that may hold VMs or containers. This goes for both the source and the destination cluster.

126
crossover
View File

@@ -9,13 +9,14 @@ declare opt_influx_api_url=''
declare opt_influx_token=''
declare opt_influx_bucket=''
declare opt_influx_api_org=''
declare opt_influx_jobname=''
declare opt_influx_job_metrics='crossover_xmit'
declare opt_influx_summary_metrics='crossover_jobs'
# Cross Pool Migration and incremental replication Tool for Proxmox VMs using Ceph.
# Author: Bastian Mäuser <bma@netz.org>
declare -r VERSION=0.5
declare -r VERSION=0.7
declare -r NAME=$(basename "$0")
declare -r PROGNAME=${NAME%.*}
@@ -65,6 +66,7 @@ declare -i opt_keepslock=0
declare -i opt_keepdlock=0
declare -i opt_overwrite=0
declare -i opt_online=0
declare -i opt_migrate=0
declare opt_keep_local='0s'
declare opt_keep_remote='0s'
@@ -114,8 +116,10 @@ Options:
--influxurl Influx API url (e.g. --influxurl=https://your-influxserver.com/api/)
--influxtoken Influx API token with write permission
--influxbucket Influx Bucket to write to (e.g. --influxbucket=telegraf/autogen)
--jobname Descriptive name for the job, used in Statistics
Switches:
--online Allow online Copy
--migrate Stop VM on Source Cluster before final Transfer and restart on destination Cluster
--nolock Don't lock source VM on Transfer (mainly for test purposes)
--keep-slock Keep source VM locked on Transfer
--keep-dlock Keep VM locked after transfer on Destination
@@ -135,7 +139,7 @@ function parse_opts(){
local args
args=$(getopt \
--options '' \
--longoptions=vmid:,prefixid:,excludevmids:,destination:,pool:,keeplocal:,keepremote:,rewrite:,influxurl:,influxorg:,influxtoken:,influxbucket:,online,nolock,keep-slock,keep-dlock,overwrite,dry-run,debug \
--longoptions=vmid:,prefixid:,excludevmids:,destination:,pool:,keeplocal:,keepremote:,rewrite:,influxurl:,influxorg:,influxtoken:,influxbucket:,jobname:,online,migrate,nolock,keep-slock,keep-dlock,overwrite,dry-run,debug \
--name "$PROGNAME" \
-- "$@") \
|| end_process 128
@@ -156,8 +160,9 @@ function parse_opts(){
--influxorg) opt_influx_api_org=$2; shift 2;;
--influxtoken) opt_influx_token=$2; shift 2;;
--influxbucket) opt_influx_bucket=$2; shift 2;;
--online) opt_online=1; shift 2;;
--jobname) opt_influx_jobname=$2; shift 2;;
--online) opt_online=1; shift ;;
--migrate) opt_migrate=1; shift ;;
--dry-run) opt_dry_run=1; shift;;
--debug) opt_debug=1; shift;;
--nolock) opt_lock=0; shift;;
@@ -183,6 +188,8 @@ function parse_opts(){
[ -z "$opt_vm_ids" ] && { log info "VM id is not set."; end_process 1; }
[ -z "$opt_influx_jobname" ] && { log info "Jobname is not set."; end_process 1; }
if [ -n "$opt_keep_local" ]; then
if ! [[ ${opt_keep_local:(-1)} == "s" || ${opt_keep_local:(-1)} == "d" ]]; then
echo "--keeplocal: Parameter malformed. suffix s or d missing"
@@ -197,10 +204,16 @@ function parse_opts(){
fi
fi
if [ $opt_keepdlock -eq 1 ] && [ $opt_migrate -eq 1 ]; then
log error "--keepdlock/--migrate: Invalid parameter Combination: you can't keep the destination locked in near-live migration mode"
end_process 255
fi
if [ "$opt_vm_ids" = "all" ]; then
local all=''
local data=''
local cnt=''
local ids=''
all=$(get_vm_ids "$QEMU_CONF_CLUSTER/*$EXT_CONF" "$LXC_CONF_CLUSTER/*$EXT_CONF")
all=$(echo "$all" | tr ',' "\n")
@@ -213,8 +226,17 @@ function parse_opts(){
done
vm_ids=$(echo "$vm_ids" | tr ',' "\n")
else
vm_ids=$(echo "$opt_vm_ids" | tr ',' "\n")
if [ ! -z $opt_prefix_id ]; then
ids=$(echo "$opt_vm_ids" | tr ',' "\n")
for id in $ids; do
vm_ids=$(echo "$vm_ids$id:$opt_prefix_id$id,")
done
vm_ids=$(echo "$vm_ids" | tr ',' "\n")
else
vm_ids=$(echo "$opt_vm_ids" | tr ',' "\n")
fi
fi
log debug "vm_ids: $vm_ids"
}
human_readable() {
@@ -333,13 +355,15 @@ function log(){
;;
warn)
echo "WARNING: $message" 1>&2
echo -n $(echoyellow "WARNING: ")
echo $(echowhite "$message") 1>&2
echo -e "$message" >> "$LOG_FILE";
[ $opt_syslog -eq 1 ] && logger -t "$PROGNAME" -p daemon.warn "$message"
;;
error)
echo "ERROR: $message" 1>&2
echo -n $(echored "ERROR: ")
echo $(echowhite "$message") 1>&2
echo -e "$message" >> "$LOG_FILE";
[ $opt_syslog -eq 1 ] && logger -t "$PROGNAME" -p daemon.err "$message"
;;
@@ -409,6 +433,11 @@ function mirror() {
map_vmids_to_host
map_vmids_to_dsthost "$opt_destination"
if [ $(check_pool_exist "$opt_pool") -eq 0 ]; then
log error "Preflight check: Destination RBD-Pool $opt_pool does not exist."
end_process 255
fi
for vm_id in $svmids; do
(( vmcount++ ))
local file_config; file_config=$(get_config_file)
@@ -428,7 +457,7 @@ function mirror() {
if [ $host_on_destination ]; then
dststatus=$(ssh root@${dstpvnode[$dvmid]} qm status $dvmid|cut -d' ' -f 2)
if [ $dststatus == "running" ]; then
log error "Destination VM is running. bailing out"
log error "VM is running on Destination. bailing out"
end_process 255
fi
fi
@@ -449,21 +478,33 @@ function mirror() {
map_vmids_to_dsthost "$opt_destination"
fi
#--move so we need to shutdown and remove from ha group?
if [ $opt_migrate -eq 1 ]; then
log info "VM $vm_id - Migration requested, shutting down VM on ${pvnode[$vm_id]}"
if [ "$(get_ha_status "$vm_id")" == "started" ]; then
log info "VM $vm_id - remove from HA"
do_run "ha-manager remove $vm_id"
fi
do_run "ssh root@${pvnode[$vm_id]} qm shutdown $vm_id >/dev/null"
fi
#Lock on source + destination
if [ $opt_lock -eq 1 ]; then
do_run "ssh root@"${pvnode[$vm_id]}" qm set "$vm_id" --lock backup" >/dev/null
log info "VM $vm_id - locked $vm_id [rc:$?]"
log info "VM $vm_id - locked $vm_id [rc:$?] on source"
do_run "ssh root@"${dstpvnode[$dvmid]}" qm set "$dvmid" --lock backup" >/dev/null
log info "VM $dvmid - locked $dvmid [rc:$?]"
log info "VM $dvmid - locked $dvmid [rc:$?] on destination"
fi
vm_freeze "$vm_id" "${pvnode[$vm_id]}" >/dev/null
freezerc=$?
if [ $freezerc -gt 0 ]; then
log error "VM $vm_id - QEMU-Guest could not fsfreeze on guest."
(( perf_freeze_failed++ ))
else
(( perf_freeze_ok++ ))
#Freeze fs only if no migration running
if [ $opt_migrate -eq 0 ]; then
vm_freeze "$vm_id" "${pvnode[$vm_id]}" >/dev/null
freezerc=$?
if [ $freezerc -gt 0 ]; then
log error "VM $vm_id - QEMU-Guest could not fsfreeze on guest."
(( perf_freeze_failed++ ))
else
(( perf_freeze_ok++ ))
fi
fi
for disk in $(get_disks_from_config "$file_config"); do
src_image_spec=$(get_image_spec "$disk")
@@ -476,16 +517,17 @@ function mirror() {
(( perf_ss_ok++ ))
fi
done
vm_unfreeze "$vm_id" "${pvnode[$vm_id]}" >/dev/null
unfreezerc=$?
if [ $unfreezerc -gt 0 ]; then
log error "VM $vm_id - QEMU-Guest could not fsunfreeze on guest."
if [ $opt_migrate -eq 0 ]; then
vm_unfreeze "$vm_id" "${pvnode[$vm_id]}" >/dev/null
unfreezerc=$?
if [ $unfreezerc -gt 0 ]; then
log error "VM $vm_id - QEMU-Guest could not fsunfreeze on guest."
fi
if [ ! $opt_keepslock -eq 1 ]; then
do_run "ssh root@${pvnode[$vm_id]} qm unlock $vm_id" >/dev/null
log info "VM $vm_id - unlocked source VM $vm_id [rc:$?]"
fi
fi
if [ ! $opt_keepslock -eq 1 ]; then
do_run "ssh root@${pvnode[$vm_id]} qm unlock $vm_id" >/dev/null
log info "VM $vm_id - unlocked source VM $vm_id [rc:$?]"
fi
for disk in $(get_disks_from_config "$file_config"); do
(( diskcount++ ))
log debug "VMID: $vm_id Disk: $disk DESTVMID: $dvmid"
@@ -573,7 +615,7 @@ function mirror() {
log info "VM $vm_id - Disk Summary: Took $(( enddisk - startdisk )) Seconds to transfer $(human_readable "$perf_bytes_total" 2) in a $xmittype run"
if [ -n "$opt_influx_api_url" ]; then
log info "VM $vm_id - Logging to InfluxDB: $opt_influx_api_url"
influxlp="$opt_influx_job_metrics,vmname=$vmname,destination=$opt_destination,srcimage=$src_image_name,dstimage=$dst_image_name,xmittype=$xmittype bytescalculated=$snapshotsize""i,bytesonwire=$perf_bytes_total""i,xmitrc=$xmitrc""i,freezerc=$freezerc""i,unfreezerc=$unfreezerc""i,basets=$basets""i"
influxlp="$opt_influx_job_metrics,vmname=$vmname,jobname=$opt_influx_jobname,destination=$opt_destination,srcimage=$src_image_name,dstimage=$dst_image_name,xmittype=$xmittype bytescalculated=$snapshotsize""i,bytesonwire=$perf_bytes_total""i,xmitrc=$xmitrc""i,freezerc=$freezerc""i,unfreezerc=$unfreezerc""i,basets=$basets""i"
log debug "InfluxLP: --->\n $influxlp"
cmd="curl --request POST \"$opt_influx_api_url/v2/write?org=$opt_influx_api_org&bucket=$opt_influx_bucket&precision=ns\" --header \"Authorization: Token $opt_influx_token\" --header \"Content-Type: text/plain; charset=utf-8\" --header \"Accept: application/json\" --data-binary '$influxlp'"
do_run "$cmd"
@@ -584,6 +626,12 @@ function mirror() {
ssh root@${dstpvnode[$dvmid]} qm unlock $dvmid
log info "VM $dvmid - Unlocking destination VM $dvmid"
fi
#--migrate so start on destination?
if [ $opt_migrate -eq 1 ]; then
log info "VM $dvmid - Starting VM on ${pvnode[$vm_id]}"
do_run "ssh root@"${dstpvnode[$dvmid]}" qm start "$dvmid >/dev/null
fi
done
endjob=$(date +%s)
log info "Finnished mirror $(date "+%F %T")"
@@ -601,7 +649,7 @@ function mirror() {
log info "Differential Bytes .......: $(human_readable $perf_bytes_diff)"
if [ -n "$opt_influx_api_url" ]; then
log info "VM $vm_id - Logging Job summary to InfluxDB: $opt_influx_api_url"
influxlp="$opt_influx_summary_metrics perf_bytes_diff=$perf_bytes_diff""i,perf_bytes_full=$perf_bytes_full""i,perf_bytes_total=$perf_bytes_total""i,perf_diff_failed=$perf_diff_failed""i,perf_diff_ok=$perf_diff_ok""i,perf_freeze_failed=$perf_freeze_failed""i,perf_freeze_ok=$perf_freeze_ok""i,perf_full_failed=$perf_full_failed""i,perf_full_ok=$perf_full_ok""i,perf_ss_failed=$perf_ss_failed""i,perf_ss_ok=$perf_ss_ok""i,perf_vm_running=$perf_vm_running""i,perf_vm_stopped=$perf_vm_stopped""i"
influxlp="$opt_influx_summary_metrics,jobname=$opt_influx_jobname perf_bytes_diff=$perf_bytes_diff""i,perf_bytes_full=$perf_bytes_full""i,perf_bytes_total=$perf_bytes_total""i,perf_diff_failed=$perf_diff_failed""i,perf_diff_ok=$perf_diff_ok""i,perf_freeze_failed=$perf_freeze_failed""i,perf_freeze_ok=$perf_freeze_ok""i,perf_full_failed=$perf_full_failed""i,perf_full_ok=$perf_full_ok""i,perf_ss_failed=$perf_ss_failed""i,perf_ss_ok=$perf_ss_ok""i,perf_vm_running=$perf_vm_running""i,perf_vm_stopped=$perf_vm_stopped""i"
log debug "InfluxLP: --->\n $influxlp"
cmd="curl --request POST \"$opt_influx_api_url/v2/write?org=$opt_influx_api_org&bucket=$opt_influx_bucket&precision=ns\" --header \"Authorization: Token $opt_influx_token\" --header \"Content-Type: text/plain; charset=utf-8\" --header \"Accept: application/json\" --data-binary '$influxlp'"
do_run "$cmd"
@@ -703,7 +751,7 @@ function rewriteconfig(){
else
sedcmd='sed -e /^$/,$d'
fi
cat "$oldconfig" | sed -r -e "s/^(virtio|ide|scsi|sata|mp)([0-9]+):\s([a-zA-Z0-9]+):(.*)-([0-9]+)-disk-([0-9]+).*,(.*)$/\1\2: $newpool:\4-$newvmid-disk-\6-\3,\7/g" | $sedcmd | sed -e '/^$/,$d' | grep -v "^parent:\s.*$" | ssh "$dst" "cat - >$newconfig"
cat "$oldconfig" | sed -r -e "s/^(virtio|ide|scsi|sata|mp)([0-9]+):\s([a-zA-Z0-9]+):(.*)-([0-9]+)-disk-([0-9]+).*,(.*)$/\1\2: $newpool:\4-$newvmid-disk-\6-\3,\7/g" | $sedcmd | sed -e '/^$/,$d' | sed -e '/ide[0-9]:.*-cloudinit,media=cdrom.*/d' | grep -v "^parent:\s.*$" | ssh "$dst" "cat - >$newconfig"
}
function checkvmid(){
@@ -767,6 +815,24 @@ function get_image_spec(){
echo "$image_spec"
}
# Print the HA state of a single VM as reported by `ha-manager status`.
# Arguments: $1 - VM id whose HA service entry (vm:<id>) should be looked up
# Outputs:   the 4th space-separated field of the matching status line with
#            its last character removed (e.g. "started)" -> "started");
#            prints an empty line when no entry for the VM is found.
function get_ha_status() {
    local havmid="$1"
    local ha_status
    # Compare the service id field exactly. A plain `grep vm:10` would also
    # match vm:100, vm:101, ... and return several states at once, which
    # breaks callers that compare the result against a single word.
    ha_status=$(ha-manager status | awk -v sid="vm:$havmid" '$2 == sid { sub(/.$/, "", $4); print $4 }')
    echo "$ha_status"
}
# Preflight helper: check whether a storage with the given name shows up in
# the rbd-related lines of `pvesm status` on the destination host.
# Globals:   opt_destination (read) - ssh target the query is run on
# Arguments: $1 - pool/storage name to look for
# Outputs:   prints 1 when the name is present, 0 otherwise
function check_pool_exist() {
    local poolname="$1"
    local -i exists=0
    local pool_names
    # First column of every status line that mentions rbd.
    pool_names=$(ssh "$opt_destination" pvesm status | grep rbd | cut -d " " -f 1)
    # Match the whole name literally (-F -x). A substring grep returns several
    # lines when e.g. "rbd" and "rbd2" both exist, and the subsequent string
    # comparison then wrongly reports the pool as missing. An empty name is
    # never considered to exist.
    if [ -n "$poolname" ] && grep -Fxq -- "$poolname" <<< "$pool_names"; then
        exists=1
    fi
    echo $exists
}
function main(){
[ $# = 0 ] && usage;