Files
crossover/crossover
2024-10-15 12:53:00 +02:00

939 lines
36 KiB
Bash
Executable File

#!/bin/bash
# Cross Pool Migration and incremental replication Tool for Proxmox VMs using Ceph.
# Author: Bastian Mäuser <bma@netz.org>
LC_ALL="en_US.UTF-8"
source rainbow.sh
# Predefine if you want
declare opt_influx_api_url=''
declare opt_influx_token=''
declare opt_influx_bucket=''
declare opt_influx_api_org=''
declare opt_influx_jobname=''
declare opt_influx_job_metrics='crossover_xmit'
declare opt_influx_summary_metrics='crossover_jobs'
name=$(basename "$0")
# readonly variables
declare -r NAME=$name
declare -r VERSION=0.9
declare -r PROGNAME=${NAME%.*}
declare -r PVE_DIR="/etc/pve"
declare -r PVE_NODES="$PVE_DIR/nodes"
declare -r QEMU='qemu-server'
declare -r QEMU_CONF_CLUSTER="$PVE_NODES/*/$QEMU"
declare -r EXT_CONF='.conf'
declare -r PVFORMAT_FULL='e:%t r:%e c:%r a:%a %b %p'
declare -r PVFORMAT_SNAP='e:%t c:%r a:%a %b'
logfile=$(mktemp)
declare -r LOG_FILE=$logfile
# associative global arrays
declare -A -g pvnode
declare -A -g dstpvnode
declare -A -g svmids
declare -A -g dvmids
declare -g scluster
declare -g dcluster
declare -g scephversion
declare -g dcephversion
# global integers
declare -g -i perf_freeze_ok=0
declare -g -i perf_freeze_failed=0
declare -g -i perf_ss_ok=0
declare -g -i perf_ss_failed=0
declare -g -i perf_ss_ok=0
declare -g -i perf_ss_failed=0
declare -g -i perf_full_ok=0
declare -g -i perf_full_failed=0
declare -g -i perf_diff_ok=0
declare -g -i perf_diff_failed=0
declare -g -i perf_bytes_full=0
declare -g -i perf_bytes_diff=0
declare -g -i perf_bytes_total=0
declare -g -i perf_vm_running=0
declare -g -i perf_vm_stopped=0
declare -g -i perf_snaps_removed=0
declare -g -i perf_vm_total=0
declare -g -i perf_vm_ok=0
# commandline parameters
declare opt_destination
declare opt_vm_ids=''
declare opt_snapshot_prefix='mirror-'
declare opt_rewrite=''
declare opt_pool='rbd'
declare opt_sshcipher='aes128-gcm@openssh.com,aes128-cbc'
declare opt_tag=''
declare -i opt_prefix_id
declare opt_exclude_vmids=''
declare -i opt_debug=0
declare -i opt_dry_run=0
declare -i opt_syslog=0
declare -i opt_lock=1
declare -i opt_keepslock=0
declare -i opt_keepdlock=0
declare -i opt_overwrite=0
declare -i opt_online=0
declare -i opt_migrate=0
declare -i opt_noconfirm=0
declare opt_keep_local='0s'
declare opt_keep_remote='0s'
declare -r redstconf='^\/etc\/pve\/nodes\/(.*)\/qemu-server\/([0-9]+).conf$'
declare -r recephimg='([a-zA-Z0-9]+)\:(.*)'
declare -r restripsnapshots='/^$/,$d'
declare -r redateex='^([0-9]{4})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})$'
declare -r restripansicolor='s/\x1b\[[0-9;]*m//g'
function usage(){
shift
if [ "$1" != "--no-logo" ]; then
cat << EOF
_____
| |___ ___ ___ ___ ___ _ _ ___ ___
| --| _| . |_ -|_ -| . | | | -_| _|
|_____|_| |___|___|___|___|\_/|___|_| v$VERSION
EOF
fi
cat << EOF
Mirror tool Proxmox VMs on Ceph storages
Usage:
$PROGNAME <COMMAND> [ARGS] [OPTIONS]
$PROGNAME help
$PROGNAME version
$PROGNAME mirror --vmid=<string> --destination=<destionationhost> --pool=<targetpool> --keeplocal=n --keepremote=n
Commands:
version Show version program
help Show help program
mirror Replicate a stopped VM to another Cluster (full clone)
Options:
--sshcipher SSH Cipher to use for transfer (default: aes128-gcm@openssh.com,aes128-cbc)
--tag Include all VMs with a specific tag set in the Proxmox UI (if set, implies vmid=all)
--vmid The source+target ID of the VM/CT, comma separated (eg. --vmid=100:100,101:101), or all for all
--prefixid Prefix for VMID's on target System [optional]
--excludevmids Exclusde VM IDs when using --vmid==all
--destination Target PVE Host in target pool. e.g. --destination=pve04
--pool Ceph pool name in target pool. e.g. --pool=data [default=rbd]
--keeplocal How many additional Snapshots to keep locally. e.g. --keeplocal=2d
--keepremote How many additional Snapshots to keep remote. e.g. --keepremote=7d
--rewrite PCRE Regex to rewrite the Config Files (eg. --rewrite='s/(net0:)(.*)tag=([0-9]+)/\1\2tag=1/g' would
change the VLAN tag from 5 to 1 for net0.
--influxurl Influx API url (e.g. --influxurl=https://your-influxserver.com/api/)
--influxtoken Influx API token with write permission
--influxbucket Influx Bucket to write to (e.g. --influxbucket=telegraf/autogen)
--jobname Descriptive name for the job, used in Statistics
--mail Mail address to send report to, comma-seperated (e.g. --mail=admin@test.com,admin2@test.com)
Switches:
--online Allow online Copy
--migrate Stop VM on Source Cluster before final Transfer and start on destination Cluster
--nolock Don't lock source VM on Transfer (mainly for test purposes)
--keep-slock Keep source VM locked on Transfer
--keep-dlock Keep VM locked after transfer on Destination
--overwrite Overwrite Destination
--noconfirm Don't ask for confirmation before starting --migrate mode (use with care!)
--debug Show Debug Output
Report bugs to <mephisto@mephis.to>
EOF
}
function parse_opts(){
# local action=$1
shift
local args
args=$(getopt \
--options '' \
--longoptions=sshcipher:,tag:,vmid:,prefixid:,excludevmids:,destination:,pool:,keeplocal:,keepremote:,rewrite:,influxurl:,influxorg:,influxtoken:,influxbucket:,jobname:,mail:,online,migrate,nolock,keep-slock,keep-dlock,overwrite,dry-run,noconfirm,debug,syslog \
--name "$PROGNAME" \
-- "$@") \
|| end_process 128
eval set -- "$args"
while true; do
case "$1" in
--sshcipher) opt_sshcipher=$2; shift 2;;
--tag) opt_tag=$2; shift 2;;
--vmid) opt_vm_ids=$2; shift 2;;
--prefixid) opt_prefix_id=$2; shift 2;;
--excludevmids) opt_exclude_vmids=$2; shift 2;;
--destination) opt_destination=$2; shift 2;;
--pool) opt_pool=$2; shift 2;;
--keeplocal) opt_keep_local=$2; shift 2;;
--keepremote) opt_keep_remote=$2; shift 2;;
--rewrite) opt_rewrite=$2; shift 2;;
--influxurl) opt_influx_api_url=$2; shift 2;;
--influxorg) opt_influx_api_org=$2; shift 2;;
--influxtoken) opt_influx_token=$2; shift 2;;
--influxbucket) opt_influx_bucket=$2; shift 2;;
--jobname) opt_influx_jobname=$2; shift 2;;
--mail) opt_addr_mail="$2"; shift 2;;
--online) opt_online=1; shift ;;
--migrate) opt_migrate=1; shift ;;
--dry-run) opt_dry_run=1; shift;;
--noconfirm) opt_noconfirm=1; shift;;
--debug) opt_debug=1; shift;;
--nolock) opt_lock=0; shift;;
--keep-slock) opt_keepslock=1; shift;;
--keep-dlock) opt_keepdlock=1; shift;;
--overwrite) opt_overwrite=1; shift;;
--syslog) opt_syslog=1; shift;;
--) shift; break;;
*) break;;
esac
done
if [ $opt_debug -eq 1 ]; then
log info "============================================"
log info "Proxmox Crosspool Migration: $VERSION";
log info "pid: $(cat /var/run/"$PROGNAME".pid)"
log info "============================================"
log info "Proxmox VE Version:"
echowhite "$(pveversion)"
log info "============================================"
fi
[ -z "$opt_influx_jobname" ] && { log info "Jobname is not set."; end_process 1; }
if [ -n "$opt_keep_local" ]; then
if ! [[ ${opt_keep_local:(-1)} == "s" || ${opt_keep_local:(-1)} == "d" ]]; then
echo "--keeplocal: Parameter malformed. suffix s or d missing"
end_process 255
fi
fi
if [ -n "$opt_keep_remote" ]; then
if ! [[ ${opt_keep_remote:(-1)} == "s" || ${opt_keep_remote:(-1)} == "d" ]]; then
echo "--keepremote: Parameter malformed. suffix s or d missing"
end_process 255
fi
fi
if [ $opt_keepdlock -eq 1 ] && [ $opt_migrate -eq 1 ]; then
log error "--keepdlock/--migrate: Invalid parameter Combination: you can't keep the destination locked in near-live migration mode"
end_process 255
fi
if [ -n "$opt_tag" ] && [ -n "$opt_vm_ids" ] && [ "$opt_vm_ids" != "all" ]; then
log error "You can't use --tag and --vmid at the same time"
end_process 255
fi
[ -n "$opt_tag" ] && [ -z $opt_vm_ids ] && opt_vm_ids="all"
[ -z "$opt_vm_ids" ] && { log info "VM id is not set."; end_process 1; }
if [ "$opt_vm_ids" = "all" ]; then
local all=''
local data=''
local cnt=''
local ids=''
all=$(get_vm_ids "$QEMU_CONF_CLUSTER/*$EXT_CONF" "$LXC_CONF_CLUSTER/*$EXT_CONF")
log debug "all: $all"
all=$(echo "$all" | tr ',' "\n")
opt_exclude_vmids=$(echo "$opt_exclude_vmids" | tr ',' "\n")
for id in $all; do
cnt=$(echo $opt_exclude_vmids | grep -o $id|wc -w)
if [ $cnt == 0 ]; then
vm_ids=$(echo "$vm_ids$id:$opt_prefix_id$id,")
fi
done
vm_ids=$(echo "$vm_ids" | tr ',' "\n")
else
if [ ! -z $opt_prefix_id ]; then
ids=$(echo "$opt_vm_ids" | tr ',' "\n")
for id in $ids; do
vm_ids=$(echo "$vm_ids$id:$opt_prefix_id$id,")
done
vm_ids=$(echo "$vm_ids" | tr ',' "\n")
else
vm_ids=$(echo "$opt_vm_ids" | tr ',' "\n")
fi
fi
}
human_readable() {
b=${1:-0}; d=''; s=0; S=(Bytes {K,M,G,T,P,E,Z,Y}iB)
while ((b > 1024)); do
d="$(printf ".%02d" $((b % 1024 * 100 / 1024)))"
b=$((b / 1024))
let s++
done
echo "$b$d ${S[$s]}"
}
function map_vmids_to_host(){
for node in $(/usr/bin/pvecm nodes | tail +5 | tr -s ' ' | cut -d' ' -f 4)
do
for vm in $(ssh root@$node qm list|tail +2|tr -s ' '|cut -f 2 -d' ')
do
pvnode[$vm]=$node
done
done
}
function map_vmids_to_dsthost(){
for node in $(ssh $1 /usr/bin/pvecm nodes | tail +5 | tr -s ' ' | cut -d' ' -f 4)
do
for vm in $(ssh root@$node qm list|tail +2|tr -s ' '|cut -f 2 -d' ')
do
dstpvnode[$vm]=$node
done
done
}
function exist_file(){
local file=''
for file in $1; do
[ -e "$file" ] && return 0 || return 1
break
done
}
function lookupcephpool() {
pvehost=$1
pvepoolname=$2
res=$(ssh $pvehost cat /etc/pve/storage.cfg | sed -n "/rbd: $pvepoolname/,/^$/p" | grep -E "\s+pool\s" | cut -d " " -f 2)
echo $res
}
function lookupdatapool() {
pvehost=$1
pvepoolname=$2
res=$(ssh $pvehost cat /etc/pve/storage.cfg | sed -n "/rbd: $pvepoolname/,/^$/p" | grep -E "\s+data-pool\s" | cut -d " " -f 2)
echo $res
}
function get_vm_ids(){
local data=''
local conf=''
while [ $# -gt 0 ]; do
for conf in $1; do
[ ! -e "$conf" ] && break
if [ -n "$opt_tag" ] && ! grep -qE "^tags:\s.*$opt_tag(;|$)" $conf; then
continue
fi
conf=$(basename "$conf")
[ "$data" != '' ] && data="$data,"
data="$data${conf%.*}"
done
shift
done
echo "$data"
}
function get_disks_from_config(){
local disks;
local file_config=$1
#disks available for vm/ct
#exclude no backup
#read current config
disks=$(while read -r line; do
[[ "$line" == "" ]] && break
echo "$line"
done < "$file_config" | \
grep -P '^(?:((?:efidisk|virtio|ide|scsi|sata|mp|tpmstate)\d+)|rootfs): ' | \
grep -v -P 'cdrom|none' | \
grep -v -P 'backup=0' | \
awk '{ split($0,a,","); split(a[1],b," "); print b[2]}')
echo "$disks"
}
function log(){
local level=$1
shift 1
local message=$*
local syslog_msg=''
case $level in
debug)
if [ $opt_debug -eq 1 ]; then
echo -e "$(date "+%F %T") DEBUG: $message";
echo -e "$(date "+%F %T") DEBUG: $message" >> "$LOG_FILE";
fi
;;
info)
echo -e "$message";
echo -e "$message" | sed -e 's/\x1b\[[0-9;]*m//g' >> "$LOG_FILE";
syslog_msg=$(echo -e "$message" | sed -e ${restripansicolor})
[ $opt_syslog -eq 1 ] && logger -t "$PROGNAME" "$syslog_msg"
;;
warn)
echo -n "$(echoyellow 'WARNING: ')"
echowhite "$message" 1>&2
echo -e "$message" | sed -e ${restripansicolor} >> "$LOG_FILE";
syslog_msg=$(echo -e "$message" | sed -e ${restripansicolor})
[ $opt_syslog -eq 1 ] && logger -t "$PROGNAME" -p daemon.warn "$syslog_msg"
;;
error)
echo -n "$(echored 'ERROR: ')"
echowhite "$message" 1>&2
echo -e "$message" | sed -e ${restripansicolor} >> "$LOG_FILE";
syslog_msg=$(echo -e "$message" | sed -e ${restripansicolor})
[ $opt_syslog -eq 1 ] && logger -t "$PROGNAME" -p daemon.err "$syslog_msg"
;;
*)
echo "$message" 1>&2
echo -e "$message" | sed -e ${restripansicolor} >> "$LOG_FILE";
syslog_msg=$(echo -e "$message" | sed -e ${restripansicolor})
[ $opt_syslog -eq 1 ] && logger -t "$PROGNAME" "$syslog_msg"
;;
esac
}
function map_source_to_destination_vmid(){
for vm_id_pair in $vm_ids; do
svid=$(echo "$vm_id_pair" | sed -r -e "s/^([0-9]+):([0-9]+)$/\1\n/")
dvid=$(echo "$vm_id_pair" | sed -r -e "s/^([0-9]+):([0-9]+)$/\2\n/")
svmids="$svmids"$'\n'"$svid"
dvmids[$svid]=$dvid
done
}
function mirror() {
local -i rc=0;
parse_opts "$@"
local timestamp; timestamp=$(date +%Y%m%d%H%M%S)
local xmittype
local humantime
local vmname
local -i xmitrc
local -i ssrc
local -i freezerc
local -i unfreezerc
local -i startdisk
local -i enddisk
local -i startjob
local -i endjob
local -i vmcount=0
local -i diskcount=0
local -i startdowntime
local -i enddowntime
local -i ga_ping
local disp_perf_freeze_failed
local disp_perf_ss_failed
local disp_perf_full_failed
local disp_perf_diff_failed
log info "ACTION: $(echowhite Onlinemirror)"
log info "Start mirror $(date "+%F %T")"
startjob=$(date +%s)
get_ceph_version
log info "Local Ceph Version: $scephversion, Remote Ceph Version: $dcephversion"
#create pid file
local pid_file="/var/run/$PROGNAME.pid"
if [[ -e "$pid_file" ]]; then
local pid; pid=$(cat "${pid_file}")
if ps -p "$pid" > /dev/null 2>&1; then
log error "Process already running with pid ${pid}"
end_process 1
fi
fi
if ! echo $$ > "$pid_file"; then
log error "Could not create PID file $pid_file"
end_process 1
fi
scluster=$(grep cluster_name /etc/pve/corosync.conf | cut -d " " -f 4)
dcluster=$(ssh "$opt_destination" grep cluster_name /etc/pve/corosync.conf | cut -d " " -f 4)
if [ $opt_migrate -eq 1 ] && [ $opt_noconfirm -eq 0 ]; then
echo "VM(s) $opt_vm_ids will subsequently be shutdown on [$scluster] and started on [$dcluster]"
read -p "Do you want to proceed? (yes/no) " yn
case $yn in
yes ) echo ok, we will proceed;;
no ) echo exiting...;
exit;;
* ) echo invalid response;
exit 1;;
esac
fi
map_source_to_destination_vmid
map_vmids_to_host
map_vmids_to_dsthost "$opt_destination"
if [ "$(check_pool_exist "$opt_pool")" -eq 0 ]; then
log error "Preflight check: Destination RBD-Pool $opt_pool does not exist."
end_process 255
fi
for vm_id in $svmids; do
file_config="$PVE_NODES/${pvnode[$vm_id]}/$QEMU/$vm_id.conf"
if ! exist_file "$file_config"; then
log error "VM $vm_id - Preflight check: VM $vm_id does not exist on source cluster [$scluster] - skipping to next VM."
continue
fi
ga_ping=$(gaping "$vm_id")
log debug "ga_ping: $ga_ping"
if [ "$ga_ping" -eq 255 ] ; then #vm running but no qemu-guest-agent answering
log error "VM $vm_id - Preflight check: VM $vm_id on source cluster [$scluster] has no qemu-guest-agent running - skipping to next VM."
continue
fi
(( vmcount++ ))
local disk=''
dvmid=${dvmids[$vm_id]}
vmname=$(cat $PVE_NODES/"${pvnode[$vm_id]}"/$QEMU/"$vm_id".conf | sed -e ''$restripsnapshots'' | grep "name\:" | cut -d' ' -f 2)
log info "VM $vm_id - Starting mirror for $(echowhite "$vmname")"
srcvmgenid=$(cat $PVE_NODES/"${pvnode[$vm_id]}"/$QEMU/"$vm_id".conf | sed -e ''$restripsnapshots'' | grep vmgenid | sed -r -e 's/^vmgenid:\s(.*)/\1/')
dstvmgenid=$(ssh "$opt_destination" cat $PVE_NODES/"${dstpvnode[$dvmid]}"/$QEMU/"$dvmid".conf 2>/dev/null | grep vmgenid | sed -e ''$restripsnapshots'' | sed -r -e 's/^vmgenid:\s(.*)/\1/')
log info "VM $vm_id - Checking for VM $dvmid on destination cluster $opt_destination $QEMU_CONF_CLUSTER"
log debug "DVMID:$dvmid srcvmgenid:$srcvmgenid dstvmgenid:$dstvmgenid"
conf_on_destination=$(ssh "$opt_destination" "ls -d $QEMU_CONF_CLUSTER/$dvmid$EXT_CONF 2>/dev/null")
[[ "$conf_on_destination" =~ $redstconf ]]
host_on_destination=${BASH_REMATCH[1]}
if [ $host_on_destination ]; then
dststatus=$(ssh root@${dstpvnode[$dvmid]} qm status $dvmid|cut -d' ' -f 2)
if [ $dststatus == "running" ]; then
log error "VM is running on Destination Cluster [$dcluster]. bailing out"
end_process 255
fi
fi
#Check if source VM is running
srcstatus=$(ssh root@${pvnode[$vm_id]} qm status $vm_id|cut -d' ' -f 2)
if [ $srcstatus == "running" ] && [ $opt_online -eq 0 ]; then
log error "Source VM is running .. exiting"
end_process 1
fi
if [ ! "$host_on_destination" ] || [ $opt_overwrite -eq 1 ]; then
if [ "$host_on_destination" ] && [ $srcvmgenid != $dstvmgenid ]; then
log error "Source VM genid ($srcvmgenid) doesn't match destination VM genid ($dstvmgenid). This should not happen. Bailing out.."
end_process 255
fi
log info "VM $vm_id - Transmitting Config for VM $vm_id to destination $opt_destination VMID $dvmid"
rewriteconfig $PVE_NODES/"${pvnode[$vm_id]}"/$QEMU/"$vm_id".conf $opt_destination "$opt_pool" $PVE_NODES/"$opt_destination"/$QEMU/"$dvmid".conf "$dvmid"
map_vmids_to_dsthost "$opt_destination"
fi
#--move so we need to shutdown and remove from ha group?
if [ $opt_migrate -eq 1 ]; then
log info "VM $vm_id - Migration requested, shutting down VM on ${pvnode[$vm_id]}"
if [ "$(get_ha_status "$vm_id")" == "started" ]; then
log info "VM $vm_id - remove from HA"
do_run "ha-manager remove $vm_id"
fi
do_run "ssh root@${pvnode[$vm_id]} qm shutdown $vm_id >/dev/null"
startdowntime=$(date +%s)
fi
#Lock on source + destination
if [ $opt_lock -eq 1 ]; then
do_run "ssh root@""${pvnode[$vm_id]}"" qm set ""$vm_id"" --lock backup" >/dev/null
log info "VM $vm_id - locked $vm_id [rc:$?] on source"
do_run "ssh root@""${dstpvnode[$dvmid]}"" qm set ""$dvmid"" --lock backup" >/dev/null
log info "VM $dvmid - locked $dvmid [rc:$?] on destination"
fi
#Freeze fs only if no migration running and qemu-guest-agent okay.
if [ $opt_migrate -eq 0 ] && [ $ga_ping -eq 0 ]; then
vm_freeze "$vm_id" "${pvnode[$vm_id]}" >/dev/null
freezerc=$?
if [ $freezerc -gt 0 ]; then
log warn "VM $vm_id - QEMU-Guest could not fsfreeze on guest."
(( perf_freeze_failed++ ))
else
(( perf_freeze_ok++ ))
fi
fi
for disk in $(get_disks_from_config "$file_config"); do
src_image_spec=$(get_image_spec "$disk")
create_snapshot "$src_image_spec@$opt_snapshot_prefix$timestamp" 2>/dev/null
ssrc=$?
if [ $ssrc -gt 0 ]; then
log warn "VM $vm_id - rbd snap failed."
(( perf_ss_failed++ ))
else
(( perf_ss_ok++ ))
fi
done
if [ $opt_migrate -eq 0 ]; then
vm_unfreeze "$vm_id" "${pvnode[$vm_id]}" >/dev/null
unfreezerc=$?
if [ $unfreezerc -gt 0 ]; then
log error "VM $vm_id - QEMU-Guest could not fsunfreeze on guest."
fi
if [ ! $opt_keepslock -eq 1 ]; then
do_run "ssh root@${pvnode[$vm_id]} qm unlock $vm_id" >/dev/null
log info "VM $vm_id - unlocked source VM $vm_id [rc:$?]"
fi
fi
for disk in $(get_disks_from_config "$file_config"); do
(( diskcount++ ))
log debug "VMID: $vm_id Disk: $disk DESTVMID: $dvmid"
src_image_spec=$(get_image_spec "$disk")
[ -z "$src_image_spec" ] && continue
dst_image_spec=$(echo $src_image_spec | sed -r -e "s/(.*\/[a-zA-Z0-9]+\-)([0-9]+)(\-[a-zA-Z0-9]+\-[0-9]+)/\1$dvmid\3/")
[ -z "$dst_image_spec" ] && continue
[[ $disk =~ $recephimg ]]
src_image_pool_pve=${BASH_REMATCH[1]}
src_image_pool=$(lookupcephpool "localhost" ${BASH_REMATCH[1]})
src_image_name=${BASH_REMATCH[2]}
[[ $dst_image_spec =~ ^.*\/(.*)$ ]]
dst_image_name=${BASH_REMATCH[1]}-$src_image_pool_pve
dst_image_pool=$(lookupcephpool $opt_destination $opt_pool)
dst_data_pool=$(lookupdatapool $opt_destination $opt_pool)
if [ -n "$dst_data_pool" ]; then
dst_data_opt="--data-pool $dst_data_pool"
fi
snapshot_name="@$opt_snapshot_prefix$timestamp"
localsnapcount=$(rbd ls -l $src_image_pool | grep $src_image_name@$opt_snapshot_prefix | cut -d ' ' -f 1|wc -l)
if [ $localsnapcount -ge 2 ]; then
# we have at least 2 local snapshots, to we can make an incremental copy
currentlocal=$(rbd ls -l $src_image_pool | grep $src_image_name@$opt_snapshot_prefix | cut -d ' ' -f 1|tail -n 1)
localts=$(rbd ls -l $src_image_pool | grep $src_image_name@$opt_snapshot_prefix | cut -d ' ' -f 1 | sed -r -e 's/.*@'$opt_snapshot_prefix'(.*)/\1/')
fi
latestremote=$(ssh $opt_destination rbd ls -l $dst_image_pool | grep $dst_image_name@$opt_snapshot_prefix | cut -d ' ' -f 1|tail -n 1)
if [ "$latestremote" ]; then
[[ $latestremote =~ ^.*@$opt_snapshot_prefix([0-9]+)$ ]]
latestremotets=${BASH_REMATCH[1]}
for ts in $localts; do
if [ "$ts" == "$latestremotets" ]; then
basets=$ts
fi
done
fi
if [ -z "$basets" ]; then
xmittype='full'
log debug "No matching Snapshot found on destination - Full Copy $src_image_pool/$src_image_name$snapshot_name to $dst_image_pool/$dst_image_name"
#snapts=$(echo $currentlocal | sed -r -e 's/.*@mirror-(.*)/\1/')
snapshotsize=$(rbd du --pretty-format --format json $src_image_pool/$src_image_name|jq '.images[] | select (.snapshot_id == null) | {provisioned_size}.provisioned_size'|tail -1)
log debug "snapsize: $snapshotsize "
xmitjob="rbd export --rbd-concurrent-management-ops 8 $src_image_pool/$src_image_name$snapshot_name --no-progress - | tee >({ wc -c; } >/tmp/$PROGNAME.$pid.$dst_image_pool-$dst_image_name.size) | pv -s $snapshotsize -F \"VM $vm_id - F $src_image_pool/$src_image_name$snapshot_name: $PVFORMAT_FULL\" | ssh -c $opt_sshcipher $opt_destination rbd import --image-format 2 - $dst_image_pool/$dst_image_name $dst_data_opt 2>/dev/null"
# create initial snapshot on destination
log debug "xmitjob: $xmitjob"
startdisk=$(date +%s)
do_run "$xmitjob"
enddisk=$(date +%s)
xmitrc=$?
if [ ! $xmitrc ]; then
log error "Transmitting Image failed"
(( perf_full_failed++ ))
return 1
else
(( perf_full_ok++ ))
fi
cmd="ssh $opt_destination rbd snap create $dst_image_pool/$dst_image_name$snapshot_name"
do_run "$cmd" 2>/dev/null
log info "VM $vm_id - created snapshot on $dvmid [rc:$?]"
perf_bytes_full=$(( perf_bytes_full + $(cat /tmp/"$PROGNAME"."$pid"."$dst_image_pool"-"$dst_image_name".size) ))
else
xmittype='incremental'
log debug "Basecopy + snapshot on destination - let's just transfer the diff"
log debug "sizer: rbd diff $src_image_pool/$currentlocal --from-snap $opt_snapshot_prefix$basets|gawk --bignum '{ SUM += \$2 } END { print SUM }'"
snapshotsize=$(rbd diff $src_image_pool/$currentlocal --from-snap $opt_snapshot_prefix$basets|gawk --bignum '{ SUM += $2 } END { print SUM }')
log debug "snapshotsize: $snapshotsize"
if [ -z "$snapshotsize" ]; then
#disk was not attached, or really nothing has changed..
snapshotsize=0
fi
xmitjob="rbd export-diff --no-progress --from-snap $opt_snapshot_prefix$basets $src_image_pool/$currentlocal - | tee >({ wc -c; } >/tmp/$PROGNAME.$pid.$dst_image_pool-$dst_image_name.size) | pv -F \"VM $vm_id - I $src_image_pool/$src_image_name$snapshot_name: $PVFORMAT_SNAP\" | ssh -c $opt_sshcipher $opt_destination rbd import-diff --no-progress - $dst_image_pool/$dst_image_name"
log debug "xmitjob: $xmitjob"
startdisk=$(date +%s)
do_run "$xmitjob"
enddisk=$(date +%s)
xmitrc=$?
if [ ! $xmitrc ]; then
log error "Transmitting Image failed"
(( perf_diff_failed++ ))
return 1
else
(( perf_diff_ok++ ))
fi
do_housekeeping "localhost" "$src_image_pool" "$src_image_name" "$opt_keep_local" "$vm_id"
do_housekeeping "$opt_destination" "$dst_image_pool" "$dst_image_name" "$opt_keep_remote" "$vm_id"
perf_bytes_diff=$(( perf_bytes_diff + $(cat /tmp/"$PROGNAME"."$pid"."$dst_image_pool"-"$dst_image_name".size) ))
fi
perf_bytes_total=$(( perf_bytes_total + $(cat /tmp/"$PROGNAME"."$pid"."$dst_image_pool"-"$dst_image_name".size) ))
rm /tmp/"$PROGNAME"."$pid"."$dst_image_pool"-"$dst_image_name".size
log info "VM $vm_id - Disk Summary: Took $(( enddisk - startdisk )) Seconds to transfer $(human_readable "$perf_bytes_total" 2) in a $xmittype run"
if [ -n "$opt_influx_api_url" ]; then
log info "VM $vm_id - Logging to InfluxDB: $opt_influx_api_url"
influxlp="$opt_influx_job_metrics,vmname=$vmname,jobname=$opt_influx_jobname,destination=$opt_destination,srcimage=$src_image_name,dstimage=$dst_image_name,xmittype=$xmittype bytescalculated=$snapshotsize""i,bytesonwire=$perf_bytes_total""i,xmitrc=$xmitrc""i,freezerc=$freezerc""i,unfreezerc=$unfreezerc""i,basets=$basets""i"
log debug "InfluxLP: --->\n $influxlp"
cmd="curl --request POST \"$opt_influx_api_url/v2/write?org=$opt_influx_api_org&bucket=$opt_influx_bucket&precision=ns\" --header \"Authorization: Token $opt_influx_token\" --header \"Content-Type: text/plain; charset=utf-8\" --header \"Accept: application/json\" --data-binary '$influxlp'"
do_run "$cmd"
fi
unset basets
done
if [ $opt_keepdlock -eq 0 ]; then
ssh root@${dstpvnode[$dvmid]} qm unlock $dvmid
log info "VM $dvmid - Unlocking destination VM $dvmid"
fi
#--migrate so start on destination?
if [ $opt_migrate -eq 1 ]; then
log info "VM $dvmid - Starting VM on node ${dstpvnode[$dvmid]} in cluster [$dcluster]"
do_run "ssh root@""${dstpvnode[$dvmid]}"" qm start "$dvmid >/dev/null
enddowntime=$(date +%s)
log info "VM $dvmid - Downtime: $(( enddowntime - startdowntime )) Seconds"
fi
done
endjob=$(date +%s)
log info "Finnished mirror $(date "+%F %T")"
humantime=$(date -ud "@$((endjob-startjob))" +'%H hours %M minutes %S seconds')
log info "Job Summary: Bytes transferred $(human_readable $perf_bytes_total) for $diskcount Disks on $vmcount VMs in $humantime"
if [ "$perf_freeze_failed" -gt 0 ]; then disp_perf_freeze_failed="$(echored $perf_freeze_failed)"; else disp_perf_freeze_failed="$(echogreen $perf_freeze_failed)"; fi
if [ "$perf_ss_failed" -gt 0 ]; then disp_perf_ss_failed="$(echored $perf_ss_failed)"; else disp_perf_ss_failed="$(echogreen $perf_ss_failed)"; fi
if [ "$perf_full_failed" -gt 0 ]; then disp_perf_full_failed="$(echored $perf_full_failed)"; else disp_perf_full_failed="$(echogreen $perf_full_failed)"; fi
if [ "$perf_diff_failed" -gt 0 ]; then disp_perf_diff_failed="$(echored $perf_diff_failed)"; else disp_perf_diff_failed="$(echogreen $perf_diff_failed)"; fi
log info "VM Freeze OK/failed.......: $perf_freeze_ok/$disp_perf_freeze_failed"
log info "RBD Snapshot OK/failed....: $perf_ss_ok/$disp_perf_ss_failed"
log info "RBD export-full OK/failed.: $perf_full_ok/$disp_perf_full_failed"
log info "RBD export-diff OK/failed.: $perf_diff_ok/$disp_perf_diff_failed"
log info "Full xmitted..............: $(human_readable $perf_bytes_full)"
log info "Differential Bytes .......: $(human_readable $perf_bytes_diff)"
if [ -n "$opt_influx_api_url" ]; then
log info "VM $vm_id - Logging Job summary to InfluxDB: $opt_influx_api_url"
influxlp="$opt_influx_summary_metrics,jobname=$opt_influx_jobname perf_bytes_diff=$perf_bytes_diff""i,perf_bytes_full=$perf_bytes_full""i,perf_bytes_total=$perf_bytes_total""i,perf_diff_failed=$perf_diff_failed""i,perf_diff_ok=$perf_diff_ok""i,perf_freeze_failed=$perf_freeze_failed""i,perf_freeze_ok=$perf_freeze_ok""i,perf_full_failed=$perf_full_failed""i,perf_full_ok=$perf_full_ok""i,perf_ss_failed=$perf_ss_failed""i,perf_ss_ok=$perf_ss_ok""i,perf_vm_running=$perf_vm_running""i,perf_vm_stopped=$perf_vm_stopped""i"
log debug "InfluxLP: --->\n $influxlp"
cmd="curl --request POST \"$opt_influx_api_url/v2/write?org=$opt_influx_api_org&bucket=$opt_influx_bucket&precision=ns\" --header \"Authorization: Token $opt_influx_token\" --header \"Content-Type: text/plain; charset=utf-8\" --header \"Accept: application/json\" --data-binary '$influxlp'"
do_run "$cmd"
fi
(( perf_vm_ok++ ))
end_process 0
}
function do_housekeeping(){
local horst=$1
local rbdpool=$2
local rbdimage=$3
local keep=$4
local vm=$5
local snap
local -i keeptime
local -i ts
local -i snapepoch
local -i age
log info "VM $vm - Housekeeping: $horst $rbdpool/$rbdimage, keeping Snapshots for $keep"
cmd="ssh $horst rbd ls -l $rbdpool | grep $rbdimage@$opt_snapshot_prefix | cut -d ' ' -f 1|head -n -1"
snapshots=$($cmd)
if [ "${keep:(-1)}" == "d" ]; then
keep=${keep%?}
keeptime=$(( $keep * 86400 ))
elif [ "${keep:(-1)}" == "s" ]; then
keep=${keep%?}
keeptime=$keep
fi
for snap in $snapshots; do
[[ $snap =~ ^.*@$opt_snapshot_prefix([0-9]+)$ ]]
ts=${BASH_REMATCH[1]}
[[ $ts =~ $redateex ]]
snapepoch=$(date --date "${BASH_REMATCH[1]}/${BASH_REMATCH[2]}/${BASH_REMATCH[3]} ${BASH_REMATCH[4]}:${BASH_REMATCH[5]}:${BASH_REMATCH[6]}" +%s)
age=$(($(date -u +"%s")-$snapepoch ))
if [ $age -gt "$keeptime" ]; then
cmd="ssh $horst rbd snap rm $rbdpool/$snap"
do_run "$cmd" 2>/dev/null
log info "VM $vm_id - Removing Snapshot $horst $rbdpool/$snap ($age""s) [rc:$?]"
if [ $rc -eq 0 ]; then
(( perf_snaps_removed++ ))
fi
fi
done
}
function gaping() {
local vmid=$1
local rc
cmd="ssh root@${pvnode[$vmid]} qm guest cmd $vmid ping >/dev/null 2>&1"
eval "$cmd"
rc=$?
echo $rc
}
function create_snapshot(){
local snap="$1"
log info "VM $vm_id - Creating snapshot $snap"
do_run "rbd snap create $snap"
rc=$?
log debug "create_snapshot() return $rc"
return $rc
}
function vm_freeze() {
local fvm=$1;
local fhost=$2;
status=$(ssh root@"$fhost" qm status "$fvm"|cut -d' ' -f 2)
if ! [[ "$status" == "running" ]]; then
log info "VM $fvm - Not running, skipping fsfreeze-freeze"
(( perf_vm_stopped++ ))
return
else
(( perf_vm_running++ ))
fi
local cmd="ssh root@$fhost /usr/sbin/qm guest cmd $fvm fsfreeze-freeze"
log info "VM $fvm - Issuing fsfreeze-freeze to $fvm on $fhost"
do_run "$cmd"
rc=$?
log debug "vm_freeze() return $rc"
return $rc
}
function vm_unfreeze() {
local fvm=$1;
local fhost=$2;
status=$(ssh root@"$fhost" qm status "$fvm"|cut -d' ' -f 2)
if ! [[ "$status" == "running" ]]; then
log info "VM $fvm - Not running, skipping fsfreeze-thaw"
return
fi
local cmd="ssh root@$fhost /usr/sbin/qm guest cmd $fvm fsfreeze-thaw"
log info "VM $fvm - Issuing fsfreeze-thaw to $fvm on $fhost"
do_run "$cmd"
rc=$?
log debug "vm_unfreeze() return $rc"
return $rc
}
function rewriteconfig(){
local oldconfig=$1
local dst=$2
local newpool=$3
local newconfig=$4
local newvmid=$5
local sedcmd
if [ ! -z "$opt_rewrite" ]; then
sedcmd='sed -r -e '$opt_rewrite
else
sedcmd='sed -e /^$/,$d'
fi
cat "$oldconfig" | sed -r -e "s/^(efidisk|virtio|ide|scsi|sata|mp|tpmstate)([0-9]+):\s([a-zA-Z0-9]+):(.*)-([0-9]+)-disk-([0-9]+).*,(.*)$/\1\2: $newpool:\4-$newvmid-disk-\6-\3,\7/g" | $sedcmd | sed -e '/^$/,$d' | sed -e '/ide[0-9]:.*-cloudinit,media=cdrom.*/d' | grep -v "^parent:\s.*$" | ssh "$dst" "cat - >$newconfig"
}
function checkvmid(){
local dst=$1
local vmid=$2
cmd="ssh $dst ls -l $QEMU_CONF_CLUSTER/$vmid.conf|wc -l"
rval=$($cmd)
echo "$rval"
}
function do_run(){
local cmd=$*;
local -i rc=0;
if [ $opt_dry_run -eq 1 ]; then
echo "$cmd"
rc=$?
else
log debug "$cmd"
eval "$cmd"
rc=$?
[ $rc != 0 ] && log error "$cmd"
log debug "return $rc ps ${PIPESTATUS[@]}"
fi
return $rc
}
function end_process(){
local -i rc=$1;
local -i runtime
local -i bps
local -i ss_total
local subject
if ! [[ -z "$startjob" || -z "$endjob" ]]; then
runtime=$(expr $endjob - $startjob)
bps=$(expr $perf_bytes_total/$runtime)
fi
ss_total=$(expr $perf_ss_ok + $perf_ss_failed)
subject="Crossover [VM:$perf_vm_ok/$vmcount SS:$perf_ss_ok/$ss_total]"
[ $rc != 0 ] && subject="[ERROR] $subject" || subject="[OK] $subject"
local mail;
for mail in $(echo "$opt_addr_mail" | tr "," "\n"); do
do_run "cat '$LOG_FILE' | mail -s '$subject' '$mail'"
done
#remove log
rm "$LOG_FILE"
exit "$rc";
}
function get_image_spec(){
local image_spec;
local disk="$1"
#if krbd enable
image_spec=$(pvesm path "$disk" | grep '^/dev/rbd/' | sed -e "s/^\/dev\/rbd\///")
if [ -z "$image_spec" ]; then
image_spec=$(pvesm path "$disk" | grep '/ceph/' | awk '{ split($0,a,":"); print a[2]}')
fi
echo "$image_spec"
}
function get_ha_status() {
local havmid="$1"
ha_status=$(ha-manager status| grep vm:"$havmid" | cut -d " " -f 4| sed 's/.$//')
echo "$ha_status"
}
function check_pool_exist() {
local poolname="$1"
local -i exists=255
pool_status=$(ssh $opt_destination pvesm status|grep rbd|cut -d " " -f 1|grep $poolname)
if [ "$pool_status" == "$poolname" ]; then
exists=1
else
exists=0
fi
echo $exists
}
function get_ceph_version() {
scephversion=$(ceph -v | cut -d " " -f 3)
dcephversion=$(ssh $opt_destination ceph -v | cut -d " " -f 3)
}
function main(){
[ $# = 0 ] && usage;
#command
case "$1" in
version) echo "$VERSION";;
help) usage "$@";;
mirror) mirror "$@";;
*) usage;;
esac
exit 0;
}
main "$@"