#! /bin/bash 

# Absolute paths of the external tools this script drives.
MDADM="/sbin/mdadm"
LOCTL="/usr/local/bin/loopctl"
LOSETUP="/sbin/losetup"
TIOTEST="/root/tiotest"

# Total number of test passes executed by the main loop.
IT=2000

# Log file location; a non-empty LOGFILE from the environment wins.
: "${LOGFILE:=/tmp/mp-test.log}"

# Print the usage text for both operating modes to stdout.
# $0 is expanded inside the here-document so the examples show the name the
# script was invoked with.
PrintHelp()
{
	cat <<END

This is a simple shell script to verify the correct operation of the MD
multipath setup.

WARNING: Data on the real devices given will be lost.

Interactive mode:
	$0 real <md device> <dev count> <real device 1> ... <real device n>
	
	Will run the tests interactively; your help will be required to
	create and restore various failure scenarios.

	Please specify a md device currently not in use, the number of paths
	to the target and the "real devices" at which Linux detects the disk,
	for example:

		$0 real /dev/md0 2 /dev/sda /dev/sdb

Loop mode:
	$0 loop <md device> <dev count> <any real device>

	Will run the tests non-interactively; this can be used to verify
	that md itself is working correctly. The real device you specify
	should be a "backing store" block device; for example, a SCSI device
	or just a logical volume.

	Loop devices will be set up to simulate <dev count> access paths to
	the same device.
	
		$0 loop /dev/md0 3 /dev/system/multipath-test

	It is recommended to complete a run in loop mode first.
	
WARNING: Data on the real devices given will be destroyed!

END
}

# Paths of the component devices of the MD array under test, indexed from 0.
declare -a DEVS

# Run a command and record it, its combined stdout/stderr and its exit code
# in the log buffer (nothing is printed to the terminal).
# Arguments: the command and its arguments, one word per parameter.
# Returns:   the exit status of the command.
Exec() {
	# Keep the arguments in an array so that words containing whitespace
	# reach the command unchanged instead of being re-split.
	local -a cmd=("$@")
	local output
	local rc

	LogStatus "Status prior to executing ${cmd[*]}:"
	LogNl "Executing ${cmd[*]}:" no
	output=$("${cmd[@]}" 2>&1)
	rc=$?
	LogNl "$output" no
	LogNl "Exit code $rc" no
	LogNl "---" no
	if [ "$rc" -ne 0 ]; then
		# Capture the array state right after the failure for diagnosis.
		LogStatus "Status directly after failed command ${cmd[*]}:"
	fi
	return "$rc"
}

# Append a headline (all arguments) followed by the current
# "$MDADM --detail $MD" output to the log buffer; nothing is printed.
LogStatus() {
	local separator="---"

	LOGENTRY=$(
		echo "$LOGENTRY"
		echo "$@"
		printf '%s\n' "$separator"
		$MDADM --detail $MD 2>&1
		printf '%s\n%s\n' "$separator" " "
	)
}

# Append the buffered log entry to $LOGFILE and empty the buffer.
LogFlush() {
	# LOGFILE can be overridden from the environment; quoting keeps the
	# redirect valid when the path contains whitespace.
	echo "$LOGENTRY" >>"$LOGFILE"
	LogClear
}

# Discard whatever is currently held in the log buffer.
LogClear() {
	LOGENTRY=
}

# Add text to the log buffer without a trailing newline.
# $1 - the text
# $2 - when non-empty, the text is only buffered and not echoed to stdout
Log() {
	local message="$1"
	local silent="$2"

	[ -n "$silent" ] || echo -n "$message"
	# echo joins the old buffer and the new text with a single space.
	LOGENTRY=$(echo -n "$LOGENTRY" "$message")
}

# Add a line of text to the log buffer.
# $1 - the text (may be omitted to log an empty line)
# $2 - when non-empty, the text is only buffered and not echoed to stdout
LogNl() {
	local message="$1"
	local silent="$2"

	[ -n "$silent" ] || echo "$message"
	# The trailing " " line survives the newline stripping of $(...) and so
	# keeps the line break after the message inside the buffer.
	LOGENTRY=$(
		echo -n "$LOGENTRY"
		echo "$message"
		printf '%s\n' " "
	)
}

# Find a loop device (/dev/loop0 .. /dev/loop32) that is neither listed in
# DEVS nor currently bound according to "$LOSETUP <device>".
# Outputs: the device path on stdout, or "NULL" when none was found.
# Returns: 0 when a device was found, 1 otherwise.
FindFreeLoopDevice() {
	local i
	local loop
	local device
	local in_use

	for (( i = 0; i <= 32; i++ )); do
		loop="/dev/loop$i"
		if [ ! -b "$loop" ]; then
			# Callers capture stdout as the device name, so the diagnostic
			# must go to stderr to keep the result parseable.
			LogNl "$loop is not a blockdevice" >&2
			echo "NULL"
			return 1
		fi

		# Skip devices this script has already reserved.
		in_use=0
		for device in "${DEVS[@]}"; do
			if [ "$device" == "$loop" ]; then
				in_use=1
				break
			fi
		done

		# A failing query means the loop device is not bound to anything.
		if [ "$in_use" -ne 1 ] && ! $LOSETUP "$loop" >/dev/null 2>&1; then
			echo "$loop"
			return 0
		fi
	done

	echo "NULL"
	return 1
}

# Release every loop device listed in DEVS, logging OK/FAIL per device.
# Always returns 0.
Unbind() {
	local idx
	local dev

	Log "Freeing loop devices: "
	for (( idx = 0; idx < ${#DEVS[*]}; idx++ )); do
		dev=${DEVS[$idx]}
		# Best effort: reset any state loopctl may hold for the device.
		$LOCTL clear $dev >/dev/null 2>&1
		if $LOSETUP -d $dev 2>/dev/null ; then
			Log " OK: $dev"
		else
			Log " FAIL: $dev"
		fi
	done
	LogNl " - DONE"
	return 0
}

# Check that all entries of DEVS expose the same data by comparing the md5sum
# of their first 128 KiB with that of DEVS[0]. A mismatch is logged and
# handed to Cleanup with status 1.
VerifyDevices() {
	local idx
	local reference=""
	local candidate=""

	for (( idx = 0; idx < ${#DEVS[*]}; idx++ )); do
		candidate=$(dd if=${DEVS[$idx]} bs=1024 count=128 2>/dev/null| md5sum)
		if [ $idx -eq 0 ]; then
			# The first device provides the checksum the others must match.
			reference=$candidate
			continue
		fi
		if [ "$reference" != "$candidate" ]; then
			Log "${DEVS[$idx]} doesn't seem to point at the same device ${DEVS[0]} does"
			Cleanup 1
		fi
	done
	return 0
}

# Overwrite the first 128 KiB of DEVS[0] with random data and erase its MD
# superblock. Only DEVS[0] is written; presumably this suffices because all
# entries are paths to the same device (see VerifyDevices).
# $1 - "yes" skips the warning and the grace period.
ClearDevices() {
	local force="$1"

	if [ "$force" != "yes" ]; then
		LogNl "Destroying ALL DATA on the devices NOW! _5s_ to abort!"
		# Grant the five seconds the message promises.
		sleep 5
	fi

	dd if=/dev/urandom of="${DEVS[0]}" bs=1024 count=128 2>/dev/null
	# Use the configured mdadm path like the rest of the script; /sbin is
	# not necessarily in PATH.
	$MDADM --zero-superblock "${DEVS[0]}"
}

# Bind the first DEVNO loop devices in DEVS to $BACKING_DEV.
# A device that cannot be bound is logged and handed to Cleanup with status 1.
BindDevices() {
	# Local counter: callers must not have their own "i" clobbered.
	local i

	for (( i = 0; i < DEVNO; i++ )); do
		# Best effort: reset any state loopctl may hold for the device.
		$LOCTL clear "${DEVS[$i]}" >/dev/null 2>&1
		if ! $LOSETUP "${DEVS[$i]}" "$BACKING_DEV" ; then
			LogNl "Failure to bind ${DEVS[$i]} to $BACKING_DEV"
			Cleanup 1
		fi
	done
}

# Parse the command line and prepare the devices for the test run.
# Arguments: <mode> <md device> <dev count> <device>...
#   mode real|fake: exactly <dev count> device paths must follow
#   mode loop:      exactly one backing block device must follow
# Globals written: MODE, MD, DEVNO, DEVS, BACKING_DEV (loop mode), DETAILS
# Returns: 1 on invalid arguments or a missing prerequisite; the caller is
#          expected to print the help text in that case.
SetupBase() {
	local i

	LogNl "SETUP PHASE: "

	if [ $# -le 3 ] ; then
		LogNl "Too few parameters."
		return 1
	fi

	MODE=$1 ; shift
	MD=$1 ; shift
	DEVNO=$1 ; shift

	case "$MODE" in
	real|loop|fake)
		;;
	*)
		return 1
		;;
	esac

	LogNl "Operating in $MODE mode"

	if [ ! -x "$MDADM" ] ; then
		LogNl "mdadm utility not found; please install mdadm"
		return 1
	fi

	# A non-numeric count would make the comparisons below error out and
	# silently pass validation, so reject it explicitly.
	if ! [[ "$DEVNO" =~ ^[1-9][0-9]*$ ]]; then
		LogNl "Invalid device count '$DEVNO' specified."
		return 1
	fi

	case "$MODE" in
	real|fake)
		if [ "$DEVNO" -ne "$#" ]; then
			LogNl "Incorrect number of devices specified."
			return 1
		fi

		for (( i = 0; i < DEVNO; i++ )); do
			DEVS[$i]="$1"; shift
		done
		;;
	loop)
		BACKING_DEV="$1" ; shift

		if [ ! -b "$BACKING_DEV" ]; then
			LogNl "Invalid backing store specified"
			return 1
		fi

		if [ "$#" -ne 0 ]; then
			LogNl "Superfluous parameters for loop mode"
			return 1
		fi

		# Each device is stored in DEVS before the next search so that
		# FindFreeLoopDevice does not hand out the same device twice.
		for (( i = 0; i < DEVNO; i++ )); do
			if ! DEVS[$i]=$(FindFreeLoopDevice); then
				LogNl "No free loop device available"
				return 1
			fi
		done
		;;
	esac

	# Scratch file for "mdadm --detail" snapshots. It is created only after
	# all arguments are validated so the early returns above leak nothing.
	if ! DETAILS=$(mktemp "${TMPDIR:-/tmp}/mp-test.XXXXXXX"); then
		LogNl "Could not create a temporary file"
		return 1
	fi

	if [ "$MODE" == "loop" ]; then
		LogNl "Loop devices being used: ${DEVS[*]}"
		BindDevices
	fi

	VerifyDevices

	ClearDevices

	LogNl "Setup complete."
	LogNl
}

# Stop the array, wipe the devices and create a fresh multipath array on
# $MD from all entries of DEVS.
# $1 - optional number of spare paths; without it all DEVNO paths are
#      requested as active. Values of 0 or >= DEVNO are replaced by 1.
# Globals written: UUID (read back from "mdadm --detail").
# Any failure is handed to Cleanup with status 1.
RestartMD() {
	local spares="$1"
	local actives
	local -a opts

	if [ -n "$FAILED_PATH" ]; then
		ClearError skip
	fi

	if [ -z "$spares" ]; then
		opts=(-n "$DEVNO")
	else
		if [ "$spares" -ge "$DEVNO" ] || [ "$spares" -eq 0 ] ; then
			LogNl "WARNING: spares $spares and device count $DEVNO fixed up!"
			spares=1
		fi
		actives=$(( DEVNO - spares ))
		opts=(-n "$actives" -x "$spares")
	fi

	LogNl
	Log "Starting clean md device: "

	# First, remove all left overs; failure is fine if nothing was running.
	Exec $MDADM "$MD" -S

	if [ "$MODE" == loop ]; then
		Unbind
		BindDevices
	fi

	# Get rid of any excess data
	ClearDevices yes

	# Start again! "yes" answers mdadm's confirmation prompt. It is attached
	# through process substitution rather than a pipe so that Exec runs in
	# this shell and its log entries are not lost in a pipeline subshell.
	if ! Exec $MDADM -C "$MD" -l mp "${opts[@]}" "${DEVS[@]}" < <(yes 2>/dev/null) ; then
		LogNl "FAILED"
		Cleanup 1
	fi

	UUID=$($MDADM --detail "$MD" | awk '/UUID : / { print $3 }')
	if [ -z "$UUID" ]; then
		LogNl "FAILED (UUID could not be read)"
		Cleanup 1
	fi

	Log "(UUID is: $UUID) "
	LogNl "OK"
}

# Tear everything down and terminate the script.
# $1 - exit status to terminate with
# Stops $MD, frees the loop devices in loop mode, removes the details
# scratch file and flushes the log before exiting.
Cleanup() {
	local exit_code=$1

	LogNl
	LogNl "CLEANUP PHASE:"
	LogNl "Stopping MD device: $MD"
	Exec $MDADM $MD -S

	[ "$MODE" == "loop" ] && Unbind

	[ -n "$DETAILS" ] && rm -f $DETAILS

	LogFlush

	exit $exit_code
}

# Store the current "$MDADM --detail $MD" output (stdout and stderr) in the
# $DETAILS scratch file. Always returns 0, whatever mdadm reports.
GetDetails() {
	$MDADM --detail $MD &>$DETAILS
	return 0
}

# Verify that every path in DEVS is reported active.
# Logs OK or FAILED; returns 1 if at least one path is not active.
CheckIfActive() {
	local dev
	local inactive=0

	Log "CHECK: All paths should be active: "
	# Every path is probed even after the first miss.
	for dev in ${DEVS[@]} ; do
		IsActive $dev quiet || inactive=1
	done

	if [ $inactive -eq 0 ]; then
		LogNl "OK"
		return
	fi

	LogNl "FAILED"
	return 1
}

# Make one path fail and remember it in FAILED_PATH.
#   real:      the operator fails a path by hand and types its name
#   loop/fake: a random path is failed through "mdadm --fail"
#   foo:       a random path is failed through "$LOCTL fail"
IntroduceError() {
	case $MODE in
	real)
		echo "REQUEST: Please fail a path now and tell me which (ie, ${DEVS[0]}):"
		read FAILED_PATH
		# A path that can still be read has not really failed.
		if dd if=$FAILED_PATH of=/dev/null bs=1024 count=1024 2>/dev/null; then
		      LogNl "ERROR: $FAILED_PATH still operational."
		fi
		;;
	loop|fake|foo)
		FAILED_PATH=$(SelectOnePath)
		LogNl "(Failing $FAILED_PATH)"
		if [ "$MODE" = "foo" ]; then
			Exec $LOCTL fail $FAILED_PATH
		else
			Exec $MDADM $MD --fail $FAILED_PATH
		fi
		;;
	esac

	LogNl ""
}

# Check that the array details list the given path as faulty.
# $1 - device path
# Logs "OK" on success; otherwise records a status snapshot and returns 1.
IsFailed() {
	local dev="$1"

	GetDetails

	if GrepDetails "faulty.*$dev\$" ; then
		LogNl "OK"
		return
	fi

	LogStatus "FAILED ($dev not marked faulty)"
	return 1
}

# Check that the array details list the given path as active.
# $1 - device path
# $2 - when non-empty, nothing is logged; only the return status is set
# Returns 1 if the path is not active.
IsActive() {
	local dev="$1"
	local silent="$2"

	GetDetails

	if GrepDetails "active.*$dev\$" ; then
		[ -n "$silent" ] || LogNl "OK"
		return
	fi

	[ -n "$silent" ] || LogStatus "FAILED ($dev not marked active)"
	return 1
}

# Check that the given path is a spare, which here means only that the array
# details do NOT list it as active.
# $1 - device path
# Logs "OK" on success; otherwise records a status snapshot and returns 1.
IsSpare() {
	local dev="$1"

	GetDetails

	if ! GrepDetails "active.*$dev\$" ; then
		LogNl "OK"
		return
	fi

	LogStatus "FAILED ($dev not spare)"
	return 1
}

# Flush the buffers of the given device and run a short, silent tiotest
# pass against it.
# $1 - device to exercise
DoSomeIO() {
	local target=$1

	Log "(doing some IO on $target) "
	blockdev --flushbufs $target
	$TIOTEST -R -d $target -o 100 -r 10 -S &>/dev/null
}

# Flush the buffers of $MD and run a longer, silent tiotest pass against
# the given device.
# $1 - device to exercise
DoLotsOfIO() {
	local target=$1

	# NOTE(review): the flush targets $MD, not the device under test;
	# confirm this is intended when called for a single path.
	blockdev --flushbufs $MD
	$TIOTEST -R -d $target -o 100 -r 40 -S -p 6 &>/dev/null
}

# Flush the buffers of $MD and run a long tiotest pass with its -c option
# against the given device; tiotest output is not suppressed.
# $1 - device to exercise
DoLotsOfIOwithCheck() {
	local target=$1

	blockdev --flushbufs $MD
	$TIOTEST -R -d $target -o 100 -r 4000 -c -S -p 6
}

# Print one randomly chosen entry of DEVS (index 0 .. DEVNO-1) on stdout.
SelectOnePath() {
	local index=$(( RANDOM % DEVNO ))

	echo ${DEVS[$index]}
}

# Undo the failure introduced by IntroduceError for FAILED_PATH.
# $1 - when non-empty, only the mode-specific repair is done and the mdadm
#      --clean/--active steps are skipped
# Returns 1 if one of the mdadm steps fails (FAILED_PATH is kept then);
# on success FAILED_PATH is emptied.
ClearError() {
	local skip_mdstuff="$1"
	local step

	case $MODE in
	real)
		echo "Please restore $FAILED_PATH now and press enter when ready"
		read
		;;
	foo)
		LogNl "Clearing $FAILED_PATH"
		Exec $LOCTL clear $FAILED_PATH
		;;
	loop|fake)
		# If mdadm --fail is used, this is a noop
		;;
	esac

	if [ -n "$skip_mdstuff" ]; then
		FAILED_PATH=""
		return 0
	fi

	# The path is first marked clean and then active again.
	for step in clean active; do
		Log "CHECK: Setting device $FAILED_PATH $step: "
		if Exec $MDADM $MD --$step $FAILED_PATH ; then
			LogNl "OK"
		else
			LogNl "FAIL"
			return 1
		fi
	done

	FAILED_PATH=""
}

# Run the DoLotsOfIO load once against every single path in DEVS, logging
# the device name before each run.
PerformanceAllDevices() {
	local dev

	LogNl "Performance of the single devices:"
	for dev in ${DEVS[@]}
	do
		LogNl "$dev: "
		DoLotsOfIO $dev
	done
}

# Refresh the details snapshot and test it against an extended regex.
# $1 - pattern for grep -E
# Returns grep's status: 0 when a line of the snapshot matches.
GrepDetails() {
	local regex=$1

	GetDetails
	grep -qE "$regex" $DETAILS
}

# Test (loop mode only): bind one more loop device to the backing store,
# add it to the array as an extra path, enable it, disable it, remove it
# and release the loop device again.
# Returns 0 when skipped or fully successful, 1 at the first failing step.
CheckAdditionalPath() {
	local device

	LogNl "Checking if adding/removing an additional path works. "

	if [ "$MODE" != loop ]; then
		LogNl "SKIPPED (only in loop mode)"
		return 0
	fi

	# Declaration and assignment are separate so the status of
	# FindFreeLoopDevice is not masked by "local".
	if ! device=$(FindFreeLoopDevice); then
		LogNl "FAILED: No free loop device available"
		return 1
	fi

	# Use the configured losetup path like the rest of the script.
	if ! $LOSETUP "$device" "$BACKING_DEV" ; then
		LogNl "FAILED: Couldn't bind $device to $BACKING_DEV"
		return 1
	fi

	Log "CHECK: Adding path $device: "
	if ! Exec $MDADM "$MD" --add "$device" ; then
		LogNl "FAILED"
		# The device never joined the array, so release it again instead
		# of leaking a bound loop device.
		$LOSETUP -d "$device" >/dev/null 2>&1
		return 1
	fi
	LogNl "OK"

	Log "CHECK: Enabling path $device: "
	if ! Exec $MDADM "$MD" --active "$device" ; then
		LogNl "FAILED"
		return 1
	fi
	# Logged through LogNl (not a bare echo) so the result reaches the log.
	LogNl "OK"

	Log "CHECK: Disabling path $device: "
	if ! Exec $MDADM "$MD" --inactive "$device" ; then
		LogNl "FAILED"
		return 1
	fi
	LogNl "OK"

	Log "CHECK: Removing path $device: "
	if ! Exec $MDADM "$MD" --remove "$device" ; then
		LogNl "FAILED"
		return 1
	fi
	LogNl "OK"

	Exec $LOSETUP -d "$device"
}

# Test: pick a random path and verify that it cannot be removed while
# active, then deactivate, remove, re-add and re-enable it.
# Returns 1 at the first step that does not behave as expected.
CheckIfHotModWorks() {
	local dev=$(SelectOnePath)

	LogNl "Removing/adding an existing path."

	# Removing an active path is expected to be refused.
	Log "CHECK: Removing path $dev fails if active: "
	if Exec $MDADM $MD --remove $dev ; then
		LogNl "FAILED"
		return 1
	fi
	LogNl "OK"

	Log "CHECK: Setting path $dev inactive: "
	Exec $MDADM $MD --inactive $dev || { LogNl "FAILED"; return 1; }
	LogNl "OK"

	Log "CHECK: Removing path $dev: "
	Exec $MDADM $MD --remove $dev || { LogNl "FAILED"; return 1; }
	LogNl "OK"

	Log "CHECK: Adding path $dev again: "
	Exec $MDADM $MD --add $dev || { LogNl "FAILED"; return 1; }
	LogNl "OK"

	Log "CHECK: Enabling path $dev again: "
	Exec $MDADM $MD --active $dev || { LogNl "FAILED"; return 1; }
	LogNl "OK"
}

# Test: set a random path inactive and active again, checking the array
# details after each step. Returns 1 if either state change is not visible.
CheckIfTogglingWorks() {
	local dev=$(SelectOnePath)
	local active_re="active.*$dev\$"

	Log "CHECK: Toggling active/inactive status of paths: "

	Exec $MDADM $MD --inactive $dev
	if GrepDetails "$active_re" ; then
		LogNl "FAILED: $dev could not be set inactive"
		return 1
	fi

	Exec $MDADM $MD --active $dev
	GetDetails
	if GrepDetails "$active_re" ; then
		LogNl "OK"
		return
	fi

	LogNl "FAILED: $dev could not be set active again"
	return 1
}

# Test: stop the array and assemble it again from DEVS using the recorded
# UUID. Returns 1 if stopping or reassembling fails.
CheckReassembly() {
	LogNl "Checking if md can restart the array: "

	Log "CHECK: Stopping $MD: "
	if Exec $MDADM $MD -S ; then
		LogNl "OK"
	else
		LogNl "FAILED: $MD could not be stopped!"
		return 1
	fi

	Log "CHECK: Reassembling the array: "	
	if Exec $MDADM -A $MD -u $UUID ${DEVS[@]} ; then
		LogNl "OK"
	else
		LogNl "FAILED (trying to recover)"
		return 1
	fi
}

# Test (loop mode only): stop the array, bind the backing store to a fresh
# set of loop devices, release the old ones and reassemble the array from
# the new device names.
# Returns 0 when skipped or successful (DEVS then holds the new devices),
# 1 if the array could not be stopped or reassembled.
CheckMovedDevices() {
	local i
	local device
	local -a NEW_DEVS

	if [ "$MODE" != "loop" ]; then
		return 0
	fi

	Log "Checking if md can cope with moved devices: "
	if ! Exec $MDADM "$MD" -S ; then
		LogNl "$MD could not be stopped!"
		return 1
	fi

	# Each new device is bound right away, so the next search sees it as
	# busy and cannot return it a second time.
	for (( i = 0; i < DEVNO; i++ )); do
		NEW_DEVS[$i]=$(FindFreeLoopDevice)
		if ! $LOSETUP "${NEW_DEVS[$i]}" "$BACKING_DEV" ; then
			LogNl "Failure to bind ${NEW_DEVS[$i]} to $BACKING_DEV"
			Cleanup 1
		fi
	done

	Unbind >/dev/null 2>&1

	if ! Exec $MDADM -A "$MD" -u "$UUID" "${NEW_DEVS[@]}" ; then
		# Release the new devices through the configured losetup path,
		# consistent with how they were bound above.
		for device in "${NEW_DEVS[@]}" ; do
			Exec $LOSETUP -d "$device"
		done
		LogNl "FAILED"
		return 1
	fi

	LogNl "OK (proceeding at new devices)"
	DEVS=("${NEW_DEVS[@]}")
}

# Test: turn a random path into a spare, fail it, then mark it clean and
# active again, verifying the reported state after each step.
# Returns 1 as soon as one of the state checks fails.
CheckFailSpare() {
	local spare

	LogNl "Checking if failing a spare device works: "

	spare=$(SelectOnePath)

	Log "CHECK: Marking path $spare spare: "
	Exec $MDADM $MD --inactive $spare
	IsSpare $spare || return 1

	Log "CHECK: Failing spare path $spare: "
	Exec $MDADM $MD --fail $spare
	IsFailed $spare || return 1

	LogNl ""

	Log "CHECK: Reenabling spare: "
	Exec $MDADM $MD --clean $spare --active $spare
	IsActive $spare || return 1

	return 0
}

# Test: fail a path, make sure the failure is noticed, stop the array,
# repair the path and reassemble the array; finally re-activate any path
# that came back inactive.
# Returns 1 at the first step that fails.
CheckReassemblyAfterFailure() {
	local dev

	LogNl "Checking if md can restart the array after a failure: "

	IntroduceError

	Log "CHECK: Checking whether $FAILED_PATH has been detected: "
	DoSomeIO $MD

	if ! IsFailed $FAILED_PATH ; then
		LogNl "FAILED: Failure was not detected."
		return 1
	fi

	Log "CHECK: Stopping failed $MD: "
	if Exec $MDADM $MD -S ; then
		LogNl "OK"
	else
		LogNl "FAILED $MD could not be stopped!"
		return 1
	fi

	# TODO: Can either try to restart with a failed path still present
	#       or not, needs to be conditional!
	# If the path was clear at the time of the reload, it will be present
	# but inactive.
	# Otherwise it will be missing.
	if ! ClearError skip ; then
		LogNl "FAILED: Could not clear error!"
		return 1
	fi

	Log "CHECK: Reassembling the failed array: "	
	if Exec $MDADM -A $MD -u $UUID ${DEVS[@]} ; then
		LogNl "OK"
	else
		LogNl "FAILED (trying to recover)"
		return 1
	fi

	# After reassembly md keeps the _number_ of active paths consistent,
	# not necessarily which paths are active, so a previously active path
	# may now be inactive and vice versa. Walk all paths and re-activate
	# the inactive ones.
	for dev in ${DEVS[@]} ; do
		IsActive $dev yes && continue

		Log "CHECK: Reactivating path $dev: "
		if Exec $MDADM $MD --active $dev ; then
			LogNl "OK"
		else
			LogNl "FAILED"
			return 1
		fi
	done

	return 0
}

# Succeeds when the Personalities line of /proc/mdstat mentions multipath.
IsPersonalityLoaded() {
	local mdstat=/proc/mdstat
	local pattern='^Personalities.*multipath'

	grep -qE "$pattern" "$mdstat"
}

# Test: load the multipath personality by module name and by its
# md-personality-7 alias, unloading it after each attempt.
# Skipped (return 0) when the personality is already present; every failure
# is handed to Cleanup with status 1.
CheckModule() {
	LogNl "CHECKS: Loading personality module"
	if IsPersonalityLoaded ; then
		LogNl "SKIPPED - personality already loaded at startup"
		return 0
	fi

	Log "CHECK: Loading multipath module 'manually': "
	Exec modprobe multipath
	if IsPersonalityLoaded ; then
		LogNl "OK"
	else
		LogNl "FAILED: unable to load module multipath, aborting"
		Cleanup 1
	fi

	Log "CHECK: Unloading module: "
	if Exec rmmod multipath ; then
		LogNl "OK"
	else
		LogNl "FAILED"
		Cleanup 1
	fi

	Log "CHECK: Loading multipath module by alias md-personality-7: "
	Exec modprobe md-personality-7
	if IsPersonalityLoaded ; then
		LogNl "OK"
	else
		LogNl "FAILED: unable to load module multipath, aborting"
		Cleanup 1
	fi

	Log "CHECK: Unloading module: "
	if Exec rmmod multipath ; then
		LogNl "OK"
	else
		LogNl "FAILED"
		Cleanup 1
	fi
}

# Test: fail a path, run some IO on the array so the failure is noticed,
# check that the path is reported faulty and then repair it.
# The return status is that of ClearError.
CheckError() {
	local victim

	IntroduceError
	victim=$FAILED_PATH

	Log "CHECK: Checking whether $victim has been detected: "
	DoSomeIO $MD
	IsFailed $victim
	ClearError
}

#######################################################################
# Main script


# On Ctrl-C run the normal cleanup (stop the array, free loop devices,
# flush the log) before exiting.
trap "Cleanup 0" SIGINT

# Start from an empty log; -f keeps a missing log file from being an error.
rm -f "$LOGFILE"

# "$@" is quoted so device paths reach SetupBase exactly as given.
if ! SetupBase "$@" ; then
	PrintHelp
	exit 1
fi

CheckModule

RestartMD

TESTS=(CheckIfTogglingWorks CheckAdditionalPath CheckIfHotModWorks
	CheckReassembly CheckMovedDevices CheckError
	CheckReassemblyAfterFailure CheckFailSpare)
# Highest valid index into TESTS; derived from the list so the two cannot
# drift apart when tests are added or removed.
TESTCOUNT=$(( ${#TESTS[@]} - 1 ))

# Per-test run counters, indexed like TESTS.
declare -a TESTS_RUN

testsdone=0

LogNl "================================================================="
LogNl "INFO: $IT passes will be run; first all test once, then randomly."

# The seed is logged so that the sequence of tests can be identified later.
SEED=$(( $$ ^ PPID ^ SECONDS ))
RANDOM=$SEED
LogNl "Random seed is $SEED"
LogNl "================================================================="
LogNl
while [ "$testsdone" -lt "$IT" ]; do
	if [ "$testsdone" -le "$TESTCOUNT" ] ; then
		# Run through the tests once in order
		testno=$testsdone
	else
		testno=$(( RANDOM % (TESTCOUNT + 1) ))
	fi

	testsdone=$(( testsdone + 1 ))

	if [ -n "${TESTS[$testno]}" ]; then
		TESTS_RUN[$testno]=$(( ${TESTS_RUN[$testno]:-0} + 1 ))
		LogNl "RUNNING TEST $testsdone - ${TESTS[$testno]} / ${TESTS_RUN[$testno]}"
		${TESTS[$testno]}
	else
		LogNl "Generated invalid testnumber $testno"
		LogFlush
		exit 1
	fi

	LogFlush

	# All tests should leave the array in a full enabled mode.
	# This is such an imperative that we don't need to always log it:
	# the check's log output is kept only when it fails.
	if ! CheckIfActive ; then
		LogStatus
		LogFlush
		exit 1
	fi
	LogClear

	LogNl
done

LogNl "================================================================="
LogNl "SUMMARY:"
LogNl "Number of total test runs: $testsdone"
for (( testno = 0; testno <= TESTCOUNT; testno++ )); do
	# A test that never ran has no counter yet; report it as 0.
	LogNl "${TESTS[$testno]} ran for ${TESTS_RUN[$testno]:-0} times"
done
LogNl "================================================================="
LogNl "ALL TESTS SUCCESSFUL"
LogNl "================================================================="
LogNl

if [ "$MODE" == "real" ]; then
#	PerformanceAllDevices
#	LogNl "Consolidated (this should be higher ;):"
#	DoLotsOfIO $MD
	echo
else
	LogNl "Skipping performance tests"
fi
echo

Cleanup 0

