#!/bin/bash
#
# Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#

# Suite configuration assigned before the shared helpers are loaded.
required_nodes=2
testdir=/usr/lib/heartbeat/crmtest

# Load the shared helper library; without it nothing below can run, so abort.
if ! . "${testdir}/helper.sh"; then
    exit 1
fi

# Assigned after helper.sh has been sourced, so these values take effect
# regardless of anything the helper file assigned to the same names.
CRM_ERR_SHUTDOWN=0
test_nodes=2


#######################################
# Run one two-node failure scenario.
#
# Sequence: start heartbeat and crmd on ${test_node_1}, add one location
# constraint per resource, create rsc1, rsc2 and the fence1 stonith
# incarnation, start heartbeat and crmd on ${test_node_2}, then
# "killall -9" the requested processes on the node that is to fail and
# check the log for the expected state transitions and for the start of
# the displaced resource on the surviving node.  Optionally the failed
# node is restarted and the resource is expected to move back.
#
# Arguments:
#   $1 - test_type:   label used in progress/terminal-title messages
#   $2 - fail_pieces: space separated process names handed to "killall -9"
#   $3 - fail_node:   node on which the processes are killed
#   $4 - good_node:   node expected to survive and take over
#   $5 - do_failback: 1 to restart the failed node afterwards
#   $6 - accelerated: stored but not used anywhere in this function
#
# Globals read (none are defined in this function): test_node_1,
#   test_node_2, logfile, testdir, HALIB_DIR, INIT_USER, CRMD_USER,
#   ADMIN_USER, CRM_OPTS, ip_rsc_1, ip_rsc_2, iteration, repeats.
#
# NOTE(review): the variables assigned below are deliberately left global,
#   as in the original; helpers such as cts_assert/do_cmd are defined
#   outside this file and may read them - confirm before adding "local".
# NOTE(review): arguments passed through do_cmd are deliberately left
#   unquoted exactly as before; how do_cmd re-assembles its arguments is
#   not visible here - confirm before changing the quoting.
#######################################
function 2node_fail_test() {

    test_type=$1
    fail_pieces=$2
    fail_node=$3
    good_node=$4
    do_failback=$5
    accelerated=$6

    # rsc1 is pinned to test_node_1 and rsc2 to test_node_2 by the
    # constraints created below, so the failing node determines which
    # resource has to move.
    if [ "$fail_node" = "$test_node_1" ]; then
	moved_rsc=rsc1
    else
	moved_rsc=rsc2
    fi

#---- announce the test (terminal title + log banner)

    echo -ne "\033]0;$test_type: Iteration $iteration of $repeats\007"
    # make *sure* there's nothing left over from last time
    crm-cleanup
    do_cmd echo "#############################"
    do_cmd echo "$test_type: Iteration $iteration of $repeats"

#---- heartbeat on node 1

    do_cmd echo "wait for HA to start on ${test_node_1}"
    # Remember the current log size so the search only sees new entries.
    crm_log_pos=$(stat -L -c %s "$logfile")
    do_cmd remote_cmd $INIT_USER $test_node_1 $HALIB_DIR/heartbeat -M "2>&1 >/dev/null" &
    do_cmd ${testdir}/testutils.pl -p $crm_log_pos -l ${logfile} --search -a -m 5000 \
	-s "${test_node_1} ccm(.*) info: Hostname: ${test_node_1}" \
	-s "${test_node_1} heartbeat(.*) info: Starting(.*)lrmd" \
	-e "${test_node_1} heartbeat(.*)Client(.*) respawning too fast"
    cts_assert "Startup of Heartbeat on ${test_node_1} failed."

#---- crmd on node 1

    do_cmd echo "wait for CRMd to start on ${test_node_1}"
    crm_log_pos=$(stat -L -c %s "$logfile")
    do_cmd remote_cmd $CRMD_USER $test_node_1 $HALIB_DIR/crmd "$CRM_OPTS" "2>&1 >/dev/null" &
    do_cmd ${testdir}/testutils.pl -p $crm_log_pos -l ${logfile} --search  -a -m 5000 \
	-s "${test_node_1} crmd(.*) info:(.*)FSA Hostname: ${test_node_1}" \
	-s "crmd(.*) State transition (.*) \-> S_IDLE"
    cts_assert "CRMd startup on ${test_node_1} failed."

    do_cmd wait_for_state S_IDLE 3 $test_node_1 
    cts_assert "S_IDLE not reached on $test_node_1 (startup)!"

#---- location constraint: rsc1 prefers node 1

    do_cmd echo Create the first constraint and wait for S_IDLE
    rsc=rsc1
    uuid1=$(uuidgen)
    uuid2=$(uuidgen)
    uuid3=$(uuidgen)
    node_xml="'<rsc_location id=\"${uuid1}\" rsc=\"${rsc}\">
        <rule id=\"${uuid2}\" result=\"can\"/>
	<rule id=\"${uuid3}\" score=\"1000\" boolean_op=\"or\">
	  <expression attribute=\"uname\" operation=\"eq\" value=\"${test_node_1}\"/>
	</rule>
      </rsc_location>'"
    crm_log_pos=$(stat -L -c %s "$logfile")
    do_cmd make_constraint_adv $test_node_1 $node_xml
    do_cmd ${testdir}/testutils.pl -p $crm_log_pos -l ${logfile} --search -a -m 1500 \
	-s "crmd(.*) State transition (.*) \-> S_IDLE"
    cts_assert Adding constraint1 did not pass

#---- location constraint: rsc2 prefers node 2

    do_cmd echo Create the second constraint and wait for S_IDLE
    rsc=rsc2
    uuid1=$(uuidgen)
    uuid2=$(uuidgen)
    uuid3=$(uuidgen)
    node_xml="'<rsc_location id=\"${uuid1}\" rsc=\"${rsc}\">
	        <rule id=\"${uuid2}\" result=\"can\"/>
		<rule id=\"${uuid3}\" score=\"1000\" boolean_op=\"or\">
		   <expression attribute=\"uname\" operation=\"eq\" value=\"${test_node_2}\"/>
		</rule>
	   </rsc_location>'"
    crm_log_pos=$(stat -L -c %s "$logfile")
    do_cmd make_constraint_adv $test_node_1 $node_xml
    do_cmd ${testdir}/testutils.pl -p $crm_log_pos -l ${logfile} --search -a -m 2500 \
	-s "crmd(.*) State transition (.*) \-> S_IDLE"
    cts_assert Adding constraint2 did not pass

#---- resource rsc1

    do_cmd echo Create the first resource and wait for S_IDLE after start
    args="<nvpair name=\"1\" value=\"${ip_rsc_1}\"/>"
    crm_log_pos=$(stat -L -c %s "$logfile")
    do_cmd make_resource $test_node_1 rsc1 heartbeat IPaddr - - stonith $args
    do_cmd ${testdir}/testutils.pl -p $crm_log_pos -l ${logfile} --search -a -m 2500 \
	-s "crmd(.*) State transition (.*) \-> S_IDLE" \
	-s "crmd(.*) Performing op start(.*) on rsc1"
    cts_assert Adding rsc1 did not pass

#---- resource rsc2

    do_cmd echo Create the second resource and wait for S_IDLE after start
    args="<nvpair name=\"1\" value=\"${ip_rsc_2}\"/>"
    crm_log_pos=$(stat -L -c %s "$logfile")
    do_cmd make_resource $test_node_1 rsc2 heartbeat IPaddr - - stonith $args
    do_cmd ${testdir}/testutils.pl -p $crm_log_pos -l ${logfile} --search -a -m 2500 \
	-s "crmd(.*) State transition (.*) \-> S_IDLE" \
	-s "crmd(.*) Performing op start(.*) on rsc2"
    cts_assert Adding rsc2 did not pass

#---- fencing resource

    do_cmd echo Create the fencing resource and wait for S_IDLE after start
    crm_log_pos=$(stat -L -c %s "$logfile")
    do_cmd make_incarnation $test_node_1 fence1 stonith null 2 - - ignore
    do_cmd ${testdir}/testutils.pl -p $crm_log_pos -l ${logfile} --search -a -m 2500 \
	-s "crmd(.*) State transition (.*) \-> S_IDLE" \
	-s "crmd(.*) Performing op start(.*) on fence1"
    cts_assert Adding fence1 did not pass

#---- single-node sanity checks: everything runs on node 1, which is the DC

    do_cmd echo Various sanity checks - stage 1
    do_cmd wait_for_state S_IDLE 3 $test_node_1 
    cts_assert "S_IDLE not reached on $test_node_1 (CIB create)!"

    do_cmd is_running rsc1 $test_node_1
    cts_assert "rsc1 NOT running"

    do_cmd is_running rsc2 $test_node_1
    cts_assert "rsc2 NOT running"

    do_cmd is_dc $test_node_1
    cts_assert "$test_node_1 is supposed to be the DC"

    # Negative control: query a host name that does not exist ("x" prefix).
    do_cmd is_running rsc1 $test_node_1 x$test_node_1
    cts_assert_false "rsc1 IS running on x$test_node_1"

    do_cmd is_running rsc1 $test_node_1 $test_node_1
    cts_assert "rsc1 NOT running on $test_node_1"

    do_cmd is_running rsc2 $test_node_1 $test_node_1
    cts_assert "rsc2 NOT running on $test_node_1"

#---- heartbeat on node 2

    do_cmd echo "wait for HA to start on $test_node_2"
    crm_log_pos=$(stat -L -c %s "$logfile")
    do_cmd remote_cmd $INIT_USER $test_node_2 $HALIB_DIR/heartbeat -M "2>&1 >/dev/null" &
    do_cmd ${testdir}/testutils.pl -p $crm_log_pos -l ${logfile} --search -a -m 5000 \
	-s "${test_node_2} ccm(.*) Hostname: ${test_node_2}" \
	-s "${test_node_2} heartbeat(.*) info: Starting (.*)lrmd" \
	-e "${test_node_2} heartbeat(.*) Client (.*) respawning too fast"
    cts_assert "Startup of Heartbeat on ${test_node_2} failed."

#---- crmd on node 2; rsc2 is expected to move there

    do_cmd echo "wait for CRMd to start on $test_node_2"
    crm_log_pos=$(stat -L -c %s "$logfile")
    do_cmd remote_cmd $CRMD_USER $test_node_2 $HALIB_DIR/crmd "$CRM_OPTS" "2>&1 >/dev/null" &
    do_cmd ${testdir}/testutils.pl -p $crm_log_pos -l ${logfile} --search -a -m 5000 \
	-s "${test_node_2} crmd(.*)FSA Hostname: ${test_node_2}" \
	-s "${test_node_2} crmd(.*) State transition S_PENDING \-> S_NOT_DC" 
    cts_assert "CRMd startup on ${test_node_2} failed."

    do_cmd ${testdir}/testutils.pl -p $crm_log_pos -l ${logfile} --search -a -m 8000 \
	-s "${test_node_1} crmd(.*) State transition(.*) \-> S_IDLE" \
	-s "${test_node_2} crmd(.*) Performing op start(.*) on rsc2"
    cts_assert "rsc2 was not transferred to ${test_node_2} on startup."

#---- two-node sanity checks: rsc1 on node 1, rsc2 on node 2

    do_cmd echo Various sanity checks - stage 2
    do_cmd wait_for_state S_NOT_DC 30 $test_node_2 
    cts_assert "S_NOT_DC not reached on $test_node_2 (startup - 2)!"

    do_cmd wait_for_state S_IDLE 30 $test_node_1 
    cts_assert "S_IDLE not reached on $test_node_1 (startup - 2)!"

    do_cmd is_running rsc1 $test_node_1
    cts_assert "rsc1 NOT running"

    do_cmd is_running rsc2 $test_node_1
    cts_assert "rsc2 NOT running"

    do_cmd is_running rsc1 $test_node_1 $test_node_1
    cts_assert "rsc1 NOT running on $test_node_1"

    do_cmd is_running rsc2 $test_node_1 $test_node_2
    cts_assert "rsc2 NOT running on $test_node_2"

#---- inject the failure

    # Record whether the node about to fail is the DC; that decides which
    # transitions to look for afterwards.  Only the exit status matters,
    # so both output streams are discarded (stdout first, then stderr
    # duplicated onto it - the reverse order would leave stderr visible).
    is_dc "$fail_node" > /dev/null 2>&1
    test_for_election=$?

    do_cmd echo Killing $fail_pieces on $fail_node
    crm_log_pos=$(stat -L -c %s "$logfile")
    do_cmd remote_cmd $ADMIN_USER $fail_node "killall -9 $fail_pieces" &

    if [ "$test_for_election" = 0 ]; then
	do_cmd echo Killed the DC... checking for DC Failover
	do_cmd ${testdir}/testutils.pl -p $crm_log_pos -l ${logfile} --search -a -m 90000 \
	    -s "${good_node} crmd(.*) State transition" \
	    -s "${good_node} crmd(.*) State transition (.*) \-> S_ELECTION" \
	    -s "${good_node} crmd(.*) State transition (.*) \-> S_IDLE" 
	cts_assert "Transition of the DC from ${fail_node} to ${good_node} failed."
    else
	do_cmd echo Killed slave node... checking the DC noticed
	do_cmd ${testdir}/testutils.pl -p $crm_log_pos -l ${logfile} --search -a -m 90000 \
	    -s "${good_node} crmd(.*) State transition" \
	    -s "${good_node} crmd(.*) State transition (.*) \-> S_POLICY_ENGINE" \
	    -s "${good_node} crmd(.*) State transition (.*) \-> S_IDLE"
	cts_assert "Failure of slave node ${fail_node} was not noticed on the DC (${good_node})."    
    fi

    # The displaced resource must be started on the surviving node, so the
    # pattern is anchored to ${good_node}.
    do_cmd ${testdir}/testutils.pl -p $crm_log_pos -l ${logfile} --search -a -m 80000 \
	    -s "${good_node} crmd(.*) Performing op start(.*) on ${moved_rsc}"
    cts_assert "Move of ${moved_rsc} to ${good_node} failed."

#---- post-failure sanity checks: both resources on the surviving node

    do_cmd echo Various sanity checks - stage 3
    do_cmd wait_for_state S_IDLE 60 $good_node
    cts_assert "S_IDLE not reached on $good_node after kill!"

    do_cmd is_running rsc1 $good_node
    cts_assert "rsc1 NOT running on $good_node"

    do_cmd is_running rsc2 $good_node
    cts_assert "rsc2 NOT running on $good_node"

    do_cmd is_running rsc1 $good_node $fail_node
    cts_assert_false "rsc1 IS running on $fail_node"

    do_cmd is_running rsc2 $good_node $good_node
    cts_assert "rsc2 NOT running on $good_node"

#---- optional failback: restart the failed node and expect the move back

    if [ "$do_failback" = 1 ]; then
	do_cmd echo "Re-Starting on failed node $fail_node"

	# When only crmd was killed heartbeat is still up and must not be
	# started a second time.
	if [  "$fail_pieces" = "crmd" ]; then
	    do_cmd echo "HA still running, skipping restart"
	else
	    do_cmd echo "wait for HA to start on $fail_node"
	    crm_log_pos=$(stat -L -c %s "$logfile")
	    do_cmd remote_cmd $INIT_USER $fail_node $HALIB_DIR/heartbeat -M "2>&1 >/dev/null" &
	    do_cmd ${testdir}/testutils.pl -p $crm_log_pos -l ${logfile} --search -a -m 5000 \
		-s "${fail_node} ccm(.*) Hostname: ${fail_node}" \
		-s "${fail_node} heartbeat(.*) info: Starting (.*)lrmd" \
		-e "${fail_node} heartbeat(.*) Client (.*) respawning too fast"
	    cts_assert "Startup of Heartbeat on ${fail_node} failed."
	fi

#---- crmd on the restarted node

	do_cmd echo "wait for CRMd to start on $fail_node"
	crm_log_pos=$(stat -L -c %s "$logfile")
	do_cmd remote_cmd $CRMD_USER $fail_node $HALIB_DIR/crmd "$CRM_OPTS" "2>&1 >/dev/null" &
	do_cmd ${testdir}/testutils.pl -p $crm_log_pos -l ${logfile} --search -a -m 20000 \
	    -s "${fail_node} crmd(.*)FSA Hostname: ${fail_node}" \
	    -s "${fail_node} crmd(.*) State transition S_PENDING \-> S_NOT_DC" 
	cts_assert "CRMd startup on ${fail_node} failed."

	do_cmd ${testdir}/testutils.pl -p $crm_log_pos -l ${logfile} --search -a -m 20000 \
	    -s "${good_node} crmd(.*) State transition(.*) \-> S_IDLE" \
	    -s "${fail_node} crmd(.*) Performing op start(.*) on ${moved_rsc}"
	cts_assert "$moved_rsc was not transferred to ${fail_node} on startup."

	do_cmd echo Various sanity checks - stage 4
	do_cmd wait_for_state S_NOT_DC 30 $fail_node 
	cts_assert "S_NOT_DC not reached on $fail_node (restart)!"

	do_cmd wait_for_state S_IDLE 30 $good_node
	cts_assert "S_IDLE not reached on $good_node (restart)!"

	do_cmd is_running rsc1 $test_node_1
	cts_assert "rsc1 NOT running"

	do_cmd is_running rsc2 $test_node_1
	cts_assert "rsc2 NOT running"

	do_cmd is_running rsc1 $test_node_1 $test_node_1
	cts_assert "rsc1 NOT running on $test_node_1"

	do_cmd is_running rsc2 $test_node_1 $test_node_2
	cts_assert "rsc2 NOT running on $test_node_2"

    fi

#----
    
    do_cmd echo "test ${test_type}: PASSED"

}


# Run the complete scenario list once per iteration until $repeats passes
# have been made.  $iteration and $repeats are not assigned in this file;
# presumably helper.sh provides them - TODO confirm.
#
# The operands are quoted on purpose: unquoted, empty/unset counters collapse
# the test to "[ -lt ]", a one-argument test that is always true, which
# starts a bogus iteration (or never terminates).  Quoted, the test fails
# with a diagnostic and the loop is simply not entered.
while [ "$iteration" -lt "$repeats" ]; do
    iteration=$((iteration + 1))
    echo -ne "\033]0;$test_type : Iteration $iteration of $repeats\007"
    echo "########### $test_type : Begining iteration $iteration of $repeats ###########"
# Each pass runs in a subshell, so variable assignments made by the tests
# (and any "exit" executed inside) do not reach the outer loop.
(

    # Kill every cluster process on the DC / on the slave, without and
    # with failback of the failed node.
    2node_fail_test 2node__fail_DC_All        "heartbeat ccm lrmd crmd" ${test_node_1} ${test_node_2} 0 0
    2node_fail_test 2node__fail_slave_All     "heartbeat ccm lrmd crmd" ${test_node_2} ${test_node_1} 0 0
    2node_fail_test 2node__failback_DC_All    "heartbeat ccm lrmd crmd" ${test_node_1} ${test_node_2} 1 0
    2node_fail_test 2node__failback_slave_All "heartbeat ccm lrmd crmd" ${test_node_2} ${test_node_1} 1 0

    # Kill only crmd on the DC / on the slave, without and with failback.
    2node_fail_test 2node__fail_DC_CRMd        "crmd" ${test_node_1} ${test_node_2} 0 0
    2node_fail_test 2node__fail_slave_CRMd     "crmd" ${test_node_2} ${test_node_1} 0 0
    2node_fail_test 2node__failback_DC_CRMd    "crmd" ${test_node_1} ${test_node_2} 1 0
    2node_fail_test 2node__failback_slave_CRMd "crmd" ${test_node_2} ${test_node_1} 1 0

# Disabled scenarios (lrmd-only failures):
#2node_fail_test 2node__fail_slave_LRM "lrmd" ${test_node_2} ${test_node_1}
#2node_fail_test 2node__fail_DC_LRM "lrmd"  ${test_node_1} ${test_node_2}

    echo "test suite: PASSED"
)
done


