#!/bin/ksh -p
#
#pragma ident     "@(#)quorumreconfig.sh 1.23 99/10/18"
#
#Copyright (C) 1997-1999 Sun Microsystems, Inc.
#
#

# quorumreconfig - Energizer Cluster Reconfiguration Programs for
#		   Quorum Devices.
#
# Input:
#               All environment variables like "currnodes"/
#		"allnodes"/"localnodeid", CURRSTEP
#	        The environment variable RESERVEQUORUM tells us whether
#               to reserve or not.
#
# Action:       Run Reconfiguration Programs based on current
#               "cmmstepN"|"cmmabort"|"cmmstart"|"cmmreturn".
#
# Output:       Return 0 if success.
#               Return 1 if failure
#               Return 200 if reconfiguration program result is to
#               be ignored.

# i18n message catalog setup for gettext lookups below.
TEXTDOMAIN=quorumreconfig; export TEXTDOMAIN
TEXTDOMAINDIR=/opt/SUNWcluster/locale; export TEXTDOMAINDIR

# Message-id prefixes for log_info; intended for use only by the
# reconfiguration framework.
pre="SUNWcluster.reconf.quorumdev"
pren="SUNWcluster.reconf.nodelock"
# Exit on any unguarded command failure.  Note: several commands below
# deliberately use "|| var=$?" to survive errexit.
set -e

# Default locations, overridable from the environment.
RECONF_SCRIPTS=${RECONF_SCRIPTS:-/opt/SUNWcluster/etc/reconf/scripts}
CLUSTERBIN=${CLUSTERBIN:-/opt/SUNWcluster/bin/}
CLUSTERVAR=${CLUSTERVAR:-/var/opt/SUNWcluster/}
CLUSTERETC=${CLUSTERETC:-/etc/opt/SUNWcluster/}
PATH=/usr/sbin:/usr/bin/:${CLUSTERBIN}:/usr/ccs/bin/:/bin/:/sbin; export PATH
# "." so the next line sources (dot-includes) the common utilities,
# which provide log_info/log_trace/enmatch and variables such as
# clustname, tmpdir, SSACLI and RESERVEDCTLS used throughout.
INCLUDE=.

# include common utilities.
${INCLUDE} ${RECONF_SCRIPTS}/reconf_ener.common

##########################################################################
#
# function node_lock_start
#
# Uses the pmfadm facility to start the node_lock process.
#
function node_lock_start
{
     
     #
     # Clean up any lock file left over from a previous run.
     #
     /bin/rm -f ${CLUSTERVAR}/nlck.lock

     #
     # Start the nodelock process under pmfadm (tag "nodelock"); nlck
     # tries to GET the node lock within ${nltimeout} seconds.
     # "|| error=$?" keeps a failure from tripping "set -e".
     #
     ${CLUSTERBIN}/pmfadm -c nodelock -n 0\
		${CLUSTERBIN}/nlck ${clustname} GET ${nltimeout} || error=$?

     #
     # If it didn't succeed the first time (tag not registered with
     # pmfadm), try once more.
     #
     if ! ${CLUSTERBIN}/pmfadm -l nodelock > /dev/null 2>&1; then
       ${CLUSTERBIN}/pmfadm -c nodelock -n 0\
		${CLUSTERBIN}/nlck ${clustname} GET ${nltimeout} || error=$?
     fi

     #
     # Poll for up to ${nltimeout} seconds for the lock file the
     # nodelock process creates once it has obtained the lock.
     #
     let i=0
     while [ ! -e ${CLUSTERVAR}/nlck.lock ]; do
          sleep 1
          if (( $i == ${nltimeout} )); then
		break
          fi
          let i=i+1
     done

     #
     # If the lock file was not created, we failed to get the lock.
     #
     if [ ! -e ${CLUSTERVAR}/nlck.lock ]; then
		return 1
     fi

     #
     # Now set retry mode to infinite (per the pmfadm -m/-n -1/-t 10
     # monitoring options) so the lock is kept until explicitly
     # released.  NOTE(review): this call is unguarded, so a pmfadm
     # failure here aborts the whole script via "set -e" — confirm
     # that is intended.
     #
     ${CLUSTERBIN}/pmfadm -m nodelock -n -1 -t 10
     return 0
}

###########################################################################
#
# function node_lock_stop
#
# Uses the pmfadm facility to stop the process holding the node_lock
#

function node_lock_stop
{
     #
     # If the nodelock process is still registered with pmfadm,
     # ask it to shut down by delivering SIGTERM through pmfadm.
     #
     if ${CLUSTERBIN}/pmfadm -l nodelock > /dev/null 2>&1; then
	${CLUSTERBIN}/pmfadm -s nodelock TERM
     fi

     #
     # Give the process a moment to exit and clean up after itself.
     #
     sleep 1

     #
     # If the tag is still registered, the kill did not take effect;
     # log a warning but carry on regardless.
     #
     if ${CLUSTERBIN}/pmfadm -l nodelock > /dev/null 2>&1; then
	log_info "${pren}.1000" "Warning: Unable to Kill nodelock process"
     fi

     #
     # Remove the on-disk lock file in every case.
     #
     /bin/rm -f ${CLUSTERVAR}/nlck.lock
     return 0
}

##########################################################################
#
# function node_lock_fstart
#
# Uses the pmfadm facility to start the node_lock process with the forced
# option.
#
function node_lock_fstart
{
     #
     # Clean up any lock file left over from a previous run.
     #
     /bin/rm -f ${CLUSTERVAR}/nlck.lock

     #
     # Start the nodelock process under pmfadm with the forced (GETF)
     # option, monitored with -n -1 -t 10 from the outset (unlike
     # node_lock_start, which switches to that mode only after the
     # lock is obtained).  "|| error=$?" keeps a failure from
     # tripping "set -e".
     #
     ${CLUSTERBIN}/pmfadm -c nodelock -n -1 -t 10\
		${CLUSTERBIN}/nlck ${clustname} GETF ${nltimeout} || error=$?

     #
     # If it didn't succeed the first time (tag not registered with
     # pmfadm), try once more.
     #
     if ! ${CLUSTERBIN}/pmfadm -l nodelock > /dev/null 2>&1; then
       ${CLUSTERBIN}/pmfadm -c nodelock -n -1 -t 10\
		${CLUSTERBIN}/nlck ${clustname} GETF ${nltimeout} || error=$?
     fi

     #
     # Poll for up to ${nltimeout} seconds for the lock file the
     # nodelock process creates once it has obtained the lock.
     #
     let i=0
     while [ ! -e ${CLUSTERVAR}/nlck.lock ]; do
          sleep 1
          if (( $i == ${nltimeout} )); then
		break
          fi
          let i=i+1
     done

     #
     # If the lock file was not created, we failed to get the lock.
     #
     if [ ! -e ${CLUSTERVAR}/nlck.lock ]; then
		return 1
     fi

     return 0
}


# XXX - should we specify the reservation timeouts in the cdb file?
# reserve all quorum devices that this node shares with those other nodes 
# that are not  part of the current membership.

###############################################################
#
# reserve_quorum_dev
#
#	Reserves the quorum device between a pair of nodes or
#	in the case of direct attached clusters between all the
#	nodes.
#
reserve_quorum_dev() {
  # Reserve the quorum device(s) shared between this node and every
  # configured node that is NOT part of the current membership.  For
  # direct attached clusters there is a single cluster-wide quorum
  # device which all nodes race for instead.
  #
  # Globals read:  directatt, allnodes, currnodes, localnodeid,
  #                clustname, SSACLI, RESERVEDCTLS, pre
  # Side effects:  issues reservations via ${SSACLI} q_reserve and
  #                creates ${RESERVEDCTLS}.* marker files; on a
  #                reservation failure the failing subshell status
  #                trips "set -e" and terminates the script.
  log_trace reserve_quorum_dev

  if [ ${directatt} = 1 ]; then
     #
     # In a 4-node cluster with direct attached device
     # there is a single quorum device for the entire
     # cluster, we will race for it here.
     #  
     quorum_dev=`eval enmatch quorumdev.node.0.1`
     if [ -n "$quorum_dev" ]; then
	  log_info "$pre.1000" "${clustname} reserving $quorum_dev as quorum device"
	  RC=0
	  # the disk driver will timeout in 30 seconds if it cannot
	  # achieve stable reservation.
	  ${SSACLI} q_reserve ${quorum_dev} 1>/dev/null 2>&1 || RC=$?
	  if [ ${RC} -ne 0 ]; then
	     # we retry reserve again since the previous error
	     # could be due to check condition not related to
	     # reservation conflict (especially in dual host
	     # sonoma where we have bus reset which results in reservation
	     # failure due to unit attention).
	     #
	     # The "exit 1" below only exits the subshell; the script
	     # itself terminates because the subshell's status trips
	     # "set -e".
	     ${SSACLI} q_reserve ${quorum_dev} 1>/dev/null 2>&1 || ( \
	     log_info "$pre.4001"  \
		"${clustname} failed to reserve $quorum_dev as quorum device"
	      lmsg="`gettext  'Warning: could not reserve the quorum device (%s)'`"
	      printf "${lmsg}\n" "${quorum_dev}" >&3
	      exit 1)
	  fi
	  touch ${RESERVEDCTLS}.${localnodeid}
     fi
     log_trace_end reserve_quorum_dev
     return
  fi

  for i in ${allnodes}; do
    if [ "${i}" -ne "${localnodeid}" ]; then
      # Node i is not in the current membership. Therefore, reserve quorum 
      # device, if any, shared between node i and this node

      i_not_in_currnodes=0
      # -w: match the node id as a whole word so that, e.g., id 1
      # does not falsely match membership entry 10.
      echo ${currnodes} | /bin/grep -w ${i} > /dev/null 2>&1 || i_not_in_currnodes=$?
      if [ "${i_not_in_currnodes}" -eq 1 ]; then
	# The quorumdev.node.<a>.<b> CDB key always uses the lower
	# node id first; pick the key ordering accordingly.
	retval=0
	expr ${i} \< ${localnodeid} > /dev/null 2>&1 || retval=$?
	if [ "${retval}" -eq 0 ]; then
	  quorum_dev=`eval enmatch quorumdev.node.${i}.${localnodeid}`
	else
	  quorum_dev=`eval enmatch quorumdev.node.${localnodeid}.${i}`
	fi
	if [ -n "$quorum_dev" ]; then
	  log_info "$pre.1000" "${clustname} reserving $quorum_dev as quorum device"
	  RC=0
	  ${SSACLI} q_reserve ${quorum_dev} 1>/dev/null 2>&1 || RC=$?
	  if [ ${RC} -ne 0 ]; then
	     # we retry reserve again since the previous error
	     # could be due to check condition not related to
	     # reservation conflict (especially in dual host
	     # sonoma where we have bus reset which results in reservation
	     # failure due to unit attention).
	     #
	     ${SSACLI} q_reserve ${quorum_dev} 1>/dev/null 2>&1 || ( \
	     log_info "$pre.4001"  \
		"${clustname} failed to reserve $quorum_dev as quorum device"
	      lmsg="`gettext 'Warning: could not reserve the quorum device (%s)'`"
	      printf "${lmsg}\n" "${quorum_dev}"
	      exit 1)
	  fi
	  # Record which pairwise reservation we hold (lower id first).
	  if [ "${retval}" -eq 0 ]; then
	    touch ${RESERVEDCTLS}.${i}.${localnodeid}
	  else
	    touch ${RESERVEDCTLS}.${localnodeid}.${i}
	  fi
	fi
      fi
    fi
  done
  log_trace_end reserve_quorum_dev
}

###############################################################
# release all shared devices. note that the quorum device may #
# be released again - the release is an idempotent operation, #
# so it does not really matter.                               #
###############################################################
###############################################################
# release a quorum device
#
#	release the quorum device if currently held.
#
release_quorum_dev() {
  # Release any quorum-device reservations this node holds with respect
  # to the nodes in the selected list.
  #
  # $1 - "abort": consider all configured nodes (${allnodes});
  #      "step1": consider only current members (${currnodes}).
  #      NOTE(review): for any other argument ${nodelist} is left
  #      unset/stale and the loops below do nothing — confirm callers
  #      only pass "abort" or "step1".

  log_trace release_quorum_dev

  if [ "$1" = "abort" ]; then
    nodelist=${allnodes}
  elif [ "$1" = "step1" ]; then
    nodelist=${currnodes}
  fi
 
   #
   # In a 4-node direct attached cluster
   # there is a single quorum device release it.
   # NOTE(review): the release only happens if ${nodelist} contains at
   # least one node other than this one; the loop returns on the first
   # such node.
   #
   for node in ${nodelist}; do
      if [ ${node} != ${localnodeid} ]; then
	if [ ${directatt} = 1 ]; then
		quorum_dev=`eval enmatch quorumdev.node.0.1`
		# Only release if the marker file says we reserved it.
		if [ -n "$quorum_dev" -a -f ${RESERVEDCTLS}.${localnodeid} ]; then
			${SSACLI} release ${quorum_dev} 1>/dev/null 2>&1 || dummy=$?
			/bin/rm -f ${RESERVEDCTLS}.${localnodeid}
		fi
		log_trace_end release_quorum_dev
		return
	fi
      fi
   done

  #
  # If not a direct attached cluster, find
  # the quorum device to release.  The quorumdev.node.<a>.<b> key and
  # the ${RESERVEDCTLS} marker always use the lower node id first.
  #
  for node in ${nodelist}; do

    if [ "${node}" != "${localnodeid}" ]; then
      retval=0
      expr ${node} \< ${localnodeid} > /dev/null 2>&1 || retval=$?
      if [ "${retval}" -eq 0 ]; then

	quorum_dev=`enmatch quorumdev.node.${node}.${localnodeid}`

	if [ -n "${quorum_dev}" -a  \
		-f ${RESERVEDCTLS}.${node}.${localnodeid} ]; then
	  # log_info "$pre.1001" \
	#	"Releasing quorum device ${quorum_dev} for ${node} and ${localnodeid}"
	  ${SSACLI} release ${quorum_dev} 1>/dev/null 2>&1 || dummy=$?
	  /bin/rm -f ${RESERVEDCTLS}.${node}.${localnodeid}
	fi
      else
	quorum_dev=`enmatch quorumdev.node.${localnodeid}.${node}`
	if [ -n "${quorum_dev}" -a  \
		-f ${RESERVEDCTLS}.${localnodeid}.${node} ]; then
	  # log_info "$pre.1001"  \
	  # "Releasing quorum device ${quorum_dev} for ${node} and ${localnodeid}"
	  ${SSACLI} release ${quorum_dev} 1>/dev/null 2>&1 || dummy=$?
	  /bin/rm -f ${RESERVEDCTLS}.${localnodeid}.${node}
	fi
      fi
    fi

  done

  log_trace_end release_quorum_dev
}

#####################################################
# This functions corrects the quorum devices links when the controller
# numbers changes.
#
correct_quorum_dev_links()
{
 # Repair the symbolic links under /var/opt/SUNWcluster/devices that
 # name this node's quorum devices, for the case where controller
 # numbers have changed since the links were made.  Exits 1 (via the
 # log_info/exit at the bottom) if a quorum device cannot be located
 # on the system at all.
 typeset quorum_dev device quorum_dev1
 integer i j localnodeid
 typeset diskinfo diskset
 

  export PATH=/opt/SUNWcluster/bin/:$PATH

# NOTE: errexit is turned off here and stays off for the remainder of
# this function's execution.
set +e

DEVICEDIR=/var/opt/SUNWcluster/devices

#
# Get the list of devices that we make use of in quorum devices.
# XXX: We will get this from CDB file since the directory may
# contain some obsolete entries.
#

#
# Find this node's id by matching our hostname against the
# cluster.node.N.hostname entries in the database.
#
nodename=`eval /bin/uname -n`
numnodes=`enmatch cmm.nodes`
localnodeid=0
while [ ${localnodeid} -lt ${numnodes} ]; do
	if [ `enmatch cluster.node.${localnodeid}.hostname` = ${nodename} ]; then
       		break;
        fi
        localnodeid=localnodeid+1
done

#
# Collect, into ${device}, every quorum device this node shares with
# another node.  The quorumdev.node.<a>.<b> key uses the lower id first.
#
j=0
device=""
while [[ ${j} -lt ${numnodes} ]]
do

	if [ ${localnodeid} -eq $j ]; then
		j=j+1
		continue;
	fi

	# Use an arithmetic comparison: ">" inside [[ ]] compares
	# lexically and would mis-order node ids of 10 and above.
	if [[ ${j} -gt ${localnodeid} ]]; then
		quorum_dev=`eval enmatch quorumdev.node.${localnodeid}.${j}`
	else
		quorum_dev=`eval enmatch quorumdev.node.${j}.${localnodeid}`
	fi

	if [[ ! -z ${quorum_dev} ]]; then
		device="${device} ${quorum_dev}"
	fi

	j=j+1
done

for devi in ${device}
do
  # 
  # Get the actual link and check for the presence of 
  # the device.
  #
  devpresent=`/bin/ls -l ${DEVICEDIR}/${devi} 2>/dev/null`
  actdevice=`/bin/ls -l ${DEVICEDIR}/${devi} 2>/dev/null | sed -e 's/^.*-> //' 2>/dev/null`

  #
  # if the quorum device is an ssa, then if it is a disk in the
  # ssa get only the ssa controller number.  A two-field name
  # (controller.disk, detected via the word count below) means a
  # disk inside an ssa.
  #
  diskinfo=`echo ${devi} | /usr/bin/sed 's/\./ /g'`
  disktype=`echo ${diskinfo} |/bin/wc | /bin/awk '{print $2}'`
  if [[ "${disktype}" -eq 2 ]]; then
	diskinfo=`echo ${diskinfo} | awk ' { print $1} '`
  	devpresent=`/bin/ls -l ${DEVICEDIR}/${diskinfo} 2>/dev/null`
  fi


  #
  # check if the quorum devices is an SSA controller.
  #
  X=`echo "${actdevice}" | /bin/grep "/dev/rdsk"`

  #
  # If quorum device is an SSA and we aren't able to get the entry
  # in /var/opt/SUNWcluster/devices/ then create one if present.
  #
  if [[ -z ${X} &&  -z "${devpresent}" ]]; then
	#
	# means that it could be an ssa. or there could be
	# an ssa controller/device present.  The ssa list is looked
	# up once and cached in ${ssas} for the rest of the loop.
	#
	if [[ -z ${ssas} ]]; then
		ssas=`/opt/SUNWcluster/bin/finddevices ssa`
	fi
       correct=`echo ${ssas} | /usr/bin/tr ' ' '\n' | /bin/grep ${diskinfo} \
		| /usr/bin/awk -F: '{print $1}'`
       if [[ ! -z "${correct}" ]]; then
		cnums=`/bin/ls -l /dev/rdsk/${correct}*s2 | head -1`
          	path=`echo ${cnums}  | sed -e 's/^.*->//' |  sed -e 's/^.*\.\.\/\.\.//' -e 's%^\(.*\)/\(.*\)%\1:ctlr%'`
	    # NOTE(review): the old ${devi} link is removed but the new
	    # link is created under ${diskinfo} — for an ssa disk these
	    # names differ; confirm this is intended.
	    rm -f ${DEVICEDIR}/${devi}
            ln -s ${path} ${DEVICEDIR}/${diskinfo} 2>/dev/null
	    continue
        fi
  fi


  #
  # first check if we have /dev/rdsk entries
  # This is because we are interested in /dev/rdsk/ controller number
  # changes.
  # Also if the directory entry is missing try to find it out.
  # (XXX. Should we do it for SSA's also.)
  X=`echo ${actdevice} | /bin/grep "/dev/rdsk"`
  if [[ $? -eq 0  || -z ${devpresent} ]]; then

	#
	# we have rdisk entry, get the inquiry and compare against the
	# one we have
	#
       dev=`echo ${actdevice} | sed -e 's/\/dev\/rdsk\///'`

       inq=""

       if [[ -a ${actdevice} ]]; then
       	 inq=`/opt/SUNWcluster/bin/scssa inquiry ${dev}`
       fi

       if [[ "${inq}" = "${devi}" ]]; then
		#
		# The link is correct and points to the right device.
		# Hence continue with the next device
		continue;
       fi

       #
       # Now the link is not pointing to the right device 
       # hence , first get the list of drives present and check
       # if the disk is present on the different controller.
       # We do it only once for the whole set of devices.
       if [[ -z ${finddevs} ]] ; then
	   #
	   # first get the entries thru find devices.
	   #
	   finddevs=`/opt/SUNWcluster/bin/finddevices disks`
       fi


       #
       #  Now get the correct path of the device with the given 
       #  serial number.
       #
       correct=`echo ${finddevs} | /usr/bin/tr ' ' '\n' | /bin/grep -w ${devi} \
		| /usr/bin/awk -F: '{print $1}'`

       #
       # we have found the correct path and now remove and re-link to
       # to the newest path.
       # XXX: If we have not found the correct path, should we inform
       #      the user here so that they can change their quorum devices
       #      using "scconf".
       if [[ ! -z ${correct} ]]; then
            rm -f ${DEVICEDIR}/${devi}  
	    ln -s /dev/rdsk/${correct} ${DEVICEDIR}/${devi} 2>/dev/null

       else
	  log_info "$pre.4050" "Quorum Device ${devi} is not present on the system, Please change quorum device using scconf -q option."
	  exit 1
       fi
  fi
done
}


# Is this a direct attached device configuration (single cluster-wide
# quorum device, protected by the node lock)?
directatt=`eval enmatch cmm.directattacheddevice`
if [ ${directatt} = 1 ]; then
   if [ "${CURRSTEP}" != "startnode" ]; then
	# Strip the leading "cmm" (e.g. "cmmstep1" -> "step1") to look
	# up this transition step's timeout in the database.
	step=${CURRSTEP#cmm*}
	steptimeout=`enmatch cmm.transition.${step}.timeout`
	timeout=`enmatch cluster.tc_ssp.to`
	# Bound the lock timeout by half the step timeout when the step
	# timeout is the smaller of the two; nltimeout is then the lock
	# wait used by node_lock_start/node_lock_fstart, plus slack.
	if (( ${steptimeout} < ${timeout} )); then
		timeout=$(/bin/expr ${steptimeout} / 2)
	fi
	nltimeout=$(/bin/expr ${timeout} + 2)
   fi
fi 


# Dispatch on the current reconfiguration step supplied by the
# framework via CURRSTEP.
case ${CURRSTEP} in
	startnode)
		correct_quorum_dev_links
		;;
	cmmabort)
		#
		# If a direct attached cluster, release the
		# node-lock if held (the nodelock.<id> marker file
		# records that this node holds it).
		#
		if [ ${directatt} = 1 -a \
			-f ${tmpdir}/nodelock.${localnodeid} ]; then
			log_info "$pren.1001" "${clustname} Release NodeLock started"
			node_lock_stop || dummy=$?
			/bin/rm -f ${tmpdir}/nodelock.${localnodeid}
			/bin/rm -f ${tmpdir}/startcluster
			log_info "$pren.1002" "${clustname} Release NodeLock completed"
		fi
		#
		# Release the quorum device, if held
		#
		release_quorum_dev abort
		;;
	cmmstep1)
		#
		# If a direct attached cluster and we're in a
		# startcluster transition, then grab the node-lock.
		# Else if I already hold the node-lock and am not
		# the lowest nodeid then release the node-lock.
		#
		if [ ${directatt} = 1 ]; then
		    if [ -e ${tmpdir}/startcluster ]; then
			/bin/rm -f ${tmpdir}/startcluster
			log_info "$pren.1003" "${clustname} Get NodeLock started"
			dummy=0
			node_lock_start || dummy=$?
			if [ ${dummy} -eq  0 ]; then
				/bin/touch ${tmpdir}/nodelock.${localnodeid}
		    		log_info "$pren.1004" "${clustname} Get NodeLock completed"
			else
				log_info "$pren.4002" "${clustname} Failed to obtain NodeLock"
				/bin/rm -f ${tmpdir}/nodelock.${localnodeid}
				exit 1
			fi
		    else
			# Only the first entry in ${currnodes} (the
			# lowest current member) is examined; the
			# unconditional "break" below exits the loop
			# after one iteration.
			for node in ${currnodes}; do
			    if [ ${node} != ${localnodeid} -a \
				    -f ${tmpdir}/nodelock.${localnodeid} ]; then
				log_info "$pren.1001" "${clustname} Release NodeLock started"
				node_lock_stop || dummy=$?
				/bin/rm -f ${tmpdir}/nodelock.${localnodeid}
				log_info "$pren.1002" "${clustname} Release NodeLock completed"
			    fi
			    break
			done
		    fi
		fi
		#
		# If CMM has set the RESERVEQUORUM flag then reserve
		# quorum. In case of direct attached cluster this
		# flag is set by CMM only if the membership is 1.
		#
		if [ "${RESERVEQUORUM}" -eq 1 ]; then
		   reserve_quorum_dev
		fi
		release_quorum_dev step1
		;;
	cmmstep2)
		#
		# If a direct attached cluster and I'm the lowest
		# nodeid and I do not already hold the node-lock then,
		# I force grab the node-lock.  As above, the "break"
		# means only the first entry of ${currnodes} is checked.
		#
		if [ ${directatt} = 1 -a \
			! -f ${tmpdir}/nodelock.${localnodeid} ]; then
			for node in ${currnodes}; do
				if [ ${node} = ${localnodeid} ]; then
					log_info "$pren.1003" "${clustname} Get NodeLock started"
					dummy=0
					node_lock_fstart || dummy=$?
					if [ ${dummy} -eq 0 ]; then
					    /bin/touch ${tmpdir}/nodelock.${localnodeid}
					    log_info "$pren.1003" "${clustname} Force obtained NodeLock"
					else
					    log_info "$pren.3004" "${clustname} WARNING: Failed to Force obtain NodeLock"
					fi
					log_info "$pren.1004" "${clustname} Get NodeLock completed"
				fi
				break
			done
		fi
		;;
			
			
esac

# Any unrecognized step falls through to success.
exit 0



