#!/bin/sh
#
# Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.
#
#ident	"@(#)bootcluster	1.53	05/08/29 SMI"

#
# Startup script for clusters.
#

# File descriptor setup.
#
# If the SMF include file exists, keep copies of the stdout/stderr this
# script was started with on fd 4 and fd 5; they are used later to
# write to the Greenline logs.  Otherwise fd 4 and fd 5 are pointed at
# /dev/null, so anything written to them is discarded.
INCLUDE_FILE="/lib/svc/share/smf_include.sh"
if [ ! -f "$INCLUDE_FILE" ]
then
	exec 4>/dev/null 5>&4
else
	exec 4>&1 5>&2
fi

# From here on both stdout and stderr are written to the console.
exec >/dev/msglog 2>&1

# Files whose existence is tested right after failfastd and clexecd
# have been launched (see the daemon start-up code further down).
FAILFASTD_FILE=/etc/cluster/.failfastd
CLEXECD_FILE=/etc/cluster/.clexecd

# Core file handling: make sure the core directory exists, then set the
# core file name pattern of this shell ($$) so that cores are written
# into that directory.
SCCOREDIR=/var/tmp/SUNWscu/core
[ -d ${SCCOREDIR} ] || mkdir -p ${SCCOREDIR}
/usr/bin/coreadm -p ${SCCOREDIR}/core.%n.%f.%p.%t $$

# Report anything already sitting in the core directory, both on the
# console and through syslog.
CORES_FILES=`ls ${SCCOREDIR}`
if [ -n "${CORES_FILES}" ]
then
	# ${CORES_FILES} is left unquoted here: the shell word-splits the
	# listing, so echo prints the names separated by single spaces.
	echo "Core files in ${SCCOREDIR}: "${CORES_FILES}
	/usr/bin/logger -p kern.err "Core files in ${SCCOREDIR}: ${CORES_FILES}"
fi



# First DID initialization.  DID names are needed for the quorum
# devices, and scdidadm can be run this early because it knows how to
# fall back to the CCR file interfaces when the ORB is not available.
# Skipped unless the DID admin device and the scdidadm binary are both
# present.
if [ -c /dev/did/admin ] && [ -x /usr/cluster/bin/scdidadm ]
then
	# tee copies the combined output to the console (/dev/msglog);
	# its own stdout goes to fd 4, i.e. the Greenline log when
	# applicable.
	/usr/cluster/bin/scdidadm -u -i 2>&1 | tee /dev/msglog >&4
fi

HALT_MESSAGE="Please reboot in non cluster mode(boot -x) and Repair"

#
# boot_fatal message
#
# Print "UNRECOVERABLE ERROR: <message>" on the console, log the same
# text through syslog at kern.err, print the recovery hint and then
# invoke halt.  Should halt return, control goes back to the caller and
# the boot sequence simply carries on.
#
boot_fatal()
{
	echo "UNRECOVERABLE ERROR: $1"
	/usr/bin/logger -p kern.err "UNRECOVERABLE ERROR: $1"
	echo $HALT_MESSAGE
	/usr/sbin/halt
}

#
# Test if we are booting as part of a cluster: clinfo exits with zero
# only in that case.  The result is recorded in $clustered (1 or 0) for
# the rest of the script.
#
if /usr/sbin/clinfo > /dev/null 2>&1
then
	#
	# Check the CCR infrastructure file.  Whatever chkinfr writes to
	# stderr is captured in /etc/cluster/chkinfr.err.
	#
	if /usr/cluster/lib/sc/chkinfr 2>/etc/cluster/chkinfr.err
	then
		# Check passed: show the captured output if there is any,
		# otherwise remove the empty file.
		if [ -s /etc/cluster/chkinfr.err ]
		then
			/usr/bin/cat /etc/cluster/chkinfr.err
		else
			/usr/bin/rm -f /etc/cluster/chkinfr.err
		fi
	else
		/usr/bin/cat /etc/cluster/chkinfr.err
		boot_fatal "/etc/cluster/ccr/infrastructure file is corrupted"
	fi
	clustered=1
	echo "Booting as part of a cluster"

	#
	# Start the quorum user daemon.
	# This needs to happen before starting clustering.
	#
	if [ -x /usr/cluster/lib/sc/qd_userd ]
	then
		/usr/cluster/lib/sc/qd_userd ||
		    boot_fatal "Sun Cluster boot: qd_userd not started"
	else
		boot_fatal "Sun Cluster boot: /usr/cluster/lib/sc/qd_userd not found"
	fi

	#
	# Load the cl_comm module up front.  Otherwise clconfig -c calls
	# ORB::initialize(), which tries to modload the transport module
	# and can run into a modload() deadlock.
	#
	m=misc/cl_comm
	/usr/sbin/modload -p $m ||
	    boot_fatal "Sun Cluster boot: Could not load module $m"

	#
	# Startup the ORB, transport, and the CCR.
	#
	/usr/cluster/lib/sc/clconfig -c ||
	    boot_fatal "Sun Cluster boot: Could not initialize cluster framework"

	#
	# This will cause any node which is not in "installmode" to
	# automatically reset its vote count to one whenever it boots
	# back into the cluster.  A failure here is reported but does
	# not stop the boot.
	#
	/usr/cluster/bin/scconf -c -q ifnotinstallmode,reset,node=`/sbin/uname -n`
	res=$?
	if [ "$res" != "0" ]
	then
		echo "Sun Cluster boot: reset vote returns $res"
		/usr/bin/logger -p kern.err "Sun Cluster boot: reset vote returns $res"
	fi
else
	clustered=0
	echo "Not booting as part of a cluster"
fi

# Second DID initialization, repeated now that a current copy of the
# CCR is available.  As before, the output is copied to the console by
# tee while tee's own stdout goes to fd 4.
if [ -c /dev/did/admin ] && [ -x /usr/cluster/bin/scdidadm ]
then
	/usr/cluster/bin/scdidadm -u -i 2>&1 | tee /dev/msglog >&4
fi

#
# launch_checked_daemon name flagfile
#
# Start /usr/cluster/lib/sc/<name> and verify that it came up:
#   - executable missing       -> report the error and halt
#   - non-zero exit status     -> report the error and halt
#   - <flagfile> absent after the launch -> force a crash dump
# Any existing <flagfile> is removed before the launch, so finding it
# afterwards means it was written during this start-up (presumably by
# the daemon itself -- TODO confirm).
#
launch_checked_daemon()
{
	if [ ! -x /usr/cluster/lib/sc/$1 ]
	then
		echo "UNRECOVERABLE ERROR: Sun Cluster boot: /usr/cluster/lib/sc/$1 not found"
		/usr/bin/logger -p kern.err "UNRECOVERABLE ERROR: Sun Cluster boot: /usr/cluster/lib/sc/$1 not found"
		echo $HALT_MESSAGE
		/usr/sbin/halt
	else
		[ -f "$2" ] && /bin/rm -f "$2"

		/usr/cluster/lib/sc/$1 || {
			echo "UNRECOVERABLE ERROR: Sun Cluster boot: $1 not started"
			/usr/bin/logger -p kern.err "UNRECOVERABLE ERROR: Sun Cluster boot: $1 not started"
			echo $HALT_MESSAGE
			/usr/sbin/halt
		}

		if [ ! -f "$2" ]
		then
			echo "Sun Cluster boot: $1 start problem"
			# force a crash dump
			/usr/sbin/uadmin 5 1
		fi
	fi
}

# The daemons below are only started when booting as part of a cluster.
if [ "$clustered" = 1 ]
then
	#
	# Start the failfastd daemon.
	# This needs to happen before launch of clexecd
	#
	launch_checked_daemon failfastd "${FAILFASTD_FILE}"

	#
	# Start the HA mounter daemon.
	# This needs to happen before enabling global mounts.
	#
	launch_checked_daemon clexecd "${CLEXECD_FILE}"
fi
