#! /bin/sh
#
#pragma ident       "@(#)lotus_probe.shi 1.10     01/03/27 SMI"
#
# Copyright (c) 1997 by Sun Microsystems, Inc.
# All rights reserved.
#

# Usage: lotus_probe <Domino server instance name> <local: yes|no>
# Started up in the background via pmfd in lotus_fm_start during
# reconfiguration if a) the server is running locally and
# local probes are configured on, or if b) the server is running remotely
# and remote probes are configured on.

#
# Add the path to framework binaries, since the probe is not called in the
# context of the methods
#

# The framework directories are appended, so anything already on PATH is
# searched first.
PATH=${PATH}:/opt/SUNWcluster/bin:/opt/SUNWcluster/ha/lotus
export PATH


# XXX
#set -x


#
#pragma ident "@(#)ds_boiler	1.3	98/09/15 SMI"
#
# common boiler for HA data services
#
#


# Script name without its directory; used in the usage message below.
ARGV0=`basename $0`
# LOGGER, HA_SLOGFACILITY and HA_SLOGTAG are not referenced directly in this
# file; presumably they are consumed by the logging helpers sourced from
# hads_utilities -- verify against that file.
LOGGER=logger
HA_SLOGFACILITY=`haget -f syslog_facility`
HA_SLOGTAG=hadf
# Directory this script was invoked from.
prog_path=`dirname $0`

# source in ha-services common utilities
# (no slash in the name, so the shell locates the file through PATH)
. hads_utilities

# add the ha-service directory to the path
# (prepended, so programs next to this script win over same-named ones)
PATH=${prog_path}:${PATH}

#
# for use by subsequent hactl command, get hostnames of local and remote hosts
#
LOCALHOST=`uname -n`

#! /bin/sh
#
#pragma ident       "@(#)do_service 1.20     00/08/25 SMI"
#
# Copyright (c) 1997 by Sun Microsystems, Inc.
# All rights reserved.
#
# Location of the HA-Lotus instance configuration file.
LOTUSCONF=/etc/opt/SUNWsclts/hadsconf

source_env_file $LOTUSCONF
# The status must be tested immediately after source_env_file: any command
# placed in between would overwrite $? and hide a failure.
if [ $? -ne 0 ]; then
        # source_env logs error message if it fails.
        # No need to log another; just exit.
        exit 1
fi

# Number of configured instances; used further down to divide the
# framework start/stop timeouts between the instances.
NUMBER_OF_INSTANCES=`echo $_INST_LIST | wc -w`

# Defaults for the Domino installation paths and process bookkeeping.
NOTES_DIR=/opt/lotus
NOTES_SERVER=$NOTES_DIR/bin/server
RUNNING_TASKS=""
SERVER_PIDS=""

#
# verify_config - check if we have access to the Notes Domino config directory
#		  and config files 
#
verify_config() {

	# Validates the Domino installation and the instance configuration,
	# then derives the list of server tasks to monitor.
	#
	# Reads : _INST_BASE_DIR, _INST_CONF_DIR, NOTES_DIR, NOTES_SERVER, prefix
	# Sets  : NOTES_REV_DIR, LOTUS_TASKNAMES (lower-case task names),
	#         TASK_COUNT
	# Exits 1 (after logging) when a check fails; returns 0 otherwise.
	#
	# All path tests are quoted so that an empty variable fails the check
	# instead of collapsing into a one-argument test that is always true.

	if [ ! -d "${_INST_BASE_DIR}" ] ; then
		logerr "$prefix.4006" `gettext "base directory <${_INST_BASE_DIR}> does not exist."`
		exit 1
	fi

	# Make the installation reachable under $NOTES_DIR if it is not yet.
	if [ ! -d "$NOTES_DIR/bin" ] ; then
		ln -s "${_INST_BASE_DIR}" "${NOTES_DIR}"
		if [ $? -ne 0 ] ; then
			logerr "$prefix.4007" `gettext "failed to create symbolic link in $NOTES_DIR "`
			exit 1
		fi
	fi

	if [ ! -x "${NOTES_SERVER}" ]; then
		logerr "$prefix.4008" `gettext "<${NOTES_SERVER}> is not executable."`
		exit 1
	fi

	if [ ! -d "${_INST_CONF_DIR}" ]; then
		logerr "$prefix.4009" `gettext "config directory <${_INST_CONF_DIR}> does not exist."`
		exit 1
	fi

	if [ ! -f "${_INST_CONF_DIR}/notes.ini" ]; then
		logerr "$prefix.4012" `gettext "config file <${_INST_CONF_DIR}/notes.ini> does not exist."`
		exit 1
	fi

	# get the product directory of Lotus Notes: the part of the luname
	# output that precedes the first '|' names the revision directory
	if [ ! -x "${NOTES_DIR}/bin/tools/luname" ]; then
		logerr "$prefix.4010" `gettext "failed to get Lotus Notes directory info"`
		exit 1
	fi
	sun_ver=`${NOTES_DIR}/bin/tools/luname -q`
	rev_dirname=`expr $sun_ver : '\([^|]*\)'`
	NOTES_REV_DIR=${NOTES_DIR}/notes/latest/$rev_dirname
	if [ ! -d "$NOTES_REV_DIR" ]; then
		logerr "$prefix.4011" `gettext "failed to get Lotus Notes version directory "`
		exit 1
	fi

	# Retrieve the names of all tasks that are started by the Domino
	# server at server startup time.  The task names are stored in the
	# ServerTasks line of the notes.ini file; ',' and '=' are turned
	# into blanks so the line becomes a plain word list.
	task_list=`grep -w "ServerTasks" "${_INST_CONF_DIR}/notes.ini"`
	LOTUS_TASKNAMES="`echo $task_list | tr ',' ' ' `"
	LOTUS_TASKNAMES="`echo $LOTUS_TASKNAMES | tr '=' ' ' `"
	if [ "${LOTUS_TASKNAMES}" = "" ] ; then
		# Logged only; processing continues with an empty list.
		logerr "$prefix.4013" `gettext "Failed to retrieve the list of Domino tasks from ${_INST_CONF_DIR}/notes.ini."`
	fi

	# Caution !! this list may be changed in the next Lotus Domino release.
	smtpmta_child_procs="isesctl drt imsgcnv osesctl omsgcnv"
	found_smtpmta=`echo $LOTUS_TASKNAMES | grep -i "smtpmta" `
	if [ "${found_smtpmta}" != "" ] ; then
		# if the smtpmta task is configured, its five child processes
		# should also be running; attach their names to the task list
		LOTUS_TASKNAMES="$LOTUS_TASKNAMES $smtpmta_child_procs"
	fi

	# take out the leading "ServerTasks" keyword from the list
	# NOTE(review): TASK_COUNT is taken while the keyword is still part of
	# the list, so it is one larger than the number of task names kept;
	# tasks_are_running compares a pid count against it -- confirm that
	# this is intended before changing it.
	TASK_COUNT=`echo $LOTUS_TASKNAMES | wc -w`
	LOTUS_TASKNAMES=`echo ${LOTUS_TASKNAMES} | cut -f2-"$TASK_COUNT" -d' ' `
	LOTUS_TASKNAMES=`echo ${LOTUS_TASKNAMES} | tr '[A-Z]' '[a-z]' `
	return 0

}

#The following function added to support Multiple Instances.
#Checks if PID belongs to the current instance.

check_pid() {
	# Narrow $PID down to the pids that the process monitor lists for the
	# current instance, so that processes of other instances are ignored.
	# Reads : PID (blank separated pid list)
	# Sets  : PID (every kept pid is preceded by one blank)
	owner_tag=`get_inst_name`
	monitored_pids=`pmfadm -l ${owner_tag} | grep pids: | sed 's/pids://'`

	kept_pids=""
	for candidate in $PID
	do
		for monitored in $monitored_pids
		do
			# keep the candidate when the monitor knows it
			[ $candidate = $monitored ] && kept_pids="$kept_pids $candidate"
		done
	done

	PID=$kept_pids
}

get_pid() {
	# Collect in $PID the pids of all processes whose "ps -fe" line
	# matches the given command string, then let check_pid drop the pids
	# that do not belong to the current instance.
	# Arguments: the command string to look for (all arguments are joined)
	# Sets     : name, PID

	name=$*
	# grep does a coarse, case-insensitive preselection and removes grep
	# processes; nawk re-matches using only the beginning of the pattern
	# and skips the line of this very nawk invocation.
	PID=`/usr/bin/ps -fe | grep -i "$name" | grep -v "grep" |
		nawk -v pat="$name" '
BEGIN { wanted = substr(pat, 0, 79) }
match($0, wanted) && ! match($0, "nawk -v pat=") { print $2 }
'`
	check_pid
}

# tasks_are_running - checking whether the Domino server tasks running or not
#		 
# return : 0 - the Lotus Domino server tasks are running
#          1 - all the Lotus Domino server tasks are not running
#	   2 - some Lotus Domino server tasks are running 
# 
tasks_are_running() {
	# Look up every task named in LOTUS_TASKNAMES and gather the pids
	# found in RUNNING_TASKS; the number of pids decides the result.
	# Reads : LOTUS_TASKNAMES, NOTES_REV_DIR, TASK_COUNT
	# Sets  : RUNNING_TASKS

	RUNNING_TASKS=""
	for task in ${LOTUS_TASKNAMES}
	do
		get_pid $NOTES_REV_DIR/$task
		[ "${PID}" ] && RUNNING_TASKS="$RUNNING_TASKS $PID"
	done
	running=`echo $RUNNING_TASKS | wc -w`

	if [ $running = $TASK_COUNT ]; then
		# all Domino tasks are running on the node
		return 0
	elif [ $running = 0 ]; then
		# no task is running on the node
		return 1
	fi

	# some Domino tasks are running on the node
	return 2
}

#
# cleanup_tasks - stop all dangling Domino tasks     
#
cleanup_tasks() {

	# Stops the Domino server of instance ${_INST_NAME} and every task
	# that is still around:
	#   1. remove the instance from the process monitor queue,
	#   2. if a server was seen (SERVER_PIDS), ask it to quit with
	#      "server -q" and poll until it is gone or STOP_SVC_TIMEOUT
	#      is reached,
	#   3. otherwise, or if it did not quit, kill server and tasks,
	#   4. kill a left-over "server -q" helper and remove the lock file.
	# Reads : _INST_NAME, _INST_CONF_DIR, NOTES_UNAME, NOTES_SERVER,
	#         NOTES_REV_DIR, SERVER_PIDS, STOP_SVC_TIMEOUT, prefix
	# Sets  : RUNNING_TASKS (emptied on return)

	DELAY_TIME=5
	cleanup_done=0

	pmfadm -l ${_INST_NAME} > /dev/null 2>&1
	if [ $? -eq 0 ] ; then
		pmfadm -s ${_INST_NAME}
		if [ $? -ne 0 ]; then
			logerr "$prefix.4012" `gettext "Pmfd failed to delete ${_INST_NAME} from queue"`
		fi
	fi

	# if Domino server is still running, try to stop it by issuing Domino
	# stop server command "server -q".  Otherwise, this stop server
	# command won't work.  We have to kill all tasks using "kill -9"
	if [ "${SERVER_PIDS}" != "" ]; then
		su  - ${NOTES_UNAME} -c "cd ${_INST_CONF_DIR}; ${NOTES_SERVER} -q  2>&1  & "
		timeout=0
		#to support multiple instances
		#while [ $timeout -ne $STOP_SVC_TIMEOUT ] ;
		while [ $timeout -lt $STOP_SVC_TIMEOUT ] ;
		do
			sleep $DELAY_TIME
			get_pid "$NOTES_REV_DIR/server"
			if [ "${PID}" != "" ]; then
				# account for the time just slept
				timeout=`expr $timeout + $DELAY_TIME`
			else
				cleanup_done=1
				break
			fi
		done

	fi

	if [ $cleanup_done -ne 1 ] ; then
		get_pid "$NOTES_REV_DIR/server"
		if [ "${PID}" != "" ]; then
			for p in ${PID} ; do
				kill -KILL $p > /dev/null 2>&1
			done
			lognotice "$prefix.2008" `gettext "failed to stop Lotus Domino server ${_INST_NAME}, issuing kill command"`
		fi
		tasks_are_running
		if [  "${RUNNING_TASKS}" != "" ]; then
			for p in ${RUNNING_TASKS} ; do
				kill -KILL $p > /dev/null 2>&1
			done
			lognotice "$prefix.2009" `gettext "failed to stop Lotus Domino server tasks, issuing kill command"`
		fi

	fi

	# A "server -q" helper may still be hanging around.  The grep that
	# searches for it shows up in the ps listing with the same string, so
	# it is filtered out to avoid signalling grep's own (possibly already
	# recycled) pid.
	server_shutdown_process=`ps -ef | grep "$NOTES_REV_DIR/server -q" | grep -v grep | awk '{print $2}'`
	if [ ! -z "${server_shutdown_process}" ] ; then
		kill -KILL ${server_shutdown_process} > /dev/null 2>&1
	fi


	# check to see if the lock file ~notes.lck exist in
	# lotus server directory.  If yes, release the lock
	if [ -f ${_INST_CONF_DIR}/~notes.lck ]; then
		rm -f ${_INST_CONF_DIR}/~notes.lck  > /dev/null 2>&1
	fi
	#cleanup is now done in STOP
	#if [ -f /tmp/.NOTESMEM_please_do_not_remove* ]; then
	#        rm -f /tmp/.NOTESMEM_please_do_not_remove*
	#fi

	RUNNING_TASKS=""
}

#
# Time to wait for SIGTERM to stop a process.
# This should be in the config file.
#
# Seconds the process monitor waits after SIGTERM when the probe is stopped.
STOP_TIMEOUT=15

# A fixed budget of 20 seconds is shared by all instances at start-up
# (formerly START_SVC_TIMEOUT=20 for a single instance).
START_SVC_TIMEOUT=`expr 20 / $NUMBER_OF_INSTANCES`

# The framework STOP timeout, less 10 seconds reserved for the cleanup
# routines, is what all instances together may spend on stopping
# (formerly STOP_SVC_TIMEOUT=50).
FRAMEWORK_STOP_TIMEOUT=`hareg -q lotus -T STOP`
FRAMEWORK_STOP_TIMEOUT=`expr $FRAMEWORK_STOP_TIMEOUT - 10`

# Per-instance share of that budget, divided once more by 5.
STOP_SVC_TIMEOUT=`expr $FRAMEWORK_STOP_TIMEOUT / $NUMBER_OF_INSTANCES / 5`

#
# bundle_do_svc <action>
#
# is called for each instance
#
bundle_do_svc ()
{
	# Performs <action> for the instance described by the _INST_*
	# variables.  Actions handled: start, stop, abort, fm_init, fm_start,
	# fm_stop and fm_check_this_host_ok; any other action does nothing.
	#
	# Arguments: $1 - the action
	# Sets     : prefix (message id prefix of the action), NOTES_UNAME
	#            (owner of the instance's notes.ini), SERVER_PIDS
	# Note     : almost every path ends in "exit", i.e. it terminates the
	#            calling shell; only two paths in 'start' use "return".
	action=$1

	prefix="SUNWcluster.ha.lotus.$action"
	NOTES_UNAME=`ls -ld ${_INST_CONF_DIR}/notes.ini | nawk '{print $3}'`

	case $action in

	'start' )

		# When asked to start one particular instance (probe restart),
		# skip all others.
		if [ ! -z "${PROBE_INSTANCE_NAME}" ] ; then
			if [ ${PROBE_INSTANCE_NAME} != ${_INST_NAME} ] ; then
				return 0
			fi
		fi
		#Get the START timeout from the framework and distribute it
		#amongst all the instances
		FRAMEWORK_START_TIMEOUT=`hareg -q lotus -T START`
		#10 seconds allowed  for calling cleanup routines
		FRAMEWORK_START_TIMEOUT=`expr $FRAMEWORK_START_TIMEOUT - 10`
		# NOTE(review): the START budget is stored in STOP_SVC_TIMEOUT
		# (which bounds the wait in cleanup_tasks); START_SVC_TIMEOUT
		# may have been intended -- confirm before changing.
		STOP_SVC_TIMEOUT=`expr $FRAMEWORK_START_TIMEOUT / $NUMBER_OF_INSTANCES`
		is_member "${_INST_LOGICAL_HOST}" "$MASTERED_LOGICAL_HOSTS" || exit 0

		if [ ! -x ${_INST_START} ]; then
			logerr "$prefix.4014" `gettext "<${_INST_START}> is not executable."`
			exit 1
		fi
		verify_config

		# First, Check whether the main Domino server process is
		# running. If yes, then check whether other Domino tasks are
		# running. If all Domino tasks are running, exit with no error.
		# Otherwise, clean up these tasks.
		# After there is no Domino tasks are running on the node,
		# start Domino server now.
		get_pid "$NOTES_REV_DIR/server"
		SERVER_PIDS="$PID"
		tasks_are_running
		status=$?
		if [ $status -eq 0 -a "${SERVER_PIDS}" != "" ]; then
			# Domino server and all tasks are running, don't
			# need to start them
			lognotice "$prefix.2007" `gettext "Lotus Domino server already running"`
			exit 0
		fi

		if [ "${SERVER_PIDS}" != "" ]; then
			if [ $status -ne 0 ] ; then
				# When Domino server is running, if some
				# tasks are running or no task is running,
				# something is wrong and cleanup them
				cleanup_tasks
			fi
		else
			if [ $status -ne 1 ]; then
				# When Domino server is not running,
				# all Domino tasks are running or some
				# tasks are running.  Domino server is sick.
				# Try to cleanup them
				cleanup_tasks
			fi
		fi
		# Now try to start Domino server through pmfd
		#
		# Code for domino version 4.6.3
		#
		if [ -z "${_INST_PRIV_LOTUS_SERVER_TYPE:-}" ] ; then
			# _INST_PRIV_LOTUS_SERVER_TYPE variable is not defined. This indicates
			# the version previous to 4.6.3 of lotus is configured.
			# start the lotus using the old command.
			pmfadm -c  ${_INST_NAME} ${_INST_START} ${NOTES_UNAME} ${_INST_CONF_DIR} ${NOTES_SERVER}

		else # _INST_PRIV_LOTUS_SERVER_TYPE is defined, and it is having non-null value.
			if [ "${_INST_PRIV_LOTUS_SERVER_TYPE}" = "single" ] ; then
				# A domino server 4.6.3 without partition server
				# or a primary domino server is configured.
				# Use the old starting command.
				pmfadm -c  ${_INST_NAME} ${_INST_START} ${NOTES_UNAME} ${_INST_CONF_DIR} ${NOTES_SERVER}
			else # Secondary partition server is configured.
				# Check if readable password file exists...
				# (quoted: an unset name must fail the test
				# rather than vanish from it)
				if [ ! -r "${_INST_PRIV_LOTUS_PASSWORD_FILE:-}" ] ; then
					logerr "$prefix.4002" `gettext "Could not open the password file : ${_INST_PRIV_LOTUS_PASSWORD_FILE:-""} for instance ${_INST_NAME}"`
					return 1
				fi
				pmfadm -c  ${_INST_NAME} ${_INST_START} ${NOTES_UNAME} ${_INST_CONF_DIR} ${NOTES_SERVER} ${_INST_PRIV_LOTUS_PASSWORD_FILE}
			fi
		fi
		# was server started successfully ?
		# ($? is still the status of the pmfadm -c that was run above)
		if [ $? -ne 0 ]; then
			logerr "$prefix.4000" `gettext "failed to start Lotus Domino server ${_INST_NAME}"`
			exit 1
		else
			sleep $START_SVC_TIMEOUT
			lognotice "$prefix.2000" `gettext "Started Lotus Domino server ${_INST_NAME}"`
		fi
	;;


	'stop' | 'abort')

		is_member "${_INST_LOGICAL_HOST}" "$NOT_MASTERED_LOGICAL_HOSTS" || exit 0

		# stop the Notes daemons on the system
		# if the Domino server configuration file does not exist,
		# this node does not own the logical host, exit with 0
		if [ ! -f ${_INST_CONF_DIR}/notes.ini ]; then
			exit 0
		fi

		verify_config
		# First, Check whether the main Domino server process is
		# running. If yes, then check whether other Domino tasks are
		# running. If all Domino tasks are running, stop them by
		# issuing "server -q"
		# Otherwise, clean up these tasks by issuing "kill" command.
		get_pid "$NOTES_REV_DIR/server"
		SERVER_PIDS="$PID"
		if [ "${SERVER_PIDS}" = "" ]; then
			tasks_are_running
			# Domino server is not running and
			# no Domino tasks is running, just exit with 0
			if [ $? = 1 ] ; then
				exit 0
			fi
		fi
		# server or some tasks are running, cleanup them
		if [ "${action}" = "stop" ]; then
			cleanup_tasks
			lognotice "$prefix.2002" `gettext "Stopped Lotus Domino server ${_INST_NAME}"`
		else
			pmfadm -s ${_INST_NAME} KILL
		fi
		#cleaning up some files created by lotus
		# (rm -f is silent when nothing matches and, unlike a
		# "[ -f pattern ]" test, copes with several matching files)
		rm -f /tmp/.NOTESMEM_please_do_not_remove*
	;;


	'fm_init')
		lognotice "$prefix.2003" `gettext "fm_init not implemented "`
		;;


	'fm_start')

		#
		# Check if we are about to probe a logical host we master (local
		# probe),  or one which another host masters (remote probe), or
		# one in maintenance mode (don't probe)
		#
		maint=`haget -f is_maint -h ${_INST_LOGICAL_HOST}`
		if [ $? -ne 0 ]; then
			logerr "$prefix.4003" `gettext "haget(1M) failed for logical host ${_INST_LOGICAL_HOST}"`
			exit 1
		fi

		if  [ "$maint" =  "1" ]; then
			exit 0
		fi

		# set the probe flag, yes = local probing, no = remote probing
		is_member "${_INST_LOGICAL_HOST}" "$MASTERED_LOGICAL_HOSTS"
		if [ $? = 0 ] ; then
			local=yes
		else
			local=no
		fi

		if [ "$local" = "yes"  -a  \
		     "$_INST_PROBE_LOCAL_1" != "y" ]; then
			# no work to do
			exit 0
		fi
		if [ "$local" = "no"  -a  \
		     "$_INST_PROBE_REMOTE_1" != "y" ]; then
			# no work to do
			exit 0
		fi

		if [ "$local" = "yes" ]; then
			verify_config
		fi

		#
		# Launch a probe using the process monitor.
		# We are using the process monitor just to start and tag
		# the probe, without the retry feature of the process
		# monitor.
		pmfadm -c ${_INST_NAME}.probe \
			/bin/sh -c "${_INST_PROBE_PROG_1} ${_INST_NAME} $local >/dev/null 2>&1"
		if [ $? -ne 0 ]; then
			# A probe that is already registered is not an error.
			pmfadm -l ${_INST_NAME}.probe > /dev/null 2>&1
			if [ $? -eq 0 ] ; then
				exit 0
			fi

			logerr  "$prefix.4004" `gettext "Failed to start Lotus Domino probe server ${_INST_NAME}_probe_1"`
			exit 1
		else
			lognotice "$prefix.2004" `gettext "Started Lotus Domino probe server ${_INST_NAME}_probe_1"`
		fi
	;;

	'fm_stop')

		# If probe not running, do nothing
		ha_svc_not_running ${_INST_NAME}.probe && exit 0

		# pmf kills lotus_probe: TERM first, KILL if that fails
		pmfadm -s ${_INST_NAME}.probe -w ${STOP_TIMEOUT} TERM || \
			pmfadm -s ${_INST_NAME}.probe KILL
		if [ $? -ne 0 ]; then
			logerr "$prefix.4005" `gettext "failed to stop Lotus probe server ${_INST_NAME}.probe"`
			exit 1
		else
			lognotice "$prefix.2005" `gettext "Stopped Lotus probe server ${_INST_NAME}.probe"`
		fi

		;;

	'fm_check_this_host_ok')

		# If the HA-LOTUS logical host for this instance
		# is not currently mastered by this machine, exit now.

		is_member "${_INST_LOGICAL_HOST}" "$MASTERED_LOGICAL_HOSTS"
		if [ $? -ne 0 ]; then
			exit 0
		fi

		verify_config

		lognotice "$prefix.2006" `gettext "This host is OK for Notes Domino server $_INST_NAME"`
		;;

	esac
	exit 0
}
#include_boiler

# Command line arguments: the instance to probe and the probe flavour.
# LOCAL is compared against "yes" and "no" throughout this file; the
# fm_start action above passes its $local flag in this position.
INST_NAME=$1
LOCAL=$2

# Used for error message logging
set_inst_name ${INST_NAME}

#
# Restart an Domino server 
#
do_retry ()
{
	# Restart the Domino server of instance INST_NAME.  Only a local
	# probe restarts anything; for any other probe this is a no-op.
	# Reads: LOCAL, INST_NAME, retries, _INST_CONF_DIR,
	#        MASTERED_LOGICAL_HOSTS, prefix
	if [ "$LOCAL" != "yes" ]; then
		return 0
	fi

	lognotice "$prefix.2010" \
	`gettext "Restarting Domino server $INST_NAME, attempt #${retries}"`

	# The running tasks have already been checked and the server found
	# sick, so simply restart it.  A lock file ~notes.lck left behind in
	# the lotus server directory is released first.
	[ -f ${_INST_CONF_DIR}/~notes.lck ] &&
		rm -f ${_INST_CONF_DIR}/~notes.lck  > /dev/null 2>&1

	# The /tmp/.NOTESMEM_please_do_not_remove* files are deliberately left
	# alone here: that cleanup should not be done when running multiple
	# instances.

	# Starts only this probe's own instance INST_NAME
	lotus_svc_start "${MASTERED_LOGICAL_HOSTS}" "" "" "${INST_NAME}"> /dev/null 2>&1 
}


# evaluate_server_migration()
#
# Called when the retry logic reaches its wit's end.
# This routine checks if takeover is permitted for this Domino server,
# if this is a local/remote probe and tries to do the best it can.
#
# In any case, the probe is at the end of it's rope here, so it exits.
#
evaluate_server_migration()
{
	# Decides what to do once restarting is no longer an option, then
	# exits the probe: status 1 if the cluster key changed since the
	# probe recorded it in CLUST_KEY, status 0 in every other case.
	# Reads: CLUST_KEY, LOTUS_TAKEOVER, LOCAL, LOTUS_HOST, INST_NAME, prefix

	# check to see if the cluster is doing reconfiguration
	NEW_CLUST_KEY=`hactl -f cluster_key`
	if [ "$NEW_CLUST_KEY" -ne "$CLUST_KEY" ]; then
		logerr $prefix.4029 `gettext "lotus_probe: Missed cluster reconfiguration. Exiting"`
		exit 1
	fi

	# The action depends on the pair <takeover flag>.<local flag>
	case "$LOTUS_TAKEOVER.$LOCAL" in
	y.no)
		# takeover permitted, remote probe: ask the framework to
		# attempt a takeover
		lognotice "$prefix.2011" \
`gettext "Attempting to take ownership of Lotus Domino server $INST_NAME"`
		hactl -t -s lotus -l $LOTUS_HOST
		;;
	y.yes)
		# failover permitted, local probe: ask the framework to
		# attempt a failover
		lognotice "$prefix.2012" \
`gettext "Local probe for $INST_NAME: Giving up ownership"`
		hactl -g -s lotus -l $LOTUS_HOST
		;;
	n.yes)
		logwarn "$prefix.3001" \
`gettext "Failover not permitted for instance $INST_NAME. Giving up"`
		;;
	n.no)
		logwarn "$prefix.3002" \
`gettext "Takeover not allowed for instance $INST_NAME. Giving up"`
		;;
	esac

	exit 0

}

#
# check_tasks - check whether all Domino tasks are running.
#		At server start-up (argument 1) this routine re-checks the
#		tasks, up to start_retries + 1 times, before reporting a
#		failure when a task is not ready yet.
#
#		For a normal check, it reports a failure as soon as one task
#		is found not running.
#
#		Returns 1 if all tasks are running (or for a remote probe),
#		0 if some task is not running.
#
check_tasks() {
	# Arguments: $1 - 1 on the first probe after a (re)start, which
	#                 enables the lenient re-checking mode
	# Returns  : 1 - every task has a pid, or the probe is remote
	#            0 - at least one task has no pid
	# Reads    : LOCAL, LOTUS_TASKNAMES, NOTES_REV_DIR, prefix

	# a remote probe cannot look at the processes
	[ "$LOCAL" = "no" ] && return 1

	startup_flag=$1
	max_attempts=15
	attempt=0

	# lenient mode only when exactly one argument with value 1 was given
	lenient=0
	if [ $# -eq 1 -a $startup_flag -eq 1 ]; then
		lenient=1
	fi

	while [ $attempt -le $max_attempts ]; do
		missing=0
		for task in ${LOTUS_TASKNAMES} ; do
			get_pid $NOTES_REV_DIR/$task
			[ "${PID}" = "" ] || continue
			missing=1
			if [ $lenient -eq 0 ]; then
				# strict mode: the first missing task settles it
				lognotice "$prefix.4031" \
		`gettext "Probe detected that Lotus Domino task  $task  is not running"`
				break
			fi
		done

		# return ok if all tasks are running
		[ $missing -eq 1 ] || return 1

		if [ $lenient -ne 1 ]; then
			logerr "$prefix.4031" \
		`gettext "Probe detected that some Lotus Domino tasks are not running"` 
			return 0
		fi

		attempt=`expr $attempt + 1`
		lognotice "$prefix.2016"  \
			`gettext "Domino server needs more time, continue to wait"` 
	done

	# if get here, some tasks are not running on the node
	return 0
}

#
# Read and validate the probe parameters of instance INST_NAME.
# Required parameters that are missing end the probe with status 1;
# optional ones fall back to a default.
#

# Message id prefix of the probe.  It has to be set before the first
# logerr/lognotice below, otherwise their ids would lack the prefix.
prefix="SUNWcluster.ha.lotus.probe"

if [ -z "$INST_NAME"  -o  -z "$LOCAL" ]; then
        logerr "$prefix.4023"  \
		`gettext "Usage: $ARGV0 <instance> <inst_runs_remotely>"`
        exit 1
fi

MASTERED_LOGICAL_HOSTS="`haget -f mastered`"

LOTUS_PORT=`get_config_param $INST_NAME PORT`
# required parameter
if [ -z "$LOTUS_PORT" ]; then
        logerr "$prefix.4024" \
            `gettext "LOTUS_PORT value not set for instance $INST_NAME"`
        exit 1
fi
LOTUS_CONF_DIR=`get_config_param $INST_NAME CONF_DIR`
# required parameter
if [ -z "$LOTUS_CONF_DIR" ]; then
	logerr "$prefix.4025" \
	    `gettext "LOTUS_CONF_DIR value not set for Domino server $INST_NAME"`
	exit 1
fi

LOTUS_BASE_DIR=`get_config_param $INST_NAME BASE_DIR` 
# required parameter
if [ -z "$LOTUS_BASE_DIR" ]; then
	logerr "$prefix.4025" \
	    `gettext "LOTUS_BASE_DIR value not set for Domino server $INST_NAME"`

	exit 1
fi
LOTUS_HOST=`get_config_param $INST_NAME LOGICAL_HOST`
LOTUS_PROBE_INTERVAL=`get_config_param $INST_NAME PROBE_1_INTERVAL`
# parser requires this to be set, but doesn't check for negative values
if [ $LOTUS_PROBE_INTERVAL -lt 0 ]; then
	lognotice "$prefix.2013" \
`gettext "INTERVAL value is negative for Domino server $INST_NAME; using 60 seconds"`	
	LOTUS_PROBE_INTERVAL=60
fi

LOTUS_PROBE_TIMEOUT=`get_config_param $INST_NAME PROBE_1_TIMEOUT`
# optional parameter, parser doesn't check for <= 0 values
if [ -z "$LOTUS_PROBE_TIMEOUT" ]; then
	lognotice "$prefix.2014" \
`gettext "TIMEOUT value not set for Domino server $INST_NAME; using 60 seconds"`
	LOTUS_PROBE_TIMEOUT=60
fi
# what timeout value is too low?
if [ $LOTUS_PROBE_TIMEOUT -le 0 ]; then
	lognotice "$prefix.2015" \
`gettext "TIMEOUT is <= zero for Domino server $INST_NAME; resetting to 60 seconds"`
	LOTUS_PROBE_TIMEOUT=60
fi

LOTUS_TAKEOVER=`get_config_param $INST_NAME PROBE_1_TAKEOVER`
# Only "y" and "n" are accepted; anything else (including an empty
# value) ends the probe.

if [ "$LOTUS_TAKEOVER" != "y" -a "$LOTUS_TAKEOVER" != "n" ]; then
        logerr  "$prefix.4026" \
`gettext "Unrecognized value of TAKEOVER flag for Domino server $INST_NAME"`
        exit 1
fi

#
# Get the retry time interval (the time window in minutes)
#
RETRY_INTERVAL=`get_config_param $INST_NAME RETRY_INTERVAL`
if [ -z "$RETRY_INTERVAL" ]; then
        # IF RETRY_INTERVAL  was left blank, time window is indefinite
        RETRY_INTERVAL=0
else
        is_numeric $RETRY_INTERVAL
        if [ $? -ne 0 ]; then
                logerr "$prefix.4027"  \
	`gettext "Invalid value of RETRY_INTERVAL for instance $INST_NAME"`
                exit 1
        fi
fi
# Convert to seconds
RETRY_INTERVAL=`expr $RETRY_INTERVAL \* 60`

#
# Get the retry number for this instance
#
RETRY_TIMES=`get_config_param $INST_NAME RETRY_TIMES`
#
# validate the number
#
if [ -z "$RETRY_TIMES" ]; then
        # if RETRY_TIMES was left blank, we'll assume no retries
        RETRY_TIMES=0
else
        is_numeric $RETRY_TIMES
        if [ $? -ne 0 ]; then
                logerr "$prefix.4028"  \
		`gettext "Invalid value of RETRY_TIMES for instance $INST_NAME"`
                exit 1
        fi
fi


lognotice "$prefix.2015" \
`gettext "Starting a probing of Domino server $INST_NAME on logical host $LOTUS_HOST"`

NOTES_DIR=/opt/lotus
NOTES_SERVER=$NOTES_DIR/bin/server
RUNNING_TASKS=""
SERVER_PIDS=""

# File that receives the output of each connection attempt.
# NOTE(review): the name is the same for every instance, so probes of
# several instances would share it -- confirm whether that can happen.
LOTUSPROBEFILE=/var/opt/SUNWcluster/run/.lotus_probe
retries=0
wasdead=0

# Cluster key at probe start; evaluate_server_migration compares the
# current key against it to notice a missed reconfiguration.
CLUST_KEY=`hactl -f cluster_key`

# A local probe inspects the server processes, so it needs the values that
# verify_config derives (NOTES_REV_DIR, LOTUS_TASKNAMES, TASK_COUNT).
# verify_config reads the _INST_* names, hence the two assignments.
if [ "$LOCAL" = "yes" ]; then
	_INST_CONF_DIR=$LOTUS_CONF_DIR
        _INST_BASE_DIR=$LOTUS_BASE_DIR
	verify_config
fi
# We need to wait a while before the first probing,
# Lotus Domino server takes a larger amount of time to start up.
# Seconds a remote probe sleeps before its very first check.
START_PROBE_INTERVAL=100
# 1 until the first check after a (re)start has been made.
first_probe=1

#
# Probe loop.  It never terminates on its own; the probe ends through
# evaluate_server_migration (which exits) or when the process monitor
# kills it.
#
while : ; do

	tasks_ok=1
	#check to see if all Domino server tasks are running
	#For the first local probing, the Domino server may still be in the
	#process of starting its tasks, which may take a large amount of time.
	#check_tasks is called to check each process; on the first probe it
	#re-checks several times if the tasks come up slowly.
	#For the remote probing, just take a nap before doing the probing
	if [ $first_probe -eq 0 ]; then
		sleep $LOTUS_PROBE_INTERVAL
	else
		if [ "$LOCAL" = "no" ]; then
			sleep $START_PROBE_INTERVAL
		fi
	fi

	# check_tasks returns 0 when a task is missing, 1 when all is well
	check_tasks $first_probe
	tasks_ok=$?

	#  For now, just try to connect to Lotus Domino
	hatimerun -t $LOTUS_PROBE_TIMEOUT /usr/bin/telnet $LOTUS_HOST  \
		$LOTUS_PORT <<EOF > $LOTUSPROBEFILE 2>&1

EOF
	# probing_ok is the status of grep: 0 means "refused" was found,
	# i.e. the connection attempt FAILED.
	grep refused $LOTUSPROBEFILE > /dev/null 2>&1
	probing_ok=$?
	if [ $probing_ok -eq 0 -o $tasks_ok -eq 0 ]; then
		#At start-up, if probing fails or some tasks are not up,
		#don't do the takeover or restart the server because
		#the server tasks probably have not come up yet
		if [ $first_probe -eq 1 ]; then
			first_probe=0
			continue
		fi

		if [ $probing_ok -eq 0 ]; then
			logerr "$prefix.4030" \
`gettext "Probe detected that Lotus Domino server $INST_NAME is not responding"`
		fi
		# remember the failure so that the recovery can be reported
		wasdead=1
		if [ $RETRY_TIMES -eq 0 ]; then
			#
			# This Domino server was configured not to do any 
			# retries.  Do the failover check.
			#
			evaluate_server_migration
		elif [ $retries -ge $RETRY_TIMES ]; then
			#
			# the retry counter expired, so if
			# failover is permitted, initiate one.
			#
			retries=0
			evaluate_server_migration
		else
			# the retry counter did not expire,
			# so do another retry.
			#
			retries=`expr $retries + 1`
			do_retry
			# treat the next check as a start-up check again
			first_probe=1
		fi
	else
		first_probe=0
		# The server answered and all tasks are up.  Report the
		# recovery only here: announcing it right after a restart
		# attempt would claim success before it has been verified.
		if [ $wasdead -eq 1 ]; then
			lognotice  "$prefix.2017" \
		`gettext "Lotus Domino server $INST_NAME is on-line now"`
		fi
		wasdead=0
	fi
done

# XXX
#set +x
	
