#! /bin/sh
#
# ident	"@(#)hasap_restartR3.shi	1.5	99/01/29 SMI"
#
# Copyright (c) 1997-1999 by Sun Microsystems, Inc.
# All rights reserved.
#
# Usage: hasap_restartR3 InstanceName
#
# hasap_restartR3 is called from the start_net method to start SAP

# ###############################################################
#
# Routine to restart CI and AS.
#
# When we start, the DB could still be in the init state, so we have to
# try to restart R3 again and again. If a Dev/Test instance or an AS
# (application server) is on this physical host, stop it first.
# Usually when this routine is called, there is no CI running. 
# We also assume ipcs is clean. 
# 
# We need to stop the Test/Devel and app server here.
# There is a chance CI crashes, or a failover happens.
# There is no chance for stop_net_method to run, which
# will stop the test/devel and app server. This is a safety net.
# stopsap executables might not be around since the nfs server
# crashed, when CI=NFS. Thus we need this here.
# We stop all test/devel, app server before we start CI.
# After we start CI, we restart all the app servers.
# 
# ###############################################################

usage()
{
	# Look up the (possibly translated) usage text, then log it as an
	# error under message id 4066 of the current global $prefix.
	# The text is expanded unquoted on purpose so that logerr receives
	# it as separate words.
	usage_text=`gettext "Usage: hasap_restartR3 InstanceName"`
	logerr "${prefix}.4066" ${usage_text}
}


# The common data service code is included here so that the environment
# of the restart code will be the same as the other data service methods


#######################################################################
# The common data service code starts here

#
#pragma ident "@(#)ds_boiler	1.3	98/09/15 SMI"
#
# common boiler for HA data services
#
#


# Name and directory of the running script. "$0" is quoted so that a
# script path containing whitespace is passed to basename/dirname as a
# single argument.
ARGV0=`basename "$0"`
LOGGER=logger
# syslog facility as reported by haget, and the tag used for log entries
HA_SLOGFACILITY=`haget -f syslog_facility`
HA_SLOGTAG=hadf
prog_path=`dirname "$0"`

# source in ha-services common utilities
# (this happens before prog_path is added to PATH below, so hads_utilities
# has to be reachable through the PATH inherited from the caller)
. hads_utilities

# add the ha-service directory to the path
PATH=${prog_path}:${PATH}

#
# for use by subsequent hactl command, get the hostname of the local host
#
LOCALHOST=`uname -n`

#! /bin/sh
#
# ident "@(#)do_service 1.13     00/11/21 SMI"
#
#

# Prefix of every syslog message id produced by the HA-SAP scripts
SYSLOG_PREFIX="SUNWcluster.ha.sap"

# Location of the HA-SAP configuration file
HASAP_CONFIG_FILE=/etc/opt/SUNWscsap/hadsconf

#
# Let the parser load the configuration file.
# source_env_file logs its own error message when it fails,
# so there is nothing more to report here; just give up.
#
source_env_file ${HASAP_CONFIG_FILE} || exit 1

#
# The <sid>adm user's login scripts look for tty=console together
# with TERM=sun to decide whether openwin should be launched.
# Force TERM to a different value so that openwin is never started.
#
TERM=vt100
export TERM

#
# Time to wait for SIGTERM to stop a process.
# This should be in the config file.
#
STOP_TIMEOUT=15

# Values handed to pmfadm (-n and -t) when the probe is started
NUM_PROBE_RETRIES=3
PROBE_RETRY_PERIOD=1

#
# bundle_do_svc <action>
#
# is called for each instance
#
bundle_do_svc ()
{
	#
	# Runs one data service method for the SAP instance named by the
	# global $_INST_NAME.
	#
	# $1 is the action: start, start_net, stop_net, abort_net, stop,
	#    abort, fm_start, fm_stop or fm_check_this_host_ok.
	#    Any other action falls through the case statement untouched.
	#
	# Globals read: SYSLOG_PREFIX, _INST_NAME, STOP_TIMEOUT,
	#    NUM_PROBE_RETRIES, PROBE_RETRY_PERIOD, METHOD_TIMEOUT.
	#
	# This function never returns to its caller: every path ends in
	# "exit" (0 on success or nothing to do, 1 on failure).
	#
	action=$1
	prefix="$SYSLOG_PREFIX.$action"

	#
	# Set instance variables
	# (an empty result means get_sap_config_param could not supply a
	# value, so the method gives up)
	#

	SAPSID=`get_sap_config_param "$_INST_NAME" "PRIV_" "YOUR_SAP_SID" "NON_NULL" "" "" ""`
	[ -z "${SAPSID}" ] && exit 1

	CI_INSTANCE_ID=`get_sap_config_param "$_INST_NAME" "PRIV_" "CI_INSTANCE_ID" "NON_NULL" "" "" ""`
	[ -z "${CI_INSTANCE_ID}" ] && exit 1

	CI_STARTSAP_RETRY_CNT=`get_sap_config_param "$_INST_NAME" "PRIV_" "CI_STARTSAP_RETRY_CNT" "NUMBER" "10" "1" ""`
	[ -z "${CI_STARTSAP_RETRY_CNT}" ] && exit 1

	CI_STARTSAP_RETRY_INTERVAL=`get_sap_config_param "$_INST_NAME" "PRIV_" "CI_STARTSAP_RETRY_INTERVAL" "NUMBER" "30" "" ""`
	[ -z "${CI_STARTSAP_RETRY_INTERVAL}" ] && exit 1

	CI=`get_sap_config_param "$_INST_NAME" "" "LOGICAL_HOST" "NON_NULL" "" "" ""`
	[ -z "${CI}" ] && exit 1

	SAPADM=`get_sap_config_param "$_INST_NAME" "PRIV_" "SAP_ADMIN_LOGIN_NAME" "NON_NULL" "" "" ""`
	[ -z "${SAPADM}" ] && exit 1

	CI_SERVICES=`get_sap_config_param "$_INST_NAME" "PRIV_" "CI_SERVICES_STRING" "NON_NULL" "DVEBMGS" "" ""`
	[ -z "${CI_SERVICES}" ] && exit 1

	COMMAND_TIMEOUT=`get_sap_config_param "$_INST_NAME" "PRIV_" "COMMAND_TIMEOUT" "NUMBER" "60" "5" ""`
	[ -z "${COMMAND_TIMEOUT}" ] && exit 1

	PROBE_PROG=`get_sap_config_param "$_INST_NAME" "" "PROBE_1_PROG" "NON_NULL" "/opt/SUNWcluster/ha/sap/hasap_probe" "" ""`
	[ -z "${PROBE_PROG}" ] && exit 1

	#
	# Set path for SAP executable utilities
	#
	SAPEXE=/usr/sap/${SAPSID}/SYS/exe/run

	# Directory of the running script; the helper scripts live there.
	PATH0=`dirname $0`


	case $action in

	'start')
		#
		# this section of code is not used in HA-SAP
		#
		exit 0
		;;

	'start_net')
		# The code path via generic_svc already checks if the CI is
		# mastered on this node. If the CI is not mastered on this node,
		# then we will not reach this section.  Thus, from here on, we assume
		# that the CI is mastered on this node.

		NeedToStart=

		#
		# Test for sapmon code
		#
		if [ ! -x ${PATH0}/sapmon ]; then
			logerr "$prefix.4007" `gettext "Cannot execute ${PATH0}/sapmon. Exiting."`
			exit 1
		fi

		#
		# Check if SAP processes are running
		#
		hatimerun -t ${COMMAND_TIMEOUT} ${PATH0}/sapmon $_INST_NAME proc > /dev/null 2>&1
		probe_status=$?

		if [ $probe_status -eq 0 ]; then
			#
			# Check if message server is running.
			# Use the same ${PATH0}/sapmon that was verified above
			# rather than whatever "sapmon" PATH resolves to.
			#
			${PATH0}/sapmon $_INST_NAME ms > /dev/null 2>&1
			probe_status=$?

			if [ $probe_status -eq 0 ]; then
				#
				# If sapmon proc and ms pass, then
				# it is likely that the user has
				# started SAP outside of the clustering
				# software.  In this case, log an error
				# message because SAP will have trouble
				# shutting down or switching over in this
				# state.
				#
				logerr "$prefix.4073" `gettext "SAP instance ${_INST_NAME} appears to be running outside of the control of the clustering software. You must turn off the SAP data service, manually turn off SAP, and then start SAP by turning on the SAP data service. Failure to do so may prevent proper shutdown and switchover. The start code is exiting."`
				exit 1
			else
				#
				# Message server is not running,
				# so we need to start SAP
				#
				NeedToStart=y
			fi
		else
			NeedToStart=y
		fi

		# ###############################################################
		#
		# Restart CI and AS here, if needed.
		#
		# ###############################################################

		if [ "$NeedToStart" = "y" ]; then

			#
			# Call hasap_restartR3 script to bring up SAP
			#
			if [ -x "${PATH0}/hasap_restartR3" ]; then

				pmfadm -c $_INST_NAME -C 6 /bin/sh -c "${PATH0}/hasap_restartR3 ${_INST_NAME} >/dev/null 2>&1"
				if [ $? -ne 0 ]; then
					logerr "$prefix.4008" `gettext "Failed to start SAP instance ${_INST_NAME}"`
					exit 1
				fi
			else
				logerr "$prefix.4009" `gettext "Cannot execute ${PATH0}/hasap_restartR3. Exiting."`
				exit 1
			fi
		fi

		# The main work of starting SAP is done in the hasap_restartR3 script
		# which is called above
		;;

	'stop_net' | 'abort_net')

		# ###############################################################
		#
		# When this point is reached, there is a stopping job to do.
		# Do:
		#
		# 1. stop all app server/test/develop instances
		# 2. stopsap r3
		# 3. stop sap collector
		#
		# ###############################################################

		SAPOSCOL=/usr/sap/${SAPSID}/SYS/exe/run/saposcol
		# 0 means "no background stop_all_instances script was launched"
		stop_all_pid=0

		#
		# remove the instance from pmfd's queue
		# (a failure is logged but does not abort the stop)
		#
		pmfadm -s ${_INST_NAME} >/dev/null 2>&1
		if [ $? -ne 0 ]; then
			logerr "$prefix.4002" `gettext "Failed to stop monitoring SAP instance ${_INST_NAME}."`
		fi

		#
		# We will stop all instances before we start the CI in start_net.
		# However, if we can stop all instances earlier it will cause
		# fewer problems later. stop_all_instances in start_net
		# is just a safety net for those cases where there is a system crash.
		#
		if [ -x ${PATH0}/hasap_stop_all_instances ]; then
			lognotice "$prefix.2016" `gettext "Executing hasap_stop_all_instances in background."`
			${PATH0}/hasap_stop_all_instances "${_INST_NAME}" DURING_CI_STOP `expr $METHOD_TIMEOUT - 5` &
			stop_all_pid=$!
			lognotice "$prefix.2071" `gettext "Will wait for the stop_all_instances script (pid ${stop_all_pid}) to finish before exiting."`
		fi

		lognotice "$prefix.2006" `gettext "Stopping the SAP Central Instance."`
		lognotice "$prefix.2041" `gettext "Executing stopsap r3 as user \"${SAPADM}\""`

		su - ${SAPADM} -c 'stopsap r3' >/dev/console 2>&1
		su - ${SAPADM} -c "${SAPOSCOL} -k" >/dev/null 2>&1

		#
		# If the stopsap fails, cleaning the ipc's will
		# prevent the SAP processes from doing more work.
		#
		if [ -x ${SAPEXE}/cleanipc ]; then

			numipc=`${SAPEXE}/cleanipc ${CI_INSTANCE_ID} | grep 'Number of IPC-Objects' | awk -F: '{print \$2}'`

			#
			# Clean up when the count is unknown (empty) or positive.
			# The two conditions are separate tests joined by || so
			# that an empty count is never handed to -gt: with the
			# obsolescent -o operator both operands are evaluated
			# and an empty string is not a valid integer.
			#
			if [ -z "$numipc" ] || [ "$numipc" -gt 0 ]; then

				lognotice "$prefix.2076" `gettext "Found $numipc leftover IPC objects for SAP instance, removing via cleanipc."`
				${SAPEXE}/cleanipc ${CI_INSTANCE_ID} remove >/dev/console 2>&1
			fi

		else
			logwarning "$prefix.3001" `gettext "Cannot execute ${SAPEXE}/cleanipc"`
		fi

		lognotice "$prefix.2042" `gettext "The SAP Central Instance has been stopped."`

		#
		# If the background stop_all_instances script is still alive,
		# hold this method open until it has finished.
		#
		if [ ${stop_all_pid} -ne 0 ]; then
			ps -p  ${stop_all_pid} >/dev/null 2>&1
			if [ $? -eq 0 ]; then
				lognotice "$prefix.2070" `gettext "Waiting for the stop_all_instances script to finish (pid ${stop_all_pid})."`
				wait
				lognotice "$prefix.2072" `gettext "Done waiting for the stop_all_instances script to finish."`
			fi
		fi

		;;

	'stop' | 'abort')

		ha_svc_not_running ${_INST_NAME}
		if [ $? -ne 0 ]; then
			lognotice "$prefix.2008" `gettext "Still monitoring some SAP processes. Will stop these processes."`

			# The most likely reason to get here is because we are turning off
			# the data service while it is still trying to start. Because
			# hasap_restartR3 is run in the background, we won't have another
			# opportunity to stop the script before it starts SAP.

			#
			# use pmf to stop SAP processes
			# (TERM first; KILL only if the TERM request fails)
			#
			pmfadm -s ${_INST_NAME} -w ${STOP_TIMEOUT} TERM || \
				pmfadm -s ${_INST_NAME} -w ${STOP_TIMEOUT} KILL

			ha_svc_not_running ${_INST_NAME}
			if [ $? -ne 0 ]; then
				logerr "$prefix.4003" `gettext "Failed to stop SAP instance ${_INST_NAME}"`
				exit 1
			else
				lognotice "$prefix.2009" `gettext "Stopped SAP instance ${_INST_NAME}"`
			fi
		else
			lognotice "$prefix.2007" `gettext "No SAP processes for instance ${_INST_NAME} were found. Exiting with no action."`
		fi
		;;

	'fm_start')
		ci_physhost=`haget -f master -h "$CI"`
		if [ $? -ne 0 ]; then
			logerr "$prefix.4004" `gettext "Cannot obtain name of master for ${CI}"`
			exit 1
		fi

		if [ -z "$ci_physhost" ]; then
			logerr "$prefix.4059" `gettext "Cannot obtain name of master for ${CI}"`
			exit 1
		fi

		THIS_PHYS_HOST=`uname -n`

		#
		# If this SAP instance's diskset is in maint mode, exit now.
		#
		MAINT=`haget -f is_maint -h ${CI}`
		if [ "$MAINT" = "1" ]; then
			lognotice "$prefix.2012" `gettext "The SAP Central Instance's logical host (${CI}) is in maintenance mode so the probe will not be started."`
			exit 0
		fi

		#
		# Check if the CI logical host is mastered on this node
		#
		if [ "$ci_physhost" != "$THIS_PHYS_HOST" ]; then
			#
			# This host does not master CI, so we won't start probe
			#
			exit 0
		else
			lognotice "$prefix.2011" `gettext "SAP Central Instance is mastered on this host so the probe will be started."`
		fi

		ha_svc_not_running ${_INST_NAME}.probe
		if [ $? -eq 0 ]; then

			# pmf starts "hasap_probe InstanceName"
			# hasap_probe runs until fm_stop kills it.

			pmfadm -c ${_INST_NAME}.probe -C 1 -n ${NUM_PROBE_RETRIES} -t ${PROBE_RETRY_PERIOD} /bin/sh -c "${PROBE_PROG} ${_INST_NAME} >/dev/null 2>&1"
			if [ $? -ne 0 ]; then
				logerr "$prefix.4005" `gettext "Failed to start SAP probe for instance ${_INST_NAME}"`
				exit 1
			else
				lognotice "$prefix.2013" `gettext "Started SAP probe for instance ${_INST_NAME}"`
			fi
		else
			lognotice "$prefix.2068" `gettext "SAP probe is already running for instance ${_INST_NAME}"`
		fi

		;;

	'fm_stop')
		#
		# If probe not running, do nothing
		#
		ha_svc_not_running ${_INST_NAME}.probe && exit 0

		#
		# stop hasap_probe
		# ($? below is the status of the last pmfadm that ran)
		#
		pmfadm -s ${_INST_NAME}.probe -w ${STOP_TIMEOUT} TERM || \
			pmfadm -s ${_INST_NAME}.probe KILL

		if [ $? -ne 0 ]; then
			logerr "$prefix.4006" `gettext "Failed to stop SAP probe for instance ${_INST_NAME}"`
			exit 1
		else
			lognotice "$prefix.2014" `gettext "Stopped SAP probe for instance ${_INST_NAME}"`
		fi
		;;

	'fm_check_this_host_ok')
		lognotice "$prefix.2015" `gettext "This host is OK for SAP instance ${_INST_NAME}"`
		;;

	esac

	exit 0
}


#
# get_sap_config_param 
#
#  Gets the instance variables from hadsconf.
#  If the parameter is set incorrectly, then
#  it will try to use the default.  If there is
#  no default, then the parameter will be unset.  
#  If the value is OK, then it will return that value.
#
# Parameters:
#
# $1 is the instance name
# $2 is param prefix to hadsconfig name used for get_config_param
# $3 is param name used in hadsconfig
# $4 if NON_NULL -> value must be non null
#    if Y_OR_N -> value must be non null and either y or n
#    if NUMBER -> value must be non null and a number ( >= 0)
# $5 is the default to use if there is an incorrect or omitted
#    parameter in hadsconfig. If this argument is null and 
#    there is an invalid param, then do not return a value.
# $6 is the minimum numeric value (if $4 == NUMBER)
# $7 is the maximum numeric value (if $4 == NUMBER)
#

get_sap_config_param()
{
	#
	# Prints the validated value of one hadsconf parameter on stdout.
	#
	# $1 instance name, $2 parameter prefix, $3 parameter name,
	# $4 validation type (NON_NULL, Y_OR_N or NUMBER), $5 default,
	# $6 minimum and $7 maximum (both only used for NUMBER).
	#
	# Returns 0 after printing either the user's value or the default.
	# Returns 1, printing nothing, when the user's value is rejected
	# and no default exists, or when $4 is not a known type.
	#
	# Callers capture the output with command substitution, so the
	# uppercase variables assigned below do not leak into their shell.
	#
	HASAP_INSTANCE_NAME=$1
	PARAM_PREFIX=$2
	HADSCONFIG_NAME=$3
	VAR_TYPE=$4
	DEFAULT=$5
	MIN=$6
	MAX=$7

	#
	# Get the value that the user has set in hadsconfig
	#
	USER_SET_VALUE=`get_config_param ${HASAP_INSTANCE_NAME} "${PARAM_PREFIX}${HADSCONFIG_NAME}"`

	#
	# Each branch either prints an accepted value and returns, or logs
	# why the value was rejected and falls out of the case statement
	# to the shared default handling at the bottom.
	#
	case "${VAR_TYPE}" in

	NON_NULL)
		if [ -n "${USER_SET_VALUE}" ]; then
			#
			# The parameter is OK
			#
			echo "${USER_SET_VALUE}"
			return 0
		fi

		logerr "$prefix.4078" `gettext "${HADSCONFIG_NAME} was not set for instance ${HASAP_INSTANCE_NAME}."`
		;;

	Y_OR_N)
		#
		# Normalize the accepted spellings to a plain "y" or "n"
		#
		case "${USER_SET_VALUE}" in
		y|Y|yes|YES|Yes)
			echo "y"
			return 0
			;;
		n|N|no|NO|No)
			echo "n"
			return 0
			;;
		esac

		logerr "$prefix.4080" `gettext "The value for parameter ${HADSCONFIG_NAME} is invalid for instance ${HASAP_INSTANCE_NAME}. The value must be \"y\" or \"n\"."`
		;;

	NUMBER)
		is_numeric "${USER_SET_VALUE}"
		if [ $? -ne 0 ]; then
			#
			# The user set value is not a number.
			#
			logerr "$prefix.4082" `gettext "${HADSCONFIG_NAME} is not a valid number for instance ${HASAP_INSTANCE_NAME}."`

		elif [ -n "${MIN}" ] && [ "${USER_SET_VALUE}" -lt "${MIN}" ]; then
			#
			# Below the lower bound (only checked when MIN is given)
			#
			logerr "$prefix.4080" `gettext "${HADSCONFIG_NAME} must be greater than or equal to ${MIN} for instance ${HASAP_INSTANCE_NAME}."`

		elif [ -n "${MAX}" ] && [ "${USER_SET_VALUE}" -gt "${MAX}" ]; then
			#
			# Above the upper bound (only checked when MAX is given)
			#
			logerr "$prefix.4081" `gettext "${HADSCONFIG_NAME} must be less than or equal to ${MAX} for instance ${HASAP_INSTANCE_NAME}."`

		else
			#
			# The parameter is OK
			#
			echo "${USER_SET_VALUE}"
			return 0
		fi
		;;

	*)
		#
		# Unknown validation type. Should never get here.
		#
		return 1
		;;

	esac

	#
	# The user's value was rejected; try the default instead.
	#
	_hasap_param_default
	return $?
}

#
# _hasap_param_default
#
#  Private helper of get_sap_config_param: handles a rejected user value.
#  Reads HADSCONFIG_NAME and DEFAULT as set by get_sap_config_param.
#  With an empty DEFAULT it logs an error and returns 1; otherwise it
#  logs a notice, prints DEFAULT and returns 0.
#
_hasap_param_default()
{
	if [ -z "${DEFAULT}" ]; then
		logerr "$prefix.4079" `gettext "No default value is available for parameter ${HADSCONFIG_NAME}. Run hadsconfig to set the value of this parameter. Exiting."`
		return 1
	fi

	lognotice "$prefix.2078" `gettext "${HADSCONFIG_NAME} is being temporarily set to the default value of \"${DEFAULT}\".  Run hadsconfig to set the value of this parameter."`

	# Quoted so the default is printed verbatim, without word
	# splitting or filename expansion.
	echo "${DEFAULT}"
	return 0
}




#include_boiler

# The common data service code ends here
#######################################################################


#######################################################################
# The hasap_restartR3 code starts here

# Message ids logged by this script are all of the form $prefix.<number>
prefix="${SYSLOG_PREFIX}.restartR3"

#
# An instance name must be given on the command line
#
[ $# -ge 1 ] || {
	usage
	exit 2
}

#
# The first argument names the instance; it may not be empty
#
_INST_NAME=$1

[ -n "$_INST_NAME" ] || {
	usage
	logerr "$prefix.4010" `gettext "Usage: The instance name must be non-null"`
	exit 2
}

#
# The name must also be one of the entries of $_INST_LIST
#
is_member "$_INST_NAME" "$_INST_LIST" || {
	usage
	logerr "$prefix.4062" `gettext "Usage: The instance name \"${_INST_NAME}\" is not valid. The following are valid instances: ${_INST_LIST}"`
	exit 2
}

#
# Register the instance name for error message logging
#
set_inst_name ${_INST_NAME}

#
# Set instance variables
#
#
# Load every instance parameter this script needs.
# get_sap_config_param prints nothing when it cannot supply a value,
# so an empty result ends the script with status 1.
#

# SAP system id (no default)
SAPSID=`get_sap_config_param "$_INST_NAME" "PRIV_" "YOUR_SAP_SID" "NON_NULL" "" "" ""`
[ -n "${SAPSID}" ] || exit 1

# Central instance id (no default)
CI_INSTANCE_ID=`get_sap_config_param "$_INST_NAME" "PRIV_" "CI_INSTANCE_ID" "NON_NULL" "" "" ""`
[ -n "${CI_INSTANCE_ID}" ] || exit 1

# Logical host of the central instance (no default, no PRIV_ prefix)
CI=`get_sap_config_param "$_INST_NAME" "" "LOGICAL_HOST" "NON_NULL" "" "" ""`
[ -n "${CI}" ] || exit 1

# Login name of the SAP administrator (no default)
SAPADM=`get_sap_config_param "$_INST_NAME" "PRIV_" "SAP_ADMIN_LOGIN_NAME" "NON_NULL" "" "" ""`
[ -n "${SAPADM}" ] || exit 1

# Services string of the central instance (default DVEBMGS)
CI_SERVICES=`get_sap_config_param "$_INST_NAME" "PRIV_" "CI_SERVICES_STRING" "NON_NULL" "DVEBMGS" "" ""`
[ -n "${CI_SERVICES}" ] || exit 1

# Number of startsap attempts (default 10, minimum 1)
CI_STARTSAP_RETRY_CNT=`get_sap_config_param "$_INST_NAME" "PRIV_" "CI_STARTSAP_RETRY_CNT" "NUMBER" "10" "1" ""`
[ -n "${CI_STARTSAP_RETRY_CNT}" ] || exit 1

# Seconds to sleep between startsap attempts (default 30)
CI_STARTSAP_RETRY_INTERVAL=`get_sap_config_param "$_INST_NAME" "PRIV_" "CI_STARTSAP_RETRY_INTERVAL" "NUMBER" "30" "" ""`
[ -n "${CI_STARTSAP_RETRY_INTERVAL}" ] || exit 1

# Timeout handed to hatimerun for the sapmon probes (default 60, minimum 5)
COMMAND_TIMEOUT=`get_sap_config_param "$_INST_NAME" "PRIV_" "COMMAND_TIMEOUT" "NUMBER" "60" "5" ""`
[ -n "${COMMAND_TIMEOUT}" ] || exit 1

# Seconds granted to hasap_stop_all_instances before the CI start (default 60)
STOP_ALL_RUNTIME=`get_sap_config_param "$_INST_NAME" "PRIV_" "TIME_ALLOWED_TO_STOP_ALL_INSTANCES_BEFORE_CI_START" "NUMBER" "60" "" ""`
[ -n "${STOP_ALL_RUNTIME}" ] || exit 1

# y/n flag (default n): start the CI even if the stop_all script fails?
ALLOW_CI_START_IF_FOREGROUNDED_STOP_ALL_INSTANCES_RETURNS_ERROR=`get_sap_config_param "$_INST_NAME" "PRIV_" "ALLOW_CI_START_IF_FOREGROUNDED_STOP_ALL_INSTANCES_RETURNS_ERROR" "Y_OR_N" "n" "" ""`
[ -n "${ALLOW_CI_START_IF_FOREGROUNDED_STOP_ALL_INSTANCES_RETURNS_ERROR}" ] || exit 1


#
# Directory holding the SAP executable utilities
#
SAPEXE=/usr/sap/${SAPSID}/SYS/exe/run

# Directory of this script; the helper scripts are expected there
PATH0=`dirname $0`


#
# The sapmon probe has to be executable, otherwise nothing below can work
#
[ -x ${PATH0}/sapmon ] || {
	logerr "$prefix.4011" `gettext "Cannot execute ${PATH0}/sapmon. Exiting."`
	exit 1
}


#
# Shutdown the application servers to make sure
# they have been stopped.  When the CI server crashes, 
# there is no chance to stop app server or 
# test/development instances during the stop_net method.
# Thus, the hasap_stop_all_instances scripts are called
# again before the start of the SAP CI.
#
if [ -x ${PATH0}/hasap_stop_all_instances ]; then

	if [ "${STOP_ALL_RUNTIME}" -gt 0 ]; then

		if [ "${ALLOW_CI_START_IF_FOREGROUNDED_STOP_ALL_INSTANCES_RETURNS_ERROR}" = "y" ]; then

			#
			# Lenient mode: whatever the stop_all script does,
			# SAP is started afterwards. The outcome is only logged.
			#
			lognotice "$prefix.2051" `gettext "Executing hasap_stop_all_instances for ${STOP_ALL_RUNTIME} seconds in the foreground."`

			hatimerun -a -t ${STOP_ALL_RUNTIME} ${PATH0}/hasap_stop_all_instances "${_INST_NAME}" BEFORE_CI_START ${STOP_ALL_RUNTIME}
			stop_all_rc=$?

			# Per the messages below: 99 = not finished in time,
			# 98 = hatimerun error, anything else non-zero comes
			# from the stop_all script.
			case "${stop_all_rc}" in
			0)
				lognotice "$prefix.2054" `gettext "hasap_stop_all_instances completed within ${STOP_ALL_RUNTIME} seconds."`
				lognotice "$prefix.2055" `gettext "hasap_stop_all_instances exited with code ${stop_all_rc}."`
				;;
			99)
				lognotice "$prefix.2052" `gettext "hasap_stop_all_instances did not complete within ${STOP_ALL_RUNTIME} seconds."`
				lognotice "$prefix.2053" `gettext "hasap_stop_all_instances will continue running in the background."`
				lognotice "$prefix.2079" `gettext "SAP will be started because the ALLOW_CI_START_IF_FOREGROUNDED_STOP_ALL_INSTANCES_RETURNS_ERROR parameter is set to \"${ALLOW_CI_START_IF_FOREGROUNDED_STOP_ALL_INSTANCES_RETURNS_ERROR}\"."`
				;;
			98)
				logerr "$prefix.4039" `gettext "hatimerun detected an error while running hasap_stop_all_instances."`
				lognotice "$prefix.2079" `gettext "SAP will be started because the ALLOW_CI_START_IF_FOREGROUNDED_STOP_ALL_INSTANCES_RETURNS_ERROR parameter is set to \"${ALLOW_CI_START_IF_FOREGROUNDED_STOP_ALL_INSTANCES_RETURNS_ERROR}\"."`
				;;
			*)
				logerr "$prefix.4083" `gettext "hasap_stop_all_instances returned ${stop_all_rc}, which indicates that it could not stop all of the instances."`
				lognotice "$prefix.2079" `gettext "SAP will be started because the ALLOW_CI_START_IF_FOREGROUNDED_STOP_ALL_INSTANCES_RETURNS_ERROR parameter is set to \"${ALLOW_CI_START_IF_FOREGROUNDED_STOP_ALL_INSTANCES_RETURNS_ERROR}\"."`
				;;
			esac

		else

			#
			# Strict mode: SAP may only be started on this node when
			# the stop_all script finishes within ${STOP_ALL_RUNTIME}
			# seconds and reports success. Otherwise restartR3 gives
			# up here and leaves the decision to the fault monitor
			# (ALLOW_CI_FAILOVER flag and NUM_RESTARTS_ON_LOCAL_NODE).
			#
			lognotice "$prefix.2048" `gettext "Executing hasap_stop_all_instances for ${STOP_ALL_RUNTIME} seconds."`

			hatimerun -t ${STOP_ALL_RUNTIME} ${PATH0}/hasap_stop_all_instances "${_INST_NAME}" BEFORE_CI_START ${STOP_ALL_RUNTIME}
			stop_all_rc=$?

			case "${stop_all_rc}" in
			0)
				lognotice "$prefix.2049" `gettext "hasap_stop_all_instances completed within ${STOP_ALL_RUNTIME} seconds."`
				lognotice "$prefix.2050" `gettext "hasap_stop_all_instances exited with code ${stop_all_rc}."`
				;;
			*)
				# Say what went wrong first ...
				case "${stop_all_rc}" in
				99)
					logerr "$prefix.4035" `gettext "hasap_stop_all_instances did not complete within ${STOP_ALL_RUNTIME} seconds."`
					;;
				98)
					logerr "$prefix.4037" `gettext "hatimerun detected an error while running hasap_stop_all_instances."`
					;;
				*)
					logerr "$prefix.4083" `gettext "hasap_stop_all_instances returned ${stop_all_rc}, which indicates that it could not stop all of the instances."`
					;;
				esac

				# ... then abandon the start, whatever the reason was.
				logerr "$prefix.4036" `gettext "The ALLOW_CI_START_IF_FOREGROUNDED_STOP_ALL_INSTANCES_RETURNS_ERROR parameter is not set to \"y\" so SAP will not be started. Exiting."`

				# remove this instance from pmfd's queue
				pmfadm -s ${_INST_NAME} >/dev/null 2>&1
				exit 1
				;;
			esac
		fi

	else
		# No foreground time was granted: launch the script and move on.
		lognotice "$prefix.2016" `gettext "Executing hasap_stop_all_instances in the background"`
		${PATH0}/hasap_stop_all_instances "${_INST_NAME}" BEFORE_CI_START ${STOP_ALL_RUNTIME} &
	fi
fi


#
# Try to start SAP R3 up to ${CI_STARTSAP_RETRY_CNT} times, sleeping
# ${CI_STARTSAP_RETRY_INTERVAL} seconds after every failed attempt.
# r3Started becomes "y" once the proc and db probes pass after startsap.
#
r3Started=
cur_retry=1

while [ ${cur_retry} -le ${CI_STARTSAP_RETRY_CNT} ]; do

	lognotice "$prefix.2017" `gettext "Starting SAP R3. (retry ${cur_retry} of ${CI_STARTSAP_RETRY_CNT})"`

	#
	# Test if Database is available by calling "sapmon $_INST_NAME db"
	#
	hatimerun -t ${COMMAND_TIMEOUT} ${PATH0}/sapmon $_INST_NAME db > /dev/null 2>&1
	probe_status=$?

	if [ ${probe_status} -eq 0 ]; then

		lognotice "$prefix.2018" `gettext "Database is running. Proceeding to startsap r3."`

		#
		# Database is running, so we move on to "startsap r3"
		#

	else

		lognotice "$prefix.2019" `gettext "Database not yet available. Retry in ${CI_STARTSAP_RETRY_INTERVAL} seconds."`

		cur_retry=`expr $cur_retry + 1`
		sleep ${CI_STARTSAP_RETRY_INTERVAL}
		continue
	fi

	#
	# Check if there are any remnants of old SAP ipc objects
	# that may hinder the startup of SAP
	#
	if [ -x ${SAPEXE}/cleanipc ]; then

		numipc=`${SAPEXE}/cleanipc ${CI_INSTANCE_ID} | grep 'Number of IPC-Objects' | awk -F: '{print \$2}'`

		#
		# Clean up when the count is unknown (empty) or positive.
		# The two conditions are separate tests joined by || so that
		# an empty count is never handed to -gt: with the obsolescent
		# -o operator both operands are evaluated and an empty string
		# is not a valid integer.
		#
		if [ -z "$numipc" ] || [ "$numipc" -gt 0 ]; then

			lognotice "$prefix.2020" `gettext "Found $numipc leftover IPC objects for SAP instance, removing via cleanipc."`
			${SAPEXE}/cleanipc ${CI_INSTANCE_ID} remove >/dev/console 2>&1
		fi

	else
		logwarning "$prefix.3000" `gettext "Cannot execute ${SAPEXE}/cleanipc"`
	fi


	#
	# Check if there is a kill.sap file.  If so, eval it.
	#
	SAPKILLFILE=/usr/sap/${SAPSID}/${CI_SERVICES}${CI_INSTANCE_ID}/work/kill.sap

	if [ -f ${SAPKILLFILE} ]; then

		kill_contents=`cat ${SAPKILLFILE}`
		lognotice "$prefix.2021" `gettext "Found leftover kill.sap file. Will execute it as user \"${SAPADM}\". The kill.sap file contains: \"${kill_contents}\""`

		su - ${SAPADM} -c "eval ${SAPKILLFILE}" >/dev/console 2>&1
		if [ $? -eq 0 ]; then
			#
			# Give SAP a chance to clean up its resources after
			# running the kill.sap file. We will loop and check
			# if the kill.sap file has been deleted.
			# At most kill_loop_max passes, kill_sleep_time
			# seconds apart; the file is run again on every
			# pass in which it still exists.
			#
			kill_loop_cnt=0
			kill_sleep_time=2
			kill_loop_max=5
			while [ $kill_loop_cnt -lt $kill_loop_max ]; do

				#
				# Break out of the loop if the kill file is gone
				#
				if [ ! -f ${SAPKILLFILE} ]; then
					break
				else
					su - ${SAPADM} -c "eval ${SAPKILLFILE}" >/dev/console 2>&1
				fi

				kill_loop_cnt=`expr $kill_loop_cnt + 1`
				sleep $kill_sleep_time
			done
		fi
	fi


	lognotice "$prefix.2022" `gettext "Executing startsap r3 as user \"${SAPADM}\":"`

	# The exit status of startsap is not examined; success is judged
	# solely by the sapmon probes that follow.
	su - ${SAPADM} -c 'startsap r3' >/dev/console 2>&1

	#
	# Check if SAP processes started
	#
	hatimerun -t ${COMMAND_TIMEOUT} ${PATH0}/sapmon $_INST_NAME proc > /dev/null 2>&1
	probe_status=$?

	if [ $probe_status -ne 0 ]; then
		lognotice "$prefix.2024" `gettext "Some SAP R3 process failed to start. Retry in ${CI_STARTSAP_RETRY_INTERVAL} seconds. (proc probe_status=${probe_status})"`
		cur_retry=`expr $cur_retry + 1`
		sleep ${CI_STARTSAP_RETRY_INTERVAL}
		continue
	fi

	#
	# Check that the Database is available. If it is not, then we
	# will try to start SAP again in the next execution of the loop
	# because the dialog work processes will exit if they cannot
	# initially connect to the Database.
	#
	hatimerun -t ${COMMAND_TIMEOUT} ${PATH0}/sapmon $_INST_NAME db > /dev/null 2>&1
	probe_status=$?

	if [ $probe_status -ne 0 ]; then
		lognotice "$prefix.2066" `gettext "The database is unavailable so SAP will not start properly. Retry in ${CI_STARTSAP_RETRY_INTERVAL} seconds. (db probe_status=${probe_status})"`
		cur_retry=`expr $cur_retry + 1`
		sleep ${CI_STARTSAP_RETRY_INTERVAL}
		continue
	fi

	#
	# SAP has been started successfully if we passed the
	# above probes. We can exit the retry loop now.
	#
	lognotice "$prefix.2023" `gettext "SAP R3 started."`
	r3Started=y
	break

done


#
# All attempts used up without success: take the instance out of
# pmfd's queue and report the failure through the exit status.
#
if [ "$r3Started" != "y" ]; then

	logerr "$prefix.4012" `gettext "SAP R3 failed to start after ${CI_STARTSAP_RETRY_CNT} tries. Exiting."`
	pmfadm -s "${_INST_NAME}" >/dev/null 2>&1
	exit 1
fi

	
#
# When R3 starts, it is time to start all instances.
#
# Without an executable hasap_start_all_instances there is nothing
# left to do; the CI itself is already up at this point.
if [ ! -x ${PATH0}/hasap_start_all_instances ]; then
	exit 0
fi

# Run the script in the foreground. Its exit status is only logged and
# does not change this script's own exit status.
lognotice "$prefix.2025" `gettext "Executing hasap_start_all_instances"`
${PATH0}/hasap_start_all_instances "${_INST_NAME}" AFTER_CI_START 0
start_all_rc=$?
lognotice "$prefix.2056" `gettext "Finished executing hasap_start_all_instances, which exited with status ${start_all_rc}."`

exit 0

# The hasap_restartR3 code ends here
#######################################################################
