#! /bin/ksh
#
#	Copyright 11/10/00 Sun Microsystems, Inc.  All Rights Reserved.
#
# @(#)hasybase_fmon_start.shs	1.34	00/11/10 SMI
#
# hasybase_fmon_start - start a Sybase fault monitor (hasybase_fmon) for the
#			server named by argument 1
#


# Start with an empty cluster name; it is overwritten from CLUSTNAME once
# the utilities library has been sourced below.
HA_CLUSTER=""
#	Copyright 10/16/98 Sun Microsystems, Inc.  All Rights Reserved.
#
# @(#)sybase_boiler	1.28 98/10/16 SMI

# begin of common ha-dbms boilerplate

# remember our name
argv0=`basename $0`

# Get the BASEDIR and PRODUCTDIR settings from the installed pkgs
# (pkgparam errors are discarded; an empty result is handled just below)
_basedir=`pkgparam SUNWscsyb BASEDIR 2>/dev/null`
_productdir=`pkgparam SUNWscsyb PRODUCTDIR 2>/dev/null`
# Defaults when pkgparam returned nothing: no base directory prefix and the
# "SUNWcluster" product directory.
_basedir=${_basedir:=""}
_productdir=${_productdir:="SUNWcluster"}

# Absolute path of the syslog client, used only if the library cannot be loaded
LOGGER=/usr/bin/logger

# Put the cluster bin directories and the Sybase HA directory in front of
# the inherited PATH; "dbms_utilities" below is located through this PATH.
PATH=${_basedir}/opt/${_productdir}/bin:/opt/SUNWcluster/bin:${_basedir}/opt/${_productdir}/ha/sybase:${PATH}
export PATH
HA_FILES=/etc/opt/SUNWscsyb; export HA_FILES
HA_VAR=/var/opt/SUNWscsyb; export HA_VAR

# include HA utilities library
# NOTE(review): HA_SLOGTAG is not set anywhere in this file - presumably it
# comes from the environment or from dbms_utilities itself; if the latter,
# the tag is empty on this error path. Confirm.
. dbms_utilities
if [ $? -ne 0 ]; then
	$LOGGER -p local7.err -t "$HA_SLOGTAG" "$argv0: Cannot find HA utilities library"
	exit 1
fi

# CLUSTNAME is not assigned in this file (presumably provided by
# dbms_utilities or the environment - TODO confirm).
HA_CLUSTER=$CLUSTNAME; export HA_CLUSTER

# some default files and locations
# don't move this up - it depends on HA_FILES, which gets set above
HA_DATABASES=hasybase_databases
HA_DB_SUPPORT=${HA_FILES}/hasybase_support
# data service name; passed to get_instance / get_dbms_version further down
ds="sybase"

PREFIX="SUNWcluster.ha.${ds}"
# base name used as the message-id prefix (see prog= after the boilerplate)
HA_FM_NAME="fault_mon"

# verify that we have the hasybase_support file
# (exit status 2 when it is missing or unreadable)
if [ ! -r $HA_DB_SUPPORT ] ; then
  logerr "4000" \
	"file ${HA_DB_SUPPORT} does not exist or is not readable!"
  exit 2
fi


# talk_start cmd [arg ...] - log the command line, run it, and evaluate the
# return value
#
# Arguments: the command and its arguments, one word per positional parameter
# Returns:   0 if the command succeeded, 1 otherwise (the command's own
#            non-zero status is not passed on; a "... failed" line is logged)
#
# The command is executed as "$@" rather than $* so that every argument
# reaches the command exactly as the caller passed it: an argument containing
# blanks is not split again and glob characters are not expanded a second
# time.  Callers therefore have to pass each word as a separate argument.
talk_start() {
	logprint "$$: $*"
	if "$@" ; then
		return 0
	fi
	logprint "$$: $* ... failed"
	return 1
}


# talk_start_bg cmd [arg ...] - print the args, then run them in the
# background
#
# Arguments: the command and its arguments, one word per positional parameter
# Returns:   the status of starting the background job; the job itself is
#            not waited for here.
#
# The command is started as "$@" rather than $* so that arguments containing
# blanks or glob characters are handed to the command unchanged.
talk_start_bg() {
	logprint "$$: $*"
	"$@" &
}


# get_pid [ -u userid ] pattern - find the process id of a running program
# if called with "-u userid", only process for that user will be considered
#
# Sets:  PID     - process ids (second column of the ps listing) of all
#                  matching processes, one per line; empty if none matched
#        ps_args - the options that were handed to ps
#
# The pattern is matched as a whole word against the ps listing.  Lines that
# belong to this pipeline itself are dropped: the grep stages through
# 'grep -v "grep"' (the same filter get_server_pid uses) and the nawk stage
# through its own "nawk -v pat=" check.  Without the grep filter the PID of
# the pattern grep, whose command line contains the pattern, could be
# returned as well.  As in get_server_pid, any process whose ps line
# contains the string "grep" is therefore never reported.
get_pid() {
	if [ "$1" = "-u" ] ; then
		ps_args="-f -u $2"
		shift
		shift
	else
		ps_args="-ef"
	fi
	# NOTE(review): substr() is kept with start position 0 as in the
	# original; awk implementations differ on whether that yields 78 or 79
	# characters - confirm against the nawk in use before changing it.
	PID=$(/usr/bin/ps $ps_args | grep "\<$*\>" | grep -v "grep" |
		nawk -v pat="$*" '
 BEGIN {
   search_pat = substr(pat, 0, 79)
   }
 { if ( match($0, search_pat) )
     if ( ! match($0, "nawk -v pat=") )
    	print $2
  }
')
}

	
# kill_proc pattern - look up every process matching the pattern through
# get_pid and send each of them SIGKILL; a log line is written when nothing
# was found.  All arguments are handed on to get_pid unchanged.
kill_proc() {
	logprint "Killing $*..."
	get_pid $*

	# nothing found: report it and stop here
	if [ -z "$PID" ] ; then
		logprint "process \"$*\" could not be located..."
		return
	fi

	for p in ${PID}
	do
		logprint "Killing process id $p"
		kill -KILL $p
	done
}

# get_server_pid <server name> - look for a process whose ps line ends in
# the word RUN_<server name>
#
# Sets:  servername - "RUN_" followed by the arguments
#        SPID       - process ids (second ps column) of the matching
#                     processes; empty when none is found
# Lines containing "grep" and the nawk stage of this pipeline are ignored.
get_server_pid () {
	servername="RUN_$*"
	SPID=$(/usr/bin/ps -fe |
		grep "\<$servername\>$" |
		grep -v "grep" |
		nawk -v pat="$servername" '
			BEGIN { search_pat = substr(pat,0,79) }
			match($0, search_pat) && !match($0, "nawk -v pat=") { print $2 }
		')
}

# get_pgid pid - look up the process group id of the given process id
#
# Sets:  PGID - first column of the "ps -o pgid" output for that process with
#               the header line removed; empty if ps printed nothing else
get_pgid() {
	PGID=$(/usr/bin/ps -o pgid -p $1 |
		grep -v "PGID" |
		nawk '{print $1}')
}

# kill_server <server name> - kill server processes if it cannot be shutdown
# normally: for every process id reported by get_server_pid, SIGKILL is sent
# to that process' whole process group (as reported by get_pgid).
kill_server() {
	get_server_pid $*

	# no server process found: nothing to do
	[ "${SPID}" != "" ] || return 0

	for gpid in $SPID
	do
		get_pgid $gpid
		# a negative id addresses the process group
		[ "${PGID}" = "" ] || kill -KILL -${PGID}
	done
}

 

# read_ha_databases server - fetch the configuration line of the given
#	server through get_instance and split its tab-separated fields into
#	shell variables.
#
# Sets, in field order:
#	on_off_mode server_name logical_host poll_cycle connect_cycle
#	timeout restart_delay db_login runserver_file
#	and, only when the line consists of 11 words,
#	backup_server runbackup_file
# Also calls set_inst_name with the server and set_logical_host with the
# logical host that was read.
# Returns 0 on success, 1 (after logging) if no line exists for the server.
read_ha_databases() {
	set_inst_name $1

	conf_line=$(get_instance "${ds}" "$1")
	if [ -z "$conf_line" ] ; then
		logerr "4010" "$1 missing from HA SYBASE CCD!"
		return 1
	fi

	on_off_mode=$(echo "$conf_line" | cut -s -f 1)
	# field 2, the server name, is an extra field specific to sybase
	server_name=$(echo "$conf_line" | cut -s -f 2)
	logical_host=$(echo "$conf_line" | cut -s -f 3)
	poll_cycle=$(echo "$conf_line" | cut -s -f 4)
	connect_cycle=$(echo "$conf_line" | cut -s -f 5)
	timeout=$(echo "$conf_line" | cut -s -f 6)
	restart_delay=$(echo "$conf_line" | cut -s -f 7)
	db_login=$(echo "$conf_line" | cut -s -f 8)
	runserver_file=$(echo "$conf_line" | cut -s -f 9)

	# the two backup server fields are optional
	if [ "$(echo $conf_line | wc -w)" -eq 11 ] ; then
		backup_server=$(echo "$conf_line" | cut -s -f 10)
		runbackup_file=$(echo "$conf_line" | cut -s -f 11)
	fi

	set_logical_host $logical_host
	return 0
}

# find_sybase server - finds and sets various Sybase related variables:
#	SYBASE ISQL RUN_FILE
#
# Returns 1 when the server's configuration cannot be read, when this node
# is not among the physical hosts of the server's logical host, when the
# server has no entry in the sybtab file, or when the Sybase home directory
# has no executable bin/isql.
# If the home directory named in sybtab is not a directory, the function
# returns 0 with SYBASE and ISQL left empty.
find_sybase() {
	typeset my_node
	typeset physical_hosts

	SYBTAB=/var/opt/sybase/sybtab
	SYBASE=""
	ISQL=""

	read_ha_databases $1 || return 1

	# this node has to be one of the physical hosts of the logical host
	my_node=$(uname -n)
	physical_hosts=$(haget -f physical_hosts -h $logical_host | tr '\012' ' ')
	is_member "$my_node" "$physical_hosts" || return 1

	# sybtab entries have the form <server>:<sybase home>...
	sybtab_line=$(grep "^[	 ]*$1:" $SYBTAB) || {
		logerr "4040" "Database '$1' not found in ${SYBTAB}"
		return 1
	}

	sybase_home=$(echo $sybtab_line | awk -F: '{print $2}' -)
	if [ -d $sybase_home ]; then
		if [ ! -x ${sybase_home}/bin/isql ] ; then
			logerr "4030" \
			"No isql executable in ${sybase_home}/bin!"
			return 1
		fi
		SYBASE="$sybase_home"
		ISQL="${SYBASE}/bin/isql"
		RUN_FILE="$runserver_file"
	fi

	export SYBASE ISQL RUN_FILE
	return 0
}


# make_rpc_call host server request - log the request, then pass the three
# arguments on to ha_dbms_call; the status of ha_dbms_call is returned.
make_rpc_call() {
	logprint "Calling $3 for server $2 on host $1..."
	ha_dbms_call $1 $2 $3
}

# get_remote_host arg1 arg2 - pick the first private link of HA_REMOTEHOST
# that answers net_pinghost.  Both arguments are only handed to
# get_diskgroups.
#
# Sets:    RSHHOST - the first reachable link (exported), or empty
# Returns: 0 if a reachable link was found, 1 otherwise
get_remote_host(){
	get_diskgroups $1 $2
	RSHHOST=""
	for X in $(haget  -f private_links -h $HA_REMOTEHOST)
	do
		if net_pinghost $X > /dev/null 2>&1 ; then
			RSHHOST=$X
			export RSHHOST
			return 0
		fi
	done
	return 1
}

#end of common ha-dbms boiler plate
#include_boiler

# prog is the prefix of the message ids used in the logerr/logdeb calls of
# this script (e.g. "${prog}.4060")
prog="${HA_FM_NAME}"

# pull in the settings kept in the ha_env file under HA_VAR
# NOTE(review): the contents of ha_env are not visible here; the main part
# below reads HA_FM_DBMSPROBE_DELAY, which may be set there - confirm.
. $HA_VAR/ha_env

#
# get_fmon_name <file> <version string>
# returns
#  0 -> version found
#  1 -> version not found (or <file> is not readable)
#  Sets variable
#        ha_executable -> executable name  (field 2 of the matching line)
#        action_file   -> action file name (field 3 of the matching line)
#        support_line  -> the line last read; on success the matching line
#
# <file> holds tab separated lines; lines starting with '#' are comments.
# The first field of a line is used as a shell pattern that the version
# string is matched against; the first matching line wins.
#
# The file is read through a redirection instead of a "grep | while"
# pipeline.  In shells that run the last stage of a pipeline in a subshell,
# the variables set inside the loop and the "return 0" would never reach the
# caller; with the redirection the loop always runs in the current shell.
# Because read strips leading blanks, indented '#' lines count as comments
# as well.
#

get_fmon_name()
{
typeset support_file="$1"
typeset version="$2"

	ha_executable=""
	action_file=""

	if [ ! -r "${support_file}" ] ; then
		return 1
	fi

	while read support_line ; do
	    case "$support_line" in
	    '#'*)	continue ;;
	    esac
	    result="`echo "$support_line" | /usr/bin/cut -f 1`"
	    if [ "$result" != "" ] ; then
		# unquoted right hand side: $result is matched as a pattern
		if [[ $version = $result ]] ; then
			ha_executable=`echo "$support_line" | /usr/bin/cut -f 2`
			action_file=`echo "$support_line" | /usr/bin/cut -f 3`
			return 0
		fi
	    fi
	done < "${support_file}"

	return 1;

}

#
# exit_fmon - log that no monitor was started for ${DSQUERY}, then leave the
#             script with exit status 1
#
exit_fmon()
{
	# Callers end up here when a fault monitor could not be started; they
	# log the specific reason themselves before calling this function.
	logerr "${prog}.4060" \
		"Monitors for Sybase database \"${DSQUERY}\" NOT started"
	exit 1
}

# Retrieve error log file name from RUN_FILE
#
# Reads:   RUN_FILE  - path of the RUN_<server> file to scan
# Sets:    error_log - the path that follows the "-e" option
#          (count, count1, total_cnt, buf and name are set as scratch
#          variables)
# Returns: 0 if exactly one "-e/<path>" option was found and <path> is an
#            existing regular file
#          1 otherwise, including an empty or unreadable RUN_FILE
#
# Only shell expansions and plain grep are used to take the option apart, so
# the result does not depend on how cut treats a byte position of 0.
get_error_log_name() {

# Without a file name grep would read standard input instead; give up early.
if [ -z "$RUN_FILE" ] || [ ! -r "$RUN_FILE" ] ; then
	return 1
fi

# count the lines that start with '-e/' and those with ' -e/' further in
count=$(grep -c '^-e/' "$RUN_FILE")
count1=$(grep -c ' -e/' "$RUN_FILE")
total_cnt=$(($count + $count1))

# if more or less than one '-e' line was found, return error
if [ "$total_cnt" -ne 1 ]; then
	return 1
fi

if [ "$count" -eq 1 ] ; then
	# The -e/<error_log_name> is placed in the beginning of a line:
	# drop the leading "-e" and everything from the first blank on.
	buf=$(grep '^-e/' "$RUN_FILE")
	error_log=${buf#-e}
	error_log=${error_log%%[ 	]*}
else
	# The -e/<error_log_name> is placed in the middle of a line:
	# look at every word of that line; the last word that starts with
	# "-e" supplies the name.
	buf=$(grep ' -e/' "$RUN_FILE")
	for name in $buf ; do
		case "$name" in
		-e*)	error_log=${name#-e} ;;
		esac
	done
fi

if [ -f "$error_log" ] ; then
	return 0
else
	return 1
fi

}
# ############# Main ########################## Main ##########################
# Exactly one argument is expected: the name of the server to monitor.
if [ $# -ne 1 ] ; then
	logerr "${prog}.4000" "usage: $argv0 server"
	exit 2
fi

# Load the configuration of the server (poll_cycle, logical_host, db_login,
# ...) into shell variables; give up if the server is not configured.
DSQUERY=$1
read_ha_databases $DSQUERY || exit_fmon
DB_LOGIN=$db_login
export DB_LOGIN
LOCALHOST=`uname -n`

#
# Basically we want fault monitors to skip the first cycle by
# adding the poll_cycle value to the delay.
# By increasing the probe delay values, we are giving sufficient
# time for large databases to startup
#
LOCAL_PROBE_DELAY=$(($poll_cycle + 15))
REMOTE_PROBE_DELAY=$(($poll_cycle + 45))

# Use the local delay unless HA_FM_DBMSPROBE_DELAY is already set and non-empty
HA_FM_DBMSPROBE_DELAY=${HA_FM_DBMSPROBE_DELAY:-${LOCAL_PROBE_DELAY}}

# NOTE(review): any preset value other than 0 is replaced by the local delay
# here, so only a preset of 0 survives - confirm that this is intended.
if [ ${HA_FM_DBMSPROBE_DELAY} -ne 0 ]; then
   HA_FM_DBMSPROBE_DELAY=${LOCAL_PROBE_DELAY}
fi

# The logical host of the server has to be listed by haget.
# NOTE(review): grep does a substring match here, so a logical host whose
# name is contained in another host's name also passes this check.
find_logical_host=`haget -f all_logical_hosts | grep $logical_host`
if [ "$find_logical_host" = "" ] ; then
	logerr "${prog}.4001" \
		"logical host $logical_host is not in the cluster configuration"
	exit_fmon
fi 

# remote_args stays empty for a local fault monitor; it becomes
# "-r <logical host>" when this node does not master the logical host.
remote_args=""

MASTERED_LOGICAL_HOSTS=$(haget -f mastered | tr '\012' ' ')

is_member $logical_host "$MASTERED_LOGICAL_HOSTS"
if [ $? -ne 0 ] ; then
	# logical host is not mastered here: run as remote fault monitor,
	# with the longer probe delay
    	remote_args="-r $logical_host"
	HA_FM_DBMSPROBE_DELAY=${REMOTE_PROBE_DELAY}
                
        # Don't start probe if diskset is in maintenance mode.
        # If this instance's logical host is in maint mode, exit now.
        MAINT=`haget -f is_maint -h ${logical_host}`
        if [ "$MAINT" = "1" ]; then
            logdeb "${prog}.2900" \
                  "Logical host in maintenance mode, don't run fault monitor"
            exit 0
        fi

        # a non-zero status of need_to_run_probe means that no remote
        # probe is to be started from this node; that is not an error
        need_to_run_probe ${logical_host} ${LOCALHOST}
        rc=$?
        if [ $rc -ne 0 ]; then
               logdeb "${prog}.2901" \
                        "No need to run probe (condition $rc)"
           exit 0
        fi

fi

# The monitor is only started when the first word reported by
# get_instance_dynamic for this server is "on"; otherwise the script logs
# the fact and ends with exit status 0.
current_mode=$(get_instance_dynamic "${ds}" "$DSQUERY" | awk '{print $1}')
if [ "$current_mode" != "on" ] ; then
         logerr "${prog}.4062"\
                  "on/off mode for \"$DSQUERY\" is not on. Fault monitor not started"
	 exit 0
fi

# Set environment variable HA_REMOTEHOST for this instance before
# starting fault monitor.
# HA_REMOTEHOST becomes the list of the logical host's physical hosts
# without this node, each entry preceded by a blank.

my_node=`uname -n`
res=$(haget -f physical_hosts -h $logical_host | tr '\012' ' ')
HA_REMOTEHOST=""
for next in ${res}; do
        if [ "$next" != "$my_node" ]; then
                HA_REMOTEHOST="${HA_REMOTEHOST} ${next}"
        fi
done
export HA_REMOTEHOST
export HA_FM_DBMSPROBE_DELAY

# set further down to the Sybase home directory when, for a remote monitor,
# the dataserver binary is not found on this node
binaries_on_logical=""


# Look up the Sybase home directory of the server in the sybtab file
# (entries have the form <server>:<sybase home>...)
SYBTAB=/var/opt/sybase/sybtab
if sybtab_line=`grep "^[	 ]*$DSQUERY:" $SYBTAB` ; then
        sybase_home=`echo $sybtab_line | awk -F: '{print $2}' -`
else
   	logerr "4040" "Database '$DSQUERY' not found in ${SYBTAB}"
   	exit_fmon
fi

# NOTE(review): "==" inside [ ] is not part of the POSIX test syntax; it
# relies on the shell in use accepting it.
if [ "$remote_args" == "" ] ; then
        # Local fault monitor
        # Make sure that admin file system is mounted.
	#
        if [ -z "`disk_lh_mounted $logical_host`" ]; then
                logerr "${prog}.4064" \
                "Administrative file system for logical host $logical_host not mounted. Cannot start fault monitor"
                exit_fmon
        fi
fi

# Determine the Sybase version; if the first attempt fails, get_dbms_version
# is tried once more with the -p option.
syb_version=$(get_dbms_version ${logical_host} ${DSQUERY} sybase)
rc=$?
if [ $rc -ne 0 ]; then
        syb_version=$(get_dbms_version -p ${logical_host} ${DSQUERY} sybase)
        rc=$?
fi

if [ $rc -ne 0  -o  "${syb_version}" = ""  ]; then
  	logerr "${prog}.4066" \
        	"Could not find sybase version for ${DSQUERY}, ($rc)"
  	exit_fmon
fi

# Map the version to the fault monitor executable and its action file as
# listed in the support file; sets ha_executable, action_file, support_line.
get_fmon_name "${HA_DB_SUPPORT}" "${syb_version}"
if [ $? -ne 0 ]; then
        logerr "${prog}.4050" "Sybase version ${syb_version} not supported!"
        exit_fmon
fi

# A matching line must name both the executable and the action file.
if [ "$ha_executable" = "" ] ; then
	logerr "${prog}.4068" \
	"instance ${DSQUERY}: executable file name missing in line '$support_line' in file $HA_DB_SUPPORT"
	exit_fmon
fi
if [ "$action_file" = "" ] ; then
	logerr "${prog}.4061" \
	"instance ${DSQUERY}: action file name missing in line '$support_line' in file $HA_DB_SUPPORT"
	exit_fmon
fi

# Prepare SYBASE and error_log, which differ for the remote and the local
# fault monitor.
if [ "$remote_args" != "" ] ; then

  	# remote server does not have access to error log
  	# This is remote fault monitor

  	if [ ! -f $sybase_home/bin/dataserver ]; then
    		# File does not exist, binaries are on logical
    		binaries_on_logical="$sybase_home"

    		# Set sybase home to /var/opt/sybase
    		export SYBASE="/var/opt/sybase"
  	else                
    		export SYBASE="$sybase_home"
  	fi		
        # "none" is handed to the monitor in place of an error log file name
        error_log="none"
	# Create the locales/us_english and charsets directories below
	# $SYBASE when they are missing.
	# NOTE(review): the test looks for .../us_english/iso_1 but only
	# .../us_english is created - confirm that this is intended.
	open_client_dir="$SYBASE/locales/us_english/iso_1"
	if [ ! -d $open_client_dir ] ; then
		mkdir -p $SYBASE/locales/us_english
	fi
	if [ ! -d $SYBASE/charsets ] ; then
		mkdir -p $SYBASE/charsets 
	fi
else
  # Local fault monitor      
  # Check SYBASE  environment

  find_sybase $1 || exit_fmon

  # sets error_log from the -e option found in $RUN_FILE
  get_error_log_name 
				
  # a missing error log is logged but does not stop the monitor from starting
  if [ $? -ne 0 ]; then				
    	logerr "${prog}.4070" "Could not locate errorlog file"
        error_log="none"
  fi
fi

# local monitor: "-m <logical host>"; remote monitor: "-r <logical host>"
if [ -z "$remote_args" ]; then
   fmon_args="-m $logical_host"
else
   fmon_args="$remote_args"
fi
	  		
# Run the fault monitor executable through talk_start.
# NOTE(review): the status of talk_start is not checked; the script ends
# with exit status 0 whether or not the monitor could be run.
talk_start ${ha_executable} $fmon_args $DSQUERY $poll_cycle $connect_cycle $timeout $restart_delay $HA_FILES/$action_file $error_log 

exit 0

