#! /bin/ksh
#
#	Copyright 03/03/00 Sun Microsystems, Inc.  All Rights Reserved.
#
# @(#)haoracle_fmon_start.sho	1.54	00/03/03	SMI
# haoracle_fmon_start - start an Oracle fault monitor (haoracle_fmon) for the
#			instance named by the first argument ($1)
#
# Environment:
# HA_ENV - file name of HA environment file
# HA_METASETSERVE - list of the logical hosts to run on the local host
# HA_SIBLING_METASETSERVE - list of the logical hosts not on local host
#

# make sure we reread HA_ENV (might have changed)
# setting HA_CLUSTER to "" forces a re-read in oracle_boiler
HA_CLUSTER=""
# Copyright 06/09/99 Sun Microsystems, Inc.  All Rights Reserved.
# @(#)oracle_boiler	1.35 99/06/09 SMI

# begin of common ha-dbms boilerplate
#
# This section sets up everything the rest of the script relies on: the
# script name, the search PATH, the configuration directories, the HA
# utilities library and a handful of data-service constants.

# remember our name (used in the usage message and in early error logging)
argv0=`basename $0`

# Get the BASEDIR and PRODUCTDIR settings from the installed pkgs
# When pkgparam prints nothing, fall back to an empty base directory and to
# the product directory name "SUNWcluster".
_basedir=`pkgparam SUNWscor BASEDIR 2>/dev/null`
_productdir=`pkgparam SUNWscor PRODUCTDIR 2>/dev/null`
_basedir=${_basedir:=""}
_productdir=${_productdir:="SUNWcluster"}

LOGGER=/usr/bin/logger
# Put the product bin directory, /opt/SUNWcluster/bin and the ha/oracle
# directory in front of the inherited PATH.
PATH=${_basedir}/opt/${_productdir}/bin:/opt/SUNWcluster/bin:${_basedir}/opt/${_productdir}/ha/oracle:${PATH}
export PATH
HA_FILES=/etc/opt/SUNWscor; export HA_FILES
HA_VAR=/var/opt/SUNWscor; export HA_VAR

# include HA utilities library
# The name has no slash, so the shell looks the file up along the PATH that
# was extended just above.
# NOTE(review): HA_SLOGTAG is not assigned anywhere before this point in this
# file - presumably it comes from the environment; confirm.
. dbms_utilities
if [ $? -ne 0 ]; then
	$LOGGER -p local7.err -t "$HA_SLOGTAG" "$argv0: Cannot find HA utilities library"
	exit 1
fi

# NOTE(review): CLUSTNAME is not assigned in this file - presumably provided
# by dbms_utilities or the environment; confirm.
HA_CLUSTER=$CLUSTNAME; export HA_CLUSTER

# some default files and locations
# don't move this up - it depends on HA_FILES, which gets set above
HA_DATABASES=haoracle_databases
HA_DB_SUPPORT=${HA_FILES}/haoracle_support; export HA_DB_SUPPORT
# data service name; passed to get_instance and embedded in PREFIX
ds=oracle
# passed to "hatimerun -t" when the listener is started
listener_timeout=30

PREFIX="SUNWcluster.ha.${ds}"
# becomes the message-id prefix ("prog") in the main section
HA_FM_NAME="fault_mon"

# verify that we have the haoracle_support file
# (exit status 2 when it is missing or unreadable)
if [ ! -r $HA_DB_SUPPORT ] ; then
  logerr "4000" \
	"file ${HA_DB_SUPPORT} does not exist or is not readable!"
  exit 2
fi


# talk_start cmd [args ...] - log the command line, run it in the foreground,
# and report how it went.
#   Arguments: the command followed by its arguments, one word per parameter
#   Returns:   0 when the command succeeded; 1 otherwise, after logging a
#              "... failed" message through logprint
talk_start() {
  	logprint "$$: $*"
  	# "$@" hands every argument to the command exactly as it was received.
  	# A bare $* would split arguments on whitespace again and glob-expand
  	# them a second time.
  	if "$@" ; then
    		return 0
  	fi
  	logprint "$$: $* ... failed"
  	return 1
}


# talk_start_bg cmd [args ...] - log the command line, then start the command
# in the background without waiting for it.
#   Arguments: the command followed by its arguments, one word per parameter
#   The caller can pick up the job's process id from $! afterwards.
talk_start_bg() {
  	logprint "$$: $*"
  	# "$@" keeps each argument intact; a bare $* would re-split and
  	# re-glob the arguments before the command sees them.
  	"$@" &
}


# get_pid [ -u userid ] pattern - look up the process id(s) of a running program
#   -u userid   only list processes that belong to this user
#   pattern     all remaining arguments, joined by blanks, form the pattern
# The ids found are left in the global variable PID (empty if none matched).
# The ps listing is pre-filtered by grep on the whole pattern as a word; nawk
# then matches on a leading portion of the pattern, skips its own command
# line, and prints column 2 of each remaining line.
get_pid() {
	case "$1" in
	-u)
		ps_args="-f -u $2"
		shift
		shift
		;;
	*)
		ps_args="-ef"
		;;
	esac
	PID=$(/usr/bin/ps $ps_args | grep "\<$*\>" | nawk -v pat="$*" '
 BEGIN { search_pat = substr(pat, 0, 79) }
 match($0, search_pat) && ! match($0, "nawk -v pat=") { print $2 }
')
}


# kill_proc pattern - terminate every process that get_pid reports
#   Arguments: handed on unchanged to get_pid
#   Each process id found in PID is logged and sent kill's default signal,
#   followed by one empty output line. When PID is empty only a
#   "could not be located" message is logged.
kill_proc() {
  	logprint "Killing $*..." 
  	get_pid $*

  	# nothing found: report it and leave
  	if [ -z "$PID" ] ; then
    		logprint "process \"$*\" could not be located..." 
    		return
  	fi

  	for p in ${PID} ; do
    		logprint "Killing process id $p"
    		kill $p
  	done
  	echo
}

# read_ha_databases instance - fetch the configuration entry of an instance
#	and split its tab-separated fields into shell variables.
#   Arguments: $1 - instance name
#   Sets:      conf_line (the raw entry) and, from its fields,
#              on_off_mode(1) logical_host(3) poll_cycle(4) connect_cycle(5)
#              timeout(6) restart_delay(7) db_login(8) init_ora(9)
#              listener_name(10); field 2 is not read here
#   Calls:     set_inst_name with the instance (always) and set_logical_host
#              with the logical host (only when an entry was found)
#   Returns:   0 on success, 1 (after logging 4010) when no entry exists
# NOTE(review): the previous header said "Assumes that oracle_home is set!";
#	nothing in this function reads it - confirm whether the helpers do.
read_ha_databases() {
	conf_line=$(get_instance "${ds}" "$1")
	set_inst_name $1

	if [ -z "$conf_line" ] ; then
		logerr "4010" "$1 missing from HA ORACLE CCD!"
		return 1
	fi

	# "cut -s" prints nothing for a line without any tab
	on_off_mode=$(echo "$conf_line" | cut -s -f 1)
	logical_host=$(echo "$conf_line" | cut -s -f 3)
	poll_cycle=$(echo "$conf_line" | cut -s -f 4)
	connect_cycle=$(echo "$conf_line" | cut -s -f 5)
	timeout=$(echo "$conf_line" | cut -s -f 6)
	restart_delay=$(echo "$conf_line" | cut -s -f 7)
	db_login=$(echo "$conf_line" | cut -s -f 8)
	init_ora=$(echo "$conf_line" | cut -s -f 9)
	listener_name=$(echo "$conf_line" | cut -s -f 10)

	set_logical_host $logical_host
	return 0
}

# find_oracle instance - work out the Oracle environment of an instance and
#	export it: ORACLE_HOME SQLDBA PFILE LD_LIBRARY_PATH
#   Arguments: $1 - instance name (looked up in the HA configuration and in
#              /var/opt/oracle/oratab)
#   Returns:   1 when the configuration entry is missing, when this node is
#              not among the physical hosts of the logical host, when the
#              instance has no oratab entry, or when svrmgrl is not
#              executable; 0 otherwise
#   In case of a problem ORACLE_HOME is left empty. That includes the case
#   where the oratab directory does not exist, which still returns 0.
#   NOTE(review): a failed version lookup terminates the whole script with
#	"exit 1" instead of returning, and the lookup uses ${ORACLE_SID}
#	rather than $1 - confirm both are intended.
find_oracle() {

	typeset my_node
	typeset physical_hosts

	ORATAB=/var/opt/oracle/oratab

	ORACLE_HOME=""
	SQLDBA=""

	read_ha_databases $1 || return 1

	# this node has to be one of the physical hosts of the logical host
	my_node=$(uname -n)
	physical_hosts=$(haget -f physical_hosts -h $logical_host | tr '\012' ' ')
	is_member "$my_node" "$physical_hosts" || return 1

	# the [ ] below holds one tab and one space
	oratab_line=$(grep "^[	 ]*$1:" $ORATAB)
	if [ $? -ne 0 ] ; then
		logerr "4070" "Database '$1' not found in ${ORATAB}"
		return 1
	fi

	# second colon-separated field of the oratab entry
	oracle_home=$(echo $oratab_line | awk -F: '{print $2}' -)
	if [ -d $oracle_home ]; then
		# try the plain version lookup first, then again with -p
		ora_version=$(get_dbms_version ${logical_host} ${ORACLE_SID} oracle)
		rc=$?
		if [ $rc -ne 0 ]; then
			ora_version=$(get_dbms_version -p ${logical_host} ${ORACLE_SID} oracle)
			rc=$?
		fi
		if [ $rc -ne 0  -o  "${ora_version}" = ""  ]; then
			logerr "${prog}.4074" \
				"Could not find version for ${ORACLE_SID}, ($rc)"
			exit 1
		fi

		SQLDBA="${oracle_home}/bin/svrmgrl"
		[ -x ${SQLDBA} ] || {
			logerr "4060" \
				"${SQLDBA} does not exist or is not executable!"
			return 1
		}
		ORACLE_HOME="$oracle_home"
		PFILE="$init_ora"
	fi

	LD_LIBRARY_PATH=${ORACLE_HOME}/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
	export ORACLE_HOME SQLDBA PFILE LD_LIBRARY_PATH

	return 0

}


# make_rpc_call host instance request - log a request, then pass it on to
# ha_dbms_call with the same three arguments.
#   Returns: the exit status of ha_dbms_call
make_rpc_call() {
	typeset _rpc_host="$1"
	typeset _rpc_inst="$2"
	typeset _rpc_req="$3"

	logprint "Calling ${_rpc_req} for instance ${_rpc_inst} on host ${_rpc_host}..."
	ha_dbms_call $_rpc_host $_rpc_inst $_rpc_req
}


# get_remote_host arg1 arg2 - choose the first private link of HA_REMOTEHOST
# that answers net_pinghost.
#   Arguments: $1 $2 - passed on to get_diskgroups
#   Sets:      RSHHOST - the first responding link (exported), or empty
#   Returns:   0 when a link responded, 1 when none did
get_remote_host(){
	get_diskgroups $1 $2
	RSHHOST=""
	for X in $(haget  -f private_links -h $HA_REMOTEHOST) ; do
		if net_pinghost $X > /dev/null 2>&1 ; then
			RSHHOST=$X
			export RSHHOST
			return 0
		fi
	done
	return 1
}
#end of common ha-dbms boiler plate
#include_boiler

# read the ha_env file from the $HA_VAR directory into the current shell
. $HA_VAR/ha_env

# process_parm_file file - print the background_dump_dest value of an Oracle
#    parameter file.
#    Arguments: $1 - parameter file to scan
#    Output:    the value (the word after "=") when the file sets
#               background_dump_dest; otherwise whatever a recursive scan of
#               each file named by an "ifile =" line prints
#    A file that is missing or unreadable is reported through logerr and
#    produces one empty line.
process_parm_file() {
	typeset dump_line

	if [ ! -r "$1" ] ; then
        	logerr "${prog}.4047"\
		 "Oracle parameter file $1 does not exist or is not readable!"
		echo ""
		return
	fi

	# Note: each [ ] in the patterns below holds one space and one tab
	line=$(grep -i '^[ 	]*background_dump_dest[ 	]*=' $1)
	if [ -n "$line" ] ; then
		# turn "name = value" into words; word 1 (zero-based) is the value
		set -A dump_line $(print ${line} | tr '=' ' ')
		echo ${dump_line[1]}
		return
	fi

	# not set here: follow the include files
	ifile_list=$(grep -i '^[ 	]*ifile[ 	]*=' $1 | cut -d= -f 2)
	for ifile in $ifile_list ; do
		process_parm_file $ifile
	done
}


#
# get_fmon_name <file> <version string>
#   Find the line of the tab-separated support file whose first field
#   matches the version string. The first field is used as a shell pattern.
#   Lines starting with "#" are skipped.
# returns
#  0 -> version found
#  1 -> version not found (or the file is not readable)
#  Sets variable
#        ha_executable -> executable name (field 2)
#        action_file   -> action file name (field 3)
#        support_line  -> the matching line, empty when nothing matched
#

get_fmon_name()
{
typeset support_file="$1"
typeset version="$2"

	ha_executable=""
	action_file=""
	support_line=""

	[ -r "${support_file}" ] || return 1

	# The loop reads through a redirection instead of a "grep | while"
	# pipeline. In shells that run the last pipeline stage in a subshell
	# the assignments and the "return 0" below would never reach the
	# caller. The "|| [ -n ... ]" keeps a last line that lacks a newline.
	while read support_line || [ -n "$support_line" ] ; do
	    case "$support_line" in
	    "#"*) continue ;;
	    esac
	    result=`echo "$support_line" | /usr/bin/cut -f 1`
	    if [ "$result" != "" ] ; then
		# right-hand side unquoted on purpose: pattern match
		if [[ $version = $result ]] ; then
			ha_executable=`echo "$support_line" | /usr/bin/cut -f 2`
			action_file=`echo "$support_line" | /usr/bin/cut -f 3`
			return 0
		fi
	    fi
	done < "${support_file}"

	support_line=""
	return 1

}


#
# exit_fmon - log that no monitor was started for ${ORACLE_SID}, then leave
#  the script with exit status 1.
#  Used when start-up has to be abandoned; callers log the specific reason
#  with their own logerr call before calling this.
#
exit_fmon()
{
	typeset _msg="Monitors for Oracle database \"${ORACLE_SID}\" NOT started"

	logerr "${prog}.4060" "$_msg"
	exit 1
}

# ############# Main ########################## Main ##########################
# Exactly one argument is expected: the Oracle instance to monitor.
if [ $# -ne 1 ] ; then
	echo "usage: $argv0 instance"
	exit 2
fi

# message-id prefix; assigned before the first place exit_fmon can be called
prog="${HA_FM_NAME}"

ORACLE_SID=$1
# Load the configuration entry of the instance. Everything below depends on
# the values it provides (logical_host, poll_cycle, db_login, ...), so give
# up right away when the entry is missing rather than continuing with empty
# settings.
read_ha_databases $1 
if [ $? -ne 0 ] ; then
	exit_fmon
fi
DB_LOGIN=$db_login
export DB_LOGIN
LOCALHOST=`uname -n`

LOCAL_PROBE_DELAY=15
#
# BugID# 4269425 - increase delay to avoid fault monitor race condition
#
REMOTE_PROBE_DELAY=$(($poll_cycle + 45))
HA_FM_DBMSPROBE_DELAY=${HA_FM_DBMSPROBE_DELAY:-${LOCAL_PROBE_DELAY}}

# Any non-zero delay is replaced by LOCAL_PROBE_DELAY; only an inherited
# value of 0 is kept as it is.
if [ ${HA_FM_DBMSPROBE_DELAY} -ne 0 ]; then
   HA_FM_DBMSPROBE_DELAY=${LOCAL_PROBE_DELAY}
fi


# The logical host of the instance has to show up in the list of all logical
# hosts reported by haget.
find_logical_host=`haget -f all_logical_hosts | grep $logical_host`
if [ "$find_logical_host" = "" ] ; then
        logerr "${prog}.4000"\
		 "logical host $logical_host is not in the cluster configuration"
        logerr "${prog}.4010"\
		 "Monitors for oracle database \"$ORACLE_SID\" NOT started"
        exit 1
fi

# Decide whether this is the local or the remote fault monitor for the
# logical host. remote_args stays empty for the local one.
remote_args=""

# Blank-separated output of "haget -f mastered" - presumably the logical
# hosts currently mastered by this node. (Two older variants of this test,
# both based on HA_METASETSERVE, had been left here commented out.)
MASTERED_LOGICAL_HOSTS=$(haget -f mastered | tr '\012' ' ')

is_member $logical_host "$MASTERED_LOGICAL_HOSTS"
if [ $? -ne 0 ] ; then
	# not in the list: run as remote monitor with the longer probe delay
	remote_args="-r $logical_host"
	HA_FM_DBMSPROBE_DELAY=${REMOTE_PROBE_DELAY}

	# Don't start probe if diskset is in maintenance mode.
	# If this instance's logical host is in maint mode, exit now.
	MAINT=$(haget -f is_maint -h ${logical_host})
	case "$MAINT" in
	1)
		logdeb "${prog}.2900" \
			"Logical host in maintenance mode, don't run fault monitor"
		exit 0
		;;
	esac

	# any non-zero answer means this node does not need to run the probe
	need_to_run_probe ${logical_host} ${LOCALHOST}
	rc=$?
	if [ $rc -ne 0 ]; then
		logdeb "${prog}.2901" \
			"No need to run probe (condition $rc)"
		exit 0
	fi
fi

# The monitor is only started while the instance's current mode is "on".
current_mode=$(get_instance_dynamic "${ds}" "$ORACLE_SID" | awk '{print $1}')
case "$current_mode" in
on)
	;;
*)
	logerr "${prog}.4062"\
	"on/off mode for \"$ORACLE_SID\" is not on. Fault monitor not started"
	exit 0
	;;
esac

# Export HA_REMOTEHOST for this instance before the fault monitor is
# started: every physical host of the logical host except this node, each
# preceded by a blank.
my_node=$(uname -n)
res=$(haget -f physical_hosts -h $logical_host | tr '\012' ' ')
HA_REMOTEHOST=""
for next in ${res}; do
	[ "$next" = "$my_node" ] && continue
	HA_REMOTEHOST="${HA_REMOTEHOST} ${next}"
done
export HA_REMOTEHOST

HA_LOGICALHOST=$logical_host
export HA_LOGICALHOST HA_FM_DBMSPROBE_DELAY

binaries_on_logical=

# Take ORACLE_HOME from the instance's oratab entry (second colon-separated
# field); without an entry no monitor can be started.
# The [ ] in the pattern holds one tab and one space.
ORATAB=/var/opt/oracle/oratab
oratab_line=$(grep "^[	 ]*$1:" $ORATAB)
if [ $? -ne 0 ] ; then
	logerr "${prog}.4070" \
	"Entry for ORACLE_SID $1 not found in $ORATAB"
	exit_fmon
fi
ORACLE_HOME=$(echo $oratab_line | awk -F: '{print $2}' -)

if [ "$remote_args" != "" ] ; then
	# Remote fault monitor: when the ORACLE_HOME taken from oratab is not
	# a directory on this node, remember it in binaries_on_logical.
	# (This used to read the lowercase oracle_home, which has not been
	# assigned at this point, so the value was always empty.)
	if [ ! -d "$ORACLE_HOME" ] ; then
		binaries_on_logical=$ORACLE_HOME
	fi
else
        # Local fault monitor
        #
        # disk_lh_mounted has to print something for the logical host,
        # otherwise its administrative file system counts as not mounted.
        if [ -z "`disk_lh_mounted $logical_host`" ]; then
                logerr "${prog}.4072" \
                "Administrative file system for logical host $logical_host not mounted"
                exit_fmon
        fi
fi

# Determine the Oracle version: plain lookup first, then again with -p.
ora_version=$(get_dbms_version ${logical_host} ${ORACLE_SID} oracle)
rc=$?
if [ $rc -ne 0 ]; then
        ora_version=$(get_dbms_version -p ${logical_host} ${ORACLE_SID} oracle)
        rc=$?
fi
if [ $rc -ne 0  -o  "${ora_version}" = ""  ]; then
logerr "${prog}.4074" \
        "Could not find version for ${ORACLE_SID}, ($rc)"
        exit_fmon
fi

# Set up the Oracle environment for the monitor.
#
# Remote fault monitor: no alert log is watched (alert_file is "none") and
# ORACLE_HOME is fixed to /var/opt/oracle.
# Local fault monitor: the environment comes from find_oracle and the alert
# log path is built from background_dump_dest in the parameter file.
if [ "$remote_args" != "" ] ; then
	alert_file="none"
	oracle_home="/var/opt/oracle"
	ORACLE_HOME="/var/opt/oracle"
	export ORACLE_HOME ORACLE_SID
	export ORA_NLS33=${_basedir}/opt/${_productdir}/ha/oracle/ORA_NLS33
else
	# Local host
	find_oracle $ORACLE_SID
	rc=$?
	# Validate the result before it is used. With an empty ORACLE_HOME
	# the "ls -ld" below would look at the current directory, and PFILE
	# could be unset or stale.
	if [ $rc -ne 0 ] || [ "$ORACLE_HOME" = "" ] ; then
		logerr "${prog}.4076" "Error in setting Oracle environment"
		exit_fmon
	fi
	# owner of the ORACLE_HOME directory (third column of "ls -ld")
	oracle_owner=`ls -ld ${ORACLE_HOME} | nawk '{print $3}'`
	export ORACLE_LISTENER="$listener_name"
	alert_file=`process_parm_file $PFILE`
	if [ "$alert_file" = "" ] ; then
		logerr "${prog}.4040" \
	"Could not locate background_dump_dest value in parameter file"
		# same exit status as before (1), now with the usual
		# "NOT started" message
		exit_fmon
	else
		alert_file=${alert_file}/alert_${ORACLE_SID}.log
	fi
fi
#---------- end of Oracle environment set-up -----------------------------------

# Start the listener process if it is not running yet - local host only.
# If the Oracle version is < 7.3, both the v1 listener ("orasrv") and the
# v2 listener ("lsnrctl") would be needed; otherwise only the v2 listener.
# NOTE(review): only the "lsnrctl start" call exists below, there is no
# orasrv call - confirm whether pre-7.3 support is still expected.
#
# The whole check runs in a background subshell, so the script carries on
# without waiting for it. LISTENER_NAME defaults to "LISTENER" when
# listener_name is empty. The listener counts as running when the ps listing
# for $oracle_owner contains "tnslsnr <name> ". Otherwise lsnrctl is started
# as that user through hatimerun with a limit of $listener_timeout, with all
# output sent to /dev/console. The here-document delimiter is unquoted, so
# the variables in it are expanded by this script before su receives them.

if [ "$remote_args" = "" ] ; then
(
export LISTENER_NAME=${listener_name:="LISTENER"}
  /usr/bin/ps -e -u $oracle_owner -o args | grep -w "tnslsnr $LISTENER_NAME " | grep -v "grep" > /dev/null
    if [ $? -ne 0 ]; then
        lognotice "${prog}.2000" "starting up Oracle Listener"
        su  $oracle_owner -c sh << EOF > /dev/console 2>&1
        	LD_LIBRARY_PATH=$LD_LIBRARY_PATH
        	export LD_LIBRARY_PATH
       		hatimerun -t $listener_timeout $ORACLE_HOME/bin/lsnrctl start $LISTENER_NAME
EOF
    fi
)&
fi

# Pick the monitor executable and action file that match the Oracle version.
get_fmon_name "${HA_DB_SUPPORT}" "${ora_version}" || {
	logerr "${prog}.4050" "Oracle version ${ora_version} not supported!"
	exit_fmon
}

# both names have to be present in the matching support line
if [ -z "$ha_executable" ] ; then
	logerr "${prog}.4020" \
	"instance ${ORACLE_SID}: executable file name missing in line '$support_line' in file $HA_DB_SUPPORT"
	exit_fmon
fi
if [ -z "$action_file" ] ; then
	logerr "${prog}.4030" \
	"instance ${ORACLE_SID}: action file name missing in line '$support_line' in file $HA_DB_SUPPORT"
	exit_fmon
fi

# Default NLS data directory. The local monitor of an 8.1 release uses the
# directory below its own ORACLE_HOME instead.
export ORA_NLS33=${_basedir}/opt/${_productdir}/ha/oracle/ORA_NLS33
if [ -n "$remote_args" ]; then
	fmon_args="$remote_args"
else
	fmon_args="-m $logical_host"
	# first three bytes of the version string, e.g. "8.1"
	ora_major_version=$(/bin/echo $ora_version | /bin/cut -b1-3)
	case "$ora_major_version" in
	8.1)
		export ORA_NLS33=$ORACLE_HOME/ocommon/nls/admin/data
		;;
	esac
fi

# Run the monitor; the script exits 0 whatever talk_start reports.
talk_start ${ha_executable} $fmon_args $ORACLE_SID $poll_cycle $connect_cycle $timeout $restart_delay $HA_FILES/$action_file $alert_file
exit 0
