#! /bin/sh
#
#	Copyright 11/19/97 Sun Microsystems, Inc.  All Rights Reserved.
#
# @(#)haoracle.sh	1.15 97/11/19 SMI
#
# haoracle - administrative command for HA-DBMS Oracle
#	(see haoracle_databases(4), haoracle_support(4), haoracle_config(4) )
#	     verifies and updates the HA-DBMS Oracle configuration files
#
#	This utility depends on the existence of the hadfconfig file.
#
# usage: haoracle [ -s ] operation [ instance [ data ...] ]
#	-s:        silent mode
#	operation: one of list, insert, delete, update, start, stop
#	instance:  name of instance (not required for list operation)
#	data ...:  set of data fields for insert or update operation
#
# Note: Some code is somewhat duplicated in haoracle_config!

# remember our name (used in usage and error messages); $0 is quoted so
# that a path containing white space does not break basename
argv0=`basename "$0"`

# locations
# PATH is restricted to /usr/bin; commands outside it are called by full path
PATH=/usr/bin
FM_PROGS=/opt/SUNWhadf/fault_progs
HA_FILES=/etc/opt/SUNWhadf/hadf
NFS_FILES=/etc/opt/SUNWhadf/nfs
# scratch files, made unique per run by the process id only
# NOTE(review): these names are predictable and live in /tmp while the
# script runs as root - consider a private directory
TMPDIFFS=/tmp/haoraclediffs.$$
TMPFILE=/tmp/haoracle.$$
CMPFILE=/tmp/remotehaoracle.$$

# HA-DBMS file locations
HA_SUPPORT=${HA_FILES}/haoracle_support
HA_DATABASES="${HA_FILES}/haoracle_databases"

# silent mode flag ("y" once -s has been given)
silent=""

# Oracle locations
ORATAB=/var/opt/oracle/oratab

# bug 1206154 - haoracle should ignore control keys
# (signal numbers handed to trap later in the script)
TRAPSIGNALS="1 2 3 15"

#
# not_positive_numeric() -	not_positive_numeric string
#
#	Succeeds (exit status 0) when the string is NOT a positive decimal
#	integer, i.e. it is empty, contains a character other than 0-9, or
#	consists of zeros only.  Fails (exit status 1) when the string is a
#	positive decimal integer.
#
#	Only shell pattern matching is used, so strings with white space
#	or with characters that are operators to expr(1) are rejected
#	instead of producing a syntax error that looks like "valid".
#
not_positive_numeric() {
	case "$1" in
		''|*[!0-9]*)	return 0 ;;	# empty, or has a non-digit
		*[1-9]*)	return 1 ;;	# digits only, value > 0
		*)		return 0 ;;	# zeros only
	esac
}


#
# not_non_neg_numeric() -	not_non_neg_numeric string
#
#	Succeeds (exit status 0) when the string is NOT a non-negative
#	decimal integer, i.e. it is empty or contains a character other
#	than 0-9.  Fails (exit status 1) when the string consists of
#	digits only (zero included).
#
#	Only shell pattern matching is used, so strings with white space
#	or with characters that are operators to expr(1) are rejected
#	instead of producing a syntax error that looks like "valid".
#
not_non_neg_numeric() {
	case "$1" in
		''|*[!0-9]*)	return 0 ;;	# empty, or has a non-digit
		*)		return 1 ;;	# digits only
	esac
}


# find_oratab_line instance
#	Looks for the line of $ORATAB that starts with "instance:" and
#	leaves it in the global variable oratab_line.
#	Returns 0 if such a line exists; otherwise logs a message and
#	returns 1.
find_oratab_line() {
	oratab_line=`grep "^$1:" $ORATAB`
	if [ "$oratab_line" != "" ] ; then
		return 0
	fi

	log "instance \"$1\" not in $ORATAB"
	return 1
}

		
# hadb_validate_entry line - sanity check of one haoracle_databases entry
#
#	line:	a complete entry; nine fields separated by TABs:
#		mode, instance, logical host, poll cycle, connect cycle,
#		timeout, restart delay, database login, parameter file
#
#	Every field is checked and each problem found is reported, so one
#	call can print several messages.  The split fields are left in the
#	global variables mode, instance, logical_host, poll_cycle,
#	connect_cycle, timeout, restart_delay, db_login and init_ora.
#	Reads the globals ORATAB, HA_NATIVEHOST, HA_FOREIGNHOST, SYMMETRIC
#	and ASYMLOGICALHOST.
#
#	Returns 0 if the entry passed every check, 1 otherwise.
hadb_validate_entry() {

	have_error=0
	db_line=$1

	# fields are separated by TABs, cut them apart

	mode=`echo "$db_line" | cut -s -f 1`
	instance=`echo "$db_line" | cut -s -f 2`
	logical_host=`echo "$db_line" | cut -s -f 3`
	poll_cycle=`echo "$db_line" | cut -s -f 4`
	connect_cycle=`echo "$db_line" | cut -s -f 5`
	timeout=`echo "$db_line" | cut -s -f 6`
	restart_delay=`echo "$db_line" | cut -s -f 7`
	db_login=`echo "$db_line" | cut -s -f 8`
	init_ora=`echo "$db_line" | cut -s -f 9`

	# The instance needs an oratab entry of its own ("instance:...").
	# grep is not run for an empty name (it would read standard input),
	# and the pattern is anchored so that a name which merely occurs
	# somewhere in oratab is not accepted.
	if [ -n "$instance" ] && grep "^$instance:" "$ORATAB" > /dev/null ; then
		:
	else
		errlog "instance $instance not in $ORATAB"
		have_error=1
	fi

	# the logical host must be one of the configured ones; an empty
	# field never qualifies, even when one of the configured names is
	# empty (asymmetric configuration)
	host_ok=0
	if [ -n "$logical_host" ] ; then
		[ "$logical_host" = "$HA_NATIVEHOST" ] && host_ok=1
		[ "$logical_host" = "$HA_FOREIGNHOST" ] && host_ok=1
	fi
	if [ $host_ok -eq 0 ] ; then
		if [ "$SYMMETRIC" -eq 1 ]; then
			log "Valid logical hostnames are \"$HA_NATIVEHOST\" and \"$HA_FOREIGNHOST\" -"
		else
			log "Valid logical hostname is only \"$ASYMLOGICALHOST\" -"
		fi
		log "  you specified \"$logical_host\"."
		have_error=1
	fi

	if not_positive_numeric "$poll_cycle" ; then
		errlog "Poll cycle time \"$poll_cycle\" should be a numeric value greater than 0"
		have_error=1
	fi

	if not_non_neg_numeric "$connect_cycle" ; then
		errlog "Connect cycle count \"$connect_cycle\" should be a numeric value greater than or equal to 0"
		have_error=1
	fi

	if not_non_neg_numeric "$timeout" ; then
		errlog "Time out \"$timeout\" should be a numeric value greater than or equal to 0"
		have_error=1
	fi

	if not_non_neg_numeric "$restart_delay" ; then
		errlog "Restart delay \"$restart_delay\" should be a numeric value greater than or equal to 0"
		have_error=1
	fi

	# the login is either "/" or has at least one character before a "/";
	# the "//" prefix keeps expr from taking the value for an operator
	if [ "$db_login" != "/" ] && [ `expr "//$db_login" : "//..*/"` = "0" ] ; then
		errlog "format of database login \"$db_login\" should be 'username/password' or '/'"
		have_error=1
	fi

	TMP_ORA=/tmp/hainit.ora.$$
	have_init_ora=1
	if [ -z "$init_ora" ] ; then
		errlog "parameter file $init_ora does not exist"
		have_error=1
		have_init_ora=0
	elif [ ! -f "$init_ora" ] ; then
		# maybe logical is owned by the other host
		rcp "$logical_host:$init_ora" "$TMP_ORA"
		if [ -f "$TMP_ORA" ] ; then
			init_ora=$TMP_ORA
		else
			errlog "parameter file $init_ora does not exist"
			have_error=1
			have_init_ora=0
		fi
	fi

	# only look inside a parameter file that is really there
	if [ $have_init_ora -eq 1 ] ; then
		if grep -iw 'background_dump_dest' "$init_ora" > /dev/null ; then
			:
		else
			errlog "cannot locate background_dump_dest in $init_ora"
			have_error=1
		fi
	fi
	rm -f "$TMP_ORA"

	return $have_error
}


#
# haoracle_list filename - write the contents of the data file to standard
#	output, skipping comment lines (lines that start with '#').
#	The exit status is that of grep.
haoracle_list() {
	# the name is quoted so that it always reaches grep as exactly one
	# argument (an unquoted empty name would make grep read stdin)
	grep -v '^#' "$1"
}


# haoracle_insert filename instance_name logical_host poll_time connect_cycle \
#	timeout restart_delay db_login init_ora
#
# Appends a record for instance_name to filename.  All seven data fields
# are required.  The new record always has mode "off".
# Calls usage (which exits) on a wrong number of fields, and abort (which
# exits) if the instance is already in the file, the new line fails
# hadb_validate_entry, or the file cannot be appended to.
# Returns 0 on success.
#
haoracle_insert() {
	filename=$1 ; shift
	instance_name=$1 ; shift
	if [ $# -ne 7 ] ; then
		log "parameters required for insert are: instance host probe_cycle_time connect_cycle_count time_out restart_delay username/password parameter_file"
		usage
	fi

	# make sure line for $instance_name doesn't already exists in file
	line=`grep "^o[a-z]*	$instance_name	" "$filename"`
	if [ "$line" != "" ] ; then
		log "instance \"$instance_name\" already in $BASEFILE:"
		abort "--> $line"
	fi

	# build the line for the new entry
	new_line="off	$instance_name	$1	$2	$3	$4	$5	$6	$7"

	hadb_validate_entry "$new_line"
	if [ $? -ne 0 ]; then
		abort "new haoracle_databases line failed sanity check"
	fi

	# a failed append must stop the run, exactly as a failed delete or
	# update does; otherwise an unchanged file would be distributed
	echo "$new_line" >> "$filename"
	if [ $? -ne 0 ] ; then
		abort "$filename could not be modified!"
	fi
	return 0
}


# haoracle_delete filename instance_name
# Deletes every record for instance_name from filename.
# Calls abort (which exits) if there is no such record, if any matching
# record still has mode "on", or if ed fails to rewrite the file.
# Returns 0 on success.
haoracle_delete() {
	filename=$1 ; shift
	line=`grep "^o[a-z]*	$1	" "$filename"`
	if [ "$line" = "" ] ; then
		abort "no entry for instance $1 in $BASEFILE - can't delete"
	fi

	# reject if fault monitor still running; $line can hold several
	# records, so every mode field is looked at
	mode=`echo "$line" | cut -s -f 1`
	if echo "$mode" | grep '^on$' > /dev/null ; then
		abort "cannot delete entry; stop $1 fault monitor first"
	fi

	# delete all entries with $1 in the second column from the file
	echo "g/^o[^	]*	$1	/d
w
q" | ed -s "$filename"
	if [ $? -ne 0 ] ; then
		# report the file, not the instance name that $1 holds here
		abort "$filename could not be modified!"
	fi
	return 0
}


# haoracle_update filename instance_name logical_host poll_time connect_cycle \
#	timeout restart_delay db_login init_ora
#
# Replaces the record for instance_name in filename.  The mode field of the
# existing record is kept; the seven data fields are taken from the
# arguments.  usage is called on a wrong argument count; abort is called if
# no record exists, if the new line fails hadb_validate_entry, or if ed
# reports an error.  Returns 0 on success.
haoracle_update() {
	filename=$1 ; shift
	instance_name=$1 ; shift
	if [ $# -ne 7 ] ; then
		log "parameters required for update are: instance host probe_cycle_time connect_cycle_count time_out restart_delay username/password parameter_file"
		usage
	fi

	# the record to replace has to be there
	line=`grep "^o[a-z]*	$instance_name	" $filename`
	[ "$line" != "" ] ||
		abort "no entry for instance $instance_name in $BASEFILE - can't update"

	# carry the current on/off mode over into the replacement line
	old_mode=`echo "$line" | cut -s -f 1`
	new_line="$old_mode	$instance_name	$1	$2	$3	$4	$5	$6	$7"

	hadb_validate_entry "$new_line" ||
		abort "new haoracle_databases line failed sanity check"

	# ed script: remove the line(s) with $instance_name in the second
	# column, insert $new_line, then write the file and quit
	ed_script="g/^o[a-z]*	$instance_name	/d
i
$new_line
.
w
q"
	if echo "$ed_script" | ed -s $filename ; then
		return 0
	fi
	abort "$filename could not be modified!"
}


# haoracle_start_stop_run filename instance ( start | stop )
# Runs ha_dbms_call to start or stop fault monitoring for $instance, first
# for localhost and then, if $remhostup is "true", over a private link to
# the remote host ($PRIV1 when $priv1res is 0, else $PRIV2).
# $filename is not touched - see function haoracle_start_stop.
# A failing remote call is logged and sets remhostup to "false".
# Always returns 0.
haoracle_start_stop_run() {
	# the file name is accepted only to keep the calling convention
	filename=$1 ; shift

	# manual start: the probe must not wait
	HA_FM_DBMSPROBE_DELAY=0
	export HA_FM_DBMSPROBE_DELAY

	# local host first
	log "$2 fault monitor for $1 on localhost..."
	${FM_PROGS}/ha_dbms_call localhost $1 $2 oracle

	# nothing more to do when the remote host is not reachable
	if [ "$remhostup" != "true" ]; then
		log "No $2 of fault monitor for $1 on $HA_REMOTEHOST."
		return 0
	fi

	# remote host, over whichever private link answered
	log "$2 fault monitor for $1 on $HA_REMOTEHOST ... "
	if [ $priv1res -eq 0 ]; then
		remote_link=$PRIV1
	else
		remote_link=$PRIV2
	fi
	${FM_PROGS}/ha_dbms_call $remote_link $1 $2 oracle
	if [ "$?" != 0 ]; then
		log "Failed to $2 fault monitor for $1 on $HA_REMOTEHOST"
		remhostup="false"
	fi

	return 0
}

# haoracle_start_stop filename instance ( start | stop )
# Flips the on/off mode of the record for $instance in $filename: "start"
# turns "off" into "on", anything else turns "on" into "off".
# Monitoring itself is not started or stopped here - see
# haoracle_start_stop_run.
# abort is called if there is no record, if the record is not in the
# expected mode, or if ed reports an error.  Returns 0 on success.
haoracle_start_stop() {
	filename=$1 ; shift

	# find the instance
	line=`grep "^o[^	]*	$1	" $filename`
	[ "$line" != "" ] ||
		abort "no entry for instance $1 in $BASEFILE - can't $2"
	mode=`echo "$line" | cut -s -f 1`

	# what the record must look like now, what it becomes, and the
	# complaint if it is in the wrong state
	case "$2" in
		start)
			from_mode="off"
			to_mode="on"
			wrong_state="instance $1 is not stopped in $BASEFILE!"
			;;
		*)
			from_mode="on"
			to_mode="off"
			wrong_state="instance $1 is not started in $BASEFILE!"
			;;
	esac
	[ "$mode" = "$from_mode" ] || abort "$wrong_state"

	# update the entry in the file - find the old entry, and replace
	# the old mode with the new one
	ed_command="/^$from_mode	$1	/
s/^$from_mode/$to_mode/
w
q"
	if echo "$ed_command" | ed -s ${filename} > /dev/null ; then
		return 0
	fi
	abort "$filename could not be modified!"
}


# catchsig - signal handler: say that a signal arrived and carry on
catchsig()
{
	echo 'Caught signal; continuing.'
}


#
# get_yes_or_no - read an answer from standard input into the global
#	variable resp, asking again until it is "y" or "n".
#	In silent mode nothing is read and resp is set to "y".
#	If standard input ends before a valid answer was given, resp is
#	set to "n" instead of asking forever.
#	Always returns 0.
#
get_yes_or_no()
{
	if [ "$silent" = "y" ] ; then
		resp="y"
		return 0
	fi

	while :
	do
		if read resp ; then
			:
		else
			# end of input: keep a last, unterminated "y" or "n",
			# otherwise take the answer to be "n"
			case "$resp" in
				y|n)	;;
				*)	resp="n" ;;
			esac
			return 0
		fi

		case "$resp" in
			y|n)	return 0 ;;
		esac

		echo
		echo '    Please respond "y" or "n" \c'
	done
}


# log ... - print all parameters on one line unless silent mode is on
#	(in silent mode nothing is printed and the status is 1)
log() {
	[ "$silent" = "y" ] && return 1
	echo "$*"
}

# errlog ... - print all parameters on one line behind an "ERROR: " tag;
#	silent mode does not suppress these messages
errlog() {
	echo "ERROR:" "$*"
}

# abort ... - print all parameters unless silent mode is on, then leave
#	the script with exit status 1
abort() {
	if [ "$silent" != "y" ] ; then
		echo "$*"
	fi
	exit 1
}

# usage - hand the command synopsis to log, line by line, then leave the
#	script with exit status 2
usage() {
	for usage_text in \
		"usage: $argv0 [ -s ] operation [ instance [ data ...] ]" \
		"  -s:        silent mode" \
		"  operation: one of list, insert, delete, update, start, stop" \
		"  instance:  name of instance (not required for list operation)" \
		"  data ...:  set of data fields for insert or update operation"
	do
		log "$usage_text"
	done
	exit 2
}


# check who we are - the command is for root only; anybody else is
# turned away immediately
res=`id`
if [ $? -ne 0 ] ; then
	abort "Cannot execute id command"
fi
# the output of id has to begin with "uid=0(root)"
case "$res" in
	"uid=0(root)"*)
		;;
	*)
		abort "$argv0 must be executed as root."
		;;
esac

# parse the options: -s (silent mode) is the only one accepted, anything
# else ends in usage
while getopts s c
do
	case "$c" in
		s)
			silent="y"
			;;
		'?')
			usage
			;;
	esac
done
# drop the options that were parsed; the operation is now $1
shift `expr $OPTIND - 1`

export HA_DATABASES


# an operation has to be named
[ $# -ge 1 ] || {
	log "missing operation"
	usage
}
command="$1"
shift

# sort the operation into read-only (will_modify=0) or modifying
# (will_modify=1); unknown names end in usage
# (a "check" operation is not enabled:  check|list) will_modify=0 ;;)
case "$command" in
	list)
		will_modify=0
		;;
	start|stop|insert|delete|update)
		will_modify=1
		;;
	*)
		log "invalid operation"
		usage
		;;
esac


# make sure HA is up: look for a clustd process in the ps listing; a
# modifying operation is refused without one
CPID="`/usr/bin/ps -e | sed -n 's/[ 	]*\([0-9]*\).*[ 	]clustd$/\1/p'`"
if [ $will_modify -eq 1 ] && [ "$CPID" = "" ] ; then
	abort "HA is not up; Run hastart first"
fi


# make sure Oracle service is registered and the state is on; both are
# read from the hareg listing and only enforced for modifying operations
reg=`/opt/SUNWhadf/bin/hareg | grep oracle`
reg_state=`echo "$reg" | cut -f2 -s`
if [ $will_modify -eq 1 ] ; then
	if [ -z "$reg" ] ; then
		log "Solstice HA-DBMS for ORACLE7 service is not registered."
		abort "You may run \"hareg -s -r oracle\" to register it."
	fi

	if [ "$reg_state" != "on" ] ; then
		log "The state of Solstice HA-DBMS for ORACLE7 service is off."
		abort "You may run \"hareg -y oracle\" to turn the service on."
	fi
fi


# make sure we can read ${HA_DATABASES}; if not, create it from the
# template next to it, and give up when that is unreadable as well
if [ ! -r $HA_DATABASES ] ; then
	if [ ! -r ${HA_DATABASES}.tmpl ] ; then
  		log "file ${HA_DATABASES} does not exist or is not readable!"
		log "could not find or read template ${HA_DATABASES}.tmpl either"
  		exit 2
	fi

	log "creating ${HA_DATABASES} from template ${HA_DATABASES}.tmpl"
	cp ${HA_DATABASES}.tmpl ${HA_DATABASES}
	chmod 0600 ${HA_DATABASES}
fi


# every operation except list (and check) needs an instance name and a
# writable ${HA_DATABASES}
case "$command" in
	list|check)
		;;
	*)
		if [ $# -lt 1 ] ; then
			log "instance name missing"
			usage
		fi
		instance=$1
		shift

		if [ ! -w ${HA_DATABASES} ] ; then
			abort "no permission to update $HA_DATABASES!"
		fi
		;;
esac

# whatever is left on the command line are the data fields
parms=$*

# short name for messages, full name for copying
REALFILE=${HA_DATABASES}
BASEFILE=`basename ${HA_DATABASES}`
export BASEFILE REALFILE


# the hadfconfig file is required; the host names used below come from it
if [ ! -f $HA_FILES/hadfconfig ]; then
	log "Cannot find $HA_FILES/hadfconfig."
	abort "Please use hasetup(1M) to configure the system initially."
fi

# fetch the HOSTNAME line of hadfconfig that contains the name of this
# host (as printed by uname -n) surrounded by white space
HA_LOCALHOST=`uname -n`
res="`grep '^HOSTNAME.*[ 	]'${HA_LOCALHOST}'[ 	].*' $HA_FILES/hadfconfig`"
# $? is still the exit status of the grep inside the backquotes
if [ $? -ne 0 ]; then
	log "Cannot get name of remote host from $HA_FILES/hadfconfig."
	abort "Please use hasetup(1M) to configure the system initially."
fi
[ -z "$res" ] && abort "No HOSTNAME in hadfconfig!"

# Split the line into positional parameters.  This overwrites what was
# left of the command line, which has already been saved in $instance and
# $parms.  Exactly five words are expected; the code below uses $2 and $4
# as the two host names and $3 and $5 as logical host names, with "-" in
# $5 meaning that there is only one logical host (SYMMETRIC=0).
SYMMETRIC=1
set -- $res > /dev/null
if [ $# -ne 5 ]; then
	abort "Bad HOSTNAME line in hadfconfig."
elif [ "$2" = "$HA_LOCALHOST" ]; then
	# this host is named first: $3 is taken as its own (native) logical
	# host, $4 is the other host and $5 the other (foreign) logical host
	HA_REMOTEHOST=$4
	HA_NATIVEHOST=$3
	HA_FOREIGNHOST=$5
	if [ "$5" = "-" ]; then
		# no foreign logical host: only the native one is valid
		HA_FOREIGNHOST=""
		SYMMETRIC=0
		ASYMLOGICALHOST="$HA_NATIVEHOST"
	fi
elif [ "$4" = "$HA_LOCALHOST" ]; then
	# this host is named second: $2 is the other host, $3 is taken as
	# the foreign logical host and $5 as the native one
	HA_REMOTEHOST=$2
	HA_FOREIGNHOST=$3
	HA_NATIVEHOST=$5
	if [ "$5" = "-" ]; then
		# no native logical host: only the foreign one is valid
		HA_NATIVEHOST=""
		SYMMETRIC=0
		ASYMLOGICALHOST="$HA_FOREIGNHOST"
	fi
else
	# the line matched the grep but neither host field is this host
	abort "Bad HOSTNAME line in hadfconfig."
fi

# Take a working copy of the data file.  All operations below act on the
# copy, which is copied back over $REALFILE only when the user agrees to
# distribute it.  Without a complete copy a later insert would start from
# an empty file and the result could replace the real data file, so a
# failed copy ends the run.
rm -f $TMPFILE
cp $REALFILE $TMPFILE
if [ $? -ne 0 ] ; then
	rm -f $TMPFILE
	abort "Cannot copy $REALFILE to $TMPFILE."
fi

# From each PRIVATELINK line of hadfconfig that contains the remote host
# name followed by white space and one last word, print that last word.
# Two results are needed: the names of the two private links.
res="`sed -n 's/^PRIVATELINK[ 	][ 	]*.*'${HA_REMOTEHOST}'[ 	][ 	]*\([^ 	]*\)[ 	]*$/\1/p' $HA_FILES/hadfconfig`"
if [ "$res" = "" ]; then
	rm -f $TMPFILE
	abort "Cannot get names of private links."
fi
# reuse the positional parameters to split the result into words
set -- $res
PRIV1=$1
if [ "$2" = "" ]; then
	rm -f $TMPFILE
	abort "Cannot get name of second private link to remote host."
fi
PRIV2=$2

# probe both links; the exit status of each ping is kept, its output is
# not used (NOTE(review): the second argument 5 is presumably a timeout
# in seconds - confirm against ping(1M))
res=`/usr/sbin/ping $PRIV1 5`
priv1res=$?
res=`/usr/sbin/ping $PRIV2 5`
priv2res=$?
remhostup="false"
# talk to the remote host over the first link if it answered, else over
# the second one
if [ $priv1res -eq 0 ]; then
	OTHER_HOST=$PRIV1
else
	OTHER_HOST=$PRIV2
fi

# Bring the local working copy and the remote host's data file together
# before anything is changed:
#   - remote host reachable: fetch its file and show any differences; for
#     a modifying operation the user picks which version to work on
#   - remote file cannot be fetched: a modifying operation continues only
#     if the user agrees
#   - remote host not reachable: continue only if the user agrees
if [ $priv1res -eq 0 -o $priv2res -eq 0 ]; then
	# we know the other machine is up
	remhostup="true"
	# Compare the files on both systems.
	rcp $OTHER_HOST:$REALFILE $CMPFILE 2> /dev/null
	if [ -r $CMPFILE ]; then
		diff $TMPFILE $CMPFILE > $TMPDIFFS
		if [ $? -ne 0 ]; then
			[ "$silent" != "y" ] && more $TMPDIFFS
			log "\ndiff <$HA_LOCALHOST< version against \c"
			log ">$HA_REMOTEHOST> version"
			log "\nVersions on the two machines differ."
			if [ "$will_modify" = "1" ] ; then
				log "Type 'y' to use $HA_LOCALHOST version; \c"
				log "type 'n' to use $HA_REMOTEHOST version. [y|n] \c"
				get_yes_or_no
				if [ "$resp" != "y" ]; then
					# Work on the remote version from now on.
					# The local working copy is removed first:
					# once TMPFILE points elsewhere nothing
					# would remove it any more.
					rm -f $TMPFILE
					TMPFILE=$CMPFILE
				fi
			fi
		fi
		rm -f $TMPDIFFS
	else
		# the rcp failed
		log "Failed to copy $HA_REMOTEHOST version of $BASEFILE."
		if [ "$will_modify" = "1" ] ; then
			log "Type 'y' to edit $HA_LOCALHOST version; \c"
			log "type 'n' to exit. [y|n] \c"
			get_yes_or_no
			if [ "$resp" != "y" ]; then
				log "Exiting $argv0."
				rm -f $TMPFILE
				exit 0
			fi
		fi
	fi
else
	# neither private link answered
	log "host $HA_REMOTEHOST is not up; do you want to continue anyway? \c"
	log "[y|n] \c"
	get_yes_or_no
	if [ "$resp" = "y" ]; then
		log "\n### Please remember to distribute $BASEFILE to \c"
		log "$HA_REMOTEHOST later. ###"
		sleep 4
	else
		log "Exiting $argv0."
		rm -f $TMPFILE $CMPFILE
		exit 0
	fi
fi

# from here on the signals listed in $TRAPSIGNALS only produce a message
trap catchsig $TRAPSIGNALS

# run the function that belongs to the operation; each one works on the
# copy in $TMPFILE
if [ "$command" = "list" ] ; then
	haoracle_list $TMPFILE
elif [ "$command" = "insert" ] ; then
	haoracle_insert $TMPFILE $instance $parms
elif [ "$command" = "delete" ] ; then
	haoracle_delete $TMPFILE $instance
elif [ "$command" = "update" ] ; then
	haoracle_update $TMPFILE $instance $parms
elif [ "$command" = "start" -o "$command" = "stop" ] ; then
	haoracle_start_stop $TMPFILE $instance $command
else
	# not reached: the operation was validated further up
	usage
fi

# After a modifying operation: install the edited copy as the local data
# file, run the fault monitor calls for start/stop, and push the file to
# the remote host if it is up.  Nothing is installed unless the user
# agrees.
if [ "$will_modify" = "1" ] ; then
	log "Would you like to distribute $BASEFILE now? [y|n] \c"
	get_yes_or_no
	if [ "$resp" != "y" ] ; then
		log "Not distributing $BASEFILE."
		rm -f $TMPFILE $CMPFILE
		exit 0
	fi

	# If the local data file cannot be replaced, stop here: starting
	# monitors or copying the unchanged file to the remote host would
	# not reflect the operation that was asked for.
	cp $TMPFILE $REALFILE
	if [ $? -ne 0 ] ; then
		rm -f $TMPFILE $CMPFILE
		abort "$REALFILE could not be modified!"
	fi
	log "\n$BASEFILE has been distributed to $HA_LOCALHOST."

	# run command specific functions after updating the file
	case "$command" in
		start|stop) haoracle_start_stop_run $TMPFILE $instance $command ;;
	esac

	# haoracle_start_stop_run sets remhostup to "false" when its remote
	# call fails, so this is tested only now
	if [ "$remhostup" = "true" ]; then
		log "Copying $BASEFILE to $HA_REMOTEHOST ... \c"
		rcp $REALFILE $OTHER_HOST:$REALFILE
		if [ $? -eq 0 ]; then
			log "copy complete."
		else
			log "copy failed."
			log "### Please remember to distribute $BASEFILE to \c"
			log "$HA_REMOTEHOST later. ###"
		fi
	else
		log "\nNot copying $BASEFILE to $HA_REMOTEHOST."
		log "### Please remember to distribute $BASEFILE to \c"
		log "$HA_REMOTEHOST later. ###"
	fi
fi

# remove the working copies on every path, the read-only "list"
# operation included
rm -f $TMPFILE $CMPFILE

exit 0


