#
# ident	"@(#)actiontemp.tcl	1.46	01/06/05 SMI"
#
# Copyright (c) 1996-2001 by Sun Microsystems, Inc.
# All rights reserved.
#
# actiontemp.tcl
#
# This Tcl action script responds to edd events that are generated when
# the control board temperature scripts detect out-of-range temperature
# readings.
#
# Board Types
# 1 sysBrdStarfire		System Board (sb)
# 2 cbStarfire			Control Board
# 3 centerplaneStarfire 	Centerplane Board
# 4 suppBrdStarfire		Centerplane Support Board (csb)

# global variables
# brdtype holds the board type of the event being handled; -1 means
# "no -t option seen". The global command only links names and never
# assigns a value, so the initial value has to be stored with set.
global brdtype
set brdtype -1

proc messages { boardtype messtype { boardnum "" } } {
	# Build the log text for one temperature event.
	#   boardtype - board label inserted into the text
	#   messtype  - event code: bad, 911, max, warn, high or normal
	#   boardnum  - optional board number, empty string when omitted
	# Returns the message text. A code that is not listed below yields
	# the "Unknown message code" text instead.

	set where "$boardtype board $boardnum"

	set text(bad) "Warning: Thermal calibration data not available for $where. Check ASIC calibrations and SSP calibration file."
	set text(911) "Warning: The Temperature has exceed 911 temp on $where"
	set text(max) "Warning: The Temperature has exceed Maximum temp on $where"
	set text(warn) "Warning: The Temperature has exceed Warning temp on $where"
	set text(high) "EDD ERROR: High temp on $where should not occur"
	set text(normal) "Temperature has returned to normal on $where"

	if { [info exists text($messtype)] } {
		return $text($messtype)
	}
	return "ERROR: Unknown message code send = $messtype"
}


proc getbrdnum { brdname data } {
	# Validate a board number against the range allowed for its type.
	#   brdname - one of sysBrdStarfireNum, cbStarfireNum,
	#             centerplaneStarfireNum, suppBrdStarfireNum
	#   data    - the board number received with the event
	# Returns:
	#   X               when data is the empty string
	#   data unchanged  when it lies in the allowed range
	#                   (0..15 for system boards, 0 or 1 for the others)
	#   an "(ERROR: ...)" text when the number is out of range
	#   a "( BAD board type ...)" text when brdname is not recognised
	# The result is always a printable string; no error is raised.

	if { $data == "" } { return X }

	switch -exact -- $brdname {
		sysBrdStarfireNum {
			set valid [expr { ($data >= 0) && ($data <= 15) }]
		}
		cbStarfireNum -
		centerplaneStarfireNum -
		suppBrdStarfireNum {
			set valid [expr { ($data == 0) || ($data == 1) }]
		}
		default {
			return "( BAD board type sent $brdname) "
		}
	}

	if { $valid } {
		return $data
	}
	return "(ERROR: BAD board number send : $data)"
}


proc TempBadact { args } {
    # Respond to an event reporting bad (unusable) temperature sensor
    # data on a board.
    #
    # args is the raw option list of the event. It is handed to
    # procestemp, which logs the event and sets the globals brdtype and
    # boardnum used below.
    #
    # thermcal_config is then run in the hope of clearing the condition.
    # Notes: Board types 2 and 3 cannot be calibrated.
    # 4267751 - Run thermcal_config on the board that
    # reported bad data and NOT all boards.
    #
    # Comments must stay inside the arm bodies of the switch: between
    # patterns a hash sign is an ordinary word, not a comment, and would
    # be paired up as a pattern and a body.
    global brdtype
    global boardnum

    set board [ procestemp $args bad]

    #logMessage "Bad temperature sensors: board $boardnum brdtype $brdtype args:$args" TempBadact

    switch -exact -- $brdtype {
	-1 {
		logMessage "Error: -t argument required. received: $args" \
			TempBadact
		return
	}
	1 {
		# sb: system board
		exec thermcal_config -b sb -z $boardnum
	}
	2 -
	3 {
		# control board and centerplane board: cannot be calibrated
		return
	}
	4 {
		# csb: centerplane support board
		exec thermcal_config -b csb -z $boardnum
	}
	default {
		# Updates any changes (takes a long time!)
		exec thermcal_config -u
	}
    }
}



proc Temp911act { args } {
	# Handle a 911 (emergency) over-temperature event.
	#
	# args is the raw option list of the event. It is handed to
	# procestemp, which logs the event, sets the global brdtype and
	# returns the board number.
	#
	# Board type 1 (system board): find the domain that lists the board
	# in the MIB, unlink that domain from the IDN when it has an IDN id
	# other than -1, then power off just that board. If the SNMP
	# session or any MIB read fails, the whole system is powered off.
	# Board types 2, 3 and 4: power off the whole system.
	# Board type -1 or anything else: log the problem; nothing is
	# powered off.
	global brdtype
	# env has to be the global array, otherwise SUNW_HOSTNAME below
	# would only be stored in a local variable. poweroffsysboard
	# documents that SUNW_HOSTNAME must be set before it is called.
	global env

	set board [ procestemp $args 911]

	switch -exact -- $brdtype {
	-1 {
		logMessage "Error: -t argument required. received: $args" Temp911act
		return
	}
	1 {
		# 911 on a system board
		# no time for advance warning just down the board

		#
		# Get a SNMP session handle to talk to the snmpd
		#
		if { [ catch { set handle [ snmp session -community edd -retries 3 ] } errmsg ] } {
			logMessage "Error: unable to acquire snmpd session handle: $errmsg" Temp911act
			poweroff
			return
		}

		# find out which domain the system board is in
		if { [ catch { set num_domains [ $handle get confNumDomain.0 ] } errmsg ] } {
			logMessage "Error: unable to get confNumDomain from MIB: $errmsg" Temp911act
			$handle destroy
			poweroff
			return
		} else {
			set num_domains [lindex [ lindex $num_domains 0 ] 2]
		}

		for {set inst 0} {$inst < $num_domains} {incr inst} {
			if { [ catch { set data [ $handle get domainSysBrdConfig.$inst ] } errmsg ] } {
				logMessage "Error: unable to get domainSysBrdConfig.$inst from MIB: $errmsg" Temp911act
				$handle destroy
				poweroff
				return -code error
			} else {
				set data [lindex [lindex $data 0] 2]
				set data [split $data -]
				# go through the board list to see if
				# the board is in there.
				set argc [ llength $data ]

				for {set i 0} {$i < $argc} {incr i} {
					set brd [lindex $data $i]
					if { $brd == $board } {
						# we found the domain which
						# has the board.
						if { [ catch { set domain_name [ $handle get domainName.$inst ] } errmsg ] } {
							logMessage "Error: unable to get domainName.$inst: $errmsg" Temp911act
							$handle destroy
							poweroff
							return -code error
						} elseif { [ catch { set idnid [ $handle get domainIDNId.$inst ] } errmsg ] } {
							logMessage "Error: unable to get domainIDNId.$inst: $errmsg" Temp911act
							$handle destroy
							poweroff
							return -code error
						} else {
							# keep only the value field of each reply
							set domain_name [ lindex [ lindex $domain_name 0] 2]
							set idnid [ lindex [ lindex $idnid 0] 2]
							break
						}
					}
				}
			}

			# if we found the domain, quit looking
			if { [info exists domain_name] } {
				break
			}
		}

		# we are done with our handle
		$handle destroy


		# we failed to find a domain which contains the system board
		# the board is probably just not part of a domain.
		if { ! [info exists domain_name] } {
			logMessage "System board $board is not part of a booted domain (according to MIB)" Temp911act
			poweroffsysboard $board

			return -code 0
		}

		set env(SUNW_HOSTNAME) $domain_name

		# attempt to unlink the domain
		if { [info exists idnid] && $idnid != -1 } {
			unlinkdomain $domain_name 1
		}

		poweroffsysboard $board
	}
	2 -
	3 -
	4 {
		# 911 on other type of board
		# no time for advance warning just down the system
		poweroff
	}
	default {
		logMessage "Error: bad argument to -t: $brdtype" Temp911act
		return -code error
	}
	}
}


# TempMaxact - handle a MAX temp notification on a board.
# Note - it is critical that this routine not fail to power off
# the board under any circumstances (or errors).

proc TempMaxact { args } {
	# Handle a MAX temperature event on a board.
	#
	# args is the raw option list of the event. It is handed to
	# procestemp, which logs the event, sets the global brdtype and
	# returns the board number.
	#
	# What is powered off depends on the board type, see the arms of
	# the switch below. When the SNMP session or a MIB read fails the
	# routine falls back to powering off the whole system, or the
	# control board for type 2, and then returns.
	#
	# Note: a Tcl switch arm ends by itself. A break inside an arm that
	# is not inside a loop makes the whole proc fail with
	# "invoked break outside of a loop", so none is used here.

	global brdtype
	global env

	# procestemp will setup $brdtype
	set board [ procestemp $args max ]

	#
	# Get a SNMP session handle to talk to the snmpd
	#
	if { [ catch { set handle [ snmp session -community edd -retries 3 ] } errmsg ] } {
		logMessage "Error: unable to acquire snmpd session handle: $errmsg" TempMaxact
		poweroff
		return
	}

	switch -exact -- $brdtype {
	-1 {
		# -1 is brdtype's initialization value (should not be passed in)
		logMessage "Error: -t argument required. received: $args" TempMaxact
		$handle destroy
		return
	}

	1 {
		# MAX temp was for a system board.
		# Here's what we need to do:
		# 1. if board is in a domain, shutdown the domain
		# 2. power off the board.
		# 3. if board is in a domain, and still other
		#    boards with power in the domain, reboot the domain.

		# find out which domain the system board is in
		if { [ catch { set num_domains [ $handle get confNumDomain.0 ] } errmsg ] } {
			logMessage "Error: unable to get confNumDomain from MIB: $errmsg" TempMaxact
			$handle destroy
			poweroff
			return
		} else {
			set num_domains [lindex [ lindex $num_domains 0 ] 2]
		}

		for {set inst 0} {$inst < $num_domains} {incr inst} {
			if { [ catch { set data [ $handle get domainSysBrdConfig.$inst ] } errmsg ] } {
				logMessage "Error: unable to get domainSysBrdConfig.$inst from MIB: $errmsg" TempMaxact
				$handle destroy
				poweroff
				return -code error
			} else {
				set data [lindex [lindex $data 0] 2]
				set data [split $data -]
				# go through the board list to see if
				# the board is in there.
				set argc [ llength $data ]

				for {set i 0} {$i < $argc} {incr i} {
					set brd [lindex $data $i]
					if { $brd == $board } {
						# we found the domain which
						# has the board.
						if { [ catch { set domain_name [ $handle get domainName.$inst ] } errmsg ] } {
							logMessage "Error: unable to get domainName.$inst: $errmsg" TempMaxact
							$handle destroy
							poweroff
							return -code error
						} elseif { [ catch { set idnid [ $handle get domainIDNId.$inst ] } errmsg ] } {
							logMessage "Error: unable to get domainIDNId.$inst: $errmsg" TempMaxact
							$handle destroy
							poweroff
							# stop here: the handle is gone and
							# everything has been powered off
							return -code error
						} else {
							# keep only the value field of each reply
							set domain_name [ lindex [ lindex $domain_name 0] 2]
							set idnid [ lindex [ lindex $idnid 0] 2]
							break
						}
					}
				}
			}

			# if we found the domain, quit looking
			if { [info exists domain_name] } {
				break
			}
		}

		# we are done with our handle
		$handle destroy


		# we failed to find a domain which contains the system board
		# the board is probably just not part of a domain.
		if { ! [info exists domain_name] } {
			logMessage "System board $board is not part of a booted domain (according to MIB)" TempMaxact
			poweroffsysboard $board

			return -code 0
		}

		# if the domain is up: halt the domain, power off board,
		# reboot the domain. Otherwise, just power off the board.

		set env(SUNW_HOSTNAME) $domain_name

		if { [info exists idnid] && ($idnid != -1) } {
			unlinkdomain $domain_name 15
		}

		set host_down [check_host -b -q]
		if { $host_down == 1 } {
			# the host is down
			poweroffsysboard $board
		} else {
			# the host is up

			# halt the domain containing the board
			shutdowndomain $domain_name

			poweroffsysboard $board

			rebootdomain $domain_name
		}
	}

	2 {
		# MAX temp was for a control board.
		# Here's what we need to do:
		# If there is < 2 control boards, shutdown all domains
		# in the system and power everything off.
		# if there are 2 control boards, power off the control board.

		if { [ catch { set num_conbrd [ $handle get confNumConBrd.0 ] } errmsg ] } {
			logMessage "Error: unable to get confNumConBrd from MIB: $errmsg" TempMaxact
			$handle destroy
			poweroffconboard $board
			return -code error
		} else {
			set num_conbrd [lindex [ lindex $num_conbrd 0 ] 2]
		}

		if { $num_conbrd < 2 } {
			# need to shutdown all the domains in the system
			if { [ catch { set num_domains [ $handle get confNumDomain.0 ] } errmsg ] } {
				logMessage "Error: unable to get confNumDomain from MIB: $errmsg" TempMaxact
				$handle destroy
				poweroffconboard $board
				return -code error
			} else {
				set num_domains [lindex [ lindex $num_domains 0 ] 2]
			}

			for {set inst 0} {$inst < $num_domains} {incr inst} {
				if { [ catch { set data [ $handle get domainName.$inst ] } errmsg ] } {
					logMessage "Error: unable to get domainName.$inst from MIB: $errmsg" TempMaxact
					$handle destroy
					poweroffconboard $board
					return -code error
				} else {
					set data [lindex [ lindex $data 0] 2]
				}

				shutdowndomain $data
			}

			$handle destroy

			# now power off everything
			poweroff

		} else {

			$handle destroy

			# now power off the board
			poweroffconboard $board

		}
	}

	3 -
	4 {
		# MAX temp was for a centerplane or centerplane support board.
		# Here's what we need to do:
		# 1. shutdown all running domains
		# 2. power everything off.


		# need to shutdown all the domains in the system
		if { [ catch { set num_domains [ $handle get confNumDomain.0 ] } errmsg ] } {
			logMessage "Error: unable to get confNumDomain from MIB: $errmsg" TempMaxact
			poweroff
			$handle destroy
			return -code error
		} else {
			set num_domains [lindex [ lindex $num_domains 0 ] 2]
		}

		for {set inst 0} {$inst < $num_domains} {incr inst} {
			if { [ catch { set data [ $handle get domainName.$inst ] } errmsg ] } {
				logMessage "Error: unable to get domainName.$inst from MIB: $errmsg" TempMaxact
				poweroff
				$handle destroy
				return -code error
			} else {
				set data [lindex [ lindex $data 0] 2]
			}

			shutdowndomain $data
		}

		$handle destroy

		# now power off everything
		poweroff
	}

	default {
		logMessage "Error: bad argument to -t: $brdtype" TempMaxact
		return -code error
	}
	}
}


proc TempWarnact {  args  } {
	# temperatures are in the Warning zone, so send out a message only.
	# if the temperature continues up a shutdown will occur
	#
	# args is the raw option list of the event. Element 5 is taken as
	# the sensor data: a flat list of name/value pairs, the values
	# being divided by 1000 before they are printed as degrees C.
	# NOTE(review): presumably element 5 is the value of the -d option;
	# confirm against the caller.
	#
	# When the data does not contain cbStarfireSen1Temp.0 the event is
	# passed to procestemp. Otherwise one combined ambient temperature
	# warning is logged from here.

	set data [ lindex $args 5 ]

	if { [ lsearch $data "cbStarfireSen1Temp.0" ] == -1 } {
		return [ procestemp $args warn ]
	}

	# format the data into decimal for the user.
	set out ""
	set count [ llength $data ]
	for { set i 0 } { $i < $count } { incr i 2 } {
		if { $i > 0 } {
			append out ", "
		}
		set name [ lindex $data $i ]
		set raw [ lindex $data [ expr { $i + 1 } ] ]
		# scan reports how many fields it converted; anything but 1
		# means the reading is missing or not a number, and is then
		# reported as invalid instead of reusing the previous value.
		if { ([ scan $raw "%f" reading ] != 1) || ($reading == -1) } {
			append out "$name reading is invalid"
		} else {
			append out [ format "%s %.3f C" $name [ expr { $reading / 1000 } ] ]
		}
	}
	logMessage "Warning: Ambient temperatures are out of range: $out" TempWarnact
}

proc TempHighact { args } {
	# Handle a High temperature event.
	# High temps have been disabled, so this should never be called.
	# The unexpected call is logged first, then the event is passed to
	# procestemp, whose result is returned.
	set note "EDD error High temps should not occur with this revision of software. "
	logMessage $note TempHighact

	return [ procestemp $args high ]
}


proc TempNormact { args } {
	# Handle the event sent when a board temperature is back to normal.
	# The event is only passed to procestemp with message type normal;
	# the result of procestemp is returned.
	return [ procestemp $args normal ]
}

# power a system board off
# NOTE!! - if attempting to power off a system board in a domain,
# the environment variable SUNW_HOSTNAME must be set prior
# calling this routine!!
#
proc poweroffsysboard { args } {
	# Power off one system board with the power command.
	# args is the board number, passed on to power -sb.
	# If the power command fails the whole system is powered off
	# instead, because the board must not stay powered.
	# Always returns the empty string.

	set sb $args

	logMessage "Powering down System board $sb..." poweroffsysboard

	if { ! [catch { exec power -q -f -off -sb $sb }] } {
		return
	}

	logMessage "ERROR: power command failed to power off system board $sb" poweroffsysboard
	# we need to get the system board off.
	# so try powering everything off.
	poweroff
	return
}

# power a control board off
proc poweroffconboard { args } {
	# Power off one control board with the power command.
	# args is the board number, passed on to power -cb.
	# If the power command fails the whole system is powered off
	# instead, because the board must not stay powered.
	# Always returns the empty string.

	set cb $args

	logMessage "Powering down Control board $cb..." poweroffconboard

	if { ! [catch { exec power -q -f -off -cb $cb }] } {
		return
	}

	logMessage "ERROR: power command failed to power off control board $cb" poweroffconboard
	# we need to get the control board off.
	# so try powering everything off.
	poweroff
	return
}

# halt a domain (via hostint)
# and wait for it to come down
proc shutdowndomain { args } {
	# Halt the domain named by args and wait for it to go down.
	#
	# SUNW_HOSTNAME is set in the environment to the domain name before
	# check_host and hostint are used. Nothing more is done when
	# check_host already reports the host as down (result 1), and a
	# failing hostint is logged without waiting.
	global env

	set dom $args
	set env(SUNW_HOSTNAME) $dom

	# make sure the domain is up first
	if { [check_host -b -q] == 1 } {
		return
	}

	logMessage "Halting domain $dom..." shutdowndomain

	if { [catch { exec hostint }] } {
		logMessage "ERROR: hostint failed for domain $dom" shutdowndomain
		return
	}

	# Wait for the host to come down: at most 60 polls, each one a
	# check_host call followed by a one second sleep, so the total
	# wait is probably around 1.5-2 minutes.
	set polls 0
	while { $polls < 60 } {
		if { [check_host -b -q] == 1 } {
			break
		}
		exec sleep 1
		incr polls
	}
}

# reboot a domain (via bringup)
# first make sure there is at least one
# board in the domain which has power.
proc rebootdomain { args } {
	# Reboot the domain named by args with the bringup command, but only
	# when at least one system board of that domain still has power.
	#
	# The board list of the domain and the power state of the system
	# boards are read from the MIB over SNMP. Every SNMP failure is
	# logged and ends the routine without a reboot attempt. The output
	# of bringup goes to last_edd_bringup.out in the directory of the
	# domain below the SSPLOGGER environment directory.
	set domain_name $args

	global env
	# errorCode is a global variable; without this declaration the
	# failure branch below could not read the exit status of bringup.
	global errorCode

	# first, see if there are any boards in the domain with power...

	#
	# Get a SNMP session handle to talk to the snmpd
	#
	if { [ catch { set handle [ snmp session -community edd -retries 3 ] } errmsg ] } {
		logMessage "Error: unable to acquire snmpd session handle: $errmsg" rebootdomain
		return
	}

	if { [ catch { set num_domains [ $handle get confNumDomain.0 ] } errmsg ] } {
		logMessage "Error: unable to get confNumDomain from MIB: $errmsg" rebootdomain
		$handle destroy
		return
	} else {
		set num_domains [lindex [ lindex $num_domains 0 ] 2]
	}

	for {set inst 0} {$inst < $num_domains} {incr inst} {
		if { [ catch { set data [ $handle get domainName.$inst ] } errmsg ] } {
			logMessage "Error: unable to get domainName.$inst from MIB: $errmsg" rebootdomain
			$handle destroy
			return
		} else {
			set data [lindex [lindex $data 0] 2]
		}

		if { $domain_name == $data } {
			# we found the domain!!
			if { [ catch { set data [ $handle get domainSysBrdList.$inst ] } errmsg ] } {
				logMessage "Error: unable to get domainSysBrdList.$inst from MIB: $errmsg" rebootdomain
				$handle destroy
				return
			} else {
				set data [lindex [lindex $data 0] 2]
				set brdlist [split $data -]
			}
			# no need to query the remaining domains; a failure on
			# one of them must not prevent this reboot
			break
		}
	}

	if { ! [ info exists brdlist] } {
		# we failed to get a brdlist for the domain... :((
		logMessage "Error: failed to get domain info from MIB for domain $domain_name" rebootdomain
		$handle destroy
		return
	}

	# if we are here, we have a brdlist for the domain!
	# now, see if any of the boards have power...

	if { [ catch { set num_sysbrds [ $handle get confNumSysBrd.0 ] } errmsg ] } {
		logMessage "Error: unable to get confNumSysBrd from MIB: $errmsg" rebootdomain
		$handle destroy
		return
	} else {
		set num_sysbrds [lindex [ lindex $num_sysbrds 0 ] 2]
	}

	set go_for_it 0
	for {set inst 0} {$inst < $num_sysbrds} {incr inst} {
		if { [ catch { set data [ $handle get sysBrdGenPower.$inst ] } errmsg ] } {
			logMessage "Error: unable to get sysBrdGenPower.$inst from MIB: $errmsg" rebootdomain
			$handle destroy
			return
		} else {
			set data [lindex [lindex $data 0] 2]

			if { $data == "on" } {
				# found a board with power

				if { [ catch { set data [ $handle get sysBrdGenNum.$inst ] } errmsg ] } {
					logMessage "Error: unable to get sysBrdGenNum.$inst from MIB: $errmsg" rebootdomain
					$handle destroy
					return
				} else {
					set data [lindex [lindex $data 0] 2]
				}

				set inlist [lsearch $brdlist $data]

				if { $inlist != -1 } {
					set go_for_it 1
					break
				}
			}
		}
	}

	$handle destroy

	if { $go_for_it == 1 } {
		cd $env(HOME)
		logMessage "Rebooting domain $domain_name..." rebootdomain
		set env(SUNW_HOSTNAME) $domain_name
		set pathname $env(SSPLOGGER)
		append pathname /$domain_name/last_edd_bringup.out
		if { [catch { exec bringup -L -F -A on >$pathname }] != 0 } {
			# element 2 of errorCode is the exit status when the
			# command ran and exited with a non-zero status
			set err [lindex $errorCode 2]
			logMessage "ERROR: edd initiated bringup failed for domain $domain_name; status = $err. See messages and $pathname file for details ." rebootdomain
		}
	} else {
		logMessage "Error: cannot reboot domain $domain_name: No valid system boards" rebootdomain
	}
}
		
proc poweroff { } {
	# Power off the entire system with the power command.
	# The attempt is logged first; a failure of the command is logged
	# as well and is not propagated to the caller.
	logMessage "Shutting down entire system..." poweroff

	set failed [catch { exec power -q -f -B -off }]
	if { $failed } {
		logMessage "ERROR: power -q -f -B -off failed	" poweroff
	}
}



proc procestemp { args type } {
	# The routine used to check the arguments for the action scripts.
	# It checks the board number and sends the messages to the log file.
	#
	#   args - the option list of the event as ONE list value. args is
	#          an ordinary parameter here because it is not the last
	#          one. Options: -e code, -b board number, -d sensor data,
	#          -t board type. Only the first seven elements are
	#          examined.
	#   type - message type handed to messages, e.g. bad, 911, max
	#
	# Side effects: sets the globals brdtype and boardnum. brdtype is -1
	# afterwards when no -t option was given, so that callers can detect
	# the missing option. Two log lines are written: the event message
	# and the temperature data.
	#
	# Returns the board number as checked by getbrdnum. An error is
	# returned for fewer than six arguments or an unknown option.

	global brdtype
	global boardnum

	set argc [ llength $args ]

	if { $argc < 6 } {
		logMessage "Error: Wrong number of arguments($argc) sent to action script: $args" procestemp
		return -code error
	}

	# start from a known state so that nothing is left over from an
	# earlier event and no variable is unset further down
	set brdtype -1
	set boardnum ""
	set brddata ""

	# check the switches sent; never read past the end of the list
	set limit $argc
	if { $limit > 7 } {
		set limit 7
	}
	for {set i 0} {$i < $limit} {incr i} {
		set argdata [lindex $args $i]
		switch -exact -- $argdata {
		-e {
			incr i
			set errcode [lindex $args $i]
		}
		-b {
			incr i
			set boardnum [lindex $args $i]
		}
		-d {
			incr i
			set brddata [lindex $args $i]
		}
		-t {
			incr i
			set brdtype [lindex $args $i]
		}
		default {
			logMessage "Error: Wrong option argument sent to action script: $argdata"  procestemp
			return -code error
		}
		}
	}

	# determine which type of board we are using
	if { $brdtype == 1 } {
		set label system
		set numname sysBrdStarfireNum
	} elseif { $brdtype == 2 } {
		set label control
		set numname cbStarfireNum
	} elseif { $brdtype == 3 } {
		set label centerplane
		set numname centerplaneStarfireNum
	} elseif { $brdtype == 4 } {
		set label support
		set numname suppBrdStarfireNum
	} else {
		# missing or unknown board type: still log the event, the
		# callers report the bad -t value themselves
		set label unknown
		set numname ""
	}

	if { $numname != "" } {
		set board [ getbrdnum $numname $boardnum ]
	} elseif { $boardnum == "" } {
		set board X
	} else {
		set board $boardnum
	}
	set message [ messages $label $type $board ]

	# format the data into decimal for the user.
	set outdata ""
	set l [llength $brddata]
	for { set i 0 } { $i < $l } { incr i 2 } {
		if { $i > 0 } {
			append outdata ", "
		}
		set d1 [ lindex $brddata $i ]
		set raw [ lindex $brddata [ expr { $i + 1 } ] ]
		# scan reports how many fields it converted; anything but 1
		# means the value is missing or not a number
		if { ([ scan $raw "%f" d2 ] != 1) || ($d2 == -1) } {
			append outdata "$d1 is invalid"
		} else {
			append outdata [ format "%s %.3f C" $d1 [ expr { $d2 / 1000 } ] ]
		}
	}

	logMessage "$message" procestemp
	logMessage "Temperature data for board $board, $type trap: $outdata" procestemp
	return $board

}

#
# Exec unlink the domain from the IDN network.
# 
proc unlinkdomain { dname tries } {
	#
	# Unlink the domain dname from the IDN network with domain_unlink,
	# making at most tries attempts.
	#
	# Unlink the SMD reg and sync the other domains in the IDN net, but
	# leave the ssp mib unchanged so that the SSP relinks this domain
	# when the domain is up.
	#
	# Only exit status EBUSY(16), "Another IDN or DR operation in
	# progress", is retried, after a one second pause. Any other
	# failure, or running out of attempts, is logged and ends the
	# routine. No error is raised to the caller.

	# errorCode is a global variable; it holds the exit status of the
	# failed command in element 2
	global errorCode

	while { $tries > 0 } {
		if { ! [catch { exec domain_unlink -E -XM $dname } err] } {
			return
		}

		set rv [lindex $errorCode 2]
		incr tries -1

		if { ($rv != 16) || ($tries <= 0) } {
			logMessage "ERROR: domain_unlink failed for domain $dname: $err" unlinkdomain
			return
		}

		logMessage "Retry unlink $dname" unlinkdomain
		catch { exec sleep 1 }
	}
}

