#
# ident "@(#)hostint.tcl 1.15     00/02/11 SMI"
#
# Copyright (c) 1996-2000 by Sun Microsystems, Inc.
# All rights reserved.
#
# Description:
#	This file contains the hostint command written in tcl. The hostint
#	command is used to perform a panic on the host for regaining control
#	of a hung system. It uses a more general program capable of delivering
#	a level 15 type interrupt to the host.
#
# Usage:
#	hostint	[-v] [-p proc]
#
# Options:
#	-v 		verbose, displays diagnostic messages
#	-p proc		proc identifies the targeted processor. If none
#			is given, the default proc is the boot processor.
#

# global variables
#
# Command line option flags; proc hostint sets a flag to 1 when the
# matching option (-p / -v) is present.
set arg_v_specified 0
set arg_p_specified 0

# Domain board information. proc hostint fills in num_sysbrds and
# sysbrd_list_strg from the MIB when it looks up the boot processor;
# proc get_another_cpu records the boot processor's board number in
# boot_proc_board.
set boot_proc_board 0
set num_sysbrds 0
set sysbrd_list_strg ""
set sysbrd_list [list]

#
# proc hostint_logMsg
#
# Hands a message to logDomainMessage, tagged with the name of this
# command ("hostint").
#
# Arguments:
#	message		text to log
#
proc hostint_logMsg { message } {
    set facility "hostint"
    logDomainMessage $message $facility
}

#
# proc hostint_usage
#
# Writes the (translated) one-line usage summary to stderr.
#
proc hostint_usage {} {
    set usage_text [gettext {usage: hostint [-v] [-p proc]}]
    puts stderr $usage_text
}

#
# proc panic_the_host
#
# Delivers the panic interrupt to one processor by running
# "sigbcmd [-v] -p <cpu> panic", logging the attempt and its outcome.
#
# Arguments:
#	cpu	number of the processor to interrupt
#
# Globals read:
#	env(SUNW_HOSTNAME)	host name shown in the verbose messages
#	arg_v_specified		1 when verbose output was requested
#
# Returns:
#	0 if sigbcmd ran without error, 1 if it failed.
#
proc panic_the_host { cpu } {
    global env
    global arg_v_specified

    # The command is kept as a proper Tcl list so that each element is
    # handed to exec as exactly one argument. Building it as a plain
    # string and re-parsing it with eval would let a cpu value that
    # contains white space, ';' or '[' be split or run as script.
    if { $arg_v_specified == 1 } {
	# TRANSLATION_NOTE - %d is cpu number, %s is $SUNW_HOSTNAME
	set fmt [gettext {Sending panic interrupt to cpu %d on host %s}]
	puts [format "$fmt" $cpu "$env(SUNW_HOSTNAME)"]
	set cmd [list sigbcmd -v -p $cpu panic]
	# TRANSLATION_NOTE - %s is a command line
	puts [format [gettext "Executing %s"] [join $cmd]]
    } else {
	set cmd [list sigbcmd -p $cpu panic]
    }

    hostint_logMsg "Sending panic interrupt to cpu $cpu"

    # call sigbcmd to perform the actual panic
    if { [catch { eval [linsert $cmd 0 exec] } err] != 0 } {
	if { $arg_v_specified == 1 } {
	    # TRANSLATION_NOTE - %d is cpu number, %s is an error message or number
	    puts [format [gettext "Failed to interrupt the host (cpu %d). Err: %s"] "$cpu" "$err"]
	}
	hostint_logMsg "Failed to interrupt the host (cpu $cpu). Err: $err"
	return 1
    }

    if { $arg_v_specified == 1 } {
	# On success err holds whatever sigbcmd wrote to its stdout
	puts $err
	# TRANSLATION_NOTE - %s is $SUNW_HOSTNAME
	puts [format [gettext "Host %s interrupted"] "$env(SUNW_HOSTNAME)"]
    }
    hostint_logMsg "Host interrupted"
    return 0
}

#
# proc check_proc_good
# Ensures proc_num is good; if it is not, return the first good proc on
# the board
#
# Arguments:
#	board		board number the processor lives on
#	proc_num	processor number to check; its position on the
#			board is taken to be proc_num % 4
#
# Globals read:
#	env(SUNW_HOSTNAME)	domain name used to find our MIB entry
#	arg_v_specified		1 when verbose output was requested
#
# Returns:
#	proc_num	if that processor is marked good, or if no processor
#			configuration could be found for the domain (the
#			problem is logged and the caller's choice is kept)
#	another proc	the first good processor listed for the board when
#			proc_num itself is not good
#	-1		if the snmp session cannot be opened or the board
#			has no good processor
#
proc check_proc_good { board proc_num } {
    global env
    global arg_v_specified
    set D_TABLE	"domainIndex domainName"
    set proc_config_strg ""

    if { [catch { set handle [snmp session] } errmsg] } {
	hostint_logMsg "ERROR: snmp session error $errmsg"
	return -1
    }

    $handle walk x "$D_TABLE" {
	set idx [lindex [lindex $x 0] 2]
	set host [lindex [lindex $x 1] 2]
	if { $host == $env(SUNW_HOSTNAME) } {
	    # Got our domain entry
	    set proc_config_strg [lindex [$handle get domainProcConfig.$idx] 0]

	    # The procconfig data looks like this:
	    # {1.3.6.1.4.1.34.2.1.3.1.9.0 {OCTET STRING}
	    # 4.3C-4.2C-4.1C-4.0C-6.3C-6.2C-6.1C-6.0C}
	    set proc_config_strg [lindex $proc_config_strg 2]
	    break
	}
    }
    $handle destroy

    if { [string compare $proc_config_strg ""] == 0 } {
	hostint_logMsg "ERROR: can't find processors for $env(SUNW_HOSTNAME)"
	return $proc_num
    }

    # The proc config item format is b.pS, where:
    # b is board number, p is proc number, and S is proc state as follows:
    # A - arch. missing		B - black listed
    # C - crunched		F - tested & failed
    # G - good			M - missing
    # R - redlisted		U - good but not configured
    set proc_to_check [expr {$proc_num % 4}]
    set first_good_proc -1

    foreach i [split $proc_config_strg -] {
	# Defaults describe an unusable proc in case the item does not scan
	set b 0
	set p 0
	set s "R"
	scan $i "%d.%d%s" b p s

	# Only use proc with status "G" on the requested board
	if { $board != $b || [string compare $s "G"] != 0 } {
	    continue
	}
	if { $proc_to_check == $p } {
	    # the proc we are interested in is good
	    return $proc_num
	}
	if { $first_good_proc == -1 } {
	    # This is the first proc on the board that is good,
	    # save it in case we need to use it instead
	    set first_good_proc [expr {$b * 4 + $p}]
	}
    }

    if { $first_good_proc == -1 } {
	# Can't find a good proc on the board. Return right away so the
	# "Selected next good cpu" message is not printed for -1.
	hostint_logMsg "ERROR: cpu $proc_num is not good and no other good proc on board $board is found."
	return -1
    }

    if { $arg_v_specified == 1 } {
	# TRANSLATION_NOTE - %d is cpu number
	set fmt [gettext {Selected next good cpu (%d) to panic}]
	puts [format "$fmt" "$first_good_proc"]
    }
    return $first_good_proc
}

#
# proc get_another_cpu
# Returns -1 if error or no second proc available; otherwise, another proc
#            other than the boot proc is returned.
#
# Arguments:
#	boot_proc	number of the boot processor (the one already tried)
#
# Globals read:
#	env(SUNW_HOSTNAME)	domain name, used in messages
#	arg_v_specified		1 when verbose output was requested
#	num_sysbrds		number of system boards in the domain
#	sysbrd_list_strg	'-' separated list of the domain's boards
# Globals written:
#	boot_proc_board		board holding the boot proc (boot_proc / 4)
#
proc get_another_cpu { boot_proc } {
    global env
    global arg_v_specified
    global num_sysbrds
    global sysbrd_list_strg
    global boot_proc_board
    set num_procs_in_boot_brd 0
    set num_procs_in_another_brd 0
    set bootproc_list_strg ""
    set non_bootproc_list_strg ""
    set boot_brd_proc_list {}
    set another_brd_proc_list {}
    set another_brd 0
    set SYSBRD_TABLE	"sysBrdGenNum sysBrdGenNumProc sysBrdGenProcList"

    # use the MIB to get another proc. Use default community: public
    # The proc returned is dependent on the domain configuration:
    # - Single board/single proc: nothing to try
    # - Single board/multi-proc: try boot_proc+2, boot_proc+1, boot_proc+3,
    #   then any other proc listed for the board
    # - Multi-board domain: try the proc in the same slot on another
    #   board, or the first proc on another board
    if { [catch { set handle [snmp session] } errmsg] } {
	hostint_logMsg "ERROR: snmp session error $errmsg"
	return -1
    }
    set boot_proc_board [expr {$boot_proc / 4}]
    set sysbrd_list [split $sysbrd_list_strg -]

    # This snmp walk will find two boards: one containing the boot
    # proc and one that does not
    $handle walk x "$SYSBRD_TABLE" {
	set brd [lindex [lindex $x 0] 2]
	if { $brd == $boot_proc_board } {
	    # Got the board containing the boot proc
	    set num_procs_in_boot_brd [lindex [lindex $x 1] 2]
	    set bootproc_list_strg [lindex [lindex $x 2] 2]
	    set boot_brd_proc_list [split $bootproc_list_strg -]

	    if { $num_sysbrds == 1 } {
		break
	    }

	    # non_bootproc_list_strg not empty means already got the
	    # list of procs from the non boot board
	    if { [string length $non_bootproc_list_strg] != 0 } {
		break
	    }
	} else {
	    # Got a non-bootproc board, make sure it's in the sysbrd list
	    # for the domain
	    if { [lsearch -exact $sysbrd_list $brd] != -1 } {
		# Got our non-boot board; make sure it is not proc-less
		set num_procs_in_another_brd [lindex [lindex $x 1] 2]
		if { $num_procs_in_another_brd > 0 } {
		    set another_brd [expr {$brd}]
		    set non_bootproc_list_strg [lindex [lindex $x 2] 2]
		    set another_brd_proc_list [split $non_bootproc_list_strg -]
		    # bootproc_list_strg not empty means already got the
		    # list of procs from the boot board
		    if { [string length $bootproc_list_strg] != 0 } {
			break
		    }
		}
	    }
	}
    }
    $handle destroy

    if { $num_sysbrds == 1 } {
	if { $num_procs_in_boot_brd == 1 } {
	    # single proc on single board domain, can't try another proc
	    return -1
	}

	if { [string compare $bootproc_list_strg ""] == 0 } {
	    hostint_logMsg "ERROR: can't find boot processor on board $boot_proc_board for $env(SUNW_HOSTNAME)"
	    return -1
	}

	# Get another proc from the board; try the most likely config 1st.
	# In a 2-proc board, 0 and 2 are populated (by Manufacturing) and
	# encouraged in training classes.
	set next_proc -1
	foreach offset {2 1 3} {
	    set candidate [expr {$boot_proc + $offset}]
	    if { [lsearch -exact $boot_brd_proc_list $candidate] != -1 } {
		set next_proc $candidate
		break
	    }
	}
	if { $next_proc == -1 } {
	    # The offsets above only reach procs numbered higher than the
	    # boot proc. If the boot proc is not the lowest one on the
	    # board, fall back to any other proc the board lists.
	    foreach candidate $boot_brd_proc_list {
		if { [string length $candidate] != 0 &&
			[string compare $candidate $boot_proc] != 0 } {
		    set next_proc $candidate
		    break
		}
	    }
	}
	if { $next_proc == -1 } {
	    # Something is really wrong if we get here
	    if { $arg_v_specified == 1 } {
		# TRANSLATION_NOTE - %d is boot processor
		# TRANSLATION_NOTE - %s is processor list from the MIB
		puts [format [gettext "Cannot determine next cpu to panic: boot cpu %d; cpu list: %s"] "$boot_proc" "$bootproc_list_strg"]
	    }
	    hostint_logMsg "ERROR: cannot determine next cpu to panic: boot cpu $boot_proc; cpu list: $bootproc_list_strg"
	    return -1
	}
	set next_cpu [check_proc_good $boot_proc_board $next_proc]
    } else {
	# Multi-board domain; get the next proc from another board.
	if { [string compare $non_bootproc_list_strg ""] == 0 } {
	    # Possibly the other boards don't have any procs
	    hostint_logMsg "ERROR: can't find processors on board $another_brd for $env(SUNW_HOSTNAME)"
	    return -1
	}

	# Try to get the proc installed in the same slot number as the
	# boot_proc (i.e., 0, 1, 2, or 3).
	set boot_proc_slot [expr {$boot_proc % 4}]

	# If the number of procs in the boot proc board and this board are
	# the same, then assume the procs are arranged in the same order.
	if { $num_procs_in_boot_brd == $num_procs_in_another_brd } {
	    set next_proc [lindex $another_brd_proc_list $boot_proc_slot]
	} else {
	    # use first proc on the board
	    set next_proc [lindex $another_brd_proc_list 0]
	}
	if { [string length $next_proc] == 0 } {
	    # The slot number is past the end of the list (board not fully
	    # populated); use the first proc on the board instead.
	    set next_proc [lindex $another_brd_proc_list 0]
	}
	set next_cpu [check_proc_good $another_brd $next_proc]
    }
    # end of single-board / multi-board selection

    if { $next_cpu == -1 } {
	# Can't get a good cpu to try, message already logged in
	# proc check_proc_good
	return -1
    }

    if { $arg_v_specified == 1 } {
	# TRANSLATION_NOTE - %d is processor number
	puts [format [gettext "Next cpu to panic is %d"] "$next_cpu"]
    }
    return $next_cpu
}

#
# proc hostint
#
# Entry point of the hostint command: panics the host named by
# env(SUNW_HOSTNAME) by interrupting one of its processors.
#
# Arguments:
#	args	command line words; accepted options are
#		-v	 verbose
#		-p proc	 processor to interrupt (default: the domain's
#			 boot processor, looked up in the MIB)
#
# Globals written:
#	arg_p_specified, arg_v_specified	set from the options on
#						every call
#	num_sysbrds, sysbrd_list_strg		set from the MIB when the
#						boot processor is looked up
#
# Returns:
#	0 if a processor was interrupted, 1 on any failure. When -p is not
#	given and the boot processor cannot be interrupted, one more
#	processor (chosen by get_another_cpu) is tried before giving up.
#
proc hostint args {
    global env
    global arg_p_specified
    global arg_v_specified
    global num_sysbrds
    global sysbrd_list_strg
    set D_TABLE	"domainName domainBootProc domainNumSysBrd domainSysBrdList"
    set valid_args	"-vp:"

    #
    # check to see if SUNW_HOSTNAME is set; otherwise report and return
    #
    if { [info exists env(SUNW_HOSTNAME)] == 0 } {
	puts stderr [gettext "hostint: SUNW_HOSTNAME undefined. Cannot proceed"]
	return 1
    }

    #
    # get the command line arguments if there are any
    #
    if { [catch {CLA_Parse Args $args $valid_args} errmsg] } {
	# TRANSLATION_NOTE - %s is an error message
	puts [format [gettext "Error: %s"] "$errmsg"]
	hostint_usage
	return 1
    }
    # Assign (rather than only raise) the flags so that a second call in
    # the same interpreter does not inherit options from the first one.
    set arg_p_specified [info exists Args(p)]
    set arg_v_specified [info exists Args(v)]

    #
    # get the proc targeted command argument.
    # if no argument given, get the boot proc number
    #
    if { $arg_p_specified == 1 } {
	set cpu $Args(p)
    } else {
	# -1 stays in place when the MIB has no entry for this host; the
	# range check below then reports it as an invalid proc number.
	set cpu -1

	# use the MIB to get the bootproc. Use default community: public
	if { [catch { set handle [snmp session] } errmsg] } {
	    puts [format [gettext "Error: %s"] "$errmsg"]
	    return 1
	}
	$handle walk x "$D_TABLE" {
	    set host [lindex [lindex $x 0] 2]
	    if { $host == $env(SUNW_HOSTNAME) } {
		# Got our domain name
		set cpu [lindex [lindex $x 1] 2]
		# Sets the num_sysbrds and sysbrd_list_strg globals for
		# later use
		set num_sysbrds [lindex [lindex $x 2] 2]
		set sysbrd_list_strg [lindex [lindex $x 3] 2]
		break
	    }
	}
	$handle destroy
    }

    if { ($cpu < 0) || ($cpu > 63) } {
	if { $arg_p_specified == 1 } {
	    # TRANSLATION_NOTE - %d is a processor number
	    puts [format [gettext "Invalid proc number: %d"] "$cpu"]
	    hostint_logMsg "Invalid proc number: $cpu"
	} else {
	    # TRANSLATION_NOTE - %d is a processor number
	    puts [format [gettext "Invalid proc number obtained from MIB: %d"] "$cpu"]
	    hostint_logMsg "Invalid proc number obtained from MIB: $cpu"
	}
	return 1
    }

    if { [panic_the_host "$cpu"] != 1 } {
	return 0
    }

    # sigbcmd panic failed; try another cpu only if -p was not specified
    if { $arg_p_specified == 1 } {
	# The processor the user asked for could not be interrupted
	return 1
    }
    set cpu2 [get_another_cpu "$cpu"]
    if { $cpu2 == -1 } {
	# No second processor to try; get_another_cpu logged the reason
	return 1
    }
    if { [panic_the_host "$cpu2"] == 1 } {
	# sigbcmd panic failed 2nd time; next step is hostreset
	return 1
    }
    return 0
}
