#!/usr/bin/python2.4

"""
Upgrade the agent on multiple servers.

Debugging (--debug) output and the final status summary are printed to
stderr; everything else goes to stdout.

The main upgrade algorithm is implemented in upgrade_agent().
"""

import sys, os
import re
import logging
import gettext
import locale
import getopt
import codecs
import time
import xml.dom.minidom
from datetime import datetime, timedelta

# Adjust search path to include rest of n1sh files.
sys.path.append(os.path.join(sys.path[0], 'cli'))
import executor
import n1shmain

ALLOWED_LANGUAGES = ('en',)  # This script understands only English.

# Pattern used by execute_job() to pull the job id out of the output of a
# command that starts a job.
RE_JOB_STARTED = re.compile(r'Job "(\d+)" started.')

# Job statuses that end the polling loop in wait_for_job().  Only the
# statuses in JOB_DONE_SUCCESS are treated as success by execute_job().
JOB_DONE_SUCCESS = ('Completed',)
JOB_DONE_ERROR = ('Warning', 'Error', 'Stopped', 'Timed Out',)

# Controls how many one-second sleeps wait_for_job() performs between polls.
PAUSE_SEC = 5

# Directory searched by parse_result_servers() for a per-server details XML
# file; the file is named after the server.
SERVER_DETAILS_DIR = '/n1gc-setup/agentupgrade/'

# Maps the monitor labels found in a server details file (<monitorName>) to
# the attribute names used on ServerThresholds.  make_threshold_cmd() replaces
# each '_' with '.' to obtain the threshold name used in the generated command.
THRESHOLD_KEYS = {
    'System Load (1 min average)'  : 'cpustats_loadavg1min',
    'System Load (5 min average)'  : 'cpustats_loadavg5min',
    'System Load (15 min average)' : 'cpustats_loadavg15min',
    'CPU Usage'                    : 'cpustats_pctusage',
    'CPU Idle'                     : 'cpustats_pctidle',

    'Memory Free'                  : 'memusage_pctmemfree',
    'Memory Used'                  : 'memusage_pctmemused',
    'Physical Memory Free'         : 'memusage_mbmemfree',
    'Physical Memory Used'         : 'memusage_mbmemused',

    'Swap Space Used (%)'          : 'memusage_pctswapused',

}
# Filesystem counterpart of THRESHOLD_KEYS.  Only the values are used (by
# make_threshold_cmd()); parse_server_details() hard-codes the single key.
FILESYSTEM_KEYS = {
    'File System Used Space (%)'   : 'fsusage_pctused',
}


class IgnoreServerError(Exception):
    """
    Server is unavailable for upgrade.

    main() catches this, records an "Ignoring server" message and carries on
    with the next server.
    """

class ServerUpgradeError(Exception):
    """
    Problem upgrading server.

    Raised by upgrade_agent(); main() catches it, records an
    "Error upgrading server" message and carries on with the next server.
    """

class JobExecuteError(Exception):
    """
    Job didn't start or finished unsuccessfully.

    upgrade_agent() converts this into a ServerUpgradeError.
    """


def main((cout, cerr), argv):
    """
    Parse the command line, upgrade every selected server, print a summary.

    (cout, cerr) are the output and error streams; both are re-wrapped in
    codec writers for the session encoding once that encoding is known.
    argv is the argument list without the program name.

    Exits the process with -1 (after printing usage) when the arguments are
    invalid.  Otherwise returns the value of cacao.stop(), or 0 if that value
    was never obtained.
    """
    pargs = parse_args(cerr, argv)
    if not pargs:
        usage(cerr)
        sys.exit(-1)

    servernames, group, argline, filterline, loglevel, role = pargs

    # curr_handler will need to be removed later when the encoded stream
    # is determined
    curr_handler = init_logger(cerr, loglevel)

    terminal_encoding = init_locale()
    session = n1shmain.Session(role, 'xml', terminal_encoding)

    # Wrap both streams so everything written from here on is encoded with
    # the session encoding; unencodable characters are replaced.
    writer = codecs.getwriter(session.encoding)
    cout = writer(cout, 'replace')
    cerr = writer(cerr, 'replace')

    # Reset handler to encoded cerr.
    init_logger(cerr, loglevel, curr_handler)

    logging.info('stdin, stdout and stderr set to "%s" encoding.'
                 % session.encoding)
    logging.info('servers=%s; group="%s"; filters="%s"; role="%s"'
                  % (servernames, group, filterline, role))

    status = 0
    cacao = executor.CacaoExecutor()
    errors = []     # Messages for the "Errors encountered" summary.
    successes = []  # Messages for the "Success" summary.

    # The inner try/finally guarantees cacao.stop() is called; the outer
    # try/except turns any unexpected exception into a summary entry.
    try:
        try:
            cacao.start(session)
            allservers = []
            servers = {}
            #if servernames:
            #    servers = parse_result_servers(
            #                cacao.execute('show server'))
            if group:
                servers = parse_result_servers(
                            cacao.execute('show group %s' % group), errors)
            else:
                servers = parse_result_servers(
                            #cacao.execute('show server %s hardwarehealth=good oshealth=good power=on' % filterline), errors)
                            # Default filters removed per Stephen Leung's
                            # recommendation.
                            cacao.execute('show server %s' % filterline), errors)

            if servernames:
                # Keep only the servers named on the command line ...
                for k in servers.keys():
                    if k not in servernames:
                        del servers[k]

                # ... and report any requested name that was not listed.
                for s in servernames:
                    if s not in servers:
                        errors.append('Server %s not found' % s)

            # Do work
            if not servers:
                print >>cerr, 'No matching healthy servers found.'
            else:
                for server in sorted(servers.values()):
                    # A failure on one server is recorded and the loop moves
                    # on to the next one.
                    try:
                        logging.info('Upgrading server "%s"' % (server,))
                        upgrade_agent((cout, cerr), cacao, server, argline,
                                      successes)
                    except IgnoreServerError, (reason):
                        error = '* Ignoring server "%s": %s' \
                                % (server.name, reason)
                        errors.append(error)
                        print >>cout, error
                    except ServerUpgradeError, (reason):
                        error = '* Error upgrading server "%s": %s' \
                                % (server.name, reason)
                        errors.append(error)
                        print >>cout, error

        finally:
            status = cacao.stop()
    except:
        logging.error('Unknown Error', exc_info=True)
        errors.append('See stacktrace (use --debug option)')

    # Final summary goes to the error stream.
    print >>cerr
    print >>cerr, '*** Success:'
    for s in successes or ('None',):
        print >>cerr, s
    print >>cerr
    print >>cerr, '*** Errors encountered:'
    for e in errors or ('None',):
        print >>cerr, e

    logging.shutdown()
    return status


def usage(strm):
    """
    Print the command-line help text to strm.

    The --agent* options accepted by parse_args() are not listed here.
    """
    program = os.path.basename(sys.argv[0])
    help_lines = (
        # The trailing newline yields a blank line after the summary.
        'Update server state from N1SM 1.1 to N1SM 1.2.\n',
        'Usage: %s [--debug] [--role <role>]' % program,
        '\t(--all|<servers>|--group <group>|[--f_power <power>]',
        '\t[--f_hardwarehealth <hardwarehealth>] [--f_oshealth <oshealth>]',
        '\t[--f_model <model>] [--f_name <name>])',
        '',
        '\t--debug    Show additional status information',
        '\t--role     Specify session role for execution',
        '',
        '\t--all      All servers',
        '\t<servers>  List of space separated servers',
        '\t--group    Group name',
        '\t--f_*      Filter on property',
    )
    for text in help_lines:
        print >>strm, text


def parse_args(cerr, argv):
    """
    Return (<list of servers>, arguments, loglevel, role) or None if
    there's a problem.
    """
    try:
        opts, args = getopt.gnu_getopt(argv, '',
                ['agentip=', 'agentsnmp=',
                 'agentsnmpv3=', 'agentssh=',
                 'debug', 'role=',
                 'all',
                 'f_hardwarehealth=', 'f_oshealth=', 'f_power=',
                 #'f_ip=', 'f_jobcount=', 'f_runningos=',
                 'f_model=', 'f_name=', 'group='])
    except getopt.GetoptError, e:
        print >>cerr, e
        print >>cerr
        return

    loglevel = sys.maxint
    argline = []
    filterline = []
    allservers = False
    role = ''
    group = ''
    for o, v in opts:
        if o in ('--debug',):
            loglevel = logging.NOTSET
        elif o in ('--role',):
            role = v
        elif o in ('--all',):
            allservers = True
        elif o in ('--agentip', '--agentsnmp', '--agentsnmpv3', '--agentssh'):
            argline.append('%s=%s' % (o[2:], v))
        elif o[:4].startswith('--f_'):
            filterline.append('%s=%s' % (o[4:], v))
        elif o in ('--group',):
            group = v

    # Run some checks

    # Make sure only one of allservers, args, group, or filterline is set.
    if reduce(lambda c, a: c + (a and 1 or 0),
              (allservers, args, group, filterline), 0) != 1:
        print >>cerr, 'Exactly one of --all, <servers>, --group, or --f_* must be set.'
        print >>cerr
        return

    return args, group, ' '.join(argline), ' '.join(filterline), \
           loglevel, role


def init_logger(strm, lvl, old_handler=None):
    """
    Attach a stream handler writing to strm to the root logger.

    The root logger's level is set to lvl.  If old_handler is given it is
    detached from the root logger.  Returns the newly attached handler so a
    later call can pass it back as old_handler.
    """
    new_handler = logging.StreamHandler(strm)
    new_handler.setFormatter(
        logging.Formatter('[%(levelname)8s] %(message)s'))

    root = logging.getLogger()
    if old_handler:
        root.removeHandler(old_handler)
    root.addHandler(new_handler)
    root.setLevel(lvl)

    return new_handler


def init_locale():
    """
    Make sure the message catalog for gettext is configured correctly.

    Sets the process locale from the environment, falling back to 'en_US'
    when the language is not in ALLOWED_LANGUAGES, makes the LANGUAGE
    environment variable agree with the active locale, and binds the 'n1sh'
    gettext domain.

    Returns the encoding part of the locale string obtained from the
    environment ('' if it had none).  Exits the process with
    n1shmain.CODE_INTERNAL_ERROR if the locale cannot be set.
    """
    # Make sure locale is set correctly.
    try:
        loc = locale.setlocale(locale.LC_ALL, '')
        # Split "<lang>.<encoding>"; anything that is not exactly two parts
        # is treated as a bare language with no encoding.
        try:
            lang, enc = loc.split('.')
        except ValueError:
            lang = loc
            enc = ''
        # Drop any "@modifier" suffix from the encoding.
        enc = enc.split('@')[0]
        if lang.split('_')[0] not in ALLOWED_LANGUAGES:
            # Set to a default locale if none set.  Let system determine
            # the default encoding.
            def_lang = 'en_US'
            #if enc:
            #    def_lang += '.' + enc

            logging.debug('Locale "%s" not in %s.  Using "%s".'
                         % (loc, str(ALLOWED_LANGUAGES), def_lang))
            locale.setlocale(locale.LC_ALL, def_lang)

    except locale.Error, e:
        logging.error('%s: %s' % (e.__class__, str(e)))
        print 'The current locale is not supported.'
        sys.exit(n1shmain.CODE_INTERNAL_ERROR)

    # Make the LANGUAGE environment variable match the active locale.
    langvar = 'LANGUAGE'
    try:
        l = locale.getlocale(locale.LC_ALL)[0]
        language = os.environ[langvar].split('.')[0]
        logging.debug('%s = %s; should be "%s"' % (langvar, language, l))
        if language != l:
            raise locale.Error  # Borrow so we don't need our own exception.
    except (KeyError, locale.Error):
        # Use the current locale (which may be from default) to
        # set 'langvar' environment variable for the duration of this run.
        l = locale.getlocale(locale.LC_ALL)
        os.environ[langvar] = '%s' % (l[0])
        logging.debug('Setting default %s = "%s"'
                     % (langvar, os.environ[langvar]))


    # Note:  If using symlinks for n1sh.py, the "sys.path[0]" trick may not
    #        work.  Use the commented out line (hardcoded path) instead.
    gettext.bindtextdomain('n1sh', os.path.join(sys.path[0], 'cli/locale'))
    #gettext.bindtextdomain('n1sh', '/opt/sun/n1gc/bin/cli/locale')
    gettext.textdomain('n1sh')

    # Note: this is the encoding parsed from the environment's locale, even
    # when the 'en_US' fallback above was applied.
    return enc


def upgrade_agent((cout, cerr), cacao, server, args, successes):
    """Upgrade the server agent."""
    # Make sure system is up and healthy.
    # Health and power checks removed per Stephen Leung's recommendation.
    #if server.power not in ('On',):
    #    raise IgnoreServerError('power is off')
    #if server.oshealth not in ('Good',) or server.hwhealth not in ('Good',):
    #    raise IgnoreServerError('not healthy')

    # Upgrade!
    #cacao.execute = lambda c: executor.Result('')
    print >>cout, 'Server', server.name
    try:
        #print server
        if server.osmonitor in ('Yes',):
            print >>cout, '    replacing osmonitor'
            cmd = 'add server %s feature=osmonitor upgrade=true %s' \
                  % (quote(server.name), args)
            print >>cout, '      %s' % cmd
            execute_job(cout, cacao, cmd)
        elif server.osbasemgmt in ('Yes',):
            print >>cout, '    replacing basemanagement'
            cmd = 'add server %s feature=basemanagement upgrade=true %s' \
                  % (quote(server.name), args)
            print >>cout, '      %s' % cmd
            execute_job(cout, cacao, cmd)

        if server.hwmonitor in ('Enabled',):
            print >>cout, '    enabling monitoring'
            cmd = 'set server %s monitored=true' % quote(server.name)
            print >>cout, '      %s' % cmd
            result = cacao.execute('<<text %s' % cmd)
            if result.status != 0:
                logging.warning(result.output)
                raise ServerUpgradeError('(%s) returned %d: %s'
                                         % (cmd, result.status, result.output))

        print >>cout, '    resetting thresholds...'
        for cmd in make_threshold_cmd(server.name, server.thresholds):
            for icount in range(1, 25):
                print >>cout, '      %s' % cmd
                result = cacao.execute('<<text %s' % cmd)
                if result.status == 0:
                    break;
                time.sleep(5)
            if result.status != 0:
                raise ServerUpgradeError('(%s) returned %d: %s'
                                         % (cmd, result.status, result.output))

    except JobExecuteError, e:
        raise ServerUpgradeError(str(e))

    successes.append('%s upgraded successfully' % quote(server.name))

def _threshold_limits(levels):
    """Turn a [warning, critical] pair into 'warninghigh=80'-style arguments."""
    limits = []
    for severity, value in zip(('warning', 'critical'), levels):
        if not value:
            continue
        # A leading '<' marks a lower bound; anything else is treated as an
        # upper bound.  The first character is dropped either way.
        if value[0] == '<':
            bound = 'low'
        else:
            bound = 'high'
        limits.append('%s%s=%s' % (severity, bound, value[1:]))
    return limits


def make_threshold_cmd(sname, server_thresholds):
    """
    Generate "set server <server> threshold ..." commands.

    Yields one command string per threshold in THRESHOLD_KEYS, followed by
    one per filesystem threshold in FILESYSTEM_KEYS, skipping any that have
    neither a warning nor a critical value.  Yields nothing when
    server_thresholds is empty or None.

    A threshold attribute that was never set on server_thresholds (its
    monitor was absent from the server details) is treated as having no
    values instead of raising AttributeError.
    """
    if not server_thresholds:
        return

    unset = (None, None)

    for attr in sorted(THRESHOLD_KEYS.values()):
        limits = _threshold_limits(getattr(server_thresholds, attr, unset))
        if not limits:
            continue

        yield ' '.join(['set server %s threshold %s'
                        % (quote(sname), attr.replace('_', '.'))]
                       + limits)

    for fs in server_thresholds.filesystems:
        for attr in sorted(FILESYSTEM_KEYS.values()):
            limits = _threshold_limits(getattr(fs, attr, unset))
            if not limits:
                continue

            yield ' '.join(['set server %s filesystem %s threshold %s'
                            % (quote(sname), quote(fs.dir),
                               attr.replace('_', '.'))]
                           + limits)




# XML results
class Base(object):
    """Common base giving subclasses a readable string form."""
    def __str__(self):
        """Return "<ClassName> <instance attribute dict>"."""
        return '%s %s' % (type(self).__name__, str(vars(self)))


class Job(Base):
    """Salient job data: the job's id and its status."""
    def __init__(self, jobid, status):
        """Store the job id (as ``id``) and its status."""
        self.id = jobid
        self.status = status

    def __cmp__(self, o):
        """Order jobs numerically by id."""
        return cmp(int(self.id), int(o.id))


class Server(Base):
    """
    Salient server data.

    The values are stored exactly as passed in; upgrade_agent() compares
    osmonitor and osbasemgmt with 'Yes' and hwmonitor with 'Enabled'.
    thresholds is a ServerThresholds instance or None.
    """
    def __init__(self, name, power, osbasemgmt, osmonitor,
                 oshealth, hwhealth, monitor, thresholds=None):
        """Store the given values; ``monitor`` is kept as ``hwmonitor``."""
        self.name = name
        self.power = power
        self.osbasemgmt = osbasemgmt
        self.osmonitor = osmonitor
        self.oshealth = oshealth
        self.hwhealth = hwhealth
        self.hwmonitor = monitor
        self.thresholds = thresholds

    def __cmp__(self, o):
        """Order servers by name."""
        return cmp(self.name, o.name)


class ServerThresholds(Base):
    """
    Threshold settings read from one server's details file.

    A new instance only has an empty ``filesystems`` list.  The per-monitor
    attributes (named after the values of THRESHOLD_KEYS, for example
    ``cpustats_pctusage``) are not created here: parse_server_details() adds
    each one as a two-element [warning, critical] list when it finds the
    matching monitor.
    """
    def __init__(self, name):
        """Create an empty record; ``name`` is accepted but not stored."""
        self.filesystems = []

    class Filesystem(Base):
        """Thresholds for a single filesystem, identified by ``dir``."""
        def __init__(self, dir):
            """Remember the filesystem name.

            parse_server_details() later adds ``fsusage_pctused`` as a
            [warning, critical] list.
            """
            self.dir = dir

        def __cmp__(self, o):
            """Order filesystems by ``dir``."""
            return cmp(self.dir, o.dir)


def parse_result_jobs(result):
    """
    Return map of {"id" => Job()} from output xml.

    An empty map is returned when result.status is non-zero.  Unicode output
    is encoded as UTF-8 before parsing, matching parse_result_servers(), so
    that non-ASCII text in the job list does not break the parser.
    """
    alljobs = {}
    if result.status != 0:
        return alljobs

    output = result.output
    if isinstance(output, unicode):
        output = output.encode('utf-8')

    dom = xml.dom.minidom.parseString(output)
    for job in dom.getElementsByTagName('job'):
        jid = get_text_from_node(job, 'id')
        jstatus = get_text_from_node(job, 'job_status')
        alljobs[jid] = Job(jid, jstatus)

    return alljobs


def parse_result_servers(result, errors):
    """
    Return map of {"name" => Server()} from output xml.

    An empty map is returned when result.status is non-zero.

    For every listed server, a details file named after the server is looked
    up in SERVER_DETAILS_DIR.  If it exists its thresholds are parsed and
    attached to the Server.  A details file that cannot be read or parsed is
    logged and reported through the errors list; the server is still
    returned, with thresholds set to None.
    """
    allservers = {}
    if result.status != 0:
        return allservers

    listing = xml.dom.minidom.parseString(result.output.encode('utf-8'))
    for server in listing.getElementsByTagName('server'):
        sname = get_text_from_node(server, 'name')
        spower = get_text_from_node(server, 'power')  # "On"
        sosbase = get_text_from_node(server, 'OsBasemgmtSupported') # "Yes"
        sosmonitor = get_text_from_node(server, 'OsMonitoringSupported') # "Yes"
        soshealth = get_text_from_node(server, 'OSHealth')   # "Good"
        shwhealth = get_text_from_node(server, 'hardwareHealth')   # "Good"
        smonitor = get_text_from_node(server, 'monitorstate') # "Enabled"

        thresholds = None
        details_file = os.path.join(SERVER_DETAILS_DIR, sname)
        if os.path.exists(details_file):
            try:
                details = open(details_file, 'r')
                # Nested try/finally: Python 2.4 cannot combine except and
                # finally in one statement.
                try:
                    thresholds = parse_server_details(
                        xml.dom.minidom.parse(details))
                finally:
                    details.close()
            except Exception:
                # Best effort: a bad details file must not stop the run.
                logging.error('Problem opening server details for %s'
                              % (quote(sname),), exc_info=True)
                exc_type, exc_value = sys.exc_info()[:2]
                errors.append('No server details for "%s": (%s) %s'
                              % (sname, exc_type, exc_value))

        allservers[sname] = Server(sname, spower, sosbase,
                                   sosmonitor, soshealth, shwhealth,
                                   smonitor, thresholds)

    return allservers


def parse_server_details(dom):
    """
    Build a ServerThresholds object from a parsed server details document.

    Every <monitoros> element whose <monitorName> appears in THRESHOLD_KEYS
    contributes a [warning, critical] list stored under the mapped attribute
    name.  Every <filesystem> element contributes a
    ServerThresholds.Filesystem carrying a 'fsusage_pctused' list.  A value
    of '-' in the document leaves the corresponding slot as None.
    """
    first_server = dom.getElementsByTagName('server')[0]
    result = ServerThresholds(get_text_from_node(first_server, 'name'))

    for monitor in dom.getElementsByTagName('monitoros'):
        attr = THRESHOLD_KEYS.get(get_text_from_node(monitor, 'monitorName'))
        if attr is None:
            continue
        if not hasattr(result, attr):
            setattr(result, attr, [None, None])
        levels = getattr(result, attr)

        for slot, tag in enumerate(('warningThreshold', 'criticalThreshold')):
            # Keep only the first word and drop a trailing percent sign.
            text = get_text_from_node(monitor, tag)
            text = text.split(' ', 1)[0].rstrip('%')
            if text != '-':
                levels[slot] = text

    for filesys in dom.getElementsByTagName('filesystem'):
        entry = ServerThresholds.Filesystem(
            get_text_from_node(filesys, 'name'))

        # Turns out N1SM 1.1 didn't have the string, and only had one key.
        levels = [None, None]
        setattr(entry, 'fsusage_pctused', levels)

        for slot, tag in enumerate(('usagewarning', 'usagecritical')):
            text = get_text_from_node(filesys, tag).rstrip('%')
            if text != '-':
                levels[slot] = text

        result.filesystems.append(entry)

    return result


# helpers
def get_text_from_node(dom, n):
    """
    Return the text of the first element named n found under dom.

    Raises IndexError when no such element exists.
    """
    matches = dom.getElementsByTagName(n)
    first = matches[0]
    return get_text(first.childNodes)


def get_text(nodelist):
    """Concatenate the data of every text node in nodelist, in order."""
    pieces = [node.data for node in nodelist
              if node.nodeType == node.TEXT_NODE]
    return "".join(pieces)


def get_job(cacao, id, now):
    """
    Return the Job with id "id" from the current "show job" listing.

    now is accepted for the benefit of callers but is not used.

    Raises JobExecuteError if "show job" fails or if the listing does not
    contain the requested job, so that callers handling JobExecuteError also
    cover a missing job.
    """
    #result = cacao.execute('show job startafter %s' % now)
    result = cacao.execute('show job')
    if result.status != 0:
        logging.warning(result.output)
        raise JobExecuteError('"show job" failed')

    jobs = parse_result_jobs(result)
    if id not in jobs:
        raise JobExecuteError('Job %s not found in "show job" output' % id)
    return jobs[id]


def wait_for_job(strm, cacao, id, now):
    """
    Poll until job "id" reaches a final status and return that status.

    A final status is any value in JOB_DONE_SUCCESS or JOB_DONE_ERROR; the
    caller decides whether it counts as success.  Progress dots are written
    to strm, one per second, with PAUSE_SEC seconds between polls.  There is
    no timeout: the loop runs until a final status is seen or get_job()
    raises.
    """
    final_states = JOB_DONE_SUCCESS + JOB_DONE_ERROR

    strm.write('...waiting for job %s.' % (id))
    strm.flush()
    while True:
        job = get_job(cacao, id, now)
        if job.status in final_states:
            print >>strm
            return job.status

        # Wait PAUSE_SEC seconds, showing one dot per second.
        for _ in range(PAUSE_SEC):
            time.sleep(1)
            strm.write('.')
            strm.flush()


def execute_job(strm, cacao, cmd):
    """
    Execute command and wait for job to finish.

    Progress is written to strm.

    Raise JobExecuteError if the command fails, if its output does not
    contain the job id, or if the job ends in a status that is not in
    JOB_DONE_SUCCESS.
    """
    # Timestamp from one minute ago (UTC), handed on to wait_for_job().
    now = (datetime.utcnow() - timedelta(seconds=60)).strftime('%Y-%m-%dT%H:%M:%SZ')
    print >>strm, '        starting job',
    strm.flush()

    result = cacao.execute(cmd)
    if result.status != 0:
        logging.warning(result.output)
        raise JobExecuteError('Job could not start; %s' % cmd)

    started = RE_JOB_STARTED.search(result.output.strip())
    if started is None:
        # The command succeeded but did not report a job id to wait for.
        logging.warning(result.output)
        raise JobExecuteError('Job id not found in output; %s' % cmd)

    jstate = wait_for_job(strm, cacao, started.group(1), now)
    if jstate not in JOB_DONE_SUCCESS:
        raise JobExecuteError('Job did not complete successfully; jobstate=%s' % jstate)


def quote(s):
    """
    Return s wrapped in double quotes, as a unicode string.

    Backslashes and double quotes inside s are escaped with a backslash.
    """
    escaped = s
    # Backslashes first, so the ones added for quotes are not doubled.
    for char in ('\\', '"'):
        escaped = escaped.replace(char, '\\' + char)
    return u'"' + escaped + u'"'



# Script entry point: run main() on the real stdout/stderr and use its
# return value as the process exit status.
if __name__ == '__main__':
    status = main((sys.stdout, sys.stderr), sys.argv[1:])
    sys.exit(status)




