#!/bin/bash
## sunONE Calendar Server Watch Dog 0.2
## Created: 2003:07:29.15.52.29
##     checks calendar processes are alive
## Modified: 2003:07:31.09.17.31
##     monitors txn logs
##     monitors csdb available disk space
##     watchdog log rotation
##     uses calendar ics.conf configuration file
## Modified: 2003:08:01.08.02.19
##     monitors ldap connectivity
##     monitors its own status (exiting abnormally sends a warning email) < defunct
## Modified: 2003:08:04.08.23.27
##     monitors S1CS log files for error messages
## Modified: 2003:11:20.18.15.24
##     uses cstool to ping on http port to make sure that cshttp accepts incoming connections
##
## Purpose:
##   Keeps an eye on the S1CS processes and throws an
##   alarm when something goes wrong.
## Usage:
##   with no command-line parameter, launches the daemon
##   with -k option, kills the running daemon
##   with -c option, backs up the log file and starts a fresh one
##   with -h option, displays the usage message

## Signal handling section
### SIGINT (Ctrl-C): write the "interrupted by user" message to the watchdog
### log, then leave with a non-zero status (command-line options only)
trap 'myLog $LOG_FILE $CSWD_USERSTOP;exit -1' INT
### Disabled: signal 30 was meant to tag a -k termination issued by this tool
#trap 'myExit' 30

## A simple debug helping function
###
### Purpose:
###   prints a message on stdout when the configured debug level is at least
###   the level requested for that message
###   a call made with DBG_LEVEL=99 terminates the program with status 0
###   (after the optional print)
### Expects:
###   [DBG_LEVEL] is the minimum debug level required to display the message
###   all other parameters make up the message
###   uses [CSWD_DBGLEVEL] global variable (empty or unset counts as 0)
### Returns:
###   no return value
myDebug ()
{
  local DBG_LEVEL=${1:-0}
  shift
  local DBG_MSG="$*"
  ## defaulting to 0 keeps the test valid when the level is missing from the conf file
  if [ "${CSWD_DBGLEVEL:-0}" -ge "$DBG_LEVEL" ]; then
    ## quoted printf: the message is printed verbatim, so bracketed parts such as
    ## "IS[3]" are never treated as glob patterns and spacing is preserved
    printf '%s\n' "$DBG_MSG"
  fi
  if [ "$DBG_LEVEL" -eq 99 ]; then
    EXIT=1    ## flags the termination as normal (myExit sends no email when EXIT is 1)
    exit 0
  fi
}


## Simple config parameter reading function
###
### Purpose:
###   reads the value of a given parameter in the conf file and stores it
###   in the variable whose name is given
###   - lines starting with '!' are ignored
###   - the parameter name must match exactly the text found before the
###     first '=' (surrounding blanks ignored), so "a.b" never picks up "a.b.c"
###   - when the parameter appears several times, the last occurrence wins
###   - blanks around the value and every double quote in it are removed
###   - a parameter that is not found yields an empty value
### Expects:
###   [CONF_PRM] is the parameter name you're looking for in the config file
###   [CONF_NAME] is the name of the variable you want to return the value to
###   uses [CONF_FILE] global variable
### Returns:
###   no return value
myConf ()
{
  CONF_PRM=$1
  CONF_NAME=$2
  local conf_line conf_key conf_value=''
  ## "|| [ -n ... ]" keeps a last line that has no trailing newline
  while IFS= read -r conf_line || [ -n "$conf_line" ]; do
    case $conf_line in
      '!'*) continue ;;   ## comment line
      *=*)  ;;            ## candidate "name = value" line
      *)    continue ;;
    esac
    ## name = text before the first '=', trimmed on both sides
    conf_key=${conf_line%%=*}
    conf_key=${conf_key#"${conf_key%%[![:space:]]*}"}
    conf_key=${conf_key%"${conf_key##*[![:space:]]}"}
    if [ "$conf_key" = "$CONF_PRM" ]; then
      conf_value=${conf_line#*=}
    fi
  done < "$CONF_FILE"
  ## trim the value first, then drop the double quotes (blanks inside quotes are kept)
  conf_value=${conf_value#"${conf_value%%[![:space:]]*}"}
  conf_value=${conf_value%"${conf_value##*[![:space:]]}"}
  conf_value=${conf_value//\"/}
  ## only the variable NAME is expanded before eval runs; the value is assigned
  ## as data, so '$' or backticks found in the conf file are never executed
  eval "$CONF_NAME=\$conf_value"
  myDebug 4 "CONF_PRM_VALUE=${CONF_NAME}[${!CONF_NAME}]"
}

## Usage display function
###
### Purpose:
###   prints the help text on stdout (shown on -h or on a bad call)
### Expects:
###   no parameter
### Returns:
###   no return value
myUsage ()
{
  ## one printf, one argument per output line
  printf '%s\n' \
    "usage: $0 [option]" \
    "  [option] maybe one of:" \
    "    -c clean and back-up log file" \
    "    -k kill current running watchdog daemon" \
    "  only one option accepted at a time"
}

## A simple log function
###
### Purpose:
###   appends one line "<YYYY:MM:DD:HH.MM.SS> - <message>" to a log file
### Expects:
###   [FILENAME] is the log file name
###   All other parameters are joined with spaces to form the logged message
### Returns:
###   no return value
myLog ()
{
  local FILENAME=$1
  shift
  ## the timestamp is produced on its own: the message is never part of the
  ## date format string, so a '%' in the message (e.g. "FULL[95%]") is logged as is
  printf '%s - %s\n' "$(date '+%Y:%m:%d:%H.%M.%S')" "$*" >> "$FILENAME"
}


## A simple mail function
###
### Purpose:
###   builds a small text message (To/From/Subject headers, a blank line,
###   a TIMESTAMP line, then the body) and pipes it to /usr/lib/sendmail
###   does nothing unless the [SENDMAIL] global variable equals 1
### Expects:
###   [FRM] is the email address that appears in the from field
###   [DST] is the email address that appears in the to field
###   [SUB] is the email subject
###   All other parameters appear in the body as text
### Returns
###   no return value
myMail ()
{
  ## mail disabled: leave quietly
  [ $SENDMAIL -eq 1 ] || return 0

  FRM=$1
  shift
  DST=$1
  shift
  SUB=$1
  shift
  {
    printf 'To: %s\n' "$DST"
    printf 'From: %s\n' "$FRM"
    printf 'Subject: %s\n' "$SUB"
    printf '\n'
    date '+TIMESTAMP:%Y:%m:%d:%H.%M.%S'
    echo "$@"
  } | /usr/lib/sendmail -oi -t
}

## A simple process monitoring function (logs and sends mail)
###
### Purpose:
###   checks if the status of a given process has changed since last loop
###   if the process has started, logs a message
###   if the process has stopped, logs a message and sends an email
### Expects:
###   [process_name] is the name of the process to watch for
###   [wasrunning] is the number of matching processes seen during the
###                previous loop (empty counts as 0)
###   uses global variables [LOG_FILE],[CSWD_EMAILSRC],[CSWD_EMAILDST]
### Returns:
###   no return value; the current count is stored in the global variable
###   named <process_name>_status
myWatcher ()
{
  local process_name=$1
  local wasrunning=${2:-0}
  local msg_prefix up_log down_log isrunning
  ## the tr sets are quoted: unquoted [a-z] is a glob pattern that the shell
  ## would replace by any matching one-letter file name of the current directory
  msg_prefix=$(printf '%s\n' "$process_name" | tr '[a-z]' '[A-Z]')
  up_log="$msg_prefix IS NOW UP AND RUNNING"
  down_log="$msg_prefix IS DOWN"
  ## number of "ps -e" lines containing the process name
  isrunning=$(ps -e | grep -c -- "$process_name")
  myDebug 1 "$msg_prefix IS[$isrunning] AND WAS[$wasrunning]"
  if [ "$isrunning" -eq 0 ] && [ "$wasrunning" -gt 0 ]; then
    myLog "$LOG_FILE" "$down_log"
    myMail "$CSWD_EMAILSRC" "$CSWD_EMAILDST" "ERROR:$msg_prefix(DOWN)" "$down_log"
  elif [ "$isrunning" -gt 0 ] && [ "$wasrunning" -eq 0 ]; then
    myLog "$LOG_FILE" "$up_log"
  fi
  ## hands the status of [process_name] back to the caller
  eval "${process_name}_status=\$isrunning"
}

## A simple disk monitoring function
###
### Purpose:
###   checks whether the used disk space (in percent, as reported by the
###   last line of "df -k") has reached a given threshold
###   if yes, logs a message and sends an email
###   if the usage or the threshold is not a plain number (df failed,
###   parameter missing from the conf file) the check is skipped
### Expects:
###   [myDir] is the directory to check for disk occupation
###   [myThreshold] is the occupation threshold from which we send a warning
###   uses [CSDB_ERR_MSG], [CSDB_ERR_SUB], [LOG_FILE], [CSWD_EMAILSRC]
###   and [CSWD_EMAILDST] global variables
### Returns:
###   no return value
myDf ()
{
  local myDir=$1
  local myThreshold=$2
  local myUsed myMsg
  ## 5th blank-separated field of the last df line, without the '%' sign
  myUsed=$(df -k "$myDir" | sed -n '$p' | sed 's/  */ /g' | cut -f5 -d' ' | tr -d '%')
  myDebug 1 "CSDB USED DISK[$myUsed%] AND LIMIT IS SET TO[$myThreshold]"
  ## a numeric test on an empty or non-numeric value only produces a shell error
  case $myUsed in
    ''|*[!0-9]*) myDebug 1 "CSDB DISK CHECK SKIPPED: NO USAGE FIGURE"; return 0 ;;
  esac
  case $myThreshold in
    ''|*[!0-9]*) myDebug 1 "CSDB DISK CHECK SKIPPED: NO THRESHOLD"; return 0 ;;
  esac
  if [ "$myUsed" -ge "$myThreshold" ]; then
    ## CSDB_ERR_MSG is a template that refers to $myUsed; eval expands it now
    eval "myMsg=$CSDB_ERR_MSG"
    ## quoted: the message ends with "[NN%]", which unquoted is a glob pattern
    myLog "$LOG_FILE" "$myMsg"
    myMail "$CSWD_EMAILSRC" "$CSWD_EMAILDST" "$CSDB_ERR_SUB" "$myMsg"
  fi
}

## A simple txn log files check
###
### Purpose:
###   counts the transaction log files reported by db_archive and sends a
###   warning when 2 or more of them are still in use
###   (count = lines of "db_archive -l" minus lines of plain "db_archive")
### Expects:
###   [myDir] the directory where the log files can be found
###           (the current directory when empty)
###   must be run from a directory where ../tools/unsupported/bin/db_archive
###   and ../lib resolve
###   uses [TXN_ERR_MSG], [TXN_ERR_SUB], [LOG_FILE], [CSWD_EMAILSRC] and
###   [CSWD_EMAILDST] global variables
### Returns:
###   no return value; the computed count is left in [LOG_NUMBER]
myTxnCheck ()
{
# if [ $CSDB_CIRCULAR = "yes" ]; then
  local txn_dir=${1:-.}
  local LOG_ALL LOG_PROCESSED myMsg
  ## nothing to check when the directory does not exist
  [ -d "$txn_dir" ] || return 0
  #LOG_NUMBER=`ls $DIR/log.* 2>/dev/null|wc -l|tr -d ' '`
  ## NOTE(review): this export stays in effect for every command run afterwards
  export LD_LIBRARY_PATH=../lib
  LOG_ALL=$(../tools/unsupported/bin/db_archive -l -h "$txn_dir" | wc -l | tr -d ' ')
  LOG_PROCESSED=$(../tools/unsupported/bin/db_archive -h "$txn_dir" | wc -l | tr -d ' ')
  LOG_NUMBER=$(( LOG_ALL - LOG_PROCESSED ))
  myDebug 1 "TXN LOGS[$LOG_NUMBER]"
  if [ "$LOG_NUMBER" -ge 2 ]; then
    ## TXN_ERR_MSG is a template that refers to $LOG_NUMBER; eval expands it now
    eval "myMsg=$TXN_ERR_MSG"
    ## quoted: the message contains "[N]", which unquoted is a glob pattern
    myLog "$LOG_FILE" "$myMsg"
    myMail "$CSWD_EMAILSRC" "$CSWD_EMAILDST" "$TXN_ERR_SUB" "$myMsg"
  fi
# fi
}

## A simple log monitor for the daemon purposes
###
### Purpose:
###   monitors the watchdog log file size (in bytes)
###   if the size has reached the given limit, runs "$0 -c" (log back-up
###   and clean-up) and then terminates the current process with status 0
###   the check is skipped when the log file does not exist yet or when
###   the limit is not a plain number
### Expects:
###   [LOG_MAXSIZE] is the maximum size the log can get
###   uses [LOG_FILE] global variable
### Returns:
###   no return value
myLogMon ()
{
  local LOG_MAXSIZE=$1
  local LOG_SIZE
  ## no log file yet: nothing to rotate
  [ -f "$LOG_FILE" ] || return 0
  case $LOG_MAXSIZE in
    ''|*[!0-9]*) return 0 ;;
  esac
  ## byte count taken from wc rather than from a parsed "ls -l" line
  LOG_SIZE=$(wc -c < "$LOG_FILE" | tr -d ' ')
  case $LOG_SIZE in
    ''|*[!0-9]*) return 0 ;;
  esac
  if [ "$LOG_SIZE" -ge "$LOG_MAXSIZE" ]; then
    "$0" -c
    ## NOTE(review): this exit also ends the watch loop of the calling daemon
    ## once the log has been rotated; confirm this is intended
    exit 0
  fi
}

## A simple process termination function
###
### Purpose:
###   Kills a process and deletes the related pid file
###   The signal is sent again until "ps -p" no longer reports the process,
###   or until kill itself fails
### Expects:
###   [myPidFile] is the pid file name
###   [mySignal] is the signal you want to send to your pid (15 if nothing given)
### Returns:
###   no return value
myKill ()
{
  local myPidFile=$1
  ## ${2:-15} really applies the documented default when no signal is given
  local mySignal=${2:-15}
  local myPid isMyPidRunning
  if [ -r "$myPidFile" ]; then
    ## if the pid file exists and is readable, then use it!
    myPid=$(cat "$myPidFile")
    myDebug 4 "PID FILE FOUND AND PID IS[$myPid]"
  else
    ## In case the pid file has been removed:
    ## get the pids of the running instances of $0 and keep the lowest one
    ## (numeric sort), because the daemon and this very process both show
    ## up under the same name and the daemon is the older of the two
    myPid=$(ps -e | grep -- "$0" | sed 's/^[ \t]*//' | cut -f1 -d' ' | sort -n | sed -n '1p')
    myDebug 4 "PID FILE NOT FOUND AND PID IS[$myPid]"
  fi
  ## without a pid there is nothing to signal
  if [ -n "$myPid" ]; then
    isMyPidRunning=$(ps -p "$myPid" | sed '1d' | wc -l | tr -d ' ')
    myDebug 4 "myKill: Is my pid[$myPid] from[$myPidFile] running?[$isMyPidRunning]"
    while [ "$isMyPidRunning" -gt 0 ]; do
      ## a failing kill (no permission, process already gone) will not get
      ## better by retrying: leave the loop instead of spinning forever
      kill "-$mySignal" "$myPid" || break
      isMyPidRunning=$(ps -p "$myPid" | sed '1d' | wc -l | tr -d ' ')
      myDebug 4 "myKill: Is my pid[$myPid] from[$myPidFile] running?[$isMyPidRunning]"
    done
  fi
  if [ -w "$myPidFile" ]; then
    rm -f "$myPidFile"
  fi
}

## Watchdog clean termination function
###
### Purpose:
###   performs actions before actually exiting the program
###   in daemon mode: a father daemon first kills its son (pid file
###   "$PID_FILE.son", signal 9), then the termination is logged as normal
###   when [EXIT] is 1, or logged and mailed as abnormal otherwise
###   always ends the process with status 0
###   (the trap that was meant to call this function is currently commented out)
### Expects:
###   no parameters
###   uses [DAEMON], [FATHER] and [EXIT] global variables
###   uses [PID_FILE] global variable to locate the son pid file
###   uses [CSWD_EMAILSRC], [CSWD_EMAILDST], [CSWD_DOWN_ABNORMAL_SUB] and
###   [CSWD_DOWN_ABNORMAL_MSG] to send the email
###   uses [LOG_FILE] and [CSWD_DOWN_NORMAL] global variables to log messages
### Returns:
###   no return value
myExit ()
{
  if [ "$DAEMON" -eq 1 ]; then
    echo "DAEMON IS SET"
    if [ "$FATHER" -eq 1 ]; then
      echo "FATHER IS SET"
      ## myKill expects the pid FILE name, not the pid stored in it
      myKill "$PID_FILE.son" 9
    fi
    if [ "$EXIT" -ne 1 ]; then
      myLog "$LOG_FILE" "$CSWD_DOWN_ABNORMAL_MSG"
      myMail "$CSWD_EMAILSRC" "$CSWD_EMAILDST" "$CSWD_DOWN_ABNORMAL_SUB" "$CSWD_DOWN_ABNORMAL_MSG"
    else
      myLog "$LOG_FILE" "$CSWD_DOWN_NORMAL"
    fi
  fi
  exit 0
}

## A simple Ldap monitor
###
### Purpose:
###   checks if the ldap server still answers: runs /usr/bin/ldapsearch for
###   uid=[LDAP_UID] and counts the lines of the answer
###   an empty answer is logged and reported by email
### Expects:
###   no parameters
###   uses [LDAP_HOST], [LDAP_PORT], [LDAP_EUADM], [LDAP_EUPWD], [LDAP_BDN]
###   and [LDAP_UID] global variables to run the search
###   uses [CSWD_EMAILSRC] and [CSWD_EMAILDST] global variables to send a warning email
###   uses [LOG_FILE] global variable to log messages
### Returns:
###   no return values
myLdapCheck ()
{
  ## the bind password is masked: debug output must not disclose it
  myDebug 2 "ldapsearch -h $LDAP_HOST -p $LDAP_PORT -D $LDAP_EUADM -w ******** -b $LDAP_BDN uid=$LDAP_UID"
  ## every value is quoted so that a DN or password containing blanks stays one argument
  ## NOTE(review): the password is still passed on the command line and is
  ## therefore visible to "ps" while ldapsearch runs
  LDAP_ANSWERS=$(/usr/bin/ldapsearch -h "$LDAP_HOST" -p "$LDAP_PORT" -D "$LDAP_EUADM" -w "$LDAP_EUPWD" -b "$LDAP_BDN" "uid=$LDAP_UID" | wc -l | tr -d ' ')
  myDebug 1 "LDAP_ANSWERS[$LDAP_ANSWERS]"
  if [ "$LDAP_ANSWERS" -eq 0 ]; then
    myLog "$LOG_FILE" "$LDAP_ERR_MSG"
    myMail "$CSWD_EMAILSRC" "$CSWD_EMAILDST" "$LDAP_ERR_SUB" "$LDAP_ERR_MSG"
  fi
}


##
###
### Purpose:
### Expects:
### Returns:

## A simple ping function
###
### Purpose:
###   Making sure that cshttpd accepts incoming connections
###   runs "./cstool ping http" and counts the lines it prints: one line is
###   expected when the ping works, two lines when it does not
###   1 -> 2 : logs a message and sends an email
###   2 -> 1 : logs a message
###   any change of the line count is remembered for the next check
### Expects:
###   no parameters
###   uses [HTTP_ALIVE] global value to detect status change between two checks
###   (0 or empty means "not tested yet")
###   uses [HTTP_PING_ERR], [HTTP_PING_OK], [HTTP_PING_SUB], [LOG_FILE],
###   [CSWD_EMAILSRC] and [CSWD_EMAILDST] global variables
### Returns:
###   no return values
myPing ()
{
  local RESULT
  ## if HTTP_ALIVE is 0, that means we're just starting csmonitor:
  ## assume that cshttpd does NOT accept incoming connections until tested
  if [ "${HTTP_ALIVE:-0}" -eq 0 ]; then
    HTTP_ALIVE=2
  fi
  RESULT=$(./cstool ping http | wc -l | tr -d ' ')
  if [ "$RESULT" -ne "$HTTP_ALIVE" ]; then
    ## the status has changed since last check
    ## (messages are quoted: they end with "[port]", which unquoted is a
    ## glob pattern and could be replaced by a matching file name)
    if [ "$RESULT" -eq 2 ] && [ "$HTTP_ALIVE" -eq 1 ]; then
      ## connections are not accepted anymore: log and send an email
      myLog "$LOG_FILE" "$HTTP_PING_ERR"
      myMail "$CSWD_EMAILSRC" "$CSWD_EMAILDST" "$HTTP_PING_SUB" "$HTTP_PING_ERR"
    elif [ "$RESULT" -eq 1 ] && [ "$HTTP_ALIVE" -eq 2 ]; then
      ## connections are accepted again: just log a message
      myLog "$LOG_FILE" "$HTTP_PING_OK"
    fi
    ## let's save the status for next time
    HTTP_ALIVE=$RESULT
  fi
}

## A simple log explorer
###
### Purpose:
###   Meant to check a file periodically for new error messages
###   For now it only stores its two parameters, derives a base name from $0
###   (everything before the first dot) and reads "<base name>.myLogFiles.lts"
###   into [myLastTimeStamp]
###   NOTE(review): looks unfinished and is not called anywhere in this file;
###   the literal "myLogFiles" in the file name may have been meant as
###   $myLogFiles - confirm before using
### Expects:
###   [myLogFiles] first parameter
###   [myErrorMsg] second parameter
### Returns:
###   no return value
myLogXplorer ()
{
  myLogFiles=$1; myErrorMsg=$2
  ## %%.* removes the longest suffix starting with a dot
  myProgramName=${0%%.*}
  myLastTimeStamp=$(cat $myProgramName.myLogFiles.lts)
}

## A simple sendmail test
###
### Purpose:
###   Checks that /usr/lib/sendmail exists and is executable
###   when it is, the variable whose name is given is set to 1
###   otherwise that variable is left untouched
### Expects:
###   [SM] is the name of the variable to set
### Returns:
###   no return values
mySendmailTest ()
{
  SM=$1
  ## no usable sendmail: leave the caller's variable as it is
  test -x /usr/lib/sendmail || return 0
  eval "${SM}=1"
}
##################
## Main section ##
##################
DAEMON=0                   ## need this to know if we run in daemon mode or one shot mode
FATHER=0                   ## need to know if we are father daemon or son daemon
EXIT=0                     ## need this to know if termination is normal
SENDMAIL=0                 ## set to 1 below when sendmail can be used
HTTP_ALIVE=0               ## 0 = cshttpd connectivity not tested yet (see myPing)
mySendmailTest SENDMAIL

## only a trailing ".sh" is removed from the program name
BASE_FILE=${0%.sh}
PID_FILE="$BASE_FILE.pid"
## Getting configuration parameters from conf file
CSWD_DBGLEVEL=0
## relative path: the watchdog has to be started from the directory that holds config/
CONF_FILE="config/ics.conf"
if [ ! -r "$CONF_FILE" ]; then             ## Let's check first if we find that conf file
  echo "S1CS CSMONITOR ERROR: $CONF_FILE NOT READABLE"
  exit 2
fi
myConf logfile.logdir LOG_DIR
myConf logfile.monitor.logname LOG_NAME
## the tests are quoted: unquoted, "[ -n ]" is always true and the fallbacks
## below were never used (an empty log dir even became "/")
if [ -n "$LOG_DIR" ]; then
  LOG_DIR="$LOG_DIR/"
else
  LOG_DIR="./"
fi
if [ -n "$LOG_NAME" ]; then
  LOG_FILE="$LOG_DIR$LOG_NAME"
else
  ## default log name: program name without its directory part
  LOG_FILE="$LOG_DIR${BASE_FILE##*/}.log"
fi
myConf service.monitor.dbglevel CSWD_DBGLEVEL
## myConf leaves the variable empty when the parameter is absent
CSWD_DBGLEVEL=${CSWD_DBGLEVEL:-0}

### MESSAGES SECTION
###
### Plain texts are single-quoted literals. The two *_ERR_MSG templates hold
### a back-quoted echo that is expanded later through eval, once the figure
### they refer to is known.
###
#### CSWD (CALENDAR SERVER WATCH DOG) MESSAGES
CSWD_LOGCLEANING='CSMONITOR: CLEANING LOG FILE'
CSWD_LOGCLEANED='CSMONITOR: LOG FILE CLEANED'
CSWD_DAEMON='CSMONITOR: LAUNCHING DAEMON'
CSWD_KILL='CSMONITOR: KILLING DAEMON'
CSWD_KILLED='CSMONITOR: DAEMON KILLED'
CSWD_UP='CSMONITOR: DAEMON STARTED'
CSWD_DOWN_NORMAL='CSMONITOR: DAEMON STOPPING BEAUTIFULLY'
CSWD_DOWN_ABNORMAL_MSG='CSMONITOR: DAEMON STOPPING ABNORMALLY'
CSWD_DOWN_ABNORMAL_SUB='CSMONITOR:DIED_ABNORMALY'
CSWD_USERSTOP='CSMONITOR: DAEMON INTERRUPTED BY USER'
####
#### LDAP MESSAGES
LDAP_ERR_SUB='CSMONITOR:LDAP_ERROR'
LDAP_ERR_MSG='LDAP IS NOT RESPONDING'
####
#### TXN MESSAGES (template refers to $LOG_NUMBER)
TXN_ERR_SUB='CSMONITOR:TXN_WARNING'
TXN_ERR_MSG='`echo "CSMONITOR FOUND [$LOG_NUMBER] TXN LOGS"`'
####
#### CSDB MESSAGES (template refers to $myUsed)
CSDB_ERR_SUB='CSMONITOR:CSDB_WARNING'
CSDB_ERR_MSG='`echo "CSDB DIRECTORY IS ALMOST FULL[$myUsed%]"`'
####
#### HTTP MESSAGES
HTTP_PING_ERR='CSMONITOR: CSHTTPD DOES NOT ACCEPT CONNECTIONS ANYMORE ON PORT'
HTTP_PING_SUB='CSMONITOR:CSHTTPD_NO_PING_ANSWER'
HTTP_PING_OK='CSMONITOR: CSHTTPD NOW ACCEPTS CONNECTIONS ON PORT'
####
###
###END OF MESSAGE SECTION

### AUTO LAUNCH IN DAEMON MODE AND DETACH FROM TERMINAL
### No parameter: refuse to start when the pid file points to a live process,
### otherwise start "$0 -d" in the background, record its pid and leave.
### One option: -c rotates the log, -d runs the watch loop (reached after this
### block), -k kills the running daemon, -h or anything else prints the usage.
if [ $# -eq 0 ]; then
  #### Let's test whether another daemon is already running
  if [ -r "$PID_FILE" ]; then
    PID_NUMBER=$(cat "$PID_FILE")
    ALREADY_RUNNING=0
    ## an empty pid file cannot point to a running daemon
    if [ -n "$PID_NUMBER" ]; then
      ALREADY_RUNNING=$(ps -p "$PID_NUMBER" | sed '1d' | wc -l | tr -d ' ')
    fi
    myDebug 3 "ALREADY RUNNING [$PID_NUMBER]?[$ALREADY_RUNNING]"
    if [ "$ALREADY_RUNNING" -gt 0 ]; then
      echo "S1CS CSMONITOR WARNING: A MONITOR IS ALREADY RUNNING. QUITTING"
      exit 1
    fi
  fi
  myLog "$LOG_FILE" "$CSWD_DAEMON"
  "$0" -d &
  echo $! > "$PID_FILE"
  disown
  exit 0
else
  ## only the first option is looked at (one option accepted at a time)
  getopts "cdhk" myOption
  case "$myOption" in
    c)  myLog "$LOG_FILE" "$CSWD_LOGCLEANING"
        ## the back-up is named after the current day
        mv "$LOG_FILE" "$LOG_FILE.$(date '+%Y-%m-%d')"
        myLog "$LOG_FILE" "$CSWD_LOGCLEANED"
        exit 0
        ;;
    d)  myLog "$LOG_FILE" "$CSWD_UP"
        echo "$CSWD_UP"
        ### Getting config parameter in config file
        myConf logfile.monitor.maxlogfilesize CSWD_LOGMAXSIZE
        myConf service.monitor.csdb.logthreshold CSDB_THRESHOLD
        myConf caldb.berkeleydb.homedir.path CSDB_DIR
        myConf caldb.berkeleydb.circularlogging CSDB_CIRCULAR
        myConf service.monitor.emailaddress.from CSWD_EMAILSRC
        myConf service.monitor.emailaddress.to CSWD_EMAILDST
        myConf service.monitor.loopsdelay CSWD_REFRESH
        myConf service.monitor.continuous CSWD_CONTINUOUS
        myConf service.http.port HTTP_PORT
        ## plain assignments: an unquoted "[port]" passed to echo is a glob
        ## pattern and could be replaced by a matching file name
        HTTP_PING_ERR="$HTTP_PING_ERR [$HTTP_PORT]"
        HTTP_PING_OK="$HTTP_PING_OK [$HTTP_PORT]"
        myConf local.ugldaphost LDAP_HOST
        myConf local.ugldapport LDAP_PORT
        myConf local.ugldapbasedn LDAP_BDN
        myConf local.enduseradmindn LDAP_EUADM
        myConf local.enduseradmincred LDAP_EUPWD
        myConf service.admin.calmaster.userid LDAP_UID
        DAEMON=1
        ;;
    k)  ## PID_NUMBER is not set on this path, so read it from the pid file;
        ## without a recorded pid there is no daemon to kill
        PID_NUMBER=
        if [ -r "$PID_FILE" ]; then
          PID_NUMBER=$(cat "$PID_FILE")
        fi
        if [ -n "$PID_NUMBER" ]; then
          echo "$CSWD_KILL"
          myLog "$LOG_FILE" "$CSWD_KILL"
          myKill "$PID_FILE" 9
          myLog "$LOG_FILE" "$CSWD_KILLED"
          echo "$CSWD_KILLED"
        else
          echo "NO $0 DAEMON RUNNING"
        fi
        exit 0
        ;;
    h|\?) myUsage
        exit 0
        ;;
  esac
fi


### watch loop
### Runs every check in turn, forever. Unless continuous mode is requested
### (CSWD_CONTINUOUS different from 0) it sleeps CSWD_REFRESH seconds
### between two rounds.
#### initializing the status (running or not): number of "ps -e" lines
#### that contain each process name
csadmind_status=$(ps -e | grep -c csadmind)
csnotify_status=$(ps -e | grep -c csnotify)
cshttpd_status=$(ps -e | grep -c cshttpd)
enpd_status=$(ps -e | grep -c enpd)
#### initialization done

#### parameters missing from the conf file come back empty; without these
#### defaults the delay test below fails and the loop spins without pause
CSWD_CONTINUOUS=${CSWD_CONTINUOUS:-0}     ## default: wait between loops
CSWD_REFRESH=${CSWD_REFRESH:-60}          ## default delay, in seconds

while :; do
  myWatcher csadmind "$csadmind_status"   ## let's watch if csadmind is running fine
  myWatcher csnotify "$csnotify_status"   ## let's watch csnotify
  myWatcher cshttpd "$cshttpd_status"     ## let's now watch cshttpd
  myWatcher enpd "$enpd_status"           ## finally, enpd
  myDf "$CSDB_DIR" "$CSDB_THRESHOLD"      ## Checking if there is still enough space available for db
  myTxnCheck "$CSDB_DIR"                  ## Checking the TXN Logs
  myLdapCheck                             ## Checking if LDAP is alive
  myPing                                  ## Checking if cshttpd accepts incoming connections
  myLogMon "$CSWD_LOGMAXSIZE"             ## Check the watchdog logfile size
  if [ "$CSWD_CONTINUOUS" -eq 0 ]; then   ## Do we want delay between loops ?
    sleep "$CSWD_REFRESH"                 ## if yes, let's wait for the delay set in the conf file
  fi
done
