package Rasagent;
use Repeat;
use Util;
use Debug;
use strict;
use Counter;
use TO;
use Catalog::Implementation;

# Install or remove the agent's entries in root's crontab.
#
#   $start_cron  "C" installs the agent entries; any other value only strips
#                existing entries and prints a warning on STDOUT.
#   $inspectors  "Y" additionally installs the rasinspect entry.
#
# The current crontab is copied to a scratch file without any previous agent
# lines, the new lines are appended, and the result is loaded with
# /usr/bin/crontab.
#
# Returns a status line ("  -> cron installed.\n", "  -> cron entry removed.\n"
# or "Error: ...\n").  When the crontab command fails, the failure is printed
# to STDOUT and undef is returned.
sub startstop_cron {
  my($class, $start_cron, $inspectors) = @_;

  my $HOME = System->get_home();
  unlink "$HOME/System/cron_started";
  my $F = "/var/spool/cron/crontabs/root";
  # NOTE(review): fixed, predictable scratch file in /tmp; this code edits
  # root's crontab, so a private temp file would be safer -- confirm with owners.
  my $TMP = "/tmp/ras_crons";
  my $rc;

  # Agent schedule: every 5 minutes, or every 10 when sysbin is present.
  my $E0 = "0,5,10,15,20,25,30,35,40,45,50,55";
  if (-d "/opt/SUNWstade/sysbin") {
    $E0 = "0,10,20,30,40,50";
  }

  my $COMMENT = "# **** Storage Automated Diagnostic Environment ****\n";
  my $E = $E0 . " * * * * $HOME/bin/rasagent -c >> $HOME/log/cron.log 2>&1\n";
  my $I = "1,6,11,16,21,26,31,36,41,46,51,56 * * * * $HOME/bin/rasinspect -c >> $HOME/log/cron_inspect.log 2>&1\n";
  my $P = "0 3 * * * $HOME/sysbin/patch_scheduler -r >> $HOME/log/cron_patch.log 2>&1\n";

  my $W;
  if (!open($W, '>', $TMP)) {
    return "Error: Cannot open $TMP: $!\n";
  }

  # Copy the existing crontab, dropping every line this module owns.
  # $HOME is quoted so path characters are never treated as regex syntax.
  if (open(my $O, '<', $F)) {
    while (my $line = <$O>) {
      next if ($line =~ /\Q$HOME\E\/bin\/rasagent / ||
               $line =~ /Storage Automated Diagnostic Environment / ||
               $line =~ /\Q$HOME\E\/bin\/rasinspect / ||
               $line =~ /sysbin\/patch_scheduler/);
      print {$W} $line;
    }
    close($O);
  }

  if ($start_cron eq "C") {
    print {$W} $COMMENT;
    print {$W} $E;
    print {$W} $I if ($inspectors eq "Y");
    print {$W} $P if (-d "/opt/SUNWstade/sysbin");
  } else {
    print "\n\n";
    print "WARNING: By not enabling the Agent Cron, when the agent is turned on, it\n";
    print "         will not start until the Agent Cron is enabled by running\n";
    print "         ras_install.\n";
    print "\n\n";
  }

  # Buffered write errors only surface at close; never load a truncated file.
  if (!close($W)) {
    return "Error: Cannot write $TMP: $!\n";
  }

  my $CRON;
  if (!open($CRON, '-|', "/usr/bin/crontab $TMP")) {
    # Previously an unstartable crontab command was reported as success.
    print "Crons update failed: cannot run /usr/bin/crontab: $! \n";
    return $rc;
  }
  # Any output from crontab is treated as a failure report.
  my $out = <$CRON>;
  close($CRON);

  if ($out) {
    print "Crons update failed: $out \n";
  } elsif ($start_cron eq "C") {
    $rc = "  -> cron installed.\n";
  } else {
    $rc = "  -> cron entry removed.\n";
  }
  return $rc;
}

# Decide whether this agent run may proceed, based on cron_conflicts().
#
#   $notifs   array ref of notification records; each {email} is mailed when
#             the agent shuts itself down
#   $PIDfile  path of the agent PID file, passed through to cron_conflicts()
#
# Returns normally when the run may continue.  Otherwise calls
# Process->done("rasagent") and exits with status 1.
sub check_cron {
  my($class, $notifs, $PIDfile) = @_;

  my $renv = System->get_renv();
  my $cc = $class->cron_conflicts($PIDfile);

# Abort if the agent already aborted the last 3 times it ran.
# cron_conflicts() returns 10 + <abort count> once the count reaches 3, so 13
# is the first such run; only that run sends the notifications.
# A debug level of 2 or more bypasses this shutdown.

  if ($cc >= 13 && Debug->level() < 2) {

    if ($cc == 13) {
      my $text = "Storage A.D.E process /opt/SUNWstade/bin/rasagent aborted 3 times in a row and is now shutdown permanently.";
      Debug->print0($text);
      System->logger("user.error", $text);
      my $text2 =<<EOF;
The Storage A.D.E agent on $renv->{hostname} failed to complete its run 
3 times in a row and has been shutdown permanently. 
This could be caused by running out of disk space in the DATA directory.
Ensure that the disk is not full in the SUNWstade/DATA directory.
Try running the agent manually with '/opt/SUNWstade/bin/rasagent -d3' to 
locate the problem. 
Running /opt/SUNWstade/bin/rasagent with -d2 or -d3 will reset the agent 
if the run completes normally.
EOF

      # Mail is loaded lazily elsewhere in this file; load it here as well so
      # the notification does not die on an unloaded class.
      require Mail;
      my $c1 = 0;
      foreach my $notif (@$notifs) {
        Mail->mail($notif->{email}, "Storage_ADE",
                         "Agent on $renv->{hostname} aborting", $text2);
        # cap the number of mails sent for one shutdown
        last if ($c1++ > 10);
      }
      # create event
      $class->agentErrorEvent($text2);
    }
    Process->done("rasagent");
    exit(1);
  }

# Abort this agent if an agent is already running or the last agent is hung.
  if ($cc == 1) {
    Process->done("rasagent");
    exit(1);
  }
}


# Raise an actionable 'agent.AlarmEvent' against this host carrying $text as
# its description, save it through PDM, and hand the resulting event ids to
# processEvents().
sub agentErrorEvent {
  my($class, $text) = @_;

  my $renv     = System->get_renv();
  my $data_dir = System->get_home() . "/DATA";
  my $pdm      = PDM->new({ dir => $data_dir, renv => $renv });

  # Report content used by the source detector below.
  my $report = {};
  Agent::addIdentification({ renv => $renv }, $report);
  $report = ReportContent->new($report);

  my $host_key = "host:" . System->hostname();

  my @event_props = (
     [ EventType   => 'agent.AlarmEvent'      ],
     [ EventId     => PDM->getEventSequence() ],
     [ Severity    => 2                       ],
     [ Actionable  => 1                       ],
     [ Caption     => "agent.system_error"    ],
     [ Target      => $host_key               ],
     [ TargetName  => System->hostname()      ],
     [ Description => $text                   ],
  );
  my $event = CIM::Instance->new('NWS_Event', \@event_props);

  my $sources = Events->sourceDetector({ event => $event, host => 1, rep => $report });

  # Associate the event with the first detected source element.
  my @link_props = (
     [ Event         => $event        ],
     [ Element       => $sources->[0] ],
     [ DiscoveryType => "D"           ],
  );
  my $link = CIM::Instance->new('NWS_EventPertainsToSoftwareElement', \@link_props);

  my $message = Message->new({
     id        => { deviceName => $renv->{hostname} },
     instances => [ $event, @$sources, $link ],
     state     => [ 'agent', 0 ],
     severity  => 2,
  });
  PDM->saveMessage($message);

  PDM->saveStateLog();

  # Try calling providers
  my $event_ids = PDM->getEventIdList();
  $pdm->storeMessages();
  Rasagent->processEvents($pdm, $event_ids);  #Master/Slave/DataHost

}



# Inspect the agent PID file and decide whether a new agent run may start.
#
# Return values:
#   0       no conflict (no PID file, empty PID file, unreadable timestamp, or
#           the previous run aborted fewer than 3 times); the caller may run
#   1       the previous agent is still running within its allowed time, or a
#           process could not be killed after three attempts
#   10 + n  the process named in the PID file no longer exists and the
#           'rasagent' counter has reached n >= 3
#
# An agent that has been running longer than the allowed time is killed with
# SIGTERM (signal 9 on the third attempt) together with its children, and a
# CRON_CONFLICT mail is sent when the Repeat throttle allows it.
sub cron_conflicts {
  my($class, $PIDfile) = @_;

  # $pspid/$children are declared once here.  They used to be re-declared
  # inside the loop, which hid them from the FAILING_TO_KILL message below.
  my($cnt, $created, $PID, $pspid, $children);
  my $SIGTERM = 15;
  my($renv, $devices, $hosts, $notifs) = PDM::ConfigFile->read();
  my $email_address = $notifs->[0]{email} || $renv->{admin_email};
  my $device_cnt = $#$devices +1;
  my $host_cnt   = $#$hosts   +1;

  if (!-f $PIDfile) {
    Counter->clear('rasagent');
    return 0;
  }

  # An unreadable PID file is treated like an empty one.
  if (open(my $fh, '<', $PIDfile)) {
    $PID = <$fh>;
    close($fh);
  }
  $cnt = 0;
  my $sig = $SIGTERM;
  my $repeat = Repeat->new("/tmp/ras_cron_conflicts", 60*60);

  if (!$PID) {
     Counter->clear('rasagent');
     $repeat->clear();
     return 0;
  }
  my $LB = Labels->read('rasagent');
  # Allowed run time in seconds before the previous agent is considered hung.
  #              devices               slaves
  my $WAIT = (600 * $device_cnt)  + (60 * ($host_cnt+1)) ;
  $WAIT   += 60*60 if ($renv->{solution} eq "N");  # HBAS

  my $last_command;
  while (1) {
     $cnt++;
     if ($cnt > 3) {
        $last_command = Util->readf(System->get_home() . "/DATA/last_run_command");
        Debug->err('FAILING_TO_KILL',$PID, $pspid . ", last command executed: $last_command");
        return 1;
     }
     $sig = 9 if ($cnt == 3);
     ($pspid, $children) = Util->findProcesses($PID);

     if ($pspid eq $PID) {
       # The PID file's modification time marks the start of the previous run.
       $created =  (stat($PIDfile))[9];
       if (!$created) {
         Counter->clear('rasagent');
         return 0;
       }
       if (time - $created > $WAIT) {
         $last_command = Util->readf(System->get_home() . "/DATA/last_run_command");
         if ($repeat->read()) {
            require Mail;
            my $hn = System->hostname();
            if($renv->{solution} ne "N"){
              # This is a solution rack, get se/se2 name
              foreach my $d (@$devices) {
                 if($d->{class} =~ /storage.se/){
                    $hn = $d->{name};
                    last;
                 }
              }
            }

            Mail->mail($email_address, "Storage_ADE", "CRON_CONFLICT on $hn",
                       $LB->expand(cron_hung => $pspid, $hn, $last_command));
         }
         Debug->err(CRON_CONFLICT =>
            "killing process ID: $pspid, last command executed was: $last_command");

         kill $sig, $pspid;
         kill $sig, @$children if ($#$children >= 0);
         sleep(1);
       } else {
         Debug->print('AGENT_RUNNING', $pspid) if (Debug->level());
         return 1;
       }
     #
     # NO pspid MEANS THAT THE LAST RUN ABORTED.
     } elsif (!$pspid) {
       Debug->print('PID' => "Remove File");
       my $cc = Counter->count('rasagent');
       my $MAX = 500;     # 500 x 5mins.
       if ($cc > $MAX) {
          Counter->set('rasagent', 3);
          $cc = 3;
       }
       return 10 + $cc if ($cc >= 3);

       unlink $PIDfile;
       return 0;

     } elsif ($pspid ne $PID) {
       Debug->err('WRONG_PROGRAM', $pspid);
       kill $sig, $pspid if ($pspid =~ /\d+/);
     }
  }
  # Not reached: every path through the loop above returns.
}


# Write the current process id ($$) to $PIDfile, replacing any previous content.
#
#   $LB       labels object; {create_pid} and expand('cannot_write_pid', ...)
#             supply the debug messages
#   $PIDfile  path of the PID file
#
# Exits the process when the file cannot be opened for writing.
sub create_pid_file {
  my($class, $LB, $PIDfile) = @_;

  # Three-argument open: the file name is never parsed for mode characters.
  if (open(my $fh, '>', $PIDfile)) {
    Debug->print(PID => $LB->{create_pid});
    print {$fh} $$;
    close($fh);
  } else {
    Debug->print(PID =>  $LB->expand('cannot_write_pid', $PIDfile));
    # NOTE(review): a bare exit reports status 0 although this is a failure
    # path -- confirm whether callers expect a non-zero status here.
    exit;
  }
}

# Return the first line of $PIDfile exactly as stored (no chomp), or undef
# when the file cannot be opened.  $LB is accepted but not used.
sub read_pid_file {
  my($class, $LB, $PIDfile) = @_;
  my $pid;
  # Three-argument open: mode characters and surrounding whitespace in the
  # file name are taken literally instead of being interpreted.
  if (open(my $fh, '<', $PIDfile)) {
    $pid = <$fh>;
    close($fh);
  }
  return $pid;
}

# Return a reference to a new array holding every element of @$dev1 and
# @$dev2 in random order.  Either argument may be undef.
#
# The earlier version keyed a hash by rand() and returned the sorted KEYS, so
# callers received random numbers instead of the devices; a duplicate key
# would also have dropped a device.
sub randomOrder {
  my($class, $dev1, $dev2) = @_;

  require List::Util;
  my @devs = List::Util::shuffle(@{ $dev1 || [] }, @{ $dev2 || [] });
  return \@devs;
}

# return the neighbors of a device, skipping those marked inactive (active eq "N")

# Return a reference to the list of neighbors of $dev, as reported by
# $class->neighbors(), leaving out every neighbor whose {active} is "N".
sub find_neighbors {
  my($class, $Config, $dev) = @_;

  my $candidates = $class->neighbors($Config, $dev);
  my @active;
  foreach my $neighbor (@$candidates) {
     next if ($neighbor->{active} eq "N");
     push(@active, $neighbor);
  }
  return \@active;
}

# Look up device $d in the "MERGE-MASTER" topology and return a reference to
# a list of the devices/hosts connected to its ports.
#
# When the device's event_list names a port (the third ':' field of a
# LinkEvent Target, or a "port.N"/"fcPort.N" Component), only that port is
# followed.  The scan stops once three connected nodes have been examined.
# Returns [] when the topology, the device or its node is missing.
sub neighbors {
  my($class, $Config, $d) = @_;
  my @found = ();
  my $topo = TO->readExistingTopo("MERGE-MASTER");
  return [] unless ($topo && $d);
  my $node = $topo->nodeByName("$d->{type}:$d->{key}");
  return [] unless ($node);

  # Work out which port (if any) the device's events point at.
  my $events = $d->{event_list};
  my $only_port;
  foreach my $ev (@$events) {
     my($ev_type, $ev_target, $ev_port) = split(/\:/, $ev->{Target});
     if ($ev->{EventType} =~ /\.LinkEvent/) {
        # a link event is decisive: stop looking
        $only_port = $ev_port;
        last;
     }
     if ($ev->{Component} =~ /^port.(\d+)$/ || $ev->{Component} =~ /^fcPort.(\d+)$/) {
        $only_port = $1;
     }
  }

  my $ports = $node->port();
  my $examined = 0;
  foreach my $idx (0 .. $#$ports) {
     next if (defined($only_port) && $idx != $only_port);
     my $port_name = $ports->[$idx];
     next unless ($port_name);
     my($peer) = $topo->nodeByName($port_name);
     next unless ($peer);
     last if ($examined++ > 2);

     my $peer_name = $peer->name();
     my($peer_type, $peer_key) = split(/\:/, $peer_name);
     my $dev;
     if ($peer_name =~ /^host:/) {
       # hosts are described by a small hash built from the host config
       my $host = $Config->hostByKey($peer_name);
       if ($host) {
         $dev = {
            type   => "host",
            name   => substr($peer_name, 5),
            ip     => $host->{ip},
            ipno   => $host->{ipno},
            active => $host->{active},
         };
       }
     } elsif ($peer_key ne $d->{key}) {
       # any other node, except the device itself
       $dev = $Config->deviceByKey($peer_name);
     }
     push(@found, $dev) if ($dev);
  }
  return \@found;
}

#  loop == 0: inspector
#  loop == 1: agent loop#
#
# Load the agent module $modname and run it once.
#
# Returns (1, []) when the module cannot be loaded, otherwise
# (0, <value returned by the agent's RUN method>).
sub process_module0 {
  my($class, $static_list, $modname, $LB, $loop, $SINFO) = @_;

  my $handler = Catalog::Implementation->loadAgent($modname);
  return (1, []) unless ($handler);

  my $found = $handler->RUN($static_list, $loop, $SINFO);
  return (0, $found);
}

# Run every inspector module once (loop == 0) and collect the devices they
# report into a fresh list, which is returned as an array reference.
#
#   $LB        labels object, passed through to process_module0()
#   $SINFO_DB  object providing hash(), Lock() and UnLock(); its hash is
#              updated by inspectorListAdd() while the lock is held
#
# Sleeps 30 seconds before returning when a matching module (see the note in
# the loop) reported devices.
sub run_inspectors {
  my($class, $LB, $SINFO_DB) = @_;
  my $static_list = [];
  my $SINFO = $SINFO_DB->hash();
  my $sleep;

  my @inspectors = ('T3MESSAGE','MESSAGE','DSPMESSAGE','TRAPMESSAGE', 'EVENTS', 'PING', 'EMAILMESSAGE');
  foreach my $modname (@inspectors) {
      my($status, $reported) = Rasagent->process_module0([], $modname, $LB, 0, $SINFO);
      next if ($#$reported < 0);

      # NOTE(review): index() is a substring test, so 'MESSAGE' also matches
      # here in addition to 'T3MESSAGE' and 'DSPMESSAGE' -- confirm intent.
      if (index("T3MESSAGE,DSPMESSAGE", $modname) >= 0) {
         $sleep = 30;
      }

      my $added;
      $SINFO_DB->Lock();
      foreach my $dev (@$reported) {
         next unless ($dev->{key});
         $added += $class->inspectorListAdd($dev, $SINFO, $static_list);
      }
      $SINFO_DB->UnLock();
      Debug->print2("Inspector $modname found problems..") if ($added);
  }
  sleep($sleep) if ($sleep);
  return $static_list;
}

# These devices were found by inspectors or MESSAGE agents. They are added to the
# list of devices to monitor the first time around, or after 2 complete round-robin cycles.
#
# Add device $d to $static_list unless it was already inspected during the
# current scheduled run.
#
# The per-device record in $SINFO (keyed by the device key) tracks
# {last_inspection} and {scheduled_run_count}.  The device is added when it
# has no last_inspection yet, or when scheduled_run_count has moved past it.
# Returns 1 when the device was added, undef otherwise.
sub inspectorListAdd {
  my($class, $d, $SINFO, $static_list) = @_;
  my $dev_key = $d->{key};
  my $added;
  my $renv = System->get_renv();

  my $info = $SINFO->{$dev_key} || {};
  my $last = $info->{last_inspection};

  if ($last && !($info->{scheduled_run_count} > $last)) {
     Debug->print2("Device $d->{name} not added: scheduled-run_count($info->{scheduled_run_count}) <= last_inspection($info->{last_inspection})");
     return $added;
  }

  # Record the inspection before queuing the device, then store the record.
  $info->{last_inspection} = $info->{scheduled_run_count} + 1;
  $class->listAdd($static_list, [$d]);
  $SINFO->{$dev_key} = $info;
  $added = 1;
  return $added;
}

# Append to @$list every element of @$new whose {key} is not already present
# in @$list (or earlier in @$new).
#
#   $label  optional; when set, each addition is logged with Debug->print2
#
# Returns the number of elements added (0 when none).
#
# Keys are tracked in a hash.  The earlier comma-joined string scan could
# report false matches: adjacent entries produced ",,", which matched an
# empty key.  The return value also depended on a trailing "$cnt++".
sub listAdd {
   my($class, $list, $new, $label) = @_;
   my $cnt = 0;
   my %seen;
   foreach my $el (@$list) {
     $seen{ defined($el->{key}) ? $el->{key} : '' } = 1;
   }
   foreach my $el (@$new) {
      my $key = defined($el->{key}) ? $el->{key} : '';
      next if ($seen{$key});
      Debug->print2("adding $label '$el->{name}/$el->{ipno}' to monitoring list..") if ($label);
      push(@$list, $el);
      $seen{$key} = 1;
      $cnt++;
   }
   return $cnt;
}

# MASTERLOC + DATAHOST : datahost with a master, push info to master
# MASTERLOC            : slave,                  push info to master
# DATAHOST             : datahost stand-alone,   run providers
# <nothing>            : master,                 run providers

# Deliver newly stored events.
#
# When Util->findMaster() returns a master location, the events are announced
# to it with PDM->push().  Otherwise every module returned by
# Modules->load("Provider") is instantiated with $pdm and its RUN method is
# called with $eventid_list.
sub processEvents {
  my($class, $pdm, $eventid_list) = @_;
  my $LB = Labels->read('rasagent');
  my $MASTER_LOC   = Util->findMaster();
  my $DATAHOST = Util->isDataHost();

  if ($MASTER_LOC) {  # a slave or datahost with a master
    PDM->push($MASTER_LOC); # tell master about new EDOCS to pick-up.
  } else {
    Debug->print2("\n" . $LB->{exe_pro});
    my $providers = Modules->load("Provider");

    foreach my $p (@$providers) {
      my $pro = "Provider::$p";
      my $f = $pro->new($pdm);

      # Also guards against a constructor that returned nothing.
      if ($f && $f->can('RUN')) {
         Util->saveLastCommand("Running provider $p\n");
         Debug->print1("\nProvider $p ");
         $f->RUN($eventid_list);
      } else {
         # $pro is a plain class-name string, so ref($pro) was always empty;
         # report the class name itself.
         Debug->err('RUN_MISSING', ref($f) || $pro);
      }
    }
    Debug->print1("\nDone with Providers\n\n");
  }
}
  



1;

