#!/bin/perl

# Original from J Greely <jgreely@cis.ohio-state.edu>, 9/30/92
#
# Heavily modified by Brent Chapman <Brent@GreatCircle.COM>

# /sources/cvsrepos/majordomo/digest/digest,v
# 1.12
# 1996/05/23 21:57:52
# cwilson
# Exp
# 
# /sources/cvsrepos/majordomo/digest/digest,v 1.12 1996/05/23 21:57:52 cwilson Exp
# 
# 
# 
# digest,v
# Revision 1.12  1996/05/23  21:57:52  cwilson
# o  patch from pdc to make the sleep time 600 random secs,
#    as well as exit with $EX_TEMPFAIL if the lock couldn't be grabbed.
#
# Revision 1.11  1996/02/01  14:58:35  cwilson
# From: pdc@lunch.engr.sgi.com (Paul Close)
#
# Basically, these diffs add the ability to send a digest based on time or
# number of lines, in addition to the usual byte count.  I've changed the
# options as follows:
#
#     -r same as always.
#     -R receive, but don't make a digest, which allows batching.  See -p
#        to make a digest (or the make digest command).
#     -p make a digest, but only if one should be sent.  A crontab job would
#        run this once a day to implement the time limit.
#
# Revision 1.10  1996/01/17  15:15:20  cwilson
# updated default place of perl.
#
# Revision 1.9  1994/06/18  02:01:29  rouilj
# Added $sendmail_command to digest, and provided a fallback for the
# case where digest is being used standalone.
#
# Revision 1.8  1994/05/17  19:20:30  rouilj
# Now using resend_host parameter on errors-to and from headers. Before
# there was no domain spec put on the addresses.
#
# Revision 1.6  1994/05/08  01:16:44  rouilj
# Fixed bug in file limiting code.
#
# Revision 1.5  1994/05/07  22:01:15  rouilj
# Added code to only include files with the name [0-9]* in a digest.
# It warns if there are files that aren't supposed to be in the
# incomming directory.
#
# Revision 1.4  1994/04/20  23:19:42  rouilj
# Fixed typo that affected the name of the archive dir.
#
# Revision 1.3  1994/03/06  22:47:06  rouilj
# set $main'main_program to mj_digest. Fixed bug that caused precedence
# header to be improperly set up. Fixed fronter and footer values
# obtained from config file.
#
# Revision 1.2  1994/02/21  18:27:38  rouilj
# Made digest part of the config file code enhancements.
#
# Revision 1.8  1992/10/16  21:33:27  brent
# Made RFC1153 compliant.  -Brent
#
# Revision 1.7  1992/10/02  17:03:33  brent
# Cleaned up RCS headers.  -Brent
#
# Revision 1.6  1992/10/02  17:00:13  brent
# Added author credits, RCS headers.  -Brent
#
# Revision 1.5	1992/10/02  16:46:08  brent
# Added blank line after encapsulation boundary.
# Added "Precedence: bulk" header
# 
# Revision 1.4	1992/10/02  16:44:19  brent
# Added "chdir($HOME)" to make everything happy.  -Brent
# 
# Revision 1.3	1992/10/01  23:12:55  brent
# Extensive modifications for Firewalls-Digest.  -Brent
# 
# Revision 1.2	1992/10/01  21:13:21  brent
# Revised .cf file handling; made keys match header names.
# Added "-v" and "-n" switches
# 
# Revision 1.1	1992/10/01  20:42:17  brent
# Initial revision
# 

# Before doing anything else tell the world I am majordomo
# The mj_ prefix is reserved for tools that are part of majordomo proper.
$main'program_name = 'mj_digest';


&init;
&readconfig;

require "shlock.pl";

$sendmail_command = "/usr/lib/sendmail" if ! defined $sendmail_command ;

&set_lock;

if (defined($opt_r)) {
    &receive_message;
    if (&should_be_sent(1)) {
	&make_digest;
    }
} elsif (defined($opt_R)) {
    &receive_message;
} elsif (defined($opt_m)) {
    &make_digest;
} elsif (defined($opt_p)) {
    if (&should_be_sent(1)) {
	&make_digest;
    }
} else {
    &abort("Usage: digest {-r|-R|-m|-p} [-c config|(-C -l list)]\nStopped");
}

&free_lock;

exit(0);

sub receive_message {
    $i = 0;
    do {
        $file = sprintf("%s/%03d", $V{'INCOMING'}, ++$i);
    } until (! -e $file);

    print STDERR "Receiving $i\n";
    open(MSG, ">$file") || &abort("open(MSG, \">$file\"): $!");

    # copy the message
    while (<STDIN>) {
	print MSG $_;
    }

    close(MSG);
}

#
# Use config variables to determine if a digest should be contructed
# and sent, or not.  Measures line count and byte count of messages
# as they would appear in the digest, not as they exist in the spool
# dir.  Side-effect: $file is the last file that should be included
# in this digest, based on the config variables.
#
sub should_be_sent {
    # fudge factors for headers and footers
    $sum = 600 + length($HEADER) + length($HEADERS) + length($TRAILER);
    $lines = 25 + ($HEADER =~ s/\n/\n/g) + ($HEADERS =~ s/\n/\n/g) +
	($TRAILER =~ s/\n/\n/g);
    ##print "start: lines = $lines\n";
    $i = shift;
    while (1) {
	$nextfile = sprintf("%s/%03d", $V{'INCOMING'}, $i++);
	last unless (-e $nextfile);
	$file = $nextfile;
	open(COUNT, "<$file") || &abort("open(COUNT, \"<$file\"): $!");

	$/ = '';		# grab the header
	$head = <COUNT>;

	# only count From/Date/Subject header fields to get a
	# more accurate size and line count.
	$head =~ s/\n\s+/ /g;
	$head =~ /^(From:\s+.*)/i    && ($sum += length($1)+1, $lines++);
	$head =~ /^(Subject:\s+.*)/i && ($sum += length($1)+1, $lines++);
	$head =~ /^(Date:\s+.*)/i    && ($sum += length($1)+1, $lines++);
	$sum++, $lines++;

	# count the body of the message
	undef $/;
	$body = <COUNT>;
	$sum += length($body);
	$lines += ($body =~ s/\n/\n/g);		# count newlines

	$/ = "\n";
	close(COUNT);
	$sum += length($EB) + 3, $lines += 3;	# account for message delimiter

	##printf "After message %03d, lines = $lines\n", $i-1;

	if ($V{'DIGEST_SIZE'} && $sum > $V{'DIGEST_SIZE'}) {
	    return(1);
	}
	if ($V{'DIGEST_LINES'} && $lines > $V{'DIGEST_LINES'}) {
	    return(1);
	}
	if ($V{'MAX_AGE'} && (-M $file) > $V{'MAX_AGE'}) {
	    return(1);
	}
    }

    return(0);
}

#
# Loop through calling 'should_be_sent' to find out how large each digest
# should be and calling send_digest to construct and send each digest.
# All the files in the spool directory are sent.  This could be modified
# to only send "complete" digests.
#
# Note that this will quietly terminate if there are no messages in the
# spool.  I find this preferable to an abort.
#
sub make_digest {
    # disable age detection
    $V{'MAX_AGE'} = 0;
    # use 'should_be_sent' to find out how large each digest should be
    # and loop through the spool dir until it's empty
    $fnum = 0;
    $nextfile = sprintf("%s/%03d", $V{'INCOMING'}, ++$fnum);
    while (-e $nextfile) {
	# starts at $fnum, sets '$file' to the last file to use
	&should_be_sent($fnum);
	&send_digest($file);
	($fnum) = $file =~ m#/(\d+)$#;
	$nextfile = sprintf("%s/%03d", $V{'INCOMING'}, ++$fnum);
	$NUMBER++;
    }

    if (! $opt_d) {
	if ( ! defined($opt_C) ) {
	    open(NUM_FILE, ">$V{'NUM_FILE'}") ||
		    &abort("open(NUM_FILE, \">$NUM_FILE\"): $!");
	    printf NUM_FILE "%d\n", $NUMBER;
	    close(NUM_FILE);
	} else { # hurrah we are using the majordomo config file
	    $config_opts{$opt_l,"digest_issue"} = $NUMBER;
	    &config'writeconfig($listdir, $opt_l);
	}
    }
}

#
# Contruct and send a digest using files in the spool directory up to and
# including the "last file" specified as the first argument.
#
sub send_digest {
    local($lastfile) = shift;
    @files=<$V{'INCOMING'}/*>;
    if ($#files < $[) {
	&abort("No messages.\nStopped ");
    }
    open(TEMP,">$TEMP") || &abort("$TEMP: $!\n");
    print STDERR "producing $V{'NAME'} V$VOLUME #$NUMBER\n";
    foreach $message (@files) {
	print STDERR "non digest input file $message", next
	    if $message !~ m#/\d+$#;
	    open(message) || &abort("$message: $!\n");
	    #side note: "open message or die"?
	    print STDERR "\tprocessing $message\n";
	    push(@processed,$message);

	    $/ = '';
	    $head = <message>;
	    $head =~ s/\n\s+/ /g;
	    $body = "";
	    $subj = ($head =~ /^Subject:\s+(.*)/i)? $1: "[none]";
	    ($from) = $head =~ /^From:\s+(.*)/i;
	    ($date) = $head =~ /^Date:\s+(.*)/i;

	    undef $/;
	    $body = <message>;
	    close(message);

	    # escape ^From <user> <weekday> <month> <day> <hr:min:sec> ...
	    $body =~
		s/^From (\S+\s+\w{3}\s+\w{3}\s+\d+\s+\d+:\d+:\d+)/>From $1/g;
	    $body =~ s/^-/- -/g; # escape encapsulation boundaries in message
	    # trim trailing \n's
	    $len = length($body) - 1;
	    $len-- while (substr($body,$len,1) eq "\n");
	    substr($body,$len+1) = "";

	    $/ = "\n";

## note -- RFC 1153 claims the following headers should be retained, and
## presented in the following order:
##      Date:, From:, To:, Cc:, Subject:, Message-ID:, and Keywords:

	    push(@subj,$subj);
	    print TEMP <<EOF;
Date: $date
From: $from
Subject: $subj

$body

$EB

EOF
	    last if ($message eq $lastfile);
    }
    close(TEMP);


    if ($opt_d) {
	$DIGEST = "/tmp/testdigest.$NUMBER";
    } else {
	$DIGEST = sprintf("%s/v%02d.n%03d", $V{'ARCHIVE'}, $VOLUME, $NUMBER);
    }
    open(DIGEST, ">$DIGEST") || &abort("open(DIGEST, \">$DIGEST\"): $!");

    print DIGEST <<EOF;
From: $V{'FROM'} ($V{'NAME'})
To: $V{'TO'}
Subject: $V{'NAME'} V$VOLUME #$NUMBER
Reply-To: $V{'REPLY-TO'}
Sender: $V{'ERRORS-TO'}
Errors-To: $V{'ERRORS-TO'}
Precedence: $Precedence
$HEADERS

EOF

    $PDATE = &getdate();
    $volstr = sprintf("Volume %02d : Number %03d\n\n",$VOLUME,$NUMBER);
    $width = length($V{'NAME'}) + length($PDATE) + length($volstr);
    if ($width < 76) {
	$center = " " x int((78 - $width) / 2);
    } else {
	$center = " ";
    }
    print DIGEST $V{'NAME'},$center,$PDATE,$center,$volstr,"\n\n";

    foreach (split(/\n/,$HEADER)) {
	    if (/_SUBJECTS_/) {
		    $pre = $`;
		    foreach $subj (@subj) {
			    print DIGEST $pre,$subj,"\n";
		    }
	    }else{
		    print DIGEST "$_\n";
	    }
    }
    print DIGEST "\n";
    print DIGEST "-" x 70,"\n\n";

    open(TEMP);
    print DIGEST <TEMP>;
    close(TEMP);
    unlink($TEMP);

    $end = sprintf("End of %s V%d #%d", $V{'NAME'}, $VOLUME, $NUMBER);
    print DIGEST $end, "\n";
    print DIGEST "*" x length($end), "\n";
    print DIGEST "\n";
    print DIGEST $TRAILER;

    close(DIGEST);

    if ($opt_d) {
	warn "digest output in /tmp/testdigest.$NUMBER\n";
    } else {
	system("$sendmail_command -f$V{'ERRORS-TO'} $V{'REALLY-TO'} < $DIGEST");
	unlink(@processed);
    }

    undef @subj;
    undef @processed;

    return 0;
}

sub init {
	$* = 1;
	$HOME = $ENV{"HOME"} || (getpwuid($>))[7];
	chdir($HOME);
	&getopt("drRmpc:Cl:z") ||
	    &abort("Usage: digest {-r|-R|-m|-p} [-c config|(-C -l list)]\nStopped");
	$config = $opt_c || "$HOME/.digestrc";
	$TEMP = "/tmp/digest.$$";
	$SIG{'INT'} = 'cleanup';
	@MONTHS = ("January","February","March","April","May","June","July",
	           "August","September","October","November","December");
	@DAYS = ("Sunday","Monday","Tuesday","Wednesday","Thursday",
	         "Friday","Saturday");
	$TEMP = "/tmp/digest.$$";
	$EB = "-" x 30;
}

sub readconfig {
	if (defined($opt_C)) {
	   if (!defined($opt_l)) {
		&abort("-C used without -l");
	    } else {
		# Read and execute the .cf file
		$cf = $ENV{"MAJORDOMO_CF"} || 
			"/etc/majordomo.cf";
		if (! -r $cf) {
		    &abort("$cf not readable; stopped");
		}
		eval(`cat $cf`) || die "eval of majordomo.cf failed $@";

		chdir($homedir);

		$opt_l =~ tr/A-Z/a-z/;

	   	require "config_parse.pl";
		# get the digest config file
		&get_config($listdir, $opt_l);

		# map config opts to internal variables and $V array
		$HEADER = $config_opts{$opt_l,"message_fronter"};
		$HEADER =~ s/\001/\n/g;
		$TRAILER = $config_opts{$opt_l,"message_footer"};
		$TRAILER =~ s/\001/\n/g;
		$VOLUME = $config_opts{$opt_l,"digest_volume"};
		$NUMBER = $config_opts{$opt_l,"digest_issue"};
		$Precedence = $config_opts{$opt_l,"precedence"};
		$Precedence = "bulk" if ($Precedence eq "");
		$V{'ARCHIVE'} = "$filedir/$opt_l$filedir_suffix";
		$V{'DIGEST_SIZE'} = $config_opts{$opt_l, "maxlength"};
		$V{'DIGEST_LINES'} = $config_opts{$opt_l, "digest_maxlines"};
		$V{'MAX_AGE'} = $config_opts{$opt_l, "digest_maxdays"};
		$V{'ERRORS-TO'} = $config_opts{$opt_l,"sender"} . "@" .
					$config_opts{$opt_l,"resend_host"};
		$V{'FROM'} = $config_opts{$opt_l, "sender"}. "@" .
					$config_opts{$opt_l,"resend_host"};
		$V{'INCOMING'} = "$digest_work_dir/$opt_l";
		$V{'NAME'} = $config_opts{$opt_l,"digest_name"};
		$V{'REALLY-TO'} = $ARGV[0];
		$V{'REPLY-TO'} = $config_opts{$opt_l,"reply_to"};
		$V{'TO'} = "$opt_l@$whereami";

		# make the headers keyword work
		if ( $config_opts{$opt_l,"message_headers"} ne '' ) {
			$from = $V{'FROM'};
			$HEADERS = &config'substitute_values (
			$config_opts{$opt_l,"message_headers"}, $opt_l);
			$HEADERS =~ s/\001/\n/g;
		}
            } # list is defined
	 } else { # not using -C 
	   open(config) || &abort("$config: $!\n");
	   while (<config>) {
		next if /^\s*$|^\s*#/;
		chop;
		($key,$value) = split(/\s*=\s*/,$_,2);
		$V{$key} = $value;
	   }
	   close(config);

	   open(header,$V{'HEADER'}) || &abort("$V{'HEADER'}: $!\n");
	   $HEADER = join("",<header>);
	   close(header);

	   open(trailer,$V{'TRAILER'}) || &abort("$V{'TRAILER'}: $!\n");
	   $TRAILER = join("",<trailer>);
	   close(trailer);

	   open(VOL_FILE,$V{'VOL_FILE'}) || &abort("$V{'VOL_FILE'}: $!\n");
	   $VOLUME = join("",<VOL_FILE>);
	   chop($VOLUME);
	   close(VOL_FILE);

	   open(NUM_FILE,$V{'NUM_FILE'}) || &abort("$V{'NUM_FILE'}: $!\n");
	   $NUMBER = join("",<NUM_FILE>);
	   chop($NUMBER);
	   close(NUM_FILE);

	   if (defined($V{'HOME'})) {
	       unshift(@INC, $V{'HOME'});
	   } 

	} # end not using -C
}

#my favorite of the existing getopt routines; twisted
#
sub getopt {
	local($_,%opt,$rest) = (split(/([^:])/,$_[0]),'');
	while ($_ = $ARGV[0], /^-(.)/ && shift(@ARGV)) {
		$rest = $';
		last if $1 eq '-';
		if (!defined $opt{$1}) {
			warn "Unrecognized switch \"-$1\".\n";
			return 0;
		}elsif ($opt{$1}) {
			$rest = shift(@ARGV) if $rest eq '';
			eval "\$opt_$1 = \$rest";
		}else{
			eval "\$opt_$1 = 1";
			$rest =~ /^(.)/;
			redo if $rest ne '';
		}
	}
	return 1;
}

sub cleanup {
	unlink($TEMP);
	exit(1);
}

sub getdate {
  local($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
  $year += 1900;
  return("$DAYS[$wday], $MONTHS[$mon] $mday $year");
}

sub set_lock {
    $waittime = 0;
    $lockfile = $V{'INCOMING'} . "/.LOCK";
    while ($waittime < 600) {
	if (&shlock($lockfile)) {
	    # got the lock
	    $lock_set = 1;
	    return(1);
	} else {
	    # didn't get the lock; wait 1-10 seconds and try again.
	    $sleeptime = int(rand(10) + 1);
	    $waittime += $sleeptime;
	    sleep($sleeptime);
	}
    }
    # if we got this far, we ran out of tries on the lock.
    # try again later... --pdc
    exit($EX_TEMPFAIL);
}

sub free_lock {
    if (defined($lock_set)) {
	undef($lock_set);
	return unlink($V{'INCOMING'} . "/.LOCK");
    } else {
	return undef;
    }
}

sub abort {
    local($msg) = shift;

    &free_lock;
    die($msg);
}
