#!/opt/SUNWmail/gtw/perl/bin/perl -ww

# $Id: map_filter,v 1.44.14.1 1998/02/06 18:05:46 kevin Exp $

my $legal = <<'EOF';
This is UNPUBLISHED PROPRIETARY SOURCE CODE of Wingra Technologies
Incorporated; the contents of this file may not be disclosed to third parties,
copied or duplicated in any form, in whole or in part, without the prior
written permission of Wingra Technologies Incorporated.

Permission is hereby granted solely to the licensee for use of this source code
in its unaltered state. This source code may not be modified by licensee
except under specific direction of Wingra Technologies Incorporated. This
source code may not be given under any circumstances to non-licensees in any 
form, including source or binary. Unauthorized modification of this source 
constitutes breach of contract, which voids any potential pending support 
responsibilities by Wingra Technologies Incorporated. Divulging the exact or 
paraphrased contents of this source code to unlicensed parties either directly 
or indirectly constitutes violation of federal and international copyright and 
trade secret laws, and will be duly prosecuted to the fullest extent permitted 
under law.

This software is provided by Wingra Technologies Incorporated ``as is'' and any
express or implied warranties, including, but not limited to, the implied
warranties of merchantability and fitness for a particular purpose are
disclaimed.  In no event shall the regents or contributors be liable for any
direct, indirect, incidental, special, exemplary, or consequential damages
(including, but not limited to, procurement of substitute goods or services;
loss of use, data, or profits; or business interruption) however caused and on
any theory of liability, whether in contract, strict liability, or tort
(including negligence or otherwise) arising in any way out of the use of this
software, even if advised of the possibility of such damage.
EOF

BEGIN
{
    my $os;
    my $addPath;
    $os = `uname -s`;
    chomp $os;
    $os = lc $os;
 
    if (not defined($ENV{"__GETROOT__"}))
    {
	$! = -2;
	print STDERR "Environment variable __GETROOT__ must be set!\n";
	die "RUNTIME error encountered";
    }

    $addPath = `$ENV{"__GETROOT__"} / libDir`;
    chomp $addPath;
    unshift(@INC, $addPath);
 
    $addPath = `$ENV{"__GETROOT__"} / perlDir`;
    chomp $addPath;
    unshift(@INC, $addPath . "/lib/" . $os . "/5.002");
    unshift(@INC, $addPath . "/lib");
    unshift(@INC, $addPath . "/lib/site_perl/" . $os);
    unshift(@INC, $addPath . "/lib/site_perl");
}

require 5.002;		# uses perl5 features heavily
use strict;		# require variable declarations, strict refs, etc
use Getopt::Long 2.2;	# long-style command-line parser
use File::Basename;	# for parsing filenames
use Safe 2.01;		# for safe evaluations of expressions

use Dirsync;		# for .cf file
use MsvLog;		# Missive-style logging
use MDEF;		# Missive Directory Exchange Format

=head1 NAME

map_filter - apply Missive DirSync maps and filters

=head1 SYNOPSIS

B<map_filter>
(B<-debug>) 
(B<-help>) 
B<-input> I<filename> 
(B<-cf> I<filename>)
B<-output> I<filename> 
[ B<-import> | B<-export> ] 
(B<-channel> I<channel>)
(B<-attr> I<filename>)
(B<-usedattrs> I<filename>)
(B<-foreign> I<filename>)
(B<-log> (I<filename>))

=head1 DESCRIPTION

This module applies maps and filters to a canonical-format flat-text input
file to convert the attributes and data to or from X.500 attribute names.

=head1 OPTIONS

=over 5

=item B<-debug>

Print out a ridiculous plethora of debugging information (about 100 lines per
record). 

=item B<-help>

Displays a brief help message.

=item B<-input> I<filename>

I<Required>. The name of the input file.

=item B<-output> I<filename>

I<Required>. The name of the output file.

=item B<-cf> I<filename>

The name of the configuration file, which includes the rules 
for the mapping operation. Defaults to $MSV/etc/dirsync.cf.

=item B<-import> | B<-export>

I<Required>. Use B<-import> to convert from Mail Directory attributes to
Central Directory attributes. B<-export> for vice versa.

=item B<-channel> I<channel>

The name of the channel to use from the config file. If there is only one
channel in that file, this option is unnecessary.

=item B<-attr> I<filename>

Name of the file used to translate between friendly and actual X.500 
attribute names. By default, /usr/lpp/missive/etc/dir_attrs.txt.

=item B<-usedattrs> I<filename>

Store the name of the attributes used in the mapping information in the
file. Used by later processes.

=item B<-foreign> I<filename>

If supplied, the I<foreign> keyword will indicate that the specified record
should be written to the specified file. If ommitted, I<foreign> means 
exactly the same thing as I<reject>. This flag is intended to be used by the
full-sync process, which will use the foreign entries.

=item B<-log> I<filename>

Name of the log file to write to. If no flag given, no logging is performed.
If the flag is given, but no argument is specified, the default log location
is used.

=back

=head1 RESERVED VARIABLES

There are some reserved variables that can be used during the map&filter
process.  These are:

=item B<${OU Level}>

Used to map an entry to an organzational unit in the central directory. 
This variable is only valid during an I<import>.

=item B<$RecordID>

Used to indentify a record that has been filtered.  

=item B<NOTE:>

All reserved variable names are CASE SENSTIVE!

=head1 GRAMMAR

Here is a BNF-style grammar for the config file. (It's close to what the 
Camel book uses to describe Perl, since BNF is hard to type.) Note that I
am assuming that comments are created with _line-initial_ # characters. 
Also note that the implementation will not use anything close to a recursive-
descent parser.

    SCHEMA      ::=     (* CHANNEL *)

    CHANNEL     ::=     '[' channel_name ']' '=' WORD
			'[' md_to_cd ']'
			MAPPINGS
			'[' cd_to_md ']'
			MAPPINGS
			'[' attribute_descriptions ']'
			ATTRIBUTE_DESCRIPTIONS
			'[' attribute_value_mappings ']'
			ATTRIBUTE_MAPPINGS
			'[' config_options ']'
			CONFIG_OPTIONS
    (Separators are bracketed to stand out visually)

    CONFIG_OPTIONS  ::= (* CO *)

    CO          ::=     md_to_cd_add_delete ':' YN
		|       cd_to_md_add_delete ':' YN
		|       common_attribute_owner ':' CAO
		|       timeout ':' NUMBER
		
    ATTRIBUTE_DESCRIPTIONS ::=  (* ATTDESC *)

    ATTDESC     ::=     WORD ':' (* NONCOLON *) ':' (* NONCOLON *) ':' 

    ATTRIBUTE_MAPPINGS  ::= (* ATTMAP *)

    ATTMAP      ::=     NONCOLON ':' NONCOLON

    MAPPINGS    ::=     (* BLOCK *)

    BLOCK       ::=     TRY | MATCH

    TRY         ::=     try (* MATCH *) endtry

    MATCH       ::=     match (? any ?) 
			    (* MATCHES *)
			    REJECT | FOREIGN | ASSIGN
			endmatch

    REJECT      ::=     reject

    FOREIGN     ::=     foreign

    ASSIGN      ::=     assign (* ASSIGNS *)

    ASSIGNS     ::=     ASSIGNLHS '=' ASSIGNRHS

    ASSIGNLHS   ::=     SCALAR

    ASSIGNRHS   ::=     (+ READONLY +)

    MATCHES     ::=     MATCHLHS (? '!' ?) ':' MATCHRHS
		|       '[' ARITH ']'

    MATCHLHS    ::=     (+ READONLY +)

    MATCHRHS    ::=     (+ READWRITE +)

    ARITH       ::=     (+ READONLY +) RLO (+ READONLY +)

    RLO         ::=     '>' | '<' | '>=' | '<=' | '==' | '!='
    (That's "Relational or Logical Operator", except we're not 
    using logical ops.  But it would be just as confusing for 
    the acronym to be "RO", so "RLO" it is.)

    READWRITE   ::=     TEXT | SCALAR

    READONLY    ::=     TEXT | SCALAR | FUNCTION

    SCALAR      ::=     '$' WORD
		|       '$' '{' {+ WORD +} '}'

    TEXT        ::=     (+ [^:&] +)
    (Note that TEXT _will_ allow escaped ":"s and "&"s. I just 
    don't wanna hafta write the regex right now...)

    FUNCTION    ::=     '&' WORD '(' ARG (* ',' ARG *) ')'

    ARG         ::=     SCALAR | WORD | CONSTANT

    CONSTANT    ::=     '"' (+ [^"] +) '"'

    WORD        ::=     LETTER (* ALPHANUM *)

    ALPHANUM    ::=     LETTER | DIGIT

    NONCOLON    ::=     (* [^:\\] | '\' [:\\] *)
    (Yeah, I realize it's not quite right, but you know what 
    I mean...)

    YN          ::=     'y' | 'n'

    CAO         ::=     'md' | 'cd' | 'both-m' | 'both-c'

    LETTER      ::=     [A-Za-z_]

    NUMBER      ::=     (+ DIGIT +)

    DIGIT       ::=     [0-9]

=head1 INTERNALS

=head2 symbol table

Of the format:

	$table{$variable}{"value"} = "foo";
	$table{$variable}{"readonly"} = 1;

=cut
#############################################################################

# deal w/ strict vars by declaring everything
my $debug;		# view debugging info
my $log;		# log file reference
my $numrecords = 0;	# running count of number of user records
my $filtered = 0;	# running count of number of filtered user records
my $numforeign = 0;     # running count of foreign user records
my $result;		# result from various function calls
my $cf;			# the cf file object
my @maps;		# one of the x_to_y cf sections
my %avm_abbrev;		# hash of AVM abbreviations (eg, Street->St.) 
my %avm_expand;		# hash of AVM unabbreviations (eg, St.->Street) 
my %attd;		# Attribute character mappings, keyed on Mail Dir.
			#  attribute names
my %x500_friendly;	# X500 -> friendly attribute mappings
my %friendly_x500;	# friendly -> X500 attribute mappings
my $key;		# key to x500_friendly and friendly_x500
my @record;		# the lines of a record
my $eof;		# have we hit EOF?
my $map_line = 0;	# index into @maps, for non-destructive transversal 
			#  with global state-preservation
my %table;		# the global symbol table
my ($input, $output);	# refs to MDEF objects for i/o
my $foreign;		# ref to MDEF object for foreign-entry i/o
my $oper;		# global of current operation
my $ou_level;		# global for the ou level for a record
my @common_attrs;	# common attributes
my $save_foreign = 0;	# is the CURRENT record a foreign entry we want to save?
my $use_readonly = 0;	# CONSTANT - should we enforce the readonly attribute?
my $dsmTRANS = -3;	# Transient error, restartable
my $dsmCONFIG = -2; 	# Configuration error, restartable

# undefine these to avoid warnings about single-use
undef $::opt_debug;
undef $::opt_help;
undef $::opt_export;
undef $::opt_attr;

# parse the command line
$result = GetOptions("debug", "help", "input=s", "cf=s", "output=s", 
    "import", "export", "attr=s", "usedattrs=s", "channel=s", "log:s",
    "foreign=s");

$! = $dsmCONFIG;
die "Usage: $0 (-debug) (-help) -input <filename> (-cf <filename>)\n" .
    "\t-output <filename> (-channel <channel>) [ -import | -export ] \n" .
    "\t(-usedattrs <filename>) (-log (<filename>)) (-attr <filename>)\n" .
    "\t(-foreign <filename>)\n"
    if ($::opt_help or (!$result) or 
    (!$::opt_input) or (!$::opt_output) or
    (!$::opt_import and !$::opt_export));

# "opt_" is too annoying to type...
if (defined($::opt_debug)) {
  $debug = 1;
} else {
  # redirect STDOUT and STDERR
  open (STDOUT, ">/dev/null") || die ("Unable to redirect STDOUT!");
  open (STDERR, ">/dev/null") || die ("Unable to redirect STDERR!");
}

# define some base pathnames
my $p_bin = `$ENV{"__GETROOT__"} / binDir`;
chomp($p_bin);
$p_bin .= "/";

my $p_etc = `$ENV{"__GETROOT__"} / cfgDir`;
chomp($p_etc);
$p_etc .= "/";

my $p_ds = `$ENV{"__GETROOT__"} / dirsyncDir`;
chomp($p_ds);
$p_ds .= "/";

# put this off until later
#my $p_dsc = $p_ds . "/" . lc($::opt_channel) . "/";

my $p_log = `$ENV{"__GETROOT__"} / logDir`;
chomp($p_log);
$p_log .= "/dirsync/";

# default log directory may not exist, so create it if not there
mkdir($p_log,0770) if ( ! -d $p_log );

############################
# end preliminaries
############################

# create a new config structure
$::opt_cf = "dirsync.cf" if not defined($::opt_cf);
$cf = Dirsync->new(&abspath($p_etc, $::opt_cf));
if (ref($cf) eq "SCALAR")
{
    $! = $dsmCONFIG;
    die "Error loading $::opt_cf: $$cf\n";
}

# if we didn't get a section, guess or die
if (! defined($::opt_channel))
{
    if (scalar (keys %$cf) == 1)
    {
	$::opt_channel = (keys %$cf)[0];
    }
    elsif (scalar (keys %$cf) == 0)
    {
	$! = $dsmCONFIG;
	die "No channels defined in $::opt_cf!\n";
    }
    else
    {
	$! = $dsmCONFIG;
	die "No -section given, but more than one possibility in "
	    . "\n\t<$::opt_cf>!\n";
    }
}

# confirm our channel name is actually a section
if (not grep(/$::opt_channel/, $cf->channels))
{
    $! = $dsmCONFIG;
    die "Channel \"$::opt_channel\" is not present in \n\t<$::opt_cf>!\n";
}

# this is delayed, since we might not have a valid opt_channel until now 
my $p_dsc = $p_ds . lc($::opt_channel) . "/";

# open the log file and say hi
if (defined($::opt_log))
{
    my ($basename) = File::Basename::basename($0);

    # generate a log file name if none provided
    if ($::opt_log eq "")
    {
	$::opt_log = $p_log . MsvLog->logname(lc($::opt_channel));
    }

    $log = new MsvLog(&abspath($p_dsc, $::opt_log), "$basename\[$$\]", 1);
    $! = $dsmCONFIG;
    die $$log . "\n" if (ref($log) eq "SCALAR");

    $result = $log->report(MsvLog::NLS("i"), MsvLog::NLS("Starting %1\$s"),
			   $::opt_import ?
			 MsvLog::NLS("import") : MsvLog::NLS("export"));
    
    $log->abort($dsmTRANS, $$result) if (ref($result) eq "SCALAR"); # just check once...
}
else
{
    # no log file, so just pretend
    $log = stub MsvLog;
}

# open the input and output files
$input = MDEF->open(&abspath($p_dsc, $::opt_input), "r");
if (ref($input) eq "SCALAR") {$log->abort($dsmCONFIG, $$input);}
$output = MDEF->open(&abspath($p_dsc, $::opt_output), "w");
if (ref($output) eq "SCALAR") {$log->abort($dsmTRANS, $$output);}

# if we're storing foreign entries, open that file
if ($::opt_foreign)
{
    $foreign = MDEF->open(&abspath($p_dsc, $::opt_foreign), "w");
    if (ref($foreign) eq "SCALAR") {$log->abort($dsmTRANS, $$foreign);}
}

# are we storing used attributes?
# generate some associative arrays from the config file
%avm_abbrev = &generate_avm(0, 
    $cf->section($::opt_channel, "attribute_value_mappings"));
%avm_expand = &generate_avm(1, 
    $cf->section($::opt_channel, "attribute_value_mappings"));

print STDERR join(" ", %avm_expand), "\n" if $debug;

%attd = &generate_attd($cf->section($::opt_channel, "attribute_description"));

print STDERR join(" ", %attd), "\n" if $debug;

# convert the paired array result of the function to an associative array
%x500_friendly = &get_x500_attrs(&abspath($p_etc, 
    defined($::opt_attr) ? $::opt_attr : "dir_attrs.txt"));
# add extra x500 attributes!!!
$x500_friendly{lc("Native Address")} = "Native Address";
$x500_friendly{lc("cn")} = "Common Name";
# now generate the reversed list
foreach $key (keys %x500_friendly)
{
	$friendly_x500{lc($x500_friendly{$key})} = $key;
}

# generate the usedattrs list, if appropriate
if ($::opt_import and $::opt_usedattrs)
{
    open(ATTRS, ">" . &abspath($p_dsc, $::opt_usedattrs)) 
	or $log->abort($dsmTRANS, MsvLog::NLS("Can't open attribute log file (%1\$s)"),
		       &abspath($p_dsc, $::opt_usedattrs));
    @common_attrs = &common_attrs;
    print ATTRS join("\n", @common_attrs), "\n";
    close ATTRS;

    $log->report(MsvLog::NLS("i"), MsvLog::NLS("Stored %1\$s common attributes"), $#common_attrs);
}

print STDERR join(", ", keys %x500_friendly) if $debug;

while (@record = $input->read_entry())
{
    # check for error from read_entry
    if (ref($record[0]) eq "SCALAR") {$log->abort($dsmCONFIG, ${$record[0]} );}

    $numrecords++;

    # initialize the ou_level variable to undef
    $ou_level = undef;

    # the first line in a record from read_entry is always the current 
    #  operation
    $oper = shift(@record);

    print STDERR "\nwith $oper:\n" if $debug;

    print STDERR join("\n", @record), "\n" if $debug;
    # now process a complete record

    # first, create a new symbol table (%table) with the given record as
    #  the starting data
    &create_symbol_table(@record);

    # now, process the record according to the cf/config file, updating
    #  the symbol table in the process. If we filtered out this record, say
    #  so (if debug).
    if (&process_record(@record) < 0)
    {
	print STDERR "FILTERED!\n" if $debug;
	if (&getvalue("RecordID") ne " ")
	{
	   $log->report(MsvLog::NLS("i"), 
			MsvLog::NLS("Filtered: %1\$s"), &getvalue("RecordID"));
	}
	else
	{
	   print STDERR "Record filtered but \$RecordID not assigned\n" if $debug;
	}
	$filtered++;
    }
    else
    {
	# otherwise, just output the complete symbol table
	&output_symbol_table;
    }
}

print STDERR "\n" if $debug;

$log->report(MsvLog::NLS("i"), MsvLog::NLS("Processed %1\$s records:"), $numrecords);
$log->report(MsvLog::NLS("i"), MsvLog::NLS("%1\$s filtered"), $filtered);
$log->report(MsvLog::NLS("i"), MsvLog::NLS("%1\$s foreign"), $numforeign) if ($::opt_foreign);
$log->report(MsvLog::NLS("i"), MsvLog::NLS("Exiting normally"));

#############################
# end of main program
#############################

# Function: process_record
# Arguments: @record - a record to process with the cf file
# Returns: 0 on success or -1 if we filtered out this record
# Description: Loops through each block of the cf file until either we
#			   filter the record or run out of cf file.
sub process_record
{
    my(@record) = @_;

    # this is a previously-declared global
    $map_line = 0;		# current line in the cf file sections

    if ($::opt_import)
    {
	@maps = $cf->section($::opt_channel, "md_to_cd");
    }
    else
    {
	@maps = $cf->section($::opt_channel, "cd_to_md");
    }

    # loop through the mapping section
    for (;defined(@maps[$map_line]);)
    {
	# parse_block will also return -1 if we filtered
	if (&parse_block < 0)
	{
	    return -1;	# FILTERED
	}
    }

    0;	# completed successfully without filtering
}

# Function: parse_block
# Arguments: none
# Returns: 0 on success, 1 on failure, -1 on rejection
# Description: Parses (sorta recursively) a top-level "block".
#			   BLOCK ::= TRY | MATCH
sub parse_block 
{
    my $line;	# shorthand for the current line

    # loop through the cf, starting where we were before (which is why
    #  $map_line is global), and continuing until we hit a TRY or MATCH
    #  block.
    for (;defined(@maps[$map_line]);)
    {
	$line = @maps[$map_line];
	$map_line++;

	# MATCH
	if ($line =~ /\bmatch\b/)
	{	
	    # check for "any", and pass a boolean to parse_match...
	    return &parse_match(index($line, "any") >= $[);
	}

	# TRY
	if ($line =~ /\btry\b/)
	{
	    return &parse_try;
	}
    }
}

# Function: parse_match
# Arguments: $any - is this a "match any"?
# Returns: 0 on success, 1 on failure, -1 on rejection
# Description: Parses a MATCH. Calls the matching algorithm and determines
#              how to deal with successes and failures.
#              MATCH ::= match (? any ?)
#                            (* MATCHES *)
#                            REJECT | FOREIGN | ASSIGN
#                        endmatch
sub parse_match
{
    my ($any) = @_;		# disjunction? (vs normal conjunction)
    my $line;			# shorthand for current line in cf
    my $gottamatch = 0;	# have we matched any lines yet?
    my $result;			# result variable

    for (;defined(@maps[$map_line]);)
    {
	$line = @maps[$map_line];
	$map_line++;

	# before you ask, I ran timing tests to see if this ran any faster
	#  if we "study $line" first. The answer was that it made no 
	#  perceptable/significant difference. So we don't.

	# check for "endmatch"
	if ($line =~ /\bendmatch\b/)
	{
	    # if "match any", then return success (0) if we matched any, 
	    #  otherwise return failure (1)
	    if ($any)
	    {
		return $gottamatch ? 0 : 1;
	    }
	    else
	    {
		# not any - we succeeded
		return 0;
	    }
	}

	# check for "foreign"
	if ($line =~ /\bforeign\b/)
	{
	    if (not (defined($::opt_foreign) and $::opt_foreign))
	    {
		# we're not using foreign entries, so fall through and treat
		# this like a reject...
		$line = "reject";
	    }
	    else
	    {
		if ($any)
		{
		    if ($gottamatch)
		    {
			print STDERR "foreign!\n" if $debug;
			# set this global flag (read in output_record, cleared
			# when a new record is read)
			$save_foreign = 1;
			$numforeign++;
		    }
		}
		else
		{
		    print STDERR "foreign!\n" if $debug;
	            $save_foreign = 1;
		    $numforeign++;
		}
		next;
	    }
	}

	# check for "reject"
	if ($line =~ /\breject\b/)
	{
	    &skipto("endmatch");

	    # if "match any", then return reject (-1) if we matched any,
	    #  otherwise return failure (1)
	    if ($any)
	    {
		return $gottamatch ? -1 : 1;
	    }
	    else
	    {
		# not any - we reject
		return -1;
	    }
	}

	# check for "assign"
	if ($line =~ /\bassign\b/)
	{
	    # should we actually do the assigns?
	    if ($any)
	    {
		if ($gottamatch)
		{
		   return &parse_assign;
		}
		else
		{
		   return 1;
		}
	    }
	    else	# no, fail
	    {
	        return &parse_assign;
	    }
	}

	# check for an expression
	if ($line =~ /\[(.*)\]/)
	{
	    my $expr;
	    my $cpt;

	    # OK, we've got something in the form [ a > b ], we hope

	    # first, interpolate the whole shebang
	    $expr = &interpolate($1);

	    print STDERR "match: $expr\n" if $debug;

	    if ($expr ne "FAILED")
	    {
	    	# create a new Safe compartment to do the eval
	    	$cpt = new Safe;
	    	$cpt->permit_only("scalar", "const", "i_multiply", "multiply", 
				  "i_divide", "divide", "i_add", "add", 
				  "i_subtract", "subtract", "concat", 
				  "stringify", "i_lt", "lt", "i_gt", "gt", 
				  "i_le", "le", "i_ge", "ge", "i_eq", "eq", 
				  "i_ne", "ne", "slt", "sgt", "sle", "sge", 
				  "seq", "sne", "negate", "not", "abs", 
				  "length", "ord", "chr", "or", "and", 
				  "leaveeval", "pushmark", "substr");

	    	$result = $cpt->reval($expr);

	    	if ($@)
	    	{
		   my $a = $@;
		   $a =~ s/ trapped by .*//;
		   $log->abort($dsmCONFIG, 
		    	       MsvLog::NLS("Can't evaluate forbidden expression: \"%1\$s\" : %2\$s"), $expr, $a);
	    	}

	    	print STDERR "result of eval: $result\n" if $debug;
	    } else
	    {
		undef($result);
	    }
	}
	else
	{
	    # attempt to unify the match
	    $result = &match($line);
	}

	# this can be the result either from a match or from an expression
	#  evaluation. Either way...
	if (defined($result) && $result && ($result ne "FAILED"))
	{
	    $gottamatch++;	# hooray!

	    if ($any)
	    {
		# we've matched one, so short-circuit to the next keyword
		$result = &skipto("endmatch", "reject", "assign", "foreign");

		if ($result eq "reject")
		{	
		    # this was a filter - return that indication
		    return -1;
		}
		elsif ($result eq "foreign")
		{
		    # this is a foreign entry
		    # if we're storing 'em, do so, otherwise pretend we're a
		    #  filter
		    if (defined($::opt_foreign) and $::opt_foreign)
		    {
			print STDERR "foreign!\n" if $debug;
			$save_foreign = 1;
			$numforeign++;
			return 0;
		    }
		    else
		    {
			return -1;
		    }
		}
		elsif ($result eq "assign")
		{
		    return &parse_assign;
		}
		else
		{
		    # this was a map - return success
		    return 0;
		}
	    }
	    else
	    {
		# if normal "match" (not any), just continue
		;
	    }
	}
	else
	{
	    # no match
	    if ($any)
	    {
		# no matter, just continue
		;
	    }
	    else
	    {
		# failed, return failure
		&skipto("endmatch");
		return 1;
	    }
	}
    }

    1;
}
		
# Function: match
# Arguments: a line of the format A:B to unify
# Returns: true/false
# Description: Does a brute-force regex-based unification by interpolating
#              functions and variables on the LHS and matching into (ALWAYS
#              ASSUMED NON-BOUND) variables and constants on the RHS.
sub match
{
    my ($line) = @_;
    my $lhs;		# left and right-hand sides of an equation
    my $rhs;
    my $variable;	# this version includes the $ and maybe some {}s
    my $varname;	# this is just the name of the variable
    my @varnames;	# names of variables on the RHS
    my @varvalues;	# new variable values for variables on the RHS
    my $evalregex;	# string to eval to run the magic regex

    # ok, first try to chop up the line into a variable, a colon, and a RHS
    ($lhs, $rhs) = ($line =~ /^(.+?):(.*)/);

    # remove extra spaces from lhs and rhs
    $lhs =~ s/^\s+//;
    $lhs =~ s/\s+$//;
    $rhs =~ s/^\s+//;
    $rhs =~ s/\s+$//;

    print STDERR "lhs=$lhs\nrhs=$rhs\n" if $debug;

    $lhs = &interpolate($lhs);

    return 0 if ($lhs eq "FAILED");

    print STDERR "now match: $lhs:$rhs\n" if $debug;

    # now try to match the new LHS into the RHS...

    # first, generate a regex by replacing variables with (.+?)
    # this is similar to the above loop
    while ($rhs =~ /\$(\w+)|\$\{(.*?)\}/)
    {
	my $oldprefix = $`;	# store these for later
	my $oldsuffix = $';

	# Since $1 and $2 were or'd in the regex, only one will be defined.
	#  This will grab whichever one was defined.
	if (defined($1))
	{
	    $varname = $variable = $1;
	}
	else
	{
	    $varname = $variable = $2;
	}

	print STDERR "removing $variable ($varname)...\n" if $debug;

	# splice together new rhs
	$rhs = $oldprefix .  "(.+?)" . $oldsuffix;
	
	# add the new variable to the list of replaced variables
	push(@varnames, $varname);
    }

    # now, we've removed the variables, so lets try the match for real now!
    print STDERR "now match: $lhs:$rhs\n" if $debug;

    eval { @varvalues = ($lhs =~ /^$rhs$/i) };

    if ($@ or "@varvalues" eq "")
    {
	# failed to match! oy vey!
	$log->abort($dsmCONFIG, MsvLog::NLS("Eval of \"%1\$s\" failed.\nError: %2\$s"), $evalregex, $@) if $@;
	return 0;
    }

    print STDERR "match succeeded!\n" if $debug;

    # and add the new variables into the symbol table
    foreach $varname (@varnames)
    {
	print STDERR "adding $varname=", $varvalues[0], "\n" if $debug;

	if ($use_readonly and $table{lc($varname)}{"readonly"})
	{
	    $log->report(MsvLog::NLS("i"),
		       MsvLog::NLS("Can't assign into readonly variable '%1\$s'"),
			 $variable);
	    print STDERR "but wait! $varname is readonly!\n" if $debug;
	    next;
	}

	# remove extra spaces from new info
	$varvalues[0] =~ s/^\s+//;
	$varvalues[0] =~ s/\s+$//;

	$table{lc($varname)}{"value"} = $varvalues[0];
	shift @varvalues;
	$table{lc($varname)}{"readonly"} = 0;
    }

    1;
}

# Function: parse_assign
# Arguments: none
# Returns: 0 on success, 1 on failure (-1 on rejection is n/a)
# Description: Parses the assignments following an "assign" keyword.
#		   ASSIGN ::= (* ASSIGNS *)
#		   ASSIGNS ::= ASSIGNLHS '=' ASSIGNRHS
#		   ASSIGNLHS ::= SCALAR
#		   ASSIGNRHS ::= (+ READONLY +)
sub parse_assign
{
    my $line;			# shorthand for current line in cf
    my $variable;		# a variable name
    my $lhs;			# left and right-hand sizes of the equation
    my $rhs;

    for (;defined(@maps[$map_line]);)
    {
	$line = @maps[$map_line];
	$map_line++;

	# check for "endmatch"
	if (index($line, "endmatch") >= $[)
	{
	    # simple - always return success
	    return 0;
	}

	# ok, first try to chop up the line into a variable, an =, and a RHS
	($lhs, $rhs) = ($line =~ /^(.+?)=(.*)/);

	if (not defined($lhs) or not defined($rhs))
	{	
	    $log->report(MsvLog::NLS("I"), MsvLog::NLS("Invalid line in assign section: %1\$s"), $line);
	    &skipto("endmatch");
	    return 1;	# do we really want to do this?? @@@
	}

	# remove extra spaces from lhs and rhs
	$lhs =~ s/^\s+//;
	$lhs =~ s/\s+$//;
	$rhs =~ s/^\s+//;
	$rhs =~ s/\s+$//;

	print STDERR "assign $lhs <- $rhs\n" if $debug;

	# interpolate the RHS
	$rhs = &interpolate($rhs);
	print STDERR "assign $lhs <- $rhs\n" if $debug;

	# assign to LHS

	$lhs =~ /\$(\w+)|\$\{(.*?)\}/;
	# Since $1 and $2 were or'd in the regex, only one will be defined.
	#  This will grab whichever one was defined.
	if (defined($1))
	{
	    $variable = $1;
	}
	else
	{
	    $variable = $2;
	}


	if ($use_readonly and $table{lc($variable)}{"readonly"})
	{
	    $log->report(MsvLog::NLS("I"), 
		MsvLog::NLS("Can't assign to read-only variable '%1\$s'"), $variable);
	    print STDERR "Can't assign to read-only variable '$variable'!\n"
		if $debug;
	    &skipto("endmatch");
	    return 1;	# do we really want to do this?? @@@
	}

	$table{lc($variable)}{"value"} = $rhs;
	$table{lc($variable)}{"readonly"} = 0;
    }

    0;
}

# Function: parse_try
# Arguments: none
# Returns: 0 on success, 1 on failure, -1 on rejection
# Description: This is similar to a parse_block, but pays closer attention to
#              the return values.
#              TRY ::= try (* MATCH *) endtry
sub parse_try
{
    my $line;	# shorthand for the current line
    my $ret;

    # loop through the cf, starting where we were before (which is why
    #  $map_line is global), and continuing until we hit a MATCH block.
    for (;defined(@maps[$map_line]);)
    {
	$line = @maps[$map_line];
	$map_line++;

	# MATCH
	if ($line =~ /\bmatch\b/)
	{	
	    # check for "any", and pass that to parse_match...
	    $ret = &parse_match($line =~ /\bany\b/);

	    if ($ret == -1)	# rejection
	    {
		return $ret;
	    }
	    elsif ($ret != 1)	# 1 is failure
	    {
		&skipto("endtry");
		return 0;	# success
	    }
	}

	# endtry
	if ($line =~ /\bendtry\b/)
	{
	    # uh oh, we didn't run a valid match...
	    return 1;
	}
    }
    $log->report(MsvLog::NLS("I"), MsvLog::NLS("\"try\" without \"endtry\""));
    return 1;
}

# Function: skipto
# Arguments: @targets - list of constant strings 
# Returns: the element of @targets that matched, or 0 for failure
# Description: Searches through the cf for a line containing one of the
#              elements of @targets, and returns whichever $target matched.
#              Useful to find an end* element.
sub skipto
{
    my @targets = @_;
    my $line;		# shorthand for the current cf line
    my $target;		# current target string to compare with

    for (;defined(@maps[$map_line]);)
    {
	$line = @maps[$map_line];
	$map_line++;

	foreach $target (@targets)
	{
	    if ($line =~ /\b$target\b/)
	    {
		return $target;
	    }
	}
    }

    0;
}

# Function: create_symbol_table
# Arguments: @raw - a list of X=Y symbols to add to the new table
# Returns: nothing
# Description: Creates a new symbol %table with the passed-in elements. All
#              new elements are presumed to be read-only.
sub create_symbol_table
{
    my @raw = @_;
    my $lhs;		# left and right-hand side of an equation
    my $rhs;
    my $elem;		# index into @raw

    # clear any old symbol table
    %table = ();

    foreach $elem (@raw)
    {
	next if ($elem =~ /^-/);# remove deleted fields
	$elem =~ s/^[=+]//;	# remove tags for add/delete if appropriate

	# split the expression
	($lhs, $rhs) = ($elem =~ /^(.*?)=(.*)$/);

	if ((not defined($lhs)) || (not defined($rhs)))
	{	
	    $log->abort($dsmCONFIG, MsvLog::NLS("Invalid element: %1\$s"), 
			$elem);
	}

	# remove extra spaces from lhs and rhs
	$lhs =~ s/^\s+//;
	$lhs =~ s/\s+$//;
	$rhs =~ s/^\s+//;
	$rhs =~ s/\s+$//;

	if (($::opt_export) && !(lc($lhs) eq "ou level"))
	{
	    print STDERR "converted $lhs to " if $debug;
	    $lhs = $x500_friendly{lc($lhs)};
	    print STDERR "$lhs for export...\n" if $debug;
	}

	print STDERR "lhs=$lhs\nrhs=$rhs\n" if $debug;

	if (defined($lhs)) 
	{
	   $table{lc("$lhs")}{"value"} = $rhs;
	   $table{lc("$lhs")}{"readonly"} = 1;
	}
    }
}

# Function: print_symbol_table
# Arguments: none
# Returns: nothing
# Description: Prints the current symbol table to STDOUT.
sub print_symbol_table
{
    my $elem;

    foreach $elem (sort keys %table)
    {
	print "$elem = ", &getvalue($elem), "\n" 
	    if ($::opt_export && defined($attd{lc($elem)}) or
		$::opt_import && defined($friendly_x500{lc($elem)}));
    }
}

# Function: output_symbol_table
# Arguments: none
# Returns: nothing
# Description: Prints the current symbol table to OUTPUT. Oughta be combined
#              w/ the above function...
sub output_symbol_table
{
    my $elem;
    my @record;

    foreach $elem (sort keys %table)
    {
	if ($::opt_import && (lc($elem) eq "ou level"))
	{
	   $ou_level = &getvalue($elem);
	   push(@record, $elem . "=" . &getvalue($elem));
	   next;
	}

	if ($::opt_export && defined($attd{lc($elem)}))
	{
	    push(@record, $attd{lc($elem)}{"name"} . "=" . 
		 &trans_attr($elem, &getvalue($elem)));
	}
	elsif ($::opt_import && defined($friendly_x500{lc($elem)}))
	{
	    push(@record, $friendly_x500{lc($elem)} . "=" . &getvalue($elem));
	}

    }

    if ($save_foreign)
    {
	$foreign->write_entry($oper, @record);
	$save_foreign = 0;
    }
    else
    {
	# this is the usual...
	$output->write_entry($oper, @record);
    }
}

# Function: dofunction
# Arguments: $function - name of a function to execute
#            @args - arguments to that function
# Returns: result of the function, or " " if error
# Description: Evaluates and returns the following functions:
#		&field($variable, <field number>, "<field delimiter>")
#		&substr($variable, <offset>, <length>)
#		&length($variable)
#		&expand($v)
#		&abbrev($variablewithlongcontents)
sub dofunction
{
    my ($function, @args) = @_;

    print STDERR "function = '$function'\nargs = '@args'\n" if $debug;

    if ($function eq "field")
    {
	my ($var, $ind, $spl) = @args;	# split and name arguments
	my @varchunks;

	# remove quotes from split variable
	$spl =~ s/^\s*\"(.*)\"\s*$/$1/;

	@varchunks = split(/$spl/i, &getvalue($var));

	# gotta do weird mappings with $ind, though
	if ($ind > 0)
	{
	    # our schema uses 1-based offsets, but perl uses 0-based
	    $ind--;
	}
	elsif ($ind < 0)
	{
	    # we use negative numbers for offset from end, but perl doesn't do
	    #  that at all. Unfortunately. I think Icon does...
	    $ind = $#varchunks + $ind + 1;
	}

	return $varchunks[$ind];
    }
    elsif ($function eq "substr")
    {
	my ($var, $off, $len) = @args;

	# cool, it's built-in
	return substr(&getvalue($var), $off, $len);
    }
    elsif ($function eq "length")
    {
	# cool, it's built-in
	return length(&getvalue($args[0]));
    }
    elsif ($function eq "abbrev")
    {
	my $arg = &getvalue($args[0]);
	my $key;
	my $value;
	
	foreach $key (keys %avm_abbrev)
	{
	    $value = $avm_abbrev{$key};
	    $arg =~ s/\b$key/$value/g;
	}

	return $arg;
    }
    elsif ($function eq "expand")
    {
	my $arg = &getvalue($args[0]);
	my $key;
	my $value;
	
	foreach $key (keys %avm_expand)
	{
	    $value = $avm_expand{$key};
	    print STDERR "expanding $key->$value\n" if $debug;
	    $key = quotemeta $key;	# probably a good idea, but I can't
				        #  recall why...
	    $arg =~ s/\b$key/$value/g;
	}

	return $arg;
    }
    else
    {
	$log->report(MsvLog::NLS("I"), MsvLog::NLS("unknown function: %1\$s"),  $function);
	    return " ";
    }
}

# Function: getvalue
# Arguments: $_ - a variable name
# Returns: the value of that variable, or " " if failed
# Description: Does "$table{$_}{"value"}" with error-checking.
sub getvalue
{
    ($_) = @_;

    s/^\$//;	# remove the ${} if they're there
    s/\{//;				
    s/\}//;
    
    if (defined($table{lc($_)}))
    {
	return $table{lc($_)}{"value"};
    }
    else
    {
	print STDERR "WARNING: variable not defined: $_\n" if $debug;
	return " ";
    }
}

# Function: interpolate
# Arguments: $str - a string with variables and/or functions
# Returns: the interpolated string, or "FAILED" if couldn't find a variable
#	   or function
# Description: Removes via interpolation variables and function from a string.
sub interpolate
{
    my ($str) = @_;
    my $function;	# name of a function
    my $args;		# arguments to a function
    my $variable;	# ${variable}
    my $varname;	# just variable

    while (($function, $args) = ($str =~ /&(\w+)\((.*?)\)/))
    {
	my $oldprefix = $`;	# store these for later
	my $oldsuffix = $';
	my @args;
	my $result;

	@args = split(/,/, $args);

	print STDERR "interpolating &$function(", join("/", @args), ")\n" 
	    if $debug;

	$result = &dofunction($function, @args);

	if ((not defined($result)) or ($result eq " "))
	{
	    print STDERR "WARNING: couldn't evaluate the function ",
		"'$function(", join(",", @args), ")'\n" if $debug;
	    return "FAILED";
	}

	print STDERR "function result = $result\n" if $debug;

	$str = $oldprefix . $result . $oldsuffix;
    }

    # now interpolate all the variables with their values
    while ($str =~ /\$(\w+)|\$\{(.*?)\}/)
    {
	my $oldprefix = $`;	# store these for later
	my $oldsuffix = $';

	# Since $1 and $2 were or'd in the regex, only one will be defined.
	#  This will grab whichever one was defined.
	if (defined($1))
	{
	    $varname = $variable = $1;
	}
	else
	{
	    $varname = $variable = $2;
	}

	print STDERR "interpolating $variable ($varname)...\n" if $debug;

	# bomb out of this match if we haven't defined this variable yet!
	if (!defined($table{lc($varname)}))
	{
	    print STDERR "can't use the unbound variable '$varname' to ",
		"interpolate:\n\t$str\n" if $debug;
		
	    return "FAILED";
	}

	# hey, it's defined! let's interpolate!

	# splice together new str without this variable
	$str = $oldprefix . &getvalue($varname) . $oldsuffix;
    }

    $str;
}

# Function: generate_avm
# Arguments: $reverse - non-zero if we want to generate expansions rather 
#			than abbreviations
# Returns: an associative array with the mappings
# Description: Generates an associative array of Attribute Value Mappings 
#              based on the given "short:long" array. The key/value or
#              value/key distinction is based on the argument.
sub generate_avm
{
    my ($reverse, @avm) = @_;	# do we want small->big?
    my %myavm;
    my $attr;
    my $short;
    my $long;

    foreach $attr (@avm)
    {
	($short, $long) = ($attr =~ /^(.*?):(.*)$/);

	# check for  a legal line!	
	if (defined($short) && defined($long))
	{
 	    $myavm{$short} = $long if $reverse;
	    $myavm{$long} = $short unless $reverse;
	}
    }

    %myavm;
}

# Function: generate_attd
# Arguments: @attd - list of MD attribute descriptions, with good and bad
#                    characters, from the cf file
# Returns: an associative array, keyed on the attribute name, with a value
#          of BADCHARS\001GOODCHARS. 
# Description: Generates an associative array of valid MD attributes, for
#	       fast lookups.
sub generate_attd
{
    my @attd = @_;
    my $attr;
    my %myattd;

    foreach $attr (@attd)
    {
	# (this is much much less scary than the single regex that existed 
	#  before. plus, it works.)

	# ok, pre-map the backwhacked stuff by hand
	$attr =~ s/\\\\/\002/g;
	$attr =~ s/\\:/\001/g;
	$attr =~ s/\\(.)/$1/g;

	# then do the split
	my ($name, $bad, $good) = split(/:/, $attr);

	# then put the control characters back
	$bad =~ s/\001/:/g;
	$bad =~ s/\002/\\/g;
	$good =~ s/\001/:/g;
	$good =~ s/\002/\\/g;

	print STDERR "$name  $bad  $good\n" if $debug;

	$myattd{lc($name)}{"trans"} = "$bad\001$good";
	$myattd{lc($name)}{"name"}  = "$name";
    }

    %myattd;
}

# Function: get_x500_attrs
# Arguments: $file - filename to read attribute list from
# Returns: a list of valid attributes
# Description: Just loads reasonable-looking x500 attributes into a list.
sub get_x500_attrs
{
    no strict 'subs';	# cop-out
    my ($file) = @_;
    my @attrs = ();

    open(DIRATTR, $file) or $log->abort($dsmCONFIG, MsvLog::NLS("Can't open %1\$s : %2\$s"), $file, $!);

    while (<DIRATTR>)
    {
	if (/(.*?):(.*?):/)
	{
	    push(@attrs, lc($1));
	    push(@attrs, lc($2));
	}
	else
	{
	    #$log->report("I", "Potentially corrupted X.500 attributes file");
	    # actually, it turns out this is normal... hmm...
	}
    }

    close DIRATTR;

    @attrs;
}

# Function: common_attrs
# Arguments: none (uses globals - ack!)
# Returns: a list of attributes processed by maps/filters
# Description: Scans throught the maps/filters section of the CF file,
#              grabs the names of all of the variables and returns a list of
#              all that have an entry in the friendly_x500 assoc array. Only
#              variables that appear to be on the correct (unbound) side of
#              a mapping are used. This should fix the filter problem.
#              Returns the x500 names, not the friendly ones. 
sub common_attrs
{
    my (@cd, @md);	# the two CF sections
    my $line;		# current line of CF
    my $mline;		# current line of CF, munged
    my %vars;		# a list of known variables, assoc to be unique
    my $var;		# the variable name found

    # okay, get the two CF sections
    @md = $cf->section($::opt_channel, "md_to_cd");
    @cd = $cf->section($::opt_channel, "cd_to_md");

    # scan through them looking for variables
    foreach $line (@md, @cd)
    {
	$mline = $line;

	while ($mline =~ s/\$(\w+)|\$\{(.*?)\}//)
	{
	    print STDERR "$mline\n" if $debug;

	    undef($var);
	    $var = $1 if defined($1);
	    $var = $2 if defined($2);
	    $log->report(MsvLog::NLS("?"), MsvLog::NLS("can't BOTH be undefined!")), next 
		if !defined($var);
	    
	    # need to check whether this variable is bound at the time it's
	    # used, and only add if it is. 
	    next unless (($line =~ /:.*$var/) or ($line =~ /$var.*=/));

	    $vars{lc($friendly_x500{lc($var)})} = 1 if defined $friendly_x500{lc($var)};
	}
    }

    keys %vars;
}

# Function: trans_attr
# Arguments: elem - name of the element
#            str - string to translate
# Returns: str, with characters mapped by $attr{$elem}
# Description: Maps characters as specified in the attribute_description 
#              section of the .cf file. Should be used for outgoing (-export)
#              mappings only.
sub trans_attr
{
    my ($elem, $str) = @_;	# args
    my ($from, $to);		# map from "from" to "to"
    my $tr;			# tr expression to eval

    # check for missing $elem
    if (not defined($attd{lc($elem)}))
    {
	$log->report(MsvLog::NLS("?"), MsvLog::NLS("Can't translate bogus elem: %1\$s"), $elem);
	return "";
    }

    # get the two halves of the translation table
    ($from, $to) = split(/\001/, $attd{lc($elem)}{"trans"});

    # make sure we actually were able to split it
    if (!defined($from) or !defined($to))
    {
	$log->report(MsvLog::NLS("?"), MsvLog::NLS("Invalid attd for %1\$s: %2\$s"), $attd{lc($elem)}{"name"}, $attd{lc($elem)}{"trans"});
	return "";
    }

    $from = quotemeta $from;	# make sure \s are quoted and stuff

    # this is the expression to evaluate
    $tr = "\$str =~ tr/$from/$to/";

    print STDERR "tr = $tr\n" if $debug;

    eval $tr;
    if ($@)
    {
	$log->report(MsvLog::NLS("?"), MsvLog::NLS("Can't eval \"%1\$s\": %2\$s"), $tr, $@);
	return "";
    }

    # return modified result
    $str;
}
