/*
 *  Copyright (c) 2000 Sun Microsystems.  All rights reserved.
 *  All rights reserved.
 * 
 *  Use of this software is governed by the terms of the license agreement 
 *  for the Sun ONE Compass Server between the parties.
 */


/* ------------------------------------------------------------------------ */


/*
 * filterrules.h - Improved filtering support for Compass
 *
 * Copyright 2003 Sun Microsystems, Inc. All
 * rights reserved. Use of this product is subject
 * to license terms. Federal Acquisitions:
 * Commercial Software -- Government Users
 * Subject to Standard License Terms and
 * Conditions.
 *
 * Sun, Sun Microsystems, the Sun logo, and Sun ONE
 * are trademarks or registered trademarks of Sun Microsystems,
 * Inc. in the United States and other countries.
 */
#ifndef _ROBOT_FILTERRULES_H_
#define _ROBOT_FILTERRULES_H_

/*
 * Forward declarations of the filter-rule object types.  The typedefs are
 * introduced before the project headers are included; the corresponding
 * struct definitions appear further down in this header.
 */
typedef struct _fr_filter_s FR_Filter;			/* one match condition */
typedef struct _fr_rule_s FR_Rule;			/* ordered list of filters */
typedef struct _fr_starting_point_s FR_StartingPoint;	/* url + depth pair */
typedef struct _fr_ruleset_s FR_Ruleset;		/* rules, starting points and actions */
typedef struct _fr_manager_s FR_Manager;		/* top-level container */
typedef struct _fr_action_map_s FR_ActionMap;		/* per-rule action override record */

#include "libcs/cs.h"		/* Requires AlfredLibcs */
#include "robotapi.h"		/* Requires RobotAPI */


/*
 * FR_FilterType - the supported filter (match) types.
 * The quoted phrase next to each enumerator is its plain-language meaning.
 * Values are consecutive starting at 0, with FR_UNKNOWN as the zero value.
 */
typedef enum _fr_filtertype_e {
    FR_UNKNOWN = 0,		/* "undefined" */
    FR_PREFIX  = 1,		/* "starts with" */
    FR_SUFFIX  = 2,		/* "ends with" */
    FR_EXACT   = 3,		/* "is" */
    FR_SUBSTR  = 4,		/* "contains" */
    FR_REGEX   = 5		/* "matches" */
} FR_FilterType;

/*
 * FR_RulesetType - kind of ruleset (stored in the `type` member of
 * FR_Ruleset).  Numbering starts at 1, so 0 is not one of the enumerators.
 */
typedef enum _fr_rulesettype_e {
    FR_SERVER  = 1,
    FR_SERVERS = 2,
    FR_DOMAINS = 3
} FR_RulesetType;

/*
 * FR_Action - outcome attached to rulesets and action-map records.
 * FR_UNDECIDED is the zero value.
 */
typedef enum _fr_action_e {
    FR_UNDECIDED = 0,
    FR_ALLOW     = 1,
    FR_DENY      = 2
} FR_Action;

/*
 * FR_LogType - logging style selected on the manager (see the `logtype`
 * member of FR_Manager).  The quoted word is the name of each level.
 */
typedef enum _fr_mgr_logtype_e {
    FR_LOG_NORMAL  = 0,		/* "normal" */
    FR_LOG_TERSE   = 1,		/* "terse" */
    FR_LOG_VERBOSE = 2		/* "verbose" */
} FR_LogType;

/*
 * FR_Protocol - protocol flags.  Each enumerator occupies its own bit, so
 * values can be OR-ed together into a mask and tested with bitwise AND.
 */
typedef enum _fr_protocols_e {
    FR_HTTP             = 1 << 0,	/* 0x01 */
    FR_HTTPS            = 1 << 1,	/* 0x02 */
    FR_FILE             = 1 << 2,	/* 0x04 */
    FR_FTP              = 1 << 3,	/* 0x08 */
    FR_NNTP             = 1 << 4,	/* 0x10 */
    FR_GOPHER           = 1 << 5,	/* 0x20 */
    FR_UNKNOWN_PROTOCOL = 1 << 6	/* 0x40 */
} FR_Protocol;

/*
 * FR_URL2Protocol(_x) - map the leading characters of string _x to an
 * FR_Protocol value.
 *
 * Each candidate scheme is compared against the start of _x with
 * LIBCS_strncasecmp (presumably case-insensitive, like strncasecmp --
 * confirm in libcs).  Only the scheme letters are compared; no ':' is
 * required, so any string that merely begins with "http", "file", "news",
 * etc. is classified accordingly.
 *
 * "https" must be tested before "http", since "http" is a prefix of it.
 * Both "nntp" and "news" map to FR_NNTP.  Anything unmatched yields
 * FR_UNKNOWN_PROTOCOL.
 *
 * _x is evaluated once per comparison performed, so it must not have side
 * effects.
 */
#define FR_URL2Protocol(_x)	\
    ((LIBCS_strncasecmp((_x), "https", 5) == 0)  ? FR_HTTPS  :	\
     (LIBCS_strncasecmp((_x), "http", 4) == 0)   ? FR_HTTP   :	\
     (LIBCS_strncasecmp((_x), "file", 4) == 0)   ? FR_FILE   :	\
     (LIBCS_strncasecmp((_x), "ftp", 3) == 0)    ? FR_FTP    :	\
     (LIBCS_strncasecmp((_x), "gopher", 6) == 0) ? FR_GOPHER :	\
     (LIBCS_strncasecmp((_x), "nntp", 4) == 0)   ? FR_NNTP   :	\
     (LIBCS_strncasecmp((_x), "news", 4) == 0)   ? FR_NNTP   :	\
     FR_UNKNOWN_PROTOCOL)

/*
 * FR_FilterType2Str(_x) - string name for an FR_FilterType value.
 *
 * Yields "unknown" for FR_UNKNOWN, "by-prefix"/"by-suffix"/"by-exact"/
 * "by-substr"/"by-regex" for the corresponding filter types, and "invalid"
 * for any value that is not one of the enumerators.  The result is a string
 * literal.  _x may be evaluated several times; avoid side effects.
 */
#define FR_FilterType2Str(_x)	\
    ((FR_UNKNOWN == (_x)) ? "unknown"   :	\
    ((FR_PREFIX  == (_x)) ? "by-prefix" :	\
    ((FR_SUFFIX  == (_x)) ? "by-suffix" :	\
    ((FR_EXACT   == (_x)) ? "by-exact"  :	\
    ((FR_SUBSTR  == (_x)) ? "by-substr" :	\
    ((FR_REGEX   == (_x)) ? "by-regex"  : "invalid"))))))

/*
 * FR_RulesetType2Str(_x) - string name for an FR_RulesetType value.
 *
 * Yields "servers" for FR_SERVERS and "domains" for every other value.
 * NOTE(review): that includes FR_SERVER and out-of-range values, which are
 * therefore reported as "domains" -- confirm this is intended.
 */
#define FR_RulesetType2Str(_x)	\
    (((_x) != FR_SERVERS) ? "domains" : "servers")

/*
 * FR_Action2Str(_x) - string name for an FR_Action value.
 *
 * Yields "allow" for FR_ALLOW, "deny" for FR_DENY, and "undecided" for
 * anything else (FR_UNDECIDED as well as values outside the enum).
 * _x may be evaluated twice; avoid side effects.
 */
#define FR_Action2Str(_x)	\
    (((_x) == FR_ALLOW) ? "allow" :	\
     (((_x) == FR_DENY) ? "deny" : "undecided"))

/*
 * FR_Filter - a single match condition: a source to examine (`src`), a
 * match type, and the value to match against.
 *
 * NOTE(review): ownership/lifetime of the string members and of `regex`
 * is not visible in this header -- confirm against FR_Filter_Parse() and
 * FR_Filter_Free().
 */
struct _fr_filter_s {
    char *nickname;		/* end-user-readable for logging purposes */
    char *src;			/* Robot source for matching */
    FR_FilterType type;		/* Type of match */
    char *value;		/* value for matching */
    void *regex;		/* compiled re for FR_REGEX filters; opaque in this header */
    int seqno;			/* sequence number for debugging */
    int lineno;			/* line number for debugging */
};

/*
 * FR_Rule - a named, individually enable-able rule made of an ordered
 * list of filters.
 *
 * NOTE(review): how the filters combine (all vs. any must match) is not
 * visible in this header.
 */
struct _fr_rule_s {
    boolean_t enable;		/* is enabled? */
    int id;			/* unique rule id */
    char *nickname;		/* end-user-readable for logging purposes */
    CSList *filters;		/* ordered list of filters (presumably FR_Filter entries -- confirm) */
    int lineno;			/* line number for debugging */
};

/*
 * FR_StartingPoint - a URL paired with a crawl depth.
 *
 * NOTE(review): whether `url` is owned (copied) by this struct, and the
 * valid range of `depth`, are not visible in this header.
 */
struct _fr_starting_point_s {
    char *url;			/* the url */
    int depth;			/* crawl depth from this starting point */
};

/*
 * FR_Ruleset - a named, individually enable-able set of rules together
 * with the domains/servers/protocols/ports it covers, its starting points,
 * and the actions to apply.
 *
 * NOTE(review): ownership of the pointer members is not visible in this
 * header -- confirm against FR_Ruleset_Create()/FR_Ruleset_Free().
 */
struct _fr_ruleset_s {
    boolean_t enable;		/* is enabled? */
    int id;			/* unique ruleset id */
    char *nickname;		/* end-user-readable for logging purposes */
    FR_RulesetType type;	/* Domain or servers? */
    FR_Action default_act;	/* default action */
    FR_Action applied_act;	/* applied action */
    char **domains;		/* NULL-terminated array of domains */
    char **servers;		/* NULL-terminated array of servers */
    char **protocols;		/* NULL-terminated array of protocols */
    int *ports;			/* array of ports; described as "NULL-terminated", presumably 0-terminated since elements are int -- TODO confirm */
    CSList *rules;		/* ordered list of rules */
    CSList *starting_points;	/* ordered list of starting points/depth */
    FR_ActionMap *amap;		/* overrides applied_act */
    int amapsz;			/* size of amap (presumably element count -- confirm) */
    int amapidx;		/* index into amap */
    int lineno;			/* line number for debugging */
    char * charset;		/* site's default charset */
    char * database;		/* site's target database */
};

/*
 * FR_ActionMap - one override record: associates a rule id with the action
 * to take for it.  FR_Ruleset holds a pointer to these in its `amap`
 * member, which is documented there as overriding `applied_act`.
 */
struct _fr_action_map_s {
    int id; 			/* unique rule id */
    boolean_t valid;		/* is this record valid? */
    FR_Action action;		/* which override action to take */
};

/*
 * FR_Manager - top-level container: the configuration file location, the
 * logging style, the ruleset/rule tables, and the allowed protocols.
 *
 * NOTE(review): the hash-table key types are not visible in this header.
 */
struct _fr_manager_s {
    char *cffn;			/* where is filterrules.conf? */
    FR_LogType logtype;		/* how to display reasons for rejection? */ 
    CSHashTable *domains;	/* unordered set of rulesets for domains */
    CSHashTable *servers;	/* unordered set of rulesets for servers */
    CSHashTable *rules;		/* unordered set of rules */
    unsigned int protocols_allowed;	/* Which FR_Protocol's? (presumably an OR of FR_Protocol flags -- confirm) */
};

/*
 * FR_Manager primitives
 *
 * FR_Manager_Parse: returns an FR_Manager for `filterrules_cf` (presumably
 *   the path of filterrules.conf, cf. the `cffn` member -- confirm).
 *   NOTE(review): the failure return (NULL?) is not visible in this header.
 * FR_Manager_Free: by its name, releases a manager.  NOTE(review): the
 *   meaning of the int result is not visible in this header.
 */
NSAPI_PUBLIC FR_Manager *FR_Manager_Parse(const char *filterrules_cf);
NSAPI_PUBLIC int FR_Manager_Free(FR_Manager *);

/*
 * FR_Ruleset primitives
 *
 * FR_Ruleset_Create: returns an FR_Ruleset for the given id and nickname.
 *   NOTE(review): whether `nickname` is copied or adopted is not visible
 *   in this header.
 * FR_Ruleset_Free: by its name, releases a ruleset.  NOTE(review): the
 *   meaning of the int result is not visible in this header.
 */
NSAPI_PUBLIC FR_Ruleset *FR_Ruleset_Create(int id, char *nickname);
NSAPI_PUBLIC int FR_Ruleset_Free(FR_Ruleset *);

/*
 * FR_Rule primitives
 *
 * FR_Rule_Create: returns an FR_Rule for the given id and nickname.
 *   NOTE(review): whether `nickname` is copied or adopted is not visible
 *   in this header.
 * FR_Rule_Free: by its name, releases a rule.  NOTE(review): the meaning
 *   of the int result is not visible in this header.
 */
NSAPI_PUBLIC FR_Rule *FR_Rule_Create(int id, char *nickname);
NSAPI_PUBLIC int FR_Rule_Free(FR_Rule *);

/*
 * FR_StartingPoint primitives
 *
 * FR_StartingPoint_Create: returns an FR_StartingPoint for the given url
 *   and crawl depth.  NOTE(review): `url` is const here while the struct
 *   member is a plain char *, so the string is presumably copied -- confirm.
 * FR_StartingPoint_Free: by its name, releases a starting point.
 *   NOTE(review): the meaning of the int result is not visible in this
 *   header.
 */
NSAPI_PUBLIC FR_StartingPoint *FR_StartingPoint_Create(const char *url, int depth);
NSAPI_PUBLIC int FR_StartingPoint_Free(FR_StartingPoint *);

/*
 * FR_Filter primitives
 *
 * FR_Filter_Parse: returns an FR_Filter built from the text in `str`.
 *   NOTE(review): `str` is non-const, so the parser may modify it in
 *   place -- confirm before passing string literals or shared buffers.
 * FR_Filter_Free: by its name, releases a filter.  NOTE(review): the
 *   meaning of the int result is not visible in this header.
 */
NSAPI_PUBLIC FR_Filter *FR_Filter_Parse(char *str);
NSAPI_PUBLIC int FR_Filter_Free(FR_Filter *);

/*
 * hooks into RobotAPI
 *
 * All three share the same (libcs_pblock *, CSFilter *, CSResource *)
 * signature and return int.  By their names they are the setup, per-item
 * execution and shutdown entry points.  NOTE(review): the parameter roles
 * and return-code conventions are defined by RobotAPI and are not visible
 * in this header.
 */
NSAPI_PUBLIC int FR_Execute(libcs_pblock *, CSFilter *, CSResource *);
NSAPI_PUBLIC int FR_Setup(libcs_pblock *, CSFilter *, CSResource *);
NSAPI_PUBLIC int FR_Shutdown(libcs_pblock *, CSFilter *, CSResource *);

/*
 * Returns B_TRUE if the URL is not allowed based on its protocol alone.
 *
 * NOTE(review): presumably classifies `url` with FR_URL2Protocol and checks
 * the result against mgr->protocols_allowed -- confirm in the
 * implementation.  `url` is declared non-const; whether it is modified is
 * not visible in this header.
 */
NSAPI_PUBLIC boolean_t FR_DenyURLByProtocol(FR_Manager *mgr, char *url);

#endif /* _ROBOT_FILTERRULES_H_ */
