/**
 * PROPRIETARY/CONFIDENTIAL. Use of this product is subject to
 * license terms. Copyright  2001 Sun Microsystems, Inc.
 * Some preexisting portions Copyright  2001 Netscape Communications Corp.
 * All rights reserved.
 */
/*
 * cache.h: Proxy Caching
 *
 * Uses RSA Data Security, Inc. MD5 Message-Digest Algorithm
 * to hash URLs to cache file names.
 *
 *
 * Ari Luotonen
 * Copyright (c) 1995, 1996 Netscape Communcations Corporation
 *
 */

#ifndef NS_PROXY_CACHE_H
#define NS_PROXY_CACHE_H

#include "base/file.h"
#include "base/net.h"
#include "base/buffer.h"
#include "frame/req.h"
#include "frame/func.h"
#include "libproxy/util.h"
#include "libproxy/cpart.h"
#include "libproxy/csect.h"


/* Lengths, in chars/bytes, of the digest and of the cache file names. */
#define CACHE_DIGEST_LEN	16	/* The length of an MD5 signature, in bytes */
#define CACHE_NAME_LEN		16	/* Enough for 70 million URLs */
#define CACHE_LOCKNAME_LEN	20	/* 16 + 4 for ".lck"; a terminating NUL is not counted */


/* Number of second-level directories under each cache section */

#define CACHE_SECOND_LEVEL_DIRS	64


/*
 * Default for the max size to mmap() in one go, in bytes.
 *
 * NOTE(review): presumably the initial value of cache_max_mmap_size,
 * declared further down -- confirm where it is assigned.
 *
 */
#define CACHE_DEFLT_MAX_MMAP_SIZE	262144		/* 256K */


/*
 * Max length of tokens read by util_get_string_from_file() and
 * written by util_put_string_to_file(), which are used for
 * persistent storage of state.
 *
 */

#define CACHE_MAX_TOKEN_LEN	256


/*
 * Limits for CIF entries (the cache meta data entry kept for each cache
 * file).  Meanings below are read off the macro names -- TODO confirm
 * against the code that uses them.
 */
#define CIF_MAX_ENTRY_LEN	256	/* max length of one CIF entry */
#define CIF_MAX_URL_LEN		2048	/* max length of a URL */
#define CIF_MAX_CT_LEN		100	/* presumably max content-type length */
#define CIF_MAX_ENTRIES		2000	/* max number of entries */


/*
 * Rest of the macros undocumented.
 * Internal implementation details, subject to change.
 * Do not use.
 *
 */

/*
 * CPS == Cache Partition/Section
 *
 * Names of the bookkeeping directory and of the files inside it.  Every
 * name below is shorter than CPS_FILENAME_MAXLEN.
 */
#define CPS_FILENAME_MAXLEN	32	/* Max length of the following names */
#define CPS_CONF_DIR_NAME	".cache-data"
#define CPS_TYPE_FILE_NAME	".cache-data/.type"
#define CPS_CONF_FILE_NAME	".cache-data/.conf"
#define CPS_STATUS_FILE_NAME	".cache-data/.status"
#define CPS_SIZE_FILE_NAME	".cache-data/.size"
#define CPS_LOCK_FILE_NAME	".cache-data/.lock"
#define CPS_DIM_FILE_NAME	".cache-data/.dim"
#define CPS_SECTS_FILE_NAME	".cache-data/.sects"

/*
 * String tokens naming a partition/section and a state.
 * NOTE(review): presumably the values stored in the .cache-data files
 * above -- confirm against the readers/writers.
 */
#define CACHE_TOKEN_PARTITION	"cache-proxy-cache-partition"
#define CACHE_TOKEN_SECTION	"cache-proxy-cache-section"
#define CACHE_TOKEN_OPEN	"OPEN"
#define CACHE_TOKEN_CLOSED	"CLOSED"
#define CACHE_TOKEN_INITIAL	"INITIAL"

/* File name suffixes.  CACHE_LOCK_SUFFIX_LEN must stay equal to
 * strlen(CACHE_LOCK_SUFFIX). */
#define CACHE_LOCK_SUFFIX	".lck"
#define CACHE_LOCK_SUFFIX_LEN    4		/* in chars, NUL not counted */
#define CACHE_OLD_SUFFIX	".old"

/* File name; presumably holds the cache_ndirs value -- TODO confirm */
#define CACHE_NDIRS_FN		"ndirs"

/*
 * Built-in defaults.
 * NOTE(review): presumably the fallbacks used by cache_get_lm_factor(),
 * cache_get_max_uncheck(), cache_get_term_percent(),
 * cache_get_min_size() and cache_get_max_size() -- confirm.
 */
#define CACHE_LM_FACTOR_DEFAULT		((float)0)
#define CACHE_MAX_UNCHECK_DEFAULT	((time_t)0)
#define CACHE_TERM_PERCENT_DEFAULT	25
#define CACHE_MIN_K_DEFAULT		0		/*   0 KB */
#define CACHE_MAX_K_DEFAULT		100000		/* 100 MB, expressed in KB */

#define MAX_BUF_SIZE      4096

#define MAX_SYNC_LINELEN        1000

/*
 * Run-time tunable, defined elsewhere.  Going by their names, DEF_/MIN_/MAX_
 * are its default and allowed range; the code applying them is not in this
 * header.
 */
extern  int NOTIFY_NUM_CHANGES;
#define DEF_NOTIFY_NUM_CHANGES    10
#define MIN_NOTIFY_NUM_CHANGES     1
#define MAX_NOTIFY_NUM_CHANGES   500


/*
 * Run-time tunable, defined elsewhere.  Going by their names, DEF_/MIN_/MAX_
 * are its default and allowed range -- TODO confirm where they are applied.
 */
extern  int NOTIFY_BLK_LIMIT;
#define DEF_NOTIFY_BLK_LIMIT     100
#define MIN_NOTIFY_BLK_LIMIT       1
#define MAX_NOTIFY_BLK_LIMIT   10000

/*
 * Run-time tunable, defined elsewhere.  Going by their names, DEF_/MIN_/MAX_
 * are its default and allowed range.
 * NOTE(review): the unit of "retry after" is not stated here -- confirm.
 */
extern  int CACHE_FS_FULL_RETRY_AFTER;
#define DEF_CACHE_FS_FULL_RETRY_AFTER     50
#define MIN_CACHE_FS_FULL_RETRY_AFTER      1
#define MAX_CACHE_FS_FULL_RETRY_AFTER   1024


extern  int GC_HI_MARGIN_PERCENT;		/* trigger gc when this limit reached */
#define DEF_GC_HI_MARGIN_PERCENT          80	/* default */
#define MIN_GC_HI_MARGIN_PERCENT          10	/* presumably the lowest accepted setting */
#define MAX_GC_HI_MARGIN_PERCENT         100	/* presumably the highest accepted setting */


extern  int GC_LO_MARGIN_PERCENT;		/* target size after gc triggered */
#define DEF_GC_LO_MARGIN_PERCENT          70	/* default */
#define MIN_GC_LO_MARGIN_PERCENT           5	/* presumably the lowest accepted setting */
#define MAX_GC_LO_MARGIN_PERCENT         100	/* presumably the highest accepted setting */


extern  int GC_EXTRA_MARGIN_PERCENT;		/* gc this much extra */
#define DEF_GC_EXTRA_MARGIN_PERCENT       30	/* default */
#define MIN_GC_EXTRA_MARGIN_PERCENT        0	/* presumably the lowest accepted setting */
#define MAX_GC_EXTRA_MARGIN_PERCENT      100	/* presumably the highest accepted setting */


extern  int GC_LEAVE_FS_FULL_PERCENT;		/* if fs full, don't go below this percentage */
#define DEF_GC_LEAVE_FS_FULL_PERCENT      60	/* default: don't go below 60% */
#define MIN_GC_LEAVE_FS_FULL_PERCENT       0	/* presumably the lowest accepted setting */
#define MAX_GC_LEAVE_FS_FULL_PERCENT     100	/* presumably the highest accepted setting */

/* Max path name length; whether a terminating NUL is included is not stated here */
#define MAX_PATHNAME_LEN 2000


/*
 * ConnectMode -- how willing the proxy is to go out to the network:
 * whenever necessary, only when the document is not cached, or never.
 */
typedef enum {
    /* normal -- retrieve/refresh when necessary */
    CM_NORMAL    = 0,

    /* fast -- retrieve only if not in cache already */
    CM_FAST_DEMO = 1,

    /* never -- never connect to network */
    CM_NEVER     = 2
} ConnectMode;


/*
 * CacheState -- what ce_lookup() decided about a cache file.
 * The documentation for ce_lookup() describes each state in detail.
 *
 */
typedef enum {
    /* Internal flag -- do not use! */
    CACHE_EXISTS_NOT        = 0,

    /* Internal flag -- do not use! */
    CACHE_EXISTS            = 1,

    /* No caching: don't read, don't write cache */
    CACHE_NO                = 2,

    /* Create cache; don't read */
    CACHE_CREATE            = 3,

    /* Refresh cache; read if not modified */
    CACHE_REFRESH           = 4,

    /* Return directly, no check */
    CACHE_RETURN_FROM_CACHE = 5,

    /* With connect-mode=never when not in cache */
    CACHE_RETURN_ERROR      = 6
} CacheState;


/*
 * Data structure to hold all the information about one cache entry.
 * This structure is created by ce_lookup() and destroyed by ce_free().
 *
 * "CIF" below refers to the cache meta data entry stored with each
 * cache file; "Field #n in CIF" gives the position of the value in it.
 *
 */
typedef struct _CacheEntry {

    CacheState  state;	/* state of the cache file; DO NOT refer to any
			 * of the other fields in this C struct if state
			 * is other than
			 *	CACHE_REFRESH or
			 *	CACHE_RETURN_FROM_CACHE
			 */

    PRFileDesc *    fd_in;	/* do not use: open cache file for reading */
    PRFileDesc *    fd_out;	/* do not use: open (locked) cache file for writing */
    PRFileMap  * mapfile;	/* NSPR file mapping; presumably backs buf -- TODO confirm */
    struct stat finfo;	/* stat info for the cache file */

    int		only_partially_mmapped;	/* flag; by its name, set when buf does not
					 * cover the whole file -- TODO confirm */
    filebuffer *buf;	/* mmapped buffer for the cache file */

    unsigned char  digest[CACHE_DIGEST_LEN];	/* MD5 for the URL */
    char *      url_dig;	/* URL used for the digest; field #8 in CIF */
    char *	url_cif;	/* URL read from CIF file */
    char *	filname;	/* Relative cache file name */
    char *	dirname;	/* Absolute cache directory name */
    char *	absname;	/* Absolute cache file path */
    char *	lckname;	/* Absolute locked cache file path */
    int		sect_idx;	/* Cache section index */
    int		part_idx;	/* Cache partition index */
    CSect *	section;	/* Cache section that this file belongs to */
    CPart *	partition;	/* Cache partition that this file belongs to */

    int         xfer_time;	/* secs */		/* Field #2 in CIF */
    time_t      last_modified;	/* GMT */		/* Field #3 in CIF */
    time_t      expires;	/* GMT */		/* Field #4 in CIF */
    time_t      last_checked;	/* GMT */		/* Field #5 in CIF */
    long        content_length;				/* Field #6 in CIF */
    char *      content_type;				/* Field #7 in CIF */

    int		is_auth;	/* Authenticated data -- always do recheck */
    int		auth_sent;	/* Client did send the Authorization header */
    long	min_size;	/* Min size for a cache file (in KB) */
    long	max_size;	/* Max size for a cache file (in KB) */

    time_t      last_accessed;	/* GMT for proxy, local for gc */
    int		removed;	/* gc only; file was removed from disk */
    size_t	cf_tot_size;	/* total cache file size */
    size_t	cf_meta_size;	/* cache meta data entry size ("CIF entry") */
    size_t	cf_hdr_size;	/* cached HTTP header size */
    long	bytes_written;	/* Number of bytes written to disk */

    int		category;	/* Value category; bigger is better */
    int		cif_entry_ok;	/* CIF entry found and ok */
    time_t      ims_c;		/* GMT; Client -> proxy if-modified-since */
    time_t	start_time;	/* Transfer start time */
    int         inhibit_caching;/* Bad expires/other reason not to cache */
    int         corrupt_cache_file;	/* Cache file gone corrupt => remove */
    int		write_aborted;	/* True if the cache file write was aborted */
    int         batch_update;	/* We're doing batch update (no real user) */

    char *	cache_exclude;	/* Hdrs not to write to cache (RE) */
    char *	cache_replace;	/* Hdrs to replace with fresh ones from 304 response (RE) */
    char *	cache_nomerge;	/* Hdrs not to merge with the cached ones (RE) */

    PRLock     *lock;		/* NSPR lock; what it guards is not stated here -- TODO confirm */
    Session *   sn;		/* presumably the Session passed to ce_lookup() -- TODO confirm */
    Request *   rq;		/* presumably the Request passed to ce_lookup() -- TODO confirm */
} CacheEntry;


/* Global configuration settings currently in effect */


/* Cache on/off flag */
NSAPI_PUBLIC extern int cache_on;


/* Number of top-level cache directories (sections); a power of two */
NSAPI_PUBLIC extern int	cache_ndirs;	/* 1, 2, 4, 8, 16, 32, 64, 128, 256 */


/* cache "working directory" -- for misc cache maintenance files */
NSAPI_PUBLIC extern char * cache_wd;


/*
 * Undocumented -- do not use
 *
 * NOTE(review): the value list on cache_dim lines up with the one on
 * cache_ndirs, which suggests cache_ndirs == 1 << cache_dim -- confirm.
 * NOTE(review): the *_default arrays are presumably the built-in values
 * behind the corresponding pointers -- confirm.
 */
extern int	     cache_dim;	/* 0, 1, 2, 3,  4,  5,  6,   7, or   8 */
extern ConnectMode   cache_connect;
extern long          cache_lock_timeout;	/* unit not stated in this header */
extern time_t        cache_cover_errors; /*Return from cache when remote is  */
					 /*unavailable if not older than this*/
extern int           cache_jump_ship_percent;
extern char          cache_exclude_default[];
extern char *        cache_exclude;	/* Don't cache these headers */
extern char          cache_replace_default[];
extern char *        cache_replace;	/* Replace these 304 headers */
extern char          cache_nomerge_default[];
extern char *        cache_nomerge;	/* Don't merge these 304 headers */


/* Max mmap()'ed size; see CACHE_DEFLT_MAX_MMAP_SIZE for the 256K default constant */
extern size_t cache_max_mmap_size;


/*
 * Tuning use of mmap() -- for broken HPUX mmap().
 *
 */

/*
 * Selects how the CIF entry of a newly written cache file is written.
 *
 * cache_use_mmap_wr == 1
 *	=> mmap() will be used to mmap the first 256 bytes of a newly written
 *	   cache file to write the actual CIF entry.
 * cache_use_mmap_wr == 0 -- default
 *	=> lseek()+write() will be used instead
 *
 */
extern int cache_use_mmap_wr;
#define    CACHE_USE_MMAP_WR_DEFLT	0


/*
 * Selects how an existing cache file is read and its CIF entry updated.
 *
 * cache_use_mmap_rdwr == 1 -- default for non-Solaris platforms
 *	=> mmap() will be used to read the CIF entry and send the cached
 *	   data, and to update the CIF entry
 * cache_use_mmap_rdwr == 0 -- default for the Solaris platform
 *	=> read() will be used to read the CIF entry and send the cached
 *	   data; lseek()+write() will be used to update the CIF entry
 *
 *
 * During testing it turned out that the HP-UX kernel bug with mmap()
 * only affects initial CIF entry writes, not subsequent updates of
 * an existing cache file's CIF entry.  Therefore, the HP-UX default for
 * mmap() updates in SURF-Pack 2 will be 'on' just like on the other
 * platforms.
 *
 */
extern int cache_use_mmap_rdwr;
#ifdef SOLARIS
#define    CACHE_USE_MMAP_RDWR_DEFLT	0
#else
#define    CACHE_USE_MMAP_RDWR_DEFLT	1
#endif


/*
 * Functions
 *
 */


/*
 * Calculates the MD5 signature of the given URL, and stores it in
 * the digest variable.
 *
 *   url     URL to hash
 *   digest  receives the signature; 16 bytes, i.e. CACHE_DIGEST_LEN
 *
 */
NSPR_BEGIN_EXTERN_C
NSAPI_PUBLIC void cache_digest(char *url, unsigned char digest[16]);


/*
 * Cache Entry Lookup -- look up a cache entry for the given URL.
 *
 *   sn     current Session *
 *   rq     current Request *
 *   url    URL of the document whose cache entry is being looked up
 *   ims_c  If-modified-since time
 *
 * If caching is not enabled, returns NULL.
 *
 * Otherwise, a newly allocated struct CacheEntry * is returned which
 * must be free'd by the caller by calling ce_free().
 *
 * The CacheEntry structure is returned regardless of whether or not
 * a cached copy exists in the cache.
 *
 * Whether a cached copy exists can be determined by looking at the
 * field ce->state:
 *
 *	CACHE_NO
 *		- the document is not and will not be cached;
 *		  other fields in the cache structure may be NULL
 *
 *	CACHE_CREATE
 *		- cache file doesn't exist, but may be created
 *		  once the remote server is contacted.
 *		  However, during the retrieval it may turn out that
 *		  the document is not cacheable.
 *
 *	CACHE_REFRESH
 *		- cache file exists, but it needs to be refreshed
 *		  (an up-to-date check must be made) before it's
 *		  used; note that the data may still be up-to-date,
 *		  but the remote server needs to be contacted to
 *		  find that out.  If not, the cache file will be
 *		  replaced with the new document version sent by
 *		  the remote origin server.
 *
 *	CACHE_RETURN_FROM_CACHE
 *		- cache file exists and is up-to-date based on the
 *		  configuration and current parameters controlling
 *		  what is considered fresh.
 *
 *	CACHE_RETURN_ERROR
 *		- only happens if the proxy is set to no-network mode
 *		  (connect-mode=never), and the document does not exist
 *		  in the cache.
 *
 */
NSAPI_PUBLIC CacheEntry *ce_lookup(Session *sn, Request *rq,
				   char *url, time_t ims_c);


/*
 * ce_free() releases all the memory taken up by the cache entry
 * and its related memory structures.
 *
 *   ce     cache entry obtained from ce_lookup()
 *
 */
NSAPI_PUBLIC void ce_free(CacheEntry *ce);
NSPR_END_EXTERN_C



/* -------------------- End of public CacheEntry API --------------------- */


/*
 * REST OF THE FUNCTIONS ARE UNDOCUMENTED -- DO NOT USE
 *
 * Internal entry points.  Their definitions are outside this header, so
 * only the signatures are described here.
 *
 * NOTE(review): unlike cache_digest(), ce_lookup() and ce_free(), these
 * prototypes are not wrapped in NSPR_BEGIN_EXTERN_C / NSPR_END_EXTERN_C,
 * even though cache_set_ndirs() is marked NSAPI_PUBLIC -- confirm that
 * is intended.
 */
int cache_get_term_percent(pblock *vars);
time_t cache_get_cover_errors(pblock *vars);
void cache_abort(CacheEntry *ce);
void cache_done(CacheEntry *ce);
void cache_not_modified(CacheEntry *ce);
void cache_clear_fi(CacheEntry *ce);
/* Takes the cache entry, a client socket descriptor, the request, a
 * warning code/text pair and a pblock of 304 response headers; the
 * meaning of the long return value is not stated here -- TODO confirm */
long return_from_cache(CacheEntry *ce,
		       SYS_NETFD csd,
		       Request *rq,
		       int warn_code,
		       char *warn_text,
		       pblock *resp_304_hdrs);
int cache_should_cache(Request *rq, char *method, char *url);
void calog_add(CacheEntry *ce, char *host, char *pauth_user);
char *calog_lookup_and_clear(CacheEntry *ce, char *rep_http);
int cache_names(CacheEntry *ce);
/* presumably assigns cache_ndirs -- TODO confirm */
NSAPI_PUBLIC void cache_set_ndirs(int ndirs);
void cache_notify_change(int sect_idx, long bytes);

NSPR_BEGIN_EXTERN_C

/* Undocumented.  Takes a parameter block; the meaning of the int result
 * is not stated in this header -- TODO confirm against the definition */
NSAPI_PUBLIC int dry_cache_init(pblock *pb);

NSPR_END_EXTERN_C


/*
 * Internal.
 *
 * Get the max and min sizes allowed for the cache file for the current
 * request.
 *
 * Note: Not to be called before all ObjectType functions have been called!
 *
 * vars is rq->vars.
 *
 * Returns the file size in kilobytes.
 *
 */
long cache_get_max_size(pblock *vars);
long cache_get_min_size(pblock *vars);


/*
 * Internal.
 *
 * Get the max allowed time since the last up-to-date check.
 *
 * Note: Not to be called before all ObjectType functions have been called!
 *
 * vars is rq->vars.
 *
 * Returns the value in seconds.
 *
 */
time_t cache_get_max_uncheck(pblock *vars);


/*
 * Internal.
 *
 * Get the LM factor to be used for the current request when evaluating
 * the cache file.
 *
 * Note: Not to be called before all ObjectType functions have been called!
 *
 * vars is rq->vars.
 *
 * The return value will be multiplied by the time the document had been
 * unmodified when it was last retrieved/refreshed.  The result is the
 * TTL (time-to-live) starting from the last retrieve/refresh time.
 *
 */
float cache_get_lm_factor(pblock *vars);


/*
 * Internal.
 *
 * Get the network mode for the current request: CM_NORMAL (normal),
 * CM_FAST_DEMO (fast-demo) or CM_NEVER (no-network).
 *
 * vars is rq->vars.
 *
 */
ConnectMode cache_get_connect_mode(pblock *vars);

/* Undocumented.  By its name and parameters, parses server headers from buf
 * into hdrs, with resp_304_hdrs supplying 304-response headers -- TODO
 * confirm against the definition. */
void parse_srvhdrs(char *buf, pblock *hdrs, pblock *resp_304_hdrs);



#endif

