/*
 * Routines for efficient processing of HTTP request headers.  The session
 * control block (SCB) contains a req_atom array that assigns a unique code 
 * in the range 1-127 for each header line in the request array that indicates 
 * the header type (accept, connection, etc).  Atom number zero is reserved 
 * and is used to mark the end of the req_atom array.  Other pre-defined atoms 
 * are:
 *    HDR_ATOM_CONTINUE		Header line is a continuation line.
 *    HDR_ATOM_OTHER		Header label does not match known atoms.
 *
 * When created, each atom is assigned a bit number for use in the
 * req_summary longword in the SCB.  Each atom present in the current
 * request headers sets its assigned bit in the summary.  A bit may be
 * assigned to more than one atom code, except for bit zero (0) which indicates
 * the summary longword is valid (i.e. req_atom[] has been initialized by
 * http_summarize_headers).
 *
 * String comparisons are always case-insensitive.
 *
 * Author:	David Jones
 * Date:	8-OCT-1996
 * Revised:	10-OCT-1996		Make more robust.
 * Revised:	8-AUG-2000		Add -with_space function.
 */
#include <stdio.h>
#include <stdlib.h>

#include "session.h"
#ifndef LOWER_CASE
#include "ctype_np.h"
#endif

#include "http_header.h"		/* globals and prototypes */

struct anode {
    char *tail;				/* Initialized to "" */
    int code;				/* atom number or 0 */
    short node[128];
};

/* Number of entries in the node[] table. */
#define NODE_TABLE_SIZE 300
/* Index of the trie root; every lookup starts here. */
#define ROOT_NODE 0
/* Count of node[] entries handed out so far by alloc_node. */
static int node_table_used = 0;
/* Bit number most recently assigned by alloc_mask (bit 0 is never assigned). */
static int last_mask_bit = 0;
/* Bytes still unused in the text block that free_text points into. */
static int text_available = 0;
/*
 * Next atom number http_create_atom will assign.  Numbering starts at 3;
 * presumably the lower numbers belong to the pre-defined atoms declared in
 * http_header.h -- confirm there.
 */
static int atoms_used = 3;
/* Next unused byte of the current text block used by alloc_text. */
static char *free_text;
/* Storage for all trie nodes. */
static struct anode node[NODE_TABLE_SIZE];

/* Summary mask assigned to each atom number, indexed by atom number. */
static long mask_value[128];

/**************************************************************************/
/*  Fill in the http_std_atoms and http_std_amask globals by registering
 *  every standard header label with http_create_atom.  Mask bits are handed
 *  out in creation order, so the most frequently used labels come first and
 *  receive the non-shared mask bits (<16).  Do not reorder the list.
 *
 *  Return value is always 1; results of the individual creates are stored
 *  in the globals and not examined here.
 */
int http_init_standard_atoms()
{
    /*
     * Register one label, saving its atom number and summary mask in the
     * like-named members of http_std_atoms and http_std_amask.
     */
#define CREATE_ATOM(name,mem) \
    http_std_atoms.mem = http_create_atom ( (name), &http_std_amask.mem )

    CREATE_ATOM ( "accept", accept );
    CREATE_ATOM ( "allow", allow );
    CREATE_ATOM ( "authorization", authorization );
    CREATE_ATOM ( "connection", connection );
    CREATE_ATOM ( "content-length", content_length );
    CREATE_ATOM ( "content-type", content_type );
    CREATE_ATOM ( "from", from );
    CREATE_ATOM ( "host", host );
    CREATE_ATOM ( "if-modified-since", if_modified_since );

    CREATE_ATOM ( "pragma", pragma );
    CREATE_ATOM ( "range", range );
    CREATE_ATOM ( "referer", referer );
    CREATE_ATOM ( "user-agent", user_agent );
    CREATE_ATOM ( "transfer-encoding", transfer_encoding );

    CREATE_ATOM ( "accept-language", accept_language );
    CREATE_ATOM ( "accept-encoding", accept_encoding );

    CREATE_ATOM ( "content-base", content_base );
    CREATE_ATOM ( "content-encoding", content_encoding );
    CREATE_ATOM ( "content-range", content_range );
    CREATE_ATOM ( "content-MD5", content_MD5 );
    CREATE_ATOM ( "content-location", content_location );
    CREATE_ATOM ( "expires", expires );
    CREATE_ATOM ( "last-modified", last_modified );
    CREATE_ATOM ( "title", title );

    return 1;
}
/**************************************************************************/
/* Define internal routines for allocating resource items: nodes, mask bits,
 * and character storage.
 */
/*
 * Hand out the next unused entry of the node table, reset to an empty node
 * (tail "", no atom code, no children).
 *
 * Return value is the index of the new node, or -1 if the table is full.
 */
static int alloc_node()
{
    int i, ndx;
    /*
     * Test for a full table before touching the counter, so that the last
     * slot is usable and a failed call leaves node_table_used unchanged.
     */
    if ( node_table_used >= NODE_TABLE_SIZE ) return -1;
    ndx = node_table_used++;

    node[ndx].tail = "";
    node[ndx].code = 0;
    for ( i = 0; i < 128; i++ ) node[ndx].node[i] = 0;
    return ndx;
}
/*
 * Assign the next summary mask bit to atom number 'code' and record it in
 * mask_value[].  Bits are handed out in increasing order starting with
 * bit 1; after bit 31 the sequence restarts at bit 16, so only bits 16-31
 * are ever shared between atoms.
 *
 * Return value is the mask (a single set bit).
 */
static long alloc_mask(int code)
{
    last_mask_bit++;
    if ( last_mask_bit > 31 ) last_mask_bit = 16;
    /*
     * Shift an unsigned value: 1 << 31 on a signed int is undefined and,
     * where long is wider than int, would smear the bit across the upper
     * half of the mask.
     */
    mask_value[code] = (long) (1UL << last_mask_bit);
    return mask_value[code];
}
/*
 * Allocate 'size' bytes of character storage, rounded up to a multiple
 * of 4.  Small requests are carved from the front of a shared 256-byte
 * block; the storage is never freed.
 *
 * Return value is the address of the storage, or NULL if malloc failed.
 */
static char *alloc_text ( int size )
{
    char *result;
    size = (size+3) & (~3);		/* round up */
    if ( size > text_available ) {
        /*
         * Do independent malloc of requested block for large requests
         * or requests that would discard more than 25% of our basic block.
         */
        if ( (size > 256) || (text_available > 64) ) return malloc ( size );
        /*
         * Allocate new block, unused portion of previous block is lost.
         * The shared state is only updated once the block is in hand so
         * a failed malloc cannot leave free_text NULL with a non-zero
         * text_available.
         */
        result = malloc ( 256 );
        if ( !result ) return NULL;
        free_text = result;
        text_available = 256;
    }
    /*
     * Pull new block from front of current allocation block.
     */
    result = free_text;
    free_text += size;
    text_available = text_available - size;
    return result;
}
/**************************************************************************/
/*
 * Case-blind equality test between a header label (str1) and a stored
 * string (str2).  str2 must already be all lower case.  A colon in str1
 * is treated as the end of str1, so "host: x" compares equal to "host".
 *
 * Return value is 1 if the strings are equal, 0 if not.
 */
static int string_equal ( char *str1, char *str2 )
{
    int pos;
    char have, want;

    for ( pos = 0; str1[pos] != '\0'; pos++ ) {
        have = str1[pos];
        want = str2[pos];
        if ( (have == want) || (LOWER_CASE(have) == want) ) continue;
        /*
         * Characters differ.  The strings still match when the label's
         * colon lines up with the end of str2.
         */
        return ( (have == ':') && (want == '\0') ) ? 1 : 0;
    }
    /* str1 exhausted, equal only if str2 ends at the same place. */
    return ( str2[pos] == '\0' ) ? 1 : 0;
}

/*
 * Create a new empty node and make it the child of node 'nid' for
 * character c.  The child is entered under both the lower and upper case
 * forms of c so that lookups are case-blind.
 *
 * Return value is the index of the new node, or -1 if c is not a 7-bit
 * character or the node table is full.
 */
static int insert_node ( char c, int nid )
{
    int new_nid, ch;
    char lc, uc;
    /*
     * c indexes a 128-entry table; refuse anything outside it before a
     * node is consumed.
     */
    ch = c;
    if ( (ch < 0) || (ch > 127) ) return -1;

    new_nid = alloc_node();
    if ( new_nid < 0 ) return -1;
    node[nid].node[ch] = new_nid;
    lc = LOWER_CASE(c);
    if ( lc != c ) node[nid].node[lc] = new_nid;
    uc = UPPER_CASE(c);
    if ( uc != c ) node[nid].node[uc] = new_nid;
    return new_nid;
}

/**************************************************************************/
/*
 * Register a header label and return its atom number.  Duplicate creates
 * are allowed, returning the existing atom number.  A colon in the label
 * is treated as its end when comparing against existing atoms.
 *
 * Arguments:
 *	label	Header label, 7-bit characters only, NUL terminated.
 *	mask	Receives the summary mask assigned to the atom, or 0 on
 *		failure.
 *
 * Return value is the atom number, or -1 if the label contains a character
 * outside 0-127 or a resource (node table, atom numbers, text storage) is
 * exhausted.
 */
int http_create_atom ( char *label, long *mask )	/* return atom value */
{
    int i, j, nid, code, c;

    *mask = 0;				/* reported on every failure */
    if ( node_table_used == 0 ) {
        alloc_node();	/* first call, create the root node */
        alloc_mask(HDR_ATOM_OTHER);
        alloc_mask(HDR_ATOM_CONTINUE);
    }
    /*
     * Validate the whole label up front.  Characters beyond the point where
     * the trie walk stops are stored as a node's tail and may later be used
     * as table indexes when that tail is split.
     */
    for ( i = 0; label[i]; i++ ) {
        c = label[i];
        if ( (c < 0) || (c > 127) ) return -1;		/* bad label */
    }

    for ( nid=ROOT_NODE, i = 0; nid >= 0; i++ ) {
        c = label[i];
        if ( node[nid].node[c] ) {
            nid = node[nid].node[c];
        } else if ( node[nid].code ) {
            /*
             * See if remainder matches.
             */
            char *tail;
            tail = node[nid].tail;
            if ( string_equal ( &label[i], tail ) ) {
                /*
                 * Tails match, return existing code.
                 */
                code = node[nid].code;
                *mask = mask_value[code];
                return code;
            } else if ( !*tail ) {
                /*
                 * Create new node one level deeper, current tail is on
                 * terminal node.  A failure (-1) ends the loop.
                 */
                nid = insert_node ( label[i], nid );

            } else {
                int new_nid;
                /*
                 * split, new node gets tail with first character stripped.
                 * Nothing has been modified yet if the insert fails.
                 */
                new_nid = insert_node(tail[0], nid);
                if ( new_nid < 0 ) return -1;
                node[new_nid].tail = tail;
                node[new_nid].code = node[nid].code;
                for ( j = 0; tail[j]; j++ ) tail[j] = tail[j+1];
                node[nid].tail = "";
                node[nid].code = 0;
                /*
                 * Backup i to force redo.
                 */
                --i;
            }
        } else {
            /*
             * allocate new atom number and place in current node.  The
             * atom number indexes mask_value[], so stop when that table
             * is used up.
             */
            char *text;
            if ( atoms_used >=
                (int) (sizeof(mask_value)/sizeof(mask_value[0])) ) return -1;
            text = alloc_text ( tu_strlen ( &label[i] ) + 1 );
            if ( !text ) return -1;
            tu_strlowcase ( text, &label[i] );
            node[nid].tail = text;
            node[nid].code = code = atoms_used++;
            *mask = alloc_mask(code);
            return code;
        }
    }
    return -1;
}
/**************************************************************************/
static int convert_label ( char *label )
{
    int i, j, nid, code, c;
    char *p;

    for ( nid=ROOT_NODE, i = 0; nid >= 0; i++ ) {
	c = label[i];
	if ( (c < 0) || (c > 127) ) return -1;		/* bad label */

	if ( node[nid].node[c] ) {
	    nid = node[nid].node[c];
	} else if ( node[nid].code ) {
	    /*
	     * See if remainder matches.
	     */
	    if ( string_equal ( &label[i], node[nid].tail ) ) {
		return node[nid].code;
	    } else break;
	}
    }
    return -1;
}
/**************************************************************************/
/* Scan the request headers, store the atom number of each line in
 * scb->req_atom[] and build the scb->req_summary bit mask.
 *
 * Entries 0-2 of the request array are not examined and are marked
 * HDR_ATOM_OTHER (presumably they hold the parsed request line -- confirm
 * against the code that fills scb->request).  Scanning stops at the first
 * entry with a non-positive length; req_atom[] is terminated with 0 there.
 *
 * Return value is the index of that terminating entry (i.e. the number of
 * entries scanned, counting the first three), or -1 if a header line has
 * no colon-terminated label.
 */
int http_summarize_headers ( session_ctx scb )
{
    int i, req_len, j, anum;
    char *req_atom, *p;
    string *request;

    scb->req_summary = 1;		/* bit flags summary valid */
    request = scb->request;
    req_atom = scb->req_atom;
    req_atom[0] = req_atom[1] = req_atom[2] = HDR_ATOM_OTHER;
    /*
     * Step through headers lines until end sentinel (non-positive length).
     */
    for ( i = 3; (req_len = request[i].l) > 0; i++ ) {
        /*
         * Check for continuation.  The cast keeps the argument in the
         * range the ctype functions accept.
         */
        p = request[i].s;
        if ( isspace((unsigned char) *p) ) {
            req_atom[i] = HDR_ATOM_CONTINUE;
            continue;
        }
        /*
         * Locate the colon that ends the label and convert the label to an
         * atom number.  j == req_len afterwards means no colon was found.
         */
        for ( anum = j = 0; (j < req_len); j++ ) if ( p[j] == ':' ) {
            anum = convert_label ( p );
            break;
        }
        /*
         * Save atom number in summary record.
         */
        if ( anum > 0 ) {
            scb->req_summary |= mask_value[anum];
            req_atom[i] = anum;
        } else if ( j < req_len ) {
            /*
             * atom not found for header, mark as 'other'.
             */
            scb->req_summary |= mask_value[HDR_ATOM_OTHER];
            req_atom[i] = HDR_ATOM_OTHER;
        } else {
            /*
             * Invalid header line, no label.
             */
            req_atom[i] = '\0';
            return -1;
        }
    }
    req_atom[i] = '\0';
    return i;
}
/**************************************************************************/
/* Scan request headers for those matching atom.  The content following
 * the label's colon is appended to the user's buffer with all whitespace
 * removed, except inside double-quoted strings.  Continuation lines are
 * appended to the header they continue; separate headers with the same
 * atom are joined with a comma.  The buffer is not NUL terminated.
 *
 * Arguments:
 *	atom	Atom number of header to extract.
 *	scb	Session control block holding the request.
 *	buffer	Receives the extracted text.
 *	bufsize	Size of buffer.  Extraction stops at the first line that
 *		would not leave at least one spare byte.
 *	length	Receives number of characters placed in buffer.
 *
 * Return value:
 *	-1	error (invalid header in request or invalid atom number)
 *	0	Not present.
 *	>0	Number of lines examined, info is concatenated.
 */
int http_extract_header ( int atom, session_ctx scb, char *buffer,
	int bufsize, int *length )
{
    int i, status, state, j, opos, line_len, line_count, k;
    char *req_atom, *line, c;

    *length = 0;
    /*
     * The atom number indexes mask_value[]; refuse anything outside it
     * (e.g. the -1 returned by a failed http_create_atom).
     */
    if ( (atom < 0) ||
        (atom >= (int) (sizeof(mask_value)/sizeof(mask_value[0]))) ) return -1;
    /*
     * Create summary info if needed and verify atom present in request.
     */
    if ( 0 == (1&scb->req_summary) ) {
        status = http_summarize_headers ( scb );
        if ( status < 0 ) return -1;
    }
    if ( (mask_value[atom] & scb->req_summary) == 0 ) return 0;
    /*
     * Find the records with requested type and append to buffer.
     * States: 0 - skipping label, 1 - copying value, 2 - inside quoted
     * string, -1 - buffer full.
     */
    req_atom = scb->req_atom;
    opos = line_count = 0;
    state = 0;
    for ( i = 3; req_atom[i]; i++ ) for (k = 0; req_atom[i] == atom; k++ ) {
        line_len = scb->request[i+k].l;
        line = scb->request[i+k].s;
        line_count++;
        if ( k == 0 ) {
            /*
             * New header: separate from previous one, start at the label.
             */
            if ( opos > 0 ) buffer[opos++] = ',';
            state = 0;
        } else if ( (state > 0) && ((line_len + 1 + opos) >= bufsize) ) {
            /*
             * Continuation line has no label, so the state reached at the
             * end of the previous line is kept and the whole line is value
             * text.  Apply the same space check the colon case applies.
             */
            state = -1;
        }
        for ( j = 0; (state>=0) && (j < line_len); j++ ) {
            c = line[j];
            switch ( state ) {
              case 0:
                if ( c == ':' ) {
                    state = 1;	/* end-of-label */
                    if ( ((line_len-j) + opos) >= bufsize ) state = -1;
                }
                break;
              case 1:
                if ( isspace((unsigned char) c) ) break;
                buffer[opos++] = c;
                if ( c == '"' ) state = 2;	/* quoted string */
                break;

              case 2:
                buffer[opos++] = c;
                if ( c == '"' ) state = 1;
                break;
            }
        }
        if ( state < 0 ) atom = 0;		/* force exit */
        /*
         * Append continuation records.
         */
        if ( req_atom[i+k+1] != HDR_ATOM_CONTINUE ) break;
    }

    *length = opos;
    return line_count;
}
/**************************************************************************/
/* Scan request headers for those matching atom.  The content following
 * the label's colon is appended to the user's buffer with each run of
 * whitespace reduced to a single space character; whitespace directly
 * after the colon is dropped and double-quoted strings are copied as is.
 * Continuation lines are appended to the header they continue; separate
 * headers with the same atom are joined with a comma.  The buffer is not
 * NUL terminated.
 *
 * Arguments:
 *	atom	Atom number of header to extract.
 *	scb	Session control block holding the request.
 *	buffer	Receives the extracted text.
 *	bufsize	Size of buffer.  Extraction stops at the first line that
 *		would not leave at least one spare byte.
 *	length	Receives number of characters placed in buffer.
 *
 * Return value:
 *	-1	error (invalid header in request or invalid atom number)
 *	0	Not present.
 *	>0	Number of lines examined, info is concatenated.
 */
int http_extract_header_with_space ( int atom, session_ctx scb, char *buffer,
	int bufsize, int *length )
{
    int i, status, state, j, opos, line_len, line_count, k, ws_state;
    char *req_atom, *line, c;

    *length = 0;
    /*
     * The atom number indexes mask_value[]; refuse anything outside it
     * (e.g. the -1 returned by a failed http_create_atom).
     */
    if ( (atom < 0) ||
        (atom >= (int) (sizeof(mask_value)/sizeof(mask_value[0]))) ) return -1;
    /*
     * Create summary info if needed and verify atom present in request.
     */
    if ( 0 == (1&scb->req_summary) ) {
        status = http_summarize_headers ( scb );
        if ( status < 0 ) return -1;
    }
    if ( (mask_value[atom] & scb->req_summary) == 0 ) return 0;
    /*
     * Find the records with requested type and append to buffer.
     * States: 0 - skipping label, 1 - copying value, 2 - inside quoted
     * string, -1 - buffer full.  ws_state is 1 while further whitespace
     * is to be discarded.
     */
    req_atom = scb->req_atom;
    opos = line_count = 0;
    state = 0;
    ws_state = 1;
    for ( i = 3; req_atom[i]; i++ ) for (k = 0; req_atom[i] == atom; k++ ) {
        line_len = scb->request[i+k].l;
        line = scb->request[i+k].s;
        line_count++;
        if ( k == 0 ) {
            /*
             * New header: separate from previous one, start at the label.
             */
            if ( opos > 0 ) buffer[opos++] = ',';
            state = 0;
        } else if ( (state > 0) && ((line_len + 1 + opos) >= bufsize) ) {
            /*
             * Continuation line has no label, so state and ws_state are
             * carried over from the previous line and the whole line is
             * value text (its leading whitespace folds to one space).
             * Apply the same space check the colon case applies.
             */
            state = -1;
        }
        for ( j = 0; (state>=0) && (j < line_len); j++ ) {
            c = line[j];
            switch ( state ) {
              case 0:
                if ( c == ':' ) {
                    ws_state = 1;	/* skip spaces following colon */
                    state = 1;	/* end-of-label */
                    if ( ((line_len-j) + opos) >= bufsize ) state = -1;
                }
                break;
              case 1:
                if ( isspace((unsigned char) c) ) {
                    if ( ws_state == 1 ) break;
                    c = ' ';		/* convert to SP char */
                    ws_state = 1;
                } else {
                    ws_state = 0;
                }
                buffer[opos++] = c;
                if ( c == '"' ) state = 2;	/* quoted string */
                break;

              case 2:
                buffer[opos++] = c;
                if ( c == '"' ) state = 1;
                break;
            }
        }
        if ( state < 0 ) atom = 0;		/* force exit */
        /*
         * Append continuation records.
         */
        if ( req_atom[i+k+1] != HDR_ATOM_CONTINUE ) break;
    }

    *length = opos;
    return line_count;
}
/**************************************************************************/
/* Scan request headers for those matching atom.  Matching header lines
 * (including the label) are copied to the user's buffer.  Continuation
 * lines are appended directly to the header they continue.  A linefeed is
 * written after every header, and another one before every header except
 * the first.  A line that does not fit in the remaining buffer space is
 * skipped; linefeeds are dropped once the buffer is full.
 *
 * NOTE(review): the result is two linefeeds between consecutive headers,
 * while this routine was originally described as using a single linefeed
 * between them.  The output format is left untouched here; confirm what
 * the callers expect.
 *
 * Arguments:
 *	atom	Atom number of header to extract.
 *	scb	Session control block holding the request.
 *	buffer	Receives the extracted lines.
 *	bufsize	Size of buffer.
 *	length	Receives number of characters placed in buffer (may equal
 *		bufsize).
 *
 * Return value:
 *	-1	error (invalid header in request or invalid atom number)
 *	0	Not present.
 *	>0	Number of lines examined, info is concatenated.
 */
int http_extract_header_lines ( int atom, session_ctx scb, char *buffer,
	int bufsize, int *length )
{
    int i, status, opos, line_len, line_count, k;
    char *req_atom, *line;

    *length = 0;
    /*
     * The atom number indexes mask_value[]; refuse anything outside it
     * (e.g. the -1 returned by a failed http_create_atom).
     */
    if ( (atom < 0) ||
        (atom >= (int) (sizeof(mask_value)/sizeof(mask_value[0]))) ) return -1;
    /*
     * Create summary info if needed and verify atom present in request.
     */
    if ( 0 == (1&scb->req_summary) ) {
        status = http_summarize_headers ( scb );
        if ( status < 0 ) return -1;
    }
    if ( (mask_value[atom] & scb->req_summary) == 0 ) return 0;
    /*
     * Find the records with requested type and append to buffer.
     */
    req_atom = scb->req_atom;
    opos = line_count = 0;
    for ( i = 3; req_atom[i]; i++ ) for (k = 0; req_atom[i] == atom; k++ ) {
        line_len = scb->request[i+k].l;
        line = scb->request[i+k].s;
        line_count++;
        /*
         * Separators are only written while there is room; skipped lines
         * would otherwise let them run past the end of the buffer.
         */
        if ( (k==0) && (opos > 0) && (opos < bufsize) ) buffer[opos++] = '\n';

        if ( opos + line_len < bufsize ) {
            tu_strncpy ( &buffer[opos], line, line_len  );
            opos += line_len;
        }
        /*
         * Append continuation records.
         */
        if ( req_atom[i+k+1] != HDR_ATOM_CONTINUE ) {
            if ( opos < bufsize ) buffer[opos++] = '\n';
            break;
        }
    }
    *length = opos;
    return line_count;
}


#ifdef TEST_MODE

/*
 * Test aid: print the trie below node 'nid' to stdout, one branch per
 * line, indenting three columns per level.  Upper case branches are not
 * shown; insert_node enters them as aliases of the lower case ones.
 */
void display_trie ( int nid, int level )
{
    int i, width;
    /*
     * Indent through the printf field width; no fixed-size indent buffer
     * to overrun on deep tries.
     */
    width = level * 3;
    if ( width < 0 ) width = 0;

    if ( node[nid].code ) printf ( "node[%d], atom: %d, tail='%s'\n", 
        nid,node[nid].code,node[nid].tail);
    else printf ( "node[%d]\n", nid);

    for ( i = 0; i < 128; i++ ) if ( node[nid].node[i] ) {
        if ( i >= 'A' && i <= 'Z' ) continue;
        printf ( "%*s%c -> ", width, "", (isprint(i)) ? i : '.' );
        display_trie ( node[nid].node[i], level+1 );
    }
}

/*
 * Test driver (TEST_MODE only).  Times 1000 rebuilds of the standard atom
 * table using a local text buffer, prints resource usage and the trie,
 * then processes each command line argument: "+label" creates an atom for
 * label, anything else is looked up 100000 times under the timer.  The
 * resulting atom number and mask are printed for every argument.
 */
int main ( int argc, char **argv ) {
    int i, anum, LIB$INIT_TIMER(), LIB$SHOW_TIMER();
    long mask;
    char txtbuf[4000];
    http_init_standard_atoms();
    LIB$INIT_TIMER();
    for ( i = 0; i < 1000; i++ ) {
        /*
         * Reset all allocator state so each pass rebuilds from scratch.
         */
        free_text = txtbuf;
        text_available = sizeof(txtbuf);
        node_table_used = 0;
        atoms_used = 3;
        last_mask_bit = 0;
        http_init_standard_atoms();
    }
    LIB$SHOW_TIMER();
    /* sizeof yields size_t; convert so the argument matches %d. */
    printf("Nodes used: %d, text used: %d, atoms: %d\n", node_table_used,
        (int) (sizeof(txtbuf) - text_available), atoms_used );

    display_trie(0,1);

    for ( i = 1; i < argc; i++ ) {
        if ( argv[i][0] == '+' ) anum = http_create_atom ( &argv[i][1], &mask );
        else {
            int j;
            LIB$INIT_TIMER();
            for(j=0;j<100000;j++) anum = convert_label ( argv[i] );
            LIB$SHOW_TIMER();
            mask = (anum > 0) ? mask_value[anum] : 0;
        }
        /* mask is a long; print it with a matching conversion. */
        printf("Atom for '%s' is %d, mask: %lx\n", argv[i], anum,
            (unsigned long) mask );
    }
    return 1;
}
#endif
#ifdef TEST_MODE2
/*
 * Debugging aid kept for reference.  The statement below sits at file
 * scope and uses i, c and nid, which exist only as locals of
 * http_create_atom and convert_label, so it cannot compile where it
 * stands.  Presumably it is meant to be pasted into the trie-walk loop of
 * one of those routines to trace each step -- do not define TEST_MODE2
 * as is.
 */
printf("label[%d] = '%c', node[%d].tail = %d,'%s', node[%d].node[%d] = %d\n", 
i, c, nid, node[nid].code, node[nid].tail, nid, c, node[nid].node[c] );
#endif
