// Matt Wells, copyright Sep 2001

// . class to parse and form HTTP requests

#ifndef GB_HTTPREQUEST_H
#define GB_HTTPREQUEST_H

// . max number of cgi fields a single HttpRequest can hold; this sizes the
//   HttpRequest::m_fields[], m_fieldLens[] and m_fieldValues[] arrays
// . originally allowed for up to 256 cgi fields
// . this was stopping us from having more than about 253 banned ips, so i
//   raised it to 600
//#define MAX_CGI_PARMS 600
// . new priority controls has 128 rows!!
#define MAX_CGI_PARMS 1400

// . for getting a file from http server
// . size in bytes of HttpRequest::m_filename[], the buffer the requested
//   filename is decoded into
#define MAX_HTTP_FILENAME_LEN 1024

// i raised this from 1.3k to 5.3k so we can log the full request better
//#define MAX_REQ_LEN (1024*5+300)
//#define MAX_REQ_LEN (8024*5+300)

// keep it small now that we use m_reqBuf
//#define MAX_REQ_LEN (1024)

#include "SafeBuf.h"
class TcpSocket;

#include "GbFormat.h"


// . forms outgoing HTTP requests and parses incoming ones
// . the two set() overloads below are the entry points for each direction
// . parsed cgi parameters are exposed through getString()/getLong()/etc.
//   and by index through getField()/getFieldLen()/getValue()
class HttpRequest {

 public:

	// . form an HTTP request 
	// . use size 0 for HEAD requests
	// . use size -1 for GET whole doc requests
	// . fill in your own offset/size for partial GET requests
	// . "additionalHeader" must NOT include the trailing \r\n
	// . "postContentLen" is used for the content-length of a POST
	// . returns false and sets errno on error
	bool set ( char *url , int32_t offset = 0 , int32_t size = -1 ,
		   time_t ifModifiedSince = 0 , const char *userAgent = NULL ,
		   const char *proto = "HTTP/1.0" ,
		   bool doPost = false ,
		   const char *cookieJar = NULL ,
		   const char *additionalHeader = NULL , // does not incl \r\n
		   int32_t postContentLen = -1 , // for content-length of POST
		   int32_t proxyIp = 0 ,
		   const char *proxyUsernamePwdAuth = NULL );

	// . buffer holding the request text, use this
	// . m_reqBufValid gates whether getRequest() hands the buffer out
	SafeBuf m_reqBuf;
	bool    m_reqBufValid;

	// . get the request length
	// . NOTE: returns m_reqBuf's length without checking m_reqBufValid,
	//   unlike getRequest() below
	int32_t getRequestLen() const { return m_reqBuf.length(); }

	// . get the outgoing request we made by calling set() above
	// . OR get the first line of an incoming request
	// . returns NULL if m_reqBufValid is false
	const char *getRequest() const { 
		if ( m_reqBufValid ) return m_reqBuf.getBufStart();
		else return NULL;
	}

	// . FORMAT_HTML FORMAT_JSON FORMAT_XML
	// . getFormat() is just an alias for getReplyFormat()
	char getFormat() const { return getReplyFormat(); }
	char getReplyFormat() const;
	// NOTE(review): mutable, so presumably a cache filled in lazily by the
	// const getReplyFormat() -- confirm in the .cpp
	mutable bool m_replyFormatValid;
	mutable char m_replyFormat;

	// get the referer field of the MIME header (NULL terminated)
	char *getReferer () { return m_ref; }

	// this is NULL terminated too
	char *getUserAgent () { return m_userAgent; }

	// just does a simple gbmemcpy() operation, since it should be pointing
	// into the TcpSocket's buffer which is safe until after reply is sent
	// . returns false and sets g_errno on error, true otherwise
	bool copy(const HttpRequest *r);

	// . the url being requested
	// . removes &code= facebook cruft
	bool getCurrentUrl ( SafeBuf &cu );
	bool getCurrentUrlPath ( SafeBuf &cup );

	// . parse an incoming request
	// . returns false and set errno on error
	// . may alloc mem for m_cgiBuf to hold cgi vars from GET or POST op
	bool set ( char *req , int32_t reqSize , TcpSocket *s );

	// for gigablast's own rendering of squid
	bool m_isSquidProxyRequest;
	char *m_squidProxiedUrl;
	int32_t m_squidProxiedUrlLen;

	// is it this type of request? (see m_requestType: 0=GET 1=HEAD 2=POST)
	bool isGETRequest  () const { return (m_requestType == 0); }
	bool isHEADRequest () const { return (m_requestType == 1); }
	bool isPOSTRequest () const { return (m_requestType == 2); }

	// the decoded filename (no cgi) and the partial GET offset/size
	const char *getFilename () const { return m_filename; }
	int32_t  getFilenameLen () const { return m_filenameLen; }
	int32_t  getFileOffset  () const { return m_fileOffset; }
	int32_t  getFileSize    () const { return m_fileSize; }

	// . if request is like "GET /poo?foo=bar" this is "/poo?foo=bar"
	// . NOT an owned copy, references into TcpSocket::m_readBuf
	const char *getOrigUrlRequest() const { return m_origUrlRequest; }
	int32_t getOrigUrlRequestLen() const { return m_origUrlRequestLen; }

	// virtual host from the Host: field of the mime
	const char *getHost     () const { return m_host;    }
	int32_t  getHostLen     () const { return m_hostLen; }
	// are we coming from a local machine?
	bool  isLocal        () const { return m_isLocal; }


	// . the &ucontent= cgi var does not get its value decoded
	//   because it's already decoded
	// . this is so Mark doesn't have to url encode his injected content
	const char *getUnencodedContent() const { return m_ucontent; }
	int32_t  getUnencodedContentLen() const { return m_ucontentLen; }
	
	// . for parsing the terms in a cgi url
	// . the returned string is NOT NULL terminated
	// . each getter takes the cgi field name and a default value
	// . NOTE: getFloat() takes its default as a double but returns a float
	const char *getString  ( const char *field, int32_t *len = NULL,
				 const char *defaultString = NULL , int32_t *next=NULL) const;
	bool       getBool     ( const char *field, bool defaultBool ) const;
	int32_t    getLong     ( const char *field, int32_t defaultLong ) const;
	int64_t    getLongLong ( const char *field, int64_t defaultLongLong ) const;
	float      getFloat    ( const char *field, double defaultFloat ) const;
	double     getDouble   ( const char *field, double defaultDouble ) const;

	// same idea as the getters above, but looking in the cookie
	float       getFloatFromCookie   ( const char *field, float def ) const;
	int32_t     getLongFromCookie    ( const char *field, int32_t def ) const;
	int64_t     getLongLongFromCookie( const char *field, int64_t def ) const;
	bool        getBoolFromCookie    ( const char *field, bool def ) const;
	const char *getStringFromCookie  ( const char *field, int32_t *len = NULL,
					  const char *defaultString = NULL ,
					  int32_t *next=NULL) const;
	

	bool hasField ( const char *field ) const;

	// are we a redir? if so return non-NULL
	const char *getRedir() const { return m_redir;    }
	int32_t     getRedirLen() const { return m_redirLen; }

	HttpRequest();
	HttpRequest( const HttpRequest &a );
	~HttpRequest();
	void reset();

	// ptr to (and length of) the thing we're getting in the request
	const char *getPath() const { return m_path; }
	int32_t  getPathLen() const { return m_plen; }

	// . get value of cgi "field" term in the requested filename
	// . you know GET /myfile.html?q=123&name=nathaniel
	const char *getValue ( const char *field , int32_t *len=NULL, int32_t *next=NULL) const;

	// get value of the ith field
	const char *getValue ( int32_t i, int32_t *len = NULL) const;

	// number of cgi parameters stored in m_fields[]
	int32_t  getNumFields( ) const { return m_numFields; }

	// . get the ith cgi parameter name, return NULL if none
	// . "i" is signed, so reject negative indexes too; otherwise we would
	//   read before the start of m_fields[]
	const char *getField( int32_t i ) const {
		if ( i < 0 || i >= m_numFields ) return NULL;
		return m_fields[i];
	}

	// . get the length of the ith cgi parameter name, return 0 if none
	// . negative indexes are rejected for the same reason as in getField()
	int32_t  getFieldLen ( int32_t i ) const {
		if ( i < 0 || i >= m_numFields ) return 0;
		return m_fieldLens[i];
	}

private:
	// . s is a cgi string
	// . either the stuff after the '?' in a url
	// . or the content in a POST operation
	// . returns false and sets errno on error
	bool addCgi ( char *s , int32_t slen );

	// . parse cgi field terms into m_fields,m_fieldLens,m_fieldValues
	// . "s" should point to cgi string right after the '?' if it exists
	// . s should have had all its &'s replaced with \0's
	// . slen should include the last \0
	void parseFields ( char *s , int32_t slen ) ;
	void parseFieldsMultipart ( char *s , int32_t slen ) ;

	// 0 for GET, 1 for HEAD, 2 for POST (see the is*Request() accessors)
	char  m_requestType;

	// we decode the filename into this buffer (no cgi)
	char  m_filename[MAX_HTTP_FILENAME_LEN];
	int32_t  m_filenameLen;  // excludes ?cgistuff

	// if request is like "GET /poo?foo=bar"
	// then origUrlRequest is "/poo?foo=bar"
	// references into TcpSocket::m_readBuf
	char *m_origUrlRequest;
	int32_t  m_origUrlRequestLen;

	// virtual host in the Host: field of the mime
	char  m_host[256];
	int32_t  m_hostLen;

	// are we coming from a local machine? 
	bool  m_isLocal;
	
	// . decoded cgi data stored here 
	// . this just points into TcpSocket::m_readBuf
	// . now it points into m_reqBuf.m_buf[]
	char *m_cgiBuf       ;
	int32_t  m_cgiBufLen    ;
	int32_t  m_cgiBufMaxLen ;

	// partial GET file read info
	int32_t  m_fileOffset;
	int32_t  m_fileSize;

	// . cgi field term info stored in here
	// . set by parseFields()
	// . only the first m_numFields entries of each array are meaningful
	char *m_fields      [ MAX_CGI_PARMS ];
	int32_t  m_fieldLens   [ MAX_CGI_PARMS ];
	char *m_fieldValues [ MAX_CGI_PARMS ];
	int32_t  m_numFields;

	int32_t m_userIP;
	bool m_isSSL;

	// . ptr to the thing we're getting in the request
	// . used by PageAddUrl4.cpp
	char *m_path;
	int32_t  m_plen;

	// redirect target, see getRedir()
	char  m_redir[128];
	int32_t  m_redirLen;

	// referer, NULL terminated, from Referer: field in MIME
	char  m_ref [ 256 ];
	int32_t  m_refLen;

	// NULL terminated User-Agent: field in MIME
	char  m_userAgent[128];

	// this points into m_cgiBuf
	char *m_ucontent;
	int32_t  m_ucontentLen;

	// NOTE(review): presumably the raw cookie data that the
	// get*FromCookie() accessors read from -- confirm in the .cpp
	char *m_cookiePtr;
	int32_t  m_cookieLen;

	// buffer for adding extra parms
	char *m_cgiBuf2;
	int32_t  m_cgiBuf2Size;
};

// default request version number
// NOTE(review): presumably the value getVersionFromRequest() falls back to
// when the request does not specify a version -- confirm in the .cpp
const int HTTP_REQUEST_DEFAULT_REQUEST_VERSION = 2;

// . get the version for request "r"
// . NOTE(review): defined outside this header; how the version is derived
//   from the request is not visible here -- confirm in the .cpp
int getVersionFromRequest ( HttpRequest *r );

#endif // GB_HTTPREQUEST_H