privacore-open-source-searc.../HttpRequest.h
Ivan Skytte Jørgensen beeddcf35d Got rid of gb-include.h
2018-07-26 17:29:51 +02:00

260 lines
8.0 KiB
C++

// Matt Wells, copyright Sep 2001
// . class to parse and form HTTP requests
#ifndef GB_HTTPREQUEST_H
#define GB_HTTPREQUEST_H
// . maximum number of cgi fields kept per request (this sizes the m_fields,
//   m_fieldLens and m_fieldValues arrays in HttpRequest)
// . the limit was once 256, but that was stopping us from having more than
//   about 253 banned ips, so it was raised to 600
//#define MAX_CGI_PARMS 600
// . the new priority controls have 128 rows, which is presumably why the
//   limit was raised again to the value below
#define MAX_CGI_PARMS 1400
// for getting a file from http server (sizes HttpRequest::m_filename)
#define MAX_HTTP_FILENAME_LEN 1024
// i raised this from 1.3k to 5.3k so we can log the full request better
//#define MAX_REQ_LEN (1024*5+300)
//#define MAX_REQ_LEN (8024*5+300)
// keep it small now that we use m_reqBuf
//#define MAX_REQ_LEN (1024)
#include "SafeBuf.h"
class TcpSocket;
#include "GbFormat.h"
#include <time.h>
// . class to parse incoming HTTP requests and to form outgoing ones
// . outgoing: call set(url,...) and then read the result back with
//   getRequest()/getRequestLen()
// . incoming: call set(req,reqSize,s) and then use the accessors below to
//   read the filename, host, cgi fields, cookies, etc.
class HttpRequest {

 public:

	// . form an HTTP request
	// . use size 0 for HEAD requests
	// . use size -1 for GET whole doc requests
	// . fill in your own offset/size for partial GET requests
	// . returns false and sets errno on error
	bool set ( char *url , int32_t offset = 0 , int32_t size = -1 ,
		   time_t ifModifiedSince = 0 , const char *userAgent = NULL ,
		   const char *proto = "HTTP/1.0" ,
		   bool doPost = false ,
		   const char *cookieJar = NULL ,
		   const char *additionalHeader = NULL , // does not incl \r\n
		   int32_t postContentLen = -1 , // for content-length of POST
		   int32_t proxyIp = 0 ,
		   const char *proxyUsernamePwdAuth = NULL );

	// . buffer holding the request text returned by getRequest()
	// . only meaningful while m_reqBufValid is true
	SafeBuf m_reqBuf;
	bool    m_reqBufValid;

	// get the request length
	int32_t getRequestLen() const { return m_reqBuf.length(); }

	// . get the outgoing request we made by calling set() above
	// . OR get the first line of an incoming request
	// . returns NULL if m_reqBuf is not valid
	const char *getRequest() const {
		if ( m_reqBufValid ) return m_reqBuf.getBufStart();
		else return NULL;
	}

	// FORMAT_HTML FORMAT_JSON FORMAT_XML
	char getFormat() const { return getReplyFormat(); }
	char getReplyFormat() const;
	// . declared mutable so the const getReplyFormat() is able to update them
	// . NOTE(review): presumably a cached result plus its valid flag --
	//   confirm against the definition of getReplyFormat()
	mutable bool m_replyFormatValid;
	mutable char m_replyFormat;

	// get the referer field of the MIME header
	char *getReferer () { return m_ref; }

	// this is NULL terminated too
	char *getUserAgent () { return m_userAgent; }

	// just does a simple gbmemcpy() operation, since it should be pointing
	// into the TcpSocket's buffer which is safe until after reply is sent
	// . returns false and sets g_errno on error, true otherwise
	bool copy(const HttpRequest *r);

	// . the url being requested
	// . removes &code= facebook cruft
	bool getCurrentUrl ( SafeBuf &cu );
	bool getCurrentUrlPath ( SafeBuf &cup );

	// . parse an incoming request
	// . returns false and set errno on error
	// . may alloc mem for m_cgiBuf to hold cgi vars from GET or POST op
	bool set ( char *req , int32_t reqSize , TcpSocket *s );

	// for gigablast's own rendering of squid
	bool    m_isSquidProxyRequest;
	char   *m_squidProxiedUrl;
	int32_t m_squidProxiedUrlLen;

	// is it this type of request? (see m_requestType below)
	bool isGETRequest  () const { return (m_requestType == 0); }
	bool isHEADRequest () const { return (m_requestType == 1); }
	bool isPOSTRequest () const { return (m_requestType == 2); }

	// simple accessors for the parsed members declared in the private
	// section below
	const char *getFilename () const { return m_filename; }
	int32_t getFilenameLen () const { return m_filenameLen; }
	int32_t getFileOffset () const { return m_fileOffset; }
	int32_t getFileSize () const { return m_fileSize; }
	const char *getOrigUrlRequest() const { return m_origUrlRequest; }
	int32_t getOrigUrlRequestLen() const { return m_origUrlRequestLen; }
	const char *getHost () const { return m_host; }
	int32_t getHostLen () const { return m_hostLen; }
	bool isLocal () const { return m_isLocal; }

	// . the &ucontent= cgi var does not get its value decoded
	//   because it's already decoded
	// . this is so Mark doesn't have to url encode his injected content
	const char *getUnencodedContent() const { return m_ucontent; }
	int32_t getUnencodedContentLen() const { return m_ucontentLen; }

	// . for parsing the terms in a cgi url
	// . the returned string is NOT NULL terminated
	const char *getString ( const char *field, int32_t *len = NULL,
				const char *defaultString = NULL , int32_t *next=NULL) const;
	bool getBool ( const char *field, bool defaultBool ) const;
	int32_t getLong ( const char *field, int32_t defaultLong ) const;
	int64_t getLongLong ( const char *field, int64_t defaultLongLong ) const;
	float getFloat ( const char *field, double defaultFloat ) const;
	double getDouble ( const char *field, double defaultDouble ) const;

	// same idea as the getters above, but taking the value from the cookie
	float getFloatFromCookie ( const char *field, float def ) const;
	int32_t getLongFromCookie ( const char *field, int32_t def ) const;
	int64_t getLongLongFromCookie( const char *field, int64_t def ) const;
	bool getBoolFromCookie ( const char *field, bool def ) const;
	const char *getStringFromCookie ( const char *field, int32_t *len = NULL,
					  const char *defaultString = NULL ,
					  int32_t *next=NULL) const;

	bool hasField ( const char *field ) const;

	// are we a redir? if so return non-NULL
	const char *getRedir() const { return m_redir; }
	int32_t getRedirLen() const { return m_redirLen; }

	HttpRequest();
	HttpRequest( const HttpRequest &a );
	~HttpRequest();
	void reset();

	const char *getPath() const { return m_path; }
	int32_t getPathLen() const { return m_plen; }

	// . get value of cgi "field" term in the requested filename
	// . you know GET /myfile.html?q=123&name=nathaniel
	const char *getValue ( const char *field , int32_t *len=NULL, int32_t *next=NULL) const;

	// get value of the ith field
	const char *getValue ( int32_t i, int32_t *len = NULL) const;

	// number of cgi fields currently stored in m_fields[]
	int32_t getNumFields( ) const { return m_numFields; }

	// . get the ith cgi parameter name, return NULL if none
	// . a negative "i" is rejected as well so that we never index in
	//   front of m_fields[]
	const char *getField( int32_t i ) const {
		if ( i < 0 || i >= m_numFields ) return NULL;
		return m_fields[i];
	}

	// . get the length of the ith cgi parameter name, return 0 if none
	// . a negative "i" is rejected as well so that we never index in
	//   front of m_fieldLens[]
	int32_t getFieldLen ( int32_t i ) const {
		if ( i < 0 || i >= m_numFields ) return 0;
		return m_fieldLens[i];
	}

 private:

	// . s is a cgi string
	// . either the stuff after the '?' in a url
	// . or the content in a POST operation
	// . returns false and sets errno on error
	bool addCgi ( char *s , int32_t slen );

	// . parse cgi field terms into m_fields,m_fieldLens,m_fieldValues
	// . "s" should point to cgi string right after the '?' if it exists
	// . s should have had all its &'s replaced with \0's
	// . slen should include the last \0
	void parseFields ( char *s , int32_t slen ) ;
	void parseFieldsMultipart ( char *s , int32_t slen ) ;

	// 0 for GET, 1 for HEAD, 2 for POST
	char m_requestType;

	// we decode the filename into this buffer (no cgi)
	char m_filename[MAX_HTTP_FILENAME_LEN];
	int32_t m_filenameLen; // excludes ?cgistuff

	// if request is like "GET /poo?foo=bar"
	// then origUrlRequest is "/poo?foo=bar"
	// references into TcpSocket::m_readBuf
	char *m_origUrlRequest;
	int32_t m_origUrlRequestLen;

	// virtual host in the Host: field of the mime
	char m_host[256];
	int32_t m_hostLen;

	// are we coming from a local machine?
	bool m_isLocal;

	// . decoded cgi data stored here
	// . this just points into TcpSocket::m_readBuf
	// . now it points into m_reqBuf.m_buf[]
	char *m_cgiBuf ;
	int32_t m_cgiBufLen ;
	int32_t m_cgiBufMaxLen ;

	// partial GET file read info
	int32_t m_fileOffset;
	int32_t m_fileSize;

	// . cgi field term info stored in here
	// . set by parseFields()
	// . only the first m_numFields entries of each array are in use
	char *m_fields [ MAX_CGI_PARMS ];
	int32_t m_fieldLens [ MAX_CGI_PARMS ];
	char *m_fieldValues [ MAX_CGI_PARMS ];
	int32_t m_numFields;

	int32_t m_userIP;
	bool m_isSSL;

	// . ptr to the thing we're getting in the request
	// . used by PageAddUrl4.cpp
	char *m_path;
	int32_t m_plen;

	char m_redir[128];
	int32_t m_redirLen;

	// referer, NULL terminated, from Referer: field in MIME
	char m_ref [ 256 ];
	int32_t m_refLen;

	// NULL terminated User-Agent: field in MIME
	char m_userAgent[128];

	// this points into m_cgiBuf
	char *m_ucontent;
	int32_t m_ucontentLen;

	char *m_cookiePtr;
	int32_t m_cookieLen;

	// buffer for adding extra parms
	char *m_cgiBuf2;
	int32_t m_cgiBuf2Size;
};
// Default request version number.
// NOTE(review): presumably the value getVersionFromRequest() falls back to
// when the request does not specify one -- confirm against its definition,
// which is not part of this header.
const int HTTP_REQUEST_DEFAULT_REQUEST_VERSION = 2;
// Returns a version number for the request "r"; only declared here.
int getVersionFromRequest ( HttpRequest *r );
#endif // GB_HTTPREQUEST_H