// privacore-open-source-searc.../HttpMime.h

// 251 lines
// 7.3 KiB
// C++

// Matt Wells, copyright Jun 2000
// . class to parse an HTTP MIME header
#ifndef GB_HTTPMIME_H
#define GB_HTTPMIME_H
#include <stdint.h> // int32_t
#include <cstdlib>
// . map a MIME content-type string to one of the CT_* codes defined below
// . e.g. converts text/html to CT_HTML and application/json to CT_JSON
// . "s" is given with an explicit length "slen"
// . NOTE(review): the value returned for an unrecognized string is not
//   visible in this header (presumably CT_UNKNOWN) -- confirm in the .cpp
int32_t getContentTypeFromStr(const char *s, size_t slen);
// . NOTE(review): judging by the name, maps a file extension "ext" of length
//   "elen" to a content-type string; what is returned for an unknown
//   extension is not visible in this header -- confirm in the .cpp
const char *extensionToContentTypeStr2 ( const char *ext , int32_t elen ) ;
// . the various content types (CT_* codes)
// . these are plain integer macros; HttpMime stores one in an int32_t
//   (see HttpMime::getContentType())
// . the numeric values are spelled out explicitly below; value 18 is not
//   assigned (see the commented-out CT_STATUS)
#define CT_UNKNOWN 0
// documents
#define CT_HTML 1
#define CT_TEXT 2
#define CT_XML 3
#define CT_PDF 4
#define CT_DOC 5
#define CT_XLS 6
#define CT_PPT 7
#define CT_PS 8
// images
#define CT_GIF 9
#define CT_JPG 10
#define CT_PNG 11
#define CT_TIFF 12
#define CT_BMP 13
// scripts, stylesheets and json
#define CT_JS 14
#define CT_CSS 15
#define CT_JSON 16
// NOTE(review): presumably a generic image type for images that are not one
// of the specific formats above -- confirm against the .cpp
#define CT_IMAGE 17
//#define CT_STATUS 18 // an internal type indicating spider reply
// compressed / archive formats
#define CT_GZ 19
#define CT_ARC 20
#define CT_WARC 21
// . content encodings (ET_* codes)
// . the names match the HTTP Content-Encoding tokens identity, gzip,
//   compress and deflate
// . NOTE(review): presumably the values returned by
//   HttpMime::getContentEncoding() -- confirm against the .cpp
#define ET_IDENTITY 0
#define ET_GZIP 1
#define ET_COMPRESS 2
#define ET_DEFLATE 3
// . table of content type strings, defined in another translation unit
// . NOTE(review): presumably indexed by the CT_* codes above; the array
//   length is not visible from this header -- confirm before indexing
extern const char * const g_contentTypeStrings[];
#include <time.h> // time_t mktime()
#include <map>
#include <string>
#include "Url.h"
class SafeBuf;
// . parses the MIME header of an http reply (see set()) and exposes the
//   fields it found through getters
// . can also build a MIME header into its own internal buffer (see
//   makeMime() / makeRedirMime()), readable through getMime()
// . every getter that only reads a member is const so it can be called
//   through a const HttpMime reference or pointer
class HttpMime {
public:
	HttpMime();

	bool init();
	void reset();

	// . returns false and sets errno if could not get a valid mime
	// . just copies bits and pieces so you can free "mime" whenever
	// . we need "url" to set m_locUrl if it's a relative redirect
	bool set(const char *httpReply, int32_t replyLen, Url *url);

	void setContentType(int32_t t) { m_contentType = t; }
	void setHttpStatus(int32_t status) { m_status = status; }
	// writes m_mimeLen, the same member the protected setMimeLen() writes
	void setBufLen(int32_t bufLen) { m_mimeLen = bufLen; }

	// http status: 404, 200, etc.
	int32_t getHttpStatus() const { return m_status; }

	const char *getContent() const { return m_content; }
	int32_t getContentLen() const { return m_contentLen; }

	// the stored content type code (see the CT_* defines)
	int32_t getContentType() const { return m_contentType; }

	const Url *getLocationUrl() const { return &m_locUrl; }

	// new stuff for Msg13.cpp to use
	const char *getLocationField() const { return m_locationField; }
	int32_t getLocationFieldLen() const { return m_locationFieldLen; }

	// . used to create a mime
	// . if bytesToSend is < 0 that means send totalContentLen (all doc)
	// . if lastModified is 0 we take the current time and use that
	// . cacheTime is how long for browser to cache this page in seconds
	// . a cacheTime of -2 means do not cache at all
	// . a cacheTime of -1 means do not cache when moving forward,
	//   but when hitting the back button, serve cached results
	// . a cache time of 0 means use local caching rules
	// . any other cacheTime is an explicit time to cache the page for
	// . httpStatus of -1 means to auto determine
	void makeMime(int32_t totalContentLen,
	              int32_t cacheTime,
	              time_t lastModified,
	              int32_t offset,
	              int32_t bytesToSend,
	              const char *ext,
	              bool POSTReply,
	              const char *contentType,
	              const char *charset,
	              int32_t httpStatus,
	              const char *cookie);

	// make a redirect mime
	void makeRedirMime(const char *redirUrl, int32_t redirUrlLen);

	// NOTE(review): presumably appends the cookies parsed from this mime to
	// the cookie jar "sb", using "currentUrl" for domain/path defaults --
	// confirm against the .cpp
	bool addToCookieJar(const Url *currentUrl, SafeBuf *sb);

	// NOTE(review): presumably appends a Cookie request header built from
	// "cookieJar" for "url" to "sb" -- confirm against the .cpp
	static bool addCookieHeader(const char *cookieJar, const char *url, SafeBuf *sb);

	// the internal buffer used to hold a mime we create
	const char *getMime() const { return m_buf; }

	// . does this include the last \r\n\r\n? yes!
	// . m_mimeLen is stored as size_t; it is narrowed to int32_t here to
	//   keep the existing return type
	int32_t getMimeLen() const { return static_cast<int32_t>(m_mimeLen); }

	const char *getCharset() const { return m_charset; }
	int32_t getCharsetLen() const { return m_charsetLen; }

	const char *getContentLanguage() const { return m_contentLanguage; }
	int32_t getContentLanguageLen() const { return m_contentLanguageLen; }

	const char *getServer() const { return m_server; }
	int32_t getServerLen() const { return m_serverLen; }

	// the stored content encoding code (see the ET_* defines)
	int32_t getContentEncoding() const { return m_contentEncoding; }

	const char *getContentEncodingPos() const { return m_contentEncodingPos; }
	const char *getContentLengthPos() const { return m_contentLengthPos; }
	const char *getContentTypePos() const { return m_contentTypePos; }

	// m_contentTypeLen is stored as size_t; narrowed to int32_t here to
	// keep the existing return type
	int32_t getContentTypeLen() const { return static_cast<int32_t>(m_contentTypeLen); }

	// convert a file extension like "gif" to "image/gif"
	static const char *getContentTypeFromExtension(const char *ext);
	static const char *getContentTypeFromExtension(const char *ext, int32_t elen);

	void print() const;

protected:
	// . one cookie as stored in m_cookies
	// . each string is a pointer plus an explicit length
	// . NOTE(review): the pointers presumably reference the mime being
	//   parsed rather than owned copies -- confirm against the .cpp
	struct httpcookie_t {
		const char *m_cookie;
		size_t m_cookieLen;
		size_t m_nameLen;
		bool m_defaultDomain;
		const char *m_domain;
		size_t m_domainLen;
		const char *m_path;
		size_t m_pathLen;
		bool m_secure;
		bool m_httpOnly;
		bool m_expired;
	};

	// line-oriented parsing helpers; the cursor state they share lives in
	// m_currentLine, m_nextLineStartPos, m_valueStartPos and
	// m_attributeStartPos
	bool getNextLine();
	bool getField(const char **field, size_t *fieldLen);
	bool getValue(const char **value, size_t *valueLen);
	bool getAttribute(const char **attribute, size_t *attributeLen, const char **attributeValue, size_t *attributeValueLen);

	const char *getCurrentLine() const { return m_currentLine; }

	// m_currentLineLen is stored as size_t; narrowed to int32_t here to
	// keep the existing return type
	int32_t getCurrentLineLen() const { return static_cast<int32_t>(m_currentLineLen); }

	// compute length of a possible mime starting at "buf"
	size_t getMimeLen(const char *buf, size_t bufLen);

	void setMime(const char *mime) { m_mime = mime; }
	void setMimeLen(int32_t mimeLen) { m_mimeLen = mimeLen; }
	void setContent(const char *content) { m_content = content; }

	// . overrides the stored current time and flags it as fake
	// . NOTE(review): presumably used so cookie expiry can be evaluated
	//   against a fixed time in tests -- confirm against the .cpp
	void setCurrentTime(time_t currentTime) {
		m_fakeCurrentTime = true;
		m_currentTime = currentTime;
	}

	const std::map<std::string, httpcookie_t> &getCookies() const { return m_cookies; }

	static bool parseCookieDate(const char *value, size_t valueLen, time_t *time);

private:
	// . sets m_status, m_contentLen , ...
	// . we need "url" to set m_locUrl if it's a relative redirect
	bool parse(const char *mime, int32_t mimeLen, Url *url);

	// one parser per header field
	bool parseLocation(const char *field, size_t fieldLen, Url *baseUrl);
	bool parseSetCookie(const char *field, size_t fieldLen);
	bool parseContentType(const char *field, size_t fieldLen);
	bool parseContentLength(const char *field, size_t fieldLen);
	bool parseContentEncoding(const char *field, size_t fieldLen);
	bool parseContentLanguage(const char *field, size_t fieldLen);
	bool parseServer(const char *field, size_t fieldLen);

	// used for bz2, gz files
	const char *getContentEncodingFromExtension(const char *ext);

	static void addCookie(const httpcookie_t &cookie, const Url &currentUrl, SafeBuf *cookieJar);
	static void print(const httpcookie_t &cookie, int count = 0);

	// parsing cursor state
	const char *m_currentLine;
	size_t m_currentLineLen;
	size_t m_nextLineStartPos;
	size_t m_valueStartPos;
	size_t m_attributeStartPos;

	// m_fakeCurrentTime is set to true by setCurrentTime()
	time_t m_currentTime;
	bool m_fakeCurrentTime;

	// these are set by calling set() above
	int32_t m_status;
	const char *m_content;
	int32_t m_contentLen;
	int32_t m_contentType;
	Url m_locUrl;
	const char *m_contentLanguage;
	int32_t m_contentLanguageLen;
	const char *m_server;
	int32_t m_serverLen;
	const char *m_locationField;
	int32_t m_locationFieldLen;
	const char *m_mime;

	// buf used to hold a mime we create
	char m_buf[1024];
	size_t m_mimeLen;

	int32_t m_contentEncoding;
	const char *m_contentEncodingPos;
	const char *m_contentLengthPos;
	const char *m_contentTypePos;
	size_t m_contentTypeLen;

	// Content-Type: text/html;charset=euc-jp // japanese (euc-jp)
	// Content-Type: text/html;charset=gb2312 // chinese (gb2312)
	const char *m_charset;
	int32_t m_charsetLen;

	std::map<std::string, httpcookie_t> m_cookies;
};
#endif // GB_HTTPMIME_H