privacore-open-source-searc…/HttpMime.cpp

#include "HttpMime.h"
#include "HashTable.h"
#include "HashTableX.h"
#include "Process.h"
#include "Conf.h"
#include "gbmemcpy.h"
#include "Errno.h"

#ifdef _VALGRIND_
#include <valgrind/memcheck.h>
#endif

// . short printable name for each content-type code
// . the array is indexed by the content-type value, so the entries
//   must be 1-1 with the #define's in HttpMime.h
// . the trailing number on each line is the entry's array index
const char * const g_contentTypeStrings [] = {
	""     , // 0
	"html" , // 1
	"text" , // 2
	"xml"  , // 3
	"pdf"  , // 4
	"doc"  , // 5
	"xls"  , // 6
	"ppt"  , // 7
	"ps"   , // 8
	"gif"  , // 9
	"jpg"  , // 10
	"png"  , // 11
	"tiff" , // 12
	"bmp"  , // 13
	"javascript" , // 14
	"css"  , // 15
	"json" ,  // 16
	"image", // 17
	"spiderstatus", // 18
	"gzip",	// 19
	"arc",	// 20
	"warc"	// 21
};

// . builds an empty mime object
// . gives the members below a defined value first, then lets reset()
//   initialise the per-parse state
HttpMime::HttpMime () {
	// reset() reads this flag to decide whether to refresh m_currentTime,
	// so it has to be defined before reset() is called
	m_fakeCurrentTime = false;

	// nothing parsed or generated yet
	m_contentEncoding = 0;
	m_mimeLen = 0;
	m_content = NULL;

	// scratch buffer used when we generate a mime ourselves
	memset(m_buf, 0, sizeof(m_buf));

	reset();
}

void HttpMime::reset ( ) {
	m_mime = NULL;

	// clear values
	m_currentLine = NULL;
	m_currentLineLen = 0;
	m_valueStartPos = 0;
	m_nextLineStartPos = 0;
	m_attributeStartPos = 0;

	if (!m_fakeCurrentTime) {
		m_currentTime = time(NULL);
	}

	m_status = -1;
	m_contentLen = -1;
	m_contentType = CT_HTML;
	m_charset = NULL;
	m_charsetLen = 0;
	m_locationField = NULL;
	m_locationFieldLen = 0;
	m_contentLanguage = NULL;
	m_contentLanguageLen = 0;
	m_server = NULL;
	m_serverLen = 0;
	m_contentEncodingPos = NULL;
	m_contentLengthPos = NULL;
	m_contentTypePos = NULL;
	m_contentTypeLen = 0;

	m_cookies.clear();
}

// . parses the HTTP reply header at the start of "buf"
// . "url" is passed on to parse(), which uses it as the base when a
//   Location: field has to be resolved
// . returns false if no valid mime could be found or parsed
bool HttpMime::set ( const char *buf , int32_t bufLen , Url *url ) {
#ifdef _VALGRIND_
	VALGRIND_CHECK_MEM_IS_DEFINED(buf,bufLen);
#endif
	reset();

	// the shortest reply we accept is a status line like "HTTP/x.x 404\n"
	const int32_t minReplyLen = 13;
	if ( bufLen < minReplyLen ) {
		return false;
	}

	// . find where the header ends (terminating blank line included)
	// . getMimeLen() returns 0 when there is no such boundary
	m_mime = buf;
	m_mimeLen = getMimeLen(buf, bufLen);

	if (!m_mimeLen) {
		log(LOG_WARN, "mime: no rnrn boundary detected");
		return false;
	}

	// the body starts right after the header
	m_content = buf + m_mimeLen;

	// fills in m_status, m_contentLen, m_contentType, ...
	return parse ( buf , m_mimeLen , url );
}

// https://tools.ietf.org/html/rfc2616#section-19.3
// The line terminator for message-header fields is the sequence CRLF. However, we recommend that applications,
// when parsing such headers, recognize a single LF as a line terminator and ignore the leading CR.

// . returns the length of the mime including the blank line that ends it
// . the terminator may be "\r\r", "\n\n", "\r\n\r\n" or "\n\r\n\r"
// . returns 0 if no boundary found
size_t HttpMime::getMimeLen(const char *buf, size_t bufLen) {
#ifdef _VALGRIND_
	VALGRIND_CHECK_MEM_IS_DEFINED(buf,bufLen);
#endif
	for ( size_t pos = 0 ; pos < bufLen ; ++pos ) {
		const char first = buf[pos];

		// a terminator can only start on a \r or \n
		if ( first != '\r' && first != '\n' ) {
			continue;
		}

		// need at least one more byte for the 2-byte forms
		if ( pos + 1 >= bufLen ) {
			continue;
		}

		// "\r\r" or "\n\n": the same character twice
		const char second = buf[pos + 1];
		if ( second == first ) {
			return pos + 2;
		}

		// need three more bytes for the 4-byte forms
		if ( pos + 3 >= bufLen ) {
			continue;
		}

		// "\r\n\r\n" or "\n\r\n\r": the two characters alternating
		const char other = ( first == '\r' ) ? '\n' : '\r';
		if ( second == other && buf[pos + 2] == first && buf[pos + 3] == other ) {
			return pos + 4;
		}
	}

	// ran off the end without finding the end of the mime
	return 0;
}

// . moves the line cursor to the header line starting at m_nextLineStartPos
// . on success m_currentLine/m_currentLineLen describe the line without
//   its trailing \r/\n characters, and m_nextLineStartPos points at the
//   start of the following line
// . a following line that starts with a space or tab is a continuation
//   (folded header) and is made part of the current line
// . m_valueStartPos and m_attributeStartPos are reset to 0
// . returns false once the end of the mime has been reached
bool HttpMime::getNextLine() {
	// clear values
	m_currentLine = NULL;
	m_currentLineLen = 0;
	m_valueStartPos = 0;
	m_attributeStartPos = 0;

	size_t currentPos = m_nextLineStartPos;

	// don't cross limit
	if (currentPos >= m_mimeLen) {
		return false;
	}

	m_currentLine = m_mime + currentPos;

	// cater for multiline header
	const size_t linePos = currentPos;
	do {
		bool foundLineEnding = false;

		// the character is only read after the bounds test, so we never
		// touch m_mime[m_mimeLen]
		for (; currentPos < m_mimeLen; ++currentPos) {
			const char currentChar = m_mime[currentPos];

			if (!foundLineEnding) {
				if (currentChar == '\r' || currentChar == '\n') {
					// line content ends here; measured from the start
					// of the first line so folded lines accumulate
					foundLineEnding = true;
					m_currentLineLen = (currentPos - linePos);
				}
			} else if (currentChar != '\r' && currentChar != '\n') {
				// first character of the next line
				break;
			}
		}
	} while (m_currentLineLen && currentPos < m_mimeLen && (m_mime[currentPos] == ' ' || m_mime[currentPos] == '\t'));

	if (m_currentLineLen == 0) {
		// set to end of mime
		m_currentLineLen = (m_mime + m_mimeLen) - m_currentLine;
	}

	// store next lineStartPos
	m_nextLineStartPos = currentPos;

	logTrace(g_conf.m_logTraceHttpMime, "line='%.*s'", static_cast<int>(m_currentLineLen), m_currentLine);

	return true;
}

// . extracts the field name (the text before the first ':') of the
//   current line
// . "field"/"fieldLen" receive the name with trailing whitespace removed;
//   they are left untouched when the line has no colon
// . m_valueStartPos is moved to just after the colon
// . returns false if there is no colon or the name is empty
bool HttpMime::getField(const char **field, size_t *fieldLen) {
	size_t currentLinePos = m_valueStartPos;

	// nothing left to search on this line
	if (currentLinePos >= m_currentLineLen) {
		return false;
	}

	// only search the part of the line that is left after the start
	// offset, so the search cannot run past the end of the line
	const char *colonPos = (const char *)memchr(m_currentLine + currentLinePos, ':', m_currentLineLen - currentLinePos);

	// no colon
	if (colonPos == NULL) {
		return false;
	}

	currentLinePos = colonPos - m_currentLine;
	m_valueStartPos = currentLinePos + 1;

	*field = m_currentLine;
	*fieldLen = currentLinePos;

	// strip ending whitespaces
	while (*fieldLen > 0 && is_wspace_a(m_currentLine[*fieldLen - 1])) {
		--(*fieldLen);
	}

	logTrace(g_conf.m_logTraceHttpMime, "field='%.*s'", static_cast<int>(*fieldLen), *field);

	return (*fieldLen > 0);
}

// . extracts the value of the current line, starting at m_valueStartPos
// . the value ends at the first ';' (if any) or at the end of the line;
//   surrounding whitespace is removed
// . when a ';' is present m_attributeStartPos is set to just after it so
//   getAttribute() can walk the attributes
// . returns false if the value is empty
bool HttpMime::getValue(const char **value, size_t *valueLen) {
	// strip starting whitespaces
	// (bounds test first so we never read past the end of the line)
	while ((m_valueStartPos < m_currentLineLen) && is_wspace_a(m_currentLine[m_valueStartPos])) {
		++m_valueStartPos;
	}

	*value = m_currentLine + m_valueStartPos;
	*valueLen = m_currentLineLen - m_valueStartPos;

	const char *semicolonPos = (const char *)memchr(*value, ';', *valueLen);
	if (semicolonPos) {
		// value should end at semicolon if present
		*valueLen = semicolonPos - *value;
		m_attributeStartPos = semicolonPos - m_currentLine + 1;
	}

	// strip ending whitespace
	while (*valueLen > 0 && (is_wspace_a((*value)[*valueLen - 1]))) {
		--(*valueLen);
	}

	logTrace(g_conf.m_logTraceHttpMime, "value='%.*s'", static_cast<int>(*valueLen), *value);

	return (*valueLen > 0);
}

// . extracts the next ';'-separated attribute of the current line,
//   starting at m_attributeStartPos
// . "attribute"/"attributeLen" receive the attribute name, and
//   "attributeValue"/"attributeValueLen" the text after '=' with
//   surrounding whitespace and quotes removed (NULL/0 if there is no '=')
// . m_attributeStartPos is advanced past the attribute, or set to 0 when
//   this was the last one
// . empty attributes (";;") are skipped
// . returns false when there are no more attributes
bool HttpMime::getAttribute(const char **attribute, size_t *attributeLen, const char **attributeValue, size_t *attributeValueLen) {
	// initialize value
	*attribute = NULL;
	*attributeLen = 0;
	*attributeValue = NULL;
	*attributeValueLen = 0;

	// no attribute
	if (m_attributeStartPos == 0) {
		return false;
	}

	// strip starting whitespaces
	// (bounds test first so we never read past the end of the line)
	while ((m_attributeStartPos < m_currentLineLen) && is_wspace_a(m_currentLine[m_attributeStartPos])) {
		++m_attributeStartPos;
	}

	*attribute = m_currentLine + m_attributeStartPos;
	*attributeLen = m_currentLineLen - m_attributeStartPos;

	// next attribute
	const char *semicolonPos = (const char *)memchr(*attribute, ';', *attributeLen);
	if (semicolonPos) {
		*attributeLen = semicolonPos - *attribute;
		m_attributeStartPos = semicolonPos - m_currentLine + 1;
	} else {
		// last attribute on this line
		m_attributeStartPos = 0;
	}

	// attribute value
	const char *equalPos = (const char *)memchr(*attribute, '=', *attributeLen);
	if (equalPos) {
		// split "name=value" at the '='
		*attributeValueLen = *attributeLen;
		*attributeLen = equalPos - *attribute;
		*attributeValueLen -= *attributeLen + 1;
		*attributeValue = equalPos + 1;

		logTrace(g_conf.m_logTraceHttpMime, "attributeLen=%d attributeValueLen=%d", static_cast<int>(*attributeLen), static_cast<int>(*attributeValueLen));

		// strip ending attribute whitespace
		while (*attributeLen && is_wspace_a((*attribute)[*attributeLen - 1])) {
			--(*attributeLen);
		}

		// strip starting attribute value whitespace/quote
		while (*attributeValueLen && (is_wspace_a((*attributeValue)[0]) || (*attributeValue)[0] == '"' || (*attributeValue)[0] == '\'')) {
			++(*attributeValue);
			--(*attributeValueLen);
		}

		// strip ending attribute value whitespace/quote
		while (*attributeValueLen && (is_wspace_a((*attributeValue)[*attributeValueLen - 1]) || (*attributeValue)[*attributeValueLen - 1] == '"' || (*attributeValue)[*attributeValueLen - 1] == '\'')) {
			--(*attributeValueLen);
		}
	}

	logTrace(g_conf.m_logTraceHttpMime, "attributeLen=%d attributeValueLen=%d", static_cast<int>(*attributeLen), static_cast<int>(*attributeValueLen));


	// cater for empty values between semicolon
	// eg: Set-Cookie: name=value; Path=/; ;SECURE; HttpOnly;
	if (*attributeLen == 0 && m_attributeStartPos) {
		return getAttribute(attribute, attributeLen, attributeValue, attributeValueLen);
	}

	logTrace(g_conf.m_logTraceHttpMime, "attribute='%.*s' value='%.*s'", static_cast<int>(*attributeLen), *attribute, static_cast<int>(*attributeValueLen), *attributeValue);

	return (*attributeLen > 0);
}

// Location
// . returns true if "field" is the Location header (handled here),
//   false if it is some other header
// . stores the raw value in m_locationField and, when a base url is
//   supplied, sets m_locUrl from the base url and that value
bool HttpMime::parseLocation(const char *field, size_t fieldLen, Url *baseUrl) {
	static const char s_location[] = "location";
	static const size_t s_locationLen = strlen(s_location);

	// not our header?
	if (fieldLen != s_locationLen || strncasecmp(field, s_location, fieldLen) != 0) {
		return false;
	}

	const char *target = NULL;
	size_t targetLen = 0;

	// an empty value leaves the location members untouched
	if (!getValue(&target, &targetLen)) {
		return true;
	}

	m_locationField = target;
	m_locationFieldLen = targetLen;

	if (baseUrl) {
		m_locUrl.set(baseUrl, m_locationField, m_locationFieldLen);
	}

	return true;
}

// https://tools.ietf.org/html/rfc2616#section-3.3.1
// Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
// Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
// Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
//
// . converts a cookie date string into a time_t (via timegm())
// . the candidate strptime() formats are chosen by whether the string
//   contains a '-' and/or a ',' and are tried in the order listed
// . returns false (leaving *time untouched) if no format matches
bool HttpMime::parseCookieDate(const char *value, size_t valueLen, time_t *time) {
	// contains both a dash and a comma
	static const char * const s_dashCommaFormats[] = {
		"%a, %d-%b-%y %T",	// Sunday, 06-Nov-94 08:49:37 GMT (RFC 850)
		"%a, %d-%b-%Y %T",	// Sun, 27-Nov-2016 22:15:17 GMT
		"%a, %d-%m-%Y %T",	// Sat, 26-11-2026 16:41:30 GMT
		NULL
	};

	// contains a dash but no comma
	static const char * const s_dashFormats[] = {
		"%a %d-%b-%Y %T",	// Thu 31-Dec-2020 00:00:00 GMT
		"%Y-%m-%d %T",		// 2018-03-21 18:43:07
		NULL
	};

	// contains a comma but no dash
	static const char * const s_commaFormats[] = {
		"%a, %d %b %Y %T",	// Sun, 06 Nov 1994 08:49:37 GMT (RFC 1123)
		NULL
	};

	// contains neither
	static const char * const s_plainFormats[] = {
		"%a %b %d %T %Y",	// Sun Nov  6 08:49:37 1994 (asctime)
		"%a %d %b %Y %T",	// Sat 26 Nov 2016 13:38:06 GMT
		"%d %b %Y %T",		// 23 Nov 2026 23:34:25 GMT
		NULL
	};

	// strptime() needs a nul-terminated string
	const std::string dateStr(value, valueLen);

	struct tm tm = {};
	// not set by strptime()
	tm.tm_isdst = -1;

	const char *firstDash = (const char*)memchr(value, '-', valueLen);
	const bool hasComma = (memchr(value, ',', valueLen) != NULL);

	// a "-0000" zone offset is not a date separator
	// eg: Fri, 02 Dec 2016 17:29:41 -0000
	bool hasDash = (firstDash != NULL);
	if (hasDash && firstDash + 4 < value + valueLen && memcmp(firstDash, "-0000", 5) == 0) {
		hasDash = false;
	}

	const char * const *format;
	if (hasDash) {
		format = hasComma ? s_dashCommaFormats : s_dashFormats;
	} else {
		format = hasComma ? s_commaFormats : s_plainFormats;
	}

	for (; *format != NULL; ++format) {
		if (strptime(dateStr.c_str(), *format, &tm) != NULL) {
			*time = timegm(&tm);
			return true;
		}
	}

	logTrace(g_conf.m_logTraceHttpMime, "invalid date format='%.*s'", static_cast<int>(valueLen), value);
	return false;
}

// Set-Cookie
// . returns true if "field" is the Set-Cookie header (handled here),
//   false if it is some other header
// . parses "name=value" plus the expires, max-age, domain, path, secure
//   and httponly attributes, and stores the cookie in m_cookies keyed by
//   its name (a later cookie with the same name replaces an earlier one)
// . a cookie without a value or without '=' is ignored
// . expired cookies are still stored, with m_expired set
bool HttpMime::parseSetCookie(const char *field, size_t fieldLen) {
	static const char s_setCookie[] = "set-cookie";
	static const size_t s_setCookieLen = strlen(s_setCookie);

	static const char s_expires[] = "expires";
	static const size_t s_expiresLen = strlen(s_expires);

	static const char s_maxAge[] = "max-age";
	static const size_t s_maxAgeLen = strlen(s_maxAge);

	static const char s_domain[] = "domain";
	static const size_t s_domainLen = strlen(s_domain);

	static const char s_path[] = "path";
	static const size_t s_pathLen = strlen(s_path);

	static const char s_secure[] = "secure";
	static const size_t s_secureLen = strlen(s_secure);

	static const char s_httpOnly[] = "httponly";
	static const size_t s_httpOnlyLen = strlen(s_httpOnly);

	if (fieldLen == s_setCookieLen && strncasecmp(field, s_setCookie, fieldLen) == 0) {
		httpcookie_t cookie = {};

		const char *value = NULL;
		size_t valueLen = 0;

		if (getValue(&value, &valueLen)) {
			cookie.m_cookie = value;
			cookie.m_cookieLen = valueLen;

			const char *equalPos = (const char *)memchr(value, '=', valueLen);
			if (!equalPos) {
				// missing '=' character. ignore cookie
				return true;
			}

			cookie.m_nameLen = equalPos - value;

			logTrace(g_conf.m_logTraceHttpMime, "name=%.*s (len=%d)", static_cast<int>(cookie.m_nameLen), cookie.m_cookie, static_cast<int>(cookie.m_nameLen));

			// attribute
			// https://tools.ietf.org/html/rfc6265#section-5.2
			const char *attribute = NULL;
			size_t attributeLen = 0;
			const char *attributeValue = NULL;
			size_t attributeValueLen = 0;

			bool foundMaxAge = false;
			while (getAttribute(&attribute, &attributeLen, &attributeValue, &attributeValueLen)) {
				logTrace(g_conf.m_logTraceHttpMime, "attribute=%.*s (len=%d)", static_cast<int>(attributeLen), attribute, static_cast<int>(attributeLen));
				logTrace(g_conf.m_logTraceHttpMime, "attributeValueLen=%d", static_cast<int>(attributeValueLen) );

				if (attributeValueLen > 0) {
					// everything inside here needs an attribute value

					// expires
					if (attributeLen == s_expiresLen && strncasecmp(attribute, s_expires, attributeLen) == 0) {
						// max-age overrides expires
						if (foundMaxAge) {
							continue;
						}

						time_t expiry = 0;
						if (parseCookieDate(attributeValue, attributeValueLen, &expiry) && expiry < m_currentTime) {
							// expired
							logTrace(g_conf.m_logTraceHttpMime,
							         "expires='%.*s'. expiry=%ld currentTime=%ld expired cookie. ignoring",
							         static_cast<int>(attributeValueLen), attributeValue, expiry, m_currentTime);
							cookie.m_expired = true;
						}

						continue;
					}

					// max-age
					// https://tools.ietf.org/html/rfc6265#section-5.2.2
					// If the first character of the attribute-value is not a DIGIT or a "-"
					// character, ignore the cookie-av.
					// If the remainder of attribute-value contains a non-DIGIT character, ignore the cookie-av.
					// Let delta-seconds be the attribute-value converted to an integer.
					// If delta-seconds is less than or equal to zero (0), let expiry-time be the earliest representable date and time.
					// Otherwise, let the expiry-time be the current date and time plus delta-seconds seconds.
					if (attributeLen == s_maxAgeLen && strncasecmp(attribute, s_maxAge, attributeLen) == 0) {
						foundMaxAge = true;
						// . keep the full width of strtol()'s result so a large
						//   value cannot be truncated to 0
						// . zero AND negative values mean "already expired"
						// . note: a non-numeric value converts to 0 and is
						//   therefore treated as expired as well
						const long maxAge = strtol(attributeValue, NULL, 10);
						if (maxAge <= 0) {
							// expired
							logTrace(g_conf.m_logTraceHttpMime, "max-age=%.*s. expired cookie. ignoring",
							         static_cast<int>(attributeValueLen), attributeValue);
							cookie.m_expired = true;
						}
						continue;
					}

					// domain
					// https://tools.ietf.org/html/rfc6265#section-5.2.3
					// If the attribute-value is empty, the behavior is undefined.
					// However, the user agent SHOULD ignore the cookie-av entirely.
					// If the first character of the attribute-value string is %x2E ("."):
					//   Let cookie-domain be the attribute-value without the leading %x2E (".") character.
					// Otherwise:
					//   Let cookie-domain be the entire attribute-value.
					// Convert the cookie-domain to lower case.
					if (attributeLen == s_domainLen && strncasecmp(attribute, s_domain, attributeLen) == 0) {
						// ignore first '.'
						if (attributeValue[0] == '.') {
							++attributeValue;
							--attributeValueLen;
						}

						cookie.m_domain = attributeValue;
						cookie.m_domainLen = attributeValueLen;
						continue;
					}

					// path
					// https://tools.ietf.org/html/rfc6265#section-5.2.4
					// If the attribute-value is empty or if the first character of the attribute-value is not %x2F ("/"):
					//   Let cookie-path be the default-path.
					// Otherwise:
					//   Let cookie-path be the attribute-value.
					if (attributeLen == s_pathLen && strncasecmp(attribute, s_path, attributeLen) == 0) {
						if (attributeValue[0] == '/') {
							cookie.m_path = attributeValue;
							cookie.m_pathLen = attributeValueLen;
						}
						continue;
					}
				}

				// secure
				if (attributeLen == s_secureLen && strncasecmp(attribute, s_secure, attributeLen) == 0) {
					cookie.m_secure = true;
					continue;
				}

				// httpOnly
				if (attributeLen == s_httpOnlyLen && strncasecmp(attribute, s_httpOnly, attributeLen) == 0) {
					cookie.m_httpOnly = true;
					continue;
				}

				// add parsing of other attributes here
			}

			// if we reach here means cookie should be stored
			m_cookies[std::string(cookie.m_cookie, cookie.m_nameLen)] = cookie;
		}

		return true;
	}

	return false;
}

// Content-Type
bool HttpMime::parseContentType(const char *field, size_t fieldLen) {
	static const char s_contentType[] = "content-type";
	static const size_t s_contentTypeLen = strlen(s_contentType);

	if (fieldLen == s_contentTypeLen && strncasecmp(field, s_contentType, fieldLen) == 0) {
		const char *value = NULL;
		size_t valueLen = 0;

		if (getValue(&value, &valueLen)) {
			m_contentTypePos = value;
			m_contentTypeLen = valueLen;

			static const char s_charset[] = "charset";
			static const size_t s_charsetLen = strlen(s_charset);

			const char *attribute = NULL;
			size_t attributeLen = 0;
			const char *attributeValue = NULL;
			size_t attributeValueLen = 0;
			while (getAttribute(&attribute, &attributeLen, &attributeValue, &attributeValueLen)) {
				logTrace(g_conf.m_logTraceHttpMime, "attribute=%.*s (len=%d)", static_cast<int>(attributeLen), attribute, static_cast<int>(attributeLen));
				logTrace(g_conf.m_logTraceHttpMime, "attributeValueLen=%d", static_cast<int>(attributeValueLen));

				if (attributeValueLen > 0) {
					// charset
					if (attributeLen == s_charsetLen && strncasecmp(attribute, s_charset, attributeLen) == 0) {
						m_charset = attributeValue;
						m_charsetLen = attributeValueLen;
						continue;
					}
				}
			}

			// returns CT_UNKNOWN if unknown
			m_contentType = getContentTypeFromStr(m_contentTypePos, m_contentTypeLen);
			if (m_contentType == CT_UNKNOWN) {
				log(LOG_WARN, "http: unrecognized content type '%.*s'", (int)m_contentTypeLen, m_contentTypePos);
			}
		}

		return true;
	}

	return false;
}

// Content-Length
// . returns true if "field" is the Content-Length header (handled here),
//   false if it is some other header
// . remembers where the value starts and stores its decimal value in
//   m_contentLen
bool HttpMime::parseContentLength(const char *field, size_t fieldLen) {
	static const char s_contentLength[] = "content-length";
	static const size_t s_contentLengthLen = strlen(s_contentLength);

	// not our header?
	if (fieldLen != s_contentLengthLen || strncasecmp(field, s_contentLength, fieldLen) != 0) {
		return false;
	}

	const char *digits = NULL;
	size_t digitsLen = 0;

	// an empty value leaves m_contentLen untouched
	if (!getValue(&digits, &digitsLen)) {
		return true;
	}

	m_contentLengthPos = digits;
	m_contentLen = strtol(m_contentLengthPos, NULL, 10);

	return true;
}

// Content-Encoding
// . returns true if "field" is the Content-Encoding header (handled
//   here), false if it is some other header
// . remembers where the value starts and sets m_contentEncoding to
//   ET_GZIP or ET_DEFLATE when the value is exactly "gzip" or "deflate"
// . any other value leaves m_contentEncoding unchanged
bool HttpMime::parseContentEncoding(const char *field, size_t fieldLen) {
	static const char s_contentEncoding[] = "content-encoding";
	static const size_t s_contentEncodingLen = strlen(s_contentEncoding);

	if (fieldLen == s_contentEncodingLen && strncasecmp(field, s_contentEncoding, fieldLen) == 0) {
		const char *value = NULL;
		size_t valueLen = 0;

		if (getValue(&value, &valueLen)) {
			m_contentEncodingPos = value;

			static const char s_gzip[] = "gzip";
			static const size_t s_gzipLen = strlen(s_gzip);

			static const char s_deflate[] = "deflate";
			static const size_t s_deflateLen = strlen(s_deflate);

			// content-coding values are case-insensitive (RFC 7231
			// section 3.1.2.1), so "GZIP" must be accepted as well
			if (valueLen == s_gzipLen && strncasecmp(value, s_gzip, valueLen) == 0) {
				m_contentEncoding = ET_GZIP;
			} else if (valueLen == s_deflateLen && strncasecmp(value, s_deflate, valueLen) == 0) {
				m_contentEncoding = ET_DEFLATE;
			}
		}

		return true;
	}

	return false;
}

// Content-Language
// https://tools.ietf.org/html/rfc2616#section-14.12
// . returns true if "field" is the Content-Language header (handled
//   here), false if it is some other header
// . stores the value in m_contentLanguage/m_contentLanguageLen
bool HttpMime::parseContentLanguage(const char *field, size_t fieldLen) {
	static const char s_contentLanguage[] = "content-language";
	static const size_t s_contentLanguageLen = strlen(s_contentLanguage);

	// not our header?
	if (fieldLen != s_contentLanguageLen || strncasecmp(field, s_contentLanguage, fieldLen) != 0) {
		return false;
	}

	const char *lang = NULL;
	size_t langLen = 0;

	// only a non-empty value is recorded
	if (getValue(&lang, &langLen)) {
		m_contentLanguage = lang;
		m_contentLanguageLen = langLen;
	}

	return true;
}


// Server
// . returns true if "field" is the Server header (handled here), false
//   if it is some other header
// . stores the value in m_server/m_serverLen
bool HttpMime::parseServer(const char *field, size_t fieldLen) {
	static const char s_server[] = "server";
	static const size_t s_serverLen = strlen(s_server);

	// not our header?
	if (fieldLen != s_serverLen || strncasecmp(field, s_server, fieldLen) != 0) {
		return false;
	}

	const char *software = NULL;
	size_t softwareLen = 0;

	// only a non-empty value is recorded
	if (getValue(&software, &softwareLen)) {
		m_server = software;
		m_serverLen = softwareLen;
	}

	return true;
}

// https://tools.ietf.org/html/rfc2616#section-2.2
// HTTP/1.1 header field values can be folded onto multiple lines if the continuation line begins with a space or
// horizontal tab. All linear white space, including folding, has the same semantics as SP.
// A recipient MAY replace any linear white space with a single SP before interpreting the field value or
// forwarding the message downstream.
//
// LWS            = [CRLF] 1*( SP | HT )

// https://tools.ietf.org/html/rfc2616#section-4.2
// Each header field consists of a name followed by a colon (":") and the field value. Field names are case-insensitive.
// The field value MAY be preceded by any amount of LWS, though a single SP is preferred.
// Header fields can be extended over multiple lines by preceding each extra line with at least one SP or HT.
//
// message-header = field-name ":" [ field-value ]
// field-name     = token
// field-value    = *( field-content | LWS )
// field-content  = <the OCTETs making up the field-value
//                  and consisting of either *TEXT or combinations
//                  of token, separators, and quoted-string>

// https://tools.ietf.org/html/rfc2616#section-19.3
// Clients SHOULD be tolerant in parsing the Status-Line and servers tolerant when parsing the Request-Line.
// In particular, they SHOULD accept any amount of SP or HT characters between fields, even though only a single SP is required.

// https://tools.ietf.org/html/rfc7230#section-3.2.4
// Historically, HTTP header field values could be extended over multiple lines by preceding each extra line with at least
// one space or horizontal tab (obs-fold).  This specification deprecates such line folding except within the message/http media type
//
// A user agent that receives an obs-fold in a response message that is not within a message/http container MUST replace
// each received obs-fold with one or more SP octets prior to interpreting the field value.

/// @todo ALC we currently don't cater for multiple cookies with the same name but different domains (we keep the last entry)
/// eg: Set-Cookie: CFID=77593661; Expires=session; domain=tennisexpress.com; Path=/
///     Set-Cookie: CFID=77593661; Expires=session; domain=.tennisexpress.com; Path=/
///     Set-Cookie: CFID=77593661; Expires=session; domain=www.tennisexpress.com; Path=/

// . reads the status code from the first line, then hands every
//   following header line to the individual header parsers
// . "url" is passed to the Location parser as the base url
// . returns false on bad mime (empty, or no status after the protocol)
bool HttpMime::parse(const char *mime, int32_t mimeLen, Url *url) {
#ifdef _VALGRIND_
	VALGRIND_CHECK_MEM_IS_DEFINED(mime,mimeLen);
#endif
	// forget any redirect url from a previous parse
	m_locUrl.reset();

	// nothing to parse
	if (mimeLen == 0) {
		return false;
	}

	// no status until we have read one
	m_status = -1;

	const char * const end = mime + mimeLen;
	const char *cursor = mime;

	// step over the "HTTP/x.x" token ...
	for (; cursor < end && !is_wspace_a(*cursor); ++cursor) {
	}
	// ... and over the whitespace that follows it
	for (; cursor < end && is_wspace_a(*cursor); ++cursor) {
	}
	// nothing after the protocol token
	if (cursor == end) {
		return false;
	}

	// the status code is next
	m_status = atol2(cursor, end - cursor);

	// defaults used when no Content-Type header is present
	m_contentType = CT_HTML;
	m_charset = NULL;
	m_charsetLen = 0;

	// the first line is the status line we just handled
	getNextLine();

	while (getNextLine()) {
		const char *name = NULL;
		size_t nameLen = 0;

		// lines without a field name are skipped
		if (!getField(&name, &nameLen)) {
			continue;
		}

		// each parser returns true once it recognises the header, which
		// stops the chain; add parsing of other headers to this list
		if (parseContentEncoding(name, nameLen) ||
		    parseContentLanguage(name, nameLen) ||
		    parseContentLength(name, nameLen) ||
		    parseContentType(name, nameLen) ||
		    parseLocation(name, nameLen, url) ||
		    parseSetCookie(name, nameLen) ||
		    parseServer(name, nameLen)) {
			continue;
		}
	}

	return true;
}


namespace {

// one row of the lookup tables used by getContentTypeFromStr()
struct ContentTypeName {
	const char *m_name;
	int32_t     m_type;
};

// . true if the slen bytes at s spell "name", ignoring case
// . a strict prefix of "name" is NOT a match
// . if slen is larger than the name, s must be nul-terminated right
//   after the name (strncasecmp() stops at the nul)
bool isContentTypeName(const char *s, size_t slen, const char *name) {
	return slen >= strlen(name) && strncasecmp(s, name, slen) == 0;
}

// true if the slen bytes at s start with "prefix", ignoring case;
// never looks beyond slen bytes
bool hasContentTypePrefix(const char *s, size_t slen, const char *prefix) {
	const size_t prefixLen = strlen(prefix);
	return slen >= prefixLen && strncasecmp(s, prefix, prefixLen) == 0;
}

} // namespace

// . maps a content-type string (without attributes), e.g. "text/html",
//   to one of the CT_* codes
// . "s" does not have to be nul-terminated; "slen" is its length
// . the comparison is case-insensitive and needs the whole name, so a
//   truncated string such as "application/x" is no longer mistaken for
//   "application/xml"
// . any "text/..." subtype not listed is CT_TEXT and any "image/..."
//   subtype not listed is CT_IMAGE
// . returns CT_UNKNOWN for everything else (this includes
//   binary/octet-stream, application/octet-stream, application/binary,
//   application/x-tar and audio/*)
int32_t getContentTypeFromStr(const char *s, size_t slen) {
	static const ContentTypeName s_textTypes[] = {
		{ "text/html"       , CT_HTML },
		{ "text/plain"      , CT_TEXT },
		{ "text/xml"        , CT_XML  },
		{ "text/txt"        , CT_TEXT },
		{ "text/javascript" , CT_JS   },
		{ "text/x-js"       , CT_JS   },
		{ "text/js"         , CT_JS   },
		{ "text/css"        , CT_CSS  }
	};

	static const ContentTypeName s_otherTypes[] = {
		{ "text"                          , CT_TEXT },
		{ "txt"                           , CT_TEXT },
		{ "application/xml"               , CT_XML  },
		// we were not able to spider links on an xhtml doc because
		// this was set to CT_XML, so use CT_HTML
		{ "application/xhtml+xml"         , CT_HTML },
		{ "application/rss+xml"           , CT_XML  },
		{ "rss"                           , CT_XML  },
		{ "application/rdf+xml"           , CT_XML  },
		{ "application/atom+xml"          , CT_XML  },
		{ "atom+xml"                      , CT_XML  },
		{ "application/pdf"               , CT_PDF  },
		{ "application/msword"            , CT_DOC  },
		{ "application/vnd.ms-excel"      , CT_XLS  },
		{ "application/vnd.ms-powerpoint" , CT_PPT  },
		{ "application/mspowerpoint"      , CT_PPT  },
		{ "application/postscript"        , CT_PS   },
		{ "application/warc"              , CT_WARC },
		{ "application/arc"               , CT_ARC  },
		{ "image/gif"                     , CT_GIF  },
		{ "image/jpeg"                    , CT_JPG  },
		{ "image/png"                     , CT_PNG  },
		{ "image/tiff"                    , CT_TIFF },
		{ "application/javascript"        , CT_JS   },
		{ "application/x-javascript"      , CT_JS   },
		{ "application/x-gzip"            , CT_GZ   },
		{ "application/json"              , CT_JSON },
		{ "application/vnd.wap.xhtml+xml" , CT_HTML }
	};

	// the text family: known subtypes first, anything else is plain text
	if (hasContentTypePrefix(s, slen, "text/")) {
		for (size_t i = 0; i < sizeof(s_textTypes) / sizeof(s_textTypes[0]); ++i) {
			if (isContentTypeName(s, slen, s_textTypes[i].m_name)) {
				return s_textTypes[i].m_type;
			}
		}
		return CT_TEXT;
	}

	// everything else that we know by its full name
	for (size_t i = 0; i < sizeof(s_otherTypes) / sizeof(s_otherTypes[0]); ++i) {
		if (isContentTypeName(s, slen, s_otherTypes[i].m_name)) {
			return s_otherTypes[i].m_type;
		}
	}

	// image subtypes that are not listed above
	if (hasContentTypePrefix(s, slen, "image/")) {
		return CT_IMAGE;
	}

	return CT_UNKNOWN;
}

// . the table that maps a file extension to a content type
// . the lookups in this file use the 32-bit hash of the extension as the
//   key and read each value as a pointer to the content-type string
static HashTableX s_mimeTable;
// NOTE(review): presumably set once s_mimeTable has been filled; the code
// that fills the table is not in this part of the file -- confirm
static bool s_init = false;

// . resets the extension -> content-type table
// . NOTE(review): s_init is not cleared here -- confirm whether the table
//   is expected to be rebuilt after this call
void resetHttpMime ( ) {
	s_mimeTable.reset();
}

// . looks up the content-type string stored for a file extension
// . "ext" does not have to be nul-terminated; "elen" is its length
// . returns NULL if no extension is given or it is not in the table
const char *extensionToContentTypeStr2 ( const char *ext , int32_t elen ) {
	// no usable extension
	const bool haveExt = ext && ext[0] && elen > 0;
	if ( ! haveExt ) {
		return NULL;
	}

	// the table is keyed by the hash of the extension
	int32_t key = hash32 ( ext , elen );
	char **slot = (char **)s_mimeTable.getValue ( &key );

	return slot ? *slot : NULL;
}

// . looks up the content-type string stored for a file extension
// . "ext" does not have to be nul-terminated; "elen" is its length
// . falls back to "text/html" if no extension is given or it is not in
//   the table
const char *HttpMime::getContentTypeFromExtension ( const char *ext , int32_t elen) {
	static const char * const s_fallback = "text/html";

	// no usable extension
	const bool haveExt = ext && ext[0] && elen > 0;
	if ( ! haveExt ) {
		return s_fallback;
	}

	// the table is keyed by the hash of the extension
	int32_t key = hash32 ( ext , elen );
	char **slot = (char **)s_mimeTable.getValue ( &key );

	return slot ? *slot : s_fallback;
}


// . list of types is on: http://www.duke.edu/websrv/file-extensions.html
// . i copied it to the bottom of this file though
// . same lookup as the (ext, elen) overload, for a nul-terminated "ext"
// . falls back to "text/html" if no extension is given or it is not in
//   the table
const char *HttpMime::getContentTypeFromExtension ( const char *ext ) {
	static const char * const s_fallback = "text/html";

	// no usable extension
	if ( ! ext || ! ext[0] ) {
		return s_fallback;
	}

	// the table is keyed by the hash of the extension
	int32_t key = hash32n ( ext );
	char **slot = (char **)s_mimeTable.getValue ( &key );

	return slot ? *slot : s_fallback;
}

// . returns the Content-Encoding value to send for a file extension
//   ("bz2" and "gz", compared case-insensitively)
// . returns NULL for a NULL or any other extension
const char *HttpMime::getContentEncodingFromExtension ( const char *ext ) {
	// { extension , content encoding }
	static const char * const s_encodings[][2] = {
		{ "bz2" , "x-bzip2" },
		{ "gz"  , "x-gzip"  }
	};

	if ( ! ext ) {
		return NULL;
	}

	for ( size_t i = 0 ; i < sizeof(s_encodings) / sizeof(s_encodings[0]) ; ++i ) {
		if ( strcasecmp ( ext , s_encodings[i][0] ) == 0 ) {
			return s_encodings[i][1];
		}
	}

	return NULL;
}

// . writes a "302" redirect mime pointing at "redir" into m_buf
// . at most the first 600 bytes of "redir" are used
// . m_buf is nul-terminated and m_mimeLen is set to the mime's length
void HttpMime::makeRedirMime ( const char *redir , int32_t redirLen ) {
	static const char s_head[] = "HTTP/1.0 302 RD\r\nLocation: ";
	static const int32_t s_headLen = sizeof(s_head) - 1;

	char *dst = m_buf;

	// status line plus the start of the Location: field
	gbmemcpy ( dst , s_head , s_headLen );
	dst += s_headLen;

	// the (possibly truncated) redirect target
	if ( redirLen > 600 ) {
		redirLen = 600;
	}
	gbmemcpy ( dst , redir , redirLen );
	dst += redirLen;

	// end of the field, then the blank line that ends the mime
	dst[0] = '\r';
	dst[1] = '\n';
	dst[2] = '\r';
	dst[3] = '\n';
	dst += 4;
	*dst = '\0';

	// set the mime's length
	m_mimeLen = dst - m_buf;

	// sanity check
	if ( m_mimeLen > 1023 ) { g_process.shutdownAbort(true); }
}

// . build a reply mime (status line, headers and the terminating blank line)
//   into m_buf and set m_mimeLen to its length
// . cacheTime: -1 means browser should not cache at all (Pragma + Expires),
//   -2 additionally sends "Cache-Control: no-cache", 0 sends no cache
//   headers at all, anything else sends an "Expires:" header that many
//   seconds from now
// . lastModified: 0 means "now"
// . offset/bytesToSend: if offset > 0 or bytesToSend != -1 (and this is not
//   a POST reply) a partial content mime is made
// . ext: used to guess the Content-Type (only if contentType is NULL) and
//   to pick a Content-Encoding for bz2/gz files
// . POSTReply: make the short mime used to answer a POST; it carries no
//   cache headers and no Last-Modified
// . charset: only emitted by the normal (non-POST, non-partial) mime
// . httpStatus: -1 selects the default, 200 for POST/normal replies and
//   206 for partial content
// . cookie: either a bare "name=value" or one or more complete
//   "Set-Cookie: ...\r\n" lines
// . NOTE(review): everything is sprintf()'ed into m_buf without a bounds
//   check, so contentType, charset and cookie must be short enough to fit
void HttpMime::makeMime  ( int32_t    totalContentLen    ,
			   int32_t    cacheTime          ,
			   time_t  lastModified       ,
			   int32_t    offset             ,
			   int32_t    bytesToSend        ,
			   const char   *ext                ,
			   bool    POSTReply          ,
			   const char   *contentType        ,
			   const char   *charset            ,
			   int32_t    httpStatus         ,
			   const char   *cookie             ) {
	// . make the content type line
	// . guess it from the file extension if the caller did not supply one
	if ( ! contentType )
		contentType = getContentTypeFromExtension ( ext );

	// do not cache plug ins
	if ( contentType && strcmp(contentType,"application/x-xpinstall")==0)
		cacheTime = -2;

	// . we do NOT default the charset to utf-8 for text content types
	// . that would prevent charset specification in html files
	// . -partap

	// this is used for bz2 and gz files (mp3?)
	const char *contentEncoding = getContentEncodingFromExtension ( ext );
	// the "Content-Encoding:" line, or an empty string if not needed
	char enc[128];
	if ( contentEncoding )
		snprintf ( enc , sizeof(enc) ,
			   "Content-Encoding: %s\r\n", contentEncoding );
	else
		enc[0] = '\0';
	// get the time now
	time_t now = getTime();
	// get the greenwhich mean time (GMT)
	char ns[128];
	struct tm tm_buf;
	struct tm *timeStruct = gmtime_r(&now,&tm_buf);
	// Wed, 20 Mar 2002 16:47:30 GMT
	strftime ( ns , 126 , "%a, %d %b %Y %T GMT" , timeStruct );
	// if lastModified is 0 use now
	if ( lastModified == 0 ) lastModified = now;
	// convert lastModified greenwhich mean time (GMT)
	char lms[128];
	timeStruct = gmtime_r(&lastModified,&tm_buf);
	// Wed, 20 Mar 2002 16:47:30 GMT
	strftime ( lms , 126 , "%a, %d %b %Y %T GMT" , timeStruct );
	// . the pragma no cache string (used just for proxy servers?)
	// . also use cache-control: for the browser itself (HTTP1.1, though)
	// . "tmp" backs "pns" when an Expires: date has to be formatted
	char tmp[128];
	const char *pns;
	// with cache-control on, when you hit the back button, it reloads
	// the page, this is bad for most things... so we only avoid the
	// cache for index.html and PageAddUrl.cpp (the main and addurl page)
	if      ( cacheTime == -2 ) pns =  "Cache-Control: no-cache\r\n"
					   "Pragma: no-cache\r\n"
					   "Expires: -1\r\n";
	// so when we click on a control link, it responds correctly.
	// like turning spiders on.
	else if  ( cacheTime == -1 ) pns = "Pragma: no-cache\r\n"
					   "Expires: -1\r\n";
	// don't specify cache times if it's 0 (let browser regulate it)
	else if ( cacheTime == 0 ) pns = "";
	// otherwise, expire tag: "Expires: Wed, 23 Dec 2001 10:23:01 GMT"
	else {
		time_t  expDate = now + cacheTime;
		timeStruct = gmtime_r(&expDate,&tm_buf);
		strftime ( tmp , 100 , "Expires: %a, %d %b %Y %T GMT\r\n",
			   timeStruct );
		pns = tmp;
	}
	// . "p" is the write cursor into m_buf
	// . "smsg" is the reason phrase, only set for status 200
	char *p = m_buf;
	const char *smsg = "";
	// . a reply to a POST (not a GET or HEAD)
	// . this mime has no cache headers, no Last-Modified and no charset
	if ( POSTReply ) {
		if ( httpStatus == -1 ) httpStatus = 200;
		if ( httpStatus == 200 ) smsg = " OK";
		// not emitted in this branch, kept from the original
		if ( ! charset ) charset = "utf-8";
		p += sprintf ( p,
			  "HTTP/1.0 %" PRId32"%s\r\n"
			  "Date: %s\r\n"
			  "Access-Control-Allow-Origin: *\r\n"
			  "Server: Gigablast/1.0\r\n"
			  "Content-Length: %" PRId32"\r\n"
			  "Connection: Close\r\n"
			  "%s"
			  "Content-Type: %s\r\n",
			  httpStatus , smsg ,
			  ns , totalContentLen , enc , contentType  );
	}
	// . is it partial content?
	// . if bytesToSend is < 0 it means "totalContentLen"
	// . NOTE(review): bytesToSend is printed as is, so offset > 0 with
	//   bytesToSend == -1 yields "Content-Length: -1" -- confirm callers
	// . NOTE(review): the Content-Range value below is not in the
	//   standard "bytes first-last/total" form; left untouched because
	//   existing clients may parse the current format
	else if ( offset > 0 || bytesToSend != -1 ) {
		if ( httpStatus == -1 ) httpStatus = 206;
		// not emitted in this branch, kept from the original
		if ( ! charset ) charset = "utf-8";
		p += sprintf( p,
			      "HTTP/1.0 %" PRId32" Partial content\r\n"
			      "%s"
			      "Content-Length: %" PRId32"\r\n"
			      "Content-Range: %" PRId32"-%" PRId32"(%" PRId32")\r\n"
			      "Connection: Close\r\n"
			      // for ajax support
			      "Access-Control-Allow-Origin: *\r\n"
			      "Server: Gigablast/1.0\r\n"
			      "%s"
			      "Date: %s\r\n"
			      "Last-Modified: %s\r\n"
			      "Content-Type: %s\r\n",
			      httpStatus ,
			      enc ,bytesToSend ,
			      offset , offset + bytesToSend ,
			      totalContentLen ,
			      pns ,
			      ns ,
			      lms , contentType );
	}
	// otherwise, do a normal mime
	else {
		if ( httpStatus == -1 ) httpStatus = 200;
		if ( httpStatus == 200 ) smsg = " OK";
		p += sprintf( p,
			      "HTTP/1.0 %" PRId32"%s\r\n"
			      , httpStatus , smsg );
		// if content length is not known, as in diffbot.cpp, then
		// do not print it into the mime
		if ( totalContentLen >= 0 )
			p += sprintf ( p ,
				       // make it at least 4 digits so we can
				       // change the length of the content
				       // should we insert a login bar in
				       // Proxy::storeLoginBar()
				       "Content-Length: %04" PRId32"\r\n"
				       , totalContentLen );
		p += sprintf ( p ,
			      "%s"
			      "Content-Type: %s",
			       enc , contentType );
		// the charset is appended straight to the Content-Type line
		if ( charset ) p += sprintf ( p , "; charset=%s", charset );
		p += sprintf ( p , "\r\n");
		p += sprintf ( p ,
			       "Connection: Close\r\n"
			       "Access-Control-Allow-Origin: *\r\n"
			       "Server: Gigablast/1.0\r\n"
			       "%s"
			       "Date: %s\r\n"
			       "Last-Modified: %s\r\n" ,
			       pns ,
			       ns ,
			       lms );
	}
	// write the cookie if we have one
	if (cookie) {
		// . it may already be a list of "Set-Cookie: x=y\r\n" lines
		// . otherwise it is a bare "x=y" and needs the header name
		if ( strncmp(cookie, "Set-Cookie", 10) != 0 ) {
			p += sprintf(p,"Set-Cookie: ");
		}
		p += sprintf ( p, "%s", cookie);
		// . terminate the line unless the caller already did
		// . only the last byte decides: testing p[-2] as well made
		//   us skip the CRLF whenever the second to last byte
		//   happened to be '\r', gluing the cookie to the blank
		//   line that ends the mime
		if ( p[-1] != '\n' ) {
			*p++ = '\r';
			*p++ = '\n';
		}
	}

	// write another line to end the mime
	p += sprintf(p, "\r\n");
	// set the mime's length
	m_mimeLen = p - m_buf;
}


//FILE EXTENSIONS to MIME CONTENT-TYPE
//------------------------------------

// . flat list of (extension, content type) pairs: even indexes hold the
//   lower-case file extension, the following odd index holds its type
// . HttpMime::init() loads these pairs into s_mimeTable
static const char * const s_ext[] = {
	"ai", "application/postscript",
	"aif", "audio/x-aiff",
	"aifc", "audio/x-aiff",
	"aiff", "audio/x-aiff",
	"asc", "text/plain",
	"au", "audio/basic",
	"avi", "video/x-msvideo",
	"bcpio", "application/x-bcpio",
	"bin", "application/octet-stream",
	// was wrongly listed as "image/gif"
	"bmp", "image/bmp",
	"bz2", "application/x-bzip2",
	"c", "text/plain",
	"cc", "text/plain",
	"ccad", "application/clariscad",
	"cdf", "application/x-netcdf",
	"class", "application/octet-stream",
	"cpio", "application/x-cpio",
	"cpt", "application/mac-compactpro",
	"csh", "application/x-csh",
	"css", "text/css",
	"dcr", "application/x-director",
	"dir", "application/x-director",
	"dms", "application/octet-stream",
	"doc", "application/msword",
	"drw", "application/drafting",
	"dvi", "application/x-dvi",
	"dwg", "application/acad",
	"dxf", "application/dxf",
	"dxr", "application/x-director",
	"eps", "application/postscript",
	"etx", "text/x-setext",
	"exe", "application/octet-stream",
	"ez", "application/andrew-inset",
	"f", "text/plain",
	"f90", "text/plain",
	"fli", "video/x-fli",
	"gif", "image/gif",
	"gtar", "application/x-gtar",
	"gz", "application/x-gzip",
	"h", "text/plain",
	"hdf", "application/x-hdf",
	"hh", "text/plain",
	"hqx", "application/mac-binhex40",
	"htm", "text/html",
	"html", "text/html",
	"ice", "x-conference/x-cooltalk",
	"ief", "image/ief",
	"iges", "model/iges",
	"igs", "model/iges",
	"ips", "application/x-ipscript",
	"ipx", "application/x-ipix",
	"jpe", "image/jpeg",
	"jpeg", "image/jpeg",
	"jpg", "image/jpeg",
	"js", "application/x-javascript",
	"kar", "audio/midi",
	"latex", "application/x-latex",
	"lha", "application/octet-stream",
	"lsp", "application/x-lisp",
	"lzh", "application/octet-stream",
	"m", "text/plain",
	"man", "application/x-troff-man",
	"me", "application/x-troff-me",
	"mesh", "model/mesh",
	"mid", "audio/midi",
	"midi", "audio/midi",
	"mif", "application/vnd.mif",
	"mime", "www/mime",
	"mov", "video/quicktime",
	"movie", "video/x-sgi-movie",
	"mp2", "audio/mpeg",
	"mp3", "audio/mpeg",
	"mpe", "video/mpeg",
	"mpeg", "video/mpeg",
	"mpg", "video/mpeg",
	"mpga", "audio/mpeg",
	"ms", "application/x-troff-ms",
	"msh", "model/mesh",
	"nc", "application/x-netcdf",
	"oda", "application/oda",
	"pbm", "image/x-portable-bitmap",
	"pdb", "chemical/x-pdb",
	"pdf", "application/pdf",
	"pgm", "image/x-portable-graymap",
	"pgn", "application/x-chess-pgn",
	"png", "image/png",
	"ico", "image/x-icon",
	"pnm", "image/x-portable-anymap",
	"pot", "application/mspowerpoint",
	"ppm", "image/x-portable-pixmap",
	"pps", "application/mspowerpoint",
	"ppt", "application/mspowerpoint",
	"ppz", "application/mspowerpoint",
	"pre", "application/x-freelance",
	"prt", "application/pro_eng",
	"ps", "application/postscript",
	"qt", "video/quicktime",
	"ra", "audio/x-realaudio",
	"ram", "audio/x-pn-realaudio",
	"ras", "image/cmu-raster",
	"rgb", "image/x-rgb",
	"rm", "audio/x-pn-realaudio",
	"roff", "application/x-troff",
	"rpm", "audio/x-pn-realaudio-plugin",
	"rtf", "text/rtf",
	"rtx", "text/richtext",
	"scm", "application/x-lotusscreencam",
	"set", "application/set",
	"sgm", "text/sgml",
	"sgml", "text/sgml",
	"sh", "application/x-sh",
	"shar", "application/x-shar",
	"silo", "model/mesh",
	"sit", "application/x-stuffit",
	"skd", "application/x-koan",
	"skm", "application/x-koan",
	"skp", "application/x-koan",
	"skt", "application/x-koan",
	"smi", "application/smil",
	"smil", "application/smil",
	"snd", "audio/basic",
	"sol", "application/solids",
	"spl", "application/x-futuresplash",
	"src", "application/x-wais-source",
	"step", "application/STEP",
	"stl", "application/SLA",
	"stp", "application/STEP",
	"sv4cpio", "application/x-sv4cpio",
	"sv4crc", "application/x-sv4crc",
	"swf", "application/x-shockwave-flash",
	"t", "application/x-troff",
	"tar", "application/x-tar",
	"tcl", "application/x-tcl",
	"tex", "application/x-tex",
	"texi", "application/x-texinfo",
	"texinfo", "application/x-texinfo",
	"tif", "image/tiff",
	"tiff", "image/tiff",
	"tr", "application/x-troff",
	"tsi", "audio/TSP-audio",
	"tsp", "application/dsptype",
	"tsv", "text/tab-separated-values",
	"txt", "text/plain",
	"unv", "application/i-deas",
	"ustar", "application/x-ustar",
	"vcd", "application/x-cdlink",
	"vda", "application/vda",
	"viv", "video/vnd.vivo",
	"vivo", "video/vnd.vivo",
	"vrml", "model/vrml",
	"wav", "audio/x-wav",
	"wrl", "model/vrml",
	"xbm", "image/x-xbitmap",
	"xlc", "application/vnd.ms-excel",
	"xll", "application/vnd.ms-excel",
	"xlm", "application/vnd.ms-excel",
	"xls", "application/vnd.ms-excel",
	"xlw", "application/vnd.ms-excel",
	"xml", "text/xml",
	"xpm", "image/x-xpixmap",
	"xwd", "image/x-xwindowdump",
	"xyz", "chemical/x-pdb",
	"zip", "application/zip",
	"xpi", "application/x-xpinstall",
	// newstuff
	"warc", "application/warc",
	"arc", "application/arc"
};

// . init s_mimeTable in this call
// . called from HttpServer::init
// . loads every (extension, content type) pair of s_ext into s_mimeTable,
//   keyed by the 32-bit hash of the extension, then verifies the table by
//   looking every extension up again
// . returns false on error; the self-test failures set g_errno to
//   EBADENGINEER
// . on failure s_init is cleared again so a later call retries instead of
//   returning true with an unusable table
bool HttpMime::init ( ) {
	// only need to call once
	if ( s_init ) return true;
	// make sure only called once
	s_init = true;

	// number of strings (not pairs) in the list
	const uint32_t numStrings = sizeof(s_ext)/sizeof(char *);

	// 4 byte keys (the hash), pointer sized values (the content type)
	if ( ! s_mimeTable.set(4,sizeof(char *),256,NULL,0,false,"mimetbl")) {
		s_init = false;
		return false;
	}
	// set table from internal list
	for ( uint32_t i = 0 ; i < numStrings ; i+=2 ) {
		int32_t key = hash32n ( s_ext[i] );
		if ( ! s_mimeTable.addKey ( &key , &s_ext[i+1] ) ) {
			log(LOG_WARN, "HttpMime::init: failed to set table.");
			s_init = false;
			return false;
		}
	}
	// quick test
	const char *tt = getContentTypeFromExtension ( "zip" );
	if ( strcmp(tt,"application/zip") != 0 ) {
		g_errno = EBADENGINEER;
		log(LOG_WARN, "http: Failed to init mime table correctly.");
		s_init = false;
		return false;
	}
	// a more thorough test: every extension must map to its own type
	for ( uint32_t i = 0 ; i < numStrings ; i+=2) {
		tt = getContentTypeFromExtension ( s_ext[i] );
		if ( strcmp(tt,s_ext[i+1]) == 0 ) continue;
		g_errno = EBADENGINEER;
		// i is unsigned, so print it as such
		log(LOG_WARN, "http: Failed to do mime table correctly. i=%" PRIu32,i);
		s_init = false;
		return false;
	}

	// TODO: set it from a user supplied file here
	return true;
}

void HttpMime::addCookie(const httpcookie_t &cookie, const Url &currentUrl, SafeBuf *cookieJar) {
	// don't add expired cookie into cookie jar
	if (cookie.m_expired) {
		return;
	}

	if (cookie.m_domain) {
		cookieJar->safeMemcpy(cookie.m_domain, cookie.m_domainLen);
		cookieJar->pushChar('\t');
		cookieJar->safeStrcpy(cookie.m_defaultDomain ? "FALSE\t" : "TRUE\t");
	} else {
		cookieJar->safeMemcpy(currentUrl.getHost(), currentUrl.getHostLen());
		cookieJar->pushChar('\t');

		cookieJar->safeStrcpy("FALSE\t");
	}

	if (cookie.m_path) {
		cookieJar->safeMemcpy(cookie.m_path, cookie.m_pathLen);
		cookieJar->pushChar('\t');
	} else {
		if (currentUrl.getPathLen()) {
			cookieJar->safeMemcpy(currentUrl.getPath(), currentUrl.getPathLen());
		} else {
			cookieJar->pushChar('/');
		}
		cookieJar->pushChar('\t');
	}

	if (cookie.m_secure) {
		cookieJar->safeStrcpy("TRUE\t");
	} else {
		cookieJar->safeStrcpy("FALSE\t");
	}

	// we're not using expiration field
	cookieJar->safeStrcpy("0\t");

	int32_t currentLen = cookieJar->length();
	cookieJar->safeMemcpy(cookie.m_cookie, cookie.m_cookieLen);

	// cater for multiline cookie
	const char *currentPos = cookieJar->getBufStart() + currentLen;
	const char *delPosStart = NULL;
	int32_t delLength = 0;
	while (currentPos < cookieJar->getBufPtr() - 1) {
		if (delPosStart) {
			if (is_wspace_a(*currentPos) || *currentPos == '\n' || *currentPos == '\r') {
				++delLength;
			} else {
				break;
			}
		} else {
			if (*currentPos == '\n' || *currentPos == '\r') {
				delPosStart = currentPos;
				++delLength;
			}
		}

		++currentPos;
	}
	cookieJar->removeChunk1(delPosStart, delLength);

	/// @todo ALC handle httpOnly attribute

	cookieJar->pushChar('\n');
}

/// Merge the cookies of this mime (m_cookies) into an existing cookie jar.
///
/// The jar is parsed line by line, cookies whose name is not also present
/// in m_cookies are kept, then all cookies of m_cookies are added. The
/// result replaces the content of @p sb and is NUL-terminated.
///
/// @param currentUrl  url the mime was received for; supplies default
///                    domain and path for cookies that lack them
/// @param sb          cookie jar, read and then overwritten
/// @return always true
bool HttpMime::addToCookieJar(const Url *currentUrl, SafeBuf *sb) {
	/// @note Slightly modified from Netscape HTTP Cookie File format
	/// Difference is we only have one column for name/value

	// http://www.cookiecentral.com/faq/#3.5
	// The layout of Netscape's cookies.txt file is such that each line contains one name-value pair.
	// An example cookies.txt file may have an entry that looks like this:
	// .netscape.com TRUE / FALSE 946684799 NETSCAPE_ID 100103
	//
	// Each line represents a single piece of stored information. A tab is inserted between each of the fields.
	// From left-to-right, here is what each field represents:
	//
	// domain - The domain that created AND that can read the variable.
	// flag - A TRUE/FALSE value indicating if all machines within a given domain can access the variable. This value is set automatically by the browser, depending on the value you set for domain.
	// path - The path within the domain that the variable is valid for.
	// secure - A TRUE/FALSE value indicating if a secure connection with the domain is needed to access the variable.
	// expiration - The UNIX time that the variable will expire on. UNIX time is defined as the number of seconds since Jan 1, 1970 00:00:00 GMT.
	// name/value - The name/value of the variable.

	/// @todo ALC we should sort cookie-list
	// The user agent SHOULD sort the cookie-list in the following order:
	// *  Cookies with longer paths are listed before cookies with shorter paths.
	// *  Among cookies that have equal-length path fields, cookies with earlier creation-times are listed
	// before cookies with later creation-times.

	// fill in cookies from cookieJar, keyed by cookie name.
	// the entries point into sb's buffer, which stays untouched until the
	// new jar has been built completely.
	std::map<std::string, httpcookie_t> oldCookies;

	const char *cookieJar = sb->getBufStart();
	int32_t cookieJarLen = sb->length();

	const char *lineStartPos = cookieJar;
	const char *lineEndPos = NULL;
	// an empty jar may have no buffer at all, do not scan a NULL pointer
	while (lineStartPos &&
	       (lineEndPos = (const char*)memchr(lineStartPos, '\n', cookieJarLen - (lineStartPos - cookieJar))) != NULL) {
		const char *currentPos = lineStartPos;
		const char *tabPos = NULL;
		unsigned fieldCount = 0;

		httpcookie_t cookie = {};
		// the first five tab terminated fields; the rest is name=value
		while (fieldCount < 5 && (tabPos = (const char*)memchr(currentPos, '\t', lineEndPos - currentPos)) != NULL) {
			// a field shorter than "TRUE" can never be TRUE, and must
			// not be compared over 4 bytes
			const bool fieldIsTrue = (tabPos - currentPos >= 4) && (memcmp(currentPos, "TRUE", 4) == 0);

			switch (fieldCount) {
				case 0:
					// domain
					cookie.m_domain = currentPos;
					cookie.m_domainLen = tabPos - currentPos;
					break;
				case 1:
					// flag
					if (!fieldIsTrue) {
						cookie.m_defaultDomain = true;
					}
					break;
				case 2: {
					// path
					cookie.m_path = currentPos;
					cookie.m_pathLen = tabPos - currentPos;
				} break;
				case 3:
					// secure
					cookie.m_secure = fieldIsTrue;
					break;
				case 4:
					// expiration
					break;
			}

			currentPos = tabPos + 1;
			++fieldCount;
		}

		cookie.m_cookie = currentPos;
		cookie.m_cookieLen = lineEndPos - currentPos;

		// lines without a '=' have no cookie name and are dropped
		const char *equalPos = (const char *)memchr(cookie.m_cookie, '=', cookie.m_cookieLen);
		if (equalPos) {
			cookie.m_nameLen = equalPos - cookie.m_cookie;

			oldCookies[std::string(cookie.m_cookie, cookie.m_nameLen)] = cookie;
		}

		lineStartPos = lineEndPos + 1;
	}
	// we don't need to care about the last line (we always end on \n)

	SafeBuf newCookieJar;

	// add old cookies, unless a new cookie of the same name replaces it
	for (auto &pair : oldCookies) {
		if (m_cookies.find(pair.first) == m_cookies.end()) {
			addCookie(pair.second, *currentUrl, &newCookieJar);
		}
	}

	// add new cookies
	for (auto &pair : m_cookies) {
		addCookie(pair.second, *currentUrl, &newCookieJar);
	}

	newCookieJar.nullTerm();

	// replace old with new
	sb->reset();
	sb->safeMemcpy(&newCookieJar);
	sb->nullTerm();

	return true;
}

/// Append a "Cookie: ...\r\n" request header to @p sb holding every cookie
/// of the jar whose domain and path match @p url. Each cookie is followed
/// by a ';'. Nothing is appended when no cookie matches.
///
/// @param cookieJar  NUL-terminated cookie jar, one cookie per line in the
///                   format written by addCookie(); NULL is treated as an
///                   empty jar
/// @param url        url the request is made for
/// @param sb         buffer the header is appended to
/// @return always true
bool HttpMime::addCookieHeader(const char *cookieJar, const char *url, SafeBuf *sb) {
	// no jar, no cookies
	if (!cookieJar) {
		return true;
	}

	Url tmpUrl;
	tmpUrl.set(url);

	SafeBuf tmpSb;

	size_t cookieJarLen = strlen(cookieJar);

	const char *lineStartPos = cookieJar;
	const char *lineEndPos = NULL;
	while ((lineEndPos = (const char*)memchr(lineStartPos, '\n', cookieJarLen - (lineStartPos - cookieJar))) != NULL) {
		const char *currentPos = lineStartPos;
		const char *tabPos = NULL;
		unsigned fieldCount = 0;

		bool skipCookie = false;
		const char *domain = NULL;
		int32_t domainLen = 0;
		// note: the "break"s inside the cases only leave the switch, the
		// remaining fields are still walked so currentPos ends up at the
		// name=value part
		while (fieldCount < 5 && (tabPos = (const char*)memchr(currentPos, '\t', lineEndPos - currentPos)) != NULL) {
			switch (fieldCount) {
				case 0:
					// domain
					domain = currentPos;
					domainLen = tabPos - currentPos;
					break;
				case 1:
					// flag. a field shorter than "TRUE" can never be
					// TRUE, and must not be compared over 4 bytes
					if (tabPos - currentPos >= 4 && memcmp(currentPos, "TRUE", 4) == 0) {
						// allow subdomain
						// NOTE(review): if endsWith() is a plain suffix
						// test, "badexample.com" matches a cookie for
						// "example.com" -- confirm
						if (tmpUrl.getHostLen() >= domainLen) {
							if (!endsWith(tmpUrl.getHost(), tmpUrl.getHostLen(), domain, domainLen)) {
								// doesn't end with domain - ignore cookie
								skipCookie = true;
								break;
							}
						} else {
							skipCookie = true;
							break;
						}
					} else {
						// only specific domain
						if (tmpUrl.getHostLen() != domainLen || strncasecmp(domain, tmpUrl.getHost(), domainLen) != 0) {
							// non-matching domain - ignore cookie
							skipCookie = true;
							break;
						}
					}
					break;
				case 2: {
					// path: the cookie path has to be a prefix of the
					// url path; when it is a proper prefix one of the
					// two paths has to end in '/'
					// NOTE(review): this looks at the last char of the
					// url path, not at the char right after the prefix,
					// so "/foo" matches "/foobar/" but not "/foo/bar"
					// -- confirm this is intended
					const char *path = currentPos;
					int32_t pathLen = tabPos - currentPos;
					if (strncasecmp(path, tmpUrl.getPath(), pathLen) == 0) {
						if (tmpUrl.getPathLen() != pathLen) {
							if (path[pathLen - 1] != '/' && tmpUrl.getPath()[tmpUrl.getPathLen() - 1] != '/') {
								// non-matching path - ignore cookie
								skipCookie = true;
								break;
							}
						}
					} else {
						// non-matching path - ignore cookie
						skipCookie = true;
						break;
					}
				} break;
				case 3:
					// secure (not checked)

					break;
				case 4:
					// expiration (not checked)

					break;
			}

			currentPos = tabPos + 1;
			++fieldCount;
		}

		// whatever follows the parsed fields is the name=value part
		if (!skipCookie) {
			tmpSb.safeMemcpy(currentPos, lineEndPos - currentPos);
			tmpSb.pushChar(';');
		}

		lineStartPos = lineEndPos + 1;
	}
	// we don't need to care about the last line (we always end on \n)

	if (tmpSb.length() > 0) {
		sb->safeStrcpy("Cookie: ");
		sb->safeMemcpy(&tmpSb);
		sb->safeStrcpy("\r\n");
	}

	return true;
}

/// Log every cookie held in m_cookies at trace level, numbered from 0.
void HttpMime::print() const {
	logf(LOG_TRACE, "HttpMime info");
	logf(LOG_TRACE, "Cookies :");
	int cookieIndex = 0;
	for (auto it = m_cookies.begin(); it != m_cookies.end(); ++it) {
		print(it->second, cookieIndex);
		++cookieIndex;
	}
}

/// Log the fields of one cookie at trace level.
///
/// @param cookie  cookie to log; m_cookie holds "name=value", with the
///                name being the first m_nameLen bytes
/// @param count   number printed in the "cookie #" heading
void HttpMime::print(const httpcookie_t &cookie, int count) {
	// the value is what follows the name and the '=' behind it
	const char *value = cookie.m_cookie + cookie.m_nameLen + 1;
	const int valueLen = static_cast<int>(cookie.m_cookieLen - cookie.m_nameLen - 1);
	const int nameLen = static_cast<int>(cookie.m_nameLen);
	const int pathLen = static_cast<int>(cookie.m_pathLen);
	const int domainLen = static_cast<int>(cookie.m_domainLen);
	const char *secure = cookie.m_secure ? "true" : "false";
	const char *httpOnly = cookie.m_httpOnly ? "true" : "false";

	logf(LOG_TRACE, "\tcookie #%d :", count);
	logf(LOG_TRACE, "\t\tname     : %.*s", nameLen, cookie.m_cookie);
	logf(LOG_TRACE, "\t\tvalue    : %.*s", valueLen, value);
	logf(LOG_TRACE, "\t\tpath     : %.*s", pathLen, cookie.m_path);
	logf(LOG_TRACE, "\t\tdomain   : %.*s", domainLen, cookie.m_domain);
	logf(LOG_TRACE, "\t\tsecure   : %s", secure);
	logf(LOG_TRACE, "\t\thttponly : %s", httpOnly);
}