#include "Collectiondb.h"
#include "HttpServer.h"
#include "HttpRequest.h"
#include "Msg0.h"
#include "Pages.h"
#include "SafeBuf.h"
#include "Tagdb.h"
#include "Spider.h"
#include "Mem.h"
#include "Conf.h"
#include "ip.h"
#include "max_url_len.h"
#include <time.h>


namespace {

struct State {
	TcpSocket   *m_socket;
	HttpRequest  m_r;
	
	const char  *m_coll;
	int32_t      m_collLen;
	collnum_t    m_collnum;
	char         m_url_str[MAX_URL_LEN];	//the url we are working on. Empty if none
	
	Url          m_url;
	int32_t      m_firstip;
	Msg8a        m_msg8a;			//for getting tagrec for firstip
	TagRec       m_tagrec;			//tagrec
	Msg0         m_msg0;			//for getting spiderdb records
	RdbList      m_rdbList;			//spiderdb records
};

}

// Forward declarations of the processing steps; the definitions follow further down in this file.
// The void* variants are the ones handed to m_msg8a.getTagRec() / m_msg0.getList() together with the State.
static bool getTagRec(State *st);
static void gotTagRec(void *state);
static bool getSpiderRecs(State *st);
static void gotSpiderRecs(void *state);
static bool gotSpiderRecs2(State *st);
static bool sendResult(State *st);

// Timestamp formatting helpers. Both write into the caller-supplied 32-byte buffer and return it.
static const char *formatTime(time_t when, char buf[32]);
static const char *formatTimeMs(int64_t when, char buf[32]);


//Normal flow (assuming no errors and a url was given):
//	sendPageSpiderdbLookup
//	getTagRec
//	<m_msg8a.getTagRec>
//	gotTagRec
//	getSpiderRecs
//	<m_msg0.getList>
//	gotSpiderRecs		(only when m_msg0.getList returned false and calls back later)
//	gotSpiderRecs2
//	sendResult



// . returns false if blocked, true otherwise
// . sets g_errno on error
// . make a web page displaying the spider requests and replies for url given via cgi
// . the page is eventually sent with g_httpServer.sendDynamicPage() (see sendResult)
// . the State allocated here is released by sendResult()/respondWithError()
bool sendPageSpiderdbLookup(TcpSocket *s, HttpRequest *r) {
	// get the url from the cgi vars
	int url_len = 0;
	const char *url = r->getString("url", &url_len);
	if(url && !url[0])
		url = NULL;

	// The url is copied into the fixed-size State::m_url_str below, so anything
	// that does not fit (including the terminating NUL) must be refused up front.
	if(url && (url_len < 0 || url_len >= MAX_URL_LEN)) {
		log(LOG_INFO, "PageSpiderdbLookup: url too long (%d bytes)", url_len);
		return g_httpServer.sendErrorReply(s, 400, "url too long");
	}
	
	//set up state
	State *st;
	try {
		st = new State;
	} catch(std::bad_alloc&) {
		g_errno = ENOMEM;
		log(LOG_ERROR, "PageSpiderdbLookup: new(%i): %s", (int)sizeof(State), mstrerror(g_errno));
		return g_httpServer.sendErrorReply(s, 500, mstrerror(g_errno));
	}
	mnew(st, sizeof(*st), "pgspdrdblookup");
	st->m_socket = s;
	st->m_r.copy(r);
	// defined values for the fields that are read even when no lookup is performed
	st->m_firstip = 0;
	st->m_url_str[0] = '\0';
	
	// collection: explicit "c" parameter, otherwise the configured default
	int32_t  collLen = 0;
	const char *coll    = st->m_r.getString("c",&collLen);
	if(!coll || !coll[0]) {
		coll = g_conf.getDefaultColl( );
		collLen = strlen(coll);
	}
	st->m_coll    = coll;
	st->m_collLen = collLen;
	
	const CollectionRec *cr = g_collectiondb.getRec(st->m_coll,st->m_collLen);
	if(!cr) {
		g_errno = ENOCOLLREC;
		return sendResult(st);
	}
	st->m_collnum = cr->m_collnum;
	
	if(url) {
		// length was validated above, so this cannot overrun m_url_str
		memcpy(st->m_url_str, url, url_len);
		st->m_url_str[url_len] = '\0';
		st->m_url.set(st->m_url_str);
	}
	
	//if a URL has been specified then start working on that. Otherwise just show the initial page
	if(st->m_url_str[0])
		return getTagRec(st);
	else
		return sendResult(st);
}


// Kicks off the tagrec lookup for the url in st.
// Returns false when m_msg8a.getTagRec() returned false (gotTagRec was handed
// to it together with st); otherwise processing continues right here.
static bool getTagRec(State *st) {
	const bool completed = st->m_msg8a.getTagRec(&st->m_url, st->m_collnum, 0, st, gotTagRec, &st->m_tagrec);
	if(!completed)
		return false;

	if(!g_errno) {
		gotTagRec(st);
		return true;
	}

	// lookup finished immediately but failed
	return sendResult(st);
}


static void gotTagRec(void *state) {
	State *st = reinterpret_cast<State*>(state);
	if(g_errno) {
		sendResult(st);
		return;
	}
	
	//find firstip in tag sand set state
	const Tag *tag = st->m_tagrec.getTag("firstip");
	if(tag) {
		st->m_firstip = atoip(tag->getTagData());
		log(LOG_DEBUG,"PageSpiderdbLookup: Found tag, firstip=0x%08x", st->m_firstip);
	} else {
		st->m_firstip = 0;
		log(LOG_INFO,"PageSpiderdbLookup: Didn't find firstip tag for %s", st->m_url_str);
		g_errno = ENOFIRSTIPFOUND;
		sendResult(st);
		return;
	}
	
	if(getSpiderRecs(st))
		gotSpiderRecs2(st);
}


// Requests the spiderdb records for (firstip, url hash) into st->m_rdbList.
// Returns false if m_msg0.getList() returned false (gotSpiderRecs was passed
// along with st), true if the call completed right away.
static bool getSpiderRecs(State *st) {
	logTrace(g_conf.m_logTracePageSpiderdbLookup, "getSpiderRecs(%p)",st);

	// key range covering the records for this firstip + url hash
	const int64_t urlHash = hash64b(st->m_url_str);
	key128_t firstKey = Spiderdb::makeFirstKey(st->m_firstip, urlHash);
	key128_t lastKey  = Spiderdb::makeLastKey(st->m_firstip, urlHash);

	logTrace(g_conf.m_logTracePageSpiderdbLookup, "getSpiderRecs(%p): Calling Msg0::getList()", st);
	const bool completed = st->m_msg0.getList(
		-1, //hostId
		RDB_SPIDERDB,
		st->m_collnum,
		&st->m_rdbList,
		reinterpret_cast<const char*>(&firstKey),
		reinterpret_cast<const char*>(&lastKey),
		1000000,  //minRecSizes, -1 is not supported. We don't expect the two expected records to exceed 1MB
		st,
		gotSpiderRecs,
		0, //niceness,
		true, //doErrorCorrection
		true, //includeTree,
		-1, //firstHostId
		0, //startFileNum,
		-1, //numFiles,
		10000, //timeout in msecs
		false, //isRealMerge
		false, //noSplit (?)
		-1); //forceParitySplit
	if(!completed)
		return false;

	logTrace(g_conf.m_logTracePageSpiderdbLookup, "getSpiderRecs: msg0.getlist didn't block");
	return true;
}



// void* entry point handed to m_msg0.getList(); unwraps the State and
// forwards to gotSpiderRecs2().
static void gotSpiderRecs(void *state) {
	logTrace(g_conf.m_logTracePageSpiderdbLookup, "gotSpiderRecs(%p)", state);
	gotSpiderRecs2(static_cast<State*>(state));
}

// Final step after the spiderdb list has been fetched: traces the outcome and
// hands over to sendResult(), whose return value is passed through.
static bool gotSpiderRecs2(State *st) {
	const bool traceEnabled = g_conf.m_logTracePageSpiderdbLookup;
	logTrace(traceEnabled, "gotSpiderRecs2(%p)", st);
	logTrace(traceEnabled, "gotSpiderRecs2: g_errno=%d", g_errno);
	logTrace(traceEnabled, "gotSpiderRecs2: st->m_rdbList.getListSize()=%d", st->m_rdbList.getListSize());

	const bool result = sendResult(st);
	return result;
}


// Sends an error page in the reply format requested by the client and
// releases the State.
//   st      request state; deleted by this function, do not use afterwards
//   error   numeric code, emitted as "statusCode" in the JSON variant
//   errmsg  human readable message
// Returns whatever g_httpServer.sendDynamicPage() returns.
static bool respondWithError(State *st, int32_t error, const char *errmsg) {
	TcpSocket *sock = st->m_socket;

	SafeBuf page;
	// unknown formats get an empty body with a generic content type
	const char *contentType = "application/octet-stream";

	switch(st->m_r.getReplyFormat()) {
		case FORMAT_JSON:
			page.safePrintf("{\"response\":{\n"
			                "\t\"statusCode\":%" PRId32",\n"
			                "\t\"statusMsg\":\"", error);
			page.jsonEncode(errmsg);
			page.safePrintf("\"\n"
			                "}\n"
			                "}\n");
			contentType = "application/json";
			break;
		case FORMAT_HTML:
			g_pages.printAdminTop(&page, sock, &st->m_r, NULL);
			page.safePrintf("<p>%s</p>", errmsg);
			g_pages.printAdminBottom2(&page);
			contentType = "text/html";
			break;
		default:
			break;
	}

	// the state is no longer needed once the page has been rendered
	mdelete (st, sizeof(State) , "pgspdrdblookup");
	delete st;

	return g_httpServer.sendDynamicPage(sock, page.getBufStart(), page.length(), -1, false, contentType);
}


static void generatePageHtml(int32_t shardNum, int32_t firstIp, int32_t robotsShardNum, const char *url, const SpiderRequest *spiderRequest, const SpiderReply *spiderReply, SafeBuf *sb) {
	// print URL in box
	sb->safePrintf("<br>\n"
		              "Enter URL: "
		              "<input type=text name=url value=\"%s\" size=60>", url);
	sb->safePrintf("</form><br/><br/>\n");

	if (shardNum >= 0) {
		sb->safePrintf("<table class=\"main\" width=100%%>\n");
		sb->safePrintf("<tr class=\"level1\"><th colspan=50>Host information</th></tr>\n");
		sb->safePrintf("<tr><td>Shard:</td><td>%u</td></tr>\n", static_cast<uint32_t>(shardNum));
		int32_t numHosts;
		const Host *host = g_hostdb.getShard(shardNum, &numHosts);
		if(host) {
			sb->safePrintf("<tr><td>Host:</td><td>");
			while (numHosts--) {
				if (host->m_spiderEnabled) {
					sb->safePrintf(" %u", host->m_hostId);
				}
				host++;
			}
			sb->safePrintf("</td></tr>\n");
		}
	}

	if (robotsShardNum >= 0) {
		int32_t numHosts;
		const Host *host = g_hostdb.getShard(robotsShardNum, &numHosts);
		if(host) {
			sb->safePrintf("<tr><td>Robots.txt host:</td><td>");
			while (numHosts--) {
				if (host->m_spiderEnabled) {
					sb->safePrintf(" %u", host->m_hostId);
				}
				host++;
			}
			sb->safePrintf("</td></tr>\n");
		}
	}

	if (shardNum >= 0) {
		char ipbuf[16];
		iptoa(firstIp,ipbuf);
		sb->safePrintf("<tr><td>FirstIP:</td><td>%s</td></tr>\n", ipbuf);
		sb->safePrintf("</table>\n");
		sb->safePrintf("<br/>\n");
	}


	if (spiderRequest) {
		char ipbuf[16];
		char timebuf[32];
		sb->safePrintf("<table class=\"main\" width=100%%>\n");
		sb->safePrintf("  <tr class=\"level1\"><th colspan=50>Spider request</th></tr>\n");
		sb->safePrintf("  <tr class=\"level2\"><th>Field</th><th>Value</th></tr>\n");
		sb->safePrintf("  <tr><td>m_firstIp</td><td>%s</td></tr>\n", iptoa(spiderRequest->m_firstIp, ipbuf));
		sb->safePrintf("  <tr><td>m_addedTime</td><td>%s (%d)</td></tr>\n", formatTime(spiderRequest->m_addedTime, timebuf), spiderRequest->m_addedTime);
		sb->safePrintf("  <tr><td>m_prevErrCode</td><td>%d</td></tr>\n", spiderRequest->m_prevErrCode);
		sb->safePrintf("  <tr><td>m_priority</td><td>%d</td></tr>\n", spiderRequest->m_priority);
		sb->safePrintf("  <tr><td>m_errCount</td><td>%d</td></tr>\n", spiderRequest->m_errCount);
		sb->safePrintf("  <tr><td>m_isAddUrl</td><td>%s</td></tr>\n", spiderRequest->m_isAddUrl ? "true" : "false");
		sb->safePrintf("  <tr><td>m_isPageReindex</td><td>%s</td></tr>\n", spiderRequest->m_isPageReindex ? "true" : "false");
		sb->safePrintf("  <tr><td>m_isUrlCanonical</td><td>%s</td></tr>\n", spiderRequest->m_isUrlCanonical ? "true" : "false");
		sb->safePrintf("  <tr><td>m_isPageParser</td><td>%s</td></tr>\n", spiderRequest->m_isPageParser ? "true" : "false");
		sb->safePrintf("  <tr><td>m_urlIsDocId</td><td>%s</td></tr>\n", spiderRequest->m_urlIsDocId ? "true" : "false");
		sb->safePrintf("  <tr><td>m_forceDelete</td><td>%s</td></tr>\n", spiderRequest->m_forceDelete ? "true" : "false");
		sb->safePrintf("  <tr><td>m_fakeFirstIp</td><td>%s</td></tr>\n", spiderRequest->m_fakeFirstIp ? "true" : "false");
		sb->safePrintf("</table>\n");
	}

	if (spiderRequest && spiderReply) {
		sb->safePrintf("<br/>\n");
	}

	if(spiderReply) {
		char timebuf[32];
		sb->safePrintf("<table class=\"main\" width=100%%>\n");
		sb->safePrintf("  <tr class=\"level1\"><th colspan=50>Spider reply</th><tr>\n");
		sb->safePrintf("  <tr class=\"level2\"><th>Field</th><th>Value</th></tr>\n");
		sb->safePrintf("  <tr><td>m_spideredTime</td><td>%s (%d)</td></tr>\n", formatTime(spiderReply->m_spideredTime, timebuf), spiderReply->m_spideredTime);
		sb->safePrintf("  <tr><td>m_errCode</td><td>%d</td></tr>\n", spiderReply->m_errCode);
		sb->safePrintf("  <tr><td>m_percentChangedPerDay</td><td>%f</td></tr>\n", spiderReply->m_percentChangedPerDay);
		sb->safePrintf("  <tr><td>m_contentHash32</td><td>%u</td></tr>\n", spiderReply->m_contentHash32);
		sb->safePrintf("  <tr><td>m_crawlDelayMS</td><td>%d</td></tr>\n", spiderReply->m_crawlDelayMS);
		sb->safePrintf("  <tr><td>m_downloadEndTime</td><td>%s (%ld)</td></tr>\n", formatTimeMs(spiderReply->m_downloadEndTime, timebuf), spiderReply->m_downloadEndTime);
		sb->safePrintf("  <tr><td>m_httpStatus</td><td>%d</td></tr>\n", spiderReply->m_httpStatus);
		sb->safePrintf(" <tr><td>m_errCount</td><td>%d</td></tr>\n", spiderReply->m_errCount);
		sb->safePrintf(" <tr><td>m_langId</td><td>%d</td></tr>\n", spiderReply->m_langId);
		sb->safePrintf(" <tr><td>m_isIndexed</td><td>%s</td></tr>\n", spiderReply->m_isIndexed ? "true" : "false");
		sb->safePrintf("</table>\n");
	}

	if(!spiderRequest && !spiderReply) {
		sb->safePrintf("<strong>No request, no reply.</strong>\n");
	}
}

static void generatePageJSON(int32_t shardNum, int32_t firstIp, int32_t robotsShardNum, const SpiderRequest *spiderRequest, const SpiderReply *spiderReply, SafeBuf *sb) {
	sb->safePrintf("{\n");
	if (shardNum >= 0) {
		sb->safePrintf("\"shard\": %u,\n", static_cast<uint32_t>(shardNum));

		int32_t numHosts;
		const Host *host = g_hostdb.getShard(shardNum, &numHosts);
		if(host) {
			sb->safePrintf("\"host\": [");

			bool isFirst = true;
			while (numHosts--) {
				if (host->m_spiderEnabled) {
					if (!isFirst) {
						sb->safePrintf(", ");
					}
					sb->safePrintf("%u", host->m_hostId);
					isFirst = false;
				}
				host++;
			}

			sb->safePrintf("]");
		}
	}

	if (robotsShardNum >= 0) {
		int32_t numHosts;
		const Host *host = g_hostdb.getShard(robotsShardNum, &numHosts);
		if(host) {
			sb->safePrintf(",\n\"robotsTxtHost\": [");

			bool isFirst = true;
			while (numHosts--) {
				if (host->m_spiderEnabled) {
					if (!isFirst) {
						sb->safePrintf(", ");
					}
					sb->safePrintf("%u", host->m_hostId);
					isFirst = false;
				}
				host++;
			}

			sb->safePrintf("]");
		}
	}
	if (shardNum >= 0) {
		char ipbuf[16];
		iptoa(firstIp,ipbuf);
		sb->safePrintf(",\n\"firstIp\": \"%s\"", iptoa(firstIp, ipbuf));
	}



	if (spiderRequest) {
		sb->safePrintf(",\n\"spiderRequest\": {\n");

		char ipbuf[16];

		sb->safePrintf("\t\"firstIp\": \"%s\",\n", iptoa(spiderRequest->m_firstIp, ipbuf));
		sb->safePrintf("\t\"addedTime\": %u,\n", spiderRequest->m_addedTime);
		sb->safePrintf("\t\"priority\": %d,\n", spiderRequest->m_priority);
		sb->safePrintf("\t\"prevErrCode\": %d,\n", spiderRequest->m_prevErrCode);
		sb->safePrintf("\t\"errCount\": %d,\n", spiderRequest->m_errCount);
		sb->safePrintf("\t\"isAddUrl\": %s,\n", spiderRequest->m_isAddUrl ? "true" : "false");
		sb->safePrintf("\t\"isPageReindex\": %s,\n", spiderRequest->m_isPageReindex ? "true" : "false");
		sb->safePrintf("\t\"isUrlCanonical\": %s,\n", spiderRequest->m_isUrlCanonical ? "true" : "false");
		sb->safePrintf("\t\"isPageParser\": %s,\n", spiderRequest->m_isPageParser ? "true" : "false");
		sb->safePrintf("\t\"urlIsDocId\": %s,\n", spiderRequest->m_urlIsDocId ? "true" : "false");
		sb->safePrintf("\t\"forceDelete\": %s,\n", spiderRequest->m_forceDelete ? "true" : "false");
		sb->safePrintf("\t\"fakeFirstIp\": %s\n", spiderRequest->m_fakeFirstIp ? "true" : "false");

		sb->safePrintf("}");
	}

	if(spiderReply) {
		sb->safePrintf(",\n\"spiderReply\": {\n");

		sb->safePrintf("\t\"spideredTime\": %d,\n", spiderReply->m_spideredTime);
		sb->safePrintf("\t\"errCode\": %d,\n", spiderReply->m_errCode);
		sb->safePrintf("\t\"percentChangedPerDay\": %f,\n", spiderReply->m_percentChangedPerDay);
		sb->safePrintf("\t\"contentHash32\": %u,\n", spiderReply->m_contentHash32);
		sb->safePrintf("\t\"crawlDelayMS\": %d,\n", spiderReply->m_crawlDelayMS);
		sb->safePrintf("\t\"downloadEndTime\": %ld,\n", spiderReply->m_downloadEndTime);
		sb->safePrintf("\t\"httpStatus\": %d,\n", spiderReply->m_httpStatus);
		sb->safePrintf("\t\"errCount\": %d,\n", spiderReply->m_errCount);
		sb->safePrintf("\t\"langId\": %d,\n", spiderReply->m_langId);
		sb->safePrintf("\t\"isIndexed\": %s\n", spiderReply->m_isIndexed ? "true" : "false");

		sb->safePrintf("}\n");
	}

	sb->safePrintf("}");
}

static bool sendResult(State *st) {
	logTrace(g_conf.m_logTracePageSpiderdbLookup, "st(%p): sendResult: g_errno=%d", st, g_errno);
	// get the socket
	TcpSocket *s = st->m_socket;

	SafeBuf sb;
	// print standard header
	sb.reserve2x ( 32768 );

	if(g_errno) {
		return respondWithError(st, g_errno, mstrerror(g_errno));
	}

	int32_t shardNum = -1;
	int32_t robotsShardNum = -1;
	if(st->m_url_str[0]) {
		int64_t uh48 = hash64b(st->m_url_str);
		key128_t startKey = Spiderdb::makeFirstKey(st->m_firstip, uh48);
		shardNum = g_hostdb.getShardNum(RDB_SPIDERDB, &startKey);

		//
		// locate host that caches robots.txt
		//
		Url u;
		u.set(st->m_url_str);

		// build robots.txt url
		char urlRobots[MAX_URL_LEN+1];
		char *p = urlRobots;
		if ( ! u.getScheme() )
		{
			p += sprintf ( p , "http://" );
		}
		else
		{
			gbmemcpy ( p , u.getScheme() , u.getSchemeLen() );
			p += u.getSchemeLen();
			p += sprintf(p,"://");
		}

		gbmemcpy ( p , u.getHost() , u.getHostLen() );
		p += u.getHostLen();

		// add port if not default
		if ( u.getPort() != u.getDefaultPort() ) {
			p += sprintf( p, ":%" PRId32, u.getPort() );
		}
		p += sprintf ( p , "/robots.txt" );

		// find host based on firstip and robots.txt url
		int32_t nh     = g_hostdb.getNumHosts();
		robotsShardNum = hash32h(((uint32_t)st->m_firstip >> 8), 0) % nh;
		if((uint32_t)st->m_firstip >> 8 == 0) {
			// If the first IP is not set for the request then we don't
			// want to hammer the first host with spidering enabled.
			robotsShardNum = hash32n ( urlRobots ) % nh;
		}
		robotsShardNum = robotsShardNum % g_hostdb.getNumShards();
	}

	//locate spider request and reply
	const SpiderRequest *spiderRequest = NULL;
	const SpiderReply *spiderReply = NULL;
	if(st->m_rdbList.getListSize()>0) {
		logTrace(g_conf.m_logTracePageSpiderdbLookup, "st(%p): sendResult: st->m_rdbList.getListSize()=%d", st, st->m_rdbList.getListSize());
		for(st->m_rdbList.resetListPtr(); !st->m_rdbList.isExhausted(); st->m_rdbList.skipCurrentRecord()) {
			const char *currentRec = st->m_rdbList.getCurrentRec();
			logHexTrace(g_conf.m_logTracePageSpiderdbLookup, currentRec, st->m_rdbList.getCurrentRecSize(), "st(%p): ", st);
			if (KEYNEG(currentRec)) {
				continue; //skip negative records (which should even be there)
			}

			if (Spiderdb::isSpiderRequest((const key128_t *)currentRec)) {
				logTrace(g_conf.m_logTracePageSpiderdbLookup, "it's a request");
				spiderRequest = reinterpret_cast<const SpiderRequest*>(currentRec);
			} else  {
				logTrace(g_conf.m_logTracePageSpiderdbLookup, "it's a reply");
				spiderReply = reinterpret_cast<const SpiderReply*>(currentRec);
			}
		}
	}

	const char *contentType = NULL;
	switch(st->m_r.getReplyFormat()) {
		case FORMAT_HTML:
			g_pages.printAdminTop(&sb, s, &st->m_r, NULL);
			generatePageHtml(shardNum, st->m_firstip, robotsShardNum, st->m_url_str, spiderRequest, spiderReply, &sb);
			g_pages.printAdminBottom2(&sb);
			contentType = "text/html";
			break;
		case FORMAT_JSON:
			generatePageJSON(shardNum, st->m_firstip, robotsShardNum, spiderRequest, spiderReply, &sb);
			contentType = "application/json";
			break;
		default:
			contentType = "text/html";
			sb.safePrintf("oops!");
			break;

	}

	// don't forget to cleanup
	mdelete(st, sizeof(State) , "pgspdrdblookup");
	delete st;

	// now encapsulate it in html head/tail and send it off
	return g_httpServer.sendDynamicPage (s, sb.getBufStart(), sb.length(), -1, false, contentType);
}



// Formats a unix timestamp (seconds) as UTC in the form "YYYY-MM-DDTHH:MM:SSZ".
// The text is written into the caller-supplied 32-byte buffer, which is also returned.
static const char *formatTime(time_t when, char buf[32]) {
	struct tm utc;
	gmtime_r(&when, &utc);
	strftime(buf, 32, "%Y-%m-%dT%H:%M:%SZ", &utc);
	return buf;
}

// Formats a unix timestamp given in milliseconds as UTC in the form
// "YYYY-MM-DDTHH:MM:SS.mmmZ".
// The text is written into the caller-supplied 32-byte buffer, which is also returned.
static const char *formatTimeMs(int64_t when, char buf[32]) {
	// Split into whole seconds and a millisecond part in [0,999]. C++ division
	// truncates toward zero, so timestamps before the epoch need the remainder
	// folded back into the positive range (otherwise the output would contain
	// a negative fraction such as ".-01Z").
	int64_t secs = when / 1000;
	int millis = (int)(when % 1000);
	if(millis < 0) {
		millis += 1000;
		secs -= 1;
	}

	time_t when_secs = (time_t)secs;
	struct tm t;
	gmtime_r(&when_secs, &t);

	// strftime() reports how much it wrote; append the fraction right there,
	// bounded by what is left of the buffer
	size_t len = strftime(buf,32,"%Y-%m-%dT%H:%M:%S",&t);
	snprintf(buf+len, 32-len, ".%03dZ", millis);
	return buf;
}