forked from Mirrors/privacore-open-source-search-engine
		
	
		
			
				
	
	
		
			315 lines
		
	
	
		
			9.7 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			315 lines
		
	
	
		
			9.7 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
#include "MsgC.h"
 | 
						|
#include "Process.h"
 | 
						|
#include "UdpServer.h"
 | 
						|
#include "Dns.h"
 | 
						|
#include "Conf.h"
 | 
						|
#include "Hostdb.h"
 | 
						|
#include "ip.h"
 | 
						|
#include "Mem.h"
 | 
						|
#include "Errno.h"
 | 
						|
#include "Log.h"
 | 
						|
#include "utf8_fast.h"
 | 
						|
#include "hash.h"
 | 
						|
 | 
						|
 | 
						|
// Construct an idle MsgC: no lookup in flight, no output pointer, no
// callback, and an all-zero request buffer.
MsgC::MsgC() {
	// pointers and state first ...
	m_msge1State = 0;
	m_msge1      = NULL;
	m_callback   = NULL;
	m_ipPtr      = NULL;

	// ... then wipe the outgoing request buffer
	memset(m_request, 0, sizeof(m_request));
}
 | 
						|
 | 
						|
// Nothing to release here: the destructor body is intentionally empty.
MsgC::~MsgC() {
}
 | 
						|
 | 
						|
static void gotReplyWrapper ( void *state , void *state2 ) ;
 | 
						|
static void handleRequest   ( UdpSlot *slot , int32_t niceness  ) ;
 | 
						|
static void gotMsgCIpWrapper( void *state, int32_t ip);
 | 
						|
	
 | 
						|
bool MsgC::registerHandler ( ) {
 | 
						|
	// . register ourselves with the high priority udp server
 | 
						|
	// . it calls our callback when it receives a msg of type 0x0c
 | 
						|
	if ( ! g_udpServer.registerHandler ( msg_type_c, handleRequest ))
 | 
						|
		return false;
 | 
						|
	return true;
 | 
						|
}
 | 
						|
 | 
						|
// returns false if blocked, true otherwise
 | 
						|
bool MsgC::getIp(const char *hostname, int32_t hostnameLen, int32_t *ip, void *state,
 | 
						|
                 void (*callback)(void *state, int32_t ip), int32_t niceness) {
 | 
						|
	m_mcast.reset();
 | 
						|
	m_callback=callback;
 | 
						|
	m_ipPtr = ip;
 | 
						|
	// sanity check
 | 
						|
	if ( ! m_ipPtr ) { g_process.shutdownAbort(true); }
 | 
						|
	// First check if g_dns has it. This function is a part of the 
 | 
						|
	// g_dns.getIp() function, except that we do not lookup the ip in
 | 
						|
	// the dns server directly after not finding it in the cache.
 | 
						|
	if ( !hostname || hostnameLen <= 0 ) {
 | 
						|
		log(LOG_LOGIC,"dns: msgc: Asked to get IP of zero length "
 | 
						|
		    "hostname.");
 | 
						|
		*ip = 0;
 | 
						|
		return true;
 | 
						|
	}
 | 
						|
	// . don't accept large hostnames
 | 
						|
	// . technically the limit is 255 but i'm stricter
 | 
						|
	if ( hostnameLen >= 254 ) {
 | 
						|
		g_errno = EHOSTNAMETOOBIG;
 | 
						|
		log("dns: msgc: Asked to get IP of hostname over 253 "
 | 
						|
			   "characters long.");
 | 
						|
		*ip = 0;
 | 
						|
		return true;
 | 
						|
	}
 | 
						|
	// debug
 | 
						|
	//char c = hostname[hostnameLen];
 | 
						|
	//if ( c != 0 ) hostname[hostnameLen] = 0;
 | 
						|
	log(LOG_DEBUG,"dns: msgc: getting ip for [%s]", hostname);
 | 
						|
	    
 | 
						|
	    
 | 
						|
	//if ( c != 0 ) hostname[hostnameLen] = c;
 | 
						|
	// if url is already in a.b.c.d format return that
 | 
						|
	if ( is_digit(hostname[0]) ) {
 | 
						|
		*ip = atoip ( hostname , hostnameLen );
 | 
						|
		// somebody put a http://3.0/ link in here
 | 
						|
		//if ( *ip >= 1 && *ip < 10 )
 | 
						|
		//	log("dns: hostname had ip %s",iptoa(*ip));
 | 
						|
		//if ( *ip == 3 ) { g_process.shutdownAbort(true); }
 | 
						|
		if ( *ip != 0 ) return true;
 | 
						|
	}
 | 
						|
	
 | 
						|
	// key is hash of the hostname
 | 
						|
	key96_t key = Dns::getKey ( hostname , hostnameLen );
 | 
						|
	
 | 
						|
	// is it in the /etc/hosts file?
 | 
						|
	if ( g_conf.m_useEtcHosts && g_dns.isInFile ( key , ip ))return 1;
 | 
						|
		
 | 
						|
		
 | 
						|
	// . try getting from the cache first
 | 
						|
	// . this returns true if was in the cache and sets *ip to the ip
 | 
						|
	// . ip is set to 0 for non-existent domains, and -1 if there was
 | 
						|
	//   a dns timed out error getting it the last time. these will be
 | 
						|
	//   cached for about a day.
 | 
						|
	if ( g_dns.isInCache ( key , ip ) ) {
 | 
						|
		if ( *ip == 3 ) { g_process.shutdownAbort(true); }
 | 
						|
		// debug msg
 | 
						|
		//log(LOG_DEBUG, "dns::getIp: %s (key=%" PRIu64") has ip=%s in cache!!!",
 | 
						|
		//     tmp,key.n0,iptoa(*ip));
 | 
						|
		return true;
 | 
						|
	}
 | 
						|
 | 
						|
 | 
						|
	// So its not in the local dns cache, so lets look in the 
 | 
						|
	// s_localDnsCache. For that I need to pass the url
 | 
						|
	m_u.set( hostname, hostnameLen );
 | 
						|
	/*
 | 
						|
	*ip=g_spiderCache.getLocalIp(&m_u);
 | 
						|
	if (*ip !=0) {
 | 
						|
		//Lets add to the local dns 
 | 
						|
		g_dns.addToCache(key,*ip);
 | 
						|
		return true;
 | 
						|
	}
 | 
						|
	*/
 | 
						|
	//c = hostname[hostnameLen];
 | 
						|
	//if ( c != 0 ) hostname[hostnameLen] = 0;
 | 
						|
 | 
						|
	//if ( c != 0 ) hostname[hostnameLen] = c;
 | 
						|
	// Ok, so now it is not in dns cache or in spider cache, so lets send
 | 
						|
	// the dns lookup request 
 | 
						|
	// to a host in the cluster based on the hash of the hostname DIV'd
 | 
						|
	// with the number of hosts in the cluster, g_hostdb.m_numHosts. The
 | 
						|
	// request should be looked up in a new RdbCache (not Dns.cpp's local
 | 
						|
	// cache) once delivered to the responsible host.
 | 
						|
	// We have to send hostname, and the size is hostnameLen
 | 
						|
	strncpy(m_request,hostname,hostnameLen);	
 | 
						|
	int32_t requestSize = hostnameLen;
 | 
						|
	// null end it
 | 
						|
	m_request[requestSize]='\0';
 | 
						|
	requestSize++;
 | 
						|
	//uint32_t groupNum=key.n0 % g_hostdb.m_numShards;
 | 
						|
	//uint32_t groupId=g_hostdb.getGroupId(groupNum);
 | 
						|
 | 
						|
	// with the new iframe tag expansion logic in Msg13.cpp, the
 | 
						|
	// spider proxy will create a newXmlDoc to do that and will call
 | 
						|
	// MsgC to lookup ips.. so we do not want to send a msgc request
 | 
						|
	// to ourselves, so just call the dns directly
 | 
						|
	if ( g_hostdb.m_myHost->m_isProxy ) {
 | 
						|
		if ( g_dns.getIp( hostname, 
 | 
						|
				  hostnameLen, 
 | 
						|
				  ip, 
 | 
						|
				  state,
 | 
						|
				  callback)) 
 | 
						|
			return true;
 | 
						|
		// ok, we blocked, call callback when done
 | 
						|
		return false;
 | 
						|
	}
 | 
						|
 | 
						|
	// there was logic for getting ip from a proxy here
 | 
						|
	// removed in commit bab9e9da06a8edeb8a7677c2e90f72766f6ba782 as it was never used
 | 
						|
 | 
						|
	// get a hostid that should house this ip in its local cache
 | 
						|
	Host *host = g_dns.getIPLookupHost(key);
 | 
						|
 | 
						|
	if ( g_conf.m_logDebugDns ) {
 | 
						|
		int32_t fip = 0;
 | 
						|
		if ( host ) fip = host->m_ip;
 | 
						|
		char ipbuf[16];
 | 
						|
		log(LOG_DEBUG,"dns: msgc: multicasting for ip for %s to %s.",
 | 
						|
		    hostname, iptoa(fip,ipbuf));
 | 
						|
	}
 | 
						|
 | 
						|
	// it should have set g_errno to EDEADHOST if this happens
 | 
						|
	if ( ! host ) {
 | 
						|
		log("dns: please add primary dns to spider proxy gb.conf");
 | 
						|
		if ( ! g_errno ) { g_process.shutdownAbort(true); }
 | 
						|
		return true;
 | 
						|
	}
 | 
						|
 | 
						|
	int32_t          firstHostId = host->m_hostId;
 | 
						|
	// the handling server will timeout its dns algorithm and send us
 | 
						|
	// back an EDNSTIMEDOUT error, so we do not need to have any timeout
 | 
						|
	// here unless we are niceness 0, which we need in case the handling
 | 
						|
	// servers goes down, we do not want to wait for it and would rather
 | 
						|
	// call the callback with an EUDPTIMEDOUT error after 60 seconds.
 | 
						|
	int64_t timeout = (niceness==0) ? multicast_msg1c_getip_default_timeout : multicast_infinite_send_timeout;
 | 
						|
 | 
						|
	// key is useless for us
 | 
						|
	if (!m_mcast.send(m_request, requestSize, msg_type_c, false, host->m_shardNum, false, 0, this, state, gotReplyWrapper, timeout, niceness, firstHostId, false)) {
 | 
						|
		//did not block, error
 | 
						|
		log(LOG_DEBUG,"dns: msgc: mcast had error: %s",
 | 
						|
		    mstrerror(g_errno));
 | 
						|
		return true;
 | 
						|
	}
 | 
						|
	//Should always block, unless error
 | 
						|
	return false;
 | 
						|
}
 | 
						|
 | 
						|
void gotReplyWrapper ( void *state , void *state2 ) {
 | 
						|
	MsgC *THIS = (MsgC *)state;
 | 
						|
	// this can set g_errno. sets to ETRYAGAIN if checksum is wrong
 | 
						|
	// so XmlDoc.cpp should try again! maybe later...
 | 
						|
	int32_t ip = THIS->gotReply();
 | 
						|
	// debug
 | 
						|
	if ( g_conf.m_logDebugDns ) {
 | 
						|
		char ipbuf[16];
 | 
						|
		logf(LOG_DEBUG,"dns: msgc: got reply of %s for %s. "
 | 
						|
		     "state=%p mcast=%p",
 | 
						|
		     iptoa(*THIS->m_ipPtr,ipbuf), THIS->m_u.getUrl(), state2,
 | 
						|
		     &THIS->m_mcast);
 | 
						|
	}
 | 
						|
	THIS->m_callback(state2,ip);
 | 
						|
}
 | 
						|
 | 
						|
int32_t MsgC::gotReply(){
 | 
						|
	int32_t replySize,maxSize;
 | 
						|
	bool freeIt;
 | 
						|
	char *reply = m_mcast.getBestReply (&replySize, &maxSize, &freeIt);
 | 
						|
	*m_ipPtr = 0;
 | 
						|
	int32_t ip2 = 0;
 | 
						|
 | 
						|
	// sanity check
 | 
						|
	if (replySize != 12 || !reply ){
 | 
						|
		g_errno = EBADREPLYSIZE;
 | 
						|
		log( "dns: msgc: Bad reply size of %p",
 | 
						|
		     reply );
 | 
						|
	}
 | 
						|
        else {
 | 
						|
		*m_ipPtr = *(int32_t *)reply;
 | 
						|
		// repeated ip
 | 
						|
		ip2 = *(int32_t *)(reply + 4);
 | 
						|
		// an actual checksum
 | 
						|
		//int32_t crc = *(int32_t *)(reply + 8);
 | 
						|
	}
 | 
						|
	// debug
 | 
						|
	char ipbuf[16];
 | 
						|
	log(LOG_DEBUG,"dns: msgc: got reply of %s for %s.",
 | 
						|
	    iptoa(*m_ipPtr,ipbuf), m_u.getUrl());
 | 
						|
	// test checkusm
 | 
						|
	if ( *m_ipPtr != ip2 ) {
 | 
						|
		log("dns: ip checksum is incorrect. %" PRIu32" != %" PRIu32". "
 | 
						|
		    "setting to -1.",  *m_ipPtr,ip2);
 | 
						|
		g_errno = ETRYAGAIN;
 | 
						|
		*m_ipPtr = -1;
 | 
						|
		return -1;
 | 
						|
	}
 | 
						|
 | 
						|
	// . if we have to free the buffer
 | 
						|
	// . if freeIt is false that maeans we own the reply buffer
 | 
						|
	if (!freeIt) {
 | 
						|
		//log (LOG_DEBUG,"msgC: Multicast asked to free buffer");
 | 
						|
		mfree(reply,maxSize,"MulticastMsgC");
 | 
						|
	}
 | 
						|
	// sanity check
 | 
						|
	if ( (uint32_t)*m_ipPtr <= 255 &&
 | 
						|
	     (uint32_t)*m_ipPtr >  0      ) {
 | 
						|
		log("dns: msgc: got msgc ip reply of %" PRIu32" for %s. wtf? trying "
 | 
						|
		    "again.", *m_ipPtr,m_u.getUrl());
 | 
						|
		g_errno = ETRYAGAIN;
 | 
						|
		*m_ipPtr = 0;
 | 
						|
		//g_process.shutdownAbort(true); }
 | 
						|
	}
 | 
						|
	// . don't add to cache if there was an error.
 | 
						|
	// . at this level, these are multicast errors, not dns errors
 | 
						|
	if(g_errno) return *m_ipPtr;
 | 
						|
 | 
						|
	// Now we can add stuff to the local dns and spider cache.
 | 
						|
	key96_t key = Dns::getKey ( m_u.getHost(),m_u.getHostLen() );
 | 
						|
	// just cache for hour locally since ttl may not have been that high
 | 
						|
	// as given to us from the authoratative name server. 
 | 
						|
	// TODO: return the ttl as well.
 | 
						|
       	g_dns.addToCache(key,*m_ipPtr,60*60*24);
 | 
						|
	// and tell the spider the ip of this url so it can do its IP based
 | 
						|
	// throttling.
 | 
						|
	//g_spiderCache.addLocalIp(&m_u,*m_ipPtr);
 | 
						|
	return *m_ipPtr;
 | 
						|
}
 | 
						|
 | 
						|
// . only return false if you want slot to be nuked w/o replying
 | 
						|
// . MUST always call g_udpServer::sendReply() or sendErrorReply()
 | 
						|
void handleRequest(UdpSlot *slot, int32_t /*niceness*/) {
 | 
						|
	// get the request, should be the hostname
 | 
						|
	char *hostname = slot->m_readBuf;
 | 
						|
 | 
						|
	// do not include the \0 at the end in the length
 | 
						|
	int32_t  hostnameLen = slot->m_readBufSize - 1;
 | 
						|
 | 
						|
	int32_t ip=0;
 | 
						|
 | 
						|
	log(LOG_DEBUG,"dns: msgc: handle request called for %s state=%p", hostname,slot);
 | 
						|
 | 
						|
	// check dns cache for the hostname. This should also send to
 | 
						|
	// the dnsServer. If it is not in the cache, getIp puts it in.
 | 
						|
	if (g_dns.getIp(hostname, hostnameLen, &ip, slot, gotMsgCIpWrapper)) {
 | 
						|
		gotMsgCIpWrapper(slot, ip);
 | 
						|
	}
 | 
						|
	return;
 | 
						|
}
 | 
						|
	
 | 
						|
 | 
						|
 | 
						|
void gotMsgCIpWrapper( void *state, int32_t ip){
 | 
						|
	UdpSlot *slot=(UdpSlot *) state;
 | 
						|
 | 
						|
	log(LOG_DEBUG,"dns: msgc sending reply for state=%p.",state);
 | 
						|
 | 
						|
	//to fit the ip address
 | 
						|
	//char reply[12];
 | 
						|
	// don't put it on the stack because sendReply does not copy!
 | 
						|
	char *reply = slot->m_shortSendBuffer;
 | 
						|
	int32_t replySize=12;
 | 
						|
#if SHORTSENDBUFFERSIZE < 12
 | 
						|
#error Slot::m_shortSendBuffer must be at least 12 bytes
 | 
						|
#endif
 | 
						|
	//	reply=(char*) mmalloc(replySize,"MsgC");
 | 
						|
	char *p = reply;
 | 
						|
	*(int32_t *)p = ip; p += 4;
 | 
						|
	// repeat it as a checksum
 | 
						|
	*(int32_t *)p = ip; p += 4;
 | 
						|
	// an actual checksum
 | 
						|
	*(int32_t *)p = hash32h ( ip , 0 ); p += 4;
 | 
						|
 | 
						|
	g_udpServer.sendReply(reply, replySize, NULL, 0, slot);
 | 
						|
 | 
						|
	return;
 | 
						|
}
 |