privacore-open-source-searc.../MsgC.cpp
Ivan Skytte Jørgensen beeddcf35d Got rid of gb-include.h
2018-07-26 17:29:51 +02:00

315 lines
9.7 KiB
C++

#include "MsgC.h"
#include "Process.h"
#include "UdpServer.h"
#include "Dns.h"
#include "Conf.h"
#include "Hostdb.h"
#include "ip.h"
#include "Mem.h"
#include "Errno.h"
#include "Log.h"
#include "utf8_fast.h"
#include "hash.h"
// Brings a freshly constructed MsgC into its idle state: no result pointer,
// no callback, the msge1 fields cleared and an all-zero request buffer.
MsgC::MsgC() {
	// nothing is attached to this lookup object yet
	m_msge1State = 0;
	m_msge1      = NULL;
	m_callback   = NULL;
	m_ipPtr      = NULL;
	// zero the whole outgoing request buffer
	memset ( m_request , 0 , sizeof(m_request) );
}
// The destructor has no work of its own to do; its body is empty.
MsgC::~MsgC() {
}
// Forward declarations of the file-local callbacks defined further down.
// . completion callback handed to m_mcast.send() by MsgC::getIp()
static void gotReplyWrapper ( void *state , void *state2 ) ;
// . udp handler registered for msg_type_c by MsgC::registerHandler()
static void handleRequest ( UdpSlot *slot , int32_t niceness ) ;
// . completion callback handed to g_dns.getIp() by handleRequest()
static void gotMsgCIpWrapper( void *state, int32_t ip);
// . hooks handleRequest() up to g_udpServer for msgs of type msg_type_c (0x0c)
// . returns true if the udp server accepted the registration, false if not
bool MsgC::registerHandler ( ) {
	if ( g_udpServer.registerHandler ( msg_type_c , handleRequest ) ) {
		return true;
	}
	// the udp server refused the handler
	return false;
}
// . resolves "hostname" (hostnameLen bytes, NOT necessarily NUL-terminated)
//   and stores the result through "ip"
// . returns true if it did not block: the answer is in *ip and/or g_errno
//   has been set
// . returns false if it blocked: "callback" is called later with "state"
//   and the ip
// . lookup order: a.b.c.d literal, /etc/hosts (if g_conf.m_useEtcHosts),
//   the local dns cache, then either g_dns directly (when we are a proxy)
//   or a msg_type_c multicast to the host picked by g_dns.getIPLookupHost()
// . "niceness" is handed to the multicast and also selects its timeout
bool MsgC::getIp(const char *hostname, int32_t hostnameLen, int32_t *ip, void *state,
		 void (*callback)(void *state, int32_t ip), int32_t niceness) {
	m_mcast.reset();
	m_callback = callback;
	m_ipPtr    = ip;

	// sanity check: we need somewhere to store the result
	if ( ! m_ipPtr ) { g_process.shutdownAbort(true); }

	// First check if g_dns has it. This function is a part of the
	// g_dns.getIp() function, except that we do not lookup the ip in
	// the dns server directly after not finding it in the cache.
	if ( !hostname || hostnameLen <= 0 ) {
		log(LOG_LOGIC,"dns: msgc: Asked to get IP of zero length "
		    "hostname.");
		*ip = 0;
		return true;
	}

	// . don't accept large hostnames
	// . technically the limit is 255 but i'm stricter
	if ( hostnameLen >= 254 ) {
		g_errno = EHOSTNAMETOOBIG;
		log("dns: msgc: Asked to get IP of hostname over 253 "
		    "characters long.");
		*ip = 0;
		return true;
	}

	// . hostname is length-delimited and may lack a terminating \0, so
	//   never print it with a bare %s; bound the print by its length
	// . the length was validated above to be in [1,253] so the cast is safe
	const int printLen = static_cast<int>(hostnameLen);
	log(LOG_DEBUG,"dns: msgc: getting ip for [%.*s]", printLen, hostname);

	// if url is already in a.b.c.d format return that
	if ( is_digit(hostname[0]) ) {
		*ip = atoip ( hostname , hostnameLen );
		// only a non-zero result is accepted as a literal ip,
		// otherwise fall through to the regular lookup
		if ( *ip != 0 ) return true;
	}

	// key is hash of the hostname
	key96_t key = Dns::getKey ( hostname , hostnameLen );

	// is it in the /etc/hosts file?
	if ( g_conf.m_useEtcHosts && g_dns.isInFile ( key , ip ) ) return true;

	// . try getting from the cache first
	// . this returns true if was in the cache and sets *ip to the ip
	// . ip is set to 0 for non-existent domains, and -1 if there was
	//   a dns timed out error getting it the last time. these will be
	//   cached for about a day.
	if ( g_dns.isInCache ( key , ip ) ) {
		// sanity check kept from the original code: a cached ip of 3
		// is treated as fatal
		if ( *ip == 3 ) { g_process.shutdownAbort(true); }
		return true;
	}

	// remember what we are looking up; gotReply() uses m_u for logging
	// and for building the cache key
	m_u.set( hostname, hostnameLen );

	// Not in the dns cache, so send the dns lookup request to a host in
	// the cluster chosen from the hash of the hostname. The request is
	// the hostname followed by a \0.
	strncpy(m_request,hostname,hostnameLen);
	int32_t requestSize = hostnameLen;
	// null end it
	m_request[requestSize]='\0';
	requestSize++;

	// with the new iframe tag expansion logic in Msg13.cpp, the
	// spider proxy will create a newXmlDoc to do that and will call
	// MsgC to lookup ips.. so we do not want to send a msgc request
	// to ourselves, so just call the dns directly
	if ( g_hostdb.m_myHost->m_isProxy ) {
		if ( g_dns.getIp( hostname,
				  hostnameLen,
				  ip,
				  state,
				  callback))
			return true;
		// ok, we blocked, call callback when done
		return false;
	}

	// there was logic for getting ip from a proxy here
	// removed in commit bab9e9da06a8edeb8a7677c2e90f72766f6ba782 as it was never used

	// get a hostid that should house this ip in its local cache
	Host *host = g_dns.getIPLookupHost(key);

	if ( g_conf.m_logDebugDns ) {
		int32_t fip = 0;
		if ( host ) fip = host->m_ip;
		char ipbuf[16];
		log(LOG_DEBUG,"dns: msgc: multicasting for ip for %.*s to %s.",
		    printLen, hostname, iptoa(fip,ipbuf));
	}

	// it should have set g_errno to EDEADHOST if this happens
	if ( ! host ) {
		log("dns: please add primary dns to spider proxy gb.conf");
		if ( ! g_errno ) { g_process.shutdownAbort(true); }
		return true;
	}

	int32_t firstHostId = host->m_hostId;

	// the handling server will timeout its dns algorithm and send us
	// back an EDNSTIMEDOUT error, so we do not need to have any timeout
	// here unless we are niceness 0, which we need in case the handling
	// servers goes down, we do not want to wait for it and would rather
	// call the callback with an EUDPTIMEDOUT error after 60 seconds.
	int64_t timeout = (niceness==0) ? multicast_msg1c_getip_default_timeout : multicast_infinite_send_timeout;

	// "this" and "state" come back to gotReplyWrapper() as its two args
	if (!m_mcast.send(m_request, requestSize, msg_type_c, false, host->m_shardNum, false, 0, this, state, gotReplyWrapper, timeout, niceness, firstHostId, false)) {
		//did not block, error
		log(LOG_DEBUG,"dns: msgc: mcast had error: %s",
		    mstrerror(g_errno));
		return true;
	}

	//Should always block, unless error
	return false;
}
void gotReplyWrapper ( void *state , void *state2 ) {
MsgC *THIS = (MsgC *)state;
// this can set g_errno. sets to ETRYAGAIN if checksum is wrong
// so XmlDoc.cpp should try again! maybe later...
int32_t ip = THIS->gotReply();
// debug
if ( g_conf.m_logDebugDns ) {
char ipbuf[16];
logf(LOG_DEBUG,"dns: msgc: got reply of %s for %s. "
"state=%p mcast=%p",
iptoa(*THIS->m_ipPtr,ipbuf), THIS->m_u.getUrl(), state2,
&THIS->m_mcast);
}
THIS->m_callback(state2,ip);
}
// . called by gotReplyWrapper() once the multicast sent by getIp() is done
// . decodes the 12-byte reply (ip, ip repeated, hash), stores the ip in
//   *m_ipPtr and returns that same value
// . sets g_errno to EBADREPLYSIZE if the reply is missing or not 12 bytes,
//   and to ETRYAGAIN if the repeated ip does not match (result -1) or the
//   ip is in [1,255] (result 0)
// . the ip is only added to g_dns's cache when g_errno is not set
int32_t MsgC::gotReply(){
	int32_t replySize,maxSize;
	bool freeIt;
	char *reply = m_mcast.getBestReply (&replySize, &maxSize, &freeIt);

	*m_ipPtr = 0;
	int32_t ip2 = 0;

	// sanity check
	if ( ! reply || replySize != 12 ) {
		g_errno = EBADREPLYSIZE;
		// log the offending size itself, not just the pointer
		log( "dns: msgc: Bad reply size of %" PRId32" (reply=%p)",
		     replySize, static_cast<void *>(reply) );
	}
	else {
		*m_ipPtr = *(int32_t *)reply;
		// repeated ip
		ip2 = *(int32_t *)(reply + 4);
		// the hash stored at reply + 8 is currently not verified
	}

	// debug
	char ipbuf[16];
	log(LOG_DEBUG,"dns: msgc: got reply of %s for %s.",
	    iptoa(*m_ipPtr,ipbuf), m_u.getUrl());

	// . everything we need has been copied out of the reply, so release
	//   it now, BEFORE the checks below, so that the early return on a
	//   checksum mismatch does not leak the buffer
	// . if freeIt is false that means we own the reply buffer
	// NOTE(review): ownership rule taken from the comment above; confirm
	// against Multicast::getBestReply()
	if ( ! freeIt && reply ) {
		mfree(reply,maxSize,"MulticastMsgC");
	}

	// test checksum
	if ( *m_ipPtr != ip2 ) {
		log("dns: ip checksum is incorrect. %" PRIu32" != %" PRIu32". "
		    "setting to -1.",
		    static_cast<uint32_t>(*m_ipPtr), static_cast<uint32_t>(ip2));
		g_errno = ETRYAGAIN;
		*m_ipPtr = -1;
		return -1;
	}

	// sanity check: an ip in [1,255] is not accepted as a real answer
	if ( (uint32_t)*m_ipPtr <= 255 &&
	     (uint32_t)*m_ipPtr > 0 ) {
		log("dns: msgc: got msgc ip reply of %" PRIu32" for %s. wtf? trying "
		    "again.", static_cast<uint32_t>(*m_ipPtr),m_u.getUrl());
		g_errno = ETRYAGAIN;
		*m_ipPtr = 0;
	}

	// . don't add to cache if there was an error.
	// . at this level, these are multicast errors, not dns errors
	if(g_errno) return *m_ipPtr;

	// Now we can add the result to the local dns cache.
	key96_t key = Dns::getKey ( m_u.getHost(),m_u.getHostLen() );

	// . the value passed is 60*60*24, presumably a ttl of one day in
	//   seconds (the previous comment here said "hour" but the code has
	//   always passed a day's worth)
	// . the authoritative ttl is not known at this level
	// TODO: return the ttl as well.
	g_dns.addToCache(key,*m_ipPtr,60*60*24);

	return *m_ipPtr;
}
// . udp handler for msg_type_c requests, see MsgC::registerHandler()
// . the request is the hostname to resolve including its trailing \0
// . the reply is sent by gotMsgCIpWrapper(), either right away when
//   g_dns.getIp() does not block, or later when g_dns calls it back
void handleRequest(UdpSlot *slot, int32_t /*niceness*/) {
	// the length handed to the dns does not include the \0 at the end
	const int32_t nameLen = slot->m_readBufSize - 1;
	char *name = slot->m_readBuf;

	log(LOG_DEBUG,"dns: msgc: handle request called for %s state=%p", name,slot);

	int32_t resolvedIp = 0;

	// blocked? then gotMsgCIpWrapper was passed as the callback, with the
	// slot as its state, so there is nothing more to do here
	if ( ! g_dns.getIp(name, nameLen, &resolvedIp, slot, gotMsgCIpWrapper) ) {
		return;
	}

	// did not block, reply immediately
	gotMsgCIpWrapper(slot, resolvedIp);
}
void gotMsgCIpWrapper( void *state, int32_t ip){
UdpSlot *slot=(UdpSlot *) state;
log(LOG_DEBUG,"dns: msgc sending reply for state=%p.",state);
//to fit the ip address
//char reply[12];
// don't put it on the stack because sendReply does not copy!
char *reply = slot->m_shortSendBuffer;
int32_t replySize=12;
#if SHORTSENDBUFFERSIZE < 12
#error Slot::m_shortSendBuffer must be at least 12 bytes
#endif
// reply=(char*) mmalloc(replySize,"MsgC");
char *p = reply;
*(int32_t *)p = ip; p += 4;
// repeat it as a checksum
*(int32_t *)p = ip; p += 4;
// an actual checksum
*(int32_t *)p = hash32h ( ip , 0 ); p += 4;
g_udpServer.sendReply(reply, replySize, NULL, 0, slot);
return;
}