much more since we are mostly dynamically allocating, only a few smaller arrays use the 3000 on the stack.
287 lines
8.5 KiB
C++
287 lines
8.5 KiB
C++
// Matt Wells, copyright Jul 2002
|
|
|
|
// . all disk accesses should use this global class to record their stats
|
|
// . this class can dump a gif graph to disk in the html directory with
|
|
// the filename "Stats.gif"
|
|
// . you may add stats to this class, but do not remove them as they may
|
|
// be necessary for Statsdb.
|
|
|
|
#ifndef _STATS_H_
|
|
#define _STATS_H_
|
|
|
|
#include "SafeBuf.h"
|
|
#include "UdpProtocol.h" // MAX_MSG_TYPES
|
|
//#include "IndexReadInfo.h"
|
|
|
|
class StatPoint {
|
|
public:
|
|
int32_t m_numBytes ;
|
|
int64_t m_startTime ;
|
|
int64_t m_endTime ;
|
|
int m_color ;
|
|
char m_rdbId ;
|
|
char m_type ;
|
|
};
|
|
|
|
#define MAX_POINTS 6000
|
|
#define MAX_WIDTH 6
|
|
//#define DY 1000 // pixels vertical
|
|
#define DY 500 // pixels vertical
|
|
#define DX 1000 // pixels across
|
|
#define DT (10*1000) // time window, 10 seconds
|
|
#define MAX_LINES (DY / (MAX_WIDTH+1)) // leave free pixel above each line
|
|
|
|
#define STAT_GENERIC 0
|
|
#define STAT_QUERY 1
|
|
#define MAX_BUCKETS 16
|
|
|
|
class Stats {
|
|
|
|
public:
|
|
|
|
// just clear our points array when we're born
|
|
Stats ( ) ;
|
|
|
|
// we take lower 3 bytes of "color" for the rgb color of this line/pt
|
|
void addStat_r ( int32_t numBytes ,
|
|
int64_t startTime ,
|
|
int64_t endTime ,
|
|
int32_t color = 0x00000000 , // black
|
|
char type = STAT_GENERIC,
|
|
char *fnName = NULL);
|
|
|
|
// . use -1 to see points from ALL times stored
|
|
// . dumps a bar graph
|
|
// . each bar represents a stat in time, from inception to completion
|
|
// . useful for seeing possible sources of contention
|
|
//void dumpGIF ( int64_t startTime = -1 , int64_t endTime = -1 );
|
|
|
|
|
|
void printGraphInHtml ( SafeBuf &sb );
|
|
|
|
// this graphs:
|
|
// 1. stats per second
|
|
// 2. avg time of completion
|
|
// 3. st dev highway around avg
|
|
void dumpPerSecGIF ( char type ,
|
|
int64_t start = -1 ,
|
|
int64_t end = -1 );
|
|
|
|
// store the points here
|
|
StatPoint m_pts [ MAX_POINTS ];
|
|
int32_t m_next;
|
|
|
|
// don't graph any points that start BEFORE this time because our
|
|
// sampling is not accurate because we had to throw points away
|
|
// due to the MAX_POINTS limit
|
|
//int64_t m_minWindowStartTime;
|
|
|
|
// . conglomerate scores
|
|
// . # seeks is estimated since we don't know the disk fragmentation
|
|
//int64_t m_numSeeks;
|
|
// sum of these 2 should equal m_numSeeks
|
|
//int64_t m_numReads;
|
|
//int64_t m_numWrites;
|
|
// total bytes read and written since server was started
|
|
//int64_t m_bytesRead;
|
|
//int64_t m_bytesWritten;
|
|
void addPoint ( StatPoint **points ,
|
|
int32_t *numPoints ,
|
|
StatPoint *p ) ;
|
|
|
|
|
|
|
|
//queries
|
|
void logAvgQueryTime(int64_t startTime);
|
|
void calcQueryStats();
|
|
|
|
void clearMsgStats();
|
|
|
|
int64_t m_startTime;
|
|
int64_t m_upTime;
|
|
int64_t m_lastQueryLogTime;
|
|
int64_t m_queryTimes;
|
|
float m_avgQueriesPerSec;
|
|
int32_t m_numQueries;
|
|
int32_t m_numSuccess;
|
|
int32_t m_numFails;
|
|
int32_t m_totalNumQueries;
|
|
int32_t m_totalNumSuccess;
|
|
int32_t m_totalNumFails;
|
|
float m_avgQueryTime;
|
|
float m_successRate;
|
|
//int64_t m_readSignals;
|
|
//int64_t m_writeSignals;
|
|
|
|
// set in BigFile.cpp
|
|
int32_t m_slowDiskReads;
|
|
|
|
// when we have to close a socket because too many are open.. count it
|
|
int32_t m_closedSockets;
|
|
|
|
// keep track of last 1000 urls spidered for reporting spider stats
|
|
void addSpiderPoint ( int32_t errCode, bool isNew ) ;
|
|
int32_t m_spiderSample;
|
|
int32_t m_spiderErrors;
|
|
int32_t m_spiderNew;
|
|
int32_t m_spiderErrorsNew;
|
|
int32_t m_errCodes[1000];
|
|
char m_isSampleNew[1000];
|
|
int64_t m_totalSpiderSuccessNew;
|
|
int64_t m_totalSpiderErrorsNew;
|
|
int64_t m_totalSpiderSuccessOld;
|
|
int64_t m_totalSpiderErrorsOld;
|
|
int64_t m_allErrorsNew[65536];
|
|
int64_t m_allErrorsOld[65536];
|
|
|
|
time_t m_uptimeStart;
|
|
|
|
// Deduped stats
|
|
int32_t m_msg3aRecallCnt;
|
|
// when we have to re-requrest docid lists for each split, inc this one
|
|
int32_t m_msg3aSlowRecalls;
|
|
// when we just request more docids from the same tier
|
|
int32_t m_msg3aFastRecalls;
|
|
// how many resolutions did we get on each tier
|
|
//int32_t m_tierHits [MAX_TIERS];
|
|
//int64_t m_tierTimes[MAX_TIERS];
|
|
// how many searches did not get enough results?
|
|
int32_t m_tier2Misses;
|
|
// one count for each CR_* defined in Msg51.h
|
|
int32_t m_filterStats[30];
|
|
//int32_t m_totalDedupCand;
|
|
//int32_t m_dedupedCand;
|
|
//int32_t m_bannedDups;
|
|
//int32_t m_bigHackDups;
|
|
//int32_t m_summaryDups;
|
|
//int32_t m_contentDups;
|
|
//int32_t m_clusteredTier1;
|
|
//int32_t m_clusteredTier2;
|
|
//int32_t m_errored;
|
|
int32_t m_msg3aRecalls[6];
|
|
SafeBuf m_keyCols;
|
|
//int32_t m_numTermsVsTier[14][MAX_TIERS];
|
|
//int32_t m_termsVsTierExp[14][MAX_TIERS][7];
|
|
|
|
// use m_start so we know what msg stats to clear with memset
|
|
char m_start;
|
|
// and stats for how long to send a request or reply from
|
|
// start to finish. the first "2" is the niceness, 0 or 1, and
|
|
// the second "2" is 0 if sending a reply and 1 if sending a request.
|
|
int64_t m_msgTotalOfSendTimes [MAX_MSG_TYPES][2][2];
|
|
int64_t m_msgTotalSent [MAX_MSG_TYPES][2][2];
|
|
int64_t m_msgTotalSentByTime [MAX_MSG_TYPES][2][2][MAX_BUCKETS];
|
|
// how long we wait after receiving the request until handler is called
|
|
int64_t m_msgTotalOfQueuedTimes [MAX_MSG_TYPES][2];
|
|
int64_t m_msgTotalQueued [MAX_MSG_TYPES][2];
|
|
int64_t m_msgTotalQueuedByTime [MAX_MSG_TYPES][2][MAX_BUCKETS];
|
|
// msg stats, from UdpServer.cpp and UdpSlot.cpp. all times in
|
|
// milliseconds. the second index is the niceness, 0 or 1. Buckets
|
|
// are for breaking down times by the time range: 0-1ms, 2-3ms,
|
|
// 4-7ms, 8-15ms, ...
|
|
int64_t m_msgTotalOfHandlerTimes [MAX_MSG_TYPES][2];
|
|
int64_t m_msgTotalHandlersCalled [MAX_MSG_TYPES][2];
|
|
int64_t m_msgTotalHandlersByTime [MAX_MSG_TYPES][2][MAX_BUCKETS];
|
|
|
|
int32_t m_packetsIn [MAX_MSG_TYPES][2];
|
|
int32_t m_packetsOut [MAX_MSG_TYPES][2];
|
|
int32_t m_acksIn [MAX_MSG_TYPES][2];
|
|
int32_t m_acksOut [MAX_MSG_TYPES][2];
|
|
int32_t m_reroutes [MAX_MSG_TYPES][2];
|
|
int32_t m_errors [MAX_MSG_TYPES][2];
|
|
int32_t m_timeouts [MAX_MSG_TYPES][2]; // specific error
|
|
int32_t m_nomem [MAX_MSG_TYPES][2]; // specific error
|
|
int32_t m_dropped [MAX_MSG_TYPES][2]; // dropped dgram
|
|
int32_t m_cancelRead [MAX_MSG_TYPES][2]; // dropped dgram
|
|
|
|
int32_t m_parsingInconsistencies;
|
|
|
|
int32_t m_totalOverflows;
|
|
|
|
// count ip and domain hammer for Msg13.cpp here
|
|
//int32_t m_numBackoffs;
|
|
|
|
// used by msg39
|
|
int32_t m_recomputeCacheMissess;
|
|
// if the msg3a advances to the next tier, of course, it will be
|
|
// a cache miss, so don't count those, they are justified
|
|
// recomputeCacheMisses
|
|
int32_t m_icacheTierJumps;
|
|
|
|
// when SpiderLoop calls SpiderCache::getSpiderRec() how many times
|
|
// does it actually get back a url to spider? and how many times does
|
|
// it miss (not get a url to spider)?
|
|
//int32_t m_spiderUrlsHit;
|
|
//int32_t m_spiderUrlsMissed;
|
|
|
|
int32_t m_compressedBytesIn;
|
|
int32_t m_uncompressedBytesIn;
|
|
|
|
|
|
// spider compression proxy stats set in Msg13.cpp
|
|
int64_t m_compressAllDocs;
|
|
int64_t m_compressAllBytesIn;
|
|
int64_t m_compressAllBytesOut;
|
|
|
|
int64_t m_compressMimeErrorDocs;
|
|
int64_t m_compressMimeErrorBytesIn;
|
|
int64_t m_compressMimeErrorBytesOut;
|
|
|
|
int64_t m_compressUnchangedDocs;
|
|
int64_t m_compressUnchangedBytesIn;
|
|
int64_t m_compressUnchangedBytesOut;
|
|
|
|
int64_t m_compressBadContentDocs;
|
|
int64_t m_compressBadContentBytesIn;
|
|
int64_t m_compressBadContentBytesOut;
|
|
|
|
int64_t m_compressBadLangDocs;
|
|
int64_t m_compressBadLangBytesIn;
|
|
int64_t m_compressBadLangBytesOut;
|
|
|
|
int64_t m_compressBadCharsetDocs;
|
|
int64_t m_compressBadCharsetBytesIn;
|
|
int64_t m_compressBadCharsetBytesOut;
|
|
|
|
int64_t m_compressBadCTypeDocs;
|
|
int64_t m_compressBadCTypeBytesIn;
|
|
int64_t m_compressBadCTypeBytesOut;
|
|
|
|
int64_t m_compressHasIframeDocs;
|
|
int64_t m_compressHasIframeBytesIn;
|
|
int64_t m_compressHasIframeBytesOut;
|
|
|
|
int64_t m_compressPlainLinkDocs;
|
|
int64_t m_compressPlainLinkBytesIn;
|
|
int64_t m_compressPlainLinkBytesOut;
|
|
|
|
int64_t m_compressEmptyLinkDocs;
|
|
int64_t m_compressEmptyLinkBytesIn;
|
|
int64_t m_compressEmptyLinkBytesOut;
|
|
|
|
int64_t m_compressFullPageDocs;
|
|
int64_t m_compressFullPageBytesIn;
|
|
int64_t m_compressFullPageBytesOut;
|
|
|
|
int64_t m_compressHasDateDocs;
|
|
int64_t m_compressHasDateBytesIn;
|
|
int64_t m_compressHasDateBytesOut;
|
|
|
|
int64_t m_compressRobotsTxtDocs;
|
|
int64_t m_compressRobotsTxtBytesIn;
|
|
int64_t m_compressRobotsTxtBytesOut;
|
|
|
|
int64_t m_compressUnknownTypeDocs;
|
|
int64_t m_compressUnknownTypeBytesIn;
|
|
int64_t m_compressUnknownTypeBytesOut;
|
|
|
|
// use m_end so we know what msg stats to clear with memset
|
|
char m_end;
|
|
|
|
//bool m_gotLock;
|
|
};
|
|
|
|
extern class Stats g_stats;
|
|
|
|
#endif
|