mirror of
https://github.com/privacore/open-source-search-engine.git
synced 2025-07-15 02:36:08 -04:00
Remove IndexList.h
This commit is contained in:
10
Images.cpp
10
Images.cpp
@ -469,22 +469,16 @@ void Images::gotTermList ( ) {
|
||||
if ( g_errno ) return;
|
||||
// check docids in termlist
|
||||
m_list.resetListPtr();
|
||||
#if 0
|
||||
IndexList doesn't know how to deal with the modern posdb format, so the check below gives undefined results
|
||||
|
||||
// loop over it
|
||||
for ( ; ! m_list.isExhausted() ; m_list.skipCurrentRecord() ) {
|
||||
// get the first rec
|
||||
int64_t d = m_list.getCurrentDocId();
|
||||
// note it
|
||||
//log("dup: image is dupped");
|
||||
// is it us? if so ignore it
|
||||
if ( d == m_docId ) continue;
|
||||
if ( Posdb::getDocId(m_list.getCurrentRec()) == m_docId ) continue;
|
||||
// crap, i guess our image url is not unique. mark it off.
|
||||
m_errors[i] = EDOCDUP;
|
||||
// no need to go further
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
3
Images.h
3
Images.h
@ -5,7 +5,6 @@
|
||||
|
||||
#include "Msg0.h"
|
||||
#include "Msg13.h"
|
||||
#include "IndexList.h"
|
||||
#include "MsgC.h"
|
||||
#include "SafeBuf.h"
|
||||
#include "GbFormat.h" // FORMAT_HTML
|
||||
@ -143,7 +142,7 @@ class Images {
|
||||
collnum_t m_collnum;
|
||||
|
||||
int64_t m_docId;
|
||||
IndexList m_list;
|
||||
RdbList m_list;
|
||||
|
||||
int32_t m_latestIp;
|
||||
MsgC m_msgc;
|
||||
|
50
IndexList.h
50
IndexList.h
@ -1,50 +0,0 @@
|
||||
// Matt Wells, copyright Jun 2001
|
||||
|
||||
// . IndexList is a list of keys
|
||||
// . some keys are 12 bytes, some are 6 bytes (compressed)
|
||||
// . see Indexdb.h for format of the keys
|
||||
|
||||
// . you can set from a TitleRec/SiteRec pair
|
||||
// . you can set from a TermTable
|
||||
// . we use this class to generate the final indexList for a parsed document
|
||||
// . we have to do some #include's in the .cpp cuz the TitleRec contains an
|
||||
// IndexList for holding indexed link Text
|
||||
// . the TermTable has an addIndexList() function
|
||||
// . we map 32bit scores from a TermTable to 8bits scores by taking the log
|
||||
// of the score if it's >= 128*256, otherwise, we keep it as is
|
||||
|
||||
|
||||
// override all funcs in RdbList where m_useShortKeys is true...
|
||||
// skipCurrentRec() etc need to use m_useHalfKeys in RdbList cuz
|
||||
// that is needed by many generic routines, like merge_r, RdbMap, Msg1, Msg22..
|
||||
// We would need to make it a virtual function which would slow things down...
|
||||
// or make those classes have specialized functions for IndexLists... in
|
||||
// addition to the RdbLists they already support
|
||||
|
||||
#ifndef GB_INDEXLIST_H
|
||||
#define GB_INDEXLIST_H
|
||||
|
||||
#include "RdbList.h"
|
||||
#include "Titledb.h"
|
||||
|
||||
class IndexList : public RdbList {
|
||||
|
||||
public:
|
||||
|
||||
// these 2 assume 12 and 6 byte keys respectively
|
||||
int64_t getCurrentDocId () {
|
||||
if ( isHalfBitOn ( m_listPtr ) ) return getDocId6 (m_listPtr);
|
||||
else return getDocId12(m_listPtr);
|
||||
}
|
||||
int64_t getDocId12 ( char *rec ) {
|
||||
return ((*(uint64_t *)(rec)) >> 2) & DOCID_MASK; }
|
||||
int64_t getDocId6 ( char *rec ) {
|
||||
int64_t docid;
|
||||
*(int32_t *)(&docid) = *(int32_t *)(void*)rec;
|
||||
((char *)&docid)[4] = rec[4];
|
||||
docid >>= 2;
|
||||
return docid & DOCID_MASK;
|
||||
}
|
||||
};
|
||||
|
||||
#endif // GB_INDEXLIST_H
|
Reference in New Issue
Block a user