Injection script fixes.
Temporary fix for a core dump when injecting a large WARC file.
This commit is contained in:
2
Makefile
2
Makefile
@ -776,4 +776,4 @@ install-pkgs-local:
|
||||
warcinjector:
|
||||
-rm -r /home/zak/.pex/build/inject-*
|
||||
-rm -r /home/zak/.pex/install/inject-*
|
||||
pex -r requests -r sqlite3 -r pyopenssl -r ndg-httpsclient -r pyasn1 -r multiprocessing -e inject.inject:main -o script/warc-inject -s '/home/zak/repos/open-source-search-engine/script/' --no-wheel
|
||||
pex -r requests -r pyopenssl -r ndg-httpsclient -r pyasn1 -r multiprocessing -e inject.inject:main -o script/warc-inject -s '/home/zak/repos/open-source-search-engine/script/' --no-wheel
|
||||
|
@ -3482,6 +3482,11 @@ bool XmlDoc::indexWarcOrArc ( char ctype ) {
|
||||
if ( ctype == CT_WARC ) {
|
||||
// find "WARC/1.0" or whatever
|
||||
char *whp = m_fptr;
|
||||
if( ! whp ) {
|
||||
// FIXME: shouldn't get here with a NULL
|
||||
log("build: No buffer for file=%s", file->getFilename());
|
||||
goto warcDone;
|
||||
}
|
||||
// we do terminate last warc rec with \0 so be aware of that...
|
||||
int32_t maxCount = 10;
|
||||
for ( ; *whp && strncmp(whp,"WARC/",5) && --maxCount>0; whp++);
|
||||
|
@ -9,6 +9,7 @@ import subprocess
|
||||
import multiprocessing
|
||||
import sqlite3
|
||||
import datetime
|
||||
import sys
|
||||
|
||||
#Generate environment with:
|
||||
#pex -r requests -r multiprocessing -e inject:main -o warc-inject -s '.' --no-wheel
|
||||
@ -44,7 +45,7 @@ def injectItem(item, c):
|
||||
'metadata':json.dumps(itemMetadata),
|
||||
'c':'ait'}
|
||||
print "sending", postVars,' to gb'
|
||||
if False:
|
||||
if True:
|
||||
rp = requests.post("http://localhost:8000/admin/inject", postVars)
|
||||
statusCode = rp.status_code
|
||||
print postVars['url'], rp.status_code
|
||||
@ -77,28 +78,31 @@ def getPage(page):
|
||||
|
||||
|
||||
def main():
|
||||
getPage(4)
|
||||
# from multiprocessing.pool import ThreadPool
|
||||
# pool = ThreadPool(processes=5)
|
||||
# print pool.map(getPage, xrange(1,1200))
|
||||
print 'arguments were', sys.argv
|
||||
if len(sys.argv) == 2:
|
||||
if sys.argv[1] == 'init':
|
||||
init()
|
||||
print 'initialized'
|
||||
return sys.exit(0)
|
||||
if sys.argv[1] == 'reset':
|
||||
import os
|
||||
os.unlink('items.db')
|
||||
init()
|
||||
return sys.exit(0)
|
||||
else:
|
||||
#getPage(4)
|
||||
from multiprocessing.pool import ThreadPool
|
||||
pool = ThreadPool(processes=10)
|
||||
print pool.map(getPage, xrange(1,1300))
|
||||
|
||||
|
||||
def init():
|
||||
db = sqlite3.connect('items.db', detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
|
||||
c = db.cursor()
|
||||
db.execute('''CREATE TABLE items
|
||||
c.execute('''CREATE TABLE items
|
||||
(item text, file text, updated timestamp, status integer)''')
|
||||
|
||||
|
||||
db.commit()
|
||||
db.close()
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
if len(sys.argv) == 2:
|
||||
if sys.argv[1] == 'init':
|
||||
init()
|
||||
if sys.argv[1] == 'reset':
|
||||
import os
|
||||
os.unlink('items.db')
|
||||
init()
|
||||
else:
|
||||
main()
|
||||
main()
|
||||
|
Binary file not shown.
Reference in New Issue
Block a user