Fix title for PDF files & add some simple tests for it

Ai Lin Chia
2015-12-01 12:38:51 +01:00
parent cb12d07632
commit 0884edf08e
15 changed files with 962 additions and 676 deletions

@@ -344,10 +344,10 @@ bool Matches::set ( XmlDoc *xd ,
niceness ) )
return false;
// also use the title from the title tag, because sometimes
// it does not equal "tt->getTitle()"
int32_t a = tt->m_titleTagStart;
int32_t b = tt->m_titleTagEnd;
// also use the title from the title tag, because sometimes it does not equal "tt->getTitle()"
int32_t a = tt->getTitleTagStart();
int32_t b = tt->getTitleTagEnd();
char *start = NULL;
char *end = NULL;
if ( a >= 0 && b >= 0 && b>a ) {

@@ -2738,7 +2738,7 @@ bool printCrawlDetails2 (SafeBuf *sb , CollectionRec *cx , char format ) {
if ( format == FORMAT_JSON ) {
sb->safePrintf("{"
"\"response:{\n"
"\"response\":{\n"
"\t\"statusCode\":%"INT32",\n"
"\t\"statusMsg\":\"%s\",\n"
"\t\"jobCreationTimeUTC\":%"INT32",\n"
@@ -2758,7 +2758,7 @@ bool printCrawlDetails2 (SafeBuf *sb , CollectionRec *cx , char format ) {
);
sb->safePrintf("\t\"currentTime\":%"UINT32",\n",
(uint32_t)getTimeGlobal() );
sb->safePrintf("\t\"currentTimeUTC\":%"UINT32",\n",
sb->safePrintf("\t\"currentTimeUTC\":%"UINT32"\n",
(uint32_t)getTimeGlobal() );
sb->safePrintf("\t}\n");
sb->safePrintf("}\n");

Pos.cpp (148 lines changed)

@@ -32,83 +32,65 @@ int32_t Pos::filter( char *p, char *pend, class Words *words, int32_t a, int32_t
// . returns false and sets g_errno on error
// . if f is non-NULL store filtered words into there. back to back spaces
// are eliminated.
bool Pos::set (Words *words ,
char *f ,
char *fend,
int32_t *len ,
int32_t a ,
int32_t b ,
char *buf ,
int32_t bufSize ) {
bool Pos::set (Words *words, char *f, char *fend, int32_t *len , int32_t a , int32_t b, char *buf, int32_t bufSize ) {
// free m_buf in case this is a second call
if ( ! f ) reset();
if ( ! f ) {
reset();
}
int32_t nw = words->getNumWords();
int32_t *wlens = words->m_wordLens;
nodeid_t *tids = words->getTagIds(); // m_tagIds;
char **wp = words->m_words;
//int32_t *ss = NULL;
//int64_t *wids = words->m_wordIds;
//if ( scores ) ss = scores->m_scores;
int32_t nw = words->getNumWords();
int32_t *wlens = words->m_wordLens;
nodeid_t *tids = words->getTagIds(); // m_tagIds;
char **wp = words->m_words;
// save start point for filtering
char *fstart = f;
// -1 is the default value
if ( b == -1 ) b = nw;
if ( b == -1 ) {
b = nw;
}
// alloc array if need to
int32_t need = (nw+1) * 4;
// do not destroy m_pos/m_numWords if only filtering into a buffer
if ( f ) goto skip;
if ( !f ) {
m_needsFree = false;
m_needsFree = false;
m_buf = m_localBuf;
if ( need > POS_LOCALBUFSIZE && need < bufSize )
m_buf = buf;
else if ( need > POS_LOCALBUFSIZE ) {
m_buf = (char *)mmalloc(need,"Pos");
m_needsFree = true;
m_buf = m_localBuf;
if ( need > POS_LOCALBUFSIZE && need < bufSize )
m_buf = buf;
else if ( need > POS_LOCALBUFSIZE ) {
m_buf = (char *)mmalloc(need,"Pos");
m_needsFree = true;
}
// bail on error
if ( ! m_buf ) return false;
m_bufSize = need;
m_pos = (int32_t *)m_buf;
m_numWords = nw;
}
// bail on error
if ( ! m_buf ) return false;
m_bufSize = need;
m_pos = (int32_t *)m_buf;
m_numWords = nw;
skip:
// this is the CHARACTER count.
int32_t pos = 0;
bool trunc = false;
char *p , *pend;
//char *nextp;
//int32_t skip;
char* lastBreak = NULL;
// utf8 char
//int32_t c;
// its size in bytes
//char cs;
// int16_tcut
//Section **sp = NULL;
//if ( sections ) sp = sections->m_sectionPtrs;
//int32_t badFlags = SEC_SCRIPT|SEC_STYLE|SEC_SELECT|SEC_MARQUEE;
// flag for stopping back-to-back spaces. only count those as one char.
bool lastSpace = false;
int32_t maxCharSize = 4; // we are utf8
int32_t maxCharSize = 4; // we are utf8
for ( int32_t i = a ; i < b ; i++ ) {
if (trunc) break;
// set pos for the ith word to "pos"
if ( ! f ) m_pos[i] = pos;
if (trunc) {
break;
}
// if inside a bad tag, skip it
//if ( sp && (sp[i]->m_flags & badFlags) ) continue;
// set pos for the ith word to "pos"
if ( ! f ) {
m_pos[i] = pos;
}
// is tag?
if ( tids && tids[i] ) {
@@ -130,35 +112,44 @@ bool Pos::set (Words *words ,
}
// if had a previous breaking tag and no non-tag
// word after it, do not count back-to-back spaces
if ( lastSpace ) continue;
if ( lastSpace ) {
continue;
}
// if had a br tag count it as a '.'
if ( tids[i] ) { // == 20 ) { // <br>
if ( tids[i] ) { // <br>
// are we filtering?
if ( f && f != fstart ) {
if ((fend-f>2*maxCharSize)) {
*f++ = '.';
*f++ = ' ';
} else {
trunc = true;
}
else trunc = true;
}
// count as double periods
//pos += 3;
// no, just single period.
pos += 2;
lastSpace = true;
continue;
}
// are we filtering?
if ( f ) {
if ((fend-f > maxCharSize)) {
*f++ = ' ';
} else {
trunc = true;
}
else trunc = true;
}
// count as a single space
pos++;
// do not allow back-to-back spaces
lastSpace = true;
continue;
}
@@ -171,52 +162,71 @@ bool Pos::set (Words *words ,
for ( ; p < pend ; p += cs ) {
// get size
cs = getUtf8CharSize(p);
// do not count space if one before
if ( is_wspace_utf8 (p) ) {
if ( lastSpace ) continue;
if ( lastSpace ) {
continue;
}
lastSpace = true;
// are we filtering?
if ( f ) {
if (fend-f > 1 ) {
lastBreak = f;
*f++ = ' ';
} else {
trunc = true;
}
else trunc = true;
}
pos++;
continue;
}
if ( f ) {
if (fend-f > cs){
if (fend-f > cs) {
// change '|' to commas
if ( *p == '|' )
if ( *p == '|' ) {
*f++ = ',';
else if ( cs == 1 )
} else if ( cs == 1 ) {
*f++ = *p;
else {
} else {
gbmemcpy(f,p,cs);
f += cs;
}
}
else trunc = true;
else {
trunc = true;
}
}
pos++;
lastSpace = false;
}
}
if (trunc) {
if(lastBreak == NULL) {
*len = 0;
return false;
}
else if(f) f = lastBreak;
else if (f) {
f = lastBreak;
}
}
// set pos for the END of the last word here (used in Summary.cpp)
if ( ! f ) m_pos[nw] = pos;
// NULL terminate f
else { *len = f - fstart; }
if ( fend-f > maxCharSize) { *f = '\0';}
if ( ! f ) {
m_pos[nw] = pos;
} else { // NULL terminate f
*len = f - fstart;
}
if ( fend-f > maxCharSize) {
*f = '\0';
}
// Success
return true;
}
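
For reference, a minimal sketch of the two calling modes of the reformatted Pos::set(), inferred from the code above; the wrapper function name, the Words pointer and the buffer size are assumptions for illustration only.

#include "Pos.h"

// Hypothetical helper sketching both modes of Pos::set(); "words" is assumed
// to be a Words object already set from a parsed document.
static bool demoPosSet ( Words *words ) {
	Pos pos;
	// Mode 1: f == NULL. Fills m_pos[] so that m_pos[i] is the CHARACTER
	// (not byte) offset of word i; m_pos[nw] marks the end of the last word.
	if ( ! pos.set( words, NULL, NULL, NULL, 0, -1, NULL, 0 ) ) {
		return false;
	}
	// Mode 2: f != NULL. Filters the words into the caller's buffer,
	// collapsing back-to-back spaces and turning breaking tags into ". ";
	// *len is set to the bytes written, NOT counting the terminating \0.
	char out[1024];
	int32_t outLen = 0;
	return pos.set( words, out, out + sizeof(out), &outLen, 0, -1, NULL, 0 );
}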

Pos.h (2 lines changed)

@@ -38,8 +38,6 @@ class Pos {
int32_t a = 0 ,
int32_t b = -1 );
int32_t getMemUsed () { return m_bufSize; };
// . the position in CHARACTERS of word i is given by m_pos[i]
// . this is NOT the byte position. you can have 2, 3 or even 4
// byte characters in utf8. the purpose here is for counting
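
The character/byte distinction described above is easy to trip over; a tiny, hypothetical illustration using the same getUtf8CharSize() helper that Pos::set() calls (the sample string is arbitrary):

char s[] = "naïve";        // 6 bytes, 5 characters in UTF-8
int32_t chars = 0;
for ( char *p = s ; *p ; p += getUtf8CharSize(p) ) {
	chars++;
}
// chars ends up 5 while the byte length is 6, so the value stored in
// m_pos[i] can be smaller than the corresponding byte offset.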

Title.cpp (1185 lines changed)

File diff suppressed because it is too large

Title.h (66 lines changed)

@@ -3,10 +3,16 @@
#ifndef _TITLE_H_
#define _TITLE_H_
#include "Query.h"
#include <stdint.h>
#define TITLE_LOCAL_SIZE 2048
// forward declaration
class XmlDoc;
class Xml;
class Words;
class Query;
class Title {
public:
Title();
@@ -14,6 +20,7 @@ public:
void reset();
/// @todo correct comments
// . set m_title to the title of the document represented by "xd"
// . if getHardTitle is true will always use the title in the <title>
// tag, but if that is not present, will try dmoz titles before
@@ -30,42 +37,45 @@ public:
// . does not consult words with scores of 0 (unless a meta tag)
// . maxTitleChars is in chars (a utf16 char is 2 bytes or more)
// . maxTitleChars of -1 means no max
bool setTitle (class XmlDoc *xd ,
class Xml *xml ,
class Words *words ,
int32_t maxTitleChars ,
int32_t maxTitleWords ,
Query *q ,
int32_t niceness );
bool setTitle(XmlDoc *xd, Xml *xml, Words *words, int32_t maxTitleChars, Query *q, int32_t niceness);
char *getTitle() {
return m_title;
}
char *getTitle ( ) { return m_title; }
int32_t getTitleLen ( ) { return m_titleBytes; } // does NOT include \0
// does NOT include \0
int32_t getTitleLen() {
return m_titleBytes;
}
int32_t getTitleTagStart() {
return m_titleTagStart;
}
bool copyTitle (class Words *words, int32_t t0, int32_t t1 );
int32_t getTitleTagEnd() {
return m_titleTagEnd;
}
float getSimilarity ( Words *w1 , int32_t i0 , int32_t i1 , Words *w2 , int32_t t0 , int32_t t1 );
bool copyTitle(Words *words, int32_t t0, int32_t t1);
char *m_title;
int32_t m_titleBytes; // in bytes. does NOT include \0
int32_t m_titleAllocSize;
char m_localBuf [ TITLE_LOCAL_SIZE ];
char m_niceness;
int32_t m_maxTitleChars;
int32_t m_titleTagStart ;
int32_t m_titleTagEnd ;
float getSimilarity(Words *w1, int32_t i0, int32_t i1, Words *w2, int32_t t0, int32_t t1);
private:
bool setTitle4 (class XmlDoc *xd,
class Xml *xml,
class Words *words,
int32_t maxTitleChars,
int32_t maxTitleWords,
Query *q);
char *m_title;
int32_t m_titleBytes; // in bytes. does NOT include \0
int32_t m_titleAllocSize;
char m_localBuf[TITLE_LOCAL_SIZE];
char m_niceness;
int32_t m_maxTitleChars;
int32_t m_titleTagStart;
int32_t m_titleTagEnd;
bool setTitle4(XmlDoc *xd, Xml *xml, Words *words, int32_t maxTitleChars, int32_t maxTitleWords, Query *q);
};
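
For orientation, a minimal sketch of how a caller uses the refactored Title interface, mirroring the call sites changed in this commit (Matches.cpp, XmlDoc.cpp); the wrapper function and its arguments are hypothetical.

#include "Title.h"

// Hypothetical caller; xd, xml, ww, q and niceness are assumed to be already
// built by the surrounding XmlDoc machinery.
static bool hasUsableTitle ( XmlDoc *xd, Xml *xml, Words *ww, Query *q, int32_t niceness ) {
	Title ti;
	// new 6-argument signature: maxTitleWords is gone; 256 mirrors the cap
	// XmlDoc::getTitle() applies below
	if ( ! ti.setTitle( xd, xml, ww, 256, q, niceness ) ) {
		return false;
	}
	// accessors replace the direct m_title / m_titleBytes / m_titleTagStart /
	// m_titleTagEnd reads updated elsewhere in this commit
	char    *title    = ti.getTitle();
	int32_t  titleLen = ti.getTitleLen();       // bytes, does NOT include \0
	int32_t  tagStart = ti.getTitleTagStart();
	int32_t  tagEnd   = ti.getTitleTagEnd();
	return title && titleLen > 0 && tagStart >= 0 && tagEnd > tagStart;
}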

@@ -3081,7 +3081,7 @@ bool XmlDoc::indexDoc2 ( ) {
//if ( m_forceDelete ) m_decCount = true;
// fix for the exact quota bug found on eurekster collection. bug 229
// if we're not a new doc, then don't increment the count because
// if we're not a new doc, then don't increment the count because
// we have been already counted as the old doc. MDW: i added the
// condition that if decCount is true we need to update the count!
if ( *isIndexed && ! m_decCount ) return logIt();
@@ -3106,12 +3106,12 @@ bool XmlDoc::indexDoc2 ( ) {
if ( m_msg16.m_termIdHost == 0 ) { char *xx = NULL; *xx = 0; }
if ( m_msg16.m_termIdDom == 0 ) { char *xx = NULL; *xx = 0; }
// . Msg36 gets the correct count from disk and puts it in cache. It
// . Msg36 gets the correct count from disk and puts it in cache. It
// doesn't try to increment or decrement the quotas in cache, because
// then it would have to be done on all twins, and also the correct
// split will have to be found.
// . Actually, we should only use the cache on one host to hold the
// sum of all splits. This will be the authority cache.
// split will have to be found.
// . Actually, we should only use the cache on one host to hold the
// sum of all splits. This will be the authority cache.
if ( ! m_updatedCounts ) {
// only call this once
m_updatedCounts = true;
@@ -9247,7 +9247,7 @@ int32_t *XmlDoc::getSummaryVector ( ) {
//int32_t avail = 5000;
//int32_t len;
// put title into there
int32_t tlen = ti->m_titleBytes - 1;
int32_t tlen = ti->getTitleLen() - 1;
//if ( len > avail ) len = avail - 10;
if ( tlen < 0 ) tlen = 0;
@@ -9260,7 +9260,7 @@ int32_t *XmlDoc::getSummaryVector ( ) {
//gbmemcpy ( p , ti->m_title , len );
//p += len;
sb.safeMemcpy ( ti->m_title , tlen );
sb.safeMemcpy ( ti->getTitle() , tlen );
// space separting the title from summary
if ( tlen > 0 ) sb.pushChar(' ');
@@ -21547,7 +21547,7 @@ int32_t *XmlDoc::getSpiderPriority ( ) {
return &m_priority;
}
bool XmlDoc::logIt ( SafeBuf *bb ) {
bool XmlDoc::logIt (SafeBuf *bb ) {
// set errCode
int32_t errCode = m_indexCode;
@@ -33774,31 +33774,47 @@ SafeBuf *XmlDoc::getHeaderTagBuf() {
Title *XmlDoc::getTitle ( ) {
if ( m_titleValid ) return &m_title;
if ( m_titleValid ) {
return &m_title;
}
// need a buncha crap
Xml *xml = getXml();
if ( ! xml || xml == (Xml *)-1 ) return (Title *)xml;
if ( ! xml || xml == (Xml *)-1 ) {
return (Title *)xml;
}
Words *ww = getWords();
if ( ! ww || ww == (Words *)-1 ) return (Title *)ww;
if ( ! ww || ww == (Words *)-1 ) {
return (Title *)ww;
}
Query *q = getQuery();
if ( ! q ) return (Title *)q;
CollectionRec *cr = getCollRec();
if ( ! cr ) return NULL;
int32_t titleMaxLen = cr->m_titleMaxLen;
if ( m_req ) titleMaxLen = m_req->m_titleMaxLen;
if ( ! q ) {
return (Title *)q;
}
int32_t titleMaxLen = 256;
if ( m_req ) {
titleMaxLen = m_req->m_titleMaxLen;
} else {
CollectionRec *cr = getCollRec();
if (cr) {
titleMaxLen = cr->m_titleMaxLen;
}
}
// limit for speed, some guys have a 100k word title!
if ( titleMaxLen > 256 ) titleMaxLen = 256;
if ( titleMaxLen > 256 ) {
titleMaxLen = 256;
}
m_titleValid = true;
if ( ! m_title.setTitle ( this ,
xml ,
ww ,
titleMaxLen ,
0xffff ,
q ,
m_niceness ) )
if ( ! m_title.setTitle( this, xml, ww, titleMaxLen, q, m_niceness) ) {
return NULL;
}
return &m_title;
}
@@ -33857,9 +33873,9 @@ Summary *XmlDoc::getSummary () {
m_cpuSummaryStartTime = start;
// make sure summary does not include title
char *tbuf = ti->m_title;
char *tbuf = ti->getTitle();
// this does not include the terminating \0
int32_t tbufLen = ti->m_titleBytes;
int32_t tbufLen = ti->getTitleLen();
// compute the summary
bool status;

@@ -17,7 +17,8 @@ def gb(request):
# verify gb is running
try:
requests.get('http://' + gb_config.host + ':' + gb_config.port)
api = gigablast.GigablastAPI(gb_config)
api.status()
except requests.exceptions.ConnectionError:
pytest.skip('Gigablast instance down')
@@ -26,3 +27,14 @@ def gb(request):
request.addfinalizer(finalize)
return gb_config
@pytest.fixture(scope='function')
def gb_api(request, gb):
api = gigablast.GigablastAPI(gb)
def finalize():
api.finalize()
request.addfinalizer(finalize)
return api

Binary file not shown.

Binary file not shown.

@@ -7,17 +7,113 @@ class GigablastConfig:
self.port = config['port']
class GigablastSearch:
class GigablastAPI:
class _HTTPStatus:
@staticmethod
def compare(status, expected_status):
return status[status.find('(')+1:status.find(')')] == expected_status
@staticmethod
def doc_force_delete():
return 'Doc force deleted'
def __init__(self, gb_config):
self.config = gb_config
self._config = gb_config
self._add_urls = set()
def finalize(self):
# cleanup urls
for url in self._add_urls:
self.delete_url(url, True)
def _get_url(self, path):
return 'http://' + self._config.host + ':' + self._config.port + '/' + path
@staticmethod
def _apply_default_payload(payload):
payload.setdefault('c', 'main')
payload.setdefault('format', 'json')
payload.setdefault('showinput', '0')
def _check_http_status(self, e, expected_status):
# hacks to cater for inject returning invalid status line
if (len(e.args) == 1 and
type(e.args[0]) == requests.packages.urllib3.exceptions.ProtocolError and
len(e.args[0].args) == 2):
import http.client
if type(e.args[0].args[1]) == http.client.BadStatusLine:
if self._HTTPStatus.compare(str(e.args[0].args[1]), expected_status):
return True
return False
def _add_url(self, url, payload=None):
if not payload:
payload = {}
self._apply_default_payload(payload)
payload.update({'urls': url})
response = requests.get(self._get_url('admin/addurl'), params=payload)
return response.json()
def _inject(self, url, payload=None):
if not payload:
payload = {}
self._apply_default_payload(payload)
payload.update({'url': url})
response = requests.get(self._get_url('admin/inject'), params=payload)
# inject doesn't seem to wait until document is completely indexed
from time import sleep
sleep(0.1)
return response.json()
def add_url(self, url, real_time=True):
self._add_urls.add(url)
if real_time:
return self._inject(url)['response']['statusCode'] == 0
else:
return self._add_url(url)['response']['statusCode'] == 0
def delete_url(self, url, finalizer=False):
if not finalizer:
self._add_urls.discard(url)
payload = {'deleteurl': '1'}
try:
self._inject(url, payload)
except requests.exceptions.ConnectionError as e:
# delete url returns invalid HTTP status line
return self._check_http_status(e, self._HTTPStatus.doc_force_delete())
return False
def search(self, query, payload=None):
if not payload:
payload = {}
payload.update({'format': 'json'})
self._apply_default_payload(payload)
payload.update({'q': query})
response = requests.get('http://' + self.config.host + ':' + self.config.port + '/search', params=payload)
response = requests.get(self._get_url('search'), params=payload)
return response.json()
def status(self, payload=None):
if not payload:
payload = {}
self._apply_default_payload(payload)
response = requests.get(self._get_url('admin/status'), params=payload)
return response.json()

test/system/pytest.ini (new file, 2 lines)

@@ -0,0 +1,2 @@
[pytest]
norecursedirs = data

@@ -3,3 +3,4 @@
pytest==2.8.3
requests==2.7.0
pytest-localserver==0.3.4

@@ -0,0 +1,27 @@
import pytest
import os
@pytest.mark.parametrize('file_location, url_with_file, content_type, expected_title', [
# file_location url_with_file content_type expected_title
('test_word_no_properties.pdf', True, 'application/pdf', 'test_word_no_properties.pdf'),
('test_word_no_properties.pdf', False, 'application/pdf', ''),
('test_word_with_properties.pdf', True, 'application/pdf', 'Title for Microsoft Word (in title)'),
])
def test_index_documents_office(gb_api, httpserver, file_location, url_with_file, content_type, expected_title):
httpserver.serve_content(content=open('data/office/' + file_location, 'rb').read(),
headers={'content-type': content_type})
print(httpserver.url)
# format url
file_url = httpserver.url + '/'
if url_with_file:
file_url += os.path.basename(file_location)
# add url
assert gb_api.add_url(file_url) == True
# verify result
result = gb_api.search('url:' + file_url)
assert len(result['results']) == 1
assert result['results'][0]['title'] == expected_title

@@ -1,5 +1,4 @@
import pytest
import gigablast
@pytest.mark.parametrize('query, fx_qlang, fx_blang, fx_fetld, fx_country, expected_lang', [
@@ -21,11 +20,9 @@ import gigablast
('Smurfene', '', '', '', '', 'is'), # wrong
('Smurfene', '', 'en-US', '', '', 'en'), # wrong
('Smurfene', '', 'no-NO', '', '', 'no'),
('Smurfene', '', '', '', 'no', 'no')
('Smurfene', '', '', '', 'no', 'no'),
])
def test_search_language_hint(gb, query, fx_qlang, fx_blang, fx_fetld, fx_country, expected_lang):
gb_search = gigablast.GigablastSearch(gb)
def test_search_language_hint(gb_api, query, fx_qlang, fx_blang, fx_fetld, fx_country, expected_lang):
payload = {}
# add language hints
@@ -34,6 +31,6 @@ def test_search_language_hint(gb, query, fx_qlang, fx_blang, fx_fetld, fx_countr
payload.update({'fx_fetld': fx_fetld})
payload.update({'fx_country': fx_country})
result = gb_search.search(query, payload)
result = gb_api.search(query, payload)
assert result['queryInfo']['queryLanguageAbbr'] == expected_lang