2016-09-23 11:05:22 +02:00
# include <gtest/gtest.h>
2016-02-18 17:10:18 +01:00
# include "Summary.h"
# include "HttpMime.h" // CT_HTML
# include <cstdio>
# include "Xml.h"
# include "Words.h"
# include "Phrases.h"
# include "Sections.h"
# include "Pos.h"
# include "Query.h"
# include "Url.h"
# include "Matches.h"
# include "Linkdb.h"
# include "Title.h"
# define MAX_BUF_SIZE 1024
# define HTML_FORMAT "<html><head>%s< / head><body>%s< / body>< / html>"
2016-05-24 16:07:46 +02:00
/**
 * Test helper: feeds an HTML buffer through the parsing stages (Xml, Words,
 * Bits, Sections, Query, Title, Pos, Phrases, Matches) and finally calls
 * Summary::setSummary() on @p summary.
 *
 * Every stage that is checked is wrapped in ASSERT_TRUE, so a failing stage
 * records a fatal failure and returns from this helper before
 * setSummary() is reached.
 *
 * @param summary   object on which setSummary() is invoked
 * @param htmlInput NUL-terminated document text (its length is taken with strlen)
 * @param queryStr  query string handed to Query::set2()
 * @param urlStr    URL string handed to Url::set()
 */
static void generateSummary(Summary &summary, char *htmlInput, const char *queryStr, const char *urlStr) {
	// Parse the raw buffer as HTML.
	Xml doc;
	ASSERT_TRUE(doc.set(htmlInput, strlen(htmlInput), 0, CT_HTML));

	Words tokens;
	ASSERT_TRUE(tokens.set(&doc, true));

	Bits wordBits;
	ASSERT_TRUE(wordBits.set(&tokens));

	Url docUrl;
	docUrl.set(urlStr);

	Sections docSections;
	ASSERT_TRUE(docSections.set(&tokens, &wordBits, &docUrl, " ", CT_HTML));

	Query parsedQuery;
	ASSERT_TRUE(parsedQuery.set2(queryStr, langEnglish, true, true));

	// Zero-filled link info whose only populated field is its own size.
	LinkInfo inlinks;
	memset(&inlinks, 0, sizeof(inlinks));
	inlinks.m_lisize = sizeof(inlinks);

	Title docTitle;
	ASSERT_TRUE(docTitle.setTitle(&doc, &tokens, 80, &parsedQuery, &inlinks, &docUrl, NULL, 0, CT_HTML, langEnglish));

	Pos positions;
	ASSERT_TRUE(positions.set(&tokens));

	// Separate Bits instance prepared specifically for summary generation.
	Bits summaryBits;
	ASSERT_TRUE(summaryBits.setForSummary(&tokens));

	Phrases docPhrases;
	ASSERT_TRUE(docPhrases.set(&tokens, &wordBits));

	Matches queryMatches;
	queryMatches.setQuery(&parsedQuery);
	ASSERT_TRUE(queryMatches.set(&tokens, &docPhrases, &docSections, &summaryBits, &positions, &doc, &docTitle, &docUrl, &inlinks));

	summary.setSummary(&doc, &tokens, &docSections, &positions, &parsedQuery, 180, 3, 3, 180, &docUrl, &queryMatches,
	                   docTitle.getTitle(), docTitle.getTitleLen());
}
2016-05-24 13:07:59 +02:00
/**
 * Summarizes a <pre>-formatted ASCII table for a query whose terms appear in
 * two different rows. The expected summary contains those two rows joined by
 * ellipses, without the dashed separator lines.
 */
TEST(SummaryTest, StripSamePunct) {
	const char *body =
		" <pre> "
		" --------------------------------------------------------------------------------- \n "
		" | Name | Total Donations | \n "
		" --------------------------------------------------------------------------------- \n "
		" | JENNI STANLEY | $10.00 | \n "
		" --------------------------------------------------------------------------------- \n "
		" | CANDRA BUDGE | $22.00 | \n "
		" --------------------------------------------------------------------------------- \n "
		" | JESSE NICLEY | $34.00 | \n "
		" --------------------------------------------------------------------------------- \n "
		" | SHARON YOKLEY | $45.00 | \n "
		" --------------------------------------------------------------------------------- \n "
		" </pre> ";

	// Bounded formatting: the table is close to the buffer size, so fail the
	// test loudly on truncation instead of overrunning the stack buffer.
	char input[MAX_BUF_SIZE];
	const int written = std::snprintf(input, sizeof(input), HTML_FORMAT, " ", body);
	ASSERT_GE(written, 0);
	ASSERT_LT(static_cast<size_t>(written), sizeof(input));

	Summary summary;
	generateSummary(summary, input, " jesse budge ", " http://www.example.com/ ");

	EXPECT_STREQ(" CANDRA BUDGE | $22.00 | … | JESSE NICLEY | $34.00 … ", summary.getSummary());
}
2016-05-24 13:07:59 +02:00
2016-11-11 16:48:25 +01:00
/**
 * Known-bug test (the DISABLED_ prefix keeps GoogleTest from running it by
 * default): the expected summary ends with an ellipsis after the text is cut
 * short.
 */
TEST(SummaryTest, DISABLED_BUGNoEllipsisAdded) {
	const char *head =
		" <title>Instrument prices by Acme Inc.</title> \n "
		" <meta name= \" description \" content= \" Unorthodox musical instrument value estimation \" > \n ";

	const char *body =
		" <h1>Unusual saxophone valuation</h1> \n "
		" <p>Looking for knowing how much your saxophone is worth and what an appropriate insurance should be?. We provide that and other relevant information such as procedures, locations and time tables</p> \n "
		" <p>We also provide valuation for other musical instrucments.</p> \n ";

	// Bounded formatting: abort the test on truncation rather than writing
	// past the end of the fixed-size buffer.
	char input[MAX_BUF_SIZE];
	const int written = std::snprintf(input, sizeof(input), HTML_FORMAT, head, body);
	ASSERT_GE(written, 0);
	ASSERT_LT(static_cast<size_t>(written), sizeof(input));

	Summary summary;
	generateSummary(summary, input, " saxophone ", " http://www.example.com/ ");

	/// @todo ALC we're not adding ellipsis here due to lack of space. we should take one less word instead and add ellipsis.
	EXPECT_STREQ(" Unusual saxophone valuation. Looking for knowing how much your saxophone is worth and what an appropriate insurance should be?. We provide that and other relevant information … ", summary.getSummary());
}
2016-11-11 16:48:25 +01:00
/**
 * Known-bug test (the DISABLED_ prefix keeps GoogleTest from running it by
 * default): when the summary is made of complete sentences, the expected
 * text carries no trailing ellipsis.
 */
TEST(SummaryTest, DISABLED_BUGEllipsisAdded) {
	const char *body = " Giraffe on rollerblades. Penguin on skateboard. The giraffe is way faster than that plumb bird with pathetic wings. \n ";

	// The document is plain text with no HTML wrapper; copy it with an
	// explicit bound and fail on truncation.
	char input[MAX_BUF_SIZE];
	const int written = std::snprintf(input, sizeof(input), " %s ", body);
	ASSERT_GE(written, 0);
	ASSERT_LT(static_cast<size_t>(written), sizeof(input));

	Summary summary;
	generateSummary(summary, input, " giraffe ", " http://www.example.com/ ");

	/// @todo ALC we're adding ellipsis even with a full sentence.
	EXPECT_STREQ(" Giraffe on rollerblades. Penguin on skateboard. The giraffe is way faster than that plumb bird with pathetic wings. ", summary.getSummary());
}
/**
 * The query term ("banana") does not occur anywhere in the document, so this
 * checks the summary that is produced when there is nothing to match.
 */
TEST(SummaryTest, DefaultSummary) {
	const char *head = " <qtitle>f1 doc</qtitle> ";
	const char *body =
		" <p>cucumber</p> \n "
		" <a href= \" f3.html \" >snegl</a> \n "
		" snegl \n ";

	// Bounded formatting: abort the test on truncation rather than writing
	// past the end of the fixed-size buffer.
	char input[MAX_BUF_SIZE];
	const int written = std::snprintf(input, sizeof(input), HTML_FORMAT, head, body);
	ASSERT_GE(written, 0);
	ASSERT_LT(static_cast<size_t>(written), sizeof(input));

	Summary summary;
	generateSummary(summary, input, " banana ", " http://www.example.com/ ");

	EXPECT_STREQ(" cucumber. snegl snegl ", summary.getSummary());
}