/*
 * @licstart  The following is the entire license notice for the
 * JavaScript code in this file.
 *
 * Copyright (C) 2006 - 2014 Alexander Schier, Martin Thelian, Michael Peter Christen,
 * Florian Richter, Stefan Förster, David Wieditz
 *
 *
 * This file is part of YaCy.
 *
 * YaCy is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 *
 * YaCy is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with YaCy.  If not, see <http://www.gnu.org/licenses/>.
 *
 * @licend  The above is the entire license notice
 * for the JavaScript code in this file.
 */

/** Image set on the "ajax" element once a page info response has been processed. */
var AJAX_OFF = "env/grafics/empty.gif";
/** Image set on the "ajax" element when a page info request is started. */
var AJAX_ON = "env/grafics/ajax.gif";

/** Handle of the timer scheduled by changed(); cleared and replaced on each new change. */
var timeout = "";
/**
 * Reads the value of the first child node of the first element having the given tag name.
 *
 * @param xmlDoc {XMLDocument} the xml document to use (may be null)
 * @param tagName {string} a XML tag name (may be null)
 * @returns {string} the node value of the first child of the first matching element.
 *          The empty string when the document or the tag name is null, when no element
 *          matches, when the element has no child, or when that child has no node value.
 */
function getXMLTagNodeValue(xmlDoc, tagName) {
    if (xmlDoc == null || tagName == null) {
        return "";
    }
    var xmlElements = xmlDoc.getElementsByTagName(tagName);
    if (xmlElements == null || !(xmlElements.length > 0)) {
        return "";
    }
    var firstChild = xmlElements[0].firstChild;
    // A child that carries no value (e.g. a nested element) has a null nodeValue:
    // normalize it so that callers always receive a string, as documented.
    if (firstChild == null || firstChild.nodeValue == null) {
        return "";
    }
    return firstChild.nodeValue;
}
/**
 * Processes the page info response held by the global `http` request object
 * (created outside this block) once the request is complete, and updates the page:
 * document title field, robots.txt status icon, sitemap URL, site link list and
 * related controls, then resets the "ajax" activity image.
 *
 * Every page element is looked up by id/name and skipped when absent, so that a
 * missing element can not abort the processing and leave the activity image on.
 */
function handleResponse() {
    /**
     * Builds the 32x32 icon appended to the "robotsOK" element.
     */
    function createRobotsStatusImage(src, title, alt) {
        var img = document.createElement("img");
        img.setAttribute("src", src);
        img.setAttribute("title", title);
        img.setAttribute("width", "32px");
        img.setAttribute("height", "32px");
        img.setAttribute("alt", alt);
        return img;
    }

    // 4 is the XMLHttpRequest DONE state: nothing to do before the request is complete
    if (http.readyState !== 4) {
        return;
    }

    var response = http.responseXML;

    /* Clean the robots status */
    var robotsOKspan = document.getElementById("robotsOK");
    if (robotsOKspan != null && robotsOKspan.firstChild) {
        robotsOKspan.removeChild(robotsOKspan.firstChild);
    }

    // get the document title
    var doctitle = getXMLTagNodeValue(response, "title");
    var titleField = document.getElementById("bookmarkTitle");
    if (titleField != null) {
        titleField.value = doctitle;
    }

    // determine if crawling is allowed by the robots.txt
    var docrobotsOK = getXMLTagNodeValue(response, "robots");
    if (robotsOKspan != null) {
        if (docrobotsOK === "1") {
            robotsOKspan.appendChild(createRobotsStatusImage(
                "env/grafics/ok.png",
                "Crawl is allowed by the website robots.txt rules (or no robots.txt file is provided).",
                "robots.txt - OK"));
        } else if (docrobotsOK === "0") {
            robotsOKspan.appendChild(createRobotsStatusImage(
                "env/grafics/bad.png",
                "Crawl is disallowed by the website robots.txt rules.",
                "robots.txt - Bad"));
        } else {
            // unknown robots status: leave the status element empty
            robotsOKspan.innerHTML = "";
        }
    }

    // get the sitemap URL contained in the robots.txt
    var sitemapFields = document.getElementsByName("sitemapURL");
    if (sitemapFields.length > 0) {
        var sitemap = getXMLTagNodeValue(response, "sitemap");
        sitemapFields[0].value = sitemap;
        var sitemapControl = document.getElementById("sitemap");
        if (sitemap && sitemapControl != null) {
            sitemapControl.disabled = false;
        }
    }

    // get the list of links found in the document
    var sitelist = getXMLTagNodeValue(response, "sitelist");
    var sitelistContainer = document.getElementById("sitelistURLs");
    if (sitelistContainer != null) {
        // NOTE(review): the value originates from the analyzed page and is inserted as HTML.
        // If the server only ever sends plain text here, prefer textContent - TODO confirm.
        sitelistContainer.innerHTML = sitelist;
    }
    var expandButton = document.getElementById("expandSiteListBtn");
    var siteListRadio = document.getElementById("sitelist");
    if (sitelist) {
        if (siteListRadio != null) {
            siteListRadio.disabled = false;
        }
        if (expandButton != null) {
            var hasMoreLinks = getXMLTagNodeValue(response, "hasMoreLinks");
            if (hasMoreLinks === "true") {
                expandButton.style.visibility = "visible";
                expandButton.disabled = false;
            } else {
                expandButton.style.visibility = "hidden";
            }
        }
    } else {
        // no link available: fall back from the "sitelist" mode to the "url" mode
        if (siteListRadio != null) {
            siteListRadio.disabled = true;
            siteListRadio.checked = false;
        }
        var urlModeRadio = document.getElementById("url");
        if (urlModeRadio != null) {
            urlModeRadio.checked = true;
        }
        if (expandButton != null) {
            expandButton.style.visibility = "hidden";
        }
    }

    // clear the ajax image
    var ajaxImage = document.getElementById("ajax");
    if (ajaxImage != null) {
        ajaxImage.setAttribute("src", AJAX_OFF);
    }
}
/**
 * Schedules a (limited) page info reload 1500 ms from now, cancelling the reload
 * previously scheduled by this function, so that successive calls in a short
 * time trigger a single request.
 */
function changed() {
    window.clearTimeout(timeout);
    timeout = window.setTimeout(function () {
        // explicit argument: never let an argument supplied by the timer be read as loadAll
        loadInfos(false);
    }, 1500);
}
/**
 * Requests the page information (title, robots) of the URL currently entered in
 * the "crawlingURL" field, and refreshes the "api" element with a link to the
 * same API call.
 *
 * @param loadAll {Boolean} when true, no links limit is sent with the request,
 *        else the request is limited with maxLinks=50
 */
function loadInfos(loadAll) {
    // displaying ajax image
    var ajaxImage = document.getElementById("ajax");
    if (ajaxImage != null) {
        ajaxImage.setAttribute("src", AJAX_ON);
    }

    var url = document.getElementById("crawlingURL").value;
    if (url.indexOf("ftp") === 0 || url.indexOf("smb") === 0) {
        // since the pdf parser update for page separation, we need to set this
        var crawlingQ = document.getElementById("crawlingQ");
        if (crawlingQ != null) {
            crawlingQ.checked = true;
        }
    }

    var encodedUrl = encodeURIComponent(url);
    sndReq('api/getpageinfo_p.xml?actions=title,robots' + (loadAll ? '' : '&maxLinks=50') + '&url=' + encodedUrl);

    var apiContainer = document.getElementById("api");
    if (apiContainer != null) {
        // The URL is user input: it is percent-encoded (including the single quote, which
        // encodeURIComponent leaves as is) so that it can neither break out of the
        // single-quoted href attribute nor be truncated at a '&' or '#' character.
        var hrefUrl = encodedUrl.replace(/'/g, "%27");
        apiContainer.innerHTML = "<a href='api/getpageinfo_p.xml?actions=title,robots&url=" + hrefUrl + "' id='apilink'><img src='env/grafics/api.png' width='60' height='40' alt='API'/></a><span>See the page info about the start url.</span>";
    }
}