mirror of
https://github.com/yacy/yacy_search_server.git
synced 2025-09-03 15:26:13 -04:00
Having the collection attribute for those imports makes it possible to use those import methods for YaCy Pack generation, which requires the collection name to produce a pack name. Also fixed the file upload option through the browser for both methods. For large zim/warc files you need large memory settings to be able to cache the uploaded file. Your browser also needs a large amount of memory to upload gigabytes of files through multipart/form-data uploads.
59 lines
2.6 KiB
HTML
59 lines
2.6 KiB
HTML
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <title>YaCy '#[clientname]#': Warc Import</title>
  #%env/templates/metas.template%#
  #(import)#::<meta http-equiv="REFRESH" content="10;url=IndexImportWarc_p.html" />
  <!-- Emitted only in the "import running" alternative: reload the status page every 10 seconds.
       The explicit url= drops the http get parameters on refresh, preventing a restart of the import. -->
  #(/import)#
</head>
<body id="IndexImportWarc">
  #%env/templates/header.template%#
  #%env/templates/submenuIndexImport.template%#
  <h2>Web Archive File Import</h2>

  #(import)#
  <!-- First alternative of the import switch: no import is running, offer the start form. -->
  <p>No import thread is running, you can start a new thread here</p>
  <form action="IndexImportWarc_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
    <!-- Sent as a multipart/form-data POST so that a file picked in the browser is uploaded
         with the request; a URL can be given instead of a file. Large archives need
         correspondingly large memory settings because the upload is cached. -->
    <fieldset>
      <legend>Warc File Selection: select an warc file (which may be gz compressed)</legend>
      <p>
        You can download warc archives for example here
        <a href="https://archive.org/search.php?query=subject%3A%22warcarchives%22&amp;and[]=subject%3A%22warcarchives%22" target="_blank" rel="noopener noreferrer">Internet Archive</a>.
      </p>
      <dl>
        <dt class="TableCellDark"><label for="file">File:</label></dt>
        <dd><input name="file" id="file" type="file" value="" size="75" /></dd>
        <dt></dt>
        <dd>or</dd>
        <dt class="TableCellDark"><label for="url">Url:</label></dt>
        <dd><input name="url" id="url" type="text" value="" size="75" /></dd>
        <dt></dt>
        <!-- Collection name stored with the import; "user" is the pre-filled default. -->
        <dt class="TableCellDark"><label for="collection">Collection:</label></dt>
        <dd><input name="collection" id="collection" type="text" value="user" size="75" /></dd>
        <dd><input name="submit" class="btn btn-primary" type="submit" value="Import Warc File" /></dd>
      </dl>
    </fieldset>
  </form>

  <br />
  ::
  <!-- Second alternative of the import switch: an import is running, show its progress.
       The Stop button submits the "abort" parameter back to this page
       (NOTE(review): the handling of "abort" lives in the servlet and is not visible here). -->
  <form action="IndexImportWarc_p.html" method="get">
    <fieldset>
      <legend>Import Process</legend>
      <dl>
        <dt>Thread:</dt><dd>#[thread]#</dd>
        <dt>Warc File:</dt><dd>#[warcfile]#</dd>
        <dt>Processed:</dt><dd>#[count]# Entries</dd>
        <dt>Speed:</dt><dd>#[speed]# pages per second</dd>
        <dt>Running Time:</dt><dd>#[runningHours]# hours, #[runningMinutes]# minutes</dd>
        <dt>Remaining Time:</dt><dd>#[remainingHours]# hours, #[remainingMinutes]# minutes</dd>
      </dl>
    </fieldset>
    <input name="abort" type="submit" class="btn btn-danger" value="Stop" />
  </form>
  #(/import)#

  #%env/templates/footer.template%#
</body>
</html>