index +search info

This commit is contained in:
gauthiier 2019-07-17 12:55:47 +02:00
parent f06a720e55
commit 07a026e93f
12 changed files with 186 additions and 210 deletions

View File

@ -15,7 +15,7 @@ def run(args):
i = 0 i = 0
for u in args.url: for u in args.url:
name = args.names[i] if i < len(args.names) else None name = args.names[i] if i < len(args.names) else None
lists.crawl.crawl(u, name, args.arch) lists.crawl.crawl(url=u, name=name, sublist_name=name, archive_dir=args.arch) #<-- sublist for nettime
i = i + 1 i = i + 1
sys.exit() sys.exit()

View File

@ -26,7 +26,9 @@ def crawl(url, name, sublist_name=None, archive_dir="archives"):
# special case -- nettime. # special case -- nettime.
# the name should be the sublist_name (i.e nettime-l) # the name should be the sublist_name (i.e nettime-l)
elif "nettime" in name: elif "nettime" in name:
mhonarc_nettime.collect_from_url(url, name, name, archive_dir) if sublist_name is None:
sublist_name = name
mhonarc_nettime.collect_from_url(url, name, sublist_name, archive_dir)
else: else:
print('mhonarc?') print('mhonarc?')

View File

@ -26,17 +26,20 @@ def collect_from_url(url, name, sublist_name, base_archive_dir="archives", mbox=
threads = [] threads = []
lists = soup.select('ul:nth-of-type(2) li') lists = soup.select('ul:nth-of-type(2) li')
for l in lists: for l in lists:
if l.strong is None: if l.strong is None:
continue continue
name = l.strong.string name = l.strong.string
print(name + " - " + sublist_name)
if name.lower() == sublist_name.lower(): if name.lower() == sublist_name.lower():
threads_url_list = [] threads_url_list = []
threads_links = l.select('ul li a') threads_links = l.select('ul li a')
for t in threads_links: for t in threads_links:
thread_url = urllib.parse.urljoin(base_url, t.get('href')) thread_url = urllib.parse.urljoin(base_url, t.get('href'))
threads_url_list.append(thread_url) threads_url_list.append(thread_url)
@ -79,6 +82,10 @@ def collect_threads_from_url(url, base_archive_dir, mbox=False):
base_url = url base_url = url
# collect name # collect name
e = soup.select('p:nth-of-type(1) title')
print(soup)
threads_name = soup.select('p:nth-of-type(1) title')[0].string threads_name = soup.select('p:nth-of-type(1) title')[0].string
threads_name = threads_name.replace(' ', '_') threads_name = threads_name.replace(' ', '_')

View File

@ -1,79 +0,0 @@
import logging, os, json
import search.archive
class Singleton(type):
    """Metaclass that makes every class using it a process-wide singleton.

    The first call to such a class builds the instance normally; every later
    call returns that same instance and ignores the arguments it was given.
    """

    # Maps each class created with this metaclass to its single instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Return the unique instance of ``cls``, creating it on first use.

        Args:
            *args: Positional arguments forwarded to the class constructor
                on the first call only.
            **kwargs: Keyword arguments forwarded to the class constructor
                on the first call only.

        Returns:
            The one instance of ``cls``.
        """
        if cls not in cls._instances:
            cls._instances[cls] = super().__call__(*args, **kwargs)
            logging.info('**** new Singleton instance')
        return cls._instances[cls]
class Archives(metaclass=Singleton):
    """Registry of every archive found under one base directory.

    The class is built with the ``Singleton`` metaclass, so constructor
    arguments only take effect on the first instantiation.
    """

    def __init__(self, archives_dir=None):
        """Record the base directory and load its archives right away.

        Args:
            archives_dir: Directory containing one sub-directory per
                archive. When ``None``, ``www.config.ARCHIVES_PATH`` is used.
        """
        if archives_dir is None:
            # Imported only on this branch; callers passing an explicit
            # directory never trigger the ``www`` import.
            from www import config
            archives_dir = config.ARCHIVES_PATH
        self.archives_dir = archives_dir

        # Maps sub-directory name -> value returned by load_archive().
        self.data = {}
        # Becomes True only once load() has completed successfully.
        self.loaded = False

        logging.info('loading archives...')
        self.load()
        logging.info('done.')

    def load(self):
        """Load every sub-directory of ``archives_dir`` into ``self.data``.

        Does nothing when a previous call already completed. When
        ``archives_dir`` is not a directory the problem is logged, nothing
        is loaded and ``self.loaded`` stays ``False``.
        """
        if self.loaded:
            return

        if not os.path.isdir(self.archives_dir):
            # %-style arguments keep logging working even when the path is
            # not a plain ``str`` (string concatenation would raise).
            logging.error(
                "Archives:: the path - %s - is not a valid directory. Aborting.",
                self.archives_dir,
            )
            logging.error(" -- current cwd is: %s", os.getcwd())
            return

        names = [
            entry for entry in os.listdir(self.archives_dir)
            if os.path.isdir(os.path.join(self.archives_dir, entry))
        ]

        self.data = {}
        for name in names:
            logging.info("loading %s", name)
            # The base directory (not the joined path) is handed over;
            # the archive name is passed separately.
            self.data[name] = self.load_archive(self.archives_dir, name)
            logging.info("done.")

        self.loaded = True

    def load_archive(self, archive_dir, archive_name):
        """Build a ``search.archive.Archive`` and load one named archive.

        Args:
            archive_dir: Directory passed to the ``Archive`` constructor.
            archive_name: Name passed to ``Archive.load``.

        Returns:
            The loaded ``Archive``, or ``None`` when ``archive_dir`` is not
            a directory (the problem is logged).
        """
        if not os.path.isdir(archive_dir):
            logging.error(
                "Archives:: the path - %s - is not a valid directory. Aborting.",
                archive_dir,
            )
            return None

        archive = search.archive.Archive(archive_dir)
        archive.load(archive_name)
        return archive

View File

@ -11,6 +11,11 @@ import logging
def index(): def index():
return render_template("index.html") return render_template("index.html")
@app.route('/favicon.ico')
def favicon():
return send_from_directory(os.path.join(app.root_path, 'static'),
'favicon.ico', mimetype='image/vnd.microsoft.icon')
@app.route('/search') @app.route('/search')
def searh(): def searh():

BIN
www/static/CYBERPLA.GIF Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 527 B

BIN
www/static/cover.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.7 KiB

BIN
www/static/favicon.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 894 B

17
www/static/ooo.js Normal file
View File

@ -0,0 +1,17 @@
// Grey level used by b() for the page background; starts at 255 (white).
var c = 255
// Offset in pixels that m() applies to both `left` and `top` of #search.
var x = 0
/**
 * Lower the shared grey level `c` by one and paint the page background
 * with the resulting rgb(c,c,c) colour.
 */
function b() {
    c = c - 1;
    var grey = [c, c, c].join(',');
    document.body.style.backgroundColor = 'rgb(' + grey + ')';
}
/**
 * Advance the shared offset `x` by 0.7 and move the element with id
 * "search" to that offset on both axes (style.left and style.top, in px).
 * Does nothing beyond advancing `x` when the element is not in the page.
 */
function m() {
    x += 0.7;
    // Keep the element reference local instead of creating an implicit
    // global, which would also throw in strict mode.
    var s = document.getElementById('search');
    if (!s) {
        return;
    }
    var offset = x + 'px';
    s.style.left = offset;
    s.style.top = offset;
}
// Once the page has loaded, call m() every 200 ms.
window.onload = function startAnimation() {
    // setInterval(b, 500);
    var periodMs = 200;
    setInterval(m, periodMs);
};

View File

@ -2,6 +2,11 @@
$(document).ready(function(){ $(document).ready(function(){
$('#loading').hide() $('#loading').hide()
$('#info').click( function() {
console.log("click");
$('#info-search').toggle();
});
$('#search').submit(function(e) { $('#search').submit(function(e) {
e.preventDefault(); e.preventDefault();
args = $(this).serialize(); args = $(this).serialize();
@ -38,7 +43,7 @@ function search_result_archive(a) {
$.each(r.hits, function(j, h){ $.each(r.hits, function(j, h){
// console.log(h) // console.log(h)
let hit = '<li><a href="' + h.url+ '">' + h.subject + '</a> -- <i>' + h.author_name + '</i></li>'; let hit = '<li><a href="' + h.url+ '" target="_blank">' + h.subject + '</a> -- <i>' + h.author_name + '</i></li>';
hits += hit; hits += hit;
}); });
hits += "</ul>"; hits += "</ul>";

View File

@ -1,6 +1,11 @@
<html> <html>
<head></head> <head>
<body> <title>Times of Nettime</title>
<a href="/search"><h3>---> SEARCH <---</h3></a> <script type="text/javascript" src="{{ url_for('static',filename='ooo.js') }}"></script>
</head>
<body bgcolor="blue">
<div id="search" style="position: absolute;">
<a href="/search"><h3><img src="{{ url_for('static',filename='CYBERPLA.GIF') }}" width="150"></h3></a>
</div>
</body> </body>
</html> </html>

View File

@ -1,5 +1,6 @@
<html> <html>
<head> <head>
<title>Times of Nettime</title>
<link rel="stylesheet" type="text/css" href="{{ url_for('static',filename='c3.min.css') }}"> <link rel="stylesheet" type="text/css" href="{{ url_for('static',filename='c3.min.css') }}">
<script type=text/javascript src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.2.1/jquery.min.js"></script> <script type=text/javascript src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.2.1/jquery.min.js"></script>
<script type="text/javascript" src="{{ url_for('static',filename='d3.min.js') }}" charset="utf-8"></script> <script type="text/javascript" src="{{ url_for('static',filename='d3.min.js') }}" charset="utf-8"></script>
@ -21,8 +22,21 @@
{% endfor %} {% endfor %}
</select> </select>
<input type="submit" value="search" id="submit"> <input type="submit" value="search" id="submit">
<input type="button" value=" ? " id="info">
<div id="loading">Loading...</div> <div id="loading">Loading...</div>
</form> </form>
<div id="info-search" style="display: none">
<table><tbody><tr><th>Operator</th><th> </th></tr>
<tr><td>+</td><td>The word is mandatory in all text returned.</td></tr>
<tr><td>-</td><td>The word cannot appear in any text returned.</td></tr>
<tr><td>&lt;</td><td>The word that follows has a lower relevance than other words, although text containing it will still match</td></tr>
<tr><td>&gt;</td><td>The word that follows has a higher relevance than other words.</td></tr>
<tr><td>()</td><td>Used to group words into subexpressions.</td></tr>
<tr><td>~</td><td>The word following contributes negatively to the relevance of the text (which is different to the '-' operator, which specifically excludes the word, or the '&lt;' operator, which still causes the word to contribute positively to the relevance of the text).</td></tr>
<tr><td>*</td><td>The wildcard, indicating zero or more characters. It can only appear at the end of a word.</td></tr>
<tr><td>"</td><td>Anything enclosed in the double quotes is taken as a whole (so you can match phrases, for example).</td></tr>
</tbody></table>
</div>
<div id="graph"></div> <div id="graph"></div>
<div id="results"></div> <div id="results"></div>
</body> </body>