This commit is contained in:
gauthiier 2017-07-27 10:09:33 +02:00
parent 064a05b806
commit 3b01ec68c6
6 changed files with 370 additions and 1 deletions

37
search.py Normal file
View File

@ -0,0 +1,37 @@
import sys, logging, argparse
import search.archive
# Configure the root logger so that DEBUG-level (and higher) records are emitted.
logging.basicConfig(level=logging.DEBUG)
def run(args):
    """Search the requested mailing-list archives and print every match.

    ``args`` is the parsed command line; it must expose ``keyword``, ``list``
    and ``field``. For each list an archive is loaded from ``archives/`` and
    searched; every thread with hits is printed followed by the url and index
    string of each hit.

    The function always terminates the process: with an error message when no
    keyword was given, otherwise with a plain ``sys.exit()`` once all lists
    have been processed.
    """
    if not args.keyword:
        sys.exit('No keyword. Aborting.')

    # No list given on the command line: fall back to a fixed set of lists.
    if not args.list:
        args.list = ['spectre', 'crumb', 'empyre']

    for list_name in args.list:
        archive = search.archive.Archive('archives/')
        archive.load(list_name)
        outcome = archive.search(keyword=args.keyword, field=args.field)

        for thread in outcome['results']:
            print(thread['thread'] + " ---- " + str(thread['nbr_hits']))
            for hit in thread['hits']:
                print(" " + hit['url'])
                print(" " + hit['index_str'])

    sys.exit()
if __name__ == "__main__":
    # Command-line entry point: one positional keyword, optional list names
    # and an optional message field to search in (defaults to "content").
    parser = argparse.ArgumentParser(description='Searches mailinglists archives')
    parser.add_argument('keyword', metavar="keyword", help="keyword to search")
    parser.add_argument('--list', nargs="+", help="mailinglist(s') name(s)")
    parser.add_argument(
        '--field',
        default="content",
        help="message field (i.e. 'content' or 'subject', etc.)",
    )
    args = parser.parse_args()
    run(args)

0
search/__init__.py Normal file
View File

107
search/archive.py Normal file
View File

@ -0,0 +1,107 @@
import logging, os, json, re
from datetime import datetime
class Archive():
    """One mailing-list archive, loaded from a directory of JSON files.

    Every ``<label>.json`` file in ``<archives_dir>/<archive_name>/`` is parsed
    and stored in ``self.archive`` under ``label``. ``search`` expects each
    parsed file to be a dict with a ``'threads'`` list of messages, where a
    message is a dict holding the searched field plus ``'subject'``,
    ``'date'``, ``'author_name'`` and ``'url'``, and optionally a
    ``'follow-up'`` list of nested messages.
    """

    def __init__(self, archives_dir=None):
        """Remember where archives live; defaults to ``"archives/"``."""
        self.archives_dir = "archives/" if archives_dir is None else archives_dir
        self.loaded = False

    def load(self, archive_name=None):
        """Read every ``.json`` file of ``archive_name`` into memory.

        Raises:
            Exception: if ``archive_name`` is ``None`` or its directory does
                not exist.
        """
        if archive_name is None:
            raise Exception('Archive is not specified')

        archive_path = os.path.join(self.archives_dir, archive_name)
        if not os.path.isdir(archive_path):
            # The message used to reference an undefined name and therefore
            # raised NameError instead of this exception.
            raise Exception('Archive ' + archive_path + ' does not exist')

        self.archive_name = archive_name
        self.archive_path = archive_path
        self.archive = {}

        suffix = '.json'
        for file_name in os.listdir(archive_path):
            if not file_name.endswith(suffix):
                continue
            # Strip only the trailing extension, not every '.json' occurrence.
            label = file_name[:-len(suffix)]
            file_path = os.path.join(archive_path, file_name)
            with open(file_path, encoding='utf-8') as fdata:
                self.archive[label] = json.load(fdata)

        self.loaded = True

    def search_message(self, keyword, msg, index_str, results, field='content'):
        """Search ``msg`` and, recursively, its follow-ups for ``keyword``.

        A hit record is appended to ``results`` for every message whose
        ``field`` contains ``keyword``. ``index_str`` identifies ``msg``;
        follow-ups get ``index_str + '/' + position``. A message without the
        field (or with an empty one) is treated as not matching.

        Returns:
            The number of matching messages in this subtree.
        """
        nbr_hits = 0

        text = msg.get(field) or ''
        # ">= 0" so that a keyword at the very start of the text also counts.
        if text.find(keyword) >= 0:
            nbr_hits += 1
            results.append({
                "index_str": index_str,
                "subject": msg['subject'],
                "date": msg['date'],
                "author_name": msg['author_name'],
                "url": msg['url'],
            })

        for i, follow_up in enumerate(msg.get('follow-up', [])):
            child_index_str = index_str + '/' + str(i)
            nbr_hits += self.search_message(keyword, follow_up, child_index_str, results, field)

        return nbr_hits

    def search(self, keyword, field='content'):
        """Search the whole loaded archive for ``keyword`` in ``field``.

        Archive entries are visited newest first, ordered by ``get_key``.

        Returns:
            A dict with ``keyword``, ``field``, ``archive`` and ``results``;
            ``results`` has one ``{'thread', 'nbr_hits', 'hits'}`` entry per
            archive file that contains at least one hit.

        Raises:
            Exception: if ``load`` has not completed successfully.
        """
        if not self.loaded:
            raise Exception('Archive is not loaded')

        search_results = {
            "keyword": keyword,
            "field": field,
            "archive": self.archive_name,
            "results": [],
        }

        for k, v in sorted(self.archive.items(), key=get_key, reverse=True):
            hits = []
            nbr_hits = 0
            for i, m in enumerate(v['threads']):
                current_index_str = self.archive_name + '/' + k + '/' + str(i)
                nbr_hits += self.search_message(keyword, m, current_index_str, hits, field)

            if nbr_hits > 0:
                search_results['results'].append({'thread': k, 'nbr_hits': nbr_hits, 'hits': hits})

        return search_results
def get_key(kv_tuple):
    """Return a sortable ``datetime`` for an archive ``(label, data)`` pair.

    The label (first element of ``kv_tuple``) is tried against these formats,
    in order:

    * ``"Month_Year"``, e.g. ``"January_2001"``
    * ``"Month(abv)_Year(abv)"``, e.g. ``"Jan_01"``
    * ``"Year"``, e.g. ``"2001"``

    A label matching none of them is logged and mapped to ``datetime.min`` so
    that sorting a mix of recognised and unrecognised labels never has to
    compare ``None`` with a ``datetime``.
    """
    k = kv_tuple[0]

    for fmt in ("%B_%Y", "%b_%y", "%Y"):
        try:
            return datetime.strptime(k, fmt)
        except (ValueError, TypeError):
            # Wrong format (or not a string at all): try the next one.
            continue

    logging.warning("Unrecognised archive label %r; sorting it as the oldest entry", k)
    return datetime.min

View File

@ -1,6 +1,7 @@
from flask import render_template
from flask import render_template, request, jsonify
from www import app
from www import archives
import search.archive
from datetime import datetime
@app.route('/')
@ -46,6 +47,9 @@ def get_list(list):
@app.route('/<list>/<sublist>')
def get_sublist(list, sublist):
print(list)
print(sublist)
sublist = sublist.replace(' ', '_')
if list in archives.archives_data and sublist in archives.archives_data[list]:
return render_template("threads.html", sublist_name=sublist, threads=archives.archives_data[list][sublist]['threads'])
@ -81,6 +85,54 @@ def get_follow_ups(list, sublist, index, follow_ups):
else:
'nope nope'
@app.route('/search')
def searh():
    """Serve the search form, or run a keyword search and return JSON.

    Without query arguments the search page is rendered with the names of the
    known archives. Otherwise the query arguments are:

    * ``keyword`` -- text to look for (required, non-blank)
    * ``list``    -- an archive name, or ``"all"`` (required)
    * ``sublist`` -- optional; validated against the chosen list only

    Validation failures are answered with a short plain-text message. On
    success the response is ``{"result": [<Archive.search() output>, ...]}``
    with one entry per searched list.
    """
    # NOTE: the misspelt function name is kept on purpose: Flask derives the
    # endpoint name from it, and renaming it to ``search`` would shadow the
    # imported ``search`` package used below.
    if len(request.args) < 1:
        k = archives.archives_data.keys()
        return render_template("search.html", archives=k)

    # The messages below echo request parameters, so they are sent as plain
    # text to keep the browser from interpreting them as HTML.
    plain_text = {'Content-Type': 'text/plain; charset=utf-8'}

    k_arg = request.args.get('keyword')
    l_arg = request.args.get('list')
    sl_arg = request.args.get('sublist')

    if k_arg is None or k_arg.strip() == '':
        return "no keyword...", 200, plain_text

    if l_arg is None:
        return "no list...", 200, plain_text

    if l_arg != "all" and l_arg not in archives.archives_data:
        return "list '" + l_arg + "' does not exist", 200, plain_text

    # A sublist can only be checked against one concrete list; "all" is not a
    # key of archives_data.
    if sl_arg is not None and l_arg != "all":
        if sl_arg not in archives.archives_data[l_arg]:
            message = "sublist '" + sl_arg + "' does not exist in list '" + l_arg + "'"
            return message, 200, plain_text

    if l_arg == "all":
        lists = [name for name in archives.archives_data.keys()]
    else:
        lists = [l_arg]

    # TODO: cache everything below -- each request reloads every requested
    # archive from disk before searching it.
    # TODO: sl_arg is validated above but not yet used to narrow the search.
    results = []
    for name in lists:
        a = search.archive.Archive()
        a.load(name)
        results.append(a.search(k_arg))

    return jsonify(result=results)

150
www/static/search.js Normal file
View File

@ -0,0 +1,150 @@
// Wire up the search form: submit it via AJAX, then render the textual results
// and the per-month hit graph from the JSON reply.
$(document).ready(function(){
    $('#search').on('submit', function(e) {
        e.preventDefault();
        // Declared locally so the query string does not leak into the global scope.
        const args = $(this).serialize();
        $.get('/search?' + args, function(data) {
            console.log(data);
            $('#graph').empty();
            $('#results').empty();
            // The server answers validation problems with a plain string
            // instead of JSON; show it rather than crashing in graph().
            if (!data || !Array.isArray(data.result)) {
                $('#results').text(typeof data === 'string' ? data : 'unexpected response');
                return;
            }
            $.each(data.result, function(i, item) {
                search_result_archive(item);
            });
            graph(data);
        }).fail(function() {
            $('#graph').empty();
            $('#results').text('search request failed');
        });
    });
});
/**
 * Render the search results of one archive into #results.
 *
 * @param {Object} a  one entry of the server's `result` array: `a.archive` is
 *                    the archive name and `a.results` a list of
 *                    `{thread, nbr_hits, hits}` where each hit carries `url`,
 *                    `subject` and `author_name`.
 *
 * All archive-derived strings are inserted as text / attribute values, never
 * as markup, so message subjects or author names cannot inject HTML.
 */
function search_result_archive(a) {
    // Keep references to the created elements instead of re-finding them with
    // string-built "#id" selectors, which break on names with special characters.
    const archive_div = $('<div/>', {
        id: a.archive,
        class: "archive",
    }).appendTo('#results');
    $('<h3/>').text(a.archive).appendTo(archive_div);

    $.each(a.results, function(i, r) {
        const thread_list = $('<ul/>', {
            id: r.thread + "-" + a.archive,
            text: r.thread.replace('_', ' ')
        }).appendTo(archive_div);

        const hits = $('<ul/>');
        $.each(r.hits, function(j, h) {
            $('<li/>')
                .append($('<a/>', { href: h.url, text: h.subject }))
                .append(document.createTextNode(' -- '))
                .append($('<i/>').text(h.author_name))
                .appendTo(hits);
        });
        thread_list.append(hits);
    });
}
// First month of the time range used by graph(): January 2000.
var min_month = new Date(2000, 0);
// End of the time range used by graph(): the moment this script was evaluated.
var max_month = new Date();
/**
 * Number of calendar months from d1 to d2 (days are ignored).
 * Never negative: when d2 falls before d1 the result is 0.
 */
function diff_months(d1, d2) {
    const year_gap = d2.getFullYear() - d1.getFullYear();
    const months = year_gap * 12 - d1.getMonth() + d2.getMonth();
    return Math.max(months, 0);
}
/**
 * Format a date as abbreviated English month name plus full year,
 * e.g. "Jan 2001".
 */
function format(date) {
    const month_names = "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split(" ");
    const month = month_names[date.getMonth()];
    return `${month} ${date.getFullYear()}`;
}
/**
 * Draw a bar chart (c3) of hits per month into #graph, one series per archive.
 *
 * @param {Object} data  the server reply; `data.result` is a list of
 *                       `{archive, results}` and every result has a `thread`
 *                       label (expected form "Month_Year") and `nbr_hits`.
 *
 * The x axis spans every month from `min_month` to `max_month`, both
 * included. Results whose label cannot be parsed as a date, or that lie after
 * `max_month`, are skipped with a console warning; results before `min_month`
 * are counted in the first month (diff_months clamps to 0).
 */
function graph(data) {
    // Both end months are part of the range, hence the "+ 1".
    const nbr_months = diff_months(min_month, max_month) + 1;

    const vec = [];
    for (let ar of data.result) {
        // c3 column layout: [series name, value of month 0, value of month 1, ...]
        let ar_vec = new Array(nbr_months + 1).fill(0);
        ar_vec[0] = ar.archive;
        for (let r of ar.results) {
            // "January_2001" -> "January 1, 2001"
            let timestamp = Date.parse(r.thread.replace("_", " 1, "));
            if (isNaN(timestamp)) {
                console.warn("graph: cannot parse thread label", r.thread);
                continue;
            }
            let index = diff_months(min_month, new Date(timestamp));
            if (index >= nbr_months) {
                console.warn("graph: thread label is past the graphed range", r.thread);
                continue;
            }
            // Accumulate: several results may land in the same month.
            ar_vec[index + 1] += r.nbr_hits;
        }
        vec.push(ar_vec);
    }

    // One category label per month, aligned with the value slots above.
    const x_axis = [];
    for (let i = 0; i < nbr_months; i++) {
        x_axis.push(format(new Date(min_month.getFullYear(), min_month.getMonth() + i)));
    }

    console.log(vec);

    c3.generate({
        bindto: '#graph',
        data: {
            columns: vec,
            type: 'bar'
        },
        axis: {
            x: {
                type: 'category',
                categories: x_axis,
                tick: {
                    culling: {
                        max: 15
                    },
                    multiline: false
                }
            }
        },
        bar: {
            width: {
                ratio: 0.9
            }
        }
    });
}

23
www/templates/search.html Normal file
View File

@ -0,0 +1,23 @@
<!DOCTYPE html>
<!-- Search page: a keyword/list form handled by static/search.js, which fills
     #graph with a c3 bar chart and #results with the matching threads. -->
<html>
<head>
    <meta charset="utf-8">
    <title>search</title>
    <link rel="stylesheet" type="text/css" href="{{ url_for('static',filename='lib/c3.min.css') }}">
    <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.2.1/jquery.min.js"></script>
    <script type="text/javascript" src="{{ url_for('static',filename='lib/d3.min.js') }}" charset="utf-8"></script>
    <script type="text/javascript" src="{{ url_for('static',filename='lib/c3.min.js') }}"></script>
    <script type="text/javascript" src="{{ url_for('static',filename='search.js') }}"></script>
</head>
<body>
    <form action="/search" method="get" id="search">
        <label for="keyword">keyword: </label><input type="search" name="keyword" id="keyword">
        <select form="search" name="list">
            <option value="all">all</option>
            {% for a in archives %}
            <option value="{{ a }}">{{ a }}</option>
            {% endfor %}
        </select>
        <input type="submit" value="search" id="submit">
    </form>
    <div id="graph"></div>
    <div id="results"></div>
</body>
</html>