search
This commit is contained in:
parent
064a05b806
commit
3b01ec68c6
37
search.py
Normal file
37
search.py
Normal file
@ -0,0 +1,37 @@
|
||||
import sys, logging, argparse
|
||||
import search.archive
|
||||
|
||||
# Configure the root logger so that records from DEBUG level upwards are emitted.
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
def run(args):
    """Search every requested mailing-list archive and print the hits.

    ``args`` is the parsed command line; its ``keyword``, ``list`` and
    ``field`` attributes are read. When no list was given, ``args.list`` is
    replaced in place by a built-in default selection. The process is
    terminated through ``sys.exit`` both on a missing keyword and after all
    archives have been searched.
    """
    if not args.keyword:
        sys.exit('No keyword. Aborting.')

    if not args.list:
        args.list = ['spectre', 'crumb', 'empyre'] ## eh....

    for list_name in args.list:
        archive = search.archive.Archive('archives/')
        archive.load(list_name)

        outcome = archive.search(keyword=args.keyword, field=args.field)

        for thread in outcome['results']:
            header = thread['thread'] + " ---- " + str(thread['nbr_hits'])
            print(header)
            for hit in thread['hits']:
                # Two lines per hit: where it lives, then its index path.
                print(" " + hit['url'])
                print(" " + hit['index_str'])

    sys.exit()
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: build the parser, parse, hand over to run().
    parser = argparse.ArgumentParser(description='Searches mailinglists archives')
    parser.add_argument('keyword', metavar="keyword", help="keyword to search")
    parser.add_argument('--list', help="mailinglist(s') name(s)", nargs="+")
    parser.add_argument(
        '--field',
        help="message field (i.e. 'content' or 'subject', etc.)",
        default="content",
    )

    args = parser.parse_args()

    run(args)
|
||||
0
search/__init__.py
Normal file
0
search/__init__.py
Normal file
107
search/archive.py
Normal file
107
search/archive.py
Normal file
@ -0,0 +1,107 @@
|
||||
import logging, os, json, re
|
||||
from datetime import datetime
|
||||
|
||||
class Archive():
    """One mailing-list archive loaded from a directory of JSON files.

    Each ``<label>.json`` file found in ``<archives_dir>/<archive_name>`` is
    parsed and stored in ``self.archive`` under ``label``. The search code
    expects every parsed file to carry a ``'threads'`` list of message dicts;
    a message may carry nested replies under ``'follow-up'``.
    """

    def __init__(self, archives_dir=None):
        """Remember the base directory; defaults to ``"archives/"``."""
        self.archives_dir = "archives/" if archives_dir is None else archives_dir
        self.loaded = False

    def load(self, archive_name=None):
        """Read every ``.json`` file of ``archive_name`` into memory.

        Raises:
            Exception: if ``archive_name`` is ``None`` or the archive
                directory does not exist.
        """
        if archive_name is None:
            raise Exception('Archive is not specified')

        archive_path = os.path.join(self.archives_dir, archive_name)
        if not os.path.isdir(archive_path):
            # The original referenced an undefined name here, which raised
            # NameError instead of this message.
            raise Exception('Archive ' + archive_path + ' does not exist')

        self.archive_name = archive_name
        self.archive_path = archive_path

        files = [f for f in os.listdir(archive_path) if f.endswith('.json')]

        self.archive = {}

        for f in files:
            file_path = os.path.join(archive_path, f)
            label = f.replace('.json', '')
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(file_path, encoding='utf-8') as fdata:
                self.archive[label] = json.load(fdata)

        self.loaded = True

    def search_message(self, keyword, msg, index_str, results, field='content'):
        """Search ``msg`` and, recursively, its follow-ups for ``keyword``.

        A hit is a message whose ``field`` value contains ``keyword``
        (case-sensitive substring match, including a match at offset 0).
        For each hit a summary dict is appended to ``results``.

        Args:
            keyword: substring to look for.
            msg: message dict; must contain ``field`` and, for hits,
                ``subject``, ``date``, ``author_name`` and ``url``.
            index_str: slash-separated path identifying ``msg``.
            results: list that receives one dict per hit (mutated in place).
            field: message key whose value is searched.

        Returns:
            The number of hits found in ``msg`` and all nested follow-ups.
        """
        nbr_hits = 0
        if keyword in msg[field]:
            nbr_hits += 1
            results.append({
                "index_str": index_str,
                "subject": msg['subject'],
                "date": msg['date'],
                "author_name": msg['author_name'],
                "url": msg['url'],
            })

        if 'follow-up' in msg:
            for i, m in enumerate(msg['follow-up']):
                current_index_str = index_str + '/' + str(i)
                nbr_hits += self.search_message(keyword, m, current_index_str, results, field)

        return nbr_hits

    def search(self, keyword, field='content'):
        """Search the whole loaded archive for ``keyword``.

        The loaded files are visited in descending order of the sort key
        computed by the module-level ``get_key`` helper.

        Returns:
            A dict with ``keyword``, ``field``, ``archive`` and ``results``;
            ``results`` holds one ``{'thread', 'nbr_hits', 'hits'}`` entry per
            file that produced at least one hit.

        Raises:
            Exception: if called before a successful ``load()``.
        """
        if not self.loaded:
            raise Exception('Archive is not loaded')

        search_results = {"keyword": keyword, "field": field, "archive": self.archive_name, "results": []}

        for k, v in sorted(self.archive.items(), key=get_key, reverse=True):
            hits = []
            nbr_hits = 0
            for i, m in enumerate(v['threads']):
                current_index_str = self.archive_name + '/' + k + '/' + str(i)
                nbr_hits += self.search_message(keyword, m, current_index_str, hits, field)

            if nbr_hits > 0:
                search_results['results'].append({'thread': k, 'nbr_hits': nbr_hits, 'hits': hits})

        return search_results
|
||||
|
||||
|
||||
|
||||
def get_key(kv_tuple):
    """Return a chronological sort key for a ``(label, value)`` pair.

    The label (first element of ``kv_tuple``) is parsed with the following
    formats, in order:

    * ``"%B_%Y"`` - "Month_Year", e.g. ``"January_2001"``
    * ``"%b_%y"`` - abbreviated month and year, e.g. ``"Jan_01"``
    * ``"%Y"``    - year only, e.g. ``"2001"``

    Returns:
        The parsed ``datetime``. A label matching none of the formats yields
        ``datetime.min`` so that it still compares with the other keys;
        returning ``None`` would make ``sorted()`` raise ``TypeError`` as soon
        as one label is unparseable.
    """
    k = kv_tuple[0]

    for fmt in ("%B_%Y", "%b_%y", "%Y"):
        try:
            return datetime.strptime(k, fmt)
        except (ValueError, TypeError):
            # Not this format (or not a string at all); try the next one.
            continue

    logging.warning("unrecognised archive label: %r", k)
    return datetime.min
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
from flask import render_template
|
||||
from flask import render_template, request, jsonify
|
||||
from www import app
|
||||
from www import archives
|
||||
import search.archive
|
||||
from datetime import datetime
|
||||
|
||||
@app.route('/')
|
||||
@ -46,6 +47,9 @@ def get_list(list):
|
||||
@app.route('/<list>/<sublist>')
|
||||
def get_sublist(list, sublist):
|
||||
|
||||
print(list)
|
||||
print(sublist)
|
||||
|
||||
sublist = sublist.replace(' ', '_')
|
||||
if list in archives.archives_data and sublist in archives.archives_data[list]:
|
||||
return render_template("threads.html", sublist_name=sublist, threads=archives.archives_data[list][sublist]['threads'])
|
||||
@ -81,6 +85,54 @@ def get_follow_ups(list, sublist, index, follow_ups):
|
||||
else:
|
||||
'nope nope'
|
||||
|
||||
@app.route('/search')
def searh():
    """Serve the search form, or run a keyword search and return JSON.

    Without any query argument the search form is rendered. Otherwise the
    ``keyword`` and ``list`` arguments are required (``list`` may be ``all``)
    and ``sublist`` is optional. Validation failures are answered with a
    short plain message; a successful search returns
    ``{"result": [<one search result per list>]}``.
    """
    # Local import: user-supplied values are echoed back in an HTML response.
    from html import escape

    if len(request.args) < 1:
        k = archives.archives_data.keys()
        return render_template("search.html", archives=k)

    k_arg = request.args.get('keyword')
    l_arg = request.args.get('list')
    sl_arg = request.args.get('sublist')

    if k_arg is None or k_arg.strip() == '':
        return "no keyword..."

    if l_arg is None:
        return "no list..."

    if l_arg != "all" and l_arg not in archives.archives_data:
        return "list '" + escape(l_arg) + "' does not exist"

    if sl_arg is not None:
        # The original indexed with an undefined name `l` (NameError); use
        # .get() so that list == "all" does not raise KeyError either.
        if sl_arg not in archives.archives_data.get(l_arg, {}):
            return "sublist '" + escape(sl_arg) + "' does not exist in list '" + escape(l_arg) + "'"
        # NOTE(review): sublist is validated but not used to narrow the
        # search below - confirm whether that is intended.

    if l_arg == "all":
        lists = list(archives.archives_data.keys())
    else:
        lists = [l_arg]

    ################################
    ##
    ## need to cache all the below
    ##
    ################################

    results = []
    for name in lists:
        a = search.archive.Archive()
        a.load(name)
        results.append(a.search(k_arg))

    return jsonify(result=results)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
150
www/static/search.js
Normal file
150
www/static/search.js
Normal file
@ -0,0 +1,150 @@
|
||||
|
||||
// Once the DOM is ready, take over the search form: submit it via AJAX,
// render the textual results and redraw the hits-per-month graph.
$(document).ready(function(){
    $('#search').on('submit', function(e) {
        e.preventDefault();
        // Declared locally; the original leaked `args` as an implicit global.
        var args = $(this).serialize();
        $.get('/search?' + args, function(data) {
            console.log(data);
            $('#graph').empty();
            $('#results').empty();
            // Validation errors come back as a plain message rather than
            // JSON; show it instead of failing inside graph().
            if (!data || !data.result) {
                $('#results').text(String(data));
                return;
            }
            $.each(data.result, function(i, item) {
                search_result_archive(item);
            });
            graph(data);
        });
    });
});
|
||||
|
||||
/**
 * Render the search result of one archive into #results.
 *
 * Produces a <div class="archive"> holding an <h3> title and, per thread
 * with hits, a <ul> labelled with the thread name that contains a nested
 * <ul> of links to the matching messages.
 *
 * All values taken from the archive (names, subjects, authors) are inserted
 * as text or attributes, never as markup, because they originate from
 * mailing-list content. Elements are addressed through their jQuery handles
 * rather than '#id' selectors, which fail on ids with special characters.
 *
 * @param {Object} a - one entry of the server's `result` array
 *                     ({archive, results: [{thread, hits: [...]}]}).
 */
function search_result_archive(a) {
    var archive = $('<div/>', {
        id: a.archive,
        class: "archive"
    }).appendTo('#results');
    archive.append($('<h3/>').text(a.archive));

    $.each(a.results, function(i, r) {
        var thread = $('<ul/>', {
            id: r.thread + "-" + a.archive,
            text: r.thread.replace('_', ' ')
        }).appendTo(archive);

        var hits = $('<ul/>');
        $.each(r.hits, function(j, h) {
            // NOTE(review): h.url is used as-is; confirm archives can only
            // contain http(s) links.
            $('<li/>')
                .append($('<a/>', { href: h.url, text: h.subject }))
                .append(document.createTextNode(' -- '))
                .append($('<i/>').text(h.author_name))
                .appendTo(hits);
        });
        thread.append(hits);
    });
}
|
||||
|
||||
// First month shown on the graph's time axis: January 2000 (months are 0-based).
var min_month = new Date(2000, 0);
|
||||
// End of the graph's time axis: the date at which this script was evaluated.
var max_month = new Date();
|
||||
|
||||
/**
 * Number of calendar months from d1 to d2, ignoring the day of month.
 * Never negative: when d2 is not later than d1 the result is 0.
 */
function diff_months(d1, d2) {
    var year_part = 12 * (d2.getFullYear() - d1.getFullYear());
    var delta = year_part - d1.getMonth() + d2.getMonth();
    if (delta <= 0) {
        return 0;
    }
    return delta;
}
|
||||
|
||||
/**
 * Format a Date as "<three-letter month> <full year>", e.g. "Jan 2001".
 */
function format(date) {
    var month_names = "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split(" ");
    var month = month_names[date.getMonth()];
    var year = date.getFullYear();
    return `${month} ${year}`;
}
|
||||
|
||||
|
||||
/**
 * Draw a bar chart of hits per month, one data series per archive.
 *
 * The x axis covers every month from min_month to max_month inclusive.
 * Each c3 column is [archive name, hits for month 0, hits for month 1, ...].
 *
 * @param {Object} data - server response; data.result is a list of
 *                        {archive, results: [{thread, nbr_hits}]}.
 */
function graph(data) {
    // Inclusive month count. The original sized everything for one month
    // less, so hits in the current month were written past the end of the
    // column and had no matching x-axis category.
    var n_months = diff_months(min_month, max_month) + 1;

    var vec = [];
    for (let ar of data.result) {
        let ar_vec = new Array(n_months + 1).fill(0);
        ar_vec[0] = ar.archive;
        for (let r of ar.results) {
            // Thread labels look like "January_2001" -> "January 1, 2001".
            let date = new Date(Date.parse(r.thread.replace("_", " 1, ")));
            if (isNaN(date.getTime())) {
                // Label not understood by Date.parse: leave it off the graph
                // rather than writing to ar_vec[NaN].
                continue;
            }
            let index = diff_months(min_month, date);
            if (index >= n_months) {
                // Later than max_month: would make the columns uneven.
                continue;
            }
            ar_vec[index + 1] = r.nbr_hits;
        }
        vec.push(ar_vec);
    }

    var x_axis = new Array(n_months);
    for (let i = 0; i < n_months; i++) {
        // The Date constructor normalises month values beyond 11.
        let month = new Date(min_month.getFullYear(), min_month.getMonth() + i);
        x_axis[i] = format(month);
    }

    console.log(vec);

    c3.generate({
        bindto: '#graph',
        data: {
            columns: vec,
            type: 'bar'
        },
        axis: {
            x: {
                type: 'category',
                categories: x_axis,
                tick: {
                    culling: {
                        max: 15
                    },
                    multiline: false
                }
            }
        },
        bar: {
            width: {
                ratio: 0.9
            }
        }
    });
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
23
www/templates/search.html
Normal file
23
www/templates/search.html
Normal file
@ -0,0 +1,23 @@
|
||||
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>search</title>
    <link rel="stylesheet" type="text/css" href="{{ url_for('static',filename='lib/c3.min.css') }}">
    <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.2.1/jquery.min.js"></script>
    <script type="text/javascript" src="{{ url_for('static',filename='lib/d3.min.js') }}" charset="utf-8"></script>
    <script type="text/javascript" src="{{ url_for('static',filename='lib/c3.min.js') }}"></script>
    <script type="text/javascript" src="{{ url_for('static',filename='search.js') }}"></script>
</head>
<body>
    {# search.js intercepts the submit event of this form and queries /search via AJAX. #}
    <form action="/search" method="get" id="search">
        <label for="keyword">keyword: </label><input type="search" name="keyword" id="keyword">
        <select form="search" name="list">
            <option value="all">all</option>
            {% for a in archives %}
            <option value="{{ a }}">{{ a }}</option>
            {% endfor %}
        </select>
        <input type="submit" value="search" id="submit">
    </form>
    {# Filled in by search.js: the hits-per-month chart and the list of results. #}
    <div id="graph"></div>
    <div id="results"></div>
</body>
</html>
|
||||
Loading…
x
Reference in New Issue
Block a user