2019-08-24 14:21:26 +02:00
|
|
|
from flask import render_template, request, jsonify, send_from_directory
|
2017-07-25 11:30:04 +02:00
|
|
|
from www import app
|
2019-07-11 13:21:42 +02:00
|
|
|
import archive.archive as archive
|
|
|
|
|
import config
|
|
|
|
|
import www.config as wconfig
|
2017-07-25 11:30:04 +02:00
|
|
|
from datetime import datetime
|
2019-12-09 13:40:21 +01:00
|
|
|
import os, logging
|
2017-11-07 14:36:05 +01:00
|
|
|
|
|
|
|
|
|
2017-07-25 11:30:04 +02:00
|
|
|
@app.route('/')
def index():
    """Serve the site root by rendering the ``index.html`` template."""
    landing_page = render_template("index.html")
    return landing_page
|
2017-07-25 11:30:04 +02:00
|
|
|
|
2019-07-17 12:55:47 +02:00
|
|
|
@app.route('/favicon.ico')
def favicon():
    """Serve ``favicon.ico`` from the ``static`` directory under ``app.root_path``."""
    static_dir = os.path.join(app.root_path, 'static')
    return send_from_directory(
        static_dir,
        'favicon.ico',
        mimetype='image/vnd.microsoft.icon',
    )
|
|
|
|
|
|
2019-12-09 13:40:21 +01:00
|
|
|
@app.route('/lists')
def lists():
    """Return the result of ``archive.list_tables_db`` as JSON.

    The connection settings are read from ``config.db`` and the value
    returned by ``list_tables_db`` is placed under the ``lists`` key of the
    JSON response.
    """
    db = config.db
    tables = archive.list_tables_db(
        db['database'],
        db['host'],
        db['user'],
        db['password'],
    )
    return jsonify(lists=tables)
|
|
|
|
|
|
2017-07-27 10:09:33 +02:00
|
|
|
@app.route('/search')
def searh():
    """Render the search form, or run a keyword search and return JSON.

    With no query parameters at all, ``wconfig.lists_to_serve`` is refreshed
    from ``archive.list_tables_db`` and the ``search.html`` form is rendered.

    Query parameters:
        keyword: text to search for; required and must not be blank.
        list: an entry of ``wconfig.lists_to_serve``, or ``"all"`` to search
            every entry.
        field: ``"content"`` or ``"from"``; selects ``content_search`` or
            ``from_search`` on the archive.

    Returns:
        - the rendered ``search.html`` page when called without parameters;
        - a plain message string when a parameter is missing or invalid
          (user-supplied values are HTML-escaped before being echoed);
        - otherwise a JSON object ``{"result": [...]}`` with one entry per
          searched archive, ordered by ``get_result_key``.  Each entry holds
          ``keyword``, ``field``, ``archive`` and ``results``; ``results`` is
          a list of ``{'thread', 'nbr_hits', 'hits'}`` dicts, one per
          ``"%B_%Y"`` month key (e.g. ``"January_2001"``), newest first.

    NOTE: the function name (a typo of "search") is kept because it is the
    Flask endpoint name that ``url_for`` callers may reference.
    """
    # Imported locally so this handler is self-contained with respect to the
    # module's import block.
    from html import escape

    if len(request.args) < 1:
        # q: list all table or keep predefined wconfig.lists_to_serve?
        wconfig.lists_to_serve = archive.list_tables_db(
            config.db['database'],
            config.db['host'],
            config.db['user'],
            config.db['password'],
        )
        return render_template(
            "search.html",
            archives=wconfig.lists_to_serve,
            fields=['content', 'from'],
        )

    k_arg = request.args.get('keyword')
    l_arg = request.args.get('list')
    f_arg = request.args.get('field')

    if k_arg is None or k_arg.strip() == '':
        return "no keyword..."

    # str() keeps a missing parameter (None) from raising TypeError, and
    # escape() prevents the echoed value from being interpreted as markup.
    if l_arg != "all" and l_arg not in wconfig.lists_to_serve:
        return "list '{}' does not exist".format(escape(str(l_arg)))

    if f_arg not in ['content', 'from']:
        return "field '{}' does not exist".format(escape(str(f_arg)))

    if l_arg == "all":
        selected_lists = wconfig.lists_to_serve
    else:
        selected_lists = [l_arg]

    logging.debug("search lists = %s", selected_lists)

    # TODO: everything below should be cached.

    results = []

    logging.info("search keyword = %s", k_arg)

    for list_name in selected_lists:

        with archive.Archive(list_name, config=config.db) as a:
            if f_arg == 'content':
                rows = a.content_search(k_arg)
            else:
                rows = a.from_search(k_arg)

            # format data to return
            search_results = {
                "keyword": k_arg,
                "field": f_arg,
                "archive": a.archive_name,
                "results": [],
            }

            # Group hits by "<MonthName>_<Year>".
            # Each row is unpacked as a 5-tuple; date_ must provide
            # strftime (presumably a datetime -- confirm against archive).
            month_year_results = {}
            for (from_, author_name_, subject_, date_, url_) in rows:
                m_y = date_.strftime("%B_%Y")
                month_year_results.setdefault(m_y, []).append({
                    'url': url_,
                    'subject': subject_,
                    'author_name': author_name_,
                })

            # Newest month first; get_key parses the "%B_%Y" key into a date.
            for month_key, hits in sorted(month_year_results.items(), key=get_key, reverse=True):
                search_results['results'].append({
                    'thread': month_key,
                    'nbr_hits': len(hits),
                    'hits': hits,
                })

            results.append(search_results)

    sorted_results = sorted(results, key=get_result_key)
    return jsonify(result=sorted_results)
|
2017-07-27 10:09:33 +02:00
|
|
|
|
2017-11-04 13:34:05 +01:00
|
|
|
|
2019-07-11 13:21:42 +02:00
|
|
|
def get_key(kv):
    """Sort key for ``(month_key, hits)`` pairs.

    Parses the first element of *kv*, a ``"%B_%Y"`` string such as
    ``"January_2001"``, into a ``datetime`` so pairs order chronologically.
    """
    month_key = kv[0]
    parsed = datetime.strptime(month_key, "%B_%Y")
    return parsed
|
2017-11-04 13:34:05 +01:00
|
|
|
|
|
|
|
|
def get_result_key(r):
    """Sort key for per-archive search results: the value under ``'archive'``."""
    archive_name = r['archive']
    return archive_name
|
2017-07-27 10:09:33 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2017-07-25 11:30:04 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|