"""Flask views for browsing and searching the mailing-list archives."""

import html
import logging
import os
import time
from datetime import datetime

from flask import render_template, request, jsonify, send_from_directory

from www import app
import archive.archive as archive
import config

# Fields a search may target; anything else is rejected by the /search view.
SEARCH_FIELDS = ('content', 'from')


def _list_tables():
    """Return whatever ``archive.list_tables_db`` reports for the configured DB.

    Presumably the names of the archive tables -- confirm against the
    ``archive`` module.
    """
    db = config.db
    return archive.list_tables_db(db['database'], db['host'], db['user'], db['password'])


# Snapshot taken at import time; refreshed each time the bare search page
# (no query arguments) is rendered.
lists_to_serve = _list_tables()


@app.route('/')
def index():
    """Render the landing page."""
    return render_template("index.html")


@app.route('/favicon.ico')
def favicon():
    """Serve the favicon from the application's ``static`` directory."""
    static_dir = os.path.join(app.root_path, 'static')
    return send_from_directory(static_dir, 'favicon.ico',
                               mimetype='image/vnd.microsoft.icon')


@app.route('/lists')
def lists():
    """Return the current table listing as JSON under the ``lists`` key."""
    return jsonify(lists=_list_tables())


def _group_hits_by_month(rows):
    """Group search rows by month and return them newest month first.

    ``rows`` must yield 5-tuples ``(from, author_name, subject, date, url)``
    where ``date`` supports ``strftime``.

    Returns a list of dicts, one per month that has hits:
        'thread'   -- the month formatted as "%B_%Y" (e.g. "January_2001")
        'nbr_hits' -- number of hits in that month
        'hits'     -- [{'url': ..., 'subject': ..., 'author_name': ...}, ...]
    """
    by_month = {}
    for (_from, author_name_, subject_, date_, url_) in rows:
        hit = {'url': url_, 'subject': subject_, 'author_name': author_name_}
        by_month.setdefault(date_.strftime("%B_%Y"), []).append(hit)

    ordered = sorted(by_month.items(), key=get_key, reverse=True)
    return [{'thread': month, 'nbr_hits': len(hits), 'hits': hits}
            for month, hits in ordered]


@app.route('/search')
def searh():
    """Render the search form, or run a search and return the hits as JSON.

    Without query arguments the search page is rendered (and the module-level
    ``lists_to_serve`` is refreshed). Otherwise the query arguments are:

        keyword -- text to search for (required, non-blank)
        list    -- an entry of ``lists_to_serve``, or "all"
        field   -- one of ``SEARCH_FIELDS``

    Invalid or missing arguments yield a short plain message; a missing
    ``list`` or ``field`` previously crashed with a ``TypeError``. On success
    the response is ``{"result": [...]}`` with one entry per searched archive,
    sorted by archive name; each entry's "results" is described in
    ``_group_hits_by_month``.

    NOTE: the function name (a typo for "search") is kept because Flask uses
    it as the endpoint name.
    """
    global lists_to_serve

    if not request.args:
        # q: list all table or keep predefined lists_to_serve?
        lists_to_serve = _list_tables()
        return render_template("search.html", archives=lists_to_serve,
                               fields=list(SEARCH_FIELDS))

    k_arg = request.args.get('keyword')
    l_arg = request.args.get('list')
    f_arg = request.args.get('field')

    if k_arg is None or k_arg.strip() == '':
        return "no keyword..."

    if l_arg is None:
        return "no list..."
    if l_arg != "all" and l_arg not in lists_to_serve:
        logging.warning("list %r does not exist (serving %s)", l_arg, lists_to_serve)
        # The message is served as HTML, so user input must be escaped.
        return "list '" + html.escape(l_arg) + "' does not exist"

    if f_arg is None:
        return "no field..."
    if f_arg not in SEARCH_FIELDS:
        return "field '" + html.escape(f_arg) + "' does not exist"

    logging.info("/search k_arg=%s l_arg=%s f_arg=%s", k_arg, l_arg, f_arg)

    archive_names = _list_tables() if l_arg == "all" else [l_arg]

    # TODO: need to cache all the below.....
    results = []
    logging.info("search keyword = %s", k_arg)
    for name in archive_names:
        with archive.Archive(name, config=config.db) as a:
            if f_arg == 'content':
                rows = a.content_search(k_arg)
            else:
                rows = a.from_search(k_arg)

            # Consume the rows while the archive is still open.
            results.append({
                "keyword": k_arg,
                "field": f_arg,
                "archive": a.archive_name,
                "results": _group_hits_by_month(rows),
            })

    return jsonify(result=sorted(results, key=get_result_key))


def get_key(kv):
    """Sort key for ``(month, hits)`` pairs: parse the "%B_%Y" month label."""
    return datetime.strptime(kv[0], "%B_%Y")


def get_result_key(r):
    """Sort key for per-archive search results: the archive name."""
    return r['archive']