"""Command-line search over mailing-list archives stored in a database.

Searches one or more archived lists for a keyword, either in the message
content or in the sender field, and prints the hits grouped by month.
"""

import argparse
import json
import logging
import sys
from datetime import datetime

import archive.archive as archive
import config

logging.basicConfig(level=logging.DEBUG)

# Format used to label a group of hits, e.g. "March_2017".
MONTH_YEAR_FORMAT = "%B_%Y"

# Message fields that can be searched.
SEARCHABLE_FIELDS = ("content", "from")


def get_key(kv):
    """Return the datetime encoded in the key of a (month_year, hits) pair."""
    return datetime.strptime(kv[0], MONTH_YEAR_FORMAT)


def get_result_key(r):
    """Return the archive name of a per-list search result (sort key)."""
    return r['archive']


def _group_hits_by_month(rows):
    """Group raw search rows by month and return them newest month first.

    Each row is unpacked as (from, author_name, subject, date, url); `date`
    must provide `strftime`. The sender field is not included in the output.
    """
    hits_by_month = {}
    for (_from, author_name, subject, date, url) in rows:
        hits_by_month.setdefault(date.strftime(MONTH_YEAR_FORMAT), []).append({
            'url': url,
            'subject': subject,
            'author_name': author_name})

    return [
        {'thread': month_year, 'nbr_hits': len(hits), 'hits': hits}
        for month_year, hits in sorted(
            hits_by_month.items(), key=get_key, reverse=True)
    ]


def _search_list(list_name, keyword, field):
    """Search a single archived list and return its formatted result dict."""
    with archive.Archive(list_name, config=config.db) as a:
        if field == 'content':
            rows = a.content_search(keyword)
        else:
            rows = a.from_search(keyword)

        # Rows are consumed while the archive is still open, in case the
        # search returns a lazy cursor rather than a materialised list.
        return {
            "keyword": keyword,
            "field": field,
            "archive": a.archive_name,
            "results": _group_hits_by_month(rows)
        }


def run(args):
    """Run the search described by `args` and return the formatted results.

    Args:
        args: namespace with the attributes `keyword` (search term), `field`
            ("content" or "from") and `list` (names of the lists to search;
            empty, None or ["all"] means every list in the database).
            `args.list` is overwritten with the full list of names when
            "all" is selected.

    Returns:
        A list with one dict per searched list, sorted by archive name. Each
        dict has the keys "keyword", "field", "archive" and "results";
        "results" holds one entry per month ("thread", "nbr_hits", "hits"),
        newest month first.

    Exits the process (via sys.exit) when the keyword is empty or the field
    is not searchable. Unknown list names are logged and skipped.
    """
    if not args.keyword:
        sys.exit('No keyword. Aborting.')

    if args.field not in SEARCHABLE_FIELDS:
        sys.exit('Invalid field ' + args.field + '. Aborting.')

    all_lists = archive.list_tables_db(
        config.db['database'], config.db['host'],
        config.db['user'], config.db['password'])

    if not args.list or (len(args.list) == 1 and args.list[0] == "all"):
        args.list = all_lists

    results = []
    for list_name in args.list:
        if list_name not in all_lists:
            logging.warning(
                "%s is not a valid list... continuing", list_name)
            continue
        results.append(_search_list(list_name, args.keyword, args.field))

    return sorted(results, key=get_result_key)

# Legacy file-based search, kept for reference:
#
# for l in args.list:
#     arch = search.archive.Archive('archives/')
#     arch.load(l)
#     r = arch.search(keyword=args.keyword, field=args.field)
#     for z in r['results']:
#         print(z['thread'] + " ---- " + str(z['nbr_hits']))
#         for zz in z['hits']:
#             print("   " + zz['url'])
#             print("   " + zz['index_str'])
# sys.exit()


if __name__ == "__main__":

    p = argparse.ArgumentParser(description='Searches mailinglists archives')
    p.add_argument('keyword', metavar="keyword",
                   help="keyword to search")
    p.add_argument('--list', '-l',
                   help="mailinglist(s') name(s) (default 'all')", nargs="+")
    p.add_argument('--field', '-f',
                   help="message field (i.e. 'content' or 'from' "
                        "(default 'content'))",
                   default="content")
    # Optional value so that a bare `--json` works as a flag, while older
    # invocations that passed a value (`--json 1`) keep working.
    p.add_argument('--json', '-j', help="json output",
                   nargs="?", const=True, default=False)

    args = p.parse_args()

    result = run(args)

    if args.json:
        print(json.dumps(result, indent=4))
    else:
        print(result)