# File-viewer header residue: 95 lines, 2.9 KiB, Python
import sys, logging, argparse
|
|
from datetime import datetime
|
|
import archive.archive as archive
|
|
import config
|
|
|
|
# Configure the root logger when this module is loaded so that messages of
# level DEBUG and above are emitted.
logging.basicConfig(level=logging.DEBUG)
|
|
|
|
def get_key(kv):
    """Return a chronological sort key for a ``(month_year, value)`` pair.

    The first element of *kv* is a string in ``"%B_%Y"`` form (for example
    ``"March_2015"``); it is parsed into a ``datetime`` so that pairs can be
    ordered by date rather than alphabetically.
    """
    month_year = kv[0]
    parsed = datetime.strptime(month_year, "%B_%Y")
    return parsed
|
|
|
|
def get_result_key(r):
    """Return the sort key of a per-archive result dict: its ``'archive'`` entry."""
    archive_name = r['archive']
    return archive_name
|
|
|
|
def run(args):
    """Search the requested mailing-list archives for a keyword.

    Args:
        args: namespace with the attributes ``keyword`` (search term),
            ``field`` (``"content"`` or ``"from"``) and ``list`` (archive
            names; empty/``None`` or ``["all"]`` selects every archive).
            When every archive is selected, ``args.list`` is replaced by
            the full list of archive names.

    Returns:
        A list with one dict per searched archive, sorted by the dict's
        ``'archive'`` value. Each dict has the keys ``keyword``, ``field``,
        ``archive`` and ``results``; ``results`` holds one entry per month
        (most recent first) with ``thread`` (the ``"%B_%Y"`` month label),
        ``nbr_hits`` and ``hits``.

    Raises:
        SystemExit: if no keyword is given or the field is not supported.
    """
    if not args.keyword:
        sys.exit('No keyword. Aborting.')

    if args.field not in ("content", "from"):
        sys.exit('Invalid field ' + args.field + '. Aborting.')

    all_lists = archive.list_tables_db(
        config.db['database'],
        config.db['host'],
        config.db['user'],
        config.db['password'],
    )

    if not args.list or (len(args.list) == 1 and args.list[0] == "all"):
        args.list = all_lists

    # The search parameters do not change between archives.
    keyword = args.keyword
    field = args.field

    results = []
    for list_name in args.list:
        if list_name not in all_lists:
            # %-style arguments keep the name and the message separated
            # (plain concatenation ran them together).
            logging.warning("%s is not a valid list... continuing", list_name)
            continue

        with archive.Archive(list_name, config=config.db) as a:
            if field == 'content':
                rows = a.content_search(keyword)
            else:
                rows = a.from_search(keyword)

            # format data to return
            search_results = {
                "keyword": keyword,
                "field": field,
                "archive": a.archive_name,
                "results": [],
            }

            # Group the hits by "Month_Year" label.
            hits_by_month = {}
            for _from, author_name, subject, date, url in rows:
                hits_by_month.setdefault(date.strftime("%B_%Y"), []).append(
                    {'url': url, 'subject': subject, 'author_name': author_name}
                )

            # Most recent month first.
            for month, hits in sorted(hits_by_month.items(), key=get_key, reverse=True):
                search_results['results'].append(
                    {'thread': month, 'nbr_hits': len(hits), 'hits': hits}
                )

            results.append(search_results)

    return sorted(results, key=get_result_key)
|
|
|
|
|
|
# for l in args.list:
|
|
# arch = search.archive.Archive('archives/')
|
|
# arch.load(l)
|
|
|
|
# r = arch.search(keyword=args.keyword, field=args.field)
|
|
|
|
# for z in r['results']:
|
|
# print(z['thread'] + " ---- " + str(z['nbr_hits']))
|
|
# for zz in z['hits']:
|
|
# print(" " + zz['url'])
|
|
# print(" " + zz['index_str'])
|
|
|
|
# sys.exit()
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, run the search and
    # print the result either as JSON or as the plain Python structure.
    p = argparse.ArgumentParser(description='Searches mailinglists archives')
    p.add_argument('keyword', metavar="keyword", help="keyword to search")
    p.add_argument('--list', '-l', help="mailinglist(s') name(s) (default 'all')", nargs="+")
    p.add_argument('--field', '-f', help="message field (i.e. 'content' or 'from' (default 'content'))", default="content")
    # nargs='?' with const=True lets a bare "--json" act as a flag; the
    # previous form "--json VALUE" is still accepted.
    p.add_argument('--json', '-j', help="json output", nargs='?', const=True, default=None)

    args = p.parse_args()

    result = run(args)

    if args.json:
        import json
        print(json.dumps(result, indent=4))
    else:
        print(result)
|
|
|
|
|