import json
import logging
import os
import re
from datetime import datetime


class Archive:
    """A directory of JSON files loaded in memory and searchable by keyword.

    An archive is a sub-directory of ``archives_dir``. Every ``*.json`` file
    in it is parsed and stored in ``self.archive`` under the file name minus
    its ``.json`` suffix. ``search`` expects each parsed document to be a
    mapping with a ``'threads'`` list of message dicts; a message may carry
    nested replies under ``'follow-up'``.
    """

    def __init__(self, archives_dir=None):
        """Remember where archives live.

        Args:
            archives_dir: Directory containing one sub-directory per archive.
                When ``None``, ``www.config.ARCHIVES_PATH`` is used.
        """
        if archives_dir is None:
            # Imported lazily so the project config is only required when no
            # explicit directory is supplied.
            from www import config
            self.archives_dir = config.ARCHIVES_PATH
        else:
            self.archives_dir = archives_dir
        self.loaded = False

    def load(self, archive_name=None):
        """Read every ``*.json`` file of ``archive_name`` into memory.

        Args:
            archive_name: Name of the sub-directory of ``archives_dir``.

        Raises:
            Exception: If ``archive_name`` is ``None`` or the directory does
                not exist.
        """
        if archive_name is None:
            raise Exception('Archive is not specified')

        archive_path = os.path.join(self.archives_dir, archive_name)
        if not os.path.isdir(archive_path):
            raise Exception('Archive ' + archive_path + ' does not exist')

        suffix = '.json'
        loaded = {}
        for file_name in os.listdir(archive_path):
            if not file_name.endswith(suffix):
                continue
            # Strip only the trailing suffix; str.replace would also eat a
            # '.json' occurring in the middle of the file name.
            label = file_name[:-len(suffix)]
            file_path = os.path.join(archive_path, file_name)
            with open(file_path, encoding='utf-8') as fdata:
                loaded[label] = json.load(fdata)

        # Publish state only once every file parsed successfully.
        self.archive_name = archive_name
        self.archive_path = archive_path
        self.archive = loaded
        self.loaded = True

    def search_message(self, keyword, msg, index_str, results, field='content'):
        """Search ``msg`` and, recursively, its follow-ups for ``keyword``.

        Args:
            keyword: Substring to look for (case-sensitive). An empty string
                is contained in every string and therefore matches every
                message.
            msg: Message dict; must contain ``field`` and, when it matches,
                ``'subject'``, ``'date'``, ``'author_name'`` and ``'url'``.
            index_str: Slash-separated position of ``msg`` in the archive.
            results: List that hit summaries are appended to (mutated).
            field: Key of ``msg`` whose text is searched.

        Returns:
            Number of matching messages in ``msg`` and its follow-ups.
        """
        nbr_hits = 0
        # Membership test rather than ``find() > 0`` so that a match at the
        # very start of the text (offset 0) is counted too.
        if keyword in msg[field]:
            nbr_hits += 1
            results.append({
                "index_str": index_str,
                "subject": msg['subject'],
                "date": msg['date'],
                "author_name": msg['author_name'],
                "url": msg['url'],
            })

        for i, reply in enumerate(msg.get('follow-up', ())):
            nbr_hits += self.search_message(
                keyword, reply, index_str + '/' + str(i), results, field)
        return nbr_hits

    def search(self, keyword, field='content'):
        """Search every loaded document for ``keyword``.

        Documents are visited newest first according to ``get_key``; labels
        that ``get_key`` cannot parse are visited last.

        Args:
            keyword: Substring to look for.
            field: Message key whose text is searched.

        Returns:
            A dict with ``'keyword'``, ``'field'``, ``'archive'`` and
            ``'results'``. ``'results'`` holds one
            ``{'thread', 'nbr_hits', 'hits'}`` entry per document that had
            at least one hit.
        """
        search_results = {
            "keyword": keyword,
            "field": field,
            "archive": self.archive_name,
            "results": [],
        }

        def sort_key(kv_tuple):
            # get_key returns None for unrecognised labels; None cannot be
            # ordered against datetime, so fall back to the earliest date.
            parsed = get_key(kv_tuple)
            return parsed if parsed is not None else datetime.min

        for k, v in sorted(self.archive.items(), key=sort_key, reverse=True):
            hits = []
            nbr_hits = 0
            for i, m in enumerate(v['threads']):
                current_index_str = self.archive_name + '/' + k + '/' + str(i)
                nbr_hits += self.search_message(
                    keyword, m, current_index_str, hits, field)
            if nbr_hits > 0:
                search_results['results'].append(
                    {'thread': k, 'nbr_hits': nbr_hits, 'hits': hits})

        return search_results


def get_key(kv_tuple):
    """Turn the label of a ``(label, value)`` pair into a sortable datetime.

    Recognised label formats, tried in this order:
      * ``"Month_Year"``           - ex.: ``"January_2001"``
      * ``"Month(abv)_Year(abv)"`` - ex.: ``"Jan_01"``
      * ``"Year"``                 - ex.: ``"2001"``

    Args:
        kv_tuple: Sequence whose first element is the label.

    Returns:
        The parsed ``datetime``, or ``None`` when no format matches.
    """
    k = kv_tuple[0]
    for fmt in ("%B_%Y", "%b_%y", "%Y"):
        try:
            return datetime.strptime(k, fmt)
        except (ValueError, TypeError):
            # Wrong format (or non-string label): try the next one.
            continue

    logging.getLogger(__name__).warning(
        "Unrecognized archive key format: %s", k)
    return None