diff --git a/search/archive.py b/search/archive.py index 664cd7b..7b4631a 100644 --- a/search/archive.py +++ b/search/archive.py @@ -5,6 +5,8 @@ import analysis.archive ## circular... import analysis.query import analysis.format +import threading + class Archive(): def __init__(self, archives_dir=None): @@ -16,6 +18,9 @@ class Archive(): self.loaded = False + self.lock_search = threading.Lock() + self.lock_threads_ranking = threading.Lock() + def load(self, archive_name=None): if archive_name == None: @@ -59,46 +64,50 @@ class Archive(): def search(self, keyword, field='content', min_hits=0): - search_results = { "keyword": keyword, "field": field, "archive": self.archive_name, "results": [] } + with self.lock_search: - for k, v in sorted(self.archive.items(), key=get_key, reverse=True): + search_results = { "keyword": keyword, "field": field, "archive": self.archive_name, "results": [] } - current_index_str = self.archive_name + '/' + k - hits = [] - nbr_hits = 0 - i = 0 - for m in v['threads']: - current_index_str = self.archive_name + '/' + k + '/' + str(i) - nbr_hits += self.search_message(keyword, m, current_index_str, hits, field) - i += 1 + for k, v in sorted(self.archive.items(), key=get_key, reverse=True): - if nbr_hits > min_hits: - # nettime-l - fix (the name of the thread from ex. 'nettime-l_Jan_01' to 'January 2001') - if k.startswith("nettime-l_"): - dt = datetime.strptime(k, "nettime-l_%b_%y") - k = dt.strftime("%B_%Y") - search_results['results'].append({ 'thread': k, 'nbr_hits': nbr_hits, 'hits': hits}) + current_index_str = self.archive_name + '/' + k + hits = [] + nbr_hits = 0 + i = 0 + for m in v['threads']: + current_index_str = self.archive_name + '/' + k + '/' + str(i) + nbr_hits += self.search_message(keyword, m, current_index_str, hits, field) + i += 1 - return search_results + if nbr_hits > min_hits: + # nettime-l - fix (the name of the thread from ex. 'nettime-l_Jan_01' to 'January 2001') + if k.startswith("nettime-l_"): + dt = datetime.strptime(k, "nettime-l_%b_%y") + k = dt.strftime("%B_%Y") + search_results['results'].append({ 'thread': k, 'nbr_hits': nbr_hits, 'hits': hits}) + + return search_results def threads_ranking(self, rank=5): - search_results = { "keyword": "thread ranking", "field": "ranking", "archive": self.archive_name, "results": [] } + with self.lock_threads_ranking: - a = analysis.archive.Archive(self) - q = a.query(); + search_results = { "keyword": "thread ranking", "field": "ranking", "archive": self.archive_name, "results": [] } - ranking = q.threads_ranking(rank=rank) + a = analysis.archive.Archive(self) + q = a.query(); - for i in ranking: - r = analysis.format.frame_to_dictionary_threads_ranking(ranking[i]) - for h in r: - hit = [{ 'url': h['url'], 'subject': h['subject'], 'author_name': h['from']}] - search_results['results'].append({'thread': h['date'], 'nbr_hits': h['nbr-references'], 'hits': hit}) - del a - del q + ranking = q.threads_ranking(rank=rank) - return search_results + for i in ranking: + r = analysis.format.frame_to_dictionary_threads_ranking(ranking[i]) + for h in r: + hit = [{ 'url': h['url'], 'subject': h['subject'], 'author_name': h['from']}] + search_results['results'].append({'thread': h['date'], 'nbr_hits': h['nbr-references'], 'hits': hit}) + del a + del q + + return search_results