MEGA -- DB

gauthiier 2019-07-11 13:21:42 +02:00
parent 3703dcc169
commit 4197cd4d32
25 changed files with 663 additions and 1657 deletions

8
.gitignore vendored

@@ -1,7 +1,11 @@
-# mailinglists specific
+# listservs specific
 archives/
-figs/
+config/
 config.py
+test.py
+
+#macos
+.DS_Store
 
 # Byte-compiled / optimized / DLL files
 __pycache__/

6
README

@@ -1,3 +1,9 @@
+TODO (July 2019):
+
+- refactor archive.py and search.py
+- test lists import with mariadb backend
+
+
 usage: archive.py [-h] [--arch ARCH] url [url ...]
 
 Mailinglists are dead. Long live mailinglists!


@@ -1,230 +0,0 @@
import os
# matplot view/windows
import matplotlib
matplotlib.interactive(True)
# pd display
import pandas as pd
pd.set_option('display.max_colwidth', 100)
from analysis.archive import Archive
from analysis.query import Query
from analysis.plot import Plot
import analysis.format
# spectre: slategrey
# nettime: red
# crumb: purple
# empyre: darkblue
def save_fig_cohort(q, name, dir, color):
t = name + " - Cohorts"
pp = q.cohort().plot(color=color, title=t)
ts = name + "_cohorts.png"
filename = os.path.join(dir, ts)
pp.get_figure().savefig(filename)
def save_fig_messages_total(q, name, dir, color):
t = name + " - Nbr. Messages"
pp = q.activity_overall().plot(kind='bar', color=color, title=t)
ts = name + "_messages.png"
filename = os.path.join(dir, ts)
pp.get_figure().savefig(filename)
def save_fig_threads_total(q, name, dir, color):
t = name + " - Nbr. Threads"
pp = q.threads_overall().plot(kind='bar', color=color, title=t)
ts = name + "_threads.png"
filename = os.path.join(dir, ts)
pp.get_figure().savefig(filename)
def save_fig_messages_constituency(q, name, dir):
t = name + " - Messages Constituency"
replies = pd.Series(q.replies_overall(series=True))
# threads = pd.Series(q.single_threads_overall(series=True))
threads = pd.Series(q.threads_overall(series=True))
messages = pd.Series(q.activity_overall(series=True))
single_messages = messages - (replies + threads)
# df = {'a': single_messages, 'b': threads, 'c': replies}
# df = pd.DataFrame([single_messages, threads, replies], columns=['a', 'b', 'c'])
df = pd.concat([single_messages.to_frame('single-messages').astype(int), threads.to_frame('threads').astype(int), replies.to_frame('replies').astype(int)], axis=1)
pp = df.plot(kind='bar', stacked=True, title=t)
# pp = [single_messages, threads, replies].plot(kind='bar', stacked=True)
ts = name + "_constituency.png"
filename = os.path.join(dir, ts)
pp.get_figure().savefig(filename)
def save_fig_avg_threads_replies(q, name, dir, color):
t = name + " - Avg. Threads + Replies"
replies = pd.Series(q.replies_overall(series=True))
threads = pd.Series(q.threads_overall(series=True))
messages = pd.Series(q.activity_overall(series=True))
avg_threads_messages = (replies + threads) / messages
pp = pd.DataFrame(avg_threads_messages).plot(kind='bar', color=color, title=t)
ts = name + "_avg_threads_replies.png"
filename = os.path.join(dir, ts)
pp.get_figure().savefig(filename)
def save_fig_diff_threads_replies_vs_messages(q, name, dir, color):
t = name + " - Diff. Threads + Replies vs Single Messages"
replies = pd.Series(q.replies_overall(series=True))
threads = pd.Series(q.threads_overall(series=True))
rt = replies + threads
messages = pd.Series(q.activity_overall(series=True))
diff_threads_messages = (2 * rt) - messages
pp = pd.DataFrame(diff_threads_messages).plot(kind='bar', color=color, title=t)
ts = name + "_diff_threads_replies_messages.png"
filename = os.path.join(dir, ts)
pp.get_figure().savefig(filename)
def save_fig_ratio_replies_threads(q, name, dir, color):
t = name + " - Ratio Replies per Thread"
replies = pd.Series(q.replies_overall(series=True))
threads = pd.Series(q.threads_overall(series=True))
ratio_replies_threads = replies / threads
pp = pd.DataFrame(ratio_replies_threads).plot(kind='bar', color=color, title=t)
ts = name + "_ratio_replies_threads.png"
filename = os.path.join(dir, ts)
pp.get_figure().savefig(filename)
def html_td_rank_year(year, data):
td_str = '<td class="td_list">'
if year in data:
td_str += analysis.format.table_threads_ranking(data[year])
td_str += '</td>'
return td_str
def html_table_ranking_per_year(ranking_nettime, ranking_crumb, ranking_spectre, ranking_empyre):
html_str = '<table id="rankings">'
html_str += '<tr>'
html_str += '<td class="td_year_t">year</td>'
html_str += '<td class="td_list_t">nettime</td>'
html_str += '<td class="td_list_t">crumb</td>'
html_str += '<td class="td_list_t">spectre</td>'
html_str += '<td class="td_list_t">empyre</td>'
html_str += '</tr>'
years = sorted(ranking_nettime.keys())
print(years)
for i in years:
html_str += '<tr>'
html_str += '<td class="td_list">' + i + '</td>'
html_str += html_td_rank_year(i, ranking_nettime)
html_str += html_td_rank_year(i, ranking_crumb)
html_str += html_td_rank_year(i, ranking_spectre)
html_str += html_td_rank_year(i, ranking_empyre)
html_str += '</tr>'
html_str += '</table>'
return html_str
print("nettime")
#nettime
nt = Archive('nettime-l')
ntq = nt.query()
ntp = Plot(ntq)
# save_fig_cohort(ntq, 'nettime', 'figs/', 'red')
# save_fig_messages_total(ntq, 'nettime', 'figs/', 'red')
# save_fig_threads_total(ntq, 'nettime', 'figs/', 'red')
# save_fig_messages_constituency(ntq, 'nettime', 'figs/')
# save_fig_avg_threads_replies(ntq, 'nettime', 'figs/', 'red')
# save_fig_diff_threads_replies_vs_messages(ntq, 'nettime', 'figs/', 'red')
# save_fig_ratio_replies_threads(ntq, 'nettime', 'figs/', 'red')
ranking_nettime = ntq.threads_ranking(rank=15)
# print(r['2000'])
# print(analysis.format.table_threads_ranking(r['2000']))
print("crumb")
#crumb
cr = Archive('crumb')
crq = cr.query()
crp = Plot(crq)
# save_fig_cohort(crq, 'crumb', 'figs/', 'purple')
# save_fig_messages_total(crq, 'crumb', 'figs/', 'purple')
# save_fig_threads_total(crq, 'crumb', 'figs/', 'purple')
# save_fig_messages_constituency(crq, 'crumb', 'figs/')
# save_fig_avg_threads_replies(crq, 'crumb', 'figs/', 'purple')
# save_fig_diff_threads_replies_vs_messages(crq, 'crumb', 'figs/', 'purple')
# save_fig_ratio_replies_threads(crq, 'crumb', 'figs/', 'purple')
ranking_crumb = crq.threads_ranking(rank=15)
print("empyre")
#empyre
em = Archive('empyre')
emq = em.query()
emp = Plot(emq)
# save_fig_cohort(emq, 'empyre', 'figs/', 'darkblue')
# save_fig_messages_total(emq, 'empyre', 'figs/', 'darkblue')
# save_fig_threads_total(emq, 'empyre', 'figs/', 'darkblue')
# save_fig_messages_constituency(emq, 'empyre', 'figs/')
# save_fig_avg_threads_replies(emq, 'empyre', 'figs/', 'darkblue')
# save_fig_diff_threads_replies_vs_messages(emq, 'empyre', 'figs/', 'darkblue')
# save_fig_ratio_replies_threads(emq, 'empyre', 'figs/', 'darkblue')
ranking_empyre = emq.threads_ranking(rank=15)
print("spectre")
#spectre
sp = Archive('spectre')
spq = sp.query()
spp = Plot(spq)
# save_fig_cohort(spq, 'spectre', 'figs/', 'slategrey')
# save_fig_messages_total(spq, 'spectre', 'figs/', 'slategrey')
# save_fig_threads_total(spq, 'spectre', 'figs/', 'slategrey')
# save_fig_messages_constituency(spq, 'spectre', 'figs/')
# save_fig_avg_threads_replies(spq, 'spectre', 'figs/', 'slategrey')
# save_fig_diff_threads_replies_vs_messages(spq, 'spectre', 'figs/', 'slategrey')
# save_fig_ratio_replies_threads(spq, 'spectre', 'figs/', 'slategrey')
ranking_spectre = spq.threads_ranking(rank=15)
## comparative ranking
rankings = html_table_ranking_per_year(ranking_nettime, ranking_crumb, ranking_spectre, ranking_empyre)
html_template = 'figs/ranking/index_template.html'
with open(html_template, 'r') as fp:
h = fp.read()
html = h.replace("--table--", rankings)
html_output = 'figs/ranking/index.html'
with open(html_output, 'w+') as fp:
fp.write(html)

analysis/archive.py

@@ -1,165 +0,0 @@
import numpy as np
import pandas as pd
import email, email.parser
import os, datetime, json, gzip, re
import analysis.util
import analysis.query
import search.archive ## circular...
def filter_date(msg, archive_name):
time_tz = analysis.util.format_date(msg, archive_name)
if not time_tz:
return None
dt = datetime.datetime.fromtimestamp(time_tz)
try:
date_time = pd.to_datetime(dt)
except pd.tslib.OutOfBoundsDatetime:
print('time out of bound')
print(dt)
return None
min_date = pd.to_datetime(analysis.util.min_date(archive_name), format='%d/%m/%Y')
max_date = pd.to_datetime(datetime.datetime.now())
if date_time < min_date or date_time > max_date:
return None
return date_time
def message_to_tuple_record(msg, records, archive_name, references='X'):
# check date first?
date = filter_date(msg, archive_name)
if not date:
print("Archive::filter_date returned None. Skip.")
return
# check / filter from email address second?
from_addr = analysis.util.format_from(msg, archive_name)
if not from_addr:
print("Archive::analysis.util.format_from returned None. Skip.")
return
url = analysis.util.format_url(msg, archive_name)
author = analysis.util.format_author(msg, archive_name)
subject = analysis.util.format_subject(msg, archive_name)
message_id = analysis.util.format_id(msg, archive_name)
content = analysis.util.format_content(msg, archive_name)
records.append((message_id,
from_addr,
author,
subject,
date,
url,
len(content),
0 if not 'follow-up' in msg else len(msg['follow-up']),
references))
# recursive follow up -- but references is not keeping track really...
if 'follow-up' in msg:
for f in msg['follow-up']:
message_to_tuple_record(f, records, archive_name, references=message_id)
return
def json_data_to_pd_dataframe(json_data, archive_name):
records = []
for d in json_data:
for dd in d['threads']:
message_to_tuple_record(dd, records, archive_name)
print('zzzzzzzzz ----> ' + archive_name + " ---- " + str(len(records)))
df = pd.DataFrame.from_records(records,
index='date',
columns=['message-id',
'from',
'author',
'subject',
'date',
'url',
'content-length',
'nbr-references',
'references'])
df.index.name = 'date'
return df
def load_from_file(filename, archive_name, archive_dir, json_data=None):
if not filename.endswith('.json.gz'):
file_path = os.path.join(archive_dir, filename + '.json.gz')
else:
file_path = os.path.join(archive_dir, filename)
if os.path.isfile(file_path):
with gzip.open(file_path, 'r') as fp:
json_data = json.load(fp)
return json_data_to_pd_dataframe(json_data['threads'], archive_name)
else:
#list of all "filename[...].json.gz" in archive_dir
files = sorted([f for f in os.listdir(archive_dir) if os.path.isfile(os.path.join(archive_dir, f)) and f.startswith(filename) and f.endswith('.json.gz')])
if files:
filename = files[-1] # take the most recent (listed alpha-chronological)
file_path = os.path.join(archive_dir, filename)
if os.path.isfile(file_path):
with gzip.open(file_path, 'r') as fp:
json_data = json.load(fp)
return json_data_to_pd_dataframe(json_data['threads'], archive_name)
else:
#list of all json files in archive_dir/filename
dir_path = os.path.join(archive_dir, filename)
if not os.path.isdir(dir_path):
return None
files = [os.path.join(dir_path, f) for f in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, f)) and f.endswith('.json')]
if not files:
return None
# load all json files
threads = []
for file_path in files:
with open(file_path, 'r') as fp:
json_data = json.load(fp)
threads.append(json_data)
print('---> ' + archive_name)
return json_data_to_pd_dataframe(threads, archive_name)
def load_from_search_archive(archive):
threads = []
for k, v in archive.archive.items():
threads.append(v)
return json_data_to_pd_dataframe(threads, archive.archive_name)
class Archive:
data = None # "raw" json data
dataframe = None # main pd dataframe
def __init__(self, archive_name, archive_dir="archives"):
if isinstance(archive_name, pd.core.frame.DataFrame):
self.dataframe = archive_name ## no copies here
if isinstance(archive_name, search.archive.Archive):
self.dataframe = load_from_search_archive(archive_name)
if isinstance(archive_name, str):
# need a filename or a dir name....
self.dataframe = load_from_file(archive_name, archive_name, archive_dir, self.data)
def query(self):
q = analysis.query.Query(self)
return q

analysis/format.py

@@ -1,165 +0,0 @@
import analysis.query
import logging, html, numpy
from tabulate import tabulate
def makeurl(text, url):
return '<a href="' + url + '">' + text + "</a>"
def table_threads_ranking(ranking_dataframe):
html_str = '<table class="threads_ranking">'
html_str += '<tr>'
html_str += '<td class="td_date_t">date</td>'
html_str += '<td class="td_subject_t">subject</td>'
html_str += '<td class="td_from_t">from</td>'
html_str += '<td class="td_rep_t">replies</td>'
html_str += '</tr>'
for i, row in ranking_dataframe.iterrows():
html_str += '<tr>'
html_str += '<td class="td_date">' + str(i) + '</td>'
html_str += '<td class="td_subject">' + makeurl(row['subject'], row['url']) + '</td>'
html_str += '<td class="td_from">' + row['from'] + '</td>'
html_str += '<td class="td_rep">' + str(row['nbr-references']) + '</td>'
html_str += '</tr>'
html_str += "</table>"
return html_str
def frame_to_dictionary_threads_ranking(ranking_dataframe):
results = []
for i, row in ranking_dataframe.iterrows():
d = {'date': str(i), 'subject': row['subject'], 'url': row['url'], 'from': row['from'], 'nbr-references': row['nbr-references']}
results.append(d)
return results
class Html:
query = None
def __init__(self, q=None):
        if not isinstance(q, analysis.query.Query):
            logging.error("Html constructor Error: query must be of type analysis.query.Query")
raise Exception()
self.query = q
def threads_ranking(self, rank=5, resolution=None):
data = self.query.threads_ranking(rank=rank)
h = html.HTML()
t = h.table()
r = t.tr
r.td('date', klass='td_date_t')
r.td('from', klass='td_from_t')
r.td('replies', klass='td_rep_t')
r.td('subject', klass='td_subject_t')
for i, row in data.iterrows():
r = t.tr
print(row.index)
r.td(str(row['date']), klass='td_date')
r.td(row['from'], klass='td_from')
r.td(str(row['nbr-references']), klass='td_rep')
r.td('', klass='td_subject').text(str(h.a(row['subject'], href=row['url'])), escape=False)
return str(t)
@staticmethod
def from_dataframe(data_frame, table_name=None, name_map={}, url_map={}):
header = []
if data_frame.index.name in name_map:
header.append(name_map[data_frame.index.name])
else:
header.append(data_frame.index.name)
for h in data_frame.columns:
if h in name_map:
h = name_map[h]
header.append(h)
css_header = []
css_element = []
for i in header:
css_header.append('td_' + i + '_t')
css_element.append('td_' + i)
h = html.HTML()
if table_name:
t = h.table(id=table_name, klass=table_name + '_t')
else:
t = h.table()
# url map
url_hash = {}
url_skip = []
url_keys = url_map.keys()
for u in url_keys:
if u in header and url_map[u] in header:
url_indx = header.index(url_map[u])
url_hash[header.index(u)] = url_indx
url_skip.append(url_indx)
header.pop(url_indx)
#header
r = t.tr
n = 0
for j in header:
r.td(str(j), klass=css_header[n])
n += 1
#elements
for k, row in data_frame.iterrows():
r = t.tr
r.td(str(k), klass=css_element[0])
n = 1
for l in row:
if n in url_skip:
continue
if isinstance(l, float):
if l % 1 > 0:
l = '{0:.4f}'.format(l)
else:
l = int(l)
if n in url_hash.keys():
url = row[url_hash[n] - 1]
r.td('', klass=css_element[n]).text(str(h.a(str(l), href=url)), escape=False)
else:
r.td(str(l), klass=css_element[n])
n += 1
return str(t)
class Tab:
@staticmethod
def from_dataframe(data_frame, name_map={}, format=".0f"):
header = []
header.append(data_frame.index.name)
for h in data_frame.columns:
if h in name_map:
h = name_map[h]
header.append(h)
return tabulate(data_frame, headers=header, floatfmt=format)

analysis/plot.py

@@ -1,79 +0,0 @@
import numpy as np
import pandas as pd
import analysis.query
import logging
# for colormaps see:
# http://scipy.github.io/old-wiki/pages/Cookbook/Matplotlib/Show_colormaps
# http://pandas.pydata.org/pandas-docs/stable/visualization.html#colormaps
# http://matplotlib.org/examples/color/colormaps_reference.html
# for colors see:
# http://matplotlib.org/examples/color/named_colors.html
# spectre: slategrey
# nettime: red
# crumb: purple
# empyre: darkblue
def bar_plot_series(series, title, color='blueviolet', ylim=None):
return series.plot(kind = 'bar', title=title, color=color, alpha=0.8, stacked=True, ylim=ylim)
def save(plot, name):
fig = plot.get_figure()
fig.savefig(name)
class Plot:
query = None
def __init__(self, q=None):
if not isinstance(q, analysis.query.Query):
logging.error("HtmlFormat constructor Error: query must be of type analysis.query.Query")
raise Exception()
self.query = q
'''
activity
'''
def activity_from_ranking(self, resolution='y', rank=5, colormap='spectral', figsize=(8, 7)):
activity_rank = self.query.activity_from_ranking(rank=rank, series=True).keys()
series = []
for k in activity_rank:
series.append(self.query.activity_from(k, resolution, series=True))
df = pd.concat(series, axis=1)
        return df.plot.area(colormap=colormap, figsize=figsize, stacked=False)
'''
    content length
'''
def content_length_from_ranking(self, resolution='y', rank=5, colormap='spectral', figsize=(8, 7)):
content_rank = self.query.content_length_from_ranking(rank=rank, series=True).keys()
series = []
for k in content_rank:
series.append(self.query.content_length_from(k, resolution, series=True))
df = pd.concat(series, axis=1)
return df.plot.area(colormap=colormap, figsize=figsize, stacked=False)
'''
threads
'''
def threads_from_ranking(self, resolution='y', rank=5, colormap='spectral', figsize=(8, 7)):
threads_rank = self.query.threads_from_ranking(rank=rank, series=True).keys()
series = []
for k in threads_rank:
series.append(self.query.threads_from(k, resolution, series=True))
df = pd.concat(series, axis=1)
return df.plot.area(colormap=colormap, figsize=figsize, stacked=False)

analysis/query.py

@@ -1,573 +0,0 @@
import numpy as np
import pandas as pd
import analysis.archive
import logging
class Query:
archive = None # analysis.archive.Archive object
activity = None # (very) sparse dataframe (index=date(month), columns=from, values=activity(month))
content_length = None # (very) sparse dataframe (index=date(month), columns=from, values=content-length(month in bytes))
threads = None # ...
single_threads = None
replies = None # ...
def __init__(self, arch=None):
if not isinstance(arch, analysis.archive.Archive):
logging.error("Query constructor Error: arch must be of type analysis.archive.Archive")
raise Exception()
self.archive = arch
'''
activity
'''
def _activity(self):
if self.activity is None:
from_index = self.archive.dataframe.reindex(columns=['from'])
self.activity = from_index.groupby([pd.TimeGrouper(freq='M'), 'from']).size().unstack('from').fillna(0)
return self.activity
def activity_from(self, email_address, resolution='y', series=False):
eaddr = email_address.replace('@', '{at}').lower()
freq = 'M'
if resolution.lower() == 'y':
freq = 'AS'
elif resolution.lower() == 'm':
freq = 'M'
else:
return None
self._activity()
try:
af = self.activity[eaddr]
except KeyError:
return None
activity_from = af.groupby([pd.TimeGrouper(freq=freq)]).sum()
if freq == 'AS':
activity_from.index = activity_from.index.format(formatter=lambda x: x.strftime('%Y'))
activity_from.index.name = 'year'
else:
activity_from.index = activity_from.index.format(formatter=lambda x: x.strftime('%Y-%m'))
activity_from.index.name = 'year-month'
if series:
return activity_from
return activity_from.to_frame('nbr-messages').astype(int)
def activity_from_ranking(self, rank=5, filter_nettime=True, series=False):
self._activity()
afr = self.activity.sum(axis=0).order(ascending=False)
if filter_nettime:
p = r'^((?!nettime*).)*$'
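            # negative lookahead: keep only senders whose address does not
            # contain "nettime" (filters the list's own house addresses);
            # the same pattern is reused throughout this class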
afr = afr[afr.index.str.contains(p)]
if series:
return afr[:rank]
return afr[:rank].to_frame('nbr-messages').astype(int)
# def activity_overall(self, resolution='y', series=False):
# freq = 'M'
# if resolution.lower() == 'y':
# freq = 'AS'
# elif resolution.lower() == 'm':
# freq = 'M'
# else:
# return None
# self._activity()
# y = self.activity.sum(axis=1)
# y = y.groupby([pd.TimeGrouper(freq=freq)]).sum()
# if freq == 'AS':
# y.index = y.index.format(formatter=lambda x: x.strftime('%Y'))
# y.index.name = 'year'
# else:
# y.index = y.index.format(formatter=lambda x: x.strftime('%Y-%m'))
# y.index.name = 'year-month'
# if series:
# return y
# return y.to_frame('nbr-messages').astype(int)
def activity_overall(self, resolution='y', series=False):
a = self.archive.dataframe['url']
freq = 'M'
if resolution.lower() == 'y':
freq = 'AS'
elif resolution.lower() == 'm':
freq = 'M'
else:
return None
y = self.archive.dataframe['url'].groupby([pd.TimeGrouper(freq=freq)]).count()
if freq == 'AS':
y.index = y.index.format(formatter=lambda x: x.strftime('%Y'))
y.index.name = 'year'
else:
y.index = y.index.format(formatter=lambda x: x.strftime('%Y-%m'))
y.index.name = 'year-month'
if series:
return y
return y.to_frame('nbr-messages').astype(int)
def cohort(self, resolution='m', series=False):
freq = 'M'
if resolution.lower() == 'y':
freq = 'AS'
elif resolution.lower() == 'm':
freq = 'M'
else:
return None
self._activity()
c = self.activity.idxmax().order().to_frame('date')
c.index = c['date']
cohort = c.groupby([pd.TimeGrouper(freq=freq)]).size()
if freq == 'AS':
cohort.index = cohort.index.format(formatter=lambda x: x.strftime('%Y'))
cohort.index.name = 'year'
else:
cohort.index = cohort.index.format(formatter=lambda x: x.strftime('%Y-%m'))
cohort.index.name = 'year-month'
if series:
return cohort
return cohort.to_frame('first-messages').astype(int)
'''
    content length
'''
def _content_length(self):
if self.content_length is None:
from_content_index = self.archive.dataframe.reindex(columns=['from', 'content-length'])
self.content_length = from_content_index.groupby([pd.TimeGrouper(freq='M'), 'from']).sum()
self.content_length = self.content_length.reset_index().pivot(columns='from', index='date', values='content-length').fillna(0)
return self.content_length
def content_length_from(self, email_address, resolution='y', series=False):
eaddr = email_address.replace('@', '{at}').lower()
freq = 'M'
if resolution.lower() == 'y':
freq = 'AS'
elif resolution.lower() == 'm':
freq = 'M'
else:
return None
self._content_length()
try:
af = self.content_length[eaddr]
except KeyError:
return None
content_length_from = af.groupby([pd.TimeGrouper(freq=freq)]).sum()
if freq == 'AS':
content_length_from.index = content_length_from.index.format(formatter=lambda x: x.strftime('%Y'))
content_length_from.index.name = 'year'
else:
content_length_from.index = content_length_from.index.format(formatter=lambda x: x.strftime('%Y-%m'))
content_length_from.index.name = 'year-month'
if series:
return content_length_from
return content_length_from.to_frame('nbr-bytes').astype(int)
def content_length_from_ranking(self, resolution='y', rank=5, filter_nettime=True, series=False):
self._content_length()
cfr = self.content_length.sum(axis=0).order(ascending=False)
if filter_nettime:
p = r'^((?!nettime*).)*$'
cfr = cfr[cfr.index.str.contains(p)]
if series:
return cfr[:rank]
return cfr[:rank].to_frame('nbr-bytes').astype(int)
def content_length_overall(self, resolution='y', series=False):
freq = 'M'
if resolution.lower() == 'y':
freq = 'AS'
elif resolution.lower() == 'm':
freq = 'M'
else:
return None
self._content_length()
y = self.content_length.sum(axis=1)
y = y.groupby([pd.TimeGrouper(freq=freq)]).sum()
if freq == 'AS':
y.index = y.index.format(formatter=lambda x: x.strftime('%Y'))
y.index.name = 'year'
else:
y.index = y.index.format(formatter=lambda x: x.strftime('%Y-%m'))
y.index.name = 'year-month'
if series:
return y
return y.to_frame('nbr-bytes').astype(int)
'''
threads
'''
def _threads(self, thresh=0):
print("doing threads")
if self.threads is None:
self.threads = self.archive.dataframe[self.archive.dataframe['nbr-references'] > thresh].reindex(columns=['from','nbr-references','subject', 'url', 'message-id']).sort_values('nbr-references', ascending=False)
if self.single_threads is None:
self.single_threads = self.archive.dataframe[(self.archive.dataframe['references'] == 'X') & (self.archive.dataframe['nbr-references'] > thresh)].reindex(columns=['from','nbr-references','subject', 'url', 'message-id']).sort_values('nbr-references', ascending=False)
        return self.threads
def threads_ranking(self, rank=5, resolution='y'):
self._threads()
if resolution == None:
data = self.threads.drop('message-id', axis=1)[:rank]
return data.reindex_axis(['subject', 'from', 'nbr-references', 'url'], axis=1)
freq = 'M'
if resolution.lower() == 'y':
freq = 'AS'
elif resolution.lower() == 'm':
freq = 'M'
else:
return None
# get the threads ranking per time resolution
#
data = self.threads.drop('message-id', axis=1)
data = data.groupby([pd.TimeGrouper(freq=freq)])
r = {}
for k, v in data:
if freq == 'AS':
time_key = k.strftime('%Y')
else:
time_key = k.strftime('%Y-%m')
frame = v[:rank]
frame = frame.reindex_axis(['subject', 'from', 'nbr-references', 'url'], axis=1)
r[time_key] = frame
return r
def threads_replies_to(self, email_address, resolution='y', series=False):
freq = 'M'
if resolution.lower() == 'y':
freq = 'AS'
elif resolution.lower() == 'm':
freq = 'M'
else:
return None
self._threads()
eaddr = email_address.replace('@', '{at}').lower()
self._threads()
threads_from = self.threads.reindex(columns=['from', 'nbr-references'])
threads_from_ranking = threads_from.groupby([pd.TimeGrouper(freq=freq), 'from']).sum() # <-- sum = adding up nbr references
threads_from_ranking = threads_from_ranking.reset_index().pivot(columns='from', index='date', values='nbr-references').fillna(0)
if series:
return threads_from_ranking[eaddr]
threads_from_ranking = threads_from_ranking[eaddr].to_frame('nbr-threads').astype(int)
if freq == 'AS':
threads_from_ranking.index = threads_from_ranking.index.format(formatter=lambda x: x.strftime('%Y'))
threads_from_ranking.index.name = 'year'
else:
threads_from_ranking.index = threads_from_ranking.index.format(formatter=lambda x: x.strftime('%Y-%m'))
threads_from_ranking.index.name = 'year-month'
return threads_from_ranking
def threads_replies_to_ranking(self, rank=5, filter_nettime=True):
self._threads()
tfr = self.threads.reindex(columns=['from', 'nbr-references']).groupby('from').sum().sort_values('nbr-references', ascending=False)
if filter_nettime:
p = r'^((?!nettime*).)*$'
tfr = tfr[tfr.index.str.contains(p)]
tfr = tfr[:rank].astype(int)
return tfr
def threads_initiated_from_ranking(self, rank=5, filter_nettime=True, series=False):
self._threads()
tir = self.threads.reindex(columns=['from']).groupby('from').size().sort_values(ascending=False)
if filter_nettime:
p = r'^((?!nettime*).)*$'
tir = tir[tir.index.str.contains(p)]
if series:
return tir[:rank]
return tir[:rank].to_frame('nbr-initiated-threads').astype(int)
def threads_activity_threads_initiated_avg_ranking(self, rank=5, filter_nettime=True):
# activity
self._activity()
afr = self.activity.sum(axis=0).astype(int)
if filter_nettime:
p = r'^((?!nettime*).)*$'
afr = afr[afr.index.str.contains(p)]
# initiated threads [top 25]
self._threads()
tir = self.threads.reindex(columns=['from']).groupby('from').size().sort_values(ascending=False)[:25] # <-- top 25
if filter_nettime:
p = r'^((?!nettime*).)*$'
tir = tir[tir.index.str.contains(p)]
inter = afr.index.intersection(tir.index)
avg = tir[inter] / afr[inter]
labels = ['messages', 'threads', 'avg.threads']
return pd.concat([afr[avg.index], tir[avg.index], avg], axis=1, keys=labels).sort_values('avg.threads', ascending=False)[:rank]
def threads_initiated_replies_avg_ranking(self, rank=5, filter_nettime=True):
self._threads()
#initiated
tir = self.threads.reindex(columns=['from']).groupby('from').size().sort_values(ascending=False)
if filter_nettime:
p = r'^((?!nettime*).)*$'
tir = tir[tir.index.str.contains(p)]
#replies [top 25]
tfr = self.threads.reindex(columns=['from', 'nbr-references']).groupby('from').sum().sort_values('nbr-references', ascending=False)[:25] # <-- top 25
if filter_nettime:
p = r'^((?!nettime*).)*$'
tfr = tfr[tfr.index.str.contains(p)]
tfr = tfr['nbr-references'] # dataframe to series
inter = tir.index.intersection(tfr.index)
avg = tfr[inter] / tir[inter]
labels = ['threads', 'replies', 'avg.replies']
return pd.concat([tir[avg.index], tfr[avg.index], avg], axis=1, keys=labels).sort_values('avg.replies', ascending=False)[:rank]
def threads_overall(self, resolution='y', aggregate='count', series=False, tresh=0):
freq = 'M'
if resolution.lower() == 'y':
freq = 'AS'
elif resolution.lower() == 'm':
freq = 'M'
else:
return None
agg = aggregate.lower()
if not agg in ['sum', 'mean', 'count']:
return None
if not self.threads is None:
del self.threads
self.threads = None
self._threads(tresh)
if agg == 'sum':
# number of replies total (re: sum all the replies)
y = self.threads.groupby([pd.TimeGrouper(freq=freq)]).sum()
elif agg == 'mean':
y = self.threads.groupby([pd.TimeGrouper(freq=freq)]).mean()
else:
# number of threads (re: msgs with at least one reply)
y = self.threads['nbr-references'].groupby([pd.TimeGrouper(freq=freq)]).count()
if freq == 'AS':
y.index = y.index.format(formatter=lambda x: x.strftime('%Y'))
y.index.name = 'year'
else:
y.index = y.index.format(formatter=lambda x: x.strftime('%Y-%m'))
y.index.name = 'year-month'
if series:
return y
return y.to_frame('nbr-threads').astype(int)
def single_threads_overall(self, resolution='y', aggregate='sum', series=False, tresh=1):
freq = 'M'
if resolution.lower() == 'y':
freq = 'AS'
elif resolution.lower() == 'm':
freq = 'M'
else:
return None
agg = aggregate.lower()
if not agg in ['sum', 'mean', 'count']:
return None
if not self.single_threads is None:
del self.single_threads
self.single_threads = None
self._threads(tresh)
y = self.single_threads['nbr-references'].groupby([pd.TimeGrouper(freq=freq)]).count()
if freq == 'AS':
y.index = y.index.format(formatter=lambda x: x.strftime('%Y'))
y.index.name = 'year'
else:
y.index = y.index.format(formatter=lambda x: x.strftime('%Y-%m'))
y.index.name = 'year-month'
if series:
return y
return y.to_frame('nbr-threads').astype(int)
'''
replies
'''
def _replies(self):
if self.replies is None:
self.replies = self.archive.dataframe[self.archive.dataframe['references'] != 'X'].reindex(columns=['from','references'])
self.non_replies = self.archive.dataframe[self.archive.dataframe['references'] == 'X'].reindex(columns=['from','references'])
        return self.replies
def replies_ranking(self, rank=5, resolution=None):
self._replies()
if resolution == None:
data = self.replies.groupby('from').size().sort_values(ascending=False)[:rank]
return data.to_frame('nbr_replies')
freq = 'M'
if resolution.lower() == 'y':
freq = 'AS'
elif resolution.lower() == 'm':
freq = 'M'
else:
return None
# get the threads ranking per time resolution
#
data = self.replies.groupby([pd.TimeGrouper(freq=freq)])
r = {}
for k, v in data:
if freq == 'AS':
time_key = k.strftime('%Y')
else:
time_key = k.strftime('%Y-%m')
frame = v.groupby('from').size().sort_values(ascending=False)[:rank]
r[time_key] = frame.to_frame('nbr-replies')
return r
def replies_avg_ranking(self, rank=5, filter_nettime=True):
# activity
self._activity()
afr = self.activity.sum(axis=0)
if filter_nettime:
p = r'^((?!nettime*).)*$'
afr = afr[afr.index.str.contains(p)]
# replies in thread [top 25]
self._replies()
        rpl = self.replies.groupby('from').size().sort_values(ascending=False)[:25]
inter = afr.index.intersection(rpl.index)
avg = rpl[inter] / afr[inter]
labels = ['messages', 'replies', 'avg.replies']
return pd.concat([afr[avg.index], rpl[avg.index], avg], axis=1, keys=labels).sort_values('avg.replies', ascending=False)[:rank]
def replies_overall(self, resolution='y', series=False):
freq = 'M'
if resolution.lower() == 'y':
freq = 'AS'
elif resolution.lower() == 'm':
freq = 'M'
else:
return None
if not self.replies is None:
del self.replies
self.replies = None
self._replies()
y = self.replies['references'].groupby([pd.TimeGrouper(freq=freq)]).count()
if freq == 'AS':
y.index = y.index.format(formatter=lambda x: x.strftime('%Y'))
y.index.name = 'year'
else:
y.index = y.index.format(formatter=lambda x: x.strftime('%Y-%m'))
y.index.name = 'year-month'
if series:
return y
return y.to_frame('nbr-replies').astype(int)

analysis/util.py

@@ -1,92 +0,0 @@
import email
import hashlib
def format_content(msg, archive_name):
return msg['content']
def format_url(msg, archive_name):
return msg['url']
def format_author(msg, archive_name):
return msg['author_name']
def format_from_token(from_str, sep):
fff = from_str
from_addr = email.utils.parseaddr(from_str)[1]
fffa = email.utils.parseaddr(from_str)
if sep not in from_addr:
tok = from_str.split()
try:
at = tok.index(sep)
from_addr = ''.join([tok[at-1], '{AT}', tok[at+1]])
if from_addr.startswith('<') or from_addr.endswith('>'):
from_addr = from_addr.strip('<').strip('>')
except ValueError:
print(tok)
print("error formating 'from' " + from_str + " -- expecting sep: " + sep)
print("*** " + fff)
print("+++")
print(fffa)
print("----")
return None
else:
from_addr = from_addr.replace(sep, '{AT}')
return from_addr.lower()
def format_from(msg, archive_name):
from_str = msg['from']
if " {AT} " in from_str:
return format_from_token(from_str, '{AT}')
elif " at " in from_str:
return format_from_token(from_str, 'at')
elif "@" in from_str:
return format_from_token(from_str, '@')
else:
return from_str
# returns utc timestamp
def format_date(msg, archive_name):
date_str = msg['date']
time_tz = None
try:
date_tz = email.utils.parsedate_tz(date_str)
time_tz = email.utils.mktime_tz(date_tz) #utc timestamp
except TypeError:
print("Format Date TypeError")
print(" > " + date_str)
return None
except ValueError:
print("Format Date ValueError")
print(" > " + date_str)
return None
finally:
return time_tz
def format_subject(msg, archive_name):
return msg['subject']
def format_id(msg, archive_name):
if "message-id" in msg:
return msg['message-id']
else:
# create hash with author_name + date
s = msg['author_name'] + msg['date']
sha = hashlib.sha1(s.encode('utf-8'))
return sha.hexdigest()
# format='%d/%m/%Y'
def min_date(archive_name):
if "nettime" in archive_name:
return '01/10/1995'
elif archive_name == "spectre":
return '01/08/2001'
elif archive_name == "empyre":
return '01/01/2002'
elif archive_name == "crumb":
return '01/02/2001'

257
archive/archive.py Normal file

@@ -0,0 +1,257 @@
import email, email.parser
import os, json, gzip, re
import mysql.connector as mariadb
import archive.sql, archive.util
from datetime import date, datetime
from dateutil import parser
import terminal.progress, terminal.util
def load_from_file(filename, archive_name, archive_dir):
if not filename.endswith('.json.gz'):
file_path = os.path.join(archive_dir, filename + '.json.gz')
else:
file_path = os.path.join(archive_dir, filename)
if os.path.isfile(file_path):
with gzip.open(file_path, 'r') as fp:
json_data = json.load(fp)
return (json_data, archive_name)
else:
#list of all "filename[...].json.gz" in archive_dir
files = sorted([f for f in os.listdir(archive_dir) if os.path.isfile(os.path.join(archive_dir, f)) and f.startswith(filename) and f.endswith('.json.gz')])
if files:
filename = files[-1] # take the most recent (listed alpha-chronological)
file_path = os.path.join(archive_dir, filename)
if os.path.isfile(file_path):
with gzip.open(file_path, 'r') as fp:
json_data = json.load(fp)
return (json_data, archive_name) # <--- this makes no sense....
else:
#list of all json files in archive_dir/filename
dir_path = os.path.join(archive_dir, filename)
if not os.path.isdir(dir_path):
return None
files = [os.path.join(dir_path, f) for f in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, f)) and f.endswith('.json')]
if not files:
return None
# load all json files
threads = []
for file_path in files:
with open(file_path, 'r') as fp:
json_data = json.load(fp)
threads.append(json_data)
return (threads, archive_name)
def connect_db(database, host, user, password):
    con = None
    try:
        con = mariadb.connect(host=host, user=user, password=password, database=database)
    except mariadb.Error as error:
        print("Error: {}".format(error))
        if error.errno == 1049:  # 1049 = unknown database
            if terminal.util.y_n_question("Database " + database + " does not exist. Create it?"):
                print("creating")
            else:
                print("not creating")
    finally:
        return con
class Archive:
data = None # "raw" json data
db_con = None
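    # NOTE: Python does not overload methods -- of the three __init__
    # definitions below only the last, __init__(self, archive_name, config),
    # is in effect; the file-based and credential-based variants are shadowed.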
def __init__(self, archive_name, archive_dir):
if isinstance(archive_name, str):
# need a filename or a dir name....
print("reading archive " + archive_name, end='')
(self.data, self.archive_name) = load_from_file(archive_name, archive_name, archive_dir)
print(" - done.")
def __init__(self, archive_name, database, host, user, password):
self.archive_name = archive_name
self.db_con = connect_db(database, host, user, password)
def __init__(self, archive_name, config):
self.archive_name = archive_name
self.db_con = connect_db(config['database'], config['host'], config['user'], config['password'])
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
if self.db_con is not None:
self.db_con.close()
def create_db(self, host, database, user, password):
print("creating table: " + self.archive_name, end='')
self.db_con = connect_db(database, host, user, password)
if self.db_con is None:
return
try:
cursor = self.db_con.cursor()
cursor.execute(archive.sql.CREATE.format(self.archive_name))
except mariadb.Error as error:
print("Error: {}".format(error))
finally:
cursor.close()
print(" - done.")
def insert_db(self, host, database, user, password):
self.db_con = connect_db(database, host, user, password)
if self.db_con is None:
return
try:
cursor = self.db_con.cursor()
progress = terminal.progress.ProgressBar(self.archive_name, len(self.data), fmt=terminal.progress.ProgressBar.FULL)
for t in self.data:
n_inserted = self.recursive_insert_db(cursor, t["threads"])
# print(" - insert: " + str(n_inserted), end='')
if n_inserted > 0:
self.db_con.commit()
progress.current += 1
progress()
progress.done()
self.db_con.commit()
except mariadb.Error as error:
pass
# print("Error: {}".format(error))
finally:
cursor.close()
def recursive_insert_db(self, cursor, thread):
n_inserted = 0
for m in thread:
try:
from_ = archive.util.format_from(m)
author_name_ = archive.util.format_author(m)
to_ = archive.util.format_to(m)
date_ = archive.util.format_date(m, self.archive_name)
if date_ is None or from_ is None:
# print("\nerrorororororo")
# print(m['from'] + " -- " + m['date'])
continue
cursor.execute(archive.sql.INSERT, (from_,author_name_,to_,m["subject"],date_,m["content-type"],m["content"],m["url"]))
n_inserted += 1
if "follow-up" in m:
n_inserted += self.recursive_insert_db(cursor, m["follow-up"])
except mariadb.Error as error:
if error.errno == 1062:
#duplication continue <------------------------- look this up...
# print("\nError: {}".format(error))
continue
return n_inserted
def content_search(self, term, bool=True):
if self.db_con is None:
print("Not connection to database...")
return
try:
cursor = self.db_con.cursor(buffered=True)
if bool:
cursor.execute(archive.sql.CONTENT_QUERY_BOOLEAN.format(self.archive_name, term))
else:
                cursor.execute(archive.sql.CONTENT_QUERY_NL.format(self.archive_name, term))
# print(cursor.rowcount)
results = []
for (from_, author_name_, subject_, date_, url_) in cursor:
results.append((from_, author_name_, subject_, date_, url_))
# print("{} {} {}".format(from_, str(date_), url_))
return results
except mariadb.Error as error:
print("Error: {}".format(error))
finally:
cursor.close()
def from_search(self, term, bool=True):
if self.db_con is None:
print("Not connection to database...")
return
try:
cursor = self.db_con.cursor(buffered=True)
if bool:
cursor.execute(archive.sql.FROM_QUERY_BOOLEAN.format(self.archive_name, term))
else:
                cursor.execute(archive.sql.FROM_QUERY_NL.format(self.archive_name, term))
# print(cursor.rowcount)
results = []
for (from_, author_name_, subject_, date_, url_) in cursor:
results.append((from_, author_name_, subject_, date_, url_))
# print("{} {} {}".format(from_, str(date_), url_))
return results
except mariadb.Error as error:
print("Error: {}".format(error))
finally:
cursor.close()
# analysis
def longest_field(self, field, thread, max_length=0):
import archive.util
for m in thread:
if not field in m:
if "threads" in m:
max_length = self.longest_field(field, m["threads"], max_length)
continue
if m[field] is None:
continue
if field == "from":
m[field] = archive.util.format_from(m)
elif field == "author_name":
m[field] = archive.util.format_author(m)
elif field == "to":
m[field] = archive.util.format_to(m)
elif field == "date":
m[field] = str(archive.util.format_date(m, self.archive_name))
if m[field] is None:
continue
l = len(m[field])
if l > max_length:
max_length = l
print(">> " + m[field])
if "follow-up" in m:
max_length = self.longest_field(field, m["follow-up"], max_length)
return max_length
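
For orientation, a minimal sketch of how this DB-backed Archive is driven (inferred from the constructor and the search methods above, and from the call site in www/views.py; it assumes a config module whose db dict carries the host/database/user/password keys):

import config
import archive.archive as archive

# full-text search against the MariaDB archive; content_search() can
# return None after a mariadb.Error, hence the `or []` guard
with archive.Archive('nettime-l', config=config.db) as a:
    for (from_, author_name_, subject_, date_, url_) in a.content_search('tactical media') or []:
        print(date_, subject_, url_)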

31
archive/sql.py Normal file

@@ -0,0 +1,31 @@
CREATE = "CREATE TABLE `{}` (" \
"`from_` varchar(85) NOT NULL," \
"`author_name_` varchar(200) NOT NULL," \
"`to_` text(60)," \
"`subject_` varchar(3500) NOT NULL," \
"`date_` datetime NOT NULL," \
"`content_type_` varchar(15) NOT NULL," \
"`content_` mediumtext NOT NULL," \
"`url_` varchar(100) NOT NULL," \
"PRIMARY KEY(`from_`, `date_`)," \
"FULLTEXT (`subject_`, `content_`)," \
"FULLTEXT (`from_`, `author_name_`)" \
") ENGINE = InnoDB;"
INSERT = ("INSERT INTO nettime_l"
"(from_, author_name_, to_, subject_, date_, content_type_, content_, url_) "
"VALUES (%s, %s, %s, %s, %s, %s, %s, %s)")
CONTENT_QUERY_BOOLEAN = ("SELECT from_, author_name_, subject_, date_, url_ FROM {} "
"WHERE MATCH(subject_, content_) AGAINST('{}' IN BOOLEAN MODE) ORDER BY date_")
CONTENT_QUERY_NL = ("SELECT from_, author_name_, subject_, date_, url_ FROM {} "
"WHERE MATCH(subject_, content_) AGAINST('{}') ORDER BY date_")
FROM_QUERY_BOOLEAN = ("SELECT from_, author_name_, subject_, date_, url_ FROM {} "
"WHERE MATCH(from_, author_name_) AGAINST('{}' IN BOOLEAN MODE) ORDER BY date_")
FROM_QUERY_NL = ("SELECT from_, author_name_, subject_, date_, url_ FROM {} "
"WHERE MATCH(from_, author_name_) AGAINST('{}') ORDER BY date_")
# SELECT from_, author_name_, subject_, date_, url_ FROM nettime_l WHERE MATCH(content_) AGAINST('%s' IN BOOLEAN MODE)

225
archive/util.py Executable file

@@ -0,0 +1,225 @@
import email, datetime, sys
import hashlib
import dateparser
def format_content(msg):
return msg['content']
def format_url(msg):
return msg['url']
def format_author(msg):
if 'author_name' not in msg or msg['author_name'] is None:
return None
author_str = msg['author_name'].replace('"', '')
if "by way of" in author_str:
toks = author_str.split("by way of")
if toks[0] == "":
author_str = format_from(msg)
elif toks[0][-1] == "(":
author_str = toks[0][:-1].strip()
else:
author_str = toks[0]
if ("(" in author_str) or ("<" in author_str):
# ex. zx {AT} xyz.net (Michel Foucault) OR Michel Foucault (c'estcommeca.com) OR Michel Foucault <zx {AT} xyz.net>
# print("±±±±±±")
# print("name: " + author_str)
# print("from: " + msg['from'])
if not '@' in author_str.lower().replace('{at}', '@').replace(' at ', '@'):
author_str = author_str.split('(')[0].strip()
else:
author_str = email.utils.parseaddr(author_str)[0]
# print(" Name:" + author_str.replace('"', ''))
# print(" From:" + format_from(msg))
if " ," in author_str:
# nettime's_roving_reporter , thing.net {AT} bbs.thing.net
author_str = author_str.split(' ,')[0]
return author_str
def format_from_token(from_str, sep):
from_addr = email.utils.parseaddr(from_str)[1]
if sep not in from_addr:
tok = from_str.split()
try:
at = tok.index(sep)
from_addr = ''.join([tok[at-1], '{AT}', tok[at+1]])
if from_addr.startswith('<') or from_addr.endswith('>'):
from_addr = from_addr.strip('<').strip('>')
except ValueError:
print(tok)
print("error formating 'from' " + from_str + " -- expecting sep: " + sep)
return None
else:
from_addr = from_addr.replace(sep, '{AT}')
return "".join(from_addr.lower().split())
def format_from(msg):
if 'from' not in msg or msg['from'] is None:
return None
from_str = msg['from']
if " {AT} " in from_str:
return format_from_token(from_str, '{AT}')
elif " at " in from_str:
return format_from_token(from_str, 'at')
elif "@" in from_str:
return format_from_token(from_str, '@')
else:
return "".join(from_str.split())
def format_to(msg):
if "to" not in msg or msg["to"] is None:
return None
to_str = msg["to"]
toks = email.utils.parseaddr(to_str)
# print(toks)
if len(toks) == 2:
to_str = toks[1]
return "".join(to_str.lower().split())
# returns utc timestamp --- old...
def format_date_utc(msg, archive_name):
if 'date' not in msg or msg['date'] is None:
return None
date_str = msg['date'].replace('.', '')
time_tz = None
try:
date_tz = email.utils.parsedate_tz(date_str)
time_tz = email.utils.mktime_tz(date_tz) #utc timestamp
except TypeError:
print("Format Date TypeError")
print(" > " + date_str)
return None
except ValueError:
print("Format Date ValueError")
print(" > " + date_str)
return None
finally:
return time_tz
def format_date(msg, archive_name):
if 'date' not in msg or msg['date'] is None:
return None
# date_str = msg['date'].replace('.', '')
date_str = msg['date']
# fix Thu, 01 Aug 2002 17:33:08 +0900 (JST)
if '(' in date_str:
date_str = date_str.split('(')[0].rstrip()
date_time = dateparser.parse(date_str)
if date_time is None:
# random stuff...
fix = False
toks = date_str.split()
if len(toks[-1]) == 5 or len(toks[-1]) == 4:
# ex. Thu, 24 Jan 2002 15:21:31 -0000
if toks[-1] in ['+0000', '-0000', '0000']:
date_str = date_str[:-5]
fix = True
# ex. Fri, 25 Jan 2002 13:21:49 +1050
elif toks[-1][-2] == '5':
d = list(date_str)
d[-2] = '3'
date_str = "".join(d)
fix = True
if toks[-1][-1] != '0':
#ex. 'Fri,', '20', 'Jun', '1997', '02:58:59', '-0005'
date_str = date_str[:-5]
fix = True
if 'Fru' in toks[0]:
date_str = date_str.replace('Fru', 'Fri')
fix = True
elif 'Thur' in toks[0]:
date_str = date_str.replace('Thur', 'Thu')
fix = True
if not fix:
# print("----")
return None
date_time = dateparser.parse(date_str)
if date_time is None:
if 'GMT' in date_str:
# ex. 'Mon,', '15', 'Jan', '96', '02:55', 'GMT+0100'
date_str = date_str.split('GMT')[0].rstrip()
fix = True
if 'METDST' in toks[-1]:
# ex. 'Sat,', '3', 'May', '97', '21:07', 'METDST'
date_str = date_str.replace('METDST', 'MET')
fix = True
if not fix:
# print("++++")
return None
date_time = dateparser.parse(date_str)
return date_time
# else:
# print(date_str)
# date_time = datetime.datetime.fromtimestamp(time_tz)
min_d = datetime.datetime.strptime(min_date(archive_name), "%d/%m/%Y")
max_d = datetime.datetime.now()
date_time_naive = date_time.replace(tzinfo=None)
if date_time_naive < min_d or date_time_naive > max_d:
return None
return date_time
def format_subject(msg, archive_name):
if 'subject' not in msg or msg['subject'] is None:
return None
return msg['subject']
def format_id(msg, archive_name):
if "message-id" in msg:
return msg['message-id']
else:
# create hash with author_name + date
s = msg['author_name'] + msg['date']
sha = hashlib.sha1(s.encode('utf-8'))
return sha.hexdigest()
# format='%d/%m/%Y'
def min_date(archive_name):
if "nettime" in archive_name:
return '01/10/1995'
elif archive_name == "spectre":
return '01/08/2001'
elif archive_name == "empyre":
return '01/01/2002'
elif archive_name == "crumb":
return '01/02/2001'
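
A quick sketch of what these normalizers do in practice (hypothetical message dict; dateparser comes from the environment file below):

import archive.util as util

msg = {
    'from': 'mf {AT} example.net (Michel Foucault)',
    'date': 'Thu, 01 Aug 2002 17:33:08 +0900 (JST)',
}
# format_from() collapses and lower-cases the address around '{at}';
# format_date() strips the '(JST)' comment, lets dateparser have a go,
# and returns None for dates outside [min_date(archive), now()]
print(util.format_from(msg))        # -> 'mf{at}example.net'
print(util.format_date(msg, 'empyre'))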


@@ -1,29 +1,34 @@
-name: listservs
+name: listserv
 channels:
 - defaults
 dependencies:
-- beautiful-soup=4.3.2=py34_0
-- click=6.7=py34_0
-- flask=0.12=py34_0
-- gunicorn=19.1.0=py34_0
-- html5lib=0.999=py34_0
-- itsdangerous=0.24=py34_0
-- jinja2=2.9.6=py34_0
-- markupsafe=0.23=py34_2
-- openssl=1.0.2l=0
-- pastedeploy=1.5.2=py34_1
+- ca-certificates=2019.5.15=0
+- openssl=1.0.2s=h1de35cc_0
 - pip=9.0.1=py34_1
 - python=3.4.5=0
 - readline=6.2=2
 - setuptools=27.2.0=py34_0
-- six=1.10.0=py34_0
 - sqlite=3.13.0=0
 - tk=8.5.18=0
-- werkzeug=0.11.15=py34_0
 - wheel=0.29.0=py34_0
-- xz=5.2.2=1
-- zlib=1.2.8=3
+- xz=5.2.4=h1de35cc_4
+- zlib=1.2.11=h1de35cc_3
 - pip:
-  - beautifulsoup4==4.3.2
-  - webencodings==0.5.1
+  - beautifulsoup4==4.7.1
+  - click==7.0
+  - dateparser==0.7.1
+  - flask==1.0.4
+  - gunicorn==19.9.0
+  - itsdangerous==1.1.0
+  - jinja2==2.10.1
+  - markupsafe==1.1.1
+  - mysql-connector-python==8.0.16
+  - protobuf==3.8.0
+  - python-dateutil==2.8.0
+  - pytz==2019.1
+  - regex==2019.6.8
+  - six==1.12.0
+  - soupsieve==1.9.2
+  - tzlocal==1.5.1
+  - werkzeug==0.15.4

search/archive.py

@@ -1,150 +0,0 @@
import logging, os, json, re
from datetime import datetime
import analysis.archive ## circular...
import analysis.query
import analysis.format
import threading
class Archive():
def __init__(self, archives_dir=None):
if archives_dir==None:
from www import config
self.archives_dir = config.ARCHIVES_PATH
else:
self.archives_dir = archives_dir
self.loaded = False
self.lock_search = threading.Lock()
self.lock_threads_ranking = threading.Lock()
def load(self, archive_name=None):
if archive_name == None:
raise Exception('Archive is not specified')
archive_path = os.path.join(self.archives_dir, archive_name)
if not os.path.isdir(archive_path):
            raise Exception('Archive ' + archive_path + ' does not exist')
self.archive_name = archive_name
self.archive_path = archive_path
files = [f for f in os.listdir(archive_path) if f.endswith('.json')]
self.archive = {}
for f in files:
file_path = os.path.join(archive_path, f)
label = f.replace('.json', '')
with open(file_path) as fdata:
self.archive[label] = json.load(fdata)
self.loaded = True
def search_message(self, keyword, msg, index_str, results, field='content'):
nbr_hits = 0
if msg[field] is not None and msg[field].lower().find(keyword.lower()) > 0:
nbr_hits += 1
results.append({ "index_str": index_str, "subject": msg['subject'], "date": msg['date'], "author_name": msg['author_name'], "url": msg['url'] })
if 'follow-up' in msg:
i = 0
for m in msg['follow-up']:
current_index_str = index_str + '/' + str(i)
nbr_hits += self.search_message(keyword, m, current_index_str, results, field)
i += 1
return nbr_hits
def search(self, keyword, field='content', min_hits=0):
with self.lock_search:
search_results = { "keyword": keyword, "field": field, "archive": self.archive_name, "results": [] }
for k, v in sorted(self.archive.items(), key=get_key, reverse=True):
current_index_str = self.archive_name + '/' + k
hits = []
nbr_hits = 0
i = 0
for m in v['threads']:
current_index_str = self.archive_name + '/' + k + '/' + str(i)
nbr_hits += self.search_message(keyword, m, current_index_str, hits, field)
i += 1
if nbr_hits > min_hits:
# nettime-l - fix (the name of the thread from ex. 'nettime-l_Jan_01' to 'January 2001')
if k.startswith("nettime-l_"):
dt = datetime.strptime(k, "nettime-l_%b_%y")
k = dt.strftime("%B_%Y")
search_results['results'].append({ 'thread': k, 'nbr_hits': nbr_hits, 'hits': hits})
return search_results
def threads_ranking(self, rank=5):
with self.lock_threads_ranking:
search_results = { "keyword": "thread ranking", "field": "ranking", "archive": self.archive_name, "results": [] }
a = analysis.archive.Archive(self)
            q = a.query()
ranking = q.threads_ranking(rank=rank)
for i in ranking:
r = analysis.format.frame_to_dictionary_threads_ranking(ranking[i])
for h in r:
hit = [{ 'url': h['url'], 'subject': h['subject'], 'author_name': h['from']}]
search_results['results'].append({'thread': h['date'], 'nbr_hits': h['nbr-references'], 'hits': hit})
del a
del q
return search_results
def get_key(kv_tuple):
k = kv_tuple[0]
# k is of the form "Month_Year" - ex.: "January_2001"
try:
return datetime.strptime(k, "%B_%Y")
except Exception:
pass
# k is of the form "Month(abv)_Year(abv)" - ex.: "Jan_01"
try:
return datetime.strptime(k, "%b_%y")
except Exception:
pass
# k is of the form "Year" - ex.: "2001"
try:
return datetime.strptime(k, "%Y")
except Exception:
pass
# nettime-l - fix - k is of the form "nettime-l_Month(abv)_Year(abv)" - ex.: "nettime-l_Jan_01"
try:
return datetime.strptime(k, "nettime-l_%b_%y")
except Exception:
pass
print("--------------")
print(k)
return None

2
setenv

@@ -1 +1 @@
-source activate listservs
+source activate listserv

43
terminal/progress.py Normal file

@@ -0,0 +1,43 @@
from __future__ import print_function
import sys
import re
# https://stackoverflow.com/questions/3160699/python-progress-bar
class ProgressBar(object):
DEFAULT = 'Progress: %(bar)s %(percent)3d%%'
FULL = '%(bar)s %(current)d/%(total)d (%(percent)3d%%) %(remaining)d to go'
def __init__(self, title, total, width=40, fmt=DEFAULT, symbol='=',
output=sys.stderr):
assert len(symbol) == 1
self.title = title
self.total = total
self.width = width
self.symbol = symbol
self.output = output
self.fmt = re.sub(r'(?P<name>%\(.+?\))d',
r'\g<name>%dd' % len(str(total)), fmt)
self.current = 0
def __call__(self):
percent = self.current / float(self.total)
size = int(self.width * percent)
remaining = self.total - self.current
bar = self.title + ' [' + self.symbol * size + ' ' * (self.width - size) + ']'
args = {
'total': self.total,
'bar': bar,
'current': self.current,
'percent': percent * 100,
'remaining': remaining
}
print('\r' + self.fmt % args, file=self.output, end='')
def done(self):
self.current = self.total
self()
print('', file=self.output)
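
A minimal usage sketch (this mirrors how insert_db() in archive/archive.py drives the bar; output goes to stderr):

import time
import terminal.progress

items = range(120)
bar = terminal.progress.ProgressBar("nettime-l", len(items),
                                    fmt=terminal.progress.ProgressBar.FULL)
for _ in items:
    time.sleep(0.01)   # stand-in for inserting one thread
    bar.current += 1
    bar()              # redraw the bar in place
bar.done()             # snap to 100% and print the trailing newline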

16
terminal/util.py Normal file

@@ -0,0 +1,16 @@
import sys

def y_n_question(question_str):
yes = {'yes','y', 'ye', ''}
no = {'no','n'}
while True:
sys.stdout.write(question_str + " [Y/n]: ")
choice = input().lower()
if choice in yes:
return True
elif choice in no:
return False
else:
sys.stdout.write("\nPlease respond with 'yes' or 'no'\n")
continue


@@ -1,2 +1 @@
-gunicorn -w 1 --bind 0.0.0.0:5555 www-serve:app
 gunicorn -w 1 -b 127.0.0.1:5555 www-serve:app

www-serve.py

@@ -1,2 +1,4 @@
 from www import app
-#app.run(debug=True, threaded=True, use_reloader=False) # uncomment this line to run flask's server
+
+if __name__ == "__main__":
+    app.run(debug=True, use_reloader=False)

www/views.py

@@ -1,144 +1,46 @@
 from flask import render_template, request, jsonify
 from www import app
-from www import archives
-import search.archive
+import archive.archive as archive
+import config
+import www.config as wconfig
 from datetime import datetime
 import logging
 
-logging.info(' ------- arch = Archives() -------- ')
-arch = archives.Archives()
-arch.load()
-archives_data = arch.data
-
 @app.route('/')
 def index():
-    k = archives_data.keys()
-    return render_template("index.html", archives=k)
+    return render_template("index.html")
 
-# def get_key(kv_tuple):
-#     k = kv_tuple[0]
-#     # k is of the form "Month_Year" - ex.: "January_2001"
-#     try:
-#         return datetime.strptime(k, "%B_%Y")
-#     except Exception:
-#         pass
-#     # k is of the form "Month(abv)_Year(abv)" - ex.: "Jan_01"
-#     try:
-#         return datetime.strptime(k, "%b_%y")
-#     except Exception:
-#         pass
-#     # k is of the form "Year" - ex.: "2001"
-#     try:
-#         return datetime.strptime(k, "%Y")
-#     except Exception:
-#         pass
-#     return None
-
-@app.route('/<list>')
-def get_list(list):
-    if list in archives_data:
-        d = []
-        for k, v in sorted(archives_data[list].archive.items(), key=search.archive.get_key, reverse=True):
-            d.append({"name": k, "url": v['url'], "nbr_threads": len(v['threads'])})
-        return render_template("list.html", list_name=list, list=d)
-    else:
-        return 'nee nee'
-
-@app.route('/<list>/<sublist>')
-def get_sublist(list, sublist):
-    print(list)
-    print(sublist)
-    sublist = sublist.replace(' ', '_')
-    if list in archives_data and sublist in archives_data[list].archive:
-        return render_template("threads.html", sublist_name=sublist, threads=archives_data[list].archive[sublist]['threads'])
-    else:
-        return 'na na'
-
-@app.route('/<list>/<sublist>/<int:index>')
-def get_message(list, sublist, index):
-    sublist = sublist.replace(' ', '_')
-    index = int(index)
-    if list in archives_data and sublist in archives_data[list].archive and index < len(archives_data[list].archive[sublist]['threads']):
-        return render_template("message.html", message=archives_data[list].archive[sublist]['threads'][index])
-    else:
-        'non non'
-
-@app.route('/<list>/<sublist>/<int:index>/<path:follow_ups>')
-def get_follow_ups(list, sublist, index, follow_ups):
-    sublist = sublist.replace(' ', '_')
-    index = int(index)
-    ups = follow_ups.split('/')
-    follow = []
-    for u in ups:
-        follow.append(int(u))
-    if list in archives_data and sublist in archives_data[list].archive and index < len(archives_data[list].archive[sublist]['threads']):
-        message = archives_data[list].archive[sublist]['threads'][index]
-        for f in follow:
-            message = message['follow-up'][f]
-        return render_template("message.html", message=message)
-    else:
-        'nope nope'
-
 @app.route('/search')
 def searh():
     if len(request.args) < 1:
-        k = archives_data.keys()
-        return render_template("search.html", archives=k, fields=['content', 'from(name)', 'from(email)'], hits=['n/a', '2', '3', '4', '5', '6', '7', '8', '9'])
+        return render_template("search.html", archives=wconfig.lists_to_serve, fields=['content', 'from'])
     k_arg = request.args.get('keyword')
     l_arg = request.args.get('list')
-    sl_arg = request.args.get('sublist')
     f_arg = request.args.get('field')
-    h_arg = request.args.get('hits')
     if k_arg is None or k_arg.strip() == '':
         return "no keyword..."
-    if l_arg is None:
-        return "no list..."
-    if not (l_arg == "all") and not (l_arg in archives_data):
+    if l_arg != "all" and l_arg not in wconfig.lists_to_serve:
         return "list '" + l_arg + "' does not exist"
-    if sl_arg is not None:
-        if not sl_arg in archives_data[l]:
-            return "sublist '" + sl_arg + "' does not exist in list '" + l_arg + "'"
-    if f_arg == "from(name)":
-        f_arg = 'author_name'
-    elif f_arg == "from(email)":
-        f_arg = 'from'
+    if f_arg not in ['content', 'from']:
+        return "field '" + f_arg + "' does not exist"
     lists = []
     if l_arg == "all":
-        for k in archives_data.keys():
-            lists.append(k)
+        lists = wconfig.lists_to_serve
     else:
         lists.append(l_arg)
-    nbr_hits = 0
-    if h_arg in ['2', '3', '4', '5', '6', '7', '8', '9']:
-        nbr_hits = int(h_arg)
 
     ################################
     ##
-    ## need to cache all the below
+    ## need to cache all the below.....
     ##
     ################################
@@ -147,18 +49,41 @@ def searh():
     logging.info("search keyword = " + k_arg)
 
     for l in lists:
-        if k_arg == "rank":
-            logging.info(" ranking " + l)
-            s = archives_data[l].threads_ranking()
-        else:
-            s = archives_data[l].search(keyword=k_arg, field=f_arg, min_hits=nbr_hits)
-        results.append(s)
+        with archive.Archive(l, config=config.db) as a:
+            if f_arg == 'content':
+                r = a.content_search(k_arg)
+            else:
+                r = a.from_search(k_arg)
 
-    ## -- sort results?
-    search_results = sorted(results, key=get_result_key)
-
-    return jsonify(result=search_results)
+            # format data to return
+            search_results = { "keyword": k_arg, "field": f_arg, "archive": a.archive_name, "results": [] }
+            month_year_results = {}
+            for (from_, author_name_, subject_, date_, url_) in r:
+                m_y = date_.strftime("%B_%Y")
+                if m_y not in month_year_results:
+                    month_year_results[m_y] = []
+                month_year_results[m_y].append({ 'url': url_, 'subject': subject_, 'author_name': author_name_})
+            for k, v in sorted(month_year_results.items(), key=get_key, reverse=True):
+                search_results['results'].append({ 'thread': k, 'nbr_hits': len(v), 'hits': v})
+            # search_results['results'].append({ 'thread': k, 'nbr_hits': nbr_hits, 'hits': hits})
+            # where:
+            #     'thread' = "%B_%Y" aka. January 2001
+            #     'nbr_hits' = nbr hits for that month
+            #     'hits' = [{ 'url': h['url'], 'subject': h['subject'], 'author_name': h['from']}]
+            results.append(search_results)
 
+    sorted_results = sorted(results, key=get_result_key)
+    return jsonify(result=sorted_results)
+
+def get_key(kv):
+    return datetime.strptime(kv[0], "%B_%Y")
 
 def get_result_key(r):
     return r['archive']

www/templates/index.html

@@ -1,8 +1,6 @@
 <html>
 <head></head>
 <body>
-{% for a in archives %}
-<a href="/{{ a }}"><h3>{{ a }}</h3></a>
-{% endfor %}
+<a href="/search"><h3>---> SEARCH <---</h3></a>
 </body>
 </html>

www/templates/list.html

@@ -1,10 +0,0 @@
<html>
<head></head>
<body>
<ul>
{% for t in list %}
<li><a href="{{ list_name }}/{{ t.name }}"><h3>{{ t.name }} -- {{ t.nbr_threads }}</h3></a></li>
{% endfor %}
</ul>
</body>
</html>

www/templates/message.html

@@ -1,11 +0,0 @@
<html>
<head>
<meta charset="UTF-8">
</head>
<body>
<h3>{{ message.subject }}</h3>
<h4>{{ message.author_name }}</h4>
<h4>{{ message.date }}</h4>
<p>{{ message.content }} </p>
</body>
</html>

www/templates/search.html

@@ -20,11 +20,6 @@
 <option value="{{ a }}">{{ a }}</option>
 {% endfor %}
 </select>
-<select form="search" name="hits">
-{% for a in hits %}
-<option value="{{ a }}">{{ a }}</option>
-{% endfor %}
-</select>
 <input type="submit" value="search" id="submit">
 <div id="loading">Loading...</div>
 </form>

www/templates/threads.html

@@ -1,25 +0,0 @@
<html>
<head></head>
<body>
{% macro message(m, index, urlpath)-%}
{% set path = urlpath + '/' + index|string %}
<li>
{{ index }}. <a href="{{ path }}">{{ m.subject }}</a> <i>{{ m.author_name }}</i>
{% if m.get('follow-up') %}
<ul>
{% for msg in m.get('follow-up') %}
{{ message(m=msg, index=loop.index - 1, urlpath=path) }}
{% endfor %}
</ul>
{% endif %}
</li>
{%- endmacro %}
<ul>
{% for m in threads recursive %}
{{ message(m=m, index=loop.index - 1, urlpath=sublist_name) }}
{% endfor %}
</ul>
</body>
</html>