diff --git a/README b/README
new file mode 100644
index 0000000..aa6805c
--- /dev/null
+++ b/README
@@ -0,0 +1,10 @@
+usage: archive.py [-h] [--arch ARCH] url [url ...]
+
+Mailinglists are dead. Long live mailinglists!
+
+positional arguments:
+  url          mailinglist urls to archive
+
+optional arguments:
+  -h, --help   show this help message and exit
+  --arch ARCH  path to archives directory (default='archives')
\ No newline at end of file
diff --git a/archive.py b/archive.py
new file mode 100644
index 0000000..c78646d
--- /dev/null
+++ b/archive.py
@@ -0,0 +1,25 @@
+import sys, logging, argparse
+import lists.crawl
+
+logging.basicConfig(level=logging.DEBUG)
+
+def run(args):
+
+    if not args.url:
+        sys.exit('No url(s). Aborting.')
+
+    ## check valid url?... hmm... no
+    for u in args.url:
+        lists.crawl.crawl(u, args.arch)
+
+    sys.exit()
+
+if __name__ == "__main__":
+
+    p = argparse.ArgumentParser(description='Mailinglists are dead. Long live mailinglists!')
+    p.add_argument('url', metavar="url", help="mailinglist urls to archive", nargs="+")
+    p.add_argument('--arch', help="path to archives directory (default='archives')", default="archives")
+
+    args = p.parse_args()
+
+    run(args)
diff --git a/export/__init__.py b/export/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/export/mbox.py b/export/mbox.py
new file mode 100644
index 0000000..fb81610
--- /dev/null
+++ b/export/mbox.py
@@ -0,0 +1 @@
+# mbox export
\ No newline at end of file
diff --git a/lists/__init__.py b/lists/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/lists/crawl.py b/lists/crawl.py
new file mode 100644
index 0000000..a71f636
--- /dev/null
+++ b/lists/crawl.py
@@ -0,0 +1,4 @@
+# crawl dispatch: route a list url to the right backend (mhonarc, pipermail, ...)
+
+def crawl(url, archive_dir):
+    return
\ No newline at end of file
diff --git a/lists/mhonarc.py b/lists/mhonarc.py
new file mode 100644
index 0000000..192a0b2
--- /dev/null
+++ b/lists/mhonarc.py
@@ -0,0 +1,250 @@
+import urllib2, urlparse
+import logging
+from bs4 import BeautifulSoup
+import email.utils
+from email.mime.text import MIMEText
+import mailbox
+import time
+import sys, os, re, json, gzip
+import traceback
+
+DELAY = 0.2
+
+# hack for the mailbox module (re: force mbox.add() encoding to utf8)
+reload(sys)
+sys.setdefaultencoding('utf8')
+
+
+def collect_from_url(url, sublist_name, base_arch_dir="archives", mbox=False):
+
+    response = urllib2.urlopen(url)
+    html = response.read()
+    soup = BeautifulSoup(html, "html.parser")
+
+    # base url
+    base_url = soup.select('body p:nth-of-type(2) base')[0].get('href')
+
+    # collect name
+    list_name = soup.select('body p:nth-of-type(2) base title')[0].string
+    logging.info("Getting " + list_name + " list archive for " + sublist_name)
+
+    lists = soup.select('ul:nth-of-type(2) li')
+
+    threads = []
+
+    for l in lists:
+
+        if l.strong is None:
+            continue
+
+        name = l.strong.string
+
+        if name.lower() == sublist_name.lower():
+
+            threads_url_list = []
+            threads_links = l.select('ul li a')
+            for t in threads_links:
+                thread_url = urlparse.urljoin(base_url, t.get('href'))
+                threads_url_list.append(thread_url)
+
+            nbr_threads = str(len(threads_url_list))
+            n = 0
+
+            for u in threads_url_list:
+                n += 1
+                logging.info("## " + str(n) + " / " + nbr_threads + " ##")
+                threads.append(collect_threads_from_url(u, base_arch_dir, mbox))
+
+            return threads
+
+            # for u in threads_url_list[0:10]:
+            #     print "---------------------------------------"
+            #     tt = collect_threads_from_url(u, base_arch_dir, mbox)
+            #     threads.append(tt)
+
+    return None
+
+def collect_threads_from_url(url, base_arch_dir, mbox):
+
+    response = urllib2.urlopen(url)
+    html = response.read()
+    soup = BeautifulSoup(html, "html.parser")
+
+    # base url
+    base_url = url
+
+    # collect name
+    threads_name = soup.select('p:nth-of-type(1) title')[0].string
+    threads_name = threads_name.replace(' ', '_')
+
+    # thread data struct
+    threads = {'name' : threads_name, 'url' : base_url, 'threads' : []}
+
+    logging.info("Collecting Threads of: " + threads_name)
+
+    # check if archive already exists
+    file_path = os.path.join(base_arch_dir, threads['name'] + ".json")
+    if os.path.isfile(file_path):
+        logging.info("archive already exists. loading from file " + file_path)
+        with open(file_path, 'r') as fpin:
+            threads = json.load(fpin)
+    else:
+        lists = soup.select('ul:nth-of-type(1) > li')
+
+        nbr_threads = str(len(lists))
+        n = 0
+
+        for l in lists:
+            n += 1
+            logging.info("> " + str(n) + " / " + nbr_threads)
+
+            try:
+                thread = archive_thread(l, base_url, None)
+                threads['threads'].append(thread)
+            except:
+                ex_type, ex, tb = sys.exc_info()
+                print ex_type
+                print ex
+                traceback.print_tb(tb)
+                del tb
+                continue
+
+            time.sleep(DELAY)
+
+        # write
+        logging.info("writing archive to file " + file_path)
+
+        with open(file_path, 'w') as fp:
+            json.dump(threads, fp, indent=4)
+
+    if mbox:
+        mbox_path = os.path.join(base_arch_dir, threads['name'] + ".txt")
+        mbox_path_gz = mbox_path + ".gz"
+        logging.info("writing mbox " + mbox_path)
+        if not os.path.isfile(mbox_path):
+            box = mailbox.mbox(mbox_path)
+            box.lock()
+            try:
+                for t in threads['threads']:
+                    write_mbox_message(t, box)
+                box.flush()
+            except:
+                ex_type, ex, tb = sys.exc_info()
+                print ex_type
+                print ex
+                traceback.print_tb(tb)
+                del tb
+            finally:
+                box.unlock()
+
+            with open(mbox_path) as fpin, gzip.open(mbox_path_gz, 'wb') as fpout:
+                fpout.writelines(fpin)
+
+        else:
+            logging.info("mbox " + mbox_path + " already exists.")
+
+    logging.info("done.")
+
+    return threads
+
+
+def archive_thread(li, base_url, parent_thread_data):
+
+    thread_link = li.select('strong a')[0]
+    thread_url = urlparse.urljoin(base_url, thread_link.get('href'))
+    thread_id = thread_link.get('name')
+    thread_title = thread_link.string
+    thread_author_name = li.select('em')[0].string
+
+    message = {u'id': thread_id, u'subject': thread_title, u'url': thread_url, u'author_name': thread_author_name}
+
+    collect_message(thread_url, message)
+
+    follow = li.select('ul > li')
+    if len(follow) > 0:
+        for f in follow:
+            follow_link = f.select('strong a')
+            if len(follow_link) > 0:
+                archive_thread(f, base_url, message)  ## recursion into follow-ups
+
+    if parent_thread_data is None:
+        return message
+
+    if u'follow-up' not in parent_thread_data:
+        parent_thread_data[u'follow-up'] = []
+
+    parent_thread_data[u'follow-up'].append(message)
+
+    return message
+
+
+def collect_message(url, message):
+
+    print url
+
+    response = urllib2.urlopen(url)
+    html = response.read()
+    soup = BeautifulSoup(html, "html.parser")
+
+    # note: this should follow the RFC header standard -- MHonArc puts the header info in the first <ul>
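+    # illustrative sketch (markup assumed from the selectors below; real
+    # MHonArc output varies with its configuration):
+    #   <ul>
+    #     <li><em>Subject</em>: Re: hello</li>
+    #     <li><em>From</em>: jane at example.org</li>
+    #   </ul>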
+
+    message_labels = ('to', 'subject', 'from', 'date', 'message-id', 'content-type')
+
+    # mhonarc x-comments
+    # ref: http://www.schlaubert.de/MHonArc/doc/resources/printxcomments.html
+    message['subject'] = parse_xcomment(soup, "X-Subject")
+    message['date'] = parse_xcomment(soup, "X-Date")
+    message['from'] = parse_xcomment(soup, "X-From-R13")  # rot13-obfuscated address, of little use
+    message['message-id'] = parse_xcomment(soup, 'X-Message-Id')
+    message['content-type'] = parse_xcomment(soup, 'X-Content-Type')
+
+    # parse the headers displayed on the page
+
+    info = soup.select('ul:nth-of-type(1) > li')
+
+    for i in info:
+        if i.em is None:
+            continue
+        field = i.em.string
+        if field.lower() in message_labels:
+            # the value follows the "<field>: " label; split on the first colon
+            message[field.lower()] = i.text.split(':', 1)[-1].strip()
+
+    ## reformat from -- [author_name, email_addr]
+
+    # from_addr = email.utils.parseaddr(message['from'])
+    # message['author_name'] = from_addr[0]
+    # message['from'] = from_addr[1]
+
+    ## -- content --
+    message['content'] = soup.select('pre:nth-of-type(2)')[0].text
+
+# mhonarc x-comments
+# ref: http://www.schlaubert.de/MHonArc/doc/resources/printxcomments.html
+def parse_xcomment(soup, xcom):
+    com = soup.find(text=re.compile(xcom))
+    if com is not None:
+        # cut off the leading "X-...:" label, then trim whitespace
+        value = com.strip()
+        prefix = xcom + ':'
+        if value.startswith(prefix):
+            value = value[len(prefix):]
+        return value.strip()
+    return com
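+
+# illustration of the X- comments MHonArc embeds in each message page
+# (header values here are made up):
+#   <!--X-Subject: Re: hello -->
+#   <!--X-Message-Id: 12345@example.org -->
+# parse_xcomment() locates such a comment and strips the "X-...:" label.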
+
+def to_mbox_message(msg):
+    mime = MIMEText('', 'plain', _charset='utf8')
+    mime['From'] = msg['from']
+    mime['Subject'] = msg['subject']
+    mime['Message-Id'] = msg['message-id']
+    mime['Date'] = msg['date']
+    mime.set_payload(msg['content'], charset='utf8')
+    mbox_message = mailbox.mboxMessage(mime)
+    mbox_message.set_from(mime['From'], email.utils.parsedate(mime['Date']))
+    return mbox_message
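+
+# usage sketch: msg is one of the dicts built by collect_message(); set_from()
+# combines the From header and the parsed Date into the mbox envelope line,
+# which comes out roughly as
+#   From jane@example.org Thu Jan  1 00:00:00 2015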
+
+# may raise (e.g. on a malformed message); the caller catches the exception
+def write_mbox_message(msg, mbox):
+    mbox_msg = to_mbox_message(msg)
+    mbox.add(mbox_msg)
+    if u'follow-up' in msg:
+        for f in msg['follow-up']:
+            write_mbox_message(f, mbox)
+
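+# wiring sketch (assumed; lists/crawl.py is still a stub): the dispatcher
+# would eventually call something like
+#   collect_from_url(url, 'some-list', base_arch_dir=archive_dir, mbox=True)
+# where 'some-list' is a hypothetical sublist name from the archive index.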
diff --git a/lists/pipermail.py b/lists/pipermail.py
new file mode 100644
index 0000000..626f86d
--- /dev/null
+++ b/lists/pipermail.py
@@ -0,0 +1 @@
+# pipermail
\ No newline at end of file