nettime/nettime/archiver.py
2016-07-21 16:07:42 +02:00

26 lines
934 B
Python

import urllib2, urllib, urlparse
import os, re, json, gzip
import mhonarccrawl
import datetime
def archive_from_url(url, sublist_name="nettime-l", archive_dir="archives"):
    """Crawl a list archive URL and dump the result as gzipped JSON.

    The URL has trailing whitespace stripped, the per-list directory
    ``archive_dir/sublist_name`` is ensured via ``check_dir`` and handed to
    ``mhonarccrawl.collect_from_url`` (called with ``mbox=True``).  The
    collected threads are stored, together with the lower-cased list name,
    the URL and today's date, in
    ``archive_dir/<lower-cased name>_<YYYY-MM-DD>.json.gz``.

    Args:
        url: address of the archive to crawl.
        sublist_name: name of the mailing list; also names the per-list
            directory.
        archive_dir: base directory for the per-list directory and for the
            resulting ``.json.gz`` file.

    Returns:
        None.
    """
    source_url = url.rstrip()
    list_dir = check_dir(archive_dir, sublist_name)
    list_key = sublist_name.lower()
    # The date is taken before the crawl starts, so it marks the crawl start.
    stamp = datetime.datetime.now().strftime('%Y-%m-%d')

    threads = mhonarccrawl.collect_from_url(source_url, sublist_name, list_dir, mbox=True)
    archive = {'name': list_key, 'url': source_url, 'date': stamp, 'threads': threads}

    # Note: the JSON dump goes into the base directory, not the per-list one.
    out_name = list_key + "_" + stamp + ".json.gz"
    out_path = os.path.join(archive_dir, out_name)
    with gzip.open(out_path, 'w') as out:
        json.dump(archive, out, indent=4)
def check_dir(base_dir, list_name):
    """Ensure the directory ``base_dir/list_name`` exists and return its path.

    Missing intermediate directories are created as well.

    Args:
        base_dir: parent directory.
        list_name: name of the sub-directory to ensure.

    Returns:
        The joined path ``os.path.join(base_dir, list_name)``.

    Raises:
        OSError: if the directory cannot be created, including when the path
            already exists but is not a directory.
    """
    arc_dir = os.path.join(base_dir, list_name)
    # Create first and inspect afterwards: an exists()-then-makedirs() sequence
    # can fail when another process creates the directory in between.
    try:
        os.makedirs(arc_dir)
    except OSError:
        # An already existing directory is fine; anything else is a real error.
        if not os.path.isdir(arc_dir):
            raise
    return arc_dir