#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import sys, json, re, os
from datetime import datetime
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
from math import ceil, floor
from common import *
"""
sync(meta):
Update meta data files for those that have changed.
Check for changed pads by looking at revisions & comparing to existing
"""
def jsonload (url):
    """Fetch *url* over HTTP and decode the response body as JSON.

    The response object is always closed, even when reading fails,
    so repeated calls cannot leak sockets.
    """
    f = urlopen(url)
    try:
        data = f.read()
    finally:
        f.close()
    return json.loads(data)
def load_padinfo(p):
    """Parse the padinfo JSON file at *p* and return its settings.

    The returned dict gains one extra key, 'api': the assembled base URL
    of the Etherpad HTTP API (protocol, host, port, api path, version).
    """
    with open(p) as src:
        settings = json.load(src)
    settings['api'] = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(settings)
    return settings
def main (args):
    """Synchronize local pad archives with an Etherpad server.

    Iterates over pads (those named on the command line, or every pad the
    API reports), compares each pad's revision count on the server against
    the cached ``<pad>.meta.json`` file, and (re)downloads the requested
    artifacts (meta / text / html / dhtml) for pads that have changed.

    :param args: argv-style list of command-line arguments (no program name).
    Side effects: writes files under the --pub/--group paths, prints changed
    pad ids to stdout and a progress bar / summary to stderr.
    """
    p = ArgumentParser("")
    p.add_argument("padid", nargs="*", default=[])
    p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
    p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False")
    p.add_argument("--pub", default="pub", help="pub path for output, default: pub")
    p.add_argument("--group", default="g", help="group path for output, default: g")
    p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
    p.add_argument("--meta", default=False, action="store_true", help="download meta to file, default: False")
    p.add_argument("--text", default=False, action="store_true", help="download text to file, default: False")
    p.add_argument("--html", default=False, action="store_true", help="download html to file, default: False")
    p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to file, default: False")
    p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False")
    args = p.parse_args(args)

    info = load_padinfo(args.padinfo)
    data = {}
    data['apikey'] = info['apikey']

    # Explicit pad ids on the command line win; otherwise ask the server.
    if args.padid:
        padids = args.padid
    else:
        padids = jsonload(info['api']+'listAllPads?'+urlencode(data))['data']['padIDs']
    padids.sort()
    numpads = len(padids)

    count = 0
    for i, padid in enumerate(padids):
        if args.skip is not None and i < args.skip:
            continue

        # Progress indicator on stderr: [****----------------] i/total padid
        progress = float(i) / numpads
        bars = int(ceil(progress * 20))
        bar = ("*" * bars) + ("-" * (20 - bars))
        msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i + 1), numpads, padid)
        sys.stderr.write(msg.encode("utf-8"))
        sys.stderr.flush()

        data['padID'] = padid.encode("utf-8")
        p = padpath(padid, args.pub, args.group)
        metapath = p + ".meta.json"
        revisions = None
        tries = 1
        skip = False
        while True:
            try:
                # Unchanged pad: cached revision count matches the server's.
                if os.path.exists(metapath):
                    with open(metapath) as f:
                        meta = json.load(f)
                    revisions = jsonload(info['api']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
                    if meta['revisions'] == revisions:
                        skip = True
                        break
                meta = {'padid': padid.encode("utf-8")}
                # Reuse the revision count fetched above when we have it.
                if revisions is None:
                    revisions = jsonload(info['api']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
                meta['revisions'] = revisions
                if (meta['revisions'] == 0) and (not args.zerorevs):
                    skip = True
                    break
                # todo: load more metadata!
                meta['pad'], meta['group'] = splitpadname(padid)
                meta['pathbase'] = p
                # lastEdited is in milliseconds since the epoch.
                meta['lastedited_raw'] = int(jsonload(info['api']+'getLastEdited?'+urlencode(data))['data']['lastEdited'])
                meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat()
                meta['author_ids'] = jsonload(info['api']+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs']
                break
            except HTTPError:
                # Transient server error: retry up to 3 times, then skip this pad.
                tries += 1
                if tries > 3:
                    print ("Too many failures ({0}), skipping".format(padid).encode("utf-8"), file=sys.stderr)
                    skip = True
                    break
        if skip:
            continue

        count += 1
        print (padid.encode("utf-8"))

        if args.all or (args.meta or args.text or args.html or args.dhtml):
            try:
                os.makedirs(os.path.split(metapath)[0])
            except OSError:
                # Output directory already exists; that's fine.
                pass
        if args.all or args.meta:
            with open(metapath, "w") as f:
                json.dump(meta, f)
        # Download the requested representations of the pad contents.
        if args.all or args.text:
            text = jsonload(info['api']+'getText?'+urlencode(data))['data']['text']
            with open(p+".txt", "w") as f:
                f.write(text.encode("utf-8"))
        if args.all or args.html:
            html = jsonload(info['api']+'getHTML?'+urlencode(data))['data']['html']
            with open(p+".html", "w") as f:
                f.write(html.encode("utf-8"))
        if args.all or args.dhtml:
            # createDiffHTML needs a starting revision; diff from the beginning.
            data['startRev'] = "0"
            html = jsonload(info['api']+'createDiffHTML?'+urlencode(data))['data']['html']
            with open(p+".diff.html", "w") as f:
                f.write(html.encode("utf-8"))

    print("\n{0} pad(s) changed".format(count), file=sys.stderr)