From aa4f478e2fa6d59999515de42b56cd495be2a7ca Mon Sep 17 00:00:00 2001 From: Michael Murtaugh Date: Thu, 19 Nov 2015 12:47:03 +0100 Subject: [PATCH] self contained sync command with per output options and all flag --- etherdump/commands/sync.py | 111 ++++++++++++++++++++++++++----------- 1 file changed, 79 insertions(+), 32 deletions(-) diff --git a/etherdump/commands/sync.py b/etherdump/commands/sync.py index 98a3110..57672d5 100644 --- a/etherdump/commands/sync.py +++ b/etherdump/commands/sync.py @@ -36,6 +36,11 @@ def main (args): p.add_argument("--pub", default="pub", help="pub path for output, default: pub") p.add_argument("--group", default="g", help="group path for output, default: g") p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None") + p.add_argument("--meta", default=False, action="store_true", help="download meta to file, default: False") + p.add_argument("--text", default=False, action="store_true", help="download text to file, default: False") + p.add_argument("--html", default=False, action="store_true", help="download html to file, default: False") + p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to file, default: False") + p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False") args = p.parse_args(args) info = load_padinfo(args.padinfo) @@ -48,7 +53,7 @@ def main (args): padids = jsonload(info['api']+'listAllPads?'+urlencode(data))['data']['padIDs'] padids.sort() numpads = len(padids) - maxmsglen = 0 + # maxmsglen = 0 count = 0 for i, padid in enumerate(padids): if args.skip != None and i maxmsglen: - maxmsglen = len(msg) - sys.stderr.write("\r{0}".format(" "*maxmsglen)) + # if len(msg) > maxmsglen: + # maxmsglen = len(msg) + # sys.stderr.write("\r{0}".format(" "*maxmsglen)) sys.stderr.write(msg.encode("utf-8")) sys.stderr.flush() + data['padID'] = padid.encode("utf-8") p = padpath(padid, args.pub, args.group) metapath = p + ".meta.json" revisions = None - if os.path.exists(metapath): - with open(metapath) as f: - meta = json.load(f) - revisions = jsonload(info['api']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] - if meta['revisions'] == revisions: - continue - - meta = {'padid': padid.encode("utf-8")} - if revisions == None: - meta['revisions'] = jsonload(info['api']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] - else: - meta['revisions' ] = revisions + tries = 1 + skip = False + while True: + try: + if os.path.exists(metapath): + with open(metapath) as f: + meta = json.load(f) + revisions = jsonload(info['api']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] + if meta['revisions'] == revisions: + skip=True + break + + meta = {'padid': padid.encode("utf-8")} + if revisions == None: + meta['revisions'] = jsonload(info['api']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] + else: + meta['revisions' ] = revisions - if (meta['revisions'] == 0) and (not args.zerorevs): - # print("Skipping zero revs", file=sys.stderr) + if (meta['revisions'] == 0) and (not args.zerorevs): + # print("Skipping zero revs", file=sys.stderr) + skip=True + break + + # todo: load more metadata! + meta['pad'], meta['group'] = splitpadname(padid) + meta['pathbase'] = p + meta['lastedited_raw'] = int(jsonload(info['api']+'getLastEdited?'+urlencode(data))['data']['lastEdited']) + meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat() + meta['author_ids'] = jsonload(info['api']+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs'] + break + except HTTPError as e: + tries += 1 + if tries > 3: + print ("Too many failures ({0}), skipping".format(padid).encode("utf-8"), file=sys.stderr) + skip=True + break + + if skip: continue count += 1 - # todo: load more metadata! - meta['pad'], meta['group'] = splitpadname(padid) - meta['pathbase'] = p - meta['lastedited_raw'] = int(jsonload(info['api']+'getLastEdited?'+urlencode(data))['data']['lastEdited']) - meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat() - meta['author_ids'] = jsonload(info['api'] +'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs'] - # save it - try: - os.makedirs(os.path.split(metapath)[0]) - except OSError: - pass - with open(metapath, "w") as f: - json.dump(meta, f) + print (padid.encode("utf-8")) - print("\nWrote {0} files...".format(count), file=sys.stderr) + if args.all or (args.meta or args.text or args.html or args.dhtml): + try: + os.makedirs(os.path.split(metapath)[0]) + except OSError: + pass + + if args.all or args.meta: + with open(metapath, "w") as f: + json.dump(meta, f) + + # Process text, html, dhtml, all options + if args.all or args.text: + text = jsonload(info['api']+'getText?'+urlencode(data)) + text = text['data']['text'] + with open(p+".txt", "w") as f: + f.write(text.encode("utf-8")) + + if args.all or args.html: + html = jsonload(info['api']+'getHTML?'+urlencode(data)) + html = html['data']['html'] + with open(p+".html", "w") as f: + f.write(html.encode("utf-8")) + + if args.all or args.dhtml: + data['startRev'] = "0" + html = jsonload(info['api']+'createDiffHTML?'+urlencode(data)) + html = html['data']['html'] + with open(p+".diff.html", "w") as f: + f.write(html.encode("utf-8")) + + print("\n{0} pad(s) changed".format(count), file=sys.stderr)