diff --git a/etherdump/commands/gethtml.py b/etherdump/commands/gethtml.py index ca0c79a..9a2fc48 100644 --- a/etherdump/commands/gethtml.py +++ b/etherdump/commands/gethtml.py @@ -1,8 +1,12 @@ from __future__ import print_function from argparse import ArgumentParser import json -from urllib import urlencode -from urllib2 import urlopen, HTTPError, URLError +from urllib.parse import urlencode +from urllib.request import urlopen +from urllib.error import HTTPError, URLError + +# from urllib import urlencode +# from urllib2 import urlopen, HTTPError, URLError def main(args): @@ -27,8 +31,8 @@ def main(args): if args.showurl: print (requesturl) else: - results = json.load(urlopen(requesturl))['data'] + results = json.loads(urlopen(requesturl).read().decode("utf-8"))['data'] if args.format == "json": print (json.dumps(results)) else: - print (results['html'].encode("utf-8")) + print (results['html']) diff --git a/etherdump/commands/pull.py b/etherdump/commands/pull.py index 220c193..4b776ab 100644 --- a/etherdump/commands/pull.py +++ b/etherdump/commands/pull.py @@ -244,11 +244,15 @@ def main (args): html = html['data']['html'] ver["path"] = p+".raw.html" ver["url"] = quote(ver["path"]) - doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False) - html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links) + # JUN 2016: chaning to save REALLY the RAW / unchanged HTML from the API with open(ver["path"], "w") as f: - # f.write(html.encode("utf-8")) - print (ET.tostring(doc, method="html", encoding="unicode"), file=f) + print(html, file=f) + + # doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False) + # html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links) + # with open(ver["path"], "w") as f: + # # f.write(html.encode("utf-8")) + # print (ET.tostring(doc, method="html", encoding="unicode"), file=f) # output meta if args.all or args.meta: diff --git a/etherdump/commands/pushhtml.py b/etherdump/commands/pushhtml.py new file mode 100644 index 0000000..e8faad7 --- /dev/null +++ b/etherdump/commands/pushhtml.py @@ -0,0 +1,33 @@ +from etherdump.commands.sethtml import sethtml, pushhtml +import argparse +import os, sys +import json + + +def main(args): + p = argparse.ArgumentParser("""Indiscriminantly PUSH the contents of dumped html files to an etherpad, clobbering any existing content!""") + p.add_argument("input", nargs="+", help="Metadata files, e.g. *.meta.json") + p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json") + p.add_argument("--basepath", default=".") + args = p.parse_args(args) + + with open(args.padinfo) as f: + info = json.load(f) + + apiurl = info.get("localapiurl", info["apiurl"]) + apikey = info['apikey'] + for n in args.input: + with open(n) as f: + meta = json.load(f) + for v in meta['versions']: + if v['type'] == 'html': + path = v['path'] + if args.basepath: + path = os.path.join(args.basepath, path) + break + padid = meta['padid'] + with open(path) as f: + htmlsrc = f.read() + print ("Pushing {0} to {1}".format(path, padid), file=sys.stderr) + pushhtml(apiurl, apikey, padid, htmlsrc) + diff --git a/etherdump/commands/sethtml.py b/etherdump/commands/sethtml.py index 7b6a0cf..669ffc3 100644 --- a/etherdump/commands/sethtml.py +++ b/etherdump/commands/sethtml.py @@ -1,15 +1,50 @@ -from __future__ import print_function -from argparse import ArgumentParser -import json, sys -from urllib import urlencode -from urllib2 import urlopen, HTTPError, URLError -import requests +from urllib.request import urlopen +from urllib.parse import urlencode +import json +import re +import argparse -LIMIT_BYTES = 100*1000 +# HOST_PORT="localhost:9001" +# APIKEY="439afe700152ed5f5cdc43e9bf0a6ab0697c422db0e7277d43f2e1af4f155d79" + +def create_pad (apiurl, apikey, padid): + # url = "http://{0}/api/1/createPad".format(hostport) + url = apiurl + "createPad" + data = ( + ('apikey', apikey), + ('padID', padid), + ) + f = urlopen(url, data=urlencode(data).encode("utf-8")) + return json.loads(f.read().decode("utf-8")) + + +def sethtml (apiurl, apikey, padid, html): + # strip the (initial) title tag + html = re.sub(r".*?", "", html, 1, re.I) + data = ( + ('apikey', apikey), + ('padID', padid), + ('html', html) + ) + # url = "http://{0}/api/1/setHTML".format(hostport) + url = apiurl + "setHTML" + data = urlencode(data).encode("utf-8") + f = urlopen(url, data=data) + return json.loads(f.read().decode("utf-8")) + +def pushhtml (apiurl, apikey, padid, html): + """ Use sethtml, call createPad if necessary """ + resp = sethtml(apiurl, apikey, padid, html) + if resp['code'] == 1: + # print ("ERROR {0}, trying to create pad first".format(resp['message'])) + create_pad(apiurl, apikey, padid) + resp = sethtml(apiurl, apikey, padid, html) + return resp + def main(args): - p = ArgumentParser("calls the setHTML API function for the given padid") + p = argparse.ArgumentParser("calls the setHTML API function for the given padid") p.add_argument("padid", help="the padid") p.add_argument("--html", default=None, help="html, default: read from stdin") p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json") @@ -21,46 +56,15 @@ def main(args): with open(args.padinfo) as f: info = json.load(f) - apiurl = info.get("apiurl") - # apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info) -# data = {} -# data['apikey'] = info['apikey'] -# data['padID'] = args.padid # is utf-8 encoded - createPad = False + apiurl = info.get("localapiurl", info["apiurl"]) + apikey = info['apikey'] + + with open(args.html) as f: + htmlsrc = f.read() + # print (type(htmlsrc)) if args.create: - # check if it's in fact necessary - requesturl = apiurl+'getRevisionsCount?'+urlencode({'apikey': info['apikey'], 'padID': args.padid}) - results = json.load(urlopen(requesturl)) - print (json.dumps(results, indent=2), file=sys.stderr) - if results['code'] != 0: - createPad = True - - if args.html: - html = args.html + resp = pushhtml(apiurl, apikey, args.padid, htmlsrc) else: - html = sys.stdin.read() - - params = {} - params['apikey'] = info['apikey'] - params['padID'] = args.padid - - if createPad: - requesturl = apiurl+'createPad' - if args.showurl: - print (requesturl) - results = requests.post(requesturl, params=params, data={'text': ''}) # json.load(urlopen(requesturl)) - results = json.loads(results.text) - print (json.dumps(results, indent=2)) - - if len(html) > LIMIT_BYTES and args.limit: - print ("limiting", len(text), LIMIT_BYTES, file=sys.stderr) - html = html[:LIMIT_BYTES] - - requesturl = apiurl+'setHTML' - if args.showurl: - print (requesturl) - # params['html'] = html - results = requests.post(requesturl, params={'apikey': info['apikey']}, data={'apikey': info['apikey'], 'padID': args.padid, 'html': html}) # json.load(urlopen(requesturl)) - results = json.loads(results.text) - print (json.dumps(results, indent=2)) + resp = sethtml(apiurl, apikey, args.padid, htmlsrc) + print (resp)