updated sethtml and new pushhtml

This commit is contained in:
Michael Murtaugh 2019-06-07 16:52:02 +02:00
parent 4a8219b48c
commit 17fb11fae2
4 changed files with 101 additions and 56 deletions

View File

@ -1,8 +1,12 @@
from __future__ import print_function from __future__ import print_function
from argparse import ArgumentParser from argparse import ArgumentParser
import json import json
from urllib import urlencode from urllib.parse import urlencode
from urllib2 import urlopen, HTTPError, URLError from urllib.request import urlopen
from urllib.error import HTTPError, URLError
# from urllib import urlencode
# from urllib2 import urlopen, HTTPError, URLError
def main(args): def main(args):
@ -27,8 +31,8 @@ def main(args):
if args.showurl: if args.showurl:
print (requesturl) print (requesturl)
else: else:
results = json.load(urlopen(requesturl))['data'] results = json.loads(urlopen(requesturl).read().decode("utf-8"))['data']
if args.format == "json": if args.format == "json":
print (json.dumps(results)) print (json.dumps(results))
else: else:
print (results['html'].encode("utf-8")) print (results['html'])

View File

@ -244,11 +244,15 @@ def main (args):
html = html['data']['html'] html = html['data']['html']
ver["path"] = p+".raw.html" ver["path"] = p+".raw.html"
ver["url"] = quote(ver["path"]) ver["url"] = quote(ver["path"])
doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False) # JUN 2016: changing to save REALLY the RAW / unchanged HTML from the API
html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links)
with open(ver["path"], "w") as f: with open(ver["path"], "w") as f:
# f.write(html.encode("utf-8")) print(html, file=f)
print (ET.tostring(doc, method="html", encoding="unicode"), file=f)
# doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
# html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links)
# with open(ver["path"], "w") as f:
# # f.write(html.encode("utf-8"))
# print (ET.tostring(doc, method="html", encoding="unicode"), file=f)
# output meta # output meta
if args.all or args.meta: if args.all or args.meta:

View File

@ -0,0 +1,33 @@
from etherdump.commands.sethtml import sethtml, pushhtml
import argparse
import os, sys
import json
def main(args):
    """Push dumped HTML files back to their pads, overwriting pad content.

    args -- list of command-line arguments (without the program name):
            one or more metadata JSON files, plus the optional
            --padinfo (settings JSON) and --basepath options.

    For every metadata file, the first entry in its 'versions' list whose
    'type' is 'html' supplies the path of the HTML file to push; the pad is
    taken from the metadata's 'padid'. Metadata files that list no html
    version are reported on stderr and skipped.
    """
    p = argparse.ArgumentParser("""Indiscriminantly PUSH the contents of dumped html files to an etherpad, clobbering any existing content!""")
    p.add_argument("input", nargs="+", help="Metadata files, e.g. *.meta.json")
    p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json")
    p.add_argument("--basepath", default=".")
    args = p.parse_args(args)

    with open(args.padinfo) as f:
        info = json.load(f)
    # A "localapiurl" entry, when present, takes precedence over "apiurl".
    apiurl = info.get("localapiurl", info["apiurl"])
    apikey = info['apikey']

    for n in args.input:
        with open(n) as f:
            meta = json.load(f)
        padid = meta['padid']

        # Resolve the path afresh for each metadata file so that a file
        # without an html version can never reuse the previous file's path
        # (which would push the wrong content to this pad).
        path = next((v['path'] for v in meta['versions'] if v['type'] == 'html'), None)
        if path is None:
            print("Skipping {0}: no html version listed for {1}".format(n, padid), file=sys.stderr)
            continue
        if args.basepath:
            path = os.path.join(args.basepath, path)

        with open(path) as f:
            htmlsrc = f.read()
        print("Pushing {0} to {1}".format(path, padid), file=sys.stderr)
        pushhtml(apiurl, apikey, padid, htmlsrc)

View File

@ -1,15 +1,50 @@
from __future__ import print_function from urllib.request import urlopen
from argparse import ArgumentParser from urllib.parse import urlencode
import json, sys import json
from urllib import urlencode import re
from urllib2 import urlopen, HTTPError, URLError import argparse
import requests
LIMIT_BYTES = 100*1000 # HOST_PORT="localhost:9001"
# APIKEY="439afe700152ed5f5cdc43e9bf0a6ab0697c422db0e7277d43f2e1af4f155d79"
def create_pad(apiurl, apikey, padid):
    """Call the ``createPad`` API function for ``padid``.

    apiurl -- base URL of the API; "createPad" is appended to it directly
    apikey -- value sent as the ``apikey`` form field
    padid  -- value sent as the ``padID`` form field

    Returns the response body decoded as UTF-8 and parsed as JSON.
    Errors raised by ``urlopen`` or by JSON decoding propagate to the caller.
    """
    url = apiurl + "createPad"
    data = (
        ('apikey', apikey),
        ('padID', padid),
    )
    # The with-block closes the response even if reading or decoding fails.
    with urlopen(url, data=urlencode(data).encode("utf-8")) as response:
        return json.loads(response.read().decode("utf-8"))
def sethtml(apiurl, apikey, padid, html):
    """Call the ``setHTML`` API function to set the content of ``padid``.

    The first ``<title>...</title>`` element is removed from ``html``
    before it is sent.

    apiurl -- base URL of the API; "setHTML" is appended to it directly
    apikey -- value sent as the ``apikey`` form field
    padid  -- value sent as the ``padID`` form field
    html   -- HTML source sent as the ``html`` form field

    Returns the response body decoded as UTF-8 and parsed as JSON.
    Errors raised by ``urlopen`` or by JSON decoding propagate to the caller.
    """
    # count/flags are passed by keyword (passing them positionally is
    # deprecated); re.S lets a title that spans several lines be stripped too.
    html = re.sub(r"<title>.*?</title>", "", html, count=1, flags=re.I | re.S)
    payload = urlencode((
        ('apikey', apikey),
        ('padID', padid),
        ('html', html),
    )).encode("utf-8")
    # The with-block closes the response even if reading or decoding fails.
    with urlopen(apiurl + "setHTML", data=payload) as response:
        return json.loads(response.read().decode("utf-8"))
def pushhtml (apiurl, apikey, padid, html):
    """Set a pad's HTML, creating the pad and retrying once if needed.

    Calls sethtml first. When the response's 'code' is 1 (presumably the
    API's answer for a pad that does not exist yet -- confirm against the
    API documentation), create_pad is called and sethtml is attempted a
    second time. Returns the response of the last sethtml call.
    """
    result = sethtml(apiurl, apikey, padid, html)
    if result['code'] != 1:
        # First attempt was not answered with code 1: nothing more to do.
        return result
    create_pad(apiurl, apikey, padid)
    return sethtml(apiurl, apikey, padid, html)
def main(args): def main(args):
p = ArgumentParser("calls the setHTML API function for the given padid") p = argparse.ArgumentParser("calls the setHTML API function for the given padid")
p.add_argument("padid", help="the padid") p.add_argument("padid", help="the padid")
p.add_argument("--html", default=None, help="html, default: read from stdin") p.add_argument("--html", default=None, help="html, default: read from stdin")
p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json") p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json")
@ -21,46 +56,15 @@ def main(args):
with open(args.padinfo) as f: with open(args.padinfo) as f:
info = json.load(f) info = json.load(f)
apiurl = info.get("apiurl")
# apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
# data = {}
# data['apikey'] = info['apikey']
# data['padID'] = args.padid # is utf-8 encoded
createPad = False apiurl = info.get("localapiurl", info["apiurl"])
apikey = info['apikey']
with open(args.html) as f:
htmlsrc = f.read()
# print (type(htmlsrc))
if args.create: if args.create:
# check if it's in fact necessary resp = pushhtml(apiurl, apikey, args.padid, htmlsrc)
requesturl = apiurl+'getRevisionsCount?'+urlencode({'apikey': info['apikey'], 'padID': args.padid})
results = json.load(urlopen(requesturl))
print (json.dumps(results, indent=2), file=sys.stderr)
if results['code'] != 0:
createPad = True
if args.html:
html = args.html
else: else:
html = sys.stdin.read() resp = sethtml(apiurl, apikey, args.padid, htmlsrc)
print (resp)
params = {}
params['apikey'] = info['apikey']
params['padID'] = args.padid
if createPad:
requesturl = apiurl+'createPad'
if args.showurl:
print (requesturl)
results = requests.post(requesturl, params=params, data={'text': ''}) # json.load(urlopen(requesturl))
results = json.loads(results.text)
print (json.dumps(results, indent=2))
if len(html) > LIMIT_BYTES and args.limit:
print ("limiting", len(text), LIMIT_BYTES, file=sys.stderr)
html = html[:LIMIT_BYTES]
requesturl = apiurl+'setHTML'
if args.showurl:
print (requesturl)
# params['html'] = html
results = requests.post(requesturl, params={'apikey': info['apikey']}, data={'apikey': info['apikey'], 'padID': args.padid, 'html': html}) # json.load(urlopen(requesturl))
results = json.loads(results.text)
print (json.dumps(results, indent=2))