updated sethtml and new pushhtml

This commit is contained in:
Michael Murtaugh 2019-06-07 16:52:02 +02:00
parent 4a8219b48c
commit 17fb11fae2
4 changed files with 101 additions and 56 deletions

View File

@ -1,8 +1,12 @@
from __future__ import print_function
from argparse import ArgumentParser
import json
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
# from urllib import urlencode
# from urllib2 import urlopen, HTTPError, URLError
def main(args):
@ -27,8 +31,8 @@ def main(args):
if args.showurl:
print (requesturl)
else:
results = json.load(urlopen(requesturl))['data']
results = json.loads(urlopen(requesturl).read().decode("utf-8"))['data']
if args.format == "json":
print (json.dumps(results))
else:
print (results['html'].encode("utf-8"))
print (results['html'])

View File

@ -244,11 +244,15 @@ def main (args):
html = html['data']['html']
ver["path"] = p+".raw.html"
ver["url"] = quote(ver["path"])
doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links)
# JUN 2016: changing to save REALLY the RAW / unchanged HTML from the API
with open(ver["path"], "w") as f:
# f.write(html.encode("utf-8"))
print (ET.tostring(doc, method="html", encoding="unicode"), file=f)
print(html, file=f)
# doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
# html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links)
# with open(ver["path"], "w") as f:
# # f.write(html.encode("utf-8"))
# print (ET.tostring(doc, method="html", encoding="unicode"), file=f)
# output meta
if args.all or args.meta:

View File

@ -0,0 +1,33 @@
from etherdump.commands.sethtml import sethtml, pushhtml
import argparse
import os, sys
import json
def main(args):
    """Push previously dumped HTML files back to an etherpad.

    For every metadata file given on the command line, the first entry in
    its "versions" list whose "type" is "html" is located, the file it
    points to is read, and its contents are sent to the pad named by the
    metadata's "padid" via pushhtml. Existing pad content is overwritten.

    args -- list of command-line argument strings (without the program name)
    """
    # NOTE(review): the long text is passed as the first positional argument,
    # which argparse treats as `prog`, not `description` -- left unchanged here.
    p = argparse.ArgumentParser("""Indiscriminantly PUSH the contents of dumped html files to an etherpad, clobbering any existing content!""")
    p.add_argument("input", nargs="+", help="Metadata files, e.g. *.meta.json")
    p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json")
    p.add_argument("--basepath", default=".")
    args = p.parse_args(args)

    with open(args.padinfo) as f:
        info = json.load(f)
    apiurl = info.get("localapiurl", info["apiurl"])
    apikey = info['apikey']

    for n in args.input:
        with open(n) as f:
            meta = json.load(f)

        # Resolve the html path freshly for each metadata file. Without this,
        # a file lacking an html version would reuse the previous file's path
        # (pushing the wrong content to this pad) or fail with a NameError.
        path = next(
            (v['path'] for v in meta['versions'] if v['type'] == 'html'),
            None,
        )
        if path is None:
            print("No html version listed in {0}, skipping".format(n), file=sys.stderr)
            continue
        if args.basepath:
            path = os.path.join(args.basepath, path)

        padid = meta['padid']
        with open(path) as f:
            htmlsrc = f.read()
        print("Pushing {0} to {1}".format(path, padid), file=sys.stderr)
        pushhtml(apiurl, apikey, padid, htmlsrc)

View File

@ -1,15 +1,50 @@
from __future__ import print_function
from argparse import ArgumentParser
import json, sys
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
import requests
from urllib.request import urlopen
from urllib.parse import urlencode
import json
import re
import argparse
LIMIT_BYTES = 100*1000
# HOST_PORT="localhost:9001"
# APIKEY="439afe700152ed5f5cdc43e9bf0a6ab0697c422db0e7277d43f2e1af4f155d79"
def create_pad(apiurl, apikey, padid):
    """Call the API's createPad function for padid and return the parsed reply.

    apiurl -- base API URL; "createPad" is appended to it as-is, so the
              caller is expected to supply any trailing separator
    apikey -- API key sent as the 'apikey' form field
    padid  -- pad identifier sent as the 'padID' form field

    Returns the JSON response decoded into Python objects.
    """
    url = apiurl + "createPad"
    data = (
        ('apikey', apikey),
        ('padID', padid),
    )
    # Passing data makes urlopen issue a POST; the with-block guarantees the
    # response (and its underlying connection) is closed once it has been read.
    with urlopen(url, data=urlencode(data).encode("utf-8")) as f:
        return json.loads(f.read().decode("utf-8"))
def sethtml(apiurl, apikey, padid, html):
    """Send html to the API's setHTML function for padid and return the reply.

    The first <title>...</title> element found in html (matched
    case-insensitively, on a single line) is removed before sending.

    apiurl -- base API URL; "setHTML" is appended to it as-is, so the
              caller is expected to supply any trailing separator
    apikey -- API key sent as the 'apikey' form field
    padid  -- pad identifier sent as the 'padID' form field
    html   -- HTML source (str) sent as the 'html' form field

    Returns the JSON response decoded into Python objects.
    """
    # strip the (initial) title tag; count/flags are passed by keyword because
    # positional use of these arguments is deprecated in recent Python versions
    html = re.sub(r"<title>.*?</title>", "", html, count=1, flags=re.I)
    data = (
        ('apikey', apikey),
        ('padID', padid),
        ('html', html),
    )
    url = apiurl + "setHTML"
    payload = urlencode(data).encode("utf-8")
    # Passing data makes urlopen issue a POST; the with-block guarantees the
    # response (and its underlying connection) is closed once it has been read.
    with urlopen(url, data=payload) as f:
        return json.loads(f.read().decode("utf-8"))
def pushhtml(apiurl, apikey, padid, html):
    """Set a pad's HTML, creating the pad and retrying once if needed.

    sethtml is attempted first. If the API reply carries code 1 (presumably
    because the pad does not exist yet -- confirm against the API docs),
    create_pad is called and sethtml is attempted a second time.

    Returns the reply of the last sethtml call.
    """
    reply = sethtml(apiurl, apikey, padid, html)
    if reply['code'] != 1:
        # Any reply other than code 1 is handed back unchanged.
        return reply
    create_pad(apiurl, apikey, padid)
    return sethtml(apiurl, apikey, padid, html)
def main(args):
p = ArgumentParser("calls the setHTML API function for the given padid")
p = argparse.ArgumentParser("calls the setHTML API function for the given padid")
p.add_argument("padid", help="the padid")
p.add_argument("--html", default=None, help="html, default: read from stdin")
p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json")
@ -21,46 +56,15 @@ def main(args):
with open(args.padinfo) as f:
info = json.load(f)
apiurl = info.get("apiurl")
# apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
# data = {}
# data['apikey'] = info['apikey']
# data['padID'] = args.padid # is utf-8 encoded
createPad = False
apiurl = info.get("localapiurl", info["apiurl"])
apikey = info['apikey']
with open(args.html) as f:
htmlsrc = f.read()
# print (type(htmlsrc))
if args.create:
# check if it's in fact necessary
requesturl = apiurl+'getRevisionsCount?'+urlencode({'apikey': info['apikey'], 'padID': args.padid})
results = json.load(urlopen(requesturl))
print (json.dumps(results, indent=2), file=sys.stderr)
if results['code'] != 0:
createPad = True
if args.html:
html = args.html
resp = pushhtml(apiurl, apikey, args.padid, htmlsrc)
else:
html = sys.stdin.read()
params = {}
params['apikey'] = info['apikey']
params['padID'] = args.padid
if createPad:
requesturl = apiurl+'createPad'
if args.showurl:
print (requesturl)
results = requests.post(requesturl, params=params, data={'text': ''}) # json.load(urlopen(requesturl))
results = json.loads(results.text)
print (json.dumps(results, indent=2))
if len(html) > LIMIT_BYTES and args.limit:
print ("limiting", len(text), LIMIT_BYTES, file=sys.stderr)
html = html[:LIMIT_BYTES]
requesturl = apiurl+'setHTML'
if args.showurl:
print (requesturl)
# params['html'] = html
results = requests.post(requesturl, params={'apikey': info['apikey']}, data={'apikey': info['apikey'], 'padID': args.padid, 'html': html}) # json.load(urlopen(requesturl))
results = json.loads(results.text)
print (json.dumps(results, indent=2))
resp = sethtml(apiurl, apikey, args.padid, htmlsrc)
print (resp)