updated sethtml and new pushhtml
This commit is contained in:
parent
4a8219b48c
commit
17fb11fae2
@ -1,8 +1,12 @@
|
||||
from __future__ import print_function
|
||||
from argparse import ArgumentParser
|
||||
import json
|
||||
from urllib import urlencode
|
||||
from urllib2 import urlopen, HTTPError, URLError
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import urlopen
|
||||
from urllib.error import HTTPError, URLError
|
||||
|
||||
# from urllib import urlencode
|
||||
# from urllib2 import urlopen, HTTPError, URLError
|
||||
|
||||
|
||||
def main(args):
|
||||
@ -27,8 +31,8 @@ def main(args):
|
||||
if args.showurl:
|
||||
print (requesturl)
|
||||
else:
|
||||
results = json.load(urlopen(requesturl))['data']
|
||||
results = json.loads(urlopen(requesturl).read().decode("utf-8"))['data']
|
||||
if args.format == "json":
|
||||
print (json.dumps(results))
|
||||
else:
|
||||
print (results['html'].encode("utf-8"))
|
||||
print (results['html'])
|
||||
|
||||
@ -244,11 +244,15 @@ def main (args):
|
||||
html = html['data']['html']
|
||||
ver["path"] = p+".raw.html"
|
||||
ver["url"] = quote(ver["path"])
|
||||
doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
|
||||
html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links)
|
||||
# JUN 2016: changing to save the truly RAW / unchanged HTML from the API
|
||||
with open(ver["path"], "w") as f:
|
||||
# f.write(html.encode("utf-8"))
|
||||
print (ET.tostring(doc, method="html", encoding="unicode"), file=f)
|
||||
print(html, file=f)
|
||||
|
||||
# doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
|
||||
# html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links)
|
||||
# with open(ver["path"], "w") as f:
|
||||
# # f.write(html.encode("utf-8"))
|
||||
# print (ET.tostring(doc, method="html", encoding="unicode"), file=f)
|
||||
|
||||
# output meta
|
||||
if args.all or args.meta:
|
||||
|
||||
33
etherdump/commands/pushhtml.py
Normal file
33
etherdump/commands/pushhtml.py
Normal file
@ -0,0 +1,33 @@
|
||||
from etherdump.commands.sethtml import sethtml, pushhtml
|
||||
import argparse
|
||||
import os, sys
|
||||
import json
|
||||
|
||||
|
||||
def main(args):
    """Push the HTML listed in dumped metadata files to their pads.

    For every metadata file given on the command line, the first entry of
    its ``versions`` list whose ``type`` is ``html`` names the file to push.
    That file's content is sent to the pad named by the metadata's ``padid``
    via ``pushhtml``. Metadata files that list no html version are reported
    on stderr and skipped.

    Args:
        args: List of command-line argument strings (without the program
            name), handed to ``argparse``.

    Raises:
        KeyError: if the settings file has neither ``localapiurl`` nor
            ``apiurl``, or lacks ``apikey``; or if a metadata file lacks
            ``versions`` / ``padid``.
        OSError: if the settings, a metadata file or an html file cannot be
            opened.
    """
    # The text must be passed as description=; the first positional argument
    # of ArgumentParser is prog, which would put this sentence in the usage
    # line instead of the help body.
    p = argparse.ArgumentParser(
        description="""Indiscriminantly PUSH the contents of dumped html files to an etherpad, clobbering any existing content!""")
    p.add_argument("input", nargs="+", help="Metadata files, e.g. *.meta.json")
    p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json")
    p.add_argument("--basepath", default=".")
    args = p.parse_args(args)

    with open(args.padinfo) as f:
        info = json.load(f)

    # Prefer localapiurl when present; only look up apiurl when it is needed
    # so that settings containing just localapiurl do not raise KeyError.
    apiurl = info["localapiurl"] if "localapiurl" in info else info["apiurl"]
    apikey = info['apikey']

    for n in args.input:
        with open(n) as f:
            meta = json.load(f)

        # First html version wins. Resolving the path per input (instead of
        # relying on a variable left over from the loop) guarantees that a
        # previous file's html is never pushed to this pad.
        path = next((v['path'] for v in meta['versions'] if v['type'] == 'html'), None)
        if path is None:
            print("Skipping {0}: no html version listed".format(n), file=sys.stderr)
            continue
        if args.basepath:
            path = os.path.join(args.basepath, path)

        padid = meta['padid']
        with open(path) as f:
            htmlsrc = f.read()
        print("Pushing {0} to {1}".format(path, padid), file=sys.stderr)
        pushhtml(apiurl, apikey, padid, htmlsrc)
|
||||
|
||||
@ -1,15 +1,50 @@
|
||||
from __future__ import print_function
|
||||
from argparse import ArgumentParser
|
||||
import json, sys
|
||||
from urllib import urlencode
|
||||
from urllib2 import urlopen, HTTPError, URLError
|
||||
import requests
|
||||
from urllib.request import urlopen
|
||||
from urllib.parse import urlencode
|
||||
import json
|
||||
import re
|
||||
import argparse
|
||||
|
||||
|
||||
LIMIT_BYTES = 100*1000
|
||||
# HOST_PORT="localhost:9001"
|
||||
# APIKEY="439afe700152ed5f5cdc43e9bf0a6ab0697c422db0e7277d43f2e1af4f155d79"
|
||||
|
||||
def create_pad(apiurl, apikey, padid):
    """Call the ``createPad`` API endpoint and return its decoded reply.

    Args:
        apiurl: Base URL of the API. The endpoint name ``createPad`` is
            appended verbatim, so the value is expected to end with a slash
            (presumably of the form ``http://host:port/api/1/`` -- confirm
            against the settings file in use).
        apikey: Value sent as the ``apikey`` form field.
        padid: Value sent as the ``padID`` form field.

    Returns:
        The response body, decoded as UTF-8 and parsed as JSON.

    Raises:
        urllib.error.URLError: if the request cannot be completed.
        ValueError: if the response body is not valid JSON.
    """
    url = apiurl + "createPad"
    data = urlencode((
        ('apikey', apikey),
        ('padID', padid),
    )).encode("utf-8")
    # The with-block closes the response (and its underlying connection)
    # even if reading or JSON decoding fails.
    with urlopen(url, data=data) as response:
        return json.loads(response.read().decode("utf-8"))
|
||||
|
||||
|
||||
def sethtml(apiurl, apikey, padid, html):
    """Send HTML to a pad through the ``setHTML`` API endpoint.

    The first ``<title>...</title>`` element found in ``html`` (matched
    case-insensitively, and allowed to span several lines) is removed before
    the content is sent.

    Args:
        apiurl: Base URL of the API. The endpoint name ``setHTML`` is
            appended verbatim, so the value is expected to end with a slash.
        apikey: Value sent as the ``apikey`` form field.
        padid: Value sent as the ``padID`` form field.
        html: HTML source (str) sent as the ``html`` form field.

    Returns:
        The response body, decoded as UTF-8 and parsed as JSON.

    Raises:
        urllib.error.URLError: if the request cannot be completed.
        ValueError: if the response body is not valid JSON.
    """
    # strip the (initial) title tag; count/flags are passed by keyword
    # (positional use is deprecated) and re.S lets a title that contains
    # line breaks match as well.
    html = re.sub(r"<title>.*?</title>", "", html, count=1, flags=re.I | re.S)
    data = urlencode((
        ('apikey', apikey),
        ('padID', padid),
        ('html', html),
    )).encode("utf-8")
    url = apiurl + "setHTML"
    # The with-block closes the response even if reading or decoding fails.
    with urlopen(url, data=data) as response:
        return json.loads(response.read().decode("utf-8"))
|
||||
|
||||
def pushhtml(apiurl, apikey, padid, html):
    """Set a pad's HTML, creating the pad and retrying once if needed.

    ``sethtml`` is attempted first. When its reply carries ``code`` equal to
    1 (presumably the API's signal that the pad does not exist -- confirm
    against the API documentation), ``create_pad`` is called and ``sethtml``
    is attempted a second time.

    Returns:
        The parsed reply of the last ``sethtml`` call that was made.
    """
    first_reply = sethtml(apiurl, apikey, padid, html)
    if first_reply['code'] != 1:
        return first_reply
    # The reply of create_pad is not inspected; the retry's reply is what
    # the caller gets back.
    create_pad(apiurl, apikey, padid)
    return sethtml(apiurl, apikey, padid, html)
|
||||
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser("calls the setHTML API function for the given padid")
|
||||
p = argparse.ArgumentParser("calls the setHTML API function for the given padid")
|
||||
p.add_argument("padid", help="the padid")
|
||||
p.add_argument("--html", default=None, help="html, default: read from stdin")
|
||||
p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json")
|
||||
@ -21,46 +56,15 @@ def main(args):
|
||||
|
||||
with open(args.padinfo) as f:
|
||||
info = json.load(f)
|
||||
apiurl = info.get("apiurl")
|
||||
# apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
|
||||
# data = {}
|
||||
# data['apikey'] = info['apikey']
|
||||
# data['padID'] = args.padid # is utf-8 encoded
|
||||
|
||||
createPad = False
|
||||
apiurl = info.get("localapiurl", info["apiurl"])
|
||||
apikey = info['apikey']
|
||||
|
||||
with open(args.html) as f:
|
||||
htmlsrc = f.read()
|
||||
# print (type(htmlsrc))
|
||||
if args.create:
|
||||
# check if it's in fact necessary
|
||||
requesturl = apiurl+'getRevisionsCount?'+urlencode({'apikey': info['apikey'], 'padID': args.padid})
|
||||
results = json.load(urlopen(requesturl))
|
||||
print (json.dumps(results, indent=2), file=sys.stderr)
|
||||
if results['code'] != 0:
|
||||
createPad = True
|
||||
|
||||
if args.html:
|
||||
html = args.html
|
||||
resp = pushhtml(apiurl, apikey, args.padid, htmlsrc)
|
||||
else:
|
||||
html = sys.stdin.read()
|
||||
|
||||
params = {}
|
||||
params['apikey'] = info['apikey']
|
||||
params['padID'] = args.padid
|
||||
|
||||
if createPad:
|
||||
requesturl = apiurl+'createPad'
|
||||
if args.showurl:
|
||||
print (requesturl)
|
||||
results = requests.post(requesturl, params=params, data={'text': ''}) # json.load(urlopen(requesturl))
|
||||
results = json.loads(results.text)
|
||||
print (json.dumps(results, indent=2))
|
||||
|
||||
if len(html) > LIMIT_BYTES and args.limit:
|
||||
print ("limiting", len(text), LIMIT_BYTES, file=sys.stderr)
|
||||
html = html[:LIMIT_BYTES]
|
||||
|
||||
requesturl = apiurl+'setHTML'
|
||||
if args.showurl:
|
||||
print (requesturl)
|
||||
# params['html'] = html
|
||||
results = requests.post(requesturl, params={'apikey': info['apikey']}, data={'apikey': info['apikey'], 'padID': args.padid, 'html': html}) # json.load(urlopen(requesturl))
|
||||
results = json.loads(results.text)
|
||||
print (json.dumps(results, indent=2))
|
||||
resp = sethtml(apiurl, apikey, args.padid, htmlsrc)
|
||||
print (resp)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user