This commit is contained in:
Michael Murtaugh
2015-12-04 17:17:32 +01:00
parent 568a8f0790
commit 8d5ebd6f01
5 changed files with 257 additions and 5 deletions
+23
View File
@@ -0,0 +1,23 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import json, os
def main(args):
    """Join several JSON files into one JSON array printed on stdout.

    args is the list of command-line arguments (without the program name):
    one or more input filenames, plus an optional ``--indent N`` (default 2).

    The files are read in sorted filename order, each one is parsed as JSON,
    and the parsed values are emitted as a single JSON list.
    An indent of 0 selects the compact, single-line output.
    """
    parser = ArgumentParser("")
    parser.add_argument("input", nargs="+", help="filenames")
    parser.add_argument("--indent", type=int, default=2, help="indent")
    opts = parser.parse_args(args)

    collected = []
    # sorted() keeps the output order deterministic without mutating opts.input
    for path in sorted(opts.input):
        with open(path) as f:
            collected.append(json.load(f))

    # A falsy indent (0) maps to None, i.e. json.dumps' compact default form
    print(json.dumps(collected, indent=opts.indent or None))
+3
View File
@@ -11,6 +11,9 @@ def splitpadname (padid):
else:
return (u"", padid)
def padurl(padid):
    """Return the URL for the given pad id.

    The pad id is handed back unchanged.
    NOTE(review): looks like a stub awaiting a real URL scheme -- confirm.
    """
    return padid
def padpath (padid, pub_path=u"", group_path=u""):
g, p = splitpadname(padid)
if type(g) == unicode:
+60
View File
@@ -0,0 +1,60 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import json, os, re
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
from jinja2 import FileSystemLoader, Environment
def group(items, key=lambda x: x):
    """Partition items into sorted groups, ordered by their grouping key.

    key is called once per item to compute its grouping key (identity by
    default). Items sharing a key end up in the same list.

    Returns a list of lists: one list per distinct key, in ascending key
    order, each list itself sorted ascending.
    """
    buckets = {}
    for item in items:
        buckets.setdefault(key(item), []).append(item)
    return [sorted(buckets[k]) for k in sorted(buckets)]
def main(args):
    """Render an index.html inside each pad folder and list the folders.

    args is the list of command-line arguments (without the program name):
    one or more paths, plus an optional ``--templates PATH``. When no
    template path is given, ``data/templates`` next to this script's parent
    directory is used.

    Only inputs that are directories are considered. For each directory that
    contains ``<folder name>.meta.json``, the JSON metadata is loaded, passed
    as keyword arguments to the ``pad_index.html`` template, and the result
    is written to ``index.html`` in that directory. An HTML ``<ol>`` with one
    link per processed directory is printed on stdout.
    """
    parser = ArgumentParser("")
    parser.add_argument("input", nargs="+", help="filenames")
    parser.add_argument("--templates", default=None, help="templates path")
    args = parser.parse_args(args)

    tmpath = args.templates
    if tmpath is None:
        # Default: <parent of this script's directory>/data/templates
        tmpath = os.path.split(os.path.abspath(__file__))[0]
        tmpath = os.path.split(tmpath)[0]
        tmpath = os.path.join(tmpath, "data", "templates")

    env = Environment(loader=FileSystemLoader(tmpath))
    template = env.get_template("pad_index.html")

    # Plain files are ignored; only directories can hold pad dumps
    inputs = sorted(x for x in args.input if os.path.isdir(x))

    def base(x):
        # Strip a known pad-file suffix from the END of x. The alternatives
        # are grouped so that "$" anchors every one of them, not just ".txt".
        # NOTE(review): not called anywhere in this function yet.
        return re.sub(r"(\.diff\.html|\.meta\.json|\.html|\.txt)$", "", x)

    # TODO: MODIFY THIS TO MAKE THE OUTPUT JOINABLE with the collected META DATA
    # evt: how can the metadata become a GRAPH structure!!! with each output DOCUMENT
    #
    print("<ol>")
    for x in inputs:
        # The metadata file is named after the folder itself, so use the last
        # path component; this also copes with "foo/" and "some/dir/foo".
        padid = os.path.basename(os.path.normpath(x))
        metapath = os.path.join(x, "{0}.meta.json".format(padid))
        if os.path.exists(metapath):
            print("""<li><a href="{0}">{0}</a></li>""".format(x))
            with open(metapath) as f:
                meta = json.load(f)
            indexpath = os.path.join(x, "index.html")
            with open(indexpath, "w") as f:
                # Template output is encoded to UTF-8 bytes before writing
                print(template.render(**meta).encode("utf-8"), file=f)
    print("</ol>")
+43 -5
View File
@@ -14,6 +14,11 @@ pull(meta):
Update meta data files for those that have changed.
Check for changed pads by looking at revisions & comparing to existing
todo...
use/prefer public interfaces ? (export functions)
"""
def main (args):
@@ -29,6 +34,9 @@ def main (args):
p.add_argument("--html", default=False, action="store_true", help="download html to PADID.html, default: False")
p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to PADID.dhtml, default: False")
p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False")
p.add_argument("--folder", default=False, action="store_true", help="dump files to folder named PADID (meta, text, html, dhtml), default: False")
p.add_argument("--output", default=False, action="store_true", help="output changed padids on stdout")
p.add_argument("--force", default=False, action="store_true", help="reload, even if revisions count matches previous")
args = p.parse_args(args)
info = loadpadinfo(args.padinfo)
@@ -44,27 +52,51 @@ def main (args):
# maxmsglen = 0
count = 0
for i, padid in enumerate(padids):
# TODO...
"""
Self-contained documents / and/or document receipts
storing enough information to reconstruct (or understand an error occurred)
"""
if args.skip != None and i<args.skip:
continue
progressbar(i, numpads, padid)
data['padID'] = padid.encode("utf-8")
p = padpath(padid, args.pub, args.group)
if args.folder:
try:
os.makedirs(p)
except OSError:
pass
p = os.path.join(p, padid.encode("utf-8"))
metapath = p + ".meta.json"
revisions = None
tries = 1
skip = False
padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
if type(padurlbase) == unicode:
padurlbase = padurlbase.encode("utf-8")
while True:
try:
if os.path.exists(metapath):
with open(metapath) as f:
meta = json.load(f)
revisions = getjson(info['apiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
if meta['revisions'] == revisions:
if meta['revisions'] == revisions and not args.force:
skip=True
break
## TODO: OUTPUT TO DIRECTORIES with DATA EMBEDDED IN DOCUMENTS
## (or else in surrounding meta data!!)
meta = {'padid': padid.encode("utf-8")}
# this should be less of a hack
# TODO TEST!!!
meta["padurl"] = padurlbase + padid.encode("utf-8")
if revisions == None:
meta['revisions'] = getjson(info['apiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
else:
@@ -76,7 +108,7 @@ def main (args):
break
# todo: load more metadata!
meta['pad'], meta['group'] = splitpadname(padid)
meta['group'], meta['pad'] = splitpadname(padid)
meta['pathbase'] = p
meta['lastedited_raw'] = int(getjson(info['apiurl']+'getLastEdited?'+urlencode(data))['data']['lastEdited'])
meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat()
@@ -88,13 +120,16 @@ def main (args):
print ("Too many failures ({0}), skipping".format(padid).encode("utf-8"), file=sys.stderr)
skip=True
break
else:
sleep(3)
if skip:
continue
count += 1
print (padid.encode("utf-8"))
if args.output:
print (padid.encode("utf-8"))
if args.all or (args.meta or args.text or args.html or args.dhtml):
try:
@@ -104,7 +139,7 @@ def main (args):
if args.all or args.meta:
with open(metapath, "w") as f:
json.dump(meta, f)
json.dump(meta, f, indent=2)
# Process text, html, dhtml, all options
if args.all or args.text:
@@ -112,6 +147,9 @@ def main (args):
text = text['data']['text']
with open(p+".txt", "w") as f:
f.write(text.encode("utf-8"))
# once the content is settled, compute a hash
# and link it in the metadata!
if args.all or args.html:
html = getjson(info['apiurl']+'getHTML?'+urlencode(data))
@@ -143,4 +181,4 @@ def main (args):
else:
sleep(0.1)
print("\n{0} pad(s) changed".format(count), file=sys.stderr)
print("\n{0} pad(s) loaded".format(count), file=sys.stderr)