From ca4276e93ba754fce0cfa32cfeb57e1a2c3bb9bf Mon Sep 17 00:00:00 2001 From: Michael Murtaugh Date: Thu, 22 Feb 2018 15:40:27 +0100 Subject: [PATCH] index magic --- etherdump/commands/index.py | 146 ++++++++++++++++++++++++---- etherdump/data/templates/index.html | 2 +- 2 files changed, 129 insertions(+), 19 deletions(-) diff --git a/etherdump/commands/index.py b/etherdump/commands/index.py index 79d3631..db86c5c 100644 --- a/etherdump/commands/index.py +++ b/etherdump/commands/index.py @@ -1,7 +1,8 @@ from __future__ import print_function from argparse import ArgumentParser -import sys, json, re, os +import sys, json, re, os, time from datetime import datetime +import dateutil.parser try: # python2 @@ -27,6 +28,7 @@ index: """ def group (items, key=lambda x: x): + """ returns a list of lists, of items grouped by a key function """ ret = [] keys = {} for item in items: @@ -39,8 +41,19 @@ def group (items, key=lambda x: x): ret.append(keys[k]) return ret +# def base (x): +# return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x) + +def splitextlong (x): + """ split "long" extensions, i.e. foo.bar.baz => ('foo', '.bar.baz') """ + m = re.search(r"^(.*?)(\..*)$", x) + if m: + return m.groups() + else: + return x, '' + def base (x): - return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x) + return splitextlong(x)[0] def excerpt (t, chars=25): if len(t) > chars: @@ -60,10 +73,18 @@ def url_base (url): ret += "/" return ret +def datetimeformat (t, format='%Y-%m-%d %H:%M:%S'): + if type(t) == str: + dt = dateutil.parser.parse(t) + return dt.strftime(format) + else: + return time.strftime(format, time.localtime(t)) + def main (args): p = ArgumentParser("Convert dumped files to a document via a template.") - p.add_argument("input", nargs="+", help="filenames (uses .meta.json files)") + p.add_argument("input", nargs="+", help="Files to list (.meta.json files)") + p.add_argument("--templatepath", default=None, help="path to find templates, default: built-in") p.add_argument("--template", default="index.html", help="template name, built-ins include index.html, rss.xml; default: index.html") p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: ./.etherdump/settings.json") @@ -103,19 +124,55 @@ def main (args): env = Environment(loader=FileSystemLoader(tmpath)) env.filters["excerpt"] = excerpt + env.filters["datetimeformat"] = datetimeformat template = env.get_template(args.template) info = loadpadinfo(args.padinfo) inputs = args.input inputs.sort() - inputs = group(inputs, base) + # Use "base" to strip (longest) extensions + # inputs = group(inputs, base) - def loadmeta(paths): + def wrappath (p): + path = "./{0}".format(p) + ext = os.path.splitext(p)[1][1:] + return { + "url": path, + "path": path, + "code": 200, + "type": ext + } + + def metaforpaths (paths): + ret = {} + pid = base(paths[0]) + ret['pad'] = ret['padid'] = pid + ret['versions'] = [wrappath(x) for x in paths] + lastedited = None for p in paths: - if p.endswith(".meta.json"): - with open(p) as f: - return json.load(f) + mtime = os.stat(p).st_mtime + if lastedited == None or mtime > lastedited: + lastedited = mtime + ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime("%Y-%m-%dT%H:%M:%S") + ret["lastedited_raw"] = mtime + return ret + + def loadmeta(p): + # Consider a set of grouped files + # Otherwise, create a "dummy" one that wraps all the files as versions + if p.endswith(".meta.json"): + with open(p) as f: + return json.load(f) + # # IF there is a .meta.json, load it & MERGE with other files + # if ret: + # # TODO: merge with other files + # for p in paths: + # if "./"+p not in ret['versions']: + # ret['versions'].append(wrappath(p)) + # return ret + # else: + # return metaforpaths(paths) def fixdates (padmeta): d = dateutil.parser.parse(padmeta["lastedited_iso"]) @@ -124,9 +181,59 @@ def main (args): return padmeta pads = map(loadmeta, inputs) + pads = [x for x in pads if x != None] pads = map(fixdates, pads) args.pads = list(pads) + inputs = args.input + inputs.sort() + removelist = [] + + def has_version (padinfo, path): + return [x for x in padinfo['versions'] if 'path' in x and x['path'] == "./"+path] + + pads_by_base = {} + for p in args.pads: + # print ("Trying padid", p['padid'], file=sys.stderr) + padbase = os.path.splitext(p['padid'])[0] + pads_by_base[padbase] = p + padbases = list(pads_by_base.keys()) + # SORT THEM LONGEST FIRST TO ensure that LONGEST MATCHES MATCH + padbases.sort(key=lambda x: len(x), reverse=True) + # print ("PADBASES", file=sys.stderr) + # for pb in padbases: + # print (" ", pb, file=sys.stderr) + + def could_have_base (x, y): + return x == y or (x.startswith(y) and x[len(y):].startswith(".")) + + def get_best_pad (x): + for pb in padbases: + p = pads_by_base[pb] + if could_have_base(x, pb): + return p + + for x in inputs: + # pair input with a pad if possible + p = get_best_pad(x) + if p: + if not has_version(p, x): + print ("Grouping file {0} with pad {1}".format(x, p['padid']), file=sys.stderr) + p['versions'].append(wrappath(x)) + # else: + # print ("Skipping existing version {0} ({1})...".format(x, p['padid']), file=sys.stderr) + removelist.append(x) + # Removed Matches files + for x in removelist: + inputs.remove(x) + # print ("Remaining files:", file=sys.stderr) + # for x in inputs: + # print (x, file=sys.stderr) + # print (file=sys.stderr) + # Add "fake" pads for remaining files + for x in inputs: + args.pads.append(metaforpaths([x])) + if args.timestamp == None: args.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") @@ -165,18 +272,21 @@ def main (args): for v in p["versions"]: t = v["type"] versions_by_type[t] = v - with open (versions_by_type["text"]["path"]) as f: - p["text"] = f.read() - # ADD IN LINK + if "text" in versions_by_type: + with open (versions_by_type["text"]["path"]) as f: + p["text"] = f.read() + + # ADD IN LINK TO PAD AS "link" for v in linkversions: - vdata = versions_by_type[v] - try: - if v == "pad" or os.path.exists(vdata["path"]): - p["link"] = absurl(vdata["url"], linkbase) - break - except KeyError as e: - pass + if v in versions_by_type: + vdata = versions_by_type[v] + try: + if v == "pad" or os.path.exists(vdata["path"]): + p["link"] = absurl(vdata["url"], linkbase) + break + except KeyError as e: + pass if args.output: with open(args.output, "w") as f: diff --git a/etherdump/data/templates/index.html b/etherdump/data/templates/index.html index e4192e2..2804830 100644 --- a/etherdump/data/templates/index.html +++ b/etherdump/data/templates/index.html @@ -92,7 +92,7 @@ $(document).ready(function() {% for v in pad.versions %}{{v.type}} {% endfor %} - {{ pad.lastedited_iso|replace("T", " ") }} + {{ pad.lastedited_iso|datetimeformat }} {{ pad.revisions }} {{ pad.author_ids|length }}