From a6e704c3f8f5d1243a072c5fbfff56c9c55d9936 Mon Sep 17 00:00:00 2001
From: gauthiier
Date: Wed, 20 Aug 2014 22:08:24 +0200
Subject: [PATCH] added parse-skim.py + cleaned parse.py

---
Reviewer notes on this revision of the patch:
 - Line breaks restored; indentation is four spaces (the original
   whitespace characters could not be recovered).
 - eof(): json.dumps() has no stream parameter, so sys.stdout was being
   taken as skipkeys; the dump is now written to stdout explicitly.
 - page(): returns the whole last token of the page field instead of its
   last character, so multi-digit pages are no longer truncated.
 - section(): str methods replace the deprecated string-module functions.
 - Docstrings added. The blob id on the "index" line predates these edits.

 parse/parse-skim.py | 108 +++++++++++++++++++++++++++++++++++++++++++++
 parse/parse.py      |   3 --
 2 files changed, 108 insertions(+), 3 deletions(-)
 create mode 100644 parse/parse-skim.py

diff --git a/parse/parse-skim.py b/parse/parse-skim.py
new file mode 100644
index 0000000..be7ba92
--- /dev/null
+++ b/parse/parse-skim.py
@@ -0,0 +1,108 @@
+#!/usr/bin/python
+"""Parse an annotation listing from stdin and print it as JSON.
+
+Lines starting with '*' are headings: the heading text selects the state
+that consumes the following line(s), and the page is taken from the heading.
+Quoted text is collected under 'QUOTES' and annotations under 'NOTES'.
+"""
+
+from statemachine import StateMachine
+import sys, string, json
+
+# Result accumulated by the state handlers and serialised by eof().
+output = {'QUOTES' : [], 'NOTES' : []}
+
+def error(c):
+    """End state: report an unidentifiable line on stderr."""
+    fp, l = c
+    sys.stderr.write('Unidentifiable line:\n'+ l)
+
+def eof(c):
+    """End state: print the collected quotes and notes as one JSON line."""
+    # json.dumps() has no stream parameter: in the Python 2 signature the
+    # second positional argument is skipkeys, so sys.stdout was misplaced.
+    sys.stdout.write(json.dumps(output) + '\n')
+
+def parse(c):
+    """Skip ahead to the next '*' heading.
+
+    Returns (handler, (fp, heading)) for the state chosen by section(), or
+    (eof, (fp, '')) once readline() reports end of input.
+    """
+    fp, l = c
+    while True:
+        line = fp.readline()
+        if not line:
+            return eof, (fp, line)
+        if line.startswith('*'):
+            return section(line), (fp, line)
+
+def highlight(c):
+    """Store the line after the heading as a quote, then resume parse()."""
+    fp, l = c
+    p = page(l)
+    text = fp.readline()
+    output['QUOTES'].append({'pp' : p, 'quote' : text})
+    return parse(c)
+
+def anchored_note(c):
+    """Store a quote line and a note line, then resume parse()."""
+    fp, l = c
+    p = page(l)
+    text = fp.readline()
+    fp.readline()  # the line between quote and note is discarded
+    note = fp.readline()
+    output['QUOTES'].append({'pp' : p, 'quote' : text})
+    output['NOTES'].append({'pp' : p, 'note' : note})
+    return parse(c)
+
+def box(c):
+    """Store the line after the heading as a quote, then resume parse()."""
+    fp, l = c
+    p = page(l)
+    text = fp.readline()
+    output['QUOTES'].append({'pp' : p, 'quote' : text})
+    return parse(c)
+
+def text_note(c):
+    """Store the line after the heading as a note, then resume parse()."""
+    fp, l = c
+    p = page(l)
+    text = fp.readline()
+    output['NOTES'].append({'pp' : p, 'note' : text})
+    return parse(c)
+
+## helper functions
+def section(line):
+    """Return the handler for a heading line, or parse if it is unknown."""
+    line = line.upper()
+    if 'HIGHLIGHT' in line:
+        return highlight
+    if 'ANCHORED NOTE' in line:
+        return anchored_note
+    if 'BOX' in line:
+        return box
+    if 'TEXT NOTE' in line:
+        return text_note
+    return parse
+
+def page(line):
+    """Return the last token of the heading's second comma-separated field.
+
+    Taking the whole token (not just its final character) keeps multi-digit
+    pages intact. Raises IndexError if there is no comma or the field is blank.
+    """
+    return line.split(',')[1].split()[-1]
+
+if __name__ == '__main__':
+    m = StateMachine()
+    m.add_state(parse)
+    m.add_state(highlight)
+    m.add_state(anchored_note)
+    m.add_state(box)
+    m.add_state(text_note)
+    m.add_state(error, end_state=1)  # error and eof are the end states
+    m.add_state(eof, end_state=1)
+    m.set_start(parse)
+    m.run((sys.stdin, ''))  # cargo is (input stream, current heading line)
+
diff --git a/parse/parse.py b/parse/parse.py
index 6ebdae5..322a8fd 100755
--- a/parse/parse.py
+++ b/parse/parse.py
@@ -9,12 +9,9 @@ output = {'QUOTES' : [], 'NOTES' : []}
 
 def error(c):
     fp, l = c
-    # Don't want to get here! Unidentifiable line
     sys.stderr.write('Unidentifiable line:\n'+ l)
 
 def eof(c):
-    # Normal termination -- Cleanup code might go here.
-    sys.stdout.write('Processing Successful\n')
     fpindx = open('.indx','wb')
     json.dump(output, fpindx)
 