added parse-skim.py + cleaned parse.py

This commit is contained in:
gauthiier 2014-08-20 22:08:24 +02:00
parent e3164b8147
commit a6e704c3f8
2 changed files with 77 additions and 3 deletions

77
parse/parse-skim.py Normal file
View File

@ -0,0 +1,77 @@
#!/usr/bin/python
from statemachine import StateMachine
import sys, string, json
# Accumulator shared by the state handlers below: quote entries are appended
# under 'QUOTES', note entries under 'NOTES'; eof() serializes the dict as JSON.
output = {'QUOTES' : [], 'NOTES' : []}
def error(c):
    """Write the offending line to stderr.

    `c` is the (file, line) cargo tuple passed between states; only the
    line is used. This handler is registered as an end state.
    """
    _, offending = c
    message = 'Unidentifiable line:\n' + offending
    sys.stderr.write(message)
def eof(c):
    """End state: emit the collected quotes and notes as one JSON line on stdout.

    `c` is the (file, line) cargo tuple handed over by the state machine;
    it is not used here.
    """
    # json.dumps() returns a string and takes no file argument (a second
    # positional argument would be interpreted as `skipkeys`), so the
    # serialized text is written to stdout explicitly, followed by a newline.
    sys.stdout.write(json.dumps(output) + '\n')
def parse(c):
    """Scanning state: skip lines until a section header or end of input.

    Reads from the file object in the cargo tuple `c`. Returns a
    (next_state, cargo) pair: `eof` once readline() yields an empty string,
    otherwise the handler chosen by section() for the first line that
    starts with '*'. The incoming line in `c` is ignored.
    """
    stream, _ = c
    while True:
        current = stream.readline()
        if not current:
            # Empty string from readline() means the input is exhausted.
            return eof, (stream, current)
        if current[:1] != '*':
            # Not a section header; keep scanning.
            continue
        return section(current), (stream, current)
def highlight(c):
    """State handler for a highlight section.

    Takes the page from the header line in `c`, reads the next input line
    as the quoted text, records both under output['QUOTES'], then resumes
    scanning by returning whatever parse() returns.
    """
    stream, header = c
    entry = {'pp': page(header)}
    entry['quote'] = stream.readline()
    output['QUOTES'].append(entry)
    return parse(c)
def anchored_note(c):
    """State handler for an anchored-note section.

    After the header line, three lines are consumed: the quoted text, a
    line that is discarded (presumably a blank separator -- confirm against
    the input format), and the note itself. The quote is recorded under
    output['QUOTES'] and the note under output['NOTES'], both tagged with
    the page taken from the header. Scanning then resumes via parse().
    """
    stream, header = c
    where = page(header)
    quoted = stream.readline()
    stream.readline()  # line between quote and note is read and dropped
    remark = stream.readline()
    quote_entry = {'pp': where, 'quote': quoted}
    note_entry = {'pp': where, 'note': remark}
    output['QUOTES'].append(quote_entry)
    output['NOTES'].append(note_entry)
    return parse(c)
def box(c):
    """State handler for a box section.

    Records the line following the header as a quote, tagged with the
    header's page, under output['QUOTES'], then hands control back to
    parse().
    """
    source, heading = c
    page_no = page(heading)
    boxed_text = source.readline()
    output['QUOTES'].append(dict(pp=page_no, quote=boxed_text))
    return parse(c)
def text_note(c):
    """State handler for a text-note section.

    Records the line following the header as a note, tagged with the
    header's page, under output['NOTES'], then hands control back to
    parse().
    """
    stream, header = c
    record = {'pp': page(header)}
    record['note'] = stream.readline()
    output['NOTES'].append(record)
    return parse(c)
## helper functions
def section(line):
    """Pick the state handler for a section header line.

    The match is a case-insensitive substring search, tried in the order
    HIGHLIGHT, ANCHORED NOTE, BOX, TEXT NOTE; the first keyword found wins.
    A header containing none of them returns `parse`, so scanning simply
    continues.
    """
    # str methods / `in` instead of the deprecated string-module functions
    # (string.upper, string.find), which no longer exist in Python 3.
    header = line.upper()
    if 'HIGHLIGHT' in header:
        return highlight
    if 'ANCHORED NOTE' in header:
        return anchored_note
    if 'BOX' in header:
        return box
    if 'TEXT NOTE' in header:
        return text_note
    return parse
def page(line):
    """Return the page number (as a string) from a section header line.

    The text after the first comma is taken and its last
    whitespace-separated token is returned. Assumes headers of the form
    ``* Highlight, page 12`` -- confirm against the actual export format.

    Raises IndexError when the line has no comma or nothing but whitespace
    follows it.
    """
    fields = line.rstrip('\n').split(',')
    # Return the whole last token, not just the last character, so that
    # multi-digit page numbers are not truncated to their final digit.
    return fields[1].split()[-1]
if __name__ == '__main__':
    # Build the state machine: ordinary states first, then the two end
    # states, and run it over stdin starting in the scanning state.
    m = StateMachine()
    for handler in (parse, highlight, anchored_note, box, text_note):
        m.add_state(handler)
    for terminal in (error, eof):
        m.add_state(terminal, end_state=1)
    m.set_start(parse)
    m.run((sys.stdin, ''))

View File

@ -9,12 +9,9 @@ output = {'QUOTES' : [], 'NOTES' : []}
def error(c): def error(c):
fp, l = c fp, l = c
# Don't want to get here! Unidentifiable line
sys.stderr.write('Unidentifiable line:\n'+ l) sys.stderr.write('Unidentifiable line:\n'+ l)
def eof(c): def eof(c):
# Normal termination -- Cleanup code might go here.
sys.stdout.write('Processing Successful\n')
fpindx = open('.indx','wb') fpindx = open('.indx','wb')
json.dump(output, fpindx) json.dump(output, fpindx)