nnnotes/parse/parse.py
2014-08-18 18:38:12 +02:00

119 lines
2.9 KiB
Python
Executable File

#!/usr/bin/env python
from statemachine import StateMachine
import sys, string, re, json
markups = {'QUOTES' : ('PAGE', 'pp', 'tags', 'quote', 'fpc'), 'NOTES' : ('NOTE', '#', 'tags', 'note', 'fpc')}
output = {'QUOTES' : [], 'NOTES' : []}
def error(cargo):
# Don't want to get here! Unidentifiable line
sys.stderr.write('Unidentifiable line:\n'+ line)
def eof(cargo):
# Normal termination -- Cleanup code might go here.
sys.stdout.write('Processing Successful\n')
fpindx = open('.indx','wb')
json.dump(output, fpindx)
def parse(c):
fp, l = c
while 1:
line = fp.readline()
if not line: return eof, (fp, line)
if line[:2] == '##': return section(line), (fp, line)
else: continue
def QUOTES(c):
fp, l = c
sys.stdout.write('QUOTES\n')
while 1:
line = fp.readline()
if not line: return eof, (fp, line)
elif string.find(string.upper(line), 'PAGE') >= 0: return segment, (fp, line, 'QUOTES', markups['QUOTES'])
else: continue
def NOTES(c):
fp, l = c
sys.stdout.write('NOTES\n')
while 1:
line = fp.readline()
if not line: return eof, (fp, line)
elif string.find(string.upper(line), 'NOTE') >= 0: return segment, (fp, line, 'NOTES', markups['NOTES'])
else: continue
def segment(c):
fp, l, sect, mk = c
m, x, tt, y, cnt = mk
c = ''
t = []
q = ''
cc = ''
# identifier
c = ext_identifier(l)
while 1:
cursor = fp.tell()
line = fp.readline()
if not line:
# transition: EOF - record entry
rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
return eof, (fp, line)
elif string.find(string.upper(line), m) >= 0:
# transition: new segment - record entry
rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
return segment, (fp, line, sect, mk)
elif line[:1] == '<':
# tags
t += ext_tags(line)
continue
elif line[:2] == '##':
# transition: new section - record entry
rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
return section(line), (fp, line)
elif line == '\n' :
continue
else:
# text
if not cc:
cc = cursor
q += line
continue
## helper fncts
def section(line):
line = string.upper(line)
if string.find(line, 'NOTES') >= 0: return NOTES
elif string.find(line, 'QUOTES') >= 0: return QUOTES
elif string.find(line, 'REFERENCE') >= 0: return parse
else: return error
def ext_identifier(line):
b = string.rsplit(line)
return b[-1]
def ext_tags(line):
line = line.rstrip('\n').replace(' ','')
t = re.split('<|>', line)
return [v for v in t if v]
def rec_segment(idf, tags, text, cnt, mk):
if not text:
print 'hmm... no quote on pp.', c
return None
section_i, idf_i, tags_i, text_i, cnt_i = mk
entry = {idf_i : idf, text_i : text, tags_i : tags, cnt_i : cnt}
output[section_i].append(entry)
if __name__ == '__main__':
m = StateMachine();
m.add_state(parse)
m.add_state(NOTES)
m.add_state(QUOTES)
m.add_state(segment)
m.add_state(error, end_state=1)
m.add_state(eof, end_state=1)
m.set_start(parse)
m.run((sys.stdin, ''))