haha! commit
This commit is contained in:
Executable
+118
@@ -0,0 +1,118 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from statemachine import StateMachine
|
||||
import sys, string, re, json
|
||||
|
||||
markups = {'QUOTES' : ('PAGE', 'pp', 'tags', 'quote', 'fpc'), 'NOTES' : ('NOTE', '#', 'tags', 'note', 'fpc')}
|
||||
output = {'QUOTES' : [], 'NOTES' : []}
|
||||
|
||||
|
||||
def error(cargo):
|
||||
# Don't want to get here! Unidentifiable line
|
||||
sys.stderr.write('Unidentifiable line:\n'+ line)
|
||||
|
||||
def eof(cargo):
|
||||
# Normal termination -- Cleanup code might go here.
|
||||
sys.stdout.write('Processing Successful\n')
|
||||
fpindx = open('.indx','wb')
|
||||
json.dump(output, fpindx)
|
||||
|
||||
def parse(c):
|
||||
fp, l = c
|
||||
while 1:
|
||||
line = fp.readline()
|
||||
if not line: return eof, (fp, line)
|
||||
if line[:2] == '##': return section(line), (fp, line)
|
||||
else: continue
|
||||
|
||||
def QUOTES(c):
|
||||
fp, l = c
|
||||
sys.stdout.write('QUOTES\n')
|
||||
while 1:
|
||||
line = fp.readline()
|
||||
if not line: return eof, (fp, line)
|
||||
elif string.find(string.upper(line), 'PAGE') >= 0: return segment, (fp, line, 'QUOTES', markups['QUOTES'])
|
||||
else: continue
|
||||
|
||||
def NOTES(c):
|
||||
fp, l = c
|
||||
sys.stdout.write('NOTES\n')
|
||||
while 1:
|
||||
line = fp.readline()
|
||||
if not line: return eof, (fp, line)
|
||||
elif string.find(string.upper(line), 'NOTE') >= 0: return segment, (fp, line, 'NOTES', markups['NOTES'])
|
||||
else: continue
|
||||
|
||||
def segment(c):
|
||||
fp, l, sect, mk = c
|
||||
m, x, tt, y, cnt = mk
|
||||
c = ''
|
||||
t = []
|
||||
q = ''
|
||||
cc = ''
|
||||
# identifier
|
||||
c = ext_identifier(l)
|
||||
while 1:
|
||||
cursor = fp.tell()
|
||||
line = fp.readline()
|
||||
if not line:
|
||||
# transition: EOF - record entry
|
||||
rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
|
||||
return eof, (fp, line)
|
||||
elif string.find(string.upper(line), m) >= 0:
|
||||
# transition: new segment - record entry
|
||||
rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
|
||||
return segment, (fp, line, sect, mk)
|
||||
elif line[:1] == '<':
|
||||
# tags
|
||||
t += ext_tags(line)
|
||||
continue
|
||||
elif line[:2] == '##':
|
||||
# transition: new section - record entry
|
||||
rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
|
||||
return section(line), (fp, line)
|
||||
elif line == '\n' :
|
||||
continue
|
||||
else:
|
||||
# text
|
||||
if not cc:
|
||||
cc = cursor
|
||||
q += line
|
||||
continue
|
||||
|
||||
## helper fncts
|
||||
def section(line):
|
||||
line = string.upper(line)
|
||||
if string.find(line, 'NOTES') >= 0: return NOTES
|
||||
elif string.find(line, 'QUOTES') >= 0: return QUOTES
|
||||
elif string.find(line, 'REFERENCE') >= 0: return parse
|
||||
else: return error
|
||||
|
||||
def ext_identifier(line):
|
||||
b = string.rsplit(line)
|
||||
return b[-1]
|
||||
|
||||
def ext_tags(line):
|
||||
line = line.rstrip('\n').replace(' ','')
|
||||
t = re.split('<|>', line)
|
||||
return [v for v in t if v]
|
||||
|
||||
def rec_segment(idf, tags, text, cnt, mk):
|
||||
if not text:
|
||||
print 'hmm... no quote on pp.', c
|
||||
return None
|
||||
section_i, idf_i, tags_i, text_i, cnt_i = mk
|
||||
entry = {idf_i : idf, text_i : text, tags_i : tags, cnt_i : cnt}
|
||||
output[section_i].append(entry)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
m = StateMachine();
|
||||
m.add_state(parse)
|
||||
m.add_state(NOTES)
|
||||
m.add_state(QUOTES)
|
||||
m.add_state(segment)
|
||||
m.add_state(error, end_state=1)
|
||||
m.add_state(eof, end_state=1)
|
||||
m.set_start(parse)
|
||||
m.run((sys.stdin, ''))
|
||||
@@ -0,0 +1,32 @@
|
||||
class InitializationError(Exception): pass
|
||||
class StateMachine:
|
||||
def __init__(self):
|
||||
self.handlers = []
|
||||
self.startState = None
|
||||
self.endStates = []
|
||||
|
||||
def add_state(self, handler, end_state=0):
|
||||
self.handlers.append(handler)
|
||||
if end_state:
|
||||
self.endStates.append(handler)
|
||||
|
||||
def set_start(self, handler):
|
||||
self.startState = handler
|
||||
|
||||
def run(self, cargo=None):
|
||||
if not self.startState:
|
||||
raise InitializationError,\
|
||||
"must call .set_start() before .run()"
|
||||
if not self.endStates:
|
||||
raise InitializationError, \
|
||||
"at least one state must be an end_state"
|
||||
handler = self.startState
|
||||
while 1:
|
||||
(newState, cargo) = handler(cargo)
|
||||
if newState in self.endStates:
|
||||
newState(cargo)
|
||||
break
|
||||
elif newState not in self.handlers:
|
||||
raise RuntimeError, "Invalid target %s" % newState
|
||||
else:
|
||||
handler = newState
|
||||
Reference in New Issue
Block a user