new nnnotes and qqquotes identifiers
This commit is contained in:
parent
c3962aa5ad
commit
e88d0d1973
@ -7,6 +7,7 @@ fileout = ''
|
|||||||
fileref = ''
|
fileref = ''
|
||||||
notes = []
|
notes = []
|
||||||
quotes = []
|
quotes = []
|
||||||
|
notes_cnt = 0
|
||||||
|
|
||||||
def error(c):
|
def error(c):
|
||||||
fp, l = c
|
fp, l = c
|
||||||
@ -33,7 +34,7 @@ def QUOTES(c):
|
|||||||
if not line:
|
if not line:
|
||||||
emit_remaining_quotes()
|
emit_remaining_quotes()
|
||||||
return eof, (fp, line)
|
return eof, (fp, line)
|
||||||
elif string.find(string.upper(line), 'PAGE') >= 0: return process_quote, (fp, line)
|
elif is_quote_identifier(line): return process_quote, (fp, line)
|
||||||
elif line[:2] == '##':
|
elif line[:2] == '##':
|
||||||
emit_remaining_quotes()
|
emit_remaining_quotes()
|
||||||
return section(line), (fp, line)
|
return section(line), (fp, line)
|
||||||
@ -49,7 +50,7 @@ def NOTES(c):
|
|||||||
emit_line('\n')
|
emit_line('\n')
|
||||||
emit_remaining_notes()
|
emit_remaining_notes()
|
||||||
return eof, (fp, line)
|
return eof, (fp, line)
|
||||||
elif string.find(string.upper(line), 'NOTE') >= 0: return process_note, (fp, line)
|
elif is_note_identifier(line): return process_note, (fp, line)
|
||||||
elif line[:2] == '##':
|
elif line[:2] == '##':
|
||||||
emit_line('\n')
|
emit_line('\n')
|
||||||
emit_remaining_notes()
|
emit_remaining_notes()
|
||||||
@ -60,7 +61,7 @@ def NOTES(c):
|
|||||||
|
|
||||||
def process_quote(c):
|
def process_quote(c):
|
||||||
fp, l = c
|
fp, l = c
|
||||||
ppnbr = int(string.rsplit(l)[-1])
|
ppnbr = int(extract_identifier(l))
|
||||||
for i in quotes.keys():
|
for i in quotes.keys():
|
||||||
if int(i) < ppnbr:
|
if int(i) < ppnbr:
|
||||||
emit_quotes(quotes[i])
|
emit_quotes(quotes[i])
|
||||||
@ -68,7 +69,11 @@ def process_quote(c):
|
|||||||
return QUOTES(c)
|
return QUOTES(c)
|
||||||
|
|
||||||
def process_note(c):
|
def process_note(c):
|
||||||
|
global notes_cnt
|
||||||
fp, l = c
|
fp, l = c
|
||||||
|
cnt = int(extract_identifier(l))
|
||||||
|
if(cnt > notes_cnt):
|
||||||
|
notes_cnt = cnt
|
||||||
emit_line(l)
|
emit_line(l)
|
||||||
return NOTES(c)
|
return NOTES(c)
|
||||||
|
|
||||||
@ -89,6 +94,18 @@ def section(line):
|
|||||||
elif string.find(line, 'REFERENCE') >= 0: return parse
|
elif string.find(line, 'REFERENCE') >= 0: return parse
|
||||||
else: return parse
|
else: return parse
|
||||||
|
|
||||||
|
def is_quote_identifier(line):
    """Tell whether `line` is a quote (page) identifier marker.

    A marker is a line that, once surrounding whitespace is removed, starts
    with the HTML comment opener ``<!--`` and contains the word ``PAGE``
    anywhere in it. The comparison is case-insensitive.

    Returns True or False.
    """
    marker = line.strip().upper()
    return marker.startswith("<!--") and "PAGE" in marker
|
||||||
|
|
||||||
|
def is_note_identifier(line):
    """Tell whether `line` is a note identifier marker.

    A marker is a line that, once surrounding whitespace is removed, starts
    with the HTML comment opener ``<!--`` and contains the word ``NOTE``
    anywhere in it. The comparison is case-insensitive.

    Returns True or False.
    """
    marker = line.strip().upper()
    return marker.startswith("<!--") and "NOTE" in marker
|
||||||
|
|
||||||
|
def extract_identifier(line):
    """Return the identifier carried by a marker line.

    The HTML comment delimiters ``<!--`` and ``-->`` are removed and the
    last whitespace-separated word of what remains is returned as a string
    (e.g. ``'<!--page 12-->'`` gives ``'12'``).

    When nothing is left after removing the delimiters, an empty string is
    returned instead of raising IndexError.
    """
    words = line.strip().replace('<!--', '').replace('-->', '').split()
    if not words:
        # Nothing but comment delimiters / whitespace on the line.
        return ''
    return words[-1]
|
||||||
|
|
||||||
def emit_remaining_quotes():
|
def emit_remaining_quotes():
|
||||||
rest = []
|
rest = []
|
||||||
for i in quotes:
|
for i in quotes:
|
||||||
@ -100,8 +117,8 @@ def emit_quotes(list):
|
|||||||
emit_quote(list.pop())
|
emit_quote(list.pop())
|
||||||
|
|
||||||
def emit_quote(data):
|
def emit_quote(data):
|
||||||
emit_line("page " + data['pp'] + "\n\n")
|
emit_line("<!--page " + data['pp'] + "-->\n\n")
|
||||||
emit_line(">" + data['quote'] + "\n")
|
emit_line(">\"" + data['quote'] + "\" pp." + data['pp'] + "\n")
|
||||||
emit_line('\n')
|
emit_line('\n')
|
||||||
|
|
||||||
def emit_remaining_notes():
|
def emit_remaining_notes():
|
||||||
@ -112,9 +129,10 @@ def emit_remaining_notes():
|
|||||||
emit_note(j)
|
emit_note(j)
|
||||||
|
|
||||||
def emit_note(data):
|
def emit_note(data):
|
||||||
emit_line("note: " + data['pp'] + "\n\n")
|
global notes_cnt
|
||||||
emit_line(data['note'] + "\n" )
|
notes_cnt += 1
|
||||||
emit_line('\n')
|
emit_line("<!--note " + str(notes_cnt) + "-->\n\n")
|
||||||
|
emit_line(str(notes_cnt) + ". " + data['note'] + "\n\n" )
|
||||||
|
|
||||||
def emit_line(l):
|
def emit_line(l):
|
||||||
#l = l.encode('utf-8')
|
#l = l.encode('utf-8')
|
||||||
@ -164,8 +182,6 @@ if __name__ == '__main__':
|
|||||||
data = json.load(fp2)
|
data = json.load(fp2)
|
||||||
fp2.close()
|
fp2.close()
|
||||||
|
|
||||||
print data
|
|
||||||
|
|
||||||
if not data['QUOTES'] and not data['NOTES']:
|
if not data['QUOTES'] and not data['NOTES']:
|
||||||
print "Document up-to-date."
|
print "Document up-to-date."
|
||||||
fileout.close()
|
fileout.close()
|
||||||
@ -174,6 +190,8 @@ if __name__ == '__main__':
|
|||||||
quotes = reoder(data['QUOTES'])
|
quotes = reoder(data['QUOTES'])
|
||||||
notes = reoder(data['NOTES'])
|
notes = reoder(data['NOTES'])
|
||||||
|
|
||||||
|
notes_cnt = 0
|
||||||
|
|
||||||
m = StateMachine();
|
m = StateMachine();
|
||||||
m.add_state(parse)
|
m.add_state(parse)
|
||||||
m.add_state(NOTES)
|
m.add_state(NOTES)
|
||||||
|
|||||||
@ -15,4 +15,4 @@ skimnotes get -format txt $f
|
|||||||
|
|
||||||
parse-skim.py < "$filename.txt"
|
parse-skim.py < "$filename.txt"
|
||||||
|
|
||||||
#rm $filename.txt
|
rm $filename.txt
|
||||||
@ -3,7 +3,19 @@
|
|||||||
from statemachine import StateMachine
|
from statemachine import StateMachine
|
||||||
import sys, string, re, json
|
import sys, string, re, json
|
||||||
|
|
||||||
markups = {'QUOTES' : ('PAGE', 'pp', 'tags', 'quote', 'fpc'), 'NOTES' : ('NOTE', '#', 'tags', 'note', 'fpc')}
|
def is_quote_identifier(line):
    """Tell whether `line` is a quote (page) identifier marker.

    A marker is a line that, once surrounding whitespace is removed, starts
    with the HTML comment opener ``<!--`` and contains the word ``PAGE``
    anywhere in it. The comparison is case-insensitive.

    Returns True or False.
    """
    marker = line.strip().upper()
    return marker.startswith("<!--") and "PAGE" in marker
|
||||||
|
|
||||||
|
def is_note_identifier(line):
    """Tell whether `line` is a note identifier marker.

    A marker is a line that, once surrounding whitespace is removed, starts
    with the HTML comment opener ``<!--`` and contains the word ``NOTE``
    anywhere in it. The comparison is case-insensitive.

    Returns True or False.
    """
    marker = line.strip().upper()
    return marker.startswith("<!--") and "NOTE" in marker
|
||||||
|
|
||||||
|
def is_tag_identifier(line):
    """Tell whether `line` is a tag line rather than an HTML comment marker.

    After surrounding whitespace is removed, a tag line starts with ``<``
    but not with ``<!``, so ``<!--...-->`` markers are excluded.

    Returns True or False.
    """
    candidate = line.strip()
    if candidate.startswith('<!'):
        # HTML comment markers are identifiers, not tags.
        return False
    return candidate.startswith('<')
|
||||||
|
|
||||||
|
markups = {'QUOTES' : (is_quote_identifier, 'pp', 'tags', 'quote', 'fpc'), 'NOTES' : (is_note_identifier, '#', 'tags', 'note', 'fpc')}
|
||||||
output = {'QUOTES' : [], 'NOTES' : []}
|
output = {'QUOTES' : [], 'NOTES' : []}
|
||||||
|
|
||||||
|
|
||||||
@ -20,7 +32,7 @@ def parse(c):
|
|||||||
while 1:
|
while 1:
|
||||||
line = fp.readline()
|
line = fp.readline()
|
||||||
if not line: return eof, (fp, line)
|
if not line: return eof, (fp, line)
|
||||||
if line[:2] == '##': return section(line), (fp, line)
|
if line.strip().startswith('##'): return section(line), (fp, line)
|
||||||
else: continue
|
else: continue
|
||||||
|
|
||||||
def QUOTES(c):
|
def QUOTES(c):
|
||||||
@ -28,8 +40,8 @@ def QUOTES(c):
|
|||||||
while 1:
|
while 1:
|
||||||
line = fp.readline()
|
line = fp.readline()
|
||||||
if not line: return eof, (fp, line)
|
if not line: return eof, (fp, line)
|
||||||
elif line.strip().upper().startswith('PAGE'): return segment, (fp, line, 'QUOTES', markups['QUOTES'])
|
elif is_quote_identifier(line): return segment, (fp, line, 'QUOTES', markups['QUOTES'])
|
||||||
elif line.strip().startswith(u'##'): return section(line), (fp, line)
|
elif line.strip().startswith('##'): return section(line), (fp, line)
|
||||||
else: continue
|
else: continue
|
||||||
|
|
||||||
def NOTES(c):
|
def NOTES(c):
|
||||||
@ -37,8 +49,8 @@ def NOTES(c):
|
|||||||
while 1:
|
while 1:
|
||||||
line = fp.readline()
|
line = fp.readline()
|
||||||
if not line: return eof, (fp, line)
|
if not line: return eof, (fp, line)
|
||||||
elif line.strip().upper().startswith('NOTE'): return segment, (fp, line, 'NOTES', markups['NOTES'])
|
elif is_note_identifier(line): return segment, (fp, line, 'NOTES', markups['NOTES'])
|
||||||
elif line[:2] == '##': return section(line), (fp, line)
|
elif line.strip().startswith('##'): return section(line), (fp, line)
|
||||||
else: continue
|
else: continue
|
||||||
|
|
||||||
def segment(c):
|
def segment(c):
|
||||||
@ -49,25 +61,28 @@ def segment(c):
|
|||||||
q = ''
|
q = ''
|
||||||
cc = ''
|
cc = ''
|
||||||
# identifier
|
# identifier
|
||||||
c = ext_identifier(l)
|
c = extract_identifier(l)
|
||||||
while 1:
|
while 1:
|
||||||
cursor = fp.tell()
|
cursor = fp.tell()
|
||||||
line = fp.readline()
|
line = fp.readline()
|
||||||
|
|
||||||
|
|
||||||
if not line:
|
if not line:
|
||||||
# transition: EOF - record entry
|
# transition: EOF - record entry
|
||||||
rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
|
record_segment(c, t, q, cc, (sect, x, tt, y, cnt))
|
||||||
return eof, (fp, line)
|
return eof, (fp, line)
|
||||||
elif line.strip().upper().startswith(m):
|
|
||||||
|
elif m(line):
|
||||||
# transition: new segment - record entry
|
# transition: new segment - record entry
|
||||||
rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
|
record_segment(c, t, q, cc, (sect, x, tt, y, cnt))
|
||||||
return segment, (fp, line, sect, mk)
|
return segment, (fp, line, sect, mk)
|
||||||
elif line[:1] == '<':
|
elif is_tag_identifier(line):
|
||||||
# tags
|
# tags
|
||||||
t += ext_tags(line)
|
t += extract_tags(line)
|
||||||
continue
|
continue
|
||||||
elif line[:2] == '##':
|
elif line[:2] == '##':
|
||||||
# transition: new section - record entry
|
# transition: new section - record entry
|
||||||
rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
|
record_segment(c, t, q, cc, (sect, x, tt, y, cnt))
|
||||||
return section(line), (fp, line)
|
return section(line), (fp, line)
|
||||||
elif line == '\n' :
|
elif line == '\n' :
|
||||||
continue
|
continue
|
||||||
@ -81,32 +96,39 @@ def segment(c):
|
|||||||
## helper fncts
|
## helper fncts
|
||||||
def section(line):
|
def section(line):
|
||||||
line = string.upper(line)
|
line = string.upper(line)
|
||||||
if string.find(line, 'NOTES') >= 0: return NOTES
|
if line.find('NOTES') >= 0: return NOTES
|
||||||
elif string.find(line, 'QUOTES') >= 0: return QUOTES
|
elif line.find('QUOTES') >= 0: return QUOTES
|
||||||
elif string.find(line, 'REFERENCE') >= 0: return parse
|
elif line.find('REFERENCE') >= 0: return parse
|
||||||
else: return parse
|
else: return parse
|
||||||
|
|
||||||
# todo - optimise this (i.e: id != only the last word)
|
# todo - optimise this (i.e: id != only the last word)
|
||||||
def ext_identifier(line):
|
def extract_identifier(line):
|
||||||
b = string.rsplit(line)
|
t = line.strip().replace('<!--', '').replace('-->', '')
|
||||||
return b[-1]
|
return t.strip().rsplit()[-1]
|
||||||
|
|
||||||
def ext_tags(line):
|
def extract_tags(line):
|
||||||
line = line.rstrip('\n').replace(' ','')
|
line = line.rstrip('\n').replace(' ','')
|
||||||
t = re.split('<|>', line)
|
t = re.split('<|>', line)
|
||||||
return [v for v in t if v]
|
return [v for v in t if v]
|
||||||
|
|
||||||
def rec_segment(idf, tags, text, cnt, mk):
|
def record_segment(idf, tags, text, cnt, mk):
|
||||||
if not text:
|
if not text:
|
||||||
#sys.stderr.write('hmm... no quote on pp.' + idf)
|
#sys.stderr.write('hmm... no quote on pp.' + idf)
|
||||||
return None
|
return None
|
||||||
if text[0] == '>':
|
text = escape_quote(text)
|
||||||
text = text[1:]
|
text = escape_note(text)
|
||||||
text = text.strip()
|
|
||||||
section_i, idf_i, tags_i, text_i, cnt_i = mk
|
section_i, idf_i, tags_i, text_i, cnt_i = mk
|
||||||
entry = {idf_i : idf, text_i : text, tags_i : tags, cnt_i : cnt}
|
entry = {idf_i : idf, text_i : text, tags_i : tags, cnt_i : cnt}
|
||||||
output[section_i].append(entry)
|
output[section_i].append(entry)
|
||||||
|
|
||||||
|
def escape_quote(line):
    """Strip quote decoration from `line` and return the bare quote text.

    Lines that do not start with ``>`` (ignoring surrounding whitespace)
    are returned unchanged. Otherwise the leading ``>`` is dropped, a
    trailing page reference of the form ``pp.<digits>`` is removed, and a
    single pair of double quotes wrapping the remaining text is removed.

    Only the trailing page reference and the outer quote pair are touched:
    double quotes inside the quotation and text that merely resembles a
    page reference (e.g. "app 123") are preserved.
    """
    stripped = line.strip()
    if not stripped.startswith('>'):
        return line
    body = stripped[1:].strip()
    # Literal "pp." followed by digits, anchored at the end of the text.
    body = re.sub(r'\s*pp\.[0-9]+\s*$', '', body)
    if len(body) >= 2 and body.startswith('"') and body.endswith('"'):
        body = body[1:-1]
    return body
|
||||||
|
|
||||||
|
def escape_note(line):
    """Remove a leading ``<digits>.`` numbering prefix from `line`.

    The prefix is removed only when the digits are followed by a literal
    dot and then whitespace or the end of the text, so notes that merely
    begin with a number ("2024 was ...", "3.14 is ...") are left intact.

    Returns the text with surrounding whitespace stripped.
    """
    return re.sub(r'^\s*[0-9]+\.(?:\s+|$)', '', line).strip()
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
m = StateMachine();
|
m = StateMachine();
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user