From e88d0d19731ccc9ff1089b32862a62bf4481ee3d Mon Sep 17 00:00:00 2001 From: gauthiier Date: Mon, 25 Aug 2014 17:57:34 +0200 Subject: [PATCH] new nnnotes and qqquotes identifiers --- parse/inject.py | 38 +++++++++++++++++------- parse/parse-skim.sh | 2 +- parse/parse.py | 70 +++++++++++++++++++++++++++++---------------- 3 files changed, 75 insertions(+), 35 deletions(-) diff --git a/parse/inject.py b/parse/inject.py index 5e54c46..035d5ab 100755 --- a/parse/inject.py +++ b/parse/inject.py @@ -7,6 +7,7 @@ fileout = '' fileref = '' notes = [] quotes = [] +notes_cnt = 0 def error(c): fp, l = c @@ -33,7 +34,7 @@ def QUOTES(c): if not line: emit_remaining_quotes() return eof, (fp, line) - elif string.find(string.upper(line), 'PAGE') >= 0: return process_quote, (fp, line) + elif is_quote_identifier(line): return process_quote, (fp, line) elif line[:2] == '##': emit_remaining_quotes() return section(line), (fp, line) @@ -49,7 +50,7 @@ def NOTES(c): emit_line('\n') emit_remaining_notes() return eof, (fp, line) - elif string.find(string.upper(line), 'NOTE') >= 0: return process_note, (fp, line) + elif is_note_identifier(line): return process_note, (fp, line) elif line[:2] == '##': emit_line('\n') emit_remaining_notes() @@ -60,7 +61,7 @@ def NOTES(c): def process_quote(c): fp, l = c - ppnbr = int(string.rsplit(l)[-1]) + ppnbr = int(extract_identifier(l)) for i in quotes.keys(): if int(i) < ppnbr: emit_quotes(quotes[i]) @@ -68,7 +69,11 @@ def process_quote(c): return QUOTES(c) def process_note(c): + global notes_cnt fp, l = c + cnt = int(extract_identifier(l)) + if(cnt > notes_cnt): + notes_cnt = cnt emit_line(l) return NOTES(c) @@ -89,6 +94,18 @@ def section(line): elif string.find(line, 'REFERENCE') >= 0: return parse else: return parse +def is_quote_identifier(line): + l = line.strip().upper() + return l.startswith("', '') + return t.strip().rsplit()[-1] + def emit_remaining_quotes(): rest = [] for i in quotes: @@ -100,8 +117,8 @@ def emit_quotes(list): emit_quote(list.pop()) def emit_quote(data): - emit_line("page " + data['pp'] + "\n\n") - emit_line(">" + data['quote'] + "\n") + emit_line("\n\n") + emit_line(">\"" + data['quote'] + "\" pp." + data['pp'] + "\n") emit_line('\n') def emit_remaining_notes(): @@ -112,9 +129,10 @@ def emit_remaining_notes(): emit_note(j) def emit_note(data): - emit_line("note: " + data['pp'] + "\n\n") - emit_line(data['note'] + "\n" ) - emit_line('\n') + global notes_cnt + notes_cnt += 1 + emit_line("\n\n") + emit_line(str(notes_cnt) + ". " + data['note'] + "\n\n" ) def emit_line(l): #l = l.encode('utf-8') @@ -164,8 +182,6 @@ if __name__ == '__main__': data = json.load(fp2) fp2.close() - print data - if not data['QUOTES'] and not data['NOTES']: print "Document up-to-date." fileout.close() @@ -174,6 +190,8 @@ if __name__ == '__main__': quotes = reoder(data['QUOTES']) notes = reoder(data['NOTES']) + notes_cnt = 0 + m = StateMachine(); m.add_state(parse) m.add_state(NOTES) diff --git a/parse/parse-skim.sh b/parse/parse-skim.sh index d773de7..370a459 100755 --- a/parse/parse-skim.sh +++ b/parse/parse-skim.sh @@ -15,4 +15,4 @@ skimnotes get -format txt $f parse-skim.py < "$filename.txt" -#rm $filename.txt \ No newline at end of file +rm $filename.txt \ No newline at end of file diff --git a/parse/parse.py b/parse/parse.py index 227cf9f..df3793a 100755 --- a/parse/parse.py +++ b/parse/parse.py @@ -3,7 +3,19 @@ from statemachine import StateMachine import sys, string, re, json -markups = {'QUOTES' : ('PAGE', 'pp', 'tags', 'quote', 'fpc'), 'NOTES' : ('NOTE', '#', 'tags', 'note', 'fpc')} +def is_quote_identifier(line): + l = line.strip().upper() + return l.startswith("', '') + return t.strip().rsplit()[-1] -def ext_tags(line): +def extract_tags(line): line = line.rstrip('\n').replace(' ','') t = re.split('<|>', line) return [v for v in t if v] -def rec_segment(idf, tags, text, cnt, mk): +def record_segment(idf, tags, text, cnt, mk): if not text: #sys.stderr.write('hmm... no quote on pp.' + idf) return None - if text[0] == '>': - text = text[1:] - text = text.strip() + text = escape_quote(text) + text = escape_note(text) section_i, idf_i, tags_i, text_i, cnt_i = mk entry = {idf_i : idf, text_i : text, tags_i : tags, cnt_i : cnt} output[section_i].append(entry) +def escape_quote(line): + if(not line.strip().startswith('>')): + return line + l = re.sub('\"*\"', '', line.strip()[1:]) + return re.sub('pp.[0-9]+', '', l) + +def escape_note(line): + return re.sub('^[0-9]+.', '', line).strip() if __name__ == '__main__': m = StateMachine();