python refactoring

This commit is contained in:
gauthiier 2016-03-16 14:30:32 +01:00
parent 5e4beb6b71
commit c19dc35e18
19 changed files with 287 additions and 260 deletions

5
.gitignore vendored
View File

@ -1,2 +1,5 @@
CONFIG
*.pyc
*.pyc
build
dist
*.egg-info

27
bin/iiindex Executable file
View File

@ -0,0 +1,27 @@
#!/usr/bin/env python
import os, sys, glob
from nnnotes import parse
if __name__ == '__main__':
    # Build the JSON index file (.indx) from the single *.mmd note found in
    # the current directory.
    indexfile = '.indx'
    notes = glob.glob('*.mmd')
    if len(notes) > 1:
        sys.exit('More or less *.mmd files than expected.')
    elif len(notes) < 1:
        sys.exit('No *.mmd in current directory.')
    try:
        note = open(notes[0], 'r+')
    except (IOError, OSError):
        # Only I/O failures mean "cannot open"; a bare except would also
        # swallow KeyboardInterrupt and SystemExit.
        sys.exit('Cannot open ' + notes[0])
    # Both handles are closed as soon as parsing is done.
    with note:
        with open(indexfile, 'w') as indx:
            parse.run(note, indx)

40
bin/iiinject Executable file
View File

@ -0,0 +1,40 @@
#!/usr/bin/env python
import os, sys, glob, json
from nnnotes import compare, inject
if __name__ == '__main__':
    # Extract the highlights of the single pdf in the current directory with
    # plfr, compare them against .indx when that file exists, and inject the
    # result into the single *.mmd note.
    import subprocess

    def dump_highlights(pdf_path, out_path):
        # plfr is run without a shell: the pdf name is passed as one argument,
        # so spaces or shell metacharacters in it cannot break the command.
        with open(out_path, 'w') as out:
            try:
                subprocess.call(['plfr', '-json', pdf_path], stdout=out)  ### relying on plfr
            except OSError:
                sys.exit('Cannot run plfr. Aborting.')

    indexfile = '.indx'
    tmp = '.tmp'
    try:
        pdfs = glob.glob('*.pdf')
        if len(pdfs) > 1:
            sys.exit('More than one pdf in current directory. No obvious choice. Aborting.')
        if len(pdfs) == 1:
            pdf = pdfs[0]
            if os.path.isfile(indexfile):
                dump_highlights(pdf, tmp)
                diff = compare.run(indexfile, tmp)  ### new highlights in pdf?
                with open(tmp, 'w') as fptmp:
                    json.dump(diff, fptmp)
                indexfile = tmp
            else:
                dump_highlights(pdf, indexfile)
        notes = glob.glob('*.mmd')
        if len(notes) > 1:
            sys.exit('More or less *.mmd files than expected.')
        elif len(notes) < 1:
            sys.exit('No *.mmd in current directory.')
        note = notes[0]
        inject.run(note, indexfile)
    finally:
        # Remove the scratch file even when one of the steps above aborts.
        if os.path.isfile(tmp):
            os.remove(tmp)

83
bin/nnnote Executable file
View File

@ -0,0 +1,83 @@
#!/usr/bin/env python
import os, sys, argparse, shutil, subprocess
import getpass, time
from nnnotes import TEMPLATE_PATH
def yes_no(question):
    """Write *question* to stdout and return the user's answer as a bool.

    'yes', 'y', 'ye' and an empty answer return True; 'no' and 'n' return
    False (case-insensitive). Any other answer prints a hint and asks again;
    previously the function fell through and returned None in that case.
    """
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    yes = set(['yes', 'y', 'ye', ''])
    no = set(['no', 'n'])
    sys.stdout.write(question)
    while True:
        # The prompt has no trailing newline, so flush it before blocking.
        sys.stdout.flush()
        choice = read_line().strip().lower()
        if choice in yes:
            return True
        if choice in no:
            return False
        sys.stdout.write("Please respond with 'yes' or 'no'")
        sys.stdout.write(' ')
def check_note_exists(title):
    """Tell whether a directory named *title* already exists.

    Returns False when there is no such directory. When there is one, the
    user is asked for confirmation through yes_no(): True is returned if the
    answer is affirmative, otherwise the program exits.
    """
    if not os.path.isdir(title):
        return False
    proceed = yes_no('The note already exists in the current directory.\nDo you want to proceed and erase the current note? [y/n] ')
    if proceed:
        return True
    sys.exit('Notes already exists. Aborting.')
# Command-line entry point: sets up a note directory named after --title,
# fills it from the packaged template and patches Makefile / notes.mmd with
# the style, bibliography, author and date.
if __name__ == '__main__':
p = argparse.ArgumentParser()
p.add_argument('-t', '--title', action="store", help="title of the note", required=True)
p.add_argument('-f', '--file', action="store", help="(pdf) file to extract notes from")
p.add_argument('-b', '--bibtex', action="store", help="bibtex file containing bibliographical information")
p.add_argument('-i', '--bibtexitem', action="store", help="name of the item in the bibtex file")
p.add_argument('-s', '--style', action="store", help="bibliographic style")
args = p.parse_args()
# check_note_exists() exits the program if the user declines to proceed.
# NOTE(review): confirm which of the statements below are meant to be nested
# under this `if` (only makedirs, or the template copy as well).
if not check_note_exists(args.title):
# make directory for the note
os.makedirs(args.title)
# copy template files -- this needs to change....
#home = os.path.dirname(os.path.realpath(__file__))
# NOTE(review): the title is concatenated unquoted into a shell command;
# titles with spaces or shell metacharacters will not survive this.
os.system('cp -aR ' + TEMPLATE_PATH + '/* ' + args.title)
# Everything below runs inside the note directory, so relative --file and
# --bibtex paths are resolved against it, not against the starting directory.
os.chdir(args.title)
# copy note file if it exists
if args.file is not None and os.path.isfile(args.file):
shutil.copy2(args.file, os.path.basename(args.file))
# style
style_arg = ''
if args.style is not None:
# writes the style after "CSL :=" in the Makefile (sed keeps a .bak copy)
os.system('sed -i.bak "s#CSL\ :=#CSL\ := ' + args.style + '#g" Makefile')
style_arg = '-s ' + args.style
# bibtex item
bibtexitem_arg = ''
if args.bibtexitem is not None:
bibtexitem_arg = '-i ' + args.bibtexitem
# generate bibliographic element
if args.bibtex is not None and os.path.isfile(args.bibtex):
# writes the bibliography path after "BIB :=" in the Makefile
os.system('sed -i.bak "s#BIB\ :=#BIB\ := ' + args.bibtex + '#g" Makefile')
# the output of the external csl_unsorted command, prefixed with "> ",
# becomes the reference text
ref = '> ' + subprocess.check_output('csl_unsorted ' + args.bibtex + ' ' + bibtexitem_arg + ' ' + style_arg + ' -f md', shell=True).rstrip()
# NOTE(review): `ref` is spliced into a sed expression delimited by "~";
# presumably a reference containing "~" or a double quote breaks it -- confirm.
os.system('sed -i.bak "s~*MACHINE-REF*~' + ref + '~g ; s~%\ title~%\ ' + ref + '~g" notes.mmd')
# fill the "% author" and "% date" placeholders of notes.mmd
author = getpass.getuser()
date = time.strftime("%d/%m/%Y")
os.system('sed -i.bak "s~%\ author~%\ ' + author + '~g ; s~%\ date~%\ ' + date + '~g" notes.mmd')
# cleanup
os.system('rm *.bak')
os.chdir('..')

3
nnnotes/__init__.py Normal file
View File

@ -0,0 +1,3 @@
import os
# Absolute path of the `template` directory located next to this module.
_PACKAGE_DIR = os.path.abspath(os.path.dirname(__file__))
TEMPLATE_PATH = os.path.join(_PACKAGE_DIR, 'template')

View File

@ -1,4 +1,8 @@
#!/usr/bin/env python
# compares two JSON-formatted indexes
# and returns the difference
import sys, os, string, json
from operator import attrgetter
@ -8,8 +12,8 @@ fp2 = ''
def difference(data1, data2):
output = {'QUOTES' : [], 'NOTES' : []}
d1 = {i['quote']: i for i in data1['QUOTES']}
d2 = {i['quote']: i for i in data2['QUOTES']}
d1 = {i['quote'].rstrip(): i for i in data1['QUOTES']}
d2 = {i['quote'].rstrip(): i for i in data2['QUOTES']}
#create sets
s1 = set(d1.keys())
@ -47,28 +51,32 @@ def open_file(p):
sys.exit('File %s does not exists... Aborting.' % p)
return open(p, 'rb')
if __name__ == '__main__':
if len(sys.argv) < 2:
sys.exit('No input file... Aborting.')
try:
fp1 = open_file(sys.argv[1])
except:
sys.exit("Can't open file " + sys.argv[1] + ". Aborting.")
if len(sys.argv) < 3:
fp2 = sys.stdin
else:
try:
fp2 = open_file(sys.argv[2])
except:
sys.exit("Can't open file " + sys.argv[2] + ". Aborting.")
def run(filename1, filename2):
#open files
try:
fp1 = open_file(filename1)
except:
sys.exit("Can't open file " + filename1 + ". Aborting.")
try:
fp2 = open_file(filename2)
except:
# it may be stdin
try:
fp2 = filename2
fp2.tell()
except:
sys.exit("Can't open file " + filename2 + ". Aborting.")
#read data
try:
sdata = fp1.read()
data1 = json.loads(sdata)
except:
e = "<compare> Error loading data from" + sys.argv[1] + ". Aborting.\n"
e = "<compare> Error loading data from" + filename1 + ". Aborting.\n"
if sdata:
e += "Traceback: " + sdata1
e += "Traceback: " + sdata
fp2.close()
sys.exit(e)
finally:
@ -86,11 +94,21 @@ if __name__ == '__main__':
finally:
fp2.close()
#process
data = difference(data1, data2)
data['QUOTES'] = sorted(data['QUOTES'], key=lambda entry: int(entry['pp']))
data['NOTES'] = sorted(data['NOTES'], key=lambda entry: int(entry['pp']))
json.dump(data, sys.stdout)
#dump
return data
#main allows unix piping
if __name__ == '__main__':
    # Usage: compare.py INDEX1 [INDEX2]
    # The second index is read from stdin when INDEX2 is not given, which
    # keeps the script usable at the end of a pipe. The optional file
    # argument mirrors what inject.py's entry point accepts.
    if len(sys.argv) < 2:
        sys.exit('No input file... Aborting.')
    second = sys.argv[2] if len(sys.argv) > 2 else sys.stdin
    json.dump(run(sys.argv[1], second), sys.stdout)

View File

@ -1,4 +1,9 @@
#!/usr/bin/env python
# reads (1) a formatted md file, (2) json formatted index
# and injects (sorts and appends) in the md file the information
# contained in the index
from statemachine import StateMachine
import sys, os, string, json, shutil, codecs, traceback
@ -14,7 +19,6 @@ def error(c):
sys.stderr.write('Unidentifiable line:\n'+ l)
def eof(c):
print "eof"
return
def parse(c):
@ -120,10 +124,13 @@ def emit_quotes(list):
def emit_quote(data):
emit_line("<!--page " + data['pp'] + "-->\n\n")
emit_line(">\"" + data['quote'] + "\" pp." + data['pp'] + "\n")
emit_line(">\"" + data['quote'] + "\" p." + data['pp'] + "\n")
emit_line('\n')
def emit_remaining_notes():
print "emit_remaining_notes"
rest = []
for i in notes:
rest.extend(notes[i])
@ -168,25 +175,27 @@ def backupfile(p):
shutil.copy2(p, bak)
return codecs.open(bak, 'r', 'utf-8')
def run(filename1, filename2):
global fileout, fileref, notes, quotes, notes_cnt
if __name__ == '__main__':
if len(sys.argv) < 2:
sys.exit('No input file... Aborting.')
# fp1 should be the incoming .mmd file
try:
fileref = backupfile(sys.argv[1])
fileout = open_fileoutput(sys.argv[1])
try:
fileref = backupfile(filename1)
fileout = open_fileoutput(filename1)
fileout.seek(0)
except:
sys.exit("Can't open file " + sys.argv[1] + ". Aborting.")
sys.exit("Can't open file " + filename1 + ". Aborting.")
if len(sys.argv) < 3:
fp2 = sys.stdin
else:
try:
fp2 = open_file(filename2)
except:
# it may be stdin
try:
fp2 = open_file(sys.argv[2])
fp2 = filename2
fp2.tell()
except:
sys.exit("Can't open file " + sys.argv[2] + ". Aborting.")
sys.exit("Can't open file " + filename2 + ". Aborting.")
# fp2 should be the incoming (json) data to inject in fp1
@ -211,8 +220,6 @@ if __name__ == '__main__':
quotes = reoder(data['QUOTES'])
notes = reoder(data['NOTES'])
print quotes
notes_cnt = 0
try:
@ -234,7 +241,22 @@ if __name__ == '__main__':
finally:
fileout.close()
fileref.close()
sys.exit(trace)
return trace
#main allows unix piping
if __name__ == '__main__':
    # Usage: inject.py NOTE [INDEX]; the index comes from stdin when no
    # second argument is given.
    if not len(sys.argv) >= 2:
        sys.exit('No input file... Aborting.')
    fp2 = sys.argv[2] if len(sys.argv) >= 3 else sys.stdin
    trace = run(sys.argv[1], fp2)
    # whatever run() returned becomes the exit status / message
    sys.exit(trace)

View File

@ -1,5 +1,8 @@
#!/usr/bin/env python
# parses the information contained in a formatted md file
# and constructs a json formatted index
from statemachine import StateMachine
import sys, string, re, json
@ -17,14 +20,14 @@ def is_tag_identifier(line):
markups = {'QUOTES' : (is_quote_identifier, 'pp', 'tags', 'quote', 'fpc'), 'NOTES' : (is_note_identifier, '#', 'tags', 'note', 'fpc')}
output = {'QUOTES' : [], 'NOTES' : []}
fpindex = None
def error(c):
    """Report the line carried by cargo *c* (a (file, line) pair) on stderr."""
    offending = c[1]
    message = 'Unidentifiable line:\n' + offending
    sys.stderr.write(message)
def eof(c):
fpindx = open('.indx','wb')
json.dump(output, fpindx)
json.dump(output, fpindex)
def parse(c):
fp, l = c
@ -123,12 +126,16 @@ def escape_quote(line):
if(not line.strip().startswith('>')):
return line
l = re.sub('\"*\"', '', line.strip()[1:])
return re.sub('pp.[0-9]+', '', l)
return re.sub('p.[0-9]+', '', l)
def escape_note(line):
    """Return *line* without its leading "<digits>." prefix and outer blanks.

    The dot is matched literally: an unescaped "." also removed whatever
    character happened to follow leading digits (e.g. the space in
    "2016 was ..."), eating the start of the text.
    """
    return re.sub(r'^[0-9]+\.', '', line).strip()
if __name__ == '__main__':
def run(fpin, fpout):
global fpindex
fpindex = fpout
m = StateMachine();
m.add_state(parse)
m.add_state(NOTES)
@ -137,4 +144,9 @@ if __name__ == '__main__':
m.add_state(error, end_state=1)
m.add_state(eof, end_state=1)
m.set_start(parse)
m.run((sys.stdin, ''))
m.run((fpin, ''))
#main allows unix piping
if __name__ == '__main__':
    # Parse the note arriving on stdin and write its index to .indx.
    # The `with` block guarantees the index is flushed and closed.
    with open('.indx', 'wb') as fpindx:
        run(sys.stdin, fpindx)

View File

@ -16,22 +16,25 @@ $(OUT)%.pdf : %.mmd
#html
HTML := $(patsubst %.mmd,$(OUT)%.html,$(wildcard *.mmd))
$(OUT)%.html : %.mmd
$(OUT)%.html: %.mmd
pandoc $< -s -t html5 --template=p/nnnote-template.html5 --filter pandoc-citeproc --csl $(CSL) --bibliography $(BIB) -o $@
all : dir $(HTML) index
all: dir $(HTML) index
index :
parse.py < $(wildcard *.mmd)
index:
iiindex
inject:
iiinject
clean :
clean:
rm -f $(HTML)
rm -f $(PDF)
neat:
rm *.bak
rebuild : clean all
rebuild: clean all
dir :
mkdir -p $(OUT)
dir:
mkdir -p $(OUT)

View File

@ -1,18 +0,0 @@
#!/bin/bash
# Prints what compare.py reports between the note index (.indx) and the
# highlights extracted from the pdf by parse-plfr.sh. The index is rebuilt
# first when it is missing or older than the markdown source.
INDXFILE='.indx'

# keep the last *.mmd file of the directory
for mmd in *.mmd; do
  f=$mmd
done

# a stale index is dropped so that the next test rebuilds it
if [[ -f $INDXFILE && $f -nt $INDXFILE ]]; then
  rm $INDXFILE
fi

if [[ ! -f $INDXFILE ]]; then
  make --quiet index
fi

#parse-skim.sh | compare.py .indx
parse-plfr.sh | compare.py .indx

View File

@ -1,16 +0,0 @@
#!/bin/bash
# Injects the new highlights reported by compare.sh into the markdown
# source of the current directory, then refreshes the index.

# keep the last *.mmd file of the directory
for i in *.mmd; do
  f=$i
done

# When nothing matches, the pattern stays literal ("*.mmd"), so test for an
# actual file. The former test on the single-quoted '$f' could never succeed.
if [[ ! -f "$f" ]]; then
  echo "No mmd file (markdown source file) in directory... Aborting." >&2
  exit 1
fi

# inject new nnnotes in source file
compare.sh | inject.py "$f"

# update index
make --quiet index

View File

@ -1,13 +0,0 @@
#!/bin/bash
# Prints the highlights of the pdf found in the current directory as JSON,
# using plfr.

# keep the last *.pdf file of the directory
for i in *.pdf; do
  f=$i
done

# When nothing matches, the pattern stays literal ("*.pdf"), so test for an
# actual file. The message goes to stderr because stdout is piped into
# compare.py as JSON.
if [[ ! -f "$f" ]]; then
  echo "$f No pdf file in directory... Aborting." >&2
  exit 1
fi

plfr -json "$f"

View File

@ -1,77 +0,0 @@
#!/usr/bin/python
from statemachine import StateMachine
import sys, string, json
output = {'QUOTES' : [], 'NOTES' : []}
def error(c):
    """Write the line held in cargo *c* (a (file, line) pair) to stderr."""
    _, bad_line = c
    sys.stderr.write('Unidentifiable line:\n' + bad_line)
def eof(c):
    """Print the module-level `output` dict as one line of JSON on stdout.

    The cargo *c* is not used.
    """
    # json.dumps() returns a string and takes no file argument: its second
    # positional parameter is `skipkeys`, so passing sys.stdout there only
    # switched that flag on by accident.
    print(json.dumps(output))
def parse(c):
    """Skip lines until one starts with '*' and hand over to its handler.

    *c* is a (file, line) pair; only the file is used. Returns a
    (next_state, cargo) pair: the handler chosen by section() for the '*'
    line, or eof once the file is exhausted.
    """
    fp = c[0]
    while True:
        line = fp.readline()
        if not line:
            return eof, (fp, line)
        if line.startswith('*'):
            return section(line), (fp, line)
def highlight(c):
    """Record the line following a highlight header as a quote.

    *c* is (file, header_line); the page is taken from the header with
    page(). Continues with parse() on the same cargo.
    """
    fp, header = c
    pp = page(header)
    quote = fp.readline().strip()
    output['QUOTES'].append({'pp': pp, 'quote': quote})
    return parse(c)
def anchored_note(c):
    """Record an anchored note: a quote line, one skipped line, a note line.

    *c* is (file, header_line); both entries get the page extracted from
    the header with page(). Continues with parse() on the same cargo.
    """
    fp, header = c
    pp = page(header)
    quote = fp.readline()
    fp.readline()  # line between the quote and the note is ignored
    remark = fp.readline()
    output['QUOTES'].append({'pp': pp, 'quote': quote.strip()})
    output['NOTES'].append({'pp': pp, 'note': remark.strip()})
    return parse(c)
def box(c):
    """Record the line following a box header as a quote.

    *c* is (file, header_line); the page is taken from the header with
    page(). Continues with parse() on the same cargo.
    """
    fp, header = c
    entry = {'pp': page(header)}
    entry['quote'] = fp.readline().strip()
    output['QUOTES'].append(entry)
    return parse(c)
def text_note(c):
    """Record the line following a text-note header as a note.

    *c* is (file, header_line); the page is taken from the header with
    page(). Continues with parse() on the same cargo.
    """
    fp, header = c
    pp = page(header)
    remark = fp.readline().strip()
    output['NOTES'].append({'pp': pp, 'note': remark})
    return parse(c)
## helper fncts
def section(line):
    """Return the handler matching the keyword found in header *line*.

    The match is case-insensitive and the keywords are tried in the order
    listed below; parse is returned when none of them occurs.
    """
    # str methods replace string.upper()/string.find(), module functions
    # that no longer exist in Python 3.
    upper = line.upper()
    handlers = (
        ('HIGHLIGHT', highlight),
        ('ANCHORED NOTE', anchored_note),
        ('BOX', box),
        ('TEXT NOTE', text_note),
    )
    for keyword, handler in handlers:
        if keyword in upper:
            return handler
    return parse
def page(line):
    """Return the last blank-separated word after the last comma of *line*.

    Trailing whitespace of any kind (including a "\\r\\n" line ending) is
    ignored; previously only "\\n" was stripped, so such lines produced an
    empty or "\\r"-terminated value. Returns '' when there is no word.
    """
    words = line.rstrip().split(',')[-1].split()
    return words[-1] if words else ''
if __name__ == '__main__':
    # Register every handler with the state machine, then run it over stdin
    # starting in the `parse` state.
    m = StateMachine()
    for state in (parse, highlight, anchored_note, box, text_note):
        m.add_state(state)
    for terminal in (error, eof):
        m.add_state(terminal, end_state=1)
    m.set_start(parse)
    m.run((sys.stdin, ''))

View File

@ -1,23 +0,0 @@
#!/bin/bash
# Exports the Skim notes of the pdf found in the current directory to a
# text file, feeds that file to parse-skim.py and removes it afterwards.

# keep the last *.pdf file of the directory
for i in *.pdf; do
  f=$i
done

# When nothing matches, the pattern stays literal ("*.pdf"), so test for an
# actual file. The former test on the single-quoted '$f' could never succeed.
if [[ ! -f "$f" ]]; then
  echo "$f No pdf file in directory... Aborting." >&2
  exit 1
fi

# same name as the pdf, with a .txt extension
filename="${f%.*}".txt

# quoting keeps file names containing blanks in one piece
skimnotes get -format txt "$f"

if [[ ! -f "$filename" ]]; then
  echo "No skim notes in pdf $f. Aborting." >&2
  exit 1
fi

parse-skim.py < "$filename"
rm "$filename"

View File

@ -1,63 +0,0 @@
#!/usr/bin/env python
import sys, os, json
from optparse import OptionParser
def open_file(p):
    """Open the existing file *p* for binary reading.

    Exits the program with a message when *p* does not exist.
    """
    if os.path.exists(p):
        return open(p, 'rb')
    sys.exit('File %s does not exists... Aborting.' % p)
def _unique_key(base, taken):
    """Return *base*, or *base* plus the first free letter suffix, not in *taken*."""
    if base not in taken:
        return base
    for letter in 'abcdefghijklmnopqrstuvwxyz':
        if base + letter not in taken:
            return base + letter
    # every single-letter suffix is in use: keep extending
    return _unique_key(base + 'z', taken)


def refactor(data):
    """Index the entries of *data* by a key built from a name and a year.

    data: iterable of dicts; each is read through its 'author' (or, failing
    that, 'editor') list, whose first item provides 'family', and through
    d['issued']['date-parts'][0][0] for the year.

    Returns a dict mapping "<family><year>" to the entry. Entries with
    neither author nor editor get the key ''. Colliding keys receive the
    suffixes a, b, c, ... in order of appearance.
    """
    data_out = {}
    for d in data:
        # .get(): an entry may lack the 'author' key and only list editors
        people = d.get('author') or d.get('editor')
        nid = ''
        if people:
            # str(): the year may be stored as a number
            nid = people[0]['family'] + str(d['issued']['date-parts'][0][0])
        # Collisions are checked against the keys already handed out. The
        # former loop looked them up in the input list and accumulated the
        # suffixes, so a third duplicate overwrote the second.
        data_out[_unique_key(nid, data_out)] = d
    return data_out
if __name__ == '__main__':
    # Usage: refactorbib.py [-i] FILE
    # Loads the JSON bibliography FILE, re-keys it with refactor() and prints
    # either a "> key - title" listing (-i) or the re-keyed JSON.
    p = OptionParser()
    p.add_option('-i', '--index', action="store_true", help="prints out index")
    options, args = p.parse_args()
    if len(args) < 1:
        sys.exit('No input file... Aborting.')
    try:
        fp = open_file(args[0])
    except (IOError, OSError):
        sys.exit("Can't open file " + args[0] + ". Aborting.")
    try:
        data = json.loads(fp.read())
    except ValueError as err:
        # The file name comes from the parsed positional arguments
        # (sys.argv[1] may be the -i flag), and the reason reported is the
        # decoder's own message; the former code referenced undefined names.
        e = "<refactorbib> Error loading data from" + args[0] + ". Aborting.\n"
        e += "Traceback: " + str(err)
        sys.exit(e)
    finally:
        fp.close()
    out = refactor(data)
    if options.index:
        for e in out.keys():
            print('> ' + e + ' - ' + out[e]['title'] + ' ')
    else:
        print(json.dumps(out, sort_keys=True, indent=2, separators=(',', ': ')))

26
setup.py Normal file
View File

@ -0,0 +1,26 @@
#!/usr/bin/env python
import os, sys
from setuptools import setup, find_packages
PACKAGE = 'nnnotes'

# README is located relative to this file so the build does not depend on the
# directory it is started from; the handle no longer shadows the Python 2
# builtin `file`.
_HERE = os.path.abspath(os.path.dirname(__file__))
with open(os.path.join(_HERE, 'README')) as readme_file:
    README = readme_file.read()

setup(
    name='nnnotes',
    version='v0',
    packages=find_packages(),
    # template files shipped inside the package
    package_data={PACKAGE: ['template/Makefile', 'template/*.mmd', 'template/p/*']},
    # command-line tools installed alongside the package
    scripts=['bin/nnnote', 'bin/iiindex', 'bin/iiinject'],
    provides=[PACKAGE],
    author='gauthiier',
    author_email='d@gauthiier.info',
    url='https://github.com/gauthiier/nnnotes',
    long_description=README,
    classifiers=[
        "Topic :: Utilities",
        # "License :: MIT License" is not a registered classifier
        "License :: OSI Approved :: MIT License",
    ],
)