wpt_csv_filter_batch

This commit is contained in:
gauthiier 2015-07-08 14:05:44 +02:00
parent 86fe046f43
commit 6ff788a782
3 changed files with 60 additions and 3 deletions

2
.gitignore vendored
View File

@ -1,3 +1,5 @@
.DS_Store
*.csv
csv/
bugs.json
*.pyc

View File

@ -76,7 +76,11 @@ def run(options):
#write output
fname, ext = os.path.splitext(os.path.basename(csv_file))
out_csv_filename = fname + "__filtered" + ext
if not os.path.exists(options.outputdir):
os.makedirs(options.outputdir)
out_csv_filename = options.outputdir + fname + "__filtered" + ext
out_csv = open(out_csv_filename, 'w')
writer = csv.DictWriter(out_csv, fieldnames=csv_fields)
@ -140,6 +144,8 @@ if __name__ == '__main__':
p.add_option('-b', '--bugs', action="store", help="ghostery (formated) bugs input file")
p.add_option('-k', '--keep', action="store_true", help="keeps the non bugs html element")
p.add_option('-s', '--stats', action="store_true", help="prints basic stats")
p.add_option('-o', '--outputdir', action="store", help="output directory", default="")
options, args = p.parse_args()

View File

@ -0,0 +1,49 @@
#!/usr/bin/python2.7
import sys, csv, json, os
from optparse import OptionParser
import wpt_csv_filter as wptf
class option:
    """Empty attribute container.

    Instances carry no behaviour of their own; attributes are attached
    ad hoc by whoever creates them.
    """
def run(options):
    """Run the single-file filter on every CSV file in the input directory.

    ``options`` is an attribute container (normally the optparse result)
    providing ``inputdir``, ``bugs`` and ``outputdir``; it is handed
    unchanged to ``wptf.run`` after ``options.file`` has been pointed at
    each CSV file in turn.

    Only files sitting directly in ``options.inputdir`` whose extension is
    exactly ``.csv`` are processed; sub-directories are not descended into.

    Side effects: ``options.outputdir`` is normalised in place (see below),
    the output directory is created when missing, and ``options.file`` is
    overwritten for every processed file.

    Exits via ``sys.exit`` with a message when the input directory or the
    bugs file option is missing, or when the input directory does not exist.
    """
    if not options.inputdir:
        sys.exit('No input directory specified. Aborting.')

    if not options.bugs:
        sys.exit('No ghostery (formated) bugs input file. Aborting.')

    if not os.path.exists(options.inputdir):
        sys.exit('Input directory does not exists. Aborting.')

    # Normalise the output directory once, up front:
    #  * an empty value (the command-line default) means "current directory";
    #    passing "" to os.makedirs would raise.
    #  * joining with '' guarantees a trailing separator, so code that builds
    #    the output path by plain string concatenation with the directory
    #    still lands inside it.
    options.outputdir = os.path.join(options.outputdir or os.curdir, '')
    if not os.path.exists(options.outputdir):
        os.makedirs(options.outputdir)

    csv_files = []
    for dirpath, _dirnames, filenames in os.walk(options.inputdir):
        csv_files.extend(
            os.path.join(dirpath, name)
            for name in filenames
            if os.path.splitext(name)[1] == '.csv'
        )
        # os.walk yields the top directory first; stop there so that
        # sub-directories are not scanned.
        break

    for csv_path in csv_files:
        options.file = csv_path
        wptf.run(options)
        # Visual separator between the reports of consecutive files.
        # Single-argument call form prints identically on Python 2 and 3.
        print("...............")
if __name__ == '__main__':
    # Command-line entry point: declare the supported switches, parse the
    # command line and hand the resulting options object to run().
    parser = OptionParser()
    parser.add_option(
        '-i', '--inputdir', action="store",
        help="input directory (where all the wpt csv files reside)")
    parser.add_option(
        '-b', '--bugs', action="store",
        help="ghostery (formated) bugs input file")
    parser.add_option(
        '-k', '--keep', action="store_true",
        help="keeps the non bugs html element")
    parser.add_option(
        '-s', '--stats', action="store_true",
        help="prints basic stats")
    parser.add_option(
        '-o', '--outputdir', action="store",
        help="output directory (where all the filtered csv files will be placed)",
        default="")

    options, args = parser.parse_args()
    run(options)