From 6ff788a782e5356517d4ca64b40275998c63bd3c Mon Sep 17 00:00:00 2001 From: gauthiier Date: Wed, 8 Jul 2015 14:05:44 +0200 Subject: [PATCH] wpt_csv_filter_batch --- .gitignore | 4 ++- webpagetest/wpt_csv_filter.py | 10 ++++-- webpagetest/wpt_csv_filter_batch.py | 49 +++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 3 deletions(-) create mode 100755 webpagetest/wpt_csv_filter_batch.py diff --git a/.gitignore b/.gitignore index 50ddd66..3c621fc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ .DS_Stroe *.csv -bugs.json \ No newline at end of file +csv/ +bugs.json +*.pyc \ No newline at end of file diff --git a/webpagetest/wpt_csv_filter.py b/webpagetest/wpt_csv_filter.py index 845c784..3e1afb6 100755 --- a/webpagetest/wpt_csv_filter.py +++ b/webpagetest/wpt_csv_filter.py @@ -76,7 +76,11 @@ def run(options): #write ouput fname, ext = os.path.splitext(os.path.basename(csv_file)) - out_csv_filename = fname + "__filtered" + ext + + if not os.path.exists(options.outputdir): + os.makedirs(options.outputdir) + + out_csv_filename = options.outputdir + fname + "__filtered" + ext out_csv = open(out_csv_filename, 'w') writer = csv.DictWriter(out_csv, fieldnames=csv_fields) @@ -131,7 +135,7 @@ def run(options): print "widgets: " + str(stats['widgets']) + ' - ' + str(stats['widgets'] / stats['total']) + '%' print "privacy: " + str(stats['privacy']) + ' - ' + str(stats['privacy'] / stats['total']) + '%' print "..............." 
- print "* JUNK RATIO * " + str((stats['ads'] + stats['trackers'] + stats['analytics'] + stats['widgets'] + stats['privacy']) / stats['total']) + '%' + print "* JUNK RATIO * " + str((stats['ads'] + stats['trackers'] + stats['analytics'] + stats['widgets'] + stats['privacy']) / stats['total']) + '%' if __name__ == '__main__': @@ -140,6 +144,8 @@ if __name__ == '__main__': p.add_option('-b', '--bugs', action="store", help="ghostery (formated) bugs input file") p.add_option('-k', '--keep', action="store_true", help="keeps the non bugs html element") p.add_option('-s', '--stats', action="store_true", help="prints basic stats") + p.add_option('-o', '--outputdir', action="store", help="output directory", default="") + options, args = p.parse_args()
diff --git a/webpagetest/wpt_csv_filter_batch.py b/webpagetest/wpt_csv_filter_batch.py
new file mode 100755
index 0000000..83169ee
--- /dev/null
+++ b/webpagetest/wpt_csv_filter_batch.py
@@ -0,0 +1,49 @@
+#!/usr/bin/python2.7
+
+import sys, csv, json, os
+from optparse import OptionParser
+import wpt_csv_filter as wptf
+
+class option:
+    pass
+
+def run(options):
+    """Run wpt_csv_filter on every .csv file found directly in options.inputdir.
+
+    Exits via sys.exit when inputdir or bugs is missing or inputdir does not exist;
+    options.file and options.outputdir are overwritten before wptf.run is called."""
+    if not options.inputdir:
+        sys.exit('No input directory specified. Aborting.')
+
+    if not options.bugs:
+        sys.exit('No ghostery (formated) bugs input file. Aborting.')
+
+    if not os.path.exists(options.inputdir):
+        sys.exit('Input directory does not exists. Aborting.')
+
+    # wptf.run prepends outputdir verbatim to the file name: give it "./" or "dir/", never "".
+    options.outputdir = os.path.join(options.outputdir or os.curdir, '')
+    if not os.path.exists(options.outputdir):
+        os.makedirs(options.outputdir)
+
+    csv_files = []
+    for (dirpath, dirnames, filenames) in os.walk(options.inputdir):
+        for fn in filenames:
+            if os.path.splitext(fn)[1] == '.csv':
+                csv_files.append(os.path.join(dirpath, fn))
+        break  # first os.walk entry only: sub-directories are not scanned
+
+    for f in csv_files:
+        options.file = f
+        wptf.run(options)
+        print "..............."
+
+if __name__ == '__main__':
+    p = OptionParser()
+    p.add_option('-i', '--inputdir', action="store", help="input directory (where all the wpt csv files reside)")
+    p.add_option('-b', '--bugs', action="store", help="ghostery (formated) bugs input file")
+    p.add_option('-k', '--keep', action="store_true", help="keeps the non bugs html element")
+    p.add_option('-s', '--stats', action="store_true", help="prints basic stats")
+    p.add_option('-o', '--outputdir', action="store", help="output directory (where all the filtered csv files will be placed)", default="")
+    options, args = p.parse_args()
+    run(options)