diff --git a/webpagetest/html/+++/lestyle.css b/webpagetest/html/+++/lestyle.css new file mode 100644 index 0000000..6523b56 --- /dev/null +++ b/webpagetest/html/+++/lestyle.css @@ -0,0 +1,38 @@ +body { + width: 100%; + height: 100%; + margin: 0; + padding: 0; +} + +.agent { + padding: 1.5em; + float: left; + margin: 1em; + width: 20em; + background-color: #eeeeee; +} + +.card { + padding: 1.5em; + float: left; + display: inline; + margin: 1em; + width: 20em; + background-color: #eeeeee; +} + +.data { + margin-left: 0.5em; +} + +.name { + font-weight: bold; +} + +table { + width: 100%; + dborder: 1px solid red; +} + + diff --git a/webpagetest/html/index_template.html b/webpagetest/html/index_template.html new file mode 100644 index 0000000..f8a2b57 --- /dev/null +++ b/webpagetest/html/index_template.html @@ -0,0 +1,10 @@ + + + + + Cards - Micro-Temporalities [dmi15] + + + [[content]] + + diff --git a/webpagetest/html/stats_to_htmlcards.py b/webpagetest/html/stats_to_htmlcards.py new file mode 100644 index 0000000..b5f9e48 --- /dev/null +++ b/webpagetest/html/stats_to_htmlcards.py @@ -0,0 +1,113 @@ +import sys, csv, json, os, re + +def emit_header(): + str_s = '
' + str_s += 'Location:
' + str_s += 'Amsterdam
\n' + str_s += '
\n' + str_s += 'User-Agent:
' + str_s += "Mozilla/5.0 (Windows NT 6.1; WOW64)
AppleWebKit/537.36 (KHTML, like Gecko)
Chrome/43.0.2357.132 Safari/537.36 PTST/221\n" + str_s += '
\n' + return str_s + + +def emit_name(name, date, time): + str_s = '
' + name + '
\n' + str_s += '
' + date + '
\n' + str_s += '
' + time + '
\n' + return str_s + +def string_format_percentage(pct): + v = int(pct * 100) + return str(v) + + +def emit_table_row(elem, index, total): + return '' + '' + index +': ' + ''+ str(int(elem[index])) + '' + string_format_percentage(elem[index] / total) + '%' + '\n' + +def emit_size(size): + total = size['widget'] + size['ad'] + size['privacy'] + size['-'] + size['analytics'] + size['tracker'] + if total == 0: + total = 1 + str_s = '
' + str_s += '

Objects Size (bytes)

\n' + str_s += '
\n' + str_s += '\n' + str_s += emit_table_row(size, 'ad', total).replace('ad', 'ads') + str_s += emit_table_row(size, 'analytics', total) + str_s += emit_table_row(size, 'tracker', total).replace('tracker', 'trackers') + str_s += emit_table_row(size, 'widget', total).replace('widget', 'widgets') + str_s += emit_table_row(size, '-', total).replace('-', 'other') + str_s += '
\n' + str_s += '
\n' + str_s += '
\n' + return str_s + +def emit_item(item): + total = item['total'] + total_junk = item['widget'] + item['ad'] + item['privacy'] + item['analytics'] + item['tracker'] + if total == 0: + total = 1 + str_s = '
' + str_s += '

Page Http Request Elements

\n' + str_s += '
\n' + str_s += '\n' + str_s += emit_table_row(item, 'ad', total).replace('ad', 'ads') + str_s += emit_table_row(item, 'analytics', total) + str_s += emit_table_row(item, 'tracker', total).replace('tracker', 'trackers') + str_s += emit_table_row(item, 'widget', total).replace('widget', 'widgets') + str_s += emit_table_row(item, '-', total).replace('-', 'other') + str_s += '
\n' + str_s += '
\n' + str_s += '
\n' + return str_s + +def emit_time(time): + total = time['widget'] + time['ad'] + time['privacy'] + time['-'] + time['analytics'] + time['tracker'] + if total == 0: + total = 1 + str_s = '
' + str_s += '

(Micro) Timing (ms)

\n' + str_s += '
\n' + str_s += '\n' + str_s += emit_table_row(time, 'ad', total).replace('ad', 'ads') + str_s += emit_table_row(time, 'analytics', total) + str_s += emit_table_row(time, 'tracker', total).replace('tracker', 'trackers') + str_s += emit_table_row(time, 'widget', total).replace('widget', 'widgets') + str_s += emit_table_row(time, '-', total).replace('-', 'other') + str_s += '
\n' + str_s += '
\n' + str_s += '
\n' + return str_s + +if __name__ == '__main__': + + fp = sys.stdin + try: + stats = json.loads(fp.read()) + except Exception, ee: + sys.exit('Error loading data... Aborting.') + + try: + template = open(os.path.join('.', 'index_template.html'), 'r+'); + except: + print('error opening template file. aborting...'); + sys.exit(0); + + content = "" + content += emit_header() + for e in stats: + card = "
\n" + stats = e['stats'] + card += "\t\t\t" + emit_name(stats['host'], stats['date'], stats['time']) + card += "\t\t\t" + emit_item(stats['items']) + card += "\t\t\t" + emit_size(stats['sizes']) + card += "\t\t\t" + emit_time(stats['times']) + card += "
\n" + content += card + + html = template.read().replace('[[content]]', content); + + print html + + diff --git a/webpagetest/wpt_csv_filter.py b/webpagetest/wpt_csv_filter.py index ac580d5..7871724 100755 --- a/webpagetest/wpt_csv_filter.py +++ b/webpagetest/wpt_csv_filter.py @@ -1,13 +1,14 @@ #!/usr/bin/python2.7 -import sys, csv, json, os +import sys, csv, json, os, re from optparse import OptionParser # list of fileds from the wpt csv file to keep csv_fields = [ 'Date', 'Time', -'tracker_type', +'bug_type', +'bug_name', 'Sequence Number', 'Host', 'IP Address', @@ -40,14 +41,24 @@ csv_fields = [ 'Initiator Line', 'Expires', 'Cached', -'Cookie Count(out)', +'Cookie Count(out)' ] -def filter_fields(wpt_row, type): +# matches a given url to all possible bugs +def match(url, bugs): + for b in bugs: + pattern = re.compile(b['pattern']) + if(pattern.search(url)): + return {'name': b['name'], 'type': b['type'], } + return None + +# filters the csv file (deleted some columns) and adds the 'tracker type' +def filter_fields(wpt_row, type, name): for k in wpt_row.keys(): if k not in csv_fields: del wpt_row[k] - wpt_row['tracker_type'] = type + wpt_row['bug_type'] = type + wpt_row['bug_name'] = name def run(options): @@ -72,8 +83,6 @@ def run(options): except Exception, ee: sys.exit('Error loading bugs data... Aborting.') - stats = {'total' : 0.0, 'ads': 0.0, 'trackers': 0.0, 'analytics': 0.0, 'widgets': 0.0, 'privacy': 0.0, 'blank': 0.0} - #write ouput fname, ext = os.path.splitext(os.path.basename(csv_file)) @@ -87,59 +96,24 @@ def run(options): writer.writeheader() last_seq = 0 + for r in wpt_data: seq = int(r['Sequence Number']) if seq < last_seq: break last_seq = seq - stats['total'] += 1 - host = r['Host'] - if any(a in host for a in bugs_data['ads']): -# print "ads: " + host - filter_fields(r, 'ad') - writer.writerow(r); - stats['ads'] += 1 - continue - if any(a in host for a in bugs_data['trackers']): -# print "trackers: " + host - filter_fields(r, 'tracker') - writer.writerow(r); - stats['trackers'] += 1 - continue - if any(a in host for a in bugs_data['analytics']): -# print "analytics: " + host - filter_fields(r, 'analytics') - writer.writerow(r); - stats['analytics'] += 1 - continue - if any(a in host for a in bugs_data['widgets']): -# print "widgets: " + host - filter_fields(r, 'widget') - writer.writerow(r); - stats['widgets'] += 1 - continue - if any(a in host for a in bugs_data['privacy']): -# print "privacy: " + host - filter_fields(r, 'privacy') - writer.writerow(r); - stats['privacy'] += 1 - continue - if options.keep: - stats['blank'] += 1 - filter_fields(r, '-') - writer.writerow(r); + print str(seq) - if options.stats: - print "----- Stats: " + fname + ext + " -----" - print "total (elements): " + str(stats['total']) - print "ads: " + str(stats['ads']) + ' - ' + str(stats['ads'] / stats['total']) + '%' - print "trackers: " + str(stats['trackers']) + ' - ' + str(stats['trackers'] / stats['total']) + '%' - print "analytics: " + str(stats['analytics']) + ' - ' + str(stats['analytics'] / stats['total']) + '%' - print "widgets: " + str(stats['widgets']) + ' - ' + str(stats['widgets'] / stats['total']) + '%' - print "privacy: " + str(stats['privacy']) + ' - ' + str(stats['privacy'] / stats['total']) + '%' - print "..............." - print "* JUNK RATIO * " + str((stats['ads'] + stats['trackers'] + stats['analytics'] + stats['widgets'] + stats['privacy']) / stats['total']) + '%' + url = r['Host'] + r['URL'] + bug = match(url, bugs_data['bugs']) + if bug: + filter_fields(r, bug['type'], bug['name']) + writer.writerow(r); + else: + if options.keep: + filter_fields(r, '-', '-') + writer.writerow(r); if __name__ == '__main__': @@ -147,7 +121,6 @@ if __name__ == '__main__': p.add_option('-f', '--file', action="store", help="wpt csv input file") p.add_option('-b', '--bugs', action="store", help="ghostery (formated) bugs input file") p.add_option('-k', '--keep', action="store_true", help="keeps the non bugs html element") - p.add_option('-s', '--stats', action="store_true", help="prints basic stats") p.add_option('-o', '--outputdir', action="store", help="output directory", default="") diff --git a/webpagetest/wpt_csv_filter_batch.py b/webpagetest/wpt_csv_filter_batch.py index 91f3433..489f091 100755 --- a/webpagetest/wpt_csv_filter_batch.py +++ b/webpagetest/wpt_csv_filter_batch.py @@ -1,6 +1,6 @@ #!/usr/bin/python2.7 -import sys, csv, json, os +import sys, csv, json, os, time from optparse import OptionParser import wpt_csv_filter as wptf @@ -30,11 +30,15 @@ def run(options): csv_files.append(os.path.join(dirpath, fn)) break + i = 0 for f in csv_files: + i += 1 options.file = f - print "processing - " + f + print str(i) + "/" + str(len(csv_files)) + " - " + f + start_time = time.time() wptf.run(options) - print ".......done........" + duration = time.time() - start_time + print "done - " + time.strftime('%H:%M:%S', time.gmtime(duration)) if __name__ == '__main__': @@ -42,7 +46,6 @@ if __name__ == '__main__': p.add_option('-i', '--inputdir', action="store", help="input directory (where all the wpt csv files reside)") p.add_option('-b', '--bugs', action="store", help="ghostery (formated) bugs input file") p.add_option('-k', '--keep', action="store_true", help="keeps the non bugs html element") - p.add_option('-s', '--stats', action="store_true", help="prints basic stats") p.add_option('-o', '--outputdir', action="store", help="output directory (where all the filtered csv files will be placed)", default="") options, args = p.parse_args() diff --git a/webpagetest/wpt_csv_stats.py b/webpagetest/wpt_csv_stats.py new file mode 100644 index 0000000..fb3cbe0 --- /dev/null +++ b/webpagetest/wpt_csv_stats.py @@ -0,0 +1,47 @@ +import sys, csv, json, os, re +from optparse import OptionParser + +def run(csv_file_path): + + if not os.path.exists(csv_file_path): + sys.exit('Input file does not exists. Aborting.') + + stats = { + 'date' : None, + 'time' : None, + 'host' : None, + 'items' : {'ad': 0.0, 'tracker': 0.0, 'analytics': 0.0, 'widget': 0.0, 'privacy': 0.0, '-': 0.0, 'total' : 0.0}, + 'sizes' : {'ad': 0.0, 'tracker': 0.0, 'analytics': 0.0, 'widget': 0.0, 'privacy': 0.0, '-': 0.0}, + 'times' : {'ad': 0.0, 'tracker': 0.0, 'analytics': 0.0, 'widget': 0.0, 'privacy': 0.0, '-': 0.0} + } + + with open(csv_file_path) as csv_file: + wpt_data = csv.DictReader(csv_file) + items = stats['items'] + sizes = stats['sizes'] + times = stats['times'] + for d in wpt_data: + if not stats['host']: + stats['host'] = d['Host'] # first line + stats['date'] = d['Date'] # first line + stats['time'] = d['Time'] # first line + items['total'] += 1 + bug_type = d['bug_type'] + items[bug_type] += 1 + sizes[bug_type] += int(d['Object Size']) + times[bug_type] += int(d['Time to Load (ms)']) + + return stats + +if __name__ == '__main__': + + p = OptionParser(); + p.add_option('-f', '--file', action="store", help="wpt csv input file") + + options, args = p.parse_args() + + if not options.file: + sys.exit('No wpt csv input file specified. Aborting.') + + stats = run(options.file) + print json.dumps(stats, indent=2, separators=(',',':')) diff --git a/webpagetest/wpt_csv_stats_batch.py b/webpagetest/wpt_csv_stats_batch.py new file mode 100644 index 0000000..c140547 --- /dev/null +++ b/webpagetest/wpt_csv_stats_batch.py @@ -0,0 +1,34 @@ +import sys, csv, json, os, re +from optparse import OptionParser +import wpt_csv_stats + +def run(input_dir): + + if not os.path.exists(input_dir): + sys.exit('Input directory does not exists. Aborting.') + + stats = [] + + csv_files = [] + for (dirpath, dirnames, filenames) in os.walk(input_dir): + for fn in filenames: + fname, ext = os.path.splitext(fn) + if ext == '.csv': + filepath = os.path.join(dirpath, fn) + stats.append({'name' : fname, 'stats': wpt_csv_stats.run(filepath)}) + break + + return stats + +if __name__ == '__main__': + + p = OptionParser(); + p.add_option('-i', '--inputdir', action="store", help="input directory (where all the wpt csv files reside)") + + options, args = p.parse_args() + + if not options.inputdir: + sys.exit('No input directory specified. Aborting.') + + stats = run(options.inputdir) + print json.dumps(stats, indent=2, separators=(',',':'))