cv_filters + stats + htmlcards
This commit is contained in:
parent
a5004449a5
commit
caa8b3ddb5
38
webpagetest/html/+++/lestyle.css
Normal file
38
webpagetest/html/+++/lestyle.css
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
/* Stylesheet for the generated "cards" page (see index_template.html). */

/* The page fills the viewport and drops the browser's default spacing. */
body {
  width: 100%;
  height: 100%;
  margin: 0;
  padding: 0;
}

/* Static box emitted once per page (class "agent"). */
.agent {
  padding: 1.5em;
  float: left;
  margin: 1em;
  width: 20em;
  background-color: #eeeeee;
}

/* One box per measured page (class "card"). */
.card {
  padding: 1.5em;
  float: left;
  /* NOTE(review): display:inline on a float looks like the old-IE
     double-margin workaround; confirm before removing. */
  display: inline;
  margin: 1em;
  width: 20em;
  background-color: #eeeeee;
}

/* Wrapper around each statistics table. */
.data {
  margin-left: 0.5em;
}

/* Heading line of a card. */
.name {
  font-weight: bold;
}

table {
  width: 100%;
  /* Presumably a debug outline. It used to be written as the invalid
     property "dborder", which browsers ignore; kept here as an explicit,
     valid comment so it can be switched on when needed. */
  /* border: 1px solid red; */
}
|
||||||
|
|
||||||
|
|
||||||
10
webpagetest/html/index_template.html
Normal file
10
webpagetest/html/index_template.html
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
<!DOCTYPE html>
<html lang="en">
<head>
<!-- Page skeleton: the card generator substitutes its output for the placeholder inside body. -->
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<link rel="stylesheet" type="text/css" href="+++/lestyle.css"/>
<title>Cards - Micro-Temporalities [dmi15]</title>
</head>
<body>
[[content]]
</body>
</html>
|
||||||
113
webpagetest/html/stats_to_htmlcards.py
Normal file
113
webpagetest/html/stats_to_htmlcards.py
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
import sys, csv, json, os, re
|
||||||
|
|
||||||
|
def emit_header():
    """Return the static "agent" box shown once at the top of the page.

    The location and User-Agent text are fixed strings; nothing is read
    from the input data.
    """
    fragments = [
        '<div class="agent">',
        '<b>Location</b>:<br>',
        'Amsterdam<br>\n',
        '<br>\n',
        '<b>User-Agent</b>:<br>',
        "Mozilla/5.0 (Windows NT 6.1; WOW64)<br>AppleWebKit/537.36 (KHTML, like Gecko)<br>Chrome/43.0.2357.132 Safari/537.36 PTST/221\n",
        '</div>\n',
    ]
    return ''.join(fragments)
|
||||||
|
|
||||||
|
|
||||||
|
def emit_name(name, date, time):
    """Return the three heading lines of a card, one <div> each.

    name, date, time -- strings placed verbatim (no HTML escaping) inside
    <div class="name">, <div class="date"> and <div class="time">.
    """
    labelled = (('name', name), ('date', date), ('time', time))
    return ''.join(
        '<div class="' + css_class + '">' + text + '</div>\n'
        for css_class, text in labelled
    )
|
||||||
|
|
||||||
|
def string_format_percentage(pct):
    """Return the fraction *pct* as a whole-number percentage string.

    The value is truncated toward zero, not rounded (0.125 -> '12').
    """
    return str(int(pct * 100))


# (key in the stats dict, label shown in the table), in display order.
# 'privacy' has no row of its own but is still part of the summed totals.
_DISPLAYED_ROWS = (
    ('ad', 'ads'),
    ('analytics', 'analytics'),
    ('tracker', 'trackers'),
    ('widget', 'widgets'),
    ('-', 'other'),
)

# Categories added up for the percentage denominator of sizes and times.
_SUMMED_CATEGORIES = ('widget', 'ad', 'privacy', '-', 'analytics', 'tracker')


def emit_table_row(elem, index, total, label=None):
    """Return one table row: label, integer value, share of *total* in percent.

    elem  -- mapping of category key to a numeric value
    index -- key of the category to render
    total -- denominator of the percentage column; must not be zero
    label -- text of the first cell; defaults to *index*

    Raises KeyError when *index* is missing from *elem*.
    """
    if label is None:
        label = index
    value = elem[index]
    # float() keeps the share fractional even if both operands are ints
    # (Python 2 would otherwise floor the division).
    share = value / float(total)
    return ('<tr><td>' + label + ': </td><td>' + str(int(value)) + '</td><td>'
            + string_format_percentage(share) + '%</td></tr>\n')


def _category_sum(values):
    """Add up all categories of *values*, 'privacy' included."""
    return sum(values[key] for key in _SUMMED_CATEGORIES)


def _emit_section(css_class, title, values, total):
    """Render one titled statistics table wrapped in <div class=css_class>."""
    if total == 0:
        # Avoid dividing by zero; every share is then rendered as 0%.
        total = 1
    # The display label is passed to the row directly instead of being
    # patched in afterwards with str.replace, which would also rewrite
    # a '-' sign appearing in a value cell.
    rows = ''.join(
        [emit_table_row(values, key, total, label) for key, label in _DISPLAYED_ROWS]
    )
    return ('<div class="' + css_class + '">'
            + '<h4>' + title + '</h4>\n'
            + '<div class="data">\n'
            + '<table>\n'
            + rows
            + '</table>\n'
            + '</div>\n'
            + '</div>\n')


def emit_size(size):
    """Return the "Objects Size (bytes)" table for the per-category sizes.

    Percentages are relative to the sum of all six categories.
    """
    return _emit_section('size', 'Objects Size (bytes)', size, _category_sum(size))


def emit_item(item):
    """Return the "Page Http Request Elements" table for the request counts.

    Percentages are relative to item['total'].
    """
    return _emit_section('items', 'Page Http Request Elements', item, item['total'])


def emit_time(time):
    """Return the "(Micro) Timing (ms)" table for the per-category load times.

    Percentages are relative to the sum of all six categories.
    """
    return _emit_section('times', '(Micro) Timing (ms)', time, _category_sum(time))
|
||||||
|
|
||||||
|
if __name__ == '__main__':

    # Input: the stats JSON on stdin. Each entry is read through its 'stats'
    # dict, which must carry 'host', 'date', 'time', 'items', 'sizes' and
    # 'times'.
    try:
        entries = json.loads(sys.stdin.read())
    except Exception:
        sys.exit('Error loading data... Aborting.')

    # The template is only read, so open it read-only and close it right
    # away. Failures go to stderr with a non-zero status: stdout is reserved
    # for the generated HTML.
    try:
        with open(os.path.join('.', 'index_template.html'), 'r') as template:
            page = template.read()
    except (IOError, OSError):
        sys.exit('error opening template file. aborting...')

    # One "agent" box followed by one card per entry.
    fragments = [emit_header()]
    for entry in entries:
        card_stats = entry['stats']
        fragments.append(
            "<div class='card'>\n"
            + "\t\t\t" + emit_name(card_stats['host'], card_stats['date'], card_stats['time'])
            + "\t\t\t" + emit_item(card_stats['items'])
            + "\t\t\t" + emit_size(card_stats['sizes'])
            + "\t\t\t" + emit_time(card_stats['times'])
            + "</div>\n"
        )

    html = page.replace('[[content]]', ''.join(fragments))

    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(html)
|
||||||
|
|
||||||
|
|
||||||
@ -1,13 +1,14 @@
|
|||||||
#!/usr/bin/python2.7
|
#!/usr/bin/python2.7
|
||||||
|
|
||||||
import sys, csv, json, os
|
import sys, csv, json, os, re
|
||||||
from optparse import OptionParser
|
from optparse import OptionParser
|
||||||
|
|
||||||
# list of fields from the wpt csv file to keep
|
# list of fields from the wpt csv file to keep
|
||||||
csv_fields = [
|
csv_fields = [
|
||||||
'Date',
|
'Date',
|
||||||
'Time',
|
'Time',
|
||||||
'tracker_type',
|
'bug_type',
|
||||||
|
'bug_name',
|
||||||
'Sequence Number',
|
'Sequence Number',
|
||||||
'Host',
|
'Host',
|
||||||
'IP Address',
|
'IP Address',
|
||||||
@ -40,14 +41,24 @@ csv_fields = [
|
|||||||
'Initiator Line',
|
'Initiator Line',
|
||||||
'Expires',
|
'Expires',
|
||||||
'Cached',
|
'Cached',
|
||||||
'Cookie Count(out)',
|
'Cookie Count(out)'
|
||||||
]
|
]
|
||||||
|
|
||||||
def filter_fields(wpt_row, type):
|
# matches a given url to all possible bugs
|
||||||
|
# Compiled regexes keyed by their pattern string, shared across calls so
# each bug pattern is compiled once instead of once per URL.
_COMPILED_BUG_PATTERNS = {}


def match(url, bugs):
    """Return the first bug whose pattern is found in *url*, or None.

    url  -- string to test (the caller passes host + path)
    bugs -- iterable of dicts with 'pattern' (a regex), 'name' and 'type'

    Returns {'name': ..., 'type': ...} for the first matching bug, in the
    order given by *bugs*; None when nothing matches or *bugs* is empty.
    Raises re.error if a pattern is not a valid regular expression.
    """
    for bug in bugs:
        source = bug['pattern']
        regex = _COMPILED_BUG_PATTERNS.get(source)
        if regex is None:
            # re keeps only a bounded internal cache, so a long bug list
            # would otherwise be recompiled for every URL.
            regex = _COMPILED_BUG_PATTERNS[source] = re.compile(source)
        if regex.search(url):
            return {'name': bug['name'], 'type': bug['type']}
    return None
|
||||||
|
|
||||||
|
# filters a csv row (deletes the columns not listed in csv_fields) and adds the 'bug_type' and 'bug_name' columns
|
||||||
|
def filter_fields(wpt_row, type, name):
|
||||||
for k in wpt_row.keys():
|
for k in wpt_row.keys():
|
||||||
if k not in csv_fields:
|
if k not in csv_fields:
|
||||||
del wpt_row[k]
|
del wpt_row[k]
|
||||||
wpt_row['tracker_type'] = type
|
wpt_row['bug_type'] = type
|
||||||
|
wpt_row['bug_name'] = name
|
||||||
|
|
||||||
def run(options):
|
def run(options):
|
||||||
|
|
||||||
@ -72,8 +83,6 @@ def run(options):
|
|||||||
except Exception, ee:
|
except Exception, ee:
|
||||||
sys.exit('Error loading bugs data... Aborting.')
|
sys.exit('Error loading bugs data... Aborting.')
|
||||||
|
|
||||||
stats = {'total' : 0.0, 'ads': 0.0, 'trackers': 0.0, 'analytics': 0.0, 'widgets': 0.0, 'privacy': 0.0, 'blank': 0.0}
|
|
||||||
|
|
||||||
#write output
|
#write output
|
||||||
fname, ext = os.path.splitext(os.path.basename(csv_file))
|
fname, ext = os.path.splitext(os.path.basename(csv_file))
|
||||||
|
|
||||||
@ -87,59 +96,24 @@ def run(options):
|
|||||||
writer.writeheader()
|
writer.writeheader()
|
||||||
|
|
||||||
last_seq = 0
|
last_seq = 0
|
||||||
|
|
||||||
for r in wpt_data:
|
for r in wpt_data:
|
||||||
seq = int(r['Sequence Number'])
|
seq = int(r['Sequence Number'])
|
||||||
if seq < last_seq:
|
if seq < last_seq:
|
||||||
break
|
break
|
||||||
last_seq = seq
|
last_seq = seq
|
||||||
stats['total'] += 1
|
|
||||||
host = r['Host']
|
|
||||||
if any(a in host for a in bugs_data['ads']):
|
|
||||||
# print "ads: " + host
|
|
||||||
filter_fields(r, 'ad')
|
|
||||||
writer.writerow(r);
|
|
||||||
stats['ads'] += 1
|
|
||||||
continue
|
|
||||||
if any(a in host for a in bugs_data['trackers']):
|
|
||||||
# print "trackers: " + host
|
|
||||||
filter_fields(r, 'tracker')
|
|
||||||
writer.writerow(r);
|
|
||||||
stats['trackers'] += 1
|
|
||||||
continue
|
|
||||||
if any(a in host for a in bugs_data['analytics']):
|
|
||||||
# print "analytics: " + host
|
|
||||||
filter_fields(r, 'analytics')
|
|
||||||
writer.writerow(r);
|
|
||||||
stats['analytics'] += 1
|
|
||||||
continue
|
|
||||||
if any(a in host for a in bugs_data['widgets']):
|
|
||||||
# print "widgets: " + host
|
|
||||||
filter_fields(r, 'widget')
|
|
||||||
writer.writerow(r);
|
|
||||||
stats['widgets'] += 1
|
|
||||||
continue
|
|
||||||
if any(a in host for a in bugs_data['privacy']):
|
|
||||||
# print "privacy: " + host
|
|
||||||
filter_fields(r, 'privacy')
|
|
||||||
writer.writerow(r);
|
|
||||||
stats['privacy'] += 1
|
|
||||||
continue
|
|
||||||
if options.keep:
|
|
||||||
stats['blank'] += 1
|
|
||||||
filter_fields(r, '-')
|
|
||||||
writer.writerow(r);
|
|
||||||
|
|
||||||
|
print str(seq)
|
||||||
|
|
||||||
if options.stats:
|
url = r['Host'] + r['URL']
|
||||||
print "----- Stats: " + fname + ext + " -----"
|
bug = match(url, bugs_data['bugs'])
|
||||||
print "total (elements): " + str(stats['total'])
|
if bug:
|
||||||
print "ads: " + str(stats['ads']) + ' - ' + str(stats['ads'] / stats['total']) + '%'
|
filter_fields(r, bug['type'], bug['name'])
|
||||||
print "trackers: " + str(stats['trackers']) + ' - ' + str(stats['trackers'] / stats['total']) + '%'
|
writer.writerow(r);
|
||||||
print "analytics: " + str(stats['analytics']) + ' - ' + str(stats['analytics'] / stats['total']) + '%'
|
else:
|
||||||
print "widgets: " + str(stats['widgets']) + ' - ' + str(stats['widgets'] / stats['total']) + '%'
|
if options.keep:
|
||||||
print "privacy: " + str(stats['privacy']) + ' - ' + str(stats['privacy'] / stats['total']) + '%'
|
filter_fields(r, '-', '-')
|
||||||
print "..............."
|
writer.writerow(r);
|
||||||
print "* JUNK RATIO * " + str((stats['ads'] + stats['trackers'] + stats['analytics'] + stats['widgets'] + stats['privacy']) / stats['total']) + '%'
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
@ -147,7 +121,6 @@ if __name__ == '__main__':
|
|||||||
p.add_option('-f', '--file', action="store", help="wpt csv input file")
|
p.add_option('-f', '--file', action="store", help="wpt csv input file")
|
||||||
p.add_option('-b', '--bugs', action="store", help="ghostery (formated) bugs input file")
|
p.add_option('-b', '--bugs', action="store", help="ghostery (formated) bugs input file")
|
||||||
p.add_option('-k', '--keep', action="store_true", help="keeps the non bugs html element")
|
p.add_option('-k', '--keep', action="store_true", help="keeps the non bugs html element")
|
||||||
p.add_option('-s', '--stats', action="store_true", help="prints basic stats")
|
|
||||||
p.add_option('-o', '--outputdir', action="store", help="output directory", default="")
|
p.add_option('-o', '--outputdir', action="store", help="output directory", default="")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/python2.7
|
#!/usr/bin/python2.7
|
||||||
|
|
||||||
import sys, csv, json, os
|
import sys, csv, json, os, time
|
||||||
from optparse import OptionParser
|
from optparse import OptionParser
|
||||||
import wpt_csv_filter as wptf
|
import wpt_csv_filter as wptf
|
||||||
|
|
||||||
@ -30,11 +30,15 @@ def run(options):
|
|||||||
csv_files.append(os.path.join(dirpath, fn))
|
csv_files.append(os.path.join(dirpath, fn))
|
||||||
break
|
break
|
||||||
|
|
||||||
|
i = 0
|
||||||
for f in csv_files:
|
for f in csv_files:
|
||||||
|
i += 1
|
||||||
options.file = f
|
options.file = f
|
||||||
print "processing - " + f
|
print str(i) + "/" + str(len(csv_files)) + " - " + f
|
||||||
|
start_time = time.time()
|
||||||
wptf.run(options)
|
wptf.run(options)
|
||||||
print ".......done........"
|
duration = time.time() - start_time
|
||||||
|
print "done - " + time.strftime('%H:%M:%S', time.gmtime(duration))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
@ -42,7 +46,6 @@ if __name__ == '__main__':
|
|||||||
p.add_option('-i', '--inputdir', action="store", help="input directory (where all the wpt csv files reside)")
|
p.add_option('-i', '--inputdir', action="store", help="input directory (where all the wpt csv files reside)")
|
||||||
p.add_option('-b', '--bugs', action="store", help="ghostery (formated) bugs input file")
|
p.add_option('-b', '--bugs', action="store", help="ghostery (formated) bugs input file")
|
||||||
p.add_option('-k', '--keep', action="store_true", help="keeps the non bugs html element")
|
p.add_option('-k', '--keep', action="store_true", help="keeps the non bugs html element")
|
||||||
p.add_option('-s', '--stats', action="store_true", help="prints basic stats")
|
|
||||||
p.add_option('-o', '--outputdir', action="store", help="output directory (where all the filtered csv files will be placed)", default="")
|
p.add_option('-o', '--outputdir', action="store", help="output directory (where all the filtered csv files will be placed)", default="")
|
||||||
|
|
||||||
options, args = p.parse_args()
|
options, args = p.parse_args()
|
||||||
|
|||||||
47
webpagetest/wpt_csv_stats.py
Normal file
47
webpagetest/wpt_csv_stats.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
import sys, csv, json, os, re
|
||||||
|
from optparse import OptionParser
|
||||||
|
|
||||||
|
def run(csv_file_path):
    """Summarise one filtered wpt csv file per bug type.

    Returns a dict with:
      'host', 'date', 'time' -- taken from the first row whose 'Host' is
                                non-empty (None if there is no such row)
      'items' -- number of rows per bug type, plus 'total'
      'sizes' -- sum of 'Object Size' per bug type
      'times' -- sum of 'Time to Load (ms)' per bug type

    Exits the process if the file does not exist. Raises KeyError for a
    'bug_type' outside the known categories and ValueError for a size or
    time cell that is not an integer.
    """
    if not os.path.exists(csv_file_path):
        sys.exit('Input file does not exists. Aborting.')

    categories = ('ad', 'tracker', 'analytics', 'widget', 'privacy', '-')
    counts = dict.fromkeys(categories, 0.0)
    counts['total'] = 0.0
    byte_sums = dict.fromkeys(categories, 0.0)
    load_sums = dict.fromkeys(categories, 0.0)

    summary = {
        'date': None,
        'time': None,
        'host': None,
        'items': counts,
        'sizes': byte_sums,
        'times': load_sums,
    }

    with open(csv_file_path) as csv_file:
        for row in csv.DictReader(csv_file):
            if not summary['host']:
                # Page identity comes from the first row.
                summary['host'] = row['Host']
                summary['date'] = row['Date']
                summary['time'] = row['Time']
            counts['total'] += 1
            category = row['bug_type']
            counts[category] += 1
            byte_sums[category] += int(row['Object Size'])
            load_sums[category] += int(row['Time to Load (ms)'])

    return summary
|
||||||
|
|
||||||
|
if __name__ == '__main__':

    # Command line: -f/--file names the wpt csv file to summarise.
    parser = OptionParser()
    parser.add_option('-f', '--file', action="store", help="wpt csv input file")
    opts, _unused_args = parser.parse_args()

    if not opts.file:
        sys.exit('No wpt csv input file specified. Aborting.')

    # The summary is written to stdout as indented JSON.
    print(json.dumps(run(opts.file), indent=2, separators=(',', ':')))
|
||||||
34
webpagetest/wpt_csv_stats_batch.py
Normal file
34
webpagetest/wpt_csv_stats_batch.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
import sys, csv, json, os, re
|
||||||
|
from optparse import OptionParser
|
||||||
|
import wpt_csv_stats
|
||||||
|
|
||||||
|
def run(input_dir):
    """Collect the wpt_csv_stats summary of every .csv file in *input_dir*.

    Only files directly inside *input_dir* are read; sub-directories are
    not descended into. Files are processed in sorted name order so the
    result is the same on every run and filesystem.

    Returns a list of {'name': <file name without extension>,
    'stats': <result of wpt_csv_stats.run>}. Exits the process if
    *input_dir* does not exist.
    """
    if not os.path.exists(input_dir):
        sys.exit('Input directory does not exists. Aborting.')

    stats = []
    for dirpath, _dirnames, filenames in os.walk(input_dir):
        for fn in sorted(filenames):
            fname, ext = os.path.splitext(fn)
            if ext == '.csv':
                filepath = os.path.join(dirpath, fn)
                stats.append({'name': fname, 'stats': wpt_csv_stats.run(filepath)})
        # Stop after the first walk step: top-level directory only.
        # NOTE(review): the nesting of this break is inferred, the scraped
        # source has lost its indentation.
        break

    return stats
|
||||||
|
|
||||||
|
if __name__ == '__main__':

    # Command line: -i/--inputdir names the directory holding the csv files.
    parser = OptionParser()
    parser.add_option('-i', '--inputdir', action="store", help="input directory (where all the wpt csv files reside)")
    opts, _unused_args = parser.parse_args()

    if not opts.inputdir:
        sys.exit('No input directory specified. Aborting.')

    # The list of per-file summaries is written to stdout as indented JSON.
    print(json.dumps(run(opts.inputdir), indent=2, separators=(',', ':')))
|
||||||
Loading…
x
Reference in New Issue
Block a user