cv_filters + stats + htmlcards
This commit is contained in:
parent
a5004449a5
commit
caa8b3ddb5
38
webpagetest/html/+++/lestyle.css
Normal file
38
webpagetest/html/+++/lestyle.css
Normal file
@ -0,0 +1,38 @@
|
||||
/* Stylesheet for the generated "cards" page (linked from index_template.html). */

/* Let the page fill the viewport with no default browser margin. */
body {
    width: 100%;
    height: 100%;
    margin: 0;
    padding: 0;
}

/* Box styling shared by the agent header box and the per-site cards:
   fixed-width grey boxes floated next to each other. */
.agent,
.card {
    padding: 1.5em;
    float: left;
    margin: 1em;
    width: 20em;
    background-color: #eeeeee;
}

/* Kept from the original rule; on a floated box this has no effect in
   current browsers (presumably a legacy IE float-margin workaround). */
.card {
    display: inline;
}

/* Indent the table that sits under each section heading. */
.data {
    margin-left: 0.5em;
}

/* Name line at the top of a card. */
.name {
    font-weight: bold;
}

table {
    width: 100%;
    /* Debug outline, disabled. It was previously "switched off" by
       misspelling the property as `dborder`, which is not valid CSS.
    border: 1px solid red;
    */
}
|
||||
|
||||
|
||||
10
webpagetest/html/index_template.html
Normal file
10
webpagetest/html/index_template.html
Normal file
@ -0,0 +1,10 @@
|
||||
<!DOCTYPE html>
<!-- Page template for the cards report. stats_to_htmlcards.py replaces the
     [[content]] placeholder below with the generated card markup. -->
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<link rel="stylesheet" type="text/css" href="+++/lestyle.css"/>
<title>Cards - Micro-Temporalities [dmi15]</title>
</head>
<body>
[[content]]
</body>
</html>
|
||||
113
webpagetest/html/stats_to_htmlcards.py
Normal file
113
webpagetest/html/stats_to_htmlcards.py
Normal file
@ -0,0 +1,113 @@
|
||||
import sys, csv, json, os, re
|
||||
|
||||
def emit_header():
    """Return the static HTML for the "agent" box.

    The box shows the hard-coded test location and the user-agent string;
    nothing in it depends on the input data.
    """
    user_agent = "Mozilla/5.0 (Windows NT 6.1; WOW64)<br>AppleWebKit/537.36 (KHTML, like Gecko)<br>Chrome/43.0.2357.132 Safari/537.36 PTST/221\n"
    fragments = (
        '<div class="agent">',
        '<b>Location</b>:<br>',
        'Amsterdam<br>\n',
        '<br>\n',
        '<b>User-Agent</b>:<br>',
        user_agent,
        '</div>\n',
    )
    return ''.join(fragments)
|
||||
|
||||
|
||||
def emit_name(name, date, time):
    """Return the three heading lines of a card as HTML.

    name, date, time -- values shown in the "name", "date" and "time" divs.
    A value of None is rendered as an empty div instead of raising
    TypeError.

    The values are HTML-escaped before being embedded, so markup characters
    in the data cannot break the generated page.
    """
    # Imported locally; xml.sax.saxutils is in the standard library of both
    # Python 2 and Python 3.
    from xml.sax.saxutils import escape

    def line(css_class, value):
        # '%s' formatting accepts str and unicode alike on Python 2.
        text = '' if value is None else escape('%s' % (value,))
        return '<div class="' + css_class + '">' + text + '</div>\n'

    return line('name', name) + line('date', date) + line('time', time)
|
||||
|
||||
def string_format_percentage(pct):
    """Return the fraction *pct* as a whole-number percentage string.

    The product is rounded to the nearest integer rather than truncated, so
    float artefacts such as 0.29 * 100 == 28.999999999999996 yield "29".
    """
    return str(int(round(pct * 100)))


def emit_table_row(elem, index, total, label=None):
    """Return one table row: label, integer value and share of *total*.

    elem  -- mapping of category key to numeric value
    index -- key of the category to render
    total -- denominator for the percentage column; must be non-zero
    label -- text for the first cell; defaults to *index*
    """
    if label is None:
        label = index
    value = elem[index]
    # float() keeps the division from truncating on Python 2 when both
    # operands are integers.
    share = string_format_percentage(float(value) / total)
    return ('<tr>' + '<td>' + label + ': ' + '</td><td>' + str(int(value))
            + '</td><td>' + share + '%' + '</td></tr>\n')


# Rows shown in every section, in display order: (data key, displayed label).
# NOTE(review): 'privacy' is part of the totals but has no row of its own,
# exactly as in the original code -- confirm whether that is intended.
_SECTION_ROWS = (
    ('ad', 'ads'),
    ('analytics', 'analytics'),
    ('tracker', 'trackers'),
    ('widget', 'widgets'),
    ('-', 'other'),
)


def _emit_section(css_class, title, values, total):
    """Return a titled <div> wrapping the category table for *values*."""
    if total == 0:
        total = 1  # every share is then 0%, and the division cannot fail
    parts = [
        '<div class="' + css_class + '">',
        '<h4>' + title + '</h4>\n',
        '<div class="data">\n',
        '<table>\n',
    ]
    # The label is passed to the row builder instead of being patched in
    # afterwards with str.replace(), which also rewrote matching characters
    # (e.g. the minus sign of a negative number) in the value cells.
    for key, label in _SECTION_ROWS:
        parts.append(emit_table_row(values, key, total, label))
    parts.extend(['</table>\n', '</div>\n', '</div>\n'])
    return ''.join(parts)


def emit_size(size):
    """Return the "Objects Size (bytes)" section for the *size* mapping.

    Shares are relative to the sum over all six categories.
    """
    total = (size['widget'] + size['ad'] + size['privacy'] + size['-']
             + size['analytics'] + size['tracker'])
    return _emit_section('size', 'Objects Size (bytes)', size, total)


def emit_item(item):
    """Return the "Page Http Request Elements" section for *item*.

    Shares are relative to item['total'].
    """
    return _emit_section('items', 'Page Http Request Elements', item,
                         item['total'])


def emit_time(time):
    """Return the "(Micro) Timing (ms)" section for the *time* mapping.

    Shares are relative to the sum over all six categories.
    """
    total = (time['widget'] + time['ad'] + time['privacy'] + time['-']
             + time['analytics'] + time['tracker'])
    return _emit_section('times', '(Micro) Timing (ms)', time, total)
|
||||
|
||||
def main():
    """Read the stats JSON from stdin and print the cards page to stdout.

    The input is a list of entries, each carrying a 'stats' mapping with
    'host', 'date', 'time', 'items', 'sizes' and 'times'. The page is built
    by substituting the generated cards for the [[content]] placeholder of
    ./index_template.html (resolved against the current directory).
    """
    try:
        entries = json.loads(sys.stdin.read())
    except ValueError:
        sys.exit('Error loading data... Aborting.')

    try:
        # Read-only access is enough, and the context manager closes the file.
        with open(os.path.join('.', 'index_template.html'), 'r') as template:
            page = template.read()
    except (IOError, OSError):
        # Report on stderr with a non-zero status: stdout carries the
        # generated HTML, so an error printed there would end up in the page.
        sys.exit('error opening template file. aborting...')

    content = [emit_header()]
    for entry in entries:
        stats = entry['stats']
        content.append("<div class='card'>\n")
        content.append("\t\t\t" + emit_name(stats['host'], stats['date'], stats['time']))
        content.append("\t\t\t" + emit_item(stats['items']))
        content.append("\t\t\t" + emit_size(stats['sizes']))
        content.append("\t\t\t" + emit_time(stats['times']))
        content.append("</div>\n")

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(page.replace('[[content]]', ''.join(content)))


if __name__ == '__main__':
    main()
|
||||
|
||||
|
||||
@ -1,13 +1,14 @@
|
||||
#!/usr/bin/python2.7
|
||||
|
||||
import sys, csv, json, os
|
||||
import sys, csv, json, os, re
|
||||
from optparse import OptionParser
|
||||
|
||||
# list of fields from the wpt csv file to keep
|
||||
csv_fields = [
|
||||
'Date',
|
||||
'Time',
|
||||
'tracker_type',
|
||||
'bug_type',
|
||||
'bug_name',
|
||||
'Sequence Number',
|
||||
'Host',
|
||||
'IP Address',
|
||||
@ -40,14 +41,24 @@ csv_fields = [
|
||||
'Initiator Line',
|
||||
'Expires',
|
||||
'Cached',
|
||||
'Cookie Count(out)',
|
||||
'Cookie Count(out)'
|
||||
]
|
||||
|
||||
def filter_fields(wpt_row, type):
|
||||
# Compiled bug patterns, keyed by pattern source. match() runs once per CSV
# row against the whole bug list, so each pattern is compiled only once here
# instead of going back through re.compile() on every call.
_COMPILED_BUG_PATTERNS = {}


# matches a given url to all possible bugs
def match(url, bugs):
    """Return {'name', 'type'} of the first bug whose pattern matches *url*.

    bugs -- iterable of dicts with 'pattern' (a regular expression), 'name'
            and 'type' keys; the first match in iteration order wins.

    Returns None when no pattern matches (including an empty bug list).
    """
    for b in bugs:
        source = b['pattern']
        pattern = _COMPILED_BUG_PATTERNS.get(source)
        if pattern is None:
            pattern = _COMPILED_BUG_PATTERNS[source] = re.compile(source)
        if pattern.search(url):
            return {'name': b['name'], 'type': b['type']}
    return None
|
||||
|
||||
# filters the csv row (drops the unwanted columns) and adds the bug type and name
def filter_fields(wpt_row, type, name):
    """Strip *wpt_row* down to the columns in csv_fields and tag it, in place.

    wpt_row -- dict for one CSV row; modified in place
    type    -- value stored under 'bug_type'
    name    -- value stored under 'bug_name'
    """
    # Iterate over a snapshot of the keys: deleting from a dict while
    # iterating its live key view raises RuntimeError on Python 3.
    for k in list(wpt_row.keys()):
        if k not in csv_fields:
            del wpt_row[k]
    wpt_row['bug_type'] = type
    wpt_row['bug_name'] = name
|
||||
|
||||
def run(options):
|
||||
|
||||
@ -72,8 +83,6 @@ def run(options):
|
||||
except Exception, ee:
|
||||
sys.exit('Error loading bugs data... Aborting.')
|
||||
|
||||
stats = {'total' : 0.0, 'ads': 0.0, 'trackers': 0.0, 'analytics': 0.0, 'widgets': 0.0, 'privacy': 0.0, 'blank': 0.0}
|
||||
|
||||
# write output
|
||||
fname, ext = os.path.splitext(os.path.basename(csv_file))
|
||||
|
||||
@ -87,67 +96,31 @@ def run(options):
|
||||
writer.writeheader()
|
||||
|
||||
last_seq = 0
|
||||
|
||||
for r in wpt_data:
|
||||
seq = int(r['Sequence Number'])
|
||||
if seq < last_seq:
|
||||
break
|
||||
last_seq = seq
|
||||
stats['total'] += 1
|
||||
host = r['Host']
|
||||
if any(a in host for a in bugs_data['ads']):
|
||||
# print "ads: " + host
|
||||
filter_fields(r, 'ad')
|
||||
|
||||
print str(seq)
|
||||
|
||||
url = r['Host'] + r['URL']
|
||||
bug = match(url, bugs_data['bugs'])
|
||||
if bug:
|
||||
filter_fields(r, bug['type'], bug['name'])
|
||||
writer.writerow(r);
|
||||
stats['ads'] += 1
|
||||
continue
|
||||
if any(a in host for a in bugs_data['trackers']):
|
||||
# print "trackers: " + host
|
||||
filter_fields(r, 'tracker')
|
||||
writer.writerow(r);
|
||||
stats['trackers'] += 1
|
||||
continue
|
||||
if any(a in host for a in bugs_data['analytics']):
|
||||
# print "analytics: " + host
|
||||
filter_fields(r, 'analytics')
|
||||
writer.writerow(r);
|
||||
stats['analytics'] += 1
|
||||
continue
|
||||
if any(a in host for a in bugs_data['widgets']):
|
||||
# print "widgets: " + host
|
||||
filter_fields(r, 'widget')
|
||||
writer.writerow(r);
|
||||
stats['widgets'] += 1
|
||||
continue
|
||||
if any(a in host for a in bugs_data['privacy']):
|
||||
# print "privacy: " + host
|
||||
filter_fields(r, 'privacy')
|
||||
writer.writerow(r);
|
||||
stats['privacy'] += 1
|
||||
continue
|
||||
else:
|
||||
if options.keep:
|
||||
stats['blank'] += 1
|
||||
filter_fields(r, '-')
|
||||
filter_fields(r, '-', '-')
|
||||
writer.writerow(r);
|
||||
|
||||
|
||||
if options.stats:
|
||||
print "----- Stats: " + fname + ext + " -----"
|
||||
print "total (elements): " + str(stats['total'])
|
||||
print "ads: " + str(stats['ads']) + ' - ' + str(stats['ads'] / stats['total']) + '%'
|
||||
print "trackers: " + str(stats['trackers']) + ' - ' + str(stats['trackers'] / stats['total']) + '%'
|
||||
print "analytics: " + str(stats['analytics']) + ' - ' + str(stats['analytics'] / stats['total']) + '%'
|
||||
print "widgets: " + str(stats['widgets']) + ' - ' + str(stats['widgets'] / stats['total']) + '%'
|
||||
print "privacy: " + str(stats['privacy']) + ' - ' + str(stats['privacy'] / stats['total']) + '%'
|
||||
print "..............."
|
||||
print "* JUNK RATIO * " + str((stats['ads'] + stats['trackers'] + stats['analytics'] + stats['widgets'] + stats['privacy']) / stats['total']) + '%'
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
p = OptionParser();
|
||||
p.add_option('-f', '--file', action="store", help="wpt csv input file")
|
||||
p.add_option('-b', '--bugs', action="store", help="ghostery (formated) bugs input file")
|
||||
p.add_option('-k', '--keep', action="store_true", help="keeps the non bugs html element")
|
||||
p.add_option('-s', '--stats', action="store_true", help="prints basic stats")
|
||||
p.add_option('-o', '--outputdir', action="store", help="output directory", default="")
|
||||
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#!/usr/bin/python2.7
|
||||
|
||||
import sys, csv, json, os
|
||||
import sys, csv, json, os, time
|
||||
from optparse import OptionParser
|
||||
import wpt_csv_filter as wptf
|
||||
|
||||
@ -30,11 +30,15 @@ def run(options):
|
||||
csv_files.append(os.path.join(dirpath, fn))
|
||||
break
|
||||
|
||||
i = 0
|
||||
for f in csv_files:
|
||||
i += 1
|
||||
options.file = f
|
||||
print "processing - " + f
|
||||
print str(i) + "/" + str(len(csv_files)) + " - " + f
|
||||
start_time = time.time()
|
||||
wptf.run(options)
|
||||
print ".......done........"
|
||||
duration = time.time() - start_time
|
||||
print "done - " + time.strftime('%H:%M:%S', time.gmtime(duration))
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@ -42,7 +46,6 @@ if __name__ == '__main__':
|
||||
p.add_option('-i', '--inputdir', action="store", help="input directory (where all the wpt csv files reside)")
|
||||
p.add_option('-b', '--bugs', action="store", help="ghostery (formated) bugs input file")
|
||||
p.add_option('-k', '--keep', action="store_true", help="keeps the non bugs html element")
|
||||
p.add_option('-s', '--stats', action="store_true", help="prints basic stats")
|
||||
p.add_option('-o', '--outputdir', action="store", help="output directory (where all the filtered csv files will be placed)", default="")
|
||||
|
||||
options, args = p.parse_args()
|
||||
|
||||
47
webpagetest/wpt_csv_stats.py
Normal file
47
webpagetest/wpt_csv_stats.py
Normal file
@ -0,0 +1,47 @@
|
||||
import sys, csv, json, os, re
|
||||
from optparse import OptionParser
|
||||
|
||||
def run(csv_file_path):
    """Aggregate a filtered wpt CSV file into per-category statistics.

    Returns a dict with:
      'host', 'date', 'time' -- taken from the 'Host', 'Date' and 'Time'
                                columns (None when the file has no rows)
      'items' -- row count per 'bug_type' value, plus 'total'
      'sizes' -- sum of 'Object Size' per 'bug_type' value
      'times' -- sum of 'Time to Load (ms)' per 'bug_type' value

    Exits the process (sys.exit) when *csv_file_path* does not exist.
    """
    if not os.path.exists(csv_file_path):
        sys.exit('Input file does not exists. Aborting.')

    def to_int(cell):
        # A blank or missing cell counts as 0 instead of aborting the whole
        # run with ValueError/TypeError.
        cell = (cell or '').strip()
        return int(cell) if cell else 0

    stats = {
        'date': None,
        'time': None,
        'host': None,
        'items': {'ad': 0.0, 'tracker': 0.0, 'analytics': 0.0, 'widget': 0.0, 'privacy': 0.0, '-': 0.0, 'total': 0.0},
        'sizes': {'ad': 0.0, 'tracker': 0.0, 'analytics': 0.0, 'widget': 0.0, 'privacy': 0.0, '-': 0.0},
        'times': {'ad': 0.0, 'tracker': 0.0, 'analytics': 0.0, 'widget': 0.0, 'privacy': 0.0, '-': 0.0},
    }
    items = stats['items']
    sizes = stats['sizes']
    times = stats['times']

    with open(csv_file_path) as csv_file:
        for d in csv.DictReader(csv_file):
            if not stats['host']:
                # taken from the first row that has a host
                stats['host'] = d['Host']
                stats['date'] = d['Date']
                stats['time'] = d['Time']
            items['total'] += 1
            # A blank bug_type is counted as '-', the key used for rows that
            # matched no bug. Any other unexpected type gets its own counter
            # rather than raising KeyError.
            bug_type = d['bug_type'] or '-'
            items[bug_type] = items.get(bug_type, 0.0) + 1
            sizes[bug_type] = sizes.get(bug_type, 0.0) + to_int(d['Object Size'])
            times[bug_type] = times.get(bug_type, 0.0) + to_int(d['Time to Load (ms)'])

    return stats
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: print the stats of one wpt csv file as JSON.
    p = OptionParser()
    p.add_option('-f', '--file', action="store", help="wpt csv input file")

    options, args = p.parse_args()

    if not options.file:
        sys.exit('No wpt csv input file specified. Aborting.')

    stats = run(options.file)
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(json.dumps(stats, indent=2, separators=(',', ':')))
|
||||
34
webpagetest/wpt_csv_stats_batch.py
Normal file
34
webpagetest/wpt_csv_stats_batch.py
Normal file
@ -0,0 +1,34 @@
|
||||
import sys, csv, json, os, re
|
||||
from optparse import OptionParser
|
||||
import wpt_csv_stats
|
||||
|
||||
def run(input_dir):
    """Collect the stats of every .csv file directly inside *input_dir*.

    Sub-directories are not visited. Files are processed in sorted name
    order so the result does not depend on directory listing order.

    Returns a list of {'name': <file name without extension>,
    'stats': <result of wpt_csv_stats.run for that file>} dicts.
    Exits the process (sys.exit) when *input_dir* does not exist.
    """
    if not os.path.exists(input_dir):
        sys.exit('Input directory does not exists. Aborting.')

    # Only the first os.walk() entry is used, i.e. the top-level directory.
    # The default covers the case where the walk yields nothing at all.
    dirpath, _, filenames = next(os.walk(input_dir), (input_dir, [], []))

    stats = []
    for fn in sorted(filenames):
        fname, ext = os.path.splitext(fn)
        if ext == '.csv':
            filepath = os.path.join(dirpath, fn)
            stats.append({'name': fname, 'stats': wpt_csv_stats.run(filepath)})

    return stats
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: print the stats of every csv file in a
    # directory as one JSON list.
    p = OptionParser()
    p.add_option('-i', '--inputdir', action="store", help="input directory (where all the wpt csv files reside)")

    options, args = p.parse_args()

    if not options.inputdir:
        sys.exit('No input directory specified. Aborting.')

    stats = run(options.inputdir)
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(json.dumps(stats, indent=2, separators=(',', ':')))
|
||||
Loading…
x
Reference in New Issue
Block a user