diff --git a/webpagetest/html/+++/lestyle.css b/webpagetest/html/+++/lestyle.css
new file mode 100644
index 0000000..6523b56
--- /dev/null
+++ b/webpagetest/html/+++/lestyle.css
@@ -0,0 +1,38 @@
+body { /* full-width, full-height page with default margin and padding removed */
+ width: 100%;
+ height: 100%;
+ margin: 0;
+ padding: 0;
+}
+
+.agent { /* left-floated, 20em-wide light grey box */
+ padding: 1.5em;
+ float: left;
+ margin: 1em;
+ width: 20em;
+ background-color: #eeeeee;
+}
+
+.card { /* same box as .agent, plus the display declaration below */
+ padding: 1.5em;
+ float: left;
+ display: inline; /* NOTE(review): a floated box computes to display: block, so this has no effect in conforming browsers - presumably an old-IE float-margin workaround; confirm */
+ margin: 1em;
+ width: 20em;
+ background-color: #eeeeee;
+}
+
+.data { /* small left indent */
+ margin-left: 0.5em;
+}
+
+.name { /* bold text */
+ font-weight: bold;
+}
+
+table { /* tables span the full width of their container */
+ width: 100%;
+ dborder: 1px solid red; /* NOTE(review): "dborder" is not a CSS property, so this declaration is ignored - looks like a debug border disabled by renaming; confirm */
+}
+
+
diff --git a/webpagetest/html/index_template.html b/webpagetest/html/index_template.html
new file mode 100644
index 0000000..f8a2b57
--- /dev/null
+++ b/webpagetest/html/index_template.html
@@ -0,0 +1,10 @@
+
+
+
+
+ Cards - Micro-Temporalities [dmi15]
+
+
+ [[content]]
+
+
diff --git a/webpagetest/html/stats_to_htmlcards.py b/webpagetest/html/stats_to_htmlcards.py
new file mode 100644
index 0000000..b5f9e48
--- /dev/null
+++ b/webpagetest/html/stats_to_htmlcards.py
@@ -0,0 +1,113 @@
+import sys, csv, json, os, re
+
+def emit_header():  # builds the page-header fragment from hard-coded values (location "Amsterdam" and one User-Agent string) and returns it as a single string
+ str_s = ''  # NOTE(review): the literals below are split across physical lines in this view, which single-quoted strings do not allow - presumably markup was lost in extraction; confirm against the repository
+ str_s += 'Location:
'
+ str_s += 'Amsterdam
\n'
+ str_s += '
\n'
+ str_s += 'User-Agent:
'
+ str_s += "Mozilla/5.0 (Windows NT 6.1; WOW64)
AppleWebKit/537.36 (KHTML, like Gecko)
Chrome/43.0.2357.132 Safari/537.36 PTST/221\n"
+ str_s += '
\n'
+ return str_s
+
+
+def emit_name(name, date, time):  # concatenates name, date and time (in that order) into one string fragment and returns it; all three are joined with +, so they must already be strings
+ str_s = '' + name + '
\n'
+ str_s += '' + date + '
\n'
+ str_s += '' + time + '
\n'
+ return str_s  # NOTE(review): the literals above are split across lines in this view - presumably markup was lost in extraction; confirm
+
+def string_format_percentage(pct):
+    # scale the fraction to a percentage, truncate it with int() and return it as text (no '%' sign)
+    return str(int(pct * 100))
+
+
+def emit_table_row(elem, index, total):  # one row for elem[index]: the label "index:", the value truncated to int, and its share of total as a whole-number percentage; index is the dict key (concatenated as a string), total must be non-zero
+ return '' + '| ' + index +': ' + ' | '+ str(int(elem[index])) + ' | ' + string_format_percentage(elem[index] / total) + '%' + ' |
\n'
+
+def emit_size(size):  # builds the "Objects Size (bytes)" block from the per-type totals in size and returns it as one string
+ total = size['widget'] + size['ad'] + size['privacy'] + size['-'] + size['analytics'] + size['tracker']
+ if total == 0:
+ total = 1  # avoids dividing by zero inside emit_table_row
+ str_s = ''
+ str_s += '
Objects Size (bytes)
\n'
+ str_s += '
\n'
+ str_s += '
\n'
+ str_s += emit_table_row(size, 'ad', total).replace('ad', 'ads')  # NOTE(review): replace() rewrites every 'ad' in the row text, not only the label - confirm the row holds no other match
+ str_s += emit_table_row(size, 'analytics', total)
+ str_s += emit_table_row(size, 'tracker', total).replace('tracker', 'trackers')
+ str_s += emit_table_row(size, 'widget', total).replace('widget', 'widgets')
+ str_s += emit_table_row(size, '-', total).replace('-', 'other')  # NOTE(review): no row is emitted for 'privacy' although it is counted in total
+ str_s += '
\n'
+ str_s += '
\n'
+ str_s += '
\n'
+ return str_s
+
+def emit_item(item):  # builds the "Page Http Request Elements" block from the per-type counts in item and returns it as one string
+ total = item['total']  # shares are relative to item['total'], not to a sum of the per-type counts
+ total_junk = item['widget'] + item['ad'] + item['privacy'] + item['analytics'] + item['tracker']  # NOTE(review): total_junk is not used anywhere else in this function
+ if total == 0:
+ total = 1  # avoids dividing by zero inside emit_table_row
+ str_s = ''
+ str_s += '
Page Http Request Elements
\n'
+ str_s += '
\n'
+ str_s += '
\n'
+ str_s += emit_table_row(item, 'ad', total).replace('ad', 'ads')  # NOTE(review): replace() rewrites every 'ad' in the row text, not only the label - confirm the row holds no other match
+ str_s += emit_table_row(item, 'analytics', total)
+ str_s += emit_table_row(item, 'tracker', total).replace('tracker', 'trackers')
+ str_s += emit_table_row(item, 'widget', total).replace('widget', 'widgets')
+ str_s += emit_table_row(item, '-', total).replace('-', 'other')  # NOTE(review): no row is emitted for 'privacy'
+ str_s += '
\n'
+ str_s += '
\n'
+ str_s += '
\n'
+ return str_s
+
+def emit_time(time):  # builds the "(Micro) Timing (ms)" block from the per-type totals in time and returns it as one string
+ total = time['widget'] + time['ad'] + time['privacy'] + time['-'] + time['analytics'] + time['tracker']
+ if total == 0:
+ total = 1  # avoids dividing by zero inside emit_table_row
+ str_s = ''
+ str_s += '
(Micro) Timing (ms)
\n'
+ str_s += '
\n'
+ str_s += '
\n'
+ str_s += emit_table_row(time, 'ad', total).replace('ad', 'ads')  # NOTE(review): replace() rewrites every 'ad' in the row text, not only the label - confirm the row holds no other match
+ str_s += emit_table_row(time, 'analytics', total)
+ str_s += emit_table_row(time, 'tracker', total).replace('tracker', 'trackers')
+ str_s += emit_table_row(time, 'widget', total).replace('widget', 'widgets')
+ str_s += emit_table_row(time, '-', total).replace('-', 'other')  # NOTE(review): no row is emitted for 'privacy' although it is counted in total
+ str_s += '
\n'
+ str_s += '
\n'
+ str_s += '
\n'
+ return str_s
+
+if __name__ == '__main__':  # reads a JSON document from stdin and prints index_template.html with [[content]] replaced by the generated header and cards
+
+ fp = sys.stdin
+ try:
+ stats = json.loads(fp.read())
+ except Exception, ee:  # Python 2 except syntax; ee is not used
+ sys.exit('Error loading data... Aborting.')
+
+ try:
+ template = open(os.path.join('.', 'index_template.html'), 'r+');  # path is relative to the current working directory; NOTE(review): opened 'r+' but only read below, and never closed
+ except:
+ print('error opening template file. aborting...');
+ sys.exit(0);  # NOTE(review): exits with status 0 although this is the failure path
+
+ content = ""
+ content += emit_header()
+ for e in stats:
+ card = "\n"
+ stats = e['stats']  # rebinds stats to the current entry; the for loop keeps iterating the object it started with
+ card += "\t\t\t" + emit_name(stats['host'], stats['date'], stats['time'])
+ card += "\t\t\t" + emit_item(stats['items'])
+ card += "\t\t\t" + emit_size(stats['sizes'])
+ card += "\t\t\t" + emit_time(stats['times'])
+ card += "
\n"
+ content += card
+
+ html = template.read().replace('[[content]]', content);
+
+ print html
+
+
diff --git a/webpagetest/wpt_csv_filter.py b/webpagetest/wpt_csv_filter.py
index ac580d5..7871724 100755
--- a/webpagetest/wpt_csv_filter.py
+++ b/webpagetest/wpt_csv_filter.py
@@ -1,13 +1,14 @@
#!/usr/bin/python2.7
-import sys, csv, json, os
+import sys, csv, json, os, re
from optparse import OptionParser
# list of fileds from the wpt csv file to keep
csv_fields = [
'Date',
'Time',
-'tracker_type',
+'bug_type',
+'bug_name',
'Sequence Number',
'Host',
'IP Address',
@@ -40,14 +41,24 @@ csv_fields = [
'Initiator Line',
'Expires',
'Cached',
-'Cookie Count(out)',
+'Cookie Count(out)'
]
-def filter_fields(wpt_row, type):
+# returns the name and type of the first bug whose regex pattern is found in url, or None
+def match(url, bugs):
+    for bug in bugs:
+        if re.search(bug['pattern'], url) is None:
+            continue
+        return {'name': bug['name'], 'type': bug['type']}
+    return None
+
+# filters the csv row (deletes the columns not listed in csv_fields) and adds the 'bug_type' and 'bug_name' columns
+def filter_fields(wpt_row, type, name):
for k in wpt_row.keys():
if k not in csv_fields:
del wpt_row[k]
- wpt_row['tracker_type'] = type
+ wpt_row['bug_type'] = type
+ wpt_row['bug_name'] = name
def run(options):
@@ -72,8 +83,6 @@ def run(options):
except Exception, ee:
sys.exit('Error loading bugs data... Aborting.')
- stats = {'total' : 0.0, 'ads': 0.0, 'trackers': 0.0, 'analytics': 0.0, 'widgets': 0.0, 'privacy': 0.0, 'blank': 0.0}
-
#write ouput
fname, ext = os.path.splitext(os.path.basename(csv_file))
@@ -87,59 +96,24 @@ def run(options):
writer.writeheader()
last_seq = 0
+
for r in wpt_data:
seq = int(r['Sequence Number'])
if seq < last_seq:
break
last_seq = seq
- stats['total'] += 1
- host = r['Host']
- if any(a in host for a in bugs_data['ads']):
-# print "ads: " + host
- filter_fields(r, 'ad')
- writer.writerow(r);
- stats['ads'] += 1
- continue
- if any(a in host for a in bugs_data['trackers']):
-# print "trackers: " + host
- filter_fields(r, 'tracker')
- writer.writerow(r);
- stats['trackers'] += 1
- continue
- if any(a in host for a in bugs_data['analytics']):
-# print "analytics: " + host
- filter_fields(r, 'analytics')
- writer.writerow(r);
- stats['analytics'] += 1
- continue
- if any(a in host for a in bugs_data['widgets']):
-# print "widgets: " + host
- filter_fields(r, 'widget')
- writer.writerow(r);
- stats['widgets'] += 1
- continue
- if any(a in host for a in bugs_data['privacy']):
-# print "privacy: " + host
- filter_fields(r, 'privacy')
- writer.writerow(r);
- stats['privacy'] += 1
- continue
- if options.keep:
- stats['blank'] += 1
- filter_fields(r, '-')
- writer.writerow(r);
+ print str(seq)
- if options.stats:
- print "----- Stats: " + fname + ext + " -----"
- print "total (elements): " + str(stats['total'])
- print "ads: " + str(stats['ads']) + ' - ' + str(stats['ads'] / stats['total']) + '%'
- print "trackers: " + str(stats['trackers']) + ' - ' + str(stats['trackers'] / stats['total']) + '%'
- print "analytics: " + str(stats['analytics']) + ' - ' + str(stats['analytics'] / stats['total']) + '%'
- print "widgets: " + str(stats['widgets']) + ' - ' + str(stats['widgets'] / stats['total']) + '%'
- print "privacy: " + str(stats['privacy']) + ' - ' + str(stats['privacy'] / stats['total']) + '%'
- print "..............."
- print "* JUNK RATIO * " + str((stats['ads'] + stats['trackers'] + stats['analytics'] + stats['widgets'] + stats['privacy']) / stats['total']) + '%'
+ url = r['Host'] + r['URL']
+ bug = match(url, bugs_data['bugs'])
+ if bug:
+ filter_fields(r, bug['type'], bug['name'])
+ writer.writerow(r);
+ else:
+ if options.keep:
+ filter_fields(r, '-', '-')
+ writer.writerow(r);
if __name__ == '__main__':
@@ -147,7 +121,6 @@ if __name__ == '__main__':
p.add_option('-f', '--file', action="store", help="wpt csv input file")
p.add_option('-b', '--bugs', action="store", help="ghostery (formated) bugs input file")
p.add_option('-k', '--keep', action="store_true", help="keeps the non bugs html element")
- p.add_option('-s', '--stats', action="store_true", help="prints basic stats")
p.add_option('-o', '--outputdir', action="store", help="output directory", default="")
diff --git a/webpagetest/wpt_csv_filter_batch.py b/webpagetest/wpt_csv_filter_batch.py
index 91f3433..489f091 100755
--- a/webpagetest/wpt_csv_filter_batch.py
+++ b/webpagetest/wpt_csv_filter_batch.py
@@ -1,6 +1,6 @@
#!/usr/bin/python2.7
-import sys, csv, json, os
+import sys, csv, json, os, time
from optparse import OptionParser
import wpt_csv_filter as wptf
@@ -30,11 +30,15 @@ def run(options):
csv_files.append(os.path.join(dirpath, fn))
break
+ i = 0
for f in csv_files:
+ i += 1
options.file = f
- print "processing - " + f
+ print str(i) + "/" + str(len(csv_files)) + " - " + f
+ start_time = time.time()
wptf.run(options)
- print ".......done........"
+ duration = time.time() - start_time
+ print "done - " + time.strftime('%H:%M:%S', time.gmtime(duration))
if __name__ == '__main__':
@@ -42,7 +46,6 @@ if __name__ == '__main__':
p.add_option('-i', '--inputdir', action="store", help="input directory (where all the wpt csv files reside)")
p.add_option('-b', '--bugs', action="store", help="ghostery (formated) bugs input file")
p.add_option('-k', '--keep', action="store_true", help="keeps the non bugs html element")
- p.add_option('-s', '--stats', action="store_true", help="prints basic stats")
p.add_option('-o', '--outputdir', action="store", help="output directory (where all the filtered csv files will be placed)", default="")
options, args = p.parse_args()
diff --git a/webpagetest/wpt_csv_stats.py b/webpagetest/wpt_csv_stats.py
new file mode 100644
index 0000000..fb3cbe0
--- /dev/null
+++ b/webpagetest/wpt_csv_stats.py
@@ -0,0 +1,47 @@
+import sys, csv, json, os, re
+from optparse import OptionParser
+
+def run(csv_file_path):
+    """Aggregate one filtered wpt csv file into per-bug-type statistics.
+
+    Returns a dict holding the 'host', 'date' and 'time' of the first row with a
+    non-empty host, plus three tables keyed by the rows' 'bug_type': 'items'
+    (row counts, with the overall count under 'total'), 'sizes' (summed
+    'Object Size') and 'times' (summed 'Time to Load (ms)'). Exits the process
+    when csv_file_path does not exist.
+    """
+    if not os.path.exists(csv_file_path):
+        sys.exit('Input file does not exists. Aborting.')
+
+    bug_types = ('ad', 'tracker', 'analytics', 'widget', 'privacy', '-')
+    stats = {'date': None, 'time': None, 'host': None}
+    for table in ('items', 'sizes', 'times'):
+        stats[table] = dict((bug_type, 0.0) for bug_type in bug_types)
+    stats['items']['total'] = 0.0
+
+    with open(csv_file_path) as csv_file:
+        for row in csv.DictReader(csv_file):
+            if not stats['host']:
+                # page-level fields are copied once, from the first row that has a host
+                stats['host'] = row['Host']
+                stats['date'] = row['Date']
+                stats['time'] = row['Time']
+            stats['items']['total'] += 1
+            kind = row['bug_type']
+            stats['items'][kind] += 1
+            stats['sizes'][kind] += int(row['Object Size'])
+            stats['times'][kind] += int(row['Time to Load (ms)'])
+    return stats
+
+if __name__ == '__main__':
+
+    # command line: -f/--file names the filtered wpt csv file to summarise
+    parser = OptionParser()
+    parser.add_option('-f', '--file', action="store", help="wpt csv input file")
+    options, args = parser.parse_args()
+
+    if options.file:
+        # dump the aggregated stats as indented json on stdout
+        print json.dumps(run(options.file), indent=2, separators=(',',':'))
+    else:
+        sys.exit('No wpt csv input file specified. Aborting.')
diff --git a/webpagetest/wpt_csv_stats_batch.py b/webpagetest/wpt_csv_stats_batch.py
new file mode 100644
index 0000000..c140547
--- /dev/null
+++ b/webpagetest/wpt_csv_stats_batch.py
@@ -0,0 +1,34 @@
+import sys, csv, json, os, re
+from optparse import OptionParser
+import wpt_csv_stats
+
+def run(input_dir):
+    """Collect wpt_csv_stats results for the .csv files directly inside input_dir.
+
+    Returns a list of {'name': <file name without extension>, 'stats': <result
+    of wpt_csv_stats.run for that file>}; sub-directories are not visited.
+    Exits the process when input_dir does not exist.
+    """
+    if not os.path.exists(input_dir):
+        sys.exit('Input directory does not exists. Aborting.')
+    stats = []
+    for (dirpath, dirnames, filenames) in os.walk(input_dir):
+        for fn in filenames:
+            fname, ext = os.path.splitext(fn)
+            if ext == '.csv':
+                stats.append({'name': fname, 'stats': wpt_csv_stats.run(os.path.join(dirpath, fn))})
+        break  # os.walk yields input_dir itself first; stop before descending into sub-directories
+    return stats
+
+if __name__ == '__main__':
+
+    # command line: -i/--inputdir names the directory holding the filtered wpt csv files
+    parser = OptionParser()
+    parser.add_option('-i', '--inputdir', action="store", help="input directory (where all the wpt csv files reside)")
+    options, args = parser.parse_args()
+
+    if options.inputdir:
+        # dump the per-file stats list as indented json on stdout
+        print json.dumps(run(options.inputdir), indent=2, separators=(',',':'))
+    else:
+        sys.exit('No input directory specified. Aborting.')