commit 0f9a067c9195bfdef2fefd5ecc0e3846493f3ad5 Author: gauthiier Date: Wed Jul 8 09:05:51 2015 +0200 HAHA! commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..50ddd66 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.DS_Stroe +*.csv +bugs.json \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..7eca423 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +###Micro-Temporalities of the Web \ No newline at end of file diff --git a/firebug-netexp/firefox_ext/fireStarter-0.1.a3.xpi b/firebug-netexp/firefox_ext/fireStarter-0.1.a3.xpi new file mode 100644 index 0000000..9231b21 Binary files /dev/null and b/firebug-netexp/firefox_ext/fireStarter-0.1.a3.xpi differ diff --git a/firebug-netexp/firefox_ext/fireStarter-0.1a6.xpi b/firebug-netexp/firefox_ext/fireStarter-0.1a6.xpi new file mode 100644 index 0000000..2844281 Binary files /dev/null and b/firebug-netexp/firefox_ext/fireStarter-0.1a6.xpi differ diff --git a/firebug-netexp/firefox_ext/firebug-1.12.8.xpi b/firebug-netexp/firefox_ext/firebug-1.12.8.xpi new file mode 100644 index 0000000..a2b23b1 Binary files /dev/null and b/firebug-netexp/firefox_ext/firebug-1.12.8.xpi differ diff --git a/firebug-netexp/firefox_ext/firebug-1.9.2.xpi b/firebug-netexp/firefox_ext/firebug-1.9.2.xpi new file mode 100644 index 0000000..39294c5 Binary files /dev/null and b/firebug-netexp/firefox_ext/firebug-1.9.2.xpi differ diff --git a/firebug-netexp/firefox_ext/firebug-2.0.11-fx.xpi b/firebug-netexp/firefox_ext/firebug-2.0.11-fx.xpi new file mode 100644 index 0000000..ef532d0 Binary files /dev/null and b/firebug-netexp/firefox_ext/firebug-2.0.11-fx.xpi differ diff --git a/firebug-netexp/firefox_ext/netExport-0.8b10.xpi b/firebug-netexp/firefox_ext/netExport-0.8b10.xpi new file mode 100644 index 0000000..f8b0f00 Binary files /dev/null and b/firebug-netexp/firefox_ext/netExport-0.8b10.xpi differ diff --git a/firebug-netexp/firefox_ext/netExport-0.9b7.xpi b/firebug-netexp/firefox_ext/netExport-0.9b7.xpi new file mode 100644 index 0000000..6b309ce Binary files /dev/null and b/firebug-netexp/firefox_ext/netExport-0.9b7.xpi differ diff --git a/firebug-netexp/har_parser.py b/firebug-netexp/har_parser.py new file mode 100644 index 0000000..e10eda2 --- /dev/null +++ b/firebug-netexp/har_parser.py @@ -0,0 +1,15 @@ +import sys, json + +if __name__ == '__main__': + + fp = sys.stdin + try: + sdata = fp.read() + data = json.loads(sdata) + except Exception, ee: + sys.exit('Error loading data... Aborting.') + + entries = data['log']['entries'] + for e in entries: + req = e['request'] + print req['url'] \ No newline at end of file diff --git a/firebug-netexp/netexp.py b/firebug-netexp/netexp.py new file mode 100644 index 0000000..720ca92 --- /dev/null +++ b/firebug-netexp/netexp.py @@ -0,0 +1,61 @@ +from selenium import webdriver +import sys, json, urllib2, time, os + +firefox_ext = "/firefox_ext/" +firebug_ext = "firebug-1.12.8.xpi" +firestarter_ext = "fireStarter-0.1a6.xpi" +netexport_ext = "netExport-0.9b7.xpi" + +def config(): + + global firefox_ext; + config = webdriver.firefox.firefox_profile.FirefoxProfile(); + + firefox_ext = os.getcwd() + firefox_ext; + config.add_extension(firefox_ext + firebug_ext); + config.add_extension(firefox_ext + firestarter_ext); + config.add_extension(firefox_ext + netexport_ext); + + config.set_preference("app.update.enabled", False); + config.native_events_enabled = True + #config.set_preference("webdriver.log.file", "log_webdriver.txt"); + + ext_firebug = "extensions.firebug."; + + config.set_preference(ext_firebug + "currentVersion", "1.12.8"); + config.set_preference(ext_firebug + "allPagesActivation", "on"); + config.set_preference(ext_firebug + "defaultPanelName", "net"); + config.set_preference(ext_firebug + "net.enableSites", True); + config.set_preference(ext_firebug + "addonBarOpened", True); + config.set_preference(ext_firebug + "consoles.enableSite", True); + config.set_preference(ext_firebug + "console.enableSites", True); + config.set_preference(ext_firebug + "script.enableSites", True); + config.set_preference(ext_firebug + "net.enableSites", True); + config.set_preference(ext_firebug + "onByDefault", True); + config.set_preference(ext_firebug + "DBG_STARTER", True); + + config.set_preference(ext_firebug + "netexport.alwaysEnableAutoExport", True); + config.set_preference(ext_firebug + "netexport.autoExportToFile", True); + config.set_preference(ext_firebug + "netexport.saveFiles", True); + config.set_preference(ext_firebug + "netexport.showPreview", False); + config.set_preference(ext_firebug + "netexport.defaultLogDir", os.getcwd()); + config.set_preference(ext_firebug + "netexport.pageLoadedTimeout", 20000); + config.set_preference(ext_firebug + "netexport.timeout", 25000); + + return config; + + +if __name__ == '__main__': + + conf = config(); + driver = webdriver.Firefox(conf); + + time.sleep(7); + + driver.get("http://www.nytimes.com"); + + time.sleep(20); + + driver.close(); + + diff --git a/ghost-bugs/bugs_hosts.json b/ghost-bugs/bugs_hosts.json new file mode 100644 index 0000000..981ef9f --- /dev/null +++ b/ghost-bugs/bugs_hosts.json @@ -0,0 +1,1254 @@ +{ + "widgets":[ + "afy11.net", + "cpxinteractive.com", + "bigfineads.com", + "brealtime.com", + "cpxadroit.com", + "clicktale.net", + "ozonemedia.com", + "adorika.com", + "adorika.net", + "addyon.com", + "adtlgc.com", + "spring-tns.net", + "tns-counter.ru", + "tns-cs.net", + "statistik-gallup.net", + "sesamestats.com", + "tns-gallup.dk", + "research-int.se", + "clicmanager.fr", + "geovisite.com", + "scr.kliksaya.com", + "juicyads.com", + "intentmedia.net", + "chaordicsystems.com", + "tbn.ru", + "btg.mtvnservices.com", + "medleyads.com", + "myroitracking.com", + "adjug.com", + "oewabox.at", + "admulti.com", + "rollad.ru", + "reduxmediagroup.com", + "reduxmedia.com", + "revolvermaps.com", + "bizsolutions.strands.com", + "sail-horizon.com", + "cdn.sailthru.com", + "errorception.com", + "scout.scoutanalytics.net", + "nspmotion.com", + "dmmotion.com", + "visualrevenue.com", + "taboolasyndication.com", + "taboola.com", + "widgets.outbrain.com", + "ad.yieldads.com", + "counter.goingup.com", + "criteo.com", + "criteo.net", + "adbull.com", + "adcloud.net", + "magna.ru", + "smartbn.ru", + "pswec.com", + "inviziads.com", + "m2pub.com", + "adsmarket.com", + "ads.crakmedia.com", + "craktraffic.com", + "audienceiq.com", + "techlightenment.com", + "go.sonobi.com", + "ads.sonobi.com", + "theblogfrog.com", + "peerius.com", + "clicktale.pantherssl.com", + "clicktalecdn.sslcs.cdngc.net", + "trackalyzer.com", + "formalyzer.com", + "visitortracklog.com", + "rtb.strikead.com", + "adserverpub.com", + "ads.albawaba.com", + "mm.admob.com", + "mmv.admob.com", + "p.admob.com", + "a.admob.com", + "vidigital.ru", + "adverticum.net", + "optimize.webtrends.com", + "m.webtrends.com", + "simplereach.com", + "d8rk54i4mohrb.cloudfront.net", + "everestjs.net", + "everesttech.net", + "adultadworld.com", + "adworldmedia.com", + "gunggo.com", + "reviews.bazaarvoice.com", + "ugc.bazaarvoice.com", + "affiliate.godaddy.com", + "gwa.reedbusiness.net", + "du8783wkf05yr.cloudfront.net", + "dynamicoxygen.com", + "evisitanalyst.com", + "evisitcs.com", + "websiteperform.com", + "affimax.de", + "rvty.net", + "d9lq0o81skkdj.cloudfront.net", + "esm1.net", + "mythings.com", + "mathtag.com", + "mathads.com", + "batanga.com", + "impresionesweb.com", + "i.total-media.net", + "waterfrontmedia.com", + "trackset.it", + "ad.tlvmedia.com", + "ads.tlvmedia.com", + "tag.tlvmedia.com", + "xplosion.de", + "veruta.com", + "yashi.com", + "infinity-tracking.net", + "ad.targetingmarketplace.com", + "baynote.net", + "adgear.com", + "adgrx.com", + "adaction.se", + "de17a.com", + "nuggad.net", + "buzzbytes.net", + "complex.com", + "adultfriendfinder.com", + "getiton.com", + "pop6.com", + "widgets.digg.com", + "adgoto.com", + "webtraffic.se", + "webtraffic.no", + "allyes.com", + "ads.pheedo.com", + "tradetracker.net", + "ads.sexinyourcity.com", + "zerezas.com", + "xeontopa.com", + "doublepimp.com", + "redcourtside.com", + "affiliatelounge.com", + "connect.ok.ru", + "adman.gr", + "st.listrak.com", + "ad.360yield.com", + "rsspump.com", + "kavanga.ru", + "innity.com", + "innity.net", + "adshuffle.com", + "directadvert.ru", + "wigetmedia.com", + "3c45d848d99.se", + "springmetrics.com", + "atemda.com", + "counters.gigya.com", + "c.gigcount.com", + "servedby.precisionclick.com", + "geo.precisionclick.com", + "thewheelof.com", + "valuedopinions.co.uk", + "gopjn.com", + "pjatr.com", + "pjtra.com", + "pntra.com", + "pntrac.com", + "pntrs.com", + "cn01.dwstat.cn", + "ad.rambler.ru", + "ad2.rambler.ru", + "counter.rambler.ru", + "top100-images.rambler.ru", + "apps.facebook.com", + "meteorsolutions.com", + "ads.foodieblogroll.com", + "widget.foodieblogroll.com", + "adserver.freenet.de", + "analytics.unister-gmbh.de", + "sitescout.com", + "cpmstar.com", + "c8.net.ua", + "casalemedia.com", + "ads.blogherads.com", + "smartcontext.pl", + "userzoom.com", + "vizu.com", + "googletagmanager.com", + "onlinewebstat.com", + "onlinewebstats.com", + "connexity.net", + "cxt.ms", + "openadex.dk", + "technoratimedia.com", + "log.feedjit.com", + "clickexperts.net", + "awin1.com", + "perfb.com", + "double.net", + "eadv.it", + "etargetnet.com", + "kavijaseuranta.fi", + "w55c.net", + "ads.referlocal.com", + "cognitivematch.com", + "cmmeglobal.com", + "js.admeld.com", + "tag.admeld.com", + "activemeter.com" + ], + "analytics":[ + "audit.median.hu", + "zedo.com", + "ad.adlegend.com", + "xtendmedia.com", + "comclick.com", + "nxtck.com", + "ad.yieldmanager.net", + "crosspixel.net", + "webstats.motigo.com", + "webprospector.de", + "cnt.sup.com", + "creatives.livejasmin.com", + "awempire.com", + "amgdgt.com", + "adrdgt.com", + "ad.reklamport.com", + "maxlab.ru", + "recreativ.ru", + "loveadvert.ru", + "trafficfacts.com", + "en25.com", + "eloqua.com", + "data.cmcore.com", + "coremetrics.com", + "coremetrics.eu", + "npario-inc.net", + "enectoanalytics.com", + "trk.enecto.com", + "dz.glanceguide.com", + "rubiconproject.com", + "e-kolay.net", + "tr.webantenna.info", + "clixmetrix.com", + "rover.ebay.com", + "unica.com", + "roitesting.com", + "mixpanel.com", + "rsvpgenius.com", + "underclick.ru", + "extreme-dm.com", + "marketo.com", + "marketo.net", + "mktoresp.com", + "intermarkets.net", + "hypeads.org", + "srv.clickfuse.com", + "pardot.com", + "vivistats.com", + "metricsdirect.com", + "ezakus.net", + "adsymptotic.com", + "adserver.teracent.net", + "int.teracent.net", + "monetate.net", + "app.hubspot.com", + "piximedia.com", + "b2bcontext.ru", + "simpleadserver.net", + "simpleadserver.org", + "plugrush.com", + "trafficbroker.com", + "ctasnet.com", + "mybloglog.com", + "specificclick.net", + "ic-live.com", + "exitjunction.com", + "olark.com", + "media.richrelevance.com", + "recs.richrelevance.com", + "counter.yadro.ru", + "navegg.com", + "navdmp.com", + "ad4mat.ar", + "ad4mat.at", + "ad4mat.be", + "ad4mat.bg", + "ad4mat.br", + "ad4mat.ch", + "ad4mat.co.uk", + "ad4mat.cz", + "ad4mat.de", + "ad4mat.dk", + "ad4mat.es", + "ad4mat.fi", + "ad4mat.fr", + "ad4mat.gr", + "ad4mat.hu", + "ad4mat.it", + "ad4mat.mx", + "ad4mat.net", + "ad4mat.nl", + "ad4mat.no", + "ad4mat.pl", + "ad4mat.ro", + "ad4mat.ru", + "ad4mat.se", + "ad4mat.tr", + "justrelevant.com", + "unrulymedia.com", + "redintelligence.net", + "customerconversio.com", + "adblade.com", + "ib-ibi.com", + "clickmanage.com", + "link.mercent.com", + "spruce.rapleaf.com", + "rlcdn.com", + "zemanta.com", + "a.akncdn.com", + "sokrati.com", + "polyad.net", + "atwola.com", + "ads.affbuzzads.com", + "content.pulse360.com", + "track.pulse360.com", + "hit.clickaider.com", + "openx.org", + "openx.net", + "static.crowdscience.com", + "fmpub.net", + "sniff.visistat.com", + "stats.visistat.com", + "a.ucoz.net", + "servedby.adxpose.com", + "ads.adxpose.com", + "event.adxpose.com", + "adsafeprotected.com", + "s.shopify.com", + "stats.shopify.com", + "ad.adserverplus.com", + "cetrk.com", + "js.geoads.com", + "widget.dihitt.com.br", + "admedia.com", + "aweber.com", + "adfeedstrk.com", + "adcde.com", + "addlvr.com", + "adtrgt.com", + "bannertgt.com", + "cptgt.com", + "cpvfeed.com", + "cpvtgt.com", + "mygeek.com", + "popcde.com", + "sdfje.com", + "urtbk.com", + "ads.saymedia.com", + "adsummos.net", + "choicestream.com", + "admaya.in", + "stats.vertriebsassistent.de", + "tqlkg.com", + "analytics.matchbin.com", + "adnwb.ru", + "adonweb.ru", + "actionpay.ru", + "ensighten.com", + "levexis.com", + "impression.clickinc.com", + "ca.clickinc.com", + "tacoda.net", + "doubleclick.net", + "g.doubleclick.net", + "adready.com", + "gmads.net", + "grmtech.net", + "pixazza.com", + "qnsr.com", + "quinstreet.com", + "webmasterplan.com", + "userneeds.dk", + "optorb.com", + "go.cpmadvisors.com", + "intermundomedia.com", + "data.gosquared.com", + "stat.onestat.com", + "bmmetrix.com", + "japanmetrix.jp", + "service.collarity.com", + "adohana.com", + "payclick.it", + "pubdirecte.com", + "dw.com.com", + "adlog.com.com", + "virgul.com", + "api.conduit.com", + "apps.conduit.com", + "peer39.net", + "eproof.com", + "ov.yahoo.co.jp", + "nuffnang.com", + "ad.adverteerdirect.nl", + "conduit-banners.com", + "conduit-data.com", + "kanoodle.com", + "stags.bluekai.com", + "tags.bluekai.com", + "svlu.net", + "shinobi.jp", + "donburako.com", + "cho-chin.com", + "hishaku.com", + "plista.com", + "yldbt.com", + "yb0t.com", + "revsci.net", + "xiti.com", + "ati-host.net" + ], + "privacy":[ + "dmtry.com", + "trackedlink.net", + "adyard.de", + "livepass.conviva.com", + "livepassdl.conviva.com", + "liverail.com", + "powerlinks.com", + "data.resultlinks.com", + "trafficjunky.net" + ], + "ads":[ + "apex-ad.com", + "liveperson.net", + "lpsnmedia.net", + "crwdcntrl.net", + "acuityplatform.com", + "icstats.nl", + "revenuemantra.com", + "yieldbuild.com", + "ligatus.com", + "ligatus.de", + "ebz.io", + "ebuzzing.com", + "beead.net", + "beead.fr", + "beead.co.uk", + "videoclick.ru", + "videoclik.ru", + "adoperator.com", + "newstogram.com", + "adroll.com", + "banner-rotation.com", + "luminate.com", + "cnzz.com", + "exoclick.com", + "httpool.com", + "toboads.com", + "gestionpub.com", + "eplayer.clipsyndicate.com", + "tongji.linezing.com", + "pixel.adbuyer.com", + "adadvisor.net", + "mookie1.com", + "webtraxs.com", + "stats.businessol.com", + "branica.com", + "convertglobal.com", + "dev.visualwebsiteoptimizer.com", + "ad.yieldlab.net", + "adversalservers.com", + "go.adversal.com", + "adversaldisplay.com", + "admaster.net", + "turn.com", + "trafficmp.com", + "nexac.com", + "interclick.com", + "fimserve.com", + "exelator.com", + "pro-market.net", + "adnxs.com", + "adnxs.net", + "adingo.jp", + "cosmi.io", + "iadvize.com", + "lengow.com", + "widgets.getglue.com", + "tailsweep.com", + "goodadvert.ru", + "unanimis.co.uk", + "adimg.net", + "fout.jp", + "bbelements.com", + "bbmedia.cz", + "mediainter.net", + "tracking.quisma.com", + "qservz.com", + "www.is1.clixgalore.com", + "pulsemgr.com", + "sociomantic.com", + "brainient.com", + "adeasy.ru", + "rmbn.ru", + "cnstats.ru", + "yabuka.com", + "mmismm.com", + "imiclk.com", + "abmr.net", + "pixel.fetchback.com", + "gw-services.vtrenz.net", + "edge.jeetyetmedia.com", + "contentabc.com", + "rd.clickshift.com", + "beacons.hottraffic.nl", + "bannerconnect.net", + "nrelate.com", + "up.nytimes.com", + "app.salecycle.com", + "d16fk4ms6rqz1v.cloudfront.net", + "d15qhc0lu1ghnk.cloudfront.net", + "featurelink.com", + "ads.undertone.com", + "hotwords.com", + "netscope.data.marktest.pt", + "advertise.com", + "am10.ru", + "am15.net", + "web-stat.com", + "beacon.clickequations.net", + "js.clickequations.net", + "brcdn.com", + "brsrvr.com", + "brtstats.com", + "gostats.com", + "ad.clovenetwork.com", + "ads.clovenetwork.com", + "inspectlet.com", + "gameleads.ru", + "cityads.ru", + "ipromote.com", + "admitad.com", + "inq.com", + "adtegrity.net", + "adtpix.com", + "trafficfactory.biz", + "adsbookie.com", + "advivi.com", + "dothads.com", + "promoserv.com", + "zeusclicks.com", + "yazcash.com", + "maist.jp", + "ad-move.jp", + "clicktracks.com", + "adcash.com", + "vmmpxl.com", + "analytics.brightedge.com", + "way2traffic.com", + "alenty.com", + "supercounters.com", + "wikia-beacon.com", + "tidaltv.com", + "korrelate.net", + "cleanrm.net", + "adn.fusionads.net", + "richmetrics.com", + "d3q6px0y2suh5n.cloudfront.net", + "rich-agent.s3.amazonaws.com", + "image.providesupport.com", + "secure.providesupport.com", + "clkads.com", + "clkrev.com", + "clkmon.com", + "adswizz.com", + "cdn.topsy.com", + "otter.topsy.com", + "w3roi.com", + "hit.gemius.pl", + "serving-sys.com", + "d1l6p2sc9645hc.cloudfront.net", + "opentracker.net", + "img.footprintlive.com", + "script.footprintlive.com", + "netshelter.net", + "iesnare.com", + "c3tag.com", + "webspectator.com", + "powermarketing.com", + "realtime.co", + "media6degrees.com", + "blogads.com", + "cubics.com", + "bidsystem.com", + "adkengage.com", + "adknowledge.com", + "meaningtool.com", + "adfrontiers.com", + "toolbar.cdn.gigya.com", + "tracker.euroweb.net", + "lduhtrp.net", + "andomedia.com", + "tritondigital.com", + "254a.com", + "webeffective.keynote.com", + "fhserve.com", + "btrll.com", + "roi.vertical-leap.co.uk", + "a2dfp.net", + "c.p-advg.com", + "durasite.net", + "adonion.com", + "advg.jp", + "carbonads.com", + "ad.adfunky.com", + "apps.facebook.com", + "ad.clickotmedia.com", + "adition.com", + "agkn.com", + "centraliprom.com", + "stats.wordpress.com", + "analytics.yahoo.com", + "s.yjtag.jp", + "doubleverify.com", + "ivwbox.de", + "ioam.de", + "ctpsnet.com", + "ctnsnet.com", + "brand-server.com", + "synovite-scripts.com", + "svtrd.com", + "creafi-online-media.com", + "ctnetwork.hu", + "xyztraffic.com", + "intelliad.com", + "intelliad.de", + "adsrvr.org", + "sp1.convertro.com", + "content.ad", + "adaos-ads.net", + "raasnet.com", + "webhelpje.nl", + "webhelpje.be", + "shuttle.sharexy.com", + "adbrite.com", + "dnhgz729v27ca.cloudfront.net", + "wowanalytics.co.uk", + "amazon-adsystem.com", + "t.p.mybuys.com", + "projectwonderful.com", + "voicefive.com", + "traffichaus.com", + "ad.bnmla.com", + "wwa.wipe.de", + "ambientplatform.vn", + "twyn.com", + "heias.com", + "radarurl.com", + "clarityray.com", + "djers.com", + "dutrus.com", + "eurts.com", + "fanefo.com", + "hfunt.com", + "hfutz.com", + "hinsm.com", + "japum.com", + "jhame.com", + "jyaby.com", + "jyawd.com", + "kwobj.com", + "kyarm.com", + "lbein.com", + "ocyss.com", + "orpae.com", + "owpas.com", + "psyng.com", + "pturt.com", + "wredint.com", + "banzaiadv.it", + "ads.adpv.com", + "copacet.com", + "flagcounter.com", + "typekit.net", + "typekit.com", + "777seo.com", + "ptp22.com", + "ptp33.com", + "rt.liftdna.com", + "roia.biz", + "semasio.net", + "gamerdna.com", + "brandreachsys.com", + "po.st", + "optimost.com", + "adviva.net", + "adocean.pl", + "hitbox.com", + "tracking.conversionlab.it", + "ibpxl.com", + "reklamstore.com", + "indieclick.com", + "scribol.com", + "ru4.com", + "ad.xplusone.com", + "dn3y71tq7jf07.cloudfront.net", + "vlog.leadformix.com", + "prosperent.com", + "ads.adwitserver.com", + "ads.dedicatedmedia.com", + "betrad.com", + "ads.mofos.com", + "lfstmedia.com", + "12mlbe.com", + "emjcd.com", + "webads.nl", + "51.la", + "spongecell.com", + "px.steelhousemedia.com", + "l.addthiscdn.com", + "wunderloop.net", + "advertising.com", + "hlserve.com", + "insight.torbit.com", + "qwobl.net", + "api.apptap.com", + "widget.apptap.com", + "ads.admarvel.com", + "admarvel.s3.amazonaws.com", + "sslt.tellapart.com", + "t.tellapart.com", + "punchtab.com", + "track.did-it.com", + "lucidmedia.com", + "ltassrv.com", + "magnetisemedia.com", + "mycounter.ua", + "mycounter.com.ua", + "struq.com", + "xg4ken.com", + "wysistat.com", + "ant.conversive.nl", + "adclickmedia.com", + "eulerian.net", + "free-pagerank.com", + "atoomic.com", + "33across.com", + "searchmarketing.com", + "t.channeladvisor.com", + "mediaforge.com", + "embed.spotify.com", + "expo-max.com", + "b2bvideo.ru", + "rtbidder.net", + "blog.disqus.com", + "disqus.com", + "facebookofsex.com", + "nostringsattached.com", + "cams.com", + "trafficrevenue.net", + "lijit.com", + "shinystat.com", + "shinystat.it", + "adru.net", + "impact-ad.jp", + "5min.com", + "stat.yellowtracker.com", + "mmstat.com", + "caanalytics.com", + "vietad.vn", + "ads.ad4game.com", + "c.bigmir.net", + "technorati.com", + "adsonar.com", + "lzjl.com", + "clicksor.com", + "hatid.com", + "tribalfusion.com", + "webtrendslive.com", + "addthis.com", + "counter.personyze.com", + "reson8.com", + "adman.in.gr", + "listrakbi.com", + "crsspxl.com", + "adreactor.com", + "merchenta.com", + "ad.adnetwork.net", + "sa-as.com", + "mkt912.com", + "mkt922.com", + "mkt51.net", + "mkt941.com", + "hitsprocessor.com", + "vdopia.com", + "cxense.com", + "plus.google.com", + "chrome.google.com/webstore*", + "ts.istrack.com", + "adhitzads.com", + "socialannex.com", + "api.toptenreviews.com", + "servebom.com", + "fls.doubleclick.net", + "smowtion.com", + "tradedoubler.com", + "streamray.com", + "rts.sparkstudios.com", + "zanox.com", + "4dsply.com", + "trklnks.com", + "cdn.engine.adsupply.com", + "tanx.com", + "adsame.com", + "adfusion.com", + "wtp101.com", + "hs.interpolls.com", + "sw.interpolls.com", + "c-on-text.com", + "ad.103092804.com", + "kmdisplay.com", + "successfultogether.co.uk", + "reussissonsensemble.fr", + "statistics.ro", + "keymetric.net", + "webiq-warp.appspot.com", + "webiq-cdn.appspot.com", + "webiqonline.com", + "d5phz18u4wuww.cloudfront.net", + "admicro.vn", + "ad.globe7.com", + "ad.globaltakeoff.net", + "avantlink.com", + "mongoosemetrics.com", + "tracking.feedperfect.com", + "appmetrx.com", + "webclicktracker.com", + "instantservice.com", + "traffiliate.com", + "rovion.com", + "adk2.com", + "cdn.adsrvmedia.com", + "cdn.cdnrl.com", + "data.publishflow.com", + "i2idata.com", + "i2i.jp", + "accesstrade.net", + "mpn-analytics.mokonocdn.com", + "analytics.mpn.mokonocdn.com", + "teljari.is", + "centraltag.com", + "pages.etology.com", + "ftjcfx.com", + "yceml.net", + "weborama.fr", + "adrcdn.com", + "adrcntr.com", + "rutarget.ru", + "adgent007.com", + "ads.shorttail.net", + "dpmsrv.com", + "mediav.com", + "bluecava.com", + "adnet.vn", + "cg-global.maxymiser.com", + "freeonlineusers.com", + "monitus.net", + "track.digitalriver.com", + "directtrack.com", + "onenetworkdirect.net", + "ad.metanetwork.com", + "rt.legolas-media.com", + "hitsniffer.com", + "nedstatbasic.net", + "sedotracker.com", + "widgets.amung.us", + "whos.amung.us", + "histats.com", + "h4k5.com", + "stormiq.com", + "dc-storm.com", + "stormcontainertag.com", + "vdna-assets.com", + "wsod.com", + "rm.yieldmanager.com", + "ad.yieldmanager.com", + "rmxads.com", + "imrworldwide.com", + "tracker.financialcontent.com", + "grapeshot.co.uk", + "gscontxt.net", + "tapad.com", + "refinedads.com", + "affinity.com", + "openxenterprise.com", + "odnxs.net", + "servedbyopenx.com", + "adhands.ru", + "d1qpxk1wfeh8v1.cloudfront.net", + "d1cerpgff739r9.cloudfront.net", + "r.i.ua", + "doug1izaerwt3.cloudfront.net", + "d1n7kk4vfnecsc.cloudfront.net", + "d1991e1bwxgrnr.cloudfront.net", + "mediaplex.com", + "mplxtms.com", + "eclick.vn", + "iogous.com", + "tracer.jp", + "smp.specificmedia.com", + "leads.specificmedia.com", + "adsfac.eu", + "adsfac.us", + "adsfac.sg", + "adsfac.net", + "ads.sixapart.com", + "overture.com", + "adrolays.de", + "hit.stat24.com", + "easyresearch.se", + "getsmartcontent.com", + "js.bigdoor.com", + "gooo.al", + "gooal.herokuapp.com", + "apmebf.com", + "ads.adverline.com", + "ads2.adverline.com", + "surinter.net", + "adnext.fr", + "adserve.shopzilla.com", + "convertglobal.s3.amazonaws.com", + "reporting.singlefeed.com", + "admailtiser.com", + "collserve.com", + "ad.harrenmedianetwork.com", + "ads.networkhm.com", + "akavita.com", + "adform.net", + "adformdsp.net", + "netseer.com", + "ads.newtention.net", + "ads.newtentionassets.net", + "trk.newtention.net", + "w3counter.com", + "dgmatix.com", + "admagnet.net", + "amimg.net", + "netaffiliation.com", + "metaffiliation.com", + "neodatagroup.com", + "pirchio.com", + "adcastplus.net", + "juiceadv.com", + "stat.mystat.hu", + "leiki.com", + "publicidad.net", + "mvb.me", + "jetpackdigital.com", + "hits.convergetrack.com", + "netmng.com", + "adtechus.com", + "adtech.de", + "adpersia.com", + "media.gsimedia.net", + "tealium.hs.llnwd.net", + "tiqcdn.com", + "tealiumiq.com", + "nextstat.com", + "webtrekk.de", + "webtrekk.net", + "webtrekk-asia.net", + "ratevoice.com", + "ads.intergi.com", + "innovid.com", + "omgpm.com", + "sextracker.com", + "clickboothlnk.com", + "clickbooth.com", + "adtoll.com", + "ghmedia.com", + "persianstat.com", + "partner-ads.com", + "ad.advantagemedia.dk", + "online.adservicemedia.dk", + "advertisespace.com", + "twittercounter.com", + "insightexpressai.com", + "vizury.com", + "atomex.net", + "linkwithin.com", + "adtaily.com", + "adtaily.pl", + "ad.prismamediadigital.com", + "rotator.adjuggler.com", + "sophus3.com", + "met.vgwort.de", + "adserver.com.br", + "predicta.net", + "p-td.com", + "cdna.tremormedia.com", + "chango.ca", + "chango.com", + "ppjol.net", + "ppjol.com", + "komoona.com", + "widget.uservoice.com", + "spectate.com", + "adtotal.pl", + "bdv.bidvertiser.com", + "srv.bidvertiser.com", + "pointroll.com", + "chartbeat.com", + "chartbeat.net", + "sharethis.com", + "euroads.no", + "euroads.dk", + "euroads.fi", + "in.bubblestat.com", + "advertstream.com", + "sspcash.com", + "rlcdn.net", + "reachlocal.com", + "reachlocallivechat.com", + "gravity.com", + "grvcdn.com", + "get.mirando.de", + "pixfuture.net", + "sexad.net", + "clickwinks.com", + "evolvemediametrics.com", + "netmining.com", + "contentwidgets.net", + "cbproads.com" + ], + "trackers":[ + "lypn.net", + "intellitxt.com", + "triggit.com", + "whoson.com", + "admp.sanoma.fi", + "analytics.sanoma.fi", + "adsearch.adkontekst.pl", + "clickintext.net", + "veoxa.com", + "analytics.performable.com", + "geoplugin.net", + "nakanohito.jp", + "iprom.net", + "buysellads.com", + "linksynergy.com", + "run.admost.com", + "c-col.com", + "oaserve.com", + "realmediadigital.com", + "realmedia.com", + "247realmedia.com", + "collective-media.net", + "apps.facebook.com", + "lomadee.com", + "t.webtracker.jp", + "adlantis.jp", + "adlooxtracking.com", + "ats.tumri.net", + "visiblemeasures.com", + "viewablemedia.net", + "tags.dashboardad.net", + "adnetwork.pro", + "buzzparadise.com", + "contactatonce.com", + "adotube.com", + "magnify360.com", + "api.demandbase.com", + "leads.demandbase.com", + "msgapp.com", + "moon-ray.com", + "speed-trap.nl", + "ad6media.fr", + "ad6media.es", + "ad6media.co.uk", + "ad6media.com", + "sdscdn.userreport.com", + "sdsbucket.s3.amazonaws.com", + "adhese.net", + "adhese.com", + "adhese.be", + "kissmetrics.com", + "tracker.wordstream.com", + "adagionet.com", + "analytics.live.com", + "e-planning.net", + "btstatic.com", + "s.mousetrace.com", + "liveagentforsalesforce.com", + "salesforceliveagent.com", + "adzerk.net", + "2leep.com", + "nwidget.networkedblogs.com", + "investingchannel.com", + "wahoha.com", + "invitemedia.com", + "emediate.dk", + "emediate.se", + "emediate.eu", + "game-advertising-online.com", + "adperium.com", + "watch.teroti.com", + "ads.creative-serving.com", + "p161.net", + "600z.com", + "adplan-ds.com", + "d1ros97qkrwjf5.cloudfront.net", + "newrelic.com", + "nr-data.net", + "iperceptions.com", + "publicidees.com", + "mconet.biz", + "domdex.net", + "domdex.com", + "hiconversion.com", + "predictiveintent.com", + "atdmt.com", + "adbureau.net", + "spotxchange.com", + "popads.net", + "contextweb.com", + "apps.facebook.com", + "adinsight.eu", + "adinsight.com", + "responsetap.com", + "visit.webhosting.yahoo.com", + "c3metrics.com", + "apps.facebook.com", + "apps.facebook.com", + "apps.facebook.com", + "omtrdc.net", + "optmd.com", + "adriver.ru", + "adnetinteractive.com", + "adnetinteractive.net", + "rsys4.net", + "tracking.godatafeed.com", + "adspaces.ero-advertising.com", + "shop2market.com", + "ads.avazu.net", + "visualdna.com", + "retargeter.com", + "auditude.com", + "engine.influads.com", + "pocketcents.com", + "audiencefuel.com", + "sitestat.com", + "nedstat.com", + "medialand.ru", + "adnet.ru", + "mediagra.com", + "backbeatmedia.com", + "admaxserver.com", + "jump-time.net", + "jumptime.com", + "service.optify.net", + "app.ubertags.com", + "intercom.io", + "intercomcdn.com", + "effectivemeasure.net", + "pages05.net", + "pages02.net", + "pages01.net", + "dssja7qsifeak.cloudfront.net", + "tracking100.com", + "luxup.ru", + "adlabs.ru", + "mixmarket.biz", + "mxpnl.com", + "semilo.com", + "adtraxx.de", + "adclear.net", + "plus.google.com", + "chrome.google.com/webstore*", + "adsbyisocket.com", + "isocket.com", + "zopim.com", + "d3pkntwtp2ukl5.cloudfront.net", + "t.unbounce.com", + "advertserve.com", + "adsremote.scrippsnetworks.com", + "d3rmnwi2tssrfx.cloudfront.net", + "activeconversion.com", + "analytics.clickdimensions.com", + "merchantadvantage.com", + "segment.com", + "segment.io", + "d2dq2ahtl5zl1z.cloudfront.net", + "d47xnnr8b1rki.cloudfront.net", + "channelintelligence.com", + "adscale.de", + "dsa.csdata1.com", + "unister-adservices.com", + "skimresources.com", + "redirectingat.com", + "cdn.krxd.net", + "beacon.krxd.net", + "impressiondesk.com", + "impdesk.com", + "adnetwork.vn", + "adnetwork.net.vn", + "delivery.reklamz.com", + "content.dl-rms.com", + "dlqm.net", + "questionmarket.com", + "adcode.adengage.com", + "conv.adengage.com", + "apps.facebook.com", + "sekindo.com", + "qksz.net", + "ad360.vn", + "verticalnetwork.de", + "dt07.net", + "dt00.net", + "jsc.mgid.com", + "ads.elementodigital.org", + "segments.adap.tv", + "ads.jinkads.com", + "coremotives.com", + "adfox.ru", + "adwolf.ru", + "hitslink.com", + "adspirit.de", + "adspirit.net", + "quintelligence.com", + "engineseeker.com", + "adingo.jp.eimg.jp", + "d1ivexoxmp59q7.cloudfront.net", + "dinclinx.com", + "smartadserver.com", + "sponsorads.de", + "dtmpub.com", + "dotomi.com", + "track.adtraction.com", + "cpmprofit.com", + "arubamediamarketing.it", + "ammadv.it", + "brandaffinity.net", + "displaymarketplace.com", + "platform.linkedin.com", + "in.getclicky.com", + "static.getclicky.com", + "hello.staticstuff.net", + "go.activengage.com", + "turnto.com" + ] +} diff --git a/ghost-bugs/bugs_parser.py b/ghost-bugs/bugs_parser.py new file mode 100644 index 0000000..f6c310b --- /dev/null +++ b/ghost-bugs/bugs_parser.py @@ -0,0 +1,39 @@ +import sys, json + +if __name__ == '__main__': + + fp = sys.stdin + try: + sdata = fp.read() + data = json.loads(sdata) + except Exception, ee: + sys.exit('Error loading data... Aborting.') + + apps = data['apps'] + + result = {} + result['ads'] = [] + result['trackers'] = [] + result['analytics'] = [] + result['widgets'] = [] + result['privacy'] = [] + + + hosts = data['firstPartyExceptions'] + + for key, info in apps.iteritems(): + if key in hosts: + urls = [] + for u in hosts[key]: + if info['cat'] == 'tracker': + result['trackers'].append(u) + elif info['cat'] == 'ad': + result['ads'].append(u) + elif info['cat'] == 'analytics': + result['analytics'].append(u) + elif info['cat'] == 'widget': + result['widgets'].append(u) + elif info['cat'] == 'privacy': + result['privacy'].append(u) + + print json.dumps(result, indent=2, separators=(',',':')) \ No newline at end of file diff --git a/webpagetest/wpt_csv_filter.py b/webpagetest/wpt_csv_filter.py new file mode 100644 index 0000000..b43232d --- /dev/null +++ b/webpagetest/wpt_csv_filter.py @@ -0,0 +1,114 @@ +import sys, csv, json, os +from optparse import OptionParser + +# list of fileds from the wpt csv file to keep +csv_fields = ['Connect Time', 'Time to Load (ms)', 'Time to First Byte (ms)', 'Content Type', 'DNS Time', 'Real Start Time (ms)', 'Full Time to Load (ms)', 'Expires', 'Cached', 'Host', 'DNS Start', 'SSL Time', 'Date', 'SSL Negotiation Start', 'Connect End', 'Initiator', 'Image Total Bytes', 'Start Time (ms)', 'URL', 'Content Encoding', 'Cookie Count(out)', 'Bytes In', 'Initiator Line', 'Bytes Out', 'Descriptor', 'Connect Start', 'Time', 'Action', 'Sequence Number', 'CDN Provider', 'DNS End', 'SSL Negotiation End', 'Object Size', 'IP Address', 'End Time (ms)', 'Response Code', 'tracker_type'] + +def filter_fields(wpt_row, type): + for k in wpt_row.keys(): + if k not in csv_fields: + del wpt_row[k] + wpt_row['tracker_type'] = type + +def run(options): + + if not options.file: + sys.exit('No wpt csv input file specified. Aborting.') + + csv_file = options.file + + if not options.bugs: + sys.exit('No ghostery (formated) bugs input file. Aborting.') + + bugs_file = options.bugs + + try: + wpt_data = csv.DictReader(open(csv_file)) + except Exception, ee: + sys.exit('Error loading wpt csv data... Aborting.') + + try: + with open(bugs_file) as bugs_data_file: + bugs_data = json.load(bugs_data_file) + except Exception, ee: + sys.exit('Error loading bugs data... Aborting.') + + stats = {'total' : 0.0, 'ads': 0.0, 'trackers': 0.0, 'analytics': 0.0, 'widgets': 0.0, 'privacy': 0.0, 'blank': 0.0} + + #write ouput + fname, ext = os.path.splitext(os.path.basename(csv_file)) + out_csv_filename = fname + "__filtered" + ext + + out_csv = open(out_csv_filename, 'w') + writer = csv.DictWriter(out_csv, fieldnames=csv_fields) + writer.writeheader() + + + for r in wpt_data: + stats['total'] += 1 + host = r['Host'] + if any(a in host for a in bugs_data['ads']): +# print "ads: " + host + filter_fields(r, 'ad') + writer.writerow(r); + stats['ads'] += 1 + continue + if any(a in host for a in bugs_data['trackers']): +# print "trackers: " + host + filter_fields(r, 'tracker') + writer.writerow(r); + stats['trackers'] += 1 + continue + if any(a in host for a in bugs_data['analytics']): +# print "analytics: " + host + filter_fields(r, 'analytics') + writer.writerow(r); + stats['analytics'] += 1 + continue + if any(a in host for a in bugs_data['widgets']): +# print "widgets: " + host + filter_fields(r, 'widget') + writer.writerow(r); + stats['widgets'] += 1 + continue + if any(a in host for a in bugs_data['privacy']): +# print "privacy: " + host + filter_fields(r, 'privacy') + writer.writerow(r); + stats['privacy'] += 1 + continue + if options.keep: + stats['blank'] += 1 + filter_fields(r, 'n/a') + writer.writerow(r); + + + if options.stats: + print "----- Stats: " + fname + ext + " -----" + print "total (elements): " + str(stats['total']) + print "ads: " + str(stats['ads']) + ' - ' + str(stats['ads'] / stats['total']) + '%' + print "trackers: " + str(stats['trackers']) + ' - ' + str(stats['trackers'] / stats['total']) + '%' + print "analytics: " + str(stats['analytics']) + ' - ' + str(stats['analytics'] / stats['total']) + '%' + print "widgets: " + str(stats['widgets']) + ' - ' + str(stats['widgets'] / stats['total']) + '%' + print "privacy: " + str(stats['privacy']) + ' - ' + str(stats['privacy'] / stats['total']) + '%' + print "..............." + print "* JUNK INDEX * " + str((stats['ads'] + stats['trackers'] + stats['analytics'] + stats['widgets'] + stats['privacy']) / stats['total']) + '%' + +if __name__ == '__main__': + + p = OptionParser(); + p.add_option('-f', '--file', action="store", help="wpt csv input file") + p.add_option('-b', '--bugs', action="store", help="ghostery (formated) bugs input file") + p.add_option('-k', '--keep', action="store_true", help="keeps the non bugs html element") + p.add_option('-s', '--stats', action="store_true", help="prints basic stats") + + options, args = p.parse_args() + + run(options) + + + + + + +