From 168dc3f9293ef645b96f347a8ce96e83087d4858 Mon Sep 17 00:00:00 2001 From: nelsonic Date: Fri, 22 Jan 2016 17:06:15 +0000 Subject: [PATCH 01/44] update console.log to be more descriptive --- README.md | 2 ++ server.js | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4b3342e..a1671ee 100644 --- a/README.md +++ b/README.md @@ -89,3 +89,5 @@ http://www.monitorware.com/en/logsamples/apache.php ### Node.js http module headers https://nodejs.org/api/http.html#http_message_rawheaders + +> Try: diff --git a/server.js b/server.js index cdd4e5c..d06e572 100644 --- a/server.js +++ b/server.js @@ -10,7 +10,7 @@ http.createServer(function handler(req, res) { var r = req.headers; r.ip = req.headers['x-forwarded-for'] || req.connection.remoteAddress; r.url = url.replace('.svg', '').replace('.png', ''); - console.log('>>'+url); + // console.log('>>'+url); if (url.match(/svg/)) { hits.add(r, function(err, count) { console.log(r.url, ' >> ', count); @@ -32,7 +32,7 @@ http.createServer(function handler(req, res) { } else if (url.match(/png/)) { hits.add(r, function(err, count) { - console.log(r.url, ' >> ', count) + console.log(">> Email Open Count: ", count) // var newurl = "https://img.shields.io/badge/hits-" + count +"-brightgreen.png" // wreck.get(newurl, function (error, response, html) { // expiry headers see: http://stackoverflow.com/a/2068407/1148249 From f21abf72e2f3fba46f2026000a73efd1aba2ce1c Mon Sep 17 00:00:00 2001 From: nelsonic Date: Wed, 23 Aug 2017 22:08:12 +0100 Subject: [PATCH 02/44] update dependencies fixes https://github.com/dwyl/hits/issues/41 --- package.json | 14 +++++++------- test/hits.test.js | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/package.json b/package.json index 1a90384..63ac234 100644 --- a/package.json +++ b/package.json @@ -28,17 +28,17 @@ }, "homepage": "https://github.com/dwyl/hits#readme", "dependencies": { - "redis-connection": "^5.0.0", - "socket.io": "^1.4.8", + "redis-connection": "^5.2.0", + "socket.io": "^2.0.3", "uniki": "^1.0.3", - "wreck": "^8.0.0" + "wreck": "^12.2.3" }, "devDependencies": { - "decache": "^4.0.0", + "decache": "^4.1.0", "istanbul": "^0.4.4", - "nodemon": "^1.9.2", - "pre-commit": "^1.1.3", - "tape": "^4.6.0" + "nodemon": "^1.11.0", + "pre-commit": "^1.2.2", + "tape": "^4.8.0" }, "pre-commit": [ "coverage" diff --git a/test/hits.test.js b/test/hits.test.js index 4aede85..d63fa7f 100644 --- a/test/hits.test.js +++ b/test/hits.test.js @@ -39,7 +39,7 @@ test(file+'Add a hit without language', function(t){ hits.count(req.url, function(err, data) { console.log(data); t.ok(data >= 0, '✓ REQ ' +req.url +' was added at a index: ' + data) - hits.redisClient.end(); + hits.redisClient.end(true); t.end(); }) }); From f15cb24d572930505ecd5e71bff38f4a69c099c3 Mon Sep 17 00:00:00 2001 From: nelsonic Date: Thu, 24 Aug 2017 21:14:34 +0100 Subject: [PATCH 03/44] adds browser language to hit row fixes https://github.com/dwyl/hits/issues/43 --- README.md | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 8757d62..adf1632 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ What if there was a *simple+easy* way to see how many people have viewed your Gi [![Build Status](https://travis-ci.org/dwyl/hits.svg)](https://travis-ci.org/dwyl/hits) [![HitCount](https://hitt.herokuapp.com/nelsonic/hits.svg)](https://github.com/nelsonic/hits) -[![Code Climate](https://codeclimate.com/github/dwyl/hits/badges/gpa.svg)](https://codeclimate.com/github/dwyl/hits) [![codecov.io](http://codecov.io/github/dwyl/hits/coverage.svg?branch=master)](http://codecov.io/github/dwyl/hits?branch=master) [![Dependency Status](https://david-dm.org/dwyl/hits.svg)](https://david-dm.org/dwyl/hits) [![devDependency Status](https://david-dm.org/dwyl/hits/dev-status.svg)](https://david-dm.org/dwyl/hits#info=devDependencies) @@ -12,9 +11,10 @@ What if there was a *simple+easy* way to see how many people have viewed your Gi ## Why? -We have a few repos on GitHub ... but sadly, we have no idea how many people -are looking at the repos unless they star/watch them; GitHub does not share -any stats with people using their site. +We have a _few_ repos on GitHub ... +but _sadly_, we ~~have~~ _had_ no idea how many people +are looking at the repos unless they star/watch them; +GitHub does not share any stats with people using their site. We would like to *know* the popularity of each of our repos to know where we need to be investing our time. @@ -23,9 +23,14 @@ to know where we need to be investing our time. A simple way to add (*very basic*) analytics to your GitHub repos. -There are already *many* "Badges" available which people put in their repos: https://github.com/dwyl/repo-badges +There are already *many* "badges" that people use in their repos. +See: [github.com/dwyl/**repo-badges**](https://github.com/dwyl/repo-badges)
But we haven't seen one that gives a "***hit counter***" -of the number of times a page has been viewed ... +of the number of times a page has been viewed ...
+So we decided to create one. + + + ## How? @@ -34,23 +39,31 @@ can see how popular the page is and you can track it. ### Implementation -What is the ***minimum possible*** amount of data we can store? +What is the ***minimum possible*** amount of data we can store _per request_? -+ **date+time** the person visited the site. +1. **date+time** the person visited the site. https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Date/now -+ **user-agent** the browser or crawler visiting the page +2. **user-agent** the browser or crawler visiting the page https://en.wikipedia.org/wiki/User_agent -+ **referer** url of the page where the image is requested from? +3. **referer** url of the page where the image is requested from? https://en.wikipedia.org/wiki/HTTP_referer -Log entries are stored as a `String` which can be parsed and re-formatted into -any other format: +Log entries are stored as a (_space delimited_) `String` +which can be parsed and re-formatted into any other format: + ```sh -1436570536950 x7uapo9 84.91.136.21 +1436570536950 x7uapo9 84.91.136.21 EN-GB ``` -| Timestamp | User Agent | IP Address | -| ------------- |:------------|:------------:| -| 1436570536950 | x7uapo9 | 84.91.136.21 | +> _**Note: while not "essential", we added **Browser Language** +> as the **4th** piece of data (when it is set/sent by the browser/device) +> because it's **insightful** to know what language people are using +> so that we can determine if we should be **translating**/"**localising**" +> our content._ + + +| Timestamp | User Agent | IP Address | Language | +| ------------- |:------------|:------------:|:--------:| +| 1436570536950 | x7uapo9 | 84.91.136.21 | EN-GB | We then have a user-agent hash where we can lookup the by id: ```js From d9aab87a83d4b6b53e0b9dbe09880e925b603a9d Mon Sep 17 00:00:00 2001 From: nelsonic Date: Thu, 24 Aug 2017 22:23:22 +0100 Subject: [PATCH 04/44] revive the idea of creating our own SVG for https://github.com/dwyl/hits/issues/30 --- lib/{climate.svg => template.svg} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename lib/{climate.svg => template.svg} (100%) diff --git a/lib/climate.svg b/lib/template.svg similarity index 100% rename from lib/climate.svg rename to lib/template.svg From fd2256881359c4575df1110366dbb153f59d7b5a Mon Sep 17 00:00:00 2001 From: nelsonic Date: Thu, 24 Aug 2017 22:59:00 +0100 Subject: [PATCH 05/44] create single-purpose function to extract request data for https://github.com/dwyl/hits/issues/44 --- lib/extract_request_data.js | 26 ++++++++++++++++++++ test/extract_request_data.test.js | 40 +++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 lib/extract_request_data.js create mode 100644 test/extract_request_data.test.js diff --git a/lib/extract_request_data.js b/lib/extract_request_data.js new file mode 100644 index 0000000..230d8f4 --- /dev/null +++ b/lib/extract_request_data.js @@ -0,0 +1,26 @@ +var uniki = require('uniki'); // to create super-short hash of the User Agent + +// This file/module's only job is extracting the request data from http headers +module.exports = function extract (req) { + var h = req.headers; // shortcut to headers reduces typing + var agent = uniki(h['user-agent'], 7); // the user-agent for device/browser + var lang = ''; // the browser language + + // get the user's IP addres from headers or connection object: + var ip = h['x-forwarded-for'] || req.connection.remoteAddress; + + // get url the client requested: + var url = req.url.replace('.svg', '') + .replace('.png', '') + .replace('https://github.com/', ''); // strip to save storage + + if(h['accept-language']) { // Language for: github.com/dwyl/hits/issues/43 + if (h['accept-language'].indexOf(',') > -1) { // e.g: en-GB,en;q=0.5 + lang = h['accept-language'].split(',')[0].toUpperCase(); + } else { + lang = h['accept-language'].toUpperCase(); + } + } + + return [Date.now(), url, h['user-agent'], ip, lang].join('|'); +} diff --git a/test/extract_request_data.test.js b/test/extract_request_data.test.js new file mode 100644 index 0000000..e9f9d0f --- /dev/null +++ b/test/extract_request_data.test.js @@ -0,0 +1,40 @@ +var dir = __dirname.split('/')[__dirname.split('/').length-1]; +var file = dir + __filename.replace(__dirname, '') + " > "; +var test = require('tape'); +var extract = require('../lib/extract_request_data.js'); + +test(file + 'Extract "Hit" data from HTTP Request', function(t){ + var req = { + 'url': '/my/awesome/url', + headers: { + 'accept-language': 'en-US,en;q=0.8,pt;q=0.6,es;', + 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)' + }, + connection: { + remoteAddress: '88.88.88.88' + } + } + var hit = extract(req); + t.ok(hit.indexOf('/my/awesome/url|Mozilla/5.0') > -1, + '✓ HTTP request data extracted: ' + hit) + t.ok(hit.indexOf('EN-US') > -1, + '✓ extracted language: ' + hit.split('|')[4]) + t.end(); +}); + +test(file + 'fewer headers are set on request object', function(t){ + var req = { + 'url': '/my/awesome/url', + headers: { + 'accept-language': 'en-GB', + 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)', + 'x-forwarded-for': '88.88.88.88' + } + } + var hit = extract(req); + t.ok(hit.indexOf('/my/awesome/url|Mozilla/5.0') > -1, + '✓ Reduced request data extracted: ' + hit) + t.ok(hit.indexOf('EN-GB') > -1, + '✓ extracted language: ' + hit.split('|')[4]) + t.end(); +}); From 63d5c543a64f3c5d178a24334ca154f20c8221f8 Mon Sep 17 00:00:00 2001 From: nelsonic Date: Thu, 24 Aug 2017 23:00:10 +0100 Subject: [PATCH 06/44] adds code to create our own SVG file instead of relying on Shields.io! fixes https://github.com/dwyl/hits/issues/30 --- lib/make_svg.js | 7 +++++++ lib/template.svg | 22 +++++++--------------- test/make_svg.test.js | 12 ++++++++++++ 3 files changed, 26 insertions(+), 15 deletions(-) create mode 100644 lib/make_svg.js create mode 100644 test/make_svg.test.js diff --git a/lib/make_svg.js b/lib/make_svg.js new file mode 100644 index 0000000..91b92f9 --- /dev/null +++ b/lib/make_svg.js @@ -0,0 +1,7 @@ +var fs = require('fs'); +var path = require('path'); +var template = fs.readFileSync(path.resolve('./lib/template.svg'), 'utf8'); + +module.exports = function make (count) { + return template.replace('{count}', count); +} diff --git a/lib/template.svg b/lib/template.svg index 8e5f2fb..8171e35 100644 --- a/lib/template.svg +++ b/lib/template.svg @@ -1,21 +1,13 @@ - - - - - + - - + + - - - + - hits - hits - 4.0 - xyz - + hits + {count} + diff --git a/test/make_svg.test.js b/test/make_svg.test.js new file mode 100644 index 0000000..14b085c --- /dev/null +++ b/test/make_svg.test.js @@ -0,0 +1,12 @@ +var dir = __dirname.split('/')[__dirname.split('/').length-1]; +var file = dir + __filename.replace(__dirname, '') + " > "; +var test = require('tape'); +var make_svg = require('../lib/make_svg.js'); + +test(file + 'Make SVG file from template & count', function(t){ + var count = 1337; + var svg = make_svg(count); + t.ok(svg.indexOf(count.toString()) > -1, + '✓ SVG created for count: ' + count) + t.end(); +}); From 815fc10822b98af502465130e02614126d76f41c Mon Sep 17 00:00:00 2001 From: nelsonic Date: Thu, 24 Aug 2017 23:10:05 +0100 Subject: [PATCH 07/44] remove dependency on Wreck as no longer making HTTP request to Shields.io! see: https://github.com/dwyl/hits/issues/30 --- package.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/package.json b/package.json index 63ac234..3546be0 100644 --- a/package.json +++ b/package.json @@ -30,8 +30,7 @@ "dependencies": { "redis-connection": "^5.2.0", "socket.io": "^2.0.3", - "uniki": "^1.0.3", - "wreck": "^12.2.3" + "uniki": "^1.0.3" }, "devDependencies": { "decache": "^4.1.0", From 60bc4558d3dbb9b2cba6f6e1a35aa5d824fa5983 Mon Sep 17 00:00:00 2001 From: nelsonic Date: Thu, 24 Aug 2017 23:21:21 +0100 Subject: [PATCH 08/44] move headers to their own .json file to reduce clutter --- lib/headers.json | 6 ++++++ server.js | 55 ++++++++++++++++++++---------------------------- 2 files changed, 29 insertions(+), 32 deletions(-) create mode 100644 lib/headers.json diff --git a/lib/headers.json b/lib/headers.json new file mode 100644 index 0000000..333e49f --- /dev/null +++ b/lib/headers.json @@ -0,0 +1,6 @@ +{ + "Cache-Control": "no-cache, no-store, must-revalidate", + "Pragma": "no-cache", + "Expires": "0", + "Content-Type":"image/svg+xml" +} diff --git a/server.js b/server.js index b3498fd..db0a4db 100644 --- a/server.js +++ b/server.js @@ -1,42 +1,33 @@ -var http = require('http'); -var hits = require('./lib/hits'); var port = process.env.PORT || 8000; -var wreck = require('wreck'); -var fs = require('fs'); -var png = fs.readFileSync('./lib/1x1px.png'); - -var HEADERS = { // headers see: http://stackoverflow.com/a/2068407/1148249 - "Cache-Control": "no-cache, no-store, must-revalidate", // HTTP 1.1 - "Pragma": "no-cache", // HTTP 1.0 - "Expires": "0", // Proxies - "Content-Type":"image/svg+xml" // default to svg -}; +var http = require('http'); // plain http server (no fancy framework required) +var fs = require('fs'); // so we can open the file +var png = fs.readFileSync('./lib/1x1px.png'); // "tracking pixel" +var hits = require('./lib/hits'); // our storage interface +var favicon = 'http://i.imgur.com/zBEQq4w.png'; // dwyl favicon +var make_svg = require('./lib/make_svg.js'); +var extract = require('./lib/extract_request_data.js'); +var HEAD = require('./lib/headers.json'); // stackoverflow.com/a/2068407/1148249 var app = http.createServer(function handler(req, res) { - var url = req.url; - var r = req.headers; - r.ip = req.headers['x-forwarded-for'] || req.connection.remoteAddress; - r.url = url.replace('.svg', '').replace('.png', ''); + var url = req.url; + if (url.match(/svg/)) { - hits.add(r, function(err, count) { - console.log(r.url, ' >> ', count); - var newurl = 'https://img.shields.io/badge/hits-' + count +'-brightgreen.svg'; - wreck.get(newurl, function (error, response, raw) { - res.writeHead(200, Object.assign(HEADERS, {"Location": newurl})); - res.end(raw); - }); - }); - } - else if (url.match(/png/)) { - hits.add(r, function(err, count) { - console.log(r.url, ' >> ', count); - res.writeHead(200, Object.assign(HEADERS, {"Content-Type": "image/png"})); - res.end(png); + var hit = extract(req); + hits.add(hit, function(err, count) { + console.log(url, ' >> ', count); + res.writeHead(200, HEAD); + res.end(make_svg(count)); }); } + // else if (url.match(/png/)) { // see: https://github.com/dwyl/hits/issues/4 + // hits.add(r, function(err, count) { + // console.log(r.url, ' >> ', count); + // res.writeHead(200, Object.assign(HEAD, {"Content-Type": "image/png"})); + // res.end(png); + // }); + // } else if(url === '/favicon.ico') { - var favicon = 'http://i.imgur.com/zBEQq4w.png'; // dwyl favicon res.writeHead(301, { "Location": favicon }); res.end(); } @@ -60,7 +51,7 @@ var app = http.createServer(function handler(req, res) { } else { // echo the record without saving it console.log(" - - - - - - - - - - record:", r); - res.writeHead(200, {"Content-Type": "text/plain"}); + res.writeHead(200, {"Content-Type": "application/json"}); res.end(JSON.stringify(r, null, " ")); } // pretty JSON in Browser see: http://stackoverflow.com/a/5523967/1148249 }).listen(port); From 18966951ef2ad5d663e2b7d6acac82a449c034fe Mon Sep 17 00:00:00 2001 From: nelsonic Date: Fri, 25 Aug 2017 16:42:27 +0100 Subject: [PATCH 09/44] use latest version of redis-connection which does not Throw Error if unable to connect see: https://github.com/dwyl/redis-connection/issues/38 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 3546be0..4fec3b4 100644 --- a/package.json +++ b/package.json @@ -28,7 +28,7 @@ }, "homepage": "https://github.com/dwyl/hits#readme", "dependencies": { - "redis-connection": "^5.2.0", + "redis-connection": "^5.4.0", "socket.io": "^2.0.3", "uniki": "^1.0.3" }, From fd51035c621dd7669dd42d97bc121ddb22a9e3e7 Mon Sep 17 00:00:00 2001 From: nelsonic Date: Fri, 25 Aug 2017 16:57:57 +0100 Subject: [PATCH 10/44] split redis saving function into dedicated file with its own tests for https://github.com/dwyl/hits/issues/42 && https://github.com/dwyl/hits/issues/44 --- lib/db_redis.js | 17 +++++++++++++++++ test/db_redis.test.js | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 lib/db_redis.js create mode 100644 test/db_redis.test.js diff --git a/lib/db_redis.js b/lib/db_redis.js new file mode 100644 index 0000000..513e087 --- /dev/null +++ b/lib/db_redis.js @@ -0,0 +1,17 @@ +var redisClient = require('redis-connection')(); +var uniki = require('uniki'); +/** + * add - adds an entry into the List for a given url + * @param {String} url - the url for the hit + * @param {Object} hit - the hit we just received + * @param {Function} callback - call this once redis responds + */ +module.exports = function redis_save_hit (hit, callback) { + var parts = hit.split('|'); + var agent = uniki(parts[2], 7); + redisClient.hset('agents', agent, parts[2]); + parts[2] = agent; // save space in db replacing user-agent with shorter hash + redisClient.rpush(parts[1], parts.join('|'), function (err, data) { + callback(err, data) + }); +} diff --git a/test/db_redis.test.js b/test/db_redis.test.js new file mode 100644 index 0000000..212e1d3 --- /dev/null +++ b/test/db_redis.test.js @@ -0,0 +1,35 @@ +var dir = __dirname.split('/')[__dirname.split('/').length-1]; +var file = dir + __filename.replace(__dirname, '') + " > "; +var test = require('tape'); +var db = require('../lib/db_redis.js'); +var extract = require('../lib/extract_request_data.js'); + +test(file+'Add a hit to the list for that url', function (t) { + var req = { + 'url': '/my/awesome/url', + 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)', + 'ip': '8.8.8.8', + 'accept-language': 'en-US,en;q=0.8,pt;q=0.6,es;', + } + var hit = extract(req); + db(hit, function (err, count) { + t.ok(count >= 0, '✓ URL ' +req.url +' has: ' + count) + t.end(); + }) +}); + +test(file+'Add a hit without language', function (t) { + var req = { + 'url': '/my/awesome/url', + 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)', + 'ip': '8.8.8.8' + } + var hit = extract(req); + db(hit, function (err, count1) { + t.ok(count1 >= 0, '✓ URl ' +req.url +' was added at a index: ' + count1) + db(hit, function (err, count2) { + t.ok(count2 > count1, '✓ URL ' +req.url +' count is: ' + count2); + t.end(); // shutdown redis con + }); + }); +}); From 4432e51bdcfd1e2237f8de56295cae4a7efb55ca Mon Sep 17 00:00:00 2001 From: nelsonic Date: Fri, 25 Aug 2017 20:04:44 +0100 Subject: [PATCH 11/44] adds hash.js (with tests & fixtures) borrow hash code from https://github.com/nelsonic/uniki --- lib/db_redis.js | 4 +- lib/hash.js | 10 ++++ package.json | 3 +- test/hash.test.js | 37 +++++++++++++++ test/hash_fixtures.json | 102 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 152 insertions(+), 4 deletions(-) create mode 100644 lib/hash.js create mode 100644 test/hash.test.js create mode 100644 test/hash_fixtures.json diff --git a/lib/db_redis.js b/lib/db_redis.js index 513e087..536f528 100644 --- a/lib/db_redis.js +++ b/lib/db_redis.js @@ -1,5 +1,5 @@ var redisClient = require('redis-connection')(); -var uniki = require('uniki'); +var hash = require('./hash.js'); /** * add - adds an entry into the List for a given url * @param {String} url - the url for the hit @@ -8,7 +8,7 @@ var uniki = require('uniki'); */ module.exports = function redis_save_hit (hit, callback) { var parts = hit.split('|'); - var agent = uniki(parts[2], 7); + var agent = hash(parts[2], 7); redisClient.hset('agents', agent, parts[2]); parts[2] = agent; // save space in db replacing user-agent with shorter hash redisClient.rpush(parts[1], parts.join('|'), function (err, data) { diff --git a/lib/hash.js b/lib/hash.js new file mode 100644 index 0000000..89d21a0 --- /dev/null +++ b/lib/hash.js @@ -0,0 +1,10 @@ +var crypto = require('crypto'); + +// generate a hash of a specific length +module.exports = function hash (str, length) { + var len = length || 8; + var h = crypto.createHash('sha512') + .update(str.toString()).digest('base64') + .replace('/','').replace(/[Il0oO=\/\+]/g,''); // remove ambiguous chars + return h.substring(0, len); +} diff --git a/package.json b/package.json index 4fec3b4..5302f55 100644 --- a/package.json +++ b/package.json @@ -29,8 +29,7 @@ "homepage": "https://github.com/dwyl/hits#readme", "dependencies": { "redis-connection": "^5.4.0", - "socket.io": "^2.0.3", - "uniki": "^1.0.3" + "socket.io": "^2.0.3" }, "devDependencies": { "decache": "^4.1.0", diff --git a/test/hash.test.js b/test/hash.test.js new file mode 100644 index 0000000..a8b7c43 --- /dev/null +++ b/test/hash.test.js @@ -0,0 +1,37 @@ +var uniki = require('../lib/hash.js'); +var test = require('tape'); + + +test("Create hash for url: 1234", function(t) { + var str = uniki(1234); + t.equal(str.length, 8, "Worked as expected "+str); + t.equal(str, '1ARVn2Au', "uniki is consistent. 1234 >> 1ARVn") + t.end(); +}); + +test("Full Length Hash", function(t) { + var hash = uniki("RandomGobbledygook", 100); + t.true(hash.length === 78, "Full Length is " + hash.length + ' chars'); + t.end(); +}); + +test("Consistenty check against 100 sample hashes", function(t) { + var fixture = require('./hash_fixtures.json'); + Object.keys(fixture).forEach(function(k) { + var expected = fixture[k]; + var actual = uniki(k, 10); + t.true(expected === actual, + '✓ hash(' + k + ') >> expected: ' + expected + ' === actual: ' + actual); + }) + // var hash = 'H3ll0W0rld!'; + // var obj = {}; + // // var fixture = require('./hash_fixture.json'); + // for(var i=0; i < 101; i++){ + // obj[hash] = uniki(hash, 10); + // hash = uniki(hash); + // } + // console.log(JSON.stringify(obj, null, 2)) + // var hash = uniki("RandomGobbledygook", 100); + // t.true(hash.length === 78, "Full Length is " + hash.length + ' chars'); + t.end(); +}); diff --git a/test/hash_fixtures.json b/test/hash_fixtures.json new file mode 100644 index 0000000..8f37d51 --- /dev/null +++ b/test/hash_fixtures.json @@ -0,0 +1,102 @@ +{ + "3SsqzMWq": "eWVH8kBYSD", + "eWVH8kBY": "TeeyDAQ4CB", + "TeeyDAQ4": "mmM1jDFSVq", + "mmM1jDFS": "cK8BrR4Mey", + "cK8BrR4M": "m47txHn9Wt", + "m47txHn9": "UCSr4DRxvH", + "UCSr4DRx": "2ELG2YmSHS", + "2ELG2YmS": "gCvcTTskdw", + "gCvcTTsk": "8DzDfLnye1", + "8DzDfLny": "PH5VKFCPJB", + "PH5VKFCP": "p7RJFQNs7K", + "p7RJFQNs": "D8dF7YbQVU", + "D8dF7YbQ": "9VbiATGx2w", + "9VbiATGx": "KcbEH44Axm", + "KcbEH44A": "QD8JEpV3vy", + "QD8JEpV3": "ft4dj4dGbX", + "ft4dj4dG": "bvCks4hDyr", + "bvCks4hD": "xv3YEpuC8r", + "xv3YEpuC": "mQWF9urNvk", + "mQWF9urN": "MeekDZKvGs", + "MeekDZKv": "ei4dVyz2kk", + "ei4dVyz2": "eydUXW6Hfc", + "eydUXW6H": "RizKvQaMcE", + "RizKvQaM": "L5HayUsbvr", + "L5HayUsb": "rrkt5f9zw6", + "rrkt5f9z": "NCcZ7v18MP", + "NCcZ7v18": "PNUBzUf1Hs", + "PNUBzUf1": "2xWDBgiAAH", + "2xWDBgiA": "H4cdM8aJDF", + "H4cdM8aJ": "BEdY7paJpb", + "BEdY7paJ": "uiYxrfDQxq", + "uiYxrfDQ": "epiQY6FpnF", + "epiQY6Fp": "LbHqfPuimp", + "LbHqfPui": "9QUSAr963s", + "9QUSAr96": "tZiSfeGtjb", + "tZiSfeGt": "WcDxf8mTvG", + "WcDxf8mT": "yTvaTfJkrd", + "yTvaTfJk": "CALjmZEkbm", + "CALjmZEk": "1YrxJTUTFi", + "1YrxJTUT": "GDTq6j3P2B", + "GDTq6j3P": "xJAG6wt4aX", + "xJAG6wt4": "ATgJYgVnsZ", + "ATgJYgVn": "tuDMLhKdNC", + "tuDMLhKd": "KBFrbhJ87Q", + "KBFrbhJ8": "BcqhShkSYR", + "BcqhShkS": "RaFvZ9kCXM", + "RaFvZ9kC": "ABzXYuJYZm", + "ABzXYuJY": "ZWyaMHn2Nq", + "ZWyaMHn2": "QjPZ91YnpA", + "QjPZ91Yn": "Mhygcvg913", + "Mhygcvg9": "cnRyVdCwju", + "cnRyVdCw": "DD5LJKZpss", + "DD5LJKZp": "JnMhiaKZD5", + "JnMhiaKZ": "QbMzukQjfW", + "QbMzukQj": "yYHB1kr4eR", + "yYHB1kr4": "b8qfZgZ3fa", + "b8qfZgZ3": "BaDxwVxLZ1", + "BaDxwVxL": "wEggb44WNg", + "wEggb44W": "NRxWnLtPEh", + "NRxWnLtP": "jDMQ5z1b3k", + "jDMQ5z1b": "MCquGjRhjE", + "MCquGjRh": "DGNedLyQYH", + "DGNedLyQ": "RKfbFMAcDQ", + "RKfbFMAc": "9LH3NTKHCr", + "9LH3NTKH": "WwrBGcWBz8", + "WwrBGcWB": "A2XCx6JGpq", + "A2XCx6JG": "zXMQfKi8fH", + "zXMQfKi8": "4Rr5SQvn1w", + "4Rr5SQvn": "AA1561SCdD", + "AA1561SC": "6iPKRYcvBW", + "6iPKRYcv": "MjxYeytSr3", + "MjxYeytS": "ynsaauCSJs", + "ynsaauCS": "KLx9EkyhbB", + "KLx9Ekyh": "As2JsXUjv4", + "As2JsXUj": "gCAvDGgKAf", + "gCAvDGgK": "rgJ7W2NGKu", + "rgJ7W2NG": "wWP9wpieRa", + "wWP9wpie": "whg76M2Zn7", + "whg76M2Z": "TuXJCiWGn8", + "TuXJCiWG": "MQaaqVSats", + "MQaaqVSa": "dvEYDe67mg", + "dvEYDe67": "YPhWT43FYu", + "YPhWT43F": "aABrr4NZv6", + "aABrr4NZ": "hNFjpPBhPz", + "hNFjpPBh": "QyLANAvBdZ", + "QyLANAvB": "ZxYtVyXqxg", + "ZxYtVyXq": "PQT1xxTv8f", + "PQT1xxTv": "GGcXzYyRZj", + "GGcXzYyR": "Gbcjvs3HRm", + "Gbcjvs3H": "bB7G8nbQe6", + "bB7G8nbQ": "CS46921VBH", + "CS46921V": "paE6D9FxBm", + "paE6D9Fx": "4Dt85zHh2F", + "4Dt85zHh": "3JFU3udq3s", + "3JFU3udq": "xq9p2F74uu", + "xq9p2F74": "c88pLtc7y8", + "c88pLtc7": "nbK3ePuCkH", + "nbK3ePuC": "AtT14PZ9D2", + "AtT14PZ9": "2AJxk5F81D", + "2AJxk5F8": "H4GGnC8PdG" +} From e0b9877095b4b16154d60bdeae2fbac8cb263dad Mon Sep 17 00:00:00 2001 From: nelsonic Date: Fri, 25 Aug 2017 21:47:39 +0100 Subject: [PATCH 12/44] clarify/simplify unique data being stored fixes https://github.com/dwyl/hits/issues/44 --- README.md | 114 +++++++++++++++++++++++++----------- lib/db_redis.js | 16 +++-- lib/extract_request_data.js | 26 ++++---- lib/hits.js | 46 ++++----------- 4 files changed, 117 insertions(+), 85 deletions(-) diff --git a/README.md b/README.md index adf1632..f771779 100644 --- a/README.md +++ b/README.md @@ -30,62 +30,89 @@ of the number of times a page has been viewed ...
So we decided to create one. +### What Data to Capture/Store? +The _first_ question we asked ourselves was: +What is the ***minimum possible*** amount of (_useful/unique_) +**data** we can store ***per visit*** (_to one of our projects_)? -## How? +1. **date + time** (_timestamp_) ***when*** +the person visited the site/page.
+https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Date/now -Place a badge (*image*) in your repo `README.md` so others can -can see how popular the page is and you can track it. +2. **url** being visited. -### Implementation +3. **user-agent** the browser/device (_or "crawler"_) visiting the site/page +https://en.wikipedia.org/wiki/User_agent -What is the ***minimum possible*** amount of data we can store _per request_? +4. IP Address of the client. -1. **date+time** the person visited the site. -https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Date/now -2. **user-agent** the browser or crawler visiting the page -https://en.wikipedia.org/wiki/User_agent -3. **referer** url of the page where the image is requested from? -https://en.wikipedia.org/wiki/HTTP_referer +5. **language** of the person's web browser. +_Note: While not "essential", we added **Browser Language** +as the **5th** piece of data (when it is set/sent by the browser/device) +because it's **insightful** to know what language people are using +so that we can determine if we should be **translating**/"**localising**" +our content._ -Log entries are stored as a (_space delimited_) `String` + +Log entries are stored as a (_"pipe" delimited_) `String` which can be parsed and re-formatted into any other format: ```sh -1436570536950 x7uapo9 84.91.136.21 EN-GB +1436570536950|github.com/dwyl/the-book|Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)|88.88.88.88|EN-US ``` -> _**Note: while not "essential", we added **Browser Language** -> as the **4th** piece of data (when it is set/sent by the browser/device) -> because it's **insightful** to know what language people are using -> so that we can determine if we should be **translating**/"**localising**" -> our content._ +This is perhaps best viewed as a table: + +| Timestamp | URL | User Agent | IP Address | Language | +| ------------- |:------------|:------------|:------------:|:--------:| +| 1436570536950 | github.com/dwyl/the-book | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) | 84.91.136.21 | EN-GB | -| Timestamp | User Agent | IP Address | Language | -| ------------- |:------------|:------------:|:--------:| -| 1436570536950 | x7uapo9 | 84.91.136.21 | EN-GB | +### Reducing Storage Costs -We then have a user-agent hash where we can lookup the by id: +If a person views multiple pages, three pieces of data are duplicated: +User Agent, IP Address and Language. +Rather than storing this data multiple times, we _hash_ the data +and store the hash as a lookup. + +#### Hash Long Data + +If we run the following Browser|IP|Language `String` +```sh +'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)|84.91.136.21|EN-US' +``` +through a **sha512** hash function we get: `8HKg3NB5Cf` (_always_). + +Sample code: ```js -{ - "x7uapo9":"Mozilla/5.0 (iPad; U; CPU OS 3_2_1 like Mac OS X; en-us) AppleWebKit/531.21.10", - "N03v1lz":"Googlebot/2.1 (+http://www.google.com/bot.html)" -} +var hash = require('./lib/hash.js'); +var user_agent_string = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)|88.88.88.88|EN-US'; +var agent_hash = hash(user_agent_string, 10); // 8HKg3NB5Cf ``` -### Fetch SVG from shields.io and serve it just-in-time +#### Hit Data With Hash -Given that shields.io has a badge creation service, -and it has acceptable latency, we are proxying the their service. +``` +1436570536950|github.com/dwyl/the-book|8HKg3NB5Cf +``` -## Run it! + +## How? + +Place a badge (*image*) in your repo `README.md` so others can +can see how popular the page is and you can track it. + + + +## _Run_ it Your_self_! Download (clone) the code to your local machine: + ```sh git clone https://github.com/dwyl/hits.git && cd hits ``` -> Note: you will need to have Redis running on your localhost, -> if you are new to Redis see: https://github.com/dwyl/learn-redis + +> Note: you will need to have Node.js running on your localhost. Install dependencies: ```sh @@ -98,6 +125,20 @@ npm run dev Visit: http://localhost:8000/any/url/count.svg +# Data Storage + +Recording the "hit" data is _essential_ +for this app to _work_ and be _useful_. + +We have built it to work with _two_ "data stores": +Filesystem and Redis
+> _**Note**: you only need **one** storage option to be available_. + +## Filesystem + + + + ## Research ### User Agents @@ -122,4 +163,11 @@ http://www.monitorware.com/en/logsamples/apache.php https://nodejs.org/api/http.html#http_message_rawheaders -> Try: +## Running the Test Suite locally + +The test suite includes tests for 3 databases +therefore running the tests on your `localhost` +requires all 3 to be running. + +Deploying and _using_ the app only requires _one_ +of the databases to be available. diff --git a/lib/db_redis.js b/lib/db_redis.js index 536f528..c765a70 100644 --- a/lib/db_redis.js +++ b/lib/db_redis.js @@ -7,11 +7,17 @@ var hash = require('./hash.js'); * @param {Function} callback - call this once redis responds */ module.exports = function redis_save_hit (hit, callback) { - var parts = hit.split('|'); - var agent = hash(parts[2], 7); - redisClient.hset('agents', agent, parts[2]); - parts[2] = agent; // save space in db replacing user-agent with shorter hash - redisClient.rpush(parts[1], parts.join('|'), function (err, data) { + var h = hit.split('|'); // See README.md#How secton for sample data + var url = h[1]; + + // save unique hash of browser data to avoid duplication + var unique_browser_string = [ h[2], h[3], h[4] ].join('|'); + var hashed_agent = hash(unique_browser_string, 10); + redisClient.hset('agents', hashed_agent, unique_browser_string); + + // save hit data with hashed browser data: + var entry = h[0] + '|' + hashed_agent; + redisClient.rpush(url, entry, function (err, data) { callback(err, data) }); } diff --git a/lib/extract_request_data.js b/lib/extract_request_data.js index 230d8f4..747e8e4 100644 --- a/lib/extract_request_data.js +++ b/lib/extract_request_data.js @@ -1,26 +1,28 @@ -var uniki = require('uniki'); // to create super-short hash of the User Agent +/** + * This file/module's only job is to extract the request data from http headers + * @param {Object} request - the standard nodejs http request object. + * @returns {string} hit - see readme for format. + */ +module.exports = function extract (request) { + var h = request.headers || {}; // shortcut to headers reduces typing + var lang; // the browser language -// This file/module's only job is extracting the request data from http headers -module.exports = function extract (req) { - var h = req.headers; // shortcut to headers reduces typing - var agent = uniki(h['user-agent'], 7); // the user-agent for device/browser - var lang = ''; // the browser language - // get the user's IP addres from headers or connection object: - var ip = h['x-forwarded-for'] || req.connection.remoteAddress; - + var ip = h['x-forwarded-for'] || + request.connection && request.connection.remoteAddress; + // get url the client requested: - var url = req.url.replace('.svg', '') + var url = request.url.replace('.svg', '') .replace('.png', '') .replace('https://github.com/', ''); // strip to save storage if(h['accept-language']) { // Language for: github.com/dwyl/hits/issues/43 if (h['accept-language'].indexOf(',') > -1) { // e.g: en-GB,en;q=0.5 - lang = h['accept-language'].split(',')[0].toUpperCase(); + lang = h['accept-language'].split(',')[0].toUpperCase(); // e.g: EN-GB } else { lang = h['accept-language'].toUpperCase(); } } - + return [Date.now(), url, h['user-agent'], ip, lang].join('|'); } diff --git a/lib/hits.js b/lib/hits.js index 7da52be..13c523b 100644 --- a/lib/hits.js +++ b/lib/hits.js @@ -1,41 +1,17 @@ -var redisClient = require('redis-connection')(); -var uniki = require('uniki'); /** * add - adds an entry into the List for a given url - * @param {String} url - the url for the hit * @param {Object} hit - the hit we just received * @param {Function} callback - call this once redis responds */ -module.exports.add = function add (hit, callback) { - // console.log(hit); - var url = hit.url.replace('https://github.com', ''); // don't waste RAM! - var now = Date.now(); - var agent = uniki(hit['user-agent'],7); - if(hit['accept-language'] && hit['accept-language'].indexOf(',') > -1){ - hit.lang = hit['accept-language'].split(',')[0]; - } else { - hit.lang = ''; - } - // console.log('agent',agent); - redisClient.hset('agents', agent, hit['user-agent']); - var entry = now + ' ' + agent + ' ' +hit.lang + ' ' + hit.ip - redisClient.rpush(url, entry, function (err, data) { - callback(err, data) - }); +module.exports = function add (hit, callback) { + // if(process.env.REDISCLOUD_URL) { + return require('./db_redis.js')(hit, callback); + // } + // var parts = hit.split('|'); + // var agent = uniki(parts[2],7); + // redisClient.hset('agents', agent, parts[2]); + // parts[2] = agent; // replace long-form user-agent with shorter hash + // redisClient.rpush(parts[1], parts.join('|'), function (err, data) { + // callback(err, data) + // }); } - -/** - * count - counts the number of hits for a given url - * @param {String} url - the url for the hit - * @param {Function} callback - call this once redis responds - */ -module.exports.count = function count (url, callback) { - console.log(url); - url = url.replace('https://github.com', ''); // don't waste space in Redis - redisClient.llen(url, function(err, data){ - callback(err, data); - }); -} - - -module.exports.redisClient = redisClient; From 5a805f9fcdbd8832b2735f546bb99ec6dcc83acf Mon Sep 17 00:00:00 2001 From: nelsonic Date: Fri, 25 Aug 2017 21:58:19 +0100 Subject: [PATCH 13/44] tidy up tests for #44 --- server.js | 2 +- test/extract_request_data.test.js | 14 +++++++++++++ test/hash.test.js | 9 +++++++- test/hits.test.js | 35 +++++++++++-------------------- test/make_svg.test.js | 4 ++++ 5 files changed, 39 insertions(+), 25 deletions(-) diff --git a/server.js b/server.js index db0a4db..f27e2d9 100644 --- a/server.js +++ b/server.js @@ -14,7 +14,7 @@ var app = http.createServer(function handler(req, res) { if (url.match(/svg/)) { var hit = extract(req); - hits.add(hit, function(err, count) { + hits(hit, function(err, count) { console.log(url, ' >> ', count); res.writeHead(200, HEAD); res.end(make_svg(count)); diff --git a/test/extract_request_data.test.js b/test/extract_request_data.test.js index e9f9d0f..2f6c083 100644 --- a/test/extract_request_data.test.js +++ b/test/extract_request_data.test.js @@ -38,3 +38,17 @@ test(file + 'fewer headers are set on request object', function(t){ '✓ extracted language: ' + hit.split('|')[4]) t.end(); }); + +test(file + 'no language defined', function(t){ + var req = { + 'url': '/my/awesome/url', + headers: { + 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)', + 'x-forwarded-for': '88.88.88.88' + } + } + var hit = extract(req); + t.ok(hit.split('|')[4] === '', + '✓ no language defined') + t.end(); +}); diff --git a/test/hash.test.js b/test/hash.test.js index a8b7c43..fd870b7 100644 --- a/test/hash.test.js +++ b/test/hash.test.js @@ -11,7 +11,14 @@ test("Create hash for url: 1234", function(t) { test("Full Length Hash", function(t) { var hash = uniki("RandomGobbledygook", 100); - t.true(hash.length === 78, "Full Length is " + hash.length + ' chars'); + t.true(hash.length === 78, "✓ Full Length is " + hash.length + ' chars'); + t.end(); +}); + +test("Browser Agent String Hash", function(t) { + var user_agent_string = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)|84.91.136.21|EN-US'; + var hash = uniki(user_agent_string, 10); + t.true(hash === '8HKg3NB5Cf', "Browser Data Hash: " + hash); t.end(); }); diff --git a/test/hits.test.js b/test/hits.test.js index d63fa7f..be09d79 100644 --- a/test/hits.test.js +++ b/test/hits.test.js @@ -2,44 +2,33 @@ var dir = __dirname.split('/')[__dirname.split('/').length-1]; var file = dir + __filename.replace(__dirname, '') + " > "; var test = require('tape'); var hits = require('../lib/hits'); +var extract = require('../lib/extract_request_data.js'); -test(file+'Add a hit to the list for that url', function(t){ +test(file+'Add a hit to the list for that url', function (t) { var req = { 'url': '/my/awesome/url', 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)', 'ip': '8.8.8.8', 'accept-language': 'en-US,en;q=0.8,pt;q=0.6,es;', } - hits.add(req, function(err, data) { - t.ok(data >= 0, '✓ REQ ' +req.url +' was added at a index: ' + data) - // hits.redisClient.end(); + var hit = extract(req); + hits(hit, function (err, count) { + // console.log('16 >>> ', err, count); + t.ok(count >= 0, '✓ URL ' +req.url +' was added at a index: ' + count); t.end(); }) }); -test(file+'Add a hit without language', function(t){ +test(file+'Add a hit without language', function (t) { var req = { 'url': '/my/awesome/url', 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)', 'ip': '8.8.8.8' } - hits.add(req, function(err, data) { - t.ok(data >= 0, '✓ REQ ' +req.url +' was added at a index: ' + data) - // hits.redisClient.end(); - t.end(); - }) -}); - -test(file+'Add a hit without language', function(t){ - var req = { - 'url': '/my/awesome/url', - 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)', - 'ip': '8.8.8.8' - } - hits.count(req.url, function(err, data) { - console.log(data); - t.ok(data >= 0, '✓ REQ ' +req.url +' was added at a index: ' + data) - hits.redisClient.end(true); - t.end(); + var hit = extract(req); + hits(hit, function (err, count) { + // console.log('30 >>> ', err, count); + t.ok(count >= 0, '✓ REQ ' +req.url +' was added at a index: ' + count); + t.end(require('redis-connection')().end(true)); // shutdown redis con }) }); diff --git a/test/make_svg.test.js b/test/make_svg.test.js index 14b085c..8257669 100644 --- a/test/make_svg.test.js +++ b/test/make_svg.test.js @@ -10,3 +10,7 @@ test(file + 'Make SVG file from template & count', function(t){ '✓ SVG created for count: ' + count) t.end(); }); + +test.onFinish(function () { + require('redis-connection')().end(true); // shutdown redis con +}) From 70810e24e11a2bc1557da0be2b42c8f86967edd0 Mon Sep 17 00:00:00 2001 From: nelsonic Date: Fri, 25 Aug 2017 23:30:46 +0100 Subject: [PATCH 14/44] adds implementation of filesystem db for when Redis is unavailable #42 --- .gitignore | 1 + lib/db_filesystem.js | 48 ++++++++++++++++++++++++++++++++++++++ lib/hits.js | 7 ------ package.json | 1 + test/db_filesystem.test.js | 26 +++++++++++++++++++++ test/hash.test.js | 10 -------- 6 files changed, 76 insertions(+), 17 deletions(-) create mode 100644 lib/db_filesystem.js create mode 100644 test/db_filesystem.test.js diff --git a/.gitignore b/.gitignore index 5bfc897..a611eb4 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,4 @@ node_modules config.env dump.rdb npm-debug.log +data/ diff --git a/lib/db_filesystem.js b/lib/db_filesystem.js new file mode 100644 index 0000000..8b306ac --- /dev/null +++ b/lib/db_filesystem.js @@ -0,0 +1,48 @@ +var fs = require('fs'); +var path = require('path'); +var hash = require('./hash.js'); +var mkdirp = require('mkdirp') +var data_dir = path.resolve(__dirname, '../data/'); +console.log(data_dir); +var agents_path = path.resolve(data_dir, 'agents'); +mkdirp(agents_path); +var assert = require('assert'); +var EOL = require('os').EOL; // https://stackoverflow.com/a/14063413/1148249 +/** + * add - adds an entry into the List for a given url + * @param {String} url - the url for the hit + * @param {Object} hit - the hit we just received + * @param {Function} callback - call this once redis responds + */ +module.exports = function redis_save_hit (hit, callback) { + var h = hit.split('|'); // See README.md#How secton for sample data + var url = h[1]; + var parts = url.split('/').filter(function(n){ return n != undefined }); + var dir = path.join(data_dir, parts.slice(0, -1).join('/')); + // console.log(parts.slice(0, -1).join('/')); + // console.log('dir:', dir); + + // save unique hash of browser data to avoid duplication + var unique_browser_string = [ h[2], h[3], h[4] ].join('|'); + var hashed_agent = hash(unique_browser_string, 10); + // console.log(unique_browser_string); + fs.writeFile(path.resolve(agents_path, hashed_agent), + unique_browser_string, function (err, data) { + // create directory for the url + mkdirp(dir, function (err) { + assert(!err); + var filepath = path.join(dir, parts[parts.length - 1]) + // console.log('filepath:', filepath); + // save hit data with hashed browser data: + var entry = h[0] + '|' + hashed_agent + EOL; + fs.appendFile(filepath, entry, function (err) { + assert(!err); + // count how many files are in the directory: + fs.readFile(filepath, 'utf8', (err, data) => { + // console.log(err, data); + callback(err, data.split(EOL).length - 1); + }); + }) + }); + }); +} diff --git a/lib/hits.js b/lib/hits.js index 13c523b..9efbd23 100644 --- a/lib/hits.js +++ b/lib/hits.js @@ -7,11 +7,4 @@ module.exports = function add (hit, callback) { // if(process.env.REDISCLOUD_URL) { return require('./db_redis.js')(hit, callback); // } - // var parts = hit.split('|'); - // var agent = uniki(parts[2],7); - // redisClient.hset('agents', agent, parts[2]); - // parts[2] = agent; // replace long-form user-agent with shorter hash - // redisClient.rpush(parts[1], parts.join('|'), function (err, data) { - // callback(err, data) - // }); } diff --git a/package.json b/package.json index 5302f55..84748c2 100644 --- a/package.json +++ b/package.json @@ -28,6 +28,7 @@ }, "homepage": "https://github.com/dwyl/hits#readme", "dependencies": { + "mkdirp": "^0.5.1", "redis-connection": "^5.4.0", "socket.io": "^2.0.3" }, diff --git a/test/db_filesystem.test.js b/test/db_filesystem.test.js new file mode 100644 index 0000000..62993ec --- /dev/null +++ b/test/db_filesystem.test.js @@ -0,0 +1,26 @@ +var dir = __dirname.split('/')[__dirname.split('/').length-1]; +var file = dir + __filename.replace(__dirname, '') + " > "; +var test = require('tape'); +var db = require('../lib/db_filesystem.js'); +var extract = require('../lib/extract_request_data.js'); + +test(file+'Add a hit to the list for that url', function (t) { + var req = { + 'url': '/my/awesome/url', + headers: { + 'accept-language': 'en-US,en;q=0.8,pt;q=0.6,es;', + 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)' + }, + connection: { + remoteAddress: '88.88.88.88' + } + } + var hit = extract(req); + db(hit, function (err, count) { + t.ok(count >= 0, '✓ URL ' +req.url +' has: ' + count) + db(hit, function (err, count2) { + t.ok(count === count2 - 1, '✓ URL ' +req.url +' has: ' + count2) + t.end(); + }) + }) +}); diff --git a/test/hash.test.js b/test/hash.test.js index fd870b7..db206eb 100644 --- a/test/hash.test.js +++ b/test/hash.test.js @@ -30,15 +30,5 @@ test("Consistenty check against 100 sample hashes", function(t) { t.true(expected === actual, '✓ hash(' + k + ') >> expected: ' + expected + ' === actual: ' + actual); }) - // var hash = 'H3ll0W0rld!'; - // var obj = {}; - // // var fixture = require('./hash_fixture.json'); - // for(var i=0; i < 101; i++){ - // obj[hash] = uniki(hash, 10); - // hash = uniki(hash); - // } - // console.log(JSON.stringify(obj, null, 2)) - // var hash = uniki("RandomGobbledygook", 100); - // t.true(hash.length === 78, "Full Length is " + hash.length + ' chars'); t.end(); }); From bbb43753d10236185eb748a7447951473fe3b973 Mon Sep 17 00:00:00 2001 From: nelsonic Date: Fri, 25 Aug 2017 23:34:24 +0100 Subject: [PATCH 15/44] tidy up "db" files for consistency #42 --- lib/db_filesystem.js | 29 +++++++++++++++-------------- lib/db_redis.js | 3 +-- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/lib/db_filesystem.js b/lib/db_filesystem.js index 8b306ac..86145ed 100644 --- a/lib/db_filesystem.js +++ b/lib/db_filesystem.js @@ -1,45 +1,46 @@ var fs = require('fs'); var path = require('path'); var hash = require('./hash.js'); -var mkdirp = require('mkdirp') -var data_dir = path.resolve(__dirname, '../data/'); -console.log(data_dir); -var agents_path = path.resolve(data_dir, 'agents'); -mkdirp(agents_path); +var mkdirp = require('mkdirp'); var assert = require('assert'); var EOL = require('os').EOL; // https://stackoverflow.com/a/14063413/1148249 + +var DATA_DIR = path.resolve(__dirname, '../data/'); +var agents_path = path.resolve(DATA_DIR, 'agents'); +mkdirp(agents_path); + /** * add - adds an entry into the List for a given url - * @param {String} url - the url for the hit * @param {Object} hit - the hit we just received * @param {Function} callback - call this once redis responds */ module.exports = function redis_save_hit (hit, callback) { - var h = hit.split('|'); // See README.md#How secton for sample data + var h = hit.split('|'); // See README.md#How secton for sample data var url = h[1]; var parts = url.split('/').filter(function(n){ return n != undefined }); - var dir = path.join(data_dir, parts.slice(0, -1).join('/')); - // console.log(parts.slice(0, -1).join('/')); - // console.log('dir:', dir); + var dir = path.join(DATA_DIR, parts.slice(0, -1).join('/')); // save unique hash of browser data to avoid duplication var unique_browser_string = [ h[2], h[3], h[4] ].join('|'); var hashed_agent = hash(unique_browser_string, 10); - // console.log(unique_browser_string); + fs.writeFile(path.resolve(agents_path, hashed_agent), unique_browser_string, function (err, data) { + // create directory for the url mkdirp(dir, function (err) { assert(!err); var filepath = path.join(dir, parts[parts.length - 1]) - // console.log('filepath:', filepath); + // save hit data with hashed browser data: var entry = h[0] + '|' + hashed_agent + EOL; fs.appendFile(filepath, entry, function (err) { assert(!err); - // count how many files are in the directory: + + // count how many lines are in the file for the URL: fs.readFile(filepath, 'utf8', (err, data) => { - // console.log(err, data); + assert(!err); + callback(err, data.split(EOL).length - 1); }); }) diff --git a/lib/db_redis.js b/lib/db_redis.js index c765a70..57d596d 100644 --- a/lib/db_redis.js +++ b/lib/db_redis.js @@ -2,12 +2,11 @@ var redisClient = require('redis-connection')(); var hash = require('./hash.js'); /** * add - adds an entry into the List for a given url - * @param {String} url - the url for the hit * @param {Object} hit - the hit we just received * @param {Function} callback - call this once redis responds */ module.exports = function redis_save_hit (hit, callback) { - var h = hit.split('|'); // See README.md#How secton for sample data + var h = hit.split('|'); // See README.md#How secton for sample data var url = h[1]; // save unique hash of browser data to avoid duplication From ddcbd68bbc33d3725aac5a51c4a7b76c7f438deb Mon Sep 17 00:00:00 2001 From: nelsonic Date: Fri, 25 Aug 2017 23:47:17 +0100 Subject: [PATCH 16/44] use filesystem when Redis is unavailable for #42 --- lib/hits.js | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/hits.js b/lib/hits.js index 9efbd23..96a8d30 100644 --- a/lib/hits.js +++ b/lib/hits.js @@ -4,7 +4,10 @@ * @param {Function} callback - call this once redis responds */ module.exports = function add (hit, callback) { - // if(process.env.REDISCLOUD_URL) { + if(process.env.REDISCLOUD_URL) { return require('./db_redis.js')(hit, callback); - // } + } + else { + return require('./db_filesystem.js')(hit, callback); + } } From c35cd6bc48a3a82792f57e0ceb6bbbfb0bda6031 Mon Sep 17 00:00:00 2001 From: nelsonic Date: Fri, 25 Aug 2017 23:56:22 +0100 Subject: [PATCH 17/44] update test/hits.test.js to use both "Datastores" fixes https://github.com/dwyl/hits/issues/42 --- test/hits.test.js | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/test/hits.test.js b/test/hits.test.js index be09d79..879957a 100644 --- a/test/hits.test.js +++ b/test/hits.test.js @@ -1,33 +1,49 @@ var dir = __dirname.split('/')[__dirname.split('/').length-1]; var file = dir + __filename.replace(__dirname, '') + " > "; var test = require('tape'); -var hits = require('../lib/hits'); var extract = require('../lib/extract_request_data.js'); -test(file+'Add a hit to the list for that url', function (t) { +test(file + 'REDIS add hit', function (t) { + process.env.REDISCLOUD_URL = 'redis://u:@127.0.0.1:6379'; + var hits = require('../lib/hits'); var req = { 'url': '/my/awesome/url', - 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)', - 'ip': '8.8.8.8', - 'accept-language': 'en-US,en;q=0.8,pt;q=0.6,es;', + headers: { + 'accept-language': 'en-US,en;q=0.8,pt;q=0.6,es;', + 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)' + }, + connection: { + remoteAddress: '88.8.88.8' + } } var hit = extract(req); hits(hit, function (err, count) { - // console.log('16 >>> ', err, count); - t.ok(count >= 0, '✓ URL ' +req.url +' was added at a index: ' + count); - t.end(); - }) + hits(hit, function (err, count2) { + // console.log('16 >>> ', err, count); + t.ok(count === count2 - 1, + '✓ URL ' +req.url +' was added at a index: ' + count); + require('redis-connection')().end(true) + require('decache')('../lib/hits'); + t.end(); + }); + }); }); -test(file+'Add a hit without language', function (t) { +test(file+'Filesystem add hit', function (t) { + delete process.env.REDISCLOUD_URL; // force use of Filesystem + var hits = require('../lib/hits'); var req = { 'url': '/my/awesome/url', - 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)', - 'ip': '8.8.8.8' + headers: { + 'accept-language': 'en-US,en;q=0.8,pt;q=0.6,es;', + 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)' + }, + connection: { + remoteAddress: '88.8.88.8' + } } var hit = extract(req); hits(hit, function (err, count) { - // console.log('30 >>> ', err, count); t.ok(count >= 0, '✓ REQ ' +req.url +' was added at a index: ' + count); t.end(require('redis-connection')().end(true)); // shutdown redis con }) From 544aea68ada7b76f458c768c2dfdd4835824e63e Mon Sep 17 00:00:00 2001 From: nelsonic Date: Sat, 26 Aug 2017 09:07:23 +0100 Subject: [PATCH 18/44] adds temporary debug console.log for heroku --- lib/db_filesystem.js | 9 +++++++-- server.js | 3 ++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/db_filesystem.js b/lib/db_filesystem.js index 86145ed..1c8830c 100644 --- a/lib/db_filesystem.js +++ b/lib/db_filesystem.js @@ -4,6 +4,7 @@ var hash = require('./hash.js'); var mkdirp = require('mkdirp'); var assert = require('assert'); var EOL = require('os').EOL; // https://stackoverflow.com/a/14063413/1148249 +var exec = require('child_process').exec; // var DATA_DIR = path.resolve(__dirname, '../data/'); var agents_path = path.resolve(DATA_DIR, 'agents'); @@ -42,8 +43,12 @@ module.exports = function redis_save_hit (hit, callback) { assert(!err); callback(err, data.split(EOL).length - 1); - }); - }) + }); // if slow, optimise: https://stackoverflow.com/questions/12453057 + }); }); }); } + +exec('wc /path/to/file', function (error, results) { + console.log(results); +}); diff --git a/server.js b/server.js index f27e2d9..b23db8d 100644 --- a/server.js +++ b/server.js @@ -11,9 +11,10 @@ var HEAD = require('./lib/headers.json'); // stackoverflow.com/a/2068407/1148249 var app = http.createServer(function handler(req, res) { var url = req.url; - + console.log(url); if (url.match(/svg/)) { var hit = extract(req); + console.log('hit:', hit); hits(hit, function(err, count) { console.log(url, ' >> ', count); res.writeHead(200, HEAD); From 3699032abd2e75175559e963c830d13245850f84 Mon Sep 17 00:00:00 2001 From: nelsonic Date: Sat, 26 Aug 2017 09:10:16 +0100 Subject: [PATCH 19/44] console.log(process.env.REDISCLOUD_URL) to check if Redis is available #42 --- lib/hits.js | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/hits.js b/lib/hits.js index 96a8d30..aec0193 100644 --- a/lib/hits.js +++ b/lib/hits.js @@ -4,6 +4,7 @@ * @param {Function} callback - call this once redis responds */ module.exports = function add (hit, callback) { + console.log('process.env.REDISCLOUD_URL: ', process.env.REDISCLOUD_URL); if(process.env.REDISCLOUD_URL) { return require('./db_redis.js')(hit, callback); } From 50131ced3f64765849750b2d2161c5854f75c449 Mon Sep 17 00:00:00 2001 From: nelsonic Date: Sat, 26 Aug 2017 21:54:09 +0100 Subject: [PATCH 20/44] tidy up server.js and revive socket.io "live updates" https://github.com/dwyl/hits/issues/17 --- lib/client.js | 21 ++++++++++++++++ lib/hits.js | 1 - lib/index.html | 31 ++++++++++++++++------- lib/style.css | 0 server.js | 68 +++++++++++++++++++------------------------------- 5 files changed, 68 insertions(+), 53 deletions(-) delete mode 100644 lib/style.css diff --git a/lib/client.js b/lib/client.js index c77deb0..17124dd 100644 --- a/lib/client.js +++ b/lib/client.js @@ -6,4 +6,25 @@ $( document ).ready(function() { console.log(data); socket.emit('my other event', { my: 'data' }); }); + + socket.on('hit', function (data) { + console.log(data.hit); + var h = data.hit.split('|'); + var hit = [h[0], h[1]].join(' '); + $('#hits').prepend('
' + hit + '
') + }); }); + + +function format_date (timestamp) { + var date = new Date(timestamp); +// Hours part from the timestamp +var hours = date.getHours(); +// Minutes part from the timestamp +var minutes = "0" + date.getMinutes(); +// Seconds part from the timestamp +var seconds = "0" + date.getSeconds(); + +// Will display time in 10:30:23 format +var formattedTime = hours + ':' + minutes.substr(-2) + ':' + seconds.substr(-2); +} diff --git a/lib/hits.js b/lib/hits.js index aec0193..96a8d30 100644 --- a/lib/hits.js +++ b/lib/hits.js @@ -4,7 +4,6 @@ * @param {Function} callback - call this once redis responds */ module.exports = function add (hit, callback) { - console.log('process.env.REDISCLOUD_URL: ', process.env.REDISCLOUD_URL); if(process.env.REDISCLOUD_URL) { return require('./db_redis.js')(hit, callback); } diff --git a/lib/index.html b/lib/index.html index e00c1da..c5434dc 100644 --- a/lib/index.html +++ b/lib/index.html @@ -4,17 +4,30 @@ Stats - - + + + - -

Stats!

+ +

+ Hits! +

+

+ The easy way to know how many people are + viewing your GitHub projects! +

+ +
+
+ - - diff --git a/lib/style.css b/lib/style.css deleted file mode 100644 index e69de29..0000000 diff --git a/server.js b/server.js index b23db8d..3af16fb 100644 --- a/server.js +++ b/server.js @@ -1,72 +1,54 @@ var port = process.env.PORT || 8000; var http = require('http'); // plain http server (no fancy framework required) var fs = require('fs'); // so we can open the file -var png = fs.readFileSync('./lib/1x1px.png'); // "tracking pixel" var hits = require('./lib/hits'); // our storage interface -var favicon = 'http://i.imgur.com/zBEQq4w.png'; // dwyl favicon var make_svg = require('./lib/make_svg.js'); var extract = require('./lib/extract_request_data.js'); +var FAVICON = 'http://i.imgur.com/zBEQq4w.png'; // dwyl favicon var HEAD = require('./lib/headers.json'); // stackoverflow.com/a/2068407/1148249 -var app = http.createServer(function handler(req, res) { - +var app = require('http').createServer(handler) +var io = require('socket.io')(app); + +io.on('connection', function (socket) { + socket.emit('news', { hello: 'world' }); + socket.on('my other event', function (data) { + console.log(data); + }); +}); + +app.listen(port); + +function handler (req, res) { var url = req.url; - console.log(url); + var hit = extract(req); + console.log(hit); if (url.match(/svg/)) { - var hit = extract(req); - console.log('hit:', hit); hits(hit, function(err, count) { + // var + io.sockets.emit('hit', { 'hit': hit }); console.log(url, ' >> ', count); res.writeHead(200, HEAD); res.end(make_svg(count)); }); } - // else if (url.match(/png/)) { // see: https://github.com/dwyl/hits/issues/4 - // hits.add(r, function(err, count) { - // console.log(r.url, ' >> ', count); - // res.writeHead(200, Object.assign(HEAD, {"Content-Type": "image/png"})); - // res.end(png); - // }); - // } else if(url === '/favicon.ico') { - res.writeHead(301, { "Location": favicon }); + console.log('favicon.ico'); + res.writeHead(301, { "Location": FAVICON }); res.end(); } - else if(url === '/stats') { - fs.readFile('./lib/index.html', 'utf8', function (err, data) { - res.writeHead(200, {"Content-Type": "text/html"}); - res.end(data); - }); - } - else if(url === '/client.js') { + else if(url === '/client.js') { // these can be cached in "Prod" ... fs.readFile('./lib/client.js', 'utf8', function (err, data) { res.writeHead(200, {"Content-Type": "application/javascript"}); res.end(data); }); } - else if(url === '/style.css') { - fs.readFile('./lib/style.css', 'utf8', function (err, data) { - res.writeHead(200, {"Content-Type": "text/css"}); + else { // echo the record without saving it + fs.readFile('./lib/index.html', 'utf8', function (err, data) { + res.writeHead(200, {"Content-Type": "text/html"}); res.end(data); }); } - else { // echo the record without saving it - console.log(" - - - - - - - - - - record:", r); - res.writeHead(200, {"Content-Type": "application/json"}); - res.end(JSON.stringify(r, null, " ")); - } // pretty JSON in Browser see: http://stackoverflow.com/a/5523967/1148249 -}).listen(port); - -var io = require('socket.io')(app); - -io.on('connection', function (socket) { - console.log(' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - '); - console.log(socket.client.conn); - console.log(' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - '); - socket.emit('news', { msg: 'welcome to stats-ville!' }); - socket.on('my other event', function (data) { - console.log(data); - }); -}); +} console.log('Visit http://localhost:' + port); From b3a6c3cfb8f5c46c17ddbbb8803af78fcd92f7ff Mon Sep 17 00:00:00 2001 From: nelsonic Date: Sat, 26 Aug 2017 22:20:35 +0100 Subject: [PATCH 21/44] update server.js to show "LAN IP Address" fixes #42 --- lib/index.html | 4 ++-- lib/lanip.js | 13 +++++++++++++ server.js | 7 ++++--- 3 files changed, 19 insertions(+), 5 deletions(-) create mode 100644 lib/lanip.js diff --git a/lib/index.html b/lib/index.html index c5434dc..cc3d3af 100644 --- a/lib/index.html +++ b/lib/index.html @@ -9,9 +9,9 @@ -

+

Hits! -

+

The easy way to know how many people are viewing your GitHub projects! diff --git a/lib/lanip.js b/lib/lanip.js new file mode 100644 index 0000000..f455b0f --- /dev/null +++ b/lib/lanip.js @@ -0,0 +1,13 @@ +// http://stackoverflow.com/questions/10750303 +var os = require('os'); +var interfaces = os.networkInterfaces(); +var ip = []; +for (var k in interfaces) { + for (var k2 in interfaces[k]) { + var address = interfaces[k][k2]; + if (address.family === 'IPv4' && !address.internal) { + ip.push(address.address); + } + } +} +module.exports = ip[0]; \ No newline at end of file diff --git a/server.js b/server.js index 3af16fb..7468bfa 100644 --- a/server.js +++ b/server.js @@ -1,12 +1,13 @@ var port = process.env.PORT || 8000; -var http = require('http'); // plain http server (no fancy framework required) -var fs = require('fs'); // so we can open the file +var fs = require('fs'); // so we can open the HTML & JS file var hits = require('./lib/hits'); // our storage interface var make_svg = require('./lib/make_svg.js'); var extract = require('./lib/extract_request_data.js'); + var FAVICON = 'http://i.imgur.com/zBEQq4w.png'; // dwyl favicon var HEAD = require('./lib/headers.json'); // stackoverflow.com/a/2068407/1148249 +// plain node.js http server (no fancy framework required!) var app = require('http').createServer(handler) var io = require('socket.io')(app); @@ -51,4 +52,4 @@ function handler (req, res) { } } -console.log('Visit http://localhost:' + port); +console.log('Visit ' + require('./lib/lanip') + ':'+ port); From 8351c420770741bf77bca76cbc7b6dc7af333a2e Mon Sep 17 00:00:00 2001 From: nelsonic Date: Sat, 26 Aug 2017 23:08:41 +0100 Subject: [PATCH 22/44] adds human-friendly format of hit for displaying in browser UI fixes https://github.com/dwyl/hits/issues/45 --- lib/format_hit.js | 22 ++++++++++++++++++++++ test/format_hit.test.js | 14 ++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 lib/format_hit.js create mode 100644 test/format_hit.test.js diff --git a/lib/format_hit.js b/lib/format_hit.js new file mode 100644 index 0000000..5332053 --- /dev/null +++ b/lib/format_hit.js @@ -0,0 +1,22 @@ +var hash = require('./hash.js'); +/** + * This file/module's only job is to format the Hit data for human-friendly UI + * @param {String} hit - the standard nodejs http request object. + * @param {Number} count - the count for the given url + * @returns {string} hit - human-friendly hit data for display in UI + */ +module.exports = function format_hit_for_ui (hit, count) { + var h = hit.split('|'); // See README.md#How secton for sample data + var url = h[1]; + var date = format_date_time_from_timestamp(h[0]) + // save unique hash of browser data to avoid duplication + var unique_browser_string = [ h[2], h[3], h[4] ].join('|'); + var hashed_agent = hash(unique_browser_string, 10); + return [date, url, count, hashed_agent].join(' '); +} + +function format_date_time_from_timestamp (timestamp) { + var date = new Date(timestamp * 1000).toJSON(); + var len = date.length; + return date.substring(0, len -5).replace('T', ' '); +} diff --git a/test/format_hit.test.js b/test/format_hit.test.js new file mode 100644 index 0000000..f9d1342 --- /dev/null +++ b/test/format_hit.test.js @@ -0,0 +1,14 @@ +var dir = __dirname.split('/')[__dirname.split('/').length-1]; +var file = dir + __filename.replace(__dirname, '') + " > "; +var test = require('tape'); +var format = require('../lib/format_hit.js') + +test("Create hash for url: 1234", function(t) { + var hit = '1503784599|/dwyl/hits|Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:54.0) Gecko/20100101 Firefox/54.0|::1|EN-GB'; + var count = 42; + var formatted = format(hit, count); + var expected = '2017-08-26 21:56:39 /dwyl/hits 42 3wtuQ6JcHR' + t.equal(formatted, expected , + '✓ Hit: ' + hit + ' formatted as: ' + formatted); + t.end(); +}); From 51c46b45b84fd7663938f0afea1240a6ae625757 Mon Sep 17 00:00:00 2001 From: nelsonic Date: Sat, 26 Aug 2017 23:26:14 +0100 Subject: [PATCH 23/44] use human-friendly format in UI via socket.io #17 & #45 --- lib/client.js | 14 -------------- lib/extract_request_data.js | 3 ++- lib/format_hit.js | 1 + lib/lanip.js | 7 +++++-- server.js | 8 +++----- 5 files changed, 11 insertions(+), 22 deletions(-) diff --git a/lib/client.js b/lib/client.js index 17124dd..04f67f4 100644 --- a/lib/client.js +++ b/lib/client.js @@ -14,17 +14,3 @@ $( document ).ready(function() { $('#hits').prepend('
' + hit + '
') }); }); - - -function format_date (timestamp) { - var date = new Date(timestamp); -// Hours part from the timestamp -var hours = date.getHours(); -// Minutes part from the timestamp -var minutes = "0" + date.getMinutes(); -// Seconds part from the timestamp -var seconds = "0" + date.getSeconds(); - -// Will display time in 10:30:23 format -var formattedTime = hours + ':' + minutes.substr(-2) + ':' + seconds.substr(-2); -} diff --git a/lib/extract_request_data.js b/lib/extract_request_data.js index 747e8e4..da7a29e 100644 --- a/lib/extract_request_data.js +++ b/lib/extract_request_data.js @@ -24,5 +24,6 @@ module.exports = function extract (request) { } } - return [Date.now(), url, h['user-agent'], ip, lang].join('|'); + return [ Math.floor(Date.now()/1000), + url, h['user-agent'], ip, lang ].join('|'); } diff --git a/lib/format_hit.js b/lib/format_hit.js index 5332053..5b9855c 100644 --- a/lib/format_hit.js +++ b/lib/format_hit.js @@ -9,6 +9,7 @@ module.exports = function format_hit_for_ui (hit, count) { var h = hit.split('|'); // See README.md#How secton for sample data var url = h[1]; var date = format_date_time_from_timestamp(h[0]) + // save unique hash of browser data to avoid duplication var unique_browser_string = [ h[2], h[3], h[4] ].join('|'); var hashed_agent = hash(unique_browser_string, 10); diff --git a/lib/lanip.js b/lib/lanip.js index f455b0f..fc7c52e 100644 --- a/lib/lanip.js +++ b/lib/lanip.js @@ -1,4 +1,7 @@ -// http://stackoverflow.com/questions/10750303 +/** + * If you want to now the IP Address on the Local Network you're in luck! + * see/credit: http://stackoverflow.com/questions/10750303 + */ var os = require('os'); var interfaces = os.networkInterfaces(); var ip = []; @@ -10,4 +13,4 @@ for (var k in interfaces) { } } } -module.exports = ip[0]; \ No newline at end of file +module.exports = ip[0]; diff --git a/server.js b/server.js index 7468bfa..334f7c2 100644 --- a/server.js +++ b/server.js @@ -3,6 +3,7 @@ var fs = require('fs'); // so we can open the HTML & JS file var hits = require('./lib/hits'); // our storage interface var make_svg = require('./lib/make_svg.js'); var extract = require('./lib/extract_request_data.js'); +var format = require('./lib/format_hit.js') var FAVICON = 'http://i.imgur.com/zBEQq4w.png'; // dwyl favicon var HEAD = require('./lib/headers.json'); // stackoverflow.com/a/2068407/1148249 @@ -18,23 +19,19 @@ io.on('connection', function (socket) { }); }); -app.listen(port); - function handler (req, res) { var url = req.url; var hit = extract(req); console.log(hit); if (url.match(/svg/)) { hits(hit, function(err, count) { - // var - io.sockets.emit('hit', { 'hit': hit }); + io.sockets.emit('hit', { 'hit': format(hit, count) }); console.log(url, ' >> ', count); res.writeHead(200, HEAD); res.end(make_svg(count)); }); } else if(url === '/favicon.ico') { - console.log('favicon.ico'); res.writeHead(301, { "Location": FAVICON }); res.end(); } @@ -52,4 +49,5 @@ function handler (req, res) { } } +app.listen(port); console.log('Visit ' + require('./lib/lanip') + ':'+ port); From f03194baf55da20d8787f931f674e35aa3fc9037 Mon Sep 17 00:00:00 2001 From: nelsonic Date: Sat, 26 Aug 2017 23:30:15 +0100 Subject: [PATCH 24/44] tidy up client.js for #17 --- lib/client.js | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/lib/client.js b/lib/client.js index 04f67f4..fb169f4 100644 --- a/lib/client.js +++ b/lib/client.js @@ -1,16 +1,12 @@ -// connect to websocket server $( document ).ready(function() { console.log('Ready!', window.location.host); var socket = io(window.location.host); socket.on('news', function (data) { - console.log(data); + // console.log(data); socket.emit('my other event', { my: 'data' }); }); socket.on('hit', function (data) { - console.log(data.hit); - var h = data.hit.split('|'); - var hit = [h[0], h[1]].join(' '); - $('#hits').prepend('
' + hit + '
') + $('#hits').prepend('
' + data.hit + '
') }); }); From f4f22304f4c823732fc9e403527521a8206d64dd Mon Sep 17 00:00:00 2001 From: nelsonic Date: Sat, 26 Aug 2017 23:38:06 +0100 Subject: [PATCH 25/44] remove child process exec (not being used) from db_filesystem.js --- lib/db_filesystem.js | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/db_filesystem.js b/lib/db_filesystem.js index 1c8830c..c66c00a 100644 --- a/lib/db_filesystem.js +++ b/lib/db_filesystem.js @@ -4,7 +4,6 @@ var hash = require('./hash.js'); var mkdirp = require('mkdirp'); var assert = require('assert'); var EOL = require('os').EOL; // https://stackoverflow.com/a/14063413/1148249 -var exec = require('child_process').exec; // var DATA_DIR = path.resolve(__dirname, '../data/'); var agents_path = path.resolve(DATA_DIR, 'agents'); @@ -48,7 +47,3 @@ module.exports = function redis_save_hit (hit, callback) { }); }); } - -exec('wc /path/to/file', function (error, results) { - console.log(results); -}); From d4619798c1f7d56c52821320544297efddab282d Mon Sep 17 00:00:00 2001 From: nelsonic Date: Sun, 27 Aug 2017 08:50:07 +0100 Subject: [PATCH 26/44] update to latest version of JQuery (3.2.1) fixes https://github.com/dwyl/hits/issues/46 --- README.md | 5 +++-- lib/index.html | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f771779..3074c46 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,10 @@ What if there was a *simple+easy* way to see how many people have viewed your Gi ## Why? -We have a _few_ repos on GitHub ... +We have a _few_ projects on GitHub ... but _sadly_, we ~~have~~ _had_ no idea how many people -are looking at the repos unless they star/watch them; +are looking at the repos
+unless they star/watch them; GitHub does not share any stats with people using their site. We would like to *know* the popularity of each of our repos diff --git a/lib/index.html b/lib/index.html index cc3d3af..0dfe85f 100644 --- a/lib/index.html +++ b/lib/index.html @@ -27,7 +27,7 @@

font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; } - + From 97f595aab773c0f2f2c74c5367247698c52952d5 Mon Sep 17 00:00:00 2001 From: nelsonic Date: Mon, 28 Aug 2017 09:21:57 +0100 Subject: [PATCH 27/44] [WiP] re-writing client.js to not use JQuery for https://github.com/dwyl/hits/issues/46 --- README.md | 33 ++++++++++++++++++++++----------- lib/client.js | 26 +++++++++++++++++++++----- lib/db_filesystem.js | 5 +++-- lib/index.html | 3 ++- server.js | 4 ++-- 5 files changed, 50 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 3074c46..b26004d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # hits -What if there was a *simple+easy* way to see how many people have viewed your GitHub Repository? +A _simple + easy_ way to see how many people have _viewed_ your GitHub Repository? [![Build Status](https://travis-ci.org/dwyl/hits.svg)](https://travis-ci.org/dwyl/hits) [![HitCount](https://hitt.herokuapp.com/nelsonic/hits.svg)](https://github.com/nelsonic/hits) @@ -11,14 +11,15 @@ What if there was a *simple+easy* way to see how many people have viewed your Gi ## Why? -We have a _few_ projects on GitHub ... -but _sadly_, we ~~have~~ _had_ no idea how many people -are looking at the repos
-unless they star/watch them; -GitHub does not share any stats with people using their site. +We have a _few_ projects on GitHub ...
+_Sadly_, we ~~have~~ _had_ no idea how many people +are _reading/using_ the projects +unless people star/watch them; +GitHub does not share any stats so we . -We would like to *know* the popularity of each of our repos -to know where we need to be investing our time. +We want to *know* the popularity of each of our repos +to know what people are finding _useful_ and help us +decide where we need to be investing our time. ## What? @@ -27,9 +28,10 @@ A simple way to add (*very basic*) analytics to your GitHub repos. There are already *many* "badges" that people use in their repos. See: [github.com/dwyl/**repo-badges**](https://github.com/dwyl/repo-badges)
But we haven't seen one that gives a "***hit counter***" -of the number of times a page has been viewed ...
-So we decided to create one. +of the number of times a GitHub page has been viewed ...
+So, in today's mini project we're going to _create_ a _basic **Web Counter**_. +https://en.wikipedia.org/wiki/Web_counter ### What Data to Capture/Store? @@ -55,6 +57,15 @@ because it's **insightful** to know what language people are using so that we can determine if we should be **translating**/"**localising**" our content._ +### Log Format + +For simplicity, we are using the "Common Log Format" (CLF). + +An example log entry: +``` +127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 +``` +for more detail see: https://en.wikipedia.org/wiki/Common_Log_Format Log entries are stored as a (_"pipe" delimited_) `String` which can be parsed and re-formatted into any other format: @@ -76,7 +87,7 @@ User Agent, IP Address and Language. Rather than storing this data multiple times, we _hash_ the data and store the hash as a lookup. -#### Hash Long Data +#### Hash Long Repeating (Identical) Data If we run the following Browser|IP|Language `String` ```sh diff --git a/lib/client.js b/lib/client.js index fb169f4..5f353e2 100644 --- a/lib/client.js +++ b/lib/client.js @@ -1,12 +1,28 @@ -$( document ).ready(function() { +var root = document.getElementById("hits"); + +// $( document ).ready(function() { console.log('Ready!', window.location.host); var socket = io(window.location.host); socket.on('news', function (data) { - // console.log(data); - socket.emit('my other event', { my: 'data' }); + console.log(data); + socket.emit('hello', { msg: 'Hi!' }); }); socket.on('hit', function (data) { - $('#hits').prepend('
' + data.hit + '
') + // $('#hits').prepend('
' + data.hit + '
') + var previous = root.childNodes[0]; + root.insertBefore(div(Date.now(), data.hit), previous); }); -}); +// }); + +// borrowed from: https://git.io/v536m +function div(divid, text) { + var div = document.createElement('div'); + div.id = divid; + div.className = divid; + if(text !== undefined) { // if text is passed in render it in a "Text Node" + var txt = document.createTextNode(text); + div.appendChild(txt); + } + return div; +} diff --git a/lib/db_filesystem.js b/lib/db_filesystem.js index c66c00a..7fbea06 100644 --- a/lib/db_filesystem.js +++ b/lib/db_filesystem.js @@ -29,13 +29,14 @@ module.exports = function redis_save_hit (hit, callback) { // create directory for the url mkdirp(dir, function (err) { + console.log(dir, err); assert(!err); - var filepath = path.join(dir, parts[parts.length - 1]) + var filepath = path.join(dir, parts[parts.length - 1], '.txt') // save hit data with hashed browser data: var entry = h[0] + '|' + hashed_agent + EOL; fs.appendFile(filepath, entry, function (err) { - assert(!err); + assert(!err); // maybe we need better error handling/reporting here ...? // count how many lines are in the file for the URL: fs.readFile(filepath, 'utf8', (err, data) => { diff --git a/lib/index.html b/lib/index.html index 0dfe85f..4512f4a 100644 --- a/lib/index.html +++ b/lib/index.html @@ -1,7 +1,7 @@ - Stats + Hits! @@ -18,6 +18,7 @@

+
Dummy Child Node for insertBefore to work
- From 0ffd463b530f4200d2b686f3981ba8115f4e4aa4 Mon Sep 17 00:00:00 2001 From: nelsonic Date: Mon, 28 Aug 2017 17:48:12 +0100 Subject: [PATCH 34/44] one log file per endpoint fixes https://github.com/dwyl/hits/issues/48 --- lib/db_filesystem.js | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/lib/db_filesystem.js b/lib/db_filesystem.js index b1c8468..3f691e5 100644 --- a/lib/db_filesystem.js +++ b/lib/db_filesystem.js @@ -26,31 +26,37 @@ if (!fs.existsSync(AGENTS_DIR)) { // ignored if already exists module.exports = function file_save_hit (hit, callback) { var h = hit.split('|'); // See README.md#How secton for sample data var url = h[1]; + var LOG_FILE = url.split('/').join('_').replace(':', '') + '.log'; var count = 1; // hit count starts at 1 // save unique hash of browser data to avoid duplication var unique_browser_string = [ h[2], h[3], h[4] ].join('|'); var hashed_agent = hash(unique_browser_string, 10); - // save unique data in file to reduce duplication in logs var agent_path = path.join(path.resolve(AGENTS_DIR, hashed_agent)) fs.writeFile(agent_path, unique_browser_string, function (err, data) { error_log(err, 'unable to save agent data: ' + agent_path); lineReader = rl.createInterface({ input: require('fs').createReadStream(LOG_FILE) + .on('error', function (err) { + console.log('Error!', err); + error_log(err, 'unable to save agent data: ' + LOG_FILE); + var entry = [h[0], h[1], hashed_agent, count].join('|') + EOL; + fs.appendFile(LOG_FILE, entry, function (err) { + error_log(err, 'unable to APPEND to file:' + LOG_FILE); + callback(err, count); + }); + }) }); var lines = []; lineReader.on('line', function (line) { - if (line.length > 1 && line.indexOf(url) > -1) { - lines.push(line); - } + lines.push(line); }); + lineReader.on('close', function() { - if (lines && lines.length > 0) { - var last_line = lines[lines.length - 1]; - var parts = last_line.split('|'); // parse and incremnt count: - count = parseInt(parts[parts.length - 1], 10) + 1; - } + var last_line = lines[lines.length - 1]; + var parts = last_line.split('|'); // parse and incremnt count: + count = parseInt(parts[parts.length - 1], 10) + 1; var entry = [h[0], h[1], hashed_agent, count].join('|') + EOL; fs.appendFile(LOG_FILE, entry, function (err) { From c60e01b0921167ac26039bbe811c2438005f8b9c Mon Sep 17 00:00:00 2001 From: nelsonic Date: Mon, 28 Aug 2017 17:52:34 +0100 Subject: [PATCH 35/44] wrap socket.io code in client.js in setTimeout function with minor delay for #46 --- lib/client.js | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/lib/client.js b/lib/client.js index 2dbb2e7..9dad12d 100644 --- a/lib/client.js +++ b/lib/client.js @@ -1,25 +1,27 @@ var root = document.getElementById("hits"); - console.log('Ready!', window.location.host); -var socket = io(window.location.host); -socket.on('news', function (data) { - console.log(data); - socket.emit('hello', { msg: 'Hi!' }); -}); -socket.on('hit', function (data) { - var previous = root.childNodes[0]; - root.insertBefore(div(Date.now(), data.hit), previous); -}); +setTimeout(function(){ + var socket = io(window.location.host); + socket.on('news', function (data) { + console.log(data); + socket.emit('hello', { msg: 'Hi!' }); + }); + + socket.on('hit', function (data) { + var previous = root.childNodes[0]; + root.insertBefore(div(Date.now(), data.hit), previous); + }); -// borrowed from: https://git.io/v536m -function div(divid, text) { - var div = document.createElement('div'); - div.id = divid; - div.className = divid; - if(text !== undefined) { // if text is passed in render it in a "Text Node" - var txt = document.createTextNode(text); - div.appendChild(txt); + // borrowed from: https://git.io/v536m + function div(divid, text) { + var div = document.createElement('div'); + div.id = divid; + div.className = divid; + if(text !== undefined) { // if text is passed in render it in a "Text Node" + var txt = document.createTextNode(text); + div.appendChild(txt); + } + return div; } - return div; -} +}, 500); From 606c10a836c1aa20f255582598ee5b5e7db62d67 Mon Sep 17 00:00:00 2001 From: nelsonic Date: Mon, 28 Aug 2017 18:10:45 +0100 Subject: [PATCH 36/44] put log files in /logs folder (duh!) for https://github.com/dwyl/hits/issues/48 --- lib/db_filesystem.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/db_filesystem.js b/lib/db_filesystem.js index 3f691e5..b9b759d 100644 --- a/lib/db_filesystem.js +++ b/lib/db_filesystem.js @@ -26,7 +26,8 @@ if (!fs.existsSync(AGENTS_DIR)) { // ignored if already exists module.exports = function file_save_hit (hit, callback) { var h = hit.split('|'); // See README.md#How secton for sample data var url = h[1]; - var LOG_FILE = url.split('/').join('_').replace(':', '') + '.log'; + var LOG_FILE = path.join(LOG_DIR, + url.split('/').join('_').replace(':', '') + '.log'); var count = 1; // hit count starts at 1 // save unique hash of browser data to avoid duplication var unique_browser_string = [ h[2], h[3], h[4] ].join('|'); @@ -52,7 +53,6 @@ module.exports = function file_save_hit (hit, callback) { lines.push(line); }); - lineReader.on('close', function() { var last_line = lines[lines.length - 1]; var parts = last_line.split('|'); // parse and incremnt count: From f025dda011b7d7f875e10e4e6536b04dcb050a76 Mon Sep 17 00:00:00 2001 From: nelsonic Date: Mon, 28 Aug 2017 18:49:00 +0100 Subject: [PATCH 37/44] update badges in readme to use "flat style" :wink: --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index f492ba8..5dae300 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,21 @@ # hits -A _simple + easy_ way to see how many people have _viewed_ your GitHub Repository? +A _simple + easy_ way to see how many people have _viewed_ your GitHub Repository. -[![Build Status](https://travis-ci.org/dwyl/hits.svg)](https://travis-ci.org/dwyl/hits) -[![HitCount](https://hitt.herokuapp.com/nelsonic/hits.svg)](https://github.com/nelsonic/hits) -[![codecov.io](http://codecov.io/github/dwyl/hits/coverage.svg?branch=master)](http://codecov.io/github/dwyl/hits?branch=master) -[![Dependency Status](https://david-dm.org/dwyl/hits.svg)](https://david-dm.org/dwyl/hits) -[![devDependency Status](https://david-dm.org/dwyl/hits/dev-status.svg)](https://david-dm.org/dwyl/hits#info=devDependencies) +[![Build Status](https://img.shields.io/travis/dwyl/hits.svg?style=flat-square)](https://travis-ci.org/dwyl/hits) +[![HitCount](http://hits.dwyl.io/dwyl/hits.svg)](https://github.com/dwyl/hits) +[![codecov.io](https://img.shields.io/codecov/c/github/dwyl/hits/master.svg?style=flat-square)](http://codecov.io/github/dwyl/hits?branch=master) +[![Dependency Status](https://img.shields.io/david/dwyl/hits.svg?style=flat-square)](https://david-dm.org/dwyl/hits) +[![devDependency Status](https://img.shields.io/david/dev/dwyl/hits.svg?style=flat-square)](https://david-dm.org/dwyl/hits#info=devDependencies) ## Why? We have a _few_ projects on GitHub ...
_Sadly_, we ~~have~~ _had_ no idea how many people -are _reading/using_ the projects -unless people star/watch them; -GitHub does not share any stats so we . +are _reading/using_ the projects because GitHub only shares "[traffic](https://github.com/blog/1672-introducing-github-traffic-analytics)" stats +for the [_past 14 days_](https://github.com/dwyl/hits/issues/49) and not in "real time". +(_unless people star/watch the repo_) We want to *know* the popularity of each of our repos to know what people are finding _useful_ and help us From a6a4b1dd00bd0c049c612a3b65925e6296e03f1e Mon Sep 17 00:00:00 2001 From: nelsonic Date: Mon, 28 Aug 2017 22:56:26 +0100 Subject: [PATCH 38/44] remove mkdirp from list of dependencies as no longer used! see: #14 --- package.json | 1 - 1 file changed, 1 deletion(-) diff --git a/package.json b/package.json index 4831213..53edf74 100644 --- a/package.json +++ b/package.json @@ -28,7 +28,6 @@ }, "homepage": "https://github.com/dwyl/hits#readme", "dependencies": { - "mkdirp": "^0.5.1", "redis-connection": "^5.4.0", "socket.io": "^2.0.3" }, From a49b0c2681999a2ae38e67142120ea29e8f23a1d Mon Sep 17 00:00:00 2001 From: nelsonic Date: Tue, 29 Aug 2017 22:04:02 +0100 Subject: [PATCH 39/44] adds hit badge to index.html so we can count how many people visit the *service* fixes #22 --- README.md | 39 +++++++++++++++++++++++---------------- lib/index.html | 7 +++++-- package.json | 3 ++- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 5dae300..8888cee 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # hits -A _simple + easy_ way to see how many people have _viewed_ your GitHub Repository. +A _simple & easy_ way to see how many people have _viewed_ your GitHub Repository. [![Build Status](https://img.shields.io/travis/dwyl/hits.svg?style=flat-square)](https://travis-ci.org/dwyl/hits) [![HitCount](http://hits.dwyl.io/dwyl/hits.svg)](https://github.com/dwyl/hits) @@ -14,10 +14,11 @@ A _simple + easy_ way to see how many people have _viewed_ your GitHub Repositor We have a _few_ projects on GitHub ...
_Sadly_, we ~~have~~ _had_ no idea how many people are _reading/using_ the projects because GitHub only shares "[traffic](https://github.com/blog/1672-introducing-github-traffic-analytics)" stats -for the [_past 14 days_](https://github.com/dwyl/hits/issues/49) and not in "real time". -(_unless people star/watch the repo_) +for the [_past 14 days_](https://github.com/dwyl/hits/issues/49) and **not** in "***real time***". +(_unless people star/watch the repo_) Also, _manually_ checking who has viewed a +project is _exceptionally_ tedious when you have more than a handful of projects. -We want to *know* the popularity of each of our repos +We want to *know* the popularity of _each_ of our repos to know what people are finding _useful_ and help us decide where we need to be investing our time. @@ -37,20 +38,20 @@ https://en.wikipedia.org/wiki/Web_counter The _first_ question we asked ourselves was: What is the ***minimum possible*** amount of (_useful/unique_) -**data** we can store ***per visit*** (_to one of our projects_)? +**info** we can store ***per visit*** (_to one of our projects_)? 1. **date + time** (_timestamp_) ***when*** the person visited the site/page.
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Date/now -2. **url** being visited. +2. **url** being visited. i.e. which project was viewed. 3. **user-agent** the browser/device (_or "crawler"_) visiting the site/page https://en.wikipedia.org/wiki/User_agent -4. IP Address of the client. +4. IP Address of the client. (_for checking uniqueness_) -5. **language** of the person's web browser. +5. **Language** of the person's web browser. _Note: While not "essential", we added **Browser Language** as the **5th** piece of data (when it is set/sent by the browser/device) because it's **insightful** to know what language people are using @@ -75,7 +76,7 @@ Real example: The data makes sense when viewed as a table: -| IP Address of Client | User Identifier | User ID | Date+Imte of Request | URL of Request" | HTTP Status Code | Size of Response | +| IP Address of Client | User Identifier | User ID | Date+Imte of Request | Request "Verb" and URL of Request | HTTP Status Code | Size of Response | | -------------|:-----------|:--|:------------:|:--------:|:--|--|--| | 84.91.136.21 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) | 007 | [05/Aug/2017:16:50:51 -0000] | "GET github.com/dwyl/phase-two HTTP/1.0" | 200 | 42247 | @@ -84,13 +85,13 @@ as it contains a lot of _duplicate_ and some _useless_ data. We can do better. -### Alternative Log Format (ALF) +### Alternative Log Format ("ALF") From the CLF we can remove: + **IP Address**, **User Identifier** and **User ID** can be condensed into a single hash (_see below_). -+ **GET** - the word is implied by the service we are running (_we only accept GETs_) -+ **Response size** is irrelevant and will be the same for most requests. ++ "**GET**"" - the word is implied by the service we are running (_we only accept GET requests_) ++ **Response size** is _irrelevant_ and will be the same for most requests. | Timestamp | URL | User Agent | IP Address | Language | Hit Count | | ------------- |:------------|:------------|:------------:|:--------:| @@ -109,9 +110,9 @@ which can be parsed and re-formatted into any other format: 1436570536950|github.com/dwyl/phase-two|Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)|88.88.88.88|EN-US|42 ``` -### Reducing Storage Costs +### Reducing Storage (_Costs_) -If a person views _multiple_ pages, three pieces of data are duplicated: +If a person views _multiple_ pages, _three_ pieces of data are duplicated: User Agent, IP Address and Language. Rather than storing this data multiple times, we _hash_ the data and store the hash as a lookup. @@ -124,14 +125,20 @@ If we run the following `Browser|IP|Language` `String`: ``` through a **SHA** hash function we get: `8HKg3NB5Cf` (_always_)1. -Sample code: +_Sample_ code: ```js var hash = require('./lib/hash.js'); var user_agent_string = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5)|88.88.88.88|EN-US'; var agent_hash = hash(user_agent_string, 10); // 8HKg3NB5Cf ``` -1Note: SHA hash is always longer than +1Note: SHA hash is _always_ 40 characters, +but we _truncate_ it because 10 alphanumeric characters (_selected from a set of 26 letters + 10 digits_) +means there are 3610 = [3,656,158,440,062,976](http://www.wolframalpha.com/input/?i=36%5E10) +(_three and a half [**Quadrillion**](http://www.wolframalpha.com/input/?i=3,656,158,440,062,976+in+english)_) +possible strings which we consider "_enough_" entropy. +(_if you disagree, tell us why in an + [issue](https://github.com/dwyl/hits/issues)_!) #### Hit Data With Hash diff --git a/lib/index.html b/lib/index.html index 6dff8bd..4deba31 100644 --- a/lib/index.html +++ b/lib/index.html @@ -9,8 +9,11 @@ -

- Hits! +

+ Hits! + + HitCount +

The easy way to know how many people are diff --git a/package.json b/package.json index 53edf74..f3515fc 100644 --- a/package.json +++ b/package.json @@ -21,7 +21,7 @@ "hits", "hit counter" ], - "author": "this guy", + "author": "some random person on the internet...", "license": "GPL-2.0", "bugs": { "url": "https://github.com/dwyl/hits/issues" @@ -34,6 +34,7 @@ "devDependencies": { "decache": "^4.1.0", "istanbul": "^0.4.4", + "mkdirp": "^0.5.1", "nodemon": "^1.11.0", "pre-commit": "^1.2.2", "rimraf": "^2.6.1", From 5c2b20fed175991e1c3656c26d7640492420d4e2 Mon Sep 17 00:00:00 2001 From: nelsonic Date: Mon, 4 Sep 2017 11:52:32 +0100 Subject: [PATCH 40/44] adds UI for creating badges fixes https://github.com/dwyl/hits/issues/51 --- .gitignore | 2 +- lib/client.js | 23 +++++++++++++++++++++++ lib/index.html | 50 +++++++++++++++++++++++++++++++++++++++----------- server.js | 12 ++++++------ 4 files changed, 69 insertions(+), 18 deletions(-) diff --git a/.gitignore b/.gitignore index a611eb4..2d1a24e 100644 --- a/.gitignore +++ b/.gitignore @@ -26,7 +26,7 @@ build/Release # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git node_modules -config.env +*.env dump.rdb npm-debug.log data/ diff --git a/lib/client.js b/lib/client.js index 9dad12d..65d7aa6 100644 --- a/lib/client.js +++ b/lib/client.js @@ -24,4 +24,27 @@ setTimeout(function(){ } return div; } + display_badge_markdown(); // render initial markdown template }, 500); + +// Markdown Template +var mt = '[![HitCount](http://hits.dwyl.io/{user}/{repo}.svg)](http://hits.dwyl.io/{user}/{repo})'; + +function generate_markdown () { + var user = document.getElementById("username").value || '{username}'; + var repo = document.getElementById("repo").value || '{project}'; + // console.log('user: ', user, 'repo: ', repo); + return mt.replace(/{user}/g, user).replace(/{repo}/g, repo); +} + +function display_badge_markdown() { + var md = generate_markdown() + var pre = document.getElementById("badge").innerHTML = md; +} + +var get = document.getElementsByTagName('input'); + for (i = 0; i < get.length; i++) { + get[i].addEventListener('keyup', display_badge_markdown, false); + get[i].addEventListener('keyup', display_badge_markdown, false); + + } diff --git a/lib/index.html b/lib/index.html index 4deba31..d04476d 100644 --- a/lib/index.html +++ b/lib/index.html @@ -9,27 +9,55 @@ -

+

Hits! - - HitCount + + Hit Count

-

+

The easy way to know how many people are viewing your GitHub projects!

- + +

How?

+ + +

+ Input your GitHub Username + ( or org name): + +

+ +

+ Input the GitHub Project/Repository name: + +

+ +

Your Badge Markdown:

+
+      
+    
+ +

+ Copy the markdown snippet and Paste it into your README.md file + to start tracking the view count on your GitHub project! +

+ +

Recently Viewed Projects (tracked by Hits)

Dummy Child Node for insertBefore to work
diff --git a/server.js b/server.js index d5863c0..9fb1823 100644 --- a/server.js +++ b/server.js @@ -23,16 +23,16 @@ function handler (req, res) { var url = req.url; var hit = extract(req); console.log(hit); - if (url.match(/svg/)) { + if (url.match(/svg/)) { // only return a badge if SVG requested hits(hit, function(err, count) { - io.sockets.emit('hit', { 'hit': format(hit, count) }); - console.log(url, ' >> ', count); - res.writeHead(200, HEAD); - res.end(make_svg(count)); + io.sockets.emit('hit', { 'hit': format(hit, count) }); // broadcast + console.log(url, ' >> ', count); // log in dev + res.writeHead(200, HEAD); // status code and SVG headers + res.end(make_svg(count)); // serve the SVG with count }); } else if(url === '/favicon.ico') { - res.writeHead(301, { "Location": FAVICON }); + res.writeHead(301, { "Location": FAVICON }); // redirect to @dwyl Favicon res.end(); } else if(url === '/client.js') { // these can be cached in "Prod" ... From ceddc22e69ec1a68651cfb3b736c4f3ac683885d Mon Sep 17 00:00:00 2001 From: nelsonic Date: Mon, 4 Sep 2017 14:30:25 +0100 Subject: [PATCH 41/44] ensure UI works when JS disabled for #51 --- lib/client.js | 1 + lib/index.html | 33 +++++++++++++++++---------------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/lib/client.js b/lib/client.js index 65d7aa6..b441347 100644 --- a/lib/client.js +++ b/lib/client.js @@ -24,6 +24,7 @@ setTimeout(function(){ } return div; } + document.getElementById("how").classList.remove('dn'); // show form if JS available (progressive enhancement) display_badge_markdown(); // render initial markdown template }, 500); diff --git a/lib/index.html b/lib/index.html index d04476d..d55f9d8 100644 --- a/lib/index.html +++ b/lib/index.html @@ -21,22 +21,23 @@

How?

- - -

- Input your GitHub Username - ( or org name): - -

- -

- Input the GitHub Project/Repository name: - -

- -

Your Badge Markdown:

-
+    
+ +

+ Input your GitHub Username + ( or org name): + +

+

+ Input the GitHub Project/Repository name: + +

+ +

Your Badge Markdown:

+
+
+      [![HitCount](http://hits.dwyl.io/{username}/{repo}.svg)](http://hits.dwyl.io/{username}/{repo})
     

@@ -45,7 +46,7 @@

Your Badge Markdown:

Recently Viewed Projects (tracked by Hits)

-
+
Dummy Child Node for insertBefore to work
+ From 3a8a2f8718772ae220dc1ba4b06fad3a13fb72cd Mon Sep 17 00:00:00 2001 From: nelsonic Date: Mon, 4 Sep 2017 15:06:48 +0100 Subject: [PATCH 44/44] reduce width of inputs in UI #51 --- lib/index.html | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/index.html b/lib/index.html index bbf2139..6563e33 100644 --- a/lib/index.html +++ b/lib/index.html @@ -30,8 +30,8 @@

How?

Input your GitHub Username ( or org name): - - + @@ -40,8 +40,8 @@

How?

Input the GitHub Project/Repository name: - - +