diff --git a/setup/www/ansible-vars.yaml b/setup/www/ansible-vars.yaml index 00a8872ff..0a5e1edb7 100644 --- a/setup/www/ansible-vars.yaml +++ b/setup/www/ansible-vars.yaml @@ -19,4 +19,5 @@ packages: - git - python-pip - gnuplot + - golang-go benchmark_host: 50.97.245.4 \ No newline at end of file diff --git a/setup/www/resources/scripts/sync-benchmarking.sh b/setup/www/resources/scripts/sync-benchmarking.sh index 37f0b9e8a..96bbe8e98 100755 --- a/setup/www/resources/scripts/sync-benchmarking.sh +++ b/setup/www/resources/scripts/sync-benchmarking.sh @@ -3,7 +3,8 @@ pidfile=/var/run/nodejs/sync-benchmarking.pid cmd=" rsync -aqz --delete benchmark:charts/ /home/www/benchmarking/charts/ && - rsync -aqz --delete benchmark:coverage-out/out/ /home/www/coverage/ + rsync -aqz --delete benchmark:coverage-out/out/ /home/www/coverage/ && + scp benchmark:benchmarking/www/index.html /home/www/benchmarking/ " if [ -a "$pidfile" -a -r "$pidfile" ]; then diff --git a/setup/www/tasks/base.yaml b/setup/www/tasks/base.yaml index d82856513..a87645d2e 100644 --- a/setup/www/tasks/base.yaml +++ b/setup/www/tasks/base.yaml @@ -1,5 +1,5 @@ - name: Base | Add the NodeSource Node.js repo - command: "bash -c 'curl -sL https://deb.nodesource.com/setup_6.x | bash -'" + command: "bash -c 'curl -sL https://deb.nodesource.com/setup_8.x | bash -'" tags: base - name: Base | APT Update diff --git a/setup/www/tasks/tools.yaml b/setup/www/tasks/tools.yaml index 7a4291365..fbe874703 100644 --- a/setup/www/tasks/tools.yaml +++ b/setup/www/tasks/tools.yaml @@ -24,16 +24,59 @@ mode: 0644 tags: nightly-builder +- name: log-collect | Make /home/logs/log-collect/ + file: + path: /home/logs/log-collect/ + state: directory + mode: 0750 + owner: root + tags: log-collect + +- name: log-collect | Copy log-collect.sh script + template: + src: ./resources/scripts/log-collect.sh.j2 + dest: /home/logs/log-collect/log-collect.sh + mode: 0700 + owner: root + group: root + tags: log-collect + +- name: log-collect | Install Cloudflare logshare-cli + command: "go get github.com/cloudflare/logshare/..." + tags: log-collect + +- name: log-collect | Install log-collect Node dependencies + command: "npm install" + args: + chdir: /home/logs/log-collect/ + tags: log-collect + +- name: log-collect | Copy remaining log-collect components + copy: + src: './tools/log-collect/{{ item }}' + dest: '/home/logs/log-collect/{{ item }}' + mode: 0755 + owner: root + group: root + with_items: + - json-log-filter.js + - json-to-nginx.js + - nginxify.sh + - package.json + - sanity-check-logs.sh + tags: log-collect + #TODO: the branches are hardwired here, they should be in vars somewhere - name: tools | Add periodic tasks to crontab lineinfile: dest: /etc/crontab line: "{{ item }}" with_items: - - '0 20 * * * dist /usr/bin/nodejs-nightly-builder --type nightly --ref heads/v6.x --config /etc/nightly-builder.json' - - '0 19 * * * dist /usr/bin/nodejs-nightly-builder --type nightly --ref heads/v7.x --config /etc/nightly-builder.json' + - '0 20 * * * dist /usr/bin/nodejs-nightly-builder --type v8-canary --ref heads/canary --config /etc/nightly-builder-v8-canary.json' + - '0 19 * * * dist /usr/bin/nodejs-nightly-builder --type nightly --ref heads/v9.x-staging --config /etc/nightly-builder.json' - '0 18 * * * dist /usr/bin/nodejs-nightly-builder --type nightly --ref heads/master --config /etc/nightly-builder.json' - - '0 11 * * * dist /usr/bin/nodejs-nightly-builder --type nightly --ref heads/v8.x --config /etc/nightly-builder-chakracore.json' + - '0 11 * * * dist /usr/bin/nodejs-nightly-builder --type nightly --ref heads/v9.x --config /etc/nightly-builder-chakracore.json' - '0 10 * * * dist /usr/bin/nodejs-nightly-builder --type nightly --ref heads/master --config /etc/nightly-builder-chakracore.json' - - '1 0 * * * root npm update -g nodejs-latest-linker nodejs-dist-indexer nodejs-nightly-builder >& /dev/null' + - '55 * * * * root /home/logs/log-collect/log-collect.sh' + - '30 1,13 * * * root /home/logs/log-collect/sanity-check-logs.sh' tags: tools diff --git a/setup/www/tools/log-collect/json-log-filter.js b/setup/www/tools/log-collect/json-log-filter.js new file mode 100755 index 000000000..b0b3f37c2 --- /dev/null +++ b/setup/www/tools/log-collect/json-log-filter.js @@ -0,0 +1,58 @@ +#!/usr/bin/env node + + +const through2 = require('through2') + , split2 = require('split2') + + +if (process.argv.length < 3) { + console.error('cat log | json-log-filter.js [[, [, ...]]]') + process.exit(1) +} + + +const matches = process.argv.slice(2).map(function argvToGlob (s) { + return new RegExp(s) +}) + + +const jsonify = through2.obj(function jsonifyLine (chunk, enc, callback) { + let data = null + try { + data = JSON.parse(chunk) + } catch (e) {} + callback(null, data) +}) + + +const filterResponse = through2.obj(function filterResponseLine (chunk, enc, callback) { + if (((chunk.edgeResponse && chunk.edgeResponse.status == 200) || + (chunk.originResponse && chunk.originResponse.status == 200)) && + chunk.edgeResponse.bytes > 10) { + return callback(null, chunk) + } + callback() +}) + + +const filterUri = through2.obj(function filterUriLine (chunk, enc, callback) { + for (let i = 0; i < matches.length; i++) + if (matches[i].test(chunk.clientRequest.uri)) + return callback(null, chunk) + callback() +}) + + +const outWrite = through2.obj(function outWriteLine (chunk, enc, callback) { + callback(null, `${JSON.stringify(chunk)}\n`) +}) + + +process.stdin + .pipe(split2()) + .pipe(jsonify) + .pipe(filterResponse) + .pipe(filterUri) + .pipe(outWrite) + .pipe(process.stdout) + diff --git a/setup/www/tools/log-collect/json-to-nginx.js b/setup/www/tools/log-collect/json-to-nginx.js new file mode 100755 index 000000000..95f8445aa --- /dev/null +++ b/setup/www/tools/log-collect/json-to-nginx.js @@ -0,0 +1,46 @@ +#!/usr/bin/env node + +// log_format nodejs '$remote_addr - $remote_user [$time_local] ' +// '"$request" $status $body_bytes_sent ' +// '"$http_referer" "$http_user_agent" "$http_x_forwarded_for"'; +//0.0.0.0 - - [14/Apr/2017:09:25:15 +0000] "GET /en/docs/es6/ HTTP/1.1" 200 4285 "https://www.google.com/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/0.0.0.0 Safari/537.36" "0.0.0.0" + +//{"brandId":100,"flags":2,"hosterId":0,"ownerId":2598267,"rayId":"34f5210c4b2f0b56","securityLevel":"eoff","timestamp":1492156801968000000,"unstable":null,"zoneId":15832072,"zoneName":"nodejs.org","zonePlan":"business","cache":{"bckType":"byc","cacheExternalIp":"0.0.0.0","cacheExternalPort":26380,"cacheFileKey":null,"cacheInternalIp":"0.0.0.0","cacheServerName":"22c24","cacheStatus":"unknown","cacheTokens":0,"endTimestamp":1492156803756000000,"startTimestamp":1492156801971000000},"cacheRequest":{"headers":null,"keepaliveStatus":"noReuseOpenFailed"},"cacheResponse":{"bodyBytes":0,"bytes":476907,"contentType":"application/gzip","retriedStatus":0,"status":200},"client":{"asNum":16509,"country":"jp","deviceType":"desktop","ip":"0.0.0.0","ipClass":"noRecord","srcPort":42727,"sslCipher":"ECDHE-RSA-AES128-GCM-SHA256","sslFlags":1,"sslProtocol":"TLSv1.2"},"clientRequest":{"accept":"","body":null,"bodyBytes":0,"bytes":1056,"cookies":null,"flags":0,"headers":[],"httpHost":"nodejs.org","httpMethod":"GET","httpProtocol":"HTTP/1.1","referer":"","uri":"/download/release/v6.9.2/node-v6.9.2-headers.tar.gz","userAgent":"node-gyp v3.4.0 (node v6.9.2)"},"edge":{"bbResult":"0","cacheResponseTime":2775000000,"colo":22,"enabledFlags":8,"endTimestamp":1492156804744000000,"flServerIp":"0.0.0.0","flServerName":"22f20","flServerPort":443,"pathingOp":"wl","pathingSrc":"macro","pathingStatus":"nr","startTimestamp":1492156801968000000,"usedFlags":0,"rateLimit":{"ruleId":0,"mitigationId":null,"sourceId":"","processedRules":null},"dnsResponse":{"rcode":0,"error":"ok","cached":true,"duration":0,"errorMsg":"","overrideError":false}},"edgeRequest":{"bodyBytes":0,"bytes":1857,"headers":null,"httpHost":"nodejs.org","httpMethod":"GET","keepaliveStatus":"reuseAccepted","uri":"/download/release/v6.9.2/node-v6.9.2-headers.tar.gz"},"edgeResponse":{"bodyBytes":476907,"bytes":477355,"compressionRatio":0,"contentType":"application/gzip","headers":null,"setCookies":null,"status":200},"origin":{"asNum":46652,"ip":"0.0.0.0","port":443,"responseTime":0,"sslCipher":"UNK","sslProtocol":"unknown"},"originResponse":{"bodyBytes":0,"bytes":0,"flags":0,"headers":[],"httpExpires":0,"httpLastModified":1481052127000000000,"status":200}} + +const through2 = require('through2') + , split2 = require('split2') + , strftime = require('strftime').timezone(0) + , timefmt = '%e/%b/%Y:%H:%M:%S +0000' + + +const jsonify = through2.obj(function jsonifyLine (chunk, enc, callback) { + let data = null + try { + data = JSON.parse(chunk) + } catch (e) {} + callback(null, data) +}) + + +const transform = through2.obj(function transformLine (chunk, enc, callback) { + let remote_addr = chunk.client.ip + , country = chunk.client.country + , remote_user = '-' + , time_local = strftime(timefmt, new Date(chunk.timestamp / 1000000)) + , request = `${chunk.clientRequest.httpMethod} ${chunk.clientRequest.uri} ${chunk.clientRequest.httpProtocol}` + , status = chunk.edgeResponse.status + , body_bytes_sent = chunk.edgeResponse.bodyBytes + , http_referer = chunk.clientRequest.referer.replace(/"/g, '\\"') + , http_user_agent = chunk.clientRequest.userAgent.replace(/"/g, '\\"') + , http_x_forwarded_for = remote_addr + + callback(null, `${remote_addr} ${country} ${remote_user} [${time_local}] "${request}" ${status} ${body_bytes_sent} "${http_referer}" "${http_user_agent}" "${http_x_forwarded_for}"\n`) +}) + + +process.stdin + .pipe(split2()) + .pipe(jsonify) + .pipe(transform) + .pipe(process.stdout) + diff --git a/setup/www/tools/log-collect/log-collect.sh.j2 b/setup/www/tools/log-collect/log-collect.sh.j2 new file mode 100755 index 000000000..06608f866 --- /dev/null +++ b/setup/www/tools/log-collect/log-collect.sh.j2 @@ -0,0 +1,65 @@ +#!/bin/bash + +set -e + +export TZ=UTC + +__dirname="$(CDPATH= cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +zone="nodejs.org" +apikey="{{ cdn_api_key }}" +apiemail="{{ cdn_api_email }}" +logregex='^.*\.(tar\.xz|tar\.gz|exe|msi|pkg|7z|zip|lib|pdb|exp)(\?.*)?$' +logdir="/home/logs/$zone" +logshare="/usr/local/go/bin/logshare-cli" +xz="xz -z -9 -e" + +fmt="%b %e %H:00:00 UTC %Y" +earliest=$(date --date="72 hours ago" +"%s") +end=$(date --date="+1 hour" +"$fmt") +overwritecount=0 + +mkdir -p $logdir + +while true; do + # file format is -.log.xz and there's one per hour + # we start from the current hour, knowing that it's not yet complete, and work back + # until 72 hours from now, which is how long CloudFlare stores logs for + start=$(date --date="${end}-1 hour" +"$fmt") + startts=$(date --date="$start" +"%s") + endts=$(date --date="$end" +"%s") + if (( $startts < $earliest )); then break; fi + + filename="${startts}-${endts}.log.xz" + tmpfile="/tmp/${filename}" + outfile="${logdir}/${filename}" + # we allow overwriting of the most recent 2 files, assuming that the last file wasn't + # complete and there's a possibility the second last one wasn't complete + # it's also possible that older files were not properly fetched or have been removed + # by the sanity checker so we make sure we redo any that are missing + if (( $overwritecount < 2 )) || [ ! -f $outfile ]; then + echo "Fetching ${filename}..." + # Filter the entries from CloudFlare so we only keep the download asset entries + $logshare --count=-1 --api-key="$apikey" --api-email="$apiemail" --zone-name="$zone" --start-time="$startts" --end-time="$endts" \ + | ${__dirname}/json-log-filter.js "${logregex}" \ + | $xz > $tmpfile + if (( $? != 0 )); then + echo "Failed to fetch logs" + exit 1 + fi + # check file size as a sanity check, it should be in the range of 3-4Mb, it's possible + # to get a bad set from CloudFlare so we won't bother keeping files that are sufficiently + # suspicious + # bad reads should be rectified in the next run since this is run every hour + fsize=$(stat -c '%s' $tmpfile) + if (( $fsize < 500000 )); then + echo "Unsuccessful fetch of $filename" + rm "$tmpfile" + else + mv $tmpfile $outfile + overwritecount=$(expr $overwritecount + 1) + fi + fi + + end="$start" +done diff --git a/setup/www/tools/log-collect/nginxify.sh b/setup/www/tools/log-collect/nginxify.sh new file mode 100755 index 000000000..b7e660c2c --- /dev/null +++ b/setup/www/tools/log-collect/nginxify.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +set -e + +export TZ=UTC + +__dirname="$(CDPATH= cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +zone="nodejs.org" +logdir="/home/logs/$zone" +nginxlogdir="/home/logs/$zone/nginx-format" + +fmt="%b %e %H:00:00 UTC %Y" +earliest=$(date --date="72 hours ago" +"%s") +end=$(date --date="+1 hour" +"$fmt") + +while true; do + start=$(date --date="${end}-1 hour" +"$fmt") + startts=$(date --date="$start" +"%s") + endts=$(date --date="$end" +"%s") + if (( $startts < $earliest )); then break; fi + + filename="${startts}-${endts}.log.xz" + nginxfilename="${startts}-${endts}.log" + infile="${logdir}/${filename}" + nginxfile="${nginxlogdir}/${nginxfilename}" + + end="$start" +done diff --git a/setup/www/tools/log-collect/package.json b/setup/www/tools/log-collect/package.json new file mode 100755 index 000000000..215e02480 --- /dev/null +++ b/setup/www/tools/log-collect/package.json @@ -0,0 +1,14 @@ +{ + "name": "json-log-filter", + "version": "1.0.0", + "description": "", + "main": "json-log-filter.js", + "author": "", + "license": "Apache-2.0", + "dependencies": { + "glob": "^7.1.1", + "split2": "^2.1.1", + "strftime": "^0.10.0", + "through2": "^2.0.3" + } +} diff --git a/setup/www/tools/log-collect/sanity-check-logs.sh b/setup/www/tools/log-collect/sanity-check-logs.sh new file mode 100755 index 000000000..de9234761 --- /dev/null +++ b/setup/www/tools/log-collect/sanity-check-logs.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +set -e + +fmt="%b %e %H:00:00 UTC %Y" +earliest=$(date --date="25 hours ago" +"%s") +end=$(date +"$fmt") +zone=nodejs.org +logdir="/home/logs/$zone" +latestminute=57 + +# Grab the last entry of the file, parse it and extract the timestamp, +# then check that the timestamp is reasonably late—on a busy site it should +# be in the 59th minute because we expect many requests per minute of the +# hour. If the time of the last entry seems too early then delete the file and +# expect the log fetcher to grab it again and hopefully get it right. +checkfile() { + local file=$1 + local lastjson="$(xzcat $file | tail -1)" + local lastminute=$(node -p 'new Date(Math.round(JSON.parse(process.argv[1]).timestamp / 1000000)).getMinutes()' "$lastjson") + echo $file = $lastminute + if (( $lastminute < $latestminute )); then + echo "Bad file $file, last minute = $lastminute" + rm $file + fi +} + +while true; do + # file format is -.log.xz and there's one per hour + # we start from the last hour and work back 25 hours (1 hour overlap for safety) + start=$(date --date="${end}-1 hour" +"$fmt") + startts=$(date --date="$start" +"%s") + endts=$(date --date="$end" +"%s") + if (( $startts < $earliest )); then break; fi + + filename="${startts}-${endts}.log.xz" + logfile="${logdir}/${filename}" + if [ -f $logfile ]; then + checkfile $logfile + fi + + end="$start" +done