this slide was intentionally left blank

~/src/node $ git log --grep=mraleph
commit 91f1b250ecb4fb8151cd17423dd4460652d0ce97
Author: Ryan Dahl 
Date:   Fri Jul 8 17:08:52 2011 -0700

    mraleph emit hack

~/src/node $
					
~/src/node $ git log --grep=mraleph
commit 91f1b250ecb4fb8151cd17423dd4460652d0ce97
Author: Ryan Dahl 
Date:   Fri Jul 8 17:08:52 2011 -0700

    @mraleph emit hack

~/src/node $
					
~/src/node $ git log --grep=mraleph
commit 91f1b250ecb4fb8151cd17423dd4460652d0ce97
Author: Ryan Dahl 
Date:   Fri Jul 8 17:08:52 2011 -0700

    @mraleph emit hack

~/src/node $
					

Java VM

V8

Dart VM

performance

DOUBT
EVERYTHING

"~~ the fastest!"

NOPE

JITs are clever

to measure you need to outsmart

Disclaimer: On slides I will show optimizations as source-to-source transformations. In reality compilers operate on more sophisticated representations.


function benchmark() {
  var i = '1561565', j;
  var start = Date.now();
  for (var n = 0; n < 1e6; n++) {
    j = ~~i;
  }
  return (Date.now() - start);
}
					

function benchmark() {
  var j;
  var start = Date.now();
  for (var n = 0; n < 1e6; n++) {
    j = ~~'1561565';
  }
  return (Date.now() - start);
}
          

function benchmark() {
  var j;
  var start = Date.now();
  for (var n = 0; n < 1e6; n++) {
    j = ~-1561566;
  }
  return (Date.now() - start);
}
          

function benchmark() {
  var j;
  var start = Date.now();
  for (var n = 0; n < 1e6; n++) {
    j = 1561565;
  }
  return (Date.now() - start);
}
          

constant propagation

"hey, I can trick the compiler!"


function benchmark() {
  var i = Date.now().toString(), j;
  //  ^ not a constant any more
  var start = Date.now();
  for (var n = 0; n < 1e6; n++) {
    j = ~~i;
  }
  return (Date.now() - start);
}
          

"Wrong answer, McFly!"


function benchmark() {
  var i = Date.now().toString(), j;
  var start = Date.now();
  for (var n = 0; n < 1e6; n++) {
    j = ~~i;
    //  ^^^ loop invariant
  }
  return (Date.now() - start);
}
          

function benchmark() {
  var i = Date.now().toString(), j;
  var start = Date.now();
  var j0 = ~~i;
  //  ^^^^^^^^ hoisted from the loop
  for (var n = 0; n < 1e6; n++) {
    j = j0;
  }
  return (Date.now() - start);
}
          

loop invariant code motion


function benchmark() {
  var i = Date.now().toString(), j;
  var start = Date.now();
  var j0 = ~~i;
  for (var n = 0; n < 1e6; n++) {
    j = j0;
  }
  return (Date.now() - start);
}
          

function benchmark() {
  var i = Date.now().toString(), j;
  var start = Date.now();
  var j0 = ~~i;
  for (var n = 0; n < 1e6; n++) {
    j = j0;
  }
  return (Date.now() - start);
}
          

function benchmark() {
  var i = Date.now().toString(), j;
  var start = Date.now();
  var j0 = ~~i;
  for (var n = 0; n < 1e6; n++) {
    j = j0;
  }
  return (Date.now() - start);
}
          

function benchmark() {
  var i = Date.now().toString(), j;
  var start = Date.now();
  var j0 = ~~i;
  for (var n = 0; n < 1e6; n++) {
    j = j0;
  }
  return (Date.now() - start);
}
          

function benchmark() {
  var start = Date.now();
  for (var n = 0; n < 1e6; n++) {
    /* sound of silence */
  }
  return (Date.now() - start);
}
          

function benchmark() {
  var start = Date.now();
  /*
   * sound of silence
   */
  return (Date.now() - start);
}
          

dead code elimination

[most engines won't go as far as shown with it,
but it is possible]

optimizer eats µbenchmarks for breakfast

chew proof µbenchmarks?

do not write them

  1. avoid constants
  2. avoid loop invariants
  3. avoid dead code
  4. verify results


function benchmark() {
  var i = Date.now().toString(),
      i1 = Date.now().toString(),
      t, j;
  var start = Date.now();
  for (var n = 0; n < 1e6; n++) {
    t = i; i = i1; i1 = t;
    j = ~~i;
  }
  if (i != j || i1 != j) throw "whoa?";
  return (Date.now() - start);
}
          

this is as good as it gets

but still might not be enough


for (var n = 0; n < 1e6; n++) {
  t = i; i = i1; i1 = t;
  j = ~~i;
}
          

for (var n = 0; n < (1e6 / 2); n++) {
  t = i; i = i1; i1 = t;
  j = ~~i;
  t = i; i = i1; i1 = t;
  j = ~~i;              
}
          

for (var n = 0; n < (1e6 / 2); n++) {
  j = ~~i1;
  t = i1;
  j = ~~i;
}
          

for (var n = 0; n < (1e6 / 2); n++) {
  j = ~~i1; /* dead */
  t = i1;   /* dead */
  j = ~~i;  /* invariant */
}
          

loop unrolling

V8 will not do it right now

but I want to induce paranoia ☺

"good speedup for function calls!"

"it's a kind of magic!"

NÃO


function benchmark() {
  function fn(a, b, c, d) {
    return a + b + c + d;
  }

  var start = new Date;
  for (var n = 0; n < 1e6; n++) {
    fn('a', 'b', 'c', 'd');
  }
  return (new Date - start);
}
          

function benchmark() {
  function fn(a, b, c, d) {
    return a + b + c + d;
  }

  var start = new Date;
  for (var n = 0; n < 1e6; n++) {
    fn('a', 'b', 'c', 'd');
  }
  return (new Date - start);
}
          

function benchmark() {
  function fn(a, b, c, d) {
    return a + b + c + d;
  }

  var start = new Date;
  for (var n = 0; n < 1e6; n++) {
    fn('a', 'b', 'c', 'd');
//  ^^ why, yes, sir, I know callee.
  }
  return (new Date - start);
}
          

function benchmark() {
  function fn(a, b, c, d) {
    return a + b + c + d;
  }

  var start = new Date;
  for (var n = 0; n < 1e6; n++) {
    fn('a', 'b', 'c', 'd');
  }
  return (new Date - start);
}
          

function benchmark() {
  function fn(a, b, c, d) {
    return a + b + c + d;
  }

  var start = new Date;
  for (var n = 0; n < 1e6; n++) {
    fn('a', 'b', 'c', 'd');
  }
  return (new Date - start);
}
          

function benchmark() {
  function fn(a, b, c, d) {
    return a + b + c + d;
  }

  var start = new Date;
  for (var n = 0; n < 1e6; n++) {
    'a' + 'b' + 'c' + 'd';
  }
  return (new Date - start);
}
          

function benchmark() {
  function fn(a, b, c, d) {
    return a + b + c + d;
  }

  var start = new Date;
  for (var n = 0; n < 1e6; n++) {
    /* aaaaand it is eliminated */
  }
  return (new Date - start);
}
          

inlining

Looks simple!

Why the difference?


load("lodash.js");
load("benchmark.js");

Benchmark.prototype.setup = function() {
  function fn(a, b, c, d) {
    return a + b + c + d;
  }
};

var suite = new Benchmark.Suite;
suite
  .add('invoke', function () { fn('a', 'b', 'c', 'd'); })
  .on('cycle', function (event) { print(event.target); })
  .run();
          
$ v8/3.19.18.23/out/ia32.release/d8 test.js
invoke x 24,417,452 ops/sec ±1.13%

$ v8/3.22.4/out/ia32.release/d8 test.js
invoke x 774,162,347 ops/sec ±0.27%
          
$ v8/3.19.18.23/out/ia32.release/d8    \
      --print-opt-code --code-comments \
      --trace-hydrogen --trace-phase=Z \
      --trace-deopt                    \
      test.js > code.asm
          

+ IRHydra

http://irhydra.googlecode.com

deployed version

It was optimized
correctly

It was optimized almost correctly

OSR entry tries to convert non-number to integer


var start = new Date;
//  ^^^^^ compiler wrongly speculates
//        that it is an integer
for (var n = 0; n < 1e6; n++) {
  /* nothing */
}
return (new Date - start);
          

It was a bug.

There are more traps here.


function benchmark() {
  function fn(a, b, c, d) {
    return a + b + c + d;
  }

  var start = new Date;
  for (var n = 0; n < 1e6; n++) {
    fn('a', 'b', 'c', 'd');
  }
  return (new Date - start);
}

benchmark(); // => 2 ms
benchmark(); // => 37 ms

          

Exercise
for the reader.

Related to how V8 collects type feedback and guards inlined functions

Don't micro

"Given a string of space separated words [a-z]* find most common one."


function count(str) {
  var words = str.split(' ');
  var counts = {};
  var maxWord, maxCount = 0;
  var word, count;

  for (var i = 0; i < words.length; i++) {
    word = words[i];
    /* count word here */
  }

  return maxWord;
}
          

count = counts[word];
if (typeof count === "undefined") {
  counts[word] = count = 1;
} else {
  counts[word] = count + 1;
}

if (count > maxCount) {
  maxCount = count;
  maxWord = word;
}
          
# on 1.5Mb / 255k words string
$ d8 count.js
naive x 13.51 ops/sec ±2.87%

In C++ we would just update the cell.


var cell = counts[word];
if (typeof cell === "undefined") {
  count = 1;
  counts[word] = { value: count };
} else {
  cell.value = count = cell.value + 1;
}
          
# on 1.5Mb / 255k words string
$ d8 count.js
naive x 13.51 ops/sec ±2.87%
cells x 20.42 ops/sec ±2.51%

Rethink the foundations!


function count(str) {
  var words = str.split(' ');
  var counts = {};
  var maxWord, maxCount = 0;
  var word, count;

  for (var i = 0; i < words.length; i++) {
    word = words[i];
    /* count word here */
  }

  return maxWord;
}
          

stream it char by char into a trie

stream it char by char into a trie


function count() {
  // Idea 1: compute substring at the very end.
  var maxCount = 0, maxWordStart, maxWordEnd, lastSpace = 0;

  // Idea 2: instead of hash-table use simplistic trie.
  var root = { val: 0 };
  var curr = root;

  for (var i = 0; i < str.length; i++) {
    var ch = str.charCodeAt(i);
    if (ch === 32) { // if space increase the count;
    } else { // if not space advance in the trie;
    }
  }

  return str.substring(maxWordStart + 1, maxWordEnd);
}
          

// handling non-space: find or create target node.
var next = curr[ch];
if (typeof next === 'undefined') {
  next = curr[ch] = { val: 0 };
}
curr = next;
          

// handling space: increase node's counter.
var count = curr.val = curr.val + 1;
if (count > maxCount) {
  maxCount     = count;
  maxWordStart = lastSpace;
  maxWordEnd   = i;
}
curr = root; // restart from the root
lastSpace = i;
          
# on 1.5Mb / 255k words string
$ d8 count.js
naive x 13.51 ops/sec ±2.87%
cells x 20.42 ops/sec ±2.51%
trie  x 61.46 ops/sec ±0.48%

algorithms first

µbenchmarks last

THANK YOU

... one last thing

never assume that some language feature has to be slow

talk to JS engine people instead and file bugs

SEE YOU IN BUGTRACKER