$d8 concat.js Inside x 198,893,290 ops/sec ±1.08% Outside x 674,248,118 ops/sec ±1.08% Fastest is Outside  ### quick! move all your strings out of functions ### µbench 101 ### want to know the cost of OP  function benchmark() { var start = Date.now(); /* OP */ return (Date.now() - start); }  ### what if OP is faster than resolution of our clock?  function benchmark(N) { var start = Date.now(); for (var i = 0; i < N; i++) { /* OP */ } return (Date.now() - start) / N; }  # $C$ # $C \times N$ # $\frac{C \times N}{N}$ # $\frac{C \times \cancel{N}}{\cancel{N}}$ ### naive math #### vs ### smart VMs # $C \times N$ # $C_u N_u + C_o N_o$ # ${\sum C_{i} N_i}$ #### ${\sum C_{i} N_i} +$ #### $\frac{1}{\sqrt{2\pi}}\int C(\xi) e ^ {-\frac{\xi^2}{2}} d\xi$ ### math is hard #### lets look at the code ### IRHydra2 #### all samples from this talk are available as demos within the tool ### why Outside is fast(er)? ### hmm, hard to see the forest behind the trees ### lets activate an "interesting IR" mode • "uninteresting" IR instructions are hidden • source is spliced into IR • loops are marked with red stripes on the left #### By the way: that was core loop of our benchmark and it was EMPTY only iteration variable increment and loop itself remained ### so Outside is fast because it is measuring empty loop ## how did this happen? ### know basic optimizations # inlining  function inner() { return 'a' + 'b' } for (...) { inner(); }   function inner() { return 'a' + 'b' } for (...) { inner(); }   function inner() { return 'a' + 'b' } for (...) { 'a' + 'b'; }  # constant folding  for (...) { 'a' + 'b'; }   for (...) { 'ab'; }  ### loop invariant code motion  var str1, str2; for (...) { str1 + str2; }   var str1, str2; var invS = str1 + str2; for (...) { // use invS here }  # dead code elimination  var str1, str2; var invS = str1 + str2; // nobody uses invS?   var str1, str2; // FARVEL!  # there are more! ### but what happened to Inside? ### V8 failed to constant fold #### actually a bug ## lets be fixing a bug  --- src/hydrogen.cc (revision 21348) +++ src/hydrogen.cc (working copy) @@ -9639,6 +9639,14 @@ return left; } + if (FLAG_fold_constants && + left->IsConstant() && HConstant::cast(left)->HasStringValue() && + right->IsConstant() && HConstant::cast(right)->HasStringValue()) { + return AddUncasted<HStringAdd>( + left, right, allocation_mode.GetPretenureMode(), + STRING_ADD_CHECK_NONE, allocation_mode.feedback_site()); + } + // Register the dependent code with the allocation site. if (!allocation_mode.feedback_site().is_null()) { ASSERT(!graph()->info()->IsStub());  $ d8 concat.js

Inside
x 758,530,478 ops/sec ±1.82%

Outside
x 674,248,118 ops/sec ±1.14%

Fastest is Inside


## «prototype chains are slow»


var obj =
Object.create(
Object.create(
Object.create(
Object.create(
Object.create({prop: 10})))));


var obj =
Object.create(
Object.create(
Object.create(
Object.create(
Object.create({prop: 10})))));
// LISP tribute ^^^^^


function doManyLookups() {
var counter = 0;
for(var i = 0; i < 1000; i++)
for(var j = 0; j < 1000; j++)
for(var k = 0; k < 1000; k++)
counter += obj.prop;
print('In total: ' + counter);
}


function lookupAndCache() {
var counter = 0;
var value = obj.prop;
for(var i = 0; i < 1000; i++)
for(var j = 0; j < 1000; j++)
for(var k = 0; k < 1000; k++)
counter += value;
print('In total: ' + counter);
}


// State of art benchmark driver.
function measure(f) {
var start = Date.now();
f();
var end = Date.now();
print(f.name + ' took ' +
(end - start) + ' ms.');
}


measure(doManyLookups);
measure(lookupAndCache);

$node prototype.js In total: 10000000000 doManyLookups took 8243 ms. In total: 10000000000 lookupAndCache took 1058 ms. ## CONFIRMED ### lets make it harder  - Object.create({prop: 10}))))); + Object.create({ get prop () { return 10 }})))));  $ node prototype.js
In total: 10000000000
doManyLookups took 1082 ms.
In total: 10000000000
lookupAndCache took 1061 ms.

## getter was completely inlined

all prototype traversal checks were hoisted

# why?

#### older V8 did not inline loads of data properties defined on prototypes

$d8 prototype.js In total: 10000000000 doManyLookups took 1294 ms. In total: 10000000000 lookupAndCache took 1189 ms. $ d8 prototype.js
In total: 10000000000
doManyLookups took 1294 ms.
In total: 10000000000
lookupAndCache took 1189 ms.

### what if we run benchmark twice?


measure(doManyLookups);
measure(doManyLookups);
measure(lookupAndCache);
measure(lookupAndCache);

$d8 prototype.js | grep took doManyLookups took 1301 ms. doManyLookups took 3408 ms. lookupAndCache took 1204 ms. lookupAndCache took 3406 ms.  ### what happened here? turns out: 'In total: ' + counter type-feedback leaks upwards into the loop and causes excessive boxing of the counter variable ### solution: hide + from representation inference  - print('In total: ' + counter); + print('In total: ' + counter.toString());  $ d8 prototype.js | grep took
doManyLookups took 1298 ms.
doManyLookups took 1119 ms.
lookupAndCache took 1188 ms.
lookupAndCache took 982 ms.


## and now for the desert!

### is method call faster than function call?


function mk(word) {
var len = word.length;
if (len > 255) return undefined;
var i = len >> 2;
return String.fromCharCode(
(word.charCodeAt(    0) & 0x03) << 14 |
(word.charCodeAt(    i) & 0x03) << 12 |
(word.charCodeAt(  i+i) & 0x03) << 10 |
(word.charCodeAt(i+i+i) & 0x03) <<  8 |
len
);
}


Benchmark.prototype.setup = function() {
function mk(word) {
/* ... */
}

var MK = function() { };
MK.prototype.mk = mk;
var mker = new MK;
};

suite
var key = mk('www.wired.com');
key = mk('scorecardresearch.com');
})
var key = mker.mk('www.wired.com');
key = mker.mk('scorecardresearch.com');
})

$d8 method-vs-function.js Function x 4,149,776 ops/sec ±0.62% Method x 682,273,122 ops/sec ±0.72% Fastest is Method  ### so method call IS faster than function call?  --- a/method-function.js +++ b/method-function.js @@ -2,6 +2,9 @@ load("../benchmark.js"); Benchmark.prototype.setup = function() { + "Speed" + "your" + "JS" + "with" + + "this" + "one" + "weird" + "trick"; + function mk(word) { var len = word.length; if (len > 255) return undefined;  $ d8 method-vs-function.js

Function
x 695,708,197 ops/sec ±0.38%

Method
x 692,496,013 ops/sec ±0.29%

Fastest is Function,Method


## Function was never optimized!

heuristics that decide when to optimize are based (among other things) on the amount of initialized inline caches

function call does not go through an IC, but method call does

Method had enough initialized ICs to trigger optimizations, but Function didn't!

Until fake + ICs were added in the setup section