class Producer {
byte[] src;
public void j4(Consumer dst) {
for (int i = 0; i < src.length; i++) {
dst.consume(src[i]);
}
}
public void j5(Consumer dst) {
for (byte b : src) {
dst.consume(b);
}
}
}
j4
is fasterj5
is fasterconsume
j4
is fasterj5
is fasterconsume
static void measure(String name,
Runnable r) {
long start = System.currentTimeMillis();
for (int i = 0; i < 1000; i++) r.run();
long end = System.currentTimeMillis();
System.out.println(
name + " took " +
(end - start) + " ms.");
}
class Consumer {
void consume(byte b) { }
}
public static void main(String[] args) {
final Producer p =
new Producer(new byte[1024 * 1024 * 10]);
final Consumer c = new Consumer();
measure("j4", () -> p.j4(c));
measure("j5", () -> p.j5(c));
}
class Consumer {
void consume(byte b) { }
}
public static void main(String[] args) {
final Producer p =
new Producer(new byte[1024 * 1024 * 10]);
final Consumer c = new Consumer();
measure("j4", () -> p.j4(c)); // ⇒ 7 ms
measure("j5", () -> p.j5(c)); // ⇒ 6 ms
}
measure("j4", () -> p.j4(c)); // ⇒ 7 ms
measure("j5", () -> p.j5(c)); // ⇒ 6 ms
measure("j4", () -> p.j4(c)); // ⇒ 0 ms (!)
measure("j5", () -> p.j5(c)); // ⇒ 0 ms (!)
class Consumer {
void consume(byte b) {
/* doing nothing */
}
}
measure("j4", () -> p.j4(c)); // ⇒ 7 ms
measure("j5", () -> p.j5(c)); // ⇒ 6 ms
measure("j4", () -> p.j4(c)); // ⇒ 0 ms (!)
measure("j5", () -> p.j5(c)); // ⇒ 0 ms (!)
class Consumer {
byte trap = 1;
void consume(byte b) {
if (b == trap) /* do something */
throw new RuntimeException("impossible!");
}
}
measure("j4", () -> p.j4(c));
measure("j5", () -> p.j5(c));
class Consumer {
byte trap = 1;
void consume(byte b) {
if (b == trap) /* do something */
throw new RuntimeException("impossible!");
}
}
measure("j4", () -> p.j4(c)); // ⇒ 4248 ms
measure("j5", () -> p.j5(c)); // ⇒ 4233 ms
class Consumer {
volatile byte trap = 1;
void consume(byte b) {
if (b == trap) /* do something heavier! */
throw new RuntimeException("impossible!");
}
}
measure("j4", () -> p.j4(c));
measure("j5", () -> p.j5(c));
class Consumer {
volatile byte trap = 1;
void consume(byte b) {
if (b == trap) /* do something heavier! */
throw new RuntimeException("impossible!");
}
}
measure("j4", () -> p.j4(c)); // ⇒ 13130 ms
measure("j5", () -> p.j5(c)); // ⇒ 4738 ms
or read http://psy-lob-saw.blogspot.dk/2014/08/the-volatile-read-suprise.html
float dot(uint8_t* va, float* vb, size_t n) {
float sum = 0.0;
for (size_t i = 0; i < n; i++) {
sum += va[i] * vb[i];
}
return sum;
}
int main(int argc, char* argv[]) {
/*
* call dot() on 2^11 long vectors 2^21 times
*/
}
$ gcc -m32 -O2 ./test.cc -o ./t32 && time ./t32 ./t32 4.23s user 0.00s system 100% cpu 4.231 total $ gcc -m64 -O2 ./test.cc -o ./t64 && time ./t64 ./t64 12.16s user 0.00s system 100% cpu 12.155 total
float dot(uint8_t* va, float* vb, size_t n) {
// ...
sum += I2F(va[i]) * vb[i];
// ...
}
float __attribute__((always_inline)) I2F(int i) {
float f;
__asm__("xorps %0, %0;"
"cvtsi2ss %1, %0"
: "=X"(f)
: "r"(i));
return f;
}
// split
testString.split("e").length - 1;
// while
var count = 0;
while (testString.length) {
if (testString.charAt(0) === "e") {
count += 1;
}
testString = testString.slice(1);
}
// split (IT'S OVER 9000K OP/S ON FF!)
testString.split("e").length - 1;
// while
var count = 0;
while (testString.length) {
if (testString.charAt(0) === "e") {
count += 1;
}
testString = testString.slice(1); // WAT?
}
OP
?
function benchmark() {
var start = Date.now();
/* OP */
return (Date.now() - start) / N;
}
OP
faster than clock?
function benchmark(N) {
var start = Date.now();
for (var i = 0; i < N; i++) {
/* OP */
}
return (Date.now() - start) / N;
}
while (testString.length) {
if (testString.charAt(0) === "e") {
count += 1;
}
testString = testString.slice(1);
}
function benchmark() {
var testString = "...";
var start = Date.now();
for (var i = 0; i < N; i++) {
var count = 0;
while (testString.length) {
if (testString.charAt(0) === "e") {
count += 1;
}
testString = testString.slice(1);
}
}
return (Date.now() - start) / N;
}
function benchmark() {
var testString = "...";
var start = Date.now();
for (var i = 0; i < N; i++) {
var count = 0;
while (testString.length) {
if (testString.charAt(0) === "e") {
count += 1;
}
testString = testString.slice(1);
}
}
return (Date.now() - start) / N;
}
testString === ""
OP
should take the same time~~
for the win!»
function benchmark() {
var i = '1561565', j;
var start = Date.now();
for (var n = 0; n < N; n++) {
j = ~~i;
}
return (Date.now() - start) / N;
}
function benchmark() {
var j;
var start = Date.now();
for (var n = 0; n < N; n++) {
j = ~~'1561565';
}
return (Date.now() - start) / N;
}
function benchmark() {
var j;
var start = Date.now();
for (var n = 0; n < N; n++) {
j = ~-1561566;
}
return (Date.now() - start) / N;
}
function benchmark() {
var j;
var start = Date.now();
for (var n = 0; n < N; n++) {
j = 1561565;
}
return (Date.now() - start) / N;
}
function benchmark() {
var i = Date.now().toString(), j;
// ^ not a constant any more
var start = Date.now();
for (var n = 0; n < N; n++) {
j = ~~i;
}
return (Date.now() - start) / N;
}
function benchmark() {
var i = Date.now().toString(), j;
var start = Date.now();
for (var n = 0; n < N; n++) {
j = ~~i;
// ^^^ loop invariant
}
return (Date.now() - start) / N;
}
function benchmark() {
var i = Date.now().toString(), j;
var start = Date.now();
var j0 = ~~i;
// ^^^^^^^^ hoisted from the loop
for (var n = 0; n < N; n++) {
j = j0;
}
return (Date.now() - start) / N;
}
function benchmark() {
var i = Date.now().toString(), j;
var start = Date.now();
var j0 = ~~i;
for (var n = 0; n < N; n++) {
j = j0;
}
return (Date.now() - start) / N;
}
function benchmark() {
var i = Date.now().toString(), j;
var start = Date.now();
var j0 = ~~i;
for (var n = 0; n < N; n++) {
j = j0;
}
return (Date.now() - start) / N;
}
function benchmark() {
var i = Date.now().toString(), j;
var start = Date.now();
var j0 = ~~i;
for (var n = 0; n < N; n++) {
j = j0;
}
return (Date.now() - start) / N;
}
function benchmark() {
var i = Date.now().toString(), j;
var start = Date.now();
var j0 = ~~i;
for (var n = 0; n < N; n++) {
j = j0;
}
return (Date.now() - start) / N;
}
function benchmark() {
var start = Date.now();
for (var n = 0; n < N; n++) {
/* sound of silence */
}
return (Date.now() - start) / N;
}
function benchmark() {
var start = Date.now();
/*
* sound of silence
*/
return (Date.now() - start) / N;
}
// Remember this one?
// split (IT'S OVER 9000K OP/S ON FF!)
testString.split("e").length - 1;
// it's full of dead code.
function benchmark() {
var i = Date.now().toString(),
i1 = Date.now().toString(), t;
var j;
var start = Date.now();
for (var n = 0; n < N; n++) {
t = i; i = i1; i1 = t;
j = ~~i;
}
if (i != j || i1 != j) throw "whoa?";
return (Date.now() - start) / N;
}
for (var n = 0; n < N; n++) {
t = i; i = i1; i1 = t;
j = ~~i;
}
for (var n = 0; n < (N / 2); n++) {
t = i; i = i1; i1 = t;
j = ~~i;
t = i; i = i1; i1 = t;
j = ~~i;
}
if ((N % 2) === 1) {
t = i; i = i1; i1 = t;
j = ~~i;
}
for (var n = 0; n < (N / 2); n++) {
j = ~~i1;
t = i1;
j = ~~i;
}
for (var n = 0; n < (N / 2); n++) {
j = ~~i1; /* dead */
t = i1; /* dead */
j = ~~i; /* invariant */
}
function benchmark() {
function fn(a, b, c, d) {
return a + b + c + d;
}
var start = new Date;
for (var n = 0; n < N; n++) {
fn('a', 'b', 'c', 'd');
}
return (new Date - start);
}
function benchmark() {
function fn(a, b, c, d) {
return a + b + c + d;
}
var start = new Date;
for (var n = 0; n < N; n++) {
fn('a', 'b', 'c', 'd');
}
return (new Date - start);
}
function benchmark() {
function fn(a, b, c, d) {
return a + b + c + d;
}
var start = new Date;
for (var n = 0; n < N; n++) {
fn('a', 'b', 'c', 'd');
// ^^ why, yes, sir, I know callee.
}
return (new Date - start);
}
function benchmark() {
function fn(a, b, c, d) {
return a + b + c + d;
}
var start = new Date;
for (var n = 0; n < N; n++) {
fn('a', 'b', 'c', 'd');
// ^^ it's right there
}
return (new Date - start);
}
function benchmark() {
function fn(a, b, c, d) {
return a + b + c + d;
}
var start = new Date;
for (var n = 0; n < N; n++) {
fn('a', 'b', 'c', 'd');
// ^^ I take it there and put it here.
}
return (new Date - start);
}
function benchmark() {
function fn(a, b, c, d) {
return a + b + c + d;
}
var start = new Date;
for (var n = 0; n < N; n++) {
'a' + 'b' + 'c' + 'd';
// ^^ DONE
}
return (new Date - start);
}
function benchmark() {
function fn(a, b, c, d) {
return a + b + c + d;
}
var start = new Date;
for (var n = 0; n < N; n++) {
/* aaaaand it is eliminated as DEAD code */
}
return (new Date - start);
}
load("lodash.js");
load("benchmark.js");
Benchmark.prototype.setup = function() {
function fn(a, b, c, d) {
return a + b + c + d;
}
};
var suite = new Benchmark.Suite;
suite
.add('invoke', function () { fn('a', 'b', 'c', 'd'); })
.on('cycle', function (event) { print(event.target); })
.run();
$ v8/3.19.18.23/out/ia32.release/d8 test.js invoke x 24,417,452 ops/sec ±1.13% $ v8/3.22.4/out/ia32.release/d8 test.js invoke x 774,162,347 ops/sec ±0.27%
$ v8/3.19.18.23/out/ia32.release/d8 \ --print-opt-code --code-comments \ --trace-hydrogen --trace-phase=Z \ --trace-deopt \ test.js > code.asm
var start = new Date;
// ^^^^^ compiler wrongly speculates
// that it is an integer
for (var n = 0; n < N; n++) {
/* nothing */
}
return (new Date - start);
function benchmark() {
function fn(a, b, c, d) {
return a + b + c + d;
}
var start = new Date;
for (var n = 0; n < N; n++) {
fn('a', 'b', 'c', 'd');
}
return (new Date - start);
}
benchmark(); // => 2 ms
benchmark(); // => 37 ms
$ d8 concat.js
Inside | x 198,893,290 ops/sec ±1.08% |
Outside | x 674,248,118 ops/sec ±1.08% |
Outside
is fast(er)?only iteration variable increment and loop itself remained
Outside
is fast
--- src/hydrogen.cc (revision 21348)
+++ src/hydrogen.cc (working copy)
@@ -9639,6 +9639,14 @@
return left;
}
+ if (FLAG_fold_constants &&
+ left->IsConstant() && HConstant::cast(left)->HasStringValue() &&
+ right->IsConstant() && HConstant::cast(right)->HasStringValue()) {
+ return AddUncasted<HStringAdd>(
+ left, right, allocation_mode.GetPretenureMode(),
+ STRING_ADD_CHECK_NONE, allocation_mode.feedback_site());
+ }
+
// Register the dependent code with the allocation site.
if (!allocation_mode.feedback_site().is_null()) {
ASSERT(!graph()->info()->IsStub());
$ d8 concat.js
Inside | x 758,530,478 ops/sec ±1.82% |
Outside | x 674,248,118 ops/sec ±1.14% |
var obj =
Object.create(
Object.create(
Object.create(
Object.create(
Object.create({prop: 10})))));
var obj =
Object.create(
Object.create(
Object.create(
Object.create(
Object.create({prop: 10})))));
// LISP tribute ^^^^^
function doManyLookups() {
var counter = 0;
for(var i = 0; i < 1000; i++)
for(var j = 0; j < 1000; j++)
for(var k = 0; k < 1000; k++)
counter += obj.prop;
print('In total: ' + counter);
}
function lookupAndCache() {
var counter = 0;
var value = obj.prop;
for(var i = 0; i < 1000; i++)
for(var j = 0; j < 1000; j++)
for(var k = 0; k < 1000; k++)
counter += value;
print('In total: ' + counter);
}
// State of art benchmark driver.
function measure(f) {
var start = Date.now();
f();
var end = Date.now();
print(f.name + ' took ' +
(end - start) + ' ms.');
}
measure(doManyLookups);
measure(lookupAndCache);
$ node prototype.js In total: 10000000000 doManyLookups took 8243 ms. In total: 10000000000 lookupAndCache took 1058 ms.
- Object.create({prop: 10})))));
+ Object.create({ get prop () { return 10 }})))));
$ node prototype.js In total: 10000000000 doManyLookups took 1082 ms. In total: 10000000000 lookupAndCache took 1061 ms.
all prototype traversal checks were hoisted
$ d8 prototype.js In total: 10000000000 doManyLookups took 1294 ms. In total: 10000000000 lookupAndCache took 1189 ms.
$ d8 prototype.js In total: 10000000000 doManyLookups took 1294 ms. In total: 10000000000 lookupAndCache took 1189 ms.
measure(doManyLookups);
measure(doManyLookups);
measure(lookupAndCache);
measure(lookupAndCache);
$ d8 prototype.js | grep took doManyLookups took 1301 ms. doManyLookups took 3408 ms. lookupAndCache took 1204 ms. lookupAndCache took 3406 ms.
turns out: 'In total: ' + counter
type-feedback leaks upwards into the loop and causes excessive boxing of the counter
variable
+
from representation inference
- print('In total: ' + counter);
+ print('In total: ' + counter.toString());
$ d8 prototype.js | grep took doManyLookups took 1298 ms. doManyLookups took 1119 ms. lookupAndCache took 1188 ms. lookupAndCache took 982 ms.
function mk(word) {
var len = word.length;
if (len > 255) return undefined;
var i = len >> 2;
return String.fromCharCode(
(word.charCodeAt( 0) & 0x03) << 14 |
(word.charCodeAt( i) & 0x03) << 12 |
(word.charCodeAt( i+i) & 0x03) << 10 |
(word.charCodeAt(i+i+i) & 0x03) << 8 |
len
);
}
Benchmark.prototype.setup = function() {
function mk(word) {
/* ... */
}
var MK = function() { };
MK.prototype.mk = mk;
var mker = new MK;
};
suite
.add('Function', function() {
var key = mk('www.wired.com');
key = mk('www.youtube.com');
key = mk('scorecardresearch.com');
key = mk('www.google-analytics.com');
})
.add('Method', function() {
var key = mker.mk('www.wired.com');
key = mker.mk('www.youtube.com');
key = mker.mk('scorecardresearch.com');
key = mker.mk('www.google-analytics.com');
})
$ d8 method-vs-function.js
Function | x 4,149,776 ops/sec ±0.62% |
Method | x 682,273,122 ops/sec ±0.72% |
--- a/method-function.js
+++ b/method-function.js
@@ -2,6 +2,9 @@
load("../benchmark.js");
Benchmark.prototype.setup = function() {
+ "Speed" + "your" + "JS" + "with" +
+ "this" + "one" + "weird" + "trick";
+
function mk(word) {
var len = word.length;
if (len > 255) return undefined;
$ d8 method-vs-function.js
Function | x 695,708,197 ops/sec ±0.38% |
Method | x 692,496,013 ops/sec ±0.29% |
Function
was never optimized!— heuristics that decide when to optimize are based (among other things) on the amount of initialized inline caches
— function call does not go through an IC, but method call does
— Method
had enough initialized ICs to trigger optimizations, but Function
didn't!
— Until fake +
ICs were added in the setup
section
Function
is optimized![a-z]*
find most common one."
function count(str) {
var words = str.split(' ');
var counts = {};
var maxWord, maxCount = 0;
var word, count;
for (var i = 0; i < words.length; i++) {
word = words[i];
/* count word here */
}
return maxWord;
}
count = counts[word];
if (typeof count === "undefined") {
counts[word] = count = 1;
} else {
counts[word] = count + 1;
}
if (count > maxCount) {
maxCount = count;
maxWord = word;
}
# on 1.5Mb / 255k words string
$ d8 count.js
naive x 13.51 ops/sec ±2.87%
C++
we would just update the cell.
var cell = counts[word];
if (typeof cell === "undefined") {
count = 1;
counts[word] = { value: count };
} else {
cell.value = count = cell.value + 1;
}
# on 1.5Mb / 255k words string $ d8 count.js naive x 13.51 ops/sec ±2.87% cells x 20.42 ops/sec ±2.51%
function count(str) {
var words = str.split(' ');
var counts = {};
var maxWord, maxCount = 0;
var word, count;
for (var i = 0; i < words.length; i++) {
word = words[i];
/* count word here */
}
return maxWord;
}
function count() {
// Idea 1: compute substring at the very end.
var maxCount = 0, maxWordStart, maxWordEnd,
lastSpace = 0;
// Idea 2: instead of hash-table use simplistic trie.
var root = { val: 0 };
var curr = root;
for (var i = 0; i < str.length; i++) {
var ch = str.charCodeAt(i);
if (ch === 32) { // if space increase the count;
} else { // if not space advance in the trie;
}
}
return str.substring(maxWordStart + 1, maxWordEnd);
}
// handling non-space: find or create target node.
var next = curr[ch];
if (typeof next === 'undefined') {
next = curr[ch] = { val: 0 };
}
curr = next;
// handling space: increase node's counter.
var count = curr.val = curr.val + 1;
if (count > maxCount) {
maxCount = count;
maxWordStart = lastSpace;
maxWordEnd = i;
}
curr = root; // restart from the root
lastSpace = i;
# on 1.5Mb / 255k words string $ d8 count.js naive x 13.51 ops/sec ±2.87% cells x 20.42 ops/sec ±2.51% trie x 61.46 ops/sec ±0.48%