Object subclass: #Vector
  instanceVariableNames: 'x y'
  classVariableNames: ''
  poolDictionaries: ''
  category: nil!

!Vector class methodsFor: 'instance creation'!
  x: anXNumber y: anYNumber
    ^ self new x: anXNumber y: anYNumber!!

!Vector methodsFor: 'math'!
  dot: aOther
    ^ (self x * aOther x) + (self y * aOther y)!!

I'm Slava

I like compiling things

and ranting about that on @mraleph

𝔏 ⇒ 𝔑

𝔏 ∈ {Java, JavaScript, Dart, …}

𝔑 ∈ {ia32, x64, arm, mips, …}

𝔏′ ⇒ 𝔏 ⇒ 𝔑


Object subclass: #Vector
  instanceVariableNames: 'x y'
  classVariableNames: ''
  poolDictionaries: ''
  category: nil!

!Vector class methodsFor: 'instance creation'!
  x: anXNumber y: anYNumber
    ^ self new x: anXNumber y: anYNumber!!

!Vector methodsFor: 'math'!
  dot: aOther
    ^ (self x * aOther x) + (self y * aOther y)!!

// smalltalk2js compiler. v0.0.-1.
var fs = require('fs');
var input = process.argv[2];
var result = fs.readFileSync(input)
  .toString()
  .replace(/\^/g, "return ")
  .replace(/!(\w+) methodsFor: '[^']*'!((?:.|\n)*?!)!/g,
           function (_, n, methods) {
    prefix = n + '.prototype.';
    methods = methods.replace(/((?:.|\n)*?)!/g, function (_, method) {
      return method.replace(/\n/g, '')
                   .replace(/^\s*(\w+\s*(:\s*\w+(\s*\w+\s*:\s*\w+)*)?)(.*)$/,
                            function (_, c, _, _, body) {
        var name =  c.replace(/\s*:\s*\w+\s*/g, '$').trim();
        var args = c.replace(/\s*\w+\s*:/g, '').trim().replace(/\s+/g, ', ');
        var locals = ['self'].concat(args.split(','));
        locals.forEach(function (arg) {
          body = body.replace(new RegExp(arg + "\\s+", "g"), arg + ".");
        })
        return prefix + name + ' = function (' + args + ') {' + body + "}\n";
      });
    })   
    return methods;
  })

fs.writeFileSync(input + ".js", result);

// Dot-product of two vectors
Vector.prototype.dot$ = function (aOther) {
  return this.x * aOther.x +
         this.y * aOther.y
}

that was easy

amazing ZERO overhead translation


v := (Vector x: 10 y: 10).
(v dot: 10).
"Object: 10 error: did not understand #x"

var v = new Vector(10, 10)
v.dot(10)
// ⇒ NaN

Lua, Ruby, Python, …

the story is the same

semantik

\zeˈmantɪk\

σημαντικός

\simantikós\

significant

\sigˈnifikənt\

how to match semantics?


// Dot-product of two vectors
Vector.prototype.dot = function (other) {
  return Send(Send(Send(this, 'x'), '*',
                   Send(other, 'x')), '+'
              Send(Send(this, 'y'), '*'
                   Send(other, 'y')))
}

Before we go any further

This talk was never intended to really be about compiling Smalltalk to JavaScript. It's about primitives that could make such translation easier. It focuses on certain aspects of the language (dispatch) but does not address some other complicated aspects, e.g. translation of the non-local return or control-flow in general. Though many things can be achieved with the right combination of switch-statements, exceptions and trampolining, these emulations usually come with a hefty overhead. Absence of the first-class access to the call stack and program counter makes efficient translation of foreign control-flow constructs like non-local return or coroutines a hard if not impossible task.

Overall language to language compilation faces more challenges than I could cover in this 20 minutes talk

(Vyacheslav Egorov, 16.09.2014)


// Dot-product of two vectors
Vector.prototype.dot = function (other) {
  return Send(Send(Send(this, 'x'), '*',
                   Send(other, 'x')), '+'
              Send(Send(this, 'y'), '*'
                   Send(other, 'y')))
}

looks slow

lets "benchmark"


function lensum(vs, start, end) {
  var v = new Vector(0, 0)
  for (var i = start; i < end; i++)
    v.add(vs[i])
  return v.dot(v)
}

// this.x
⇒ Send(this, 'x')
// this.x = k;
⇒ Send(this, 'x:', k)
// v.add(vs[i])
⇒ Send(v, 'add:', Send(vs, 'at:', i))

var slice = Array.prototype.slice;
function Send(self, message /*, ... */) {
  return self[message].apply(self,
    slice.call(arguments, 2));
}

Number.prototype['+'] = function (other) {
  'use strict';
  return this + other;
}

Array.prototype['at:'] = function (idx) {
  return this[idx];
}
$ d8 s0.js
lensum x 30,034 ops/sec
$ d8 s1.js
lensum x 6.30 ops/sec
# that's 4767x slower

Send() is too polymorphic

[& can't be optimized by V8]

"Universal VM"
= a lie?

JIT is a superhero.

but it needs a sidekick

  • easy to learn language
  • with vibrant ecosystem
  • running on state-of-art VM
  • supporting variety of compile-to languages
  • named like CoffeeScript but without Script.

Java

invokedynamic

«A CallSite is a holder for a variable MethodHandle, which is called its target. An invokedynamic instruction linked to a CallSite delegates all calls to the site's current target. A CallSite may be associated with several invokedynamic instructions, or it may be "free floating", associated with none. In any case, it may be invoked through an associated method handle called its dynamic invoker.

A non-constant call site may be relinked by changing its target. »

http://docs.oracle.com/javase/7/docs/api/java/lang/invoke/CallSite.html

«A CallSite is a holder for a variable MethodHandle, which is called its target. An invokedynamic instruction linked to a CallSite delegates all calls to the site's current target. A CallSite may be associated with several invokedynamic instructions, or it may be "free floating", associated with none. In any case, it may be invoked through an associated method handle called its dynamic invoker.

A non-constant call site may be relinked by changing its target. »

http://docs.oracle.com/javase/7/docs/api/java/lang/invoke/CallSite.html

in JS every invoke
is dynamic

CallSite

var f

invokedynamic

f(...)

MethodHandle

function (...) { }

(re)linking

f = someFunc

put it together


this.x += other.x;

Send(this, 'x:',
     Send(Send(this, 'x'), '+',
          Send(other, 'x')));

ΣSend$00(this,
         ΣSend$01(ΣSend$02(this),
                  ΣSend$03(other)));
//                ↑↑↑↑↑↑↑↑ 
// global var for send site

ΣSend$00 = function (self, value) {
  return Send(self, 'x:', value);
};
ΣSend$01 = function (x, y) {
  return Send(x, '+', y);
};

// Manages ΣSend$00
new SendSite(0, 'x:') 
  .bootstrap();
// Manages ΣSend$01
new SendSite(1, '+')
  .bootstrap();

SendSite.prototype.bootstrap = function () {
  var args = _.range(this.argc)
      .map(function (n) { return 'a' + n });
  this.link(CompileHandler({
    args: ['self'].concat(args),
    op: 'self["' + this.message + '"]' +
        '(' + args.join(', ') + ')'
  }));
};

SendSite.prototype.bootstrap = function () {
  var args = _.range(this.argc)
      .map(function (n) { return 'a' + n });
  this.link(CompileHandler({
    args: ['self'].concat(args),
    op: 'self["' + this.message + '"]' +
        '(' + args.join(', ') + ')'
  }));
};

SendSite.prototype.bootstrap = function () {
  var args = _.range(this.argc)
      .map(function (n) { return 'a' + n });
  this.link(CompileHandler({
    args: ['self'].concat(args),
    op: 'self["' + this.message + '"]' +
        '(' + args.join(', ') + ')'
  }));
};

function CompileHandler(desc) {
  function Template() {
    return function Handler($args) {
      /* $uid */
      return $op;
    };
  }

  desc = _.create(desc, { uid: _.uniqueId() });

  var src = _.template(Template.toString(),
      desc, { interpolate: /\$(\w+)/g });

  return Function('return ' + src)()();
}

function Handler($args) {
  /* $uid */
  return $op;
}

// Handler for ΣSend$10
function Handler(self) {
  /* 11 */
  return self["x"]();
}

// Handler for ΣSend$16
function Handler(self,a0) {
  /* 17 */
  return self["+"](a0);
}

SendSite.prototype = {
  link: function (handler) {
    handler.site = this;
    global[this.name] = handler;
  }
}
$ d8 s0.js
lensum x 30,034 ops/sec
$ d8 s2.js
lensum x 9,267 ops/sec
# now "only" ~3x slower

numerics went bananas


Number.prototype['+'] = function (other) {
  'use strict';
  return this + other; /* CONGESTION */
}

specialize!


function Handler($args) {
  /* $uid */
  if ($check) {
    return $op;
  }
  return Handler.site.miss($args);
}

// if isBinaryOp(this.message)
CompileHandler({
  args: ['lhs', 'rhs'],
  op: 'lhs ' + this.op + ' rhs',
  check: 'typeof lhs === "number" && ' +
         'typeof rhs === "number"'
})

// Handler ΣSend$15.
function Handler(lhs,rhs) {
  /* 16 */
  if (typeof lhs === "number" && 
      typeof rhs === "number") {
    return lhs < rhs;
  }
  return Handler.site.miss(lhs,rhs);
}
$ d8 s0.js
lensum x 30,034 ops/sec
$ d8 s3.js
lensum x 22,738 ops/sec
# ~25% slower

hope is back!

semantics
is still broken


var v = new Vector(10, 10)
v['dot:'](10)
// TypeError: undefined is not a function

Vector.understands = new Set([
  'x', 'x:',
  'y', 'y:',
  'add:',
  'dot:'
]);

// ΣSend handler for 'x'
function Handler(self) {
  if (???) {
    return self.x();
  }
  return Handler.site.miss(self);
}

// ΣSend handler for 'x'
function Handler(self) {
  if (self.constructor
          .understands.has("x")) {
    return self.x();
  }
  return Handler.site.miss(self);
}

sloooooow!

too hard for V8 to optimize


// ΣSend handler for 'x'
function Handler(self) {
  if (self.constructor
          .understands.has("x")) {
    return self.x();
  }
  return Handler.site.miss(self);
}

// ΣSend handler for 'x'
function Handler(self) {
  if (self.constructor === ???) {
    return self.x();
  }
  return Handler.site.miss(self);
}

bootstrap uninitialized

& relink on the miss!


SendSite.prototype.bootstrap = function () {
  this.link(CompileHandler({
    args: ['self'],
    op: 'null',
    check: 'false' // ⇐ ALWAYS MISS
  }));
};

SendSite.prototype.miss = function (self) {
  var ctor = self.constructor;
  if (typeof ctor === "function" &&
      ctor.understands instanceof Set &&
      ctor.understands.has(this.key)) {
    /* link specialized handler & return */
  }
  throw new Error(...);
};

// Specialized handler
function Template(ctor) {
  return function Handler(self) {
    if (self.constructor === ctor) {
      return self.x();
    }
    return Handler.site.miss(self);
  };
}

// Specialized handler
function Template(ctor) {
  return function Handler(self) {
    if (self.constructor === ctor) {
      return self.x;
    }
    return Handler.site.miss(self);
  };
}
$ d8 s4.js
lensum x 1,486 ops/sec
# 20x slower

// If the function is in new space we assume
// it's more likely to change and thus prefer
// the general IC code.
if (!it->isolate()->heap()->InNewSpace(*candidate)) {
  /* ... */
}

// I command thee: inline if the name starts with 'Σ'.
if (!it->isolate()->heap()->InNewSpace(*candidate) ||
    (it->name()->IsString() &&
     Handle<String>::cast(
        it->name())->Get(0) == L'\u03A3')) {
  /* ... */
}
$ d8-x s4.js
lensum x 11,744 ops/sec
# we are back in business
# "only" 2.5x slower

case Variable::CONTEXT:
  if ((variable->maybe_assigned() == kNotAssigned) &&
      context->ActualValue()->IsConstant()) {
    /*
     * constant fold read-only context variables
     * accessed from constant contexts.
     */
  }
  /* else emit HLoadContextSlot */
$ d8-x s4.js
lensum x 23,179 ops/sec
# "only" ~25% slower

only at: left

as an exercise :)

$ d8-x s5.js
lensum x 22,518 ops/sec
# still ~25% slower

now semantics
is correct!

d8> var p = new Vector(0, 0)
d8> p['dot:'](10);
Error: object 10 does not understand x
d8> lensum([p], 2, 3);
Error: index 2 out of bounds [0, 1)

where are
these 25%?

compiler issues

e.g. unreachable blocks are not removed from the graph "poisoning" register allocation, bounds check removal does not remove, type propagation does not propagate stuff…

$ d8-x s5.js
lensum x 28,055 ops/sec
# only ~7% slower
$ d8-x s5.js
lensum x 28,055 ops/sec
# only ~7% slower
# depends on CPU now

JavaScript is
invokedynamic

totally enough
expressivity

almost enough
optimizations

too much
heuristics

we need
conventions

real challenge:
try compiling ^

THANK YOU!