lab17: init
This commit is contained in:
294
lab17/compiler.js
Normal file
294
lab17/compiler.js
Normal file
@@ -0,0 +1,294 @@
|
||||
"use strict";
|
||||
|
||||
const fs = require('fs');
|
||||
|
||||
const opcodes = require('./op-codes.js').opcodes;
|
||||
|
||||
// Size, in bytes, of the buffer allocated to hold a compiled program.
const MAX_BUFF_SIZE = 256;

// Constants for types. Each AST node produced by the parser carries
// one of these tags in its 'type' field.
const LIST = 1;
const OP = 2;
const NUM = 3;
const BOOL = 4;
const VAR = 5;
|
||||
|
||||
/**
 * The Compiler class is responsible for taking a .scm
 * text file and converting it into bytecode format.
 */
class Compiler {
  /**
   * Constructor. Builds the mnemonic lookup table and starts with no
   * variables defined.
   */
  constructor() {
    this.buildMnemonicLookup();
    // Maps a variable name to the memory offset assigned by 'define'.
    this.varMap = {};
    // Next unused memory offset.
    this.varOffset = 0;
  }

  /**
   * Tokenizes a Scheme file, stripping out any comments.
   *
   * @param {String} contents - Scheme file, as text.
   *
   * @returns {[String]} - Array of tokens, represented as strings.
   */
  tokenize(contents) {
    const tokens = [];
    for (const line of contents.trim().split('\n')) {
      const spaced = line
        // The comment character in Scheme is ';'
        .replace(/;.*/, "")
        // Ensuring that parens are always surrounded
        // by spaces to simplify parsing.
        .replaceAll("(", " ( ")
        .replaceAll(")", " ) ");
      tokens.push(...spaced.split(/\s+/).filter((s) => s.length !== 0));
    }
    return tokens;
  }

  /**
   * Parses a stream of tokens, returning an array of objects
   * representing the top-level Scheme lists in the program.
   * (Note that in Scheme, a list is treated as a function call.)
   *
   * @param {[String]} tokens - An array of tokens.
   *
   * @returns {[Object]} - The AST, as a JS object literal.
   *
   * @throws {Error} - If the parentheses are not balanced.
   */
  parse(tokens) {
    // The top level AST does not have a type.
    const root = { children: [] };
    let ast = root;
    for (const tok of tokens) {
      if (tok === "(") {
        const newAst = { parent: ast, type: LIST, children: [] };
        ast.children.push(newAst);
        ast = newAst;
      } else if (tok === ")") {
        // Closing a list that was never opened would walk off the tree.
        if (ast === root) {
          throw new Error("Unbalanced parentheses: unexpected ')'.");
        }
        ast = ast.parent;
      } else if (/^\d+$/.test(tok)) {
        ast.children.push({ type: NUM, value: Number.parseInt(tok, 10) });
      } else if (tok === "#t") {
        ast.children.push({ type: BOOL, value: true });
      } else if (tok === "#f") {
        ast.children.push({ type: BOOL, value: false });
      } else if (/^\w+$/.test(tok)) {
        ast.children.push({ type: VAR, value: tok });
      } else {
        ast.children.push({ type: OP, value: tok });
      }
    }
    // Still inside a list at the end of input: a ')' is missing.
    if (ast !== root) {
      throw new Error("Unbalanced parentheses: missing ')'.");
    }
    return root.children;
  }

  /**
   * Prints out an AST, filtering out circular references.
   *
   * @param {Object} ast - The AST to print.
   */
  printAST(ast) {
    const withoutCycles = (key, value) => {
      // 'parent' links point back up the tree, so only the parent's 'id'
      // is emitted. Nodes built by parse() carry no 'id', so for them the
      // key is simply left out of the output.
      if (key === 'parent') {
        return value === undefined || value === null ? undefined : value.id;
      }
      return value;
    };
    console.log(`AST is ${JSON.stringify(ast, withoutCycles)}`);
  }

  /**
   * Writes a byte to the next position in the bytecode buffer,
   * updating the offset to the position for the new write.
   *
   * @param {Number} byte - A valid byte.
   */
  writeByte(byte) {
    // writeUInt8 returns the offset just past the byte it wrote.
    this.offset = this.bytecode.writeUInt8(byte, this.offset);
  }

  /**
   * Looks up the opcode by its mnemonic and writes it to
   * the bytecode buffer.
   *
   * @param {String} mnemonic - The mnemonic for the opcode.
   *
   * @throws {Error} - If the mnemonic is not in the lookup table.
   */
  writeOp(mnemonic) {
    const opcode = this.lookupTable[mnemonic];
    if (opcode === undefined) {
      throw new Error(`The mnemonic ${mnemonic} is not defined.`);
    }
    this.writeByte(opcode);
  }

  /**
   * Converts AST into binary bytecode.
   *
   * Every mnemonic used here must exist in the opcode table; writeOp
   * throws for any that is missing.
   *
   * @param {Object} ast - abstract syntax tree of program.
   *
   * @throws {Error} - On a missing expression, an undefined variable,
   *    an empty list, an unknown list head, or an undefined mnemonic.
   */
  writeBytecode(ast) {
    if (ast === undefined || ast === null) {
      // e.g. '(println)' has no argument to compile.
      throw new Error("Missing expression.");
    }

    if (ast.type === NUM) {
      // Numbers are just pushed on to the stack.
      // PUSH1 is followed by one data byte, written with writeUInt8.
      this.writeOp('PUSH1');
      this.writeByte(ast.value);
      return;
    } else if (ast.type === BOOL) {
      // Booleans will be stored as either 1 for true, or as a 0 for false.
      this.writeOp('PUSH1');
      this.writeByte(ast.value ? 1 : 0);
      return;
    } else if (ast.type === VAR) {
      // We look up the offset for a variable and push the offset
      // value on to the stack. The 'MLOAD' operation will
      // retrieve the value stored at that position in the memory.
      if (!Object.prototype.hasOwnProperty.call(this.varMap, ast.value)) {
        throw new Error(`Undefined variable: '${ast.value}'`);
      }
      this.writeOp('PUSH1');
      this.writeByte(this.varMap[ast.value]);
      this.writeOp('MLOAD');
      return;
    }

    // If we made it here, we should have a list.
    if (ast.type !== LIST || ast.children.length === 0) {
      throw new Error("Expected a non-empty list.");
    }

    // The first argument is the name of the 'function'
    // that we will be invoking.
    // Almost all functions need some special handling for the
    // second element. Some functions will need the additional
    // arguments stored in 'rest'.
    const [first, second, ...rest] = ast.children;

    switch (first.value) {
      case "println":
        this.writeBytecode(second);
        this.writeOp('PRINT');
        break;

      case "define": {
        // The define function lets us store variables.
        // The variable name is stored in 'second.value'.
        if (second === undefined || second.type !== VAR) {
          throw new Error("define expects a variable name.");
        }
        // Push the value, push the variable's offset, then MSTORE.
        this.writeBytecode(rest[0]);
        // The offset is read after compiling the value, in case the
        // value expression itself advanced this.varOffset.
        const slot = this.varOffset;
        this.writeOp('PUSH1');
        this.writeByte(slot);
        this.writeOp('MSTORE');
        this.varMap[second.value] = slot;
        // Point at the next position in memory.
        this.varOffset = slot + 1;
        break;
      }

      case "if":
        // FIXME: 'if' is not implemented yet (extra credit in the lab);
        // no bytecode is emitted for it.
        // The cond.scm file gives some good examples.
        break;

      case "+":
        this.writeBytecode(second);
        rest.forEach((x) => {
          this.writeBytecode(x);
          this.writeOp('ADD');
        });
        break;

      case "*":
        // 'MUL' only works with two arguments, whereas '*' allows an
        // arbitrary number, so fold the arguments in one at a time.
        this.writeBytecode(second);
        rest.forEach((x) => {
          this.writeBytecode(x);
          this.writeOp('MUL');
        });
        break;

      case "-":
        // The order of the arguments matters, so 'SWAP1' puts the running
        // result back on top of the stack before 'SUB'.
        // NOTE(review): assumes SUB computes (top of stack) minus (next
        // value), as in the EVM - confirm against the VM's SUB opcode.
        this.writeBytecode(second);
        rest.forEach((x) => {
          this.writeBytecode(x);
          this.writeOp('SWAP1');
          this.writeOp('SUB');
        });
        break;

      default:
        throw new Error(`Unexpected head: '${first.value}'`);
    }
  }

  /**
   * Builds up a mapping of opcode mnemonics to the corresponding
   * numeric opcode values.
   */
  buildMnemonicLookup() {
    this.lookupTable = {};
    for (const [code, inst] of Object.entries(opcodes)) {
      // Object keys are always strings; store the opcode as a number so
      // it can be written to the buffer without implicit coercion.
      this.lookupTable[inst.mnemonic] = Number(code);
    }
  }

  /**
   * This method takes a scheme file, tokenizes and parses it,
   * and finally compiles it to binary bytecode.
   *
   * @param {String} fileName - The name of the scheme file.
   *
   * @returns {String} - The name of the bytecode file.
   *
   * @throws {Error} - If fileName does not end with '.scm', or if
   *    reading, compiling or writing fails.
   */
  compileScheme(fileName) {
    if (!fileName.toLowerCase().endsWith('.scm')) {
      throw new Error(`${fileName} does not end with a .scm extension.`);
    }

    const contents = fs.readFileSync(fileName, 'utf8');

    const tokens = this.tokenize(contents);
    const asts = this.parse(tokens);

    // The bytecode size cannot be larger than MAX_BUFF_SIZE.
    this.bytecode = Buffer.alloc(MAX_BUFF_SIZE);
    // The offset tracks the current position in the bytecode buffer.
    this.offset = 0;

    asts.forEach((ast) => {
      this.writeBytecode(ast);
    });

    // The output file will have the same name as the input file,
    // except that '.scm' will be replaced with '.byco'. The pattern is
    // anchored to the end so only the extension is rewritten.
    const outputFile = fileName.replace(/\.scm$/i, ".byco");
    // Only the bytes actually written are saved, not the whole buffer.
    fs.writeFileSync(outputFile, this.bytecode.subarray(0, this.offset));

    return outputFile;
  }
}
|
||||
|
||||
// Handling command line arguments.
// process.argv always starts with the node executable and the script
// path, however node was launched, so the user's arguments begin at
// index 2.
const cliArgs = process.argv.slice(2);
if (cliArgs.length !== 1) {
  console.log("compiler.js <scheme file>");
  process.exit(1);
}

const cmplr = new Compiler();
const scmFile = cliArgs[0];

console.log(`Compiling ${scmFile}...`);
const bytecodeFile = cmplr.compileScheme(scmFile);

console.log(`Bytecode written to ${bytecodeFile}.`);
|
||||
BIN
lab17/cond.byco
Normal file
BIN
lab17/cond.byco
Normal file
Binary file not shown.
4
lab17/cond.scm
Normal file
4
lab17/cond.scm
Normal file
@@ -0,0 +1,4 @@
|
||||
; Exercises boolean literals and 'if' expressions.
(println #t)
; The condition is true, so the first branch runs and prints 3.
(if #t (println 3) (println 4))
; The condition is false, so the second branch runs and prints 4.
(if #f (println 3) (println 4))
|
||||
|
||||
33
lab17/op-codes.js
Normal file
33
lab17/op-codes.js
Normal file
@@ -0,0 +1,33 @@
|
||||
'use strict';
|
||||
|
||||
/**
 * Table of supported instructions, keyed by opcode byte. Each entry has
 * a 'mnemonic' and an 'evaluate(vm)' function that carries out the
 * instruction using the VM's stack, bytecode and program counter.
 */
const opcodes = {
  0x01: { mnemonic: 'ADD', evaluate: (vm) => {
    // Pop the top two values and push their sum.
    const v1 = vm.stack.pop();
    const v2 = vm.stack.pop();
    vm.stack.push(v1 + v2);
  }},
  0x02: { mnemonic: 'MUL', evaluate: (vm) => {
    // Pop the top two arguments off of the stack,
    // and then push the result on to the stack.
    const v1 = vm.stack.pop();
    const v2 = vm.stack.pop();
    vm.stack.push(v1 * v2);
  }},
  0x5B: { mnemonic: 'JUMPDEST', evaluate: (vm) => {
    // Does nothing. We could check to make sure that jumps
    // always land at JUMPDEST opcodes, but it is not totally
    // clear that it is worth the bother.
  }},
  0x60: { mnemonic: 'PUSH1', evaluate: (vm) => {
    // The next byte is data, not another instruction
    vm.pc++;
    const v = vm.bytecode.readUInt8(vm.pc);
    vm.stack.push(v);
  }},
  0x0c: { mnemonic: 'PRINT', evaluate: (vm) => {
    // **NOTE**: This is not a real EVM opcode.
    console.log(vm.stack.pop());
  }},
};
|
||||
|
||||
exports.opcodes = opcodes;
|
||||
1
lab17/print.byco
Normal file
1
lab17/print.byco
Normal file
@@ -0,0 +1 @@
|
||||
`
|
||||
3
lab17/print.scm
Normal file
3
lab17/print.scm
Normal file
@@ -0,0 +1,3 @@
|
||||
; Simple test that prints 3.
(println 3)
|
||||
|
||||
BIN
lab17/store.byco
Normal file
BIN
lab17/store.byco
Normal file
Binary file not shown.
5
lab17/store.scm
Normal file
5
lab17/store.scm
Normal file
@@ -0,0 +1,5 @@
|
||||
; Exercises 'define', variable lookup, '-' and '*'.
(define x 3)
(define y (+ x 1))
(println (- x 2))   ; 3 - 2 = 1
(println (* x y))   ; 3 * 4 = 12
|
||||
|
||||
1
lab17/test1.byco
Normal file
1
lab17/test1.byco
Normal file
@@ -0,0 +1 @@
|
||||
```
|
||||
2
lab17/test1.scm
Normal file
2
lab17/test1.scm
Normal file
@@ -0,0 +1,2 @@
|
||||
; Adds three numbers with a single '+'; the sum is 6.
(println (+ 1 2 3))
|
||||
|
||||
2
lab17/test2.byco
Normal file
2
lab17/test2.byco
Normal file
@@ -0,0 +1,2 @@
|
||||
````
|
||||
``<02>`
|
||||
4
lab17/test2.scm
Normal file
4
lab17/test2.scm
Normal file
@@ -0,0 +1,4 @@
|
||||
; Exercises '+', '-' and '*' with nested and multi-argument calls.
(println (+ 2 3 4))        ; 2 + 3 + 4 = 9
(println (- 13 (* 2 4)))   ; 13 - 8 = 5
(println (- 10 4 3))       ; 10 - 4 - 3 = 3
|
||||
|
||||
68
lab17/vm.js
Normal file
68
lab17/vm.js
Normal file
@@ -0,0 +1,68 @@
|
||||
'use strict';
|
||||
|
||||
const fs = require('fs');
|
||||
const opcodes = require('./op-codes.js').opcodes;
|
||||
|
||||
/**
 * The VM is responsible for executing the bytecode format.
 */
class VirtualMachine {
  /**
   * Initializes the virtual machine.
   * The stack and memory are both initially empty.
   */
  constructor() {
    this.stack = [];
    this.memory = [];
  }

  /**
   * Loads a bytecode file.
   *
   * @param {String} bytecodeFile - Path of the bytecode file to read.
   *
   * @returns {Buffer} - The raw bytes of the file.
   */
  static loadBytecode(bytecodeFile) {
    // With no encoding given, readFileSync already returns a Buffer.
    return fs.readFileSync(bytecodeFile);
  }

  /**
   * Evaluates the specified file, running each instruction in turn
   * until the program counter moves past the end of the bytecode.
   *
   * @param {String} bytecodeFile - Path of the bytecode file to run.
   *
   * @throws {Error} - If a byte does not match any known opcode.
   */
  evaluate(bytecodeFile) {
    // Looked up through this.constructor so that a subclass can supply
    // its own loadBytecode.
    this.bytecode = this.constructor.loadBytecode(bytecodeFile);

    // Initializing the program counter to keep track of our
    // place within the program.
    this.pc = 0;

    while (this.pc < this.bytecode.length) {
      const opcode = this.bytecode.readUInt8(this.pc);
      const operation = opcodes[opcode];
      if (operation === undefined) {
        throw new Error(`Unable to find instruction for ${opcode.toString(16)}`);
      }
      // An instruction may itself move this.pc (PUSH1 steps over its
      // data byte); the increment below then moves to the next opcode.
      operation.evaluate(this);

      this.pc++;
    }
  }
}
|
||||
|
||||
// Handling command line arguments.
// process.argv always starts with the node executable and the script
// path, however node was launched, so the user's arguments begin at
// index 2.
const cliArgs = process.argv.slice(2);
if (cliArgs.length !== 1) {
  console.log("vm.js <bytecode file>");
  process.exit(1);
}

const bytecodeFile = cliArgs[0];

console.log(`Executing ${bytecodeFile}...`);
const vm = new VirtualMachine();
vm.evaluate(bytecodeFile);
|
||||
Reference in New Issue
Block a user