diff --git a/lab17/compiler.js b/lab17/compiler.js new file mode 100644 index 0000000..2750835 --- /dev/null +++ b/lab17/compiler.js @@ -0,0 +1,294 @@ +"use strict"; + +const fs = require('fs'); + +const opcodes = require('./op-codes.js').opcodes; + +const MAX_BUFF_SIZE = 256; + +// Constants for types +const LIST = 1; +const OP = 2; +const NUM = 3; +const BOOL = 4; +const VAR = 5; + +/** + * The Compiler class is responsible for taking a .scm + * text file and converting it into bytecode format. + */ +class Compiler { + /** + * Constructor. + */ + constructor() { + this.buildMnemonicLookup(); + this.varMap = {}; + this.varOffset = 0; + } + + /** + * Tokenizes a Scheme file, stripping out any comments. + * + * @param {String} contents - Scheme file, as text. + * + * @returns {[String]} - Array of tokens, represented as strings. + */ + tokenize(contents) { + let lines = contents.trim().split('\n') + let tokens = []; + lines.forEach((ln) => { + // Ensuring that parens are always surrounded + // by spaces to simplify parsing. + ln = ln.replaceAll("(", " ( ") + .replaceAll(")", " ) "); + + // The comment character in Scheme is ';' + ln = ln.replace(/;.*/, ""); + + tokens.push(...ln.split(/\s+/).filter(s=>s.length!==0)); + }); + return tokens; + } + + /** + * Parses a stream of tokens, returning an array of objects + * representing the top-level Scheme lists in the program. + * (Note that in Scheme, a list is treated as a function call.) + * + * @param {[String]} tokens - An array of tokens. + * + * @returns {[Object]} - The AST, as a JS object literal. + */ + parse(tokens) { + // The top level AST does not have a type. + let ast = { children: []}; + for (let i=0; i { + if (key === 'parent') return value.id; + else return value; + })}`); + } + + /** + * Writes a byte to the next position in the bytecode buffer, + * updating the offset to the position for the new write. + * + * @param {Number} byte - A valid byte. 
+ */ + writeByte(byte) { + this.offset = this.bytecode.writeUInt8(byte, this.offset); + } + + /** + * Looks up the opcode by its mnemonic and writes it to + * the bytecode buffer. + * + * @param {String} mnemonic - The mnemonic for the opcode. + */ + writeOp(mnemonic) { + let opcode = this.lookupTable[mnemonic]; + if (opcode === undefined) { + throw new Error(`The mnemonic ${mnemonic} is not defined.`); + } + this.writeByte(this.lookupTable[mnemonic]); + } + + /** + * Converts AST into binary bytecode. + * + * @param {Object} ast - abstract syntax tree of program. + */ + writeBytecode(ast) { + if (ast.type === NUM) { + // Numbers are just pushed on to the stack. + this.writeOp('PUSH1'); + this.writeByte(ast.value); + return; + } else if (ast.type === BOOL) { + // + // ***YOUR CODE HERE*** + // + // Booleans will be stored as either 1 for true, or as a 0 for false. + return; + } else if (ast.type === VAR) { + // + // ***YOUR CODE HERE*** + // + // We look up the offset for a variable and push the offset + // value on to the stack. The 'MLOAD' operation will + // retrieve the value stored at that position in the memory. + return; + } + + // If we made it here, we have a list. + // The first argument is the name of the 'function' + // that we will be invoking. + let first = ast.children[0]; + + // Almost all functions need some special handling for the + // first argument. Some functions will need the additional + // arguments stored in 'rest'. + let second = ast.children[1]; + let rest = ast.children.slice(2); + + switch (first.value) { + case "println": + this.writeBytecode(second); + this.writeOp('PRINT'); + break; + + case "define": + // + // ***YOUR CODE HERE*** + // + // The define function lets us store variables. + // + // The variable name is stored in 'second.value'. + // Update the 'this.varMap' object to store the current + // value of 'this.varOffset'. 
+ // + // The VM will need to push the value on to the stack, + // push 'this.varOffset' on to the stack, and then + // invoke 'MSTORE'. + // + // Increment this.varOffset so that it points to the next + // position in memory. + break; + + case "if": + // + // ***YOUR CODE HERE*** + // + // EXTRA CREDIT! + // Add support for if expressions. + // The cond.scm file gives you some good examples. + break; + + case "+": + this.writeBytecode(second); + rest.forEach((x) => { + this.writeBytecode(x); + this.writeOp('ADD'); + }); + break; + + case "*": + // + // ***YOUR CODE HERE*** + // + // Using the '+' case as a template, add support + // for '*'. Note that the 'MUL' opcode only works + // with two arguments, whereas '*' allows an arbitrary + // number of arguments. + break; + + case "-": + // + // ***YOUR CODE HERE*** + // + // Add support for '-'. The approach here will be + // similar to the solution for '+' and '*'. However, + // one key difference is that the order of the arguments + // matters. You will need to use 'SWAP1' to get the + // arguments ordered correctly before invoking 'SUB'. + break; + + default: + throw new Error(`Unexpected head: '${first.value}'`); + } + } + + /** + * Builds up a mapping of opcode mnemonics to the corresponding + * opcode values (the keys of the opcodes table). + */ + buildMnemonicLookup() { + this.lookupTable = {}; + Object.keys(opcodes).forEach((opcode) => { + let inst = opcodes[opcode]; + this.lookupTable[inst.mnemonic] = opcode; + }); + } + + /** + * This method takes a scheme file, tokenizes and parses it, + * and finally compiles it to binary bytecode. + * + * @param {String} fileName - The name of the scheme file. + * + * @returns {String} - The name of the bytecode file. 
+ */ + compileScheme(fileName) { + if (!fileName.toLowerCase().endsWith('.scm')) { + throw new Error(`${fileName} does not end with a .scm extension.`); + } + + fs.readFileSync(fileName); + let contents = fs.readFileSync(fileName, 'utf8'); + + let tokens = this.tokenize(contents); + let asts = this.parse(tokens); + + // The bytecode size cannot be larger than MAX_BUFF_SIZE. + this.bytecode = Buffer.alloc(MAX_BUFF_SIZE); + // The offset tracks the current position in the bytecode buffer. + this.offset = 0; + + asts.forEach((ast) => { + this.writeBytecode(ast); + }); + + // The output file will have the same name as the input file, + // except that '.scm' will be replaced with '.byco'. + let outputFile = fileName.replace(/.scm\b/i, ".byco"); + fs.writeFileSync(outputFile, this.bytecode.slice(0, this.offset, 'hex')); + + return outputFile; + } +} + +// Handling command line arguments. +if (process.argv0 === 'node') { + process.argv.shift(); +} +if (process.argv.length !== 2) { + console.log("compiler.js "); + process.exit(1); +} + +let cmplr = new Compiler(); +let scmFile = process.argv[1]; + +console.log(`Compiling ${scmFile}...`); +let bytecodeFile = cmplr.compileScheme(scmFile); + +console.log(`Bytecode written to ${bytecodeFile}.`); diff --git a/lab17/cond.byco b/lab17/cond.byco new file mode 100644 index 0000000..5d59228 Binary files /dev/null and b/lab17/cond.byco differ diff --git a/lab17/cond.scm b/lab17/cond.scm new file mode 100644 index 0000000..6063809 --- /dev/null +++ b/lab17/cond.scm @@ -0,0 +1,4 @@ +(println #t) +(if #t (println 3) (println 4)) +(if #f (println 3) (println 4)) + diff --git a/lab17/op-codes.js b/lab17/op-codes.js new file mode 100644 index 0000000..f12141d --- /dev/null +++ b/lab17/op-codes.js @@ -0,0 +1,33 @@ +'use strict'; + +let opcodes = { + 0x01: { mnemonic: 'ADD', evaluate: (vm) => { + let v1 = vm.stack.pop(); + let v2 = vm.stack.pop(); + vm.stack.push(v1+v2); + }}, + 0x02: { mnemonic: 'MUL', evaluate: (vm) => { + // + // **YOUR 
CODE HERE** + // + // Pop the top two arguments off of the stack, + // and then push the result on to the stack. + }}, + 0x5B: { mnemonic: 'JUMPDEST', evaluate: (vm) => { + // Does nothing. We could check to make sure that jumps + // always land at JUMPDEST opcodes, but it is not totally + // clear that it is worth the bother. + }}, + 0x60: { mnemonic: 'PUSH1', evaluate: (vm) => { + // The next byte is data, not another instruction + vm.pc++; + let v = vm.bytecode.readUInt8(vm.pc); + vm.stack.push(v); + }}, + 0x0c: { mnemonic: 'PRINT', evaluate: (vm) => { + // **NOTE**: This is not a real EVM opcode. + console.log(vm.stack.pop()); + }}, +}; + +exports.opcodes = opcodes; diff --git a/lab17/print.byco b/lab17/print.byco new file mode 100644 index 0000000..ee742b4 --- /dev/null +++ b/lab17/print.byco @@ -0,0 +1 @@ +` \ No newline at end of file diff --git a/lab17/print.scm b/lab17/print.scm new file mode 100644 index 0000000..3cf4c07 --- /dev/null +++ b/lab17/print.scm @@ -0,0 +1,3 @@ +; Simple test that prints 3. 
+(println 3) + diff --git a/lab17/store.byco b/lab17/store.byco new file mode 100644 index 0000000..818dcae Binary files /dev/null and b/lab17/store.byco differ diff --git a/lab17/store.scm b/lab17/store.scm new file mode 100644 index 0000000..2cf8a01 --- /dev/null +++ b/lab17/store.scm @@ -0,0 +1,5 @@ +(define x 3) +(define y (+ x 1)) +(println (- x 2)) +(println (* x y)) + diff --git a/lab17/test1.byco b/lab17/test1.byco new file mode 100644 index 0000000..aa78f81 --- /dev/null +++ b/lab17/test1.byco @@ -0,0 +1 @@ +``` \ No newline at end of file diff --git a/lab17/test1.scm b/lab17/test1.scm new file mode 100644 index 0000000..be6605e --- /dev/null +++ b/lab17/test1.scm @@ -0,0 +1,2 @@ +(println (+ 1 2 3)) + diff --git a/lab17/test2.byco b/lab17/test2.byco new file mode 100644 index 0000000..898ac3e --- /dev/null +++ b/lab17/test2.byco @@ -0,0 +1,2 @@ +``` ` `` ` +`` \ No newline at end of file diff --git a/lab17/test2.scm b/lab17/test2.scm new file mode 100644 index 0000000..b6f2b62 --- /dev/null +++ b/lab17/test2.scm @@ -0,0 +1,4 @@ +(println (+ 2 3 4)) +(println (- 13 (* 2 4))) +(println (- 10 4 3)) + diff --git a/lab17/vm.js b/lab17/vm.js new file mode 100644 index 0000000..781a1f0 --- /dev/null +++ b/lab17/vm.js @@ -0,0 +1,68 @@ +'use strict'; + +const fs = require('fs'); +const opcodes = require('./op-codes.js').opcodes; + +/** + * The VM is responsible for executing the bytecode format. + */ +class VirtualMachine { + + /** + * Initializes the virtual machine with the specified amount of + * gas. The stack and memory are both initially empty. + * + * @param {Number} gas - Amount of gas the VM begins with. + */ + constructor() { + this.stack = []; + this.memory = []; + } + + /** + * Loads a bytecode file and returns an array of strings, + * which are the commands within the file. + */ + static loadBytecode(bytecodeFile) { + let contents = fs.readFileSync(bytecodeFile); + return Buffer.from(contents); + } + + /** + * Evaluates the specified file. 
+ */ + evaluate(bytecodeFile) { + this.bytecode = this.constructor.loadBytecode(bytecodeFile); + + // Initializing the program counter to keep track of our + // place within the program. + this.pc = 0; + + while (this.pc < this.bytecode.length) { + let opcode = this.bytecode.readUInt8(this.pc); + //console.log(`Evaluating ${opcode.toString(16)}`); + let operation = opcodes[opcode]; + if (operation === undefined) { + throw new Error(`Unable to find instruction for ${opcode.toString(16)}`); + } + operation.evaluate(this); + + this.pc++; + } + } +} + +// Handling command line arguments. +if (process.argv0 === 'node') { + process.argv.shift(); +} +if (process.argv.length !== 2) { + console.log("vm.js "); + process.exit(1); +} + +let bytecodeFile = process.argv[1]; + +console.log(`Executing ${bytecodeFile}...`); +let vm = new VirtualMachine(); +vm.evaluate(bytecodeFile);