lab17: init

2026-04-22 11:03:07 -07:00
parent 6aa6c9f076
commit 6d6820009b
13 changed files with 417 additions and 0 deletions
--- a/lab17/compiler.js
+++ b/lab17/compiler.js
@@ -0,0 +1,294 @@
+"use strict";
+
+const fs = require('fs');
+
+const opcodes = require('./op-codes.js').opcodes;
+
+const MAX_BUFF_SIZE = 256; // Size in bytes of the buffer compileScheme allocates.
+
+// Constants for the 'type' field of the AST nodes built by Compiler.parse.
+const LIST = 1;  // Parenthesized list, e.g. (+ 1 2)
+const OP = 2;    // Any token not matched as a number, boolean, or word
+const NUM = 3;   // Unsigned decimal integer literal
+const BOOL = 4;  // #t or #f
+const VAR = 5;   // Word token matching /^\w+$/ (also heads such as println)
+
+/**
+ * The Compiler class is responsible for taking a .scm
+ * text file and converting it into bytecode format.
+ */
+class Compiler {
+  /**
+   * Constructor.  Builds the opcode table and resets variable state.
+   */
+  constructor() {
+    this.buildMnemonicLookup();
+    this.varMap = {};     // variable name -> memory offset
+    this.varOffset = 0;   // next free memory offset
+  }
+
+  /**
+   * Tokenizes a Scheme file, stripping out any comments.
+   *
+   * @param {String} contents - Scheme file, as text.
+   *
+   * @returns {[String]} - Array of tokens, represented as strings.
+   */
+  tokenize(contents) {
+    const tokens = [];
+    for (const line of contents.trim().split('\n')) {
+      // The comment character in Scheme is ';'.  Everything from the
+      // first ';' to the end of the line is discarded.
+      const code = line.replace(/;.*/, "");
+
+      // Ensuring that parens are always surrounded by spaces, so that
+      // a plain whitespace split separates them from their neighbours.
+      const spaced = code.replaceAll("(", " ( ")
+                         .replaceAll(")", " ) ");
+      // Splitting can yield empty strings at the edges; drop them.
+      tokens.push(...spaced.split(/\s+/).filter((s) => s.length !== 0));
+    }
+    return tokens;
+  }
+
+  /**
+   * Parses a stream of tokens, returning an array of objects
+   * representing the top-level Scheme forms in the program.
+   * (Note that in Scheme, a list is treated as a function call.)
+   *
+   * @param {[String]} tokens - An array of tokens.
+   * @returns {[Object]} - The top-level AST nodes.
+   * @throws {Error} If the parentheses are not balanced.
+   */
+  parse(tokens) {
+    // The top level AST does not have a type (or a parent).
+    const root = { children: [] };
+    let ast = root;
+    for (const tok of tokens) {
+      if (tok === "(") {
+        const newAst = { parent: ast, type: LIST, children: [] };
+        ast.children.push(newAst);
+        ast = newAst;
+      } else if (tok === ")") {
+        if (ast === root) throw new Error("Unexpected ')': no open list.");
+        ast = ast.parent;
+      } else if (/^\d+$/.test(tok)) {
+        ast.children.push({ type: NUM, value: Number.parseInt(tok, 10) });
+      } else if (tok === "#t" || tok === "#f") {
+        ast.children.push({ type: BOOL, value: tok === "#t" });
+      } else if (/^\w+$/.test(tok)) {
+        ast.children.push({ type: VAR, value: tok });
+      } else {
+        ast.children.push({ type: OP, value: tok });
+      }
+    }
+    if (ast !== root) throw new Error("Missing ')': unclosed list.");
+    return root.children;
+  }
+
+  /**
+   * Prints out an AST, filtering out circular references.
+   *
+   * @param {Object} ast - The AST to print.
+   */
+  printAST(ast) {
+    // parse() assigns no 'id', so each 'parent' back-reference is omitted.
+    const replaceParent = (key, value) => (key === 'parent' ? value?.id : value);
+    const json = JSON.stringify(ast, replaceParent);
+    console.log(`AST is ${json}`);
+  }
+
+  /**
+   * Writes a byte to the next position in the bytecode buffer,
+   * updating the offset to the position for the new write.
+   *
+   * @param {Number} byte - A valid byte (0-255).
+   */
+  writeByte(byte) {
+    this.offset = this.bytecode.writeUInt8(byte, this.offset);
+  }
+
+  /**
+   * Writes the opcode for the given mnemonic to the bytecode buffer.
+   *
+   * @param {String} mnemonic - The mnemonic for the opcode.
+   * @throws {Error} If no opcode has that mnemonic.
+   */
+  writeOp(mnemonic) {
+    const opcode = this.lookupTable[mnemonic];
+    if (opcode === undefined) {
+      throw new Error(`The mnemonic ${mnemonic} is not defined.`);
+    }
+    this.writeByte(opcode);
+  }
+
+  /**
+   * Recursively emits the bytecode for one AST node.
+   * @param {Object} ast - A node of the tree produced by parse().
+   * @throws {Error} If a list's head is not a supported function.
+   */
+  writeBytecode(ast) {
+    if (ast.type === NUM) {
+      // Numbers are just pushed on to the stack.
+      this.writeOp('PUSH1');
+      this.writeByte(ast.value);
+      return;
+    } else if (ast.type === BOOL) {
+      //
+      // ***YOUR CODE HERE***
+      //
+      // Booleans will be stored as either 1 for true, or as a 0 for false.
+      return;
+    } else if (ast.type === VAR) {
+      //
+      // ***YOUR CODE HERE***
+      //
+      // We look up the offset for a variable and push the offset
+      // value on to the stack.  The 'MLOAD' operation will
+      // retrieve the value stored at that position in the memory.
+      return;
+    }
+
+    // If we made it here, we have a list.
+    // The first argument is the name of the 'function'
+    // that we will be invoking.
+    let first = ast.children[0];
+
+    // Almost all functions need some special handling for the
+    // first argument.  Some functions will need the additional
+    // arguments stored in 'rest'.
+    let second = ast.children[1];
+    let rest = ast.children.slice(2);
+
+    switch (first.value) {
+      case "println":
+        this.writeBytecode(second);
+        this.writeOp('PRINT');
+        break;
+
+      case "define":
+        //
+        // ***YOUR CODE HERE***
+        //
+        // The define function lets us store variables.
+        //
+        // The variable name is stored in 'second.value'.
+        // Update the 'this.varMap' object to store the current
+        // value of 'this.varOffset'.
+        //
+        // The VM will need to push the value on to the stack,
+        // push 'this.varOffset' on to the stack, and then
+        // invoke 'MSTORE'.
+        //
+        // Increment this.varOffset so that it points to the next
+        // position in memory.
+        break;
+
+      case "if":
+        //
+        // ***YOUR CODE HERE***
+        //
+        // EXTRA CREDIT!
+        // Add support for if expressions.
+        // The cond.scm file gives you some good examples.
+        break;
+
+      case "+":
+        this.writeBytecode(second);
+        rest.forEach((x) => {
+          this.writeBytecode(x);
+          this.writeOp('ADD');
+        });
+        break;
+
+      case "*":
+        //
+        // ***YOUR CODE HERE***
+        //
+        // Using the '+' case as a template, add support
+        // for '*'.  Note that the 'MUL' opcode only works
+        // with two arguments, whereas '*' allows an arbitrary
+        // number of arguments.
+        break;
+
+      case "-":
+        //
+        // ***YOUR CODE HERE***
+        //
+        // Add support for '-'.  The approach here will be
+        // similar to the solution for '+' and '*'.  However,
+        // one key difference is that the order of the arguments
+        // matters.  You will need to use 'SWAP1' to get the
+        // arguments ordered correctly before invoking 'SUB'.
+        break;
+
+      default:
+        throw new Error(`Unexpected head: '${first.value}'`);
+    }
+  }
+
+  /**
+   * Builds up a mapping of opcode mnemonics to the corresponding
+   * numeric opcode bytes, stored in 'this.lookupTable'.
+   */
+  buildMnemonicLookup() {
+    this.lookupTable = {};
+    for (const [opcode, inst] of Object.entries(opcodes)) {
+      // Object keys are strings (e.g. "96"); store the numeric byte.
+      this.lookupTable[inst.mnemonic] = Number(opcode);
+    }
+  }
+
+  /**
+   * This method takes a scheme file, tokenizes and parses it,
+   * and finally compiles it to a binary bytecode file.
+   *
+   * @param {String} fileName - The name of the scheme file.
+   * @returns {String} - The name of the bytecode file.
+   * @throws {Error} If fileName does not end with '.scm'.
+   */
+  compileScheme(fileName) {
+    if (!fileName.toLowerCase().endsWith('.scm')) {
+      throw new Error(`${fileName} does not end with a .scm extension.`);
+    }
+
+    const contents = fs.readFileSync(fileName, 'utf8');
+    const asts = this.parse(this.tokenize(contents));
+
+    // The bytecode size cannot be larger than MAX_BUFF_SIZE.
+    this.bytecode = Buffer.alloc(MAX_BUFF_SIZE);
+    // The offset tracks the current position in the bytecode buffer.
+    this.offset = 0;
+
+    for (const ast of asts) {
+      this.writeBytecode(ast);
+    }
+
+    // The output file will have the same name as the input file,
+    // except that the trailing '.scm' is replaced with '.byco'.  The
+    // pattern is anchored and the dot escaped so that only the real
+    // extension is rewritten (not, say, the 'ascm' in 'ascm.scm').
+    const outputFile = fileName.replace(/\.scm$/i, ".byco");
+
+    // Only the bytes actually written are saved, as raw binary.
+    fs.writeFileSync(outputFile, this.bytecode.subarray(0, this.offset));
+    return outputFile;
+  }
+}
+
+// Handling command line arguments.  process.argv always begins with the
+// node executable and the script path, however node was launched (`node`,
+// `nodejs`, an absolute path, ...), so the user's arguments start at index 2.
+const args = process.argv.slice(2);
+if (args.length !== 1) {
+  console.log("compiler.js <scheme file>");
+  process.exit(1);
+}
+
+let cmplr = new Compiler();
+let scmFile = args[0];
+
+console.log(`Compiling ${scmFile}...`);
+let bytecodeFile = cmplr.compileScheme(scmFile);
+
+console.log(`Bytecode written to ${bytecodeFile}.`);
--- a/lab17/cond.byco
+++ b/lab17/cond.byco
--- a/lab17/cond.scm
+++ b/lab17/cond.scm
@@ -0,0 +1,4 @@
+(println #t)                     ; intended: booleans are stored as 1 / 0
+(if #t (println 3) (println 4))  ; condition is true:  should print 3
+(if #f (println 3) (println 4))  ; condition is false: should print 4
+
--- a/lab17/op-codes.js
+++ b/lab17/op-codes.js
@@ -0,0 +1,33 @@
+'use strict';
+
+let opcodes = { // Maps each opcode byte to { mnemonic, evaluate(vm) }.
+  0x01: { mnemonic: 'ADD', evaluate: (vm) => {
+    let v1 = vm.stack.pop(); // Pops the top two stack values...
+    let v2 = vm.stack.pop();
+    vm.stack.push(v1+v2);    // ...and pushes their sum.
+  }},
+  0x02: { mnemonic: 'MUL', evaluate: (vm) => {
+    //
+    // **YOUR CODE HERE**
+    //
+    // Pop the top two arguments off of the stack,
+    // and then push the result on to the stack.
+  }},
+  0x5B: { mnemonic: 'JUMPDEST', evaluate: (vm) => {
+    // Does nothing.  We could check to make sure that jumps
+    // always land at JUMPDEST opcodes, but it is not totally
+    // clear that it is worth the bother.
+  }},
+  0x60: { mnemonic: 'PUSH1', evaluate: (vm) => {
+    // The next byte is data, not another instruction.
+    vm.pc++; // Step onto the operand byte so it is not executed as an opcode.
+    let v = vm.bytecode.readUInt8(vm.pc);
+    vm.stack.push(v);
+  }},
+  0x0c: { mnemonic: 'PRINT', evaluate: (vm) => {
+    // **NOTE**: This is not a real EVM opcode.  Pops and logs the top value.
+    console.log(vm.stack.pop());
+  }},
+};
+
+exports.opcodes = opcodes;
--- a/lab17/print.byco
+++ b/lab17/print.byco
@@ -0,0 +1 @@
+`
--- a/lab17/print.scm
+++ b/lab17/print.scm
@@ -0,0 +1,3 @@
+; Simple test that prints 3 (compiles to PUSH1 3 followed by PRINT).
+(println 3)
+
--- a/lab17/store.byco
+++ b/lab17/store.byco
--- a/lab17/store.scm
+++ b/lab17/store.scm
@@ -0,0 +1,5 @@
+(define x 3)          ; x = 3
+(define y (+ x 1))    ; y = 4
+(println (- x 2))     ; should print 1
+(println (* x y))     ; should print 12
+
--- a/lab17/test1.byco
+++ b/lab17/test1.byco
@@ -0,0 +1 @@
+```
--- a/lab17/test1.scm
+++ b/lab17/test1.scm
@@ -0,0 +1,2 @@
+(println (+ 1 2 3))   ; should print 6
+
--- a/lab17/test2.byco
+++ b/lab17/test2.byco
@@ -0,0 +1,2 @@
+````
+``<02>`
--- a/lab17/test2.scm
+++ b/lab17/test2.scm
@@ -0,0 +1,4 @@
+(println (+ 2 3 4))        ; should print 9
+(println (- 13 (* 2 4)))   ; 13 - 8:     should print 5
+(println (- 10 4 3))       ; 10 - 4 - 3: should print 3
+
--- a/lab17/vm.js
+++ b/lab17/vm.js
@@ -0,0 +1,68 @@
+'use strict';
+
+const fs = require('fs');
+const opcodes = require('./op-codes.js').opcodes;
+
+/**
+ * The VM is responsible for executing the bytecode format.
+ */
+class VirtualMachine {
+
+  /**
+   * Initializes the virtual machine.  The stack and memory are
+   * both initially empty.
+   *
+   * Takes no arguments.
+   */
+  constructor() {
+    this.stack = [];
+    this.memory = [];
+  }
+
+  /**
+   * @param {String} bytecodeFile - Path of the bytecode file to load.
+   * @returns {Buffer} - The raw bytes of the file.
+   */
+  static loadBytecode(bytecodeFile) {
+    // Without an encoding, readFileSync already returns a Buffer.
+    return fs.readFileSync(bytecodeFile);
+  }
+
+  /**
+   * Executes every instruction in the specified bytecode file.
+   */
+  evaluate(bytecodeFile) {
+    this.bytecode = this.constructor.loadBytecode(bytecodeFile);
+
+    // Initializing the program counter to keep track of our
+    // place within the program.
+    this.pc = 0;
+
+    while (this.pc < this.bytecode.length) {
+      const opcode = this.bytecode.readUInt8(this.pc);
+      const operation = opcodes[opcode];
+      if (operation === undefined) {
+        throw new Error(`Unable to find instruction for ${opcode.toString(16)}`);
+      }
+      // An instruction may advance the pc itself to consume operand
+      // bytes (as PUSH1 does); the increment below then skips past them.
+      operation.evaluate(this);
+      this.pc++;
+    }
+  }
+}
+
+// Handling command line arguments.  process.argv always begins with the
+// node executable and the script path, however node was launched (`node`,
+// `nodejs`, an absolute path, ...), so the user's arguments start at index 2.
+const args = process.argv.slice(2);
+if (args.length !== 1) {
+  console.log("vm.js <bytecode file>");
+  process.exit(1);
+}
+
+let bytecodeFile = args[0];
+
+console.log(`Executing ${bytecodeFile}...`);
+let vm = new VirtualMachine();
+vm.evaluate(bytecodeFile);