"use strict"; const fs = require('fs'); const opcodes = require('./op-codes.js').opcodes; const MAX_BUFF_SIZE = 256; // Constants for types const LIST = 1; const OP = 2; const NUM = 3; const BOOL = 4; const VAR = 5; /** * The Compiler class is responsible for taking a .scm * text file and converting it into bytecode format. */ class Compiler { /** * Constructor. */ constructor() { this.buildMnemonicLookup(); this.varMap = {}; this.varOffset = 0; } /** * Tokenizes a Scheme file, stripping out any comments. * * @param {String} contents - Scheme file, as text. * * @returns {[String]} - Array of tokens, represented as strings. */ tokenize(contents) { let lines = contents.trim().split('\n') let tokens = []; lines.forEach((ln) => { // Ensuring that parens are always surrounded // by spaces to simplify parsing. ln = ln.replaceAll("(", " ( ") .replaceAll(")", " ) "); // The comment character in Scheme is ';' ln = ln.replace(/;.*/, ""); tokens.push(...ln.split(/\s+/).filter(s=>s.length!==0)); }); return tokens; } /** * Parses a stream of tokens, returning an array of objects * representing the top-level Scheme lists in the program. * (Note that in Scheme, a list is treated as a function call.) * * @param {[String]} tokens - An array of tokens. * * @returns {[Object]} - The AST, as a JS object literal. */ parse(tokens) { // The top level AST does not have a type. let ast = { children: []}; for (let i=0; i { if (key === 'parent') return value.id; else return value; })}`); } /** * Writes a byte to the next position in the bytecode buffer, * updating the offset to the position for the new write. * * @param {Number} byte - A valid byte. */ writeByte(byte) { this.offset = this.bytecode.writeUInt8(byte, this.offset); } /** * Looks up the opcode by its mnemonic and writes it to * the bytecode buffer. * * @param {String} mnemonic - The mnemonic for the opcode. */ writeOp(mnemonic) { let opcode = this.lookupTable[mnemonic]; if (opcode === undefined) { throw new Error(`The mnemonic ${mnemonic} is not defined.`); } this.writeByte(this.lookupTable[mnemonic]); } /** * Converts AST into binary bytecode. * * @param {Object} ast - abstract syntax tree of program. */ writeBytecode(ast) { if (ast.type === NUM) { // Numbers are just pushed on to the stack. this.writeOp('PUSH1'); this.writeByte(ast.value); return; } else if (ast.type === BOOL) { // // ***YOUR CODE HERE*** // // Booleans will be stored as either 1 for true, or as a 0 for false. return; } else if (ast.type === VAR) { // We look up the offset for a variable and push the offset // value on to the stack. The 'MLOAD' operation will // retrieve the value stored at that position in the memory. this.writeOp('PUSH1'); this.writeByte(this.varMap[ast.value]); this.writeOp('MLOAD'); return; } // If we made it hear, we have a list. // The first argument is the name of the 'function' // that we will be invoking. let first = ast.children[0]; // Almost all functions need some special handling for the // first argument. Some functions will need the additional // arguments stored in 'rest'. let second = ast.children[1]; let rest = ast.children.slice(2); switch (first.value) { case "println": this.writeBytecode(second); this.writeOp('PRINT'); break; case "define": // The define function lets us store variables. // // The variable name is stored in 'second.value'. // Update the 'this.varMap' array to store the current // value of 'this.varOffset'. // // The VM will need to push the value on to the stack, // push 'this.varOffset' on to the stack, and then // invoke 'MSTORE'. // // Increment this.varOffset so that it points to the next // position in memory. this.varMap[second.value] = this.varOffset; rest.forEach((x) => { this.writeBytecode(x); }); this.writeOp('PUSH1'); this.writeByte(this.varOffset); this.writeOp('MSTORE'); this.varOffset++; break; case "if": // // ***YOUR CODE HERE*** // // EXTRA CREDIT! // Add support for if expressions. // The cond.scm file gives you some good examples. break; case "+": this.writeBytecode(second); rest.forEach((x) => { this.writeBytecode(x); this.writeOp('ADD'); }); break; case "*": // Using the '+' case as a template, add support // for '*'. Note that the 'MUL' opcode only works // with two arguments, whereas '*' allows an arbitrary // number of arguments. this.writeBytecode(second); rest.forEach((x) => { this.writeBytecode(x); this.writeOp('MUL'); }); break; case "-": // Add support for '-'. The approach here will be // Similar to the solution for '+' and '*'. However, // one key difference is that the order of the arguments // matters. You will need to use 'SWAP1' to get the // arguments ordered correctly before invoking 'SUB'. this.writeBytecode(second); rest.forEach((x) => { this.writeBytecode(x); this.writeOp('SWAP1'); this.writeOp('SUB'); }); break; default: throw new Error(`Unexpected head: '${first.value}'`); } } /** * Builds up a mapping of opcode mnemonics to the corresponding * hexadecimal values. */ buildMnemonicLookup() { this.lookupTable = {}; Object.keys(opcodes).forEach((opcode) => { let inst = opcodes[opcode]; this.lookupTable[inst.mnemonic] = opcode; }); } /** * This method takes a scheme file, tokenizes and parses it, * and finally compiles it to binary bytecode. * * @param {String} fileName - The name of the scheme file. * * @returns {String} - The name of the bytecode file. */ compileScheme(fileName) { if (!fileName.toLowerCase().endsWith('.scm')) { throw new Error(`${fileName} does not end with a .scm extension.`); } fs.readFileSync(fileName); let contents = fs.readFileSync(fileName, 'utf8'); let tokens = this.tokenize(contents); let asts = this.parse(tokens); // The bytecode size cannot be larger than MAX_BUFF_SIZE. this.bytecode = Buffer.alloc(MAX_BUFF_SIZE); // The offset tracks the current position in the bytecode buffer. this.offset = 0; asts.forEach((ast) => { this.writeBytecode(ast); }); // The output file will have the same name as the input file, // except that '.scm' will be replaced with '.byco'. let outputFile = fileName.replace(/.scm\b/i, ".byco"); fs.writeFileSync(outputFile, this.bytecode.slice(0, this.offset, 'hex')); return outputFile; } } // Handling command line arguments. if (process.argv0 === 'node') { process.argv.shift(); } if (process.argv.length !== 2) { console.log("compiler.js "); process.exit(1); } let cmplr = new Compiler(); let scmFile = process.argv[1]; console.log(`Compiling ${scmFile}...`); let bytecodeFile = cmplr.compileScheme(scmFile); console.log(`Bytecode written to ${bytecodeFile}.`);