// File: cs-252/lab17/compiler.js
// 2026-04-22 11:03:07 -07:00
//
// 295 lines
// 8.0 KiB
// JavaScript

"use strict";
const fs = require('fs');
const opcodes = require('./op-codes.js').opcodes;
// Size, in bytes, of the buffer that Compiler.compileScheme allocates
// to hold the generated bytecode.
const MAX_BUFF_SIZE = 256;
// Constants for types.
// These tags are stored in the 'type' field of the AST nodes built by
// Compiler.parse.
// LIST: a parenthesized list; the node carries a 'children' array.
const LIST = 1;
// OP: any atom that is not a number, boolean, or word (for example "+").
const OP = 2;
// NUM: a token made up only of decimal digits.
const NUM = 3;
// BOOL: the literals #t and #f.
const BOOL = 4;
// VAR: a token made up only of word characters that is not a number.
const VAR = 5;
/**
 * The Compiler class is responsible for taking a .scm
 * text file and converting it into bytecode format.
 *
 * compileScheme() drives the pipeline: tokenize() turns the text into
 * tokens, parse() turns the tokens into ASTs, and writeBytecode() emits
 * the bytes for each AST into 'this.bytecode'.
 */
class Compiler {
  /**
   * Constructor.
   *
   * Builds the mnemonic lookup table and initializes the bookkeeping
   * used to assign memory offsets to variables.
   */
  constructor() {
    this.buildMnemonicLookup();
    // Maps each defined variable name to its memory offset.
    this.varMap = {};
    // The memory offset that the next defined variable will receive.
    this.varOffset = 0;
  }

  /**
   * Tokenizes a Scheme file, stripping out any comments.
   *
   * @param {String} contents - Scheme file, as text.
   *
   * @returns {[String]} - Array of tokens, represented as strings.
   */
  tokenize(contents) {
    const tokens = [];
    for (const line of contents.trim().split('\n')) {
      const padded = line
        // The comment character in Scheme is ';'
        .replace(/;.*/, "")
        // Ensuring that parens are always surrounded
        // by spaces to simplify parsing.
        .replaceAll("(", " ( ")
        .replaceAll(")", " ) ");
      tokens.push(...padded.split(/\s+/).filter((s) => s.length !== 0));
    }
    return tokens;
  }

  /**
   * Parses a stream of tokens, returning an array of objects
   * representing the top-level Scheme lists in the program.
   * (Note that in Scheme, a list is treated as a function call.)
   *
   * Every LIST node keeps a 'parent' reference so that the parser can
   * return to the enclosing list when it sees a closing paren.
   *
   * @param {[String]} tokens - An array of tokens.
   *
   * @returns {[Object]} - The AST, as a JS object literal.
   *
   * @throws {Error} If the parentheses in the token stream are unbalanced.
   */
  parse(tokens) {
    // The top level AST does not have a type.
    const root = { children: [] };
    let ast = root;
    for (const tok of tokens) {
      if (tok === "(") {
        const newAst = { parent: ast, type: LIST, children: [] };
        ast.children.push(newAst);
        ast = newAst;
      } else if (tok === ")") {
        // Closing a list that was never opened would walk off the top
        // of the tree, so it is reported instead.
        if (ast === root) {
          throw new Error("Unbalanced parentheses: unexpected ')'.");
        }
        ast = ast.parent;
      } else if (/^\d+$/.test(tok)) {
        ast.children.push({ type: NUM, value: Number.parseInt(tok, 10) });
      } else if (tok === "#t") {
        ast.children.push({ type: BOOL, value: true });
      } else if (tok === "#f") {
        ast.children.push({ type: BOOL, value: false });
      } else if (/^\w+$/.test(tok)) {
        ast.children.push({ type: VAR, value: tok });
      } else {
        ast.children.push({ type: OP, value: tok });
      }
    }
    // If a list is still open here, returning would silently drop the
    // enclosing lists from the result.
    if (ast !== root) {
      throw new Error("Unbalanced parentheses: missing ')'.");
    }
    return root.children;
  }

  /**
   * Prints out an AST, filtering out circular references.
   *
   * @param {Object} ast - The AST to print.
   */
  printAST(ast) {
    // The 'parent' links point back up the tree; dropping them is what
    // keeps JSON.stringify from hitting a cycle.
    const withoutParents = (key, value) => (key === 'parent' ? undefined : value);
    console.log(`AST is ${JSON.stringify(ast, withoutParents)}`);
  }

  /**
   * Writes a byte to the next position in the bytecode buffer,
   * updating the offset to the position for the new write.
   *
   * @param {Number} byte - A valid byte.
   */
  writeByte(byte) {
    // writeUInt8 returns the offset just past the byte it wrote.
    this.offset = this.bytecode.writeUInt8(byte, this.offset);
  }

  /**
   * Looks up the opcode by its mnemonic and writes it to
   * the bytecode buffer.
   *
   * @param {String} mnemonic - The mnemonic for the opcode.
   *
   * @throws {Error} If the mnemonic is not in the lookup table.
   */
  writeOp(mnemonic) {
    const opcode = this.lookupTable[mnemonic];
    if (opcode === undefined) {
      throw new Error(`The mnemonic ${mnemonic} is not defined.`);
    }
    // The lookup table stores object keys, which are strings, so the
    // opcode is converted to a number before it is written.
    this.writeByte(Number(opcode));
  }

  /**
   * Writes the two bytes that push a one-byte value on to the stack:
   * the 'PUSH1' opcode followed by the value itself.
   *
   * @param {Number} value - The byte to push.
   */
  writePush(value) {
    this.writeOp('PUSH1');
    this.writeByte(value);
  }

  /**
   * Converts AST into binary bytecode.
   *
   * @param {Object} ast - abstract syntax tree of program.
   *
   * @throws {Error} If an expression is missing, a variable is used
   *   before it is defined, or the head of a list is not supported.
   */
  writeBytecode(ast) {
    if (ast === undefined) {
      // Reached when a form is missing an operand, e.g. '(println)'.
      throw new Error("Missing expression.");
    }
    if (ast.type === NUM) {
      // Numbers are just pushed on to the stack.
      this.writePush(ast.value);
      return;
    } else if (ast.type === BOOL) {
      // Booleans will be stored as either 1 for true, or as a 0 for false.
      this.writePush(ast.value ? 1 : 0);
      return;
    } else if (ast.type === VAR) {
      // We look up the offset for a variable and push the offset
      // value on to the stack. The 'MLOAD' operation will
      // retrieve the value stored at that position in the memory.
      if (!Object.prototype.hasOwnProperty.call(this.varMap, ast.value)) {
        throw new Error(`Undefined variable: '${ast.value}'`);
      }
      this.writePush(this.varMap[ast.value]);
      this.writeOp('MLOAD');
      return;
    }
    // If we made it here, we should have a list.
    if (ast.type !== LIST) {
      throw new Error(`Unexpected token: '${ast.value}'`);
    }
    if (ast.children.length === 0) {
      throw new Error("Cannot compile an empty list.");
    }
    // The first argument is the name of the 'function'
    // that we will be invoking.
    const first = ast.children[0];
    // Almost all functions need some special handling for the
    // first argument. Some functions will need the additional
    // arguments stored in 'rest'.
    const second = ast.children[1];
    const rest = ast.children.slice(2);
    // The VM's arithmetic opcodes take two arguments, while the Scheme
    // operators take any number, so the operands are folded left to
    // right: after each further operand the given opcodes are emitted.
    const fold = (...mnemonics) => {
      this.writeBytecode(second);
      rest.forEach((x) => {
        this.writeBytecode(x);
        mnemonics.forEach((m) => this.writeOp(m));
      });
    };
    switch (first.value) {
      case "println":
        this.writeBytecode(second);
        this.writeOp('PRINT');
        break;
      case "define":
        // The define function lets us store variables.
        if (second === undefined || second.type !== VAR || rest.length !== 1) {
          throw new Error("define expects a variable name and a single value expression.");
        }
        // The value is compiled before the name is (re)bound, so that
        // the expression can still read an earlier binding of the name.
        this.writeBytecode(rest[0]);
        this.varMap[second.value] = this.varOffset;
        // Value first, then the offset, then 'MSTORE'.
        this.writePush(this.varOffset);
        this.writeOp('MSTORE');
        // Point to the next position in memory.
        this.varOffset++;
        break;
      case "if":
        // TODO: 'if' expressions are not implemented yet, so no
        // bytecode is emitted for them.
        break;
      case "+":
        fold('ADD');
        break;
      case "*":
        fold('MUL');
        break;
      case "-":
        // The order of the arguments matters for subtraction, so
        // 'SWAP1' puts them in the right order before 'SUB'.
        // NOTE(review): assumes 'SUB' subtracts the second stack item
        // from the top one, as in the EVM; confirm against the VM.
        fold('SWAP1', 'SUB');
        break;
      default:
        throw new Error(`Unexpected head: '${first.value}'`);
    }
  }

  /**
   * Builds up a mapping of opcode mnemonics to the corresponding
   * keys of the 'opcodes' table.
   */
  buildMnemonicLookup() {
    this.lookupTable = {};
    for (const [opcode, inst] of Object.entries(opcodes)) {
      this.lookupTable[inst.mnemonic] = opcode;
    }
  }

  /**
   * This method takes a scheme file, tokenizes and parses it,
   * and finally compiles it to binary bytecode.
   *
   * @param {String} fileName - The name of the scheme file.
   *
   * @returns {String} - The name of the bytecode file.
   *
   * @throws {Error} If the file name does not end with '.scm'.
   */
  compileScheme(fileName) {
    if (!fileName.toLowerCase().endsWith('.scm')) {
      throw new Error(`${fileName} does not end with a .scm extension.`);
    }
    const contents = fs.readFileSync(fileName, 'utf8');
    const tokens = this.tokenize(contents);
    const asts = this.parse(tokens);
    // The bytecode size cannot be larger than MAX_BUFF_SIZE.
    this.bytecode = Buffer.alloc(MAX_BUFF_SIZE);
    // The offset tracks the current position in the bytecode buffer.
    this.offset = 0;
    asts.forEach((ast) => {
      this.writeBytecode(ast);
    });
    // The output file will have the same name as the input file,
    // except that '.scm' will be replaced with '.byco'. The pattern is
    // anchored to the end so that only the extension is replaced, even
    // when 'scm' also appears earlier in the path.
    const outputFile = fileName.replace(/\.scm$/i, ".byco");
    // Only the bytes that were actually written are saved.
    fs.writeFileSync(outputFile, this.bytecode.subarray(0, this.offset));
    return outputFile;
  }
}
// Handling command line arguments.
// process.argv always starts with the node executable and the script
// path, however node was launched, so the user's arguments begin at
// index 2. Checking process.argv0 against 'node' would reject launches
// such as '/usr/bin/node compiler.js prog.scm'.
const cliArgs = process.argv.slice(2);
if (cliArgs.length !== 1) {
  console.log("compiler.js <scheme file>");
  process.exit(1);
}
const cmplr = new Compiler();
const scmFile = cliArgs[0];
console.log(`Compiling ${scmFile}...`);
const bytecodeFile = cmplr.compileScheme(scmFile);
console.log(`Bytecode written to ${bytecodeFile}.`);