aboutsummaryrefslogtreecommitdiffgithub
diff options
context:
space:
mode:
authorAustin Adams <git@austinjadams.com>2018-08-10 12:24:09 -0400
committerAustin Adams <git@austinjadams.com>2018-08-10 12:24:09 -0400
commit300541a981fcbfe4e226432a891533575cc878a2 (patch)
tree1f7e0580b24fe6aaaf30d545f0ff5712df5db40b
parent027ce8bbc58469c39bc0b3c1a3d8c6ae9e95921a (diff)
downloadnovice-300541a981fcbfe4e226432a891533575cc878a2.tar.gz
novice-300541a981fcbfe4e226432a891533575cc878a2.tar.xz
Add LR(1) table generator
-rw-r--r--novice/assembler/assembler.ts41
-rw-r--r--novice/assembler/dfa/comment.test.ts7
-rw-r--r--novice/assembler/dfa/comment.ts6
-rw-r--r--novice/assembler/dfa/dfa.ts25
-rw-r--r--novice/assembler/dfa/helpers.test.ts2
-rw-r--r--novice/assembler/dfa/index.ts9
-rw-r--r--novice/assembler/dfa/integer.test.ts8
-rw-r--r--novice/assembler/dfa/integer.ts24
-rw-r--r--novice/assembler/dfa/label.test.ts38
-rw-r--r--novice/assembler/dfa/label.ts70
-rw-r--r--novice/assembler/dfa/pseudoop.test.ts5
-rw-r--r--novice/assembler/dfa/pseudoop.ts13
-rw-r--r--novice/assembler/dfa/string.test.ts237
-rw-r--r--novice/assembler/dfa/string.ts9
-rw-r--r--novice/assembler/dfa/symbol.test.ts15
-rw-r--r--novice/assembler/dfa/symbol.ts9
-rw-r--r--novice/assembler/dfa/whitespace.test.ts7
-rw-r--r--novice/assembler/dfa/whitespace.ts6
-rw-r--r--novice/assembler/dfa/word.test.ts5
-rw-r--r--novice/assembler/dfa/word.ts13
-rw-r--r--novice/assembler/lex.ts22
-rw-r--r--novice/assembler/lr1/grammar.ts37
-rw-r--r--novice/assembler/lr1/objset.ts54
-rw-r--r--novice/assembler/lr1/production.ts21
-rw-r--r--novice/assembler/lr1/tablegen.test.ts634
-rw-r--r--novice/assembler/lr1/tablegen.ts373
-rw-r--r--novice/assembler/scanner.test.ts61
-rw-r--r--novice/assembler/scanner.ts13
-rw-r--r--tslint.json3
29 files changed, 1442 insertions, 325 deletions
diff --git a/novice/assembler/assembler.ts b/novice/assembler/assembler.ts
index dca2316..f5792de 100644
--- a/novice/assembler/assembler.ts
+++ b/novice/assembler/assembler.ts
@@ -1,15 +1,41 @@
import { Buffer } from 'buffer';
import { Readable } from 'stream';
-import { Scanner } from './scanner';
+import { isDecimalDigit, isHexDigit, isWordChar } from './lex';
+import { Line, Scanner, Token } from './scanner';
+
+interface RegisterOperand {
+ kind: 'reg';
+ num: number;
+}
+
+interface IntegerOperand {
+ kind: 'int';
+ val: number;
+}
+
+interface LabelOperand {
+ kind: 'label';
+ label: string;
+}
+
+interface StringOperand {
+ kind: 'string';
+ contents: string;
+}
interface Instruction {
op: string;
- operands: (string|number)[];
+ operands: (RegisterOperand|IntegerOperand|LabelOperand)[];
+}
+
+interface PseudoOp {
+ op: string;
+ operand: StringOperand|IntegerOperand|undefined;
}
interface Section {
startAddr: number;
- instructions: Instruction[];
+ instructions: (Instruction|PseudoOp)[];
}
interface Assembly {
@@ -19,15 +45,20 @@ interface Assembly {
class Assembler {
private scanner: Scanner;
+ private assembly: Assembly;
+ private newSection: boolean;
public constructor(fp: Readable) {
this.scanner = new Scanner(fp);
+ this.assembly = {sections: [], labels: {}};
+ this.newSection = true;
}
public async parse(): Promise<Assembly> {
await this.scanner.scan();
- return {sections: [], labels: {}};
+ return this.assembly;
}
}
-export { Assembler, Assembly, Section, Instruction };
+export { Assembler, Assembly, Section, Instruction, RegisterOperand,
+ IntegerOperand, LabelOperand };
diff --git a/novice/assembler/dfa/comment.test.ts b/novice/assembler/dfa/comment.test.ts
index 13ae456..6664333 100644
--- a/novice/assembler/dfa/comment.test.ts
+++ b/novice/assembler/dfa/comment.test.ts
@@ -8,10 +8,6 @@ describe('comment DFA', () => {
dfa = new CommentDFA();
});
- it('is not a token', () => {
- expect(dfa.isToken()).toBe(false);
- });
-
it('rejects text without ; or #', () => {
const len = feedDFA(dfa, '', 'this is a comment');
expect(dfa.getAcceptingLength()).toBe(len);
@@ -28,17 +24,20 @@ describe('comment DFA', () => {
const len = feedDFA(dfa, '; hello');
expect(dfa.getAcceptingLength()).toBe(len);
expect(dfa.isAlive()).toBe(true);
+ expect(dfa.getKind()).toBe(null);
});
it('recognizes # comments', () => {
const len = feedDFA(dfa, '# end gaming today');
expect(dfa.getAcceptingLength()).toBe(len);
expect(dfa.isAlive()).toBe(true);
+ expect(dfa.getKind()).toBe(null);
});
it('recognizes empty comments', () => {
const len = feedDFA(dfa, ';');
expect(dfa.getAcceptingLength()).toBe(len);
expect(dfa.isAlive()).toBe(true);
+ expect(dfa.getKind()).toBe(null);
});
});
diff --git a/novice/assembler/dfa/comment.ts b/novice/assembler/dfa/comment.ts
index 3cf1ee5..809dcc4 100644
--- a/novice/assembler/dfa/comment.ts
+++ b/novice/assembler/dfa/comment.ts
@@ -1,4 +1,4 @@
-import DFA from './dfa';
+import { DFA, Kind } from './dfa';
enum State {
Start,
@@ -54,7 +54,7 @@ export default class CommentDFA extends DFA {
this.acceptingLength = 0;
}
- public isToken(): boolean {
- return false;
+ public getKind(): null {
+ return null;
}
}
diff --git a/novice/assembler/dfa/dfa.ts b/novice/assembler/dfa/dfa.ts
index 1d7f0dd..e366a23 100644
--- a/novice/assembler/dfa/dfa.ts
+++ b/novice/assembler/dfa/dfa.ts
@@ -1,10 +1,25 @@
-export default abstract class DFA {
+const kindsObj = {
+ 'int-decimal' : '',
+ 'int-hex' : '',
+ 'reg' : '',
+ 'pseudoop' : '',
+ 'string' : '',
+ 'char' : '',
+ 'word' : '',
+ ':' : '',
+ '(' : '',
+ ')' : '',
+ ',' : '',
+};
+type Kind = keyof typeof kindsObj;
+const kinds = new Set(Object.keys(kindsObj) as Kind[]);
+
+abstract class DFA {
public abstract feed(c: string): void;
public abstract isAlive(): boolean;
public abstract getAcceptingLength(): number;
public abstract reset(): void;
-
- public isToken(): boolean {
- return true;
- }
+ public abstract getKind(): Kind | null;
}
+
+export { DFA, Kind, kinds };
diff --git a/novice/assembler/dfa/helpers.test.ts b/novice/assembler/dfa/helpers.test.ts
index 6878d1b..7157d2e 100644
--- a/novice/assembler/dfa/helpers.test.ts
+++ b/novice/assembler/dfa/helpers.test.ts
@@ -1,4 +1,4 @@
-import DFA from './dfa';
+import { DFA } from './dfa';
function feedDFA(dfa: DFA, str: string, follow?: string): number {
let food = follow? str + follow : str;
diff --git a/novice/assembler/dfa/index.ts b/novice/assembler/dfa/index.ts
index 7a2a552..4a39281 100644
--- a/novice/assembler/dfa/index.ts
+++ b/novice/assembler/dfa/index.ts
@@ -1,13 +1,12 @@
import CommentDFA from './comment';
-import DFA from './dfa';
+import { DFA, Kind, kinds } from './dfa';
import IntegerDFA from './integer';
-import LabelDFA from './label';
import PseudoOpDFA from './pseudoop';
import StringDFA from './string';
import SymbolDFA from './symbol';
import WhitespaceDFA from './whitespace';
import WordDFA from './word';
-const dfas = [ WhitespaceDFA, LabelDFA, WordDFA, PseudoOpDFA, IntegerDFA,
- SymbolDFA, StringDFA, CommentDFA ];
-export { dfas, DFA };
+const dfas = [ WhitespaceDFA, WordDFA, PseudoOpDFA, IntegerDFA, SymbolDFA,
+ StringDFA, CommentDFA ];
+export { dfas, DFA, Kind, kinds };
diff --git a/novice/assembler/dfa/integer.test.ts b/novice/assembler/dfa/integer.test.ts
index 5816585..2d9d0f8 100644
--- a/novice/assembler/dfa/integer.test.ts
+++ b/novice/assembler/dfa/integer.test.ts
@@ -8,10 +8,6 @@ describe('integer DFA', () => {
dfa = new IntegerDFA();
});
- it('is a token', () => {
- expect(dfa.isToken()).toBe(true);
- });
-
it('rejects raw hex digits', () => {
const len = feedDFA(dfa, '', 'abc');
expect(dfa.getAcceptingLength()).toBe(len);
@@ -34,23 +30,27 @@ describe('integer DFA', () => {
const len = feedDFA(dfa, 'xbeef');
expect(dfa.getAcceptingLength()).toBe(len);
expect(dfa.isAlive()).toBe(true);
+ expect(dfa.getKind()).toEqual('int-hex');
});
it('recognizes capitalized hex literals', () => {
const len = feedDFA(dfa, 'XBEeeeF');
expect(dfa.getAcceptingLength()).toBe(len);
expect(dfa.isAlive()).toBe(true);
+ expect(dfa.getKind()).toEqual('int-hex');
});
it('recognizes decimal numbers', () => {
const len = feedDFA(dfa, '69');
expect(dfa.getAcceptingLength()).toBe(len);
expect(dfa.isAlive()).toBe(true);
+ expect(dfa.getKind()).toEqual('int-decimal');
});
it('recognizes registers', () => {
const len = feedDFA(dfa, 'r0');
expect(dfa.getAcceptingLength()).toBe(len);
expect(dfa.isAlive()).toBe(true);
+ expect(dfa.getKind()).toEqual('reg');
});
});
diff --git a/novice/assembler/dfa/integer.ts b/novice/assembler/dfa/integer.ts
index 9487d56..927ba64 100644
--- a/novice/assembler/dfa/integer.ts
+++ b/novice/assembler/dfa/integer.ts
@@ -1,4 +1,5 @@
-import DFA from './dfa';
+import { isDecimalDigit, isHexDigit } from '../lex';
+import { DFA, Kind } from './dfa';
enum State {
Start,
@@ -11,6 +12,7 @@ export default class IntegerDFA extends DFA {
private alive!: boolean;
private length!: number;
private acceptingLength!: number;
+ private kind!: Kind;
public constructor() {
super();
@@ -24,35 +26,31 @@ export default class IntegerDFA extends DFA {
this.length++;
- const charCode = c.charCodeAt(0);
- const isDecimalDigit =
- charCode >= '0'.charCodeAt(0) && charCode <= '9'.charCodeAt(0);
- const isHexDigit = isDecimalDigit ||
- charCode >= 'a'.charCodeAt(0) && charCode <= 'f'.charCodeAt(0) ||
- charCode >= 'A'.charCodeAt(0) && charCode <= 'F'.charCodeAt(0);
-
switch (this.state) {
case State.Start:
if (c.toLowerCase() === 'x') {
this.state = State.Hexadecimal;
- } else if (isDecimalDigit) {
+ this.kind = 'int-hex';
+ } else if (isDecimalDigit(c)) {
this.state = State.Decimal;
this.acceptingLength = this.length;
+ this.kind = 'int-decimal';
} else if (c.toLowerCase() === 'r') {
this.state = State.Decimal;
+ this.kind = 'reg';
} else {
this.alive = false;
}
break;
case State.Hexadecimal:
- if (isHexDigit) {
+ if (isHexDigit(c)) {
this.acceptingLength = this.length;
} else {
this.alive = false;
}
break;
case State.Decimal:
- if (isDecimalDigit) {
+ if (isDecimalDigit(c)) {
this.acceptingLength = this.length;
} else {
this.alive = false;
@@ -75,4 +73,8 @@ export default class IntegerDFA extends DFA {
this.length = 0;
this.acceptingLength = 0;
}
+
+ public getKind(): Kind {
+ return this.kind;
+ }
}
diff --git a/novice/assembler/dfa/label.test.ts b/novice/assembler/dfa/label.test.ts
deleted file mode 100644
index e0c6b8b..0000000
--- a/novice/assembler/dfa/label.test.ts
+++ /dev/null
@@ -1,38 +0,0 @@
-import LabelDFA from './label';
-import { feedDFA } from './helpers.test';
-
-describe('label DFA', () => {
- let dfa: LabelDFA;
-
- beforeEach(() => {
- dfa = new LabelDFA();
- });
-
- it('is a token', () => {
- expect(dfa.isToken()).toBe(true);
- });
-
- it('rejects nonsense', () => {
- const len = feedDFA(dfa, '', '^ woweee');
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(false);
- });
-
- it('rejects ending in nonsense', () => {
- const len = feedDFA(dfa, '', 'woweee^: halt');
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(false);
- });
-
- it('rejects word without :', () => {
- const len = feedDFA(dfa, '', 'margaret');
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(true);
- });
-
- it('recognizes word with :', () => {
- const len = feedDFA(dfa, 'bob:', ' add r0, r0, r1');
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(false);
- });
-});
diff --git a/novice/assembler/dfa/label.ts b/novice/assembler/dfa/label.ts
deleted file mode 100644
index 390bbc0..0000000
--- a/novice/assembler/dfa/label.ts
+++ /dev/null
@@ -1,70 +0,0 @@
-import DFA from './dfa';
-
-enum State {
- Start,
- GotText,
- GotColon,
-}
-
-export default class LabelDFA extends DFA {
- private state!: State;
- private alive!: boolean;
- private length!: number;
- private acceptingLength!: number;
-
- public constructor() {
- super();
- this.reset();
- }
-
- public feed(c: string): void {
- if (!this.alive) {
- return;
- }
-
- this.length++;
-
- const charCode = c.charCodeAt(0);
- const isText = c === '_' || c === '-' ||
- charCode >= 'a'.charCodeAt(0) && charCode <= 'z'.charCodeAt(0) ||
- charCode >= 'A'.charCodeAt(0) && charCode <= 'Z'.charCodeAt(0);
-
- switch (this.state) {
- case State.Start:
- if (isText) {
- this.state = State.GotText;
- } else {
- this.alive = false;
- }
- break;
- case State.GotText:
- if (isText) {
- // cool
- } else if (c === ':') {
- this.acceptingLength = this.length;
- this.state = State.GotColon;
- } else {
- this.alive = false;
- }
- break;
- case State.GotColon:
- this.alive = false;
- break;
- }
- }
-
- public isAlive(): boolean {
- return this.alive;
- }
-
- public getAcceptingLength(): number {
- return this.acceptingLength;
- }
-
- public reset(): void {
- this.state = State.Start;
- this.alive = true;
- this.length = 0;
- this.acceptingLength = 0;
- }
-}
diff --git a/novice/assembler/dfa/pseudoop.test.ts b/novice/assembler/dfa/pseudoop.test.ts
index 3049f00..0566ddf 100644
--- a/novice/assembler/dfa/pseudoop.test.ts
+++ b/novice/assembler/dfa/pseudoop.test.ts
@@ -8,10 +8,6 @@ describe('pseudo-op DFA', () => {
dfa = new PseudoOpDFA();
});
- it('is a token', () => {
- expect(dfa.isToken()).toBe(true);
- });
-
it('rejects nonsense', () => {
const len = feedDFA(dfa, '', '^.fill');
expect(dfa.getAcceptingLength()).toBe(len);
@@ -28,5 +24,6 @@ describe('pseudo-op DFA', () => {
const len = feedDFA(dfa, '.fill', ' xBEEF');
expect(dfa.getAcceptingLength()).toBe(len);
expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toEqual('pseudoop');
});
});
diff --git a/novice/assembler/dfa/pseudoop.ts b/novice/assembler/dfa/pseudoop.ts
index 25bdf56..dd175c8 100644
--- a/novice/assembler/dfa/pseudoop.ts
+++ b/novice/assembler/dfa/pseudoop.ts
@@ -1,4 +1,5 @@
-import DFA from './dfa';
+import { isWordChar } from '../lex';
+import { DFA, Kind } from './dfa';
enum State {
Start,
@@ -23,11 +24,7 @@ export default class PseudoOpDFA extends DFA {
this.length++;
- const charCode = c.charCodeAt(0);
- const isText = c === '_' || c === '-' ||
- charCode >= 'a'.charCodeAt(0) && charCode <= 'z'.charCodeAt(0) ||
- charCode >= 'A'.charCodeAt(0) && charCode <= 'Z'.charCodeAt(0);
-
+ const isText = isWordChar(c);
switch (this.state) {
case State.Start:
if (c === '.') {
@@ -60,4 +57,8 @@ export default class PseudoOpDFA extends DFA {
this.length = 0;
this.acceptingLength = 0;
}
+
+ public getKind(): Kind {
+ return 'pseudoop';
+ }
}
diff --git a/novice/assembler/dfa/string.test.ts b/novice/assembler/dfa/string.test.ts
index 157c33a..98fe87b 100644
--- a/novice/assembler/dfa/string.test.ts
+++ b/novice/assembler/dfa/string.test.ts
@@ -8,121 +8,138 @@ describe('string DFA', () => {
dfa = new StringDFA();
});
- it('is a token', () => {
- expect(dfa.isToken()).toBe(true);
- });
-
it('rejects raw text', () => {
const len = feedDFA(dfa, '', 'gwen"');
expect(dfa.getAcceptingLength()).toBe(len);
expect(dfa.isAlive()).toBe(false);
});
- it('rejects unclosed empty string', () => {
- const len = feedDFA(dfa, '', '"');
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(true);
- });
-
- it('rejects unclosed string', () => {
- const len = feedDFA(dfa, '', '"kitty');
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(true);
- });
-
- it('rejects unclosed string ending with backslash', () => {
- const len = feedDFA(dfa, '', '"kitty\\');
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(true);
- });
-
- it('rejects unclosed empty char literal', () => {
- const len = feedDFA(dfa, '', "'");
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(true);
- });
-
- it('rejects unclosed char literal', () => {
- const len = feedDFA(dfa, '', "'a");
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(true);
- });
-
- it('rejects empty char literal', () => {
- const len = feedDFA(dfa, '', "''");
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(false);
- });
-
- it('rejects char literal with multiple characters', () => {
- const len = feedDFA(dfa, '', "'ab");
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(false);
- });
-
- it('rejects unclosed char literal ending with backslash', () => {
- const len = feedDFA(dfa, '', "'\\");
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(true);
- });
-
- it('recognizes empty string', () => {
- const len = feedDFA(dfa, '""', ' ');
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(false);
- });
-
- it('dies at closing quote', () => {
- const len = feedDFA(dfa, '""');
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(false);
- });
-
- it('recognizes nonempty string', () => {
- const len = feedDFA(dfa, '"trevor"', ' ');
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(false);
- });
-
- it('dies at closing quote of nonempty string', () => {
- const len = feedDFA(dfa, '"trevor"');
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(false);
- });
-
- it('recognizes escapes', () => {
- const len = feedDFA(dfa, '"\\n"');
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(false);
- });
-
- it('recognizes intermixed escapes', () => {
- const len = feedDFA(dfa, '"hello\\ni am bob"');
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(false);
- });
-
- it('recognizes escaped double quote', () => {
- const len = feedDFA(dfa, '"hi\\"mom"', ' ');
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(false);
- });
-
- it('recognizes lonely escaped double quote', () => {
- const len = feedDFA(dfa, '"\\""');
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(false);
- });
-
- it('recognizes escapes in character literals', () => {
- const len = feedDFA(dfa, "'\\n'");
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(false);
- });
-
- it('recognizes escaped single quote in character literals', () => {
- const len = feedDFA(dfa, "'\\''");
- expect(dfa.getAcceptingLength()).toBe(len);
- expect(dfa.isAlive()).toBe(false);
+ describe('strings', () => {
+ it('rejects unclosed empty string', () => {
+ const len = feedDFA(dfa, '', '"');
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(true);
+ });
+
+ it('rejects unclosed string', () => {
+ const len = feedDFA(dfa, '', '"kitty');
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(true);
+ });
+
+ it('rejects unclosed string ending with backslash', () => {
+ const len = feedDFA(dfa, '', '"kitty\\');
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(true);
+ });
+
+ it('recognizes empty string', () => {
+ const len = feedDFA(dfa, '""', ' ');
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toEqual('string');
+ });
+
+ it('dies at closing quote', () => {
+ const len = feedDFA(dfa, '""');
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toEqual('string');
+ });
+
+ it('recognizes nonempty string', () => {
+ const len = feedDFA(dfa, '"trevor"', ' ');
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toEqual('string');
+ });
+
+ it('dies at closing quote of nonempty string', () => {
+ const len = feedDFA(dfa, '"trevor"');
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toEqual('string');
+ });
+
+ it('recognizes escapes', () => {
+ const len = feedDFA(dfa, '"\\n"');
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toEqual('string');
+ });
+
+ it('recognizes intermixed escapes', () => {
+ const len = feedDFA(dfa, '"hello\\ni am bob"');
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toEqual('string');
+ });
+
+ it('recognizes escaped double quote', () => {
+ const len = feedDFA(dfa, '"hi\\"mom"', ' ');
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toEqual('string');
+ });
+
+ it('recognizes lonely escaped double quote', () => {
+ const len = feedDFA(dfa, '"\\""');
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toEqual('string');
+ });
+ });
+
+ describe('chars', () => {
+ it('rejects unclosed empty char literal', () => {
+ const len = feedDFA(dfa, '', "'");
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(true);
+ });
+
+ it('rejects unclosed char literal', () => {
+ const len = feedDFA(dfa, '', "'a");
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(true);
+ });
+
+ it('rejects empty char literal', () => {
+ const len = feedDFA(dfa, '', "''");
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(false);
+ });
+
+ it('rejects char literal with multiple characters', () => {
+ const len = feedDFA(dfa, '', "'ab");
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(false);
+ });
+
+ it('rejects unclosed char literal ending with backslash', () => {
+ const len = feedDFA(dfa, '', "'\\");
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(true);
+ });
+
+ it('recognizes character literals', () => {
+ const len = feedDFA(dfa, "'a'");
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toEqual('char');
+ });
+
+ it('recognizes escapes in character literals', () => {
+ const len = feedDFA(dfa, "'\\n'");
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toEqual('char');
+ });
+
+ it('recognizes escaped single quote in character literals', () => {
+ const len = feedDFA(dfa, "'\\''");
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toEqual('char');
+ });
});
});
diff --git a/novice/assembler/dfa/string.ts b/novice/assembler/dfa/string.ts
index 4b509c5..a10af18 100644
--- a/novice/assembler/dfa/string.ts
+++ b/novice/assembler/dfa/string.ts
@@ -1,4 +1,4 @@
-import DFA from './dfa';
+import { DFA, Kind } from './dfa';
enum State {
Start,
@@ -14,6 +14,7 @@ export default class StringDFA extends DFA {
private alive!: boolean;
private length!: number;
private acceptingLength!: number;
+ private kind!: Kind;
public constructor() {
super();
@@ -32,9 +33,11 @@ export default class StringDFA extends DFA {
switch (c) {
case '"':
this.state = State.String;
+ this.kind = 'string';
break;
case "'":
this.state = State.Character;
+ this.kind = 'char';
break;
default:
this.alive = false;
@@ -105,4 +108,8 @@ export default class StringDFA extends DFA {
this.length = 0;
this.acceptingLength = 0;
}
+
+ public getKind(): Kind {
+ return this.kind;
+ }
}
diff --git a/novice/assembler/dfa/symbol.test.ts b/novice/assembler/dfa/symbol.test.ts
index 3bf1811..96e651b 100644
--- a/novice/assembler/dfa/symbol.test.ts
+++ b/novice/assembler/dfa/symbol.test.ts
@@ -8,10 +8,6 @@ describe('symbol DFA', () => {
dfa = new SymbolDFA();
});
- it('is a token', () => {
- expect(dfa.isToken()).toBe(true);
- });
-
it('rejects unknown symbols', () => {
const len = feedDFA(dfa, '', '^ asdfasdf');
expect(dfa.getAcceptingLength()).toBe(len);
@@ -22,23 +18,34 @@ describe('symbol DFA', () => {
const len = feedDFA(dfa, ',', 'r0');
expect(dfa.getAcceptingLength()).toBe(len);
expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toEqual(',');
});
it('dies after ,', () => {
const len = feedDFA(dfa, ',');
expect(dfa.getAcceptingLength()).toBe(len);
expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toEqual(',');
});
it('recognizes (', () => {
const len = feedDFA(dfa, '(', 'r0');
expect(dfa.getAcceptingLength()).toBe(len);
expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toEqual('(');
});
it('recognizes )', () => {
const len = feedDFA(dfa, ')');
expect(dfa.getAcceptingLength()).toBe(len);
expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toEqual(')');
+ });
+
+ it('recognizes :', () => {
+ const len = feedDFA(dfa, ':');
+ expect(dfa.getAcceptingLength()).toBe(len);
+ expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toEqual(':');
});
});
diff --git a/novice/assembler/dfa/symbol.ts b/novice/assembler/dfa/symbol.ts
index bfcf49b..afc628a 100644
--- a/novice/assembler/dfa/symbol.ts
+++ b/novice/assembler/dfa/symbol.ts
@@ -1,8 +1,9 @@
-import DFA from './dfa';
+import { DFA, Kind } from './dfa';
export default class SymbolDFA extends DFA {
private alive!: boolean;
private acceptingLength!: number;
+ private kind!: Kind;
public constructor() {
super();
@@ -18,7 +19,9 @@ export default class SymbolDFA extends DFA {
case '(':
case ')':
case ',':
+ case ':':
this.acceptingLength = 1;
+ this.kind = c;
default:
this.alive = false;
}
@@ -36,4 +39,8 @@ export default class SymbolDFA extends DFA {
this.alive = true;
this.acceptingLength = 0;
}
+
+ public getKind(): Kind {
+ return this.kind;
+ }
}
diff --git a/novice/assembler/dfa/whitespace.test.ts b/novice/assembler/dfa/whitespace.test.ts
index 2f335a1..adf9254 100644
--- a/novice/assembler/dfa/whitespace.test.ts
+++ b/novice/assembler/dfa/whitespace.test.ts
@@ -8,10 +8,6 @@ describe('whitespace DFA', () => {
dfa = new WhitespaceDFA();
});
- it('is not a token', () => {
- expect(dfa.isToken()).toBe(false);
- });
-
it('rejects text', () => {
const len = feedDFA(dfa, '', 'bob');
expect(dfa.getAcceptingLength()).toBe(len);
@@ -22,17 +18,20 @@ describe('whitespace DFA', () => {
const len = feedDFA(dfa, ' ');
expect(dfa.getAcceptingLength()).toBe(len);
expect(dfa.isAlive()).toBe(true);
+ expect(dfa.getKind()).toBe(null);
});
it('recognizes tabs', () => {
const len = feedDFA(dfa, '\t');
expect(dfa.getAcceptingLength()).toBe(len);
expect(dfa.isAlive()).toBe(true);
+ expect(dfa.getKind()).toBe(null);
});
it('recognizes intermixed whitespace', () => {
const len = feedDFA(dfa, ' \t \t\t \t \t', 'halt');
expect(dfa.getAcceptingLength()).toBe(len);
expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toBe(null);
});
});
diff --git a/novice/assembler/dfa/whitespace.ts b/novice/assembler/dfa/whitespace.ts
index 6b62173..5662e46 100644
--- a/novice/assembler/dfa/whitespace.ts
+++ b/novice/assembler/dfa/whitespace.ts
@@ -1,4 +1,4 @@
-import DFA from './dfa';
+import { DFA, Kind } from './dfa';
export default class WhitespaceDFA extends DFA {
private alive!: boolean;
@@ -41,7 +41,7 @@ export default class WhitespaceDFA extends DFA {
this.acceptingLength = 0;
}
- public isToken(): boolean {
- return false;
+ public getKind(): null {
+ return null;
}
}
diff --git a/novice/assembler/dfa/word.test.ts b/novice/assembler/dfa/word.test.ts
index 41dd877..3658c61 100644
--- a/novice/assembler/dfa/word.test.ts
+++ b/novice/assembler/dfa/word.test.ts
@@ -8,10 +8,6 @@ describe('word DFA', () => {
dfa = new WordDFA();
});
- it('is a token', () => {
- expect(dfa.isToken()).toBe(true);
- });
-
it('rejects nonsense', () => {
const len = feedDFA(dfa, '', '^ woweee');
expect(dfa.getAcceptingLength()).toBe(len);
@@ -28,5 +24,6 @@ describe('word DFA', () => {
const len = feedDFA(dfa, 'add', ' r0, r0, r1');
expect(dfa.getAcceptingLength()).toBe(len);
expect(dfa.isAlive()).toBe(false);
+ expect(dfa.getKind()).toEqual('word');
});
});
diff --git a/novice/assembler/dfa/word.ts b/novice/assembler/dfa/word.ts
index 799b75a..8b3e04b 100644
--- a/novice/assembler/dfa/word.ts
+++ b/novice/assembler/dfa/word.ts
@@ -1,4 +1,5 @@
-import DFA from './dfa';
+import { isWordChar } from '../lex';
+import { DFA, Kind } from './dfa';
export default class WordDFA extends DFA {
private alive!: boolean;
@@ -17,11 +18,7 @@ export default class WordDFA extends DFA {
this.length++;
- const charCode = c.charCodeAt(0);
- const isText = c === '_' || c === '-' ||
- charCode >= 'a'.charCodeAt(0) && charCode <= 'z'.charCodeAt(0) ||
- charCode >= 'A'.charCodeAt(0) && charCode <= 'Z'.charCodeAt(0);
-
+ const isText = isWordChar(c);
if (isText) {
this.acceptingLength = this.length;
} else {
@@ -42,4 +39,8 @@ export default class WordDFA extends DFA {
this.length = 0;
this.acceptingLength = 0;
}
+
+ public getKind(): Kind {
+ return 'word';
+ }
}
diff --git a/novice/assembler/lex.ts b/novice/assembler/lex.ts
new file mode 100644
index 0000000..78a4415
--- /dev/null
+++ b/novice/assembler/lex.ts
@@ -0,0 +1,22 @@
+function isWordChar(c: string): boolean {
+ const charCode = c.charCodeAt(0);
+ return c === '_' || c === '-' ||
+ charCode >= 'a'.charCodeAt(0) && charCode <= 'z'.charCodeAt(0) ||
+ charCode >= 'A'.charCodeAt(0) && charCode <= 'Z'.charCodeAt(0);
+
+}
+
+function isDecimalDigit(c: string): boolean {
+ const charCode = c.charCodeAt(0);
+ return charCode >= '0'.charCodeAt(0) && charCode <= '9'.charCodeAt(0);
+
+}
+
+function isHexDigit(c: string): boolean {
+ const charCode = c.charCodeAt(0);
+ return isDecimalDigit(c) ||
+ charCode >= 'a'.charCodeAt(0) && charCode <= 'f'.charCodeAt(0) ||
+ charCode >= 'A'.charCodeAt(0) && charCode <= 'F'.charCodeAt(0);
+}
+
+export { isWordChar, isDecimalDigit, isHexDigit };
diff --git a/novice/assembler/lr1/grammar.ts b/novice/assembler/lr1/grammar.ts
new file mode 100644
index 0000000..e50e224
--- /dev/null
+++ b/novice/assembler/lr1/grammar.ts
@@ -0,0 +1,37 @@
+import { Kind as T, kinds as Ts } from '../scanner';
+import { Production } from './production';
+
+const NTsObj = {
+ 'line' : '',
+ 'label' : '',
+ 'instr-line' : '',
+ 'instr' : '',
+ 'operand' : '',
+ 'pseudoop-line' : '',
+ 'pseudoop-operand' : '',
+};
+type NT = keyof typeof NTsObj;
+const NTs = new Set(Object.keys(NTsObj) as NT[]);
+
+const grammar: Production<NT, T>[] = [
+ new Production<NT, T>('line', ['label']),
+ new Production<NT, T>('line', ['instr-line']),
+ new Production<NT, T>('line', ['pseudoop-line']),
+ new Production<NT, T>('label', ['word', ':']),
+ new Production<NT, T>('instr-line', ['label', 'instr']),
+ new Production<NT, T>('instr-line', ['instr']),
+ new Production<NT, T>('instr', ['word']),
+ new Production<NT, T>('instr', ['instr', ',', 'operand']),
+ new Production<NT, T>('instr', ['instr', '(', 'operand', ')']),
+ new Production<NT, T>('operand', ['word']),
+ new Production<NT, T>('operand', ['int-decimal']),
+ new Production<NT, T>('operand', ['int-hex']),
+ new Production<NT, T>('operand', ['reg']),
+ new Production<NT, T>('pseudoop-line', ['pseudoop']),
+ new Production<NT, T>('pseudoop-line', ['pseudoop', 'pseudoop-operand']),
+ new Production<NT, T>('pseudoop-operand', ['operand']),
+ new Production<NT, T>('pseudoop-operand', ['char']),
+ new Production<NT, T>('pseudoop-operand', ['string']),
+];
+
+export { Production, grammar, T, Ts, NT, NTs };
diff --git a/novice/assembler/lr1/objset.ts b/novice/assembler/lr1/objset.ts
new file mode 100644
index 0000000..3ffb4a2
--- /dev/null
+++ b/novice/assembler/lr1/objset.ts
@@ -0,0 +1,54 @@
+interface Hashable {
+ hash(): string;
+}
+
+class ObjSet<E extends Hashable> implements Hashable {
+ private map: Map<string, E>;
+
+ constructor(objs?: E[]) {
+ if (objs) {
+ this.map = new Map(
+ objs.map(obj => [obj.hash(), obj]) as [string, E][]);
+ } else {
+ this.map = new Map();
+ }
+ }
+
+ public add(obj: E): boolean {
+ const hash = obj.hash();
+
+ // Avoid clobbering existing values
+ if (this.map.has(hash)) {
+ return false;
+ } else {
+ this.map.set(hash, obj);
+ return true;
+ }
+ }
+
+ public has(obj: E): boolean {
+ return this.map.has(obj.hash());
+ }
+
+ public get(obj: E): E | undefined {
+ return this.map.get(obj.hash());
+ }
+
+ public forEach(callback: (obj: E) => void): void {
+ this.map.forEach((obj, hash, map) => callback(obj));
+ }
+
+ public size(): number {
+ return this.map.size;
+ }
+
+ public toArray(): E[] {
+ return Array.from(this.map.values());
+ }
+
+ public hash(): string {
+ return Array.from(this.map.keys()).sort().join('\n');
+ }
+}
+
+export { ObjSet, Hashable };
diff --git a/novice/assembler/lr1/production.ts b/novice/assembler/lr1/production.ts
new file mode 100644
index 0000000..434413e
--- /dev/null
+++ b/novice/assembler/lr1/production.ts
@@ -0,0 +1,21 @@
+import { Hashable } from './objset';
+
+class Production<NT, T> implements Hashable {
+ public lhs: NT;
+ public rhs: (T|NT)[];
+
+ public constructor(lhs: NT, rhs: (T|NT)[]) {
+ this.lhs = lhs;
+ this.rhs = rhs;
+ }
+
+ public toString(): string {
+ return `${this.lhs} -> ${this.rhs.join(' ')}`;
+ }
+
+ public hash(): string {
+ return this.toString();
+ }
+}
+
+export { Production };
diff --git a/novice/assembler/lr1/tablegen.test.ts b/novice/assembler/lr1/tablegen.test.ts
new file mode 100644
index 0000000..dd1564a
--- /dev/null
+++ b/novice/assembler/lr1/tablegen.test.ts
@@ -0,0 +1,634 @@
+import { ObjSet } from './objset';
+import { Production } from './production';
+import { TableGenerator, S, ParseItem, ParseState, ParseTransition, ParseAction } from './tablegen';
+
+describe('LR(1) table generator', () => {
+ describe('calcFirst()', () => {
+ it('a^nb^n grammar', () => {
+ type T = 'a' | 'b';
+ let Ts = new Set(<T[]>['a', 'b']);
+ type NT = 'goal';
+ let NTs = new Set(<NT[]>['goal']);
+ let productions: Production<NT, T>[] = [
+ new Production<NT, T>('goal', ['a', 'b']),
+ new Production<NT, T>('goal', ['a', 'goal', 'b']),
+ ];
+
+ let tablegen = new TableGenerator<NT, T>('goal', productions, NTs, Ts);
+ expect(tablegen.first).toEqual(new Map<NT|S|T, Set<S|T>>([
+ ['', new Set<S|T>([''])],
+ ['eof', new Set<S|T>(['eof'])],
+ ['goal', new Set<S|T>(['a'])],
+ ['a', new Set<S|T>(['a'])],
+ ['b', new Set<S|T>(['b'])],
+ ]));
+ });
+
+ it('compilers homework 2 grammar', () => {
+ const _NTs = {
+ 'imp' : '',
+ 'vars' : '',
+ 'nevars' : '',
+ 'nevars2' : '',
+ 'decl' : '',
+ 'assign' : '',
+ 'expr' : '',
+ 'expr2' : '',
+ 'op' : '',
+ 'type' : '',
+ };
+ type NT = keyof typeof _NTs;
+ const NTs = new Set(<NT[]> Object.keys(_NTs));
+
+ const _Ts = {
+ 'main' : '',
+ '(' : '',
+ ')' : '',
+ '{' : '',
+ '}' : '',
+ '=' : '',
+ '+' : '',
+ '*' : '',
+ ';' : '',
+ ',' : '',
+ 'int' : '',
+ 'float' : '',
+ 'var' : '',
+ };
+ type T = keyof typeof _Ts;
+ const Ts = new Set(<T[]> Object.keys(_Ts));
+
+ let productions: Production<NT, T>[] = [
+ new Production<NT, T>('imp', ['main', '(', 'vars', ')', '{', 'decl', 'assign', ')']),
+ new Production<NT, T>('vars', []),
+ new Production<NT, T>('vars', ['nevars']),
+ new Production<NT, T>('nevars', ['var', 'nevars2']),
+ new Production<NT, T>('nevars2', []),
+ new Production<NT, T>('nevars2', [',', 'nevars']),
+ new Production<NT, T>('decl', ['type', 'nevars', ';']),
+ new Production<NT, T>('assign', ['var', '=', 'expr', ';']),
+ new Production<NT, T>('expr', ['var', 'expr2']),
+ new Production<NT, T>('expr2', []),
+ new Production<NT, T>('expr2', ['op', 'expr']),
+ new Production<NT, T>('op', ['+']),
+ new Production<NT, T>('op', ['*']),
+ new Production<NT, T>('type', ['int']),
+ new Production<NT, T>('type', ['float']),
+ ];
+
+ let tablegen = new TableGenerator<NT, T>('imp', productions, NTs, Ts);
+ expect(tablegen.first).toEqual(new Map<NT|S|T, Set<S|T>>([
+ ['', new Set<S|T>([''])],
+ ['eof', new Set<S|T>(['eof'])],
+ ['main', new Set<S|T>(['main'])],
+ ['(', new Set<S|T>(['('])],
+ [')', new Set<S|T>([')'])],
+ ['{', new Set<S|T>(['{'])],
+ ['}', new Set<S|T>(['}'])],
+ ['=', new Set<S|T>(['='])],
+ ['+', new Set<S|T>(['+'])],
+ ['*', new Set<S|T>(['*'])],
+ [';', new Set<S|T>([';'])],
+ [',', new Set<S|T>([','])],
+ ['int', new Set<S|T>(['int'])],
+ ['float', new Set<S|T>(['float'])],
+ ['var', new Set<S|T>(['var'])],
+ ['imp', new Set<S|T>(['main'])],
+ ['vars', new Set<S|T>(['', 'var'])],
+ ['nevars', new Set<S|T>(['var'])],
+ ['nevars2', new Set<S|T>(['', ','])],
+ ['decl', new Set<S|T>(['int', 'float'])],
+ ['assign', new Set<S|T>(['var'])],
+ ['expr', new Set<S|T>(['var'])],
+ ['expr2', new Set<S|T>(['', '+', '*'])],
+ ['op', new Set<S|T>(['+', '*'])],
+ ['type', new Set<S|T>(['int', 'float'])],
+ ]));
+ });
+
+ it('deep epsilon grammar', () => {
+ const _NTs = {
+ 'A' : '',
+ 'A2' : '',
+ 'A3' : '',
+ 'A4' : '',
+ 'B' : '',
+ 'C' : '',
+ };
+ type NT = keyof typeof _NTs;
+ const NTs = new Set(<NT[]> Object.keys(_NTs));
+
+ const _Ts = {
+ 'a' : '',
+ 'b' : '',
+ 'c' : '',
+ 'd' : '',
+ 'e' : '',
+ };
+ type T = keyof typeof _Ts;
+ const Ts = new Set(<T[]> Object.keys(_Ts));
+
+ let productions: Production<NT, T>[] = [
+ new Production<NT, T>('A', ['A2', 'A3', 'A4']),
+ new Production<NT, T>('A2', []),
+ new Production<NT, T>('A2', ['b', 'B']),
+ new Production<NT, T>('A3', []),
+ new Production<NT, T>('A3', ['e', 'd']),
+ new Production<NT, T>('A4', []),
+ new Production<NT, T>('A4', ['B']),
+ new Production<NT, T>('B', ['c', 'a', 'B']),
+ new Production<NT, T>('C', ['A3', 'A2', 'B']),
+ ];
+
+ let tablegen = new TableGenerator<NT, T>('A', productions, NTs, Ts);
+ expect(tablegen.first).toEqual(new Map<NT|S|T, Set<S|T>>([
+ ['', new Set<S|T>([''])],
+ ['eof', new Set<S|T>(['eof'])],
+ ['a', new Set<S|T>(['a'])],
+ ['b', new Set<S|T>(['b'])],
+ ['c', new Set<S|T>(['c'])],
+ ['d', new Set<S|T>(['d'])],
+ ['e', new Set<S|T>(['e'])],
+ ['A', new Set<S|T>(['', 'b', 'e', 'c'])],
+ ['A2', new Set<S|T>(['', 'b'])],
+ ['A3', new Set<S|T>(['', 'e'])],
+ ['A4', new Set<S|T>(['', 'c'])],
+ ['B', new Set<S|T>(['c'])],
+ ['C', new Set<S|T>(['e', 'b', 'c'])],
+ ]));
+ });
+ });
+
+ describe('closure()', () => {
+ it('closure from compilers worksheet 7', () => {
+ const _NTs = {
+ 'goal' : '',
+ 'expr' : '',
+ 'term' : '',
+ 'factor' : '',
+ };
+ type NT = keyof typeof _NTs;
+ const NTs = new Set(<NT[]> Object.keys(_NTs));
+
+ const _Ts = {
+ 'ident' : '',
+ '*' : '',
+ '-' : '',
+ };
+ type T = keyof typeof _Ts;
+ const Ts = new Set(<T[]> Object.keys(_Ts));
+
+ let productions: Production<NT, T>[] = [
+ new Production<NT, T>('goal', ['expr']),
+ new Production<NT, T>('expr', ['term', '-', 'expr']),
+ new Production<NT, T>('expr', ['term']),
+ new Production<NT, T>('term', ['factor', '*', 'term']),
+ new Production<NT, T>('term', ['factor']),
+ new Production<NT, T>('factor', ['ident']),
+ ];
+
+ let tablegen = new TableGenerator<NT, T>('goal', productions, NTs, Ts);
+ let items = new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[0], 0, 'eof'),
+ ]);
+ expect(tablegen.closure(items)).toBe(true);
+ expect(items).toEqual(new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[0], 0, 'eof'),
+ new ParseItem<NT, T>(productions[1], 0, 'eof'),
+ new ParseItem<NT, T>(productions[2], 0, 'eof'),
+ new ParseItem<NT, T>(productions[3], 0, 'eof'),
+ new ParseItem<NT, T>(productions[3], 0, '-'),
+ new ParseItem<NT, T>(productions[4], 0, 'eof'),
+ new ParseItem<NT, T>(productions[4], 0, '-'),
+ new ParseItem<NT, T>(productions[5], 0, 'eof'),
+ new ParseItem<NT, T>(productions[5], 0, '-'),
+ new ParseItem<NT, T>(productions[5], 0, '*'),
+ ]));
+ });
+ });
+
+ describe('goto()', () => {
+ it('goto from compilers worksheet 7', () => {
+ const _NTs = {
+ 'goal' : '',
+ 'expr' : '',
+ 'term' : '',
+ 'factor' : '',
+ };
+ type NT = keyof typeof _NTs;
+ const NTs = new Set(<NT[]> Object.keys(_NTs));
+
+ const _Ts = {
+ 'ident' : '',
+ '*' : '',
+ '-' : '',
+ };
+ type T = keyof typeof _Ts;
+ const Ts = new Set(<T[]> Object.keys(_Ts));
+
+ let productions: Production<NT, T>[] = [
+ new Production<NT, T>('goal', ['expr']),
+ new Production<NT, T>('expr', ['term', '-', 'expr']),
+ new Production<NT, T>('expr', ['term']),
+ new Production<NT, T>('term', ['factor', '*', 'term']),
+ new Production<NT, T>('term', ['factor']),
+ new Production<NT, T>('factor', ['ident']),
+ ];
+
+ let tablegen = new TableGenerator<NT, T>('goal', productions, NTs, Ts);
+ let cc0 = new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[0], 0, 'eof'),
+ new ParseItem<NT, T>(productions[1], 0, 'eof'),
+ new ParseItem<NT, T>(productions[2], 0, 'eof'),
+ new ParseItem<NT, T>(productions[3], 0, 'eof'),
+ new ParseItem<NT, T>(productions[3], 0, '-'),
+ new ParseItem<NT, T>(productions[4], 0, 'eof'),
+ new ParseItem<NT, T>(productions[4], 0, '-'),
+ new ParseItem<NT, T>(productions[5], 0, 'eof'),
+ new ParseItem<NT, T>(productions[5], 0, '-'),
+ new ParseItem<NT, T>(productions[5], 0, '*'),
+ ]);
+
+ expect(tablegen.goto(cc0, 'term')).toEqual(new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[1], 1, 'eof'),
+ new ParseItem<NT, T>(productions[2], 1, 'eof'),
+ ]));
+ });
+ });
+
+ describe('calcStates()', () => {
+ it('states example from textbook', () => {
+ const _NTs = {
+ 'goal' : '',
+ 'list' : '',
+ 'pair' : '',
+ };
+ type NT = keyof typeof _NTs;
+ const NTs = new Set(<NT[]> Object.keys(_NTs));
+
+ const _Ts = {
+ '(' : '',
+ ')' : '',
+ };
+ type T = keyof typeof _Ts;
+ const Ts = new Set(<T[]> Object.keys(_Ts));
+
+ let productions: Production<NT, T>[] = [
+ new Production<NT, T>('goal', ['list']),
+ new Production<NT, T>('list', ['list', 'pair']),
+ new Production<NT, T>('list', ['pair']),
+ new Production<NT, T>('pair', ['(', 'pair', ')']),
+ new Production<NT, T>('pair', ['(', ')']),
+ ];
+
+ let tablegen = new TableGenerator<NT, T>('goal', productions, NTs, Ts);
+
+ let states = [
+ new ParseState<NT, T>(0, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[0], 0, 'eof'),
+ new ParseItem<NT, T>(productions[1], 0, 'eof'),
+ new ParseItem<NT, T>(productions[1], 0, '('),
+ new ParseItem<NT, T>(productions[2], 0, 'eof'),
+ new ParseItem<NT, T>(productions[2], 0, '('),
+ new ParseItem<NT, T>(productions[3], 0, 'eof'),
+ new ParseItem<NT, T>(productions[3], 0, '('),
+ new ParseItem<NT, T>(productions[4], 0, 'eof'),
+ new ParseItem<NT, T>(productions[4], 0, '('),
+ ])),
+ new ParseState<NT, T>(1, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[0], 1, 'eof'),
+ new ParseItem<NT, T>(productions[1], 1, 'eof'),
+ new ParseItem<NT, T>(productions[1], 1, '('),
+ new ParseItem<NT, T>(productions[3], 0, 'eof'),
+ new ParseItem<NT, T>(productions[3], 0, '('),
+ new ParseItem<NT, T>(productions[4], 0, 'eof'),
+ new ParseItem<NT, T>(productions[4], 0, '('),
+ ])),
+ new ParseState<NT, T>(2, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[2], 1, 'eof'),
+ new ParseItem<NT, T>(productions[2], 1, '('),
+ ])),
+ new ParseState<NT, T>(3, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[3], 0, ')'),
+ new ParseItem<NT, T>(productions[3], 1, 'eof'),
+ new ParseItem<NT, T>(productions[3], 1, '('),
+ new ParseItem<NT, T>(productions[4], 0, ')'),
+ new ParseItem<NT, T>(productions[4], 1, 'eof'),
+ new ParseItem<NT, T>(productions[4], 1, '('),
+ ])),
+ new ParseState<NT, T>(4, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[1], 2, 'eof'),
+ new ParseItem<NT, T>(productions[1], 2, '('),
+ ])),
+ new ParseState<NT, T>(5, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[3], 2, 'eof'),
+ new ParseItem<NT, T>(productions[3], 2, '('),
+ ])),
+ new ParseState<NT, T>(6, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[3], 0, ')'),
+ new ParseItem<NT, T>(productions[3], 1, ')'),
+ new ParseItem<NT, T>(productions[4], 0, ')'),
+ new ParseItem<NT, T>(productions[4], 1, ')'),
+ ])),
+ new ParseState<NT, T>(7, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[4], 2, 'eof'),
+ new ParseItem<NT, T>(productions[4], 2, '('),
+ ])),
+ new ParseState<NT, T>(8, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[3], 3, 'eof'),
+ new ParseItem<NT, T>(productions[3], 3, '('),
+ ])),
+ new ParseState<NT, T>(9, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[3], 2, ')'),
+ ])),
+ new ParseState<NT, T>(10, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[4], 2, ')'),
+ ])),
+ new ParseState<NT, T>(11, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[3], 3, ')'),
+ ])),
+ ];
+ states[0].transition('list', 1);
+ states[0].transition('pair', 2);
+ states[0].transition('(', 3);
+ states[1].transition('pair', 4);
+ states[1].transition('(', 3);
+ states[3].transition('pair', 5);
+ states[3].transition('(', 6);
+ states[3].transition(')', 7);
+ states[5].transition(')', 8);
+ states[6].transition('pair', 9);
+ states[6].transition('(', 6);
+ states[6].transition(')', 10);
+ states[9].transition(')', 11);
+
+ const statesSet = new ObjSet<ParseState<NT, T>>(states);
+
+ // SO HARD to figure this out
+ for (let i = 0; i < states.length; i++) {
+ let actualState = tablegen.states[i];
+ let wantExpectedState = statesSet.get(actualState);
+ expect(wantExpectedState).toBeDefined();
+
+ let expectedState = <ParseState<NT, T>> wantExpectedState;
+ expect(actualState.transitions.size()).toBe(expectedState.transitions.size());
+
+ actualState.transitions.forEach(transition => {
+ let wantExpectedNewState =
+ statesSet.get(tablegen.states[transition.newState]);
+ expect(wantExpectedNewState).toBeDefined();
+ let expectedNewState = <ParseState<NT, T>> wantExpectedNewState;
+ let expectedNewStateNum = expectedNewState.num;
+
+ let expectedThisTransition = expectedState.transitions.has(
+ new ParseTransition<NT, T>(transition.token, expectedNewStateNum));
+ expect(expectedThisTransition).toBe(true);
+ });
+ }
+ });
+ });
+
+ describe('calcStates()', () => {
+ it('table example from textbook', () => {
+ const _NTs = {
+ 'goal' : '',
+ 'list' : '',
+ 'pair' : '',
+ };
+ type NT = keyof typeof _NTs;
+ const NTs = new Set(<NT[]> Object.keys(_NTs));
+
+ const _Ts = {
+ '(' : '',
+ ')' : '',
+ };
+ type T = keyof typeof _Ts;
+ const Ts = new Set(<T[]> Object.keys(_Ts));
+
+ let productions: Production<NT, T>[] = [
+ new Production<NT, T>('goal', ['list']),
+ new Production<NT, T>('list', ['list', 'pair']),
+ new Production<NT, T>('list', ['pair']),
+ new Production<NT, T>('pair', ['(', 'pair', ')']),
+ new Production<NT, T>('pair', ['(', ')']),
+ ];
+
+ let tablegen = new TableGenerator<NT, T>('goal', productions, NTs, Ts);
+
+ let states = [
+ new ParseState<NT, T>(0, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[0], 0, 'eof'),
+ new ParseItem<NT, T>(productions[1], 0, 'eof'),
+ new ParseItem<NT, T>(productions[1], 0, '('),
+ new ParseItem<NT, T>(productions[2], 0, 'eof'),
+ new ParseItem<NT, T>(productions[2], 0, '('),
+ new ParseItem<NT, T>(productions[3], 0, 'eof'),
+ new ParseItem<NT, T>(productions[3], 0, '('),
+ new ParseItem<NT, T>(productions[4], 0, 'eof'),
+ new ParseItem<NT, T>(productions[4], 0, '('),
+ ])),
+ new ParseState<NT, T>(1, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[0], 1, 'eof'),
+ new ParseItem<NT, T>(productions[1], 1, 'eof'),
+ new ParseItem<NT, T>(productions[1], 1, '('),
+ new ParseItem<NT, T>(productions[3], 0, 'eof'),
+ new ParseItem<NT, T>(productions[3], 0, '('),
+ new ParseItem<NT, T>(productions[4], 0, 'eof'),
+ new ParseItem<NT, T>(productions[4], 0, '('),
+ ])),
+ new ParseState<NT, T>(2, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[2], 1, 'eof'),
+ new ParseItem<NT, T>(productions[2], 1, '('),
+ ])),
+ new ParseState<NT, T>(3, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[3], 0, ')'),
+ new ParseItem<NT, T>(productions[3], 1, 'eof'),
+ new ParseItem<NT, T>(productions[3], 1, '('),
+ new ParseItem<NT, T>(productions[4], 0, ')'),
+ new ParseItem<NT, T>(productions[4], 1, 'eof'),
+ new ParseItem<NT, T>(productions[4], 1, '('),
+ ])),
+ new ParseState<NT, T>(4, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[1], 2, 'eof'),
+ new ParseItem<NT, T>(productions[1], 2, '('),
+ ])),
+ new ParseState<NT, T>(5, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[3], 2, 'eof'),
+ new ParseItem<NT, T>(productions[3], 2, '('),
+ ])),
+ new ParseState<NT, T>(6, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[3], 0, ')'),
+ new ParseItem<NT, T>(productions[3], 1, ')'),
+ new ParseItem<NT, T>(productions[4], 0, ')'),
+ new ParseItem<NT, T>(productions[4], 1, ')'),
+ ])),
+ new ParseState<NT, T>(7, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[4], 2, 'eof'),
+ new ParseItem<NT, T>(productions[4], 2, '('),
+ ])),
+ new ParseState<NT, T>(8, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[3], 3, 'eof'),
+ new ParseItem<NT, T>(productions[3], 3, '('),
+ ])),
+ new ParseState<NT, T>(9, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[3], 2, ')'),
+ ])),
+ new ParseState<NT, T>(10, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[4], 2, ')'),
+ ])),
+ new ParseState<NT, T>(11, new ObjSet<ParseItem<NT, T>>([
+ new ParseItem<NT, T>(productions[3], 3, ')'),
+ ])),
+ ];
+
+ let expectedAction: [T|'eof', ParseAction<NT, T>][][] = [
+ // state 0
+ [
+ ['(', {action: 'shift', newState: 3}],
+ ],
+ // state 1
+ [
+ ['eof', {action: 'accept'}],
+ ['(', {action: 'shift', newState: 3}],
+ ],
+ // state 2
+ [
+ ['eof', {action: 'reduce', production: productions[2]}],
+ ['(', {action: 'reduce', production: productions[2]}],
+ ],
+ // state 3
+ [
+ ['(', {action: 'shift', newState: 6}],
+ [')', {action: 'shift', newState: 7}],
+ ],
+ // state 4
+ [
+ ['eof', {action: 'reduce', production: productions[1]}],
+ ['(', {action: 'reduce', production: productions[1]}],
+ ],
+ // state 5
+ [
+ [')', {action: 'shift', newState: 8}],
+ ],
+ // state 6
+ [
+ ['(', {action: 'shift', newState: 6}],
+ [')', {action: 'shift', newState: 10}],
+ ],
+ // state 7
+ [
+ ['eof', {action: 'reduce', production: productions[4]}],
+ ['(', {action: 'reduce', production: productions[4]}],
+ ],
+ // state 8
+ [
+ ['eof', {action: 'reduce', production: productions[3]}],
+ ['(', {action: 'reduce', production: productions[3]}],
+ ],
+ // state 9
+ [
+ [')', {action: 'shift', newState: 11}],
+ ],
+ // state 10
+ [
+ [')', {action: 'reduce', production: productions[4]}],
+ ],
+ // state 11
+ [
+ [')', {action: 'reduce', production: productions[3]}],
+ ],
+ ];
+
+ let expectedGoto: [NT, number][][] = [
+ // state 0
+ [
+ ['list', 1],
+ ['pair', 2],
+ ],
+ // state 1
+ [
+ ['pair', 4],
+ ],
+ // state 2
+ [],
+ // state 3
+ [
+ ['pair', 5],
+ ],
+ // state 4
+ [],
+ // state 5
+ [],
+ // state 6
+ [
+ ['pair', 9],
+ ],
+ // state 7
+ [],
+ // state 8
+ [],
+ // state 9
+ [],
+ // state 10
+ [],
+ // state 11
+ [],
+ ];
+
+ const statesSet = new ObjSet<ParseState<NT, T>>(states);
+ const expectedToActual = new Map<number, number>();
+
+ for (let i = 0; i < states.length; i++) {
+ let actualState = tablegen.states[i];
+ let wantExpectedState = statesSet.get(actualState);
+ expect(wantExpectedState).toBeDefined();
+
+ let expectedState = <ParseState<NT, T>> wantExpectedState;
+ expectedToActual.set(expectedState.num, actualState.num);
+ }
+
+ let table = tablegen.genTable();
+
+ expect(Object.keys(table.positions).length).toBe(NTs.size + Ts.size + 1);
+ for (let state = 0; state < expectedAction.length; state++) {
+ let actualState = <number> expectedToActual.get(state);
+ let actualRow = table.actionTable[actualState];
+
+ let expectedActions = expectedAction[state];
+ let actualEntries =
+ actualRow.filter(action => action !== null).length;
+ expect(actualEntries).toBe(expectedActions.length);
+
+ for (let i = 0; i < expectedActions.length; i++) {
+ let [token, expectedAction] = expectedActions[i];
+ let actualAction = actualRow[table.positions[token]];
+
+ expect(actualAction).not.toBe(null);
+ if (expectedAction.action === 'shift') {
+ expectedAction.newState = <number> expectedToActual.get(<number> expectedAction.newState);
+ }
+ expect(actualAction).toEqual(expectedAction);
+ }
+ }
+
+ for (let state = 0; state < expectedGoto.length; state++) {
+ let actualState = <number> expectedToActual.get(state);
+ let actualRow = table.gotoTable[actualState];
+
+ let expectedGotos = expectedGoto[state];
+ let actualEntries =
+ actualRow.filter(goto => goto !== null).length;
+ expect(actualEntries).toBe(expectedGotos.length);
+
+ for (let i = 0; i < expectedGotos.length; i++) {
+ let [token, expectedNewState] = expectedGotos[i];
+ let actualNewState = actualRow[table.positions[token]];
+
+ expect(actualNewState).not.toBe(null);
+ expectedNewState = <number> expectedToActual.get(expectedNewState);
+ expect(actualNewState).toEqual(expectedNewState);
+ }
+ }
+ });
+ });
+});
diff --git a/novice/assembler/lr1/tablegen.ts b/novice/assembler/lr1/tablegen.ts
new file mode 100644
index 0000000..df2860a
--- /dev/null
+++ b/novice/assembler/lr1/tablegen.ts
@@ -0,0 +1,373 @@
+import { Hashable, ObjSet } from './objset';
+import { Production } from './production';
+
+type S = '' | 'eof';
+
+class ParseItem<NT, T> implements Hashable {
+ public production: Production<NT, T>;
+ public stacktop: number;
+ public lookahead: S|T;
+
+ public constructor(production: Production<NT, T>, stacktop: number,
+ lookahead: S|T) {
+ this.production = production;
+ this.stacktop = stacktop;
+ this.lookahead = lookahead;
+ }
+
+ public hash(): string {
+ return `[${this.production.hash()}, ${this.stacktop}, ${this.lookahead}]`;
+ }
+}
+
+class ParseTransition<NT, T> implements Hashable {
+ public token: NT|T;
+ public newState: number;
+
+ public constructor(token: NT|T, newState: number) {
+ this.token = token;
+ this.newState = newState;
+ }
+
+ public hash(): string {
+ return this.token + ' ' + this.newState;
+ }
+}
+
+class ParseState<NT, T> implements Hashable {
+ public num: number;
+ public items: ObjSet<ParseItem<NT, T>>;
+ public transitions: ObjSet<ParseTransition<NT, T>>;
+
+ public constructor(num: number, items: ObjSet<ParseItem<NT, T>>) {
+ this.num = num;
+ this.items = items;
+ this.transitions = new ObjSet();
+ }
+
+ public transition(token: NT|T, newState: number) {
+ this.transitions.add(new ParseTransition<NT, T>(token, newState));
+ }
+
+ public hash(): string {
+ return this.items.hash();
+ }
+}
+
+interface ParseAction<NT, T> {
+ action: 'shift'|'reduce'|'accept';
+ newState?: number;
+ production?: Production<NT, T>;
+}
+
+interface ParseTable<NT, T> {
+ positions: {[token: string]: number};
+ actionTable: (ParseAction<NT, T>|null)[][];
+ gotoTable: (number|null)[][];
+}
+
+class TableGenerator<NT, T> {
+ public first: Map<NT|S|T, Set<S|T>>;
+ public states: ParseState<NT, T>[];
+ private goal: NT;
+ private productions: Production<NT, T>[];
+ private NTs: Set<NT>;
+ private Ts: Set<T>;
+ private NTProductions: Map<NT, Production<NT, T>[]>;
+
+ public constructor(goal: NT, productions: Production<NT, T>[], NTs: Set<NT>, Ts: Set<T>) {
+ this.goal = goal;
+ this.productions = productions;
+ this.NTs = NTs;
+ this.Ts = Ts;
+ this.NTProductions = this.calcNTProductions();
+ this.first = this.calcFirst();
+ this.states = this.calcStates();
+ }
+
+ public genTable(): ParseTable<NT, T> {
+ const result: ParseTable<NT, T> = {
+ positions: {},
+ actionTable: [],
+ gotoTable: [],
+ };
+
+ const Ts: (T|'eof')[] = Array.from(this.Ts);
+ Ts.push('eof');
+ Ts.sort();
+ Ts.forEach((val, index) => {
+ result.positions[val.toString()] = index;
+ });
+
+ const NTs = Array.from(this.NTs).sort();
+ NTs.forEach((val, index) => {
+ // .toString() is a hack to make the type checker shut up,
+ // couldn't find another way
+ result.positions[val.toString()] = index;
+ });
+
+ this.states.forEach(state => {
+ const actionRow: (ParseAction<NT, T>|null)[] = [];
+ for (const t of Ts) {
+ actionRow.push(null);
+ }
+ const gotoRow: (number|null)[] = [];
+ for (const nt of NTs) {
+ gotoRow.push(null);
+ }
+
+ state.items.forEach(item => {
+ // reduction!
+ if (item.stacktop === item.production.rhs.length) {
+ if (item.production.lhs === this.goal &&
+ item.lookahead === 'eof') {
+ const index = result.positions[item.lookahead.toString()];
+
+ if (actionRow[index] !== null) {
+ throw new Error('conflict in table!');
+ }
+
+ actionRow[index] = {action: 'accept'};
+ } else {
+ const index = result.positions[item.lookahead.toString()];
+
+ if (actionRow[index] !== null) {
+ throw new Error('conflict in table!');
+ }
+
+ actionRow[index] = {action: 'reduce', production: item.production};
+ }
+ }
+ });
+
+ state.transitions.forEach(transition => {
+ const isNT = this.isNT(transition.token);
+
+ if (isNT) {
+ const index = result.positions[transition.token.toString()];
+
+ if (gotoRow[index] !== null) {
+ throw new Error('conflict in table!');
+ }
+
+ gotoRow[index] = transition.newState;
+ } else {
+ const index = result.positions[transition.token.toString()];
+
+ if (gotoRow[index] !== null) {
+ throw new Error('conflict in table!');
+ }
+
+ actionRow[index] = {action: 'shift', newState: transition.newState};
+ }
+ });
+
+ result.actionTable.push(actionRow);
+ result.gotoTable.push(gotoRow);
+ });
+
+ return result;
+ }
+
+ public closure(items: ObjSet<ParseItem<NT, T>>): boolean {
+ let changed = false;
+ let changing = true;
+
+ while (changing) {
+ changing = false;
+
+ items.forEach(item => {
+ if (item.stacktop === item.production.rhs.length) {
+ // no thanks
+ return;
+ }
+ const top = item.production.rhs[item.stacktop];
+ if (!this.isNT(top)) {
+ // no thanks again
+ return;
+ }
+
+ const leftovers: (NT|S|T)[] =
+ item.production.rhs.slice(item.stacktop + 1);
+ leftovers.push(item.lookahead);
+ const firstAfter = new Set<S|T>();
+ this.calcFirstOfSeq(this.first, leftovers, firstAfter);
+
+ const topProductions = this.NTProductions.get(top as NT);
+ if (typeof topProductions === 'undefined') {
+ throw new Error(`no productions for NT ${top}`);
+ }
+
+ topProductions.forEach(production => {
+ firstAfter.forEach(tokenAfter => {
+ const newItem: ParseItem<NT, T> =
+ new ParseItem(production, 0, tokenAfter);
+ changing = items.add(newItem) || changing;
+ });
+ });
+ });
+
+ changed = changed || changing;
+ }
+
+ return changed;
+ }
+
+ public goto(cci: ObjSet<ParseItem<NT, T>>, token: NT|T): ObjSet<ParseItem<NT, T>> {
+ const items = new ObjSet<ParseItem<NT, T>>();
+
+ cci.forEach(item => {
+ if (item.stacktop < item.production.rhs.length &&
+ item.production.rhs[item.stacktop] === token) {
+ const newItem = new ParseItem<NT, T>(
+ item.production, item.stacktop + 1, item.lookahead);
+ items.add(newItem);
+ }
+ });
+
+ this.closure(items);
+ return items;
+ }
+
+ private calcNTProductions(): Map<NT, Production<NT, T>[]> {
+ const result = new Map<NT, Production<NT, T>[]>();
+
+ this.NTs.forEach(nt => {
+ result.set(nt, []);
+ });
+
+ this.productions.forEach(production => {
+ const NTProductions = result.get(production.lhs);
+
+ if (typeof NTProductions === 'undefined') {
+ throw new Error(`lhs of production ${production} is not a NT`);
+ }
+
+ NTProductions.push(production);
+ });
+
+ return result;
+ }
+
+ private firstOf(first: Map<NT|S|T, Set<S|T>>, token: NT|S|T): Set<S|T> {
+ const firstSet = first.get(token);
+ if (typeof firstSet === 'undefined') {
+ throw Error(`first set for ${token} missing`);
+ }
+ return firstSet;
+ }
+
+ private setAdd<U>(victim: U, dest: Set<U>): boolean {
+ const oldSize = dest.size;
+ dest.add(victim);
+ return dest.size > oldSize;
+ }
+
+ private setAddAll<U>(source: Set<U>, dest: Set<U>, skipFalsy?: boolean): boolean {
+ let changed = false;
+ source.forEach(val => {
+ if (!skipFalsy || val) {
+ changed = this.setAdd(val, dest) || changed;
+ }
+ });
+ return changed;
+ }
+
+ private calcFirstOfSeq(first: Map<NT|S|T, Set<S|T>>, seq: (NT|S|T)[],
+ dest: Set<S|T>): boolean {
+ if (seq.length === 0) {
+ // FIRST[epsilon]
+ return this.setAdd('', dest);
+ } else {
+ let changed = this.setAddAll(this.firstOf(first, seq[0]), dest, true);
+
+ let i: number;
+ for (i = 0; i < seq.length - 1 &&
+ this.firstOf(first, seq[i]).has(''); i++) {
+ changed = this.setAddAll(
+ this.firstOf(first, seq[i + 1]), dest, true) || changed;
+ }
+
+ if (i === seq.length - 1 && this.firstOf(first, seq[i]).has('')) {
+ changed = this.setAdd('', dest) || changed;
+ }
+
+ return changed;
+ }
+ }
+
+ private calcFirst(): Map<NT|S|T, Set<S|T>> {
+ const first = new Map<NT|S|T, Set<S|T>>();
+
+ // FIRST[t] = {t} for all t in T
+ this.Ts.forEach(t => {
+ first.set(t, new Set<S|T>([t]));
+ });
+ first.set('', new Set<S|T>(['']));
+ first.set('eof', new Set<S|T>(['eof']));
+
+ // FIRST[nt] = {} for all nt in NT
+ this.NTs.forEach(nt => {
+ first.set(nt, new Set<S|T>());
+ });
+
+ let changing = true;
+
+ while (changing) {
+ changing = false;
+
+ this.productions.forEach(production => {
+ changing = this.calcFirstOfSeq(
+ first, production.rhs, this.firstOf(first, production.lhs)) || changing;
+ });
+ }
+
+ return first;
+ }
+
+ private isNT(token: NT|S|T): boolean {
+ return (this.NTs as Set<NT|S|T>).has(token);
+ }
+
+ private calcStates(): ParseState<NT, T>[] {
+ const states = new ObjSet<ParseState<NT, T>>();
+ const unprocessedStates: ParseState<NT, T>[] = [];
+
+ const goalProductions = this.NTProductions.get(this.goal);
+ if (typeof goalProductions === 'undefined') {
+ throw new Error('goal NT has no productions (sounds fun)');
+ }
+
+ const cc0Items = new ObjSet<ParseItem<NT, T>>(
+ goalProductions.map(production => new ParseItem<NT, T>(production, 0, 'eof')));
+ this.closure(cc0Items);
+ const cc0 = new ParseState<NT, T>(0, cc0Items);
+
+ states.add(cc0);
+ unprocessedStates.push(cc0);
+
+ while (unprocessedStates.length > 0) {
+ const cci = unprocessedStates.pop() as ParseState<NT, T>;
+
+ cci.items.forEach(item => {
+ if (item.stacktop < item.production.rhs.length) {
+ const top = item.production.rhs[item.stacktop];
+ let newState = new ParseState(states.size(), this.goto(cci.items, top));
+
+ if (states.add(newState)) {
+ unprocessedStates.push(newState);
+ } else {
+ newState = states.get(newState) as ParseState<NT, T>;
+ }
+
+ cci.transition(top, newState.num);
+ }
+ });
+ }
+
+ return states.toArray().sort(
+ (left: ParseState<NT, T>, right: ParseState<NT, T>) => left.num - right.num);
+ }
+}
+
+export { TableGenerator, S, ParseItem, ParseState, ParseTransition, ParseAction };
diff --git a/novice/assembler/scanner.test.ts b/novice/assembler/scanner.test.ts
index 9136d5e..9c57f52 100644
--- a/novice/assembler/scanner.test.ts
+++ b/novice/assembler/scanner.test.ts
@@ -25,12 +25,12 @@ describe('scanner', () => {
return new Scanner(fp).scan().then(lines => {
expect(lines).toEqual([
{num: 1, tokens: [
- {col: 4, val: 'add'},
- {col: 8, val: 'r0'},
- {col: 10, val: ','},
- {col: 12, val: 'r0'},
- {col: 14, val: ','},
- {col: 16, val: 'r1'},
+ {col: 4, val: 'add', kind: 'word'},
+ {col: 8, val: 'r0', kind: 'reg'},
+ {col: 10, val: ',', kind: ','},
+ {col: 12, val: 'r0', kind: 'reg'},
+ {col: 14, val: ',', kind: ','},
+ {col: 16, val: 'r1', kind: 'reg'},
]},
]);
});
@@ -44,8 +44,8 @@ describe('scanner', () => {
return new Scanner(fp).scan().then(lines => {
expect(lines).toEqual([
{num: 1, tokens: [
- {col: 1, val: '.fill'},
- {col: 7, val: '420'},
+ {col: 1, val: '.fill', kind: 'pseudoop'},
+ {col: 7, val: '420', kind: 'int-decimal'},
]},
]);
});
@@ -59,8 +59,8 @@ describe('scanner', () => {
return new Scanner(fp).scan().then(lines => {
expect(lines).toEqual([
{num: 1, tokens: [
- {col: 1, val: 'trap'},
- {col: 6, val: 'x69'},
+ {col: 1, val: 'trap', kind: 'word'},
+ {col: 6, val: 'x69', kind: 'int-hex'},
]},
]);
});
@@ -75,10 +75,10 @@ describe('scanner', () => {
return new Scanner(fp).scan().then(lines => {
expect(lines).toEqual([
{num: 2, tokens: [
- {col: 1, val: 'not'},
- {col: 5, val: 'r0'},
- {col: 7, val: ','},
- {col: 9, val: 'r0'},
+ {col: 1, val: 'not', kind: 'word'},
+ {col: 5, val: 'r0', kind: 'reg'},
+ {col: 7, val: ',', kind: ','},
+ {col: 9, val: 'r0', kind: 'reg'},
]},
]);
});
@@ -94,19 +94,19 @@ describe('scanner', () => {
return new Scanner(fp).scan().then(lines => {
expect(lines).toEqual([
{num: 1, tokens: [
- {col: 1, val: '.orig'},
- {col: 7, val: 'x3000'},
+ {col: 1, val: '.orig', kind: 'pseudoop'},
+ {col: 7, val: 'x3000', kind: 'int-hex'},
]},
{num: 2, tokens: [
- {col: 1, val: 'add'},
- {col: 5, val: 'r1'},
- {col: 7, val: ','},
- {col: 9, val: 'r1'},
- {col: 11, val: ','},
- {col: 13, val: 'r2'},
+ {col: 1, val: 'add', kind: 'word'},
+ {col: 5, val: 'r1', kind: 'reg'},
+ {col: 7, val: ',', kind: ','},
+ {col: 9, val: 'r1', kind: 'reg'},
+ {col: 11, val: ',', kind: ','},
+ {col: 13, val: 'r2', kind: 'reg'},
]},
{num: 3, tokens: [
- {col: 1, val: '.end'},
+ {col: 1, val: '.end', kind: 'pseudoop'},
]},
]);
});
@@ -121,11 +121,12 @@ describe('scanner', () => {
return new Scanner(fp).scan().then(lines => {
expect(lines).toEqual([
{num: 1, tokens: [
- {col: 1, val: 'bob:'},
+ {col: 1, val: 'bob', kind: 'word'},
+ {col: 4, val: ':', kind: ':'},
]},
{num: 2, tokens: [
- {col: 1, val: 'goto'},
- {col: 6, val: 'bob'},
+ {col: 1, val: 'goto', kind: 'word'},
+ {col: 6, val: 'bob', kind: 'word'},
]},
]);
});
@@ -139,8 +140,8 @@ describe('scanner', () => {
return new Scanner(fp).scan().then(lines => {
expect(lines).toEqual([
{num: 1, tokens: [
- {col: 1, val: '.stringz'},
- {col: 10, val: '"hi\\npatrick"'},
+ {col: 1, val: '.stringz', kind: 'pseudoop'},
+ {col: 10, val: '"hi\\npatrick"', kind: 'string'},
]},
]);
});
@@ -154,8 +155,8 @@ describe('scanner', () => {
return new Scanner(fp).scan().then(lines => {
expect(lines).toEqual([
{num: 1, tokens: [
- {col: 1, val: '.fill'},
- {col: 9, val: "'\\n'"},
+ {col: 1, val: '.fill', kind: 'pseudoop'},
+ {col: 9, val: "'\\n'", kind: 'char'},
]},
]);
});
diff --git a/novice/assembler/scanner.ts b/novice/assembler/scanner.ts
index 1165cd4..93a5f44 100644
--- a/novice/assembler/scanner.ts
+++ b/novice/assembler/scanner.ts
@@ -1,9 +1,10 @@
import { Readable } from 'stream';
-import { DFA, dfas } from './dfa';
+import { DFA, dfas, Kind, kinds } from './dfa';
interface Token {
col: number;
val: string;
+ kind: Kind;
}
interface Line {
@@ -79,15 +80,17 @@ class Scanner {
if (best.getAcceptingLength() > 0) {
const tokenLen = best.getAcceptingLength();
+ const tokenKind = best.getKind();
- // Leave out stuff like whitespace
- if (best.isToken()) {
+ // Leave out stuff like whitespace (will be null in that
+ // case)
+ if (tokenKind) {
if (this.newline) {
this.newline = false;
this.lines.push({num: this.lineNum, tokens: []});
}
const newVal = this.currentToken.substring(0, tokenLen);
- const newToken = {col: this.col, val: newVal};
+ const newToken = {col: this.col, val: newVal, kind: tokenKind};
this.lines[this.lines.length - 1].tokens.push(newToken);
}
@@ -128,4 +131,4 @@ class Scanner {
}
}
-export { Line, Token, Scanner };
+export { Line, Token, Kind, kinds, Scanner };
diff --git a/tslint.json b/tslint.json
index e5a64a1..37f0f90 100644
--- a/tslint.json
+++ b/tslint.json
@@ -9,7 +9,8 @@
"array-type": [true, "array"],
"interface-name": [true, "never-prefix"],
"max-classes-per-file": false,
- "arrow-parens": [true, "ban-single-arg-parens"]
+ "arrow-parens": [true, "ban-single-arg-parens"],
+ "object-literal-sort-keys": false
},
"rulesDirectory": []
}