Tokenizer now correctly recognizes Outdents

2025-05-16 12:30:09 -07:00 · 2020-10-27 13:24:23 -04:00 · 2020-10-27 13:24:23 -04:00 · 44838b8a43
commit 44838b8a43
parent 5e9c185923
7 changed files with 313 additions and 9451 deletions
--- a/docs/bundle.min.js
+++ b/docs/bundle.min.js
--- a/package.json
+++ b/package.json
@ -29,7 +29,7 @@
  },
  "scripts": {
    "build": "webpack --config webpack.full.config.js",
-	"partial": "webpack --config webpack.partial.config.js",
+    "partial": "webpack --config webpack.partial.config.js",
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "keywords": [
--- a/src/parser.ts
+++ b/src/parser.ts
@ -1,7 +1,7 @@
 /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
 import { CstParser } from "chevrotain";
-import * as T from "./tokenizer";
+import * as T from "./tokens";
 export class Human2RegexParser extends CstParser {
    constructor() {
--- a/src/script.ts
+++ b/src/script.ts
@ -2,8 +2,7 @@
 import "./style.css";
-import { Human2RegexLexer } from './tokenizer';
+import { Human2RegexLexer } from "./tokenizer";
 import { Human2RegexParser } from './parser';
 /*
 $(function() {
@ -11,7 +10,9 @@ $(function() {
 });
 */
-const result = Human2RegexLexer.tokenize(`
+const lexer = new Human2RegexLexer();
 const result = lexer.tokenize(`
 // H2R supports // # and /**/ as comments
 // A group is only captured if given a name. 
 // You can use "and", "or", "not" to specify "[]" regex
@ -52,8 +53,16 @@ create an optional group
 	match 0+ any thing
 `);
 //let str = "";
 //for(const r of result.tokens) {
 //    str += r.tokenType === Newline ? "\n" : r.image + " ";
 //}
 //console.log(str);
 for(const r of result.tokens) {
-    console.log(r);
+	console.log(r);
 }
 console.log(result.errors);
--- a/src/tokenizer.ts
+++ b/src/tokenizer.ts
@ -1,179 +1,8 @@
 /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
-import { createToken, Lexer, IToken, createTokenInstance, ILexingResult } from "chevrotain";
+import { Lexer, IToken, createTokenInstance, ILexingResult } from "chevrotain";
-
+import { last, findLastIndex } from "./utilities";
-export const Zero = createToken({name: "Zero", pattern: /zero/i });
+import { Indent, Outdent, EndOfLine, AllTokens } from "./tokens";
 export const One = createToken({name: "One", pattern: /one/i });
 export const Two = createToken({name: "Two", pattern: /two/i });
 export const Three = createToken({name: "Three", pattern: /three/i });
 export const Four = createToken({name: "Four", pattern: /four/i });
 export const Five = createToken({name: "Five", pattern: /five/i });
 export const Six = createToken({name: "Six", pattern: /six/i });
 export const Seven = createToken({name: "Seven", pattern: /seven/i });
 export const Eight = createToken({name: "Eight", pattern: /eight/i });
 export const Nine = createToken({name: "Nine", pattern: /nine/i });
 export const Ten = createToken({name: "Ten", pattern: /ten/i });
 export const Optional = createToken({name: "Optional", pattern: /optional(ly)?/i });
 export const Match = createToken({name: "Match", pattern: /match(es)?/i });
 export const Then = createToken({name: "Then", pattern: /then/i });
 export const Anything = createToken({name: "Anything", pattern: /(any thing|any|anything)(s)?/i});
 export const Of = createToken({name: "Of", pattern: /of/i});
 export const Or = createToken({name: "Or", pattern: /or/i});
 export const And = createToken({name: "And", pattern: /and|,/i});
 export const Word = createToken({name: "Word Specifier", pattern: /word(s)?/i});
 export const Digit = createToken({name: "Digit Specifier", pattern: /digit(s)?/i});
 export const Character = createToken({name: "Character Specifier", pattern: /character(s)?/i});
 export const Whitespace = createToken({name: "Whitespace Specifier", pattern: /(white space|whitespace)(s)?/i});
 export const Number = createToken({name: "NumberSpecifier", pattern: /number(s)?/i});
 export const As = createToken({name: "As", pattern: /as/i});
 export const If = createToken({name: "If", pattern: /if/i});
 export const Start = createToken({name: "Start", pattern: /start(s)?/i});
 export const With = createToken({name: "With", pattern: /with/i});
 export const Ends = createToken({name: "Ends", pattern: /end(s)?/i});
 export const Otherwise = createToken({name: "Otherwise", pattern: /(other wise|otherwise)/i});
 export const Else = createToken({name: "Else", pattern: /else/i});
 export const Unless = createToken({name: "Unless", pattern: /unless/i});
 export const While = createToken({name: "While", pattern: /while/i});
 export const More = createToken({name: "More", pattern: /more/i});
 export const Using = createToken({name: "Using", pattern: /using/i});
 export const Global = createToken({name: "Global", pattern: /global/i});
 export const Multiline = createToken({name: "Multiline", pattern: /(multi line|multiline)/i});
 export const Exact = createToken({name: "Exact", pattern: /exact/i});
 export const Matching = createToken({name: "Matching", pattern: /matching/i});
 export const Nothing = createToken({name: "Nothing", pattern: /nothing/i});
 export const Not = createToken({name: "Not", pattern: /not/i }); //, longer_alt: Nothing});
 export const Between = createToken({name: "Between", pattern: /between/i});
 export const Tab = createToken({name: "Tab", pattern: /tab/i});
 export const Linefeed = createToken({name: "Linefeed", pattern: /(line feed|linefeed)/i});
 export const Group = createToken({name: "Group", pattern: /group/i});
 export const By = createToken({name: "By", pattern: /by/i});
 export const A = createToken({name: "A", pattern: /a(n)?/i }); //, longer_alt: Anything});
 export const The = createToken({name: "The", pattern: /the/i }); //, longer_alt: Then});
 export const Exactly = createToken({name: "Exactly", pattern: /exact(ly)?/i});
 export const Inclusive = createToken({name: "Inclusive", pattern: /inclusive(ly)?/i});
 export const Exclusive = createToken({name: "Exclusive", pattern: /exclusive(ly)?/i});
 export const From = createToken({name: "From", pattern: /from/i});
 export const To = createToken({name: "To", pattern: /(to|\-|\.\.|\.\.\.)/i});
 export const Create = createToken({name: "Create", pattern: /create(s)?/i});
 export const Called = createToken({name: "Called", pattern: /called/i});
 export const Repeat = createToken({name: "Repeat", pattern: /repeat(s|ing)?/i});
 export const Newline = createToken({name: "Newline", pattern: /(new line|newline)/i});
 export const None = createToken({name: "None", pattern: /none/i});
 export const Neither = createToken({name: "Neither", pattern: /neither/i});
 export const CarriageReturn = createToken({name: "Carriage Return", pattern: /carriage return/i});
 export const CaseInsensitive = createToken({name: "Case Insensitive", pattern: /case insensitive/i});
 export const CaseSensitive = createToken({name: "Case Sensitive", pattern: /case sensitive/i});
 export const OrMore = createToken({name: "Or More", pattern: /\+/ });
 export const LBracket = createToken({name: "Left Bracket", pattern: /\(/ });
 export const RBracket = createToken({name: "Right Bracket", pattern: /\)/ });
 export const EndOfLine = createToken({name: "EOL", pattern: /\n/, group: "nl" });
 export const WhiteSpace = createToken({name: "Whitespace", pattern: /\s+/, group: Lexer.SKIPPED });
 export const SingleLineComment = createToken({name: "Single-Line Comment", pattern: /(#|\/\/).*/, group: Lexer.SKIPPED });
 export const MultilineComment = createToken({name: "Multi-Line Comment", pattern: /\/\*(.*)\*\//, line_breaks: true, group: Lexer.SKIPPED });
 export const Identifier = createToken({name: "Identifier", pattern: /[a-z]\w*/i });
 export const NumberLiteral = createToken({name: "Number Literal", pattern: /-?(0|[1-9]\d*)(\.\d+)?([eE][+-]?\d+)?/ });
 export const StringLiteral = createToken({name: "String Literal", pattern: /"(?:[^\\"]|\\(?:[bfnrtv"\\/]|u[0-9a-f]{4}|U[0-9a-f]{8}))*"/i });
 enum IndentBaseType {
    Indent,
    Outdent
 }
 export const Indent = createToken({
    name: "Indent",
    start_chars_hint: [ "\t", " " ],
    pattern: (text, offset, matchedTokens, groups) => Human2RegexLexer.matchIndentBase(text, offset, matchedTokens, groups, IndentBaseType.Indent),
    // custom token patterns should explicitly specify the line_breaks option
    line_breaks: false
 });
 export const Outdent = createToken({
    name: "Outdent",
    start_chars_hint: [ "\t", " " ],
    pattern: (text, offset, matchedTokens, groups) => Human2RegexLexer.matchIndentBase(text, offset, matchedTokens, groups, IndentBaseType.Outdent),
    // custom token patterns should explicitly specify the line_breaks option
    line_breaks: false
 });
 export const AllTokens = [
    Zero,
    One,
    Two,
    Three,
    Four,
    Five,
    Six,
    Seven,
    Eight,
    Nine,
    Ten,
    Optional,
    Matching,
    Match,
    Then,
    Anything,
    Of,
    Or,
    And,
    Word,
    Digit,
    Character,
    Whitespace,
    Number,
    As,
    If,
    Start,
    With,
    Ends,
    Otherwise,
    Else,
    Unless,
    While,
    More,
    Using,
    Global,
    Multiline,
    Exact,
    Nothing,
    Not,
    Between,
    Tab,
    Linefeed,
    Group,
    By,
    A,
    The,
    Exactly,
    Inclusive,
    Exclusive,
    From,
    Create,
    Called,
    Repeat,
    Newline,
    None,
    Neither,
    CarriageReturn,
    CaseInsensitive,
    CaseSensitive,
    OrMore,
    To,
    EndOfLine,
    Indent,
    Outdent,
    WhiteSpace,
    SingleLineComment,
    MultilineComment,
    Identifier,
    NumberLiteral,
    StringLiteral,
 ];
 const H2RLexer = new Lexer(AllTokens, { ensureOptimizations: true });
 export enum IndentType {
    Tabs,
@ -188,118 +17,21 @@ export class Human2RegexLexerOptions {
 }
 export class Human2RegexLexer {
-    //Taken and adapted from https://github.com/SAP/chevrotain/blob/master/examples/lexer/python_indentation/python_indentation.js
+    private static already_init = false;
-    // State required for matching the indentations
+    private lexer : Lexer;
    private static options = new Human2RegexLexerOptions();
    private static indentStack = [ 0 ];
    private static wsRegExp: RegExp;
    private static spacesPerTab = "   ";
-    private static findLastIndex<T>(array: T[], predicate: (x: T) => boolean) : number {
+    constructor(private options: Human2RegexLexerOptions = new Human2RegexLexerOptions()) {
-        for (let index = array.length; index >= 0; index--) {
+        if (Human2RegexLexer.already_init) {
-            if (predicate(array[index])) {
+            throw new Error("Only 1 instance of Human2RegexLexer allowed");
                return index;
            }
        }
        return -1;
    }
    /**
     * This custom Token matcher uses Lexer context ("matchedTokens" and "groups" arguments)
     * combined with state via closure ("indentStack" and "lastTextMatched") to match indentation.
     */
    public static matchIndentBase(text: string, offset: number, matchedTokens: IToken[], groups: {[groupName: string]: IToken[]}, type: IndentBaseType) : RegExpExecArray | null  {
        const noTokensMatchedYet = !matchedTokens.length;
        const newLines = groups.nl;
        const noNewLinesMatchedYet = !newLines.length;
        const isFirstLine = noTokensMatchedYet && noNewLinesMatchedYet;
        const isStartOfLine =
            // only newlines matched so far
            (noTokensMatchedYet && !noNewLinesMatchedYet) ||
            // Both newlines and other Tokens have been matched AND the offset is just after the last newline
            (!noTokensMatchedYet &&
            !noNewLinesMatchedYet &&
            offset === newLines[newLines.length-1].startOffset + 1);
        // indentation can only be matched at the start of a line.
        if (isFirstLine || isStartOfLine) {
            let currIndentLevel: number = -1;
            Human2RegexLexer.wsRegExp.lastIndex = offset;
            const match = Human2RegexLexer.wsRegExp.exec(text);
            // possible non-empty indentation
            if (match !== null) {
                currIndentLevel = match[0].length;
                //if (this.options.type === IndentType.Tabs) {
                //    currIndentLevel = match[0].length;
                //}
                //else {
                //    currIndentLevel = match[0].replace(Human2RegexLexer.spacesPerTab, "\t").length;
                //}
            }
            // "empty" indentation means indentLevel of 0.
            else {
                currIndentLevel = 0;
            }
            const prevIndentLevel = this.indentStack[this.indentStack.length-1];
            // deeper indentation
            if (currIndentLevel > prevIndentLevel && type === IndentBaseType.Indent) {
                this.indentStack.push(currIndentLevel);
                return match;
            }
            // shallower indentation
            else if (currIndentLevel < prevIndentLevel && type === IndentBaseType.Outdent) {
                const matchIndentIndex = this.findLastIndex(this.indentStack, (stackIndentDepth) => stackIndentDepth === currIndentLevel);
                // any outdent must match some previous indentation level.
                if (matchIndentIndex === -1) {
                    throw Error(`invalid outdent at offset: ${offset}`);
                }
                const numberOfDedents = this.indentStack.length - matchIndentIndex - 1;
                // This is a little tricky
                // 1. If there is no match (0 level indent) than this custom token
                //    matcher would return "null" and so we need to add all the required outdents ourselves.
                // 2. If there was match (> 0 level indent) than we need to add minus one number of outsents
                //    because the lexer would create one due to returning a none null result.
                const iStart = match !== null ? 1 : 0;
                for (let i = iStart; i < numberOfDedents; i++) {
                    this.indentStack.pop();
                    matchedTokens.push(createTokenInstance(Outdent, "", NaN, NaN, NaN, NaN, NaN, NaN));
                }
                // even though we are adding fewer outdents directly we still need to update the indent stack fully.
                if (iStart === 1) {
                    this.indentStack.pop();
                }
                return match;
            } 
            else {
                // same indent, this should be lexed as simple whitespace and ignored
                return null;
            }
        } 
        else {
            // indentation cannot be matched under other circumstances
            return null;
        }
    }
    public static tokenize(text: string, options: Human2RegexLexerOptions | null = null) : ILexingResult{
        // have to reset the indent stack between processing of different text inputs
        Human2RegexLexer.indentStack = [ 0 ];
        if (options !== null) {
            Human2RegexLexer.options = this.options;
        }
-        /*
+        Human2RegexLexer.already_init = true;
        let indent_regex: RegExp | null = null;
        if (this.options.type === IndentType.Tabs) {
-            Human2RegexLexer.wsRegExp = /\t/y;
+            indent_regex = /\t/y;
        }
        else {
            let reg = ` {${this.options.spaces_per_tab}}`;
@ -308,20 +40,117 @@ export class Human2RegexLexer {
                reg += "|\\t";
            }
-            Human2RegexLexer.wsRegExp = new RegExp(reg, "y");
+            indent_regex = new RegExp(reg, "y");
        }
-            Human2RegexLexer.spacesPerTab = Array(this.options.spaces_per_tab+1).join(" ");
+        Indent.PATTERN = indent_regex;
-        }*/
+
-        Human2RegexLexer.wsRegExp = / +/y;
+        this.lexer = new Lexer(AllTokens, { ensureOptimizations: true });
-    
+    }
-        const lexResult = H2RLexer.tokenize(text);
+
-    
+    public tokenize(text: string) : ILexingResult {
-        //add remaining Outdents
+        const lexResult = this.lexer.tokenize(text);
-        while (Human2RegexLexer.indentStack.length > 1) {
+
-            lexResult.tokens.push(createTokenInstance(Outdent, "", NaN, NaN, NaN, NaN, NaN, NaN));
+        if (lexResult.tokens.length == 0) {
-            Human2RegexLexer.indentStack.pop();
+            return lexResult;
        }
        // create Outdents
        const tokens: IToken[] = [];
        const indentStack = [ 0 ];
        let currIndentLevel = 0;
        let startOfLine = true;
        let hadIndents = false;
        for (let i = 0; i < lexResult.tokens.length; i++) {
            if (lexResult.tokens[i].tokenType === EndOfLine) {
                startOfLine = true;
                tokens.push(lexResult.tokens[i]);
            }
            else if (lexResult.tokens[i].tokenType === Indent) {
                hadIndents = true;
                currIndentLevel = 1; 
                const start_token = lexResult.tokens[i];
                let length = lexResult.tokens[i].image.length;
                while (lexResult.tokens[i+1].tokenType === Indent) {
                    currIndentLevel++;
                    i++;
                    length += lexResult.tokens[i].image.length;
                }
                if (!startOfLine || (currIndentLevel > last(indentStack) + 1)) {
                    lexResult.errors.push({ 
                        offset: start_token.startOffset,
                        line: start_token.startLine ?? NaN,
                        column: start_token.startColumn ?? NaN,
                        length: length,
                        message: "Unexpected indentation found"
                    });
                }
                else if (currIndentLevel > last(indentStack)) {
                    indentStack.push(currIndentLevel);
                    tokens.push(start_token);
                }
                else if (currIndentLevel < last(indentStack)) {
                    const index = findLastIndex(indentStack, currIndentLevel);
                    if (index < 0) {
                        lexResult.errors.push({ 
                            offset: start_token.startOffset,
                            line: start_token.startLine ?? NaN,
                            column: start_token.startColumn ?? NaN,
                            length: length,
                            message: "Unexpected indentation found"
                        });
                    }
                    else {
                        const numberOfDedents = indentStack.length - index - 1;
                        for(let i = 0; i < numberOfDedents; i++) {
                            indentStack.pop();
                            tokens.push(createTokenInstance(Outdent, "", start_token.startOffset, start_token.startOffset + length, start_token.startLine ?? NaN, start_token.endLine ?? NaN, start_token.startColumn ?? NaN, (start_token.startColumn ?? NaN) + length));
                        }
                    }
                }
                else {
                    // same indent level: don't care
                }
            }
            else {
                if(startOfLine && !hadIndents) {
                    const tok = lexResult.tokens[i];
                    while (indentStack.length > 1) {
                        indentStack.pop();
                        tokens.push(createTokenInstance(Outdent, "", tok.startOffset, tok.startOffset, tok.startLine ?? NaN, NaN, tok.startColumn ?? NaN, NaN));
                    }
                }
                startOfLine = false;
                hadIndents = false;
                tokens.push(lexResult.tokens[i]);
            }
        }
        const tok = last(tokens);
        // Do we have an EOL marker at the end?
        if(tok.tokenType !== EndOfLine) {
            tokens.push(createTokenInstance(EndOfLine, "\n", tok.endOffset ?? NaN, tok.endOffset ?? NaN, tok.startLine ?? NaN, NaN, tok.startColumn ?? NaN, NaN)); 
        }
        //add remaining Outdents
        while (indentStack.length > 1) {
            indentStack.pop();
            tokens.push(createTokenInstance(Outdent, "", tok.endOffset ?? NaN, tok.endOffset ?? NaN, tok.startLine ?? NaN, NaN, tok.startColumn ?? NaN, NaN));
        }
        lexResult.tokens = tokens;
        return lexResult;
    }
 }
--- a/src/tokens.ts
+++ b/src/tokens.ts
@ -0,0 +1,154 @@
 import { createToken, Lexer } from "chevrotain";
 // Token types for the Human2Regex lexer.
 // Every keyword pattern is case-insensitive (the `i` flag); several accept optional
 // plural / adverb suffixes such as "(s)?" or "(ly)?".

 // --- number words ---
 export const Zero = createToken({name: "Zero", pattern: /zero/i });
 export const One = createToken({name: "One", pattern: /one/i });
 export const Two = createToken({name: "Two", pattern: /two/i });
 export const Three = createToken({name: "Three", pattern: /three/i });
 export const Four = createToken({name: "Four", pattern: /four/i });
 export const Five = createToken({name: "Five", pattern: /five/i });
 export const Six = createToken({name: "Six", pattern: /six/i });
 export const Seven = createToken({name: "Seven", pattern: /seven/i });
 export const Eight = createToken({name: "Eight", pattern: /eight/i });
 export const Nine = createToken({name: "Nine", pattern: /nine/i });
 export const Ten = createToken({name: "Ten", pattern: /ten/i });

 // --- keywords ---
 export const Optional = createToken({name: "Optional", pattern: /optional(ly)?/i });
 export const Match = createToken({name: "Match", pattern: /match(es)?/i });
 export const Then = createToken({name: "Then", pattern: /then/i });
 // "anything" is listed before "any": regex alternation takes the first alternative that
 // matches, so with "any" first the word "anything" was cut after its first three letters.
 export const Anything = createToken({name: "Anything", pattern: /(any thing|anything|any)(s)?/i});
 export const Of = createToken({name: "Of", pattern: /of/i});
 export const Or = createToken({name: "Or", pattern: /or/i});
 export const And = createToken({name: "And", pattern: /and|,/i});
 export const Word = createToken({name: "Word Specifier", pattern: /word(s)?/i});
 export const Digit = createToken({name: "Digit Specifier", pattern: /digit(s)?/i});
 export const Character = createToken({name: "Character Specifier", pattern: /character(s)?/i});
 export const Whitespace = createToken({name: "Whitespace Specifier", pattern: /(white space|whitespace)(s)?/i});
 export const Number = createToken({name: "NumberSpecifier", pattern: /number(s)?/i});
 export const As = createToken({name: "As", pattern: /as/i});
 export const If = createToken({name: "If", pattern: /if/i});
 export const Start = createToken({name: "Start", pattern: /start(s)?/i});
 export const With = createToken({name: "With", pattern: /with/i});
 export const Ends = createToken({name: "Ends", pattern: /end(s)?/i});
 export const Otherwise = createToken({name: "Otherwise", pattern: /(other wise|otherwise)/i});
 export const Else = createToken({name: "Else", pattern: /else/i});
 export const Unless = createToken({name: "Unless", pattern: /unless/i});
 export const While = createToken({name: "While", pattern: /while/i});
 export const More = createToken({name: "More", pattern: /more/i});
 export const Using = createToken({name: "Using", pattern: /using/i});
 export const Global = createToken({name: "Global", pattern: /global/i});
 export const Multiline = createToken({name: "Multiline", pattern: /(multi line|multiline)/i});
 export const Exact = createToken({name: "Exact", pattern: /exact/i});
 export const Matching = createToken({name: "Matching", pattern: /matching/i});
 export const Nothing = createToken({name: "Nothing", pattern: /nothing/i});
 export const Not = createToken({name: "Not", pattern: /not/i }); //, longer_alt: Nothing});
 export const Between = createToken({name: "Between", pattern: /between/i});
 export const Tab = createToken({name: "Tab", pattern: /tab/i});
 export const Linefeed = createToken({name: "Linefeed", pattern: /(line feed|linefeed)/i});
 export const Group = createToken({name: "Group", pattern: /group/i});
 export const By = createToken({name: "By", pattern: /by/i});
 export const A = createToken({name: "A", pattern: /a(n)?/i }); //, longer_alt: Anything});
 export const The = createToken({name: "The", pattern: /the/i }); //, longer_alt: Then});
 export const Exactly = createToken({name: "Exactly", pattern: /exact(ly)?/i});
 export const Inclusive = createToken({name: "Inclusive", pattern: /inclusive(ly)?/i});
 export const Exclusive = createToken({name: "Exclusive", pattern: /exclusive(ly)?/i});
 export const From = createToken({name: "From", pattern: /from/i});
 // "..." is listed before ".." so that a three-dot range is consumed whole instead of
 // being split into ".." followed by a stray ".".
 export const To = createToken({name: "To", pattern: /(to|\-|\.\.\.|\.\.)/i});
 export const Create = createToken({name: "Create", pattern: /create(s)?/i});
 export const Called = createToken({name: "Called", pattern: /called/i});
 export const Repeat = createToken({name: "Repeat", pattern: /repeat(s|ing)?/i});
 export const Newline = createToken({name: "Newline", pattern: /(new line|newline)/i});
 export const None = createToken({name: "None", pattern: /none/i});
 export const Neither = createToken({name: "Neither", pattern: /neither/i});
 export const CarriageReturn = createToken({name: "Carriage Return", pattern: /carriage return/i});
 export const CaseInsensitive = createToken({name: "Case Insensitive", pattern: /case insensitive/i});
 export const CaseSensitive = createToken({name: "Case Sensitive", pattern: /case sensitive/i});

 // --- punctuation ---
 export const OrMore = createToken({name: "Or More", pattern: /\+/ });
 export const LBracket = createToken({name: "Left Bracket", pattern: /\(/ });
 export const RBracket = createToken({name: "Right Bracket", pattern: /\)/ });

 // --- layout and comments ---
 export const EndOfLine = createToken({name: "EOL", pattern: /\n/ });
 // NOTE(review): /\s+/ also matches "\n", so a run of whitespace that ends a line would
 // absorb the line break and no EOL token would be produced for it — confirm whether
 // trailing whitespace needs to be supported.
 export const WhiteSpace = createToken({name: "Whitespace", pattern: /\s+/, group: Lexer.SKIPPED });
 export const SingleLineComment = createToken({name: "Single-Line Comment", pattern: /(#|\/\/).*/, group: Lexer.SKIPPED });
 // [\s\S]*? lets the comment body span line breaks ("." does not match "\n") and stops at
 // the first "*/" rather than the last one on the line.
 export const MultilineComment = createToken({name: "Multi-Line Comment", pattern: /\/\*[\s\S]*?\*\//, line_breaks: true, group: Lexer.SKIPPED });

 // --- identifiers and literals ---
 export const Identifier = createToken({name: "Identifier", pattern: /[a-z]\w*/i });
 export const NumberLiteral = createToken({name: "Number Literal", pattern: /-?(0|[1-9]\d*)(\.\d+)?([eE][+-]?\d+)?/ });
 export const StringLiteral = createToken({name: "String Literal", pattern: /"(?:[^\\"]|\\(?:[bfnrtv"\\/]|u[0-9a-f]{4}|U[0-9a-f]{8}))*"/i });

 // --- indentation ---
 // Indent is created without a pattern; the Human2RegexLexer constructor in tokenizer.ts
 // assigns Indent.PATTERN before it builds the Lexer.
 export const Indent = createToken({name: "Indent"});
 // Outdent never gets a pattern; Human2RegexLexer.tokenize creates its instances with
 // createTokenInstance.
 export const Outdent = createToken({name: "Outdent"});
 // Token types handed to the chevrotain Lexer (tokenizer.ts builds it with
 // `new Lexer(AllTokens, { ensureOptimizations: true })`).
 // Not listed here: Outdent (its instances are created by Human2RegexLexer.tokenize),
 // LBracket and RBracket.
 // NOTE(review): the list looks ordered so that a longer keyword comes before a keyword it
 // starts with (Matching before Match, Nothing before Not, Then before The) and Identifier
 // comes after every keyword — presumably because the lexer prefers earlier entries;
 // confirm against the chevrotain documentation before reordering.
 // NOTE(review): Exact precedes Exactly, and Exactly's /exact(ly)?/ also matches "exact";
 // if earlier entries win, "exactly" is split after "exact" — confirm this is intended.
 export const AllTokens = [
    Zero,
    One,
    Two,
    Three,
    Four,
    Five,
    Six,
    Seven,
    Eight,
    Nine,
    Ten,
    Optional,
    Matching,
    Match,
    Then,
    Anything,
    Of,
    Or,
    And,
    Word,
    Digit,
    Character,
    Whitespace,
    Number,
    As,
    If,
    Start,
    With,
    Ends,
    Otherwise,
    Else,
    Unless,
    While,
    More,
    Using,
    Global,
    Multiline,
    Exact,
    Nothing,
    Not,
    Between,
    Tab,
    Linefeed,
    Group,
    By,
    A,
    The,
    Exactly,
    Inclusive,
    Exclusive,
    From,
    Create,
    Called,
    Repeat,
    Newline,
    None,
    Neither,
    CarriageReturn,
    CaseInsensitive,
    CaseSensitive,
    OrMore,
    To,
    EndOfLine,
    Indent,
    WhiteSpace,
    SingleLineComment,
    MultilineComment,
    Identifier,
    NumberLiteral,
    StringLiteral,
 ];
--- a/src/utilities.ts
+++ b/src/utilities.ts
@ -0,0 +1,21 @@
 /**
  * Returns the final element of `array`.
  *
  * For an empty array the computed index is -1, so the runtime result is
  * `undefined` even though the declared return type is `T`.
  *
  * @param array - The array to read from; it is not modified.
  * @returns The element stored at index `array.length - 1`.
  */
 export function last<T>(array: T[]) : T {
    const finalIndex = array.length - 1;
    return array[finalIndex];
 }
 /**
  * Finds the highest index at which `value` occurs in `array`.
  *
  * Elements are compared with strict equality (`===`), so `NaN` is never found.
  *
  * @param array - The array to search; it is not modified.
  * @param value - The value to look for.
  * @returns The index of the last matching element, or -1 when there is none.
  */
 export function findLastIndex<T>(array: T[], value: T) : number {
    // Array.prototype.lastIndexOf performs the same strict-equality scan from the end.
    return array.lastIndexOf(value);
 }
 /**
  * Finds the highest index whose element satisfies `predicate`.
  *
  * The array is walked from its last element towards its first, and the walk
  * stops at the first element for which `predicate` returns a truthy value.
  *
  * @param array - The array to search; it is not modified.
  * @param predicate - Called with each candidate element.
  * @returns The index of the last accepted element, or -1 when none is accepted.
  */
 export function findLastIndexPredicate<T>(array: T[], predicate: (x: T) => boolean) : number {
    let position = array.length;
    // The post-decrement moves `position` onto the next candidate before the body runs.
    while (position-- > 0) {
        if (predicate(array[position])) {
            return position;
        }
    }
    return -1;
 }