From 7d7d6337e1ab84872bbe034e2c3968cd7e680aec Mon Sep 17 00:00:00 2001
From: Patrick Demian
Date: Thu, 5 Nov 2020 01:03:31 -0500
Subject: [PATCH] Added tests and fixed bugs thanks to tests

---
 src/generator.ts        |  14 ++++--
 src/lexer.ts            |   4 +-
 src/parser.ts           |  92 +++++++++++++++++-----------------
 src/utilities.ts        |   2 +-
 tests/generator.spec.ts |  91 +++++++++++++++++++++++++++++++++
 tests/lexer.spec.ts     |  73 +++++++++++++++++++++++++--
 tests/parser.spec.ts    |  71 ++++++++++++++++++++++++++
 tests/utilities.spec.ts | 108 ++++++++++++++++++++++++++++++++++++++++
 8 files changed, 397 insertions(+), 58 deletions(-)
 create mode 100644 tests/generator.spec.ts
 create mode 100644 tests/parser.spec.ts
 create mode 100644 tests/utilities.spec.ts

diff --git a/src/generator.ts b/src/generator.ts
index f271105..b9c47c2 100644
--- a/src/generator.ts
+++ b/src/generator.ts
@@ -68,15 +68,13 @@ const unicode_script_codes = [
  * @internal
  */
 export abstract class H2RCST {
-    public tokens: IToken[];
-
     /**
      * Constructor for H2RCST
      *
     * @param tokens Tokens used to calculate where an error occured
      * @internal
      */
-    constructor(tokens: IToken[]) {
+    constructor(public tokens: IToken[]) {
         this.tokens = tokens;
     }
 
@@ -335,6 +333,8 @@ export class MatchSubStatementCST extends H2RCST {
 
         let ret = "";
 
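+        // track whether the alternation still needs a wrapping non-capture
+        // group; a bare "a|b" only requires grouping when a count follows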
+        let require_grouping = false;
+
         if (str.length === 1) {
             ret = str[0];
         }
@@ -344,10 +344,14 @@ export class MatchSubStatementCST extends H2RCST {
         }
         else {
             //use a no-capture group
-            ret = "(?:" + str.join("|") + ")";
+            ret = str.join("|");
+            require_grouping = true;
         }
 
         if (this.count) {
+            if (require_grouping) {
+                ret = "(?:" + ret + ")";
+            }
             ret += this.count.toRegex(language);
         }
 
@@ -380,7 +384,7 @@ export class UsingStatementCST extends H2RCST {
 
         for (let i = 1; i < this.flags.length; i++) {
             if (hasFlag(flag, this.flags[i])) {
-                errors.push(this.error("Duplicate modifier: " + MatchSubStatementType[this.flags[i]] ));
+                errors.push(this.error("Duplicate modifier: " + UsingFlags[this.flags[i]] ));
             }
             flag = combineFlags(flag, this.flags[i]);
         }
diff --git a/src/lexer.ts b/src/lexer.ts
index ab6e754..9cfea2a 100644
--- a/src/lexer.ts
+++ b/src/lexer.ts
@@ -93,7 +93,7 @@ export class Human2RegexLexer {
         this.lexer = new Lexer(AllTokens, { ensureOptimizations: true, skipValidations: options.skip_validations });
     }
 
-    private lexError(token: IToken) : ILexingError {
+    private lexError(token: IToken): ILexingError {
         return {
             offset: token.startOffset,
             line: token.startLine ?? NaN,
@@ -109,7 +109,7 @@ export class Human2RegexLexer {
      * @param text the text to analyze
      * @returns a lexing result which contains the token stream and error list
      */
-    public tokenize(text: string) : ILexingResult {
+    public tokenize(text: string): ILexingResult {
         const lex_result = this.lexer.tokenize(text);
 
         if (lex_result.tokens.length === 0) {
diff --git a/src/parser.ts b/src/parser.ts
index 5063b88..93f4b5d 100644
--- a/src/parser.ts
+++ b/src/parser.ts
@@ -154,7 +154,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
     });
 
     // match sub rules
-    let mss_rules: IOrAlt<MatchSubStatementValue>[] | null = null;
+    let mss_rules: IOrAlt<{tokens: IToken[], statement: MatchSubStatementValue}>[] | null = null;
     const MatchSubStatement = $.RULE("MatchSubStatement", () => {
         let count: CountSubStatementCST | null = null;
         let invert: boolean = false;
@@ -164,7 +164,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
         let to: string | null = null;
         let type: MatchSubStatementType = MatchSubStatementType.Anything;
 
-        const tokens: IToken[] = [];
+        let tokens: IToken[] = [];
 
         count = $.OPTION(() => {
             const css = $.SUBRULE(CountSubStatement);
@@ -184,122 +184,124 @@ export class Human2RegexParser extends EmbeddedActionsParser {
             SEP: T.Or,
             DEF: () => {
                 $.OPTION3(() => $.CONSUME(T.A));
-                values.push($.OR(mss_rules || (mss_rules = [
-
+                const result = $.OR(mss_rules || (mss_rules = [
                     // range [a-z]
                     { ALT: () => {
-                        $.OPTION4(() => $.CONSUME(T.From));
-                        from = $.CONSUME2(T.StringLiteral).image;
+                        const token0 = $.OPTION4(() => $.CONSUME(T.From));
+                        const token1 = $.CONSUME2(T.StringLiteral);
+                        from = token1.image;
                         $.CONSUME(T.To);
-                        const token = $.CONSUME3(T.StringLiteral);
-                        tokens.push(token);
-                        to = token.image;
+                        const token2 = $.CONSUME3(T.StringLiteral);
+                        to = token2.image;
                         type = MatchSubStatementType.Between;
-                        return new MatchSubStatementValue(type, from, to);
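+                        // chevrotain's OPTION is typed as always returning a value,
+                        // but it actually yields undefined when its branch is not
+                        // taken, so token0 is guarded before being used below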
+                        if (usefulConditional(token0, "Bug in type definition. Option should return <undefined>, but it doesn't")) {
+                            return { tokens: [ token0, token2 ], statement: new MatchSubStatementValue(type, from, to) };
+                        }
+                        return { tokens: [ token1, token2 ], statement: new MatchSubStatementValue(type, from, to) };
                     }},
 
                     // range [a-z]
                     { ALT: () => {
-                        $.CONSUME(T.Between);
+                        const token1 = $.CONSUME(T.Between);
                         from = $.CONSUME4(T.StringLiteral).image;
                         $.CONSUME(T.And);
-                        const token = $.CONSUME5(T.StringLiteral);
-                        to = token.image;
-                        tokens.push(token);
+                        const token2 = $.CONSUME5(T.StringLiteral);
+                        to = token2.image;
                         type = MatchSubStatementType.Between;
-                        return new MatchSubStatementValue(type, from, to);
+                        return { tokens: [ token1, token2 ], statement: new MatchSubStatementValue(type, from, to) };
                     }},
 
                     // exact string
                     { ALT: () => {
                         const token = $.CONSUME(T.StringLiteral);
-                        tokens.push(token);
                         value = token.image;
                         type = MatchSubStatementType.SingleString;
-                        return new MatchSubStatementValue(type, value);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type, value) };
                     }},
 
                     //unicode
                     { ALT: () => {
-                        $.CONSUME(T.Unicode);
-                        const token = $.CONSUME5(T.StringLiteral);
-                        tokens.push(token);
-                        value = token.image;
+                        const token1 = $.CONSUME(T.Unicode);
+                        const token2 = $.CONSUME6(T.StringLiteral);
+                        value = token2.image;
                         type = MatchSubStatementType.Unicode;
-                        return new MatchSubStatementValue(type, value);
+                        return { tokens: [ token1, token2 ], statement: new MatchSubStatementValue(type, value) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Anything));
+                        const token = $.CONSUME(T.Anything);
                         type = MatchSubStatementType.Anything;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Boundary));
+                        const token = $.CONSUME(T.Boundary);
                         type = MatchSubStatementType.Boundary;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Word));
+                        const token = $.CONSUME(T.Word);
                         type = MatchSubStatementType.Word;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Digit));
+                        const token = $.CONSUME(T.Digit);
                         type = MatchSubStatementType.Digit;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Character));
+                        const token = $.CONSUME(T.Character);
                         type = MatchSubStatementType.Character;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Whitespace));
+                        const token = $.CONSUME(T.Whitespace);
                         type = MatchSubStatementType.Whitespace;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Number));
+                        const token = $.CONSUME(T.Number);
                         type = MatchSubStatementType.Number;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Tab));
+                        const token = $.CONSUME(T.Tab);
                         type = MatchSubStatementType.Tab;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Linefeed));
+                        const token = $.CONSUME(T.Linefeed);
                         type = MatchSubStatementType.Linefeed;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Newline));
+                        const token = $.CONSUME(T.Newline);
                         type = MatchSubStatementType.Newline;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.CarriageReturn));
+                        const token = $.CONSUME(T.CarriageReturn);
                         type = MatchSubStatementType.CarriageReturn;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
-                ])));
+                ]));
+
+                tokens = tokens.concat(result.tokens);
+                values.push(result.statement);
             }
         });
diff --git a/src/utilities.ts b/src/utilities.ts
index 67c0187..2508dea 100644
--- a/src/utilities.ts
+++ b/src/utilities.ts
@@ -155,7 +155,7 @@ export function removeQuotes(input: string): string {
  * @internal
  */
 export function regexEscape(input: string): string {
-    return input.replace("\\", "\\\\").replace(/([=:\-\.\[\]\^\|\(\)\*\+\?\{\}\$\/])/g, "\\$1");
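+    // one pass now escapes "\\" via the character class; the old leading
+    // String#replace("\\", ...) only ever replaced the first backslash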
+    return input.replace(/([:\\\-\.\[\]\^\|\(\)\*\+\?\{\}\$\/])/g, "\\$1");
 }
 
 /**
diff --git a/tests/generator.spec.ts b/tests/generator.spec.ts
new file mode 100644
index 0000000..91d852a
--- /dev/null
+++ b/tests/generator.spec.ts
@@ -0,0 +1,91 @@
+/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
+
+import { Human2RegexParser, Human2RegexParserOptions } from "../src/parser";
+import { Human2RegexLexer, Human2RegexLexerOptions } from "../src/lexer";
+import { RegexDialect } from "../src/generator";
+
+
+describe("Generator functionality", function() {
+    const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(true));
+    // eslint-disable-next-line init-declarations
+    const parser = new Human2RegexParser(new Human2RegexParserOptions(true));
+
+    it("generates an empty regex", function() {
+        parser.input = lexer.tokenize("").tokens;
+        const reg0 = parser.parse();
+        expect(reg0.validate(RegexDialect.JS).length).toBe(0);
+        expect(reg0.toRegex(RegexDialect.JS)).toBe("//");
+
+        parser.input = lexer.tokenize("\n/*hello world*/\n").tokens;
+        const reg1 = parser.parse();
+        expect(reg1.validate(RegexDialect.JS).length).toBe(0);
+        expect(reg1.toRegex(RegexDialect.JS)).toBe("//");
+    });
+
+    it("generates a basic regex", function() {
+        parser.input = lexer.tokenize('match "hello" or "world"').tokens;
+        const reg0 = parser.parse();
+        expect(reg0.validate(RegexDialect.JS).length).toBe(0);
+        expect(reg0.toRegex(RegexDialect.JS)).toBe("/hello|world/");
+
+        parser.input = lexer.tokenize('match "http" then optionally "s"').tokens;
+        const reg1 = parser.parse();
+        expect(reg1.validate(RegexDialect.JS).length).toBe(0);
+        expect(reg1.toRegex(RegexDialect.JS)).toBe("/https?/");
+    });
+
+    it("validates invalid regexes", function() {
+        parser.input = lexer.tokenize('match unicode "Latin"').tokens;
+        const reg0 = parser.parse();
+        expect(reg0.validate(RegexDialect.DotNet).length).toBeGreaterThan(0);
+
+        parser.input = lexer.tokenize("using global and global").tokens;
+        const reg1 = parser.parse();
+        expect(reg1.validate(RegexDialect.DotNet).length).toBeGreaterThan(0);
+
+        parser.input = lexer.tokenize('match "a" to "asdf"').tokens;
+        const reg2 = parser.parse();
+        expect(reg2.validate(RegexDialect.DotNet).length).toBeGreaterThan(0);
+
+    });
+
+    it("runs complex scripts", function() {
+        const str = `
+using global and exact matching
+create an optional group called protocol
+    match "http"
+    optionally match "s"
+    match "://"
+create an optional group called subdomain
+    repeat
+        match a word
+        match "."
+create a group called domain
+    match 1+ words or "_" or "-"
+    match "."
+    match a word
+# port, but we don't care about it, so ignore it
+optionally match ":" then 0+ digits
+create an optional group called path
+    repeat
+        match "/"
+        match 0+ words or "_" or "-"
+create an optional group
+    # we don't want to capture the '?', so don't name the group until afterwards
+    match "?"
+    create a group called query
+        repeat
+            match 1+ words or "_" or "-"
+            match "="
+            match 1+ words or "_" or "-"
+create an optional group
+    # fragment, again, we don't care, so ignore everything afterwards
+    match "#"
+    match 0+ any thing
+`;
+        parser.input = lexer.tokenize(str).tokens;
+        const reg = parser.parse();
+        expect(reg.validate(RegexDialect.JS).length).toBe(0);
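+        // the named capture groups in the expected output ((?<protocol>,
+        // (?<subdomain>, (?<domain>, (?<path>, (?<query>) come from the
+        // group names "called" in the script above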
+        expect(reg.toRegex(RegexDialect.JS)).toBe("/^(?<protocol>https?\\:\\/\\/)?(?<subdomain>(\\w+\\.)*)?(?<domain>(?:\\w+|_|\\-)+\\.\\w+)\\:?\\d*(?<path>(\\/(?:\\w+|_|\\-)*)*)?(\\?(?<query>((?:\\w+|_|\\-)+\=(?:\\w+|_|\\-)+)*))?(#.*)?$/g");
+    });
+});
\ No newline at end of file
diff --git a/tests/lexer.spec.ts b/tests/lexer.spec.ts
index bed372a..410e00b 100644
--- a/tests/lexer.spec.ts
+++ b/tests/lexer.spec.ts
@@ -1,6 +1,69 @@
-describe("calculate", function() {
-    it("add", function() {
-        const result = 5 + 2;
-        expect(result).toBe(7);
-    });
-});
+/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
+
+import { Human2RegexLexer, Human2RegexLexerOptions, IndentType } from "../src/lexer";
+import { Indent } from "../src/tokens";
+
+describe("Lexer capabilities", function() {
+    const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(true));
+
+    it("validates", function() {
+        expect(() => lexer.setOptions(new Human2RegexLexerOptions(false, IndentType.Both))).not.toThrow();
+    });
+
+    it("parses nothing", function() {
+        expect(() => lexer.tokenize("")).not.toThrow();
+        expect(lexer.tokenize("").errors).toHaveLength(0);
+        expect(lexer.tokenize("").tokens).toHaveLength(0);
+
+        expect(() => lexer.tokenize("\n/* hello world */\n")).not.toThrow();
+        expect(lexer.tokenize("\n/* hello world */\n").errors).toHaveLength(0);
+        expect(lexer.tokenize("\n/* hello world */\n").tokens).toHaveLength(0);
+    });
+
+    it("parses something", function() {
+        // tabs
+        expect(() => lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"\n')).not.toThrow();
+        expect(lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"\n').errors).toHaveLength(0);
+        expect(lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"\n').tokens).toHaveLength(17);
+
+        // spaces
+        expect(() => lexer.tokenize('optionally create a group called test\n    optionally match "-" or "$/()" then "^[]"\n')).not.toThrow();
+        expect(lexer.tokenize('optionally create a group called test\n    optionally match "-" or "$/()" then "^[]"\n').errors).toHaveLength(0);
+        expect(lexer.tokenize('optionally create a group called test\n    optionally match "-" or "$/()" then "^[]"\n').tokens).toHaveLength(17);
+
+        // no EOF newline
+        expect(() => lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"')).not.toThrow();
+        expect(lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"').errors).toHaveLength(0);
+        expect(lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"').tokens).toHaveLength(17);
+
+        // Outdent
+        expect(() => lexer.tokenize('optionally create a group\n\trepeat\n\t\tmatch "-"\n\toptionally match "-" or "$/()" then "^[]"\n')).not.toThrow();
+        expect(lexer.tokenize('optionally create a group\n\trepeat\n\t\tmatch "-"\n\toptionally match "-" or "$/()" then "^[]"\n').errors).toHaveLength(0);
+        expect(lexer.tokenize('optionally create a group\n\trepeat\n\t\tmatch "-"\n\toptionally match "-" or "$/()" then "^[]"\n').tokens).toHaveLength(22);
+    });
+
+    it("fails to parse bad text", function() {
+        // double indent
+        expect(() => lexer.tokenize('optionally create a group called test\n\t\toptionally match "-" or "$/()" then "^[]"')).not.toThrow();
+        expect(lexer.tokenize('optionally create a group called test\n\t\toptionally match "-" or "$/()" then "^[]"').errors.length).toBeGreaterThan(0);
+
+        // missing " at end
+        expect(() => lexer.tokenize('optionally create a group\n\toptionally match "- or "$/()" then "^[]')).not.toThrow();
+        expect(lexer.tokenize('optionally create a group\n\toptionally match "- or "$/()" then "^[]').errors.length).toBeGreaterThan(0);
+    });
+
+    it("handles switching between tabs and spaces", function() {
+        lexer.setOptions(new Human2RegexLexerOptions(true, IndentType.Tabs));
+
+        // tabs
+        expect(() => lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"\n')).not.toThrow();
+        expect(lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"\n').errors).toHaveLength(0);
+        expect(lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"\n').tokens).toHaveLength(17);
+        expect(lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"\n').tokens.map((x) => x.tokenType)).toContain(Indent);
+
+        // spaces should be ignored
+        expect(() => lexer.tokenize('optionally create a group called test\n    optionally match "-" or "$/()" then "^[]"\n')).not.toThrow();
+        expect(lexer.tokenize('optionally create a group called test\n    optionally match "-" or "$/()" then "^[]"\n').errors).toHaveLength(0);
+        expect(lexer.tokenize('optionally create a group called test\n    optionally match "-" or "$/()" then "^[]"\n').tokens.map((x) => x.tokenType)).not.toContain(Indent);
+    });
+});
\ No newline at end of file
diff --git a/tests/parser.spec.ts b/tests/parser.spec.ts
new file mode 100644
index 0000000..f07dd34
--- /dev/null
+++ b/tests/parser.spec.ts
@@ -0,0 +1,71 @@
+/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
+
+import { Human2RegexLexer, Human2RegexLexerOptions } from "../src/lexer";
+import { Human2RegexParser, Human2RegexParserOptions } from "../src/parser";
+import { IToken } from "chevrotain";
+import { RegularExpressionCST } from "../src/generator";
+
+
+describe("Parser capabilities", function() {
+    const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(true));
+    // eslint-disable-next-line init-declarations
+    let parser!: Human2RegexParser;
+
+    it("validates", function() {
+        expect(() => parser = new Human2RegexParser(new Human2RegexParserOptions(false))).not.toThrow();
+    });
+
+    it("parses nothing", function() {
+        let tokens: IToken[] = [];
+
+        tokens = lexer.tokenize("").tokens;
+        parser.input = tokens;
+        expect(() => parser.parse()).not.toThrow();
+        expect(parser.parse()).toBeInstanceOf(RegularExpressionCST);
+        expect(parser.errors.length).toEqual(0);
+
+        tokens = lexer.tokenize("\n/* hello world */\n").tokens;
+        parser.input = tokens;
+        expect(() => parser.parse()).not.toThrow();
+        expect(parser.parse()).toBeInstanceOf(RegularExpressionCST);
+        expect(parser.errors.length).toEqual(0);
+    });
+
+    it("parses something", function() {
+        let tokens: IToken[] = [];
+
+        tokens = lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"\n').tokens;
+        parser.input = tokens;
+        expect(() => parser.parse()).not.toThrow();
+        parser.input = tokens;
+        expect(parser.parse()).toBeInstanceOf(RegularExpressionCST);
+        expect(parser.errors.length).toEqual(0);
+
+        tokens = lexer.tokenize('optionally create a group called test\n\trepeat 3..five\n\t\toptionally match "-" or "$/()" then "^[]"').tokens;
+        parser.input = tokens;
+        expect(() => parser.parse()).not.toThrow();
+        parser.input = tokens;
+        expect(parser.parse()).toBeInstanceOf(RegularExpressionCST);
+        expect(parser.errors.length).toEqual(0);
+    });
+
+    it("fails to parse bad text", function() {
+        let tokens: IToken[] = [];
+
+        tokens = lexer.tokenize('optionally create a group called\n\toptionally match "-" or "$/()" then "^[]"\n').tokens;
+        parser.input = tokens;
+        expect(() => parser.parse()).not.toThrow();
+        expect(parser.errors.length).toBeGreaterThan(0);
+
+        tokens = lexer.tokenize('optionally create a called test\n\toptionally match "-" or "$/()" then "^[]"\n').tokens;
+        parser.input = tokens;
+        expect(() => parser.parse()).not.toThrow();
+        expect(parser.errors.length).toBeGreaterThan(0);
+
+        tokens = lexer.tokenize('optionally create a group\n\toptionally match or "$/()" then "^[]"\n').tokens;
+        parser.input = tokens;
+        expect(() => parser.parse()).not.toThrow();
+        expect(parser.errors.length).toBeGreaterThan(0);
+    });
+});
+
diff --git a/tests/utilities.spec.ts b/tests/utilities.spec.ts
new file mode 100644
index 0000000..e094d0e
--- /dev/null
+++ b/tests/utilities.spec.ts
@@ -0,0 +1,108 @@
+/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
+
+import "../src/utilities";
+import { isSingleRegexCharacter, findLastIndex, removeQuotes, regexEscape, hasFlag, combineFlags, makeFlag, first, last, CommonError } from "../src/utilities";
+import { UsingFlags, ISemanticError } from "../src/generator";
+import { IRecognitionException, ILexingError, createTokenInstance } from "chevrotain";
+import { Indent } from "../src/tokens";
+
+describe("Utility functions", function() {
+
+    it("should handle flags", function() {
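+        // makeFlag(n) produces the bit value 1 << n, so distinct flags can be
+        // combined with combineFlags() and queried with hasFlag() as a bitfield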
+        expect(makeFlag(0)).toBe(1);
+        expect(makeFlag(7)).toBe(1*2*2*2*2*2*2*2);
+
+        expect(combineFlags(UsingFlags.Exact, UsingFlags.Global)).toBe(UsingFlags.Exact + UsingFlags.Global);
+        expect(combineFlags(UsingFlags.Multiline, combineFlags(UsingFlags.Exact, UsingFlags.Global))).toBe(UsingFlags.Exact + UsingFlags.Global + UsingFlags.Multiline);
+
+        expect(hasFlag(UsingFlags.Exact, UsingFlags.Exact)).toBe(true);
+        expect(hasFlag(UsingFlags.Exact, UsingFlags.Global)).toBe(false);
+
+        expect(hasFlag(UsingFlags.Global + UsingFlags.Exact, UsingFlags.Exact)).toBe(true);
+        expect(hasFlag(UsingFlags.Global + UsingFlags.Exact, UsingFlags.Multiline)).toBe(false);
+
+        expect(hasFlag(combineFlags(UsingFlags.Global, UsingFlags.Exact), UsingFlags.Exact)).toBe(true);
+        expect(hasFlag(combineFlags(UsingFlags.Global, UsingFlags.Exact), UsingFlags.Global)).toBe(true);
+        expect(hasFlag(combineFlags(UsingFlags.Global, UsingFlags.Exact), UsingFlags.Multiline)).toBe(false);
+    });
+
+    it("should return correct array elements", function() {
+        expect(first([ 1, 2, 3 ])).toBe(1);
+        expect(last([ 1, 2, 3 ])).toBe(3);
+    });
+
+    it("should recognize single regex regular characters", function() {
+        expect(isSingleRegexCharacter("")).toBe(false);
+        expect(isSingleRegexCharacter("a")).toBe(true);
+        expect(isSingleRegexCharacter("ab")).toBe(false);
+    });
+
+    it("should recognize single regex escape characters", function() {
+        expect(isSingleRegexCharacter("\\n")).toBe(true);
+        expect(isSingleRegexCharacter("\\r\\n")).toBe(false);
+        expect(isSingleRegexCharacter("\\na")).toBe(false);
+        expect(isSingleRegexCharacter("\\?")).toBe(true);
+    });
+
+    it("should recognize single unicode characters", function() {
+        expect(isSingleRegexCharacter("\\u1")).toBe(false);
+        expect(isSingleRegexCharacter("\\u1234")).toBe(true);
+        expect(isSingleRegexCharacter("\\u1234\\u1234")).toBe(false);
+        expect(isSingleRegexCharacter("\\U12345678")).toBe(false);
+        expect(isSingleRegexCharacter("\\U1")).toBe(false);
+        expect(isSingleRegexCharacter("௹")).toBe(true);
+        expect(isSingleRegexCharacter("💩")).toBe(false);
+    });
+
+    it("should remove quotes correctly", function() {
+        expect(removeQuotes('""')).toEqual("");
+        expect(removeQuotes('"hello world"')).toEqual("hello world");
+        expect(removeQuotes('"hello"world"')).toEqual('hello"world');
+    });
+
+    it("should escape regex correctly", function() {
+        expect(regexEscape("")).toEqual("");
+        expect(regexEscape("\\$")).toEqual("\\\\\\$");
+        expect(regexEscape("^(.*)?\\?$")).toEqual("\\^\\(\\.\\*\\)\\?\\\\\\?\\$");
+        expect(regexEscape("\\p{Latin}")).toEqual("\\\\p\\{Latin\\}");
+    });
+
+    it("should find the last index of an element", function() {
+        expect(findLastIndex([], 3)).toBe(-1);
+        expect(findLastIndex([ 3, 1, 2, 3, 3 ], 3)).toBe(4);
+        expect(findLastIndex([ 3, 1, 2, 3, 3 ], 1)).toBe(1);
+        expect(findLastIndex([ 3, 1, 2, 3, 3 ], 9)).toBe(-1);
+    });
+
+    it("should generate CommonErrors correctly", function() {
+        const lex_error: ILexingError = {
+            offset: 123,
+            line: 123,
+            column: 123,
+            length: 123,
+            message: "error"
+        };
+
+        const par_error: IRecognitionException = {
+            name: "Recognition Exception",
+            message: "Mismatch at 1,1",
+            token: createTokenInstance(Indent, "", 123, 124, 123, 123, 123, 124),
+            resyncedTokens: [],
+            context: { ruleStack: [], ruleOccurrenceStack: [] }
+        };
+
+        const sem_error: ISemanticError = {
+            startLine: 123,
+            startColumn: 123,
+            length: 123,
+            message: "error"
+        };
+
+        expect(CommonError.fromLexError(lex_error)).toBeInstanceOf(CommonError);
+        expect(CommonError.fromParseError(par_error)).toBeInstanceOf(CommonError);
+        expect(CommonError.fromSemanticError(sem_error)).toBeInstanceOf(CommonError);
+
+        expect(() => CommonError.fromSemanticError(sem_error).toString()).not.toThrow();
+        expect(CommonError.fromSemanticError(sem_error).toString()).not.toBeNull();
+    });
+});
\ No newline at end of file