Added additional information to semantic errors

2025-05-16 04:20:35 -07:00 · 2020-10-31 01:06:56 -04:00 · 2020-10-31 01:06:56 -04:00 · 18d4c4418f
commit 18d4c4418f
parent 6e42c7e921
4 changed files with 280 additions and 174 deletions
--- a/src/generator.ts
+++ b/src/generator.ts
@ -1,7 +1,8 @@
 /* eslint-disable @typescript-eslint/no-unused-vars */
 /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */

-import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter } from "./utilities";
+import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last } from "./utilities";
+import { IToken } from "chevrotain";

 export enum RobotLanguage {
    JS,
@ -10,9 +11,34 @@ export enum RobotLanguage {
    Java
 }

+/**
+ * Location and description of a semantic (validation) error,
+ * as produced by H2RCST.error() from a node's first and last tokens.
+ */
+export interface ISemanticError {
+    // startLine of the node's first token (NaN when the token has none)
+    startLine: number,
+    // startColumn of the node's first token (NaN when the token has none)
+    startColumn: number,
+    // offset difference between the end of the last token and the start of the first
+    length: number,
+    // human-readable description of the problem
+    message: string
+}
+
+/**
+ * Base class of the CST node classes in this file.
+ * Keeps the tokens a node was built from so that semantic errors can point
+ * back at a position in the input.
+ */
 export abstract class H2RCST {
-    public abstract validate(language: RobotLanguage): Error[];
+    // Tokens recorded by the parser for this node; error() only reads the first and the last one.
+    public tokens: IToken[];
+
+    constructor(tokens: IToken[]) {
+        this.tokens = tokens;
+    }
+
+    // Returns the semantic errors of this node for the given target language.
+    public abstract validate(language: RobotLanguage): ISemanticError[];
     public abstract toRegex(language: RobotLanguage): string;
+
+    /**
+     * Builds a semantic error spanning from the first to the last token of this node.
+     *
+     * @param message description of the problem
+     * @returns the error with line/column taken from the first token
+     *
+     * NOTE(review): assumes this.tokens is non-empty; first()/last() yield undefined
+     * for an empty array and the property reads below would throw.
+     * NOTE(review): if endOffset is the inclusive index of the token's last character,
+     * length is one less than the number of characters covered - confirm.
+     */
+    protected error(message: string): ISemanticError {
+        const f = first(this.tokens);
+        const l = last(this.tokens);
+
+        return { 
+            startLine: f.startLine ?? NaN,
+            startColumn: f.startColumn ?? NaN,
+            // falls back to the last token's start when it has no endOffset
+            length: (l.endOffset ?? l.startOffset) - f.startOffset,
+            message: message
+        };
+    }
 }

 /* eslint-disable no-bitwise */
@ -52,18 +78,16 @@ export class MatchStatementValue {
    }
 }

-export abstract class StatementCST implements H2RCST {
-    public abstract validate(language: RobotLanguage): Error[];
-    public abstract toRegex(language: RobotLanguage): string;
+export abstract class StatementCST extends H2RCST {
 }

-export class MatchSubStatementCST implements H2RCST {
-    constructor(public count: CountSubStatementCST | null, public invert: boolean = false, public values: MatchSubStatementValue[]) {
-        /* empty */
+export class MatchSubStatementCST extends H2RCST {
+    constructor(public tokens: IToken[], public count: CountSubStatementCST | null, public invert: boolean = false, public values: MatchSubStatementValue[]) {
+        super(tokens);
    }
    
-    public validate(language: RobotLanguage): Error[] {
-        let errors: Error[] = [];
+    public validate(language: RobotLanguage): ISemanticError[] {
+        let errors: ISemanticError[] = [];

        if (this.count) {
            errors = errors.concat(this.count.validate(language));
@ -75,21 +99,21 @@ export class MatchSubStatementCST implements H2RCST {
                let to = value.to as string;

                if (!isSingleRegexCharacter(from)) {
-                        errors.push(new Error("Between statement must begin with a single character"));
+                        errors.push(this.error("Between statement must begin with a single character"));
                }
                else if (from.startsWith("\\u") || from.startsWith("\\U") || from.startsWith("\\")) {
                    from = JSON.parse(`"${regexEscape(from)}"`);
                }

                if (!isSingleRegexCharacter(to)) {
-                        errors.push(new Error("Between statement must end with a single character"));
+                        errors.push(this.error("Between statement must end with a single character"));
                }
                else if (to.startsWith("\\u") || to.startsWith("\\U") || to.startsWith("\\")) {
                    to = JSON.parse(`"${regexEscape(to)}"`);
                }

                if (from.charCodeAt(0) >= to.charCodeAt(0)) {
-                    errors.push(new Error("Between statement range invalid"));
+                    errors.push(this.error("Between statement range invalid"));
                }
            }
        }
@ -182,27 +206,29 @@ export class MatchSubStatementCST implements H2RCST {

 }

-export class UsingStatementCST implements H2RCST {
-    constructor(public flags: UsingFlags[]) {
-        /* empty */
+export class UsingStatementCST extends H2RCST {
+    constructor(public tokens: IToken[], public flags: UsingFlags[]) {
+        super(tokens);
    }
-    public validate(language: RobotLanguage): Error[] {
-        const errors: Error[] = [];
+
+    /**
+     * Checks the collected "using" flags for duplicates and for the
+     * contradictory combination of case sensitive and case insensitive.
+     *
+     * @param language target language (not used by this check)
+     * @returns one semantic error per problem found; empty when the flags are fine
+     */
+    public validate(language: RobotLanguage): ISemanticError[] {
+        const errors: ISemanticError[] = [];
         let flag = this.flags[0];

         for (let i = 1; i < this.flags.length; i++) {
             if (hasFlag(flag, this.flags[i])) {
-                errors.push(new Error("Duplicate modifier: " + MatchSubStatementType[this.flags[i]] ));
+                // the entries of this.flags are UsingFlags, so the name must be looked up in that enum
+                errors.push(this.error("Duplicate modifier: " + UsingFlags[this.flags[i]] ));
             }
             flag = combineFlags(flag, this.flags[i]);
         }

         if (hasFlag(flag, UsingFlags.Sensitive) && hasFlag(flag, UsingFlags.Insensitive)) {
-            errors.push(new Error("Cannot be both case sensitive and insensitive"));
+            errors.push(this.error("Cannot be both case sensitive and insensitive"));
         }

         return errors;
     }
+
    public toRegex(language: RobotLanguage): string {
        let str = "";
        let exact = false;
@ -226,19 +252,19 @@ export class UsingStatementCST implements H2RCST {
    }
 }

-export class CountSubStatementCST implements H2RCST {
-    constructor(public from: number, public to: number | null, public opt: "inclusive" | "exclusive" | "+" | null) {
-        /* empty */
+export class CountSubStatementCST extends H2RCST {
+    constructor(public tokens: IToken[], public from: number, public to: number | null = null, public opt: "inclusive" | "exclusive" | "+" | null = null) {
+        super(tokens);
    }

-    public validate(language: RobotLanguage): Error[] {
-        const errors: Error[] = [];
+    public validate(language: RobotLanguage): ISemanticError[] {
+        const errors: ISemanticError[] = [];

        if (this.from < 0) {
-            errors.push(new Error("Value cannot be negative"));
+            errors.push(this.error("Value cannot be negative"));
        }
        else if (this.to !== null && ((this.opt === "exclusive" && (this.to-1) <= this.from) || this.to <= this.from)) {
-            errors.push(new Error("Values must be in range of eachother"));
+            errors.push(this.error("Values must be in range of eachother"));
        }

        return errors;
@ -263,13 +289,13 @@ export class CountSubStatementCST implements H2RCST {
    }
 }

-export class MatchStatementCST implements StatementCST {
-    constructor(public matches: MatchStatementValue[]) {
-        /* empty */
+export class MatchStatementCST extends StatementCST {
+    constructor(public tokens: IToken[], public matches: MatchStatementValue[]) {
+        super(tokens);
    }

-    public validate(language: RobotLanguage): Error[] {
-        let errors: Error[] = [];
+    public validate(language: RobotLanguage): ISemanticError[] {
+        let errors: ISemanticError[] = [];

        for (const match of this.matches) {
            errors = errors.concat(match.statement.validate(language));
@ -285,13 +311,13 @@ export class MatchStatementCST implements StatementCST {
    }
 }

-export class RepeatStatementCST implements StatementCST {
-    constructor(public optional: boolean, public count: CountSubStatementCST | null, public statements: StatementCST[]) {
-        /* empty */
+export class RepeatStatementCST extends StatementCST {
+    constructor(public tokens: IToken[], public optional: boolean, public count: CountSubStatementCST | null, public statements: StatementCST[]) {
+        super(tokens);
    }

-    public validate(language: RobotLanguage): Error[] {
-        let errors: Error[] = [];
+    public validate(language: RobotLanguage): ISemanticError[] {
+        let errors: ISemanticError[] = [];

        if (this.count !== null) {
            errors = errors.concat(this.count.validate(language));
@ -335,16 +361,16 @@ export class RepeatStatementCST implements StatementCST {
    }
 }

-export class GroupStatementCST implements StatementCST {
-    constructor(public optional: boolean, public name: string | null, public statements: StatementCST[]) {
-        /* empty */
+export class GroupStatementCST extends StatementCST {
+    constructor(public tokens: IToken[], public optional: boolean, public name: string | null, public statements: StatementCST[]) {
+        super(tokens);
    }

-    public validate(language: RobotLanguage): Error[] {
-        let errors : Error[] = [];
+    public validate(language: RobotLanguage): ISemanticError[] {
+        let errors : ISemanticError[] = [];
        
        if (language !== RobotLanguage.DotNet && language !== RobotLanguage.JS) {
-            errors.push(new Error("This language does not support named groups"));
+            errors.push(this.error("This language does not support named groups"));
        }

        for (const statement of this.statements) {
@ -373,13 +399,13 @@ export class GroupStatementCST implements StatementCST {
    }
 }

-export class RegularExpressionCST implements H2RCST {
-    constructor(public usings: UsingStatementCST, public statements: StatementCST[]) {
-        /* empty */
+export class RegularExpressionCST extends H2RCST {
+    constructor(public tokens: IToken[], public usings: UsingStatementCST, public statements: StatementCST[]) {
+        super(tokens);
    }

-    public validate(language: RobotLanguage): Error[] {
-        let errors: Error[] = this.usings.validate(language);
+    public validate(language: RobotLanguage): ISemanticError[] {
+        let errors: ISemanticError[] = this.usings.validate(language);

        for (const statement of this.statements) {
            errors = errors.concat(statement.validate(language));
--- a/src/parser.ts
+++ b/src/parser.ts
@ -1,8 +1,9 @@
 /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */

-import { EmbeddedActionsParser, IOrAlt } from "chevrotain";
+import { EmbeddedActionsParser, IOrAlt, IToken } from "chevrotain";
 import * as T from "./tokens";
 import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST } from "./generator";
+import { first } from "./utilities";

 export class Human2RegexParserOptions {
    constructor(public skip_validations: boolean = false) {
@ -10,10 +11,21 @@ export class Human2RegexParserOptions {
    }
 }

+/**
+ * Pairs a single consumed token with the value parsed from it,
+ * e.g. NumberSubStatement returns the number token together with its numeric value.
+ */
+class TokenAndValue<T> {
+    constructor(public token: IToken, public value: T) {
+        /* empty */
+    }
+}
+/**
+ * Pairs the tokens recorded for a rule with the value the rule produced,
+ * e.g. UsingStatement returns its tokens together with the parsed flags.
+ */
+class TokensAndValue<T> {
+    constructor(public tokens: IToken[], public value: T) {
+        /* empty */
+    }
+}
+
 export class Human2RegexParser extends EmbeddedActionsParser {
    private static already_init = false;

-    public parse : (idxInCallingRule?: number, ...args: unknown[]) => RegularExpressionCST;
+    public parse: (idxInCallingRule?: number, ...args: unknown[]) => RegularExpressionCST;

    constructor(private options: Human2RegexParserOptions = new Human2RegexParserOptions()) {
        super(T.AllTokens, { recoveryEnabled: false, maxLookahead: 2, skipValidations: options.skip_validations });
@ -26,116 +38,96 @@ export class Human2RegexParser extends EmbeddedActionsParser {
        
        const $ = this;

-        let nss_rules : IOrAlt<number>[] | null = null;
+        // IN REGARDS TO KEEPING TOKENS:
+        // We don't really need to keep each token, only the first and last tokens
+        // This is due to the fact we calculate the difference between those tokens
+        // However, sometimes we have optional starts and ends
+        // Each optional near the start and end MUST be recorded because they may be the first/last token
+        // ex) "optional match 3..." the start token is "optional", but "match 3..."'s start token is "match"
+
+        let nss_rules : IOrAlt<TokenAndValue<number>>[] | null = null;
        const NumberSubStatement = $.RULE("NumberSubStatement", () => {
-            let value: number = 0;
-
-            value = $.OR(nss_rules || (nss_rules = [
+            return $.OR(nss_rules || (nss_rules = [
+                { ALT: () => new TokenAndValue($.CONSUME(T.Zero), 0) },
+                { ALT: () => new TokenAndValue($.CONSUME(T.One), 1) },
+                { ALT: () => new TokenAndValue($.CONSUME(T.Two), 2) },
+                { ALT: () => new TokenAndValue($.CONSUME(T.Three), 3) },
+                { ALT: () => new TokenAndValue($.CONSUME(T.Four), 4) },
+                { ALT: () => new TokenAndValue($.CONSUME(T.Five), 5) },
+                { ALT: () => new TokenAndValue($.CONSUME(T.Six), 6) },
+                { ALT: () => new TokenAndValue($.CONSUME(T.Seven), 7) },
+                { ALT: () => new TokenAndValue($.CONSUME(T.Eight), 8) },
+                { ALT: () => new TokenAndValue($.CONSUME(T.Nine), 9) },
+                { ALT: () => new TokenAndValue($.CONSUME(T.Ten), 10) },
                { ALT: () => {
-                    $.CONSUME(T.Zero); 
-                    return 0; 
-                }},
-                { ALT: () => {
-                    $.CONSUME(T.One); 
-                    return 1; 
-                }},
-                { ALT: () => {
-                    $.CONSUME(T.Two); 
-                    return 2; 
-                }},
-                { ALT: () => {
-                    $.CONSUME(T.Three); 
-                    return 3; 
-                }},
-                { ALT: () => {
-                    $.CONSUME(T.Four); 
-                    return 4; 
-                }},
-                { ALT: () => {
-                    $.CONSUME(T.Five); 
-                    return 5; 
-                }},
-                { ALT: () => {
-                    $.CONSUME(T.Six); 
-                    return 6; 
-                }},
-                { ALT: () => {
-                    $.CONSUME(T.Seven); 
-                    return 7; 
-                }},
-                { ALT: () => {
-                    $.CONSUME(T.Eight); 
-                    return 8; 
-                }},
-                { ALT: () => {
-                    $.CONSUME(T.Nine); 
-                    return 9; 
-                }},
-                { ALT: () => {
-                    $.CONSUME(T.Ten); 
-                    return 10; 
-                }},
-
-                { ALT: () => parseInt($.CONSUME(T.NumberLiteral).image) },
+                    const tok = $.CONSUME(T.NumberLiteral);
+                    return new TokenAndValue(tok, parseInt(tok.image));
+                }}
            ]));
-
-            return value;
        });

        // 1, 1..2, between 1 and/to 2 inclusively/exclusively
        const CountSubStatement = $.RULE("CountSubStatement", () => {
-            let from : number = 0;
-            let to: number | null = null;
-            let opt: "inclusive" | "exclusive" | "+" | null = null;
-            
-            $.OR([
+            return $.OR([
                { ALT: () => {
-                    $.CONSUME(T.Between);
-                    from = $.SUBRULE4(NumberSubStatement);
+                    const tokens: IToken[] = [];
+
+                    tokens.push($.CONSUME(T.Between));
+                    const from = $.SUBRULE4(NumberSubStatement);
                    $.OR3([
                        { ALT: () => $.CONSUME2(T.To) },
                        { ALT: () => $.CONSUME(T.And) }
                    ]);
-                    to = $.SUBRULE5(NumberSubStatement);
-                    $.OPTION4(() => $.CONSUME3(T.Times));
-                    $.OPTION5(() => {
-                        $.OR4([
+                    const to = $.SUBRULE5(NumberSubStatement);
+                    tokens.push(to.token);
+                    $.OPTION4(() => tokens.push($.CONSUME3(T.Times)));
+                    const opt = $.OPTION5(() => {
+                        return $.OR4([
                            { ALT: () => {
-                                $.CONSUME(T.Inclusive);
-                                opt = "inclusive";
+                                tokens.push($.CONSUME(T.Inclusive));
+                                return "inclusive";
                            }},
                            { ALT: () => {
-                                $.CONSUME(T.Exclusive);
-                                opt = "exclusive";
+                                tokens.push($.CONSUME(T.Exclusive));
+                                return "exclusive";
                            }}
                        ]);
                    });
+
+                    return new CountSubStatementCST(tokens, from.value, to.value, opt as "inclusive" | "exclusive" | null);
                }},
                
-                { ALT: () => { 
-                    $.OPTION2(() => $.CONSUME(T.From));
-                    from = $.SUBRULE2(NumberSubStatement);
-                    $.OR2([
-                        { ALT: () => {
-                            $.CONSUME(T.OrMore);
-                            opt = "+";
-                        }},
+                { ALT: () => {
+                    const tokens: IToken[] = [];
+
+                    $.OPTION2(() => tokens.push($.CONSUME(T.From)));
+                    const from = $.SUBRULE2(NumberSubStatement);
+                    const to = $.OR2([
+                        { ALT: () => new TokenAndValue($.CONSUME(T.OrMore), [ null, "+" ]) },
                        { ALT: () => { 
                            $.CONSUME(T.To); 
-                            to = $.SUBRULE3(NumberSubStatement); 
+                            const val = $.SUBRULE3(NumberSubStatement);
+                            return new TokenAndValue(val.token, [ val.value, null ]);
                        }}
                    ]);
-                    $.OPTION3(() => $.CONSUME2(T.Times));
+                    tokens.push(to.token);
+                    $.OPTION3(() => tokens.push($.CONSUME2(T.Times)));
+
+                    return new CountSubStatementCST(tokens, from.value, to.value ? to.value[0] : null, to.value ? to.value[1] : null);
                }},

                { ALT: () => { 
-                    $.OPTION(() => $.CONSUME(T.Exactly));
-                    from = $.SUBRULE(NumberSubStatement);
-                    $.OPTION6(() => $.CONSUME(T.Times));
+                    const tokens: IToken[] = [];
+                    $.OPTION(() => tokens.push($.CONSUME(T.Exactly)));
+                    const from = $.SUBRULE(NumberSubStatement);
+                    tokens.push(from.token);
+                    $.OPTION6(() => tokens.push($.CONSUME(T.Times)));
+
+                    return new CountSubStatementCST(tokens, from.value);
                }} 
            ]);

-            return new CountSubStatementCST(from, to, opt);
+            
        });

        let mss_rules : IOrAlt<MatchSubStatementValue>[] | null = null;
@ -147,9 +139,19 @@ export class Human2RegexParser extends EmbeddedActionsParser {
            let to : string | null = null;
            let type : MatchSubStatementType = MatchSubStatementType.Anything;

-            count = $.OPTION(() => $.SUBRULE(CountSubStatement) );
+            const tokens: IToken[] = [];
+
+            count = $.OPTION(() => {
+                const css = $.SUBRULE(CountSubStatement);
+                // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
+                if (css.tokens) {
+                    tokens.push(first(css.tokens));
+                }
+                return css;
+            });
+
            invert = $.OPTION2(() => { 
-                $.CONSUME(T.Not); 
+                tokens.push($.CONSUME(T.Not));
                return true;
            });
            $.AT_LEAST_ONE_SEP({
@ -161,7 +163,9 @@ export class Human2RegexParser extends EmbeddedActionsParser {
                            $.OPTION4(() => $.CONSUME(T.From));
                            from = $.CONSUME2(T.StringLiteral).image; 
                            $.CONSUME(T.To);
-                            to = $.CONSUME3(T.StringLiteral).image;
+                            const token = $.CONSUME3(T.StringLiteral);
+                            tokens.push(token);
+                            to = token.image;
                            type = MatchSubStatementType.Between;

                            return new MatchSubStatementValue(type, from, to);
@ -170,73 +174,77 @@ export class Human2RegexParser extends EmbeddedActionsParser {
                            $.CONSUME(T.Between);
                            from = $.CONSUME4(T.StringLiteral).image;
                            $.CONSUME(T.And);
-                            to = $.CONSUME5(T.StringLiteral).image;
+                            const token = $.CONSUME5(T.StringLiteral);
+                            to = token.image;
+                            tokens.push(token);
                            type = MatchSubStatementType.Between;

                            return new MatchSubStatementValue(type, from, to);
                        }},
                        { ALT: () => {
-                            from = $.CONSUME(T.StringLiteral).image;
+                            const token = $.CONSUME(T.StringLiteral);
+                            tokens.push(token);
+                            from = token.image;
                            type = MatchSubStatementType.SingleString;

                            return new MatchSubStatementValue(type, from);
                        }},
                        { ALT: () => { 
-                            $.CONSUME(T.Anything); 
+                            tokens.push($.CONSUME(T.Anything)); 
                            type = MatchSubStatementType.Anything;

                            return new MatchSubStatementValue(type);
                        }},
                        { ALT: () => { 
-                            $.CONSUME(T.Word); 
+                            tokens.push($.CONSUME(T.Word)); 
                            type = MatchSubStatementType.Word;

                            return new MatchSubStatementValue(type);
                        }},
                        { ALT: () => { 
-                            $.CONSUME(T.Digit); 
+                            tokens.push($.CONSUME(T.Digit)); 
                            type = MatchSubStatementType.Digit;

                            return new MatchSubStatementValue(type);
                        }},
                        { ALT: () => { 
-                            $.CONSUME(T.Character); 
+                            tokens.push($.CONSUME(T.Character)); 
                            type = MatchSubStatementType.Character;

                            return new MatchSubStatementValue(type);
                        }},
                        { ALT: () => { 
-                            $.CONSUME(T.Whitespace); 
+                            tokens.push($.CONSUME(T.Whitespace)); 
                            type = MatchSubStatementType.Whitespace;

                            return new MatchSubStatementValue(type);
                        }},
                        { ALT: () => { 
-                            $.CONSUME(T.Number); 
+                            tokens.push($.CONSUME(T.Number)); 
                            type = MatchSubStatementType.Number;

                            return new MatchSubStatementValue(type);
                        }},
                        { ALT: () => { 
-                            $.CONSUME(T.Tab); 
+                            tokens.push($.CONSUME(T.Tab)); 
                            type = MatchSubStatementType.Tab;

                            return new MatchSubStatementValue(type);
                        }},
                        { ALT: () => { 
-                            $.CONSUME(T.Linefeed); 
+                            tokens.push($.CONSUME(T.Linefeed)); 
                            type = MatchSubStatementType.Linefeed;

                            return new MatchSubStatementValue(type);
                        }},
                        { ALT: () => { 
-                            $.CONSUME(T.Newline); 
+                            tokens.push($.CONSUME(T.Newline)); 
                            type = MatchSubStatementType.Newline;

                            return new MatchSubStatementValue(type);
                        }},
                        { ALT: () => { 
-                            $.CONSUME(T.CarriageReturn); 
+                            tokens.push($.CONSUME(T.CarriageReturn)); 
                            type = MatchSubStatementType.CarriageReturn;

                            return new MatchSubStatementValue(type);
@ -245,19 +253,20 @@ export class Human2RegexParser extends EmbeddedActionsParser {
                }
            });

-            return new MatchSubStatementCST(count, invert, values);
+            return new MatchSubStatementCST(tokens, count, invert, values);
        });

        // optionally match "+" then 1+ words
        const MatchStatement = $.RULE("MatchStatement", () => {
            let optional = false;
            const msv: MatchStatementValue[] = [];
+            const tokens: IToken[] = [];

            $.OPTION(() => {
-                $.CONSUME(T.Optional);
+                tokens.push($.CONSUME(T.Optional));
                optional = true;
            });
-            $.CONSUME(T.Match);
+            tokens.push($.CONSUME(T.Match));
            msv.push(new MatchStatementValue(optional, $.SUBRULE(MatchSubStatement)));
            $.MANY(() => {
                $.OR([
@ -274,9 +283,9 @@ export class Human2RegexParser extends EmbeddedActionsParser {
                });
                msv.push(new MatchStatementValue(optional, $.SUBRULE2(MatchSubStatement)));
            });
-            $.CONSUME(T.EndOfLine);
+            tokens.push($.CONSUME(T.EndOfLine));

-            return new MatchStatementCST(msv);
+            return new MatchStatementCST(tokens, msv);
        });

        // using global matching
@ -284,7 +293,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
        const UsingStatement = $.RULE("UsingStatement", () => {
            const usings: UsingFlags[] = [];

-            $.CONSUME(T.Using);
+            const tokens = [ $.CONSUME(T.Using) ];
            $.AT_LEAST_ONE_SEP({
                SEP: T.And,
                DEF: () => {
@ -313,67 +322,75 @@ export class Human2RegexParser extends EmbeddedActionsParser {
                    $.OPTION(() => $.CONSUME(T.Matching));
                }
            });
-            $.CONSUME(T.EndOfLine);
+            tokens.push($.CONSUME(T.EndOfLine));

-            return usings;
+            return new TokensAndValue(tokens, usings);
        });

        const GroupStatement = $.RULE("GroupStatement", () => {
+            const tokens: IToken[] = [];
            let optional = false;
            let name: string | null = null;
            const statement: StatementCST[] = [];

-            $.OR([
+            tokens.push($.OR([
                { ALT: () => {
                    optional = true;
-                    $.CONSUME(T.Optional);
+                    const first_token = $.CONSUME(T.Optional);
                    $.CONSUME(T.Create);
                    $.CONSUME(T.A);
+
+                    return first_token;
                }},
                { ALT: () => {
-                    $.CONSUME2(T.Create);
+                    const first_token = $.CONSUME2(T.Create);
                    $.CONSUME2(T.A);
                    $.OPTION2(() => {
                        $.CONSUME2(T.Optional);
                        optional = true;
                    });
+
+                    return first_token;
                }}
-            ]);
+            ]));

            $.CONSUME(T.Group);
            $.OPTION(() => {
                $.CONSUME(T.Called);
                name = $.CONSUME(T.Identifier).image;
            });
-            $.CONSUME2(T.EndOfLine);
+            // Note: Technically not the end token, 
+            // BUT this is way more useful than the Outdent for error reporting
+            tokens.push($.CONSUME2(T.EndOfLine));
            $.CONSUME(T.Indent);
            $.AT_LEAST_ONE(() => {
                statement.push($.SUBRULE(Statement));
            });
            $.CONSUME(T.Outdent);

-            return new GroupStatementCST(optional, name, statement);
+            return new GroupStatementCST(tokens, optional, name, statement);
        });

        const RepeatStatement = $.RULE("RepeatStatement", () => {
+            const tokens: IToken[] = [];
            let optional = false;
            let count : CountSubStatementCST | null = null;
            const statements: StatementCST[] = [];

            $.OPTION3(() => {
-                $.CONSUME(T.Optional);
+                tokens.push($.CONSUME(T.Optional));
                optional = true;
            });
-            $.CONSUME(T.Repeat);
+            tokens.push($.CONSUME(T.Repeat));
            $.OPTION(() => count = $.SUBRULE(CountSubStatement));
            $.CONSUME3(T.EndOfLine);
            $.CONSUME(T.Indent);
            $.AT_LEAST_ONE(() => {
                statements.push($.SUBRULE(Statement));
            });
-            $.CONSUME(T.Outdent);
+            tokens.push($.CONSUME(T.Outdent));

-            return new RepeatStatementCST(optional, count, statements);
+            return new RepeatStatementCST(tokens, optional, count, statements);
        });

        const Statement = $.RULE("Statement", () => {
@ -385,13 +402,18 @@ export class Human2RegexParser extends EmbeddedActionsParser {
        });

        const Regex = $.RULE("Regex", () => {
+            let tokens: IToken[] = [];
            let usings: UsingFlags[] = [];
            const statements: StatementCST[] = [];

-            $.MANY(() => usings = usings.concat($.SUBRULE(UsingStatement)));
+            $.MANY(() => {
+                const using = $.SUBRULE(UsingStatement);
+                tokens = tokens.concat(using.tokens);
+                usings = usings.concat(using.value);
+            });
            $.MANY2(() => statements.push($.SUBRULE(Statement)) );

-            return new RegularExpressionCST(new UsingStatementCST(usings), statements);
+            return new RegularExpressionCST([], new UsingStatementCST(tokens, usings), statements);
        });

        this.performSelfAnalysis();
--- a/src/script.ts
+++ b/src/script.ts
@ -5,6 +5,8 @@ import "./webpage/style.css";

 import { Human2RegexLexer, Human2RegexLexerOptions } from "./lexer";
 import { Human2RegexParser, Human2RegexParserOptions } from "./parser";
+import { RobotLanguage } from "./generator";
+import { lexErrorToCommonError, parseErrorToCommonError, semanticErrorToCommonError, ICommonError } from "./utilities";

 /*
 $(function() {
@ -12,6 +14,8 @@ $(function() {
 });
 */

+
+
 const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(false));
 const parser = new Human2RegexParser(new Human2RegexParserOptions(false));

@ -28,9 +32,9 @@ create an optional group called protocol
 	match "http"
 	optionally match "s"
 	match "://"
-create a group called subdomain
+create an optional group called subdomain
 	repeat
-		match 1+ words
+		match a word
 		match "."
 create a group called domain
 	match 1+ words or "_" or "-"
@ -57,9 +61,23 @@ create an optional group
 `);


-console.log(result.errors);

-parser.input = result.tokens;
-const regex = parser.parse();
-console.log(JSON.stringify(regex, undefined, 4));
-console.log(parser.errors);
+// Errors of every stage (lexing, parsing, semantic validation) in one common shape.
+const total_errors: ICommonError[] = [];
+
+
+result.errors.map(lexErrorToCommonError).forEach((x) => total_errors.push(x));
+
+// Parse only when lexing reported nothing.
+if (total_errors.length === 0) {
+	parser.input = result.tokens;
+	const regex = parser.parse();
+
+	parser.errors.map(parseErrorToCommonError).forEach((x) => total_errors.push(x));
+
+	// Validate only when parsing reported nothing: the parser is created with
+	// recovery disabled, so after a parse error "regex" is not a usable tree.
+	// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
+	if (total_errors.length === 0) {
+		regex.validate(RobotLanguage.JS).map(semanticErrorToCommonError).forEach((x) => total_errors.push(x));
+
+		// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
+		if (total_errors.length === 0) {
+			console.log(regex.toRegex(RobotLanguage.JS));
+		}
+	}
+}
+
+// Serialize explicitly; concatenating the array would print "[object Object]" per entry.
+console.log("Errors = " + JSON.stringify(total_errors, undefined, 4));
--- a/src/utilities.ts
+++ b/src/utilities.ts
@ -1,5 +1,8 @@
 /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */

+import { ISemanticError } from "./generator";
+import { IRecognitionException, ILexingError } from "chevrotain";
+
 /* eslint-disable no-bitwise */
 export function hasFlag(a: number, b: number) : boolean {
    return (a & b) !== 0;
@ -17,11 +20,15 @@ export function isSingleRegexCharacter(char: string): boolean {
           char.length === 1;
 }

-export function last<T>(array: T[]) : T {
+/**
+ * Returns the first element of the array.
+ * For an empty array the result is undefined even though the return type is T.
+ */
+export function first<T>(array: T[]): T {
+    return array[0];
+}
+
+/**
+ * Returns the last element of the array.
+ * For an empty array the result is undefined even though the return type is T.
+ */
+export function last<T>(array: T[]): T {
     return array[array.length-1];
 }

-export function findLastIndex<T>(array: T[], value: T) : number {
+export function findLastIndex<T>(array: T[], value: T): number {
    for (let index = array.length-1; index >= 0; index--) {
        if (array[index] === value) {
            return index;
@ -46,3 +53,36 @@ export function removeQuotes(input: string): string {
 export function regexEscape(input: string) : string {
    return input.replace("\\", "\\\\").replace(/([=:\-\.\[\]\^\|\(\)\*\+\?\{\}\$\/])/g, "\\$1");
 }
+
+/**
+ * Common shape for lexer, parser and semantic errors so that they can be
+ * collected in a single list.
+ */
+export interface ICommonError {
+	// stage that produced the error: "Lexer Error", "Parser Error" or "Semantic Error"
+	type: string,
+	// line on which the error starts
+	startLine: number,
+    // column at which the error starts
+    startColumn: number,
+    // extent of the error, as reported by the stage that produced it
+    length: number,
+    // human-readable description
+    message: string
+}
+
+/**
+ * Converts a lexing error into the common error shape.
+ *
+ * @param error lexing error to convert
+ * @returns a new object tagged "Lexer Error"; line/column become startLine/startColumn,
+ *          length and message are copied unchanged
+ */
+export function lexErrorToCommonError(error: ILexingError): ICommonError {
+	return {
+		type: "Lexer Error",
+		startLine: error.line,
+		startColumn: error.column,
+		length: error.length,
+		message: error.message
+	};
+}
+
+/**
+ * Converts a parser (recognition) error into the common error shape.
+ * Position and length are taken from the token the error is attached to.
+ *
+ * @param error recognition exception to convert
+ * @returns a new object tagged "Parser Error"; line/column are NaN when the
+ *          token has none, length is NaN when the token has no endOffset
+ */
+export function parseErrorToCommonError(error: IRecognitionException): ICommonError {
+	return {
+		type: "Parser Error",
+		startLine: error.token.startLine ?? NaN,
+		startColumn: error.token.startColumn ?? NaN,
+		// parenthesized on purpose: "-" binds tighter than "??", so without the
+		// parentheses the result would be the raw endOffset instead of a length
+		length: (error.token.endOffset ?? NaN) - error.token.startOffset,
+		message: error.name + ": " + error.message,
+	};
+}
+
+/**
+ * Converts a semantic error into the common error shape.
+ *
+ * @param error semantic error to convert; it is not modified
+ * @returns a new object with the fields of the input plus type "Semantic Error"
+ */
+export function semanticErrorToCommonError(error: ISemanticError): ICommonError {
+	// copy instead of tagging the caller's object through a cast
+	return { ...error, type: "Semantic Error" };
+}