Got a rudimentary syntax tree started

2025-07-01 02:10:44 -07:00 · 2020-10-29 15:35:59 -04:00 · 2020-10-29 15:35:59 -04:00 · 3baefade52
commit 3baefade52
parent 91d1b37322
7 changed files with 533 additions and 94 deletions
--- a/.eslintrc.json
+++ b/.eslintrc.json
@ -43,7 +43,7 @@
 		"@typescript-eslint/naming-convention": [
 			"error",
            { "selector": "default", "format": [ "snake_case", "PascalCase" ] },
-            { "selector": "property", "format": [ "camelCase", "snake_case", "PascalCase" ] },
+            { "selector": "property", "format": [ "camelCase", "snake_case", "UPPER_CASE", "PascalCase" ] },
            { "selector": [ "function", "method"], "format": [ "camelCase", "UPPER_CASE" ] },
            { "selector": "typeLike", "format": [ "PascalCase" ] }
 		],
@ -55,7 +55,7 @@
        "camelcase": "off",
 		"no-magic-numbers": [ 
 			"warn", 
-			{ "ignoreArrayIndexes": true, "ignore": [-1,0,1,2,3,4,5,6,7,8,9]}
+			{ "ignoreArrayIndexes": true, "ignore": [-1,0,1,2,3,4,5,6,7,8,9,10]}
 		],
        "curly": "warn",
        "no-loss-of-precision": "error",
--- a/docs/bundle.min.js
+++ b/docs/bundle.min.js
--- a/src/generator.ts
+++ b/src/generator.ts
@ -0,0 +1,255 @@
+/* eslint-disable @typescript-eslint/no-unused-vars */
+/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
+
+import { regexEscape, removeQuotes, hasFlag, combineFlags } from "./utilities";
+
+/** Regex dialects that the tree nodes take as the target of `validate` and `toRegex`. */
+export enum RobotLanguage {
+    JS = 0,
+    Perl = 1,
+    DotNet = 2,
+    Java = 3
+}
+
+/**
+ * Common contract of every node in the Human2Regex syntax tree: a node can be
+ * checked for errors and rendered to regex text for a given target language.
+ */
+export abstract class H2RCST {
+    /** Reports the problems found in this node as a list of errors. */
+    public abstract validate(language: RobotLanguage): Error[];
+    /** Renders this node as regular-expression source text. */
+    public abstract toRegex(language: RobotLanguage): string;
+}
+
+/* eslint-disable no-bitwise */
+/**
+ * Modifiers collected from "using" statements. Every member occupies its own
+ * bit so several of them can be OR-ed into, and tested against, one number.
+ */
+export enum UsingFlags {
+    Multiline = 1 << 0,     // rendered as the "m" modifier
+    Global = 1 << 1,        // rendered as the "g" modifier
+    Sensitive = 1 << 2,     // no modifier is emitted for this one
+    Insensitive = 1 << 3,   // rendered as the "i" modifier
+    Exact = 1 << 4          // wraps the expression in ^ ... $
+}
+/* eslint-enable no-bitwise */
+
+
+/** Kinds of value a single alternative of a match sub-statement can hold. */
+export enum MatchSubStatementType {
+    SingleString = 0,
+    Between = 1,
+    Anything = 2,
+    Word = 3,
+    Digit = 4,
+    Character = 5,
+    Whitespace = 6,
+    Number = 7,
+    Tab = 8,
+    Linefeed = 9,
+    Newline = 10,
+    CarriageReturn = 11
+}
+
+/**
+ * One alternative of a match sub-statement: its kind plus the optional
+ * string operands (`from` alone for a single string, `from`/`to` for a range).
+ */
+export class MatchSubStatementValue {
+    public type: MatchSubStatementType;
+    public from: string | null;
+    public to: string | null;
+
+    constructor(type: MatchSubStatementType, from: string | null, to: string | null) {
+        this.type = type;
+        this.from = from;
+        this.to = to;
+    }
+}
+
+/** Pairs one match sub-statement with the flag saying whether it is optional. */
+export class MatchStatementValue {
+    public optional: boolean;
+    public statement: MatchSubStatementCST;
+
+    constructor(optional: boolean, statement: MatchSubStatementCST) {
+        this.optional = optional;
+        this.statement = statement;
+    }
+}
+
+/**
+ * Base type of the statement nodes (match, repeat and group statements
+ * implement it); it adds nothing beyond the H2RCST contract.
+ */
+export abstract class StatementCST implements H2RCST {
+    /** Reports the problems found in this statement as a list of errors. */
+    public abstract validate(language: RobotLanguage): Error[];
+    /** Renders this statement as regular-expression source text. */
+    public abstract toRegex(language: RobotLanguage): string;
+}
+
+/**
+ * A list of alternatives ("a" or "b" or a digit ...) that can be negated as a
+ * whole and carries an optional repetition count.
+ */
+export class MatchSubStatementCST implements H2RCST {
+    /**
+     * @param count  repetition count, or null when none was given (toRegex does not apply it yet)
+     * @param invert when true every alternative is rendered in its negated form
+     * @param values the alternatives; their regex fragments are joined with "|"
+     */
+    constructor(public count: CountSubStatementCST | null, public invert: boolean = false, public values: MatchSubStatementValue[]) {
+        /* empty */
+    }
+
+    /** Not implemented yet: always throws. */
+    public validate(language: RobotLanguage): Error[] {
+        throw new Error("Method not implemented.");
+    }
+
+    /**
+     * Renders every alternative as a regex fragment and joins them with "|".
+     *
+     * @returns the alternation, without any surrounding group or count
+     */
+    public toRegex(language: RobotLanguage): string {
+        const str: string[] = [];
+
+        for (const value of this.values) {
+            switch (value.type) {
+                case MatchSubStatementType.SingleString: {
+                    const reg = regexEscape(removeQuotes(value.from as string));
+                    str.push(this.invert ? `(?:(?!${reg}))` : reg);
+                    break;
+                }
+                case MatchSubStatementType.Between: {
+                    // from/to hold the quoted string-literal text, exactly like
+                    // SingleString, so the quotes must be stripped and the
+                    // operands escaped before they go into a character class.
+                    const from = regexEscape(removeQuotes(value.from as string));
+                    const to = regexEscape(removeQuotes(value.to as string));
+                    str.push(this.invert ? `[^${from}-${to}]` : `[${from}-${to}]`);
+                    break;
+                }
+                case MatchSubStatementType.Word:
+                    str.push(this.invert ? "\\W" : "\\w");
+                    break;
+                case MatchSubStatementType.Digit:
+                    str.push(this.invert ? "\\D" : "\\d");
+                    break;
+                case MatchSubStatementType.Character:
+                    str.push(this.invert ? "[^a-zA-Z]" : "[a-zA-Z]");
+                    break;
+                case MatchSubStatementType.Whitespace:
+                    str.push(this.invert ? "\\S" : "\\s");
+                    break;
+                case MatchSubStatementType.Number:
+                    str.push(this.invert ? "\\D+" : "\\d+");
+                    break;
+                case MatchSubStatementType.Tab:
+                    str.push(this.invert ? "[^\\t]" : "\\t");
+                    break;
+                case MatchSubStatementType.Newline:
+                case MatchSubStatementType.Linefeed:
+                    // both spellings map to the same "\n" fragment
+                    str.push(this.invert ? "[^\\n]" : "\\n");
+                    break;
+                case MatchSubStatementType.CarriageReturn:
+                    str.push(this.invert ? "[^\\r]" : "\\r");
+                    break;
+                default:
+                    // default: anything
+                    str.push(this.invert ? "[^.]" : ".");
+                    break;
+            }
+        }
+
+        return str.join("|");
+    }
+
+}
+
+/**
+ * The modifiers gathered from the "using" statements of an expression.
+ * Renders the `/.../flags` wrapper into which the expression body is placed.
+ */
+export class UsingStatementCST implements H2RCST {
+    /** @param flags one entry per modifier, in the order they were written */
+    constructor(public flags: UsingFlags[]) {
+        /* empty */
+    }
+
+    /**
+     * Reports modifiers that appear more than once and the contradictory
+     * combination of case sensitive with case insensitive.
+     *
+     * @returns the errors found; empty when the modifiers are consistent
+     */
+    public validate(language: RobotLanguage): Error[] {
+        const errors: Error[] = [];
+        // Start from "no bits set" so an empty modifier list is handled too.
+        let seen = 0;
+
+        for (const flag of this.flags) {
+            if (hasFlag(seen, flag)) {
+                // The name has to come from UsingFlags, the enum the value belongs to.
+                errors.push(new Error("Duplicate modifier: " + UsingFlags[flag]));
+            }
+            seen = combineFlags(seen, flag);
+        }
+
+        if (hasFlag(seen, UsingFlags.Sensitive) && hasFlag(seen, UsingFlags.Insensitive)) {
+            errors.push(new Error("Cannot be both case sensitive and insensitive"));
+        }
+
+        return errors;
+    }
+
+    /**
+     * Builds the regex wrapper with a literal `{regex}` placeholder where the
+     * expression body belongs, followed by the modifier letters.
+     *
+     * @returns `/{regex}/flags`, or `/^{regex}$/flags` when Exact was requested
+     */
+    public toRegex(language: RobotLanguage): string {
+        let str = "";
+        let exact = false;
+
+        for (const flag of this.flags) {
+            if (hasFlag(flag, UsingFlags.Multiline)) {
+                str += "m";
+            }
+            else if (hasFlag(flag, UsingFlags.Global)) {
+                str += "g";
+            }
+            else if (hasFlag(flag, UsingFlags.Insensitive)) {
+                str += "i";
+            }
+            else if (hasFlag(flag, UsingFlags.Exact)) {
+                // Exact is not a modifier letter; it anchors the whole expression.
+                exact = true;
+            }
+            // UsingFlags.Sensitive emits nothing.
+        }
+
+        return exact ? "/^{regex}$/" + str : "/{regex}/" + str;
+    }
+}
+
+/**
+ * A repetition count: an exact number, an open-ended "n or more", or a
+ * range whose upper bound may be marked inclusive or exclusive.
+ */
+export class CountSubStatementCST implements H2RCST {
+    /**
+     * @param from lower bound (or the exact count when `to` is null and `opt` is not "+")
+     * @param to   upper bound, or null when no range was given
+     * @param opt  "+" for an open-ended count, "inclusive"/"exclusive" for ranges, or null
+     */
+    constructor(public from: number, public to: number | null, public opt: "inclusive" | "exclusive" | "+" | null) {
+        /* empty */
+    }
+
+    /**
+     * Checks that the lower bound is not negative and, when an upper bound
+     * exists, that it lies above the lower bound.
+     *
+     * @returns at most one error; the negative check takes precedence
+     */
+    public validate(language: RobotLanguage): Error[] {
+        if (this.from < 0) {
+            return [ new Error("Value cannot be negative") ];
+        }
+
+        const errors: Error[] = [];
+
+        if (this.to !== null) {
+            const exclusive_too_small = this.opt === "exclusive" && (this.to - 1) <= this.from;
+
+            if (exclusive_too_small || this.to <= this.from) {
+                errors.push(new Error("Values must be in range of eachother"));
+            }
+        }
+
+        return errors;
+    }
+
+    /**
+     * Renders the count as a regex quantifier.
+     *
+     * @returns `{from,to}` for ranges (upper bound lowered by one when
+     *          exclusive), `{from,}` for "+", otherwise `{from}`
+     */
+    public toRegex(language: RobotLanguage): string {
+        if (this.to === null) {
+            return this.opt === "+" ? `{${this.from},}` : `{${this.from}}`;
+        }
+
+        const upper = this.opt === "exclusive" ? this.to - 1 : this.to;
+
+        return `{${this.from},${upper}}`;
+    }
+}
+
+/**
+ * A "match" statement: the ordered list of (optional?, sub-statement) pairs
+ * it was built from. Placeholder node — both methods currently throw.
+ */
+export class MatchStatementCST implements StatementCST {
+    constructor(public matches: MatchStatementValue[]) {
+        /* empty */
+    }
+
+    /** Not implemented yet: always throws. */
+    public validate(language: RobotLanguage): Error[] {
+        throw new Error("Method not implemented.");
+    }
+    /** Not implemented yet: always throws. */
+    public toRegex(language: RobotLanguage): string {
+        throw new Error("Method not implemented.");
+    }
+    
+}
+
+/**
+ * A "repeat" statement: whether it is optional, its repetition count (null
+ * when none was given) and the statements nested inside it.
+ * Placeholder node — both methods currently throw.
+ */
+export class RepeatStatementCST implements StatementCST {
+    constructor(public optional: boolean, public count: CountSubStatementCST | null, public statements: StatementCST[]) {
+        /* empty */
+    }
+
+    /** Not implemented yet: always throws. */
+    public validate(language: RobotLanguage): Error[] {
+        throw new Error("Method not implemented.");
+    }
+
+    /** Not implemented yet: always throws. */
+    public toRegex(language: RobotLanguage): string {
+        throw new Error("Method not implemented.");
+    }
+}
+
+/**
+ * A "create a group" statement: whether it is optional, its name (null for
+ * an unnamed group) and the statements nested inside it.
+ * Placeholder node — both methods currently throw.
+ */
+export class GroupStatementCST implements StatementCST {
+    constructor(public optional: boolean, public name: string | null, public statements: StatementCST[]) {
+        /* empty */
+    }
+
+    /** Not implemented yet: always throws. */
+    public validate(language: RobotLanguage): Error[] {
+        throw new Error("Method not implemented.");
+    }
+
+    /** Not implemented yet: always throws. */
+    public toRegex(language: RobotLanguage): string {
+        throw new Error("Method not implemented.");
+    }
+}
+
+/**
+ * Root of the tree: the combined "using" modifiers plus the top-level
+ * statements. Placeholder node — both methods currently throw.
+ */
+export class RegularExpressionCST implements H2RCST {
+    constructor(public usings: UsingStatementCST, public statements: StatementCST[]) {
+        /* empty */
+    }
+
+    /** Not implemented yet: always throws. */
+    public validate(language: RobotLanguage): Error[] {
+        throw new Error("Method not implemented.");
+    }
+    /** Not implemented yet: always throws. */
+    public toRegex(language: RobotLanguage): string {
+        throw new Error("Method not implemented.");
+    }
+    
+}
--- a/src/parser.ts
+++ b/src/parser.ts
@ -1,30 +1,19 @@
 /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */

-import { CstParser, CstNode, IOrAlt } from "chevrotain";
+import { EmbeddedActionsParser, IOrAlt,  } from "chevrotain";
 import * as T from "./tokens";
-
-export enum RobotLanguage {
-    JS,
-    Perl,
-    DotNet,
-    Java
-}
-
-export enum HumanLanguage {
-    English,
-    /* Todo: Humans speak more than just english! */
-}
+import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST } from "./generator";

 export class Human2RegexParserOptions {
-    constructor(public skip_validations: boolean = false, public robot_language: RobotLanguage = RobotLanguage.JS, public human_language: HumanLanguage = HumanLanguage.English) {
+    constructor(public skip_validations: boolean = false) {
        /* empty */
    }
 }

-export class Human2RegexParser extends CstParser {
+export class Human2RegexParser extends EmbeddedActionsParser {
    private static already_init = false;

-    public parse : (idxInCallingRule?: number, ...args: unknown[]) => CstNode;
+    public parse : (idxInCallingRule?: number, ...args: unknown[]) => RegularExpressionCST;

    constructor(private options: Human2RegexParserOptions = new Human2RegexParserOptions()) {
        super(T.AllTokens, { recoveryEnabled: false, maxLookahead: 2, skipValidations: options.skip_validations });
@ -39,50 +28,101 @@ export class Human2RegexParser extends CstParser {

        let nss_rules : IOrAlt<unknown>[] | null = null;
        const NumberSubStatement = $.RULE("NumberSubStatement", () => {
+            let value: number = 0;
+
            $.OR(nss_rules || (nss_rules = [
-                { ALT: () => $.CONSUME(T.One) },
-                { ALT: () => $.CONSUME(T.Two) },
-                { ALT: () => $.CONSUME(T.Three) },
-                { ALT: () => $.CONSUME(T.Four) },
-                { ALT: () => $.CONSUME(T.Five) },
-                { ALT: () => $.CONSUME(T.Six) },
-                { ALT: () => $.CONSUME(T.Seven) },
-                { ALT: () => $.CONSUME(T.Eight) },
-                { ALT: () => $.CONSUME(T.Nine) },
-                { ALT: () => $.CONSUME(T.Ten) },
-                { ALT: () => $.CONSUME(T.Zero) },
-                { ALT: () => $.CONSUME(T.NumberLiteral) },
+                { ALT: () => {
+                    $.CONSUME(T.Zero); 
+                    value = 0; 
+                }},
+                { ALT: () => {
+                    $.CONSUME(T.One); 
+                    value = 1; 
+                }},
+                { ALT: () => {
+                    $.CONSUME(T.Two); 
+                    value = 2; 
+                }},
+                { ALT: () => {
+                    $.CONSUME(T.Three); 
+                    value = 3; 
+                }},
+                { ALT: () => {
+                    $.CONSUME(T.Four); 
+                    value = 4; 
+                }},
+                { ALT: () => {
+                    $.CONSUME(T.Five); 
+                    value = 5; 
+                }},
+                { ALT: () => {
+                    $.CONSUME(T.Six); 
+                    value = 6; 
+                }},
+                { ALT: () => {
+                    $.CONSUME(T.Seven); 
+                    value = 7; 
+                }},
+                { ALT: () => {
+                    $.CONSUME(T.Eight); 
+                    value = 8; 
+                }},
+                { ALT: () => {
+                    $.CONSUME(T.Nine); 
+                    value = 9; 
+                }},
+                { ALT: () => {
+                    $.CONSUME(T.Ten); 
+                    value = 10; 
+                }},
+
+                { ALT: () => value = parseInt($.CONSUME(T.NumberLiteral).image) },
            ]));
+
+            return value;
        });

        // 1, 1..2, between 1 and/to 2 inclusively/exclusively
        const CountSubStatement = $.RULE("CountSubStatement", () => {
+            let from : number = 0;
+            let to: number | null = null;
+            let opt: "inclusive" | "exclusive" | "+" | null = null;
+            
            $.OR([
                { ALT: () => {
                    $.CONSUME(T.Between);
-                    $.SUBRULE4(NumberSubStatement);
+                    from = $.SUBRULE4(NumberSubStatement);
                    $.OR3([
                        { ALT: () => $.CONSUME2(T.To) },
                        { ALT: () => $.CONSUME(T.And) }
                    ]);
-                    $.SUBRULE5(NumberSubStatement);
+                    to = $.SUBRULE5(NumberSubStatement);
                    $.OPTION4(() => $.CONSUME3(T.Times));
                    $.OPTION5(() => {
                        $.OR4([
-                            { ALT: () => $.CONSUME(T.Inclusive) },
-                            { ALT: () => $.CONSUME(T.Exclusive) }
+                            { ALT: () => {
+                                $.CONSUME(T.Inclusive);
+                                opt = "inclusive";
+                            }},
+                            { ALT: () => {
+                                $.CONSUME(T.Exclusive);
+                                opt = "exclusive";
+                            }}
                        ]);
                    });
                }},
                
                { ALT: () => { 
                    $.OPTION2(() => $.CONSUME(T.From));
-                    $.SUBRULE2(NumberSubStatement);
+                    from = $.SUBRULE2(NumberSubStatement);
                    $.OR2([
-                        { ALT: () => $.CONSUME(T.OrMore) },
+                        { ALT: () => {
+                            $.CONSUME(T.OrMore);
+                            opt = "+";
+                        }},
                        { ALT: () => { 
                            $.CONSUME(T.To); 
-                            $.SUBRULE3(NumberSubStatement); 
+                            to = $.SUBRULE3(NumberSubStatement); 
                        }}
                    ]);
                    $.OPTION3(() => $.CONSUME2(T.Times));
@ -90,54 +130,112 @@ export class Human2RegexParser extends CstParser {

                { ALT: () => { 
                    $.OPTION(() => $.CONSUME(T.Exactly));
-                    $.SUBRULE(NumberSubStatement);
+                    from = $.SUBRULE(NumberSubStatement);
                    $.OPTION6(() => $.CONSUME(T.Times));
                }} 
            ]);
+
+            return new CountSubStatementCST(from, to, opt);
        });

        let mss_rules : IOrAlt<unknown>[] | null = null;
        const MatchSubStatement = $.RULE("MatchSubStatement", () => {
-            $.OPTION(() => $.SUBRULE(CountSubStatement) );
-            $.OPTION2(() => $.CONSUME(T.Not));
+            let count: CountSubStatementCST | null = null;
+            let invert: boolean = false;
+            const values: MatchSubStatementValue[] = [];
+
+            $.OPTION(() => count = $.SUBRULE(CountSubStatement) );
+            $.OPTION2(() => { 
+                $.CONSUME(T.Not); 
+                invert = true;
+            });
            $.AT_LEAST_ONE_SEP({
                SEP: T.Or,
                DEF: () => {
+                    let from : string | null = null;
+                    let to : string | null = null;
+                    let type : MatchSubStatementType = MatchSubStatementType.Anything;
+
                    $.OPTION3(() => $.CONSUME(T.A));
                    $.OR(mss_rules || (mss_rules = [
                        { ALT: () => {
                            $.OPTION4(() => $.CONSUME(T.From));
-                            $.CONSUME2(T.StringLiteral); 
+                            from = $.CONSUME2(T.StringLiteral).image; 
                            $.CONSUME(T.To);
-                            $.CONSUME3(T.StringLiteral);
+                            to = $.CONSUME3(T.StringLiteral).image;
+                            type = MatchSubStatementType.Between;
                        }},
                        { ALT: () => {
                            $.CONSUME(T.Between);
-                            $.CONSUME4(T.StringLiteral);
+                            from = $.CONSUME4(T.StringLiteral).image;
                            $.CONSUME(T.And);
-                            $.CONSUME5(T.StringLiteral);
+                            to = $.CONSUME5(T.StringLiteral).image;
+                            type = MatchSubStatementType.Between;
+                        }},
+                        { ALT: () => {
+                            from = $.CONSUME(T.StringLiteral).image;
+                            type = MatchSubStatementType.SingleString;
+                        }},
+                        { ALT: () => { 
+                            $.CONSUME(T.Anything); 
+                            type = MatchSubStatementType.Anything;
+                        }},
+                        { ALT: () => { 
+                            $.CONSUME(T.Word); 
+                            type = MatchSubStatementType.Word;
+                        }},
+                        { ALT: () => { 
+                            $.CONSUME(T.Digit); 
+                            type = MatchSubStatementType.Digit;
+                        }},
+                        { ALT: () => { 
+                            $.CONSUME(T.Character); 
+                            type = MatchSubStatementType.Character;
+                        }},
+                        { ALT: () => { 
+                            $.CONSUME(T.Whitespace); 
+                            type = MatchSubStatementType.Whitespace;
+                        }},
+                        { ALT: () => { 
+                            $.CONSUME(T.Number); 
+                            type = MatchSubStatementType.Number;
+                        }},
+                        { ALT: () => { 
+                            $.CONSUME(T.Tab); 
+                            type = MatchSubStatementType.Tab;
+                        }},
+                        { ALT: () => { 
+                            $.CONSUME(T.Linefeed); 
+                            type = MatchSubStatementType.Linefeed;
+                        }},
+                        { ALT: () => { 
+                            $.CONSUME(T.Newline); 
+                            type = MatchSubStatementType.Newline;
+                        }},
+                        { ALT: () => { 
+                            $.CONSUME(T.CarriageReturn); 
+                            type = MatchSubStatementType.CarriageReturn;
                        }},
-                        { ALT: () => $.CONSUME(T.StringLiteral) },
-                        { ALT: () => $.CONSUME(T.Anything) },
-                        { ALT: () => $.CONSUME(T.Word) },
-                        { ALT: () => $.CONSUME(T.Digit) },
-                        { ALT: () => $.CONSUME(T.Character) },
-                        { ALT: () => $.CONSUME(T.Whitespace) },
-                        { ALT: () => $.CONSUME(T.Number) },
-                        { ALT: () => $.CONSUME(T.Tab) },
-                        { ALT: () => $.CONSUME(T.Linefeed) },
-                        { ALT: () => $.CONSUME(T.Newline) },
-                        { ALT: () => $.CONSUME(T.CarriageReturn) },
                    ]));
+
+                    values.push(new MatchSubStatementValue(type, from, to));
                }
            });
+
+            return new MatchSubStatementCST(count, invert, values);
        });

        // optionally match "+" then 1+ words
        const MatchStatement = $.RULE("MatchStatement", () => {
-            $.OPTION(() => $.CONSUME(T.Optional));
+            let optional = false;
+            const msv: MatchStatementValue[] = [];
+
+            $.OPTION(() => {
+                $.CONSUME(T.Optional);
+                optional = true;
+            });
            $.CONSUME(T.Match);
-            $.SUBRULE(MatchSubStatement);
+            msv.push(new MatchStatementValue(optional, $.SUBRULE(MatchSubStatement)));
            $.MANY(() => {
                $.OR([
                    { ALT: () => { 
@ -146,69 +244,136 @@ export class Human2RegexParser extends CstParser {
                    }},
                    { ALT: () => $.CONSUME(T.And) },
                ]);
-                $.OPTION3(() => $.CONSUME2(T.Optional));
-                $.SUBRULE2(MatchSubStatement);
+                optional = false;
+                $.OPTION3(() => {
+                     $.CONSUME2(T.Optional);
+                     optional = true;
+                });
+                msv.push(new MatchStatementValue(optional, $.SUBRULE2(MatchSubStatement)));
            });
            $.CONSUME(T.EndOfLine);
+
+            return new MatchStatementCST(msv);
        });

        // using global matching
        let us_rules : IOrAlt<unknown>[] | null = null;
        const UsingStatement = $.RULE("UsingStatement", () => {
+            const usings: UsingFlags[] = [];
+
            $.CONSUME(T.Using);
            $.AT_LEAST_ONE_SEP({
                SEP: T.And,
                DEF: () => {
                    $.OR(us_rules || (us_rules = [
-                        { ALT: () => $.CONSUME(T.Multiline) },
-                        { ALT: () => $.CONSUME(T.Global) },
-                        { ALT: () => $.CONSUME(T.CaseInsensitive) },
-                        { ALT: () => $.CONSUME(T.CaseSensitive) },
-                        { ALT: () => $.CONSUME(T.Exact) }
+                        { ALT: () => {
+                            $.CONSUME(T.Multiline); 
+                            usings.push(UsingFlags.Multiline);
+                        }},
+                        { ALT: () => { 
+                            $.CONSUME(T.Global);
+                            usings.push(UsingFlags.Global);
+                        }},
+                        { ALT: () => { 
+                            $.CONSUME(T.CaseInsensitive);
+                            usings.push(UsingFlags.Insensitive); 
+                        }},
+                        { ALT: () => { 
+                            $.CONSUME(T.CaseSensitive);
+                            usings.push(UsingFlags.Sensitive); 
+                        }},
+                        { ALT: () => { 
+                            $.CONSUME(T.Exact); 
+                            usings.push(UsingFlags.Exact);
+                        }}
                    ]));
                    $.OPTION(() => $.CONSUME(T.Matching));
                }
            });
            $.CONSUME(T.EndOfLine);
+
+            return usings;
        });

        const GroupStatement = $.RULE("GroupStatement", () => {
-            $.OPTION2(() => $.CONSUME(T.Optional));
-            $.CONSUME(T.Create);
-            $.CONSUME(T.A);
-            $.OPTION3(() => $.CONSUME2(T.Optional));
+            let optional = false;
+            let name: string | null = null;
+            const statement: StatementCST[] = [];
+
+            $.OR([
+                { ALT: () => {
+                    optional = true;
+                    $.CONSUME(T.Optional);
+                    $.CONSUME(T.Create);
+                    $.CONSUME(T.A);
+                }},
+                { ALT: () => {
+                    $.CONSUME2(T.Create);
+                    $.CONSUME2(T.A);
+                    $.OPTION2(() => {
+                        $.CONSUME2(T.Optional);
+                        optional = true;
+                    });
+                }}
+            ]);
+
            $.CONSUME(T.Group);
            $.OPTION(() => {
                $.CONSUME(T.Called);
-                $.CONSUME(T.StringLiteral);
+                name = $.CONSUME(T.Identifier).image;
            });
            $.CONSUME2(T.EndOfLine);
            $.CONSUME(T.Indent);
-            $.AT_LEAST_ONE(Statement);
+            $.AT_LEAST_ONE(() => {
+                statement.push($.SUBRULE(Statement));
+            });
            $.CONSUME(T.Outdent);
+
+            return new GroupStatementCST(optional, name, statement);
        });

        const RepeatStatement = $.RULE("RepeatStatement", () => {
-            $.OPTION3(() => $.CONSUME(T.Optional));
+            let optional = false;
+            let count : CountSubStatementCST | null = null;
+            const statements: StatementCST[] = [];
+
+            $.OPTION3(() => {
+                $.CONSUME(T.Optional);
+                optional = true;
+            });
            $.CONSUME(T.Repeat);
-            $.OPTION(() => $.SUBRULE(CountSubStatement));
+            $.OPTION(() => count = $.SUBRULE(CountSubStatement));
            $.CONSUME3(T.EndOfLine);
            $.CONSUME(T.Indent);
-            $.AT_LEAST_ONE(Statement);
+            $.AT_LEAST_ONE(() => {
+                statements.push($.SUBRULE(Statement));
+            });
            $.CONSUME(T.Outdent);
+
+            return new RepeatStatementCST(optional, count, statements);
        });

        const Statement = $.RULE("Statement", () => {
+            // eslint-disable-next-line init-declarations
+            let statement! : StatementCST;
+
            $.OR([
-                { ALT: () => $.SUBRULE(MatchStatement) },
-                { ALT: () => $.SUBRULE(GroupStatement) },
-                { ALT: () => $.SUBRULE(RepeatStatement) }
+                { ALT: () => statement = $.SUBRULE(MatchStatement) },
+                { ALT: () => statement = $.SUBRULE(GroupStatement) },
+                { ALT: () => statement = $.SUBRULE(RepeatStatement) }
            ]);
+
+            return statement;
        });

        const Regex = $.RULE("Regex", () => {
-            $.MANY(() => $.SUBRULE(UsingStatement));
-            $.MANY2(() => $.SUBRULE(Statement) );
+            let usings: UsingFlags[] = [];
+            const statements: StatementCST[] = [];
+
+            $.MANY(() => usings = usings.concat($.SUBRULE(UsingStatement)));
+            $.MANY2(() => statements.push($.SUBRULE(Statement)) );
+
+            return new RegularExpressionCST(new UsingStatementCST(usings), statements);
        });

        this.performSelfAnalysis();
--- a/src/script.ts
+++ b/src/script.ts
@ -12,8 +12,8 @@ $(function() {
 });
 */

-const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(true));
-const parser = new Human2RegexParser(new Human2RegexParserOptions(true));
+const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(false));
+const parser = new Human2RegexParser(new Human2RegexParserOptions(false));

 const result = lexer.tokenize(`
 // H2R supports // # and /**/ as comments
@ -24,28 +24,28 @@ const result = lexer.tokenize(`
 // exact matching means use a ^ and $ to signify the start and end of the string

 using global and exact matching
-create an optional group called "protocol"
+create an optional group called protocol
 	match "http"
 	optionally match "s"
 	match "://"
-create a group called "subdomain"
+create a group called subdomain
 	repeat
 		match 1+ words
 		match "."
-create a group called "domain"
+create a group called domain
 	match 1+ words or "_" or "-"
 	match "."
 	match a word
 # port, but we don't care about it, so ignore it
 optionally match ":" then 0+ digits
-create an optional group called "path"
+create an optional group called path
 	repeat
 		match "/"
 		match 0+ words or "_" or "-"
 create an optional group
 	# we don't want to capture the '?', so don't name the group until afterwards
 	match "?"
-	create a group called "query"
+	create a group called query
 		repeat
 			match 1+ words or "_" or "-"
 			match "="
@ -61,5 +61,5 @@ console.log(result.errors);

 parser.input = result.tokens;
 const regex = parser.parse();
-console.log(JSON.stringify(regex.children, undefined, 4));
+console.log(JSON.stringify(regex, undefined, 4));
 console.log(parser.errors);
--- a/src/tokens.ts
+++ b/src/tokens.ts
@ -78,7 +78,7 @@ export const SingleLineComment = createToken({name: "SingleLineComment", pattern
 export const MultilineComment = createToken({name: "MultiLineComment", pattern: /\/\*(.*)\*\//, line_breaks: true, group: Lexer.SKIPPED });

 export const Identifier = createToken({name: "Identifier", pattern: /[a-z]\w*/i });
-export const NumberLiteral = createToken({name: "NumberLiteral", pattern: /-?(0|[1-9]\d*)(\.\d+)?([eE][+-]?\d+)?/ });
+export const NumberLiteral = createToken({name: "NumberLiteral", pattern: /-?\d+/ });
 export const StringLiteral = createToken({name: "StringLiteral", pattern: /"(?:[^\\"]|\\(?:[bfnrtv"\\/]|u[0-9a-f]{4}|U[0-9a-f]{8}))*"/i });

 export const Indent = createToken({name: "Indent"});
--- a/src/utilities.ts
+++ b/src/utilities.ts
@ -1,5 +1,15 @@
 /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */

+/* eslint-disable no-bitwise */
+/**
+ * Tells whether two bit sets overlap.
+ *
+ * @param a first bit set
+ * @param b second bit set
+ * @returns true when at least one bit is set in both `a` and `b`
+ */
+export function hasFlag(a: number, b: number) : boolean {
+    const shared = a & b;
+    return Boolean(shared);
+}
+
+/**
+ * Merges two bit sets.
+ *
+ * @param a first bit set
+ * @param b second bit set
+ * @returns the bitwise OR of `a` and `b`
+ */
+export function combineFlags(a: number, b: number): number {
+    return (a | b);
+}
+/* eslint-enable no-bitwise */
+
 export function last<T>(array: T[]) : T {
    return array[array.length-1];
 }
@ -13,7 +23,7 @@ export function findLastIndex<T>(array: T[], value: T) : number {
    return -1;
 }

-export function findLastIndexPredicate<T>(array: T[], predicate: (x: T) => boolean) : number {
+export function findLastIndexPredicate<T>(array: T[], predicate: (x: T) => boolean): number {
    for (let index = array.length-1; index >= 0; index--) {
        if (predicate(array[index])) {
            return index;
@ -21,3 +31,11 @@ export function findLastIndexPredicate<T>(array: T[], predicate: (x: T) => boole
    }
    return -1;
 }
+
+/**
+ * Strips the first and the last character of a quoted string literal,
+ * e.g. `"abc"` (with the quotes) becomes `abc`.
+ *
+ * @param input the literal including its surrounding quote characters
+ * @returns the text between the quotes; empty for inputs shorter than two characters
+ */
+export function removeQuotes(input: string): string {
+    // slice's end index is exclusive, so -1 drops exactly the closing quote
+    // (the previous `length-2` bound also cut off the last real character).
+    return input.slice(1, -1);
+}
+
+/**
+ * Escapes every character that has a special meaning in a regular
+ * expression so the input matches literally.
+ *
+ * @param input plain text
+ * @returns `input` with a backslash in front of each of \ . [ ] ^ - | ( ) * + ? { } $
+ */
+export function regexEscape(input: string) : string {
+    // One global pass over a character class: the backslash is handled together
+    // with the other metacharacters, so nothing gets escaped twice.
+    return input.replace(/[\\.[\]^\-|()*+?{}$]/g, "\\$&");
+}