Parser refactored and made faster

Though to be honest, it was already really fast.
2025-05-16 12:30:09 -07:00 · 2020-10-29 03:55:19 -04:00 · 2020-10-29 03:55:19 -04:00 · 31e9872c4f
commit 31e9872c4f
parent bddc5d4f3b
6 changed files with 71 additions and 55 deletions
--- a/docs/bundle.min.js
+++ b/docs/bundle.min.js
--- a/src/tokenizer.ts
+++ b/src/tokenizer.ts
@ -11,7 +11,7 @@ export enum IndentType {
 }

 export class Human2RegexLexerOptions {
-    constructor(public type: IndentType = IndentType.Both, public spaces_per_tab: number = 4) {
+    constructor(public skip_validations = false, public type: IndentType = IndentType.Both, public spaces_per_tab: number = 4) {
        /* empty */
    }
 }
@ -52,7 +52,7 @@ export class Human2RegexLexer {

        Indent.PATTERN = indent_regex;

-        this.lexer = new Lexer(AllTokens, { ensureOptimizations: true });
+        this.lexer = new Lexer(AllTokens, { ensureOptimizations: true, skipValidations: options.skip_validations });
    }

    private lex_error(token: IToken) : ILexingError {
--- a/src/parser.ts
+++ b/src/parser.ts
@ -1,10 +1,10 @@
 /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */

-import { CstParser, CstNode } from "chevrotain";
+import { CstParser, CstNode, IOrAlt } from "chevrotain";
 import * as T from "./tokens";

 export class Human2RegexParserOptions {
-    constructor() {
+    constructor(public skip_validations: boolean = false) {
        /* empty */
    }
 }
@ -12,12 +12,10 @@ export class Human2RegexParserOptions {
 export class Human2RegexParser extends CstParser {
    private static already_init = false;

-    public nodes: { [key: string]: (idxInCallingRule?: number, ...args: unknown[]) => CstNode } = {};
-
    public parse : (idxInCallingRule?: number, ...args: unknown[]) => CstNode;

    constructor(private options: Human2RegexParserOptions = new Human2RegexParserOptions()) {
-        super(T.AllTokens, { recoveryEnabled: false, maxLookahead: 2});
+        super(T.AllTokens, { recoveryEnabled: false, maxLookahead: 2, skipValidations: options.skip_validations });

        if (Human2RegexParser.already_init) {
            throw new Error("Only 1 instance of Human2RegexParser allowed");
@ -27,8 +25,9 @@ export class Human2RegexParser extends CstParser {
        
        const $ = this;

-        this.nodes.NumberSubStatement = $.RULE("NumberSubStatement", () => {
-            $.OR([
+        let nss_rules : IOrAlt<unknown>[] | null = null;
+        const NumberSubStatement = $.RULE("NumberSubStatement", () => {
+            $.OR(nss_rules || (nss_rules = [
                { ALT: () => $.CONSUME(T.One) },
                { ALT: () => $.CONSUME(T.Two) },
                { ALT: () => $.CONSUME(T.Three) },
@ -41,20 +40,20 @@ export class Human2RegexParser extends CstParser {
                { ALT: () => $.CONSUME(T.Ten) },
                { ALT: () => $.CONSUME(T.Zero) },
                { ALT: () => $.CONSUME(T.NumberLiteral) },
-            ]);
+            ]));
        });

        // 1, 1..2, between 1 and/to 2 inclusively/exclusively
-        this.nodes.CountSubStatement = $.RULE("CountSubStatement", () => {
+        const CountSubStatement = $.RULE("CountSubStatement", () => {
            $.OR([
                { ALT: () => {
                    $.CONSUME(T.Between);
-                    $.SUBRULE4(this.nodes.NumberSubStatement);
+                    $.SUBRULE4(NumberSubStatement);
                    $.OR3([
                        { ALT: () => $.CONSUME2(T.To) },
                        { ALT: () => $.CONSUME(T.And) }
                    ]);
-                    $.SUBRULE5(this.nodes.NumberSubStatement);
+                    $.SUBRULE5(NumberSubStatement);
                    $.OPTION4(() => $.CONSUME3(T.Times));
                    $.OPTION5(() => {
                        $.OR4([
@ -66,12 +65,12 @@ export class Human2RegexParser extends CstParser {
                
                { ALT: () => { 
                    $.OPTION2(() => $.CONSUME(T.From));
-                    $.SUBRULE2(this.nodes.NumberSubStatement);
+                    $.SUBRULE2(NumberSubStatement);
                    $.OR2([
                        { ALT: () => $.CONSUME(T.OrMore) },
                        { ALT: () => { 
                            $.CONSUME(T.To); 
-                            $.SUBRULE3(this.nodes.NumberSubStatement); 
+                            $.SUBRULE3(NumberSubStatement); 
                        }}
                    ]);
                    $.OPTION3(() => $.CONSUME2(T.Times));
@ -79,22 +78,35 @@ export class Human2RegexParser extends CstParser {

                { ALT: () => { 
                    $.OPTION(() => $.CONSUME(T.Exactly));
-                    $.SUBRULE(this.nodes.NumberSubStatement);
+                    $.SUBRULE(NumberSubStatement);
                    $.OPTION6(() => $.CONSUME(T.Times));
                }} 
            ]);
        });

-        this.nodes.MatchSubStatement = $.RULE("MatchSubStatement", () => {
-            $.OPTION(() => $.SUBRULE(this.nodes.CountSubStatement) );
+        let mss_rules : IOrAlt<unknown>[] | null = null;
+        const MatchSubStatement = $.RULE("MatchSubStatement", () => {
+            $.OPTION(() => $.SUBRULE(CountSubStatement) );
            $.OPTION2(() => $.CONSUME(T.Not));
            $.AT_LEAST_ONE_SEP({
                SEP: T.Or,
                DEF: () => {
                    $.OPTION3(() => $.CONSUME(T.A));
-                    $.OR([
-                        { ALT: () => $.CONSUME(T.Anything) },
+                    $.OR(mss_rules || (mss_rules = [
+                        { ALT: () => {
+                            $.OPTION4(() => $.CONSUME(T.From));
+                            $.CONSUME2(T.StringLiteral); 
+                            $.CONSUME(T.To);
+                            $.CONSUME3(T.StringLiteral);
+                        }},
+                        { ALT: () => {
+                            $.CONSUME(T.Between);
+                            $.CONSUME4(T.StringLiteral);
+                            $.CONSUME(T.And);
+                            $.CONSUME5(T.StringLiteral);
+                        }},
                        { ALT: () => $.CONSUME(T.StringLiteral) },
+                        { ALT: () => $.CONSUME(T.Anything) },
                        { ALT: () => $.CONSUME(T.Word) },
                        { ALT: () => $.CONSUME(T.Digit) },
                        { ALT: () => $.CONSUME(T.Character) },
@ -104,17 +116,16 @@ export class Human2RegexParser extends CstParser {
                        { ALT: () => $.CONSUME(T.Linefeed) },
                        { ALT: () => $.CONSUME(T.Newline) },
                        { ALT: () => $.CONSUME(T.CarriageReturn) },
-                    ]);
-                    
+                    ]));
                }
            });
        });

        // optionally match "+" then 1+ words
-        this.nodes.MatchStatement = $.RULE("MatchStatement", () => {
+        const MatchStatement = $.RULE("MatchStatement", () => {
            $.OPTION(() => $.CONSUME(T.Optional));
            $.CONSUME(T.Match);
-            $.SUBRULE(this.nodes.MatchSubStatement);
+            $.SUBRULE(MatchSubStatement);
            $.MANY(() => {
                $.OR([
                    { ALT: () => { 
@ -124,31 +135,32 @@ export class Human2RegexParser extends CstParser {
                    { ALT: () => $.CONSUME(T.And) },
                ]);
                $.OPTION3(() => $.CONSUME2(T.Optional));
-                $.SUBRULE2(this.nodes.MatchSubStatement);
+                $.SUBRULE2(MatchSubStatement);
            });
            $.CONSUME(T.EndOfLine);
        });

        // using global matching
-        this.nodes.UsingStatement = $.RULE("UsingStatement", () => {
+        let us_rules : IOrAlt<unknown>[] | null = null;
+        const UsingStatement = $.RULE("UsingStatement", () => {
            $.CONSUME(T.Using);
            $.AT_LEAST_ONE_SEP({
                SEP: T.And,
                DEF: () => {
-                    $.OR([
+                    $.OR(us_rules || (us_rules = [
                        { ALT: () => $.CONSUME(T.Multiline) },
                        { ALT: () => $.CONSUME(T.Global) },
                        { ALT: () => $.CONSUME(T.CaseInsensitive) },
                        { ALT: () => $.CONSUME(T.CaseSensitive) },
                        { ALT: () => $.CONSUME(T.Exact) }
-                    ]);
+                    ]));
                    $.OPTION(() => $.CONSUME(T.Matching));
                }
            });
            $.CONSUME(T.EndOfLine);
        });

-        this.nodes.GroupStatement = $.RULE("GroupStatement", () => {
+        const GroupStatement = $.RULE("GroupStatement", () => {
            $.OPTION2(() => $.CONSUME(T.Optional));
            $.CONSUME(T.Create);
            $.CONSUME(T.A);
@ -160,36 +172,36 @@ export class Human2RegexParser extends CstParser {
            });
            $.CONSUME2(T.EndOfLine);
            $.CONSUME(T.Indent);
-            $.AT_LEAST_ONE(this.nodes.Statement);
+            $.AT_LEAST_ONE(Statement);
            $.CONSUME(T.Outdent);
        });

-        this.nodes.RepeatStatement = $.RULE("RepeatStatement", () => {
+        const RepeatStatement = $.RULE("RepeatStatement", () => {
            $.OPTION3(() => $.CONSUME(T.Optional));
            $.CONSUME(T.Repeat);
-            $.OPTION(() => $.SUBRULE(this.nodes.CountSubStatement));
+            $.OPTION(() => $.SUBRULE(CountSubStatement));
            $.CONSUME3(T.EndOfLine);
            $.CONSUME(T.Indent);
-            $.AT_LEAST_ONE(this.nodes.Statement);
+            $.AT_LEAST_ONE(Statement);
            $.CONSUME(T.Outdent);
        });

-        this.nodes.Statement = $.RULE("Statement", () => {
+        const Statement = $.RULE("Statement", () => {
            $.OR([
-                { ALT: () => $.SUBRULE(this.nodes.MatchStatement) },
-                { ALT: () => $.SUBRULE(this.nodes.GroupStatement) },
-                { ALT: () => $.SUBRULE(this.nodes.RepeatStatement) }
+                { ALT: () => $.SUBRULE(MatchStatement) },
+                { ALT: () => $.SUBRULE(GroupStatement) },
+                { ALT: () => $.SUBRULE(RepeatStatement) }
            ]);
        });

-        this.nodes.Regex = $.RULE("Regex", () => {
-            $.MANY(() => $.SUBRULE(this.nodes.UsingStatement));
-            $.MANY2(() => $.SUBRULE(this.nodes.Statement) );
+        const Regex = $.RULE("Regex", () => {
+            $.MANY(() => $.SUBRULE(UsingStatement));
+            $.MANY2(() => $.SUBRULE(Statement) );
        });

        this.performSelfAnalysis();

-        this.parse = this.nodes.Regex;
+        this.parse = Regex;
    }

    //public set_options(options: Human2RegexParserOptions) : void {
--- a/src/script.ts
+++ b/src/script.ts
@ -1,10 +1,10 @@
+/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
 "use strict";

 import "./style.css";

-import { Human2RegexLexer } from "./tokenizer";
-import { Human2RegexParser } from "./parser";
-
+import { Human2RegexLexer, Human2RegexLexerOptions } from "./lexer";
+import { Human2RegexParser, Human2RegexParserOptions } from "./parser";

 /*
 $(function() {
@ -12,8 +12,8 @@ $(function() {
 });
 */

-const lexer = new Human2RegexLexer();
-const parser = new Human2RegexParser();
+const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(true));
+const parser = new Human2RegexParser(new Human2RegexParserOptions(true));

 const result = lexer.tokenize(`
 // H2R supports // # and /**/ as comments
@ -58,11 +58,8 @@ create an optional group


 console.log(result.errors);
+
 parser.input = result.tokens;
 const regex = parser.parse();
-console.log(regex);
-console.log(parser.errors);
-
-//interpreter.visit(regex);
-
-//parser.getBaseCstVisitorConstructor();
+console.log(JSON.stringify(regex.children, undefined, 4));
+console.log(parser.errors);
--- a/src/tokens.ts
+++ b/src/tokens.ts
@ -1,3 +1,5 @@
+/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
+
 import { createToken, Lexer } from "chevrotain";

 export const Zero = createToken({name: "Zero", pattern: /zero/i });
--- a/src/utilities.ts
+++ b/src/utilities.ts
@ -1,3 +1,5 @@
+/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
+
 export function last<T>(array: T[]) : T {
    return array[array.length-1];
 }