1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-16 12:30:09 -07:00

Parser refactored and made faster

Though to be honest, it was already really fast
This commit is contained in:
Patrick Demian 2020-10-29 03:55:19 -04:00
parent bddc5d4f3b
commit 31e9872c4f
6 changed files with 71 additions and 55 deletions

9
docs/bundle.min.js vendored

File diff suppressed because one or more lines are too long

View File

@ -11,7 +11,7 @@ export enum IndentType {
} }
export class Human2RegexLexerOptions { export class Human2RegexLexerOptions {
constructor(public type: IndentType = IndentType.Both, public spaces_per_tab: number = 4) { constructor(public skip_validations = false, public type: IndentType = IndentType.Both, public spaces_per_tab: number = 4) {
/* empty */ /* empty */
} }
} }
@ -52,7 +52,7 @@ export class Human2RegexLexer {
Indent.PATTERN = indent_regex; Indent.PATTERN = indent_regex;
this.lexer = new Lexer(AllTokens, { ensureOptimizations: true }); this.lexer = new Lexer(AllTokens, { ensureOptimizations: true, skipValidations: options.skip_validations });
} }
private lex_error(token: IToken) : ILexingError { private lex_error(token: IToken) : ILexingError {

View File

@ -1,10 +1,10 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */ /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { CstParser, CstNode } from "chevrotain"; import { CstParser, CstNode, IOrAlt } from "chevrotain";
import * as T from "./tokens"; import * as T from "./tokens";
export class Human2RegexParserOptions { export class Human2RegexParserOptions {
constructor() { constructor(public skip_validations: boolean = false) {
/* empty */ /* empty */
} }
} }
@ -12,12 +12,10 @@ export class Human2RegexParserOptions {
export class Human2RegexParser extends CstParser { export class Human2RegexParser extends CstParser {
private static already_init = false; private static already_init = false;
public nodes: { [key: string]: (idxInCallingRule?: number, ...args: unknown[]) => CstNode } = {};
public parse : (idxInCallingRule?: number, ...args: unknown[]) => CstNode; public parse : (idxInCallingRule?: number, ...args: unknown[]) => CstNode;
constructor(private options: Human2RegexParserOptions = new Human2RegexParserOptions()) { constructor(private options: Human2RegexParserOptions = new Human2RegexParserOptions()) {
super(T.AllTokens, { recoveryEnabled: false, maxLookahead: 2}); super(T.AllTokens, { recoveryEnabled: false, maxLookahead: 2, skipValidations: options.skip_validations });
if (Human2RegexParser.already_init) { if (Human2RegexParser.already_init) {
throw new Error("Only 1 instance of Human2RegexParser allowed"); throw new Error("Only 1 instance of Human2RegexParser allowed");
@ -27,8 +25,9 @@ export class Human2RegexParser extends CstParser {
const $ = this; const $ = this;
this.nodes.NumberSubStatement = $.RULE("NumberSubStatement", () => { let nss_rules : IOrAlt<unknown>[] | null = null;
$.OR([ const NumberSubStatement = $.RULE("NumberSubStatement", () => {
$.OR(nss_rules || (nss_rules = [
{ ALT: () => $.CONSUME(T.One) }, { ALT: () => $.CONSUME(T.One) },
{ ALT: () => $.CONSUME(T.Two) }, { ALT: () => $.CONSUME(T.Two) },
{ ALT: () => $.CONSUME(T.Three) }, { ALT: () => $.CONSUME(T.Three) },
@ -41,20 +40,20 @@ export class Human2RegexParser extends CstParser {
{ ALT: () => $.CONSUME(T.Ten) }, { ALT: () => $.CONSUME(T.Ten) },
{ ALT: () => $.CONSUME(T.Zero) }, { ALT: () => $.CONSUME(T.Zero) },
{ ALT: () => $.CONSUME(T.NumberLiteral) }, { ALT: () => $.CONSUME(T.NumberLiteral) },
]); ]));
}); });
// 1, 1..2, between 1 and/to 2 inclusively/exclusively // 1, 1..2, between 1 and/to 2 inclusively/exclusively
this.nodes.CountSubStatement = $.RULE("CountSubStatement", () => { const CountSubStatement = $.RULE("CountSubStatement", () => {
$.OR([ $.OR([
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Between); $.CONSUME(T.Between);
$.SUBRULE4(this.nodes.NumberSubStatement); $.SUBRULE4(NumberSubStatement);
$.OR3([ $.OR3([
{ ALT: () => $.CONSUME2(T.To) }, { ALT: () => $.CONSUME2(T.To) },
{ ALT: () => $.CONSUME(T.And) } { ALT: () => $.CONSUME(T.And) }
]); ]);
$.SUBRULE5(this.nodes.NumberSubStatement); $.SUBRULE5(NumberSubStatement);
$.OPTION4(() => $.CONSUME3(T.Times)); $.OPTION4(() => $.CONSUME3(T.Times));
$.OPTION5(() => { $.OPTION5(() => {
$.OR4([ $.OR4([
@ -66,12 +65,12 @@ export class Human2RegexParser extends CstParser {
{ ALT: () => { { ALT: () => {
$.OPTION2(() => $.CONSUME(T.From)); $.OPTION2(() => $.CONSUME(T.From));
$.SUBRULE2(this.nodes.NumberSubStatement); $.SUBRULE2(NumberSubStatement);
$.OR2([ $.OR2([
{ ALT: () => $.CONSUME(T.OrMore) }, { ALT: () => $.CONSUME(T.OrMore) },
{ ALT: () => { { ALT: () => {
$.CONSUME(T.To); $.CONSUME(T.To);
$.SUBRULE3(this.nodes.NumberSubStatement); $.SUBRULE3(NumberSubStatement);
}} }}
]); ]);
$.OPTION3(() => $.CONSUME2(T.Times)); $.OPTION3(() => $.CONSUME2(T.Times));
@ -79,22 +78,35 @@ export class Human2RegexParser extends CstParser {
{ ALT: () => { { ALT: () => {
$.OPTION(() => $.CONSUME(T.Exactly)); $.OPTION(() => $.CONSUME(T.Exactly));
$.SUBRULE(this.nodes.NumberSubStatement); $.SUBRULE(NumberSubStatement);
$.OPTION6(() => $.CONSUME(T.Times)); $.OPTION6(() => $.CONSUME(T.Times));
}} }}
]); ]);
}); });
this.nodes.MatchSubStatement = $.RULE("MatchSubStatement", () => { let mss_rules : IOrAlt<unknown>[] | null = null;
$.OPTION(() => $.SUBRULE(this.nodes.CountSubStatement) ); const MatchSubStatement = $.RULE("MatchSubStatement", () => {
$.OPTION(() => $.SUBRULE(CountSubStatement) );
$.OPTION2(() => $.CONSUME(T.Not)); $.OPTION2(() => $.CONSUME(T.Not));
$.AT_LEAST_ONE_SEP({ $.AT_LEAST_ONE_SEP({
SEP: T.Or, SEP: T.Or,
DEF: () => { DEF: () => {
$.OPTION3(() => $.CONSUME(T.A)); $.OPTION3(() => $.CONSUME(T.A));
$.OR([ $.OR(mss_rules || (mss_rules = [
{ ALT: () => $.CONSUME(T.Anything) }, { ALT: () => {
$.OPTION4(() => $.CONSUME(T.From));
$.CONSUME2(T.StringLiteral);
$.CONSUME(T.To);
$.CONSUME3(T.StringLiteral);
}},
{ ALT: () => {
$.CONSUME(T.Between);
$.CONSUME4(T.StringLiteral);
$.CONSUME(T.And);
$.CONSUME5(T.StringLiteral);
}},
{ ALT: () => $.CONSUME(T.StringLiteral) }, { ALT: () => $.CONSUME(T.StringLiteral) },
{ ALT: () => $.CONSUME(T.Anything) },
{ ALT: () => $.CONSUME(T.Word) }, { ALT: () => $.CONSUME(T.Word) },
{ ALT: () => $.CONSUME(T.Digit) }, { ALT: () => $.CONSUME(T.Digit) },
{ ALT: () => $.CONSUME(T.Character) }, { ALT: () => $.CONSUME(T.Character) },
@ -104,17 +116,16 @@ export class Human2RegexParser extends CstParser {
{ ALT: () => $.CONSUME(T.Linefeed) }, { ALT: () => $.CONSUME(T.Linefeed) },
{ ALT: () => $.CONSUME(T.Newline) }, { ALT: () => $.CONSUME(T.Newline) },
{ ALT: () => $.CONSUME(T.CarriageReturn) }, { ALT: () => $.CONSUME(T.CarriageReturn) },
]); ]));
} }
}); });
}); });
// optionally match "+" then 1+ words // optionally match "+" then 1+ words
this.nodes.MatchStatement = $.RULE("MatchStatement", () => { const MatchStatement = $.RULE("MatchStatement", () => {
$.OPTION(() => $.CONSUME(T.Optional)); $.OPTION(() => $.CONSUME(T.Optional));
$.CONSUME(T.Match); $.CONSUME(T.Match);
$.SUBRULE(this.nodes.MatchSubStatement); $.SUBRULE(MatchSubStatement);
$.MANY(() => { $.MANY(() => {
$.OR([ $.OR([
{ ALT: () => { { ALT: () => {
@ -124,31 +135,32 @@ export class Human2RegexParser extends CstParser {
{ ALT: () => $.CONSUME(T.And) }, { ALT: () => $.CONSUME(T.And) },
]); ]);
$.OPTION3(() => $.CONSUME2(T.Optional)); $.OPTION3(() => $.CONSUME2(T.Optional));
$.SUBRULE2(this.nodes.MatchSubStatement); $.SUBRULE2(MatchSubStatement);
}); });
$.CONSUME(T.EndOfLine); $.CONSUME(T.EndOfLine);
}); });
// using global matching // using global matching
this.nodes.UsingStatement = $.RULE("UsingStatement", () => { let us_rules : IOrAlt<unknown>[] | null = null;
const UsingStatement = $.RULE("UsingStatement", () => {
$.CONSUME(T.Using); $.CONSUME(T.Using);
$.AT_LEAST_ONE_SEP({ $.AT_LEAST_ONE_SEP({
SEP: T.And, SEP: T.And,
DEF: () => { DEF: () => {
$.OR([ $.OR(us_rules || (us_rules = [
{ ALT: () => $.CONSUME(T.Multiline) }, { ALT: () => $.CONSUME(T.Multiline) },
{ ALT: () => $.CONSUME(T.Global) }, { ALT: () => $.CONSUME(T.Global) },
{ ALT: () => $.CONSUME(T.CaseInsensitive) }, { ALT: () => $.CONSUME(T.CaseInsensitive) },
{ ALT: () => $.CONSUME(T.CaseSensitive) }, { ALT: () => $.CONSUME(T.CaseSensitive) },
{ ALT: () => $.CONSUME(T.Exact) } { ALT: () => $.CONSUME(T.Exact) }
]); ]));
$.OPTION(() => $.CONSUME(T.Matching)); $.OPTION(() => $.CONSUME(T.Matching));
} }
}); });
$.CONSUME(T.EndOfLine); $.CONSUME(T.EndOfLine);
}); });
this.nodes.GroupStatement = $.RULE("GroupStatement", () => { const GroupStatement = $.RULE("GroupStatement", () => {
$.OPTION2(() => $.CONSUME(T.Optional)); $.OPTION2(() => $.CONSUME(T.Optional));
$.CONSUME(T.Create); $.CONSUME(T.Create);
$.CONSUME(T.A); $.CONSUME(T.A);
@ -160,36 +172,36 @@ export class Human2RegexParser extends CstParser {
}); });
$.CONSUME2(T.EndOfLine); $.CONSUME2(T.EndOfLine);
$.CONSUME(T.Indent); $.CONSUME(T.Indent);
$.AT_LEAST_ONE(this.nodes.Statement); $.AT_LEAST_ONE(Statement);
$.CONSUME(T.Outdent); $.CONSUME(T.Outdent);
}); });
this.nodes.RepeatStatement = $.RULE("RepeatStatement", () => { const RepeatStatement = $.RULE("RepeatStatement", () => {
$.OPTION3(() => $.CONSUME(T.Optional)); $.OPTION3(() => $.CONSUME(T.Optional));
$.CONSUME(T.Repeat); $.CONSUME(T.Repeat);
$.OPTION(() => $.SUBRULE(this.nodes.CountSubStatement)); $.OPTION(() => $.SUBRULE(CountSubStatement));
$.CONSUME3(T.EndOfLine); $.CONSUME3(T.EndOfLine);
$.CONSUME(T.Indent); $.CONSUME(T.Indent);
$.AT_LEAST_ONE(this.nodes.Statement); $.AT_LEAST_ONE(Statement);
$.CONSUME(T.Outdent); $.CONSUME(T.Outdent);
}); });
this.nodes.Statement = $.RULE("Statement", () => { const Statement = $.RULE("Statement", () => {
$.OR([ $.OR([
{ ALT: () => $.SUBRULE(this.nodes.MatchStatement) }, { ALT: () => $.SUBRULE(MatchStatement) },
{ ALT: () => $.SUBRULE(this.nodes.GroupStatement) }, { ALT: () => $.SUBRULE(GroupStatement) },
{ ALT: () => $.SUBRULE(this.nodes.RepeatStatement) } { ALT: () => $.SUBRULE(RepeatStatement) }
]); ]);
}); });
this.nodes.Regex = $.RULE("Regex", () => { const Regex = $.RULE("Regex", () => {
$.MANY(() => $.SUBRULE(this.nodes.UsingStatement)); $.MANY(() => $.SUBRULE(UsingStatement));
$.MANY2(() => $.SUBRULE(this.nodes.Statement) ); $.MANY2(() => $.SUBRULE(Statement) );
}); });
this.performSelfAnalysis(); this.performSelfAnalysis();
this.parse = this.nodes.Regex; this.parse = Regex;
} }
//public set_options(options: Human2RegexParserOptions) : void { //public set_options(options: Human2RegexParserOptions) : void {

View File

@ -1,10 +1,10 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
"use strict"; "use strict";
import "./style.css"; import "./style.css";
import { Human2RegexLexer } from "./tokenizer"; import { Human2RegexLexer, Human2RegexLexerOptions } from "./lexer";
import { Human2RegexParser } from "./parser"; import { Human2RegexParser, Human2RegexParserOptions } from "./parser";
/* /*
$(function() { $(function() {
@ -12,8 +12,8 @@ $(function() {
}); });
*/ */
const lexer = new Human2RegexLexer(); const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(true));
const parser = new Human2RegexParser(); const parser = new Human2RegexParser(new Human2RegexParserOptions(true));
const result = lexer.tokenize(` const result = lexer.tokenize(`
// H2R supports // # and /**/ as comments // H2R supports // # and /**/ as comments
@ -58,11 +58,8 @@ create an optional group
console.log(result.errors); console.log(result.errors);
parser.input = result.tokens; parser.input = result.tokens;
const regex = parser.parse(); const regex = parser.parse();
console.log(regex); console.log(JSON.stringify(regex.children, undefined, 4));
console.log(parser.errors); console.log(parser.errors);
//interpreter.visit(regex);
//parser.getBaseCstVisitorConstructor();

View File

@ -1,3 +1,5 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { createToken, Lexer } from "chevrotain"; import { createToken, Lexer } from "chevrotain";
export const Zero = createToken({name: "Zero", pattern: /zero/i }); export const Zero = createToken({name: "Zero", pattern: /zero/i });

View File

@ -1,3 +1,5 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
export function last<T>(array: T[]) : T { export function last<T>(array: T[]) : T {
return array[array.length-1]; return array[array.length-1];
} }