1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-16 12:30:09 -07:00

Parser refactored and made faster

Though to be honest, it's already really fast
This commit is contained in:
Patrick Demian 2020-10-29 03:55:19 -04:00
parent bddc5d4f3b
commit 31e9872c4f
6 changed files with 71 additions and 55 deletions

9
docs/bundle.min.js vendored

File diff suppressed because one or more lines are too long

View File

@ -11,7 +11,7 @@ export enum IndentType {
}
export class Human2RegexLexerOptions {
constructor(public type: IndentType = IndentType.Both, public spaces_per_tab: number = 4) {
constructor(public skip_validations = false, public type: IndentType = IndentType.Both, public spaces_per_tab: number = 4) {
/* empty */
}
}
@ -52,7 +52,7 @@ export class Human2RegexLexer {
Indent.PATTERN = indent_regex;
this.lexer = new Lexer(AllTokens, { ensureOptimizations: true });
this.lexer = new Lexer(AllTokens, { ensureOptimizations: true, skipValidations: options.skip_validations });
}
private lex_error(token: IToken) : ILexingError {

View File

@ -1,10 +1,10 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { CstParser, CstNode } from "chevrotain";
import { CstParser, CstNode, IOrAlt } from "chevrotain";
import * as T from "./tokens";
export class Human2RegexParserOptions {
constructor() {
constructor(public skip_validations: boolean = false) {
/* empty */
}
}
@ -12,12 +12,10 @@ export class Human2RegexParserOptions {
export class Human2RegexParser extends CstParser {
private static already_init = false;
public nodes: { [key: string]: (idxInCallingRule?: number, ...args: unknown[]) => CstNode } = {};
public parse : (idxInCallingRule?: number, ...args: unknown[]) => CstNode;
constructor(private options: Human2RegexParserOptions = new Human2RegexParserOptions()) {
super(T.AllTokens, { recoveryEnabled: false, maxLookahead: 2});
super(T.AllTokens, { recoveryEnabled: false, maxLookahead: 2, skipValidations: options.skip_validations });
if (Human2RegexParser.already_init) {
throw new Error("Only 1 instance of Human2RegexParser allowed");
@ -27,8 +25,9 @@ export class Human2RegexParser extends CstParser {
const $ = this;
this.nodes.NumberSubStatement = $.RULE("NumberSubStatement", () => {
$.OR([
let nss_rules : IOrAlt<unknown>[] | null = null;
const NumberSubStatement = $.RULE("NumberSubStatement", () => {
$.OR(nss_rules || (nss_rules = [
{ ALT: () => $.CONSUME(T.One) },
{ ALT: () => $.CONSUME(T.Two) },
{ ALT: () => $.CONSUME(T.Three) },
@ -41,20 +40,20 @@ export class Human2RegexParser extends CstParser {
{ ALT: () => $.CONSUME(T.Ten) },
{ ALT: () => $.CONSUME(T.Zero) },
{ ALT: () => $.CONSUME(T.NumberLiteral) },
]);
]));
});
// 1, 1..2, between 1 and/to 2 inclusively/exclusively
this.nodes.CountSubStatement = $.RULE("CountSubStatement", () => {
const CountSubStatement = $.RULE("CountSubStatement", () => {
$.OR([
{ ALT: () => {
$.CONSUME(T.Between);
$.SUBRULE4(this.nodes.NumberSubStatement);
$.SUBRULE4(NumberSubStatement);
$.OR3([
{ ALT: () => $.CONSUME2(T.To) },
{ ALT: () => $.CONSUME(T.And) }
]);
$.SUBRULE5(this.nodes.NumberSubStatement);
$.SUBRULE5(NumberSubStatement);
$.OPTION4(() => $.CONSUME3(T.Times));
$.OPTION5(() => {
$.OR4([
@ -66,12 +65,12 @@ export class Human2RegexParser extends CstParser {
{ ALT: () => {
$.OPTION2(() => $.CONSUME(T.From));
$.SUBRULE2(this.nodes.NumberSubStatement);
$.SUBRULE2(NumberSubStatement);
$.OR2([
{ ALT: () => $.CONSUME(T.OrMore) },
{ ALT: () => {
$.CONSUME(T.To);
$.SUBRULE3(this.nodes.NumberSubStatement);
$.SUBRULE3(NumberSubStatement);
}}
]);
$.OPTION3(() => $.CONSUME2(T.Times));
@ -79,22 +78,35 @@ export class Human2RegexParser extends CstParser {
{ ALT: () => {
$.OPTION(() => $.CONSUME(T.Exactly));
$.SUBRULE(this.nodes.NumberSubStatement);
$.SUBRULE(NumberSubStatement);
$.OPTION6(() => $.CONSUME(T.Times));
}}
]);
});
this.nodes.MatchSubStatement = $.RULE("MatchSubStatement", () => {
$.OPTION(() => $.SUBRULE(this.nodes.CountSubStatement) );
let mss_rules : IOrAlt<unknown>[] | null = null;
const MatchSubStatement = $.RULE("MatchSubStatement", () => {
$.OPTION(() => $.SUBRULE(CountSubStatement) );
$.OPTION2(() => $.CONSUME(T.Not));
$.AT_LEAST_ONE_SEP({
SEP: T.Or,
DEF: () => {
$.OPTION3(() => $.CONSUME(T.A));
$.OR([
{ ALT: () => $.CONSUME(T.Anything) },
$.OR(mss_rules || (mss_rules = [
{ ALT: () => {
$.OPTION4(() => $.CONSUME(T.From));
$.CONSUME2(T.StringLiteral);
$.CONSUME(T.To);
$.CONSUME3(T.StringLiteral);
}},
{ ALT: () => {
$.CONSUME(T.Between);
$.CONSUME4(T.StringLiteral);
$.CONSUME(T.And);
$.CONSUME5(T.StringLiteral);
}},
{ ALT: () => $.CONSUME(T.StringLiteral) },
{ ALT: () => $.CONSUME(T.Anything) },
{ ALT: () => $.CONSUME(T.Word) },
{ ALT: () => $.CONSUME(T.Digit) },
{ ALT: () => $.CONSUME(T.Character) },
@ -104,17 +116,16 @@ export class Human2RegexParser extends CstParser {
{ ALT: () => $.CONSUME(T.Linefeed) },
{ ALT: () => $.CONSUME(T.Newline) },
{ ALT: () => $.CONSUME(T.CarriageReturn) },
]);
]));
}
});
});
// optionally match "+" then 1+ words
this.nodes.MatchStatement = $.RULE("MatchStatement", () => {
const MatchStatement = $.RULE("MatchStatement", () => {
$.OPTION(() => $.CONSUME(T.Optional));
$.CONSUME(T.Match);
$.SUBRULE(this.nodes.MatchSubStatement);
$.SUBRULE(MatchSubStatement);
$.MANY(() => {
$.OR([
{ ALT: () => {
@ -124,31 +135,32 @@ export class Human2RegexParser extends CstParser {
{ ALT: () => $.CONSUME(T.And) },
]);
$.OPTION3(() => $.CONSUME2(T.Optional));
$.SUBRULE2(this.nodes.MatchSubStatement);
$.SUBRULE2(MatchSubStatement);
});
$.CONSUME(T.EndOfLine);
});
// using global matching
this.nodes.UsingStatement = $.RULE("UsingStatement", () => {
let us_rules : IOrAlt<unknown>[] | null = null;
const UsingStatement = $.RULE("UsingStatement", () => {
$.CONSUME(T.Using);
$.AT_LEAST_ONE_SEP({
SEP: T.And,
DEF: () => {
$.OR([
$.OR(us_rules || (us_rules = [
{ ALT: () => $.CONSUME(T.Multiline) },
{ ALT: () => $.CONSUME(T.Global) },
{ ALT: () => $.CONSUME(T.CaseInsensitive) },
{ ALT: () => $.CONSUME(T.CaseSensitive) },
{ ALT: () => $.CONSUME(T.Exact) }
]);
]));
$.OPTION(() => $.CONSUME(T.Matching));
}
});
$.CONSUME(T.EndOfLine);
});
this.nodes.GroupStatement = $.RULE("GroupStatement", () => {
const GroupStatement = $.RULE("GroupStatement", () => {
$.OPTION2(() => $.CONSUME(T.Optional));
$.CONSUME(T.Create);
$.CONSUME(T.A);
@ -160,36 +172,36 @@ export class Human2RegexParser extends CstParser {
});
$.CONSUME2(T.EndOfLine);
$.CONSUME(T.Indent);
$.AT_LEAST_ONE(this.nodes.Statement);
$.AT_LEAST_ONE(Statement);
$.CONSUME(T.Outdent);
});
this.nodes.RepeatStatement = $.RULE("RepeatStatement", () => {
const RepeatStatement = $.RULE("RepeatStatement", () => {
$.OPTION3(() => $.CONSUME(T.Optional));
$.CONSUME(T.Repeat);
$.OPTION(() => $.SUBRULE(this.nodes.CountSubStatement));
$.OPTION(() => $.SUBRULE(CountSubStatement));
$.CONSUME3(T.EndOfLine);
$.CONSUME(T.Indent);
$.AT_LEAST_ONE(this.nodes.Statement);
$.AT_LEAST_ONE(Statement);
$.CONSUME(T.Outdent);
});
this.nodes.Statement = $.RULE("Statement", () => {
const Statement = $.RULE("Statement", () => {
$.OR([
{ ALT: () => $.SUBRULE(this.nodes.MatchStatement) },
{ ALT: () => $.SUBRULE(this.nodes.GroupStatement) },
{ ALT: () => $.SUBRULE(this.nodes.RepeatStatement) }
{ ALT: () => $.SUBRULE(MatchStatement) },
{ ALT: () => $.SUBRULE(GroupStatement) },
{ ALT: () => $.SUBRULE(RepeatStatement) }
]);
});
this.nodes.Regex = $.RULE("Regex", () => {
$.MANY(() => $.SUBRULE(this.nodes.UsingStatement));
$.MANY2(() => $.SUBRULE(this.nodes.Statement) );
const Regex = $.RULE("Regex", () => {
$.MANY(() => $.SUBRULE(UsingStatement));
$.MANY2(() => $.SUBRULE(Statement) );
});
this.performSelfAnalysis();
this.parse = this.nodes.Regex;
this.parse = Regex;
}
//public set_options(options: Human2RegexParserOptions) : void {

View File

@ -1,10 +1,10 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
"use strict";
import "./style.css";
import { Human2RegexLexer } from "./tokenizer";
import { Human2RegexParser } from "./parser";
import { Human2RegexLexer, Human2RegexLexerOptions } from "./lexer";
import { Human2RegexParser, Human2RegexParserOptions } from "./parser";
/*
$(function() {
@ -12,8 +12,8 @@ $(function() {
});
*/
const lexer = new Human2RegexLexer();
const parser = new Human2RegexParser();
const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(true));
const parser = new Human2RegexParser(new Human2RegexParserOptions(true));
const result = lexer.tokenize(`
// H2R supports // # and /**/ as comments
@ -58,11 +58,8 @@ create an optional group
console.log(result.errors);
parser.input = result.tokens;
const regex = parser.parse();
console.log(regex);
console.log(parser.errors);
//interpreter.visit(regex);
//parser.getBaseCstVisitorConstructor();
console.log(JSON.stringify(regex.children, undefined, 4));
console.log(parser.errors);

View File

@ -1,3 +1,5 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { createToken, Lexer } from "chevrotain";
export const Zero = createToken({name: "Zero", pattern: /zero/i });

View File

@ -1,3 +1,5 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
export function last<T>(array: T[]) : T {
return array[array.length-1];
}