Mirror of https://github.com/pdemian/human2regex.git, synced 2025-05-16 04:20:35 -07:00
Parser refactored and made faster
Though to be honest, it's already really fast
commit 31e9872c4f
parent bddc5d4f3b
docs/bundle.min.js (vendored, 9 changes)
File diff suppressed because one or more lines are too long
@@ -11,7 +11,7 @@ export enum IndentType {
 }

 export class Human2RegexLexerOptions {
-    constructor(public type: IndentType = IndentType.Both, public spaces_per_tab: number = 4) {
+    constructor(public skip_validations = false, public type: IndentType = IndentType.Both, public spaces_per_tab: number = 4) {
         /* empty */
     }
 }
@@ -52,7 +52,7 @@ export class Human2RegexLexer

         Indent.PATTERN = indent_regex;

-        this.lexer = new Lexer(AllTokens, { ensureOptimizations: true });
+        this.lexer = new Lexer(AllTokens, { ensureOptimizations: true, skipValidations: options.skip_validations });
     }

     private lex_error(token: IToken) : ILexingError {
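The lexer change above only threads a new skip_validations flag (the new first constructor argument, defaulting to false) through to Chevrotain's skipValidations lexer option, so token-definition validation can be skipped once the token set is known to be good. A minimal usage sketch based on the constructor signature in this hunk; the trailing arguments are just its defaults, and it assumes IndentType is exported from the same "./lexer" module shown in the import change further down:

import { Human2RegexLexer, Human2RegexLexerOptions, IndentType } from "./lexer";

// true => skip Chevrotain's lexer definition validations for a faster startup;
// IndentType.Both and 4 spaces per tab are the defaults from the constructor above.
const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(true, IndentType.Both, 4));
const result = lexer.tokenize("match a word");
console.log(result.errors);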
@@ -1,10 +1,10 @@
 /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */

-import { CstParser, CstNode } from "chevrotain";
+import { CstParser, CstNode, IOrAlt } from "chevrotain";
 import * as T from "./tokens";

 export class Human2RegexParserOptions {
-    constructor() {
+    constructor(public skip_validations: boolean = false) {
         /* empty */
     }
 }
@@ -12,12 +12,10 @@ export class Human2RegexParserOptions
 export class Human2RegexParser extends CstParser {
     private static already_init = false;

-    public nodes: { [key: string]: (idxInCallingRule?: number, ...args: unknown[]) => CstNode } = {};
-
     public parse : (idxInCallingRule?: number, ...args: unknown[]) => CstNode;

     constructor(private options: Human2RegexParserOptions = new Human2RegexParserOptions()) {
-        super(T.AllTokens, { recoveryEnabled: false, maxLookahead: 2});
+        super(T.AllTokens, { recoveryEnabled: false, maxLookahead: 2, skipValidations: options.skip_validations });

         if (Human2RegexParser.already_init) {
             throw new Error("Only 1 instance of Human2RegexParser allowed");
@@ -27,8 +25,9 @@ export class Human2RegexParser extends CstParser {

         const $ = this;

-        this.nodes.NumberSubStatement = $.RULE("NumberSubStatement", () => {
-            $.OR([
+        let nss_rules : IOrAlt<unknown>[] | null = null;
+        const NumberSubStatement = $.RULE("NumberSubStatement", () => {
+            $.OR(nss_rules || (nss_rules = [
                 { ALT: () => $.CONSUME(T.One) },
                 { ALT: () => $.CONSUME(T.Two) },
                 { ALT: () => $.CONSUME(T.Three) },
@@ -41,20 +40,20 @@
                 { ALT: () => $.CONSUME(T.Ten) },
                 { ALT: () => $.CONSUME(T.Zero) },
                 { ALT: () => $.CONSUME(T.NumberLiteral) },
-            ]);
+            ]));
         });

         // 1, 1..2, between 1 and/to 2 inclusively/exclusively
-        this.nodes.CountSubStatement = $.RULE("CountSubStatement", () => {
+        const CountSubStatement = $.RULE("CountSubStatement", () => {
             $.OR([
                 { ALT: () => {
                     $.CONSUME(T.Between);
-                    $.SUBRULE4(this.nodes.NumberSubStatement);
+                    $.SUBRULE4(NumberSubStatement);
                     $.OR3([
                         { ALT: () => $.CONSUME2(T.To) },
                         { ALT: () => $.CONSUME(T.And) }
                     ]);
-                    $.SUBRULE5(this.nodes.NumberSubStatement);
+                    $.SUBRULE5(NumberSubStatement);
                     $.OPTION4(() => $.CONSUME3(T.Times));
                     $.OPTION5(() => {
                         $.OR4([
@@ -66,12 +65,12 @@

                 { ALT: () => {
                     $.OPTION2(() => $.CONSUME(T.From));
-                    $.SUBRULE2(this.nodes.NumberSubStatement);
+                    $.SUBRULE2(NumberSubStatement);
                     $.OR2([
                         { ALT: () => $.CONSUME(T.OrMore) },
                         { ALT: () => {
                             $.CONSUME(T.To);
-                            $.SUBRULE3(this.nodes.NumberSubStatement);
+                            $.SUBRULE3(NumberSubStatement);
                         }}
                     ]);
                     $.OPTION3(() => $.CONSUME2(T.Times));
@@ -79,22 +78,35 @@

                 { ALT: () => {
                     $.OPTION(() => $.CONSUME(T.Exactly));
-                    $.SUBRULE(this.nodes.NumberSubStatement);
+                    $.SUBRULE(NumberSubStatement);
                     $.OPTION6(() => $.CONSUME(T.Times));
                 }}
             ]);
         });

-        this.nodes.MatchSubStatement = $.RULE("MatchSubStatement", () => {
-            $.OPTION(() => $.SUBRULE(this.nodes.CountSubStatement) );
+        let mss_rules : IOrAlt<unknown>[] | null = null;
+        const MatchSubStatement = $.RULE("MatchSubStatement", () => {
+            $.OPTION(() => $.SUBRULE(CountSubStatement) );
             $.OPTION2(() => $.CONSUME(T.Not));
             $.AT_LEAST_ONE_SEP({
                 SEP: T.Or,
                 DEF: () => {
                     $.OPTION3(() => $.CONSUME(T.A));
-                    $.OR([
-                        { ALT: () => $.CONSUME(T.Anything) },
+                    $.OR(mss_rules || (mss_rules = [
+                        { ALT: () => {
+                            $.OPTION4(() => $.CONSUME(T.From));
+                            $.CONSUME2(T.StringLiteral);
+                            $.CONSUME(T.To);
+                            $.CONSUME3(T.StringLiteral);
+                        }},
+                        { ALT: () => {
+                            $.CONSUME(T.Between);
+                            $.CONSUME4(T.StringLiteral);
+                            $.CONSUME(T.And);
+                            $.CONSUME5(T.StringLiteral);
+                        }},
                         { ALT: () => $.CONSUME(T.StringLiteral) },
+                        { ALT: () => $.CONSUME(T.Anything) },
                         { ALT: () => $.CONSUME(T.Word) },
                         { ALT: () => $.CONSUME(T.Digit) },
                         { ALT: () => $.CONSUME(T.Character) },
@@ -104,17 +116,16 @@
                         { ALT: () => $.CONSUME(T.Linefeed) },
                         { ALT: () => $.CONSUME(T.Newline) },
                         { ALT: () => $.CONSUME(T.CarriageReturn) },
-                    ]);
-
+                    ]));
                 }
             });
         });

         // optionally match "+" then 1+ words
-        this.nodes.MatchStatement = $.RULE("MatchStatement", () => {
+        const MatchStatement = $.RULE("MatchStatement", () => {
             $.OPTION(() => $.CONSUME(T.Optional));
             $.CONSUME(T.Match);
-            $.SUBRULE(this.nodes.MatchSubStatement);
+            $.SUBRULE(MatchSubStatement);
             $.MANY(() => {
                 $.OR([
                     { ALT: () => {
@@ -124,31 +135,32 @@
                     { ALT: () => $.CONSUME(T.And) },
                 ]);
                 $.OPTION3(() => $.CONSUME2(T.Optional));
-                $.SUBRULE2(this.nodes.MatchSubStatement);
+                $.SUBRULE2(MatchSubStatement);
             });
             $.CONSUME(T.EndOfLine);
         });

         // using global matching
-        this.nodes.UsingStatement = $.RULE("UsingStatement", () => {
+        let us_rules : IOrAlt<unknown>[] | null = null;
+        const UsingStatement = $.RULE("UsingStatement", () => {
             $.CONSUME(T.Using);
             $.AT_LEAST_ONE_SEP({
                 SEP: T.And,
                 DEF: () => {
-                    $.OR([
+                    $.OR(us_rules || (us_rules = [
                         { ALT: () => $.CONSUME(T.Multiline) },
                         { ALT: () => $.CONSUME(T.Global) },
                         { ALT: () => $.CONSUME(T.CaseInsensitive) },
                         { ALT: () => $.CONSUME(T.CaseSensitive) },
                         { ALT: () => $.CONSUME(T.Exact) }
-                    ]);
+                    ]));
                     $.OPTION(() => $.CONSUME(T.Matching));
                 }
             });
             $.CONSUME(T.EndOfLine);
         });

-        this.nodes.GroupStatement = $.RULE("GroupStatement", () => {
+        const GroupStatement = $.RULE("GroupStatement", () => {
             $.OPTION2(() => $.CONSUME(T.Optional));
             $.CONSUME(T.Create);
             $.CONSUME(T.A);
@@ -160,36 +172,36 @@
             });
             $.CONSUME2(T.EndOfLine);
             $.CONSUME(T.Indent);
-            $.AT_LEAST_ONE(this.nodes.Statement);
+            $.AT_LEAST_ONE(Statement);
             $.CONSUME(T.Outdent);
         });

-        this.nodes.RepeatStatement = $.RULE("RepeatStatement", () => {
+        const RepeatStatement = $.RULE("RepeatStatement", () => {
             $.OPTION3(() => $.CONSUME(T.Optional));
             $.CONSUME(T.Repeat);
-            $.OPTION(() => $.SUBRULE(this.nodes.CountSubStatement));
+            $.OPTION(() => $.SUBRULE(CountSubStatement));
             $.CONSUME3(T.EndOfLine);
             $.CONSUME(T.Indent);
-            $.AT_LEAST_ONE(this.nodes.Statement);
+            $.AT_LEAST_ONE(Statement);
             $.CONSUME(T.Outdent);
         });

-        this.nodes.Statement = $.RULE("Statement", () => {
+        const Statement = $.RULE("Statement", () => {
             $.OR([
-                { ALT: () => $.SUBRULE(this.nodes.MatchStatement) },
-                { ALT: () => $.SUBRULE(this.nodes.GroupStatement) },
-                { ALT: () => $.SUBRULE(this.nodes.RepeatStatement) }
+                { ALT: () => $.SUBRULE(MatchStatement) },
+                { ALT: () => $.SUBRULE(GroupStatement) },
+                { ALT: () => $.SUBRULE(RepeatStatement) }
             ]);
         });

-        this.nodes.Regex = $.RULE("Regex", () => {
-            $.MANY(() => $.SUBRULE(this.nodes.UsingStatement));
-            $.MANY2(() => $.SUBRULE(this.nodes.Statement) );
+        const Regex = $.RULE("Regex", () => {
+            $.MANY(() => $.SUBRULE(UsingStatement));
+            $.MANY2(() => $.SUBRULE(Statement) );
         });

         this.performSelfAnalysis();

-        this.parse = this.nodes.Regex;
+        this.parse = Regex;
     }

     //public set_options(options: Human2RegexParserOptions) : void {
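The parser hunks above make two changes: each grammar rule becomes a local const bound to the function returned by $.RULE (replacing the removed this.nodes lookup map), and each frequently hit $.OR caches its array of alternatives in a closure variable via the cached || (cached = [...]) idiom, so the IOrAlt array is allocated once on the first invocation instead of on every call. A standalone sketch of that caching pattern with a toy two-token grammar; the token names and rule here are made up for illustration and are not part of the human2regex grammar:

import { createToken, CstNode, CstParser, IOrAlt, Lexer } from "chevrotain";

const Hello = createToken({ name: "Hello", pattern: /hello/i });
const World = createToken({ name: "World", pattern: /world/i });
const example_tokens = [Hello, World];

class ExampleParser extends CstParser {
    public greeting: (idxInCallingRule?: number, ...args: unknown[]) => CstNode;

    constructor() {
        super(example_tokens, { recoveryEnabled: false, maxLookahead: 2, skipValidations: false });

        const $ = this;

        // The alternatives array is built the first time the rule runs and
        // reused afterwards, instead of being re-allocated on every invocation.
        let greeting_rules : IOrAlt<unknown>[] | null = null;
        this.greeting = $.RULE("greeting", () => {
            $.OR(greeting_rules || (greeting_rules = [
                { ALT: () => $.CONSUME(Hello) },
                { ALT: () => $.CONSUME(World) },
            ]));
        });

        this.performSelfAnalysis();
    }
}

// Usage: lex, feed the tokens to the parser, then invoke the rule.
const toy_lexer = new Lexer(example_tokens);
const toy_parser = new ExampleParser();
toy_parser.input = toy_lexer.tokenize("hello").tokens;
console.log(toy_parser.greeting());
console.log(toy_parser.errors);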
@@ -1,10 +1,10 @@
 /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
 "use strict";

 import "./style.css";

-import { Human2RegexLexer } from "./tokenizer";
-import { Human2RegexParser } from "./parser";
+import { Human2RegexLexer, Human2RegexLexerOptions } from "./lexer";
+import { Human2RegexParser, Human2RegexParserOptions } from "./parser";

 /*
 $(function() {
@@ -12,8 +12,8 @@ $(function() {
 });
 */

-const lexer = new Human2RegexLexer();
-const parser = new Human2RegexParser();
+const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(true));
+const parser = new Human2RegexParser(new Human2RegexParserOptions(true));

 const result = lexer.tokenize(`
 // H2R supports // # and /**/ as comments
@@ -58,11 +58,8 @@ create an optional group


 console.log(result.errors);

 parser.input = result.tokens;
 const regex = parser.parse();
-console.log(regex);
-console.log(parser.errors);
-
 //interpreter.visit(regex);

 //parser.getBaseCstVisitorConstructor();
+console.log(JSON.stringify(regex.children, undefined, 4));
+console.log(parser.errors);
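The demo script above constructs both stages with validations skipped and simply logs result.errors and parser.errors. A small hypothetical wrapper (not part of this commit) showing the same tokenize-then-parse flow with fail-fast error handling; the lexer and parser are created once because the parser class allows only a single instance:

import { CstNode } from "chevrotain";

import { Human2RegexLexer, Human2RegexLexerOptions } from "./lexer";
import { Human2RegexParser, Human2RegexParserOptions } from "./parser";

const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(true));
const parser = new Human2RegexParser(new Human2RegexParserOptions(true));

// Hypothetical helper: run the full pipeline and throw on the first error
// instead of just logging, returning the CST produced by parser.parse().
function parseHuman2Regex(source: string): CstNode {
    const lex_result = lexer.tokenize(source);
    if (lex_result.errors.length > 0) {
        throw new Error("Lexing failed: " + lex_result.errors[0].message);
    }

    parser.input = lex_result.tokens;
    const cst = parser.parse();
    if (parser.errors.length > 0) {
        throw new Error("Parsing failed: " + parser.errors[0].message);
    }
    return cst;
}

// Example invocation (any valid Human2Regex program as input):
// const cst = parseHuman2Regex(my_h2r_source);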
@@ -1,3 +1,5 @@
 /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */

 import { createToken, Lexer } from "chevrotain";

 export const Zero = createToken({name: "Zero", pattern: /zero/i });
@@ -1,3 +1,5 @@
 /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */

 export function last<T>(array: T[]) : T {
     return array[array.length-1];
 }
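The last hunk touches the top of the small utilities module whose last helper is shown above; the helper returns the final element of an array and has no empty-array guard. A quick usage sketch; the import path is assumed here, since the file name is not visible in this diff:

import { last } from "./utilities"; // assumed path

const flags = ["global", "multiline", "case insensitive"];
console.log(last(flags));       // -> "case insensitive"
console.log(last<string>([]));  // -> undefined at runtime: no empty-array guard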