mirror of
https://github.com/pdemian/human2regex.git
synced 2025-07-01 02:10:44 -07:00
Got a rudimentary syntax tree started
This commit is contained in:
parent
91d1b37322
commit
3baefade52
@ -43,7 +43,7 @@
|
||||
"@typescript-eslint/naming-convention": [
|
||||
"error",
|
||||
{ "selector": "default", "format": [ "snake_case", "PascalCase" ] },
|
||||
{ "selector": "property", "format": [ "camelCase", "snake_case", "PascalCase" ] },
|
||||
{ "selector": "property", "format": [ "camelCase", "snake_case", "UPPER_CASE", "PascalCase" ] },
|
||||
{ "selector": [ "function", "method"], "format": [ "camelCase", "UPPER_CASE" ] },
|
||||
{ "selector": "typeLike", "format": [ "PascalCase" ] }
|
||||
],
|
||||
@ -55,7 +55,7 @@
|
||||
"camelcase": "off",
|
||||
"no-magic-numbers": [
|
||||
"warn",
|
||||
{ "ignoreArrayIndexes": true, "ignore": [-1,0,1,2,3,4,5,6,7,8,9]}
|
||||
{ "ignoreArrayIndexes": true, "ignore": [-1,0,1,2,3,4,5,6,7,8,9,10]}
|
||||
],
|
||||
"curly": "warn",
|
||||
"no-loss-of-precision": "error",
|
||||
|
13
docs/bundle.min.js
vendored
13
docs/bundle.min.js
vendored
File diff suppressed because one or more lines are too long
255
src/generator.ts
Normal file
255
src/generator.ts
Normal file
@ -0,0 +1,255 @@
|
||||
/* eslint-disable @typescript-eslint/no-unused-vars */
|
||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||
|
||||
import { regexEscape, removeQuotes, hasFlag, combineFlags } from "./utilities";
|
||||
|
||||
/**
 * Identifies the regex flavour handed to `validate` / `toRegex` on every
 * syntax-tree node in this file.
 */
export enum RobotLanguage {
    JS,
    Perl,
    DotNet,
    Java
}
|
||||
|
||||
/**
 * Base contract for every node of the Human2Regex syntax tree.
 */
export abstract class H2RCST {
    /**
     * Checks this node for semantic problems.
     *
     * @param language the regex flavour being targeted
     * @returns the problems found; an empty array means the node is valid
     */
    public abstract validate(language: RobotLanguage): Error[];

    /**
     * Renders this node as regular-expression text.
     *
     * @param language the regex flavour being targeted
     * @returns the regex fragment for this node
     */
    public abstract toRegex(language: RobotLanguage): string;
}
|
||||
|
||||
/* eslint-disable no-bitwise */
|
||||
/**
 * Modifiers accepted by a `using` statement. Each member occupies its own bit
 * so that several can be merged with `combineFlags` and probed with `hasFlag`.
 */
export enum UsingFlags {
    Multiline = 1 << 0,
    Global = 1 << 1,
    Sensitive = 1 << 2,
    Insensitive = 1 << 3,
    Exact = 1 << 4
}
|
||||
/* eslint-enable no-bitwise */
|
||||
|
||||
|
||||
/**
 * The kinds of thing a single alternative of a `match` statement can describe.
 * `MatchSubStatementCST.toRegex` turns each kind into its regex fragment.
 */
export enum MatchSubStatementType {
    SingleString,
    Between,
    Anything,
    Word,
    Digit,
    Character,
    Whitespace,
    Number,
    Tab,
    Linefeed,
    Newline,
    CarriageReturn
}
|
||||
|
||||
/**
 * One alternative inside a match sub-statement.
 */
export class MatchSubStatementValue {
    /**
     * @param type which kind of match this alternative is
     * @param from the string for `SingleString`, or the lower bound for `Between`; `null` otherwise
     * @param to the upper bound for `Between`; `null` otherwise
     */
    constructor(public type: MatchSubStatementType, public from: string | null, public to: string | null) {
        /* empty */
    }
}
|
||||
|
||||
/**
 * One segment of a `match` statement: a sub-statement plus whether it was
 * marked as optional.
 */
export class MatchStatementValue {
    /**
     * @param optional true when the segment was marked optional
     * @param statement the sub-statement describing what to match
     */
    constructor(public optional: boolean, public statement: MatchSubStatementCST) {
        /* empty */
    }
}
|
||||
|
||||
/**
 * Common base for the statement nodes (match, repeat, group) so they can be
 * stored together in one `StatementCST[]`.
 */
export abstract class StatementCST implements H2RCST {
    /**
     * Checks this statement for semantic problems.
     *
     * @param language the regex flavour being targeted
     * @returns the problems found; an empty array means the statement is valid
     */
    public abstract validate(language: RobotLanguage): Error[];

    /**
     * Renders this statement as regular-expression text.
     *
     * @param language the regex flavour being targeted
     * @returns the regex fragment for this statement
     */
    public abstract toRegex(language: RobotLanguage): string;
}
|
||||
|
||||
/**
 * Syntax-tree node for one sub-statement of a `match`: an optional count, an
 * optional negation, and one or more alternatives.
 */
export class MatchSubStatementCST implements H2RCST {
    /**
     * @param count how often the match repeats, or `null` when no count was given
     * @param invert true when the alternatives are negated
     * @param values the alternatives; they are joined with `|` in the output
     */
    constructor(public count: CountSubStatementCST | null, public invert: boolean = false, public values: MatchSubStatementValue[]) {
        /* empty */
    }

    /**
     * Not implemented yet.
     *
     * @throws Error always
     */
    public validate(language: RobotLanguage): Error[] {
        throw new Error("Method not implemented.");
    }

    /**
     * Renders every alternative as a regex fragment and joins them with `|`.
     *
     * NOTE(review): `count` is not applied to the output yet — confirm whether
     * the caller is expected to append it.
     *
     * @param language the regex flavour being targeted (currently unused)
     * @returns the alternation of all rendered alternatives
     */
    public toRegex(language: RobotLanguage): string {
        const alternatives: string[] = [];

        for (const value of this.values) {
            switch (value.type) {
                case MatchSubStatementType.SingleString: {
                    const literal = regexEscape(removeQuotes(value.from as string));
                    // A negated literal becomes a negative lookahead.
                    alternatives.push(this.invert ? `(?:(?!${literal}))` : literal);
                    break;
                }
                case MatchSubStatementType.Between: {
                    // The bounds are quoted string literals: strip the quotes and
                    // escape them so that the character class stays well formed.
                    const lower = regexEscape(removeQuotes(value.from as string));
                    const upper = regexEscape(removeQuotes(value.to as string));
                    alternatives.push(this.invert ? `[^${lower}-${upper}]` : `[${lower}-${upper}]`);
                    break;
                }
                case MatchSubStatementType.Word:
                    alternatives.push(this.invert ? "\\W" : "\\w");
                    break;
                case MatchSubStatementType.Digit:
                    alternatives.push(this.invert ? "\\D" : "\\d");
                    break;
                case MatchSubStatementType.Character:
                    alternatives.push(this.invert ? "[^a-zA-Z]" : "[a-zA-Z]");
                    break;
                case MatchSubStatementType.Whitespace:
                    alternatives.push(this.invert ? "\\S" : "\\s");
                    break;
                case MatchSubStatementType.Number:
                    alternatives.push(this.invert ? "\\D+" : "\\d+");
                    break;
                case MatchSubStatementType.Tab:
                    alternatives.push(this.invert ? "[^\\t]" : "\\t");
                    break;
                case MatchSubStatementType.Newline:
                case MatchSubStatementType.Linefeed:
                    // Both keywords produce a line feed.
                    alternatives.push(this.invert ? "[^\\n]" : "\\n");
                    break;
                case MatchSubStatementType.CarriageReturn:
                    alternatives.push(this.invert ? "[^\\r]" : "\\r");
                    break;
                default:
                    // default: anything
                    // NOTE(review): inside a character class "." is a literal dot, so
                    // "[^.]" means "any character except a dot" — confirm this is the
                    // intended meaning of "not anything".
                    alternatives.push(this.invert ? "[^.]" : ".");
                    break;
            }
        }

        return alternatives.join("|");
    }
}
|
||||
|
||||
/**
 * Syntax-tree node holding the modifiers collected from `using` statements.
 */
export class UsingStatementCST implements H2RCST {
    /**
     * @param flags the modifiers in source order; each entry is expected to be a
     *              single `UsingFlags` member rather than a combination
     */
    constructor(public flags: UsingFlags[]) {
        /* empty */
    }

    /**
     * Reports modifiers that are repeated and the contradictory pairing of
     * case sensitive with case insensitive.
     *
     * @param language the regex flavour being targeted (currently unused)
     * @returns the problems found; empty when the modifiers are consistent
     */
    public validate(language: RobotLanguage): Error[] {
        const errors: Error[] = [];
        // Union of every modifier seen so far; starting at 0 keeps an empty list valid.
        let seen = 0;

        for (const flag of this.flags) {
            if (hasFlag(seen, flag)) {
                errors.push(new Error("Duplicate modifier: " + UsingFlags[flag]));
            }
            seen = combineFlags(seen, flag);
        }

        if (hasFlag(seen, UsingFlags.Sensitive) && hasFlag(seen, UsingFlags.Insensitive)) {
            errors.push(new Error("Cannot be both case sensitive and insensitive"));
        }

        return errors;
    }

    /**
     * Builds the regex literal template: `/{regex}/` followed by the flag
     * letters, wrapped in `^`...`$` when exact matching was requested. The
     * `{regex}` placeholder is left in place for the caller to fill in.
     *
     * @param language the regex flavour being targeted (currently unused)
     * @returns the template string including flag letters
     */
    public toRegex(language: RobotLanguage): string {
        let letters = "";
        let exact = false;
        let emitted = 0;

        for (const flag of this.flags) {
            // A repeated modifier must not repeat its letter: "/x/gg" is not a valid regex.
            if (hasFlag(emitted, flag)) {
                continue;
            }
            emitted = combineFlags(emitted, flag);

            if (hasFlag(flag, UsingFlags.Multiline)) {
                letters += "m";
            }
            else if (hasFlag(flag, UsingFlags.Global)) {
                letters += "g";
            }
            else if (hasFlag(flag, UsingFlags.Insensitive)) {
                letters += "i";
            }
            else if (hasFlag(flag, UsingFlags.Exact)) {
                exact = true;
            }
        }

        return exact ? "/^{regex}$/" + letters : "/{regex}/" + letters;
    }
}
|
||||
|
||||
/**
 * Syntax-tree node for a repetition count such as "3", "1 or more" or
 * "between 2 and 5 exclusive".
 */
export class CountSubStatementCST implements H2RCST {
    /**
     * @param from the lower bound, or the exact count when `to` is `null` and `opt` is not `"+"`
     * @param to the upper bound, or `null` when none was given
     * @param opt `"exclusive"` lowers the upper bound by one, `"+"` leaves the
     *            range open ended; `"inclusive"` and `null` change nothing
     */
    constructor(public from: number, public to: number | null, public opt: "inclusive" | "exclusive" | "+" | null) {
        /* empty */
    }

    /**
     * Rejects a negative lower bound and ranges whose effective upper bound
     * does not lie above the lower bound.
     *
     * @param language the regex flavour being targeted (currently unused)
     * @returns the problems found; at most one is reported
     */
    public validate(language: RobotLanguage): Error[] {
        const errors: Error[] = [];

        if (this.from < 0) {
            errors.push(new Error("Value cannot be negative"));
        }
        else if (this.to !== null) {
            // "exclusive" shrinks the upper bound by one before it is compared.
            const upper = this.opt === "exclusive" ? this.to - 1 : this.to;
            if (upper <= this.from) {
                errors.push(new Error("Values must be in range of eachother"));
            }
        }

        return errors;
    }

    /**
     * Renders the count as a regex quantifier: `{from,to}`, `{from,}` or `{from}`.
     *
     * @param language the regex flavour being targeted (currently unused)
     * @returns the quantifier text
     */
    public toRegex(language: RobotLanguage): string {
        if (this.to !== null) {
            const upper = this.opt === "exclusive" ? this.to - 1 : this.to;
            return `{${this.from},${upper}}`;
        }

        if (this.opt === "+") {
            return `{${this.from},}`;
        }

        return `{${this.from}}`;
    }
}
|
||||
|
||||
/**
 * Syntax-tree node for a whole `match` statement: an ordered list of segments.
 * Validation and regex generation are not implemented yet.
 */
export class MatchStatementCST implements StatementCST {
    /**
     * @param matches the segments of the statement, in source order
     */
    constructor(public matches: MatchStatementValue[]) {
        /* empty */
    }

    /**
     * Not implemented yet.
     *
     * @throws Error always
     */
    public validate(language: RobotLanguage): Error[] {
        throw new Error("Method not implemented.");
    }

    /**
     * Not implemented yet.
     *
     * @throws Error always
     */
    public toRegex(language: RobotLanguage): string {
        throw new Error("Method not implemented.");
    }
}
|
||||
|
||||
/**
 * Syntax-tree node for a `repeat` block and the statements nested inside it.
 * Validation and regex generation are not implemented yet.
 */
export class RepeatStatementCST implements StatementCST {
    /**
     * @param optional true when the block was marked optional
     * @param count how often to repeat, or `null` when no count was given
     * @param statements the nested statements, in source order
     */
    constructor(public optional: boolean, public count: CountSubStatementCST | null, public statements: StatementCST[]) {
        /* empty */
    }

    /**
     * Not implemented yet.
     *
     * @throws Error always
     */
    public validate(language: RobotLanguage): Error[] {
        throw new Error("Method not implemented.");
    }

    /**
     * Not implemented yet.
     *
     * @throws Error always
     */
    public toRegex(language: RobotLanguage): string {
        throw new Error("Method not implemented.");
    }
}
|
||||
|
||||
/**
 * Syntax-tree node for a `create a group` block and the statements nested
 * inside it. Validation and regex generation are not implemented yet.
 */
export class GroupStatementCST implements StatementCST {
    /**
     * @param optional true when the group was marked optional
     * @param name the group's name, or `null` for an unnamed group
     * @param statements the nested statements, in source order
     */
    constructor(public optional: boolean, public name: string | null, public statements: StatementCST[]) {
        /* empty */
    }

    /**
     * Not implemented yet.
     *
     * @throws Error always
     */
    public validate(language: RobotLanguage): Error[] {
        throw new Error("Method not implemented.");
    }

    /**
     * Not implemented yet.
     *
     * @throws Error always
     */
    public toRegex(language: RobotLanguage): string {
        throw new Error("Method not implemented.");
    }
}
|
||||
|
||||
/**
 * Root of the syntax tree: the `using` modifiers plus the top-level
 * statements. Validation and regex generation are not implemented yet.
 */
export class RegularExpressionCST implements H2RCST {
    /**
     * @param usings the modifiers that apply to the whole expression
     * @param statements the top-level statements, in source order
     */
    constructor(public usings: UsingStatementCST, public statements: StatementCST[]) {
        /* empty */
    }

    /**
     * Not implemented yet.
     *
     * @throws Error always
     */
    public validate(language: RobotLanguage): Error[] {
        throw new Error("Method not implemented.");
    }

    /**
     * Not implemented yet.
     *
     * @throws Error always
     */
    public toRegex(language: RobotLanguage): string {
        throw new Error("Method not implemented.");
    }
}
|
317
src/parser.ts
317
src/parser.ts
@ -1,30 +1,19 @@
|
||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||
|
||||
import { CstParser, CstNode, IOrAlt } from "chevrotain";
|
||||
import { EmbeddedActionsParser, IOrAlt, } from "chevrotain";
|
||||
import * as T from "./tokens";
|
||||
|
||||
export enum RobotLanguage {
|
||||
JS,
|
||||
Perl,
|
||||
DotNet,
|
||||
Java
|
||||
}
|
||||
|
||||
export enum HumanLanguage {
|
||||
English,
|
||||
/* Todo: Humans speak more than just english! */
|
||||
}
|
||||
import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST } from "./generator";
|
||||
|
||||
export class Human2RegexParserOptions {
|
||||
constructor(public skip_validations: boolean = false, public robot_language: RobotLanguage = RobotLanguage.JS, public human_language: HumanLanguage = HumanLanguage.English) {
|
||||
constructor(public skip_validations: boolean = false) {
|
||||
/* empty */
|
||||
}
|
||||
}
|
||||
|
||||
export class Human2RegexParser extends CstParser {
|
||||
export class Human2RegexParser extends EmbeddedActionsParser {
|
||||
private static already_init = false;
|
||||
|
||||
public parse : (idxInCallingRule?: number, ...args: unknown[]) => CstNode;
|
||||
public parse : (idxInCallingRule?: number, ...args: unknown[]) => RegularExpressionCST;
|
||||
|
||||
constructor(private options: Human2RegexParserOptions = new Human2RegexParserOptions()) {
|
||||
super(T.AllTokens, { recoveryEnabled: false, maxLookahead: 2, skipValidations: options.skip_validations });
|
||||
@ -39,50 +28,101 @@ export class Human2RegexParser extends CstParser {
|
||||
|
||||
let nss_rules : IOrAlt<unknown>[] | null = null;
|
||||
const NumberSubStatement = $.RULE("NumberSubStatement", () => {
|
||||
let value: number = 0;
|
||||
|
||||
$.OR(nss_rules || (nss_rules = [
|
||||
{ ALT: () => $.CONSUME(T.One) },
|
||||
{ ALT: () => $.CONSUME(T.Two) },
|
||||
{ ALT: () => $.CONSUME(T.Three) },
|
||||
{ ALT: () => $.CONSUME(T.Four) },
|
||||
{ ALT: () => $.CONSUME(T.Five) },
|
||||
{ ALT: () => $.CONSUME(T.Six) },
|
||||
{ ALT: () => $.CONSUME(T.Seven) },
|
||||
{ ALT: () => $.CONSUME(T.Eight) },
|
||||
{ ALT: () => $.CONSUME(T.Nine) },
|
||||
{ ALT: () => $.CONSUME(T.Ten) },
|
||||
{ ALT: () => $.CONSUME(T.Zero) },
|
||||
{ ALT: () => $.CONSUME(T.NumberLiteral) },
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Zero);
|
||||
value = 0;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.One);
|
||||
value = 1;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Two);
|
||||
value = 2;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Three);
|
||||
value = 3;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Four);
|
||||
value = 4;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Five);
|
||||
value = 5;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Six);
|
||||
value = 6;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Seven);
|
||||
value = 7;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Eight);
|
||||
value = 8;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Nine);
|
||||
value = 9;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Ten);
|
||||
value = 10;
|
||||
}},
|
||||
|
||||
{ ALT: () => value = parseInt($.CONSUME(T.NumberLiteral).image) },
|
||||
]));
|
||||
|
||||
return value;
|
||||
});
|
||||
|
||||
// 1, 1..2, between 1 and/to 2 inclusively/exclusively
|
||||
const CountSubStatement = $.RULE("CountSubStatement", () => {
|
||||
let from : number = 0;
|
||||
let to: number | null = null;
|
||||
let opt: "inclusive" | "exclusive" | "+" | null = null;
|
||||
|
||||
$.OR([
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Between);
|
||||
$.SUBRULE4(NumberSubStatement);
|
||||
from = $.SUBRULE4(NumberSubStatement);
|
||||
$.OR3([
|
||||
{ ALT: () => $.CONSUME2(T.To) },
|
||||
{ ALT: () => $.CONSUME(T.And) }
|
||||
]);
|
||||
$.SUBRULE5(NumberSubStatement);
|
||||
to = $.SUBRULE5(NumberSubStatement);
|
||||
$.OPTION4(() => $.CONSUME3(T.Times));
|
||||
$.OPTION5(() => {
|
||||
$.OR4([
|
||||
{ ALT: () => $.CONSUME(T.Inclusive) },
|
||||
{ ALT: () => $.CONSUME(T.Exclusive) }
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Inclusive);
|
||||
opt = "inclusive";
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Exclusive);
|
||||
opt = "exclusive";
|
||||
}}
|
||||
]);
|
||||
});
|
||||
}},
|
||||
|
||||
{ ALT: () => {
|
||||
$.OPTION2(() => $.CONSUME(T.From));
|
||||
$.SUBRULE2(NumberSubStatement);
|
||||
from = $.SUBRULE2(NumberSubStatement);
|
||||
$.OR2([
|
||||
{ ALT: () => $.CONSUME(T.OrMore) },
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.OrMore);
|
||||
opt = "+";
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.To);
|
||||
$.SUBRULE3(NumberSubStatement);
|
||||
to = $.SUBRULE3(NumberSubStatement);
|
||||
}}
|
||||
]);
|
||||
$.OPTION3(() => $.CONSUME2(T.Times));
|
||||
@ -90,54 +130,112 @@ export class Human2RegexParser extends CstParser {
|
||||
|
||||
{ ALT: () => {
|
||||
$.OPTION(() => $.CONSUME(T.Exactly));
|
||||
$.SUBRULE(NumberSubStatement);
|
||||
from = $.SUBRULE(NumberSubStatement);
|
||||
$.OPTION6(() => $.CONSUME(T.Times));
|
||||
}}
|
||||
]);
|
||||
|
||||
return new CountSubStatementCST(from, to, opt);
|
||||
});
|
||||
|
||||
let mss_rules : IOrAlt<unknown>[] | null = null;
|
||||
const MatchSubStatement = $.RULE("MatchSubStatement", () => {
|
||||
$.OPTION(() => $.SUBRULE(CountSubStatement) );
|
||||
$.OPTION2(() => $.CONSUME(T.Not));
|
||||
let count: CountSubStatementCST | null = null;
|
||||
let invert: boolean = false;
|
||||
const values: MatchSubStatementValue[] = [];
|
||||
|
||||
$.OPTION(() => count = $.SUBRULE(CountSubStatement) );
|
||||
$.OPTION2(() => {
|
||||
$.CONSUME(T.Not);
|
||||
invert = true;
|
||||
});
|
||||
$.AT_LEAST_ONE_SEP({
|
||||
SEP: T.Or,
|
||||
DEF: () => {
|
||||
let from : string | null = null;
|
||||
let to : string | null = null;
|
||||
let type : MatchSubStatementType = MatchSubStatementType.Anything;
|
||||
|
||||
$.OPTION3(() => $.CONSUME(T.A));
|
||||
$.OR(mss_rules || (mss_rules = [
|
||||
{ ALT: () => {
|
||||
$.OPTION4(() => $.CONSUME(T.From));
|
||||
$.CONSUME2(T.StringLiteral);
|
||||
from = $.CONSUME2(T.StringLiteral).image;
|
||||
$.CONSUME(T.To);
|
||||
$.CONSUME3(T.StringLiteral);
|
||||
to = $.CONSUME3(T.StringLiteral).image;
|
||||
type = MatchSubStatementType.Between;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Between);
|
||||
$.CONSUME4(T.StringLiteral);
|
||||
from = $.CONSUME4(T.StringLiteral).image;
|
||||
$.CONSUME(T.And);
|
||||
$.CONSUME5(T.StringLiteral);
|
||||
to = $.CONSUME5(T.StringLiteral).image;
|
||||
type = MatchSubStatementType.Between;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
from = $.CONSUME(T.StringLiteral).image;
|
||||
type = MatchSubStatementType.SingleString;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Anything);
|
||||
type = MatchSubStatementType.Anything;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Word);
|
||||
type = MatchSubStatementType.Word;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Digit);
|
||||
type = MatchSubStatementType.Digit;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Character);
|
||||
type = MatchSubStatementType.Character;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Whitespace);
|
||||
type = MatchSubStatementType.Whitespace;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Number);
|
||||
type = MatchSubStatementType.Number;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Tab);
|
||||
type = MatchSubStatementType.Tab;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Linefeed);
|
||||
type = MatchSubStatementType.Linefeed;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Newline);
|
||||
type = MatchSubStatementType.Newline;
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.CarriageReturn);
|
||||
type = MatchSubStatementType.CarriageReturn;
|
||||
}},
|
||||
{ ALT: () => $.CONSUME(T.StringLiteral) },
|
||||
{ ALT: () => $.CONSUME(T.Anything) },
|
||||
{ ALT: () => $.CONSUME(T.Word) },
|
||||
{ ALT: () => $.CONSUME(T.Digit) },
|
||||
{ ALT: () => $.CONSUME(T.Character) },
|
||||
{ ALT: () => $.CONSUME(T.Whitespace) },
|
||||
{ ALT: () => $.CONSUME(T.Number) },
|
||||
{ ALT: () => $.CONSUME(T.Tab) },
|
||||
{ ALT: () => $.CONSUME(T.Linefeed) },
|
||||
{ ALT: () => $.CONSUME(T.Newline) },
|
||||
{ ALT: () => $.CONSUME(T.CarriageReturn) },
|
||||
]));
|
||||
|
||||
values.push(new MatchSubStatementValue(type, from, to));
|
||||
}
|
||||
});
|
||||
|
||||
return new MatchSubStatementCST(count, invert, values);
|
||||
});
|
||||
|
||||
// optionally match "+" then 1+ words
|
||||
const MatchStatement = $.RULE("MatchStatement", () => {
|
||||
$.OPTION(() => $.CONSUME(T.Optional));
|
||||
let optional = false;
|
||||
const msv: MatchStatementValue[] = [];
|
||||
|
||||
$.OPTION(() => {
|
||||
$.CONSUME(T.Optional);
|
||||
optional = true;
|
||||
});
|
||||
$.CONSUME(T.Match);
|
||||
$.SUBRULE(MatchSubStatement);
|
||||
msv.push(new MatchStatementValue(optional, $.SUBRULE(MatchSubStatement)));
|
||||
$.MANY(() => {
|
||||
$.OR([
|
||||
{ ALT: () => {
|
||||
@ -146,69 +244,136 @@ export class Human2RegexParser extends CstParser {
|
||||
}},
|
||||
{ ALT: () => $.CONSUME(T.And) },
|
||||
]);
|
||||
$.OPTION3(() => $.CONSUME2(T.Optional));
|
||||
$.SUBRULE2(MatchSubStatement);
|
||||
optional = false;
|
||||
$.OPTION3(() => {
|
||||
$.CONSUME2(T.Optional);
|
||||
optional = true;
|
||||
});
|
||||
msv.push(new MatchStatementValue(optional, $.SUBRULE2(MatchSubStatement)));
|
||||
});
|
||||
$.CONSUME(T.EndOfLine);
|
||||
|
||||
return new MatchStatementCST(msv);
|
||||
});
|
||||
|
||||
// using global matching
|
||||
let us_rules : IOrAlt<unknown>[] | null = null;
|
||||
const UsingStatement = $.RULE("UsingStatement", () => {
|
||||
const usings: UsingFlags[] = [];
|
||||
|
||||
$.CONSUME(T.Using);
|
||||
$.AT_LEAST_ONE_SEP({
|
||||
SEP: T.And,
|
||||
DEF: () => {
|
||||
$.OR(us_rules || (us_rules = [
|
||||
{ ALT: () => $.CONSUME(T.Multiline) },
|
||||
{ ALT: () => $.CONSUME(T.Global) },
|
||||
{ ALT: () => $.CONSUME(T.CaseInsensitive) },
|
||||
{ ALT: () => $.CONSUME(T.CaseSensitive) },
|
||||
{ ALT: () => $.CONSUME(T.Exact) }
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Multiline);
|
||||
usings.push(UsingFlags.Multiline);
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Global);
|
||||
usings.push(UsingFlags.Global);
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.CaseInsensitive);
|
||||
usings.push(UsingFlags.Insensitive);
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.CaseSensitive);
|
||||
usings.push(UsingFlags.Sensitive);
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Exact);
|
||||
usings.push(UsingFlags.Exact);
|
||||
}}
|
||||
]));
|
||||
$.OPTION(() => $.CONSUME(T.Matching));
|
||||
}
|
||||
});
|
||||
$.CONSUME(T.EndOfLine);
|
||||
|
||||
return usings;
|
||||
});
|
||||
|
||||
const GroupStatement = $.RULE("GroupStatement", () => {
|
||||
$.OPTION2(() => $.CONSUME(T.Optional));
|
||||
$.CONSUME(T.Create);
|
||||
$.CONSUME(T.A);
|
||||
$.OPTION3(() => $.CONSUME2(T.Optional));
|
||||
let optional = false;
|
||||
let name: string | null = null;
|
||||
const statement: StatementCST[] = [];
|
||||
|
||||
$.OR([
|
||||
{ ALT: () => {
|
||||
optional = true;
|
||||
$.CONSUME(T.Optional);
|
||||
$.CONSUME(T.Create);
|
||||
$.CONSUME(T.A);
|
||||
}},
|
||||
{ ALT: () => {
|
||||
$.CONSUME2(T.Create);
|
||||
$.CONSUME2(T.A);
|
||||
$.OPTION2(() => {
|
||||
$.CONSUME2(T.Optional);
|
||||
optional = true;
|
||||
});
|
||||
}}
|
||||
]);
|
||||
|
||||
$.CONSUME(T.Group);
|
||||
$.OPTION(() => {
|
||||
$.CONSUME(T.Called);
|
||||
$.CONSUME(T.StringLiteral);
|
||||
name = $.CONSUME(T.Identifier).image;
|
||||
});
|
||||
$.CONSUME2(T.EndOfLine);
|
||||
$.CONSUME(T.Indent);
|
||||
$.AT_LEAST_ONE(Statement);
|
||||
$.AT_LEAST_ONE(() => {
|
||||
statement.push($.SUBRULE(Statement));
|
||||
});
|
||||
$.CONSUME(T.Outdent);
|
||||
|
||||
return new GroupStatementCST(optional, name, statement);
|
||||
});
|
||||
|
||||
const RepeatStatement = $.RULE("RepeatStatement", () => {
|
||||
$.OPTION3(() => $.CONSUME(T.Optional));
|
||||
let optional = false;
|
||||
let count : CountSubStatementCST | null = null;
|
||||
const statements: StatementCST[] = [];
|
||||
|
||||
$.OPTION3(() => {
|
||||
$.CONSUME(T.Optional);
|
||||
optional = true;
|
||||
});
|
||||
$.CONSUME(T.Repeat);
|
||||
$.OPTION(() => $.SUBRULE(CountSubStatement));
|
||||
$.OPTION(() => count = $.SUBRULE(CountSubStatement));
|
||||
$.CONSUME3(T.EndOfLine);
|
||||
$.CONSUME(T.Indent);
|
||||
$.AT_LEAST_ONE(Statement);
|
||||
$.AT_LEAST_ONE(() => {
|
||||
statements.push($.SUBRULE(Statement));
|
||||
});
|
||||
$.CONSUME(T.Outdent);
|
||||
|
||||
return new RepeatStatementCST(optional, count, statements);
|
||||
});
|
||||
|
||||
const Statement = $.RULE("Statement", () => {
|
||||
// eslint-disable-next-line init-declarations
|
||||
let statement! : StatementCST;
|
||||
|
||||
$.OR([
|
||||
{ ALT: () => $.SUBRULE(MatchStatement) },
|
||||
{ ALT: () => $.SUBRULE(GroupStatement) },
|
||||
{ ALT: () => $.SUBRULE(RepeatStatement) }
|
||||
{ ALT: () => statement = $.SUBRULE(MatchStatement) },
|
||||
{ ALT: () => statement = $.SUBRULE(GroupStatement) },
|
||||
{ ALT: () => statement = $.SUBRULE(RepeatStatement) }
|
||||
]);
|
||||
|
||||
return statement;
|
||||
});
|
||||
|
||||
const Regex = $.RULE("Regex", () => {
|
||||
$.MANY(() => $.SUBRULE(UsingStatement));
|
||||
$.MANY2(() => $.SUBRULE(Statement) );
|
||||
let usings: UsingFlags[] = [];
|
||||
const statements: StatementCST[] = [];
|
||||
|
||||
$.MANY(() => usings = usings.concat($.SUBRULE(UsingStatement)));
|
||||
$.MANY2(() => statements.push($.SUBRULE(Statement)) );
|
||||
|
||||
return new RegularExpressionCST(new UsingStatementCST(usings), statements);
|
||||
});
|
||||
|
||||
this.performSelfAnalysis();
|
||||
|
@ -12,8 +12,8 @@ $(function() {
|
||||
});
|
||||
*/
|
||||
|
||||
const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(true));
|
||||
const parser = new Human2RegexParser(new Human2RegexParserOptions(true));
|
||||
const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(false));
|
||||
const parser = new Human2RegexParser(new Human2RegexParserOptions(false));
|
||||
|
||||
const result = lexer.tokenize(`
|
||||
// H2R supports // # and /**/ as comments
|
||||
@ -24,28 +24,28 @@ const result = lexer.tokenize(`
|
||||
// exact matching means use a ^ and $ to signify the start and end of the string
|
||||
|
||||
using global and exact matching
|
||||
create an optional group called "protocol"
|
||||
create an optional group called protocol
|
||||
match "http"
|
||||
optionally match "s"
|
||||
match "://"
|
||||
create a group called "subdomain"
|
||||
create a group called subdomain
|
||||
repeat
|
||||
match 1+ words
|
||||
match "."
|
||||
create a group called "domain"
|
||||
create a group called domain
|
||||
match 1+ words or "_" or "-"
|
||||
match "."
|
||||
match a word
|
||||
# port, but we don't care about it, so ignore it
|
||||
optionally match ":" then 0+ digits
|
||||
create an optional group called "path"
|
||||
create an optional group called path
|
||||
repeat
|
||||
match "/"
|
||||
match 0+ words or "_" or "-"
|
||||
create an optional group
|
||||
# we don't want to capture the '?', so don't name the group until afterwards
|
||||
match "?"
|
||||
create a group called "query"
|
||||
create a group called query
|
||||
repeat
|
||||
match 1+ words or "_" or "-"
|
||||
match "="
|
||||
@ -61,5 +61,5 @@ console.log(result.errors);
|
||||
|
||||
parser.input = result.tokens;
|
||||
const regex = parser.parse();
|
||||
console.log(JSON.stringify(regex.children, undefined, 4));
|
||||
console.log(JSON.stringify(regex, undefined, 4));
|
||||
console.log(parser.errors);
|
@ -78,7 +78,7 @@ export const SingleLineComment = createToken({name: "SingleLineComment", pattern
|
||||
/**
 * Block comments. `[\s\S]*?` lets the body span line breaks (a bare `.` does
 * not match them) and stops at the first terminator instead of running on to
 * the last one in the input.
 */
export const MultilineComment = createToken({name: "MultiLineComment", pattern: /\/\*[\s\S]*?\*\//, line_breaks: true, group: Lexer.SKIPPED });
|
||||
|
||||
export const Identifier = createToken({name: "Identifier", pattern: /[a-z]\w*/i });
|
||||
export const NumberLiteral = createToken({name: "NumberLiteral", pattern: /-?(0|[1-9]\d*)(\.\d+)?([eE][+-]?\d+)?/ });
|
||||
export const NumberLiteral = createToken({name: "NumberLiteral", pattern: /-?\d+/ });
|
||||
export const StringLiteral = createToken({name: "StringLiteral", pattern: /"(?:[^\\"]|\\(?:[bfnrtv"\\/]|u[0-9a-f]{4}|U[0-9a-f]{8}))*"/i });
|
||||
|
||||
export const Indent = createToken({name: "Indent"});
|
||||
|
@ -1,5 +1,15 @@
|
||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||
|
||||
/* eslint-disable no-bitwise */
|
||||
/**
 * Tests whether two bit sets share at least one set bit.
 *
 * @param a the first bit set
 * @param b the second bit set
 * @returns true when `a` and `b` have any bit in common
 */
export function hasFlag(a: number, b: number): boolean {
    return (a & b) !== 0;
}
|
||||
|
||||
/**
 * Merges two bit sets.
 *
 * @param a the first bit set
 * @param b the second bit set
 * @returns a bit set containing every bit set in `a` or `b`
 */
export function combineFlags(a: number, b: number): number {
    return (a | b);
}
|
||||
/* eslint-enable no-bitwise */
|
||||
|
||||
/**
 * Returns the final element of an array.
 *
 * @param array the array to read from
 * @returns the last element; `undefined` at runtime when the array is empty
 */
export function last<T>(array: T[]): T {
    return array[array.length - 1];
}
|
||||
@ -13,7 +23,7 @@ export function findLastIndex<T>(array: T[], value: T) : number {
|
||||
return -1;
|
||||
}
|
||||
|
||||
export function findLastIndexPredicate<T>(array: T[], predicate: (x: T) => boolean) : number {
|
||||
export function findLastIndexPredicate<T>(array: T[], predicate: (x: T) => boolean): number {
|
||||
for (let index = array.length-1; index >= 0; index--) {
|
||||
if (predicate(array[index])) {
|
||||
return index;
|
||||
@ -21,3 +31,11 @@ export function findLastIndexPredicate<T>(array: T[], predicate: (x: T) => boole
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
 * Strips the first and last character of a string, i.e. the quotes that
 * surround a string literal.
 *
 * @param input the text including its surrounding quotes
 * @returns the text between the first and last character; empty when `input`
 *          has fewer than three characters
 */
export function removeQuotes(input: string): string {
    // slice(1, -1) drops exactly one character from each end.
    return input.slice(1, -1);
}
|
||||
|
||||
/**
 * Escapes regex metacharacters so that the text matches itself literally.
 *
 * @param input the text to escape
 * @returns `input` with a backslash in front of every occurrence of
 *          `\ . [ ] ^ - | ( ) * + ? { } $`
 */
export function regexEscape(input: string): string {
    // One global pass over a character class: every metacharacter, including
    // each backslash, is escaped exactly once.
    return input.replace(/[\\.\[\]^\-|()*+?{}$]/g, "\\$&");
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user