1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-20 14:10:10 -07:00

Everything works now

This commit is contained in:
Patrick Demian 2020-10-30 22:34:41 -04:00
parent 9d28543c5b
commit 6e42c7e921
4 changed files with 122 additions and 69 deletions

14
docs/bundle.min.js vendored

File diff suppressed because one or more lines are too long

View File

@ -1,7 +1,7 @@
/* eslint-disable @typescript-eslint/no-unused-vars */ /* eslint-disable @typescript-eslint/no-unused-vars */
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */ /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { regexEscape, removeQuotes, hasFlag, combineFlags } from "./utilities"; import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter } from "./utilities";
export enum RobotLanguage { export enum RobotLanguage {
JS, JS,
@ -25,7 +25,6 @@ export enum UsingFlags {
} }
/* eslint-enable no-bitwise */ /* eslint-enable no-bitwise */
export enum MatchSubStatementType { export enum MatchSubStatementType {
SingleString, SingleString,
Between, Between,
@ -42,7 +41,7 @@ export enum MatchSubStatementType {
} }
export class MatchSubStatementValue { export class MatchSubStatementValue {
constructor(public type: MatchSubStatementType, public from: string | null, public to: string | null) { constructor(public type: MatchSubStatementType, public from: string | null = null, public to: string | null = null) {
/* empty */ /* empty */
} }
} }
@ -66,7 +65,7 @@ export class MatchSubStatementCST implements H2RCST {
public validate(language: RobotLanguage): Error[] { public validate(language: RobotLanguage): Error[] {
let errors: Error[] = []; let errors: Error[] = [];
if (this.count !== null) { if (this.count) {
errors = errors.concat(this.count.validate(language)); errors = errors.concat(this.count.validate(language));
} }
@ -75,20 +74,14 @@ export class MatchSubStatementCST implements H2RCST {
let from = value.from as string; let from = value.from as string;
let to = value.to as string; let to = value.to as string;
if ((from.startsWith("\\u") && from.length !== 6) || if (!isSingleRegexCharacter(from)) {
(from.startsWith("\\U") && from.length !== 8) ||
(from.startsWith("\\") && from.length !== 2) ||
(from.length !== 1)) {
errors.push(new Error("Between statement must begin with a single character")); errors.push(new Error("Between statement must begin with a single character"));
} }
else if (from.startsWith("\\u") || from.startsWith("\\U") || from.startsWith("\\")) { else if (from.startsWith("\\u") || from.startsWith("\\U") || from.startsWith("\\")) {
from = JSON.parse(`"${regexEscape(from)}"`); from = JSON.parse(`"${regexEscape(from)}"`);
} }
if ((to.startsWith("\\u") && to.length !== 6) || if (!isSingleRegexCharacter(to)) {
(to.startsWith("\\U") && to.length !== 8) ||
(to.startsWith("\\") && to.length !== 2) ||
(to.length !== 1)) {
errors.push(new Error("Between statement must end with a single character")); errors.push(new Error("Between statement must end with a single character"));
} }
else if (to.startsWith("\\u") || to.startsWith("\\U") || to.startsWith("\\")) { else if (to.startsWith("\\u") || to.startsWith("\\U") || to.startsWith("\\")) {
@ -118,13 +111,13 @@ export class MatchSubStatementCST implements H2RCST {
str.push(this.invert ? `[^${value.from}-${value.to}]` : `[${value.from}-${value.to}]`); str.push(this.invert ? `[^${value.from}-${value.to}]` : `[${value.from}-${value.to}]`);
break; break;
case MatchSubStatementType.Word: case MatchSubStatementType.Word:
str.push(this.invert ? "\\W" : "\\w"); str.push(this.invert ? "\\W+" : "\\w+");
break; break;
case MatchSubStatementType.Digit: case MatchSubStatementType.Digit:
str.push(this.invert ? "\\D" : "\\d"); str.push(this.invert ? "\\D" : "\\d");
break; break;
case MatchSubStatementType.Character: case MatchSubStatementType.Character:
str.push(this.invert ? "[^a-zA-Z]" : "[a-zA-Z]"); str.push(this.invert ? "\\W" : "\\w");
break; break;
case MatchSubStatementType.Whitespace: case MatchSubStatementType.Whitespace:
str.push(this.invert ? "\\S" : "\\s"); str.push(this.invert ? "\\S" : "\\s");
@ -149,7 +142,42 @@ export class MatchSubStatementCST implements H2RCST {
} }
} }
return "(?:" + str.join("|") + ")"; let ret = "";
if (str.length === 1) {
ret = str[0];
}
// we can use regex's [] for single chars, otherwise we need a group
else if (str.every(isSingleRegexCharacter)) {
ret = "[" + str.join("") + "]";
}
else {
//use a no-capture group
ret = "(?:" + str.join("|") + ")";
}
if (this.count) {
if (this.count.from === 1 && this.count.to === null) {
if (this.count.opt === "+") {
ret += "+";
}
// if we only have a count of 1, we can ignore adding any extra text
}
else if (this.count.from === 0 && this.count.to === null) {
if (this.count.opt === "+") {
ret += "*";
}
else {
// match 0 of anything? ok...
ret = "";
}
}
else {
ret += this.count.toRegex(language);
}
}
return ret;
} }
} }

View File

@ -1,6 +1,6 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */ /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { EmbeddedActionsParser, IOrAlt, } from "chevrotain"; import { EmbeddedActionsParser, IOrAlt } from "chevrotain";
import * as T from "./tokens"; import * as T from "./tokens";
import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST } from "./generator"; import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST } from "./generator";
@ -26,57 +26,57 @@ export class Human2RegexParser extends EmbeddedActionsParser {
const $ = this; const $ = this;
let nss_rules : IOrAlt<unknown>[] | null = null; let nss_rules : IOrAlt<number>[] | null = null;
const NumberSubStatement = $.RULE("NumberSubStatement", () => { const NumberSubStatement = $.RULE("NumberSubStatement", () => {
let value: number = 0; let value: number = 0;
$.OR(nss_rules || (nss_rules = [ value = $.OR(nss_rules || (nss_rules = [
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Zero); $.CONSUME(T.Zero);
value = 0; return 0;
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.One); $.CONSUME(T.One);
value = 1; return 1;
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Two); $.CONSUME(T.Two);
value = 2; return 2;
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Three); $.CONSUME(T.Three);
value = 3; return 3;
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Four); $.CONSUME(T.Four);
value = 4; return 4;
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Five); $.CONSUME(T.Five);
value = 5; return 5;
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Six); $.CONSUME(T.Six);
value = 6; return 6;
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Seven); $.CONSUME(T.Seven);
value = 7; return 7;
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Eight); $.CONSUME(T.Eight);
value = 8; return 8;
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Nine); $.CONSUME(T.Nine);
value = 9; return 9;
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Ten); $.CONSUME(T.Ten);
value = 10; return 10;
}}, }},
{ ALT: () => value = parseInt($.CONSUME(T.NumberLiteral).image) }, { ALT: () => parseInt($.CONSUME(T.NumberLiteral).image) },
])); ]));
return value; return value;
@ -138,32 +138,33 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new CountSubStatementCST(from, to, opt); return new CountSubStatementCST(from, to, opt);
}); });
let mss_rules : IOrAlt<unknown>[] | null = null; let mss_rules : IOrAlt<MatchSubStatementValue>[] | null = null;
const MatchSubStatement = $.RULE("MatchSubStatement", () => { const MatchSubStatement = $.RULE("MatchSubStatement", () => {
let count: CountSubStatementCST | null = null; let count: CountSubStatementCST | null = null;
let invert: boolean = false; let invert: boolean = false;
const values: MatchSubStatementValue[] = []; const values: MatchSubStatementValue[] = [];
let from : string | null = null;
let to : string | null = null;
let type : MatchSubStatementType = MatchSubStatementType.Anything;
$.OPTION(() => count = $.SUBRULE(CountSubStatement) ); count = $.OPTION(() => $.SUBRULE(CountSubStatement) );
$.OPTION2(() => { invert = $.OPTION2(() => {
$.CONSUME(T.Not); $.CONSUME(T.Not);
invert = true; return true;
}); });
$.AT_LEAST_ONE_SEP({ $.AT_LEAST_ONE_SEP({
SEP: T.Or, SEP: T.Or,
DEF: () => { DEF: () => {
let from : string | null = null;
let to : string | null = null;
let type : MatchSubStatementType = MatchSubStatementType.Anything;
$.OPTION3(() => $.CONSUME(T.A)); $.OPTION3(() => $.CONSUME(T.A));
$.OR(mss_rules || (mss_rules = [ values.push($.OR(mss_rules || (mss_rules = [
{ ALT: () => { { ALT: () => {
$.OPTION4(() => $.CONSUME(T.From)); $.OPTION4(() => $.CONSUME(T.From));
from = $.CONSUME2(T.StringLiteral).image; from = $.CONSUME2(T.StringLiteral).image;
$.CONSUME(T.To); $.CONSUME(T.To);
to = $.CONSUME3(T.StringLiteral).image; to = $.CONSUME3(T.StringLiteral).image;
type = MatchSubStatementType.Between; type = MatchSubStatementType.Between;
return new MatchSubStatementValue(type, from, to);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Between); $.CONSUME(T.Between);
@ -171,54 +172,76 @@ export class Human2RegexParser extends EmbeddedActionsParser {
$.CONSUME(T.And); $.CONSUME(T.And);
to = $.CONSUME5(T.StringLiteral).image; to = $.CONSUME5(T.StringLiteral).image;
type = MatchSubStatementType.Between; type = MatchSubStatementType.Between;
return new MatchSubStatementValue(type, from, to);
}}, }},
{ ALT: () => { { ALT: () => {
from = $.CONSUME(T.StringLiteral).image; from = $.CONSUME(T.StringLiteral).image;
type = MatchSubStatementType.SingleString; type = MatchSubStatementType.SingleString;
return new MatchSubStatementValue(type, from);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Anything); $.CONSUME(T.Anything);
type = MatchSubStatementType.Anything; type = MatchSubStatementType.Anything;
return new MatchSubStatementValue(type);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Word); $.CONSUME(T.Word);
type = MatchSubStatementType.Word; type = MatchSubStatementType.Word;
return new MatchSubStatementValue(type);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Digit); $.CONSUME(T.Digit);
type = MatchSubStatementType.Digit; type = MatchSubStatementType.Digit;
return new MatchSubStatementValue(type);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Character); $.CONSUME(T.Character);
type = MatchSubStatementType.Character; type = MatchSubStatementType.Character;
return new MatchSubStatementValue(type);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Whitespace); $.CONSUME(T.Whitespace);
type = MatchSubStatementType.Whitespace; type = MatchSubStatementType.Whitespace;
return new MatchSubStatementValue(type);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Number); $.CONSUME(T.Number);
type = MatchSubStatementType.Number; type = MatchSubStatementType.Number;
return new MatchSubStatementValue(type);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Tab); $.CONSUME(T.Tab);
type = MatchSubStatementType.Tab; type = MatchSubStatementType.Tab;
return new MatchSubStatementValue(type);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Linefeed); $.CONSUME(T.Linefeed);
type = MatchSubStatementType.Linefeed; type = MatchSubStatementType.Linefeed;
return new MatchSubStatementValue(type);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Newline); $.CONSUME(T.Newline);
type = MatchSubStatementType.Newline; type = MatchSubStatementType.Newline;
return new MatchSubStatementValue(type);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.CarriageReturn); $.CONSUME(T.CarriageReturn);
type = MatchSubStatementType.CarriageReturn; type = MatchSubStatementType.CarriageReturn;
}},
]));
values.push(new MatchSubStatementValue(type, from, to)); return new MatchSubStatementValue(type);
}},
])));
} }
}); });
@ -257,7 +280,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
}); });
// using global matching // using global matching
let us_rules : IOrAlt<unknown>[] | null = null; let us_rules : IOrAlt<UsingFlags>[] | null = null;
const UsingStatement = $.RULE("UsingStatement", () => { const UsingStatement = $.RULE("UsingStatement", () => {
const usings: UsingFlags[] = []; const usings: UsingFlags[] = [];
@ -265,28 +288,28 @@ export class Human2RegexParser extends EmbeddedActionsParser {
$.AT_LEAST_ONE_SEP({ $.AT_LEAST_ONE_SEP({
SEP: T.And, SEP: T.And,
DEF: () => { DEF: () => {
$.OR(us_rules || (us_rules = [ usings.push($.OR(us_rules || (us_rules = [
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Multiline); $.CONSUME(T.Multiline);
usings.push(UsingFlags.Multiline); return UsingFlags.Multiline;
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Global); $.CONSUME(T.Global);
usings.push(UsingFlags.Global); return UsingFlags.Global;
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.CaseInsensitive); $.CONSUME(T.CaseInsensitive);
usings.push(UsingFlags.Insensitive); return UsingFlags.Insensitive;
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.CaseSensitive); $.CONSUME(T.CaseSensitive);
usings.push(UsingFlags.Sensitive); return UsingFlags.Sensitive;
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Exact); $.CONSUME(T.Exact);
usings.push(UsingFlags.Exact); return UsingFlags.Exact;
}} }}
])); ])));
$.OPTION(() => $.CONSUME(T.Matching)); $.OPTION(() => $.CONSUME(T.Matching));
} }
}); });
@ -354,16 +377,11 @@ export class Human2RegexParser extends EmbeddedActionsParser {
}); });
const Statement = $.RULE("Statement", () => { const Statement = $.RULE("Statement", () => {
// eslint-disable-next-line init-declarations return $.OR([
let statement! : StatementCST; { ALT: () => $.SUBRULE(MatchStatement) },
{ ALT: () => $.SUBRULE(GroupStatement) },
$.OR([ { ALT: () => $.SUBRULE(RepeatStatement) }
{ ALT: () => statement = $.SUBRULE(MatchStatement) },
{ ALT: () => statement = $.SUBRULE(GroupStatement) },
{ ALT: () => statement = $.SUBRULE(RepeatStatement) }
]); ]);
return statement;
}); });
const Regex = $.RULE("Regex", () => { const Regex = $.RULE("Regex", () => {

View File

@ -10,6 +10,13 @@ export function combineFlags(a: number, b: number): number {
} }
/* eslint-enable no-bitwise */ /* eslint-enable no-bitwise */
/**
 * Tells whether `char` is written as exactly one regex character.
 *
 * Accepted spellings:
 *  - any string of length 1
 *  - a backslash escape of length 2 (e.g. `\n`)
 *  - a string starting with `\u` whose total length is 6
 *  - a string starting with `\U` whose total length is 8
 *
 * Only the prefix and the total length are inspected; the characters after
 * the prefix are not validated.
 *
 * @param char the candidate text
 * @returns true when `char` matches one of the accepted spellings
 */
export function isSingleRegexCharacter(char: string): boolean {
    const size = char.length;

    // a lone character always qualifies, whatever it is
    if (size === 1) {
        return true;
    }

    // every remaining accepted form is introduced by a backslash
    if (!char.startsWith("\\")) {
        return false;
    }

    // two-character escapes qualify regardless of the escaped letter
    if (size === 2) {
        return true;
    }

    switch (char.charAt(1)) {
        case "u":
            return size === 6;
        case "U":
            return size === 8;
        default:
            return false;
    }
}
/**
 * Returns the final element of `array` (the element at index `length - 1`).
 * Unchanged context in this diff: both columns show the same text.
 * NOTE(review): for an empty array the index is -1, so the runtime value is
 * `undefined` even though the declared return type is `T`.
 */
export function last<T>(array: T[]) : T { export function last<T>(array: T[]) : T {
return array[array.length-1]; return array[array.length-1];
} }
@ -33,9 +40,9 @@ export function findLastIndexPredicate<T>(array: T[], predicate: (x: T) => boole
} }
/**
 * Strips the surrounding characters from `input` using `substring`.
 * Left column (before this commit): end index `length-2`, which drops the
 * first character and the last two.
 * Right column (after this commit): end index `length-1`, which drops exactly
 * the first and the last character.
 * Neither version checks that the removed characters are actually quotes.
 */
export function removeQuotes(input: string): string { export function removeQuotes(input: string): string {
return input.substring(1, input.length-2); return input.substring(1, input.length-1);
} }
/**
 * Escapes `input` for use inside a regular expression: backslashes are doubled
 * first, then characters from the special-character class are prefixed with a
 * backslash.
 * Left column (before this commit): the character-class regex has no `g` flag,
 * so only the first special character is escaped.
 * Right column (after this commit): `/` is added to the class and the `g` flag
 * is set, so every special character is escaped.
 * NOTE(review): in both columns the first call is `replace("\\", "\\\\")` with
 * a string pattern, which replaces only the first backslash; inputs with more
 * than one backslash are probably not fully escaped. Confirm and consider a
 * global regex pattern.
 */
export function regexEscape(input: string) : string { export function regexEscape(input: string) : string {
return input.replace("\\", "\\\\").replace(/([=:\-\.\[\]\^\|\(\)\*\+\?\{\}\$])/, "\\$1"); return input.replace("\\", "\\\\").replace(/([=:\-\.\[\]\^\|\(\)\*\+\?\{\}\$\/])/g, "\\$1");
} }