1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-20 06:00:08 -07:00

Everything works now

This commit is contained in:
Patrick Demian 2020-10-30 22:34:41 -04:00
parent 9d28543c5b
commit 6e42c7e921
4 changed files with 122 additions and 69 deletions

14
docs/bundle.min.js vendored

File diff suppressed because one or more lines are too long

View File

@ -1,7 +1,7 @@
/* eslint-disable @typescript-eslint/no-unused-vars */
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { regexEscape, removeQuotes, hasFlag, combineFlags } from "./utilities";
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter } from "./utilities";
export enum RobotLanguage {
JS,
@ -25,7 +25,6 @@ export enum UsingFlags {
}
/* eslint-enable no-bitwise */
export enum MatchSubStatementType {
SingleString,
Between,
@ -42,7 +41,7 @@ export enum MatchSubStatementType {
}
/**
 * One alternative inside a match sub-statement: the kind of thing to match
 * plus up to two string operands.
 *
 * Range-style values (`Between`) carry both `from` and `to`; a
 * `SingleString` value carries only `from`; the remaining kinds are
 * constructed with the type alone.
 */
export class MatchSubStatementValue {
    /**
     * @param type which kind of match this value represents
     * @param from first operand, or null when the kind takes no operand
     * @param to second operand, or null when the kind takes fewer than two operands
     */
    constructor(public type: MatchSubStatementType, public from: string | null = null, public to: string | null = null) {
        /* empty */
    }
}
@ -66,7 +65,7 @@ export class MatchSubStatementCST implements H2RCST {
public validate(language: RobotLanguage): Error[] {
let errors: Error[] = [];
if (this.count !== null) {
if (this.count) {
errors = errors.concat(this.count.validate(language));
}
@ -75,20 +74,14 @@ export class MatchSubStatementCST implements H2RCST {
let from = value.from as string;
let to = value.to as string;
if ((from.startsWith("\\u") && from.length !== 6) ||
(from.startsWith("\\U") && from.length !== 8) ||
(from.startsWith("\\") && from.length !== 2) ||
(from.length !== 1)) {
if (!isSingleRegexCharacter(from)) {
errors.push(new Error("Between statement must begin with a single character"));
}
else if (from.startsWith("\\u") || from.startsWith("\\U") || from.startsWith("\\")) {
from = JSON.parse(`"${regexEscape(from)}"`);
}
if ((to.startsWith("\\u") && to.length !== 6) ||
(to.startsWith("\\U") && to.length !== 8) ||
(to.startsWith("\\") && to.length !== 2) ||
(to.length !== 1)) {
if (!isSingleRegexCharacter(to)) {
errors.push(new Error("Between statement must end with a single character"));
}
else if (to.startsWith("\\u") || to.startsWith("\\U") || to.startsWith("\\")) {
@ -118,13 +111,13 @@ export class MatchSubStatementCST implements H2RCST {
str.push(this.invert ? `[^${value.from}-${value.to}]` : `[${value.from}-${value.to}]`);
break;
case MatchSubStatementType.Word:
str.push(this.invert ? "\\W" : "\\w");
str.push(this.invert ? "\\W+" : "\\w+");
break;
case MatchSubStatementType.Digit:
str.push(this.invert ? "\\D" : "\\d");
break;
case MatchSubStatementType.Character:
str.push(this.invert ? "[^a-zA-Z]" : "[a-zA-Z]");
str.push(this.invert ? "\\W" : "\\w");
break;
case MatchSubStatementType.Whitespace:
str.push(this.invert ? "\\S" : "\\s");
@ -149,7 +142,42 @@ export class MatchSubStatementCST implements H2RCST {
}
}
return "(?:" + str.join("|") + ")";
let ret = "";
if (str.length === 1) {
ret = str[0];
}
// we can use regex's [] for single chars, otherwise we need a group
else if (str.every(isSingleRegexCharacter)) {
ret = "[" + str.join("") + "]";
}
else {
//use a no-capture group
ret = "(?:" + str.join("|") + ")";
}
if (this.count) {
if (this.count.from === 1 && this.count.to === null) {
if (this.count.opt === "+") {
ret += "+";
}
// if we only have a count of 1, we can ignore adding any extra text
}
else if (this.count.from === 0 && this.count.to === null) {
if (this.count.opt === "+") {
ret += "*";
}
else {
// match 0 of anything? ok...
ret = "";
}
}
else {
ret += this.count.toRegex(language);
}
}
return ret;
}
}

View File

@ -1,6 +1,6 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { EmbeddedActionsParser, IOrAlt, } from "chevrotain";
import { EmbeddedActionsParser, IOrAlt } from "chevrotain";
import * as T from "./tokens";
import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST } from "./generator";
@ -26,57 +26,57 @@ export class Human2RegexParser extends EmbeddedActionsParser {
const $ = this;
let nss_rules : IOrAlt<unknown>[] | null = null;
let nss_rules : IOrAlt<number>[] | null = null;
const NumberSubStatement = $.RULE("NumberSubStatement", () => {
let value: number = 0;
$.OR(nss_rules || (nss_rules = [
value = $.OR(nss_rules || (nss_rules = [
{ ALT: () => {
$.CONSUME(T.Zero);
value = 0;
return 0;
}},
{ ALT: () => {
$.CONSUME(T.One);
value = 1;
return 1;
}},
{ ALT: () => {
$.CONSUME(T.Two);
value = 2;
return 2;
}},
{ ALT: () => {
$.CONSUME(T.Three);
value = 3;
return 3;
}},
{ ALT: () => {
$.CONSUME(T.Four);
value = 4;
return 4;
}},
{ ALT: () => {
$.CONSUME(T.Five);
value = 5;
return 5;
}},
{ ALT: () => {
$.CONSUME(T.Six);
value = 6;
return 6;
}},
{ ALT: () => {
$.CONSUME(T.Seven);
value = 7;
return 7;
}},
{ ALT: () => {
$.CONSUME(T.Eight);
value = 8;
return 8;
}},
{ ALT: () => {
$.CONSUME(T.Nine);
value = 9;
return 9;
}},
{ ALT: () => {
$.CONSUME(T.Ten);
value = 10;
return 10;
}},
{ ALT: () => value = parseInt($.CONSUME(T.NumberLiteral).image) },
{ ALT: () => parseInt($.CONSUME(T.NumberLiteral).image) },
]));
return value;
@ -138,32 +138,33 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new CountSubStatementCST(from, to, opt);
});
let mss_rules : IOrAlt<unknown>[] | null = null;
let mss_rules : IOrAlt<MatchSubStatementValue>[] | null = null;
const MatchSubStatement = $.RULE("MatchSubStatement", () => {
let count: CountSubStatementCST | null = null;
let invert: boolean = false;
const values: MatchSubStatementValue[] = [];
$.OPTION(() => count = $.SUBRULE(CountSubStatement) );
$.OPTION2(() => {
$.CONSUME(T.Not);
invert = true;
});
$.AT_LEAST_ONE_SEP({
SEP: T.Or,
DEF: () => {
let from : string | null = null;
let to : string | null = null;
let type : MatchSubStatementType = MatchSubStatementType.Anything;
count = $.OPTION(() => $.SUBRULE(CountSubStatement) );
invert = $.OPTION2(() => {
$.CONSUME(T.Not);
return true;
});
$.AT_LEAST_ONE_SEP({
SEP: T.Or,
DEF: () => {
$.OPTION3(() => $.CONSUME(T.A));
$.OR(mss_rules || (mss_rules = [
values.push($.OR(mss_rules || (mss_rules = [
{ ALT: () => {
$.OPTION4(() => $.CONSUME(T.From));
from = $.CONSUME2(T.StringLiteral).image;
$.CONSUME(T.To);
to = $.CONSUME3(T.StringLiteral).image;
type = MatchSubStatementType.Between;
return new MatchSubStatementValue(type, from, to);
}},
{ ALT: () => {
$.CONSUME(T.Between);
@ -171,54 +172,76 @@ export class Human2RegexParser extends EmbeddedActionsParser {
$.CONSUME(T.And);
to = $.CONSUME5(T.StringLiteral).image;
type = MatchSubStatementType.Between;
return new MatchSubStatementValue(type, from, to);
}},
{ ALT: () => {
from = $.CONSUME(T.StringLiteral).image;
type = MatchSubStatementType.SingleString;
return new MatchSubStatementValue(type, from);
}},
{ ALT: () => {
$.CONSUME(T.Anything);
type = MatchSubStatementType.Anything;
return new MatchSubStatementValue(type);
}},
{ ALT: () => {
$.CONSUME(T.Word);
type = MatchSubStatementType.Word;
return new MatchSubStatementValue(type);
}},
{ ALT: () => {
$.CONSUME(T.Digit);
type = MatchSubStatementType.Digit;
return new MatchSubStatementValue(type);
}},
{ ALT: () => {
$.CONSUME(T.Character);
type = MatchSubStatementType.Character;
return new MatchSubStatementValue(type);
}},
{ ALT: () => {
$.CONSUME(T.Whitespace);
type = MatchSubStatementType.Whitespace;
return new MatchSubStatementValue(type);
}},
{ ALT: () => {
$.CONSUME(T.Number);
type = MatchSubStatementType.Number;
return new MatchSubStatementValue(type);
}},
{ ALT: () => {
$.CONSUME(T.Tab);
type = MatchSubStatementType.Tab;
return new MatchSubStatementValue(type);
}},
{ ALT: () => {
$.CONSUME(T.Linefeed);
type = MatchSubStatementType.Linefeed;
return new MatchSubStatementValue(type);
}},
{ ALT: () => {
$.CONSUME(T.Newline);
type = MatchSubStatementType.Newline;
return new MatchSubStatementValue(type);
}},
{ ALT: () => {
$.CONSUME(T.CarriageReturn);
type = MatchSubStatementType.CarriageReturn;
}},
]));
values.push(new MatchSubStatementValue(type, from, to));
return new MatchSubStatementValue(type);
}},
])));
}
});
@ -257,7 +280,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
});
// using global matching
let us_rules : IOrAlt<unknown>[] | null = null;
let us_rules : IOrAlt<UsingFlags>[] | null = null;
const UsingStatement = $.RULE("UsingStatement", () => {
const usings: UsingFlags[] = [];
@ -265,28 +288,28 @@ export class Human2RegexParser extends EmbeddedActionsParser {
$.AT_LEAST_ONE_SEP({
SEP: T.And,
DEF: () => {
$.OR(us_rules || (us_rules = [
usings.push($.OR(us_rules || (us_rules = [
{ ALT: () => {
$.CONSUME(T.Multiline);
usings.push(UsingFlags.Multiline);
return UsingFlags.Multiline;
}},
{ ALT: () => {
$.CONSUME(T.Global);
usings.push(UsingFlags.Global);
return UsingFlags.Global;
}},
{ ALT: () => {
$.CONSUME(T.CaseInsensitive);
usings.push(UsingFlags.Insensitive);
return UsingFlags.Insensitive;
}},
{ ALT: () => {
$.CONSUME(T.CaseSensitive);
usings.push(UsingFlags.Sensitive);
return UsingFlags.Sensitive;
}},
{ ALT: () => {
$.CONSUME(T.Exact);
usings.push(UsingFlags.Exact);
return UsingFlags.Exact;
}}
]));
])));
$.OPTION(() => $.CONSUME(T.Matching));
}
});
@ -354,16 +377,11 @@ export class Human2RegexParser extends EmbeddedActionsParser {
});
// Statement: exactly one of a match, group, or repeat statement.
const Statement = $.RULE("Statement", () => {
    // OR evaluates to whatever the matched alternative returns, so the chosen
    // sub-rule's result is returned directly instead of going through a
    // definitely-assigned temporary.
    return $.OR([
        { ALT: () => $.SUBRULE(MatchStatement) },
        { ALT: () => $.SUBRULE(GroupStatement) },
        { ALT: () => $.SUBRULE(RepeatStatement) }
    ]);
});
const Regex = $.RULE("Regex", () => {

View File

@ -10,6 +10,13 @@ export function combineFlags(a: number, b: number): number {
}
/* eslint-enable no-bitwise */
/**
 * Reports whether a string has the length of exactly one regex character.
 *
 * Accepted shapes:
 *  - any string of length 1
 *  - a backslash followed by one character (length 2)
 *  - a string starting with `\u` of total length 6
 *  - a string starting with `\U` of total length 8
 *
 * Only the prefix and the length are inspected; the characters after the
 * prefix are not validated.
 *
 * @param char the candidate string
 * @returns true when the string matches one of the shapes above
 */
export function isSingleRegexCharacter(char: string): boolean {
    const size = char.length;

    // a lone character (including a lone backslash) always qualifies
    if (size === 1) {
        return true;
    }

    // every longer accepted form is an escape sequence
    if (!char.startsWith("\\")) {
        return false;
    }

    switch (size) {
        case 2:
            return true;
        case 6:
            return char.startsWith("\\u");
        case 8:
            return char.startsWith("\\U");
        default:
            return false;
    }
}
/**
 * Returns the final element of an array.
 *
 * @param array the array to read from
 * @returns the element at index `length - 1`; for an empty array this
 *          evaluates to `undefined` at runtime
 */
export function last<T>(array: T[]): T {
    const finalIndex = array.length - 1;
    return array[finalIndex];
}
@ -33,9 +40,9 @@ export function findLastIndexPredicate<T>(array: T[], predicate: (x: T) => boole
}
/**
 * Strips the first and last character from a string. Intended for quoted
 * literals, but the characters are not checked to actually be quotes.
 *
 * @param input the string to trim
 * @returns the input without its first and last character; inputs shorter
 *          than two characters yield the empty string
 */
export function removeQuotes(input: string): string {
    // slice(1, -1) drops exactly one character from each end. Unlike
    // substring, it does not swap its arguments when start > end, so 0- and
    // 1-character inputs come back empty rather than echoing the input.
    return input.slice(1, -1);
}
/**
 * Escapes a string so it can be embedded literally in a regular expression.
 *
 * Every occurrence of a backslash or one of
 * `= : - . [ ] ^ | ( ) * + ? { } $ /` is prefixed with a backslash.
 *
 * @param input the raw text
 * @returns the text with each special character backslash-escaped
 */
export function regexEscape(input: string): string {
    // Single global pass: a string pattern passed to replace() only touches
    // the first occurrence, so the backslash is folded into the character
    // class rather than handled by a separate non-global replace.
    return input.replace(/[\\=:\-.\[\]^|()*+?{}$\/]/g, "\\$&");
}