1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-16 20:40:08 -07:00

Added additional information to semantic errors

This commit is contained in:
Patrick Demian 2020-10-31 01:06:56 -04:00
parent 6e42c7e921
commit 18d4c4418f
4 changed files with 280 additions and 174 deletions

View File

@ -1,7 +1,8 @@
/* eslint-disable @typescript-eslint/no-unused-vars */ /* eslint-disable @typescript-eslint/no-unused-vars */
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */ /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter } from "./utilities"; import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last } from "./utilities";
import { IToken } from "chevrotain";
export enum RobotLanguage { export enum RobotLanguage {
JS, JS,
@ -10,9 +11,34 @@ export enum RobotLanguage {
Java Java
} }
/**
 * A validation error tied to a position in the original input text.
 */
export interface ISemanticError {
    /** Line on which the offending construct starts. */
    startLine: number;
    /** Column at which the offending construct starts. */
    startColumn: number;
    /** Size of the offending span, measured in character offsets. */
    length: number;
    /** Human-readable description of the problem. */
    message: string;
}
export abstract class H2RCST { export abstract class H2RCST {
public abstract validate(language: RobotLanguage): Error[]; public tokens: IToken[];
constructor(tokens: IToken[]) {
this.tokens = tokens;
}
public abstract validate(language: RobotLanguage): ISemanticError[];
public abstract toRegex(language: RobotLanguage): string; public abstract toRegex(language: RobotLanguage): string;
/**
 * Builds a semantic error covering the tokens recorded for this node.
 *
 * The position comes from the first token. The length is the offset distance
 * from the first token's start to the last token's end, falling back to the
 * last token's start when it carries no end offset.
 *
 * @param message description of the problem
 * @returns the error; all position fields are NaN when no tokens were recorded
 */
protected error(message: string): ISemanticError {
    // A node can be constructed with an empty token list. first()/last() would
    // then return undefined and the property reads below would throw.
    if (this.tokens.length === 0) {
        return {
            startLine: NaN,
            startColumn: NaN,
            length: NaN,
            message: message
        };
    }

    const f = first(this.tokens);
    const l = last(this.tokens);

    return {
        startLine: f.startLine ?? NaN,
        startColumn: f.startColumn ?? NaN,
        length: (l.endOffset ?? l.startOffset) - f.startOffset,
        message: message
    };
}
} }
/* eslint-disable no-bitwise */ /* eslint-disable no-bitwise */
@ -52,18 +78,16 @@ export class MatchStatementValue {
} }
} }
export abstract class StatementCST implements H2RCST { export abstract class StatementCST extends H2RCST {
public abstract validate(language: RobotLanguage): Error[];
public abstract toRegex(language: RobotLanguage): string;
} }
export class MatchSubStatementCST implements H2RCST { export class MatchSubStatementCST extends H2RCST {
constructor(public count: CountSubStatementCST | null, public invert: boolean = false, public values: MatchSubStatementValue[]) { constructor(public tokens: IToken[], public count: CountSubStatementCST | null, public invert: boolean = false, public values: MatchSubStatementValue[]) {
/* empty */ super(tokens);
} }
public validate(language: RobotLanguage): Error[] { public validate(language: RobotLanguage): ISemanticError[] {
let errors: Error[] = []; let errors: ISemanticError[] = [];
if (this.count) { if (this.count) {
errors = errors.concat(this.count.validate(language)); errors = errors.concat(this.count.validate(language));
@ -75,21 +99,21 @@ export class MatchSubStatementCST implements H2RCST {
let to = value.to as string; let to = value.to as string;
if (!isSingleRegexCharacter(from)) { if (!isSingleRegexCharacter(from)) {
errors.push(new Error("Between statement must begin with a single character")); errors.push(this.error("Between statement must begin with a single character"));
} }
else if (from.startsWith("\\u") || from.startsWith("\\U") || from.startsWith("\\")) { else if (from.startsWith("\\u") || from.startsWith("\\U") || from.startsWith("\\")) {
from = JSON.parse(`"${regexEscape(from)}"`); from = JSON.parse(`"${regexEscape(from)}"`);
} }
if (!isSingleRegexCharacter(to)) { if (!isSingleRegexCharacter(to)) {
errors.push(new Error("Between statement must end with a single character")); errors.push(this.error("Between statement must end with a single character"));
} }
else if (to.startsWith("\\u") || to.startsWith("\\U") || to.startsWith("\\")) { else if (to.startsWith("\\u") || to.startsWith("\\U") || to.startsWith("\\")) {
to = JSON.parse(`"${regexEscape(to)}"`); to = JSON.parse(`"${regexEscape(to)}"`);
} }
if (from.charCodeAt(0) >= to.charCodeAt(0)) { if (from.charCodeAt(0) >= to.charCodeAt(0)) {
errors.push(new Error("Between statement range invalid")); errors.push(this.error("Between statement range invalid"));
} }
} }
} }
@ -182,27 +206,29 @@ export class MatchSubStatementCST implements H2RCST {
} }
export class UsingStatementCST implements H2RCST { export class UsingStatementCST extends H2RCST {
constructor(public flags: UsingFlags[]) { constructor(public tokens: IToken[], public flags: UsingFlags[]) {
/* empty */ super(tokens);
} }
public validate(language: RobotLanguage): Error[] {
const errors: Error[] = []; public validate(language: RobotLanguage): ISemanticError[] {
const errors: ISemanticError[] = [];
let flag = this.flags[0]; let flag = this.flags[0];
for (let i = 1; i < this.flags.length; i++) { for (let i = 1; i < this.flags.length; i++) {
if (hasFlag(flag, this.flags[i])) { if (hasFlag(flag, this.flags[i])) {
errors.push(new Error("Duplicate modifier: " + MatchSubStatementType[this.flags[i]] )); errors.push(this.error("Duplicate modifier: " + MatchSubStatementType[this.flags[i]] ));
} }
flag = combineFlags(flag, this.flags[i]); flag = combineFlags(flag, this.flags[i]);
} }
if (hasFlag(flag, UsingFlags.Sensitive) && hasFlag(flag, UsingFlags.Insensitive)) { if (hasFlag(flag, UsingFlags.Sensitive) && hasFlag(flag, UsingFlags.Insensitive)) {
errors.push(new Error("Cannot be both case sensitive and insensitive")); errors.push(this.error("Cannot be both case sensitive and insensitive"));
} }
return errors; return errors;
} }
public toRegex(language: RobotLanguage): string { public toRegex(language: RobotLanguage): string {
let str = ""; let str = "";
let exact = false; let exact = false;
@ -226,19 +252,19 @@ export class UsingStatementCST implements H2RCST {
} }
} }
export class CountSubStatementCST implements H2RCST { export class CountSubStatementCST extends H2RCST {
constructor(public from: number, public to: number | null, public opt: "inclusive" | "exclusive" | "+" | null) { constructor(public tokens: IToken[], public from: number, public to: number | null = null, public opt: "inclusive" | "exclusive" | "+" | null = null) {
/* empty */ super(tokens);
} }
public validate(language: RobotLanguage): Error[] { public validate(language: RobotLanguage): ISemanticError[] {
const errors: Error[] = []; const errors: ISemanticError[] = [];
if (this.from < 0) { if (this.from < 0) {
errors.push(new Error("Value cannot be negative")); errors.push(this.error("Value cannot be negative"));
} }
else if (this.to !== null && ((this.opt === "exclusive" && (this.to-1) <= this.from) || this.to <= this.from)) { else if (this.to !== null && ((this.opt === "exclusive" && (this.to-1) <= this.from) || this.to <= this.from)) {
errors.push(new Error("Values must be in range of eachother")); errors.push(this.error("Values must be in range of eachother"));
} }
return errors; return errors;
@ -263,13 +289,13 @@ export class CountSubStatementCST implements H2RCST {
} }
} }
export class MatchStatementCST implements StatementCST { export class MatchStatementCST extends StatementCST {
constructor(public matches: MatchStatementValue[]) { constructor(public tokens: IToken[], public matches: MatchStatementValue[]) {
/* empty */ super(tokens);
} }
public validate(language: RobotLanguage): Error[] { public validate(language: RobotLanguage): ISemanticError[] {
let errors: Error[] = []; let errors: ISemanticError[] = [];
for (const match of this.matches) { for (const match of this.matches) {
errors = errors.concat(match.statement.validate(language)); errors = errors.concat(match.statement.validate(language));
@ -285,13 +311,13 @@ export class MatchStatementCST implements StatementCST {
} }
} }
export class RepeatStatementCST implements StatementCST { export class RepeatStatementCST extends StatementCST {
constructor(public optional: boolean, public count: CountSubStatementCST | null, public statements: StatementCST[]) { constructor(public tokens: IToken[], public optional: boolean, public count: CountSubStatementCST | null, public statements: StatementCST[]) {
/* empty */ super(tokens);
} }
public validate(language: RobotLanguage): Error[] { public validate(language: RobotLanguage): ISemanticError[] {
let errors: Error[] = []; let errors: ISemanticError[] = [];
if (this.count !== null) { if (this.count !== null) {
errors = errors.concat(this.count.validate(language)); errors = errors.concat(this.count.validate(language));
@ -335,16 +361,16 @@ export class RepeatStatementCST implements StatementCST {
} }
} }
export class GroupStatementCST implements StatementCST { export class GroupStatementCST extends StatementCST {
constructor(public optional: boolean, public name: string | null, public statements: StatementCST[]) { constructor(public tokens: IToken[], public optional: boolean, public name: string | null, public statements: StatementCST[]) {
/* empty */ super(tokens);
} }
public validate(language: RobotLanguage): Error[] { public validate(language: RobotLanguage): ISemanticError[] {
let errors : Error[] = []; let errors : ISemanticError[] = [];
if (language !== RobotLanguage.DotNet && language !== RobotLanguage.JS) { if (language !== RobotLanguage.DotNet && language !== RobotLanguage.JS) {
errors.push(new Error("This language does not support named groups")); errors.push(this.error("This language does not support named groups"));
} }
for (const statement of this.statements) { for (const statement of this.statements) {
@ -373,13 +399,13 @@ export class GroupStatementCST implements StatementCST {
} }
} }
export class RegularExpressionCST implements H2RCST { export class RegularExpressionCST extends H2RCST {
constructor(public usings: UsingStatementCST, public statements: StatementCST[]) { constructor(public tokens: IToken[], public usings: UsingStatementCST, public statements: StatementCST[]) {
/* empty */ super(tokens);
} }
public validate(language: RobotLanguage): Error[] { public validate(language: RobotLanguage): ISemanticError[] {
let errors: Error[] = this.usings.validate(language); let errors: ISemanticError[] = this.usings.validate(language);
for (const statement of this.statements) { for (const statement of this.statements) {
errors = errors.concat(statement.validate(language)); errors = errors.concat(statement.validate(language));

View File

@ -1,8 +1,9 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */ /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { EmbeddedActionsParser, IOrAlt } from "chevrotain"; import { EmbeddedActionsParser, IOrAlt, IToken } from "chevrotain";
import * as T from "./tokens"; import * as T from "./tokens";
import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST } from "./generator"; import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST } from "./generator";
import { first } from "./utilities";
export class Human2RegexParserOptions { export class Human2RegexParserOptions {
constructor(public skip_validations: boolean = false) { constructor(public skip_validations: boolean = false) {
@ -10,10 +11,21 @@ export class Human2RegexParserOptions {
} }
} }
/** Pairs one consumed token with the value derived from it. */
class TokenAndValue<T> {
    public token: IToken;
    public value: T;

    constructor(token: IToken, value: T) {
        this.token = token;
        this.value = value;
    }
}
/** Pairs a list of consumed tokens with the value derived from them. */
class TokensAndValue<T> {
    public tokens: IToken[];
    public value: T;

    constructor(tokens: IToken[], value: T) {
        this.tokens = tokens;
        this.value = value;
    }
}
export class Human2RegexParser extends EmbeddedActionsParser { export class Human2RegexParser extends EmbeddedActionsParser {
private static already_init = false; private static already_init = false;
public parse : (idxInCallingRule?: number, ...args: unknown[]) => RegularExpressionCST; public parse: (idxInCallingRule?: number, ...args: unknown[]) => RegularExpressionCST;
constructor(private options: Human2RegexParserOptions = new Human2RegexParserOptions()) { constructor(private options: Human2RegexParserOptions = new Human2RegexParserOptions()) {
super(T.AllTokens, { recoveryEnabled: false, maxLookahead: 2, skipValidations: options.skip_validations }); super(T.AllTokens, { recoveryEnabled: false, maxLookahead: 2, skipValidations: options.skip_validations });
@ -26,116 +38,96 @@ export class Human2RegexParser extends EmbeddedActionsParser {
const $ = this; const $ = this;
let nss_rules : IOrAlt<number>[] | null = null; // IN REGARDS TO KEEPING TOKENS:
// We don't need to keep every token, only the first and last ones,
// because the reported length is computed as the difference between those two tokens.
// However, some rules have optional tokens at their start or end.
// Each such optional token MUST be recorded, because it may turn out to be the first/last token.
// e.g. in "optional match 3..." the start token is "optional", whereas in "match 3..." the start token is "match"
let nss_rules : IOrAlt<TokenAndValue<number>>[] | null = null;
const NumberSubStatement = $.RULE("NumberSubStatement", () => { const NumberSubStatement = $.RULE("NumberSubStatement", () => {
let value: number = 0; return $.OR(nss_rules || (nss_rules = [
{ ALT: () => new TokenAndValue($.CONSUME(T.Zero), 0) },
value = $.OR(nss_rules || (nss_rules = [ { ALT: () => new TokenAndValue($.CONSUME(T.One), 1) },
{ ALT: () => new TokenAndValue($.CONSUME(T.Two), 2) },
{ ALT: () => new TokenAndValue($.CONSUME(T.Three), 3) },
{ ALT: () => new TokenAndValue($.CONSUME(T.Four), 4) },
{ ALT: () => new TokenAndValue($.CONSUME(T.Five), 5) },
{ ALT: () => new TokenAndValue($.CONSUME(T.Six), 6) },
{ ALT: () => new TokenAndValue($.CONSUME(T.Seven), 7) },
{ ALT: () => new TokenAndValue($.CONSUME(T.Eight), 8) },
{ ALT: () => new TokenAndValue($.CONSUME(T.Nine), 9) },
{ ALT: () => new TokenAndValue($.CONSUME(T.Ten), 10) },
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Zero); const tok = $.CONSUME(T.NumberLiteral);
return 0; return new TokenAndValue(tok, parseInt(tok.image));
}}, }}
{ ALT: () => {
$.CONSUME(T.One);
return 1;
}},
{ ALT: () => {
$.CONSUME(T.Two);
return 2;
}},
{ ALT: () => {
$.CONSUME(T.Three);
return 3;
}},
{ ALT: () => {
$.CONSUME(T.Four);
return 4;
}},
{ ALT: () => {
$.CONSUME(T.Five);
return 5;
}},
{ ALT: () => {
$.CONSUME(T.Six);
return 6;
}},
{ ALT: () => {
$.CONSUME(T.Seven);
return 7;
}},
{ ALT: () => {
$.CONSUME(T.Eight);
return 8;
}},
{ ALT: () => {
$.CONSUME(T.Nine);
return 9;
}},
{ ALT: () => {
$.CONSUME(T.Ten);
return 10;
}},
{ ALT: () => parseInt($.CONSUME(T.NumberLiteral).image) },
])); ]));
return value;
}); });
// 1, 1..2, between 1 and/to 2 inclusively/exclusively // 1, 1..2, between 1 and/to 2 inclusively/exclusively
const CountSubStatement = $.RULE("CountSubStatement", () => { const CountSubStatement = $.RULE("CountSubStatement", () => {
let from : number = 0; return $.OR([
let to: number | null = null;
let opt: "inclusive" | "exclusive" | "+" | null = null;
$.OR([
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Between); const tokens: IToken[] = [];
from = $.SUBRULE4(NumberSubStatement);
tokens.push($.CONSUME(T.Between));
const from = $.SUBRULE4(NumberSubStatement);
$.OR3([ $.OR3([
{ ALT: () => $.CONSUME2(T.To) }, { ALT: () => $.CONSUME2(T.To) },
{ ALT: () => $.CONSUME(T.And) } { ALT: () => $.CONSUME(T.And) }
]); ]);
to = $.SUBRULE5(NumberSubStatement); const to = $.SUBRULE5(NumberSubStatement);
$.OPTION4(() => $.CONSUME3(T.Times)); tokens.push(to.token);
$.OPTION5(() => { $.OPTION4(() => tokens.push($.CONSUME3(T.Times)));
$.OR4([ const opt = $.OPTION5(() => {
return $.OR4([
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Inclusive); tokens.push($.CONSUME(T.Inclusive));
opt = "inclusive"; return "inclusive";
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Exclusive); tokens.push($.CONSUME(T.Exclusive));
opt = "exclusive"; return "exclusive";
}} }}
]); ]);
}); });
return new CountSubStatementCST(tokens, from.value, to.value, opt as "inclusive" | "exclusive" | null);
}}, }},
{ ALT: () => { { ALT: () => {
$.OPTION2(() => $.CONSUME(T.From)); const tokens: IToken[] = [];
from = $.SUBRULE2(NumberSubStatement);
$.OR2([ $.OPTION2(() => tokens.push($.CONSUME(T.From)));
{ ALT: () => { const from = $.SUBRULE2(NumberSubStatement);
$.CONSUME(T.OrMore); const to = $.OR2([
opt = "+"; { ALT: () => new TokenAndValue($.CONSUME(T.OrMore), [ null, "+" ]) },
}},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.To); $.CONSUME(T.To);
to = $.SUBRULE3(NumberSubStatement); const val = $.SUBRULE3(NumberSubStatement);
return new TokenAndValue(val.token, [ val.value, null ]);
}} }}
]); ]);
$.OPTION3(() => $.CONSUME2(T.Times)); tokens.push(to.token);
$.OPTION3(() => tokens.push($.CONSUME2(T.Times)));
return new CountSubStatementCST(tokens, from.value, to.value ? to.value[0] : null, to.value ? to.value[1] : null);
}}, }},
{ ALT: () => { { ALT: () => {
$.OPTION(() => $.CONSUME(T.Exactly)); const tokens: IToken[] = [];
from = $.SUBRULE(NumberSubStatement); $.OPTION(() => tokens.push($.CONSUME(T.Exactly)));
$.OPTION6(() => $.CONSUME(T.Times)); const from = $.SUBRULE(NumberSubStatement);
tokens.push(from.token);
$.OPTION6(() => tokens.push($.CONSUME(T.Times)));
return new CountSubStatementCST(tokens, from.value);
}} }}
]); ]);
return new CountSubStatementCST(from, to, opt);
}); });
let mss_rules : IOrAlt<MatchSubStatementValue>[] | null = null; let mss_rules : IOrAlt<MatchSubStatementValue>[] | null = null;
@ -147,9 +139,19 @@ export class Human2RegexParser extends EmbeddedActionsParser {
let to : string | null = null; let to : string | null = null;
let type : MatchSubStatementType = MatchSubStatementType.Anything; let type : MatchSubStatementType = MatchSubStatementType.Anything;
count = $.OPTION(() => $.SUBRULE(CountSubStatement) ); const tokens: IToken[] = [];
count = $.OPTION(() => {
const css = $.SUBRULE(CountSubStatement);
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (css.tokens) {
tokens.push(first(css.tokens));
}
return css;
});
invert = $.OPTION2(() => { invert = $.OPTION2(() => {
$.CONSUME(T.Not); tokens.push($.CONSUME(T.Not));
return true; return true;
}); });
$.AT_LEAST_ONE_SEP({ $.AT_LEAST_ONE_SEP({
@ -161,7 +163,9 @@ export class Human2RegexParser extends EmbeddedActionsParser {
$.OPTION4(() => $.CONSUME(T.From)); $.OPTION4(() => $.CONSUME(T.From));
from = $.CONSUME2(T.StringLiteral).image; from = $.CONSUME2(T.StringLiteral).image;
$.CONSUME(T.To); $.CONSUME(T.To);
to = $.CONSUME3(T.StringLiteral).image; const token = $.CONSUME3(T.StringLiteral);
tokens.push(token);
to = token.image;
type = MatchSubStatementType.Between; type = MatchSubStatementType.Between;
return new MatchSubStatementValue(type, from, to); return new MatchSubStatementValue(type, from, to);
@ -170,73 +174,77 @@ export class Human2RegexParser extends EmbeddedActionsParser {
$.CONSUME(T.Between); $.CONSUME(T.Between);
from = $.CONSUME4(T.StringLiteral).image; from = $.CONSUME4(T.StringLiteral).image;
$.CONSUME(T.And); $.CONSUME(T.And);
to = $.CONSUME5(T.StringLiteral).image; const token = $.CONSUME5(T.StringLiteral);
to = token.image;
tokens.push(token);
type = MatchSubStatementType.Between; type = MatchSubStatementType.Between;
return new MatchSubStatementValue(type, from, to); return new MatchSubStatementValue(type, from, to);
}}, }},
{ ALT: () => { { ALT: () => {
from = $.CONSUME(T.StringLiteral).image; const token = $.CONSUME(T.StringLiteral);
tokens.push(token);
from = token.image;
type = MatchSubStatementType.SingleString; type = MatchSubStatementType.SingleString;
return new MatchSubStatementValue(type, from); return new MatchSubStatementValue(type, from);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Anything); tokens.push($.CONSUME(T.Anything));
type = MatchSubStatementType.Anything; type = MatchSubStatementType.Anything;
return new MatchSubStatementValue(type); return new MatchSubStatementValue(type);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Word); tokens.push($.CONSUME(T.Word));
type = MatchSubStatementType.Word; type = MatchSubStatementType.Word;
return new MatchSubStatementValue(type); return new MatchSubStatementValue(type);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Digit); tokens.push($.CONSUME(T.Digit));
type = MatchSubStatementType.Digit; type = MatchSubStatementType.Digit;
return new MatchSubStatementValue(type); return new MatchSubStatementValue(type);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Character); tokens.push($.CONSUME(T.Character));
type = MatchSubStatementType.Character; type = MatchSubStatementType.Character;
return new MatchSubStatementValue(type); return new MatchSubStatementValue(type);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Whitespace); tokens.push($.CONSUME(T.Whitespace));
type = MatchSubStatementType.Whitespace; type = MatchSubStatementType.Whitespace;
return new MatchSubStatementValue(type); return new MatchSubStatementValue(type);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Number); tokens.push($.CONSUME(T.Number));
type = MatchSubStatementType.Number; type = MatchSubStatementType.Number;
return new MatchSubStatementValue(type); return new MatchSubStatementValue(type);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Tab); tokens.push($.CONSUME(T.Tab));
type = MatchSubStatementType.Tab; type = MatchSubStatementType.Tab;
return new MatchSubStatementValue(type); return new MatchSubStatementValue(type);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Linefeed); tokens.push($.CONSUME(T.Linefeed));
type = MatchSubStatementType.Linefeed; type = MatchSubStatementType.Linefeed;
return new MatchSubStatementValue(type); return new MatchSubStatementValue(type);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Newline); tokens.push($.CONSUME(T.Newline));
type = MatchSubStatementType.Newline; type = MatchSubStatementType.Newline;
return new MatchSubStatementValue(type); return new MatchSubStatementValue(type);
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME(T.CarriageReturn); tokens.push($.CONSUME(T.CarriageReturn));
type = MatchSubStatementType.CarriageReturn; type = MatchSubStatementType.CarriageReturn;
return new MatchSubStatementValue(type); return new MatchSubStatementValue(type);
@ -245,19 +253,20 @@ export class Human2RegexParser extends EmbeddedActionsParser {
} }
}); });
return new MatchSubStatementCST(count, invert, values); return new MatchSubStatementCST(tokens, count, invert, values);
}); });
// optionally match "+" then 1+ words // optionally match "+" then 1+ words
const MatchStatement = $.RULE("MatchStatement", () => { const MatchStatement = $.RULE("MatchStatement", () => {
let optional = false; let optional = false;
const msv: MatchStatementValue[] = []; const msv: MatchStatementValue[] = [];
const tokens: IToken[] = [];
$.OPTION(() => { $.OPTION(() => {
$.CONSUME(T.Optional); tokens.push($.CONSUME(T.Optional));
optional = true; optional = true;
}); });
$.CONSUME(T.Match); tokens.push($.CONSUME(T.Match));
msv.push(new MatchStatementValue(optional, $.SUBRULE(MatchSubStatement))); msv.push(new MatchStatementValue(optional, $.SUBRULE(MatchSubStatement)));
$.MANY(() => { $.MANY(() => {
$.OR([ $.OR([
@ -274,9 +283,9 @@ export class Human2RegexParser extends EmbeddedActionsParser {
}); });
msv.push(new MatchStatementValue(optional, $.SUBRULE2(MatchSubStatement))); msv.push(new MatchStatementValue(optional, $.SUBRULE2(MatchSubStatement)));
}); });
$.CONSUME(T.EndOfLine); tokens.push($.CONSUME(T.EndOfLine));
return new MatchStatementCST(msv); return new MatchStatementCST(tokens, msv);
}); });
// using global matching // using global matching
@ -284,7 +293,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
const UsingStatement = $.RULE("UsingStatement", () => { const UsingStatement = $.RULE("UsingStatement", () => {
const usings: UsingFlags[] = []; const usings: UsingFlags[] = [];
$.CONSUME(T.Using); const tokens = [ $.CONSUME(T.Using) ];
$.AT_LEAST_ONE_SEP({ $.AT_LEAST_ONE_SEP({
SEP: T.And, SEP: T.And,
DEF: () => { DEF: () => {
@ -313,67 +322,75 @@ export class Human2RegexParser extends EmbeddedActionsParser {
$.OPTION(() => $.CONSUME(T.Matching)); $.OPTION(() => $.CONSUME(T.Matching));
} }
}); });
$.CONSUME(T.EndOfLine); tokens.push($.CONSUME(T.EndOfLine));
return usings; return new TokensAndValue(tokens, usings);
}); });
const GroupStatement = $.RULE("GroupStatement", () => { const GroupStatement = $.RULE("GroupStatement", () => {
const tokens: IToken[] = [];
let optional = false; let optional = false;
let name: string | null = null; let name: string | null = null;
const statement: StatementCST[] = []; const statement: StatementCST[] = [];
$.OR([ tokens.push($.OR([
{ ALT: () => { { ALT: () => {
optional = true; optional = true;
$.CONSUME(T.Optional); const first_token = $.CONSUME(T.Optional);
$.CONSUME(T.Create); $.CONSUME(T.Create);
$.CONSUME(T.A); $.CONSUME(T.A);
return first_token;
}}, }},
{ ALT: () => { { ALT: () => {
$.CONSUME2(T.Create); const first_token = $.CONSUME2(T.Create);
$.CONSUME2(T.A); $.CONSUME2(T.A);
$.OPTION2(() => { $.OPTION2(() => {
$.CONSUME2(T.Optional); $.CONSUME2(T.Optional);
optional = true; optional = true;
}); });
return first_token;
}} }}
]); ]));
$.CONSUME(T.Group); $.CONSUME(T.Group);
$.OPTION(() => { $.OPTION(() => {
$.CONSUME(T.Called); $.CONSUME(T.Called);
name = $.CONSUME(T.Identifier).image; name = $.CONSUME(T.Identifier).image;
}); });
$.CONSUME2(T.EndOfLine); // Note: Technically not the end token,
// BUT this is way more useful than the Outdent for error reporting
tokens.push($.CONSUME2(T.EndOfLine));
$.CONSUME(T.Indent); $.CONSUME(T.Indent);
$.AT_LEAST_ONE(() => { $.AT_LEAST_ONE(() => {
statement.push($.SUBRULE(Statement)); statement.push($.SUBRULE(Statement));
}); });
$.CONSUME(T.Outdent); $.CONSUME(T.Outdent);
return new GroupStatementCST(optional, name, statement); return new GroupStatementCST(tokens, optional, name, statement);
}); });
const RepeatStatement = $.RULE("RepeatStatement", () => { const RepeatStatement = $.RULE("RepeatStatement", () => {
const tokens: IToken[] = [];
let optional = false; let optional = false;
let count : CountSubStatementCST | null = null; let count : CountSubStatementCST | null = null;
const statements: StatementCST[] = []; const statements: StatementCST[] = [];
$.OPTION3(() => { $.OPTION3(() => {
$.CONSUME(T.Optional); tokens.push($.CONSUME(T.Optional));
optional = true; optional = true;
}); });
$.CONSUME(T.Repeat); tokens.push($.CONSUME(T.Repeat));
$.OPTION(() => count = $.SUBRULE(CountSubStatement)); $.OPTION(() => count = $.SUBRULE(CountSubStatement));
$.CONSUME3(T.EndOfLine); $.CONSUME3(T.EndOfLine);
$.CONSUME(T.Indent); $.CONSUME(T.Indent);
$.AT_LEAST_ONE(() => { $.AT_LEAST_ONE(() => {
statements.push($.SUBRULE(Statement)); statements.push($.SUBRULE(Statement));
}); });
$.CONSUME(T.Outdent); tokens.push($.CONSUME(T.Outdent));
return new RepeatStatementCST(optional, count, statements); return new RepeatStatementCST(tokens, optional, count, statements);
}); });
const Statement = $.RULE("Statement", () => { const Statement = $.RULE("Statement", () => {
@ -385,13 +402,18 @@ export class Human2RegexParser extends EmbeddedActionsParser {
}); });
const Regex = $.RULE("Regex", () => { const Regex = $.RULE("Regex", () => {
let tokens: IToken[] = [];
let usings: UsingFlags[] = []; let usings: UsingFlags[] = [];
const statements: StatementCST[] = []; const statements: StatementCST[] = [];
$.MANY(() => usings = usings.concat($.SUBRULE(UsingStatement))); $.MANY(() => {
const using = $.SUBRULE(UsingStatement);
tokens = tokens.concat(using.tokens);
usings = usings.concat(using.value);
});
$.MANY2(() => statements.push($.SUBRULE(Statement)) ); $.MANY2(() => statements.push($.SUBRULE(Statement)) );
return new RegularExpressionCST(new UsingStatementCST(usings), statements); return new RegularExpressionCST([], new UsingStatementCST(tokens, usings), statements);
}); });
this.performSelfAnalysis(); this.performSelfAnalysis();

View File

@ -5,6 +5,8 @@ import "./webpage/style.css";
import { Human2RegexLexer, Human2RegexLexerOptions } from "./lexer"; import { Human2RegexLexer, Human2RegexLexerOptions } from "./lexer";
import { Human2RegexParser, Human2RegexParserOptions } from "./parser"; import { Human2RegexParser, Human2RegexParserOptions } from "./parser";
import { RobotLanguage } from "./generator";
import { lexErrorToCommonError, parseErrorToCommonError, semanticErrorToCommonError, ICommonError } from "./utilities";
/* /*
$(function() { $(function() {
@ -12,6 +14,8 @@ $(function() {
}); });
*/ */
const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(false)); const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(false));
const parser = new Human2RegexParser(new Human2RegexParserOptions(false)); const parser = new Human2RegexParser(new Human2RegexParserOptions(false));
@ -28,9 +32,9 @@ create an optional group called protocol
match "http" match "http"
optionally match "s" optionally match "s"
match "://" match "://"
create a group called subdomain create an optional group called subdomain
repeat repeat
match 1+ words match a word
match "." match "."
create a group called domain create a group called domain
match 1+ words or "_" or "-" match 1+ words or "_" or "-"
@ -57,9 +61,23 @@ create an optional group
`); `);
console.log(result.errors);
parser.input = result.tokens; const total_errors: ICommonError[] = [];
const regex = parser.parse();
console.log(JSON.stringify(regex, undefined, 4));
console.log(parser.errors); result.errors.map(lexErrorToCommonError).forEach((x) => total_errors.push(x));
// Only parse when no errors have been collected so far.
if (total_errors.length === 0) {
    parser.input = result.tokens;
    const regex = parser.parse();

    total_errors.push(...parser.errors.map(parseErrorToCommonError));

    // Only inspect the tree when parsing succeeded: after a syntax error the
    // parser may not hand back a usable tree, and validating it could throw.
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (total_errors.length === 0) {
        total_errors.push(...regex.validate(RobotLanguage.JS).map(semanticErrorToCommonError));

        // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
        if (total_errors.length === 0) {
            console.log(regex.toRegex(RobotLanguage.JS));
        }
    }
}

// Serialize explicitly: concatenating the array to a string would print
// "[object Object]" for every entry instead of the error details.
console.log("Errors = " + JSON.stringify(total_errors, undefined, 4));

View File

@ -1,5 +1,8 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */ /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { ISemanticError } from "./generator";
import { IRecognitionException, ILexingError } from "chevrotain";
/* eslint-disable no-bitwise */ /* eslint-disable no-bitwise */
export function hasFlag(a: number, b: number) : boolean { export function hasFlag(a: number, b: number) : boolean {
return (a & b) !== 0; return (a & b) !== 0;
@ -17,11 +20,15 @@ export function isSingleRegexCharacter(char: string): boolean {
char.length === 1; char.length === 1;
} }
export function last<T>(array: T[]) : T { export function first<T>(array: T[]): T {
return array[0];
}
export function last<T>(array: T[]): T {
return array[array.length-1]; return array[array.length-1];
} }
export function findLastIndex<T>(array: T[], value: T) : number { export function findLastIndex<T>(array: T[], value: T): number {
for (let index = array.length-1; index >= 0; index--) { for (let index = array.length-1; index >= 0; index--) {
if (array[index] === value) { if (array[index] === value) {
return index; return index;
@ -46,3 +53,36 @@ export function removeQuotes(input: string): string {
export function regexEscape(input: string) : string { export function regexEscape(input: string) : string {
return input.replace("\\", "\\\\").replace(/([=:\-\.\[\]\^\|\(\)\*\+\?\{\}\$\/])/g, "\\$1"); return input.replace("\\", "\\\\").replace(/([=:\-\.\[\]\^\|\(\)\*\+\?\{\}\$\/])/g, "\\$1");
} }
/**
 * Uniform error shape that lexer, parser and semantic errors are converted to.
 */
export interface ICommonError {
    /** Category label, e.g. "Lexer Error", "Parser Error" or "Semantic Error". */
    type: string;
    /** Line on which the error starts. */
    startLine: number;
    /** Column at which the error starts. */
    startColumn: number;
    /** Size of the erroneous span. */
    length: number;
    /** Human-readable description of the problem. */
    message: string;
}
/**
 * Converts a lexing error into the common error shape.
 *
 * @param error the lexing error to convert
 * @returns a new object labelled "Lexer Error" carrying the error's line, column, length and message
 */
export function lexErrorToCommonError(error: ILexingError): ICommonError {
    const { line, column, length, message } = error;

    return {
        type: "Lexer Error",
        startLine: line,
        startColumn: column,
        length,
        message
    };
}
/**
 * Converts a parser exception into the common error shape.
 *
 * The position is taken from the exception's token; fields the token does not
 * provide become NaN.
 *
 * @param error the parser exception to convert
 * @returns a new object labelled "Parser Error"; `message` is "<name>: <message>"
 */
export function parseErrorToCommonError(error: IRecognitionException): ICommonError {
    const token = error.token;

    return {
        type: "Parser Error",
        startLine: token.startLine ?? NaN,
        startColumn: token.startColumn ?? NaN,
        // Parenthesized on purpose: `-` binds tighter than `??`, so without the
        // parentheses the raw end offset would be reported as the length.
        length: (token.endOffset ?? NaN) - token.startOffset,
        message: error.name + ": " + error.message,
    };
}
/**
 * Converts a semantic error into the common error shape.
 *
 * Returns a fresh object rather than tagging the argument in place, so the
 * caller's error object is left untouched.
 *
 * @param error the semantic error to convert
 * @returns a copy of `error` with `type` set to "Semantic Error"
 */
export function semanticErrorToCommonError(error: ISemanticError): ICommonError {
    return { ...error, type: "Semantic Error" };
}