1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-16 04:20:35 -07:00

Added additional information to semantic errors

This commit is contained in:
Patrick Demian 2020-10-31 01:06:56 -04:00
parent 6e42c7e921
commit 18d4c4418f
4 changed files with 280 additions and 174 deletions

View File

@ -1,7 +1,8 @@
/* eslint-disable @typescript-eslint/no-unused-vars */
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter } from "./utilities";
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last } from "./utilities";
import { IToken } from "chevrotain";
export enum RobotLanguage {
JS,
@ -10,9 +11,34 @@ export enum RobotLanguage {
Java
}
/**
 * Location and description of a problem found while validating the syntax tree.
 */
export interface ISemanticError {
    /** Line of the first offending token (NaN when the token carries no line). */
    startLine: number;
    /** Column of the first offending token (NaN when the token carries no column). */
    startColumn: number;
    /** Size of the offending span, derived from token character offsets. */
    length: number;
    /** Human readable description of the problem. */
    message: string;
}
export abstract class H2RCST {
public abstract validate(language: RobotLanguage): Error[];
public tokens: IToken[];
/**
 * Stores the tokens associated with this node; error() reads the first and
 * last of them to compute where a semantic error is located.
 *
 * @param tokens tokens associated with this node
 */
constructor(tokens: IToken[]) {
this.tokens = tokens;
}
public abstract validate(language: RobotLanguage): ISemanticError[];
public abstract toRegex(language: RobotLanguage): string;
/**
 * Builds a semantic error located at the span covered by this node's tokens.
 *
 * The span starts at the first token and ends at the last token. When the
 * node holds no tokens there is no position to report, so every positional
 * field is NaN instead of throwing on an undefined token.
 *
 * @param message description of the problem
 * @returns the error with line, column and length filled in
 */
protected error(message: string): ISemanticError {
    if (this.tokens.length === 0) {
        return { startLine: NaN, startColumn: NaN, length: NaN, message: message };
    }

    const f = first(this.tokens);
    const l = last(this.tokens);

    // endOffset is the offset of the token's last character (inclusive),
    // so the number of characters covered is end - start + 1.
    const end_offset = l.endOffset ?? l.startOffset;

    return {
        startLine: f.startLine ?? NaN,
        startColumn: f.startColumn ?? NaN,
        length: end_offset - f.startOffset + 1,
        message: message
    };
}
}
/* eslint-disable no-bitwise */
@ -52,18 +78,16 @@ export class MatchStatementValue {
}
}
export abstract class StatementCST implements H2RCST {
public abstract validate(language: RobotLanguage): Error[];
public abstract toRegex(language: RobotLanguage): string;
export abstract class StatementCST extends H2RCST {
}
export class MatchSubStatementCST implements H2RCST {
constructor(public count: CountSubStatementCST | null, public invert: boolean = false, public values: MatchSubStatementValue[]) {
/* empty */
export class MatchSubStatementCST extends H2RCST {
constructor(public tokens: IToken[], public count: CountSubStatementCST | null, public invert: boolean = false, public values: MatchSubStatementValue[]) {
super(tokens);
}
public validate(language: RobotLanguage): Error[] {
let errors: Error[] = [];
public validate(language: RobotLanguage): ISemanticError[] {
let errors: ISemanticError[] = [];
if (this.count) {
errors = errors.concat(this.count.validate(language));
@ -75,21 +99,21 @@ export class MatchSubStatementCST implements H2RCST {
let to = value.to as string;
if (!isSingleRegexCharacter(from)) {
errors.push(new Error("Between statement must begin with a single character"));
errors.push(this.error("Between statement must begin with a single character"));
}
else if (from.startsWith("\\u") || from.startsWith("\\U") || from.startsWith("\\")) {
from = JSON.parse(`"${regexEscape(from)}"`);
}
if (!isSingleRegexCharacter(to)) {
errors.push(new Error("Between statement must end with a single character"));
errors.push(this.error("Between statement must end with a single character"));
}
else if (to.startsWith("\\u") || to.startsWith("\\U") || to.startsWith("\\")) {
to = JSON.parse(`"${regexEscape(to)}"`);
}
if (from.charCodeAt(0) >= to.charCodeAt(0)) {
errors.push(new Error("Between statement range invalid"));
errors.push(this.error("Between statement range invalid"));
}
}
}
@ -182,27 +206,29 @@ export class MatchSubStatementCST implements H2RCST {
}
export class UsingStatementCST implements H2RCST {
constructor(public flags: UsingFlags[]) {
/* empty */
export class UsingStatementCST extends H2RCST {
constructor(public tokens: IToken[], public flags: UsingFlags[]) {
super(tokens);
}
public validate(language: RobotLanguage): Error[] {
const errors: Error[] = [];
public validate(language: RobotLanguage): ISemanticError[] {
const errors: ISemanticError[] = [];
let flag = this.flags[0];
for (let i = 1; i < this.flags.length; i++) {
if (hasFlag(flag, this.flags[i])) {
errors.push(new Error("Duplicate modifier: " + MatchSubStatementType[this.flags[i]] ));
errors.push(this.error("Duplicate modifier: " + MatchSubStatementType[this.flags[i]] ));
}
flag = combineFlags(flag, this.flags[i]);
}
if (hasFlag(flag, UsingFlags.Sensitive) && hasFlag(flag, UsingFlags.Insensitive)) {
errors.push(new Error("Cannot be both case sensitive and insensitive"));
errors.push(this.error("Cannot be both case sensitive and insensitive"));
}
return errors;
}
public toRegex(language: RobotLanguage): string {
let str = "";
let exact = false;
@ -226,19 +252,19 @@ export class UsingStatementCST implements H2RCST {
}
}
export class CountSubStatementCST implements H2RCST {
constructor(public from: number, public to: number | null, public opt: "inclusive" | "exclusive" | "+" | null) {
/* empty */
export class CountSubStatementCST extends H2RCST {
constructor(public tokens: IToken[], public from: number, public to: number | null = null, public opt: "inclusive" | "exclusive" | "+" | null = null) {
super(tokens);
}
public validate(language: RobotLanguage): Error[] {
const errors: Error[] = [];
public validate(language: RobotLanguage): ISemanticError[] {
const errors: ISemanticError[] = [];
if (this.from < 0) {
errors.push(new Error("Value cannot be negative"));
errors.push(this.error("Value cannot be negative"));
}
else if (this.to !== null && ((this.opt === "exclusive" && (this.to-1) <= this.from) || this.to <= this.from)) {
errors.push(new Error("Values must be in range of eachother"));
errors.push(this.error("Values must be in range of eachother"));
}
return errors;
@ -263,13 +289,13 @@ export class CountSubStatementCST implements H2RCST {
}
}
export class MatchStatementCST implements StatementCST {
constructor(public matches: MatchStatementValue[]) {
/* empty */
export class MatchStatementCST extends StatementCST {
constructor(public tokens: IToken[], public matches: MatchStatementValue[]) {
super(tokens);
}
public validate(language: RobotLanguage): Error[] {
let errors: Error[] = [];
public validate(language: RobotLanguage): ISemanticError[] {
let errors: ISemanticError[] = [];
for (const match of this.matches) {
errors = errors.concat(match.statement.validate(language));
@ -285,13 +311,13 @@ export class MatchStatementCST implements StatementCST {
}
}
export class RepeatStatementCST implements StatementCST {
constructor(public optional: boolean, public count: CountSubStatementCST | null, public statements: StatementCST[]) {
/* empty */
export class RepeatStatementCST extends StatementCST {
constructor(public tokens: IToken[], public optional: boolean, public count: CountSubStatementCST | null, public statements: StatementCST[]) {
super(tokens);
}
public validate(language: RobotLanguage): Error[] {
let errors: Error[] = [];
public validate(language: RobotLanguage): ISemanticError[] {
let errors: ISemanticError[] = [];
if (this.count !== null) {
errors = errors.concat(this.count.validate(language));
@ -335,16 +361,16 @@ export class RepeatStatementCST implements StatementCST {
}
}
export class GroupStatementCST implements StatementCST {
constructor(public optional: boolean, public name: string | null, public statements: StatementCST[]) {
/* empty */
export class GroupStatementCST extends StatementCST {
constructor(public tokens: IToken[], public optional: boolean, public name: string | null, public statements: StatementCST[]) {
super(tokens);
}
public validate(language: RobotLanguage): Error[] {
let errors : Error[] = [];
public validate(language: RobotLanguage): ISemanticError[] {
let errors : ISemanticError[] = [];
if (language !== RobotLanguage.DotNet && language !== RobotLanguage.JS) {
errors.push(new Error("This language does not support named groups"));
errors.push(this.error("This language does not support named groups"));
}
for (const statement of this.statements) {
@ -373,13 +399,13 @@ export class GroupStatementCST implements StatementCST {
}
}
export class RegularExpressionCST implements H2RCST {
constructor(public usings: UsingStatementCST, public statements: StatementCST[]) {
/* empty */
export class RegularExpressionCST extends H2RCST {
constructor(public tokens: IToken[], public usings: UsingStatementCST, public statements: StatementCST[]) {
super(tokens);
}
public validate(language: RobotLanguage): Error[] {
let errors: Error[] = this.usings.validate(language);
public validate(language: RobotLanguage): ISemanticError[] {
let errors: ISemanticError[] = this.usings.validate(language);
for (const statement of this.statements) {
errors = errors.concat(statement.validate(language));

View File

@ -1,8 +1,9 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { EmbeddedActionsParser, IOrAlt } from "chevrotain";
import { EmbeddedActionsParser, IOrAlt, IToken } from "chevrotain";
import * as T from "./tokens";
import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST } from "./generator";
import { first } from "./utilities";
export class Human2RegexParserOptions {
constructor(public skip_validations: boolean = false) {
@ -10,10 +11,21 @@ export class Human2RegexParserOptions {
}
}
/**
 * Pairs one consumed token with the value that was derived from it.
 */
class TokenAndValue<TValue> {
    public token: IToken;
    public value: TValue;

    constructor(token: IToken, value: TValue) {
        this.token = token;
        this.value = value;
    }
}
/**
 * Pairs a list of consumed tokens with the value that was derived from them.
 */
class TokensAndValue<TValue> {
    public tokens: IToken[];
    public value: TValue;

    constructor(tokens: IToken[], value: TValue) {
        this.tokens = tokens;
        this.value = value;
    }
}
export class Human2RegexParser extends EmbeddedActionsParser {
private static already_init = false;
public parse : (idxInCallingRule?: number, ...args: unknown[]) => RegularExpressionCST;
public parse: (idxInCallingRule?: number, ...args: unknown[]) => RegularExpressionCST;
constructor(private options: Human2RegexParserOptions = new Human2RegexParserOptions()) {
super(T.AllTokens, { recoveryEnabled: false, maxLookahead: 2, skipValidations: options.skip_validations });
@ -26,116 +38,96 @@ export class Human2RegexParser extends EmbeddedActionsParser {
const $ = this;
let nss_rules : IOrAlt<number>[] | null = null;
// IN REGARDS TO KEEPING TOKENS:
// We don't really need to keep each token, only the first and last tokens
// This is due to the fact we calculate the difference between those tokens
// However, sometimes we have optional starts and ends
// Each optional near the start and end MUST be recorded because they may be the first/last token
// ex) "optional match 3..." the start token is "optional", but "match 3..."'s start token is "match"
let nss_rules : IOrAlt<TokenAndValue<number>>[] | null = null;
const NumberSubStatement = $.RULE("NumberSubStatement", () => {
let value: number = 0;
value = $.OR(nss_rules || (nss_rules = [
return $.OR(nss_rules || (nss_rules = [
{ ALT: () => new TokenAndValue($.CONSUME(T.Zero), 0) },
{ ALT: () => new TokenAndValue($.CONSUME(T.One), 1) },
{ ALT: () => new TokenAndValue($.CONSUME(T.Two), 2) },
{ ALT: () => new TokenAndValue($.CONSUME(T.Three), 3) },
{ ALT: () => new TokenAndValue($.CONSUME(T.Four), 4) },
{ ALT: () => new TokenAndValue($.CONSUME(T.Five), 5) },
{ ALT: () => new TokenAndValue($.CONSUME(T.Six), 6) },
{ ALT: () => new TokenAndValue($.CONSUME(T.Seven), 7) },
{ ALT: () => new TokenAndValue($.CONSUME(T.Eight), 8) },
{ ALT: () => new TokenAndValue($.CONSUME(T.Nine), 9) },
{ ALT: () => new TokenAndValue($.CONSUME(T.Ten), 10) },
{ ALT: () => {
$.CONSUME(T.Zero);
return 0;
}},
{ ALT: () => {
$.CONSUME(T.One);
return 1;
}},
{ ALT: () => {
$.CONSUME(T.Two);
return 2;
}},
{ ALT: () => {
$.CONSUME(T.Three);
return 3;
}},
{ ALT: () => {
$.CONSUME(T.Four);
return 4;
}},
{ ALT: () => {
$.CONSUME(T.Five);
return 5;
}},
{ ALT: () => {
$.CONSUME(T.Six);
return 6;
}},
{ ALT: () => {
$.CONSUME(T.Seven);
return 7;
}},
{ ALT: () => {
$.CONSUME(T.Eight);
return 8;
}},
{ ALT: () => {
$.CONSUME(T.Nine);
return 9;
}},
{ ALT: () => {
$.CONSUME(T.Ten);
return 10;
}},
{ ALT: () => parseInt($.CONSUME(T.NumberLiteral).image) },
const tok = $.CONSUME(T.NumberLiteral);
return new TokenAndValue(tok, parseInt(tok.image));
}}
]));
return value;
});
// 1, 1..2, between 1 and/to 2 inclusively/exclusively
const CountSubStatement = $.RULE("CountSubStatement", () => {
let from : number = 0;
let to: number | null = null;
let opt: "inclusive" | "exclusive" | "+" | null = null;
$.OR([
return $.OR([
{ ALT: () => {
$.CONSUME(T.Between);
from = $.SUBRULE4(NumberSubStatement);
const tokens: IToken[] = [];
tokens.push($.CONSUME(T.Between));
const from = $.SUBRULE4(NumberSubStatement);
$.OR3([
{ ALT: () => $.CONSUME2(T.To) },
{ ALT: () => $.CONSUME(T.And) }
]);
to = $.SUBRULE5(NumberSubStatement);
$.OPTION4(() => $.CONSUME3(T.Times));
$.OPTION5(() => {
$.OR4([
const to = $.SUBRULE5(NumberSubStatement);
tokens.push(to.token);
$.OPTION4(() => tokens.push($.CONSUME3(T.Times)));
const opt = $.OPTION5(() => {
return $.OR4([
{ ALT: () => {
$.CONSUME(T.Inclusive);
opt = "inclusive";
tokens.push($.CONSUME(T.Inclusive));
return "inclusive";
}},
{ ALT: () => {
$.CONSUME(T.Exclusive);
opt = "exclusive";
tokens.push($.CONSUME(T.Exclusive));
return "exclusive";
}}
]);
});
return new CountSubStatementCST(tokens, from.value, to.value, opt as "inclusive" | "exclusive" | null);
}},
{ ALT: () => {
$.OPTION2(() => $.CONSUME(T.From));
from = $.SUBRULE2(NumberSubStatement);
$.OR2([
{ ALT: () => {
$.CONSUME(T.OrMore);
opt = "+";
}},
{ ALT: () => {
const tokens: IToken[] = [];
$.OPTION2(() => tokens.push($.CONSUME(T.From)));
const from = $.SUBRULE2(NumberSubStatement);
const to = $.OR2([
{ ALT: () => new TokenAndValue($.CONSUME(T.OrMore), [ null, "+" ]) },
{ ALT: () => {
$.CONSUME(T.To);
to = $.SUBRULE3(NumberSubStatement);
const val = $.SUBRULE3(NumberSubStatement);
return new TokenAndValue(val.token, [ val.value, null ]);
}}
]);
$.OPTION3(() => $.CONSUME2(T.Times));
tokens.push(to.token);
$.OPTION3(() => tokens.push($.CONSUME2(T.Times)));
return new CountSubStatementCST(tokens, from.value, to.value ? to.value[0] : null, to.value ? to.value[1] : null);
}},
{ ALT: () => {
$.OPTION(() => $.CONSUME(T.Exactly));
from = $.SUBRULE(NumberSubStatement);
$.OPTION6(() => $.CONSUME(T.Times));
const tokens: IToken[] = [];
$.OPTION(() => tokens.push($.CONSUME(T.Exactly)));
const from = $.SUBRULE(NumberSubStatement);
tokens.push(from.token);
$.OPTION6(() => tokens.push($.CONSUME(T.Times)));
return new CountSubStatementCST(tokens, from.value);
}}
]);
return new CountSubStatementCST(from, to, opt);
});
let mss_rules : IOrAlt<MatchSubStatementValue>[] | null = null;
@ -147,9 +139,19 @@ export class Human2RegexParser extends EmbeddedActionsParser {
let to : string | null = null;
let type : MatchSubStatementType = MatchSubStatementType.Anything;
count = $.OPTION(() => $.SUBRULE(CountSubStatement) );
const tokens: IToken[] = [];
count = $.OPTION(() => {
const css = $.SUBRULE(CountSubStatement);
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (css.tokens) {
tokens.push(first(css.tokens));
}
return css;
});
invert = $.OPTION2(() => {
$.CONSUME(T.Not);
tokens.push($.CONSUME(T.Not));
return true;
});
$.AT_LEAST_ONE_SEP({
@ -161,7 +163,9 @@ export class Human2RegexParser extends EmbeddedActionsParser {
$.OPTION4(() => $.CONSUME(T.From));
from = $.CONSUME2(T.StringLiteral).image;
$.CONSUME(T.To);
to = $.CONSUME3(T.StringLiteral).image;
const token = $.CONSUME3(T.StringLiteral);
tokens.push(token);
to = token.image;
type = MatchSubStatementType.Between;
return new MatchSubStatementValue(type, from, to);
@ -170,73 +174,77 @@ export class Human2RegexParser extends EmbeddedActionsParser {
$.CONSUME(T.Between);
from = $.CONSUME4(T.StringLiteral).image;
$.CONSUME(T.And);
to = $.CONSUME5(T.StringLiteral).image;
const token = $.CONSUME5(T.StringLiteral);
to = token.image;
tokens.push(token);
type = MatchSubStatementType.Between;
return new MatchSubStatementValue(type, from, to);
}},
{ ALT: () => {
from = $.CONSUME(T.StringLiteral).image;
const token = $.CONSUME(T.StringLiteral);
tokens.push(token);
from = token.image;
type = MatchSubStatementType.SingleString;
return new MatchSubStatementValue(type, from);
}},
{ ALT: () => {
$.CONSUME(T.Anything);
tokens.push($.CONSUME(T.Anything));
type = MatchSubStatementType.Anything;
return new MatchSubStatementValue(type);
}},
{ ALT: () => {
$.CONSUME(T.Word);
tokens.push($.CONSUME(T.Word));
type = MatchSubStatementType.Word;
return new MatchSubStatementValue(type);
}},
{ ALT: () => {
$.CONSUME(T.Digit);
tokens.push($.CONSUME(T.Digit));
type = MatchSubStatementType.Digit;
return new MatchSubStatementValue(type);
}},
{ ALT: () => {
$.CONSUME(T.Character);
tokens.push($.CONSUME(T.Character));
type = MatchSubStatementType.Character;
return new MatchSubStatementValue(type);
}},
{ ALT: () => {
$.CONSUME(T.Whitespace);
tokens.push($.CONSUME(T.Whitespace));
type = MatchSubStatementType.Whitespace;
return new MatchSubStatementValue(type);
}},
{ ALT: () => {
$.CONSUME(T.Number);
tokens.push($.CONSUME(T.Number));
type = MatchSubStatementType.Number;
return new MatchSubStatementValue(type);
}},
{ ALT: () => {
$.CONSUME(T.Tab);
tokens.push($.CONSUME(T.Tab));
type = MatchSubStatementType.Tab;
return new MatchSubStatementValue(type);
}},
{ ALT: () => {
$.CONSUME(T.Linefeed);
tokens.push($.CONSUME(T.Linefeed));
type = MatchSubStatementType.Linefeed;
return new MatchSubStatementValue(type);
}},
{ ALT: () => {
$.CONSUME(T.Newline);
tokens.push($.CONSUME(T.Newline));
type = MatchSubStatementType.Newline;
return new MatchSubStatementValue(type);
}},
{ ALT: () => {
$.CONSUME(T.CarriageReturn);
tokens.push($.CONSUME(T.CarriageReturn));
type = MatchSubStatementType.CarriageReturn;
return new MatchSubStatementValue(type);
@ -245,19 +253,20 @@ export class Human2RegexParser extends EmbeddedActionsParser {
}
});
return new MatchSubStatementCST(count, invert, values);
return new MatchSubStatementCST(tokens, count, invert, values);
});
// optionally match "+" then 1+ words
const MatchStatement = $.RULE("MatchStatement", () => {
let optional = false;
const msv: MatchStatementValue[] = [];
const tokens: IToken[] = [];
$.OPTION(() => {
$.CONSUME(T.Optional);
tokens.push($.CONSUME(T.Optional));
optional = true;
});
$.CONSUME(T.Match);
tokens.push($.CONSUME(T.Match));
msv.push(new MatchStatementValue(optional, $.SUBRULE(MatchSubStatement)));
$.MANY(() => {
$.OR([
@ -274,9 +283,9 @@ export class Human2RegexParser extends EmbeddedActionsParser {
});
msv.push(new MatchStatementValue(optional, $.SUBRULE2(MatchSubStatement)));
});
$.CONSUME(T.EndOfLine);
tokens.push($.CONSUME(T.EndOfLine));
return new MatchStatementCST(msv);
return new MatchStatementCST(tokens, msv);
});
// using global matching
@ -284,7 +293,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
const UsingStatement = $.RULE("UsingStatement", () => {
const usings: UsingFlags[] = [];
$.CONSUME(T.Using);
const tokens = [ $.CONSUME(T.Using) ];
$.AT_LEAST_ONE_SEP({
SEP: T.And,
DEF: () => {
@ -313,67 +322,75 @@ export class Human2RegexParser extends EmbeddedActionsParser {
$.OPTION(() => $.CONSUME(T.Matching));
}
});
$.CONSUME(T.EndOfLine);
tokens.push($.CONSUME(T.EndOfLine));
return usings;
return new TokensAndValue(tokens, usings);
});
const GroupStatement = $.RULE("GroupStatement", () => {
const tokens: IToken[] = [];
let optional = false;
let name: string | null = null;
const statement: StatementCST[] = [];
$.OR([
tokens.push($.OR([
{ ALT: () => {
optional = true;
$.CONSUME(T.Optional);
const first_token = $.CONSUME(T.Optional);
$.CONSUME(T.Create);
$.CONSUME(T.A);
return first_token;
}},
{ ALT: () => {
$.CONSUME2(T.Create);
const first_token = $.CONSUME2(T.Create);
$.CONSUME2(T.A);
$.OPTION2(() => {
$.CONSUME2(T.Optional);
optional = true;
});
return first_token;
}}
]);
]));
$.CONSUME(T.Group);
$.OPTION(() => {
$.CONSUME(T.Called);
name = $.CONSUME(T.Identifier).image;
});
$.CONSUME2(T.EndOfLine);
// Note: Technically not the end token,
// BUT this is way more useful than the Outdent for error reporting
tokens.push($.CONSUME2(T.EndOfLine));
$.CONSUME(T.Indent);
$.AT_LEAST_ONE(() => {
statement.push($.SUBRULE(Statement));
});
$.CONSUME(T.Outdent);
return new GroupStatementCST(optional, name, statement);
return new GroupStatementCST(tokens, optional, name, statement);
});
const RepeatStatement = $.RULE("RepeatStatement", () => {
const tokens: IToken[] = [];
let optional = false;
let count : CountSubStatementCST | null = null;
const statements: StatementCST[] = [];
$.OPTION3(() => {
$.CONSUME(T.Optional);
tokens.push($.CONSUME(T.Optional));
optional = true;
});
$.CONSUME(T.Repeat);
tokens.push($.CONSUME(T.Repeat));
$.OPTION(() => count = $.SUBRULE(CountSubStatement));
$.CONSUME3(T.EndOfLine);
$.CONSUME(T.Indent);
$.AT_LEAST_ONE(() => {
statements.push($.SUBRULE(Statement));
});
$.CONSUME(T.Outdent);
tokens.push($.CONSUME(T.Outdent));
return new RepeatStatementCST(optional, count, statements);
return new RepeatStatementCST(tokens, optional, count, statements);
});
const Statement = $.RULE("Statement", () => {
@ -385,13 +402,18 @@ export class Human2RegexParser extends EmbeddedActionsParser {
});
const Regex = $.RULE("Regex", () => {
let tokens: IToken[] = [];
let usings: UsingFlags[] = [];
const statements: StatementCST[] = [];
$.MANY(() => usings = usings.concat($.SUBRULE(UsingStatement)));
$.MANY(() => {
const using = $.SUBRULE(UsingStatement);
tokens = tokens.concat(using.tokens);
usings = usings.concat(using.value);
});
$.MANY2(() => statements.push($.SUBRULE(Statement)) );
return new RegularExpressionCST(new UsingStatementCST(usings), statements);
return new RegularExpressionCST([], new UsingStatementCST(tokens, usings), statements);
});
this.performSelfAnalysis();

View File

@ -5,6 +5,8 @@ import "./webpage/style.css";
import { Human2RegexLexer, Human2RegexLexerOptions } from "./lexer";
import { Human2RegexParser, Human2RegexParserOptions } from "./parser";
import { RobotLanguage } from "./generator";
import { lexErrorToCommonError, parseErrorToCommonError, semanticErrorToCommonError, ICommonError } from "./utilities";
/*
$(function() {
@ -12,6 +14,8 @@ $(function() {
});
*/
const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(false));
const parser = new Human2RegexParser(new Human2RegexParserOptions(false));
@ -28,9 +32,9 @@ create an optional group called protocol
match "http"
optionally match "s"
match "://"
create a group called subdomain
create an optional group called subdomain
repeat
match 1+ words
match a word
match "."
create a group called domain
match 1+ words or "_" or "-"
@ -57,9 +61,23 @@ create an optional group
`);
console.log(result.errors);
parser.input = result.tokens;
const regex = parser.parse();
console.log(JSON.stringify(regex, undefined, 4));
console.log(parser.errors);
// Collect lexer, parser and semantic errors into one list. Each stage only
// runs when the previous one was clean.
const total_errors: ICommonError[] = result.errors.map(lexErrorToCommonError);

if (total_errors.length === 0) {
    parser.input = result.tokens;
    const regex = parser.parse();
    total_errors.push(...parser.errors.map(parseErrorToCommonError));

    // Only validate a tree that parsed cleanly: with recovery disabled a
    // failed parse is not guaranteed to return a usable tree to call into.
    if (parser.errors.length === 0) {
        total_errors.push(...regex.validate(RobotLanguage.JS).map(semanticErrorToCommonError));
    }

    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (total_errors.length === 0) {
        console.log(regex.toRegex(RobotLanguage.JS));
    }
}

// Serialize explicitly; concatenating the array would print "[object Object]".
console.log("Errors = " + JSON.stringify(total_errors, undefined, 4));

View File

@ -1,5 +1,8 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { ISemanticError } from "./generator";
import { IRecognitionException, ILexingError } from "chevrotain";
/* eslint-disable no-bitwise */
export function hasFlag(a: number, b: number) : boolean {
return (a & b) !== 0;
@ -17,11 +20,15 @@ export function isSingleRegexCharacter(char: string): boolean {
char.length === 1;
}
export function last<T>(array: T[]) : T {
/**
 * Returns the element at index 0 of the array.
 *
 * @param array the array to read from
 * @returns the first element (undefined at runtime when the array is empty)
 */
export function first<T>(array: T[]): T {
    const [head] = array;
    return head;
}
/**
 * Returns the element at the final index of the array.
 *
 * @param array the array to read from
 * @returns the last element (undefined at runtime when the array is empty)
 */
export function last<T>(array: T[]): T {
    const final_index = array.length - 1;
    return array[final_index];
}
export function findLastIndex<T>(array: T[], value: T) : number {
export function findLastIndex<T>(array: T[], value: T): number {
for (let index = array.length-1; index >= 0; index--) {
if (array[index] === value) {
return index;
@ -46,3 +53,36 @@ export function removeQuotes(input: string): string {
/**
 * Escapes every character of the input that has a special meaning in a
 * regular expression by prefixing it with a backslash.
 *
 * All backslashes are escaped, not only the first one: a string pattern
 * passed to String.prototype.replace replaces a single occurrence.
 *
 * @param input text to escape
 * @returns the escaped text
 */
export function regexEscape(input: string) : string {
    // One pass over the input: "$&" re-inserts the matched character after the backslash.
    return input.replace(/[\\=:\-.\[\]^|()*+?{}$\/]/g, "\\$&");
}
/**
 * Uniform shape that lexer, parser and semantic errors are converted to.
 */
export interface ICommonError {
    /** Label naming the stage that produced the error, e.g. "Lexer Error". */
    type: string;
    /** Line where the error starts. */
    startLine: number;
    /** Column where the error starts. */
    startColumn: number;
    /** Size of the offending span. */
    length: number;
    /** Human readable description of the problem. */
    message: string;
}
/**
 * Converts a lexing error into the common error shape.
 *
 * @param error the lexing error to convert
 * @returns a new object tagged "Lexer Error" carrying the error's line, column, length and message
 */
export function lexErrorToCommonError(error: ILexingError): ICommonError {
    const { line, column, length, message } = error;

    const common: ICommonError = {
        type: "Lexer Error",
        startLine: line,
        startColumn: column,
        length,
        message
    };
    return common;
}
/**
 * Converts a parser recognition exception into the common error shape.
 *
 * The location is taken from the token attached to the exception. Positional
 * fields the token does not carry are reported as NaN.
 *
 * @param error the recognition exception to convert
 * @returns a new object tagged "Parser Error"
 */
export function parseErrorToCommonError(error: IRecognitionException): ICommonError {
    const token = error.token;

    // "??" binds looser than "-", so the fallback must be parenthesised;
    // otherwise the raw end offset is returned instead of a length.
    // endOffset is the offset of the last character (inclusive), hence the +1.
    const length = (token.endOffset ?? NaN) - token.startOffset + 1;

    return {
        type: "Parser Error",
        startLine: token.startLine ?? NaN,
        startColumn: token.startColumn ?? NaN,
        length: length,
        message: error.name + ": " + error.message,
    };
}
/**
 * Converts a semantic error into the common error shape.
 *
 * Returns a copy tagged "Semantic Error"; the object passed in is left
 * untouched rather than being mutated through a type cast.
 *
 * @param error the semantic error to convert
 * @returns a new object with the same fields plus the type tag
 */
export function semanticErrorToCommonError(error: ISemanticError): ICommonError {
    return { ...error, type: "Semantic Error" };
}