From 18d4c4418f561aecf0a237b5197980022e29c023 Mon Sep 17 00:00:00 2001 From: Patrick Demian Date: Sat, 31 Oct 2020 01:06:56 -0400 Subject: [PATCH] Added additional information to semantic errors --- src/generator.ts | 122 +++++++++++++--------- src/parser.ts | 256 +++++++++++++++++++++++++---------------------- src/script.ts | 32 ++++-- src/utilities.ts | 44 +++++++- 4 files changed, 280 insertions(+), 174 deletions(-) diff --git a/src/generator.ts b/src/generator.ts index 5185d5f..cd92e71 100644 --- a/src/generator.ts +++ b/src/generator.ts @@ -1,7 +1,8 @@ /* eslint-disable @typescript-eslint/no-unused-vars */ /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */ -import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter } from "./utilities"; +import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last } from "./utilities"; +import { IToken } from "chevrotain"; export enum RobotLanguage { JS, @@ -10,9 +11,34 @@ export enum RobotLanguage { Java } +export interface ISemanticError { + startLine: number, + startColumn: number, + length: number, + message: string +} + export abstract class H2RCST { - public abstract validate(language: RobotLanguage): Error[]; + public tokens: IToken[]; + + constructor(tokens: IToken[]) { + this.tokens = tokens; + } + + public abstract validate(language: RobotLanguage): ISemanticError[]; public abstract toRegex(language: RobotLanguage): string; + + protected error(message: string): ISemanticError { + const f = first(this.tokens); + const l = last(this.tokens); + + return { + startLine: f.startLine ?? NaN, + startColumn: f.startColumn ?? NaN, + length: (l.endOffset ?? l.startOffset) - f.startOffset, + message: message + }; + } } /* eslint-disable no-bitwise */ @@ -52,18 +78,16 @@ export class MatchStatementValue { } } -export abstract class StatementCST implements H2RCST { - public abstract validate(language: RobotLanguage): Error[]; - public abstract toRegex(language: RobotLanguage): string; +export abstract class StatementCST extends H2RCST { } -export class MatchSubStatementCST implements H2RCST { - constructor(public count: CountSubStatementCST | null, public invert: boolean = false, public values: MatchSubStatementValue[]) { - /* empty */ +export class MatchSubStatementCST extends H2RCST { + constructor(public tokens: IToken[], public count: CountSubStatementCST | null, public invert: boolean = false, public values: MatchSubStatementValue[]) { + super(tokens); } - public validate(language: RobotLanguage): Error[] { - let errors: Error[] = []; + public validate(language: RobotLanguage): ISemanticError[] { + let errors: ISemanticError[] = []; if (this.count) { errors = errors.concat(this.count.validate(language)); @@ -75,21 +99,21 @@ export class MatchSubStatementCST implements H2RCST { let to = value.to as string; if (!isSingleRegexCharacter(from)) { - errors.push(new Error("Between statement must begin with a single character")); + errors.push(this.error("Between statement must begin with a single character")); } else if (from.startsWith("\\u") || from.startsWith("\\U") || from.startsWith("\\")) { from = JSON.parse(`"${regexEscape(from)}"`); } if (!isSingleRegexCharacter(to)) { - errors.push(new Error("Between statement must end with a single character")); + errors.push(this.error("Between statement must end with a single character")); } else if (to.startsWith("\\u") || to.startsWith("\\U") || to.startsWith("\\")) { to = JSON.parse(`"${regexEscape(to)}"`); } if (from.charCodeAt(0) >= to.charCodeAt(0)) { - errors.push(new Error("Between statement range invalid")); + errors.push(this.error("Between statement range invalid")); } } } @@ -182,27 +206,29 @@ export class MatchSubStatementCST implements H2RCST { } -export class UsingStatementCST implements H2RCST { - constructor(public flags: UsingFlags[]) { - /* empty */ +export class UsingStatementCST extends H2RCST { + constructor(public tokens: IToken[], public flags: UsingFlags[]) { + super(tokens); } - public validate(language: RobotLanguage): Error[] { - const errors: Error[] = []; + + public validate(language: RobotLanguage): ISemanticError[] { + const errors: ISemanticError[] = []; let flag = this.flags[0]; for (let i = 1; i < this.flags.length; i++) { if (hasFlag(flag, this.flags[i])) { - errors.push(new Error("Duplicate modifier: " + MatchSubStatementType[this.flags[i]] )); + errors.push(this.error("Duplicate modifier: " + MatchSubStatementType[this.flags[i]] )); } flag = combineFlags(flag, this.flags[i]); } if (hasFlag(flag, UsingFlags.Sensitive) && hasFlag(flag, UsingFlags.Insensitive)) { - errors.push(new Error("Cannot be both case sensitive and insensitive")); + errors.push(this.error("Cannot be both case sensitive and insensitive")); } return errors; } + public toRegex(language: RobotLanguage): string { let str = ""; let exact = false; @@ -226,19 +252,19 @@ export class UsingStatementCST implements H2RCST { } } -export class CountSubStatementCST implements H2RCST { - constructor(public from: number, public to: number | null, public opt: "inclusive" | "exclusive" | "+" | null) { - /* empty */ +export class CountSubStatementCST extends H2RCST { + constructor(public tokens: IToken[], public from: number, public to: number | null = null, public opt: "inclusive" | "exclusive" | "+" | null = null) { + super(tokens); } - public validate(language: RobotLanguage): Error[] { - const errors: Error[] = []; + public validate(language: RobotLanguage): ISemanticError[] { + const errors: ISemanticError[] = []; if (this.from < 0) { - errors.push(new Error("Value cannot be negative")); + errors.push(this.error("Value cannot be negative")); } else if (this.to !== null && ((this.opt === "exclusive" && (this.to-1) <= this.from) || this.to <= this.from)) { - errors.push(new Error("Values must be in range of eachother")); + errors.push(this.error("Values must be in range of eachother")); } return errors; @@ -263,13 +289,13 @@ export class CountSubStatementCST implements H2RCST { } } -export class MatchStatementCST implements StatementCST { - constructor(public matches: MatchStatementValue[]) { - /* empty */ +export class MatchStatementCST extends StatementCST { + constructor(public tokens: IToken[], public matches: MatchStatementValue[]) { + super(tokens); } - public validate(language: RobotLanguage): Error[] { - let errors: Error[] = []; + public validate(language: RobotLanguage): ISemanticError[] { + let errors: ISemanticError[] = []; for (const match of this.matches) { errors = errors.concat(match.statement.validate(language)); @@ -285,13 +311,13 @@ export class MatchStatementCST implements StatementCST { } } -export class RepeatStatementCST implements StatementCST { - constructor(public optional: boolean, public count: CountSubStatementCST | null, public statements: StatementCST[]) { - /* empty */ +export class RepeatStatementCST extends StatementCST { + constructor(public tokens: IToken[], public optional: boolean, public count: CountSubStatementCST | null, public statements: StatementCST[]) { + super(tokens); } - public validate(language: RobotLanguage): Error[] { - let errors: Error[] = []; + public validate(language: RobotLanguage): ISemanticError[] { + let errors: ISemanticError[] = []; if (this.count !== null) { errors = errors.concat(this.count.validate(language)); @@ -335,16 +361,16 @@ export class RepeatStatementCST implements StatementCST { } } -export class GroupStatementCST implements StatementCST { - constructor(public optional: boolean, public name: string | null, public statements: StatementCST[]) { - /* empty */ +export class GroupStatementCST extends StatementCST { + constructor(public tokens: IToken[], public optional: boolean, public name: string | null, public statements: StatementCST[]) { + super(tokens); } - public validate(language: RobotLanguage): Error[] { - let errors : Error[] = []; + public validate(language: RobotLanguage): ISemanticError[] { + let errors : ISemanticError[] = []; if (language !== RobotLanguage.DotNet && language !== RobotLanguage.JS) { - errors.push(new Error("This language does not support named groups")); + errors.push(this.error("This language does not support named groups")); } for (const statement of this.statements) { @@ -373,13 +399,13 @@ export class GroupStatementCST implements StatementCST { } } -export class RegularExpressionCST implements H2RCST { - constructor(public usings: UsingStatementCST, public statements: StatementCST[]) { - /* empty */ +export class RegularExpressionCST extends H2RCST { + constructor(public tokens: IToken[], public usings: UsingStatementCST, public statements: StatementCST[]) { + super(tokens); } - public validate(language: RobotLanguage): Error[] { - let errors: Error[] = this.usings.validate(language); + public validate(language: RobotLanguage): ISemanticError[] { + let errors: ISemanticError[] = this.usings.validate(language); for (const statement of this.statements) { errors = errors.concat(statement.validate(language)); diff --git a/src/parser.ts b/src/parser.ts index 646bb35..f4cebd2 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,8 +1,9 @@ /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */ -import { EmbeddedActionsParser, IOrAlt } from "chevrotain"; +import { EmbeddedActionsParser, IOrAlt, IToken } from "chevrotain"; import * as T from "./tokens"; import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST } from "./generator"; +import { first } from "./utilities"; export class Human2RegexParserOptions { constructor(public skip_validations: boolean = false) { @@ -10,10 +11,21 @@ export class Human2RegexParserOptions { } } +class TokenAndValue { + constructor(public token: IToken, public value: T) { + /* empty */ + } +} +class TokensAndValue { + constructor(public tokens: IToken[], public value: T) { + /* empty */ + } +} + export class Human2RegexParser extends EmbeddedActionsParser { private static already_init = false; - public parse : (idxInCallingRule?: number, ...args: unknown[]) => RegularExpressionCST; + public parse: (idxInCallingRule?: number, ...args: unknown[]) => RegularExpressionCST; constructor(private options: Human2RegexParserOptions = new Human2RegexParserOptions()) { super(T.AllTokens, { recoveryEnabled: false, maxLookahead: 2, skipValidations: options.skip_validations }); @@ -26,116 +38,96 @@ export class Human2RegexParser extends EmbeddedActionsParser { const $ = this; - let nss_rules : IOrAlt[] | null = null; + // IN REGARDS TO KEEPING TOKENS: + // We don't really need to keep each token, only the first and last tokens + // This is due to the fact we calculate the difference between those tokens + // However, sometimes we have optional starts and ends + // Each optional near the start and end MUST be recorded because they may be the first/last token + // ex) "optional match 3..." the start token is "optional", but "match 3..."'s start token is "match" + + let nss_rules : IOrAlt>[] | null = null; const NumberSubStatement = $.RULE("NumberSubStatement", () => { - let value: number = 0; - - value = $.OR(nss_rules || (nss_rules = [ + return $.OR(nss_rules || (nss_rules = [ + { ALT: () => new TokenAndValue($.CONSUME(T.Zero), 0) }, + { ALT: () => new TokenAndValue($.CONSUME(T.One), 1) }, + { ALT: () => new TokenAndValue($.CONSUME(T.Two), 2) }, + { ALT: () => new TokenAndValue($.CONSUME(T.Three), 3) }, + { ALT: () => new TokenAndValue($.CONSUME(T.Four), 4) }, + { ALT: () => new TokenAndValue($.CONSUME(T.Five), 5) }, + { ALT: () => new TokenAndValue($.CONSUME(T.Six), 6) }, + { ALT: () => new TokenAndValue($.CONSUME(T.Seven), 7) }, + { ALT: () => new TokenAndValue($.CONSUME(T.Eight), 8) }, + { ALT: () => new TokenAndValue($.CONSUME(T.Nine), 9) }, + { ALT: () => new TokenAndValue($.CONSUME(T.Ten), 10) }, { ALT: () => { - $.CONSUME(T.Zero); - return 0; - }}, - { ALT: () => { - $.CONSUME(T.One); - return 1; - }}, - { ALT: () => { - $.CONSUME(T.Two); - return 2; - }}, - { ALT: () => { - $.CONSUME(T.Three); - return 3; - }}, - { ALT: () => { - $.CONSUME(T.Four); - return 4; - }}, - { ALT: () => { - $.CONSUME(T.Five); - return 5; - }}, - { ALT: () => { - $.CONSUME(T.Six); - return 6; - }}, - { ALT: () => { - $.CONSUME(T.Seven); - return 7; - }}, - { ALT: () => { - $.CONSUME(T.Eight); - return 8; - }}, - { ALT: () => { - $.CONSUME(T.Nine); - return 9; - }}, - { ALT: () => { - $.CONSUME(T.Ten); - return 10; - }}, - - { ALT: () => parseInt($.CONSUME(T.NumberLiteral).image) }, + const tok = $.CONSUME(T.NumberLiteral); + return new TokenAndValue(tok, parseInt(tok.image)); + }} ])); - - return value; }); // 1, 1..2, between 1 and/to 2 inclusively/exclusively const CountSubStatement = $.RULE("CountSubStatement", () => { - let from : number = 0; - let to: number | null = null; - let opt: "inclusive" | "exclusive" | "+" | null = null; - - $.OR([ + return $.OR([ { ALT: () => { - $.CONSUME(T.Between); - from = $.SUBRULE4(NumberSubStatement); + const tokens: IToken[] = []; + + tokens.push($.CONSUME(T.Between)); + const from = $.SUBRULE4(NumberSubStatement); $.OR3([ { ALT: () => $.CONSUME2(T.To) }, { ALT: () => $.CONSUME(T.And) } ]); - to = $.SUBRULE5(NumberSubStatement); - $.OPTION4(() => $.CONSUME3(T.Times)); - $.OPTION5(() => { - $.OR4([ + const to = $.SUBRULE5(NumberSubStatement); + tokens.push(to.token); + $.OPTION4(() => tokens.push($.CONSUME3(T.Times))); + const opt = $.OPTION5(() => { + return $.OR4([ { ALT: () => { - $.CONSUME(T.Inclusive); - opt = "inclusive"; + tokens.push($.CONSUME(T.Inclusive)); + return "inclusive"; }}, { ALT: () => { - $.CONSUME(T.Exclusive); - opt = "exclusive"; + tokens.push($.CONSUME(T.Exclusive)); + return "exclusive"; }} ]); }); + + return new CountSubStatementCST(tokens, from.value, to.value, opt as "inclusive" | "exclusive" | null); }}, - { ALT: () => { - $.OPTION2(() => $.CONSUME(T.From)); - from = $.SUBRULE2(NumberSubStatement); - $.OR2([ - { ALT: () => { - $.CONSUME(T.OrMore); - opt = "+"; - }}, + { ALT: () => { + const tokens: IToken[] = []; + + $.OPTION2(() => tokens.push($.CONSUME(T.From))); + const from = $.SUBRULE2(NumberSubStatement); + const to = $.OR2([ + { ALT: () => new TokenAndValue($.CONSUME(T.OrMore), [ null, "+" ]) }, { ALT: () => { $.CONSUME(T.To); - to = $.SUBRULE3(NumberSubStatement); + const val = $.SUBRULE3(NumberSubStatement); + return new TokenAndValue(val.token, [ val.value, null ]); }} ]); - $.OPTION3(() => $.CONSUME2(T.Times)); + tokens.push(to.token); + $.OPTION3(() => tokens.push($.CONSUME2(T.Times))); + + return new CountSubStatementCST(tokens, from.value, to.value ? to.value[0] : null, to.value ? to.value[1] : null); }}, { ALT: () => { - $.OPTION(() => $.CONSUME(T.Exactly)); - from = $.SUBRULE(NumberSubStatement); - $.OPTION6(() => $.CONSUME(T.Times)); + const tokens: IToken[] = []; + $.OPTION(() => tokens.push($.CONSUME(T.Exactly))); + const from = $.SUBRULE(NumberSubStatement); + tokens.push(from.token); + $.OPTION6(() => tokens.push($.CONSUME(T.Times))); + + return new CountSubStatementCST(tokens, from.value); }} ]); - return new CountSubStatementCST(from, to, opt); + }); let mss_rules : IOrAlt[] | null = null; @@ -147,9 +139,19 @@ export class Human2RegexParser extends EmbeddedActionsParser { let to : string | null = null; let type : MatchSubStatementType = MatchSubStatementType.Anything; - count = $.OPTION(() => $.SUBRULE(CountSubStatement) ); + const tokens: IToken[] = []; + + count = $.OPTION(() => { + const css = $.SUBRULE(CountSubStatement); + // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition + if (css.tokens) { + tokens.push(first(css.tokens)); + } + return css; + }); + invert = $.OPTION2(() => { - $.CONSUME(T.Not); + tokens.push($.CONSUME(T.Not)); return true; }); $.AT_LEAST_ONE_SEP({ @@ -161,7 +163,9 @@ export class Human2RegexParser extends EmbeddedActionsParser { $.OPTION4(() => $.CONSUME(T.From)); from = $.CONSUME2(T.StringLiteral).image; $.CONSUME(T.To); - to = $.CONSUME3(T.StringLiteral).image; + const token = $.CONSUME3(T.StringLiteral); + tokens.push(token); + to = token.image; type = MatchSubStatementType.Between; return new MatchSubStatementValue(type, from, to); @@ -170,73 +174,77 @@ export class Human2RegexParser extends EmbeddedActionsParser { $.CONSUME(T.Between); from = $.CONSUME4(T.StringLiteral).image; $.CONSUME(T.And); - to = $.CONSUME5(T.StringLiteral).image; + const token = $.CONSUME5(T.StringLiteral); + to = token.image; + tokens.push(token); type = MatchSubStatementType.Between; return new MatchSubStatementValue(type, from, to); }}, { ALT: () => { - from = $.CONSUME(T.StringLiteral).image; + const token = $.CONSUME(T.StringLiteral); + tokens.push(token); + from = token.image; type = MatchSubStatementType.SingleString; return new MatchSubStatementValue(type, from); }}, { ALT: () => { - $.CONSUME(T.Anything); + tokens.push($.CONSUME(T.Anything)); type = MatchSubStatementType.Anything; return new MatchSubStatementValue(type); }}, { ALT: () => { - $.CONSUME(T.Word); + tokens.push($.CONSUME(T.Word)); type = MatchSubStatementType.Word; return new MatchSubStatementValue(type); }}, { ALT: () => { - $.CONSUME(T.Digit); + tokens.push($.CONSUME(T.Digit)); type = MatchSubStatementType.Digit; return new MatchSubStatementValue(type); }}, { ALT: () => { - $.CONSUME(T.Character); + tokens.push($.CONSUME(T.Character)); type = MatchSubStatementType.Character; return new MatchSubStatementValue(type); }}, { ALT: () => { - $.CONSUME(T.Whitespace); + tokens.push($.CONSUME(T.Whitespace)); type = MatchSubStatementType.Whitespace; return new MatchSubStatementValue(type); }}, { ALT: () => { - $.CONSUME(T.Number); + tokens.push($.CONSUME(T.Number)); type = MatchSubStatementType.Number; return new MatchSubStatementValue(type); }}, { ALT: () => { - $.CONSUME(T.Tab); + tokens.push($.CONSUME(T.Tab)); type = MatchSubStatementType.Tab; return new MatchSubStatementValue(type); }}, { ALT: () => { - $.CONSUME(T.Linefeed); + tokens.push($.CONSUME(T.Linefeed)); type = MatchSubStatementType.Linefeed; return new MatchSubStatementValue(type); }}, { ALT: () => { - $.CONSUME(T.Newline); + tokens.push($.CONSUME(T.Newline)); type = MatchSubStatementType.Newline; return new MatchSubStatementValue(type); }}, { ALT: () => { - $.CONSUME(T.CarriageReturn); + tokens.push($.CONSUME(T.CarriageReturn)); type = MatchSubStatementType.CarriageReturn; return new MatchSubStatementValue(type); @@ -245,19 +253,20 @@ export class Human2RegexParser extends EmbeddedActionsParser { } }); - return new MatchSubStatementCST(count, invert, values); + return new MatchSubStatementCST(tokens, count, invert, values); }); // optionally match "+" then 1+ words const MatchStatement = $.RULE("MatchStatement", () => { let optional = false; const msv: MatchStatementValue[] = []; + const tokens: IToken[] = []; $.OPTION(() => { - $.CONSUME(T.Optional); + tokens.push($.CONSUME(T.Optional)); optional = true; }); - $.CONSUME(T.Match); + tokens.push($.CONSUME(T.Match)); msv.push(new MatchStatementValue(optional, $.SUBRULE(MatchSubStatement))); $.MANY(() => { $.OR([ @@ -274,9 +283,9 @@ export class Human2RegexParser extends EmbeddedActionsParser { }); msv.push(new MatchStatementValue(optional, $.SUBRULE2(MatchSubStatement))); }); - $.CONSUME(T.EndOfLine); + tokens.push($.CONSUME(T.EndOfLine)); - return new MatchStatementCST(msv); + return new MatchStatementCST(tokens, msv); }); // using global matching @@ -284,7 +293,7 @@ export class Human2RegexParser extends EmbeddedActionsParser { const UsingStatement = $.RULE("UsingStatement", () => { const usings: UsingFlags[] = []; - $.CONSUME(T.Using); + const tokens = [ $.CONSUME(T.Using) ]; $.AT_LEAST_ONE_SEP({ SEP: T.And, DEF: () => { @@ -313,67 +322,75 @@ export class Human2RegexParser extends EmbeddedActionsParser { $.OPTION(() => $.CONSUME(T.Matching)); } }); - $.CONSUME(T.EndOfLine); + tokens.push($.CONSUME(T.EndOfLine)); - return usings; + return new TokensAndValue(tokens, usings); }); const GroupStatement = $.RULE("GroupStatement", () => { + const tokens: IToken[] = []; let optional = false; let name: string | null = null; const statement: StatementCST[] = []; - $.OR([ + tokens.push($.OR([ { ALT: () => { optional = true; - $.CONSUME(T.Optional); + const first_token = $.CONSUME(T.Optional); $.CONSUME(T.Create); $.CONSUME(T.A); + + return first_token; }}, { ALT: () => { - $.CONSUME2(T.Create); + const first_token = $.CONSUME2(T.Create); $.CONSUME2(T.A); $.OPTION2(() => { $.CONSUME2(T.Optional); optional = true; }); + + return first_token; }} - ]); + ])); $.CONSUME(T.Group); $.OPTION(() => { $.CONSUME(T.Called); name = $.CONSUME(T.Identifier).image; }); - $.CONSUME2(T.EndOfLine); + // Note: Technically not the end token, + // BUT this is way more useful than the Outdent for error reporting + tokens.push($.CONSUME2(T.EndOfLine)); $.CONSUME(T.Indent); $.AT_LEAST_ONE(() => { statement.push($.SUBRULE(Statement)); }); $.CONSUME(T.Outdent); - return new GroupStatementCST(optional, name, statement); + return new GroupStatementCST(tokens, optional, name, statement); }); const RepeatStatement = $.RULE("RepeatStatement", () => { + const tokens: IToken[] = []; let optional = false; let count : CountSubStatementCST | null = null; const statements: StatementCST[] = []; $.OPTION3(() => { - $.CONSUME(T.Optional); + tokens.push($.CONSUME(T.Optional)); optional = true; }); - $.CONSUME(T.Repeat); + tokens.push($.CONSUME(T.Repeat)); $.OPTION(() => count = $.SUBRULE(CountSubStatement)); $.CONSUME3(T.EndOfLine); $.CONSUME(T.Indent); $.AT_LEAST_ONE(() => { statements.push($.SUBRULE(Statement)); }); - $.CONSUME(T.Outdent); + tokens.push($.CONSUME(T.Outdent)); - return new RepeatStatementCST(optional, count, statements); + return new RepeatStatementCST(tokens, optional, count, statements); }); const Statement = $.RULE("Statement", () => { @@ -385,13 +402,18 @@ export class Human2RegexParser extends EmbeddedActionsParser { }); const Regex = $.RULE("Regex", () => { + let tokens: IToken[] = []; let usings: UsingFlags[] = []; const statements: StatementCST[] = []; - $.MANY(() => usings = usings.concat($.SUBRULE(UsingStatement))); + $.MANY(() => { + const using = $.SUBRULE(UsingStatement); + tokens = tokens.concat(using.tokens); + usings = usings.concat(using.value); + }); $.MANY2(() => statements.push($.SUBRULE(Statement)) ); - return new RegularExpressionCST(new UsingStatementCST(usings), statements); + return new RegularExpressionCST([], new UsingStatementCST(tokens, usings), statements); }); this.performSelfAnalysis(); diff --git a/src/script.ts b/src/script.ts index c208c77..74b1d9f 100644 --- a/src/script.ts +++ b/src/script.ts @@ -5,6 +5,8 @@ import "./webpage/style.css"; import { Human2RegexLexer, Human2RegexLexerOptions } from "./lexer"; import { Human2RegexParser, Human2RegexParserOptions } from "./parser"; +import { RobotLanguage } from "./generator"; +import { lexErrorToCommonError, parseErrorToCommonError, semanticErrorToCommonError, ICommonError } from "./utilities"; /* $(function() { @@ -12,6 +14,8 @@ $(function() { }); */ + + const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(false)); const parser = new Human2RegexParser(new Human2RegexParserOptions(false)); @@ -28,9 +32,9 @@ create an optional group called protocol match "http" optionally match "s" match "://" -create a group called subdomain +create an optional group called subdomain repeat - match 1+ words + match a word match "." create a group called domain match 1+ words or "_" or "-" @@ -57,9 +61,23 @@ create an optional group `); -console.log(result.errors); -parser.input = result.tokens; -const regex = parser.parse(); -console.log(JSON.stringify(regex, undefined, 4)); -console.log(parser.errors); \ No newline at end of file +const total_errors: ICommonError[] = []; + + +result.errors.map(lexErrorToCommonError).forEach((x) => total_errors.push(x)); + +if (total_errors.length === 0) { + parser.input = result.tokens; + const regex = parser.parse(); + + parser.errors.map(parseErrorToCommonError).forEach((x) => total_errors.push(x)); + regex.validate(RobotLanguage.JS).map(semanticErrorToCommonError).forEach((x) => total_errors.push(x)); + + // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition + if (total_errors.length === 0) { + console.log(regex.toRegex(RobotLanguage.JS)); + } +} + +console.log("Errors = " + total_errors); \ No newline at end of file diff --git a/src/utilities.ts b/src/utilities.ts index 3ad5364..11ff19b 100644 --- a/src/utilities.ts +++ b/src/utilities.ts @@ -1,5 +1,8 @@ /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */ +import { ISemanticError } from "./generator"; +import { IRecognitionException, ILexingError } from "chevrotain"; + /* eslint-disable no-bitwise */ export function hasFlag(a: number, b: number) : boolean { return (a & b) !== 0; @@ -17,11 +20,15 @@ export function isSingleRegexCharacter(char: string): boolean { char.length === 1; } -export function last(array: T[]) : T { +export function first(array: T[]): T { + return array[0]; +} + +export function last(array: T[]): T { return array[array.length-1]; } -export function findLastIndex(array: T[], value: T) : number { +export function findLastIndex(array: T[], value: T): number { for (let index = array.length-1; index >= 0; index--) { if (array[index] === value) { return index; @@ -46,3 +53,36 @@ export function removeQuotes(input: string): string { export function regexEscape(input: string) : string { return input.replace("\\", "\\\\").replace(/([=:\-\.\[\]\^\|\(\)\*\+\?\{\}\$\/])/g, "\\$1"); } + +export interface ICommonError { + type: string, + startLine: number, + startColumn: number, + length: number, + message: string +} + +export function lexErrorToCommonError(error: ILexingError): ICommonError { + return { + type: "Lexer Error", + startLine: error.line, + startColumn: error.column, + length: error.length, + message: error.message + }; +} + +export function parseErrorToCommonError(error: IRecognitionException): ICommonError { + return { + type: "Parser Error", + startLine: error.token.startLine ?? NaN, + startColumn: error.token.startColumn ?? NaN, + length: error.token.endOffset ?? NaN - error.token.startOffset, + message: error.name + ": " + error.message, + }; +} + +export function semanticErrorToCommonError(error: ISemanticError): ICommonError { + (error as ICommonError).type = "Semantic Error"; + return error as ICommonError; +} \ No newline at end of file