1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-16 12:30:09 -07:00

Refactored code for later migration to npm

This commit is contained in:
Patrick Demian 2020-11-01 23:19:30 -05:00
parent 92fc7445d5
commit 424cb59d6d
7 changed files with 619 additions and 224 deletions

16
docs/bundle.min.js vendored

File diff suppressed because one or more lines are too long

View File

@ -1,16 +1,26 @@
/* eslint-disable @typescript-eslint/no-unused-vars */
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */ /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last } from "./utilities"; /**
* Includes all Concrete Syntax Trees for Human2Regex
* @packageDocumentation
*/
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last, unusedParameter, makeFlag } from "./utilities";
import { IToken } from "chevrotain"; import { IToken } from "chevrotain";
export enum RobotLanguage { /**
* List of regular expression dialects we support
*/
export enum RegexDialect {
JS, JS,
Perl, Perl,
DotNet, DotNet,
Java Java
} }
/**
* Interface for all semantic errors
*/
export interface ISemanticError { export interface ISemanticError {
startLine: number, startLine: number,
startColumn: number, startColumn: number,
@ -18,16 +28,52 @@ export interface ISemanticError {
message: string message: string
} }
/**
* The base concrete syntax tree class
*
* @internal
*/
export abstract class H2RCST { export abstract class H2RCST {
public tokens: IToken[]; public tokens: IToken[];
/**
* Constructor for H2RCST
*
* @param tokens Tokens used to calculate where an error occurred
* @internal
*/
constructor(tokens: IToken[]) { constructor(tokens: IToken[]) {
this.tokens = tokens; this.tokens = tokens;
} }
public abstract validate(language: RobotLanguage): ISemanticError[]; /**
public abstract toRegex(language: RobotLanguage): string; * Validate that this is both valid and can be generated in the specified language
*
* @remarks There is no guarantee toRegex will work unless validate returns no errors
*
* @param language the regex dialect we're validating
* @returns A list of errors
* @public
*/
public abstract validate(language: RegexDialect): ISemanticError[];
/**
* Generate a regular expression fragment based on this syntax tree
*
* @remarks There is no guarantee toRegex will work unless validate returns no errors
*
* @param language the regex dialect we're generating
* @returns a regular expression fragment
* @public
*/
public abstract toRegex(language: RegexDialect): string;
/**
* Creates an ISemanticError with a given message and the tokens provided from the constructor
*
* @param message the message
* @internal
*/
protected error(message: string): ISemanticError { protected error(message: string): ISemanticError {
const f = first(this.tokens); const f = first(this.tokens);
const l = last(this.tokens); const l = last(this.tokens);
@ -41,16 +87,28 @@ export abstract class H2RCST {
} }
} }
/* eslint-disable no-bitwise */ /**
* Flags for the using statement
*
* @internal
*/
export enum UsingFlags { export enum UsingFlags {
Multiline = 1 << 0, Multiline = makeFlag(0),
Global = 1 << 1, Global = makeFlag(1),
Sensitive = 1 << 2, Sensitive = makeFlag(2),
Insensitive = 1 << 3, Insensitive = makeFlag(3),
Exact = 1 << 4 Exact = makeFlag(4)
} }
/* eslint-enable no-bitwise */
/**
* Type of match arguments
*
* @remarks SingleString means an escaped string
* @remarks Between means a range (ex. a-z)
* @remarks Anything means .
* @remarks Word, Digit, Character, Whitespace, Number, Tab, Linefeed, Newline, and Carriage return are \w+, \d, \w, \s, \d+, \t, \n, \n, \r respectively
* @internal
*/
export enum MatchSubStatementType { export enum MatchSubStatementType {
SingleString, SingleString,
Between, Between,
@ -66,27 +124,73 @@ export enum MatchSubStatementType {
CarriageReturn CarriageReturn
} }
/**
* Container for match statements
*
* @internal
*/
export class MatchSubStatementValue { export class MatchSubStatementValue {
/**
* Constructor for MatchSubStatementValue
*
* @param type the type of this match
* @param from optional range string
* @param to optional range string
* @internal
*/
constructor(public type: MatchSubStatementType, public from: string | null = null, public to: string | null = null) { constructor(public type: MatchSubStatementType, public from: string | null = null, public to: string | null = null) {
/* empty */ /* empty */
} }
} }
/**
* Container for MatchStatementValue
*
* @internal
*/
export class MatchStatementValue { export class MatchStatementValue {
/**
* Constructor for MatchStatementValue
*
* @param optional is this match optional
* @param statement the substatement to generate
* @internal
*/
constructor(public optional: boolean, public statement: MatchSubStatementCST) { constructor(public optional: boolean, public statement: MatchSubStatementCST) {
/* empty */ /* empty */
} }
} }
/**
* The base class for all statement concrete syntax trees
*
* @internal
*/
export abstract class StatementCST extends H2RCST { export abstract class StatementCST extends H2RCST {
} }
/**
* Concrete Syntax Tree for Match Sub statements
*
* @internal
*/
export class MatchSubStatementCST extends H2RCST { export class MatchSubStatementCST extends H2RCST {
constructor(public tokens: IToken[], public count: CountSubStatementCST | null, public invert: boolean = false, public values: MatchSubStatementValue[]) {
/**
* Constructor for MatchSubStatementCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param count optional count statement
* @param invert is this match inverted (ex, [^a-z] or [a-z])
* @param values sub statements to match
*/
constructor(tokens: IToken[], private count: CountSubStatementCST | null, private invert: boolean = false, private values: MatchSubStatementValue[]) {
super(tokens); super(tokens);
} }
public validate(language: RobotLanguage): ISemanticError[] { public validate(language: RegexDialect): ISemanticError[] {
let errors: ISemanticError[] = []; let errors: ISemanticError[] = [];
if (this.count) { if (this.count) {
@ -121,7 +225,7 @@ export class MatchSubStatementCST extends H2RCST {
return errors; return errors;
} }
public toRegex(language: RobotLanguage): string { public toRegex(language: RegexDialect): string {
const str: string[] = []; const str: string[] = [];
for (const value of this.values) { for (const value of this.values) {
@ -181,37 +285,33 @@ export class MatchSubStatementCST extends H2RCST {
} }
if (this.count) { if (this.count) {
if (this.count.from === 1 && this.count.to === null) { ret += this.count.toRegex(language);
if (this.count.opt === "+") {
ret += "+";
}
// if we only have a count of 1, we can ignore adding any extra text
}
else if (this.count.from === 0 && this.count.to === null) {
if (this.count.opt === "+") {
ret += "*";
}
else {
// match 0 of anything? ok...
ret = "";
}
}
else {
ret += this.count.toRegex(language);
}
} }
return ret; return ret;
} }
} }
/**
* Concrete Syntax Tree for Using statements
*
* @internal
*/
export class UsingStatementCST extends H2RCST { export class UsingStatementCST extends H2RCST {
constructor(public tokens: IToken[], public flags: UsingFlags[]) {
/**
* Constructor for UsingStatementCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param flags using flags
*/
constructor(tokens: IToken[], private flags: UsingFlags[]) {
super(tokens); super(tokens);
} }
public validate(language: RobotLanguage): ISemanticError[] { public validate(language: RegexDialect): ISemanticError[] {
unusedParameter(language, "Using Statement does not change based on language");
const errors: ISemanticError[] = []; const errors: ISemanticError[] = [];
let flag = this.flags[0]; let flag = this.flags[0];
@ -229,7 +329,9 @@ export class UsingStatementCST extends H2RCST {
return errors; return errors;
} }
public toRegex(language: RobotLanguage): string { public toRegex(language: RegexDialect): string {
unusedParameter(language, "Using Statement does not change based on language");
let str = ""; let str = "";
let exact = false; let exact = false;
@ -252,12 +354,27 @@ export class UsingStatementCST extends H2RCST {
} }
} }
/**
* Concrete Syntax Tree for Count sub statements
*
* @internal
*/
export class CountSubStatementCST extends H2RCST { export class CountSubStatementCST extends H2RCST {
constructor(public tokens: IToken[], public from: number, public to: number | null = null, public opt: "inclusive" | "exclusive" | "+" | null = null) { /**
* Constructor for CountSubStatementCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param from number to count from
* @param to optional number to count to
* @param opt option modifier
*/
constructor(tokens: IToken[], private from: number, private to: number | null = null, private opt: "inclusive" | "exclusive" | "+" | null = null) {
super(tokens); super(tokens);
} }
public validate(language: RobotLanguage): ISemanticError[] { public validate(language: RegexDialect): ISemanticError[] {
unusedParameter(language, "Count does not need checking");
const errors: ISemanticError[] = []; const errors: ISemanticError[] = [];
if (this.from < 0) { if (this.from < 0) {
@ -270,31 +387,56 @@ export class CountSubStatementCST extends H2RCST {
return errors; return errors;
} }
public toRegex(language: RobotLanguage): string { public toRegex(language: RegexDialect): string {
unusedParameter(language, "Count does not change from language");
const from = this.from; const from = this.from;
let to = this.to; let to = this.to;
if (to !== null && this.opt === "exclusive") {
to--;
// if we only have a count of 1, we can ignore adding any extra text
if (to === null) {
if (from === 1) {
return this.opt === "+" ? "+" : "*";
}
else if (from === 0) {
return this.opt === "+" ? "*" : "";
}
} }
if (to !== null) { if (to !== null) {
if (this.opt === "exclusive") {
to--;
}
return `{${from},${to}}`; return `{${from},${to}}`;
} }
else if (this.opt === "+") { else if (this.opt === "+") {
return `{${from},}`; return `{${from},}`;
} }
else { else {
return `{${this.from}}`; return `{${from}}`;
} }
} }
} }
/**
* Concrete Syntax Tree for a Match statement
*
* @internal
*/
export class MatchStatementCST extends StatementCST { export class MatchStatementCST extends StatementCST {
constructor(public tokens: IToken[], public matches: MatchStatementValue[]) {
/**
* Constructor for MatchStatementCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param matches the match values whose regex fragments are concatenated in order
*/
constructor(tokens: IToken[], private matches: MatchStatementValue[]) {
super(tokens); super(tokens);
} }
public validate(language: RobotLanguage): ISemanticError[] { public validate(language: RegexDialect): ISemanticError[] {
let errors: ISemanticError[] = []; let errors: ISemanticError[] = [];
for (const match of this.matches) { for (const match of this.matches) {
@ -304,19 +446,33 @@ export class MatchStatementCST extends StatementCST {
return errors; return errors;
} }
public toRegex(language: RobotLanguage): string { public toRegex(language: RegexDialect): string {
return this.matches.map((x) => { return this.matches.map((x) => {
return x.statement.toRegex(language) + (x.optional ? "?" : ""); return x.statement.toRegex(language) + (x.optional ? "?" : "");
}).join(""); }).join("");
} }
} }
/**
* Concrete Syntax Tree for a Repeat statement
*
* @internal
*/
export class RepeatStatementCST extends StatementCST { export class RepeatStatementCST extends StatementCST {
constructor(public tokens: IToken[], public optional: boolean, public count: CountSubStatementCST | null, public statements: StatementCST[]) {
/**
* Constructor for RepeatStatementCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param optional is this repetition optional
* @param count optional number of times to repeat
* @param statements the statements to repeat
*/
constructor(tokens: IToken[], private optional: boolean, private count: CountSubStatementCST | null, private statements: StatementCST[]) {
super(tokens); super(tokens);
} }
public validate(language: RobotLanguage): ISemanticError[] { public validate(language: RegexDialect): ISemanticError[] {
let errors: ISemanticError[] = []; let errors: ISemanticError[] = [];
if (this.count !== null) { if (this.count !== null) {
@ -330,67 +486,16 @@ export class RepeatStatementCST extends StatementCST {
return errors; return errors;
} }
public toRegex(language: RobotLanguage): string { public toRegex(language: RegexDialect): string {
let str = "(" + this.statements.map((x) => x.toRegex(language)).join("") + ")"; let str = "(" + this.statements.map((x) => x.toRegex(language)).join("") + ")";
if (this.count !== null) { if (this.count) {
if (this.count.from === 1 && this.count.to === null) { str += this.count.toRegex(language);
if (this.count.opt === "+") {
str += "+";
}
// if we only have a count of 1, we can ignore adding any extra text
}
else if (this.count.from === 0 && this.count.to === null) {
if (this.count.opt === "+") {
str += "*";
}
else {
// match 0 of anything? ok...
str = "";
}
}
else {
str += this.count.toRegex(language);
}
} }
else { else {
str += "*"; str += "*";
} }
return str;
}
}
export class GroupStatementCST extends StatementCST {
constructor(public tokens: IToken[], public optional: boolean, public name: string | null, public statements: StatementCST[]) {
super(tokens);
}
public validate(language: RobotLanguage): ISemanticError[] {
let errors : ISemanticError[] = [];
if (language !== RobotLanguage.DotNet && language !== RobotLanguage.JS) {
errors.push(this.error("This language does not support named groups"));
}
for (const statement of this.statements) {
errors = errors.concat(statement.validate(language));
}
return errors;
}
public toRegex(language: RobotLanguage): string {
let str = "(";
if (this.name !== null) {
str += `?<${this.name}>`;
}
str += this.statements.map((x) => x.toRegex(language)).join("");
str += ")";
if (this.optional) { if (this.optional) {
str += "?"; str += "?";
} }
@ -399,12 +504,77 @@ export class GroupStatementCST extends StatementCST {
} }
} }
export class RegularExpressionCST extends H2RCST { /**
constructor(public tokens: IToken[], public usings: UsingStatementCST, public statements: StatementCST[]) { * Concrete Syntax Tree for a group Statement
*
* @internal
*/
export class GroupStatementCST extends StatementCST {
/**
* Constructor for GroupStatementCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param optional is this group optional
* @param name optional name for named group
* @param statements other statements
* @internal
*/
constructor(tokens: IToken[], private optional: boolean, private name: string | null, private statements: StatementCST[]) {
super(tokens); super(tokens);
} }
public validate(language: RobotLanguage): ISemanticError[] { public validate(language: RegexDialect): ISemanticError[] {
let errors : ISemanticError[] = [];
// All languages currently support named groups
//if (false) {
// errors.push(this.error("This language does not support named groups"));
//}
for (const statement of this.statements) {
errors = errors.concat(statement.validate(language));
}
return errors;
}
public toRegex(language: RegexDialect): string {
let str = "(";
// named group
if (this.name !== null) {
str += `?<${this.name}>`;
}
str += this.statements.map((x) => x.toRegex(language)).join("");
str += (this.optional ? ")?" : ")");
return str;
}
}
/**
* Concrete Syntax Tree for a regular expression
*
* @public
*/
export class RegularExpressionCST extends H2RCST {
/**
* Constructor for RegularExpressionCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param usings using statements
* @param statements other statements
* @internal
*/
constructor(tokens: IToken[], private usings: UsingStatementCST, private statements: StatementCST[]) {
super(tokens);
}
public validate(language: RegexDialect): ISemanticError[] {
let errors: ISemanticError[] = this.usings.validate(language); let errors: ISemanticError[] = this.usings.validate(language);
for (const statement of this.statements) { for (const statement of this.statements) {
@ -413,11 +583,10 @@ export class RegularExpressionCST extends H2RCST {
return errors; return errors;
} }
public toRegex(language: RobotLanguage): string { public toRegex(language: RegexDialect): string {
const modifiers = this.usings.toRegex(language); const modifiers = this.usings.toRegex(language);
const regex = this.statements.map((x) => x.toRegex(language)).join(""); const regex = this.statements.map((x) => x.toRegex(language)).join("");
return modifiers.replace("{regex}", regex); return modifiers.replace("{regex}", regex);
} }
} }

View File

@ -1,27 +1,58 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */ /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
/**
* The Lexer for Human2Regex
* @packageDocumentation
*/
import { Lexer, IToken, createTokenInstance, ILexingResult, ILexingError } from "chevrotain"; import { Lexer, IToken, createTokenInstance, ILexingResult, ILexingError } from "chevrotain";
import { last, findLastIndex } from "./utilities"; import { last, findLastIndex } from "./utilities";
import { Indent, Outdent, EndOfLine, AllTokens } from "./tokens"; import { Indent, Outdent, EndOfLine, AllTokens } from "./tokens";
/**
* Defines the type of indents the lexer will allow
*/
export enum IndentType { export enum IndentType {
Tabs, Tabs,
Spaces, Spaces,
Both Both
} }
/**
* The options for the Lexer
*/
export class Human2RegexLexerOptions { export class Human2RegexLexerOptions {
/**
* Constructor for the Human2RegexLexerOptions
*
* @param skip_validations If true, the lexer will skip validations (~25% faster)
* @param type The type of indents the lexer will allow
* @param spaces_per_tab Number of spaces per tab
*/
constructor(public skip_validations = false, public type: IndentType = IndentType.Both, public spaces_per_tab: number = 4) { constructor(public skip_validations = false, public type: IndentType = IndentType.Both, public spaces_per_tab: number = 4) {
/* empty */ /* empty */
} }
} }
/**
* Human2Regex Lexer
*
* @remarks Only 1 lexer instance allowed due to a technical limitation and performance reasons
*/
export class Human2RegexLexer { export class Human2RegexLexer {
private static already_init = false; private static already_init = false;
private lexer!: Lexer; private lexer!: Lexer;
private options!: Human2RegexLexerOptions; private options!: Human2RegexLexerOptions;
/**
* Human2Regex Lexer
*
* @remarks Only 1 lexer instance allowed due to a technical limitation and performance reasons
* @param options options for the lexer
* @see Human2RegexLexerOptions
*/
constructor(options: Human2RegexLexerOptions = new Human2RegexLexerOptions()) { constructor(options: Human2RegexLexerOptions = new Human2RegexLexerOptions()) {
if (Human2RegexLexer.already_init) { if (Human2RegexLexer.already_init) {
throw new Error("Only 1 instance of Human2RegexLexer allowed"); throw new Error("Only 1 instance of Human2RegexLexer allowed");
@ -32,11 +63,18 @@ export class Human2RegexLexer {
this.setOptions(options); this.setOptions(options);
} }
public setOptions(options: Human2RegexLexerOptions) : void { /**
* Sets the options for this lexer
*
* @param options options for the lexer
* @see Human2RegexLexerOptions
*/
public setOptions(options: Human2RegexLexerOptions): void {
this.options = options; this.options = options;
let indent_regex: RegExp | null = null; let indent_regex: RegExp | null = null;
// Generate an indent lexer (accepts tabs or spaces or both based on options)
if (this.options.type === IndentType.Tabs) { if (this.options.type === IndentType.Tabs) {
indent_regex = /\t/y; indent_regex = /\t/y;
} }
@ -65,6 +103,12 @@ export class Human2RegexLexer {
}; };
} }
/**
* Tokenizes the given text
*
* @param text the text to analyze
* @returns a lexing result which contains the token stream and error list
*/
public tokenize(text: string) : ILexingResult { public tokenize(text: string) : ILexingResult {
const lex_result = this.lexer.tokenize(text); const lex_result = this.lexer.tokenize(text);
@ -72,7 +116,6 @@ export class Human2RegexLexer {
return lex_result; return lex_result;
} }
// create Outdents
const tokens: IToken[] = []; const tokens: IToken[] = [];
const indent_stack = [ 0 ]; const indent_stack = [ 0 ];
@ -80,6 +123,7 @@ export class Human2RegexLexer {
let start_of_line = true; let start_of_line = true;
let had_indents = false; let had_indents = false;
// create Outdents
for (let i = 0; i < lex_result.tokens.length; i++) { for (let i = 0; i < lex_result.tokens.length; i++) {
// EoL? check for indents next (by setting startOfLine = true) // EoL? check for indents next (by setting startOfLine = true)
@ -117,13 +161,16 @@ export class Human2RegexLexer {
// Ignore all indents AND newline // Ignore all indents AND newline
// continue; // continue;
} }
// new indent is too far ahead
else if (!start_of_line || (curr_indent_level > last(indent_stack) + 1)) { else if (!start_of_line || (curr_indent_level > last(indent_stack) + 1)) {
lex_result.errors.push(this.lexError(start_token)); lex_result.errors.push(this.lexError(start_token));
} }
// new indent is just 1 above
else if (curr_indent_level > last(indent_stack)) { else if (curr_indent_level > last(indent_stack)) {
indent_stack.push(curr_indent_level); indent_stack.push(curr_indent_level);
tokens.push(start_token); tokens.push(start_token);
} }
// new indent is below the past indent
else if (curr_indent_level < last(indent_stack)) { else if (curr_indent_level < last(indent_stack)) {
const index = findLastIndex(indent_stack, curr_indent_level); const index = findLastIndex(indent_stack, curr_indent_level);

View File

@ -1,11 +1,24 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */ /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
/**
* The parser for Human2Regex
* @packageDocumentation
*/
import { EmbeddedActionsParser, IOrAlt, IToken } from "chevrotain"; import { EmbeddedActionsParser, IOrAlt, IToken } from "chevrotain";
import * as T from "./tokens"; import * as T from "./tokens";
import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST } from "./generator"; import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST } from "./generator";
import { first } from "./utilities"; import { first, usefulConditional, unusedParameter } from "./utilities";
/**
* The options for the Parser
*/
export class Human2RegexParserOptions { export class Human2RegexParserOptions {
/**
* Constructor for Human2RegexParserOptions
*
* @param skip_validations If true, the parser will skip validations (~25% faster)
*/
constructor(public skip_validations: boolean = false) { constructor(public skip_validations: boolean = false) {
/* empty */ /* empty */
} }
@ -22,6 +35,11 @@ class TokensAndValue<T> {
} }
} }
/**
* The Parser class
*
* @remarks Only 1 parser instance allowed due to performance reasons
*/
export class Human2RegexParser extends EmbeddedActionsParser { export class Human2RegexParser extends EmbeddedActionsParser {
private static already_init = false; private static already_init = false;
@ -38,14 +56,17 @@ export class Human2RegexParser extends EmbeddedActionsParser {
const $ = this; const $ = this;
// IN REGARDS TO KEEPING TOKENS: /**
// We don't really need to keep each token, only the first and last tokens * IN REGARDS TO KEEPING TOKENS:
// This is due to the fact we calculate the difference between those tokens * We don't really need to keep each token, only the first and last tokens
// However, sometimes we have optional starts and ends * This is due to the fact we calculate the difference between those tokens
// Each optional near the start and end MUST be recorded because they may be the first/last token * However, sometimes we have optional starts and ends
// ex) "optional match 3..." the start token is "optional", but "match 3..."'s start token is "match" * Each optional near the start and end MUST be recorded because they may be the first/last token
* ex) "optional match 3..." the start token is "optional", but "match 3..."'s start token is "match"
* */
let nss_rules : IOrAlt<TokenAndValue<number>>[] | null = null; // number rules
let nss_rules: IOrAlt<TokenAndValue<number>>[] | null = null;
const NumberSubStatement = $.RULE("NumberSubStatement", () => { const NumberSubStatement = $.RULE("NumberSubStatement", () => {
return $.OR(nss_rules || (nss_rules = [ return $.OR(nss_rules || (nss_rules = [
{ ALT: () => new TokenAndValue($.CONSUME(T.Zero), 0) }, { ALT: () => new TokenAndValue($.CONSUME(T.Zero), 0) },
@ -69,6 +90,8 @@ export class Human2RegexParser extends EmbeddedActionsParser {
// 1, 1..2, between 1 and/to 2 inclusively/exclusively // 1, 1..2, between 1 and/to 2 inclusively/exclusively
const CountSubStatement = $.RULE("CountSubStatement", () => { const CountSubStatement = $.RULE("CountSubStatement", () => {
return $.OR([ return $.OR([
// between 1 to 4
{ ALT: () => { { ALT: () => {
const tokens: IToken[] = []; const tokens: IToken[] = [];
@ -97,6 +120,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new CountSubStatementCST(tokens, from.value, to.value, opt as "inclusive" | "exclusive" | null); return new CountSubStatementCST(tokens, from.value, to.value, opt as "inclusive" | "exclusive" | null);
}}, }},
// from 1 to 4
{ ALT: () => { { ALT: () => {
const tokens: IToken[] = []; const tokens: IToken[] = [];
@ -116,6 +140,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new CountSubStatementCST(tokens, from.value, to.value ? to.value[0] : null, to.value ? to.value[1] : null); return new CountSubStatementCST(tokens, from.value, to.value ? to.value[0] : null, to.value ? to.value[1] : null);
}}, }},
// exactly 2
{ ALT: () => { { ALT: () => {
const tokens: IToken[] = []; const tokens: IToken[] = [];
$.OPTION(() => tokens.push($.CONSUME(T.Exactly))); $.OPTION(() => tokens.push($.CONSUME(T.Exactly)));
@ -126,27 +151,27 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new CountSubStatementCST(tokens, from.value); return new CountSubStatementCST(tokens, from.value);
}} }}
]); ]);
}); });
let mss_rules : IOrAlt<MatchSubStatementValue>[] | null = null; // match sub rules
let mss_rules: IOrAlt<MatchSubStatementValue>[] | null = null;
const MatchSubStatement = $.RULE("MatchSubStatement", () => { const MatchSubStatement = $.RULE("MatchSubStatement", () => {
let count: CountSubStatementCST | null = null; let count: CountSubStatementCST | null = null;
let invert: boolean = false; let invert: boolean = false;
const values: MatchSubStatementValue[] = []; const values: MatchSubStatementValue[] = [];
let from : string | null = null; let from: string | null = null;
let to : string | null = null; let to: string | null = null;
let type : MatchSubStatementType = MatchSubStatementType.Anything; let type: MatchSubStatementType = MatchSubStatementType.Anything;
const tokens: IToken[] = []; const tokens: IToken[] = [];
count = $.OPTION(() => { count = $.OPTION(() => {
const css = $.SUBRULE(CountSubStatement); const css = $.SUBRULE(CountSubStatement);
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (css.tokens) { if (usefulConditional(css.tokens, "due to how chevrotain works, the first run produces a null value")) {
tokens.push(first(css.tokens)); tokens.push(first(css.tokens));
} }
return css; return css;
}); });
@ -159,6 +184,8 @@ export class Human2RegexParser extends EmbeddedActionsParser {
DEF: () => { DEF: () => {
$.OPTION3(() => $.CONSUME(T.A)); $.OPTION3(() => $.CONSUME(T.A));
values.push($.OR(mss_rules || (mss_rules = [ values.push($.OR(mss_rules || (mss_rules = [
// range [a-z]
{ ALT: () => { { ALT: () => {
$.OPTION4(() => $.CONSUME(T.From)); $.OPTION4(() => $.CONSUME(T.From));
from = $.CONSUME2(T.StringLiteral).image; from = $.CONSUME2(T.StringLiteral).image;
@ -170,6 +197,8 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new MatchSubStatementValue(type, from, to); return new MatchSubStatementValue(type, from, to);
}}, }},
// range [a-z]
{ ALT: () => { { ALT: () => {
$.CONSUME(T.Between); $.CONSUME(T.Between);
from = $.CONSUME4(T.StringLiteral).image; from = $.CONSUME4(T.StringLiteral).image;
@ -181,6 +210,8 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new MatchSubStatementValue(type, from, to); return new MatchSubStatementValue(type, from, to);
}}, }},
// exact string
{ ALT: () => { { ALT: () => {
const token = $.CONSUME(T.StringLiteral); const token = $.CONSUME(T.StringLiteral);
tokens.push(token); tokens.push(token);
@ -289,7 +320,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
}); });
// using global matching // using global matching
let us_rules : IOrAlt<UsingFlags>[] | null = null; let us_rules: IOrAlt<UsingFlags>[] | null = null;
const UsingStatement = $.RULE("UsingStatement", () => { const UsingStatement = $.RULE("UsingStatement", () => {
const usings: UsingFlags[] = []; const usings: UsingFlags[] = [];
@ -327,12 +358,16 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new TokensAndValue(tokens, usings); return new TokensAndValue(tokens, usings);
}); });
// group rules
const GroupStatement = $.RULE("GroupStatement", () => { const GroupStatement = $.RULE("GroupStatement", () => {
const tokens: IToken[] = []; const tokens: IToken[] = [];
let optional = false; let optional = false;
let name: string | null = null; let name: string | null = null;
const statement: StatementCST[] = []; const statement: StatementCST[] = [];
// position of optional must be OR'd because
// otherwise it could appear twice
// ex) optional? create an optional? group
tokens.push($.OR([ tokens.push($.OR([
{ ALT: () => { { ALT: () => {
optional = true; optional = true;
@ -371,10 +406,11 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new GroupStatementCST(tokens, optional, name, statement); return new GroupStatementCST(tokens, optional, name, statement);
}); });
// repeat rules
const RepeatStatement = $.RULE("RepeatStatement", () => { const RepeatStatement = $.RULE("RepeatStatement", () => {
const tokens: IToken[] = []; const tokens: IToken[] = [];
let optional = false; let optional = false;
let count : CountSubStatementCST | null = null; let count: CountSubStatementCST | null = null;
const statements: StatementCST[] = []; const statements: StatementCST[] = [];
$.OPTION3(() => { $.OPTION3(() => {
@ -393,6 +429,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new RepeatStatementCST(tokens, optional, count, statements); return new RepeatStatementCST(tokens, optional, count, statements);
}); });
// statement super class
const Statement = $.RULE("Statement", () => { const Statement = $.RULE("Statement", () => {
return $.OR([ return $.OR([
{ ALT: () => $.SUBRULE(MatchStatement) }, { ALT: () => $.SUBRULE(MatchStatement) },
@ -401,6 +438,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
]); ]);
}); });
// full regex
const Regex = $.RULE("Regex", () => { const Regex = $.RULE("Regex", () => {
let tokens: IToken[] = []; let tokens: IToken[] = [];
let usings: UsingFlags[] = []; let usings: UsingFlags[] = [];
@ -421,7 +459,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
this.parse = Regex; this.parse = Regex;
} }
//public set_options(options: Human2RegexParserOptions) : void { public setOptions(options: Human2RegexParserOptions): void {
// // empty so far unusedParameter(options, "skip_validations is not valid to change once we've already initialized");
//} }
} }

View File

@ -3,18 +3,17 @@
import { Human2RegexLexer, Human2RegexLexerOptions } from "./lexer"; import { Human2RegexLexer, Human2RegexLexerOptions } from "./lexer";
import { Human2RegexParser, Human2RegexParserOptions } from "./parser"; import { Human2RegexParser, Human2RegexParserOptions } from "./parser";
import { RobotLanguage } from "./generator"; import { RegexDialect } from "./generator";
import { CommonError } from "./utilities"; import { CommonError, unusedParameter, usefulConditional } from "./utilities";
import $ from "jquery"; import $ from "jquery";
import CodeMirror from "codemirror/lib/codemirror"; import CodeMirror from "codemirror/lib/codemirror";
require("codemirror/mode/javascript/javascript"); import "codemirror/mode/javascript/javascript";
import "./webpage/bootstrap.css"; import "./webpage/bootstrap.css";
import "./webpage/cleanblog.css"; import "./webpage/cleanblog.css";
import "codemirror/lib/codemirror.css"; import "codemirror/lib/codemirror.css";
import "./webpage/style.css"; import "./webpage/style.css";
$(function() { $(function() {
const total_errors: CommonError[] = []; const total_errors: CommonError[] = [];
const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(true)); const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(true));
@ -32,19 +31,19 @@ $(function() {
parser.errors.map(CommonError.fromParseError).forEach((x) => total_errors.push(x)); parser.errors.map(CommonError.fromParseError).forEach((x) => total_errors.push(x));
let lang: RobotLanguage = RobotLanguage.JS; let lang: RegexDialect = RegexDialect.JS;
switch ($("#dialect option:selected").val()) { switch ($("#dialect option:selected").val()) {
case "dotnet": case "dotnet":
lang = RobotLanguage.DotNet; lang = RegexDialect.DotNet;
break; break;
case "java": case "java":
lang = RobotLanguage.Java; lang = RegexDialect.Java;
break; break;
case "perl": case "perl":
lang = RobotLanguage.Perl; lang = RegexDialect.Perl;
break; break;
default: default:
lang = RobotLanguage.JS; lang = RegexDialect.JS;
break; break;
} }
@ -52,8 +51,7 @@ $(function() {
valid.map(CommonError.fromSemanticError).forEach((x) => total_errors.push(x)); valid.map(CommonError.fromSemanticError).forEach((x) => total_errors.push(x));
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition if (!usefulConditional(total_errors.length, "total_errors may have added an error")) {
if (total_errors.length === 0) {
regex_result = regex.toRegex(lang); regex_result = regex.toRegex(lang);
$("#regex").attr("value", regex_result); $("#regex").attr("value", regex_result);
} }
@ -76,8 +74,9 @@ $(function() {
}); });
$("#clip").on("click", () => { $("#clip").on("click", () => {
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition if (window.isSecureContext &&
if (window.isSecureContext && navigator?.clipboard?.writeText) { usefulConditional(navigator.clipboard, "clipboard may be undefined") &&
usefulConditional(navigator.clipboard.writeText, "writeText may be undefined")) {
navigator.clipboard.writeText(regex_result); navigator.clipboard.writeText(regex_result);
} }
else { else {
@ -96,6 +95,9 @@ $(function() {
}); });
editor.on("change", (instance: unknown, change_obj: unknown) => { editor.on("change", (instance: unknown, change_obj: unknown) => {
unusedParameter(instance, "Instance is not required, we have a reference already");
unusedParameter(change_obj, "Change is not required, we want the full value");
/* not empty */ /* not empty */
console.log(editor.getValue()); console.log(editor.getValue());
}); });

View File

@ -1,55 +1,60 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */ /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
/**
* The tokens required for Human2Regex
* @packageDocumentation
*/
import { createToken, Lexer } from "chevrotain"; import { createToken, Lexer } from "chevrotain";
export const Zero = createToken({name: "Zero", pattern: /zero/i }); /** @internal */ export const Zero = createToken({name: "Zero", pattern: /zero/i });
export const One = createToken({name: "One", pattern: /one/i }); /** @internal */ export const One = createToken({name: "One", pattern: /one/i });
export const Two = createToken({name: "Two", pattern: /two/i }); /** @internal */ export const Two = createToken({name: "Two", pattern: /two/i });
export const Three = createToken({name: "Three", pattern: /three/i }); /** @internal */ export const Three = createToken({name: "Three", pattern: /three/i });
export const Four = createToken({name: "Four", pattern: /four/i }); /** @internal */ export const Four = createToken({name: "Four", pattern: /four/i });
export const Five = createToken({name: "Five", pattern: /five/i }); /** @internal */ export const Five = createToken({name: "Five", pattern: /five/i });
export const Six = createToken({name: "Six", pattern: /six/i }); /** @internal */ export const Six = createToken({name: "Six", pattern: /six/i });
export const Seven = createToken({name: "Seven", pattern: /seven/i }); /** @internal */ export const Seven = createToken({name: "Seven", pattern: /seven/i });
export const Eight = createToken({name: "Eight", pattern: /eight/i }); /** @internal */ export const Eight = createToken({name: "Eight", pattern: /eight/i });
export const Nine = createToken({name: "Nine", pattern: /nine/i }); /** @internal */ export const Nine = createToken({name: "Nine", pattern: /nine/i });
export const Ten = createToken({name: "Ten", pattern: /ten/i }); /** @internal */ export const Ten = createToken({name: "Ten", pattern: /ten/i });
export const Optional = createToken({name: "Optional", pattern: /optional(ly)?/i }); /** @internal */ export const Optional = createToken({name: "Optional", pattern: /optional(ly)?/i });
export const Match = createToken({name: "Match", pattern: /match(es)?/i }); /** @internal */ export const Match = createToken({name: "Match", pattern: /match(es)?/i });
export const Then = createToken({name: "Then", pattern: /then/i }); /** @internal */ export const Then = createToken({name: "Then", pattern: /then/i });
export const Anything = createToken({name: "Anything", pattern: /(any thing|any|anything)(s)?/i}); /** @internal */ export const Anything = createToken({name: "Anything", pattern: /(any thing|any|anything)(s)?/i});
export const Or = createToken({name: "Or", pattern: /or/i}); /** @internal */ export const Or = createToken({name: "Or", pattern: /or/i});
export const And = createToken({name: "And", pattern: /and|,/i}); /** @internal */ export const And = createToken({name: "And", pattern: /and|,/i});
export const Word = createToken({name: "WordSpecifier", pattern: /word(s)?/i}); /** @internal */ export const Word = createToken({name: "WordSpecifier", pattern: /word(s)?/i});
export const Digit = createToken({name: "DigitSpecifier", pattern: /digit(s)?/i}); /** @internal */ export const Digit = createToken({name: "DigitSpecifier", pattern: /digit(s)?/i});
export const Character = createToken({name: "CharacterSpecifier", pattern: /character(s)?/i}); /** @internal */ export const Character = createToken({name: "CharacterSpecifier", pattern: /character(s)?/i});
export const Whitespace = createToken({name: "WhitespaceSpecifier", pattern: /(white space|whitespace)(s)?/i}); /** @internal */ export const Whitespace = createToken({name: "WhitespaceSpecifier", pattern: /(white space|whitespace)(s)?/i});
export const Number = createToken({name: "NumberSpecifier", pattern: /number(s)?/i}); /** @internal */ export const Number = createToken({name: "NumberSpecifier", pattern: /number(s)?/i});
export const Using = createToken({name: "Using", pattern: /using/i}); /** @internal */ export const Using = createToken({name: "Using", pattern: /using/i});
export const Global = createToken({name: "Global", pattern: /global/i}); /** @internal */ export const Global = createToken({name: "Global", pattern: /global/i});
export const Multiline = createToken({name: "Multiline", pattern: /(multi line|multiline)/i}); /** @internal */ export const Multiline = createToken({name: "Multiline", pattern: /(multi line|multiline)/i});
export const Exact = createToken({name: "Exact", pattern: /exact/i}); /** @internal */ export const Exact = createToken({name: "Exact", pattern: /exact/i});
export const Matching = createToken({name: "Matching", pattern: /matching/i}); /** @internal */ export const Matching = createToken({name: "Matching", pattern: /matching/i});
export const Not = createToken({name: "Not", pattern: /not/i }); //, longer_alt: Nothing}); /** @internal */ export const Not = createToken({name: "Not", pattern: /not/i }); //, longer_alt: Nothing});
export const Between = createToken({name: "Between", pattern: /between/i}); /** @internal */ export const Between = createToken({name: "Between", pattern: /between/i});
export const Tab = createToken({name: "Tab", pattern: /tab/i}); /** @internal */ export const Tab = createToken({name: "Tab", pattern: /tab/i});
export const Linefeed = createToken({name: "Linefeed", pattern: /(line feed|linefeed)/i}); /** @internal */ export const Linefeed = createToken({name: "Linefeed", pattern: /(line feed|linefeed)/i});
export const Group = createToken({name: "Group", pattern: /group/i}); /** @internal */ export const Group = createToken({name: "Group", pattern: /group/i});
export const A = createToken({name: "A", pattern: /a(n)?/i }); //, longer_alt: Anything}); /** @internal */ export const A = createToken({name: "A", pattern: /a(n)?/i }); //, longer_alt: Anything});
export const Times = createToken({name: "Times", pattern: /times/i }); /** @internal */ export const Times = createToken({name: "Times", pattern: /times/i });
export const Exactly = createToken({name: "Exactly", pattern: /exact(ly)?/i}); /** @internal */ export const Exactly = createToken({name: "Exactly", pattern: /exact(ly)?/i});
export const Inclusive = createToken({name: "Inclusive", pattern: /inclusive(ly)?/i}); /** @internal */ export const Inclusive = createToken({name: "Inclusive", pattern: /inclusive(ly)?/i});
export const Exclusive = createToken({name: "Exclusive", pattern: /exclusive(ly)?/i}); /** @internal */ export const Exclusive = createToken({name: "Exclusive", pattern: /exclusive(ly)?/i});
export const From = createToken({name: "From", pattern: /from/i}); /** @internal */ export const From = createToken({name: "From", pattern: /from/i});
export const To = createToken({name: "To", pattern: /(to|through|thru|\-|\.\.|\.\.\.)/i}); /** @internal */ export const To = createToken({name: "To", pattern: /(to|through|thru|\-|\.\.|\.\.\.)/i});
export const Create = createToken({name: "Create", pattern: /create(s)?/i}); /** @internal */ export const Create = createToken({name: "Create", pattern: /create(s)?/i});
export const Called = createToken({name: "Called", pattern: /name(d)?|call(ed)?/i}); /** @internal */ export const Called = createToken({name: "Called", pattern: /name(d)?|call(ed)?/i});
export const Repeat = createToken({name: "Repeat", pattern: /repeat(s|ing)?/i}); /** @internal */ export const Repeat = createToken({name: "Repeat", pattern: /repeat(s|ing)?/i});
export const Newline = createToken({name: "Newline", pattern: /(new line|newline)/i}); /** @internal */ export const Newline = createToken({name: "Newline", pattern: /(new line|newline)/i});
export const CarriageReturn = createToken({name: "CarriageReturn", pattern: /carriage return/i}); /** @internal */ export const CarriageReturn = createToken({name: "CarriageReturn", pattern: /carriage return/i});
export const CaseInsensitive = createToken({name: "CaseInsensitive", pattern: /case insensitive/i}); /** @internal */ export const CaseInsensitive = createToken({name: "CaseInsensitive", pattern: /case insensitive/i});
export const CaseSensitive = createToken({name: "CaseSensitive", pattern: /case sensitive/i}); /** @internal */ export const CaseSensitive = createToken({name: "CaseSensitive", pattern: /case sensitive/i});
export const OrMore = createToken({name: "OrMore", pattern: /\+|or more/i }); /** @internal */ export const OrMore = createToken({name: "OrMore", pattern: /\+|or more/i });
/* /*
//Not being used currently //Not being used currently
@ -72,18 +77,22 @@ export const By = createToken({name: "By", pattern: /by/i});
*/ */
export const EndOfLine = createToken({name: "EOL", pattern: /\n/ }); /** @internal */ export const EndOfLine = createToken({name: "EOL", pattern: /\n/ });
export const WS = createToken({name: "Whitespace", pattern: /\s+/, group: Lexer.SKIPPED }); /** @internal */ export const WS = createToken({name: "Whitespace", pattern: /\s+/, group: Lexer.SKIPPED });
export const SingleLineComment = createToken({name: "SingleLineComment", pattern: /(#|\/\/).*/, group: Lexer.SKIPPED }); /** @internal */ export const SingleLineComment = createToken({name: "SingleLineComment", pattern: /(#|\/\/).*/, group: Lexer.SKIPPED });
export const MultilineComment = createToken({name: "MultiLineComment", pattern: /\/\*(.*)\*\//, line_breaks: true, group: Lexer.SKIPPED }); /** @internal */ export const MultilineComment = createToken({name: "MultiLineComment", pattern: /\/\*(.*)\*\//, line_breaks: true, group: Lexer.SKIPPED });
export const Identifier = createToken({name: "Identifier", pattern: /[a-z]\w*/i }); /** @internal */ export const Identifier = createToken({name: "Identifier", pattern: /[a-z]\w*/i });
export const NumberLiteral = createToken({name: "NumberLiteral", pattern: /-?\d+/ }); /** @internal */ export const NumberLiteral = createToken({name: "NumberLiteral", pattern: /-?\d+/ });
export const StringLiteral = createToken({name: "StringLiteral", pattern: /"(?:[^\\"]|\\(?:[bfnrtv"\\/]|u[0-9a-f]{4}|U[0-9a-f]{8}))*"/i }); /** @internal */ export const StringLiteral = createToken({name: "StringLiteral", pattern: /"(?:[^\\"]|\\(?:[bfnrtv"\\/]|u[0-9a-f]{4}|U[0-9a-f]{8}))*"/i });
export const Indent = createToken({name: "Indent"}); /** @internal */ export const Indent = createToken({name: "Indent"});
export const Outdent = createToken({name: "Outdent"}); /** @internal */ export const Outdent = createToken({name: "Outdent"});
/**
* All the tokens used
* @internal
*/
export const AllTokens = [ export const AllTokens = [
Zero, Zero,
One, One,

View File

@ -1,18 +1,93 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */ /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
/**
* Some utility functions for Human2Regex
* @packageDocumentation
*/
import { ISemanticError } from "./generator"; import { ISemanticError } from "./generator";
import { IRecognitionException, ILexingError } from "chevrotain"; import { IRecognitionException, ILexingError } from "chevrotain";
/* eslint-disable no-bitwise */
export function hasFlag(a: number, b: number) : boolean { /**
return (a & b) !== 0; * The following section is used because the linter is set up to warn about certain operations
* and for good reason! I'd much rather have these functions than accidentally use bitwise operations, or
* create a bunch of useless conditionals
* Plus, it signifies exactly what you wish to do (ex, calling hasFlag means you want to check if the
* bitpattern matches a given flag)
*/
/**
* Fixes linter warnings about unused variables, however requires a reason why it's unused
*
* @param value the value you want to specify that is unused
* @param reason the reason this value is required but unused in this context
* @internal
*/
// eslint-disable-next-line @typescript-eslint/no-unused-vars
export function unusedParameter<T>(value: T, reason: string): void {
/* empty on purpose */
} }
export function combineFlags(a: number, b: number): number { /**
return (a | b); * Fixes linter warnings about useless conditionals, however requires a reason why it's useless
*
* @param conditional the supposedly useless conditional
* @param reason the reason this value is required but considered useless
* @internal
*/
// eslint-disable-next-line @typescript-eslint/no-unused-vars
export function usefulConditional<T>(conditional: boolean | T, reason: string): boolean {
    // `reason` is intentionally never read: it only documents, at the call
    // site, why the condition is worth checking.
    const isTruthy = Boolean(conditional);
    return isTruthy;
}
/* eslint-disable no-bitwise */
/**
 * Builds a single-bit flag by shifting the value 1 to the left
 *
 * @remarks uses the 32-bit `<<` operator, so the result is a signed 32-bit integer
 *
 * @param value how many bit positions to shift by
 * @returns the result of `1 << value`
 * @internal
 */
export function makeFlag(value: number): number {
    const flag = 1 << value;
    return flag;
}
/**
 * Tests whether a value shares any set bit with the given flag
 *
 * @remarks when `flag` has several bits set, one matching bit is enough
 *
 * @param value the bit pattern to inspect
 * @param flag the bit (or bits) to look for
 * @returns true if `value & flag` is non-zero, false otherwise
 * @internal
 */
export function hasFlag(value: number, flag: number): boolean {
    const sharedBits = value & flag;
    return sharedBits !== 0;
}
/**
* Appends the flag to the value
*
* @param value First flag
* @param flag Second flag
* @returns value | flag
* @internal
*/
export function combineFlags(value: number, flag: number): number {
return (value | flag);
} }
/* eslint-enable no-bitwise */ /* eslint-enable no-bitwise */
/**
* Checks to see if the character is a single regex character
*
* @remarks unicode and escape characters count as a single character
*
* @param char the character to check
* @returns if the value is exactly 1 character
* @internal
*/
export function isSingleRegexCharacter(char: string): boolean { export function isSingleRegexCharacter(char: string): boolean {
return (char.startsWith("\\u") && char.length === 6) || return (char.startsWith("\\u") && char.length === 6) ||
(char.startsWith("\\U") && char.length === 8) || (char.startsWith("\\U") && char.length === 8) ||
@ -20,14 +95,38 @@ export function isSingleRegexCharacter(char: string): boolean {
char.length === 1; char.length === 1;
} }
/**
* Gets the first element of an array
* @remarks does not validate if array has any elements
*
* @param array an array
* @returns first element of an array
* @internal
*/
export function first<T>(array: T[]): T { export function first<T>(array: T[]): T {
return array[0]; return array[0];
} }
/**
* Gets the last element of an array
* @remarks does not validate if array has any elements
*
* @param array an array
* @returns last element of an array
* @internal
*/
export function last<T>(array: T[]): T { export function last<T>(array: T[]): T {
return array[array.length-1]; return array[array.length-1];
} }
/**
* Find the last index of a given value in an array
*
* @param array an array
* @param value the value to find
* @returns an index if found or -1 if not found
* @internal
*/
export function findLastIndex<T>(array: T[], value: T): number { export function findLastIndex<T>(array: T[], value: T): number {
for (let index = array.length-1; index >= 0; index--) { for (let index = array.length-1; index >= 0; index--) {
if (array[index] === value) { if (array[index] === value) {
@ -37,40 +136,71 @@ export function findLastIndex<T>(array: T[], value: T): number {
return -1; return -1;
} }
export function findLastIndexPredicate<T>(array: T[], predicate: (x: T) => boolean): number { /**
for (let index = array.length-1; index >= 0; index--) { * Removes start and end quotes from a string
if (predicate(array[index])) { *
return index; * @param input the string to remove quotes from
} * @returns a string without quote characters
} * @internal
return -1; */
}
export function removeQuotes(input: string): string { export function removeQuotes(input: string): string {
return input.substring(1, input.length-1); return input.substring(1, input.length-1);
} }
export function regexEscape(input: string) : string { /**
* Escapes a string so it may be used literally in a regular expression
*
* @param input the string to escape
* @returns a regex escaped string
* @internal
*/
export function regexEscape(input: string): string {
return input.replace("\\", "\\\\").replace(/([=:\-\.\[\]\^\|\(\)\*\+\?\{\}\$\/])/g, "\\$1"); return input.replace("\\", "\\\\").replace(/([=:\-\.\[\]\^\|\(\)\*\+\?\{\}\$\/])/g, "\\$1");
} }
/**
* Common Error class that encapsulates information from the lexer, parser, and generator
*/
export class CommonError { export class CommonError {
constructor(public type: string, public start_line: number, public start_column: number, public length: number, public message: string) { private constructor(public type: string, public start_line: number, public start_column: number, public length: number, public message: string) {
/* empty */ /* empty */
} }
/**
* Creates a common error from a lexing error
*
* @param error The lexing error
* @returns a new CommonError
*/
public static fromLexError(error: ILexingError): CommonError { public static fromLexError(error: ILexingError): CommonError {
return new CommonError("Lexer Error", error.line, error.column, error.length, error.message); return new CommonError("Lexer Error", error.line, error.column, error.length, error.message);
} }
/**
* Creates a common error from a parsing error
*
* @param error The parsing error
* @returns a new CommonError
*/
public static fromParseError(error: IRecognitionException): CommonError { public static fromParseError(error: IRecognitionException): CommonError {
return new CommonError("Parser Error", error.token.startLine ?? NaN, error.token.startColumn ?? NaN, error.token.endOffset ?? NaN - error.token.startOffset, error.name + ": " + error.message); return new CommonError("Parser Error", error.token.startLine ?? NaN, error.token.startColumn ?? NaN, error.token.endOffset ?? NaN - error.token.startOffset, error.name + ": " + error.message);
} }
/**
* Creates a common error from a semantic error
*
* @param error The semantic error
* @returns a new CommonError
*/
public static fromSemanticError(error: ISemanticError): CommonError { public static fromSemanticError(error: ISemanticError): CommonError {
return new CommonError("Semantic Error", error.startLine, error.startColumn, error.length, error.message); return new CommonError("Semantic Error", error.startLine, error.startColumn, error.length, error.message);
} }
/**
* Generates a string representation of a CommonError
*
* @returns a string representation
*/
public toString(): string { public toString(): string {
return `${this.type} @ ${this.start_line} ${this.start_column}: ${this.message}`; return `${this.type} @ ${this.start_line} ${this.start_column}: ${this.message}`;
} }