1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-16 12:30:09 -07:00

Refactored code for later migration to npm

This commit is contained in:
Patrick Demian 2020-11-01 23:19:30 -05:00
parent 92fc7445d5
commit 424cb59d6d
7 changed files with 619 additions and 224 deletions

16
docs/bundle.min.js vendored

File diff suppressed because one or more lines are too long

View File

@ -1,16 +1,26 @@
/* eslint-disable @typescript-eslint/no-unused-vars */
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last } from "./utilities";
/**
* Includes all Concrete Syntax Trees for Human2Regex
* @packageDocumentation
*/
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last, unusedParameter, makeFlag } from "./utilities";
import { IToken } from "chevrotain";
export enum RobotLanguage {
/**
* List of regular expression dialects we support
*/
export enum RegexDialect {
    // Values are spelled out explicitly; they equal the implicit
    // zero-based numbering the members would otherwise receive.
    JS = 0,
    Perl = 1,
    DotNet = 2,
    Java = 3
}
/**
* Interface for all semantic errors
*/
export interface ISemanticError {
startLine: number,
startColumn: number,
@ -18,16 +28,52 @@ export interface ISemanticError {
message: string
}
/**
* The base concrete syntax tree class
*
* @internal
*/
export abstract class H2RCST {
public tokens: IToken[];
/**
* Constructor for H2RCST
*
* @param tokens Tokens used to calculate where an error occurred
* @internal
*/
constructor(tokens: IToken[]) {
this.tokens = tokens;
}
public abstract validate(language: RobotLanguage): ISemanticError[];
public abstract toRegex(language: RobotLanguage): string;
/**
* Validate that this is both valid and can be generated in the specified language
*
* @remarks There is no guarantee toRegex will work unless validate returns no errors
*
* @param language the regex dialect we're validating
* @returns A list of errors
* @public
*/
public abstract validate(language: RegexDialect): ISemanticError[];
/**
* Generate a regular expression fragment based on this syntax tree
*
* @remarks There is no guarantee toRegex will work unless validate returns no errors
*
* @param language the regex dialect we're generating
* @returns a regular expression fragment
* @public
*/
public abstract toRegex(language: RegexDialect): string;
/**
* Creates an ISemanticError with a given message and the tokens provided from the constructor
*
* @param message the message
* @internal
*/
protected error(message: string): ISemanticError {
const f = first(this.tokens);
const l = last(this.tokens);
@ -41,16 +87,28 @@ export abstract class H2RCST {
}
}
/* eslint-disable no-bitwise */
/**
* Flags for the using statement
*
* @internal
*/
export enum UsingFlags {
Multiline = 1 << 0,
Global = 1 << 1,
Sensitive = 1 << 2,
Insensitive = 1 << 3,
Exact = 1 << 4
Multiline = makeFlag(0),
Global = makeFlag(1),
Sensitive = makeFlag(2),
Insensitive = makeFlag(3),
Exact = makeFlag(4)
}
/* eslint-enable no-bitwise */
/**
* Type of match arguments
*
* @remarks SingleString means an escaped string
* @remarks Between means a range (ex. a-z)
* @remarks Anything means .
* @remarks Word, Digit, Character, Whitespace, Number, Tab, Linefeed, Newline, and Carriage return are \w+, \d, \w, \s, \d+, \t, \n, \n, \r respectively
* @internal
*/
export enum MatchSubStatementType {
SingleString,
Between,
@ -66,27 +124,73 @@ export enum MatchSubStatementType {
CarriageReturn
}
/**
* Container for match statements
*
* @internal
*/
export class MatchSubStatementValue {
    /** the type of this match */
    public type: MatchSubStatementType;
    /** optional range string (start of the range), null when absent */
    public from: string | null;
    /** optional range string (end of the range), null when absent */
    public to: string | null;

    /**
     * Constructor for MatchSubStatementValue
     *
     * Stores the three arguments unchanged on the instance.
     *
     * @param type the type of this match
     * @param from optional range string, defaults to null
     * @param to optional range string, defaults to null
     * @internal
     */
    constructor(type: MatchSubStatementType, from: string | null = null, to: string | null = null) {
        this.type = type;
        this.from = from;
        this.to = to;
    }
}
/**
* Container for MatchStatementValue
*
* @internal
*/
export class MatchStatementValue {
    /** is this match optional */
    public optional: boolean;
    /** the substatement to generate */
    public statement: MatchSubStatementCST;

    /**
     * Constructor for MatchStatementValue
     *
     * Stores both arguments unchanged on the instance.
     *
     * @param optional is this match optional
     * @param statement the substatement to generate
     * @internal
     */
    constructor(optional: boolean, statement: MatchSubStatementCST) {
        this.optional = optional;
        this.statement = statement;
    }
}
/**
* The base class for all statement concrete syntax trees
*
* @internal
*/
export abstract class StatementCST extends H2RCST {
/* empty: adds no members of its own; it only gives the statement trees (match, repeat, group) a common base type */
}
/**
* Concrete Syntax Tree for Match Sub statements
*
* @internal
*/
export class MatchSubStatementCST extends H2RCST {
constructor(public tokens: IToken[], public count: CountSubStatementCST | null, public invert: boolean = false, public values: MatchSubStatementValue[]) {
/**
* Constructor for MatchSubStatementCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param count optional count statement
* @param invert is this match inverted (ex, [^a-z] or [a-z])
* @param values sub statements to match
*/
constructor(tokens: IToken[], private count: CountSubStatementCST | null, private invert: boolean = false, private values: MatchSubStatementValue[]) {
super(tokens);
}
public validate(language: RobotLanguage): ISemanticError[] {
public validate(language: RegexDialect): ISemanticError[] {
let errors: ISemanticError[] = [];
if (this.count) {
@ -121,7 +225,7 @@ export class MatchSubStatementCST extends H2RCST {
return errors;
}
public toRegex(language: RobotLanguage): string {
public toRegex(language: RegexDialect): string {
const str: string[] = [];
for (const value of this.values) {
@ -181,37 +285,33 @@ export class MatchSubStatementCST extends H2RCST {
}
if (this.count) {
if (this.count.from === 1 && this.count.to === null) {
if (this.count.opt === "+") {
ret += "+";
}
// if we only have a count of 1, we can ignore adding any extra text
}
else if (this.count.from === 0 && this.count.to === null) {
if (this.count.opt === "+") {
ret += "*";
}
else {
// match 0 of anything? ok...
ret = "";
}
}
else {
ret += this.count.toRegex(language);
}
}
return ret;
}
}
/**
* Concrete Syntax Tree for Using statements
*
* @internal
*/
export class UsingStatementCST extends H2RCST {
constructor(public tokens: IToken[], public flags: UsingFlags[]) {
/**
* Constructor for UsingStatementCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param flags using flags
*/
constructor(tokens: IToken[], private flags: UsingFlags[]) {
super(tokens);
}
public validate(language: RobotLanguage): ISemanticError[] {
public validate(language: RegexDialect): ISemanticError[] {
unusedParameter(language, "Using Statement does not change based on language");
const errors: ISemanticError[] = [];
let flag = this.flags[0];
@ -229,7 +329,9 @@ export class UsingStatementCST extends H2RCST {
return errors;
}
public toRegex(language: RobotLanguage): string {
public toRegex(language: RegexDialect): string {
unusedParameter(language, "Using Statement does not change based on language");
let str = "";
let exact = false;
@ -252,12 +354,27 @@ export class UsingStatementCST extends H2RCST {
}
}
/**
* Concrete Syntax Tree for Count sub statements
*
* @internal
*/
export class CountSubStatementCST extends H2RCST {
constructor(public tokens: IToken[], public from: number, public to: number | null = null, public opt: "inclusive" | "exclusive" | "+" | null = null) {
/**
* Constructor for CountSubStatementCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param from number to count from
* @param to optional number to count to
* @param opt option modifier
*/
constructor(tokens: IToken[], private from: number, private to: number | null = null, private opt: "inclusive" | "exclusive" | "+" | null = null) {
super(tokens);
}
public validate(language: RobotLanguage): ISemanticError[] {
public validate(language: RegexDialect): ISemanticError[] {
unusedParameter(language, "Count does not need checking");
const errors: ISemanticError[] = [];
if (this.from < 0) {
@ -270,31 +387,56 @@ export class CountSubStatementCST extends H2RCST {
return errors;
}
/**
 * Generate the quantifier for this count
 *
 * Produces, depending on the stored values:
 *  - `{from,to}` when an upper bound is present (the upper bound is lowered by one for "exclusive")
 *  - `*` for "0 or more", `+` for "1 or more", `{from,}` for any other "or more"
 *  - an empty string for exactly 1 (no quantifier is needed)
 *  - `{from}` for every other exact count, including `{0}`
 *
 * @param language the regex dialect we're generating (not used, the output is the same for every dialect)
 * @returns the quantifier text to append to the quantified fragment
 */
public toRegex(language: RegexDialect): string {
    unusedParameter(language, "Count does not change from language");

    const from = this.from;
    let to = this.to;

    // bounded range
    if (to !== null) {
        if (this.opt === "exclusive") {
            to--;
        }
        return `{${from},${to}}`;
    }

    // open ended range, use the short forms where they exist
    if (this.opt === "+") {
        if (from === 0) {
            return "*";
        }
        else if (from === 1) {
            return "+";
        }
        return `{${from},}`;
    }

    // if we only have a count of 1, we can ignore adding any extra text
    // (this previously returned "*", which turned "exactly 1" into "0 or more")
    if (from === 1) {
        return "";
    }

    // exact count; a count of 0 must produce {0}, because returning an empty
    // string would leave the quantified fragment matching once
    return `{${from}}`;
}
}
/**
* Concrete Syntax Tree for a Match statement
*
* @internal
*/
export class MatchStatementCST extends StatementCST {
constructor(public tokens: IToken[], public matches: MatchStatementValue[]) {
/**
* Constructor for MatchStatementCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param matches
*/
constructor(tokens: IToken[], private matches: MatchStatementValue[]) {
super(tokens);
}
public validate(language: RobotLanguage): ISemanticError[] {
public validate(language: RegexDialect): ISemanticError[] {
let errors: ISemanticError[] = [];
for (const match of this.matches) {
@ -304,19 +446,33 @@ export class MatchStatementCST extends StatementCST {
return errors;
}
public toRegex(language: RobotLanguage): string {
/**
 * Generate the regular expression fragment for this match statement
 *
 * Every match contributes the fragment of its sub statement, followed by
 * a `?` when that match is optional; the pieces are concatenated in order.
 *
 * @param language the regex dialect we're generating
 * @returns a regular expression fragment
 */
public toRegex(language: RegexDialect): string {
    let fragment = "";

    for (const match of this.matches) {
        fragment += match.statement.toRegex(language);

        if (match.optional) {
            fragment += "?";
        }
    }

    return fragment;
}
}
/**
* Concrete Syntax Tree for a Repeat statement
*
* @internal
*/
export class RepeatStatementCST extends StatementCST {
constructor(public tokens: IToken[], public optional: boolean, public count: CountSubStatementCST | null, public statements: StatementCST[]) {
/**
* Constructor for RepeatStatementCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param optional is this repetition optional
* @param count optional number of times to repeat
* @param statements the statements to repeat
*/
constructor(tokens: IToken[], private optional: boolean, private count: CountSubStatementCST | null, private statements: StatementCST[]) {
super(tokens);
}
public validate(language: RobotLanguage): ISemanticError[] {
public validate(language: RegexDialect): ISemanticError[] {
let errors: ISemanticError[] = [];
if (this.count !== null) {
@ -330,67 +486,16 @@ export class RepeatStatementCST extends StatementCST {
return errors;
}
public toRegex(language: RobotLanguage): string {
public toRegex(language: RegexDialect): string {
let str = "(" + this.statements.map((x) => x.toRegex(language)).join("") + ")";
if (this.count !== null) {
if (this.count.from === 1 && this.count.to === null) {
if (this.count.opt === "+") {
str += "+";
}
// if we only have a count of 1, we can ignore adding any extra text
}
else if (this.count.from === 0 && this.count.to === null) {
if (this.count.opt === "+") {
str += "*";
}
else {
// match 0 of anything? ok...
str = "";
}
}
else {
if (this.count) {
str += this.count.toRegex(language);
}
}
else {
str += "*";
}
return str;
}
}
export class GroupStatementCST extends StatementCST {
constructor(public tokens: IToken[], public optional: boolean, public name: string | null, public statements: StatementCST[]) {
super(tokens);
}
public validate(language: RobotLanguage): ISemanticError[] {
let errors : ISemanticError[] = [];
if (language !== RobotLanguage.DotNet && language !== RobotLanguage.JS) {
errors.push(this.error("This language does not support named groups"));
}
for (const statement of this.statements) {
errors = errors.concat(statement.validate(language));
}
return errors;
}
public toRegex(language: RobotLanguage): string {
let str = "(";
if (this.name !== null) {
str += `?<${this.name}>`;
}
str += this.statements.map((x) => x.toRegex(language)).join("");
str += ")";
if (this.optional) {
str += "?";
}
@ -399,12 +504,77 @@ export class GroupStatementCST extends StatementCST {
}
}
export class RegularExpressionCST extends H2RCST {
constructor(public tokens: IToken[], public usings: UsingStatementCST, public statements: StatementCST[]) {
/**
* Concrete Syntax Tree for a group Statement
*
* @internal
*/
export class GroupStatementCST extends StatementCST {
/**
* Constructor for GroupStatementCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param optional is this group optional
* @param name optional name for named group
* @param statements other statements
* @internal
*/
constructor(tokens: IToken[], private optional: boolean, private name: string | null, private statements: StatementCST[]) {
super(tokens);
}
public validate(language: RobotLanguage): ISemanticError[] {
/**
 * Validate every statement contained in this group
 *
 * The group itself reports nothing: all languages currently support named
 * groups, so there is no "This language does not support named groups" check.
 *
 * @param language the regex dialect we're validating
 * @returns the errors of all contained statements, in statement order
 */
public validate(language: RegexDialect): ISemanticError[] {
    return this.statements.reduce<ISemanticError[]>(
        (collected, statement) => collected.concat(statement.validate(language)),
        []
    );
}
/**
 * Generate the regular expression fragment for this group
 *
 * The result is `(`, then `?<name>` when the group has a name, then the
 * fragments of all contained statements, then `)` (or `)?` when the group is optional).
 *
 * @param language the regex dialect we're generating
 * @returns a regular expression fragment
 */
public toRegex(language: RegexDialect): string {
    // named group prefix, empty for an unnamed group
    const prefix = this.name !== null ? `?<${this.name}>` : "";
    const body = this.statements.map((x) => x.toRegex(language)).join("");
    const suffix = this.optional ? ")?" : ")";

    return "(" + prefix + body + suffix;
}
}
/**
* Concrete Syntax Tree for a regular expression
*
* @public
*/
export class RegularExpressionCST extends H2RCST {
/**
* Constructor for RegularExpressionCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param usings using statements
* @param statements other statements
* @internal
*/
constructor(tokens: IToken[], private usings: UsingStatementCST, private statements: StatementCST[]) {
super(tokens);
}
public validate(language: RegexDialect): ISemanticError[] {
let errors: ISemanticError[] = this.usings.validate(language);
for (const statement of this.statements) {
@ -413,11 +583,10 @@ export class RegularExpressionCST extends H2RCST {
return errors;
}
public toRegex(language: RobotLanguage): string {
/**
 * Generate the complete regular expression
 *
 * The text produced by the using statement acts as a template: its first
 * `{regex}` occurrence is replaced by the concatenated fragments of all statements.
 *
 * @param language the regex dialect we're generating
 * @returns the using statement output with the generated body inserted
 */
public toRegex(language: RegexDialect): string {
    const modifiers = this.usings.toRegex(language);
    const regex = this.statements.map((x) => x.toRegex(language)).join("");

    // A replacer function is used on purpose: a plain string replacement would
    // interpret "$&", "$`", "$'" and "$$" inside the generated body as
    // replacement patterns and corrupt it. The function's return value is inserted verbatim.
    return modifiers.replace("{regex}", () => regex);
}
}

View File

@ -1,27 +1,58 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
/**
* The Lexer for Human2Regex
* @packageDocumentation
*/
import { Lexer, IToken, createTokenInstance, ILexingResult, ILexingError } from "chevrotain";
import { last, findLastIndex } from "./utilities";
import { Indent, Outdent, EndOfLine, AllTokens } from "./tokens";
/**
* Defines the type of indents the lexer will allow
*/
export enum IndentType {
    // Values are spelled out explicitly; they equal the implicit
    // zero-based numbering the members would otherwise receive.
    Tabs = 0,
    Spaces = 1,
    Both = 2
}
/**
* The options for the Lexer
*/
export class Human2RegexLexerOptions {
    /** If true, the lexer will skip validations */
    public skip_validations: boolean;
    /** The type of indents the lexer will allow */
    public type: IndentType;
    /** Number of spaces per tab */
    public spaces_per_tab: number;

    /**
     * Constructor for the Human2RegexLexerOptions
     *
     * Stores the three arguments unchanged on the instance.
     *
     * @param skip_validations If true, the lexer will skip validations (~25% faster), defaults to false
     * @param type The type of indents the lexer will allow, defaults to IndentType.Both
     * @param spaces_per_tab Number of spaces per tab, defaults to 4
     */
    constructor(skip_validations = false, type: IndentType = IndentType.Both, spaces_per_tab: number = 4) {
        this.skip_validations = skip_validations;
        this.type = type;
        this.spaces_per_tab = spaces_per_tab;
    }
}
/**
* Human2Regex Lexer
*
* @remarks Only 1 lexer instance allowed due to a technical limitation and performance reasons
*/
export class Human2RegexLexer {
private static already_init = false;
private lexer!: Lexer;
private options!: Human2RegexLexerOptions;
/**
* Human2Regex Lexer
*
* @remarks Only 1 lexer instance allowed due to a technical limitation and performance reasons
* @param options options for the lexer
* @see Human2RegexLexerOptions
*/
constructor(options: Human2RegexLexerOptions = new Human2RegexLexerOptions()) {
if (Human2RegexLexer.already_init) {
throw new Error("Only 1 instance of Human2RegexLexer allowed");
@ -32,11 +63,18 @@ export class Human2RegexLexer {
this.setOptions(options);
}
public setOptions(options: Human2RegexLexerOptions) : void {
/**
* Sets the options for this lexer
*
* @param options options for the lexer
* @see Human2RegexLexerOptions
*/
public setOptions(options: Human2RegexLexerOptions): void {
this.options = options;
let indent_regex: RegExp | null = null;
// Generate an indent lexer (accepts tabs or spaces or both based on options)
if (this.options.type === IndentType.Tabs) {
indent_regex = /\t/y;
}
@ -65,6 +103,12 @@ export class Human2RegexLexer {
};
}
/**
* Tokenizes the given text
*
* @param text the text to analyze
* @returns a lexing result which contains the token stream and error list
*/
public tokenize(text: string) : ILexingResult {
const lex_result = this.lexer.tokenize(text);
@ -72,7 +116,6 @@ export class Human2RegexLexer {
return lex_result;
}
// create Outdents
const tokens: IToken[] = [];
const indent_stack = [ 0 ];
@ -80,6 +123,7 @@ export class Human2RegexLexer {
let start_of_line = true;
let had_indents = false;
// create Outdents
for (let i = 0; i < lex_result.tokens.length; i++) {
// EoL? check for indents next (by setting startOfLine = true)
@ -117,13 +161,16 @@ export class Human2RegexLexer {
// Ignore all indents AND newline
// continue;
}
// new indent is too far ahead
else if (!start_of_line || (curr_indent_level > last(indent_stack) + 1)) {
lex_result.errors.push(this.lexError(start_token));
}
// new indent is just 1 above
else if (curr_indent_level > last(indent_stack)) {
indent_stack.push(curr_indent_level);
tokens.push(start_token);
}
// new indent is below the past indent
else if (curr_indent_level < last(indent_stack)) {
const index = findLastIndex(indent_stack, curr_indent_level);

View File

@ -1,11 +1,24 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
/**
* The parser for Human2Regex
* @packageDocumentation
*/
import { EmbeddedActionsParser, IOrAlt, IToken } from "chevrotain";
import * as T from "./tokens";
import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST } from "./generator";
import { first } from "./utilities";
import { first, usefulConditional, unusedParameter } from "./utilities";
/**
* The options for the Parser
*/
export class Human2RegexParserOptions {
/**
* Constructor for Human2RegexParserOptions
*
* @param skip_validations If true, the parser will skip validations (~25% faster)
*/
constructor(public skip_validations: boolean = false) {
/* empty */
}
@ -22,6 +35,11 @@ class TokensAndValue<T> {
}
}
/**
* The Parser class
*
* @remarks Only 1 parser instance allowed due to performance reasons
*/
export class Human2RegexParser extends EmbeddedActionsParser {
private static already_init = false;
@ -38,14 +56,17 @@ export class Human2RegexParser extends EmbeddedActionsParser {
const $ = this;
// IN REGARDS TO KEEPING TOKENS:
// We don't really need to keep each token, only the first and last tokens
// This is due to the fact we calculate the difference between those tokens
// However, sometimes we have optional starts and ends
// Each optional near the start and end MUST be recorded because they may be the first/last token
// ex) "optional match 3..." the start token is "optional", but "match 3..."'s start token is "match"
/**
* IN REGARDS TO KEEPING TOKENS:
* We don't really need to keep each token, only the first and last tokens
* This is due to the fact we calculate the difference between those tokens
* However, sometimes we have optional starts and ends
* Each optional near the start and end MUST be recorded because they may be the first/last token
* ex) "optional match 3..." the start token is "optional", but "match 3..."'s start token is "match"
* */
let nss_rules : IOrAlt<TokenAndValue<number>>[] | null = null;
// number rules
let nss_rules: IOrAlt<TokenAndValue<number>>[] | null = null;
const NumberSubStatement = $.RULE("NumberSubStatement", () => {
return $.OR(nss_rules || (nss_rules = [
{ ALT: () => new TokenAndValue($.CONSUME(T.Zero), 0) },
@ -69,6 +90,8 @@ export class Human2RegexParser extends EmbeddedActionsParser {
// 1, 1..2, between 1 and/to 2 inclusively/exclusively
const CountSubStatement = $.RULE("CountSubStatement", () => {
return $.OR([
// between 1 to 4
{ ALT: () => {
const tokens: IToken[] = [];
@ -97,6 +120,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new CountSubStatementCST(tokens, from.value, to.value, opt as "inclusive" | "exclusive" | null);
}},
// from 1 to 4
{ ALT: () => {
const tokens: IToken[] = [];
@ -116,6 +140,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new CountSubStatementCST(tokens, from.value, to.value ? to.value[0] : null, to.value ? to.value[1] : null);
}},
// exactly 2
{ ALT: () => {
const tokens: IToken[] = [];
$.OPTION(() => tokens.push($.CONSUME(T.Exactly)));
@ -126,27 +151,27 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new CountSubStatementCST(tokens, from.value);
}}
]);
});
let mss_rules : IOrAlt<MatchSubStatementValue>[] | null = null;
// match sub rules
let mss_rules: IOrAlt<MatchSubStatementValue>[] | null = null;
const MatchSubStatement = $.RULE("MatchSubStatement", () => {
let count: CountSubStatementCST | null = null;
let invert: boolean = false;
const values: MatchSubStatementValue[] = [];
let from : string | null = null;
let to : string | null = null;
let type : MatchSubStatementType = MatchSubStatementType.Anything;
let from: string | null = null;
let to: string | null = null;
let type: MatchSubStatementType = MatchSubStatementType.Anything;
const tokens: IToken[] = [];
count = $.OPTION(() => {
const css = $.SUBRULE(CountSubStatement);
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (css.tokens) {
if (usefulConditional(css.tokens, "due to how chevrotain works, the first run produces a null value")) {
tokens.push(first(css.tokens));
}
return css;
});
@ -159,6 +184,8 @@ export class Human2RegexParser extends EmbeddedActionsParser {
DEF: () => {
$.OPTION3(() => $.CONSUME(T.A));
values.push($.OR(mss_rules || (mss_rules = [
// range [a-z]
{ ALT: () => {
$.OPTION4(() => $.CONSUME(T.From));
from = $.CONSUME2(T.StringLiteral).image;
@ -170,6 +197,8 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new MatchSubStatementValue(type, from, to);
}},
// range [a-z]
{ ALT: () => {
$.CONSUME(T.Between);
from = $.CONSUME4(T.StringLiteral).image;
@ -181,6 +210,8 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new MatchSubStatementValue(type, from, to);
}},
// exact string
{ ALT: () => {
const token = $.CONSUME(T.StringLiteral);
tokens.push(token);
@ -289,7 +320,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
});
// using global matching
let us_rules : IOrAlt<UsingFlags>[] | null = null;
let us_rules: IOrAlt<UsingFlags>[] | null = null;
const UsingStatement = $.RULE("UsingStatement", () => {
const usings: UsingFlags[] = [];
@ -327,12 +358,16 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new TokensAndValue(tokens, usings);
});
// group rules
const GroupStatement = $.RULE("GroupStatement", () => {
const tokens: IToken[] = [];
let optional = false;
let name: string | null = null;
const statement: StatementCST[] = [];
// position of optional must be OR'd because
// otherwise it could appear twice
// ex) optional? create an optional? group
tokens.push($.OR([
{ ALT: () => {
optional = true;
@ -371,10 +406,11 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new GroupStatementCST(tokens, optional, name, statement);
});
// repeat rules
const RepeatStatement = $.RULE("RepeatStatement", () => {
const tokens: IToken[] = [];
let optional = false;
let count : CountSubStatementCST | null = null;
let count: CountSubStatementCST | null = null;
const statements: StatementCST[] = [];
$.OPTION3(() => {
@ -393,6 +429,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new RepeatStatementCST(tokens, optional, count, statements);
});
// statement super class
const Statement = $.RULE("Statement", () => {
return $.OR([
{ ALT: () => $.SUBRULE(MatchStatement) },
@ -401,6 +438,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
]);
});
// full regex
const Regex = $.RULE("Regex", () => {
let tokens: IToken[] = [];
let usings: UsingFlags[] = [];
@ -421,7 +459,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
this.parse = Regex;
}
//public set_options(options: Human2RegexParserOptions) : void {
// // empty so far
//}
/**
 * Sets the options for this parser
 *
 * @remarks The only thing done with `options` here is handing it to `unusedParameter`;
 * no option is stored or applied by this method. Per the message below,
 * skip_validations is not valid to change once the parser has been initialized.
 *
 * @param options options for the parser
 * @see Human2RegexParserOptions
 */
public setOptions(options: Human2RegexParserOptions): void {
unusedParameter(options, "skip_validations is not valid to change once we've already initialized");
}
}

View File

@ -3,18 +3,17 @@
import { Human2RegexLexer, Human2RegexLexerOptions } from "./lexer";
import { Human2RegexParser, Human2RegexParserOptions } from "./parser";
import { RobotLanguage } from "./generator";
import { CommonError } from "./utilities";
import { RegexDialect } from "./generator";
import { CommonError, unusedParameter, usefulConditional } from "./utilities";
import $ from "jquery";
import CodeMirror from "codemirror/lib/codemirror";
require("codemirror/mode/javascript/javascript");
import "codemirror/mode/javascript/javascript";
import "./webpage/bootstrap.css";
import "./webpage/cleanblog.css";
import "codemirror/lib/codemirror.css";
import "./webpage/style.css";
$(function() {
const total_errors: CommonError[] = [];
const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(true));
@ -32,19 +31,19 @@ $(function() {
parser.errors.map(CommonError.fromParseError).forEach((x) => total_errors.push(x));
let lang: RobotLanguage = RobotLanguage.JS;
let lang: RegexDialect = RegexDialect.JS;
switch ($("#dialect option:selected").val()) {
case "dotnet":
lang = RobotLanguage.DotNet;
lang = RegexDialect.DotNet;
break;
case "java":
lang = RobotLanguage.Java;
lang = RegexDialect.Java;
break;
case "perl":
lang = RobotLanguage.Perl;
lang = RegexDialect.Perl;
break;
default:
lang = RobotLanguage.JS;
lang = RegexDialect.JS;
break;
}
@ -52,8 +51,7 @@ $(function() {
valid.map(CommonError.fromSemanticError).forEach((x) => total_errors.push(x));
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (total_errors.length === 0) {
if (!usefulConditional(total_errors.length, "total_errors may have added an error")) {
regex_result = regex.toRegex(lang);
$("#regex").attr("value", regex_result);
}
@ -76,8 +74,9 @@ $(function() {
});
$("#clip").on("click", () => {
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (window.isSecureContext && navigator?.clipboard?.writeText) {
if (window.isSecureContext &&
usefulConditional(navigator.clipboard, "clipboard may be undefined") &&
usefulConditional(navigator.clipboard.writeText, "writeText may be undefined")) {
navigator.clipboard.writeText(regex_result);
}
else {
@ -96,6 +95,9 @@ $(function() {
});
editor.on("change", (instance: unknown, change_obj: unknown) => {
unusedParameter(instance, "Instance is not required, we have a reference already");
unusedParameter(change_obj, "Change is not required, we want the full value");
/* not empty */
console.log(editor.getValue());
});

View File

@ -1,55 +1,60 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
/**
* The tokens required for Human2Regex
* @packageDocumentation
*/
import { createToken, Lexer } from "chevrotain";
export const Zero = createToken({name: "Zero", pattern: /zero/i });
export const One = createToken({name: "One", pattern: /one/i });
export const Two = createToken({name: "Two", pattern: /two/i });
export const Three = createToken({name: "Three", pattern: /three/i });
export const Four = createToken({name: "Four", pattern: /four/i });
export const Five = createToken({name: "Five", pattern: /five/i });
export const Six = createToken({name: "Six", pattern: /six/i });
export const Seven = createToken({name: "Seven", pattern: /seven/i });
export const Eight = createToken({name: "Eight", pattern: /eight/i });
export const Nine = createToken({name: "Nine", pattern: /nine/i });
export const Ten = createToken({name: "Ten", pattern: /ten/i });
/** @internal */ export const Zero = createToken({name: "Zero", pattern: /zero/i });
/** @internal */ export const One = createToken({name: "One", pattern: /one/i });
/** @internal */ export const Two = createToken({name: "Two", pattern: /two/i });
/** @internal */ export const Three = createToken({name: "Three", pattern: /three/i });
/** @internal */ export const Four = createToken({name: "Four", pattern: /four/i });
/** @internal */ export const Five = createToken({name: "Five", pattern: /five/i });
/** @internal */ export const Six = createToken({name: "Six", pattern: /six/i });
/** @internal */ export const Seven = createToken({name: "Seven", pattern: /seven/i });
/** @internal */ export const Eight = createToken({name: "Eight", pattern: /eight/i });
/** @internal */ export const Nine = createToken({name: "Nine", pattern: /nine/i });
/** @internal */ export const Ten = createToken({name: "Ten", pattern: /ten/i });
export const Optional = createToken({name: "Optional", pattern: /optional(ly)?/i });
export const Match = createToken({name: "Match", pattern: /match(es)?/i });
export const Then = createToken({name: "Then", pattern: /then/i });
export const Anything = createToken({name: "Anything", pattern: /(any thing|any|anything)(s)?/i});
export const Or = createToken({name: "Or", pattern: /or/i});
export const And = createToken({name: "And", pattern: /and|,/i});
export const Word = createToken({name: "WordSpecifier", pattern: /word(s)?/i});
export const Digit = createToken({name: "DigitSpecifier", pattern: /digit(s)?/i});
export const Character = createToken({name: "CharacterSpecifier", pattern: /character(s)?/i});
export const Whitespace = createToken({name: "WhitespaceSpecifier", pattern: /(white space|whitespace)(s)?/i});
// Keyword tokens. JavaScript tries regex alternatives left to right and keeps the first
// one that matches (not the longest), so a longer spelling has to be listed before any
// shorter spelling that is a prefix of it.
export const Number = createToken({name: "NumberSpecifier", pattern: /number(s)?/i});
export const Using = createToken({name: "Using", pattern: /using/i});
export const Global = createToken({name: "Global", pattern: /global/i});
export const Multiline = createToken({name: "Multiline", pattern: /(multi line|multiline)/i});
export const Exact = createToken({name: "Exact", pattern: /exact/i});
export const Matching = createToken({name: "Matching", pattern: /matching/i});
export const Not = createToken({name: "Not", pattern: /not/i }); //, longer_alt: Nothing});
export const Between = createToken({name: "Between", pattern: /between/i});
export const Tab = createToken({name: "Tab", pattern: /tab/i});
export const Linefeed = createToken({name: "Linefeed", pattern: /(line feed|linefeed)/i});
export const Group = createToken({name: "Group", pattern: /group/i});
export const A = createToken({name: "A", pattern: /a(n)?/i }); //, longer_alt: Anything});
export const Times = createToken({name: "Times", pattern: /times/i });
export const Exactly = createToken({name: "Exactly", pattern: /exact(ly)?/i});
export const Inclusive = createToken({name: "Inclusive", pattern: /inclusive(ly)?/i});
export const Exclusive = createToken({name: "Exclusive", pattern: /exclusive(ly)?/i});
export const From = createToken({name: "From", pattern: /from/i});
// Range separator: "to", "through", "thru", "-", "..." or "..".
// "..." is listed before ".." on purpose: with the opposite order the regex stops after
// the first two dots and the third dot is left behind as unlexable input.
export const To = createToken({name: "To", pattern: /(to|through|thru|\-|\.\.\.|\.\.)/i});
export const Create = createToken({name: "Create", pattern: /create(s)?/i});
export const Called = createToken({name: "Called", pattern: /name(d)?|call(ed)?/i});
export const Repeat = createToken({name: "Repeat", pattern: /repeat(s|ing)?/i});
export const Newline = createToken({name: "Newline", pattern: /(new line|newline)/i});
export const CarriageReturn = createToken({name: "CarriageReturn", pattern: /carriage return/i});
export const CaseInsensitive = createToken({name: "CaseInsensitive", pattern: /case insensitive/i});
export const CaseSensitive = createToken({name: "CaseSensitive", pattern: /case sensitive/i});
export const OrMore = createToken({name: "OrMore", pattern: /\+|or more/i });
// Keyword tokens. JavaScript tries regex alternatives left to right and keeps the first
// one that matches (not the longest), so a longer spelling has to be listed before any
// shorter spelling that is a prefix of it.
/** @internal */ export const Optional = createToken({name: "Optional", pattern: /optional(ly)?/i });
/** @internal */ export const Match = createToken({name: "Match", pattern: /match(es)?/i });
/** @internal */ export const Then = createToken({name: "Then", pattern: /then/i });
// "anything" is listed before "any": with "any" first, the word "anything" matched only
// its first three letters and the trailing "thing" was lexed as a separate token.
/** @internal */ export const Anything = createToken({name: "Anything", pattern: /(any thing|anything|any)(s)?/i});
/** @internal */ export const Or = createToken({name: "Or", pattern: /or/i});
/** @internal */ export const And = createToken({name: "And", pattern: /and|,/i});
/** @internal */ export const Word = createToken({name: "WordSpecifier", pattern: /word(s)?/i});
/** @internal */ export const Digit = createToken({name: "DigitSpecifier", pattern: /digit(s)?/i});
/** @internal */ export const Character = createToken({name: "CharacterSpecifier", pattern: /character(s)?/i});
/** @internal */ export const Whitespace = createToken({name: "WhitespaceSpecifier", pattern: /(white space|whitespace)(s)?/i});
/** @internal */ export const Number = createToken({name: "NumberSpecifier", pattern: /number(s)?/i});
/** @internal */ export const Using = createToken({name: "Using", pattern: /using/i});
/** @internal */ export const Global = createToken({name: "Global", pattern: /global/i});
/** @internal */ export const Multiline = createToken({name: "Multiline", pattern: /(multi line|multiline)/i});
/** @internal */ export const Exact = createToken({name: "Exact", pattern: /exact/i});
/** @internal */ export const Matching = createToken({name: "Matching", pattern: /matching/i});
/** @internal */ export const Not = createToken({name: "Not", pattern: /not/i }); //, longer_alt: Nothing});
/** @internal */ export const Between = createToken({name: "Between", pattern: /between/i});
/** @internal */ export const Tab = createToken({name: "Tab", pattern: /tab/i});
/** @internal */ export const Linefeed = createToken({name: "Linefeed", pattern: /(line feed|linefeed)/i});
/** @internal */ export const Group = createToken({name: "Group", pattern: /group/i});
/** @internal */ export const A = createToken({name: "A", pattern: /a(n)?/i }); //, longer_alt: Anything});
/** @internal */ export const Times = createToken({name: "Times", pattern: /times/i });
/** @internal */ export const Exactly = createToken({name: "Exactly", pattern: /exact(ly)?/i});
/** @internal */ export const Inclusive = createToken({name: "Inclusive", pattern: /inclusive(ly)?/i});
/** @internal */ export const Exclusive = createToken({name: "Exclusive", pattern: /exclusive(ly)?/i});
/** @internal */ export const From = createToken({name: "From", pattern: /from/i});
// Range separator: "to", "through", "thru", "-", "..." or "..".
// "..." is listed before ".." on purpose: with the opposite order the regex stops after
// the first two dots and the third dot is left behind as unlexable input.
/** @internal */ export const To = createToken({name: "To", pattern: /(to|through|thru|\-|\.\.\.|\.\.)/i});
/** @internal */ export const Create = createToken({name: "Create", pattern: /create(s)?/i});
/** @internal */ export const Called = createToken({name: "Called", pattern: /name(d)?|call(ed)?/i});
/** @internal */ export const Repeat = createToken({name: "Repeat", pattern: /repeat(s|ing)?/i});
/** @internal */ export const Newline = createToken({name: "Newline", pattern: /(new line|newline)/i});
/** @internal */ export const CarriageReturn = createToken({name: "CarriageReturn", pattern: /carriage return/i});
/** @internal */ export const CaseInsensitive = createToken({name: "CaseInsensitive", pattern: /case insensitive/i});
/** @internal */ export const CaseSensitive = createToken({name: "CaseSensitive", pattern: /case sensitive/i});
/** @internal */ export const OrMore = createToken({name: "OrMore", pattern: /\+|or more/i });
/*
//Not being used currently
@ -72,18 +77,22 @@ export const By = createToken({name: "By", pattern: /by/i});
*/
// A single line feed
export const EndOfLine = createToken({name: "EOL", pattern: /\n/ });
// Any run of whitespace; placed in the Lexer.SKIPPED group
export const WS = createToken({name: "Whitespace", pattern: /\s+/, group: Lexer.SKIPPED });
// "#" or "//" up to the end of the line; placed in the Lexer.SKIPPED group
export const SingleLineComment = createToken({name: "SingleLineComment", pattern: /(#|\/\/).*/, group: Lexer.SKIPPED });
// "/*" up to the nearest "*/"; placed in the Lexer.SKIPPED group.
// [\s\S] is used instead of "." because "." does not match line terminators, which made
// it impossible for a comment to span lines even though line_breaks is set. The lazy
// quantifier stops at the first "*/" so two comments on one line do not swallow the
// code between them.
export const MultilineComment = createToken({name: "MultiLineComment", pattern: /\/\*[\s\S]*?\*\//, line_breaks: true, group: Lexer.SKIPPED });
// A single line feed
/** @internal */ export const EndOfLine = createToken({name: "EOL", pattern: /\n/ });
// Any run of whitespace; placed in the Lexer.SKIPPED group
/** @internal */ export const WS = createToken({name: "Whitespace", pattern: /\s+/, group: Lexer.SKIPPED });
// "#" or "//" up to the end of the line; placed in the Lexer.SKIPPED group
/** @internal */ export const SingleLineComment = createToken({name: "SingleLineComment", pattern: /(#|\/\/).*/, group: Lexer.SKIPPED });
// "/*" up to the nearest "*/"; placed in the Lexer.SKIPPED group.
// [\s\S] is used instead of "." because "." does not match line terminators, which made
// it impossible for a comment to span lines even though line_breaks is set. The lazy
// quantifier stops at the first "*/" so two comments on one line do not swallow the
// code between them.
/** @internal */ export const MultilineComment = createToken({name: "MultiLineComment", pattern: /\/\*[\s\S]*?\*\//, line_breaks: true, group: Lexer.SKIPPED });
// Identifier: one letter followed by any number of word characters (letters, digits, underscore); case-insensitive
export const Identifier = createToken({name: "Identifier", pattern: /[a-z]\w*/i });
// NumberLiteral: a run of decimal digits with an optional leading minus sign
export const NumberLiteral = createToken({name: "NumberLiteral", pattern: /-?\d+/ });
// StringLiteral: double-quoted text. A backslash may only be followed by one of b f n r t v " \ /,
// by u and 4 hex digits, or by U and 8 hex digits. The i flag makes the escape letters and the
// hex digits case-insensitive.
export const StringLiteral = createToken({name: "StringLiteral", pattern: /"(?:[^\\"]|\\(?:[bfnrtv"\\/]|u[0-9a-f]{4}|U[0-9a-f]{8}))*"/i });
// Identifier: one letter followed by any number of word characters (letters, digits, underscore); case-insensitive
/** @internal */ export const Identifier = createToken({name: "Identifier", pattern: /[a-z]\w*/i });
// NumberLiteral: a run of decimal digits with an optional leading minus sign
/** @internal */ export const NumberLiteral = createToken({name: "NumberLiteral", pattern: /-?\d+/ });
// StringLiteral: double-quoted text. A backslash may only be followed by one of b f n r t v " \ /,
// by u and 4 hex digits, or by U and 8 hex digits. The i flag makes the escape letters and the
// hex digits case-insensitive.
/** @internal */ export const StringLiteral = createToken({name: "StringLiteral", pattern: /"(?:[^\\"]|\\(?:[bfnrtv"\\/]|u[0-9a-f]{4}|U[0-9a-f]{8}))*"/i });
// Indent / Outdent are declared without a pattern, so they are not matched from the input text here.
// NOTE(review): presumably they are produced by indentation handling elsewhere in the lexer - confirm.
export const Indent = createToken({name: "Indent"});
export const Outdent = createToken({name: "Outdent"});
// Indent / Outdent are declared without a pattern, so they are not matched from the input text here.
// NOTE(review): presumably they are produced by indentation handling elsewhere in the lexer - confirm.
/** @internal */ export const Indent = createToken({name: "Indent"});
/** @internal */ export const Outdent = createToken({name: "Outdent"});
/**
* All the tokens used
* @internal
*/
export const AllTokens = [
Zero,
One,

View File

@ -1,18 +1,93 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
/**
* Some utility functions for Human2Regex
* @packageDocumentation
*/
import { ISemanticError } from "./generator";
import { IRecognitionException, ILexingError } from "chevrotain";
/* eslint-disable no-bitwise */
export function hasFlag(a: number, b: number) : boolean {
return (a & b) !== 0;
/**
* The following section is used because the linter is set up to warn about certain operations
* and for good reason! I'd much rather have these functions than accidentally use bitwise operations, or
* create a bunch of useless conditionals
* Plus, it signifies exactly what you wish to do (ex, calling hasFlag means you want to check if the
* bitpattern matches a given flag)
*/
/**
 * No-op sink for a parameter that a signature requires but the body never reads
 *
 * @remarks neither argument is inspected; the reason exists purely as documentation at the call site
 *
 * @param value the parameter that is intentionally left unused
 * @param reason why the parameter has to exist even though nothing reads it
 * @internal
 */
// eslint-disable-next-line @typescript-eslint/no-unused-vars
export function unusedParameter<T>(value: T, reason: string): void {
    return;
}
export function combineFlags(a: number, b: number): number {
return (a | b);
/**
 * Coerces a condition the linter considers pointless into a plain boolean
 *
 * @remarks the reason is never inspected; it only documents at the call site why the check is kept
 *
 * @param conditional the value or expression whose truthiness is wanted
 * @param reason why the check is needed despite the linter's opinion
 * @returns true when conditional is truthy, false otherwise
 * @internal
 */
// eslint-disable-next-line @typescript-eslint/no-unused-vars
export function usefulConditional<T>(conditional: boolean | T, reason: string): boolean {
    return !!conditional;
}
/* eslint-disable no-bitwise */
/**
 * Builds a single-bit flag
 *
 * @remarks uses the 32-bit left shift operator, so only the low five bits of value are
 * used as the shift count and a shift of 31 yields a negative number
 *
 * @param value the position of the bit to set, counted from zero
 * @returns 1 shifted left by value
 * @internal
 */
export function makeFlag(value: number): number {
    const bit = 1 << value;
    return bit;
}
/**
 * Tests whether a value shares at least one set bit with a flag
 *
 * @param value the bit pattern to inspect
 * @param flag the bit (or bits) to look for
 * @returns true when the bitwise AND of value and flag is non-zero
 * @internal
 */
export function hasFlag(value: number, flag: number): boolean {
    const overlap = value & flag;
    return overlap !== 0;
}
/**
 * Merges a flag into a value
 *
 * @param value the existing bit pattern
 * @param flag the bit (or bits) to add
 * @returns the bitwise OR of value and flag
 * @internal
 */
export function combineFlags(value: number, flag: number): number {
    const merged = value | flag;
    return merged;
}
/* eslint-enable no-bitwise */
/**
* Checks to see if the character is a single regex character
*
* @remarks unicode and escape characters count as a single character
*
* @param char the character to check
* @returns if the value is exactly 1 character
* @internal
*/
export function isSingleRegexCharacter(char: string): boolean {
return (char.startsWith("\\u") && char.length === 6) ||
(char.startsWith("\\U") && char.length === 8) ||
@ -20,14 +95,38 @@ export function isSingleRegexCharacter(char: string): boolean {
char.length === 1;
}
/**
 * Returns the element at the front of an array
 * @remarks performs no emptiness check; an empty array yields undefined
 *
 * @param array the array to read from
 * @returns the element at index 0
 * @internal
 */
export function first<T>(array: T[]): T {
    const [head] = array;
    return head;
}
/**
 * Returns the element at the back of an array
 * @remarks performs no emptiness check; an empty array yields undefined
 *
 * @param array the array to read from
 * @returns the element at index length - 1
 * @internal
 */
export function last<T>(array: T[]): T {
    const finalIndex = array.length - 1;
    return array[finalIndex];
}
/**
* Find the last index of a given value in an array
*
* @param array an array
* @param value the value to find
* @returns an index if found or -1 if not found
* @internal
*/
export function findLastIndex<T>(array: T[], value: T): number {
for (let index = array.length-1; index >= 0; index--) {
if (array[index] === value) {
@ -37,40 +136,71 @@ export function findLastIndex<T>(array: T[], value: T): number {
return -1;
}
/**
 * Find the last index whose element satisfies a predicate
 *
 * @param array an array, scanned from its end towards its start
 * @param predicate called with each element until it returns true
 * @returns the index of the last matching element or -1 if none matches
 */
export function findLastIndexPredicate<T>(array: T[], predicate: (x: T) => boolean): number {
    let position = array.length;
    while (position-- > 0) {
        if (predicate(array[position])) {
            return position;
        }
    }
    return -1;
}
/**
 * Strips the first and the last character of a string
 *
 * @remarks the characters are dropped without checking that they actually are quotes
 *
 * @param input a string that is wrapped in quote characters
 * @returns the text between the first and the last character
 * @internal
 */
export function removeQuotes(input: string): string {
    const closingQuoteIndex = input.length - 1;
    return input.substring(1, closingQuoteIndex);
}
export function regexEscape(input: string) : string {
/**
 * Escapes a string so it may be used literally in a regular expression
 *
 * @remarks every backslash is escaped, not just the first one. Backslashes are handled
 * in the same single pass as the other special characters so that the backslashes
 * inserted by the escaping are never escaped a second time.
 *
 * @param input the string to escape
 * @returns a regex escaped string
 * @internal
 */
export function regexEscape(input: string): string {
    // "$&" inserts the matched character itself, prefixed here with one backslash
    return input.replace(/[\\=:\-\.\[\]\^\|\(\)\*\+\?\{\}\$\/]/g, "\\$&");
}
/**
* Common Error class that encapsulates information from the lexer, parser, and generator
*/
export class CommonError {
constructor(public type: string, public start_line: number, public start_column: number, public length: number, public message: string) {
/**
 * Private so that instances are only created through the static from* factory methods of this class
 *
 * @param type label for the stage that produced the error (the factories pass "Lexer Error", "Parser Error" or "Semantic Error")
 * @param start_line line on which the error starts
 * @param start_column column at which the error starts
 * @param length length of the offending text
 * @param message description of the error
 */
private constructor(public type: string, public start_line: number, public start_column: number, public length: number, public message: string) {
/* empty */
}
/**
 * Wraps an error reported by the lexer in a CommonError
 *
 * @param error the lexing error to convert
 * @returns a CommonError of type "Lexer Error" with the same line, column, length and message
 */
public static fromLexError(error: ILexingError): CommonError {
    const { line, column, length, message } = error;
    return new CommonError("Lexer Error", line, column, length, message);
}
/**
 * Creates a common error from a parsing error
 *
 * @remarks line, column and length are NaN when the offending token carries no position information
 *
 * @param error The parsing error
 * @returns a new CommonError of type "Parser Error" whose message is the error name followed by its message
 */
public static fromParseError(error: IRecognitionException): CommonError {
    const { token } = error;
    // `??` binds more loosely than `-`, so the fallback has to be parenthesised; without
    // the parentheses the "length" was simply endOffset whenever endOffset was defined.
    // endOffset is the offset of the token's last character (inclusive), hence the + 1.
    const length = (token.endOffset ?? NaN) - token.startOffset + 1;
    return new CommonError("Parser Error", token.startLine ?? NaN, token.startColumn ?? NaN, length, error.name + ": " + error.message);
}
/**
 * Wraps an error reported by the generator in a CommonError
 *
 * @param error the semantic error to convert
 * @returns a CommonError of type "Semantic Error" with the same position, length and message
 */
public static fromSemanticError(error: ISemanticError): CommonError {
    const { startLine, startColumn, length, message } = error;
    return new CommonError("Semantic Error", startLine, startColumn, length, message);
}
/**
 * Renders the error as "<type> @ <line> <column>: <message>"
 *
 * @returns a single-line description of the error
 */
public toString(): string {
    const position = `${this.start_line} ${this.start_column}`;
    return `${this.type} @ ${position}: ${this.message}`;
}