mirror of
https://github.com/pdemian/human2regex.git
synced 2025-05-16 12:30:09 -07:00
Refactored code for later migration to npm
This commit is contained in:
parent
92fc7445d5
commit
424cb59d6d
16
docs/bundle.min.js
vendored
16
docs/bundle.min.js
vendored
File diff suppressed because one or more lines are too long
379
src/generator.ts
379
src/generator.ts
@ -1,16 +1,26 @@
|
||||
/* eslint-disable @typescript-eslint/no-unused-vars */
|
||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||
|
||||
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last } from "./utilities";
|
||||
/**
|
||||
* Includes all Concrete Syntax Trees for Human2Regex
|
||||
* @packageDocumentation
|
||||
*/
|
||||
|
||||
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last, unusedParameter, makeFlag } from "./utilities";
|
||||
import { IToken } from "chevrotain";
|
||||
|
||||
export enum RobotLanguage {
|
||||
/**
|
||||
* List of regular expression dialects we support
|
||||
*/
|
||||
export enum RegexDialect {
|
||||
JS,
|
||||
Perl,
|
||||
DotNet,
|
||||
Java
|
||||
}
|
||||
|
||||
/**
|
||||
* Interface for all semantic errors
|
||||
*/
|
||||
export interface ISemanticError {
|
||||
startLine: number,
|
||||
startColumn: number,
|
||||
@ -18,16 +28,52 @@ export interface ISemanticError {
|
||||
message: string
|
||||
}
|
||||
|
||||
/**
|
||||
* The base concrete syntax tree class
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
export abstract class H2RCST {
|
||||
public tokens: IToken[];
|
||||
|
||||
/**
|
||||
* Constructor for H2RCST
|
||||
*
|
||||
* @param tokens Tokens used to calculate where an error occurred
|
||||
* @internal
|
||||
*/
|
||||
constructor(tokens: IToken[]) {
|
||||
this.tokens = tokens;
|
||||
}
|
||||
|
||||
public abstract validate(language: RobotLanguage): ISemanticError[];
|
||||
public abstract toRegex(language: RobotLanguage): string;
|
||||
/**
|
||||
* Validate that this is both valid and can be generated in the specified language
|
||||
*
|
||||
* @remarks There is no guarantee toRegex will work unless validate returns no errors
|
||||
*
|
||||
* @param language the regex dialect we're validating
|
||||
* @returns A list of errors
|
||||
* @public
|
||||
*/
|
||||
public abstract validate(language: RegexDialect): ISemanticError[];
|
||||
|
||||
/**
|
||||
* Generate a regular expression fragment based on this syntax tree
|
||||
*
|
||||
* @remarks There is no guarantee toRegex will work unless validate returns no errors
|
||||
*
|
||||
* @param language the regex dialect we're generating
|
||||
* @returns a regular expression fragment
|
||||
* @public
|
||||
*/
|
||||
public abstract toRegex(language: RegexDialect): string;
|
||||
|
||||
/**
|
||||
* Creates an ISemanticError with a given message and the tokens provided from the constructor
|
||||
*
|
||||
* @param message the message
|
||||
* @internal
|
||||
*/
|
||||
protected error(message: string): ISemanticError {
|
||||
const f = first(this.tokens);
|
||||
const l = last(this.tokens);
|
||||
@ -41,16 +87,28 @@ export abstract class H2RCST {
|
||||
}
|
||||
}
|
||||
|
||||
/* eslint-disable no-bitwise */
|
||||
/**
|
||||
* Flags for the using statement
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
export enum UsingFlags {
|
||||
Multiline = 1 << 0,
|
||||
Global = 1 << 1,
|
||||
Sensitive = 1 << 2,
|
||||
Insensitive = 1 << 3,
|
||||
Exact = 1 << 4
|
||||
Multiline = makeFlag(0),
|
||||
Global = makeFlag(1),
|
||||
Sensitive = makeFlag(2),
|
||||
Insensitive = makeFlag(3),
|
||||
Exact = makeFlag(4)
|
||||
}
|
||||
/* eslint-enable no-bitwise */
|
||||
|
||||
/**
|
||||
* Type of match arguments
|
||||
*
|
||||
* @remarks SingleString means an escaped string
|
||||
* @remarks Between means a range (ex. a-z)
|
||||
* @remarks Anything means .
|
||||
* @remarks Word, Digit, Character, Whitespace, Number, Tab, Linefeed, Newline, and Carriage return are \w+, \d, \w, \s, \d+, \t, \n, \n, \r respectively
|
||||
* @internal
|
||||
*/
|
||||
export enum MatchSubStatementType {
|
||||
SingleString,
|
||||
Between,
|
||||
@ -66,27 +124,73 @@ export enum MatchSubStatementType {
|
||||
CarriageReturn
|
||||
}
|
||||
|
||||
/**
|
||||
* Container for match statements
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
export class MatchSubStatementValue {
|
||||
|
||||
/**
|
||||
* Constructor for MatchSubStatementValue
|
||||
*
|
||||
* @param type the type of this match
|
||||
* @param from optional range string
|
||||
* @param to optional range string
|
||||
* @internal
|
||||
*/
|
||||
constructor(public type: MatchSubStatementType, public from: string | null = null, public to: string | null = null) {
|
||||
/* empty */
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Container for MatchStatementValue
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
export class MatchStatementValue {
|
||||
|
||||
/**
|
||||
* Constructor for MatchStatementValue
|
||||
*
|
||||
* @param optional is this match optional
|
||||
* @param statement the substatement to generate
|
||||
* @internal
|
||||
*/
|
||||
constructor(public optional: boolean, public statement: MatchSubStatementCST) {
|
||||
/* empty */
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The base class for all statement concrete syntax trees
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
export abstract class StatementCST extends H2RCST {
|
||||
}
|
||||
|
||||
/**
|
||||
* Concrete Syntax Tree for Match Sub statements
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
export class MatchSubStatementCST extends H2RCST {
|
||||
constructor(public tokens: IToken[], public count: CountSubStatementCST | null, public invert: boolean = false, public values: MatchSubStatementValue[]) {
|
||||
|
||||
/**
|
||||
* Constructor for MatchSubStatementCST
|
||||
*
|
||||
* @param tokens Tokens used to calculate where an error occurred
|
||||
* @param count optional count statement
|
||||
* @param invert is this match inverted (ex, [^a-z] or [a-z])
|
||||
* @param values sub statements to match
|
||||
*/
|
||||
constructor(tokens: IToken[], private count: CountSubStatementCST | null, private invert: boolean = false, private values: MatchSubStatementValue[]) {
|
||||
super(tokens);
|
||||
}
|
||||
|
||||
public validate(language: RobotLanguage): ISemanticError[] {
|
||||
public validate(language: RegexDialect): ISemanticError[] {
|
||||
let errors: ISemanticError[] = [];
|
||||
|
||||
if (this.count) {
|
||||
@ -121,7 +225,7 @@ export class MatchSubStatementCST extends H2RCST {
|
||||
return errors;
|
||||
}
|
||||
|
||||
public toRegex(language: RobotLanguage): string {
|
||||
public toRegex(language: RegexDialect): string {
|
||||
const str: string[] = [];
|
||||
|
||||
for (const value of this.values) {
|
||||
@ -181,37 +285,33 @@ export class MatchSubStatementCST extends H2RCST {
|
||||
}
|
||||
|
||||
if (this.count) {
|
||||
if (this.count.from === 1 && this.count.to === null) {
|
||||
if (this.count.opt === "+") {
|
||||
ret += "+";
|
||||
}
|
||||
// if we only have a count of 1, we can ignore adding any extra text
|
||||
}
|
||||
else if (this.count.from === 0 && this.count.to === null) {
|
||||
if (this.count.opt === "+") {
|
||||
ret += "*";
|
||||
}
|
||||
else {
|
||||
// match 0 of anything? ok...
|
||||
ret = "";
|
||||
}
|
||||
}
|
||||
else {
|
||||
ret += this.count.toRegex(language);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Concrete Syntax Tree for Using statements
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
export class UsingStatementCST extends H2RCST {
|
||||
constructor(public tokens: IToken[], public flags: UsingFlags[]) {
|
||||
|
||||
/**
|
||||
* Constructor for UsingStatementCST
|
||||
*
|
||||
* @param tokens Tokens used to calculate where an error occurred
|
||||
* @param flags using flags
|
||||
*/
|
||||
constructor(tokens: IToken[], private flags: UsingFlags[]) {
|
||||
super(tokens);
|
||||
}
|
||||
|
||||
public validate(language: RobotLanguage): ISemanticError[] {
|
||||
public validate(language: RegexDialect): ISemanticError[] {
|
||||
unusedParameter(language, "Using Statement does not change based on language");
|
||||
|
||||
const errors: ISemanticError[] = [];
|
||||
let flag = this.flags[0];
|
||||
|
||||
@ -229,7 +329,9 @@ export class UsingStatementCST extends H2RCST {
|
||||
return errors;
|
||||
}
|
||||
|
||||
public toRegex(language: RobotLanguage): string {
|
||||
public toRegex(language: RegexDialect): string {
|
||||
unusedParameter(language, "Using Statement does not change based on language");
|
||||
|
||||
let str = "";
|
||||
let exact = false;
|
||||
|
||||
@ -252,12 +354,27 @@ export class UsingStatementCST extends H2RCST {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Concrete Syntax Tree for Count sub statements
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
export class CountSubStatementCST extends H2RCST {
|
||||
constructor(public tokens: IToken[], public from: number, public to: number | null = null, public opt: "inclusive" | "exclusive" | "+" | null = null) {
|
||||
/**
|
||||
* Constructor for CountSubStatementCST
|
||||
*
|
||||
* @param tokens Tokens used to calculate where an error occurred
|
||||
* @param from number to count from
|
||||
* @param to optional number to count to
|
||||
* @param opt option modifier
|
||||
*/
|
||||
constructor(tokens: IToken[], private from: number, private to: number | null = null, private opt: "inclusive" | "exclusive" | "+" | null = null) {
|
||||
super(tokens);
|
||||
}
|
||||
|
||||
public validate(language: RobotLanguage): ISemanticError[] {
|
||||
public validate(language: RegexDialect): ISemanticError[] {
|
||||
unusedParameter(language, "Count does not need checking");
|
||||
|
||||
const errors: ISemanticError[] = [];
|
||||
|
||||
if (this.from < 0) {
|
||||
@ -270,31 +387,56 @@ export class CountSubStatementCST extends H2RCST {
|
||||
return errors;
|
||||
}
|
||||
|
||||
public toRegex(language: RobotLanguage): string {
|
||||
public toRegex(language: RegexDialect): string {
|
||||
unusedParameter(language, "Count does not change from language");
|
||||
|
||||
const from = this.from;
|
||||
let to = this.to;
|
||||
if (to !== null && this.opt === "exclusive") {
|
||||
to--;
|
||||
|
||||
|
||||
// if we only have a count of 1, we can ignore adding any extra text
|
||||
if (to === null) {
|
||||
if (from === 1) {
|
||||
return this.opt === "+" ? "+" : "*";
|
||||
}
|
||||
else if (from === 0) {
|
||||
return this.opt === "+" ? "*" : "";
|
||||
}
|
||||
}
|
||||
|
||||
if (to !== null) {
|
||||
if (this.opt === "exclusive") {
|
||||
to--;
|
||||
}
|
||||
return `{${from},${to}}`;
|
||||
}
|
||||
else if (this.opt === "+") {
|
||||
return `{${from},}`;
|
||||
}
|
||||
else {
|
||||
return `{${this.from}}`;
|
||||
return `{${from}}`;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Concrete Syntax Tree for a Match statement
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
export class MatchStatementCST extends StatementCST {
|
||||
constructor(public tokens: IToken[], public matches: MatchStatementValue[]) {
|
||||
|
||||
/**
|
||||
* Constructor for MatchStatementCST
|
||||
*
|
||||
* @param tokens Tokens used to calculate where an error occurred
|
||||
* @param matches the match values to generate, in order
|
||||
*/
|
||||
constructor(tokens: IToken[], private matches: MatchStatementValue[]) {
|
||||
super(tokens);
|
||||
}
|
||||
|
||||
public validate(language: RobotLanguage): ISemanticError[] {
|
||||
public validate(language: RegexDialect): ISemanticError[] {
|
||||
let errors: ISemanticError[] = [];
|
||||
|
||||
for (const match of this.matches) {
|
||||
@ -304,19 +446,33 @@ export class MatchStatementCST extends StatementCST {
|
||||
return errors;
|
||||
}
|
||||
|
||||
public toRegex(language: RobotLanguage): string {
|
||||
public toRegex(language: RegexDialect): string {
|
||||
return this.matches.map((x) => {
|
||||
return x.statement.toRegex(language) + (x.optional ? "?" : "");
|
||||
}).join("");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Concrete Syntax Tree for a Repeat statement
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
export class RepeatStatementCST extends StatementCST {
|
||||
constructor(public tokens: IToken[], public optional: boolean, public count: CountSubStatementCST | null, public statements: StatementCST[]) {
|
||||
|
||||
/**
|
||||
* Constructor for RepeatStatementCST
|
||||
*
|
||||
* @param tokens Tokens used to calculate where an error occurred
|
||||
* @param optional is this repetition optional
|
||||
* @param count optional number of times to repeat
|
||||
* @param statements the statements to repeat
|
||||
*/
|
||||
constructor(tokens: IToken[], private optional: boolean, private count: CountSubStatementCST | null, private statements: StatementCST[]) {
|
||||
super(tokens);
|
||||
}
|
||||
|
||||
public validate(language: RobotLanguage): ISemanticError[] {
|
||||
public validate(language: RegexDialect): ISemanticError[] {
|
||||
let errors: ISemanticError[] = [];
|
||||
|
||||
if (this.count !== null) {
|
||||
@ -330,67 +486,16 @@ export class RepeatStatementCST extends StatementCST {
|
||||
return errors;
|
||||
}
|
||||
|
||||
public toRegex(language: RobotLanguage): string {
|
||||
public toRegex(language: RegexDialect): string {
|
||||
let str = "(" + this.statements.map((x) => x.toRegex(language)).join("") + ")";
|
||||
|
||||
if (this.count !== null) {
|
||||
if (this.count.from === 1 && this.count.to === null) {
|
||||
if (this.count.opt === "+") {
|
||||
str += "+";
|
||||
}
|
||||
// if we only have a count of 1, we can ignore adding any extra text
|
||||
}
|
||||
else if (this.count.from === 0 && this.count.to === null) {
|
||||
if (this.count.opt === "+") {
|
||||
str += "*";
|
||||
}
|
||||
else {
|
||||
// match 0 of anything? ok...
|
||||
str = "";
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (this.count) {
|
||||
str += this.count.toRegex(language);
|
||||
}
|
||||
}
|
||||
else {
|
||||
str += "*";
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
}
|
||||
|
||||
export class GroupStatementCST extends StatementCST {
|
||||
constructor(public tokens: IToken[], public optional: boolean, public name: string | null, public statements: StatementCST[]) {
|
||||
super(tokens);
|
||||
}
|
||||
|
||||
public validate(language: RobotLanguage): ISemanticError[] {
|
||||
let errors : ISemanticError[] = [];
|
||||
|
||||
if (language !== RobotLanguage.DotNet && language !== RobotLanguage.JS) {
|
||||
errors.push(this.error("This language does not support named groups"));
|
||||
}
|
||||
|
||||
for (const statement of this.statements) {
|
||||
errors = errors.concat(statement.validate(language));
|
||||
}
|
||||
|
||||
return errors;
|
||||
}
|
||||
|
||||
public toRegex(language: RobotLanguage): string {
|
||||
let str = "(";
|
||||
|
||||
if (this.name !== null) {
|
||||
str += `?<${this.name}>`;
|
||||
}
|
||||
|
||||
str += this.statements.map((x) => x.toRegex(language)).join("");
|
||||
|
||||
str += ")";
|
||||
|
||||
if (this.optional) {
|
||||
str += "?";
|
||||
}
|
||||
@ -399,12 +504,77 @@ export class GroupStatementCST extends StatementCST {
|
||||
}
|
||||
}
|
||||
|
||||
export class RegularExpressionCST extends H2RCST {
|
||||
constructor(public tokens: IToken[], public usings: UsingStatementCST, public statements: StatementCST[]) {
|
||||
/**
|
||||
* Concrete Syntax Tree for a group statement
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
export class GroupStatementCST extends StatementCST {
|
||||
|
||||
/**
|
||||
* Constructor for GroupStatementCST
|
||||
*
|
||||
* @param tokens Tokens used to calculate where an error occurred
|
||||
* @param optional is this group optional
|
||||
* @param name optional name for named group
|
||||
* @param statements other statements
|
||||
* @internal
|
||||
*/
|
||||
constructor(tokens: IToken[], private optional: boolean, private name: string | null, private statements: StatementCST[]) {
|
||||
super(tokens);
|
||||
}
|
||||
|
||||
public validate(language: RobotLanguage): ISemanticError[] {
|
||||
public validate(language: RegexDialect): ISemanticError[] {
|
||||
let errors : ISemanticError[] = [];
|
||||
|
||||
// All languages currently support named groups
|
||||
//if (false) {
|
||||
// errors.push(this.error("This language does not support named groups"));
|
||||
//}
|
||||
|
||||
for (const statement of this.statements) {
|
||||
errors = errors.concat(statement.validate(language));
|
||||
}
|
||||
|
||||
return errors;
|
||||
}
|
||||
|
||||
public toRegex(language: RegexDialect): string {
|
||||
let str = "(";
|
||||
|
||||
// named group
|
||||
if (this.name !== null) {
|
||||
str += `?<${this.name}>`;
|
||||
}
|
||||
|
||||
str += this.statements.map((x) => x.toRegex(language)).join("");
|
||||
|
||||
str += (this.optional ? ")?" : ")");
|
||||
|
||||
return str;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Concrete Syntax Tree for a regular expression
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export class RegularExpressionCST extends H2RCST {
|
||||
|
||||
/**
|
||||
* Constructor for RegularExpressionCST
|
||||
*
|
||||
* @param tokens Tokens used to calculate where an error occurred
|
||||
* @param usings using statements
|
||||
* @param statements other statements
|
||||
* @internal
|
||||
*/
|
||||
constructor(tokens: IToken[], private usings: UsingStatementCST, private statements: StatementCST[]) {
|
||||
super(tokens);
|
||||
}
|
||||
|
||||
public validate(language: RegexDialect): ISemanticError[] {
|
||||
let errors: ISemanticError[] = this.usings.validate(language);
|
||||
|
||||
for (const statement of this.statements) {
|
||||
@ -413,11 +583,10 @@ export class RegularExpressionCST extends H2RCST {
|
||||
|
||||
return errors;
|
||||
}
|
||||
public toRegex(language: RobotLanguage): string {
|
||||
public toRegex(language: RegexDialect): string {
|
||||
const modifiers = this.usings.toRegex(language);
|
||||
const regex = this.statements.map((x) => x.toRegex(language)).join("");
|
||||
|
||||
return modifiers.replace("{regex}", regex);
|
||||
}
|
||||
|
||||
}
|
51
src/lexer.ts
51
src/lexer.ts
@ -1,27 +1,58 @@
|
||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||
|
||||
/**
|
||||
* The Lexer for Human2Regex
|
||||
* @packageDocumentation
|
||||
*/
|
||||
|
||||
import { Lexer, IToken, createTokenInstance, ILexingResult, ILexingError } from "chevrotain";
|
||||
import { last, findLastIndex } from "./utilities";
|
||||
import { Indent, Outdent, EndOfLine, AllTokens } from "./tokens";
|
||||
|
||||
/**
|
||||
* Defines the type of indents the lexer will allow
|
||||
*/
|
||||
export enum IndentType {
|
||||
Tabs,
|
||||
Spaces,
|
||||
Both
|
||||
}
|
||||
|
||||
/**
|
||||
* The options for the Lexer
|
||||
*/
|
||||
export class Human2RegexLexerOptions {
|
||||
|
||||
/**
|
||||
* Constructor for the Human2RegexLexerOptions
|
||||
*
|
||||
* @param skip_validations If true, the lexer will skip validations (~25% faster)
|
||||
* @param type The type of indents the lexer will allow
|
||||
* @param spaces_per_tab Number of spaces per tab
|
||||
*/
|
||||
constructor(public skip_validations = false, public type: IndentType = IndentType.Both, public spaces_per_tab: number = 4) {
|
||||
/* empty */
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Human2Regex Lexer
|
||||
*
|
||||
* @remarks Only 1 lexer instance allowed due to a technical limitation and performance reasons
|
||||
*/
|
||||
export class Human2RegexLexer {
|
||||
private static already_init = false;
|
||||
|
||||
private lexer!: Lexer;
|
||||
private options!: Human2RegexLexerOptions;
|
||||
|
||||
/**
|
||||
* Human2Regex Lexer
|
||||
*
|
||||
* @remarks Only 1 lexer instance allowed due to a technical limitation and performance reasons
|
||||
* @param options options for the lexer
|
||||
* @see Human2RegexLexerOptions
|
||||
*/
|
||||
constructor(options: Human2RegexLexerOptions = new Human2RegexLexerOptions()) {
|
||||
if (Human2RegexLexer.already_init) {
|
||||
throw new Error("Only 1 instance of Human2RegexLexer allowed");
|
||||
@ -32,11 +63,18 @@ export class Human2RegexLexer {
|
||||
this.setOptions(options);
|
||||
}
|
||||
|
||||
public setOptions(options: Human2RegexLexerOptions) : void {
|
||||
/**
|
||||
* Sets the options for this lexer
|
||||
*
|
||||
* @param options options for the lexer
|
||||
* @see Human2RegexLexerOptions
|
||||
*/
|
||||
public setOptions(options: Human2RegexLexerOptions): void {
|
||||
this.options = options;
|
||||
|
||||
let indent_regex: RegExp | null = null;
|
||||
|
||||
// Generate an indent lexer (accepts tabs or spaces or both based on options)
|
||||
if (this.options.type === IndentType.Tabs) {
|
||||
indent_regex = /\t/y;
|
||||
}
|
||||
@ -65,6 +103,12 @@ export class Human2RegexLexer {
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Tokenizes the given text
|
||||
*
|
||||
* @param text the text to analyze
|
||||
* @returns a lexing result which contains the token stream and error list
|
||||
*/
|
||||
public tokenize(text: string) : ILexingResult {
|
||||
const lex_result = this.lexer.tokenize(text);
|
||||
|
||||
@ -72,7 +116,6 @@ export class Human2RegexLexer {
|
||||
return lex_result;
|
||||
}
|
||||
|
||||
// create Outdents
|
||||
const tokens: IToken[] = [];
|
||||
const indent_stack = [ 0 ];
|
||||
|
||||
@ -80,6 +123,7 @@ export class Human2RegexLexer {
|
||||
let start_of_line = true;
|
||||
let had_indents = false;
|
||||
|
||||
// create Outdents
|
||||
for (let i = 0; i < lex_result.tokens.length; i++) {
|
||||
|
||||
// EoL? check for indents next (by setting startOfLine = true)
|
||||
@ -117,13 +161,16 @@ export class Human2RegexLexer {
|
||||
// Ignore all indents AND newline
|
||||
// continue;
|
||||
}
|
||||
// new indent is too far ahead
|
||||
else if (!start_of_line || (curr_indent_level > last(indent_stack) + 1)) {
|
||||
lex_result.errors.push(this.lexError(start_token));
|
||||
}
|
||||
// new indent is just 1 above
|
||||
else if (curr_indent_level > last(indent_stack)) {
|
||||
indent_stack.push(curr_indent_level);
|
||||
tokens.push(start_token);
|
||||
}
|
||||
// new indent is below the past indent
|
||||
else if (curr_indent_level < last(indent_stack)) {
|
||||
const index = findLastIndex(indent_stack, curr_indent_level);
|
||||
|
||||
|
@ -1,11 +1,24 @@
|
||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||
|
||||
/**
|
||||
* The parser for Human2Regex
|
||||
* @packageDocumentation
|
||||
*/
|
||||
|
||||
import { EmbeddedActionsParser, IOrAlt, IToken } from "chevrotain";
|
||||
import * as T from "./tokens";
|
||||
import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST } from "./generator";
|
||||
import { first } from "./utilities";
|
||||
import { first, usefulConditional, unusedParameter } from "./utilities";
|
||||
|
||||
/**
|
||||
* The options for the Parser
|
||||
*/
|
||||
export class Human2RegexParserOptions {
|
||||
/**
|
||||
* Constructor for Human2RegexParserOptions
|
||||
*
|
||||
* @param skip_validations If true, the parser will skip validations (~25% faster)
|
||||
*/
|
||||
constructor(public skip_validations: boolean = false) {
|
||||
/* empty */
|
||||
}
|
||||
@ -22,6 +35,11 @@ class TokensAndValue<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The Parser class
|
||||
*
|
||||
* @remarks Only 1 parser instance allowed due to performance reasons
|
||||
*/
|
||||
export class Human2RegexParser extends EmbeddedActionsParser {
|
||||
private static already_init = false;
|
||||
|
||||
@ -38,14 +56,17 @@ export class Human2RegexParser extends EmbeddedActionsParser {
|
||||
|
||||
const $ = this;
|
||||
|
||||
// IN REGARDS TO KEEPING TOKENS:
|
||||
// We don't really need to keep each token, only the first and last tokens
|
||||
// This is due to the fact we calculate the difference between those tokens
|
||||
// However, sometimes we have optional starts and ends
|
||||
// Each optional near the start and end MUST be recorded because they may be the first/last token
|
||||
// ex) "optional match 3..." the start token is "optional", but "match 3..."'s start token is "match"
|
||||
/**
|
||||
* IN REGARDS TO KEEPING TOKENS:
|
||||
* We don't really need to keep each token, only the first and last tokens
|
||||
* This is due to the fact we calculate the difference between those tokens
|
||||
* However, sometimes we have optional starts and ends
|
||||
* Each optional near the start and end MUST be recorded because they may be the first/last token
|
||||
* ex) "optional match 3..." the start token is "optional", but "match 3..."'s start token is "match"
|
||||
* */
|
||||
|
||||
let nss_rules : IOrAlt<TokenAndValue<number>>[] | null = null;
|
||||
// number rules
|
||||
let nss_rules: IOrAlt<TokenAndValue<number>>[] | null = null;
|
||||
const NumberSubStatement = $.RULE("NumberSubStatement", () => {
|
||||
return $.OR(nss_rules || (nss_rules = [
|
||||
{ ALT: () => new TokenAndValue($.CONSUME(T.Zero), 0) },
|
||||
@ -69,6 +90,8 @@ export class Human2RegexParser extends EmbeddedActionsParser {
|
||||
// 1, 1..2, between 1 and/to 2 inclusively/exclusively
|
||||
const CountSubStatement = $.RULE("CountSubStatement", () => {
|
||||
return $.OR([
|
||||
|
||||
// between 1 to 4
|
||||
{ ALT: () => {
|
||||
const tokens: IToken[] = [];
|
||||
|
||||
@ -97,6 +120,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
|
||||
return new CountSubStatementCST(tokens, from.value, to.value, opt as "inclusive" | "exclusive" | null);
|
||||
}},
|
||||
|
||||
// from 1 to 4
|
||||
{ ALT: () => {
|
||||
const tokens: IToken[] = [];
|
||||
|
||||
@ -116,6 +140,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
|
||||
return new CountSubStatementCST(tokens, from.value, to.value ? to.value[0] : null, to.value ? to.value[1] : null);
|
||||
}},
|
||||
|
||||
// exactly 2
|
||||
{ ALT: () => {
|
||||
const tokens: IToken[] = [];
|
||||
$.OPTION(() => tokens.push($.CONSUME(T.Exactly)));
|
||||
@ -126,27 +151,27 @@ export class Human2RegexParser extends EmbeddedActionsParser {
|
||||
return new CountSubStatementCST(tokens, from.value);
|
||||
}}
|
||||
]);
|
||||
|
||||
|
||||
});
|
||||
|
||||
let mss_rules : IOrAlt<MatchSubStatementValue>[] | null = null;
|
||||
// match sub rules
|
||||
let mss_rules: IOrAlt<MatchSubStatementValue>[] | null = null;
|
||||
const MatchSubStatement = $.RULE("MatchSubStatement", () => {
|
||||
let count: CountSubStatementCST | null = null;
|
||||
let invert: boolean = false;
|
||||
const values: MatchSubStatementValue[] = [];
|
||||
let from : string | null = null;
|
||||
let to : string | null = null;
|
||||
let type : MatchSubStatementType = MatchSubStatementType.Anything;
|
||||
let from: string | null = null;
|
||||
let to: string | null = null;
|
||||
let type: MatchSubStatementType = MatchSubStatementType.Anything;
|
||||
|
||||
const tokens: IToken[] = [];
|
||||
|
||||
count = $.OPTION(() => {
|
||||
const css = $.SUBRULE(CountSubStatement);
|
||||
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
||||
if (css.tokens) {
|
||||
|
||||
if (usefulConditional(css.tokens, "due to how chevrotain works, the first run produces a null value")) {
|
||||
tokens.push(first(css.tokens));
|
||||
}
|
||||
|
||||
return css;
|
||||
});
|
||||
|
||||
@ -159,6 +184,8 @@ export class Human2RegexParser extends EmbeddedActionsParser {
|
||||
DEF: () => {
|
||||
$.OPTION3(() => $.CONSUME(T.A));
|
||||
values.push($.OR(mss_rules || (mss_rules = [
|
||||
|
||||
// range [a-z]
|
||||
{ ALT: () => {
|
||||
$.OPTION4(() => $.CONSUME(T.From));
|
||||
from = $.CONSUME2(T.StringLiteral).image;
|
||||
@ -170,6 +197,8 @@ export class Human2RegexParser extends EmbeddedActionsParser {
|
||||
|
||||
return new MatchSubStatementValue(type, from, to);
|
||||
}},
|
||||
|
||||
// range [a-z]
|
||||
{ ALT: () => {
|
||||
$.CONSUME(T.Between);
|
||||
from = $.CONSUME4(T.StringLiteral).image;
|
||||
@ -181,6 +210,8 @@ export class Human2RegexParser extends EmbeddedActionsParser {
|
||||
|
||||
return new MatchSubStatementValue(type, from, to);
|
||||
}},
|
||||
|
||||
// exact string
|
||||
{ ALT: () => {
|
||||
const token = $.CONSUME(T.StringLiteral);
|
||||
tokens.push(token);
|
||||
@ -289,7 +320,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
|
||||
});
|
||||
|
||||
// using global matching
|
||||
let us_rules : IOrAlt<UsingFlags>[] | null = null;
|
||||
let us_rules: IOrAlt<UsingFlags>[] | null = null;
|
||||
const UsingStatement = $.RULE("UsingStatement", () => {
|
||||
const usings: UsingFlags[] = [];
|
||||
|
||||
@ -327,12 +358,16 @@ export class Human2RegexParser extends EmbeddedActionsParser {
|
||||
return new TokensAndValue(tokens, usings);
|
||||
});
|
||||
|
||||
// group rules
|
||||
const GroupStatement = $.RULE("GroupStatement", () => {
|
||||
const tokens: IToken[] = [];
|
||||
let optional = false;
|
||||
let name: string | null = null;
|
||||
const statement: StatementCST[] = [];
|
||||
|
||||
// position of optional must be OR'd because
|
||||
// otherwise it could appear twice
|
||||
// ex) optional? create an optional? group
|
||||
tokens.push($.OR([
|
||||
{ ALT: () => {
|
||||
optional = true;
|
||||
@ -371,10 +406,11 @@ export class Human2RegexParser extends EmbeddedActionsParser {
|
||||
return new GroupStatementCST(tokens, optional, name, statement);
|
||||
});
|
||||
|
||||
// repeat rules
|
||||
const RepeatStatement = $.RULE("RepeatStatement", () => {
|
||||
const tokens: IToken[] = [];
|
||||
let optional = false;
|
||||
let count : CountSubStatementCST | null = null;
|
||||
let count: CountSubStatementCST | null = null;
|
||||
const statements: StatementCST[] = [];
|
||||
|
||||
$.OPTION3(() => {
|
||||
@ -393,6 +429,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
|
||||
return new RepeatStatementCST(tokens, optional, count, statements);
|
||||
});
|
||||
|
||||
// statement super class
|
||||
const Statement = $.RULE("Statement", () => {
|
||||
return $.OR([
|
||||
{ ALT: () => $.SUBRULE(MatchStatement) },
|
||||
@ -401,6 +438,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
|
||||
]);
|
||||
});
|
||||
|
||||
// full regex
|
||||
const Regex = $.RULE("Regex", () => {
|
||||
let tokens: IToken[] = [];
|
||||
let usings: UsingFlags[] = [];
|
||||
@ -421,7 +459,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
|
||||
this.parse = Regex;
|
||||
}
|
||||
|
||||
//public set_options(options: Human2RegexParserOptions) : void {
|
||||
// // empty so far
|
||||
//}
|
||||
public setOptions(options: Human2RegexParserOptions): void {
|
||||
unusedParameter(options, "skip_validations is not valid to change once we've already initialized");
|
||||
}
|
||||
}
|
@ -3,18 +3,17 @@
|
||||
|
||||
import { Human2RegexLexer, Human2RegexLexerOptions } from "./lexer";
|
||||
import { Human2RegexParser, Human2RegexParserOptions } from "./parser";
|
||||
import { RobotLanguage } from "./generator";
|
||||
import { CommonError } from "./utilities";
|
||||
import { RegexDialect } from "./generator";
|
||||
import { CommonError, unusedParameter, usefulConditional } from "./utilities";
|
||||
import $ from "jquery";
|
||||
import CodeMirror from "codemirror/lib/codemirror";
|
||||
require("codemirror/mode/javascript/javascript");
|
||||
import "codemirror/mode/javascript/javascript";
|
||||
|
||||
import "./webpage/bootstrap.css";
|
||||
import "./webpage/cleanblog.css";
|
||||
import "codemirror/lib/codemirror.css";
|
||||
import "./webpage/style.css";
|
||||
|
||||
|
||||
$(function() {
|
||||
const total_errors: CommonError[] = [];
|
||||
const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(true));
|
||||
@ -32,19 +31,19 @@ $(function() {
|
||||
|
||||
parser.errors.map(CommonError.fromParseError).forEach((x) => total_errors.push(x));
|
||||
|
||||
let lang: RobotLanguage = RobotLanguage.JS;
|
||||
let lang: RegexDialect = RegexDialect.JS;
|
||||
switch ($("#dialect option:selected").val()) {
|
||||
case "dotnet":
|
||||
lang = RobotLanguage.DotNet;
|
||||
lang = RegexDialect.DotNet;
|
||||
break;
|
||||
case "java":
|
||||
lang = RobotLanguage.Java;
|
||||
lang = RegexDialect.Java;
|
||||
break;
|
||||
case "perl":
|
||||
lang = RobotLanguage.Perl;
|
||||
lang = RegexDialect.Perl;
|
||||
break;
|
||||
default:
|
||||
lang = RobotLanguage.JS;
|
||||
lang = RegexDialect.JS;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -52,8 +51,7 @@ $(function() {
|
||||
|
||||
valid.map(CommonError.fromSemanticError).forEach((x) => total_errors.push(x));
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
||||
if (total_errors.length === 0) {
|
||||
if (!usefulConditional(total_errors.length, "total_errors may have added an error")) {
|
||||
regex_result = regex.toRegex(lang);
|
||||
$("#regex").attr("value", regex_result);
|
||||
}
|
||||
@ -76,8 +74,9 @@ $(function() {
|
||||
});
|
||||
|
||||
$("#clip").on("click", () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
||||
if (window.isSecureContext && navigator?.clipboard?.writeText) {
|
||||
if (window.isSecureContext &&
|
||||
usefulConditional(navigator.clipboard, "clipboard may be undefined") &&
|
||||
usefulConditional(navigator.clipboard.writeText, "writeText may be undefined")) {
|
||||
navigator.clipboard.writeText(regex_result);
|
||||
}
|
||||
else {
|
||||
@ -96,6 +95,9 @@ $(function() {
|
||||
});
|
||||
|
||||
editor.on("change", (instance: unknown, change_obj: unknown) => {
|
||||
unusedParameter(instance, "Instance is not required, we have a reference already");
|
||||
unusedParameter(change_obj, "Change is not required, we want the full value");
|
||||
|
||||
/* not empty */
|
||||
console.log(editor.getValue());
|
||||
});
|
||||
|
121
src/tokens.ts
121
src/tokens.ts
@ -1,55 +1,60 @@
|
||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||
|
||||
/**
|
||||
* The tokens required for Human2Regex
|
||||
* @packageDocumentation
|
||||
*/
|
||||
|
||||
import { createToken, Lexer } from "chevrotain";
|
||||
|
||||
export const Zero = createToken({name: "Zero", pattern: /zero/i });
|
||||
export const One = createToken({name: "One", pattern: /one/i });
|
||||
export const Two = createToken({name: "Two", pattern: /two/i });
|
||||
export const Three = createToken({name: "Three", pattern: /three/i });
|
||||
export const Four = createToken({name: "Four", pattern: /four/i });
|
||||
export const Five = createToken({name: "Five", pattern: /five/i });
|
||||
export const Six = createToken({name: "Six", pattern: /six/i });
|
||||
export const Seven = createToken({name: "Seven", pattern: /seven/i });
|
||||
export const Eight = createToken({name: "Eight", pattern: /eight/i });
|
||||
export const Nine = createToken({name: "Nine", pattern: /nine/i });
|
||||
export const Ten = createToken({name: "Ten", pattern: /ten/i });
|
||||
/** @internal */ export const Zero = createToken({name: "Zero", pattern: /zero/i });
|
||||
/** @internal */ export const One = createToken({name: "One", pattern: /one/i });
|
||||
/** @internal */ export const Two = createToken({name: "Two", pattern: /two/i });
|
||||
/** @internal */ export const Three = createToken({name: "Three", pattern: /three/i });
|
||||
/** @internal */ export const Four = createToken({name: "Four", pattern: /four/i });
|
||||
/** @internal */ export const Five = createToken({name: "Five", pattern: /five/i });
|
||||
/** @internal */ export const Six = createToken({name: "Six", pattern: /six/i });
|
||||
/** @internal */ export const Seven = createToken({name: "Seven", pattern: /seven/i });
|
||||
/** @internal */ export const Eight = createToken({name: "Eight", pattern: /eight/i });
|
||||
/** @internal */ export const Nine = createToken({name: "Nine", pattern: /nine/i });
|
||||
/** @internal */ export const Ten = createToken({name: "Ten", pattern: /ten/i });
|
||||
|
||||
export const Optional = createToken({name: "Optional", pattern: /optional(ly)?/i });
|
||||
export const Match = createToken({name: "Match", pattern: /match(es)?/i });
|
||||
export const Then = createToken({name: "Then", pattern: /then/i });
|
||||
export const Anything = createToken({name: "Anything", pattern: /(any thing|any|anything)(s)?/i});
|
||||
export const Or = createToken({name: "Or", pattern: /or/i});
|
||||
export const And = createToken({name: "And", pattern: /and|,/i});
|
||||
export const Word = createToken({name: "WordSpecifier", pattern: /word(s)?/i});
|
||||
export const Digit = createToken({name: "DigitSpecifier", pattern: /digit(s)?/i});
|
||||
export const Character = createToken({name: "CharacterSpecifier", pattern: /character(s)?/i});
|
||||
export const Whitespace = createToken({name: "WhitespaceSpecifier", pattern: /(white space|whitespace)(s)?/i});
|
||||
export const Number = createToken({name: "NumberSpecifier", pattern: /number(s)?/i});
|
||||
export const Using = createToken({name: "Using", pattern: /using/i});
|
||||
export const Global = createToken({name: "Global", pattern: /global/i});
|
||||
export const Multiline = createToken({name: "Multiline", pattern: /(multi line|multiline)/i});
|
||||
export const Exact = createToken({name: "Exact", pattern: /exact/i});
|
||||
export const Matching = createToken({name: "Matching", pattern: /matching/i});
|
||||
export const Not = createToken({name: "Not", pattern: /not/i }); //, longer_alt: Nothing});
|
||||
export const Between = createToken({name: "Between", pattern: /between/i});
|
||||
export const Tab = createToken({name: "Tab", pattern: /tab/i});
|
||||
export const Linefeed = createToken({name: "Linefeed", pattern: /(line feed|linefeed)/i});
|
||||
export const Group = createToken({name: "Group", pattern: /group/i});
|
||||
export const A = createToken({name: "A", pattern: /a(n)?/i }); //, longer_alt: Anything});
|
||||
export const Times = createToken({name: "Times", pattern: /times/i });
|
||||
export const Exactly = createToken({name: "Exactly", pattern: /exact(ly)?/i});
|
||||
export const Inclusive = createToken({name: "Inclusive", pattern: /inclusive(ly)?/i});
|
||||
export const Exclusive = createToken({name: "Exclusive", pattern: /exclusive(ly)?/i});
|
||||
export const From = createToken({name: "From", pattern: /from/i});
|
||||
export const To = createToken({name: "To", pattern: /(to|through|thru|\-|\.\.|\.\.\.)/i});
|
||||
export const Create = createToken({name: "Create", pattern: /create(s)?/i});
|
||||
export const Called = createToken({name: "Called", pattern: /name(d)?|call(ed)?/i});
|
||||
export const Repeat = createToken({name: "Repeat", pattern: /repeat(s|ing)?/i});
|
||||
export const Newline = createToken({name: "Newline", pattern: /(new line|newline)/i});
|
||||
export const CarriageReturn = createToken({name: "CarriageReturn", pattern: /carriage return/i});
|
||||
export const CaseInsensitive = createToken({name: "CaseInsensitive", pattern: /case insensitive/i});
|
||||
export const CaseSensitive = createToken({name: "CaseSensitive", pattern: /case sensitive/i});
|
||||
export const OrMore = createToken({name: "OrMore", pattern: /\+|or more/i });
|
||||
/** @internal */ export const Optional = createToken({name: "Optional", pattern: /optional(ly)?/i });
|
||||
/** @internal */ export const Match = createToken({name: "Match", pattern: /match(es)?/i });
|
||||
/** @internal */ export const Then = createToken({name: "Then", pattern: /then/i });
|
||||
/** @internal */ export const Anything = createToken({name: "Anything", pattern: /(any thing|any|anything)(s)?/i});
|
||||
/** @internal */ export const Or = createToken({name: "Or", pattern: /or/i});
|
||||
/** @internal */ export const And = createToken({name: "And", pattern: /and|,/i});
|
||||
/** @internal */ export const Word = createToken({name: "WordSpecifier", pattern: /word(s)?/i});
|
||||
/** @internal */ export const Digit = createToken({name: "DigitSpecifier", pattern: /digit(s)?/i});
|
||||
/** @internal */ export const Character = createToken({name: "CharacterSpecifier", pattern: /character(s)?/i});
|
||||
/** @internal */ export const Whitespace = createToken({name: "WhitespaceSpecifier", pattern: /(white space|whitespace)(s)?/i});
|
||||
/** @internal */ export const Number = createToken({name: "NumberSpecifier", pattern: /number(s)?/i});
|
||||
/** @internal */ export const Using = createToken({name: "Using", pattern: /using/i});
|
||||
/** @internal */ export const Global = createToken({name: "Global", pattern: /global/i});
|
||||
/** @internal */ export const Multiline = createToken({name: "Multiline", pattern: /(multi line|multiline)/i});
|
||||
/** @internal */ export const Exact = createToken({name: "Exact", pattern: /exact/i});
|
||||
/** @internal */ export const Matching = createToken({name: "Matching", pattern: /matching/i});
|
||||
/** @internal */ export const Not = createToken({name: "Not", pattern: /not/i }); //, longer_alt: Nothing});
|
||||
/** @internal */ export const Between = createToken({name: "Between", pattern: /between/i});
|
||||
/** @internal */ export const Tab = createToken({name: "Tab", pattern: /tab/i});
|
||||
/** @internal */ export const Linefeed = createToken({name: "Linefeed", pattern: /(line feed|linefeed)/i});
|
||||
/** @internal */ export const Group = createToken({name: "Group", pattern: /group/i});
|
||||
/** @internal */ export const A = createToken({name: "A", pattern: /a(n)?/i }); //, longer_alt: Anything});
|
||||
/** @internal */ export const Times = createToken({name: "Times", pattern: /times/i });
|
||||
/** @internal */ export const Exactly = createToken({name: "Exactly", pattern: /exact(ly)?/i});
|
||||
/** @internal */ export const Inclusive = createToken({name: "Inclusive", pattern: /inclusive(ly)?/i});
|
||||
/** @internal */ export const Exclusive = createToken({name: "Exclusive", pattern: /exclusive(ly)?/i});
|
||||
/** @internal */ export const From = createToken({name: "From", pattern: /from/i});
|
||||
// Regex alternation is ordered, so the three-dot form must come before the two-dot form or "..." is consumed as ".." plus a stray "."
/** @internal */ export const To = createToken({name: "To", pattern: /(to|through|thru|\-|\.\.\.|\.\.)/i});
|
||||
/** @internal */ export const Create = createToken({name: "Create", pattern: /create(s)?/i});
|
||||
/** @internal */ export const Called = createToken({name: "Called", pattern: /name(d)?|call(ed)?/i});
|
||||
/** @internal */ export const Repeat = createToken({name: "Repeat", pattern: /repeat(s|ing)?/i});
|
||||
/** @internal */ export const Newline = createToken({name: "Newline", pattern: /(new line|newline)/i});
|
||||
/** @internal */ export const CarriageReturn = createToken({name: "CarriageReturn", pattern: /carriage return/i});
|
||||
/** @internal */ export const CaseInsensitive = createToken({name: "CaseInsensitive", pattern: /case insensitive/i});
|
||||
/** @internal */ export const CaseSensitive = createToken({name: "CaseSensitive", pattern: /case sensitive/i});
|
||||
/** @internal */ export const OrMore = createToken({name: "OrMore", pattern: /\+|or more/i });
|
||||
|
||||
/*
|
||||
//Not being used currently
|
||||
@ -72,18 +77,22 @@ export const By = createToken({name: "By", pattern: /by/i});
|
||||
*/
|
||||
|
||||
|
||||
export const EndOfLine = createToken({name: "EOL", pattern: /\n/ });
|
||||
export const WS = createToken({name: "Whitespace", pattern: /\s+/, group: Lexer.SKIPPED });
|
||||
export const SingleLineComment = createToken({name: "SingleLineComment", pattern: /(#|\/\/).*/, group: Lexer.SKIPPED });
|
||||
export const MultilineComment = createToken({name: "MultiLineComment", pattern: /\/\*(.*)\*\//, line_breaks: true, group: Lexer.SKIPPED });
|
||||
/** @internal */ export const EndOfLine = createToken({name: "EOL", pattern: /\n/ });
|
||||
/** @internal */ export const WS = createToken({name: "Whitespace", pattern: /\s+/, group: Lexer.SKIPPED });
|
||||
/** @internal */ export const SingleLineComment = createToken({name: "SingleLineComment", pattern: /(#|\/\/).*/, group: Lexer.SKIPPED });
|
||||
// [\s\S] matches across line breaks (plain "." does not), and the lazy quantifier stops at the first closing "*/"
/** @internal */ export const MultilineComment = createToken({name: "MultiLineComment", pattern: /\/\*[\s\S]*?\*\//, line_breaks: true, group: Lexer.SKIPPED });
|
||||
|
||||
export const Identifier = createToken({name: "Identifier", pattern: /[a-z]\w*/i });
|
||||
export const NumberLiteral = createToken({name: "NumberLiteral", pattern: /-?\d+/ });
|
||||
export const StringLiteral = createToken({name: "StringLiteral", pattern: /"(?:[^\\"]|\\(?:[bfnrtv"\\/]|u[0-9a-f]{4}|U[0-9a-f]{8}))*"/i });
|
||||
/** @internal */ export const Identifier = createToken({name: "Identifier", pattern: /[a-z]\w*/i });
|
||||
/** @internal */ export const NumberLiteral = createToken({name: "NumberLiteral", pattern: /-?\d+/ });
|
||||
/** @internal */ export const StringLiteral = createToken({name: "StringLiteral", pattern: /"(?:[^\\"]|\\(?:[bfnrtv"\\/]|u[0-9a-f]{4}|U[0-9a-f]{8}))*"/i });
|
||||
|
||||
export const Indent = createToken({name: "Indent"});
|
||||
export const Outdent = createToken({name: "Outdent"});
|
||||
/** @internal */ export const Indent = createToken({name: "Indent"});
|
||||
/** @internal */ export const Outdent = createToken({name: "Outdent"});
|
||||
|
||||
/**
|
||||
* All the tokens used
|
||||
* @internal
|
||||
*/
|
||||
export const AllTokens = [
|
||||
Zero,
|
||||
One,
|
||||
|
162
src/utilities.ts
162
src/utilities.ts
@ -1,18 +1,93 @@
|
||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||
|
||||
/**
|
||||
* Some utility functions for Human2Regex
|
||||
* @packageDocumentation
|
||||
*/
|
||||
|
||||
import { ISemanticError } from "./generator";
|
||||
import { IRecognitionException, ILexingError } from "chevrotain";
|
||||
|
||||
/* eslint-disable no-bitwise */
|
||||
export function hasFlag(a: number, b: number) : boolean {
|
||||
return (a & b) !== 0;
|
||||
|
||||
/**
|
||||
* The following section is used because the linter is set up to warn about certain operations
|
||||
* and for good reason! I'd much rather have these functions than accidentally use bitwise operations, or
|
||||
* create a bunch of useless conditionals
|
||||
* Plus, it signifies exactly what you wish to do (ex, calling hasFlag means you want to check if the
|
||||
* bitpattern matches a given flag)
|
||||
*/
|
||||
|
||||
/**
 * Marks a value as intentionally unused so the linter does not complain about it
 *
 * @remarks does nothing at runtime; both arguments are ignored
 *
 * @param value the value that must be accepted but is not used
 * @param reason why the value is unused in the calling context
 * @internal
 */
// eslint-disable-next-line @typescript-eslint/no-unused-vars
export function unusedParameter<T>(value: T, reason: string): void {
    // Deliberate no-op: the call site and its reason string are the whole point
    return;
}
|
||||
|
||||
export function combineFlags(a: number, b: number): number {
|
||||
return (a | b);
|
||||
/**
 * Wraps a condition the linter considers unnecessary, together with the reason it is kept
 *
 * @param conditional the value to test for truthiness
 * @param reason why the check is needed even though the linter thinks otherwise
 * @returns the truthiness of conditional as a boolean
 * @internal
 */
// eslint-disable-next-line @typescript-eslint/no-unused-vars
export function usefulConditional<T>(conditional: boolean | T, reason: string): boolean {
    // Plain truthiness coercion; reason is documentation only and never read
    return !!conditional;
}
|
||||
|
||||
|
||||
/* eslint-disable no-bitwise */
|
||||
/**
 * Builds a bit flag by shifting 1 left by the given amount
 *
 * @param value the number of bits to shift
 * @returns 1 << value
 * @internal
 */
export function makeFlag(value: number): number {
    const bit = 1 << value;
    return bit;
}
|
||||
|
||||
/**
 * Tests whether value shares at least one set bit with flag
 *
 * @param value the bit pattern to inspect
 * @param flag the flag (or flags) to look for
 * @returns true when (value & flag) is non-zero
 * @internal
 */
export function hasFlag(value: number, flag: number): boolean {
    const shared = value & flag;
    return shared !== 0;
}
|
||||
|
||||
/**
 * Merges two bit patterns into one
 *
 * @param value the existing bit pattern
 * @param flag the flag (or flags) to add
 * @returns value | flag
 * @internal
 */
export function combineFlags(value: number, flag: number): number {
    const merged = value | flag;
    return merged;
}
|
||||
/* eslint-enable no-bitwise */
|
||||
|
||||
/**
|
||||
* Checks to see if the character is a single regex character
|
||||
*
|
||||
* @remarks unicode and escape characters count as a single character
|
||||
*
|
||||
* @param char the character to check
|
||||
* @returns if the value is exactly 1 character
|
||||
* @internal
|
||||
*/
|
||||
export function isSingleRegexCharacter(char: string): boolean {
|
||||
return (char.startsWith("\\u") && char.length === 6) ||
|
||||
(char.startsWith("\\U") && char.length === 8) ||
|
||||
@ -20,14 +95,38 @@ export function isSingleRegexCharacter(char: string): boolean {
|
||||
char.length === 1;
|
||||
}
|
||||
|
||||
/**
 * Returns the element at the front of an array
 *
 * @remarks performs no length check; an empty array yields undefined
 *
 * @param array an array
 * @returns the element at index 0
 * @internal
 */
export function first<T>(array: T[]): T {
    const [head] = array;
    return head;
}
|
||||
|
||||
/**
 * Returns the element at the back of an array
 *
 * @remarks performs no length check; an empty array yields undefined
 *
 * @param array an array
 * @returns the element at index length - 1
 * @internal
 */
export function last<T>(array: T[]): T {
    const finalIndex = array.length - 1;
    return array[finalIndex];
}
|
||||
|
||||
/**
|
||||
* Find the last index of a given value in an array
|
||||
*
|
||||
* @param array an array
|
||||
* @param value the value to find
|
||||
* @returns an index if found or -1 if not found
|
||||
* @internal
|
||||
*/
|
||||
export function findLastIndex<T>(array: T[], value: T): number {
|
||||
for (let index = array.length-1; index >= 0; index--) {
|
||||
if (array[index] === value) {
|
||||
@ -37,40 +136,71 @@ export function findLastIndex<T>(array: T[], value: T): number {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
 * Finds the highest index whose element satisfies a predicate
 *
 * @remarks scans from the end and stops at the first match
 *
 * @param array an array
 * @param predicate the test applied to each element
 * @returns the matching index, or -1 if no element matches
 */
export function findLastIndexPredicate<T>(array: T[], predicate: (x: T) => boolean): number {
    let position = array.length;
    // position is decremented after the comparison, so the body sees length-1 down to 0
    while (position-- > 0) {
        if (predicate(array[position])) {
            return position;
        }
    }
    return -1;
}
|
||||
|
||||
/**
 * Removes the first and last character (the surrounding quotes) from a string
 *
 * @remarks does not check that those characters are actually quotes;
 * inputs shorter than two characters yield an empty string
 *
 * @param input the string to remove quotes from
 * @returns the input without its first and last character
 * @internal
 */
export function removeQuotes(input: string): string {
    // slice never swaps its bounds, unlike substring, which turned a 1-character input into itself
    return input.slice(1, -1);
}
|
||||
|
||||
/**
 * Escapes a string so it may be used literally in a regular expression
 *
 * @remarks every backslash is escaped, not just the first one, along with
 * the characters = : - . [ ] ^ | ( ) * + ? { } $ /
 *
 * @param input the string to escape
 * @returns a regex escaped string
 * @internal
 */
export function regexEscape(input: string): string {
    // A single global pass: a string pattern passed to replace() would only touch the first backslash
    return input.replace(/([\\=:\-\.\[\]\^\|\(\)\*\+\?\{\}\$\/])/g, "\\$1");
}
|
||||
|
||||
/**
|
||||
* Common Error class that encapsulates information from the lexer, parser, and generator
|
||||
*/
|
||||
export class CommonError {
|
||||
constructor(public type: string, public start_line: number, public start_column: number, public length: number, public message: string) {
|
||||
private constructor(public type: string, public start_line: number, public start_column: number, public length: number, public message: string) {
|
||||
/* empty */
|
||||
}
|
||||
|
||||
/**
 * Builds a CommonError of type "Lexer Error" from a lexing error
 *
 * @param error The lexing error
 * @returns a new CommonError carrying the error's line, column, length and message
 */
public static fromLexError(error: ILexingError): CommonError {
    const { line, column, length, message } = error;
    return new CommonError("Lexer Error", line, column, length, message);
}
|
||||
|
||||
/**
 * Builds a CommonError of type "Parser Error" from a parsing error
 *
 * @remarks missing token positions are reported as NaN
 *
 * @param error The parsing error
 * @returns a new CommonError
 */
public static fromParseError(error: IRecognitionException): CommonError {
    const token = error.token;
    const startLine = token.startLine ?? NaN;
    const startColumn = token.startColumn ?? NaN;
    // Parenthesised on purpose: "-" binds tighter than "??", so without the
    // parentheses the subtraction never applied to a defined endOffset
    // NOTE(review): chevrotain documents endOffset as inclusive, so this may be one short — confirm
    const length = (token.endOffset ?? NaN) - token.startOffset;
    return new CommonError("Parser Error", startLine, startColumn, length, error.name + ": " + error.message);
}
|
||||
|
||||
/**
 * Builds a CommonError of type "Semantic Error" from a semantic error
 *
 * @param error The semantic error
 * @returns a new CommonError carrying the error's start line, start column, length and message
 */
public static fromSemanticError(error: ISemanticError): CommonError {
    const { startLine, startColumn, length, message } = error;
    return new CommonError("Semantic Error", startLine, startColumn, length, message);
}
|
||||
|
||||
/**
 * Formats this error as "type @ line column: message"
 *
 * @returns a string representation
 */
public toString(): string {
    const location = `${this.start_line} ${this.start_column}`;
    return `${this.type} @ ${location}: ${this.message}`;
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user