1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-16 04:20:35 -07:00

Merge branch 'new-features' into dependabot/npm_and_yarn/node-notifier-8.0.1

This commit is contained in:
Patrick Demian 2021-01-03 04:04:47 -05:00 committed by GitHub
commit ed89a2995a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 1517 additions and 566 deletions

View File

@ -81,6 +81,7 @@ The API reference is available [here](API.md)
## Todo
- Add more regex options such as back references, subroutines, lookahead/behind, and more character classes (eg, `[:alpha:]`)
- Add more regex options such as subroutines, conditions, and lookahead/behind
- Fix error messages (They sometimes point to the wrong location, off by 1 errors, etc)
- Add more useful lex/parse errors (What even is an EarlyExitException?)
- Use a different/better static site generation method

15
docs/bundle.min.js vendored

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

140
lib/generator.d.ts vendored
View File

@ -21,29 +21,45 @@ export interface ISemanticError {
message: string;
}
/**
* The base concrete syntax tree class
* Context for validation
*
* @remarks Currently only used to validate groups
* @internal
*/
export declare abstract class H2RCST {
tokens: IToken[];
export declare class GeneratorContext {
groups: {
[key: string]: {
startLine: number;
startColumn: number;
length: number;
};
};
/**
* Constructor for H2RCST
* Checks to see if we already have a group defined
*
* @param tokens Tokens used to calculate where an error occured
* @internal
* @param identifier the group name
* @returns true if the group name already exists
*/
constructor(tokens: IToken[]);
hasGroup(identifier: string): boolean;
/**
* Adds the identifier to the group list
*
* @param identifier the group name
*/
addGroup(identifier: string, tokens: IToken[]): void;
}
interface Generates {
/**
* Validate that this is both valid and can be generated in the specified language
*
* @remarks There is no guarantee toRegex will work unless validate returns no errors
*
* @param language the regex dialect we're validating
* @param context the generator context
* @returns A list of errors
* @public
*/
abstract validate(language: RegexDialect): ISemanticError[];
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
/**
* Generate a regular expression fragment based on this syntax tree
*
@ -53,6 +69,23 @@ export declare abstract class H2RCST {
* @returns a regular expression fragment
* @public
*/
toRegex(language: RegexDialect): string;
}
/**
* The base concrete syntax tree class
*
* @internal
*/
export declare abstract class H2RCST implements Generates {
tokens: IToken[];
/**
* Constructor for H2RCST
*
* @param tokens Tokens used to calculate where an error occurred
* @internal
*/
constructor(tokens: IToken[]);
abstract validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
abstract toRegex(language: RegexDialect): string;
/**
* Creates an ISemanticError with a given message and the tokens provided from the constructor
@ -126,7 +159,7 @@ export declare class MatchSubStatementValue {
*
* @internal
*/
export declare class MatchStatementValue {
export declare class MatchStatementValue implements Generates {
optional: boolean;
statement: MatchSubStatementCST;
/**
@ -137,6 +170,8 @@ export declare class MatchStatementValue {
* @internal
*/
constructor(optional: boolean, statement: MatchSubStatementCST);
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
toRegex(language: RegexDialect): string;
}
/**
* The base class for all statement concrete syntax trees
@ -163,7 +198,7 @@ export declare class MatchSubStatementCST extends H2RCST {
* @param values sub statements to match
*/
constructor(tokens: IToken[], count: CountSubStatementCST | null, invert: boolean, values: MatchSubStatementValue[]);
validate(language: RegexDialect): ISemanticError[];
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
toRegex(language: RegexDialect): string;
}
/**
@ -180,7 +215,7 @@ export declare class UsingStatementCST extends H2RCST {
* @param flags using flags
*/
constructor(tokens: IToken[], flags: UsingFlags[]);
validate(language: RegexDialect): ISemanticError[];
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
toRegex(language: RegexDialect): string;
}
/**
@ -201,7 +236,7 @@ export declare class CountSubStatementCST extends H2RCST {
* @param opt option modifier
*/
constructor(tokens: IToken[], from: number, to?: number | null, opt?: "inclusive" | "exclusive" | "+" | null);
validate(language: RegexDialect): ISemanticError[];
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
toRegex(language: RegexDialect): string;
}
/**
@ -216,10 +251,10 @@ export declare class MatchStatementCST extends StatementCST {
* Constructor for MatchStatementCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param matches
* @param matches the list of matches
*/
constructor(tokens: IToken[], completely_optional: boolean, matches: MatchStatementValue[]);
validate(language: RegexDialect): ISemanticError[];
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
toRegex(language: RegexDialect): string;
}
/**
@ -240,7 +275,7 @@ export declare class RepeatStatementCST extends StatementCST {
* @param statements the statements to repeat
*/
constructor(tokens: IToken[], optional: boolean, count: CountSubStatementCST | null, statements: StatementCST[]);
validate(language: RegexDialect): ISemanticError[];
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
toRegex(language: RegexDialect): string;
}
/**
@ -262,7 +297,70 @@ export declare class GroupStatementCST extends StatementCST {
* @internal
*/
constructor(tokens: IToken[], optional: boolean, name: string | null, statements: StatementCST[]);
validate(language: RegexDialect): ISemanticError[];
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
toRegex(language: RegexDialect): string;
}
/**
* Concrete Syntax Tree for a Backreference statement
*
* Refers back to a named group by its identifier, optionally repeated and/or optional.
*
* @internal
*/
export declare class BackrefStatementCST extends StatementCST {
/** whether the back reference itself is optional */
private optional;
/** repetition applied to the back reference, or null when it is not repeated */
private count;
/** name of the group being referenced */
private name;
/**
* Constructor for BackrefStatementCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param optional is this backref optional
* @param count optional number of times to repeat
* @param name the group name to call
*/
constructor(tokens: IToken[], optional: boolean, count: CountSubStatementCST | null, name: string);
/**
* Reports an error when the referenced group is not present in the context,
* followed by any errors reported by the count (if there is one)
*
* @param language the regex dialect we're validating
* @param context the generator context holding the groups defined so far
* @returns A list of errors
*/
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
/**
* Generates the named reference in the syntax of the requested dialect
*
* @param language the regex dialect to generate for
* @returns a regular expression fragment
*/
toRegex(language: RegexDialect): string;
}
/**
* Concrete Syntax Tree for an If Pattern statement
*
* A conditional whose condition is a list of matches rather than a group name.
*
* @internal
*/
export declare class IfPatternStatementCST extends StatementCST {
/** the matches that make up the condition */
private matches;
/** statements generated for the "true" branch */
private true_statements;
/** statements generated for the "false" branch; may be empty */
private false_statements;
/**
* Constructor for IfPatternStatementCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param matches list of matches to test against
* @param true_statements true path
* @param false_statements false path
*/
constructor(tokens: IToken[], matches: MatchStatementValue[], true_statements: StatementCST[], false_statements: StatementCST[]);
/**
* Rejects the Java and JS dialects (no conditionals) and the Python dialect
* (no pattern conditionals), then validates the matches and both branches
*
* @param language the regex dialect we're validating
* @param context the generator context
* @returns A list of errors
*/
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
/**
* Generates a conditional group; the false branch is only emitted when it has statements
*
* @param language the regex dialect to generate for
* @returns a regular expression fragment
*/
toRegex(language: RegexDialect): string;
}
/**
* Concrete Syntax Tree for an If group Ident statement
*
* A conditional whose condition is the name of a group.
*
* @internal
*/
export declare class IfIdentStatementCST extends StatementCST {
/** name of the group the condition tests */
private identifier;
/** statements generated for the "true" branch */
private true_statements;
/** statements generated for the "false" branch; may be empty */
private false_statements;
/**
* Constructor for IfIdentStatementCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param identifier the group identifier to check
* @param true_statements true path
* @param false_statements false path
*/
constructor(tokens: IToken[], identifier: string, true_statements: StatementCST[], false_statements: StatementCST[]);
/**
* Rejects the Java and JS dialects (no conditionals), reports an error when the
* group is not present in the context, then validates both branches
*
* @param language the regex dialect we're validating
* @param context the generator context holding the groups defined so far
* @returns A list of errors
*/
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
/**
* Generates a conditional group; the false branch is only emitted when it has statements
*
* @param language the regex dialect to generate for
* @returns a regular expression fragment
*/
toRegex(language: RegexDialect): string;
}
/**
@ -282,13 +380,7 @@ export declare class RegularExpressionCST extends H2RCST {
* @internal
*/
constructor(tokens: IToken[], usings: UsingStatementCST, statements: StatementCST[]);
validate(language: RegexDialect): ISemanticError[];
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
toRegex(language: RegexDialect): string;
}
/**
* Minimizes the match string by finding duplicates or substrings in the array
*
* @param arr the array of matches
* @internal
*/
export declare function minimizeMatchString(arr: string[]): string;
export {};

View File

@ -1,12 +1,13 @@
"use strict";
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
Object.defineProperty(exports, "__esModule", { value: true });
exports.minimizeMatchString = exports.RegularExpressionCST = exports.GroupStatementCST = exports.RepeatStatementCST = exports.MatchStatementCST = exports.CountSubStatementCST = exports.UsingStatementCST = exports.MatchSubStatementCST = exports.StatementCST = exports.MatchStatementValue = exports.MatchSubStatementValue = exports.MatchSubStatementType = exports.UsingFlags = exports.H2RCST = exports.RegexDialect = void 0;
exports.RegularExpressionCST = exports.IfIdentStatementCST = exports.IfPatternStatementCST = exports.BackrefStatementCST = exports.GroupStatementCST = exports.RepeatStatementCST = exports.MatchStatementCST = exports.CountSubStatementCST = exports.UsingStatementCST = exports.MatchSubStatementCST = exports.StatementCST = exports.MatchStatementValue = exports.MatchSubStatementValue = exports.MatchSubStatementType = exports.UsingFlags = exports.H2RCST = exports.GeneratorContext = exports.RegexDialect = void 0;
/**
* Includes all Concrete Syntax Trees for Human2Regex
* @packageDocumentation
*/
const utilities_1 = require("./utilities");
const generator_helper_1 = require("./generator_helper");
/**
* List of regular expression dialects we support
*/
@ -49,6 +50,42 @@ const unicode_script_codes = [
"Tai_Tham", "Tai_Viet", "Takri", "Tamil", "Telugu", "Thaana", "Thai",
"Tibetan", "Tifinagh", "Ugaritic", "Vai", "Yi"
];
/**
* Context for validation
*
* @remarks Currently only used to validate groups
* @internal
*/
class GeneratorContext {
constructor() {
this.groups = {};
}
/**
* Checks to see if we already have a group defined
*
* @param identifier the group name
* @returns true if the group name already exists
*/
hasGroup(identifier) {
return Object.prototype.hasOwnProperty.call(this.groups, identifier);
}
/**
* Adds the identifier to the group list
*
* @param identifier the group name
*/
addGroup(identifier, tokens) {
var _a, _b, _c;
const f = utilities_1.first(tokens);
const l = utilities_1.last(tokens);
this.groups[identifier] = {
startLine: (_a = f.startLine) !== null && _a !== void 0 ? _a : NaN,
startColumn: (_b = f.startColumn) !== null && _b !== void 0 ? _b : NaN,
length: ((_c = l.endOffset) !== null && _c !== void 0 ? _c : l.startOffset) - f.startOffset,
};
}
}
exports.GeneratorContext = GeneratorContext;
/**
* The base concrete syntax tree class
*
@ -166,6 +203,17 @@ class MatchStatementValue {
this.statement = statement;
/* empty */
}
validate(language, context) {
return this.statement.validate(language, context);
}
toRegex(language) {
let match_stmt = this.statement.toRegex(language);
// need to group if optional and ungrouped
if (this.optional) {
match_stmt = generator_helper_1.groupIfRequired(match_stmt) + "?";
}
return match_stmt;
}
}
exports.MatchStatementValue = MatchStatementValue;
/**
@ -196,10 +244,10 @@ class MatchSubStatementCST extends H2RCST {
this.invert = invert;
this.values = values;
}
validate(language) {
validate(language, context) {
const errors = [];
if (this.count) {
utilities_1.append(errors, this.count.validate(language));
utilities_1.append(errors, this.count.validate(language, context));
}
for (const value of this.values) {
if (value.type === MatchSubStatementType.Between) {
@ -311,50 +359,15 @@ class MatchSubStatementCST extends H2RCST {
break;
}
}
let ret = "";
let require_grouping = false;
let dont_clobber_plus = false;
if (matches.length === 1) {
ret = utilities_1.first(matches);
if (ret.endsWith("+")) {
dont_clobber_plus = true;
}
}
else {
ret = minimizeMatchString(matches);
if (ret.length > 1 &&
(!ret.startsWith("(") || !ret.endsWith("["))) {
require_grouping = true;
}
}
let ret = generator_helper_1.minimizeMatchString(matches);
if (this.count) {
if (dont_clobber_plus) {
const clobber = this.count.toRegex(language);
// + can be ignored as well as a count as long as that count is > 0
switch (clobber) {
case "*":
case "?":
ret = "(?:" + ret + ")" + clobber;
break;
case "+":
// ignore
break;
default:
if (clobber.startsWith("{0")) {
ret = "(?:" + ret + ")" + clobber;
}
else {
// remove + and replace with count
ret.substring(0, ret.length - 1) + clobber;
}
break;
}
if (matches.length === 1) {
// we don't group if there's only 1 element
// but we need to make sure we don't add an additional + or *
ret = generator_helper_1.dontClobberRepetition(ret, this.count.toRegex(language));
}
else {
if (require_grouping) {
ret = "(?:" + ret + ")";
}
ret += this.count.toRegex(language);
ret = generator_helper_1.groupIfRequired(ret) + this.count.toRegex(language);
}
}
return ret;
@ -377,8 +390,9 @@ class UsingStatementCST extends H2RCST {
super(tokens);
this.flags = flags;
}
validate(language) {
utilities_1.unusedParameter(language, "Using Statement does not change based on language");
validate(language, context) {
utilities_1.unusedParameter(language, "Count does not need checking");
utilities_1.unusedParameter(context, "Context is not needed");
const errors = [];
let flag = this.flags[0];
for (let i = 1; i < this.flags.length; i++) {
@ -434,13 +448,11 @@ class CountSubStatementCST extends H2RCST {
this.to = to;
this.opt = opt;
}
validate(language) {
validate(language, context) {
utilities_1.unusedParameter(language, "Count does not need checking");
utilities_1.unusedParameter(context, "Context is not needed");
const errors = [];
if (this.from < 0) {
errors.push(this.error("Value cannot be negative"));
}
else if (this.to !== null && ((this.opt === "exclusive" && (this.to - 1) <= this.from) || this.to <= this.from)) {
if (this.to !== null && ((this.opt === "exclusive" && (this.to - 1) <= this.from) || this.to <= this.from)) {
errors.push(this.error("Values must be in range of eachother"));
}
return errors;
@ -483,43 +495,24 @@ class MatchStatementCST extends StatementCST {
* Constructor for MatchStatementCST
*
* @param tokens Tokens used to calculate where an error occurred
* @param matches
* @param matches the list of matches
*/
constructor(tokens, completely_optional, matches) {
super(tokens);
this.completely_optional = completely_optional;
this.matches = matches;
}
validate(language) {
validate(language, context) {
const errors = [];
for (const match of this.matches) {
utilities_1.append(errors, match.statement.validate(language));
utilities_1.append(errors, match.statement.validate(language, context));
}
return errors;
}
toRegex(language) {
let final_matches = this.matches.map((x) => {
let match_stmt = x.statement.toRegex(language);
// need to group if optional and ungrouped
if (x.optional) {
if (!utilities_1.isSingleRegexCharacter(match_stmt)) {
// don't re-group a group
if (match_stmt[0] !== "(" && match_stmt[match_stmt.length - 1] !== ")") {
match_stmt = "(?:" + match_stmt + ")";
}
}
match_stmt += "?";
}
return match_stmt;
}).join("");
let final_matches = this.matches.map((x) => x.toRegex(language)).join("");
if (this.completely_optional) {
if (!utilities_1.isSingleRegexCharacter(final_matches)) {
// don't re-group a group
if (final_matches[0] !== "(" && final_matches[final_matches.length - 1] !== ")") {
final_matches = "(?:" + final_matches + ")";
}
}
final_matches += "?";
final_matches = generator_helper_1.groupIfRequired(final_matches) + "?";
}
return final_matches;
}
@ -545,18 +538,18 @@ class RepeatStatementCST extends StatementCST {
this.count = count;
this.statements = statements;
}
validate(language) {
validate(language, context) {
const errors = [];
if (this.count !== null) {
utilities_1.append(errors, this.count.validate(language));
utilities_1.append(errors, this.count.validate(language, context));
}
for (const statement of this.statements) {
utilities_1.append(errors, statement.validate(language));
utilities_1.append(errors, statement.validate(language, context));
}
return errors;
}
toRegex(language) {
let str = "(?:" + this.statements.map((x) => x.toRegex(language)).join("") + ")";
let str = generator_helper_1.groupIfRequired(this.statements.map((x) => x.toRegex(language)).join(""));
if (this.count) {
str += this.count.toRegex(language);
// group for optionality because count would be incorrect otherwise
@ -595,14 +588,19 @@ class GroupStatementCST extends StatementCST {
this.name = name;
this.statements = statements;
}
validate(language) {
validate(language, context) {
const errors = [];
// All languages currently support named groups
//if (false) {
// errors.push(this.error("This language does not support named groups"));
//}
if (this.name !== null) {
if (context.hasGroup(this.name)) {
const past_group = context.groups[this.name];
errors.push(this.error(`Group with name "${this.name}" was already defined here: ${past_group.startLine}:${past_group.startLine}-${past_group.startLine}:${past_group.startLine + past_group.length}`));
}
else {
context.addGroup(this.name, this.tokens);
}
}
for (const statement of this.statements) {
utilities_1.append(errors, statement.validate(language));
utilities_1.append(errors, statement.validate(language, context));
}
return errors;
}
@ -623,6 +621,169 @@ class GroupStatementCST extends StatementCST {
}
}
exports.GroupStatementCST = GroupStatementCST;
/**
 * Concrete Syntax Tree for a Backreference statement
 *
 * @internal
 */
class BackrefStatementCST extends StatementCST {
    /**
     * Constructor for BackrefStatementCST
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param optional is this backref optional
     * @param count optional number of times to repeat
     * @param name the group name to call
     */
    constructor(tokens, optional, count, name) {
        super(tokens);
        this.optional = optional;
        this.count = count;
        this.name = name;
    }
    /**
     * Checks that the referenced group exists and that the count (if any) is valid
     *
     * @param language the regex dialect we're validating
     * @param context the generator context holding the groups defined so far
     * @returns A list of errors
     */
    validate(language, context) {
        const errors = [];
        if (!context.hasGroup(this.name)) {
            errors.push(this.error(`Cannot call group with name "${this.name}" as it was never previously defined`));
        }
        if (this.count !== null) {
            utilities_1.append(errors, this.count.validate(language, context));
        }
        return errors;
    }
    /**
     * Generates the named reference, followed by its count and/or optional marker
     *
     * @param language the regex dialect to generate for
     * @returns a regular expression fragment
     */
    toRegex(language) {
        let str = "";
        switch (language) {
            case RegexDialect.Python:
                str = `(?P=${this.name})`;
                break;
            case RegexDialect.DotNet:
            case RegexDialect.Java:
                str = `\\k<${this.name}>`;
                break;
            default:
                str = `\\g<${this.name}>`;
                break;
        }
        if (this.count) {
            str += this.count.toRegex(language);
            // group for optionality because count would be incorrect otherwise
            if (this.optional) {
                str = "(?:" + str + ")?";
            }
        }
        else if (this.optional) {
            // append the marker; assigning it would throw the reference itself away
            str += "?";
        }
        return str;
    }
}
exports.BackrefStatementCST = BackrefStatementCST;
/**
 * Concrete Syntax Tree for an If Pattern statement
 *
 * The condition is a list of matches; one of two statement lists is generated depending on it.
 *
 * @internal
 */
class IfPatternStatementCST extends StatementCST {
    /**
     * Constructor for IfPatternStatementCST
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param matches list of matches to test against
     * @param true_statements true path
     * @param false_statements false path
     */
    constructor(tokens, matches, true_statements, false_statements) {
        super(tokens);
        this.matches = matches;
        this.true_statements = true_statements;
        this.false_statements = false_statements;
    }
    /**
     * Rejects dialects without (pattern) conditionals, then validates the condition and both branches
     *
     * @param language the regex dialect we're validating
     * @param context the generator context
     * @returns A list of errors
     */
    validate(language, context) {
        const errors = [];
        const lacks_conditionals = language === RegexDialect.Java || language === RegexDialect.JS;
        if (lacks_conditionals) {
            errors.push(this.error("This language does not support conditionals"));
        }
        const lacks_pattern_conditionals = language === RegexDialect.Python;
        if (lacks_pattern_conditionals) {
            errors.push(this.error("This language does not support pattern conditionals"));
        }
        // condition first, then the true path, then the false path
        const children = [...this.matches, ...this.true_statements, ...this.false_statements];
        for (const child of children) {
            utilities_1.append(errors, child.validate(language, context));
        }
        return errors;
    }
    /**
     * Generates the conditional group; the false branch is left out when it has no statements
     *
     * @param language the regex dialect to generate for
     * @returns a regular expression fragment
     */
    toRegex(language) {
        const render = (nodes) => nodes.map((node) => node.toRegex(language)).join("");
        const condition = render(this.matches);
        const when_true = generator_helper_1.groupIfRequired(render(this.true_statements));
        if (this.false_statements.length === 0) {
            return `(?(${condition})${when_true})`;
        }
        const when_false = generator_helper_1.groupIfRequired(render(this.false_statements));
        return `(?(${condition})${when_true}|${when_false})`;
    }
}
exports.IfPatternStatementCST = IfPatternStatementCST;
/**
 * Concrete Syntax Tree for an If group Ident statement
 *
 * The condition is the name of a group; one of two statement lists is generated depending on it.
 *
 * @internal
 */
class IfIdentStatementCST extends StatementCST {
    /**
     * Constructor for IfIdentStatementCST
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param identifier the group identifier to check
     * @param true_statements true path
     * @param false_statements false path
     */
    constructor(tokens, identifier, true_statements, false_statements) {
        super(tokens);
        this.identifier = identifier;
        this.true_statements = true_statements;
        this.false_statements = false_statements;
    }
    /**
     * Rejects dialects without conditionals, checks the group exists, then validates both branches
     *
     * @param language the regex dialect we're validating
     * @param context the generator context holding the groups defined so far
     * @returns A list of errors
     */
    validate(language, context) {
        const errors = [];
        const lacks_conditionals = language === RegexDialect.Java || language === RegexDialect.JS;
        if (lacks_conditionals) {
            errors.push(this.error("This language does not support conditionals"));
        }
        const group_is_known = context.hasGroup(this.identifier);
        if (!group_is_known) {
            errors.push(this.error(`Group with name "${this.identifier}" does not exist`));
        }
        // true path first, then the false path
        const children = [...this.true_statements, ...this.false_statements];
        for (const child of children) {
            utilities_1.append(errors, child.validate(language, context));
        }
        return errors;
    }
    /**
     * Generates the conditional group; the false branch is left out when it has no statements
     *
     * @param language the regex dialect to generate for
     * @returns a regular expression fragment
     */
    toRegex(language) {
        const render = (nodes) => nodes.map((node) => node.toRegex(language)).join("");
        // be more clear with languages that support it
        const condition = language === RegexDialect.Boost
            ? "<" + this.identifier + ">"
            : this.identifier;
        const when_true = generator_helper_1.groupIfRequired(render(this.true_statements));
        if (this.false_statements.length === 0) {
            return `(?(${condition})${when_true})`;
        }
        const when_false = generator_helper_1.groupIfRequired(render(this.false_statements));
        return `(?(${condition})${when_true}|${when_false})`;
    }
}
exports.IfIdentStatementCST = IfIdentStatementCST;
/**
* Concrete Syntax Tree for a regular expression
*
@ -642,10 +803,10 @@ class RegularExpressionCST extends H2RCST {
this.usings = usings;
this.statements = statements;
}
validate(language) {
const errors = this.usings.validate(language);
validate(language, context) {
const errors = this.usings.validate(language, context);
for (const statement of this.statements) {
utilities_1.append(errors, statement.validate(language));
utilities_1.append(errors, statement.validate(language, context));
}
return errors;
}
@ -656,87 +817,3 @@ class RegularExpressionCST extends H2RCST {
}
}
exports.RegularExpressionCST = RegularExpressionCST;
/**
* Minimizes the match string by finding duplicates or substrings in the array
*
* @param arr the array of matches
* @internal
*/
function minimizeMatchString(arr) {
return minMatchString(arr, 0);
}
exports.minimizeMatchString = minimizeMatchString;
/**
* Minimizes the match string by finding duplicates or substrings in the array
*
* @param arr the array
* @param depth must be 0 for initial call
* @internal
*/
function minMatchString(arr, depth = 0) {
// base case: arr is empty
if (arr.length === 0) {
return "";
}
// base case: arr has 1 element (must have at least 2, so this means this value is optional)
if (arr.length === 1) {
return utilities_1.first(arr) + "?";
}
// remove duplicates
arr = [...new Set(arr)];
// base case: arr has 1 element (after duplicate removal means this is required)
if (arr.length === 1) {
return utilities_1.first(arr);
}
// base case: arr is all single letters
if (arr.every(utilities_1.isSingleRegexCharacter)) {
return "[" + arr.join("") + "]";
}
// now the real magic begins
// You are not expected to understand this
let longest_begin_substring = utilities_1.first(arr);
let longest_end_substring = utilities_1.first(arr);
for (let i = 1; i < arr.length; i++) {
// reduce longest_substring to match everything
for (let j = 0; j < longest_begin_substring.length; j++) {
if (arr[i].length < j || longest_begin_substring[j] !== arr[i][j]) {
longest_begin_substring = longest_begin_substring.substr(0, j);
break;
}
}
for (let j = 0; j < longest_end_substring.length; j++) {
if (arr[i].length - j < 0 || longest_end_substring[longest_end_substring.length - j - 1] !== arr[i][arr[i].length - j - 1]) {
longest_end_substring = longest_end_substring.substr(longest_end_substring.length - j, longest_end_substring.length);
break;
}
}
if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
break;
}
}
// No matches whatsoever
// *technically* we can optimize further, but that is a VERY non-trivial problem
// For example optimizing: [ "a1x1z", "a2y2z", "a3z3z" ] to: "a[123][xyz][123]z"
if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
if (depth > 0) {
return "(?:" + arr.join("|") + ")";
}
else {
return arr.join("|");
}
}
// we have some matches
else {
// remove begin (if exists) and end (if exists) from each element and remove empty strings
const begin_pos = longest_begin_substring.length;
const end_pos = longest_end_substring.length;
const similar_matches = [];
for (const ele of arr) {
const match = ele.substring(begin_pos, ele.length - end_pos);
if (match.length !== 0) {
similar_matches.push(match);
}
}
return longest_begin_substring + minMatchString(similar_matches, depth + 1) + longest_end_substring;
}
}

23
lib/generator_helper.d.ts vendored Normal file
View File

@ -0,0 +1,23 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
/**
* Minimizes the match string by finding duplicates or substrings in the array
*
* An array with exactly one element is returned as that element, untouched.
* Anything else goes through the recursive minimizer, which removes duplicates
* and factors out the prefix and suffix shared by every entry.
*
* @param arr the array of matches
* @returns the minimized regular expression fragment
* @internal
*/
export declare function minimizeMatchString(arr: string[]): string;
/**
* Groups a regex fragment if it needs to be grouped
*
* A fragment is returned unchanged when it is a single regex character, or when
* it already looks like one (...) group or one [...] character class; every
* other fragment is wrapped in a non-capturing group.
*
* @param fragment fragment of regular expression to potentially group
* @returns a non-capturing group if there needs to be one
* @internal
*/
export declare function groupIfRequired(fragment: string): string;
/**
* Checks to see if fragment has a + or * at the end and has a repetition statement
*
* When the fragment does not end in + or *, the repetition is simply appended.
* Otherwise, depending on the repetition, it is dropped, appended, substituted
* for the trailing quantifier, or applied to the fragment wrapped in a
* non-capturing group.
*
* @param fragment fragment of regular expression
* @param repetition repetition that may clobber the fragment
* @returns the fragment with the repetition applied
*/
export declare function dontClobberRepetition(fragment: string, repetition: string): string;

203
lib/generator_helper.js Normal file
View File

@ -0,0 +1,203 @@
"use strict";
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
Object.defineProperty(exports, "__esModule", { value: true });
exports.dontClobberRepetition = exports.groupIfRequired = exports.minimizeMatchString = void 0;
/**
* Includes helper functions for the Generator
* @packageDocumentation
*/
const utilities_1 = require("./utilities");
/**
* Minimizes the match string by finding duplicates or substrings in the array
*
* @param arr the array of matches
* @internal
*/
function minimizeMatchString(arr) {
// don't process an array of length 1, otherwise you'll get the wrong result
if (arr.length === 1) {
return utilities_1.first(arr);
}
return minMatchString(arr, 0);
}
exports.minimizeMatchString = minimizeMatchString;
/**
* Minimizes the match string by finding duplicates or substrings in the array
*
* @param arr the array
* @param depth must be 0 for initial call
* @returns an optimized string
* @internal
*/
function minMatchString(arr, depth = 0) {
// base case: arr is empty
if (arr.length === 0) {
return "";
}
// base case: arr has 1 element (must have at least 2, so this means this value is optional)
if (arr.length === 1) {
return utilities_1.first(arr) + "?";
}
// remove duplicates
arr = [...new Set(arr)];
// base case: arr has 1 element (after duplicate removal means this is required)
if (arr.length === 1) {
return utilities_1.first(arr);
}
// base case: arr is all single letters
if (arr.every(utilities_1.isSingleRegexCharacter)) {
return "[" + arr.join("") + "]";
}
// now the real magic begins
// You are not expected to understand this
let longest_begin_substring = utilities_1.first(arr);
let longest_end_substring = utilities_1.first(arr);
for (let i = 1; i < arr.length; i++) {
// reduce longest_substring to match everything
for (let j = 0; j < longest_begin_substring.length; j++) {
if (arr[i].length < j || longest_begin_substring[j] !== arr[i][j]) {
longest_begin_substring = longest_begin_substring.substr(0, j);
break;
}
}
for (let j = 0; j < longest_end_substring.length; j++) {
if (arr[i].length - j < 0 || longest_end_substring[longest_end_substring.length - j - 1] !== arr[i][arr[i].length - j - 1]) {
longest_end_substring = longest_end_substring.substr(longest_end_substring.length - j, longest_end_substring.length);
break;
}
}
if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
break;
}
}
// No matches whatsoever
// *technically* we can optimize further, but that is a VERY non-trivial problem
// For example optimizing: [ "a1x1z", "a2y2z", "a3z3z" ] to: "a[123][xyz][123]z"
if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
if (depth > 0) {
return "(?:" + arr.join("|") + ")";
}
else {
return arr.join("|");
}
}
// we have some matches
else {
// remove begin (if exists) and end (if exists) from each element and remove empty strings
const begin_pos = longest_begin_substring.length;
const end_pos = longest_end_substring.length;
const similar_matches = [];
for (const ele of arr) {
const match = ele.substring(begin_pos, ele.length - end_pos);
if (match.length !== 0) {
similar_matches.push(match);
}
}
return longest_begin_substring + minMatchString(similar_matches, depth + 1) + longest_end_substring;
}
}
/**
* Groups a regex fragment if it needs to be grouped
*
* @param fragment fragment of regular expression to potentially group
* @returns a non-capturing group if there needs to be one
* @internal
*/
function groupIfRequired(fragment) {
if (utilities_1.isSingleRegexCharacter(fragment)) {
return fragment;
}
if (fragment[0] === "(" && fragment[fragment.length - 1] === ")") {
let bracket_count = 0;
for (let i = 1; i < fragment.length - 2; i++) {
if (fragment[i] === "\\") {
i++;
}
else if (fragment[i] === "(") {
bracket_count++;
}
else if (fragment[i] === ")") {
bracket_count--;
if (bracket_count === -1) {
break;
}
}
}
return bracket_count === 0 ? fragment : "(?:" + fragment + ")";
}
else if (fragment[0] === "[" && fragment[fragment.length - 1] === "]") {
let bracket_count = 0;
for (let i = 1; i < fragment.length - 2; i++) {
if (fragment[i] === "\\") {
i++;
}
//you'll never have a raw [ inside a []
//else if (fragment[i] === "[") {
// bracket_count++;
//}
else if (fragment[i] === "]") {
bracket_count--;
if (bracket_count === -1) {
break;
}
}
}
return bracket_count === 0 ? fragment : "(?:" + fragment + ")";
}
else {
return "(?:" + fragment + ")";
}
}
exports.groupIfRequired = groupIfRequired;
/**
 * Checks whether the last character of a fragment is the given quantifier
 * and is not escaped (an odd number of preceding backslashes makes it a literal)
 *
 * @param fragment fragment of regular expression
 * @param quantifier the quantifier character to look for ("+" or "*")
 * @returns true if the fragment ends with a live quantifier
 */
function endsWithLiveQuantifier(fragment, quantifier) {
    if (!fragment.endsWith(quantifier)) {
        return false;
    }
    let backslashes = 0;
    for (let i = fragment.length - 2; i >= 0 && fragment[i] === "\\"; i--) {
        backslashes++;
    }
    return backslashes % 2 === 0;
}
/**
 * Checks to see if fragment has a + or * at the end and has a repetition statement
 *
 * When the fragment already ends in a quantifier, the repetition either
 * replaces it, is dropped, or is applied to a grouped copy of the fragment,
 * rather than being blindly appended. A trailing escaped "\+" or "\*" is a
 * literal character, so the repetition is simply appended to it.
 *
 * @param fragment fragment of regular expression
 * @param repetition repetition that may clobber the fragment
 * @returns the fragment combined with the repetition
 */
function dontClobberRepetition(fragment, repetition) {
    // + can be ignored as well as a count as long as that count is > 0
    if (endsWithLiveQuantifier(fragment, "+")) {
        switch (repetition) {
            case "*":
                // ignore: + is greater than *
                break;
            case "?":
                // non-greedy qualifier
                fragment += repetition;
                break;
            case "+":
                // ignore: already +
                break;
            default:
                if (repetition.startsWith("{0")) {
                    // a count that allows zero must wrap the whole "one or more"
                    fragment = "(?:" + fragment + ")" + repetition;
                }
                else {
                    // remove + and replace with count
                    fragment = fragment.substring(0, fragment.length - 1) + repetition;
                }
                break;
        }
    }
    else if (endsWithLiveQuantifier(fragment, "*")) {
        switch (repetition) {
            case "*":
                // ignore: already *
                break;
            case "?":
                // non-greedy qualifier
                fragment += repetition;
                break;
            default:
                // remove * and replace with count
                fragment = fragment.substring(0, fragment.length - 1) + repetition;
                break;
        }
    }
    else {
        fragment += repetition;
    }
    return fragment;
}
exports.dontClobberRepetition = dontClobberRepetition;

View File

@ -83,7 +83,7 @@ class ParseResult {
* @public
*/
validate(language) {
return this.regexp_cst.validate(language).map(utilities_1.CommonError.fromSemanticError);
return this.regexp_cst.validate(language, new generator_1.GeneratorContext()).map(utilities_1.CommonError.fromSemanticError);
}
/**
* Generate a regular expression string based on the parse result
@ -499,12 +499,91 @@ class Human2RegexParser extends chevrotain_1.EmbeddedActionsParser {
tokens.push($.CONSUME(T.Outdent));
return new generator_1.RepeatStatementCST(tokens, optional, count, statements);
});
// Parser rule for a backreference statement. Accepted shape, in order:
//   [Optional] Call [count] [[The] Group [Called]] Identifier EndOfLine
// Produces a BackrefStatementCST carrying the optional flag, the count (or
// null when absent) and the referenced group name.
// The numeric suffixes (OPTION5, CONSUME4, ...) are Chevrotain occurrence
// indices; they are left exactly as written.
const BackrefStatement = $.RULE("BackrefStatement", () => {
// only the Optional, Call and EndOfLine tokens are recorded here; the list is
// handed to the CST node as its `tokens` argument
const tokens = [];
let optional = false;
let count = null;
$.OPTION5(() => {
tokens.push($.CONSUME(T.Optional));
optional = true;
});
tokens.push($.CONSUME(T.Call));
$.OPTION6(() => count = $.SUBRULE(CountSubStatement));
// filler words: "[the] group [called]" may be omitted entirely
$.OPTION7(() => {
$.OPTION(() => $.CONSUME(T.The));
$.CONSUME(T.Group);
$.OPTION2(() => $.CONSUME(T.Called));
});
// the name of the group being referenced
const name = $.CONSUME(T.Identifier).image;
tokens.push($.CONSUME4(T.EndOfLine));
return new generator_1.BackrefStatementCST(tokens, optional, count, name);
});
// Parser rule for an if statement. Two condition forms are accepted:
//   If Identifier EndOfLine                      -> IfIdentStatementCST
//   If Match [Optional] <match> {sep [Optional] <match>} EndOfLine
//                                                -> IfPatternStatementCST
// where sep is "[And] Then" or "And". The condition is followed by an indented
// block of one or more statements (true path) and, optionally, an Else line
// with its own indented block (false path).
const IfStatement = $.RULE("IfStatement", () => {
// only the If and first EndOfLine tokens are recorded for the CST node
const tokens = [];
// match values collected for the pattern form of the condition
const msv = [];
let optional = false;
const true_statements = [];
const false_statements = [];
// stays "" unless the identifier form of the condition was parsed
let name = "";
tokens.push($.CONSUME(T.If));
$.OR2([
{ ALT: () => {
name = $.CONSUME(T.Identifier).image;
} },
{ ALT: () => {
$.CONSUME(T.Match);
$.OPTION4(() => {
$.CONSUME3(T.Optional);
optional = true;
});
msv.push(new generator_1.MatchStatementValue(optional, $.SUBRULE(MatchSubStatement)));
$.MANY(() => {
$.OR([
{ ALT: () => {
$.OPTION2(() => $.CONSUME2(T.And));
$.CONSUME(T.Then);
} },
{ ALT: () => $.CONSUME(T.And) },
]);
// each subsequent match starts out non-optional again
optional = false;
$.OPTION3(() => {
$.CONSUME2(T.Optional);
optional = true;
});
msv.push(new generator_1.MatchStatementValue(optional, $.SUBRULE2(MatchSubStatement)));
});
} }
]);
tokens.push($.CONSUME3(T.EndOfLine));
// true path: an indented block with at least one statement
$.CONSUME2(T.Indent);
$.AT_LEAST_ONE2(() => {
true_statements.push($.SUBRULE(Statement));
});
$.CONSUME2(T.Outdent);
// false path: optional Else line followed by its own indented block
$.OPTION(() => {
$.CONSUME(T.Else);
$.CONSUME4(T.EndOfLine);
$.CONSUME3(T.Indent);
$.AT_LEAST_ONE3(() => {
false_statements.push($.SUBRULE2(Statement));
});
$.CONSUME3(T.Outdent);
});
// an empty name means the pattern form of the condition was used
if (name === "") {
return new generator_1.IfPatternStatementCST(tokens, msv, true_statements, false_statements);
}
else {
return new generator_1.IfIdentStatementCST(tokens, name, true_statements, false_statements);
}
});
// statement super class
const Statement = $.RULE("Statement", () => {
return $.OR([
{ ALT: () => $.SUBRULE(MatchStatement) },
{ ALT: () => $.SUBRULE(GroupStatement) },
{ ALT: () => $.SUBRULE(RepeatStatement) }
{ ALT: () => $.SUBRULE(RepeatStatement) },
{ ALT: () => $.SUBRULE(BackrefStatement) },
{ ALT: () => $.SUBRULE(IfStatement) }
]);
});
// full regex

4
lib/tokens.d.ts vendored
View File

@ -51,6 +51,10 @@
/** @internal */ export declare const CaseInsensitive: import("chevrotain").TokenType;
/** @internal */ export declare const CaseSensitive: import("chevrotain").TokenType;
/** @internal */ export declare const OrMore: import("chevrotain").TokenType;
/** @internal */ export declare const Call: import("chevrotain").TokenType;
/** @internal */ export declare const The: import("chevrotain").TokenType;
/** @internal */ export declare const If: import("chevrotain").TokenType;
/** @internal */ export declare const Else: import("chevrotain").TokenType;
/** @internal */ export declare const EndOfLine: import("chevrotain").TokenType;
/** @internal */ export declare const WS: import("chevrotain").TokenType;
/** @internal */ export declare const SingleLineComment: import("chevrotain").TokenType;

View File

@ -1,7 +1,8 @@
"use strict";
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
Object.defineProperty(exports, "__esModule", { value: true });
exports.AllTokens = exports.Outdent = exports.Indent = exports.StringLiteral = exports.NumberLiteral = exports.Identifier = exports.MultilineComment = exports.SingleLineComment = exports.WS = exports.EndOfLine = exports.OrMore = exports.CaseSensitive = exports.CaseInsensitive = exports.CarriageReturn = exports.Newline = exports.Repeat = exports.Called = exports.Create = exports.To = exports.From = exports.Exclusive = exports.Inclusive = exports.Exactly = exports.Times = exports.A = exports.Group = exports.Linefeed = exports.Tab = exports.Between = exports.Not = exports.Matching = exports.Exact = exports.Multiline = exports.Global = exports.Using = exports.Unicode = exports.Number = exports.Boundary = exports.Whitespace = exports.Integer = exports.Decimal = exports.Letter = exports.Character = exports.Digit = exports.Word = exports.And = exports.Or = exports.Anything = exports.Then = exports.Match = exports.Optional = exports.Ten = exports.Nine = exports.Eight = exports.Seven = exports.Six = exports.Five = exports.Four = exports.Three = exports.Two = exports.One = exports.Zero = void 0;
exports.CaseInsensitive = exports.CarriageReturn = exports.Newline = exports.Repeat = exports.Called = exports.Create = exports.To = exports.From = exports.Exclusive = exports.Inclusive = exports.Exactly = exports.Times = exports.A = exports.Group = exports.Linefeed = exports.Tab = exports.Between = exports.Not = exports.Matching = exports.Exact = exports.Multiline = exports.Global = exports.Using = exports.Unicode = exports.Number = exports.Boundary = exports.Whitespace = exports.Integer = exports.Decimal = exports.Letter = exports.Character = exports.Digit = exports.Word = exports.And = exports.Or = exports.Anything = exports.Then = exports.Match = exports.Optional = exports.Ten = exports.Nine = exports.Eight = exports.Seven = exports.Six = exports.Five = exports.Four = exports.Three = exports.Two = exports.One = exports.Zero = void 0;
exports.AllTokens = exports.Outdent = exports.Indent = exports.StringLiteral = exports.NumberLiteral = exports.Identifier = exports.MultilineComment = exports.SingleLineComment = exports.WS = exports.EndOfLine = exports.Else = exports.If = exports.The = exports.Call = exports.OrMore = exports.CaseSensitive = void 0;
/**
* The tokens required for Human2Regex
* @packageDocumentation
@ -52,32 +53,17 @@ const chevrotain_1 = require("chevrotain");
/** @internal */ exports.From = chevrotain_1.createToken({ name: "From", pattern: /from/i });
/** @internal */ exports.To = chevrotain_1.createToken({ name: "To", pattern: /(to|through|thru|\-|\.\.\.?)/i });
/** @internal */ exports.Create = chevrotain_1.createToken({ name: "Create", pattern: /create(s)?/i });
/** @internal */ exports.Called = chevrotain_1.createToken({ name: "Called", pattern: /name(d)?|call(ed)?/i });
/** @internal */ exports.Called = chevrotain_1.createToken({ name: "Called", pattern: /named|called/i });
/** @internal */ exports.Repeat = chevrotain_1.createToken({ name: "Repeat", pattern: /repeat(s|ing)?/i });
/** @internal */ exports.Newline = chevrotain_1.createToken({ name: "Newline", pattern: /(new line|newline)/i });
/** @internal */ exports.CarriageReturn = chevrotain_1.createToken({ name: "CarriageReturn", pattern: /carriage return/i });
/** @internal */ exports.CaseInsensitive = chevrotain_1.createToken({ name: "CaseInsensitive", pattern: /case insensitive/i });
/** @internal */ exports.CaseSensitive = chevrotain_1.createToken({ name: "CaseSensitive", pattern: /case sensitive/i });
/** @internal */ exports.OrMore = chevrotain_1.createToken({ name: "OrMore", pattern: /\+|or more/i });
/*
//Not being used currently
export const Of = createToken({name: "Of", pattern: /of/i});
export const Nothing = createToken({name: "Nothing", pattern: /nothing/i});
export const As = createToken({name: "As", pattern: /as/i});
export const If = createToken({name: "If", pattern: /if/i});
export const Start = createToken({name: "Start", pattern: /start(s) with?/i});
export const Ends = createToken({name: "Ends", pattern: /end(s)? with/i});
export const Else = createToken({name: "Else", pattern: /(other wise|otherwise|else)/i});
export const Unless = createToken({name: "Unless", pattern: /unless/i});
export const While = createToken({name: "While", pattern: /while/i});
export const More = createToken({name: "More", pattern: /more/i});
export const LBracket = createToken({name: "Left Bracket", pattern: /\(/ });
export const RBracket = createToken({name: "Right Bracket", pattern: /\)/ });
export const None = createToken({name: "None", pattern: /none/i});
export const Neither = createToken({name: "Neither", pattern: /neither/i});
export const The = createToken({name: "The", pattern: /the/i }); //, longer_alt: Then});
export const By = createToken({name: "By", pattern: /by/i});
*/
/** @internal */ exports.Call = chevrotain_1.createToken({ name: "Call", pattern: /call|invoke|execute|(re ?)?run/i });
/** @internal */ exports.The = chevrotain_1.createToken({ name: "The", pattern: /the/i });
/** @internal */ exports.If = chevrotain_1.createToken({ name: "If", pattern: /if/i });
/** @internal */ exports.Else = chevrotain_1.createToken({ name: "Else", pattern: /else|otherwise/i });
/** @internal */ exports.EndOfLine = chevrotain_1.createToken({ name: "EOL", pattern: /\n/ });
/** @internal */ exports.WS = chevrotain_1.createToken({ name: "Whitespace", pattern: /[^\S\n]+/, start_chars_hint: [" ", "\r"], group: chevrotain_1.Lexer.SKIPPED });
/** @internal */ exports.SingleLineComment = chevrotain_1.createToken({ name: "SingleLineComment", pattern: /(#|\/\/).*/, group: chevrotain_1.Lexer.SKIPPED });
@ -120,22 +106,11 @@ exports.AllTokens = [
exports.Whitespace,
exports.Number,
exports.Unicode,
/*
Of,
As,
If,
Start,
Ends,
Else,
Unless,
While,
More,
Nothing,
By,
The,
None,
Neither,
*/
exports.Called,
exports.Call,
exports.If,
exports.Else,
exports.The,
exports.Using,
exports.Global,
exports.Multiline,
@ -151,7 +126,6 @@ exports.AllTokens = [
exports.Exclusive,
exports.From,
exports.Create,
exports.Called,
exports.Repeat,
exports.Newline,
exports.CarriageReturn,

3
lib/utilities.d.ts vendored
View File

@ -130,6 +130,7 @@ export declare class CommonError {
*
* @param error The lexing error
* @returns a new CommonError
* @internal
*/
static fromLexError(error: ILexingError): CommonError;
/**
@ -137,6 +138,7 @@ export declare class CommonError {
*
* @param error The parsing error
* @returns a new CommonError
* @internal
*/
static fromParseError(error: IRecognitionException): CommonError;
/**
@ -144,6 +146,7 @@ export declare class CommonError {
*
* @param error The semantic error
* @returns a new CommonError
* @internal
*/
static fromSemanticError(error: ISemanticError): CommonError;
/**

View File

@ -181,6 +181,7 @@ class CommonError {
*
* @param error The lexing error
* @returns a new CommonError
* @internal
*/
static fromLexError(error) {
// not really fond of --> and <--
@ -192,6 +193,7 @@ class CommonError {
*
* @param error The parsing error
* @returns a new CommonError
* @internal
*/
static fromParseError(error) {
var _a, _b, _c;
@ -204,6 +206,7 @@ class CommonError {
*
* @param error The semantic error
* @returns a new CommonError
* @internal
*/
static fromSemanticError(error) {
return new CommonError("Semantic Error", error.startLine, error.startColumn, error.length, error.message);

100
package-lock.json generated
View File

@ -1,6 +1,6 @@
{
"name": "human2regex",
"version": "1.0.2",
"version": "1.1.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
@ -1607,13 +1607,13 @@
"dev": true
},
"@typescript-eslint/eslint-plugin": {
"version": "4.7.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.7.0.tgz",
"integrity": "sha512-li9aiSVBBd7kU5VlQlT1AqP0uWGDK6JYKUQ9cVDnOg34VNnd9t4jr0Yqc/bKxJr/tDCPDaB4KzoSFN9fgVxe/Q==",
"version": "4.8.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.8.1.tgz",
"integrity": "sha512-d7LeQ7dbUrIv5YVFNzGgaW3IQKMmnmKFneRWagRlGYOSfLJVaRbj/FrBNOBC1a3tVO+TgNq1GbHvRtg1kwL0FQ==",
"dev": true,
"requires": {
"@typescript-eslint/experimental-utils": "4.7.0",
"@typescript-eslint/scope-manager": "4.7.0",
"@typescript-eslint/experimental-utils": "4.8.1",
"@typescript-eslint/scope-manager": "4.8.1",
"debug": "^4.1.1",
"functional-red-black-tree": "^1.0.1",
"regexpp": "^3.0.0",
@ -1622,55 +1622,55 @@
}
},
"@typescript-eslint/experimental-utils": {
"version": "4.7.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/experimental-utils/-/experimental-utils-4.7.0.tgz",
"integrity": "sha512-cymzovXAiD4EF+YoHAB5Oh02MpnXjvyaOb+v+BdpY7lsJXZQN34oIETeUwVT2XfV9rSNpXaIcknDLfupO/tUoA==",
"version": "4.8.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/experimental-utils/-/experimental-utils-4.8.1.tgz",
"integrity": "sha512-WigyLn144R3+lGATXW4nNcDJ9JlTkG8YdBWHkDlN0lC3gUGtDi7Pe3h5GPvFKMcRz8KbZpm9FJV9NTW8CpRHpg==",
"dev": true,
"requires": {
"@types/json-schema": "^7.0.3",
"@typescript-eslint/scope-manager": "4.7.0",
"@typescript-eslint/types": "4.7.0",
"@typescript-eslint/typescript-estree": "4.7.0",
"@typescript-eslint/scope-manager": "4.8.1",
"@typescript-eslint/types": "4.8.1",
"@typescript-eslint/typescript-estree": "4.8.1",
"eslint-scope": "^5.0.0",
"eslint-utils": "^2.0.0"
}
},
"@typescript-eslint/parser": {
"version": "4.7.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-4.7.0.tgz",
"integrity": "sha512-+meGV8bMP1sJHBI2AFq1GeTwofcGiur8LoIr6v+rEmD9knyCqDlrQcFHR0KDDfldHIFDU/enZ53fla6ReF4wRw==",
"version": "4.8.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-4.8.1.tgz",
"integrity": "sha512-QND8XSVetATHK9y2Ltc/XBl5Ro7Y62YuZKnPEwnNPB8E379fDsvzJ1dMJ46fg/VOmk0hXhatc+GXs5MaXuL5Uw==",
"dev": true,
"requires": {
"@typescript-eslint/scope-manager": "4.7.0",
"@typescript-eslint/types": "4.7.0",
"@typescript-eslint/typescript-estree": "4.7.0",
"@typescript-eslint/scope-manager": "4.8.1",
"@typescript-eslint/types": "4.8.1",
"@typescript-eslint/typescript-estree": "4.8.1",
"debug": "^4.1.1"
}
},
"@typescript-eslint/scope-manager": {
"version": "4.7.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-4.7.0.tgz",
"integrity": "sha512-ILITvqwDJYbcDCROj6+Ob0oCKNg3SH46iWcNcTIT9B5aiVssoTYkhKjxOMNzR1F7WSJkik4zmuqve5MdnA0DyA==",
"version": "4.8.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-4.8.1.tgz",
"integrity": "sha512-r0iUOc41KFFbZdPAdCS4K1mXivnSZqXS5D9oW+iykQsRlTbQRfuFRSW20xKDdYiaCoH+SkSLeIF484g3kWzwOQ==",
"dev": true,
"requires": {
"@typescript-eslint/types": "4.7.0",
"@typescript-eslint/visitor-keys": "4.7.0"
"@typescript-eslint/types": "4.8.1",
"@typescript-eslint/visitor-keys": "4.8.1"
}
},
"@typescript-eslint/types": {
"version": "4.7.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-4.7.0.tgz",
"integrity": "sha512-uLszFe0wExJc+I7q0Z/+BnP7wao/kzX0hB5vJn4LIgrfrMLgnB2UXoReV19lkJQS1a1mHWGGODSxnBx6JQC3Sg==",
"version": "4.8.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-4.8.1.tgz",
"integrity": "sha512-ave2a18x2Y25q5K05K/U3JQIe2Av4+TNi/2YuzyaXLAsDx6UZkz1boZ7nR/N6Wwae2PpudTZmHFXqu7faXfHmA==",
"dev": true
},
"@typescript-eslint/typescript-estree": {
"version": "4.7.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-4.7.0.tgz",
"integrity": "sha512-5XZRQznD1MfUmxu1t8/j2Af4OxbA7EFU2rbo0No7meb46eHgGkSieFdfV6omiC/DGIBhH9H9gXn7okBbVOm8jw==",
"version": "4.8.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-4.8.1.tgz",
"integrity": "sha512-bJ6Fn/6tW2g7WIkCWh3QRlaSU7CdUUK52shx36/J7T5oTQzANvi6raoTsbwGM11+7eBbeem8hCCKbyvAc0X3sQ==",
"dev": true,
"requires": {
"@typescript-eslint/types": "4.7.0",
"@typescript-eslint/visitor-keys": "4.7.0",
"@typescript-eslint/types": "4.8.1",
"@typescript-eslint/visitor-keys": "4.8.1",
"debug": "^4.1.1",
"globby": "^11.0.1",
"is-glob": "^4.0.1",
@ -1680,12 +1680,12 @@
}
},
"@typescript-eslint/visitor-keys": {
"version": "4.7.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-4.7.0.tgz",
"integrity": "sha512-aDJDWuCRsf1lXOtignlfiPODkzSxxop7D0rZ91L6ZuMlcMCSh0YyK+gAfo5zN/ih6WxMwhoXgJWC3cWQdaKC+A==",
"version": "4.8.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-4.8.1.tgz",
"integrity": "sha512-3nrwXFdEYALQh/zW8rFwP4QltqsanCDz4CwWMPiIZmwlk9GlvBeueEIbq05SEq4ganqM0g9nh02xXgv5XI3PeQ==",
"dev": true,
"requires": {
"@typescript-eslint/types": "4.7.0",
"@typescript-eslint/types": "4.8.1",
"eslint-visitor-keys": "^2.0.0"
}
},
@ -2917,9 +2917,9 @@
}
},
"codemirror": {
"version": "5.58.2",
"resolved": "https://registry.npmjs.org/codemirror/-/codemirror-5.58.2.tgz",
"integrity": "sha512-K/hOh24cCwRutd1Mk3uLtjWzNISOkm4fvXiMO7LucCrqbh6aJDdtqUziim3MZUI6wOY0rvY1SlL1Ork01uMy6w=="
"version": "5.58.3",
"resolved": "https://registry.npmjs.org/codemirror/-/codemirror-5.58.3.tgz",
"integrity": "sha512-KBhB+juiyOOgn0AqtRmWyAT3yoElkuvWTI6hsHa9E6GQrl6bk/fdAYcvuqW1/upO9T9rtEtapWdw4XYcNiVDEA=="
},
"collect-v8-coverage": {
"version": "1.0.1",
@ -3087,9 +3087,9 @@
"dev": true
},
"copy-webpack-plugin": {
"version": "6.3.0",
"resolved": "https://registry.npmjs.org/copy-webpack-plugin/-/copy-webpack-plugin-6.3.0.tgz",
"integrity": "sha512-kQ2cGGQLO6Ov2fe7rEGVxObI17dPeFkv8bRGnUAGZehOcrrObyAR9yWYlFGlJsyWM4EeuC/ytQNQkXxjYotMzg==",
"version": "6.3.2",
"resolved": "https://registry.npmjs.org/copy-webpack-plugin/-/copy-webpack-plugin-6.3.2.tgz",
"integrity": "sha512-MgJ1uouLIbDg4ST1GzqrGQyKoXY5iPqi6fghFqarijam7FQcBa/r6Rg0VkoIuzx75Xq8iAMghyOueMkWUQ5OaA==",
"dev": true,
"requires": {
"cacache": "^15.0.5",
@ -3977,9 +3977,9 @@
}
},
"eslint": {
"version": "7.13.0",
"resolved": "https://registry.npmjs.org/eslint/-/eslint-7.13.0.tgz",
"integrity": "sha512-uCORMuOO8tUzJmsdRtrvcGq5qposf7Rw0LwkTJkoDbOycVQtQjmnhZSuLQnozLE4TmAzlMVV45eCHmQ1OpDKUQ==",
"version": "7.14.0",
"resolved": "https://registry.npmjs.org/eslint/-/eslint-7.14.0.tgz",
"integrity": "sha512-5YubdnPXrlrYAFCKybPuHIAH++PINe1pmKNc5wQRB9HSbqIK1ywAnntE3Wwua4giKu0bjligf1gLF6qxMGOYRA==",
"dev": true,
"requires": {
"@babel/code-frame": "^7.0.0",
@ -5425,9 +5425,9 @@
"dev": true
},
"ini": {
"version": "1.3.5",
"resolved": "https://registry.npmjs.org/ini/-/ini-1.3.5.tgz",
"integrity": "sha512-RZY5huIKCMRWDUqZlEi72f/lmXKMvuszcMBduliQ3nnWbx9X/ZBQO7DijMEYS9EhHBb2qacRUMtC7svLwe0lcw==",
"version": "1.3.8",
"resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz",
"integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==",
"dev": true
},
"interpret": {
@ -11946,9 +11946,9 @@
}
},
"typescript": {
"version": "4.0.5",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-4.0.5.tgz",
"integrity": "sha512-ywmr/VrTVCmNTJ6iV2LwIrfG1P+lv6luD8sUJs+2eI9NLGigaN+nUQc13iHqisq7bra9lnmUSYqbJvegraBOPQ==",
"version": "4.1.2",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-4.1.2.tgz",
"integrity": "sha512-thGloWsGH3SOxv1SoY7QojKi0tc+8FnOmiarEGMbd/lar7QOEd3hvlx3Fp5y6FlDUGl9L+pd4n2e+oToGMmhRQ==",
"dev": true
},
"uglify-js": {

View File

@ -1,6 +1,6 @@
{
"name": "human2regex",
"version": "1.0.2",
"version": "1.1.0",
"description": "Humanized Regular Expressions",
"main": "./lib/index.js",
"typings": "./lib/index.d.ts",
@ -9,13 +9,13 @@
"@types/html-minifier": "^3.5.3",
"@types/jest": "^26.0.15",
"@types/mustache": "^4.0.1",
"@typescript-eslint/eslint-plugin": "^4.7.0",
"@typescript-eslint/parser": "^4.7.0",
"@typescript-eslint/eslint-plugin": "^4.8.1",
"@typescript-eslint/parser": "^4.8.1",
"before-build-webpack": "^0.2.9",
"codecov": "^3.8.1",
"copy-webpack-plugin": "^6.3.0",
"copy-webpack-plugin": "^6.3.2",
"css-loader": "^4.3.0",
"eslint": "^7.13.0",
"eslint": "^7.14.0",
"glob": "^7.1.6",
"html-minifier": "^4.0.0",
"jest": "^26.6.3",
@ -26,7 +26,7 @@
"ts-jest": "^26.4.4",
"ts-loader": "^8.0.11",
"ts-node": "^9.0.0",
"typescript": "^4.0.5",
"typescript": "^4.1.2",
"webpack": "^4.44.2",
"webpack-cli": "^3.3.12"
},
@ -46,7 +46,7 @@
"license": "MIT",
"dependencies": {
"chevrotain": "^7.0.3",
"codemirror": "^5.58.2"
"codemirror": "^5.58.3"
},
"repository": {
"type": "git",

View File

@ -336,7 +336,13 @@ match "World"
<h3 id="tut-final">Putting it all together</h3>
<p>Grouping, repetition, and matching are the 3 primary elements that make up H2R. They can be combined in any way to generate a regular expression. See the <a href="index.html">main page</a> for an example that combines all above to parse a URL.</p>
<h3>Miscellaneous features</h3>
<h3>Advanced features</h3>
<p class="font-weight-bold" id="tut-back">Backreferences</p>
<p>TODO</p>
<p class="font-weight-bold" id="tut-if">If statements</p>
<p>TODO</p>
<p class="font-weight-bold" id="tut-unicode">Unicode character properties</p>
<p>You can match specific unicode sequences using <code class="cm-s-idea">"\uXXXX"

View File

@ -7,6 +7,7 @@
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last, unusedParameter, makeFlag, append } from "./utilities";
import { IToken } from "chevrotain";
import { minimizeMatchString, groupIfRequired, dontClobberRepetition } from "./generator_helper";
/**
* List of regular expression dialects we support
@ -63,31 +64,54 @@ const unicode_script_codes = [
];
/**
* The base concrete syntax tree class
* Context for validation
*
* @remarks Currently only used to validate groups
* @internal
*/
export abstract class H2RCST {
export class GeneratorContext {
public groups: { [ key: string ]: { startLine: number, startColumn: number, length: number } } = {};
/**
* Constructor for H2RCST
* Checks to see if we already have a group defined
*
* @param tokens Tokens used to calculate where an error occured
* @internal
* @param identifier the group name
* @returns true if the group name already exists
*/
constructor(public tokens: IToken[]) {
/* empty */
public hasGroup(identifier: string): boolean {
// own-property check via Object.prototype so that names inherited by the
// plain `groups` object (eg, "toString") are not reported as defined groups
return Object.prototype.hasOwnProperty.call(this.groups, identifier);
}
/**
 * Records a group name together with the source span of the tokens that
 * defined it
 *
 * @param identifier the group name
 * @param tokens tokens of the defining statement; the first and last token
 *               determine the recorded start position and length
 */
public addGroup(identifier: string, tokens: IToken[]): void {
    const head = first(tokens);
    const tail = last(tokens);

    // fall back to the last token's start when it carries no end offset
    const end_offset = tail.endOffset ?? tail.startOffset;

    this.groups[identifier] = {
        startLine: head.startLine ?? NaN,
        startColumn: head.startColumn ?? NaN,
        length: end_offset - head.startOffset,
    };
}
}
interface Generates {
/**
* Validate that this is both valid and can be generated in the specified language
*
* @remarks There is no guarantee toRegex will work unless validate returns no errors
*
* @param language the regex dialect we're validating
* @param context the generator context
* @returns A list of errors
* @public
*/
public abstract validate(language: RegexDialect): ISemanticError[];
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
/**
* Generate a regular expression fragment based on this syntax tree
@ -98,6 +122,26 @@ export abstract class H2RCST {
* @returns a regular expression fragment
* @public
*/
toRegex(language: RegexDialect): string;
}
/**
* The base concrete syntax tree class
*
* @internal
*/
export abstract class H2RCST implements Generates {
/**
* Constructor for H2RCST
*
* @param tokens Tokens used to calculate where an error occured
* @internal
*/
constructor(public tokens: IToken[]) {
/* empty */
}
public abstract validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
public abstract toRegex(language: RegexDialect): string;
/**
@ -186,7 +230,7 @@ export class MatchSubStatementValue {
*
* @internal
*/
export class MatchStatementValue {
export class MatchStatementValue implements Generates {
/**
* Constructor for MatchStatementValue
@ -198,6 +242,21 @@ export class MatchStatementValue {
constructor(public optional: boolean, public statement: MatchSubStatementCST) {
/* empty */
}
/**
 * Validates this match value by delegating to the wrapped match sub-statement
 *
 * @param language the regex dialect we're validating
 * @param context the generator context
 * @returns the list of errors reported by the wrapped statement
 */
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
return this.statement.validate(language, context);
}
/**
 * Generates the regular expression fragment for this match value
 *
 * @param language the regex dialect to generate for
 * @returns the wrapped statement's fragment, made optional when required
 */
public toRegex(language: RegexDialect): string {
    const fragment = this.statement.toRegex(language);

    if (!this.optional) {
        return fragment;
    }

    // an optional match must be a single unit before "?" is appended
    return groupIfRequired(fragment) + "?";
}
}
/**
@ -227,11 +286,11 @@ export class MatchSubStatementCST extends H2RCST {
super(tokens);
}
public validate(language: RegexDialect): ISemanticError[] {
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
const errors: ISemanticError[] = [];
if (this.count) {
append(errors, this.count.validate(language));
append(errors, this.count.validate(language, context));
}
for (const value of this.values) {
@ -353,56 +412,16 @@ export class MatchSubStatementCST extends H2RCST {
}
}
let ret = "";
let require_grouping = false;
let dont_clobber_plus = false;
if (matches.length === 1) {
ret = first(matches);
if (ret.endsWith("+")) {
dont_clobber_plus = true;
}
}
else {
ret = minimizeMatchString(matches);
if (ret.length > 1 &&
(!ret.startsWith("(") || !ret.endsWith("["))) {
require_grouping = true;
}
}
let ret = minimizeMatchString(matches);
if (this.count) {
if (dont_clobber_plus) {
const clobber = this.count.toRegex(language);
// + can be ignored as well as a count as long as that count is > 0
switch (clobber) {
case "*":
case "?":
ret = "(?:" + ret + ")" + clobber;
break;
case "+":
// ignore
break;
default:
if (clobber.startsWith("{0")) {
ret = "(?:" + ret + ")" + clobber;
}
else {
// remove + and replace with count
ret.substring(0, ret.length - 1) + clobber;
}
break;
}
if (matches.length === 1) {
// we don't group if there's only 1 element
// but we need to make sure we don't add an additional + or *
ret = dontClobberRepetition(ret, this.count.toRegex(language));
}
else {
if (require_grouping) {
ret = "(?:" + ret + ")";
}
ret += this.count.toRegex(language);
ret = groupIfRequired(ret) + this.count.toRegex(language);
}
}
@ -427,8 +446,9 @@ export class UsingStatementCST extends H2RCST {
super(tokens);
}
public validate(language: RegexDialect): ISemanticError[] {
unusedParameter(language, "Using Statement does not change based on language");
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
unusedParameter(language, "Count does not need checking");
unusedParameter(context, "Context is not needed");
const errors: ISemanticError[] = [];
let flag = this.flags[0];
@ -490,15 +510,13 @@ export class CountSubStatementCST extends H2RCST {
super(tokens);
}
public validate(language: RegexDialect): ISemanticError[] {
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
unusedParameter(language, "Count does not need checking");
unusedParameter(context, "Context is not needed");
const errors: ISemanticError[] = [];
if (this.from < 0) {
errors.push(this.error("Value cannot be negative"));
}
else if (this.to !== null && ((this.opt === "exclusive" && (this.to-1) <= this.from) || this.to <= this.from)) {
if (this.to !== null && ((this.opt === "exclusive" && (this.to-1) <= this.from) || this.to <= this.from)) {
errors.push(this.error("Values must be in range of eachother"));
}
@ -548,49 +566,27 @@ export class MatchStatementCST extends StatementCST {
* Constructor for MatchStatementCST
*
* @param tokens Tokens used to calculate where an error occured
* @param matches
* @param matches the list of matches
*/
constructor(tokens: IToken[], private completely_optional: boolean, private matches: MatchStatementValue[]) {
super(tokens);
}
public validate(language: RegexDialect): ISemanticError[] {
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
const errors: ISemanticError[] = [];
for (const match of this.matches) {
append(errors, match.statement.validate(language));
append(errors, match.statement.validate(language, context));
}
return errors;
}
public toRegex(language: RegexDialect): string {
let final_matches = this.matches.map((x) => {
let match_stmt = x.statement.toRegex(language);
// need to group if optional and ungrouped
if (x.optional) {
if (!isSingleRegexCharacter(match_stmt)) {
// don't re-group a group
if (match_stmt[0] !== "(" && match_stmt[match_stmt.length-1] !== ")") {
match_stmt = "(?:" + match_stmt + ")";
}
}
match_stmt += "?";
}
return match_stmt;
}).join("");
let final_matches = this.matches.map((x) => x.toRegex(language)).join("");
if (this.completely_optional) {
if (!isSingleRegexCharacter(final_matches)) {
// don't re-group a group
if (final_matches[0] !== "(" && final_matches[final_matches.length-1] !== ")") {
final_matches = "(?:" + final_matches + ")";
}
}
final_matches += "?";
final_matches = groupIfRequired(final_matches) + "?";
}
return final_matches;
@ -616,22 +612,22 @@ export class RepeatStatementCST extends StatementCST {
super(tokens);
}
public validate(language: RegexDialect): ISemanticError[] {
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
const errors: ISemanticError[] = [];
if (this.count !== null) {
append(errors, this.count.validate(language));
append(errors, this.count.validate(language, context));
}
for (const statement of this.statements) {
append(errors, statement.validate(language));
append(errors, statement.validate(language, context));
}
return errors;
}
public toRegex(language: RegexDialect): string {
let str = "(?:" + this.statements.map((x) => x.toRegex(language)).join("") + ")";
let str = groupIfRequired(this.statements.map((x) => x.toRegex(language)).join(""));
if (this.count) {
str += this.count.toRegex(language);
@ -659,7 +655,7 @@ export class RepeatStatementCST extends StatementCST {
* @internal
*/
export class GroupStatementCST extends StatementCST {
/**
* Constructor for GroupStatementCST
*
@ -673,16 +669,21 @@ export class GroupStatementCST extends StatementCST {
super(tokens);
}
public validate(language: RegexDialect): ISemanticError[] {
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
const errors : ISemanticError[] = [];
// All languages currently support named groups
//if (false) {
// errors.push(this.error("This language does not support named groups"));
//}
// group names must be unique: report a redefinition, otherwise remember this one
if (this.name !== null) {
    if (context.hasGroup(this.name)) {
        const past_group = context.groups[this.name];
        // position is reported as line:column-line:column of the earlier definition
        const start = `${past_group.startLine}:${past_group.startColumn}`;
        const end = `${past_group.startLine}:${past_group.startColumn + past_group.length}`;
        errors.push(this.error(`Group with name "${this.name}" was already defined here: ${start}-${end}`));
    }
    else {
        context.addGroup(this.name, this.tokens);
    }
}
for (const statement of this.statements) {
append(errors, statement.validate(language));
append(errors, statement.validate(language, context));
}
return errors;
@ -711,6 +712,195 @@ export class GroupStatementCST extends StatementCST {
}
}
/**
 * Concrete Syntax Tree for a Backreference statement
 *
 * @internal
 */
export class BackrefStatementCST extends StatementCST {
    /**
     * Constructor for BackrefStatementCST
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param optional is this backref optional
     * @param count optional number of times to repeat
     * @param name the group name to call
     */
    constructor(tokens: IToken[], private optional: boolean, private count: CountSubStatementCST | null, private name: string) {
        super(tokens);
    }

    /**
     * Checks that the referenced group is already known to the context and
     * that the count, if any, is valid
     *
     * @param language the regex dialect we're validating
     * @param context the generator context holding the groups seen so far
     * @returns A list of errors
     */
    public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
        const errors: ISemanticError[] = [];

        if (!context.hasGroup(this.name)) {
            errors.push(this.error(`Cannot call group with name "${this.name}" as it was never previously defined`));
        }

        if (this.count !== null) {
            append(errors, this.count.validate(language, context));
        }

        return errors;
    }

    /**
     * Generates the reference to the named group, followed by the count and
     * optionality if present
     *
     * @param language the regex dialect to generate for
     * @returns a regular expression fragment
     */
    public toRegex(language: RegexDialect): string {
        let str = "";

        switch (language) {
            case RegexDialect.Python:
                str = `(?P=${this.name})`;
                break;

            // JavaScript has no \g<name> syntax; its named backreference is \k<name>
            case RegexDialect.JS:
            case RegexDialect.DotNet:
            case RegexDialect.Java:
                str = `\\k<${this.name}>`;
                break;

            default:
                str = `\\g<${this.name}>`;
                break;
        }

        if (this.count) {
            str += this.count.toRegex(language);

            // group for optionality because count would be incorrect otherwise
            if (this.optional) {
                str = "(?:" + str + ")?";
            }
        }
        else if (this.optional) {
            // append, don't overwrite: the reference itself must be kept
            str += "?";
        }

        return str;
    }
}
/**
 * Concrete Syntax Tree node for a conditional whose condition is a pattern
 *
 * @internal
 */
export class IfPatternStatementCST extends StatementCST {
    /**
     * Builds a pattern conditional
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param matches the matches that make up the condition
     * @param true_statements statements generated for the true path
     * @param false_statements statements generated for the false path (may be empty)
     */
    constructor(tokens: IToken[], private matches: MatchStatementValue[], private true_statements: StatementCST[], private false_statements: StatementCST[]) {
        super(tokens);
    }

    /**
     * Rejects dialects without (pattern) conditionals, then validates the condition and both paths
     *
     * @param language the dialect to validate against
     * @param context validation context shared with every child
     * @returns every semantic error found, in source order
     */
    public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
        const found: ISemanticError[] = [];

        switch (language) {
            case RegexDialect.Java:
            case RegexDialect.JS:
                found.push(this.error("This language does not support conditionals"));
                break;

            case RegexDialect.Python:
                found.push(this.error("This language does not support pattern conditionals"));
                break;

            default:
                break;
        }

        this.matches.forEach((condition) => {
            append(found, condition.validate(language, context));
        });
        this.true_statements.forEach((child) => {
            append(found, child.validate(language, context));
        });
        this.false_statements.forEach((child) => {
            append(found, child.validate(language, context));
        });

        return found;
    }

    /**
     * Generates `(?(condition)true)` or `(?(condition)true|false)` depending on whether a false path exists
     *
     * @param language the dialect to generate for
     * @returns the conditional group
     */
    public toRegex(language: RegexDialect): string {
        // each path is rendered, concatenated and grouped only when required
        const renderPath = (path: StatementCST[]): string => {
            return groupIfRequired(path.map((child) => child.toRegex(language)).join(""));
        };

        const condition = this.matches.map((m) => m.toRegex(language)).join("");
        const when_true = renderPath(this.true_statements);

        if (this.false_statements.length === 0) {
            return `(?(${condition})${when_true})`;
        }

        const when_false = renderPath(this.false_statements);
        return `(?(${condition})${when_true}|${when_false})`;
    }
}
/**
 * Concrete Syntax Tree node for a conditional that tests a named group
 *
 * @internal
 */
export class IfIdentStatementCST extends StatementCST {
    /**
     * Builds a group-identifier conditional
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param identifier name of the group the condition checks
     * @param true_statements statements generated for the true path
     * @param false_statements statements generated for the false path (may be empty)
     */
    constructor(tokens: IToken[], private identifier: string, private true_statements: StatementCST[], private false_statements: StatementCST[]) {
        super(tokens);
    }

    /**
     * Rejects dialects without conditionals, requires the group to be known, then validates both paths
     *
     * @param language the dialect to validate against
     * @param context validation context holding the groups defined so far
     * @returns every semantic error found, in source order
     */
    public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
        const found: ISemanticError[] = [];

        const unsupported = language === RegexDialect.Java || language === RegexDialect.JS;
        if (unsupported) {
            found.push(this.error("This language does not support conditionals"));
        }

        const known_group = context.hasGroup(this.identifier);
        if (!known_group) {
            found.push(this.error(`Group with name "${this.identifier}" does not exist`));
        }

        this.true_statements.forEach((child) => {
            append(found, child.validate(language, context));
        });
        this.false_statements.forEach((child) => {
            append(found, child.validate(language, context));
        });

        return found;
    }

    /**
     * Generates `(?(name)true)` or `(?(name)true|false)`; Boost gets the explicit `<name>` form
     *
     * @param language the dialect to generate for
     * @returns the conditional group
     */
    public toRegex(language: RegexDialect): string {
        // be more clear with languages that support it
        const condition = language === RegexDialect.Boost ? "<" + this.identifier + ">" : this.identifier;

        // each path is rendered, concatenated and grouped only when required
        const renderPath = (path: StatementCST[]): string => {
            return groupIfRequired(path.map((child) => child.toRegex(language)).join(""));
        };

        const when_true = renderPath(this.true_statements);

        if (this.false_statements.length === 0) {
            return `(?(${condition})${when_true})`;
        }

        const when_false = renderPath(this.false_statements);
        return `(?(${condition})${when_true}|${when_false})`;
    }
}
/**
* Concrete Syntax Tree for a regular expression
*
@ -730,115 +920,20 @@ export class RegularExpressionCST extends H2RCST {
super(tokens);
}
public validate(language: RegexDialect): ISemanticError[] {
const errors: ISemanticError[] = this.usings.validate(language);
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
const errors: ISemanticError[] = this.usings.validate(language, context);
for (const statement of this.statements) {
append(errors, statement.validate(language));
append(errors, statement.validate(language, context));
}
return errors;
}
/**
 * Generates the complete regular expression for the given dialect
 *
 * The using statement renders a template and the concatenated statements are substituted for its
 * `{regex}` placeholder.
 *
 * @param language the dialect to generate for
 * @returns the regular expression string
 */
public toRegex(language: RegexDialect): string {
    const modifiers = this.usings.toRegex(language);
    const regex = this.statements.map((x) => x.toRegex(language)).join("");

    // A replacer function inserts its result verbatim. A plain string replacement would have
    // sequences such as "$&", "$'" or "$$" inside the generated regex expanded by String.replace
    return modifiers.replace("{regex}", () => regex);
}
}
/**
* Minimizes the match string by finding duplicates or substrings in the array
*
* Public entry point: simply delegates to minMatchString with a starting depth of 0.
*
* @param arr the array of matches
* @returns the fragment produced by minMatchString for arr
* @internal
*/
export function minimizeMatchString(arr: string[]): string {
return minMatchString(arr, 0);
}
/**
* Minimizes the match string by finding duplicates or substrings in the array
*
* Factors out the prefix and suffix shared by every element, then recurses on what is left in between.
*
* @param arr the array
* @param depth must be 0 for initial call
* @returns a single fragment built from the elements of arr
* @internal
*/
function minMatchString(arr: string[], depth: number = 0): string {
// base case: arr is empty
if (arr.length === 0) {
return "";
}
// base case: arr has 1 element (must have at least 2, so this means this value is optional)
// NOTE(review): "?" is appended to the raw element, so for an element longer than one character it would
// only apply to the last character - confirm that cannot happen
if (arr.length === 1) {
return first(arr) + "?";
}
// remove duplicates
arr = [ ...new Set(arr) ];
// base case: arr has 1 element (after duplicate removal means this is required)
if (arr.length === 1) {
return first(arr);
}
// base case: arr is all single letters
if (arr.every(isSingleRegexCharacter)) {
return "[" + arr.join("") + "]";
}
// now the real magic begins
// Start from the first element as candidate for both the common prefix and the common suffix,
// then shrink each candidate against every other element
let longest_begin_substring = first(arr);
let longest_end_substring = first(arr);
for (let i = 1; i < arr.length; i++) {
// reduce longest_substring to match everything
// (cut the prefix at the first position where arr[i] differs or has run out of characters)
for (let j = 0; j < longest_begin_substring.length; j++) {
if (arr[i].length < j || longest_begin_substring[j] !== arr[i][j]) {
longest_begin_substring = longest_begin_substring.substr(0, j);
break;
}
}
// same comparison for the suffix, walking backwards from the end of both strings
for (let j = 0; j < longest_end_substring.length; j++) {
if (arr[i].length-j < 0 || longest_end_substring[longest_end_substring.length-j-1] !== arr[i][arr[i].length-j-1]) {
longest_end_substring = longest_end_substring.substr(longest_end_substring.length-j, longest_end_substring.length);
break;
}
}
// nothing in common any more: no need to look at the remaining elements
if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
break;
}
}
// No matches whatsoever
// *technically* we can optimize further, but that is a VERY non-trivial problem
// For example optimizing: [ "a1x1z", "a2y2z", "a3z3z" ] to: "a[123][xyz][123]z"
if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
// nested alternations are wrapped in a non-capturing group, the outermost one is not
if (depth > 0) {
return "(?:" + arr.join("|") + ")";
}
else {
return arr.join("|");
}
}
// we have some matches
else {
// remove begin (if exists) and end (if exists) from each element and remove empty strings
const begin_pos = longest_begin_substring.length;
const end_pos = longest_end_substring.length;
const similar_matches: string[] = [];
for (const ele of arr) {
// NOTE(review): when begin_pos + end_pos exceeds ele.length the end index is smaller than the start
// index and substring() swaps them - confirm prefix and suffix can never overlap on an element
const match = ele.substring(begin_pos, ele.length-end_pos);
// NOTE(review): dropped empty strings are only turned into "?" by the recursive call when exactly one
// element remains - confirm the case with several remaining elements
if (match.length !== 0) {
similar_matches.push(match);
}
}
return longest_begin_substring + minMatchString(similar_matches, depth + 1) + longest_end_substring;
}
}
}

224
src/generator_helper.ts Normal file
View File

@ -0,0 +1,224 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
/**
* Includes helper functions for the Generator
* @packageDocumentation
*/
import { first, isSingleRegexCharacter } from "./utilities";
/**
 * Collapses a list of alternatives into one regular expression fragment
 *
 * @param arr the array of matches
 * @returns the lone element untouched when there is exactly one, otherwise the minimized fragment
 * @internal
 */
export function minimizeMatchString(arr: string[]): string {
    // a single element must bypass minMatchString, which would treat it as optional
    return arr.length === 1 ? first(arr) : minMatchString(arr, 0);
}
/**
 * Minimizes the match string by finding duplicates or substrings in the array
 *
 * Factors out the prefix and suffix shared by every element and recursively minimizes the
 * remaining middles. An element that consists of nothing but prefix + suffix makes the middle optional.
 *
 * @param arr the array
 * @param depth must be 0 for initial call
 * @returns an optimized string
 * @internal
 */
function minMatchString(arr: string[], depth: number = 0): string {
    // base case: arr is empty
    if (arr.length === 0) {
        return "";
    }

    // base case: arr has 1 element (must have at least 2, so this means this value is optional)
    // group first, otherwise "?" would only apply to the last character of a longer element
    if (arr.length === 1) {
        return groupIfRequired(first(arr)) + "?";
    }

    // remove duplicates
    arr = [ ...new Set(arr) ];

    // base case: arr has 1 element (after duplicate removal means this is required)
    if (arr.length === 1) {
        return first(arr);
    }

    // base case: arr is all single letters
    if (arr.every(isSingleRegexCharacter)) {
        return "[" + arr.join("") + "]";
    }

    // shrink the first element down to the prefix and the suffix shared by every element
    let longest_begin_substring = first(arr);
    let longest_end_substring = first(arr);
    let shortest_length = longest_begin_substring.length;

    for (const ele of arr) {
        let begin = 0;
        while (begin < longest_begin_substring.length && begin < ele.length && longest_begin_substring[begin] === ele[begin]) {
            begin++;
        }
        longest_begin_substring = longest_begin_substring.substring(0, begin);

        let end = 0;
        while (end < longest_end_substring.length && end < ele.length &&
               longest_end_substring[longest_end_substring.length - 1 - end] === ele[ele.length - 1 - end]) {
            end++;
        }
        longest_end_substring = longest_end_substring.substring(longest_end_substring.length - end);

        shortest_length = Math.min(shortest_length, ele.length);
    }

    // never cut an escape sequence in half: the backslash stays with the character it escapes
    if (endsWithDanglingEscape(longest_begin_substring)) {
        longest_begin_substring = longest_begin_substring.substring(0, longest_begin_substring.length - 1);
    }

    // prefix and suffix may not overlap on the shortest element (eg, [ "aa", "aaa" ])
    const max_end_length = shortest_length - longest_begin_substring.length;
    if (longest_end_substring.length > max_end_length) {
        longest_end_substring = longest_end_substring.substring(longest_end_substring.length - max_end_length);
    }

    // the suffix may not start with a character that is escaped in one of the elements
    let end_length = longest_end_substring.length;
    while (end_length > 0 && splitsAnEscape(arr, end_length)) {
        end_length--;
    }
    longest_end_substring = longest_end_substring.substring(longest_end_substring.length - end_length);

    // No matches whatsoever
    // *technically* we can optimize further, but that is a VERY non-trivial problem
    // For example optimizing: [ "a1x1z", "a2y2z", "a3z3z" ] to: "a[123][xyz][123]z"
    if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
        const alternatives = arr.join("|");

        return depth > 0 ? "(?:" + alternatives + ")" : alternatives;
    }

    // we have some matches
    // remove begin (if exists) and end (if exists) from each element and remember if one became empty
    const begin_pos = longest_begin_substring.length;
    const end_pos = longest_end_substring.length;
    const similar_matches: string[] = [];
    let has_empty_match = false;

    for (const ele of arr) {
        const match = ele.substring(begin_pos, ele.length - end_pos);

        if (match.length !== 0) {
            similar_matches.push(match);
        }
        else {
            has_empty_match = true;
        }
    }

    let middle = "";

    if (similar_matches.length === 1) {
        middle = first(similar_matches);
    }
    else if (similar_matches.length > 1) {
        middle = minMatchString(similar_matches, depth + 1);
    }

    // an empty match means that everything between begin and end is optional
    if (has_empty_match && middle.length !== 0) {
        middle = groupIfRequired(middle) + "?";
    }

    return longest_begin_substring + middle + longest_end_substring;
}

/**
 * Checks if the text ends with a backslash that is not itself escaped
 *
 * @param text fragment of regular expression
 * @returns true if the last character is an escaping backslash
 * @internal
 */
function endsWithDanglingEscape(text: string): boolean {
    let backslashes = 0;

    for (let i = text.length - 1; i >= 0 && text[i] === "\\"; i--) {
        backslashes++;
    }

    return backslashes % 2 === 1;
}

/**
 * Checks if cutting the last end_length characters off any element would separate a backslash from the character it escapes
 *
 * @param arr the elements to check
 * @param end_length length of the suffix that would be cut off
 * @returns true if at least one element would be cut inside an escape sequence
 * @internal
 */
function splitsAnEscape(arr: string[], end_length: number): boolean {
    return arr.some((ele) => endsWithDanglingEscape(ele.substring(0, ele.length - end_length)));
}
/**
 * Groups a regex fragment if it needs to be grouped
 *
 * A fragment is left alone when it is a single regex character, a single character class or a
 * single (already grouped) group. Everything else is wrapped in a non-capturing group.
 *
 * @param fragment fragment of regular expression to potentially group
 * @returns a non-capturing group if there needs to be one
 * @internal
 */
export function groupIfRequired(fragment: string): string {
    if (isSingleRegexCharacter(fragment)) {
        return fragment;
    }

    const last = fragment.length - 1;

    if (fragment[0] === "(" && fragment[last] === ")" && isSingleGroup(fragment)) {
        return fragment;
    }

    // the class opened by the first character must be the one closed by the last character
    if (fragment[0] === "[" && fragment[last] === "]" && findCharacterClassEnd(fragment, 0) === last) {
        return fragment;
    }

    return "(?:" + fragment + ")";
}

/**
 * Finds the bracket that closes a character class
 *
 * @param fragment fragment of regular expression
 * @param open index of the opening bracket
 * @returns index of the closing bracket or -1 if the class is never closed
 * @internal
 */
function findCharacterClassEnd(fragment: string, open: number): number {
    for (let i = open + 1; i < fragment.length; i++) {
        if (fragment[i] === "\\") {
            // skip the escaped character
            i++;
        }
        else if (fragment[i] === "[" && fragment[i + 1] === ":") {
            // POSIX class (eg, [:alpha:]): its closing bracket does not close the surrounding class
            const posix_end = fragment.indexOf(":]", i + 2);

            if (posix_end !== -1) {
                i = posix_end + 1;
            }
        }
        else if (fragment[i] === "]") {
            return i;
        }
    }

    return -1;
}

/**
 * Checks if a fragment that starts with "(" and ends with ")" is one single group
 *
 * @param fragment fragment of regular expression
 * @returns true if the first bracket is closed by the last bracket
 * @internal
 */
function isSingleGroup(fragment: string): boolean {
    const last = fragment.length - 1;
    let bracket_count = 0;
    let i = 1;

    // look at every character between the outer brackets (including the one right before the last)
    while (i < last) {
        const current = fragment[i];

        if (current === "\\") {
            // skip the escaped character
            i++;
        }
        else if (current === "[") {
            // brackets inside of a character class are literals
            const class_end = findCharacterClassEnd(fragment, i);

            if (class_end === -1) {
                return false;
            }

            i = class_end;
        }
        else if (current === "(") {
            bracket_count++;
        }
        else if (current === ")") {
            bracket_count--;

            // the first bracket was closed before the end of the fragment
            if (bracket_count === -1) {
                return false;
            }
        }

        i++;
    }

    // i overshoots when the final bracket was escaped, which means that it did not close anything
    return bracket_count === 0 && i === last;
}
/**
 * Checks to see if fragment has a + or * at the end and has a repetition statement
 *
 * A trailing quantifier is merged with the repetition instead of stacking a second quantifier on
 * top of it. An escaped "\+" or "\*" is a literal and simply gets the repetition appended.
 *
 * @param fragment fragment of regular expression
 * @param repetition repetition that may clobber the fragment
 * @returns the fragment with the repetition applied
 * @internal
 */
export function dontClobberRepetition(fragment: string, repetition: string): string {
    // nothing to apply: keep the fragment (and its quantifier) as is
    if (repetition.length === 0) {
        return fragment;
    }

    // + can be ignored as well as a count as long as that count is > 0
    if (endsWithUnescaped(fragment, "+")) {
        switch (repetition) {
            case "*":
                // ignore: + is greater than *
                break;
            case "?":
                // non-greedy qualifier
                fragment += repetition;
                break;
            case "+":
                // ignore: already +
                break;
            default:
                if (repetition.startsWith("{0")) {
                    // a count that may be 0 cannot replace the +
                    fragment = "(?:" + fragment + ")" + repetition;
                }
                else {
                    // remove + and replace with count
                    fragment = fragment.substring(0, fragment.length - 1) + repetition;
                }
                break;
        }
    }
    else if (endsWithUnescaped(fragment, "*")) {
        switch (repetition) {
            case "*":
                // ignore: already *
                break;
            case "?":
                // non-greedy qualifier
                fragment += repetition;
                break;
            default:
                // remove * and replace with count
                fragment = fragment.substring(0, fragment.length - 1) + repetition;
                break;
        }
    }
    else {
        fragment += repetition;
    }

    return fragment;
}

/**
 * Checks if the fragment ends with the given symbol and that symbol is not escaped
 *
 * @param fragment fragment of regular expression
 * @param symbol the symbol to look for
 * @returns true if the fragment ends with an unescaped symbol
 * @internal
 */
function endsWithUnescaped(fragment: string, symbol: string): boolean {
    if (!fragment.endsWith(symbol)) {
        return false;
    }

    // an odd number of backslashes in front of the symbol means that the symbol is escaped
    let backslashes = 0;

    for (let i = fragment.length - symbol.length - 1; i >= 0 && fragment[i] === "\\"; i--) {
        backslashes++;
    }

    return backslashes % 2 === 0;
}

View File

@ -7,7 +7,7 @@
import { EmbeddedActionsParser, IOrAlt, IToken } from "chevrotain";
import * as T from "./tokens";
import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST, RegexDialect } from "./generator";
import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST, RegexDialect, BackrefStatementCST, GeneratorContext, IfPatternStatementCST, IfIdentStatementCST } from "./generator";
import { first, usefulConditional, unusedParameter, CommonError } from "./utilities";
/**
@ -60,7 +60,7 @@ export class ParseResult {
* @public
*/
public validate(language: RegexDialect): CommonError[] {
return this.regexp_cst.validate(language).map(CommonError.fromSemanticError);
return this.regexp_cst.validate(language, new GeneratorContext()).map(CommonError.fromSemanticError);
}
/**
@ -558,12 +558,107 @@ export class Human2RegexParser extends EmbeddedActionsParser {
return new RepeatStatementCST(tokens, optional, count, statements);
});
// Grammar rule for a backreference statement. As consumed below, the accepted shape is:
//   [Optional] Call [CountSubStatement] [[The] Group [Called]] Identifier EndOfLine
// and the rule produces a BackrefStatementCST
const BackrefStatement = $.RULE("BackrefStatement", () => {
// tokens handed to the CST node (Optional, Call and EndOfLine are collected; the identifier token is not)
const tokens: IToken[] = [];
let optional = false;
let count: CountSubStatementCST | null = null;
// leading Optional token marks the whole backreference as optional
$.OPTION5(() => {
tokens.push($.CONSUME(T.Optional));
optional = true;
});
tokens.push($.CONSUME(T.Call));
// optional repetition count
$.OPTION6(() => count = $.SUBRULE(CountSubStatement));
// optional filler: [The] Group [Called]
$.OPTION7(() => {
$.OPTION(() => $.CONSUME(T.The));
$.CONSUME(T.Group);
$.OPTION2(() => $.CONSUME(T.Called));
});
// name of the group being referenced
const name = $.CONSUME(T.Identifier).image;
tokens.push($.CONSUME4(T.EndOfLine));
return new BackrefStatementCST(tokens, optional, count, name);
});
// Grammar rule for an if statement. The condition is either a bare Identifier or a Match followed by
// one or more match sub statements. An indented block of statements forms the true path and an
// optional Else block forms the false path.
// Produces an IfIdentStatementCST when an identifier was consumed, an IfPatternStatementCST otherwise
const IfStatement = $.RULE("IfStatement", () => {
// tokens handed to the CST node (the If token and the EndOfLine after the condition)
const tokens: IToken[] = [];
// condition values collected by the Match alternative
const msv: MatchStatementValue[] = [];
// optionality of the match value currently being parsed
let optional = false;
const true_statements: StatementCST[] = [];
const false_statements: StatementCST[] = [];
// stays "" unless the Identifier alternative is taken; used below to pick the CST node type
let name: string = "";
tokens.push($.CONSUME(T.If));
$.OR2([
// condition on a group identifier
{ALT: () => {
name = $.CONSUME(T.Identifier).image;
}},
// condition on a pattern: Match [Optional] value, followed by any number of further values
{ALT: () => {
$.CONSUME(T.Match);
$.OPTION4(() => {
$.CONSUME3(T.Optional);
optional = true;
});
msv.push(new MatchStatementValue(optional, $.SUBRULE(MatchSubStatement)));
$.MANY(() => {
// separator between values: "[And] Then" or "And"
$.OR([
{ ALT: () => {
$.OPTION2(() => $.CONSUME2(T.And));
$.CONSUME(T.Then);
}},
{ ALT: () => $.CONSUME(T.And) },
]);
// optionality is tracked per value, so reset it before looking at the next one
optional = false;
$.OPTION3(() => {
$.CONSUME2(T.Optional);
optional = true;
});
msv.push(new MatchStatementValue(optional, $.SUBRULE2(MatchSubStatement)));
});
}}
]);
tokens.push($.CONSUME3(T.EndOfLine));
// true path: an indented block with at least one statement
$.CONSUME2(T.Indent);
$.AT_LEAST_ONE2(() => {
true_statements.push($.SUBRULE(Statement));
});
$.CONSUME2(T.Outdent);
// optional false path: Else, end of line, then an indented block with at least one statement
$.OPTION(() => {
$.CONSUME(T.Else);
$.CONSUME4(T.EndOfLine);
$.CONSUME3(T.Indent);
$.AT_LEAST_ONE3(() => {
false_statements.push($.SUBRULE2(Statement));
});
$.CONSUME3(T.Outdent);
});
// no identifier was consumed: this is a pattern conditional
if (name === "") {
return new IfPatternStatementCST(tokens, msv, true_statements, false_statements);
}
else {
return new IfIdentStatementCST(tokens, name, true_statements, false_statements);
}
});
// statement super class
const Statement = $.RULE("Statement", () => {
return $.OR([
{ ALT: () => $.SUBRULE(MatchStatement) },
{ ALT: () => $.SUBRULE(GroupStatement) },
{ ALT: () => $.SUBRULE(RepeatStatement) }
{ ALT: () => $.SUBRULE(RepeatStatement) },
{ ALT: () => $.SUBRULE(BackrefStatement) },
{ ALT: () => $.SUBRULE(IfStatement) }
]);
});

View File

@ -53,34 +53,17 @@ import { createToken, Lexer } from "chevrotain";
/** @internal */ export const From = createToken({name: "From", pattern: /from/i});
/** @internal */ export const To = createToken({name: "To", pattern: /(to|through|thru|\-|\.\.\.?)/i});
/** @internal */ export const Create = createToken({name: "Create", pattern: /create(s)?/i});
/** @internal */ export const Called = createToken({name: "Called", pattern: /name(d)?|call(ed)?/i});
/** @internal */ export const Called = createToken({name: "Called", pattern: /named|called/i});
/** @internal */ export const Repeat = createToken({name: "Repeat", pattern: /repeat(s|ing)?/i});
/** @internal */ export const Newline = createToken({name: "Newline", pattern: /(new line|newline)/i});
/** @internal */ export const CarriageReturn = createToken({name: "CarriageReturn", pattern: /carriage return/i});
/** @internal */ export const CaseInsensitive = createToken({name: "CaseInsensitive", pattern: /case insensitive/i});
/** @internal */ export const CaseSensitive = createToken({name: "CaseSensitive", pattern: /case sensitive/i});
/** @internal */ export const OrMore = createToken({name: "OrMore", pattern: /\+|or more/i});
/*
//Not being used currently
export const Of = createToken({name: "Of", pattern: /of/i});
export const Nothing = createToken({name: "Nothing", pattern: /nothing/i});
export const As = createToken({name: "As", pattern: /as/i});
export const If = createToken({name: "If", pattern: /if/i});
export const Start = createToken({name: "Start", pattern: /start(s) with?/i});
export const Ends = createToken({name: "Ends", pattern: /end(s)? with/i});
export const Else = createToken({name: "Else", pattern: /(other wise|otherwise|else)/i});
export const Unless = createToken({name: "Unless", pattern: /unless/i});
export const While = createToken({name: "While", pattern: /while/i});
export const More = createToken({name: "More", pattern: /more/i});
export const LBracket = createToken({name: "Left Bracket", pattern: /\(/ });
export const RBracket = createToken({name: "Right Bracket", pattern: /\)/ });
export const None = createToken({name: "None", pattern: /none/i});
export const Neither = createToken({name: "Neither", pattern: /neither/i});
export const The = createToken({name: "The", pattern: /the/i }); //, longer_alt: Then});
export const By = createToken({name: "By", pattern: /by/i});
*/
// Keyword that starts a backreference: "call", "invoke", "execute", "run", "rerun" or "re run" (case insensitive)
/** @internal */ export const Call = createToken({name: "Call", pattern: /call|invoke|execute|(re ?)?run/i });
// Filler word "the"
// NOTE(review): /the/i also matches the first letters of longer words such as "then" - confirm the token order in AllTokens resolves this
/** @internal */ export const The = createToken({name: "The", pattern: /the/i });
// Keyword that starts a conditional
/** @internal */ export const If = createToken({name: "If", pattern: /if/i });
// Keyword that starts the false path of a conditional: "else" or "otherwise"
/** @internal */ export const Else = createToken({name: "Else", pattern: /else|otherwise/i });
/** @internal */ export const EndOfLine = createToken({name: "EOL", pattern: /\n/});
/** @internal */ export const WS = createToken({name: "Whitespace", pattern: /[^\S\n]+/, start_chars_hint: [ " ", "\r" ], group: Lexer.SKIPPED});
@ -127,22 +110,11 @@ export const AllTokens = [
Whitespace,
Number,
Unicode,
/*
Of,
As,
Called,
Call,
If,
Start,
Ends,
Else,
Unless,
While,
More,
Nothing,
By,
The,
None,
Neither,
*/
Using,
Global,
Multiline,
@ -158,7 +130,6 @@ export const AllTokens = [
Exclusive,
From,
Create,
Called,
Repeat,
Newline,
CarriageReturn,

View File

@ -186,6 +186,7 @@ export class CommonError {
*
* @param error The lexing error
* @returns a new CommonError
* @internal
*/
public static fromLexError(error: ILexingError): CommonError {
// not really fond of --> and <--
@ -199,6 +200,7 @@ export class CommonError {
*
* @param error The parsing error
* @returns a new CommonError
* @internal
*/
public static fromParseError(error: IRecognitionException): CommonError {
// not really fond of --> and <--
@ -212,6 +214,7 @@ export class CommonError {
*
* @param error The semantic error
* @returns a new CommonError
* @internal
*/
public static fromSemanticError(error: ISemanticError): CommonError {
return new CommonError("Semantic Error", error.startLine, error.startColumn, error.length, error.message);

View File

@ -2,7 +2,7 @@
import { Human2RegexParser, Human2RegexParserOptions } from "../src/parser";
import { Human2RegexLexer, Human2RegexLexerOptions } from "../src/lexer";
import { RegexDialect, minimizeMatchString } from "../src/generator";
import { RegexDialect } from "../src/generator";
describe("Generator functionality", function() {
@ -67,6 +67,14 @@ describe("Generator functionality", function() {
const toks5 = lexer.tokenize('match between 2 and 2 exclusive "hello"').tokens;
const reg5 = parser.parse(toks5);
expect(reg5.validate(RegexDialect.JS).length).toBeGreaterThan(0);
const toks6 = lexer.tokenize('create a group called thing\n\tmatch "hi"\ncreate a group called thing\n\tmatch "hi"\n').tokens;
const reg6 = parser.parse(toks6);
expect(reg6.validate(RegexDialect.JS).length).toBeGreaterThan(0);
const toks7 = lexer.tokenize("invoke thing").tokens;
const reg7 = parser.parse(toks7);
expect(reg7.validate(RegexDialect.JS).length).toBeGreaterThan(0);
});
it("handles ranges", function() {
@ -97,6 +105,12 @@ describe("Generator functionality", function() {
expect(reg2.validate(RegexDialect.JS).length).toBe(0);
expect(reg2.toRegex(RegexDialect.JS)).toBe("/[a-zA-Z][+-]?\\d+[+-]?(?:(?:\\d+[,.]?\\d*)|(?:[,.]\\d+))/");
expect(reg2.toRegex(RegexDialect.PCRE)).toBe("/[[:alpha:]][+-]?\\d+[+-]?(?:(?:\\d+[,.]?\\d*)|(?:[,.]\\d+))/");
const toks3 = lexer.tokenize("match not letter, not integer, not decimal").tokens;
const reg3 = parser.parse(toks3);
expect(reg3.validate(RegexDialect.JS).length).toBe(0);
expect(reg3.toRegex(RegexDialect.JS)).toBe("/[^a-zA-Z](?![+-]?\\d+)(?![+-]?(?:(?:\\d+[,.]?\\d*)|(?:[,.]\\d+)))/");
expect(reg3.toRegex(RegexDialect.PCRE)).toBe("/[^[:alpha:]](?![+-]?\\d+)(?![+-]?(?:(?:\\d+[,.]?\\d*)|(?:[,.]\\d+)))/");
});
it("doesn't clobber repetition", function() {
@ -115,23 +129,6 @@ describe("Generator functionality", function() {
expect(reg1.toRegex(RegexDialect.JS)).toBe("/(?!hello){1,6}/");
});
it("can minimize matches", function() {
const test_cases = [
{ from: [ "abc", "abc" ], to: "abc" },
{ from: [ "a", "ab" ], to: "ab?" },
{ from: [ "a1x1z", "a2y2z", "a3z3z" ], to: "a(?:1x1|2y2|3z3)z" },
{ from: [ "ab", "cd" ], to: "ab|cd" },
{ from: [ "abc", "bc" ], to: "a?bc" },
{ from: [ "abc", "xb" ], to: "abc|xb" }
];
for (const c of test_cases) {
const got = minimizeMatchString(c.from);
expect(got).toBe(c.to);
}
});
it("optimizes correctly", function() {
const toks0 = lexer.tokenize('match "a" or "b" or "b"').tokens;
const reg0 = parser.parse(toks0);
@ -157,6 +154,44 @@ describe("Generator functionality", function() {
const reg4 = parser.parse(toks4);
expect(reg4.validate(RegexDialect.JS).length).toBe(0);
expect(reg4.toRegex(RegexDialect.JS)).toBe("/a(?:1x1|2x2|3x3)z/");
const toks5 = lexer.tokenize('match "a", maybe "b" or "c"').tokens;
const reg5 = parser.parse(toks5);
expect(reg5.validate(RegexDialect.JS).length).toBe(0);
expect(reg5.toRegex(RegexDialect.JS)).toBe("/a[bc]?/");
});
it("can generate backreferences", function() {
const toks0 = lexer.tokenize('create a group called thing\n\tmatch "Hello World"\ninvoke thing\noptionally call 3 times the group called thing').tokens;
const reg0 = parser.parse(toks0);
expect(reg0.validate(RegexDialect.JS).length).toBe(0);
expect(reg0.toRegex(RegexDialect.JS)).toBe("/(?<thing>Hello World)\\g<thing>(?:\\g<thing>{3})?/");
expect(reg0.toRegex(RegexDialect.PCRE)).toBe("/(?P<thing>Hello World)\\g<thing>(?:\\g<thing>{3})?/");
expect(reg0.toRegex(RegexDialect.Python)).toBe("/(?P<thing>Hello World)(?P=thing)(?:(?P=thing){3})?/");
expect(reg0.toRegex(RegexDialect.DotNet)).toBe("/(?<thing>Hello World)\\k<thing>(?:\\k<thing>{3})?/");
});
it("can generate if statements", function() {
const toks0 = lexer.tokenize('if matches "a"\n\tmatch "b"\n').tokens;
const reg0 = parser.parse(toks0);
expect(reg0.validate(RegexDialect.JS).length).toBeGreaterThan(0);
expect(reg0.validate(RegexDialect.PCRE).length).toBe(0);
expect(reg0.toRegex(RegexDialect.PCRE)).toBe("/(?(a)b)/");
const toks1 = lexer.tokenize('if matches "alpha", maybe "b" or "f"\n\tmatch "c"\nelse\n\tif matches "d"\n\t\tmatch "e"\n\telse\n\t\tmatch "f"').tokens;
const reg1 = parser.parse(toks1);
expect(reg1.validate(RegexDialect.JS).length).toBeGreaterThan(0);
expect(reg1.validate(RegexDialect.Python).length).toBeGreaterThan(0);
expect(reg1.validate(RegexDialect.PCRE).length).toBe(0);
expect(reg1.toRegex(RegexDialect.PCRE)).toBe("/(?(alpha[bf]?)c|(?(d)e|f))/");
const toks2 = lexer.tokenize('create a group called thing\n\tmatch "a"\nif thing\n\tmatch "b"\nelse\n\tmatch "c"\n').tokens;
const reg2 = parser.parse(toks2);
expect(reg2.validate(RegexDialect.JS).length).toBeGreaterThan(0);
expect(reg2.validate(RegexDialect.PCRE).length).toBe(0);
expect(reg2.toRegex(RegexDialect.PCRE)).toBe("/(?P<thing>a)(?(thing)b|c)/");
expect(reg2.toRegex(RegexDialect.Boost)).toBe("/(?<thing>a)(?(<thing>)b|c)/");
});
it("generate dialect specific regex", function() {
@ -187,7 +222,7 @@ describe("Generator functionality", function() {
it("runs complex scripts", function() {
const str = `
using global and multiline and exact matching
using global and multiline and exact matching and case insensitive matching
create an optional group called protocol
match "http"
optionally match "s"
@ -222,6 +257,6 @@ create an optional group
const toks = lexer.tokenize(str).tokens;
const reg = parser.parse(toks);
expect(reg.validate(RegexDialect.JS).length).toBe(0);
expect(reg.toRegex(RegexDialect.JS)).toBe("/^(?<protocol>https?\\:\\/\\/)?(?<subdomain>(?:\\w+\\.)*)?(?<domain>(?:\\w+|_|\\-)+\\.\\w+)(?:\\:\\d*)?(?<path>(?:\\/(?:\\w+|_|\\-)*)*)?(\\?(?<query>(?:(?:\\w+|_|\\-)+=(?:\\w+|_|\\-)+)*))?(#.*)?$/gm");
expect(reg.toRegex(RegexDialect.JS)).toBe("/^(?<protocol>https?\\:\\/\\/)?(?<subdomain>(?:\\w+\\.)*)?(?<domain>(?:\\w+|_|\\-)+\\.\\w+)(?:\\:\\d*)?(?<path>(?:\\/(?:\\w+|_|\\-)*)*)?(\\?(?<query>(?:(?:\\w+|_|\\-)+=(?:\\w+|_|\\-)+)*))?(#.*)?$/gmi");
});
});

View File

@ -0,0 +1,63 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { minimizeMatchString, groupIfRequired, dontClobberRepetition } from "../src/generator_helper";
// Unit tests for the helpers exported by generator_helper; every test is table driven
describe("Generator helper functionality", function() {
// minimizeMatchString: `from` is the list of alternatives, `to` the expected minimized fragment
it("can minimize matches", function() {
const test_cases = [
// no alternatives at all
{ from: [], to: "" },
// a lone alternative is returned untouched
{ from: [ "abc" ], to: "abc" },
// duplicates collapse into one
{ from: [ "abc", "abc" ], to: "abc" },
// common prefix, the rest becomes optional
{ from: [ "a", "ab" ], to: "ab?" },
// common prefix and suffix around a grouped alternation
{ from: [ "a1x1z", "a2y2z", "a3z3z" ], to: "a(?:1x1|2y2|3z3)z" },
// nothing in common: plain alternation
{ from: [ "ab", "cd" ], to: "ab|cd" },
// common suffix, the rest becomes optional
{ from: [ "abc", "bc" ], to: "a?bc" },
{ from: [ "abc", "xb" ], to: "abc|xb" }
];
for (const c of test_cases) {
const got = minimizeMatchString(c.from);
expect(got).toBe(c.to);
}
});
// groupIfRequired: fragments that are already one group or one character class stay as they are
it("groups correctly", function() {
const test_cases = [
{ from: "(?P=test)", to: "(?P=test)" },
{ from: "[abc\\]]", to: "[abc\\]]" },
{ from: "abc", to: "(?:abc)" },
{ from: "(abc)|d", to: "(?:(abc)|d)" },
{ from: "[abc\\]][abc]", to: "(?:[abc\\]][abc])" },
{ from: "(abc(abc)\\))(abc)", to: "(?:(abc(abc)\\))(abc))" },
];
for (const c of test_cases) {
const got = groupIfRequired(c.from);
expect(got).toBe(c.to);
}
});
// dontClobberRepetition: a trailing + or * is merged with the requested repetition
it("doesn't clobber the repetition", function() {
const test_cases = [
{ fragment: "1+", repetition: "+", expected: "1+" },
{ fragment: "1*", repetition: "+", expected: "1+" },
{ fragment: "1+", repetition: "*", expected: "1+" },
{ fragment: "1*", repetition: "*", expected: "1*" },
{ fragment: "1+", repetition: "?", expected: "1+?" },
{ fragment: "1*", repetition: "?", expected: "1*?" },
{ fragment: "1+", repetition: "{0,}", expected: "(?:1+){0,}" },
{ fragment: "1*", repetition: "{0,}", expected: "1{0,}" },
{ fragment: "1+", repetition: "{1,2}", expected: "1{1,2}" },
{ fragment: "1*", repetition: "{1,2}", expected: "1{1,2}" },
];
for (const c of test_cases) {
const got = dontClobberRepetition(c.fragment, c.repetition);
expect(got).toBe(c.expected);
}
});
});

View File

@ -95,8 +95,7 @@ module.exports = {
after: {
root: "./lib",
include: [
"script.d.ts",
"script.d.ts.map"
"script.d.ts"
]
}
})