mirror of
https://github.com/pdemian/human2regex.git
synced 2025-05-16 12:30:09 -07:00
Merge branch 'new-features' into dependabot/npm_and_yarn/node-notifier-8.0.1
This commit is contained in:
commit
ed89a2995a
@ -81,6 +81,7 @@ The API reference is available [here](API.md)
|
|||||||
|
|
||||||
|
|
||||||
## Todo
|
## Todo
|
||||||
- Add more regex options such as back references, subroutines, lookahead/behind, and more character classes (eg, `[:alpha:]`)
|
- Add more regex options such as subroutines, conditions, and lookahead/behind
|
||||||
- Fix error messages (They sometimes point to the wrong location, off by 1 errors, etc)
|
- Fix error messages (They sometimes point to the wrong location, off by 1 errors, etc)
|
||||||
|
- Add more useful lex/parse errors (What even is an EarlyExitException?)
|
||||||
- Use a different/better static site generation method
|
- Use a different/better static site generation method
|
15
docs/bundle.min.js
vendored
15
docs/bundle.min.js
vendored
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
140
lib/generator.d.ts
vendored
140
lib/generator.d.ts
vendored
@ -21,29 +21,45 @@ export interface ISemanticError {
|
|||||||
message: string;
|
message: string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* The base concrete syntax tree class
|
* Context for validation
|
||||||
*
|
*
|
||||||
|
* @remarks Currently only used to validate groups
|
||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
export declare abstract class H2RCST {
|
export declare class GeneratorContext {
|
||||||
tokens: IToken[];
|
groups: {
|
||||||
|
[key: string]: {
|
||||||
|
startLine: number;
|
||||||
|
startColumn: number;
|
||||||
|
length: number;
|
||||||
|
};
|
||||||
|
};
|
||||||
/**
|
/**
|
||||||
* Constructor for H2RCST
|
* Checks to see if we already have a group defined
|
||||||
*
|
*
|
||||||
* @param tokens Tokens used to calculate where an error occured
|
* @param identifier the group name
|
||||||
* @internal
|
* @returns true if the group name already exists
|
||||||
*/
|
*/
|
||||||
constructor(tokens: IToken[]);
|
hasGroup(identifier: string): boolean;
|
||||||
|
/**
|
||||||
|
* Adds the identifier to the group list
|
||||||
|
*
|
||||||
|
* @param identifier the group name
|
||||||
|
*/
|
||||||
|
addGroup(identifier: string, tokens: IToken[]): void;
|
||||||
|
}
|
||||||
|
interface Generates {
|
||||||
/**
|
/**
|
||||||
* Validate that this is both valid and can be generated in the specified language
|
* Validate that this is both valid and can be generated in the specified language
|
||||||
*
|
*
|
||||||
* @remarks There is no guarantee toRegex will work unless validate returns no errors
|
* @remarks There is no guarantee toRegex will work unless validate returns no errors
|
||||||
*
|
*
|
||||||
* @param language the regex dialect we're validating
|
* @param language the regex dialect we're validating
|
||||||
|
* @param context the generator context
|
||||||
* @returns A list of errors
|
* @returns A list of errors
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
abstract validate(language: RegexDialect): ISemanticError[];
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
/**
|
/**
|
||||||
* Generate a regular expression fragment based on this syntax tree
|
* Generate a regular expression fragment based on this syntax tree
|
||||||
*
|
*
|
||||||
@ -53,6 +69,23 @@ export declare abstract class H2RCST {
|
|||||||
* @returns a regular expression fragment
|
* @returns a regular expression fragment
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
|
toRegex(language: RegexDialect): string;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* The base concrete syntax tree class
|
||||||
|
*
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export declare abstract class H2RCST implements Generates {
|
||||||
|
tokens: IToken[];
|
||||||
|
/**
|
||||||
|
* Constructor for H2RCST
|
||||||
|
*
|
||||||
|
* @param tokens Tokens used to calculate where an error occured
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
constructor(tokens: IToken[]);
|
||||||
|
abstract validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
abstract toRegex(language: RegexDialect): string;
|
abstract toRegex(language: RegexDialect): string;
|
||||||
/**
|
/**
|
||||||
* Creates an ISemanticError with a given message and the tokens provided from the constructor
|
* Creates an ISemanticError with a given message and the tokens provided from the constructor
|
||||||
@ -126,7 +159,7 @@ export declare class MatchSubStatementValue {
|
|||||||
*
|
*
|
||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
export declare class MatchStatementValue {
|
export declare class MatchStatementValue implements Generates {
|
||||||
optional: boolean;
|
optional: boolean;
|
||||||
statement: MatchSubStatementCST;
|
statement: MatchSubStatementCST;
|
||||||
/**
|
/**
|
||||||
@ -137,6 +170,8 @@ export declare class MatchStatementValue {
|
|||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
constructor(optional: boolean, statement: MatchSubStatementCST);
|
constructor(optional: boolean, statement: MatchSubStatementCST);
|
||||||
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
|
toRegex(language: RegexDialect): string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* The base class for all statement concrete syntax trees
|
* The base class for all statement concrete syntax trees
|
||||||
@ -163,7 +198,7 @@ export declare class MatchSubStatementCST extends H2RCST {
|
|||||||
* @param values sub statements to match
|
* @param values sub statements to match
|
||||||
*/
|
*/
|
||||||
constructor(tokens: IToken[], count: CountSubStatementCST | null, invert: boolean, values: MatchSubStatementValue[]);
|
constructor(tokens: IToken[], count: CountSubStatementCST | null, invert: boolean, values: MatchSubStatementValue[]);
|
||||||
validate(language: RegexDialect): ISemanticError[];
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
toRegex(language: RegexDialect): string;
|
toRegex(language: RegexDialect): string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
@ -180,7 +215,7 @@ export declare class UsingStatementCST extends H2RCST {
|
|||||||
* @param flags using flags
|
* @param flags using flags
|
||||||
*/
|
*/
|
||||||
constructor(tokens: IToken[], flags: UsingFlags[]);
|
constructor(tokens: IToken[], flags: UsingFlags[]);
|
||||||
validate(language: RegexDialect): ISemanticError[];
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
toRegex(language: RegexDialect): string;
|
toRegex(language: RegexDialect): string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
@ -201,7 +236,7 @@ export declare class CountSubStatementCST extends H2RCST {
|
|||||||
* @param opt option modifier
|
* @param opt option modifier
|
||||||
*/
|
*/
|
||||||
constructor(tokens: IToken[], from: number, to?: number | null, opt?: "inclusive" | "exclusive" | "+" | null);
|
constructor(tokens: IToken[], from: number, to?: number | null, opt?: "inclusive" | "exclusive" | "+" | null);
|
||||||
validate(language: RegexDialect): ISemanticError[];
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
toRegex(language: RegexDialect): string;
|
toRegex(language: RegexDialect): string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
@ -216,10 +251,10 @@ export declare class MatchStatementCST extends StatementCST {
|
|||||||
* Constructor for MatchStatementCST
|
* Constructor for MatchStatementCST
|
||||||
*
|
*
|
||||||
* @param tokens Tokens used to calculate where an error occured
|
* @param tokens Tokens used to calculate where an error occured
|
||||||
* @param matches
|
* @param matches the list of matches
|
||||||
*/
|
*/
|
||||||
constructor(tokens: IToken[], completely_optional: boolean, matches: MatchStatementValue[]);
|
constructor(tokens: IToken[], completely_optional: boolean, matches: MatchStatementValue[]);
|
||||||
validate(language: RegexDialect): ISemanticError[];
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
toRegex(language: RegexDialect): string;
|
toRegex(language: RegexDialect): string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
@ -240,7 +275,7 @@ export declare class RepeatStatementCST extends StatementCST {
|
|||||||
* @param statements the statements to repeat
|
* @param statements the statements to repeat
|
||||||
*/
|
*/
|
||||||
constructor(tokens: IToken[], optional: boolean, count: CountSubStatementCST | null, statements: StatementCST[]);
|
constructor(tokens: IToken[], optional: boolean, count: CountSubStatementCST | null, statements: StatementCST[]);
|
||||||
validate(language: RegexDialect): ISemanticError[];
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
toRegex(language: RegexDialect): string;
|
toRegex(language: RegexDialect): string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
@ -262,7 +297,70 @@ export declare class GroupStatementCST extends StatementCST {
|
|||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
constructor(tokens: IToken[], optional: boolean, name: string | null, statements: StatementCST[]);
|
constructor(tokens: IToken[], optional: boolean, name: string | null, statements: StatementCST[]);
|
||||||
validate(language: RegexDialect): ISemanticError[];
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
|
toRegex(language: RegexDialect): string;
|
||||||
|
}
|
||||||
|
/**
 * Concrete Syntax Tree node for a Backreference statement
 *
 * @internal
 */
export declare class BackrefStatementCST extends StatementCST {
    private optional;
    private count;
    private name;
    /**
     * Builds a BackrefStatementCST
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param optional whether this backref is optional
     * @param count number of times to repeat, or null when no count was given
     * @param name the name of the group to call
     */
    constructor(tokens: IToken[], optional: boolean, count: CountSubStatementCST | null, name: string);
    validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
    toRegex(language: RegexDialect): string;
}
|
||||||
|
/**
 * Concrete Syntax Tree node for an If Pattern statement
 *
 * @internal
 */
export declare class IfPatternStatementCST extends StatementCST {
    private matches;
    private true_statements;
    private false_statements;
    /**
     * Builds an IfPatternStatementCST
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param matches the matches that make up the condition being tested
     * @param true_statements statements for the true path
     * @param false_statements statements for the false path
     */
    constructor(tokens: IToken[], matches: MatchStatementValue[], true_statements: StatementCST[], false_statements: StatementCST[]);
    validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
    toRegex(language: RegexDialect): string;
}
|
||||||
|
/**
 * Concrete Syntax Tree node for an If group Ident statement
 *
 * @internal
 */
export declare class IfIdentStatementCST extends StatementCST {
    private identifier;
    private true_statements;
    private false_statements;
    /**
     * Builds an IfIdentStatementCST
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param identifier the identifier of the group to check
     * @param true_statements statements for the true path
     * @param false_statements statements for the false path
     */
    constructor(tokens: IToken[], identifier: string, true_statements: StatementCST[], false_statements: StatementCST[]);
    validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
    toRegex(language: RegexDialect): string;
}
|
||||||
/**
|
/**
|
||||||
@ -282,13 +380,7 @@ export declare class RegularExpressionCST extends H2RCST {
|
|||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
constructor(tokens: IToken[], usings: UsingStatementCST, statements: StatementCST[]);
|
constructor(tokens: IToken[], usings: UsingStatementCST, statements: StatementCST[]);
|
||||||
validate(language: RegexDialect): ISemanticError[];
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
toRegex(language: RegexDialect): string;
|
toRegex(language: RegexDialect): string;
|
||||||
}
|
}
|
||||||
/**
|
export {};
|
||||||
* Minimizes the match string by finding duplicates or substrings in the array
|
|
||||||
*
|
|
||||||
* @param arr the array of matches
|
|
||||||
* @internal
|
|
||||||
*/
|
|
||||||
export declare function minimizeMatchString(arr: string[]): string;
|
|
||||||
|
421
lib/generator.js
421
lib/generator.js
@ -1,12 +1,13 @@
|
|||||||
"use strict";
|
"use strict";
|
||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||||
Object.defineProperty(exports, "__esModule", { value: true });
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
exports.minimizeMatchString = exports.RegularExpressionCST = exports.GroupStatementCST = exports.RepeatStatementCST = exports.MatchStatementCST = exports.CountSubStatementCST = exports.UsingStatementCST = exports.MatchSubStatementCST = exports.StatementCST = exports.MatchStatementValue = exports.MatchSubStatementValue = exports.MatchSubStatementType = exports.UsingFlags = exports.H2RCST = exports.RegexDialect = void 0;
|
exports.RegularExpressionCST = exports.IfIdentStatementCST = exports.IfPatternStatementCST = exports.BackrefStatementCST = exports.GroupStatementCST = exports.RepeatStatementCST = exports.MatchStatementCST = exports.CountSubStatementCST = exports.UsingStatementCST = exports.MatchSubStatementCST = exports.StatementCST = exports.MatchStatementValue = exports.MatchSubStatementValue = exports.MatchSubStatementType = exports.UsingFlags = exports.H2RCST = exports.GeneratorContext = exports.RegexDialect = void 0;
|
||||||
/**
|
/**
|
||||||
* Includes all Concrete Syntax Trees for Human2Regex
|
* Includes all Concrete Syntax Trees for Human2Regex
|
||||||
* @packageDocumentation
|
* @packageDocumentation
|
||||||
*/
|
*/
|
||||||
const utilities_1 = require("./utilities");
|
const utilities_1 = require("./utilities");
|
||||||
|
const generator_helper_1 = require("./generator_helper");
|
||||||
/**
|
/**
|
||||||
* List of regular expression dialects we support
|
* List of regular expression dialects we support
|
||||||
*/
|
*/
|
||||||
@ -49,6 +50,42 @@ const unicode_script_codes = [
|
|||||||
"Tai_Tham", "Tai_Viet", "Takri", "Tamil", "Telugu", "Thaana", "Thai",
|
"Tai_Tham", "Tai_Viet", "Takri", "Tamil", "Telugu", "Thaana", "Thai",
|
||||||
"Tibetan", "Tifinagh", "Ugaritic", "Vai", "Yi"
|
"Tibetan", "Tifinagh", "Ugaritic", "Vai", "Yi"
|
||||||
];
|
];
|
||||||
|
/**
|
||||||
|
* Context for validation
|
||||||
|
*
|
||||||
|
* @remarks Currently only used to validate groups
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
class GeneratorContext {
|
||||||
|
constructor() {
|
||||||
|
this.groups = {};
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Checks to see if we already have a group defined
|
||||||
|
*
|
||||||
|
* @param identifier the group name
|
||||||
|
* @returns true if the group name already exists
|
||||||
|
*/
|
||||||
|
hasGroup(identifier) {
|
||||||
|
return Object.prototype.hasOwnProperty.call(this.groups, identifier);
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Adds the identifier to the group list
|
||||||
|
*
|
||||||
|
* @param identifier the group name
|
||||||
|
*/
|
||||||
|
addGroup(identifier, tokens) {
|
||||||
|
var _a, _b, _c;
|
||||||
|
const f = utilities_1.first(tokens);
|
||||||
|
const l = utilities_1.last(tokens);
|
||||||
|
this.groups[identifier] = {
|
||||||
|
startLine: (_a = f.startLine) !== null && _a !== void 0 ? _a : NaN,
|
||||||
|
startColumn: (_b = f.startColumn) !== null && _b !== void 0 ? _b : NaN,
|
||||||
|
length: ((_c = l.endOffset) !== null && _c !== void 0 ? _c : l.startOffset) - f.startOffset,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
exports.GeneratorContext = GeneratorContext;
|
||||||
/**
|
/**
|
||||||
* The base concrete syntax tree class
|
* The base concrete syntax tree class
|
||||||
*
|
*
|
||||||
@ -166,6 +203,17 @@ class MatchStatementValue {
|
|||||||
this.statement = statement;
|
this.statement = statement;
|
||||||
/* empty */
|
/* empty */
|
||||||
}
|
}
|
||||||
|
validate(language, context) {
|
||||||
|
return this.statement.validate(language, context);
|
||||||
|
}
|
||||||
|
toRegex(language) {
|
||||||
|
let match_stmt = this.statement.toRegex(language);
|
||||||
|
// need to group if optional and ungrouped
|
||||||
|
if (this.optional) {
|
||||||
|
match_stmt = generator_helper_1.groupIfRequired(match_stmt) + "?";
|
||||||
|
}
|
||||||
|
return match_stmt;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
exports.MatchStatementValue = MatchStatementValue;
|
exports.MatchStatementValue = MatchStatementValue;
|
||||||
/**
|
/**
|
||||||
@ -196,10 +244,10 @@ class MatchSubStatementCST extends H2RCST {
|
|||||||
this.invert = invert;
|
this.invert = invert;
|
||||||
this.values = values;
|
this.values = values;
|
||||||
}
|
}
|
||||||
validate(language) {
|
validate(language, context) {
|
||||||
const errors = [];
|
const errors = [];
|
||||||
if (this.count) {
|
if (this.count) {
|
||||||
utilities_1.append(errors, this.count.validate(language));
|
utilities_1.append(errors, this.count.validate(language, context));
|
||||||
}
|
}
|
||||||
for (const value of this.values) {
|
for (const value of this.values) {
|
||||||
if (value.type === MatchSubStatementType.Between) {
|
if (value.type === MatchSubStatementType.Between) {
|
||||||
@ -311,50 +359,15 @@ class MatchSubStatementCST extends H2RCST {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let ret = "";
|
let ret = generator_helper_1.minimizeMatchString(matches);
|
||||||
let require_grouping = false;
|
|
||||||
let dont_clobber_plus = false;
|
|
||||||
if (matches.length === 1) {
|
|
||||||
ret = utilities_1.first(matches);
|
|
||||||
if (ret.endsWith("+")) {
|
|
||||||
dont_clobber_plus = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
ret = minimizeMatchString(matches);
|
|
||||||
if (ret.length > 1 &&
|
|
||||||
(!ret.startsWith("(") || !ret.endsWith("["))) {
|
|
||||||
require_grouping = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (this.count) {
|
if (this.count) {
|
||||||
if (dont_clobber_plus) {
|
if (matches.length === 1) {
|
||||||
const clobber = this.count.toRegex(language);
|
// we don't group if there's only 1 element
|
||||||
// + can be ignored as well as a count as long as that count is > 0
|
// but we need to make sure we don't add an additional + or *
|
||||||
switch (clobber) {
|
ret = generator_helper_1.dontClobberRepetition(ret, this.count.toRegex(language));
|
||||||
case "*":
|
|
||||||
case "?":
|
|
||||||
ret = "(?:" + ret + ")" + clobber;
|
|
||||||
break;
|
|
||||||
case "+":
|
|
||||||
// ignore
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
if (clobber.startsWith("{0")) {
|
|
||||||
ret = "(?:" + ret + ")" + clobber;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// remove + and replace with count
|
|
||||||
ret.substring(0, ret.length - 1) + clobber;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (require_grouping) {
|
ret = generator_helper_1.groupIfRequired(ret) + this.count.toRegex(language);
|
||||||
ret = "(?:" + ret + ")";
|
|
||||||
}
|
|
||||||
ret += this.count.toRegex(language);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
@ -377,8 +390,9 @@ class UsingStatementCST extends H2RCST {
|
|||||||
super(tokens);
|
super(tokens);
|
||||||
this.flags = flags;
|
this.flags = flags;
|
||||||
}
|
}
|
||||||
validate(language) {
|
validate(language, context) {
|
||||||
utilities_1.unusedParameter(language, "Using Statement does not change based on language");
|
utilities_1.unusedParameter(language, "Count does not need checking");
|
||||||
|
utilities_1.unusedParameter(context, "Context is not needed");
|
||||||
const errors = [];
|
const errors = [];
|
||||||
let flag = this.flags[0];
|
let flag = this.flags[0];
|
||||||
for (let i = 1; i < this.flags.length; i++) {
|
for (let i = 1; i < this.flags.length; i++) {
|
||||||
@ -434,13 +448,11 @@ class CountSubStatementCST extends H2RCST {
|
|||||||
this.to = to;
|
this.to = to;
|
||||||
this.opt = opt;
|
this.opt = opt;
|
||||||
}
|
}
|
||||||
validate(language) {
|
validate(language, context) {
|
||||||
utilities_1.unusedParameter(language, "Count does not need checking");
|
utilities_1.unusedParameter(language, "Count does not need checking");
|
||||||
|
utilities_1.unusedParameter(context, "Context is not needed");
|
||||||
const errors = [];
|
const errors = [];
|
||||||
if (this.from < 0) {
|
if (this.to !== null && ((this.opt === "exclusive" && (this.to - 1) <= this.from) || this.to <= this.from)) {
|
||||||
errors.push(this.error("Value cannot be negative"));
|
|
||||||
}
|
|
||||||
else if (this.to !== null && ((this.opt === "exclusive" && (this.to - 1) <= this.from) || this.to <= this.from)) {
|
|
||||||
errors.push(this.error("Values must be in range of eachother"));
|
errors.push(this.error("Values must be in range of eachother"));
|
||||||
}
|
}
|
||||||
return errors;
|
return errors;
|
||||||
@ -483,43 +495,24 @@ class MatchStatementCST extends StatementCST {
|
|||||||
* Constructor for MatchStatementCST
|
* Constructor for MatchStatementCST
|
||||||
*
|
*
|
||||||
* @param tokens Tokens used to calculate where an error occured
|
* @param tokens Tokens used to calculate where an error occured
|
||||||
* @param matches
|
* @param matches the list of matches
|
||||||
*/
|
*/
|
||||||
constructor(tokens, completely_optional, matches) {
|
constructor(tokens, completely_optional, matches) {
|
||||||
super(tokens);
|
super(tokens);
|
||||||
this.completely_optional = completely_optional;
|
this.completely_optional = completely_optional;
|
||||||
this.matches = matches;
|
this.matches = matches;
|
||||||
}
|
}
|
||||||
validate(language) {
|
validate(language, context) {
|
||||||
const errors = [];
|
const errors = [];
|
||||||
for (const match of this.matches) {
|
for (const match of this.matches) {
|
||||||
utilities_1.append(errors, match.statement.validate(language));
|
utilities_1.append(errors, match.statement.validate(language, context));
|
||||||
}
|
}
|
||||||
return errors;
|
return errors;
|
||||||
}
|
}
|
||||||
toRegex(language) {
|
toRegex(language) {
|
||||||
let final_matches = this.matches.map((x) => {
|
let final_matches = this.matches.map((x) => x.toRegex(language)).join("");
|
||||||
let match_stmt = x.statement.toRegex(language);
|
|
||||||
// need to group if optional and ungrouped
|
|
||||||
if (x.optional) {
|
|
||||||
if (!utilities_1.isSingleRegexCharacter(match_stmt)) {
|
|
||||||
// don't re-group a group
|
|
||||||
if (match_stmt[0] !== "(" && match_stmt[match_stmt.length - 1] !== ")") {
|
|
||||||
match_stmt = "(?:" + match_stmt + ")";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
match_stmt += "?";
|
|
||||||
}
|
|
||||||
return match_stmt;
|
|
||||||
}).join("");
|
|
||||||
if (this.completely_optional) {
|
if (this.completely_optional) {
|
||||||
if (!utilities_1.isSingleRegexCharacter(final_matches)) {
|
final_matches = generator_helper_1.groupIfRequired(final_matches) + "?";
|
||||||
// don't re-group a group
|
|
||||||
if (final_matches[0] !== "(" && final_matches[final_matches.length - 1] !== ")") {
|
|
||||||
final_matches = "(?:" + final_matches + ")";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
final_matches += "?";
|
|
||||||
}
|
}
|
||||||
return final_matches;
|
return final_matches;
|
||||||
}
|
}
|
||||||
@ -545,18 +538,18 @@ class RepeatStatementCST extends StatementCST {
|
|||||||
this.count = count;
|
this.count = count;
|
||||||
this.statements = statements;
|
this.statements = statements;
|
||||||
}
|
}
|
||||||
validate(language) {
|
validate(language, context) {
|
||||||
const errors = [];
|
const errors = [];
|
||||||
if (this.count !== null) {
|
if (this.count !== null) {
|
||||||
utilities_1.append(errors, this.count.validate(language));
|
utilities_1.append(errors, this.count.validate(language, context));
|
||||||
}
|
}
|
||||||
for (const statement of this.statements) {
|
for (const statement of this.statements) {
|
||||||
utilities_1.append(errors, statement.validate(language));
|
utilities_1.append(errors, statement.validate(language, context));
|
||||||
}
|
}
|
||||||
return errors;
|
return errors;
|
||||||
}
|
}
|
||||||
toRegex(language) {
|
toRegex(language) {
|
||||||
let str = "(?:" + this.statements.map((x) => x.toRegex(language)).join("") + ")";
|
let str = generator_helper_1.groupIfRequired(this.statements.map((x) => x.toRegex(language)).join(""));
|
||||||
if (this.count) {
|
if (this.count) {
|
||||||
str += this.count.toRegex(language);
|
str += this.count.toRegex(language);
|
||||||
// group for optionality because count would be incorrect otherwise
|
// group for optionality because count would be incorrect otherwise
|
||||||
@ -595,14 +588,19 @@ class GroupStatementCST extends StatementCST {
|
|||||||
this.name = name;
|
this.name = name;
|
||||||
this.statements = statements;
|
this.statements = statements;
|
||||||
}
|
}
|
||||||
validate(language) {
|
validate(language, context) {
|
||||||
const errors = [];
|
const errors = [];
|
||||||
// All languages currently support named groups
|
if (this.name !== null) {
|
||||||
//if (false) {
|
if (context.hasGroup(this.name)) {
|
||||||
// errors.push(this.error("This language does not support named groups"));
|
const past_group = context.groups[this.name];
|
||||||
//}
|
errors.push(this.error(`Group with name "${this.name}" was already defined here: ${past_group.startLine}:${past_group.startLine}-${past_group.startLine}:${past_group.startLine + past_group.length}`));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
context.addGroup(this.name, this.tokens);
|
||||||
|
}
|
||||||
|
}
|
||||||
for (const statement of this.statements) {
|
for (const statement of this.statements) {
|
||||||
utilities_1.append(errors, statement.validate(language));
|
utilities_1.append(errors, statement.validate(language, context));
|
||||||
}
|
}
|
||||||
return errors;
|
return errors;
|
||||||
}
|
}
|
||||||
@ -623,6 +621,169 @@ class GroupStatementCST extends StatementCST {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
exports.GroupStatementCST = GroupStatementCST;
|
exports.GroupStatementCST = GroupStatementCST;
|
||||||
|
/**
 * Concrete Syntax Tree for a Backreference statement
 *
 * @internal
 */
class BackrefStatementCST extends StatementCST {
    /**
     * Constructor for BackrefStatementCST
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param optional is this backref optional
     * @param count optional number of times to repeat
     * @param name the group name to call
     */
    constructor(tokens, optional, count, name) {
        super(tokens);
        this.optional = optional;
        this.count = count;
        this.name = name;
    }
    /**
     * Checks that the referenced group is already registered in the context
     * and, when a count is present, that the count is valid
     *
     * @param language the regex dialect we're validating
     * @param context the generator context holding the groups registered so far
     * @returns A list of errors
     */
    validate(language, context) {
        const errors = [];
        if (!context.hasGroup(this.name)) {
            errors.push(this.error(`Cannot call group with name "${this.name}" as it was never previously defined`));
        }
        if (this.count !== null) {
            utilities_1.append(errors, this.count.validate(language, context));
        }
        return errors;
    }
    /**
     * Generates the reference to the named group in the given dialect
     *
     * @param language the regex dialect to generate for
     * @returns the reference, followed by the count and/or optional marker
     */
    toRegex(language) {
        let str = "";
        switch (language) {
            case RegexDialect.Python:
                str = `(?P=${this.name})`;
                break;
            case RegexDialect.DotNet:
            case RegexDialect.Java:
                str = `\\k<${this.name}>`;
                break;
            default:
                str = `\\g<${this.name}>`;
                break;
        }
        if (this.count) {
            str += this.count.toRegex(language);
            // group for optionality because count would be incorrect otherwise
            if (this.optional) {
                str = "(?:" + str + ")?";
            }
        }
        else if (this.optional) {
            // append the quantifier; assigning here would throw away the
            // reference built above and emit a bare "?"
            str += "?";
        }
        return str;
    }
}
|
||||||
|
exports.BackrefStatementCST = BackrefStatementCST;
|
||||||
|
/**
 * Concrete Syntax Tree node for an If Pattern statement
 *
 * @internal
 */
class IfPatternStatementCST extends StatementCST {
    /**
     * Builds an IfPatternStatementCST
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param matches the matches that make up the condition being tested
     * @param true_statements statements for the true path
     * @param false_statements statements for the false path
     */
    constructor(tokens, matches, true_statements, false_statements) {
        super(tokens);
        this.matches = matches;
        this.true_statements = true_statements;
        this.false_statements = false_statements;
    }
    /**
     * Reports dialects that cannot express this conditional, then validates
     * the condition matches, the true path and the false path, in that order
     *
     * @param language the regex dialect we're validating
     * @param context the generator context
     * @returns A list of errors
     */
    validate(language, context) {
        const errors = [];
        const no_conditionals = language === RegexDialect.Java || language === RegexDialect.JS;
        if (no_conditionals) {
            errors.push(this.error("This language does not support conditionals"));
        }
        if (language === RegexDialect.Python) {
            errors.push(this.error("This language does not support pattern conditionals"));
        }
        const children = [
            ...this.matches,
            ...this.true_statements,
            ...this.false_statements,
        ];
        for (const child of children) {
            utilities_1.append(errors, child.validate(language, context));
        }
        return errors;
    }
    /**
     * Generates the conditional as (?(condition)true) or
     * (?(condition)true|false) when a false path exists
     *
     * @param language the regex dialect to generate for
     * @returns a regular expression fragment
     */
    toRegex(language) {
        // concatenates the fragments of a list of nodes
        const render = (nodes) => nodes.map((node) => node.toRegex(language)).join("");
        const condition = render(this.matches);
        const when_true = generator_helper_1.groupIfRequired(render(this.true_statements));
        if (this.false_statements.length === 0) {
            return `(?(${condition})${when_true})`;
        }
        const when_false = generator_helper_1.groupIfRequired(render(this.false_statements));
        return `(?(${condition})${when_true}|${when_false})`;
    }
}
|
||||||
|
exports.IfPatternStatementCST = IfPatternStatementCST;
|
||||||
|
/**
 * Concrete Syntax Tree node for an "if <group identifier>" statement:
 * a conditional whose test is a previously declared group name.
 *
 * @internal
 */
class IfIdentStatementCST extends StatementCST {
    /**
     * Builds the node.
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param identifier name of the group the condition tests
     * @param true_statements statements emitted for the "true" path
     * @param false_statements statements emitted for the "false" path (may be empty)
     */
    constructor(tokens, identifier, true_statements, false_statements) {
        super(tokens);
        this.identifier = identifier;
        this.true_statements = true_statements;
        this.false_statements = false_statements;
    }
    /**
     * Collects semantic errors for this node and for every statement in both branches.
     *
     * @param language the regex dialect being targeted
     * @param context validation context, queried for known group names
     * @returns the list of errors found (empty when valid)
     */
    validate(language, context) {
        const errors = [];
        const lacks_conditionals = language === RegexDialect.Java || language === RegexDialect.JS;
        if (lacks_conditionals) {
            errors.push(this.error("This language does not support conditionals"));
        }
        const group_is_known = context.hasGroup(this.identifier);
        if (!group_is_known) {
            errors.push(this.error(`Group with name "${this.identifier}" does not exist`));
        }
        // the true path is validated before the false path, in source order
        const branches = [this.true_statements, this.false_statements];
        for (const branch of branches) {
            for (const statement of branch) {
                utilities_1.append(errors, statement.validate(language, context));
            }
        }
        return errors;
    }
    /**
     * Renders the conditional as `(?(name)yes)` or `(?(name)yes|no)`.
     *
     * @param language the regex dialect being targeted
     * @returns the regular expression fragment
     */
    toRegex(language) {
        // concatenates a branch and groups it when it is not already a single unit
        const renderBranch = (statements) => {
            const pieces = statements.map((x) => x.toRegex(language));
            return generator_helper_1.groupIfRequired(pieces.join(""));
        };
        // Boost gets the explicit <name> form of the condition
        const condition = language === RegexDialect.Boost
            ? "<" + this.identifier + ">"
            : this.identifier;
        const when_true = renderBranch(this.true_statements);
        if (this.false_statements.length === 0) {
            return `(?(${condition})${when_true})`;
        }
        const when_false = renderBranch(this.false_statements);
        return `(?(${condition})${when_true}|${when_false})`;
    }
}
|
||||||
|
exports.IfIdentStatementCST = IfIdentStatementCST;
|
||||||
/**
|
/**
|
||||||
* Concrete Syntax Tree for a regular expression
|
* Concrete Syntax Tree for a regular expression
|
||||||
*
|
*
|
||||||
@ -642,10 +803,10 @@ class RegularExpressionCST extends H2RCST {
|
|||||||
this.usings = usings;
|
this.usings = usings;
|
||||||
this.statements = statements;
|
this.statements = statements;
|
||||||
}
|
}
|
||||||
validate(language) {
|
validate(language, context) {
|
||||||
const errors = this.usings.validate(language);
|
const errors = this.usings.validate(language, context);
|
||||||
for (const statement of this.statements) {
|
for (const statement of this.statements) {
|
||||||
utilities_1.append(errors, statement.validate(language));
|
utilities_1.append(errors, statement.validate(language, context));
|
||||||
}
|
}
|
||||||
return errors;
|
return errors;
|
||||||
}
|
}
|
||||||
@ -656,87 +817,3 @@ class RegularExpressionCST extends H2RCST {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
exports.RegularExpressionCST = RegularExpressionCST;
|
exports.RegularExpressionCST = RegularExpressionCST;
|
||||||
/**
|
|
||||||
* Minimizes the match string by finding duplicates or substrings in the array
|
|
||||||
*
|
|
||||||
* @param arr the array of matches
|
|
||||||
* @internal
|
|
||||||
*/
|
|
||||||
function minimizeMatchString(arr) {
|
|
||||||
return minMatchString(arr, 0);
|
|
||||||
}
|
|
||||||
exports.minimizeMatchString = minimizeMatchString;
|
|
||||||
/**
|
|
||||||
* Minimizes the match string by finding duplicates or substrings in the array
|
|
||||||
*
|
|
||||||
* @param arr the array
|
|
||||||
* @param depth must be 0 for initial call
|
|
||||||
* @internal
|
|
||||||
*/
|
|
||||||
function minMatchString(arr, depth = 0) {
|
|
||||||
// base case: arr is empty
|
|
||||||
if (arr.length === 0) {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
// base case: arr has 1 element (must have at least 2, so this means this value is optional)
|
|
||||||
if (arr.length === 1) {
|
|
||||||
return utilities_1.first(arr) + "?";
|
|
||||||
}
|
|
||||||
// remove duplicates
|
|
||||||
arr = [...new Set(arr)];
|
|
||||||
// base case: arr has 1 element (after duplicate removal means this is required)
|
|
||||||
if (arr.length === 1) {
|
|
||||||
return utilities_1.first(arr);
|
|
||||||
}
|
|
||||||
// base case: arr is all single letters
|
|
||||||
if (arr.every(utilities_1.isSingleRegexCharacter)) {
|
|
||||||
return "[" + arr.join("") + "]";
|
|
||||||
}
|
|
||||||
// now the real magic begins
|
|
||||||
// You are not expected to understand this
|
|
||||||
let longest_begin_substring = utilities_1.first(arr);
|
|
||||||
let longest_end_substring = utilities_1.first(arr);
|
|
||||||
for (let i = 1; i < arr.length; i++) {
|
|
||||||
// reduce longest_substring to match everything
|
|
||||||
for (let j = 0; j < longest_begin_substring.length; j++) {
|
|
||||||
if (arr[i].length < j || longest_begin_substring[j] !== arr[i][j]) {
|
|
||||||
longest_begin_substring = longest_begin_substring.substr(0, j);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (let j = 0; j < longest_end_substring.length; j++) {
|
|
||||||
if (arr[i].length - j < 0 || longest_end_substring[longest_end_substring.length - j - 1] !== arr[i][arr[i].length - j - 1]) {
|
|
||||||
longest_end_substring = longest_end_substring.substr(longest_end_substring.length - j, longest_end_substring.length);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// No matches whatsoever
|
|
||||||
// *technically* we can optimize further, but that is a VERY non-trivial problem
|
|
||||||
// For example optimizing: [ "a1x1z", "a2y2z", "a3z3z" ] to: "a[123][xyz][123]z"
|
|
||||||
if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
|
|
||||||
if (depth > 0) {
|
|
||||||
return "(?:" + arr.join("|") + ")";
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return arr.join("|");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// we have some matches
|
|
||||||
else {
|
|
||||||
// remove begin (if exists) and end (if exists) from each element and remove empty strings
|
|
||||||
const begin_pos = longest_begin_substring.length;
|
|
||||||
const end_pos = longest_end_substring.length;
|
|
||||||
const similar_matches = [];
|
|
||||||
for (const ele of arr) {
|
|
||||||
const match = ele.substring(begin_pos, ele.length - end_pos);
|
|
||||||
if (match.length !== 0) {
|
|
||||||
similar_matches.push(match);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return longest_begin_substring + minMatchString(similar_matches, depth + 1) + longest_end_substring;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
23
lib/generator_helper.d.ts
vendored
Normal file
23
lib/generator_helper.d.ts
vendored
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||||
|
/**
|
||||||
|
* Minimizes the match string by finding duplicates or substrings in the array
|
||||||
|
*
|
||||||
|
* @param arr the array of matches
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export declare function minimizeMatchString(arr: string[]): string;
|
||||||
|
/**
|
||||||
|
* Groups a regex fragment if it needs to be grouped
|
||||||
|
*
|
||||||
|
* @param fragment fragment of regular expression to potentially group
|
||||||
|
* @returns a non-capturing group if there needs to be one
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export declare function groupIfRequired(fragment: string): string;
|
||||||
|
/**
|
||||||
|
* Applies a repetition to a fragment, taking care of a fragment that already ends in a + or * quantifier
|
||||||
|
*
|
||||||
|
* @param fragment fragment of regular expression
|
||||||
|
* @param repetition repetition that may clobber the fragment
|
||||||
|
*/
|
||||||
|
export declare function dontClobberRepetition(fragment: string, repetition: string): string;
|
203
lib/generator_helper.js
Normal file
203
lib/generator_helper.js
Normal file
@ -0,0 +1,203 @@
|
|||||||
|
"use strict";
|
||||||
|
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||||
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
|
exports.dontClobberRepetition = exports.groupIfRequired = exports.minimizeMatchString = void 0;
|
||||||
|
/**
|
||||||
|
* Includes helper functions for the Generator
|
||||||
|
* @packageDocumentation
|
||||||
|
*/
|
||||||
|
const utilities_1 = require("./utilities");
|
||||||
|
/**
 * Public entry point for match-string minimization: collapses a list of
 * alternatives into one regex fragment by factoring out duplicates and
 * shared leading/trailing text.
 *
 * @param arr the array of matches
 * @returns the single element untouched when there is exactly one, otherwise the minimized fragment
 * @internal
 */
function minimizeMatchString(arr) {
    // A lone element must bypass minMatchString: there, a single element
    // is taken to mean "optional" and would gain a trailing "?".
    const is_lone_match = arr.length === 1;
    return is_lone_match ? utilities_1.first(arr) : minMatchString(arr, 0);
}
|
||||||
|
exports.minimizeMatchString = minimizeMatchString;
|
||||||
|
/**
|
||||||
|
* Minimizes the match string by finding duplicates or substrings in the array
|
||||||
|
*
|
||||||
|
* @param arr the array
|
||||||
|
* @param depth must be 0 for initial call
|
||||||
|
* @returns an optimized string
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
function minMatchString(arr, depth = 0) {
|
||||||
|
// base case: arr is empty
|
||||||
|
if (arr.length === 0) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
// base case: arr has 1 element (must have at least 2, so this means this value is optional)
|
||||||
|
if (arr.length === 1) {
|
||||||
|
return utilities_1.first(arr) + "?";
|
||||||
|
}
|
||||||
|
// remove duplicates
|
||||||
|
arr = [...new Set(arr)];
|
||||||
|
// base case: arr has 1 element (after duplicate removal means this is required)
|
||||||
|
if (arr.length === 1) {
|
||||||
|
return utilities_1.first(arr);
|
||||||
|
}
|
||||||
|
// base case: arr is all single letters
|
||||||
|
if (arr.every(utilities_1.isSingleRegexCharacter)) {
|
||||||
|
return "[" + arr.join("") + "]";
|
||||||
|
}
|
||||||
|
// now the real magic begins
|
||||||
|
// You are not expected to understand this
|
||||||
|
let longest_begin_substring = utilities_1.first(arr);
|
||||||
|
let longest_end_substring = utilities_1.first(arr);
|
||||||
|
for (let i = 1; i < arr.length; i++) {
|
||||||
|
// reduce longest_substring to match everything
|
||||||
|
for (let j = 0; j < longest_begin_substring.length; j++) {
|
||||||
|
if (arr[i].length < j || longest_begin_substring[j] !== arr[i][j]) {
|
||||||
|
longest_begin_substring = longest_begin_substring.substr(0, j);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (let j = 0; j < longest_end_substring.length; j++) {
|
||||||
|
if (arr[i].length - j < 0 || longest_end_substring[longest_end_substring.length - j - 1] !== arr[i][arr[i].length - j - 1]) {
|
||||||
|
longest_end_substring = longest_end_substring.substr(longest_end_substring.length - j, longest_end_substring.length);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// No matches whatsoever
|
||||||
|
// *technically* we can optimize further, but that is a VERY non-trivial problem
|
||||||
|
// For example optimizing: [ "a1x1z", "a2y2z", "a3z3z" ] to: "a[123][xyz][123]z"
|
||||||
|
if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
|
||||||
|
if (depth > 0) {
|
||||||
|
return "(?:" + arr.join("|") + ")";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return arr.join("|");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// we have some matches
|
||||||
|
else {
|
||||||
|
// remove begin (if exists) and end (if exists) from each element and remove empty strings
|
||||||
|
const begin_pos = longest_begin_substring.length;
|
||||||
|
const end_pos = longest_end_substring.length;
|
||||||
|
const similar_matches = [];
|
||||||
|
for (const ele of arr) {
|
||||||
|
const match = ele.substring(begin_pos, ele.length - end_pos);
|
||||||
|
if (match.length !== 0) {
|
||||||
|
similar_matches.push(match);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return longest_begin_substring + minMatchString(similar_matches, depth + 1) + longest_end_substring;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Groups a regex fragment if it needs to be grouped
|
||||||
|
*
|
||||||
|
* @param fragment fragment of regular expression to potentially group
|
||||||
|
* @returns a non-capturing group if there needs to be one
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
function groupIfRequired(fragment) {
|
||||||
|
if (utilities_1.isSingleRegexCharacter(fragment)) {
|
||||||
|
return fragment;
|
||||||
|
}
|
||||||
|
if (fragment[0] === "(" && fragment[fragment.length - 1] === ")") {
|
||||||
|
let bracket_count = 0;
|
||||||
|
for (let i = 1; i < fragment.length - 2; i++) {
|
||||||
|
if (fragment[i] === "\\") {
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
else if (fragment[i] === "(") {
|
||||||
|
bracket_count++;
|
||||||
|
}
|
||||||
|
else if (fragment[i] === ")") {
|
||||||
|
bracket_count--;
|
||||||
|
if (bracket_count === -1) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return bracket_count === 0 ? fragment : "(?:" + fragment + ")";
|
||||||
|
}
|
||||||
|
else if (fragment[0] === "[" && fragment[fragment.length - 1] === "]") {
|
||||||
|
let bracket_count = 0;
|
||||||
|
for (let i = 1; i < fragment.length - 2; i++) {
|
||||||
|
if (fragment[i] === "\\") {
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
//you'll never have a raw [ inside a []
|
||||||
|
//else if (fragment[i] === "[") {
|
||||||
|
// bracket_count++;
|
||||||
|
//}
|
||||||
|
else if (fragment[i] === "]") {
|
||||||
|
bracket_count--;
|
||||||
|
if (bracket_count === -1) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return bracket_count === 0 ? fragment : "(?:" + fragment + ")";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return "(?:" + fragment + ")";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
exports.groupIfRequired = groupIfRequired;
|
||||||
|
/**
 * Applies a repetition to a fragment, taking care of a fragment that
 * already ends in a + or * quantifier so the two are not simply stacked.
 *
 * A trailing + or * that is escaped (a literal plus sign or asterisk) is not a
 * quantifier; in that case the repetition is appended unchanged.
 *
 * @param fragment fragment of regular expression
 * @param repetition repetition that may clobber the fragment ("*", "+", "?" or a "{...}" count)
 * @returns the fragment with the repetition applied
 */
function dontClobberRepetition(fragment, repetition) {
    const tail = fragment[fragment.length - 1];
    if (tail !== "+" && tail !== "*") {
        return fragment + repetition;
    }
    // An odd number of backslashes directly before the tail escapes it,
    // which makes it a literal character rather than a quantifier.
    let backslashes = 0;
    for (let i = fragment.length - 2; i >= 0 && fragment[i] === "\\"; i--) {
        backslashes++;
    }
    if (backslashes % 2 === 1) {
        return fragment + repetition;
    }
    const without_quantifier = fragment.substring(0, fragment.length - 1);
    if (tail === "+") {
        // + can be ignored as well as a count as long as that count is > 0
        switch (repetition) {
            case "*":
                // ignore: + is greater than *
                return fragment;
            case "+":
                // ignore: already +
                return fragment;
            case "?":
                // non-greedy qualifier
                return fragment + repetition;
            default:
                if (repetition.startsWith("{0")) {
                    // a count that allows zero cannot absorb the +, so keep both
                    return "(?:" + fragment + ")" + repetition;
                }
                // remove + and replace with count
                return without_quantifier + repetition;
        }
    }
    // tail is an unescaped *
    switch (repetition) {
        case "*":
            // ignore: already *
            return fragment;
        case "?":
            // non-greedy qualifier
            return fragment + repetition;
        default:
            // remove * and replace with count
            return without_quantifier + repetition;
    }
}
|
||||||
|
exports.dontClobberRepetition = dontClobberRepetition;
|
@ -83,7 +83,7 @@ class ParseResult {
|
|||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
validate(language) {
|
validate(language) {
|
||||||
return this.regexp_cst.validate(language).map(utilities_1.CommonError.fromSemanticError);
|
return this.regexp_cst.validate(language, new generator_1.GeneratorContext()).map(utilities_1.CommonError.fromSemanticError);
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Generate a regular expression string based on the parse result
|
* Generate a regular expression string based on the parse result
|
||||||
@ -499,12 +499,91 @@ class Human2RegexParser extends chevrotain_1.EmbeddedActionsParser {
|
|||||||
tokens.push($.CONSUME(T.Outdent));
|
tokens.push($.CONSUME(T.Outdent));
|
||||||
return new generator_1.RepeatStatementCST(tokens, optional, count, statements);
|
return new generator_1.RepeatStatementCST(tokens, optional, count, statements);
|
||||||
});
|
});
|
||||||
|
const BackrefStatement = $.RULE("BackrefStatement", () => {
|
||||||
|
const tokens = [];
|
||||||
|
let optional = false;
|
||||||
|
let count = null;
|
||||||
|
$.OPTION5(() => {
|
||||||
|
tokens.push($.CONSUME(T.Optional));
|
||||||
|
optional = true;
|
||||||
|
});
|
||||||
|
tokens.push($.CONSUME(T.Call));
|
||||||
|
$.OPTION6(() => count = $.SUBRULE(CountSubStatement));
|
||||||
|
$.OPTION7(() => {
|
||||||
|
$.OPTION(() => $.CONSUME(T.The));
|
||||||
|
$.CONSUME(T.Group);
|
||||||
|
$.OPTION2(() => $.CONSUME(T.Called));
|
||||||
|
});
|
||||||
|
const name = $.CONSUME(T.Identifier).image;
|
||||||
|
tokens.push($.CONSUME4(T.EndOfLine));
|
||||||
|
return new generator_1.BackrefStatementCST(tokens, optional, count, name);
|
||||||
|
});
|
||||||
|
const IfStatement = $.RULE("IfStatement", () => {
|
||||||
|
const tokens = [];
|
||||||
|
const msv = [];
|
||||||
|
let optional = false;
|
||||||
|
const true_statements = [];
|
||||||
|
const false_statements = [];
|
||||||
|
let name = "";
|
||||||
|
tokens.push($.CONSUME(T.If));
|
||||||
|
$.OR2([
|
||||||
|
{ ALT: () => {
|
||||||
|
name = $.CONSUME(T.Identifier).image;
|
||||||
|
} },
|
||||||
|
{ ALT: () => {
|
||||||
|
$.CONSUME(T.Match);
|
||||||
|
$.OPTION4(() => {
|
||||||
|
$.CONSUME3(T.Optional);
|
||||||
|
optional = true;
|
||||||
|
});
|
||||||
|
msv.push(new generator_1.MatchStatementValue(optional, $.SUBRULE(MatchSubStatement)));
|
||||||
|
$.MANY(() => {
|
||||||
|
$.OR([
|
||||||
|
{ ALT: () => {
|
||||||
|
$.OPTION2(() => $.CONSUME2(T.And));
|
||||||
|
$.CONSUME(T.Then);
|
||||||
|
} },
|
||||||
|
{ ALT: () => $.CONSUME(T.And) },
|
||||||
|
]);
|
||||||
|
optional = false;
|
||||||
|
$.OPTION3(() => {
|
||||||
|
$.CONSUME2(T.Optional);
|
||||||
|
optional = true;
|
||||||
|
});
|
||||||
|
msv.push(new generator_1.MatchStatementValue(optional, $.SUBRULE2(MatchSubStatement)));
|
||||||
|
});
|
||||||
|
} }
|
||||||
|
]);
|
||||||
|
tokens.push($.CONSUME3(T.EndOfLine));
|
||||||
|
$.CONSUME2(T.Indent);
|
||||||
|
$.AT_LEAST_ONE2(() => {
|
||||||
|
true_statements.push($.SUBRULE(Statement));
|
||||||
|
});
|
||||||
|
$.CONSUME2(T.Outdent);
|
||||||
|
$.OPTION(() => {
|
||||||
|
$.CONSUME(T.Else);
|
||||||
|
$.CONSUME4(T.EndOfLine);
|
||||||
|
$.CONSUME3(T.Indent);
|
||||||
|
$.AT_LEAST_ONE3(() => {
|
||||||
|
false_statements.push($.SUBRULE2(Statement));
|
||||||
|
});
|
||||||
|
$.CONSUME3(T.Outdent);
|
||||||
|
});
|
||||||
|
if (name === "") {
|
||||||
|
return new generator_1.IfPatternStatementCST(tokens, msv, true_statements, false_statements);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return new generator_1.IfIdentStatementCST(tokens, name, true_statements, false_statements);
|
||||||
|
}
|
||||||
|
});
|
||||||
// statement super class
|
// statement super class
|
||||||
const Statement = $.RULE("Statement", () => {
|
const Statement = $.RULE("Statement", () => {
|
||||||
return $.OR([
|
return $.OR([
|
||||||
{ ALT: () => $.SUBRULE(MatchStatement) },
|
{ ALT: () => $.SUBRULE(MatchStatement) },
|
||||||
{ ALT: () => $.SUBRULE(GroupStatement) },
|
{ ALT: () => $.SUBRULE(GroupStatement) },
|
||||||
{ ALT: () => $.SUBRULE(RepeatStatement) }
|
{ ALT: () => $.SUBRULE(RepeatStatement) },
|
||||||
|
{ ALT: () => $.SUBRULE(BackrefStatement) },
|
||||||
|
{ ALT: () => $.SUBRULE(IfStatement) }
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
// full regex
|
// full regex
|
||||||
|
4
lib/tokens.d.ts
vendored
4
lib/tokens.d.ts
vendored
@ -51,6 +51,10 @@
|
|||||||
/** @internal */ export declare const CaseInsensitive: import("chevrotain").TokenType;
|
/** @internal */ export declare const CaseInsensitive: import("chevrotain").TokenType;
|
||||||
/** @internal */ export declare const CaseSensitive: import("chevrotain").TokenType;
|
/** @internal */ export declare const CaseSensitive: import("chevrotain").TokenType;
|
||||||
/** @internal */ export declare const OrMore: import("chevrotain").TokenType;
|
/** @internal */ export declare const OrMore: import("chevrotain").TokenType;
|
||||||
|
/** @internal */ export declare const Call: import("chevrotain").TokenType;
|
||||||
|
/** @internal */ export declare const The: import("chevrotain").TokenType;
|
||||||
|
/** @internal */ export declare const If: import("chevrotain").TokenType;
|
||||||
|
/** @internal */ export declare const Else: import("chevrotain").TokenType;
|
||||||
/** @internal */ export declare const EndOfLine: import("chevrotain").TokenType;
|
/** @internal */ export declare const EndOfLine: import("chevrotain").TokenType;
|
||||||
/** @internal */ export declare const WS: import("chevrotain").TokenType;
|
/** @internal */ export declare const WS: import("chevrotain").TokenType;
|
||||||
/** @internal */ export declare const SingleLineComment: import("chevrotain").TokenType;
|
/** @internal */ export declare const SingleLineComment: import("chevrotain").TokenType;
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
"use strict";
|
"use strict";
|
||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||||
Object.defineProperty(exports, "__esModule", { value: true });
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
exports.AllTokens = exports.Outdent = exports.Indent = exports.StringLiteral = exports.NumberLiteral = exports.Identifier = exports.MultilineComment = exports.SingleLineComment = exports.WS = exports.EndOfLine = exports.OrMore = exports.CaseSensitive = exports.CaseInsensitive = exports.CarriageReturn = exports.Newline = exports.Repeat = exports.Called = exports.Create = exports.To = exports.From = exports.Exclusive = exports.Inclusive = exports.Exactly = exports.Times = exports.A = exports.Group = exports.Linefeed = exports.Tab = exports.Between = exports.Not = exports.Matching = exports.Exact = exports.Multiline = exports.Global = exports.Using = exports.Unicode = exports.Number = exports.Boundary = exports.Whitespace = exports.Integer = exports.Decimal = exports.Letter = exports.Character = exports.Digit = exports.Word = exports.And = exports.Or = exports.Anything = exports.Then = exports.Match = exports.Optional = exports.Ten = exports.Nine = exports.Eight = exports.Seven = exports.Six = exports.Five = exports.Four = exports.Three = exports.Two = exports.One = exports.Zero = void 0;
|
exports.CaseInsensitive = exports.CarriageReturn = exports.Newline = exports.Repeat = exports.Called = exports.Create = exports.To = exports.From = exports.Exclusive = exports.Inclusive = exports.Exactly = exports.Times = exports.A = exports.Group = exports.Linefeed = exports.Tab = exports.Between = exports.Not = exports.Matching = exports.Exact = exports.Multiline = exports.Global = exports.Using = exports.Unicode = exports.Number = exports.Boundary = exports.Whitespace = exports.Integer = exports.Decimal = exports.Letter = exports.Character = exports.Digit = exports.Word = exports.And = exports.Or = exports.Anything = exports.Then = exports.Match = exports.Optional = exports.Ten = exports.Nine = exports.Eight = exports.Seven = exports.Six = exports.Five = exports.Four = exports.Three = exports.Two = exports.One = exports.Zero = void 0;
|
||||||
|
exports.AllTokens = exports.Outdent = exports.Indent = exports.StringLiteral = exports.NumberLiteral = exports.Identifier = exports.MultilineComment = exports.SingleLineComment = exports.WS = exports.EndOfLine = exports.Else = exports.If = exports.The = exports.Call = exports.OrMore = exports.CaseSensitive = void 0;
|
||||||
/**
|
/**
|
||||||
* The tokens required for Human2Regex
|
* The tokens required for Human2Regex
|
||||||
* @packageDocumentation
|
* @packageDocumentation
|
||||||
@ -52,32 +53,17 @@ const chevrotain_1 = require("chevrotain");
|
|||||||
/** @internal */ exports.From = chevrotain_1.createToken({ name: "From", pattern: /from/i });
|
/** @internal */ exports.From = chevrotain_1.createToken({ name: "From", pattern: /from/i });
|
||||||
/** @internal */ exports.To = chevrotain_1.createToken({ name: "To", pattern: /(to|through|thru|\-|\.\.\.?)/i });
|
/** @internal */ exports.To = chevrotain_1.createToken({ name: "To", pattern: /(to|through|thru|\-|\.\.\.?)/i });
|
||||||
/** @internal */ exports.Create = chevrotain_1.createToken({ name: "Create", pattern: /create(s)?/i });
|
/** @internal */ exports.Create = chevrotain_1.createToken({ name: "Create", pattern: /create(s)?/i });
|
||||||
/** @internal */ exports.Called = chevrotain_1.createToken({ name: "Called", pattern: /name(d)?|call(ed)?/i });
|
/** @internal */ exports.Called = chevrotain_1.createToken({ name: "Called", pattern: /named|called/i });
|
||||||
/** @internal */ exports.Repeat = chevrotain_1.createToken({ name: "Repeat", pattern: /repeat(s|ing)?/i });
|
/** @internal */ exports.Repeat = chevrotain_1.createToken({ name: "Repeat", pattern: /repeat(s|ing)?/i });
|
||||||
/** @internal */ exports.Newline = chevrotain_1.createToken({ name: "Newline", pattern: /(new line|newline)/i });
|
/** @internal */ exports.Newline = chevrotain_1.createToken({ name: "Newline", pattern: /(new line|newline)/i });
|
||||||
/** @internal */ exports.CarriageReturn = chevrotain_1.createToken({ name: "CarriageReturn", pattern: /carriage return/i });
|
/** @internal */ exports.CarriageReturn = chevrotain_1.createToken({ name: "CarriageReturn", pattern: /carriage return/i });
|
||||||
/** @internal */ exports.CaseInsensitive = chevrotain_1.createToken({ name: "CaseInsensitive", pattern: /case insensitive/i });
|
/** @internal */ exports.CaseInsensitive = chevrotain_1.createToken({ name: "CaseInsensitive", pattern: /case insensitive/i });
|
||||||
/** @internal */ exports.CaseSensitive = chevrotain_1.createToken({ name: "CaseSensitive", pattern: /case sensitive/i });
|
/** @internal */ exports.CaseSensitive = chevrotain_1.createToken({ name: "CaseSensitive", pattern: /case sensitive/i });
|
||||||
/** @internal */ exports.OrMore = chevrotain_1.createToken({ name: "OrMore", pattern: /\+|or more/i });
|
/** @internal */ exports.OrMore = chevrotain_1.createToken({ name: "OrMore", pattern: /\+|or more/i });
|
||||||
/*
|
/** @internal */ exports.Call = chevrotain_1.createToken({ name: "Call", pattern: /call|invoke|execute|(re ?)?run/i });
|
||||||
//Not being used currently
|
/** @internal */ exports.The = chevrotain_1.createToken({ name: "The", pattern: /the/i });
|
||||||
export const Of = createToken({name: "Of", pattern: /of/i});
|
/** @internal */ exports.If = chevrotain_1.createToken({ name: "If", pattern: /if/i });
|
||||||
export const Nothing = createToken({name: "Nothing", pattern: /nothing/i});
|
/** @internal */ exports.Else = chevrotain_1.createToken({ name: "Else", pattern: /else|otherwise/i });
|
||||||
export const As = createToken({name: "As", pattern: /as/i});
|
|
||||||
export const If = createToken({name: "If", pattern: /if/i});
|
|
||||||
export const Start = createToken({name: "Start", pattern: /start(s) with?/i});
|
|
||||||
export const Ends = createToken({name: "Ends", pattern: /end(s)? with/i});
|
|
||||||
export const Else = createToken({name: "Else", pattern: /(other wise|otherwise|else)/i});
|
|
||||||
export const Unless = createToken({name: "Unless", pattern: /unless/i});
|
|
||||||
export const While = createToken({name: "While", pattern: /while/i});
|
|
||||||
export const More = createToken({name: "More", pattern: /more/i});
|
|
||||||
export const LBracket = createToken({name: "Left Bracket", pattern: /\(/ });
|
|
||||||
export const RBracket = createToken({name: "Right Bracket", pattern: /\)/ });
|
|
||||||
export const None = createToken({name: "None", pattern: /none/i});
|
|
||||||
export const Neither = createToken({name: "Neither", pattern: /neither/i});
|
|
||||||
export const The = createToken({name: "The", pattern: /the/i }); //, longer_alt: Then});
|
|
||||||
export const By = createToken({name: "By", pattern: /by/i});
|
|
||||||
*/
|
|
||||||
/** @internal */ exports.EndOfLine = chevrotain_1.createToken({ name: "EOL", pattern: /\n/ });
|
/** @internal */ exports.EndOfLine = chevrotain_1.createToken({ name: "EOL", pattern: /\n/ });
|
||||||
/** @internal */ exports.WS = chevrotain_1.createToken({ name: "Whitespace", pattern: /[^\S\n]+/, start_chars_hint: [" ", "\r"], group: chevrotain_1.Lexer.SKIPPED });
|
/** @internal */ exports.WS = chevrotain_1.createToken({ name: "Whitespace", pattern: /[^\S\n]+/, start_chars_hint: [" ", "\r"], group: chevrotain_1.Lexer.SKIPPED });
|
||||||
/** @internal */ exports.SingleLineComment = chevrotain_1.createToken({ name: "SingleLineComment", pattern: /(#|\/\/).*/, group: chevrotain_1.Lexer.SKIPPED });
|
/** @internal */ exports.SingleLineComment = chevrotain_1.createToken({ name: "SingleLineComment", pattern: /(#|\/\/).*/, group: chevrotain_1.Lexer.SKIPPED });
|
||||||
@ -120,22 +106,11 @@ exports.AllTokens = [
|
|||||||
exports.Whitespace,
|
exports.Whitespace,
|
||||||
exports.Number,
|
exports.Number,
|
||||||
exports.Unicode,
|
exports.Unicode,
|
||||||
/*
|
exports.Called,
|
||||||
Of,
|
exports.Call,
|
||||||
As,
|
exports.If,
|
||||||
If,
|
exports.Else,
|
||||||
Start,
|
exports.The,
|
||||||
Ends,
|
|
||||||
Else,
|
|
||||||
Unless,
|
|
||||||
While,
|
|
||||||
More,
|
|
||||||
Nothing,
|
|
||||||
By,
|
|
||||||
The,
|
|
||||||
None,
|
|
||||||
Neither,
|
|
||||||
*/
|
|
||||||
exports.Using,
|
exports.Using,
|
||||||
exports.Global,
|
exports.Global,
|
||||||
exports.Multiline,
|
exports.Multiline,
|
||||||
@ -151,7 +126,6 @@ exports.AllTokens = [
|
|||||||
exports.Exclusive,
|
exports.Exclusive,
|
||||||
exports.From,
|
exports.From,
|
||||||
exports.Create,
|
exports.Create,
|
||||||
exports.Called,
|
|
||||||
exports.Repeat,
|
exports.Repeat,
|
||||||
exports.Newline,
|
exports.Newline,
|
||||||
exports.CarriageReturn,
|
exports.CarriageReturn,
|
||||||
|
3
lib/utilities.d.ts
vendored
3
lib/utilities.d.ts
vendored
@ -130,6 +130,7 @@ export declare class CommonError {
|
|||||||
*
|
*
|
||||||
* @param error The lexing error
|
* @param error The lexing error
|
||||||
* @returns a new CommonError
|
* @returns a new CommonError
|
||||||
|
* @internal
|
||||||
*/
|
*/
|
||||||
static fromLexError(error: ILexingError): CommonError;
|
static fromLexError(error: ILexingError): CommonError;
|
||||||
/**
|
/**
|
||||||
@ -137,6 +138,7 @@ export declare class CommonError {
|
|||||||
*
|
*
|
||||||
* @param error The parsing error
|
* @param error The parsing error
|
||||||
* @returns a new CommonError
|
* @returns a new CommonError
|
||||||
|
* @internal
|
||||||
*/
|
*/
|
||||||
static fromParseError(error: IRecognitionException): CommonError;
|
static fromParseError(error: IRecognitionException): CommonError;
|
||||||
/**
|
/**
|
||||||
@ -144,6 +146,7 @@ export declare class CommonError {
|
|||||||
*
|
*
|
||||||
* @param error The semantic error
|
* @param error The semantic error
|
||||||
* @returns a new CommonError
|
* @returns a new CommonError
|
||||||
|
* @internal
|
||||||
*/
|
*/
|
||||||
static fromSemanticError(error: ISemanticError): CommonError;
|
static fromSemanticError(error: ISemanticError): CommonError;
|
||||||
/**
|
/**
|
||||||
|
@ -181,6 +181,7 @@ class CommonError {
|
|||||||
*
|
*
|
||||||
* @param error The lexing error
|
* @param error The lexing error
|
||||||
* @returns a new CommonError
|
* @returns a new CommonError
|
||||||
|
* @internal
|
||||||
*/
|
*/
|
||||||
static fromLexError(error) {
|
static fromLexError(error) {
|
||||||
// not really fond of --> and <--
|
// not really fond of --> and <--
|
||||||
@ -192,6 +193,7 @@ class CommonError {
|
|||||||
*
|
*
|
||||||
* @param error The parsing error
|
* @param error The parsing error
|
||||||
* @returns a new CommonError
|
* @returns a new CommonError
|
||||||
|
* @internal
|
||||||
*/
|
*/
|
||||||
static fromParseError(error) {
|
static fromParseError(error) {
|
||||||
var _a, _b, _c;
|
var _a, _b, _c;
|
||||||
@ -204,6 +206,7 @@ class CommonError {
|
|||||||
*
|
*
|
||||||
* @param error The semantic error
|
* @param error The semantic error
|
||||||
* @returns a new CommonError
|
* @returns a new CommonError
|
||||||
|
* @internal
|
||||||
*/
|
*/
|
||||||
static fromSemanticError(error) {
|
static fromSemanticError(error) {
|
||||||
return new CommonError("Semantic Error", error.startLine, error.startColumn, error.length, error.message);
|
return new CommonError("Semantic Error", error.startLine, error.startColumn, error.length, error.message);
|
||||||
|
100
package-lock.json
generated
100
package-lock.json
generated
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "human2regex",
|
"name": "human2regex",
|
||||||
"version": "1.0.2",
|
"version": "1.1.0",
|
||||||
"lockfileVersion": 1,
|
"lockfileVersion": 1,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
@ -1607,13 +1607,13 @@
|
|||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"@typescript-eslint/eslint-plugin": {
|
"@typescript-eslint/eslint-plugin": {
|
||||||
"version": "4.7.0",
|
"version": "4.8.1",
|
||||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.7.0.tgz",
|
"resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.8.1.tgz",
|
||||||
"integrity": "sha512-li9aiSVBBd7kU5VlQlT1AqP0uWGDK6JYKUQ9cVDnOg34VNnd9t4jr0Yqc/bKxJr/tDCPDaB4KzoSFN9fgVxe/Q==",
|
"integrity": "sha512-d7LeQ7dbUrIv5YVFNzGgaW3IQKMmnmKFneRWagRlGYOSfLJVaRbj/FrBNOBC1a3tVO+TgNq1GbHvRtg1kwL0FQ==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"@typescript-eslint/experimental-utils": "4.7.0",
|
"@typescript-eslint/experimental-utils": "4.8.1",
|
||||||
"@typescript-eslint/scope-manager": "4.7.0",
|
"@typescript-eslint/scope-manager": "4.8.1",
|
||||||
"debug": "^4.1.1",
|
"debug": "^4.1.1",
|
||||||
"functional-red-black-tree": "^1.0.1",
|
"functional-red-black-tree": "^1.0.1",
|
||||||
"regexpp": "^3.0.0",
|
"regexpp": "^3.0.0",
|
||||||
@ -1622,55 +1622,55 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"@typescript-eslint/experimental-utils": {
|
"@typescript-eslint/experimental-utils": {
|
||||||
"version": "4.7.0",
|
"version": "4.8.1",
|
||||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/experimental-utils/-/experimental-utils-4.7.0.tgz",
|
"resolved": "https://registry.npmjs.org/@typescript-eslint/experimental-utils/-/experimental-utils-4.8.1.tgz",
|
||||||
"integrity": "sha512-cymzovXAiD4EF+YoHAB5Oh02MpnXjvyaOb+v+BdpY7lsJXZQN34oIETeUwVT2XfV9rSNpXaIcknDLfupO/tUoA==",
|
"integrity": "sha512-WigyLn144R3+lGATXW4nNcDJ9JlTkG8YdBWHkDlN0lC3gUGtDi7Pe3h5GPvFKMcRz8KbZpm9FJV9NTW8CpRHpg==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"@types/json-schema": "^7.0.3",
|
"@types/json-schema": "^7.0.3",
|
||||||
"@typescript-eslint/scope-manager": "4.7.0",
|
"@typescript-eslint/scope-manager": "4.8.1",
|
||||||
"@typescript-eslint/types": "4.7.0",
|
"@typescript-eslint/types": "4.8.1",
|
||||||
"@typescript-eslint/typescript-estree": "4.7.0",
|
"@typescript-eslint/typescript-estree": "4.8.1",
|
||||||
"eslint-scope": "^5.0.0",
|
"eslint-scope": "^5.0.0",
|
||||||
"eslint-utils": "^2.0.0"
|
"eslint-utils": "^2.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"@typescript-eslint/parser": {
|
"@typescript-eslint/parser": {
|
||||||
"version": "4.7.0",
|
"version": "4.8.1",
|
||||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-4.7.0.tgz",
|
"resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-4.8.1.tgz",
|
||||||
"integrity": "sha512-+meGV8bMP1sJHBI2AFq1GeTwofcGiur8LoIr6v+rEmD9knyCqDlrQcFHR0KDDfldHIFDU/enZ53fla6ReF4wRw==",
|
"integrity": "sha512-QND8XSVetATHK9y2Ltc/XBl5Ro7Y62YuZKnPEwnNPB8E379fDsvzJ1dMJ46fg/VOmk0hXhatc+GXs5MaXuL5Uw==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"@typescript-eslint/scope-manager": "4.7.0",
|
"@typescript-eslint/scope-manager": "4.8.1",
|
||||||
"@typescript-eslint/types": "4.7.0",
|
"@typescript-eslint/types": "4.8.1",
|
||||||
"@typescript-eslint/typescript-estree": "4.7.0",
|
"@typescript-eslint/typescript-estree": "4.8.1",
|
||||||
"debug": "^4.1.1"
|
"debug": "^4.1.1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"@typescript-eslint/scope-manager": {
|
"@typescript-eslint/scope-manager": {
|
||||||
"version": "4.7.0",
|
"version": "4.8.1",
|
||||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-4.7.0.tgz",
|
"resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-4.8.1.tgz",
|
||||||
"integrity": "sha512-ILITvqwDJYbcDCROj6+Ob0oCKNg3SH46iWcNcTIT9B5aiVssoTYkhKjxOMNzR1F7WSJkik4zmuqve5MdnA0DyA==",
|
"integrity": "sha512-r0iUOc41KFFbZdPAdCS4K1mXivnSZqXS5D9oW+iykQsRlTbQRfuFRSW20xKDdYiaCoH+SkSLeIF484g3kWzwOQ==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"@typescript-eslint/types": "4.7.0",
|
"@typescript-eslint/types": "4.8.1",
|
||||||
"@typescript-eslint/visitor-keys": "4.7.0"
|
"@typescript-eslint/visitor-keys": "4.8.1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"@typescript-eslint/types": {
|
"@typescript-eslint/types": {
|
||||||
"version": "4.7.0",
|
"version": "4.8.1",
|
||||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-4.7.0.tgz",
|
"resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-4.8.1.tgz",
|
||||||
"integrity": "sha512-uLszFe0wExJc+I7q0Z/+BnP7wao/kzX0hB5vJn4LIgrfrMLgnB2UXoReV19lkJQS1a1mHWGGODSxnBx6JQC3Sg==",
|
"integrity": "sha512-ave2a18x2Y25q5K05K/U3JQIe2Av4+TNi/2YuzyaXLAsDx6UZkz1boZ7nR/N6Wwae2PpudTZmHFXqu7faXfHmA==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"@typescript-eslint/typescript-estree": {
|
"@typescript-eslint/typescript-estree": {
|
||||||
"version": "4.7.0",
|
"version": "4.8.1",
|
||||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-4.7.0.tgz",
|
"resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-4.8.1.tgz",
|
||||||
"integrity": "sha512-5XZRQznD1MfUmxu1t8/j2Af4OxbA7EFU2rbo0No7meb46eHgGkSieFdfV6omiC/DGIBhH9H9gXn7okBbVOm8jw==",
|
"integrity": "sha512-bJ6Fn/6tW2g7WIkCWh3QRlaSU7CdUUK52shx36/J7T5oTQzANvi6raoTsbwGM11+7eBbeem8hCCKbyvAc0X3sQ==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"@typescript-eslint/types": "4.7.0",
|
"@typescript-eslint/types": "4.8.1",
|
||||||
"@typescript-eslint/visitor-keys": "4.7.0",
|
"@typescript-eslint/visitor-keys": "4.8.1",
|
||||||
"debug": "^4.1.1",
|
"debug": "^4.1.1",
|
||||||
"globby": "^11.0.1",
|
"globby": "^11.0.1",
|
||||||
"is-glob": "^4.0.1",
|
"is-glob": "^4.0.1",
|
||||||
@ -1680,12 +1680,12 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"@typescript-eslint/visitor-keys": {
|
"@typescript-eslint/visitor-keys": {
|
||||||
"version": "4.7.0",
|
"version": "4.8.1",
|
||||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-4.7.0.tgz",
|
"resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-4.8.1.tgz",
|
||||||
"integrity": "sha512-aDJDWuCRsf1lXOtignlfiPODkzSxxop7D0rZ91L6ZuMlcMCSh0YyK+gAfo5zN/ih6WxMwhoXgJWC3cWQdaKC+A==",
|
"integrity": "sha512-3nrwXFdEYALQh/zW8rFwP4QltqsanCDz4CwWMPiIZmwlk9GlvBeueEIbq05SEq4ganqM0g9nh02xXgv5XI3PeQ==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"@typescript-eslint/types": "4.7.0",
|
"@typescript-eslint/types": "4.8.1",
|
||||||
"eslint-visitor-keys": "^2.0.0"
|
"eslint-visitor-keys": "^2.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -2917,9 +2917,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"codemirror": {
|
"codemirror": {
|
||||||
"version": "5.58.2",
|
"version": "5.58.3",
|
||||||
"resolved": "https://registry.npmjs.org/codemirror/-/codemirror-5.58.2.tgz",
|
"resolved": "https://registry.npmjs.org/codemirror/-/codemirror-5.58.3.tgz",
|
||||||
"integrity": "sha512-K/hOh24cCwRutd1Mk3uLtjWzNISOkm4fvXiMO7LucCrqbh6aJDdtqUziim3MZUI6wOY0rvY1SlL1Ork01uMy6w=="
|
"integrity": "sha512-KBhB+juiyOOgn0AqtRmWyAT3yoElkuvWTI6hsHa9E6GQrl6bk/fdAYcvuqW1/upO9T9rtEtapWdw4XYcNiVDEA=="
|
||||||
},
|
},
|
||||||
"collect-v8-coverage": {
|
"collect-v8-coverage": {
|
||||||
"version": "1.0.1",
|
"version": "1.0.1",
|
||||||
@ -3087,9 +3087,9 @@
|
|||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"copy-webpack-plugin": {
|
"copy-webpack-plugin": {
|
||||||
"version": "6.3.0",
|
"version": "6.3.2",
|
||||||
"resolved": "https://registry.npmjs.org/copy-webpack-plugin/-/copy-webpack-plugin-6.3.0.tgz",
|
"resolved": "https://registry.npmjs.org/copy-webpack-plugin/-/copy-webpack-plugin-6.3.2.tgz",
|
||||||
"integrity": "sha512-kQ2cGGQLO6Ov2fe7rEGVxObI17dPeFkv8bRGnUAGZehOcrrObyAR9yWYlFGlJsyWM4EeuC/ytQNQkXxjYotMzg==",
|
"integrity": "sha512-MgJ1uouLIbDg4ST1GzqrGQyKoXY5iPqi6fghFqarijam7FQcBa/r6Rg0VkoIuzx75Xq8iAMghyOueMkWUQ5OaA==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"cacache": "^15.0.5",
|
"cacache": "^15.0.5",
|
||||||
@ -3977,9 +3977,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"eslint": {
|
"eslint": {
|
||||||
"version": "7.13.0",
|
"version": "7.14.0",
|
||||||
"resolved": "https://registry.npmjs.org/eslint/-/eslint-7.13.0.tgz",
|
"resolved": "https://registry.npmjs.org/eslint/-/eslint-7.14.0.tgz",
|
||||||
"integrity": "sha512-uCORMuOO8tUzJmsdRtrvcGq5qposf7Rw0LwkTJkoDbOycVQtQjmnhZSuLQnozLE4TmAzlMVV45eCHmQ1OpDKUQ==",
|
"integrity": "sha512-5YubdnPXrlrYAFCKybPuHIAH++PINe1pmKNc5wQRB9HSbqIK1ywAnntE3Wwua4giKu0bjligf1gLF6qxMGOYRA==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"@babel/code-frame": "^7.0.0",
|
"@babel/code-frame": "^7.0.0",
|
||||||
@ -5425,9 +5425,9 @@
|
|||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"ini": {
|
"ini": {
|
||||||
"version": "1.3.5",
|
"version": "1.3.8",
|
||||||
"resolved": "https://registry.npmjs.org/ini/-/ini-1.3.5.tgz",
|
"resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz",
|
||||||
"integrity": "sha512-RZY5huIKCMRWDUqZlEi72f/lmXKMvuszcMBduliQ3nnWbx9X/ZBQO7DijMEYS9EhHBb2qacRUMtC7svLwe0lcw==",
|
"integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"interpret": {
|
"interpret": {
|
||||||
@ -11946,9 +11946,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"typescript": {
|
"typescript": {
|
||||||
"version": "4.0.5",
|
"version": "4.1.2",
|
||||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-4.0.5.tgz",
|
"resolved": "https://registry.npmjs.org/typescript/-/typescript-4.1.2.tgz",
|
||||||
"integrity": "sha512-ywmr/VrTVCmNTJ6iV2LwIrfG1P+lv6luD8sUJs+2eI9NLGigaN+nUQc13iHqisq7bra9lnmUSYqbJvegraBOPQ==",
|
"integrity": "sha512-thGloWsGH3SOxv1SoY7QojKi0tc+8FnOmiarEGMbd/lar7QOEd3hvlx3Fp5y6FlDUGl9L+pd4n2e+oToGMmhRQ==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"uglify-js": {
|
"uglify-js": {
|
||||||
|
14
package.json
14
package.json
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "human2regex",
|
"name": "human2regex",
|
||||||
"version": "1.0.2",
|
"version": "1.1.0",
|
||||||
"description": "Humanized Regular Expressions",
|
"description": "Humanized Regular Expressions",
|
||||||
"main": "./lib/index.js",
|
"main": "./lib/index.js",
|
||||||
"typings": "./lib/index.d.ts",
|
"typings": "./lib/index.d.ts",
|
||||||
@ -9,13 +9,13 @@
|
|||||||
"@types/html-minifier": "^3.5.3",
|
"@types/html-minifier": "^3.5.3",
|
||||||
"@types/jest": "^26.0.15",
|
"@types/jest": "^26.0.15",
|
||||||
"@types/mustache": "^4.0.1",
|
"@types/mustache": "^4.0.1",
|
||||||
"@typescript-eslint/eslint-plugin": "^4.7.0",
|
"@typescript-eslint/eslint-plugin": "^4.8.1",
|
||||||
"@typescript-eslint/parser": "^4.7.0",
|
"@typescript-eslint/parser": "^4.8.1",
|
||||||
"before-build-webpack": "^0.2.9",
|
"before-build-webpack": "^0.2.9",
|
||||||
"codecov": "^3.8.1",
|
"codecov": "^3.8.1",
|
||||||
"copy-webpack-plugin": "^6.3.0",
|
"copy-webpack-plugin": "^6.3.2",
|
||||||
"css-loader": "^4.3.0",
|
"css-loader": "^4.3.0",
|
||||||
"eslint": "^7.13.0",
|
"eslint": "^7.14.0",
|
||||||
"glob": "^7.1.6",
|
"glob": "^7.1.6",
|
||||||
"html-minifier": "^4.0.0",
|
"html-minifier": "^4.0.0",
|
||||||
"jest": "^26.6.3",
|
"jest": "^26.6.3",
|
||||||
@ -26,7 +26,7 @@
|
|||||||
"ts-jest": "^26.4.4",
|
"ts-jest": "^26.4.4",
|
||||||
"ts-loader": "^8.0.11",
|
"ts-loader": "^8.0.11",
|
||||||
"ts-node": "^9.0.0",
|
"ts-node": "^9.0.0",
|
||||||
"typescript": "^4.0.5",
|
"typescript": "^4.1.2",
|
||||||
"webpack": "^4.44.2",
|
"webpack": "^4.44.2",
|
||||||
"webpack-cli": "^3.3.12"
|
"webpack-cli": "^3.3.12"
|
||||||
},
|
},
|
||||||
@ -46,7 +46,7 @@
|
|||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"chevrotain": "^7.0.3",
|
"chevrotain": "^7.0.3",
|
||||||
"codemirror": "^5.58.2"
|
"codemirror": "^5.58.3"
|
||||||
},
|
},
|
||||||
"repository": {
|
"repository": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
|
@ -336,7 +336,13 @@ match "World"
|
|||||||
<h3 id="tut-final">Putting it all together</h3>
|
<h3 id="tut-final">Putting it all together</h3>
|
||||||
<p>Grouping, repetition, and matching are the 3 primary elements that make up H2R. They can be combined in any way to generate a regular expression. See the <a href="index.html">main page</a> for an example that combines all of the above to parse a URL.</p>
|
<p>Grouping, repetition, and matching are the 3 primary elements that make up H2R. They can be combined in any way to generate a regular expression. See the <a href="index.html">main page</a> for an example that combines all of the above to parse a URL.</p>
|
||||||
|
|
||||||
<h3>Miscellaneous features</h3>
|
<h3>Advanced features</h3>
|
||||||
|
|
||||||
|
<p class="font-weight-bold" id="tut-back">Backreferences</p>
|
||||||
|
<p>TODO</p>
|
||||||
|
|
||||||
|
<p class="font-weight-bold" id="tut-if">If statements</p>
|
||||||
|
<p>TODO</p>
|
||||||
|
|
||||||
<p class="font-weight-bold" id="tut-unicode">Unicode character properties</p>
|
<p class="font-weight-bold" id="tut-unicode">Unicode character properties</p>
|
||||||
<p>You can match specific unicode sequences using <code class="cm-s-idea">"\uXXXX"
|
<p>You can match specific unicode sequences using <code class="cm-s-idea">"\uXXXX"
|
||||||
|
499
src/generator.ts
499
src/generator.ts
@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last, unusedParameter, makeFlag, append } from "./utilities";
|
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last, unusedParameter, makeFlag, append } from "./utilities";
|
||||||
import { IToken } from "chevrotain";
|
import { IToken } from "chevrotain";
|
||||||
|
import { minimizeMatchString, groupIfRequired, dontClobberRepetition } from "./generator_helper";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* List of regular expression dialects we support
|
* List of regular expression dialects we support
|
||||||
@ -63,31 +64,54 @@ const unicode_script_codes = [
|
|||||||
];
|
];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The base concrete syntax tree class
|
* Context for validation
|
||||||
*
|
*
|
||||||
|
* @remarks Currently only used to validate groups
|
||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
export abstract class H2RCST {
|
export class GeneratorContext {
|
||||||
|
public groups: { [ key: string ]: { startLine: number, startColumn: number, length: number } } = {};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor for H2RCST
|
* Checks to see if we already have a group defined
|
||||||
*
|
*
|
||||||
* @param tokens Tokens used to calculate where an error occurred
|
* @param identifier the group name
|
||||||
* @internal
|
* @returns true if the group name already exists
|
||||||
*/
|
*/
|
||||||
constructor(public tokens: IToken[]) {
|
public hasGroup(identifier: string): boolean {
|
||||||
/* empty */
|
return Object.prototype.hasOwnProperty.call(this.groups, identifier);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds the identifier to the group list
|
||||||
|
*
|
||||||
|
* @param identifier the group name
|
||||||
|
*/
|
||||||
|
public addGroup(identifier: string, tokens: IToken[]): void {
|
||||||
|
const f = first(tokens);
|
||||||
|
const l = last(tokens);
|
||||||
|
|
||||||
|
this.groups[identifier] = {
|
||||||
|
startLine: f.startLine ?? NaN,
|
||||||
|
startColumn: f.startColumn ?? NaN,
|
||||||
|
length: (l.endOffset ?? l.startOffset) - f.startOffset,
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
interface Generates {
|
||||||
/**
|
/**
|
||||||
* Validate that this is both valid and can be generated in the specified language
|
* Validate that this is both valid and can be generated in the specified language
|
||||||
*
|
*
|
||||||
* @remarks There is no guarantee toRegex will work unless validate returns no errors
|
* @remarks There is no guarantee toRegex will work unless validate returns no errors
|
||||||
*
|
*
|
||||||
* @param language the regex dialect we're validating
|
* @param language the regex dialect we're validating
|
||||||
|
* @param context the generator context
|
||||||
* @returns A list of errors
|
* @returns A list of errors
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
public abstract validate(language: RegexDialect): ISemanticError[];
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate a regular expression fragment based on this syntax tree
|
* Generate a regular expression fragment based on this syntax tree
|
||||||
@ -98,6 +122,26 @@ export abstract class H2RCST {
|
|||||||
* @returns a regular expression fragment
|
* @returns a regular expression fragment
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
|
toRegex(language: RegexDialect): string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The base concrete syntax tree class
|
||||||
|
*
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export abstract class H2RCST implements Generates {
|
||||||
|
/**
|
||||||
|
* Constructor for H2RCST
|
||||||
|
*
|
||||||
|
* @param tokens Tokens used to calculate where an error occurred
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
constructor(public tokens: IToken[]) {
|
||||||
|
/* empty */
|
||||||
|
}
|
||||||
|
|
||||||
|
public abstract validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
public abstract toRegex(language: RegexDialect): string;
|
public abstract toRegex(language: RegexDialect): string;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -186,7 +230,7 @@ export class MatchSubStatementValue {
|
|||||||
*
|
*
|
||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
export class MatchStatementValue {
|
export class MatchStatementValue implements Generates {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor for MatchStatementValue
|
* Constructor for MatchStatementValue
|
||||||
@ -198,6 +242,21 @@ export class MatchStatementValue {
|
|||||||
constructor(public optional: boolean, public statement: MatchSubStatementCST) {
|
constructor(public optional: boolean, public statement: MatchSubStatementCST) {
|
||||||
/* empty */
|
/* empty */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
|
||||||
|
return this.statement.validate(language, context);
|
||||||
|
}
|
||||||
|
|
||||||
|
public toRegex(language: RegexDialect): string {
|
||||||
|
let match_stmt = this.statement.toRegex(language);
|
||||||
|
|
||||||
|
// need to group if optional and ungrouped
|
||||||
|
if (this.optional) {
|
||||||
|
match_stmt = groupIfRequired(match_stmt) + "?";
|
||||||
|
}
|
||||||
|
|
||||||
|
return match_stmt;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -227,11 +286,11 @@ export class MatchSubStatementCST extends H2RCST {
|
|||||||
super(tokens);
|
super(tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
public validate(language: RegexDialect): ISemanticError[] {
|
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
|
||||||
const errors: ISemanticError[] = [];
|
const errors: ISemanticError[] = [];
|
||||||
|
|
||||||
if (this.count) {
|
if (this.count) {
|
||||||
append(errors, this.count.validate(language));
|
append(errors, this.count.validate(language, context));
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const value of this.values) {
|
for (const value of this.values) {
|
||||||
@ -353,56 +412,16 @@ export class MatchSubStatementCST extends H2RCST {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let ret = "";
|
let ret = minimizeMatchString(matches);
|
||||||
|
|
||||||
let require_grouping = false;
|
|
||||||
let dont_clobber_plus = false;
|
|
||||||
|
|
||||||
if (matches.length === 1) {
|
|
||||||
ret = first(matches);
|
|
||||||
if (ret.endsWith("+")) {
|
|
||||||
dont_clobber_plus = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
ret = minimizeMatchString(matches);
|
|
||||||
|
|
||||||
if (ret.length > 1 &&
|
|
||||||
(!ret.startsWith("(") || !ret.endsWith("["))) {
|
|
||||||
require_grouping = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this.count) {
|
if (this.count) {
|
||||||
if (dont_clobber_plus) {
|
if (matches.length === 1) {
|
||||||
const clobber = this.count.toRegex(language);
|
// we don't group if there's only 1 element
|
||||||
|
// but we need to make sure we don't add an additional + or *
|
||||||
// + can be ignored as well as a count as long as that count is > 0
|
ret = dontClobberRepetition(ret, this.count.toRegex(language));
|
||||||
switch (clobber) {
|
|
||||||
case "*":
|
|
||||||
case "?":
|
|
||||||
ret = "(?:" + ret + ")" + clobber;
|
|
||||||
break;
|
|
||||||
case "+":
|
|
||||||
// ignore
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
if (clobber.startsWith("{0")) {
|
|
||||||
ret = "(?:" + ret + ")" + clobber;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// remove + and replace with count
|
|
||||||
ret.substring(0, ret.length - 1) + clobber;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (require_grouping) {
|
ret = groupIfRequired(ret) + this.count.toRegex(language);
|
||||||
ret = "(?:" + ret + ")";
|
|
||||||
}
|
|
||||||
|
|
||||||
ret += this.count.toRegex(language);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -427,8 +446,9 @@ export class UsingStatementCST extends H2RCST {
|
|||||||
super(tokens);
|
super(tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
public validate(language: RegexDialect): ISemanticError[] {
|
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
|
||||||
unusedParameter(language, "Using Statement does not change based on language");
|
unusedParameter(language, "Count does not need checking");
|
||||||
|
unusedParameter(context, "Context is not needed");
|
||||||
|
|
||||||
const errors: ISemanticError[] = [];
|
const errors: ISemanticError[] = [];
|
||||||
let flag = this.flags[0];
|
let flag = this.flags[0];
|
||||||
@ -490,15 +510,13 @@ export class CountSubStatementCST extends H2RCST {
|
|||||||
super(tokens);
|
super(tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
public validate(language: RegexDialect): ISemanticError[] {
|
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
|
||||||
unusedParameter(language, "Count does not need checking");
|
unusedParameter(language, "Count does not need checking");
|
||||||
|
unusedParameter(context, "Context is not needed");
|
||||||
|
|
||||||
const errors: ISemanticError[] = [];
|
const errors: ISemanticError[] = [];
|
||||||
|
|
||||||
if (this.from < 0) {
|
if (this.to !== null && ((this.opt === "exclusive" && (this.to-1) <= this.from) || this.to <= this.from)) {
|
||||||
errors.push(this.error("Value cannot be negative"));
|
|
||||||
}
|
|
||||||
else if (this.to !== null && ((this.opt === "exclusive" && (this.to-1) <= this.from) || this.to <= this.from)) {
|
|
||||||
errors.push(this.error("Values must be in range of eachother"));
|
errors.push(this.error("Values must be in range of eachother"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -548,49 +566,27 @@ export class MatchStatementCST extends StatementCST {
|
|||||||
* Constructor for MatchStatementCST
|
* Constructor for MatchStatementCST
|
||||||
*
|
*
|
||||||
* @param tokens Tokens used to calculate where an error occurred
|
* @param tokens Tokens used to calculate where an error occurred
|
||||||
* @param matches
|
* @param matches the list of matches
|
||||||
*/
|
*/
|
||||||
constructor(tokens: IToken[], private completely_optional: boolean, private matches: MatchStatementValue[]) {
|
constructor(tokens: IToken[], private completely_optional: boolean, private matches: MatchStatementValue[]) {
|
||||||
super(tokens);
|
super(tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
public validate(language: RegexDialect): ISemanticError[] {
|
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
|
||||||
const errors: ISemanticError[] = [];
|
const errors: ISemanticError[] = [];
|
||||||
|
|
||||||
for (const match of this.matches) {
|
for (const match of this.matches) {
|
||||||
append(errors, match.statement.validate(language));
|
append(errors, match.statement.validate(language, context));
|
||||||
}
|
}
|
||||||
|
|
||||||
return errors;
|
return errors;
|
||||||
}
|
}
|
||||||
|
|
||||||
public toRegex(language: RegexDialect): string {
|
public toRegex(language: RegexDialect): string {
|
||||||
let final_matches = this.matches.map((x) => {
|
let final_matches = this.matches.map((x) => x.toRegex(language)).join("");
|
||||||
let match_stmt = x.statement.toRegex(language);
|
|
||||||
|
|
||||||
// need to group if optional and ungrouped
|
|
||||||
if (x.optional) {
|
|
||||||
if (!isSingleRegexCharacter(match_stmt)) {
|
|
||||||
// don't re-group a group
|
|
||||||
if (match_stmt[0] !== "(" && match_stmt[match_stmt.length-1] !== ")") {
|
|
||||||
match_stmt = "(?:" + match_stmt + ")";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
match_stmt += "?";
|
|
||||||
}
|
|
||||||
|
|
||||||
return match_stmt;
|
|
||||||
}).join("");
|
|
||||||
|
|
||||||
if (this.completely_optional) {
|
if (this.completely_optional) {
|
||||||
if (!isSingleRegexCharacter(final_matches)) {
|
final_matches = groupIfRequired(final_matches) + "?";
|
||||||
// don't re-group a group
|
|
||||||
if (final_matches[0] !== "(" && final_matches[final_matches.length-1] !== ")") {
|
|
||||||
final_matches = "(?:" + final_matches + ")";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final_matches += "?";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return final_matches;
|
return final_matches;
|
||||||
@ -616,22 +612,22 @@ export class RepeatStatementCST extends StatementCST {
|
|||||||
super(tokens);
|
super(tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
public validate(language: RegexDialect): ISemanticError[] {
|
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
|
||||||
const errors: ISemanticError[] = [];
|
const errors: ISemanticError[] = [];
|
||||||
|
|
||||||
if (this.count !== null) {
|
if (this.count !== null) {
|
||||||
append(errors, this.count.validate(language));
|
append(errors, this.count.validate(language, context));
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const statement of this.statements) {
|
for (const statement of this.statements) {
|
||||||
append(errors, statement.validate(language));
|
append(errors, statement.validate(language, context));
|
||||||
}
|
}
|
||||||
|
|
||||||
return errors;
|
return errors;
|
||||||
}
|
}
|
||||||
|
|
||||||
public toRegex(language: RegexDialect): string {
|
public toRegex(language: RegexDialect): string {
|
||||||
let str = "(?:" + this.statements.map((x) => x.toRegex(language)).join("") + ")";
|
let str = groupIfRequired(this.statements.map((x) => x.toRegex(language)).join(""));
|
||||||
|
|
||||||
if (this.count) {
|
if (this.count) {
|
||||||
str += this.count.toRegex(language);
|
str += this.count.toRegex(language);
|
||||||
@ -659,7 +655,7 @@ export class RepeatStatementCST extends StatementCST {
|
|||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
export class GroupStatementCST extends StatementCST {
|
export class GroupStatementCST extends StatementCST {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor for GroupStatementCST
|
* Constructor for GroupStatementCST
|
||||||
*
|
*
|
||||||
@ -673,16 +669,21 @@ export class GroupStatementCST extends StatementCST {
|
|||||||
super(tokens);
|
super(tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
public validate(language: RegexDialect): ISemanticError[] {
|
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
|
||||||
const errors : ISemanticError[] = [];
|
const errors : ISemanticError[] = [];
|
||||||
|
|
||||||
// All languages currently support named groups
|
if (this.name !== null) {
|
||||||
//if (false) {
|
if (context.hasGroup(this.name)) {
|
||||||
// errors.push(this.error("This language does not support named groups"));
|
const past_group = context.groups[this.name];
|
||||||
//}
|
// report where the earlier group was defined as line:column-line:column (columns come from startColumn, not startLine)
errors.push(this.error(`Group with name "${this.name}" was already defined here: ${past_group.startLine}:${past_group.startColumn}-${past_group.startLine}:${past_group.startColumn+past_group.length}`));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
context.addGroup(this.name, this.tokens);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (const statement of this.statements) {
|
for (const statement of this.statements) {
|
||||||
append(errors, statement.validate(language));
|
append(errors, statement.validate(language, context));
|
||||||
}
|
}
|
||||||
|
|
||||||
return errors;
|
return errors;
|
||||||
@ -711,6 +712,195 @@ export class GroupStatementCST extends StatementCST {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Concrete Syntax Tree for a Backreference statement
 *
 * @internal
 */
export class BackrefStatementCST extends StatementCST {

    /**
     * Constructor for BackrefStatementCST
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param optional is this backref optional
     * @param count optional number of times to repeat
     * @param name the group name to call
     */
    constructor(tokens: IToken[], private optional: boolean, private count: CountSubStatementCST | null, private name: string) {
        super(tokens);
    }

    /**
     * Validate the backreference
     *
     * Reports an error when the referenced group name is not known to the
     * context, then appends any errors reported by the count (if present).
     *
     * @param language the regex dialect we're validating
     * @param context the generator context holding the known group names
     * @returns A list of errors
     */
    public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
        const errors: ISemanticError[] = [];

        if (!context.hasGroup(this.name)) {
            errors.push(this.error(`Cannot call group with name "${this.name}" as it was never previously defined`));
        }

        if (this.count !== null) {
            append(errors, this.count.validate(language, context));
        }

        return errors;
    }

    /**
     * Generate the regular expression fragment for this backreference
     *
     * @param language the regex dialect we're generating
     * @returns the backreference, followed by its count and/or optional marker
     */
    public toRegex(language: RegexDialect): string {
        let str = "";

        switch (language) {
            case RegexDialect.Python:
                str = `(?P=${this.name})`;
                break;

            case RegexDialect.DotNet:
            case RegexDialect.Java:
                str = `\\k<${this.name}>`;
                break;

            // NOTE(review): every other dialect gets \g<name>; ECMAScript spells a
            // named backreference \k<name> - confirm which dialects reach this branch
            default:
                str = `\\g<${this.name}>`;
                break;
        }

        if (this.count) {
            str += this.count.toRegex(language);

            // group for optionality because count would be incorrect otherwise
            if (this.optional) {
                str = "(?:" + str + ")?";
            }
        }
        else if (this.optional) {
            // append the marker; assigning here would discard the backreference itself
            str += "?";
        }

        return str;
    }
}
|
||||||
|
|
||||||
|
/**
 * Concrete Syntax Tree for an "if <pattern>" conditional statement
 *
 * @internal
 */
export class IfPatternStatementCST extends StatementCST {
    /**
     * Constructor for IfPatternStatementCST
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param matches list of matches that make up the condition
     * @param true_statements statements emitted for the true path
     * @param false_statements statements emitted for the false path (may be empty)
     */
    constructor(tokens: IToken[], private matches: MatchStatementValue[], private true_statements: StatementCST[], private false_statements: StatementCST[]) {
        super(tokens);
    }

    /**
     * Reports dialects that cannot express this conditional, then validates
     * the condition and both branches
     *
     * @param language the regex dialect being validated against
     * @param context validation context shared by all statements
     * @returns the list of semantic errors found (empty when valid)
     */
    public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
        const problems: ISemanticError[] = [];

        switch (language) {
            case RegexDialect.Java:
            case RegexDialect.JS:
                problems.push(this.error("This language does not support conditionals"));
                break;

            case RegexDialect.Python:
                problems.push(this.error("This language does not support pattern conditionals"));
                break;

            default:
                break;
        }

        for (const condition_part of this.matches) {
            append(problems, condition_part.validate(language, context));
        }

        // true path first, then false path
        for (const branch_statement of [ ...this.true_statements, ...this.false_statements ]) {
            append(problems, branch_statement.validate(language, context));
        }

        return problems;
    }

    /**
     * Emits `(?(condition)true)` or `(?(condition)true|false)` depending on
     * whether a false path exists
     *
     * @param language the regex dialect to generate
     * @returns the regex fragment for this statement
     */
    public toRegex(language: RegexDialect): string {
        // joins a branch's statements and wraps the result in a group when needed
        const renderBranch = (branch: StatementCST[]): string => {
            return groupIfRequired(branch.map((node) => node.toRegex(language)).join(""));
        };

        const condition = this.matches.map((part) => part.toRegex(language)).join("");
        const when_true = renderBranch(this.true_statements);

        if (this.false_statements.length === 0) {
            return `(?(${condition})${when_true})`;
        }

        const when_false = renderBranch(this.false_statements);

        return `(?(${condition})${when_true}|${when_false})`;
    }
}
|
||||||
|
|
||||||
|
/**
 * Concrete Syntax Tree for an "if <group identifier>" conditional statement
 *
 * @internal
 */
export class IfIdentStatementCST extends StatementCST {
    /**
     * Constructor for IfIdentStatementCST
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param identifier the group identifier the condition tests
     * @param true_statements statements emitted for the true path
     * @param false_statements statements emitted for the false path (may be empty)
     */
    constructor(tokens: IToken[], private identifier: string, private true_statements: StatementCST[], private false_statements: StatementCST[]) {
        super(tokens);
    }

    /**
     * Reports dialects without conditionals and unknown group names, then
     * validates both branches
     *
     * @param language the regex dialect being validated against
     * @param context validation context holding the groups defined so far
     * @returns the list of semantic errors found (empty when valid)
     */
    public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
        const problems: ISemanticError[] = [];

        const lacks_conditionals = language === RegexDialect.Java || language === RegexDialect.JS;
        if (lacks_conditionals) {
            problems.push(this.error("This language does not support conditionals"));
        }

        const group_known = context.hasGroup(this.identifier);
        if (!group_known) {
            problems.push(this.error(`Group with name "${this.identifier}" does not exist`));
        }

        // true path first, then false path
        for (const branch_statement of [ ...this.true_statements, ...this.false_statements ]) {
            append(problems, branch_statement.validate(language, context));
        }

        return problems;
    }

    /**
     * Emits `(?(name)true)` or `(?(name)true|false)` depending on whether a
     * false path exists; Boost gets the name wrapped in angle brackets
     *
     * @param language the regex dialect to generate
     * @returns the regex fragment for this statement
     */
    public toRegex(language: RegexDialect): string {
        // be more clear with languages that support it
        const condition = language === RegexDialect.Boost
            ? "<" + this.identifier + ">"
            : this.identifier;

        // joins a branch's statements and wraps the result in a group when needed
        const renderBranch = (branch: StatementCST[]): string => {
            return groupIfRequired(branch.map((node) => node.toRegex(language)).join(""));
        };

        const when_true = renderBranch(this.true_statements);

        if (this.false_statements.length === 0) {
            return `(?(${condition})${when_true})`;
        }

        const when_false = renderBranch(this.false_statements);

        return `(?(${condition})${when_true}|${when_false})`;
    }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Concrete Syntax Tree for a regular expression
|
* Concrete Syntax Tree for a regular expression
|
||||||
*
|
*
|
||||||
@ -730,115 +920,20 @@ export class RegularExpressionCST extends H2RCST {
|
|||||||
super(tokens);
|
super(tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Validates the using statement first, then every top level statement in order
 *
 * @param language the regex dialect being validated against
 * @param context validation context shared by all statements
 * @returns the list of semantic errors found (empty when valid)
 */
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
    // start from the using statement's errors and keep appending to that list
    const problems: ISemanticError[] = this.usings.validate(language, context);

    this.statements.forEach((node) => {
        append(problems, node.validate(language, context));
    });

    return problems;
}
|
||||||
|
|
||||||
/**
 * Generates the complete regular expression
 *
 * The using statement yields a string containing a `{regex}` placeholder,
 * which is filled with the concatenation of every statement's output.
 *
 * @param language the regex dialect to generate
 * @returns the generated regular expression
 */
public toRegex(language: RegexDialect): string {
    const modifiers = this.usings.toRegex(language);
    const regex = this.statements.map((x) => x.toRegex(language)).join("");

    // use a replacer function: with a plain string replacement, sequences such
    // as `$&`, `$'` or `$$` inside the generated regex would be expanded as
    // replacement patterns instead of being inserted verbatim
    return modifiers.replace("{regex}", () => regex);
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Minimizes the match string by finding duplicates or substrings in the array
|
|
||||||
*
|
|
||||||
* @param arr the array of matches
|
|
||||||
* @internal
|
|
||||||
*/
|
|
||||||
export function minimizeMatchString(arr: string[]): string {
|
|
||||||
return minMatchString(arr, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Minimizes the match string by finding duplicates or substrings in the array
|
|
||||||
*
|
|
||||||
* @param arr the array
|
|
||||||
* @param depth must be 0 for initial call
|
|
||||||
* @internal
|
|
||||||
*/
|
|
||||||
function minMatchString(arr: string[], depth: number = 0): string {
|
|
||||||
// base case: arr is empty
|
|
||||||
if (arr.length === 0) {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
// base case: arr has 1 element (must have at least 2, so this means this value is optional)
|
|
||||||
if (arr.length === 1) {
|
|
||||||
return first(arr) + "?";
|
|
||||||
}
|
|
||||||
|
|
||||||
// remove duplicates
|
|
||||||
arr = [ ...new Set(arr) ];
|
|
||||||
|
|
||||||
// base case: arr has 1 element (after duplicate removal means this is required)
|
|
||||||
if (arr.length === 1) {
|
|
||||||
return first(arr);
|
|
||||||
}
|
|
||||||
|
|
||||||
// base case: arr is all single letters
|
|
||||||
if (arr.every(isSingleRegexCharacter)) {
|
|
||||||
return "[" + arr.join("") + "]";
|
|
||||||
}
|
|
||||||
|
|
||||||
// now the real magic begins
|
|
||||||
// You are not expected to understand this
|
|
||||||
|
|
||||||
let longest_begin_substring = first(arr);
|
|
||||||
let longest_end_substring = first(arr);
|
|
||||||
|
|
||||||
for (let i = 1; i < arr.length; i++) {
|
|
||||||
// reduce longest_substring to match everything
|
|
||||||
for (let j = 0; j < longest_begin_substring.length; j++) {
|
|
||||||
if (arr[i].length < j || longest_begin_substring[j] !== arr[i][j]) {
|
|
||||||
longest_begin_substring = longest_begin_substring.substr(0, j);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (let j = 0; j < longest_end_substring.length; j++) {
|
|
||||||
if (arr[i].length-j < 0 || longest_end_substring[longest_end_substring.length-j-1] !== arr[i][arr[i].length-j-1]) {
|
|
||||||
longest_end_substring = longest_end_substring.substr(longest_end_substring.length-j, longest_end_substring.length);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// No matches whatsoever
|
|
||||||
// *technically* we can optimize further, but that is a VERY non-trivial problem
|
|
||||||
// For example optimizing: [ "a1x1z", "a2y2z", "a3z3z" ] to: "a[123][xyz][123]z"
|
|
||||||
if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
|
|
||||||
if (depth > 0) {
|
|
||||||
return "(?:" + arr.join("|") + ")";
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return arr.join("|");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// we have some matches
|
|
||||||
else {
|
|
||||||
// remove begin (if exists) and end (if exists) from each element and remove empty strings
|
|
||||||
const begin_pos = longest_begin_substring.length;
|
|
||||||
const end_pos = longest_end_substring.length;
|
|
||||||
|
|
||||||
const similar_matches: string[] = [];
|
|
||||||
for (const ele of arr) {
|
|
||||||
const match = ele.substring(begin_pos, ele.length-end_pos);
|
|
||||||
if (match.length !== 0) {
|
|
||||||
similar_matches.push(match);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return longest_begin_substring + minMatchString(similar_matches, depth + 1) + longest_end_substring;
|
|
||||||
}
|
|
||||||
}
|
|
224
src/generator_helper.ts
Normal file
224
src/generator_helper.ts
Normal file
@ -0,0 +1,224 @@
|
|||||||
|
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Includes helper functions for the Generator
|
||||||
|
* @packageDocumentation
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { first, isSingleRegexCharacter } from "./utilities";
|
||||||
|
|
||||||
|
/**
 * Collapses a list of alternative matches into one fragment by factoring
 * out duplicates and shared leading/trailing text
 *
 * @param arr the array of matches
 * @returns the single element unchanged when there is exactly one, otherwise the minimized fragment
 * @internal
 */
export function minimizeMatchString(arr: string[]): string {
    // a lone element is returned untouched: the recursive helper would
    // treat it as an optional value, which is the wrong result here
    return arr.length === 1 ? first(arr) : minMatchString(arr, 0);
}
|
||||||
|
|
||||||
|
/**
 * Minimizes the match string by finding duplicates or substrings in the array
 *
 * The common leading and trailing text of all elements is factored out and
 * the differing middles are minimized recursively. A middle that is empty
 * for some element makes the whole middle optional.
 *
 * NOTE(review): the common prefix/suffix is computed character by character,
 * so it could presumably end in the middle of an escape sequence or a
 * character class when the elements are regex fragments - confirm what callers
 * pass in.
 *
 * @param arr the array
 * @param depth must be 0 for initial call
 * @returns an optimized string
 * @internal
 */
function minMatchString(arr: string[], depth: number = 0): string {
    // base case: arr is empty
    if (arr.length === 0) {
        return "";
    }

    // base case: arr has 1 element (must have at least 2, so this means this value is optional)
    // the element is grouped first so the "?" covers all of it, not just its last character
    if (arr.length === 1) {
        return groupIfRequired(first(arr)) + "?";
    }

    // remove duplicates
    arr = [ ...new Set(arr) ];

    // base case: arr has 1 element (after duplicate removal means this is required)
    if (arr.length === 1) {
        return first(arr);
    }

    // base case: arr is all single letters
    if (arr.every(isSingleRegexCharacter)) {
        return "[" + arr.join("") + "]";
    }

    // shrink the first element down to the prefix / suffix shared by every element
    let longest_begin_substring = first(arr);
    let longest_end_substring = first(arr);

    for (let i = 1; i < arr.length; i++) {
        const current = arr[i];

        let begin_length = 0;
        while (begin_length < longest_begin_substring.length &&
               longest_begin_substring[begin_length] === current[begin_length]) {
            begin_length++;
        }
        longest_begin_substring = longest_begin_substring.substring(0, begin_length);

        let end_length = 0;
        while (end_length < longest_end_substring.length &&
               end_length < current.length &&
               longest_end_substring[longest_end_substring.length - end_length - 1] === current[current.length - end_length - 1]) {
            end_length++;
        }
        longest_end_substring = longest_end_substring.substring(longest_end_substring.length - end_length);

        if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
            break;
        }
    }

    // the prefix and suffix must not overlap inside the shortest element
    // (e.g. [ "a", "aa" ] shares both the prefix "a" and the suffix "a")
    const shortest_length = Math.min(...arr.map((ele) => ele.length));
    const max_end_length = shortest_length - longest_begin_substring.length;
    if (longest_end_substring.length > max_end_length) {
        longest_end_substring = longest_end_substring.substring(longest_end_substring.length - max_end_length);
    }

    // No matches whatsoever
    // *technically* we can optimize further, but that is a VERY non-trivial problem
    // For example optimizing: [ "a1x1z", "a2y2z", "a3z3z" ] to: "a[123][xyz][123]z"
    if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
        if (depth > 0) {
            return "(?:" + arr.join("|") + ")";
        }
        else {
            return arr.join("|");
        }
    }

    // we have some matches:
    // remove begin (if exists) and end (if exists) from each element
    const begin_pos = longest_begin_substring.length;
    const end_pos = longest_end_substring.length;

    const similar_matches: string[] = [];
    let has_empty_match = false;

    for (const ele of arr) {
        const match = ele.substring(begin_pos, ele.length - end_pos);

        if (match.length !== 0) {
            similar_matches.push(match);
        }
        else {
            // some element is exactly prefix + suffix, so the middle is optional
            has_empty_match = true;
        }
    }

    let middle = minMatchString(similar_matches, depth + 1);

    // a single remaining match is already made optional by the base case above;
    // with several remaining matches the optionality has to be added here
    if (has_empty_match && similar_matches.length > 1) {
        middle = groupIfRequired(middle) + "?";
    }

    return longest_begin_substring + middle + longest_end_substring;
}
|
||||||
|
|
||||||
|
/**
 * Wraps a regex fragment in a non-capturing group unless it already forms
 * a single unit
 *
 * A fragment is left alone when it is a single regex character, or when it
 * starts with "(" / "[" and ends with the matching ")" / "]" and the scan
 * below finds no earlier point where that opening bracket is closed.
 *
 * @param fragment fragment of regular expression to potentially group
 * @returns the fragment itself, or the fragment inside a non-capturing group
 * @internal
 */
export function groupIfRequired(fragment: string): string {
    if (isSingleRegexCharacter(fragment)) {
        return fragment;
    }

    const grouped = "(?:" + fragment + ")";
    const head = fragment[0];
    const tail = fragment[fragment.length - 1];

    // which characters open / close a nesting level during the scan
    let opener: string | null;
    let closer: string;

    if (head === "(" && tail === ")") {
        opener = "(";
        closer = ")";
    }
    else if (head === "[" && tail === "]") {
        // you'll never have a raw [ inside a [], so nothing opens a level
        opener = null;
        closer = "]";
    }
    else {
        return grouped;
    }

    // a level of -1 means the leading bracket was closed before the end
    let level = 0;
    let i = 1;

    while (i < fragment.length - 2 && level !== -1) {
        const ch = fragment[i];

        if (ch === "\\") {
            // skip the escaped character
            i++;
        }
        else if (ch === opener) {
            level++;
        }
        else if (ch === closer) {
            level--;
        }

        i++;
    }

    return level === 0 ? fragment : grouped;
}
|
||||||
|
|
||||||
|
/**
 * Applies a repetition to a fragment without stacking it on top of a
 * trailing + or * quantifier
 *
 * A trailing "+" or "*" only counts as a quantifier when it is not escaped:
 * a fragment ending in `\+` or `\*` ends in a literal character and simply
 * gets the repetition appended.
 *
 * @param fragment fragment of regular expression
 * @param repetition repetition that may clobber the fragment
 * @returns the fragment with the repetition merged in
 */
export function dontClobberRepetition(fragment: string, repetition: string): string {
    const last_char = fragment.charAt(fragment.length - 1);

    // an odd number of backslashes directly before the last character escapes it
    let preceding_backslashes = 0;
    for (let i = fragment.length - 2; i >= 0 && fragment[i] === "\\"; i--) {
        preceding_backslashes++;
    }
    const is_escaped = preceding_backslashes % 2 === 1;

    // no trailing quantifier to worry about
    if (is_escaped || (last_char !== "+" && last_char !== "*")) {
        return fragment + repetition;
    }

    // + can be ignored as well as a count as long as that count is > 0
    if (last_char === "+") {
        switch (repetition) {
            case "*":
                // ignore: + is greater than *
                break;
            case "?":
                // non-greedy qualifier
                fragment += repetition;
                break;
            case "+":
                // ignore: already +
                break;
            default:
                if (repetition.startsWith("{0")) {
                    fragment = "(?:" + fragment + ")" + repetition;
                }
                else {
                    // remove + and replace with count
                    fragment = fragment.substring(0, fragment.length - 1) + repetition;
                }
                break;
        }
    }
    else {
        switch (repetition) {
            case "*":
                // ignore: already *
                break;
            case "?":
                // non-greedy qualifier
                fragment += repetition;
                break;
            default:
                // remove * and replace with count
                fragment = fragment.substring(0, fragment.length - 1) + repetition;
                break;
        }
    }

    return fragment;
}
|
101
src/parser.ts
101
src/parser.ts
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
import { EmbeddedActionsParser, IOrAlt, IToken } from "chevrotain";
|
import { EmbeddedActionsParser, IOrAlt, IToken } from "chevrotain";
|
||||||
import * as T from "./tokens";
|
import * as T from "./tokens";
|
||||||
import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST, RegexDialect } from "./generator";
|
import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST, RegexDialect, BackrefStatementCST, GeneratorContext, IfPatternStatementCST, IfIdentStatementCST } from "./generator";
|
||||||
import { first, usefulConditional, unusedParameter, CommonError } from "./utilities";
|
import { first, usefulConditional, unusedParameter, CommonError } from "./utilities";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -60,7 +60,7 @@ export class ParseResult {
|
|||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
public validate(language: RegexDialect): CommonError[] {
|
public validate(language: RegexDialect): CommonError[] {
|
||||||
return this.regexp_cst.validate(language).map(CommonError.fromSemanticError);
|
return this.regexp_cst.validate(language, new GeneratorContext()).map(CommonError.fromSemanticError);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -558,12 +558,107 @@ export class Human2RegexParser extends EmbeddedActionsParser {
|
|||||||
return new RepeatStatementCST(tokens, optional, count, statements);
|
return new RepeatStatementCST(tokens, optional, count, statements);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// backreference: [optional] call [count] [[the] group [called]] <identifier> EOL
const BackrefStatement = $.RULE("BackrefStatement", () => {
    // tokens handed to the CST node for error positions
    const position_tokens: IToken[] = [];
    let is_optional = false;
    let repeat_count: CountSubStatementCST | null = null;

    $.OPTION5(() => {
        position_tokens.push($.CONSUME(T.Optional));
        is_optional = true;
    });
    position_tokens.push($.CONSUME(T.Call));

    $.OPTION6(() => {
        repeat_count = $.SUBRULE(CountSubStatement);
    });

    // filler words: "the group called"
    $.OPTION7(() => {
        $.OPTION(() => $.CONSUME(T.The));
        $.CONSUME(T.Group);
        $.OPTION2(() => $.CONSUME(T.Called));
    });

    const group_name = $.CONSUME(T.Identifier).image;

    position_tokens.push($.CONSUME4(T.EndOfLine));

    return new BackrefStatementCST(position_tokens, is_optional, repeat_count, group_name);
});
|
||||||
|
|
||||||
|
// conditional: if (<identifier> | match <pattern list>) EOL <indented block> [else EOL <indented block>]
const IfStatement = $.RULE("IfStatement", () => {
    // tokens handed to the CST node for error positions
    const position_tokens: IToken[] = [];
    const condition_matches: MatchStatementValue[] = [];
    const when_true: StatementCST[] = [];
    const when_false: StatementCST[] = [];
    let is_optional = false;
    // stays empty when the condition is a pattern rather than a group identifier
    let group_name: string = "";

    position_tokens.push($.CONSUME(T.If));

    $.OR2([
        // condition is a group identifier
        {ALT: () => {
            group_name = $.CONSUME(T.Identifier).image;
        }},
        // condition is a list of matches
        {ALT: () => {
            $.CONSUME(T.Match);

            $.OPTION4(() => {
                $.CONSUME3(T.Optional);
                is_optional = true;
            });

            condition_matches.push(new MatchStatementValue(is_optional, $.SUBRULE(MatchSubStatement)));

            $.MANY(() => {
                // separator: "[and] then" or "and"
                $.OR([
                    { ALT: () => {
                        $.OPTION2(() => $.CONSUME2(T.And));
                        $.CONSUME(T.Then);
                    }},
                    { ALT: () => $.CONSUME(T.And) },
                ]);

                // optionality is tracked per match, so reset it each round
                is_optional = false;
                $.OPTION3(() => {
                    $.CONSUME2(T.Optional);
                    is_optional = true;
                });

                condition_matches.push(new MatchStatementValue(is_optional, $.SUBRULE2(MatchSubStatement)));
            });
        }}
    ]);

    position_tokens.push($.CONSUME3(T.EndOfLine));

    // true path
    $.CONSUME2(T.Indent);
    $.AT_LEAST_ONE2(() => {
        when_true.push($.SUBRULE(Statement));
    });
    $.CONSUME2(T.Outdent);

    // optional false path
    $.OPTION(() => {
        $.CONSUME(T.Else);
        $.CONSUME4(T.EndOfLine);
        $.CONSUME3(T.Indent);
        $.AT_LEAST_ONE3(() => {
            when_false.push($.SUBRULE2(Statement));
        });
        $.CONSUME3(T.Outdent);
    });

    if (group_name !== "") {
        return new IfIdentStatementCST(position_tokens, group_name, when_true, when_false);
    }

    return new IfPatternStatementCST(position_tokens, condition_matches, when_true, when_false);
});
|
||||||
|
|
||||||
// statement super class: dispatches to whichever concrete statement rule applies
const Statement = $.RULE("Statement", () => {
    const statement_kinds = [
        { ALT: () => $.SUBRULE(MatchStatement) },
        { ALT: () => $.SUBRULE(GroupStatement) },
        { ALT: () => $.SUBRULE(RepeatStatement) },
        { ALT: () => $.SUBRULE(BackrefStatement) },
        { ALT: () => $.SUBRULE(IfStatement) }
    ];

    return $.OR(statement_kinds);
});
|
||||||
|
|
||||||
|
@ -53,34 +53,17 @@ import { createToken, Lexer } from "chevrotain";
|
|||||||
/** @internal */ export const From = createToken({name: "From", pattern: /from/i});
|
/** @internal */ export const From = createToken({name: "From", pattern: /from/i});
|
||||||
/** @internal */ export const To = createToken({name: "To", pattern: /(to|through|thru|\-|\.\.\.?)/i});
|
/** @internal */ export const To = createToken({name: "To", pattern: /(to|through|thru|\-|\.\.\.?)/i});
|
||||||
/** @internal */ export const Create = createToken({name: "Create", pattern: /create(s)?/i});
|
/** @internal */ export const Create = createToken({name: "Create", pattern: /create(s)?/i});
|
||||||
/** @internal */ export const Called = createToken({name: "Called", pattern: /name(d)?|call(ed)?/i});
|
/** @internal */ export const Called = createToken({name: "Called", pattern: /named|called/i});
|
||||||
/** @internal */ export const Repeat = createToken({name: "Repeat", pattern: /repeat(s|ing)?/i});
|
/** @internal */ export const Repeat = createToken({name: "Repeat", pattern: /repeat(s|ing)?/i});
|
||||||
/** @internal */ export const Newline = createToken({name: "Newline", pattern: /(new line|newline)/i});
|
/** @internal */ export const Newline = createToken({name: "Newline", pattern: /(new line|newline)/i});
|
||||||
/** @internal */ export const CarriageReturn = createToken({name: "CarriageReturn", pattern: /carriage return/i});
|
/** @internal */ export const CarriageReturn = createToken({name: "CarriageReturn", pattern: /carriage return/i});
|
||||||
/** @internal */ export const CaseInsensitive = createToken({name: "CaseInsensitive", pattern: /case insensitive/i});
|
/** @internal */ export const CaseInsensitive = createToken({name: "CaseInsensitive", pattern: /case insensitive/i});
|
||||||
/** @internal */ export const CaseSensitive = createToken({name: "CaseSensitive", pattern: /case sensitive/i});
|
/** @internal */ export const CaseSensitive = createToken({name: "CaseSensitive", pattern: /case sensitive/i});
|
||||||
/** @internal */ export const OrMore = createToken({name: "OrMore", pattern: /\+|or more/i});
|
/** @internal */ export const OrMore = createToken({name: "OrMore", pattern: /\+|or more/i});
|
||||||
|
/** @internal */ export const Call = createToken({name: "Call", pattern: /call|invoke|execute|(re ?)?run/i });
|
||||||
/*
|
/** @internal */ export const The = createToken({name: "The", pattern: /the/i });
|
||||||
//Not being used currently
|
/** @internal */ export const If = createToken({name: "If", pattern: /if/i });
|
||||||
export const Of = createToken({name: "Of", pattern: /of/i});
|
/** @internal */ export const Else = createToken({name: "Else", pattern: /else|otherwise/i });
|
||||||
export const Nothing = createToken({name: "Nothing", pattern: /nothing/i});
|
|
||||||
export const As = createToken({name: "As", pattern: /as/i});
|
|
||||||
export const If = createToken({name: "If", pattern: /if/i});
|
|
||||||
export const Start = createToken({name: "Start", pattern: /start(s) with?/i});
|
|
||||||
export const Ends = createToken({name: "Ends", pattern: /end(s)? with/i});
|
|
||||||
export const Else = createToken({name: "Else", pattern: /(other wise|otherwise|else)/i});
|
|
||||||
export const Unless = createToken({name: "Unless", pattern: /unless/i});
|
|
||||||
export const While = createToken({name: "While", pattern: /while/i});
|
|
||||||
export const More = createToken({name: "More", pattern: /more/i});
|
|
||||||
export const LBracket = createToken({name: "Left Bracket", pattern: /\(/ });
|
|
||||||
export const RBracket = createToken({name: "Right Bracket", pattern: /\)/ });
|
|
||||||
export const None = createToken({name: "None", pattern: /none/i});
|
|
||||||
export const Neither = createToken({name: "Neither", pattern: /neither/i});
|
|
||||||
export const The = createToken({name: "The", pattern: /the/i }); //, longer_alt: Then});
|
|
||||||
export const By = createToken({name: "By", pattern: /by/i});
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/** @internal */ export const EndOfLine = createToken({name: "EOL", pattern: /\n/});
|
/** @internal */ export const EndOfLine = createToken({name: "EOL", pattern: /\n/});
|
||||||
/** @internal */ export const WS = createToken({name: "Whitespace", pattern: /[^\S\n]+/, start_chars_hint: [ " ", "\r" ], group: Lexer.SKIPPED});
|
/** @internal */ export const WS = createToken({name: "Whitespace", pattern: /[^\S\n]+/, start_chars_hint: [ " ", "\r" ], group: Lexer.SKIPPED});
|
||||||
@ -127,22 +110,11 @@ export const AllTokens = [
|
|||||||
Whitespace,
|
Whitespace,
|
||||||
Number,
|
Number,
|
||||||
Unicode,
|
Unicode,
|
||||||
/*
|
Called,
|
||||||
Of,
|
Call,
|
||||||
As,
|
|
||||||
If,
|
If,
|
||||||
Start,
|
|
||||||
Ends,
|
|
||||||
Else,
|
Else,
|
||||||
Unless,
|
|
||||||
While,
|
|
||||||
More,
|
|
||||||
Nothing,
|
|
||||||
By,
|
|
||||||
The,
|
The,
|
||||||
None,
|
|
||||||
Neither,
|
|
||||||
*/
|
|
||||||
Using,
|
Using,
|
||||||
Global,
|
Global,
|
||||||
Multiline,
|
Multiline,
|
||||||
@ -158,7 +130,6 @@ export const AllTokens = [
|
|||||||
Exclusive,
|
Exclusive,
|
||||||
From,
|
From,
|
||||||
Create,
|
Create,
|
||||||
Called,
|
|
||||||
Repeat,
|
Repeat,
|
||||||
Newline,
|
Newline,
|
||||||
CarriageReturn,
|
CarriageReturn,
|
||||||
|
@ -186,6 +186,7 @@ export class CommonError {
|
|||||||
*
|
*
|
||||||
* @param error The lexing error
|
* @param error The lexing error
|
||||||
* @returns a new CommonError
|
* @returns a new CommonError
|
||||||
|
* @internal
|
||||||
*/
|
*/
|
||||||
public static fromLexError(error: ILexingError): CommonError {
|
public static fromLexError(error: ILexingError): CommonError {
|
||||||
// not really fond of --> and <--
|
// not really fond of --> and <--
|
||||||
@ -199,6 +200,7 @@ export class CommonError {
|
|||||||
*
|
*
|
||||||
* @param error The parsing error
|
* @param error The parsing error
|
||||||
* @returns a new CommonError
|
* @returns a new CommonError
|
||||||
|
* @internal
|
||||||
*/
|
*/
|
||||||
public static fromParseError(error: IRecognitionException): CommonError {
|
public static fromParseError(error: IRecognitionException): CommonError {
|
||||||
// not really fond of --> and <--
|
// not really fond of --> and <--
|
||||||
@ -212,6 +214,7 @@ export class CommonError {
|
|||||||
*
|
*
|
||||||
* @param error The semantic error
|
* @param error The semantic error
|
||||||
* @returns a new CommonError
|
* @returns a new CommonError
|
||||||
|
* @internal
|
||||||
*/
|
*/
|
||||||
public static fromSemanticError(error: ISemanticError): CommonError {
|
public static fromSemanticError(error: ISemanticError): CommonError {
|
||||||
return new CommonError("Semantic Error", error.startLine, error.startColumn, error.length, error.message);
|
return new CommonError("Semantic Error", error.startLine, error.startColumn, error.length, error.message);
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import { Human2RegexParser, Human2RegexParserOptions } from "../src/parser";
|
import { Human2RegexParser, Human2RegexParserOptions } from "../src/parser";
|
||||||
import { Human2RegexLexer, Human2RegexLexerOptions } from "../src/lexer";
|
import { Human2RegexLexer, Human2RegexLexerOptions } from "../src/lexer";
|
||||||
import { RegexDialect, minimizeMatchString } from "../src/generator";
|
import { RegexDialect } from "../src/generator";
|
||||||
|
|
||||||
|
|
||||||
describe("Generator functionality", function() {
|
describe("Generator functionality", function() {
|
||||||
@ -67,6 +67,14 @@ describe("Generator functionality", function() {
|
|||||||
const toks5 = lexer.tokenize('match between 2 and 2 exclusive "hello"').tokens;
|
const toks5 = lexer.tokenize('match between 2 and 2 exclusive "hello"').tokens;
|
||||||
const reg5 = parser.parse(toks5);
|
const reg5 = parser.parse(toks5);
|
||||||
expect(reg5.validate(RegexDialect.JS).length).toBeGreaterThan(0);
|
expect(reg5.validate(RegexDialect.JS).length).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
const toks6 = lexer.tokenize('create a group called thing\n\tmatch "hi"\ncreate a group called thing\n\tmatch "hi"\n').tokens;
|
||||||
|
const reg6 = parser.parse(toks6);
|
||||||
|
expect(reg6.validate(RegexDialect.JS).length).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
const toks7 = lexer.tokenize("invoke thing").tokens;
|
||||||
|
const reg7 = parser.parse(toks7);
|
||||||
|
expect(reg7.validate(RegexDialect.JS).length).toBeGreaterThan(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("handles ranges", function() {
|
it("handles ranges", function() {
|
||||||
@ -97,6 +105,12 @@ describe("Generator functionality", function() {
|
|||||||
expect(reg2.validate(RegexDialect.JS).length).toBe(0);
|
expect(reg2.validate(RegexDialect.JS).length).toBe(0);
|
||||||
expect(reg2.toRegex(RegexDialect.JS)).toBe("/[a-zA-Z][+-]?\\d+[+-]?(?:(?:\\d+[,.]?\\d*)|(?:[,.]\\d+))/");
|
expect(reg2.toRegex(RegexDialect.JS)).toBe("/[a-zA-Z][+-]?\\d+[+-]?(?:(?:\\d+[,.]?\\d*)|(?:[,.]\\d+))/");
|
||||||
expect(reg2.toRegex(RegexDialect.PCRE)).toBe("/[[:alpha:]][+-]?\\d+[+-]?(?:(?:\\d+[,.]?\\d*)|(?:[,.]\\d+))/");
|
expect(reg2.toRegex(RegexDialect.PCRE)).toBe("/[[:alpha:]][+-]?\\d+[+-]?(?:(?:\\d+[,.]?\\d*)|(?:[,.]\\d+))/");
|
||||||
|
|
||||||
|
const toks3 = lexer.tokenize("match not letter, not integer, not decimal").tokens;
|
||||||
|
const reg3 = parser.parse(toks3);
|
||||||
|
expect(reg3.validate(RegexDialect.JS).length).toBe(0);
|
||||||
|
expect(reg3.toRegex(RegexDialect.JS)).toBe("/[^a-zA-Z](?![+-]?\\d+)(?![+-]?(?:(?:\\d+[,.]?\\d*)|(?:[,.]\\d+)))/");
|
||||||
|
expect(reg3.toRegex(RegexDialect.PCRE)).toBe("/[^[:alpha:]](?![+-]?\\d+)(?![+-]?(?:(?:\\d+[,.]?\\d*)|(?:[,.]\\d+)))/");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("doesn't clobber repetition", function() {
|
it("doesn't clobber repetition", function() {
|
||||||
@ -115,23 +129,6 @@ describe("Generator functionality", function() {
|
|||||||
expect(reg1.toRegex(RegexDialect.JS)).toBe("/(?!hello){1,6}/");
|
expect(reg1.toRegex(RegexDialect.JS)).toBe("/(?!hello){1,6}/");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("can minimize matches", function() {
|
|
||||||
const test_cases = [
|
|
||||||
{ from: [ "abc", "abc" ], to: "abc" },
|
|
||||||
{ from: [ "a", "ab" ], to: "ab?" },
|
|
||||||
{ from: [ "a1x1z", "a2y2z", "a3z3z" ], to: "a(?:1x1|2y2|3z3)z" },
|
|
||||||
{ from: [ "ab", "cd" ], to: "ab|cd" },
|
|
||||||
{ from: [ "abc", "bc" ], to: "a?bc" },
|
|
||||||
{ from: [ "abc", "xb" ], to: "abc|xb" }
|
|
||||||
];
|
|
||||||
|
|
||||||
for (const c of test_cases) {
|
|
||||||
const got = minimizeMatchString(c.from);
|
|
||||||
|
|
||||||
expect(got).toBe(c.to);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
it("optimizes correctly", function() {
|
it("optimizes correctly", function() {
|
||||||
const toks0 = lexer.tokenize('match "a" or "b" or "b"').tokens;
|
const toks0 = lexer.tokenize('match "a" or "b" or "b"').tokens;
|
||||||
const reg0 = parser.parse(toks0);
|
const reg0 = parser.parse(toks0);
|
||||||
@ -157,6 +154,44 @@ describe("Generator functionality", function() {
|
|||||||
const reg4 = parser.parse(toks4);
|
const reg4 = parser.parse(toks4);
|
||||||
expect(reg4.validate(RegexDialect.JS).length).toBe(0);
|
expect(reg4.validate(RegexDialect.JS).length).toBe(0);
|
||||||
expect(reg4.toRegex(RegexDialect.JS)).toBe("/a(?:1x1|2x2|3x3)z/");
|
expect(reg4.toRegex(RegexDialect.JS)).toBe("/a(?:1x1|2x2|3x3)z/");
|
||||||
|
|
||||||
|
const toks5 = lexer.tokenize('match "a", maybe "b" or "c"').tokens;
|
||||||
|
const reg5 = parser.parse(toks5);
|
||||||
|
expect(reg5.validate(RegexDialect.JS).length).toBe(0);
|
||||||
|
expect(reg5.toRegex(RegexDialect.JS)).toBe("/a[bc]?/");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("can generate backreferences", function() {
|
||||||
|
const toks0 = lexer.tokenize('create a group called thing\n\tmatch "Hello World"\ninvoke thing\noptionally call 3 times the group called thing').tokens;
|
||||||
|
const reg0 = parser.parse(toks0);
|
||||||
|
expect(reg0.validate(RegexDialect.JS).length).toBe(0);
|
||||||
|
|
||||||
|
expect(reg0.toRegex(RegexDialect.JS)).toBe("/(?<thing>Hello World)\\g<thing>(?:\\g<thing>{3})?/");
|
||||||
|
expect(reg0.toRegex(RegexDialect.PCRE)).toBe("/(?P<thing>Hello World)\\g<thing>(?:\\g<thing>{3})?/");
|
||||||
|
expect(reg0.toRegex(RegexDialect.Python)).toBe("/(?P<thing>Hello World)(?P=thing)(?:(?P=thing){3})?/");
|
||||||
|
expect(reg0.toRegex(RegexDialect.DotNet)).toBe("/(?<thing>Hello World)\\k<thing>(?:\\k<thing>{3})?/");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("can generate if statements", function() {
|
||||||
|
const toks0 = lexer.tokenize('if matches "a"\n\tmatch "b"\n').tokens;
|
||||||
|
const reg0 = parser.parse(toks0);
|
||||||
|
expect(reg0.validate(RegexDialect.JS).length).toBeGreaterThan(0);
|
||||||
|
expect(reg0.validate(RegexDialect.PCRE).length).toBe(0);
|
||||||
|
expect(reg0.toRegex(RegexDialect.PCRE)).toBe("/(?(a)b)/");
|
||||||
|
|
||||||
|
const toks1 = lexer.tokenize('if matches "alpha", maybe "b" or "f"\n\tmatch "c"\nelse\n\tif matches "d"\n\t\tmatch "e"\n\telse\n\t\tmatch "f"').tokens;
|
||||||
|
const reg1 = parser.parse(toks1);
|
||||||
|
expect(reg1.validate(RegexDialect.JS).length).toBeGreaterThan(0);
|
||||||
|
expect(reg1.validate(RegexDialect.Python).length).toBeGreaterThan(0);
|
||||||
|
expect(reg1.validate(RegexDialect.PCRE).length).toBe(0);
|
||||||
|
expect(reg1.toRegex(RegexDialect.PCRE)).toBe("/(?(alpha[bf]?)c|(?(d)e|f))/");
|
||||||
|
|
||||||
|
const toks2 = lexer.tokenize('create a group called thing\n\tmatch "a"\nif thing\n\tmatch "b"\nelse\n\tmatch "c"\n').tokens;
|
||||||
|
const reg2 = parser.parse(toks2);
|
||||||
|
expect(reg2.validate(RegexDialect.JS).length).toBeGreaterThan(0);
|
||||||
|
expect(reg2.validate(RegexDialect.PCRE).length).toBe(0);
|
||||||
|
expect(reg2.toRegex(RegexDialect.PCRE)).toBe("/(?P<thing>a)(?(thing)b|c)/");
|
||||||
|
expect(reg2.toRegex(RegexDialect.Boost)).toBe("/(?<thing>a)(?(<thing>)b|c)/");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("generate dialect specific regex", function() {
|
it("generate dialect specific regex", function() {
|
||||||
@ -187,7 +222,7 @@ describe("Generator functionality", function() {
|
|||||||
|
|
||||||
it("runs complex scripts", function() {
|
it("runs complex scripts", function() {
|
||||||
const str = `
|
const str = `
|
||||||
using global and multiline and exact matching
|
using global and multiline and exact matching and case insensitive matching
|
||||||
create an optional group called protocol
|
create an optional group called protocol
|
||||||
match "http"
|
match "http"
|
||||||
optionally match "s"
|
optionally match "s"
|
||||||
@ -222,6 +257,6 @@ create an optional group
|
|||||||
const toks = lexer.tokenize(str).tokens;
|
const toks = lexer.tokenize(str).tokens;
|
||||||
const reg = parser.parse(toks);
|
const reg = parser.parse(toks);
|
||||||
expect(reg.validate(RegexDialect.JS).length).toBe(0);
|
expect(reg.validate(RegexDialect.JS).length).toBe(0);
|
||||||
expect(reg.toRegex(RegexDialect.JS)).toBe("/^(?<protocol>https?\\:\\/\\/)?(?<subdomain>(?:\\w+\\.)*)?(?<domain>(?:\\w+|_|\\-)+\\.\\w+)(?:\\:\\d*)?(?<path>(?:\\/(?:\\w+|_|\\-)*)*)?(\\?(?<query>(?:(?:\\w+|_|\\-)+=(?:\\w+|_|\\-)+)*))?(#.*)?$/gm");
|
expect(reg.toRegex(RegexDialect.JS)).toBe("/^(?<protocol>https?\\:\\/\\/)?(?<subdomain>(?:\\w+\\.)*)?(?<domain>(?:\\w+|_|\\-)+\\.\\w+)(?:\\:\\d*)?(?<path>(?:\\/(?:\\w+|_|\\-)*)*)?(\\?(?<query>(?:(?:\\w+|_|\\-)+=(?:\\w+|_|\\-)+)*))?(#.*)?$/gmi");
|
||||||
});
|
});
|
||||||
});
|
});
|
63
tests/generator_helper.spec.ts
Normal file
63
tests/generator_helper.spec.ts
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||||
|
|
||||||
|
import { minimizeMatchString, groupIfRequired, dontClobberRepetition } from "../src/generator_helper";
|
||||||
|
|
||||||
|
|
||||||
|
describe("Generator helper functionality", function() {
|
||||||
|
it("can minimize matches", function() {
|
||||||
|
const test_cases = [
|
||||||
|
{ from: [], to: "" },
|
||||||
|
{ from: [ "abc" ], to: "abc" },
|
||||||
|
{ from: [ "abc", "abc" ], to: "abc" },
|
||||||
|
{ from: [ "a", "ab" ], to: "ab?" },
|
||||||
|
{ from: [ "a1x1z", "a2y2z", "a3z3z" ], to: "a(?:1x1|2y2|3z3)z" },
|
||||||
|
{ from: [ "ab", "cd" ], to: "ab|cd" },
|
||||||
|
{ from: [ "abc", "bc" ], to: "a?bc" },
|
||||||
|
{ from: [ "abc", "xb" ], to: "abc|xb" }
|
||||||
|
];
|
||||||
|
|
||||||
|
for (const c of test_cases) {
|
||||||
|
const got = minimizeMatchString(c.from);
|
||||||
|
|
||||||
|
expect(got).toBe(c.to);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it("groups correctly", function() {
|
||||||
|
const test_cases = [
|
||||||
|
{ from: "(?P=test)", to: "(?P=test)" },
|
||||||
|
{ from: "[abc\\]]", to: "[abc\\]]" },
|
||||||
|
{ from: "abc", to: "(?:abc)" },
|
||||||
|
{ from: "(abc)|d", to: "(?:(abc)|d)" },
|
||||||
|
{ from: "[abc\\]][abc]", to: "(?:[abc\\]][abc])" },
|
||||||
|
{ from: "(abc(abc)\\))(abc)", to: "(?:(abc(abc)\\))(abc))" },
|
||||||
|
];
|
||||||
|
|
||||||
|
for (const c of test_cases) {
|
||||||
|
const got = groupIfRequired(c.from);
|
||||||
|
|
||||||
|
expect(got).toBe(c.to);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it("doesn't clobber the repetition", function() {
|
||||||
|
const test_cases = [
|
||||||
|
{ fragment: "1+", repetition: "+", expected: "1+" },
|
||||||
|
{ fragment: "1*", repetition: "+", expected: "1+" },
|
||||||
|
{ fragment: "1+", repetition: "*", expected: "1+" },
|
||||||
|
{ fragment: "1*", repetition: "*", expected: "1*" },
|
||||||
|
{ fragment: "1+", repetition: "?", expected: "1+?" },
|
||||||
|
{ fragment: "1*", repetition: "?", expected: "1*?" },
|
||||||
|
{ fragment: "1+", repetition: "{0,}", expected: "(?:1+){0,}" },
|
||||||
|
{ fragment: "1*", repetition: "{0,}", expected: "1{0,}" },
|
||||||
|
{ fragment: "1+", repetition: "{1,2}", expected: "1{1,2}" },
|
||||||
|
{ fragment: "1*", repetition: "{1,2}", expected: "1{1,2}" },
|
||||||
|
];
|
||||||
|
|
||||||
|
for (const c of test_cases) {
|
||||||
|
const got = dontClobberRepetition(c.fragment, c.repetition);
|
||||||
|
|
||||||
|
expect(got).toBe(c.expected);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
@ -95,8 +95,7 @@ module.exports = {
|
|||||||
after: {
|
after: {
|
||||||
root: "./lib",
|
root: "./lib",
|
||||||
include: [
|
include: [
|
||||||
"script.d.ts",
|
"script.d.ts"
|
||||||
"script.d.ts.map"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
Loading…
x
Reference in New Issue
Block a user