mirror of
https://github.com/pdemian/human2regex.git
synced 2025-05-16 04:20:35 -07:00
commit
f692514791
14
docs/bundle.min.js
vendored
14
docs/bundle.min.js
vendored
File diff suppressed because one or more lines are too long
15
lib/generator.d.ts
vendored
15
lib/generator.d.ts
vendored
@ -48,6 +48,12 @@ export declare class GeneratorContext {
|
||||
*/
|
||||
addGroup(identifier: string, tokens: IToken[]): void;
|
||||
}
|
||||
/**
|
||||
* Argument type: Just a plain object
|
||||
*/
|
||||
declare type GeneratorArguments = {
|
||||
[key: string]: string | boolean | number;
|
||||
};
|
||||
interface Generates {
|
||||
/**
|
||||
* Validate that this is both valid and can be generated in the specified language
|
||||
@ -66,10 +72,11 @@ interface Generates {
|
||||
* @remarks There is no guarantee toRegex will work unless validate returns no errors
|
||||
*
|
||||
* @param language the regex dialect we're generating
|
||||
* @param args any additional arguments we may have
|
||||
* @returns a regular expression fragment
|
||||
* @public
|
||||
*/
|
||||
toRegex(language: RegexDialect): string;
|
||||
toRegex(language: RegexDialect, args: GeneratorArguments | null): string;
|
||||
}
|
||||
/**
|
||||
* The base concrete syntax tree class
|
||||
@ -86,7 +93,7 @@ export declare abstract class H2RCST implements Generates {
|
||||
*/
|
||||
constructor(tokens: IToken[]);
|
||||
abstract validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||
abstract toRegex(language: RegexDialect): string;
|
||||
abstract toRegex(language: RegexDialect, args: GeneratorArguments | null): string;
|
||||
/**
|
||||
* Creates an ISemanticError with a given message and the tokens provided from the constructor
|
||||
*
|
||||
@ -171,7 +178,7 @@ export declare class MatchStatementValue implements Generates {
|
||||
*/
|
||||
constructor(optional: boolean, statement: MatchSubStatementCST);
|
||||
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||
toRegex(language: RegexDialect): string;
|
||||
toRegex(language: RegexDialect, args: GeneratorArguments | null): string;
|
||||
}
|
||||
/**
|
||||
* The base class for all statement concrete syntax trees
|
||||
@ -199,7 +206,7 @@ export declare class MatchSubStatementCST extends H2RCST {
|
||||
*/
|
||||
constructor(tokens: IToken[], count: CountSubStatementCST | null, invert: boolean, values: MatchSubStatementValue[]);
|
||||
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||
toRegex(language: RegexDialect): string;
|
||||
toRegex(language: RegexDialect, args: GeneratorArguments | null): string;
|
||||
}
|
||||
/**
|
||||
* Concrete Syntax Tree for Using statements
|
||||
|
@ -206,8 +206,8 @@ class MatchStatementValue {
|
||||
validate(language, context) {
|
||||
return this.statement.validate(language, context);
|
||||
}
|
||||
toRegex(language) {
|
||||
let match_stmt = this.statement.toRegex(language);
|
||||
toRegex(language, args) {
|
||||
let match_stmt = this.statement.toRegex(language, args);
|
||||
// need to group if optional and ungrouped
|
||||
if (this.optional) {
|
||||
match_stmt = generator_helper_1.groupIfRequired(match_stmt) + "?";
|
||||
@ -290,13 +290,18 @@ class MatchSubStatementCST extends H2RCST {
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
toRegex(language) {
|
||||
toRegex(language, args) {
|
||||
const matches = [];
|
||||
for (const value of this.values) {
|
||||
switch (value.type) {
|
||||
case MatchSubStatementType.SingleString: {
|
||||
const reg = utilities_1.regexEscape(utilities_1.removeQuotes(value.from));
|
||||
matches.push(this.invert ? `(?!${reg})` : reg);
|
||||
if (utilities_1.isSingleRegexCharacter(reg)) {
|
||||
matches.push(this.invert ? `[^${reg}]` : reg);
|
||||
}
|
||||
else {
|
||||
matches.push(this.invert ? `(?!${reg})` : reg);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case MatchSubStatementType.Between: {
|
||||
@ -359,7 +364,13 @@ class MatchSubStatementCST extends H2RCST {
|
||||
break;
|
||||
}
|
||||
}
|
||||
let ret = generator_helper_1.minimizeMatchString(matches);
|
||||
let ret = "";
|
||||
if (args !== null && args.has_neighbours === true) {
|
||||
ret = generator_helper_1.minimizeMatchString(matches, true);
|
||||
}
|
||||
else {
|
||||
ret = generator_helper_1.minimizeMatchString(matches);
|
||||
}
|
||||
if (this.count) {
|
||||
if (matches.length === 1) {
|
||||
// we don't group if there's only 1 element
|
||||
@ -510,7 +521,13 @@ class MatchStatementCST extends StatementCST {
|
||||
return errors;
|
||||
}
|
||||
toRegex(language) {
|
||||
let final_matches = this.matches.map((x) => x.toRegex(language)).join("");
|
||||
let final_matches = "";
|
||||
if (this.matches.length === 1) {
|
||||
final_matches = this.matches[0].toRegex(language, null);
|
||||
}
|
||||
else {
|
||||
final_matches = this.matches.map((x) => x.toRegex(language, { "has_neighbours": true })).join("");
|
||||
}
|
||||
if (this.completely_optional) {
|
||||
final_matches = generator_helper_1.groupIfRequired(final_matches) + "?";
|
||||
}
|
||||
@ -549,7 +566,7 @@ class RepeatStatementCST extends StatementCST {
|
||||
return errors;
|
||||
}
|
||||
toRegex(language) {
|
||||
let str = generator_helper_1.groupIfRequired(this.statements.map((x) => x.toRegex(language)).join(""));
|
||||
let str = generator_helper_1.groupIfRequired(this.statements.map((x) => x.toRegex(language, null)).join(""));
|
||||
if (this.count) {
|
||||
str += this.count.toRegex(language);
|
||||
// group for optionality because count would be incorrect otherwise
|
||||
@ -615,7 +632,7 @@ class GroupStatementCST extends StatementCST {
|
||||
}
|
||||
str += `<${this.name}>`;
|
||||
}
|
||||
str += this.statements.map((x) => x.toRegex(language)).join("");
|
||||
str += this.statements.map((x) => x.toRegex(language, null)).join("");
|
||||
str += (this.optional ? ")?" : ")");
|
||||
return str;
|
||||
}
|
||||
@ -719,10 +736,10 @@ class IfPatternStatementCST extends StatementCST {
|
||||
return errors;
|
||||
}
|
||||
toRegex(language) {
|
||||
const if_stmt = this.matches.map((x) => x.toRegex(language)).join("");
|
||||
const true_stmt = generator_helper_1.groupIfRequired(this.true_statements.map((x) => x.toRegex(language)).join(""));
|
||||
const if_stmt = this.matches.map((x) => x.toRegex(language, null)).join("");
|
||||
const true_stmt = generator_helper_1.groupIfRequired(this.true_statements.map((x) => x.toRegex(language, null)).join(""));
|
||||
if (this.false_statements.length > 0) {
|
||||
const false_stmt = generator_helper_1.groupIfRequired(this.false_statements.map((x) => x.toRegex(language)).join(""));
|
||||
const false_stmt = generator_helper_1.groupIfRequired(this.false_statements.map((x) => x.toRegex(language, null)).join(""));
|
||||
return `(?(${if_stmt})${true_stmt}|${false_stmt})`;
|
||||
}
|
||||
else {
|
||||
@ -773,9 +790,9 @@ class IfIdentStatementCST extends StatementCST {
|
||||
if (language === RegexDialect.Boost) {
|
||||
if_stmt = "<" + if_stmt + ">";
|
||||
}
|
||||
const true_stmt = generator_helper_1.groupIfRequired(this.true_statements.map((x) => x.toRegex(language)).join(""));
|
||||
const true_stmt = generator_helper_1.groupIfRequired(this.true_statements.map((x) => x.toRegex(language, null)).join(""));
|
||||
if (this.false_statements.length > 0) {
|
||||
const false_stmt = generator_helper_1.groupIfRequired(this.false_statements.map((x) => x.toRegex(language)).join(""));
|
||||
const false_stmt = generator_helper_1.groupIfRequired(this.false_statements.map((x) => x.toRegex(language, null)).join(""));
|
||||
return `(?(${if_stmt})${true_stmt}|${false_stmt})`;
|
||||
}
|
||||
else {
|
||||
@ -812,7 +829,7 @@ class RegularExpressionCST extends H2RCST {
|
||||
}
|
||||
toRegex(language) {
|
||||
const modifiers = this.usings.toRegex(language);
|
||||
const regex = this.statements.map((x) => x.toRegex(language)).join("");
|
||||
const regex = this.statements.map((x) => x.toRegex(language, null)).join("");
|
||||
return modifiers.replace("{regex}", regex);
|
||||
}
|
||||
}
|
||||
|
2
lib/generator_helper.d.ts
vendored
2
lib/generator_helper.d.ts
vendored
@ -5,7 +5,7 @@
|
||||
* @param arr the array of matches
|
||||
* @internal
|
||||
*/
|
||||
export declare function minimizeMatchString(arr: string[]): string;
|
||||
export declare function minimizeMatchString(arr: string[], has_neighbours?: boolean): string;
|
||||
/**
|
||||
* Groups a regex fragment if it needs to be grouped
|
||||
*
|
||||
|
@ -13,12 +13,12 @@ const utilities_1 = require("./utilities");
|
||||
* @param arr the array of matches
|
||||
* @internal
|
||||
*/
|
||||
function minimizeMatchString(arr) {
|
||||
function minimizeMatchString(arr, has_neighbours = false) {
|
||||
// don't process an array of length 1, otherwise you'll get the wrong result
|
||||
if (arr.length === 1) {
|
||||
return utilities_1.first(arr);
|
||||
}
|
||||
return minMatchString(arr, 0);
|
||||
return minMatchString(arr, has_neighbours ? 1 : 0);
|
||||
}
|
||||
exports.minimizeMatchString = minimizeMatchString;
|
||||
/**
|
||||
@ -107,6 +107,10 @@ function groupIfRequired(fragment) {
|
||||
if (utilities_1.isSingleRegexCharacter(fragment)) {
|
||||
return fragment;
|
||||
}
|
||||
else if ((fragment[fragment.length - 1] === "*" || fragment[fragment.length - 1] === "+") &&
|
||||
utilities_1.isSingleRegexCharacter(fragment.substring(0, fragment.length - 1))) {
|
||||
return fragment;
|
||||
}
|
||||
if (fragment[0] === "(" && fragment[fragment.length - 1] === ")") {
|
||||
let bracket_count = 0;
|
||||
for (let i = 1; i < fragment.length - 2; i++) {
|
||||
|
2
package-lock.json
generated
2
package-lock.json
generated
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "human2regex",
|
||||
"version": "1.1.0",
|
||||
"version": "1.1.1",
|
||||
"lockfileVersion": 1,
|
||||
"requires": true,
|
||||
"dependencies": {
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "human2regex",
|
||||
"version": "1.1.0",
|
||||
"version": "1.1.1",
|
||||
"description": "Humanized Regular Expressions",
|
||||
"main": "./lib/index.js",
|
||||
"typings": "./lib/index.d.ts",
|
||||
|
@ -100,6 +100,11 @@ export class GeneratorContext {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Argument type: Just a plain object
|
||||
*/
|
||||
type GeneratorArguments = { [key: string]: string | boolean | number };
|
||||
|
||||
interface Generates {
|
||||
/**
|
||||
* Validate that this is both valid and can be generated in the specified language
|
||||
@ -119,10 +124,11 @@ interface Generates {
|
||||
* @remarks There is no guarantee toRegex will work unless validate returns no errors
|
||||
*
|
||||
* @param language the regex dialect we're generating
|
||||
* @param args any additional arguments we may have
|
||||
* @returns a regular expression fragment
|
||||
* @public
|
||||
*/
|
||||
toRegex(language: RegexDialect): string;
|
||||
toRegex(language: RegexDialect, args: GeneratorArguments | null): string;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -142,7 +148,7 @@ export abstract class H2RCST implements Generates {
|
||||
}
|
||||
|
||||
public abstract validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||
public abstract toRegex(language: RegexDialect): string;
|
||||
public abstract toRegex(language: RegexDialect, args: GeneratorArguments | null): string;
|
||||
|
||||
/**
|
||||
* Creates an ISemanticError with a given message and the tokens provided from the constructor
|
||||
@ -247,8 +253,8 @@ export class MatchStatementValue implements Generates {
|
||||
return this.statement.validate(language, context);
|
||||
}
|
||||
|
||||
public toRegex(language: RegexDialect): string {
|
||||
let match_stmt = this.statement.toRegex(language);
|
||||
public toRegex(language: RegexDialect, args: GeneratorArguments | null): string {
|
||||
let match_stmt = this.statement.toRegex(language, args);
|
||||
|
||||
// need to group if optional and ungrouped
|
||||
if (this.optional) {
|
||||
@ -341,14 +347,20 @@ export class MatchSubStatementCST extends H2RCST {
|
||||
return errors;
|
||||
}
|
||||
|
||||
public toRegex(language: RegexDialect): string {
|
||||
public toRegex(language: RegexDialect, args: GeneratorArguments | null): string {
|
||||
const matches: string[] = [];
|
||||
|
||||
for (const value of this.values) {
|
||||
switch (value.type) {
|
||||
case MatchSubStatementType.SingleString: {
|
||||
const reg = regexEscape(removeQuotes(value.from as string));
|
||||
matches.push(this.invert ? `(?!${reg})` : reg);
|
||||
|
||||
if (isSingleRegexCharacter(reg)) {
|
||||
matches.push(this.invert ? `[^${reg}]` : reg);
|
||||
}
|
||||
else {
|
||||
matches.push(this.invert ? `(?!${reg})` : reg);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case MatchSubStatementType.Between: {
|
||||
@ -412,7 +424,13 @@ export class MatchSubStatementCST extends H2RCST {
|
||||
}
|
||||
}
|
||||
|
||||
let ret = minimizeMatchString(matches);
|
||||
let ret = "";
|
||||
if (args !== null && args.has_neighbours === true) {
|
||||
ret = minimizeMatchString(matches, true);
|
||||
}
|
||||
else {
|
||||
ret = minimizeMatchString(matches);
|
||||
}
|
||||
|
||||
if (this.count) {
|
||||
if (matches.length === 1) {
|
||||
@ -583,12 +601,18 @@ export class MatchStatementCST extends StatementCST {
|
||||
}
|
||||
|
||||
public toRegex(language: RegexDialect): string {
|
||||
let final_matches = this.matches.map((x) => x.toRegex(language)).join("");
|
||||
let final_matches = "";
|
||||
if (this.matches.length === 1) {
|
||||
final_matches = this.matches[0].toRegex(language, null);
|
||||
}
|
||||
else {
|
||||
final_matches = this.matches.map((x) => x.toRegex(language, { "has_neighbours": true })).join("");
|
||||
}
|
||||
|
||||
if (this.completely_optional) {
|
||||
final_matches = groupIfRequired(final_matches) + "?";
|
||||
}
|
||||
|
||||
|
||||
return final_matches;
|
||||
}
|
||||
}
|
||||
@ -627,7 +651,7 @@ export class RepeatStatementCST extends StatementCST {
|
||||
}
|
||||
|
||||
public toRegex(language: RegexDialect): string {
|
||||
let str = groupIfRequired(this.statements.map((x) => x.toRegex(language)).join(""));
|
||||
let str = groupIfRequired(this.statements.map((x) => x.toRegex(language, null)).join(""));
|
||||
|
||||
if (this.count) {
|
||||
str += this.count.toRegex(language);
|
||||
@ -704,7 +728,7 @@ export class GroupStatementCST extends StatementCST {
|
||||
str += `<${this.name}>`;
|
||||
}
|
||||
|
||||
str += this.statements.map((x) => x.toRegex(language)).join("");
|
||||
str += this.statements.map((x) => x.toRegex(language, null)).join("");
|
||||
|
||||
str += (this.optional ? ")?" : ")");
|
||||
|
||||
@ -825,11 +849,11 @@ export class IfPatternStatementCST extends StatementCST {
|
||||
}
|
||||
|
||||
public toRegex(language: RegexDialect): string {
|
||||
const if_stmt = this.matches.map((x) => x.toRegex(language)).join("");
|
||||
const true_stmt = groupIfRequired(this.true_statements.map((x) => x.toRegex(language)).join(""));
|
||||
const if_stmt = this.matches.map((x) => x.toRegex(language, null)).join("");
|
||||
const true_stmt = groupIfRequired(this.true_statements.map((x) => x.toRegex(language, null)).join(""));
|
||||
|
||||
if (this.false_statements.length > 0) {
|
||||
const false_stmt = groupIfRequired(this.false_statements.map((x) => x.toRegex(language)).join(""));
|
||||
const false_stmt = groupIfRequired(this.false_statements.map((x) => x.toRegex(language, null)).join(""));
|
||||
|
||||
return `(?(${if_stmt})${true_stmt}|${false_stmt})`;
|
||||
}
|
||||
@ -888,10 +912,10 @@ export class IfIdentStatementCST extends StatementCST {
|
||||
if_stmt = "<" + if_stmt + ">";
|
||||
}
|
||||
|
||||
const true_stmt = groupIfRequired(this.true_statements.map((x) => x.toRegex(language)).join(""));
|
||||
const true_stmt = groupIfRequired(this.true_statements.map((x) => x.toRegex(language, null)).join(""));
|
||||
|
||||
if (this.false_statements.length > 0) {
|
||||
const false_stmt = groupIfRequired(this.false_statements.map((x) => x.toRegex(language)).join(""));
|
||||
const false_stmt = groupIfRequired(this.false_statements.map((x) => x.toRegex(language, null)).join(""));
|
||||
|
||||
return `(?(${if_stmt})${true_stmt}|${false_stmt})`;
|
||||
}
|
||||
@ -932,7 +956,7 @@ export class RegularExpressionCST extends H2RCST {
|
||||
|
||||
public toRegex(language: RegexDialect): string {
|
||||
const modifiers = this.usings.toRegex(language);
|
||||
const regex = this.statements.map((x) => x.toRegex(language)).join("");
|
||||
const regex = this.statements.map((x) => x.toRegex(language, null)).join("");
|
||||
|
||||
return modifiers.replace("{regex}", regex);
|
||||
}
|
||||
|
@ -13,13 +13,13 @@ import { first, isSingleRegexCharacter } from "./utilities";
|
||||
* @param arr the array of matches
|
||||
* @internal
|
||||
*/
|
||||
export function minimizeMatchString(arr: string[]): string {
|
||||
export function minimizeMatchString(arr: string[], has_neighbours: boolean = false): string {
|
||||
// don't process an array of length 1, otherwise you'll get the wrong result
|
||||
if (arr.length === 1) {
|
||||
return first(arr);
|
||||
}
|
||||
|
||||
return minMatchString(arr, 0);
|
||||
return minMatchString(arr, has_neighbours ? 1 : 0);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -120,6 +120,10 @@ export function groupIfRequired(fragment: string): string {
|
||||
if (isSingleRegexCharacter(fragment)) {
|
||||
return fragment;
|
||||
}
|
||||
else if ((fragment[fragment.length-1] === "*" || fragment[fragment.length-1] === "+") &&
|
||||
isSingleRegexCharacter(fragment.substring(0, fragment.length-1))) {
|
||||
return fragment;
|
||||
}
|
||||
|
||||
if (fragment[0] === "(" && fragment[fragment.length-1] === ")") {
|
||||
let bracket_count = 0;
|
||||
|
@ -43,6 +43,13 @@ describe("Generator functionality", function() {
|
||||
expect(reg3.toRegex(RegexDialect.JS)).toBe("/(?:(?:hello){3,5})?/");
|
||||
});
|
||||
|
||||
it("generates an advanced regex", function() {
|
||||
const toks0 = lexer.tokenize('match "<" then a word or digit or "_" or "-" then ">"').tokens;
|
||||
const reg0 = parser.parse(toks0);
|
||||
expect(reg0.validate(RegexDialect.JS).length).toBe(0);
|
||||
expect(reg0.toRegex(RegexDialect.JS)).toBe("/<(?:\\w+|\\d|_|\\-)>/");
|
||||
});
|
||||
|
||||
it("validates invalid regexes", function() {
|
||||
const toks0 = lexer.tokenize('match unicode "NotARealClass"').tokens;
|
||||
const reg0 = parser.parse(toks0);
|
||||
@ -159,6 +166,16 @@ describe("Generator functionality", function() {
|
||||
const reg5 = parser.parse(toks5);
|
||||
expect(reg5.validate(RegexDialect.JS).length).toBe(0);
|
||||
expect(reg5.toRegex(RegexDialect.JS)).toBe("/a[bc]?/");
|
||||
|
||||
const toks6 = lexer.tokenize("optionally match 0+ any thing").tokens;
|
||||
const reg6 = parser.parse(toks6);
|
||||
expect(reg6.validate(RegexDialect.JS).length).toBe(0);
|
||||
expect(reg6.toRegex(RegexDialect.JS)).toBe("/.*?/");
|
||||
|
||||
const toks7 = lexer.tokenize('match 0+ not ">"').tokens;
|
||||
const reg7 = parser.parse(toks7);
|
||||
expect(reg7.validate(RegexDialect.JS).length).toBe(0);
|
||||
expect(reg7.toRegex(RegexDialect.JS)).toBe("/[^>]*/");
|
||||
});
|
||||
|
||||
it("can generate backreferences", function() {
|
||||
|
@ -30,6 +30,7 @@ describe("Generator helper functionality", function() {
|
||||
{ from: "(abc)|d", to: "(?:(abc)|d)" },
|
||||
{ from: "[abc\\]][abc]", to: "(?:[abc\\]][abc])" },
|
||||
{ from: "(abc(abc)\\))(abc)", to: "(?:(abc(abc)\\))(abc))" },
|
||||
{ from: ".*", to: ".*" }
|
||||
];
|
||||
|
||||
for (const c of test_cases) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user