1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-16 12:30:09 -07:00

Merge pull request #5 from pdemian/new-features

Bug fixes
This commit is contained in:
Patrick Demian 2021-01-19 02:07:15 -05:00 committed by GitHub
commit f692514791
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 123 additions and 49 deletions

14
docs/bundle.min.js vendored

File diff suppressed because one or more lines are too long

15
lib/generator.d.ts vendored
View File

@ -48,6 +48,12 @@ export declare class GeneratorContext {
*/ */
addGroup(identifier: string, tokens: IToken[]): void; addGroup(identifier: string, tokens: IToken[]): void;
} }
/**
* Argument type: Just a plain object
*/
declare type GeneratorArguments = {
[key: string]: string | boolean | number;
};
interface Generates { interface Generates {
/** /**
* Validate that this is both valid and can be generated in the specified language * Validate that this is both valid and can be generated in the specified language
@ -66,10 +72,11 @@ interface Generates {
* @remarks There is no guarantee toRegex will work unless validate returns no errors * @remarks There is no guarantee toRegex will work unless validate returns no errors
* *
* @param language the regex dialect we're generating * @param language the regex dialect we're generating
* @param args any additional arguments we may have
* @returns a regular expression fragment * @returns a regular expression fragment
* @public * @public
*/ */
toRegex(language: RegexDialect): string; toRegex(language: RegexDialect, args: GeneratorArguments | null): string;
} }
/** /**
* The base concrete syntax tree class * The base concrete syntax tree class
@ -86,7 +93,7 @@ export declare abstract class H2RCST implements Generates {
*/ */
constructor(tokens: IToken[]); constructor(tokens: IToken[]);
abstract validate(language: RegexDialect, context: GeneratorContext): ISemanticError[]; abstract validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
abstract toRegex(language: RegexDialect): string; abstract toRegex(language: RegexDialect, args: GeneratorArguments | null): string;
/** /**
* Creates an ISemanticError with a given message and the tokens provided from the constructor * Creates an ISemanticError with a given message and the tokens provided from the constructor
* *
@ -171,7 +178,7 @@ export declare class MatchStatementValue implements Generates {
*/ */
constructor(optional: boolean, statement: MatchSubStatementCST); constructor(optional: boolean, statement: MatchSubStatementCST);
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[]; validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
toRegex(language: RegexDialect): string; toRegex(language: RegexDialect, args: GeneratorArguments | null): string;
} }
/** /**
* The base class for all statement concrete syntax trees * The base class for all statement concrete syntax trees
@ -199,7 +206,7 @@ export declare class MatchSubStatementCST extends H2RCST {
*/ */
constructor(tokens: IToken[], count: CountSubStatementCST | null, invert: boolean, values: MatchSubStatementValue[]); constructor(tokens: IToken[], count: CountSubStatementCST | null, invert: boolean, values: MatchSubStatementValue[]);
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[]; validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
toRegex(language: RegexDialect): string; toRegex(language: RegexDialect, args: GeneratorArguments | null): string;
} }
/** /**
* Concrete Syntax Tree for Using statements * Concrete Syntax Tree for Using statements

View File

@ -206,8 +206,8 @@ class MatchStatementValue {
validate(language, context) { validate(language, context) {
return this.statement.validate(language, context); return this.statement.validate(language, context);
} }
toRegex(language) { toRegex(language, args) {
let match_stmt = this.statement.toRegex(language); let match_stmt = this.statement.toRegex(language, args);
// need to group if optional and ungrouped // need to group if optional and ungrouped
if (this.optional) { if (this.optional) {
match_stmt = generator_helper_1.groupIfRequired(match_stmt) + "?"; match_stmt = generator_helper_1.groupIfRequired(match_stmt) + "?";
@ -290,13 +290,18 @@ class MatchSubStatementCST extends H2RCST {
} }
return errors; return errors;
} }
toRegex(language) { toRegex(language, args) {
const matches = []; const matches = [];
for (const value of this.values) { for (const value of this.values) {
switch (value.type) { switch (value.type) {
case MatchSubStatementType.SingleString: { case MatchSubStatementType.SingleString: {
const reg = utilities_1.regexEscape(utilities_1.removeQuotes(value.from)); const reg = utilities_1.regexEscape(utilities_1.removeQuotes(value.from));
matches.push(this.invert ? `(?!${reg})` : reg); if (utilities_1.isSingleRegexCharacter(reg)) {
matches.push(this.invert ? `[^${reg}]` : reg);
}
else {
matches.push(this.invert ? `(?!${reg})` : reg);
}
break; break;
} }
case MatchSubStatementType.Between: { case MatchSubStatementType.Between: {
@ -359,7 +364,13 @@ class MatchSubStatementCST extends H2RCST {
break; break;
} }
} }
let ret = generator_helper_1.minimizeMatchString(matches); let ret = "";
if (args !== null && args.has_neighbours === true) {
ret = generator_helper_1.minimizeMatchString(matches, true);
}
else {
ret = generator_helper_1.minimizeMatchString(matches);
}
if (this.count) { if (this.count) {
if (matches.length === 1) { if (matches.length === 1) {
// we don't group if there's only 1 element // we don't group if there's only 1 element
@ -510,7 +521,13 @@ class MatchStatementCST extends StatementCST {
return errors; return errors;
} }
toRegex(language) { toRegex(language) {
let final_matches = this.matches.map((x) => x.toRegex(language)).join(""); let final_matches = "";
if (this.matches.length === 1) {
final_matches = this.matches[0].toRegex(language, null);
}
else {
final_matches = this.matches.map((x) => x.toRegex(language, { "has_neighbours": true })).join("");
}
if (this.completely_optional) { if (this.completely_optional) {
final_matches = generator_helper_1.groupIfRequired(final_matches) + "?"; final_matches = generator_helper_1.groupIfRequired(final_matches) + "?";
} }
@ -549,7 +566,7 @@ class RepeatStatementCST extends StatementCST {
return errors; return errors;
} }
toRegex(language) { toRegex(language) {
let str = generator_helper_1.groupIfRequired(this.statements.map((x) => x.toRegex(language)).join("")); let str = generator_helper_1.groupIfRequired(this.statements.map((x) => x.toRegex(language, null)).join(""));
if (this.count) { if (this.count) {
str += this.count.toRegex(language); str += this.count.toRegex(language);
// group for optionality because count would be incorrect otherwise // group for optionality because count would be incorrect otherwise
@ -615,7 +632,7 @@ class GroupStatementCST extends StatementCST {
} }
str += `<${this.name}>`; str += `<${this.name}>`;
} }
str += this.statements.map((x) => x.toRegex(language)).join(""); str += this.statements.map((x) => x.toRegex(language, null)).join("");
str += (this.optional ? ")?" : ")"); str += (this.optional ? ")?" : ")");
return str; return str;
} }
@ -719,10 +736,10 @@ class IfPatternStatementCST extends StatementCST {
return errors; return errors;
} }
toRegex(language) { toRegex(language) {
const if_stmt = this.matches.map((x) => x.toRegex(language)).join(""); const if_stmt = this.matches.map((x) => x.toRegex(language, null)).join("");
const true_stmt = generator_helper_1.groupIfRequired(this.true_statements.map((x) => x.toRegex(language)).join("")); const true_stmt = generator_helper_1.groupIfRequired(this.true_statements.map((x) => x.toRegex(language, null)).join(""));
if (this.false_statements.length > 0) { if (this.false_statements.length > 0) {
const false_stmt = generator_helper_1.groupIfRequired(this.false_statements.map((x) => x.toRegex(language)).join("")); const false_stmt = generator_helper_1.groupIfRequired(this.false_statements.map((x) => x.toRegex(language, null)).join(""));
return `(?(${if_stmt})${true_stmt}|${false_stmt})`; return `(?(${if_stmt})${true_stmt}|${false_stmt})`;
} }
else { else {
@ -773,9 +790,9 @@ class IfIdentStatementCST extends StatementCST {
if (language === RegexDialect.Boost) { if (language === RegexDialect.Boost) {
if_stmt = "<" + if_stmt + ">"; if_stmt = "<" + if_stmt + ">";
} }
const true_stmt = generator_helper_1.groupIfRequired(this.true_statements.map((x) => x.toRegex(language)).join("")); const true_stmt = generator_helper_1.groupIfRequired(this.true_statements.map((x) => x.toRegex(language, null)).join(""));
if (this.false_statements.length > 0) { if (this.false_statements.length > 0) {
const false_stmt = generator_helper_1.groupIfRequired(this.false_statements.map((x) => x.toRegex(language)).join("")); const false_stmt = generator_helper_1.groupIfRequired(this.false_statements.map((x) => x.toRegex(language, null)).join(""));
return `(?(${if_stmt})${true_stmt}|${false_stmt})`; return `(?(${if_stmt})${true_stmt}|${false_stmt})`;
} }
else { else {
@ -812,7 +829,7 @@ class RegularExpressionCST extends H2RCST {
} }
toRegex(language) { toRegex(language) {
const modifiers = this.usings.toRegex(language); const modifiers = this.usings.toRegex(language);
const regex = this.statements.map((x) => x.toRegex(language)).join(""); const regex = this.statements.map((x) => x.toRegex(language, null)).join("");
return modifiers.replace("{regex}", regex); return modifiers.replace("{regex}", regex);
} }
} }

View File

@ -5,7 +5,7 @@
* @param arr the array of matches * @param arr the array of matches
* @internal * @internal
*/ */
export declare function minimizeMatchString(arr: string[]): string; export declare function minimizeMatchString(arr: string[], has_neighbours?: boolean): string;
/** /**
* Groups a regex fragment if it needs to be grouped * Groups a regex fragment if it needs to be grouped
* *

View File

@ -13,12 +13,12 @@ const utilities_1 = require("./utilities");
* @param arr the array of matches * @param arr the array of matches
* @internal * @internal
*/ */
function minimizeMatchString(arr) { function minimizeMatchString(arr, has_neighbours = false) {
// don't process an array of length 1, otherwise you'll get the wrong result // don't process an array of length 1, otherwise you'll get the wrong result
if (arr.length === 1) { if (arr.length === 1) {
return utilities_1.first(arr); return utilities_1.first(arr);
} }
return minMatchString(arr, 0); return minMatchString(arr, has_neighbours ? 1 : 0);
} }
exports.minimizeMatchString = minimizeMatchString; exports.minimizeMatchString = minimizeMatchString;
/** /**
@ -107,6 +107,10 @@ function groupIfRequired(fragment) {
if (utilities_1.isSingleRegexCharacter(fragment)) { if (utilities_1.isSingleRegexCharacter(fragment)) {
return fragment; return fragment;
} }
else if ((fragment[fragment.length - 1] === "*" || fragment[fragment.length - 1] === "+") &&
utilities_1.isSingleRegexCharacter(fragment.substring(0, fragment.length - 1))) {
return fragment;
}
if (fragment[0] === "(" && fragment[fragment.length - 1] === ")") { if (fragment[0] === "(" && fragment[fragment.length - 1] === ")") {
let bracket_count = 0; let bracket_count = 0;
for (let i = 1; i < fragment.length - 2; i++) { for (let i = 1; i < fragment.length - 2; i++) {

2
package-lock.json generated
View File

@ -1,6 +1,6 @@
{ {
"name": "human2regex", "name": "human2regex",
"version": "1.1.0", "version": "1.1.1",
"lockfileVersion": 1, "lockfileVersion": 1,
"requires": true, "requires": true,
"dependencies": { "dependencies": {

View File

@ -1,6 +1,6 @@
{ {
"name": "human2regex", "name": "human2regex",
"version": "1.1.0", "version": "1.1.1",
"description": "Humanized Regular Expressions", "description": "Humanized Regular Expressions",
"main": "./lib/index.js", "main": "./lib/index.js",
"typings": "./lib/index.d.ts", "typings": "./lib/index.d.ts",

View File

@ -100,6 +100,11 @@ export class GeneratorContext {
} }
} }
/**
* Argument type: Just a plain object
*/
type GeneratorArguments = { [key: string]: string | boolean | number };
interface Generates { interface Generates {
/** /**
* Validate that this is both valid and can be generated in the specified language * Validate that this is both valid and can be generated in the specified language
@ -119,10 +124,11 @@ interface Generates {
* @remarks There is no guarantee toRegex will work unless validate returns no errors * @remarks There is no guarantee toRegex will work unless validate returns no errors
* *
* @param language the regex dialect we're generating * @param language the regex dialect we're generating
* @param args any additional arguments we may have
* @returns a regular expression fragment * @returns a regular expression fragment
* @public * @public
*/ */
toRegex(language: RegexDialect): string; toRegex(language: RegexDialect, args: GeneratorArguments | null): string;
} }
/** /**
@ -142,7 +148,7 @@ export abstract class H2RCST implements Generates {
} }
public abstract validate(language: RegexDialect, context: GeneratorContext): ISemanticError[]; public abstract validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
public abstract toRegex(language: RegexDialect): string; public abstract toRegex(language: RegexDialect, args: GeneratorArguments | null): string;
/** /**
* Creates an ISemanticError with a given message and the tokens provided from the constructor * Creates an ISemanticError with a given message and the tokens provided from the constructor
@ -247,8 +253,8 @@ export class MatchStatementValue implements Generates {
return this.statement.validate(language, context); return this.statement.validate(language, context);
} }
public toRegex(language: RegexDialect): string { public toRegex(language: RegexDialect, args: GeneratorArguments | null): string {
let match_stmt = this.statement.toRegex(language); let match_stmt = this.statement.toRegex(language, args);
// need to group if optional and ungrouped // need to group if optional and ungrouped
if (this.optional) { if (this.optional) {
@ -341,14 +347,20 @@ export class MatchSubStatementCST extends H2RCST {
return errors; return errors;
} }
public toRegex(language: RegexDialect): string { public toRegex(language: RegexDialect, args: GeneratorArguments | null): string {
const matches: string[] = []; const matches: string[] = [];
for (const value of this.values) { for (const value of this.values) {
switch (value.type) { switch (value.type) {
case MatchSubStatementType.SingleString: { case MatchSubStatementType.SingleString: {
const reg = regexEscape(removeQuotes(value.from as string)); const reg = regexEscape(removeQuotes(value.from as string));
matches.push(this.invert ? `(?!${reg})` : reg);
if (isSingleRegexCharacter(reg)) {
matches.push(this.invert ? `[^${reg}]` : reg);
}
else {
matches.push(this.invert ? `(?!${reg})` : reg);
}
break; break;
} }
case MatchSubStatementType.Between: { case MatchSubStatementType.Between: {
@ -412,7 +424,13 @@ export class MatchSubStatementCST extends H2RCST {
} }
} }
let ret = minimizeMatchString(matches); let ret = "";
if (args !== null && args.has_neighbours === true) {
ret = minimizeMatchString(matches, true);
}
else {
ret = minimizeMatchString(matches);
}
if (this.count) { if (this.count) {
if (matches.length === 1) { if (matches.length === 1) {
@ -583,12 +601,18 @@ export class MatchStatementCST extends StatementCST {
} }
public toRegex(language: RegexDialect): string { public toRegex(language: RegexDialect): string {
let final_matches = this.matches.map((x) => x.toRegex(language)).join(""); let final_matches = "";
if (this.matches.length === 1) {
final_matches = this.matches[0].toRegex(language, null);
}
else {
final_matches = this.matches.map((x) => x.toRegex(language, { "has_neighbours": true })).join("");
}
if (this.completely_optional) { if (this.completely_optional) {
final_matches = groupIfRequired(final_matches) + "?"; final_matches = groupIfRequired(final_matches) + "?";
} }
return final_matches; return final_matches;
} }
} }
@ -627,7 +651,7 @@ export class RepeatStatementCST extends StatementCST {
} }
public toRegex(language: RegexDialect): string { public toRegex(language: RegexDialect): string {
let str = groupIfRequired(this.statements.map((x) => x.toRegex(language)).join("")); let str = groupIfRequired(this.statements.map((x) => x.toRegex(language, null)).join(""));
if (this.count) { if (this.count) {
str += this.count.toRegex(language); str += this.count.toRegex(language);
@ -704,7 +728,7 @@ export class GroupStatementCST extends StatementCST {
str += `<${this.name}>`; str += `<${this.name}>`;
} }
str += this.statements.map((x) => x.toRegex(language)).join(""); str += this.statements.map((x) => x.toRegex(language, null)).join("");
str += (this.optional ? ")?" : ")"); str += (this.optional ? ")?" : ")");
@ -825,11 +849,11 @@ export class IfPatternStatementCST extends StatementCST {
} }
public toRegex(language: RegexDialect): string { public toRegex(language: RegexDialect): string {
const if_stmt = this.matches.map((x) => x.toRegex(language)).join(""); const if_stmt = this.matches.map((x) => x.toRegex(language, null)).join("");
const true_stmt = groupIfRequired(this.true_statements.map((x) => x.toRegex(language)).join("")); const true_stmt = groupIfRequired(this.true_statements.map((x) => x.toRegex(language, null)).join(""));
if (this.false_statements.length > 0) { if (this.false_statements.length > 0) {
const false_stmt = groupIfRequired(this.false_statements.map((x) => x.toRegex(language)).join("")); const false_stmt = groupIfRequired(this.false_statements.map((x) => x.toRegex(language, null)).join(""));
return `(?(${if_stmt})${true_stmt}|${false_stmt})`; return `(?(${if_stmt})${true_stmt}|${false_stmt})`;
} }
@ -888,10 +912,10 @@ export class IfIdentStatementCST extends StatementCST {
if_stmt = "<" + if_stmt + ">"; if_stmt = "<" + if_stmt + ">";
} }
const true_stmt = groupIfRequired(this.true_statements.map((x) => x.toRegex(language)).join("")); const true_stmt = groupIfRequired(this.true_statements.map((x) => x.toRegex(language, null)).join(""));
if (this.false_statements.length > 0) { if (this.false_statements.length > 0) {
const false_stmt = groupIfRequired(this.false_statements.map((x) => x.toRegex(language)).join("")); const false_stmt = groupIfRequired(this.false_statements.map((x) => x.toRegex(language, null)).join(""));
return `(?(${if_stmt})${true_stmt}|${false_stmt})`; return `(?(${if_stmt})${true_stmt}|${false_stmt})`;
} }
@ -932,7 +956,7 @@ export class RegularExpressionCST extends H2RCST {
public toRegex(language: RegexDialect): string { public toRegex(language: RegexDialect): string {
const modifiers = this.usings.toRegex(language); const modifiers = this.usings.toRegex(language);
const regex = this.statements.map((x) => x.toRegex(language)).join(""); const regex = this.statements.map((x) => x.toRegex(language, null)).join("");
return modifiers.replace("{regex}", regex); return modifiers.replace("{regex}", regex);
} }

View File

@ -13,13 +13,13 @@ import { first, isSingleRegexCharacter } from "./utilities";
* @param arr the array of matches * @param arr the array of matches
* @internal * @internal
*/ */
export function minimizeMatchString(arr: string[]): string { export function minimizeMatchString(arr: string[], has_neighbours: boolean = false): string {
// don't process an array of length 1, otherwise you'll get the wrong result // don't process an array of length 1, otherwise you'll get the wrong result
if (arr.length === 1) { if (arr.length === 1) {
return first(arr); return first(arr);
} }
return minMatchString(arr, 0); return minMatchString(arr, has_neighbours ? 1 : 0);
} }
/** /**
@ -120,6 +120,10 @@ export function groupIfRequired(fragment: string): string {
if (isSingleRegexCharacter(fragment)) { if (isSingleRegexCharacter(fragment)) {
return fragment; return fragment;
} }
else if ((fragment[fragment.length-1] === "*" || fragment[fragment.length-1] === "+") &&
isSingleRegexCharacter(fragment.substring(0, fragment.length-1))) {
return fragment;
}
if (fragment[0] === "(" && fragment[fragment.length-1] === ")") { if (fragment[0] === "(" && fragment[fragment.length-1] === ")") {
let bracket_count = 0; let bracket_count = 0;

View File

@ -43,6 +43,13 @@ describe("Generator functionality", function() {
expect(reg3.toRegex(RegexDialect.JS)).toBe("/(?:(?:hello){3,5})?/"); expect(reg3.toRegex(RegexDialect.JS)).toBe("/(?:(?:hello){3,5})?/");
}); });
it("generates an advanced regex", function() {
const toks0 = lexer.tokenize('match "<" then a word or digit or "_" or "-" then ">"').tokens;
const reg0 = parser.parse(toks0);
expect(reg0.validate(RegexDialect.JS).length).toBe(0);
expect(reg0.toRegex(RegexDialect.JS)).toBe("/<(?:\\w+|\\d|_|\\-)>/");
});
it("validates invalid regexes", function() { it("validates invalid regexes", function() {
const toks0 = lexer.tokenize('match unicode "NotARealClass"').tokens; const toks0 = lexer.tokenize('match unicode "NotARealClass"').tokens;
const reg0 = parser.parse(toks0); const reg0 = parser.parse(toks0);
@ -159,6 +166,16 @@ describe("Generator functionality", function() {
const reg5 = parser.parse(toks5); const reg5 = parser.parse(toks5);
expect(reg5.validate(RegexDialect.JS).length).toBe(0); expect(reg5.validate(RegexDialect.JS).length).toBe(0);
expect(reg5.toRegex(RegexDialect.JS)).toBe("/a[bc]?/"); expect(reg5.toRegex(RegexDialect.JS)).toBe("/a[bc]?/");
const toks6 = lexer.tokenize("optionally match 0+ any thing").tokens;
const reg6 = parser.parse(toks6);
expect(reg6.validate(RegexDialect.JS).length).toBe(0);
expect(reg6.toRegex(RegexDialect.JS)).toBe("/.*?/");
const toks7 = lexer.tokenize('match 0+ not ">"').tokens;
const reg7 = parser.parse(toks7);
expect(reg7.validate(RegexDialect.JS).length).toBe(0);
expect(reg7.toRegex(RegexDialect.JS)).toBe("/[^>]*/");
}); });
it("can generate backreferences", function() { it("can generate backreferences", function() {

View File

@ -30,6 +30,7 @@ describe("Generator helper functionality", function() {
{ from: "(abc)|d", to: "(?:(abc)|d)" }, { from: "(abc)|d", to: "(?:(abc)|d)" },
{ from: "[abc\\]][abc]", to: "(?:[abc\\]][abc])" }, { from: "[abc\\]][abc]", to: "(?:[abc\\]][abc])" },
{ from: "(abc(abc)\\))(abc)", to: "(?:(abc(abc)\\))(abc))" }, { from: "(abc(abc)\\))(abc)", to: "(?:(abc(abc)\\))(abc))" },
{ from: ".*", to: ".*" }
]; ];
for (const c of test_cases) { for (const c of test_cases) {