1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-16 04:20:35 -07:00

Merge pull request #5 from pdemian/new-features

Bug fixes
This commit is contained in:
Patrick Demian 2021-01-19 02:07:15 -05:00 committed by GitHub
commit f692514791
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 123 additions and 49 deletions

14
docs/bundle.min.js vendored

File diff suppressed because one or more lines are too long

15
lib/generator.d.ts vendored
View File

@ -48,6 +48,12 @@ export declare class GeneratorContext {
*/
addGroup(identifier: string, tokens: IToken[]): void;
}
/**
 * Optional extra arguments accepted by `toRegex`: a plain object with string
 * keys whose values are strings, booleans or numbers
 */
declare type GeneratorArguments = Record<string, string | boolean | number>;
interface Generates {
/**
* Validate that this is both valid and can be generated in the specified language
@ -66,10 +72,11 @@ interface Generates {
* @remarks There is no guarantee toRegex will work unless validate returns no errors
*
* @param language the regex dialect we're generating
* @param args any additional arguments we may have
* @returns a regular expression fragment
* @public
*/
toRegex(language: RegexDialect): string;
toRegex(language: RegexDialect, args: GeneratorArguments | null): string;
}
/**
* The base concrete syntax tree class
@ -86,7 +93,7 @@ export declare abstract class H2RCST implements Generates {
*/
constructor(tokens: IToken[]);
abstract validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
abstract toRegex(language: RegexDialect): string;
abstract toRegex(language: RegexDialect, args: GeneratorArguments | null): string;
/**
* Creates an ISemanticError with a given message and the tokens provided from the constructor
*
@ -171,7 +178,7 @@ export declare class MatchStatementValue implements Generates {
*/
constructor(optional: boolean, statement: MatchSubStatementCST);
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
toRegex(language: RegexDialect): string;
toRegex(language: RegexDialect, args: GeneratorArguments | null): string;
}
/**
* The base class for all statement concrete syntax trees
@ -199,7 +206,7 @@ export declare class MatchSubStatementCST extends H2RCST {
*/
constructor(tokens: IToken[], count: CountSubStatementCST | null, invert: boolean, values: MatchSubStatementValue[]);
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
toRegex(language: RegexDialect): string;
toRegex(language: RegexDialect, args: GeneratorArguments | null): string;
}
/**
* Concrete Syntax Tree for Using statements

View File

@ -206,8 +206,8 @@ class MatchStatementValue {
validate(language, context) {
return this.statement.validate(language, context);
}
toRegex(language) {
let match_stmt = this.statement.toRegex(language);
toRegex(language, args) {
let match_stmt = this.statement.toRegex(language, args);
// need to group if optional and ungrouped
if (this.optional) {
match_stmt = generator_helper_1.groupIfRequired(match_stmt) + "?";
@ -290,13 +290,18 @@ class MatchSubStatementCST extends H2RCST {
}
return errors;
}
toRegex(language) {
toRegex(language, args) {
const matches = [];
for (const value of this.values) {
switch (value.type) {
case MatchSubStatementType.SingleString: {
const reg = utilities_1.regexEscape(utilities_1.removeQuotes(value.from));
matches.push(this.invert ? `(?!${reg})` : reg);
if (utilities_1.isSingleRegexCharacter(reg)) {
matches.push(this.invert ? `[^${reg}]` : reg);
}
else {
matches.push(this.invert ? `(?!${reg})` : reg);
}
break;
}
case MatchSubStatementType.Between: {
@ -359,7 +364,13 @@ class MatchSubStatementCST extends H2RCST {
break;
}
}
let ret = generator_helper_1.minimizeMatchString(matches);
let ret = "";
if (args !== null && args.has_neighbours === true) {
ret = generator_helper_1.minimizeMatchString(matches, true);
}
else {
ret = generator_helper_1.minimizeMatchString(matches);
}
if (this.count) {
if (matches.length === 1) {
// we don't group if there's only 1 element
@ -510,7 +521,13 @@ class MatchStatementCST extends StatementCST {
return errors;
}
toRegex(language) {
let final_matches = this.matches.map((x) => x.toRegex(language)).join("");
let final_matches = "";
if (this.matches.length === 1) {
final_matches = this.matches[0].toRegex(language, null);
}
else {
final_matches = this.matches.map((x) => x.toRegex(language, { "has_neighbours": true })).join("");
}
if (this.completely_optional) {
final_matches = generator_helper_1.groupIfRequired(final_matches) + "?";
}
@ -549,7 +566,7 @@ class RepeatStatementCST extends StatementCST {
return errors;
}
toRegex(language) {
let str = generator_helper_1.groupIfRequired(this.statements.map((x) => x.toRegex(language)).join(""));
let str = generator_helper_1.groupIfRequired(this.statements.map((x) => x.toRegex(language, null)).join(""));
if (this.count) {
str += this.count.toRegex(language);
// group for optionality because count would be incorrect otherwise
@ -615,7 +632,7 @@ class GroupStatementCST extends StatementCST {
}
str += `<${this.name}>`;
}
str += this.statements.map((x) => x.toRegex(language)).join("");
str += this.statements.map((x) => x.toRegex(language, null)).join("");
str += (this.optional ? ")?" : ")");
return str;
}
@ -719,10 +736,10 @@ class IfPatternStatementCST extends StatementCST {
return errors;
}
toRegex(language) {
const if_stmt = this.matches.map((x) => x.toRegex(language)).join("");
const true_stmt = generator_helper_1.groupIfRequired(this.true_statements.map((x) => x.toRegex(language)).join(""));
const if_stmt = this.matches.map((x) => x.toRegex(language, null)).join("");
const true_stmt = generator_helper_1.groupIfRequired(this.true_statements.map((x) => x.toRegex(language, null)).join(""));
if (this.false_statements.length > 0) {
const false_stmt = generator_helper_1.groupIfRequired(this.false_statements.map((x) => x.toRegex(language)).join(""));
const false_stmt = generator_helper_1.groupIfRequired(this.false_statements.map((x) => x.toRegex(language, null)).join(""));
return `(?(${if_stmt})${true_stmt}|${false_stmt})`;
}
else {
@ -773,9 +790,9 @@ class IfIdentStatementCST extends StatementCST {
if (language === RegexDialect.Boost) {
if_stmt = "<" + if_stmt + ">";
}
const true_stmt = generator_helper_1.groupIfRequired(this.true_statements.map((x) => x.toRegex(language)).join(""));
const true_stmt = generator_helper_1.groupIfRequired(this.true_statements.map((x) => x.toRegex(language, null)).join(""));
if (this.false_statements.length > 0) {
const false_stmt = generator_helper_1.groupIfRequired(this.false_statements.map((x) => x.toRegex(language)).join(""));
const false_stmt = generator_helper_1.groupIfRequired(this.false_statements.map((x) => x.toRegex(language, null)).join(""));
return `(?(${if_stmt})${true_stmt}|${false_stmt})`;
}
else {
@ -812,7 +829,7 @@ class RegularExpressionCST extends H2RCST {
}
toRegex(language) {
const modifiers = this.usings.toRegex(language);
const regex = this.statements.map((x) => x.toRegex(language)).join("");
const regex = this.statements.map((x) => x.toRegex(language, null)).join("");
return modifiers.replace("{regex}", regex);
}
}

View File

@ -5,7 +5,7 @@
* @param arr the array of matches
* @internal
*/
export declare function minimizeMatchString(arr: string[]): string;
export declare function minimizeMatchString(arr: string[], has_neighbours?: boolean): string;
/**
* Groups a regex fragment if it needs to be grouped
*

View File

@ -13,12 +13,12 @@ const utilities_1 = require("./utilities");
* @param arr the array of matches
* @internal
*/
function minimizeMatchString(arr) {
function minimizeMatchString(arr, has_neighbours = false) {
// don't process an array of length 1, otherwise you'll get the wrong result
if (arr.length === 1) {
return utilities_1.first(arr);
}
return minMatchString(arr, 0);
return minMatchString(arr, has_neighbours ? 1 : 0);
}
exports.minimizeMatchString = minimizeMatchString;
/**
@ -107,6 +107,10 @@ function groupIfRequired(fragment) {
if (utilities_1.isSingleRegexCharacter(fragment)) {
return fragment;
}
else if ((fragment[fragment.length - 1] === "*" || fragment[fragment.length - 1] === "+") &&
utilities_1.isSingleRegexCharacter(fragment.substring(0, fragment.length - 1))) {
return fragment;
}
if (fragment[0] === "(" && fragment[fragment.length - 1] === ")") {
let bracket_count = 0;
for (let i = 1; i < fragment.length - 2; i++) {

2
package-lock.json generated
View File

@ -1,6 +1,6 @@
{
"name": "human2regex",
"version": "1.1.0",
"version": "1.1.1",
"lockfileVersion": 1,
"requires": true,
"dependencies": {

View File

@ -1,6 +1,6 @@
{
"name": "human2regex",
"version": "1.1.0",
"version": "1.1.1",
"description": "Humanized Regular Expressions",
"main": "./lib/index.js",
"typings": "./lib/index.d.ts",

View File

@ -100,6 +100,11 @@ export class GeneratorContext {
}
}
/**
 * Optional extra arguments accepted by `toRegex`: a plain object with string
 * keys whose values are strings, booleans or numbers
 */
type GeneratorArguments = Record<string, string | boolean | number>;
interface Generates {
/**
* Validate that this is both valid and can be generated in the specified language
@ -119,10 +124,11 @@ interface Generates {
* @remarks There is no guarantee toRegex will work unless validate returns no errors
*
* @param language the regex dialect we're generating
* @param args any additional arguments we may have
* @returns a regular expression fragment
* @public
*/
toRegex(language: RegexDialect): string;
toRegex(language: RegexDialect, args: GeneratorArguments | null): string;
}
/**
@ -142,7 +148,7 @@ export abstract class H2RCST implements Generates {
}
public abstract validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
public abstract toRegex(language: RegexDialect): string;
public abstract toRegex(language: RegexDialect, args: GeneratorArguments | null): string;
/**
* Creates an ISemanticError with a given message and the tokens provided from the constructor
@ -247,8 +253,8 @@ export class MatchStatementValue implements Generates {
return this.statement.validate(language, context);
}
public toRegex(language: RegexDialect): string {
let match_stmt = this.statement.toRegex(language);
public toRegex(language: RegexDialect, args: GeneratorArguments | null): string {
let match_stmt = this.statement.toRegex(language, args);
// need to group if optional and ungrouped
if (this.optional) {
@ -341,14 +347,20 @@ export class MatchSubStatementCST extends H2RCST {
return errors;
}
public toRegex(language: RegexDialect): string {
public toRegex(language: RegexDialect, args: GeneratorArguments | null): string {
const matches: string[] = [];
for (const value of this.values) {
switch (value.type) {
case MatchSubStatementType.SingleString: {
const reg = regexEscape(removeQuotes(value.from as string));
matches.push(this.invert ? `(?!${reg})` : reg);
if (isSingleRegexCharacter(reg)) {
matches.push(this.invert ? `[^${reg}]` : reg);
}
else {
matches.push(this.invert ? `(?!${reg})` : reg);
}
break;
}
case MatchSubStatementType.Between: {
@ -412,7 +424,13 @@ export class MatchSubStatementCST extends H2RCST {
}
}
let ret = minimizeMatchString(matches);
let ret = "";
if (args !== null && args.has_neighbours === true) {
ret = minimizeMatchString(matches, true);
}
else {
ret = minimizeMatchString(matches);
}
if (this.count) {
if (matches.length === 1) {
@ -583,12 +601,18 @@ export class MatchStatementCST extends StatementCST {
}
public toRegex(language: RegexDialect): string {
let final_matches = this.matches.map((x) => x.toRegex(language)).join("");
let final_matches = "";
if (this.matches.length === 1) {
final_matches = this.matches[0].toRegex(language, null);
}
else {
final_matches = this.matches.map((x) => x.toRegex(language, { "has_neighbours": true })).join("");
}
if (this.completely_optional) {
final_matches = groupIfRequired(final_matches) + "?";
}
return final_matches;
}
}
@ -627,7 +651,7 @@ export class RepeatStatementCST extends StatementCST {
}
public toRegex(language: RegexDialect): string {
let str = groupIfRequired(this.statements.map((x) => x.toRegex(language)).join(""));
let str = groupIfRequired(this.statements.map((x) => x.toRegex(language, null)).join(""));
if (this.count) {
str += this.count.toRegex(language);
@ -704,7 +728,7 @@ export class GroupStatementCST extends StatementCST {
str += `<${this.name}>`;
}
str += this.statements.map((x) => x.toRegex(language)).join("");
str += this.statements.map((x) => x.toRegex(language, null)).join("");
str += (this.optional ? ")?" : ")");
@ -825,11 +849,11 @@ export class IfPatternStatementCST extends StatementCST {
}
public toRegex(language: RegexDialect): string {
const if_stmt = this.matches.map((x) => x.toRegex(language)).join("");
const true_stmt = groupIfRequired(this.true_statements.map((x) => x.toRegex(language)).join(""));
const if_stmt = this.matches.map((x) => x.toRegex(language, null)).join("");
const true_stmt = groupIfRequired(this.true_statements.map((x) => x.toRegex(language, null)).join(""));
if (this.false_statements.length > 0) {
const false_stmt = groupIfRequired(this.false_statements.map((x) => x.toRegex(language)).join(""));
const false_stmt = groupIfRequired(this.false_statements.map((x) => x.toRegex(language, null)).join(""));
return `(?(${if_stmt})${true_stmt}|${false_stmt})`;
}
@ -888,10 +912,10 @@ export class IfIdentStatementCST extends StatementCST {
if_stmt = "<" + if_stmt + ">";
}
const true_stmt = groupIfRequired(this.true_statements.map((x) => x.toRegex(language)).join(""));
const true_stmt = groupIfRequired(this.true_statements.map((x) => x.toRegex(language, null)).join(""));
if (this.false_statements.length > 0) {
const false_stmt = groupIfRequired(this.false_statements.map((x) => x.toRegex(language)).join(""));
const false_stmt = groupIfRequired(this.false_statements.map((x) => x.toRegex(language, null)).join(""));
return `(?(${if_stmt})${true_stmt}|${false_stmt})`;
}
@ -932,7 +956,7 @@ export class RegularExpressionCST extends H2RCST {
public toRegex(language: RegexDialect): string {
const modifiers = this.usings.toRegex(language);
const regex = this.statements.map((x) => x.toRegex(language)).join("");
const regex = this.statements.map((x) => x.toRegex(language, null)).join("");
return modifiers.replace("{regex}", regex);
}

View File

@ -13,13 +13,13 @@ import { first, isSingleRegexCharacter } from "./utilities";
* @param arr the array of matches
* @internal
*/
export function minimizeMatchString(arr: string[]): string {
export function minimizeMatchString(arr: string[], has_neighbours: boolean = false): string {
// don't process an array of length 1, otherwise you'll get the wrong result
if (arr.length === 1) {
return first(arr);
}
return minMatchString(arr, 0);
return minMatchString(arr, has_neighbours ? 1 : 0);
}
/**
@ -120,6 +120,10 @@ export function groupIfRequired(fragment: string): string {
if (isSingleRegexCharacter(fragment)) {
return fragment;
}
else if ((fragment[fragment.length-1] === "*" || fragment[fragment.length-1] === "+") &&
isSingleRegexCharacter(fragment.substring(0, fragment.length-1))) {
return fragment;
}
if (fragment[0] === "(" && fragment[fragment.length-1] === ")") {
let bracket_count = 0;

View File

@ -43,6 +43,13 @@ describe("Generator functionality", function() {
expect(reg3.toRegex(RegexDialect.JS)).toBe("/(?:(?:hello){3,5})?/");
});
it("generates an advanced regex", function() {
const toks0 = lexer.tokenize('match "<" then a word or digit or "_" or "-" then ">"').tokens;
const reg0 = parser.parse(toks0);
expect(reg0.validate(RegexDialect.JS).length).toBe(0);
expect(reg0.toRegex(RegexDialect.JS)).toBe("/<(?:\\w+|\\d|_|\\-)>/");
});
it("validates invalid regexes", function() {
const toks0 = lexer.tokenize('match unicode "NotARealClass"').tokens;
const reg0 = parser.parse(toks0);
@ -159,6 +166,16 @@ describe("Generator functionality", function() {
const reg5 = parser.parse(toks5);
expect(reg5.validate(RegexDialect.JS).length).toBe(0);
expect(reg5.toRegex(RegexDialect.JS)).toBe("/a[bc]?/");
const toks6 = lexer.tokenize("optionally match 0+ any thing").tokens;
const reg6 = parser.parse(toks6);
expect(reg6.validate(RegexDialect.JS).length).toBe(0);
expect(reg6.toRegex(RegexDialect.JS)).toBe("/.*?/");
const toks7 = lexer.tokenize('match 0+ not ">"').tokens;
const reg7 = parser.parse(toks7);
expect(reg7.validate(RegexDialect.JS).length).toBe(0);
expect(reg7.toRegex(RegexDialect.JS)).toBe("/[^>]*/");
});
it("can generate backreferences", function() {

View File

@ -30,6 +30,7 @@ describe("Generator helper functionality", function() {
{ from: "(abc)|d", to: "(?:(abc)|d)" },
{ from: "[abc\\]][abc]", to: "(?:[abc\\]][abc])" },
{ from: "(abc(abc)\\))(abc)", to: "(?:(abc(abc)\\))(abc))" },
{ from: ".*", to: ".*" }
];
for (const c of test_cases) {