1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-16 20:40:08 -07:00

Improved code coverage and fixed bugs

This commit is contained in:
Patrick Demian 2020-11-21 03:05:31 -05:00
parent 34b7dac993
commit 9b9cee2276
10 changed files with 199 additions and 117 deletions

14
docs/bundle.min.js vendored

File diff suppressed because one or more lines are too long

3
lib/generator.d.ts vendored
View File

@ -23,8 +23,7 @@ export interface ISemanticError {
/** /**
* Context for validation * Context for validation
* *
* Currently only used to validate groups * @remarks Currently only used to validate groups
*
* @internal * @internal
*/ */
export declare class GeneratorContext { export declare class GeneratorContext {

View File

@ -53,8 +53,7 @@ const unicode_script_codes = [
/** /**
* Context for validation * Context for validation
* *
* Currently only used to validate groups * @remarks Currently only used to validate groups
*
* @internal * @internal
*/ */
class GeneratorContext { class GeneratorContext {
@ -360,50 +359,15 @@ class MatchSubStatementCST extends H2RCST {
break; break;
} }
} }
let ret = ""; let ret = generator_helper_1.minimizeMatchString(matches);
let require_grouping = false;
let dont_clobber_plus = false;
if (matches.length === 1) {
ret = utilities_1.first(matches);
if (ret.endsWith("+")) {
dont_clobber_plus = true;
}
}
else {
ret = generator_helper_1.minimizeMatchString(matches);
if (ret.length > 1 &&
(!ret.startsWith("(") || !ret.startsWith("["))) {
require_grouping = true;
}
}
if (this.count) { if (this.count) {
if (dont_clobber_plus) { if (matches.length === 1) {
const clobber = this.count.toRegex(language); // we don't group if there's only 1 element
// + can be ignored as well as a count as long as that count is > 0 // but we need to make sure we don't add an additional + or *
switch (clobber) { ret = generator_helper_1.dontClobberRepetition(ret, this.count.toRegex(language));
case "*":
case "?":
ret = "(?:" + ret + ")" + clobber;
break;
case "+":
// ignore
break;
default:
if (clobber.startsWith("{0")) {
ret = "(?:" + ret + ")" + clobber;
} }
else { else {
// remove + and replace with count ret = generator_helper_1.groupIfRequired(ret) + this.count.toRegex(language);
ret.substring(0, ret.length - 1) + clobber;
}
break;
}
}
else {
if (require_grouping) {
ret = "(?:" + ret + ")";
}
ret += this.count.toRegex(language);
} }
} }
return ret; return ret;

View File

@ -14,3 +14,10 @@ export declare function minimizeMatchString(arr: string[]): string;
* @internal * @internal
*/ */
export declare function groupIfRequired(fragment: string): string; export declare function groupIfRequired(fragment: string): string;
/**
* Checks to see if fragment has a + or * at the end and has a repetition statement
*
* @param fragment fragment of regular expression
* @param repetition repetition that may clobber the fragment
*/
export declare function dontClobberRepetition(fragment: string, repetition: string): string;

View File

@ -1,7 +1,7 @@
"use strict"; "use strict";
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */ /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
Object.defineProperty(exports, "__esModule", { value: true }); Object.defineProperty(exports, "__esModule", { value: true });
exports.groupIfRequired = exports.minimizeMatchString = void 0; exports.dontClobberRepetition = exports.groupIfRequired = exports.minimizeMatchString = void 0;
/** /**
* Includes helper functions for the Generator * Includes helper functions for the Generator
* @packageDocumentation * @packageDocumentation
@ -14,6 +14,10 @@ const utilities_1 = require("./utilities");
* @internal * @internal
*/ */
function minimizeMatchString(arr) { function minimizeMatchString(arr) {
// don't process an array of length 1, otherwise you'll get the wrong result
if (arr.length === 1) {
return utilities_1.first(arr);
}
return minMatchString(arr, 0); return minMatchString(arr, 0);
} }
exports.minimizeMatchString = minimizeMatchString; exports.minimizeMatchString = minimizeMatchString;
@ -114,6 +118,9 @@ function groupIfRequired(fragment) {
} }
else if (fragment[i] === ")") { else if (fragment[i] === ")") {
bracket_count--; bracket_count--;
if (bracket_count === -1) {
break;
}
} }
} }
return bracket_count === 0 ? fragment : "(?:" + fragment + ")"; return bracket_count === 0 ? fragment : "(?:" + fragment + ")";
@ -124,11 +131,15 @@ function groupIfRequired(fragment) {
if (fragment[i] === "\\") { if (fragment[i] === "\\") {
i++; i++;
} }
else if (fragment[i] === "[") { //you'll never have a raw [ inside a []
bracket_count++; //else if (fragment[i] === "[") {
} // bracket_count++;
//}
else if (fragment[i] === "]") { else if (fragment[i] === "]") {
bracket_count--; bracket_count--;
if (bracket_count === -1) {
break;
}
} }
} }
return bracket_count === 0 ? fragment : "(?:" + fragment + ")"; return bracket_count === 0 ? fragment : "(?:" + fragment + ")";
@ -138,3 +149,55 @@ function groupIfRequired(fragment) {
} }
} }
exports.groupIfRequired = groupIfRequired; exports.groupIfRequired = groupIfRequired;
/**
 * Appends a repetition to a regex fragment without stacking it on top of a
 * `+` or `*` quantifier the fragment already ends with.
 *
 * A trailing `+` or `*` only counts as a quantifier when it is not escaped:
 * a fragment such as `\+` ends in a literal plus sign, so the repetition is
 * simply appended to it.
 *
 * @param fragment fragment of regular expression
 * @param repetition repetition that may clobber the fragment
 * @returns the fragment combined with the repetition
 */
function dontClobberRepetition(fragment, repetition) {
    const last_char = fragment.charAt(fragment.length - 1);
    // count the backslashes directly in front of the last character:
    // an odd count means the last character itself is escaped (a literal)
    let backslashes = 0;
    for (let i = fragment.length - 2; i >= 0 && fragment[i] === "\\"; i--) {
        backslashes++;
    }
    const ends_with_quantifier = (last_char === "+" || last_char === "*") && backslashes % 2 === 0;
    if (!ends_with_quantifier) {
        // nothing to clobber (this also covers the empty fragment)
        return fragment + repetition;
    }
    if (last_char === "+") {
        switch (repetition) {
            case "*":
                // ignore: the fragment keeps its own +
                break;
            case "?":
                // appended directly, where it reads as the non-greedy qualifier
                fragment += repetition;
                break;
            case "+":
                // ignore: already +
                break;
            default:
                if (repetition.startsWith("{0")) {
                    // a count starting at 0 can't replace the +, so wrap the fragment instead
                    fragment = "(?:" + fragment + ")" + repetition;
                }
                else {
                    // remove + and replace with count
                    fragment = fragment.substring(0, fragment.length - 1) + repetition;
                }
                break;
        }
    }
    else {
        switch (repetition) {
            case "*":
                // ignore: already *
                break;
            case "?":
                // appended directly, where it reads as the non-greedy qualifier
                fragment += repetition;
                break;
            default:
                // remove * and replace with count
                fragment = fragment.substring(0, fragment.length - 1) + repetition;
                break;
        }
    }
    return fragment;
}
exports.dontClobberRepetition = dontClobberRepetition;

View File

@ -7,7 +7,7 @@
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last, unusedParameter, makeFlag, append } from "./utilities"; import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last, unusedParameter, makeFlag, append } from "./utilities";
import { IToken } from "chevrotain"; import { IToken } from "chevrotain";
import { minimizeMatchString, groupIfRequired } from "./generator_helper"; import { minimizeMatchString, groupIfRequired, dontClobberRepetition } from "./generator_helper";
/** /**
* List of regular expression dialects we support * List of regular expression dialects we support
@ -66,8 +66,7 @@ const unicode_script_codes = [
/** /**
* Context for validation * Context for validation
* *
* Currently only used to validate groups * @remarks Currently only used to validate groups
*
* @internal * @internal
*/ */
export class GeneratorContext { export class GeneratorContext {
@ -126,7 +125,6 @@ interface Generates {
toRegex(language: RegexDialect): string; toRegex(language: RegexDialect): string;
} }
/** /**
* The base concrete syntax tree class * The base concrete syntax tree class
* *
@ -414,56 +412,16 @@ export class MatchSubStatementCST extends H2RCST {
} }
} }
let ret = ""; let ret = minimizeMatchString(matches);
let require_grouping = false;
let dont_clobber_plus = false;
if (matches.length === 1) {
ret = first(matches);
if (ret.endsWith("+")) {
dont_clobber_plus = true;
}
}
else {
ret = minimizeMatchString(matches);
if (ret.length > 1 &&
(!ret.startsWith("(") || !ret.startsWith("["))) {
require_grouping = true;
}
}
if (this.count) { if (this.count) {
if (dont_clobber_plus) { if (matches.length === 1) {
const clobber = this.count.toRegex(language); // we don't group if there's only 1 element
// but we need to make sure we don't add an additional + or *
// + can be ignored as well as a count as long as that count is > 0 ret = dontClobberRepetition(ret, this.count.toRegex(language));
switch (clobber) {
case "*":
case "?":
ret = "(?:" + ret + ")" + clobber;
break;
case "+":
// ignore
break;
default:
if (clobber.startsWith("{0")) {
ret = "(?:" + ret + ")" + clobber;
} }
else { else {
// remove + and replace with count ret = groupIfRequired(ret) + this.count.toRegex(language);
ret.substring(0, ret.length - 1) + clobber;
}
break;
}
}
else {
if (require_grouping) {
ret = "(?:" + ret + ")";
}
ret += this.count.toRegex(language);
} }
} }
@ -881,7 +839,6 @@ export class IfPatternStatementCST extends StatementCST {
} }
} }
/** /**
* Concrete Syntax Tree for an If group Ident statement * Concrete Syntax Tree for an If group Ident statement
* *

View File

@ -14,6 +14,11 @@ import { first, isSingleRegexCharacter } from "./utilities";
* @internal * @internal
*/ */
export function minimizeMatchString(arr: string[]): string { export function minimizeMatchString(arr: string[]): string {
// don't process an array of length 1, otherwise you'll get the wrong result
if (arr.length === 1) {
return first(arr);
}
return minMatchString(arr, 0); return minMatchString(arr, 0);
} }
@ -128,6 +133,10 @@ export function groupIfRequired(fragment: string): string {
} }
else if (fragment[i] === ")") { else if (fragment[i] === ")") {
bracket_count--; bracket_count--;
if (bracket_count === -1) {
break;
}
} }
} }
@ -140,11 +149,16 @@ export function groupIfRequired(fragment: string): string {
if (fragment[i] === "\\") { if (fragment[i] === "\\") {
i++; i++;
} }
else if (fragment[i] === "[") { //you'll never have a raw [ inside a []
bracket_count++; //else if (fragment[i] === "[") {
} // bracket_count++;
//}
else if (fragment[i] === "]") { else if (fragment[i] === "]") {
bracket_count--; bracket_count--;
if (bracket_count === -1) {
break;
}
} }
} }
@ -154,3 +168,57 @@ export function groupIfRequired(fragment: string): string {
return "(?:" + fragment + ")"; return "(?:" + fragment + ")";
} }
} }
/**
 * Appends a repetition to a regex fragment without stacking it on top of a
 * `+` or `*` quantifier the fragment already ends with.
 *
 * A trailing `+` or `*` only counts as a quantifier when it is not escaped:
 * a fragment such as `\+` ends in a literal plus sign, so the repetition is
 * simply appended to it.
 *
 * @param fragment fragment of regular expression
 * @param repetition repetition that may clobber the fragment
 * @returns the fragment combined with the repetition
 */
export function dontClobberRepetition(fragment: string, repetition: string): string {
    const last_char = fragment.charAt(fragment.length - 1);

    // count the backslashes directly in front of the last character:
    // an odd count means the last character itself is escaped (a literal)
    let backslashes = 0;
    for (let i = fragment.length - 2; i >= 0 && fragment[i] === "\\"; i--) {
        backslashes++;
    }

    const ends_with_quantifier = (last_char === "+" || last_char === "*") && backslashes % 2 === 0;

    if (!ends_with_quantifier) {
        // nothing to clobber (this also covers the empty fragment)
        return fragment + repetition;
    }

    if (last_char === "+") {
        switch (repetition) {
            case "*":
                // ignore: the fragment keeps its own +
                break;
            case "?":
                // appended directly, where it reads as the non-greedy qualifier
                fragment += repetition;
                break;
            case "+":
                // ignore: already +
                break;
            default:
                if (repetition.startsWith("{0")) {
                    // a count starting at 0 can't replace the +, so wrap the fragment instead
                    fragment = "(?:" + fragment + ")" + repetition;
                }
                else {
                    // remove + and replace with count
                    fragment = fragment.substring(0, fragment.length - 1) + repetition;
                }
                break;
        }
    }
    else {
        switch (repetition) {
            case "*":
                // ignore: already *
                break;
            case "?":
                // appended directly, where it reads as the non-greedy qualifier
                fragment += repetition;
                break;
            default:
                // remove * and replace with count
                fragment = fragment.substring(0, fragment.length - 1) + repetition;
                break;
        }
    }

    return fragment;
}

View File

@ -222,7 +222,7 @@ describe("Generator functionality", function() {
it("runs complex scripts", function() { it("runs complex scripts", function() {
const str = ` const str = `
using global and multiline and exact matching using global and multiline and exact matching and case insensitive matching
create an optional group called protocol create an optional group called protocol
match "http" match "http"
optionally match "s" optionally match "s"
@ -257,6 +257,6 @@ create an optional group
const toks = lexer.tokenize(str).tokens; const toks = lexer.tokenize(str).tokens;
const reg = parser.parse(toks); const reg = parser.parse(toks);
expect(reg.validate(RegexDialect.JS).length).toBe(0); expect(reg.validate(RegexDialect.JS).length).toBe(0);
expect(reg.toRegex(RegexDialect.JS)).toBe("/^(?<protocol>https?\\:\\/\\/)?(?<subdomain>(?:\\w+\\.)*)?(?<domain>(?:\\w+|_|\\-)+\\.\\w+)(?:\\:\\d*)?(?<path>(?:\\/(?:\\w+|_|\\-)*)*)?(\\?(?<query>(?:(?:\\w+|_|\\-)+=(?:\\w+|_|\\-)+)*))?(#.*)?$/gm"); expect(reg.toRegex(RegexDialect.JS)).toBe("/^(?<protocol>https?\\:\\/\\/)?(?<subdomain>(?:\\w+\\.)*)?(?<domain>(?:\\w+|_|\\-)+\\.\\w+)(?:\\:\\d*)?(?<path>(?:\\/(?:\\w+|_|\\-)*)*)?(\\?(?<query>(?:(?:\\w+|_|\\-)+=(?:\\w+|_|\\-)+)*))?(#.*)?$/gmi");
}); });
}); });

View File

@ -1,11 +1,13 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */ /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { minimizeMatchString, groupIfRequired } from "../src/generator_helper"; import { minimizeMatchString, groupIfRequired, dontClobberRepetition } from "../src/generator_helper";
describe("Generator helper functionality", function() { describe("Generator helper functionality", function() {
it("can minimize matches", function() { it("can minimize matches", function() {
const test_cases = [ const test_cases = [
{ from: [], to: "" },
{ from: [ "abc" ], to: "abc" },
{ from: [ "abc", "abc" ], to: "abc" }, { from: [ "abc", "abc" ], to: "abc" },
{ from: [ "a", "ab" ], to: "ab?" }, { from: [ "a", "ab" ], to: "ab?" },
{ from: [ "a1x1z", "a2y2z", "a3z3z" ], to: "a(?:1x1|2y2|3z3)z" }, { from: [ "a1x1z", "a2y2z", "a3z3z" ], to: "a(?:1x1|2y2|3z3)z" },
@ -24,9 +26,11 @@ describe("Generator helper functionality", function() {
it("groups correctly", function() { it("groups correctly", function() {
const test_cases = [ const test_cases = [
{ from: "(?P=test)", to: "(?P=test)" }, { from: "(?P=test)", to: "(?P=test)" },
{ from: "[abc]", to: "[abc]" }, { from: "[abc\\]]", to: "[abc\\]]" },
{ from: "abc", to: "(?:abc)" }, { from: "abc", to: "(?:abc)" },
{ from: "(abc)|d", to: "(?:(abc)|d)" } { from: "(abc)|d", to: "(?:(abc)|d)" },
{ from: "[abc\\]][abc]", to: "(?:[abc\\]][abc])" },
{ from: "(abc(abc)\\))(abc)", to: "(?:(abc(abc)\\))(abc))" },
]; ];
for (const c of test_cases) { for (const c of test_cases) {
@ -35,4 +39,25 @@ describe("Generator helper functionality", function() {
expect(got).toBe(c.to); expect(got).toBe(c.to);
} }
}); });
// Table-driven check of dontClobberRepetition: every row is
// [ fragment, repetition, expected result ].
it("doesn't clobber the repetition", function() {
    const rows: [ string, string, string ][] = [
        [ "1+", "+", "1+" ],
        [ "1*", "+", "1+" ],
        [ "1+", "*", "1+" ],
        [ "1*", "*", "1*" ],
        [ "1+", "?", "1+?" ],
        [ "1*", "?", "1*?" ],
        [ "1+", "{0,}", "(?:1+){0,}" ],
        [ "1*", "{0,}", "1{0,}" ],
        [ "1+", "{1,2}", "1{1,2}" ],
        [ "1*", "{1,2}", "1{1,2}" ],
    ];

    // rows are checked in order; the first mismatch fails the test
    rows.forEach(([ fragment, repetition, expected ]) => {
        expect(dontClobberRepetition(fragment, repetition)).toBe(expected);
    });
});
}); });

View File

@ -95,8 +95,7 @@ module.exports = {
after: { after: {
root: "./lib", root: "./lib",
include: [ include: [
"script.d.ts", "script.d.ts"
"script.d.ts.map"
] ]
} }
}) })