1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-16 04:20:35 -07:00

Improved code coverage and fixed bugs

This commit is contained in:
Patrick Demian 2020-11-21 03:05:31 -05:00
parent 34b7dac993
commit 9b9cee2276
10 changed files with 199 additions and 117 deletions

14
docs/bundle.min.js vendored

File diff suppressed because one or more lines are too long

3
lib/generator.d.ts vendored
View File

@ -23,8 +23,7 @@ export interface ISemanticError {
/**
* Context for validation
*
* Currently only used to validate groups
*
* @remarks Currently only used to validate groups
* @internal
*/
export declare class GeneratorContext {

View File

@ -53,8 +53,7 @@ const unicode_script_codes = [
/**
* Context for validation
*
* Currently only used to validate groups
*
* @remarks Currently only used to validate groups
* @internal
*/
class GeneratorContext {
@ -360,50 +359,15 @@ class MatchSubStatementCST extends H2RCST {
break;
}
}
let ret = "";
let require_grouping = false;
let dont_clobber_plus = false;
if (matches.length === 1) {
ret = utilities_1.first(matches);
if (ret.endsWith("+")) {
dont_clobber_plus = true;
}
}
else {
ret = generator_helper_1.minimizeMatchString(matches);
if (ret.length > 1 &&
(!ret.startsWith("(") || !ret.startsWith("["))) {
require_grouping = true;
}
}
let ret = generator_helper_1.minimizeMatchString(matches);
if (this.count) {
if (dont_clobber_plus) {
const clobber = this.count.toRegex(language);
// + can be ignored as well as a count as long as that count is > 0
switch (clobber) {
case "*":
case "?":
ret = "(?:" + ret + ")" + clobber;
break;
case "+":
// ignore
break;
default:
if (clobber.startsWith("{0")) {
ret = "(?:" + ret + ")" + clobber;
}
else {
// remove + and replace with count
ret.substring(0, ret.length - 1) + clobber;
}
break;
}
if (matches.length === 1) {
// we don't group if there's only 1 element
// but we need to make sure we don't add an additional + or *
ret = generator_helper_1.dontClobberRepetition(ret, this.count.toRegex(language));
}
else {
if (require_grouping) {
ret = "(?:" + ret + ")";
}
ret += this.count.toRegex(language);
ret = generator_helper_1.groupIfRequired(ret) + this.count.toRegex(language);
}
}
return ret;

View File

@ -14,3 +14,10 @@ export declare function minimizeMatchString(arr: string[]): string;
* @internal
*/
export declare function groupIfRequired(fragment: string): string;
/**
 * Checks to see if fragment has a + or * at the end and has a repetition statement
 *
 * Returns the fragment with the repetition applied, taking a trailing + or *
 * on the fragment into account instead of always appending the repetition.
 *
 * @param fragment fragment of regular expression
 * @param repetition repetition that may clobber the fragment
 * @returns the fragment combined with the repetition
 */
export declare function dontClobberRepetition(fragment: string, repetition: string): string;

View File

@ -1,7 +1,7 @@
"use strict";
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
Object.defineProperty(exports, "__esModule", { value: true });
exports.groupIfRequired = exports.minimizeMatchString = void 0;
exports.dontClobberRepetition = exports.groupIfRequired = exports.minimizeMatchString = void 0;
/**
* Includes helper functions for the Generator
* @packageDocumentation
@ -14,6 +14,10 @@ const utilities_1 = require("./utilities");
* @internal
*/
function minimizeMatchString(arr) {
    // A lone element is returned untouched; running it through the
    // minimizer would produce the wrong result.
    return arr.length === 1
        ? utilities_1.first(arr)
        : minMatchString(arr, 0);
}
exports.minimizeMatchString = minimizeMatchString;
@ -114,6 +118,9 @@ function groupIfRequired(fragment) {
}
else if (fragment[i] === ")") {
bracket_count--;
if (bracket_count === -1) {
break;
}
}
}
return bracket_count === 0 ? fragment : "(?:" + fragment + ")";
@ -124,11 +131,15 @@ function groupIfRequired(fragment) {
if (fragment[i] === "\\") {
i++;
}
else if (fragment[i] === "[") {
bracket_count++;
}
//you'll never have a raw [ inside a []
//else if (fragment[i] === "[") {
// bracket_count++;
//}
else if (fragment[i] === "]") {
bracket_count--;
if (bracket_count === -1) {
break;
}
}
}
return bracket_count === 0 ? fragment : "(?:" + fragment + ")";
@ -138,3 +149,55 @@ function groupIfRequired(fragment) {
}
}
exports.groupIfRequired = groupIfRequired;
/**
 * Applies a repetition to a fragment of regular expression without stacking
 * it on top of a + or * quantifier the fragment already ends with.
 *
 * A trailing + or * that is escaped (preceded by an odd number of
 * backslashes) is a literal character rather than a quantifier, so in that
 * case the repetition is simply appended.
 *
 * @param fragment fragment of regular expression
 * @param repetition repetition that may clobber the fragment
 * @returns the fragment combined with the repetition
 */
function dontClobberRepetition(fragment, repetition) {
    const trailing = fragment.charAt(fragment.length - 1);
    // nothing to clobber: just append the repetition
    if (trailing !== "+" && trailing !== "*") {
        return fragment + repetition;
    }
    // count the backslashes directly before the trailing character;
    // an odd count means the + or * is an escaped literal (e.g. "\+")
    let backslashes = 0;
    for (let i = fragment.length - 2; i >= 0 && fragment[i] === "\\"; i--) {
        backslashes++;
    }
    if (backslashes % 2 === 1) {
        return fragment + repetition;
    }
    // fragment with its trailing quantifier removed
    const body = fragment.substring(0, fragment.length - 1);
    if (trailing === "+") {
        switch (repetition) {
            case "*":
            case "+":
                // ignore: the existing + is kept as-is
                return fragment;
            case "?":
                // non-greedy qualifier
                return fragment + repetition;
            default:
                // a count that may be 0 has to wrap the whole "x+";
                // any other count replaces the +
                return repetition.startsWith("{0")
                    ? "(?:" + fragment + ")" + repetition
                    : body + repetition;
        }
    }
    // trailing quantifier is *
    switch (repetition) {
        case "*":
            // ignore: already *
            return fragment;
        case "?":
            // non-greedy qualifier
            return fragment + repetition;
        default:
            // remove * and replace with count
            return body + repetition;
    }
}
exports.dontClobberRepetition = dontClobberRepetition;

View File

@ -7,7 +7,7 @@
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last, unusedParameter, makeFlag, append } from "./utilities";
import { IToken } from "chevrotain";
import { minimizeMatchString, groupIfRequired } from "./generator_helper";
import { minimizeMatchString, groupIfRequired, dontClobberRepetition } from "./generator_helper";
/**
* List of regular expression dialects we support
@ -66,8 +66,7 @@ const unicode_script_codes = [
/**
* Context for validation
*
* Currently only used to validate groups
*
* @remarks Currently only used to validate groups
* @internal
*/
export class GeneratorContext {
@ -126,7 +125,6 @@ interface Generates {
toRegex(language: RegexDialect): string;
}
/**
* The base concrete syntax tree class
*
@ -414,56 +412,16 @@ export class MatchSubStatementCST extends H2RCST {
}
}
let ret = "";
let require_grouping = false;
let dont_clobber_plus = false;
if (matches.length === 1) {
ret = first(matches);
if (ret.endsWith("+")) {
dont_clobber_plus = true;
}
}
else {
ret = minimizeMatchString(matches);
if (ret.length > 1 &&
(!ret.startsWith("(") || !ret.startsWith("["))) {
require_grouping = true;
}
}
let ret = minimizeMatchString(matches);
if (this.count) {
if (dont_clobber_plus) {
const clobber = this.count.toRegex(language);
// + can be ignored as well as a count as long as that count is > 0
switch (clobber) {
case "*":
case "?":
ret = "(?:" + ret + ")" + clobber;
break;
case "+":
// ignore
break;
default:
if (clobber.startsWith("{0")) {
ret = "(?:" + ret + ")" + clobber;
}
else {
// remove + and replace with count
ret.substring(0, ret.length - 1) + clobber;
}
break;
}
if (matches.length === 1) {
// we don't group if there's only 1 element
// but we need to make sure we don't add an additional + or *
ret = dontClobberRepetition(ret, this.count.toRegex(language));
}
else {
if (require_grouping) {
ret = "(?:" + ret + ")";
}
ret += this.count.toRegex(language);
ret = groupIfRequired(ret) + this.count.toRegex(language);
}
}
@ -881,7 +839,6 @@ export class IfPatternStatementCST extends StatementCST {
}
}
/**
* Concrete Syntax Tree for an If group Ident statement
*

View File

@ -14,6 +14,11 @@ import { first, isSingleRegexCharacter } from "./utilities";
* @internal
*/
export function minimizeMatchString(arr: string[]): string {
    // A lone element is returned untouched; running it through the
    // minimizer would produce the wrong result.
    return arr.length === 1
        ? first(arr)
        : minMatchString(arr, 0);
}
@ -128,6 +133,10 @@ export function groupIfRequired(fragment: string): string {
}
else if (fragment[i] === ")") {
bracket_count--;
if (bracket_count === -1) {
break;
}
}
}
@ -140,11 +149,16 @@ export function groupIfRequired(fragment: string): string {
if (fragment[i] === "\\") {
i++;
}
else if (fragment[i] === "[") {
bracket_count++;
}
//you'll never have a raw [ inside a []
//else if (fragment[i] === "[") {
// bracket_count++;
//}
else if (fragment[i] === "]") {
bracket_count--;
if (bracket_count === -1) {
break;
}
}
}
@ -154,3 +168,57 @@ export function groupIfRequired(fragment: string): string {
return "(?:" + fragment + ")";
}
}
/**
 * Applies a repetition to a fragment of regular expression without stacking
 * it on top of a + or * quantifier the fragment already ends with.
 *
 * A trailing + or * that is escaped (preceded by an odd number of
 * backslashes) is a literal character rather than a quantifier, so in that
 * case the repetition is simply appended.
 *
 * @param fragment fragment of regular expression
 * @param repetition repetition that may clobber the fragment
 * @returns the fragment combined with the repetition
 */
export function dontClobberRepetition(fragment: string, repetition: string): string {
    const trailing = fragment.charAt(fragment.length - 1);

    // nothing to clobber: just append the repetition
    if (trailing !== "+" && trailing !== "*") {
        return fragment + repetition;
    }

    // count the backslashes directly before the trailing character;
    // an odd count means the + or * is an escaped literal (e.g. "\+")
    let backslashes = 0;
    for (let i = fragment.length - 2; i >= 0 && fragment[i] === "\\"; i--) {
        backslashes++;
    }
    if (backslashes % 2 === 1) {
        return fragment + repetition;
    }

    // fragment with its trailing quantifier removed
    const body = fragment.substring(0, fragment.length - 1);

    if (trailing === "+") {
        switch (repetition) {
            case "*":
            case "+":
                // ignore: the existing + is kept as-is
                return fragment;
            case "?":
                // non-greedy qualifier
                return fragment + repetition;
            default:
                // a count that may be 0 has to wrap the whole "x+";
                // any other count replaces the +
                return repetition.startsWith("{0")
                    ? "(?:" + fragment + ")" + repetition
                    : body + repetition;
        }
    }

    // trailing quantifier is *
    switch (repetition) {
        case "*":
            // ignore: already *
            return fragment;
        case "?":
            // non-greedy qualifier
            return fragment + repetition;
        default:
            // remove * and replace with count
            return body + repetition;
    }
}

View File

@ -222,7 +222,7 @@ describe("Generator functionality", function() {
it("runs complex scripts", function() {
const str = `
using global and multiline and exact matching
using global and multiline and exact matching and case insensitive matching
create an optional group called protocol
match "http"
optionally match "s"
@ -257,6 +257,6 @@ create an optional group
const toks = lexer.tokenize(str).tokens;
const reg = parser.parse(toks);
expect(reg.validate(RegexDialect.JS).length).toBe(0);
expect(reg.toRegex(RegexDialect.JS)).toBe("/^(?<protocol>https?\\:\\/\\/)?(?<subdomain>(?:\\w+\\.)*)?(?<domain>(?:\\w+|_|\\-)+\\.\\w+)(?:\\:\\d*)?(?<path>(?:\\/(?:\\w+|_|\\-)*)*)?(\\?(?<query>(?:(?:\\w+|_|\\-)+=(?:\\w+|_|\\-)+)*))?(#.*)?$/gm");
expect(reg.toRegex(RegexDialect.JS)).toBe("/^(?<protocol>https?\\:\\/\\/)?(?<subdomain>(?:\\w+\\.)*)?(?<domain>(?:\\w+|_|\\-)+\\.\\w+)(?:\\:\\d*)?(?<path>(?:\\/(?:\\w+|_|\\-)*)*)?(\\?(?<query>(?:(?:\\w+|_|\\-)+=(?:\\w+|_|\\-)+)*))?(#.*)?$/gmi");
});
});

View File

@ -1,11 +1,13 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import { minimizeMatchString, groupIfRequired } from "../src/generator_helper";
import { minimizeMatchString, groupIfRequired, dontClobberRepetition } from "../src/generator_helper";
describe("Generator helper functionality", function() {
it("can minimize matches", function() {
const test_cases = [
{ from: [], to: "" },
{ from: [ "abc" ], to: "abc" },
{ from: [ "abc", "abc" ], to: "abc" },
{ from: [ "a", "ab" ], to: "ab?" },
{ from: [ "a1x1z", "a2y2z", "a3z3z" ], to: "a(?:1x1|2y2|3z3)z" },
@ -24,9 +26,11 @@ describe("Generator helper functionality", function() {
it("groups correctly", function() {
const test_cases = [
{ from: "(?P=test)", to: "(?P=test)" },
{ from: "[abc]", to: "[abc]" },
{ from: "[abc\\]]", to: "[abc\\]]" },
{ from: "abc", to: "(?:abc)" },
{ from: "(abc)|d", to: "(?:(abc)|d)" }
{ from: "(abc)|d", to: "(?:(abc)|d)" },
{ from: "[abc\\]][abc]", to: "(?:[abc\\]][abc])" },
{ from: "(abc(abc)\\))(abc)", to: "(?:(abc(abc)\\))(abc))" },
];
for (const c of test_cases) {
@ -35,4 +39,25 @@ describe("Generator helper functionality", function() {
expect(got).toBe(c.to);
}
});
// Table-driven check of dontClobberRepetition: each case gives a fragment,
// the repetition applied to it, and the exact string expected back.
it("doesn't clobber the repetition", function() {
const test_cases = [
{ fragment: "1+", repetition: "+", expected: "1+" },
{ fragment: "1*", repetition: "+", expected: "1+" },
{ fragment: "1+", repetition: "*", expected: "1+" },
{ fragment: "1*", repetition: "*", expected: "1*" },
{ fragment: "1+", repetition: "?", expected: "1+?" },
{ fragment: "1*", repetition: "?", expected: "1*?" },
{ fragment: "1+", repetition: "{0,}", expected: "(?:1+){0,}" },
{ fragment: "1*", repetition: "{0,}", expected: "1{0,}" },
{ fragment: "1+", repetition: "{1,2}", expected: "1{1,2}" },
{ fragment: "1*", repetition: "{1,2}", expected: "1{1,2}" },
];
// every row must produce its expected string exactly
for (const c of test_cases) {
const got = dontClobberRepetition(c.fragment, c.repetition);
expect(got).toBe(c.expected);
}
});
});

View File

@ -95,8 +95,7 @@ module.exports = {
after: {
root: "./lib",
include: [
"script.d.ts",
"script.d.ts.map"
"script.d.ts"
]
}
})