mirror of
https://github.com/pdemian/human2regex.git
synced 2025-05-16 04:20:35 -07:00
Improved code coverage and fixed bugs
This commit is contained in:
parent
34b7dac993
commit
9b9cee2276
14
docs/bundle.min.js
vendored
14
docs/bundle.min.js
vendored
File diff suppressed because one or more lines are too long
3
lib/generator.d.ts
vendored
3
lib/generator.d.ts
vendored
@ -23,8 +23,7 @@ export interface ISemanticError {
|
||||
/**
|
||||
* Context for validation
|
||||
*
|
||||
* Currently only used to validate groups
|
||||
*
|
||||
* @remarks Currently only used to validate groups
|
||||
* @internal
|
||||
*/
|
||||
export declare class GeneratorContext {
|
||||
|
@ -53,8 +53,7 @@ const unicode_script_codes = [
|
||||
/**
|
||||
* Context for validation
|
||||
*
|
||||
* Currently only used to validate groups
|
||||
*
|
||||
* @remarks Currently only used to validate groups
|
||||
* @internal
|
||||
*/
|
||||
class GeneratorContext {
|
||||
@ -360,50 +359,15 @@ class MatchSubStatementCST extends H2RCST {
|
||||
break;
|
||||
}
|
||||
}
|
||||
let ret = "";
|
||||
let require_grouping = false;
|
||||
let dont_clobber_plus = false;
|
||||
if (matches.length === 1) {
|
||||
ret = utilities_1.first(matches);
|
||||
if (ret.endsWith("+")) {
|
||||
dont_clobber_plus = true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
ret = generator_helper_1.minimizeMatchString(matches);
|
||||
if (ret.length > 1 &&
|
||||
(!ret.startsWith("(") || !ret.startsWith("["))) {
|
||||
require_grouping = true;
|
||||
}
|
||||
}
|
||||
let ret = generator_helper_1.minimizeMatchString(matches);
|
||||
if (this.count) {
|
||||
if (dont_clobber_plus) {
|
||||
const clobber = this.count.toRegex(language);
|
||||
// + can be ignored as well as a count as long as that count is > 0
|
||||
switch (clobber) {
|
||||
case "*":
|
||||
case "?":
|
||||
ret = "(?:" + ret + ")" + clobber;
|
||||
break;
|
||||
case "+":
|
||||
// ignore
|
||||
break;
|
||||
default:
|
||||
if (clobber.startsWith("{0")) {
|
||||
ret = "(?:" + ret + ")" + clobber;
|
||||
}
|
||||
else {
|
||||
// remove + and replace with count
|
||||
ret.substring(0, ret.length - 1) + clobber;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (matches.length === 1) {
|
||||
// we don't group if there's only 1 element
|
||||
// but we need to make sure we don't add an additional + or *
|
||||
ret = generator_helper_1.dontClobberRepetition(ret, this.count.toRegex(language));
|
||||
}
|
||||
else {
|
||||
if (require_grouping) {
|
||||
ret = "(?:" + ret + ")";
|
||||
}
|
||||
ret += this.count.toRegex(language);
|
||||
ret = generator_helper_1.groupIfRequired(ret) + this.count.toRegex(language);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
|
7
lib/generator_helper.d.ts
vendored
7
lib/generator_helper.d.ts
vendored
@ -14,3 +14,10 @@ export declare function minimizeMatchString(arr: string[]): string;
|
||||
* @internal
|
||||
*/
|
||||
export declare function groupIfRequired(fragment: string): string;
|
||||
/**
|
||||
* Applies a repetition to a fragment, merging it with a + or * already at the end of the fragment instead of stacking a second quantifier on top
|
||||
*
|
||||
* @param fragment fragment of regular expression
|
||||
* @param repetition repetition that may clobber the fragment
|
||||
*/
|
||||
export declare function dontClobberRepetition(fragment: string, repetition: string): string;
|
||||
|
@ -1,7 +1,7 @@
|
||||
"use strict";
|
||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.groupIfRequired = exports.minimizeMatchString = void 0;
|
||||
exports.dontClobberRepetition = exports.groupIfRequired = exports.minimizeMatchString = void 0;
|
||||
/**
|
||||
* Includes helper functions for the Generator
|
||||
* @packageDocumentation
|
||||
@ -14,6 +14,10 @@ const utilities_1 = require("./utilities");
|
||||
* @internal
|
||||
*/
|
||||
function minimizeMatchString(arr) {
    // a lone element is handed back untouched: running it through
    // minMatchString would give the wrong result
    return arr.length === 1 ? utilities_1.first(arr) : minMatchString(arr, 0);
}
|
||||
exports.minimizeMatchString = minimizeMatchString;
|
||||
@ -114,6 +118,9 @@ function groupIfRequired(fragment) {
|
||||
}
|
||||
else if (fragment[i] === ")") {
|
||||
bracket_count--;
|
||||
if (bracket_count === -1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return bracket_count === 0 ? fragment : "(?:" + fragment + ")";
|
||||
@ -124,11 +131,15 @@ function groupIfRequired(fragment) {
|
||||
if (fragment[i] === "\\") {
|
||||
i++;
|
||||
}
|
||||
else if (fragment[i] === "[") {
|
||||
bracket_count++;
|
||||
}
|
||||
//you'll never have a raw [ inside a []
|
||||
//else if (fragment[i] === "[") {
|
||||
// bracket_count++;
|
||||
//}
|
||||
else if (fragment[i] === "]") {
|
||||
bracket_count--;
|
||||
if (bracket_count === -1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return bracket_count === 0 ? fragment : "(?:" + fragment + ")";
|
||||
@ -138,3 +149,55 @@ function groupIfRequired(fragment) {
|
||||
}
|
||||
}
|
||||
exports.groupIfRequired = groupIfRequired;
|
||||
/**
 * Applies a repetition to a fragment that may already end in a "+" or "*"
 * quantifier, merging the two instead of stacking one on top of the other
 *
 * A trailing "+" or "*" preceded by an odd number of backslashes is an escaped
 * literal character rather than a quantifier, so the repetition is simply
 * appended to it.
 *
 * @param fragment fragment of regular expression
 * @param repetition repetition that may clobber the fragment
 * @returns the fragment with the repetition applied
 */
function dontClobberRepetition(fragment, repetition) {
    const trailing = fragment.charAt(fragment.length - 1);
    // nothing to clobber: the fragment does not end in + or *
    if (trailing !== "+" && trailing !== "*") {
        return fragment + repetition;
    }
    // an odd run of backslashes right before the last character means it is
    // escaped (e.g. "\+"), so it must not be stripped or merged
    let backslashes = 0;
    for (let i = fragment.length - 2; i >= 0 && fragment[i] === "\\"; i--) {
        backslashes++;
    }
    if (backslashes % 2 === 1) {
        return fragment + repetition;
    }
    const without_quantifier = fragment.substring(0, fragment.length - 1);
    if (trailing === "+") {
        switch (repetition) {
            case "*":
            // ignore: + is greater than *
            case "+":
                // ignore: already +
                return fragment;
            case "?":
                // non-greedy qualifier
                return fragment + repetition;
            default:
                // a count that allows 0 cannot replace the +, so wrap instead;
                // any other count takes the place of the +
                return repetition.startsWith("{0")
                    ? "(?:" + fragment + ")" + repetition
                    : without_quantifier + repetition;
        }
    }
    // the fragment ends with an unescaped *
    switch (repetition) {
        case "*":
            // ignore: already *
            return fragment;
        case "?":
            // non-greedy qualifier
            return fragment + repetition;
        default:
            // remove * and replace with count
            return without_quantifier + repetition;
    }
}
|
||||
exports.dontClobberRepetition = dontClobberRepetition;
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last, unusedParameter, makeFlag, append } from "./utilities";
|
||||
import { IToken } from "chevrotain";
|
||||
import { minimizeMatchString, groupIfRequired } from "./generator_helper";
|
||||
import { minimizeMatchString, groupIfRequired, dontClobberRepetition } from "./generator_helper";
|
||||
|
||||
/**
|
||||
* List of regular expression dialects we support
|
||||
@ -66,8 +66,7 @@ const unicode_script_codes = [
|
||||
/**
|
||||
* Context for validation
|
||||
*
|
||||
* Currently only used to validate groups
|
||||
*
|
||||
* @remarks Currently only used to validate groups
|
||||
* @internal
|
||||
*/
|
||||
export class GeneratorContext {
|
||||
@ -126,7 +125,6 @@ interface Generates {
|
||||
toRegex(language: RegexDialect): string;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* The base concrete syntax tree class
|
||||
*
|
||||
@ -414,56 +412,16 @@ export class MatchSubStatementCST extends H2RCST {
|
||||
}
|
||||
}
|
||||
|
||||
let ret = "";
|
||||
|
||||
let require_grouping = false;
|
||||
let dont_clobber_plus = false;
|
||||
|
||||
if (matches.length === 1) {
|
||||
ret = first(matches);
|
||||
if (ret.endsWith("+")) {
|
||||
dont_clobber_plus = true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
ret = minimizeMatchString(matches);
|
||||
|
||||
if (ret.length > 1 &&
|
||||
(!ret.startsWith("(") || !ret.startsWith("["))) {
|
||||
require_grouping = true;
|
||||
}
|
||||
}
|
||||
let ret = minimizeMatchString(matches);
|
||||
|
||||
if (this.count) {
|
||||
if (dont_clobber_plus) {
|
||||
const clobber = this.count.toRegex(language);
|
||||
|
||||
// + can be ignored as well as a count as long as that count is > 0
|
||||
switch (clobber) {
|
||||
case "*":
|
||||
case "?":
|
||||
ret = "(?:" + ret + ")" + clobber;
|
||||
break;
|
||||
case "+":
|
||||
// ignore
|
||||
break;
|
||||
default:
|
||||
if (clobber.startsWith("{0")) {
|
||||
ret = "(?:" + ret + ")" + clobber;
|
||||
}
|
||||
else {
|
||||
// remove + and replace with count
|
||||
ret.substring(0, ret.length - 1) + clobber;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (matches.length === 1) {
|
||||
// we don't group if there's only 1 element
|
||||
// but we need to make sure we don't add an additional + or *
|
||||
ret = dontClobberRepetition(ret, this.count.toRegex(language));
|
||||
}
|
||||
else {
|
||||
if (require_grouping) {
|
||||
ret = "(?:" + ret + ")";
|
||||
}
|
||||
|
||||
ret += this.count.toRegex(language);
|
||||
ret = groupIfRequired(ret) + this.count.toRegex(language);
|
||||
}
|
||||
}
|
||||
|
||||
@ -881,7 +839,6 @@ export class IfPatternStatementCST extends StatementCST {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Concrete Syntax Tree for an If group Ident statement
|
||||
*
|
||||
|
@ -14,6 +14,11 @@ import { first, isSingleRegexCharacter } from "./utilities";
|
||||
* @internal
|
||||
*/
|
||||
export function minimizeMatchString(arr: string[]): string {
    // a lone element is handed back untouched: running it through
    // minMatchString would give the wrong result
    return arr.length === 1 ? first(arr) : minMatchString(arr, 0);
}
|
||||
|
||||
@ -128,6 +133,10 @@ export function groupIfRequired(fragment: string): string {
|
||||
}
|
||||
else if (fragment[i] === ")") {
|
||||
bracket_count--;
|
||||
|
||||
if (bracket_count === -1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -140,11 +149,16 @@ export function groupIfRequired(fragment: string): string {
|
||||
if (fragment[i] === "\\") {
|
||||
i++;
|
||||
}
|
||||
else if (fragment[i] === "[") {
|
||||
bracket_count++;
|
||||
}
|
||||
//you'll never have a raw [ inside a []
|
||||
//else if (fragment[i] === "[") {
|
||||
// bracket_count++;
|
||||
//}
|
||||
else if (fragment[i] === "]") {
|
||||
bracket_count--;
|
||||
|
||||
if (bracket_count === -1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -154,3 +168,57 @@ export function groupIfRequired(fragment: string): string {
|
||||
return "(?:" + fragment + ")";
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Applies a repetition to a fragment that may already end in a "+" or "*"
 * quantifier, merging the two instead of stacking one on top of the other
 *
 * A trailing "+" or "*" preceded by an odd number of backslashes is an escaped
 * literal character rather than a quantifier, so the repetition is simply
 * appended to it.
 *
 * @param fragment fragment of regular expression
 * @param repetition repetition that may clobber the fragment
 * @returns the fragment with the repetition applied
 */
export function dontClobberRepetition(fragment: string, repetition: string): string {
    const trailing = fragment.charAt(fragment.length - 1);

    // nothing to clobber: the fragment does not end in + or *
    if (trailing !== "+" && trailing !== "*") {
        return fragment + repetition;
    }

    // an odd run of backslashes right before the last character means it is
    // escaped (e.g. "\+"), so it must not be stripped or merged
    let backslashes = 0;
    for (let i = fragment.length - 2; i >= 0 && fragment[i] === "\\"; i--) {
        backslashes++;
    }
    if (backslashes % 2 === 1) {
        return fragment + repetition;
    }

    const without_quantifier = fragment.substring(0, fragment.length - 1);

    if (trailing === "+") {
        switch (repetition) {
            case "*":
            // ignore: + is greater than *
            case "+":
                // ignore: already +
                return fragment;
            case "?":
                // non-greedy qualifier
                return fragment + repetition;
            default:
                // a count that allows 0 cannot replace the +, so wrap instead;
                // any other count takes the place of the +
                return repetition.startsWith("{0")
                    ? "(?:" + fragment + ")" + repetition
                    : without_quantifier + repetition;
        }
    }

    // the fragment ends with an unescaped *
    switch (repetition) {
        case "*":
            // ignore: already *
            return fragment;
        case "?":
            // non-greedy qualifier
            return fragment + repetition;
        default:
            // remove * and replace with count
            return without_quantifier + repetition;
    }
}
|
||||
|
@ -222,7 +222,7 @@ describe("Generator functionality", function() {
|
||||
|
||||
it("runs complex scripts", function() {
|
||||
const str = `
|
||||
using global and multiline and exact matching
|
||||
using global and multiline and exact matching and case insensitive matching
|
||||
create an optional group called protocol
|
||||
match "http"
|
||||
optionally match "s"
|
||||
@ -257,6 +257,6 @@ create an optional group
|
||||
const toks = lexer.tokenize(str).tokens;
|
||||
const reg = parser.parse(toks);
|
||||
expect(reg.validate(RegexDialect.JS).length).toBe(0);
|
||||
expect(reg.toRegex(RegexDialect.JS)).toBe("/^(?<protocol>https?\\:\\/\\/)?(?<subdomain>(?:\\w+\\.)*)?(?<domain>(?:\\w+|_|\\-)+\\.\\w+)(?:\\:\\d*)?(?<path>(?:\\/(?:\\w+|_|\\-)*)*)?(\\?(?<query>(?:(?:\\w+|_|\\-)+=(?:\\w+|_|\\-)+)*))?(#.*)?$/gm");
|
||||
expect(reg.toRegex(RegexDialect.JS)).toBe("/^(?<protocol>https?\\:\\/\\/)?(?<subdomain>(?:\\w+\\.)*)?(?<domain>(?:\\w+|_|\\-)+\\.\\w+)(?:\\:\\d*)?(?<path>(?:\\/(?:\\w+|_|\\-)*)*)?(\\?(?<query>(?:(?:\\w+|_|\\-)+=(?:\\w+|_|\\-)+)*))?(#.*)?$/gmi");
|
||||
});
|
||||
});
|
@ -1,11 +1,13 @@
|
||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||
|
||||
import { minimizeMatchString, groupIfRequired } from "../src/generator_helper";
|
||||
import { minimizeMatchString, groupIfRequired, dontClobberRepetition } from "../src/generator_helper";
|
||||
|
||||
|
||||
describe("Generator helper functionality", function() {
|
||||
it("can minimize matches", function() {
|
||||
const test_cases = [
|
||||
{ from: [], to: "" },
|
||||
{ from: [ "abc" ], to: "abc" },
|
||||
{ from: [ "abc", "abc" ], to: "abc" },
|
||||
{ from: [ "a", "ab" ], to: "ab?" },
|
||||
{ from: [ "a1x1z", "a2y2z", "a3z3z" ], to: "a(?:1x1|2y2|3z3)z" },
|
||||
@ -24,9 +26,11 @@ describe("Generator helper functionality", function() {
|
||||
it("groups correctly", function() {
|
||||
const test_cases = [
|
||||
{ from: "(?P=test)", to: "(?P=test)" },
|
||||
{ from: "[abc]", to: "[abc]" },
|
||||
{ from: "[abc\\]]", to: "[abc\\]]" },
|
||||
{ from: "abc", to: "(?:abc)" },
|
||||
{ from: "(abc)|d", to: "(?:(abc)|d)" }
|
||||
{ from: "(abc)|d", to: "(?:(abc)|d)" },
|
||||
{ from: "[abc\\]][abc]", to: "(?:[abc\\]][abc])" },
|
||||
{ from: "(abc(abc)\\))(abc)", to: "(?:(abc(abc)\\))(abc))" },
|
||||
];
|
||||
|
||||
for (const c of test_cases) {
|
||||
@ -35,4 +39,25 @@ describe("Generator helper functionality", function() {
|
||||
expect(got).toBe(c.to);
|
||||
}
|
||||
});
|
||||
|
||||
it("doesn't clobber the repetition", function() {
|
||||
const test_cases = [
|
||||
{ fragment: "1+", repetition: "+", expected: "1+" },
|
||||
{ fragment: "1*", repetition: "+", expected: "1+" },
|
||||
{ fragment: "1+", repetition: "*", expected: "1+" },
|
||||
{ fragment: "1*", repetition: "*", expected: "1*" },
|
||||
{ fragment: "1+", repetition: "?", expected: "1+?" },
|
||||
{ fragment: "1*", repetition: "?", expected: "1*?" },
|
||||
{ fragment: "1+", repetition: "{0,}", expected: "(?:1+){0,}" },
|
||||
{ fragment: "1*", repetition: "{0,}", expected: "1{0,}" },
|
||||
{ fragment: "1+", repetition: "{1,2}", expected: "1{1,2}" },
|
||||
{ fragment: "1*", repetition: "{1,2}", expected: "1{1,2}" },
|
||||
];
|
||||
|
||||
for (const c of test_cases) {
|
||||
const got = dontClobberRepetition(c.fragment, c.repetition);
|
||||
|
||||
expect(got).toBe(c.expected);
|
||||
}
|
||||
});
|
||||
});
|
@ -95,8 +95,7 @@ module.exports = {
|
||||
after: {
|
||||
root: "./lib",
|
||||
include: [
|
||||
"script.d.ts",
|
||||
"script.d.ts.map"
|
||||
"script.d.ts"
|
||||
]
|
||||
}
|
||||
})
|
||||
|
Loading…
x
Reference in New Issue
Block a user