mirror of
https://github.com/pdemian/human2regex.git
synced 2025-05-15 20:10:19 -07:00
209 lines
7.4 KiB
JavaScript
209 lines
7.4 KiB
JavaScript
"use strict";
|
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.dontClobberRepetition = exports.groupIfRequired = exports.minimizeMatchString = void 0;
|
|
/**
|
|
* Includes helper functions for the Generator
|
|
* @packageDocumentation
|
|
*/
|
|
const utilities_1 = require("./utilities");
|
|
/**
 * Collapses a list of alternative match fragments into a single regex fragment.
 *
 * @param arr the array of matches
 * @param has_neighbours when true the reduction starts at depth 1 instead of 0, so an
 *                       alternation that cannot be merged comes back wrapped in a non-capturing group
 * @returns the lone element unchanged when arr holds exactly one entry, otherwise the result of minMatchString
 * @internal
 */
function minimizeMatchString(arr, has_neighbours = false) {
    const initial_depth = has_neighbours ? 1 : 0;
    // a single entry must bypass minMatchString, which treats a lone element as optional
    return arr.length === 1 ? utilities_1.first(arr) : minMatchString(arr, initial_depth);
}
|
|
exports.minimizeMatchString = minimizeMatchString;
|
|
/**
 * Minimizes the match string by finding duplicates or substrings in the array
 *
 * The alternatives are de-duplicated, then their common prefix and common suffix are factored
 * out and the differing middles are reduced recursively. Alternatives made only of single
 * characters and ranges are merged into one bracket expression.
 *
 * NOTE(review): prefix and suffix are compared character by character, so an escape sequence
 * or bracket expression that is only partly shared can be split — confirm callers cannot
 * produce such input.
 *
 * @param arr the array
 * @param depth must be 0 for initial call; at depth > 0 an alternation that cannot be merged is wrapped in "(?:...)"
 * @returns an optimized string
 * @internal
 */
function minMatchString(arr, depth = 0) {
    // base case: arr is empty
    if (arr.length === 0) {
        return "";
    }
    // base case: arr has 1 element (must have at least 2, so this means this value is optional)
    if (arr.length === 1) {
        return makeOptionalFragment(utilities_1.first(arr));
    }
    // remove duplicates
    const unique = [...new Set(arr)];
    // base case: arr has 1 element (after duplicate removal means this is required)
    if (unique.length === 1) {
        return utilities_1.first(unique);
    }
    // base case: arr is all single letters or ranges
    if (unique.every((value) => utilities_1.isSingleRegexCharacter(value) || utilities_1.isRangeRegex(value))) {
        // if range, don't forget to remove '[' and ']'
        return "[" + unique.map((x) => utilities_1.isSingleRegexCharacter(x) ? x : x.substring(1, x.length - 1)).join("") + "]";
    }
    // shrink the first element down to the prefix and the suffix shared by every element
    let longest_begin_substring = utilities_1.first(unique);
    let longest_end_substring = utilities_1.first(unique);
    let shortest_length = longest_begin_substring.length;
    for (let i = 1; i < unique.length; i++) {
        const current = unique[i];
        shortest_length = Math.min(shortest_length, current.length);
        // indexing past the end of current yields undefined, which ends the comparison
        let begin_length = 0;
        while (begin_length < longest_begin_substring.length &&
            longest_begin_substring[begin_length] === current[begin_length]) {
            begin_length++;
        }
        longest_begin_substring = longest_begin_substring.substring(0, begin_length);
        let end_length = 0;
        while (end_length < longest_end_substring.length &&
            longest_end_substring[longest_end_substring.length - end_length - 1] === current[current.length - end_length - 1]) {
            end_length++;
        }
        longest_end_substring = longest_end_substring.substring(longest_end_substring.length - end_length);
        if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
            break;
        }
    }
    // No matches whatsoever
    // *technically* we can optimize further, but that is a VERY non-trivial problem
    // For example optimizing: [ "a1x1z", "a2y2z", "a3z3z" ] to: "a[123][xyz][123]z"
    if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
        return depth > 0 ? "(?:" + unique.join("|") + ")" : unique.join("|");
    }
    // The prefix and suffix may claim the same characters of a short element
    // (e.g. [ "aa", "aaa" ]). Keep the prefix and trim the suffix so that together they
    // never exceed the shortest element; otherwise the slice below would be taken with
    // start > end, which substring() silently swaps.
    const max_end_length = shortest_length - longest_begin_substring.length;
    if (longest_end_substring.length > max_end_length) {
        longest_end_substring = longest_end_substring.substring(longest_end_substring.length - max_end_length);
    }
    // remove begin (if exists) and end (if exists) from each element and remove empty strings
    const begin_pos = longest_begin_substring.length;
    const end_pos = longest_end_substring.length;
    const similar_matches = [];
    let has_empty_match = false;
    for (const ele of unique) {
        const match = ele.substring(begin_pos, ele.length - end_pos);
        if (match.length !== 0) {
            similar_matches.push(match);
        }
        else {
            has_empty_match = true;
        }
    }
    let middle;
    if (has_empty_match && similar_matches.length > 1) {
        // one alternative is exactly prefix + suffix, so the whole middle must be optional;
        // reduce it ungrouped (depth 0) because makeOptionalFragment adds the group itself
        middle = makeOptionalFragment(minMatchString(similar_matches, 0));
    }
    else {
        // a single remaining middle is made optional by the one-element base case above
        middle = minMatchString(similar_matches, depth + 1);
    }
    return longest_begin_substring + middle + longest_end_substring;
}
/**
 * Makes a whole regex fragment optional
 *
 * A bare "?" only binds to the last atom, so anything longer than one atom is wrapped in a
 * non-capturing group first.
 *
 * @param fragment fragment of regular expression to make optional
 * @returns the fragment followed by "?", grouped when required
 * @internal
 */
function makeOptionalFragment(fragment) {
    // presumably isRangeRegex only accepts one complete bracket expression — confirm in ./utilities
    if (utilities_1.isSingleRegexCharacter(fragment) || utilities_1.isRangeRegex(fragment)) {
        return fragment + "?";
    }
    return "(?:" + fragment + ")?";
}
|
|
/**
 * Groups a regex fragment if it needs to be grouped
 *
 * A fragment is returned untouched when it is already one unit: a single regex character,
 * a single regex character followed by "*" or "+", one parenthesised group, or one bracket
 * expression. Anything else is wrapped in "(?:...)".
 *
 * @param fragment fragment of regular expression to potentially group
 * @returns a non-capturing group if there needs to be one
 * @internal
 */
function groupIfRequired(fragment) {
    if (utilities_1.isSingleRegexCharacter(fragment)) {
        return fragment;
    }
    const last_character = fragment[fragment.length - 1];
    if ((last_character === "*" || last_character === "+") &&
        utilities_1.isSingleRegexCharacter(fragment.substring(0, fragment.length - 1))) {
        return fragment;
    }
    let is_single_unit = false;
    if (fragment[0] === "(" && last_character === ")") {
        is_single_unit = isSingleEnclosedUnit(fragment, "(", ")");
    }
    else if (fragment[0] === "[" && last_character === "]") {
        // you'll never have a raw [ inside a [], so nested openers are not counted
        is_single_unit = isSingleEnclosedUnit(fragment, null, "]");
    }
    return is_single_unit ? fragment : "(?:" + fragment + ")";
}
/**
 * Checks that the first and the last character of a fragment delimit one single unit
 *
 * Scans every character strictly between the outer delimiters. The scan used to stop one
 * character early, which let "[a]]" pass as a single bracket expression.
 *
 * @param fragment fragment that starts with an opening and ends with a closing delimiter
 * @param open nested opening delimiter to count, or null when nesting is not possible
 * @param close closing delimiter
 * @returns true if the outer delimiters enclose the entire fragment
 * @internal
 */
function isSingleEnclosedUnit(fragment, open, close) {
    const last_index = fragment.length - 1;
    let nesting = 0;
    let i = 1;
    while (i < last_index) {
        const character = fragment[i];
        if (character === "\\") {
            // skip the escaped character as well
            i += 2;
            continue;
        }
        if (open !== null && character === open) {
            nesting++;
        }
        else if (character === close) {
            nesting--;
            if (nesting < 0) {
                // the outer opener was closed before the end of the fragment
                return false;
            }
        }
        i++;
    }
    // i overshoots last_index when the final delimiter was consumed by an escape
    return i === last_index && nesting === 0;
}
|
|
exports.groupIfRequired = groupIfRequired;
|
|
/**
 * Checks to see if fragment has a + or * at the end and has a repetition statement
 *
 * Appending a repetition directly to a fragment that already ends in a quantifier would
 * produce an invalid or unintended expression, so the two are merged instead. A trailing
 * "+" or "*" that is escaped is a literal character and is treated like any other fragment.
 *
 * @param fragment fragment of regular expression
 * @param repetition repetition that may clobber the fragment
 * @returns the fragment with the repetition applied; the fragment unchanged when repetition is empty
 */
function dontClobberRepetition(fragment, repetition) {
    // nothing to apply: never strip an existing quantifier for an empty repetition
    if (!repetition) {
        return fragment;
    }
    const last_character = fragment[fragment.length - 1];
    const has_quantifier = (last_character === "+" || last_character === "*") && !isLastCharacterEscaped(fragment);
    if (!has_quantifier) {
        return fragment + repetition;
    }
    const without_quantifier = fragment.substring(0, fragment.length - 1);
    // + can be ignored as well as a count as long as that count is > 0
    if (last_character === "+") {
        switch (repetition) {
            case "*":
                // ignore: + is greater than *
                return fragment;
            case "?":
                // non-greedy qualifier
                return fragment + repetition;
            case "+":
                // ignore: already +
                return fragment;
            default:
                if (repetition.startsWith("{0")) {
                    // a count that allows zero cannot replace the +, so repeat the whole fragment
                    return "(?:" + fragment + ")" + repetition;
                }
                // remove + and replace with count
                return without_quantifier + repetition;
        }
    }
    switch (repetition) {
        case "*":
            // ignore: already *
            return fragment;
        case "?":
            // non-greedy qualifier
            return fragment + repetition;
        default:
            // remove * and replace with count
            return without_quantifier + repetition;
    }
}
/**
 * Checks whether the last character of a fragment is escaped
 *
 * @param fragment fragment of regular expression
 * @returns true if an odd number of backslashes directly precedes the last character
 * @internal
 */
function isLastCharacterEscaped(fragment) {
    let backslashes = 0;
    for (let i = fragment.length - 2; i >= 0 && fragment[i] === "\\"; i--) {
        backslashes++;
    }
    return backslashes % 2 === 1;
}
|
|
exports.dontClobberRepetition = dontClobberRepetition;
|