mirror of
https://github.com/pdemian/human2regex.git
synced 2025-05-16 04:20:35 -07:00
141 lines
4.9 KiB
JavaScript
141 lines
4.9 KiB
JavaScript
"use strict";
|
|
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.groupIfRequired = exports.minimizeMatchString = void 0;
|
|
/**
|
|
* Includes helper functions for the Generator
|
|
* @packageDocumentation
|
|
*/
|
|
const utilities_1 = require("./utilities");
|
|
/**
 * Minimizes the match string by finding duplicates or substrings in the array
 *
 * A lone match is returned unchanged: there is nothing to merge, and it must
 * stay required rather than being treated as an optional leftover.
 *
 * @param arr the array of matches
 * @returns a regex fragment that matches the given alternatives
 * @internal
 */
function minimizeMatchString(arr) {
    // don't process an array of length 1, otherwise the single value would
    // be reported as optional ("abc" -> "abc?")
    if (arr.length === 1) {
        return arr[0];
    }
    return minMatchString(arr, 0);
}
|
|
exports.minimizeMatchString = minimizeMatchString;
|
|
/**
 * Minimizes the match string by finding duplicates or substrings in the array
 *
 * The common prefix and suffix shared by every element are factored out and
 * the remaining middles are minimized recursively. An element that is fully
 * consumed by the prefix/suffix makes the remaining middle optional.
 *
 * NOTE(review): the comparison works on raw characters, so it can split a
 * multi-character regex token (e.g. an escape sequence) between the shared
 * prefix and the remainder - confirm callers never pass such alternatives.
 *
 * @param arr the array
 * @param depth must be 0 for initial call
 * @returns an optimized string
 * @internal
 */
function minMatchString(arr, depth = 0) {
    return minimizeAlternatives(arr, depth).pattern;
}

/**
 * Recursive worker for minMatchString
 *
 * @param arr the alternatives to merge
 * @param depth recursion depth (0 for the outermost call)
 * @returns the fragment and whether a quantifier can be appended to it
 *          without wrapping it in a group first
 * @internal
 */
function minimizeAlternatives(arr, depth) {
    // base case: arr is empty
    if (arr.length === 0) {
        return { pattern: "", atomic: true };
    }
    // remove duplicates first so that [ "x" ] and [ "x", "x" ] agree
    const unique = [...new Set(arr)];
    // base case: a single (required) alternative
    if (unique.length === 1) {
        const only = unique[0];
        return { pattern: only, atomic: utilities_1.isSingleRegexCharacter(only) };
    }
    // base case: arr is all single letters
    if (unique.every((ele) => utilities_1.isSingleRegexCharacter(ele))) {
        return { pattern: "[" + unique.join("") + "]", atomic: true };
    }
    const reference = unique[0];
    const shortest = unique.reduce((min, ele) => Math.min(min, ele.length), reference.length);
    const begin_len = commonPrefixLength(unique);
    // the suffix may not overlap the prefix inside the shortest element,
    // otherwise the same characters would be emitted twice
    const end_len = Math.min(commonSuffixLength(unique), shortest - begin_len);
    // No matches whatsoever
    // *technically* we can optimize further, but that is a VERY non-trivial problem
    // For example optimizing: [ "a1x1z", "a2y2z", "a3z3z" ] to: "a[123][xyz][123]z"
    if (begin_len === 0 && end_len === 0) {
        if (depth > 0) {
            return { pattern: "(?:" + unique.join("|") + ")", atomic: true };
        }
        return { pattern: unique.join("|"), atomic: false };
    }
    // we have some matches: strip the shared begin/end from each element
    const begin = reference.slice(0, begin_len);
    const end = reference.slice(reference.length - end_len);
    const middles = unique.map((ele) => ele.slice(begin_len, ele.length - end_len));
    // elements are distinct, so at most one middle is empty; if one is,
    // whatever the other middles reduce to must become optional
    const has_empty_middle = middles.includes("");
    const inner = minimizeAlternatives(middles.filter((middle) => middle.length !== 0), depth + 1);
    let middle_pattern = inner.pattern;
    if (has_empty_middle) {
        // "?" binds to the preceding atom only, so group anything longer
        middle_pattern = (inner.atomic ? middle_pattern : "(?:" + middle_pattern + ")") + "?";
    }
    return { pattern: begin + middle_pattern + end, atomic: false };
}

/**
 * Length of the longest prefix shared by every string
 *
 * @param strings non-empty array of strings
 * @returns number of leading characters common to all strings
 * @internal
 */
function commonPrefixLength(strings) {
    const reference = strings[0];
    let length = reference.length;
    for (let i = 1; i < strings.length && length > 0; i++) {
        const current = strings[i];
        let matched = 0;
        while (matched < length && matched < current.length && current[matched] === reference[matched]) {
            matched++;
        }
        length = matched;
    }
    return length;
}

/**
 * Length of the longest suffix shared by every string
 *
 * @param strings non-empty array of strings
 * @returns number of trailing characters common to all strings
 * @internal
 */
function commonSuffixLength(strings) {
    const reference = strings[0];
    let length = reference.length;
    for (let i = 1; i < strings.length && length > 0; i++) {
        const current = strings[i];
        let matched = 0;
        while (matched < length && matched < current.length &&
            current[current.length - 1 - matched] === reference[reference.length - 1 - matched]) {
            matched++;
        }
        length = matched;
    }
    return length;
}
|
|
/**
 * Groups a regex fragment if it needs to be grouped
 *
 * A fragment is left alone when it is a single regex character, or when it
 * is exactly one group "( ... )" or exactly one character class "[ ... ]".
 * Anything else (including several groups/classes in a row, such as
 * "(a)(b)" or "[a][b]") is wrapped in a non-capturing group.
 *
 * @param fragment fragment of regular expression to potentially group
 * @returns a non-capturing group if there needs to be one
 * @internal
 */
function groupIfRequired(fragment) {
    if (utilities_1.isSingleRegexCharacter(fragment)) {
        return fragment;
    }
    if (isSelfContainedFragment(fragment)) {
        return fragment;
    }
    return "(?:" + fragment + ")";
}

/**
 * Checks whether the whole fragment is one group or one character class
 *
 * @param fragment fragment of regular expression
 * @returns true if the bracket opened at index 0 is closed by the last character
 * @internal
 */
function isSelfContainedFragment(fragment) {
    const last = fragment.length - 1;
    if (fragment[0] === "[") {
        return findClassEnd(fragment, 0) === last;
    }
    if (fragment[0] !== "(") {
        return false;
    }
    let open_groups = 0;
    for (let i = 0; i <= last; i++) {
        const ch = fragment[i];
        if (ch === "\\") {
            // skip the escaped character
            i++;
        }
        else if (ch === "[") {
            // parentheses inside a character class are literals
            i = findClassEnd(fragment, i);
            if (i === -1) {
                return false;
            }
        }
        else if (ch === "(") {
            open_groups++;
        }
        else if (ch === ")") {
            open_groups--;
            if (open_groups === 0) {
                // the first group closed here: it spans the whole fragment
                // only if this is the final character
                return i === last;
            }
        }
    }
    // unbalanced: the opening group was never closed
    return false;
}

/**
 * Finds the end of the character class that starts at the given index
 *
 * @param fragment fragment of regular expression
 * @param start index of the opening "["
 * @returns index of the matching unescaped "]", or -1 if there is none
 * @internal
 */
function findClassEnd(fragment, start) {
    for (let i = start + 1; i < fragment.length; i++) {
        if (fragment[i] === "\\") {
            i++;
        }
        else if (fragment[i] === "]") {
            return i;
        }
    }
    return -1;
}
|
|
exports.groupIfRequired = groupIfRequired;
|