1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-16 04:20:35 -07:00
human2regex/lib/generator_helper.js
2020-11-21 01:42:43 -05:00

141 lines
4.9 KiB
JavaScript

"use strict";
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
Object.defineProperty(exports, "__esModule", { value: true });
exports.groupIfRequired = exports.minimizeMatchString = void 0;
/**
* Includes helper functions for the Generator
* @packageDocumentation
*/
const utilities_1 = require("./utilities");
/**
 * Minimizes the match string by finding duplicates or substrings in the array
 *
 * An array holding exactly one alternative is returned as-is: the recursive
 * minimizer interprets a one-element array as "every other alternative was
 * empty" and would otherwise turn a single required match into an optional one.
 *
 * @param arr the array of matches
 * @returns a regular expression fragment matching the given alternatives
 * @internal
 */
function minimizeMatchString(arr) {
    // a lone alternative is required, not optional, so do not minimize it
    if (arr.length === 1) {
        return utilities_1.first(arr);
    }
    return minMatchString(arr, 0);
}
exports.minimizeMatchString = minimizeMatchString;
/**
 * Tells whether the character at `index` is preceded by an odd run of
 * backslashes, i.e. whether it is the target of a backslash escape
 *
 * @param str the string to inspect
 * @param index position of the character in question (may equal str.length)
 * @returns true if an unpaired backslash sits directly before `index`
 * @internal
 */
function isEscapedAt(str, index) {
    let backslashes = 0;
    while (index - backslashes > 0 && str[index - backslashes - 1] === "\\") {
        backslashes++;
    }
    return backslashes % 2 === 1;
}
/**
 * Finds the longest prefix shared by every string, never ending in the
 * middle of a backslash escape
 *
 * @param strings non-empty array of strings
 * @returns the shared prefix (possibly empty)
 * @internal
 */
function longestCommonPrefix(strings) {
    let prefix = utilities_1.first(strings);
    for (const str of strings) {
        let shared = 0;
        while (shared < prefix.length && shared < str.length && prefix[shared] === str[shared]) {
            shared++;
        }
        prefix = prefix.slice(0, shared);
        if (shared === 0) {
            break;
        }
    }
    // an unpaired trailing backslash escapes a character that is not part of the prefix
    return isEscapedAt(prefix, prefix.length) ? prefix.slice(0, -1) : prefix;
}
/**
 * Finds the longest suffix shared by every string, never starting with a
 * character that is escaped in one of the strings
 *
 * @param strings non-empty array of strings
 * @returns the shared suffix (possibly empty)
 * @internal
 */
function longestCommonSuffix(strings) {
    let suffix = utilities_1.first(strings);
    for (const str of strings) {
        let shared = 0;
        while (shared < suffix.length && shared < str.length &&
            suffix[suffix.length - 1 - shared] === str[str.length - 1 - shared]) {
            shared++;
        }
        suffix = suffix.slice(suffix.length - shared);
        if (shared === 0) {
            break;
        }
    }
    // drop leading characters for as long as one of the strings escapes them
    while (suffix.length > 0 && strings.some((str) => isEscapedAt(str, str.length - suffix.length))) {
        suffix = suffix.slice(1);
    }
    return suffix;
}
/**
 * Tells whether a quantifier appended to the fragment applies to all of it:
 * a single regex character, one whole character class or one whole group
 *
 * @param fragment fragment of regular expression
 * @returns true if the fragment can be followed directly by a quantifier
 * @internal
 */
function isQuantifiableAtom(fragment) {
    if (utilities_1.isSingleRegexCharacter(fragment)) {
        return true;
    }
    if (fragment[0] !== "(" && fragment[0] !== "[") {
        return false;
    }
    let open_groups = 0;
    let in_class = false;
    for (let i = 0; i < fragment.length; i++) {
        const ch = fragment[i];
        if (ch === "\\") {
            // skip the escaped character
            i++;
        }
        else if (in_class) {
            // inside a class only the closing bracket is special
            if (ch === "]") {
                in_class = false;
                if (open_groups === 0) {
                    return i === fragment.length - 1;
                }
            }
        }
        else if (ch === "[") {
            in_class = true;
        }
        else if (ch === "(") {
            open_groups++;
        }
        else if (ch === ")") {
            open_groups--;
            if (open_groups === 0) {
                // the opening group just closed: atomic only if nothing follows
                return i === fragment.length - 1;
            }
        }
    }
    return false;
}
/**
 * Minimizes the match string by finding duplicates or substrings in the array
 *
 * Duplicates are removed, single regex characters are merged into a character
 * class, and otherwise the prefix and suffix common to every alternative are
 * factored out and the remaining middles are minimized recursively. An
 * alternative that is fully covered by prefix + suffix makes the middle optional.
 *
 * Apart from backslash escapes (which are never split), alternatives are
 * compared character by character; other multi-character constructs such as
 * character classes, groups or quantifiers are not treated as units.
 *
 * @param arr the array
 * @param depth must be 0 for initial call
 * @returns an optimized string
 * @internal
 */
function minMatchString(arr, depth = 0) {
    // base case: arr is empty
    if (arr.length === 0) {
        return "";
    }
    // remove duplicates
    const alternatives = [...new Set(arr)];
    // base case: only one distinct alternative, which is required as written
    if (alternatives.length === 1) {
        return utilities_1.first(alternatives);
    }
    // base case: arr is all single letters
    if (alternatives.every((ele) => utilities_1.isSingleRegexCharacter(ele))) {
        return "[" + alternatives.join("") + "]";
    }
    // the suffix is searched in what is left after the prefix so the two can never overlap
    const begin = longestCommonPrefix(alternatives);
    const tails = alternatives.map((ele) => ele.slice(begin.length));
    const end = longestCommonSuffix(tails);
    // No matches whatsoever
    // *technically* we can optimize further, but that is a VERY non-trivial problem
    // For example optimizing: [ "a1x1z", "a2y2z", "a3z3z" ] to: "a[123][xyz][123]z"
    if (begin.length === 0 && end.length === 0) {
        const joined = alternatives.join("|");
        return depth > 0 ? "(?:" + joined + ")" : joined;
    }
    // we have some matches: strip begin and end from each element
    const middles = tails.map((ele) => ele.slice(0, ele.length - end.length));
    const required = middles.filter((ele) => ele.length !== 0);
    let middle = minMatchString(required, depth + 1);
    // an empty middle means "begin + end" alone is a valid match, so the middle is optional
    if (required.length < middles.length) {
        middle = (isQuantifiableAtom(middle) ? middle : "(?:" + middle + ")") + "?";
    }
    return begin + middle + end;
}
/**
 * Groups a regex fragment if it needs to be grouped
 *
 * The fragment is returned unchanged when it is already one unit: a single
 * regex character, one whole character class ("[...]") or one whole group
 * ("(...)"). Anything else, including several groups or classes in a row
 * such as "(a)(b)", is wrapped in a non-capturing group.
 *
 * @param fragment fragment of regular expression to potentially group
 * @returns the fragment itself, or the fragment inside a non-capturing group
 * @internal
 */
function groupIfRequired(fragment) {
    if (utilities_1.isSingleRegexCharacter(fragment)) {
        return fragment;
    }
    const grouped = "(?:" + fragment + ")";
    if (fragment[0] !== "(" && fragment[0] !== "[") {
        return grouped;
    }
    // Walk the fragment until the construct opened by its first character closes.
    // It is a single unit only if that happens on the very last character.
    const last_index = fragment.length - 1;
    let open_groups = 0;
    let in_class = false;
    for (let i = 0; i <= last_index; i++) {
        const ch = fragment[i];
        if (ch === "\\") {
            // skip the escaped character
            i++;
        }
        else if (in_class) {
            // inside a character class parentheses are literals; only "]" is special
            if (ch === "]") {
                in_class = false;
                if (open_groups === 0) {
                    return i === last_index ? fragment : grouped;
                }
            }
        }
        else if (ch === "[") {
            in_class = true;
        }
        else if (ch === "(") {
            open_groups++;
        }
        else if (ch === ")") {
            open_groups--;
            if (open_groups === 0) {
                return i === last_index ? fragment : grouped;
            }
        }
    }
    // the opening construct never closed (e.g. its closer is escaped)
    return grouped;
}
exports.groupIfRequired = groupIfRequired;