1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-15 20:10:19 -07:00
human2regex/lib/generator_helper.js
2021-01-20 07:42:03 -05:00

209 lines
7.4 KiB
JavaScript

"use strict";
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
Object.defineProperty(exports, "__esModule", { value: true });
exports.dontClobberRepetition = exports.groupIfRequired = exports.minimizeMatchString = void 0;
/**
* Includes helper functions for the Generator
* @packageDocumentation
*/
const utilities_1 = require("./utilities");
/**
 * Entry point for collapsing a list of alternative match strings into one
 * regular expression fragment.
 *
 * A list holding exactly one entry is handed back untouched; every other list
 * is passed on to minMatchString.
 *
 * @param arr the array of matches
 * @param has_neighbours when true the minimization starts at depth 1 instead of 0
 * @returns the only entry of a one element array, otherwise the minimized string
 * @internal
 */
function minimizeMatchString(arr, has_neighbours = false) {
    const is_lone_match = arr.length === 1;
    if (!is_lone_match) {
        const initial_depth = has_neighbours ? 1 : 0;
        return minMatchString(arr, initial_depth);
    }
    // a single entry must not be minimized, otherwise it would be reported as optional
    return utilities_1.first(arr);
}
exports.minimizeMatchString = minimizeMatchString;
/**
 * Minimizes the match string by finding duplicates or substrings in the array
 *
 * Duplicates are dropped, alternatives that are all single characters or ranges
 * are merged into one character class, and everything else is factored around
 * the longest prefix and suffix shared by every alternative. The parts that
 * differ are minimized recursively. An alternative that consists of nothing but
 * the shared prefix and suffix makes the differing part optional.
 *
 * NOTE: prefixes and suffixes are compared one UTF-16 code unit at a time, so an
 * escape sequence that is only partly shared between alternatives may be split.
 *
 * @param arr the array
 * @param depth must be 0 for initial call
 * @returns an optimized string
 * @internal
 */
function minMatchString(arr, depth = 0) {
    // base case: arr is empty
    if (arr.length === 0) {
        return "";
    }
    // base case: arr has 1 element (must have at least 2, so this means this value is optional)
    if (arr.length === 1) {
        const only = utilities_1.first(arr);
        return makeOptional(only, isAtomicMatch(only));
    }
    // remove duplicates, then factor what is left
    return reduceMatches([...new Set(arr)], depth).pattern;
}
/**
 * Checks whether a raw alternative is a single regex character or a range
 *
 * @param value the alternative to check
 * @returns true if a quantifier can be appended without grouping
 * @internal
 */
function isAtomicMatch(value) {
    return utilities_1.isSingleRegexCharacter(value) || utilities_1.isRangeRegex(value);
}
/**
 * Appends "?" to a fragment, grouping it first unless it is already atomic
 *
 * @param fragment the fragment to make optional
 * @param atomic whether "?" would already apply to the whole fragment
 * @returns the optional fragment
 * @internal
 */
function makeOptional(fragment, atomic) {
    // a bare "?" only binds to the last character, so "de?" would not make "de" optional
    return (atomic ? fragment : "(?:" + fragment + ")") + "?";
}
/**
 * Finds the longest prefix and suffix shared by every element
 *
 * The suffix is shortened where necessary so that prefix and suffix never
 * overlap inside the shortest element.
 *
 * @param arr array of at least one string
 * @returns the shared prefix and suffix
 * @internal
 */
function findCommonAffixes(arr) {
    let prefix = utilities_1.first(arr);
    let suffix = utilities_1.first(arr);
    for (let i = 1; i < arr.length; i++) {
        const current = arr[i];
        // shrink the prefix to what it shares with the current element
        let shared_begin = 0;
        while (shared_begin < prefix.length && shared_begin < current.length &&
            prefix[shared_begin] === current[shared_begin]) {
            shared_begin++;
        }
        prefix = prefix.substring(0, shared_begin);
        // shrink the suffix the same way, comparing from the end
        let shared_end = 0;
        while (shared_end < suffix.length && shared_end < current.length &&
            suffix[suffix.length - 1 - shared_end] === current[current.length - 1 - shared_end]) {
            shared_end++;
        }
        suffix = suffix.substring(suffix.length - shared_end);
        if (prefix.length === 0 && suffix.length === 0) {
            break;
        }
    }
    // Without this clamp an element shorter than prefix + suffix (e.g. "a" in
    // [ "aba", "a" ]) would be sliced with start > end, and String.substring
    // silently swaps such arguments instead of returning an empty string.
    const shortest = arr.reduce((min, ele) => Math.min(min, ele.length), Infinity);
    const max_suffix_length = Math.max(0, shortest - prefix.length);
    if (suffix.length > max_suffix_length) {
        suffix = suffix.substring(suffix.length - max_suffix_length);
    }
    return { prefix, suffix };
}
/**
 * Recursive worker for minMatchString
 *
 * @param arr array of distinct alternatives, at least one element
 * @param depth recursion depth, 0 for the outermost call
 * @returns the minimized pattern and whether a quantifier may be appended to it without grouping
 * @internal
 */
function reduceMatches(arr, depth) {
    // base case: arr has 1 element (after duplicate removal means this is required)
    if (arr.length === 1) {
        const only = utilities_1.first(arr);
        return { pattern: only, atomic: isAtomicMatch(only) };
    }
    // base case: arr is all single letters or ranges
    if (arr.every((value) => isAtomicMatch(value))) {
        // if range, don't forget to remove '[' and ']'
        const members = arr.map((x) => utilities_1.isSingleRegexCharacter(x) ? x : x.substring(1, x.length - 1));
        return { pattern: "[" + members.join("") + "]", atomic: true };
    }
    const { prefix, suffix } = findCommonAffixes(arr);
    // No matches whatsoever
    // *technically* we can optimize further, but that is a VERY non-trivial problem
    // For example optimizing: [ "a1x1z", "a2y2z", "a3z3z" ] to: "a[123][xyz][123]z"
    if (prefix.length === 0 && suffix.length === 0) {
        if (depth > 0) {
            return { pattern: "(?:" + arr.join("|") + ")", atomic: true };
        }
        return { pattern: arr.join("|"), atomic: false };
    }
    // we have some matches: strip prefix and suffix from each element
    const middles = [];
    let has_empty = false;
    for (const ele of arr) {
        const middle = ele.substring(prefix.length, ele.length - suffix.length);
        if (middle.length === 0) {
            // this element is exactly prefix + suffix, so whatever lies between is optional
            has_empty = true;
        }
        else {
            middles.push(middle);
        }
    }
    let inner = "";
    if (middles.length > 0) {
        const reduced = reduceMatches(middles, depth + 1);
        inner = has_empty ? makeOptional(reduced.pattern, reduced.atomic) : reduced.pattern;
    }
    return { pattern: prefix + inner + suffix, atomic: false };
}
/**
 * Groups a regex fragment if it needs to be grouped
 *
 * A fragment is returned unchanged when it is a single regex character, a single
 * regex character followed by "*" or "+", exactly one parenthesised group, or
 * exactly one character class. Anything else is wrapped in "(?:" ... ")".
 *
 * @param fragment fragment of regular expression to potentially group
 * @returns a non-capturing group if there needs to be one
 * @internal
 */
function groupIfRequired(fragment) {
    if (utilities_1.isSingleRegexCharacter(fragment)) {
        return fragment;
    }
    const last_char = fragment[fragment.length - 1];
    if ((last_char === "*" || last_char === "+") &&
        utilities_1.isSingleRegexCharacter(fragment.substring(0, fragment.length - 1))) {
        return fragment;
    }
    let is_single_unit = false;
    if (fragment[0] === "(" && last_char === ")") {
        is_single_unit = closesOnlyAtEnd(fragment, "(", ")");
    }
    else if (fragment[0] === "[" && last_char === "]") {
        // you'll never have a raw [ inside a [], so there is no opening bracket to count
        is_single_unit = closesOnlyAtEnd(fragment, null, "]");
    }
    return is_single_unit ? fragment : "(?:" + fragment + ")";
}
/**
 * Checks that the bracket opened by the first character of the fragment is not
 * closed before the last character
 *
 * Every character strictly between the first and the last one is scanned. The
 * previous bound of "length - 2" skipped the character right before the closing
 * bracket, so "((a))" was grouped again and "[a]]" was mistaken for one class.
 *
 * @param fragment fragment that starts with an opening and ends with a closing bracket
 * @param open opening bracket that increases nesting, or null if nesting is impossible
 * @param close closing bracket that decreases nesting
 * @returns true if the outer brackets enclose the whole fragment
 * @internal
 */
function closesOnlyAtEnd(fragment, open, close) {
    let bracket_count = 0;
    for (let i = 1; i < fragment.length - 1; i++) {
        const current = fragment[i];
        if (current === "\\") {
            // skip whatever is escaped
            i++;
        }
        else if (current === open) {
            bracket_count++;
        }
        else if (current === close) {
            bracket_count--;
            if (bracket_count < 0) {
                // the outer bracket was closed before the end of the fragment
                return false;
            }
        }
    }
    return bracket_count === 0;
}
exports.groupIfRequired = groupIfRequired;
/**
 * Applies a repetition to a fragment without clobbering a "+" or "*" quantifier
 * the fragment already ends with
 *
 * Rules for a fragment ending with the quantifier "+":
 * - "*" and "+" are ignored, the fragment is returned as it is
 * - "?" is appended
 * - a repetition starting with "{0" is applied to the grouped fragment
 * - any other repetition replaces the "+"
 *
 * Rules for a fragment ending with the quantifier "*":
 * - "*" is ignored
 * - "?" is appended
 * - any other repetition replaces the "*"
 *
 * A trailing "+" or "*" that is escaped (preceded by an odd number of
 * backslashes) is a literal character, not a quantifier, so the repetition is
 * simply appended. An empty repetition leaves the fragment untouched.
 *
 * @param fragment fragment of regular expression
 * @param repetition repetition that may clobber the fragment
 * @returns the fragment with the repetition applied
 */
function dontClobberRepetition(fragment, repetition) {
    // nothing to apply: don't strip the quantifier the fragment already has
    if (repetition === "") {
        return fragment;
    }
    const last_char = fragment[fragment.length - 1];
    const ends_with_quantifier = (last_char === "+" || last_char === "*") &&
        !isEscapedAt(fragment, fragment.length - 1);
    if (!ends_with_quantifier) {
        return fragment + repetition;
    }
    const without_quantifier = fragment.substring(0, fragment.length - 1);
    if (last_char === "+") {
        // + can be ignored as well as a count as long as that count is > 0
        switch (repetition) {
            case "*":
                // ignore: + is greater than *
                return fragment;
            case "+":
                // ignore: already +
                return fragment;
            case "?":
                // non-greedy qualifier
                return fragment + repetition;
            default:
                if (repetition.startsWith("{0")) {
                    // a count that allows zero must wrap the whole "+" fragment
                    return "(?:" + fragment + ")" + repetition;
                }
                // remove + and replace with count
                return without_quantifier + repetition;
        }
    }
    switch (repetition) {
        case "*":
            // ignore: already *
            return fragment;
        case "?":
            // non-greedy qualifier
            return fragment + repetition;
        default:
            // remove * and replace with count
            return without_quantifier + repetition;
    }
}
/**
 * Checks whether the character at the given index is escaped by a backslash
 *
 * @param text the text to inspect
 * @param index index of the character in question
 * @returns true if an odd number of backslashes directly precedes the character
 * @internal
 */
function isEscapedAt(text, index) {
    let backslashes = 0;
    for (let i = index - 1; i >= 0 && text[i] === "\\"; i--) {
        backslashes++;
    }
    return backslashes % 2 === 1;
}
exports.dontClobberRepetition = dontClobberRepetition;