"use strict";
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
Object.defineProperty(exports, "__esModule", { value: true });
exports.dontClobberRepetition = exports.groupIfRequired = exports.minimizeMatchString = void 0;
/**
 * Includes helper functions for the Generator
 * @packageDocumentation
 */
const utilities_1 = require("./utilities");
/**
 * Minimizes the match string by finding duplicates or substrings in the array
 *
 * @param arr the array of matches
 * @returns the single element unchanged when there is exactly one, otherwise
 *          the combined fragment built by minMatchString
 * @internal
 */
function minimizeMatchString(arr) {
    // don't process an array of length 1, otherwise you'll get the wrong result:
    // minMatchString treats a lone element as an optional remainder and appends "?"
    if (arr.length === 1) {
        return utilities_1.first(arr);
    }
    return minMatchString(arr, 0);
}
exports.minimizeMatchString = minimizeMatchString;
/**
 * Minimizes the match string by finding duplicates or substrings in the array
 *
 * Strategy: factor out the longest prefix and suffix shared by every element,
 * then recurse on what is left in the middle of each element.
 *
 * NOTE(review): the prefix/suffix comparison works one string index at a time,
 * so it is not aware of multi-character tokens such as escape sequences --
 * confirm callers never pass fragments that could be split mid-token.
 *
 * @param arr the array
 * @param depth must be 0 for initial call
 * @returns an optimized string
 * @internal
 */
function minMatchString(arr, depth = 0) {
    // base case: arr is empty
    if (arr.length === 0) {
        return "";
    }
    // base case: arr has 1 element (must have at least 2, so this means this value is optional)
    // The element is grouped first so that "?" applies to all of it, not just its last character
    if (arr.length === 1) {
        return groupIfRequired(utilities_1.first(arr)) + "?";
    }
    // remove duplicates (into a local, so the caller-visible parameter is left alone)
    const unique = [...new Set(arr)];
    // base case: arr has 1 element (after duplicate removal means this is required)
    if (unique.length === 1) {
        return utilities_1.first(unique);
    }
    // base case: arr is all single letters
    if (unique.every(utilities_1.isSingleRegexCharacter)) {
        return "[" + unique.join("") + "]";
    }
    // Shrink the candidate prefix/suffix (seeded with the first element) against
    // every other element until only the part common to all of them is left.
    let longest_begin_substring = utilities_1.first(unique);
    let longest_end_substring = utilities_1.first(unique);
    for (let i = 1; i < unique.length; i++) {
        const current = unique[i];
        // An out-of-range index yields undefined, which never equals a character,
        // so running past the end of `current` counts as a mismatch.
        for (let j = 0; j < longest_begin_substring.length; j++) {
            if (longest_begin_substring[j] !== current[j]) {
                longest_begin_substring = longest_begin_substring.substring(0, j);
                break;
            }
        }
        // Same idea, walking backwards from the end of both strings
        for (let j = 0; j < longest_end_substring.length; j++) {
            const end_index = longest_end_substring.length - j - 1;
            if (longest_end_substring[end_index] !== current[current.length - j - 1]) {
                // keep only the last j characters
                longest_end_substring = longest_end_substring.substring(longest_end_substring.length - j);
                break;
            }
        }
        if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
            break;
        }
    }
    // No matches whatsoever
    // *technically* we can optimize further, but that is a VERY non-trivial problem
    // For example optimizing: [ "a1x1z", "a2y2z", "a3z3z" ] to: "a[123][xyz][123]z"
    if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
        // nested alternations must be grouped so they do not swallow their surroundings
        return depth > 0 ? "(?:" + unique.join("|") + ")" : unique.join("|");
    }
    // we have some matches
    const begin_pos = longest_begin_substring.length;
    // The prefix and suffix may overlap on the shortest element (e.g. "aa" vs "aaa").
    // Clamp the suffix to what the prefix leaves over; otherwise substring() below would
    // receive start > end, silently swap them, and return the wrong slice.
    let shortest_length = Infinity;
    for (const ele of unique) {
        shortest_length = Math.min(shortest_length, ele.length);
    }
    const end_pos = Math.min(longest_end_substring.length, shortest_length - begin_pos);
    const common_end = longest_end_substring.substring(longest_end_substring.length - end_pos);
    // remove begin (if exists) and end (if exists) from each element and remove empty strings
    const similar_matches = [];
    let has_empty_match = false;
    for (const ele of unique) {
        const match = ele.substring(begin_pos, ele.length - end_pos);
        if (match.length === 0) {
            // this element is exactly prefix + suffix, so the middle must be optional
            has_empty_match = true;
        }
        else {
            similar_matches.push(match);
        }
    }
    let middle = minMatchString(similar_matches, depth + 1);
    // A single remaining match is already made optional by the length-1 base case.
    // With several remaining matches the optional marker has to be added here.
    if (has_empty_match && similar_matches.length > 1) {
        middle = groupIfRequired(middle) + "?";
    }
    return longest_begin_substring + middle + common_end;
}
/**
 * Groups a regex fragment if it needs to be grouped
 *
 * A fragment is left alone when it is a single regex character, or when its
 * leading "(" / "[" is closed only by its very last character.
 *
 * @param fragment fragment of regular expression to potentially group
 * @returns a non-capturing group if there needs to be one
 * @internal
 */
function groupIfRequired(fragment) {
    if (utilities_1.isSingleRegexCharacter(fragment)) {
        return fragment;
    }
    const last = fragment.length - 1;
    if (fragment[0] === "(" && fragment[last] === ")") {
        // Scan everything strictly between the outer brackets. If the depth ever drops
        // below zero, the opening "(" was closed early (e.g. "(a)(b)").
        let bracket_count = 0;
        for (let i = 1; i < last; i++) {
            if (fragment[i] === "\\") {
                // skip the escaped character
                i++;
            }
            else if (fragment[i] === "(") {
                bracket_count++;
            }
            else if (fragment[i] === ")") {
                bracket_count--;
                if (bracket_count === -1) {
                    break;
                }
            }
        }
        return bracket_count === 0 ? fragment : "(?:" + fragment + ")";
    }
    if (fragment[0] === "[" && fragment[last] === "]") {
        // you'll never have a raw [ inside a [], so any unescaped "]" before the
        // last character means the class closed early (e.g. "[a][b]")
        let closed_early = false;
        for (let i = 1; i < last; i++) {
            if (fragment[i] === "\\") {
                // skip the escaped character
                i++;
            }
            else if (fragment[i] === "]") {
                closed_early = true;
                break;
            }
        }
        return closed_early ? "(?:" + fragment + ")" : fragment;
    }
    return "(?:" + fragment + ")";
}
exports.groupIfRequired = groupIfRequired;
/**
 * Checks whether a fragment ends with the given character and that character
 * is not escaped (i.e. it is preceded by an even number of backslashes)
 *
 * @param fragment fragment of regular expression
 * @param character the single trailing character to look for
 * @returns true if the fragment ends with an unescaped occurrence of character
 */
function endsWithUnescaped(fragment, character) {
    if (!fragment.endsWith(character)) {
        return false;
    }
    let backslashes = 0;
    for (let i = fragment.length - character.length - 1; i >= 0 && fragment[i] === "\\"; i--) {
        backslashes++;
    }
    return backslashes % 2 === 0;
}
/**
 * Checks to see if fragment has a + or * at the end and has a repetition statement
 *
 * An escaped trailing "\+" or "\*" is a literal character, not a quantifier,
 * so in that case the repetition is simply appended.
 *
 * @param fragment fragment of regular expression
 * @param repetition repetition that may clobber the fragment
 * @returns the fragment combined with the repetition
 */
function dontClobberRepetition(fragment, repetition) {
    // + can be ignored as well as a count as long as that count is > 0
    if (endsWithUnescaped(fragment, "+")) {
        switch (repetition) {
            case "*":
                // ignore: + is greater than *
                return fragment;
            case "+":
                // ignore: already +
                return fragment;
            case "?":
                // non-greedy qualifier
                return fragment + repetition;
            default:
                if (repetition.startsWith("{0")) {
                    // a count that allows zero cannot replace "+", so wrap instead
                    return "(?:" + fragment + ")" + repetition;
                }
                // remove + and replace with count
                return fragment.substring(0, fragment.length - 1) + repetition;
        }
    }
    if (endsWithUnescaped(fragment, "*")) {
        switch (repetition) {
            case "*":
                // ignore: already *
                return fragment;
            case "?":
                // non-greedy qualifier
                return fragment + repetition;
            default:
                // remove * and replace with count
                return fragment.substring(0, fragment.length - 1) + repetition;
        }
    }
    return fragment + repetition;
}
exports.dontClobberRepetition = dontClobberRepetition;