bug fixes and more tests

2025-05-15 20:10:19 -07:00 · 2021-01-20 07:42:03 -05:00 · 2021-01-20 07:42:03 -05:00 · aa88f2069d
commit aa88f2069d
parent da5c2ca0af
13 changed files with 117 additions and 17 deletions
--- a/docs/bundle.min.js
+++ b/docs/bundle.min.js
--- a/docs/index.html
+++ b/docs/index.html
@ -29,4 +29,4 @@ create an optional group
 create an optional group
 	# fragment, again, we don't care, so ignore everything afterwards
 	match "#"
-	match 0+ any thing</textarea><h4>Errors:</h4><textarea readonly="readonly" class="form-control" id="errors" rows="5"></textarea></div><br><div class="col-lg-4 tenpx-margin-bottom"><div class="cheatsheet"><h2>Cheat Sheet:</h2><p>Full documentation available <a href="/tutorial.html">here</a></p><p class="font-weight-bold">Matching</p><p><code class="cm-s-idea">match "hello world"</code> matches "hello world" exactly</p><p></p><p><code class="cm-s-idea">match "hello" then optionally " world"</code> matches "hello" or "hello world"</p><p><code class="cm-s-idea">match "hello" or "world"</code> matches "hello" or "world</p><p><code class="cm-s-idea">match a word</code> matches any word</p><p class="font-weight-bold">Repetition</p><p><code class="cm-s-idea">match 0+ "hello"</code> matches 0 or more "hello"s</p><p><code class="cm-s-idea">match 3 "hello"</code> matches exactly "hellohellohello"</p><p><code class="cm-s-idea">match 1 to 5 "hello"</code> matches between 1 to 5 "hello"s</p><p><code class="cm-s-idea">repeat 0 or more</code> repeats the intended text 0 or more times (default)</p><p><code class="cm-s-idea">optionally repeat between 3 to 5</code> optionally repeats the indented text 3 to 5 times</p><p class="font-weight-bold">Grouping</p><p><code class="cm-s-idea">create a group called mygroup</code> creates a group called "mygroup"</p><p><code class="cm-s-idea">create an optional group</code> creates an unnamed optional group</p><p class="font-weight-bold">Using</p><p><code class="cm-s-idea">using global and case insensitive</code> uses the 'g' and 'i' flags</p><p class="font-weight-bold">Misc</p><p><code class="cm-s-idea">// comment</code> is a single line comment</p><p><code class="cm-s-idea">/* comment */</code> is a multi line comment</p></div></div></div></div><footer><div class="container"><div class="row"><div class="col-lg-8 col-md-10 mx-auto"><p class="copyright">Copyright &copy; 2021 Patrick Demian. 
This page's source code is available at <a rel="noopener noreferrer" href="https://github.com/pdemian/human2regex">github.com/pdemian/human2regex</a></p></div></div></div></footer></div><script defer="defer" src="/bundle.min.js"></script></body></html>
+	match 0+ any thing</textarea><h4>Errors:</h4><textarea readonly="readonly" class="form-control" id="errors" rows="5"></textarea></div><br><div class="col-lg-4 tenpx-margin-bottom"><div class="cheatsheet"><h2>Cheat Sheet:</h2><p>Full documentation available <a href="/tutorial.html">here</a></p><p class="font-weight-bold">Matching</p><p><code class="cm-s-idea">match "hello world"</code> matches "hello world" exactly</p><p></p><p><code class="cm-s-idea">match "hello" then optionally " world"</code> matches "hello" or "hello world"</p><p><code class="cm-s-idea">match "hello" or "world"</code> matches "hello" or "world"</p><p><code class="cm-s-idea">match a word</code> matches any word</p><p class="font-weight-bold">Repetition</p><p><code class="cm-s-idea">match 0+ "hello"</code> matches 0 or more "hello"s</p><p><code class="cm-s-idea">match 3 "hello"</code> matches exactly "hellohellohello"</p><p><code class="cm-s-idea">match 1 to 5 "hello"</code> matches between 1 to 5 "hello"s</p><p><code class="cm-s-idea">repeat 0 or more</code> repeats the indented text 0 or more times (default)</p><p><code class="cm-s-idea">optionally repeat between 3 to 5</code> optionally repeats the indented text 3 to 5 times</p><p class="font-weight-bold">Grouping</p><p><code class="cm-s-idea">create a group called mygroup</code> creates a group called "mygroup"</p><p><code class="cm-s-idea">create an optional group</code> creates an unnamed optional group</p><p class="font-weight-bold">Using</p><p><code class="cm-s-idea">using global and case insensitive</code> uses the 'g' and 'i' flags</p><p class="font-weight-bold">Misc</p><p><code class="cm-s-idea">// comment</code> is a single line comment</p><p><code class="cm-s-idea">/* comment */</code> is a multi line comment</p></div></div></div></div><footer><div class="container"><div class="row"><div class="col-lg-8 col-md-10 mx-auto"><p class="copyright">Copyright &copy; 2021 Patrick Demian. 
This page's source code is available at <a rel="noopener noreferrer" href="https://github.com/pdemian/human2regex">github.com/pdemian/human2regex</a></p></div></div></div></footer></div><script defer="defer" src="/bundle.min.js"></script></body></html>
--- a/lib/generator_helper.js
+++ b/lib/generator_helper.js
@ -44,9 +44,10 @@ function minMatchString(arr, depth = 0) {
    if (arr.length === 1) {
        return utilities_1.first(arr);
    }
-    // base case: arr is all single letters
+    // base case: arr is all single letters or ranges
-    if (arr.every(utilities_1.isSingleRegexCharacter)) {
+    if (arr.every((value) => utilities_1.isSingleRegexCharacter(value) || utilities_1.isRangeRegex(value))) {
-        return "[" + arr.join("") + "]";
+        // if range, don't forget to remove '[' and ']'
        return "[" + arr.map((x) => utilities_1.isSingleRegexCharacter(x) ? x : x.substring(1, x.length - 1)).join("") + "]";
    }
    // now the real magic begins
    // You are not expected to understand this
--- a/lib/utilities.d.ts
+++ b/lib/utilities.d.ts
@ -64,6 +64,16 @@ export declare function combineFlags(value: number, flag: number): number;
 * @internal
 */
 export declare function isSingleRegexCharacter(char: string): boolean;
 /**
 * Checks to see if the character is a range
 *
 * @remarks a range is in the format of [X-Y] where X and Y are valid single regex characters
 *
 * @param str the string to check
 * @returns if the value is a regex range
 * @internal
 */
 export declare function isRangeRegex(str: string): boolean;
 /**
 * Gets the first element of an array
 * @remarks does not validate if array has any elements
--- a/lib/utilities.js
+++ b/lib/utilities.js
@ -1,7 +1,7 @@
 "use strict";
 /*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.CommonError = exports.append = exports.regexEscape = exports.removeQuotes = exports.findLastIndex = exports.last = exports.first = exports.isSingleRegexCharacter = exports.combineFlags = exports.hasFlag = exports.makeFlag = exports.usefulConditional = exports.unusedParameter = void 0;
+exports.CommonError = exports.append = exports.regexEscape = exports.removeQuotes = exports.findLastIndex = exports.last = exports.first = exports.isRangeRegex = exports.isSingleRegexCharacter = exports.combineFlags = exports.hasFlag = exports.makeFlag = exports.usefulConditional = exports.unusedParameter = void 0;
 /**
 * The following section is used because the linter is set up to warn about certain operations
 * and for good reason! I'd much rather have these functions than accidently use bitwise operations, or
@ -86,6 +86,31 @@ function isSingleRegexCharacter(char) {
        char.length === 1;
 }
 exports.isSingleRegexCharacter = isSingleRegexCharacter;
 /**
 * Checks to see if the string is a regex character range
 *
 * @remarks a range is in the format of [X-Y] where X and Y are valid single regex characters.
 * Both the opening "[" and the closing "]" must be present.
 *
 * @param str the string to check
 * @returns if the value is a regex range
 * @internal
 */
 function isRangeRegex(str) {
    // a range needs BOTH brackets; reject when either one is missing
    if (!str.startsWith("[") || !str.endsWith("]")) {
        return false;
    }
    // strip the brackets and split into the lower and upper bound
    const split = str.substring(1, str.length - 1).split("-");
    if (split.length !== 2) {
        return false;
    }
    // hack: ensure the "-" wasn't escaped. A lower bound ending in a backslash
    // means the "-" was escaped, unless the lower bound is itself an escaped backslash
    if (split[0].endsWith("\\") && split[0] !== "\\\\") {
        return false;
    }
    // both bounds must be valid single regex characters
    return isSingleRegexCharacter(split[0]) && isSingleRegexCharacter(split[1]);
 }
 exports.isRangeRegex = isRangeRegex;
 /**
 * Gets the first element of an array
 * @remarks does not validate if array has any elements
--- a/package-lock.json
+++ b/package-lock.json
@ -1,6 +1,6 @@
 {
  "name": "human2regex",
-  "version": "1.1.3",
+  "version": "1.1.4",
  "lockfileVersion": 1,
  "requires": true,
  "dependencies": {
--- a/package.json
+++ b/package.json
@ -1,6 +1,6 @@
 {
  "name": "human2regex",
-  "version": "1.1.3",
+  "version": "1.1.4",
  "description": "Humanized Regular Expressions",
  "main": "./lib/index.js",
  "typings": "./lib/index.d.ts",
--- a/src/docs/index.hbs
+++ b/src/docs/index.hbs
@ -41,7 +41,7 @@
 						<p class="font-weight-bold">Matching</p>
 						<p>{{i-code}}match "hello world"{{end-i-code}} matches "hello world" exactly</p>
 						<p>{{i-code}}match "hello" then optionally " world"{{end-i-code}} matches "hello" or "hello world"</p>
-						<p>{{i-code}}match "hello" or "world"{{end-i-code}} matches "hello" or "world</p>
+						<p>{{i-code}}match "hello" or "world"{{end-i-code}} matches "hello" or "world"</p>
 						<p>{{i-code}}match a word{{end-i-code}} matches any word</p>
 						<p class="font-weight-bold">Repetition</p>
 						<p>{{i-code}}match 0+ "hello"{{end-i-code}} matches 0 or more "hello"s</p>
--- a/src/generator_helper.ts
+++ b/src/generator_helper.ts
@ -5,7 +5,7 @@
 * @packageDocumentation
 */
-import { first, isSingleRegexCharacter } from "./utilities";
+import { first, isRangeRegex, isSingleRegexCharacter } from "./utilities";
 /**
 * Minimizes the match string by finding duplicates or substrings in the array
@ -49,9 +49,10 @@ function minMatchString(arr: string[], depth: number = 0): string {
        return first(arr);
    }
-    // base case: arr is all single letters
+    // base case: arr is all single letters or ranges
-    if (arr.every(isSingleRegexCharacter)) {
+    if (arr.every((value) => isSingleRegexCharacter(value) || isRangeRegex(value))) {
-        return "[" + arr.join("") + "]";
+        // if range, don't forget to remove '[' and ']'
        return "[" + arr.map((x) => isSingleRegexCharacter(x) ? x :x.substring(1, x.length-1)).join("") + "]";
    }
    // now the real magic begins
--- a/src/utilities.ts
+++ b/src/utilities.ts
@ -95,6 +95,35 @@ export function isSingleRegexCharacter(char: string): boolean {
           char.length === 1;
 }
 /**
 * Checks to see if the string is a regex character range
 * 
 * @remarks a range is in the format of [X-Y] where X and Y are valid single regex characters.
 * Both the opening "[" and the closing "]" must be present.
 * 
 * @param str the string to check
 * @returns if the value is a regex range
 * @internal
 */
 export function isRangeRegex(str: string): boolean {
    // a range needs BOTH brackets; reject when either one is missing
    if (!str.startsWith("[") || !str.endsWith("]")) {
        return false;
    }
    // strip the brackets and split into the lower and upper bound
    const split = str.substring(1, str.length-1).split("-");
    if (split.length !== 2) {
        return false;
    }
    // hack: ensure the "-" wasn't escaped. A lower bound ending in a backslash
    // means the "-" was escaped, unless the lower bound is itself an escaped backslash
    if (split[0].endsWith("\\") && split[0] !== "\\\\") {
        return false;
    }
    // both bounds must be valid single regex characters
    return isSingleRegexCharacter(split[0]) && isSingleRegexCharacter(split[1]);
 }
 /**
 * Gets the first element of an array
 * @remarks does not validate if array has any elements
--- a/tests/generator.spec.ts
+++ b/tests/generator.spec.ts
@ -99,6 +99,16 @@ describe("Generator functionality", function() {
        const reg1 = parser.parse(toks1);
        expect(reg1.validate(RegexDialect.JS).length).toBe(0);
        expect(reg1.toRegex(RegexDialect.JS)).toBe("/[\\u0061-\\u007A]/");
        const toks2 = lexer.tokenize('match "0"..."9" or "a".."z" or "A".."Z"').tokens;
        const reg2 = parser.parse(toks2);
        expect(reg2.validate(RegexDialect.JS).length).toBe(0);
        expect(reg2.toRegex(RegexDialect.JS)).toBe("/[0-9a-zA-Z]/");
        const toks3 = lexer.tokenize('match "0" or "a".."z" or "A".."Z"').tokens;
        const reg3 = parser.parse(toks3);
        expect(reg3.validate(RegexDialect.JS).length).toBe(0);
        expect(reg3.toRegex(RegexDialect.JS)).toBe("/[0a-zA-Z]/");
    });
    it("handles specifiers", function() {
@ -193,6 +203,16 @@ describe("Generator functionality", function() {
        const reg7 = parser.parse(toks7);
        expect(reg7.validate(RegexDialect.JS).length).toBe(0);
        expect(reg7.toRegex(RegexDialect.JS)).toBe("/[^>]*/");
        const toks8 = lexer.tokenize('match "a" or "a"').tokens;
        const reg8 = parser.parse(toks8);
        expect(reg8.validate(RegexDialect.JS).length).toBe(0);
        expect(reg8.toRegex(RegexDialect.JS)).toBe("/a/");
        const toks9 = lexer.tokenize('match "a".."z" or "a".."z"').tokens;
        const reg9 = parser.parse(toks9);
        expect(reg9.validate(RegexDialect.JS).length).toBe(0);
        expect(reg9.toRegex(RegexDialect.JS)).toBe("/[a-z]/");
    });
    it("can generate backreferences", function() {
--- a/tests/generator_helper.spec.ts
+++ b/tests/generator_helper.spec.ts
@ -12,7 +12,9 @@ describe("Generator helper functionality", function() {
            { from: [ "a1x1z", "a2y2z", "a3z3z" ], to: "a(?:1x1|2y2|3z3)z" },
            { from: [ "ab", "cd" ], to: "ab|cd" },
            { from: [ "abc", "bc" ], to: "a?bc" },
-            { from: [ "abc", "xb" ], to: "abc|xb" }
+            { from: [ "abc", "xb" ], to: "abc|xb" },
            { from: [ "a", "a" ], to: "a" },
            { from: [ "a-z", "a-z" ], to: "a-z" }
        ];
        for (const c of test_cases) {
--- a/tests/utilities.spec.ts
+++ b/tests/utilities.spec.ts
@ -1,7 +1,7 @@
 /*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
 import "../src/utilities";
-import { isSingleRegexCharacter, findLastIndex, removeQuotes, regexEscape, hasFlag, combineFlags, makeFlag, first, last, CommonError, append } from "../src/utilities";
+import { isSingleRegexCharacter, findLastIndex, removeQuotes, regexEscape, hasFlag, combineFlags, makeFlag, first, last, CommonError, append, isRangeRegex } from "../src/utilities";
 import { UsingFlags, ISemanticError } from "../src/generator";
 import { IRecognitionException, ILexingError, createTokenInstance } from "chevrotain";
 import { Indent } from "../src/tokens";
@ -65,6 +65,18 @@ describe("Utility functions", function() {
        expect(isSingleRegexCharacter("💩")).toBe(false);
    });
    it("can determine if something is a range", function() {
        expect(isRangeRegex("")).toBe(false);
        expect(isRangeRegex("-3")).toBe(false);
        expect(isRangeRegex("[]")).toBe(false);
        expect(isRangeRegex("[-3]")).toBe(false);
        expect(isRangeRegex("[a-z]")).toBe(true);
        expect(isRangeRegex("[\\u1234-\\u1234]")).toBe(true);
        expect(isRangeRegex("[௹-௹]")).toBe(true);
        expect(isRangeRegex("[\\-3]")).toBe(false);
        expect(isRangeRegex("[\\\\-3]")).toBe(true);
    });
    it("should remove quotes correctly", function() {
        expect(removeQuotes('""')).toEqual("");
        expect(removeQuotes('"hello world"')).toEqual("hello world");