1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-16 04:20:35 -07:00

bug fixes and more tests

This commit is contained in:
Patrick Demian 2021-01-20 07:42:03 -05:00
parent da5c2ca0af
commit aa88f2069d
13 changed files with 117 additions and 17 deletions

6
docs/bundle.min.js vendored

File diff suppressed because one or more lines are too long

View File

@ -29,4 +29,4 @@ create an optional group
create an optional group create an optional group
# fragment, again, we don't care, so ignore everything afterwards # fragment, again, we don't care, so ignore everything afterwards
match "#" match "#"
match 0+ any thing</textarea><h4>Errors:</h4><textarea readonly="readonly" class="form-control" id="errors" rows="5"></textarea></div><br><div class="col-lg-4 tenpx-margin-bottom"><div class="cheatsheet"><h2>Cheat Sheet:</h2><p>Full documentation available <a href="/tutorial.html">here</a></p><p class="font-weight-bold">Matching</p><p><code class="cm-s-idea">match "hello world"</code> matches "hello world" exactly</p><p></p><p><code class="cm-s-idea">match "hello" then optionally " world"</code> matches "hello" or "hello world"</p><p><code class="cm-s-idea">match "hello" or "world"</code> matches "hello" or "world</p><p><code class="cm-s-idea">match a word</code> matches any word</p><p class="font-weight-bold">Repetition</p><p><code class="cm-s-idea">match 0+ "hello"</code> matches 0 or more "hello"s</p><p><code class="cm-s-idea">match 3 "hello"</code> matches exactly "hellohellohello"</p><p><code class="cm-s-idea">match 1 to 5 "hello"</code> matches between 1 to 5 "hello"s</p><p><code class="cm-s-idea">repeat 0 or more</code> repeats the intended text 0 or more times (default)</p><p><code class="cm-s-idea">optionally repeat between 3 to 5</code> optionally repeats the indented text 3 to 5 times</p><p class="font-weight-bold">Grouping</p><p><code class="cm-s-idea">create a group called mygroup</code> creates a group called "mygroup"</p><p><code class="cm-s-idea">create an optional group</code> creates an unnamed optional group</p><p class="font-weight-bold">Using</p><p><code class="cm-s-idea">using global and case insensitive</code> uses the 'g' and 'i' flags</p><p class="font-weight-bold">Misc</p><p><code class="cm-s-idea">// comment</code> is a single line comment</p><p><code class="cm-s-idea">/* comment */</code> is a multi line comment</p></div></div></div></div><footer><div class="container"><div class="row"><div class="col-lg-8 col-md-10 mx-auto"><p class="copyright">Copyright &copy; 2021 Patrick Demian. 
This page's source code is available at <a rel="noopener noreferrer" href="https://github.com/pdemian/human2regex">github.com/pdemian/human2regex</a></p></div></div></div></footer></div><script defer="defer" src="/bundle.min.js"></script></body></html> match 0+ any thing</textarea><h4>Errors:</h4><textarea readonly="readonly" class="form-control" id="errors" rows="5"></textarea></div><br><div class="col-lg-4 tenpx-margin-bottom"><div class="cheatsheet"><h2>Cheat Sheet:</h2><p>Full documentation available <a href="/tutorial.html">here</a></p><p class="font-weight-bold">Matching</p><p><code class="cm-s-idea">match "hello world"</code> matches "hello world" exactly</p><p></p><p><code class="cm-s-idea">match "hello" then optionally " world"</code> matches "hello" or "hello world"</p><p><code class="cm-s-idea">match "hello" or "world"</code> matches "hello" or "world"</p><p><code class="cm-s-idea">match a word</code> matches any word</p><p class="font-weight-bold">Repetition</p><p><code class="cm-s-idea">match 0+ "hello"</code> matches 0 or more "hello"s</p><p><code class="cm-s-idea">match 3 "hello"</code> matches exactly "hellohellohello"</p><p><code class="cm-s-idea">match 1 to 5 "hello"</code> matches between 1 to 5 "hello"s</p><p><code class="cm-s-idea">repeat 0 or more</code> repeats the intended text 0 or more times (default)</p><p><code class="cm-s-idea">optionally repeat between 3 to 5</code> optionally repeats the indented text 3 to 5 times</p><p class="font-weight-bold">Grouping</p><p><code class="cm-s-idea">create a group called mygroup</code> creates a group called "mygroup"</p><p><code class="cm-s-idea">create an optional group</code> creates an unnamed optional group</p><p class="font-weight-bold">Using</p><p><code class="cm-s-idea">using global and case insensitive</code> uses the 'g' and 'i' flags</p><p class="font-weight-bold">Misc</p><p><code class="cm-s-idea">// comment</code> is a single line comment</p><p><code class="cm-s-idea">/* comment */</code> 
is a multi line comment</p></div></div></div></div><footer><div class="container"><div class="row"><div class="col-lg-8 col-md-10 mx-auto"><p class="copyright">Copyright &copy; 2021 Patrick Demian. This page's source code is available at <a rel="noopener noreferrer" href="https://github.com/pdemian/human2regex">github.com/pdemian/human2regex</a></p></div></div></div></footer></div><script defer="defer" src="/bundle.min.js"></script></body></html>

View File

@ -44,9 +44,10 @@ function minMatchString(arr, depth = 0) {
if (arr.length === 1) { if (arr.length === 1) {
return utilities_1.first(arr); return utilities_1.first(arr);
} }
// base case: arr is all single letters // base case: arr is all single letters or ranges
if (arr.every(utilities_1.isSingleRegexCharacter)) { if (arr.every((value) => utilities_1.isSingleRegexCharacter(value) || utilities_1.isRangeRegex(value))) {
return "[" + arr.join("") + "]"; // if range, don't forget to remove '[' and ']'
return "[" + arr.map((x) => utilities_1.isSingleRegexCharacter(x) ? x : x.substring(1, x.length - 1)).join("") + "]";
} }
// now the real magic begins // now the real magic begins
// You are not expected to understand this // You are not expected to understand this

10
lib/utilities.d.ts vendored
View File

@ -64,6 +64,16 @@ export declare function combineFlags(value: number, flag: number): number;
* @internal * @internal
*/ */
export declare function isSingleRegexCharacter(char: string): boolean; export declare function isSingleRegexCharacter(char: string): boolean;
/**
 * Checks to see if the string is a regex range
 *
 * @remarks a range is in the format of [X-Y] where X and Y are valid single regex characters
 *
 * @param str the string to check
 * @returns true if the value is a regex range, false otherwise
 * @internal
 */
export declare function isRangeRegex(str: string): boolean;
/** /**
* Gets the first element of an array * Gets the first element of an array
* @remarks does not validate if array has any elements * @remarks does not validate if array has any elements

View File

@ -1,7 +1,7 @@
"use strict"; "use strict";
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */ /*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
Object.defineProperty(exports, "__esModule", { value: true }); Object.defineProperty(exports, "__esModule", { value: true });
exports.CommonError = exports.append = exports.regexEscape = exports.removeQuotes = exports.findLastIndex = exports.last = exports.first = exports.isSingleRegexCharacter = exports.combineFlags = exports.hasFlag = exports.makeFlag = exports.usefulConditional = exports.unusedParameter = void 0; exports.CommonError = exports.append = exports.regexEscape = exports.removeQuotes = exports.findLastIndex = exports.last = exports.first = exports.isRangeRegex = exports.isSingleRegexCharacter = exports.combineFlags = exports.hasFlag = exports.makeFlag = exports.usefulConditional = exports.unusedParameter = void 0;
/** /**
* The following section is used because the linter is set up to warn about certain operations * The following section is used because the linter is set up to warn about certain operations
* and for good reason! I'd much rather have these functions than accidentally use bitwise operations, or * and for good reason! I'd much rather have these functions than accidentally use bitwise operations, or
@ -86,6 +86,31 @@ function isSingleRegexCharacter(char) {
char.length === 1; char.length === 1;
} }
exports.isSingleRegexCharacter = isSingleRegexCharacter; exports.isSingleRegexCharacter = isSingleRegexCharacter;
/**
 * Checks to see if the string is a regex range
 *
 * @remarks a range is in the format of [X-Y] where X and Y are valid single regex characters.
 * Both the opening "[" and the closing "]" must be present.
 *
 * @param str the string to check
 * @returns true if the value is a regex range, false otherwise
 * @internal
 */
function isRangeRegex(str) {
    // both brackets are required: a string missing either one is not a range
    if (!str.startsWith("[") || !str.endsWith("]")) {
        return false;
    }
    // drop the surrounding brackets and split into the two range endpoints
    const split = str.substring(1, str.length - 1).split("-");
    if (split.length !== 2) {
        return false;
    }
    // hack: ensure the "-" wasn't escaped. A left side ending in a backslash means
    // the dash is a literal, unless the left side is itself an escaped backslash
    if (split[0].endsWith("\\") && split[0] !== "\\\\") {
        return false;
    }
    return isSingleRegexCharacter(split[0]) && isSingleRegexCharacter(split[1]);
}
exports.isRangeRegex = isRangeRegex;
/** /**
* Gets the first element of an array * Gets the first element of an array
* @remarks does not validate if array has any elements * @remarks does not validate if array has any elements

2
package-lock.json generated
View File

@ -1,6 +1,6 @@
{ {
"name": "human2regex", "name": "human2regex",
"version": "1.1.3", "version": "1.1.4",
"lockfileVersion": 1, "lockfileVersion": 1,
"requires": true, "requires": true,
"dependencies": { "dependencies": {

View File

@ -1,6 +1,6 @@
{ {
"name": "human2regex", "name": "human2regex",
"version": "1.1.3", "version": "1.1.4",
"description": "Humanized Regular Expressions", "description": "Humanized Regular Expressions",
"main": "./lib/index.js", "main": "./lib/index.js",
"typings": "./lib/index.d.ts", "typings": "./lib/index.d.ts",

View File

@ -41,7 +41,7 @@
<p class="font-weight-bold">Matching</p> <p class="font-weight-bold">Matching</p>
<p>{{i-code}}match "hello world"{{end-i-code}} matches "hello world" exactly<p> <p>{{i-code}}match "hello world"{{end-i-code}} matches "hello world" exactly<p>
<p>{{i-code}}match "hello" then optionally " world"{{end-i-code}} matches "hello" or "hello world"</p> <p>{{i-code}}match "hello" then optionally " world"{{end-i-code}} matches "hello" or "hello world"</p>
<p>{{i-code}}match "hello" or "world"{{end-i-code}} matches "hello" or "world</p> <p>{{i-code}}match "hello" or "world"{{end-i-code}} matches "hello" or "world"</p>
<p>{{i-code}}match a word{{end-i-code}} matches any word <p>{{i-code}}match a word{{end-i-code}} matches any word
<p class="font-weight-bold">Repetition</p> <p class="font-weight-bold">Repetition</p>
<p>{{i-code}}match 0+ "hello"{{end-i-code}} matches 0 or more "hello"s</p> <p>{{i-code}}match 0+ "hello"{{end-i-code}} matches 0 or more "hello"s</p>

View File

@ -5,7 +5,7 @@
* @packageDocumentation * @packageDocumentation
*/ */
import { first, isSingleRegexCharacter } from "./utilities"; import { first, isRangeRegex, isSingleRegexCharacter } from "./utilities";
/** /**
* Minimizes the match string by finding duplicates or substrings in the array * Minimizes the match string by finding duplicates or substrings in the array
@ -49,9 +49,10 @@ function minMatchString(arr: string[], depth: number = 0): string {
return first(arr); return first(arr);
} }
// base case: arr is all single letters // base case: arr is all single letters or ranges
if (arr.every(isSingleRegexCharacter)) { if (arr.every((value) => isSingleRegexCharacter(value) || isRangeRegex(value))) {
return "[" + arr.join("") + "]"; // if range, don't forget to remove '[' and ']'
return "[" + arr.map((x) => isSingleRegexCharacter(x) ? x :x.substring(1, x.length-1)).join("") + "]";
} }
// now the real magic begins // now the real magic begins

View File

@ -95,6 +95,35 @@ export function isSingleRegexCharacter(char: string): boolean {
char.length === 1; char.length === 1;
} }
/**
 * Checks to see if the string is a regex range
 *
 * @remarks a range is in the format of [X-Y] where X and Y are valid single regex characters.
 * Both the opening "[" and the closing "]" must be present.
 *
 * @param str the string to check
 * @returns true if the value is a regex range, false otherwise
 * @internal
 */
export function isRangeRegex(str: string): boolean {
    // both brackets are required: a string missing either one is not a range
    if (!str.startsWith("[") || !str.endsWith("]")) {
        return false;
    }

    // drop the surrounding brackets and split into the two range endpoints
    const split = str.substring(1, str.length - 1).split("-");

    if (split.length !== 2) {
        return false;
    }

    // hack: ensure the "-" wasn't escaped. A left side ending in a backslash means
    // the dash is a literal, unless the left side is itself an escaped backslash
    if (split[0].endsWith("\\") && split[0] !== "\\\\") {
        return false;
    }

    return isSingleRegexCharacter(split[0]) && isSingleRegexCharacter(split[1]);
}
/** /**
* Gets the first element of an array * Gets the first element of an array
* @remarks does not validate if array has any elements * @remarks does not validate if array has any elements

View File

@ -99,6 +99,16 @@ describe("Generator functionality", function() {
const reg1 = parser.parse(toks1); const reg1 = parser.parse(toks1);
expect(reg1.validate(RegexDialect.JS).length).toBe(0); expect(reg1.validate(RegexDialect.JS).length).toBe(0);
expect(reg1.toRegex(RegexDialect.JS)).toBe("/[\\u0061-\\u007A]/"); expect(reg1.toRegex(RegexDialect.JS)).toBe("/[\\u0061-\\u007A]/");
const toks2 = lexer.tokenize('match "0"..."9" or "a".."z" or "A".."Z"').tokens;
const reg2 = parser.parse(toks2);
expect(reg2.validate(RegexDialect.JS).length).toBe(0);
expect(reg2.toRegex(RegexDialect.JS)).toBe("/[0-9a-zA-Z]/");
const toks3 = lexer.tokenize('match "0" or "a".."z" or "A".."Z"').tokens;
const reg3 = parser.parse(toks3);
expect(reg3.validate(RegexDialect.JS).length).toBe(0);
expect(reg3.toRegex(RegexDialect.JS)).toBe("/[0a-zA-Z]/");
}); });
it("handles specifiers", function() { it("handles specifiers", function() {
@ -193,6 +203,16 @@ describe("Generator functionality", function() {
const reg7 = parser.parse(toks7); const reg7 = parser.parse(toks7);
expect(reg7.validate(RegexDialect.JS).length).toBe(0); expect(reg7.validate(RegexDialect.JS).length).toBe(0);
expect(reg7.toRegex(RegexDialect.JS)).toBe("/[^>]*/"); expect(reg7.toRegex(RegexDialect.JS)).toBe("/[^>]*/");
const toks8 = lexer.tokenize('match "a" or "a"').tokens;
const reg8 = parser.parse(toks8);
expect(reg8.validate(RegexDialect.JS).length).toBe(0);
expect(reg8.toRegex(RegexDialect.JS)).toBe("/a/");
const toks9 = lexer.tokenize('match "a".."z" or "a".."z"').tokens;
const reg9 = parser.parse(toks9);
expect(reg9.validate(RegexDialect.JS).length).toBe(0);
expect(reg9.toRegex(RegexDialect.JS)).toBe("/[a-z]/");
}); });
it("can generate backreferences", function() { it("can generate backreferences", function() {

View File

@ -12,7 +12,9 @@ describe("Generator helper functionality", function() {
{ from: [ "a1x1z", "a2y2z", "a3z3z" ], to: "a(?:1x1|2y2|3z3)z" }, { from: [ "a1x1z", "a2y2z", "a3z3z" ], to: "a(?:1x1|2y2|3z3)z" },
{ from: [ "ab", "cd" ], to: "ab|cd" }, { from: [ "ab", "cd" ], to: "ab|cd" },
{ from: [ "abc", "bc" ], to: "a?bc" }, { from: [ "abc", "bc" ], to: "a?bc" },
{ from: [ "abc", "xb" ], to: "abc|xb" } { from: [ "abc", "xb" ], to: "abc|xb" },
{ from: [ "a", "a" ], to: "a" },
{ from: [ "a-z", "a-z" ], to: "a-z" }
]; ];
for (const c of test_cases) { for (const c of test_cases) {

View File

@ -1,7 +1,7 @@
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */ /*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
import "../src/utilities"; import "../src/utilities";
import { isSingleRegexCharacter, findLastIndex, removeQuotes, regexEscape, hasFlag, combineFlags, makeFlag, first, last, CommonError, append } from "../src/utilities"; import { isSingleRegexCharacter, findLastIndex, removeQuotes, regexEscape, hasFlag, combineFlags, makeFlag, first, last, CommonError, append, isRangeRegex } from "../src/utilities";
import { UsingFlags, ISemanticError } from "../src/generator"; import { UsingFlags, ISemanticError } from "../src/generator";
import { IRecognitionException, ILexingError, createTokenInstance } from "chevrotain"; import { IRecognitionException, ILexingError, createTokenInstance } from "chevrotain";
import { Indent } from "../src/tokens"; import { Indent } from "../src/tokens";
@ -65,6 +65,18 @@ describe("Utility functions", function() {
expect(isSingleRegexCharacter("💩")).toBe(false); expect(isSingleRegexCharacter("💩")).toBe(false);
}); });
it("can determine if something is a range", function() {
expect(isRangeRegex("")).toBe(false);
expect(isRangeRegex("-3")).toBe(false);
expect(isRangeRegex("[]")).toBe(false);
expect(isRangeRegex("[-3]")).toBe(false);
expect(isRangeRegex("[a-z]")).toBe(true);
expect(isRangeRegex("[\\u1234-\\u1234]")).toBe(true);
expect(isRangeRegex("[௹-௹]")).toBe(true);
expect(isRangeRegex("[\\-3]")).toBe(false);
expect(isRangeRegex("[\\\\-3]")).toBe(true);
});
it("should remove quotes correctly", function() { it("should remove quotes correctly", function() {
expect(removeQuotes('""')).toEqual(""); expect(removeQuotes('""')).toEqual("");
expect(removeQuotes('"hello world"')).toEqual("hello world"); expect(removeQuotes('"hello world"')).toEqual("hello world");