mirror of
https://github.com/pdemian/human2regex.git
synced 2025-05-15 20:10:19 -07:00
bug fixes and more tests
This commit is contained in:
parent
da5c2ca0af
commit
aa88f2069d
6
docs/bundle.min.js
vendored
6
docs/bundle.min.js
vendored
File diff suppressed because one or more lines are too long
@ -29,4 +29,4 @@ create an optional group
|
|||||||
create an optional group
|
create an optional group
|
||||||
# fragment, again, we don't care, so ignore everything afterwards
|
# fragment, again, we don't care, so ignore everything afterwards
|
||||||
match "#"
|
match "#"
|
||||||
match 0+ any thing</textarea><h4>Errors:</h4><textarea readonly="readonly" class="form-control" id="errors" rows="5"></textarea></div><br><div class="col-lg-4 tenpx-margin-bottom"><div class="cheatsheet"><h2>Cheat Sheet:</h2><p>Full documentation available <a href="/tutorial.html">here</a></p><p class="font-weight-bold">Matching</p><p><code class="cm-s-idea">match "hello world"</code> matches "hello world" exactly</p><p></p><p><code class="cm-s-idea">match "hello" then optionally " world"</code> matches "hello" or "hello world"</p><p><code class="cm-s-idea">match "hello" or "world"</code> matches "hello" or "world</p><p><code class="cm-s-idea">match a word</code> matches any word</p><p class="font-weight-bold">Repetition</p><p><code class="cm-s-idea">match 0+ "hello"</code> matches 0 or more "hello"s</p><p><code class="cm-s-idea">match 3 "hello"</code> matches exactly "hellohellohello"</p><p><code class="cm-s-idea">match 1 to 5 "hello"</code> matches between 1 to 5 "hello"s</p><p><code class="cm-s-idea">repeat 0 or more</code> repeats the intended text 0 or more times (default)</p><p><code class="cm-s-idea">optionally repeat between 3 to 5</code> optionally repeats the indented text 3 to 5 times</p><p class="font-weight-bold">Grouping</p><p><code class="cm-s-idea">create a group called mygroup</code> creates a group called "mygroup"</p><p><code class="cm-s-idea">create an optional group</code> creates an unnamed optional group</p><p class="font-weight-bold">Using</p><p><code class="cm-s-idea">using global and case insensitive</code> uses the 'g' and 'i' flags</p><p class="font-weight-bold">Misc</p><p><code class="cm-s-idea">// comment</code> is a single line comment</p><p><code class="cm-s-idea">/* comment */</code> is a multi line comment</p></div></div></div></div><footer><div class="container"><div class="row"><div class="col-lg-8 col-md-10 mx-auto"><p class="copyright">Copyright © 2021 Patrick Demian. 
This page's source code is available at <a rel="noopener noreferrer" href="https://github.com/pdemian/human2regex">github.com/pdemian/human2regex</a></p></div></div></div></footer></div><script defer="defer" src="/bundle.min.js"></script></body></html>
|
match 0+ any thing</textarea><h4>Errors:</h4><textarea readonly="readonly" class="form-control" id="errors" rows="5"></textarea></div><br><div class="col-lg-4 tenpx-margin-bottom"><div class="cheatsheet"><h2>Cheat Sheet:</h2><p>Full documentation available <a href="/tutorial.html">here</a></p><p class="font-weight-bold">Matching</p><p><code class="cm-s-idea">match "hello world"</code> matches "hello world" exactly</p><p></p><p><code class="cm-s-idea">match "hello" then optionally " world"</code> matches "hello" or "hello world"</p><p><code class="cm-s-idea">match "hello" or "world"</code> matches "hello" or "world"</p><p><code class="cm-s-idea">match a word</code> matches any word</p><p class="font-weight-bold">Repetition</p><p><code class="cm-s-idea">match 0+ "hello"</code> matches 0 or more "hello"s</p><p><code class="cm-s-idea">match 3 "hello"</code> matches exactly "hellohellohello"</p><p><code class="cm-s-idea">match 1 to 5 "hello"</code> matches between 1 to 5 "hello"s</p><p><code class="cm-s-idea">repeat 0 or more</code> repeats the intended text 0 or more times (default)</p><p><code class="cm-s-idea">optionally repeat between 3 to 5</code> optionally repeats the indented text 3 to 5 times</p><p class="font-weight-bold">Grouping</p><p><code class="cm-s-idea">create a group called mygroup</code> creates a group called "mygroup"</p><p><code class="cm-s-idea">create an optional group</code> creates an unnamed optional group</p><p class="font-weight-bold">Using</p><p><code class="cm-s-idea">using global and case insensitive</code> uses the 'g' and 'i' flags</p><p class="font-weight-bold">Misc</p><p><code class="cm-s-idea">// comment</code> is a single line comment</p><p><code class="cm-s-idea">/* comment */</code> is a multi line comment</p></div></div></div></div><footer><div class="container"><div class="row"><div class="col-lg-8 col-md-10 mx-auto"><p class="copyright">Copyright © 2021 Patrick Demian. 
This page's source code is available at <a rel="noopener noreferrer" href="https://github.com/pdemian/human2regex">github.com/pdemian/human2regex</a></p></div></div></div></footer></div><script defer="defer" src="/bundle.min.js"></script></body></html>
|
@ -44,9 +44,10 @@ function minMatchString(arr, depth = 0) {
|
|||||||
if (arr.length === 1) {
|
if (arr.length === 1) {
|
||||||
return utilities_1.first(arr);
|
return utilities_1.first(arr);
|
||||||
}
|
}
|
||||||
// base case: arr is all single letters
|
// base case: arr is all single letters or ranges
|
||||||
if (arr.every(utilities_1.isSingleRegexCharacter)) {
|
if (arr.every((value) => utilities_1.isSingleRegexCharacter(value) || utilities_1.isRangeRegex(value))) {
|
||||||
return "[" + arr.join("") + "]";
|
// if range, don't forget to remove '[' and ']'
|
||||||
|
return "[" + arr.map((x) => utilities_1.isSingleRegexCharacter(x) ? x : x.substring(1, x.length - 1)).join("") + "]";
|
||||||
}
|
}
|
||||||
// now the real magic begins
|
// now the real magic begins
|
||||||
// You are not expected to understand this
|
// You are not expected to understand this
|
||||||
|
10
lib/utilities.d.ts
vendored
10
lib/utilities.d.ts
vendored
@ -64,6 +64,16 @@ export declare function combineFlags(value: number, flag: number): number;
|
|||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
export declare function isSingleRegexCharacter(char: string): boolean;
|
export declare function isSingleRegexCharacter(char: string): boolean;
|
||||||
|
/**
|
||||||
|
* Checks to see if the character is a range
|
||||||
|
*
|
||||||
|
* @remarks a range is in the format of [X-Y] where X and Y are valid single regex characters
|
||||||
|
*
|
||||||
|
* @param str the string to check
|
||||||
|
* @returns if the value is a regex range
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export declare function isRangeRegex(str: string): boolean;
|
||||||
/**
|
/**
|
||||||
* Gets the first element of an array
|
* Gets the first element of an array
|
||||||
* @remarks does not validate if array has any elements
|
* @remarks does not validate if array has any elements
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
"use strict";
|
"use strict";
|
||||||
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
Object.defineProperty(exports, "__esModule", { value: true });
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
exports.CommonError = exports.append = exports.regexEscape = exports.removeQuotes = exports.findLastIndex = exports.last = exports.first = exports.isSingleRegexCharacter = exports.combineFlags = exports.hasFlag = exports.makeFlag = exports.usefulConditional = exports.unusedParameter = void 0;
|
exports.CommonError = exports.append = exports.regexEscape = exports.removeQuotes = exports.findLastIndex = exports.last = exports.first = exports.isRangeRegex = exports.isSingleRegexCharacter = exports.combineFlags = exports.hasFlag = exports.makeFlag = exports.usefulConditional = exports.unusedParameter = void 0;
|
||||||
/**
|
/**
|
||||||
* The following section is used because the linter is set up to warn about certain operations
|
* The following section is used because the linter is set up to warn about certain operations
|
||||||
* and for good reason! I'd much rather have these functions than accidentally use bitwise operations, or
|
* and for good reason! I'd much rather have these functions than accidentally use bitwise operations, or
|
||||||
@ -86,6 +86,31 @@ function isSingleRegexCharacter(char) {
|
|||||||
char.length === 1;
|
char.length === 1;
|
||||||
}
|
}
|
||||||
exports.isSingleRegexCharacter = isSingleRegexCharacter;
|
exports.isSingleRegexCharacter = isSingleRegexCharacter;
|
||||||
|
/**
 * Checks to see if the string is a regex range
 *
 * @remarks a range is in the format of [X-Y] where X and Y are valid single regex characters
 *
 * @param str the string to check
 * @returns if the value is a regex range
 * @internal
 */
function isRangeRegex(str) {
    // a range must be wrapped in BOTH brackets; bail out if either one is missing
    // (using "&&" here would let strings such as "[a-zb" slip through)
    if (!str.startsWith("[") || !str.endsWith("]")) {
        return false;
    }
    // strip the surrounding brackets and separate the two endpoints
    const split = str.substring(1, str.length - 1).split("-");
    if (split.length !== 2) {
        return false;
    }
    //hack: ensure the "-" wasn't escaped by a lone trailing backslash
    // ("\\\\" is an escaped backslash followed by a real "-", which is a valid endpoint)
    if (split[0].endsWith("\\") && split[0] !== "\\\\") {
        return false;
    }
    // both endpoints must each be exactly one regex character
    return isSingleRegexCharacter(split[0]) && isSingleRegexCharacter(split[1]);
}
|
||||||
|
exports.isRangeRegex = isRangeRegex;
|
||||||
/**
|
/**
|
||||||
* Gets the first element of an array
|
* Gets the first element of an array
|
||||||
* @remarks does not validate if array has any elements
|
* @remarks does not validate if array has any elements
|
||||||
|
2
package-lock.json
generated
2
package-lock.json
generated
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "human2regex",
|
"name": "human2regex",
|
||||||
"version": "1.1.3",
|
"version": "1.1.4",
|
||||||
"lockfileVersion": 1,
|
"lockfileVersion": 1,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "human2regex",
|
"name": "human2regex",
|
||||||
"version": "1.1.3",
|
"version": "1.1.4",
|
||||||
"description": "Humanized Regular Expressions",
|
"description": "Humanized Regular Expressions",
|
||||||
"main": "./lib/index.js",
|
"main": "./lib/index.js",
|
||||||
"typings": "./lib/index.d.ts",
|
"typings": "./lib/index.d.ts",
|
||||||
|
@ -41,7 +41,7 @@
|
|||||||
<p class="font-weight-bold">Matching</p>
|
<p class="font-weight-bold">Matching</p>
|
||||||
<p>{{i-code}}match "hello world"{{end-i-code}} matches "hello world" exactly</p>
|
<p>{{i-code}}match "hello world"{{end-i-code}} matches "hello world" exactly</p>
|
||||||
<p>{{i-code}}match "hello" then optionally " world"{{end-i-code}} matches "hello" or "hello world"</p>
|
<p>{{i-code}}match "hello" then optionally " world"{{end-i-code}} matches "hello" or "hello world"</p>
|
||||||
<p>{{i-code}}match "hello" or "world"{{end-i-code}} matches "hello" or "world</p>
|
<p>{{i-code}}match "hello" or "world"{{end-i-code}} matches "hello" or "world"</p>
|
||||||
<p>{{i-code}}match a word{{end-i-code}} matches any word</p>
|
<p>{{i-code}}match a word{{end-i-code}} matches any word</p>
|
||||||
<p class="font-weight-bold">Repetition</p>
|
<p class="font-weight-bold">Repetition</p>
|
||||||
<p>{{i-code}}match 0+ "hello"{{end-i-code}} matches 0 or more "hello"s</p>
|
<p>{{i-code}}match 0+ "hello"{{end-i-code}} matches 0 or more "hello"s</p>
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
* @packageDocumentation
|
* @packageDocumentation
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { first, isSingleRegexCharacter } from "./utilities";
|
import { first, isRangeRegex, isSingleRegexCharacter } from "./utilities";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Minimizes the match string by finding duplicates or substrings in the array
|
* Minimizes the match string by finding duplicates or substrings in the array
|
||||||
@ -49,9 +49,10 @@ function minMatchString(arr: string[], depth: number = 0): string {
|
|||||||
return first(arr);
|
return first(arr);
|
||||||
}
|
}
|
||||||
|
|
||||||
// base case: arr is all single letters
|
// base case: arr is all single letters or ranges
|
||||||
if (arr.every(isSingleRegexCharacter)) {
|
if (arr.every((value) => isSingleRegexCharacter(value) || isRangeRegex(value))) {
|
||||||
return "[" + arr.join("") + "]";
|
// if range, don't forget to remove '[' and ']'
|
||||||
|
return "[" + arr.map((x) => isSingleRegexCharacter(x) ? x :x.substring(1, x.length-1)).join("") + "]";
|
||||||
}
|
}
|
||||||
|
|
||||||
// now the real magic begins
|
// now the real magic begins
|
||||||
|
@ -95,6 +95,35 @@ export function isSingleRegexCharacter(char: string): boolean {
|
|||||||
char.length === 1;
|
char.length === 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Checks to see if the string is a regex range
 *
 * @remarks a range is in the format of [X-Y] where X and Y are valid single regex characters
 *
 * @param str the string to check
 * @returns if the value is a regex range
 * @internal
 */
export function isRangeRegex(str: string): boolean {
    // a range must be wrapped in BOTH brackets; bail out if either one is missing
    // (using "&&" here would let strings such as "[a-zb" slip through)
    if (!str.startsWith("[") || !str.endsWith("]")) {
        return false;
    }

    // strip the surrounding brackets and separate the two endpoints
    const split = str.substring(1, str.length - 1).split("-");

    if (split.length !== 2) {
        return false;
    }

    //hack: ensure the "-" wasn't escaped by a lone trailing backslash
    // ("\\\\" is an escaped backslash followed by a real "-", which is a valid endpoint)
    if (split[0].endsWith("\\") && split[0] !== "\\\\") {
        return false;
    }

    // both endpoints must each be exactly one regex character
    return isSingleRegexCharacter(split[0]) && isSingleRegexCharacter(split[1]);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the first element of an array
|
* Gets the first element of an array
|
||||||
* @remarks does not validate if array has any elements
|
* @remarks does not validate if array has any elements
|
||||||
|
@ -99,6 +99,16 @@ describe("Generator functionality", function() {
|
|||||||
const reg1 = parser.parse(toks1);
|
const reg1 = parser.parse(toks1);
|
||||||
expect(reg1.validate(RegexDialect.JS).length).toBe(0);
|
expect(reg1.validate(RegexDialect.JS).length).toBe(0);
|
||||||
expect(reg1.toRegex(RegexDialect.JS)).toBe("/[\\u0061-\\u007A]/");
|
expect(reg1.toRegex(RegexDialect.JS)).toBe("/[\\u0061-\\u007A]/");
|
||||||
|
|
||||||
|
const toks2 = lexer.tokenize('match "0"..."9" or "a".."z" or "A".."Z"').tokens;
|
||||||
|
const reg2 = parser.parse(toks2);
|
||||||
|
expect(reg2.validate(RegexDialect.JS).length).toBe(0);
|
||||||
|
expect(reg2.toRegex(RegexDialect.JS)).toBe("/[0-9a-zA-Z]/");
|
||||||
|
|
||||||
|
const toks3 = lexer.tokenize('match "0" or "a".."z" or "A".."Z"').tokens;
|
||||||
|
const reg3 = parser.parse(toks3);
|
||||||
|
expect(reg3.validate(RegexDialect.JS).length).toBe(0);
|
||||||
|
expect(reg3.toRegex(RegexDialect.JS)).toBe("/[0a-zA-Z]/");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("handles specifiers", function() {
|
it("handles specifiers", function() {
|
||||||
@ -193,6 +203,16 @@ describe("Generator functionality", function() {
|
|||||||
const reg7 = parser.parse(toks7);
|
const reg7 = parser.parse(toks7);
|
||||||
expect(reg7.validate(RegexDialect.JS).length).toBe(0);
|
expect(reg7.validate(RegexDialect.JS).length).toBe(0);
|
||||||
expect(reg7.toRegex(RegexDialect.JS)).toBe("/[^>]*/");
|
expect(reg7.toRegex(RegexDialect.JS)).toBe("/[^>]*/");
|
||||||
|
|
||||||
|
const toks8 = lexer.tokenize('match "a" or "a"').tokens;
|
||||||
|
const reg8 = parser.parse(toks8);
|
||||||
|
expect(reg8.validate(RegexDialect.JS).length).toBe(0);
|
||||||
|
expect(reg8.toRegex(RegexDialect.JS)).toBe("/a/");
|
||||||
|
|
||||||
|
const toks9 = lexer.tokenize('match "a".."z" or "a".."z"').tokens;
|
||||||
|
const reg9 = parser.parse(toks9);
|
||||||
|
expect(reg9.validate(RegexDialect.JS).length).toBe(0);
|
||||||
|
expect(reg9.toRegex(RegexDialect.JS)).toBe("/[a-z]/");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("can generate backreferences", function() {
|
it("can generate backreferences", function() {
|
||||||
|
@ -12,7 +12,9 @@ describe("Generator helper functionality", function() {
|
|||||||
{ from: [ "a1x1z", "a2y2z", "a3z3z" ], to: "a(?:1x1|2y2|3z3)z" },
|
{ from: [ "a1x1z", "a2y2z", "a3z3z" ], to: "a(?:1x1|2y2|3z3)z" },
|
||||||
{ from: [ "ab", "cd" ], to: "ab|cd" },
|
{ from: [ "ab", "cd" ], to: "ab|cd" },
|
||||||
{ from: [ "abc", "bc" ], to: "a?bc" },
|
{ from: [ "abc", "bc" ], to: "a?bc" },
|
||||||
{ from: [ "abc", "xb" ], to: "abc|xb" }
|
{ from: [ "abc", "xb" ], to: "abc|xb" },
|
||||||
|
{ from: [ "a", "a" ], to: "a" },
|
||||||
|
{ from: [ "a-z", "a-z" ], to: "a-z" }
|
||||||
];
|
];
|
||||||
|
|
||||||
for (const c of test_cases) {
|
for (const c of test_cases) {
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
|
|
||||||
import "../src/utilities";
|
import "../src/utilities";
|
||||||
import { isSingleRegexCharacter, findLastIndex, removeQuotes, regexEscape, hasFlag, combineFlags, makeFlag, first, last, CommonError, append } from "../src/utilities";
|
import { isSingleRegexCharacter, findLastIndex, removeQuotes, regexEscape, hasFlag, combineFlags, makeFlag, first, last, CommonError, append, isRangeRegex } from "../src/utilities";
|
||||||
import { UsingFlags, ISemanticError } from "../src/generator";
|
import { UsingFlags, ISemanticError } from "../src/generator";
|
||||||
import { IRecognitionException, ILexingError, createTokenInstance } from "chevrotain";
|
import { IRecognitionException, ILexingError, createTokenInstance } from "chevrotain";
|
||||||
import { Indent } from "../src/tokens";
|
import { Indent } from "../src/tokens";
|
||||||
@ -65,6 +65,18 @@ describe("Utility functions", function() {
|
|||||||
expect(isSingleRegexCharacter("💩")).toBe(false);
|
expect(isSingleRegexCharacter("💩")).toBe(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("can determine if something is a range", function() {
|
||||||
|
expect(isRangeRegex("")).toBe(false);
|
||||||
|
expect(isRangeRegex("-3")).toBe(false);
|
||||||
|
expect(isRangeRegex("[]")).toBe(false);
|
||||||
|
expect(isRangeRegex("[-3]")).toBe(false);
|
||||||
|
expect(isRangeRegex("[a-z]")).toBe(true);
|
||||||
|
expect(isRangeRegex("[\\u1234-\\u1234]")).toBe(true);
|
||||||
|
expect(isRangeRegex("[௹-௹]")).toBe(true);
|
||||||
|
expect(isRangeRegex("[\\-3]")).toBe(false);
|
||||||
|
expect(isRangeRegex("[\\\\-3]")).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
it("should remove quotes correctly", function() {
|
it("should remove quotes correctly", function() {
|
||||||
expect(removeQuotes('""')).toEqual("");
|
expect(removeQuotes('""')).toEqual("");
|
||||||
expect(removeQuotes('"hello world"')).toEqual("hello world");
|
expect(removeQuotes('"hello world"')).toEqual("hello world");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user