1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-16 04:20:35 -07:00

More bugs fixed, updated readme

This commit is contained in:
Patrick Demian 2020-11-06 16:06:27 -05:00
parent 7d7d6337e1
commit 298aee7226
8 changed files with 1899 additions and 526 deletions

View File

@ -2,21 +2,22 @@
## Purpose
Generate regular expressions from natural language. Currently WIP, but should look something like this:
Generate regular expressions from natural language.
Instead of a convoluted mess of symbols why not
Instead of a convoluted mess of symbols like `/([\w\.=\-]*\w+)/g` why not
using global matching
create a group called "capture_me"
match 0+ words or "." or "=" or "-"
create a group called capture_me
match 0+ characters or "." or "=" or "-"
match 1+ words
Is the former not much easier to read and bug fix than the latter?
Running the program should result in the following output:
Your regex = /\$([\w\.=\-]*[\w]+)/g
"capture_me" is group id 1
Your regex = /(?<capture_me>[\w\.\=\-]*\w+)/g
Is the former not much easier to read and bug fix than the latter?
You can then use your regex in your language of choice, with Human2Regex validating your regex for you.
Another example
@ -61,16 +62,24 @@ Another example
Running the program should result in the following output:
Your regex = /^(https?:\/\/)?((\w\.)*)(:\d+)?([\w_\-]\.\w)((/[\w_\-]))?(\?([\w_\-]=[\w_\-]))?(#.*)$/g
"protocol" is group id 1
"subdomain" is group id 2
"domain" is group id 4
"path" is group id 5
"query" is group id 5 or 6 if "path" exists
Your regex = /^(?<protocol>https?\:\/\/)?(?<subdomain>(\w+\.)*)?(?<domain>(?:\w+|_|\-)+\.\w+)\:?\d*(?<path>(\/(?:\w+|_|\-)*)*)?(\?(?<query>((?:\w+|_|\-)+\=(?:\w+|_|\-)+)*))?(#.*)?$/g
Which one would you rather debug?
## Usage
Configure config.ts
Run
Build
npm run build
Run
point web browser to: docs/index.html
Test
npm t
## Todo
- Separate website and source code. Move to yarn/npm
- Add more regex options such as back references, subroutines, lookahead/behind, and more character classes (eg, `[:alpha:]`)

12
docs/bundle.min.js vendored

File diff suppressed because one or more lines are too long

2310
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
{
"name": "human2regex",
"version": "0.9.0",
"version": "0.9.5",
"description": "Humanized Regular Expressions",
"main": "bundle.min.js",
"devDependencies": {
@ -8,20 +8,20 @@
"@types/html-minifier": "^3.5.3",
"@types/jest": "^26.0.15",
"@types/mustache": "^4.0.1",
"@typescript-eslint/eslint-plugin": "^4.4.0",
"@typescript-eslint/parser": "^4.4.0",
"@typescript-eslint/eslint-plugin": "^4.6.1",
"@typescript-eslint/parser": "^4.6.1",
"before-build-webpack": "^0.2.9",
"copy-webpack-plugin": "^6.2.1",
"copy-webpack-plugin": "^6.3.0",
"css-loader": "^4.3.0",
"eslint": "^7.11.0",
"glob": "^7.1.6",
"html-minifier": "^4.0.0",
"jest": "^26.6.1",
"jest": "^26.6.3",
"mini-css-extract-plugin": "^1.0.0",
"mustache": "^4.0.1",
"optimize-css-assets-webpack-plugin": "^5.0.4",
"ts-jest": "^26.4.3",
"ts-loader": "^8.0.4",
"ts-loader": "^8.0.9",
"ts-node": "^9.0.0",
"typescript": "^4.0.5",
"webpack": "^4.44.2",
@ -37,7 +37,7 @@
"author": "Patrick Demian",
"license": "MIT",
"dependencies": {
"chevrotain": "^7.0.2",
"chevrotain": "^7.0.3",
"codemirror": "^5.58.2"
},
"repository": {

View File

@ -334,9 +334,13 @@ export class MatchSubStatementCST extends H2RCST {
let ret = "";
let require_grouping = false;
let dont_clobber_plus = false;
if (str.length === 1) {
ret = str[0];
if (ret.endsWith("+")) {
dont_clobber_plus = true;
}
}
// we can use regex's [] for single chars, otherwise we need a group
else if (str.every(isSingleRegexCharacter)) {
@ -349,10 +353,36 @@ export class MatchSubStatementCST extends H2RCST {
}
if (this.count) {
if (require_grouping) {
ret = "(?:" + ret + ")";
if (dont_clobber_plus) {
const clobber = this.count.toRegex(language);
// + can be ignored as well as a count as long as that count is > 0
switch (clobber) {
case "*":
case "?":
ret = "(?:" + ret + ")" + clobber;
break;
case "+":
// ignore
break;
default:
if (clobber.startsWith("{0")) {
ret = "(?:" + ret + ")" + clobber;
}
else {
// remove + and replace with count
ret.substring(0, ret.length - 1) + clobber;
}
break;
}
}
else {
if (require_grouping) {
ret = "(?:" + ret + ")";
}
ret += this.count.toRegex(language);
}
ret += this.count.toRegex(language);
}
return ret;

View File

@ -27,8 +27,8 @@ import { createToken, Lexer } from "chevrotain";
/** @internal */ export const And = createToken({name: "And", pattern: /and|,/i});
/** @internal */ export const Word = createToken({name: "WordSpecifier", pattern: /word(s)?/i});
/** @internal */ export const Digit = createToken({name: "DigitSpecifier", pattern: /digit(s)?/i});
/** @internal */ export const Character = createToken({name: "CharacterSpecifier", pattern: /character(s)?/i});
/** @internal */ export const Whitespace = createToken({name: "WhitespaceSpecifier", pattern: /(white space|whitespace)(s)?/i});
/** @internal */ export const Character = createToken({name: "CharacterSpecifier", pattern: /(character|letter)s?/i});
/** @internal */ export const Whitespace = createToken({name: "WhitespaceSpecifier", pattern: /(white space|whitespace)s?/i});
/** @internal */ export const Boundary = createToken({name: "BoundarySpecifier", pattern: /(word )boundary/i});
/** @internal */ export const Number = createToken({name: "NumberSpecifier", pattern: /number(s)?/i});
/** @internal */ export const Unicode = createToken({name: "UnicodeSpecifier", pattern: /unicode( class)?/i});

View File

@ -32,6 +32,11 @@ describe("Generator functionality", function() {
const reg1 = parser.parse();
expect(reg1.validate(RegexDialect.JS).length).toBe(0);
expect(reg1.toRegex(RegexDialect.JS)).toBe("/https?/");
parser.input = lexer.tokenize("match 1+ words").tokens;
const reg2 = parser.parse();
expect(reg2.validate(RegexDialect.JS).length).toBe(0);
expect(reg2.toRegex(RegexDialect.JS)).toBe("/\\w+/"); // used to generate w++. make sure not to regress
});
it("validates invalid regexes", function() {

View File

@ -1,3 +1,5 @@
/* eslint-disable @typescript-eslint/explicit-function-return-type */
/* eslint-disable @typescript-eslint/naming-convention */
/* eslint-disable @typescript-eslint/no-var-requires */
/* eslint-disable no-undef */
const path = require("path");
@ -29,7 +31,6 @@ const config = {
function build_mustache() {
if (!existsSync(config.dst)){
mkdirSync(config.dst);
}
@ -48,7 +49,7 @@ function build_mustache() {
};
// build main mustache files
for(const item of files) {
for (const item of files) {
const filename = path.basename(item, ".json");
const view = read_json_file(item);
const to = path.join(config.dst, filename + ".html");