1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-16 12:30:09 -07:00

More bugs fixed, updated readme

This commit is contained in:
Patrick Demian 2020-11-06 16:06:27 -05:00
parent 7d7d6337e1
commit 298aee7226
8 changed files with 1899 additions and 526 deletions

View File

@ -2,21 +2,22 @@
## Purpose ## Purpose
Generate regular expressions from natural language. Currently WIP, but should look something like this: Generate regular expressions from natural language.
Instead of a convoluted mess of symbols why not Instead of a convoluted mess of symbols like `/([\w\.=\-]*\w+)/g` why not
using global matching using global matching
create a group called "capture_me" create a group called capture_me
match 0+ words or "." or "=" or "-" match 0+ characters or "." or "=" or "-"
match 1+ words match 1+ words
Is the latter not much easier to read and debug than the former?
Running the program should result in the following output: Running the program should result in the following output:
Your regex = /\$([\w\.=\-]*[\w]+)/g Your regex = /(?<capture_me>[\w\.\=\-]*\w+)/g
"capture_me" is group id 1
Is the former not much easier to read and bug fix than the latter? You can then use your regex in your language of choice, with Human2Regex validating your regex for you.
Another example Another example
@ -61,16 +62,24 @@ Another example
Running the program should result in the following output: Running the program should result in the following output:
Your regex = /^(https?:\/\/)?((\w\.)*)(:\d+)?([\w_\-]\.\w)((/[\w_\-]))?(\?([\w_\-]=[\w_\-]))?(#.*)$/g Your regex = /^(?<protocol>https?\:\/\/)?(?<subdomain>(\w+\.)*)?(?<domain>(?:\w+|_|\-)+\.\w+)\:?\d*(?<path>(\/(?:\w+|_|\-)*)*)?(\?(?<query>((?:\w+|_|\-)+\=(?:\w+|_|\-)+)*))?(#.*)?$/g
"protocol" is group id 1
"subdomain" is group id 2 Which one would you rather debug?
"domain" is group id 4
"path" is group id 5
"query" is group id 5 or 6 if "path" exists
## Usage ## Usage
Configure config.ts Build
Run
npm run build npm run build
Run
point web browser to: docs/index.html
Test
npm t
## Todo
- Separate website and source code. Move to yarn/npm
- Add more regex options such as back references, subroutines, lookahead/behind, and more character classes (eg, `[:alpha:]`)

12
docs/bundle.min.js vendored

File diff suppressed because one or more lines are too long

2310
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
{ {
"name": "human2regex", "name": "human2regex",
"version": "0.9.0", "version": "0.9.5",
"description": "Humanized Regular Expressions", "description": "Humanized Regular Expressions",
"main": "bundle.min.js", "main": "bundle.min.js",
"devDependencies": { "devDependencies": {
@ -8,20 +8,20 @@
"@types/html-minifier": "^3.5.3", "@types/html-minifier": "^3.5.3",
"@types/jest": "^26.0.15", "@types/jest": "^26.0.15",
"@types/mustache": "^4.0.1", "@types/mustache": "^4.0.1",
"@typescript-eslint/eslint-plugin": "^4.4.0", "@typescript-eslint/eslint-plugin": "^4.6.1",
"@typescript-eslint/parser": "^4.4.0", "@typescript-eslint/parser": "^4.6.1",
"before-build-webpack": "^0.2.9", "before-build-webpack": "^0.2.9",
"copy-webpack-plugin": "^6.2.1", "copy-webpack-plugin": "^6.3.0",
"css-loader": "^4.3.0", "css-loader": "^4.3.0",
"eslint": "^7.11.0", "eslint": "^7.11.0",
"glob": "^7.1.6", "glob": "^7.1.6",
"html-minifier": "^4.0.0", "html-minifier": "^4.0.0",
"jest": "^26.6.1", "jest": "^26.6.3",
"mini-css-extract-plugin": "^1.0.0", "mini-css-extract-plugin": "^1.0.0",
"mustache": "^4.0.1", "mustache": "^4.0.1",
"optimize-css-assets-webpack-plugin": "^5.0.4", "optimize-css-assets-webpack-plugin": "^5.0.4",
"ts-jest": "^26.4.3", "ts-jest": "^26.4.3",
"ts-loader": "^8.0.4", "ts-loader": "^8.0.9",
"ts-node": "^9.0.0", "ts-node": "^9.0.0",
"typescript": "^4.0.5", "typescript": "^4.0.5",
"webpack": "^4.44.2", "webpack": "^4.44.2",
@ -37,7 +37,7 @@
"author": "Patrick Demian", "author": "Patrick Demian",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"chevrotain": "^7.0.2", "chevrotain": "^7.0.3",
"codemirror": "^5.58.2" "codemirror": "^5.58.2"
}, },
"repository": { "repository": {

View File

@ -334,9 +334,13 @@ export class MatchSubStatementCST extends H2RCST {
let ret = ""; let ret = "";
let require_grouping = false; let require_grouping = false;
let dont_clobber_plus = false;
if (str.length === 1) { if (str.length === 1) {
ret = str[0]; ret = str[0];
if (ret.endsWith("+")) {
dont_clobber_plus = true;
}
} }
// we can use regex's [] for single chars, otherwise we need a group // we can use regex's [] for single chars, otherwise we need a group
else if (str.every(isSingleRegexCharacter)) { else if (str.every(isSingleRegexCharacter)) {
@ -349,10 +353,36 @@ export class MatchSubStatementCST extends H2RCST {
} }
if (this.count) { if (this.count) {
if (require_grouping) { if (dont_clobber_plus) {
ret = "(?:" + ret + ")"; const clobber = this.count.toRegex(language);
// + can be ignored as well as a count as long as that count is > 0
switch (clobber) {
case "*":
case "?":
ret = "(?:" + ret + ")" + clobber;
break;
case "+":
// ignore
break;
default:
if (clobber.startsWith("{0")) {
ret = "(?:" + ret + ")" + clobber;
}
else {
// remove + and replace with count
ret.substring(0, ret.length - 1) + clobber;
}
break;
}
}
else {
if (require_grouping) {
ret = "(?:" + ret + ")";
}
ret += this.count.toRegex(language);
} }
ret += this.count.toRegex(language);
} }
return ret; return ret;

View File

@ -27,8 +27,8 @@ import { createToken, Lexer } from "chevrotain";
/** @internal */ export const And = createToken({name: "And", pattern: /and|,/i}); /** @internal */ export const And = createToken({name: "And", pattern: /and|,/i});
/** @internal */ export const Word = createToken({name: "WordSpecifier", pattern: /word(s)?/i}); /** @internal */ export const Word = createToken({name: "WordSpecifier", pattern: /word(s)?/i});
/** @internal */ export const Digit = createToken({name: "DigitSpecifier", pattern: /digit(s)?/i}); /** @internal */ export const Digit = createToken({name: "DigitSpecifier", pattern: /digit(s)?/i});
/** @internal */ export const Character = createToken({name: "CharacterSpecifier", pattern: /character(s)?/i}); /** @internal */ export const Character = createToken({name: "CharacterSpecifier", pattern: /(character|letter)s?/i});
/** @internal */ export const Whitespace = createToken({name: "WhitespaceSpecifier", pattern: /(white space|whitespace)(s)?/i}); /** @internal */ export const Whitespace = createToken({name: "WhitespaceSpecifier", pattern: /(white space|whitespace)s?/i});
/** @internal */ export const Boundary = createToken({name: "BoundarySpecifier", pattern: /(word )boundary/i}); /** @internal */ export const Boundary = createToken({name: "BoundarySpecifier", pattern: /(word )boundary/i});
/** @internal */ export const Number = createToken({name: "NumberSpecifier", pattern: /number(s)?/i}); /** @internal */ export const Number = createToken({name: "NumberSpecifier", pattern: /number(s)?/i});
/** @internal */ export const Unicode = createToken({name: "UnicodeSpecifier", pattern: /unicode( class)?/i}); /** @internal */ export const Unicode = createToken({name: "UnicodeSpecifier", pattern: /unicode( class)?/i});

View File

@ -32,6 +32,11 @@ describe("Generator functionality", function() {
const reg1 = parser.parse(); const reg1 = parser.parse();
expect(reg1.validate(RegexDialect.JS).length).toBe(0); expect(reg1.validate(RegexDialect.JS).length).toBe(0);
expect(reg1.toRegex(RegexDialect.JS)).toBe("/https?/"); expect(reg1.toRegex(RegexDialect.JS)).toBe("/https?/");
parser.input = lexer.tokenize("match 1+ words").tokens;
const reg2 = parser.parse();
expect(reg2.validate(RegexDialect.JS).length).toBe(0);
expect(reg2.toRegex(RegexDialect.JS)).toBe("/\\w+/"); // used to generate w++. make sure not to regress
}); });
it("validates invalid regexes", function() { it("validates invalid regexes", function() {

View File

@ -1,3 +1,5 @@
/* eslint-disable @typescript-eslint/explicit-function-return-type */
/* eslint-disable @typescript-eslint/naming-convention */
/* eslint-disable @typescript-eslint/no-var-requires */ /* eslint-disable @typescript-eslint/no-var-requires */
/* eslint-disable no-undef */ /* eslint-disable no-undef */
const path = require("path"); const path = require("path");
@ -29,7 +31,6 @@ const config = {
function build_mustache() { function build_mustache() {
if (!existsSync(config.dst)){ if (!existsSync(config.dst)){
mkdirSync(config.dst); mkdirSync(config.dst);
} }
@ -48,7 +49,7 @@ function build_mustache() {
}; };
// build main mustache files // build main mustache files
for(const item of files) { for (const item of files) {
const filename = path.basename(item, ".json"); const filename = path.basename(item, ".json");
const view = read_json_file(item); const view = read_json_file(item);
const to = path.join(config.dst, filename + ".html"); const to = path.join(config.dst, filename + ".html");