mirror of
https://github.com/pdemian/human2regex.git
synced 2025-05-16 20:40:08 -07:00
More bugs fixed, updated readme
This commit is contained in:
parent
7d7d6337e1
commit
298aee7226
39
Readme.md
39
Readme.md
@ -2,21 +2,22 @@
|
|||||||
|
|
||||||
## Purpose
|
## Purpose
|
||||||
|
|
||||||
Generate regular expressions from natural language. Currently WIP, but should look something like this:
|
Generate regular expressions from natural language.
|
||||||
|
|
||||||
Instead of a convoluted mess of symbols why not
|
Instead of a convoluted mess of symbols like `/([\w\.=\-]*\w+)/g` why not
|
||||||
|
|
||||||
using global matching
|
using global matching
|
||||||
create a group called "capture_me"
|
create a group called capture_me
|
||||||
match 0+ words or "." or "=" or "-"
|
match 0+ characters or "." or "=" or "-"
|
||||||
match 1+ words
|
match 1+ words
|
||||||
|
|
||||||
|
Is the latter not much easier to read and debug than the former?
|
||||||
|
|
||||||
Running the program should result in the following output:
|
Running the program should result in the following output:
|
||||||
|
|
||||||
Your regex = /\$([\w\.=\-]*[\w]+)/g
|
Your regex = /(?<capture_me>[\w\.\=\-]*\w+)/g
|
||||||
"capture_me" is group id 1
|
|
||||||
|
|
||||||
Is the former not much easier to read and bug fix than the latter?
|
You can then use your regex in your language of choice, with Human2Regex validating your regex for you.
|
||||||
|
|
||||||
Another example
|
Another example
|
||||||
|
|
||||||
@ -61,16 +62,24 @@ Another example
|
|||||||
|
|
||||||
Running the program should result in the following output:
|
Running the program should result in the following output:
|
||||||
|
|
||||||
Your regex = /^(https?:\/\/)?((\w\.)*)(:\d+)?([\w_\-]\.\w)((/[\w_\-]))?(\?([\w_\-]=[\w_\-]))?(#.*)$/g
|
Your regex = /^(?<protocol>https?\:\/\/)?(?<subdomain>(\w+\.)*)?(?<domain>(?:\w+|_|\-)+\.\w+)\:?\d*(?<path>(\/(?:\w+|_|\-)*)*)?(\?(?<query>((?:\w+|_|\-)+\=(?:\w+|_|\-)+)*))?(#.*)?$/g
|
||||||
"protocol" is group id 1
|
|
||||||
"subdomain" is group id 2
|
Which one would you rather debug?
|
||||||
"domain" is group id 4
|
|
||||||
"path" is group id 5
|
|
||||||
"query" is group id 5 or 6 if "path" exists
|
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
Configure config.ts
|
Build
|
||||||
Run
|
|
||||||
|
|
||||||
npm run build
|
npm run build
|
||||||
|
|
||||||
|
Run
|
||||||
|
|
||||||
|
point web browser to: docs/index.html
|
||||||
|
|
||||||
|
Test
|
||||||
|
|
||||||
|
npm t
|
||||||
|
|
||||||
|
|
||||||
|
## Todo
|
||||||
|
- Separate website and source code. Move to yarn/npm
|
||||||
|
- Add more regex options such as back references, subroutines, lookahead/behind, and more character classes (eg, `[:alpha:]`)
|
12
docs/bundle.min.js
vendored
12
docs/bundle.min.js
vendored
File diff suppressed because one or more lines are too long
2310
package-lock.json
generated
2310
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
14
package.json
14
package.json
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "human2regex",
|
"name": "human2regex",
|
||||||
"version": "0.9.0",
|
"version": "0.9.5",
|
||||||
"description": "Humanized Regular Expressions",
|
"description": "Humanized Regular Expressions",
|
||||||
"main": "bundle.min.js",
|
"main": "bundle.min.js",
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
@ -8,20 +8,20 @@
|
|||||||
"@types/html-minifier": "^3.5.3",
|
"@types/html-minifier": "^3.5.3",
|
||||||
"@types/jest": "^26.0.15",
|
"@types/jest": "^26.0.15",
|
||||||
"@types/mustache": "^4.0.1",
|
"@types/mustache": "^4.0.1",
|
||||||
"@typescript-eslint/eslint-plugin": "^4.4.0",
|
"@typescript-eslint/eslint-plugin": "^4.6.1",
|
||||||
"@typescript-eslint/parser": "^4.4.0",
|
"@typescript-eslint/parser": "^4.6.1",
|
||||||
"before-build-webpack": "^0.2.9",
|
"before-build-webpack": "^0.2.9",
|
||||||
"copy-webpack-plugin": "^6.2.1",
|
"copy-webpack-plugin": "^6.3.0",
|
||||||
"css-loader": "^4.3.0",
|
"css-loader": "^4.3.0",
|
||||||
"eslint": "^7.11.0",
|
"eslint": "^7.11.0",
|
||||||
"glob": "^7.1.6",
|
"glob": "^7.1.6",
|
||||||
"html-minifier": "^4.0.0",
|
"html-minifier": "^4.0.0",
|
||||||
"jest": "^26.6.1",
|
"jest": "^26.6.3",
|
||||||
"mini-css-extract-plugin": "^1.0.0",
|
"mini-css-extract-plugin": "^1.0.0",
|
||||||
"mustache": "^4.0.1",
|
"mustache": "^4.0.1",
|
||||||
"optimize-css-assets-webpack-plugin": "^5.0.4",
|
"optimize-css-assets-webpack-plugin": "^5.0.4",
|
||||||
"ts-jest": "^26.4.3",
|
"ts-jest": "^26.4.3",
|
||||||
"ts-loader": "^8.0.4",
|
"ts-loader": "^8.0.9",
|
||||||
"ts-node": "^9.0.0",
|
"ts-node": "^9.0.0",
|
||||||
"typescript": "^4.0.5",
|
"typescript": "^4.0.5",
|
||||||
"webpack": "^4.44.2",
|
"webpack": "^4.44.2",
|
||||||
@ -37,7 +37,7 @@
|
|||||||
"author": "Patrick Demian",
|
"author": "Patrick Demian",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"chevrotain": "^7.0.2",
|
"chevrotain": "^7.0.3",
|
||||||
"codemirror": "^5.58.2"
|
"codemirror": "^5.58.2"
|
||||||
},
|
},
|
||||||
"repository": {
|
"repository": {
|
||||||
|
@ -334,9 +334,13 @@ export class MatchSubStatementCST extends H2RCST {
|
|||||||
let ret = "";
|
let ret = "";
|
||||||
|
|
||||||
let require_grouping = false;
|
let require_grouping = false;
|
||||||
|
let dont_clobber_plus = false;
|
||||||
|
|
||||||
if (str.length === 1) {
|
if (str.length === 1) {
|
||||||
ret = str[0];
|
ret = str[0];
|
||||||
|
if (ret.endsWith("+")) {
|
||||||
|
dont_clobber_plus = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// we can use regex's [] for single chars, otherwise we need a group
|
// we can use regex's [] for single chars, otherwise we need a group
|
||||||
else if (str.every(isSingleRegexCharacter)) {
|
else if (str.every(isSingleRegexCharacter)) {
|
||||||
@ -349,11 +353,37 @@ export class MatchSubStatementCST extends H2RCST {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (this.count) {
|
if (this.count) {
|
||||||
|
if (dont_clobber_plus) {
|
||||||
|
const clobber = this.count.toRegex(language);
|
||||||
|
|
||||||
|
// + can be ignored as well as a count as long as that count is > 0
|
||||||
|
switch (clobber) {
|
||||||
|
case "*":
|
||||||
|
case "?":
|
||||||
|
ret = "(?:" + ret + ")" + clobber;
|
||||||
|
break;
|
||||||
|
case "+":
|
||||||
|
// ignore
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
if (clobber.startsWith("{0")) {
|
||||||
|
ret = "(?:" + ret + ")" + clobber;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// remove + and replace with count
|
||||||
|
// substring() returns a new string and leaves ret untouched, so the result
// must be assigned back for the trailing "+" to actually be replaced
ret = ret.substring(0, ret.length - 1) + clobber;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
if (require_grouping) {
|
if (require_grouping) {
|
||||||
ret = "(?:" + ret + ")";
|
ret = "(?:" + ret + ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
ret += this.count.toRegex(language);
|
ret += this.count.toRegex(language);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -27,8 +27,8 @@ import { createToken, Lexer } from "chevrotain";
|
|||||||
/** @internal */ export const And = createToken({name: "And", pattern: /and|,/i});
|
/** @internal */ export const And = createToken({name: "And", pattern: /and|,/i});
|
||||||
/** @internal */ export const Word = createToken({name: "WordSpecifier", pattern: /word(s)?/i});
|
/** @internal */ export const Word = createToken({name: "WordSpecifier", pattern: /word(s)?/i});
|
||||||
/** @internal */ export const Digit = createToken({name: "DigitSpecifier", pattern: /digit(s)?/i});
|
/** @internal */ export const Digit = createToken({name: "DigitSpecifier", pattern: /digit(s)?/i});
|
||||||
/** @internal */ export const Character = createToken({name: "CharacterSpecifier", pattern: /character(s)?/i});
|
/** @internal */ export const Character = createToken({name: "CharacterSpecifier", pattern: /(character|letter)s?/i});
|
||||||
/** @internal */ export const Whitespace = createToken({name: "WhitespaceSpecifier", pattern: /(white space|whitespace)(s)?/i});
|
/** @internal */ export const Whitespace = createToken({name: "WhitespaceSpecifier", pattern: /(white space|whitespace)s?/i});
|
||||||
/** @internal */ export const Boundary = createToken({name: "BoundarySpecifier", pattern: /(word )boundary/i});
|
/** @internal */ export const Boundary = createToken({name: "BoundarySpecifier", pattern: /(word )boundary/i});
|
||||||
/** @internal */ export const Number = createToken({name: "NumberSpecifier", pattern: /number(s)?/i});
|
/** @internal */ export const Number = createToken({name: "NumberSpecifier", pattern: /number(s)?/i});
|
||||||
/** @internal */ export const Unicode = createToken({name: "UnicodeSpecifier", pattern: /unicode( class)?/i});
|
/** @internal */ export const Unicode = createToken({name: "UnicodeSpecifier", pattern: /unicode( class)?/i});
|
||||||
|
@ -32,6 +32,11 @@ describe("Generator functionality", function() {
|
|||||||
const reg1 = parser.parse();
|
const reg1 = parser.parse();
|
||||||
expect(reg1.validate(RegexDialect.JS).length).toBe(0);
|
expect(reg1.validate(RegexDialect.JS).length).toBe(0);
|
||||||
expect(reg1.toRegex(RegexDialect.JS)).toBe("/https?/");
|
expect(reg1.toRegex(RegexDialect.JS)).toBe("/https?/");
|
||||||
|
|
||||||
|
parser.input = lexer.tokenize("match 1+ words").tokens;
|
||||||
|
const reg2 = parser.parse();
|
||||||
|
expect(reg2.validate(RegexDialect.JS).length).toBe(0);
|
||||||
|
expect(reg2.toRegex(RegexDialect.JS)).toBe("/\\w+/"); // used to generate w++. make sure not to regress
|
||||||
});
|
});
|
||||||
|
|
||||||
it("validates invalid regexes", function() {
|
it("validates invalid regexes", function() {
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||||
|
/* eslint-disable @typescript-eslint/naming-convention */
|
||||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||||
/* eslint-disable no-undef */
|
/* eslint-disable no-undef */
|
||||||
const path = require("path");
|
const path = require("path");
|
||||||
@ -29,7 +31,6 @@ const config = {
|
|||||||
|
|
||||||
|
|
||||||
function build_mustache() {
|
function build_mustache() {
|
||||||
|
|
||||||
if (!existsSync(config.dst)){
|
if (!existsSync(config.dst)){
|
||||||
mkdirSync(config.dst);
|
mkdirSync(config.dst);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user