mirror of
https://github.com/pdemian/human2regex.git
synced 2025-05-16 04:20:35 -07:00
More bugs fixed, updated readme
This commit is contained in:
parent
7d7d6337e1
commit
298aee7226
39
Readme.md
39
Readme.md
@ -2,21 +2,22 @@
|
||||
|
||||
## Purpose
|
||||
|
||||
Generate regular expressions from natural language. Currently WIP, but should look something like this:
|
||||
Generate regular expressions from natural language.
|
||||
|
||||
Instead of a convoluted mess of symbols why not
|
||||
Instead of a convoluted mess of symbols like `/([\w\.=\-]*\w+)/g` why not
|
||||
|
||||
using global matching
|
||||
create a group called "capture_me"
|
||||
match 0+ words or "." or "=" or "-"
|
||||
create a group called capture_me
|
||||
match 0+ characters or "." or "=" or "-"
|
||||
match 1+ words
|
||||
|
||||
Is the former not much easier to read and bug fix than the latter?
|
||||
|
||||
Running the program should result in the following output:
|
||||
|
||||
Your regex = /\$([\w\.=\-]*[\w]+)/g
|
||||
"capture_me" is group id 1
|
||||
Your regex = /(?<capture_me>[\w\.\=\-]*\w+)/g
|
||||
|
||||
Is the former not much easier to read and debug than the latter?
|
||||
You can then use your regex in your language of choice, with Human2Regex validating your regex for you.
|
||||
|
||||
Another example
|
||||
|
||||
@ -61,16 +62,24 @@ Another example
|
||||
|
||||
Running the program should result in the following output:
|
||||
|
||||
Your regex = /^(https?:\/\/)?((\w\.)*)(:\d+)?([\w_\-]\.\w)((/[\w_\-]))?(\?([\w_\-]=[\w_\-]))?(#.*)$/g
|
||||
"protocol" is group id 1
|
||||
"subdomain" is group id 2
|
||||
"domain" is group id 4
|
||||
"path" is group id 5
|
||||
"query" is group id 5 or 6 if "path" exists
|
||||
Your regex = /^(?<protocol>https?\:\/\/)?(?<subdomain>(\w+\.)*)?(?<domain>(?:\w+|_|\-)+\.\w+)\:?\d*(?<path>(\/(?:\w+|_|\-)*)*)?(\?(?<query>((?:\w+|_|\-)+\=(?:\w+|_|\-)+)*))?(#.*)?$/g
|
||||
|
||||
Which one would you rather debug?
|
||||
|
||||
## Usage
|
||||
Configure config.ts
|
||||
Run
|
||||
Build
|
||||
|
||||
npm run build
|
||||
|
||||
Run
|
||||
|
||||
point web browser to: docs/index.html
|
||||
|
||||
Test
|
||||
|
||||
npm t
|
||||
|
||||
|
||||
## Todo
|
||||
- Separate website and source code. Move to yarn/npm
|
||||
- Add more regex options such as back references, subroutines, lookahead/behind, and more character classes (eg, `[:alpha:]`)
|
12
docs/bundle.min.js
vendored
12
docs/bundle.min.js
vendored
File diff suppressed because one or more lines are too long
2310
package-lock.json
generated
2310
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
14
package.json
14
package.json
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "human2regex",
|
||||
"version": "0.9.0",
|
||||
"version": "0.9.5",
|
||||
"description": "Humanized Regular Expressions",
|
||||
"main": "bundle.min.js",
|
||||
"devDependencies": {
|
||||
@ -8,20 +8,20 @@
|
||||
"@types/html-minifier": "^3.5.3",
|
||||
"@types/jest": "^26.0.15",
|
||||
"@types/mustache": "^4.0.1",
|
||||
"@typescript-eslint/eslint-plugin": "^4.4.0",
|
||||
"@typescript-eslint/parser": "^4.4.0",
|
||||
"@typescript-eslint/eslint-plugin": "^4.6.1",
|
||||
"@typescript-eslint/parser": "^4.6.1",
|
||||
"before-build-webpack": "^0.2.9",
|
||||
"copy-webpack-plugin": "^6.2.1",
|
||||
"copy-webpack-plugin": "^6.3.0",
|
||||
"css-loader": "^4.3.0",
|
||||
"eslint": "^7.11.0",
|
||||
"glob": "^7.1.6",
|
||||
"html-minifier": "^4.0.0",
|
||||
"jest": "^26.6.1",
|
||||
"jest": "^26.6.3",
|
||||
"mini-css-extract-plugin": "^1.0.0",
|
||||
"mustache": "^4.0.1",
|
||||
"optimize-css-assets-webpack-plugin": "^5.0.4",
|
||||
"ts-jest": "^26.4.3",
|
||||
"ts-loader": "^8.0.4",
|
||||
"ts-loader": "^8.0.9",
|
||||
"ts-node": "^9.0.0",
|
||||
"typescript": "^4.0.5",
|
||||
"webpack": "^4.44.2",
|
||||
@ -37,7 +37,7 @@
|
||||
"author": "Patrick Demian",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"chevrotain": "^7.0.2",
|
||||
"chevrotain": "^7.0.3",
|
||||
"codemirror": "^5.58.2"
|
||||
},
|
||||
"repository": {
|
||||
|
@ -334,9 +334,13 @@ export class MatchSubStatementCST extends H2RCST {
|
||||
let ret = "";
|
||||
|
||||
let require_grouping = false;
|
||||
let dont_clobber_plus = false;
|
||||
|
||||
if (str.length === 1) {
|
||||
ret = str[0];
|
||||
if (ret.endsWith("+")) {
|
||||
dont_clobber_plus = true;
|
||||
}
|
||||
}
|
||||
// we can use regex's [] for single chars, otherwise we need a group
|
||||
else if (str.every(isSingleRegexCharacter)) {
|
||||
@ -349,10 +353,36 @@ export class MatchSubStatementCST extends H2RCST {
|
||||
}
|
||||
|
||||
if (this.count) {
|
||||
if (require_grouping) {
|
||||
ret = "(?:" + ret + ")";
|
||||
if (dont_clobber_plus) {
|
||||
const clobber = this.count.toRegex(language);
|
||||
|
||||
// + can be ignored as well as a count as long as that count is > 0
|
||||
switch (clobber) {
|
||||
case "*":
|
||||
case "?":
|
||||
ret = "(?:" + ret + ")" + clobber;
|
||||
break;
|
||||
case "+":
|
||||
// ignore
|
||||
break;
|
||||
default:
|
||||
if (clobber.startsWith("{0")) {
|
||||
ret = "(?:" + ret + ")" + clobber;
|
||||
}
|
||||
else {
|
||||
// remove + and replace with count
|
||||
ret = ret.substring(0, ret.length - 1) + clobber;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (require_grouping) {
|
||||
ret = "(?:" + ret + ")";
|
||||
}
|
||||
|
||||
ret += this.count.toRegex(language);
|
||||
}
|
||||
ret += this.count.toRegex(language);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@ -27,8 +27,8 @@ import { createToken, Lexer } from "chevrotain";
|
||||
/** @internal */ export const And = createToken({name: "And", pattern: /and|,/i});
|
||||
/** @internal */ export const Word = createToken({name: "WordSpecifier", pattern: /word(s)?/i});
|
||||
/** @internal */ export const Digit = createToken({name: "DigitSpecifier", pattern: /digit(s)?/i});
|
||||
/** @internal */ export const Character = createToken({name: "CharacterSpecifier", pattern: /character(s)?/i});
|
||||
/** @internal */ export const Whitespace = createToken({name: "WhitespaceSpecifier", pattern: /(white space|whitespace)(s)?/i});
|
||||
/** @internal */ export const Character = createToken({name: "CharacterSpecifier", pattern: /(character|letter)s?/i});
|
||||
/** @internal */ export const Whitespace = createToken({name: "WhitespaceSpecifier", pattern: /(white space|whitespace)s?/i});
|
||||
/** @internal */ export const Boundary = createToken({name: "BoundarySpecifier", pattern: /(word )boundary/i});
|
||||
/** @internal */ export const Number = createToken({name: "NumberSpecifier", pattern: /number(s)?/i});
|
||||
/** @internal */ export const Unicode = createToken({name: "UnicodeSpecifier", pattern: /unicode( class)?/i});
|
||||
|
@ -32,6 +32,11 @@ describe("Generator functionality", function() {
|
||||
const reg1 = parser.parse();
|
||||
expect(reg1.validate(RegexDialect.JS).length).toBe(0);
|
||||
expect(reg1.toRegex(RegexDialect.JS)).toBe("/https?/");
|
||||
|
||||
parser.input = lexer.tokenize("match 1+ words").tokens;
|
||||
const reg2 = parser.parse();
|
||||
expect(reg2.validate(RegexDialect.JS).length).toBe(0);
|
||||
expect(reg2.toRegex(RegexDialect.JS)).toBe("/\\w+/"); // used to generate w++. make sure not to regress
|
||||
});
|
||||
|
||||
it("validates invalid regexes", function() {
|
||||
|
@ -1,3 +1,5 @@
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/naming-convention */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable no-undef */
|
||||
const path = require("path");
|
||||
@ -29,7 +31,6 @@ const config = {
|
||||
|
||||
|
||||
function build_mustache() {
|
||||
|
||||
if (!existsSync(config.dst)){
|
||||
mkdirSync(config.dst);
|
||||
}
|
||||
@ -48,7 +49,7 @@ function build_mustache() {
|
||||
};
|
||||
|
||||
// build main mustache files
|
||||
for(const item of files) {
|
||||
for (const item of files) {
|
||||
const filename = path.basename(item, ".json");
|
||||
const view = read_json_file(item);
|
||||
const to = path.join(config.dst, filename + ".html");
|
||||
|
Loading…
x
Reference in New Issue
Block a user