More bugs fixed, updated readme

2025-05-16 12:30:09 -07:00 · 2020-11-06 16:06:27 -05:00 · 2020-11-06 16:06:27 -05:00 · 298aee7226
commit 298aee7226
parent 7d7d6337e1
8 changed files with 1899 additions and 526 deletions
--- a/Readme.md
+++ b/Readme.md
@ -2,21 +2,22 @@
 ## Purpose
-Generate regular expressions from natural language. Currently WIP, but should look something like this:
+Generate regular expressions from natural language.
-Instead of a convoluted mess of symbols why not
+Instead of a convoluted mess of symbols like `/([\w\.=\-]*\w+)/g` why not
    using global matching
-    create a group called "capture_me" 
+    create a group called capture_me
-        match 0+ words or "." or "=" or "-"
+        match 0+ characters or "." or "=" or "-"
        match 1+ words
 Is the latter not much easier to read and debug than the former?
 Running the program should result in the following output:
-    Your regex = /\$([\w\.=\-]*[\w]+)/g
+    Your regex = /(?<capture_me>[\w\.\=\-]*\w+)/g
    "capture_me" is group id 1
-Is the former not much easier to read and bug fix than the latter?
+You can then use your regex in your language of choice, with Human2Regex validating your regex for you.
 Another example
@ -61,16 +62,24 @@ Another example
 Running the program should result in the following output:
-    Your regex = /^(https?:\/\/)?((\w\.)*)(:\d+)?([\w_\-]\.\w)((/[\w_\-]))?(\?([\w_\-]=[\w_\-]))?(#.*)$/g
+    Your regex = /^(?<protocol>https?\:\/\/)?(?<subdomain>(\w+\.)*)?(?<domain>(?:\w+|_|\-)+\.\w+)\:?\d*(?<path>(\/(?:\w+|_|\-)*)*)?(\?(?<query>((?:\w+|_|\-)+\=(?:\w+|_|\-)+)*))?(#.*)?$/g
-    "protocol" is group id 1
+
-    "subdomain" is group id 2
+Which one would you rather debug?
    "domain" is group id 4
    "path" is group id 5
    "query" is group id 5 or 6 if "path" exists
 ## Usage
-Configure config.ts
+Build
 Run
    npm run build
 Run
    point web browser to: docs/index.html
 Test
    npm t
 ## Todo
 - Separate website and source code. Move to yarn/npm
 - Add more regex options such as back references, subroutines, lookahead/behind, and more character classes (eg,  `[:alpha:]`)
--- a/docs/bundle.min.js
+++ b/docs/bundle.min.js
--- a/package-lock.json
+++ b/package-lock.json
--- a/package.json
+++ b/package.json
@ -1,6 +1,6 @@
 {
  "name": "human2regex",
-  "version": "0.9.0",
+  "version": "0.9.5",
  "description": "Humanized Regular Expressions",
  "main": "bundle.min.js",
  "devDependencies": {
@ -8,20 +8,20 @@
    "@types/html-minifier": "^3.5.3",
    "@types/jest": "^26.0.15",
    "@types/mustache": "^4.0.1",
-    "@typescript-eslint/eslint-plugin": "^4.4.0",
+    "@typescript-eslint/eslint-plugin": "^4.6.1",
-    "@typescript-eslint/parser": "^4.4.0",
+    "@typescript-eslint/parser": "^4.6.1",
    "before-build-webpack": "^0.2.9",
-    "copy-webpack-plugin": "^6.2.1",
+    "copy-webpack-plugin": "^6.3.0",
    "css-loader": "^4.3.0",
    "eslint": "^7.11.0",
    "glob": "^7.1.6",
    "html-minifier": "^4.0.0",
-    "jest": "^26.6.1",
+    "jest": "^26.6.3",
    "mini-css-extract-plugin": "^1.0.0",
    "mustache": "^4.0.1",
    "optimize-css-assets-webpack-plugin": "^5.0.4",
    "ts-jest": "^26.4.3",
-    "ts-loader": "^8.0.4",
+    "ts-loader": "^8.0.9",
    "ts-node": "^9.0.0",
    "typescript": "^4.0.5",
    "webpack": "^4.44.2",
@ -37,7 +37,7 @@
  "author": "Patrick Demian",
  "license": "MIT",
  "dependencies": {
-    "chevrotain": "^7.0.2",
+    "chevrotain": "^7.0.3",
    "codemirror": "^5.58.2"
  },
  "repository": {
--- a/src/generator.ts
+++ b/src/generator.ts
@ -334,9 +334,13 @@ export class MatchSubStatementCST extends H2RCST {
        let ret = "";
        let require_grouping = false;
        let dont_clobber_plus = false;
        if (str.length === 1) {
            ret = str[0];
            if (ret.endsWith("+")) {
                dont_clobber_plus = true;
            }
        }
        // we can use regex's [] for single chars, otherwise we need a group
        else if (str.every(isSingleRegexCharacter)) {
@ -349,10 +353,36 @@ export class MatchSubStatementCST extends H2RCST {
        }
        if (this.count) {
-            if (require_grouping) {
+            if (dont_clobber_plus) {
-                ret = "(?:" + ret + ")";
+                const clobber = this.count.toRegex(language);
                // + can be ignored as well as a count as long as that count is > 0
                switch (clobber) {
                    case "*":
                    case "?":
                        ret = "(?:" + ret + ")" + clobber;
                        break;
                    case "+":
                        // ignore
                        break;
                    default:
                        if (clobber.startsWith("{0")) {
                            ret = "(?:" + ret + ")" + clobber;
                        }
                        else {
                            // remove + and replace with count
                            ret.substring(0, ret.length - 1) + clobber;
                        }
                        break;
                }
            }
            else {
                if (require_grouping) {
                    ret = "(?:" + ret + ")";
                }
                ret += this.count.toRegex(language);
            }
            ret += this.count.toRegex(language);
        }
        return ret;
--- a/src/tokens.ts
+++ b/src/tokens.ts
@ -27,8 +27,8 @@ import { createToken, Lexer } from "chevrotain";
 /** @internal */ export const And = createToken({name: "And", pattern: /and|,/i});
 /** @internal */ export const Word = createToken({name: "WordSpecifier", pattern: /word(s)?/i});
 /** @internal */ export const Digit = createToken({name: "DigitSpecifier", pattern: /digit(s)?/i});
-/** @internal */ export const Character = createToken({name: "CharacterSpecifier", pattern: /character(s)?/i});
+/** @internal */ export const Character = createToken({name: "CharacterSpecifier", pattern: /(character|letter)s?/i});
-/** @internal */ export const Whitespace = createToken({name: "WhitespaceSpecifier", pattern: /(white space|whitespace)(s)?/i});
+/** @internal */ export const Whitespace = createToken({name: "WhitespaceSpecifier", pattern: /(white space|whitespace)s?/i});
 /** @internal */ export const Boundary = createToken({name: "BoundarySpecifier", pattern: /(word )boundary/i});
 /** @internal */ export const Number = createToken({name: "NumberSpecifier", pattern: /number(s)?/i});
 /** @internal */ export const Unicode = createToken({name: "UnicodeSpecifier", pattern: /unicode( class)?/i});
--- a/tests/generator.spec.ts
+++ b/tests/generator.spec.ts
@ -32,6 +32,11 @@ describe("Generator functionality", function() {
        const reg1 = parser.parse();
        expect(reg1.validate(RegexDialect.JS).length).toBe(0);
        expect(reg1.toRegex(RegexDialect.JS)).toBe("/https?/");
        parser.input = lexer.tokenize("match 1+ words").tokens;
        const reg2 = parser.parse();
        expect(reg2.validate(RegexDialect.JS).length).toBe(0);
        expect(reg2.toRegex(RegexDialect.JS)).toBe("/\\w+/"); // used to generate w++. make sure not to regress
    });
    it("validates invalid regexes", function() {
--- a/webpack.config.js
+++ b/webpack.config.js
@ -1,3 +1,5 @@
 /* eslint-disable @typescript-eslint/explicit-function-return-type */
 /* eslint-disable @typescript-eslint/naming-convention */
 /* eslint-disable @typescript-eslint/no-var-requires */
 /* eslint-disable no-undef */
 const path = require("path");
@ -29,7 +31,6 @@ const config = {
 function build_mustache() {
 	if (!existsSync(config.dst)){
 		mkdirSync(config.dst);
 	}
@ -48,7 +49,7 @@ function build_mustache() {
    };
    // build main mustache files
-    for(const item of files) {
+    for (const item of files) {
        const filename = path.basename(item, ".json");
        const view = read_json_file(item);
        const to = path.join(config.dst, filename + ".html");