1
0
mirror of https://github.com/pdemian/human2regex.git synced 2025-05-16 04:20:35 -07:00

Added boost support, better code quality

This commit is contained in:
Patrick Demian 2020-11-18 01:55:36 -05:00
parent cdead0834b
commit 11275a9fba
14 changed files with 98 additions and 39 deletions

3
API.md
View File

@ -127,7 +127,8 @@ export enum RegexDialect {
PCRE, PCRE,
DotNet, DotNet,
Java, Java,
Python Python,
Boost
} }
``` ```

6
docs/bundle.min.js vendored

File diff suppressed because one or more lines are too long

View File

@ -1,4 +1,4 @@
<!DOCTYPE html><html lang="en" dir="ltr"><head><meta name="viewport" content="width=device-width,initial-scale=1,shrink-to-fit=no"><meta name="description" content="Create regular expressions with natural, human language"><meta name="keywords" content="Human2Regex, Human, Regex, Natural, Language, Natural Language"><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>Human2Regex</title><link href="bundle.min.css" rel="stylesheet" type="text/css"><meta name="theme-color" content="#212529"><meta name="apple-mobile-web-app-capable" content="yes"><meta name="apple-mobile-web-app-status-bar-style" content="default"><link rel="icon" type="image/x-icon" href="favicon.ico"></head><body><a class="skip skip-top" href="#maincontent">Skip to main content</a><div class="wrapper"><nav class="navbar navbar-expand-lg navbar-light fixed-top" id="mainNav"><div class="container"><a class="navbar-brand" href="index.html"><img src="favicon-small.png" width="30" height="30" class="d-inline-block align-top" alt="logo">&nbsp;Human2Regex</a><div class="float-right heading-links"><a class="heading-link" href="index.html">Index</a> <span>&nbsp;|&nbsp;</span> <a class="heading-link" href="tutorial.html">Tutorial</a></div></div></nav><div class="container" id="maincontent" role="main"><div class="row"><div class="col-lg-8 tenpx-margin-bottom"><div class="form-group row zero-margin-bottom"><label for="dialect" class="col-sm-4 col-form-label">Regex dialect:</label><div class="col-sm-8"><select class="form-control" id="dialect"><option value="js" selected="selected">Javascript</option><option value="dotnet">.NET</option><option value="python">Python</option><option value="java">Java 7+</option><option value="pcre">PCRE</option></select></div></div><h4>Your Regular Expression:</h4><div class="row"><div class="col-xl-11 tenpx-margin-bottom"><input readonly="readonly" class="form-control" id="regex"></div><div class="col-xl-1"><button type="button" class="btn btn-secondary 
float-right" id="clip">Copy</button></div></div><h4>Human Speak:</h4><textarea class="form-control" id="human" rows="25"> <!DOCTYPE html><html lang="en" dir="ltr"><head><meta name="viewport" content="width=device-width,initial-scale=1,shrink-to-fit=no"><meta name="description" content="Create regular expressions with natural, human language"><meta name="keywords" content="Human2Regex, Human, Regex, Natural, Language, Natural Language"><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>Human2Regex</title><link href="bundle.min.css" rel="stylesheet" type="text/css"><meta name="theme-color" content="#212529"><meta name="apple-mobile-web-app-capable" content="yes"><meta name="apple-mobile-web-app-status-bar-style" content="default"><link rel="icon" type="image/x-icon" href="favicon.ico"></head><body><a class="skip skip-top" href="#maincontent">Skip to main content</a><div class="wrapper"><nav class="navbar navbar-expand-lg navbar-light fixed-top" id="mainNav"><div class="container"><a class="navbar-brand" href="index.html"><img src="favicon-small.png" width="30" height="30" class="d-inline-block align-top" alt="logo">&nbsp;Human2Regex</a><div class="float-right heading-links"><a class="heading-link" href="index.html">Index</a> <span>&nbsp;|&nbsp;</span> <a class="heading-link" href="tutorial.html">Tutorial</a></div></div></nav><div class="container" id="maincontent" role="main"><div class="row"><div class="col-lg-8 tenpx-margin-bottom"><div class="form-group row zero-margin-bottom"><label for="dialect" class="col-sm-4 col-form-label">Regex dialect:</label><div class="col-sm-8"><select class="form-control" id="dialect"><option value="js" selected="selected">Javascript</option><option value="dotnet">.NET</option><option value="python">Python</option><option value="boost">C++ Boost</option><option value="java">Java 7+</option><option value="pcre">PCRE</option></select></div></div><h4>Your Regular Expression:</h4><div class="row"><div 
class="col-xl-11 tenpx-margin-bottom"><input readonly="readonly" class="form-control" id="regex"></div><div class="col-xl-1"><button type="button" class="btn btn-secondary float-right" id="clip">Copy</button></div></div><h4>Human Speak:</h4><textarea class="form-control" id="human" rows="25">
/* Make a regex that matches (basic) URLs */ /* Make a regex that matches (basic) URLs */
using global and exact matching using global and exact matching

5
lib/generator.d.ts vendored
View File

@ -8,7 +8,8 @@ export declare enum RegexDialect {
PCRE = 1, PCRE = 1,
DotNet = 2, DotNet = 2,
Java = 3, Java = 3,
Python = 4 Python = 4,
Boost = 5
} }
/** /**
* Interface for all semantic errors * Interface for all semantic errors
@ -287,7 +288,7 @@ export declare class RegularExpressionCST extends H2RCST {
/** /**
* Minimizes the match string by finding duplicates or substrings in the array * Minimizes the match string by finding duplicates or substrings in the array
* *
* @param arr the array * @param arr the array of matches
* @internal * @internal
*/ */
export declare function minimizeMatchString(arr: string[]): string; export declare function minimizeMatchString(arr: string[]): string;

View File

@ -17,6 +17,7 @@ var RegexDialect;
RegexDialect[RegexDialect["DotNet"] = 2] = "DotNet"; RegexDialect[RegexDialect["DotNet"] = 2] = "DotNet";
RegexDialect[RegexDialect["Java"] = 3] = "Java"; RegexDialect[RegexDialect["Java"] = 3] = "Java";
RegexDialect[RegexDialect["Python"] = 4] = "Python"; RegexDialect[RegexDialect["Python"] = 4] = "Python";
RegexDialect[RegexDialect["Boost"] = 5] = "Boost";
})(RegexDialect = exports.RegexDialect || (exports.RegexDialect = {})); })(RegexDialect = exports.RegexDialect || (exports.RegexDialect = {}));
const unicode_property_codes = [ const unicode_property_codes = [
"C", "Cc", "Cf", "Cn", "Co", "Cs", "C", "Cc", "Cf", "Cn", "Co", "Cs",
@ -196,9 +197,9 @@ class MatchSubStatementCST extends H2RCST {
this.values = values; this.values = values;
} }
validate(language) { validate(language) {
let errors = []; const errors = [];
if (this.count) { if (this.count) {
errors = errors.concat(this.count.validate(language)); utilities_1.append(errors, this.count.validate(language));
} }
for (const value of this.values) { for (const value of this.values) {
if (value.type === MatchSubStatementType.Between) { if (value.type === MatchSubStatementType.Between) {
@ -490,9 +491,9 @@ class MatchStatementCST extends StatementCST {
this.matches = matches; this.matches = matches;
} }
validate(language) { validate(language) {
let errors = []; const errors = [];
for (const match of this.matches) { for (const match of this.matches) {
errors = errors.concat(match.statement.validate(language)); utilities_1.append(errors, match.statement.validate(language));
} }
return errors; return errors;
} }
@ -545,12 +546,12 @@ class RepeatStatementCST extends StatementCST {
this.statements = statements; this.statements = statements;
} }
validate(language) { validate(language) {
let errors = []; const errors = [];
if (this.count !== null) { if (this.count !== null) {
errors = errors.concat(this.count.validate(language)); utilities_1.append(errors, this.count.validate(language));
} }
for (const statement of this.statements) { for (const statement of this.statements) {
errors = errors.concat(statement.validate(language)); utilities_1.append(errors, statement.validate(language));
} }
return errors; return errors;
} }
@ -595,13 +596,13 @@ class GroupStatementCST extends StatementCST {
this.statements = statements; this.statements = statements;
} }
validate(language) { validate(language) {
let errors = []; const errors = [];
// All languages currently support named groups // All languages currently support named groups
//if (false) { //if (false) {
// errors.push(this.error("This language does not support named groups")); // errors.push(this.error("This language does not support named groups"));
//} //}
for (const statement of this.statements) { for (const statement of this.statements) {
errors = errors.concat(statement.validate(language)); utilities_1.append(errors, statement.validate(language));
} }
return errors; return errors;
} }
@ -642,9 +643,9 @@ class RegularExpressionCST extends H2RCST {
this.statements = statements; this.statements = statements;
} }
validate(language) { validate(language) {
let errors = this.usings.validate(language); const errors = this.usings.validate(language);
for (const statement of this.statements) { for (const statement of this.statements) {
errors = errors.concat(statement.validate(language)); utilities_1.append(errors, statement.validate(language));
} }
return errors; return errors;
} }
@ -658,7 +659,7 @@ exports.RegularExpressionCST = RegularExpressionCST;
/** /**
* Minimizes the match string by finding duplicates or substrings in the array * Minimizes the match string by finding duplicates or substrings in the array
* *
* @param arr the array * @param arr the array of matches
* @internal * @internal
*/ */
function minimizeMatchString(arr) { function minimizeMatchString(arr) {

8
lib/utilities.d.ts vendored
View File

@ -107,6 +107,14 @@ export declare function removeQuotes(input: string): string;
* @internal * @internal
*/ */
export declare function regexEscape(input: string): string; export declare function regexEscape(input: string): string;
/**
 * Append a list of arrays to an array
 *
 * The elements are added to `array` itself (nothing is returned), in the
 * order the arrays are given.
 *
 * @param array the array to append to
 * @param arrays the list of arrays that you want to add to array
 * @internal
 */
export declare function append<T>(array: T[], ...arrays: T[][]): void;
/** /**
* Common Error class that encapsulates information from the lexer, parser, and generator * Common Error class that encapsulates information from the lexer, parser, and generator
*/ */

View File

@ -1,7 +1,7 @@
"use strict"; "use strict";
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */ /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
Object.defineProperty(exports, "__esModule", { value: true }); Object.defineProperty(exports, "__esModule", { value: true });
exports.CommonError = exports.regexEscape = exports.removeQuotes = exports.findLastIndex = exports.last = exports.first = exports.isSingleRegexCharacter = exports.combineFlags = exports.hasFlag = exports.makeFlag = exports.usefulConditional = exports.unusedParameter = void 0; exports.CommonError = exports.append = exports.regexEscape = exports.removeQuotes = exports.findLastIndex = exports.last = exports.first = exports.isSingleRegexCharacter = exports.combineFlags = exports.hasFlag = exports.makeFlag = exports.usefulConditional = exports.unusedParameter = void 0;
/** /**
* The following section is used because the linter is set up to warn about certain operations * The following section is used because the linter is set up to warn about certain operations
* and for good reason! I'd much rather have these functions than accidentally use bitwise operations, or * and for good reason! I'd much rather have these functions than accidentally use bitwise operations, or
@ -149,6 +149,21 @@ function regexEscape(input) {
return input.replace(/([:\\\-\.\[\]\^\|\(\)\*\+\?\{\}\$\/])/g, "\\$1"); return input.replace(/([:\\\-\.\[\]\^\|\(\)\*\+\?\{\}\$\/])/g, "\\$1");
} }
exports.regexEscape = regexEscape; exports.regexEscape = regexEscape;
/**
 * Append a list of arrays to an array
 *
 * The elements are pushed onto `array` in place, in the order the arrays are
 * given; nothing is returned. Passing `array` itself as one of the sources is
 * supported and appends a copy of its current contents.
 *
 * @param array the array to append to
 * @param arrays the list of arrays that you want to add to array
 * @internal
 */
function append(array, ...arrays) {
    for (const list of arrays) {
        // Iterating an array while pushing onto that same array never
        // terminates, so take a snapshot when the source is the target.
        const items = list === array ? [...list] : list;
        for (const item of items) {
            array.push(item);
        }
    }
}
exports.append = append;
/** /**
* Common Error class that encapsulates information from the lexer, parser, and generator * Common Error class that encapsulates information from the lexer, parser, and generator
*/ */

View File

@ -1,6 +1,6 @@
{ {
"name": "human2regex", "name": "human2regex",
"version": "1.0.1", "version": "1.0.2",
"description": "Humanized Regular Expressions", "description": "Humanized Regular Expressions",
"main": "./lib/index.js", "main": "./lib/index.js",
"typings": "./lib/index.d.ts", "typings": "./lib/index.d.ts",

View File

@ -12,6 +12,7 @@
<option value="js" selected>Javascript</option> <option value="js" selected>Javascript</option>
<option value="dotnet">.NET</option> <option value="dotnet">.NET</option>
<option value="python">Python</option> <option value="python">Python</option>
<option value="boost">C++ Boost</option>
<option value="java">Java 7+</option> <option value="java">Java 7+</option>
<option value="pcre">PCRE</option> <option value="pcre">PCRE</option>
</select> </select>

View File

@ -5,7 +5,7 @@
* @packageDocumentation * @packageDocumentation
*/ */
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last, unusedParameter, makeFlag } from "./utilities"; import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last, unusedParameter, makeFlag, append } from "./utilities";
import { IToken } from "chevrotain"; import { IToken } from "chevrotain";
/** /**
@ -16,7 +16,8 @@ export enum RegexDialect {
PCRE, PCRE,
DotNet, DotNet,
Java, Java,
Python Python,
Boost
} }
/** /**
@ -140,7 +141,6 @@ export enum UsingFlags {
* @remarks Word, Digit, Character, Whitespace, Number, Tab, Linefeed, Newline, and Carriage return are \w+, \d, \w, \s, \d+, \t, \n, \n, \r respectively * @remarks Word, Digit, Character, Whitespace, Number, Tab, Linefeed, Newline, and Carriage return are \w+, \d, \w, \s, \d+, \t, \n, \n, \r respectively
* @internal * @internal
*/ */
export enum MatchSubStatementType { export enum MatchSubStatementType {
SingleString, SingleString,
Between, Between,
@ -228,10 +228,10 @@ export class MatchSubStatementCST extends H2RCST {
} }
public validate(language: RegexDialect): ISemanticError[] { public validate(language: RegexDialect): ISemanticError[] {
let errors: ISemanticError[] = []; const errors: ISemanticError[] = [];
if (this.count) { if (this.count) {
errors = errors.concat(this.count.validate(language)); append(errors, this.count.validate(language));
} }
for (const value of this.values) { for (const value of this.values) {
@ -555,10 +555,10 @@ export class MatchStatementCST extends StatementCST {
} }
public validate(language: RegexDialect): ISemanticError[] { public validate(language: RegexDialect): ISemanticError[] {
let errors: ISemanticError[] = []; const errors: ISemanticError[] = [];
for (const match of this.matches) { for (const match of this.matches) {
errors = errors.concat(match.statement.validate(language)); append(errors, match.statement.validate(language));
} }
return errors; return errors;
@ -617,14 +617,14 @@ export class RepeatStatementCST extends StatementCST {
} }
public validate(language: RegexDialect): ISemanticError[] { public validate(language: RegexDialect): ISemanticError[] {
let errors: ISemanticError[] = []; const errors: ISemanticError[] = [];
if (this.count !== null) { if (this.count !== null) {
errors = errors.concat(this.count.validate(language)); append(errors, this.count.validate(language));
} }
for (const statement of this.statements) { for (const statement of this.statements) {
errors = errors.concat(statement.validate(language)); append(errors, statement.validate(language));
} }
return errors; return errors;
@ -674,7 +674,7 @@ export class GroupStatementCST extends StatementCST {
} }
public validate(language: RegexDialect): ISemanticError[] { public validate(language: RegexDialect): ISemanticError[] {
let errors : ISemanticError[] = []; const errors : ISemanticError[] = [];
// All languages currently support named groups // All languages currently support named groups
//if (false) { //if (false) {
@ -682,7 +682,7 @@ export class GroupStatementCST extends StatementCST {
//} //}
for (const statement of this.statements) { for (const statement of this.statements) {
errors = errors.concat(statement.validate(language)); append(errors, statement.validate(language));
} }
return errors; return errors;
@ -731,10 +731,10 @@ export class RegularExpressionCST extends H2RCST {
} }
public validate(language: RegexDialect): ISemanticError[] { public validate(language: RegexDialect): ISemanticError[] {
let errors: ISemanticError[] = this.usings.validate(language); const errors: ISemanticError[] = this.usings.validate(language);
for (const statement of this.statements) { for (const statement of this.statements) {
errors = errors.concat(statement.validate(language)); append(errors, statement.validate(language));
} }
return errors; return errors;
@ -747,12 +747,10 @@ export class RegularExpressionCST extends H2RCST {
} }
} }
/** /**
* Minimizes the match string by finding duplicates or substrings in the array * Minimizes the match string by finding duplicates or substrings in the array
* *
* @param arr the array * @param arr the array of matches
* @internal * @internal
*/ */
export function minimizeMatchString(arr: string[]): string { export function minimizeMatchString(arr: string[]): string {

View File

@ -149,6 +149,8 @@ document.addEventListener("DOMContentLoaded", function() {
return RegexDialect.PCRE; return RegexDialect.PCRE;
case "python": case "python":
return RegexDialect.Python; return RegexDialect.Python;
case "boost":
return RegexDialect.Boost;
default: default:
return RegexDialect.JS; return RegexDialect.JS;
} }

View File

@ -158,6 +158,21 @@ export function regexEscape(input: string): string {
return input.replace(/([:\\\-\.\[\]\^\|\(\)\*\+\?\{\}\$\/])/g, "\\$1"); return input.replace(/([:\\\-\.\[\]\^\|\(\)\*\+\?\{\}\$\/])/g, "\\$1");
} }
/**
 * Append a list of arrays to an array
 *
 * The elements are pushed onto `array` in place, in the order the arrays are
 * given; nothing is returned. Passing `array` itself as one of the sources is
 * supported and appends a copy of its current contents.
 *
 * @param array the array to append to
 * @param arrays the list of arrays that you want to add to array
 * @internal
 */
export function append<T>(array: T[], ...arrays: T[][]): void {
    for (const list of arrays) {
        // Iterating an array while pushing onto that same array never
        // terminates, so take a snapshot when the source is the target.
        const items = list === array ? [...list] : list;
        for (const item of items) {
            array.push(item);
        }
    }
}
/** /**
* Common Error class that encapsulates information from the lexer, parser, and generator * Common Error class that encapsulates information from the lexer, parser, and generator
*/ */

View File

@ -91,6 +91,12 @@ describe("Generator functionality", function() {
const reg1 = parser.parse(toks1); const reg1 = parser.parse(toks1);
expect(reg1.validate(RegexDialect.JS).length).toBe(0); expect(reg1.validate(RegexDialect.JS).length).toBe(0);
expect(reg1.toRegex(RegexDialect.JS)).toBe("/\\B\\W+\\D\\W\\S\\D+[^\\t][^\\n][^\\r]/"); expect(reg1.toRegex(RegexDialect.JS)).toBe("/\\B\\W+\\D\\W\\S\\D+[^\\t][^\\n][^\\r]/");
const toks2 = lexer.tokenize("match letter, integer, decimal").tokens;
const reg2 = parser.parse(toks2);
expect(reg2.validate(RegexDialect.JS).length).toBe(0);
expect(reg2.toRegex(RegexDialect.JS)).toBe("/[a-zA-Z][+-]?\\d+[+-]?(?:(?:\\d+[,.]?\\d*)|(?:[,.]\\d+))/");
expect(reg2.toRegex(RegexDialect.PCRE)).toBe("/[[:alpha:]][+-]?\\d+[+-]?(?:(?:\\d+[,.]?\\d*)|(?:[,.]\\d+))/");
}); });
it("doesn't clobber repetition", function() { it("doesn't clobber repetition", function() {

View File

@ -1,7 +1,7 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */ /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
import "../src/utilities"; import "../src/utilities";
import { isSingleRegexCharacter, findLastIndex, removeQuotes, regexEscape, hasFlag, combineFlags, makeFlag, first, last, CommonError } from "../src/utilities"; import { isSingleRegexCharacter, findLastIndex, removeQuotes, regexEscape, hasFlag, combineFlags, makeFlag, first, last, CommonError, append } from "../src/utilities";
import { UsingFlags, ISemanticError } from "../src/generator"; import { UsingFlags, ISemanticError } from "../src/generator";
import { IRecognitionException, ILexingError, createTokenInstance } from "chevrotain"; import { IRecognitionException, ILexingError, createTokenInstance } from "chevrotain";
import { Indent } from "../src/tokens"; import { Indent } from "../src/tokens";
@ -26,6 +26,17 @@ describe("Utility functions", function() {
expect(hasFlag(combineFlags(UsingFlags.Global, UsingFlags.Exact), UsingFlags.Multiline)).toBe(false); expect(hasFlag(combineFlags(UsingFlags.Global, UsingFlags.Exact), UsingFlags.Multiline)).toBe(false);
}); });
// Checks that append() adds elements to the target array in place, both for a
// single source array and for several source arrays passed in one call.
it("appends correctly", function() {
const my_array = [ 1, 2, 3 ];
// one source array
append(my_array, [ 4, 5, 6 ]);
expect(my_array).toEqual([ 1, 2, 3, 4, 5, 6 ]);
// several source arrays are appended in argument order
append(my_array, [ 7, 8 ], [ 9 ]);
expect(my_array).toEqual([ 1, 2, 3, 4, 5, 6, 7, 8, 9 ]);
});
it("should return correct array elements", function() { it("should return correct array elements", function() {
expect(first([ 1, 2, 3 ])).toBe(1); expect(first([ 1, 2, 3 ])).toBe(1);
expect(last([ 1, 2, 3 ])).toBe(3); expect(last([ 1, 2, 3 ])).toBe(3);