mirror of
https://github.com/pdemian/human2regex.git
synced 2025-05-16 04:20:35 -07:00
Merge pull request #4 from pdemian/new-static-site-generator
New static site generator
This commit is contained in:
commit
32a641c8e4
10
API.md
10
API.md
@ -132,17 +132,17 @@ export enum RegexDialect {
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
After choosing one, you must validate the regular expression. This may be skipped if and only if the input has already been validated before as the generator is not guaranteed to work unless there are no errors.
|
After choosing one, you should validate the regular expression against that dialect. This may be skipped if and only if the input has already been validated before. For example, you may keep the `parse_result` around and generate from it multiple times, validating only the first time. The generator is not guaranteed to work unless there are no validation errors: it does no validation itself and, given invalid input, may either return garbage output or crash.
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
const validation_errors = parse_result.validate();
|
const validation_errors = parse_result.validate(RegexDialect.JS);
|
||||||
```
|
```
|
||||||
|
|
||||||
The result is a list of errors which, again, is a `CommonError`. If there are no errors, you can call the `toRegex()` function to create a string representation of the regular expression. You can also call the `toRegExp()` function to create a `RegExp` expression used in Javascript
|
The result is a list of errors, each of which is a `CommonError`. If there are no errors, you can call the `toRegex()` function to create a string representation of the regular expression. You can also call the `toRegExp()` function to create a `RegExp` object for use in JavaScript.
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
const my_regex_string = parse_result.toRegex(); // type is string
|
const my_regex_string = parse_result.toRegex(RegexDialect.JS); // type is string
|
||||||
const my_regex = parse_result.toRegExp(); // type is RegExp
|
const my_regex = parse_result.toRegExp(RegexDialect.JS); // type is RegExp
|
||||||
```
|
```
|
||||||
|
|
||||||
This will contain your regular expression.
|
This will contain your regular expression.
|
||||||
|
2
LICENSE
2
LICENSE
@ -1,6 +1,6 @@
|
|||||||
The MIT License (MIT)
|
The MIT License (MIT)
|
||||||
|
|
||||||
Copyright (c) 2020 Patrick Demian
|
Copyright (c) 2021 Patrick Demian
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
@ -81,6 +81,7 @@ The API reference is available [here](API.md)
|
|||||||
|
|
||||||
|
|
||||||
## Todo
|
## Todo
|
||||||
- Add more regex options such as back references, subroutines, lookahead/behind, and more character classes (eg, `[:alpha:]`)
|
- Add more regex options such as subroutines~~, conditions, and lookahead/behind~~
|
||||||
- Fix error messages (They sometimes point to the wrong location, off by 1 errors, etc)
|
- Fix error messages (They sometimes point to the wrong location, off by 1 errors, etc)
|
||||||
- Use a different/better static site generation method
|
- Add more useful lex/parse errors (What even is an EarlyExitException?)
|
||||||
|
- ~~Use a different/better static site generation method~~
|
15
config.json
Normal file
15
config.json
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
{
|
||||||
|
"prod": true,
|
||||||
|
"dst": "./docs/",
|
||||||
|
"src": "./src/",
|
||||||
|
"compression_config": {
|
||||||
|
"html": {
|
||||||
|
"collapseWhitespace": true,
|
||||||
|
"minifyCSS": true,
|
||||||
|
"minifyJS": true,
|
||||||
|
"removeComments": true,
|
||||||
|
"removeEmptyAttributes": true,
|
||||||
|
"removeRedundantAttributes": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -1 +1 @@
|
|||||||
<!DOCTYPE html><html lang="en" dir="ltr"><head><meta name="viewport" content="width=device-width,initial-scale=1,shrink-to-fit=no"><meta name="description" content="Not Found"><meta name="keywords" content="Human2Regex, Human, Regex, Natural, Language, Natural Language"><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>Error 404 - Not Found</title><link href="bundle.min.css" rel="stylesheet" type="text/css"><meta name="theme-color" content="#212529"><meta name="apple-mobile-web-app-capable" content="yes"><meta name="apple-mobile-web-app-status-bar-style" content="default"><link rel="icon" type="image/x-icon" href="favicon.ico"></head><body><a class="skip skip-top" href="#maincontent">Skip to main content</a><div class="wrapper"><nav class="navbar navbar-expand-lg navbar-light fixed-top" id="mainNav"><div class="container"><a class="navbar-brand" href="index.html"><img src="favicon-small.png" width="30" height="30" class="d-inline-block align-top" alt="logo"> Human2Regex</a><div class="float-right heading-links"><a class="heading-link" href="index.html">Index</a> <span> | </span> <a class="heading-link" href="tutorial.html">Tutorial</a></div></div></nav><div class="container contained-container" id="maincontent" role="main"><div class="align_header"><div class="mx-auto"><div class="site-heading"><h1>404</h1><span class="subheading">Not Found</span></div></div></div><br><br><br><div class="row"><div class="col-12 mx-auto"><h3 class="align_header">The resource could not be found.</h3></div></div></div><footer><div class="container"><div class="row"><div class="col-lg-8 col-md-10 mx-auto"><p class="copyright">Copyright © 2020 Patrick Demian. This page's source code is available at <a rel="noopener noreferrer" href="https://github.com/pdemian/human2regex">github.com/pdemian/human2regex</a></p></div></div></div></footer></div><script defer="defer" src="bundle.min.js"></script></body></html>
|
<!DOCTYPE html><html lang="en" dir="ltr"><head><meta name="viewport" content="width=device-width,initial-scale=1,shrink-to-fit=no"><meta name="description" content="Not Found"><meta name="keywords" content="Human2Regex, Human, Regex, Natural, Language, Natural Language"><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>Error 404 - Not Found</title><link href="/bundle.min.css" rel="stylesheet" type="text/css"><meta name="theme-color" content="#212529"><meta name="apple-mobile-web-app-capable" content="yes"><meta name="apple-mobile-web-app-status-bar-style" content="default"><link rel="icon" type="image/x-icon" href="/favicon.ico"></head><body><a class="skip skip-top" href="#maincontent">Skip to main content</a><div class="wrapper"><nav class="navbar navbar-expand-lg navbar-light fixed-top" id="mainNav"><div class="container"><a class="navbar-brand" href="/index.html"><img src="/favicon-small.png" width="30" height="30" class="d-inline-block align-top" alt="logo"> Human2Regex</a><div class="float-right heading-links"><a class="heading-link" href="/index.html">Index</a> <span> | </span> <a class="heading-link" href="/tutorial.html">Tutorial</a></div></div></nav><div class="container contained-container" id="maincontent" role="main"><div class="align_header"><div class="mx-auto"><div class="site-heading"><h1>404</h1><span class="subheading">Not Found</span></div></div></div><br><br><br><div class="row"><div class="col-12 mx-auto"><h3 class="align_header">The resource could not be found.</h3></div></div></div><footer><div class="container"><div class="row"><div class="col-lg-8 col-md-10 mx-auto"><p class="copyright">Copyright © 2021 Patrick Demian. This page's source code is available at <a rel="noopener noreferrer" href="https://github.com/pdemian/human2regex">github.com/pdemian/human2regex</a></p></div></div></div></footer></div><script defer="defer" src="/bundle.min.js"></script></body></html>
|
2
docs/bundle.min.css
vendored
2
docs/bundle.min.css
vendored
File diff suppressed because one or more lines are too long
15
docs/bundle.min.js
vendored
15
docs/bundle.min.js
vendored
File diff suppressed because one or more lines are too long
@ -1,5 +1,4 @@
|
|||||||
<!DOCTYPE html><html lang="en" dir="ltr"><head><meta name="viewport" content="width=device-width,initial-scale=1,shrink-to-fit=no"><meta name="description" content="Create regular expressions with natural, human language"><meta name="keywords" content="Human2Regex, Human, Regex, Natural, Language, Natural Language"><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>Human2Regex</title><link href="bundle.min.css" rel="stylesheet" type="text/css"><meta name="theme-color" content="#212529"><meta name="apple-mobile-web-app-capable" content="yes"><meta name="apple-mobile-web-app-status-bar-style" content="default"><link rel="icon" type="image/x-icon" href="favicon.ico"></head><body><a class="skip skip-top" href="#maincontent">Skip to main content</a><div class="wrapper"><nav class="navbar navbar-expand-lg navbar-light fixed-top" id="mainNav"><div class="container"><a class="navbar-brand" href="index.html"><img src="favicon-small.png" width="30" height="30" class="d-inline-block align-top" alt="logo"> Human2Regex</a><div class="float-right heading-links"><a class="heading-link" href="index.html">Index</a> <span> | </span> <a class="heading-link" href="tutorial.html">Tutorial</a></div></div></nav><div class="container" id="maincontent" role="main"><div class="row"><div class="col-lg-8 tenpx-margin-bottom"><div class="form-group row zero-margin-bottom"><label for="dialect" class="col-sm-4 col-form-label">Regex dialect:</label><div class="col-sm-8"><select class="form-control" id="dialect"><option value="js" selected="selected">Javascript</option><option value="dotnet">.NET</option><option value="python">Python</option><option value="boost">C++ Boost</option><option value="java">Java 7+</option><option value="pcre">PCRE</option></select></div></div><h4>Your Regular Expression:</h4><div class="row"><div class="col-xl-11 tenpx-margin-bottom"><input readonly="readonly" class="form-control" id="regex"></div><div class="col-xl-1"><button type="button" 
class="btn btn-secondary float-right" id="clip">Copy</button></div></div><h4>Human Speak:</h4><textarea class="form-control" id="human" rows="25">
|
<!DOCTYPE html><html lang="en" dir="ltr"><head><meta name="viewport" content="width=device-width,initial-scale=1,shrink-to-fit=no"><meta name="description" content="Create regular expressions with natural, human language"><meta name="keywords" content="Human2Regex, Human, Regex, Natural, Language, Natural Language"><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>Human2Regex</title><link href="/bundle.min.css" rel="stylesheet" type="text/css"><meta name="theme-color" content="#212529"><meta name="apple-mobile-web-app-capable" content="yes"><meta name="apple-mobile-web-app-status-bar-style" content="default"><link rel="icon" type="image/x-icon" href="/favicon.ico"></head><body><a class="skip skip-top" href="#maincontent">Skip to main content</a><div class="wrapper"><nav class="navbar navbar-expand-lg navbar-light fixed-top" id="mainNav"><div class="container"><a class="navbar-brand" href="/index.html"><img src="/favicon-small.png" width="30" height="30" class="d-inline-block align-top" alt="logo"> Human2Regex</a><div class="float-right heading-links"><a class="heading-link" href="/index.html">Index</a> <span> | </span> <a class="heading-link" href="/tutorial.html">Tutorial</a></div></div></nav><div class="container" id="maincontent" role="main"><div class="row"><div class="col-lg-8 tenpx-margin-bottom"><div class="form-group row zero-margin-bottom"><label for="dialect" class="col-sm-4 col-form-label">Regex dialect:</label><div class="col-sm-8"><select class="form-control" id="dialect"><option value="js" selected="selected">Javascript</option><option value="dotnet">.NET</option><option value="python">Python</option><option value="boost">C++ Boost</option><option value="java">Java 7+</option><option value="pcre">PCRE</option></select></div></div><h4>Your Regular Expression:</h4><div class="row"><div class="col-xl-11 tenpx-margin-bottom"><input readonly="readonly" class="form-control" id="regex"></div><div class="col-xl-1"><button type="button" 
class="btn btn-secondary float-right" id="clip">Copy</button></div></div><h4>Human Speak:</h4><textarea class="form-control" id="human" rows="25">/* Make a regex that matches (basic) URLs */
|
||||||
/* Make a regex that matches (basic) URLs */
|
|
||||||
|
|
||||||
using global and exact matching
|
using global and exact matching
|
||||||
create an optional group called protocol
|
create an optional group called protocol
|
||||||
@ -30,5 +29,4 @@ create an optional group
|
|||||||
create an optional group
|
create an optional group
|
||||||
# fragment, again, we don't care, so ignore everything afterwards
|
# fragment, again, we don't care, so ignore everything afterwards
|
||||||
match "#"
|
match "#"
|
||||||
match 0+ any thing
|
match 0+ any thing</textarea><h4>Errors:</h4><textarea readonly="readonly" class="form-control" id="errors" rows="5"></textarea></div><br><div class="col-lg-4 tenpx-margin-bottom"><div class="cheatsheet"><h2>Cheat Sheet:</h2><p>Full documentation available <a href="/tutorial.html">here</a></p><p class="font-weight-bold">Matching</p><p><code class="cm-s-idea">match "hello world"</code> matches "hello world" exactly</p><p></p><p><code class="cm-s-idea">match "hello" then optionally " world"</code> matches "hello" or "hello world"</p><p><code class="cm-s-idea">match "hello" or "world"</code> matches "hello" or "world</p><p><code class="cm-s-idea">match a word</code> matches any word</p><p class="font-weight-bold">Repetition</p><p><code class="cm-s-idea">match 0+ "hello"</code> matches 0 or more "hello"s</p><p><code class="cm-s-idea">match 3 "hello"</code> matches exactly "hellohellohello"</p><p><code class="cm-s-idea">match 1 to 5 "hello"</code> matches between 1 to 5 "hello"s</p><p><code class="cm-s-idea">repeat 0 or more</code> repeats the intended text 0 or more times (default)</p><p><code class="cm-s-idea">optionally repeat between 3 to 5</code> optionally repeats the indented text 3 to 5 times</p><p class="font-weight-bold">Grouping</p><p><code class="cm-s-idea">create a group called mygroup</code> creates a group called "mygroup"</p><p><code class="cm-s-idea">create an optional group</code> creates an unnamed optional group</p><p class="font-weight-bold">Using</p><p><code class="cm-s-idea">using global and case insensitive</code> uses the 'g' and 'i' flags</p><p class="font-weight-bold">Misc</p><p><code class="cm-s-idea">// comment</code> is a single line comment</p><p><code class="cm-s-idea">/* comment */</code> is a multi line comment</p></div></div></div></div><footer><div class="container"><div class="row"><div class="col-lg-8 col-md-10 mx-auto"><p class="copyright">Copyright © 2021 Patrick Demian. 
This page's source code is available at <a rel="noopener noreferrer" href="https://github.com/pdemian/human2regex">github.com/pdemian/human2regex</a></p></div></div></div></footer></div><script defer="defer" src="/bundle.min.js"></script></body></html>
|
||||||
</textarea><h4>Errors:</h4><textarea readonly="readonly" class="form-control" id="errors" rows="5"></textarea></div><br><div class="col-lg-4 tenpx-margin-bottom"><div class="cheatsheet"><h2>Cheat Sheet:</h2><p>Full documentation available <a href="tutorial.html">here</a></p><p class="font-weight-bold">Matching</p><p><code class="cm-s-idea">match "hello world"</code> matches "hello world" exactly</p><p></p><p><code class="cm-s-idea">match "hello" then optionally " world"</code> matches "hello" or "hello world"</p><p><code class="cm-s-idea">match "hello" or "world"</code> matches "hello" or "world</p><p><code class="cm-s-idea">match a word</code> matches any word</p><p class="font-weight-bold">Repetition</p><p><code class="cm-s-idea">match 0+ "hello"</code> matches 0 or more "hello"s</p><p><code class="cm-s-idea">match 3 "hello"</code> matches exactly "hellohellohello"</p><p><code class="cm-s-idea">match 1 to 5 "hello"</code> matches between 1 to 5 "hello"s</p><p><code class="cm-s-idea">repeat 0 or more</code> repeats the intended text 0 or more times (default)</p><p><code class="cm-s-idea">optionally repeat between 3 to 5</code> optionally repeats the indented text 3 to 5 times</p><p class="font-weight-bold">Grouping</p><p><code class="cm-s-idea">create a group called mygroup</code> creates a group called "mygroup"</p><p><code class="cm-s-idea">create an optional group</code> creates an unnamed optional group</p><p class="font-weight-bold">Using</p><p><code class="cm-s-idea">using global and case insensitive</code> uses the 'g' and 'i' flags</p><p class="font-weight-bold">Misc</p><p><code class="cm-s-idea">// comment</code> is a single line comment</p><p><code class="cm-s-idea">/* comment */</code> is a multi line comment</p></div></div></div></div><footer><div class="container"><div class="row"><div class="col-lg-8 col-md-10 mx-auto"><p class="copyright">Copyright © 2020 Patrick Demian. 
This page's source code is available at <a rel="noopener noreferrer" href="https://github.com/pdemian/human2regex">github.com/pdemian/human2regex</a></p></div></div></div></footer></div><script defer="defer" src="bundle.min.js"></script></body></html>
|
|
File diff suppressed because one or more lines are too long
142
lib/generator.d.ts
vendored
142
lib/generator.d.ts
vendored
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
import { IToken } from "chevrotain";
|
import { IToken } from "chevrotain";
|
||||||
/**
|
/**
|
||||||
* List of regular expression dialects we support
|
* List of regular expression dialects we support
|
||||||
@ -21,29 +21,45 @@ export interface ISemanticError {
|
|||||||
message: string;
|
message: string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* The base concrete syntax tree class
|
* Context for validation
|
||||||
*
|
*
|
||||||
|
* @remarks Currently only used to validate groups
|
||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
export declare abstract class H2RCST {
|
export declare class GeneratorContext {
|
||||||
tokens: IToken[];
|
groups: {
|
||||||
|
[key: string]: {
|
||||||
|
startLine: number;
|
||||||
|
startColumn: number;
|
||||||
|
length: number;
|
||||||
|
};
|
||||||
|
};
|
||||||
/**
|
/**
|
||||||
* Constructor for H2RCST
|
* Checks to see if we already have a group defined
|
||||||
*
|
*
|
||||||
* @param tokens Tokens used to calculate where an error occured
|
* @param identifier the group name
|
||||||
* @internal
|
* @returns true if the group name already exists
|
||||||
*/
|
*/
|
||||||
constructor(tokens: IToken[]);
|
hasGroup(identifier: string): boolean;
|
||||||
|
/**
|
||||||
|
* Adds the identifier to the group list
|
||||||
|
*
|
||||||
|
* @param identifier the group name
|
||||||
|
*/
|
||||||
|
addGroup(identifier: string, tokens: IToken[]): void;
|
||||||
|
}
|
||||||
|
interface Generates {
|
||||||
/**
|
/**
|
||||||
* Validate that this is both valid and can be generated in the specified language
|
* Validate that this is both valid and can be generated in the specified language
|
||||||
*
|
*
|
||||||
* @remarks There is no guarantee toRegex will work unless validate returns no errors
|
* @remarks There is no guarantee toRegex will work unless validate returns no errors
|
||||||
*
|
*
|
||||||
* @param language the regex dialect we're validating
|
* @param language the regex dialect we're validating
|
||||||
|
* @param context the generator context
|
||||||
* @returns A list of errors
|
* @returns A list of errors
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
abstract validate(language: RegexDialect): ISemanticError[];
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
/**
|
/**
|
||||||
* Generate a regular expression fragment based on this syntax tree
|
* Generate a regular expression fragment based on this syntax tree
|
||||||
*
|
*
|
||||||
@ -53,6 +69,23 @@ export declare abstract class H2RCST {
|
|||||||
* @returns a regular expression fragment
|
* @returns a regular expression fragment
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
|
toRegex(language: RegexDialect): string;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* The base concrete syntax tree class
|
||||||
|
*
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export declare abstract class H2RCST implements Generates {
|
||||||
|
tokens: IToken[];
|
||||||
|
/**
|
||||||
|
* Constructor for H2RCST
|
||||||
|
*
|
||||||
|
* @param tokens Tokens used to calculate where an error occurred
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
constructor(tokens: IToken[]);
|
||||||
|
abstract validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
abstract toRegex(language: RegexDialect): string;
|
abstract toRegex(language: RegexDialect): string;
|
||||||
/**
|
/**
|
||||||
* Creates an ISemanticError with a given message and the tokens provided from the constructor
|
* Creates an ISemanticError with a given message and the tokens provided from the constructor
|
||||||
@ -126,7 +159,7 @@ export declare class MatchSubStatementValue {
|
|||||||
*
|
*
|
||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
export declare class MatchStatementValue {
|
export declare class MatchStatementValue implements Generates {
|
||||||
optional: boolean;
|
optional: boolean;
|
||||||
statement: MatchSubStatementCST;
|
statement: MatchSubStatementCST;
|
||||||
/**
|
/**
|
||||||
@ -137,6 +170,8 @@ export declare class MatchStatementValue {
|
|||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
constructor(optional: boolean, statement: MatchSubStatementCST);
|
constructor(optional: boolean, statement: MatchSubStatementCST);
|
||||||
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
|
toRegex(language: RegexDialect): string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* The base class for all statement concrete syntax trees
|
* The base class for all statement concrete syntax trees
|
||||||
@ -163,7 +198,7 @@ export declare class MatchSubStatementCST extends H2RCST {
|
|||||||
* @param values sub statements to match
|
* @param values sub statements to match
|
||||||
*/
|
*/
|
||||||
constructor(tokens: IToken[], count: CountSubStatementCST | null, invert: boolean, values: MatchSubStatementValue[]);
|
constructor(tokens: IToken[], count: CountSubStatementCST | null, invert: boolean, values: MatchSubStatementValue[]);
|
||||||
validate(language: RegexDialect): ISemanticError[];
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
toRegex(language: RegexDialect): string;
|
toRegex(language: RegexDialect): string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
@ -180,7 +215,7 @@ export declare class UsingStatementCST extends H2RCST {
|
|||||||
* @param flags using flags
|
* @param flags using flags
|
||||||
*/
|
*/
|
||||||
constructor(tokens: IToken[], flags: UsingFlags[]);
|
constructor(tokens: IToken[], flags: UsingFlags[]);
|
||||||
validate(language: RegexDialect): ISemanticError[];
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
toRegex(language: RegexDialect): string;
|
toRegex(language: RegexDialect): string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
@ -201,7 +236,7 @@ export declare class CountSubStatementCST extends H2RCST {
|
|||||||
* @param opt option modifier
|
* @param opt option modifier
|
||||||
*/
|
*/
|
||||||
constructor(tokens: IToken[], from: number, to?: number | null, opt?: "inclusive" | "exclusive" | "+" | null);
|
constructor(tokens: IToken[], from: number, to?: number | null, opt?: "inclusive" | "exclusive" | "+" | null);
|
||||||
validate(language: RegexDialect): ISemanticError[];
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
toRegex(language: RegexDialect): string;
|
toRegex(language: RegexDialect): string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
@ -216,10 +251,10 @@ export declare class MatchStatementCST extends StatementCST {
|
|||||||
* Constructor for MatchStatementCST
|
* Constructor for MatchStatementCST
|
||||||
*
|
*
|
||||||
* @param tokens Tokens used to calculate where an error occured
|
* @param tokens Tokens used to calculate where an error occured
|
||||||
* @param matches
|
* @param matches the list of matches
|
||||||
*/
|
*/
|
||||||
constructor(tokens: IToken[], completely_optional: boolean, matches: MatchStatementValue[]);
|
constructor(tokens: IToken[], completely_optional: boolean, matches: MatchStatementValue[]);
|
||||||
validate(language: RegexDialect): ISemanticError[];
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
toRegex(language: RegexDialect): string;
|
toRegex(language: RegexDialect): string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
@ -240,7 +275,7 @@ export declare class RepeatStatementCST extends StatementCST {
|
|||||||
* @param statements the statements to repeat
|
* @param statements the statements to repeat
|
||||||
*/
|
*/
|
||||||
constructor(tokens: IToken[], optional: boolean, count: CountSubStatementCST | null, statements: StatementCST[]);
|
constructor(tokens: IToken[], optional: boolean, count: CountSubStatementCST | null, statements: StatementCST[]);
|
||||||
validate(language: RegexDialect): ISemanticError[];
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
toRegex(language: RegexDialect): string;
|
toRegex(language: RegexDialect): string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
@ -262,7 +297,70 @@ export declare class GroupStatementCST extends StatementCST {
|
|||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
constructor(tokens: IToken[], optional: boolean, name: string | null, statements: StatementCST[]);
|
constructor(tokens: IToken[], optional: boolean, name: string | null, statements: StatementCST[]);
|
||||||
validate(language: RegexDialect): ISemanticError[];
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
|
toRegex(language: RegexDialect): string;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Concrete Syntax Tree for a Backreference statement
|
||||||
|
*
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export declare class BackrefStatementCST extends StatementCST {
|
||||||
|
private optional;
|
||||||
|
private count;
|
||||||
|
private name;
|
||||||
|
/**
|
||||||
|
* Constructor for BackrefStatementCST
|
||||||
|
*
|
||||||
|
* @param tokens Tokens used to calculate where an error occurred
|
||||||
|
* @param optional is this backref optional
|
||||||
|
* @param count optional number of times to repeat
|
||||||
|
* @param name the group name to call
|
||||||
|
*/
|
||||||
|
constructor(tokens: IToken[], optional: boolean, count: CountSubStatementCST | null, name: string);
|
||||||
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
|
toRegex(language: RegexDialect): string;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Concrete Syntax Tree for an If Pattern statement
|
||||||
|
*
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export declare class IfPatternStatementCST extends StatementCST {
|
||||||
|
private matches;
|
||||||
|
private true_statements;
|
||||||
|
private false_statements;
|
||||||
|
/**
|
||||||
|
* Constructor for IfPatternStatementCST
|
||||||
|
*
|
||||||
|
* @param tokens Tokens used to calculate where an error occurred
|
||||||
|
* @param matches list of matches to test against
|
||||||
|
* @param true_statements true path
|
||||||
|
* @param false_statements false path
|
||||||
|
*/
|
||||||
|
constructor(tokens: IToken[], matches: MatchStatementValue[], true_statements: StatementCST[], false_statements: StatementCST[]);
|
||||||
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
|
toRegex(language: RegexDialect): string;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Concrete Syntax Tree for an If group Ident statement
|
||||||
|
*
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export declare class IfIdentStatementCST extends StatementCST {
|
||||||
|
private identifier;
|
||||||
|
private true_statements;
|
||||||
|
private false_statements;
|
||||||
|
/**
|
||||||
|
* Constructor for IfIdentStatementCST
|
||||||
|
*
|
||||||
|
* @param tokens Tokens used to calculate where an error occurred
|
||||||
|
* @param identifier the group identifier to check
|
||||||
|
* @param true_statements true path
|
||||||
|
* @param false_statements false path
|
||||||
|
*/
|
||||||
|
constructor(tokens: IToken[], identifier: string, true_statements: StatementCST[], false_statements: StatementCST[]);
|
||||||
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
toRegex(language: RegexDialect): string;
|
toRegex(language: RegexDialect): string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
@ -282,13 +380,7 @@ export declare class RegularExpressionCST extends H2RCST {
|
|||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
constructor(tokens: IToken[], usings: UsingStatementCST, statements: StatementCST[]);
|
constructor(tokens: IToken[], usings: UsingStatementCST, statements: StatementCST[]);
|
||||||
validate(language: RegexDialect): ISemanticError[];
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
toRegex(language: RegexDialect): string;
|
toRegex(language: RegexDialect): string;
|
||||||
}
|
}
|
||||||
/**
|
export {};
|
||||||
* Minimizes the match string by finding duplicates or substrings in the array
|
|
||||||
*
|
|
||||||
* @param arr the array of matches
|
|
||||||
* @internal
|
|
||||||
*/
|
|
||||||
export declare function minimizeMatchString(arr: string[]): string;
|
|
||||||
|
423
lib/generator.js
423
lib/generator.js
@ -1,12 +1,13 @@
|
|||||||
"use strict";
|
"use strict";
|
||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
Object.defineProperty(exports, "__esModule", { value: true });
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
exports.minimizeMatchString = exports.RegularExpressionCST = exports.GroupStatementCST = exports.RepeatStatementCST = exports.MatchStatementCST = exports.CountSubStatementCST = exports.UsingStatementCST = exports.MatchSubStatementCST = exports.StatementCST = exports.MatchStatementValue = exports.MatchSubStatementValue = exports.MatchSubStatementType = exports.UsingFlags = exports.H2RCST = exports.RegexDialect = void 0;
|
exports.RegularExpressionCST = exports.IfIdentStatementCST = exports.IfPatternStatementCST = exports.BackrefStatementCST = exports.GroupStatementCST = exports.RepeatStatementCST = exports.MatchStatementCST = exports.CountSubStatementCST = exports.UsingStatementCST = exports.MatchSubStatementCST = exports.StatementCST = exports.MatchStatementValue = exports.MatchSubStatementValue = exports.MatchSubStatementType = exports.UsingFlags = exports.H2RCST = exports.GeneratorContext = exports.RegexDialect = void 0;
|
||||||
/**
|
/**
|
||||||
* Includes all Concrete Syntax Trees for Human2Regex
|
* Includes all Concrete Syntax Trees for Human2Regex
|
||||||
* @packageDocumentation
|
* @packageDocumentation
|
||||||
*/
|
*/
|
||||||
const utilities_1 = require("./utilities");
|
const utilities_1 = require("./utilities");
|
||||||
|
const generator_helper_1 = require("./generator_helper");
|
||||||
/**
|
/**
|
||||||
* List of regular expression dialects we support
|
* List of regular expression dialects we support
|
||||||
*/
|
*/
|
||||||
@ -49,6 +50,42 @@ const unicode_script_codes = [
|
|||||||
"Tai_Tham", "Tai_Viet", "Takri", "Tamil", "Telugu", "Thaana", "Thai",
|
"Tai_Tham", "Tai_Viet", "Takri", "Tamil", "Telugu", "Thaana", "Thai",
|
||||||
"Tibetan", "Tifinagh", "Ugaritic", "Vai", "Yi"
|
"Tibetan", "Tifinagh", "Ugaritic", "Vai", "Yi"
|
||||||
];
|
];
|
||||||
|
/**
 * Context for validation
 *
 * @remarks Currently only used to validate groups
 * @internal
 */
class GeneratorContext {
    constructor() {
        // A prototype-less dictionary: on a plain {} the key "__proto__" is
        // swallowed by the inherited setter and would never be stored as a group.
        this.groups = Object.create(null);
    }
    /**
     * Checks to see if we already have a group defined
     *
     * @param identifier the group name
     * @returns true if the group name already exists
     */
    hasGroup(identifier) {
        return Object.prototype.hasOwnProperty.call(this.groups, identifier);
    }
    /**
     * Adds the identifier to the group list
     *
     * @param identifier the group name
     * @param tokens the tokens the group was built from; the first and last
     *               token supply the recorded position and length
     */
    addGroup(identifier, tokens) {
        // falls back only when the value is null or undefined
        const orElse = (value, fallback) => (value !== null && value !== undefined ? value : fallback);
        const f = utilities_1.first(tokens);
        const l = utilities_1.last(tokens);
        this.groups[identifier] = {
            startLine: orElse(f.startLine, NaN),
            startColumn: orElse(f.startColumn, NaN),
            // a last token without an end offset contributes its start offset instead
            length: orElse(l.endOffset, l.startOffset) - f.startOffset,
        };
    }
}
|
||||||
|
exports.GeneratorContext = GeneratorContext;
|
||||||
/**
|
/**
|
||||||
* The base concrete syntax tree class
|
* The base concrete syntax tree class
|
||||||
*
|
*
|
||||||
@ -166,6 +203,17 @@ class MatchStatementValue {
|
|||||||
this.statement = statement;
|
this.statement = statement;
|
||||||
/* empty */
|
/* empty */
|
||||||
}
|
}
|
||||||
|
validate(language, context) {
|
||||||
|
return this.statement.validate(language, context);
|
||||||
|
}
|
||||||
|
toRegex(language) {
|
||||||
|
let match_stmt = this.statement.toRegex(language);
|
||||||
|
// need to group if optional and ungrouped
|
||||||
|
if (this.optional) {
|
||||||
|
match_stmt = generator_helper_1.groupIfRequired(match_stmt) + "?";
|
||||||
|
}
|
||||||
|
return match_stmt;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
exports.MatchStatementValue = MatchStatementValue;
|
exports.MatchStatementValue = MatchStatementValue;
|
||||||
/**
|
/**
|
||||||
@ -196,10 +244,10 @@ class MatchSubStatementCST extends H2RCST {
|
|||||||
this.invert = invert;
|
this.invert = invert;
|
||||||
this.values = values;
|
this.values = values;
|
||||||
}
|
}
|
||||||
validate(language) {
|
validate(language, context) {
|
||||||
const errors = [];
|
const errors = [];
|
||||||
if (this.count) {
|
if (this.count) {
|
||||||
utilities_1.append(errors, this.count.validate(language));
|
utilities_1.append(errors, this.count.validate(language, context));
|
||||||
}
|
}
|
||||||
for (const value of this.values) {
|
for (const value of this.values) {
|
||||||
if (value.type === MatchSubStatementType.Between) {
|
if (value.type === MatchSubStatementType.Between) {
|
||||||
@ -311,50 +359,15 @@ class MatchSubStatementCST extends H2RCST {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let ret = "";
|
let ret = generator_helper_1.minimizeMatchString(matches);
|
||||||
let require_grouping = false;
|
|
||||||
let dont_clobber_plus = false;
|
|
||||||
if (matches.length === 1) {
|
|
||||||
ret = utilities_1.first(matches);
|
|
||||||
if (ret.endsWith("+")) {
|
|
||||||
dont_clobber_plus = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
ret = minimizeMatchString(matches);
|
|
||||||
if (ret.length > 1 &&
|
|
||||||
(!ret.startsWith("(") || !ret.endsWith("["))) {
|
|
||||||
require_grouping = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (this.count) {
|
if (this.count) {
|
||||||
if (dont_clobber_plus) {
|
if (matches.length === 1) {
|
||||||
const clobber = this.count.toRegex(language);
|
// we don't group if there's only 1 element
|
||||||
// + can be ignored as well as a count as long as that count is > 0
|
// but we need to make sure we don't add an additional + or *
|
||||||
switch (clobber) {
|
ret = generator_helper_1.dontClobberRepetition(ret, this.count.toRegex(language));
|
||||||
case "*":
|
|
||||||
case "?":
|
|
||||||
ret = "(?:" + ret + ")" + clobber;
|
|
||||||
break;
|
|
||||||
case "+":
|
|
||||||
// ignore
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
if (clobber.startsWith("{0")) {
|
|
||||||
ret = "(?:" + ret + ")" + clobber;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// remove + and replace with count
|
|
||||||
ret.substring(0, ret.length - 1) + clobber;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (require_grouping) {
|
ret = generator_helper_1.groupIfRequired(ret) + this.count.toRegex(language);
|
||||||
ret = "(?:" + ret + ")";
|
|
||||||
}
|
|
||||||
ret += this.count.toRegex(language);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
@ -377,8 +390,9 @@ class UsingStatementCST extends H2RCST {
|
|||||||
super(tokens);
|
super(tokens);
|
||||||
this.flags = flags;
|
this.flags = flags;
|
||||||
}
|
}
|
||||||
validate(language) {
|
validate(language, context) {
|
||||||
utilities_1.unusedParameter(language, "Using Statement does not change based on language");
|
utilities_1.unusedParameter(language, "Count does not need checking");
|
||||||
|
utilities_1.unusedParameter(context, "Context is not needed");
|
||||||
const errors = [];
|
const errors = [];
|
||||||
let flag = this.flags[0];
|
let flag = this.flags[0];
|
||||||
for (let i = 1; i < this.flags.length; i++) {
|
for (let i = 1; i < this.flags.length; i++) {
|
||||||
@ -434,13 +448,11 @@ class CountSubStatementCST extends H2RCST {
|
|||||||
this.to = to;
|
this.to = to;
|
||||||
this.opt = opt;
|
this.opt = opt;
|
||||||
}
|
}
|
||||||
validate(language) {
|
validate(language, context) {
|
||||||
utilities_1.unusedParameter(language, "Count does not need checking");
|
utilities_1.unusedParameter(language, "Count does not need checking");
|
||||||
|
utilities_1.unusedParameter(context, "Context is not needed");
|
||||||
const errors = [];
|
const errors = [];
|
||||||
if (this.from < 0) {
|
if (this.to !== null && ((this.opt === "exclusive" && (this.to - 1) <= this.from) || this.to <= this.from)) {
|
||||||
errors.push(this.error("Value cannot be negative"));
|
|
||||||
}
|
|
||||||
else if (this.to !== null && ((this.opt === "exclusive" && (this.to - 1) <= this.from) || this.to <= this.from)) {
|
|
||||||
errors.push(this.error("Values must be in range of eachother"));
|
errors.push(this.error("Values must be in range of eachother"));
|
||||||
}
|
}
|
||||||
return errors;
|
return errors;
|
||||||
@ -483,43 +495,24 @@ class MatchStatementCST extends StatementCST {
|
|||||||
* Constructor for MatchStatementCST
|
* Constructor for MatchStatementCST
|
||||||
*
|
*
|
||||||
* @param tokens Tokens used to calculate where an error occured
|
* @param tokens Tokens used to calculate where an error occured
|
||||||
* @param matches
|
* @param matches the list of matches
|
||||||
*/
|
*/
|
||||||
constructor(tokens, completely_optional, matches) {
|
constructor(tokens, completely_optional, matches) {
|
||||||
super(tokens);
|
super(tokens);
|
||||||
this.completely_optional = completely_optional;
|
this.completely_optional = completely_optional;
|
||||||
this.matches = matches;
|
this.matches = matches;
|
||||||
}
|
}
|
||||||
validate(language) {
|
validate(language, context) {
|
||||||
const errors = [];
|
const errors = [];
|
||||||
for (const match of this.matches) {
|
for (const match of this.matches) {
|
||||||
utilities_1.append(errors, match.statement.validate(language));
|
utilities_1.append(errors, match.statement.validate(language, context));
|
||||||
}
|
}
|
||||||
return errors;
|
return errors;
|
||||||
}
|
}
|
||||||
toRegex(language) {
|
toRegex(language) {
|
||||||
let final_matches = this.matches.map((x) => {
|
let final_matches = this.matches.map((x) => x.toRegex(language)).join("");
|
||||||
let match_stmt = x.statement.toRegex(language);
|
|
||||||
// need to group if optional and ungrouped
|
|
||||||
if (x.optional) {
|
|
||||||
if (!utilities_1.isSingleRegexCharacter(match_stmt)) {
|
|
||||||
// don't re-group a group
|
|
||||||
if (match_stmt[0] !== "(" && match_stmt[match_stmt.length - 1] !== ")") {
|
|
||||||
match_stmt = "(?:" + match_stmt + ")";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
match_stmt += "?";
|
|
||||||
}
|
|
||||||
return match_stmt;
|
|
||||||
}).join("");
|
|
||||||
if (this.completely_optional) {
|
if (this.completely_optional) {
|
||||||
if (!utilities_1.isSingleRegexCharacter(final_matches)) {
|
final_matches = generator_helper_1.groupIfRequired(final_matches) + "?";
|
||||||
// don't re-group a group
|
|
||||||
if (final_matches[0] !== "(" && final_matches[final_matches.length - 1] !== ")") {
|
|
||||||
final_matches = "(?:" + final_matches + ")";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
final_matches += "?";
|
|
||||||
}
|
}
|
||||||
return final_matches;
|
return final_matches;
|
||||||
}
|
}
|
||||||
@ -545,18 +538,18 @@ class RepeatStatementCST extends StatementCST {
|
|||||||
this.count = count;
|
this.count = count;
|
||||||
this.statements = statements;
|
this.statements = statements;
|
||||||
}
|
}
|
||||||
validate(language) {
|
validate(language, context) {
|
||||||
const errors = [];
|
const errors = [];
|
||||||
if (this.count !== null) {
|
if (this.count !== null) {
|
||||||
utilities_1.append(errors, this.count.validate(language));
|
utilities_1.append(errors, this.count.validate(language, context));
|
||||||
}
|
}
|
||||||
for (const statement of this.statements) {
|
for (const statement of this.statements) {
|
||||||
utilities_1.append(errors, statement.validate(language));
|
utilities_1.append(errors, statement.validate(language, context));
|
||||||
}
|
}
|
||||||
return errors;
|
return errors;
|
||||||
}
|
}
|
||||||
toRegex(language) {
|
toRegex(language) {
|
||||||
let str = "(?:" + this.statements.map((x) => x.toRegex(language)).join("") + ")";
|
let str = generator_helper_1.groupIfRequired(this.statements.map((x) => x.toRegex(language)).join(""));
|
||||||
if (this.count) {
|
if (this.count) {
|
||||||
str += this.count.toRegex(language);
|
str += this.count.toRegex(language);
|
||||||
// group for optionality because count would be incorrect otherwise
|
// group for optionality because count would be incorrect otherwise
|
||||||
@ -595,14 +588,19 @@ class GroupStatementCST extends StatementCST {
|
|||||||
this.name = name;
|
this.name = name;
|
||||||
this.statements = statements;
|
this.statements = statements;
|
||||||
}
|
}
|
||||||
validate(language) {
|
validate(language, context) {
|
||||||
const errors = [];
|
const errors = [];
|
||||||
// All languages currently support named groups
|
if (this.name !== null) {
|
||||||
//if (false) {
|
if (context.hasGroup(this.name)) {
|
||||||
// errors.push(this.error("This language does not support named groups"));
|
const past_group = context.groups[this.name];
|
||||||
//}
|
errors.push(this.error(`Group with name "${this.name}" was already defined here: ${past_group.startLine}:${past_group.startLine}-${past_group.startLine}:${past_group.startLine + past_group.length}`));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
context.addGroup(this.name, this.tokens);
|
||||||
|
}
|
||||||
|
}
|
||||||
for (const statement of this.statements) {
|
for (const statement of this.statements) {
|
||||||
utilities_1.append(errors, statement.validate(language));
|
utilities_1.append(errors, statement.validate(language, context));
|
||||||
}
|
}
|
||||||
return errors;
|
return errors;
|
||||||
}
|
}
|
||||||
@ -623,6 +621,169 @@ class GroupStatementCST extends StatementCST {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
exports.GroupStatementCST = GroupStatementCST;
|
exports.GroupStatementCST = GroupStatementCST;
|
||||||
|
/**
 * Concrete Syntax Tree for a Backreference statement
 *
 * @internal
 */
class BackrefStatementCST extends StatementCST {
    /**
     * Constructor for BackrefStatementCST
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param optional is this backref optional
     * @param count optional number of times to repeat
     * @param name the group name to call
     */
    constructor(tokens, optional, count, name) {
        super(tokens);
        this.optional = optional;
        this.count = count;
        this.name = name;
    }
    /**
     * Checks that the referenced group is already known and that the count,
     * when present, is itself valid.
     *
     * @param language the regex dialect being targeted
     * @param context the validation context that tracks group names
     * @returns the list of errors found (empty when there are none)
     */
    validate(language, context) {
        const errors = [];
        if (!context.hasGroup(this.name)) {
            errors.push(this.error(`Cannot call group with name "${this.name}" as it was never previously defined`));
        }
        if (this.count !== null) {
            utilities_1.append(errors, this.count.validate(language, context));
        }
        return errors;
    }
    /**
     * Renders the reference to the named group in the syntax of the dialect.
     *
     * @param language the regex dialect being targeted
     * @returns the reference, followed by its count and/or "?" where applicable
     */
    toRegex(language) {
        let str = "";
        switch (language) {
            case RegexDialect.Python:
                str = `(?P=${this.name})`;
                break;
            case RegexDialect.DotNet:
            case RegexDialect.Java:
            // JavaScript has no \g<name>; its named backreference is \k<name>
            case RegexDialect.JS:
                str = `\\k<${this.name}>`;
                break;
            default:
                str = `\\g<${this.name}>`;
                break;
        }
        if (this.count) {
            str += this.count.toRegex(language);
            // group for optionality because count would be incorrect otherwise
            if (this.optional) {
                str = "(?:" + str + ")?";
            }
        }
        else if (this.optional) {
            // append, do not assign: the reference itself must be kept
            str += "?";
        }
        return str;
    }
}
|
||||||
|
exports.BackrefStatementCST = BackrefStatementCST;
|
||||||
|
/**
 * Concrete Syntax Tree for an If Pattern statement
 *
 * @internal
 */
class IfPatternStatementCST extends StatementCST {
    /**
     * Constructor for IfPatternStatementCST
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param matches list of matches to test against
     * @param true_statements statements used when the condition holds
     * @param false_statements statements used otherwise (may be empty)
     */
    constructor(tokens, matches, true_statements, false_statements) {
        super(tokens);
        this.matches = matches;
        this.true_statements = true_statements;
        this.false_statements = false_statements;
    }
    /**
     * Reports dialects that cannot express this conditional, then validates
     * the condition and both branches.
     *
     * @param language the regex dialect being targeted
     * @param context the validation context handed on to every child
     * @returns the list of errors found (empty when there are none)
     */
    validate(language, context) {
        const errors = [];
        const lacks_conditionals = language === RegexDialect.Java || language === RegexDialect.JS;
        if (lacks_conditionals) {
            errors.push(this.error("This language does not support conditionals"));
        }
        if (language === RegexDialect.Python) {
            errors.push(this.error("This language does not support pattern conditionals"));
        }
        // condition first, then the true branch, then the false branch
        for (const children of [this.matches, this.true_statements, this.false_statements]) {
            for (const child of children) {
                utilities_1.append(errors, child.validate(language, context));
            }
        }
        return errors;
    }
    /**
     * Renders the conditional as "(?(condition)true)" or "(?(condition)true|false)".
     *
     * @param language the regex dialect being targeted
     * @returns the conditional expression text
     */
    toRegex(language) {
        const render = (nodes) => nodes.map((node) => node.toRegex(language)).join("");
        const condition = render(this.matches);
        const when_true = generator_helper_1.groupIfRequired(render(this.true_statements));
        if (this.false_statements.length > 0) {
            const when_false = generator_helper_1.groupIfRequired(render(this.false_statements));
            return `(?(${condition})${when_true}|${when_false})`;
        }
        return `(?(${condition})${when_true})`;
    }
}
|
||||||
|
exports.IfPatternStatementCST = IfPatternStatementCST;
|
||||||
|
/**
 * Concrete Syntax Tree for an If group Ident statement
 *
 * @internal
 */
class IfIdentStatementCST extends StatementCST {
    /**
     * Constructor for IfIdentStatementCST
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param identifier the group identifier to check
     * @param true_statements statements used when the group took part in the match
     * @param false_statements statements used otherwise (may be empty)
     */
    constructor(tokens, identifier, true_statements, false_statements) {
        super(tokens);
        this.identifier = identifier;
        this.true_statements = true_statements;
        this.false_statements = false_statements;
    }
    /**
     * Reports dialects without conditionals and unknown group names, then
     * validates both branches.
     *
     * @param language the regex dialect being targeted
     * @param context the validation context that tracks group names
     * @returns the list of errors found (empty when there are none)
     */
    validate(language, context) {
        const errors = [];
        const lacks_conditionals = language === RegexDialect.Java || language === RegexDialect.JS;
        if (lacks_conditionals) {
            errors.push(this.error("This language does not support conditionals"));
        }
        if (!context.hasGroup(this.identifier)) {
            errors.push(this.error(`Group with name "${this.identifier}" does not exist`));
        }
        // true branch first, then the false branch
        for (const branch of [this.true_statements, this.false_statements]) {
            for (const statement of branch) {
                utilities_1.append(errors, statement.validate(language, context));
            }
        }
        return errors;
    }
    /**
     * Renders the conditional as "(?(group)true)" or "(?(group)true|false)".
     *
     * @param language the regex dialect being targeted
     * @returns the conditional expression text
     */
    toRegex(language) {
        // be more clear with languages that support it
        const condition = language === RegexDialect.Boost
            ? "<" + this.identifier + ">"
            : this.identifier;
        const render = (nodes) => generator_helper_1.groupIfRequired(nodes.map((node) => node.toRegex(language)).join(""));
        const when_true = render(this.true_statements);
        if (this.false_statements.length > 0) {
            const when_false = render(this.false_statements);
            return `(?(${condition})${when_true}|${when_false})`;
        }
        return `(?(${condition})${when_true})`;
    }
}
|
||||||
|
exports.IfIdentStatementCST = IfIdentStatementCST;
|
||||||
/**
|
/**
|
||||||
* Concrete Syntax Tree for a regular expression
|
* Concrete Syntax Tree for a regular expression
|
||||||
*
|
*
|
||||||
@ -642,10 +803,10 @@ class RegularExpressionCST extends H2RCST {
|
|||||||
this.usings = usings;
|
this.usings = usings;
|
||||||
this.statements = statements;
|
this.statements = statements;
|
||||||
}
|
}
|
||||||
validate(language) {
|
validate(language, context) {
|
||||||
const errors = this.usings.validate(language);
|
const errors = this.usings.validate(language, context);
|
||||||
for (const statement of this.statements) {
|
for (const statement of this.statements) {
|
||||||
utilities_1.append(errors, statement.validate(language));
|
utilities_1.append(errors, statement.validate(language, context));
|
||||||
}
|
}
|
||||||
return errors;
|
return errors;
|
||||||
}
|
}
|
||||||
@ -656,87 +817,3 @@ class RegularExpressionCST extends H2RCST {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
exports.RegularExpressionCST = RegularExpressionCST;
|
exports.RegularExpressionCST = RegularExpressionCST;
|
||||||
/**
|
|
||||||
* Minimizes the match string by finding duplicates or substrings in the array
|
|
||||||
*
|
|
||||||
* @param arr the array of matches
|
|
||||||
* @internal
|
|
||||||
*/
|
|
||||||
function minimizeMatchString(arr) {
|
|
||||||
return minMatchString(arr, 0);
|
|
||||||
}
|
|
||||||
exports.minimizeMatchString = minimizeMatchString;
|
|
||||||
/**
|
|
||||||
* Minimizes the match string by finding duplicates or substrings in the array
|
|
||||||
*
|
|
||||||
* @param arr the array
|
|
||||||
* @param depth must be 0 for initial call
|
|
||||||
* @internal
|
|
||||||
*/
|
|
||||||
function minMatchString(arr, depth = 0) {
|
|
||||||
// base case: arr is empty
|
|
||||||
if (arr.length === 0) {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
// base case: arr has 1 element (must have at least 2, so this means this value is optional)
|
|
||||||
if (arr.length === 1) {
|
|
||||||
return utilities_1.first(arr) + "?";
|
|
||||||
}
|
|
||||||
// remove duplicates
|
|
||||||
arr = [...new Set(arr)];
|
|
||||||
// base case: arr has 1 element (after duplicate removal means this is required)
|
|
||||||
if (arr.length === 1) {
|
|
||||||
return utilities_1.first(arr);
|
|
||||||
}
|
|
||||||
// base case: arr is all single letters
|
|
||||||
if (arr.every(utilities_1.isSingleRegexCharacter)) {
|
|
||||||
return "[" + arr.join("") + "]";
|
|
||||||
}
|
|
||||||
// now the real magic begins
|
|
||||||
// You are not expected to understand this
|
|
||||||
let longest_begin_substring = utilities_1.first(arr);
|
|
||||||
let longest_end_substring = utilities_1.first(arr);
|
|
||||||
for (let i = 1; i < arr.length; i++) {
|
|
||||||
// reduce longest_substring to match everything
|
|
||||||
for (let j = 0; j < longest_begin_substring.length; j++) {
|
|
||||||
if (arr[i].length < j || longest_begin_substring[j] !== arr[i][j]) {
|
|
||||||
longest_begin_substring = longest_begin_substring.substr(0, j);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (let j = 0; j < longest_end_substring.length; j++) {
|
|
||||||
if (arr[i].length - j < 0 || longest_end_substring[longest_end_substring.length - j - 1] !== arr[i][arr[i].length - j - 1]) {
|
|
||||||
longest_end_substring = longest_end_substring.substr(longest_end_substring.length - j, longest_end_substring.length);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// No matches whatsoever
|
|
||||||
// *technically* we can optimize further, but that is a VERY non-trivial problem
|
|
||||||
// For example optimizing: [ "a1x1z", "a2y2z", "a3z3z" ] to: "a[123][xyz][123]z"
|
|
||||||
if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
|
|
||||||
if (depth > 0) {
|
|
||||||
return "(?:" + arr.join("|") + ")";
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return arr.join("|");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// we have some matches
|
|
||||||
else {
|
|
||||||
// remove begin (if exists) and end (if exists) from each element and remove empty strings
|
|
||||||
const begin_pos = longest_begin_substring.length;
|
|
||||||
const end_pos = longest_end_substring.length;
|
|
||||||
const similar_matches = [];
|
|
||||||
for (const ele of arr) {
|
|
||||||
const match = ele.substring(begin_pos, ele.length - end_pos);
|
|
||||||
if (match.length !== 0) {
|
|
||||||
similar_matches.push(match);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return longest_begin_substring + minMatchString(similar_matches, depth + 1) + longest_end_substring;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
23
lib/generator_helper.d.ts
vendored
Normal file
23
lib/generator_helper.d.ts
vendored
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
|
/**
|
||||||
|
* Minimizes the match string by finding duplicates or substrings in the array
|
||||||
|
*
|
||||||
|
* @param arr the array of matches
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export declare function minimizeMatchString(arr: string[]): string;
|
||||||
|
/**
|
||||||
|
* Groups a regex fragment if it needs to be grouped
|
||||||
|
*
|
||||||
|
* @param fragment fragment of regular expression to potentially group
|
||||||
|
* @returns a non-capturing group if there needs to be one
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export declare function groupIfRequired(fragment: string): string;
|
||||||
|
/**
|
||||||
|
* Combines a fragment with a repetition without stacking it on a + or * the fragment already ends with
|
||||||
|
*
|
||||||
|
* @param fragment fragment of regular expression
|
||||||
|
* @param repetition repetition that may clobber the fragment
|
||||||
|
*/
|
||||||
|
export declare function dontClobberRepetition(fragment: string, repetition: string): string;
|
203
lib/generator_helper.js
Normal file
203
lib/generator_helper.js
Normal file
@ -0,0 +1,203 @@
|
|||||||
|
"use strict";
|
||||||
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
|
exports.dontClobberRepetition = exports.groupIfRequired = exports.minimizeMatchString = void 0;
|
||||||
|
/**
|
||||||
|
* Includes helper functions for the Generator
|
||||||
|
* @packageDocumentation
|
||||||
|
*/
|
||||||
|
const utilities_1 = require("./utilities");
|
||||||
|
/**
 * Minimizes the match string by finding duplicates or substrings in the array
 *
 * @param arr the array of matches
 * @returns the only entry as-is when there is exactly one, otherwise the minimized string
 * @internal
 */
function minimizeMatchString(arr) {
    // a lone entry must bypass minMatchString, which reads a single entry as "optional"
    return arr.length === 1
        ? utilities_1.first(arr)
        : minMatchString(arr, 0);
}
|
||||||
|
exports.minimizeMatchString = minimizeMatchString;
|
||||||
|
/**
 * Minimizes the match string by finding duplicates or substrings in the array
 *
 * The alternatives are reduced to "common prefix + remainder + common suffix",
 * and the remainders are minimized recursively. An alternative that is used up
 * entirely by the prefix and suffix makes the remainder optional.
 *
 * NOTE(review): alternatives are still compared character by character, so a
 * shared prefix could end inside a character class or between an atom and its
 * quantifier — confirm whether callers can pass such fragments.
 *
 * @param arr the array
 * @param depth must be 0 for initial call
 * @returns an optimized string
 * @internal
 */
function minMatchString(arr, depth = 0) {
    // base case: arr is empty
    if (arr.length === 0) {
        return "";
    }
    // base case: a lone alternative means this value is optional;
    // group it first so the "?" covers all of it
    if (arr.length === 1) {
        return groupIfRequired(utilities_1.first(arr)) + "?";
    }
    // remove duplicates
    const unique = [...new Set(arr)];
    // base case: only 1 element left after duplicate removal means it is required
    if (unique.length === 1) {
        return utilities_1.first(unique);
    }
    // base case: every alternative is a single character
    if (unique.every((ele) => utilities_1.isSingleRegexCharacter(ele))) {
        return "[" + unique.join("") + "]";
    }
    // longest text every alternative starts with / ends with
    let prefix = utilities_1.first(unique);
    let suffix = prefix;
    let shortest = prefix.length;
    for (let i = 1; i < unique.length; i++) {
        const ele = unique[i];
        prefix = prefix.substring(0, commonPrefixLength(prefix, ele));
        suffix = suffix.substring(suffix.length - commonSuffixLength(suffix, ele));
        shortest = Math.min(shortest, ele.length);
    }
    // never split an escape sequence: the prefix may not end in an unescaped backslash
    if (countBackslashesBefore(prefix, prefix.length) % 2 === 1) {
        prefix = prefix.substring(0, prefix.length - 1);
    }
    // prefix and suffix may not claim the same characters of the shortest alternative
    if (prefix.length + suffix.length > shortest) {
        suffix = suffix.substring(suffix.length - (shortest - prefix.length));
    }
    // ...and the suffix may not begin with a character that some alternative escapes
    while (suffix.length > 0 &&
        unique.some((ele) => countBackslashesBefore(ele, ele.length - suffix.length) % 2 === 1)) {
        suffix = suffix.substring(1);
    }
    // No matches whatsoever
    // *technically* we can optimize further, but that is a VERY non-trivial problem
    // For example optimizing: [ "a1x1z", "a2y2z", "a3z3z" ] to: "a[123][xyz][123]z"
    if (prefix.length === 0 && suffix.length === 0) {
        const alternation = unique.join("|");
        return depth > 0 ? "(?:" + alternation + ")" : alternation;
    }
    // we have some matches: strip prefix and suffix from each alternative
    const remainders = [];
    let has_empty_remainder = false;
    for (const ele of unique) {
        const remainder = ele.substring(prefix.length, ele.length - suffix.length);
        if (remainder.length === 0) {
            has_empty_remainder = true;
        }
        else {
            remainders.push(remainder);
        }
    }
    let inner = remainders.length === 1
        ? utilities_1.first(remainders)
        : minMatchString(remainders, depth + 1);
    // an alternative with nothing left means everything between prefix and suffix is optional
    if (has_empty_remainder) {
        inner = groupIfRequired(inner) + "?";
    }
    return prefix + inner + suffix;
}
/** Number of leading characters two strings have in common. */
function commonPrefixLength(a, b) {
    let n = 0;
    while (n < a.length && n < b.length && a[n] === b[n]) {
        n++;
    }
    return n;
}
/** Number of trailing characters two strings have in common. */
function commonSuffixLength(a, b) {
    let n = 0;
    while (n < a.length && n < b.length && a[a.length - 1 - n] === b[b.length - 1 - n]) {
        n++;
    }
    return n;
}
/** Number of consecutive backslashes directly in front of index `end`. */
function countBackslashesBefore(text, end) {
    let n = 0;
    for (let i = end - 1; i >= 0 && text[i] === "\\"; i--) {
        n++;
    }
    return n;
}
|
||||||
|
/**
 * Groups a regex fragment if it needs to be grouped
 *
 * A fragment is left alone when it is a single regex character, or when its
 * opening "(" or "[" is closed only by its very last character.
 *
 * NOTE(review): brackets inside a character class that sits within a group,
 * e.g. "([)])", are counted like real ones — such a fragment gets an extra,
 * harmless group.
 *
 * @param fragment fragment of regular expression to potentially group
 * @returns a non-capturing group if there needs to be one
 * @internal
 */
function groupIfRequired(fragment) {
    if (utilities_1.isSingleRegexCharacter(fragment)) {
        return fragment;
    }
    const last = fragment[fragment.length - 1];
    if (fragment[0] === "(" && last === ")") {
        return isWrappedByOnePair(fragment, "(", ")") ? fragment : "(?:" + fragment + ")";
    }
    if (fragment[0] === "[" && last === "]") {
        // you'll never have a raw [ inside a [], so only the closing bracket is tracked
        return isWrappedByOnePair(fragment, null, "]") ? fragment : "(?:" + fragment + ")";
    }
    return "(?:" + fragment + ")";
}
/**
 * Tells whether the bracket opened by the first character stays open until the
 * last character, by scanning every character in between.
 *
 * @param fragment text that starts with an opening and ends with a closing bracket
 * @param open the opening bracket to count, or null when nesting cannot occur
 * @param close the closing bracket to count
 * @returns false as soon as the interior closes the outer bracket or leaves one open
 */
function isWrappedByOnePair(fragment, open, close) {
    let depth = 0;
    // the interior runs up to and including index length - 2
    for (let i = 1; i < fragment.length - 1; i++) {
        const ch = fragment[i];
        if (ch === "\\") {
            // skip the escaped character
            i++;
        }
        else if (ch === open) {
            depth++;
        }
        else if (ch === close) {
            depth--;
            if (depth < 0) {
                return false;
            }
        }
    }
    return depth === 0;
}
|
||||||
|
exports.groupIfRequired = groupIfRequired;
|
||||||
|
/**
 * Combines a fragment with a repetition without stacking the repetition on top
 * of a + or * quantifier the fragment already ends with
 *
 * An escaped "\+" or "\*" at the end is a literal character, not a quantifier,
 * and simply gets the repetition appended.
 *
 * @param fragment fragment of regular expression
 * @param repetition repetition that may clobber the fragment
 * @returns the fragment with the repetition applied
 * @internal
 */
function dontClobberRepetition(fragment, repetition) {
    // the fragment with its final character (the quantifier) cut off
    const without_quantifier = fragment.substring(0, fragment.length - 1);
    // + can be ignored as well as a count as long as that count is > 0
    if (endsWithUnescaped(fragment, "+")) {
        switch (repetition) {
            case "*":
                // ignore: + is greater than *
                return fragment;
            case "+":
                // ignore: already +
                return fragment;
            case "?":
                // non-greedy qualifier
                return fragment + repetition;
            default:
                if (repetition.startsWith("{0")) {
                    return "(?:" + fragment + ")" + repetition;
                }
                // remove + and replace with count
                return without_quantifier + repetition;
        }
    }
    if (endsWithUnescaped(fragment, "*")) {
        switch (repetition) {
            case "*":
                // ignore: already *
                return fragment;
            case "?":
                // non-greedy qualifier
                return fragment + repetition;
            default:
                // remove * and replace with count
                return without_quantifier + repetition;
        }
    }
    return fragment + repetition;
}
/**
 * Tells whether the text ends with the given character and that character is
 * not escaped, i.e. it is preceded by an even number of backslashes.
 */
function endsWithUnescaped(text, character) {
    if (!text.endsWith(character)) {
        return false;
    }
    let backslashes = 0;
    for (let i = text.length - 2; i >= 0 && text[i] === "\\"; i--) {
        backslashes++;
    }
    return backslashes % 2 === 0;
}
|
||||||
|
exports.dontClobberRepetition = dontClobberRepetition;
|
2
lib/index.d.ts
vendored
2
lib/index.d.ts
vendored
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
/**
|
/**
|
||||||
* Includes all packages
|
* Includes all packages
|
||||||
* @packageDocumentation
|
* @packageDocumentation
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
"use strict";
|
"use strict";
|
||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
Object.defineProperty(exports, "__esModule", { value: true });
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
exports.RegexDialect = exports.ParseResult = exports.Human2RegexParser = exports.Human2RegexParserOptions = exports.TokenizeResult = exports.IndentType = exports.Human2RegexLexer = exports.Human2RegexLexerOptions = exports.CommonError = void 0;
|
exports.RegexDialect = exports.ParseResult = exports.Human2RegexParser = exports.Human2RegexParserOptions = exports.TokenizeResult = exports.IndentType = exports.Human2RegexLexer = exports.Human2RegexLexerOptions = exports.CommonError = void 0;
|
||||||
/**
|
/**
|
||||||
|
2
lib/lexer.d.ts
vendored
2
lib/lexer.d.ts
vendored
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
/**
|
/**
|
||||||
* The Lexer for Human2Regex
|
* The Lexer for Human2Regex
|
||||||
* @packageDocumentation
|
* @packageDocumentation
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
"use strict";
|
"use strict";
|
||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
Object.defineProperty(exports, "__esModule", { value: true });
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
exports.Human2RegexLexer = exports.TokenizeResult = exports.Human2RegexLexerOptions = exports.IndentType = void 0;
|
exports.Human2RegexLexer = exports.TokenizeResult = exports.Human2RegexLexerOptions = exports.IndentType = void 0;
|
||||||
/**
|
/**
|
||||||
|
2
lib/parser.d.ts
vendored
2
lib/parser.d.ts
vendored
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
/**
|
/**
|
||||||
* The parser for Human2Regex
|
* The parser for Human2Regex
|
||||||
* @packageDocumentation
|
* @packageDocumentation
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
"use strict";
|
"use strict";
|
||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
||||||
if (k2 === undefined) k2 = k;
|
if (k2 === undefined) k2 = k;
|
||||||
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
|
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });
|
||||||
@ -83,7 +83,7 @@ class ParseResult {
|
|||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
validate(language) {
|
validate(language) {
|
||||||
return this.regexp_cst.validate(language).map(utilities_1.CommonError.fromSemanticError);
|
return this.regexp_cst.validate(language, new generator_1.GeneratorContext()).map(utilities_1.CommonError.fromSemanticError);
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Generate a regular expression string based on the parse result
|
* Generate a regular expression string based on the parse result
|
||||||
@ -499,12 +499,91 @@ class Human2RegexParser extends chevrotain_1.EmbeddedActionsParser {
|
|||||||
tokens.push($.CONSUME(T.Outdent));
|
tokens.push($.CONSUME(T.Outdent));
|
||||||
return new generator_1.RepeatStatementCST(tokens, optional, count, statements);
|
return new generator_1.RepeatStatementCST(tokens, optional, count, statements);
|
||||||
});
|
});
|
||||||
|
const BackrefStatement = $.RULE("BackrefStatement", () => {
|
||||||
|
const tokens = [];
|
||||||
|
let optional = false;
|
||||||
|
let count = null;
|
||||||
|
$.OPTION5(() => {
|
||||||
|
tokens.push($.CONSUME(T.Optional));
|
||||||
|
optional = true;
|
||||||
|
});
|
||||||
|
tokens.push($.CONSUME(T.Rerun));
|
||||||
|
$.OPTION6(() => count = $.SUBRULE(CountSubStatement));
|
||||||
|
$.OPTION7(() => {
|
||||||
|
$.OPTION(() => $.CONSUME(T.The));
|
||||||
|
$.CONSUME(T.Group);
|
||||||
|
$.OPTION2(() => $.CONSUME(T.Called));
|
||||||
|
});
|
||||||
|
const name = $.CONSUME(T.Identifier).image;
|
||||||
|
tokens.push($.CONSUME4(T.EndOfLine));
|
||||||
|
return new generator_1.BackrefStatementCST(tokens, optional, count, name);
|
||||||
|
});
|
||||||
|
const IfStatement = $.RULE("IfStatement", () => {
|
||||||
|
const tokens = [];
|
||||||
|
const msv = [];
|
||||||
|
let optional = false;
|
||||||
|
const true_statements = [];
|
||||||
|
const false_statements = [];
|
||||||
|
let name = "";
|
||||||
|
tokens.push($.CONSUME(T.If));
|
||||||
|
$.OR2([
|
||||||
|
{ ALT: () => {
|
||||||
|
name = $.CONSUME(T.Identifier).image;
|
||||||
|
} },
|
||||||
|
{ ALT: () => {
|
||||||
|
$.CONSUME(T.Match);
|
||||||
|
$.OPTION4(() => {
|
||||||
|
$.CONSUME3(T.Optional);
|
||||||
|
optional = true;
|
||||||
|
});
|
||||||
|
msv.push(new generator_1.MatchStatementValue(optional, $.SUBRULE(MatchSubStatement)));
|
||||||
|
$.MANY(() => {
|
||||||
|
$.OR([
|
||||||
|
{ ALT: () => {
|
||||||
|
$.OPTION2(() => $.CONSUME2(T.And));
|
||||||
|
$.CONSUME(T.Then);
|
||||||
|
} },
|
||||||
|
{ ALT: () => $.CONSUME(T.And) },
|
||||||
|
]);
|
||||||
|
optional = false;
|
||||||
|
$.OPTION3(() => {
|
||||||
|
$.CONSUME2(T.Optional);
|
||||||
|
optional = true;
|
||||||
|
});
|
||||||
|
msv.push(new generator_1.MatchStatementValue(optional, $.SUBRULE2(MatchSubStatement)));
|
||||||
|
});
|
||||||
|
} }
|
||||||
|
]);
|
||||||
|
tokens.push($.CONSUME3(T.EndOfLine));
|
||||||
|
$.CONSUME2(T.Indent);
|
||||||
|
$.AT_LEAST_ONE2(() => {
|
||||||
|
true_statements.push($.SUBRULE(Statement));
|
||||||
|
});
|
||||||
|
$.CONSUME2(T.Outdent);
|
||||||
|
$.OPTION(() => {
|
||||||
|
$.CONSUME(T.Else);
|
||||||
|
$.CONSUME4(T.EndOfLine);
|
||||||
|
$.CONSUME3(T.Indent);
|
||||||
|
$.AT_LEAST_ONE3(() => {
|
||||||
|
false_statements.push($.SUBRULE2(Statement));
|
||||||
|
});
|
||||||
|
$.CONSUME3(T.Outdent);
|
||||||
|
});
|
||||||
|
if (name === "") {
|
||||||
|
return new generator_1.IfPatternStatementCST(tokens, msv, true_statements, false_statements);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return new generator_1.IfIdentStatementCST(tokens, name, true_statements, false_statements);
|
||||||
|
}
|
||||||
|
});
|
||||||
// statement super class
|
// statement super class
|
||||||
const Statement = $.RULE("Statement", () => {
|
const Statement = $.RULE("Statement", () => {
|
||||||
return $.OR([
|
return $.OR([
|
||||||
{ ALT: () => $.SUBRULE(MatchStatement) },
|
{ ALT: () => $.SUBRULE(MatchStatement) },
|
||||||
{ ALT: () => $.SUBRULE(GroupStatement) },
|
{ ALT: () => $.SUBRULE(GroupStatement) },
|
||||||
{ ALT: () => $.SUBRULE(RepeatStatement) }
|
{ ALT: () => $.SUBRULE(RepeatStatement) },
|
||||||
|
{ ALT: () => $.SUBRULE(BackrefStatement) },
|
||||||
|
{ ALT: () => $.SUBRULE(IfStatement) }
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
// full regex
|
// full regex
|
||||||
|
6
lib/tokens.d.ts
vendored
6
lib/tokens.d.ts
vendored
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
/** @internal */ export declare const Zero: import("chevrotain").TokenType;
|
/** @internal */ export declare const Zero: import("chevrotain").TokenType;
|
||||||
/** @internal */ export declare const One: import("chevrotain").TokenType;
|
/** @internal */ export declare const One: import("chevrotain").TokenType;
|
||||||
/** @internal */ export declare const Two: import("chevrotain").TokenType;
|
/** @internal */ export declare const Two: import("chevrotain").TokenType;
|
||||||
@ -51,6 +51,10 @@
|
|||||||
/** @internal */ export declare const CaseInsensitive: import("chevrotain").TokenType;
|
/** @internal */ export declare const CaseInsensitive: import("chevrotain").TokenType;
|
||||||
/** @internal */ export declare const CaseSensitive: import("chevrotain").TokenType;
|
/** @internal */ export declare const CaseSensitive: import("chevrotain").TokenType;
|
||||||
/** @internal */ export declare const OrMore: import("chevrotain").TokenType;
|
/** @internal */ export declare const OrMore: import("chevrotain").TokenType;
|
||||||
|
/** @internal */ export declare const Rerun: import("chevrotain").TokenType;
|
||||||
|
/** @internal */ export declare const The: import("chevrotain").TokenType;
|
||||||
|
/** @internal */ export declare const If: import("chevrotain").TokenType;
|
||||||
|
/** @internal */ export declare const Else: import("chevrotain").TokenType;
|
||||||
/** @internal */ export declare const EndOfLine: import("chevrotain").TokenType;
|
/** @internal */ export declare const EndOfLine: import("chevrotain").TokenType;
|
||||||
/** @internal */ export declare const WS: import("chevrotain").TokenType;
|
/** @internal */ export declare const WS: import("chevrotain").TokenType;
|
||||||
/** @internal */ export declare const SingleLineComment: import("chevrotain").TokenType;
|
/** @internal */ export declare const SingleLineComment: import("chevrotain").TokenType;
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
"use strict";
|
"use strict";
|
||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
Object.defineProperty(exports, "__esModule", { value: true });
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
exports.AllTokens = exports.Outdent = exports.Indent = exports.StringLiteral = exports.NumberLiteral = exports.Identifier = exports.MultilineComment = exports.SingleLineComment = exports.WS = exports.EndOfLine = exports.OrMore = exports.CaseSensitive = exports.CaseInsensitive = exports.CarriageReturn = exports.Newline = exports.Repeat = exports.Called = exports.Create = exports.To = exports.From = exports.Exclusive = exports.Inclusive = exports.Exactly = exports.Times = exports.A = exports.Group = exports.Linefeed = exports.Tab = exports.Between = exports.Not = exports.Matching = exports.Exact = exports.Multiline = exports.Global = exports.Using = exports.Unicode = exports.Number = exports.Boundary = exports.Whitespace = exports.Integer = exports.Decimal = exports.Letter = exports.Character = exports.Digit = exports.Word = exports.And = exports.Or = exports.Anything = exports.Then = exports.Match = exports.Optional = exports.Ten = exports.Nine = exports.Eight = exports.Seven = exports.Six = exports.Five = exports.Four = exports.Three = exports.Two = exports.One = exports.Zero = void 0;
|
exports.CaseInsensitive = exports.CarriageReturn = exports.Newline = exports.Repeat = exports.Called = exports.Create = exports.To = exports.From = exports.Exclusive = exports.Inclusive = exports.Exactly = exports.Times = exports.A = exports.Group = exports.Linefeed = exports.Tab = exports.Between = exports.Not = exports.Matching = exports.Exact = exports.Multiline = exports.Global = exports.Using = exports.Unicode = exports.Number = exports.Boundary = exports.Whitespace = exports.Integer = exports.Decimal = exports.Letter = exports.Character = exports.Digit = exports.Word = exports.And = exports.Or = exports.Anything = exports.Then = exports.Match = exports.Optional = exports.Ten = exports.Nine = exports.Eight = exports.Seven = exports.Six = exports.Five = exports.Four = exports.Three = exports.Two = exports.One = exports.Zero = void 0;
|
||||||
|
exports.AllTokens = exports.Outdent = exports.Indent = exports.StringLiteral = exports.NumberLiteral = exports.Identifier = exports.MultilineComment = exports.SingleLineComment = exports.WS = exports.EndOfLine = exports.Else = exports.If = exports.The = exports.Rerun = exports.OrMore = exports.CaseSensitive = void 0;
|
||||||
/**
|
/**
|
||||||
* The tokens required for Human2Regex
|
* The tokens required for Human2Regex
|
||||||
* @packageDocumentation
|
* @packageDocumentation
|
||||||
@ -52,32 +53,17 @@ const chevrotain_1 = require("chevrotain");
|
|||||||
/** @internal */ exports.From = chevrotain_1.createToken({ name: "From", pattern: /from/i });
|
/** @internal */ exports.From = chevrotain_1.createToken({ name: "From", pattern: /from/i });
|
||||||
/** @internal */ exports.To = chevrotain_1.createToken({ name: "To", pattern: /(to|through|thru|\-|\.\.\.?)/i });
|
/** @internal */ exports.To = chevrotain_1.createToken({ name: "To", pattern: /(to|through|thru|\-|\.\.\.?)/i });
|
||||||
/** @internal */ exports.Create = chevrotain_1.createToken({ name: "Create", pattern: /create(s)?/i });
|
/** @internal */ exports.Create = chevrotain_1.createToken({ name: "Create", pattern: /create(s)?/i });
|
||||||
/** @internal */ exports.Called = chevrotain_1.createToken({ name: "Called", pattern: /name(d)?|call(ed)?/i });
|
/** @internal */ exports.Called = chevrotain_1.createToken({ name: "Called", pattern: /named|called/i });
|
||||||
/** @internal */ exports.Repeat = chevrotain_1.createToken({ name: "Repeat", pattern: /repeat(s|ing)?/i });
|
/** @internal */ exports.Repeat = chevrotain_1.createToken({ name: "Repeat", pattern: /repeat(s|ing)?/i });
|
||||||
/** @internal */ exports.Newline = chevrotain_1.createToken({ name: "Newline", pattern: /(new line|newline)/i });
|
/** @internal */ exports.Newline = chevrotain_1.createToken({ name: "Newline", pattern: /(new line|newline)/i });
|
||||||
/** @internal */ exports.CarriageReturn = chevrotain_1.createToken({ name: "CarriageReturn", pattern: /carriage return/i });
|
/** @internal */ exports.CarriageReturn = chevrotain_1.createToken({ name: "CarriageReturn", pattern: /carriage return/i });
|
||||||
/** @internal */ exports.CaseInsensitive = chevrotain_1.createToken({ name: "CaseInsensitive", pattern: /case insensitive/i });
|
/** @internal */ exports.CaseInsensitive = chevrotain_1.createToken({ name: "CaseInsensitive", pattern: /case insensitive/i });
|
||||||
/** @internal */ exports.CaseSensitive = chevrotain_1.createToken({ name: "CaseSensitive", pattern: /case sensitive/i });
|
/** @internal */ exports.CaseSensitive = chevrotain_1.createToken({ name: "CaseSensitive", pattern: /case sensitive/i });
|
||||||
/** @internal */ exports.OrMore = chevrotain_1.createToken({ name: "OrMore", pattern: /\+|or more/i });
|
/** @internal */ exports.OrMore = chevrotain_1.createToken({ name: "OrMore", pattern: /\+|or more/i });
|
||||||
/*
|
/** @internal */ exports.Rerun = chevrotain_1.createToken({ name: "Rerun", pattern: /re( |-)?(run|capture)/i });
|
||||||
//Not being used currently
|
/** @internal */ exports.The = chevrotain_1.createToken({ name: "The", pattern: /the/i });
|
||||||
export const Of = createToken({name: "Of", pattern: /of/i});
|
/** @internal */ exports.If = chevrotain_1.createToken({ name: "If", pattern: /if/i });
|
||||||
export const Nothing = createToken({name: "Nothing", pattern: /nothing/i});
|
/** @internal */ exports.Else = chevrotain_1.createToken({ name: "Else", pattern: /else|otherwise/i });
|
||||||
export const As = createToken({name: "As", pattern: /as/i});
|
|
||||||
export const If = createToken({name: "If", pattern: /if/i});
|
|
||||||
export const Start = createToken({name: "Start", pattern: /start(s) with?/i});
|
|
||||||
export const Ends = createToken({name: "Ends", pattern: /end(s)? with/i});
|
|
||||||
export const Else = createToken({name: "Else", pattern: /(other wise|otherwise|else)/i});
|
|
||||||
export const Unless = createToken({name: "Unless", pattern: /unless/i});
|
|
||||||
export const While = createToken({name: "While", pattern: /while/i});
|
|
||||||
export const More = createToken({name: "More", pattern: /more/i});
|
|
||||||
export const LBracket = createToken({name: "Left Bracket", pattern: /\(/ });
|
|
||||||
export const RBracket = createToken({name: "Right Bracket", pattern: /\)/ });
|
|
||||||
export const None = createToken({name: "None", pattern: /none/i});
|
|
||||||
export const Neither = createToken({name: "Neither", pattern: /neither/i});
|
|
||||||
export const The = createToken({name: "The", pattern: /the/i }); //, longer_alt: Then});
|
|
||||||
export const By = createToken({name: "By", pattern: /by/i});
|
|
||||||
*/
|
|
||||||
/** @internal */ exports.EndOfLine = chevrotain_1.createToken({ name: "EOL", pattern: /\n/ });
|
/** @internal */ exports.EndOfLine = chevrotain_1.createToken({ name: "EOL", pattern: /\n/ });
|
||||||
/** @internal */ exports.WS = chevrotain_1.createToken({ name: "Whitespace", pattern: /[^\S\n]+/, start_chars_hint: [" ", "\r"], group: chevrotain_1.Lexer.SKIPPED });
|
/** @internal */ exports.WS = chevrotain_1.createToken({ name: "Whitespace", pattern: /[^\S\n]+/, start_chars_hint: [" ", "\r"], group: chevrotain_1.Lexer.SKIPPED });
|
||||||
/** @internal */ exports.SingleLineComment = chevrotain_1.createToken({ name: "SingleLineComment", pattern: /(#|\/\/).*/, group: chevrotain_1.Lexer.SKIPPED });
|
/** @internal */ exports.SingleLineComment = chevrotain_1.createToken({ name: "SingleLineComment", pattern: /(#|\/\/).*/, group: chevrotain_1.Lexer.SKIPPED });
|
||||||
@ -120,22 +106,11 @@ exports.AllTokens = [
|
|||||||
exports.Whitespace,
|
exports.Whitespace,
|
||||||
exports.Number,
|
exports.Number,
|
||||||
exports.Unicode,
|
exports.Unicode,
|
||||||
/*
|
exports.Called,
|
||||||
Of,
|
exports.Rerun,
|
||||||
As,
|
exports.If,
|
||||||
If,
|
exports.Else,
|
||||||
Start,
|
exports.The,
|
||||||
Ends,
|
|
||||||
Else,
|
|
||||||
Unless,
|
|
||||||
While,
|
|
||||||
More,
|
|
||||||
Nothing,
|
|
||||||
By,
|
|
||||||
The,
|
|
||||||
None,
|
|
||||||
Neither,
|
|
||||||
*/
|
|
||||||
exports.Using,
|
exports.Using,
|
||||||
exports.Global,
|
exports.Global,
|
||||||
exports.Multiline,
|
exports.Multiline,
|
||||||
@ -151,7 +126,6 @@ exports.AllTokens = [
|
|||||||
exports.Exclusive,
|
exports.Exclusive,
|
||||||
exports.From,
|
exports.From,
|
||||||
exports.Create,
|
exports.Create,
|
||||||
exports.Called,
|
|
||||||
exports.Repeat,
|
exports.Repeat,
|
||||||
exports.Newline,
|
exports.Newline,
|
||||||
exports.CarriageReturn,
|
exports.CarriageReturn,
|
||||||
|
5
lib/utilities.d.ts
vendored
5
lib/utilities.d.ts
vendored
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
/**
|
/**
|
||||||
* Some utility functions for Human2Regex
|
* Some utility functions for Human2Regex
|
||||||
* @packageDocumentation
|
* @packageDocumentation
|
||||||
@ -130,6 +130,7 @@ export declare class CommonError {
|
|||||||
*
|
*
|
||||||
* @param error The lexing error
|
* @param error The lexing error
|
||||||
* @returns a new CommonError
|
* @returns a new CommonError
|
||||||
|
* @internal
|
||||||
*/
|
*/
|
||||||
static fromLexError(error: ILexingError): CommonError;
|
static fromLexError(error: ILexingError): CommonError;
|
||||||
/**
|
/**
|
||||||
@ -137,6 +138,7 @@ export declare class CommonError {
|
|||||||
*
|
*
|
||||||
* @param error The parsing error
|
* @param error The parsing error
|
||||||
* @returns a new CommonError
|
* @returns a new CommonError
|
||||||
|
* @internal
|
||||||
*/
|
*/
|
||||||
static fromParseError(error: IRecognitionException): CommonError;
|
static fromParseError(error: IRecognitionException): CommonError;
|
||||||
/**
|
/**
|
||||||
@ -144,6 +146,7 @@ export declare class CommonError {
|
|||||||
*
|
*
|
||||||
* @param error The semantic error
|
* @param error The semantic error
|
||||||
* @returns a new CommonError
|
* @returns a new CommonError
|
||||||
|
* @internal
|
||||||
*/
|
*/
|
||||||
static fromSemanticError(error: ISemanticError): CommonError;
|
static fromSemanticError(error: ISemanticError): CommonError;
|
||||||
/**
|
/**
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
"use strict";
|
"use strict";
|
||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
Object.defineProperty(exports, "__esModule", { value: true });
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
exports.CommonError = exports.append = exports.regexEscape = exports.removeQuotes = exports.findLastIndex = exports.last = exports.first = exports.isSingleRegexCharacter = exports.combineFlags = exports.hasFlag = exports.makeFlag = exports.usefulConditional = exports.unusedParameter = void 0;
|
exports.CommonError = exports.append = exports.regexEscape = exports.removeQuotes = exports.findLastIndex = exports.last = exports.first = exports.isSingleRegexCharacter = exports.combineFlags = exports.hasFlag = exports.makeFlag = exports.usefulConditional = exports.unusedParameter = void 0;
|
||||||
/**
|
/**
|
||||||
@ -181,6 +181,7 @@ class CommonError {
|
|||||||
*
|
*
|
||||||
* @param error The lexing error
|
* @param error The lexing error
|
||||||
* @returns a new CommonError
|
* @returns a new CommonError
|
||||||
|
* @internal
|
||||||
*/
|
*/
|
||||||
static fromLexError(error) {
|
static fromLexError(error) {
|
||||||
// not really fond of --> and <--
|
// not really fond of --> and <--
|
||||||
@ -192,6 +193,7 @@ class CommonError {
|
|||||||
*
|
*
|
||||||
* @param error The parsing error
|
* @param error The parsing error
|
||||||
* @returns a new CommonError
|
* @returns a new CommonError
|
||||||
|
* @internal
|
||||||
*/
|
*/
|
||||||
static fromParseError(error) {
|
static fromParseError(error) {
|
||||||
var _a, _b, _c;
|
var _a, _b, _c;
|
||||||
@ -204,6 +206,7 @@ class CommonError {
|
|||||||
*
|
*
|
||||||
* @param error The semantic error
|
* @param error The semantic error
|
||||||
* @returns a new CommonError
|
* @returns a new CommonError
|
||||||
|
* @internal
|
||||||
*/
|
*/
|
||||||
static fromSemanticError(error) {
|
static fromSemanticError(error) {
|
||||||
return new CommonError("Semantic Error", error.startLine, error.startColumn, error.length, error.message);
|
return new CommonError("Semantic Error", error.startLine, error.startColumn, error.length, error.message);
|
||||||
|
4765
package-lock.json
generated
4765
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
28
package.json
28
package.json
@ -1,32 +1,32 @@
|
|||||||
{
|
{
|
||||||
"name": "human2regex",
|
"name": "human2regex",
|
||||||
"version": "1.0.2",
|
"version": "1.1.0",
|
||||||
"description": "Humanized Regular Expressions",
|
"description": "Humanized Regular Expressions",
|
||||||
"main": "./lib/index.js",
|
"main": "./lib/index.js",
|
||||||
"typings": "./lib/index.d.ts",
|
"typings": "./lib/index.d.ts",
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@types/glob": "^7.1.3",
|
"@types/glob": "^7.1.3",
|
||||||
"@types/html-minifier": "^3.5.3",
|
"@types/html-minifier": "^3.5.3",
|
||||||
"@types/jest": "^26.0.15",
|
"@types/jest": "^26.0.19",
|
||||||
"@types/mustache": "^4.0.1",
|
"@typescript-eslint/eslint-plugin": "^4.11.1",
|
||||||
"@typescript-eslint/eslint-plugin": "^4.7.0",
|
"@typescript-eslint/parser": "^4.11.1",
|
||||||
"@typescript-eslint/parser": "^4.7.0",
|
|
||||||
"before-build-webpack": "^0.2.9",
|
"before-build-webpack": "^0.2.9",
|
||||||
|
"clean-webpack-plugin": "^3.0.0",
|
||||||
"codecov": "^3.8.1",
|
"codecov": "^3.8.1",
|
||||||
"copy-webpack-plugin": "^6.3.0",
|
"copy-webpack-plugin": "^6.4.1",
|
||||||
"css-loader": "^4.3.0",
|
"css-loader": "^4.3.0",
|
||||||
"eslint": "^7.13.0",
|
"eslint": "^7.17.0",
|
||||||
"glob": "^7.1.6",
|
"glob": "^7.1.6",
|
||||||
|
"handlebars": "^4.7.6",
|
||||||
"html-minifier": "^4.0.0",
|
"html-minifier": "^4.0.0",
|
||||||
"jest": "^26.6.3",
|
"jest": "^26.6.3",
|
||||||
"mini-css-extract-plugin": "^1.3.1",
|
"mini-css-extract-plugin": "^1.3.3",
|
||||||
"mustache": "^4.0.1",
|
|
||||||
"optimize-css-assets-webpack-plugin": "^5.0.4",
|
"optimize-css-assets-webpack-plugin": "^5.0.4",
|
||||||
"remove-files-webpack-plugin": "^1.4.4",
|
"remove-files-webpack-plugin": "^1.4.4",
|
||||||
"ts-jest": "^26.4.4",
|
"ts-jest": "^26.4.4",
|
||||||
"ts-loader": "^8.0.11",
|
"ts-loader": "^8.0.13",
|
||||||
"ts-node": "^9.0.0",
|
"ts-node": "^9.1.1",
|
||||||
"typescript": "^4.0.5",
|
"typescript": "^4.1.3",
|
||||||
"webpack": "^4.44.2",
|
"webpack": "^4.44.2",
|
||||||
"webpack-cli": "^3.3.12"
|
"webpack-cli": "^3.3.12"
|
||||||
},
|
},
|
||||||
@ -45,8 +45,8 @@
|
|||||||
"author": "Patrick Demian",
|
"author": "Patrick Demian",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"chevrotain": "^7.0.3",
|
"chevrotain": "^7.1.0",
|
||||||
"codemirror": "^5.58.2"
|
"codemirror": "^5.59.1"
|
||||||
},
|
},
|
||||||
"repository": {
|
"repository": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
|
@ -1,21 +1,21 @@
|
|||||||
{{! Copyright (c) 2020 Patrick Demian; Licensed under MIT }}
|
{{! Copyright (c) 2021 Patrick Demian; Licensed under MIT }}
|
||||||
|
|
||||||
{{> header}}
|
{{> header title="Error 404 - Not Found" description="Not Found"}}
|
||||||
<!-- Main Content -->
|
<!-- Main Content -->
|
||||||
<div class="container contained-container" id="maincontent" role="main">
|
<div class="container contained-container" id="maincontent" role="main">
|
||||||
<!-- Page Header -->
|
<!-- Page Header -->
|
||||||
<div class="align_header">
|
<div class="align_header">
|
||||||
<div class="mx-auto">
|
<div class="mx-auto">
|
||||||
<div class="site-heading">
|
<div class="site-heading">
|
||||||
<h1>{{error-code}}</h1>
|
<h1>404</h1>
|
||||||
<span class="subheading">{{error-subheading}}</span>
|
<span class="subheading">Not Found</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<br><br><br>
|
<br><br><br>
|
||||||
<div class="row">
|
<div class="row">
|
||||||
<div class="col-12 mx-auto">
|
<div class="col-12 mx-auto">
|
||||||
<h3 class="align_header">{{{error-string}}}</h3>
|
<h3 class="align_header">The resource could not be found.</h3>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
@ -1,9 +0,0 @@
|
|||||||
{
|
|
||||||
"page": {
|
|
||||||
"title": "Error 404 - Not Found",
|
|
||||||
"description": "Not Found"
|
|
||||||
},
|
|
||||||
"error-code": "404",
|
|
||||||
"error-subheading": "Not Found",
|
|
||||||
"error-string": "The resource could not be found."
|
|
||||||
}
|
|
Before Width: | Height: | Size: 2.1 KiB After Width: | Height: | Size: 2.1 KiB |
Before Width: | Height: | Size: 15 KiB After Width: | Height: | Size: 15 KiB |
Before Width: | Height: | Size: 8.0 KiB After Width: | Height: | Size: 8.0 KiB |
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
|
|
||||||
.align_header {
|
.align_header {
|
||||||
text-align: center;
|
text-align: center;
|
||||||
@ -182,3 +182,25 @@ pre code {
|
|||||||
.heading-link:focus {
|
.heading-link:focus {
|
||||||
color: rgba(255, 255, 255, .8)
|
color: rgba(255, 255, 255, .8)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.tut-contents {
    display: table;
    border: 1px solid #a2a9b1;
    background-color: #f8f9fa;
    /* single padding declaration: the earlier 7px value was always overridden by this one */
    padding: 5px;
    font-size: 16px!important;
}
|
||||||
|
.tut-title {
|
||||||
|
text-align: center;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
.tut-contents li {
|
||||||
|
list-style-type: none;
|
||||||
|
}
|
||||||
|
.tut-contents>ul {
|
||||||
|
margin-top: 0;
|
||||||
|
padding-left: 0;
|
||||||
|
margin-left: 0.5em;
|
||||||
|
margin-right: 0.5em;
|
||||||
|
}
|
64
src/docs/index.hbs
Normal file
64
src/docs/index.hbs
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
{{! Copyright (c) 2021 Patrick Demian; Licensed under MIT }}
|
||||||
|
|
||||||
|
{{> header title="Human2Regex" description="Create regular expressions with natural, human language"}}
|
||||||
|
<!-- Main Content -->
|
||||||
|
<div class="container" id="maincontent" role="main">
|
||||||
|
<div class="row">
|
||||||
|
<div class="col-lg-8 tenpx-margin-bottom">
|
||||||
|
<div class="form-group row zero-margin-bottom">
|
||||||
|
<label for="dialect" class="col-sm-4 col-form-label">Regex dialect:</label>
|
||||||
|
<div class="col-sm-8">
|
||||||
|
<select class="form-control" id="dialect">
|
||||||
|
<option value="js" selected>Javascript</option>
|
||||||
|
<option value="dotnet">.NET</option>
|
||||||
|
<option value="python">Python</option>
|
||||||
|
<option value="boost">C++ Boost</option>
|
||||||
|
<option value="java">Java 7+</option>
|
||||||
|
<option value="pcre">PCRE</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<h4>Your Regular Expression:</h4>
|
||||||
|
<div class="row">
|
||||||
|
<div class="col-xl-11 tenpx-margin-bottom">
|
||||||
|
<input readonly type="text" class="form-control" id="regex">
|
||||||
|
</div>
|
||||||
|
<div class="col-xl-1">
|
||||||
|
<button type="button" class="btn btn-secondary float-right" id="clip">Copy</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<h4>Human Speak:</h4>
|
||||||
|
<textarea class="form-control" id="human" rows="25">{{> example_code}}</textarea>
|
||||||
|
<h4>Errors:</h4>
|
||||||
|
<textarea readonly class="form-control " id="errors" rows="5"></textarea>
|
||||||
|
</div>
|
||||||
|
<br>
|
||||||
|
<div class="col-lg-4 tenpx-margin-bottom">
|
||||||
|
<div class="cheatsheet">
|
||||||
|
<h2>Cheat Sheet:</h2>
|
||||||
|
<p>Full documentation available <a href="/tutorial.html">here</a></p>
|
||||||
|
<p class="font-weight-bold">Matching</p>
|
||||||
|
<p>{{i-code}}match "hello world"{{end-i-code}} matches "hello world" exactly</p>
|
||||||
|
<p>{{i-code}}match "hello" then optionally " world"{{end-i-code}} matches "hello" or "hello world"</p>
|
||||||
|
<p>{{i-code}}match "hello" or "world"{{end-i-code}} matches "hello" or "world"</p>
|
||||||
|
<p>{{i-code}}match a word{{end-i-code}} matches any word</p>
|
||||||
|
<p class="font-weight-bold">Repetition</p>
|
||||||
|
<p>{{i-code}}match 0+ "hello"{{end-i-code}} matches 0 or more "hello"s</p>
|
||||||
|
<p>{{i-code}}match 3 "hello"{{end-i-code}} matches exactly "hellohellohello"</p>
|
||||||
|
<p>{{i-code}}match 1 to 5 "hello"{{end-i-code}} matches between 1 to 5 "hello"s</p>
|
||||||
|
<p>{{i-code}}repeat 0 or more{{end-i-code}} repeats the indented text 0 or more times (default)</p>
|
||||||
|
<p>{{i-code}}optionally repeat between 3 to 5{{end-i-code}} optionally repeats the indented text 3 to 5 times</p>
|
||||||
|
<p class="font-weight-bold">Grouping</p>
|
||||||
|
<p>{{i-code}}create a group called mygroup{{end-i-code}} creates a group called "mygroup"</p>
|
||||||
|
<p>{{i-code}}create an optional group{{end-i-code}} creates an unnamed optional group</p>
|
||||||
|
<p class="font-weight-bold">Using</p>
|
||||||
|
<p>{{i-code}}using global and case insensitive{{end-i-code}} uses the 'g' and 'i' flags</p>
|
||||||
|
<p class="font-weight-bold">Misc</p>
|
||||||
|
<p>{{i-code}}// comment{{end-i-code}} is a single line comment</p>
|
||||||
|
<p>{{i-code}}/* comment */{{end-i-code}} is a multi line comment</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{{> footer}}
|
@ -1,6 +0,0 @@
|
|||||||
{
|
|
||||||
"page": {
|
|
||||||
"title": "Human2Regex",
|
|
||||||
"description": "Create regular expressions with natural, human language"
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,97 +0,0 @@
|
|||||||
{{! Copyright (c) 2020 Patrick Demian; Licensed under MIT }}
|
|
||||||
|
|
||||||
{{> header}}
|
|
||||||
<!-- Main Content -->
|
|
||||||
<div class="container" id="maincontent" role="main">
|
|
||||||
<div class="row">
|
|
||||||
<div class="col-lg-8 tenpx-margin-bottom">
|
|
||||||
<div class="form-group row zero-margin-bottom">
|
|
||||||
<label for="dialect" class="col-sm-4 col-form-label">Regex dialect:</label>
|
|
||||||
<div class="col-sm-8">
|
|
||||||
<select class="form-control" id="dialect">
|
|
||||||
<option value="js" selected>Javascript</option>
|
|
||||||
<option value="dotnet">.NET</option>
|
|
||||||
<option value="python">Python</option>
|
|
||||||
<option value="boost">C++ Boost</option>
|
|
||||||
<option value="java">Java 7+</option>
|
|
||||||
<option value="pcre">PCRE</option>
|
|
||||||
</select>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<h4>Your Regular Expression:</h4>
|
|
||||||
<div class="row">
|
|
||||||
<div class="col-xl-11 tenpx-margin-bottom">
|
|
||||||
<input readonly type="text" class="form-control" id="regex"></input>
|
|
||||||
</div>
|
|
||||||
<div class="col-xl-1">
|
|
||||||
<button type="button" class="btn btn-secondary float-right" id="clip">Copy</button>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<h4>Human Speak:</h4>
|
|
||||||
<textarea class="form-control" id="human" rows="25">
|
|
||||||
/* Make a regex that matches (basic) URLs */
|
|
||||||
|
|
||||||
using global and exact matching
|
|
||||||
create an optional group called protocol
|
|
||||||
match "http"
|
|
||||||
possibly match "s"
|
|
||||||
match "://"
|
|
||||||
create an optional group called subdomain
|
|
||||||
repeat
|
|
||||||
match a word, then "."
|
|
||||||
create a group called domain
|
|
||||||
match 1+ words or "_" or "-"
|
|
||||||
match "."
|
|
||||||
match a word
|
|
||||||
# port, but we don't care about it, so ignore it
|
|
||||||
optionally match ":" then 0+ digits
|
|
||||||
create an optional group called path
|
|
||||||
repeat
|
|
||||||
match "/"
|
|
||||||
match 0+ words or "_" or "-"
|
|
||||||
create an optional group
|
|
||||||
# we don't want to capture the '?', so don't name the group until afterwards
|
|
||||||
match "?"
|
|
||||||
create a group called query
|
|
||||||
repeat
|
|
||||||
match 1+ words or "_" or "-"
|
|
||||||
match "="
|
|
||||||
match 1+ words or "_" or "-"
|
|
||||||
create an optional group
|
|
||||||
# fragment, again, we don't care, so ignore everything afterwards
|
|
||||||
match "#"
|
|
||||||
match 0+ any thing
|
|
||||||
</textarea>
|
|
||||||
<h4>Errors:</h4>
|
|
||||||
<textarea readonly class="form-control " id="errors" rows="5"></textarea>
|
|
||||||
</div>
|
|
||||||
<br>
|
|
||||||
<div class="col-lg-4 tenpx-margin-bottom">
|
|
||||||
<div class="cheatsheet">
|
|
||||||
<h2>Cheat Sheet:</h2>
|
|
||||||
<p>Full documentation available <a href="tutorial.html">here</a></p>
|
|
||||||
<p class="font-weight-bold">Matching</p>
|
|
||||||
<p><code class="cm-s-idea">match "hello world"</code> matches "hello world" exactly<p>
|
|
||||||
<p><code class="cm-s-idea">match "hello" then optionally " world"</code> matches "hello" or "hello world"</p>
|
|
||||||
<p><code class="cm-s-idea">match "hello" or "world"</code> matches "hello" or "world</p>
|
|
||||||
<p><code class="cm-s-idea">match a word</code> matches any word
|
|
||||||
<p class="font-weight-bold">Repetition</p>
|
|
||||||
<p><code class="cm-s-idea">match 0+ "hello"</code> matches 0 or more "hello"s</p>
|
|
||||||
<p><code class="cm-s-idea">match 3 "hello"</code> matches exactly "hellohellohello"</p>
|
|
||||||
<p><code class="cm-s-idea">match 1 to 5 "hello"</code> matches between 1 to 5 "hello"s</p>
|
|
||||||
<p><code class="cm-s-idea">repeat 0 or more</code> repeats the intended text 0 or more times (default)</p>
|
|
||||||
<p><code class="cm-s-idea">optionally repeat between 3 to 5</code> optionally repeats the indented text 3 to 5 times</p>
|
|
||||||
<p class="font-weight-bold">Grouping</p>
|
|
||||||
<p><code class="cm-s-idea">create a group called mygroup</code> creates a group called "mygroup"</p>
|
|
||||||
<p><code class="cm-s-idea">create an optional group</code> creates an unnamed optional group</p>
|
|
||||||
<p class="font-weight-bold">Using</p>
|
|
||||||
<p><code class="cm-s-idea">using global and case insensitive</code> uses the 'g' and 'i' flags</p>
|
|
||||||
<p class="font-weight-bold">Misc</p>
|
|
||||||
<p><code class="cm-s-idea">// comment</code> is a single line comment</p>
|
|
||||||
<p><code class="cm-s-idea">/* comment */</code> is a multi line comment</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
{{> footer}}
|
|
32
src/docs/partials/example_code.hbs
Normal file
32
src/docs/partials/example_code.hbs
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
/* Make a regex that matches (basic) URLs */
|
||||||
|
|
||||||
|
using global and exact matching
|
||||||
|
create an optional group called protocol
|
||||||
|
match "http"
|
||||||
|
possibly match "s"
|
||||||
|
match "://"
|
||||||
|
create an optional group called subdomain
|
||||||
|
repeat
|
||||||
|
match a word, then "."
|
||||||
|
create a group called domain
|
||||||
|
match 1+ words or "_" or "-"
|
||||||
|
match "."
|
||||||
|
match a word
|
||||||
|
# port, but we don't care about it, so ignore it
|
||||||
|
optionally match ":" then 0+ digits
|
||||||
|
create an optional group called path
|
||||||
|
repeat
|
||||||
|
match "/"
|
||||||
|
match 0+ words or "_" or "-"
|
||||||
|
create an optional group
|
||||||
|
# we don't want to capture the '?', so don't name the group until afterwards
|
||||||
|
match "?"
|
||||||
|
create a group called query
|
||||||
|
repeat
|
||||||
|
match 1+ words or "_" or "-"
|
||||||
|
match "="
|
||||||
|
match 1+ words or "_" or "-"
|
||||||
|
create an optional group
|
||||||
|
# fragment, again, we don't care, so ignore everything afterwards
|
||||||
|
match "#"
|
||||||
|
match 0+ any thing
|
@ -1,11 +1,11 @@
|
|||||||
{{! Copyright (c) 2020 Patrick Demian; Licensed under MIT }}
|
{{! Copyright (c) 2021 Patrick Demian; Licensed under MIT }}
|
||||||
|
|
||||||
<!-- Footer -->
|
<!-- Footer -->
|
||||||
<footer>
|
<footer>
|
||||||
<div class="container">
|
<div class="container">
|
||||||
<div class="row">
|
<div class="row">
|
||||||
<div class="col-lg-8 col-md-10 mx-auto">
|
<div class="col-lg-8 col-md-10 mx-auto">
|
||||||
<p class="copyright">Copyright © 2020 Patrick Demian. This page's source code is available at <a rel="noopener noreferrer" href="https://github.com/pdemian/human2regex">github.com/pdemian/human2regex</a></p>
|
<p class="copyright">Copyright © 2021 Patrick Demian. This page's source code is available at <a rel="noopener noreferrer" href="https://github.com/pdemian/human2regex">github.com/pdemian/human2regex</a></p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -14,7 +14,7 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Our script -->
|
<!-- Our script -->
|
||||||
<script defer src="bundle.min.js"></script>
|
<script defer src="/bundle.min.js"></script>
|
||||||
|
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
@ -1,22 +1,22 @@
|
|||||||
{{! Copyright (c) 2020 Patrick Demian; Licensed under MIT }}
|
{{! Copyright (c) 2021 Patrick Demian; Licensed under MIT }}
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html lang="en" dir="ltr">
|
<html lang="en" dir="ltr">
|
||||||
<head>
|
<head>
|
||||||
<!-- Metadata -->
|
<!-- Metadata -->
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
|
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
|
||||||
<meta name="description" content="{{page.description}}">
|
<meta name="description" content="{{description}}">
|
||||||
<meta name="keywords" content="Human2Regex, Human, Regex, Natural, Language, Natural Language">
|
<meta name="keywords" content="Human2Regex, Human, Regex, Natural, Language, Natural Language">
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
||||||
|
|
||||||
<title>{{page.title}}</title>
|
<title>{{title}}</title>
|
||||||
|
|
||||||
<!-- Our own CSS -->
|
<!-- Our own CSS -->
|
||||||
<link href="bundle.min.css" rel="stylesheet" type="text/css">
|
<link href="/bundle.min.css" rel="stylesheet" type="text/css">
|
||||||
|
|
||||||
<meta name="theme-color" content="#212529">
|
<meta name="theme-color" content="#212529">
|
||||||
<meta name="apple-mobile-web-app-capable" content="yes">
|
<meta name="apple-mobile-web-app-capable" content="yes">
|
||||||
<meta name="apple-mobile-web-app-status-bar-style" content="default">
|
<meta name="apple-mobile-web-app-status-bar-style" content="default">
|
||||||
<link rel="icon" type="image/x-icon" href="favicon.ico">
|
<link rel="icon" type="image/x-icon" href="/favicon.ico">
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<a class="skip skip-top" href="#maincontent">Skip to main content</a>
|
<a class="skip skip-top" href="#maincontent">Skip to main content</a>
|
||||||
@ -25,14 +25,14 @@
|
|||||||
<!-- Navigation -->
|
<!-- Navigation -->
|
||||||
<nav class="navbar navbar-expand-lg navbar-light fixed-top" id="mainNav">
|
<nav class="navbar navbar-expand-lg navbar-light fixed-top" id="mainNav">
|
||||||
<div class="container">
|
<div class="container">
|
||||||
<a class="navbar-brand" href="index.html">
|
<a class="navbar-brand" href="/index.html">
|
||||||
<img src="favicon-small.png" width="30" height="30" class="d-inline-block align-top" alt="logo"> Human2Regex
|
<img src="/favicon-small.png" width="30" height="30" class="d-inline-block align-top" alt="logo"> Human2Regex
|
||||||
</a>
|
</a>
|
||||||
|
|
||||||
<div class="float-right heading-links">
|
<div class="float-right heading-links">
|
||||||
<a class="heading-link" href="index.html">Index</a>
|
<a class="heading-link" href="/index.html">Index</a>
|
||||||
<span> | </span>
|
<span> | </span>
|
||||||
<a class="heading-link" href="tutorial.html">Tutorial</a>
|
<a class="heading-link" href="/tutorial.html">Tutorial</a>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</nav>
|
</nav>
|
@ -1,43 +1,65 @@
|
|||||||
{{! Copyright (c) 2020 Patrick Demian; Licensed under MIT }}
|
{{! Copyright (c) 2021 Patrick Demian; Licensed under MIT }}
|
||||||
|
|
||||||
{{> header}}
|
{{> header title="Human2Regex Tutorial" description="Create regular expressions with natural, human language"}}
|
||||||
<!-- Main Content -->
|
<!-- Main Content -->
|
||||||
<div class="container contained-container" id="maincontent" role="main">
|
<div class="container contained-container" id="maincontent" role="main">
|
||||||
<div id="tutorial">
|
<div id="tutorial">
|
||||||
<h2>Tutorial</h2>
|
<h2 id="tut-begin">Tutorial</h2>
|
||||||
<br>
|
<br>
|
||||||
|
|
||||||
<p class="font-weight-bold" id="tut-preface">0. Preface</p>
|
<p class="font-weight-bold" id="tut-preface">0. Preface</p>
|
||||||
<p>Human2Regex (H2R) is a way to spell out a regular expression in an easy to read, easy to modify language. H2R supports multiple languages as well as many (though not all) different regular expression options such as named groups and quantifiers. You may notice multiple keywords specifying the same thing, and that is intended! Just like how in English there are many ways to express yourself, H2R is made to be flexible and easy to understand. With a range, do you prefer "...", "through", or "to"? It's up to you to choose, H2R supports all of those!</p>
|
<p>Human2Regex (H2R) is a way to spell out a regular expression in an easy to read, easy to modify language. H2R supports multiple languages as well as many (though not all) different regular expression options such as named groups and quantifiers. You may notice multiple keywords specifying the same thing, and that is intended! Just like how in English there are many ways to express yourself, H2R is made to be flexible and easy to understand. With a range, do you prefer "...", "through", or "to"? It's up to you to choose, H2R supports all of those!</p>
|
||||||
|
|
||||||
|
<div class="tut-contents" role="navigation">
|
||||||
|
<div class="tut-title">Contents</div>
|
||||||
|
<ul>
|
||||||
|
<li>
|
||||||
|
<a href="#tut-begin">Tutorial</a>
|
||||||
|
<ul>
|
||||||
|
<li><a href="#tut-first-match">1.1 First match</a></li>
|
||||||
|
<li><a href="#tut-using">1.2 Using Specifiers</a></li>
|
||||||
|
<li><a href="#tut-multiple-match">1.3 Matching multiple items</a></li>
|
||||||
|
<li><a href="#tut-optionality">1.4 Optionality</a></li>
|
||||||
|
<li><a href="#tut-negation">1.5 Negation</a></li>
|
||||||
|
<li><a href="#tut-other-match">1.6 Other matching specifiers</a></li>
|
||||||
|
<li><a href="#tut-repeition">1.7 Repetition</a></li>
|
||||||
|
<li><a href="#tut-grouping">1.8 Grouping</a></li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
<li><a href="#tut-final">Putting it all together</a></li>
|
||||||
|
<li>
|
||||||
|
<a href="#tut-backref">Advanced features</a>
|
||||||
|
<ul>
|
||||||
|
<li><a href="#tut-backref">Backreferences</a></li>
|
||||||
|
<li><a href="#tut-if">If statements</a></li>
|
||||||
|
<li><a href="#tut-unicode">Unicode character properties</a></li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
|
||||||
<br>
|
<br>
|
||||||
<p class="font-weight-bold" id="tut-first-match">1. Your first Match</p>
|
<p class="font-weight-bold" id="tut-first-match">1. Your first Match</p>
|
||||||
<p>Every language starts with a "Hello World" program, so let's match the output of those programs. Matching is done using the keyword <code class="cm-s-idea">match</code> followed by what you want to match.
|
<p>Every language starts with a "Hello World" program, so let's match the output of those programs. Matching is done using the keyword {{i-code}}match{{end-i-code}} followed by what you want to match.
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match "Hello World"{{end-s-code}}
|
||||||
match "Hello World"
|
The above statement will generate a regular expression that matches "Hello World", like "/Hello World/". Any invalid characters will automatically be escaped, so you don't need to worry about it. H2R also supports block comments with {{i-code}}/**/{{end-i-code}}, or line comments with {{i-code}}//{{end-i-code}} or {{i-code}}#{{end-i-code}} so you can explain why or what you intend to match.</p>
|
||||||
</code></span>
|
{{p-code}}/* This is a block comment */
|
||||||
The above statement will generate a regular expression that matches "Hello World", like "/Hello World/". Any invalid characters will automatically be escaped, so you don't need to worry about it. H2R also supports block comments with <code class="cm-s-idea">/**/</code>, or line comments with <code class="cm-s-idea">//</code> or <code class="cm-s-idea">#</code> so you can explain why or what you intend to match.</p>
|
|
||||||
<pre class="tutorial-code"><code class="cm-s-idea">/* This is a block comment */
|
|
||||||
match "Hello World" // matches the output of "Hello World" programs
|
match "Hello World" // matches the output of "Hello World" programs
|
||||||
</code></pre>
|
{{end-p-code}}
|
||||||
<p>Now what if we want to match every case variation of "Hello World" like "hello world" or "hELLO wORLD"? H2R supports the <code class="cm-s-idea">or</code> operator which allows you to specify many possible combinations.
|
<p>Now what if we want to match every case variation of "Hello World" like "hello world" or "hELLO wORLD"? H2R supports the {{i-code}}or{{end-i-code}} operator which allows you to specify many possible combinations.
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match "Hello World" or "hello world" or "hELLO wORLD"{{end-s-code}}
|
||||||
match "Hello World" or "hello world" or "hELLO wORLD"
|
Or, you can use a {{i-code}}using{{end-i-code}} statement to specify that you want it to be case insensitive.</p>
|
||||||
</code></span>
|
|
||||||
Or, you can use a <code class="cm-s-idea">using</code> statement to specify that you want it to be case insensitive.</p>
|
|
||||||
|
|
||||||
<br>
|
<br>
|
||||||
<p class="font-weight-bold" id="tut-using">2. Using Specifiers</p>
|
<p class="font-weight-bold" id="tut-using">2. Using Specifiers</p>
|
||||||
<p>Using statements appear at the beginning. You may have one or more using statements which each can contain one or more specifiers. For example:
|
<p>Using statements appear at the beginning. You may have one or more using statements which each can contain one or more specifiers. For example:
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}using global and case insensitive matching{{end-s-code}}
|
||||||
using global and case insensitive matching
|
|
||||||
</code></span>
|
|
||||||
or</p>
|
or</p>
|
||||||
<pre class="tutorial-code">
|
{{p-code}}using global
|
||||||
<code class="cm-s-idea">using global
|
|
||||||
using case insensitive
|
using case insensitive
|
||||||
</code></pre>
|
{{end-p-code}}
|
||||||
<p>The <code class="cm-s-idea">matching</code> keyword is optional. The flags which are available are:</p>
|
<p>The {{i-code}}matching{{end-i-code}} keyword is optional. The flags which are available are:</p>
|
||||||
|
|
||||||
<table class="table table-sm table-striped table-bordered">
|
<table class="table table-sm table-striped table-bordered">
|
||||||
<thead>
|
<thead>
|
||||||
@ -49,27 +71,27 @@ using case insensitive
|
|||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code class="cm-s-idea">multiline</code></td>
|
<td>{{i-code}}multiline{{end-i-code}}</td>
|
||||||
<td>Matches can cross line breaks</td>
|
<td>Matches can cross line breaks</td>
|
||||||
<td>/<your regex>/m</td>
|
<td>/<your regex>/m</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code class="cm-s-idea">global</code></td>
|
<td>{{i-code}}global{{end-i-code}}</td>
|
||||||
<td>Multiple matches are allowed</td>
|
<td>Multiple matches are allowed</td>
|
||||||
<td>/<your regex>/g</td>
|
<td>/<your regex>/g</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code class="cm-s-idea">case sensitive</code></td>
|
<td>{{i-code}}case sensitive{{end-i-code}}</td>
|
||||||
<td>Match must be exact case</td>
|
<td>Match must be exact case</td>
|
||||||
<td><span class="font-italic">none</span></td>
|
<td><span class="font-italic">none</span></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code class="cm-s-idea">case insensitive</code></td>
|
<td>{{i-code}}case insensitive{{end-i-code}}</td>
|
||||||
<td>Match may be any case</td>
|
<td>Match may be any case</td>
|
||||||
<td>/<your regex>/i</td>
|
<td>/<your regex>/i</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code class="cm-s-idea">exact</code></td>
|
<td>{{i-code}}exact{{end-i-code}}</td>
|
||||||
<td>An exact statement matches a whole line exactly, nothing before, nothing after</td>
|
<td>An exact statement matches a whole line exactly, nothing before, nothing after</td>
|
||||||
<td>/^<your regex>$/</td>
|
<td>/^<your regex>$/</td>
|
||||||
</tr>
|
</tr>
|
||||||
@ -77,70 +99,49 @@ using case insensitive
|
|||||||
</table>
|
</table>
|
||||||
|
|
||||||
<p>To match any variation of hello world, we would then do the following:</p>
|
<p>To match any variation of hello world, we would then do the following:</p>
|
||||||
<pre class="tutorial-code"><code class="cm-s-idea">using case insensitive matching
|
{{p-code}}using case insensitive matching
|
||||||
match "hello world"
|
match "hello world"
|
||||||
</code></pre>
|
{{end-p-code}}
|
||||||
|
|
||||||
<br>
|
<br>
|
||||||
<p class="font-weight-bold" id="tut-multiple-match">3. Matching multiple items</p>
|
<p class="font-weight-bold" id="tut-multiple-match">3. Matching multiple items</p>
|
||||||
<p>H2R comes with 2 options to match multiple items in a row. The first is to simply write multiple separate <code class="cm-s-idea">match</code> statements like:</p>
|
<p>H2R comes with 2 options to match multiple items in a row. The first is to simply write multiple separate {{i-code}}match{{end-i-code}} statements like:</p>
|
||||||
<pre class="tutorial-code">
|
{{p-code}}match "hello"
|
||||||
<code class="cm-s-idea">match "hello"
|
|
||||||
match " "
|
match " "
|
||||||
match "world"
|
match "world"
|
||||||
</code></pre>
|
{{end-p-code}}
|
||||||
<p>However, you can also use a comma, <code class="cm-s-idea">and</code>, or <code class="cm-s-idea">then</code> for a more concise match.
|
<p>However, you can also use a comma, {{i-code}}and{{end-i-code}}, or {{i-code}}then{{end-i-code}} for a more concise match.
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match "hello", " ", "world"{{end-s-code}}
|
||||||
match "hello", " ", "world"
|
|
||||||
</code></span>
|
|
||||||
or
|
or
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match "hello" and " " and "world"{{end-s-code}}
|
||||||
match "hello" and " " and "world"
|
|
||||||
</code></span>
|
|
||||||
or
|
or
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match "hello" then " " then "world"{{end-s-code}}
|
||||||
match "hello" then " " then "world"
|
|
||||||
</code></span>
|
|
||||||
or any combination like
|
or any combination like
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match "hello", " " and then "world"{{end-s-code}}
|
||||||
match "hello", " " and then "world"
|
|
||||||
</code></span>
|
|
||||||
|
|
||||||
<br>
|
<br>
|
||||||
<p class="font-weight-bold" id="tut-optionality">4. Optionality</p>
|
<p class="font-weight-bold" id="tut-optionality">4. Optionality</p>
|
||||||
<p>Sometimes you wish to match something that may or may not exist. In H2R, this is done via the <code class="cm-s-idea">optional</code>, <code class="cm-s-idea">optionally</code>, <code class="cm-s-idea">possibly</code> or <code class="cm-s-idea">maybe</code> keyword.
|
<p>Sometimes you wish to match something that may or may not exist. In H2R, this is done via the {{i-code}}optional{{end-i-code}}, {{i-code}}optionally{{end-i-code}}, {{i-code}}possibly{{end-i-code}} or {{i-code}}maybe{{end-i-code}} keyword.
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}optionally match "hello world"{{end-s-code}}
|
||||||
optionally match "hello world"
|
will match 0 or 1 occurrences of "hello world". This can be used alongside matching multiple statements in a single {{i-code}}match{{end-i-code}} statement.
|
||||||
</code></span>
|
{{s-code}}match "hello", maybe " ", "world"{{end-s-code}}
|
||||||
will match 0 or 1 "hello world"'s. This can be used alongside matching multiple statements in a single <code class="cm-s-idea">match</code> statement.
|
will match "hello", an optional space if it exists, and "world". However, the start {{i-code}}optional{{end-i-code}} is for the entire match statement. Thus,
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}possibly match "hello", " ", then "world"{{end-s-code}}
|
||||||
match "hello", maybe " ", "world"
|
will actually make the whole "hello world" an optional match rather than just the first "hello". If you want to make the first match optional but keep the rest required, place the {{i-code}}optional{{end-i-code}} immediately after the {{i-code}}match{{end-i-code}}.</p>
|
||||||
</code></span>
|
|
||||||
will match "hello", an optional space if it exists, and "world". However, the start <code class="cm-s-idea">optional</code> is for the entire match statement. Thus,
|
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
|
||||||
possibly match "hello", " ", then "world"
|
|
||||||
</code></span>
|
|
||||||
will actually make the whole "hello world" an optional match rather than just the first "hello". If you want to make the first match optional but keep the rest required, place the <code class="cm-s-idea">optional</code> immediately after the <code class="cm-s-idea">match</code>.</p>
|
|
||||||
|
|
||||||
<br>
|
<br>
|
||||||
<p class="font-weight-bold" id="tut-negation">5. Negation</p>
|
<p class="font-weight-bold" id="tut-negation">5. Negation</p>
|
||||||
<p>You can negate a match with the operator <code class="cm-s-idea">not</code>
|
<p>You can negate a match with the operator {{i-code}}not{{end-i-code}}
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match not "hello world"{{end-s-code}}
|
||||||
match not "hello world"
|
|
||||||
</code></span>
|
|
||||||
or
|
or
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match anything but "hello world"{{end-s-code}}
|
||||||
match anything but "hello world"
|
|
||||||
</code></span>
|
|
||||||
will match everything except for "hello world".</p>
|
will match everything except for "hello world".</p>
|
||||||
|
|
||||||
<br>
|
<br>
|
||||||
<p class="font-weight-bold" id="tut-other-match">6. Other matching specifiers</p>
|
<p class="font-weight-bold" id="tut-other-match">6. Other matching specifiers</p>
|
||||||
<p>Many times you don't know exactly what you wish to match. H2R comes with many specifiers that you can use for your matching. For example, you may wish to match any word. You can do that with:
|
<p>Many times you don't know exactly what you wish to match. H2R comes with many specifiers that you can use for your matching. For example, you may wish to match any word. You can do that with:
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match a word{{end-s-code}}
|
||||||
match a word
|
The {{i-code}}a{{end-i-code}} or {{i-code}}an{{end-i-code}} is optional. The possible specifiers that H2R supports are the following:</p>
|
||||||
</code></span>
|
|
||||||
The <code class="cm-s-idea">a</code> or <code class="cm-s-idea">an</code> is optional. The possible specifiers that H2R supports are the following:</p>
|
|
||||||
<table class="table table-sm table-striped table-bordered">
|
<table class="table table-sm table-striped table-bordered">
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr>
|
||||||
@ -152,71 +153,71 @@ match a word
|
|||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code class="cm-s-idea">anything</code></td>
|
<td>{{i-code}}anything{{end-i-code}}</td>
|
||||||
<td>Matches any character</td>
|
<td>Matches any character</td>
|
||||||
<td>.</td>
|
<td>.</td>
|
||||||
<td> </td>
|
<td> </td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code class="cm-s-idea">word(s)</code></td>
|
<td>{{i-code}}word(s){{end-i-code}}</td>
|
||||||
<td>Matches many a-z, A-Z, _, or digit characters</td>
|
<td>Matches many a-z, A-Z, _, or digit characters</td>
|
||||||
<td>\w+</td>
|
<td>\w+</td>
|
||||||
<td>For a-z only, use <code class="cm-s-idea">letter(s)</code></td>
|
<td>For a-z only, use {{i-code}}letter(s){{end-i-code}}</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code class="cm-s-idea">letter(s)</code></td>
|
<td>{{i-code}}letter(s){{end-i-code}}</td>
|
||||||
<td>Matches any letter character</td>
|
<td>Matches any letter character</td>
|
||||||
<td>[a-zA-Z]</td>
|
<td>[a-zA-Z]</td>
|
||||||
<td> </td>
|
<td> </td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code class="cm-s-idea">number(s)</code></td>
|
<td>{{i-code}}number(s){{end-i-code}}</td>
|
||||||
<td>Matches a string of digit characters</td>
|
<td>Matches a string of digit characters</td>
|
||||||
<td>\d+</td>
|
<td>\d+</td>
|
||||||
<td> </td>
|
<td> </td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code class="cm-s-idea">digit(s)</code></td>
|
<td>{{i-code}}digit(s){{end-i-code}}</td>
|
||||||
<td>Matches any digit character</td>
|
<td>Matches any digit character</td>
|
||||||
<td>\d</td>
|
<td>\d</td>
|
||||||
<td> </td>
|
<td> </td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code class="cm-s-idea">integer(s)</code></td>
|
<td>{{i-code}}integer(s){{end-i-code}}</td>
|
||||||
<td>Matches an integer</td>
|
<td>Matches an integer</td>
|
||||||
<td>[+-]?\d+</td>
|
<td>[+-]?\d+</td>
|
||||||
<td> </td>
|
<td> </td>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code class="cm-s-idea">decimal(s)</code></td>
|
<td>{{i-code}}decimal(s){{end-i-code}}</td>
|
||||||
<td>Matches digits, an optional decimal point and more digits</td>
|
<td>Matches digits, an optional decimal point and more digits</td>
|
||||||
<td>[+-]?((\d+[,.]?\d*)|([,.]\d+))</td>
|
<td>[+-]?((\d+[,.]?\d*)|([,.]\d+))</td>
|
||||||
<td>Supports both "," and "." decimal points</td>
|
<td>Supports both "," and "." decimal points</td>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code class="cm-s-idea">character(s)</code></td>
|
<td>{{i-code}}character(s){{end-i-code}}</td>
|
||||||
<td>Matches a-z, A-Z, _, or digits</td>
|
<td>Matches a-z, A-Z, _, or digits</td>
|
||||||
<td>\w</td>
|
<td>\w</td>
|
||||||
<td>For a-z only, use <code class="cm-s-idea">letter(s)</code></td>
|
<td>For a-z only, use {{i-code}}letter(s){{end-i-code}}</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code class="cm-s-idea">whitespace(s)</code></td>
|
<td>{{i-code}}whitespace(s){{end-i-code}}</td>
|
||||||
<td>Matches any whitespace character</td>
|
<td>Matches any whitespace character</td>
|
||||||
<td>\s</td>
|
<td>\s</td>
|
||||||
<td> </td>
|
<td> </td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code class="cm-s-idea">(word )boundary</code></td>
|
<td>{{i-code}}(word )boundary{{end-i-code}}</td>
|
||||||
<td>Boundary between a word</td>
|
<td>Boundary between a word</td>
|
||||||
<td>\b</td>
|
<td>\b</td>
|
||||||
<td> </td>
|
<td> </td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code class="cm-s-idea">line feed</code>/<code class="cm-s-idea">newline</code></td>
|
<td>{{i-code}}line feed{{end-i-code}}/{{i-code}}newline{{end-i-code}}</td>
|
||||||
<td>Matches a newline</td>
|
<td>Matches a newline</td>
|
||||||
<td>\n</td>
|
<td>\n</td>
|
||||||
<td> </td>
|
<td> </td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code class="cm-s-idea">carriage return</code></td>
|
<td>{{i-code}}carriage return{{end-i-code}}</td>
|
||||||
<td>Matches a carriage return</td>
|
<td>Matches a carriage return</td>
|
||||||
<td>\r</td>
|
<td>\r</td>
|
||||||
<td> </td>
|
<td> </td>
|
||||||
@ -225,132 +226,130 @@ match a word
|
|||||||
</table>
|
</table>
|
||||||
|
|
||||||
<p>You can also create ranges of characters to match. Say for example, you wanted to match any characters between a and z, you could write any of the following:
|
<p>You can also create ranges of characters to match. Say for example, you wanted to match any characters between a and z, you could write any of the following:
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match from "a" to "z" // "from" is optional{{end-s-code}}
|
||||||
match from "a" to "z" // "from" is optional
|
|
||||||
</code></span>
|
|
||||||
or
|
or
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match between "a" and "z" // "between" is optional{{end-s-code}}
|
||||||
match between "a" and "z" // "between" is optional
|
|
||||||
</code></span>
|
|
||||||
or
|
or
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match "a" ... "z" // can use "..." or ".."{{end-s-code}}
|
||||||
match "a" ... "z" // can use "..." or ".."
|
|
||||||
</code></span>
|
|
||||||
or
|
or
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match "a" - "z"{{end-s-code}}
|
||||||
match "a" - "z"
|
|
||||||
</code></span>
|
|
||||||
or
|
or
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match "a" through "z" // can also use thru{{end-s-code}}
|
||||||
match "a" through "z" // can also use thru
|
|
||||||
</code></span>
|
|
||||||
|
|
||||||
<br>
|
<br>
|
||||||
<p class="font-weight-bold" id="tut-repeition">7. Repetition</p>
|
<p class="font-weight-bold" id="tut-repeition">7. Repetition</p>
|
||||||
<p>H2R supports 2 types of repetition: single match repetition, or grouped repetition. When using <code class="cm-s-idea">match</code> you can specify the number of captures you want just before the text to capture.
|
<p>H2R supports 2 types of repetition: single match repetition, or grouped repetition. When using {{i-code}}match{{end-i-code}} you can specify the number of captures you want just before the text to capture.
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match 2 digits{{end-s-code}}
|
||||||
match 2 digits
|
|
||||||
</code></span>
|
|
||||||
or
|
or
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match exactly 2 digits{{end-s-code}}
|
||||||
match exactly 2 digits
|
|
||||||
</code></span>
|
|
||||||
will match any 2 digits in a row. You can also specify a range you wish to capture
|
will match any 2 digits in a row. You can also specify a range you wish to capture
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match 2 ... 5 digits{{end-s-code}}
|
||||||
match 2 ... 5 digits
|
|
||||||
</code></span>
|
|
||||||
or
|
or
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match 2 to 5 digits{{end-s-code}}
|
||||||
match 2 to 5 digits
|
|
||||||
</code></span>
|
|
||||||
or
|
or
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match between 2 to 5 digits{{end-s-code}}
|
||||||
match between 2 to 5 digits
|
will match 2, 3, 4, or 5 digits. You can specify if the final number is exclusive with the {{i-code}}exclusive{{end-i-code}} or {{i-code}}inclusive{{end-i-code}} keywords.
|
||||||
</code></span>
|
{{s-code}}match 2 to 5 exclusive digits{{end-s-code}}
|
||||||
will match 2, 3, 4, or 5 digits. You can specify if the final number is exclusive with the <code class="cm-s-idea">exclusive</code> or <code class="cm-s-idea">inclusive</code> keywords.
|
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
|
||||||
match 2 to 5 exclusive digits
|
|
||||||
</code></span>
|
|
||||||
will only match up to 4 digits. You can also choose to leave the end unspecified.
|
will only match up to 4 digits. You can also choose to leave the end unspecified.
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match 2+ digits{{end-s-code}}
|
||||||
match 2+ digits
|
|
||||||
</code></span>
|
|
||||||
or
|
or
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match 2 or more digits{{end-s-code}}
|
||||||
match 2 or more digits
|
will match 2 or more digits. Repetition can be chained with the {{i-code}}and then{{end-i-code}} keywords or the {{i-code}}optional{{end-i-code}} keyword. For example:
|
||||||
</code></span>
|
{{s-code}}match 1+ digits then optionally "." then optionally 0...8 digits{{end-s-code}}
|
||||||
will match 2 or more digits. Repetition can be chained with the <code class="cm-s-idea">and then</code> keywords or the <code class="cm-s-idea">optional</code> keyword. For example:
|
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
|
||||||
match 1+ digits then optionally "." then optionally 0...8 digits
|
|
||||||
</code></span>
|
|
||||||
|
|
||||||
Suppose you want to repeat a group of these match statements. You can group a repetition using the <code class="cm-s-idea">repeat</code> keyword. Everything underneath that is tabbed (scoped) will be repeated. By default, this will match 0 or more of the following statements.</p>
|
Suppose you want to repeat a group of these match statements. You can group a repetition using the {{i-code}}repeat{{end-i-code}} keyword. Everything underneath that is tabbed (scoped) will be repeated. By default, this will match 0 or more of the following statements.</p>
|
||||||
<pre class="tutorial-code">
|
{{p-code}}repeat
|
||||||
<code class="cm-s-idea">repeat
|
|
||||||
match "Hello "
|
match "Hello "
|
||||||
match "World"
|
match "World"
|
||||||
</code></pre>
|
{{end-p-code}}
|
||||||
<p>Will match 0 or more "Hello "s, but only 1 "World". The same qualifiers that exist for <code class="cm-s-idea">match</code> statements also exist for <code class="cm-s-idea">repeat</code> statements.</p>
|
<p>Will match 0 or more "Hello "s, but only 1 "World". The same qualifiers that exist for {{i-code}}match{{end-i-code}} statements also exist for {{i-code}}repeat{{end-i-code}} statements.</p>
|
||||||
<pre class="tutorial-code">
|
{{p-code}}optionally repeat 3...7 times
|
||||||
<code class="cm-s-idea">optionally repeat 3...7 times
|
|
||||||
match "Hello World"
|
match "Hello World"
|
||||||
</code></pre>
|
{{end-p-code}}
|
||||||
<p>Will potentially match "Hello World" between 3 and 7 times. H2R also supports the following for numbers: <code class="cm-s-idea">One, Two, Three, Four, Five, Six, Seven, Eight, Nine, and Ten</code></p>
|
<p>Will potentially match "Hello World" between 3 and 7 times. H2R also supports the following for numbers: {{i-code}}One, Two, Three, Four, Five, Six, Seven, Eight, Nine, and Ten{{end-i-code}}</p>
|
||||||
|
|
||||||
<br>
|
<br>
|
||||||
<p class="font-weight-bold" id="tut-grouping">8. Grouping</p>
|
<p class="font-weight-bold" id="tut-grouping">8. Grouping</p>
|
||||||
<p>Just like regular expressions, capture groups are supported in H2R. Each group is defined using the <code class="cm-s-idea">create a group</code> keyphrase.</p>
|
<p>Just like regular expressions, capture groups are supported in H2R. Each group is defined using the {{i-code}}create a group{{end-i-code}} keyphrase.</p>
|
||||||
<pre class="tutorial-code">
|
{{p-code}}create a group
|
||||||
<code class="cm-s-idea">create a group
|
|
||||||
match "Hello World"
|
match "Hello World"
|
||||||
</code></pre>
|
{{end-p-code}}
|
||||||
<p>This will create a non-named captured group, equivalent to the regular expression "/(Hello World)/". A non-named captured group will show up in your chosen language's matches, however will not be given a name. To access this match, you will need to know the index of the group. Most regular expression engines support named capture groups, and H2R highly recommends using this feature. If you wish to do so, simply give it a name:<p>
|
<p>This will create a non-named captured group, equivalent to the regular expression "/(Hello World)/". A non-named captured group will show up in your chosen language's matches, however will not be given a name. To access this match, you will need to know the index of the group. Most regular expression engines support named capture groups, and H2R highly recommends using this feature. If you wish to do so, simply give it a name:</p>
|
||||||
<pre class="tutorial-code">
|
{{p-code}}create a group called TestGroup
|
||||||
<code class="cm-s-idea">create a group called TestGroup
|
|
||||||
match "Hello World"
|
match "Hello World"
|
||||||
</code></pre>
|
{{end-p-code}}
|
||||||
<p>In most languages, a named group can be accessed through the match result's group list. Take for example, in JavaScript,
|
<p>In most languages, a named group can be accessed through the match result's group list. Take for example, in JavaScript,</p>
|
||||||
<pre class="tutorial-code">
|
{{p-code}}"hello".match(/(?<TestGroup>hello)/).groups{{end-p-code}}
|
||||||
<code class="cm-s-idea">"hello".match(/(?<TestGroup>hello)/).groups</code>
|
|
||||||
</pre>
|
|
||||||
|
|
||||||
<p>Will return an object with {TestGroup: "hello"}. For another example, check out <a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/match#Using_named_capturing_groups">MDN web docs</a>. Groups can also be optional.</p>
|
<p>Will return an object with {TestGroup: "hello"}. For another example, check out <a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/match#Using_named_capturing_groups">MDN web docs</a>. Groups can also be optional.</p>
|
||||||
<pre class="tutorial-code">
|
{{p-code}}create an optional group
|
||||||
<code class="cm-s-idea">create an optional group
|
|
||||||
match "Hello World"
|
match "Hello World"
|
||||||
</code></pre>
|
{{end-p-code}}
|
||||||
<p>And groups may be nested</p>
|
<p>And groups may be nested</p>
|
||||||
<pre class="tutorial-code">
|
{{p-code}}create a group called TestGroup
|
||||||
<code class="cm-s-idea">create a group called TestGroup
|
|
||||||
match "Hello"
|
match "Hello"
|
||||||
create a group called InnerGroup
|
create a group called InnerGroup
|
||||||
match "World"
|
match "World"
|
||||||
</code></pre>
|
{{end-p-code}}
|
||||||
<p>The regular expression returned by this will be "/(?<TestGroup>Hello(?<InnerGroup>World))/". Again, in JavaScript, the following</p>
|
<p>The regular expression returned by this will be "/(?<TestGroup>Hello(?<InnerGroup>World))/". Again, in JavaScript, the following</p>
|
||||||
<pre class="tutorial-code">
|
{{p-code}}"HelloWorld".match(/(?<TestGroup>Hello(?<InnerGroup>World))/).groups{{end-p-code}}
|
||||||
<code class="cm-s-idea">"HelloWorld".match(/(?<TestGroup>Hello(?<InnerGroup>World))/).groups</code>
|
|
||||||
</pre>
|
|
||||||
<p>Will return an object with {TestGroup: "HelloWorld", InnerGroup: "World"}.</p>
|
<p>Will return an object with {TestGroup: "HelloWorld", InnerGroup: "World"}.</p>
|
||||||
|
|
||||||
<br>
|
<br>
|
||||||
<h3 id="tut-final">Putting it all together</h3>
|
<h3 id="tut-final">Putting it all together</h3>
|
||||||
<p>Grouping, repetition, and matching are the 3 primary elements that make up H2R. They can be combined in any way to generate a regular expression. See the <a href="index.html">main page</a> for an example that combines all above to parse a URL.</p>
|
<p>Grouping, repetition, and matching are the 3 primary elements that make up H2R. They can be combined in any way to generate a regular expression. See the <a href="index.html">main page</a> for an example that combines all above to parse a URL.</p>
|
||||||
|
|
||||||
<h3>Miscellaneous features</h3>
|
<h3>Advanced features</h3>
|
||||||
|
|
||||||
|
<p class="font-weight-bold" id="tut-backref">Backreferences</p>
|
||||||
|
<p>Sometimes you may wish to match the same text that was previously matched. Take for example matching opening and closing XML tags such as <hello>world</hello>:</p>
|
||||||
|
{{p-code}}match "<"
|
||||||
|
create a group called opening_tag
|
||||||
|
match a word or digit or "_" or "-"
|
||||||
|
match ">"
|
||||||
|
match 0+ not "<"
|
||||||
|
match "</"
|
||||||
|
create a group called closing_tag
|
||||||
|
match a word or digit or "_" or "-"
|
||||||
|
match ">"
|
||||||
|
{{end-p-code}}
|
||||||
|
<p>To ensure you matched the same opening tag as closing tag, you'll normally need to perform an additional step afterwards by checking the capture groups are equal. However, in most regex engines, this can be performed automatically through backreferences. Backreferences effectively re-capture the same group. Human2Regex allows you to {{i-code}}rerun{{end-i-code}} or {{i-code}}recapture{{end-i-code}} a previous group.</p>
|
||||||
|
{{p-code}}match "<"
|
||||||
|
create a group called tag
|
||||||
|
match a word or digit or "_" or "-"
|
||||||
|
match ">"
|
||||||
|
match 0+ not "<"
|
||||||
|
match "</"
|
||||||
|
recapture tag
|
||||||
|
match ">"
|
||||||
|
{{end-p-code}}
|
||||||
|
<p>The regex will only successfully match if both the tags are the same. One thing to note however, the first group must be captured. For a "function"-like capture see regex subroutines (not yet implemented).</p>
|
||||||
|
<p>To allow for more natural language, {{i-code}}recapture the group{{end-i-code}} and {{i-code}}recapture the group called{{end-i-code}} are also supported.</p>
|
||||||
|
<p class="font-weight-bold" id="tut-if">If statements</p>
|
||||||
|
<p>Certain regex languages support if statements, which can be used to simplify statements. Human2Regex supports {{i-code}}if{{end-i-code}}, {{i-code}}else if{{end-i-code}}, and {{i-code}}else{{end-i-code}} statements. Inside each {{i-code}}if{{end-i-code}}, you can recapture a group or run a new match. This is done as follows:</p>
|
||||||
|
{{p-code}}if match "hello" then optionally "world"
|
||||||
|
match "!"
|
||||||
|
else if match "goodbye" then optionally "world"
|
||||||
|
match "!"
|
||||||
|
{{end-p-code}}
|
||||||
|
<p>or</p>
|
||||||
|
{{p-code}}create a group called tag
|
||||||
|
match "<" then a word or digit or "_" or "-" then ">"
|
||||||
|
//do we have another tag? keep matching the same tags
|
||||||
|
if rerun tag
|
||||||
|
repeat
|
||||||
|
recapture tag
|
||||||
|
//ignore everything else
|
||||||
|
else
|
||||||
|
match 0+ any thing
|
||||||
|
{{end-p-code}}
|
||||||
<p class="font-weight-bold" id="tut-unicode">Unicode character properties</p>
|
<p class="font-weight-bold" id="tut-unicode">Unicode character properties</p>
|
||||||
<p>You can match specific unicode sequences using <code class="cm-s-idea">"\uXXXX"
|
<p>You can match specific unicode sequences using {{i-code}}"\uXXXX"
|
||||||
</code> or <code class="cm-s-idea">"\UXXXXXXXX"</code> where X is a hexadecimal character.
|
{{end-i-code}} or {{i-code}}"\UXXXXXXXX"{{end-i-code}} where X is a hexadecimal character.
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
{{s-code}}match "\u0669" // matches arabic digit 9 "٩"{{end-s-code}}
|
||||||
match "\u0669" // matches arabic digit 9 "٩"
|
Unicode character classes/scripts can be matched using the {{i-code}}unicode{{end-i-code}} keyword.
|
||||||
</code></span>
|
{{s-code}}match unicode "Latin" // matches any latin character{{end-s-code}}
|
||||||
Unicode character classes/scripts can be matched using the <code class="cm-s-idea">unicode</code> keyword.
|
{{s-code}}match unicode "N" // matches any number character{{end-s-code}}
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
|
||||||
match unicode "Latin" // matches any latin character
|
|
||||||
</code></span>
|
|
||||||
<span class="tutorial-code"><code class="cm-s-idea">
|
|
||||||
match unicode "N" // matches any number character
|
|
||||||
</code></span>
|
|
||||||
The following Unicode class specifiers are available:</p>
|
The following Unicode class specifiers are available:</p>
|
||||||
<table class="table table-sm table-striped table-bordered">
|
<table class="table table-sm table-striped table-bordered">
|
||||||
<thead>
|
<thead>
|
@ -1,6 +0,0 @@
|
|||||||
{
|
|
||||||
"page": {
|
|
||||||
"title": "Human2Regex Tutorial",
|
|
||||||
"description": "Create regular expressions with natural, human language"
|
|
||||||
}
|
|
||||||
}
|
|
501
src/generator.ts
501
src/generator.ts
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Includes all Concrete Syntax Trees for Human2Regex
|
* Includes all Concrete Syntax Trees for Human2Regex
|
||||||
@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last, unusedParameter, makeFlag, append } from "./utilities";
|
import { regexEscape, removeQuotes, hasFlag, combineFlags, isSingleRegexCharacter, first, last, unusedParameter, makeFlag, append } from "./utilities";
|
||||||
import { IToken } from "chevrotain";
|
import { IToken } from "chevrotain";
|
||||||
|
import { minimizeMatchString, groupIfRequired, dontClobberRepetition } from "./generator_helper";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* List of regular expression dialects we support
|
* List of regular expression dialects we support
|
||||||
@ -63,31 +64,54 @@ const unicode_script_codes = [
|
|||||||
];
|
];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The base concrete syntax tree class
|
* Context for validation
|
||||||
*
|
*
|
||||||
|
* @remarks Currently only used to validate groups
|
||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
export abstract class H2RCST {
|
export class GeneratorContext {
|
||||||
|
public groups: { [ key: string ]: { startLine: number, startColumn: number, length: number } } = {};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor for H2RCST
|
* Checks to see if we already have a group defined
|
||||||
*
|
*
|
||||||
* @param tokens Tokens used to calculate where an error occured
|
* @param identifier the group name
|
||||||
* @internal
|
* @returns true if the group name already exists
|
||||||
*/
|
*/
|
||||||
constructor(public tokens: IToken[]) {
|
public hasGroup(identifier: string): boolean {
|
||||||
/* empty */
|
return Object.prototype.hasOwnProperty.call(this.groups, identifier);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds the identifier to the group list
|
||||||
|
*
|
||||||
|
* @param identifier the group name
|
||||||
|
*/
|
||||||
|
public addGroup(identifier: string, tokens: IToken[]): void {
|
||||||
|
const f = first(tokens);
|
||||||
|
const l = last(tokens);
|
||||||
|
|
||||||
|
this.groups[identifier] = {
|
||||||
|
startLine: f.startLine ?? NaN,
|
||||||
|
startColumn: f.startColumn ?? NaN,
|
||||||
|
length: (l.endOffset ?? l.startOffset) - f.startOffset,
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
interface Generates {
|
||||||
/**
|
/**
|
||||||
* Validate that this is both valid and can be generated in the specified language
|
* Validate that this is both valid and can be generated in the specified language
|
||||||
*
|
*
|
||||||
* @remarks There is no guarantee toRegex will work unless validate returns no errors
|
* @remarks There is no guarantee toRegex will work unless validate returns no errors
|
||||||
*
|
*
|
||||||
* @param language the regex dialect we're validating
|
* @param language the regex dialect we're validating
|
||||||
|
* @param context the generator context
|
||||||
* @returns A list of errors
|
* @returns A list of errors
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
public abstract validate(language: RegexDialect): ISemanticError[];
|
validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate a regular expression fragment based on this syntax tree
|
* Generate a regular expression fragment based on this syntax tree
|
||||||
@ -98,6 +122,26 @@ export abstract class H2RCST {
|
|||||||
* @returns a regular expression fragment
|
* @returns a regular expression fragment
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
|
toRegex(language: RegexDialect): string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The base concrete syntax tree class
|
||||||
|
*
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export abstract class H2RCST implements Generates {
|
||||||
|
/**
|
||||||
|
* Constructor for H2RCST
|
||||||
|
*
|
||||||
|
* @param tokens Tokens used to calculate where an error occurred
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
constructor(public tokens: IToken[]) {
|
||||||
|
/* empty */
|
||||||
|
}
|
||||||
|
|
||||||
|
public abstract validate(language: RegexDialect, context: GeneratorContext): ISemanticError[];
|
||||||
public abstract toRegex(language: RegexDialect): string;
|
public abstract toRegex(language: RegexDialect): string;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -186,7 +230,7 @@ export class MatchSubStatementValue {
|
|||||||
*
|
*
|
||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
export class MatchStatementValue {
|
export class MatchStatementValue implements Generates {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor for MatchStatementValue
|
* Constructor for MatchStatementValue
|
||||||
@ -198,6 +242,21 @@ export class MatchStatementValue {
|
|||||||
constructor(public optional: boolean, public statement: MatchSubStatementCST) {
|
constructor(public optional: boolean, public statement: MatchSubStatementCST) {
|
||||||
/* empty */
|
/* empty */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
|
||||||
|
return this.statement.validate(language, context);
|
||||||
|
}
|
||||||
|
|
||||||
|
public toRegex(language: RegexDialect): string {
|
||||||
|
let match_stmt = this.statement.toRegex(language);
|
||||||
|
|
||||||
|
// need to group if optional and ungrouped
|
||||||
|
if (this.optional) {
|
||||||
|
match_stmt = groupIfRequired(match_stmt) + "?";
|
||||||
|
}
|
||||||
|
|
||||||
|
return match_stmt;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -227,11 +286,11 @@ export class MatchSubStatementCST extends H2RCST {
|
|||||||
super(tokens);
|
super(tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
public validate(language: RegexDialect): ISemanticError[] {
|
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
|
||||||
const errors: ISemanticError[] = [];
|
const errors: ISemanticError[] = [];
|
||||||
|
|
||||||
if (this.count) {
|
if (this.count) {
|
||||||
append(errors, this.count.validate(language));
|
append(errors, this.count.validate(language, context));
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const value of this.values) {
|
for (const value of this.values) {
|
||||||
@ -353,56 +412,16 @@ export class MatchSubStatementCST extends H2RCST {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let ret = "";
|
let ret = minimizeMatchString(matches);
|
||||||
|
|
||||||
let require_grouping = false;
|
|
||||||
let dont_clobber_plus = false;
|
|
||||||
|
|
||||||
if (matches.length === 1) {
|
|
||||||
ret = first(matches);
|
|
||||||
if (ret.endsWith("+")) {
|
|
||||||
dont_clobber_plus = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
ret = minimizeMatchString(matches);
|
|
||||||
|
|
||||||
if (ret.length > 1 &&
|
|
||||||
(!ret.startsWith("(") || !ret.endsWith("["))) {
|
|
||||||
require_grouping = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this.count) {
|
if (this.count) {
|
||||||
if (dont_clobber_plus) {
|
if (matches.length === 1) {
|
||||||
const clobber = this.count.toRegex(language);
|
// we don't group if there's only 1 element
|
||||||
|
// but we need to make sure we don't add an additional + or *
|
||||||
// + can be ignored as well as a count as long as that count is > 0
|
ret = dontClobberRepetition(ret, this.count.toRegex(language));
|
||||||
switch (clobber) {
|
|
||||||
case "*":
|
|
||||||
case "?":
|
|
||||||
ret = "(?:" + ret + ")" + clobber;
|
|
||||||
break;
|
|
||||||
case "+":
|
|
||||||
// ignore
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
if (clobber.startsWith("{0")) {
|
|
||||||
ret = "(?:" + ret + ")" + clobber;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// remove + and replace with count
|
|
||||||
ret.substring(0, ret.length - 1) + clobber;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (require_grouping) {
|
ret = groupIfRequired(ret) + this.count.toRegex(language);
|
||||||
ret = "(?:" + ret + ")";
|
|
||||||
}
|
|
||||||
|
|
||||||
ret += this.count.toRegex(language);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -427,8 +446,9 @@ export class UsingStatementCST extends H2RCST {
|
|||||||
super(tokens);
|
super(tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
public validate(language: RegexDialect): ISemanticError[] {
|
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
|
||||||
unusedParameter(language, "Using Statement does not change based on language");
|
unusedParameter(language, "Count does not need checking");
|
||||||
|
unusedParameter(context, "Context is not needed");
|
||||||
|
|
||||||
const errors: ISemanticError[] = [];
|
const errors: ISemanticError[] = [];
|
||||||
let flag = this.flags[0];
|
let flag = this.flags[0];
|
||||||
@ -490,15 +510,13 @@ export class CountSubStatementCST extends H2RCST {
|
|||||||
super(tokens);
|
super(tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
public validate(language: RegexDialect): ISemanticError[] {
|
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
|
||||||
unusedParameter(language, "Count does not need checking");
|
unusedParameter(language, "Count does not need checking");
|
||||||
|
unusedParameter(context, "Context is not needed");
|
||||||
|
|
||||||
const errors: ISemanticError[] = [];
|
const errors: ISemanticError[] = [];
|
||||||
|
|
||||||
if (this.from < 0) {
|
if (this.to !== null && ((this.opt === "exclusive" && (this.to-1) <= this.from) || this.to <= this.from)) {
|
||||||
errors.push(this.error("Value cannot be negative"));
|
|
||||||
}
|
|
||||||
else if (this.to !== null && ((this.opt === "exclusive" && (this.to-1) <= this.from) || this.to <= this.from)) {
|
|
||||||
errors.push(this.error("Values must be in range of eachother"));
|
errors.push(this.error("Values must be in range of eachother"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -548,49 +566,27 @@ export class MatchStatementCST extends StatementCST {
|
|||||||
* Constructor for MatchStatementCST
|
* Constructor for MatchStatementCST
|
||||||
*
|
*
|
||||||
* @param tokens Tokens used to calculate where an error occured
|
* @param tokens Tokens used to calculate where an error occurred
|
||||||
* @param matches
|
* @param matches the list of matches
|
||||||
*/
|
*/
|
||||||
constructor(tokens: IToken[], private completely_optional: boolean, private matches: MatchStatementValue[]) {
|
constructor(tokens: IToken[], private completely_optional: boolean, private matches: MatchStatementValue[]) {
|
||||||
super(tokens);
|
super(tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
public validate(language: RegexDialect): ISemanticError[] {
|
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
|
||||||
const errors: ISemanticError[] = [];
|
const errors: ISemanticError[] = [];
|
||||||
|
|
||||||
for (const match of this.matches) {
|
for (const match of this.matches) {
|
||||||
append(errors, match.statement.validate(language));
|
append(errors, match.statement.validate(language, context));
|
||||||
}
|
}
|
||||||
|
|
||||||
return errors;
|
return errors;
|
||||||
}
|
}
|
||||||
|
|
||||||
public toRegex(language: RegexDialect): string {
|
public toRegex(language: RegexDialect): string {
|
||||||
let final_matches = this.matches.map((x) => {
|
let final_matches = this.matches.map((x) => x.toRegex(language)).join("");
|
||||||
let match_stmt = x.statement.toRegex(language);
|
|
||||||
|
|
||||||
// need to group if optional and ungrouped
|
|
||||||
if (x.optional) {
|
|
||||||
if (!isSingleRegexCharacter(match_stmt)) {
|
|
||||||
// don't re-group a group
|
|
||||||
if (match_stmt[0] !== "(" && match_stmt[match_stmt.length-1] !== ")") {
|
|
||||||
match_stmt = "(?:" + match_stmt + ")";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
match_stmt += "?";
|
|
||||||
}
|
|
||||||
|
|
||||||
return match_stmt;
|
|
||||||
}).join("");
|
|
||||||
|
|
||||||
if (this.completely_optional) {
|
if (this.completely_optional) {
|
||||||
if (!isSingleRegexCharacter(final_matches)) {
|
final_matches = groupIfRequired(final_matches) + "?";
|
||||||
// don't re-group a group
|
|
||||||
if (final_matches[0] !== "(" && final_matches[final_matches.length-1] !== ")") {
|
|
||||||
final_matches = "(?:" + final_matches + ")";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final_matches += "?";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return final_matches;
|
return final_matches;
|
||||||
@ -616,22 +612,22 @@ export class RepeatStatementCST extends StatementCST {
|
|||||||
super(tokens);
|
super(tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
public validate(language: RegexDialect): ISemanticError[] {
|
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
|
||||||
const errors: ISemanticError[] = [];
|
const errors: ISemanticError[] = [];
|
||||||
|
|
||||||
if (this.count !== null) {
|
if (this.count !== null) {
|
||||||
append(errors, this.count.validate(language));
|
append(errors, this.count.validate(language, context));
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const statement of this.statements) {
|
for (const statement of this.statements) {
|
||||||
append(errors, statement.validate(language));
|
append(errors, statement.validate(language, context));
|
||||||
}
|
}
|
||||||
|
|
||||||
return errors;
|
return errors;
|
||||||
}
|
}
|
||||||
|
|
||||||
public toRegex(language: RegexDialect): string {
|
public toRegex(language: RegexDialect): string {
|
||||||
let str = "(?:" + this.statements.map((x) => x.toRegex(language)).join("") + ")";
|
let str = groupIfRequired(this.statements.map((x) => x.toRegex(language)).join(""));
|
||||||
|
|
||||||
if (this.count) {
|
if (this.count) {
|
||||||
str += this.count.toRegex(language);
|
str += this.count.toRegex(language);
|
||||||
@ -659,7 +655,7 @@ export class RepeatStatementCST extends StatementCST {
|
|||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
export class GroupStatementCST extends StatementCST {
|
export class GroupStatementCST extends StatementCST {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor for GroupStatementCST
|
* Constructor for GroupStatementCST
|
||||||
*
|
*
|
||||||
@ -673,16 +669,21 @@ export class GroupStatementCST extends StatementCST {
|
|||||||
super(tokens);
|
super(tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
public validate(language: RegexDialect): ISemanticError[] {
|
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
|
||||||
const errors : ISemanticError[] = [];
|
const errors : ISemanticError[] = [];
|
||||||
|
|
||||||
// All languages currently support named groups
|
if (this.name !== null) {
|
||||||
//if (false) {
|
if (context.hasGroup(this.name)) {
|
||||||
// errors.push(this.error("This language does not support named groups"));
|
const past_group = context.groups[this.name];
|
||||||
//}
|
errors.push(this.error(`Group with name "${this.name}" was already defined here: ${past_group.startLine}:${past_group.startLine}-${past_group.startLine}:${past_group.startLine+past_group.length}`));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
context.addGroup(this.name, this.tokens);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (const statement of this.statements) {
|
for (const statement of this.statements) {
|
||||||
append(errors, statement.validate(language));
|
append(errors, statement.validate(language, context));
|
||||||
}
|
}
|
||||||
|
|
||||||
return errors;
|
return errors;
|
||||||
@ -711,6 +712,195 @@ export class GroupStatementCST extends StatementCST {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Concrete Syntax Tree for a Backreference statement
 *
 * @internal
 */
export class BackrefStatementCST extends StatementCST {

    /**
     * Constructor for BackrefStatementCST
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param optional is this backref optional
     * @param count optional number of times to repeat
     * @param name the group name to call
     */
    constructor(tokens: IToken[], private optional: boolean, private count: CountSubStatementCST | null, private name: string) {
        super(tokens);
    }

    /**
     * Validates the backreference
     *
     * Reports an error when the context does not know a group with this name,
     * then appends whatever errors the count (if there is one) reports.
     *
     * @param language the regex dialect being generated
     * @param context the generator context holding the groups seen so far
     * @returns the list of semantic errors (empty when valid)
     */
    public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
        const errors: ISemanticError[] = [];

        if (!context.hasGroup(this.name)) {
            errors.push(this.error(`Cannot call group with name "${this.name}" as it was never previously defined`));
        }

        if (this.count !== null) {
            append(errors, this.count.validate(language, context));
        }

        return errors;
    }

    /**
     * Generates the reference to the named group, followed by its count and/or optionality
     *
     * @param language the regex dialect being generated
     * @returns the regular expression fragment
     */
    public toRegex(language: RegexDialect): string {
        let str = "";

        switch (language) {
            case RegexDialect.Python:
                str = `(?P=${this.name})`;
                break;

            // ECMAScript has no \g escape: its named backreference is \k<name>, same as .NET and Java
            case RegexDialect.JS:
            case RegexDialect.DotNet:
            case RegexDialect.Java:
                str = `\\k<${this.name}>`;
                break;

            default:
                str = `\\g<${this.name}>`;
                break;
        }

        if (this.count) {
            str += this.count.toRegex(language);

            // group for optionality because count would be incorrect otherwise
            if (this.optional) {
                str = "(?:" + str + ")?";
            }
        }
        else if (this.optional) {
            // append rather than assign: assigning would throw the reference itself away
            str += "?";
        }

        return str;
    }
}
|
||||||
|
|
||||||
|
/**
 * Concrete Syntax Tree node for a conditional whose condition is a pattern ("if match ...")
 *
 * @internal
 */
export class IfPatternStatementCST extends StatementCST {

    /**
     * Builds an IfPatternStatementCST
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param matches the matches that make up the condition
     * @param true_statements statements generated for the "then" branch
     * @param false_statements statements generated for the "else" branch (may be empty)
     */
    constructor(tokens: IToken[], private matches: MatchStatementValue[], private true_statements: StatementCST[], private false_statements: StatementCST[]) {
        super(tokens);
    }

    /**
     * Reports dialects that cannot express this conditional, then validates the
     * condition, the true path and the false path, in that order.
     *
     * @param language the regex dialect being generated
     * @param context the generator context shared by the whole validation run
     * @returns the list of semantic errors (empty when valid)
     */
    public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
        const errors: ISemanticError[] = [];

        switch (language) {
            case RegexDialect.Java:
            case RegexDialect.JS:
                errors.push(this.error("This language does not support conditionals"));
                break;

            case RegexDialect.Python:
                errors.push(this.error("This language does not support pattern conditionals"));
                break;

            default:
                break;
        }

        const children = [ ...this.matches, ...this.true_statements, ...this.false_statements ];

        for (const child of children) {
            append(errors, child.validate(language, context));
        }

        return errors;
    }

    /**
     * Generates "(?(condition)true)" or "(?(condition)true|false)" depending on
     * whether a false path exists.
     *
     * @param language the regex dialect being generated
     * @returns the regular expression fragment
     */
    public toRegex(language: RegexDialect): string {
        // joins the statements of one branch and groups the result when needed
        const branch = (statements: StatementCST[]): string => {
            return groupIfRequired(statements.map((x) => x.toRegex(language)).join(""));
        };

        const condition = this.matches.map((x) => x.toRegex(language)).join("");
        const when_true = branch(this.true_statements);

        if (this.false_statements.length === 0) {
            return `(?(${condition})${when_true})`;
        }

        const when_false = branch(this.false_statements);

        return `(?(${condition})${when_true}|${when_false})`;
    }
}
|
||||||
|
|
||||||
|
/**
 * Concrete Syntax Tree node for a conditional whose condition is a group identifier
 *
 * @internal
 */
export class IfIdentStatementCST extends StatementCST {

    /**
     * Builds an IfIdentStatementCST
     *
     * @param tokens Tokens used to calculate where an error occurred
     * @param identifier name of the group the condition checks
     * @param true_statements statements generated for the "then" branch
     * @param false_statements statements generated for the "else" branch (may be empty)
     */
    constructor(tokens: IToken[], private identifier: string, private true_statements: StatementCST[], private false_statements: StatementCST[]) {
        super(tokens);
    }

    /**
     * Reports dialects without conditionals and unknown group names, then
     * validates the true path followed by the false path.
     *
     * @param language the regex dialect being generated
     * @param context the generator context holding the groups seen so far
     * @returns the list of semantic errors (empty when valid)
     */
    public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
        const errors: ISemanticError[] = [];
        const lacks_conditionals = language === RegexDialect.Java || language === RegexDialect.JS;

        if (lacks_conditionals) {
            errors.push(this.error("This language does not support conditionals"));
        }

        if (!context.hasGroup(this.identifier)) {
            errors.push(this.error(`Group with name "${this.identifier}" does not exist`));
        }

        for (const statement of [ ...this.true_statements, ...this.false_statements ]) {
            append(errors, statement.validate(language, context));
        }

        return errors;
    }

    /**
     * Generates "(?(name)true)" or "(?(name)true|false)" depending on whether a
     * false path exists. For Boost the name is written as "<name>".
     *
     * @param language the regex dialect being generated
     * @returns the regular expression fragment
     */
    public toRegex(language: RegexDialect): string {
        // joins the statements of one branch and groups the result when needed
        const branch = (statements: StatementCST[]): string => {
            return groupIfRequired(statements.map((x) => x.toRegex(language)).join(""));
        };

        // be more clear with languages that support it
        const condition = language === RegexDialect.Boost ? `<${this.identifier}>` : this.identifier;
        const when_true = branch(this.true_statements);

        if (this.false_statements.length === 0) {
            return `(?(${condition})${when_true})`;
        }

        const when_false = branch(this.false_statements);

        return `(?(${condition})${when_true}|${when_false})`;
    }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Concrete Syntax Tree for a regular expression
|
* Concrete Syntax Tree for a regular expression
|
||||||
*
|
*
|
||||||
@ -730,115 +920,20 @@ export class RegularExpressionCST extends H2RCST {
|
|||||||
super(tokens);
|
super(tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
public validate(language: RegexDialect, context: GeneratorContext): ISemanticError[] {
    // the using statements are validated first; their error list is then extended in place
    const errors: ISemanticError[] = this.usings.validate(language, context);

    this.statements.forEach((statement) => {
        append(errors, statement.validate(language, context));
    });

    return errors;
}
|
||||||
|
|
||||||
/**
 * Generates the complete regular expression
 *
 * The using statements produce a template containing the placeholder
 * "{regex}", which is substituted with the concatenated statements.
 *
 * @param language the regex dialect being generated
 * @returns the regular expression as a string
 */
public toRegex(language: RegexDialect): string {
    const modifiers = this.usings.toRegex(language);
    const regex = this.statements.map((x) => x.toRegex(language)).join("");

    // A replacer function is used on purpose: with a replacement *string*,
    // sequences such as "$&", "$$", "$`" and "$'" inside the generated regex
    // would be expanded by String.prototype.replace and corrupt the output.
    return modifiers.replace("{regex}", () => regex);
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Minimizes the match string by finding duplicates or substrings in the array
|
|
||||||
*
|
|
||||||
* @param arr the array of matches
|
|
||||||
* @internal
|
|
||||||
*/
|
|
||||||
export function minimizeMatchString(arr: string[]): string {
|
|
||||||
return minMatchString(arr, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Minimizes the match string by finding duplicates or substrings in the array
|
|
||||||
*
|
|
||||||
* @param arr the array
|
|
||||||
* @param depth must be 0 for initial call
|
|
||||||
* @internal
|
|
||||||
*/
|
|
||||||
function minMatchString(arr: string[], depth: number = 0): string {
|
|
||||||
// base case: arr is empty
|
|
||||||
if (arr.length === 0) {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
// base case: arr has 1 element (must have at least 2, so this means this value is optional)
|
|
||||||
if (arr.length === 1) {
|
|
||||||
return first(arr) + "?";
|
|
||||||
}
|
|
||||||
|
|
||||||
// remove duplicates
|
|
||||||
arr = [ ...new Set(arr) ];
|
|
||||||
|
|
||||||
// base case: arr has 1 element (after duplicate removal means this is required)
|
|
||||||
if (arr.length === 1) {
|
|
||||||
return first(arr);
|
|
||||||
}
|
|
||||||
|
|
||||||
// base case: arr is all single letters
|
|
||||||
if (arr.every(isSingleRegexCharacter)) {
|
|
||||||
return "[" + arr.join("") + "]";
|
|
||||||
}
|
|
||||||
|
|
||||||
// now the real magic begins
|
|
||||||
// You are not expected to understand this
|
|
||||||
|
|
||||||
let longest_begin_substring = first(arr);
|
|
||||||
let longest_end_substring = first(arr);
|
|
||||||
|
|
||||||
for (let i = 1; i < arr.length; i++) {
|
|
||||||
// reduce longest_substring to match everything
|
|
||||||
for (let j = 0; j < longest_begin_substring.length; j++) {
|
|
||||||
if (arr[i].length < j || longest_begin_substring[j] !== arr[i][j]) {
|
|
||||||
longest_begin_substring = longest_begin_substring.substr(0, j);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (let j = 0; j < longest_end_substring.length; j++) {
|
|
||||||
if (arr[i].length-j < 0 || longest_end_substring[longest_end_substring.length-j-1] !== arr[i][arr[i].length-j-1]) {
|
|
||||||
longest_end_substring = longest_end_substring.substr(longest_end_substring.length-j, longest_end_substring.length);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// No matches whatsoever
|
|
||||||
// *technically* we can optimize further, but that is a VERY non-trivial problem
|
|
||||||
// For example optimizing: [ "a1x1z", "a2y2z", "a3z3z" ] to: "a[123][xyz][123]z"
|
|
||||||
if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
|
|
||||||
if (depth > 0) {
|
|
||||||
return "(?:" + arr.join("|") + ")";
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return arr.join("|");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// we have some matches
|
|
||||||
else {
|
|
||||||
// remove begin (if exists) and end (if exists) from each element and remove empty strings
|
|
||||||
const begin_pos = longest_begin_substring.length;
|
|
||||||
const end_pos = longest_end_substring.length;
|
|
||||||
|
|
||||||
const similar_matches: string[] = [];
|
|
||||||
for (const ele of arr) {
|
|
||||||
const match = ele.substring(begin_pos, ele.length-end_pos);
|
|
||||||
if (match.length !== 0) {
|
|
||||||
similar_matches.push(match);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return longest_begin_substring + minMatchString(similar_matches, depth + 1) + longest_end_substring;
|
|
||||||
}
|
|
||||||
}
|
|
224
src/generator_helper.ts
Normal file
224
src/generator_helper.ts
Normal file
@ -0,0 +1,224 @@
|
|||||||
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Includes helper functions for the Generator
|
||||||
|
* @packageDocumentation
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { first, isSingleRegexCharacter } from "./utilities";
|
||||||
|
|
||||||
|
/**
 * Entry point for minimizing a list of alternatives into a single regex fragment
 *
 * @param arr the array of matches
 * @returns the only element when there is exactly one, otherwise the minimized string
 * @internal
 */
export function minimizeMatchString(arr: string[]): string {
    // a lone element is handed back untouched: minMatchString treats a single
    // element as an optional remainder, which would be the wrong result here
    return arr.length === 1 ? first(arr) : minMatchString(arr, 0);
}
|
||||||
|
|
||||||
|
/**
 * Minimizes the match string by finding duplicates or substrings in the array
 *
 * The prefix and the suffix shared by every element are factored out and the
 * function recurses on what is left in the middle of each element.
 *
 * @param arr the array
 * @param depth must be 0 for initial call
 * @returns an optimized string
 * @internal
 */
function minMatchString(arr: string[], depth: number = 0): string {
    // base case: arr is empty
    if (arr.length === 0) {
        return "";
    }

    // base case: arr has 1 element (must have at least 2, so this means this value is optional)
    // the element is grouped first, otherwise "?" would only apply to its last character
    if (arr.length === 1) {
        return groupIfRequired(first(arr)) + "?";
    }

    // remove duplicates
    arr = [ ...new Set(arr) ];

    // base case: arr has 1 element (after duplicate removal means this is required)
    if (arr.length === 1) {
        return first(arr);
    }

    // base case: arr is all single letters
    if (arr.every(isSingleRegexCharacter)) {
        return "[" + arr.join("") + "]";
    }

    // both start out as the whole first element and are cut down
    // until they are a prefix/suffix of every other element as well
    let longest_begin_substring = first(arr);
    let longest_end_substring = first(arr);

    for (let i = 1; i < arr.length; i++) {
        // reduce the prefix to what arr[i] starts with
        for (let j = 0; j < longest_begin_substring.length; j++) {
            if (arr[i].length < j || longest_begin_substring[j] !== arr[i][j]) {
                longest_begin_substring = longest_begin_substring.substring(0, j);
                break;
            }
        }

        // reduce the suffix to what arr[i] ends with (compared from the back)
        for (let j = 0; j < longest_end_substring.length; j++) {
            if (arr[i].length-j < 0 || longest_end_substring[longest_end_substring.length-j-1] !== arr[i][arr[i].length-j-1]) {
                longest_end_substring = longest_end_substring.substring(longest_end_substring.length-j);
                break;
            }
        }

        // nothing in common anymore, no need to look at the remaining elements
        if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
            break;
        }
    }

    // The prefix and suffix were computed independently and may overlap on the
    // shortest element (e.g. [ "ab", "abab" ] yields prefix "ab" AND suffix "ab").
    // The prefix wins; the suffix is trimmed so both fit into every element.
    const shortest_length = Math.min(...arr.map((x) => x.length));
    const overlap = longest_begin_substring.length + longest_end_substring.length - shortest_length;

    if (overlap > 0) {
        longest_end_substring = longest_end_substring.substring(overlap);
    }

    // No matches whatsoever
    // *technically* we can optimize further, but that is a VERY non-trivial problem
    // For example optimizing: [ "a1x1z", "a2y2z", "a3z3z" ] to: "a[123][xyz][123]z"
    if (longest_begin_substring.length === 0 && longest_end_substring.length === 0) {
        if (depth > 0) {
            return "(?:" + arr.join("|") + ")";
        }
        else {
            return arr.join("|");
        }
    }
    // we have some matches
    else {
        // remove begin (if exists) and end (if exists) from each element and remove empty strings
        const begin_pos = longest_begin_substring.length;
        const end_pos = longest_end_substring.length;

        const similar_matches: string[] = [];
        for (const ele of arr) {
            const match = ele.substring(begin_pos, ele.length-end_pos);
            if (match.length !== 0) {
                similar_matches.push(match);
            }
        }

        let middle = minMatchString(similar_matches, depth + 1);

        // An element that was exactly prefix+suffix left an empty remainder behind,
        // which makes the whole middle optional. With a single survivor the
        // recursion already returns it as optional; with several it cannot know,
        // so the optionality is applied here.
        if (similar_matches.length > 1 && similar_matches.length < arr.length) {
            middle = groupIfRequired(middle) + "?";
        }

        return longest_begin_substring + middle + longest_end_substring;
    }
}
|
||||||
|
|
||||||
|
/**
 * Groups a regex fragment if it needs to be grouped
 *
 * A fragment is left alone when it already is a single unit: a single regex
 * character, one group spanning the whole fragment, or one character class
 * spanning the whole fragment. Anything else is wrapped in a non-capturing group.
 *
 * @param fragment fragment of regular expression to potentially group
 * @returns a non-capturing group if there needs to be one
 * @internal
 */
export function groupIfRequired(fragment: string): string {
    if (isSingleRegexCharacter(fragment)) {
        return fragment;
    }

    const last_index = fragment.length - 1;

    // "(a)(b)" also starts with "(" and ends with ")", so it is not enough to look
    // at the two ends: the opener at index 0 must be closed by the very last character
    if (fragment[0] === "(" && groupEndIndex(fragment) === last_index) {
        return fragment;
    }

    if (fragment[0] === "[" && characterClassEndIndex(fragment) === last_index) {
        return fragment;
    }

    return "(?:" + fragment + ")";
}

/**
 * Finds the ")" that closes the group opened at index 0
 *
 * Escaped characters are skipped and parentheses inside a character class are
 * treated as literals.
 *
 * @param fragment fragment of regular expression starting with "("
 * @returns index of the closing ")" or -1 if the group is never closed
 * @internal
 */
function groupEndIndex(fragment: string): number {
    let bracket_count = 0;
    let in_character_class = false;

    for (let i = 0; i < fragment.length; i++) {
        const c = fragment[i];

        if (c === "\\") {
            // skip the escaped character
            i++;
        }
        else if (in_character_class) {
            in_character_class = c !== "]";
        }
        else if (c === "[") {
            in_character_class = true;
        }
        else if (c === "(") {
            bracket_count++;
        }
        else if (c === ")") {
            bracket_count--;

            if (bracket_count === 0) {
                return i;
            }
        }
    }

    return -1;
}

/**
 * Finds the "]" that closes the character class opened at index 0
 *
 * @param fragment fragment of regular expression starting with "["
 * @returns index of the first unescaped "]" or -1 if the class is never closed
 * @internal
 */
function characterClassEndIndex(fragment: string): number {
    // you'll never have a raw [ inside a [], so the first unescaped ] closes the class
    for (let i = 1; i < fragment.length; i++) {
        if (fragment[i] === "\\") {
            // skip the escaped character
            i++;
        }
        else if (fragment[i] === "]") {
            return i;
        }
    }

    return -1;
}
|
||||||
|
|
||||||
|
/**
 * Checks to see if fragment has a + or * at the end and has a repetition statement
 *
 * Appending a repetition straight after an existing "+" or "*" would change
 * the meaning of that quantifier (or be invalid), so the two are merged.
 * An escaped "\+" or "\*" is a literal character, not a quantifier, and simply
 * gets the repetition appended.
 *
 * @param fragment fragment of regular expression
 * @param repetition repetition that may clobber the fragment
 * @returns the fragment with the repetition applied
 */
export function dontClobberRepetition(fragment: string, repetition: string): string {
    // nothing to apply; without this guard the "replace with count" branches
    // below would strip the existing quantifier and add nothing back
    if (repetition === "") {
        return fragment;
    }

    // the last character is escaped when it is preceded by an odd number of backslashes
    let preceding_backslashes = 0;
    for (let i = fragment.length - 2; i >= 0 && fragment[i] === "\\"; i--) {
        preceding_backslashes++;
    }
    const ends_with_quantifier = preceding_backslashes % 2 === 0;

    // + can be ignored as well as a count as long as that count is > 0

    if (ends_with_quantifier && fragment.endsWith("+")) {
        switch (repetition) {
            case "*":
                // ignore: + is greater than *
                break;
            case "?":
                // non-greedy qualifier
                fragment += repetition;
                break;
            case "+":
                // ignore: already +
                break;
            default:
                if (repetition.startsWith("{0")) {
                    // a count that may be 0 cannot be merged into the +, so the + is kept inside a group
                    fragment = "(?:" + fragment + ")" + repetition;
                }
                else {
                    // remove + and replace with count
                    fragment = fragment.substring(0, fragment.length - 1) + repetition;
                }
                break;
        }
    }
    else if (ends_with_quantifier && fragment.endsWith("*")) {
        switch (repetition) {
            case "*":
                // ignore: already *
                break;
            case "?":
                // non-greedy qualifier
                fragment += repetition;
                break;
            default:
                // remove * and replace with count
                fragment = fragment.substring(0, fragment.length - 1) + repetition;
                break;
        }
    }
    else {
        fragment += repetition;
    }

    return fragment;
}
|
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Includes all packages
|
* Includes all packages
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Lexer for Human2Regex
|
* The Lexer for Human2Regex
|
||||||
|
103
src/parser.ts
103
src/parser.ts
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The parser for Human2Regex
|
* The parser for Human2Regex
|
||||||
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
import { EmbeddedActionsParser, IOrAlt, IToken } from "chevrotain";
|
import { EmbeddedActionsParser, IOrAlt, IToken } from "chevrotain";
|
||||||
import * as T from "./tokens";
|
import * as T from "./tokens";
|
||||||
import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST, RegexDialect } from "./generator";
|
import { CountSubStatementCST, UsingFlags, MatchSubStatementType, MatchSubStatementValue, MatchSubStatementCST, UsingStatementCST, RegularExpressionCST, StatementCST, RepeatStatementCST, MatchStatementValue, MatchStatementCST, GroupStatementCST, RegexDialect, BackrefStatementCST, GeneratorContext, IfPatternStatementCST, IfIdentStatementCST } from "./generator";
|
||||||
import { first, usefulConditional, unusedParameter, CommonError } from "./utilities";
|
import { first, usefulConditional, unusedParameter, CommonError } from "./utilities";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -60,7 +60,7 @@ export class ParseResult {
|
|||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
public validate(language: RegexDialect): CommonError[] {
    // a new GeneratorContext is created for every call and handed to the tree
    const context = new GeneratorContext();
    const semantic_errors = this.regexp_cst.validate(language, context);

    return semantic_errors.map(CommonError.fromSemanticError);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -558,12 +558,107 @@ export class Human2RegexParser extends EmbeddedActionsParser {
|
|||||||
return new RepeatStatementCST(tokens, optional, count, statements);
|
return new RepeatStatementCST(tokens, optional, count, statements);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// [optional] rerun [count] [[the] group [called]] <identifier> EOL
const BackrefStatement = $.RULE("BackrefStatement", () => {
    // only these tokens are handed to the CST node for error positions
    const error_tokens: IToken[] = [];
    let is_optional = false;
    let repeat_count: CountSubStatementCST | null = null;

    $.OPTION5(() => {
        error_tokens.push($.CONSUME(T.Optional));
        is_optional = true;
    });
    error_tokens.push($.CONSUME(T.Rerun));

    $.OPTION6(() => repeat_count = $.SUBRULE(CountSubStatement));

    // filler words in front of the group name, all of them optional
    $.OPTION7(() => {
        $.OPTION(() => $.CONSUME(T.The));
        $.CONSUME(T.Group);
        $.OPTION2(() => $.CONSUME(T.Called));
    });

    const group_name = $.CONSUME(T.Identifier).image;

    error_tokens.push($.CONSUME4(T.EndOfLine));

    return new BackrefStatementCST(error_tokens, is_optional, repeat_count, group_name);
});
|
||||||
|
|
||||||
|
// if (<identifier> | match ...) EOL <indented statements> [else EOL <indented statements>]
const IfStatement = $.RULE("IfStatement", () => {
    const error_tokens: IToken[] = [];
    const match_values: MatchStatementValue[] = [];
    const true_statements: StatementCST[] = [];
    const false_statements: StatementCST[] = [];
    let is_optional = false;
    // stays empty when the condition is a pattern instead of a group name
    let group_name: string = "";

    error_tokens.push($.CONSUME(T.If));

    $.OR2([
        // condition is the name of a group
        {ALT: () => {
            group_name = $.CONSUME(T.Identifier).image;
        }},
        // condition is a list of matches
        {ALT: () => {
            $.CONSUME(T.Match);

            $.OPTION4(() => {
                $.CONSUME3(T.Optional);
                is_optional = true;
            });

            match_values.push(new MatchStatementValue(is_optional, $.SUBRULE(MatchSubStatement)));

            $.MANY(() => {
                // separator: "[and] then" or "and"
                $.OR([
                    { ALT: () => {
                        $.OPTION2(() => $.CONSUME2(T.And));
                        $.CONSUME(T.Then);
                    }},
                    { ALT: () => $.CONSUME(T.And) },
                ]);

                // optionality is per match, so it is reset before each one
                is_optional = false;
                $.OPTION3(() => {
                    $.CONSUME2(T.Optional);
                    is_optional = true;
                });

                match_values.push(new MatchStatementValue(is_optional, $.SUBRULE2(MatchSubStatement)));
            });
        }}
    ]);

    error_tokens.push($.CONSUME3(T.EndOfLine));

    // true path
    $.CONSUME2(T.Indent);
    $.AT_LEAST_ONE2(() => {
        true_statements.push($.SUBRULE(Statement));
    });
    $.CONSUME2(T.Outdent);

    // false path
    $.OPTION(() => {
        $.CONSUME(T.Else);
        $.CONSUME4(T.EndOfLine);
        $.CONSUME3(T.Indent);
        $.AT_LEAST_ONE3(() => {
            false_statements.push($.SUBRULE2(Statement));
        });
        $.CONSUME3(T.Outdent);
    });

    return group_name === ""
        ? new IfPatternStatementCST(error_tokens, match_values, true_statements, false_statements)
        : new IfIdentStatementCST(error_tokens, group_name, true_statements, false_statements);
});
|
||||||
|
|
||||||
// statement super class
|
// statement super class
|
||||||
// a statement is exactly one of the concrete statement kinds below
const Statement = $.RULE("Statement", () => $.OR([
    { ALT: () => $.SUBRULE(MatchStatement) },
    { ALT: () => $.SUBRULE(GroupStatement) },
    { ALT: () => $.SUBRULE(RepeatStatement) },
    { ALT: () => $.SUBRULE(BackrefStatement) },
    { ALT: () => $.SUBRULE(IfStatement) }
]));
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
import { Human2RegexLexer, Human2RegexLexerOptions } from "./lexer";
|
import { Human2RegexLexer, Human2RegexLexerOptions } from "./lexer";
|
||||||
@ -10,10 +10,10 @@ import "codemirror/addon/mode/simple";
|
|||||||
import "codemirror/addon/runmode/runmode";
|
import "codemirror/addon/runmode/runmode";
|
||||||
import "codemirror/addon/lint/lint";
|
import "codemirror/addon/lint/lint";
|
||||||
|
|
||||||
import "./docs/bootstrap.css";
|
import "./docs/assets/bootstrap.css";
|
||||||
import "./docs/cleanblog.css";
|
import "./docs/assets/cleanblog.css";
|
||||||
import "./docs/codemirror.css";
|
import "./docs/assets/codemirror.css";
|
||||||
import "./docs/style.css";
|
import "./docs/assets/style.css";
|
||||||
|
|
||||||
interface CodeMirror {
|
interface CodeMirror {
|
||||||
defineSimpleMode: (name: string, value: Record<string, unknown>) => void;
|
defineSimpleMode: (name: string, value: Record<string, unknown>) => void;
|
||||||
@ -81,6 +81,10 @@ document.addEventListener("DOMContentLoaded", function() {
|
|||||||
{token: "builtin", regex: /case insensitive/i},
|
{token: "builtin", regex: /case insensitive/i},
|
||||||
{token: "builtin", regex: /case sensitive/i},
|
{token: "builtin", regex: /case sensitive/i},
|
||||||
{token: "operator", regex: /\+|or more/i},
|
{token: "operator", regex: /\+|or more/i},
|
||||||
|
{token: "keyword", regex: /re( |-)?(run|capture)/i },
|
||||||
|
{token: "operator", regex: /the/i },
|
||||||
|
{token: "keyword", regex: /if/i },
|
||||||
|
{token: "keyword", regex: /else|otherwise/i },
|
||||||
{token: "variable", regex: /[a-z]\w*/i},
|
{token: "variable", regex: /[a-z]\w*/i},
|
||||||
{token: "number", regex: /\d+/},
|
{token: "number", regex: /\d+/},
|
||||||
{token: "string", regex: /"(?:[^\\"]|\\(?:[bfnrtv"\\/]|u[0-9a-f]{4}|U[0-9a-f]{8}))*"/i},
|
{token: "string", regex: /"(?:[^\\"]|\\(?:[bfnrtv"\\/]|u[0-9a-f]{4}|U[0-9a-f]{8}))*"/i},
|
||||||
@ -200,8 +204,6 @@ document.addEventListener("DOMContentLoaded", function() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
const editor = code_mirror.fromTextArea($human, {
|
const editor = code_mirror.fromTextArea($human, {
|
||||||
mode: "human2regex",
|
mode: "human2regex",
|
||||||
lineNumbers: false,
|
lineNumbers: false,
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The tokens required for Human2Regex
|
* The tokens required for Human2Regex
|
||||||
@ -53,34 +53,17 @@ import { createToken, Lexer } from "chevrotain";
|
|||||||
/** @internal */ export const From = createToken({name: "From", pattern: /from/i});
|
/** @internal */ export const From = createToken({name: "From", pattern: /from/i});
|
||||||
/** @internal */ export const To = createToken({name: "To", pattern: /(to|through|thru|\-|\.\.\.?)/i});
|
/** @internal */ export const To = createToken({name: "To", pattern: /(to|through|thru|\-|\.\.\.?)/i});
|
||||||
/** @internal */ export const Create = createToken({name: "Create", pattern: /create(s)?/i});
|
/** @internal */ export const Create = createToken({name: "Create", pattern: /create(s)?/i});
|
||||||
/** @internal */ export const Called = createToken({name: "Called", pattern: /name(d)?|call(ed)?/i});
|
/** @internal */ export const Called = createToken({name: "Called", pattern: /named|called/i});
|
||||||
/** @internal */ export const Repeat = createToken({name: "Repeat", pattern: /repeat(s|ing)?/i});
|
/** @internal */ export const Repeat = createToken({name: "Repeat", pattern: /repeat(s|ing)?/i});
|
||||||
/** @internal */ export const Newline = createToken({name: "Newline", pattern: /(new line|newline)/i});
|
/** @internal */ export const Newline = createToken({name: "Newline", pattern: /(new line|newline)/i});
|
||||||
/** @internal */ export const CarriageReturn = createToken({name: "CarriageReturn", pattern: /carriage return/i});
|
/** @internal */ export const CarriageReturn = createToken({name: "CarriageReturn", pattern: /carriage return/i});
|
||||||
/** @internal */ export const CaseInsensitive = createToken({name: "CaseInsensitive", pattern: /case insensitive/i});
|
/** @internal */ export const CaseInsensitive = createToken({name: "CaseInsensitive", pattern: /case insensitive/i});
|
||||||
/** @internal */ export const CaseSensitive = createToken({name: "CaseSensitive", pattern: /case sensitive/i});
|
/** @internal */ export const CaseSensitive = createToken({name: "CaseSensitive", pattern: /case sensitive/i});
|
||||||
/** @internal */ export const OrMore = createToken({name: "OrMore", pattern: /\+|or more/i});
|
/** @internal */ export const OrMore = createToken({name: "OrMore", pattern: /\+|or more/i});
|
||||||
|
/** @internal */ export const Rerun = createToken({name: "Rerun", pattern: /re( |-)?(run|capture)/i });
|
||||||
/*
|
/** @internal */ export const The = createToken({name: "The", pattern: /the/i });
|
||||||
//Not being used currently
|
/** @internal */ export const If = createToken({name: "If", pattern: /if/i });
|
||||||
export const Of = createToken({name: "Of", pattern: /of/i});
|
/** @internal */ export const Else = createToken({name: "Else", pattern: /else|otherwise/i });
|
||||||
export const Nothing = createToken({name: "Nothing", pattern: /nothing/i});
|
|
||||||
export const As = createToken({name: "As", pattern: /as/i});
|
|
||||||
export const If = createToken({name: "If", pattern: /if/i});
|
|
||||||
export const Start = createToken({name: "Start", pattern: /start(s) with?/i});
|
|
||||||
export const Ends = createToken({name: "Ends", pattern: /end(s)? with/i});
|
|
||||||
export const Else = createToken({name: "Else", pattern: /(other wise|otherwise|else)/i});
|
|
||||||
export const Unless = createToken({name: "Unless", pattern: /unless/i});
|
|
||||||
export const While = createToken({name: "While", pattern: /while/i});
|
|
||||||
export const More = createToken({name: "More", pattern: /more/i});
|
|
||||||
export const LBracket = createToken({name: "Left Bracket", pattern: /\(/ });
|
|
||||||
export const RBracket = createToken({name: "Right Bracket", pattern: /\)/ });
|
|
||||||
export const None = createToken({name: "None", pattern: /none/i});
|
|
||||||
export const Neither = createToken({name: "Neither", pattern: /neither/i});
|
|
||||||
export const The = createToken({name: "The", pattern: /the/i }); //, longer_alt: Then});
|
|
||||||
export const By = createToken({name: "By", pattern: /by/i});
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/** @internal */ export const EndOfLine = createToken({name: "EOL", pattern: /\n/});
|
/** @internal */ export const EndOfLine = createToken({name: "EOL", pattern: /\n/});
|
||||||
/** @internal */ export const WS = createToken({name: "Whitespace", pattern: /[^\S\n]+/, start_chars_hint: [ " ", "\r" ], group: Lexer.SKIPPED});
|
/** @internal */ export const WS = createToken({name: "Whitespace", pattern: /[^\S\n]+/, start_chars_hint: [ " ", "\r" ], group: Lexer.SKIPPED});
|
||||||
@ -127,22 +110,11 @@ export const AllTokens = [
|
|||||||
Whitespace,
|
Whitespace,
|
||||||
Number,
|
Number,
|
||||||
Unicode,
|
Unicode,
|
||||||
/*
|
Called,
|
||||||
Of,
|
Rerun,
|
||||||
As,
|
|
||||||
If,
|
If,
|
||||||
Start,
|
|
||||||
Ends,
|
|
||||||
Else,
|
Else,
|
||||||
Unless,
|
|
||||||
While,
|
|
||||||
More,
|
|
||||||
Nothing,
|
|
||||||
By,
|
|
||||||
The,
|
The,
|
||||||
None,
|
|
||||||
Neither,
|
|
||||||
*/
|
|
||||||
Using,
|
Using,
|
||||||
Global,
|
Global,
|
||||||
Multiline,
|
Multiline,
|
||||||
@ -158,7 +130,6 @@ export const AllTokens = [
|
|||||||
Exclusive,
|
Exclusive,
|
||||||
From,
|
From,
|
||||||
Create,
|
Create,
|
||||||
Called,
|
|
||||||
Repeat,
|
Repeat,
|
||||||
Newline,
|
Newline,
|
||||||
CarriageReturn,
|
CarriageReturn,
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Some utility functions for Human2Regex
|
* Some utility functions for Human2Regex
|
||||||
@ -186,6 +186,7 @@ export class CommonError {
|
|||||||
*
|
*
|
||||||
* @param error The lexing error
|
* @param error The lexing error
|
||||||
* @returns a new CommonError
|
* @returns a new CommonError
|
||||||
|
* @internal
|
||||||
*/
|
*/
|
||||||
public static fromLexError(error: ILexingError): CommonError {
|
public static fromLexError(error: ILexingError): CommonError {
|
||||||
// not really fond of --> and <--
|
// not really fond of --> and <--
|
||||||
@ -199,6 +200,7 @@ export class CommonError {
|
|||||||
*
|
*
|
||||||
* @param error The parsing error
|
* @param error The parsing error
|
||||||
* @returns a new CommonError
|
* @returns a new CommonError
|
||||||
|
* @internal
|
||||||
*/
|
*/
|
||||||
public static fromParseError(error: IRecognitionException): CommonError {
|
public static fromParseError(error: IRecognitionException): CommonError {
|
||||||
// not really fond of --> and <--
|
// not really fond of --> and <--
|
||||||
@ -212,6 +214,7 @@ export class CommonError {
|
|||||||
*
|
*
|
||||||
* @param error The semantic error
|
* @param error The semantic error
|
||||||
* @returns a new CommonError
|
* @returns a new CommonError
|
||||||
|
* @internal
|
||||||
*/
|
*/
|
||||||
public static fromSemanticError(error: ISemanticError): CommonError {
|
public static fromSemanticError(error: ISemanticError): CommonError {
|
||||||
return new CommonError("Semantic Error", error.startLine, error.startColumn, error.length, error.message);
|
return new CommonError("Semantic Error", error.startLine, error.startColumn, error.length, error.message);
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
|
|
||||||
import { Human2RegexParser, Human2RegexParserOptions, ParseResult,
|
import { Human2RegexParser, Human2RegexParserOptions, ParseResult,
|
||||||
Human2RegexLexer, Human2RegexLexerOptions, TokenizeResult,
|
Human2RegexLexer, Human2RegexLexerOptions, TokenizeResult,
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
|
|
||||||
import { Human2RegexParser, Human2RegexParserOptions } from "../src/parser";
|
import { Human2RegexParser, Human2RegexParserOptions } from "../src/parser";
|
||||||
import { Human2RegexLexer, Human2RegexLexerOptions } from "../src/lexer";
|
import { Human2RegexLexer, Human2RegexLexerOptions } from "../src/lexer";
|
||||||
import { RegexDialect, minimizeMatchString } from "../src/generator";
|
import { RegexDialect } from "../src/generator";
|
||||||
|
|
||||||
|
|
||||||
describe("Generator functionality", function() {
|
describe("Generator functionality", function() {
|
||||||
@ -67,6 +67,14 @@ describe("Generator functionality", function() {
|
|||||||
const toks5 = lexer.tokenize('match between 2 and 2 exclusive "hello"').tokens;
|
const toks5 = lexer.tokenize('match between 2 and 2 exclusive "hello"').tokens;
|
||||||
const reg5 = parser.parse(toks5);
|
const reg5 = parser.parse(toks5);
|
||||||
expect(reg5.validate(RegexDialect.JS).length).toBeGreaterThan(0);
|
expect(reg5.validate(RegexDialect.JS).length).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
const toks6 = lexer.tokenize('create a group called thing\n\tmatch "hi"\ncreate a group called thing\n\tmatch "hi"\n').tokens;
|
||||||
|
const reg6 = parser.parse(toks6);
|
||||||
|
expect(reg6.validate(RegexDialect.JS).length).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
const toks7 = lexer.tokenize("rerun thing").tokens;
|
||||||
|
const reg7 = parser.parse(toks7);
|
||||||
|
expect(reg7.validate(RegexDialect.JS).length).toBeGreaterThan(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("handles ranges", function() {
|
it("handles ranges", function() {
|
||||||
@ -97,6 +105,12 @@ describe("Generator functionality", function() {
|
|||||||
expect(reg2.validate(RegexDialect.JS).length).toBe(0);
|
expect(reg2.validate(RegexDialect.JS).length).toBe(0);
|
||||||
expect(reg2.toRegex(RegexDialect.JS)).toBe("/[a-zA-Z][+-]?\\d+[+-]?(?:(?:\\d+[,.]?\\d*)|(?:[,.]\\d+))/");
|
expect(reg2.toRegex(RegexDialect.JS)).toBe("/[a-zA-Z][+-]?\\d+[+-]?(?:(?:\\d+[,.]?\\d*)|(?:[,.]\\d+))/");
|
||||||
expect(reg2.toRegex(RegexDialect.PCRE)).toBe("/[[:alpha:]][+-]?\\d+[+-]?(?:(?:\\d+[,.]?\\d*)|(?:[,.]\\d+))/");
|
expect(reg2.toRegex(RegexDialect.PCRE)).toBe("/[[:alpha:]][+-]?\\d+[+-]?(?:(?:\\d+[,.]?\\d*)|(?:[,.]\\d+))/");
|
||||||
|
|
||||||
|
const toks3 = lexer.tokenize("match not letter, not integer, not decimal").tokens;
|
||||||
|
const reg3 = parser.parse(toks3);
|
||||||
|
expect(reg3.validate(RegexDialect.JS).length).toBe(0);
|
||||||
|
expect(reg3.toRegex(RegexDialect.JS)).toBe("/[^a-zA-Z](?![+-]?\\d+)(?![+-]?(?:(?:\\d+[,.]?\\d*)|(?:[,.]\\d+)))/");
|
||||||
|
expect(reg3.toRegex(RegexDialect.PCRE)).toBe("/[^[:alpha:]](?![+-]?\\d+)(?![+-]?(?:(?:\\d+[,.]?\\d*)|(?:[,.]\\d+)))/");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("doesn't clobber repetition", function() {
|
it("doesn't clobber repetition", function() {
|
||||||
@ -115,23 +129,6 @@ describe("Generator functionality", function() {
|
|||||||
expect(reg1.toRegex(RegexDialect.JS)).toBe("/(?!hello){1,6}/");
|
expect(reg1.toRegex(RegexDialect.JS)).toBe("/(?!hello){1,6}/");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("can minimize matches", function() {
|
|
||||||
const test_cases = [
|
|
||||||
{ from: [ "abc", "abc" ], to: "abc" },
|
|
||||||
{ from: [ "a", "ab" ], to: "ab?" },
|
|
||||||
{ from: [ "a1x1z", "a2y2z", "a3z3z" ], to: "a(?:1x1|2y2|3z3)z" },
|
|
||||||
{ from: [ "ab", "cd" ], to: "ab|cd" },
|
|
||||||
{ from: [ "abc", "bc" ], to: "a?bc" },
|
|
||||||
{ from: [ "abc", "xb" ], to: "abc|xb" }
|
|
||||||
];
|
|
||||||
|
|
||||||
for (const c of test_cases) {
|
|
||||||
const got = minimizeMatchString(c.from);
|
|
||||||
|
|
||||||
expect(got).toBe(c.to);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
it("optimizes correctly", function() {
|
it("optimizes correctly", function() {
|
||||||
const toks0 = lexer.tokenize('match "a" or "b" or "b"').tokens;
|
const toks0 = lexer.tokenize('match "a" or "b" or "b"').tokens;
|
||||||
const reg0 = parser.parse(toks0);
|
const reg0 = parser.parse(toks0);
|
||||||
@ -157,6 +154,44 @@ describe("Generator functionality", function() {
|
|||||||
const reg4 = parser.parse(toks4);
|
const reg4 = parser.parse(toks4);
|
||||||
expect(reg4.validate(RegexDialect.JS).length).toBe(0);
|
expect(reg4.validate(RegexDialect.JS).length).toBe(0);
|
||||||
expect(reg4.toRegex(RegexDialect.JS)).toBe("/a(?:1x1|2x2|3x3)z/");
|
expect(reg4.toRegex(RegexDialect.JS)).toBe("/a(?:1x1|2x2|3x3)z/");
|
||||||
|
|
||||||
|
const toks5 = lexer.tokenize('match "a", maybe "b" or "c"').tokens;
|
||||||
|
const reg5 = parser.parse(toks5);
|
||||||
|
expect(reg5.validate(RegexDialect.JS).length).toBe(0);
|
||||||
|
expect(reg5.toRegex(RegexDialect.JS)).toBe("/a[bc]?/");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("can generate backreferences", function() {
|
||||||
|
const toks0 = lexer.tokenize('create a group called thing\n\tmatch "Hello World"\nrerun thing\noptionally recapture 3 times the group called thing').tokens;
|
||||||
|
const reg0 = parser.parse(toks0);
|
||||||
|
expect(reg0.validate(RegexDialect.JS).length).toBe(0);
|
||||||
|
|
||||||
|
expect(reg0.toRegex(RegexDialect.JS)).toBe("/(?<thing>Hello World)\\g<thing>(?:\\g<thing>{3})?/");
|
||||||
|
expect(reg0.toRegex(RegexDialect.PCRE)).toBe("/(?P<thing>Hello World)\\g<thing>(?:\\g<thing>{3})?/");
|
||||||
|
expect(reg0.toRegex(RegexDialect.Python)).toBe("/(?P<thing>Hello World)(?P=thing)(?:(?P=thing){3})?/");
|
||||||
|
expect(reg0.toRegex(RegexDialect.DotNet)).toBe("/(?<thing>Hello World)\\k<thing>(?:\\k<thing>{3})?/");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("can generate if statements", function() {
|
||||||
|
const toks0 = lexer.tokenize('if matches "a"\n\tmatch "b"\n').tokens;
|
||||||
|
const reg0 = parser.parse(toks0);
|
||||||
|
expect(reg0.validate(RegexDialect.JS).length).toBeGreaterThan(0);
|
||||||
|
expect(reg0.validate(RegexDialect.PCRE).length).toBe(0);
|
||||||
|
expect(reg0.toRegex(RegexDialect.PCRE)).toBe("/(?(a)b)/");
|
||||||
|
|
||||||
|
const toks1 = lexer.tokenize('if matches "alpha", maybe "b" or "f"\n\tmatch "c"\nelse\n\tif matches "d"\n\t\tmatch "e"\n\telse\n\t\tmatch "f"').tokens;
|
||||||
|
const reg1 = parser.parse(toks1);
|
||||||
|
expect(reg1.validate(RegexDialect.JS).length).toBeGreaterThan(0);
|
||||||
|
expect(reg1.validate(RegexDialect.Python).length).toBeGreaterThan(0);
|
||||||
|
expect(reg1.validate(RegexDialect.PCRE).length).toBe(0);
|
||||||
|
expect(reg1.toRegex(RegexDialect.PCRE)).toBe("/(?(alpha[bf]?)c|(?(d)e|f))/");
|
||||||
|
|
||||||
|
const toks2 = lexer.tokenize('create a group called thing\n\tmatch "a"\nif thing\n\tmatch "b"\nelse\n\tmatch "c"\n').tokens;
|
||||||
|
const reg2 = parser.parse(toks2);
|
||||||
|
expect(reg2.validate(RegexDialect.JS).length).toBeGreaterThan(0);
|
||||||
|
expect(reg2.validate(RegexDialect.PCRE).length).toBe(0);
|
||||||
|
expect(reg2.toRegex(RegexDialect.PCRE)).toBe("/(?P<thing>a)(?(thing)b|c)/");
|
||||||
|
expect(reg2.toRegex(RegexDialect.Boost)).toBe("/(?<thing>a)(?(<thing>)b|c)/");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("generate dialect specific regex", function() {
|
it("generate dialect specific regex", function() {
|
||||||
@ -187,7 +222,7 @@ describe("Generator functionality", function() {
|
|||||||
|
|
||||||
it("runs complex scripts", function() {
|
it("runs complex scripts", function() {
|
||||||
const str = `
|
const str = `
|
||||||
using global and multiline and exact matching
|
using global and multiline and exact matching and case insensitive matching
|
||||||
create an optional group called protocol
|
create an optional group called protocol
|
||||||
match "http"
|
match "http"
|
||||||
optionally match "s"
|
optionally match "s"
|
||||||
@ -222,6 +257,6 @@ create an optional group
|
|||||||
const toks = lexer.tokenize(str).tokens;
|
const toks = lexer.tokenize(str).tokens;
|
||||||
const reg = parser.parse(toks);
|
const reg = parser.parse(toks);
|
||||||
expect(reg.validate(RegexDialect.JS).length).toBe(0);
|
expect(reg.validate(RegexDialect.JS).length).toBe(0);
|
||||||
expect(reg.toRegex(RegexDialect.JS)).toBe("/^(?<protocol>https?\\:\\/\\/)?(?<subdomain>(?:\\w+\\.)*)?(?<domain>(?:\\w+|_|\\-)+\\.\\w+)(?:\\:\\d*)?(?<path>(?:\\/(?:\\w+|_|\\-)*)*)?(\\?(?<query>(?:(?:\\w+|_|\\-)+=(?:\\w+|_|\\-)+)*))?(#.*)?$/gm");
|
expect(reg.toRegex(RegexDialect.JS)).toBe("/^(?<protocol>https?\\:\\/\\/)?(?<subdomain>(?:\\w+\\.)*)?(?<domain>(?:\\w+|_|\\-)+\\.\\w+)(?:\\:\\d*)?(?<path>(?:\\/(?:\\w+|_|\\-)*)*)?(\\?(?<query>(?:(?:\\w+|_|\\-)+=(?:\\w+|_|\\-)+)*))?(#.*)?$/gmi");
|
||||||
});
|
});
|
||||||
});
|
});
|
62
tests/generator_helper.spec.ts
Normal file
62
tests/generator_helper.spec.ts
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
|
|
||||||
|
import { minimizeMatchString, groupIfRequired, dontClobberRepetition } from "../src/generator_helper";
|
||||||
|
|
||||||
|
describe("Generator helper functionality", function() {
|
||||||
|
it("can minimize matches", function() {
|
||||||
|
const test_cases = [
|
||||||
|
{ from: [], to: "" },
|
||||||
|
{ from: [ "abc" ], to: "abc" },
|
||||||
|
{ from: [ "abc", "abc" ], to: "abc" },
|
||||||
|
{ from: [ "a", "ab" ], to: "ab?" },
|
||||||
|
{ from: [ "a1x1z", "a2y2z", "a3z3z" ], to: "a(?:1x1|2y2|3z3)z" },
|
||||||
|
{ from: [ "ab", "cd" ], to: "ab|cd" },
|
||||||
|
{ from: [ "abc", "bc" ], to: "a?bc" },
|
||||||
|
{ from: [ "abc", "xb" ], to: "abc|xb" }
|
||||||
|
];
|
||||||
|
|
||||||
|
for (const c of test_cases) {
|
||||||
|
const got = minimizeMatchString(c.from);
|
||||||
|
|
||||||
|
expect(got).toBe(c.to);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it("groups correctly", function() {
|
||||||
|
const test_cases = [
|
||||||
|
{ from: "(?P=test)", to: "(?P=test)" },
|
||||||
|
{ from: "[abc\\]]", to: "[abc\\]]" },
|
||||||
|
{ from: "abc", to: "(?:abc)" },
|
||||||
|
{ from: "(abc)|d", to: "(?:(abc)|d)" },
|
||||||
|
{ from: "[abc\\]][abc]", to: "(?:[abc\\]][abc])" },
|
||||||
|
{ from: "(abc(abc)\\))(abc)", to: "(?:(abc(abc)\\))(abc))" },
|
||||||
|
];
|
||||||
|
|
||||||
|
for (const c of test_cases) {
|
||||||
|
const got = groupIfRequired(c.from);
|
||||||
|
|
||||||
|
expect(got).toBe(c.to);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it("doesn't clobber the repetition", function() {
|
||||||
|
const test_cases = [
|
||||||
|
{ fragment: "1+", repetition: "+", expected: "1+" },
|
||||||
|
{ fragment: "1*", repetition: "+", expected: "1+" },
|
||||||
|
{ fragment: "1+", repetition: "*", expected: "1+" },
|
||||||
|
{ fragment: "1*", repetition: "*", expected: "1*" },
|
||||||
|
{ fragment: "1+", repetition: "?", expected: "1+?" },
|
||||||
|
{ fragment: "1*", repetition: "?", expected: "1*?" },
|
||||||
|
{ fragment: "1+", repetition: "{0,}", expected: "(?:1+){0,}" },
|
||||||
|
{ fragment: "1*", repetition: "{0,}", expected: "1{0,}" },
|
||||||
|
{ fragment: "1+", repetition: "{1,2}", expected: "1{1,2}" },
|
||||||
|
{ fragment: "1*", repetition: "{1,2}", expected: "1{1,2}" },
|
||||||
|
];
|
||||||
|
|
||||||
|
for (const c of test_cases) {
|
||||||
|
const got = dontClobberRepetition(c.fragment, c.repetition);
|
||||||
|
|
||||||
|
expect(got).toBe(c.expected);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
|
|
||||||
import { Human2RegexLexer, Human2RegexLexerOptions, IndentType } from "../src/lexer";
|
import { Human2RegexLexer, Human2RegexLexerOptions, IndentType } from "../src/lexer";
|
||||||
import { Indent } from "../src/tokens";
|
import { Indent } from "../src/tokens";
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
|
|
||||||
import { Human2RegexParser, Human2RegexParserOptions } from "../src/parser";
|
import { Human2RegexParser, Human2RegexParserOptions } from "../src/parser";
|
||||||
import { Human2RegexLexer, Human2RegexLexerOptions } from "../src/lexer";
|
import { Human2RegexLexer, Human2RegexLexerOptions } from "../src/lexer";
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
|
|
||||||
import { Human2RegexLexer, Human2RegexLexerOptions } from "../src/lexer";
|
import { Human2RegexLexer, Human2RegexLexerOptions } from "../src/lexer";
|
||||||
import { Human2RegexParser, Human2RegexParserOptions, ParseResult } from "../src/parser";
|
import { Human2RegexParser, Human2RegexParserOptions, ParseResult } from "../src/parser";
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
/*! Copyright (c) 2021 Patrick Demian; Licensed under MIT */
|
||||||
|
|
||||||
import "../src/utilities";
|
import "../src/utilities";
|
||||||
import { isSingleRegexCharacter, findLastIndex, removeQuotes, regexEscape, hasFlag, combineFlags, makeFlag, first, last, CommonError, append } from "../src/utilities";
|
import { isSingleRegexCharacter, findLastIndex, removeQuotes, regexEscape, hasFlag, combineFlags, makeFlag, first, last, CommonError, append } from "../src/utilities";
|
||||||
|
@ -1,62 +1,62 @@
|
|||||||
|
/* eslint-disable func-style */
|
||||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||||
/* eslint-disable @typescript-eslint/naming-convention */
|
/* eslint-disable @typescript-eslint/naming-convention */
|
||||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||||
/* eslint-disable no-undef */
|
/* eslint-disable no-undef */
|
||||||
const path = require("path");
|
const path = require("path");
|
||||||
const { glob } = require("glob");
|
const { glob } = require("glob");
|
||||||
const { render } = require("mustache");
|
|
||||||
const { readFileSync, writeFileSync, existsSync, mkdirSync } = require("fs");
|
const { readFileSync, writeFileSync, existsSync, mkdirSync } = require("fs");
|
||||||
const { minify } = require("html-minifier");
|
const { minify } = require("html-minifier");
|
||||||
const CopyPlugin = require("copy-webpack-plugin");
|
const CopyPlugin = require("copy-webpack-plugin");
|
||||||
|
const Handlebars = require("handlebars");
|
||||||
const MiniCssExtractPlugin = require("mini-css-extract-plugin");
|
const MiniCssExtractPlugin = require("mini-css-extract-plugin");
|
||||||
const OptimizeCSSAssetsPlugin = require("optimize-css-assets-webpack-plugin");
|
const OptimizeCSSAssetsPlugin = require("optimize-css-assets-webpack-plugin");
|
||||||
const WebpackBeforeBuildPlugin = require("before-build-webpack");
|
const WebpackBeforeBuildPlugin = require("before-build-webpack");
|
||||||
const TerserPlugin = require("terser-webpack-plugin");
|
const TerserPlugin = require("terser-webpack-plugin");
|
||||||
const RemovePlugin = require('remove-files-webpack-plugin');
|
const RemovePlugin = require("remove-files-webpack-plugin");
|
||||||
|
const { CleanWebpackPlugin } = require("clean-webpack-plugin");
|
||||||
|
|
||||||
const config = {
|
const config = require("./config.json");
|
||||||
prod: true,
|
|
||||||
dst: "./docs/",
|
|
||||||
src: "./src/",
|
|
||||||
compression_config: {
|
|
||||||
html: {
|
|
||||||
collapseWhitespace: true,
|
|
||||||
minifyCSS: true,
|
|
||||||
minifyJS: true,
|
|
||||||
removeComments: true,
|
|
||||||
removeEmptyAttributes: true,
|
|
||||||
removeRedundantAttributes: true
|
|
||||||
},
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
// todo: if I'm bored, make this a plugin for webpack so it gets "emitted"
|
||||||
function build_mustache() {
|
function buildHandlebars() {
|
||||||
if (!existsSync(config.dst)){
|
if (!existsSync(config.dst)){
|
||||||
mkdirSync(config.dst);
|
mkdirSync(config.dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
const read_json_file = (filename) => JSON.parse(readFileSync(filename), "utf8");
|
const files = glob.sync(path.join(config.src, "docs", "*.hbs"));
|
||||||
|
|
||||||
|
const context = {
|
||||||
|
build: {
|
||||||
|
prod: config.prod,
|
||||||
|
year: String(new Date().getFullYear())
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
const compress_html = (input) => config.prod ? minify(input, config.compression_config.html) : input;
|
// helper functions
|
||||||
|
const compressHtml = (input) => config.prod ? minify(input, config.compression_config.html) : input;
|
||||||
|
|
||||||
// get views
|
Handlebars.registerHelper("i-code", () => new Handlebars.SafeString('<code class="cm-s-idea">'));
|
||||||
const files = glob.sync(path.join(config.src, "docs", "*.json"));
|
Handlebars.registerHelper("s-code", () => new Handlebars.SafeString('<span class="tutorial-code"><code class="cm-s-idea">'));
|
||||||
|
Handlebars.registerHelper("p-code", () => new Handlebars.SafeString('<pre class="tutorial-code"><code class="cm-s-idea">'));
|
||||||
|
|
||||||
|
Handlebars.registerHelper("end-i-code", () => new Handlebars.SafeString("</code>"));
|
||||||
|
Handlebars.registerHelper("end-s-code", () => new Handlebars.SafeString("</code></span>"));
|
||||||
|
Handlebars.registerHelper("end-p-code", () => new Handlebars.SafeString("</code></pre>"));
|
||||||
|
|
||||||
// get partials
|
// get partials
|
||||||
const partials = {
|
Handlebars.registerPartial("header", readFileSync(path.join(config.src, "docs", "partials", "header.hbs"), "utf8"));
|
||||||
header: readFileSync(path.join(config.src, "docs", "header.mustache"), "utf8"),
|
Handlebars.registerPartial("footer", readFileSync(path.join(config.src, "docs", "partials", "footer.hbs"), "utf8"));
|
||||||
footer: readFileSync(path.join(config.src, "docs", "footer.mustache"), "utf8")
|
Handlebars.registerPartial("example_code", readFileSync(path.join(config.src, "docs", "partials", "example_code.hbs"), "utf8"));
|
||||||
};
|
|
||||||
|
// build handlebar files
|
||||||
|
for (const file of files) {
|
||||||
|
const filename = path.basename(file);
|
||||||
|
const to = path.join(config.dst, path.basename(filename, ".hbs") + ".html");
|
||||||
|
const template = readFileSync(path.join(config.src, "docs", filename), "utf8");
|
||||||
|
const html = Handlebars.compile(template)(context);
|
||||||
|
|
||||||
// build main mustache files
|
writeFileSync(to, compressHtml(html));
|
||||||
for (const item of files) {
|
|
||||||
const filename = path.basename(item, ".json");
|
|
||||||
const view = read_json_file(item);
|
|
||||||
const to = path.join(config.dst, filename + ".html");
|
|
||||||
const template = readFileSync(path.join(config.src, "docs", filename + ".mustache"), "utf8");
|
|
||||||
|
|
||||||
writeFileSync(to, compress_html(render(template, view, partials)));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -80,23 +80,28 @@ module.exports = {
|
|||||||
minimize: config.prod,
|
minimize: config.prod,
|
||||||
minimizer: [ new TerserPlugin({cache: true, parallel: true}), new OptimizeCSSAssetsPlugin({}) ]
|
minimizer: [ new TerserPlugin({cache: true, parallel: true}), new OptimizeCSSAssetsPlugin({}) ]
|
||||||
},
|
},
|
||||||
|
performance: {
|
||||||
|
hints: false,
|
||||||
|
maxEntrypointSize: 512000,
|
||||||
|
maxAssetSize: 512000
|
||||||
|
},
|
||||||
plugins: [
|
plugins: [
|
||||||
|
new CleanWebpackPlugin({verbose:true, protectWebpackAssets: false}),
|
||||||
new CopyPlugin({
|
new CopyPlugin({
|
||||||
patterns: [
|
patterns: [
|
||||||
{ from: config.src + "docs/" + "!(*.css|*.mustache|*.json)", to: "", flatten: true}
|
{ from: config.src + "docs/" + "assets/" + "!(*.css|*.hbs)", to: "", flatten: true}
|
||||||
]
|
]
|
||||||
}),
|
}),
|
||||||
new MiniCssExtractPlugin({ filename: "bundle.min.css" }),
|
new MiniCssExtractPlugin({ filename: "bundle.min.css" }),
|
||||||
new WebpackBeforeBuildPlugin(function(_, callback) {
|
new WebpackBeforeBuildPlugin(function(_, callback) {
|
||||||
build_mustache();
|
buildHandlebars();
|
||||||
callback();
|
callback();
|
||||||
}),
|
}, [ "done" ]),
|
||||||
new RemovePlugin({
|
new RemovePlugin({
|
||||||
after: {
|
after: {
|
||||||
root: "./lib",
|
root: "./lib",
|
||||||
include: [
|
include: [
|
||||||
"script.d.ts",
|
"script.d.ts"
|
||||||
"script.d.ts.map"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
Loading…
x
Reference in New Issue
Block a user