mirror of https://github.com/pdemian/human2regex.git (synced 2025-05-16 12:30:09 -07:00)

Added AST

parent 1354272eb8
commit d0185903c2

377 docs/bundle.min.js (vendored)
@@ -99,8 +99,50 @@ $(function() {
 });
 */
 const opts = new tokenizer_1.TokenizerOptions();
-const res = tokenizer_1.tokenize("match 1+ thing from thingy", opts);
-console.log(res);
+const result = tokenizer_1.tokenize(`
+// H2R supports // # and /**/ as comments
+// A group is only captured if given a name.
+// You can use "and", "or", "not" to specify "[]" regex
+// You can use "then" to combine match statements, however I find using multiple "match" statements easier to read
+
+// exact matching means use a ^ and $ to signify the start and end of the string
+
+using global and exact matching
+create an optional group called "protocol"
+    match "http"
+    optionally match "s"
+    match "://"
+create a group called "subdomain"
+    repeat
+        match 1+ words
+        match "."
+create a group called "domain"
+    match 1+ words or "_" or "-"
+    match "."
+    match a word
+# port, but we don't care about it, so ignore it
+optionally match ":" then 0+ digits
+create an optional group called "path"
+    repeat
+        match "/"
+        match 0+ words or "_" or "-"
+create an optional group
+    # we don't want to capture the '?', so don't name the group until afterwards
+    match "?"
+    create a group called "query"
+        repeat
+            match 1+ words or "_" or "-"
+            match "="
+            match 1+ words or "_" or "-"
+create an optional group
+    # fragment, again, we don't care, so ignore everything afterwards
+    match "#"
+    match 0+ any thing
+`, opts);
+for (const r of result.tokens) {
+    console.log(r.to_string());
+}
+console.log(result.errors);
+
+
 /***/ }),
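Note: the demo in this hunk now tokenizes a full H2R program describing a URL matcher rather than a one-liner. A minimal sketch of driving the same API from TypeScript source; the "./tokenizer" import path is an assumption, not something this diff shows:

    import { TokenizerOptions, tokenize } from "./tokenizer"; // hypothetical path

    const opts = new TokenizerOptions();
    const result = tokenize('create a group called "domain"\nmatch 1+ words', opts);

    // Each token prints as line:position TYPE "text" (size: n), per Token.to_string below.
    for (const token of result.tokens) {
        console.log(token.to_string());
    }
    if (result.errors.length > 0) {
        console.log(result.errors.map((e) => e.to_string()));
    }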
@@ -124,12 +166,15 @@ exports.tokenize = exports.TokenizerOptions = void 0;
 // TODO: replace every version of switch(<some string>) with switch(<some string>.charCodeAt(0))
 const tokens_1 = __webpack_require__(3);
 const keywords = {
+    /* Full Keywords */
     "optional": tokens_1.TokenType.KEYWORD_OPTIONAL,
     "optionally": tokens_1.TokenType.KEYWORD_OPTIONAL,
     "match": tokens_1.TokenType.KEYWORD_MATCH,
+    "matches": tokens_1.TokenType.KEYWORD_MATCH,
     "then": tokens_1.TokenType.KEYWORD_THEN,
     "any": tokens_1.TokenType.KEYWORD_ANY,
     "anything": tokens_1.TokenType.KEYWORD_ANY,
+    "anythings": tokens_1.TokenType.KEYWORD_ANY,
     "of": tokens_1.TokenType.KEYWORD_OF,
     "or": tokens_1.TokenType.KEYWORD_OR,
     "and": tokens_1.TokenType.KEYWORD_AND,
@@ -138,9 +183,15 @@ const keywords = {
     "character": tokens_1.TokenType.KEYWORD_CHAR_SPECIFIER,
     "whitespace": tokens_1.TokenType.KEYWORD_WHITESPACE_SPECIFIER,
     "number": tokens_1.TokenType.KEYWORD_NUMBER_SPECIFIER,
+    "words": tokens_1.TokenType.KEYWODE_WORD_SPECIFIER,
+    "digits": tokens_1.TokenType.KEYWORD_DIGIT_SPECIFIER,
+    "characters": tokens_1.TokenType.KEYWORD_CHAR_SPECIFIER,
+    "whitespaces": tokens_1.TokenType.KEYWORD_WHITESPACE_SPECIFIER,
+    "numbers": tokens_1.TokenType.KEYWORD_NUMBER_SPECIFIER,
     "multiple": tokens_1.TokenType.KEYWORD_MULTIPLE,
     "as": tokens_1.TokenType.KEYWORD_AS,
     "if": tokens_1.TokenType.KEYWORD_IF,
+    "start": tokens_1.TokenType.KEYWORD_STARTS,
     "starts": tokens_1.TokenType.KEYWORD_STARTS,
     "with": tokens_1.TokenType.KEYWORD_WITH,
     "ends": tokens_1.TokenType.KEYWORD_ENDS,
@@ -158,8 +209,6 @@ const keywords = {
     "between": tokens_1.TokenType.KEYWORD_BETWEEN,
     "tab": tokens_1.TokenType.KEYWORD_TAB,
     "linefeed": tokens_1.TokenType.KEYWORD_LINEFEED,
-    "carriage": tokens_1.TokenType.KEYWORD_CARRIAGE,
-    "return": tokens_1.TokenType.KEYWORD_RETURN,
     "group": tokens_1.TokenType.KEYWORD_GROUP,
     "by": tokens_1.TokenType.KEYWORD_BY,
     "an": tokens_1.TokenType.KEYWORD_ARTICLE,
@@ -171,7 +220,58 @@ const keywords = {
     "exclusive": tokens_1.TokenType.KEYWORD_EXCLUSIVE,
     "exclusively": tokens_1.TokenType.KEYWORD_EXCLUSIVE,
     "from": tokens_1.TokenType.KEYWORD_FROM,
-    "to": tokens_1.TokenType.KEYWORD_TO
+    "to": tokens_1.TokenType.KEYWORD_TO,
+    "create": tokens_1.TokenType.KEYWORD_CREATE,
+    "creates": tokens_1.TokenType.KEYWORD_CREATE,
+    "called": tokens_1.TokenType.KEYWORD_CALLED,
+    "repeat": tokens_1.TokenType.KEYWORD_REPEAT,
+    "repeats": tokens_1.TokenType.KEYWORD_REPEAT,
+    "newline": tokens_1.TokenType.KEYWORD_NEWLINE,
+    "none": tokens_1.TokenType.KEYWORD_NONE,
+    "neither": tokens_1.TokenType.KEYWORD_NEITHER,
+    /* Partial keywords */
+    "thing": tokens_1.TokenType.PARTIAL_KEYWORD,
+    "things": tokens_1.TokenType.PARTIAL_KEYWORD,
+    "white": tokens_1.TokenType.PARTIAL_KEYWORD,
+    "space": tokens_1.TokenType.PARTIAL_KEYWORD,
+    "spaces": tokens_1.TokenType.PARTIAL_KEYWORD,
+    "other": tokens_1.TokenType.PARTIAL_KEYWORD,
+    "wise": tokens_1.TokenType.PARTIAL_KEYWORD,
+    "multi": tokens_1.TokenType.PARTIAL_KEYWORD,
+    "new": tokens_1.TokenType.PARTIAL_KEYWORD,
+    "line": tokens_1.TokenType.PARTIAL_KEYWORD,
+    "feed": tokens_1.TokenType.PARTIAL_KEYWORD,
+    "carriage": tokens_1.TokenType.PARTIAL_KEYWORD,
+    "return": tokens_1.TokenType.PARTIAL_KEYWORD,
+    "case": tokens_1.TokenType.PARTIAL_KEYWORD,
+    "insensitive": tokens_1.TokenType.PARTIAL_KEYWORD,
+    "sensitive": tokens_1.TokenType.PARTIAL_KEYWORD
+};
+const numbers = {
+    "zero": "0",
+    "one": "1",
+    "two": "2",
+    "three": "3",
+    "four": "4",
+    "five": "5",
+    "six": "6",
+    "seven": "7",
+    "eight": "8",
+    "nine": "9",
+    "ten": "10"
+};
+const token_transformations = {
+    "thing": [{ preceeding_token: "any", transforms_to: tokens_1.TokenType.KEYWORD_ANY }],
+    "things": [{ preceeding_token: "any", transforms_to: tokens_1.TokenType.KEYWORD_ANY }],
+    "space": [{ preceeding_token: "white", transforms_to: tokens_1.TokenType.KEYWORD_WHITESPACE_SPECIFIER }],
+    "spaces": [{ preceeding_token: "white", transforms_to: tokens_1.TokenType.KEYWORD_WHITESPACE_SPECIFIER }],
+    "wise": [{ preceeding_token: "other", transforms_to: tokens_1.TokenType.KEYWORD_ELSE }],
+    "line": [{ preceeding_token: "multi", transforms_to: tokens_1.TokenType.KEYWORD_MULTILINE },
+        { preceeding_token: "new", transforms_to: tokens_1.TokenType.KEYWORD_NEWLINE }],
+    "feed": [{ preceeding_token: "line", transforms_to: tokens_1.TokenType.KEYWORD_LINEFEED }],
+    "return": [{ preceeding_token: "carriage", transforms_to: tokens_1.TokenType.KEYWORD_CARRIAGE_RETURN }],
+    "sensitive": [{ preceeding_token: "case", transforms_to: tokens_1.TokenType.KEYWORD_CASE_SENSITIVE }],
+    "insensitive": [{ preceeding_token: "case", transforms_to: tokens_1.TokenType.KEYWORD_CASE_INSENSITIVE }],
 };
 const escape_sequences = {
     "a": "\a",
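Note: the new token_transformations table is what lets two-word phrases such as "carriage return", "new line", or "case insensitive" become a single keyword token after scanning. A standalone sketch of that merge over plain strings (names here are illustrative, not the bundle's own):

    type Transform = { preceeding_token: string, transforms_to: string };

    const transformations: Record<string, Transform[]> = {
        "return": [{ preceeding_token: "carriage", transforms_to: "KEYWORD_CARRIAGE_RETURN" }],
        "line": [{ preceeding_token: "new", transforms_to: "KEYWORD_NEWLINE" }]
    };

    function merge(words: string[]): string[] {
        const out: string[] = [];
        for (const word of words) {
            const prev = out[out.length - 1];
            const rule = (transformations[word] ?? []).find((t) => t.preceeding_token === prev);
            if (rule) {
                out[out.length - 1] = rule.transforms_to; // fold the pair into one keyword
            } else {
                out.push(word);
            }
        }
        return out;
    }

    console.log(merge(["carriage", "return"])); // [ "KEYWORD_CARRIAGE_RETURN" ]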
@@ -185,12 +285,6 @@ const escape_sequences = {
     "\"": '"',
     "\\": "\\",
 };
-class TokenizerOptions {
-    constructor() {
-        this.convert_spaces_to_tabs = false;
-    }
-}
-exports.TokenizerOptions = TokenizerOptions;
 const escape_sequence_hex_regex = new RegExp(/[0-9A-Fa-f]/g);
 function escape_sequence_gather_hex(input, i, max) {
     let hex = "";
@@ -234,38 +328,74 @@ function escape_sequence_mapper(input, i) {
         return { code: input[i], read: 1 };
     }
 }
-function is_digit(input) {
-    //return /[0-9]/g.test(input);
-    const value = input.charCodeAt(0);
-    return value >= 48 && value <= 57;
+const test_char_0 = "0".charCodeAt(0);
+const test_char_9 = "9".charCodeAt(0);
+const test_char_a = "a".charCodeAt(0);
+const test_char_z = "z".charCodeAt(0);
+const test_char_A = "A".charCodeAt(0);
+const test_char_Z = "Z".charCodeAt(0);
+function is_digit(input, i) {
+    const value = input.charCodeAt(i);
+    return value >= test_char_0 && value <= test_char_9;
 }
-function is_char(input) {
-    //return input.toUpperCase() != input.toLowerCase();
-    //return /[a-zA-Z]/g.test(input);
-    const value = input.charCodeAt(0);
-    return ((value >= 65 && value <= 90) || (value >= 97 && value <= 122));
+function is_char(input, i) {
+    const value = input.charCodeAt(i);
+    return ((value >= test_char_a && value <= test_char_z) ||
+        (value >= test_char_A && value <= test_char_Z));
 }
+function transform_tokens(tokens, errors) {
+    for (let i = 0; i < tokens.length; i++) {
+        //check past tokens: if it matches the preceeding tokens, we transform it.
+        if (tokens[i].type === tokens_1.TokenType.PARTIAL_KEYWORD && token_transformations[tokens[i].token_string]) {
+            const transform = token_transformations[tokens[i].token_string];
+            for (let j = 0; j < transform.length; j++) {
+                if (i - 1 >= 0 && transform[j].preceeding_token === tokens[i - 1].token_string) {
+                    // use the i-1 token because it has the start line and position
+                    tokens[i - 1].type = transform[j].transforms_to;
+                    tokens[i - 1].token_string += " " + tokens[i].token_string;
+                    tokens.splice(i, 1); // remove this token
+                    i--; // move token counter back because we removed the token
+                    break;
+                }
+            }
+        }
+        /* else ignore */
+    }
+    // do we still have partial tokens? those are errors then
+    for (let i = 0; i < tokens.length; i++) {
+        if (tokens[i].type === tokens_1.TokenType.PARTIAL_KEYWORD) {
+            errors.push(new tokens_1.TokenError(`Unknown keyword "${tokens[i].token_string}"`, tokens[i].line, tokens[i].position));
+        }
+    }
+}
+class TokenizerOptions {
+    constructor() {
+        this.convert_spaces_to_tabs = true;
+    }
+}
+exports.TokenizerOptions = TokenizerOptions;
 /* Basic Tokenizer */
 function tokenize(input, options) {
     let line = 1;
     let position = 1;
     const tokens = [];
     const errors = [];
+    // gather tokens
     for (let i = 0; i < input.length; i++, position++) {
         // 4 spaces = 1 tab. That is final. Debate over
         if (options.convert_spaces_to_tabs && input.startsWith("    ", i)) {
-            tokens.push(new tokens_1.Token(tokens_1.TokenType.INDENT, line, position));
+            tokens.push(new tokens_1.Token(tokens_1.TokenType.INDENT, line, position, 4));
             i += 3;
             position += 3;
         }
         // between (ex: 0...3 or 0-3)
         else if (input.startsWith("...", i)) {
-            tokens.push(new tokens_1.Token(tokens_1.TokenType.BETWEEN, line, position));
+            tokens.push(new tokens_1.Token(tokens_1.TokenType.BETWEEN, line, position, 3));
             i += 2;
             position += 2;
         }
         else if (input.startsWith("..", i)) {
-            tokens.push(new tokens_1.Token(tokens_1.TokenType.BETWEEN, line, position));
+            tokens.push(new tokens_1.Token(tokens_1.TokenType.BETWEEN, line, position, 3));
             i++;
             position++;
         }
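Note: is_digit and is_char lose their commented-out regex tests in favor of char-code range checks against constants hoisted out of the hot loop, and both now index into the input instead of taking a one-character string. The same pattern in miniature:

    // Boundary codes computed once; two integer compares per call.
    const CHAR_0 = "0".charCodeAt(0);
    const CHAR_9 = "9".charCodeAt(0);

    function isDigitAt(input: string, i: number): boolean {
        const value = input.charCodeAt(i);
        return value >= CHAR_0 && value <= CHAR_9;
    }

    console.log(isDigitAt("a1", 1)); // true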
@@ -273,7 +403,7 @@ function tokenize(input, options) {
         else if (input.startsWith("//", i)) {
             for (i++, position++; i < input.length; i++, position++) {
                 if (input[i] === "\n") {
-                    tokens.push(new tokens_1.Token(tokens_1.TokenType.END_OF_STATEMENT, line, position));
+                    tokens.push(new tokens_1.Token(tokens_1.TokenType.END_OF_STATEMENT, line, position, -1));
                     break;
                 }
             }
@@ -283,7 +413,6 @@ function tokenize(input, options) {
         else if (input.startsWith("/*", i)) {
             for (i++, position++; i < input.length - 1; i++, position++) {
                 if (input[i] === "*" && input[i + 1] === "/") {
-                    tokens.push(new tokens_1.Token(tokens_1.TokenType.END_OF_STATEMENT, line, position));
                     i++;
                     position++;
                     break;
@@ -302,7 +431,7 @@ function tokenize(input, options) {
             }
         }
         else if (input.startsWith("\r\n", i)) {
-            tokens.push(new tokens_1.Token(tokens_1.TokenType.END_OF_STATEMENT, line, position));
+            tokens.push(new tokens_1.Token(tokens_1.TokenType.END_OF_STATEMENT, line, position, -1));
             i++;
             line++;
             position = 0;
@@ -313,7 +442,7 @@ function tokenize(input, options) {
                 case "#":
                     for (i++, position++; i < input.length; i++, position++) {
                         if (input[i] === "\n") {
-                            tokens.push(new tokens_1.Token(tokens_1.TokenType.END_OF_STATEMENT, line, position));
+                            tokens.push(new tokens_1.Token(tokens_1.TokenType.END_OF_STATEMENT, line, position, -1));
                             line++;
                             position = 0;
                             break;
@@ -357,7 +486,7 @@ function tokenize(input, options) {
                        }
                    } while (i < input.length);
                    if (found_ending) {
-                        tokens.push(new tokens_1.Token(tokens_1.TokenType.QUOTE, line, position, quote));
+                        tokens.push(new tokens_1.Token(tokens_1.TokenType.QUOTE, line, position, quote.length + 2, quote));
                    }
                    else {
                        //we reached the end of the line or the end of the file
@@ -369,91 +498,65 @@ function tokenize(input, options) {
                    }
                // between (ex: 0...3 or 0-3)
                case "-":
-                    tokens.push(new tokens_1.Token(tokens_1.TokenType.BETWEEN, line, position));
+                    tokens.push(new tokens_1.Token(tokens_1.TokenType.BETWEEN, line, position, 1));
+                    break;
+                case "+":
+                    tokens.push(new tokens_1.Token(tokens_1.TokenType.KEYWORD_OR, line, position, 1));
+                    tokens.push(new tokens_1.Token(tokens_1.TokenType.KEYWORD_MORE, line, position, 0));
                    break;
                case "\n":
-                    tokens.push(new tokens_1.Token(tokens_1.TokenType.END_OF_STATEMENT, line, position));
+                    tokens.push(new tokens_1.Token(tokens_1.TokenType.END_OF_STATEMENT, line, position, -1));
+                    line++;
+                    position = 0;
                    break;
                case "\r":
                    // ignore
                    break;
                case "\t":
-                    tokens.push(new tokens_1.Token(tokens_1.TokenType.INDENT, line, position));
+                    tokens.push(new tokens_1.Token(tokens_1.TokenType.INDENT, line, position, 1));
                    break;
                case " ":
+                    // ignore
                    break;
                default:
                    // is digit? build up a number
-                    if (is_digit(input[i])) {
+                    if (is_digit(input, i)) {
+                        const digit_begin = position;
                        let digits = input[i];
-                        do {
-                            i++;
-                            position++;
-                            digits += input[i];
-                        } while (i + 1 < input.length && is_digit(input[i + 1]));
-                        tokens.push(new tokens_1.Token(tokens_1.TokenType.NUMBER, line, position, digits));
+                        for (; i + 1 < input.length && is_digit(input, i + 1); i++, position++) {
+                            digits += input[i + 1];
+                        }
+                        tokens.push(new tokens_1.Token(tokens_1.TokenType.NUMBER, line, digit_begin, position - digit_begin + 1, digits));
                    }
                    // is char? build up a word
-                    else if (is_char(input[i])) {
+                    else if (is_char(input, i)) {
+                        const word_begin = position;
                        let text = input[i];
-                        do {
-                            i++;
-                            position++;
-                            text += input[i];
-                        } while (i + 1 < input.length && is_char(input[i + 1]));
+                        for (; i + 1 < input.length && is_char(input, i + 1); i++, position++) {
+                            text += input[i + 1];
+                        }
                        const keyword_text = text.toLowerCase();
+                        // keyword (ex. "match")
                        if (keywords[keyword_text]) {
-                            tokens.push(new tokens_1.Token(keywords[keyword_text], line, position));
+                            tokens.push(new tokens_1.Token(keywords[keyword_text], line, word_begin, position - word_begin + 1, keyword_text));
+                        }
+                        // text number (ex. "one")
+                        else if (numbers[keyword_text]) {
+                            tokens.push(new tokens_1.Token(tokens_1.TokenType.NUMBER, line, word_begin, position - word_begin + 1, keyword_text));
                        }
                        else {
-                            switch (keyword_text) {
-                                case "none":
-                                case "zero":
-                                    tokens.push(new tokens_1.Token(tokens_1.TokenType.NUMBER, line, position, "0"));
-                                    break;
-                                case "one":
-                                    tokens.push(new tokens_1.Token(tokens_1.TokenType.NUMBER, line, position, "1"));
-                                    break;
-                                case "two":
-                                    tokens.push(new tokens_1.Token(tokens_1.TokenType.NUMBER, line, position, "2"));
-                                    break;
-                                case "three":
-                                    tokens.push(new tokens_1.Token(tokens_1.TokenType.NUMBER, line, position, "3"));
-                                    break;
-                                case "four":
-                                    tokens.push(new tokens_1.Token(tokens_1.TokenType.NUMBER, line, position, "4"));
-                                    break;
-                                case "five":
-                                    tokens.push(new tokens_1.Token(tokens_1.TokenType.NUMBER, line, position, "5"));
-                                    break;
-                                case "six":
-                                    tokens.push(new tokens_1.Token(tokens_1.TokenType.NUMBER, line, position, "6"));
-                                    break;
-                                case "seven":
-                                    tokens.push(new tokens_1.Token(tokens_1.TokenType.NUMBER, line, position, "7"));
-                                    break;
-                                case "eight":
-                                    tokens.push(new tokens_1.Token(tokens_1.TokenType.NUMBER, line, position, "8"));
-                                    break;
-                                case "nine":
-                                    tokens.push(new tokens_1.Token(tokens_1.TokenType.NUMBER, line, position, "9"));
-                                    break;
-                                case "ten":
-                                    tokens.push(new tokens_1.Token(tokens_1.TokenType.NUMBER, line, position, "10"));
-                                    break;
-                                default:
-                                    errors.push(new tokens_1.TokenError(`Unknown keyword ${text}`, line, position));
-                                    break;
-                            }
+                            errors.push(new tokens_1.TokenError(`Unknown keyword "${text}"`, line, word_begin));
                        }
                    }
                    else {
-                        errors.push(new tokens_1.TokenError(`Unknown character in text: ${input.charCodeAt(i)}`, line, position));
+                        errors.push(new tokens_1.TokenError(`Unknown character in text: "${input[i]}" (${input.charCodeAt(i)})`, line, position));
                    }
                    break;
            }
        }
    }
+    // transform tokens
+    transform_tokens(tokens, errors);
    return { tokens: tokens, errors: errors };
 }
 exports.tokenize = tokenize;
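Note: the eleven-arm switch over number words is gone; word tokens now check the keywords table, then the numbers table, and only then report an unknown keyword anchored at the word's start. The lookup in isolation, with the table truncated for brevity:

    const numbers: Record<string, string> = { "zero": "0", "one": "1", "ten": "10" /* ...full table above */ };

    function numberFor(word: string): string | undefined {
        return numbers[word.toLowerCase()];
    }

    console.log(numberFor("One"));  // "1"
    console.log(numberFor("lots")); // undefined -> reported as an unknown keyword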
@@ -474,47 +577,55 @@ var TokenType;
     TokenType[TokenType["BETWEEN"] = 2] = "BETWEEN";
     TokenType[TokenType["QUOTE"] = 3] = "QUOTE";
     TokenType[TokenType["NUMBER"] = 4] = "NUMBER";
-    TokenType[TokenType["KEYWORD_BETWEEN"] = 5] = "KEYWORD_BETWEEN";
-    TokenType[TokenType["KEYWORD_OPTIONAL"] = 6] = "KEYWORD_OPTIONAL";
-    TokenType[TokenType["KEYWORD_MATCH"] = 7] = "KEYWORD_MATCH";
-    TokenType[TokenType["KEYWORD_THEN"] = 8] = "KEYWORD_THEN";
-    TokenType[TokenType["KEYWORD_AND"] = 9] = "KEYWORD_AND";
-    TokenType[TokenType["KEYWORD_OR"] = 10] = "KEYWORD_OR";
-    TokenType[TokenType["KEYWORD_ANY"] = 11] = "KEYWORD_ANY";
-    TokenType[TokenType["KEYWORD_OF"] = 12] = "KEYWORD_OF";
-    TokenType[TokenType["KEYWODE_WORD_SPECIFIER"] = 13] = "KEYWODE_WORD_SPECIFIER";
-    TokenType[TokenType["KEYWORD_DIGIT_SPECIFIER"] = 14] = "KEYWORD_DIGIT_SPECIFIER";
-    TokenType[TokenType["KEYWORD_CHAR_SPECIFIER"] = 15] = "KEYWORD_CHAR_SPECIFIER";
-    TokenType[TokenType["KEYWORD_WHITESPACE_SPECIFIER"] = 16] = "KEYWORD_WHITESPACE_SPECIFIER";
-    TokenType[TokenType["KEYWORD_NUMBER_SPECIFIER"] = 17] = "KEYWORD_NUMBER_SPECIFIER";
-    TokenType[TokenType["KEYWORD_MULTIPLE"] = 18] = "KEYWORD_MULTIPLE";
-    TokenType[TokenType["KEYWORD_AS"] = 19] = "KEYWORD_AS";
-    TokenType[TokenType["KEYWORD_IF"] = 20] = "KEYWORD_IF";
-    TokenType[TokenType["KEYWORD_STARTS"] = 21] = "KEYWORD_STARTS";
-    TokenType[TokenType["KEYWORD_WITH"] = 22] = "KEYWORD_WITH";
-    TokenType[TokenType["KEYWORD_ENDS"] = 23] = "KEYWORD_ENDS";
-    TokenType[TokenType["KEYWORD_ELSE"] = 24] = "KEYWORD_ELSE";
-    TokenType[TokenType["KEYWORD_UNLESS"] = 25] = "KEYWORD_UNLESS";
-    TokenType[TokenType["KEYWORD_WHILE"] = 26] = "KEYWORD_WHILE";
-    TokenType[TokenType["KEYWORD_MORE"] = 27] = "KEYWORD_MORE";
-    TokenType[TokenType["KEYWORD_USING"] = 28] = "KEYWORD_USING";
-    TokenType[TokenType["KEYWORD_GLOBAL"] = 29] = "KEYWORD_GLOBAL";
-    TokenType[TokenType["KEYWORD_MULTILINE"] = 30] = "KEYWORD_MULTILINE";
-    TokenType[TokenType["KEYWORD_EXACT"] = 31] = "KEYWORD_EXACT";
-    TokenType[TokenType["KEYWORD_MATCHING"] = 32] = "KEYWORD_MATCHING";
-    TokenType[TokenType["KEYWORD_NOT"] = 33] = "KEYWORD_NOT";
-    TokenType[TokenType["KEYWORD_TAB"] = 34] = "KEYWORD_TAB";
-    TokenType[TokenType["KEYWORD_LINEFEED"] = 35] = "KEYWORD_LINEFEED";
-    TokenType[TokenType["KEYWORD_CARRIAGE"] = 36] = "KEYWORD_CARRIAGE";
-    TokenType[TokenType["KEYWORD_RETURN"] = 37] = "KEYWORD_RETURN";
-    TokenType[TokenType["KEYWORD_GROUP"] = 38] = "KEYWORD_GROUP";
-    TokenType[TokenType["KEYWORD_BY"] = 39] = "KEYWORD_BY";
-    TokenType[TokenType["KEYWORD_ARTICLE"] = 40] = "KEYWORD_ARTICLE";
-    TokenType[TokenType["KEYWORD_EXACTLY"] = 41] = "KEYWORD_EXACTLY";
-    TokenType[TokenType["KEYWORD_INCLUSIVE"] = 42] = "KEYWORD_INCLUSIVE";
-    TokenType[TokenType["KEYWORD_EXCLUSIVE"] = 43] = "KEYWORD_EXCLUSIVE";
-    TokenType[TokenType["KEYWORD_FROM"] = 44] = "KEYWORD_FROM";
-    TokenType[TokenType["KEYWORD_TO"] = 45] = "KEYWORD_TO";
+    TokenType[TokenType["PARTIAL_KEYWORD"] = 5] = "PARTIAL_KEYWORD";
+    TokenType[TokenType["KEYWORD_BETWEEN"] = 6] = "KEYWORD_BETWEEN";
+    TokenType[TokenType["KEYWORD_OPTIONAL"] = 7] = "KEYWORD_OPTIONAL";
+    TokenType[TokenType["KEYWORD_MATCH"] = 8] = "KEYWORD_MATCH";
+    TokenType[TokenType["KEYWORD_THEN"] = 9] = "KEYWORD_THEN";
+    TokenType[TokenType["KEYWORD_AND"] = 10] = "KEYWORD_AND";
+    TokenType[TokenType["KEYWORD_OR"] = 11] = "KEYWORD_OR";
+    TokenType[TokenType["KEYWORD_ANY"] = 12] = "KEYWORD_ANY";
+    TokenType[TokenType["KEYWORD_OF"] = 13] = "KEYWORD_OF";
+    TokenType[TokenType["KEYWORD_NONE"] = 14] = "KEYWORD_NONE";
+    TokenType[TokenType["KEYWORD_NEITHER"] = 15] = "KEYWORD_NEITHER";
+    TokenType[TokenType["KEYWODE_WORD_SPECIFIER"] = 16] = "KEYWODE_WORD_SPECIFIER";
+    TokenType[TokenType["KEYWORD_DIGIT_SPECIFIER"] = 17] = "KEYWORD_DIGIT_SPECIFIER";
+    TokenType[TokenType["KEYWORD_CHAR_SPECIFIER"] = 18] = "KEYWORD_CHAR_SPECIFIER";
+    TokenType[TokenType["KEYWORD_WHITESPACE_SPECIFIER"] = 19] = "KEYWORD_WHITESPACE_SPECIFIER";
+    TokenType[TokenType["KEYWORD_NUMBER_SPECIFIER"] = 20] = "KEYWORD_NUMBER_SPECIFIER";
+    TokenType[TokenType["KEYWORD_MULTIPLE"] = 21] = "KEYWORD_MULTIPLE";
+    TokenType[TokenType["KEYWORD_AS"] = 22] = "KEYWORD_AS";
+    TokenType[TokenType["KEYWORD_IF"] = 23] = "KEYWORD_IF";
+    TokenType[TokenType["KEYWORD_STARTS"] = 24] = "KEYWORD_STARTS";
+    TokenType[TokenType["KEYWORD_WITH"] = 25] = "KEYWORD_WITH";
+    TokenType[TokenType["KEYWORD_ENDS"] = 26] = "KEYWORD_ENDS";
+    TokenType[TokenType["KEYWORD_ELSE"] = 27] = "KEYWORD_ELSE";
+    TokenType[TokenType["KEYWORD_UNLESS"] = 28] = "KEYWORD_UNLESS";
+    TokenType[TokenType["KEYWORD_WHILE"] = 29] = "KEYWORD_WHILE";
+    TokenType[TokenType["KEYWORD_MORE"] = 30] = "KEYWORD_MORE";
+    TokenType[TokenType["KEYWORD_USING"] = 31] = "KEYWORD_USING";
+    TokenType[TokenType["KEYWORD_GLOBAL"] = 32] = "KEYWORD_GLOBAL";
+    TokenType[TokenType["KEYWORD_MULTILINE"] = 33] = "KEYWORD_MULTILINE";
+    TokenType[TokenType["KEYWORD_EXACT"] = 34] = "KEYWORD_EXACT";
+    TokenType[TokenType["KEYWORD_MATCHING"] = 35] = "KEYWORD_MATCHING";
+    TokenType[TokenType["KEYWORD_NOT"] = 36] = "KEYWORD_NOT";
+    TokenType[TokenType["KEYWORD_TAB"] = 37] = "KEYWORD_TAB";
+    TokenType[TokenType["KEYWORD_LINEFEED"] = 38] = "KEYWORD_LINEFEED";
+    TokenType[TokenType["KEYWORD_CARRIAGE_RETURN"] = 39] = "KEYWORD_CARRIAGE_RETURN";
+    TokenType[TokenType["KEYWORD_GROUP"] = 40] = "KEYWORD_GROUP";
+    TokenType[TokenType["KEYWORD_BY"] = 41] = "KEYWORD_BY";
+    TokenType[TokenType["KEYWORD_ARTICLE"] = 42] = "KEYWORD_ARTICLE";
+    TokenType[TokenType["KEYWORD_EXACTLY"] = 43] = "KEYWORD_EXACTLY";
+    TokenType[TokenType["KEYWORD_INCLUSIVE"] = 44] = "KEYWORD_INCLUSIVE";
+    TokenType[TokenType["KEYWORD_EXCLUSIVE"] = 45] = "KEYWORD_EXCLUSIVE";
+    TokenType[TokenType["KEYWORD_FROM"] = 46] = "KEYWORD_FROM";
+    TokenType[TokenType["KEYWORD_TO"] = 47] = "KEYWORD_TO";
+    TokenType[TokenType["KEYWORD_CREATE"] = 48] = "KEYWORD_CREATE";
+    TokenType[TokenType["KEYWORD_CALLED"] = 49] = "KEYWORD_CALLED";
+    TokenType[TokenType["KEYWORD_REPEAT"] = 50] = "KEYWORD_REPEAT";
+    TokenType[TokenType["KEYWORD_NEWLINE"] = 51] = "KEYWORD_NEWLINE";
+    TokenType[TokenType["KEYWORD_CASE_SENSITIVE"] = 52] = "KEYWORD_CASE_SENSITIVE";
+    TokenType[TokenType["KEYWORD_CASE_INSENSITIVE"] = 53] = "KEYWORD_CASE_INSENSITIVE";
 })(TokenType = exports.TokenType || (exports.TokenType = {}));
 class TokenError extends Error {
     constructor(message, line, position) {
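Note: inserting PARTIAL_KEYWORD at value 5 shifts every later TokenType, which is why this hunk renumbers the whole enum. The compiled pattern above is TypeScript's numeric-enum reverse mapping, the same mapping Token.to_string uses to print a name for a numeric type. A tiny illustration (the first two member names are inferred from the tokenizer above):

    enum TokenType { END_OF_STATEMENT, INDENT, BETWEEN, QUOTE, NUMBER, PARTIAL_KEYWORD }

    console.log(TokenType.PARTIAL_KEYWORD); // 5
    console.log(TokenType[5]);              // "PARTIAL_KEYWORD"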
@@ -523,16 +634,26 @@ class TokenError extends Error {
         this.position = position;
     }
     to_string() {
-        return `${this.line}:${this.position} ${this.message}`;
+        return `Token Error: ${this.line}:${this.position} ${this.message}`;
     }
 }
 exports.TokenError = TokenError;
 class Token {
-    constructor(type, line, position, token_string) {
+    constructor(type, line, position, length, token_string) {
         this.type = type;
         this.line = line;
         this.position = position;
+        this.length = length;
         this.token_string = token_string;
+        /* nothing required */
+    }
+    to_string() {
+        let str = `${this.line}:${this.position} ${TokenType[this.type]}`;
+        if (this.token_string) {
+            str += ` "${this.token_string}"`;
+        }
+        str += ` (size: ${this.length})`;
+        return str;
     }
 }
 exports.Token = Token;
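Note: Token gains a length field (how many source characters the token covers; the diff passes -1 for end-of-statement markers) plus the to_string the demo calls. A simplified standalone shape of the new class, with the enum-name lookup stubbed out as TYPE_<n>:

    class Token {
        constructor(public type: number, public line: number, public position: number,
                    public length: number, public token_string?: string) {}

        to_string(): string {
            let str = `${this.line}:${this.position} TYPE_${this.type}`;
            if (this.token_string) {
                str += ` "${this.token_string}"`;
            }
            return str + ` (size: ${this.length})`;
        }
    }

    console.log(new Token(4, 1, 7, 2, "10").to_string()); // 1:7 TYPE_4 "10" (size: 2)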
236 src/ast.ts

@@ -0,0 +1,236 @@
+/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
+
+import { Token } from "./tokens";
+
+export class SyntaxError extends Error {
+    constructor(message: string, public tokens: Token[]) {
+        super(message);
+    }
+
+    public to_string(): string {
+        return `Syntax Error: ${this.message}`;
+    }
+}
+
+/* TODO: line number/position? */
+export interface AbstractSyntaxTree {
+    to_string(): string;
+}
+
+export class Qualifier implements AbstractSyntaxTree {
+    constructor(public type: string) {
+        /* empty */
+    }
+    public to_string(): string {
+
+        if(this.type === "g") {
+            return "g";
+        }
+        else if(this.type === "m") {
+            return "m";
+        }
+        else {
+            return "i";
+        }
+    }
+}
+
+export class Regex implements AbstractSyntaxTree {
+    constructor(public inner_trees: AbstractSyntaxTree[], public qualifiers: Qualifier[]) {
+        /* empty */
+    }
+
+    public to_string(): string {
+        let str = "/";
+
+        for(const tree of this.inner_trees) {
+            str += tree.to_string();
+        }
+
+        str += "/";
+
+        for(const tree of this.qualifiers) {
+            str += tree.to_string();
+        }
+
+        return str;
+    }
+}
+
+export class Group implements AbstractSyntaxTree {
+    constructor(public inner_tree: AbstractSyntaxTree, public name?: string) {
+        /* empty */
+    }
+
+    public to_string(): string {
+        return "(" + (name ? `?<${this.name}>` : "") + `${this.inner_tree.to_string()})`;
+    }
+}
+
+export class Any implements AbstractSyntaxTree {
+    constructor() {
+        /* empty */
+    }
+
+    public to_string(): string {
+        return ".";
+    }
+}
+
+export class AnyOf implements AbstractSyntaxTree {
+    constructor(public inner_trees: AbstractSyntaxTree[], public negated: boolean) {
+        /* empty */
+    }
+
+    public to_string(): string {
+        let str = "[";
+
+        if(this.negated) {
+            str += "^";
+        }
+
+        for(const tree of this.inner_trees) {
+            str += tree.to_string();
+        }
+
+        str += "]";
+        return str;
+    }
+}
+
+export class Repeat implements AbstractSyntaxTree {
+    constructor(public inner_tree: AbstractSyntaxTree, public first_required: boolean) {
+        /* empty */
+    }
+
+    public to_string(): string {
+        return this.inner_tree.to_string() + (this.first_required ? "+" : "*");
+    }
+}
+
+export class Optional implements AbstractSyntaxTree {
+    constructor(public inner_tree: AbstractSyntaxTree) {
+        /* empty */
+    }
+
+    public to_string(): string {
+        return `${this.inner_tree.to_string()}?`;
+    }
+}
+
+export class Anchor implements AbstractSyntaxTree {
+    constructor(public inner_tree: AbstractSyntaxTree) {
+        /* empty */
+    }
+
+    public to_string(): string {
+        return `^${this.inner_tree.to_string()}$`;
+    }
+}
+
+export class Range implements AbstractSyntaxTree {
+    constructor(public from: string, public to: string) {
+        /* empty */
+    }
+    public to_string(): string {
+        return `${this.from}-${this.to}`;
+    }
+}
+
+export class QuantifierExactly implements AbstractSyntaxTree {
+    constructor(public inner_tree: AbstractSyntaxTree, public count: number) {
+        /* empty */
+    }
+    public to_string(): string {
+        return `${this.inner_tree.to_string()}{${this.count}}`;
+    }
+}
+
+export class QuantifierBetween implements AbstractSyntaxTree {
+    constructor(public inner_tree: AbstractSyntaxTree, public from: number, public to?: number, public inclusive?: boolean) {
+        /* empty */
+    }
+    public to_string(): string {
+        let str = `${this.inner_tree.to_string()}{${this.from},`;
+
+        if(this.to) {
+            str += (this.to-(this.inclusive?0:1));
+        }
+
+        str += "}";
+        return str;
+    }
+}
+
+export class Or implements AbstractSyntaxTree {
+    constructor(public left_tree: AbstractSyntaxTree, public right_tree: AbstractSyntaxTree) {
+        /* empty */
+    }
+    public to_string(): string {
+        return `${this.left_tree.to_string()}|${this.right_tree.to_string()}`;
+    }
+}
+
+export class And implements AbstractSyntaxTree {
+    constructor(public left_tree: AbstractSyntaxTree, public right_tree: AbstractSyntaxTree) {
+        /* empty */
+    }
+    public to_string(): string {
+        return `${this.left_tree.to_string()}${this.right_tree.to_string()}`;
+    }
+}
+
+export class Specifier implements AbstractSyntaxTree {
+    constructor(public type: string, public negated: boolean) {
+        /* empty */
+    }
+    public to_string(): string {
+        let str = "\\";
+
+        if(this.type === "w") {
+            str += (this.negated ? "W" : "w");
+        }
+        else if(this.type === "d") {
+            str += (this.negated ? "D" : "d");
+        }
+        else {
+            str += (this.negated ? "S" : "s");
+        }
+
+        return str;
+    }
+
+    // \w \d \s : word, digit, whitespace
+}
+
+export class Match implements AbstractSyntaxTree {
+    // remember: transform unicode, escape stuff
+
+    constructor(public match: string) {
+        /* empty */
+    }
+    public to_string(): string {
+        /* TODO: ESCAPE/TRANSFORM CHARACTERS! */
+
+        return this.match;
+    }
+}
+
+export class SpecialCharacter implements AbstractSyntaxTree {
+    //type: \t\r\n
+
+    constructor(public type: string) {
+        /* empty */
+    }
+    public to_string(): string {
+        if(this.type === "t") {
+            return "\\t";
+        }
+        else if(this.type === "r") {
+            return "\\r";
+        }
+        else {
+            return "\\n";
+        }
+    }
+}
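Note: each AST node prints its own regex fragment, so a finished tree stringifies bottom-up. A sketch composing a few of the classes above by hand; the parser that will actually build such trees is not part of this commit:

    import { Regex, AnyOf, Range, Repeat, Qualifier } from "./ast";

    // Builds [a-z0-9]+ wrapped in /.../g
    const charClass = new AnyOf([new Range("a", "z"), new Range("0", "9")], /* negated */ false);
    const oneOrMore = new Repeat(charClass, /* first_required */ true);
    const tree = new Regex([oneOrMore], [new Qualifier("g")]);

    console.log(tree.to_string()); // /[a-z0-9]+/g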
@@ -1,6 +1,7 @@
 /*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
 
 import { Token, TokenType } from "./tokens";
+import * as AST from "./ast";
 
 export class ParserOptions {
@@ -67,6 +67,8 @@ const keywords = {
     "repeat": TokenType.KEYWORD_REPEAT,
     "repeats": TokenType.KEYWORD_REPEAT,
     "newline": TokenType.KEYWORD_NEWLINE,
+    "none": TokenType.KEYWORD_NONE,
+    "neither": TokenType.KEYWORD_NEITHER,
 
     /* Partial keywords */
     "thing": TokenType.PARTIAL_KEYWORD,
@@ -82,6 +84,9 @@ const keywords = {
     "feed": TokenType.PARTIAL_KEYWORD,
     "carriage": TokenType.PARTIAL_KEYWORD,
     "return": TokenType.PARTIAL_KEYWORD,
+    "case": TokenType.PARTIAL_KEYWORD,
+    "insensitive": TokenType.PARTIAL_KEYWORD,
+    "sensitive": TokenType.PARTIAL_KEYWORD
 };
 
 const numbers = {
@@ -103,15 +108,17 @@ interface token_transformation {
 }
 
 const token_transformations : token_transformation = {
     "thing": [ { preceeding_token: "any", transforms_to: TokenType.KEYWORD_ANY } ],
     "things": [ { preceeding_token: "any", transforms_to: TokenType.KEYWORD_ANY } ],
     "space": [ { preceeding_token: "white", transforms_to: TokenType.KEYWORD_WHITESPACE_SPECIFIER } ],
     "spaces": [ { preceeding_token: "white", transforms_to: TokenType.KEYWORD_WHITESPACE_SPECIFIER } ],
     "wise": [ { preceeding_token: "other", transforms_to: TokenType.KEYWORD_ELSE } ],
     "line": [ { preceeding_token: "multi", transforms_to: TokenType.KEYWORD_MULTILINE },
               { preceeding_token: "new", transforms_to: TokenType.KEYWORD_NEWLINE } ],
     "feed": [ { preceeding_token: "line", transforms_to: TokenType.KEYWORD_LINEFEED } ],
     "return": [ { preceeding_token: "carriage", transforms_to: TokenType.KEYWORD_CARRIAGE_RETURN } ],
+    "sensitive": [ { preceeding_token: "case", transforms_to: TokenType.KEYWORD_CASE_SENSITIVE } ],
+    "insensitive": [ { preceeding_token: "case", transforms_to: TokenType.KEYWORD_CASE_INSENSITIVE } ],
 };
 
 const escape_sequences = {
@@ -13,6 +13,8 @@ export enum TokenType {
     KEYWORD_OR,
     KEYWORD_ANY,
     KEYWORD_OF,
+    KEYWORD_NONE,
+    KEYWORD_NEITHER,
     KEYWODE_WORD_SPECIFIER,
     KEYWORD_DIGIT_SPECIFIER,
     KEYWORD_CHAR_SPECIFIER,
@@ -48,7 +50,9 @@ export enum TokenType {
     KEYWORD_CREATE,
     KEYWORD_CALLED,
     KEYWORD_REPEAT,
-    KEYWORD_NEWLINE
+    KEYWORD_NEWLINE,
+    KEYWORD_CASE_SENSITIVE,
+    KEYWORD_CASE_INSENSITIVE
 }
 
 export class TokenError extends Error {
@@ -57,7 +61,7 @@ export class TokenError extends Error {
     }
 
     public to_string(): string {
-        return `${this.line}:${this.position} ${this.message}`;
+        return `Token Error: ${this.line}:${this.position} ${this.message}`;
     }
 }