mirror of https://github.com/pdemian/human2regex.git
synced 2025-05-16 12:30:09 -07:00

Tokenizer now correctly recognizes Outdents

This commit is contained in:
parent 5e9c185923
commit 44838b8a43
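
In effect, the commit splits the old src/tokenizer.ts into token definitions (src/tokens.ts), small helpers (src/utilities.ts), and an instantiable lexer that synthesizes Outdent tokens in a post-lexing pass. A minimal usage sketch of the resulting API, assuming the module layout introduced below and that the default options treat four spaces as one indent level:

// Hedged sketch, not part of the commit: exercise the new lexer API.
import { Human2RegexLexer } from "./tokenizer";
import { Outdent } from "./tokens";

const lexer = new Human2RegexLexer();

// One line indented one level deeper, then back to column 0: the lexer
// should now emit a matching Outdent when the indentation decreases.
const result = lexer.tokenize("match one digit\n    match one word\nmatch one number\n");

const outdents = result.tokens.filter((t) => t.tokenType === Outdent);
console.log(outdents.length); // expected: 1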
9155  docs/bundle.min.js (vendored)
File diff suppressed because one or more lines are too long
@@ -1,7 +1,7 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */

import { CstParser } from "chevrotain";
import * as T from "./tokenizer";
import * as T from "./tokens";

export class Human2RegexParser extends CstParser {
    constructor() {
@@ -2,8 +2,7 @@

import "./style.css";

import { Human2RegexLexer } from './tokenizer';
import { Human2RegexParser } from './parser';
import { Human2RegexLexer } from "./tokenizer";

/*
$(function() {
@@ -11,7 +10,9 @@ $(function() {
});
*/

const result = Human2RegexLexer.tokenize(`
const lexer = new Human2RegexLexer();

const result = lexer.tokenize(`
// H2R supports // # and /**/ as comments
// A group is only captured if given a name.
// You can use "and", "or", "not" to specify "[]" regex
@@ -52,6 +53,14 @@ create an optional group
match 0+ any thing
`);

//let str = "";

//for(const r of result.tokens) {
//    str += r.tokenType === Newline ? "\n" : r.image + " ";
//}

//console.log(str);

for(const r of result.tokens) {
    console.log(r);
}
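
The loop above dumps whole IToken objects; a terser sketch of the same inspection (tokenType.name and image are standard Chevrotain IToken fields):

// Hedged sketch: one line per token instead of the full IToken object.
for (const r of result.tokens) {
    console.log(`${r.tokenType.name} ${JSON.stringify(r.image)}`);
}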
411  src/tokenizer.ts
@@ -1,179 +1,8 @@
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */

import { createToken, Lexer, IToken, createTokenInstance, ILexingResult } from "chevrotain";

export const Zero = createToken({name: "Zero", pattern: /zero/i });
export const One = createToken({name: "One", pattern: /one/i });
export const Two = createToken({name: "Two", pattern: /two/i });
export const Three = createToken({name: "Three", pattern: /three/i });
export const Four = createToken({name: "Four", pattern: /four/i });
export const Five = createToken({name: "Five", pattern: /five/i });
export const Six = createToken({name: "Six", pattern: /six/i });
export const Seven = createToken({name: "Seven", pattern: /seven/i });
export const Eight = createToken({name: "Eight", pattern: /eight/i });
export const Nine = createToken({name: "Nine", pattern: /nine/i });
export const Ten = createToken({name: "Ten", pattern: /ten/i });

export const Optional = createToken({name: "Optional", pattern: /optional(ly)?/i });
export const Match = createToken({name: "Match", pattern: /match(es)?/i });
export const Then = createToken({name: "Then", pattern: /then/i });
export const Anything = createToken({name: "Anything", pattern: /(any thing|any|anything)(s)?/i});
export const Of = createToken({name: "Of", pattern: /of/i});
export const Or = createToken({name: "Or", pattern: /or/i});
export const And = createToken({name: "And", pattern: /and|,/i});
export const Word = createToken({name: "Word Specifier", pattern: /word(s)?/i});
export const Digit = createToken({name: "Digit Specifier", pattern: /digit(s)?/i});
export const Character = createToken({name: "Character Specifier", pattern: /character(s)?/i});
export const Whitespace = createToken({name: "Whitespace Specifier", pattern: /(white space|whitespace)(s)?/i});
export const Number = createToken({name: "NumberSpecifier", pattern: /number(s)?/i});
export const As = createToken({name: "As", pattern: /as/i});
export const If = createToken({name: "If", pattern: /if/i});
export const Start = createToken({name: "Start", pattern: /start(s)?/i});
export const With = createToken({name: "With", pattern: /with/i});
export const Ends = createToken({name: "Ends", pattern: /end(s)?/i});
export const Otherwise = createToken({name: "Otherwise", pattern: /(other wise|otherwise)/i});
export const Else = createToken({name: "Else", pattern: /else/i});
export const Unless = createToken({name: "Unless", pattern: /unless/i});
export const While = createToken({name: "While", pattern: /while/i});
export const More = createToken({name: "More", pattern: /more/i});
export const Using = createToken({name: "Using", pattern: /using/i});
export const Global = createToken({name: "Global", pattern: /global/i});
export const Multiline = createToken({name: "Multiline", pattern: /(multi line|multiline)/i});
export const Exact = createToken({name: "Exact", pattern: /exact/i});
export const Matching = createToken({name: "Matching", pattern: /matching/i});
export const Nothing = createToken({name: "Nothing", pattern: /nothing/i});
export const Not = createToken({name: "Not", pattern: /not/i }); //, longer_alt: Nothing});
export const Between = createToken({name: "Between", pattern: /between/i});
export const Tab = createToken({name: "Tab", pattern: /tab/i});
export const Linefeed = createToken({name: "Linefeed", pattern: /(line feed|linefeed)/i});
export const Group = createToken({name: "Group", pattern: /group/i});
export const By = createToken({name: "By", pattern: /by/i});
export const A = createToken({name: "A", pattern: /a(n)?/i }); //, longer_alt: Anything});
export const The = createToken({name: "The", pattern: /the/i }); //, longer_alt: Then});
export const Exactly = createToken({name: "Exactly", pattern: /exact(ly)?/i});
export const Inclusive = createToken({name: "Inclusive", pattern: /inclusive(ly)?/i});
export const Exclusive = createToken({name: "Exclusive", pattern: /exclusive(ly)?/i});
export const From = createToken({name: "From", pattern: /from/i});
export const To = createToken({name: "To", pattern: /(to|\-|\.\.|\.\.\.)/i});
export const Create = createToken({name: "Create", pattern: /create(s)?/i});
export const Called = createToken({name: "Called", pattern: /called/i});
export const Repeat = createToken({name: "Repeat", pattern: /repeat(s|ing)?/i});
export const Newline = createToken({name: "Newline", pattern: /(new line|newline)/i});
export const None = createToken({name: "None", pattern: /none/i});
export const Neither = createToken({name: "Neither", pattern: /neither/i});
export const CarriageReturn = createToken({name: "Carriage Return", pattern: /carriage return/i});
export const CaseInsensitive = createToken({name: "Case Insensitive", pattern: /case insensitive/i});
export const CaseSensitive = createToken({name: "Case Sensitive", pattern: /case sensitive/i});
export const OrMore = createToken({name: "Or More", pattern: /\+/ });

export const LBracket = createToken({name: "Left Bracket", pattern: /\(/ });
export const RBracket = createToken({name: "Right Bracket", pattern: /\)/ });

export const EndOfLine = createToken({name: "EOL", pattern: /\n/, group: "nl" });
export const WhiteSpace = createToken({name: "Whitespace", pattern: /\s+/, group: Lexer.SKIPPED });
export const SingleLineComment = createToken({name: "Single-Line Comment", pattern: /(#|\/\/).*/, group: Lexer.SKIPPED });
export const MultilineComment = createToken({name: "Multi-Line Comment", pattern: /\/\*(.*)\*\//, line_breaks: true, group: Lexer.SKIPPED });

export const Identifier = createToken({name: "Identifier", pattern: /[a-z]\w*/i });
export const NumberLiteral = createToken({name: "Number Literal", pattern: /-?(0|[1-9]\d*)(\.\d+)?([eE][+-]?\d+)?/ });
export const StringLiteral = createToken({name: "String Literal", pattern: /"(?:[^\\"]|\\(?:[bfnrtv"\\/]|u[0-9a-f]{4}|U[0-9a-f]{8}))*"/i });

enum IndentBaseType {
    Indent,
    Outdent
}

export const Indent = createToken({
    name: "Indent",
    start_chars_hint: [ "\t", " " ],
    pattern: (text, offset, matchedTokens, groups) => Human2RegexLexer.matchIndentBase(text, offset, matchedTokens, groups, IndentBaseType.Indent),
    // custom token patterns should explicitly specify the line_breaks option
    line_breaks: false
});

export const Outdent = createToken({
    name: "Outdent",
    start_chars_hint: [ "\t", " " ],
    pattern: (text, offset, matchedTokens, groups) => Human2RegexLexer.matchIndentBase(text, offset, matchedTokens, groups, IndentBaseType.Outdent),
    // custom token patterns should explicitly specify the line_breaks option
    line_breaks: false
});

export const AllTokens = [
    Zero,
    One,
    Two,
    Three,
    Four,
    Five,
    Six,
    Seven,
    Eight,
    Nine,
    Ten,
    Optional,
    Matching,
    Match,
    Then,
    Anything,
    Of,
    Or,
    And,
    Word,
    Digit,
    Character,
    Whitespace,
    Number,
    As,
    If,
    Start,
    With,
    Ends,
    Otherwise,
    Else,
    Unless,
    While,
    More,
    Using,
    Global,
    Multiline,
    Exact,
    Nothing,
    Not,
    Between,
    Tab,
    Linefeed,
    Group,
    By,
    A,
    The,
    Exactly,
    Inclusive,
    Exclusive,
    From,
    Create,
    Called,
    Repeat,
    Newline,
    None,
    Neither,
    CarriageReturn,
    CaseInsensitive,
    CaseSensitive,
    OrMore,
    To,
    EndOfLine,
    Indent,
    Outdent,
    WhiteSpace,
    SingleLineComment,
    MultilineComment,
    Identifier,
    NumberLiteral,
    StringLiteral,
];

const H2RLexer = new Lexer(AllTokens, { ensureOptimizations: true });
import { Lexer, IToken, createTokenInstance, ILexingResult } from "chevrotain";
import { last, findLastIndex } from "./utilities";
import { Indent, Outdent, EndOfLine, AllTokens } from "./tokens";

export enum IndentType {
    Tabs,
@@ -188,118 +17,21 @@ export class Human2RegexLexerOptions
}

export class Human2RegexLexer {
    //Taken and adapted from https://github.com/SAP/chevrotain/blob/master/examples/lexer/python_indentation/python_indentation.js
    private static already_init = false;

    // State required for matching the indentations
    private static options = new Human2RegexLexerOptions();
    private static indentStack = [ 0 ];
    private static wsRegExp: RegExp;
    private static spacesPerTab = "    ";
    private lexer : Lexer;

    private static findLastIndex<T>(array: T[], predicate: (x: T) => boolean) : number {
        for (let index = array.length; index >= 0; index--) {
            if (predicate(array[index])) {
                return index;
            }
        }
        return -1;
    constructor(private options: Human2RegexLexerOptions = new Human2RegexLexerOptions()) {
        if (Human2RegexLexer.already_init) {
            throw new Error("Only 1 instance of Human2RegexLexer allowed");
        }

    /**
     * This custom Token matcher uses Lexer context ("matchedTokens" and "groups" arguments)
     * combined with state via closure ("indentStack" and "lastTextMatched") to match indentation.
     */
    public static matchIndentBase(text: string, offset: number, matchedTokens: IToken[], groups: {[groupName: string]: IToken[]}, type: IndentBaseType) : RegExpExecArray | null {
        const noTokensMatchedYet = !matchedTokens.length;
        const newLines = groups.nl;
        const noNewLinesMatchedYet = !newLines.length;
        const isFirstLine = noTokensMatchedYet && noNewLinesMatchedYet;
        const isStartOfLine =
            // only newlines matched so far
            (noTokensMatchedYet && !noNewLinesMatchedYet) ||
            // Both newlines and other Tokens have been matched AND the offset is just after the last newline
            (!noTokensMatchedYet &&
             !noNewLinesMatchedYet &&
             offset === newLines[newLines.length-1].startOffset + 1);
        Human2RegexLexer.already_init = true;

        // indentation can only be matched at the start of a line.
        if (isFirstLine || isStartOfLine) {
            let currIndentLevel: number = -1;
        let indent_regex: RegExp | null = null;

            Human2RegexLexer.wsRegExp.lastIndex = offset;
            const match = Human2RegexLexer.wsRegExp.exec(text);

            // possible non-empty indentation
            if (match !== null) {
                currIndentLevel = match[0].length;
                //if (this.options.type === IndentType.Tabs) {
                //    currIndentLevel = match[0].length;
                //}
                //else {
                //    currIndentLevel = match[0].replace(Human2RegexLexer.spacesPerTab, "\t").length;
                //}
            }
            // "empty" indentation means indentLevel of 0.
            else {
                currIndentLevel = 0;
            }

            const prevIndentLevel = this.indentStack[this.indentStack.length-1];
            // deeper indentation
            if (currIndentLevel > prevIndentLevel && type === IndentBaseType.Indent) {
                this.indentStack.push(currIndentLevel);
                return match;
            }
            // shallower indentation
            else if (currIndentLevel < prevIndentLevel && type === IndentBaseType.Outdent) {
                const matchIndentIndex = this.findLastIndex(this.indentStack, (stackIndentDepth) => stackIndentDepth === currIndentLevel);

                // any outdent must match some previous indentation level.
                if (matchIndentIndex === -1) {
                    throw Error(`invalid outdent at offset: ${offset}`);
                }

                const numberOfDedents = this.indentStack.length - matchIndentIndex - 1;

                // This is a little tricky
                // 1. If there is no match (0 level indent) then this custom token
                //    matcher would return "null" and so we need to add all the required outdents ourselves.
                // 2. If there was a match (> 0 level indent) then we need to add minus one number of outdents
                //    because the lexer would create one due to returning a non-null result.
                const iStart = match !== null ? 1 : 0;
                for (let i = iStart; i < numberOfDedents; i++) {
                    this.indentStack.pop();
                    matchedTokens.push(createTokenInstance(Outdent, "", NaN, NaN, NaN, NaN, NaN, NaN));
                }

                // even though we are adding fewer outdents directly we still need to update the indent stack fully.
                if (iStart === 1) {
                    this.indentStack.pop();
                }
                return match;
            }
            else {
                // same indent, this should be lexed as simple whitespace and ignored
                return null;
            }
        }
        else {
            // indentation cannot be matched under other circumstances
            return null;
        }
    }

    public static tokenize(text: string, options: Human2RegexLexerOptions | null = null) : ILexingResult {
        // have to reset the indent stack between processing of different text inputs
        Human2RegexLexer.indentStack = [ 0 ];

        if (options !== null) {
            Human2RegexLexer.options = this.options;
        }

        /*
        if (this.options.type === IndentType.Tabs) {
            Human2RegexLexer.wsRegExp = /\t/y;
            indent_regex = /\t/y;
        }
        else {
            let reg = ` {${this.options.spaces_per_tab}}`;
@@ -308,20 +40,117 @@ export class Human2RegexLexer {
            reg += "|\\t";
        }

        Human2RegexLexer.wsRegExp = new RegExp(reg, "y");

        Human2RegexLexer.spacesPerTab = Array(this.options.spaces_per_tab+1).join(" ");
        }*/
        Human2RegexLexer.wsRegExp = / +/y;

        const lexResult = H2RLexer.tokenize(text);

        //add remaining Outdents
        while (Human2RegexLexer.indentStack.length > 1) {
            lexResult.tokens.push(createTokenInstance(Outdent, "", NaN, NaN, NaN, NaN, NaN, NaN));
            Human2RegexLexer.indentStack.pop();
            indent_regex = new RegExp(reg, "y");
        }

        Indent.PATTERN = indent_regex;

        this.lexer = new Lexer(AllTokens, { ensureOptimizations: true });
    }

    public tokenize(text: string) : ILexingResult {
        const lexResult = this.lexer.tokenize(text);

        if (lexResult.tokens.length == 0) {
            return lexResult;
        }

        // create Outdents

        const tokens: IToken[] = [];

        const indentStack = [ 0 ];

        let currIndentLevel = 0;
        let startOfLine = true;
        let hadIndents = false;

        for (let i = 0; i < lexResult.tokens.length; i++) {
            if (lexResult.tokens[i].tokenType === EndOfLine) {
                startOfLine = true;
                tokens.push(lexResult.tokens[i]);
            }
            else if (lexResult.tokens[i].tokenType === Indent) {
                hadIndents = true;
                currIndentLevel = 1;

                const start_token = lexResult.tokens[i];
                let length = lexResult.tokens[i].image.length;

                while (lexResult.tokens[i+1].tokenType === Indent) {
                    currIndentLevel++;
                    i++;
                    length += lexResult.tokens[i].image.length;
                }

                if (!startOfLine || (currIndentLevel > last(indentStack) + 1)) {
                    lexResult.errors.push({
                        offset: start_token.startOffset,
                        line: start_token.startLine ?? NaN,
                        column: start_token.startColumn ?? NaN,
                        length: length,
                        message: "Unexpected indentation found"
                    });
                }
                else if (currIndentLevel > last(indentStack)) {
                    indentStack.push(currIndentLevel);
                    tokens.push(start_token);
                }
                else if (currIndentLevel < last(indentStack)) {
                    const index = findLastIndex(indentStack, currIndentLevel);

                    if (index < 0) {
                        lexResult.errors.push({
                            offset: start_token.startOffset,
                            line: start_token.startLine ?? NaN,
                            column: start_token.startColumn ?? NaN,
                            length: length,
                            message: "Unexpected indentation found"
                        });
                    }
                    else {
                        const numberOfDedents = indentStack.length - index - 1;

                        for(let i = 0; i < numberOfDedents; i++) {
                            indentStack.pop();
                            tokens.push(createTokenInstance(Outdent, "", start_token.startOffset, start_token.startOffset + length, start_token.startLine ?? NaN, start_token.endLine ?? NaN, start_token.startColumn ?? NaN, (start_token.startColumn ?? NaN) + length));
                        }
                    }
                }
                else {
                    // same indent level: don't care
                }
            }
            else {
                if(startOfLine && !hadIndents) {
                    const tok = lexResult.tokens[i];

                    while (indentStack.length > 1) {
                        indentStack.pop();
                        tokens.push(createTokenInstance(Outdent, "", tok.startOffset, tok.startOffset, tok.startLine ?? NaN, NaN, tok.startColumn ?? NaN, NaN));
                    }
                }
                startOfLine = false;
                hadIndents = false;
                tokens.push(lexResult.tokens[i]);
            }
        }

        const tok = last(tokens);

        // Do we have an EOL marker at the end?
        if(tok.tokenType !== EndOfLine) {
            tokens.push(createTokenInstance(EndOfLine, "\n", tok.endOffset ?? NaN, tok.endOffset ?? NaN, tok.startLine ?? NaN, NaN, tok.startColumn ?? NaN, NaN));
        }

        //add remaining Outdents
        while (indentStack.length > 1) {
            indentStack.pop();
            tokens.push(createTokenInstance(Outdent, "", tok.endOffset ?? NaN, tok.endOffset ?? NaN, tok.startLine ?? NaN, NaN, tok.startColumn ?? NaN, NaN));
        }

        lexResult.tokens = tokens;
        return lexResult;
    }
}
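
The new instance tokenize() above is the heart of the fix: instead of relying on the custom matcher to emit Outdents during lexing, it rewrites the token stream afterwards. A distilled sketch of that pass, with tokens simplified to plain strings (the real code works on Chevrotain ITokens, uses createTokenInstance, and reports an error when indentation jumps more than one level at once):

// Hedged sketch, not part of the commit: the Outdent-synthesis idea in
// miniature. Each inner array is one line's tokens, with leading "Indent"
// entries standing in for matched indentation.
function synthesizeOutdents(lines: string[][]): string[] {
    const out: string[] = [];
    const indentStack = [0];
    for (const line of lines) {
        let level = 0;
        while (line[level] === "Indent") { level++; }      // count leading Indents
        if (level > indentStack[indentStack.length - 1]) {
            indentStack.push(level);                       // deeper: keep one Indent
            out.push("Indent");
        }
        while (level < indentStack[indentStack.length - 1]) {
            indentStack.pop();                             // shallower: one Outdent per popped level
            out.push("Outdent");
        }
        out.push(...line.slice(level), "EOL");
    }
    while (indentStack.length > 1) {                       // close indents still open at EOF
        indentStack.pop();
        out.push("Outdent");
    }
    return out;
}

// synthesizeOutdents([["match"], ["Indent", "match"], ["match"]])
// => ["match", "EOL", "Indent", "match", "EOL", "Outdent", "match", "EOL"]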
154  src/tokens.ts (new file)
@@ -0,0 +1,154 @@
import { createToken, Lexer } from "chevrotain";

export const Zero = createToken({name: "Zero", pattern: /zero/i });
export const One = createToken({name: "One", pattern: /one/i });
export const Two = createToken({name: "Two", pattern: /two/i });
export const Three = createToken({name: "Three", pattern: /three/i });
export const Four = createToken({name: "Four", pattern: /four/i });
export const Five = createToken({name: "Five", pattern: /five/i });
export const Six = createToken({name: "Six", pattern: /six/i });
export const Seven = createToken({name: "Seven", pattern: /seven/i });
export const Eight = createToken({name: "Eight", pattern: /eight/i });
export const Nine = createToken({name: "Nine", pattern: /nine/i });
export const Ten = createToken({name: "Ten", pattern: /ten/i });

export const Optional = createToken({name: "Optional", pattern: /optional(ly)?/i });
export const Match = createToken({name: "Match", pattern: /match(es)?/i });
export const Then = createToken({name: "Then", pattern: /then/i });
export const Anything = createToken({name: "Anything", pattern: /(any thing|any|anything)(s)?/i});
export const Of = createToken({name: "Of", pattern: /of/i});
export const Or = createToken({name: "Or", pattern: /or/i});
export const And = createToken({name: "And", pattern: /and|,/i});
export const Word = createToken({name: "Word Specifier", pattern: /word(s)?/i});
export const Digit = createToken({name: "Digit Specifier", pattern: /digit(s)?/i});
export const Character = createToken({name: "Character Specifier", pattern: /character(s)?/i});
export const Whitespace = createToken({name: "Whitespace Specifier", pattern: /(white space|whitespace)(s)?/i});
export const Number = createToken({name: "NumberSpecifier", pattern: /number(s)?/i});
export const As = createToken({name: "As", pattern: /as/i});
export const If = createToken({name: "If", pattern: /if/i});
export const Start = createToken({name: "Start", pattern: /start(s)?/i});
export const With = createToken({name: "With", pattern: /with/i});
export const Ends = createToken({name: "Ends", pattern: /end(s)?/i});
export const Otherwise = createToken({name: "Otherwise", pattern: /(other wise|otherwise)/i});
export const Else = createToken({name: "Else", pattern: /else/i});
export const Unless = createToken({name: "Unless", pattern: /unless/i});
export const While = createToken({name: "While", pattern: /while/i});
export const More = createToken({name: "More", pattern: /more/i});
export const Using = createToken({name: "Using", pattern: /using/i});
export const Global = createToken({name: "Global", pattern: /global/i});
export const Multiline = createToken({name: "Multiline", pattern: /(multi line|multiline)/i});
export const Exact = createToken({name: "Exact", pattern: /exact/i});
export const Matching = createToken({name: "Matching", pattern: /matching/i});
export const Nothing = createToken({name: "Nothing", pattern: /nothing/i});
export const Not = createToken({name: "Not", pattern: /not/i }); //, longer_alt: Nothing});
export const Between = createToken({name: "Between", pattern: /between/i});
export const Tab = createToken({name: "Tab", pattern: /tab/i});
export const Linefeed = createToken({name: "Linefeed", pattern: /(line feed|linefeed)/i});
export const Group = createToken({name: "Group", pattern: /group/i});
export const By = createToken({name: "By", pattern: /by/i});
export const A = createToken({name: "A", pattern: /a(n)?/i }); //, longer_alt: Anything});
export const The = createToken({name: "The", pattern: /the/i }); //, longer_alt: Then});
export const Exactly = createToken({name: "Exactly", pattern: /exact(ly)?/i});
export const Inclusive = createToken({name: "Inclusive", pattern: /inclusive(ly)?/i});
export const Exclusive = createToken({name: "Exclusive", pattern: /exclusive(ly)?/i});
export const From = createToken({name: "From", pattern: /from/i});
export const To = createToken({name: "To", pattern: /(to|\-|\.\.|\.\.\.)/i});
export const Create = createToken({name: "Create", pattern: /create(s)?/i});
export const Called = createToken({name: "Called", pattern: /called/i});
export const Repeat = createToken({name: "Repeat", pattern: /repeat(s|ing)?/i});
export const Newline = createToken({name: "Newline", pattern: /(new line|newline)/i});
export const None = createToken({name: "None", pattern: /none/i});
export const Neither = createToken({name: "Neither", pattern: /neither/i});
export const CarriageReturn = createToken({name: "Carriage Return", pattern: /carriage return/i});
export const CaseInsensitive = createToken({name: "Case Insensitive", pattern: /case insensitive/i});
export const CaseSensitive = createToken({name: "Case Sensitive", pattern: /case sensitive/i});
export const OrMore = createToken({name: "Or More", pattern: /\+/ });

export const LBracket = createToken({name: "Left Bracket", pattern: /\(/ });
export const RBracket = createToken({name: "Right Bracket", pattern: /\)/ });

export const EndOfLine = createToken({name: "EOL", pattern: /\n/ });
export const WhiteSpace = createToken({name: "Whitespace", pattern: /\s+/, group: Lexer.SKIPPED });
export const SingleLineComment = createToken({name: "Single-Line Comment", pattern: /(#|\/\/).*/, group: Lexer.SKIPPED });
export const MultilineComment = createToken({name: "Multi-Line Comment", pattern: /\/\*(.*)\*\//, line_breaks: true, group: Lexer.SKIPPED });

export const Identifier = createToken({name: "Identifier", pattern: /[a-z]\w*/i });
export const NumberLiteral = createToken({name: "Number Literal", pattern: /-?(0|[1-9]\d*)(\.\d+)?([eE][+-]?\d+)?/ });
export const StringLiteral = createToken({name: "String Literal", pattern: /"(?:[^\\"]|\\(?:[bfnrtv"\\/]|u[0-9a-f]{4}|U[0-9a-f]{8}))*"/i });

export const Indent = createToken({name: "Indent"});

export const Outdent = createToken({name: "Outdent"});

export const AllTokens = [
    Zero,
    One,
    Two,
    Three,
    Four,
    Five,
    Six,
    Seven,
    Eight,
    Nine,
    Ten,
    Optional,
    Matching,
    Match,
    Then,
    Anything,
    Of,
    Or,
    And,
    Word,
    Digit,
    Character,
    Whitespace,
    Number,
    As,
    If,
    Start,
    With,
    Ends,
    Otherwise,
    Else,
    Unless,
    While,
    More,
    Using,
    Global,
    Multiline,
    Exact,
    Nothing,
    Not,
    Between,
    Tab,
    Linefeed,
    Group,
    By,
    A,
    The,
    Exactly,
    Inclusive,
    Exclusive,
    From,
    Create,
    Called,
    Repeat,
    Newline,
    None,
    Neither,
    CarriageReturn,
    CaseInsensitive,
    CaseSensitive,
    OrMore,
    To,
    EndOfLine,
    Indent,
    WhiteSpace,
    SingleLineComment,
    MultilineComment,
    Identifier,
    NumberLiteral,
    StringLiteral,
];
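
Note that Indent and Outdent are created without a pattern, and Outdent is absent from AllTokens: Indent's matcher depends on lexer options and is assigned in the Human2RegexLexer constructor (Indent.PATTERN = indent_regex above), while Outdent never matches source text at all and only exists as instances synthesized by the post-lexing pass. A small illustrative sketch of the idea (the token name and regex here are assumptions, not the commit's defaults):

// Hedged sketch: a pattern-less token whose matcher is decided at runtime.
import { createToken } from "chevrotain";

const DemoIndent = createToken({ name: "DemoIndent" });

// Fill the pattern in later, e.g. once indentation options are known;
// the commit does the same with a sticky RegExp built from spaces_per_tab.
DemoIndent.PATTERN = / {4}|\t/y;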
21  src/utilities.ts (new file)
@@ -0,0 +1,21 @@
export function last<T>(array: T[]) : T {
    return array[array.length-1];
}

export function findLastIndex<T>(array: T[], value: T) : number {
    for (let index = array.length-1; index >= 0; index--) {
        if (array[index] === value) {
            return index;
        }
    }
    return -1;
}

export function findLastIndexPredicate<T>(array: T[], predicate: (x: T) => boolean) : number {
    for (let index = array.length-1; index >= 0; index--) {
        if (predicate(array[index])) {
            return index;
        }
    }
    return -1;
}
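
These helpers back the indent-stack bookkeeping in tokenize(); a quick usage sketch (values are illustrative):

// Hedged sketch, not part of the commit.
import { last, findLastIndex } from "./utilities";

const indentStack = [0, 1, 2];
console.log(last(indentStack));             // 2  (current indentation level)
console.log(findLastIndex(indentStack, 1)); // 1  (level to outdent back to)
console.log(findLastIndex(indentStack, 5)); // -1 (no such level: report an error)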