mirror of
https://github.com/pdemian/human2regex.git
synced 2025-05-16 12:30:09 -07:00
Refactored code and made tokenizer
TODO: webpack config?
This commit is contained in:
parent
0a4f65b1a8
commit
40ca670a2a
11
src/parser.ts
Normal file
11
src/parser.ts
Normal file
@ -0,0 +1,11 @@
|
||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||
|
||||
import { Token, TokenType } from "./tokens";
|
||||
|
||||
export class ParserOptions {
|
||||
|
||||
}
|
||||
|
||||
export function parse(tokens: Token[]) {
|
||||
return undefined;
|
||||
}
|
152
src/script.js
152
src/script.js
@ -1,152 +0,0 @@
|
||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||
"use strict";
|
||||
const keywords = [
|
||||
"optional", "optionally", "match", "then", "any", "of", "or", "word", "digit", "unicode", "character",
|
||||
"multiple", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "anything",
|
||||
"whitespace", "as", "number", "if", "starts", "with", "ends", "otherwise", "else", "unless", "while", "more",
|
||||
"using", "global", "and", "multiline", "exact", "matching", "not", "between", "tab", "linefeed", "carriage", "return",
|
||||
"group", "by", "exactly", "inclusive", "inclusively", "exclusive", "exclusively", "including", "from", "to"
|
||||
];
|
||||
var TokenType;
|
||||
(function (TokenType) {
|
||||
TokenType[TokenType["END_OF_STATEMENT"] = 0] = "END_OF_STATEMENT";
|
||||
TokenType[TokenType["INDENT"] = 1] = "INDENT";
|
||||
TokenType[TokenType["BETWEEN"] = 2] = "BETWEEN";
|
||||
TokenType[TokenType["QUOTE"] = 3] = "QUOTE";
|
||||
TokenType[TokenType["KEYWORD_BETWEEN"] = 4] = "KEYWORD_BETWEEN";
|
||||
TokenType[TokenType["KEYWORD_OPTIONAL"] = 5] = "KEYWORD_OPTIONAL";
|
||||
TokenType[TokenType["KEYWORD_MATCH"] = 6] = "KEYWORD_MATCH";
|
||||
TokenType[TokenType["KEYWORD_THEN"] = 7] = "KEYWORD_THEN";
|
||||
TokenType[TokenType["KEYWORD_AND"] = 8] = "KEYWORD_AND";
|
||||
TokenType[TokenType["KEYWORD_OR"] = 9] = "KEYWORD_OR";
|
||||
TokenType[TokenType["KEYWORD_ANY"] = 10] = "KEYWORD_ANY";
|
||||
TokenType[TokenType["KEYWORD_OF"] = 11] = "KEYWORD_OF";
|
||||
})(TokenType || (TokenType = {}));
|
||||
class Token {
|
||||
constructor(type, token_string) {
|
||||
this.type = type;
|
||||
this.token_string = token_string;
|
||||
}
|
||||
}
|
||||
class TokenizerOptions {
|
||||
constructor() {
|
||||
this.convert_spaces_to_tabs = false;
|
||||
}
|
||||
}
|
||||
/* Basic Tokenizer: To be replaced with a unicode variant later */
|
||||
function tokenize(input, options) {
|
||||
let tokens = [];
|
||||
let errors = [];
|
||||
for (let i = 0; i < input.length; i++) {
|
||||
// 4 spaces = 1 tab. That is final. Debate over
|
||||
if (options.convert_spaces_to_tabs && input.startsWith(" ", i)) {
|
||||
tokens.push(new Token(TokenType.INDENT));
|
||||
i += 3;
|
||||
}
|
||||
// between (ex: 0...3 or 0-3)
|
||||
else if (input.startsWith("...", i)) {
|
||||
tokens.push(new Token(TokenType.BETWEEN));
|
||||
i += 2;
|
||||
}
|
||||
else if (input.startsWith("..", i)) {
|
||||
tokens.push(new Token(TokenType.BETWEEN));
|
||||
i += 1;
|
||||
}
|
||||
// comments
|
||||
else if (input.startsWith("//", i)) {
|
||||
i += 1;
|
||||
while (i < input.length) {
|
||||
if (input[i] == '\n') {
|
||||
tokens.push(new Token(TokenType.END_OF_STATEMENT));
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
else if (input.startsWith("\r\n", i)) {
|
||||
tokens.push(new Token(TokenType.END_OF_STATEMENT));
|
||||
i += 1;
|
||||
}
|
||||
else {
|
||||
switch (input[i]) {
|
||||
// comment
|
||||
case '#':
|
||||
i++;
|
||||
while (i < input.length) {
|
||||
if (input[i] == '\n') {
|
||||
tokens.push(new Token(TokenType.END_OF_STATEMENT));
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
break;
|
||||
// quote
|
||||
case '"':
|
||||
case '\"':
|
||||
// build up a word between quotes
|
||||
const quote_char = input[i];
|
||||
let found_ending = false;
|
||||
let quote = "";
|
||||
do {
|
||||
i++;
|
||||
if (input[i] == quote_char) {
|
||||
found_ending = true;
|
||||
break;
|
||||
}
|
||||
else if (input[i] == '\n') {
|
||||
}
|
||||
} while (i < input.length);
|
||||
if (found_ending) {
|
||||
tokens.push(new Token(TokenType.QUOTE, quote));
|
||||
}
|
||||
else {
|
||||
// Skip until newline and throw an error
|
||||
}
|
||||
break;
|
||||
// between (ex: 0...3 or 0-3)
|
||||
case '-':
|
||||
tokens.push(new Token(TokenType.BETWEEN));
|
||||
break;
|
||||
case '\n':
|
||||
tokens.push(new Token(TokenType.END_OF_STATEMENT));
|
||||
break;
|
||||
case '\r':
|
||||
// ignore
|
||||
break;
|
||||
case '\t':
|
||||
tokens.push(new Token(TokenType.INDENT));
|
||||
break;
|
||||
case ' ':
|
||||
break;
|
||||
default:
|
||||
// is digit? build up a number
|
||||
// is char? build up a word
|
||||
keywords.includes("word");
|
||||
// build up a word
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return { tokens: tokens, errors: errors };
|
||||
}
|
||||
/*
|
||||
String.prototype.escape = function() {
|
||||
var tagsToReplace = {
|
||||
'&': '&',
|
||||
'<': '<',
|
||||
'>': '>'
|
||||
};
|
||||
return this.replace(/[&<>]/g, function(tag) {
|
||||
return tagsToReplace[tag] || tag;
|
||||
});
|
||||
};
|
||||
String.prototype.norm = function() {
|
||||
if(String.prototype.normalize != undefined) {
|
||||
return this.normalize("NFD").replace(/[\u0300-\u036F]/g,"");
|
||||
}
|
||||
return this;
|
||||
};
|
||||
|
||||
*/
|
||||
// jQuery DOM-ready entry point — intentionally empty for now; UI wiring
// is still to come (tokenizer/parser are not hooked up to the page yet).
$(function () {
});
|
168
src/script.ts
168
src/script.ts
@ -1,167 +1,9 @@
|
||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||
|
||||
"use strict";
|
||||
|
||||
const keywords = [
|
||||
"optional", "optionally", "match", "then", "any", "of", "or", "word", "digit", "unicode", "character",
|
||||
"multiple", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "anything",
|
||||
"whitespace", "as", "number", "if", "starts", "with", "ends", "otherwise", "else", "unless", "while", "more",
|
||||
"using", "global", "and", "multiline", "exact", "matching", "not", "between", "tab", "linefeed", "carriage", "return",
|
||||
"group", "by", "exactly", "inclusive", "inclusively", "exclusive", "exclusively", "including", "from", "to"
|
||||
];
|
||||
|
||||
enum TokenType {
|
||||
END_OF_STATEMENT,
|
||||
INDENT,
|
||||
BETWEEN,
|
||||
QUOTE,
|
||||
KEYWORD_BETWEEN,
|
||||
KEYWORD_OPTIONAL,
|
||||
KEYWORD_MATCH,
|
||||
KEYWORD_THEN,
|
||||
KEYWORD_AND,
|
||||
KEYWORD_OR,
|
||||
KEYWORD_ANY,
|
||||
KEYWORD_OF,
|
||||
}
|
||||
|
||||
class Token {
|
||||
constructor(public type: TokenType, public token_string?: string) {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
class TokenizerOptions {
|
||||
public convert_spaces_to_tabs: boolean = false;
|
||||
|
||||
}
|
||||
|
||||
/* Basic Tokenizer: To be replaced with a unicode variant later */
|
||||
|
||||
function tokenize(input: string, options: TokenizerOptions) : { tokens: Token[], errors: Error[] } {
|
||||
let tokens : Token[] = [];
|
||||
let errors : Error[] = [];
|
||||
|
||||
for(let i = 0; i < input.length; i++) {
|
||||
|
||||
// 4 spaces = 1 tab. That is final. Debate over
|
||||
if(options.convert_spaces_to_tabs && input.startsWith(" ", i)) {
|
||||
tokens.push(new Token(TokenType.INDENT));
|
||||
i += 3;
|
||||
}
|
||||
// between (ex: 0...3 or 0-3)
|
||||
else if(input.startsWith("...", i)) {
|
||||
tokens.push(new Token(TokenType.BETWEEN));
|
||||
i += 2;
|
||||
} else if(input.startsWith("..", i)) {
|
||||
tokens.push(new Token(TokenType.BETWEEN));
|
||||
i += 1;
|
||||
}
|
||||
// comments
|
||||
else if(input.startsWith("//", i)) {
|
||||
i += 1;
|
||||
while(i < input.length) {
|
||||
if(input[i] == '\n') {
|
||||
tokens.push(new Token(TokenType.END_OF_STATEMENT));
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
} else if (input.startsWith("\r\n", i)) {
|
||||
tokens.push(new Token(TokenType.END_OF_STATEMENT));
|
||||
i += 1;
|
||||
} else {
|
||||
switch(input[i]) {
|
||||
// comment
|
||||
case '#':
|
||||
i++;
|
||||
while(i < input.length) {
|
||||
if(input[i] == '\n') {
|
||||
tokens.push(new Token(TokenType.END_OF_STATEMENT));
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
break;
|
||||
// quote
|
||||
case '"':
|
||||
case '\"':
|
||||
// build up a word between quotes
|
||||
const quote_char = input[i];
|
||||
let found_ending = false;
|
||||
|
||||
let quote = "";
|
||||
|
||||
do {
|
||||
i++;
|
||||
if(input[i] == quote_char) {
|
||||
found_ending = true;
|
||||
break;
|
||||
}
|
||||
else if(input[i] == '\n') {
|
||||
|
||||
}
|
||||
} while(i < input.length);
|
||||
|
||||
if(found_ending) {
|
||||
tokens.push(new Token(TokenType.QUOTE, quote));
|
||||
}
|
||||
else {
|
||||
// Skip until newline and throw an error
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
// between (ex: 0...3 or 0-3)
|
||||
case '-':
|
||||
tokens.push(new Token(TokenType.BETWEEN));
|
||||
break;
|
||||
case '\n':
|
||||
tokens.push(new Token(TokenType.END_OF_STATEMENT));
|
||||
break;
|
||||
case '\r':
|
||||
// ignore
|
||||
break;
|
||||
case '\t':
|
||||
tokens.push(new Token(TokenType.INDENT));
|
||||
break;
|
||||
case ' ':
|
||||
break;
|
||||
default:
|
||||
// is digit? build up a number
|
||||
|
||||
// is char? build up a word
|
||||
|
||||
keywords.includes("word");
|
||||
// build up a word
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return { tokens: tokens, errors: errors };
|
||||
}
|
||||
|
||||
/*
|
||||
String.prototype.escape = function() {
|
||||
var tagsToReplace = {
|
||||
'&': '&',
|
||||
'<': '<',
|
||||
'>': '>'
|
||||
};
|
||||
return this.replace(/[&<>]/g, function(tag) {
|
||||
return tagsToReplace[tag] || tag;
|
||||
});
|
||||
};
|
||||
String.prototype.norm = function() {
|
||||
if(String.prototype.normalize != undefined) {
|
||||
return this.normalize("NFD").replace(/[\u0300-\u036F]/g,"");
|
||||
}
|
||||
return this;
|
||||
};
|
||||
|
||||
*/
|
||||
|
||||
$( function() {
|
||||
import { Token, TokenType } from "./tokens";
|
||||
import { TokenizerOptions, tokenize } from "./tokenizer";
|
||||
import { ParserOptions, parse } from "./parser";
|
||||
|
||||
$(function() {
|
||||
|
||||
});
|
@ -263,8 +263,8 @@ footer {
|
||||
}
|
||||
|
||||
/* accessibility */
|
||||
/* FIX: as rendered, the `a` rule was missing its closing brace, leaving
   `a:hover` nested inside it — nesting is invalid in plain CSS, so the
   hover color would never apply. Split into two sibling rules. */
a {
    color: #00497A;
}

a:hover {
    color: #208bff;
}
|
||||
|
||||
.navbar-light .navbar-nav .nav-link {
|
||||
|
356
src/tokenizer.ts
Normal file
356
src/tokenizer.ts
Normal file
@ -0,0 +1,356 @@
|
||||
/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
|
||||
|
||||
// TODO: replace every version of switch(<some string>) with switch(<some string>.charCodeAt(0))
|
||||
|
||||
import { Token, TokenType, TokenError } from "./tokens";
|
||||
|
||||
const keywords = {
|
||||
"optional": TokenType.KEYWORD_OPTIONAL,
|
||||
"optionally": TokenType.KEYWORD_OPTIONAL,
|
||||
"match": TokenType.KEYWORD_MATCH,
|
||||
"then": TokenType.KEYWORD_THEN,
|
||||
"any": TokenType.KEYWORD_ANY,
|
||||
"anything": TokenType.KEYWORD_ANY,
|
||||
"of": TokenType.KEYWORD_OF,
|
||||
"or": TokenType.KEYWORD_OR,
|
||||
"and": TokenType.KEYWORD_AND,
|
||||
"word": TokenType.KEYWODE_WORD_SPECIFIER,
|
||||
"digit": TokenType.KEYWORD_DIGIT_SPECIFIER,
|
||||
"character": TokenType.KEYWORD_CHAR_SPECIFIER,
|
||||
"whitespace": TokenType.KEYWORD_WHITESPACE_SPECIFIER,
|
||||
"number": TokenType.KEYWORD_NUMBER_SPECIFIER,
|
||||
"multiple": TokenType.KEYWORD_MULTIPLE,
|
||||
"as": TokenType.KEYWORD_AS,
|
||||
"if": TokenType.KEYWORD_IF,
|
||||
"starts": TokenType.KEYWORD_STARTS,
|
||||
"with": TokenType.KEYWORD_WITH,
|
||||
"ends": TokenType.KEYWORD_ENDS,
|
||||
"otherwise": TokenType.KEYWORD_ELSE,
|
||||
"else": TokenType.KEYWORD_ELSE,
|
||||
"unless": TokenType.KEYWORD_UNLESS,
|
||||
"while": TokenType.KEYWORD_WHILE,
|
||||
"more": TokenType.KEYWORD_MORE,
|
||||
"using": TokenType.KEYWORD_USING,
|
||||
"global": TokenType.KEYWORD_GLOBAL,
|
||||
"multiline": TokenType.KEYWORD_MULTILINE,
|
||||
"exact": TokenType.KEYWORD_EXACT,
|
||||
"matching": TokenType.KEYWORD_MATCHING,
|
||||
"not": TokenType.KEYWORD_NOT,
|
||||
"between": TokenType.KEYWORD_BETWEEN,
|
||||
"tab": TokenType.KEYWORD_TAB,
|
||||
"linefeed": TokenType.KEYWORD_LINEFEED,
|
||||
"carriage": TokenType.KEYWORD_CARRIAGE,
|
||||
"return": TokenType.KEYWORD_RETURN,
|
||||
"group": TokenType.KEYWORD_GROUP,
|
||||
"by": TokenType.KEYWORD_BY,
|
||||
"an": TokenType.KEYWORD_ARTICLE,
|
||||
"a": TokenType.KEYWORD_ARTICLE,
|
||||
"the": TokenType.KEYWORD_ARTICLE,
|
||||
"exactly": TokenType.KEYWORD_EXACTLY,
|
||||
"inclusive": TokenType.KEYWORD_INCLUSIVE,
|
||||
"inclusively": TokenType.KEYWORD_INCLUSIVE,
|
||||
"exclusive": TokenType.KEYWORD_EXCLUSIVE,
|
||||
"exclusively": TokenType.KEYWORD_EXCLUSIVE,
|
||||
"from": TokenType.KEYWORD_FROM,
|
||||
"to": TokenType.KEYWORD_TO
|
||||
};
|
||||
|
||||
const escape_sequences = {
|
||||
'a': '\a',
|
||||
'b': '\b',
|
||||
'e': '\e',
|
||||
'f': '\f',
|
||||
'n': '\n',
|
||||
'r': '\r',
|
||||
't': '\t',
|
||||
'"': '"',
|
||||
'\'': '\'',
|
||||
'\\': '\\',
|
||||
};
|
||||
|
||||
export class TokenizerOptions {
|
||||
public convert_spaces_to_tabs: boolean = false;
|
||||
}
|
||||
|
||||
const escape_sequence_hex_regex = new RegExp(/[0-9A-Fa-f]/g);
|
||||
|
||||
function escape_sequence_gather_hex(input: string, i : number, max: number) : string {
|
||||
let hex = "";
|
||||
for(i++; i < input.length && max-- > 0; i++) {
|
||||
if(escape_sequence_hex_regex.test(input[i])) hex += input[i];
|
||||
}
|
||||
return hex;
|
||||
}
|
||||
|
||||
/**
 * Decodes the escape sequence beginning at input[i] (the character AFTER the
 * backslash; the caller consumes the backslash itself).
 *
 * Returns:
 *   code  — the decoded character ("" when the sequence is malformed)
 *   read  — how many characters of `input`, starting at i, belong to the
 *           sequence (includes the 'x'/'u'/'U' specifier, not the backslash)
 *   error — set when a 'u'/'U' sequence has too few hex digits
 */
function escape_sequence_mapper(input: string, i : number) : { code: string, read: number, error?: Error } {
    // simple one-character escapes (\n, \t, \\ ...) from the lookup table
    if(escape_sequences[input[i]] != undefined) {
        return { code: escape_sequences[input[i]], read: 1 };
    }
    //variable hex code
    else if(input[i] == 'x') {
        // \xH..H — up to 4 hex digits; read = digits consumed + the 'x' itself
        const hex = escape_sequence_gather_hex(input, ++i, 4);

        return { code: String.fromCharCode(parseInt(hex, 16)), read: hex.length + 1 };
    }
    //4 hex unicode
    else if(input[i] == 'u') {
        // \uHHHH — exactly 4 hex digits required
        const unicode = escape_sequence_gather_hex(input, ++i, 4);
        if(unicode.length != 4) {
            return { code: "", read: unicode.length + 1, error: new Error("Bad escape sequence")};
        }
        else {
            return { code: String.fromCharCode(parseInt(unicode, 16)), read: 5 };
        }
    }
    else if(input[i] == 'U') {
        // \UHHHHHHHH — exactly 8 hex digits required
        // NOTE(review): String.fromCharCode cannot represent code points above
        // 0xFFFF — String.fromCodePoint is presumably intended here; confirm.
        const unicode = escape_sequence_gather_hex(input, ++i, 8);

        if(unicode.length != 8) {
            return { code: "", read: unicode.length + 1, error: new Error("Bad escape sequence")};
        }
        else {
            return { code: String.fromCharCode(parseInt(unicode, 16)), read: 9 };
        }
    }
    else {
        // unknown escape: pass the character through unchanged
        // should throw an exception, but gonna just ignore it
        return { code: input[i], read: 1 };
    }
}
|
||||
|
||||
function is_digit(input: string) : boolean {
|
||||
//return /[0-9]/g.test(input);
|
||||
const value = input.charCodeAt(0);
|
||||
return value >= 48 && value <= 57;
|
||||
}
|
||||
|
||||
function is_char(input: string) : boolean {
|
||||
//return input.toUpperCase() != input.toLowerCase();
|
||||
//return /[a-zA-Z]/g.test(input);
|
||||
|
||||
const value = input.charCodeAt(0);
|
||||
return ((value >= 65 && value <= 90) || (value >= 97 && value <= 122));
|
||||
}
|
||||
|
||||
/* Basic Tokenizer */
|
||||
export function tokenize(input: string, options: TokenizerOptions) : { tokens: Token[], errors: TokenError[] } {
|
||||
let line = 1;
|
||||
let position = 1;
|
||||
|
||||
let tokens : Token[] = [];
|
||||
let errors : TokenError[] = [];
|
||||
|
||||
for(let i = 0; i < input.length; i++, position++) {
|
||||
// 4 spaces = 1 tab. That is final. Debate over
|
||||
if(options.convert_spaces_to_tabs && input.startsWith(" ", i)) {
|
||||
tokens.push(new Token(TokenType.INDENT, line, position));
|
||||
i += 3;
|
||||
position += 3;
|
||||
}
|
||||
// between (ex: 0...3 or 0-3)
|
||||
else if(input.startsWith("...", i)) {
|
||||
tokens.push(new Token(TokenType.BETWEEN, line, position));
|
||||
i += 2;
|
||||
position += 2;
|
||||
}
|
||||
else if(input.startsWith("..", i)) {
|
||||
tokens.push(new Token(TokenType.BETWEEN, line, position));
|
||||
i++;
|
||||
position++;
|
||||
}
|
||||
// comments
|
||||
else if(input.startsWith("//", i)) {
|
||||
for(i++, position++; i < input.length; i++, position++) {
|
||||
if(input[i] == '\n') {
|
||||
tokens.push(new Token(TokenType.END_OF_STATEMENT, line, position));
|
||||
break;
|
||||
}
|
||||
}
|
||||
line++;
|
||||
position = 0;
|
||||
}
|
||||
else if(input.startsWith("/*", i)) {
|
||||
for(i++, position++; i < input.length-1; i++, position++) {
|
||||
if(input[i] == '*' && input[i+1] == '/') {
|
||||
tokens.push(new Token(TokenType.END_OF_STATEMENT, line, position));
|
||||
i++;
|
||||
position++;
|
||||
break;
|
||||
}
|
||||
if(input[i] == '\n') {
|
||||
line++;
|
||||
position = 0;
|
||||
}
|
||||
}
|
||||
if(i == input.length-1) {
|
||||
errors.push(new TokenError("Unexpected EOF", line, position));
|
||||
}
|
||||
else {
|
||||
line++;
|
||||
position = 0;
|
||||
}
|
||||
}
|
||||
else if (input.startsWith("\r\n", i)) {
|
||||
tokens.push(new Token(TokenType.END_OF_STATEMENT, line, position));
|
||||
i++;
|
||||
line++;
|
||||
position = 0;
|
||||
}
|
||||
else {
|
||||
switch(input[i]) {
|
||||
// comment
|
||||
case '#':
|
||||
for(i++, position++; i < input.length; i++, position++) {
|
||||
if(input[i] == '\n') {
|
||||
tokens.push(new Token(TokenType.END_OF_STATEMENT, line, position));
|
||||
line++;
|
||||
position = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
// quote
|
||||
case '"':
|
||||
case '\"':
|
||||
// build up a word between quotes
|
||||
const quote_begin = { line: line, position: position };
|
||||
const quote_char = input[i];
|
||||
let found_ending = false;
|
||||
|
||||
let quote = "";
|
||||
|
||||
do {
|
||||
i++;
|
||||
position++;
|
||||
if(input[i] == '\\') {
|
||||
i++;
|
||||
position++;
|
||||
const sequence = escape_sequence_mapper(input, i);
|
||||
|
||||
if(sequence.error != undefined) {
|
||||
errors.push(new TokenError(sequence.error.message, line, position));
|
||||
}
|
||||
|
||||
position += sequence.read;
|
||||
i += sequence.read;
|
||||
quote += sequence.code;
|
||||
|
||||
}
|
||||
else if(input[i] == quote_char) {
|
||||
found_ending = true;
|
||||
break;
|
||||
}
|
||||
else if(input[i] == '\n') {
|
||||
line++;
|
||||
position = 0;
|
||||
break;
|
||||
}
|
||||
else {
|
||||
quote += input[i];
|
||||
}
|
||||
} while(i < input.length);
|
||||
|
||||
if(found_ending) {
|
||||
tokens.push(new Token(TokenType.QUOTE, line, position, quote));
|
||||
}
|
||||
else {
|
||||
//we reached the end of the line or the end of the file
|
||||
errors.push(new TokenError(`Unexpected end of quote. Quote began at ${quote_begin.line}:${quote_begin.position}`, line, position));
|
||||
line++;
|
||||
position = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
// between (ex: 0...3 or 0-3)
|
||||
case '-':
|
||||
tokens.push(new Token(TokenType.BETWEEN, line, position));
|
||||
break;
|
||||
case '\n':
|
||||
tokens.push(new Token(TokenType.END_OF_STATEMENT, line, position));
|
||||
break;
|
||||
case '\r':
|
||||
// ignore
|
||||
break;
|
||||
case '\t':
|
||||
tokens.push(new Token(TokenType.INDENT, line, position));
|
||||
break;
|
||||
case ' ':
|
||||
break;
|
||||
default:
|
||||
// is digit? build up a number
|
||||
if(is_digit(input[i])) {
|
||||
let digits = input[i];
|
||||
|
||||
do {
|
||||
i++; position++;
|
||||
digits += input[i];
|
||||
} while(i+1 < input.length && is_digit(input[i+1]));
|
||||
|
||||
tokens.push(new Token(TokenType.NUMBER, line, position, digits));
|
||||
}
|
||||
// is char? build up a word
|
||||
else if(is_char(input[i])) {
|
||||
let text = input[i];
|
||||
|
||||
do {
|
||||
i++; position++;
|
||||
text += input[i];
|
||||
} while(i+1 < input.length && is_char(input[i+1]));
|
||||
|
||||
const keyword_text = text.toLowerCase();
|
||||
|
||||
if(keywords[keyword_text] != undefined) {
|
||||
tokens.push(new Token(keywords[keyword_text], line, position));
|
||||
}
|
||||
else {
|
||||
switch(keyword_text) {
|
||||
case "none":
|
||||
case "zero":
|
||||
tokens.push(new Token(TokenType.NUMBER, line, position, "0"));
|
||||
break;
|
||||
case "one":
|
||||
tokens.push(new Token(TokenType.NUMBER, line, position, "1"));
|
||||
break;
|
||||
case "two":
|
||||
tokens.push(new Token(TokenType.NUMBER, line, position, "2"));
|
||||
break;
|
||||
case "three":
|
||||
tokens.push(new Token(TokenType.NUMBER, line, position, "3"));
|
||||
break;
|
||||
case "four":
|
||||
tokens.push(new Token(TokenType.NUMBER, line, position, "4"));
|
||||
break;
|
||||
case "five":
|
||||
tokens.push(new Token(TokenType.NUMBER, line, position, "5"));
|
||||
break;
|
||||
case "six":
|
||||
tokens.push(new Token(TokenType.NUMBER, line, position, "6"));
|
||||
break;
|
||||
case "seven":
|
||||
tokens.push(new Token(TokenType.NUMBER, line, position, "7"));
|
||||
break;
|
||||
case "eight":
|
||||
tokens.push(new Token(TokenType.NUMBER, line, position, "8"));
|
||||
break;
|
||||
case "nine":
|
||||
tokens.push(new Token(TokenType.NUMBER, line, position, "9"));
|
||||
break;
|
||||
case "ten":
|
||||
tokens.push(new Token(TokenType.NUMBER, line, position, "10"));
|
||||
break;
|
||||
default:
|
||||
errors.push(new TokenError(`Unknown keyword ${text}`, line, position));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
errors.push(new TokenError(`Unknown character in text: ${input.charCodeAt(i)}`, line, position));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return { tokens: tokens, errors: errors };
|
||||
}
|
64
src/tokens.ts
Normal file
64
src/tokens.ts
Normal file
@ -0,0 +1,64 @@
|
||||
/** Every token category the tokenizer (src/tokenizer.ts) can emit. */
export enum TokenType {
    // structural tokens
    END_OF_STATEMENT,
    INDENT,
    BETWEEN,
    // tokens that carry a token_string payload
    QUOTE,
    NUMBER,
    // language keywords
    KEYWORD_BETWEEN,
    KEYWORD_OPTIONAL,
    KEYWORD_MATCH,
    KEYWORD_THEN,
    KEYWORD_AND,
    KEYWORD_OR,
    KEYWORD_ANY,
    KEYWORD_OF,
    // NOTE(review): "KEYWODE" is a typo for "KEYWORD" — the keyword table in
    // src/tokenizer.ts references this exact name, so renaming must be done
    // in both files together.
    KEYWODE_WORD_SPECIFIER,
    KEYWORD_DIGIT_SPECIFIER,
    KEYWORD_CHAR_SPECIFIER,
    KEYWORD_WHITESPACE_SPECIFIER,
    KEYWORD_NUMBER_SPECIFIER,
    KEYWORD_MULTIPLE,
    KEYWORD_AS,
    KEYWORD_IF,
    KEYWORD_STARTS,
    KEYWORD_WITH,
    KEYWORD_ENDS,
    KEYWORD_ELSE,
    KEYWORD_UNLESS,
    KEYWORD_WHILE,
    KEYWORD_MORE,
    KEYWORD_USING,
    KEYWORD_GLOBAL,
    KEYWORD_MULTILINE,
    KEYWORD_EXACT,
    KEYWORD_MATCHING,
    KEYWORD_NOT,
    KEYWORD_TAB,
    KEYWORD_LINEFEED,
    KEYWORD_CARRIAGE,
    KEYWORD_RETURN,
    KEYWORD_GROUP,
    KEYWORD_BY,
    // covers "a", "an", "the"
    KEYWORD_ARTICLE,
    KEYWORD_EXACTLY,
    KEYWORD_INCLUSIVE,
    KEYWORD_EXCLUSIVE,
    KEYWORD_FROM,
    KEYWORD_TO
}
|
||||
|
||||
export class TokenError extends Error {
|
||||
constructor(message: string, public line: number, public position: number) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public to_string() {
|
||||
return `${this.line}:${this.position} ${this.message}`;
|
||||
}
|
||||
}
|
||||
|
||||
export class Token {
|
||||
constructor(public type: TokenType, public line: number, public position: number, public token_string?: string) {
|
||||
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user