diff --git a/.eslintrc.json b/.eslintrc.json
index 15c9b88..f9b97fe 100644
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -17,6 +17,11 @@
     ],
     "rules": {
         "@typescript-eslint/no-inferrable-types": "off",
+        "@typescript-eslint/explicit-function-return-type": "error",
+        "no-magic-numbers": [
+            "warn",
+            { "ignoreArrayIndexes": true, "ignore": [0,1,2,3,4,5,6,7,8,9]}
+        ],
         "curly": "warn",
         "no-loss-of-precision": "error",
         "default-case-last": "warn",
diff --git a/.gitignore b/.gitignore
index 97966cd..b456dc0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,3 @@
 # Node build artifacts
 node_modules/
-npm-debug.log
-src/*.js
\ No newline at end of file
+npm-debug.log
\ No newline at end of file
diff --git a/src/script.ts b/src/script.ts
index 69b3716..961d6b7 100644
--- a/src/script.ts
+++ b/src/script.ts
@@ -12,6 +12,10 @@ $(function() {
 */
 
const opts = new TokenizerOptions();
-const res = tokenize("match 1+ thing from thingy", opts);
+const result = tokenize("match /* 9+ */ 1+ optionally 1..3 0-zero then //comment match", opts);
 
-console.log(res);
\ No newline at end of file
+for(const r of result.tokens) {
+    console.log(r.to_string());
+}
+
+console.log(result.errors);
\ No newline at end of file
diff --git a/src/tokenizer.ts b/src/tokenizer.ts
index 515712c..4891a10 100644
--- a/src/tokenizer.ts
+++ b/src/tokenizer.ts
@@ -69,7 +69,7 @@ const escape_sequences = {
 };
 
 export class TokenizerOptions {
-    public convert_spaces_to_tabs: boolean = false;
+    public convert_spaces_to_tabs: boolean = true;
 }
 
 const escape_sequence_hex_regex = new RegExp(/[0-9A-Fa-f]/g);
@@ -120,18 +120,21 @@ function escape_sequence_mapper(input: string, i : number) : { code: string, rea
     }
 }
 
-function is_digit(input: string) : boolean {
+const test_chars = "09azAZ";
+
+function is_digit(input: string, i: number) : boolean {
     //return /[0-9]/g.test(input);
-    const value = input.charCodeAt(0);
-    return value >= 48 && value <= 57;
+    const value = input.charCodeAt(i);
+    return value >= test_chars.charCodeAt(0) && value <= test_chars.charCodeAt(1);
 }
 
-function is_char(input: string) : boolean {
+function is_char(input: string, i: number) : boolean {
     //return input.toUpperCase() != input.toLowerCase();
     //return /[a-zA-Z]/g.test(input);
-    const value = input.charCodeAt(0);
-    return ((value >= 65 && value <= 90) || (value >= 97 && value <= 122));
+    const value = input.charCodeAt(i);
+    return ((value >= test_chars.charCodeAt(2) && value <= test_chars.charCodeAt(3)) ||
+        (value >= test_chars.charCodeAt(4) && value <= test_chars.charCodeAt(5)));
 }
 
 /* Basic Tokenizer */
 
@@ -174,7 +177,6 @@ export function tokenize(input: string, options: TokenizerOptions) : { tokens: T
         else if(input.startsWith("/*", i)) {
             for(i++, position++; i < input.length-1; i++, position++) {
                 if(input[i] === "*" && input[i+1] === "/") {
-                    tokens.push(new Token(TokenType.END_OF_STATEMENT, line, position));
                     i++; position++;
                     break;
 
@@ -268,6 +270,10 @@ export function tokenize(input: string, options: TokenizerOptions) : { tokens: T
             case "-":
                 tokens.push(new Token(TokenType.BETWEEN, line, position));
                 break;
+            case "+":
+                tokens.push(new Token(TokenType.KEYWORD_OR, line, position));
+                tokens.push(new Token(TokenType.KEYWORD_MORE, line, position));
+                break;
             case "\n":
                 tokens.push(new Token(TokenType.END_OF_STATEMENT, line, position));
                 break;
@@ -281,24 +287,22 @@ export function tokenize(input: string, options: TokenizerOptions) : { tokens: T
                 break;
             default:
                 // is digit? build up a number
-                if(is_digit(input[i])) {
+                if(is_digit(input, i)) {
                     let digits = input[i];
 
-                    do {
-                        i++; position++;
-                        digits += input[i];
-                    } while(i+1 < input.length && is_digit(input[i+1]));
+                    for(; i+1 < input.length && is_digit(input, i+1); i++, position++) {
+                        digits += input[i+1];
+                    }
 
                     tokens.push(new Token(TokenType.NUMBER, line, position, digits));
                 }
                 // is char? build up a word
-                else if(is_char(input[i])) {
+                else if(is_char(input, i)) {
                     let text = input[i];
 
-                    do {
-                        i++; position++;
-                        text += input[i];
-                    } while(i+1 < input.length && is_char(input[i+1]));
+                    for(; i+1 < input.length && is_char(input, i+1); i++, position++) {
+                        text += input[i+1];
+                    }
 
                     const keyword_text = text.toLowerCase();
 
@@ -348,7 +352,7 @@ export function tokenize(input: string, options: TokenizerOptions) : { tokens: T
                     }
                 }
                 else {
-                    errors.push(new TokenError(`Unknown character in text: ${input.charCodeAt(i)}`, line, position));
+                    errors.push(new TokenError(`Unknown character in text: "${input[i]}" (${input.charCodeAt(i)})`, line, position));
                 }
                 break;
         }
diff --git a/src/tokens.ts b/src/tokens.ts
index b68c111..c1d633c 100644
--- a/src/tokens.ts
+++ b/src/tokens.ts
@@ -52,13 +52,23 @@ export class TokenError extends Error {
         super(message);
     }
 
-    public to_string() {
+    public to_string(): string {
         return `${this.line}:${this.position} ${this.message}`;
     }
 }
 
 export class Token {
-    constructor(public type: TokenType, public line: number, public position: number, public token_string?: string) {
-
+    constructor(public type: TokenType, public line: number, public position: number, public token_string?: string) {
+        /* nothing required */
+    }
+
+    public to_string(): string {
+        let str = `${this.line}:${this.position} ${TokenType[this.type]}`;
+
+        if (this.token_string) {
+            str += ` "${this.token_string}"`;
+        }
+
+        return str;
     }
 }
\ No newline at end of file