From 7d7d6337e1ab84872bbe034e2c3968cd7e680aec Mon Sep 17 00:00:00 2001
From: Patrick Demian
Date: Thu, 5 Nov 2020 01:03:31 -0500
Subject: [PATCH] Added tests and fixed bugs thanks to tests

---
 src/generator.ts        |  14 ++++--
 src/lexer.ts            |   4 +-
 src/parser.ts           |  92 +++++++++++++++++-----------------
 src/utilities.ts        |   2 +-
 tests/generator.spec.ts |  91 +++++++++++++++++++++++++++++++++
 tests/lexer.spec.ts     |  73 +++++++++++++++++++++++++--
 tests/parser.spec.ts    |  71 ++++++++++++++++++++++++++
 tests/utilities.spec.ts | 108 ++++++++++++++++++++++++++++++++++++++++
 8 files changed, 397 insertions(+), 58 deletions(-)
 create mode 100644 tests/generator.spec.ts
 create mode 100644 tests/parser.spec.ts
 create mode 100644 tests/utilities.spec.ts

diff --git a/src/generator.ts b/src/generator.ts
index f271105..b9c47c2 100644
--- a/src/generator.ts
+++ b/src/generator.ts
@@ -68,15 +68,13 @@ const unicode_script_codes = [
  * @internal
  */
 export abstract class H2RCST {
-    public tokens: IToken[];
-
     /**
      * Constructor for H2RCST
      *
     * @param tokens Tokens used to calculate where an error occured
      * @internal
      */
-    constructor(tokens: IToken[]) {
+    constructor(public tokens: IToken[]) {
         this.tokens = tokens;
     }
 
@@ -335,6 +333,8 @@ export class MatchSubStatementCST extends H2RCST {
 
         let ret = "";
 
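+        // track whether the alternation still needs a wrapping non-capture
+        // group; a bare "a|b" only requires grouping when a count follows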
+        let require_grouping = false;
+
         if (str.length === 1) {
             ret = str[0];
         }
@@ -344,10 +344,14 @@ export class MatchSubStatementCST extends H2RCST {
         }
         else {
             //use a no-capture group
-            ret = "(?:" + str.join("|") + ")";
+            ret = str.join("|");
+            require_grouping = true;
         }
 
         if (this.count) {
+            if (require_grouping) {
+                ret = "(?:" + ret + ")";
+            }
             ret += this.count.toRegex(language);
         }
 
@@ -380,7 +384,7 @@ export class UsingStatementCST extends H2RCST {
 
         for (let i = 1; i < this.flags.length; i++) {
             if (hasFlag(flag, this.flags[i])) {
-                errors.push(this.error("Duplicate modifier: " + MatchSubStatementType[this.flags[i]] ));
+                errors.push(this.error("Duplicate modifier: " + UsingFlags[this.flags[i]] ));
             }
             flag = combineFlags(flag, this.flags[i]);
         }
diff --git a/src/lexer.ts b/src/lexer.ts
index ab6e754..9cfea2a 100644
--- a/src/lexer.ts
+++ b/src/lexer.ts
@@ -93,7 +93,7 @@ export class Human2RegexLexer {
         this.lexer = new Lexer(AllTokens, { ensureOptimizations: true, skipValidations: options.skip_validations });
     }
 
-    private lexError(token: IToken) : ILexingError {
+    private lexError(token: IToken): ILexingError {
         return {
             offset: token.startOffset,
             line: token.startLine ?? NaN,
@@ -109,7 +109,7 @@ export class Human2RegexLexer {
      * @param text the text to analyze
      * @returns a lexing result which contains the token stream and error list
      */
-    public tokenize(text: string) : ILexingResult {
+    public tokenize(text: string): ILexingResult {
         const lex_result = this.lexer.tokenize(text);
 
         if (lex_result.tokens.length === 0) {
diff --git a/src/parser.ts b/src/parser.ts
index 5063b88..93f4b5d 100644
--- a/src/parser.ts
+++ b/src/parser.ts
@@ -154,7 +154,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
     });
 
     // match sub rules
-    let mss_rules: IOrAlt<MatchSubStatementValue>[] | null = null;
+    let mss_rules: IOrAlt<{tokens: IToken[], statement: MatchSubStatementValue}>[] | null = null;
     const MatchSubStatement = $.RULE("MatchSubStatement", () => {
         let count: CountSubStatementCST | null = null;
         let invert: boolean = false;
@@ -164,7 +164,7 @@ export class Human2RegexParser extends EmbeddedActionsParser {
         let to: string | null = null;
         let type: MatchSubStatementType = MatchSubStatementType.Anything;
 
-        const tokens: IToken[] = [];
+        let tokens: IToken[] = [];
 
         count = $.OPTION(() => {
             const css = $.SUBRULE(CountSubStatement);
@@ -184,122 +184,124 @@ export class Human2RegexParser extends EmbeddedActionsParser {
             SEP: T.Or,
             DEF: () => {
                 $.OPTION3(() => $.CONSUME(T.A));
-                values.push($.OR(mss_rules || (mss_rules = [
-
+                const result = $.OR(mss_rules || (mss_rules = [
                     // range [a-z]
                     { ALT: () => {
-                        $.OPTION4(() => $.CONSUME(T.From));
-                        from = $.CONSUME2(T.StringLiteral).image;
+                        const token0 = $.OPTION4(() => $.CONSUME(T.From));
+                        const token1 = $.CONSUME2(T.StringLiteral);
+                        from = token1.image;
                         $.CONSUME(T.To);
-                        const token = $.CONSUME3(T.StringLiteral);
-                        tokens.push(token);
-                        to = token.image;
+                        const token2 = $.CONSUME3(T.StringLiteral);
+                        to = token2.image;
                         type = MatchSubStatementType.Between;
-                        return new MatchSubStatementValue(type, from, to);
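+                        // chevrotain's OPTION is typed as always returning a value,
+                        // but it actually yields undefined when its branch is not
+                        // taken, so token0 is guarded before being used below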
+                        if (usefulConditional(token0, "Bug in type definition. Option should return <undefined>, but it doesn't")) {
+                            return { tokens: [ token0, token2 ], statement: new MatchSubStatementValue(type, from, to) };
+                        }
+                        return { tokens: [ token1, token2 ], statement: new MatchSubStatementValue(type, from, to) };
                     }},
 
                     // range [a-z]
                     { ALT: () => {
-                        $.CONSUME(T.Between);
+                        const token1 = $.CONSUME(T.Between);
                         from = $.CONSUME4(T.StringLiteral).image;
                         $.CONSUME(T.And);
-                        const token = $.CONSUME5(T.StringLiteral);
-                        to = token.image;
-                        tokens.push(token);
+                        const token2 = $.CONSUME5(T.StringLiteral);
+                        to = token2.image;
                         type = MatchSubStatementType.Between;
-                        return new MatchSubStatementValue(type, from, to);
+                        return { tokens: [ token1, token2 ], statement: new MatchSubStatementValue(type, from, to) };
                     }},
 
                     // exact string
                     { ALT: () => {
                         const token = $.CONSUME(T.StringLiteral);
-                        tokens.push(token);
                         value = token.image;
                         type = MatchSubStatementType.SingleString;
-                        return new MatchSubStatementValue(type, value);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type, value) };
                     }},
 
                     //unicode
                     { ALT: () => {
-                        $.CONSUME(T.Unicode);
-                        const token = $.CONSUME5(T.StringLiteral);
-                        tokens.push(token);
-                        value = token.image;
+                        const token1 = $.CONSUME(T.Unicode);
+                        const token2 = $.CONSUME6(T.StringLiteral);
+                        value = token2.image;
                         type = MatchSubStatementType.Unicode;
-                        return new MatchSubStatementValue(type, value);
+                        return { tokens: [ token1, token2 ], statement: new MatchSubStatementValue(type, value) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Anything));
+                        const token = $.CONSUME(T.Anything);
                         type = MatchSubStatementType.Anything;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Boundary));
+                        const token = $.CONSUME(T.Boundary);
                         type = MatchSubStatementType.Boundary;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Word));
+                        const token = $.CONSUME(T.Word);
                         type = MatchSubStatementType.Word;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Digit));
+                        const token = $.CONSUME(T.Digit);
                         type = MatchSubStatementType.Digit;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Character));
+                        const token = $.CONSUME(T.Character);
                         type = MatchSubStatementType.Character;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Whitespace));
+                        const token = $.CONSUME(T.Whitespace);
                         type = MatchSubStatementType.Whitespace;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Number));
+                        const token = $.CONSUME(T.Number);
                         type = MatchSubStatementType.Number;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Tab));
+                        const token = $.CONSUME(T.Tab);
                         type = MatchSubStatementType.Tab;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Linefeed));
+                        const token = $.CONSUME(T.Linefeed);
                         type = MatchSubStatementType.Linefeed;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.Newline));
+                        const token = $.CONSUME(T.Newline);
                         type = MatchSubStatementType.Newline;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
 
                     { ALT: () => {
-                        tokens.push($.CONSUME(T.CarriageReturn));
+                        const token = $.CONSUME(T.CarriageReturn);
                         type = MatchSubStatementType.CarriageReturn;
-                        return new MatchSubStatementValue(type);
+                        return { tokens: [ token ], statement: new MatchSubStatementValue(type) };
                     }},
-                ])));
+                ]));
+
+                tokens = tokens.concat(result.tokens);
+                values.push(result.statement);
             }
         });
diff --git a/src/utilities.ts b/src/utilities.ts
index 67c0187..2508dea 100644
--- a/src/utilities.ts
+++ b/src/utilities.ts
@@ -155,7 +155,7 @@ export function removeQuotes(input: string): string {
  * @internal
  */
 export function regexEscape(input: string): string {
-    return input.replace("\\", "\\\\").replace(/([=:\-\.\[\]\^\|\(\)\*\+\?\{\}\$\/])/g, "\\$1");
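+    // one pass now escapes "\\" via the character class; the old leading
+    // String#replace("\\", ...) only ever replaced the first backslash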
+    return input.replace(/([:\\\-\.\[\]\^\|\(\)\*\+\?\{\}\$\/])/g, "\\$1");
 }
 
 /**
diff --git a/tests/generator.spec.ts b/tests/generator.spec.ts
new file mode 100644
index 0000000..91d852a
--- /dev/null
+++ b/tests/generator.spec.ts
@@ -0,0 +1,91 @@
+/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
+
+import { Human2RegexParser, Human2RegexParserOptions } from "../src/parser";
+import { Human2RegexLexer, Human2RegexLexerOptions } from "../src/lexer";
+import { RegexDialect } from "../src/generator";
+
+
+describe("Generator functionality", function() {
+    const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(true));
+    // eslint-disable-next-line init-declarations
+    const parser = new Human2RegexParser(new Human2RegexParserOptions(true));
+
+    it("generates an empty regex", function() {
+        parser.input = lexer.tokenize("").tokens;
+        const reg0 = parser.parse();
+        expect(reg0.validate(RegexDialect.JS).length).toBe(0);
+        expect(reg0.toRegex(RegexDialect.JS)).toBe("//");
+
+        parser.input = lexer.tokenize("\n/*hello world*/\n").tokens;
+        const reg1 = parser.parse();
+        expect(reg1.validate(RegexDialect.JS).length).toBe(0);
+        expect(reg1.toRegex(RegexDialect.JS)).toBe("//");
+    });
+
+    it("generates a basic regex", function() {
+        parser.input = lexer.tokenize('match "hello" or "world"').tokens;
+        const reg0 = parser.parse();
+        expect(reg0.validate(RegexDialect.JS).length).toBe(0);
+        expect(reg0.toRegex(RegexDialect.JS)).toBe("/hello|world/");
+
+        parser.input = lexer.tokenize('match "http" then optionally "s"').tokens;
+        const reg1 = parser.parse();
+        expect(reg1.validate(RegexDialect.JS).length).toBe(0);
+        expect(reg1.toRegex(RegexDialect.JS)).toBe("/https?/");
+    });
+
+    it("validates invalid regexes", function() {
+        parser.input = lexer.tokenize('match unicode "Latin"').tokens;
+        const reg0 = parser.parse();
+        expect(reg0.validate(RegexDialect.DotNet).length).toBeGreaterThan(0);
+
+        parser.input = lexer.tokenize("using global and global").tokens;
+        const reg1 = parser.parse();
+        expect(reg1.validate(RegexDialect.DotNet).length).toBeGreaterThan(0);
+
+        parser.input = lexer.tokenize('match "a" to "asdf"').tokens;
+        const reg2 = parser.parse();
+        expect(reg2.validate(RegexDialect.DotNet).length).toBeGreaterThan(0);
+
+    });
+
+    it("runs complex scripts", function() {
+        const str = `
+using global and exact matching
+create an optional group called protocol
+    match "http"
+    optionally match "s"
+    match "://"
+create an optional group called subdomain
+    repeat
+        match a word
+        match "."
+create a group called domain
+    match 1+ words or "_" or "-"
+    match "."
+    match a word
+# port, but we don't care about it, so ignore it
+optionally match ":" then 0+ digits
+create an optional group called path
+    repeat
+        match "/"
+        match 0+ words or "_" or "-"
+create an optional group
+    # we don't want to capture the '?', so don't name the group until afterwards
+    match "?"
+    create a group called query
+        repeat
+            match 1+ words or "_" or "-"
+            match "="
+            match 1+ words or "_" or "-"
+create an optional group
+    # fragment, again, we don't care, so ignore everything afterwards
+    match "#"
+    match 0+ any thing
+`;
+        parser.input = lexer.tokenize(str).tokens;
+        const reg = parser.parse();
+        expect(reg.validate(RegexDialect.JS).length).toBe(0);
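+        // the named capture groups in the expected output ((?<protocol>,
+        // (?<subdomain>, (?<domain>, (?<path>, (?<query>) come from the
+        // group names "called" in the script above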
+        expect(reg.toRegex(RegexDialect.JS)).toBe("/^(?<protocol>https?\\:\\/\\/)?(?<subdomain>(\\w+\\.)*)?(?<domain>(?:\\w+|_|\\-)+\\.\\w+)\\:?\\d*(?<path>(\\/(?:\\w+|_|\\-)*)*)?(\\?(?<query>((?:\\w+|_|\\-)+\=(?:\\w+|_|\\-)+)*))?(#.*)?$/g");
+    });
+});
\ No newline at end of file
diff --git a/tests/lexer.spec.ts b/tests/lexer.spec.ts
index bed372a..410e00b 100644
--- a/tests/lexer.spec.ts
+++ b/tests/lexer.spec.ts
@@ -1,6 +1,69 @@
-describe("calculate", function() {
-    it("add", function() {
-        const result = 5 + 2;
-        expect(result).toBe(7);
-    });
-});
+/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
+
+import { Human2RegexLexer, Human2RegexLexerOptions, IndentType } from "../src/lexer";
+import { Indent } from "../src/tokens";
+
+describe("Lexer capabilities", function() {
+    const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(true));
+
+    it("validates", function() {
+        expect(() => lexer.setOptions(new Human2RegexLexerOptions(false, IndentType.Both))).not.toThrow();
+    });
+
+    it("parses nothing", function() {
+        expect(() => lexer.tokenize("")).not.toThrow();
+        expect(lexer.tokenize("").errors).toHaveLength(0);
+        expect(lexer.tokenize("").tokens).toHaveLength(0);
+
+        expect(() => lexer.tokenize("\n/* hello world */\n")).not.toThrow();
+        expect(lexer.tokenize("\n/* hello world */\n").errors).toHaveLength(0);
+        expect(lexer.tokenize("\n/* hello world */\n").tokens).toHaveLength(0);
+    });
+
+    it("parses something", function() {
+        // tabs
+        expect(() => lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"\n')).not.toThrow();
+        expect(lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"\n').errors).toHaveLength(0);
+        expect(lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"\n').tokens).toHaveLength(17);
+
+        // spaces
+        expect(() => lexer.tokenize('optionally create a group called test\n    optionally match "-" or "$/()" then "^[]"\n')).not.toThrow();
+        expect(lexer.tokenize('optionally create a group called test\n    optionally match "-" or "$/()" then "^[]"\n').errors).toHaveLength(0);
+        expect(lexer.tokenize('optionally create a group called test\n    optionally match "-" or "$/()" then "^[]"\n').tokens).toHaveLength(17);
+
+        // no EOF newline
+        expect(() => lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"')).not.toThrow();
+        expect(lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"').errors).toHaveLength(0);
+        expect(lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"').tokens).toHaveLength(17);
+
+        // Outdent
+        expect(() => lexer.tokenize('optionally create a group\n\trepeat\n\t\tmatch "-"\n\toptionally match "-" or "$/()" then "^[]"\n')).not.toThrow();
+        expect(lexer.tokenize('optionally create a group\n\trepeat\n\t\tmatch "-"\n\toptionally match "-" or "$/()" then "^[]"\n').errors).toHaveLength(0);
+        expect(lexer.tokenize('optionally create a group\n\trepeat\n\t\tmatch "-"\n\toptionally match "-" or "$/()" then "^[]"\n').tokens).toHaveLength(22);
+    });
+
+    it("fails to parse bad text", function() {
+        // double indent
+        expect(() => lexer.tokenize('optionally create a group called test\n\t\toptionally match "-" or "$/()" then "^[]"')).not.toThrow();
+        expect(lexer.tokenize('optionally create a group called test\n\t\toptionally match "-" or "$/()" then "^[]"').errors.length).toBeGreaterThan(0);
+
+        // missing " at end
+        expect(() => lexer.tokenize('optionally create a group\n\toptionally match "- or "$/()" then "^[]')).not.toThrow();
+        expect(lexer.tokenize('optionally create a group\n\toptionally match "- or "$/()" then "^[]').errors.length).toBeGreaterThan(0);
+    });
+
+    it("handles switching between tabs and spaces", function() {
+        lexer.setOptions(new Human2RegexLexerOptions(true, IndentType.Tabs));
+
+        // tabs
+        expect(() => lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"\n')).not.toThrow();
+        expect(lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"\n').errors).toHaveLength(0);
+        expect(lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"\n').tokens).toHaveLength(17);
+        expect(lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"\n').tokens.map((x) => x.tokenType)).toContain(Indent);
+
+        // spaces should be ignored
+        expect(() => lexer.tokenize('optionally create a group called test\n    optionally match "-" or "$/()" then "^[]"\n')).not.toThrow();
+        expect(lexer.tokenize('optionally create a group called test\n    optionally match "-" or "$/()" then "^[]"\n').errors).toHaveLength(0);
+        expect(lexer.tokenize('optionally create a group called test\n    optionally match "-" or "$/()" then "^[]"\n').tokens.map((x) => x.tokenType)).not.toContain(Indent);
+    });
+});
\ No newline at end of file
diff --git a/tests/parser.spec.ts b/tests/parser.spec.ts
new file mode 100644
index 0000000..f07dd34
--- /dev/null
+++ b/tests/parser.spec.ts
@@ -0,0 +1,71 @@
+/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
+
+import { Human2RegexLexer, Human2RegexLexerOptions } from "../src/lexer";
+import { Human2RegexParser, Human2RegexParserOptions } from "../src/parser";
+import { IToken } from "chevrotain";
+import { RegularExpressionCST } from "../src/generator";
+
+
+describe("Parser capabilities", function() {
+    const lexer = new Human2RegexLexer(new Human2RegexLexerOptions(true));
+    // eslint-disable-next-line init-declarations
+    let parser!: Human2RegexParser;
+
+    it("validates", function() {
+        expect(() => parser = new Human2RegexParser(new Human2RegexParserOptions(false))).not.toThrow();
+    });
+
+    it("parses nothing", function() {
+        let tokens: IToken[] = [];
+
+        tokens = lexer.tokenize("").tokens;
+        parser.input = tokens;
+        expect(() => parser.parse()).not.toThrow();
+        expect(parser.parse()).toBeInstanceOf(RegularExpressionCST);
+        expect(parser.errors.length).toEqual(0);
+
+        tokens = lexer.tokenize("\n/* hello world */\n").tokens;
+        parser.input = tokens;
+        expect(() => parser.parse()).not.toThrow();
+        expect(parser.parse()).toBeInstanceOf(RegularExpressionCST);
+        expect(parser.errors.length).toEqual(0);
+    });
+
+    it("parses something", function() {
+        let tokens: IToken[] = [];
+
+        tokens = lexer.tokenize('optionally create a group called test\n\toptionally match "-" or "$/()" then "^[]"\n').tokens;
+        parser.input = tokens;
+        expect(() => parser.parse()).not.toThrow();
+        parser.input = tokens;
+        expect(parser.parse()).toBeInstanceOf(RegularExpressionCST);
+        expect(parser.errors.length).toEqual(0);
+
+        tokens = lexer.tokenize('optionally create a group called test\n\trepeat 3..five\n\t\toptionally match "-" or "$/()" then "^[]"').tokens;
+        parser.input = tokens;
+        expect(() => parser.parse()).not.toThrow();
+        parser.input = tokens;
+        expect(parser.parse()).toBeInstanceOf(RegularExpressionCST);
+        expect(parser.errors.length).toEqual(0);
+    });
+
+    it("fails to parse bad text", function() {
+        let tokens: IToken[] = [];
+
+        tokens = lexer.tokenize('optionally create a group called\n\toptionally match "-" or "$/()" then "^[]"\n').tokens;
+        parser.input = tokens;
+        expect(() => parser.parse()).not.toThrow();
+        expect(parser.errors.length).toBeGreaterThan(0);
+
+        tokens = lexer.tokenize('optionally create a called test\n\toptionally match "-" or "$/()" then "^[]"\n').tokens;
+        parser.input = tokens;
+        expect(() => parser.parse()).not.toThrow();
+        expect(parser.errors.length).toBeGreaterThan(0);
+
+        tokens = lexer.tokenize('optionally create a group\n\toptionally match or "$/()" then "^[]"\n').tokens;
+        parser.input = tokens;
+        expect(() => parser.parse()).not.toThrow();
+        expect(parser.errors.length).toBeGreaterThan(0);
+    });
+});
+
diff --git a/tests/utilities.spec.ts b/tests/utilities.spec.ts
new file mode 100644
index 0000000..e094d0e
--- /dev/null
+++ b/tests/utilities.spec.ts
@@ -0,0 +1,108 @@
+/*! Copyright (c) 2020 Patrick Demian; Licensed under MIT */
+
+import "../src/utilities";
+import { isSingleRegexCharacter, findLastIndex, removeQuotes, regexEscape, hasFlag, combineFlags, makeFlag, first, last, CommonError } from "../src/utilities";
+import { UsingFlags, ISemanticError } from "../src/generator";
+import { IRecognitionException, ILexingError, createTokenInstance } from "chevrotain";
+import { Indent } from "../src/tokens";
+
+describe("Utility functions", function() {
+
+    it("should handle flags", function() {
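+        // makeFlag(n) produces the bit value 1 << n, so distinct flags can be
+        // combined with combineFlags() and queried with hasFlag() as a bitfield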
+        expect(makeFlag(0)).toBe(1);
+        expect(makeFlag(7)).toBe(1*2*2*2*2*2*2*2);
+
+        expect(combineFlags(UsingFlags.Exact, UsingFlags.Global)).toBe(UsingFlags.Exact + UsingFlags.Global);
+        expect(combineFlags(UsingFlags.Multiline, combineFlags(UsingFlags.Exact, UsingFlags.Global))).toBe(UsingFlags.Exact + UsingFlags.Global + UsingFlags.Multiline);
+
+        expect(hasFlag(UsingFlags.Exact, UsingFlags.Exact)).toBe(true);
+        expect(hasFlag(UsingFlags.Exact, UsingFlags.Global)).toBe(false);
+
+        expect(hasFlag(UsingFlags.Global + UsingFlags.Exact, UsingFlags.Exact)).toBe(true);
+        expect(hasFlag(UsingFlags.Global + UsingFlags.Exact, UsingFlags.Multiline)).toBe(false);
+
+        expect(hasFlag(combineFlags(UsingFlags.Global, UsingFlags.Exact), UsingFlags.Exact)).toBe(true);
+        expect(hasFlag(combineFlags(UsingFlags.Global, UsingFlags.Exact), UsingFlags.Global)).toBe(true);
+        expect(hasFlag(combineFlags(UsingFlags.Global, UsingFlags.Exact), UsingFlags.Multiline)).toBe(false);
+    });
+
+    it("should return correct array elements", function() {
+        expect(first([ 1, 2, 3 ])).toBe(1);
+        expect(last([ 1, 2, 3 ])).toBe(3);
+    });
+
+    it("should recognize single regex regular characters", function() {
+        expect(isSingleRegexCharacter("")).toBe(false);
+        expect(isSingleRegexCharacter("a")).toBe(true);
+        expect(isSingleRegexCharacter("ab")).toBe(false);
+    });
+
+    it("should recognize single regex escape characters", function() {
+        expect(isSingleRegexCharacter("\\n")).toBe(true);
+        expect(isSingleRegexCharacter("\\r\\n")).toBe(false);
+        expect(isSingleRegexCharacter("\\na")).toBe(false);
+        expect(isSingleRegexCharacter("\\?")).toBe(true);
+    });
+
+    it("should recognize single unicode characters", function() {
+        expect(isSingleRegexCharacter("\\u1")).toBe(false);
+        expect(isSingleRegexCharacter("\\u1234")).toBe(true);
+        expect(isSingleRegexCharacter("\\u1234\\u1234")).toBe(false);
+        expect(isSingleRegexCharacter("\\U12345678")).toBe(false);
+        expect(isSingleRegexCharacter("\\U1")).toBe(false);
+        expect(isSingleRegexCharacter("௹")).toBe(true);
+        expect(isSingleRegexCharacter("💩")).toBe(false);
+    });
+
+    it("should remove quotes correctly", function() {
+        expect(removeQuotes('""')).toEqual("");
+        expect(removeQuotes('"hello world"')).toEqual("hello world");
+        expect(removeQuotes('"hello"world"')).toEqual('hello"world');
+    });
+
+    it("should escape regex correctly", function() {
+        expect(regexEscape("")).toEqual("");
+        expect(regexEscape("\\$")).toEqual("\\\\\\$");
+        expect(regexEscape("^(.*)?\\?$")).toEqual("\\^\\(\\.\\*\\)\\?\\\\\\?\\$");
+        expect(regexEscape("\\p{Latin}")).toEqual("\\\\p\\{Latin\\}");
+    });
+
+    it("should find the last index of an element", function() {
+        expect(findLastIndex([], 3)).toBe(-1);
+        expect(findLastIndex([ 3, 1, 2, 3, 3 ], 3)).toBe(4);
+        expect(findLastIndex([ 3, 1, 2, 3, 3 ], 1)).toBe(1);
+        expect(findLastIndex([ 3, 1, 2, 3, 3 ], 9)).toBe(-1);
+    });
+
+    it("should generate CommonErrors correctly", function() {
+        const lex_error: ILexingError = {
+            offset: 123,
+            line: 123,
+            column: 123,
+            length: 123,
+            message: "error"
+        };
+
+        const par_error: IRecognitionException = {
+            name: "Recognition Exception",
+            message: "Mismatch at 1,1",
+            token: createTokenInstance(Indent, "", 123, 124, 123, 123, 123, 124),
+            resyncedTokens: [],
+            context: { ruleStack: [], ruleOccurrenceStack: [] }
+        };
+
+        const sem_error: ISemanticError = {
+            startLine: 123,
+            startColumn: 123,
+            length: 123,
+            message: "error"
+        };
+
+        expect(CommonError.fromLexError(lex_error)).toBeInstanceOf(CommonError);
+        expect(CommonError.fromParseError(par_error)).toBeInstanceOf(CommonError);
+        expect(CommonError.fromSemanticError(sem_error)).toBeInstanceOf(CommonError);
+
+        expect(() => CommonError.fromSemanticError(sem_error).toString()).not.toThrow();
+        expect(CommonError.fromSemanticError(sem_error).toString()).not.toBeNull();
+    });
+});
\ No newline at end of file