(* Test suite for the regex-to-DFA matcher.
 *
 * Each test follows the arrange / act / assert layout: a DFA is built from a
 * regex string with `fromString`, run over an input with `matchString`, and
 * the result is compared against an expected list of integer pairs.
 * Judging by the expected values used throughout (for example "hello" in
 * "Hello world" gives [(0, 4)]), each pair is a zero-based, inclusive
 * (start, finish) span of one match.
 *
 * `tests` at the bottom collects every group defined in this structure. *)
structure RegexTests =
struct
  open Railroad
  open Railroad.Test

  (* Short aliases for the two matcher flavours under test. *)
  structure CiDfa = CaseInsensitiveDfa
  structure CsDfa = CaseSensitiveDfa

  (* The case-insensitive matcher finds a lowercase pattern in input that
   * uses different letter case. *)
  val caseInsensitiveTests = describe "case insensitive regex"
    [ test "recognises word 'hello' in string 'Hello world'" (fn _ =>
        let
          (* arrange *)
          val regexString = "hello"
          val dfa = CiDfa.fromString regexString
          val inputString = "Hello world"

          (* act *)
          val matches = CiDfa.matchString (dfa, inputString)

          (* assert *)
          val expectedMatches = [(0, 4)]
        in
          Expect.isTrue (matches = expectedMatches)
        end)

    , test "recognises word 'world' in string 'HELLO WORLD'" (fn _ =>
        let
          (* arrange *)
          val regexString = "world"
          val dfa = CiDfa.fromString regexString
          val inputString = "HELLO WORLD"

          (* act *)
          val matches = CiDfa.matchString (dfa, inputString)

          (* assert *)
          val expectedMatches = [(6, 10)]
        in
          Expect.isTrue (matches = expectedMatches)
        end)
    ]

  (* The case-sensitive matcher only matches when letter case agrees. *)
  val caseSensitiveTests = describe "case sensitive regex"
    [ test "does not recognise word 'hello' in string 'Hello world'" (fn _ =>
        let
          (* arrange *)
          val regexString = "hello"
          val dfa = CsDfa.fromString regexString
          val inputString = "Hello world"

          (* act *)
          val matches = CsDfa.matchString (dfa, inputString)

          (* assert *)
          val expectedMatches = []
        in
          Expect.isTrue (matches = expectedMatches)
        end)

    , test "recognises word 'Hello' in string 'Hello world'" (fn _ =>
        let
          (* arrange *)
          val regexString = "Hello"
          val dfa = CsDfa.fromString regexString
          val inputString = "Hello world"

          (* act *)
          val matches = CsDfa.matchString (dfa, inputString)

          (* assert *)
          val expectedMatches = [(0, 4)]
        in
          Expect.isTrue (matches = expectedMatches)
        end)

    , test "does not recognise word 'world' in string 'HELLO WORLD'" (fn _ =>
        let
          (* arrange *)
          val regexString = "world"
          val dfa = CsDfa.fromString regexString
          val inputString = "HELLO WORLD"

          (* act *)
          val matches = CsDfa.matchString (dfa, inputString)

          (* assert *)
          val expectedMatches = []
        in
          Expect.isTrue (matches = expectedMatches)
        end)

    , test "recognises word 'WORLD' in string 'HELLO WORLD'" (fn _ =>
        let
          (* arrange *)
          val regexString = "WORLD"
          val dfa = CsDfa.fromString regexString
          val inputString = "HELLO WORLD"

          (* act *)
          val matches = CsDfa.matchString (dfa, inputString)

          (* assert *)
          val expectedMatches = [(6, 10)]
        in
          Expect.isTrue (matches = expectedMatches)
        end)
    ]

  (* A regex string containing the end marker character is expected to
   * produce a DFA vector of length zero. *)
  val endMarkerTests = describe "regex endMarker"
    [ test "returns an empty DFA when regexString contains endMarker" (fn _ =>
        let
          (* arrange *)
          (* the end marker is #"\^@" *)
          val regexString = "hello \^@ world"

          (* act *)
          val dfa = CsDfa.fromString regexString

          (* assert *)
          val actualLength = Vector.length dfa
          val expectedLength = 0
        in
          Expect.isTrue (actualLength = expectedLength)
        end)
    ]

  (* Shared assertion for the escape tests: builds a case-insensitive DFA from
   * regexString and expects exactly one single-character match at index 6 of
   * inputString. Every caller passes an input of the form "hello X world",
   * where the character of interest sits at index 6. *)
  fun recogniseEscapeSequence (regexString, inputString) =
    let
      (* arrange *)
      val dfa = CiDfa.fromString regexString

      (* act *)
      val matches = CiDfa.matchString (dfa, inputString)

      (* assert *)
      val expectedMatches = [(6, 6)]
    in
      Expect.isTrue (matches = expectedMatches)
    end

  (* Shared assertion for the negative metacharacter tests: builds a
   * case-insensitive DFA from regexString and expects no matches at all in
   * inputString. *)
  fun doesNotRecogniseUnescaped (regexString, inputString) =
    let
      (* arrange *)
      val dfa = CiDfa.fromString regexString

      (* act *)
      val matches = CiDfa.matchString (dfa, inputString)

      (* assert *)
      val expectedMatches = []
    in
      Expect.isTrue (matches = expectedMatches)
    end

  (* The regex argument holds a literal backslash followed by a letter (the
   * SML string "\\a" is the two characters backslash and a), while the input
   * holds the actual control character produced by the SML escape. *)
  val escapeSequenceTests = describe "regex escape sequences"
    [ test "recognises alert" (fn _ =>
        recogniseEscapeSequence ("\\a", "hello \a world"))
    , test "recognises backspace" (fn _ =>
        recogniseEscapeSequence ("\\b", "hello \b world"))
    , test "recognises tab" (fn _ =>
        recogniseEscapeSequence ("\\t", "hello \t world"))
    , test "recognises newline" (fn _ =>
        recogniseEscapeSequence ("\\n", "hello \n world"))
    , test "recognises vertical tab" (fn _ =>
        recogniseEscapeSequence ("\\v", "hello \v world"))
    , test "recognises form feed" (fn _ =>
        recogniseEscapeSequence ("\\f", "hello \f world"))
    , test "recognises carriage return" (fn _ =>
        recogniseEscapeSequence ("\\r", "hello \r world"))
    , test "recognises backslash" (fn _ =>
        recogniseEscapeSequence ("\\\\", "hello \\ world"))
    ]

  (* A backslash-escaped metacharacter must match itself literally, and the
   * same metacharacter left unescaped on its own must match nothing. *)
  val metacharacterEscapeTest =
    describe "regex metacharacter escape sequences"
      [ test "recognises (" (fn _ =>
          recogniseEscapeSequence ("\\(", "hello ( world"))
      , test "recognises )" (fn _ =>
          recogniseEscapeSequence ("\\)", "hello ) world"))
      , test "recognises [" (fn _ =>
          recogniseEscapeSequence ("\\[", "hello [ world"))
      , test "recognises ]" (fn _ =>
          recogniseEscapeSequence ("\\]", "hello ] world"))
      , test "recognises +" (fn _ =>
          recogniseEscapeSequence ("\\+", "hello + world"))
      , test "recognises |" (fn _ =>
          recogniseEscapeSequence ("\\|", "hello | world"))
      , test "recognises ?" (fn _ =>
          recogniseEscapeSequence ("\\?", "hello ? world"))
      , test "recognises ." (fn _ =>
          recogniseEscapeSequence ("\\.", "hello . world"))
      , test "recognises -" (fn _ =>
          recogniseEscapeSequence ("\\-", "hello - world"))

      (* checking that unescaped metacharacter is not recognised *)
      , test "does not recognise (" (fn _ =>
          doesNotRecogniseUnescaped ("(", "hello ( world"))
      , test "does not recognise )" (fn _ =>
          doesNotRecogniseUnescaped (")", "hello ) world"))
      , test "does not recognise [" (fn _ =>
          doesNotRecogniseUnescaped ("[", "hello [ world"))
      (* The regex here used to be an opening bracket, duplicating the test
       * above; it now passes the closing bracket named in the title.
       * NOTE(review): assumes a lone closing bracket is rejected by the
       * parser like the other metacharacters - confirm. *)
      , test "does not recognise ]" (fn _ =>
          doesNotRecogniseUnescaped ("]", "hello ] world"))
      , test "does not recognise +" (fn _ =>
          doesNotRecogniseUnescaped ("+", "hello + world"))
      , test "does not recognise |" (fn _ =>
          doesNotRecogniseUnescaped ("|", "hello | world"))
      , test "does not recognise ?" (fn _ =>
          doesNotRecogniseUnescaped ("?", "hello ? world"))
      , test "does not recognise -" (fn _ =>
          doesNotRecogniseUnescaped ("-", "hello - world"))
      ]

  (* tests based on regex tutorial by FreeCodeCamp *)
  val freeCodeCampTests = describe "regex freeCodeCamp tests"
    [ test "The dog chased the cat" (fn _ =>
        let
          (* arrange *)
          val sentence = "The dog chased the cat"
          val regexString = "the"
          val caseSensitiveDfa = CsDfa.fromString regexString
          val caseInsensitiveDfa = CiDfa.fromString regexString

          (* act *)
          val caseSensitiveMatches =
            CsDfa.matchString (caseSensitiveDfa, sentence)
          val caseInsensitiveMatches =
            CiDfa.matchString (caseInsensitiveDfa, sentence)

          (* assert *)
          val expectedCaseSensitive = [(15, 17)]
          val expectedCaseInsensitive = [(0, 2), (15, 17)]
          val expected =
            caseSensitiveMatches = expectedCaseSensitive
            andalso caseInsensitiveMatches = expectedCaseInsensitive
        in
          Expect.isTrue expected
        end)

    , test "Somewhere Waldo is hiding in this text." (fn _ =>
        let
          (* arrange *)
          val sentence = "Somewhere Waldo is hiding in this text."
          val regexString = "Waldo"
          val dfa = CsDfa.fromString regexString

          (* act *)
          val matches = CsDfa.matchString (dfa, sentence)

          (* assert *)
          val expectedMatches = [(10, 14)]
        in
          Expect.isTrue (expectedMatches = matches)
        end)

    , test "James has a pet cat." (fn _ =>
        let
          (* arrange *)
          val sentence = "James has a pet cat."
          val regexString = "dog|cat|bird|fish"
          val dfa = CsDfa.fromString regexString

          (* act *)
          val matches = CsDfa.matchString (dfa, sentence)

          (* assert *)
          val expectedMatches = [(16, 18)]
        in
          Expect.isTrue (expectedMatches = matches)
        end)

    , test "Ignore Case While Matching" (fn _ =>
        let
          (* arrange *)
          val sentence = "freeCodeCamp"
          val regexString = "freecodecamp"
          val dfa = CiDfa.fromString regexString

          (* act *)
          val matches = CiDfa.matchString (dfa, sentence)

          (* assert *)
          val expectedMatches = [(0, 11)]
        in
          Expect.isTrue (expectedMatches = matches)
        end)

    , test "Extract the word 'coding' from this string" (fn _ =>
        let
          (* arrange *)
          val sentence = "Extract the word 'coding' from this string"
          val regexString = "coding"
          val dfa = CsDfa.fromString regexString

          (* act *)
          val matches = CsDfa.matchString (dfa, sentence)

          (* assert *)
          val expectedMatches = [(18, 23)]
        in
          Expect.isTrue (expectedMatches = matches)
        end)

    , test "Repeat, Repeat, Repeat" (fn _ =>
        let
          (* arrange *)
          val sentence = "Repeat, Repeat, Repeat"
          val regexString = "Repeat"
          val dfa = CsDfa.fromString regexString

          (* act *)
          val matches = CsDfa.matchString (dfa, sentence)

          (* assert *)
          val expectedMatches = [(0, 5), (8, 13), (16, 21)]
        in
          Expect.isTrue (expectedMatches = matches)
        end)

    , test "Twinkle, twinkle, little start" (fn _ =>
        let
          (* arrange *)
          val sentence = "Twinkle, twinkle, little start"
          val regexString = "twinkle"
          val dfa = CiDfa.fromString regexString

          (* act *)
          val matches = CiDfa.matchString (dfa, sentence)

          (* assert *)
          val expectedMatches = [(0, 6), (9, 15)]
        in
          Expect.isTrue (expectedMatches = matches)
        end)

    , test "hu. regex" (fn _ =>
        let
          (* arrange *)
          val regexString = "hu."
          val dfa = CsDfa.fromString regexString
          val humStr = "To mock a humming bird"
          val hugStr = "Bear hug"

          (* act *)
          (* Match with the same structure that built the DFA; this test
           * previously built with CsDfa but matched through CiDfa. *)
          val humMatches = CsDfa.matchString (dfa, humStr)
          val hugMatches = CsDfa.matchString (dfa, hugStr)

          (* assert *)
          val expectedHumMatches = [(10, 12)]
          val expectedHugMatches = [(5, 7)]
          val isExpected =
            humMatches = expectedHumMatches
            andalso hugMatches = expectedHugMatches
        in
          Expect.isTrue isExpected
        end)

    , test "Let's have fun with regular expressions!" (fn _ =>
        let
          (* arrange *)
          val sentence = "Let's have fun with regular expressions!"
          val regexString = ".un"
          val dfa = CsDfa.fromString regexString

          (* act *)
          val matches = CsDfa.matchString (dfa, sentence)

          (* assert *)
          val expectedMatches = [(11, 13)]
        in
          Expect.isTrue (matches = expectedMatches)
        end)

    , test "Beware of bugs in the above code" (fn _ =>
        let
          (* arrange *)
          (* The backslash pair is an SML string gap: the two pieces are
           * joined with nothing between them, so ";" is directly followed
           * by "I" and the indices below rely on that. *)
          val sentence =
            "Beware of bugs in the above code;\
            \I have only proved it correct, not tried it."
          val regexString = "[aeiou]"
          val dfa = CiDfa.fromString regexString

          (* act *)
          val matches = CiDfa.matchString (dfa, sentence)

          (* assert *)
          val expectedMatches =
            [ (1, 1)
            , (3, 3)
            , (5, 5)
            , (7, 7)
            , (11, 11)
            , (15, 15)
            , (20, 20)
            , (22, 22)
            , (24, 24)
            , (26, 26)
            , (29, 29)
            , (31, 31)
            , (33, 33)
            , (36, 36)
            , (38, 38)
            , (40, 40)
            , (47, 47)
            , (49, 49)
            , (52, 52)
            , (56, 56)
            , (59, 59)
            , (65, 65)
            , (70, 70)
            , (71, 71)
            , (74, 74)
            ]
        in
          Expect.isTrue (matches = expectedMatches)
        end)

    , test "The quick brown fox jumps over the lazy dog." (fn _ =>
        let
          (* arrange *)
          val sentence = "The quick brown fox jumps over the lazy dog."
          val regexString = "[a-zA-Z]"
          val dfa = CiDfa.fromString regexString

          (* act *)
          val matches = CiDfa.matchString (dfa, sentence)

          (* assert *)
          (* every letter matches on its own; spaces and the final full
           * stop are skipped *)
          val expectedMatches =
            [ (0, 0)
            , (1, 1)
            , (2, 2)
            , (4, 4)
            , (5, 5)
            , (6, 6)
            , (7, 7)
            , (8, 8)
            , (10, 10)
            , (11, 11)
            , (12, 12)
            , (13, 13)
            , (14, 14)
            , (16, 16)
            , (17, 17)
            , (18, 18)
            , (20, 20)
            , (21, 21)
            , (22, 22)
            , (23, 23)
            , (24, 24)
            , (26, 26)
            , (27, 27)
            , (28, 28)
            , (29, 29)
            , (31, 31)
            , (32, 32)
            , (33, 33)
            , (35, 35)
            , (36, 36)
            , (37, 37)
            , (38, 38)
            , (40, 40)
            , (41, 41)
            , (42, 42)
            ]
        in
          Expect.isTrue (matches = expectedMatches)
        end)

    , test "Blueberry 3.141592653s are delicious." (fn _ =>
        let
          (* arrange *)
          val sentence = "Blueberry 3.141592653s are delicious."
          val regexString = "[2-6h-s]"
          val dfa = CiDfa.fromString regexString

          (* act *)
          val matches = CiDfa.matchString (dfa, sentence)

          (* assert *)
          val expectedMatches =
            [ (1, 1)
            , (6, 6)
            , (7, 7)
            , (10, 10)
            , (13, 13)
            , (15, 15)
            , (17, 17)
            , (18, 18)
            , (19, 19)
            , (20, 20)
            , (21, 21)
            , (24, 24)
            , (29, 29)
            , (30, 30)
            , (32, 32)
            , (33, 33)
            , (35, 35)
            ]
        in
          Expect.isTrue (expectedMatches = matches)
        end)

    , test "3 blind mice." (fn _ =>
        let
          (* arrange *)
          val sentence = "3 blind mice."
          val regexString = "[^0-9aeiou]"
          val dfa = CiDfa.fromString regexString

          (* act *)
          val matches = CiDfa.matchString (dfa, sentence)

          (* assert *)
          val expectedMatches =
            [ (1, 1)
            , (2, 2)
            , (3, 3)
            , (5, 5)
            , (6, 6)
            , (7, 7)
            , (8, 8)
            , (10, 10)
            , (12, 12)
            ]
        in
          Expect.isTrue (expectedMatches = matches)
        end)

    , test "Mississipi" (fn _ =>
        let
          (* arrange *)
          val sentence = "Mississipi"
          val regexString = "s+"
          val dfa = CiDfa.fromString regexString

          (* act *)
          val matches = CiDfa.matchString (dfa, sentence)

          (* assert *)
          (* each run of consecutive "s" characters is reported as one span *)
          val expectedMatches = [(2, 3), (5, 6)]
        in
          Expect.isTrue (expectedMatches = matches)
        end)

    , test "goooal" (fn _ =>
        let
          (* arrange *)
          val soccerSentence = "goooal"
          val gPhrase = "gut feeling"
          val oPhrase = "over the moon"
          val goRegex = "go*"
          val dfa = CsDfa.fromString goRegex

          (* act *)
          val soccerMatches = CsDfa.matchString (dfa, soccerSentence)
          val gPhraseMatches = CsDfa.matchString (dfa, gPhrase)
          val oPhraseMatches = CsDfa.matchString (dfa, oPhrase)

          (* assert *)
          val expectedSoccerMatches = [(0, 3)]
          val expectedGPhraseMatches = [(0, 0)]
          val expectedOPhraseMatches = []
          val isExpected =
            soccerMatches = expectedSoccerMatches
            andalso gPhraseMatches = expectedGPhraseMatches
            andalso oPhraseMatches = expectedOPhraseMatches
        in
          Expect.isTrue isExpected
        end)
    ]

  (* Every test group defined above, in declaration order. *)
  val tests =
    [ caseInsensitiveTests
    , caseSensitiveTests
    , endMarkerTests
    , escapeSequenceTests
    , metacharacterEscapeTest
    , freeCodeCampTests
    ]
end