Files
sml-projects/shf/test/regex-tests.sml

619 lines
18 KiB
Standard ML
Raw Permalink Normal View History

2025-10-09 05:34:32 +01:00
structure RegexTests =
struct
open Railroad
open Railroad.Test
structure CiDfa = CaseInsensitiveDfa
structure CsDfa = CaseSensitiveDfa
(* Suite for CaseInsensitiveDfa: a lower-case pattern must still be found
 * in text whose letter case differs from the pattern.
 * Expected values are (first index, last index) pairs, as shown by
 * "hello" being reported at (0, 4). *)
val caseInsensitiveTests = describe "case insensitive regex"
  [ test "recognises word 'hello' in string 'Hello world'" (fn _ =>
      let
        val automaton = CiDfa.fromString "hello"
        val found = CiDfa.matchString (automaton, "Hello world")
      in
        (* only the capitalised "Hello" at the start should be reported *)
        Expect.isTrue (found = [(0, 4)])
      end)
  , test "recognises word 'world' in string 'HELLO WORLD'" (fn _ =>
      let
        val automaton = CiDfa.fromString "world"
        val found = CiDfa.matchString (automaton, "HELLO WORLD")
      in
        (* the upper-case "WORLD" occupies indices 6 through 10 *)
        Expect.isTrue (found = [(6, 10)])
      end)
  ]
2025-10-09 05:39:01 +01:00
(* Suite for CaseSensitiveDfa: a pattern is only found when the letter case
 * of the text agrees with it; otherwise the match list must be empty. *)
val caseSensitiveTests =
  let
    (* Build a case-sensitive DFA from `pattern`, scan `subject` with it and
     * require the reported spans to equal `wanted` exactly. *)
    fun expectSpans (pattern, subject, wanted) =
      let
        val automaton = CsDfa.fromString pattern
        val found = CsDfa.matchString (automaton, subject)
      in
        Expect.isTrue (found = wanted)
      end
  in
    describe "case sensitive regex"
      [ test "does not recognise word 'hello' in string 'Hello world'" (fn _ =>
          expectSpans ("hello", "Hello world", []))
      , test "recognises word 'Hello' in string 'Hello world'" (fn _ =>
          expectSpans ("Hello", "Hello world", [(0, 4)]))
      , test "does not recognise word 'world' in string 'HELLO WORLD'" (fn _ =>
          expectSpans ("world", "HELLO WORLD", []))
      , test "recognises word 'WORLD' in string 'HELLO WORLD'" (fn _ =>
          expectSpans ("WORLD", "HELLO WORLD", [(6, 10)]))
      ]
  end
(* Suite covering the end-marker character and a pattern whose final
 * element is an optional ("?") character. *)
val endMarkerTests = describe "regex endMarker"
  [ test "returns an empty DFA when regexString contains endMarker" (fn _ =>
      let
        (* the end marker is the NUL character, written \^@ in SML;
         * here it is embedded in the middle of the pattern *)
        val compiled = CsDfa.fromString "hello \^@ world"
      in
        (* the DFA is inspected as a vector; it must have no entries *)
        Expect.isTrue (Vector.length compiled = 0)
      end)
  , test "matches a string when regex has question mark at the end" (fn _ =>
      let
        val compiled = CsDfa.fromString "favou?"
        val found = CsDfa.matchString (compiled, "favo")
      in
        (* the optional trailing "u" is absent, so all of "favo" matches *)
        Expect.isTrue (found = [(0, 3)])
      end)
  ]
2025-10-09 06:06:07 +01:00
(* Assertion helper: compiles `regexString` with the case-insensitive DFA,
 * scans `inputString`, and passes only when the sole match is the single
 * character at index 6.
 * The callers in this file all pass inputs of the form "hello X world",
 * which places the probed character X at that index. *)
fun recogniseEscapeSequence (regexString, inputString) =
  let
    val found = CiDfa.matchString (CiDfa.fromString regexString, inputString)
  in
    Expect.isTrue (found = [(6, 6)])
  end
(* Assertion helper: compiles `regexString` with the case-insensitive DFA,
 * scans `inputString`, and passes only when no match at all is reported.
 * Used in this file to check that a bare metacharacter does not act as a
 * literal. *)
fun doesNotRecogniseUnescaped (regexString, inputString) =
  let
    val found = CiDfa.matchString (CiDfa.fromString regexString, inputString)
  in
    Expect.isTrue (found = [])
  end
2025-10-09 06:06:07 +01:00
(* Suite for backslash escapes inside a pattern.
 * Each pattern is a two-character regex (a backslash followed by a letter
 * or a second backslash), written "\\x" in SML source. The input contains
 * the corresponding real control character at index 6, which is where
 * recogniseEscapeSequence expects the single match. *)
val escapeSequenceTests =
  describe "regex escape sequences"
    [ test "recognises alert"
        (fn _ => recogniseEscapeSequence ("\\a", "hello \a world"))
    , test "recognises backspace"
        (fn _ => recogniseEscapeSequence ("\\b", "hello \b world"))
    , test "recognises tab"
        (fn _ => recogniseEscapeSequence ("\\t", "hello \t world"))
    , test "recognises newline"
        (fn _ => recogniseEscapeSequence ("\\n", "hello \n world"))
    , test "recognises vertical tab"
        (fn _ => recogniseEscapeSequence ("\\v", "hello \v world"))
    , test "recognises form feed"
        (fn _ => recogniseEscapeSequence ("\\f", "hello \f world"))
    , test "recognises carriage return"
        (fn _ => recogniseEscapeSequence ("\\r", "hello \r world"))
    , test "recognises backslash"
        (fn _ => recogniseEscapeSequence ("\\\\", "hello \\ world"))
    ]
(* Suite for regex metacharacters.
 * First half: a backslash-escaped metacharacter must match the literal
 * character, which sits at index 6 of every input.
 * Second half: the same metacharacter given bare as the whole pattern must
 * produce no match.
 * There is no bare "." case in the second half; NOTE(review): presumably
 * because an unescaped "." is the any-character wildcard, as the "hu." and
 * ".un" tests elsewhere in this file rely on. *)
val metacharacterEscapeTest = describe "regex metacharacter escape sequences"
  [ test "recognises (" (fn _ =>
      recogniseEscapeSequence ("\\(", "hello ( world"))
  , test "recognises )" (fn _ =>
      recogniseEscapeSequence ("\\)", "hello ) world"))
  , test "recognises [" (fn _ =>
      recogniseEscapeSequence ("\\[", "hello [ world"))
  , test "recognises ]" (fn _ =>
      recogniseEscapeSequence ("\\]", "hello ] world"))
  , test "recognises +" (fn _ =>
      recogniseEscapeSequence ("\\+", "hello + world"))
  , test "recognises |" (fn _ =>
      recogniseEscapeSequence ("\\|", "hello | world"))
  , test "recognises ?" (fn _ =>
      recogniseEscapeSequence ("\\?", "hello ? world"))
  , test "recognises ." (fn _ =>
      recogniseEscapeSequence ("\\.", "hello . world"))
  , test "recognises -" (fn _ =>
      recogniseEscapeSequence ("\\-", "hello - world"))
  (* checking that unescaped metacharacter is not recognised *)
  , test "does not recognise (" (fn _ =>
      doesNotRecogniseUnescaped ("(", "hello ( world"))
  , test "does not recognise )" (fn _ =>
      doesNotRecogniseUnescaped (")", "hello ) world"))
  , test "does not recognise [" (fn _ =>
      doesNotRecogniseUnescaped ("[", "hello [ world"))
  (* the pattern here was previously "[", which only repeated the case
   * above and never exercised a bare closing bracket *)
  , test "does not recognise ]" (fn _ =>
      doesNotRecogniseUnescaped ("]", "hello ] world"))
  , test "does not recognise +" (fn _ =>
      doesNotRecogniseUnescaped ("+", "hello + world"))
  , test "does not recognise |" (fn _ =>
      doesNotRecogniseUnescaped ("|", "hello | world"))
  , test "does not recognise ?" (fn _ =>
      doesNotRecogniseUnescaped ("?", "hello ? world"))
  , test "does not recognise -" (fn _ =>
      doesNotRecogniseUnescaped ("-", "hello - world"))
  ]
(* tests based on regex tutorial by FreeCodeCamp
 *
 * Each test compiles one pattern, scans one or more sentences and compares
 * the reported spans with hand-computed (first index, last index) pairs.
 * A DFA is always matched with the same module that built it (CsDfa for
 * case-sensitive, CiDfa for case-insensitive). *)
val freeCodeCampTests =
  let
    (* Turns a list of positions into one-character spans: i becomes (i, i).
     * Used for character-class patterns, where every match is one char. *)
    fun singles positions = List.map (fn i => (i, i)) positions
  in
    describe "regex freeCodeCamp tests"
      [ test "The dog chased the cat" (fn _ =>
          let
            (* arrange *)
            val sentence = "The dog chased the cat"
            val regexString = "the"
            val caseSensitiveDfa = CsDfa.fromString regexString
            val caseInsensitiveDfa = CiDfa.fromString regexString
            (* act *)
            val caseSensitiveMatches =
              CsDfa.matchString (caseSensitiveDfa, sentence)
            val caseInsensitiveMatches =
              CiDfa.matchString (caseInsensitiveDfa, sentence)
            (* assert: the leading "The" only counts when case is ignored *)
            val isExpected =
              caseSensitiveMatches = [(15, 17)]
              andalso caseInsensitiveMatches = [(0, 2), (15, 17)]
          in
            Expect.isTrue isExpected
          end)
      , test "Somewhere Waldo is hiding in this text." (fn _ =>
          let
            (* arrange *)
            val sentence = "Somewhere Waldo is hiding in this text."
            val dfa = CsDfa.fromString "Waldo"
            (* act *)
            val matches = CsDfa.matchString (dfa, sentence)
          in
            (* assert *)
            Expect.isTrue ([(10, 14)] = matches)
          end)
      , test "James has a pet cat." (fn _ =>
          let
            (* arrange *)
            val sentence = "James has a pet cat."
            val dfa = CsDfa.fromString "dog|cat|bird|fish"
            (* act *)
            val matches = CsDfa.matchString (dfa, sentence)
          in
            (* assert: only the "cat" alternative occurs in the sentence *)
            Expect.isTrue ([(16, 18)] = matches)
          end)
      , test "Ignore Case While Matching" (fn _ =>
          let
            (* arrange *)
            val sentence = "freeCodeCamp"
            val dfa = CiDfa.fromString "freecodecamp"
            (* act *)
            val matches = CiDfa.matchString (dfa, sentence)
          in
            (* assert *)
            Expect.isTrue ([(0, 11)] = matches)
          end)
      , test "Extract the word 'coding' from this string" (fn _ =>
          let
            (* arrange *)
            val sentence = "Extract the word 'coding' from this string"
            val dfa = CsDfa.fromString "coding"
            (* act *)
            val matches = CsDfa.matchString (dfa, sentence)
          in
            (* assert *)
            Expect.isTrue ([(18, 23)] = matches)
          end)
      , test "Repeat, Repeat, Repeat" (fn _ =>
          let
            (* arrange *)
            val sentence = "Repeat, Repeat, Repeat"
            val dfa = CsDfa.fromString "Repeat"
            (* act *)
            val matches = CsDfa.matchString (dfa, sentence)
          in
            (* assert: every occurrence is reported, in order *)
            Expect.isTrue ([(0, 5), (8, 13), (16, 21)] = matches)
          end)
      , test "Twinkle, twinkle, little start" (fn _ =>
          let
            (* arrange *)
            val sentence = "Twinkle, twinkle, little start"
            val dfa = CiDfa.fromString "twinkle"
            (* act *)
            val matches = CiDfa.matchString (dfa, sentence)
          in
            (* assert *)
            Expect.isTrue ([(0, 6), (9, 15)] = matches)
          end)
      , test "hu. regex" (fn _ =>
          let
            (* arrange *)
            val regexString = "hu."
            val dfa = CsDfa.fromString regexString
            val humStr = "To mock a humming bird"
            val hugStr = "Bear hug"
            (* act: the DFA comes from CsDfa, so it is run with
             * CsDfa.matchString (it was previously passed to
             * CiDfa.matchString, mixing the two modules) *)
            val humMatches = CsDfa.matchString (dfa, humStr)
            val hugMatches = CsDfa.matchString (dfa, hugStr)
            (* assert *)
            val isExpected =
              humMatches = [(10, 12)]
              andalso hugMatches = [(5, 7)]
          in
            Expect.isTrue isExpected
          end)
      , test "Let's have fun with regular expressions!" (fn _ =>
          let
            (* arrange *)
            val sentence = "Let's have fun with regular expressions!"
            val dfa = CsDfa.fromString ".un"
            (* act *)
            val matches = CsDfa.matchString (dfa, sentence)
          in
            (* assert *)
            Expect.isTrue (matches = [(11, 13)])
          end)
      , test "Beware of bugs in the above code" (fn _ =>
          let
            (* arrange: the string gap joins both lines with no separator *)
            val sentence =
              "Beware of bugs in the above code;\
              \I have only proved it correct, not tried it."
            val dfa = CiDfa.fromString "[aeiou]"
            (* act *)
            val matches = CiDfa.matchString (dfa, sentence)
            (* assert: one span per vowel; index 33 is the capital "I" *)
            val expectedMatches = singles
              [ 1, 3, 5, 7, 11, 15, 20, 22, 24, 26, 29, 31, 33
              , 36, 38, 40, 47, 49, 52, 56, 59, 65, 70, 71, 74
              ]
          in
            Expect.isTrue (matches = expectedMatches)
          end)
      , test "The quick brown fox jumps over the lazy dog." (fn _ =>
          let
            (* arrange *)
            val sentence = "The quick brown fox jumps over the lazy dog."
            val dfa = CiDfa.fromString "[a-zA-Z]"
            (* act *)
            val matches = CiDfa.matchString (dfa, sentence)
            (* assert: every letter is its own span; spaces and the final
             * full stop are skipped. One row per word. *)
            val expectedMatches = singles
              [ 0, 1, 2
              , 4, 5, 6, 7, 8
              , 10, 11, 12, 13, 14
              , 16, 17, 18
              , 20, 21, 22, 23, 24
              , 26, 27, 28, 29
              , 31, 32, 33
              , 35, 36, 37, 38
              , 40, 41, 42
              ]
          in
            Expect.isTrue (matches = expectedMatches)
          end)
      , test "Blueberry 3.141592653s are delicious." (fn _ =>
          let
            (* arrange *)
            val sentence = "Blueberry 3.141592653s are delicious."
            val dfa = CiDfa.fromString "[2-6h-s]"
            (* act *)
            val matches = CiDfa.matchString (dfa, sentence)
            (* assert: digits 2-6 and letters h-s, one span each *)
            val expectedMatches = singles
              [ 1, 6, 7, 10, 13, 15, 17, 18, 19
              , 20, 21, 24, 29, 30, 32, 33, 35
              ]
          in
            Expect.isTrue (expectedMatches = matches)
          end)
      , test "3 blind mice." (fn _ =>
          let
            (* arrange *)
            val sentence = "3 blind mice."
            val dfa = CiDfa.fromString "[^0-9aeiou]"
            (* act *)
            val matches = CiDfa.matchString (dfa, sentence)
            (* assert: everything except digits and vowels, including the
             * spaces (1, 7) and the full stop (12) *)
            val expectedMatches = singles [1, 2, 3, 5, 6, 7, 8, 10, 12]
          in
            Expect.isTrue (expectedMatches = matches)
          end)
      , test "Mississipi" (fn _ =>
          let
            (* arrange *)
            val sentence = "Mississipi"
            val dfa = CiDfa.fromString "s+"
            (* act *)
            val matches = CiDfa.matchString (dfa, sentence)
          in
            (* assert: each run of "s" is reported as one span *)
            Expect.isTrue ([(2, 3), (5, 6)] = matches)
          end)
      , test "goooal" (fn _ =>
          let
            (* arrange *)
            val soccerSentence = "goooal"
            val gPhrase = "gut feeling"
            val oPhrase = "over the moon"
            val dfa = CsDfa.fromString "go*"
            (* act *)
            val soccerMatches = CsDfa.matchString (dfa, soccerSentence)
            val gPhraseMatches = CsDfa.matchString (dfa, gPhrase)
            val oPhraseMatches = CsDfa.matchString (dfa, oPhrase)
            (* assert: a lone "g" matches, but "o" without a "g" does not *)
            val isExpected =
              soccerMatches = [(0, 3)]
              andalso gPhraseMatches = [(0, 0), (10, 10)]
              andalso oPhraseMatches = []
          in
            Expect.isTrue isExpected
          end)
      , test "chewie quote" (fn _ =>
          let
            (* arrange *)
            val sentence = "Aaaaaaargh"
            val dfa = CsDfa.fromString "Aa*"
            (* act *)
            val matches = CsDfa.matchString (dfa, sentence)
          in
            (* assert *)
            Expect.isTrue (matches = [(0, 6)])
          end)
      , test "favorite" (fn _ =>
          let
            (* arrange *)
            val sentenceWithoutU = "favorite"
            val sentenceWithU = "favourite"
            val dfa = CsDfa.fromString "favou?rite"
            (* act *)
            val matchesWithoutU = CsDfa.matchString (dfa, sentenceWithoutU)
            val matchesWithU = CsDfa.matchString (dfa, sentenceWithU)
            (* assert: both spellings match in full *)
            val isExpected =
              matchesWithoutU = [(0, 7)]
              andalso matchesWithU = [(0, 8)]
          in
            Expect.isTrue isExpected
          end)
      ]
  end
2025-10-09 06:06:07 +01:00
(* Every suite defined in this structure, in declaration order. *)
val tests =
  [ caseInsensitiveTests, caseSensitiveTests
  , endMarkerTests
  , escapeSequenceTests, metacharacterEscapeTest
  , freeCodeCampTests
  ]
2025-10-09 05:34:32 +01:00
end