(* Standard ML source: 619 lines, 18 KiB *)
structure RegexTests =
|
|
struct
|
|
open Railroad
|
|
open Railroad.Test
|
|
|
|
structure CiDfa = CaseInsensitiveDfa
|
|
structure CsDfa = CaseSensitiveDfa
|
|
|
|
val caseInsensitiveTests = describe "case insensitive regex"
|
|
[ test "recognises word 'hello' in string 'Hello world'" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val regexString = "hello"
|
|
val dfa = CiDfa.fromString regexString
|
|
val inputString = "Hello world"
|
|
|
|
(* act *)
|
|
val matches = CiDfa.matchString (dfa, inputString)
|
|
|
|
(* assert *)
|
|
val expectedMatches = [(0, 4)]
|
|
in
|
|
Expect.isTrue (matches = expectedMatches)
|
|
end)
|
|
, test "recognises word 'world' in string 'HELLO WORLD'" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val regexString = "world"
|
|
val dfa = CiDfa.fromString regexString
|
|
val inputString = "HELLO WORLD"
|
|
|
|
(* act *)
|
|
val matches = CiDfa.matchString (dfa, inputString)
|
|
|
|
(* assert *)
|
|
val expectedMatches = [(6, 10)]
|
|
in
|
|
Expect.isTrue (matches = expectedMatches)
|
|
end)
|
|
]
|
|
|
|
val caseSensitiveTests = describe "case sensitive regex"
|
|
[ test "does not recognise word 'hello' in string 'Hello world'" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val regexString = "hello"
|
|
val dfa = CsDfa.fromString regexString
|
|
val inputString = "Hello world"
|
|
|
|
(* act *)
|
|
val matches = CsDfa.matchString (dfa, inputString)
|
|
|
|
(* assert *)
|
|
val expectedMatches = []
|
|
in
|
|
Expect.isTrue (matches = expectedMatches)
|
|
end)
|
|
, test "recognises word 'Hello' in string 'Hello world'" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val regexString = "Hello"
|
|
val dfa = CsDfa.fromString regexString
|
|
val inputString = "Hello world"
|
|
|
|
(* act *)
|
|
val matches = CsDfa.matchString (dfa, inputString)
|
|
|
|
(* assert *)
|
|
val expectedMatches = [(0, 4)]
|
|
in
|
|
Expect.isTrue (matches = expectedMatches)
|
|
end)
|
|
, test "does not recognise word 'world' in string 'HELLO WORLD'" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val regexString = "world"
|
|
val dfa = CsDfa.fromString regexString
|
|
val inputString = "HELLO WORLD"
|
|
|
|
(* act *)
|
|
val matches = CsDfa.matchString (dfa, inputString)
|
|
|
|
(* assert *)
|
|
val expectedMatches = []
|
|
in
|
|
Expect.isTrue (matches = expectedMatches)
|
|
end)
|
|
, test "recognises word 'WORLD' in string 'HELLO WORLD'" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val regexString = "WORLD"
|
|
val dfa = CsDfa.fromString regexString
|
|
val inputString = "HELLO WORLD"
|
|
|
|
(* act *)
|
|
val matches = CsDfa.matchString (dfa, inputString)
|
|
|
|
(* assert *)
|
|
val expectedMatches = [(6, 10)]
|
|
in
|
|
Expect.isTrue (matches = expectedMatches)
|
|
end)
|
|
]
|
|
|
|
val endMarkerTests = describe "regex endMarker"
|
|
[ test "returns an empty DFA when regexString contains endMarker" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
(* the end marker is #"\^@" *)
|
|
val regexString = "hello \^@ world"
|
|
|
|
(* act *)
|
|
val dfa = CsDfa.fromString regexString
|
|
|
|
(* assert *)
|
|
val actualLength = Vector.length dfa
|
|
val expectedLength = 0
|
|
in
|
|
Expect.isTrue (actualLength = expectedLength)
|
|
end)
|
|
, test "matches a string when regex has question mark at the end" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val sentence = "favo"
|
|
val regexString = "favou?"
|
|
val dfa = CsDfa.fromString regexString
|
|
|
|
(* act *)
|
|
val matches = CsDfa.matchString (dfa, sentence)
|
|
|
|
(* assert *)
|
|
val expectedMatches = [(0, 3)]
|
|
in
|
|
Expect.isTrue (matches = expectedMatches)
|
|
end)
|
|
]
|
|
|
|
fun recogniseEscapeSequence (regexString, inputString) =
|
|
let
|
|
(* arrange *)
|
|
val dfa = CiDfa.fromString regexString
|
|
|
|
(* act *)
|
|
val matches = CiDfa.matchString (dfa, inputString)
|
|
|
|
(* assert *)
|
|
val expectedMatches = [(6, 6)]
|
|
in
|
|
Expect.isTrue (matches = expectedMatches)
|
|
end
|
|
|
|
fun doesNotRecogniseUnescaped (regexString, inputString) =
|
|
let
|
|
(* arrange *)
|
|
val dfa = CiDfa.fromString regexString
|
|
|
|
(* act *)
|
|
val matches = CiDfa.matchString (dfa, inputString)
|
|
|
|
(* assert *)
|
|
val expectedMatches = []
|
|
in
|
|
Expect.isTrue (matches = expectedMatches)
|
|
end
|
|
|
|
val escapeSequenceTests = describe "regex escape sequences"
|
|
[ test "recognises alert" (fn _ =>
|
|
recogniseEscapeSequence ("\\a", "hello \a world"))
|
|
, test "recognises backspace" (fn _ =>
|
|
recogniseEscapeSequence ("\\b", "hello \b world"))
|
|
, test "recognises tab" (fn _ =>
|
|
recogniseEscapeSequence ("\\t", "hello \t world"))
|
|
, test "recognises newline" (fn _ =>
|
|
recogniseEscapeSequence ("\\n", "hello \n world"))
|
|
, test "recognises vertical tab" (fn _ =>
|
|
recogniseEscapeSequence ("\\v", "hello \v world"))
|
|
, test "recognises form feed" (fn _ =>
|
|
recogniseEscapeSequence ("\\f", "hello \f world"))
|
|
, test "recognises carriage return" (fn _ =>
|
|
recogniseEscapeSequence ("\\r", "hello \r world"))
|
|
, test "recognises backslash" (fn _ =>
|
|
recogniseEscapeSequence ("\\\\", "hello \\ world"))
|
|
]
|
|
|
|
val metacharacterEscapeTest = describe "regex metacharacter escape sequences"
|
|
[ test "recognises (" (fn _ =>
|
|
recogniseEscapeSequence ("\\(", "hello ( world"))
|
|
, test "recognises )" (fn _ =>
|
|
recogniseEscapeSequence ("\\)", "hello ) world"))
|
|
, test "recognises [" (fn _ =>
|
|
recogniseEscapeSequence ("\\[", "hello [ world"))
|
|
, test "recognises ]" (fn _ =>
|
|
recogniseEscapeSequence ("\\]", "hello ] world"))
|
|
, test "recognises +" (fn _ =>
|
|
recogniseEscapeSequence ("\\+", "hello + world"))
|
|
, test "recognises |" (fn _ =>
|
|
recogniseEscapeSequence ("\\|", "hello | world"))
|
|
, test "recognises ?" (fn _ =>
|
|
recogniseEscapeSequence ("\\?", "hello ? world"))
|
|
, test "recognises ." (fn _ =>
|
|
recogniseEscapeSequence ("\\.", "hello . world"))
|
|
, test "recognises -" (fn _ =>
|
|
recogniseEscapeSequence ("\\-", "hello - world"))
|
|
|
|
(* checking that unescaped metacharacter is not recognised *)
|
|
, test "does not recognise (" (fn _ =>
|
|
doesNotRecogniseUnescaped ("(", "hello ( world"))
|
|
, test "does not recognise )" (fn _ =>
|
|
doesNotRecogniseUnescaped (")", "hello ) world"))
|
|
, test "does not recognise [" (fn _ =>
|
|
doesNotRecogniseUnescaped ("[", "hello [ world"))
|
|
, test "does not recognise ]" (fn _ =>
|
|
doesNotRecogniseUnescaped ("[", "hello ] world"))
|
|
, test "does not recognise +" (fn _ =>
|
|
doesNotRecogniseUnescaped ("+", "hello + world"))
|
|
, test "does not recognise |" (fn _ =>
|
|
doesNotRecogniseUnescaped ("|", "hello | world"))
|
|
, test "does not recognise ?" (fn _ =>
|
|
doesNotRecogniseUnescaped ("?", "hello ? world"))
|
|
, test "does not recognise -" (fn _ =>
|
|
doesNotRecogniseUnescaped ("-", "hello - world"))
|
|
]
|
|
|
|
(* tests based on regex tutorial by FreeCodeCamp *)
|
|
val freeCodeCampTests = describe "regex freeCodeCamp tests"
|
|
[ test "The dog chased the cat" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val sentence = "The dog chased the cat"
|
|
val regexString = "the"
|
|
val caseSensitiveDfa = CsDfa.fromString regexString
|
|
val caseInsensitiveDfa = CiDfa.fromString regexString
|
|
|
|
(* act *)
|
|
val caseSensitiveMatches =
|
|
CsDfa.matchString (caseSensitiveDfa, sentence)
|
|
val caseInsensitiveMatches =
|
|
CiDfa.matchString (caseInsensitiveDfa, sentence)
|
|
|
|
(* assert *)
|
|
val expectedCaseSensitive = [(15, 17)]
|
|
val expectedCaseInsensitive = [(0, 2), (15, 17)]
|
|
val expected =
|
|
caseSensitiveMatches = expectedCaseSensitive
|
|
andalso caseInsensitiveMatches = expectedCaseInsensitive
|
|
in
|
|
Expect.isTrue (expected)
|
|
end)
|
|
, test "Somewhere Waldo is hiding in this text." (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val sentence = "Somewhere Waldo is hiding in this text."
|
|
val regexString = "Waldo"
|
|
val dfa = CsDfa.fromString regexString
|
|
|
|
(* act *)
|
|
val matches = CsDfa.matchString (dfa, sentence)
|
|
|
|
(* assert *)
|
|
val expectedMatches = [(10, 14)]
|
|
in
|
|
Expect.isTrue (expectedMatches = matches)
|
|
end)
|
|
, test "James has a pet cat." (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val sentence = "James has a pet cat."
|
|
val regexString = "dog|cat|bird|fish"
|
|
val dfa = CsDfa.fromString regexString
|
|
|
|
(* act *)
|
|
val matches = CsDfa.matchString (dfa, sentence)
|
|
|
|
(* assert *)
|
|
val expectedMatches = [(16, 18)]
|
|
in
|
|
Expect.isTrue (expectedMatches = matches)
|
|
end)
|
|
, test "Ignore Case While Matching" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val sentence = "freeCodeCamp"
|
|
val regexString = "freecodecamp"
|
|
val dfa = CiDfa.fromString regexString
|
|
|
|
(* act *)
|
|
val matches = CiDfa.matchString (dfa, sentence)
|
|
|
|
(* assert *)
|
|
val expectedMatches = [(0, 11)]
|
|
in
|
|
Expect.isTrue (expectedMatches = matches)
|
|
end)
|
|
, test "Extract the word 'coding' from this string" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val sentence = "Extract the word 'coding' from this string"
|
|
val regexString = "coding"
|
|
val dfa = CsDfa.fromString regexString
|
|
|
|
(* act *)
|
|
val matches = CsDfa.matchString (dfa, sentence)
|
|
|
|
(* assert *)
|
|
val expectedMatches = [(18, 23)]
|
|
in
|
|
Expect.isTrue (expectedMatches = matches)
|
|
end)
|
|
, test "Repeat, Repeat, Repeat" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val sentence = "Repeat, Repeat, Repeat"
|
|
val regexString = "Repeat"
|
|
val dfa = CsDfa.fromString regexString
|
|
|
|
(* act *)
|
|
val matches = CsDfa.matchString (dfa, sentence)
|
|
|
|
(* assert *)
|
|
val expectedMatches = [(0, 5), (8, 13), (16, 21)]
|
|
in
|
|
Expect.isTrue (expectedMatches = matches)
|
|
end)
|
|
, test "Twinkle, twinkle, little start" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val sentence = "Twinkle, twinkle, little start"
|
|
val regexString = "twinkle"
|
|
val dfa = CiDfa.fromString regexString
|
|
|
|
(* act *)
|
|
val matches = CiDfa.matchString (dfa, sentence)
|
|
|
|
(* assert *)
|
|
val expectedMatches = [(0, 6), (9, 15)]
|
|
in
|
|
Expect.isTrue (expectedMatches = matches)
|
|
end)
|
|
, test "hu. regex" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val regexString = "hu."
|
|
val dfa = CsDfa.fromString regexString
|
|
|
|
val humStr = "To mock a humming bird"
|
|
val hugStr = "Bear hug"
|
|
|
|
(* act *)
|
|
val humMatches = CiDfa.matchString (dfa, humStr)
|
|
val hugMatches = CiDfa.matchString (dfa, hugStr)
|
|
|
|
(* assert *)
|
|
val expectedHumMatches = [(10, 12)]
|
|
val expectedHugMatches = [(5, 7)]
|
|
val isExpected =
|
|
humMatches = expectedHumMatches
|
|
andalso hugMatches = expectedHugMatches
|
|
in
|
|
Expect.isTrue isExpected
|
|
end)
|
|
, test "Let's have fun with regular expressions!" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val sentence = "Let's have fun with regular expressions!"
|
|
val regexString = ".un"
|
|
val dfa = CsDfa.fromString regexString
|
|
|
|
(* act *)
|
|
val matches = CsDfa.matchString (dfa, sentence)
|
|
|
|
(* assert *)
|
|
val expectedMatches = [(11, 13)]
|
|
in
|
|
Expect.isTrue (matches = expectedMatches)
|
|
end)
|
|
, test "Beware of bugs in the above code" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val sentence =
|
|
"Beware of bugs in the above code;\
|
|
\I have only proved it correct, not tried it."
|
|
val regexString = "[aeiou]"
|
|
val dfa = CiDfa.fromString regexString
|
|
|
|
(* act *)
|
|
val matches = CiDfa.matchString (dfa, sentence)
|
|
|
|
(* assert *)
|
|
val expectedMatches =
|
|
[ (1, 1)
|
|
, (3, 3)
|
|
, (5, 5)
|
|
, (7, 7)
|
|
, (11, 11)
|
|
, (15, 15)
|
|
, (20, 20)
|
|
, (22, 22)
|
|
, (24, 24)
|
|
, (26, 26)
|
|
, (29, 29)
|
|
, (31, 31)
|
|
, (33, 33)
|
|
, (36, 36)
|
|
, (38, 38)
|
|
, (40, 40)
|
|
, (47, 47)
|
|
, (49, 49)
|
|
, (52, 52)
|
|
, (56, 56)
|
|
, (59, 59)
|
|
, (65, 65)
|
|
, (70, 70)
|
|
, (71, 71)
|
|
, (74, 74)
|
|
]
|
|
in
|
|
Expect.isTrue (matches = expectedMatches)
|
|
end)
|
|
, test "The quick brown fox jumps over the lazy dog." (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val sentence = "The quick brown fox jumps over the lazy dog."
|
|
val regexString = "[a-zA-Z]"
|
|
val dfa = CiDfa.fromString regexString
|
|
|
|
(* act *)
|
|
val matches = CiDfa.matchString (dfa, sentence)
|
|
|
|
(* assert *)
|
|
val expectedMatches =
|
|
[ (0, 0)
|
|
, (1, 1)
|
|
, (2, 2)
|
|
, (4, 4)
|
|
, (5, 5)
|
|
, (6, 6)
|
|
, (7, 7)
|
|
, (8, 8)
|
|
, (10, 10)
|
|
, (11, 11)
|
|
, (12, 12)
|
|
, (13, 13)
|
|
, (14, 14)
|
|
, (16, 16)
|
|
, (17, 17)
|
|
, (18, 18)
|
|
, (20, 20)
|
|
, (21, 21)
|
|
, (22, 22)
|
|
, (23, 23)
|
|
, (24, 24)
|
|
, (26, 26)
|
|
, (27, 27)
|
|
, (28, 28)
|
|
, (29, 29)
|
|
, (31, 31)
|
|
, (32, 32)
|
|
, (33, 33)
|
|
, (35, 35)
|
|
, (36, 36)
|
|
, (37, 37)
|
|
, (38, 38)
|
|
, (40, 40)
|
|
, (41, 41)
|
|
, (42, 42)
|
|
]
|
|
in
|
|
Expect.isTrue (matches = expectedMatches)
|
|
end)
|
|
, test "Blueberry 3.141592653s are delicious." (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val sentence = "Blueberry 3.141592653s are delicious."
|
|
val regexString = "[2-6h-s]"
|
|
val dfa = CiDfa.fromString regexString
|
|
|
|
(* act *)
|
|
val matches = CiDfa.matchString (dfa, sentence)
|
|
|
|
(* assert *)
|
|
val expectedMatches =
|
|
[ (1, 1)
|
|
, (6, 6)
|
|
, (7, 7)
|
|
, (10, 10)
|
|
, (13, 13)
|
|
, (15, 15)
|
|
, (17, 17)
|
|
, (18, 18)
|
|
, (19, 19)
|
|
, (20, 20)
|
|
, (21, 21)
|
|
, (24, 24)
|
|
, (29, 29)
|
|
, (30, 30)
|
|
, (32, 32)
|
|
, (33, 33)
|
|
, (35, 35)
|
|
]
|
|
in
|
|
Expect.isTrue (expectedMatches = matches)
|
|
end)
|
|
, test "3 blind mice." (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val sentence = "3 blind mice."
|
|
val regexString = "[^0-9aeiou]"
|
|
val dfa = CiDfa.fromString regexString
|
|
|
|
(* act *)
|
|
val matches = CiDfa.matchString (dfa, sentence)
|
|
|
|
(* assert *)
|
|
val expectedMatches =
|
|
[ (1, 1)
|
|
, (2, 2)
|
|
, (3, 3)
|
|
, (5, 5)
|
|
, (6, 6)
|
|
, (7, 7)
|
|
, (8, 8)
|
|
, (10, 10)
|
|
, (12, 12)
|
|
]
|
|
in
|
|
Expect.isTrue (expectedMatches = matches)
|
|
end)
|
|
, test "Mississipi" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val sentence = "Mississipi"
|
|
val regexString = "s+"
|
|
val dfa = CiDfa.fromString regexString
|
|
|
|
(* act *)
|
|
val matches = CiDfa.matchString (dfa, sentence)
|
|
|
|
(* assert *)
|
|
val expectedMatches = [(2, 3), (5, 6)]
|
|
in
|
|
Expect.isTrue (expectedMatches = matches)
|
|
end)
|
|
, test "goooal" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val soccerSentence = "goooal"
|
|
val gPhrase = "gut feeling"
|
|
val oPhrase = "over the moon"
|
|
|
|
val goRegex = "go*"
|
|
val dfa = CsDfa.fromString goRegex
|
|
|
|
(* act *)
|
|
val soccerMatches = CsDfa.matchString (dfa, soccerSentence)
|
|
val gPhraseMatches = CsDfa.matchString (dfa, gPhrase)
|
|
val oPhraseMatches = CsDfa.matchString (dfa, oPhrase)
|
|
|
|
(* assert *)
|
|
val expectedSoccerMatches = [(0, 3)]
|
|
val expectedGPhraseMatches = [(0, 0), (10, 10)]
|
|
val expectedOPhraseMatches = []
|
|
|
|
val isExpected =
|
|
soccerMatches = expectedSoccerMatches
|
|
andalso gPhraseMatches = expectedGPhraseMatches
|
|
andalso oPhraseMatches = expectedOPhraseMatches
|
|
in
|
|
Expect.isTrue isExpected
|
|
end)
|
|
, test "chewie quote" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val sentence = "Aaaaaaargh"
|
|
val regexString = "Aa*"
|
|
val dfa = CsDfa.fromString regexString
|
|
|
|
(* act *)
|
|
val matches = CsDfa.matchString (dfa, sentence)
|
|
|
|
(* assert *)
|
|
val expectedMatches = [(0, 6)]
|
|
in
|
|
Expect.isTrue (matches = expectedMatches)
|
|
end)
|
|
, test "favorite" (fn _ =>
|
|
let
|
|
(* arrange *)
|
|
val sentenceWithoutU = "favorite"
|
|
val sentenceWithU = "favourite"
|
|
|
|
val regexString = "favou?rite"
|
|
val dfa = CsDfa.fromString regexString
|
|
|
|
(* act *)
|
|
val matchesWithoutU = CsDfa.matchString (dfa, sentenceWithoutU)
|
|
val matchesWithU = CsDfa.matchString (dfa, sentenceWithU)
|
|
|
|
(* assert *)
|
|
val expectedMatchesWithoutU = [(0, 7)]
|
|
val expectedMatchesWithU = [(0, 8)]
|
|
|
|
val isExpected =
|
|
matchesWithoutU = expectedMatchesWithoutU
|
|
andalso matchesWithU = expectedMatchesWithU
|
|
in
|
|
Expect.isTrue isExpected
|
|
end)
|
|
]
|
|
|
|
val tests =
|
|
[ caseInsensitiveTests
|
|
, caseSensitiveTests
|
|
, endMarkerTests
|
|
, escapeSequenceTests
|
|
, metacharacterEscapeTest
|
|
, freeCodeCampTests
|
|
]
|
|
end
|