http://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html http://news.ycombinator.com/item?id=3117590 Here's a direct wart transliteration: def next(s)
(s 1 len.s)
def match(regex text)
if (and !blank?.regex (iso regex.0 "^"))
(matchhere next.regex text)
(or (matchhere regex text)
(if !blank?.text
(match regex next.text)))
def matchhere(regex text)
(if blank?.regex
1
(and (iso regex.0 "$") !next.regex)
!text
blank?.text
nil
(iso regex.1 "*")
(matchstar regex.0 (next next.regex) text)
(or (iso regex.0 ".") (iso regex.0 text.0))
(matchhere next.regex next.text))
def matchstar(c regex text)
or (matchhere regex text)
(if (and !blank?.text (or (iso c text.0) (iso c ".")))
(matchstar c regex next.text))
It's slow because we're doing a string-copy on every call to next. Here's a version that deals in indices to avoid that: def match(regex text)
with (maxr len.regex maxt len.text
startmatch nil matchhere nil matchstar nil)
def! startmatch(r t)
(if (and (~iso r maxr) (iso "^" regex.r))
(matchhere (+ r 1) t)
(or (matchhere r t)
(if (~iso t maxt)
(startmatch r (+ t 1)))))
def! matchhere(r t)
(if (iso r maxr)
1
(and (iso r (- maxr 1)) (iso "$" regex.r))
(iso t maxt)
(iso t maxt)
nil
(iso "*" (regex (+ r 1)))
(matchstar regex.r (+ r 2) t)
(or (iso "." regex.r) (iso regex.r text.t))
(matchhere (+ r 1) (+ t 1)))
def! matchstar(c r t)
(or (matchhere r t)
(if (and (~iso t maxt) (or (iso c text.t) (iso c ".")))
(matchstar c r (+ t 1))))
(startmatch 0 0)
Instead of operating on indices we could simply coerce the strings to lists at the start to make next cheap (ie just cdr): def match(regex text)
coerce! regex list
coerce! text list
if (and regex (iso car.regex "^"))
(matchhere cdr.regex text)
(or (matchhere regex text)
(if !blank?.text
(match regex cdr.text)))
def matchhere(regex text)
(if blank?.regex
1
(and (iso regex.0 "$") !cdr.regex)
!text
blank?.text
nil
(iso regex.1 "*")
(matchstar regex.0 cddr.regex text)
(or (iso regex.0 ".") (iso regex.0 text.0))
(matchhere cdr.regex cdr.text))
def matchstar(c regex text)
or (matchhere regex text)
(if (and !blank?.text (or (iso c text.0) (iso c ".")))
(matchstar c regex cdr.text))
Finally, some unit tests: (test "empty - 1"
:valueof (match "" "")
:should ~be nil)
(test "empty - 2"
:valueof (match "" "a")
:should ~be nil)
(test "empty - 3"
:valueof (match "a" "")
:should be nil)
(test "literal - 1"
:valueof (match "abc" "abc")
:should ~be nil)
(test "literal - 2"
:valueof (match "abc" "def")
:should be nil)
(test "literal - 3"
:valueof (match "abc" "ab")
:should be nil)
(test "literal - 4"
:valueof (match "ab" "abc")
:should ~be nil)
(test "dot - 1"
:valueof (match "a." "abc")
:should ~be nil)
(test "dot - 2"
:valueof (match "a.c" "abc")
:should ~be nil)
(test "dot - 3"
:valueof (match ".c" "abc")
:should ~be nil)
(test "dot - 4"
:valueof (match ".a" "abc")
:should be nil)
(test "star - 1"
:valueof (match "a*" "abc")
:should ~be nil)
(test "star - 2"
:valueof (match "a*b" "abc")
:should ~be nil)
(test "star - 3"
:valueof (match "a*c" "abc")
:should ~be nil)
(test "star - 4"
:valueof (match "a*d" "abc")
:should be nil)
(test "dot star - 1"
:valueof (match "a.*b" "abc")
:should ~be nil)
(test "dot star - 2"
:valueof (match "a.*c" "abc")
:should ~be nil)
(test "dot star - 3"
:valueof (match ".*c" "abc")
:should ~be nil)
(test "caret - 1"
:valueof (match "^a" "abc")
:should ~be nil)
(test "caret - 2"
:valueof (match "^a*c" "abc")
:should be nil)
(test "dollar - 1"
:valueof (match "c$" "abc")
:should ~be nil)
(test "dollar - 2"
:valueof (match "b$" "abc")
:should be nil)
|