Blog of Dimiter "malkia" Stanev mainly for Common Lisp.

Sunday, August 19, 2007

Rob Pike's regular expression match routine in Common Lisp

;;; From the "Beautiful Code" book.
;;; Chapter 1 - "A Regular Expression Matcher", Brian Kernighan
;;; (also in chapter 9 of the "Practice of Programming" book)
;;;
;;; Original C version by Rob Pike
;;; Common Lisp version by Dimiter "malkia" Stanev
;;;
;;; /* match: search for regexp anywhere in text */
;;; int match(char *regexp, char *text)
;;; {
;;; if (regexp[0] == '^')
;;; return matchhere(regexp+1, text);
;;; do { /* must look even if string is empty */
;;; if (matchhere(regexp, text))
;;; return 1;
;;; } while (*text++ != '\0');
;;; return 0;
;;; }
;;;
;;; /* matchhere: search for regexp at beginning of text */
;;; int matchhere(char *regexp, char *text)
;;; {
;;; if (regexp[0] == '\0')
;;; return 1;
;;; if (regexp[1] == '*')
;;; return matchstar(regexp[0], regexp+2, text);
;;; if (regexp[0] == '$' && regexp[1] == '\0')
;;; return *text == '\0';
;;; if (*text!='\0' && (regexp[0]=='.' || regexp[0]==*text))
;;; return matchhere(regexp+1, text+1);
;;; return 0;
;;; }
;;;
;;; /* matchstar: search for c*regexp at beginning of text */
;;; int matchstar(int c, char *regexp, char *text)
;;; {
;;; do { /* a * matches zero or more instances */
;;; if (matchhere(regexp, text))
;;; return 1;
;;; } while (*text != '\0' && (*text++ == c || c == '.'));
;;; return 0;
;;; }
;;;

(declaim (optimize (speed 3) (safety 0) (debug 0)
(space 0) (compilation-speed 0)
#+lispworks (hcl:fixnum-safety 0)))

(declaim (ftype (function (simple-base-string simple-base-string)
boolean) match))

(defun match (regexp text)
(declare (type simple-base-string regexp text))
(let ((text-length (length text))
(regexp-length (length regexp)))
(declare (type fixnum text-length regexp-length))
(labels ((match-star (ro to)
(declare (type fixnum ro to))
(let ((c (char regexp (- ro 2))))
(declare (type base-char c))
(if (char= c #\.)
(dotimes (o (- text-length to))
(declare (type fixnum o))
(when (match-here ro (+ o to))
(return-from match-star t)))
(dotimes (o (- text-length to))
(declare (type fixnum o))
(when (match-here ro (+ o to))
(return-from match-star t))
(when (char/= (char text (+ o to)) c)
(return-from match-star nil)))))
nil)
(match-here (ro to)
(declare (type fixnum ro to))
(cond ((= ro regexp-length)
t)
((and (/= regexp-length (1+ ro))
(char= (char regexp (1+ ro)) #\*))
(match-star (+ ro 2) to))
((and (= regexp-length (1+ ro))
(char= (char regexp ro) #\$))
(= text-length to))
(t (when (and (/= text-length to)
(or (char= (char regexp ro) #\.)
(char= (char regexp ro)
(char text to))))
(match-here (1+ ro) (1+ to)))))))
(cond ((= regexp-length 0)
(= text-length 0))
((char= (char regexp 0) #\^)
(match-here 1 0))
(t (dotimes (o text-length)
(declare (type fixnum o))
(when (match-here 0 o)
(return-from match t))))))))

2 comments:

Anonymous said...

Thanks for writing this.

Slobodan Blazeski said...

Try using pre tag its very hard to read the unformatted code and/or you could add link to http://paste.lisp.org/