Index: /trunk/example.lisp
===================================================================
--- /trunk/example.lisp	(revision 18)
+++ /trunk/example.lisp	(revision 19)
@@ -29,5 +29,5 @@
 (defun un-dquote (s) (regex-replace-all "\"\"" (snip s) "\""))
 
-(deflexer scan-csv
+(deflexer scan-csv (:priority-only t)
   ("," comma)
   ("[^\"',]+" value)
@@ -35,13 +35,5 @@
   ("\"(?:[^\"]|\"\")*\"" value un-dquote))
 
-(defun scan-all (input)
-  (labels ((scan (start tokens)
-	     (if (> (length input) start)
-		 (multiple-value-bind (class image remainder)
-		     (scan-csv input start)
-		   (when class
-		     (scan remainder (cons (cons class image) tokens))))
-		 (nreverse tokens))))
-    (scan 0 '())))
 
-(scan-all "no quotes,'a ''quote''',\"another \"\"quote\"\"\"")
+
+(lex-all 'scan-csv "no quotes,'a ''quote''',\"another \"\"quote\"\"\"")
Index: /trunk/lex.lisp
===================================================================
--- /trunk/lex.lisp	(revision 18)
+++ /trunk/lex.lisp	(revision 19)
@@ -18,10 +18,17 @@
 (defpackage #:dso-lex
     (:documentation "Allows the definition of lexers.  See DEFLEXER.")
-  (:use #:cl #:cl-ppcre)
-  (:export #:deflexer #:make-lexer))
+  (:use #:cl #:cl-ppcre #:dso-util)
+  (:export #:deflexer #:make-lexer #:lex-all))
 
 (in-package #:dso-lex)
 
 
+
+;;; regex manipulation
+
+(defun anchor-and-mode (regex)
+  `(:sequence (:flags :single-line-mode-p) :start-anchor ,regex))
+
+(defun wrap (regex) (anchor-and-mode `(:regex ,regex)))
 
 (defun combine (regex-list)
@@ -30,5 +37,42 @@
 		 regex-list)))
     (when (rest mapped) (setq mapped `((:alternation ,@mapped))))
-    `(:sequence (:flags :single-line-mode-p) :start-anchor ,@mapped)))
+    (anchor-and-mode (car mapped))))
+
+
+
+;;; creating lexing forms
+
+(defun break-defs (defs)
+  (let (regexs classes filters)
+    (dolist (d (reverse defs) (values regexs classes filters))
+      (destructuring-bind (regex class &optional filter) d
+        (push regex regexs)
+        (push class classes)
+        (push filter filters)))))
+
+(defun greedy-lexer-form (input-var start-var defs)
+  (multiple-value-bind (regexs classes filters) (break-defs defs)
+    (setf regexs (mapcar 'wrap regexs))
+    `(let ((classes ,(coerce classes 'vector))
+           (filters ,(coerce filters 'vector))
+           max
+           at)
+       ,@(mapcar
+          (lambda (i)
+            `(let ((end (nth-value 1 (scan ',(nth i regexs) ,input-var :start ,start-var))))
+               (format t "scanner ~A ended at ~A~%" ,i end)
+               (when (and end (or (null at) (> end max)))
+                 (setf max end
+                       at ,i))))
+          (range (length regexs)))
+       (when at
+         (let ((image (make-array (- max ,start-var)
+                                  :element-type 'character
+                                  :displaced-to ,input-var
+                                  :displaced-index-offset ,start-var))
+               (filter (aref filters at)))
+           (values (aref classes at)
+                   (if filter (funcall filter image) image)
+                   max))))))
 
 (defun lexer-form (input-var start-var defs)
@@ -52,36 +96,56 @@
 
 
-(defun make-lexer (defs)
+;;; creating lexing functions
+
+(defun make-lexer (defs &key priority-only)
   "Returns a lexer function.  The DEFS consists of token-class
 definitions, each being a list of a regular expression, the name of
 the class, and an optional filter.  The returned function takes as
-arguments an input sequence and an optional start position.
+arguments an input sequence and an optional start position, and
+returning the matched token-class, image, and image-length as values.
 
-Currently, matching is done using *only* priority (first match wins),
-and does not look for the longest match.
+Unless PRIORITY-ONLY is true, the longest match will win, and
+rule-priority will only be used to break ties.  Otherwise, the first
+match wins.
 
 Example:
 
-(let ((lexer (make-lexer '((\"[0-9]+\" number parse-integer)
-                           (\"[a-zA-Z]\" letter)))))
-  (funcall lexer \"2pi\" 1))"
+ (let ((lexer (make-lexer '((\"[0-9]+\" number parse-integer)
+                            (\"[a-zA-Z]\" letter)))))
+   (funcall lexer \"2pi\" 1))"
   (eval `(lambda (input &optional (start 0))
-           ,(lexer-form 'input 'start defs))))
+           ,(if priority-only
+                (lexer-form 'input 'start defs)
+                (greedy-lexer-form 'input 'start defs)))))
 
-(defmacro deflexer (name &body defs)
-  "Defines a lexer, called as a function of the given NAME.  The body
+(defmacro deflexer (name (&key priority-only) &body defs)
+  "Defines a lexer, called as a function of the given NAME, and returning
+the matched token-class, image, and image-length as values.  The body
 consists of token-class definitions, each being a list of a regular
 expression, the name of the class, and an optional filter.
 
-Currently, matching is done using *only* priority (first match wins),
-and does not look for the longest match.
+Unless PRIORITY-ONLY is true, the longest match will win, and
+rule-priority will only be used to break ties.  Otherwise, the first
+match wins.
 
 Example:
 
-(deflexer lexer
-  (\"[0-9]+\" number parse-integer)
-  (\"[a-zA-Z]\" letter))
+ (deflexer lexer ()
+   (\"[0-9]+\" number parse-integer)
+   (\"[a-zA-Z]\" letter))
 
-(lexer \"2pi\" 1)"
+ (lexer \"2pi\" 1)"
   `(defun ,name (input &optional (start 0))
-     ,(lexer-form 'input 'start defs)))
+     ,(if priority-only
+          (lexer-form 'input 'start defs)
+          (greedy-lexer-form 'input 'start defs))))
+
+(defun lex-all (lexer input)
+  (labels ((scan (start tokens)
+             (if (> (length input) start)
+		 (multiple-value-bind (class image remainder)
+		     (funcall lexer input start)
+		   (when class
+		     (scan remainder (cons (cons class image) tokens))))
+		 (nreverse tokens))))
+    (scan 0 '())))
