Changeset 19 for trunk/lex.lisp
- Timestamp:
- 02/27/2008 05:56:24 AM (18 years ago)
- File:
-
- 1 edited
-
trunk/lex.lisp (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/lex.lisp
r18 r19 18 18 (defpackage #:dso-lex 19 19 (:documentation "Allows the definition of lexers. See DEFLEXER.") 20 (:use #:cl #:cl-ppcre )21 (:export #:deflexer #:make-lexer ))20 (:use #:cl #:cl-ppcre #:dso-util) 21 (:export #:deflexer #:make-lexer #:lex-all)) 22 22 23 23 (in-package #:dso-lex) 24 24 25 25 26 27 ;;; regex manipulation 28 29 (defun anchor-and-mode (regex) 30 `(:sequence (:flags :single-line-mode-p) :start-anchor ,regex)) 31 32 (defun wrap (regex) (anchor-and-mode `(:regex ,regex))) 26 33 27 34 (defun combine (regex-list) … … 30 37 regex-list))) 31 38 (when (rest mapped) (setq mapped `((:alternation ,@mapped)))) 32 `(:sequence (:flags :single-line-mode-p) :start-anchor ,@mapped))) 39 (anchor-and-mode (car mapped)))) 40 41 42 43 ;;; creating lexing forms 44 45 (defun break-defs (defs) 46 (let (regexs classes filters) 47 (dolist (d (reverse defs) (values regexs classes filters)) 48 (destructuring-bind (regex class &optional filter) d 49 (push regex regexs) 50 (push class classes) 51 (push filter filters))))) 52 53 (defun greedy-lexer-form (input-var start-var defs) 54 (multiple-value-bind (regexs classes filters) (break-defs defs) 55 (setf regexs (mapcar 'wrap regexs)) 56 `(let ((classes ,(coerce classes 'vector)) 57 (filters ,(coerce filters 'vector)) 58 max 59 at) 60 ,@(mapcar 61 (lambda (i) 62 `(let ((end (nth-value 1 (scan ',(nth i regexs) ,input-var :start ,start-var)))) 63 (format t "scanner ~A ended at ~A~%" ,i end) 64 (when (and end (or (null at) (> end max))) 65 (setf max end 66 at ,i)))) 67 (range (length regexs))) 68 (when at 69 (let ((image (make-array (- max ,start-var) 70 :element-type 'character 71 :displaced-to ,input-var 72 :displaced-index-offset ,start-var)) 73 (filter (aref filters at))) 74 (values (aref classes at) 75 (if filter (funcall filter image) image) 76 max)))))) 33 77 34 78 (defun lexer-form (input-var start-var defs) … … 52 96 53 97 54 (defun make-lexer (defs) 98 ;;; creating lexing functions 99 100 (defun make-lexer (defs &key priority-only) 55 101 "Returns a lexer function. The DEFS consists of token-class 56 102 definitions, each being a list of a regular expression, the name of 57 103 the class, and an optional filter. The returned function takes as 58 arguments an input sequence and an optional start position. 104 arguments an input sequence and an optional start position, and 105 returning the matched token-class, image, and image-length as values. 59 106 60 Currently, matching is done using *only* priority (first match wins), 61 and does not look for the longest match. 107 Unless PRIORITY-ONLY is true, the longest match will win, and 108 rule-priority will only be used to break ties. Otherwise, the first 109 match wins. 62 110 63 111 Example: 64 112 65 (let ((lexer (make-lexer '((\"[0-9]+\" number parse-integer)66 (\"[a-zA-Z]\" letter)))))67 (funcall lexer \"2pi\" 1))"113 (let ((lexer (make-lexer '((\"[0-9]+\" number parse-integer) 114 (\"[a-zA-Z]\" letter))))) 115 (funcall lexer \"2pi\" 1))" 68 116 (eval `(lambda (input &optional (start 0)) 69 ,(lexer-form 'input 'start defs)))) 117 ,(if priority-only 118 (lexer-form 'input 'start defs) 119 (greedy-lexer-form 'input 'start defs))))) 70 120 71 (defmacro deflexer (name &body defs) 72 "Defines a lexer, called as a function of the given NAME. The body 121 (defmacro deflexer (name (&key priority-only) &body defs) 122 "Defines a lexer, called as a function of the given NAME, and returning 123 the matched token-class, image, and image-length as values. The body 73 124 consists of token-class definitions, each being a list of a regular 74 125 expression, the name of the class, and an optional filter. 75 126 76 Currently, matching is done using *only* priority (first match wins), 77 and does not look for the longest match. 127 Unless PRIORITY-ONLY is true, the longest match will win, and 128 rule-priority will only be used to break ties. Otherwise, the first 129 match wins. 78 130 79 131 Example: 80 132 81 (deflexer lexer 82 (\"[0-9]+\" number parse-integer)83 (\"[a-zA-Z]\" letter))133 (deflexer lexer () 134 (\"[0-9]+\" number parse-integer) 135 (\"[a-zA-Z]\" letter)) 84 136 85 (lexer \"2pi\" 1)"137 (lexer \"2pi\" 1)" 86 138 `(defun ,name (input &optional (start 0)) 87 ,(lexer-form 'input 'start defs))) 139 ,(if priority-only 140 (lexer-form 'input 'start defs) 141 (greedy-lexer-form 'input 'start defs)))) 142 143 (defun lex-all (lexer input) 144 (labels ((scan (start tokens) 145 (if (> (length input) start) 146 (multiple-value-bind (class image remainder) 147 (funcall lexer input start) 148 (when class 149 (scan remainder (cons (cons class image) tokens)))) 150 (nreverse tokens)))) 151 (scan 0 '())))
Note: See TracChangeset
for help on using the changeset viewer.
