fisl

fisl is scheme lox
git clone git://git.alexkarle.com.com/fisl
Log | Files | Refs | README | LICENSE

parser.scm (11125B) [raw]


      1 ;; parser.scm -- parser routines
      2 (import (chicken format))
      3 
      4 (define parser-sync #f)
      5 
      6 
      7 ;; EXPRESSIONS
      8 (define-record binary left operator right)
      9 (define-record grouping expression)
     10 (define-record literal value)
     11 (define-record unary operator right)
     12 (define-record variable name)
     13 (define-record assignment name value)
     14 (define-record logical left operator right)
     15 
     16 (set-record-printer! binary
     17   (lambda (x out) (fprintf out "(~A ~S ~S)"
     18                            (token-lexeme (binary-operator x))
     19                            (binary-left x)
     20                            (binary-right x))))
     21 
     22 (set-record-printer! grouping
     23   (lambda (x out) (fprintf out "(group ~S)" (grouping-expression x))))
     24 
     25 (set-record-printer! literal
     26   (lambda (x out) (fprintf out "~S" (literal-value x))))
     27 
     28 (set-record-printer! unary
     29   (lambda (x out) (fprintf out "(~A ~S)"
     30                            (token-lexeme (unary-operator x))
     31                            (unary-right x))))
     32 
     33 (set-record-printer! variable
     34   (lambda (x out) (fprintf out "~A" (token-lexeme (variable-name x)))))
     35 
     36 (set-record-printer! assignment
     37   (lambda (x out) (fprintf out "(set! ~A ~A)" (token-lexeme (assignment-name x)) (assignment-value x))))
     38 
     39 (set-record-printer! logical
     40   (lambda (x out) (fprintf out "(~A ~A ~A)"
     41 		    (token-lexeme (logical-operator x))
     42 		    (logical-left x)
     43 		    (logical-right x))))
     44 
     45 
     46 ;; STATEMENTS
     47 (define-record print-stmt value)
     48 (define-record expr-stmt value)
     49 (define-record var-stmt name init)
     50 (define-record block stmts)
     51 (define-record if-stmt cond-expr then-stmt else-stmt)
     52 (define-record while-stmt cond-expr body-stmt)
     53 
     54 (set-record-printer! print-stmt
     55   (lambda (x out)
     56     (fprintf out "(print ~A)" (print-stmt-value x))))
     57 
     58 (set-record-printer! expr-stmt
     59   (lambda (x out)
     60     (fprintf out "(expr ~A)" (expr-stmt-value x))))
     61 
     62 (set-record-printer! var-stmt
     63   (lambda (x out)
     64     (fprintf out "(var ~A ~A)" (var-stmt-name x) (var-stmt-init x))))
     65 
     66 (set-record-printer! block
     67   (lambda (x out)
     68     (fprintf out "(block ~A)" (block-stmts x))))
     69 
     70 (set-record-printer! if-stmt
     71   (lambda (x out)
     72     (fprintf out "(if ~A ~A ~A)"
     73       (if-stmt-cond-expr x)
     74       (if-stmt-then-stmt x)
     75       (if-stmt-else-stmt x))))
     76 
     77 (set-record-printer! while-stmt
     78   (lambda (x out)
     79     (fprintf out "(while ~A ~A)" (while-stmt-cond-expr x) (while-stmt-body-stmt x))))
     80 
     81 ;; helper to check if first is of types
     82 (define (top-type? tokens types)
     83   (if (symbol? types)
     84       (eq? (token-type (car tokens)) types)
     85       (memq (token-type (car tokens)) types)))
     86 
     87 
     88 (define (assert-type! toks types msg)
     89   (if (not (top-type? toks types))
     90       (parse-err! toks msg)))
     91 
     92 
     93 (define (parse-declaration tokens)
     94   (if (top-type? tokens 'VAR)
     95       (parse-var-decl (cdr tokens))
     96       (parse-statement tokens)))
     97 
     98 (define (parse-var-decl tokens)
     99   (assert-type! tokens 'IDENTIFIER "expected variable name")
    100   (let-values (((init toks)
    101 		(if (top-type? (cdr tokens) 'EQUAL)
    102 		    (parse-expression '() (cddr tokens))
    103 		    (values '() (cdr tokens)))))
    104     (assert-type! toks 'SEMICOLON "Expected ';' after variable declaration")
    105     (values (make-var-stmt (car tokens) init) (cdr toks))))
    106 
    107 (define (parse-statement tokens)
    108   (cond ((top-type? tokens 'PRINT)
    109 	 (parse-print-statement (cdr tokens)))
    110 	((top-type? tokens 'FOR)
    111 	 (parse-for-statement (cdr tokens)))
    112 	((top-type? tokens 'IF)
    113 	 (parse-if-statement (cdr tokens)))
    114 	((top-type? tokens 'WHILE)
    115 	 (parse-while-statement (cdr tokens)))
    116 	((top-type? tokens 'LEFT_BRACE)
    117 	 (let-values (((stmts toks) (parse-block (cdr tokens))))
    118 	   ;; TODO: return the block record instead of stmts? Not the
    119 	   ;; way the book does it but seems cleaner :thinking:
    120 	   (values (make-block stmts) toks)))
    121 	(else (parse-expression-statement tokens))))
    122 
    123 ;; Used for print and expr statements, which have the same formula
    124 (define (parse-generic-stmt tokens maker)
    125   (let-values (((expr toks) (parse-expression '() tokens)))
    126     (if (top-type? toks 'SEMICOLON)
    127       (values (maker expr) (cdr toks))
    128       (if in-repl
    129         (values (maker expr) toks)
    130 	;; TODO: this might break for-loop parsing in the repl?
    131         (parse-err! toks "expected ;")))))
    132 
    133 (define (parse-print-statement tokens)
    134   (parse-generic-stmt tokens make-print-stmt))
    135 
    136 (define (parse-expression-statement tokens)
    137   (parse-generic-stmt tokens make-expr-stmt))
    138 
    139 (define (parse-while-statement tokens)
    140   (assert-type! tokens 'LEFT_PAREN "Expected '(' after 'while'")
    141   (let-values (((cond-expr toks) (parse-expression '() (cdr tokens))))
    142     (assert-type! toks 'RIGHT_PAREN "Expected ')' after while condition")
    143     (let-values (((body-stmt toks2) (parse-statement (cdr toks))))
    144       (values (make-while-stmt cond-expr body-stmt) toks2))))
    145 
    146 (define (parse-for-statement tokens)
    147   (define (extract-init toks)
    148     (cond ((top-type? toks 'SEMICOLON)
    149 	   (values '() (cdr toks)))
    150 	  ((top-type? toks 'VAR)
    151 	   (parse-var-decl (cdr toks)))
    152 	  (else (parse-expression-statement toks))))
    153   (define (extract-cond toks)
    154     (cond ((top-type? toks 'SEMICOLON)
    155 	   (values '() (cdr toks)))
    156 	  (else (parse-expression '() toks))))
    157   (define (extract-incr toks)
    158     (assert-type! toks 'SEMICOLON "Expected ';' after loop condition")
    159     (cond ((top-type? (cdr toks) 'RIGHT_PAREN)
    160 	   (values '() (cddr toks)))
    161 	  (else (parse-expression '() (cdr toks)))))
    162   (define (extract-body toks)
    163     (assert-type! toks 'RIGHT_PAREN "Expected ')' after for clauses")
    164     (parse-statement (cdr toks)))
    165   (define (body-append-incr body incr)
    166     (if (null? incr)
    167 	body
    168 	(make-block (list body (make-expr-stmt incr)))))
    169   (define (body-to-while body conde)
    170     (if (null? conde)
    171 	(make-while-stmt (make-literal #t) body)
    172 	(make-while-stmt conde body)))
    173   (define (while-add-init while init)
    174     (if (null? init)
    175 	while
    176 	(make-block (list init while))))
    177   (assert-type! tokens 'LEFT_PAREN "Expected '(' after 'for'")
    178   (let*-values (((init t1) (extract-init (cdr tokens)))
    179 		((conde t2) (extract-cond t1))
    180 		((incr t3) (extract-incr t2))
    181 		((body t4) (extract-body t3)))
    182     (values (while-add-init (body-to-while (body-append-incr body incr) conde) init)
    183 	    t4)))
    184 
    185 
    186 (define (parse-block tokens)
    187   (let loop ((stmts '()) (toks tokens))
    188     (if (top-type? toks 'RIGHT_BRACE)
    189 	(values stmts (cdr toks))
    190 	(if (top-type? toks 'EOF)
    191 	    (parse-err! toks "expected '}' after block")
    192 	    (let-values (((decl rest) (parse-declaration toks)))
    193 	      ;; TODO: can we do this with cons instead of append?
    194 	      ;; I don't think so, given that we'd need to (cons decl (loop ...))
    195 	      ;; but (loop) returns multiple values (sigh)
    196 	      (loop (append stmts (list decl)) rest))))))
    197 
    198 (define (parse-if-statement tokens)
    199   (assert-type! tokens 'LEFT_PAREN "Expected '(' after 'if'")
    200   (let-values (((cond-expr toks) (parse-expression '() (cdr tokens))))
    201     (assert-type! toks 'RIGHT_PAREN "Expected ')' after if condition")
    202     (let-values (((then-stmt toks2) (parse-statement (cdr toks))))
    203       (if (top-type? toks2 'ELSE)
    204 	  (let-values (((else-stmt toks3) (parse-statement (cdr toks2))))
    205 	    (values (make-if-stmt cond-expr then-stmt else-stmt) toks3))
    206 	  (values (make-if-stmt cond-expr then-stmt '()) toks2)))))
    207 
    208 (define (parse-assignment expr toks)
    209   (let-values (((e2 t2) (parse-or expr toks)))
    210     (if (top-type? t2 'EQUAL)
    211       (let-values (((e3 t3) (parse-assignment e2 (cdr t2))))
    212         (if (variable? e2)
    213           (values (make-assignment (variable-name e2) e3) t3)
    214           (begin (err! "Invalid parse-assignment target") (values e2 t3))))
    215       (values e2 t2))))
    216 
    217 (define (parse-expression expr toks)
    218   (parse-assignment expr toks))
    219 
    220 ;; Most of the binary operators have the same pattern:
    221 ;;   1. Evaluate the left side of the expression
    222 ;;   2. While the top is the operator, keep evaluating / building up the expression
    223 ;;   3. Return once the operator isn't matched
    224 ;; This function does it all, with a generic 'lower' to evaluate if 'types' matched
    225 (define (parse-generic maker expr tokens lower types)
    226   (let-values (((e2 t2) (lower expr tokens)))
    227     (let loop ((e e2) (ts t2))
    228       (if (top-type? ts types)
    229         ;; top of ts is an operator, eval right side on rest
    230         (let-values (((e3 t3) (lower e (cdr ts))))
    231           (loop (maker e (car ts) e3) t3))
    232         (values e ts)))))
    233 
    234 (define (parse-generic-binary expr tokens lower types)
    235   (parse-generic make-binary expr tokens lower types))
    236 
    237 (define (parse-equality expr toks)
    238   (parse-generic-binary expr toks parse-comparison '(BANG_EQUAL EQUAL_EQUAL)))
    239 
    240 (define (parse-comparison expr toks)
    241   (parse-generic-binary expr toks parse-term '(GREATER GREATER_EQUAL LESS LESS_EQUAL)))
    242 
    243 (define (parse-term expr toks)
    244   (parse-generic-binary expr toks parse-factor '(MINUS PLUS)))
    245 
    246 (define (parse-factor expr toks)
    247   (parse-generic-binary expr toks parse-unary '(SLASH STAR)))
    248 
    249 (define (parse-generic-logical expr tokens lower types)
    250   (parse-generic make-logical expr tokens lower types))
    251 
    252 (define (parse-or expr toks)
    253   (parse-generic-logical expr toks parse-and '(OR)))
    254 
    255 (define (parse-and expr toks)
    256   (parse-generic-logical expr toks parse-equality '(AND)))
    257 
    258 (define (parse-unary expr toks)
    259   (if (top-type? toks '(BANG MINUS))
    260       (let-values (((e2 t2) (parse-unary expr (cdr toks))))
    261         (values (make-unary (car toks) e2) t2))
    262       (parse-primary expr toks)))
    263 
    264 (define (parse-primary expr toks)
    265   (let ((top (car toks)) (rest (cdr toks)))
    266     (cond
    267      ((top-type? toks 'FALSE) (values (make-literal #f) rest))
    268      ((top-type? toks 'TRUE) (values (make-literal #t) rest))
    269      ((top-type? toks 'NIL) (values (make-literal ') rest))
    270      ((top-type? toks '(NUMBER STRING))
    271       (values (make-literal (token-literal top)) rest))
    272      ((top-type? toks 'IDENTIFIER) (values (make-variable top) rest))
    273      ((top-type? toks 'LEFT_PAREN)
    274       (let-values (((e2 t2) (parse-expression expr rest)))
    275 	(assert-type! t2 'RIGHT_PAREN "Expected ')'")
    276 	(values (make-grouping e2) (cdr t2))))
    277      (else (parse-err! toks "Unknown token")))))
    278 
    279 (define (parse-err! toks msg)
    280   (let ((top (car toks)))
    281     (if (top-type? toks 'EOF)
    282       (fname-err! (format "~A:~A ~A" (token-line top) "Error at end." msg))
    283       (fname-err! (format "~A:~A ~A. ~A"
    284                           (token-line top)
    285                           "Error at"
    286                           (token-lexeme top)
    287                           msg)))
    288     (let ((t2 (synchronize (cdr toks))))
    289       (parser-sync t2))))
    290 
    291 ;; Given a list of tokens, returns the next statement (best guess based
    292 ;; on keyword being a statement keyword OR seeing a semicolon)
    293 (define (synchronize tokens)
    294   (cond
    295     ((null? tokens) '())
    296     ((top-type? tokens 'SEMICOLON) (cdr tokens))
    297     ((top-type? tokens '(CLASS FUN VAR FOR IF WHILE PRINT RETURN)) tokens)
    298     (else (synchronize (cdr tokens)))))
    299 
    300 (define (parse tokens)
    301   ;; Loop through declarations, starting with tokens BUT using call/cc
    302   ;; to bookmark the loop so we can synchronize on parse-err!
    303   (let loop ((toks (call/cc (lambda (cc) (set! parser-sync cc) tokens))))
    304     (if (and (not (null? toks)) (not (top-type? toks 'EOF)))
    305       (let-values (((expr rest) (parse-declaration toks)))
    306         (cons expr (loop rest)))
    307       '())))