parser.scm (11125B) [raw]
1 ;; parser.scm -- parser routines 2 (import (chicken format)) 3 4 (define parser-sync #f) 5 6 7 ;; EXPRESSIONS 8 (define-record binary left operator right) 9 (define-record grouping expression) 10 (define-record literal value) 11 (define-record unary operator right) 12 (define-record variable name) 13 (define-record assignment name value) 14 (define-record logical left operator right) 15 16 (set-record-printer! binary 17 (lambda (x out) (fprintf out "(~A ~S ~S)" 18 (token-lexeme (binary-operator x)) 19 (binary-left x) 20 (binary-right x)))) 21 22 (set-record-printer! grouping 23 (lambda (x out) (fprintf out "(group ~S)" (grouping-expression x)))) 24 25 (set-record-printer! literal 26 (lambda (x out) (fprintf out "~S" (literal-value x)))) 27 28 (set-record-printer! unary 29 (lambda (x out) (fprintf out "(~A ~S)" 30 (token-lexeme (unary-operator x)) 31 (unary-right x)))) 32 33 (set-record-printer! variable 34 (lambda (x out) (fprintf out "~A" (token-lexeme (variable-name x))))) 35 36 (set-record-printer! assignment 37 (lambda (x out) (fprintf out "(set! ~A ~A)" (token-lexeme (assignment-name x)) (assignment-value x)))) 38 39 (set-record-printer! logical 40 (lambda (x out) (fprintf out "(~A ~A ~A)" 41 (token-lexeme (logical-operator x)) 42 (logical-left x) 43 (logical-right x)))) 44 45 46 ;; STATEMENTS 47 (define-record print-stmt value) 48 (define-record expr-stmt value) 49 (define-record var-stmt name init) 50 (define-record block stmts) 51 (define-record if-stmt cond-expr then-stmt else-stmt) 52 (define-record while-stmt cond-expr body-stmt) 53 54 (set-record-printer! print-stmt 55 (lambda (x out) 56 (fprintf out "(print ~A)" (print-stmt-value x)))) 57 58 (set-record-printer! expr-stmt 59 (lambda (x out) 60 (fprintf out "(expr ~A)" (expr-stmt-value x)))) 61 62 (set-record-printer! var-stmt 63 (lambda (x out) 64 (fprintf out "(var ~A ~A)" (var-stmt-name x) (var-stmt-init x)))) 65 66 (set-record-printer! block 67 (lambda (x out) 68 (fprintf out "(block ~A)" (block-stmts x)))) 69 70 (set-record-printer! if-stmt 71 (lambda (x out) 72 (fprintf out "(if ~A ~A ~A)" 73 (if-stmt-cond-expr x) 74 (if-stmt-then-stmt x) 75 (if-stmt-else-stmt x)))) 76 77 (set-record-printer! while-stmt 78 (lambda (x out) 79 (fprintf out "(while ~A ~A)" (while-stmt-cond-expr x) (while-stmt-body-stmt x)))) 80 81 ;; helper to check if first is of types 82 (define (top-type? tokens types) 83 (if (symbol? types) 84 (eq? (token-type (car tokens)) types) 85 (memq (token-type (car tokens)) types))) 86 87 88 (define (assert-type! toks types msg) 89 (if (not (top-type? toks types)) 90 (parse-err! toks msg))) 91 92 93 (define (parse-declaration tokens) 94 (if (top-type? tokens 'VAR) 95 (parse-var-decl (cdr tokens)) 96 (parse-statement tokens))) 97 98 (define (parse-var-decl tokens) 99 (assert-type! tokens 'IDENTIFIER "expected variable name") 100 (let-values (((init toks) 101 (if (top-type? (cdr tokens) 'EQUAL) 102 (parse-expression '() (cddr tokens)) 103 (values '() (cdr tokens))))) 104 (assert-type! toks 'SEMICOLON "Expected ';' after variable declaration") 105 (values (make-var-stmt (car tokens) init) (cdr toks)))) 106 107 (define (parse-statement tokens) 108 (cond ((top-type? tokens 'PRINT) 109 (parse-print-statement (cdr tokens))) 110 ((top-type? tokens 'FOR) 111 (parse-for-statement (cdr tokens))) 112 ((top-type? tokens 'IF) 113 (parse-if-statement (cdr tokens))) 114 ((top-type? tokens 'WHILE) 115 (parse-while-statement (cdr tokens))) 116 ((top-type? tokens 'LEFT_BRACE) 117 (let-values (((stmts toks) (parse-block (cdr tokens)))) 118 ;; TODO: return the block record instead of stmts? Not the 119 ;; way the book does it but seems cleaner :thinking: 120 (values (make-block stmts) toks))) 121 (else (parse-expression-statement tokens)))) 122 123 ;; Used for print and expr statements, which have the same formula 124 (define (parse-generic-stmt tokens maker) 125 (let-values (((expr toks) (parse-expression '() tokens))) 126 (if (top-type? toks 'SEMICOLON) 127 (values (maker expr) (cdr toks)) 128 (if in-repl 129 (values (maker expr) toks) 130 ;; TODO: this might break for-loop parsing in the repl? 131 (parse-err! toks "expected ;"))))) 132 133 (define (parse-print-statement tokens) 134 (parse-generic-stmt tokens make-print-stmt)) 135 136 (define (parse-expression-statement tokens) 137 (parse-generic-stmt tokens make-expr-stmt)) 138 139 (define (parse-while-statement tokens) 140 (assert-type! tokens 'LEFT_PAREN "Expected '(' after 'while'") 141 (let-values (((cond-expr toks) (parse-expression '() (cdr tokens)))) 142 (assert-type! toks 'RIGHT_PAREN "Expected ')' after while condition") 143 (let-values (((body-stmt toks2) (parse-statement (cdr toks)))) 144 (values (make-while-stmt cond-expr body-stmt) toks2)))) 145 146 (define (parse-for-statement tokens) 147 (define (extract-init toks) 148 (cond ((top-type? toks 'SEMICOLON) 149 (values '() (cdr toks))) 150 ((top-type? toks 'VAR) 151 (parse-var-decl (cdr toks))) 152 (else (parse-expression-statement toks)))) 153 (define (extract-cond toks) 154 (cond ((top-type? toks 'SEMICOLON) 155 (values '() (cdr toks))) 156 (else (parse-expression '() toks)))) 157 (define (extract-incr toks) 158 (assert-type! toks 'SEMICOLON "Expected ';' after loop condition") 159 (cond ((top-type? (cdr toks) 'RIGHT_PAREN) 160 (values '() (cddr toks))) 161 (else (parse-expression '() (cdr toks))))) 162 (define (extract-body toks) 163 (assert-type! toks 'RIGHT_PAREN "Expected ')' after for clauses") 164 (parse-statement (cdr toks))) 165 (define (body-append-incr body incr) 166 (if (null? incr) 167 body 168 (make-block (list body (make-expr-stmt incr))))) 169 (define (body-to-while body conde) 170 (if (null? conde) 171 (make-while-stmt (make-literal #t) body) 172 (make-while-stmt conde body))) 173 (define (while-add-init while init) 174 (if (null? init) 175 while 176 (make-block (list init while)))) 177 (assert-type! tokens 'LEFT_PAREN "Expected '(' after 'for'") 178 (let*-values (((init t1) (extract-init (cdr tokens))) 179 ((conde t2) (extract-cond t1)) 180 ((incr t3) (extract-incr t2)) 181 ((body t4) (extract-body t3))) 182 (values (while-add-init (body-to-while (body-append-incr body incr) conde) init) 183 t4))) 184 185 186 (define (parse-block tokens) 187 (let loop ((stmts '()) (toks tokens)) 188 (if (top-type? toks 'RIGHT_BRACE) 189 (values stmts (cdr toks)) 190 (if (top-type? toks 'EOF) 191 (parse-err! toks "expected '}' after block") 192 (let-values (((decl rest) (parse-declaration toks))) 193 ;; TODO: can we do this with cons instead of append? 194 ;; I don't think so, given that we'd need to (cons decl (loop ...)) 195 ;; but (loop) returns multiple values (sigh) 196 (loop (append stmts (list decl)) rest)))))) 197 198 (define (parse-if-statement tokens) 199 (assert-type! tokens 'LEFT_PAREN "Expected '(' after 'if'") 200 (let-values (((cond-expr toks) (parse-expression '() (cdr tokens)))) 201 (assert-type! toks 'RIGHT_PAREN "Expected ')' after if condition") 202 (let-values (((then-stmt toks2) (parse-statement (cdr toks)))) 203 (if (top-type? toks2 'ELSE) 204 (let-values (((else-stmt toks3) (parse-statement (cdr toks2)))) 205 (values (make-if-stmt cond-expr then-stmt else-stmt) toks3)) 206 (values (make-if-stmt cond-expr then-stmt '()) toks2))))) 207 208 (define (parse-assignment expr toks) 209 (let-values (((e2 t2) (parse-or expr toks))) 210 (if (top-type? t2 'EQUAL) 211 (let-values (((e3 t3) (parse-assignment e2 (cdr t2)))) 212 (if (variable? e2) 213 (values (make-assignment (variable-name e2) e3) t3) 214 (begin (err! "Invalid parse-assignment target") (values e2 t3)))) 215 (values e2 t2)))) 216 217 (define (parse-expression expr toks) 218 (parse-assignment expr toks)) 219 220 ;; Most of the binary operators have the same pattern: 221 ;; 1. Evaluate the left side of the expression 222 ;; 2. While the top is the operator, keep evaluating / building up the expression 223 ;; 3. Return once the operator isn't matched 224 ;; This function does it all, with a generic 'lower' to evaluate if 'types' matched 225 (define (parse-generic maker expr tokens lower types) 226 (let-values (((e2 t2) (lower expr tokens))) 227 (let loop ((e e2) (ts t2)) 228 (if (top-type? ts types) 229 ;; top of ts is an operator, eval right side on rest 230 (let-values (((e3 t3) (lower e (cdr ts)))) 231 (loop (maker e (car ts) e3) t3)) 232 (values e ts))))) 233 234 (define (parse-generic-binary expr tokens lower types) 235 (parse-generic make-binary expr tokens lower types)) 236 237 (define (parse-equality expr toks) 238 (parse-generic-binary expr toks parse-comparison '(BANG_EQUAL EQUAL_EQUAL))) 239 240 (define (parse-comparison expr toks) 241 (parse-generic-binary expr toks parse-term '(GREATER GREATER_EQUAL LESS LESS_EQUAL))) 242 243 (define (parse-term expr toks) 244 (parse-generic-binary expr toks parse-factor '(MINUS PLUS))) 245 246 (define (parse-factor expr toks) 247 (parse-generic-binary expr toks parse-unary '(SLASH STAR))) 248 249 (define (parse-generic-logical expr tokens lower types) 250 (parse-generic make-logical expr tokens lower types)) 251 252 (define (parse-or expr toks) 253 (parse-generic-logical expr toks parse-and '(OR))) 254 255 (define (parse-and expr toks) 256 (parse-generic-logical expr toks parse-equality '(AND))) 257 258 (define (parse-unary expr toks) 259 (if (top-type? toks '(BANG MINUS)) 260 (let-values (((e2 t2) (parse-unary expr (cdr toks)))) 261 (values (make-unary (car toks) e2) t2)) 262 (parse-primary expr toks))) 263 264 (define (parse-primary expr toks) 265 (let ((top (car toks)) (rest (cdr toks))) 266 (cond 267 ((top-type? toks 'FALSE) (values (make-literal #f) rest)) 268 ((top-type? toks 'TRUE) (values (make-literal #t) rest)) 269 ((top-type? toks 'NIL) (values (make-literal ') rest)) 270 ((top-type? toks '(NUMBER STRING)) 271 (values (make-literal (token-literal top)) rest)) 272 ((top-type? toks 'IDENTIFIER) (values (make-variable top) rest)) 273 ((top-type? toks 'LEFT_PAREN) 274 (let-values (((e2 t2) (parse-expression expr rest))) 275 (assert-type! t2 'RIGHT_PAREN "Expected ')'") 276 (values (make-grouping e2) (cdr t2)))) 277 (else (parse-err! toks "Unknown token"))))) 278 279 (define (parse-err! toks msg) 280 (let ((top (car toks))) 281 (if (top-type? toks 'EOF) 282 (fname-err! (format "~A:~A ~A" (token-line top) "Error at end." msg)) 283 (fname-err! (format "~A:~A ~A. ~A" 284 (token-line top) 285 "Error at" 286 (token-lexeme top) 287 msg))) 288 (let ((t2 (synchronize (cdr toks)))) 289 (parser-sync t2)))) 290 291 ;; Given a list of tokens, returns the next statement (best guess based 292 ;; on keyword being a statement keyword OR seeing a semicolon) 293 (define (synchronize tokens) 294 (cond 295 ((null? tokens) '()) 296 ((top-type? tokens 'SEMICOLON) (cdr tokens)) 297 ((top-type? tokens '(CLASS FUN VAR FOR IF WHILE PRINT RETURN)) tokens) 298 (else (synchronize (cdr tokens))))) 299 300 (define (parse tokens) 301 ;; Loop through declarations, starting with tokens BUT using call/cc 302 ;; to bookmark the loop so we can synchronize on parse-err! 303 (let loop ((toks (call/cc (lambda (cc) (set! parser-sync cc) tokens)))) 304 (if (and (not (null? toks)) (not (top-type? toks 'EOF))) 305 (let-values (((expr rest) (parse-declaration toks))) 306 (cons expr (loop rest))) 307 '())))