fisl

fisl is scheme lox
git clone git://git.alexkarle.com.com/fisl
Log | Files | Refs | README | LICENSE

scanner.scm (4937B) [raw]


      1 ;; scanner.scm -- tokenizes input
      2 (import (chicken format))
      3 
      4 ;; Auto-generates the scaffolding getters and setters
      5 ;;   make-token, token-type, set-token-type!, etc
      6 (define-record token type lexeme literal line)
      7 (set-record-printer! token (lambda (t out)
      8 			     (fprintf out "#,(token type:~S lex:~S lit:~S ln:~S)"
      9 				      (token-type t) (token-lexeme t) (token-literal t) (token-line t))))
     10 
     11 (define (digit? c)
     12   (and c (char<=? #\0 c) (char>=? #\9 c)))
     13 
     14 (define (alpha? c)
     15   (and c
     16        (or
     17         (eq? c #\_)
     18         (and (char<=? #\a c) (char>=? #\z c))
     19         (and (char<=? #\A c) (char>=? #\Z c)))))
     20 
     21 (define (get-keyword k)
     22   (alist-ref k '(("and"   . AND)
     23 		 ("class" . CLASS)
     24 		 ("else"  . ELSE)
     25 		 ("false" . FALSE)
     26 		 ("for"   . FOR)
     27 		 ("fun"   . FUN)
     28 		 ("if"    . IF)
     29 		 ("nil"   . NIL)
     30 		 ("or"    . OR)
     31 		 ("print" . PRINT)
     32 		 ("return". RETURN)
     33 		 ("super" . SUPER)
     34 		 ("this"  . THIS)
     35 		 ("true"  . TRUE)
     36 		 ("var"   . VAR)
     37 		 ("while" . WHILE)) equal?))
     38 
     39 (define (alnum? c)
     40   (and c (or (alpha? c) (digit? c))))
     41 
     42 (define (scan src)
     43   (define (peek i)
     44     ;; safe string-ref
     45     (if (< i (string-length src))
     46         (string-ref src i)
     47         #f))
     48 
     49   (define (get-tokens s i line in)
     50     ;; Gets all tokens after 'start', tracks state in i (current char), line, in
     51     (define (tok-range type s2 i2)
     52       ;; Helper to make a token, cons it to our list, and recurse with fresh state
     53       (let ((text (substring src s2 (add1 i2))))
     54         (let ((tok (cond
     55                     ((eq? type 'STRING) (make-token type text (substring src (add1 s2) i2) line))
     56                     ((eq? type 'NUMBER) (make-token type text (string->number text) line))
     57                     ((eq? type 'IDENTIFIER)
     58                      (let ((k (get-keyword text)))
     59                        (if k
     60                            (make-token k text #f line)
     61                            (make-token 'IDENTIFIER text #f line))))
     62                     (else (make-token type text #f line)))))
     63           (cons tok (get-tokens (add1 i2) (add1 i2) line #f)))))
     64 
     65     (define (tok type)
     66       ;; helper to tokenize current span
     67       (tok-range type s i))
     68 
     69     (define (skip . line2)
     70       ;; Helper to skip this character range
     71       (get-tokens (add1 i) (add1 i) (optional line2 line) in))
     72 
     73     (define (advance . line2)
     74       ;; Helper to iterate; keeps start but increments range
     75       (get-tokens s (add1 i) (optional line2 line) in))
     76 
     77     (let ((c (peek i)) (n (peek (add1 i))))
     78       (if (and (not in) (not c))
     79           (list (make-token 'EOF "" #f line))
     80           (cond
     81            ((eq? in 'comment) (if (or (not c) (eq? #\newline c))
     82                                   (get-tokens (add1 i) (add1 i) (add1 line) #f)
     83                                   (advance)))
     84            ((eq? in 'string)
     85             (cond
     86              ((not c) (fname-err! (format "~A:unterminated string" line)))
     87              ((eq? #\" c) (tok 'STRING))
     88              ((eq? #\newline c) (advance (add1 line)))
     89              (else (advance))))
     90            ((eq? in 'number)
     91             (cond
     92              ((digit? c) (advance))
     93              ((eq? #\. c) (get-tokens s (add1 i) line 'decimal))
     94              (else (tok-range 'NUMBER s (sub1 i)))))
     95            ((eq? in 'decimal)
     96             (cond
     97              ((digit? c) (advance))
     98              (else (tok-range 'NUMBER s (sub1 i)))))
     99            ((eq? in 'alpha)
    100             (cond
    101              ((alnum? c) (advance))
    102              (else (tok-range 'IDENTIFIER s (sub1 i)))))
    103            (else (cond
    104                   ((eq? #\( c) (tok 'LEFT_PAREN))
    105                   ((eq? #\) c) (tok 'RIGHT_PAREN))
    106                   ((eq? #\{ c) (tok 'LEFT_BRACE))
    107                   ((eq? #\} c) (tok 'RIGHT_BRACE))
    108                   ((eq? #\, c) (tok 'COMMA))
    109                   ((eq? #\. c) (tok 'DOT))
    110                   ((eq? #\- c) (tok 'MINUS))
    111                   ((eq? #\+ c) (tok 'PLUS))
    112                   ((eq? #\; c) (tok 'SEMICOLON))
    113                   ((eq? #\* c) (tok 'STAR))
    114                   ((eq? #\! c) (if (eq? #\= n) (tok-range 'BANG_EQUAL s (add1 i)) (tok 'BANG)))
    115                   ((eq? #\= c) (if (eq? #\= n) (tok-range 'EQUAL_EQUAL s (add1 i) ) (tok 'EQUAL)))
    116                   ((eq? #\< c) (if (eq? #\= n) (tok-range 'LESS_EQUAL s (add1 i) ) (tok 'LESS)))
    117                   ((eq? #\> c) (if (eq? #\= n) (tok-range 'GREATER_EQUAL s (add1 i) ) (tok 'GREATER)))
    118                   ((eq? #\/ c) (if (eq? #\/ n) (get-tokens s (add1 i) line 'comment) (tok 'SLASH)))
    119                   ((eq? #\" c) (get-tokens s (add1 i) line 'string))
    120                   ((digit? c) (get-tokens s (add1 i) line 'number))
    121                   ((alpha? c) (get-tokens s (add1 i) line 'alpha))
    122                   ((eq? #\space c) (skip))
    123                   ((eq? #\tab c) (skip))
    124                   ((eq? #\newline c) (skip (add1 line)))
    125                   (else (fname-err! (format "~A:unexpected character: ~A" line c)) (skip))))))))
    126 
    127   (get-tokens 0 0 1 #f))