fisl

fisl is scheme lox
git clone git://git.alexkarle.com.com/fisl
Log | Files | Refs | README

commit 81c951c6e234c23e0cfbf3d658a9ac9b373c73f3 (patch)
parent b7516a8f8ebf4d6d03266992cbebc62ad12dde0f
Author: Alex Karle <alex@alexkarle.com>
Date:   Sun,  2 Oct 2022 02:04:30 -0400

Update scanner to be stateful beyond location (in-comment, etc)

I think this makes sense? It's 2am and the recursion is getting to me...

Diffstat:
Mfisl.scm | 2+-
Mscanner.scm | 92+++++++++++++++++++++++++++++++++++++++++++------------------------------------
2 files changed, 51 insertions(+), 43 deletions(-)

diff --git a/fisl.scm b/fisl.scm @@ -11,7 +11,7 @@ (define (run code fname) (let ((exit-code 0)) - (print (scan code fname)) + (map print (scan code fname)) exit-code)) (define (run-prompt) diff --git a/scanner.scm b/scanner.scm @@ -15,52 +15,60 @@ (len ,len))) (define (scan src fname) - (define (comment i) - ; parse comment until end, return stopping point - (let loop ((curr i)) - (if (and (< curr (string-length src)) - (not (eq? #\newline (string-ref src curr)))) - (loop (add1 curr)) - (- curr i)))) - (define (peek i) ; safe string-ref (if (< i (string-length src)) (string-ref src i) - 'nil)) + #f)) - (define (loop i line) - (define (tok type len) - (make-token type (substring src i (+ i len)) 'nil line len)) - (if (< i (string-length src)) - (begin - (let ((c (string-ref src i)) (n (peek (add1 i)))) - (let ((tok (cond - ((eq? #\( c) (tok 'LEFT_PAREN 1)) - ((eq? #\) c) (tok 'RIGHT_PAREN 1)) - ((eq? #\{ c) (tok 'LEFT_BRACE 1)) - ((eq? #\} c) (tok 'RIGHT_BRACE 1)) - ((eq? #\, c) (tok 'COMMA 1)) - ((eq? #\. c) (tok 'DOT 1)) - ((eq? #\- c) (tok 'MINUS 1)) - ((eq? #\+ c) (tok 'PLUS 1)) - ((eq? #\; c) (tok 'SEMICOLON 1)) - ((eq? #\* c) (tok 'STAR 1)) - ((eq? #\! c) (if (eq? #\= n) (tok 'BANG_EQUAL 2) (tok 'BANG 1))) - ((eq? #\= c) (if (eq? #\= n) (tok 'EQUAL_EQUAL 2) (tok 'EQUAL 1))) - ((eq? #\< c) (if (eq? #\< n) (tok 'LESS_EQUAL 2) (tok 'LESS 1))) - ((eq? #\> c) (if (eq? #\> n) (tok 'GREATER_EQUAL 2) (tok 'GREATER 1))) - ((eq? #\/ c) (if (eq? #\/ n) (tok 'COMMENT (comment i)) (tok 'SLASH 1))) - ((eq? #\space c) #f) - ((eq? #\tab c) #f) - ((eq? #\newline c) #f) - ;; TODO: set/return hadError (keep scanning) - (else (err (format "~A:~A:unexpected character: ~A" fname line c)) #f)))) - (if tok - (begin (print tok) - (loop (+ i (get tok 'len)) line)) - (loop (add1 i) line))))) - 'EOF)) - (loop 0 1)) + ; CI stores state as the pointer location, which works great + ; in a language that is prepared to update state within the + ; branches of the switch statement. + ; + ; Given that we're using recursion to loop over the characters, + ; it is an easier model to conceptualize the state of the + ; interpreter being instead what special tokens of arbitrary + ; length we might be in (as well as the pointer). + + (define (get-tokens start line) + ; Gets all tokens after start + (define (tok type i len) + ; helper to make a token and recurse + (cons (make-token type (substring src i (+ i len)) #f line len) + (get-tokens (+ i len) line))) + + (let loop ((i start) (nline line) (in-comment #f) (in-string #f) (in-number #f) (in-identifier #f)) + (let ((c (peek i)) (n (peek (add1 i)))) + ;(printf "c: ~A, n: ~A, i: ~A, com: ~A\n" c n i in-comment) + (if (not c) + (list (make-token 'EOF "" #f line 0)) + (cond + (in-comment (if (eq? #\newline c) + (loop (add1 i) (add1 nline) #f #f #f #f) + (loop (add1 i) nline #t #f #f #f))) + (in-string #f) + (in-number #f) + (in-identifier #f) + (else (cond + ((eq? #\( c) (tok 'LEFT_PAREN i 1)) + ((eq? #\) c) (tok 'RIGHT_PAREN i 1)) + ((eq? #\{ c) (tok 'LEFT_BRACE i 1)) + ((eq? #\} c) (tok 'RIGHT_BRACE i 1)) + ((eq? #\, c) (tok 'COMMA i 1)) + ((eq? #\. c) (tok 'DOT i 1)) + ((eq? #\- c) (tok 'MINUS i 1)) + ((eq? #\+ c) (tok 'PLUS i 1)) + ((eq? #\; c) (tok 'SEMICOLON i 1)) + ((eq? #\* c) (tok 'STAR i 1)) + ((eq? #\! c) (if (eq? #\= n) (tok 'BANG_EQUAL i 2) (tok 'BANG i 1))) + ((eq? #\= c) (if (eq? #\= n) (tok 'EQUAL_EQUAL i 2) (tok 'EQUAL i 1))) + ((eq? #\< c) (if (eq? #\< n) (tok 'LESS_EQUAL i 2) (tok 'LESS i 1))) + ((eq? #\> c) (if (eq? #\> n) (tok 'GREATER_EQUAL i 2) (tok 'GREATER i 1))) + ((eq? #\/ c) (if (eq? #\/ n) (loop (add1 i) nline #t #f #f #f) (tok 'SLASH i 1))) + ((eq? #\space c) (loop (add1 i) nline #f #f #f #f)) + ((eq? #\tab c) (loop (add1 i) nline #f #f #f #f)) + ((eq? #\newline c) (loop (add1 i) (add1 nline) #f #f #f #f)) + (else (err (format "~A:~A:unexpected character: ~A" fname 0 c)) #f)))))))) + (get-tokens 0 1)) ) ; end of module