commit 81c951c6e234c23e0cfbf3d658a9ac9b373c73f3 (patch)
parent b7516a8f8ebf4d6d03266992cbebc62ad12dde0f
Author: Alex Karle <alex@alexkarle.com>
Date: Sun, 2 Oct 2022 02:04:30 -0400
Update scanner to be stateful beyond location (in-comment, etc)
I think this makes sense? It's 2am and the recursion is getting to me...
Diffstat:
M | fisl.scm | | | 2 | +- |
M | scanner.scm | | | 92 | +++++++++++++++++++++++++++++++++++++++++++------------------------------------ |
2 files changed, 51 insertions(+), 43 deletions(-)
diff --git a/fisl.scm b/fisl.scm
@@ -11,7 +11,7 @@
(define (run code fname)
(let ((exit-code 0))
- (print (scan code fname))
+ (map print (scan code fname))
exit-code))
(define (run-prompt)
diff --git a/scanner.scm b/scanner.scm
@@ -15,52 +15,60 @@
(len ,len)))
(define (scan src fname)
- (define (comment i)
- ; parse comment until end, return stopping point
- (let loop ((curr i))
- (if (and (< curr (string-length src))
- (not (eq? #\newline (string-ref src curr))))
- (loop (add1 curr))
- (- curr i))))
-
(define (peek i)
; safe string-ref
(if (< i (string-length src))
(string-ref src i)
- 'nil))
+ #f))
- (define (loop i line)
- (define (tok type len)
- (make-token type (substring src i (+ i len)) 'nil line len))
- (if (< i (string-length src))
- (begin
- (let ((c (string-ref src i)) (n (peek (add1 i))))
- (let ((tok (cond
- ((eq? #\( c) (tok 'LEFT_PAREN 1))
- ((eq? #\) c) (tok 'RIGHT_PAREN 1))
- ((eq? #\{ c) (tok 'LEFT_BRACE 1))
- ((eq? #\} c) (tok 'RIGHT_BRACE 1))
- ((eq? #\, c) (tok 'COMMA 1))
- ((eq? #\. c) (tok 'DOT 1))
- ((eq? #\- c) (tok 'MINUS 1))
- ((eq? #\+ c) (tok 'PLUS 1))
- ((eq? #\; c) (tok 'SEMICOLON 1))
- ((eq? #\* c) (tok 'STAR 1))
- ((eq? #\! c) (if (eq? #\= n) (tok 'BANG_EQUAL 2) (tok 'BANG 1)))
- ((eq? #\= c) (if (eq? #\= n) (tok 'EQUAL_EQUAL 2) (tok 'EQUAL 1)))
- ((eq? #\< c) (if (eq? #\< n) (tok 'LESS_EQUAL 2) (tok 'LESS 1)))
- ((eq? #\> c) (if (eq? #\> n) (tok 'GREATER_EQUAL 2) (tok 'GREATER 1)))
- ((eq? #\/ c) (if (eq? #\/ n) (tok 'COMMENT (comment i)) (tok 'SLASH 1)))
- ((eq? #\space c) #f)
- ((eq? #\tab c) #f)
- ((eq? #\newline c) #f)
- ;; TODO: set/return hadError (keep scanning)
- (else (err (format "~A:~A:unexpected character: ~A" fname line c)) #f))))
- (if tok
- (begin (print tok)
- (loop (+ i (get tok 'len)) line))
- (loop (add1 i) line)))))
- 'EOF))
- (loop 0 1))
+ ; CI stores state as the pointer location, which works great
+ ; in a language that is prepared to update state within the
+ ; branches of the switch statement.
+ ;
+ ; Given that we're using recursion to loop over the characters,
+ ; it is an easier model to conceptualize the state of the
+ ; interpreter being instead what special tokens of arbitrary
+ ; length we might be in (as well as the pointer).
+
+ (define (get-tokens start line)
+ ; Gets all tokens after start
+ (define (tok type i len)
+ ; helper to make a token and recurse
+ (cons (make-token type (substring src i (+ i len)) #f line len)
+ (get-tokens (+ i len) line)))
+
+ (let loop ((i start) (nline line) (in-comment #f) (in-string #f) (in-number #f) (in-identifier #f))
+ (let ((c (peek i)) (n (peek (add1 i))))
+ ;(printf "c: ~A, n: ~A, i: ~A, com: ~A\n" c n i in-comment)
+ (if (not c)
+ (list (make-token 'EOF "" #f line 0))
+ (cond
+ (in-comment (if (eq? #\newline c)
+ (loop (add1 i) (add1 nline) #f #f #f #f)
+ (loop (add1 i) nline #t #f #f #f)))
+ (in-string #f)
+ (in-number #f)
+ (in-identifier #f)
+ (else (cond
+ ((eq? #\( c) (tok 'LEFT_PAREN i 1))
+ ((eq? #\) c) (tok 'RIGHT_PAREN i 1))
+ ((eq? #\{ c) (tok 'LEFT_BRACE i 1))
+ ((eq? #\} c) (tok 'RIGHT_BRACE i 1))
+ ((eq? #\, c) (tok 'COMMA i 1))
+ ((eq? #\. c) (tok 'DOT i 1))
+ ((eq? #\- c) (tok 'MINUS i 1))
+ ((eq? #\+ c) (tok 'PLUS i 1))
+ ((eq? #\; c) (tok 'SEMICOLON i 1))
+ ((eq? #\* c) (tok 'STAR i 1))
+ ((eq? #\! c) (if (eq? #\= n) (tok 'BANG_EQUAL i 2) (tok 'BANG i 1)))
+ ((eq? #\= c) (if (eq? #\= n) (tok 'EQUAL_EQUAL i 2) (tok 'EQUAL i 1)))
+ ((eq? #\< c) (if (eq? #\< n) (tok 'LESS_EQUAL i 2) (tok 'LESS i 1)))
+ ((eq? #\> c) (if (eq? #\> n) (tok 'GREATER_EQUAL i 2) (tok 'GREATER i 1)))
+ ((eq? #\/ c) (if (eq? #\/ n) (loop (add1 i) nline #t #f #f #f) (tok 'SLASH i 1)))
+ ((eq? #\space c) (loop (add1 i) nline #f #f #f #f))
+ ((eq? #\tab c) (loop (add1 i) nline #f #f #f #f))
+ ((eq? #\newline c) (loop (add1 i) (add1 nline) #f #f #f #f))
+ (else (err (format "~A:~A:unexpected character: ~A" fname 0 c)) #f))))))))
+ (get-tokens 0 1))
) ; end of module