From ac9973cc56b3a10e21339a6cc767433abcbf0ac8 Mon Sep 17 00:00:00 2001 From: Daniel Ziltener Date: Sat, 14 Sep 2024 14:53:27 +0200 Subject: [PATCH] Escape sequence interpreter --- README.org | 7 +++--- srfi-180.egg | 2 +- srfi-180.impl.scm | 48 ++++++++++++++++++++++++++++++++++----- srfi-180.org | 53 ++++++++++++++++++++++++++++++++++++------- srfi-180.release-info | 1 + tests/run.scm | 4 ++-- 6 files changed, 95 insertions(+), 20 deletions(-) diff --git a/README.org b/README.org index 571a2a4..3b91bb5 100644 --- a/README.org +++ b/README.org @@ -1,4 +1,4 @@ -# Created 2024-09-14 Sat 00:15 +# Created 2024-09-14 Sat 14:52 #+title: SRFI-180 #+author: Daniel Ziltener #+export_file_name: README.org @@ -182,8 +182,9 @@ Daniel Ziltener ** Version History #+name: version-history -| 1.5 | Reimplementation | -| 1.0 | Reference Implementation | +| 1.5.1 | Escape sequences | +| 1.5.0 | Reimplementation | +| 1.0.0 | Reference Implementation | * License diff --git a/srfi-180.egg b/srfi-180.egg index b121495..6ed63ba 100644 --- a/srfi-180.egg +++ b/srfi-180.egg @@ -3,7 +3,7 @@ (synopsis "A JSON parser and printer that supports JSON bigger than memory.") (category parsing) (license "BSD") - (version "1.5.0") + (version "1.5.1") (dependencies srfi-34 srfi-35 srfi-158) (test-dependencies test) (components diff --git a/srfi-180.impl.scm b/srfi-180.impl.scm index 3d2d863..0034d74 100644 --- a/srfi-180.impl.scm +++ b/srfi-180.impl.scm @@ -103,7 +103,7 @@ (let-values (((token next-char* new-charcount nesting-delta) ((determine-reader-proc next-char) json-number-of-characters next-char input-generator))) - (if (not (eq? '() token)) + (unless (null? token) (yield token)) (loop next-char* (+ (or json-nesting-depth 0) nesting-delta) new-charcount))))))))) @@ -146,6 +146,36 @@ next-char* (+ charcount 1) 0) (read-number (+ charcount 1) next-char* input-proc accu)))) +(define (translate-escape char input-proc) + (case char + ((#\") #\") + ((#\') #\') + ((#\\) #\\) + ((#\n) #\newline) + ((#\t) #\tab) + ((#\u) (read-unicode-escape input-proc)) + ((#\x) (read-hex-escape input-proc)) + ((#\O) #\null) + ((#\r) #\return) + ((#\|) #\|) + ((#\v) #\vtab) + ((#\a) #\alarm) + ((#\b) #\backspace))) + +(define (read-hex-escape input-proc) + (let ((pos1 (input-proc)) + (pos2 (input-proc))) + (integer->char + (string->number (list->string (list pos1 pos2)) 16)))) + +(define (read-unicode-escape input-proc) + (let ((pos1 (input-proc)) + (pos2 (input-proc)) + (pos3 (input-proc)) + (pos4 (input-proc))) + (integer->char + (string->number (list->string (list pos1 pos2 pos3 pos4)) 16)))) + (define (read-string charcount next-char input-proc #!optional (beginning? #t) (accu '()) (esc? #f)) (cond (beginning? @@ -156,10 +186,16 @@ ((and (not esc?) (char=? next-char #\")) (values (reverse-list->string accu) (input-proc) (+ charcount 1) 0)) - (else (read-string (+ charcount 1) - (input-proc) input-proc - #f (cons next-char accu) - (and (not esc?) (char=? next-char #\\)))))) + ((and (not esc?) (char=? next-char #\\)) + (read-string (+ charcount 1) (input-proc) input-proc #f accu #t)) + (else (let ((current-char (if esc? + (translate-escape next-char input-proc) + next-char))) + (read-string (+ charcount 1) + (input-proc) input-proc + #f + (cons current-char accu) + #f))))) (define-record json-foldstate mode cache accumulator) @@ -172,7 +208,7 @@ (cons obj (json-foldstate-accumulator foldstate))) foldstate)) ((%object) (begin - (if (equal? '() (json-foldstate-cache foldstate)) + (if (null? (json-foldstate-cache foldstate)) (begin (json-foldstate-cache-set! foldstate obj)) (begin diff --git a/srfi-180.org b/srfi-180.org index fcdf6c1..1cf7b35 100644 --- a/srfi-180.org +++ b/srfi-180.org @@ -305,7 +305,7 @@ Streaming event-based JSON reader. =PORT-OR-GENERATOR= default value is the valu (let-values (((token next-char* new-charcount nesting-delta) ((determine-reader-proc next-char) json-number-of-characters next-char input-generator))) - (if (not (eq? '() token)) + (unless (null? token) (yield token)) (loop next-char* (+ (or json-nesting-depth 0) nesting-delta) new-charcount))))))))) #+end_src @@ -571,6 +571,36 @@ String reader #+name: string-reader #+begin_src scheme :tangle srfi-180.impl.scm :exports none :results silent + (define (translate-escape char input-proc) + (case char + ((#\") #\") + ((#\') #\') + ((#\\) #\\) + ((#\n) #\newline) + ((#\t) #\tab) + ((#\u) (read-unicode-escape input-proc)) + ((#\x) (read-hex-escape input-proc)) + ((#\O) #\null) + ((#\r) #\return) + ((#\|) #\|) + ((#\v) #\vtab) + ((#\a) #\alarm) + ((#\b) #\backspace))) + + (define (read-hex-escape input-proc) + (let ((pos1 (input-proc)) + (pos2 (input-proc))) + (integer->char + (string->number (list->string (list pos1 pos2)) 16)))) + + (define (read-unicode-escape input-proc) + (let ((pos1 (input-proc)) + (pos2 (input-proc)) + (pos3 (input-proc)) + (pos4 (input-proc))) + (integer->char + (string->number (list->string (list pos1 pos2 pos3 pos4)) 16)))) + (define (read-string charcount next-char input-proc #!optional (beginning? #t) (accu '()) (esc? #f)) (cond (beginning? @@ -581,10 +611,16 @@ String reader ((and (not esc?) (char=? next-char #\")) (values (reverse-list->string accu) (input-proc) (+ charcount 1) 0)) - (else (read-string (+ charcount 1) - (input-proc) input-proc - #f (cons next-char accu) - (and (not esc?) (char=? next-char #\\)))))) + ((and (not esc?) (char=? next-char #\\)) + (read-string (+ charcount 1) (input-proc) input-proc #f accu #t)) + (else (let ((current-char (if esc? + (translate-escape next-char input-proc) + next-char))) + (read-string (+ charcount 1) + (input-proc) input-proc + #f + (cons current-char accu) + #f))))) #+end_src #+name: string-reader-test @@ -593,9 +629,9 @@ String reader <> <> (test-group "String reading" - (let ((input '(#\T #\e #\s #\t #\space #\T #\e #\\ #\s #\\ #\" #\t #\" #\space))) + (let ((input '(#\T #\e #\s #\t #\space #\T #\e #\s #\\ #\" #\t #\" #\space))) (test "String" - '("Test Te\\s\\\"t" #\space 14) + '("Test Tes\"t" #\space 13) (let-values (((val input charcount nesting-delta) (read-string 0 #\" (lambda () (let ((next (car input))) (set! input (cdr input)) @@ -635,7 +671,7 @@ Fundamental JSON iterator. (cons obj (json-foldstate-accumulator foldstate))) foldstate)) ((%object) (begin - (if (equal? '() (json-foldstate-cache foldstate)) + (if (null? (json-foldstate-cache foldstate)) (begin (json-foldstate-cache-set! foldstate obj)) (begin @@ -963,6 +999,7 @@ Daniel Ziltener ** Version History #+name: version-history +| 1.5.1 | Escape sequences | | 1.5.0 | Reimplementation | | 1.0.0 | Reference Implementation | diff --git a/srfi-180.release-info b/srfi-180.release-info index 2de4ead..f7b348e 100644 --- a/srfi-180.release-info +++ b/srfi-180.release-info @@ -1,5 +1,6 @@ ;; -*- Scheme -*- (repo git "https://gitea.lyrion.ch/Chicken/srfi-180.git") (uri targz "https://gitea.lyrion.ch/Chicken/srfi-180/archive/{egg-release}.tar.gz") +(release "1.5.1") ;; Escape sequences (release "1.5.0") ;; Reimplementation (release "1.0.0") ;; Reference Implementation diff --git a/tests/run.scm b/tests/run.scm index b16c597..694d659 100644 --- a/tests/run.scm +++ b/tests/run.scm @@ -79,9 +79,9 @@ (list val input charcount))))) (test-group "String reading" - (let ((input '(#\T #\e #\s #\t #\space #\T #\e #\\ #\s #\\ #\" #\t #\" #\space))) + (let ((input '(#\T #\e #\s #\t #\space #\T #\e #\s #\\ #\" #\t #\" #\space))) (test "String" - '("Test Te\\s\\\"t" #\space 14) + '("Test Tes\"t" #\space 13) (let-values (((val input charcount nesting-delta) (read-string 0 #\" (lambda () (let ((next (car input))) (set! input (cdr input))