X-Git-Url: http://dolda2000.com/gitweb/?p=lisp-utils.git;a=blobdiff_plain;f=charcode.lisp;h=dfe5a19b82e9b783c5c15179b53103209075a388;hp=2a051aedf1910378bdb020bddad84ebdf446cecf;hb=f8eaaa29f361170fd67519525a45cb21dd258691;hpb=fd26d811d11e158fbba41b2a3c71a3a0a61b5df6 diff --git a/charcode.lisp b/charcode.lisp index 2a051ae..dfe5a19 100644 --- a/charcode.lisp +++ b/charcode.lisp @@ -3,9 +3,10 @@ (defpackage :charcode (:use :cl #+sbcl :sb-gray #-sbcl :gray) - (:export "MAKE-ENCODER" "MAKE-DECODER" "ENCODE-STRING" "DECODE-STRING" + (:export "MAKE-ENCODER" "MAKE-DECODER" "ENCODE-STRING" "DECODE-STRING" "SYSTEM-CHARSET" "CODING-ERROR" - "LATIN-1" "LATIN1" "UTF-8" "UTF8")) + "MAKE-CODEC-CHARACTER-STREAM" + "ASCII" "LATIN-1" "LATIN1" "UTF-8" "UTF8")) (in-package :charcode) ;;; General stuff @@ -50,7 +51,11 @@ (defun make-decoder (name) (the decoder-fun (values (funcall (get name 'make-decoder))))) -(defun encode-string (string coding) +(defun system-charset () + ;; XXX: Replace me with something perhaps more sensible. + 'utf-8) + +(defun encode-string (string &optional (coding (system-charset))) (declare (type string string)) (let ((encoder (make-encoder coding)) (buf (make-array (list (length string)) :element-type '(unsigned-byte 8) :adjustable t :fill-pointer 0))) @@ -58,7 +63,7 @@ (coding-error string (length string) buf "Encoding of string in ~A ended prematurely." coding)) buf)) -(defun decode-string (buffer coding) +(defun decode-string (buffer &optional (coding (system-charset))) (declare (type (array (unsigned-byte 8)) buffer)) (let ((decoder (make-decoder coding)) (buf (make-array (list (length buffer)) :element-type 'character :adjustable t :fill-pointer 0))) @@ -75,6 +80,10 @@ (read-pos :initform 0) (buffer :initform (make-array '(64) :element-type 'character :adjustable t :fill-pointer 0)))) +(defun make-codec-character-stream (real-stream &optional (charset (system-charset))) + (declare (type stream real-stream)) + (make-instance 'codec-character-stream :decoder (make-decoder charset) :encoder (make-encoder charset) :back real-stream)) + (defmethod close ((stream codec-character-stream) &key abort) (with-slots (back) stream (close back :abort abort)) @@ -90,7 +99,7 @@ (with-slots (decoder back buffer read-pos) stream (let ((readbuf (make-array (list len) :element-type '(unsigned-byte 8)))) (loop (unless (< (- (length buffer) read-pos) len) (return t)) - (let ((readlen (read-sequence readbuf back))) + (let ((readlen (read-sequence readbuf back :end (- len (- (length buffer) read-pos))))) (when (= readlen 0) (return-from ccs-ensure-buffer nil)) (funcall decoder readbuf buffer :end readlen)))))) @@ -99,8 +108,8 @@ (declare (type codec-character-stream stream)) (with-slots (read-pos buffer) stream (replace buffer buffer :start2 read-pos) - (setf (fill-pointer buffer) (- (fill-pointer buffer) read-pos) - read-pos 0))) + (setf (fill-pointer buffer) (- (fill-pointer buffer) read-pos) + read-pos 0))) (defmethod stream-read-char ((stream codec-character-stream)) (unless (ccs-ensure-buffer stream 1) @@ -118,8 +127,7 @@ (adjust-array buffer (list (setf (fill-pointer buffer) (+ len 16))))) (replace buffer buffer :start1 16 :end2 len))) - (setf (aref buffer read-pos) char) - (decf read-pos) + (setf (aref buffer (decf read-pos)) char) nil)) (defun ccs-wont-hang-p (stream) @@ -186,6 +194,55 @@ (declare (type character char)) (char-code char)) +;;; ASCII + +(defun decode-ascii (byteseq charseq &key (start 0) (end (length byteseq))) + (declare (type (array (unsigned-byte 8)) byteseq) + (type (array character) charseq) + (type fixnum start end)) + (loop + (restart-case + (loop + (unless (< start end) (return-from decode-ascii t)) + (let ((byte (aref byteseq (prog1 start (incf start))))) + (unless (< byte 128) + (coding-error byteseq start charseq "Invalid byte ~D in ASCII stream." byte)) + (vector-push-extend (unicode->char byte) charseq))) + (:replace-char (&optional (replacement (unicode->char #xfffd))) + :report "Replace the invalid byte with a character." + (vector-push-extend replacement charseq)) + (:skip-char () + :report "Ignore the invalid byte." + nil)))) + +(defun encode-ascii (charseq byteseq &key (start 0) (end (length charseq))) + (declare (type (array (unsigned-byte 8)) byteseq) + (type (array character) charseq) + (type fixnum start end)) + (loop + (restart-case + (loop + (unless (< start end) (return-from encode-ascii t)) + (vector-push-extend (let ((cp (char->unicode (aref charseq (prog1 start (incf start)))))) + (unless (< cp 128) + (coding-error charseq start byteseq "ASCII cannot encode code-points higher than 128.")) + cp) + byteseq)) + (:replace-char (&optional (replacement #\?)) + :report "Replace this character with another." + (vector-push-extend (char->unicode replacement) byteseq)) + (:skip-char () + :report "Ignore this character." + nil)))) + +(define-decoder (ascii) + #'decode-ascii) + +(define-encoder (ascii) + #'encode-ascii) + +(define-codec-synonyms ascii :ascii) + ;;; Latin-1 (defun decode-latin-1 (byteseq charseq &key (start 0) (end (length byteseq))) @@ -223,7 +280,7 @@ (define-encoder (latin-1) #'encode-latin-1) -(define-codec-synonyms latin-1 latin1 iso-8859-1) +(define-codec-synonyms latin-1 latin1 iso-8859-1 :latin-1 :latin1 :iso-8859-1) ;;; UTF-8 @@ -307,4 +364,4 @@ (setf mlen 0)))))))) #'decode))) -(define-codec-synonyms utf-8 utf8) +(define-codec-synonyms utf-8 utf8 :utf-8 :utf8)