Changes to COM.INFORMATIMAGO.COMMON-LISP.CESARUM.ASCII and COM.INFORMATIMAGO.COMMON-LISP.CESARUM.CHARACTER:

Pascal J. Bourguignon [2021-05-23 08:15]
Changes to COM.INFORMATIMAGO.COMMON-LISP.CESARUM.ASCII and COM.INFORMATIMAGO.COMMON-LISP.CESARUM.CHARACTER:
- reversed dependency relationship between .ASCII and .CHARACTER.
- renamed CODE-ASCII as ASCII-PRINTABLE-CODE-CHAR, and implemented a CODE-ASCII that can accept ASCII control code on implementations supporting them.
- corrected REPLACE-ASCII-CHARACTERS, and optimized functions when the implementation (HAS-ASCII-CODE-P).
- clarified STANDARD-CHARACTER-IS-ASCII-CODED-P and HAS-ASCII-CODE-P.
- added a :HAS-ASCII-STANDARD-CHARACTERS feature.
Filename
common-lisp/cesarum/ascii-test.lisp
common-lisp/cesarum/ascii.lisp
common-lisp/cesarum/character.lisp
common-lisp/cesarum/com.informatimago.common-lisp.cesarum.asd
diff --git a/common-lisp/cesarum/ascii-test.lisp b/common-lisp/cesarum/ascii-test.lisp
index cbc9dcd..11bc51e 100644
--- a/common-lisp/cesarum/ascii-test.lisp
+++ b/common-lisp/cesarum/ascii-test.lisp
@@ -71,7 +71,18 @@ RETURN: :success
   #| TODO: Added more testing of bytes comparisons.|#)


+(define-test test/replace-ascii-characters ()
+  (multiple-value-bind (string end)
+      (replace-ascii-characters (make-string 10 :initial-element #\*)
+                                #(13 10)
+                                :newline :crlf)
+    (assert-true (char= #\newline (aref string 0)))
+    (assert-true (char= #\*       (aref string 1)))
+    (assert-true (= 10 (length string)))
+    (assert-true (= 1  end))))
+
 (define-test test/all ()
-  (test/ascii))
+  (test/ascii)
+  (test/replace-ascii-characters))

 ;;;; THE END ;;;;
diff --git a/common-lisp/cesarum/ascii.lisp b/common-lisp/cesarum/ascii.lisp
index 9a457bc..2d4ca40 100644
--- a/common-lisp/cesarum/ascii.lisp
+++ b/common-lisp/cesarum/ascii.lisp
@@ -8,6 +8,12 @@
 ;;;;AUTHORS
 ;;;;    <PJB> Pascal J. Bourguignon <pjb@informatimago.com>
 ;;;;MODIFICATIONS
+;;;;    2021-05-22 <PJB> Made use of .character:has-ascii-code-p to
+;;;;                     optimize CODE-ASCII and ASCI-CODE.  Renamed
+;;;;                     CODE-ASCII to ASCII-PRINTABLE-CODE-CHAR, and
+;;;;                     implemented CODE-ASCII to map control codes
+;;;;                     to characters when has-ascii-code.
+;;;;                     Renamed CODE-ASCII-DIGIT-P to ASCII-DIGIT-P.
 ;;;;    2021-05-14 <PJB> Added REPLACE-ASCII-BYTES and REPLACE-ASCII-CHARACTERS.
 ;;;;    2012-04-20 <PJB> Added conditions;
 ;;;;                     Added :start and :stop to ASCII-STRING and ASCII-BYTES.
@@ -38,19 +44,30 @@
   (setf *readtable* (copy-readtable nil)))
 (defpackage "COM.INFORMATIMAGO.COMMON-LISP.CESARUM.ASCII"
   (:use "COMMON-LISP"
-        "COM.INFORMATIMAGO.COMMON-LISP.CESARUM.ECMA048")
+        "COM.INFORMATIMAGO.COMMON-LISP.CESARUM.ECMA048"
+        "COM.INFORMATIMAGO.COMMON-LISP.CESARUM.CHARACTER")
   (:shadow "ED")
-  (:export
+  (:export ; re-export from ECMA048
    "NUL" "SOH" "STX" "ETX" "EOT" "ENQ" "ACK" "BEL" "BS" "HT" "LF" "VT"
    "FF" "CR" "SO" "SI" "DLE" "DC1" "DC2" "DC3" "DC4" "NAK" "SYN" "ETB"
-   "CAN" "EM" "SUB" "ESC" "FS" "GS" "RS" "US" "DEL" "SP"
+   "CAN" "EM" "SUB" "ESC" "FS" "GS" "RS" "US" "SP" "DEL")
+  (:export
    "*NEWLINE*" "*ASCII-CHARACTERS*"  "*HEXADECIMAL-DIGITS*"
    "ENCODING-ERROR" "ENCODING-ERROR-CHARACTER"
    "ENCODING-ERROR-CODING-SYSTEM" "ENCODING-ERROR-MESSAGE"
    "DECODING-ERROR" "DECODING-ERROR-CODE"
    "DECODING-ERROR-CODING-SYSTEM" "DECODING-ERROR-MESSAGE"
-   "ASCII-CODE"   "CODE-ASCII"   "CODE-ASCII-DIGIT-P"
-   "ASCII-CONTROL-CODE-P"    "ASCII-PRINTABLE-CODE-P"
+
+   "ASCII-CODE"
+
+   "ASCII-DIGIT-P"
+   "ASCII-CODE-P"
+   "ASCII-CONTROL-CODE-P"
+   "ASCII-PRINTABLE-CODE-P"
+
+   "CODE-ASCII"
+   "ASCII-PRINTABLE-CODE-CHAR"
+
    "ASCII-STRING" "ASCII-BYTES"  "ASCII-DISPATCH-MACRO"
    "READ-ASCII-LINE" "ASCII-FORMAT"
    "BYTES=" "BYTES/=" "BYTES<" "BYTES<=" "BYTES>=" "BYTES>"
@@ -58,7 +75,8 @@
   (:documentation "

 Some ASCII code utilities, to process sequences of ASCII code bytes as
-easily as strings.
+easily as strings.  Note: this works even on implementations that
+don't use ASCII for CHAR-CODE.

 Examples:

@@ -69,7 +87,7 @@ License:

     AGPL3

-    Copyright Pascal J. Bourguignon 2006 - 2015
+    Copyright Pascal J. Bourguignon 2006 - 2021

     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU Affero General Public License as published by
@@ -91,41 +109,42 @@ License:



-#-(and) (eval-when (:compile-toplevel :load-toplevel :execute) ;; defined in ecma048
-          ;; Control codes:
-          (defconstant nul       #x00  "^@  ASCII Control Code Null character ")
-          (defconstant soh       #x01  "^A  ASCII Control Code Start of Header")
-          (defconstant stx       #x02  "^B  ASCII Control Code Start of Text")
-          (defconstant etx       #x03  "^C  ASCII Control Code End of Text")
-          (defconstant eot       #x04  "^D  ASCII Control Code End of Transmission")
-          (defconstant enq       #x05  "^E  ASCII Control Code Enquiry")
-          (defconstant ack       #x06  "^F  ASCII Control Code Acknowledgement")
-          (defconstant bel       #x07  "^G  ASCII Control Code Bell")
-          (defconstant bs        #x08  "^H  ASCII Control Code Backspace")
-          (defconstant ht        #x09  "^I  ASCII Control Code Horizontal Tab")
-          (defconstant lf        #x0a  "^J  ASCII Control Code Line feed")
-          (defconstant vt        #x0b  "^K  ASCII Control Code Vectical Tab")
-          (defconstant ff        #x0c  "^L  ASCII Control Code Form feed")
-          (defconstant cr        #x0d  "^M  ASCII Control Code Carriage return")
-          (defconstant so        #x0e  "^N  ASCII Control Code Shift Out")
-          (defconstant si        #x0f  "^O  ASCII Control Code Shift In")
-          (defconstant dle       #x10  "^P  ASCII Control Code Data Link Escape")
-          (defconstant dc1       #x11  "^Q  ASCII Control Code Device Control 1 (X-ON)")
-          (defconstant dc2       #x12  "^R  ASCII Control Code Device Control 2")
-          (defconstant dc3       #x13  "^S  ASCII Control Code Device Control 3 (X-OFF)")
-          (defconstant dc4       #x14  "^T  ASCII Control Code Device Control 4")
-          (defconstant nak       #x15  "^U  ASCII Control Code Negative Acknowledge")
-          (defconstant syn       #x16  "^V  ASCII Control Code Synchronous Idle")
-          (defconstant etb       #x17  "^W  ASCII Control Code End of Transmision Block")
-          (defconstant can       #x18  "^X  ASCII Control Code Cancel")
-          (defconstant em        #x19  "^Y  ASCII Control Code End of Medium")
-          (defconstant sub       #x1a  "^Z  ASCII Control Code Substitute")
-          (defconstant esc       #x1b  "^[  ASCII Control Code Escape")
-          (defconstant fs        #x1c  "^\  ASCII Control Code File Separator")
-          (defconstant gs        #x1d  "^]  ASCII Control Code Group Separator")
-          (defconstant rs        #x1e  "^^  ASCII Control Code Record Separator")
-          (defconstant us        #x1f  "^_  ASCII Control Code Unit Separator")
-          (defconstant del       #x7f  "^?  ASCII Control Code Delete "))
+#-(and)
+(eval-when (:compile-toplevel :load-toplevel :execute) ;; defined in ecma048
+  ;; Control codes:
+  (defconstant nul       #x00  "^@  ASCII Control Code Null character ")
+  (defconstant soh       #x01  "^A  ASCII Control Code Start of Header")
+  (defconstant stx       #x02  "^B  ASCII Control Code Start of Text")
+  (defconstant etx       #x03  "^C  ASCII Control Code End of Text")
+  (defconstant eot       #x04  "^D  ASCII Control Code End of Transmission")
+  (defconstant enq       #x05  "^E  ASCII Control Code Enquiry")
+  (defconstant ack       #x06  "^F  ASCII Control Code Acknowledgement")
+  (defconstant bel       #x07  "^G  ASCII Control Code Bell")
+  (defconstant bs        #x08  "^H  ASCII Control Code Backspace")
+  (defconstant ht        #x09  "^I  ASCII Control Code Horizontal Tab")
+  (defconstant lf        #x0a  "^J  ASCII Control Code Line feed")
+  (defconstant vt        #x0b  "^K  ASCII Control Code Vectical Tab")
+  (defconstant ff        #x0c  "^L  ASCII Control Code Form feed")
+  (defconstant cr        #x0d  "^M  ASCII Control Code Carriage return")
+  (defconstant so        #x0e  "^N  ASCII Control Code Shift Out")
+  (defconstant si        #x0f  "^O  ASCII Control Code Shift In")
+  (defconstant dle       #x10  "^P  ASCII Control Code Data Link Escape")
+  (defconstant dc1       #x11  "^Q  ASCII Control Code Device Control 1 (X-ON)")
+  (defconstant dc2       #x12  "^R  ASCII Control Code Device Control 2")
+  (defconstant dc3       #x13  "^S  ASCII Control Code Device Control 3 (X-OFF)")
+  (defconstant dc4       #x14  "^T  ASCII Control Code Device Control 4")
+  (defconstant nak       #x15  "^U  ASCII Control Code Negative Acknowledge")
+  (defconstant syn       #x16  "^V  ASCII Control Code Synchronous Idle")
+  (defconstant etb       #x17  "^W  ASCII Control Code End of Transmision Block")
+  (defconstant can       #x18  "^X  ASCII Control Code Cancel")
+  (defconstant em        #x19  "^Y  ASCII Control Code End of Medium")
+  (defconstant sub       #x1a  "^Z  ASCII Control Code Substitute")
+  (defconstant esc       #x1b  "^[  ASCII Control Code Escape")
+  (defconstant fs        #x1c  "^\  ASCII Control Code File Separator")
+  (defconstant gs        #x1d  "^]  ASCII Control Code Group Separator")
+  (defconstant rs        #x1e  "^^  ASCII Control Code Record Separator")
+  (defconstant us        #x1f  "^_  ASCII Control Code Unit Separator")
+  (defconstant del       #x7f  "^?  ASCII Control Code Delete "))

 ;; NUL Null: The all-zeros character which may serve to accomplish time
 ;;     fill and media fill.
@@ -259,7 +278,6 @@ License:
   ;; Printable character:
   (defconstant sp        #x20 "     Code of ASCII Character SPACE")

-
   (defgeneric encoding-error-character (err)
     (:documentation "The character that cannot be encoded."))
   (defgeneric encoding-error-coding-system (err)
@@ -279,7 +297,6 @@ License:
                        (encoding-error-coding-system condition)
                        (encoding-error-message condition)))))

-
   (defgeneric decoding-error-code (err)
     (:documentation "The code that corresponds to no character."))
   (defgeneric decoding-error-coding-system (err)
@@ -298,7 +315,6 @@ License:
                        (decoding-error-coding-system condition)
                        (decoding-error-message condition)))))

-
   (defparameter *ascii-characters*
     #.(concatenate 'string
                    " !\"#$%&'()*+,-./0123456789:;<=>?"
@@ -316,27 +332,38 @@ RETURN:  The ASCII code of the character ch, or raise an error if the character
          has no ascii code.
          Only printable characters are accepted. No control code.
 "
-    (let ((code (position ch *ascii-characters*)))
-      (if code
-          (+ sp code)
-          (error 'encoding-error
-                 :character ch
-                 :coding-system :us-ascii
-                 :message "This character cannot be encoded in ASCII")))))
-
+    (let ((code #+has-ascii-code (let ((code (char-code ch)))
+                                   (when (and code (<= 0 code 127)) code))
+                #-has-ascii-code (let ((code (position ch *ascii-characters*)))
+                                   (when code (+ sp code)))))
+      (unless code
+        (error 'encoding-error
+               :character ch
+               :coding-system :us-ascii
+               :message "This character cannot be encoded in ASCII"))
+      code)))

 (defparameter *newline* :crlf
-  "(OR (MEMBER :CRLF :CR :LF)
-     (CONS (MEMBER :CRLF :CR :LF) (MEMBER :CRLF :CR :LF :ANY))
-The encoding used for #\newline for output and for input.
+  "(OR (MEMBER :CRLF :CR :LF :DOS :MAC :UNIX)
+     (CONS (MEMBER :CRLF :CR :LF :DOS :MAC :UNIX)
+           (MEMBER :CRLF :CR :LF :DOS :MAC :UNIX :ANY))
+The encoding used for #\\Newline for output and for input.
 If it's a keyword, it's used for both output and input.
+:DOS is an alias for :CRLF,
+:MAC is an alias for :CR,
+:UNIX is an alias for LF.
 If it's a CONS cell, the CAR specifies the newline encoding for output
 and the CDR specifies the newline encoding for input (it may be :ANY to
 accept any of CR-LF, CR or LF; LF-CR would read as two newlines).")
-(declaim (inline input-newline output-newline))
-(defun input-newline  (newline) (if (consp newline) (cdr newline) newline))
-(defun output-newline (newline) (if (consp newline) (car newline) newline))

+(declaim (inline map-newline-alias input-newline output-newline))
+(defun map-newline-alias (keyword)
+  (ecase keyword
+    ((:dos  :crlf) :crlf)
+    ((:mac  :cr)   :cr)
+    ((:unix :lf)   :lf)))
+(defun input-newline  (newline) (map-newline-alias (if (consp newline) (cdr newline) newline)))
+(defun output-newline (newline) (map-newline-alias (if (consp newline) (car newline) newline)))

 (declaim (inline ascii-error))
 (defun ascii-error (code)
@@ -344,80 +371,123 @@ accept any of CR-LF, CR or LF; LF-CR would read as two newlines).")
          :code code
          :coding-system :us-ascii
          :message (cond
-                    ((or (< code sp) (= code del))
+                    ((or (< code SP) (= code DEL))
                      "ASCII control codes cannot be converted to characters.")
-                    ((< del code)
+                    ((< DEL code)
                      "Codes greater than 127 are not ASCII codes.")
                     (t
                      "[SHOULD NOT OCCUR]"))))

-
-(declaim (inline code-ascii))
-(defun code-ascii (code)
+(declaim (inline ascii-digit-p))
+(defun ascii-digit-p (code)
   "
-RETURN:  The character corresponding to the given ASCII code.
-         Only codes for printable characters are accepted,
-         and both CR and LF are mapped to #\newline.
+RETURN:  The decimal digit value of the character encoded by the ASCII CODE,
+         or NIL if CODE is not the ASCII code of a digit character.
 "
-  (cond
-    ((<= sp code  (1- del))       (aref *ascii-characters* (- code sp)))
-    ((or (= code cr) (= code lf)) #\newline)
-    (t                            (ascii-error code))))
-
-
-(declaim (inline ascii-printable-code-p))
-(defun ascii-printable-code-p (code)
-  "RETURN:  Whether CODE is the code of an ASCII printable character."
-  (<= sp code (1- del)))
+  (and (<= #.(ascii-code #\0) code #.(ascii-code #\9))
+       (- code #.(ascii-code #\0))))

+(declaim (inline ascii-code-p))
+(defun ascii-code-p (code)
+  "RETURN:  Whether CODE is an ASCII code.
+(ascii-code-p code) <=> (or (ascii-control-code-p code)
+                            (ascii-printable-code-p code))"
+  (<= nul code del))

 (declaim (inline ascii-control-code-p))
 (defun ascii-control-code-p (code)
   "RETURN:  Whether CODE is an ASCII control code."
   (or (<= nul code (1- sp)) (= del code)))

+(declaim (inline ascii-printable-code-p))
+(defun ascii-printable-code-p (code)
+  "RETURN:  Whether CODE is the code of an ASCII printable character."
+  (<= sp code (1- del)))

+(declaim (inline ascii-printable-code-char))
+(defun ascii-printable-code-char (code)
+  "
+RETURN:  The character corresponding to the given ASCII code.
+         Only codes for printable characters are accepted,
+         and both CR and LF are mapped to #\newline.
+"
+  (cond
+    ((ascii-printable-code-p code)
+     #+has-ascii-code (code-char code)
+     #-has-ascii-code (aref *ascii-characters* (- code sp)))
+    ((or (= code cr) (= code lf))
+     #\newline)
+    (t
+     (ascii-error code))))

-(declaim (inline code-ascii-digit-p))
-(defun code-ascii-digit-p (code)
+(declaim (inline code-ascii))
+(defun code-ascii (code)
   "
-RETURN:  The decimal digit value of the character encoded by the ASCII CODE,
-         or NIL if CODE is not the ASCII code of a digit character.
+RETURN:  The character corresponding to the given ASCII code.
+
+         If (has-ascii-code-p) then control codes are also converted
+         to characters, otherewise NIL is returned, but for CR and LF:
+         both CR and LF are mapped to #\newline.
 "
-  (and (<= #.(ascii-code #\0) code #.(ascii-code #\9))
-       (- code #.(ascii-code #\0))))
+  #+has-ascii-code
+  (if (ascii-code-p code)
+      (code-char code)
+      (ascii-error code))
+  #-has-ascii-code
+  (cond
+    ((ascii-printable-code-p code) #+has-ascii-standard-characters
+                                   (code-char code)
+                                   #-has-ascii-standard-characters
+                                   (aref *ascii-characters* (- code sp)))
+    ((or (= code CR) (= code LF))  #\newline)
+    (t                             nil)))
+


 (defun replace-ascii-characters (string bytes &key (newline *newline*) (start1 0) end1 (start2 0) (end2 (length bytes)))
-  (loop
-    :with newline := (input-newline newline)
-    :with endd := (or end1 (length string))
-    :with len := (- end2 start2)
-    :with j := start2
-    :for code := (aref bytes j)
-    :for i :from start1 :below endd
-    :while (< j end2)
-    :do (if (<= sp code 126)
-            (setf (aref string i) (aref *ascii-characters* (- code sp)))
-            (case code
-              ((#.cr)
-               (ecase newline
-                 ((:crlf) (if (and (< (1+ j) len) (= lf (aref bytes (1+ j))))
-                              (progn (incf j)
-                                     (setf (aref string i) #\newline))
-                              (ascii-error code)))
-                 ((:any)  (if (and (< (1+ j) len) (= lf (aref bytes (1+ j))))
-                              (progn (incf j)
-                                     (setf (aref string i) #\newline))
-                              (setf (aref string i) #\newline)))
-                 ((:cr)   (setf (aref string i) #\newline))
-                 ((:lf)   (ascii-error code))))
-              ((#.lf)
-               (ecase newline
-                 ((:any  :lf) (setf (aref string i) #\newline))
-                 ((:crlf :cr) (ascii-error code))))
-              (otherwise (ascii-error code))))
-    :finally (return string)))
+  "Store in STRING, from START1 to END1, the ASCII characters that are
+encoded in the BYTES vector from START2 to END2.  If a code that is
+not a printable ascii code or CR or LF is encountered, then a
+DECODING-ERROR signaled.
+RESULT:  STRING and the position beyond the last character stored."
+  (cond
+    ((= start2 end2) (values string start1))
+    ((> start2 end2)
+     (assert (< start2 end2) (start2 end2)
+             "Invalid bounding index designators START2=~A END2=~A (we must have start2<=end2)"
+             start2 end2))
+    (t
+     (loop
+       :with newline := (input-newline newline)
+       :with endd := (or end1 (length string))
+       :with len := (- end2 start2)
+       :with i := start1
+       :with j := start2
+       :while (and (< i endd) (< j end2))
+       :do (let ((code (aref bytes j)))
+             (incf j)
+             (if (ascii-printable-code-p code)
+                 (setf (aref string i) (code-ascii code))
+                 (case code
+                   ((#.cr)
+                    (ecase newline
+                      ((:crlf) (if (and (< j len) (= lf (aref bytes j)))
+                                   (progn (incf j)
+                                          (setf (aref string i) #\newline))
+                                   (ascii-error code)))
+                      ((:any)  (if (and (< j len) (= lf (aref bytes j)))
+                                   (progn (incf j)
+                                          (setf (aref string i) #\newline))
+                                   (setf (aref string i) #\newline)))
+                      ((:cr)   (setf (aref string i) #\newline))
+                      ((:lf)   (ascii-error code))))
+                   ((#.lf)
+                    (ecase newline
+                      ((:any  :lf) (setf (aref string i) #\newline))
+                      ((:crlf :cr) (ascii-error code))))
+                   (otherwise (ascii-error code))))
+             (incf i))
+       :finally (return (values string i))))))


 (defun ascii-string (bytes &key (newline *newline*) (start 0) (end (length bytes)))
@@ -430,10 +500,13 @@ NEWLINE:  (member :crlf :cr :lf :any) ; the default is *NEWLINE*.
 START:    index of the first byte to be converted.
 END:      index beyond the last byte to be converted.
 "
-  (replace-ascii-characters (make-array (- end start)
-                                        :element-type 'character
-                                        :adjustable t :fill-pointer 0)
-                            bytes :newline newline :start2 start :end2 end))
+  (multiple-value-bind (string size)
+      (replace-ascii-characters (make-array (- end start)
+                                            :element-type 'character
+                                            :adjustable t :fill-pointer (- end start))
+                                bytes :newline newline :start2 start :end2 end)
+    (setf (fill-pointer string) size)
+    string))


 (defun replace-ascii-bytes (bytes string &key (newline *newline*) (start1 0) end1 (start2 0) (end2 (length string)))
diff --git a/common-lisp/cesarum/character.lisp b/common-lisp/cesarum/character.lisp
index 8cf2552..7794e8e 100644
--- a/common-lisp/cesarum/character.lisp
+++ b/common-lisp/cesarum/character.lisp
@@ -11,12 +11,13 @@
 ;;;;AUTHORS
 ;;;;    <PJB> Pascal J. Bourguignon <pjb@informatimago.com>
 ;;;;MODIFICATIONS
+;;;;    2021-05-22 <PJB> Removed dependency to .ascii, and added dependency to .ecma048
 ;;;;    2013-07-27 <PJB> Created.
 ;;;;BUGS
 ;;;;LEGAL
 ;;;;    AGPL3
 ;;;;
-;;;;    Copyright Pascal J. Bourguignon 2013 - 2016
+;;;;    Copyright Pascal J. Bourguignon 2013 - 2021
 ;;;;
 ;;;;    This program is free software: you can redistribute it and/or modify
 ;;;;    it under the terms of the GNU Affero General Public License as published by
@@ -35,10 +36,12 @@
   (setf *readtable* (copy-readtable nil)))
 (defpackage "COM.INFORMATIMAGO.COMMON-LISP.CESARUM.CHARACTER"
   (:use "COMMON-LISP"
-        "COM.INFORMATIMAGO.COMMON-LISP.CESARUM.ASCII")
+        "COM.INFORMATIMAGO.COMMON-LISP.CESARUM.ECMA048")
+  (:shadowing-import-from "COM.INFORMATIMAGO.COMMON-LISP.CESARUM.ECMA048"
+                          "ED")
   (:export "STANDARD-CHARACTER-IS-ASCII-CODED-P"
            "STANDARD-CHARACTERS"
-           "HAS-ASCII-CODES-P"
+           "HAS-ASCII-CODE-P"
            "HAS-CHARACTER-NAMED-P"
            "PUSH-FEATURE-FOR-CHARACTER-NAMED")
   (:documentation "
@@ -57,22 +60,29 @@ present semi-standard character names and other ASCII features.
    #+has-vt        can read #\vt
    #+has-null      can read #\null

+   #+has-ascii-standard-characters
+                     The characters in the STANDARD-CHARACTER
+                     set are encoded with the ASCII code by
+                     CHAR-CODE;
+
    #+has-ascii-code  The characters in the STANDARD-CHARACTER
                      set are encoded with the ASCII code by
-                     char-code, and the codes between 0 and 31
+                     CHAR-CODE; and the codes between 0 and 31
                      inclusive plus 127 have a bijection with
-                     other characters, thru code-char and
-                     char-code.
+                     other characters, thru CODE-CHAR and
+                     char-code; and the optional named characters
+                     have codes matching their ASCII control code:
+                     (= (CHAR-CODE #\Return) CR), etc.

-   #+newline-is-return   <=> (char= #\newline #\return)
-   #+newline-is-linefeed <=> (char= #\newline #\linefeed)
+   #+newline-is-return   <=> (CHAR= #\Newline #\Return)
+   #+newline-is-linefeed <=> (CHAR= #\Newline #\Linefeed)


 License:

     AGPL3

-    Copyright Pascal J. Bourguignon 2013 - 2013
+    Copyright Pascal J. Bourguignon 2013 - 2021

     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU Affero General Public License as published by
@@ -97,7 +107,8 @@ License:
 (setf *features* (append '(:newline-is-linefeed :has-ascii-code
                            :has-vt :has-bell :has-escape :has-linefeed
                            :has-return :has-backspace :has-tab
-                           :has-page :has-rubout) *features*))
+                           :has-page :has-rubout)
+                         *features*))


 (eval-when (:compile-toplevel :load-toplevel :execute)
@@ -111,12 +122,10 @@ License:
 Notice: it's the same character set as
 COM.INFORMATIMAGO.COMMON-LISP.CESARUM.ASCII:*ASCII-CHARACTERS*.")

-
   (defun standard-characters ()
     "Return a string containing all the standard-characters."
     (copy-seq *standard-characters*))

-
   (defun has-character-named-p (name)
     "
 NAME:       A case-insensitive string designator for the semi-standard
@@ -133,7 +142,6 @@ Return:     Whether reading #\{name} will not produce an error.
 "
     (ignore-errors (read-from-string (format nil "#\\~A" name))))

-
   (defun push-feature-for-character-named (name)
     "
 NAME:       A case-insensitive string designator for the semi-standard
@@ -156,14 +164,13 @@ DO:         If the implementation has the semi standard character
                        (load-time-value (find-package"KEYWORD")))
                *features*)))

-
   #-mocl
   (dolist (name '("Rubout" "Page" "Tab" "Backspace" "Return" "Linefeed"
                   ;; Non standard character names:
                   "Escape" "Bell" "Vt" "Null"))
     (push-feature-for-character-named name))

-  );;eval-when
+  ) ;;eval-when



@@ -171,34 +178,64 @@ DO:         If the implementation has the semi standard character
 ;; Must be a separate form:
 (eval-when (:compile-toplevel :load-toplevel :execute)

+  (defconstant SP #x20 "     Code of ASCII Character SPACE")
+
   (defun standard-character-is-ascii-coded-p ()
-    "Whether the char-code of the standard-characters are their ASCII codes."
+    "Whether the char-code of the standard-characters are their ASCII codes.
+This exclude testing the presence or encoding of ASCII control codes."
     (load-time-value
      (ignore-errors
-       (every (lambda (ch) (= (char-code ch) (ascii-code ch)))
-              *standard-characters*))))
+      (flet ((ascii-code  (ch)
+               "
+RETURN:  The ASCII code of the character ch, or NIL if the character
+         has no ascii code.
+         Only printable characters are accepted.  No control code.
+"
+               (let ((code (position ch *standard-characters*)))
+                 (when code (+ SP code)))))
+
+        (every (lambda (ch) (equal (char-code ch) (ascii-code ch)))
+               *standard-characters*)))))

   (defun has-ascii-code-p ()
-    "Whether it looks like ASCII is implemented by char-code and code-char."
+    "Whether it looks like ASCII is implemented by char-code and code-char.
+Including control codes from 0 to 31 and 127."
     (let ((codes (cons 127 (loop :for code :from 0 :to 31 :collect code))))
-      (and (standard-character-is-ascii-coded-p)
-           (ignore-errors
-             (every (lambda (code) (= (char-code (code-char code)) code))
-                    codes))
-           (= 33 (length (delete-duplicates (mapcar (function code-char) codes)
-                                            :test (function char=))))
-           #+has-bell      (=   7 (char-code #\bell))
-           #+has-backspace (=   8 (char-code #\backspace))
-           #+has-tab       (=   9 (char-code #\tab))
-           #+has-linefeed  (=  10 (char-code #\linefeed))
-           #+has-vt        (=  11 (char-code #\vt))
-           #+has-page      (=  12 (char-code #\page))
-           #+has-return    (=  13 (char-code #\return))
-           #+has-escape    (=  27 (char-code #\escape))
-           #+has-rubout    (= 127 (char-code #\rubout)))))
+      (and
+       ;; printable characters are ASCII-CODED:
+       (standard-character-is-ascii-coded-p)
+
+       ;; all control-codes have characters that map back to the code:
+       (ignore-errors
+        (every (lambda (code)
+                 (let ((char (code-char code)))
+                   (when char
+                     (equal (char-code char) code))))
+               codes))
+
+       ;; the mapping between control codes and characters is a bijection:
+       (let ((chars (delete-duplicates (mapcar (function code-char) codes)
+                                       :test (function equal))))
+         (and (not (member nil chars))
+              (= 33 (length chars))))
+
+       ;; The optional character names match the ASCII codes they represent:
+       #+has-bell      (= BEL (char-code #\bell))
+       #+has-backspace (= BS  (char-code #\backspace))
+       #+has-tab       (= HT  (char-code #\tab))
+       #+has-linefeed  (= LF  (char-code #\linefeed))
+       #+has-vt        (= VT  (char-code #\vt))
+       #+has-page      (= FF  (char-code #\page))
+       #+has-return    (= CR  (char-code #\return))
+       #+has-escape    (= ESC (char-code #\escape))
+       #+has-rubout    (= DEL (char-code #\rubout)))))

   #-mocl
   (progn
+
+    (when (standard-character-is-ascii-coded-p)
+      (pushnew :has-ascii-standard-characters *features*))
+
     (when (has-ascii-code-p)
       (pushnew :has-ascii-code *features*))

@@ -208,7 +245,7 @@ DO:         If the implementation has the semi standard character
     #+has-linefeed (when (char= #\newline #\linefeed)
                      (pushnew :newline-is-linefeed *features*)))

-  );;eval-when
+  ) ;;eval-when


 ;;;; THEN END ;;;;
diff --git a/common-lisp/cesarum/com.informatimago.common-lisp.cesarum.asd b/common-lisp/cesarum/com.informatimago.common-lisp.cesarum.asd
index d62c4b4..1c961a0 100644
--- a/common-lisp/cesarum/com.informatimago.common-lisp.cesarum.asd
+++ b/common-lisp/cesarum/com.informatimago.common-lisp.cesarum.asd
@@ -104,8 +104,8 @@ all written in 100% conforming Common Lisp.

                ;; Standards:
                (:file "ecma048"         :depends-on ("utility"))
-               (:file "ascii"           :depends-on ("ecma048"))
-               (:file "character"       :depends-on ("ascii"))
+               (:file "character"       :depends-on ("ecma048"))
+               (:file "ascii"           :depends-on ("ecma048" "character"))
                (:file "character-sets"  :depends-on ("string"))
                (:file "iso3166"         :depends-on ())
                (:file "iso4217"         :depends-on ())
ViewGit