Added .cesarum.character to define features according to the availability of semi-standard character names, and implementation of ASCII by char-code/code-char.

Pascal J. Bourguignon [2013-07-27 11:00]
Added .cesarum.character to define features according to the availability of semi-standard character names, and implementation of ASCII by char-code/code-char.
Filename
common-lisp/cesarum/character.lisp
common-lisp/cesarum/com.informatimago.common-lisp.cesarum.asd
common-lisp/parser/scanner.lisp
diff --git a/common-lisp/cesarum/character.lisp b/common-lisp/cesarum/character.lisp
new file mode 100644
index 0000000..0567ab4
--- /dev/null
+++ b/common-lisp/cesarum/character.lisp
@@ -0,0 +1,189 @@
+;;;; -*- mode:lisp;coding:utf-8 -*-
+;;;;**************************************************************************
+;;;;FILE:               characters.lisp
+;;;;LANGUAGE:           Common-Lisp
+;;;;SYSTEM:             Common-Lisp
+;;;;USER-INTERFACE:     NONE
+;;;;DESCRIPTION
+;;;;
+;;;;    See package docstring.
+;;;;
+;;;;AUTHORS
+;;;;    <PJB> Pascal J. Bourguignon <pjb@informatimago.com>
+;;;;MODIFICATIONS
+;;;;    2013-07-27 <PJB> Created.
+;;;;BUGS
+;;;;LEGAL
+;;;;    AGPL3
+;;;;
+;;;;    Copyright Pascal J. Bourguignon 2013 - 2013
+;;;;
+;;;;    This program is free software: you can redistribute it and/or modify
+;;;;    it under the terms of the GNU Affero General Public License as published by
+;;;;    the Free Software Foundation, either version 3 of the License, or
+;;;;    (at your option) any later version.
+;;;;
+;;;;    This program is distributed in the hope that it will be useful,
+;;;;    but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;;    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;;;    GNU Affero General Public License for more details.
+;;;;
+;;;;    You should have received a copy of the GNU Affero General Public License
+;;;;    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+;;;;**************************************************************************
+
+(defpackage "COM.INFORMATIMAGO.COMMON-LISP.CESARUM.CHARACTERS"
+  (:use "COMMON-LISP"
+        "COM.INFORMATIMAGO.COMMON-LISP.CESARUM.ASCII")
+  (:export "STANDARD-CHARACTER-IS-ASCII-CODED-P"
+           "HAS-ASCII-CODES-AS-CHARACTERS-P"
+           "HAS-CHARACTER-NAMED-P"
+           "PUSH-FEATURE-FOR-CHARACTER-NAMED")
+  (:documentation "
+Define features for present semi-standard character names and
+other ASCII features.
+
+   #+has-rubout    can read #\rubout
+   #+has-page      can read #\page
+   #+has-tab       can read #\tab
+   #+has-backspace can read #\backspace
+   #+has-return    can read #\return
+   #+has-linefeed  can read #\linefeed
+
+   #+has-escape    can read #\escape
+   #+has-bell      can read #\bell
+   #+has-vt        can read #\vt
+
+   #+has-ascii-code  The characters in the STANDARD-CHARACTER
+                     set are encoded with the ASCII code by
+                     char-code, and the codes between 0 and 31
+                     inclusive plus 127 have a bijection with
+                     other characters, thru code-char and
+                     char-code.
+
+   #+newline-is-return   <=> (char= #\newline #\return)
+   #+newline-is-linefeed <=> (char= #\newline #\linefeed)
+
+
+License:
+
+    AGPL3
+
+    Copyright Pascal J. Bourguignon 2013 - 2013
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.
+    If not, see <http://www.gnu.org/licenses/>
+
+"))
+(in-package  "COM.INFORMATIMAGO.COMMON-LISP.CESARUM.CHARACTERS")
+
+
+(defparameter *standard-characters*
+  #.(concatenate 'string
+      " !\"#$%&'()*+,-./0123456789:;<=>?"
+      "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"
+      "`abcdefghijklmnopqrstuvwxyz{|}~")
+  "A string containing all the STANDARD-CHARACTER.
+Notice: it's the same character set as
+COM.INFORMATIMAGO.COMMON-LISP.CESARUM.ASCII:*ASCII-CHARACTERS*.")
+
+
+(defun has-character-named-p (name)
+  "
+NAME:       A case-insensitive string designator for the semi-standard
+            character names:
+
+                Rubout      The rubout or delete character.
+                Page        The form-feed or page-separator character.
+                Tab         The tabulate character.
+                Backspace   The backspace character.
+                Return      The carriage return character.
+                Linefeed    The line-feed character.
+
+Return:     Whether reading #\{name} will not produce an error.
+"
+  (ignore-errors (read-from-string (format nil "#\\~A" name))))
+
+
+(defun push-feature-for-character-named (name)
+  "
+NAME:       A case-insensitive string designator for the semi-standard
+            character names:
+
+                Rubout      The rubout or delete character.
+                Page        The form-feed or page-separator character.
+                Tab         The tabulate character.
+                Backspace   The backspace character.
+                Return      The carriage return character.
+                Linefeed    The line-feed character.
+
+DO:         If the implementation has the semi standard character
+            named NAME, then push a feature :HAS-{NAME}, with NAME
+            upcased.
+
+"
+  (when (has-character-named-p name)
+    (pushnew  (intern (format nil "~:@(HAS-~A~)" name)
+                      (load-time-value (find-package"KEYWORD")))
+              *features*)))
+
+(eval-when (:compile-toplevel :load-toplevel :execute)
+
+  (dolist (name '("Rubout" "Page" "Tab" "Backspace" "Return" "Linefeed"
+                  ;; Non standard character names:
+                  "Escape" "Bell" "Vt"))
+    (push-feature-for-character-named name))
+
+  );;eval-when
+
+
+(defun standard-character-is-ascii-coded-p ()
+  (load-time-value
+   (ignore-errors
+     (every (lambda (ch) (= (char-code ch) (ascii-code ch)))
+            *standard-characters*))))
+
+
+(eval-when (:compile-toplevel :load-toplevel :execute)
+
+  (defun has-ascii-codes-as-characters-p ()
+    (let ((codes (cons 127 (loop :for code :from 0 :to 31 :collect code))))
+      (and (standard-character-is-ascii-coded-p)
+           (ignore-errors
+             (every (lambda (code) (= (char-code (code-char code)) code))
+                    codes))
+           (= 33 (length (delete-duplicates (mapcar (function code-char) codes)
+                                            :test (function char=))))
+           #+has-rubout    (= 127 (char-code #\rubout))
+           #+has-page      (=  12 (char-code #\page))
+           #+has-tab       (=   9 (char-code #\tab))
+           #+has-backspace (=   8 (char-code #\backspace))
+           #+has-return    (=  13 (char-code #\return))
+           #+has-linefeed  (=  10 (char-code #\linefeed))
+           #+has-escape    (=  27 (char-code #\escape))
+           #+has-bell      (=   7 (char-code #\bell))
+           #+has-vt        (=  11 (char-code #\vt)))))
+
+  (when (has-ascii-codes-as-characters-p)
+    (pushnew :has-ascii-code *features*))
+
+  #+has-return   (when (char= #\newline #\return)
+                   (pushnew :newline-is-return *features*))
+
+  #+has-linefeed (when (char= #\newline #\linefeed)
+                   (pushnew :newline-is-linefeed *features*))
+  );;eval-when
+
+
+;;;; THEN END ;;;;
diff --git a/common-lisp/cesarum/com.informatimago.common-lisp.cesarum.asd b/common-lisp/cesarum/com.informatimago.common-lisp.cesarum.asd
index 4611d68..79fbcec 100644
--- a/common-lisp/cesarum/com.informatimago.common-lisp.cesarum.asd
+++ b/common-lisp/cesarum/com.informatimago.common-lisp.cesarum.asd
@@ -108,6 +108,7 @@ all written in 100% conforming Common Lisp.

                  ;; Standards:
                  (:file "ascii"           :depends-on ())
+                 (:file "character"       :depends-on ("ascii"))
                  (:file "character-sets"  :depends-on ("string"))
                  (:file "ecma048"         :depends-on ("utility"))
                  (:file "iso3166"         :depends-on ())
diff --git a/common-lisp/parser/scanner.lisp b/common-lisp/parser/scanner.lisp
index 20dc44a..e279180 100644
--- a/common-lisp/parser/scanner.lisp
+++ b/common-lisp/parser/scanner.lisp
@@ -18,7 +18,7 @@
 ;;;;LEGAL
 ;;;;    AGPL3
 ;;;;
-;;;;    Copyright Pascal J. Bourguignon 2004 - 2012
+;;;;    Copyright Pascal J. Bourguignon 2004 - 2013
 ;;;;
 ;;;;    This program is free software: you can redistribute it and/or modify
 ;;;;    it under the terms of the GNU Affero General Public License as published by
@@ -60,9 +60,7 @@
    ;; SCANNER methods:
    "SKIP-SPACES" "SCAN-NEXT-TOKEN"
    ;; PEEK-STREAM methods specialized on SCANNER:
-   "NEXTCHAR" "UNGETCHAR" "GETCHAR" "READLINE"
-   ;; Internal
-   "CHAR-NAME-SUPPORTED-P")
+   "NEXTCHAR" "UNGETCHAR" "GETCHAR" "READLINE")
   (:documentation
    "
 An abstract scanner class.
@@ -74,7 +72,7 @@ License:

     AGPL3

-    Copyright Pascal J. Bourguignon 2004 - 2012
+    Copyright Pascal J. Bourguignon 2004 - 2013

     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU Affero General Public License as published by
@@ -132,21 +130,17 @@ License:
    (column     :accessor token-column
                :initarg :column
                :initform 1
-               :type (integer 1))
+               :type (integer 0)) ; 0 is unknown.
    (line       :accessor token-line
                :initarg :line
                :initform 1
-               :type (integer 0)))
+               :type (integer 0))) ; 0 is unknown.
   (:documentation "A syntactic element."))


 (defmethod print-object ((token token) stream)
-  (print-unreadable-object (token stream :identity t :type t)
-    (prin1 (list (slot-value token 'kind)
-                 (slot-value token 'text)
-                 (slot-value token 'column)
-                 (slot-value token 'line)) stream))
-  token)
+  (print-parseable-object (token stream :type t :identity t)
+                          kind text column line))


 ;;----------------------------------------------------------------------
@@ -158,45 +152,13 @@ License:
      (ignore-errors (read-from-string (format nil "#\\~A" name)))))


-(defvar *spaces*
-  (let ((spaces '()))
-    (dolist (name '("Page" "Linefeed" "Return" "Tab" "Newline" "Space"))
-      (let ((ch (char-name-supported-p name)))
-        (when ch (push ch spaces))))
-    (coerce spaces 'string)))
-
-
-
-;; Some tools can't deal with #+#.(...) well, so we go thru *feature*
-;; instead.
-
-(eval-when (:compile-toplevel :load-toplevel :execute)
-
-  ;; Semi-standard character names:
-
-  (when (let ((ch (char-name-supported-p "Linefeed")))
-          (and ch (char/= ch #\Newline)))
-    (pushnew :linefeed *features*))
-
-  (when  (char-name-supported-p "Page")
-    (pushnew :page *features*))
-
-  (when  (char-name-supported-p "Backspace")
-    (pushnew :backspace *features*))
-
-  (when  (char-name-supported-p "Tab")
-    (pushnew :tab *features*))
-
-  ;; Non-standard character names:
-
-  (when  (char-name-supported-p "Bell")
-    (pushnew :bell *features*))
-
-  (when  (char-name-supported-p "Vt")
-    (pushnew :vt *features*))
-
-  );;eval-when
-
+;; Note: the features are defined in .cesarum.characters
+(defvar *spaces*  '(#\Space
+                    #+has-newline #\Newline
+                    #+has-tab #\Tab
+                    #+(and has-return (not newline-is-return)) #\return
+                    #+(and has-linefeed (not newline-is-linefeed)) #\linefeed
+                    #+has-page #\page))


 ;; Note we copy some fields in the condition from the scanner, so that
@@ -327,9 +289,9 @@ RETURN: line; column
     :do (case ch
           ((#\Space
             #\Newline
-            #+linefeed #\Linefeed
-            #+page     #\Page
-            #+tab      #\Tab))
+            #+has-linefeed #\Linefeed
+            #+has-page     #\Page
+            #+has-tab      #\Tab))
           (otherwise
            (loop-finish)))
     :finally (progn
@@ -392,11 +354,11 @@ RETURN:       (scanner-current-token scanner).
       ((#\Newline)
        (incf (scanner-line scanner))
        (setf (scanner-column scanner) 1))
-      #+tab
+      #+has-tab
       ((#\Tab)
        (increment-column-to-next-tab-stop scanner))
       (otherwise
-       ;; including #\Return #+linefeed #\Linefeed #+page #\Page
+       ;; including #\Return #+has-linefeed #\Linefeed #+has-page #\Page
        (incf (scanner-column scanner))))
     ch))

@@ -410,13 +372,13 @@ RETURN:       (scanner-current-token scanner).
        ;; We don't know the length of the last line.
        (decf (scanner-line scanner))
        (setf (scanner-column scanner) 0))
-      #+tab
+      #+has-tab
       ((#\Tab)
        ;; We don't know how many characters there was in the last tab-width.
        (setf (scanner-column scanner)  (truncate (1- (scanner-column scanner))
                                                  (scanner-tab-width scanner))))
       (otherwise
-       ;; including #\Return #+linefeed #\Linefeed #+page #\Page
+       ;; including #\Return #+has-linefeed #\Linefeed #+has-page #\Page
        (decf (scanner-column scanner))))
     ch))

@@ -426,7 +388,7 @@ RETURN:       (scanner-current-token scanner).
   (with-output-to-string (out)
     (loop
       :for ch = (getchar scanner)
-      :until (find ch #(#\Newline #\Return #+linefeed #\Linefeed #+page #\Page))
+      :until (find ch #(#\Newline #\Return #+has-linefeed #\Linefeed #+has-page #\Page))
       :do (write-char ch out)))
   (prog1 (readline (scanner-stream scanner))
     (incf (scanner-line scanner))
ViewGit