;;;; -*- mode:lisp;coding:utf-8 -*-
;;;;**************************************************************************
;;;;FILE:               character-sets.lisp
;;;;LANGUAGE:           Common-Lisp
;;;;SYSTEM:             Common-Lisp
;;;;USER-INTERFACE:     NONE
;;;;DESCRIPTION
;;;;
;;;;    Portability layer over character sets and external-formats.
;;;;
;;;;AUTHORS
;;;;    <PJB> Pascal J. Bourguignon <pjb@informatimago.com>
;;;;MODIFICATIONS
;;;;    2012-04-06 <PJB> Extracted from
;;;;                     com.informatimago.common-lisp.cesarum.character-sets.
;;;;BUGS
;;;;LEGAL
;;;;    AGPL3
;;;;
;;;;    Copyright Pascal J. Bourguignon 2012 - 2016
;;;;
;;;;    This program is free software: you can redistribute it and/or modify
;;;;    it under the terms of the GNU Affero General Public License as published by
;;;;    the Free Software Foundation, either version 3 of the License, or
;;;;    (at your option) any later version.
;;;;
;;;;    This program is distributed in the hope that it will be useful,
;;;;    but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;;    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;;;    GNU Affero General Public License for more details.
;;;;
;;;;    You should have received a copy of the GNU Affero General Public License
;;;;    along with this program.  If not, see <http://www.gnu.org/licenses/>
;;;;**************************************************************************
(eval-when (:compile-toplevel :load-toplevel :execute)
  (setf *readtable* (copy-readtable nil)))
(defpackage "COM.INFORMATIMAGO.CLEXT.CHARACTER-SETS"
  (:use "COMMON-LISP"
        "COM.INFORMATIMAGO.COMMON-LISP.CESARUM.STRING"
        "COM.INFORMATIMAGO.COMMON-LISP.CESARUM.LIST"
        "COM.INFORMATIMAGO.COMMON-LISP.CESARUM.CHARACTER-SETS")
  (:export
   "MAKE-EXTERNAL-FORMAT"
   "EXTERNAL-FORMAT-CHARACTER-ENCODING"
   "EXTERNAL-FORMAT-LINE-TERMINATION"
   "CHARACTER-SET-TO-LISP-ENCODING"
   "CHARACTER-SET-FOR-LISP-ENCODING"
   "CHARACTER-SET-TO-EMACS-ENCODING"
   "CHARACTER-SET-FROM-EMACS-ENCODING"
   "EMACS-ENCODING-TO-LISP-EXTERNAL-FORMAT")
  (:documentation "
This package exports functions to manage character-sets,
character encodings, coding systems and external format.
It's all the same, but everyone likes to have his own terms...

Copyright Pascal J. Bourguignon 2005 - 2012
This package is provided under the GNU General Public Licence.
See the source file for details.
"))
(in-package "COM.INFORMATIMAGO.CLEXT.CHARACTER-SETS")


(defparameter *aliases*
  '(
    ;; clisp, emacs:
    ("UNICODE-32-LITTLE-ENDIAN" "UTF-32-LE" "UTF-32LE")
    ("UNICODE-32-BIG-ENDIAN"    "UTF-32-BE" "UTF-32BE")
    ("UNICODE-16-LITTLE-ENDIAN" "UTF-16-LE" "UTF-16LE")
    ("UNICODE-16-BIG-ENDIAN"    "UTF-16-BE" "UTF-16BE")
    ;; clisp
    ("CP437-IBM" "CP437")
    ("CP852-IBM" "CP852")
    ("CP860-IBM" "CP860")
    ("CP861-IBM" "CP861")
    ("CP862-IBM" "CP862")
    ("CP863-IBM" "CP863")
    ("CP864-IBM" "CP864")
    ("CP865-IBM" "CP865")
    ("CP869-IBM" "CP869")
    ("CP874-IBM" "CP874")
    ;; emacs:
    ("VSCII" "VISCII")
    ;; Aliases for other implementations:
    ("LATIN1"  "ISO-8859-1")
    ("LATIN2"  "ISO-8859-2")
    ("LATIN3"  "ISO-8859-3")
    ("LATIN4"  "ISO-8859-4")
    ("LATIN5"  "ISO-8859-9")
    ("LATIN6"  "ISO-8859-10")
    ("LATIN8"  "ISO-8859-14")
    ("LATIN9"  "ISO-8859-15")

    ("LATIN-1"  "ISO-8859-1")
    ("LATIN-2"  "ISO-8859-2")
    ("LATIN-3"  "ISO-8859-3")
    ("LATIN-4"  "ISO-8859-4")
    ("LATIN-5"  "ISO-8859-9")
    ("LATIN-6"  "ISO-8859-10")
    ("LATIN-8"  "ISO-8859-14")
    ("LATIN-9"  "ISO-8859-15")
    )

  "A list of lists of aliases for character-set.")



(defun add-aliases-to-group (encoding-name-and-aliases aliases)
  "
ENCODING-NAME-AND-ALIASES:
            A list of name and aliases of character-sets.

ALIASES:    A list of lists of aliases, each sublist naming the same character-set.

RETURN:     A new list of name and aliases, with the ALIASES added, if
            they name the same character-set as ENCODING-NAME-AND-ALIASES.
"
  (let ((alias (find-if
                (lambda (alias)
                  (intersection encoding-name-and-aliases alias :test (function string-equal)))
                aliases)))
    (if alias
        (remove-duplicates (cons (car encoding-name-and-aliases)
                                 (union (cdr encoding-name-and-aliases) alias
                                        :test (function string-equal)))
                           :test (function string-equal))
        encoding-name-and-aliases)))




(defparameter *lisp-encodings*

  ;; #+ccl-1.9
  ;; ccl:list-character-encodings.  Use it
  ;; in ccl:describe-character-encodings

  #+(and ccl (not ccl-1.6))
  (mapcar (lambda (x) (mapcar (function string-upcase) x))
          '((:iso-8859-1 :iso_8859-1 :latin1 :l1 :ibm819 :cp819 :csisolatin1)
            (:iso-8859-2 :iso_8859-2 :latin-2 :l2 :csisolatin2)
            (:iso-8859-3 :iso_8859-3 :latin3 :l3 :csisolatin3)
            (:iso-8859-4 :iso_8859-4 :latin4 :l4 :csisolatin4)
            (:iso-8859-5 :iso_8859-5 :cyrillic :csisolatincyrillic :iso-ir-144)
            (:iso-8859-6 :iso_8859-6 :arabic :csisolatinarabic :iso-ir-127)
            (:iso-8859-7 :iso_8859-7 :greek :greek8 :csisolatingreek :iso-ir-126 :elot_928 :ecma-118)
            (:iso-8859-8 :iso_8859-8 :hebrew :csisolatinhebrew :iso-ir-138)
            (:iso-8859-9 :iso_8859-9 :latin5 :csisolatin5 :iso-ir-148)
            (:iso-8859-10 :iso_8859-10 :latin6 :csisolatin6 :iso-ir-157)
            (:iso-8859-11)
            (:iso-8859-13)
            (:iso-8859-14 :iso_8859-14 :iso-ir-199 :latin8 :l8 :iso-celtic)
            (:iso-8859-15 :iso_8859-15 :latin9)
            (:iso-8859-16 :iso_8859-16 :iso-ir-199 :latin8 :l8 :iso-celtic)
            (:macintosh :macos-roman :macosroman :mac-roman :macroman)
            (:ucs-2)
            (:ucs-2be)
            (:ucs-2le)
            (:us-ascii :csascii :cp637 :ibm637 :us :iso646-us :ascii :iso-ir-6)
            (:utf-16)
            (:utf-16be)
            (:utf-16le)
            (:utf-32 :utf-4)
            (:utf-32be :ucs-4be)
            (:utf-8)
            (:utf-32le :ucs-4le)
            (:windows-31j  :cp932 :cswindows31j)
            (:euc-jp :eucjp)))

  #+(and ccl ccl-1.6)
  (mapcar (lambda (x) (mapcar (function string-upcase) x))
          '((:iso-8859-1 :iso_8859-1  :latin1  :l1  :ibm819  :cp819  :csisolatin1)
            (:iso-8859-2 :iso_8859-2  :latin-2  :l2  :csisolatin2)
            (:iso-8859-3 :iso_8859-3  :latin3 :l3  :csisolatin3)
            (:iso-8859-4 :iso_8859-4  :latin4  :l4  :csisolatin4)
            (:iso-8859-5 :iso_8859-5  :cyrillic  :csisolatincyrillic  :iso-ir-144)
            (:iso-8859-6 :iso_8859-6  :arabic  :csisolatinarabic  :iso-ir-127)
            (:iso-8859-7 :iso_8859-7  :greek  :greek8  :csisolatingreek  :iso-ir-126  :elot_928  :ecma-118)
            (:iso-8859-8 :iso_8859-8  :hebrew  :csisolatinhebrew  :iso-ir-138)
            (:iso-8859-9 :iso_8859-9  :latin5  :csisolatin5  :iso-ir-148)
            (:iso-8859-10 :iso_8859-10  :latin6  :csisolatin6  :iso-ir-157)
            (:iso-8859-11)
            (:iso-8859-13)
            (:iso-8859-14 :iso_8859-14  :iso-ir-199  :latin8  :l8  :iso-celtic)
            (:iso-8859-15 :iso_8859-15  :latin9)
            (:iso-8859-16 :iso_8859-16  :iso-ir-199  :latin8  :l8  :iso-celtic)
            (:macintosh :macos-roman  :macosroman  :mac-roman  :macroman)
            (:ucs-2)
            (:ucs-2be)
            (:ucs-2le)
            (:us-ascii :csascii  :cp637 :ibm637  :us  :iso646-us  :ascii  :iso-ir-6)
            (:utf-16)
            (:utf-16be)
            (:utf-16le)
            (:utf-32 :utf-4)
            (:utf-32be :ucs-4be)
            (:utf-8)
            (:utf-32le :ucs-4le)
            (:windows-31j :cp932  :cswindows31j)
            (:euc-jp :eucjp)
            (:gb2312 :gb2312-80 :gb2312-1980 :euc-cn :euccn)
            (:cp936 :gbk :ms936 :windows-936)))

  #+clisp
  (let ((h (make-hash-table)))
    (do-external-symbols (s "CHARSET")
      (push (string-upcase s) (gethash (ext:encoding-charset s) h)))
    (let ((r '()))
      (maphash (lambda (k v) (declare (ignore k)) (push  v r)) h)
      r))

  #+cmu   '(("ISO-8859-1"))          ; :iso-latin-1-unix ;  what else?

  #+ecl   '(("ISO-8859-1")
            #+unicode ("UTF-8"))

  #+sbcl
  (mapcar (lambda (x)
            (mapcar (function string-upcase)
                    (sb-impl::ef-names x)))
          (etypecase sb-impl::*external-formats*
            (hash-table (let ((result '()))
                          (maphash (lambda (name encoding)
                                     (declare (ignore name))
                                     (pushnew encoding result))
                                   sb-impl::*external-formats*)
                          result))
            (vector (remove 'SB-IMPL::EXTERNAL-FORMAT
                            (flatten (coerce sb-impl::*external-formats* 'list))
                            :key (function type-of)
                            :test-not (function eql)))
            (list   (remove 'SB-IMPL::EXTERNAL-FORMAT
                            (flatten sb-impl::*external-formats*)
                            :key (function type-of)
                            :test-not (function eql)))))

  ;; From Java7:
  ;; Every implementation of the Java platform is required to support
  ;; the following standard charsets. Consult the release
  ;; documentation for your implementation to see if any other
  ;; charsets are supported. The behavior of such optional charsets
  ;; may differ between implementations.
  ;;
  ;; Other external formats are also possible with abcl.
  #+abcl
  (remove-duplicates
   (append '(("US-ASCII") ("ISO-8859-1") ("UTF-8") ("UTF-16BE") ("UTF-16LE") ("UTF-16"))
           (mapcar (lambda (encoding)
                     (let ((n (symbol-name encoding))
                           (u (string-upcase encoding)))
                       (if (string= n u)
                           (list n)
                           (list n u))))
                   (system:available-encodings)))
   :test (function equal))


  #-(or abcl ccl clisp cmu ecl sbcl)
  (progn
    (warn "What are the available external formats in ~A ?"
          (lisp-implementation-type))
    '(("US-ASCII")))

  "Give an a-list of name and list of aliases of encoding systems in
the current Common Lisp implementation.  Those names and aliases are strings.")



(defun fill-character-set-lisp-encoding ()
  "
DO:         Set the cs-lisp-encoding of the character-sets present in
            the current implementation.
"
  (dolist (lsl *lisp-encodings* (values))
    (let* ((aliases (add-aliases-to-group lsl *aliases*))
           (cs (some (function find-character-set) aliases)))
      (when cs
        ;; We don't add the aliases to the lisp-encoding, since this
        ;; list is used to make the implementation specific encodings
        ;; and external-formats.
        (setf (cs-lisp-encoding cs) lsl)))))






(defgeneric make-external-format (character-encoding &optional line-termination)
  (:documentation "Makes an implementation specific external-format.")

  (:method ((cs character-set) &optional line-termination)
    (if (cs-lisp-encoding cs)
        (let ((encoding         (first (cs-lisp-encoding cs)))
              (line-termination (or line-termination
                                    #+ccl ccl:*default-line-termination*
                                    #-ccl :unix)))
          (check-type line-termination (member :unix :mac :dos))

          #+ccl
          (ccl:make-external-format :domain nil
                                    :character-encoding (intern encoding "KEYWORD")
                                    :line-termination (case line-termination
                                                        ((:unix :dos) line-termination)
                                                        ((:mac) :macos)))

          #+clisp
          (ext:make-encoding :charset (symbol-value (intern encoding "CHARSET"))
                             :line-terminator line-termination
                             :input-error-action :error
                             :output-error-action :error)

          #+cmu
          (if (string-equal encoding "ISO-8859-1")
              :iso-latin-1-unix
              (progn #|should not occur|#
                (cerror 'character-set-error
                        :character-set cs
                        :format-control "The character-set ~S has no lisp-encoding in ~A"
                        :format-arguments (list (cs-name cs) (lisp-implementation-type)))
                :default))

          #+ecl
          (cond
            ((string-equal encoding "ISO-8859-1")
             :iso-8859-1)
            #+unicode
            ((string-equal encoding "UTF-8")
             :utf-8)
            (t  #|should not occur|#
             (cerror 'character-set-error
                     :character-set cs
                     :format-control "The character-set ~S has no lisp-encoding in ~A"
                     :format-arguments (list (cs-name cs) (lisp-implementation-type)))
             :default))

          #+sbcl
          (intern encoding "KEYWORD")

          #+abcl
          (intern encoding "KEYWORD")

          #-(or abcl ccl clisp cmu ecl sbcl)
          (values
           (find (lambda (cs) (member encoding (cs-lisp-encoding cs)
                                      :test (function string-equal)))
                 *character-sets*)
           :unix))
        (error 'character-set-error
               :character-set cs
               :format-control "The character-set ~S has no lisp-encoding in ~A"
               :format-arguments (list (cs-name cs) (lisp-implementation-type)))))

  (:method ((character-set-name string) &optional line-termination)
    (let ((cs (find-character-set character-set-name)))
      (if cs
          (make-external-format cs line-termination)
          (error 'character-set-error
                 :character-set (string character-set-name)
                 :format-control "There is no character-set named ~S"
                 :format-arguments (list (string character-set-name))))))

  (:method ((character-set symbol) &optional line-termination)
    (make-external-format (string character-set) line-termination)))


(defun external-format-character-encoding (external-format)
  #+ccl (ccl:external-format-character-encoding external-format)
  #+(and clisp unicode) (string (ext:encoding-charset external-format))
  #+cmu (string external-format)
  #+ecl (string external-format)
  #+sbcl (string external-format)
  #+abcl (string external-format)
  #-(or abcl ccl (and clisp unicode) cmu ecl sbcl)
  (error "~S: How to decode an external-format in ~A"
         'external-format-character-encoding
         (lisp-implementation-type)))


(defun external-format-line-termination (external-format)
  #+(or cmu ecl sbcl abcl) (declare (ignore external-format))
  #+ccl (ccl:external-format-line-termination external-format)
  #+(and clisp unicode) (string (ext:encoding-line-terminator external-format))
  #+cmu :unix
  #+ecl :unix
  #+sbcl :unix
  #+abcl :unix ; TODO: ???
  #-(or abcl ccl (and clisp unicode) cmu ecl sbcl)
  (error "~S: How to decode an external-format in ~A"
         'external-format-line-termination
         (lisp-implementation-type)))

(defun character-set-to-lisp-encoding (cs &key (line-termination :unix))
  "
CS:     A character set designator (ie.  a character-set, or a string or
        symbol naming a character-set).
RETURN: An implementation specific object representing the  encoding for
        the given character-set and line-termination.
SIGNAL: An error if line-termination is not (member :unix :mac :dos nil) or
        if cs has no emacs encoding.
"
  (assert (member line-termination '(:unix :mac :dos nil)))
  (make-external-format (etypecase cs
                          (character-set cs)
                          ((or string symbol) (find-character-set cs)))
                        line-termination))

(defun character-set-for-lisp-encoding (encoding)
  "
ENCODING:  An implementation specific object representing an encoding.
           possibly with line-termination.
RETURN:    The character-set that correspond to this emacs-encoding ;
           the line-termination.
"
  (values (external-format-character-encoding encoding)
          (external-format-line-termination   encoding)))



;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Emacs coding systems
;;;

(defparameter *emacs-encodings*
  #||
  ;; emacs lisp code to generate the following list.
  (require 'cl)
  (sort*
   (mapcar
    (lambda (sl) (mapcar (lambda (sym) (upcase (symbol-name sym))) sl))
    (delete-duplicates
     (mapcar (lambda (coding-system)
               (or (coding-system-get coding-system 'alias-coding-systems)
                   (list coding-system)))
             (let ((coding-system-list '()))
               (mapatoms (lambda (sym) (when (and sym (coding-system-p sym))
                                    (push sym coding-system-list))))
               coding-system-list))
     :test (function equal)))
   (function string<) :key (function first))
  ||#
  (quote
   (("CHINESE-BIG5" "BIG5" "CN-BIG5" "CP950")
    ("CHINESE-HZ" "HZ-GB-2312" "HZ")
    ("CHINESE-ISO-8BIT" "CN-GB-2312" "EUC-CHINA" "EUC-CN" "CN-GB" "GB2312" "CP936")
    ("CHINESE-ISO-8BIT-WITH-ESC")
    ("COMPOUND-TEXT" "X-CTEXT" "CTEXT")
    ("COMPOUND-TEXT-WITH-EXTENSIONS" "X-CTEXT-WITH-EXTENSIONS" "CTEXT-WITH-EXTENSIONS")
    ("CP1125" "RUSCII" "CP866U")
    ("CP437")
    ("CP720")
    ("CP737")
    ("CP775")
    ("CP850")
    ("CP851")
    ("CP852")
    ("CP855")
    ("CP857")
    ("CP860")
    ("CP861")
    ("CP862")
    ("CP863")
    ("CP864")
    ("CP865")
    ("CP866")
    ("CP869")
    ("CP874")
    ("CTEXT-NO-COMPOSITIONS")
    ("CYRILLIC-ALTERNATIVNYJ" "ALTERNATIVNYJ")
    ("CYRILLIC-ISO-8BIT" "ISO-8859-5")
    ("CYRILLIC-ISO-8BIT-WITH-ESC")
    ("CYRILLIC-KOI8" "KOI8-R" "KOI8" "CP878")
    ("EMACS-MULE")
    ("EUC-TW" "EUC-TAIWAN")
    ("GEORGIAN-PS")
    ("GREEK-ISO-8BIT" "ISO-8859-7")
    ("GREEK-ISO-8BIT-WITH-ESC")
    ("HEBREW-ISO-8BIT" "ISO-8859-8" "ISO-8859-8-E" "ISO-8859-8-I")
    ("HEBREW-ISO-8BIT-WITH-ESC")
    ("IN-IS13194" "DEVANAGARI")
    ("IN-IS13194-WITH-ESC")
    ("ISO-2022-7BIT")
    ("ISO-2022-7BIT-LOCK" "ISO-2022-INT-1")
    ("ISO-2022-7BIT-LOCK-SS2" "ISO-2022-CJK")
    ("ISO-2022-7BIT-SS2")
    ("ISO-2022-8BIT-SS2")
    ("ISO-2022-CN" "CHINESE-ISO-7BIT")
    ("ISO-2022-CN-EXT")
    ("ISO-2022-JP" "JUNET")
    ("ISO-2022-JP-2")
    ("ISO-2022-KR" "KOREAN-ISO-7BIT-LOCK")
    ("ISO-8859-11")
    ("ISO-8859-6" "ARABIC-ISO-8BIT")
    ("ISO-LATIN-1" "ISO-8859-1" "LATIN-1")
    ("ISO-LATIN-1-WITH-ESC")
    ("ISO-LATIN-10" "ISO-8859-16" "LATIN-10")
    ("ISO-LATIN-2" "ISO-8859-2" "LATIN-2")
    ("ISO-LATIN-2-WITH-ESC")
    ("ISO-LATIN-3" "ISO-8859-3" "LATIN-3")
    ("ISO-LATIN-3-WITH-ESC")
    ("ISO-LATIN-4" "ISO-8859-4" "LATIN-4")
    ("ISO-LATIN-4-WITH-ESC")
    ("ISO-LATIN-5" "ISO-8859-9" "LATIN-5")
    ("ISO-LATIN-5-WITH-ESC")
    ("ISO-LATIN-6" "ISO-8859-10" "LATIN-6")
    ("ISO-LATIN-7" "ISO-8859-13" "LATIN-7")
    ("ISO-LATIN-8" "ISO-8859-14" "LATIN-8")
    ("ISO-LATIN-8-WITH-ESC")
    ("ISO-LATIN-9" "ISO-8859-15" "LATIN-9" "LATIN-0")
    ("ISO-LATIN-9-WITH-ESC")
    ("ISO-SAFE" "US-ASCII")
    ("JAPANESE-ISO-7BIT-1978-IRV" "ISO-2022-JP-1978-IRV" "OLD-JIS")
    ("JAPANESE-ISO-8BIT" "EUC-JAPAN-1990" "EUC-JAPAN" "EUC-JP")
    ("JAPANESE-ISO-8BIT-WITH-ESC")
    ("JAPANESE-SHIFT-JIS" "SHIFT_JIS" "SJIS" "CP932")
    ("KOI8-T" "CYRILLIC-KOI8-T")
    ("KOI8-U")
    ("KOREAN-ISO-8BIT" "EUC-KR" "EUC-KOREA" "CP949")
    ("KOREAN-ISO-8BIT-WITH-ESC")
    ("LAO")
    ("LAO-WITH-ESC")
    ("MAC-ROMAN")
    ("MIK")
    ("MULE-UTF-16" "UTF-16")
    ("MULE-UTF-16BE" "UTF-16BE")
    ("MULE-UTF-16BE-WITH-SIGNATURE" "UTF-16BE-WITH-SIGNATURE"
                                    "MULE-UTF-16-BE" "UTF-16-BE")
    ("MULE-UTF-16LE" "UTF-16LE")
    ("MULE-UTF-16LE-WITH-SIGNATURE" "UTF-16LE-WITH-SIGNATURE"
                                    "MULE-UTF-16-LE" "UTF-16-LE")
    ("MULE-UTF-8" "UTF-8")
    ("NEXT")
    ("NO-CONVERSION")
    ("PT154")
    ("RAW-TEXT")
    ("THAI-TIS620" "TH-TIS620" "TIS620" "TIS-620")
    ("THAI-TIS620-WITH-ESC")
    ("TIBETAN-ISO-8BIT" "TIBETAN")
    ("TIBETAN-ISO-8BIT-WITH-ESC")
    ("UNDECIDED")
    ("UTF-7")
    ("VIETNAMESE-TCVN" "TCVN" "TCVN-5712")
    ("VIETNAMESE-VIQR" "VIQR")
    ("VIETNAMESE-VISCII" "VISCII")
    ("VIETNAMESE-VSCII" "VSCII")
    ("W3M-EUC-JAPAN")
    ("W3M-ISO-LATIN-1")
    ("WINDOWS-1250" "CP1250")
    ("WINDOWS-1251" "CP1251" "CP1251")
    ("WINDOWS-1252" "CP1252" "CP1252")
    ("WINDOWS-1253" "CP1253")
    ("WINDOWS-1254" "CP1254")
    ("WINDOWS-1255" "CP1255")
    ("WINDOWS-1256" "CP1256")
    ("WINDOWS-1257" "CP1257")
    ("WINDOWS-1258" "CP1258")))
  "List of emacs encoding, grouped by aliases")



(defun fill-character-set-emacs-encoding ()
  "
DO:         Set the cs-emacs-encoding of the character-sets present in
            the current implementation.
"
  (dolist (ecsl *emacs-encodings* (values))
    (let ((cs (some (function find-character-set)
                    (add-aliases-to-group ecsl *aliases*))))
      (when cs
        (setf (cs-emacs-encoding cs) ecsl)))))

(defun character-set-to-emacs-encoding (cs &key (line-termination :unix))
  "
CS:     A character set designator (ie.  a character-set, or a string or
        symbol naming a character-set).
RETURN: A string naming the emacs encoding for the given character-set
        and line-termination.
SIGNAL: An error if line-termination is not (member :unix :mac :dos nil) or
        if cs has no emacs encoding.
"
  (assert (member line-termination '(:unix :mac :dos nil)))
  (unless  (cs-emacs-encoding (etypecase cs
                                (character-set cs)
                                ((or string symbol) (find-character-set cs))))
    (error "The character-set ~A has no corresponding emacs encoding"
           (cs-name cs)))
  (format nil "~(~A~:[~;~:*-~A~]~)" (first (cs-emacs-encoding cs))
          line-termination))


(defun character-set-from-emacs-encoding (ecs)
  "
ECS:    A string or symbol naming the emacs encoding,
        possibly suffixed by a line-termination.
RETURN: The character-set that correspond to this emacs-encoding ;
        the line-termination.
"
  (let ((line-termination nil)
        (ecs (string ecs)))
    (cond
      ((suffixp "-unix" ecs :test (function char-equal))
       (setf ecs  (subseq ecs 0 (- (length ecs) 5))
             line-termination :unix))
      ((suffixp "-dos" ecs :test (function char-equal))
       (setf ecs (subseq ecs 0 (- (length ecs) 4))
             line-termination :dos))
      ((suffixp "-mac" ecs :test (function char-equal))
       (setf ecs (subseq ecs 0 (- (length ecs) 4))
             line-termination :mac)))
    (values
     (find-if (lambda (cs) (member ecs (cs-emacs-encoding cs)
                                   :test (function string-equal)))
              *character-sets*)
     line-termination)))


(defun emacs-encoding-to-lisp-external-format (emacs-encoding)
  "
RETURN:  the external-format value corresponding to this EMACS-ENCODING.
"
  (multiple-value-bind (charset line-termination)
      (character-set-from-emacs-encoding emacs-encoding)
    (when charset
      (character-set-to-lisp-encoding charset :line-termination line-termination))))



(eval-when (:load-toplevel :execute)
  (fill-character-set-emacs-encoding)
  (fill-character-set-lisp-encoding))




;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; The rest was used to generate the data in
;;; COM.INFORMATIMAGO.COMMON-LISP.CESARUM.CHARACTER-SETS
;;;

#+(and (and) clisp)
(defun compute-character-set-ranges ()
  "
DO:     Read the character-set file and build the *character-sets* list,
        then update the character sets with emacs encodings, lisp encodings,
        and character set ranges (found in clisp).
RETURN: *character-sets*
"
  (setf *character-sets* (read-character-sets-file "character-sets"))
  (fill-character-set-emacs-encoding)
  (fill-character-set-lisp-encoding)
  (dolist (cs *character-sets*)
    (when (cs-lisp-encoding cs)
      (let ((charset (find-symbol (first (cs-lisp-encoding cs)) "CHARSET")))
        (setf (cs-ranges cs)
              #+#.(cl:if #+mocl (cl:and (cl:find-package "SYSTEM")
                                        (cl:find-symbol "GET-CHARSET-RANGE" "SYSTEM"))
                         #-mocl (cl:ignore-errors
                                 (cl:find-symbol "GET-CHARSET-RANGE" "SYSTEM"))
                         '(:and) '(:or))
              (map 'vector (function char-code)
                   (system::get-charset-range charset))
              #-#.(cl:if #+mocl (cl:and (cl:find-package "SYSTEM")
                                        (cl:find-symbol "GET-CHARSET-RANGE" "SYSTEM"))
                         #-mocl (cl:ignore-errors
                                 (cl:find-symbol "GET-CHARSET-RANGE" "SYSTEM"))
                         '(:and) '(:or))
              (coerce
               (loop
                  :with charset = (symbol-value charset)
                  :with i = 0
                  :for start = (loop
                                  :until (or (< char-code-limit i)
                                             (typep (code-char i) charset))
                                  :do (incf i)
                                  :finally (return (when (<= i char-code-limit)
                                                     i)))
                  :while start
                  :nconc (list start
                               (loop
                                  :while (and (<= i char-code-limit)
                                              (typep (code-char i) charset))
                                  :do (incf i)
                                  :finally (return (1- i)))))
               'vector)))))
  *character-sets*)


;;; Provide a default value for  *CHARACTER-SETS*
#-(and)
(let ((*print-right-margin* 72))
  (pprint
   `(setf *character-sets*
      (list
       ,@(mapcar
          (lambda (cs)
            `(make-character-set
              :mib-enum ,(cs-mib-enum cs)
              :name ,(cs-name cs)
              :aliases ',(cs-aliases cs)
              :mime-encoding  ',(cs-mime-encoding cs)
              :source ',(cs-source cs)
              :comments ',(cs-comments cs)
              :references ',(cs-references cs)
              :ranges ,(cs-ranges cs)))
          (compute-character-set-ranges))))))


;;;; THE END ;;;;
ViewGit