Added future/rivest-sexp

Pascal J. Bourguignon [2015-07-16 20:14]
Added future/rivest-sexp
Filename
common-lisp/data-encoding/data-encoding-test.lisp
future/rivest-sexp/NOTES.txt
future/rivest-sexp/draft-rivest-sexp-00.txt
future/rivest-sexp/parse-rivest.lisp
future/rivest-sexp/rivest-sexp.txt
future/rivest-sexp/simple-sexp/read.lisp
future/rivest-sexp/simple-sexp/reps.lisp
diff --git a/common-lisp/data-encoding/data-encoding-test.lisp b/common-lisp/data-encoding/data-encoding-test.lisp
index 49c0518..734480a 100644
--- a/common-lisp/data-encoding/data-encoding-test.lisp
+++ b/common-lisp/data-encoding/data-encoding-test.lisp
@@ -66,7 +66,7 @@


 (def-encrecord test-rec
-    (buint8   buint8)
+  (buint8   buint8)
   (buint24  buint24)
   (luint8   luint8)
   (luint24  luint24)
@@ -212,13 +212,14 @@
 (define-test test/data ()
   (assert-true
    (equalp
-    (with-open-file (file "/tmp/test.data"
-                          :direction :input
-                          :if-does-not-exist :error
-                          :element-type '(unsigned-byte 8))
-      (let ((buffer  (make-array '(304) :element-type '(unsigned-byte 8))))
-        (assert-true (= 304 (read-sequence buffer file)))
-        buffer))
+    (print
+     (with-open-file (file "/tmp/test.data"
+                           :direction :input
+                           :if-does-not-exist :error
+                           :element-type '(unsigned-byte 8))
+       (let ((buffer  (make-array '(304) :element-type '(unsigned-byte 8))))
+         (assert-true (= 304 (read-sequence buffer file)))
+         buffer)))
     #(18 18 52 86 18 86 52 18 18 52 52 18 18 52 86 120 120 86 52 18 254
       254 220 186 254 186 220 254 254 220 220 254 254 220 186 152 152 186
       220 254 125 124 86 52 18 0 0 0 125 0 0 0 18 52 86 124 0 0 0 0 0 0 0 0
diff --git a/future/rivest-sexp/NOTES.txt b/future/rivest-sexp/NOTES.txt
new file mode 100644
index 0000000..7c144bd
--- /dev/null
+++ b/future/rivest-sexp/NOTES.txt
@@ -0,0 +1,52 @@
+Rivest sexps are either byte-strings ("octet-strings") or lists of
+simpler S-expressions.  Optionally, octet-strings may be qualified
+with a mime-type.
+
+
+We want to be able to store typed data for exchange, and display,
+between various programming languages, using Common Lisp default
+syntax.
+
+    integer                         [-+]?[0-9]+
+    rational                        [-+]?[0-9]+/[1-9][0-9]*
+    floating-point                  [-+]?[0-9]+\.[0-9]*([sSeEdDlL][-+]?[0-9]+)?
+    complex integer                 #C( integer integer )
+    complex rational                #C( rational rational ) #C( rational integer ) #C( integer rational )
+    complex floating-point          #C( floating-point floating-point ) #C( floating-point integer ) #C( integer floating-point )
+    character                       #\c  #\newline #\space ...
+    string                          "abc\\def\"ghi"
+    symbol                          abc package:symbol  more:package::and::packages:and:symbol
+    list                            ( element ... element [ . last-cdr ] )
+    vector                          #( element ... element )
+    arrays                          #nA( row ... row )
+    hash-table                      #S(HASH-TABLE :TEST EQL pairs...)
+    structures                      #S(struct-type :key value ...)
+
+    #1=(circular structures . #1#)
+    #| comment |#
+
+Some programming languages don't distinguish between some of these
+types.  For example, vectors and lists (Ruby, Perl), or various
+numbers (idem).  Or characters and integers (emacs lisp, C, C++).
+
+Some programming languages may discriminate more types, like short,
+int, long (C, C++), or fixnum, bignums (Common Lisp).
+
+
+(defstruct integer-box          value)
+(defstruct rational-box         nominator denominator)
+(defstruct float-box            magnitude exponent)
+(defstruct complex-integer-box  real-part imag-part)
+(defstruct complex-rational-box real-part imag-part)
+(defstruct complex-float-box    real-part imag-part)
+(defstruct character-box        unicode)
+(defstruct string-box           characters)
+(defstruct symbol-box           package-name symbol-name)
+(defstruct list-box             first rest)
+(defstruct vector-box           elements)
+(defstruct array-box            rank elements)
+(defstruct hash-table-box       test pairs)
+(defstruct structure-box        type pairs)
+
+
+==> Check the format used by Configuration::ManagerConfig::Tool::toString...
diff --git a/future/rivest-sexp/draft-rivest-sexp-00.txt b/future/rivest-sexp/draft-rivest-sexp-00.txt
new file mode 100644
index 0000000..0a48059
--- /dev/null
+++ b/future/rivest-sexp/draft-rivest-sexp-00.txt
@@ -0,0 +1,699 @@
+Network  Working Group                                         R. Rivest
+Internet Draft                                               May 4, 1997
+Expires November 4, 1997
+
+
+                              S-Expressions
+                        draft-rivest-sexp-00.txt
+
+
+Status of this Memo
+
+   Distribution of this memo is unlimited.
+
+   This document is an Internet-Draft.  Internet Drafts are working
+   documents of the Internet Engineering Task Force (IETF), its Areas,
+   and its Working Groups.  Note that other groups may also distribute
+   working documents as Internet Drafts.
+
+   Internet Drafts are draft documents valid for a maximum of six
+   months, and may be updated, replaced, or obsoleted by other documents
+   at any time.  It is not appropriate to use Internet Drafts as
+   reference material, or to cite them other than as a ``working draft''
+   or ``work in progress.''
+
+   To learn the current status of any Internet-Draft, please check the
+   ``1id-abstracts.txt'' listing contained in the internet-drafts Shadow
+   Directories on: ftp.is.co.za (Africa), nic.nordu.net (Europe),
+   ds.internic.net (US East Coast), ftp.isi.edu (US West Coast),
+   or munnari.oz.au (Pacific Rim)
+
+
+Abstract
+
+This memo describes a data structure called "S-expressions" that are
+suitable for representing arbitrary complex data structures.  We make
+precise the encodings of S-expressions: we give a "canonical form" for
+S-expressions, describe two "transport" representations, and also
+describe an "advanced" format for display to people.
+
+
+
+1. Introduction
+
+S-expressions are data structures for representing complex data.  They
+are either byte-strings ("octet-strings") or lists of simpler
+S-expressions.  Here is a sample S-expression:
+
+        (snicker "abc" (#03# |YWJj|))
+
+It is a list of length three:
+
+        -- the octet-string "snicker"
+
+        -- the octet-string "abc"
+
+        -- a sub-list containing two elements:
+                - the hexadecimal constant #03#
+                - the base-64 constant |YWJj| (which is the same as "abc")
+
+This note gives a specific proposal for constructing and utilizing
+S-expressions.  The proposal is independent of any particular application.
+
+Here are the design goals for S-expressions:
+
+  -- generality: S-expressions should be good at representing arbitrary
+     data.
+
+  -- readability: it should be easy for someone to examine and
+     understand the structure of an S-expression.
+
+  -- economy: S-expressions should represent data compactly.
+
+  -- transportability: S-expressions should be easy to transport
+     over communication media (such as email) that are known to be
+     less than perfect.
+
+  -- flexibility: S-expressions should make it relatively simple to
+     modify and extend data structures.
+
+  -- canonicalization: it should be easy to produce a unique
+     "canonical" form of an S-expression, for digital signature purposes.
+
+  -- efficiency: S-expressions should admit in-memory representations
+     that allow efficient processing.
+
+
+Section 2 gives an introduction to S-expressions.
+Section 3 discusses the character sets used.
+Section 4 presents the various representations of octet-strings.
+Section 5 describes how to represent lists.
+Section 6 discusses how S-expressions are represented for various uses.
+Section 7 gives a BNF syntax for S-expressions.
+Section 8 talks about how S-expressions might be represented in memory.
+Section 9 briefly describes implementations for handling S-expressions.
+Section 10 discusses how applications might utilize S-expressions.
+Section 11 gives historical notes on S-expressions.
+Section 12 gives references.
+
+2. S-expressions -- informal introduction
+
+Informally, an S-expression is either:
+        -- an octet-string, or
+        -- a finite list of simpler S-expressions.
+
+An octet-string is a finite sequence of eight-bit octets.  There may be
+many different but equivalent ways of representing an octet-string
+
+        abc             -- as a token
+
+        "abc"           -- as a quoted string
+
+        #616263#        -- as a hexadecimal string
+
+        3:abc           -- as a length-prefixed "verbatim" encoding
+
+        {MzphYmM=}      -- as a base-64 encoding of the verbatim encoding
+                           (that is, an encoding of "3:abc")
+
+        |YWJj|          -- as a base-64 encoding of the octet-string "abc"
+
+These encodings are all equivalent; they all denote the same octet string.
+
+We will give details of these encodings later on, and also describe how to
+give a "display type" to a byte string.
+
+A list is a finite sequence of zero or more simpler S-expressions.  A list
+may be represented by using parentheses to surround the sequence of encodings
+of its elements, as in:
+
+        (abc (de #6667#) "ghi jkl")
+
+As we see, there is variability possible in the encoding of an
+S-expression.  In some cases, it is desirable to standardize or
+restrict the encodings; in other cases it is desirable to have no
+restrictions.  The following are the target cases we aim to handle:
+
+        -- a "transport" encoding for transporting the S-expression between
+           computers.
+
+        -- a "canonical" encoding, used when signing the S-expression.
+
+        -- an "advanced" encoding used for input/output to people.
+
+        -- an "in-memory" encoding used for processing the S-expression in
+           the computer.
+
+These need not be different; in this proposal the canonical encoding
+is the same as the transport encoding, for example.  In this note we
+propose (related) encoding techniques for each of these uses.
+
+3. Character set
+
+We will be describing encodings of S-expressions.  Except when giving
+"verbatim" encodings, the character set used is limited to the following
+characters in US-ASCII:
+        Alphabetic:     A B ... Z a b ... z
+        numeric:        0 1 ... 9
+        whitespace:     space, horizontal tab, vertical tab, form-feed
+                        carriage-return, line-feed
+        The following graphics characters, which we call "pseudo-alphabetic":
+                        - hyphen or minus
+                        . period
+                        / slash
+                        _ underscore
+                        : colon
+                        * asterisk
+                        + plus
+                        = equal
+        The following graphics characters, which are "reserved punctuation":
+                        ( left parenthesis
+                        ) right parenthesis
+                        [ left bracket
+                        ] right bracket
+                        { left brace
+                        } right brace
+                        | vertical bar
+                        # number sign
+                        " double quote
+                        & ampersand
+                        \ backslash
+        The following characters are unused and unavailable, except in
+        "verbatim" encodings:
+                        ! exclamation point
+                        % percent
+                        ^ circumflex
+                        ~ tilde
+                        ; semicolon
+                        ' apostrophe
+                        , comma
+                        < less than
+                        > greater than
+                        ? question mark
+
+
+4. Octet string representations
+
+This section describes in detail the ways in which an octet-string may
+be represented.
+
+We recall that an octet-string is any finite sequence of octets, and
+that the octet-string may have length zero.
+
+
+4.1 Verbatim representation
+
+A verbatim encoding of an octet string consists of three parts:
+
+        -- the length (number of octets) of the octet-string,
+           given in decimal most significant digit first, with
+           no leading zeros.
+
+        -- a colon ":"
+
+        -- the octet string itself, verbatim.
+
+There are no blanks or whitespace separating the parts.  No "escape
+sequences" are interpreted in the octet string.  This encoding is also
+called a "binary" or "raw" encoding.
+
+Here are some sample verbatim encodings:
+
+        3:abc
+        7:subject
+        4:::::
+        12:hello world!
+        10:abcdefghij
+        0:
+
+4.2 Quoted-string representation
+
+The quoted-string representation of an octet-string consists of:
+
+        -- an optional decimal length field
+
+        -- an initial double-quote (")
+
+        -- the octet string with "C" escape conventions (\n,etc)
+
+        -- a final double-quote (")
+
+The specified length is the length of the resulting string after any
+escape sequences have been handled.  The string does not have any
+"terminating NULL" that C includes, and the length does not count such
+a character.
+
+The length is optional.
+
+The escape conventions within the quoted string are as follows (these follow
+the "C" programming language conventions, with an extension for
+ignoring line terminators of just LF or CRLF):
+        \b              -- backspace
+        \t              -- horizontal tab
+        \v              -- vertical tab
+        \n              -- new-line
+        \f              -- form-feed
+        \r              -- carriage-return
+        \"              -- double-quote
+        \'              -- single-quote
+        \\              -- back-slash
+        \ooo            -- character with octal value ooo (all three digits
+                           must be present)
+        \xhh            -- character with hexadecimal value hh (both digits
+                           must be present)
+        \<carriage-return> -- causes carriage-return to be ignored.
+        \<line-feed>       -- causes linefeed to be ignored
+        \<carriage-return><line-feed> -- causes CRLF to be ignored.
+        \<line-feed><carriage-return> -- causes LFCR to be ignored.
+
+Here are some examples of quoted-string encodings:
+
+        "subject"
+        "hi there"
+        7"subject"
+        3"\n\n\n"
+        "This has\n two lines."
+        "This has\
+        one."
+        ""
+
+4.3 Token representation
+
+An octet string that meets the following conditions may be given
+directly as a "token".
+
+        -- it does not begin with a digit
+
+        -- it contains only characters that are
+                -- alphabetic (upper or lower case),
+                -- numeric, or
+                -- one of the eight "pseudo-alphabetic" punctuation marks:
+                        -   .   /   _   :  *  +  =
+        (Note: upper and lower case are not equivalent.)
+        (Note: A token may begin with punctuation, including ":").
+
+Here are some examples of token representations:
+
+        subject
+        not-before
+        class-of-1997
+        //microsoft.com/names/smith
+        *
+
+
+4.4 Hexadecimal representation
+
+An octet-string may be represented with a hexadecimal encoding consisting of:
+
+        -- an (optional) decimal length of the octet string
+
+        -- a sharp-sign "#"
+
+        -- a hexadecimal encoding of the octet string, with each octet
+           represented with two hexadecimal digits, most significant
+           digit first.
+
+        -- a sharp-sign "#"
+
+There may be whitespace inserted in the midst of the hexadecimal
+encoding arbitrarily; it is ignored.  It is an error to have
+characters other than whitespace and hexadecimal digits.
+
+Here are some examples of hexadecimal encodings:
+
+        #616263#                -- represents "abc"
+        3#616263#               -- also represents "abc"
+        # 616
+          263 #                 -- also represents "abc"
+
+
+4.5 Base-64 representation
+
+An octet-string may be represented in a base-64 coding consisting of:
+
+        -- an (optional) decimal length of the octet string
+
+        -- a vertical bar "|"
+
+        -- the rfc 1521 base-64 encoding of the octet string.
+
+        -- a final vertical bar "|"
+
+The base-64 encoding uses only the characters
+        A-Z  a-z  0-9  +  /  =
+It produces four characters of output for each three octets of input.
+If the input has one or two left-over octets of input, it produces an
+output block of length four ending in two or one equals signs, respectively.
+Output routines compliant with this standard MUST output the equals signs
+as specified.  Input routines MAY accept inputs where the equals signs are
+dropped.
+
+There may be whitespace inserted in the midst of the base-64 encoding
+arbitrarily; it is ignored.  It is an error to have characters other
+than whitespace and base-64 characters.
+
+Here are some examples of base-64 encodings:
+
+        |YWJj|          -- represents "abc"
+        | Y W
+          J j |         -- also represents "abc"
+        3|YWJj|         -- also represents "abc"
+        |YWJjZA==|      -- represents "abcd"
+        |YWJjZA|        -- also represents "abcd"
+
+
+4.6 Display hint
+
+Any octet string may be preceded by a single "display hint".
+
+The purpose of the display hint is to provide information on how
+to display the octet string to a user.  It has no other function.
+Many of the MIME types work here.
+
+A display-hint is an octet string surrounded by square brackets.
+There may be whitespace separating the octet string from the
+surrounding brackets.  Any of the legal formats may be used for the
+octet string.
+
+Here are some examples of display-hints:
+
+        [image/gif]
+        [URI]
+        [charset=unicode-1-1]
+        [text/richtext]
+        [application/postscript]
+        [audio/basic]
+        ["http://abc.com/display-types/funky.html"]
+
+In applications an octet-string that is untyped may be considered to have
+a pre-specified "default" mime type.  The mime type
+                "text/plain; charset=iso-8859-1"
+is the standard default.
+
+
+4.7  Equality of octet-strings
+
+Two octet strings are considered to be "equal" if and only if they
+have the same display hint and the same data octet strings.
+
+Note that octet-strings are "case-sensitive"; the octet-string "abc"
+is not equal to the octet-string "ABC".
+
+An untyped octet-string can be compared to another octet-string (typed
+or not) by considering it as a typed octet-string with the default
+mime-type.
+
+
+5. Lists
+
+Just as with octet-strings, there are several ways to represent an
+S-expression.  Whitespace may be used to separate list elements, but
+they are only required to separate two octet strings when otherwise
+the two octet strings might be interpreted as one, as when one token
+follows another.  Also, whitespace may follow the initial left
+parenthesis, or precede the final right parenthesis.
+
+Here are some examples of encodings of lists:
+
+        (a b c)
+
+        ( a ( b c ) ( ( d e ) ( e f ) )  )
+
+        (11:certificate(6:issuer3:bob)(7:subject5:alice))
+
+        ({3Rt=} "1997" murphy 3:{XC++})
+
+
+6. Representation types
+
+There are three "types" of representations:
+
+        -- canonical
+
+        -- basic transport
+
+        -- advanced transport
+
+The first two MUST be supported by any implementation; the last is
+optional.
+
+
+6.1  Canonical representation
+
+This canonical representation is used for digital signature purposes,
+transmission, etc.  It is uniquely defined for each S-expression.  It
+is not particularly readable, but that is not the point.  It is
+intended to be very easy to parse, to be reasonably economical, and to
+be unique for any S-expression.
+
+The "canonical" form of an S-expression represents each octet-string
+in verbatim mode, and represents each list with no blanks separating
+elements from each other or from the surrounding parentheses.
+
+Here are some examples of canonical representations of S-expressions:
+
+        (6:issuer3:bob)
+
+        (4:icon[12:image/bitmap]9:xxxxxxxxx)
+
+        (7:subject(3:ref5:alice6:mother))
+
+
+6.2 Basic transport representation
+
+There are two forms of the "basic transport" representation:
+
+        -- the canonical representation
+
+        -- an rfc-2045 base-64 representation of the canonical representation,
+           surrounded by braces.
+
+The transport mechanism is intended to provide a universal means of
+representing S-expressions for transport from one machine to another.
+
+Here are some examples of an S-expression represented in basic
+transport mode:
+
+        (1:a1:b1:c)
+
+        {KDE6YTE6YjE6Yyk=}
+
+                (this is the same S-expression encoded in base-64)
+
+There is a difference between the brace notation for base-64 used here
+and the || notation for base-64'd octet-strings described above.  Here
+the base-64 contents are converted to octets, and then re-scanned as
+if they were given originally as octets.  With the || notation, the
+contents are just turned into an octet-string.
+
+
+6.3 Advanced transport representation
+
+The "advanced transport" representation is intended to provide more
+flexible and readable notations for documentation, design, debugging,
+and (in some cases) user interface.
+
+The advanced transport representation allows all of the representation
+forms described above, including quoted strings, base-64 and hexadecimal
+representation of strings, tokens, representations of strings with
+omitted lengths, and so on.
+
+
+7. BNF for syntax
+
+We give separate BNF's for canonical and advanced forms of S-expressions.
+We use the following notation:
+        <x>*            means 0 or more occurrences of <x>
+        <x>+            means 1 or more occurrences of <x>
+        <x>?            means 0 or 1 occurrences of <x>
+        parentheses     are used for grouping, as in (<x> | <y>)*
+
+For canonical and basic transport:
+
+<sexpr>         :: <string> | <list>
+<string>        :: <display>? <simple-string> ;
+<simple-string> :: <raw> ;
+<display>       :: "[" <simple-string> "]" ;
+<raw>           :: <decimal> ":" <bytes> ;
+<decimal>       :: <decimal-digit>+ ;
+                -- decimal numbers should have no unnecessary leading zeros
+<bytes>         -- any string of bytes, of the indicated length
+<list>          :: "(" <sexpr>* ")" ;
+<decimal-digit> :: "0" | ... | "9" ;
+
+For advanced transport:
+
+<sexpr>         :: <string> | <list>
+<string>        :: <display>? <simple-string> ;
+<simple-string> :: <raw> | <token> | <base-64> | <hexadecimal> |
+                           <quoted-string> ;
+<display>       :: "[" <simple-string> "]" ;
+<raw>           :: <decimal> ":" <bytes> ;
+<decimal>       :: <decimal-digit>+ ;
+                -- decimal numbers should have no unnecessary leading zeros
+<bytes>         -- any string of bytes, of the indicated length
+<token>         :: <token-char>+ ;
+<base-64>       :: <decimal>? "|" ( <base-64-char> | <whitespace> )* "|" ;
+<hexadecimal>   :: <decimal>? "#" ( <hex-digit> | <whitespace> )* "#" ;
+<quoted-string> :: <decimal>? <quoted-string-body> ;
+<quoted-string-body> :: "\"" <bytes> "\"" ;
+<list>          :: "(" ( <sexpr> | <whitespace> )* ")" ;
+<whitespace>    :: <whitespace-char>* ;
+<token-char>    :: <alpha> | <decimal-digit> | <simple-punc> ;
+<alpha>         :: <upper-case> | <lower-case> ;
+<lower-case>    :: "a" | ... | "z" ;
+<upper-case>    :: "A" | ... | "Z" ;
+<decimal-digit> :: "0" | ... | "9" ;
+<hex-digit>     :: <decimal-digit> | "A" | ... | "F" | "a" | ... | "f" ;
+<simple-punc>   :: "-" | "." | "/" | "_" | ":" | "*" | "+" | "=" ;
+<whitespace-char> :: " " | "\t" | "\r" | "\n" ;
+<base-64-char>  :: <alpha> | <decimal-digit> | "+" | "/" | "=" ;
+<null>          :: "" ;
+
+8. In-memory representations
+
+For processing, the S-expression would typically be parsed and represented
+in memory in a form more amenable to efficient processing.  We suggest
+two alternatives:
+
+        -- "list-structure"
+
+        -- "array-layout"
+
+We only sketch these here, as they are only suggestive.  The code referenced
+below illustrates these styles in more detail.
+
+
+8.1. List-structure memory representation
+
+Here there are separate records for simple-strings, strings, and
+lists.  An S-expression of the form ("abc" "de") would require two
+records for the simple strings, two for the strings, and two for the
+list elements.  This is a fairly conventional representation, and
+details are omitted here.
+
+8.2 Array-layout memory representation
+
+Here each S-expression is represented as a contiguous array of bytes.
+The first byte codes the "type" of the S-expression:
+
+        01      octet-string
+
+        02      octet-string with display-hint
+
+        03      beginning of list (and 00 is used for "end of list")
+
+Each of the three types is immediately followed by a k-byte integer
+indicating the size (in bytes) of the following representation.  Here
+k is an integer that depends on the implementation, it might be
+anywhere from 2 to 8, but would be fixed for a given implementation;
+it determines the size of the objects that can be handled.  The transport
+and canonical representations are independent of the choice of k made by
+the implementation.
+
+Although the length of lists are not given in the usual S-expression
+notations, it is easy to fill them in when parsing; when you reach a
+right-parenthesis you know how long the list representation was, and
+where to go back to fill in the missing length.
+
+
+8.2.1 Octet string
+
+This is represented as follows:
+
+        01 <length> <octet-string>
+
+For example (here k = 2)
+
+        01 0003 a b c
+
+8.2.2 Octet-string with display-hint
+
+This is represented as follows:
+
+        02 <length>
+          01 <length> <octet-string>    /* for display-type */
+          01 <length> <octet-string>    /* for octet-string */
+
+For example, the S-expression
+
+        [gif] #61626364#
+
+would be represented as (with k = 2)
+
+        02 000d
+          01 0003  g  i  f
+          01 0004 61 62 63 64
+
+8.2.3 List
+
+This is represented as
+
+        03 <length> <item1> <item2> <item3> ... <itemn> 00
+
+For example, the list (abc [d]ef (g)) is represented in memory as (with k=2)
+
+        03 001b
+          01 0003 a b c
+          02 0009
+            01 0001 d
+            01 0002 e f
+          03 0005
+            01 0001 g
+          00
+        00
+
+9. Code
+
+There is code available for reading and parsing the various
+S-expression formats proposed here.
+
+See http://theory.lcs.mit.edu/~rivest/sexp.html
+
+
+10. Utilization of S-expressions
+
+This note has described S-expressions in general form.  Application writers
+may wish to restrict their use of S-expressions in various ways.  Here are
+some possible restrictions that might be considered:
+
+        -- no display-hints
+        -- no lengths on hexadecimal, quoted-strings, or base-64 encodings
+        -- no empty lists
+        -- no empty octet-strings
+        -- no lists having another list as its first element
+        -- no base-64 or hexadecimal encodings
+        -- fixed limits on the size of octet-strings
+
+11. Historical note
+
+The S-expression technology described here was originally developed
+for ``SDSI'' (the Simple Distributed Security Infrastructure by
+Lampson and Rivest [SDSI]) in 1996, although the origins clearly date
+back to McCarthy's LISP programming language.  It was further refined
+and improved during the merger of SDSI and SPKI [SPKI] during the
+first half of 1997.  S-expressions are similar to, but more readable
+and flexible than, Bernstein's "net-strings" [BERN].
+
+12. References
+
+[SDSI] "A Simple Distributed Security Infrastructure", by
+        Butler Lampson, and Ronald L. Rivest
+        http://theory.lcs.mit.edu/~cis/sdsi.html
+
+[SPKI] "SPKI--A Simple Public Key Infrastructure"
+       http://www.clark.net/pub/cme/html/spki.html
+
+[BERN] Dan Bernstein's "net-strings"; Internet Draft
+       draft-bernstein-netstrings-02.txt
+
+Author's Address
+
+      Ronald L. Rivest
+      Room 324, 545 Technology Square
+      MIT Laboratory for Computer Science
+      Cambridge, MA 02139
+
+      rivest@theory.lcs.mit.edu
+
+
diff --git a/future/rivest-sexp/parse-rivest.lisp b/future/rivest-sexp/parse-rivest.lisp
new file mode 100644
index 0000000..a4eb738
--- /dev/null
+++ b/future/rivest-sexp/parse-rivest.lisp
@@ -0,0 +1,90 @@
+;; Path: news.easynet.es!numbering.news.easynet.net!spool2.bllon.news.easynet.net!easynet-quince!easynet.net!newsfeeds.sol.net!newspump.sol.net!news.glorb.com!newsfeed2.telusplanet.net!newsfeed.telus.net!edtnps84.POSTED!53ab2750!not-for-mail
+;; From: Wade Humeniuk <whumeniu+anti+spam@telus.net>
+;; User-Agent: Mozilla Thunderbird 1.0 (Windows/20041206)
+;; X-Accept-Language: en-us, en
+;; MIME-Version: 1.0
+;; Newsgroups: comp.lang.lisp
+;; Subject: Re: S-expression grammar?
+;; References: <87y8a97u34.fsf@shadizar.dyndns.org> <S-expressions-20050520183401@ram.dialup.fu-berlin.de> <87ekc1tbxp.fsf@david-steuber.com> <S-Expressions-20050521125627@ram.dialup.fu-berlin.de>
+;; In-Reply-To: <S-Expressions-20050521125627@ram.dialup.fu-berlin.de>
+;; Content-Type: text/plain; charset=ISO-8859-1; format=flowed
+;; Content-Transfer-Encoding: 7bit
+;; Lines: 69
+;; Message-ID: <UyIje.4658$HI.1721@edtnps84>
+;; Date: Sat, 21 May 2005 15:35:16 GMT
+;; NNTP-Posting-Host: 142.59.106.101
+;; X-Trace: edtnps84 1116689716 142.59.106.101 (Sat, 21 May 2005 09:35:16 MDT)
+;; NNTP-Posting-Date: Sat, 21 May 2005 09:35:16 MDT
+;; Xref: news.easynet.es comp.lang.lisp:87328
+;;
+;; Stefan Ram wrote:
+;;
+;; >   draft-rivest-sexp-00.txt introduces Base64-atom-literals, like
+;; >   |YWJj|, which are not part of most Lisp-implementations,
+;; >   AFAIK, and on the other hand, does not seem to include dotted
+;; >   pairs.
+;; >
+;;
+;; Speaking of which, here is a possible parser for Rivest's Canonical/Transport
+;; sexprs (For LispWorks),
+;;
+;; CL-USER 8 > (parse-rivest-sexp "(4:icon[12:image/bitmap]9:xxxxxxxxx)")
+;; ("icon" (:DISPLAY-ENCODED "image/bitmap" #(120 120 120 120 120 120 120 120 120)))
+;; NIL
+;;
+;; CL-USER 9 > (parse-rivest-sexp "(7:subject(3:ref5:alice6:mother))")
+;; ("subject" ("ref" "alice" "mother"))
+;; NIL
+;;
+;; Wade
+
+
+(in-package :cl-user)
+
+(eval-when (:compile-toplevel :load-toplevel :execute)
+  (require "parsergen")
+  (use-package :parsergen))
+
+(defparser rivest-canonical-sexp-parser          ; grammar for Rivest canonical/transport S-expressions
+  ((s-expression sexpr))                          ; top-level rule
+  ((sexpr string) $1) ((sexpr list) $1)           ; a sexpr is either a string or a list
+  ((string display simple-string)                 ; string carrying a [display-hint]
+   (list :display-encoded $1                      ; $1 is the hint text
+         (map '(simple-array (unsigned-byte 8) (*)) #'char-int $2))) ; payload as an octet vector
+  ((string simple-string) $1)                     ; unhinted string, kept as the lexer produced it
+  ((display \[ simple-string \]) $2)              ; hint is the string between the brackets
+  ((simple-string raw) $1)                        ; only RAW (length-prefixed) strings are accepted
+  ((elements sexpr) (list $1))
+  ((elements sexpr elements) (cons $1 $2))        ; right-recursive: builds the list front to back
+  ((list \( \)) nil)                              ; "()" is a legal (empty) list and parses to NIL
+  ((list \( elements \)) $2))
+
+(defun rivest-canonical-sexp-lexer (stream)
+  "Read one token from STREAM; return (values KIND VALUE), or NIL NIL at end of input."
+  (let ((c (read-char stream nil)))
+    (cond
+     ((null c) (values nil nil))                   ; end of input
+     ((member c '(#\space #\tab #\newline))
+      (error "No Whitespace Allowed in Rivest Canonical Form"))
+     ((char= c #\() (values '\( c))
+     ((char= c #\)) (values '\) c))
+     ((char= c #\[) (values '\[ c))
+     ((char= c #\]) (values '\] c))
+     ((digit-char-p c)                             ; verbatim string: <decimal> ":" <bytes>
+      (let ((length (digit-char-p c)))
+        (loop for c = (read-char stream) do
+              (cond
+               ((digit-char-p c)
+                (setf length (+ (* 10 length) (digit-char-p c))))
+               ((char= #\: c)
+                (loop with string = (make-array length :element-type 'character)
+                      for i from 0 below length
+                      do (setf (aref string i) (read-char stream))
+                      finally (return-from rivest-canonical-sexp-lexer (values 'raw string))))
+               (t (error "Invalid Rivest Simple String"))))))
+     (t (error "Invalid character ~S in Rivest Canonical Form" c))))) ; was: fell through as NIL (= EOF)
+
+(defun parse-rivest-sexp (string)       ; parse the canonical-form text in STRING
+  (with-input-from-string (input string)
+    (rivest-canonical-sexp-parser #'(lambda () (rivest-canonical-sexp-lexer input)))))
+
diff --git a/future/rivest-sexp/rivest-sexp.txt b/future/rivest-sexp/rivest-sexp.txt
new file mode 100644
index 0000000..caf4542
--- /dev/null
+++ b/future/rivest-sexp/rivest-sexp.txt
@@ -0,0 +1,699 @@
+Network  Working Group                                         R. Rivest
+Internet Draft                                               May 4, 1997
+Expires November 4, 1997
+
+
+	                      S-Expressions
+                        draft-rivest-sexp-00.txt
+
+
+Status of this Memo
+
+   Distribution of this memo is unlimited.
+
+   This document is an Internet-Draft.  Internet Drafts are working
+   documents of the Internet Engineering Task Force (IETF), its Areas,
+   and its Working Groups.  Note that other groups may also distribute
+   working documents as Internet Drafts.
+
+   Internet Drafts are draft documents valid for a maximum of six
+   months, and may be updated, replaced, or obsoleted by other documents
+   at any time.  It is not appropriate to use Internet Drafts as
+   reference material, or to cite them other than as a ``working draft''
+   or ``work in progress.''
+
+   To learn the current status of any Internet-Draft, please check the
+   ``1id-abstracts.txt'' listing contained in the internet-drafts Shadow
+   Directories on: ftp.is.co.za (Africa), nic.nordu.net (Europe),
+   ds.internic.net (US East Coast), ftp.isi.edu (US West Coast),
+   or munnari.oz.au (Pacific Rim)
+
+
+Abstract
+
+This memo describes a data structure called "S-expressions" that are
+suitable for representing arbitrary complex data structures.  We make
+precise the encodings of S-expressions: we give a "canonical form" for
+S-expressions, describe two "transport" representations, and also
+describe an "advanced" format for display to people.
+
+
+
+1. Introduction
+
+S-expressions are data structures for representing complex data.  They
+are either byte-strings ("octet-strings") or lists of simpler
+S-expressions.  Here is a sample S-expression:
+
+	(snicker "abc" (#03# |YWJj|))
+
+It is a list of length three:
+
+	-- the octet-string "snicker"
+
+	-- the octet-string "abc"
+
+	-- a sub-list containing two elements:
+		- the hexadecimal constant #03#
+		- the base-64 constant |YWJj| (which is the same as "abc")
+
+This note gives a specific proposal for constructing and utilizing
+S-expressions.  The proposal is independent of any particular application.
+
+Here are the design goals for S-expressions:
+
+  -- generality: S-expressions should be good at representing arbitrary
+     data.
+
+  -- readability: it should be easy for someone to examine and
+     understand the structure of an S-expression.
+
+  -- economy: S-expressions should represent data compactly.
+
+  -- transportability: S-expressions should be easy to transport
+     over communication media (such as email) that are known to be
+     less than perfect.
+
+  -- flexibility: S-expressions should make it relatively simple to
+     modify and extend data structures.
+
+  -- canonicalization: it should be easy to produce a unique
+     "canonical" form of an S-expression, for digital signature purposes.
+
+  -- efficiency: S-expressions should admit in-memory representations
+     that allow efficient processing.
+
+
+Section 2 gives an introduction to S-expressions.
+Section 3 discusses the character sets used.
+Section 4 presents the various representations of octet-strings.
+Section 5 describes how to represent lists.
+Section 6 discusses how S-expressions are represented for various uses.
+Section 7 gives a BNF syntax for S-expressions.
+Section 8 talks about how S-expressions might be represented in memory.
+Section 9 briefly describes implementations for handling S-expressions.
+Section 10 discusses how applications might utilize S-expressions.
+Section 11 gives historical notes on S-expressions.
+Section 12 gives references.
+
+2. S-expressions -- informal introduction
+
+Informally, an S-expression is either:
+	-- an octet-string, or
+	-- a finite list of simpler S-expressions.
+
+An octet-string is a finite sequence of eight-bit octets.  There may be
+many different but equivalent ways of representing an octet-string:
+
+	abc		-- as a token
+
+	"abc"		-- as a quoted string
+
+	#616263#	-- as a hexadecimal string
+
+	3:abc		-- as a length-prefixed "verbatim" encoding
+
+	{MzphYmM=}	-- as a base-64 encoding of the verbatim encoding
+			   (that is, an encoding of "3:abc")
+
+	|YWJj|		-- as a base-64 encoding of the octet-string "abc"
+
+These encodings are all equivalent; they all denote the same octet string.
+
+We will give details of these encodings later on, and also describe how to
+give a "display type" to a byte string.
+
+A list is a finite sequence of zero or more simpler S-expressions.  A list
+may be represented by using parentheses to surround the sequence of encodings
+of its elements, as in:
+
+	(abc (de #6667#) "ghi jkl")
+
+As we see, there is variability possible in the encoding of an
+S-expression.  In some cases, it is desirable to standardize or
+restrict the encodings; in other cases it is desirable to have no
+restrictions.  The following are the target cases we aim to handle:
+
+	-- a "transport" encoding for transporting the S-expression between
+	   computers.
+
+	-- a "canonical" encoding, used when signing the S-expression.
+
+	-- an "advanced" encoding used for input/output to people.
+
+	-- an "in-memory" encoding used for processing the S-expression in
+	   the computer.
+
+These need not be different; in this proposal the canonical encoding
+is the same as the transport encoding, for example.  In this note we
+propose (related) encoding techniques for each of these uses.
+
+3. Character set
+
+We will be describing encodings of S-expressions.  Except when giving
+"verbatim" encodings, the character set used is limited to the following
+characters in US-ASCII:
+	Alphabetic:	A B ... Z a b ... z
+	numeric:	0 1 ... 9
+	whitespace:	space, horizontal tab, vertical tab, form-feed
+			carriage-return, line-feed
+	The following graphics characters, which we call "pseudo-alphabetic":
+			- hyphen or minus
+			. period
+			/ slash
+			_ underscore
+			: colon
+			* asterisk
+			+ plus
+			= equal
+	The following graphics characters, which are "reserved punctuation":
+			( left parenthesis
+			) right parenthesis
+			[ left bracket
+			] right bracket
+			{ left brace
+			} right brace
+			| vertical bar
+			# number sign
+			" double quote
+			& ampersand
+			\ backslash
+	The following characters are unused and unavailable, except in
+	"verbatim" encodings:
+			! exclamation point
+			% percent
+			^ circumflex
+			~ tilde
+			; semicolon
+			' apostrophe
+			, comma
+			< less than
+			> greater than
+			? question mark
+
+
+4. Octet string representations
+
+This section describes in detail the ways in which an octet-string may
+be represented.
+
+We recall that an octet-string is any finite sequence of octets, and
+that the octet-string may have length zero.
+
+
+4.1 Verbatim representation
+
+A verbatim encoding of an octet string consists of three parts:
+
+	-- the length (number of octets) of the octet-string,
+	   given in decimal most significant digit first, with
+	   no leading zeros.
+
+	-- a colon ":"
+
+	-- the octet string itself, verbatim.
+
+There are no blanks or whitespace separating the parts.  No "escape
+sequences" are interpreted in the octet string.  This encoding is also
+called a "binary" or "raw" encoding.
+
+Here are some sample verbatim encodings:
+
+	3:abc
+	7:subject
+	4:::::
+	12:hello world!
+	10:abcdefghij
+	0:
+
+4.2 Quoted-string representation
+
+The quoted-string representation of an octet-string consists of:
+
+	-- an optional decimal length field
+
+	-- an initial double-quote (")
+
+	-- the octet string with "C" escape conventions (\n,etc)
+
+	-- a final double-quote (")
+
+The specified length is the length of the resulting string after any
+escape sequences have been handled.  The string does not have any
+"terminating NULL" that C includes, and the length does not count such
+a character.
+
+The length is optional.
+
+The escape conventions within the quoted string are as follows (these follow
+the "C" programming language conventions, with an extension for
+ignoring line terminators of just LF or CRLF):
+	\b		-- backspace
+	\t		-- horizontal tab
+	\v 		-- vertical tab
+	\n		-- new-line
+	\f		-- form-feed
+	\r		-- carriage-return
+	\"		-- double-quote
+	\'		-- single-quote
+	\\		-- back-slash
+	\ooo		-- character with octal value ooo (all three digits
+			   must be present)
+	\xhh		-- character with hexadecimal value hh (both digits
+			   must be present)
+	\<carriage-return> -- causes carriage-return to be ignored.
+	\<line-feed>       -- causes linefeed to be ignored
+	\<carriage-return><line-feed> -- causes CRLF to be ignored.
+	\<line-feed><carriage-return> -- causes LFCR to be ignored.
+
+Here are some examples of quoted-string encodings:
+
+	"subject"
+	"hi there"
+	7"subject"
+	3"\n\n\n"
+	"This has\n two lines."
+	"This has\
+	one."
+	""
+
+4.3 Token representation
+
+An octet string that meets the following conditions may be given
+directly as a "token".
+
+	-- it does not begin with a digit
+
+	-- it contains only characters that are
+		-- alphabetic (upper or lower case),
+		-- numeric, or
+		-- one of the eight "pseudo-alphabetic" punctuation marks:
+			-   .   /   _   :  *  +  =
+	(Note: upper and lower case are not equivalent.)
+	(Note: A token may begin with punctuation, including ":").
+
+Here are some examples of token representations:
+
+	subject
+	not-before
+	class-of-1997
+	//microsoft.com/names/smith
+	*
+
+
+4.4 Hexadecimal representation
+
+An octet-string may be represented with a hexadecimal encoding consisting of:
+
+	-- an (optional) decimal length of the octet string
+
+	-- a sharp-sign "#"
+
+	-- a hexadecimal encoding of the octet string, with each octet
+	   represented with two hexadecimal digits, most significant
+	   digit first.
+
+	-- a sharp-sign "#"
+
+There may be whitespace inserted in the midst of the hexadecimal
+encoding arbitrarily; it is ignored.  It is an error to have
+characters other than whitespace and hexadecimal digits.
+
+Here are some examples of hexadecimal encodings:
+
+	#616263#		-- represents "abc"
+	3#616263#		-- also represents "abc"
+	# 616
+	  263 #                 -- also represents "abc"
+
+
+4.5 Base-64 representation
+
+An octet-string may be represented in a base-64 coding consisting of:
+
+	-- an (optional) decimal length of the octet string
+
+	-- a vertical bar "|"
+
+	-- the rfc 1521 base-64 encoding of the octet string.
+
+	-- a final vertical bar "|"
+
+The base-64 encoding uses only the characters
+	A-Z  a-z  0-9  +  /  =
+It produces four characters of output for each three octets of input.
+If the input has one or two left-over octets of input, it produces an
+output block of length four ending in two or one equals signs, respectively.
+Output routines compliant with this standard MUST output the equals signs
+as specified.  Input routines MAY accept inputs where the equals signs are
+dropped.
+
+There may be whitespace inserted in the midst of the base-64 encoding
+arbitrarily; it is ignored.  It is an error to have characters other
+than whitespace and base-64 characters.
+
+Here are some examples of base-64 encodings:
+
+	|YWJj|		-- represents "abc"
+	| Y W
+	  J j |		-- also represents "abc"
+	3|YWJj|		-- also represents "abc"
+	|YWJjZA==|	-- represents "abcd"
+	|YWJjZA|	-- also represents "abcd"
+
+
+4.6 Display hint
+
+Any octet string may be preceded by a single "display hint".
+
+The purpose of the display hint is to provide information on how
+to display the octet string to a user.  It has no other function.
+Many of the MIME types work here.
+
+A display-hint is an octet string surrounded by square brackets.
+There may be whitespace separating the octet string from the
+surrounding brackets.  Any of the legal formats may be used for the
+octet string.
+
+Here are some examples of display-hints:
+
+	[image/gif]
+	[URI]
+	[charset=unicode-1-1]
+	[text/richtext]
+	[application/postscript]
+	[audio/basic]
+	["http://abc.com/display-types/funky.html"]
+
+In applications an octet-string that is untyped may be considered to have
+a pre-specified "default" mime type.  The mime type
+		"text/plain; charset=iso-8859-1"
+is the standard default.
+
+
+4.7  Equality of octet-strings
+
+Two octet strings are considered to be "equal" if and only if they
+have the same display hint and the same data octet strings.
+
+Note that octet-strings are "case-sensitive"; the octet-string "abc"
+is not equal to the octet-string "ABC".
+
+An untyped octet-string can be compared to another octet-string (typed
+or not) by considering it as a typed octet-string with the default
+mime-type.
+
+
+5. Lists
+
+Just as with octet-strings, there are several ways to represent an
+S-expression.  Whitespace may be used to separate list elements, but
+they are only required to separate two octet strings when otherwise
+the two octet strings might be interpreted as one, as when one token
+follows another.  Also,	whitespace may follow the initial left
+parenthesis, or precede the final right parenthesis.
+
+Here are some examples of encodings of lists:
+
+	(a b c)
+
+	( a ( b c ) ( ( d e ) ( e f ) )  )
+
+	(11:certificate(6:issuer3:bob)(7:subject5:alice))
+
+	({3Rt=} "1997" murphy 3:{XC++})
+
+
+6. Representation types
+
+There are three "types" of representations:
+
+	-- canonical
+
+	-- basic transport
+
+	-- advanced transport
+
+The first two MUST be supported by any implementation; the last is
+optional.
+
+
+6.1  Canonical representation
+
+This canonical representation is used for digital signature purposes,
+transmission, etc.  It is uniquely defined for each S-expression.  It
+is not particularly readable, but that is not the point.  It is
+intended to be very easy to parse, to be reasonably economical, and to
+be unique for any S-expression.
+
+The "canonical" form of an S-expression represents each octet-string
+in verbatim mode, and represents each list with no blanks separating
+elements from each other or from the surrounding parentheses.
+
+Here are some examples of canonical representations of S-expressions:
+
+	(6:issuer3:bob)
+
+	(4:icon[12:image/bitmap]9:xxxxxxxxx)
+
+	(7:subject(3:ref5:alice6:mother))
+
+
+6.2 Basic transport representation
+
+There are two forms of the "basic transport" representation:
+
+	-- the canonical representation
+
+	-- an rfc-2045 base-64 representation of the canonical representation,
+           surrounded by braces.
+
+The transport mechanism is intended to provide a universal means of
+representing S-expressions for transport from one machine to another.
+
+Here are some examples of an S-expression represented in basic
+transport mode:
+
+	(1:a1:b1:c)
+
+	{KDE6YTE6YjE6YykA}
+
+		(this is the same S-expression encoded in base-64)
+
+There is a difference between the brace notation for base-64 used here
+and the || notation for base-64'd octet-strings described above.  Here
+the base-64 contents are converted to octets, and then re-scanned as
+if they were given originally as octets.  With the || notation, the
+contents are just turned into an octet-string.
+
+
+6.3 Advanced transport representation
+
+The "advanced transport" representation is intended to provide more
+flexible and readable notations for documentation, design, debugging,
+and (in some cases) user interface.
+
+The advanced transport representation allows all of the representation
+forms described above, including quoted strings, base-64 and hexadecimal
+representation of strings, tokens, representations of strings with
+omitted lengths, and so on.
+
+
+7. BNF for syntax
+
+We give separate BNF's for canonical and advanced forms of S-expressions.
+We use the following notation:
+	<x>* 		means 0 or more occurrences of <x>
+	<x>+		means 1 or more occurrences of <x>
+	<x>?		means 0 or 1 occurrences of <x>
+	parentheses	are used for grouping, as in (<x> | <y>)*
+
+For canonical and basic transport:
+
+<sexpr>    	:: <string> | <list>
+<string>   	:: <display>? <simple-string> ;
+<simple-string>	:: <raw> ;
+<display>  	:: "[" <simple-string> "]" ;
+<raw>      	:: <decimal> ":" <bytes> ;
+<decimal>  	:: <decimal-digit>+ ;
+		-- decimal numbers should have no unnecessary leading zeros
+<bytes> 	-- any string of bytes, of the indicated length
+<list>     	:: "(" <sexpr>* ")" ;
+<decimal-digit> :: "0" | ... | "9" ;
+
+For advanced transport:
+
+<sexpr>    	:: <string> | <list>
+<string>   	:: <display>? <simple-string> ;
+<simple-string>	:: <raw> | <token> | <base-64> | <hexadecimal> |
+		           <quoted-string> ;
+<display>  	:: "[" <simple-string> "]" ;
+<raw>      	:: <decimal> ":" <bytes> ;
+<decimal>  	:: <decimal-digit>+ ;
+		-- decimal numbers should have no unnecessary leading zeros
+<bytes> 	-- any string of bytes, of the indicated length
+<token>    	:: <token-char>+ ;
+<base-64>  	:: <decimal>? "|" ( <base-64-char> | <whitespace> )* "|" ;
+<hexadecimal>   :: "#" ( <hex-digit> | <whitespace> )* "#" ;
+<quoted-string> :: <decimal>? <quoted-string-body>
+<quoted-string-body> :: "\"" <bytes> "\""
+<list>     	:: "(" ( <sexpr> | <whitespace> )* ")" ;
+<whitespace> 	:: <whitespace-char>* ;
+<token-char>  	:: <alpha> | <decimal-digit> | <simple-punc> ;
+<alpha>       	:: <upper-case> | <lower-case> ;
+<lower-case>  	:: "a" | ... | "z" ;
+<upper-case>  	:: "A" | ... | "Z" ;
+<decimal-digit> :: "0" | ... | "9" ;
+<hex-digit>     :: <decimal-digit> | "A" | ... | "F" | "a" | ... | "f" ;
+<simple-punc> 	:: "-" | "." | "/" | "_" | ":" | "*" | "+" | "=" ;
+<whitespace-char> :: " " | "\t" | "\r" | "\n" ;
+<base-64-char> 	:: <alpha> | <decimal-digit> | "+" | "/" | "=" ;
+<null>        	:: "" ;
+
+8. In-memory representations
+
+For processing, the S-expression would typically be parsed and represented
+in memory in a form more amenable to efficient processing.  We suggest
+two alternatives:
+
+	-- "list-structure"
+
+	-- "array-layout"
+
+We only sketch these here, as they are only suggestive.  The code referenced
+below illustrates these styles in more detail.
+
+
+8.1. List-structure memory representation
+
+Here there are separate records for simple-strings, strings, and
+lists.  An S-expression of the form ("abc" "de") would require two
+records for the simple strings, two for the strings, and two for the
+list elements.  This is a fairly conventional representation, and
+details are omitted here.
+
+8.2 Array-layout memory representation
+
+Here each S-expression is represented as a contiguous array of bytes.
+The first byte codes the "type" of the S-expression:
+
+	01 	octet-string
+
+	02	octet-string with display-hint
+
+	03	beginning of list (and 00 is used for "end of list")
+
+Each of the three types is immediately followed by a k-byte integer
+indicating the size (in bytes) of the following representation.  Here
+k is an integer that depends on the implementation, it might be
+anywhere from 2 to 8, but would be fixed for a given implementation;
+it determines the size of the objects that can be handled.  The transport
+and canonical representations are independent of the choice of k made by
+the implementation.
+
+Although the lengths of lists are not given in the usual S-expression
+notations, it is easy to fill them in when parsing; when you reach a
+right-parenthesis you know how long the list representation was, and
+where to go back to fill in the missing length.
+
+
+8.2.1 Octet string
+
+This is represented as follows:
+
+	01 <length> <octet-string>
+
+For example (here k = 2)
+
+	01 0003 a b c
+
+8.2.2 Octet-string with display-hint
+
+This is represented as follows:
+
+	02 <length>
+	  01 <length> <octet-string>    /* for display-type */
+	  01 <length> <octet-string>    /* for octet-string */
+
+For example, the S-expression
+
+	[gif] #61626364#
+
+would be represented as (with k = 2)
+
+	02 000d
+	  01 0003  g  i  f
+	  01 0004 61 62 63 64
+
+8.2.3 List
+
+This is represented as
+
+	03 <length> <item1> <item2> <item3> ... <itemn> 00
+
+For example, the list (abc [d]ef (g)) is represented in memory as (with k=2)
+
+	03 001b
+	  01 0003 a b c
+          02 0009
+            01 0001 d
+            01 0002 e f
+          03 0005
+            01 0001 g
+          00
+        00
+
+9. Code
+
+There is code available for reading and parsing the various
+S-expression formats proposed here.
+
+See http://theory.lcs.mit.edu/~rivest/sexp.html
+
+
+10. Utilization of S-expressions
+
+This note has described S-expressions in general form.  Application writers
+may wish to restrict their use of S-expressions in various ways.  Here are
+some possible restrictions that might be considered:
+
+	-- no display-hints
+	-- no lengths on hexadecimal, quoted-strings, or base-64 encodings
+	-- no empty lists
+	-- no empty octet-strings
+	-- no lists having another list as its first element
+	-- no base-64 or hexadecimal encodings
+	-- fixed limits on the size of octet-strings
+
+11. Historical note
+
+The S-expression technology described here was originally developed
+for ``SDSI'' (the Simple Distributed Security Infrastructure by
+Lampson and Rivest [SDSI]) in 1996, although the origins clearly date
+back to McCarthy's LISP programming language.  It was further refined
+and improved during the merger of SDSI and SPKI [SPKI] during the
+first half of 1997.  S-expressions are similar to, but more readable
+and flexible than, Bernstein's "net-strings" [BERN].
+
+12. References
+
+[SDSI] "A Simple Distributed Security Architecture", by
+        Butler Lampson, and Ronald L. Rivest
+	http://theory.lcs.mit.edu/~cis/sdsi.html
+
+[SPKI] <a href="http://www.clark.net/pub/cme/html/spki.html">SPKI--A
+       Simple Public Key Infrastructure</a>
+
+[BERN] Dan Bernstein's "net-strings"; Internet Draft
+       draft-bernstein-netstrings-02.txt
+
+Author's Address
+
+      Ronald L. Rivest
+      Room 324, 545 Technology Square
+      MIT Laboratory for Computer Science
+      Cambridge, MA 02139
+
+      rivest@theory.lcs.mit.edu
+
+
diff --git a/future/rivest-sexp/simple-sexp/read.lisp b/future/rivest-sexp/simple-sexp/read.lisp
new file mode 100644
index 0000000..4941689
--- /dev/null
+++ b/future/rivest-sexp/simple-sexp/read.lisp
@@ -0,0 +1,955 @@
+;;;; -*- mode:lisp;coding:utf-8 -*-
+;;;;**************************************************************************
+;;;;FILE:               read.lisp
+;;;;LANGUAGE:           Common-Lisp
+;;;;SYSTEM:             Common-Lisp
+;;;;USER-INTERFACE:     NONE
+;;;;DESCRIPTION
+;;;;
+;;;;    Reads and writes simple S-Expressions.
+;;;;    This implements a simple parser, subset of the
+;;;;    standard Common Lisp parser.
+;;;;
+;;;;    We can read and write:
+;;;;        - integers (expressed in base ten),
+;;;;        - floating point numbers,
+;;;;        - strings,
+;;;;        - symbols,
+;;;;        - lists,
+;;;;        - arrays and vectors,
+;;;;        - structures,
+;;;;        - hash tables.
+;;;;
+;;;;    We don't implement most of the macro character or dispatching macro
+;;;;    characters.  No comment, no quote, one can use (quote x), nothing fancy.
+;;;;    Only " for strings, ( for lists and dotted lists, #( for vectors,
+;;;;    #nA for arrays, #S for structures and hash-tables.
+;;;;    Symbols may be qualified, but it's up to the user supplied %make-symbol
+;;;;    routine to handle the packages.
+;;;;
+;;;;    string ::= "\"([^\\\"]|\\\\|\\\)\""
+;;;;    number ::= "[-+]?[0-9]+(\.[0-9]*)([eE][-+]?[0-9]+)?"
+;;;;    cardinal ::= [0-9]+
+;;;;    symbol ::= [[ident]':']ident
+;;;;    ident  ::= constituent+
+;;;;    char   ::= #\\. | #\\space | #\\newline
+;;;;
+;;;;    vector ::= '#(' sexp* ')'
+;;;;    array ::= '#'cardinal'A(' sexp* ')'
+;;;;    list ::= '(' [ sexp+ [ '.' sexp ] ] ')'
+;;;;    hash ::= '#S(' 'HASH-TABLE' pair* ')'
+;;;;    pair ::= '(' sexp '.' sexp ')'
+;;;;
+;;;;    sexp ::= string | number | symbol | list | array | hash
+;;;;
+;;;;AUTHORS
+;;;;    <PJB> Pascal Bourguignon <pjb@informatimago.com>
+;;;;MODIFICATIONS
+;;;;    2007-10-25 <PJB>
+;;;;    Complete %stuff (arrays, conses).
+;;;;    Implement SIMPLE-PRIN1-TO-STRING
+;;;;    2007-10-25 <PJB> Created.
+;;;;BUGS
+;;;;
+;;;;    Perhaps we should implement Rivest's Sexp format.
+;;;;
+;;;;    Simplify the implementation (use a subset of Common Lisp, to
+;;;;    be easily translatable into other programming languages).
+;;;;
+;;;;    Implement translators to other programming languages (to be
+;;;;    able to exchange data between languages).
+;;;;
+;;;;LEGAL
+;;;;    GPL
+;;;;
+;;;;    Copyright Pascal Bourguignon 2007 - 2007
+;;;;
+;;;;    This program is free software; you can redistribute it and/or
+;;;;    modify it under the terms of the GNU General Public License
+;;;;    as published by the Free Software Foundation; either version
+;;;;    2 of the License, or (at your option) any later version.
+;;;;
+;;;;    This program is distributed in the hope that it will be
+;;;;    useful, but WITHOUT ANY WARRANTY; without even the implied
+;;;;    warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+;;;;    PURPOSE.  See the GNU General Public License for more details.
+;;;;
+;;;;    You should have received a copy of the GNU General Public
+;;;;    License along with this program; if not, write to the Free
+;;;;    Software Foundation, Inc., 59 Temple Place, Suite 330,
+;;;;    Boston, MA 02111-1307 USA
+;;;;**************************************************************************
+
+(load "repr.lisp")
+
+
+
+;;;
+;;;--------------------
+
+
+(defun make-scanner (string)
+  "Return a closure that scans STRING one character at a time.
+The closure takes one argument, a message symbol:
+  CURRCHAR  returns the current character, or NIL when the scanner is
+            before the first character or past the last one;
+  NEXTCHAR  returns the character after the current one, or NIL;
+  ADVANCE   moves to the next character (unless already past the end)
+            and returns the new current character, or NIL.
+The scanner starts before the first character, so ADVANCE must be
+called once to reach it.  Any other message signals an error (ECASE)."
+  (let ((position -1))
+    (flet ((char-at (index)
+             ;; Check both bounds: POSITION is -1 until the first
+             ;; ADVANCE, and (aref string -1) is an error.
+             (when (< -1 index (length string))
+               (aref string index))))
+      (lambda (message)
+        (ecase message
+          ((currchar) (char-at position))
+          ((nextchar) (char-at (1+ position)))
+          ((advance)  (when (< position (length string))
+                        (incf position))
+                      (char-at position)))))))
+(defun advance (scanner)
+  "Send the ADVANCE message to SCANNER and return its answer."
+  (funcall scanner (quote advance)))
+(defun currchar (scanner)
+  "Send the CURRCHAR message to SCANNER and return its answer."
+  (funcall scanner (quote currchar)))
+(defun nextchar (scanner)
+  "Send the NEXTCHAR message to SCANNER and return its answer."
+  (funcall scanner (quote nextchar)))
+
+(defun test-scanner ()
+  "Smoke test: walk a scanner over a small S-expression string until
+CURRCHAR reports the end.  Returns NIL."
+  (let ((scanner (make-scanner "(\"a\" b c d)")))
+    (advance scanner)
+    (loop :while (currchar scanner)
+          :do (advance scanner))))
+
+;; Condition signaled by REJECT-EOS when the input is exhausted.
+;; It is both a SIMPLE-ERROR (carries a format control) and an END-OF-FILE.
+(define-condition simple-end-of-file (simple-error end-of-file) ())
+;; Condition signaled by REJECT-DOTS for a token made only of dots.
+;; It is both a SIMPLE-ERROR (carries a format control) and a READER-ERROR.
+(define-condition simple-reader-error (simple-error reader-error) ())
+
+(defun reject-eos (object)
+  "Return OBJECT when it is non-NIL; otherwise signal a
+SIMPLE-END-OF-FILE error (NIL stands for the end of the string)."
+  (or object
+      (error 'simple-end-of-file
+             :format-control "Reached end of string while reading.")))
+
+(defun reject-dots (token)
+  "Return TOKEN unless %DOTSP is true of it, in which case signal a
+SIMPLE-READER-ERROR."
+  (if (%dotsp token)
+      (error 'simple-reader-error
+             :format-control
+             "A token consisting only of dots cannot be meaningfully read in.")
+      token))
+
+
+
+(defmacro defparser (name arguments &body body)
+  "Defines a token parser function NAME with lambda-list ARGUMENTS.
+The function parses its argument token and returns three values:
+an ok flag; a type of value; and a value parsed from the token.
+When the ok flag is false, the type indicates whether it's a strong error,
+and the value returned is an error message (or NIL).
+A strong error is a lexical error that is not ambiguous.  A weak error is
+when the token could still be of another lexical category.
+BODY may start with a docstring and declarations.  In the rest of BODY
+these local macros are available:
+  (REJECT strongp [control-string args...])  return a failure;
+  (ACCEPT type value)                        return a success;
+  (ALT clause...)                            same as COND;
+  (ZERO-OR-MORE test body...)                loop while TEST is true;
+  (ONE-OR-MORE test body...)                 same, but weakly REJECTs
+                                             when TEST fails at once."
+  ;; A leading string is a docstring only when something follows it;
+  ;; a body made of a single string is that function's return value.
+  (let* ((docstring    (when (and (stringp (first body)) (rest body))
+                         (list (pop body))))
+         (declarations (loop
+                          :while (and (consp (first body))
+                                      (eq 'declare (car (first body))))
+                          :collect (pop body))))
+    `(defun ,name ,arguments
+       ,@docstring
+       ,@declarations
+       (macrolet ((reject (strongp &rest ctrlstring-and-args)
+                    `(return-from ,',name
+                       (values nil ,strongp
+                               ,(when ctrlstring-and-args
+                                  `(format nil ,@ctrlstring-and-args)))))
+                  (accept (type token)
+                    `(return-from ,',name (values t ,type ,token)))
+                  (alt (&rest clauses)
+                    `(cond ,@clauses))
+                  (zero-or-more (test &body body)
+                    `(loop :while ,test :do ,@body))
+                  (one-or-more  (test &body body)
+                    `(progn
+                       (if ,test (progn ,@body) (reject nil))
+                       (loop :while ,test :do ,@body))))
+         ,@body))))
+
+
+(defparser parse-integer-token (token)
+  "integer ::= [sign] digit+ [decimal-point]
+Accepts TOKEN as (values T INTEGER n); rejects weakly when TOKEN is empty
+or has no digit after the optional sign, and strongly when characters
+remain after the integer."
+  (let ((end   (length token))
+        (pos   0)
+        (sign  1)
+        (value 0))
+    (when (zerop end) (reject nil))
+    ;; Optional sign.
+    (case (aref token pos)
+      ((#\-) (setf sign -1) (incf pos))
+      ((#\+) (incf pos)))
+    ;; Mandatory run of decimal digits.
+    (one-or-more (and (< pos end) (digit-char-p (aref token pos)))
+                 (setf value (+ (* 10 value) (digit-char-p (aref token pos))))
+                 (incf pos))
+    ;; Optional trailing decimal point.
+    (when (and (< pos end) (char= #\. (aref token pos)))
+      (incf pos))
+    (if (= pos end)
+        (accept 'integer (* sign value))
+        (reject t "Junk after integer in ~S" token))))
+
+
+(defparser parse-float-token (token)
+  "Parse TOKEN as a floating point number.
+float ::= [sign] {decimal-digit}+ [decimal-point {decimal-digit}*] exponent
+float ::= [sign] {decimal-digit}* decimal-point {decimal-digit}+ [exponent]
+exponent ::=  exponent-marker [sign] {digit}+
+Accepts as (values T type value) where type is FLOAT, DOUBLE-FLOAT,
+SINGLE-FLOAT, LONG-FLOAT or SHORT-FLOAT according to the exponent marker.
+Rejects weakly when TOKEN is empty, has no mantissa digit, has neither
+a decimal point nor an exponent, or has an exponent without digits;
+rejects strongly when characters remain after the number."
+  (let ((sign 1)
+        (mant 0)
+        (ndig 0)                        ; mantissa digits seen so far
+        (esgn 1)
+        (mexp 0)
+        (expo 0)
+        (i    0)
+        (type 'float)
+        (fp   nil))
+    (unless (< i (length token)) (reject nil))
+    (alt ((char= #\- (aref token i)) (incf i) (setf sign -1))
+         ((char= #\+ (aref token i)) (incf i)))
+    (zero-or-more (and  (< i (length token)) (digit-char-p (aref token i)))
+                  (setf mant (+ (* 10. mant) (digit-char-p (aref token i)))
+                        ndig (1+ ndig)
+                        i (1+ i)))
+    (alt ((and (< i (length token)) (char= #\. (aref token i)))
+          (setf fp t)
+          (incf i)
+          (zero-or-more
+           (and  (< i (length token)) (digit-char-p (aref token i)))
+           (setf mant (+ (* 10. mant) (digit-char-p (aref token i)))
+                 mexp (1- mexp)
+                 ndig (1+ ndig)
+                 i    (1+ i)))))
+    ;; Both grammar rules need at least one mantissa digit.  Without
+    ;; this check tokens such as \".\", \"+.\", \"e5\" or \"d1\" would be
+    ;; accepted as 0.0 although they are not numbers.
+    (when (zerop ndig) (reject nil))
+    ;; Marker  Meaning
+    ;; D or d  double-float
+    ;; E or e  float (see *read-default-float-format*)
+    ;; F or f  single-float
+    ;; L or l  long-float
+    ;; S or s  short-float
+    (let ((marker-type (and (< i (length token))
+                            (cdr (assoc (aref token i)
+                                        '((#\d . double-float)
+                                          (#\e . float)
+                                          (#\f . single-float)
+                                          (#\l . long-float)
+                                          (#\s . short-float))
+                                        :test (function char-equal))))))
+      (when marker-type
+        (setf type marker-type
+              fp   t)
+        (incf i)
+        (unless (< i (length token)) (reject nil))
+        (alt ((char= #\- (aref token i)) (incf i) (setf esgn -1))
+             ((char= #\+ (aref token i)) (incf i)))
+        (one-or-more (and  (< i (length token)) (digit-char-p (aref token i)))
+                     (setf expo (+ (* 10. expo) (digit-char-p (aref token i)))
+                           i (1+ i)))))
+    (if fp
+        (if (= i (length token))
+            ;; Raise ten in the target float type: a single-float 10.0
+            ;; loses precision for doubles and overflows on exponents
+            ;; that a double-float can represent.
+            (accept type
+                    (* (coerce (* sign mant) type)
+                       (expt (coerce 10 type) (+ mexp (* esgn expo)))))
+            (reject t "Junk after floating point number ~S" token))
+        (reject nil))))
+
+
+
+ (defun test-%make-integer ()
+   "Check %MAKE-INTEGER on signed and unsigned integer tokens, with and
+without a trailing decimal point.  Returns :SUCCESS when every case passes."
+   (loop :for (input expected) :in '(("123"  123)
+                                     ("+123" 123)
+                                     ("-123" -123)
+                                     ("123."  123)
+                                     ("+123." 123)
+                                     ("-123." -123))
+         :do (assert (= (%make-integer input) expected)
+                     () "(%MAKE-INTEGER ~S) returned ~S instead of ~S"
+                     input (%make-integer input) expected))
+   :success)
+
+
+(defun test-%make-float ()
+  "Check %MAKE-FLOAT on every combination of sign, mantissa/exponent
+layout and exponent marker.
+
+The cases are generated instead of being listed by hand: the original
+table repeated most triples verbatim, which hid what was really covered.
+Each unsigned token is tried bare, with a leading + and with a leading -.
+
+The comparison goes through the ~7,3F format directive, so only the
+printed value to three decimals is checked, not the float type.
+
+RETURN: :SUCCESS when all cases pass; the first failure signals an error."
+  (let ((cases '()))
+    (flet ((add-signed (unsigned-token magnitude)
+             ;; Register the token with no sign, with + and with -.
+             (dolist (sign '(("" . 1) ("+" . 1) ("-" . -1)))
+               (push (list (concatenate 'string (car sign) unsigned-token)
+                           (* (cdr sign) magnitude))
+                     cases))))
+      ;; Plain decimal notation, without exponent marker.
+      (add-signed "123.0" 123.0)
+      ;; One batch per exponent marker; ~C is replaced by the marker.
+      (dolist (marker '((#\e . 123e0)
+                        (#\s . 123s0)
+                        (#\f . 123f0)
+                        (#\d . 123d0)
+                        (#\l . 123l0)))
+        (dolist (layout '("123~C0"
+                          ".123~C3"
+                          "0.123~C3"
+                          ".123~C+3"
+                          "0.123~C+3"
+                          "1230~C-1"
+                          "1230.0~C-1"))
+          (add-signed (format nil layout (car marker)) (cdr marker)))))
+    (dolist (test (nreverse cases))
+      (assert (string= (format nil "~7,3F" (%make-float (first test)))
+                       (format nil "~7,3F" (second test)))
+              () "(%MAKE-FLOAT ~S) returned ~S instead of ~S"
+              (first test) (%make-float (first test)) (second test))))
+  :success)
+
+
+
+
+
+
+(declaim (inline whitespacep terminating-macro-char-p))
+
+(defun whitespacep (ch)
+  "Return true when CH is a space, a newline or a tab, and NIL for any
+other object (including NIL)."
+  (member ch
+          '(#\space #\newline #\tab)
+          :test (function eql)))
+
+(defun terminating-macro-char-p (ch)
+  "Return true when CH is an opening or closing parenthesis, and NIL for
+any other object (including NIL)."
+  (member ch
+          '(#\( #\))
+          :test (function eql)))
+
+(defun skip-spaces (s)
+  "Advance the scanner S until its current character is either absent
+\(end of input) or not a whitespace character."
+  (loop
+     (when (or (null (currchar s))
+               (not (whitespacep (currchar s))))
+       (return))
+     (advance s))
+  ;; Post-condition: we stopped on end of input or on a non-space.
+  (assert (or (null (currchar s)) (not (whitespacep (currchar s))))))
+
+
+(defun unescape (token)
+  "Remove the \\ (single) and | (multiple) escapes from TOKEN and return
+the unescaped text as a fresh sequence.
+
+WARNING: TOKEN is used as scratch space and is overwritten, so it must
+be mutable.  An error is signaled when TOKEN ends inside an escape."
+  (let ((state :normal)
+        (kept  0))
+    (flet ((keep (ch)
+             ;; KEPT never runs ahead of the read position, so writing
+             ;; in place cannot clobber characters not yet examined.
+             (setf (aref token kept) ch)
+             (incf kept)))
+      (dotimes (src (length token))
+        (let ((ch (aref token src)))
+          (ecase state
+            ((:normal)
+             (cond ((eql ch #\\) (setf state :single))
+                   ((eql ch #\|) (setf state :double))
+                   (t            (keep ch))))
+            ((:single)
+             (setf state :normal)
+             (keep ch))
+            ((:double)
+             (cond ((eql ch #\|) (setf state :normal))
+                   ((eql ch #\\) (setf state :double-single))
+                   (t            (keep ch))))
+            ((:double-single)
+             (setf state :double)
+             (keep ch)))))
+      (unless (eq state :normal)
+        (error "end-of-file with unfinished token escape."))
+      (subseq token 0 kept))))
+
+
+(defun test-unescape ()
+  "Run UNESCAPE over a table of (expected escaped-input) pairs.  Each
+input is copied first because UNESCAPE overwrites its argument.
+Returns :SUCCESS, or signals an error on the first mismatch."
+  (dolist (test '((""  "")
+                  ("Hello World"  "Hello World")
+                  ("xHello World!" "\\xHello \\World\\!")
+                  ("\\Hello \"World\"\\"  "\\\\Hello \\\"World\\\"\\\\")
+                  ("Hello World" "|Hello World|")
+                  ("Hello World" "|Hello|| World|")
+                  ("Hello World" "|Hello| |World|")
+                  ("Hello| |World" "|Hello\\| \\|World|")
+                  ("Hello\"\\World" "|Hello\\\"\\\\World|")))
+    (destructuring-bind (expected escaped) test
+      (assert (string= expected (unescape (copy-seq escaped)))
+              ()
+              "(unescape ~S) should give ~S instead of ~S"
+              escaped expected (unescape (copy-seq escaped)))))
+  :success)
+
+
+(defun parse-list (s)
+  "Parse a list from the scanner S, whose current character is expected
+to be the opening parenthesis (it is skipped without being checked).
+
+Both proper lists and dotted lists are accepted.  A lone dot is taken
+as the consing dot only once at least one element has been read and
+when it is followed by end of input, whitespace or a parenthesis.
+
+RETURN: the %list built from the parsed elements; for a dotted list the
+        object after the dot becomes the last cdr."
+  (advance s)                           ; skip over #\(
+  (let ((list (%nil)))
+    (loop
+       (skip-spaces s)
+       (when (char= (reject-eos (currchar s)) #\))
+         (advance s)
+         (return-from parse-list (%nreverse list)))
+       ;; Use the representation's own emptiness test instead of
+       ;; relying on the %list being a native CL list.
+       (when (and (not (%null list))
+                  (char= (currchar s) #\.)
+                  (or (null (nextchar s))
+                      (whitespacep (nextchar s))
+                      (terminating-macro-char-p (nextchar s))))
+         (collect-token s)              ; consume the dot itself
+         (let ((last-cdr  (parse-object s)))
+           ;; Spaces between the last object and the closing parenthesis
+           ;; are legal, as in \"(a . b )\"; skip them before checking.
+           (skip-spaces s)
+           (if (char= (reject-eos (currchar s)) #\))
+               (progn
+                 (advance s)
+                 (return-from parse-list (%nreconc list last-cdr)))
+               (error "There can be only one object after the dot in a dotted-list."))))
+       (%push (parse-object s) list))))
+
+(defun parse-vector (length s)
+  "Parse the elements of a vector from the scanner S, up to and
+including the closing parenthesis.  The current character is skipped
+first (the caller leaves S on the opening parenthesis).
+
+LENGTH: NIL, or the size given between # and the parenthesis.
+        With a LENGTH, elements beyond it are parsed but not stored,
+        and missing elements are filled with the last object parsed
+        \(NIL when there is none).
+        Without a LENGTH, the vector has as many slots as elements.
+RETURN: the array built with %MAKE-ARRAY."
+  (advance s)                           ; skip over #\(
+  (if length
+      ;; Fixed size: store while there is room, then pad.
+      (let ((vector      (%make-array (list length) nil))
+            (last-object nil)
+            (count       0))
+        (skip-spaces s)
+        (loop
+           (when (char= (reject-eos (currchar s)) #\))
+             (return))
+           (setf last-object (parse-object s))
+           (when (< count length)
+             (setf (aref vector count) last-object))
+           (incf count)
+           (skip-spaces s))
+        (fill vector last-object :start (min count length))
+        (advance s)                     ; skip over #\)
+        vector)
+      ;; Free size: collect everything, then build the array.
+      (let ((items (%nil)))
+        (skip-spaces s)
+        (loop
+           (when (char= (reject-eos (currchar s)) #\))
+             (return))
+           (%push (parse-object s) items)
+           (skip-spaces s))
+        (advance s)                     ; skip over #\)
+        (%make-array (list (%length items)) (%nreverse items)))))
+
+
+
+(defun parse-struct-or-hash (s)
+  "Parse what follows #S: a list whose first element names either a
+structure type or HASH-TABLE.
+
+For HASH-TABLE, the leading option-name/value pairs (an option name is
+a symbol whose name starts with a colon) are collected into an %alist
+kept in source order, and the remaining elements are the table data.
+Anything else yields a structure made of the type and the rest of the
+list.
+
+RETURN: the result of %MAKE-HASH-TABLE or of %MAKE-STRUCT."
+  (flet ((option-name-p (object)
+           ;; The length check avoids indexing into an empty name.
+           (and (%symbolp object)
+                (plusp (length (%symbol-name object)))
+                (char= #\: (aref (%symbol-name object) 0)))))
+    (let ((data (parse-list s)))
+      (if (%symbol-eql (%make-symbol "HASH-TABLE") (%car data))
+          ;; DO steps its variables in parallel, so the OPTIONS step
+          ;; still sees the CUR of the current iteration.
+          (do ((cur     (%cdr data)
+                        (%cdr (%cdr cur)))
+               (options (%nil)
+                        (%cons (%cons (%car cur) (%car (%cdr cur))) options)))
+              ((not (option-name-p (%car cur)))
+               ;; OPTIONS was accumulated by pushing; restore source order
+               ;; so that printing the table back keeps the options as read.
+               (%make-hash-table (%nreverse options) cur)))
+          (%make-struct (%car data) (%cdr data))))))
+
+
+;; (let ((h (make-hash-table :test (function equal))))
+;;   (setf (gethash "One" h) 1
+;;         (gethash "Two" h) 2
+;;         (gethash "Three" h) 3)
+;;   h)
+;;
+
+
+(defun parse-array (dimensions s)
+  "Parse the contents of a #nA array from the scanner S.
+
+DIMENSIONS: the number of dimensions (the integer between # and A).
+RETURN:     the array built with %MAKE-ARRAY.  The size of each
+            dimension is the length of the first sub-list found at the
+            corresponding nesting level of the parsed contents."
+  (let ((initial-contents (parse-object s)))
+    ;; TODO: we rely on make-array to raise some errors that it may not raise...
+    ;; DO steps in parallel: SIZES records the length of the current
+    ;; LEVEL while LEVEL descends into its first element.
+    (do ((remaining dimensions       (1- remaining))
+         (level     initial-contents (first level))
+         (sizes     '()              (cons (length level) sizes)))
+        ((zerop remaining)
+         (%make-array (nreverse sizes) initial-contents)))))
+
+
+(declaim (inline make-buffer))
+
+(defun make-buffer ()
+  "Return a fresh, empty, adjustable character vector with a fill
+pointer, suitable for VECTOR-PUSH-EXTEND."
+  (make-array 0
+              :element-type 'character
+              :fill-pointer 0
+              :adjustable t))
+
+
+(defun collect-token (s)
+  "Read one token from the scanner S, starting at its current character.
+
+A token starting with a double-quote is a string: it extends to the
+next unescaped double-quote, a backslash making the following character
+literal.  Reaching the end of input first signals SIMPLE-END-OF-FILE.
+
+Any other token extends to the end of input or to the first whitespace
+or parenthesis found outside of an escape.  Inside a \\ or |...| escape
+those characters belong to the token, so |Hello World| is one symbol.
+The token is then classified in this order: escaped tokens are symbols;
+tokens made only of dots go to %MAKE-DOTS; then %MAKE-FLOAT and
+%MAKE-INTEGER are tried; whatever remains is a symbol.
+
+RETURN: the object built by the matching %MAKE- function."
+  (case (currchar s)
+    ((#\")               ; a string; this should move to macro char...
+     (let ((string (make-buffer))
+           (state :normal))
+       (advance s)                      ; skip over the opening quote
+       (do ()
+           ((not (currchar s))
+            (error 'simple-end-of-file
+                   :format-control "Reached end-of-file while reading a string."))
+         (ecase state
+           ((:normal)
+            (case (currchar s)
+              ((#\")     (advance s)
+               (return-from collect-token (%make-string string)))
+              ((#\\)     (setf state :single))
+              (otherwise (vector-push-extend (currchar s) string))))
+           ((:single)
+            (vector-push-extend (currchar s) string)
+            (setf state :normal)))
+         (advance s))))
+    (otherwise
+     (let ((escapedp nil)
+           (token (make-buffer))
+           (state :normal))
+       (do ()
+           ;; Whitespace and parentheses only delimit the token in the
+           ;; :NORMAL state; inside an escape only the end of input can
+           ;; stop the scan, and that is then reported as an error.
+           ((or (null (currchar s))
+                (and (eq state :normal)
+                     (or (whitespacep (currchar s))
+                         (terminating-macro-char-p (currchar s)))))
+            (unless (eq state :normal)
+              (error "end-of-file with unfinished token escape."))
+            (cond
+              (escapedp
+               (%make-symbol token))
+              ((every (lambda (ch) (char= #\. ch)) token)
+               (%make-dots    token))
+              ;; Test-only clauses: the value of the call is returned
+              ;; when it is true, otherwise the next clause is tried.
+              ((%make-float   token))
+              ((%make-integer token))
+              (t
+               (%make-symbol token))))
+         (ecase state
+           ((:normal)
+            (case (currchar s)
+              ((#\\) (setf state :single escapedp t))
+              ((#\|) (setf state :double escapedp t))
+              (otherwise (vector-push-extend (currchar s) token))))
+           ((:single)
+            (setf state :normal)
+            (vector-push-extend (currchar s) token))
+           ((:double)
+            (case (currchar s)
+              ((#\|) (setf state :normal))
+              ((#\\) (setf state :double-single))
+              (otherwise (vector-push-extend (currchar s) token))))
+           ((:double-single)
+            (setf state :double)
+            (vector-push-extend (currchar s) token)))
+         (advance s))))))
+
+
+(defun scan-cardinal (s)
+  "Collect the run of decimal digits at the current position of the
+scanner S and return what %MAKE-INTEGER builds from it.
+
+The scan stops on the first non-digit character or at the end of input,
+leaving S positioned there."
+  (let ((token (make-buffer)))
+    (do ()
+        ;; CURRCHAR yields NIL at the end of input, and DIGIT-CHAR-P
+        ;; only accepts characters, so test for NIL first.
+        ((not (and (currchar s)
+                   (digit-char-p (currchar s))))
+         (%make-integer token))
+      (vector-push-extend (currchar s) token)
+      (advance s))))
+
+
+(defun parse-object (s)
+  "Parse one object from the scanner S, after skipping leading spaces.
+
+Dispatches on the first character:
+  (        a list, read by PARSE-LIST;
+  #(       a vector; #n( a vector with a length argument;
+  #nA      an array with n dimensions;
+  #S       a structure or a hash table;
+  others   a token read by COLLECT-TOKEN and filtered by REJECT-DOTS.
+Any other character after # signals an error."
+  (skip-spaces s)
+  (case (reject-eos (currchar s))
+    ((#\()
+     (parse-list s))
+    ((#\#)
+     (advance s)
+     (case (reject-eos (currchar s))
+       ((#\() (parse-vector nil s))
+       ((#\A)
+        (error "Missing a dimensions argument between # and A"))
+       ((#\S) (advance s) (parse-struct-or-hash s))
+       ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
+        (let ((arg (scan-cardinal s)))
+          (case (reject-eos (currchar s))
+            ((#\() (parse-vector arg s))
+            ((#\A) (advance s) (parse-array  arg s))
+            ((#\S)
+             (error "No number allowed between # and S"))
+            ((#\@ #\: #\,)
+             (error "This simple reader doesn't implement multiple argument or flags for dispatching reader macros."))
+            (otherwise
+             (error "This simple reader doesn't implement dispatching reader macros other than #(, #A and #S. Rejecting: #~A" (currchar s))))))
+       ((#\@ #\: #\,)
+        (error "This simple reader doesn't implement multiple argument or flags for dispatching reader macros."))
+       ;; Without this clause an unknown sub-character made the CASE
+       ;; return NIL, and that NIL was handed back as the parsed object.
+       (otherwise
+        (error "This simple reader doesn't implement dispatching reader macros other than #(, #A and #S. Rejecting: #~A" (currchar s)))))
+    (otherwise (reject-dots (collect-token s)))))
+
+
+(defun simple-read-from-string (string)
+  "Parse the first object found in STRING and return it.
+A scanner is created on STRING and advanced once before parsing."
+  (let ((scanner (make-scanner string)))
+    (advance scanner)
+    (parse-object scanner)))
+
+
+
+(defun simple-prin1-to-string (object)
+  "
+OBJECT: is one of the objects made by the various %MAKE- functions.
+RETURN: A string containing the printed representation of OBJECT.
+        Symbols print as their name, strings between double-quotes with
+        backslash escapes, %lists between parentheses (dotted when the
+        last cdr is not %nil), hash tables and structures in #S syntax,
+        one-dimensional arrays in #( syntax and other arrays in #nA
+        syntax.  Any other structure instance prints as #S with its
+        slots; objects of any other type signal an error.
+"
+  (with-output-to-string (out)
+    (cond
+      ((%symbolp object)
+       (princ (%symbol-name object) out))
+      ((%stringp object)
+       (princ "\"" out)
+       (do ((i 0 (1+ i)))
+           ((>= i (%string-length object)))
+         (let ((ch (%string-ref object i)))
+           ;; Backslash and double-quote must be escaped inside a string.
+           (when (member ch '(#\\ #\"))
+             (princ "\\" out))
+           (princ ch out)))
+       (princ "\"" out))
+      ((%integerp object) ; TODO: We'd need an accessor for the integer value
+       (princ object out))
+      ((%floatp object) ; TODO: We'd need an accessor for the float value (eg decode-float).
+       (princ object out))
+      ((%consp object)
+       (princ "(" out)
+       ;; SEP is empty before the first element and a space afterwards.
+       (do ((cur object (%cdr cur))
+            (sep "" " "))
+           ((not (%consp cur))
+            (unless (%null cur)
+              (princ " . " out)
+              (princ (simple-prin1-to-string cur) out)))
+         (princ sep out)
+         (princ (simple-prin1-to-string (%car cur)) out))
+       (princ ")" out))
+      ((%hash-table-p object)
+       (princ "#S(HASH-TABLE" out)
+       (dolist (item (%hash-table-options object))
+         (princ " " out)
+         (princ (simple-prin1-to-string (%car item)) out)
+         (princ " " out)
+         (princ (simple-prin1-to-string (%cdr item)) out))
+       (dolist (pair (%hash-table-data object))
+         (princ " (" out)
+         (princ (simple-prin1-to-string (%car pair)) out)
+         (princ " . " out)
+         (princ (simple-prin1-to-string (%cdr pair)) out)
+         (princ ")" out))
+       (princ ")" out))
+      ((%structp object)
+       (princ "#S(" out)
+       (princ (simple-prin1-to-string (%struct-type object)) out)
+       (dolist (item (%struct-data object))
+         (princ " " out)
+         (princ (simple-prin1-to-string item) out))
+       (princ ")" out))
+      ((%arrayp object)
+       (let ((dims  (%array-dimensions object))
+             (contents  (%array-collect-contents object)))
+         (if (= 1 (%length dims))
+             ;; a vector
+             (progn
+               (princ "#" out)
+               (princ (simple-prin1-to-string contents) out))
+             ;; a multi-D array
+             (progn
+               (princ "#" out) (princ (%length dims) out) (princ "A" out)
+               (princ (simple-prin1-to-string contents) out)))))
+      (t
+       (cond
+         ((subtypep (class-of (class-of object)) 'structure-class)
+          (princ "#S(" out)
+          (princ (symbol-name (type-of object)) out)
+          ;; Enumerate the slots of OBJECT's own class.  This used to be
+          ;; hard-wired to the DOTS class, which only suits DOTS objects.
+          (dolist (slot (clos::class-slots (class-of object)))
+            (princ " :" out)
+            (princ (symbol-name (clos:slot-definition-name slot)) out)
+            (princ " " out)
+            (princ (simple-prin1-to-string
+                    (slot-value object (clos:slot-definition-name slot))) out))
+          (princ ")" out))
+         (t
+          (error "Cannot print objects of type ~S like: ~S"
+                 (type-of object) object)))))))
+
+;; #- (and)
+;; (untrace make-scanner advance currchar nextchar reject-eos reject-dots
+;;        whitespacep terminating-macro-char-p skip-spaces unescape
+;;        test-unescape parse-list parse-vector parse-struct-or-hash parse-array
+;;        collect-token parse-object simple-read-from-string)
+;;
+;; (print (simple-read-from-string "(\"a\" (a b c) b c-c . d)"))
+;; (print (simple-read-from-string "(\"a\" #S(dots :contents \"...\") #(a b c) b c-c . (d 123 123.0 123l0))"))
+;; (print (simple-read-from-string "(#1A(1 2 3) #0Afoo \"a\"  #S(dots :contents \"...\") #S(HASH-TABLE :TEST EXT:FASTHASH-EQUAL (\"Three\" . 3) (\"Two\" . 2) (\"One\" . 1)) #(a b c) b c-c . (d 123 123.0 123l0))"))
+
+(defun test-read-print ()
+  "Round-trip check: each entry is (input expected-output).  The input
+is read with SIMPLE-READ-FROM-STRING, printed back with
+SIMPLE-PRIN1-TO-STRING, and the result is compared to the expected
+text with STRING=.  Returns :SUCCESS, or signals an error on the first
+mismatch."
+  (dolist
+      (test
+        ;; In the first entry, #1= and #1# make the expected output the
+        ;; very same string as the input.
+        '((#1="(\"a\" (a b c) b c-c . d)" #1#)
+          ("(\"a\" #S(dots :contents \"...\") #(a b c) b c-c . (d 123 123.0 123l0))"
+           "(\"a\" #S(dots :contents \"...\") #(a b c) b c-c d 123 123.0 123.0L0)")
+          ("(#1A(1 2 3) #0Afoo \"a\"  #S(dots :contents \"...\") #S(HASH-TABLE :TEST EXT:FASTHASH-EQUAL (\"Three\" . 3) (\"Two\" . 2) (\"One\" . 1)) #(a b c) b c-c . (d 123 123.0 123l0))"
+           "(#(1 2 3) #0Afoo \"a\" #S(dots :contents \"...\") #S(HASH-TABLE :TEST EXT:FASTHASH-EQUAL (\"Three\" . 3) (\"Two\" . 2) (\"One\" . 1)) #(a b c) b c-c d 123 123.0 123.0L0)")))
+    (assert (string= (simple-prin1-to-string
+                      (simple-read-from-string (first test)))
+                     (second test))
+            ()
+            "Test failed:~% ~S~% ~S"
+            (simple-prin1-to-string (simple-read-from-string (first test)))
+            (second test)))
+  :success)
+
+
+;;;; THE END ;;;;
+
diff --git a/future/rivest-sexp/simple-sexp/reps.lisp b/future/rivest-sexp/simple-sexp/reps.lisp
new file mode 100644
index 0000000..ba3560e
--- /dev/null
+++ b/future/rivest-sexp/simple-sexp/reps.lisp
@@ -0,0 +1,150 @@
+;;;; -*- mode:lisp;coding:utf-8 -*-
+;;;;**************************************************************************
+;;;;FILE:               reps.lisp
+;;;;LANGUAGE:           Common-Lisp
+;;;;SYSTEM:             Common-Lisp
+;;;;USER-INTERFACE:     NONE
+;;;;DESCRIPTION
+;;;;
+;;;;    This file defines representations for the read.lisp
+;;;;    simple lisp reader/printer.
+;;;;
+;;;;AUTHORS
+;;;;    <PJB> Pascal Bourguignon <pjb@informatimago.com>
+;;;;MODIFICATIONS
+;;;;    2007-10-26 <PJB> Created.
+;;;;BUGS
+;;;;LEGAL
+;;;;    GPL
+;;;;
+;;;;    Copyright Pascal Bourguignon 2007 - 2007
+;;;;
+;;;;    This program is free software; you can redistribute it and/or
+;;;;    modify it under the terms of the GNU General Public License
+;;;;    as published by the Free Software Foundation; either version
+;;;;    2 of the License, or (at your option) any later version.
+;;;;
+;;;;    This program is distributed in the hope that it will be
+;;;;    useful, but WITHOUT ANY WARRANTY; without even the implied
+;;;;    warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+;;;;    PURPOSE.  See the GNU General Public License for more details.
+;;;;
+;;;;    You should have received a copy of the GNU General Public
+;;;;    License along with this program; if not, write to the Free
+;;;;    Software Foundation, Inc., 59 Temple Place, Suite 330,
+;;;;    Boston, MA 02111-1307 USA
+;;;;**************************************************************************
+
+;;;--------------------
+;;; Representations
+;;;--------------------
+
+;;; Strings:
+(defun %make-string (contents)
+  "Return CONTENTS itself, after asserting that it is a string."
+  (assert (stringp contents))
+  contents)
+
+(defun %stringp (object)
+  "True when OBJECT is a string."
+  (stringp object))
+
+(defun %string-length (object)
+  "Return the length of the string OBJECT."
+  (assert (%stringp object))
+  (length object))
+
+(defun %string-ref (object i)
+  "Return the character at index I of the string OBJECT."
+  (assert (%stringp object))
+  (char object i))
+
+;;; Symbols:
+;;;   These symbol names contain the package and colons in addition to
+;;;   the name.
+(defstruct sym
+  name)
+
+(defun %make-symbol (name)
+  "Return a new symbol representation holding NAME."
+  (make-sym :name name))
+
+(defun %symbolp (object)
+  "True when OBJECT is a symbol representation."
+  (typep object 'sym))
+
+(defun %symbol-eql (a b)
+  "True when the symbol representations A and B have STRING= names."
+  (string= (sym-name a) (sym-name b)))
+
+(defun %symbol-name (s)
+  "Return the name stored in the symbol representation S."
+  (sym-name s))
+
+
+;;; Integers:
+(defun %make-integer (contents)
+  "Build an integer from the token string CONTENTS.
+
+PARSE-INTEGER-TOKEN is called and its three values are used as
+\(ok strongp value): when OK is true, VALUE is returned after asserting
+that it is an integer; otherwise, when STRONGP is true, VALUE is passed
+to ERROR; otherwise NIL is returned."
+  (assert (stringp contents))
+  (multiple-value-bind (ok strongp value) (parse-integer-token contents)
+    (if ok
+        (progn
+          (assert (integerp value))
+          value)
+        (when strongp
+          (error value)))))
+
+(defun %integerp (object)
+  "True when OBJECT is an integer."
+  (integerp object))
+
+;;; Floating points:
+(defun %make-float (contents)
+  "Build a float from the token string CONTENTS.
+
+PARSE-FLOAT-TOKEN is called and its three values are used as
+\(ok strongp value): when OK is true, VALUE is returned after asserting
+that it is a float; otherwise, when STRONGP is true, VALUE is passed to
+ERROR; otherwise NIL is returned."
+  (assert (stringp contents))
+  (multiple-value-bind (ok strongp value) (parse-float-token contents)
+    (if ok
+        (progn
+          (assert (floatp value))
+          value)
+        (when strongp
+          (error value)))))
+
+(defun %floatp (object)
+  "True when OBJECT is a float."
+  (floatp object))
+
+
+;;; Conses:
+(defun %cons (car cdr)
+  "Return a new cons cell holding CAR and CDR."
+  (cons car cdr))
+
+(defun %consp (object)
+  "True when OBJECT is a cons cell."
+  (consp object))
+
+(defun %car (cons)
+  "Return the car of CONS."
+  (car cons))
+
+(defun %cdr (cons)
+  "Return the cdr of CONS."
+  (cdr cons))
+
+(defun %nil ()
+  "Return the empty %list."
+  '())
+
+(defun %null (object)
+  "True when OBJECT is the empty %list."
+  (eq (%nil) object))
+
+(defun %nreverse (list)
+  "Destructively reverse LIST and return the result."
+  (nreverse list))
+
+(defun %length (list)
+  "Return the number of elements in LIST."
+  (length list))
+
+(defun %nreconc (list tail)
+  "Destructively reverse LIST and append TAIL to the result."
+  (nreconc list tail))
+
+(defmacro %push (object list)
+  "Push OBJECT onto the %list stored in the place LIST."
+  `(push ,object ,list))
+
+(defun %list (&rest args)
+  "Return a %list of ARGS, built with %CONS from the last element back
+to the first, ending with (%NIL)."
+  (reduce (function %cons) args
+          :from-end t
+          :initial-value (%nil)))
+
+;;; Hashes:
+(defstruct hash
+  options
+  data)
+
+(defun %make-hash-table (options data)
+  "
+OPTIONS: an %alist of (%cons key value) options.
+DATA:    an %alist of (%cons key value) data.
+RETURN:  a new hash table representation holding both, unprocessed.
+"
+  (make-hash :data data :options options))
+
+(defun %hash-table-p (object)
+  "True when OBJECT is a hash table representation."
+  (typep object 'hash))
+
+(defun %hash-table-options (object)
+  "Return the options stored in the hash table representation OBJECT."
+  (assert (%hash-table-p object))
+  (hash-options object))
+
+(defun %hash-table-data (object)
+  "Return the data stored in the hash table representation OBJECT."
+  (assert (%hash-table-p object))
+  (hash-data object))
+
+
+;;; Structures:
+(defstruct struct
+  type
+  data)
+
+(defun %make-struct (type data)
+  "Return a new structure representation holding TYPE and DATA."
+  (make-struct :data data :type type))
+
+(defun %structp (object)
+  "True when OBJECT is a structure representation."
+  (typep object 'struct))
+
+(defun %struct-type (object)
+  "Return the type stored in the structure representation OBJECT."
+  (assert (%structp object))
+  (struct-type object))
+
+(defun %struct-data (object)
+  "Return the data stored in the structure representation OBJECT."
+  (assert (%structp object))
+  (struct-data object))
+
+;;; Arrays:
+(defun %make-array (dimensions contents)
+  "Return a new array of the given DIMENSIONS.  When CONTENTS is true it
+is passed as :INITIAL-CONTENTS; otherwise the array is created without
+initial contents."
+  (if (null contents)
+      (make-array dimensions)
+      (make-array dimensions :initial-contents contents)))
+
+(defun %arrayp (object)
+  "True when OBJECT is an array."
+  (arrayp object))
+
+(defun %array-ref (array &rest indices)
+  "Return the element of ARRAY found at INDICES."
+  (apply (function aref) array indices))
+
+(defun %array-dimensions (array)
+  "Return the list of the dimensions of ARRAY."
+  (array-dimensions array))
+
+(defun %array-collect-contents (array)
+  "
+RETURN: A %list of the array contents, nested one level per dimension.
+        For an array without dimensions, the single element itself.
+"
+  (labels ((walk (dims reversed-indices)
+             ;; REVERSED-INDICES holds the indices chosen so far,
+             ;; innermost first.
+             (if (null dims)
+                 (apply (function %array-ref) array (reverse reversed-indices))
+                 (let ((row (%nil)))
+                   (dotimes (i (first dims))
+                     (%push (walk (%cdr dims) (cons i reversed-indices)) row))
+                   (%nreverse row)))))
+    (walk (%array-dimensions array) '())))
+
+
+;;; Dots:
+(defstruct dots contents)
+
+(defun %make-dots (contents)
+  "Return a new dots representation holding CONTENTS."
+  (make-dots :contents contents))
+
+(defun %dotsp (object)
+  "True when OBJECT is a dots representation."
+  (typep object 'dots))
+
+(defun %dots-contents (object)
+  "Return the contents stored in the dots representation OBJECT."
+  ;; This used to call an undefined function %DOTS, to ignore OBJECT,
+  ;; and never to return the contents.
+  (assert (%dotsp object))
+  (dots-contents object))
+
+
+;;;; THE END ;;;;
ViewGit