;;;; -*- mode:lisp;coding:utf-8 -*-
;;;;**************************************************************************
;;;;FILE:               parser.lisp
;;;;LANGUAGE:           Common-Lisp
;;;;SYSTEM:             Common-Lisp
;;;;USER-INTERFACE:     NONE
;;;;DESCRIPTION
;;;;
;;;;    A simple latex parser.
;;;;
;;;;AUTHORS
;;;;    <PJB> Pascal J. Bourguignon <pjb@informatimago.com>
;;;;MODIFICATIONS
;;;;    2020-05-16 <PJB> Corrected a few syntax (thanks phoe) and macro errors.
;;;;    2013-02-09 <PJB> Created.
;;;;BUGS
;;;;LEGAL
;;;;    AGPL3
;;;;
;;;;    Copyright Pascal J. Bourguignon 2013 - 2016
;;;;
;;;;    This program is free software: you can redistribute it and/or modify
;;;;    it under the terms of the GNU Affero General Public License as published by
;;;;    the Free Software Foundation, either version 3 of the License, or
;;;;    (at your option) any later version.
;;;;
;;;;    This program is distributed in the hope that it will be useful,
;;;;    but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;;    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;;;    GNU Affero General Public License for more details.
;;;;
;;;;    You should have received a copy of the GNU Affero General Public License
;;;;    along with this program.  If not, see <http://www.gnu.org/licenses/>.
;;;;**************************************************************************
(eval-when (:compile-toplevel :load-toplevel :execute)
  (setf *readtable* (copy-readtable nil)))
(defpackage "COM.INFORMATIMAGO.COMMON-LISP.LATEX"
  (:use "COMMON-LISP")
  (:shadow "SET"))
(in-package "COM.INFORMATIMAGO.COMMON-LISP.LATEX")


(defun latex-env-name (text)
  (let* ((left  (position #\{ text))
         (right (position #\} text :start left)))
    (subseq text (1+ left) right)))

(defun test/latex-env-name ()
  (assert (string= (latex-env-name "{Hello} {world}!") "Hello"))
  (assert (string= (latex-env-name "Hello {world}!") "world"))
  (assert (string= (latex-env-name "{world}") "world"))
  (assert (string= (latex-env-name "{world") "world"))
  (assert (null (ignore-errors (latex-env-name "world")))))

(defun escape-quotes (text)
  (with-output-to-string (*standard-output*)
    (loop :for ch :across text :do
      (when (char= #\" ch) (princ "\\"))
      (princ ch))))

(defun test/escape-quotes ()
  (assert (string= (escape-quotes "Hello\"World\"!") "Hello\\\"World\\\"!"))
  (assert (string= (escape-quotes "Hello '\"' World!") "Hello '\\\"' World!"))
  (assert (string= (escape-quotes "Hello 'world'!") "Hello 'world'!"))
  (assert (string= (escape-quotes "Hello world!") "Hello world!")))

(defun get-macro-name (text)
  "Return the suffix of TEXT starting after the last backslash."
  (subseq text (1+ (or (position #\\ text :from-end t) -1))))

(defun test/get-macro-name ()
  (assert (string= (get-macro-name "\\Hello") "Hello"))
  (assert (string= (get-macro-name "\\Hello\\World") "World"))
  (assert (string= (get-macro-name "Hello") "Hello")))


(defparameter *max-include-depth* 10)


(defun set     (chars)          `(set     ,chars))
(defun set-not (chars)          `(set-not ,chars))
(defun alt     (&rest options)  `(alt     ,@options))
(defun opt     (&rest options)  `(opt     ,@options))
(defun rep+    (re)             `(rep+    ,re))
(defun rep*    (re)             `(rep*    ,re))
(defun seq     (&rest res)      `(seq     ,@res))
(defun any     ()               `(any))

(defun reject () (error "Failure reading the file."))

(defmacro scan-rules ((token-variable stream-variable)
                      &body rules)
  (let ((vstate (gensym "state"))
        (vscan-buffer (gensym "buffer")))
    `(let ((,vstate         nil)
           (,token-variable nil)
           (,vscan-buffer   (make-array 8 :element-type 'character :adjustable t :fill-pointer 0)))
       (flet ((goto (state)
                (setf ,vstate state))
              (scan-current-stream ()
                ,stream-variable)
              ((setf scan-current-stream) (new-stream)
                (setf ,stream-variable new-stream))
              (scan-current-buffer ()
                ,vscan-buffer)
              ((setf scan-current-buffer) (new-buffer)
                (assert (and (stringp new-buffer) (adjustable-array-p new-buffer) (array-has-fill-pointer-p new-buffer)))
                (setf ,vscan-buffer new-buffer))
              (scan-reset-current-buffer ()
                (setf ,vscan-buffer (make-array 8 :element-type 'character :adjustable t :fill-pointer 0))))
         (macrolet ((when-state (state &body body)
                      `(when (eq ,',vstate ,state)
                         ,@body))
                    (rule (regexp &body body)
                      `(when (setf ,',token-variable (scan-match-regexp ,',stream-variable ,regexp))
                         ,@body)))
           ,@rules)))))

(defun scan-match-regexp (stream regex)
  (declare (ignore stream regex))
  (cerror "Return NIL." "Not implemented yet"))

(defun scan (stream &key (read-tex-dir #P".")
                      (external-format :default)
                      (if-include-fails :continue) ; or :error or some value to return from SCAN.
                      )
  (let* ((token-text               "")
         (include-stack            '())

         (chapter-flag             nil)
         (section-flag             nil)
         (subsection-flag          nil)
         (subsubsection-flag       nil)
         (aparagraph-flag          nil)
         (inline-quote-flag        nil)
         (inline-math-flag         nil)
         (display-math-flag        nil)
         (table-count              0)
         (array-count              0)
         (saved-math-state         nil)
         (brace-count              nil)
         (list-environment-count   0)

         (list-stack               nil)
         (current-list             nil)

         (states '(:math :macro :picture))

         (A-Z                      "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
         (CONTROL-M                #\return)
         (CONTROL-L                #\page)
         (ALPHABET                 (set A-Z))
         (ACCENT-ACUTE             "\\'")
         (ACCENT-GRAV              "\\`")
         (ACCENT-UMLAUT            "\\\"" )
         (ACCENT-CIRCUMFLEX        "\\^")
         (ACCENT                   (alt ACCENT-CIRCUMFLEX ACCENT-GRAV ACCENT-ACUTE ACCENT-UMLAUT))
         (FILENAME-CHAR            (set A-Z))
         (LETTER                   (alt (set A-Z) #\- ACCENT))
         (DIGIT                    (set "0123456789"))
         (NUMBER                   (rep+ DIGIT))
         (MATH-NUMBER              (seq (rep* DIGIT) (opt ".") NUMBER))
         (TEXT-NUMBER              (seq NUMBER (rep* (seq (opt ",") MATH-NUMBER))))
         (TIME-NUMBER              (seq (rep+ DIGIT) (opt ":") (rep+ digit)))
         (ALPHABET-STRING          (rep* LETTER))
         (WHITESPACE               (alt #\space #\tab #\newline))
         (H-SPACE                  (alt #\space #\tab))
         (NEWLINE                  #\newline)
         (BLANK-LINE               (seq (rep* H-SPACE) NEWLINE) )
         (PARAGRAPH-SEPARATOR      (seq NEWLINE (rep+ BLANK-LINE)))
         (PUNCT                    (set "-=_^/.,:;!?\"()@[]|<>*+'`"))
         (BACKSLASH                #\\)
         (LATEX-SPECIAL-CHAR       (set "#$%&~_^{}\\"))
         (MATH-OP                  (set "+*/'<>=-"))
         (POS-TAB-ARG              (seq "[" (rep+ (set "hbtp")) "]"))
         (OPT-TAB-ARG              (seq "{" (rep* (set "lrc|")) "}"))
         (BEGIN-ENV-OPEN           (seq BACKSLASH "begin" (rep* H-SPACE) "{"))
         (BEGIN-ENV-CLOSE          (seq "}"(rep* WHITESPACE)))
         (BEGIN-DISPLAY-MATH       "$$")
         (END-DISPLAY-MATH         "$$")

         )
    (scan-rules (token-text stream)

      (labels ((terminate ()
                 (cerror "Return NIL." "Not implemented yet"))
               (get-tex-filename-without-braces (text)
                 "Return the tex file path designated by the token-text \"filename\".
               Use variable READ-TEX-DIR."
                 (let ((dirname read-tex-dir)
                       (filename text))
                   (concatenate 'string (or dirname "") filename (if (find #\. filename) "" ".tex"))))
               (get-tex-filename (text)
                 "Return the tex file path designated by the token-text \"{filename}\".
               Use variable READ-TEX-DIR."
                 (get-tex-filename-without-braces (latex-env-name text)))
               (open-list (&rest prefix)
                 (push current-list list-stack)
                 (setf current-list (reverse prefix)))
               (close-list ()
                 (let ((new-list (nreverse current-list)))
                   (setf current-list (pop list-stack))
                   (push new-list current-list)
                   new-list))
               (add-as-string (text)
                 (push text current-list))
               (add-sexp (sexp)
                 (push sexp current-list))
               (clean-up-quotes ()
                 (when inline-quote-flag
                   (setf inline-quote-flag nil)
                   (close-list)))
               (list-env-p (text)
                 (member text '("list"
                                "exercises"
                                "Exercises"
                                "closeitemize"
                                "alphlist"
                                "Alphlist"
                                "chapterex") :test (function string=)))
               (push-include (filename)
                 (if (<= (length include-stack) *max-include-depth* )
                     (error "Includes nested too deeply.")
                     (let ((infile (open filename
                                         :external-format external-format
                                         :if-does-not-exist (if (eq if-include-fails :error)
                                                                :error
                                                                nil))))
                       (if infile
                           (case if-include-fails
                             (:continue (goto nil))
                             (:error    (error "Could not open include file ~S" filename))
                             (otherwise (return-from scan if-include-fails)))
                           (progn
                             (push (cons (scan-current-stream) (scan-current-buffer)) include-stack)
                             (setf (scan-current-stream) infile)
                             (scan-reset-current-buffer)
                             (format *trace-output* "Reading input from include file ~S~%" filename)
                             (goto :initial))))))
               (pop-include ()
                 (when include-stack
                   (close (scan-current-stream))
                   (setf (scan-current-stream) (car (car include-stack))
                         (scan-current-buffer) (cdr (car include-stack)))
                   (pop include-stack)
                   t))
               (terminate-includes ()
                 (loop
                   :while include-stack
                   :do (pop-include)))
               (aparagraph-closer (closer)
                 (lambda ()
                   (when aparagraph-flag
                     (funcall closer)
                     (close-list)
                     (setf aparagraph-flag nil))))
               (subsubsection-closer (closer)
                 (lambda ()
                   (when subsubsection-flag
                     (funcall closer)
                     (close-list)
                     (setf subsubsection-flag nil))))
               (subsection-closer (closer)
                 (lambda ()
                   (when subsection-flag
                     (funcall closer)
                     (close-list)
                     (setf subsection-flag nil))))
               (section-closer (closer)
                 (lambda ()
                   (when section-flag
                     (funcall closer)
                     (clean-up-quotes)
                     (close-list)
                     (setf section-flag nil))))
               (open-group (flag tag close-subgroup)
                 (if flag
                     (progn
                       (funcall close-subgroup)
                       (close-list)
                       (open-list tag)
                       nil)
                     (progn
                       (clean-up-quotes)
                       (open-list tag)
                       t))))
        (unwind-protect
             (progn

               (rule (seq "\\end " (rep* H-SPACE))
                     (error "space after \\end can cause problems"))

               (rule "\\input"
                     (goto :include))

               (rule "\\include"
                     (goto :include))

               (when-state :include
                           (rule (rep* H-SPACE)
                                 #|eat the whitespace|#)
                           (rule (seq "{" (rep+ (set-not (alt "}" WHITESPACE))) "}")
                                 (let ((filename (get-tex-filename token-text)))
                                   (push-include filename)))
                           (rule (rep+ filename-char)
                                 (let ((filename (get-tex-filename-without-braces token-text)))
                                   (push-include filename))))

               (when-state :<<EOF>>
                           (unless (pop-include)
                             (terminate)))

               ;; Dectalk abbreviations:
               (rule (alt "Jan." "Feb." "Mar." "Apr." "Aug." "Sep." "Oct." "Nov." "Dec.")
                     (add-as-string token-text))

               (rule "i.e."
                     (add-sexp '(cs "ie")))
               (rule "e.g."
                     (add-sexp '(cs "eg")))

               ;; Misc abbreviations:
               (rule (alt "Corp." "etc.")
                     (add-as-string token-text))

               ;; Initials:
               (rule (seq ALPHABET ".")
                     ;; A letter followed by a period is probably an initial.
                     (add-as-string token-text))

               ;; Salutations:
               (rule (alt "Prof." "Mr." "Mrs." "St." "PhD." "Phd." "Dr." )
                     (add-as-string token-text))

               ;; Days of the week:
               (rule (alt "Sun." "Mon." "Tue." "Wed." "Thu." "Fri." "Sat.")
                     (add-as-string token-text))

               ;; tex commands that are stripped
               (rule (alt "\\hspace*"
                          "\\bigskip"
                          "\\noalign"
                          "\\bigl"
                          "\\bigr"
                          "\\biggl"
                          "\\biggr"
                          "\\bigggl"
                          "\\bigggr"
                          (seq "\\thispagestyle{" (rep* (set-not #\})) "}"  )
                          "\\medskip"
                          "\\advance"
                          "\\leftskip"
                          "\\rightskip"
                          (seq "by" (rep+ digit) "pc")
                          (seq "by" (rep+ digit) "pt")
                          (seq "by" (rep+ digit) "in")
                          "\\protect"
                          "\\smallskip"
                          "\\displaystyle"
                          "\\left"
                          "\\right"
                          "\\boldmath"
                          "\\unboldmath"
                          "\\raggedright"
                          "\\vfil"
                          "\\vfill"
                          "\\hfil"
                          "\\hfill"
                          "\\scriptstyle"
                          (seq "\\vskip" (rep* H-space) (opt (seq (rep+ digit) "pt")))
                          (seq "\\hspace*" (rep* H-space) (opt (seq (rep+ digit) "in")))
                          (seq "\\vspace*" (rep* H-space) (opt (seq (rep+ digit) "pt")))
                          (seq "\\vspace*" (rep* H-space) (opt (seq (rep+ digit) "in")))
                          (seq "\\kern*"  (rep* H-space) (opt (seq (rep+ digit) "pt")))
                          "\\eject"
                          "\\maketitle"
                          "\\noindent"
                          "\\goodbreak"
                          "\\,"
                          "\\!"
                          "\\:"
                          "\\"
                          "\\hline"
                          "\\ "
                          "\\/"
                          "\\mathstrut"
                          "\\pagebreak"
                          "\\linebreak"
                          )
                     #|nothing|#)

               (rule "\\@"
                     (add-sexp '(cs "@")))
               (rule "\\ul"
                     (add-sexp '(cs "em")))

               ;; Some special verb commands
               (rule "\\verb|[|"
                     (add-as-string "["))
               (rule "\\verb|{|"
                     (add-as-string "{"))

               ;; special tex and latex characters */
               (rule (alt "--" "---")
                     (add-as-string token-text))
               (rule "~"
                     (add-as-string " ")) ; or ignore?

               (rule (alt "\\cr" "\\headrow" "\\newrow" (seq BACKSLASH BACKSLASH))
                     (if (or (plusp table-count) (plusp array-count))
                         (progn
                           (close-list) ; End current array element.
                           (close-list) ; End current row.
                           (open-list)  ; Now start next row
                           (open-list))) ; and start its first element
                     (add-sexp '(newline)))
               (rule (seq backslash latex-special-char)
                     ;; print it after stripping off the backslash
                     (add-as-string (escape-quotes (subseq token-text 1))))
               (rule "&"
                     (if (or (plusp table-count) (plusp array-count))
                         (progn
                           (close-list)
                           (open-list))
                         ;; output field separator so that we can handle tex matrix command
                         (add-sexp '(field-separator))))

               (when-state :math
                           (rule "^" (add-as-string "^"))
                           (rule "_" (add-as-string "_")))

               ;; Skip pictures
               (rule (seq BEGIN-ENV-OPEN "picture" BEGIN-ENV-CLOSE)
                     (goto :picture))

               (rule (seq (rep* whitespace) backslash "end{picture}" (rep* h-space))
                     (goto nil))

               (when-state :picture
                           (rule (rep* (any))
                                 #|ignore|#))

               ;; kluges for handling macro definitions
               (rule (seq backslash "def" (rep* whitespace) backslash (rep* letter))
                     ;; handle macro definitions
                     (add-sexp `(cs "def" ,(get-macro-name token-text)))
                     ;; start arg list
                     (open-list 'arglist)
                     (goto :macro))

               (rule (seq "#" digit)
                     ;; Convert #1 to arg1 etc.
                     (add-as-string (format nil "arg~C" (aref token-text 1))))

               (when-state :macro
                           (rule "{"
                                 ;; ending of arg list seen so mark it and remember
                                 (close-list)
                                 (open-list 'block)
                                 (goto nil)))

               ;; braces start groups

               (when-state :math
                           (rule "{"
                                 (incf brace-count)
                                 (open-list 'subformula)))

               (rule "{"
                     (incf brace-count)
                     (open-list 'block))

               (rule "}"
                     (decf brace-count)
                     (close-list)
                     (when (zerop brace-count)
                       ;; reset state to math if necessary
                       (case saved-math-state
                         ((nil))
                         ((:inline-math)
                          (setf inline-math-flag t
                                saved-math-state nil)
                          (goto :math))
                         ((:display-math)
                          (setf display-math-flag t
                                saved-math-state nil)
                          (goto :math))
                         (otherwise
                          (error "Unknown saved-math-state ~S" saved-math-state)))))

               ;; hbox and mbox change state
               (rule (alt (seq backslash "fbox" (rep* h-space) "{")
                          (seq backslash "hbox" (rep* h-space) "{")
                          (seq backslash "mbox" (rep* h-space) "{"))
                     (setf brace-count 1) ; reset brace count
                     ;;  When brace_count reaches 0 we have seen the
                     ;;  matching close brace and can close the hbox.
                     ;;  Both hbox and mbox marked as mbox and will be
                     ;;  processed as if they were user defined macros
                     ;;  ie: using define-text-object
                     (clean-up-quotes)
                     ;; first set up state
                     (cond
                       (display-math-flag
                        (setf display-math-flag nil
                              inline-math-flag nil ; kludge
                              saved-math-state :display-math)
                        (goto nil))
                       (inline-math-flag
                        (setf display-math-flag nil ; kludge
                              inline-math-flag nil
                              saved-math-state :inline-math)
                        (goto nil)))
                     (add-sexp `(cs "mbox"))
                     (open-list 'block))

               ;; Brackets in latex
               (when-state :math
                           (rule "[" (add-as-string "["))
                           (rule "]" (add-as-string "]")))

               (rule "["                ; optional args in a block
                     (open-list 'block))

               (rule "]"
                     (close-list))

               ;; paragraph breaks
               (rule (alt (seq backslash "par")
                          (seq backslash "paragraph")
                          PARAGRAPH-SEPARATOR)
                     ;; Don't put parbreaks inside math.
                     (unless (or inline-math-flag display-math-flag)
                       (clean-up-quotes)
                       ;; Paragraph delimiter is a newline followed by
                       ;; an arbitrary number blank lines, where a
                       ;; blank is defined as a line with an arbitrary
                       ;; amount of optional h-space followed by a
                       ;; newline.  Close apar if one opened.
                       (when aparagraph-flag
                         (close-list)
                         (setf aparagraph-flag nil))
                       (add-sexp 'parbreak)))

               ;; Beginning and ending math mode

               (rule "\\["
                     (clean-up-quotes)
                     (setf display-math-flag t)
                     (open-list 'display-math)
                     (goto :math))

               (rule "\\]"
                     (setf display-math-flag nil)
                     (close-list)
                     (goto nil))

               (rule "$$"
                     (when inline-math-flag
                       (error "Display math started inside inline math? Probably an inline math was closed and immediately opened. Check the latex file."))
                     (if display-math-flag
                         (progn
                           (setf display-math-flag nil)
                           (close-list)
                           (goto nil))
                         (progn
                           (clean-up-quotes)
                           (setf display-math-flag t)
                           (open-list 'display-math)
                           (goto :math))))

               (rule "\\("
                     (setf inline-math-flag t)
                     (open-list 'inline-math)
                     (goto :math))

               (rule "\\)"
                     (setf inline-math-flag nil)
                     (close-list)
                     (goto :math))

               (rule "$"
                     (if inline-math-flag
                         (progn
                           (setf inline-math-flag nil)
                           (close-list)
                           (goto nil))
                         (progn
                           (setf inline-math-flag t)
                           (open-list 'inline-math)
                           (goto :math))))

               ;; Math operators
               (when-state :math

                           (rule "'" ; catch single quote in math mode
                                 (add-as-string "prime"))

                           (rule (seq "''" (rep* h-space)) ; catch double prime in math mode
                                 (add-as-string "double-prime"))

                           (rule MATH-OP
                                 (add-as-string token-text)))

               ;; tex comment
               (rule (seq "%" (rep* (any))) ; Latex comments run to the end of the line.
                     ;; Not doing anything with comments, so throw them away.
                     ;; (open-list 'comment)
                     ;; (add-as-string (escape-quotes token-text))
                     ;; (close-list)
                     )

               ;; begin various environments

               (rule (seq begin-env-open (alt "document" "abstract" "center") begin-env-close)
                     (open-list (intern (latex-env-name token-text))))

               (rule (seq begin-env-open (alt "quote" "quotation" "verbatim") begin-env-close)
                     (clean-up-quotes)
                     (open-list (intern (latex-env-name token-text))))

               (rule (seq begin-env-open (alt "description" "deflist" "enumerate" "itemize") begin-env-close)
                     (clean-up-quotes)
                     (incf list-environment-count)
                     (open-list (intern (latex-env-name token-text)))
                     ;; Generate dummy item
                     ;; this is to allow \item to be handled cleanly
                     (open-list 'item))

               (rule (seq (rep* whitespace) backslash "item")
                     (clean-up-quotes)
                     ;; begin a new item after ending previous item
                     (close-list)
                     (when (zerop list-environment-count)
                       (error "An item was found outside known list environment."))
                     ;; Note this is a quick fix,
                     ;; and will leave a null list as the first item of each enumerated list
                     (open-list 'item))

               ;; begin equation
               (rule (seq begin-env-open "equation" begin-env-close)
                     (clean-up-quotes)
                     (open-list 'equation)
                     (setf display-math-flag t)
                     (goto :math))

               ;; begin eqnarray
               (rule (seq begin-env-open (alt "eqnarray*" "eqnarray" "eqalign" "eqalign*") begin-env-close)
                     ;; starting an eqnarray or an eqalign
                     (incf array-count)
                     (setf display-math-flag t)
                     (goto :math)
                     (open-list (intern (string-trim "*" (latex-env-name token-text))))
                     (open-list)    ; start the first eqnarray row
                     (open-list))   ; start the first eqnarray element

               (rule (seq begin-env-open (opt "tabular" "array") "}"
                          (opt POS-TAB-ARG) (opt OPT-TAB-ARG) (rep* WHITESPACE))
                     (clean-up-quotes)
                     (let ((op (intern (string-trim "*" (latex-env-name token-text)))))
                       (if (eq op 'tabular)
                           (incf table-count)
                           (incf array-count))
                       (open-list op))
                     (open-list)       ; start the first table row
                     (open-list))      ; start the first table element

               ;; begin cases

               ;; Cases handled like table environment.  Allow for text
               ;; inside math mode by saving state. If this works, use
               ;; similar approach for mbox hbox etc.

               (rule (seq begin-env-open "cases" "}" (opt POS-TAB-ARG) (rep* WHITESPACE))
                     (clean-up-quotes)
                     (cond
                       (display-math-flag
                        (setf display-math-flag nil
                              saved-math-state :display-math)
                        (goto nil))
                       (inline-math-flag
                        (setf inline-math-flag nil
                              saved-math-state :inline-math)
                        (goto nil)))
                     (incf table-count)
                     (open-list 'cases)
                     (open-list)       ; start the first table row
                     (open-list))      ; start the first table element

               (rule (alt (seq (rep* blank-line) begin-env-open "slide}{}")
                          (seq begin-env-open "slide" begin-env-close))
                     (clean-up-quotes)
                     ;; starting a latex slide
                     (open-list 'slide))

               (rule (seq begin-env-open "displaymath" begin-env-close)
                     (clean-up-quotes)
                     (setf display-math-flag t)
                     (open-list 'display-math)
                     (goto :math))

               ;; unrecognized environment
               (rule (alt (seq begin-env-open alphabet-string "}" (opt pos-tab-arg) (opt pos-tab-arg))
                          (seq begin-env-open (rep* (set-not "}")) "}")) ; environment names can have more that alphabets
                     (clean-up-quotes)
                     ;; Some new environments maybe declared as
                     ;; enumerable by adding their name to
                     ;; list_env_names.  Handle this by checking if the
                     ;; env name present in the table.
                     (let ((name (latex-env-name token-text)))
                       (if (list-env-p name)
                           (progn
                             (open-list 'new-environment name)
                             ;; expect items in this env
                             (incf list-environment-count)
                             ;; generate dummy item
                             ;; this is to allow \item to be handled cleanly
                             (open-list 'item))
                           (open-list 'new-environment name))))

               (rule (seq (rep* whitespace) backslash (alt "grieschapter" "chapter*" "chapterx" "chapter"))
                     (setf chapter-flag (open-group chapter-flag 'chapter
                                                    (section-closer
                                                     (subsection-closer
                                                      (subsubsection-closer
                                                       (lambda ())))))))
               (rule (seq (rep* whitespace) backslash (alt "griessection" "section*" "section"))
                     (setf section-flag (open-group section-flag 'section
                                                    (subsection-closer
                                                     (subsubsection-closer
                                                      (lambda ()))))))
               (rule (seq (rep* whitespace) backslash (alt "subsection*" "subsection"))
                     (setf subsection-flag (open-group subsection-flag 'subsection
                                                       (subsubsection-closer
                                                        (lambda ())))))
               (rule (seq (rep* whitespace) backslash (alt "subsubsection*" "subsubsection"))
                     (setf subsubsection-flag (open-group subsubsection-flag 'subsubsection
                                                          (lambda ()))))


               ;; absolute sectioning constructs
               (rule (seq (rep* whitespace) backslash "achapter")
                     (setf chapter-flag (open-group chapter-flag 'achapter
                                                    (section-closer
                                                     (subsection-closer
                                                      (subsubsection-closer
                                                       (aparagraph-closer
                                                        (lambda ()))))))))
               (rule (seq (rep* whitespace) backslash  "asection")
                     (setf section-flag (open-group section-flag 'asection
                                                    (subsection-closer
                                                     (subsubsection-closer
                                                      (aparagraph-closer
                                                       (lambda ())))))))
               (rule (seq (rep* whitespace) backslash "asubsection")
                     (setf subsection-flag (open-group subsection-flag 'asubsection
                                                       (subsubsection-closer
                                                        (aparagraph-closer
                                                         (lambda ()))))))
               (rule (seq (rep* whitespace) backslash "asubsubsection")
                     (setf subsubsection-flag (open-group subsubsection-flag 'asubsubsection
                                                          (aparagraph-closer
                                                           (lambda ())))))
               (rule (seq (rep* whitespace) backslash "apar")
                     (setf aparagraph-flag (open-group aparagraph-flag 'apar
                                                       (lambda ()))))

               ;; end various environments

               (rule (seq (rep* whitespace) backslash "end{abstract}" (rep* h-space))
                     (clean-up-quotes)
                     (when aparagraph-flag
                       (close-list)
                       (setf aparagraph-flag nil))
                     (close-list))

               (rule (seq (rep* whitespace) backslash "end{" (alt "center" "quote" "quotation") "}" (rep* h-space))
                     (clean-up-quotes)
                     (close-list))

               (rule (seq (rep* whitespace) backslash "end{equation}" (rep* h-space))
                     (clean-up-quotes)
                     (close-list)
                     (setf display-math-flag nil)
                     (goto nil))

               (rule (seq (rep* whitespace) backslash "end{"
                          (alt "eqnarray*" "eqnarray" "eqalign*" "eqalign") "}" (rep* h-space))
                     (decf array-count)
                     (close-list)
                     (close-list)
                     (close-list)
                     (setf display-math-flag nil)
                     (goto nil))

               (rule (seq (rep* whitespace) backslash "end{array}" (rep* h-space))
                     (decf array-count)
                     (close-list)
                     (close-list)
                     (close-list))

               (rule (seq (rep* whitespace) backslash "end{tabular}" (rep* h-space))
                     (decf table-count)
                     (close-list)
                     (close-list)
                     (close-list))

               ;;  Cases handled like tabular

               (rule (seq (rep* whitespace) backslash "end{tabular}" (rep* h-space))
                     (decf table-count)
                     (close-list)
                     (close-list)
                     (close-list)
                     (case saved-math-state
                       ((nil))
                       ((:inline-math)
                        (setf inline-math-flag t
                              saved-math-state nil)
                        (goto :math))
                       ((:display-math)
                        (setf display-math-flag t
                              saved-math-state nil)
                        (goto :math))
                       (otherwise (error "Unknown saved-math-state ~S" saved-math-state))))

               (rule (seq (rep* whitespace) backslash "end{" (alt "enumerate" "description" "itemize") "}" (rep* h-space))
                     (clean-up-quotes)
                     (decf list-environment-count)
                     (close-list)
                     (close-list))

               (rule (seq (rep* whitespace) backslash "end{document}")
                     (clean-up-quotes)
                     (when aparagraph-flag
                       (setf aparagraph-flag nil)
                       (close-list))
                     (when subsubsection-flag
                       (setf subsubsection-flag nil)
                       (close-list))
                     (when subsection-flag
                       (setf subsection-flag nil)
                       (close-list))
                     (when section-flag
                       (setf section-flag nil)
                       (close-list))
                     (when chapter-flag
                       (setf chapter-flag nil)
                       (close-list))
                     (close-list))

               (rule (seq (rep* whitespace) backslash "end{slide}")
                     (clean-up-quotes)
                     (close-list))

               (rule (seq (rep* whitespace) backslash "end{displaymath}")
                     (close-list)
                     (setf display-math-flag nil)
                     (goto nil))

               (rule (seq backslash "end{" (alt (seq alphabet-string)
                                                (seq (rep* (set-not "}")))) (opt "*") "}")
                     (clean-up-quotes)
                     (let ((name (latex-env-name token-text)))
                       (if (list-env-p name)
                           (progn
                             (decf list-environment-count)
                             (close-list)
                             (close-list))
                           (close-list))))

               ;; tex control sequences eg macro names

               (rule (seq backslash (set-not alphabet))
                     ;; handle single characters with a backslash in front eg \. etc.
                     (add-as-string (escape-quotes (subseq token-text 1))))

               (when-state :math
                           (rule (seq backslash (rep* alphabet))
                                 (open-list 'math-cs (subseq token-text 1))
                                 (close-list)))

               (rule (seq backslash (rep* alphabet))
                     (open-list 'cs (subseq token-text 1))
                     (close-list))

               ;; be smart about numbers

               (when-state :math
                           (rule (seq backslash math-number)
                                 (open-list 'math-number token-text)
                                 (close-list)))

               (rule time-number
                     ;; Dectalk speaks time numbers correctly
                     (add-as-string token-text))

               (rule text-number
                     (open-list 'text-number token-text)
                     (close-list))

               ;; words handled according to mode
               (when-state :math
                           (rule (seq "''" (rep* h-space))
                                 (add-as-string "double-prime")))

               (rule (alt alphabet
                          (seq alphabet (rep* letter) (opt "'") (rep+ alphabet)))
                     (if (or inline-math-flag display-math-flag)
                         ;; In math mode, the string should be broken up
                         ;; into strings of one character ie "a+b" is "a"
                         ;; "+" "b" since TeX allows for only plain
                         ;; single letter variables
                         (loop :for ch :across token-text
                               :do (add-as-string (string ch)))
                         ;; Convert text to strings.  Not escape quotes
                         ;; since umlaut now handled as a letter if this
                         ;; causes trouble, reintroduce escape_quote as
                         ;; in.
                         (add-as-string token-text)))

               (rule (seq (rep* h-space) "\"")
                     (if inline-quote-flag
                         (reject)
                         (progn
                           (setf inline-quote-flag t)
                           (open-list 'inline-quote))))

               (rule (seq (rep* h-space) "``")
                     (unless inline-quote-flag
                       (setf inline-quote-flag t)
                       (open-list 'inline-quote)))

               (rule (seq (alt "\"" "''") (rep* h-space))
                     ;; matching " here is a concession
                     (if inline-quote-flag
                         (progn         ; Marking matched inline-quote
                           (add-as-string "''")
                           (close-list)
                           (setf inline-quote-flag nil))
                         (progn ; This does not match a quotation, so just put it in the text:
                           (add-as-string (escape-quotes token-text)))))

               (rule PUNCT
                     (add-as-string (escape-quotes token-text)))

               (rule (rep* h-space)
                     (add-as-string token-text))

               ;; Trap things that are not caught and echo to stderr
               (rule (alt control-m control-l)
                     #|ignore|#)

               (rule "#"
                     (add-as-string "#"))

               (rule (any)
                     (error "This escaped ~S" token-text)))
          (terminate-includes))))))


(defun read-token (stream)
  (let ((ch (read-char stream)))
    (case ch
      ;; ((#\\))
      ((#\%))
      (otherwise
       (loop
         :with text = (make-array 4 :element-type 'character :adjustable t :fill-pointer 0)
         :while (and (char/= #\\ ch) (char/= #\% ch))
         :do (vector-push-extend ch text (length text))
         :finally (unread-char ch stream) (return text))))))


#+testing
(with-open-file (latex #-(and) #P"~/library/informatique/standards-and-protocol/cplusplus-draft/source/grammar.tex"
                       #P"/Volumes/USER/srv/books/informatics/standards/cplusplus-draft/source/grammar.tex")
  (loop
    :for token = (read-token latex)
    :while token
    :do (print token)))

#+testing
(defun getenv (var)
  #+clisp (ext:getenv var)
  #+ccl   (ccl:getenv var)
  #-(or clisp ccl) (error "Please implement getenv in ~A" (lisp-implementation-type)))

#+testing
(with-open-file (latex #-(and) #P"~/library/informatique/standards-and-protocol/cplusplus-draft/source/grammar.tex"
                       #P"/Volumes/USER/srv/books/informatics/standards/cplusplus-draft/source/grammar.tex")
  (scan latex :read-tex-dir (getenv "READ_TEX_DIR")))

ViewGit