Added SPLIT-SEQUENCE-ON-INDICATOR. Added methods on PREFIXP and SUFFIXP for NULL to avoid problem with methods on string designators.

Pascal J. Bourguignon [2021-05-15 16:38]
Added SPLIT-SEQUENCE-ON-INDICATOR. Added methods on PREFIXP and SUFFIXP for NULL to avoid problem with methods on string designators.
Filename
common-lisp/cesarum/sequence-test.lisp
common-lisp/cesarum/sequence.lisp
diff --git a/common-lisp/cesarum/sequence-test.lisp b/common-lisp/cesarum/sequence-test.lisp
index 0f3a3e8..e525a55 100644
--- a/common-lisp/cesarum/sequence-test.lisp
+++ b/common-lisp/cesarum/sequence-test.lisp
@@ -132,10 +132,10 @@


 (defun as-vector (sequence)
-  (make-array (length sequence) :initial-contents sequence))
+  (coerce sequence 'vector))

 (defun as-list (sequence)
-  (replace (make-list (length sequence)) sequence))
+  (coerce sequence 'list))


 (define-test test/prefixp ()
@@ -146,12 +146,21 @@
                                          ("" "HELLO")
                                          ("HELLO" "HELLO")
                                          ("HELLO" "HELLO WORLD"))
-                        :do (assert-true (prefixp (funcall pf p) (funcall sf s))))
+                        :do (assert-true (prefixp (funcall pf p) (funcall sf s))
+                                         (pf p sf s)
+                                         "applicable methods = ~S"
+                                         (compute-applicable-methods (function prefixp)
+                                                                     (list (funcall pf p) (funcall sf s)))))
+
                       (loop
                         :for (p s) :in '(("HELLO" "HELL")
                                          ("HELLO" "SAY HELLO WORLD")
                                          ("HELLO" "SAY HELLO"))
-                        :do (assert-false (prefixp (funcall pf p) (funcall sf s)))))))
+                        :do (assert-false (prefixp (funcall pf p) (funcall sf s))
+                                          (pf p sf s)
+                                          "applicable methods = ~S"
+                                         (compute-applicable-methods (function prefixp)
+                                                                     (list (funcall pf p) (funcall sf s))))))))

 (define-test test/suffixp ()
   (loop :for pf :in '(identity as-vector as-list)
@@ -161,30 +170,57 @@
                                          ("" "HELLO")
                                          ("HELLO" "HELLO")
                                          ("WORLD" "HELLO WORLD"))
-                        :do (assert-true (suffixp (funcall pf p) (funcall sf s))))
+                        :do (assert-true (suffixp (funcall pf p) (funcall sf s))
+                                         (pf p sf s)
+                                         "applicable methods = ~S"
+                                         (compute-applicable-methods (function suffixp)
+                                                                     (list (funcall pf p) (funcall sf s)))))
                       (loop
                         :for (p s) :in '(("HELLO" "ELLO")
                                          ("HELLO" "SAY HELLO WORLD")
                                          ("HELLO" "SAY WORLD"))
-                        :do (assert-false (suffixp (funcall pf p) (funcall sf s)))))))
+                        :do (assert-false (suffixp (funcall pf p) (funcall sf s))
+                                          (pf p sf s)
+                                          "applicable methods = ~S"
+                                         (compute-applicable-methods (function suffixp)
+                                                                     (list (funcall pf p) (funcall sf s))))))))

 (define-test test/split-sequence-if ()
-  (assert (equal (split-sequence-if (function digit-char-p) (coerce "aaaa0bbb23ccc456dddd" 'list))
-                 '((#\a #\a #\a #\a) (#\b #\b #\b) nil (#\c #\c #\c) nil nil (#\d #\d #\d #\d))))
-  (assert (equal (split-sequence-if (function digit-char-p) (coerce "aaaa0bbb23ccc456dddd" 'list) :remove-empty-subseqs t)
-                 '((#\a #\a #\a #\a) (#\b #\b #\b) (#\c #\c #\c) (#\d #\d #\d #\d))))
-  (assert (equal (split-sequence-if (function digit-char-p)  "aaaa0bbb23ccc456dddd")
-                 '("aaaa" "bbb" "" "ccc" "" "" "dddd")))
-  (assert (equal (split-sequence-if (function digit-char-p) "aaaa0bbb23ccc456dddd" :remove-empty-subseqs t)
-                 '("aaaa" "bbb" "ccc" "dddd")))
-  (assert (equal (split-sequence-if (function digit-char-p) "12aa45bb" :remove-empty-subseqs nil)
-                 '("" "" "aa" "" "bb")))
-  (assert (equal (split-sequence-if (function digit-char-p) "12aa45bb" :remove-empty-subseqs t)
-                 '("aa" "bb")))
-  (assert (equal (split-sequence-if (function digit-char-p) "12aa45" :remove-empty-subseqs nil)
-                 '("" "" "aa" "" "")))
-  (assert (equal (split-sequence-if (function digit-char-p) "12aa45" :remove-empty-subseqs t)
-                 '("aa"))))
+  (check equal (split-sequence-if (function digit-char-p) (coerce "aaaa0bbb23ccc456dddd" 'list))
+         '((#\a #\a #\a #\a) (#\b #\b #\b) nil (#\c #\c #\c) nil nil (#\d #\d #\d #\d)))
+  (check equal (split-sequence-if (function digit-char-p) (coerce "aaaa0bbb23ccc456dddd" 'list) :remove-empty-subseqs t)
+         '((#\a #\a #\a #\a) (#\b #\b #\b) (#\c #\c #\c) (#\d #\d #\d #\d)))
+  (check equal (split-sequence-if (function digit-char-p)  "aaaa0bbb23ccc456dddd")
+         '("aaaa" "bbb" "" "ccc" "" "" "dddd"))
+  (check equal (split-sequence-if (function digit-char-p) "aaaa0bbb23ccc456dddd" :remove-empty-subseqs t)
+         '("aaaa" "bbb" "ccc" "dddd"))
+  (check equal (split-sequence-if (function digit-char-p) "12aa45bb" :remove-empty-subseqs nil)
+         '("" "" "aa" "" "bb"))
+  (check equal (split-sequence-if (function digit-char-p) "12aa45bb" :remove-empty-subseqs t)
+         '("aa" "bb"))
+  (check equal (split-sequence-if (function digit-char-p) "12aa45" :remove-empty-subseqs nil)
+         '("" "" "aa" "" ""))
+  (check equal (split-sequence-if (function digit-char-p) "12aa45" :remove-empty-subseqs t)
+         '("aa")))
+
+
+(define-test test/split-sequence-on-indicator ()
+  (check equal (split-sequence-on-indicator "AAAbbbCCCddd"
+                                            (lambda (a b)
+                                              (or (and (upper-case-p a) (lower-case-p b))
+                                                  (and (upper-case-p b) (lower-case-p a)))))
+         '("AAA" "bbb" "CCC" "ddd"))
+  (check equal (split-sequence-on-indicator "AAAbbbCCCddd"
+                                            (lambda (a b)
+                                              (and (upper-case-p b) (lower-case-p a))))
+         '("AAAbbb" "CCCddd"))
+  (check equal (split-sequence-on-indicator "Hello World" (constantly t))
+         '("H" "e" "l" "l" "o" " " "W" "o" "r" "l" "d"))
+  (check equal (split-sequence-on-indicator "A" (constantly t))
+         '("A"))
+  (check equal (split-sequence-on-indicator "" (constantly t))
+         '()))
+

 (define-test test/all ()
   (test/replace-subseq)
@@ -193,7 +229,8 @@
   (test/concatenate-sequences)
   (test/prefixp)
   (test/suffixp)
-  (test/split-sequence-if))
+  (test/split-sequence-if)
+  (test/split-sequence-on-indicator))


 ;;;; THE END ;;;;
diff --git a/common-lisp/cesarum/sequence.lisp b/common-lisp/cesarum/sequence.lisp
index daa3b4b..8ff9b07 100644
--- a/common-lisp/cesarum/sequence.lisp
+++ b/common-lisp/cesarum/sequence.lisp
@@ -48,7 +48,8 @@
            "PREFIXP"
            "SUFFIXP"
            "MAPCONCAT"
-           "SPLIT-SEQUENCE-IF")
+           "SPLIT-SEQUENCE-IF"
+           "SPLIT-SEQUENCE-ON-INDICATOR")
   (:documentation
    "

@@ -430,11 +431,20 @@ RETURN:  Whether SUFFIX is a suffix of the (subseq SEQUENCE START END).
   (let ((mis (mismatch  prefix sequence :start2 start :end2 end :test test)))
     (or (null mis) (<= (length prefix) mis))))

-
 (defmethod suffixp ((suffix sequence) (sequence sequence) &key (start 0) (end nil) (test (function eql)))
-  (zerop (or (mismatch  suffix  sequence :start2 start :end2 end :test test
-                                         :from-end t)
-             0)) )
+  (zerop (or (mismatch suffix sequence :start2 start :end2 end :test test
+                                       :from-end t)
+             0)))
+
+;; We need to define methods for nil,  because cesarum.string defines
+;; methods for string-designators, so (null string) would be handled
+;; differently.
+
+(defmethod prefixp ((prefix null) (sequence sequence) &key (start 0) (end nil) (test (function eql)))
+  t)
+
+(defmethod suffixp ((suffix null) (sequence sequence) &key (start 0) (end nil) (test (function eql)))
+  t)


 (defun mapconcat (function sequence separator)
@@ -485,6 +495,18 @@ RETURN:  Whether SUFFIX is a suffix of the (subseq SEQUENCE START END).
          ""))))


+
+(defun remove-empty-subseqs (subsequences remove-empty-subseqs)
+  (if remove-empty-subseqs
+      (delete-if (lambda (seq)
+                   (typecase seq
+                     (vector  (zerop (length seq)))
+                     (null    t)
+                     (t       nil)))
+                 subsequences)
+      subsequences))
+
+
 (defun split-sequence-if (predicate sequence &key remove-empty-subseqs)
   "
 PREDICATE:      A predicate on elements of the SEQUENCE sequence.  When
@@ -522,7 +544,7 @@ EXAMPLES:       (split-sequence-if (function zerop) '(1 2 0 3 4 5 0 6 7 8 0 9))
                           nextpos  position)
                     (when (= position length)
                       (push (subseq sequence 0 0) chunks))))
-      (list   (loop
+      (cons   (loop
                 :with start := sequence
                 :while start
                 :do (let ((end (loop
@@ -532,15 +554,48 @@ EXAMPLES:       (split-sequence-if (function zerop) '(1 2 0 3 4 5 0 6 7 8 0 9))
                                  :do (pop current)
                                  :finally (return current))))
                       (push (ldiff start end) chunks)
-                      (setf start (cdr end))))))
-    (if remove-empty-subseqs
-        (delete-if (lambda (seq)
-                     (typecase seq
-                       (vector  (zerop (length seq)))
-                       (null    t)
-                       (t       nil)))
-                   (nreverse chunks))
-        (nreverse chunks))))
+                      (setf start (cdr end)))))
+      (null))
+    (remove-empty-subseqs (nreverse chunks) remove-empty-subseqs)))
+
+(defun split-sequence-on-indicator (sequence indicator)
+  "
+RETURN: a list of subsequence of SEQUENCE,  the SEQUENCE is
+        splited between consecutive items A and B for which
+        (funcall INDICATOR A B) returns true.
+"
+  (declare (type (function (t t) t) indicator))
+  (let ((chunks '()))
+    (etypecase sequence
+      (vector (loop :with start := 0
+                    :with length := (length sequence)
+                    :for i :from 1 :below length
+                    :for a := (aref sequence 0) :then b
+                    :for b := (aref sequence i)
+                    :if (funcall indicator a b)
+                      :do
+                         (push (subseq sequence start i) chunks)
+                         (setf start i)
+                    :finally (when (< start length)
+                               (push (subseq sequence start length) chunks))))
+      (cons   (loop :with sublist := sequence
+                    :with current := sequence
+                    :with next    := (cdr current)
+                    :while next
+                    :if (funcall indicator (car current) (car next))
+                      :do ;; split
+                          (setf (cdr current) nil)
+                          (push sublist chunks)
+                          (setq current next)
+                          (setq next (cdr current))
+                          (setq sublist current)
+                    :else :do ;; keep
+                              (setq current next)
+                              (setq next (cdr current))
+                    :finally (push sublist chunks)))
+      (null))
+    ;; There cannot be empty subseqs, since they contain at list the indicator items.
+    (nreverse chunks)))


 ;;;; THE END ;;;;
ViewGit