Added SPLIT-SEQUENCE-IF. Added SPLIT-STRING-IF and implemented it with SPLIT-STRING using SPLIT-SEQUENCE-IF.

Pascal J. Bourguignon [2021-05-15 15:15]
Added SPLIT-SEQUENCE-IF. Added SPLIT-STRING-IF and implemented it with SPLIT-STRING using SPLIT-SEQUENCE-IF.
Filename
common-lisp/cesarum/sequence-test.lisp
common-lisp/cesarum/sequence.lisp
common-lisp/cesarum/string.lisp
diff --git a/common-lisp/cesarum/sequence-test.lisp b/common-lisp/cesarum/sequence-test.lisp
index 7cab55c..0f3a3e8 100644
--- a/common-lisp/cesarum/sequence-test.lisp
+++ b/common-lisp/cesarum/sequence-test.lisp
@@ -11,12 +11,13 @@
 ;;;;AUTHORS
 ;;;;    <PJB> Pascal J. Bourguignon <pjb@informatimago.com>
 ;;;;MODIFICATIONS
+;;;;    2021-05-15 <PJB> Added test/split-sequence-if
 ;;;;    2015-02-25 <PJB> Extracted from sequence.lisp.
 ;;;;BUGS
 ;;;;LEGAL
 ;;;;    AGPL3
 ;;;;
-;;;;    Copyright Pascal J. Bourguignon 2015 - 2016
+;;;;    Copyright Pascal J. Bourguignon 2015 - 2021
 ;;;;
 ;;;;    This program is free software: you can redistribute it and/or modify
 ;;;;    it under the terms of the GNU Affero General Public License as published by
@@ -167,7 +168,23 @@
                                          ("HELLO" "SAY WORLD"))
                         :do (assert-false (suffixp (funcall pf p) (funcall sf s)))))))

-
+(define-test test/split-sequence-if ()
+  (assert (equal (split-sequence-if (function digit-char-p) (coerce "aaaa0bbb23ccc456dddd" 'list))
+                 '((#\a #\a #\a #\a) (#\b #\b #\b) nil (#\c #\c #\c) nil nil (#\d #\d #\d #\d))))
+  (assert (equal (split-sequence-if (function digit-char-p) (coerce "aaaa0bbb23ccc456dddd" 'list) :remove-empty-subseqs t)
+                 '((#\a #\a #\a #\a) (#\b #\b #\b) (#\c #\c #\c) (#\d #\d #\d #\d))))
+  (assert (equal (split-sequence-if (function digit-char-p)  "aaaa0bbb23ccc456dddd")
+                 '("aaaa" "bbb" "" "ccc" "" "" "dddd")))
+  (assert (equal (split-sequence-if (function digit-char-p) "aaaa0bbb23ccc456dddd" :remove-empty-subseqs t)
+                 '("aaaa" "bbb" "ccc" "dddd")))
+  (assert (equal (split-sequence-if (function digit-char-p) "12aa45bb" :remove-empty-subseqs nil)
+                 '("" "" "aa" "" "bb")))
+  (assert (equal (split-sequence-if (function digit-char-p) "12aa45bb" :remove-empty-subseqs t)
+                 '("aa" "bb")))
+  (assert (equal (split-sequence-if (function digit-char-p) "12aa45" :remove-empty-subseqs nil)
+                 '("" "" "aa" "" "")))
+  (assert (equal (split-sequence-if (function digit-char-p) "12aa45" :remove-empty-subseqs t)
+                 '("aa"))))

 (define-test test/all ()
   (test/replace-subseq)
@@ -175,7 +192,8 @@
   (test/parse-sequence-type)
   (test/concatenate-sequences)
   (test/prefixp)
-  (test/suffixp))
+  (test/suffixp)
+  (test/split-sequence-if))


 ;;;; THE END ;;;;
diff --git a/common-lisp/cesarum/sequence.lisp b/common-lisp/cesarum/sequence.lisp
index 596bf8a..daa3b4b 100644
--- a/common-lisp/cesarum/sequence.lisp
+++ b/common-lisp/cesarum/sequence.lisp
@@ -11,13 +11,14 @@
 ;;;;AUTHORS
 ;;;;    <PJB> Pascal J. Bourguignon <pjb@informatimago.com>
 ;;;;MODIFICATIONS
+;;;;    2021-05-15 <PJB> Added test/split-sequence-if
 ;;;;    2012-06-24 <PJB> Added REPLACE-SUBSEQ.
 ;;;;    2012-02-19 <PJB> Extracted from list.lisp and some other code.
 ;;;;BUGS
 ;;;;LEGAL
 ;;;;    AGPL3
 ;;;;
-;;;;    Copyright Pascal J. Bourguignon 2012 - 2016
+;;;;    Copyright Pascal J. Bourguignon 2012 - 2021
 ;;;;
 ;;;;    This program is free software: you can redistribute it and/or modify
 ;;;;    it under the terms of the GNU Affero General Public License as published by
@@ -46,7 +47,8 @@
            "CONCATENATE-SEQUENCES"
            "PREFIXP"
            "SUFFIXP"
-           "MAPCONCAT")
+           "MAPCONCAT"
+           "SPLIT-SEQUENCE-IF")
   (:documentation
    "

@@ -57,7 +59,7 @@ License:

     AGPL3

-    Copyright Pascal J. Bourguignon 2004 - 2015
+    Copyright Pascal J. Bourguignon 2004 - 2021

     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU Affero General Public License as published by
@@ -483,4 +485,62 @@ RETURN:  Whether SUFFIX is a suffix of the (subseq SEQUENCE START END).
          ""))))


+(defun split-sequence-if (predicate sequence &key remove-empty-subseqs)
+  "
+PREDICATE:      A predicate on elements of the SEQUENCE sequence.  When
+                returning true, the sequence is split before and after
+                the element, which are removed from the resulting
+                subsequences.
+
+SEQUENCE:       A sequence.
+
+REMOVE-EMPTY-SUBSEQS:
+                A boolean.  If true, empty subsequences are removed from the result.
+
+RETURN:         A list of subsequences of SEQUENCE, split upon any
+                element for which the PREDICATE is true.
+
+EXAMPLES:       (split-sequence-if (function zerop) '(1 2 0 3 4 5 0 6 7 8 0 9))
+                --> ((1 2) (3 4 5) (6 7 8) (9))
+                (split-sequence-if (function zerop) #(1 2 0 3 4 5 0 6 7 8 0 9))
+                --> (#(1 2) #(3 4 5) #(6 7 8) #(9))
+                (split-sequence-if (lambda (x) (find x #(#\\space #\\0))) \"1 2 0 3 4 5 0 6 7 8\" )
+                --> (\"1\" \"2\" \"\" \"\" \"3\" \"4\" \"5\" \"\" \"\" \"6\" \"7\" \"8\")
+"
+  (let ((chunks  '()))
+    (etypecase sequence
+      (vector (loop
+                :with position := 0
+                :with nextpos  := 0
+                :with length   := (length sequence)
+                :while (< position length)
+                :do (loop :while (and (< nextpos length)
+                                      (not (funcall predicate (aref sequence nextpos))))
+                          :do (incf nextpos))
+                    (push (subseq sequence position nextpos) chunks)
+                    (setf position (1+ nextpos)
+                          nextpos  position)
+                    (when (= position length)
+                      (push (subseq sequence 0 0) chunks))))
+      (list   (loop
+                :with start := sequence
+                :while start
+                :do (let ((end (loop
+                                 :with current := start
+                                 :while (and current
+                                             (not (funcall predicate (car current))))
+                                 :do (pop current)
+                                 :finally (return current))))
+                      (push (ldiff start end) chunks)
+                      (setf start (cdr end))))))
+    (if remove-empty-subseqs
+        (delete-if (lambda (seq)
+                     (typecase seq
+                       (vector  (zerop (length seq)))
+                       (null    t)
+                       (t       nil)))
+                   (nreverse chunks))
+        (nreverse chunks))))
+
+
 ;;;; THE END ;;;;
diff --git a/common-lisp/cesarum/string.lisp b/common-lisp/cesarum/string.lisp
index 80c8de4..d4b1d2d 100644
--- a/common-lisp/cesarum/string.lisp
+++ b/common-lisp/cesarum/string.lisp
@@ -11,6 +11,8 @@
 ;;;;AUTHORS
 ;;;;    <PJB> Pascal J. Bourguignon <pjb@informatimago.com>
 ;;;;MODIFICATIONS
+;;;;    2021-05-15 <PJB> Added SPLIT-STRING-IF and implemented it
+;;;;                     with SPLIT-STRING using SPLIT-SEQUENCE-IF.
 ;;;;    2016-01-16 <PJB> Added an ignorable declaration to prefixp and suffixp
 ;;;;                     to avoid a warning.
 ;;;;    2015-09-15 <PJB> prefixp and suffixp moved to sequence,
@@ -31,7 +33,7 @@
 ;;;;LEGAL
 ;;;;    AGPL3
 ;;;;
-;;;;    Copyright Pascal J. Bourguignon 2002 - 2016
+;;;;    Copyright Pascal J. Bourguignon 2002 - 2021
 ;;;;
 ;;;;    This program is free software: you can redistribute it and/or modify
 ;;;;    it under the terms of the GNU Affero General Public License as published by
@@ -63,7 +65,8 @@
    "NO-LOWER-CASE-P" "NO-UPPER-CASE-P" "MIXED-CASE-P"
    "LOCALIZE" "DEFTRANSLATION" "STRING-JUSTIFY-LEFT" "STRING-PAD"
    "PREFIXP" "SUFFIXP"
-   "SPLIT-NAME-VALUE" "STRING-REPLACE" "UNSPLIT-STRING" "SPLIT-STRING"
+   "SPLIT-NAME-VALUE" "STRING-REPLACE"
+   "UNSPLIT-STRING" "SPLIT-STRING-IF" "SPLIT-STRING"
    "SPLIT-ESCAPED-STRING" "IMPLODE-STRING" "EXPLODE-STRING"
    "IMPLODE" "EXPLODE"
    "CONCATENATE-STRINGS")
@@ -77,7 +80,7 @@ License:

     AGPL3

-    Copyright Pascal J. Bourguignon 2002 - 2015
+    Copyright Pascal J. Bourguignon 2002 - 2021

     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU Affero General Public License as published by
@@ -310,33 +313,24 @@ RETURN:             A list of substrings of the string denoted by
             (t (incf curr)))))))


-(defun split-string (string &optional (separators " ") (remove-empty nil))
+(defun split-string-if (predicate sequence &key remove-empty-subseqs)
+  (split-sequence-if predicate sequence :remove-empty-subseqs remove-empty-subseqs))
+
+(defun split-string (string &optional (separators " ") (omit-nulls nil))
   "
 STRING:         A sequence.

 SEPARATOR:      A sequence.

+OMIT-NULLS:     A boolean.  If true, empty subsequences are removed from the result.
+
 RETURN:         A list of subsequence of STRING, split upon any element of SEPARATORS.
                 Separators are compared to elements of the STRING with EQL.

-NOTE:           It's actually a simple split-sequence now.
-
-EXAMPLES:       (split-string '(1 2 0 3 4 5 0 6 7 8 0 9) '(0))
-                --> ((1 2) (3 4 5) (6 7 8) (9))
-                (split-string #(1 2 0 3 4 5 0 6 7 8 0 9) #(0))
-                --> (#(1 2) #(3 4 5) #(6 7 8) #(9))
-                (split-string \"1 2 0 3 4 5 0 6 7 8\" '(#\space #\0))
+EXAMPLES:       (split-string \"1 2 0 3 4 5 0 6 7 8\" '(#\space #\0))
                 --> (\"1\" \"2\" \"\" \"\" \"3\" \"4\" \"5\" \"\" \"\" \"6\" \"7\" \"8\")
 "
-  (loop
-    :with strlen = (length string)
-    :for position = 0 :then (1+ nextpos)
-    :for nextpos = (position-if (lambda (e) (find e separators)) string :start position)
-    :unless (and remove-empty
-                 (or (and (= position strlen) (null nextpos))
-                     (eql position nextpos)))
-    :collect (subseq string position nextpos)
-    :while nextpos))
+  (split-sequence-if (lambda (ch) (find ch separators)) string :remove-empty-subseqs omit-nulls))


 (defun unsplit-string (string-list separator &key (adjustable nil) (fill-pointer nil) (size-increment 0))
ViewGit