[ prog / sol / mona ]

prog


What are you working on?

135 2020-07-12 10:47

[2/2]

;; Fold KONS over the successive matches of IRX in the string STR.
;;
;; For every match, KONS is called as (kons i m acc), where I is the
;; index the search started from, M is the match object and ACC is the
;; accumulator (initially KNIL).  When the search position reaches END
;; or no further match is found, the result is (finish i acc).
;;
;; Optional arguments, in order:
;;   finish - procedure of (i acc); defaults to one that returns ACC
;;   start  - exact integer start index; defaults to 0
;;   end    - exact integer end index; defaults to (string-length str)
;;
;; Signals an error if STR is not a string or START/END are not exact
;; integers.
(define (irregex-fold/fast irx kons knil str . o)
  (if (not (string? str)) (error "irregex-fold: not a string" str))
  (let* ((irx (irregex irx))
         (matches (irregex-new-matches irx))
         (finish (or (and (pair? o) (car o)) (lambda (i acc) acc)))
         (start (if (and (pair? o) (pair? (cdr o))) (cadr o) 0))
         (end (if (and (pair? o) (pair? (cdr o)) (pair? (cddr o)))
                  (caddr o)
                  (string-length str)))
         ;; The string is wrapped as a single chunk: (str start end).
         (init-src (list str start end))
         ;; INIT pairs the initial chunk with the initial index; it is
         ;; built once here and passed unchanged to every search below.
         (init (cons init-src start)))
    (if (not (and (integer? start) (exact? start)))
        (error "irregex-fold: not an exact integer" start))
    (if (not (and (integer? end) (exact? end)))
        (error "irregex-fold: not an exact integer" end))
    (irregex-match-chunker-set! matches irregex-basic-string-chunker)
    ;; SRC is the chunk handed to the search, I the current index.
    (let lp ((src init-src) (i start) (acc knil))
      (if (>= i end)
          (finish i acc)
          (let ((m (irregex-search/matches
                    irx
                    irregex-basic-string-chunker
                    init
                    src
                    i
                    matches)))
            (if (not m)
                (finish i acc)
                ;; Plain `let': the ACC passed to KONS is the loop's
                ;; accumulator, and J is read from M before the match
                ;; state is reset below.
                (let ((j (%irregex-match-end-index m 0))
                      (acc (kons i m acc)))
                  (irregex-reset-matches! matches)
                  (cond
                   ;; With the ~consumer? flag set, stop after the
                   ;; first match.
                   ((flag-set? (irregex-flags irx) ~consumer?)
                    (finish j acc))
                   ((= j i)
                    ;; skip one char forward if we match the empty string
                    ;; NOTE: a fresh chunk list starting at j+1 is built
                    ;; here, so SRC is no longer the same object as
                    ;; INIT-SRC on the next iteration.
                    (lp (list str (+ j 1) end) (+ j 1) acc))
                   (else
                    ;; Likewise a fresh chunk list, starting at the
                    ;; match end J.
                    (lp (list str j end) j acc))))))))))

The 'init' value is constructed once and never modified, but 'src' is rebuilt on both recursive calls to 'lp', with its start position advanced. The current position is already advanced through 'i', so advancing the current chunk's start position in sync looks suspicious. The rebuilt chunk will no longer count as the initial chunk, because chunks are compared with eq? and a plain string has only a single chunk. This tampering with the current chunk's start position is from:

Fixing folds on conditional begin patterns which aren't treated as searchers
ashinn committed Nov 28, 2012
https://github.com/ashinn/irregex/commit/2949a461474e0ac30d8c72f0dc81127b19c04d0d

By contrast irregex-fold/chunked/fast doesn't tamper with the current chunk and communicates the advancing of the current position within a chunk solely through the index 'i'.
https://github.com/ashinn/irregex/blob/353b8db8472f9b36a7b08ca21a4227d827750d93/irregex.scm#L3825

;; Fold KONS over the successive matches of IRX in chunked input.
;;
;; CNK is the chunker and START the first chunk.  For every non-empty
;; step KONS is called as (kons src i m acc), where SRC and I are the
;; chunk and index the search started from.  When there is no current
;; chunk or no further match, the result is (finish src i acc).
;;
;; Optional arguments, in order:
;;   finish - procedure of (src i acc); defaults to one returning ACC
;;   i      - start index within START; defaults to the chunker's
;;            start index for START
;;
;; Signals an error if the start index is not an integer.
(define (irregex-fold/chunked/fast irx kons knil cnk start . o)
  (let* ((irx (irregex irx))
         (matches (irregex-new-matches irx))
         (finish (or (and (pair? o) (car o)) (lambda (src i acc) acc)))
         (i (if (and (pair? o) (pair? (cdr o)))
                (cadr o)
                ((chunker-get-start cnk) start)))
         ;; INIT pairs the initial chunk with the initial index; it is
         ;; built once and passed unchanged to every search below.
         (init (cons start i)))
    (if (not (integer? i)) (error "irregex-fold/chunked: not an integer" i))
    (irregex-match-chunker-set! matches cnk)
    ;; The loop variable START shadows the parameter and holds the
    ;; current chunk; the chunk objects themselves are passed through
    ;; as-is, and progress within a chunk is tracked only by I.
    (let lp ((start start) (i i) (acc knil))
      (if (not start)
          (finish start i acc)
          (let ((m (irregex-search/matches irx cnk init start i matches)))
            (if (not m)
                (finish start i acc)
                (let ((end-src (%irregex-match-end-chunk m 0))
                      (end-index (%irregex-match-end-index m 0)))
                  ;; Match ended exactly where the search began: KONS
                  ;; is not called for it; step forward by one index,
                  ;; or move to the next chunk when at the chunk's end.
                  (if (and (eq? end-src start) (= end-index i))
                      (if (>= end-index ((chunker-get-end cnk) end-src ))
                          ;; NOTE(review): the loop tests (not start), so
                          ;; NEXT is presumably #f after the last chunk;
                          ;; chunker-get-start is applied to it before
                          ;; that test -- confirm this is safe.
                          (let ((next ((chunker-get-next cnk) end-src)))
                            (lp next ((chunker-get-start cnk) next) acc))
                          (lp end-src (+ end-index 1) acc))
                      (let ((acc (kons start i m acc)))
                        (irregex-reset-matches! matches)
                        ;; With the ~consumer? flag set, stop after
                        ;; the first match; otherwise resume from the
                        ;; end of this match.
                        (if (flag-set? (irregex-flags irx) ~consumer?)
                            (finish end-src end-index acc)
                            (lp end-src end-index acc)))))))))))

Applying this non-tampering approach to irregex-fold/fast yields the diff:

199


VIP:

do not edit these