From: Richard Kettlewell <rjk@greenend.org.uk>
Date: Sun, 18 Nov 2007 22:53:32 +0000 (+0000)
Subject: more efficient utf32_iterator_set()
X-Git-Tag: debian-1_5_99dev9~1^2~33
X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~mdw/git/disorder/commitdiff_plain/5617aaff51ba333441230e3808bc697e66540492?ds=inline

more efficient utf32_iterator_set()
---

diff --git a/lib/unicode.c b/lib/unicode.c
index 95c9f6d..5b48b3c 100644
--- a/lib/unicode.c
+++ b/lib/unicode.c
@@ -398,17 +398,23 @@ size_t utf32_iterator_where(utf32_iterator it) {
  * of @p n is specified then the iterator is not changed.
  */
 int utf32_iterator_set(utf32_iterator it, size_t n) {
-  /* TODO figure out how far we must back up to be able to re-synchronize; see
-   * UAX #29 s6.4. */
-  if(n > it->ns)
+  /* We can't just jump to position @p n; the @p last[] values would be wrong.
+   * Instead back up past two non-ignorable code points (or to the start if
+   * there are fewer), forget the stale @p last[] and advance forwards again.
+   */
+  size_t m = n;
+  int i;
+
+  if(n > it->ns)                        /* range check */
     return -1;
-  if(n >= it->n)
-    n -= it->n;
-  else {
-    it->n = 0;
-    it->last[0] = it->last[1] = -1;
-  }
-  return utf32_iterator_advance(it, n);
+  for(i = 0; i < 2; ++i) {
+    while(m > 0 && utf32__boundary_ignorable(utf32__word_break(it->s[m - 1])))
+      --m;                              /* skip trailing ignorables */
+    if(m > 0) --m;                      /* step over one non-ignorable */
+  }
+  it->last[0] = it->last[1] = -1;
+  it->n = m;
+  return utf32_iterator_advance(it, n - m);
 }
 
 /** @brief Advance an iterator