findloop.c

   1 /*
   2  * Routine for finding loops in graphs, reusable across multiple
   3  * puzzles.
   4  *
   5  * The strategy is Tarjan's bridge-finding algorithm, which is
   6  * designed to list all edges whose removal would disconnect a
   7  * previously connected component of the graph. We're interested in
   8  * exactly the reverse - edges that are part of a loop in the graph
   9  * are precisely those which _wouldn't_ disconnect anything if removed
  10  * (individually) - but of course flipping the sense of the output is
  11  * easy.
  12  */
  13
  14 #include "puzzles.h"
  15
  16 struct findloopstate {
  17     int parent, child, sibling, visited;
  18     int index, minindex, maxindex;
  19     int minreachable, maxreachable;
  20     int bridge;
  21 };
  22
  23 struct findloopstate *findloop_new_state(int nvertices)
  24 {
  25     /*
  26      * Allocate a findloopstate structure for each vertex, and one
  27      * extra one at the end which will be the overall root of a
  28      * 'super-tree', which links the whole graph together to make it
  29      * as easy as possible to iterate over all the connected
  30      * components.
  31      */
  32     return snewn(nvertices + 1, struct findloopstate);
  33 }
  34
  35 void findloop_free_state(struct findloopstate *state)
  36 {
  37     sfree(state);
  38 }
  39
  40 int findloop_is_loop_edge(struct findloopstate *pv, int u, int v)
  41 {
  42     /*
  43      * Since the algorithm is intended for finding bridges, and a
  44      * bridge must be part of any spanning tree, it follows that there
  45      * is at most one bridge per vertex.
  46      *
  47      * Furthermore, by finding a _rooted_ spanning tree (so that each
  48      * bridge is a parent->child link), you can find an injection from
  49      * bridges to vertices (namely, map each bridge to the vertex at
  50      * its child end).
  51      *
  52      * So if the u-v edge is a bridge, then either v was u's parent
  53      * when the algorithm ran and we set pv[u].bridge = v, or vice
  54      * versa.
  55      */
  56     return !(pv[u].bridge == v || pv[v].bridge == u);
  57 }
  58
  59 int findloop_run(struct findloopstate *pv, int nvertices,
  60                  neighbour_fn_t neighbour, void *ctx)
  61 {
  62     int u, v, w, root, index;
  63     int nbridges, nedges;
  64
  65     root = nvertices;
  66
  67     /*
  68      * First pass: organise the graph into a rooted spanning forest.
  69      * That is, a tree structure with a clear up/down orientation -
  70      * every node has exactly one parent (which may be 'root') and
  71      * zero or more children, and every parent-child link corresponds
  72      * to a graph edge.
  73      *
  74      * (A side effect of this is to find all the connected components,
  75      * which of course we could do less confusingly with a dsf - but
  76      * then we'd have to do that *and* build the tree, so it's less
  77      * effort to do it all at once.)
  78      */
  79     for (v = 0; v <= nvertices; v++) {
  80         pv[v].parent = root;
  81         pv[v].child = -2;
  82         pv[v].sibling = -1;
  83         pv[v].visited = FALSE;
  84     }
  85     pv[root].child = -1;
  86     nedges = 0;
  87     debug(("------------- new find_loops, nvertices=%d\n", nvertices));
  88     for (v = 0; v < nvertices; v++) {
  89         if (pv[v].parent == root) {
  90             /*
  91              * Found a new connected component. Enumerate and treeify
  92              * it.
  93              */
  94             pv[v].sibling = pv[root].child;
  95             pv[root].child = v;
  96             debug(("%d is new child of root\n", v));
  97
  98             u = v;
  99             while (1) {
 100                 if (!pv[u].visited) {
 101                     pv[u].visited = TRUE;
 102
 103                     /*
 104                      * Enumerate the neighbours of u, and any that are
 105                      * as yet not in the tree structure (indicated by
 106                      * child==-2, and distinct from the 'visited'
 107                      * flag) become children of u.
 108                      */
 109                     debug(("  component pass: processing %d\n", u));
 110                     for (w = neighbour(u, ctx); w >= 0;
 111                          w = neighbour(-1, ctx)) {
 112                         debug(("    edge %d-%d\n", u, w));
 113                         if (pv[w].child == -2) {
 114                             debug(("      -> new child\n"));
 115                             pv[w].child = -1;
 116                             pv[w].sibling = pv[u].child;
 117                             pv[w].parent = u;
 118                             pv[u].child = w;
 119                         }
 120
 121                         /* While we're here, count the edges in the whole
 122                          * graph, so that we can easily check at the end
 123                          * whether all of them are bridges, i.e. whether
 124                          * no loop exists at all. */
 125                         if (w > u) /* count each edge only in one direction */
 126                             nedges++;
 127                     }
 128
 129                     /*
 130                      * Now descend in depth-first search.
 131                      */
 132                     if (pv[u].child >= 0) {
 133                         u = pv[u].child;
 134                         debug(("    descending to %d\n", u));
 135                         continue;
 136                     }
 137                 }
 138
 139                 if (u == v) {
 140                     debug(("      back at %d, done this component\n", u));
 141                     break;
 142                 } else if (pv[u].sibling >= 0) {
 143                     u = pv[u].sibling;
 144                     debug(("    sideways to %d\n", u));
 145                 } else {
 146                     u = pv[u].parent;
 147                     debug(("    ascending to %d\n", u));
 148                 }
 149             }
 150         }
 151     }
 152
 153     /*
 154      * Second pass: index all the vertices in such a way that every
 155      * subtree has a contiguous range of indices. (Easily enough done,
 156      * by iterating through the tree structure we just built and
 157      * numbering its elements as if they were those of a sorted list.)
 158      *
 159      * For each vertex, we compute the min and max index of the
 160      * subtree starting there.
 161      *
 162      * (We index the vertices in preorder, per Tarjan's original
 163      * description, so that each vertex's min subtree index is its own
 164      * index; but that doesn't actually matter; either way round would
 165      * do. The important thing is that we have a simple arithmetic
 166      * criterion that tells us whether a vertex is in a given subtree
 167      * or not.)
 168      */
 169     debug(("--- begin indexing pass\n"));
 170     index = 0;
 171     for (v = 0; v < nvertices; v++)
 172         pv[v].visited = FALSE;
 173     pv[root].visited = TRUE;
 174     u = pv[root].child;
 175     while (1) {
 176         if (!pv[u].visited) {
 177             pv[u].visited = TRUE;
 178
 179             /*
 180              * Index this node.
 181              */
 182             pv[u].minindex = pv[u].index = index;
 183             debug(("  vertex %d <- index %d\n", u, index));
 184             index++;
 185
 186             /*
 187              * Now descend in depth-first search.
 188              */
 189             if (pv[u].child >= 0) {
 190                 u = pv[u].child;
 191                 debug(("    descending to %d\n", u));
 192                 continue;
 193             }
 194         }
 195
 196         if (u == root) {
 197             debug(("      back at %d, done indexing\n", u));
 198             break;
 199         }
 200
 201         /*
 202          * As we re-ascend to here from its children (or find that we
 203          * had no children to descend to in the first place), fill in
 204          * its maxindex field.
 205          */
 206         pv[u].maxindex = index-1;
 207         debug(("  vertex %d <- maxindex %d\n", u, pv[u].maxindex));
 208
 209         if (pv[u].sibling >= 0) {
 210             u = pv[u].sibling;
 211             debug(("    sideways to %d\n", u));
 212         } else {
 213             u = pv[u].parent;
 214             debug(("    ascending to %d\n", u));
 215         }
 216     }
 217
 218     /*
 219      * We're ready to generate output now, so initialise the output
 220      * fields.
 221      */
 222     for (v = 0; v < nvertices; v++)
 223         pv[v].bridge = -1;
 224
 225     /*
 226      * Final pass: determine the min and max index of the vertices
 227      * reachable from every subtree, not counting the link back to
 228      * each vertex's parent. Then our criterion is: given a vertex u,
 229      * defining a subtree consisting of u and all its descendants, we
 230      * compare the range of vertex indices _in_ that subtree (which is
 231      * just the minindex and maxindex of u) with the range of vertex
 232      * indices in the _neighbourhood_ of the subtree (computed in this
 233      * final pass, and not counting u's own edge to its parent), and
 234      * if the latter includes anything outside the former, then there
 235      * must be some path from u to outside its subtree which does not
 236      * go through the parent edge - i.e. the edge from u to its parent
 237      * is part of a loop.
 238      */
 239     debug(("--- begin min-max pass\n"));
 240     nbridges = 0;
 241     for (v = 0; v < nvertices; v++)
 242         pv[v].visited = FALSE;
 243     u = pv[root].child;
 244     pv[root].visited = TRUE;
 245     while (1) {
 246         if (!pv[u].visited) {
 247             pv[u].visited = TRUE;
 248
 249             /*
 250              * Look for vertices reachable directly from u, including
 251              * u itself.
 252              */
 253             debug(("  processing vertex %d\n", u));
 254             pv[u].minreachable = pv[u].maxreachable = pv[u].minindex;
 255             for (w = neighbour(u, ctx); w >= 0; w = neighbour(-1, ctx)) {
 256                 debug(("    edge %d-%d\n", u, w));
 257                 if (w != pv[u].parent) {
 258                     int i = pv[w].index;
 259                     if (pv[u].minreachable > i)
 260                         pv[u].minreachable = i;
 261                     if (pv[u].maxreachable < i)
 262                         pv[u].maxreachable = i;
 263                 }
 264             }
 265             debug(("    initial min=%d max=%d\n",
 266                    pv[u].minreachable, pv[u].maxreachable));
 267
 268             /*
 269              * Now descend in depth-first search.
 270              */
 271             if (pv[u].child >= 0) {
 272                 u = pv[u].child;
 273                 debug(("    descending to %d\n", u));
 274                 continue;
 275             }
 276         }
 277
 278         if (u == root) {
 279             debug(("      back at %d, done min-maxing\n", u));
 280             break;
 281         }
 282
 283         /*
 284          * As we re-ascend to this vertex, go back through its
 285          * immediate children and do a post-update of its min/max.
 286          */
 287         for (v = pv[u].child; v >= 0; v = pv[v].sibling) {
 288             if (pv[u].minreachable > pv[v].minreachable)
 289                 pv[u].minreachable = pv[v].minreachable;
 290             if (pv[u].maxreachable < pv[v].maxreachable)
 291                 pv[u].maxreachable = pv[v].maxreachable;
 292         }
 293
 294         debug(("  postorder update of %d: min=%d max=%d (indices %d-%d)\n", u,
 295                pv[u].minreachable, pv[u].maxreachable,
 296                pv[u].minindex, pv[u].maxindex));
 297
 298         /*
 299          * And now we know whether each to our own parent is a bridge.
 300          */
 301         if ((v = pv[u].parent) != root) {
 302             if (pv[u].minreachable >= pv[u].minindex &&
 303                 pv[u].maxreachable <= pv[u].maxindex) {
 304                 /* Yes, it's a bridge. */
 305                 pv[u].bridge = v;
 306                 nbridges++;
 307                 debug(("  %d-%d is a bridge\n", v, u));
 308             } else {
 309                 debug(("  %d-%d is not a bridge\n", v, u));
 310             }
 311         }
 312
 313         if (pv[u].sibling >= 0) {
 314             u = pv[u].sibling;
 315             debug(("    sideways to %d\n", u));
 316         } else {
 317             u = pv[u].parent;
 318             debug(("    ascending to %d\n", u));
 319         }
 320     }
 321
 322     debug(("finished, nedges=%d nbridges=%d\n", nedges, nbridges));
 323
 324     /*
 325      * Done.
 326      */
 327     return nbridges < nedges;
 328 }
 329
 330 /*
 331  * Appendix: the long and painful history of loop detection in these puzzles
 332  * =========================================================================
 333  *
 334  * For interest, I thought I'd write up the five loop-finding methods
 335  * I've gone through before getting to this algorithm. It's a case
 336  * study in all the ways you can solve this particular problem
 337  * wrongly, and also how much effort you can waste by not managing to
 338  * find the existing solution in the literature :-(
 339  *
 340  * Vertex dsf
 341  * ----------
 342  *
 343  * Initially, in puzzles where you need to not have any loops in the
 344  * solution graph, I detected them by using a dsf to track connected
 345  * components of vertices. Iterate over each edge unifying the two
 346  * vertices it connects; but before that, check if the two vertices
 347  * are _already_ known to be connected. If so, then the new edge is
 348  * providing a second path between them, i.e. a loop exists.
 349  *
 350  * That's adequate for automated solvers, where you just need to know
 351  * _whether_ a loop exists, so as to rule out that move and do
 352  * something else. But during play, you want to do better than that:
 353  * you want to _point out_ the loops with error highlighting.
 354  *
 355  * Graph pruning
 356  * -------------
 357  *
 358  * So my second attempt worked by iteratively pruning the graph. Find
 359  * a vertex with degree 1; remove that edge; repeat until you can't
 360  * find such a vertex any more. This procedure will remove *every*
 361  * edge of the graph if and only if there were no loops; so if there
 362  * are any edges remaining, highlight them.
 363  *
 364  * This successfully highlights loops, but not _only_ loops. If the
 365  * graph contains a 'dumb-bell' shaped subgraph consisting of two
 366  * loops connected by a path, then we'll end up highlighting the
 367  * connecting path as well as the loops. That's not what we wanted.
 368  *
 369  * Vertex dsf with ad-hoc loop tracing
 370  * -----------------------------------
 371  *
 372  * So my third attempt was to go back to the dsf strategy, only this
 373  * time, when you detect that a particular edge connects two
 374  * already-connected vertices (and hence is part of a loop), you try
 375  * to trace round that loop to highlight it - before adding the new
 376  * edge, search for a path between its endpoints among the edges the
 377  * algorithm has already visited, and when you find one (which you
 378  * must), highlight the loop consisting of that path plus the new
 379  * edge.
 380  *
 381  * This solves the dumb-bell problem - we definitely now cannot
 382  * accidentally highlight any edge that is *not* part of a loop. But
 383  * it's far from clear that we'll highlight *every* edge that *is*
 384  * part of a loop - what if there were multiple paths between the two
 385  * vertices? It would be difficult to guarantee that we'd always catch
 386  * every single one.
 387  *
 388  * On the other hand, it is at least guaranteed that we'll highlight
 389  * _something_ if any loop exists, and in other error highlighting
 390  * situations (see in particular the Tents connected component
 391  * analysis) I've been known to consider that sufficient. So this
 392  * version hung around for quite a while, until I had a better idea.
 393  *
 394  * Face dsf
 395  * --------
 396  *
 397  * Round about the time Loopy was being revamped to include non-square
 398  * grids, I had a much cuter idea, making use of the fact that the
 399  * graph is planar, and hence has a concept of faces.
 400  *
 401  * In Loopy, there are really two graphs: the 'grid', consisting of
 402  * all the edges that the player *might* fill in, and the solution
 403  * graph of the edges the player actually *has* filled in. The
 404  * algorithm is: set up a dsf on the *faces* of the grid. Iterate over
 405  * each edge of the grid which is _not_ marked by the player as an
 406  * edge of the solution graph, unifying the faces on either side of
 407  * that edge. This groups the faces into connected components. Now,
 408  * there is more than one connected component iff a loop exists, and
 409  * moreover, an edge of the solution graph is part of a loop iff the
 410  * faces on either side of it are in different connected components!
 411  *
 412  * This is the first algorithm I came up with that I was confident
 413  * would successfully highlight exactly the correct set of edges in
 414  * all cases. It's also conceptually elegant, and very easy to
 415  * implement and to be confident you've got it right (since it just
 416  * consists of two very simple loops over the edge set, one building
 417  * the dsf and one reading it off). I was very pleased with it.
 418  *
 419  * Doing the same thing in Slant is slightly more difficult because
 420  * the set of edges the user can fill in do not form a planar graph
 421  * (the two potential edges in each square cross in the middle). But
 422  * you can still apply the same principle by considering the 'faces'
 423  * to be diamond-shaped regions of space around each horizontal or
 424  * vertical grid line. Equivalently, pretend each edge added by the
 425  * player is really divided into two edges, each from a square-centre
 426  * to one of the square's corners, and now the grid graph is planar
 427  * again.
 428  *
 429  * However, it fell down when - much later - I tried to implement the
 430  * same algorithm in Net.
 431  *
 432  * Net doesn't *absolutely need* loop detection, because of its system
 433  * of highlighting squares connected to the source square: an argument
 434  * involving counting vertex degrees shows that if any loop exists,
 435  * then it must be counterbalanced by some disconnected square, so
 436  * there will be _some_ error highlight in any invalid grid even
 437  * without loop detection. However, in large complicated cases, it's
 438  * still nice to highlight the loop itself, so that once the player is
 439  * clued in to its existence by a disconnected square elsewhere, they
 440  * don't have to spend forever trying to find it.
 441  *
 442  * The new wrinkle in Net, compared to other loop-disallowing puzzles,
 443  * is that it can be played with wrapping walls, or - topologically
 444  * speaking - on a torus. And a torus has a property that algebraic
 445  * topologists would know of as a 'non-trivial H_1 homology group',
 446  * which essentially means that there can exist a loop on a torus
 447  * which *doesn't* separate the surface into two regions disconnected
 448  * from each other.
 449  *
 450  * In other words, using this algorithm in Net will do fine at finding
 451  * _small_ localised loops, but a large-scale loop that goes (say) off
 452  * the top of the grid, back on at the bottom, and meets up in the
 453  * middle again will not be detected.
 454  *
 455  * Footpath dsf
 456  * ------------
 457  *
 458  * To solve this homology problem in Net, I hastily thought up another
 459  * dsf-based algorithm.
 460  *
 461  * This time, let's consider each edge of the graph to be a road, with
 462  * a separate pedestrian footpath down each side. We'll form a dsf on
 463  * those imaginary segments of footpath.
 464  *
 465  * At each vertex of the graph, we go round the edges leaving that
 466  * vertex, in order around the vertex. For each pair of edges adjacent
 467  * in this order, we unify their facing pair of footpaths (e.g. if
 468  * edge E appears anticlockwise of F, then we unify the anticlockwise
 469  * footpath of F with the clockwise one of E) . In particular, if a
 470  * vertex has degree 1, then the two footpaths on either side of its
 471  * single edge are unified.
 472  *
 473  * Then, an edge is part of a loop iff its two footpaths are not
 474  * reachable from one another.
 475  *
 476  * This algorithm is almost as simple to implement as the face dsf,
 477  * and it works on a wider class of graphs embedded in plane-like
 478  * surfaces; in particular, it fixes the torus bug in the face-dsf
 479  * approach. However, it still depends on the graph having _some_ sort
 480  * of embedding in a 2-manifold, because it relies on there being a
 481  * meaningful notion of 'order of edges around a vertex' in the first
 482  * place, so you couldn't use it on a wildly nonplanar graph like the
 483  * diamond lattice. Also, more subtly, it depends on the graph being
 484  * embedded in an _orientable_ surface - and that's a thing that might
 485  * much more plausibly change in future puzzles, because it's not at
 486  * all unlikely that at some point I might feel moved to implement a
 487  * puzzle that can be played on the surface of a Mobius strip or a
 488  * Klein bottle. And then even this algorithm won't work.
 489  *
 490  * Tarjan's bridge-finding algorithm
 491  * ---------------------------------
 492  *
 493  * And so, finally, we come to the algorithm above. This one is pure
 494  * graph theory: it doesn't depend on any concept of 'faces', or 'edge
 495  * ordering around a vertex', or any other trapping of a planar or
 496  * quasi-planar graph embedding. It should work on any graph
 497  * whatsoever, and reliably identify precisely the set of edges that
 498  * form part of some loop. So *hopefully* this long string of failures
 499  * has finally come to an end...
 500  */