Implementation of the Hopcroft-Karp algorithm.

author Simon Tatham <anakin@pobox.com>

Wed, 18 Apr 2018 19:28:17 +0000 (20:28 +0100)

committer Simon Tatham <anakin@pobox.com>

Sun, 22 Apr 2018 15:45:37 +0000 (16:45 +0100)
author Simon Tatham <anakin@pobox.com>
Wed, 18 Apr 2018 19:28:17 +0000 (20:28 +0100)
committer Simon Tatham <anakin@pobox.com>
Sun, 22 Apr 2018 15:45:37 +0000 (16:45 +0100)
diff --git a/Recipe b/Recipe

index a7db32b79590630b3ac7173a546896e7508ac29b..822afb923269dbc1750a335a19f46dabdbd59c9f 100644 (file)
--- a/Recipe
+++ b/Recipe
@@ -69,6 +69,10 @@ obfusc : [U] obfusc STANDALONE
  latincheck : [U] latin[STANDALONE_LATIN_TEST] LATIN_DEPS STANDALONE
  latincheck : [C] latin[STANDALONE_LATIN_TEST] LATIN_DEPS STANDALONE
  
+# Test program built from matching.c.
+matching : [U] matching[STANDALONE_MATCHING_TEST] tree234 STANDALONE
+matching : [C] matching[STANDALONE_MATCHING_TEST] tree234 STANDALONE
+
  puzzles  : [G] windows[COMBINED] WINDOWS_COMMON COMMON ALL noicon.res
  
  # Mac OS X unified application containing all the puzzles.
diff --git a/matching.c b/matching.c

new file mode 100644 (file)

index 0000000..89f67ee
--- /dev/null
+++ b/matching.c
@@ -0,0 +1,753 @@
+/*
+ * Implementation of matching.h.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "puzzles.h"
+#include "matching.h"
+
+struct scratch {
+    /*
+     * Current contents of the in-progress matching. LtoR is an array
+     * of nl integers, each of which holds a value in {0,1,...,nr-1},
+     * or -1 for no current assignment. RtoL is exactly the reverse.
+     *
+     * Invariant: LtoR[i] is non-empty and equal to j if and only if
+     * RtoL[j] is non-empty and equal to i.
+     */
+    int *LtoR, *RtoL;
+
+    /*
+     * Arrays of nl and nr integer respectively, giving the layer
+     * assigned to each integer in the breadth-first search step of
+     * the algorithm.
+     */
+    int *Llayer, *Rlayer;
+
+    /*
+     * Arrays of nl and nr integers respectively, used to hold the
+     * to-do queues in the breadth-first search.
+     */
+    int *Lqueue, *Rqueue;
+
+    /*
+     * An augmenting path of vertices, alternating between L vertices
+     * (in the even-numbered positions, starting at 0) and R (in the
+     * odd positions). Must be long enough to hold any such path that
+     * never repeats a vertex, i.e. must be at least 2*min(nl,nr) in
+     * size.
+     */
+    int *augpath;
+
+    /*
+     * Track the progress of the depth-first search at each
+     * even-numbered layer. Has one element for each even-numbered
+     * position in augpath.
+     */
+    int *dfsstate;
+
+    /*
+     * Store a random permutation of the L vertex indices, if we're
+     * randomising the dfs phase.
+     */
+    int *Lorder;
+};
+
+size_t matching_scratch_size(int nl, int nr)
+{
+    size_t n;
+    int nmin = (nl < nr ? nl : nr);
+
+    n = (sizeof(struct scratch) + sizeof(int)-1)/sizeof(int);
+    n += nl;                           /* LtoR */
+    n += nr;                           /* RtoL */
+    n += nl;                           /* Llayer */
+    n += nr;                           /* Rlayer */
+    n += nl;                           /* Lqueue */
+    n += nr;                           /* Rqueue */
+    n += 2*nmin;                       /* augpath */
+    n += nmin;                         /* dfsstate */
+    n += nl;                           /* Lorder */
+    return n * sizeof(int);
+}
+
+int matching_with_scratch(void *scratchv,
+                          int nl, int nr, int **adjlists, int *adjsizes,
+                          random_state *rs, int *outl, int *outr)
+{
+    struct scratch *s = (struct scratch *)scratchv;
+    int L, R, i, j;
+
+    /*
+     * Set up the various array pointers in the scratch space.
+     */
+    {
+        int *p = scratchv;
+        int nmin = (nl < nr ? nl : nr);
+
+        p += (sizeof(struct scratch) + sizeof(int)-1)/sizeof(int);
+        s->LtoR = p; p += nl;
+        s->RtoL = p; p += nr;
+        s->Llayer = p; p += nl;
+        s->Rlayer = p; p += nr;
+        s->Lqueue = p; p += nl;
+        s->Rqueue = p; p += nr;
+        s->augpath = p; p += 2*nmin;
+        s->dfsstate = p; p += nmin;
+        s->Lorder = p; p += nl;
+    }
+
+    /*
+     * Set up the initial matching, which is empty.
+     */
+    for (L = 0; L < nl; L++)
+        s->LtoR[L] = -1;
+    for (R = 0; R < nr; R++)
+        s->RtoL[R] = -1;
+
+    while (1) {
+        /*
+         * Breadth-first search starting from the unassigned left
+         * vertices, traversing edges from left to right only if they
+         * are _not_ part of the matching, and from right to left only
+         * if they _are_. We assign a 'layer number' to all vertices
+         * visited by this search, with the starting vertices being
+         * layer 0 and every successor of a layer-n node being layer
+         * n+1.
+         */
+        int Lqs, Rqs, layer, target_layer;
+
+        for (L = 0; L < nl; L++)
+            s->Llayer[L] = -1;
+        for (R = 0; R < nr; R++)
+            s->Rlayer[R] = -1;
+
+        Lqs = 0;
+        for (L = 0; L < nl; L++) {
+            if (s->LtoR[L] == -1) {
+                s->Llayer[L] = 0;
+                s->Lqueue[Lqs++] = L;
+            }
+        }
+
+        layer = 0;
+        while (1) {
+            int found_free_R_vertex = FALSE;
+
+            Rqs = 0;
+            for (i = 0; i < Lqs; i++) {
+                L = s->Lqueue[i];
+                assert(s->Llayer[L] == layer);
+
+                for (j = 0; j < adjsizes[L]; j++) {
+                    R = adjlists[L][j];
+                    if (R != s->LtoR[L] && s->Rlayer[R] == -1) {
+                        s->Rlayer[R] = layer+1;
+                        s->Rqueue[Rqs++] = R;
+                        if (s->RtoL[R] == -1)
+                            found_free_R_vertex = TRUE;
+                    }
+                }
+            }
+            layer++;
+
+            if (found_free_R_vertex)
+                break;
+
+            if (Rqs == 0)
+                goto done;
+
+            Lqs = 0;
+            for (j = 0; j < Rqs; j++) {
+                R = s->Rqueue[j];
+                assert(s->Rlayer[R] == layer);
+                if ((L = s->RtoL[R]) != -1 && s->Llayer[L] == -1) {
+                    s->Llayer[L] = layer+1;
+                    s->Lqueue[Lqs++] = L;
+                }
+            }
+            layer++;
+
+            if (Lqs == 0)
+                goto done;
+        }
+
+        target_layer = layer;
+
+        /*
+         * Vertices in the target layer are only interesting if
+         * they're actually unassigned. Blanking out the others here
+         * will save us a special case in the dfs loop below.
+         */
+        for (R = 0; R < nr; R++)
+            if (s->Rlayer[R] == target_layer && s->RtoL[R] != -1)
+                s->Rlayer[R] = -1;
+
+        /*
+         * Choose an ordering in which to try the L vertices at the
+         * start of the next pass.
+         */
+        for (L = 0; L < nl; L++)
+            s->Lorder[L] = L;
+        if (rs)
+            shuffle(s->Lorder, nl, sizeof(*s->Lorder), rs);
+
+        /*
+         * Now depth-first search through that layered set of vertices
+         * to find as many (vertex-)disjoint augmenting paths as we
+         * can, and for each one we find, augment the matching.
+         */
+        s->dfsstate[0] = 0;
+        i = 0;
+        while (1) {
+            /*
+             * Find the next vertex to go on the end of augpath.
+             */
+            if (i == 0) {
+                /* In this special case, we're just looking for L
+                 * vertices that are not yet assigned. */
+                if (s->dfsstate[i] == nl)
+                    break;             /* entire DFS has finished */
+
+                L = s->Lorder[s->dfsstate[i]++];
+
+                if (s->Llayer[L] != 2*i)
+                    continue;          /* skip this vertex */
+            } else {
+                /* In the more usual case, we're going through the
+                 * adjacency list for the previous L vertex. */
+                L = s->augpath[2*i-2];
+                j = s->dfsstate[i]++;
+                if (j == adjsizes[L]) {
+                    /* Run out of neighbours of the previous vertex. */
+                    i--;
+                    continue;
+                }
+                if (rs && adjsizes[L] - j > 1) {
+                    int which = j + random_upto(rs, adjsizes[L] - j);
+                    int tmp = adjlists[L][which];
+                    adjlists[L][which] = adjlists[L][j];
+                    adjlists[L][j] = tmp;
+                }
+                R = adjlists[L][j];
+
+                if (s->Rlayer[R] != 2*i-1)
+                    continue;          /* skip this vertex */
+
+                s->augpath[2*i-1] = R;
+                s->Rlayer[R] = -1;     /* mark vertex as visited */
+
+                if (2*i-1 == target_layer) {
+                    /*
+                     * We've found an augmenting path, in the form of
+                     * an even-sized list of vertices alternating
+                     * L,R,...,L,R, with the initial L and final R
+                     * vertex free and otherwise each R currently
+                     * connected to the next L. Adjust so that each L
+                     * connects to the next R, increasing the edge
+                     * count in the matching by 1.
+                     */
+                    for (j = 0; j < 2*i; j += 2) {
+                        s->LtoR[s->augpath[j]] = s->augpath[j+1];
+                        s->RtoL[s->augpath[j+1]] = s->augpath[j];
+                    }
+
+                    /*
+                     * Having dealt with that path, and already marked
+                     * all its vertices as visited, rewind right to
+                     * the start and resume our DFS from a new
+                     * starting L-vertex.
+                     */
+                    i = 0;
+                    continue;
+                }
+
+                L = s->RtoL[R];
+                if (s->Llayer[L] != 2*i)
+                    continue;          /* skip this vertex */
+            }
+
+            s->augpath[2*i] = L;
+            s->Llayer[L] = -1;         /* mark vertex as visited */
+            i++;
+            s->dfsstate[i] = 0;
+        }
+    }
+
+  done:
+    /*
+     * Fill in the output arrays.
+     */
+    if (outl) {
+        for (i = 0; i < nl; i++)
+            outl[i] = s->LtoR[i];
+    }
+    if (outr) {
+        for (j = 0; j < nr; j++)
+            outr[j] = s->RtoL[j];
+    }
+
+    /*
+     * Return the number of matching edges.
+     */
+    for (i = j = 0; i < nl; i++)
+        if (s->LtoR[i] != -1)
+            j++;
+    return j;
+}
+
+int matching(int nl, int nr, int **adjlists, int *adjsizes,
+             random_state *rs, int *outl, int *outr)
+{
+    void *scratch;
+    int size;
+    int ret;
+
+    size = matching_scratch_size(nl, nr);
+    scratch = malloc(size);
+    if (!scratch)
+       return -1;
+
+    ret = matching_with_scratch(scratch, nl, nr, adjlists, adjsizes,
+                                rs, outl, outr);
+
+    free(scratch);
+
+    return ret;
+}
+
+#ifdef STANDALONE_MATCHING_TEST
+
+/*
+ * Diagnostic routine used in testing this algorithm. It is passed a
+ * pointer to a piece of scratch space that's just been used by
+ * matching_with_scratch, and extracts from it a labelling of the
+ * input graph that acts as a 'witness' to the maximality of the
+ * returned matching.
+ *
+ * The output parameter 'witness' should be an array of (nl+nr)
+ * integers, indexed such that witness[L] corresponds to an L-vertex (for
+ * L=0,1,...,nl-1) and witness[nl+R] corresponds to an R-vertex (for
+ * R=0,1,...,nr-1). On return, this array will assign each vertex a
+ * label which is either 0 or 1, and the following properties will
+ * hold:
+ *
+ *  + all vertices not paired up by the matching are type L0 or R1
+ *  + every L0->R1 edge is used by the matching
+ *  + no L1->R0 edge is used by the matching.
+ *
+ * The mere existence of such a labelling is enough to prove the
+ * maximality of the matching, because if there is any larger matching
+ * then its symmetric difference with this one must include at least
+ * one 'augmenting path', which starts at a free L-vertex and ends at
+ * a free R-vertex, traversing only unused L->R edges and only used
+ * R->L edges. But that would mean it starts at an L0, ends at an R1,
+ * and never follows an edge that can get from an 0 to a 1.
+ */
+static void matching_witness(void *scratchv, int nl, int nr, int *witness)
+{
+    struct scratch *s = (struct scratch *)scratchv;
+    int i, j;
+
+    for (i = 0; i < nl; i++)
+        witness[i] = s->Llayer[i] == -1;
+    for (j = 0; j < nr; j++)
+        witness[nl + j] = s->Rlayer[j] == -1;
+}
+
+/*
+ * Standalone tool to run the matching algorithm.
+ */
+
+#include <string.h>
+#include <ctype.h>
+#include <time.h>
+
+#include "tree234.h"
+
+int nl, nr, count;
+int **adjlists, *adjsizes;
+int *adjdata, *outl, *outr, *witness;
+void *scratch;
+random_state *rs;
+
+void allocate(int nl_, int nr_, int maxedges)
+{
+    nl = nl_;
+    nr = nr_;
+    adjdata = snewn(maxedges, int);
+    adjlists = snewn(nl, int *);
+    adjsizes = snewn(nl, int);
+    outl = snewn(nl, int);
+    outr = snewn(nr, int);
+    witness = snewn(nl+nr, int);
+    scratch = smalloc(matching_scratch_size(nl, nr));
+}
+
+void deallocate(void)
+{
+    sfree(adjlists);
+    sfree(adjsizes);
+    sfree(adjdata);
+    sfree(outl);
+    sfree(outr);
+    sfree(witness);
+    sfree(scratch);
+}
+
+void find_and_check_matching(void)
+{
+    int i, j, k;
+
+    count = matching_with_scratch(scratch, nl, nr, adjlists, adjsizes,
+                                  rs, outl, outr);
+    matching_witness(scratch, nl, nr, witness);
+
+    for (i = j = 0; i < nl; i++) {
+        if (outl[i] != -1) {
+            assert(0 <= outl[i] && outl[i] < nr);
+            assert(outr[outl[i]] == i);
+            j++;
+
+            for (k = 0; k < adjsizes[i]; k++)
+                if (adjlists[i][k] == outl[i])
+                    break;
+            assert(k < adjsizes[i]);
+        }
+    }
+    assert(j == count);
+
+    for (i = j = 0; i < nr; i++) {
+        if (outr[i] != -1) {
+            assert(0 <= outr[i] && outr[i] < nl);
+            assert(outl[outr[i]] == i);
+            j++;
+        }
+    }
+    assert(j == count);
+
+    for (i = 0; i < nl; i++) {
+        if (outl[i] == -1)
+            assert(witness[i] == 0);
+    }
+    for (i = 0; i < nr; i++) {
+        if (outr[i] == -1)
+            assert(witness[nl+i] == 1);
+    }
+    for (i = 0; i < nl; i++) {
+        for (j = 0; j < adjsizes[i]; j++) {
+            k = adjlists[i][j];
+
+            if (outl[i] == k)
+                assert(!(witness[i] == 1 && witness[nl+k] == 0));
+            else
+                assert(!(witness[i] == 0 && witness[nl+k] == 1));
+        }
+    }
+}
+
+struct nodename {
+    const char *name;
+    int index;
+};
+
+int compare_nodes(void *av, void *bv)
+{
+    const struct nodename *a = (const struct nodename *)av;
+    const struct nodename *b = (const struct nodename *)bv;
+    return strcmp(a->name, b->name);
+}
+
+int node_index(tree234 *n2i, tree234 *i2n, const char *name)
+{
+    struct nodename *nn, *nn_prev;
+    char *namedup = dupstr(name);
+
+    nn = snew(struct nodename);
+    nn->name = namedup;
+    nn->index = count234(n2i);
+
+    nn_prev = add234(n2i, nn);
+    if (nn_prev != nn) {
+        sfree(nn);
+        sfree(namedup);
+    } else {
+        addpos234(i2n, nn, nn->index);
+    }
+
+    return nn_prev->index;
+}
+
+struct edge {
+    int L, R;
+};
+
+int compare_edges(void *av, void *bv)
+{
+    const struct edge *a = (const struct edge *)av;
+    const struct edge *b = (const struct edge *)bv;
+    if (a->L < b->L) return -1;
+    if (a->L > b->L) return +1;
+    if (a->R < b->R) return -1;
+    if (a->R > b->R) return +1;
+    return 0;
+}
+
+void matching_from_user_input(FILE *fp, const char *filename)
+{
+    tree234 *Ln2i, *Li2n, *Rn2i, *Ri2n, *edges;
+    char *line = NULL;
+    struct edge *e;
+    int i, lineno = 0;
+    int *adjptr;
+
+    Ln2i = newtree234(compare_nodes);
+    Rn2i = newtree234(compare_nodes);
+    Li2n = newtree234(NULL);
+    Ri2n = newtree234(NULL);
+    edges = newtree234(compare_edges);
+
+    while (sfree(line), lineno++, (line = fgetline(fp)) != NULL) {
+        char *p, *Lname, *Rname;
+
+        p = line;
+        while (*p && isspace((unsigned char)*p)) p++;
+        if (!*p)
+            continue;
+
+        Lname = p;
+        while (*p && !isspace((unsigned char)*p)) p++;
+        if (*p)
+            *p++ = '\0';
+        while (*p && isspace((unsigned char)*p)) p++;
+
+        if (!*p) {
+            fprintf(stderr, "%s:%d: expected 2 words, found 1\n",
+                    filename, lineno);
+            exit(1);
+        }
+
+        Rname = p;
+        while (*p && !isspace((unsigned char)*p)) p++;
+        if (*p)
+            *p++ = '\0';
+        while (*p && isspace((unsigned char)*p)) p++;
+
+        if (*p) {
+            fprintf(stderr, "%s:%d: expected 2 words, found more\n",
+                    filename, lineno);
+            exit(1);
+        }
+
+        e = snew(struct edge);
+        e->L = node_index(Ln2i, Li2n, Lname);
+        e->R = node_index(Rn2i, Ri2n, Rname);
+        if (add234(edges, e) != e) {
+            fprintf(stderr, "%s:%d: duplicate edge\n",
+                    filename, lineno);
+            exit(1);
+        }
+    }
+
+    allocate(count234(Ln2i), count234(Rn2i), count234(edges));
+
+    adjptr = adjdata;
+    for (i = 0; i < nl; i++)
+        adjlists[i] = NULL;
+    for (i = 0; (e = index234(edges, i)) != NULL; i++) {
+        if (!adjlists[e->L])
+            adjlists[e->L] = adjptr;
+        *adjptr++ = e->R;
+        adjsizes[e->L] = adjptr - adjlists[e->L];
+    }
+
+    find_and_check_matching();
+
+    for (i = 0; i < nl; i++) {
+        if (outl[i] != -1) {
+            struct nodename *Lnn = index234(Li2n, i);
+            struct nodename *Rnn = index234(Ri2n, outl[i]);
+            printf("%s %s\n", Lnn->name, Rnn->name);
+        }
+    }
+}
+
+void test_subsets(void)
+{
+    int b = 8;
+    int n = 1 << b;
+    int i, j, nruns, expected_size;
+    int *adjptr;
+    int *edgecounts;
+    struct stats {
+        int min, max;
+        double n, sx, sxx;
+    } *stats;
+    static const char seed[] = "fixed random seed for repeatability";
+
+    /*
+     * Generate a graph in which every subset of [b] = {1,...,b}
+     * (represented as a b-bit integer 0 <= i < n) has an edge going
+     * to every subset obtained by removing exactly one element.
+     *
+     * This graph is the disjoint union of the corresponding graph for
+     * each layer (collection of same-sized subset) of the power set
+     * of [b]. Each of those graphs has a matching of size equal to
+     * the smaller of its vertex sets. So we expect the overall size
+     * of the output matching to be less than n by the size of the
+     * largest layer, that is, to be n - binomial(n, floor(n/2)).
+     *
+     * We run the generation repeatedly, randomising it every time,
+     * and we expect to see every possible edge appear sooner or
+     * later.
+     */
+
+    rs = random_new(seed, strlen(seed));
+
+    allocate(n, n, n*b);
+    adjptr = adjdata;
+    expected_size = 0;
+    for (i = 0; i < n; i++) {
+        adjlists[i] = adjptr;
+        for (j = 0; j < b; j++) {
+            if (i & (1 << j))
+                *adjptr++ = i & ~(1 << j);
+        }
+        adjsizes[i] = adjptr - adjlists[i];
+        if (adjsizes[i] != b/2)
+            expected_size++;
+    }
+
+    edgecounts = snewn(n*b, int);
+    for (i = 0; i < n*b; i++)
+        edgecounts[i] = 0;
+
+    stats = snewn(b, struct stats);
+
+    nruns = 0;
+    while (nruns < 10000) {
+        nruns++;
+        find_and_check_matching();
+        assert(count == expected_size);
+
+        for (i = 0; i < n; i++)
+            for (j = 0; j < b; j++)
+                if ((i ^ outl[i]) == (1 << j))
+                    edgecounts[b*i+j]++;
+
+        if (nruns % 1000 == 0) {
+            for (i = 0; i < b; i++) {
+                struct stats *st = &stats[i];
+                st->min = st->max = -1;
+                st->n = st->sx = st->sxx = 0;
+            }
+
+            for (i = 0; i < n; i++) {
+                int pop = 0;
+                for (j = 0; j < b; j++)
+                    if (i & (1 << j))
+                        pop++;
+                pop--;
+
+                for (j = 0; j < b; j++) {
+                    if (i & (1 << j)) {
+                        struct stats *st = &stats[pop];
+                        int x = edgecounts[b*i+j];
+                        if (st->max == -1 || st->max < x)
+                            st->max = x;
+                        if (st->min == -1 || st->min > x)
+                            st->min = x;
+                        st->n++;
+                        st->sx += x;
+                        st->sxx += (double)x*x;
+                    } else {
+                        assert(edgecounts[b*i+j] == 0);
+                    }
+                }
+            }
+        }
+    }
+
+    printf("after %d runs:\n", nruns);
+    for (j = 0; j < b; j++) {
+        struct stats *st = &stats[j];
+        printf("edges between layers %d,%d:"
+               " min=%d max=%d mean=%f variance=%f\n",
+               j, j+1, st->min, st->max, st->sx/st->n,
+               (st->sxx - st->sx*st->sx/st->n) / st->n);
+    }
+}
+
+int main(int argc, char **argv)
+{
+    static const char stdin_identifier[] = "<standard input>";
+    const char *infile = NULL;
+    int doing_opts = TRUE;
+    enum { USER_INPUT, AUTOTEST } mode = USER_INPUT;
+
+    while (--argc > 0) {
+        const char *arg = *++argv;
+
+        if (doing_opts && arg[0] == '-' && arg[1]) {
+            if (!strcmp(arg, "--")) {
+                doing_opts = FALSE;
+            } else if (!strcmp(arg, "--random")) {
+                char buf[64];
+                int len = sprintf(buf, "%lu", (unsigned long)time(NULL));
+                rs = random_new(buf, len);
+            } else if (!strcmp(arg, "--autotest")) {
+                mode = AUTOTEST;
+            } else {
+                fprintf(stderr, "matching: unrecognised option '%s'\n", arg);
+                return 1;
+            }
+        } else {
+            if (!infile) {
+                infile = (!strcmp(arg, "-") ? stdin_identifier : arg);
+            } else {
+                fprintf(stderr, "matching: too many arguments\n");
+                return 1;
+            }
+        }
+    }
+
+    if (mode == USER_INPUT) {
+        FILE *fp;
+
+        if (!infile)
+            infile = stdin_identifier;
+
+        if (infile != stdin_identifier) {
+            fp = fopen(infile, "r");
+            if (!fp) {
+                fprintf(stderr, "matching: could not open input file '%s'\n",
+                        infile);
+                return 1;
+            }
+        } else {
+            fp = stdin;
+        }
+
+        matching_from_user_input(fp, infile);
+
+        if (infile != stdin_identifier)
+            fclose(fp);
+    }
+
+    if (mode == AUTOTEST) {
+        if (infile) {
+            fprintf(stderr, "matching: expected no filename argument "
+                    "with --autotest\n");
+            return 1;
+        }
+
+        test_subsets();
+    }
+
+    return 0;
+}
+
+#endif /* STANDALONE_MATCHING_TEST */
diff --git a/matching.h b/matching.h

new file mode 100644 (file)

index 0000000..a4d3098
--- /dev/null
+++ b/matching.h
@@ -0,0 +1,80 @@
+/*
+ * Hopcroft-Karp algorithm for finding a maximal matching in a
+ * bipartite graph.
+ */
+
+#ifndef MATCHING_MATCHING_H
+#define MATCHING_MATCHING_H
+
+/*
+ * The actual algorithm.
+ * 
+ * Inputs:
+ * 
+ *  - 'scratch' is previously allocated scratch space of a size
+ *    previously determined by calling 'matching_scratch_size'.
+ * 
+ *  - 'nl' is the number of vertices on the left side of the graph.
+ *    Left vertices are numbered from 0 to nl-1.
+ * 
+ *  - 'nr' is the number of vertices on the left side of the graph.
+ *    Right vertices are numbered from 0 to nr-1.
+ * 
+ *  - 'adjlists' and 'adjsizes' represents the graph in adjacency-list
+ *    form. For each left vertex L, adjlists[L] points to an array of
+ *    adjsizes[L] integers giving the list of right vertices adjacent
+ *    to L.
+ *
+ *  - 'rs', if not NULL, is a random_state used to perturb the
+ *    progress of the algorithm so as to choose randomly from the
+ *    possible matchings if there's more than one. (The exact
+ *    probability distribution can't be guaranteed, but at the very
+ *    least, any matching that exists should be a _possible_ output.)
+ *
+ * If 'rs' is not NULL, then each list in adjlists[] will be permuted
+ * during the course of the algorithm as a side effect. (That's why
+ * it's not an array of _const_ int pointers.)
+ * 
+ * Output:
+ * 
+ *  - 'outl' may be NULL. If non-NULL, it is an array of 'nl'
+ *    integers, and outl[L] will be assigned the index of the right
+ *    vertex that the output matching paired with the left vertex L,
+ *    or -1 if L is unpaired.
+ * 
+ *  - 'outr' may be NULL. If non-NULL, it is an array of 'nr'
+ *    integers, and outr[R] will be assigned the index of the left
+ *    vertex that the output matching paired with the right vertex R,
+ *    or -1 if R is unpaired.
+ * 
+ *  - the returned value from the function is the total number of
+ *    edges in the matching.
+ */
+int matching_with_scratch(void *scratch,
+                          int nl, int nr, int **adjlists, int *adjsizes,
+                          random_state *rs, int *outl, int *outr);
+
+/*
+ * The above function expects its 'scratch' parameter to have already
+ * been set up. This function tells you how much space is needed for a
+ * given size of graph, so that you can allocate a single instance of
+ * scratch space and run the algorithm multiple times without the
+ * overhead of an alloc and free every time.
+ */
+size_t matching_scratch_size(int nl, int nr);
+
+/*
+ * Simplified version of the above function. All parameters are the
+ * same, except that 'scratch' is constructed internally and freed on
+ * exit. This is the simplest way to call the algorithm as a one-off;
+ * however, if you need to call it multiple times on the same size of
+ * graph, it is probably better to call the above version directly so
+ * that you only construct 'scratch' once.
+ *
+ * Additional return value is now -1, meaning that scratch space
+ * could not be allocated.
+ */
+int matching(int nl, int nr, int **adjlists, int *adjsizes,
+             random_state *rs, int *outl, int *outr);
+
+#endif /* MATCHING_MATCHING_H */
author	Simon Tatham <anakin@pobox.com>
	Wed, 18 Apr 2018 19:28:17 +0000 (20:28 +0100)
committer	Simon Tatham <anakin@pobox.com>
	Sun, 22 Apr 2018 15:45:37 +0000 (16:45 +0100)
Recipe		patch \| blob \| history
matching.c	[new file with mode: 0644]	patch \| blob
matching.h	[new file with mode: 0644]	patch \| blob