chiark / gitweb /
choose: Use a one-pass algorithm to select tracks.
[disorder] / server / choose.c
... / ...
CommitLineData
1/*
2 * This file is part of DisOrder
3 * Copyright (C) 2008 Richard Kettlewell
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
18 * USA
19 */
20/** @file choose.c
21 * @brief Random track chooser
22 *
23 * Picks a track at random and writes it to standard output. If for
24 * any reason no track can be picked - even a trivial reason like a
25 * deadlock - it just exits and expects the server to try again.
26 */
27
28#include <config.h>
29#include "types.h"
30
31#include <getopt.h>
32#include <stdio.h>
33#include <stdlib.h>
34#include <db.h>
35#include <locale.h>
36#include <errno.h>
37#include <sys/types.h>
38#include <unistd.h>
39#include <pcre.h>
40#include <string.h>
41#include <fcntl.h>
42#include <syslog.h>
43#include <time.h>
44
45#include "configuration.h"
46#include "log.h"
47#include "defs.h"
48#include "mem.h"
49#include "kvp.h"
50#include "syscalls.h"
51#include "printf.h"
52#include "trackdb.h"
53#include "trackdb-int.h"
54#include "version.h"
55#include "trackname.h"
56#include "queue.h"
57#include "server-queue.h"
58
/** @brief Database transaction used by main()
 *
 * Started in main() with trackdb_begin_transaction() and passed to the
 * global-preference lookups and to the track scan; committed once the scan
 * has finished.
 */
static DB_TXN *global_tid;
60
/** @brief Long command-line options accepted by main()
 *
 * Each entry maps a long option onto the short option character handled in
 * main()'s switch.  The all-zero entry terminates the table.
 */
static const struct option options[] = {
  { .name = "help",      .has_arg = no_argument,       .flag = NULL, .val = 'h' },
  { .name = "version",   .has_arg = no_argument,       .flag = NULL, .val = 'V' },
  { .name = "config",    .has_arg = required_argument, .flag = NULL, .val = 'c' },
  { .name = "debug",     .has_arg = no_argument,       .flag = NULL, .val = 'd' },
  { .name = "no-debug",  .has_arg = no_argument,       .flag = NULL, .val = 'D' },
  { .name = "syslog",    .has_arg = no_argument,       .flag = NULL, .val = 's' },
  { .name = "no-syslog", .has_arg = no_argument,       .flag = NULL, .val = 'S' },
  { .name = NULL,        .has_arg = 0,                 .flag = NULL, .val = 0 }
};
71
/** @brief Display usage message and terminate
 *
 * Writes the usage summary to standard output, closes it and exits with
 * status 0.  The text lists every option main() accepts, including
 * --no-debug/-D.
 */
static void help(void) {
  xprintf("Usage:\n"
          " disorder-choose [OPTIONS]\n"
          "Options:\n"
          " --help, -h Display usage message\n"
          " --version, -V Display version number\n"
          " --config PATH, -c PATH Set configuration file\n"
          " --debug, -d Turn on debugging\n"
          " --no-debug, -D Turn off debugging\n"
          " --[no-]syslog Enable/disable logging to syslog\n"
          "\n"
          "Track choose for DisOrder. Not intended to be run\n"
          "directly.\n");
  xfclose(stdout);
  exit(0);
}
/** @brief Sum of all weights
 *
 * Running total of the nonzero weights seen so far by
 * collect_tracks_callback().
 */
static unsigned long long total_weight;

/** @brief The winning track
 *
 * Null until collect_tracks_callback() first selects a track.
 */
static const char *winning = 0;

/** @brief Count of tracks
 *
 * Incremented once per callback invocation, whatever the track's weight.
 */
static long ntracks;

/** @brief Tag list parsed from the "required-tags" global preference
 *
 * Set in main(); consulted by compute_weight(), which gives weight 0 to
 * tracks for which tag_intersection() with this list is false (when the list
 * is non-empty).
 */
static char **required_tags;

/** @brief Tag list parsed from the "prohibited-tags" global preference
 *
 * Set in main(); consulted by compute_weight(), which gives weight 0 to
 * tracks for which tag_intersection() with this list is true.
 */
static char **prohibited_tags;
99
100static int queue_contains(const struct queue_entry *head,
101 const char *track) {
102 const struct queue_entry *q;
103
104 for(q = head->next; q != head; q = q->next)
105 if(!strcmp(q->track, track))
106 return 1;
107 return 0;
108}
109
110/** @brief Compute the weight of a track
111 * @param track Track name (UTF-8)
112 * @param data Track data
113 * @param prefs Track preferences
114 * @return Track weight (non-negative)
115 *
116 * Tracks to be excluded entirely are given a weight of 0.
117 */
118static unsigned long compute_weight(const char *track,
119 struct kvp *data,
120 struct kvp *prefs) {
121 const char *s;
122 char **track_tags;
123 time_t last, now;
124
125 /* Reject tracks not in any collection (race between edit config and
126 * rescan) */
127 if(!find_track_root(track)) {
128 info("found track not in any collection: %s", track);
129 return 0;
130 }
131
132 /* Reject aliases to avoid giving aliased tracks extra weight */
133 if(kvp_get(data, "_alias_for"))
134 return 0;
135
136 /* Reject tracks with random play disabled */
137 if((s = kvp_get(prefs, "pick_at_random"))
138 && !strcmp(s, "0"))
139 return 0;
140
141 /* Reject tracks played within the last 8 hours */
142 if((s = kvp_get(prefs, "played_time"))) {
143 last = atoll(s);
144 now = time(0);
145 if(now < last + config->replay_min)
146 return 0;
147 }
148
149 /* Reject tracks currently in the queue or in the recent list */
150 if(queue_contains(&qhead, track)
151 || queue_contains(&phead, track))
152 return 0;
153
154 /* We'll need tags for a number of things */
155 track_tags = parsetags(kvp_get(prefs, "tags"));
156
157 /* Reject tracks with prohibited tags */
158 if(prohibited_tags && tag_intersection(track_tags, prohibited_tags))
159 return 0;
160
161 /* Reject tracks that lack required tags */
162 if(*required_tags && !tag_intersection(track_tags, required_tags))
163 return 0;
164
165 /* Use the configured weight if available */
166 if((s = kvp_get(prefs, "weight"))) {
167 long n;
168 errno = 0;
169
170 n = strtol(s, 0, 10);
171 if((errno == 0 || errno == ERANGE) && n >= 0)
172 return n;
173 }
174
175 return 90000;
176}
177
/** @brief Fill a buffer with random bytes
 * @param buf Where to store the bytes
 * @param n Number of bytes required
 *
 * Reads from /dev/urandom, which is opened on first use and kept open for
 * later calls.  Interrupted and partial reads are retried until @p n bytes
 * have been collected.  Failure to open the device, a read error or
 * end-of-file is reported through fatal().
 */
static void random_bytes(unsigned char *buf, size_t n) {
  static int fd = -1;
  size_t got = 0;

  if(fd < 0) {
    if((fd = open("/dev/urandom", O_RDONLY)) < 0)
      fatal(errno, "opening /dev/urandom");
  }
  while(got < n) {
    /* read() returns ssize_t; keep the full width rather than truncating */
    const ssize_t r = read(fd, buf + got, n - got);

    if(r > 0)
      got += (size_t)r;
    else if(r == 0)
      /* End of file: no amount of retrying will produce more data */
      fatal(0, "short read from /dev/urandom");
    else if(errno != EINTR)
      fatal(errno, "reading /dev/urandom");
    /* else: interrupted by a signal before any data arrived; try again */
  }
}
192
/** @brief Pick a random integer uniformly from [0, limit)
 * @param limit Exclusive upper bound; callers are expected to pass a nonzero
 *              value
 * @return A value r with 0 <= r < limit, or 0 if @p limit is 0
 *
 * Uses rejection sampling on bytes obtained from random_bytes().
 */
static unsigned long long pick_weight(unsigned long long limit) {
  unsigned char buf[(sizeof(unsigned long long) * CHAR_BIT + 7)/8], m;
  unsigned long long t, r, slop;
  int i, nby, nbi;

  /* [0, 0) is empty, and the sampling loop below could never terminate for
   * it, so answer immediately. */
  if (!limit)
    return 0;

  /* First, decide how many bits of output we actually need; do bytes first
   * (they're quicker) and then bits.
   *
   * nby becomes the number of bytes needed to hold limit - 1, and nbi the
   * number of random bits to use.
   *
   * To speed this up, we could use a binary search if we knew where to
   * start.  (Note that shifting by ULLONG_BITS or more (if such a constant
   * existed) is undefined behaviour, so we mustn't do that.)  Figuring out a
   * start point involves preprocessor and/or autoconf magic.
   */
  for (nby = 1, t = (limit - 1) >> 8; t; nby++, t >>= 8)
    ;
  nbi = (nby - 1) << 3; t = limit >> nbi;
  if (t >> 4) { t >>= 4; nbi += 4; }
  if (t >> 2) { t >>= 2; nbi += 2; }
  if (t >> 1) { t >>= 1; nbi += 1; }
  nbi++;

  /* Main randomness collection loop.  We read a number of bytes from the
   * randomness source, and glue them together into an integer (dropping
   * bits off the top byte as necessary).  Call the result r; we have
   * 2^(nbi - 1) <= limit <= 2^nbi and r < 2^nbi.  If r < limit then we win;
   * otherwise we try again.  Given the above bounds, we expect at most 2
   * iterations.
   *
   * Unfortunately there are subtleties.  In particular, 2^nbi may in fact be
   * zero due to overflow.  So in fact what we do is compute slop = 2^nbi -
   * limit >= 0 (modulo 2^width); if r < slop then we try again, otherwise
   * r - slop is our winner.
   *
   * The shift must be done in unsigned long long: with a plain int constant
   * the expression overflows int as soon as nbi reaches 31.  Writing it as
   * 2 << (nbi - 1) rather than 1 << nbi keeps the shift count below the type
   * width even when nbi equals that width.
   */
  slop = (2ULL << (nbi - 1)) - limit;
  /* Mask for the top byte: keep only the nbi % 8 bits that are in use, or
   * the whole byte if nbi is a multiple of 8. */
  m = nbi & 7 ? (1 << (nbi & 7)) - 1 : 0xff;

  do {
    /* Actually get some random data. */
    random_bytes(buf, nby);

    /* Clobber the top byte. */
    buf[0] &= m;

    /* Turn it into an integer. */
    for (r = 0, i = 0; i < nby; i++)
      r = (r << 8) | buf[i];
  } while (r < slop);

  return r - slop;
}
250
251/** @brief Called for each track */
252static int collect_tracks_callback(const char *track,
253 struct kvp *data,
254 struct kvp *prefs,
255 void attribute((unused)) *u,
256 DB_TXN attribute((unused)) *tid) {
257 unsigned long weight = compute_weight(track, data, prefs);
258
259 /* Decide whether this is the winning track.
260 *
261 * Suppose that we have n things, and thing i, for 0 <= i < n, has weight
262 * w_i. Let c_i = w_0 + ... + w_{i-1} be the cumulative weight of the
263 * things previous to thing i, and let W = c_n = w_0 + ... + w_{i-1} be the
264 * total weight. We can clearly choose a random thing with the correct
265 * weightings by picking a random number r in [0, W) and chooeing thing i
266 * where c_i <= r < c_i + w_i. But this involves having an enormous list
267 * and taking two passes over it (which has bad locality and is ugly).
268 *
269 * Here's another way. Initialize v = -1. Examine the things in order;
270 * for thing i, choose a random number r_i in [0, c_i + w_i). If r_i < w_i
271 * then set v <- i.
272 *
273 * Claim. For all 0 <= i < n, the above algorithm chooses thing i with
274 * probability w_i/W.
275 *
276 * Proof. Induction on n. The claim is clear for n = 1. Suppose it's
277 * true for n - 1. Let L be the event that we choose thing n - 1. Clearly
278 * Pr[L] = w_{n-1}/W. Condition on not-L: then the probabilty that we
279 * choose thing i, for 0 <= i < n - 1, is w_i/c_{n-1} (induction
280 * hypothesis); undoing the conditioning gives the desired result.
281 */
282 if(weight) {
283 total_weight += weight;
284 if (pick_weight(total_weight) < weight)
285 winning = track;
286 }
287 ntracks++;
288 return 0;
289}
290
291int main(int argc, char **argv) {
292 int n, logsyslog = !isatty(2), err;
293 const char *tags;
294
295 set_progname(argv);
296 mem_init();
297 if(!setlocale(LC_CTYPE, "")) fatal(errno, "error calling setlocale");
298 while((n = getopt_long(argc, argv, "hVc:dDSs", options, 0)) >= 0) {
299 switch(n) {
300 case 'h': help();
301 case 'V': version("disorder-choose");
302 case 'c': configfile = optarg; break;
303 case 'd': debugging = 1; break;
304 case 'D': debugging = 0; break;
305 case 'S': logsyslog = 0; break;
306 case 's': logsyslog = 1; break;
307 default: fatal(0, "invalid option");
308 }
309 }
310 if(logsyslog) {
311 openlog(progname, LOG_PID, LOG_DAEMON);
312 log_default = &log_syslog;
313 }
314 if(config_read(0)) fatal(0, "cannot read configuration");
315 /* Find out current queue/recent list */
316 queue_read();
317 recent_read();
318 /* Generate the candidate track list */
319 trackdb_init(TRACKDB_NO_RECOVER);
320 trackdb_open(TRACKDB_NO_UPGRADE|TRACKDB_READ_ONLY);
321 global_tid = trackdb_begin_transaction();
322 if((err = trackdb_get_global_tid("required-tags", global_tid, &tags)))
323 fatal(0, "error getting required-tags: %s", db_strerror(err));
324 required_tags = parsetags(tags);
325 if((err = trackdb_get_global_tid("prohibited-tags", global_tid, &tags)))
326 fatal(0, "error getting prohibited-tags: %s", db_strerror(err));
327 prohibited_tags = parsetags(tags);
328 if(trackdb_scan(0, collect_tracks_callback, 0, global_tid))
329 exit(1);
330 trackdb_commit_transaction(global_tid);
331 trackdb_close();
332 trackdb_deinit();
333 //info("ntracks=%ld total_weight=%lld", ntracks, total_weight);
334 if(!total_weight)
335 fatal(0, "no tracks match random choice criteria");
336 if(!winning)
337 fatal(0, "internal: failed to pick a track");
338 /* Pick a track */
339 xprintf("%s", winning);
340 xfclose(stdout);
341 return 0;
342}
343
344/*
345Local Variables:
346c-basic-offset:2
347comment-column:40
348fill-column:79
349indent-tabs-mode:nil
350End:
351*/