From: kay.sievers@vrfy.org Date: Thu, 14 Oct 2004 06:13:26 +0000 (-0700) Subject: [PATCH] prevent deadlocks on an corrupt udev database X-Git-Tag: 037~8 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=commitdiff_plain;h=7e89a569cc4db0c1482662dc4df5f60df7aef3ff [PATCH] prevent deadlocks on an corrupt udev database Here is the patch, that should prevent all of the known deadlocks with corrupt tdb databases we discovered. Thanks to Frank Steiner , who tested all this endlessly with a NFS mounted /dev. The conclusion is, that udev will not work on filesystems without proper record locking, but we should prevent the endless loops anyway. This patch implements: o recovery from a corrupted udev database. udev will continue without database support now, instead of doing nothing. So the node should be generated in any case, remove will obviously not work for custom names. o added iteration limits to the tdb-code at the places we discovered endless loops. In the case tdb tries to find more than 100.000 entries with the same hash, we better give up :) o prevent a {all_partitions} loop caused by corrupt db data o log all tdb errors to syslog o switch sleep() to usleep() cause we want to use alarm() --- diff --git a/namedev.c b/namedev.c index 695fb5541..9276b0cbb 100644 --- a/namedev.c +++ b/namedev.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -353,7 +352,6 @@ static struct bus_file { {} }; -#define SECONDS_TO_WAIT_FOR_FILE 10 static void wait_for_device_to_initialize(struct sysfs_device *sysfs_device) { /* sleep until we see the file for this specific bus type show up this @@ -367,14 +365,14 @@ static void wait_for_device_to_initialize(struct sysfs_device *sysfs_device) struct bus_file *b = &bus_files[0]; struct sysfs_attribute *tmpattr; int found = 0; - int loop = SECONDS_TO_WAIT_FOR_FILE; + int loop = WAIT_FOR_FILE_SECONDS * WAIT_FOR_FILE_RETRY_FREQ; while (1) { if (b->bus == NULL) { if (!found) break; - /* sleep to give the kernel a chance to create the file */ - sleep(1); + /* give the kernel a chance to create the file */ + usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ); --loop; if (loop == 0) break; @@ -394,7 +392,8 @@ static void wait_for_device_to_initialize(struct sysfs_device *sysfs_device) } if (!found) dbg("did not find bus type '%s' on list of bus_id_files, " - "contact greg@kroah.com", sysfs_device->bus); + "please report to ", + sysfs_device->bus); exit: return; /* here to prevent compiler warning... */ } @@ -680,7 +679,6 @@ static struct sysfs_device *get_sysfs_device(struct sysfs_class_device *class_de { struct sysfs_device *sysfs_device; struct sysfs_class_device *class_dev_parent; - struct timespec tspec; int loop; /* Figure out where the device symlink is at. For char devices this will @@ -696,16 +694,14 @@ static struct sysfs_device *get_sysfs_device(struct sysfs_class_device *class_de if (class_dev_parent != NULL) dbg("given class device has a parent, use this instead"); - tspec.tv_sec = 0; - tspec.tv_nsec = 10000000; /* sleep 10 millisec */ - loop = 10; + loop = WAIT_FOR_FILE_SECONDS * WAIT_FOR_FILE_RETRY_FREQ; while (loop--) { if (udev_sleep) { if (whitelist_search(class_dev)) { sysfs_device = NULL; goto exit; } - nanosleep(&tspec, NULL); + usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ); } if (class_dev_parent) @@ -727,11 +723,9 @@ device_found: if (sysfs_device->bus[0] != '\0') goto bus_found; - loop = 10; - tspec.tv_nsec = 10000000; while (loop--) { if (udev_sleep) - nanosleep(&tspec, NULL); + usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ); sysfs_get_device_bus(sysfs_device); if (sysfs_device->bus[0] != '\0') diff --git a/tdb/tdb.c b/tdb/tdb.c index e87ea3692..4cc1e0702 100644 --- a/tdb/tdb.c +++ b/tdb/tdb.c @@ -45,7 +45,12 @@ #define STANDALONE #define TDB_DEBUG #define HAVE_MMAP 1 - +/* this should prevent deadlocks loops on corrupt databases + * we've discovered. Most deadlocks happend by iterating over the + * list of entries with the same hash value. */ +#define LOOP_MAX 100000 +#define TDB_LOG(x) TDB_LOG_UDEV x +#define TDB_LOG_UDEV(tdb, level, format, arg...) info(format, ##arg) #ifdef STANDALONE #if HAVE_CONFIG_H @@ -66,6 +71,7 @@ #include "tdb.h" #include "spinlock.h" #include "../udev_lib.h" +#include "../logging.h" #else #include "includes.h" #endif @@ -89,7 +95,9 @@ /* NB assumes there is a local variable called "tdb" that is the * current context, also takes doubly-parenthesized print-style * argument. */ +#ifndef TDB_LOG #define TDB_LOG(x) (tdb->log_fn?((tdb->log_fn x),0) : 0) +#endif /* lock offsets */ #define GLOBAL_LOCK 0 @@ -268,7 +276,7 @@ static int tdb_lock(TDB_CONTEXT *tdb, int list, int ltype) if (tdb->locked[list+1].count == 0) { if (!tdb->read_only && tdb->header.rwlocks) { if (tdb_spinlock(tdb, list, ltype)) { - TDB_LOG((tdb, 0, "tdb_lock spinlock failed on list ltype=%d\n", + TDB_LOG((tdb, 0, "tdb_lock spinlock failed on list %d ltype=%d\n", list, ltype)); return -1; } @@ -481,7 +489,7 @@ static int rec_free_read(TDB_CONTEXT *tdb, tdb_off off, struct list_struct *rec) if (rec->magic == TDB_MAGIC) { /* this happens when a app is showdown while deleting a record - we should not completely fail when this happens */ - TDB_LOG((tdb, 0,"rec_free_read non-free magic at offset=%d - fixing\n", + TDB_LOG((tdb, 0,"rec_free_read non-free magic 0x%x at offset=%d - fixing\n", rec->magic, off)); rec->magic = TDB_FREE_MAGIC; if (tdb_write(tdb, off, rec, sizeof(*rec)) == -1) @@ -617,8 +625,10 @@ int tdb_printfreelist(TDB_CONTEXT *tdb) static int remove_from_freelist(TDB_CONTEXT *tdb, tdb_off off, tdb_off next) { tdb_off last_ptr, i; + int maxloop; /* read in the freelist top */ + maxloop = LOOP_MAX; last_ptr = FREELIST_TOP; while (ofs_read(tdb, last_ptr, &i) != -1 && i != 0) { if (i == off) { @@ -627,6 +637,12 @@ static int remove_from_freelist(TDB_CONTEXT *tdb, tdb_off off, tdb_off next) } /* Follow chain (next offset is at start of record) */ last_ptr = i; + + maxloop--; + if (maxloop == 0) { + TDB_LOG((tdb, 0, "remove_from_freelist: maxloop reached; corrupt database!\n")); + return TDB_ERRCODE(TDB_ERR_CORRUPT, -1); + } } TDB_LOG((tdb, 0,"remove_from_freelist: not on list at off=%d\n", off)); return TDB_ERRCODE(TDB_ERR_CORRUPT, -1); @@ -853,6 +869,7 @@ static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length, { tdb_off rec_ptr, last_ptr, newrec_ptr; struct list_struct newrec; + int maxloop; if (tdb_lock(tdb, -1, F_WRLCK) == -1) return 0; @@ -868,6 +885,7 @@ static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length, goto fail; /* keep looking until we find a freelist record big enough */ + maxloop = LOOP_MAX; while (rec_ptr) { if (rec_free_read(tdb, rec_ptr, rec) == -1) goto fail; @@ -919,6 +937,12 @@ static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length, /* move to the next record */ last_ptr = rec_ptr; rec_ptr = rec->next; + + maxloop--; + if (maxloop == 0) { + TDB_LOG((tdb, 0, "tdb_allocate: maxloop reached; corrupt database!\n")); + return TDB_ERRCODE(TDB_ERR_CORRUPT, 0); + } } /* we didn't find enough space. See if we can expand the database and if we can then try again */ @@ -981,12 +1005,14 @@ static tdb_off tdb_find(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash, struct list_struct *r) { tdb_off rec_ptr; - + int maxloop; + /* read in the hash top */ if (ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) return 0; /* keep looking until we find the right record */ + maxloop = LOOP_MAX; while (rec_ptr) { if (rec_read(tdb, rec_ptr, r) == -1) return 0; @@ -1006,6 +1032,12 @@ static tdb_off tdb_find(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash, SAFE_FREE(k); } rec_ptr = r->next; + + maxloop--; + if (maxloop == 0) { + TDB_LOG((tdb, 0, "tdb_find maxloop reached; corrupt database!\n")); + return TDB_ERRCODE(TDB_ERR_CORRUPT, 0); + } } return TDB_ERRCODE(TDB_ERR_NOEXIST, 0); } @@ -1188,6 +1220,7 @@ static int do_delete(TDB_CONTEXT *tdb, tdb_off rec_ptr, struct list_struct*rec) { tdb_off last_ptr, i; struct list_struct lastrec; + int maxloop; if (tdb->read_only) return -1; @@ -1202,10 +1235,19 @@ static int do_delete(TDB_CONTEXT *tdb, tdb_off rec_ptr, struct list_struct*rec) /* find previous record in hash chain */ if (ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1) return -1; - for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next) + + maxloop = LOOP_MAX; + for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next) { if (rec_read(tdb, i, &lastrec) == -1) return -1; + maxloop--; + if (maxloop == 0) { + TDB_LOG((tdb, 0, "(tdb)do_delete: maxloop reached; corrupt database!\n")); + return TDB_ERRCODE(TDB_ERR_CORRUPT, -1); + } + } + /* unlink it: next ptr is at start of record. */ if (last_ptr == 0) last_ptr = TDB_HASH_TOP(rec->full_hash); @@ -1789,8 +1831,8 @@ TDB_CONTEXT *tdb_open_ex(const char *name, int hash_size, int tdb_flags, /* Is it already in the open list? If so, fail. */ if (tdb_already_open(st.st_dev, st.st_ino)) { TDB_LOG((tdb, 2, "tdb_open_ex: " - "%s (%d,%d) is already open in this process\n", - name, st.st_dev, st.st_ino)); + "%s (%d:%d,%ld) is already open in this process\n", + name, major(st.st_dev), minor(st.st_dev), st.st_ino)); errno = EBUSY; goto fail; } diff --git a/udev.c b/udev.c index e6f2744fc..974b9582d 100644 --- a/udev.c +++ b/udev.c @@ -36,6 +36,9 @@ #include "namedev.h" #include "udevdb.h" +/* timeout flag for udevdb */ +extern sig_atomic_t gotalarm; + /* global variables */ char **main_argv; char **main_envp; @@ -58,6 +61,10 @@ void log_message(int level, const char *format, ...) asmlinkage static void sig_handler(int signum) { switch (signum) { + case SIGALRM: + gotalarm = 1; + info("error: timeout reached, event probably not handled correctly"); + break; case SIGINT: case SIGTERM: udevdb_exit(); @@ -94,7 +101,8 @@ int main(int argc, char *argv[], char *envp[]) dbg("version %s", UDEV_VERSION); - /* initialize our configuration */ + init_logging("udev"); + udev_init_config(); if (strstr(argv[0], "udevstart")) { @@ -146,16 +154,19 @@ int main(int argc, char *argv[], char *envp[]) /* set signal handlers */ act.sa_handler = sig_handler; + sigemptyset (&act.sa_mask); - act.sa_flags = SA_RESTART; + /* alarm must interrupt syscalls*/ + sigaction(SIGALRM, &act, NULL); sigaction(SIGINT, &act, NULL); sigaction(SIGTERM, &act, NULL); + /* trigger timout to interrupt blocking syscalls */ + alarm(ALARM_TIMEOUT); + /* initialize udev database */ - if (udevdb_init(UDEVDB_DEFAULT) != 0) { - dbg("unable to initialize database"); - goto exit; - } + if (udevdb_init(UDEVDB_DEFAULT) != 0) + info("error: unable to initialize database, continuing without database"); switch(act_type) { case UDEVSTART: diff --git a/udev.h b/udev.h index 46c961c2d..d66e9f626 100644 --- a/udev.h +++ b/udev.h @@ -26,6 +26,9 @@ #include #include "libsysfs/sysfs/libsysfs.h" +#define ALARM_TIMEOUT 30 +#define WAIT_FOR_FILE_SECONDS 10 +#define WAIT_FOR_FILE_RETRY_FREQ 10 #define COMMENT_CHARACTER '#' #define NAME_SIZE 256 diff --git a/udev_add.c b/udev_add.c index e1e145de8..78c78a110 100644 --- a/udev_add.c +++ b/udev_add.c @@ -348,11 +348,10 @@ exit: /* wait for the "dev" file to show up in the directory in sysfs. * If it doesn't happen in about 10 seconds, give up. */ -#define SECONDS_TO_WAIT_FOR_FILE 10 static int sleep_for_file(const char *path, char* file) { char filename[SYSFS_PATH_MAX + 6]; - int loop = SECONDS_TO_WAIT_FOR_FILE; + int loop = WAIT_FOR_FILE_SECONDS * WAIT_FOR_FILE_RETRY_FREQ; int retval; strfieldcpy(filename, sysfs_path); @@ -368,7 +367,7 @@ static int sleep_for_file(const char *path, char* file) goto exit; /* sleep to give the kernel a chance to create the dev file */ - sleep(1); + usleep(1000 * 1000 / WAIT_FOR_FILE_RETRY_FREQ); } retval = -ENODEV; exit: diff --git a/udev_remove.c b/udev_remove.c index 7ad7c2402..d4be8bd6f 100644 --- a/udev_remove.c +++ b/udev_remove.c @@ -109,6 +109,7 @@ static int delete_node(struct udevice *dev) int i; char *pos; int len; + int num; strfieldcpy(filename, udev_root); strfieldcat(filename, dev->name); @@ -118,10 +119,15 @@ static int delete_node(struct udevice *dev) if (retval) return retval; - /* remove partition nodes */ - if (dev->partitions > 0) { - info("removing partitions '%s[1-%i]'", filename, dev->partitions); - for (i = 1; i <= dev->partitions; i++) { + /* remove all_partitions nodes */ + num = dev->partitions; + if (num > 0) { + info("removing all_partitions '%s[1-%i]'", filename, num); + if (num > PARTITIONS_COUNT) { + info("garbage from udev database, skip all_partitions removal"); + return -1; + } + for (i = 1; i <= num; i++) { strfieldcpy(partitionname, filename); strintcat(partitionname, i); secure_unlink(partitionname); diff --git a/udevdb.c b/udevdb.c index c4dc4f005..a218b6617 100644 --- a/udevdb.c +++ b/udevdb.c @@ -42,13 +42,16 @@ #include "tdb/tdb.h" static TDB_CONTEXT *udevdb; - +sig_atomic_t gotalarm; int udevdb_add_dev(const char *path, const struct udevice *dev) { TDB_DATA key, data; char keystr[SYSFS_PATH_MAX]; + if (udevdb == NULL) + return -1; + if ((path == NULL) || (dev == NULL)) return -ENODEV; @@ -68,6 +71,9 @@ int udevdb_get_dev(const char *path, struct udevice *dev) { TDB_DATA key, data; + if (udevdb == NULL) + return -1; + if (path == NULL) return -ENODEV; @@ -88,6 +94,9 @@ int udevdb_delete_dev(const char *path) TDB_DATA key; char keystr[SYSFS_PATH_MAX]; + if (udevdb == NULL) + return -1; + if (path == NULL) return -EINVAL; @@ -121,6 +130,8 @@ int udevdb_init(int init_flag) if (init_flag != UDEVDB_DEFAULT && init_flag != UDEVDB_INTERNAL) return -EINVAL; + tdb_set_lock_alarm(&gotalarm); + udevdb = tdb_open(udev_db_filename, 0, init_flag, O_RDWR | O_CREAT, 0644); if (udevdb == NULL) { if (init_flag == UDEVDB_INTERNAL) @@ -160,6 +171,9 @@ int udevdb_call_foreach(int (*user_record_handler) (char *path, struct udevice * { int retval = 0; + if (udevdb == NULL) + return -1; + if (user_record_handler == NULL) { dbg("invalid user record handling function"); return -EINVAL; diff --git a/wait_for_sysfs.c b/wait_for_sysfs.c index f87253d04..a5d8ab411 100644 --- a/wait_for_sysfs.c +++ b/wait_for_sysfs.c @@ -101,11 +101,11 @@ static int wait_for_class_device_attributes(struct sysfs_class_device *class_dev return -1; } -/* skip waiting for physical device */ +/* check if we need to wait for a physical device */ static int class_device_expect_no_device_link(struct sysfs_class_device *class_dev) { - /* List of devices without a "device" symlink - * set .device to NULL to accept all devices in that subsystem */ + /* list of devices without a "device" symlink to the physical device + * if device is set to NULL, no devices in that subsystem has a link */ static struct class_device { char *subsystem; char *device; @@ -160,10 +160,9 @@ static int class_device_expect_no_device_link(struct sysfs_class_device *class_d struct class_device *classdevice; int len; - /* look if we want to look for another file instead of "dev" */ for (classdevice = class_device; classdevice->subsystem != NULL; classdevice++) { if (strcmp(class_dev->classname, classdevice->subsystem) == 0) { - /* if device is NULL, all devices in this class are ok */ + /* see if no device in this class is expected to have a device-link */ if (classdevice->device == NULL) return 1; @@ -173,11 +172,11 @@ static int class_device_expect_no_device_link(struct sysfs_class_device *class_d if (strncmp(class_dev->name, classdevice->device, len) != 0) continue; - /* exact match */ + /* exact name match */ if (strlen(class_dev->name) == len) return 1; - /* instance numbers are matching too */ + /* name match with instance number */ if (isdigit(class_dev->name[len])) return 1; }