• Herbert Xu's avatar
    [PATCH] nbd: fix TX/RX race condition · 4b2f0260
    Herbert Xu authored
    
    Janos Haar of First NetCenter Bt. reported numerous crashes involving the
    NBD driver.  With his help, this was tracked down to bogus bio vectors,
    which in turn were the result of a race condition between the
    receive/transmit routines in the NBD driver.
    
    The bug manifests itself like this:
    
    CPU0				CPU1
    do_nbd_request
    	add req to queuelist
    	nbd_send_request
    		send req head
    		for each bio
    			kmap
    			send
    				nbd_read_stat
    					nbd_find_request
    					nbd_end_request
    			kunmap
    
    When CPU1 finishes nbd_end_request, the request and all its associated
    bio's are freed.  So when CPU0 calls kunmap whose argument is derived from
    the last bio, it may crash.
    
    Under normal circumstances, the race occurs only on the last bio.  However,
    if an error is encountered on the remote NBD server (such as an incorrect
    magic number in the request), or if there were a bug in the server, it is
    possible for the nbd_end_request to occur any time after the request's
    addition to the queuelist.
    
    The following patch fixes this problem by making sure that requests are not
    added to the queuelist until after their transmission has completed.
    
    In order for the receiving side to be ready for responses involving
    requests still being transmitted, the patch introduces the concept of the
    active request.
    
    When a response matches the current active request, its processing is
    delayed until after the transmission has come to a stop.
    
    This has been tested by Janos and it has been successful in curing this
    race condition.
    
    From: Herbert Xu <herbert@gondor.apana.org.au>
    
      Here is an updated patch which removes the active_req wait in
      nbd_clear_queue and the associated memory barrier.
    
      I've also clarified this in the comment.
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
    Cc: <djani22@dynamicweb.hu>
    Cc: Paul Clements <Paul.Clements@SteelEye.com>
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>
    4b2f0260
nbd.h 2.43 KB
/*
 * 1999 Copyright (C) Pavel Machek, pavel@ucw.cz. This code is GPL.
 * 1999/11/04 Copyright (C) 1999 VMware, Inc. (Regis "HPReg" Duchesne)
 *            Made nbd_end_request() use the io_request_lock
 * 2001 Copyright (C) Steven Whitehouse
 *            New nbd_end_request() for compatibility with new linux block
 *            layer code.
 * 2003/06/24 Louis D. Langholtz <ldl@aros.net>
 *            Removed unneeded blksize_bits field from nbd_device struct.
 *            Cleanup PARANOIA usage & code.
 * 2004/02/19 Paul Clements
 *            Removed PARANOIA, plus various cleanup and comments
 */

#ifndef LINUX_NBD_H
#define LINUX_NBD_H

/*
 * ioctl request codes for the NBD device.  Each code is built with _IO()
 * from the 0xab type byte and a sequential command number (0 through 8).
 */
#define NBD_SET_SOCK		_IO(0xab, 0)
#define NBD_SET_BLKSIZE		_IO(0xab, 1)
#define NBD_SET_SIZE		_IO(0xab, 2)
#define NBD_DO_IT		_IO(0xab, 3)
#define NBD_CLEAR_SOCK		_IO(0xab, 4)
#define NBD_CLEAR_QUE		_IO(0xab, 5)
#define NBD_PRINT_DEBUG		_IO(0xab, 6)
#define NBD_SET_SIZE_BLOCKS	_IO(0xab, 7)
#define NBD_DISCONNECT		_IO(0xab, 8)

/*
 * NBD command codes.  The numeric values are spelled out explicitly rather
 * than left to implicit enumeration.
 * NOTE(review): presumably these are the values carried in
 * nbd_request.type and read back through nbd_cmd() -- confirm against the
 * driver.
 */
enum {
	NBD_CMD_READ	= 0,
	NBD_CMD_WRITE	= 1,
	NBD_CMD_DISC	= 2	/* NOTE(review): presumably "disconnect" -- confirm */
};

/* Expands to the first element of req's cmd[] array; usable as an lvalue. */
#define nbd_cmd(req)	((req)->cmd[0])

/* NOTE(review): presumably the maximum number of NBD devices -- confirm. */
#define MAX_NBD		128

/* userspace doesn't need the nbd_device structure */
#ifdef __KERNEL__

#include <linux/wait.h>

/*
 * Bit values for the flags field.
 * NOTE(review): presumably this refers to nbd_device.flags -- confirm.
 */
#define NBD_READ_ONLY	(1 << 0)	/* 0x0001 */
#define NBD_WRITE_NOCHK	(1 << 1)	/* 0x0002 */

/* Forward declaration: only a pointer to struct request is stored below. */
struct request;

/*
 * Kernel-side state of one NBD device (this definition is only visible
 * when __KERNEL__ is defined).
 */
struct nbd_device {
	int flags;
	int harderror;			/* Code of hard error */
	struct socket *sock;
	struct file *file;		/* NULL while the device is not ready yet */
	int magic;

	/* NOTE(review): presumably guards queue_head -- confirm in the driver */
	spinlock_t queue_lock;
	struct list_head queue_head;	/* Requests are added here... */
	/* NOTE(review): presumably the request currently being sent -- confirm */
	struct request *active_req;
	/* NOTE(review): presumably waited on while active_req is busy -- confirm */
	wait_queue_head_t active_wq;

	/* NOTE(review): presumably serialises the transmit path -- confirm */
	struct semaphore tx_lock;
	struct gendisk *disk;
	int blksize;
	u64 bytesize;
};

#endif

/*
 * Magic numbers sent over the network in the magic field of requests and
 * replies respectively.
 *
 * Do *not* use magics: 0x12560953 0x96744668.
 */
#define NBD_REQUEST_MAGIC	0x25609513
#define NBD_REPLY_MAGIC		0x67446698

/*
 * Packet used for communication between client and server.  Every field
 * travels in network byte order.  The structure is declared packed when
 * building with a GNU-compatible compiler.
 */
struct nbd_request {
	u32 magic;
	u32 type;		/* == READ || == WRITE */
	char handle[8];
	u64 from;		/* NOTE(review): presumably the start offset -- confirm */
	u32 len;		/* NOTE(review): presumably the transfer length -- confirm */
}
#ifdef __GNUC__
	__attribute__((packed))
#endif
;

/*
 * This is the reply packet that nbd-server sends back to the client after
 * it has completed an I/O request (or an error occurs).
 */
struct nbd_reply {
	u32 magic;
	u32 error;		/* 0 = ok, else error	*/
	char handle[8];		/* handle you got from request	*/
};
#endif