DragonFly BSD
DragonFly bugs List (threaded) for 2005-07

patch #4 (rollup patch for testing)


From: Matthew Dillon <dillon@xxxxxxxxxxxxxxxxxxxx>
Date: Mon, 18 Jul 2005 08:35:22 -0700 (PDT)

    This is a rollup patch for testing that is supposed to fix
    two SMP-related bugs, beef up the IPS driver a bit, and
    add some debugging to the UFS inode code.

    I've sent this patch in various forms to several people,
    but my list has grown to the point where I think I should
    just post it to bugs@. 

    A number of other bug fixes have already been committed to
    HEAD (shutdown fix, umass panic, etc) and have not been included
    in this patch.

    The two bugs being worked on are primarily related to SMP
    operation and address issues with the LWKT scheduler's
    handling of the big giant lock, and with sockbufs.

    There is still at least one open issue with the UMASS device,
    related to pulling out a device without first unmounting the
    associated filesystem, which has not yet been addressed.

    I will post updates as I diagnose further bug reports.

					-Matt

Index: dev/raid/ips/ips.c
===================================================================
RCS file: /cvs/src/sys/dev/raid/ips/ips.c,v
retrieving revision 1.11
diff -u -r1.11 ips.c
--- dev/raid/ips/ips.c	7 Jun 2005 00:51:13 -0000	1.11
+++ dev/raid/ips/ips.c	16 Jul 2005 17:05:42 -0000
@@ -25,7 +25,7 @@
  * SUCH DAMAGE.
  *
  * $FreeBSD: src/sys/dev/ips/ips.c,v 1.12 2004/05/30 04:01:29 scottl Exp $
- * $DragonFly: src/sys/dev/raid/ips/ips.c,v 1.11 2005/06/07 00:51:13 y0netan1 Exp $
+ * $DragonFly$
  */
 
 #include <dev/raid/ips/ips.h>
@@ -477,6 +477,30 @@
 		sc->max_cmds = min(128, sc->adapter_info.max_concurrent_cmds);
 	else
 		sc->max_cmds = 32;
+
+	/*
+	 * Limit simultaneous commands for lite adapters (taken from
+	 * the linux driver).
+	 */
+	switch(sc->adapter_type) {
+	case IPS_ADAPTER_CLARINETLITE:
+	case IPS_ADAPTER_MORPHEUSLITE:
+	case IPS_ADAPTER_NEOLITE:
+		if (sc->max_cmds > 32)
+			sc->max_cmds = 32;
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * I don't trust adaptec to handle the edge case (matt's paranoia).
+	 * Note that linux reserves one command for special operations,
+	 * which has virtually the same effect.
+	 */
+	if (sc->max_cmds >= 8)
+		--sc->max_cmds;
+
 	if (ips_cmdqueue_init(sc)) {
 		device_printf(sc->dev,
 		    "failed to initialize command buffers\n");
@@ -489,6 +513,7 @@
 	dev->si_drv1 = sc;
 	ips_diskdev_init(sc);
 	callout_reset(&sc->timer, 10 * hz, ips_timeout, sc);
+	device_printf(sc->dev, "type=%d cmds=%d\n", (int)sc->adapter_type, (int)sc->max_cmds);
 	return 0;
 error:
 	ips_adapter_free(sc);
Index: dev/raid/ips/ips.h
===================================================================
RCS file: /cvs/src/sys/dev/raid/ips/ips.h,v
retrieving revision 1.6
diff -u -r1.6 ips.h
--- dev/raid/ips/ips.h	7 Jun 2005 00:52:34 -0000	1.6
+++ dev/raid/ips/ips.h	16 Jul 2005 17:05:42 -0000
@@ -26,7 +26,7 @@
  * SUCH DAMAGE.
  *
  * $FreeBSD: src/sys/dev/ips/ips.h,v 1.10 2004/05/30 20:08:34 phk Exp $
- * $DragonFly: src/sys/dev/raid/ips/ips.h,v 1.6 2005/06/07 00:52:34 y0netan1 Exp $
+ * $DragonFly$
  */
 
 
@@ -404,6 +404,12 @@
    volatile u_int32_t    value;
 } ips_cmd_status_t;
 
+/* local copyback of status after command completion */
+typedef struct {
+    ips_cmd_status_t	status;
+    int			completed;
+} ips_cmd_status_return_t;
+
 /* used to keep track of current commands to the card */
 typedef struct ips_command {
 	u_int8_t		command_number;
Index: dev/raid/ips/ips_commands.c
===================================================================
RCS file: /cvs/src/sys/dev/raid/ips/ips_commands.c,v
retrieving revision 1.8
diff -u -r1.8 ips_commands.c
--- dev/raid/ips/ips_commands.c	10 Dec 2004 04:09:46 -0000	1.8
+++ dev/raid/ips/ips_commands.c	16 Jul 2005 17:05:42 -0000
@@ -25,10 +25,11 @@
  * SUCH DAMAGE.
  *
  * $FreeBSD: src/sys/dev/ips/ips_commands.c,v 1.10 2004/05/30 04:01:29 scottl Exp $
- * $DragonFly: src/sys/dev/raid/ips/ips_commands.c,v 1.8 2004/12/10 04:09:46 y0netan1 Exp $
+ * $DragonFly$
  */
 
 #include <dev/raid/ips/ips.h>
+#include <sys/thread2.h>
 
 /*
  * This is an interrupt callback.  It is called from
@@ -40,16 +41,49 @@
 static void
 ips_wakeup_callback(ips_command_t *command)
 {
-	ips_cmd_status_t *status;
+	ips_cmd_status_return_t *status;
 
 	status = command->arg;
-	status->value = command->status.value;
+	status->status.value = command->status.value;
 	bus_dmamap_sync(command->sc->command_dmatag, command->command_dmamap,
 			BUS_DMASYNC_POSTWRITE);
+	status->completed = 1;
+	wakeup(status);
+}
+
+static void
+ips_early_error(ips_command_t *command)
+{
+	ips_cmd_status_return_t *status;
+
+	status = command->arg;
+	status->status.value = IPS_ERROR_STATUS; /* a lovely error value */
+	status->completed = 1;
 	wakeup(status);
 }
 
 /*
+ * Wait for completion of a synchronous command
+ */
+static int
+ips_synchronous_wait(ips_cmd_status_return_t *status, const char *id, int to)
+{
+	int error = 0;
+
+	while (status->completed == 0) {
+		crit_enter();
+		if (status->completed == 0) {
+			if (tsleep(status, 0, id, to) == EWOULDBLOCK) {
+				error = ETIMEDOUT;
+				break;
+			}
+		}
+		crit_exit();
+	}
+	return(error);
+}
+
+/*
  * Below are a series of functions for sending an IO request
  * to the adapter.  The flow order is: start, send, callback, finish.
  * The caller must have already assembled an iorequest struct to hold
@@ -203,9 +237,7 @@
 	ips_adapter_info_cmd *command_struct;
 	sc = command->sc;
 	if (error) {
-		ips_cmd_status_t * status = command->arg;
-		status->value = IPS_ERROR_STATUS; /* a lovely error value */
-		ips_insert_free_cmd(sc, command);
+		ips_early_error(command);
 		printf("ips: error = %d in ips_get_adapter_info\n", error);
 		return;
 	}
@@ -225,8 +257,8 @@
 ips_send_adapter_info_cmd(ips_command_t *command)
 {
 	ips_softc_t *sc = command->sc;
-	ips_cmd_status_t *status = command->arg;
-	int error = 0;
+	ips_cmd_status_return_t *status = command->arg;
+	int error;
 
 	if (bus_dma_tag_create(	/* parent    */	sc->adapter_dmatag,
 				/* alignemnt */	1,
@@ -245,7 +277,7 @@
 		goto exit;
 	}
 	if (bus_dmamem_alloc(command->data_dmatag, &command->data_buffer,
-	   BUS_DMA_NOWAIT, &command->data_dmamap)) {
+	    BUS_DMA_NOWAIT, &command->data_dmamap)) {
 		error = ENOMEM;
 		goto exit;
 	}
@@ -253,9 +285,7 @@
 	bus_dmamap_load(command->data_dmatag, command->data_dmamap,
 	    command->data_buffer, IPS_ADAPTER_INFO_LEN,
 	    ips_adapter_info_callback, command, BUS_DMA_NOWAIT);
-	if ((status->value == IPS_ERROR_STATUS) ||
-	    tsleep(status, 0, "ips", 30 * hz) == EWOULDBLOCK)
-		error = ETIMEDOUT;
+	error = ips_synchronous_wait(status, "ips", 30 * hz);
 	if (error == 0) {
 		bus_dmamap_sync(command->data_dmatag, command->data_dmamap,
 		    BUS_DMASYNC_POSTREAD);
@@ -276,16 +306,16 @@
 ips_get_adapter_info(ips_softc_t *sc)
 {
 	int error = 0;
-	ips_cmd_status_t *status;
+	ips_cmd_status_return_t *status;
 
-	status = malloc(sizeof(ips_cmd_status_t), M_IPSBUF, M_INTWAIT | M_ZERO);
+	status = malloc(sizeof(*status), M_IPSBUF, M_INTWAIT | M_ZERO);
 	if (ips_get_free_cmd(sc, ips_send_adapter_info_cmd, status,
 	    IPS_NOWAIT_FLAG) > 0) {
 		device_printf(sc->dev, "unable to get adapter configuration\n");
 		free(status, M_IPSBUF);
 		return ENXIO;
 	}
-	if (COMMAND_ERROR(status))
+	if (COMMAND_ERROR(&status->status))
 		error = ENXIO;
 	free(status, M_IPSBUF);
 	return error;
@@ -307,10 +337,7 @@
 
 	sc = command->sc;
 	if (error) {
-		ips_cmd_status_t *status = command->arg;
-
-		status->value = IPS_ERROR_STATUS;
-		ips_insert_free_cmd(sc, command);
+		ips_early_error(command);
 		printf("ips: error = %d in ips_get_drive_info\n", error);
 		return;
 	}
@@ -330,7 +357,7 @@
 {
 	int error = 0;
 	ips_softc_t *sc = command->sc;
-	ips_cmd_status_t *status = command->arg;
+	ips_cmd_status_return_t *status = command->arg;
 	ips_drive_info_t *driveinfo;
 
 	if (bus_dma_tag_create(	/* parent    */	sc->adapter_dmatag,
@@ -358,10 +385,7 @@
 	bus_dmamap_load(command->data_dmatag, command->data_dmamap,
 	    command->data_buffer,IPS_DRIVE_INFO_LEN,
 	    ips_drive_info_callback, command, BUS_DMA_NOWAIT);
-	if ((status->value == IPS_ERROR_STATUS) ||
-	    tsleep(status, 0, "ips", 10 * hz) == EWOULDBLOCK)
-		error = ETIMEDOUT;
-
+	error = ips_synchronous_wait(status, "ips", 10 * hz);
 	if (error == 0) {
 		bus_dmamap_sync(command->data_dmatag, command->data_dmamap,
 		    BUS_DMASYNC_POSTREAD);
@@ -384,16 +408,16 @@
 ips_get_drive_info(ips_softc_t *sc)
 {
 	int error = 0;
-	ips_cmd_status_t *status;
+	ips_cmd_status_return_t *status;
 
-	status = malloc(sizeof(ips_cmd_status_t), M_IPSBUF, M_INTWAIT | M_ZERO);
+	status = malloc(sizeof(*status), M_IPSBUF, M_INTWAIT | M_ZERO);
 	if (ips_get_free_cmd(sc, ips_send_drive_info_cmd, status,
 	    IPS_NOWAIT_FLAG) > 0) {
 		free(status, M_IPSBUF);
 		device_printf(sc->dev, "unable to get drive configuration\n");
 		return ENXIO;
 	}
-	if (COMMAND_ERROR(status))
+	if (COMMAND_ERROR(&status->status))
 		error = ENXIO;
 	free(status, M_IPSBUF);
 	return error;
@@ -407,7 +431,7 @@
 ips_send_flush_cache_cmd(ips_command_t *command)
 {
 	ips_softc_t *sc = command->sc;
-	ips_cmd_status_t *status = command->arg;
+	ips_cmd_status_return_t *status = command->arg;
 	ips_generic_cmd *command_struct;
 
 	PRINTF(10,"ips test: got a command, building flush command\n");
@@ -418,8 +442,7 @@
 	bus_dmamap_sync(sc->command_dmatag, command->command_dmamap,
 	    BUS_DMASYNC_PREWRITE);
 	sc->ips_issue_cmd(command);
-	if (status->value != IPS_ERROR_STATUS)
-		tsleep(status, 0, "flush2", 0);
+	ips_synchronous_wait(status, "flush2", 0);
 	ips_insert_free_cmd(sc, command);
 	return 0;
 }
@@ -427,9 +450,9 @@
 int
 ips_flush_cache(ips_softc_t *sc)
 {
-	ips_cmd_status_t *status;
+	ips_cmd_status_return_t *status;
 
-	status = malloc(sizeof(ips_cmd_status_t), M_IPSBUF, M_INTWAIT | M_ZERO);
+	status = malloc(sizeof(*status), M_IPSBUF, M_INTWAIT | M_ZERO);
 	device_printf(sc->dev, "flushing cache\n");
 	if (ips_get_free_cmd(sc, ips_send_flush_cache_cmd, status,
 	    IPS_NOWAIT_FLAG)) {
@@ -438,7 +461,7 @@
 		    "can't flush cache!\n");
 		return 1;
 	}
-	if (COMMAND_ERROR(status)) {
+	if (COMMAND_ERROR(&status->status)) {
 		free(status, M_IPSBUF);
 		device_printf(sc->dev, "ERROR: cache flush command failed!\n");
 		return 1;
@@ -496,7 +519,7 @@
 ips_send_ffdc_reset_cmd(ips_command_t *command)
 {
 	ips_softc_t *sc = command->sc;
-	ips_cmd_status_t *status = command->arg;
+	ips_cmd_status_return_t *status = command->arg;
 	ips_adapter_ffdc_cmd *command_struct;
 
 	PRINTF(10, "ips test: got a command, building ffdc reset command\n");
@@ -510,8 +533,7 @@
 	bus_dmamap_sync(sc->command_dmatag, command->command_dmamap,
 	    BUS_DMASYNC_PREWRITE);
 	sc->ips_issue_cmd(command);
-	if (status->value != IPS_ERROR_STATUS)
-		tsleep(status, 0, "ffdc", 0);
+	ips_synchronous_wait(status, "ffdc", 0);
 	ips_insert_free_cmd(sc, command);
 	return 0;
 }
@@ -519,9 +541,9 @@
 int
 ips_ffdc_reset(ips_softc_t *sc)
 {
-	ips_cmd_status_t *status;
+	ips_cmd_status_return_t *status;
 
-	status = malloc(sizeof(ips_cmd_status_t), M_IPSBUF, M_INTWAIT | M_ZERO);
+	status = malloc(sizeof(*status), M_IPSBUF, M_INTWAIT | M_ZERO);
 	if (ips_get_free_cmd(sc, ips_send_ffdc_reset_cmd, status,
 	    IPS_NOWAIT_FLAG)) {
 		free(status, M_IPSBUF);
@@ -529,7 +551,7 @@
 		    "can't send ffdc reset!\n");
 		return 1;
 	}
-	if (COMMAND_ERROR(status)) {
+	if (COMMAND_ERROR(&status->status)) {
 		free(status, M_IPSBUF);
 		device_printf(sc->dev, "ERROR: ffdc reset command failed!\n");
 		return 1;
@@ -575,10 +597,7 @@
 
 	sc = command->sc;
 	if (error) {
-		ips_cmd_status_t *status = command->arg;
-
-		status->value = IPS_ERROR_STATUS;
-		ips_insert_free_cmd(sc, command);
+		ips_early_error(command);
 		printf("ips: error = %d in ips_read_nvram_callback\n", error);
 		return;
 	}
@@ -601,7 +620,7 @@
 {
 	int error = 0;
 	ips_softc_t *sc = command->sc;
-	ips_cmd_status_t *status = command->arg;
+	ips_cmd_status_return_t *status = command->arg;
 
 	if (bus_dma_tag_create(	/* parent    */	sc->adapter_dmatag,
 				/* alignemnt */	1,
@@ -628,9 +647,7 @@
 	bus_dmamap_load(command->data_dmatag, command->data_dmamap,
 	    command->data_buffer, IPS_NVRAM_PAGE_SIZE, ips_read_nvram_callback,
 	    command, BUS_DMA_NOWAIT);
-	if ((status->value == IPS_ERROR_STATUS) ||
-	    tsleep(status, 0, "ips", 0) == EWOULDBLOCK)
-		error = ETIMEDOUT;
+	error = ips_synchronous_wait(status, "ips", 0);
 	if (error == 0) {
 		bus_dmamap_sync(command->data_dmatag, command->data_dmamap,
 				BUS_DMASYNC_POSTWRITE);
@@ -647,16 +664,16 @@
 int
 ips_update_nvram(ips_softc_t *sc)
 {
-	ips_cmd_status_t *status;
+	ips_cmd_status_return_t *status;
 
-	status = malloc(sizeof(ips_cmd_status_t), M_IPSBUF, M_INTWAIT | M_ZERO);
+	status = malloc(sizeof(*status), M_IPSBUF, M_INTWAIT | M_ZERO);
 	if (ips_get_free_cmd(sc, ips_read_nvram, status, IPS_NOWAIT_FLAG)) {
 		free(status, M_IPSBUF);
 		device_printf(sc->dev, "ERROR: unable to get a command! "
 		    "can't update nvram\n");
 		return 1;
 	}
-	if (COMMAND_ERROR(status)) {
+	if (COMMAND_ERROR(&status->status)) {
 		free(status, M_IPSBUF);
 		device_printf(sc->dev, "ERROR: nvram update command failed!\n");
 		return 1;
@@ -669,7 +686,7 @@
 ips_send_config_sync_cmd(ips_command_t *command)
 {
 	ips_softc_t *sc = command->sc;
-	ips_cmd_status_t *status = command->arg;
+	ips_cmd_status_return_t *status = command->arg;
 	ips_generic_cmd *command_struct;
 
 	PRINTF(10, "ips test: got a command, building flush command\n");
@@ -681,8 +698,7 @@
 	bus_dmamap_sync(sc->command_dmatag, command->command_dmamap,
 	    BUS_DMASYNC_PREWRITE);
 	sc->ips_issue_cmd(command);
-	if (status->value != IPS_ERROR_STATUS)
-		tsleep(status, 0, "ipssyn", 0);
+	ips_synchronous_wait(status, "ipssyn", 0);
 	ips_insert_free_cmd(sc, command);
 	return 0;
 }
@@ -691,7 +707,7 @@
 ips_send_error_table_cmd(ips_command_t *command)
 {
 	ips_softc_t *sc = command->sc;
-	ips_cmd_status_t *status = command->arg;
+	ips_cmd_status_return_t *status = command->arg;
 	ips_generic_cmd *command_struct;
 
 	PRINTF(10, "ips test: got a command, building errortable command\n");
@@ -703,8 +719,7 @@
 	bus_dmamap_sync(sc->command_dmatag, command->command_dmamap,
 	    BUS_DMASYNC_PREWRITE);
 	sc->ips_issue_cmd(command);
-	if (status->value != IPS_ERROR_STATUS)
-		tsleep(status, 0, "ipsetc", 0);
+	ips_synchronous_wait(status, "ipsetc", 0);
 	ips_insert_free_cmd(sc, command);
 	return 0;
 }
@@ -712,9 +727,9 @@
 int
 ips_clear_adapter(ips_softc_t *sc)
 {
-	ips_cmd_status_t *status;
+	ips_cmd_status_return_t *status;
 
-	status = malloc(sizeof(ips_cmd_status_t), M_IPSBUF, M_INTWAIT | M_ZERO);
+	status = malloc(sizeof(*status), M_IPSBUF, M_INTWAIT | M_ZERO);
 	device_printf(sc->dev, "syncing config\n");
 	if (ips_get_free_cmd(sc, ips_send_config_sync_cmd, status,
 	    IPS_NOWAIT_FLAG)) {
@@ -723,12 +738,13 @@
 		    "can't sync cache!\n");
 		return 1;
 	}
-	if (COMMAND_ERROR(status)) {
+	if (COMMAND_ERROR(&status->status)) {
 		free(status, M_IPSBUF);
 		device_printf(sc->dev, "ERROR: cache sync command failed!\n");
 		return 1;
 	}
 	device_printf(sc->dev, "clearing error table\n");
+	bzero(status, sizeof(*status));
 	if (ips_get_free_cmd(sc, ips_send_error_table_cmd, status,
 	    IPS_NOWAIT_FLAG)) {
 		free(status, M_IPSBUF);
@@ -736,7 +752,7 @@
 		    "can't sync cache!\n");
 		return 1;
 	}
-	if (COMMAND_ERROR(status)) {
+	if (COMMAND_ERROR(&status->status)) {
 		device_printf(sc->dev, "ERROR: etable command failed!\n");
 		free(status, M_IPSBUF);
 		return 1;
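
    The hunks above replace the old checks of the raw status->value with
    an explicit "completed" flag: the interrupt callback sets the flag
    before calling wakeup(), and ips_synchronous_wait() re-tests it inside
    a critical section before sleeping, so a completion that fires before
    the caller blocks cannot be lost.  Below is a minimal userland sketch
    of the same wait-on-completion-flag pattern, with a pthread mutex and
    condition variable standing in for crit_enter()/tsleep(); only the
    ips_* function names mentioned come from the patch, everything else
    in the sketch is illustrative.

/* cc -pthread sync_wait.c */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

struct cmd_status {
	pthread_mutex_t	lock;
	pthread_cond_t	cv;
	int		completed;	/* set once by the completion side */
	int		value;		/* copied-back command status */
};

/* Completion side: the analogue of ips_wakeup_callback(). */
static void *
completer(void *arg)
{
	struct cmd_status *st = arg;

	sleep(1);			/* pretend the adapter is working */
	pthread_mutex_lock(&st->lock);
	st->value = 0;
	st->completed = 1;		/* set the flag, then wake the waiter */
	pthread_cond_broadcast(&st->cv);
	pthread_mutex_unlock(&st->lock);
	return (NULL);
}

/*
 * Waiting side: the analogue of ips_synchronous_wait().  The flag is
 * re-tested under the lock, so a completion that arrives before we
 * block is never missed.  to_secs == 0 means wait forever, like a
 * tsleep() timeout of 0.
 */
static int
cmd_wait(struct cmd_status *st, int to_secs)
{
	struct timespec ts;
	int error = 0;

	clock_gettime(CLOCK_REALTIME, &ts);
	ts.tv_sec += to_secs;
	pthread_mutex_lock(&st->lock);
	while (st->completed == 0 && error == 0) {
		if (to_secs == 0)
			error = pthread_cond_wait(&st->cv, &st->lock);
		else
			error = pthread_cond_timedwait(&st->cv, &st->lock, &ts);
	}
	pthread_mutex_unlock(&st->lock);
	return ((error == ETIMEDOUT) ? ETIMEDOUT : 0);
}

int
main(void)
{
	struct cmd_status st = { PTHREAD_MUTEX_INITIALIZER,
				 PTHREAD_COND_INITIALIZER, 0, -1 };
	pthread_t td;
	int error;

	pthread_create(&td, NULL, completer, &st);
	error = cmd_wait(&st, 30);
	printf("wait -> %d, status value %d\n", error, st.value);
	pthread_join(td, NULL);
	return (0);
}
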
Index: dev/raid/ips/ips_ioctl.c
===================================================================
RCS file: /cvs/src/sys/dev/raid/ips/ips_ioctl.c,v
retrieving revision 1.4
diff -u -r1.4 ips_ioctl.c
--- dev/raid/ips/ips_ioctl.c	6 Sep 2004 16:39:47 -0000	1.4
+++ dev/raid/ips/ips_ioctl.c	16 Jul 2005 17:05:42 -0000
@@ -25,11 +25,12 @@
  * SUCH DAMAGE.
  *
  * $FreeBSD: src/sys/dev/ips/ips_ioctl.c,v 1.5 2004/05/30 04:01:29 scottl Exp $
- * $DragonFly: src/sys/dev/raid/ips/ips_ioctl.c,v 1.4 2004/09/06 16:39:47 joerg Exp $
+ * $DragonFly$
  */
 
 #include <dev/raid/ips/ips.h>
 #include <dev/raid/ips/ips_ioctl.h>
+#include <sys/thread2.h>
 
 static void
 ips_ioctl_finish(ips_command_t *command)
@@ -128,8 +129,10 @@
 		error = ENOMEM;
 		goto exit;
 	}
+	crit_enter();
 	while (ioctl_cmd->status.value == 0xffffffff)
 		tsleep(ioctl_cmd, 0, "ips", hz / 10);
+	crit_exit();
 	if (COMMAND_ERROR(&ioctl_cmd->status))
 		error = EIO;
 	else
Index: i386/i386/vm86.c
===================================================================
RCS file: /cvs/src/sys/i386/i386/vm86.c,v
retrieving revision 1.13
diff -u -r1.13 vm86.c
--- i386/i386/vm86.c	31 Jan 2005 04:35:17 -0000	1.13
+++ i386/i386/vm86.c	18 Jul 2005 14:56:01 -0000
@@ -25,7 +25,7 @@
  * SUCH DAMAGE.
  *
  * $FreeBSD: src/sys/i386/i386/vm86.c,v 1.31.2.2 2001/10/05 06:18:55 peter Exp $
- * $DragonFly: src/sys/i386/i386/vm86.c,v 1.13 2005/01/31 04:35:17 dillon Exp $
+ * $DragonFly$
  */
 
 #include <sys/param.h>
@@ -623,7 +623,7 @@
 		return (EINVAL);
 
 	crit_enter();
-	ASSERT_MP_LOCK_HELD();
+	ASSERT_MP_LOCK_HELD(curthread);
 
 	vm86_setup_timer_fault();
 	vmf->vmf_trapno = intnum;
@@ -662,7 +662,7 @@
 	int i, entry, retval;
 
 	crit_enter();
-	ASSERT_MP_LOCK_HELD();
+	ASSERT_MP_LOCK_HELD(curthread);
 
 	for (i = 0; i < vmc->npages; i++) {
 		page = vtophys(vmc->pmap[i].kva & PG_FRAME);
Index: i386/include/lock.h
===================================================================
RCS file: /cvs/src/sys/i386/include/lock.h,v
retrieving revision 1.10
diff -u -r1.10 lock.h
--- i386/include/lock.h	20 Nov 2004 20:50:36 -0000	1.10
+++ i386/include/lock.h	18 Jul 2005 14:56:01 -0000
@@ -32,7 +32,7 @@
  * SUCH DAMAGE.
  * 
  * $FreeBSD: src/sys/i386/include/lock.h,v 1.11.2.2 2000/09/30 02:49:34 ps Exp $
- * $DragonFly: src/sys/i386/include/lock.h,v 1.10 2004/11/20 20:50:36 dillon Exp $
+ * $DragonFly$
  */
 
 #ifndef _MACHINE_LOCK_H_
@@ -196,7 +196,7 @@
 extern u_int	mp_lock;
 
 #define MP_LOCK_HELD()   (mp_lock == mycpu->gd_cpuid)
-#define ASSERT_MP_LOCK_HELD()   KKASSERT(MP_LOCK_HELD())
+#define ASSERT_MP_LOCK_HELD(td)   KASSERT(MP_LOCK_HELD(), ("MP_LOCK_HELD(): not held thread %p", td))
 
 static __inline void
 cpu_rel_mplock(void)
@@ -209,7 +209,7 @@
 #define get_mplock()
 #define try_mplock()	1
 #define rel_mplock()
-#define ASSERT_MP_LOCK_HELD()
+#define ASSERT_MP_LOCK_HELD(td)
 
 #endif	/* SMP */
 #endif  /* _KERNEL || _UTHREAD */
Index: kern/lwkt_thread.c
===================================================================
RCS file: /cvs/src/sys/kern/lwkt_thread.c,v
retrieving revision 1.76
diff -u -r1.76 lwkt_thread.c
--- kern/lwkt_thread.c	7 Jul 2005 20:28:26 -0000	1.76
+++ kern/lwkt_thread.c	18 Jul 2005 15:20:44 -0000
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/kern/lwkt_thread.c,v 1.76 2005/07/07 20:28:26 hmp Exp $
+ * $DragonFly$
  */
 
 /*
@@ -576,26 +576,44 @@
 	     * or if the target is holding tokens and we could not 
 	     * gain ownership of the tokens, continue looking for a
 	     * thread to schedule and spin instead of HLT if we can't.
+	     *
+	     * NOTE: the mpheld variable is invalid after this conditional; it
+	     * can change due to both cpu_try_mplock() returning success
+	     * AND interactions in lwkt_chktokens() due to the fact that
+	     * we are trying to check the mpcount of a thread other than
+	     * the current thread.  Because of this, if the current thread
+	     * is not holding td_mpcount, an IPI indirectly run via
+	     * lwkt_chktokens() can obtain and release the MP lock and
+	     * cause the core MP lock to be released.
 	     */
 	    if ((ntd->td_mpcount && mpheld == 0 && !cpu_try_mplock()) ||
 		(ntd->td_toks && lwkt_chktokens(ntd) == 0)
 	    ) {
 		u_int32_t rqmask = gd->gd_runqmask;
+
+		mpheld = MP_LOCK_HELD();
+		ntd = NULL;
 		while (rqmask) {
 		    TAILQ_FOREACH(ntd, &gd->gd_tdrunq[nq], td_threadq) {
 			if (ntd->td_mpcount && !mpheld && !cpu_try_mplock()) {
-				/* spinning due to MP lock being held */
+			    /* spinning due to MP lock being held */
 #ifdef	INVARIANTS
-				++mplock_contention_count;
+			    ++mplock_contention_count;
 #endif
+			    /* mplock still not held, 'mpheld' still valid */
 			    continue;
 			}
-			mpheld = MP_LOCK_HELD();
+
+			/*
+			 * mpheld state invalid after chktokens call but only
+			 * needed for the loop.
+			 */
 			if (ntd->td_toks && !lwkt_chktokens(ntd)) {
-				/* spinning due to token contention */
+			    /* spinning due to token contention */
 #ifdef	INVARIANTS
-				++token_contention_count;
+			    ++token_contention_count;
 #endif
+			    mpheld = MP_LOCK_HELD();
 			    continue;
 			}
 			break;
@@ -608,6 +626,7 @@
 		if (ntd == NULL) {
 		    ntd = &gd->gd_idlethread;
 		    ntd->td_flags |= TDF_IDLE_NOHLT;
+		    KASSERT(ntd->td_mpcount == 0, ("Idlex thread %p was holding the BGL!", ntd));
 		} else {
 		    TAILQ_REMOVE(&gd->gd_tdrunq[nq], ntd, td_threadq);
 		    TAILQ_INSERT_TAIL(&gd->gd_tdrunq[nq], ntd, td_threadq);
@@ -628,6 +647,9 @@
 	    ntd = &gd->gd_idlethread;
 	    if (gd->gd_reqflags & RQF_IDLECHECK_MASK)
 		ntd->td_flags |= TDF_IDLE_NOHLT;
+#ifdef SMP
+	    KASSERT(ntd->td_mpcount == 0, ("Idley thread %p was holding the BGL!", ntd));
+#endif
 	}
     }
     KASSERT(ntd->td_pri >= TDPRI_CRIT,
@@ -643,7 +665,7 @@
 	if (MP_LOCK_HELD())
 	    cpu_rel_mplock();
     } else {
-	ASSERT_MP_LOCK_HELD();
+	ASSERT_MP_LOCK_HELD(ntd);
     }
 #endif
     if (td != ntd) {
Index: kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.35
diff -u -r1.35 uipc_socket.c
--- kern/uipc_socket.c	15 Jul 2005 17:54:47 -0000	1.35
+++ kern/uipc_socket.c	17 Jul 2005 07:28:57 -0000
@@ -792,18 +792,18 @@
 	struct mbuf **controlp;
 	int *flagsp;
 {
-	struct mbuf *m, **mp;
+	struct mbuf *m, *n, **mp;
+	struct mbuf *free_chain = NULL;
 	int flags, len, error, offset;
 	struct protosw *pr = so->so_proto;
-	struct mbuf *nextrecord;
 	int moff, type = 0;
 	int orig_resid = uio->uio_resid;
 
 	mp = mp0;
 	if (psa)
-		*psa = 0;
+		*psa = NULL;
 	if (controlp)
-		*controlp = 0;
+		*controlp = NULL;
 	if (flagsp)
 		flags = *flagsp &~ MSG_EOR;
 	else
@@ -826,15 +826,15 @@
 		return (error);
 	}
 	if (mp)
-		*mp = (struct mbuf *)0;
+		*mp = NULL;
 	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
 		so_pru_rcvd(so, 0);
 
 restart:
+	crit_enter();
 	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
 	if (error)
-		return (error);
-	crit_enter();
+		goto done;
 
 	m = so->so_rcv.sb_mb;
 	/*
@@ -848,12 +848,12 @@
 	 * we have to do the receive in sections, and thus risk returning
 	 * a short count if a timeout or signal occurs after we start.
 	 */
-	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
+	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
 	    so->so_rcv.sb_cc < uio->uio_resid) &&
 	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
 	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
 	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
-		KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
+		KASSERT(m != NULL || !so->so_rcv.sb_cc, ("receive 1"));
 		if (so->so_error) {
 			if (m)
 				goto dontblock;
@@ -868,11 +868,12 @@
 			else
 				goto release;
 		}
-		for (; m; m = m->m_next)
+		for (; m; m = m->m_next) {
 			if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
 				m = so->so_rcv.sb_mb;
 				goto dontblock;
 			}
+		}
 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
 		    (pr->pr_flags & PR_CONNREQUIRED)) {
 			error = ENOTCONN;
@@ -886,46 +887,53 @@
 		}
 		sbunlock(&so->so_rcv);
 		error = sbwait(&so->so_rcv);
-		crit_exit();
 		if (error)
-			return (error);
+			goto done;
+		crit_exit();
 		goto restart;
 	}
 dontblock:
 	if (uio->uio_td && uio->uio_td->td_proc)
 		uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++;
-	nextrecord = m->m_nextpkt;
+
+	/*
+	 * note: m should be == sb_mb here.  Cache the next record while
+	 * cleaning up.  Note that calling m_free*() will break out of the
+	 * critical section.
+	 */
+	KKASSERT(m == so->so_rcv.sb_mb);
+
+	/*
+	 * Skip any address mbufs prepending the record.
+	 */
 	if (pr->pr_flags & PR_ADDR) {
 		KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
 		orig_resid = 0;
 		if (psa)
 			*psa = dup_sockaddr(mtod(m, struct sockaddr *));
-		if (flags & MSG_PEEK) {
+		if (flags & MSG_PEEK)
 			m = m->m_next;
-		} else {
-			sbfree(&so->so_rcv, m);
-			m->m_nextpkt = NULL;
-			so->so_rcv.sb_mb = m_free(m);
-			m = so->so_rcv.sb_mb;
-		}
+		else
+			m = sbunlinkmbuf(&so->so_rcv, m, &free_chain);
 	}
+
+	/*
+	 * Skip any control mbufs prepending the record.
+	 */
 #ifdef SCTP
 	if (pr->pr_flags & PR_ADDR_OPT) {
 		/*
 		 * For SCTP we may be getting a
 		 * whole message OR a partial delivery.
 		 */
-		if (m->m_type == MT_SONAME) {
+		if (m && m->m_type == MT_SONAME) {
 			orig_resid = 0;
 			if (psa)
 				*psa = dup_sockaddr(mtod(m, struct sockaddr *));
-			if (flags & MSG_PEEK) {
+			if (flags & MSG_PEEK)
 				m = m->m_next;
-			} else {
-				sbfree(&so->so_rcv, m);
-				so->so_rcv.sb_mb = m_free(m);
-				m = so->so_rcv.sb_mb;
-			}
+			else
+				m = sbunlinkmbuf(&so->so_rcv, m, &free_chain);
 		}
 	}
 #endif /* SCTP */
@@ -933,36 +941,38 @@
 		if (flags & MSG_PEEK) {
 			if (controlp)
 				*controlp = m_copy(m, 0, m->m_len);
-			m = m->m_next;
+			m = m->m_next;	/* XXX race */
 		} else {
-			sbfree(&so->so_rcv, m);
-			m->m_nextpkt = NULL;
 			if (controlp) {
+				n = sbunlinkmbuf(&so->so_rcv, m, NULL);
 				if (pr->pr_domain->dom_externalize &&
 				    mtod(m, struct cmsghdr *)->cmsg_type ==
 				    SCM_RIGHTS)
 				   error = (*pr->pr_domain->dom_externalize)(m);
 				*controlp = m;
-				so->so_rcv.sb_mb = m->m_next;
-				m->m_next = NULL;
-				m = so->so_rcv.sb_mb;
+				m = n;
 			} else {
-				so->so_rcv.sb_mb = m_free(m);
-				m = so->so_rcv.sb_mb;
+				m = sbunlinkmbuf(&so->so_rcv, m, &free_chain);
 			}
 		}
-		if (controlp) {
+		if (controlp && *controlp) {
 			orig_resid = 0;
 			controlp = &(*controlp)->m_next;
 		}
 	}
+
+	/*
+	 * flag OOB data.
+	 */
 	if (m) {
-		if ((flags & MSG_PEEK) == 0)
-			m->m_nextpkt = nextrecord;
 		type = m->m_type;
 		if (type == MT_OOBDATA)
 			flags |= MSG_OOB;
 	}
+
+	/*
+	 * Copy to the UIO or mbuf return chain (*mp).
+	 */
 	moff = 0;
 	offset = 0;
 	while (m && uio->uio_resid > 0 && error == 0) {
@@ -988,14 +998,19 @@
 		 * we must note any additions to the sockbuf when we
 		 * block interrupts again.
 		 */
-		if (mp == 0) {
+		if (mp == NULL) {
 			crit_exit();
 			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
 			crit_enter();
 			if (error)
 				goto release;
-		} else
+		} else {
 			uio->uio_resid -= len;
+		}
+
+		/*
+		 * Eat the entire mbuf or just a piece of it
+		 */
 		if (len == m->m_len - moff) {
 			if (m->m_flags & M_EOR)
 				flags |= MSG_EOR;
@@ -1007,26 +1022,19 @@
 				m = m->m_next;
 				moff = 0;
 			} else {
-				nextrecord = m->m_nextpkt;
-				m->m_nextpkt = NULL;
-				sbfree(&so->so_rcv, m);
 				if (mp) {
+					n = sbunlinkmbuf(&so->so_rcv, m, NULL);
 					*mp = m;
 					mp = &m->m_next;
-					so->so_rcv.sb_mb = m = m->m_next;
-					*mp = (struct mbuf *)0;
+					m = n;
 				} else {
-					so->so_rcv.sb_mb = m = m_free(m);
+					m = sbunlinkmbuf(&so->so_rcv, m, &free_chain);
 				}
-				if (m)
-					m->m_nextpkt = nextrecord;
-				else
-					so->so_rcv.sb_lastmbuf = NULL;
 			}
 		} else {
-			if (flags & MSG_PEEK)
+			if (flags & MSG_PEEK) {
 				moff += len;
-			else {
+			} else {
 				if (mp)
 					*mp = m_copym(m, 0, len, MB_WAIT);
 				m->m_data += len;
@@ -1056,8 +1064,9 @@
 		 * with a short count but without error.
 		 * Keep sockbuf locked against other readers.
 		 */
-		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
-		    !sosendallatonce(so) && !nextrecord) {
+		while (flags & MSG_WAITALL && m == NULL && 
+		    uio->uio_resid > 0 && !sosendallatonce(so) && 
+		    so->so_rcv.sb_mb == NULL) {
 			if (so->so_error || so->so_state & SS_CANTRCVMORE)
 				break;
 			/*
@@ -1071,31 +1080,27 @@
 			error = sbwait(&so->so_rcv);
 			if (error) {
 				sbunlock(&so->so_rcv);
-				return (0);
+				error = 0;
+				goto done;
 			}
 			m = so->so_rcv.sb_mb;
-			if (m)
-				nextrecord = m->m_nextpkt;
 		}
 	}
 
+	/*
+	 * If an atomic read was requested but unread data still remains
+	 * in the record, set MSG_TRUNC.
+	 */
 	if (m && pr->pr_flags & PR_ATOMIC)
 		flags |= MSG_TRUNC;
-	if (!(flags & MSG_PEEK)) {
-		if (m == NULL) {
-			so->so_rcv.sb_mb = nextrecord;
-			so->so_rcv.sb_lastmbuf = NULL;
-		} else {
-			if (pr->pr_flags & PR_ATOMIC)
-				sbdroprecord(&so->so_rcv);
-			else if (m->m_nextpkt == NULL) {
-				KASSERT(so->so_rcv.sb_mb == m,
-				    ("sb_mb %p != m %p", so->so_rcv.sb_mb, m));
-				so->so_rcv.sb_lastrecord = m;
-			}
-		}
-		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
+
+	/*
+	 * Cleanup.  If an atomic read was requested drop any unread data.
+	 */
+	if ((flags & MSG_PEEK) == 0) {
+		if (m && (pr->pr_flags & PR_ATOMIC))
+			sbdroprecord(&so->so_rcv);
+		if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb)
 			so_pru_rcvd(so, flags);
 	}
 
@@ -1110,7 +1115,10 @@
 		*flagsp |= flags;
 release:
 	sbunlock(&so->so_rcv);
+done:
 	crit_exit();
+	if (free_chain)
+		m_freem(free_chain);
 	return (error);
 }
 
Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.21
diff -u -r1.21 uipc_socket2.c
--- kern/uipc_socket2.c	7 Jun 2005 19:08:55 -0000	1.21
+++ kern/uipc_socket2.c	17 Jul 2005 07:45:20 -0000
@@ -479,22 +479,21 @@
 {
 	struct mbuf *n;
 
-	if (m == NULL)
-		return;
-	n = sb->sb_mb;
-	if (n) {
-		while (n->m_nextpkt)
-			n = n->m_nextpkt;
-		do {
-			if (n->m_flags & M_EOR) {
-				sbappendrecord(sb, m); /* XXXXXX!!!! */
-				return;
-			}
-		} while (n->m_next && (n = n->m_next));
+	if (m) {
+		n = sb->sb_mb;
+		if (n) {
+			while (n->m_nextpkt)
+				n = n->m_nextpkt;
+			do {
+				if (n->m_flags & M_EOR) {
+					/* XXXXXX!!!! */
+					sbappendrecord(sb, m);
+					return;
+				}
+			} while (n->m_next && (n = n->m_next));
+		}
+		sbcompress(sb, m, n);
 	}
-	sbcompress(sb, m, n);
-	if (n == NULL)
-		sb->sb_lastrecord = sb->sb_mb;
 }
 
 /*
@@ -511,29 +510,53 @@
 }
 
 #ifdef SOCKBUF_DEBUG
+
 void
-sbcheck(sb)
-	struct sockbuf *sb;
+_sbcheck(struct sockbuf *sb)
 {
 	struct mbuf *m;
-	struct mbuf *n = 0;
+	struct mbuf *n = NULL;
 	u_long len = 0, mbcnt = 0;
 
 	for (m = sb->sb_mb; m; m = n) {
 	    n = m->m_nextpkt;
+	    if (n == NULL && sb->sb_lastrecord != m) {
+		    printf("sockbuf %p mismatched lastrecord %p vs %p\n", sb, sb->sb_lastrecord, m);
+		    panic("sbcheck1");
+		
+	    }
 	    for (; m; m = m->m_next) {
 		len += m->m_len;
 		mbcnt += MSIZE;
 		if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
 			mbcnt += m->m_ext.ext_size;
+		if (n == NULL && m->m_next == NULL) {
+			if (sb->sb_lastmbuf != m) {
+				printf("sockbuf %p mismatched lastmbuf %p vs %p\n", sb, sb->sb_lastmbuf, m);
+				panic("sbcheck2");
+			}
+		}
+	    }
+	}
+	if (sb->sb_mb == NULL) {
+	    if (sb->sb_lastrecord != NULL) {
+		printf("sockbuf %p is empty, lastrecord not NULL: %p\n",
+			sb, sb->sb_lastrecord);
+		panic("sbcheck3");
+	    }
+	    if (sb->sb_lastmbuf != NULL) {
+		printf("sockbuf %p is empty, lastmbuf not NULL: %p\n",
+			sb, sb->sb_lastmbuf);
+		panic("sbcheck4");
 	    }
 	}
 	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
-		printf("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
-		    mbcnt, sb->sb_mbcnt);
-		panic("sbcheck");
+		printf("sockbuf %p cc %ld != %ld || mbcnt %ld != %ld\n",
+		    sb, len, sb->sb_cc, mbcnt, sb->sb_mbcnt);
+		panic("sbcheck5");
 	}
 }
+
 #endif
 
 /*
@@ -548,6 +571,8 @@
 	if (m0 == NULL)
 		return;
 
+	sbcheck(sb);
+
 	/*
 	 * Break the first mbuf off from the rest of the mbuf chain.
 	 */
@@ -557,13 +582,15 @@
 
 	/*
 	 * Insert the first mbuf of the m0 mbuf chain as the last record of
-	 * the sockbuf.  Note this permits zero length records!
+	 * the sockbuf.  Note this permits zero length records!  Keep the
+	 * sockbuf state consistent.
 	 */
 	if (sb->sb_mb == NULL)
 		sb->sb_mb = firstmbuf;
 	else
 		sb->sb_lastrecord->m_nextpkt = firstmbuf;
 	sb->sb_lastrecord = firstmbuf;	/* update hint for new last record */
+	sb->sb_lastmbuf = firstmbuf;	/* update hint for new last mbuf */
 
 	if ((firstmbuf->m_flags & M_EOR) && (secondmbuf != NULL)) {
 		/* propagate the EOR flag */
@@ -581,6 +608,7 @@
 	sbcompress(sb, secondmbuf, firstmbuf);
 }
 
+#if 0
 /*
  * As above except that OOB data is inserted at the beginning of the sockbuf,
  * but after any other OOB data.
@@ -591,7 +619,7 @@
 	struct mbuf *m;
 	struct mbuf **mp;
 
-	if (m0 == 0)
+	if (m0 == NULL)
 		return;
 	for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
 	    m = *mp;
@@ -619,13 +647,14 @@
 		sb->sb_lastrecord = m0;
 
 	m = m0->m_next;
-	m0->m_next = 0;
+	m0->m_next = NULL;
 	if (m && (m0->m_flags & M_EOR)) {
 		m0->m_flags &= ~M_EOR;
 		m->m_flags |= M_EOR;
 	}
 	sbcompress(sb, m, m0);
 }
+#endif
 
 /*
  * Append address and data, and optionally, control (ancillary) data
@@ -644,6 +673,7 @@
 
 	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
 		panic("sbappendaddr");
+	sbcheck(sb);
 
 	if (m0)
 		space += m0->m_pkthdr.len;
@@ -657,8 +687,9 @@
 	if (asa->sa_len > MLEN)
 		return (0);
 	MGET(m, MB_DONTWAIT, MT_SONAME);
-	if (m == 0)
+	if (m == NULL)
 		return (0);
+	KKASSERT(m->m_nextpkt == NULL);
 	m->m_len = asa->sa_len;
 	bcopy(asa, mtod(m, caddr_t), asa->sa_len);
 	if (n)
@@ -674,6 +705,9 @@
 	else
 		sb->sb_lastrecord->m_nextpkt = m;
 	sb->sb_lastrecord = m;
+	while (m->m_next)
+		m = m->m_next;
+	sb->sb_lastmbuf = m;
 
 	return (1);
 }
@@ -689,6 +723,8 @@
 	u_int length, cmbcnt, m0mbcnt;
 
 	KASSERT(control != NULL, ("sbappendcontrol"));
+	KKASSERT(control->m_nextpkt == NULL);
+	sbcheck(sb);
 
 	length = m_countm(control, &n, &cmbcnt) + m_countm(m0, NULL, &m0mbcnt);
 	if (length > sbspace(sb))
@@ -701,6 +737,7 @@
 	else
 		sb->sb_lastrecord->m_nextpkt = control;
 	sb->sb_lastrecord = control;
+	sb->sb_lastmbuf = m0;
 
 	sb->sb_cc += length;
 	sb->sb_mbcnt += cmbcnt + m0mbcnt;
@@ -717,7 +754,9 @@
 sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *tailm)
 {
 	int eor = 0;
+	struct mbuf *free_chain = NULL;
 
+	sbcheck(sb);
 	while (m) {
 		struct mbuf *o;
 
@@ -726,12 +765,18 @@
 		 * Disregard empty mbufs as long as we don't encounter
 		 * an end-of-record or there is a trailing mbuf of
 		 * the same type to propagate the EOR flag to.
+		 *
+		 * Defer the m_free() call because it can block and break
+		 * the atomicity of the sockbuf.
 		 */
 		if (m->m_len == 0 &&
 		    (eor == 0 ||
 		     (((o = m->m_next) || (o = tailm)) &&
 		      o->m_type == m->m_type))) {
-			m = m_free(m);
+			o = m->m_next;
+			m->m_next = free_chain;
+			free_chain = m;
+			m = o;
 			continue;
 		}
 
@@ -745,7 +790,10 @@
 			      (unsigned)m->m_len);
 			tailm->m_len += m->m_len;
 			sb->sb_cc += m->m_len;		/* update sb counter */
-			m = m_free(m);
+			o = m->m_next;
+			m->m_next = free_chain;
+			free_chain = m;
+			m = o;
 			continue;
 		}
 
@@ -753,7 +801,8 @@
 		if (tailm == NULL) {
 			KASSERT(sb->sb_mb == NULL,
 				("sbcompress: sb_mb not NULL"));
-			sb->sb_mb = m;		/* put at front of sockbuf */
+			sb->sb_mb = m;		/* only mbuf in sockbuf */
+			sb->sb_lastrecord = m;	/* new last record */
 		} else {
 			tailm->m_next = m;	/* tack m on following tailm */
 		}
@@ -770,12 +819,23 @@
 		tailm->m_flags &= ~M_EOR;
 	}
 
+	/*
+	 * Propagate EOR to the last mbuf
+	 */
 	if (eor) {
 		if (tailm)
-			tailm->m_flags |= eor;	/* propagate EOR to last mbuf */
+			tailm->m_flags |= eor;
 		else
 			printf("semi-panic: sbcompress");
 	}
+
+	/*
+	 * Clean up any deferred frees.
+	 */
+	while (free_chain)
+		free_chain = m_free(free_chain);
+
+	sbcheck(sb);
 }
 
 /*
@@ -812,19 +872,16 @@
 	int len;
 {
 	struct mbuf *m;
-	struct mbuf *nextpkt;
+	struct mbuf *free_chain = NULL;
+
+	sbcheck(sb);
+	crit_enter();
 
+	/*
+	 * Remove mbufs from multiple records until the count is exhausted.
+	 */
 	m = sb->sb_mb;
-	nextpkt = (m != NULL) ? m->m_nextpkt : NULL;
-	while (len > 0) {
-		if (m == NULL) {
-			if (nextpkt == NULL)
-				panic("sbdrop");
-			m = nextpkt;
-			nextpkt = m->m_nextpkt;
-			m->m_nextpkt = NULL;
-			continue;
-		}
+	while (m && len > 0) {
 		if (m->m_len > len) {
 			m->m_len -= len;
 			m->m_data += len;
@@ -832,41 +889,94 @@
 			break;
 		}
 		len -= m->m_len;
-		sbfree(sb, m);
-		m = m_free(m);
+		m = sbunlinkmbuf(sb, m, &free_chain);
+		if (m == NULL && len)
+			m = sb->sb_mb;
 	}
+
+	/*
+	 * Remove any trailing 0-length mbufs in the current record.  If
+	 * the last record for which data was removed is now empty, m will be
+	 * NULL.
+	 */
 	while (m && m->m_len == 0) {
-		sbfree(sb, m);
-		m = m_free(m);
-	}
-	if (m != NULL) {
-		sb->sb_mb = m;
-		m->m_nextpkt = nextpkt;
-	} else {
-		sb->sb_mb = nextpkt;
-		sb->sb_lastmbuf = NULL;		/* invalidate hint */
+		m = sbunlinkmbuf(sb, m, &free_chain);
 	}
+	crit_exit();
+	if (free_chain)
+		m_freem(free_chain);
+	sbcheck(sb);
 }
 
 /*
- * Drop a record off the front of a sockbuf
- * and move the next record to the front.
+ * Drop a record off the front of a sockbuf and move the next record
+ * to the front.
+ *
+ * Must be called while holding a critical section.
  */
 void
 sbdroprecord(sb)
 	struct sockbuf *sb;
 {
 	struct mbuf *m;
+	struct mbuf *n;
 
+	sbcheck(sb);
 	m = sb->sb_mb;
 	if (m) {
-		sb->sb_mb = m->m_nextpkt;
+		if ((sb->sb_mb = m->m_nextpkt) == NULL) {
+			sb->sb_lastrecord = NULL;
+			sb->sb_lastmbuf = NULL;
+		}
 		m->m_nextpkt = NULL;
-		do {
-			sbfree(sb, m);
-			m = m_free(m);
-		} while (m);
+		for (n = m; n; n = n->m_next)
+			sbfree(sb, n);
+		m_freem(m);
+		sbcheck(sb);
+	}
+}
+
+/*
+ * Drop the first mbuf off the sockbuf and move the next mbuf to the front.
+ * Currently only the head mbuf of the sockbuf may be dropped this way.
+ *
+ * The next mbuf in the same record as the mbuf being removed is returned
+ * or NULL if the record is exhausted.  Note that other records may remain
+ * in the sockbuf when NULL is returned.
+ *
+ * Must be called while holding a critical section.
+ */
+struct mbuf *
+sbunlinkmbuf(struct sockbuf *sb, struct mbuf *m, struct mbuf **free_chain)
+{
+	struct mbuf *n;
+
+	KKASSERT(sb->sb_mb == m);
+	sbfree(sb, m);
+	n = m->m_next;
+	if (n) {
+		sb->sb_mb = n;
+		if (sb->sb_lastrecord == m)
+			sb->sb_lastrecord = n;
+		KKASSERT(sb->sb_lastmbuf != m);
+		n->m_nextpkt = m->m_nextpkt;
+	} else {
+		sb->sb_mb = m->m_nextpkt;
+		if (sb->sb_lastrecord == m) {
+			KKASSERT(sb->sb_mb == NULL);
+			sb->sb_lastrecord = NULL;
+		}
+		if (sb->sb_mb == NULL)
+			sb->sb_lastmbuf = NULL;
+	}
+	m->m_nextpkt = NULL;
+	if (free_chain) {
+		m->m_next = *free_chain;
+		*free_chain = m;
+	} else {
+		m->m_next = NULL;
 	}
+	return(n);
 }
 
 /*
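
    The sockbuf changes above follow one rule throughout: mbufs are only
    unlinked from the sockbuf while inside the critical section, and the
    actual m_free()/m_freem() calls are deferred by chaining the unlinked
    mbufs onto a local free_chain and freeing them after crit_exit(),
    because freeing can block and would break the atomicity the critical
    section is supposed to provide (see sbunlinkmbuf() and the reworked
    sbdrop()/sbcompress()).  Here is a small userland sketch of the same
    unlink-under-the-lock, free-outside-it idea; the queue and node types
    and names are invented for the example.

/* cc -pthread deferred_free.c */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node	*next;
	int		len;
};

struct queue {
	pthread_mutex_t	lock;
	struct node	*head;
};

/*
 * Drop up to 'len' bytes from the front of the queue.  Nodes are only
 * unlinked while the lock is held; the potentially blocking free()
 * calls happen after the lock is dropped, the way the patch chains
 * mbufs onto 'free_chain' and calls m_freem() only after crit_exit().
 */
static void
queue_drop(struct queue *q, int len)
{
	struct node *n, *free_chain = NULL;

	pthread_mutex_lock(&q->lock);
	while ((n = q->head) != NULL && len > 0) {
		if (n->len > len) {
			n->len -= len;
			break;
		}
		len -= n->len;
		q->head = n->next;	/* unlink under the lock */
		n->next = free_chain;	/* defer the free */
		free_chain = n;
	}
	pthread_mutex_unlock(&q->lock);

	while (free_chain != NULL) {	/* now it is safe to block */
		n = free_chain->next;
		free(free_chain);
		free_chain = n;
	}
}

int
main(void)
{
	struct queue q = { PTHREAD_MUTEX_INITIALIZER, NULL };
	struct node *p;
	int i;

	for (i = 0; i < 4; i++) {
		p = malloc(sizeof(*p));
		p->len = 100;
		p->next = q.head;
		q.head = p;
	}
	queue_drop(&q, 250);
	for (i = 0, p = q.head; p; p = p->next)
		i++;
	printf("%d nodes left\n", i);
	return (0);
}
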
Index: sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.19
diff -u -r1.19 socketvar.h
--- sys/socketvar.h	13 Jul 2005 01:38:53 -0000	1.19
+++ sys/socketvar.h	17 Jul 2005 07:21:24 -0000
@@ -180,6 +180,12 @@
  * Macros for sockets and socket buffering.
  */
 
+#ifdef SOCKBUF_DEBUG
+#define sbcheck(sb)	_sbcheck(sb)
+#else
+#define sbcheck(sb)
+#endif
+
 /*
  * Do we need to notify the other side when I/O is possible?
  */
@@ -337,12 +343,14 @@
 	    struct mbuf *control);
 void	sbappendrecord (struct sockbuf *sb, struct mbuf *m0);
 void	sbappendstream (struct sockbuf *sb, struct mbuf *m);
-void	sbcheck (struct sockbuf *sb);
+void	_sbcheck (struct sockbuf *sb);
 void	sbcompress (struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
 struct mbuf *
 	sbcreatecontrol (caddr_t p, int size, int type, int level);
 void	sbdrop (struct sockbuf *sb, int len);
 void	sbdroprecord (struct sockbuf *sb);
+struct mbuf *
+	sbunlinkmbuf (struct sockbuf *, struct mbuf *, struct mbuf **);
 void	sbflush (struct sockbuf *sb);
 void	sbinsertoob (struct sockbuf *sb, struct mbuf *m0);
 void	sbrelease (struct sockbuf *sb, struct socket *so);
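
    The socketvar.h hunk above turns sbcheck() into a macro that calls
    _sbcheck() only when SOCKBUF_DEBUG is defined and expands to nothing
    otherwise, so the consistency checks sprinkled through uipc_socket2.c
    cost nothing in a normal build.  A self-contained sketch of the same
    compile-time switch follows; QUEUE_DEBUG and the queue names are
    invented for the example and are not part of the patch.

/* cc [-DQUEUE_DEBUG] debug_check.c */
#include <stdio.h>
#include <stdlib.h>

#ifdef QUEUE_DEBUG
#define queue_check(q)	_queue_check(q)
#else
#define queue_check(q)
#endif

struct queue {
	int	count;		/* cached element count */
	int	real_count;	/* stand-in for walking the real list */
};

#ifdef QUEUE_DEBUG
static void
_queue_check(struct queue *q)
{
	if (q->count != q->real_count) {
		printf("queue %p mismatched count %d vs %d\n",
		    (void *)q, q->count, q->real_count);
		abort();	/* the kernel version panics instead */
	}
}
#endif

int
main(void)
{
	struct queue q = { 1, 2 };	/* deliberately inconsistent */

	queue_check(&q);	/* expands to nothing without QUEUE_DEBUG */
	printf("count %d, real count %d\n", q.count, q.real_count);
	return (0);
}
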
Index: vfs/ufs/ufs_inode.c
===================================================================
RCS file: /cvs/src/sys/vfs/ufs/ufs_inode.c,v
retrieving revision 1.12
diff -u -r1.12 ufs_inode.c
--- vfs/ufs/ufs_inode.c	14 Dec 2004 23:59:47 -0000	1.12
+++ vfs/ufs/ufs_inode.c	17 Jul 2005 22:40:18 -0000
@@ -44,6 +44,7 @@
 #include "opt_ufs.h"
 
 #include <sys/param.h>
+#include <sys/systm.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/malloc.h>
@@ -125,6 +126,11 @@
 		ip->i_flag |= IN_MODIFIED;
 		UFS_UPDATE(vp, 0);
 	}
+	if (ip && (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE))) {
+		printf("WARNING: INODE %ld flags %08x: modified inode being released!\n", (long)ip->i_number, (int)ip->i_flag);
+		ip->i_flag |= IN_MODIFIED;
+		UFS_UPDATE(vp, 0);
+	}
 	/*
 	 * Remove the inode from its hash chain and purge namecache
 	 * data associated with the vnode.


