From 0f4bd46ec252887f44f1f065b41867cac8f70dfb Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 22 Dec 2009 03:15:43 +0000
Subject: kmsg_dump: Dump on crash_kexec as well

crash_kexec gets called before kmsg_dump(KMSG_DUMP_OOPS) if
panic_on_oops is set, so the kernel log buffer is not stored
for this case.

This patch adds a KMSG_DUMP_KEXEC dump type which gets called
when crash_kexec() is invoked. To avoid getting double dumps,
the old KMSG_DUMP_PANIC is moved below crash_kexec(). The
mtdoops driver is modified to handle KMSG_DUMP_KEXEC in the
same way as a panic.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/kmsg_dump.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index e32aa268efac..24b44145a886 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -17,6 +17,7 @@
 enum kmsg_dump_reason {
 	KMSG_DUMP_OOPS,
 	KMSG_DUMP_PANIC,
+	KMSG_DUMP_KEXEC,
 };
 
 /**
-- 
cgit v1.2.3


From 59b015133cd0034f5904a76969d73476380aac46 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 5 Jan 2010 17:56:02 -0800
Subject: Input: serio - fix potential deadlock when unbinding drivers

sysfs_remove_group() waits for sysfs attributes to be removed, therefore
we do not need to worry about driver-specific attributes being accessed
after driver has been detached from the device. In fact, attempts to take
serio->drv_mutex in attribute methods may lead to the following deadlock:

                                          sysfs_read_file()
                                            fill_read_buffer()
                                              sysfs_get_active_two()
                                                psmouse_attr_show_helper()
                                                  serio_pin_driver()
serio_disconnect_driver()
  mutex_lock(&serio->drv_mutex);
                                <-------->        mutex_lock(&serio_drv_mutex);
    psmouse_disconnect()
      sysfs_remove_group(... psmouse_attr_group);
        ....
        sysfs_deactivate();
          wait_for_completion();

Fix this by removing calls to serio_[un]pin_driver() and functions themselves
and using driver-private mutexes to serialize access to attribute's set()
methods that may change device state.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/atkbd.c     | 59 ++++++++++++++++----------------------
 drivers/input/mouse/psmouse-base.c | 32 ++-------------------
 include/linux/serio.h              | 19 ------------
 3 files changed, 28 insertions(+), 82 deletions(-)

(limited to 'include')

diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 1f5e2ce327d6..1cf32a7814d0 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -225,8 +225,10 @@ struct atkbd {
 
 	struct delayed_work event_work;
 	unsigned long event_jiffies;
-	struct mutex event_mutex;
 	unsigned long event_mask;
+
+	/* Serializes reconnect(), attr->set() and event work */
+	struct mutex mutex;
 };
 
 /*
@@ -577,7 +579,7 @@ static void atkbd_event_work(struct work_struct *work)
 {
 	struct atkbd *atkbd = container_of(work, struct atkbd, event_work.work);
 
-	mutex_lock(&atkbd->event_mutex);
+	mutex_lock(&atkbd->mutex);
 
 	if (!atkbd->enabled) {
 		/*
@@ -596,7 +598,7 @@ static void atkbd_event_work(struct work_struct *work)
 			atkbd_set_repeat_rate(atkbd);
 	}
 
-	mutex_unlock(&atkbd->event_mutex);
+	mutex_unlock(&atkbd->mutex);
 }
 
 /*
@@ -612,7 +614,7 @@ static void atkbd_schedule_event_work(struct atkbd *atkbd, int event_bit)
 
 	atkbd->event_jiffies = jiffies;
 	set_bit(event_bit, &atkbd->event_mask);
-	wmb();
+	mb();
 	schedule_delayed_work(&atkbd->event_work, delay);
 }
 
@@ -849,12 +851,13 @@ static void atkbd_disconnect(struct serio *serio)
 {
 	struct atkbd *atkbd = serio_get_drvdata(serio);
 
+	sysfs_remove_group(&serio->dev.kobj, &atkbd_attribute_group);
+
 	atkbd_disable(atkbd);
 
 	/* make sure we don't have a command in flight */
 	cancel_delayed_work_sync(&atkbd->event_work);
 
-	sysfs_remove_group(&serio->dev.kobj, &atkbd_attribute_group);
 	input_unregister_device(atkbd->dev);
 	serio_close(serio);
 	serio_set_drvdata(serio, NULL);
@@ -1087,7 +1090,7 @@ static int atkbd_connect(struct serio *serio, struct serio_driver *drv)
 	atkbd->dev = dev;
 	ps2_init(&atkbd->ps2dev, serio);
 	INIT_DELAYED_WORK(&atkbd->event_work, atkbd_event_work);
-	mutex_init(&atkbd->event_mutex);
+	mutex_init(&atkbd->mutex);
 
 	switch (serio->id.type) {
 
@@ -1160,19 +1163,23 @@ static int atkbd_reconnect(struct serio *serio)
 {
 	struct atkbd *atkbd = serio_get_drvdata(serio);
 	struct serio_driver *drv = serio->drv;
+	int retval = -1;
 
 	if (!atkbd || !drv) {
 		printk(KERN_DEBUG "atkbd: reconnect request, but serio is disconnected, ignoring...\n");
 		return -1;
 	}
 
+	mutex_lock(&atkbd->mutex);
+
 	atkbd_disable(atkbd);
 
 	if (atkbd->write) {
 		if (atkbd_probe(atkbd))
-			return -1;
+			goto out;
+
 		if (atkbd->set != atkbd_select_set(atkbd, atkbd->set, atkbd->extra))
-			return -1;
+			goto out;
 
 		atkbd_activate(atkbd);
 
@@ -1190,8 +1197,11 @@ static int atkbd_reconnect(struct serio *serio)
 	}
 
 	atkbd_enable(atkbd);
+	retval = 0;
 
-	return 0;
+ out:
+	mutex_unlock(&atkbd->mutex);
+	return retval;
 }
 
 static struct serio_device_id atkbd_serio_ids[] = {
@@ -1235,47 +1245,28 @@ static ssize_t atkbd_attr_show_helper(struct device *dev, char *buf,
 				ssize_t (*handler)(struct atkbd *, char *))
 {
 	struct serio *serio = to_serio_port(dev);
-	int retval;
-
-	retval = serio_pin_driver(serio);
-	if (retval)
-		return retval;
-
-	if (serio->drv != &atkbd_drv) {
-		retval = -ENODEV;
-		goto out;
-	}
-
-	retval = handler((struct atkbd *)serio_get_drvdata(serio), buf);
+	struct atkbd *atkbd = serio_get_drvdata(serio);
 
-out:
-	serio_unpin_driver(serio);
-	return retval;
+	return handler(atkbd, buf);
 }
 
 static ssize_t atkbd_attr_set_helper(struct device *dev, const char *buf, size_t count,
 				ssize_t (*handler)(struct atkbd *, const char *, size_t))
 {
 	struct serio *serio = to_serio_port(dev);
-	struct atkbd *atkbd;
+	struct atkbd *atkbd = serio_get_drvdata(serio);
 	int retval;
 
-	retval = serio_pin_driver(serio);
+	retval = mutex_lock_interruptible(&atkbd->mutex);
 	if (retval)
 		return retval;
 
-	if (serio->drv != &atkbd_drv) {
-		retval = -ENODEV;
-		goto out;
-	}
-
-	atkbd = serio_get_drvdata(serio);
 	atkbd_disable(atkbd);
 	retval = handler(atkbd, buf, count);
 	atkbd_enable(atkbd);
 
-out:
-	serio_unpin_driver(serio);
+	mutex_unlock(&atkbd->mutex);
+
 	return retval;
 }
 
diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index 401ac6b6edd4..d59e18b24ede 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -1450,24 +1450,10 @@ ssize_t psmouse_attr_show_helper(struct device *dev, struct device_attribute *de
 	struct serio *serio = to_serio_port(dev);
 	struct psmouse_attribute *attr = to_psmouse_attr(devattr);
 	struct psmouse *psmouse;
-	int retval;
-
-	retval = serio_pin_driver(serio);
-	if (retval)
-		return retval;
-
-	if (serio->drv != &psmouse_drv) {
-		retval = -ENODEV;
-		goto out;
-	}
 
 	psmouse = serio_get_drvdata(serio);
 
-	retval = attr->show(psmouse, attr->data, buf);
-
-out:
-	serio_unpin_driver(serio);
-	return retval;
+	return attr->show(psmouse, attr->data, buf);
 }
 
 ssize_t psmouse_attr_set_helper(struct device *dev, struct device_attribute *devattr,
@@ -1478,18 +1464,9 @@ ssize_t psmouse_attr_set_helper(struct device *dev, struct device_attribute *dev
 	struct psmouse *psmouse, *parent = NULL;
 	int retval;
 
-	retval = serio_pin_driver(serio);
-	if (retval)
-		return retval;
-
-	if (serio->drv != &psmouse_drv) {
-		retval = -ENODEV;
-		goto out_unpin;
-	}
-
 	retval = mutex_lock_interruptible(&psmouse_mutex);
 	if (retval)
-		goto out_unpin;
+		goto out;
 
 	psmouse = serio_get_drvdata(serio);
 
@@ -1519,8 +1496,7 @@ ssize_t psmouse_attr_set_helper(struct device *dev, struct device_attribute *dev
 
  out_unlock:
 	mutex_unlock(&psmouse_mutex);
- out_unpin:
-	serio_unpin_driver(serio);
+ out:
 	return retval;
 }
 
@@ -1582,9 +1558,7 @@ static ssize_t psmouse_attr_set_protocol(struct psmouse *psmouse, void *data, co
 		}
 
 		mutex_unlock(&psmouse_mutex);
-		serio_unpin_driver(serio);
 		serio_unregister_child_port(serio);
-		serio_pin_driver_uninterruptible(serio);
 		mutex_lock(&psmouse_mutex);
 
 		if (serio->drv != &psmouse_drv) {
diff --git a/include/linux/serio.h b/include/linux/serio.h
index e2f3044d4a4a..813d26c247ec 100644
--- a/include/linux/serio.h
+++ b/include/linux/serio.h
@@ -136,25 +136,6 @@ static inline void serio_continue_rx(struct serio *serio)
 	spin_unlock_irq(&serio->lock);
 }
 
-/*
- * Use the following functions to pin serio's driver in process context
- */
-static inline int serio_pin_driver(struct serio *serio)
-{
-	return mutex_lock_interruptible(&serio->drv_mutex);
-}
-
-static inline void serio_pin_driver_uninterruptible(struct serio *serio)
-{
-	mutex_lock(&serio->drv_mutex);
-}
-
-static inline void serio_unpin_driver(struct serio *serio)
-{
-	mutex_unlock(&serio->drv_mutex);
-}
-
-
 #endif
 
 /*
-- 
cgit v1.2.3


From 8558e3943df1c51c3377cb4e8a52ea484d6f357d Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 6 Jan 2010 16:11:06 -0500
Subject: x86, ACPI: delete acpi_boot_table_init() return value

cleanup only.

setup_arch(), doesn't care care if ACPI initialization succeeded
or failed, so delete acpi_boot_table_init()'s return value.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/acpi/boot.c | 22 ++++++----------------
 include/linux/acpi.h        |  6 +++---
 2 files changed, 9 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index fb1035cd9a6a..036d28adf59d 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1529,16 +1529,10 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = {
  *	if acpi_blacklisted() acpi_disabled = 1;
  *	acpi_irq_model=...
  *	...
- *
- * return value: (currently ignored)
- *	0: success
- *	!0: failure
  */
 
-int __init acpi_boot_table_init(void)
+void __init acpi_boot_table_init(void)
 {
-	int error;
-
 	dmi_check_system(acpi_dmi_table);
 
 	/*
@@ -1546,15 +1540,14 @@ int __init acpi_boot_table_init(void)
 	 * One exception: acpi=ht continues far enough to enumerate LAPICs
 	 */
 	if (acpi_disabled && !acpi_ht)
-		return 1;
+		return; 
 
 	/*
 	 * Initialize the ACPI boot-time table parser.
 	 */
-	error = acpi_table_init();
-	if (error) {
+	if (acpi_table_init()) {
 		disable_acpi();
-		return error;
+		return;
 	}
 
 	acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
@@ -1562,18 +1555,15 @@ int __init acpi_boot_table_init(void)
 	/*
 	 * blacklist may disable ACPI entirely
 	 */
-	error = acpi_blacklisted();
-	if (error) {
+	if (acpi_blacklisted()) {
 		if (acpi_force) {
 			printk(KERN_WARNING PREFIX "acpi=force override\n");
 		} else {
 			printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
 			disable_acpi();
-			return error;
+			return;
 		}
 	}
-
-	return 0;
 }
 
 int __init early_acpi_boot_init(void)
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 36924255c0d5..b926afe8c03e 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -80,7 +80,7 @@ char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
 void __acpi_unmap_table(char *map, unsigned long size);
 int early_acpi_boot_init(void);
 int acpi_boot_init (void);
-int acpi_boot_table_init (void);
+void acpi_boot_table_init (void);
 int acpi_mps_check (void);
 int acpi_numa_init (void);
 
@@ -321,9 +321,9 @@ static inline int acpi_boot_init(void)
 	return 0;
 }
 
-static inline int acpi_boot_table_init(void)
+static inline void acpi_boot_table_init(void)
 {
-	return 0;
+	return;
 }
 
 static inline int acpi_mps_check(void)
-- 
cgit v1.2.3


From dd3d145d49c5816b79acc6761ebbd842bc50b0ee Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Mon, 11 Jan 2010 03:21:48 -0500
Subject: block: Fix discard alignment calculation and printing

Discard alignment reporting for partitions was incorrect.  Update to
match the algorithm used elsewhere.

The alignment can be negative (misaligned).  Fix format string
accordingly.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/genhd.c          | 2 +-
 include/linux/blkdev.h | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/block/genhd.c b/block/genhd.c
index b11a4ad7d571..d13ba76a169c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -867,7 +867,7 @@ static ssize_t disk_discard_alignment_show(struct device *dev,
 {
 	struct gendisk *disk = dev_to_disk(dev);
 
-	return sprintf(buf, "%u\n", queue_discard_alignment(disk->queue));
+	return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
 }
 
 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9b98173a8184..a41bcc8e140f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1148,8 +1148,11 @@ static inline int queue_discard_alignment(struct request_queue *q)
 static inline int queue_sector_discard_alignment(struct request_queue *q,
 						 sector_t sector)
 {
-	return ((sector << 9) - q->limits.discard_alignment)
-		& (q->limits.discard_granularity - 1);
+	struct queue_limits *lim = &q->limits;
+	unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);
+
+	return (lim->discard_granularity + lim->discard_alignment - alignment)
+		& (lim->discard_granularity - 1);
 }
 
 static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
-- 
cgit v1.2.3


From 17be8c245054b9c7786545af3ba3ca4e54cd4ad9 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Mon, 11 Jan 2010 03:21:49 -0500
Subject: block: bdev_stack_limits wrapper

DM does not want to know about partition offsets.  Add a partition-aware
wrapper that DM can use when stacking block devices.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-settings.c   | 22 ++++++++++++++++++++++
 include/linux/blkdev.h |  2 ++
 2 files changed, 24 insertions(+)

(limited to 'include')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 127f82551855..5eeb9e0d256e 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -639,6 +639,28 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 }
 EXPORT_SYMBOL(blk_stack_limits);
 
+/**
+ * bdev_stack_limits - adjust queue limits for stacked drivers
+ * @t:	the stacking driver limits (top device)
+ * @bdev:  the component block_device (bottom)
+ * @start:  first data sector within component device
+ *
+ * Description:
+ *    Merges queue limits for a top device and a block_device.  Returns
+ *    0 if alignment didn't change.  Returns -1 if adding the bottom
+ *    device caused misalignment.
+ */
+int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
+		      sector_t start)
+{
+	struct request_queue *bq = bdev_get_queue(bdev);
+
+	start += get_start_sect(bdev);
+
+	return blk_stack_limits(t, &bq->limits, start << 9);
+}
+EXPORT_SYMBOL(bdev_stack_limits);
+
 /**
  * disk_stack_limits - adjust queue limits for stacked drivers
  * @disk:  MD/DM gendisk (top)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a41bcc8e140f..5c8018977efa 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -938,6 +938,8 @@ extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
 extern void blk_set_default_limits(struct queue_limits *lim);
 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 			    sector_t offset);
+extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
+			    sector_t offset);
 extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
 			      sector_t offset);
 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
-- 
cgit v1.2.3


From ce289321b7dc1eb108e3df0dec872b7429ef49f7 Mon Sep 17 00:00:00 2001
From: Kirill Afonshin <kirill_nnov@mail.ru>
Date: Fri, 8 Jan 2010 22:09:59 +0300
Subject: block: removed unused as_io_context

It isn't used anymore, since AS was deleted.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-ioc.c           |  5 -----
 include/linux/iocontext.h | 27 ---------------------------
 2 files changed, 32 deletions(-)

(limited to 'include')

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index cbdabb0dd6d7..98e6bf61b0ac 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -39,8 +39,6 @@ int put_io_context(struct io_context *ioc)
 
 	if (atomic_long_dec_and_test(&ioc->refcount)) {
 		rcu_read_lock();
-		if (ioc->aic && ioc->aic->dtor)
-			ioc->aic->dtor(ioc->aic);
 		cfq_dtor(ioc);
 		rcu_read_unlock();
 
@@ -76,8 +74,6 @@ void exit_io_context(struct task_struct *task)
 	task_unlock(task);
 
 	if (atomic_dec_and_test(&ioc->nr_tasks)) {
-		if (ioc->aic && ioc->aic->exit)
-			ioc->aic->exit(ioc->aic);
 		cfq_exit(ioc);
 
 	}
@@ -97,7 +93,6 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
 		ret->ioprio = 0;
 		ret->last_waited = jiffies; /* doesn't matter... */
 		ret->nr_batch_requests = 0; /* because this is 0 */
-		ret->aic = NULL;
 		INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
 		INIT_HLIST_HEAD(&ret->cic_list);
 		ret->ioc_data = NULL;
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index a63235996309..78ef023227d4 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -4,32 +4,6 @@
 #include <linux/radix-tree.h>
 #include <linux/rcupdate.h>
 
-/*
- * This is the per-process anticipatory I/O scheduler state.
- */
-struct as_io_context {
-	spinlock_t lock;
-
-	void (*dtor)(struct as_io_context *aic); /* destructor */
-	void (*exit)(struct as_io_context *aic); /* called on task exit */
-
-	unsigned long state;
-	atomic_t nr_queued; /* queued reads & sync writes */
-	atomic_t nr_dispatched; /* number of requests gone to the drivers */
-
-	/* IO History tracking */
-	/* Thinktime */
-	unsigned long last_end_request;
-	unsigned long ttime_total;
-	unsigned long ttime_samples;
-	unsigned long ttime_mean;
-	/* Layout pattern */
-	unsigned int seek_samples;
-	sector_t last_request_pos;
-	u64 seek_total;
-	sector_t seek_mean;
-};
-
 struct cfq_queue;
 struct cfq_io_context {
 	void *key;
@@ -78,7 +52,6 @@ struct io_context {
 	unsigned long last_waited; /* Time last woken after wait for request */
 	int nr_batch_requests;     /* Number of requests left in the batch */
 
-	struct as_io_context *aic;
 	struct radix_tree_root radix_root;
 	struct hlist_head cic_list;
 	void *ioc_data;
-- 
cgit v1.2.3


From 7af92f8754b87bc78cbfd447d5f4096b25c46682 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Wed, 6 Jan 2010 15:45:55 -0800
Subject: genhd: overlapping variable definition

This fixes the sparse warning:
fs/ext4/super.c:2390:40: warning: symbol 'i' shadows an earlier one
fs/ext4/super.c:2368:22: originally declared here

Using 'i' in a macro is dubious practice.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/genhd.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index c6c0c41af35f..9717081c75ad 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -256,9 +256,9 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk,
 #define part_stat_read(part, field)					\
 ({									\
 	typeof((part)->dkstats->field) res = 0;				\
-	int i;								\
-	for_each_possible_cpu(i)					\
-		res += per_cpu_ptr((part)->dkstats, i)->field;		\
+	unsigned int _cpu;						\
+	for_each_possible_cpu(_cpu)					\
+		res += per_cpu_ptr((part)->dkstats, _cpu)->field;	\
 	res;								\
 })
 
-- 
cgit v1.2.3


From a393db6f10ef2d4f28257234cfc730e744dfb6a4 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Tue, 22 Dec 2009 13:35:52 +0100
Subject: drbd: Allow online resizing of DRBD devices while peer not reachable
 (needs to be explicitly forced)

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |  5 ++---
 drivers/block/drbd/drbd_nl.c       | 17 +++++++++++------
 drivers/block/drbd/drbd_receiver.c |  4 ++--
 include/linux/drbd_nl.h            |  1 +
 4 files changed, 16 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 79d8e22c4d0d..2bf3a6ef3684 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1371,10 +1371,9 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t);
 extern void drbd_suspend_io(struct drbd_conf *mdev);
 extern void drbd_resume_io(struct drbd_conf *mdev);
 extern char *ppsize(char *buf, unsigned long long size);
-extern sector_t drbd_new_dev_size(struct drbd_conf *,
-		struct drbd_backing_dev *);
+extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int);
 enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
-extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *) __must_hold(local);
+extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, int force) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
 extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local);
 extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role,
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 3313901a4861..1292e0620663 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -510,7 +510,7 @@ void drbd_resume_io(struct drbd_conf *mdev)
  * Returns 0 on success, negative return values indicate errors.
  * You should call drbd_md_sync() after calling this function.
  */
-enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_hold(local)
+enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, int force) __must_hold(local)
 {
 	sector_t prev_first_sect, prev_size; /* previous meta location */
 	sector_t la_size;
@@ -541,7 +541,7 @@ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_ho
 	/* TODO: should only be some assert here, not (re)init... */
 	drbd_md_set_sector_offsets(mdev, mdev->ldev);
 
-	size = drbd_new_dev_size(mdev, mdev->ldev);
+	size = drbd_new_dev_size(mdev, mdev->ldev, force);
 
 	if (drbd_get_capacity(mdev->this_bdev) != size ||
 	    drbd_bm_capacity(mdev) != size) {
@@ -596,7 +596,7 @@ out:
 }
 
 sector_t
-drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
+drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int assume_peer_has_space)
 {
 	sector_t p_size = mdev->p_size;   /* partner's disk size. */
 	sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */
@@ -606,6 +606,11 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 
 	m_size = drbd_get_max_capacity(bdev);
 
+	if (mdev->state.conn < C_CONNECTED && assume_peer_has_space) {
+		dev_warn(DEV, "Resize while not connected was forced by the user!\n");
+		p_size = m_size;
+	}
+
 	if (p_size && m_size) {
 		size = min_t(sector_t, p_size, m_size);
 	} else {
@@ -965,7 +970,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 
 	/* Prevent shrinking of consistent devices ! */
 	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
-	   drbd_new_dev_size(mdev, nbc) < nbc->md.la_size_sect) {
+	    drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) {
 		dev_warn(DEV, "refusing to truncate a consistent device\n");
 		retcode = ERR_DISK_TO_SMALL;
 		goto force_diskless_dec;
@@ -1052,7 +1057,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	    !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
 		set_bit(USE_DEGR_WFC_T, &mdev->flags);
 
-	dd = drbd_determin_dev_size(mdev);
+	dd = drbd_determin_dev_size(mdev, 0);
 	if (dd == dev_size_error) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto force_diskless_dec;
@@ -1504,7 +1509,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 	}
 
 	mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
-	dd = drbd_determin_dev_size(mdev);
+	dd = drbd_determin_dev_size(mdev, rs.resize_force);
 	drbd_md_sync(mdev);
 	put_ldev(mdev);
 	if (dd == dev_size_error) {
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index e3716fadc6a5..f22a5283128a 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2870,7 +2870,7 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h)
 
 		/* Never shrink a device with usable data during connect.
 		   But allow online shrinking if we are connected. */
-		if (drbd_new_dev_size(mdev, mdev->ldev) <
+		if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
 		   drbd_get_capacity(mdev->this_bdev) &&
 		   mdev->state.disk >= D_OUTDATED &&
 		   mdev->state.conn < C_CONNECTED) {
@@ -2885,7 +2885,7 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h)
 #undef min_not_zero
 
 	if (get_ldev(mdev)) {
-		dd = drbd_determin_dev_size(mdev);
+	  dd = drbd_determin_dev_size(mdev, 0);
 		put_ldev(mdev);
 		if (dd == dev_size_error)
 			return FALSE;
diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
index db5721ad50d1..a4d82f895994 100644
--- a/include/linux/drbd_nl.h
+++ b/include/linux/drbd_nl.h
@@ -69,6 +69,7 @@ NL_PACKET(disconnect, 6, )
 
 NL_PACKET(resize, 7,
 	NL_INT64(		29,	T_MAY_IGNORE,	resize_size)
+	NL_BIT(			68,	T_MAY_IGNORE,	resize_force)
 )
 
 NL_PACKET(syncer_conf, 8,
-- 
cgit v1.2.3


From 2ebccd71a71e6078920bc65b40f120e72b71c2b6 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Tue, 12 Jan 2010 10:09:07 +0100
Subject: drbd: The kernel code is now equivalent to out of tree release 8.3.7

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index e84f4733cb55..78962272338a 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -53,7 +53,7 @@
 
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.6"
+#define REL_VERSION "8.3.7"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 91
-- 
cgit v1.2.3


From f6a8c60960bbea378142d1fa1b3d111555ee41c7 Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Date: Sun, 29 Nov 2009 15:23:51 +0000
Subject: mtd: Really add ARM pismo support

(Commit 7cb777a3d71f9d1f7eb149c7a504d21f24219ae8 (mtd: add ARM pismo support)
intended to add this, but seems only to have patched the Makefile without
touching Kconfig or providing any code...)

The following patch adds support for PISMO modules found on ARM Ltd
development platforms.  These are MTD modules, and can have a
selection of SRAM, flash or DOC devices as described by an on-board
I2C EEPROM.

We support SRAM and NOR flash devices only by registering appropriate
conventional MTD platform devices as children of the 'pismo' device.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/Kconfig  |  17 +++
 drivers/mtd/maps/pismo.c  | 320 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/pismo.h |  17 +++
 3 files changed, 354 insertions(+)
 create mode 100644 drivers/mtd/maps/pismo.c
 create mode 100644 include/linux/mtd/pismo.h

(limited to 'include')

diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 4c364d44ad59..2de0cc823d60 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -549,4 +549,21 @@ config MTD_VMU
 	  To build this as a module select M here, the module will be called
 	  vmu-flash.
 
+config MTD_PISMO
+	tristate "MTD discovery driver for PISMO modules"
+	depends on I2C
+	depends on ARCH_VERSATILE
+	help
+	  This driver allows for discovery of PISMO modules - see
+	  <http://www.pismoworld.org/>.  These are small modules containing
+	  up to five memory devices (eg, SRAM, flash, DOC) described by an
+	  I2C EEPROM.
+
+	  This driver does not create any MTD maps itself; instead it
+	  creates MTD physmap and MTD SRAM platform devices.  If you
+	  enable this option, you should consider enabling MTD_PHYSMAP
+	  and/or MTD_PLATRAM according to the devices on your module.
+
+	  When built as a module, it will be called pismo.ko
+
 endmenu
diff --git a/drivers/mtd/maps/pismo.c b/drivers/mtd/maps/pismo.c
new file mode 100644
index 000000000000..c48cad271f5d
--- /dev/null
+++ b/drivers/mtd/maps/pismo.c
@@ -0,0 +1,320 @@
+/*
+ * PISMO memory driver - http://www.pismoworld.org/
+ *
+ * For ARM Realview and Versatile platforms
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/mtd/physmap.h>
+#include <linux/mtd/plat-ram.h>
+#include <linux/mtd/pismo.h>
+
+#define PISMO_NUM_CS	5
+
+struct pismo_cs_block {
+	u8	type;
+	u8	width;
+	__le16	access;
+	__le32	size;
+	u32	reserved[2];
+	char	device[32];
+} __packed;
+
+struct pismo_eeprom {
+	struct pismo_cs_block cs[PISMO_NUM_CS];
+	char	board[15];
+	u8	sum;
+} __packed;
+
+struct pismo_mem {
+	phys_addr_t base;
+	u32	size;
+	u16	access;
+	u8	width;
+	u8	type;
+};
+
+struct pismo_data {
+	struct i2c_client	*client;
+	void			(*vpp)(void *, int);
+	void			*vpp_data;
+	struct platform_device	*dev[PISMO_NUM_CS];
+};
+
+/* FIXME: set_vpp could do with a better calling convention */
+static struct pismo_data *vpp_pismo;
+static DEFINE_MUTEX(pismo_mutex);
+
+static int pismo_setvpp_probe_fix(struct pismo_data *pismo)
+{
+	mutex_lock(&pismo_mutex);
+	if (vpp_pismo) {
+		mutex_unlock(&pismo_mutex);
+		kfree(pismo);
+		return -EBUSY;
+	}
+	vpp_pismo = pismo;
+	mutex_unlock(&pismo_mutex);
+	return 0;
+}
+
+static void pismo_setvpp_remove_fix(struct pismo_data *pismo)
+{
+	mutex_lock(&pismo_mutex);
+	if (vpp_pismo == pismo)
+		vpp_pismo = NULL;
+	mutex_unlock(&pismo_mutex);
+}
+
+static void pismo_set_vpp(struct map_info *map, int on)
+{
+	struct pismo_data *pismo = vpp_pismo;
+
+	pismo->vpp(pismo->vpp_data, on);
+}
+/* end of hack */
+
+
+static unsigned int __devinit pismo_width_to_bytes(unsigned int width)
+{
+	width &= 15;
+	if (width > 2)
+		return 0;
+	return 1 << width;
+}
+
+static int __devinit pismo_eeprom_read(struct i2c_client *client, void *buf,
+	u8 addr, size_t size)
+{
+	int ret;
+	struct i2c_msg msg[] = {
+		{
+			.addr = client->addr,
+			.len = sizeof(addr),
+			.buf = &addr,
+		}, {
+			.addr = client->addr,
+			.flags = I2C_M_RD,
+			.len = size,
+			.buf = buf,
+		},
+	};
+
+	ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg));
+
+	return ret == ARRAY_SIZE(msg) ? size : -EIO;
+}
+
+static int __devinit pismo_add_device(struct pismo_data *pismo, int i,
+	struct pismo_mem *region, const char *name, void *pdata, size_t psize)
+{
+	struct platform_device *dev;
+	struct resource res = { };
+	phys_addr_t base = region.base;
+	int ret;
+
+	if (base == ~0)
+		return -ENXIO;
+
+	res.start = base;
+	res.end = base + region->size - 1;
+	res.flags = IORESOURCE_MEM;
+
+	dev = platform_device_alloc(name, i);
+	if (!dev)
+		return -ENOMEM;
+	dev->dev.parent = &pismo->client->dev;
+
+	do {
+		ret = platform_device_add_resources(dev, &res, 1);
+		if (ret)
+			break;
+
+		ret = platform_device_add_data(dev, pdata, psize);
+		if (ret)
+			break;
+
+		ret = platform_device_add(dev);
+		if (ret)
+			break;
+
+		pismo->dev[i] = dev;
+		return 0;
+	} while (0);
+
+	platform_device_put(dev);
+	return ret;
+}
+
+static int __devinit pismo_add_nor(struct pismo_data *pismo, int i,
+	struct pismo_mem *region)
+{
+	struct physmap_flash_data data = {
+		.width = region->width,
+	};
+
+	if (pismo->vpp)
+		data.set_vpp = pismo_set_vpp;
+
+	return pismo_add_device(pismo, i, region, "physmap-flash",
+		&data, sizeof(data));
+}
+
+static int __devinit pismo_add_sram(struct pismo_data *pismo, int i,
+	struct pismo_mem *region)
+{
+	struct platdata_mtd_ram data = {
+		.bankwidth = region->width,
+	};
+
+	return pismo_add_device(pismo, i, region, "mtd-ram",
+		&data, sizeof(data));
+}
+
+static void __devinit pismo_add_one(struct pismo_data *pismo, int i,
+	const struct pismo_cs_block *cs, phys_addr_t base)
+{
+	struct device *dev = &pismo->client->dev;
+	struct pismo_mem region;
+
+	region.base = base;
+	region.type = cs->type;
+	region.width = pismo_width_to_bytes(cs->width);
+	region.access = le16_to_cpu(cs->access);
+	region.size = le32_to_cpu(cs->size);
+
+	if (region.width == 0) {
+		dev_err(dev, "cs%u: bad width: %02x, ignoring\n", i, cs->width);
+		return;
+	}
+
+	/*
+	 * FIXME: may need to the platforms memory controller here, but at
+	 * the moment we assume that it has already been correctly setup.
+	 * The memory controller can also tell us the base address as well.
+	 */
+
+	dev_info(dev, "cs%u: %.32s: type %02x access %u00ps size %uK\n",
+		i, cs->device, region.type, region.access, region.size / 1024);
+
+	switch (region.type) {
+	case 0:
+		break;
+	case 1:
+		/* static DOC */
+		break;
+	case 2:
+		/* static NOR */
+		pismo_add_nor(pismo, i, &region);
+		break;
+	case 3:
+		/* static RAM */
+		pismo_add_sram(pismo, i, &region);
+		break;
+	}
+}
+
+static int __devexit pismo_remove(struct i2c_client *client)
+{
+	struct pismo_data *pismo = i2c_get_clientdata(client);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pismo->dev); i++)
+		platform_device_unregister(pismo->dev[i]);
+
+	/* FIXME: set_vpp needs saner arguments */
+	pismo_setvpp_remove_fix(pismo);
+
+	kfree(pismo);
+
+	return 0;
+}
+
+static int __devinit pismo_probe(struct i2c_client *client,
+				 const struct i2c_device_id *id)
+{
+	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+	struct pismo_pdata *pdata = client->dev.platform_data;
+	struct pismo_eeprom eeprom;
+	struct pismo_data *pismo;
+	int ret, i;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) {
+		dev_err(&client->dev, "functionality mismatch\n");
+		return -EIO;
+	}
+
+	pismo = kzalloc(sizeof(*pismo), GFP_KERNEL);
+	if (!pismo)
+		return -ENOMEM;
+
+	/* FIXME: set_vpp needs saner arguments */
+	ret = pismo_setvpp_probe_fix(pismo);
+	if (ret)
+		return ret;
+
+	pismo->client = client;
+	if (pdata) {
+		pismo->vpp = pdata->set_vpp;
+		pismo->vpp_data = pdata->vpp_data;
+	}
+	i2c_set_clientdata(client, pismo);
+
+	ret = pismo_eeprom_read(client, &eeprom, 0, sizeof(eeprom));
+	if (ret < 0) {
+		dev_err(&client->dev, "error reading EEPROM: %d\n", ret);
+		return ret;
+	}
+
+	dev_info(&client->dev, "%.15s board found\n", eeprom.board);
+
+	for (i = 0; i < ARRAY_SIZE(eeprom.cs); i++)
+		if (eeprom.cs[i].type != 0xff)
+			pismo_add_one(pismo, i, &eeprom.cs[i],
+				      pdata->cs_addrs[i]);
+
+	return 0;
+}
+
+static const struct i2c_device_id pismo_id[] = {
+	{ "pismo" },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, pismo_id);
+
+static struct i2c_driver pismo_driver = {
+	.driver	= {
+		.name	= "pismo",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= pismo_probe,
+	.remove		= __devexit_p(pismo_remove),
+	.id_table	= pismo_id,
+};
+
+static int __init pismo_init(void)
+{
+	BUILD_BUG_ON(sizeof(struct pismo_cs_block) != 48);
+	BUILD_BUG_ON(sizeof(struct pismo_eeprom) != 256);
+
+	return i2c_add_driver(&pismo_driver);
+}
+module_init(pismo_init);
+
+static void __exit pismo_exit(void)
+{
+	i2c_del_driver(&pismo_driver);
+}
+module_exit(pismo_exit);
+
+MODULE_AUTHOR("Russell King <linux@arm.linux.org.uk>");
+MODULE_DESCRIPTION("PISMO memory driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mtd/pismo.h b/include/linux/mtd/pismo.h
new file mode 100644
index 000000000000..8dfb7e1421c5
--- /dev/null
+++ b/include/linux/mtd/pismo.h
@@ -0,0 +1,17 @@
+/*
+ * PISMO memory driver - http://www.pismoworld.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ */
+#ifndef __LINUX_MTD_PISMO_H
+#define __LINUX_MTD_PISMO_H
+
+struct pismo_pdata {
+	void			(*set_vpp)(void *, int);
+	void			*vpp_data;
+	phys_addr_t		cs_addrs[5];
+};
+
+#endif
-- 
cgit v1.2.3


From cd65c3c7d1081290b7365897c2290a84aa967d4d Mon Sep 17 00:00:00 2001
From: Octavian Purdila <opurdila@ixiacom.com>
Date: Wed, 13 Jan 2010 18:10:36 -0800
Subject: net: fix build erros with CONFIG_BUG=n, CONFIG_GENERIC_BUG=n

Fixed build errors introduced by commit 7ad6848c (ip: fix mc_loop
checks for tunnels with multicast outer addresses)

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index d9a0e74d8923..fb63371c07a8 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -338,7 +338,7 @@ static inline int sk_mc_loop(struct sock *sk)
 		return inet6_sk(sk)->mc_loop;
 #endif
 	}
-	__WARN();
+	WARN_ON(1);
 	return 1;
 }
 
-- 
cgit v1.2.3


From 3f09ea4ecdcbcea05541f83e557d6ce2e56626d8 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Wed, 13 Jan 2010 22:28:40 +0100
Subject: drm/ttm: Add a swap_notify callback.

This is needed for a bugfix in the vmwgfx driver.
Drivers may have GPU bindings on buffers that core TTM is not aware of,
and TTM may view those buffers as ordinary system memory buffers.
Add a notifier to such drivers when TTM is about to move the buffer
contents out to swappable memory. The driver must then release any
private GPU bindings on those buffers.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c    | 3 +++
 include/drm/ttm/ttm_bo_driver.h | 5 +++++
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 2920f9a279e1..e10a04e7c4e9 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1844,6 +1844,9 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 	 * anyone tries to access a ttm page.
 	 */
 
+	if (bo->bdev->driver->swap_notify)
+		bo->bdev->driver->swap_notify(bo);
+
 	ret = ttm_tt_swapout(bo->ttm, bo->persistant_swap_storage);
 out:
 
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index ff7664e0c3cd..4c4e0f8375b3 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -353,6 +353,11 @@ struct ttm_bo_driver {
 	/* notify the driver we are taking a fault on this BO
 	 * and have reserved it */
 	void (*fault_reserve_notify)(struct ttm_buffer_object *bo);
+
+	/**
+	 * notify the driver that we're about to swap out this bo
+	 */
+	void (*swap_notify) (struct ttm_buffer_object *bo);
 };
 
 /**
-- 
cgit v1.2.3


From 6d125529c6cbfe570ce3bf9a0728548f087499da Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 24 Dec 2009 06:58:56 -0500
Subject: Fix ACC_MODE() for real

commit 5300990c0370e804e49d9a59d928c5d53fb73487 had stepped on a rather
nasty mess: definitions of ACC_MODE used to be different.  Fixed the
resulting breakage, converting them to variant that takes O_... value;
all callers have that and it actually simplifies life (see tomoyo part
of changes).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c               | 2 +-
 include/linux/fs.h       | 2 +-
 security/tomoyo/tomoyo.c | 7 +------
 3 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/fs/namei.c b/fs/namei.c
index 1b26b1620664..d930f1856ed2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1620,7 +1620,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
 		open_flag |= O_DSYNC;
 
 	if (!acc_mode)
-		acc_mode = MAY_OPEN | ACC_MODE(flag);
+		acc_mode = MAY_OPEN | ACC_MODE(open_flag);
 
 	/* O_TRUNC implies we need access checks for write permissions */
 	if (flag & O_TRUNC)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9147ca88f253..b1bcb275b596 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2463,7 +2463,7 @@ int proc_nr_files(struct ctl_table *table, int write,
 
 int __init get_filesystem_list(char *buf);
 
-#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
+#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
 #define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE))
 
 #endif /* __KERNEL__ */
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 8a00ade85166..2aceebf5f354 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -80,9 +80,8 @@ static int tomoyo_bprm_check_security(struct linux_binprm *bprm)
 		return tomoyo_find_next_domain(bprm);
 	/*
 	 * Read permission is checked against interpreters using next domain.
-	 * '1' is the result of open_to_namei_flags(O_RDONLY).
 	 */
-	return tomoyo_check_open_permission(domain, &bprm->file->f_path, 1);
+	return tomoyo_check_open_permission(domain, &bprm->file->f_path, O_RDONLY);
 }
 
 static int tomoyo_path_truncate(struct path *path, loff_t length,
@@ -184,10 +183,6 @@ static int tomoyo_file_fcntl(struct file *file, unsigned int cmd,
 static int tomoyo_dentry_open(struct file *f, const struct cred *cred)
 {
 	int flags = f->f_flags;
-
-	if ((flags + 1) & O_ACCMODE)
-		flags++;
-	flags |= f->f_flags & (O_APPEND | O_TRUNC);
 	/* Don't check read permission here if called from do_execve(). */
 	if (current->in_execve)
 		return 0;
-- 
cgit v1.2.3


From d5f1fb53353edc38da326445267c1df0c9676df2 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 14 Jan 2010 10:53:55 +0800
Subject: lib: Introduce strnstr()

It differs strstr() in that it limits the length to be searched
in the first string.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4B4E8743.6030805@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/string.h |  5 ++++-
 lib/string.c           | 27 ++++++++++++++++++++++++++-
 2 files changed, 30 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/string.h b/include/linux/string.h
index 651839a2a755..a716ee2a8adb 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -72,7 +72,10 @@ static inline __must_check char *strstrip(char *str)
 }
 
 #ifndef __HAVE_ARCH_STRSTR
-extern char * strstr(const char *,const char *);
+extern char * strstr(const char *, const char *);
+#endif
+#ifndef __HAVE_ARCH_STRNSTR
+extern char * strnstr(const char *, const char *, size_t);
 #endif
 #ifndef __HAVE_ARCH_STRLEN
 extern __kernel_size_t strlen(const char *);
diff --git a/lib/string.c b/lib/string.c
index 9f75b4ec50b8..a1cdcfcc42d0 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -667,7 +667,7 @@ EXPORT_SYMBOL(memscan);
  */
 char *strstr(const char *s1, const char *s2)
 {
-	int l1, l2;
+	size_t l1, l2;
 
 	l2 = strlen(s2);
 	if (!l2)
@@ -684,6 +684,31 @@ char *strstr(const char *s1, const char *s2)
 EXPORT_SYMBOL(strstr);
 #endif
 
+#ifndef __HAVE_ARCH_STRNSTR
+/**
+ * strnstr - Find the first substring in a length-limited string
+ * @s1: The string to be searched
+ * @s2: The string to search for
+ * @len: the maximum number of characters to search
+ */
+char *strnstr(const char *s1, const char *s2, size_t len)
+{
+	size_t l1 = len, l2;
+
+	l2 = strlen(s2);
+	if (!l2)
+		return (char *)s1;
+	while (l1 >= l2) {
+		l1--;
+		if (!memcmp(s1, s2, l2))
+			return (char *)s1;
+		s1++;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(strnstr);
+#endif
+
 #ifndef __HAVE_ARCH_MEMCHR
 /**
  * memchr - Find a character in an area of memory.
-- 
cgit v1.2.3


From c084ca704a3661bf77690a05bc6bd2c305d87c34 Mon Sep 17 00:00:00 2001
From: Xiaotian Feng <dfeng@redhat.com>
Date: Thu, 10 Dec 2009 19:56:45 +0800
Subject: ACPI: don't cond_resched if irq is disabled

commit 8bd108d adds preemption point after each opcode parse, then
a sleeping function called from invalid context bug was founded
during suspend/resume stage. this was fixed in commit abe1dfa by
don't cond_resched when irq_disabled. But recent commit 138d156 changes
the behaviour to don't cond_resched when in_atomic. This makes the
sleeping function called from invalid context bug happen again, which
is reported in http://lkml.org/lkml/2009/12/1/371.

This patch also fixes http://bugzilla.kernel.org/show_bug.cgi?id=14483

Reported-and-bisected-by: Larry Finger <Larry.Finger@lwfinger.net>
Reported-and-bisected-by: Justin P. Mattock <justinmattock@gmail.com>
Signed-off-by: Xiaotian Feng <dfeng@redhat.com>
Acked-by: Alexey Starikovskiy <astarikovskiy@suse.de>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/platform/aclinux.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 9d7febde10a1..09469971472f 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -152,7 +152,7 @@ static inline void *acpi_os_acquire_object(acpi_cache_t * cache)
 #include <linux/hardirq.h>
 #define ACPI_PREEMPTION_POINT() \
 	do { \
-		if (!in_atomic_preempt_off()) \
+		if (!in_atomic_preempt_off() && !irqs_disabled()) \
 			cond_resched(); \
 	} while (0)
 
-- 
cgit v1.2.3


From d00c362f1b0ff54161e0a42b4554ac621a9ef92d Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Sat, 16 Jan 2010 01:04:04 -0800
Subject: ax25: netrom: rose: Fix timer oopses

Wrong ax25_cb refcounting in ax25_send_frame() and by its callers can
cause timer oopses (first reported with 2.6.29.6 kernel).

Fixes: http://bugzilla.kernel.org/show_bug.cgi?id=14905

Reported-by: Bernard Pidoux <bpidoux@free.fr>
Tested-by: Bernard Pidoux <bpidoux@free.fr>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netrom.h  |  2 ++
 net/ax25/ax25_out.c   |  6 ++++++
 net/netrom/nr_route.c | 11 ++++++-----
 net/rose/rose_link.c  |  8 ++++++++
 net/rose/rose_route.c |  5 +++++
 5 files changed, 27 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/netrom.h b/include/net/netrom.h
index 15696b1fd30f..ab170a60e7d3 100644
--- a/include/net/netrom.h
+++ b/include/net/netrom.h
@@ -132,6 +132,8 @@ static __inline__ void nr_node_put(struct nr_node *nr_node)
 static __inline__ void nr_neigh_put(struct nr_neigh *nr_neigh)
 {
 	if (atomic_dec_and_test(&nr_neigh->refcount)) {
+		if (nr_neigh->ax25)
+			ax25_cb_put(nr_neigh->ax25);
 		kfree(nr_neigh->digipeat);
 		kfree(nr_neigh);
 	}
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index bf706f83a5c9..14912600ec57 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -92,6 +92,12 @@ ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, ax25_address *src, ax2
 #endif
 	}
 
+	/*
+	 * There is one ref for the state machine; a caller needs
+	 * one more to put it back, just like with the existing one.
+	 */
+	ax25_cb_hold(ax25);
+
 	ax25_cb_add(ax25);
 
 	ax25->state = AX25_STATE_1;
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index aacba76070fc..e2e2d33cafdf 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -843,12 +843,13 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25)
 	dptr  = skb_push(skb, 1);
 	*dptr = AX25_P_NETROM;
 
-	ax25s = ax25_send_frame(skb, 256, (ax25_address *)dev->dev_addr, &nr_neigh->callsign, nr_neigh->digipeat, nr_neigh->dev);
-	if (nr_neigh->ax25 && ax25s) {
-		/* We were already holding this ax25_cb */
+	ax25s = nr_neigh->ax25;
+	nr_neigh->ax25 = ax25_send_frame(skb, 256,
+					 (ax25_address *)dev->dev_addr,
+					 &nr_neigh->callsign,
+					 nr_neigh->digipeat, nr_neigh->dev);
+	if (ax25s)
 		ax25_cb_put(ax25s);
-	}
-	nr_neigh->ax25 = ax25s;
 
 	dev_put(dev);
 	ret = (nr_neigh->ax25 != NULL);
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index bd86a63960ce..5ef5f6988a2e 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -101,13 +101,17 @@ static void rose_t0timer_expiry(unsigned long param)
 static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh)
 {
 	ax25_address *rose_call;
+	ax25_cb *ax25s;
 
 	if (ax25cmp(&rose_callsign, &null_ax25_address) == 0)
 		rose_call = (ax25_address *)neigh->dev->dev_addr;
 	else
 		rose_call = &rose_callsign;
 
+	ax25s = neigh->ax25;
 	neigh->ax25 = ax25_send_frame(skb, 260, rose_call, &neigh->callsign, neigh->digipeat, neigh->dev);
+	if (ax25s)
+		ax25_cb_put(ax25s);
 
 	return (neigh->ax25 != NULL);
 }
@@ -120,13 +124,17 @@ static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh)
 static int rose_link_up(struct rose_neigh *neigh)
 {
 	ax25_address *rose_call;
+	ax25_cb *ax25s;
 
 	if (ax25cmp(&rose_callsign, &null_ax25_address) == 0)
 		rose_call = (ax25_address *)neigh->dev->dev_addr;
 	else
 		rose_call = &rose_callsign;
 
+	ax25s = neigh->ax25;
 	neigh->ax25 = ax25_find_cb(rose_call, &neigh->callsign, neigh->digipeat, neigh->dev);
+	if (ax25s)
+		ax25_cb_put(ax25s);
 
 	return (neigh->ax25 != NULL);
 }
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 795c4b025e31..70a0b3b4b4d2 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -235,6 +235,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh)
 
 	if ((s = rose_neigh_list) == rose_neigh) {
 		rose_neigh_list = rose_neigh->next;
+		if (rose_neigh->ax25)
+			ax25_cb_put(rose_neigh->ax25);
 		kfree(rose_neigh->digipeat);
 		kfree(rose_neigh);
 		return;
@@ -243,6 +245,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh)
 	while (s != NULL && s->next != NULL) {
 		if (s->next == rose_neigh) {
 			s->next = rose_neigh->next;
+			if (rose_neigh->ax25)
+				ax25_cb_put(rose_neigh->ax25);
 			kfree(rose_neigh->digipeat);
 			kfree(rose_neigh);
 			return;
@@ -812,6 +816,7 @@ void rose_link_failed(ax25_cb *ax25, int reason)
 
 	if (rose_neigh != NULL) {
 		rose_neigh->ax25 = NULL;
+		ax25_cb_put(ax25);
 
 		rose_del_route_by_neigh(rose_neigh);
 		rose_kill_by_neigh(rose_neigh);
-- 
cgit v1.2.3


From 7e105057a34c83cea542dacc55ff0528bce67afa Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Fri, 15 Jan 2010 17:01:02 -0800
Subject: kfifo: fix kfifo_out_locked race bug

Fix a wrong optimization in include/linux/kfifo.h which could cause a race
in kfifo_out_locked.

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Reported-by: Johan Hovold <jhovold@gmail.com>
Cc: Pete Zaitcev <zaitcev@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 7c6b32a1421c..c4ac88b3c302 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -228,13 +228,6 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo,
 
 	ret = kfifo_out(fifo, to, n);
 
-	/*
-	 * optimization: if the FIFO is empty, set the indices to 0
-	 * so we don't wrap the next time
-	 */
-	if (kfifo_is_empty(fifo))
-		kfifo_reset(fifo);
-
 	spin_unlock_irqrestore(lock, flags);
 
 	return ret;
-- 
cgit v1.2.3


From 2427b8e3eaea3719e53bbed7b3375382c3aa6f13 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Fri, 15 Jan 2010 17:01:11 -0800
Subject: tty.h: make tty_port_get() static inline

I get a few dozen of these warnings when using
  gcc (GCC) 4.4.1 20090725 (Red Hat 4.4.1-2):

In file included from mmotm-2010-0113-1217/init/do_mounts.c:5:
mmotm-2010-0113-1217/include/linux/tty.h: In function 'tty_port_get':
mmotm-2010-0113-1217/include/linux/tty.h:469: warning: '______f' is static but declared in inline function 'tty_port_get' which is not static

so make the function static inline.

[akpm@linux-foundation.org: may as well convert tty_port_users() also]
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/tty.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index ef3a2947b102..6abfcf5b5887 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -464,7 +464,7 @@ extern int tty_port_alloc_xmit_buf(struct tty_port *port);
 extern void tty_port_free_xmit_buf(struct tty_port *port);
 extern void tty_port_put(struct tty_port *port);
 
-extern inline struct tty_port *tty_port_get(struct tty_port *port)
+static inline struct tty_port *tty_port_get(struct tty_port *port)
 {
 	if (port)
 		kref_get(&port->kref);
@@ -486,7 +486,7 @@ extern void tty_port_close(struct tty_port *port,
 				struct tty_struct *tty, struct file *filp);
 extern int tty_port_open(struct tty_port *port,
 				struct tty_struct *tty, struct file *filp);
-extern inline int tty_port_users(struct tty_port *port)
+static inline int tty_port_users(struct tty_port *port)
 {
 	return port->count + port->blocked_open;
 }
-- 
cgit v1.2.3


From 8ecc2951534af10e04ddb5e5ff5c6d217b79f5c2 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:12 -0800
Subject: kfifo: use void * pointers for user buffers

The pointers to user buffers are currently unsigned char *, which requires
a lot of casting in the caller for any non-char typed buffers.  Use void *
instead.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 10 +++++-----
 kernel/kfifo.c        |  8 ++++----
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index c4ac88b3c302..6fb495ea956a 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -104,15 +104,15 @@ union { \
 
 #undef __kfifo_initializer
 
-extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer,
+extern void kfifo_init(struct kfifo *fifo, void *buffer,
 			unsigned int size);
 extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size,
 			gfp_t gfp_mask);
 extern void kfifo_free(struct kfifo *fifo);
 extern unsigned int kfifo_in(struct kfifo *fifo,
-				const unsigned char *from, unsigned int len);
+				const void *from, unsigned int len);
 extern __must_check unsigned int kfifo_out(struct kfifo *fifo,
-				unsigned char *to, unsigned int len);
+				void *to, unsigned int len);
 
 /**
  * kfifo_reset - removes the entire FIFO contents
@@ -194,7 +194,7 @@ static inline __must_check unsigned int kfifo_avail(struct kfifo *fifo)
  * bytes copied.
  */
 static inline unsigned int kfifo_in_locked(struct kfifo *fifo,
-		const unsigned char *from, unsigned int n, spinlock_t *lock)
+		const void *from, unsigned int n, spinlock_t *lock)
 {
 	unsigned long flags;
 	unsigned int ret;
@@ -219,7 +219,7 @@ static inline unsigned int kfifo_in_locked(struct kfifo *fifo,
  * @to buffer and returns the number of copied bytes.
  */
 static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo,
-	unsigned char *to, unsigned int n, spinlock_t *lock)
+	void *to, unsigned int n, spinlock_t *lock)
 {
 	unsigned long flags;
 	unsigned int ret;
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index e92d519f93b1..ab615e695052 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -28,7 +28,7 @@
 #include <linux/log2.h>
 #include <linux/uaccess.h>
 
-static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
+static void _kfifo_init(struct kfifo *fifo, void *buffer,
 		unsigned int size)
 {
 	fifo->buffer = buffer;
@@ -44,7 +44,7 @@ static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
  * @size: the size of the internal buffer, this have to be a power of 2.
  *
  */
-void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size)
+void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size)
 {
 	/* size must be a power of 2 */
 	BUG_ON(!is_power_of_2(size));
@@ -235,7 +235,7 @@ EXPORT_SYMBOL(__kfifo_in_n);
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_in(struct kfifo *fifo, const unsigned char *from,
+unsigned int kfifo_in(struct kfifo *fifo, const void *from,
 				unsigned int len)
 {
 	len = min(kfifo_avail(fifo), len);
@@ -277,7 +277,7 @@ EXPORT_SYMBOL(__kfifo_out_n);
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len)
+unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len)
 {
 	len = min(kfifo_len(fifo), len);
 
-- 
cgit v1.2.3


From 64ce1037c5434b1d036cd99ecaee6e00496bc2e9 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:15 -0800
Subject: kfifo: sanitize *_user error handling

Right now for kfifo_*_user it's not easily possible to distingush between
a user copy failing and the FIFO not containing enough data.  The problem
is that both conditions are multiplexed into the same return code.

Avoid this by moving the "copy length" into a separate output parameter
and only return 0/-EFAULT in the main return value.

I didn't fully adapt the weird "record" variants, those seem
to be unused anyways and were rather messy (should they be just removed?)

I would appreciate some double checking if I did all the conversions
correctly.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h |  8 +++---
 kernel/kfifo.c        | 76 +++++++++++++++++++++++++++++++++------------------
 2 files changed, 53 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 6fb495ea956a..86ad50a900c8 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -235,11 +235,11 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo,
 
 extern void kfifo_skip(struct kfifo *fifo, unsigned int len);
 
-extern __must_check unsigned int kfifo_from_user(struct kfifo *fifo,
-	const void __user *from, unsigned int n);
+extern __must_check int kfifo_from_user(struct kfifo *fifo,
+	const void __user *from, unsigned int n, unsigned *lenout);
 
-extern __must_check unsigned int kfifo_to_user(struct kfifo *fifo,
-	void __user *to, unsigned int n);
+extern __must_check int kfifo_to_user(struct kfifo *fifo,
+	void __user *to, unsigned int n, unsigned *lenout);
 
 /*
  * __kfifo_add_out internal helper function for updating the out offset
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index ab615e695052..b50bb622e8b0 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -159,8 +159,9 @@ static inline void __kfifo_out_data(struct kfifo *fifo,
 	memcpy(to + l, fifo->buffer, len - l);
 }
 
-static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo,
-	 const void __user *from, unsigned int len, unsigned int off)
+static inline int __kfifo_from_user_data(struct kfifo *fifo,
+	 const void __user *from, unsigned int len, unsigned int off,
+	 unsigned *lenout)
 {
 	unsigned int l;
 	int ret;
@@ -177,16 +178,20 @@ static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo,
 	/* first put the data starting from fifo->in to buffer end */
 	l = min(len, fifo->size - off);
 	ret = copy_from_user(fifo->buffer + off, from, l);
-
-	if (unlikely(ret))
-		return ret + len - l;
+	if (unlikely(ret)) {
+		*lenout = ret;
+		return -EFAULT;
+	}
+	*lenout = l;
 
 	/* then put the rest (if any) at the beginning of the buffer */
-	return copy_from_user(fifo->buffer, from + l, len - l);
+	ret = copy_from_user(fifo->buffer, from + l, len - l);
+	*lenout += ret ? ret : len - l;
+	return ret ? -EFAULT : 0;
 }
 
-static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo,
-		void __user *to, unsigned int len, unsigned int off)
+static inline int __kfifo_to_user_data(struct kfifo *fifo,
+		void __user *to, unsigned int len, unsigned int off, unsigned *lenout)
 {
 	unsigned int l;
 	int ret;
@@ -203,12 +208,21 @@ static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo,
 	/* first get the data from fifo->out until the end of the buffer */
 	l = min(len, fifo->size - off);
 	ret = copy_to_user(to, fifo->buffer + off, l);
-
-	if (unlikely(ret))
-		return ret + len - l;
+	*lenout = l;
+	if (unlikely(ret)) {
+		*lenout -= ret;
+		return -EFAULT;
+	}
 
 	/* then get the rest (if any) from the beginning of the buffer */
-	return copy_to_user(to + l, fifo->buffer, len - l);
+	len -= l;
+	ret = copy_to_user(to + l, fifo->buffer, len);
+	if (unlikely(ret)) {
+		*lenout += len - ret;
+		return -EFAULT;
+	}
+	*lenout += len;
+	return 0;
 }
 
 unsigned int __kfifo_in_n(struct kfifo *fifo,
@@ -299,10 +313,13 @@ EXPORT_SYMBOL(__kfifo_out_generic);
 unsigned int __kfifo_from_user_n(struct kfifo *fifo,
 	const void __user *from, unsigned int len, unsigned int recsize)
 {
+	unsigned total;
+
 	if (kfifo_avail(fifo) < len + recsize)
 		return len + 1;
 
-	return __kfifo_from_user_data(fifo, from, len, recsize);
+	__kfifo_from_user_data(fifo, from, len, recsize, &total);
+	return total;
 }
 EXPORT_SYMBOL(__kfifo_from_user_n);
 
@@ -313,18 +330,21 @@ EXPORT_SYMBOL(__kfifo_from_user_n);
  * @len: the length of the data to be added.
  *
  * This function copies at most @len bytes from the @from into the
- * FIFO depending and returns the number of copied bytes.
+ * FIFO depending and returns -EFAULT/0.
  *
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_from_user(struct kfifo *fifo,
-	const void __user *from, unsigned int len)
+int kfifo_from_user(struct kfifo *fifo,
+        const void __user *from, unsigned int len, unsigned *total)
 {
+	int ret;
 	len = min(kfifo_avail(fifo), len);
-	len -= __kfifo_from_user_data(fifo, from, len, 0);
+	ret = __kfifo_from_user_data(fifo, from, len, 0, total);
+	if (ret)
+		return ret;
 	__kfifo_add_in(fifo, len);
-	return len;
+	return 0;
 }
 EXPORT_SYMBOL(kfifo_from_user);
 
@@ -339,17 +359,17 @@ unsigned int __kfifo_to_user_n(struct kfifo *fifo,
 	void __user *to, unsigned int len, unsigned int reclen,
 	unsigned int recsize)
 {
-	unsigned int ret;
+	unsigned int ret, total;
 
 	if (kfifo_len(fifo) < reclen + recsize)
 		return len;
 
-	ret = __kfifo_to_user_data(fifo, to, reclen, recsize);
+	ret = __kfifo_to_user_data(fifo, to, reclen, recsize, &total);
 
 	if (likely(ret == 0))
 		__kfifo_add_out(fifo, reclen + recsize);
 
-	return ret;
+	return total;
 }
 EXPORT_SYMBOL(__kfifo_to_user_n);
 
@@ -358,20 +378,22 @@ EXPORT_SYMBOL(__kfifo_to_user_n);
  * @fifo: the fifo to be used.
  * @to: where the data must be copied.
  * @len: the size of the destination buffer.
+ @ @lenout: pointer to output variable with copied data
  *
  * This function copies at most @len bytes from the FIFO into the
- * @to buffer and returns the number of copied bytes.
+ * @to buffer and 0 or -EFAULT.
  *
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_to_user(struct kfifo *fifo,
-	void __user *to, unsigned int len)
+int kfifo_to_user(struct kfifo *fifo,
+	void __user *to, unsigned int len, unsigned *lenout)
 {
+	int ret;
 	len = min(kfifo_len(fifo), len);
-	len -= __kfifo_to_user_data(fifo, to, len, 0);
-	__kfifo_add_out(fifo, len);
-	return len;
+	ret = __kfifo_to_user_data(fifo, to, len, 0, lenout);
+	__kfifo_add_out(fifo, *lenout);
+	return ret;
 }
 EXPORT_SYMBOL(kfifo_to_user);
 
-- 
cgit v1.2.3


From a5b9e2c1063046421ce01dcf5ddd7ec12567f3e1 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:16 -0800
Subject: kfifo: add kfifo_out_peek

In some upcoming code it's useful to peek into a FIFO without permanentely
removing data.  This patch implements a new kfifo_out_peek() to do this.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h |  3 +++
 kernel/kfifo.c        | 21 +++++++++++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 86ad50a900c8..7ad6d32dd673 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -113,6 +113,9 @@ extern unsigned int kfifo_in(struct kfifo *fifo,
 				const void *from, unsigned int len);
 extern __must_check unsigned int kfifo_out(struct kfifo *fifo,
 				void *to, unsigned int len);
+extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo,
+				void *to, unsigned int len, unsigned offset);
+
 
 /**
  * kfifo_reset - removes the entire FIFO contents
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index b50bb622e8b0..7384f120be87 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -302,6 +302,27 @@ unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len)
 }
 EXPORT_SYMBOL(kfifo_out);
 
+/**
+ * kfifo_out_peek - copy some data from the FIFO, but do not remove it
+ * @fifo: the fifo to be used.
+ * @to: where the data must be copied.
+ * @len: the size of the destination buffer.
+ * @offset: offset into the fifo
+ *
+ * This function copies at most @len bytes at @offset from the FIFO
+ * into the @to buffer and returns the number of copied bytes.
+ * The data is not removed from the FIFO.
+ */
+unsigned int kfifo_out_peek(struct kfifo *fifo, void *to, unsigned int len,
+			    unsigned offset)
+{
+	len = min(kfifo_len(fifo), len + offset);
+
+	__kfifo_out_data(fifo, to, len, offset);
+	return len;
+}
+EXPORT_SYMBOL(kfifo_out_peek);
+
 unsigned int __kfifo_out_generic(struct kfifo *fifo,
 	void *to, unsigned int len, unsigned int recsize,
 	unsigned int *total)
-- 
cgit v1.2.3


From d994ffc247f7c4a48b848f10c4c01c9b06411ada Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:17 -0800
Subject: kfifo: add kfifo_initialized

Simple inline that checks if kfifo_init() has been executed on a fifo.

This is useful for walking all per CPU fifos, when some of them might not
have been brought up yet.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 7ad6d32dd673..c8618243ca5a 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -116,6 +116,16 @@ extern __must_check unsigned int kfifo_out(struct kfifo *fifo,
 extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo,
 				void *to, unsigned int len, unsigned offset);
 
+/**
+ * kfifo_initialized - Check if kfifo is initialized.
+ * @fifo: fifo to check
+ * Return %true if FIFO is initialized, otherwise %false.
+ * Assumes the fifo was 0 before.
+ */
+static inline bool kfifo_initialized(struct kfifo *fifo)
+{
+	return fifo->buffer != 0;
+}
 
 /**
  * kfifo_reset - removes the entire FIFO contents
-- 
cgit v1.2.3


From 5dab600e6a153ceb64832f608069e6c08185411a Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:17 -0800
Subject: kfifo: document everywhere that size has to be power of two

On my first try using them I missed that the fifos need to be power of
two, resulting in a runtime bug.  Document that requirement everywhere
(and fix one grammar bug)

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 4 ++--
 kernel/kfifo.c        | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index c8618243ca5a..6f6c5f300af6 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -67,7 +67,7 @@ struct kfifo {
 /**
  * DECLARE_KFIFO - macro to declare a kfifo and the associated buffer
  * @name: name of the declared kfifo datatype
- * @size: size of the fifo buffer
+ * @size: size of the fifo buffer. Must be a power of two.
  *
  * Note1: the macro can be used inside struct or union declaration
  * Note2: the macro creates two objects:
@@ -91,7 +91,7 @@ union { \
 /**
  * DEFINE_KFIFO - macro to define and initialize a kfifo
  * @name: name of the declared kfifo datatype
- * @size: size of the fifo buffer
+ * @size: size of the fifo buffer. Must be a power of two.
  *
  * Note1: the macro can be used for global and local kfifo data type variables
  * Note2: the macro creates two objects:
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 7384f120be87..32c5c15d750d 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -41,7 +41,7 @@ static void _kfifo_init(struct kfifo *fifo, void *buffer,
  * kfifo_init - initialize a FIFO using a preallocated buffer
  * @fifo: the fifo to assign the buffer
  * @buffer: the preallocated buffer to be used.
- * @size: the size of the internal buffer, this have to be a power of 2.
+ * @size: the size of the internal buffer, this has to be a power of 2.
  *
  */
 void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size)
-- 
cgit v1.2.3


From cc8ef6eb21e964b1c5eb97b2d0e8ac9893e1bf86 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rdreier@cisco.com>
Date: Fri, 15 Jan 2010 17:01:22 -0800
Subject: kernel.h: add BUILD_BUG_ON_NOT_POWER_OF_2()

Add BUILD_BUG_ON_NOT_POWER_OF_2()

When code relies on a constant being a power of 2:

	#define FOO	512	/* must be a power of 2 */

it would be nice to be able to do:

	BUILD_BUG_ON(!is_power_of_2(FOO));

However applying an inline function does not result in a compile-time
constant that can be used with BUILD_BUG_ON(), so trying that gives
results in:

	error: bit-field '<anonymous>' width not an integer constant

As suggested by akpm, rather than monkeying around with is_power_of_2()
and risking gcc warts about constant expressions, just create a macro
BUILD_BUG_ON_NOT_POWER_OF_2() to encapsulate this common requirement.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
Cc: Bart Van Assche <bvanassche@acm.org>
Cc: David Dillow <dave@thedillows.org>
Cc: "Robert P. J. Day" <rpjday@crashcourse.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3fc9f5aab5f8..328bca609b9b 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -734,6 +734,10 @@ struct sysinfo {
 /* Force a compilation error if condition is constant and true */
 #define MAYBE_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))
 
+/* Force a compilation error if a constant expression is not a power of 2 */
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n)			\
+	BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
+
 /* Force a compilation error if condition is true, but also produce a
    result (of value 0 and type size_t), so the expression can be used
    e.g. in a structure initializer (or where-ever else comma expressions
-- 
cgit v1.2.3


From 1e2ae599d37e60958c03ca5e46b1f657619a30cd Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 15 Jan 2010 17:01:33 -0800
Subject: nommu: struct vm_region's vm_usage count need not be atomic

The vm_usage count field in struct vm_region does not need to be atomic as
it's only even modified whilst nommu_region_sem is write locked.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Greg Ungerer <gerg@snapgear.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h |  2 +-
 mm/nommu.c               | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 84d020bed083..80cfa78a8cf6 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -122,7 +122,7 @@ struct vm_region {
 	unsigned long	vm_pgoff;	/* the offset in vm_file corresponding to vm_start */
 	struct file	*vm_file;	/* the backing file or NULL */
 
-	atomic_t	vm_usage;	/* region usage count */
+	int		vm_usage;	/* region usage count (access under nommu_region_sem) */
 	bool		vm_icache_flushed : 1; /* true if the icache has been flushed for
 						* this region */
 };
diff --git a/mm/nommu.c b/mm/nommu.c
index 17773862619b..5e39294f8ea8 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -552,11 +552,11 @@ static void free_page_series(unsigned long from, unsigned long to)
 static void __put_nommu_region(struct vm_region *region)
 	__releases(nommu_region_sem)
 {
-	kenter("%p{%d}", region, atomic_read(&region->vm_usage));
+	kenter("%p{%d}", region, region->vm_usage);
 
 	BUG_ON(!nommu_region_tree.rb_node);
 
-	if (atomic_dec_and_test(&region->vm_usage)) {
+	if (--region->vm_usage == 0) {
 		if (region->vm_top > region->vm_start)
 			delete_nommu_region(region);
 		up_write(&nommu_region_sem);
@@ -1205,7 +1205,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 	if (!vma)
 		goto error_getting_vma;
 
-	atomic_set(&region->vm_usage, 1);
+	region->vm_usage = 1;
 	region->vm_flags = vm_flags;
 	region->vm_pgoff = pgoff;
 
@@ -1272,7 +1272,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 			}
 
 			/* we've found a region we can share */
-			atomic_inc(&pregion->vm_usage);
+			pregion->vm_usage++;
 			vma->vm_region = pregion;
 			start = pregion->vm_start;
 			start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT;
@@ -1289,7 +1289,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 					vma->vm_region = NULL;
 					vma->vm_start = 0;
 					vma->vm_end = 0;
-					atomic_dec(&pregion->vm_usage);
+					pregion->vm_usage--;
 					pregion = NULL;
 					goto error_just_free;
 				}
@@ -1444,7 +1444,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	/* we're only permitted to split anonymous regions that have a single
 	 * owner */
 	if (vma->vm_file ||
-	    atomic_read(&vma->vm_region->vm_usage) != 1)
+	    vma->vm_region->vm_usage != 1)
 		return -ENOMEM;
 
 	if (mm->map_count >= sysctl_max_map_count)
@@ -1518,7 +1518,7 @@ static int shrink_vma(struct mm_struct *mm,
 
 	/* cut the backing region down to size */
 	region = vma->vm_region;
-	BUG_ON(atomic_read(&region->vm_usage) != 1);
+	BUG_ON(region->vm_usage != 1);
 
 	down_write(&nommu_region_sem);
 	delete_nommu_region(region);
-- 
cgit v1.2.3


From efc1a3b16930c41d64ffefde16b87d82f603a8a0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 15 Jan 2010 17:01:35 -0800
Subject: nommu: don't need get_unmapped_area() for NOMMU

get_unmapped_area() is unnecessary for NOMMU as no-one calls it.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Greg Ungerer <gerg@snapgear.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h |  2 ++
 include/linux/sched.h    |  7 +++++--
 mm/nommu.c               | 21 ---------------------
 mm/util.c                |  2 +-
 4 files changed, 8 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 80cfa78a8cf6..36f96271306c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -205,10 +205,12 @@ struct mm_struct {
 	struct vm_area_struct * mmap;		/* list of VMAs */
 	struct rb_root mm_rb;
 	struct vm_area_struct * mmap_cache;	/* last find_vma result */
+#ifdef CONFIG_MMU
 	unsigned long (*get_unmapped_area) (struct file *filp,
 				unsigned long addr, unsigned long len,
 				unsigned long pgoff, unsigned long flags);
 	void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
+#endif
 	unsigned long mmap_base;		/* base of mmap area */
 	unsigned long task_size;		/* size of task vm space */
 	unsigned long cached_hole_size; 	/* if non-zero, the largest hole below free_area_cache */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8d4991be9d53..6f7bba93929b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -377,6 +377,8 @@ extern int sysctl_max_map_count;
 
 #include <linux/aio.h>
 
+#ifdef CONFIG_MMU
+extern void arch_pick_mmap_layout(struct mm_struct *mm);
 extern unsigned long
 arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
 		       unsigned long, unsigned long);
@@ -386,6 +388,9 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 			  unsigned long flags);
 extern void arch_unmap_area(struct mm_struct *, unsigned long);
 extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
+#else
+static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
+#endif
 
 #if USE_SPLIT_PTLOCKS
 /*
@@ -2491,8 +2496,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 
 #endif /* CONFIG_SMP */
 
-extern void arch_pick_mmap_layout(struct mm_struct *mm);
-
 #ifdef CONFIG_TRACING
 extern void
 __trace_special(void *__tr, void *__data,
diff --git a/mm/nommu.c b/mm/nommu.c
index d6dd656264a2..32be0cf51ba6 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1760,27 +1760,6 @@ void unmap_mapping_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
-/*
- * ask for an unmapped area at which to create a mapping on a file
- */
-unsigned long get_unmapped_area(struct file *file, unsigned long addr,
-				unsigned long len, unsigned long pgoff,
-				unsigned long flags)
-{
-	unsigned long (*get_area)(struct file *, unsigned long, unsigned long,
-				  unsigned long, unsigned long);
-
-	get_area = current->mm->get_unmapped_area;
-	if (file && file->f_op && file->f_op->get_unmapped_area)
-		get_area = file->f_op->get_unmapped_area;
-
-	if (!get_area)
-		return -ENOSYS;
-
-	return get_area(file, addr, len, pgoff, flags);
-}
-EXPORT_SYMBOL(get_unmapped_area);
-
 /*
  * Check that a process has enough memory to allocate a new virtual
  * mapping. 0 means there is enough memory for the allocation to
diff --git a/mm/util.c b/mm/util.c
index 7c35ad95f927..834db7be240f 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -220,7 +220,7 @@ char *strndup_user(const char __user *s, long n)
 }
 EXPORT_SYMBOL(strndup_user);
 
-#ifndef HAVE_ARCH_PICK_MMAP_LAYOUT
+#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
 	mm->mmap_base = TASK_UNMAPPED_BASE;
-- 
cgit v1.2.3


From 7e6608724c640924aad1d556d17df33ebaa6124d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 15 Jan 2010 17:01:39 -0800
Subject: nommu: fix shared mmap after truncate shrinkage problems

Fix a problem in NOMMU mmap with ramfs whereby a shared mmap can happen
over the end of a truncation.  The problem is that
ramfs_nommu_check_mappings() checks that the reduced file size against the
VMA tree, but not the vm_region tree.

The following sequence of events can cause the problem:

	fd = open("/tmp/x", O_RDWR|O_TRUNC|O_CREAT, 0600);
	ftruncate(fd, 32 * 1024);
	a = mmap(NULL, 32 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	b = mmap(NULL, 16 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	munmap(a, 32 * 1024);
	ftruncate(fd, 16 * 1024);
	c = mmap(NULL, 32 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);

Mapping 'a' creates a vm_region covering 32KB of the file.  Mapping 'b'
sees that the vm_region from 'a' is covering the region it wants and so
shares it, pinning it in memory.

Mapping 'a' then goes away and the file is truncated to the end of VMA
'b'.  However, the region allocated by 'a' is still in effect, and has
_not_ been reduced.

Mapping 'c' is then created, and because there's a vm_region covering the
desired region, get_unmapped_area() is _not_ called to repeat the check,
and the mapping is granted, even though the pages from the latter half of
the mapping have been discarded.

However:

	d = mmap(NULL, 16 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);

Mapping 'd' should work, and should end up sharing the region allocated by
'a'.

To deal with this, we shrink the vm_region struct during the truncation,
lest do_mmap_pgoff() take it as licence to share the full region
automatically without calling the get_unmapped_area() file op again.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Greg Ungerer <gerg@snapgear.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ramfs/file-nommu.c | 31 +-------------------------
 include/linux/mm.h    |  1 +
 mm/nommu.c            | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 266531343aae..1739a4aba25f 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -121,35 +121,6 @@ add_error:
 	return ret;
 }
 
-/*****************************************************************************/
-/*
- * check that file shrinkage doesn't leave any VMAs dangling in midair
- */
-static int ramfs_nommu_check_mappings(struct inode *inode,
-				      size_t newsize, size_t size)
-{
-	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
-
-	down_write(&nommu_region_sem);
-
-	/* search for VMAs that fall within the dead zone */
-	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
-			      newsize >> PAGE_SHIFT,
-			      (size + PAGE_SIZE - 1) >> PAGE_SHIFT
-			      ) {
-		/* found one - only interested if it's shared out of the page
-		 * cache */
-		if (vma->vm_flags & VM_SHARED) {
-			up_write(&nommu_region_sem);
-			return -ETXTBSY; /* not quite true, but near enough */
-		}
-	}
-
-	up_write(&nommu_region_sem);
-	return 0;
-}
-
 /*****************************************************************************/
 /*
  *
@@ -169,7 +140,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
 
 	/* check that a decrease in size doesn't cut off any shared mappings */
 	if (newsize < size) {
-		ret = ramfs_nommu_check_mappings(inode, newsize, size);
+		ret = nommu_shrink_inode_mappings(inode, size, newsize);
 		if (ret < 0)
 			return ret;
 	}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2265f28eb47a..60c467bfbabd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1089,6 +1089,7 @@ extern void zone_pcp_update(struct zone *zone);
 
 /* nommu.c */
 extern atomic_long_t mmap_pages_allocated;
+extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
 
 /* prio_tree.c */
 void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
diff --git a/mm/nommu.c b/mm/nommu.c
index 32be0cf51ba6..48a2ecfaf059 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1914,3 +1914,65 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 	mmput(mm);
 	return len;
 }
+
+/**
+ * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode
+ * @inode: The inode to check
+ * @size: The current filesize of the inode
+ * @newsize: The proposed filesize of the inode
+ *
+ * Check the shared mappings on an inode on behalf of a shrinking truncate to
+ * make sure that that any outstanding VMAs aren't broken and then shrink the
+ * vm_regions that extend that beyond so that do_mmap_pgoff() doesn't
+ * automatically grant mappings that are too large.
+ */
+int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
+				size_t newsize)
+{
+	struct vm_area_struct *vma;
+	struct prio_tree_iter iter;
+	struct vm_region *region;
+	pgoff_t low, high;
+	size_t r_size, r_top;
+
+	low = newsize >> PAGE_SHIFT;
+	high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+	down_write(&nommu_region_sem);
+
+	/* search for VMAs that fall within the dead zone */
+	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
+			      low, high) {
+		/* found one - only interested if it's shared out of the page
+		 * cache */
+		if (vma->vm_flags & VM_SHARED) {
+			up_write(&nommu_region_sem);
+			return -ETXTBSY; /* not quite true, but near enough */
+		}
+	}
+
+	/* reduce any regions that overlap the dead zone - if in existence,
+	 * these will be pointed to by VMAs that don't overlap the dead zone
+	 *
+	 * we don't check for any regions that start beyond the EOF as there
+	 * shouldn't be any
+	 */
+	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
+			      0, ULONG_MAX) {
+		if (!(vma->vm_flags & VM_SHARED))
+			continue;
+
+		region = vma->vm_region;
+		r_size = region->vm_top - region->vm_start;
+		r_top = (region->vm_pgoff << PAGE_SHIFT) + r_size;
+
+		if (r_top > newsize) {
+			region->vm_top -= r_top - newsize;
+			if (region->vm_end > region->vm_top)
+				region->vm_end = region->vm_top;
+		}
+	}
+
+	up_write(&nommu_region_sem);
+	return 0;
+}
-- 
cgit v1.2.3


From eda05a28ec52be40086400a1b606d211276f0e41 Mon Sep 17 00:00:00 2001
From: Harish Zunjarrao <harish.zunjarrao@qlogic.com>
Date: Tue, 12 Jan 2010 12:59:50 -0800
Subject: [SCSI] fc-transport: Use packed modifier for fc_bsg_request
 structure.

The 32bit kernel does not add padding bytes in the fc_bsg_request structure
whereas the 64bit kernel adds padding bytes in the fc_bsg_request structure.
Due to this, structure elements gets mismatched with 32bit application and
64bit kernel.To resolve this, used packed modifier to avoid adding padding bytes.
Signed-off-by: Giridhar Malavali <giridhar.malavali@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 include/scsi/scsi_bsg_fc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/scsi/scsi_bsg_fc.h b/include/scsi/scsi_bsg_fc.h
index a4b233318179..91a4e4ff9a9b 100644
--- a/include/scsi/scsi_bsg_fc.h
+++ b/include/scsi/scsi_bsg_fc.h
@@ -292,7 +292,7 @@ struct fc_bsg_request {
 		struct fc_bsg_rport_els		r_els;
 		struct fc_bsg_rport_ct		r_ct;
 	} rqst_data;
-};
+} __attribute__((packed));
 
 
 /* response (request sense data) structure of the sg_io_v4 */
-- 
cgit v1.2.3


From 9dffe2a32b0deef52605d50527c0d240b15cabf7 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 4 Jan 2010 18:05:00 +0000
Subject: mfd: Correct WM835x ISINK ramp time defines

The constants used to specify ISINK ramp times for WM835x had the
wrong shifts so that the on times applied to the off ramp and vice
versa. The masks for the bitfields are correct.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: stable@kernel.org
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/wm8350/pmic.h | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/wm8350/pmic.h b/include/linux/mfd/wm8350/pmic.h
index be3264e286e0..e786fe9841ef 100644
--- a/include/linux/mfd/wm8350/pmic.h
+++ b/include/linux/mfd/wm8350/pmic.h
@@ -666,20 +666,20 @@
 #define WM8350_ISINK_FLASH_DUR_64MS		(1 << 8)
 #define WM8350_ISINK_FLASH_DUR_96MS		(2 << 8)
 #define WM8350_ISINK_FLASH_DUR_1024MS		(3 << 8)
-#define WM8350_ISINK_FLASH_ON_INSTANT		(0 << 4)
-#define WM8350_ISINK_FLASH_ON_0_25S		(1 << 4)
-#define WM8350_ISINK_FLASH_ON_0_50S		(2 << 4)
-#define WM8350_ISINK_FLASH_ON_1_00S		(3 << 4)
-#define WM8350_ISINK_FLASH_ON_1_95S		(1 << 4)
-#define WM8350_ISINK_FLASH_ON_3_91S		(2 << 4)
-#define WM8350_ISINK_FLASH_ON_7_80S		(3 << 4)
-#define WM8350_ISINK_FLASH_OFF_INSTANT		(0 << 0)
-#define WM8350_ISINK_FLASH_OFF_0_25S		(1 << 0)
-#define WM8350_ISINK_FLASH_OFF_0_50S		(2 << 0)
-#define WM8350_ISINK_FLASH_OFF_1_00S		(3 << 0)
-#define WM8350_ISINK_FLASH_OFF_1_95S		(1 << 0)
-#define WM8350_ISINK_FLASH_OFF_3_91S		(2 << 0)
-#define WM8350_ISINK_FLASH_OFF_7_80S		(3 << 0)
+#define WM8350_ISINK_FLASH_ON_INSTANT		(0 << 0)
+#define WM8350_ISINK_FLASH_ON_0_25S		(1 << 0)
+#define WM8350_ISINK_FLASH_ON_0_50S		(2 << 0)
+#define WM8350_ISINK_FLASH_ON_1_00S		(3 << 0)
+#define WM8350_ISINK_FLASH_ON_1_95S		(1 << 0)
+#define WM8350_ISINK_FLASH_ON_3_91S		(2 << 0)
+#define WM8350_ISINK_FLASH_ON_7_80S		(3 << 0)
+#define WM8350_ISINK_FLASH_OFF_INSTANT		(0 << 4)
+#define WM8350_ISINK_FLASH_OFF_0_25S		(1 << 4)
+#define WM8350_ISINK_FLASH_OFF_0_50S		(2 << 4)
+#define WM8350_ISINK_FLASH_OFF_1_00S		(3 << 4)
+#define WM8350_ISINK_FLASH_OFF_1_95S		(1 << 4)
+#define WM8350_ISINK_FLASH_OFF_3_91S		(2 << 4)
+#define WM8350_ISINK_FLASH_OFF_7_80S		(3 << 4)
 
 /*
  * Regulator Interrupts.
-- 
cgit v1.2.3


From 64e8867ba8098b69889c1af94997a5ba2348fb26 Mon Sep 17 00:00:00 2001
From: Ian Molton <ian@mnementh.co.uk>
Date: Wed, 6 Jan 2010 13:51:48 +0100
Subject: mfd: tmio_mmc hardware abstraction for CNF area

This patch abstracts out the CNF area code from tmio_mmc which
is not present in all hardware that can use this driver. This
is required so that we can support non-toshiba based hardware.

ASIC3 support by Philipp Zabel

Signed-off-by: Ian Molton <ian@mnementh.co.uk>
Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Makefile        |   6 +--
 drivers/mfd/asic3.c         |  40 ++++++++++++---
 drivers/mfd/t7l66xb.c       |  55 +++++++++++++-------
 drivers/mfd/tc6387xb.c      | 119 ++++++++++++++++++++++++++++++++------------
 drivers/mfd/tc6393xb.c      |  56 +++++++++++++++++----
 drivers/mfd/tmio_core.c     |  52 +++++++++++++++++++
 drivers/mmc/host/tmio_mmc.c |  59 +++++++---------------
 drivers/mmc/host/tmio_mmc.h |  46 +++--------------
 include/linux/mfd/tmio.h    |  39 +++++++++++++++
 9 files changed, 323 insertions(+), 149 deletions(-)
 create mode 100644 drivers/mfd/tmio_core.c

(limited to 'include')

diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index ca2f2c4ff05e..8f0d18409ede 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -11,9 +11,9 @@ obj-$(CONFIG_HTC_PASIC3)	+= htc-pasic3.o
 
 obj-$(CONFIG_MFD_DM355EVM_MSP)	+= dm355evm_msp.o
 
-obj-$(CONFIG_MFD_T7L66XB)	+= t7l66xb.o
-obj-$(CONFIG_MFD_TC6387XB)	+= tc6387xb.o
-obj-$(CONFIG_MFD_TC6393XB)	+= tc6393xb.o
+obj-$(CONFIG_MFD_T7L66XB)	+= t7l66xb.o tmio_core.o
+obj-$(CONFIG_MFD_TC6387XB)	+= tc6387xb.o tmio_core.o
+obj-$(CONFIG_MFD_TC6393XB)	+= tc6393xb.o tmio_core.o
 
 obj-$(CONFIG_MFD_WM8400)	+= wm8400-core.o
 wm831x-objs			:= wm831x-core.o wm831x-irq.o wm831x-otp.o
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index e22128c3e9a8..95c1e6bd1729 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -80,6 +80,7 @@ struct asic3 {
 	u16 irq_bothedge[4];
 	struct gpio_chip gpio;
 	struct device *dev;
+	void __iomem *tmio_cnf;
 
 	struct asic3_clk clocks[ARRAY_SIZE(asic3_clk_init)];
 };
@@ -685,8 +686,24 @@ static struct mfd_cell asic3_cell_ds1wm = {
 	.resources     = ds1wm_resources,
 };
 
+static void asic3_mmc_pwr(struct platform_device *pdev, int state)
+{
+	struct asic3 *asic = dev_get_drvdata(pdev->dev.parent);
+
+	tmio_core_mmc_pwr(asic->tmio_cnf, 1 - asic->bus_shift, state);
+}
+
+static void asic3_mmc_clk_div(struct platform_device *pdev, int state)
+{
+	struct asic3 *asic = dev_get_drvdata(pdev->dev.parent);
+
+	tmio_core_mmc_clk_div(asic->tmio_cnf, 1 - asic->bus_shift, state);
+}
+
 static struct tmio_mmc_data asic3_mmc_data = {
-	.hclk = 24576000,
+	.hclk           = 24576000,
+	.set_pwr        = asic3_mmc_pwr,
+	.set_clk_div    = asic3_mmc_clk_div,
 };
 
 static struct resource asic3_mmc_resources[] = {
@@ -695,11 +712,6 @@ static struct resource asic3_mmc_resources[] = {
 		.end   = ASIC3_SD_CTRL_BASE + 0x3ff,
 		.flags = IORESOURCE_MEM,
 	},
-	{
-		.start = ASIC3_SD_CONFIG_BASE,
-		.end   = ASIC3_SD_CONFIG_BASE + 0x1ff,
-		.flags = IORESOURCE_MEM,
-	},
 	{
 		.start = 0,
 		.end   = 0,
@@ -743,6 +755,10 @@ static int asic3_mmc_enable(struct platform_device *pdev)
 	asic3_set_register(asic, ASIC3_OFFSET(SDHWCTRL, SDCONF),
 			   ASIC3_SDHWCTRL_SDPWR, 1);
 
+	/* ASIC3_SD_CTRL_BASE assumes 32-bit addressing, TMIO is 16-bit */
+	tmio_core_mmc_enable(asic->tmio_cnf, 1 - asic->bus_shift,
+			     ASIC3_SD_CTRL_BASE >> 1);
+
 	return 0;
 }
 
@@ -797,10 +813,15 @@ static int __init asic3_mfd_probe(struct platform_device *pdev,
 	asic3_cell_ds1wm.data_size = sizeof(asic3_cell_ds1wm);
 
 	/* MMC */
+	asic->tmio_cnf = ioremap((ASIC3_SD_CONFIG_BASE >> asic->bus_shift) +
+				 mem_sdio->start, 0x400 >> asic->bus_shift);
+	if (!asic->tmio_cnf) {
+		ret = -ENOMEM;
+		dev_dbg(asic->dev, "Couldn't ioremap SD_CONFIG\n");
+		goto out;
+	}
 	asic3_mmc_resources[0].start >>= asic->bus_shift;
 	asic3_mmc_resources[0].end   >>= asic->bus_shift;
-	asic3_mmc_resources[1].start >>= asic->bus_shift;
-	asic3_mmc_resources[1].end   >>= asic->bus_shift;
 
 	asic3_cell_mmc.platform_data = &asic3_cell_mmc;
 	asic3_cell_mmc.data_size = sizeof(asic3_cell_mmc);
@@ -820,7 +841,10 @@ static int __init asic3_mfd_probe(struct platform_device *pdev,
 
 static void asic3_mfd_remove(struct platform_device *pdev)
 {
+	struct asic3 *asic = platform_get_drvdata(pdev);
+
 	mfd_remove_devices(&pdev->dev);
+	iounmap(asic->tmio_cnf);
 }
 
 /* Core */
diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c
index 0a255c1f1ce7..bcf4687d4af5 100644
--- a/drivers/mfd/t7l66xb.c
+++ b/drivers/mfd/t7l66xb.c
@@ -38,6 +38,19 @@ enum {
 	T7L66XB_CELL_MMC,
 };
 
+static const struct resource t7l66xb_mmc_resources[] = {
+	{
+		.start = 0x800,
+		.end	= 0x9ff,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.start = IRQ_T7L66XB_MMC,
+		.end	= IRQ_T7L66XB_MMC,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
 #define SCR_REVID	0x08		/* b Revision ID	*/
 #define SCR_IMR		0x42		/* b Interrupt Mask	*/
 #define SCR_DEV_CTL	0xe0		/* b Device control	*/
@@ -83,6 +96,9 @@ static int t7l66xb_mmc_enable(struct platform_device *mmc)
 
 	spin_unlock_irqrestore(&t7l66xb->lock, flags);
 
+	tmio_core_mmc_enable(t7l66xb->scr + 0x200, 0,
+		t7l66xb_mmc_resources[0].start & 0xfffe);
+
 	return 0;
 }
 
@@ -106,28 +122,28 @@ static int t7l66xb_mmc_disable(struct platform_device *mmc)
 	return 0;
 }
 
+static void t7l66xb_mmc_pwr(struct platform_device *mmc, int state)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct t7l66xb *t7l66xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_pwr(t7l66xb->scr + 0x200, 0, state);
+}
+
+static void t7l66xb_mmc_clk_div(struct platform_device *mmc, int state)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct t7l66xb *t7l66xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_clk_div(t7l66xb->scr + 0x200, 0, state);
+}
+
 /*--------------------------------------------------------------------------*/
 
 static struct tmio_mmc_data t7166xb_mmc_data = {
 	.hclk = 24000000,
-};
-
-static const struct resource t7l66xb_mmc_resources[] = {
-	{
-		.start = 0x800,
-		.end	= 0x9ff,
-		.flags = IORESOURCE_MEM,
-	},
-	{
-		.start = 0x200,
-		.end	= 0x2ff,
-		.flags = IORESOURCE_MEM,
-	},
-	{
-		.start = IRQ_T7L66XB_MMC,
-		.end	= IRQ_T7L66XB_MMC,
-		.flags = IORESOURCE_IRQ,
-	},
+	.set_pwr = t7l66xb_mmc_pwr,
+	.set_clk_div = t7l66xb_mmc_clk_div,
 };
 
 static const struct resource t7l66xb_nand_resources[] = {
@@ -282,6 +298,9 @@ static int t7l66xb_resume(struct platform_device *dev)
 	if (pdata && pdata->resume)
 		pdata->resume(dev);
 
+	tmio_core_mmc_enable(t7l66xb->scr + 0x200, 0,
+		t7l66xb_mmc_resources[0].start & 0xfffe);
+
 	return 0;
 }
 #else
diff --git a/drivers/mfd/tc6387xb.c b/drivers/mfd/tc6387xb.c
index 3280ab33f88a..5c7f04343d5c 100644
--- a/drivers/mfd/tc6387xb.c
+++ b/drivers/mfd/tc6387xb.c
@@ -22,28 +22,52 @@ enum {
 	TC6387XB_CELL_MMC,
 };
 
+struct tc6387xb {
+	void __iomem *scr;
+	struct clk *clk32k;
+	struct resource rscr;
+};
+
+static struct resource tc6387xb_mmc_resources[] = {
+	{
+		.start = 0x800,
+		.end   = 0x9ff,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.start = 0,
+		.end   = 0,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+/*--------------------------------------------------------------------------*/
+
 #ifdef CONFIG_PM
 static int tc6387xb_suspend(struct platform_device *dev, pm_message_t state)
 {
-	struct clk *clk32k = platform_get_drvdata(dev);
+	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
 	struct tc6387xb_platform_data *pdata = dev->dev.platform_data;
 
 	if (pdata && pdata->suspend)
 		pdata->suspend(dev);
-	clk_disable(clk32k);
+	clk_disable(tc6387xb->clk32k);
 
 	return 0;
 }
 
 static int tc6387xb_resume(struct platform_device *dev)
 {
-	struct clk *clk32k = platform_get_drvdata(dev);
+	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
 	struct tc6387xb_platform_data *pdata = dev->dev.platform_data;
 
-	clk_enable(clk32k);
+	clk_enable(tc6387xb->clk32k);
 	if (pdata && pdata->resume)
 		pdata->resume(dev);
 
+	tmio_core_mmc_resume(tc6387xb->scr + 0x200, 0,
+		tc6387xb_mmc_resources[0].start & 0xfffe);
+
 	return 0;
 }
 #else
@@ -53,12 +77,32 @@ static int tc6387xb_resume(struct platform_device *dev)
 
 /*--------------------------------------------------------------------------*/
 
+static void tc6387xb_mmc_pwr(struct platform_device *mmc, int state)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_pwr(tc6387xb->scr + 0x200, 0, state);
+}
+
+static void tc6387xb_mmc_clk_div(struct platform_device *mmc, int state)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_clk_div(tc6387xb->scr + 0x200, 0, state);
+}
+
+
 static int tc6387xb_mmc_enable(struct platform_device *mmc)
 {
 	struct platform_device *dev      = to_platform_device(mmc->dev.parent);
-	struct clk *clk32k = platform_get_drvdata(dev);
+	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
 
-	clk_enable(clk32k);
+	clk_enable(tc6387xb->clk32k);
+
+	tmio_core_mmc_enable(tc6387xb->scr + 0x200, 0,
+		tc6387xb_mmc_resources[0].start & 0xfffe);
 
 	return 0;
 }
@@ -66,36 +110,20 @@ static int tc6387xb_mmc_enable(struct platform_device *mmc)
 static int tc6387xb_mmc_disable(struct platform_device *mmc)
 {
 	struct platform_device *dev      = to_platform_device(mmc->dev.parent);
-	struct clk *clk32k = platform_get_drvdata(dev);
+	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
 
-	clk_disable(clk32k);
+	clk_disable(tc6387xb->clk32k);
 
 	return 0;
 }
 
-/*--------------------------------------------------------------------------*/
-
 static struct tmio_mmc_data tc6387xb_mmc_data = {
 	.hclk = 24000000,
+	.set_pwr = tc6387xb_mmc_pwr,
+	.set_clk_div = tc6387xb_mmc_clk_div,
 };
 
-static struct resource tc6387xb_mmc_resources[] = {
-	{
-		.start = 0x800,
-		.end   = 0x9ff,
-		.flags = IORESOURCE_MEM,
-	},
-	{
-		.start = 0x200,
-		.end   = 0x2ff,
-		.flags = IORESOURCE_MEM,
-	},
-	{
-		.start = 0,
-		.end   = 0,
-		.flags = IORESOURCE_IRQ,
-	},
-};
+/*--------------------------------------------------------------------------*/
 
 static struct mfd_cell tc6387xb_cells[] = {
 	[TC6387XB_CELL_MMC] = {
@@ -111,8 +139,9 @@ static struct mfd_cell tc6387xb_cells[] = {
 static int tc6387xb_probe(struct platform_device *dev)
 {
 	struct tc6387xb_platform_data *pdata = dev->dev.platform_data;
-	struct resource *iomem;
+	struct resource *iomem, *rscr;
 	struct clk *clk32k;
+	struct tc6387xb *tc6387xb;
 	int irq, ret;
 
 	iomem = platform_get_resource(dev, IORESOURCE_MEM, 0);
@@ -120,18 +149,40 @@ static int tc6387xb_probe(struct platform_device *dev)
 		return -EINVAL;
 	}
 
+	tc6387xb = kzalloc(sizeof *tc6387xb, GFP_KERNEL);
+	if (!tc6387xb)
+		return -ENOMEM;
+
 	ret  = platform_get_irq(dev, 0);
 	if (ret >= 0)
 		irq = ret;
 	else
-		goto err_resource;
+		goto err_no_irq;
 
 	clk32k = clk_get(&dev->dev, "CLK_CK32K");
 	if (IS_ERR(clk32k)) {
 		ret = PTR_ERR(clk32k);
+		goto err_no_clk;
+	}
+
+	rscr = &tc6387xb->rscr;
+	rscr->name = "tc6387xb-core";
+	rscr->start = iomem->start;
+	rscr->end = iomem->start + 0xff;
+	rscr->flags = IORESOURCE_MEM;
+
+	ret = request_resource(iomem, rscr);
+	if (ret)
 		goto err_resource;
+
+	tc6387xb->scr = ioremap(rscr->start, rscr->end - rscr->start + 1);
+	if (!tc6387xb->scr) {
+		ret = -ENOMEM;
+		goto err_ioremap;
 	}
-	platform_set_drvdata(dev, clk32k);
+
+	tc6387xb->clk32k = clk32k;
+	platform_set_drvdata(dev, tc6387xb);
 
 	if (pdata && pdata->enable)
 		pdata->enable(dev);
@@ -149,8 +200,13 @@ static int tc6387xb_probe(struct platform_device *dev)
 	if (!ret)
 		return 0;
 
-	clk_put(clk32k);
+err_ioremap:
+	release_resource(&tc6387xb->rscr);
 err_resource:
+	clk_put(clk32k);
+err_no_clk:
+err_no_irq:
+	kfree(tc6387xb);
 	return ret;
 }
 
@@ -195,3 +251,4 @@ MODULE_DESCRIPTION("Toshiba TC6387XB core driver");
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Ian Molton");
 MODULE_ALIAS("platform:tc6387xb");
+
diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index 1429a7341a9a..4bc5a08a2b09 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -136,10 +136,6 @@ static int tc6393xb_nand_enable(struct platform_device *nand)
 	return 0;
 }
 
-static struct tmio_mmc_data tc6393xb_mmc_data = {
-	.hclk = 24000000,
-};
-
 static struct resource __devinitdata tc6393xb_nand_resources[] = {
 	{
 		.start	= 0x1000,
@@ -164,11 +160,6 @@ static struct resource __devinitdata tc6393xb_mmc_resources[] = {
 		.end	= 0x9ff,
 		.flags	= IORESOURCE_MEM,
 	},
-	{
-		.start	= 0x200,
-		.end	= 0x2ff,
-		.flags	= IORESOURCE_MEM,
-	},
 	{
 		.start	= IRQ_TC6393_MMC,
 		.end	= IRQ_TC6393_MMC,
@@ -346,6 +337,50 @@ int tc6393xb_lcd_mode(struct platform_device *fb,
 }
 EXPORT_SYMBOL(tc6393xb_lcd_mode);
 
+static int tc6393xb_mmc_enable(struct platform_device *mmc)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_enable(tc6393xb->scr + 0x200, 0,
+		tc6393xb_mmc_resources[0].start & 0xfffe);
+
+	return 0;
+}
+
+static int tc6393xb_mmc_resume(struct platform_device *mmc)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_resume(tc6393xb->scr + 0x200, 0,
+		tc6393xb_mmc_resources[0].start & 0xfffe);
+
+	return 0;
+}
+
+static void tc6393xb_mmc_pwr(struct platform_device *mmc, int state)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_pwr(tc6393xb->scr + 0x200, 0, state);
+}
+
+static void tc6393xb_mmc_clk_div(struct platform_device *mmc, int state)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_clk_div(tc6393xb->scr + 0x200, 0, state);
+}
+
+static struct tmio_mmc_data tc6393xb_mmc_data = {
+	.hclk = 24000000,
+	.set_pwr = tc6393xb_mmc_pwr,
+	.set_clk_div = tc6393xb_mmc_clk_div,
+};
+
 static struct mfd_cell __devinitdata tc6393xb_cells[] = {
 	[TC6393XB_CELL_NAND] = {
 		.name = "tmio-nand",
@@ -355,6 +390,8 @@ static struct mfd_cell __devinitdata tc6393xb_cells[] = {
 	},
 	[TC6393XB_CELL_MMC] = {
 		.name = "tmio-mmc",
+		.enable = tc6393xb_mmc_enable,
+		.resume = tc6393xb_mmc_resume,
 		.driver_data = &tc6393xb_mmc_data,
 		.num_resources = ARRAY_SIZE(tc6393xb_mmc_resources),
 		.resources = tc6393xb_mmc_resources,
@@ -836,3 +873,4 @@ MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Ian Molton, Dmitry Baryshkov and Dirk Opfer");
 MODULE_DESCRIPTION("tc6393xb Toshiba Mobile IO Controller");
 MODULE_ALIAS("platform:tc6393xb");
+
diff --git a/drivers/mfd/tmio_core.c b/drivers/mfd/tmio_core.c
new file mode 100644
index 000000000000..eddc19ae464b
--- /dev/null
+++ b/drivers/mfd/tmio_core.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright(c) 2009 Ian Molton <spyro@f2s.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/mfd/tmio.h>
+
+int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base)
+{
+	/* Enable the MMC/SD Control registers */
+	sd_config_write16(cnf, shift, CNF_CMD, SDCREN);
+	sd_config_write32(cnf, shift, CNF_CTL_BASE, base & 0xfffe);
+
+	/* Disable SD power during suspend */
+	sd_config_write8(cnf, shift, CNF_PWR_CTL_3, 0x01);
+
+	/* The below is required but why? FIXME */
+	sd_config_write8(cnf, shift, CNF_STOP_CLK_CTL, 0x1f);
+
+	/* Power down SD bus */
+	sd_config_write8(cnf, shift, CNF_PWR_CTL_2, 0x00);
+
+	return 0;
+}
+EXPORT_SYMBOL(tmio_core_mmc_enable);
+
+int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base)
+{
+
+	/* Enable the MMC/SD Control registers */
+	sd_config_write16(cnf, shift, CNF_CMD, SDCREN);
+	sd_config_write32(cnf, shift, CNF_CTL_BASE, base & 0xfffe);
+
+	return 0;
+}
+EXPORT_SYMBOL(tmio_core_mmc_resume);
+
+void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state)
+{
+	sd_config_write8(cnf, shift, CNF_PWR_CTL_2, state ? 0x02 : 0x00);
+}
+EXPORT_SYMBOL(tmio_core_mmc_pwr);
+
+void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state)
+{
+	sd_config_write8(cnf, shift, CNF_SD_CLK_MODE, state ? 1 : 0);
+}
+EXPORT_SYMBOL(tmio_core_mmc_clk_div);
+
diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index 7cccc8523747..e22c3fa3516a 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -46,7 +46,9 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host, int new_clock)
 		clk |= 0x100;
 	}
 
-	sd_config_write8(host, CNF_SD_CLK_MODE, clk >> 22);
+	if (host->set_clk_div)
+		host->set_clk_div(host->pdev, (clk>>22) & 1);
+
 	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk & 0x1ff);
 }
 
@@ -427,12 +429,13 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	/* Power sequence - OFF -> ON -> UP */
 	switch (ios->power_mode) {
 	case MMC_POWER_OFF: /* power down SD bus */
-		sd_config_write8(host, CNF_PWR_CTL_2, 0x00);
+		if (host->set_pwr)
+			host->set_pwr(host->pdev, 0);
 		tmio_mmc_clk_stop(host);
 		break;
 	case MMC_POWER_ON: /* power up SD bus */
-
-		sd_config_write8(host, CNF_PWR_CTL_2, 0x02);
+		if (host->set_pwr)
+			host->set_pwr(host->pdev, 1);
 		break;
 	case MMC_POWER_UP: /* start bus clock */
 		tmio_mmc_clk_start(host);
@@ -485,21 +488,15 @@ static int tmio_mmc_resume(struct platform_device *dev)
 {
 	struct mfd_cell	*cell = (struct mfd_cell *)dev->dev.platform_data;
 	struct mmc_host *mmc = platform_get_drvdata(dev);
-	struct tmio_mmc_host *host = mmc_priv(mmc);
 	int ret = 0;
 
 	/* Tell the MFD core we are ready to be enabled */
-	if (cell->enable) {
-		ret = cell->enable(dev);
+	if (cell->resume) {
+		ret = cell->resume(dev);
 		if (ret)
 			goto out;
 	}
 
-	/* Enable the MMC/SD Control registers */
-	sd_config_write16(host, CNF_CMD, SDCREN);
-	sd_config_write32(host, CNF_CTL_BASE,
-		(dev->resource[0].start >> host->bus_shift) & 0xfffe);
-
 	mmc_resume_host(mmc);
 
 out:
@@ -514,17 +511,16 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 {
 	struct mfd_cell	*cell = (struct mfd_cell *)dev->dev.platform_data;
 	struct tmio_mmc_data *pdata;
-	struct resource *res_ctl, *res_cnf;
+	struct resource *res_ctl;
 	struct tmio_mmc_host *host;
 	struct mmc_host *mmc;
 	int ret = -EINVAL;
 
-	if (dev->num_resources != 3)
+	if (dev->num_resources != 2)
 		goto out;
 
 	res_ctl = platform_get_resource(dev, IORESOURCE_MEM, 0);
-	res_cnf = platform_get_resource(dev, IORESOURCE_MEM, 1);
-	if (!res_ctl || !res_cnf)
+	if (!res_ctl)
 		goto out;
 
 	pdata = cell->driver_data;
@@ -539,8 +535,12 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 
 	host = mmc_priv(mmc);
 	host->mmc = mmc;
+	host->pdev = dev;
 	platform_set_drvdata(dev, mmc);
 
+	host->set_pwr = pdata->set_pwr;
+	host->set_clk_div = pdata->set_clk_div;
+
 	/* SD control register space size is 0x200, 0x400 for bus_shift=1 */
 	host->bus_shift = resource_size(res_ctl) >> 10;
 
@@ -548,10 +548,6 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 	if (!host->ctl)
 		goto host_free;
 
-	host->cnf = ioremap(res_cnf->start, resource_size(res_cnf));
-	if (!host->cnf)
-		goto unmap_ctl;
-
 	mmc->ops = &tmio_mmc_ops;
 	mmc->caps = MMC_CAP_4_BIT_DATA;
 	mmc->f_max = pdata->hclk;
@@ -562,23 +558,9 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 	if (cell->enable) {
 		ret = cell->enable(dev);
 		if (ret)
-			goto unmap_cnf;
+			goto unmap_ctl;
 	}
 
-	/* Enable the MMC/SD Control registers */
-	sd_config_write16(host, CNF_CMD, SDCREN);
-	sd_config_write32(host, CNF_CTL_BASE,
-		(dev->resource[0].start >> host->bus_shift) & 0xfffe);
-
-	/* Disable SD power during suspend */
-	sd_config_write8(host, CNF_PWR_CTL_3, 0x01);
-
-	/* The below is required but why? FIXME */
-	sd_config_write8(host, CNF_STOP_CLK_CTL, 0x1f);
-
-	/* Power down SD bus*/
-	sd_config_write8(host, CNF_PWR_CTL_2, 0x00);
-
 	tmio_mmc_clk_stop(host);
 	reset(host);
 
@@ -586,14 +568,14 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 	if (ret >= 0)
 		host->irq = ret;
 	else
-		goto unmap_cnf;
+		goto unmap_ctl;
 
 	disable_mmc_irqs(host, TMIO_MASK_ALL);
 
 	ret = request_irq(host->irq, tmio_mmc_irq, IRQF_DISABLED |
 		IRQF_TRIGGER_FALLING, dev_name(&dev->dev), host);
 	if (ret)
-		goto unmap_cnf;
+		goto unmap_ctl;
 
 	mmc_add_host(mmc);
 
@@ -605,8 +587,6 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 
 	return 0;
 
-unmap_cnf:
-	iounmap(host->cnf);
 unmap_ctl:
 	iounmap(host->ctl);
 host_free:
@@ -626,7 +606,6 @@ static int __devexit tmio_mmc_remove(struct platform_device *dev)
 		mmc_remove_host(mmc);
 		free_irq(host->irq, host);
 		iounmap(host->ctl);
-		iounmap(host->cnf);
 		mmc_free_host(mmc);
 	}
 
diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
index 9fa998594974..692dc23363b9 100644
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h
@@ -11,26 +11,6 @@
 
 #include <linux/highmem.h>
 
-#define CNF_CMD     0x04
-#define CNF_CTL_BASE   0x10
-#define CNF_INT_PIN  0x3d
-#define CNF_STOP_CLK_CTL 0x40
-#define CNF_GCLK_CTL 0x41
-#define CNF_SD_CLK_MODE 0x42
-#define CNF_PIN_STATUS 0x44
-#define CNF_PWR_CTL_1 0x48
-#define CNF_PWR_CTL_2 0x49
-#define CNF_PWR_CTL_3 0x4a
-#define CNF_CARD_DETECT_MODE 0x4c
-#define CNF_SD_SLOT 0x50
-#define CNF_EXT_GCLK_CTL_1 0xf0
-#define CNF_EXT_GCLK_CTL_2 0xf1
-#define CNF_EXT_GCLK_CTL_3 0xf9
-#define CNF_SD_LED_EN_1 0xfa
-#define CNF_SD_LED_EN_2 0xfe
-
-#define   SDCREN 0x2   /* Enable access to MMC CTL regs. (flag in COMMAND_REG)*/
-
 #define CTL_SD_CMD 0x00
 #define CTL_ARG_REG 0x04
 #define CTL_STOP_INTERNAL_ACTION 0x08
@@ -110,7 +90,6 @@
 
 
 struct tmio_mmc_host {
-	void __iomem *cnf;
 	void __iomem *ctl;
 	unsigned long bus_shift;
 	struct mmc_command      *cmd;
@@ -119,10 +98,16 @@ struct tmio_mmc_host {
 	struct mmc_host         *mmc;
 	int                     irq;
 
+	/* Callbacks for clock / power control */
+	void (*set_pwr)(struct platform_device *host, int state);
+	void (*set_clk_div)(struct platform_device *host, int state);
+
 	/* pio related stuff */
 	struct scatterlist      *sg_ptr;
 	unsigned int            sg_len;
 	unsigned int            sg_off;
+
+	struct platform_device *pdev;
 };
 
 #include <linux/io.h>
@@ -163,25 +148,6 @@ static inline void sd_ctrl_write32(struct tmio_mmc_host *host, int addr,
 	writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift));
 }
 
-static inline void sd_config_write8(struct tmio_mmc_host *host, int addr,
-		u8 val)
-{
-	writeb(val, host->cnf + (addr << host->bus_shift));
-}
-
-static inline void sd_config_write16(struct tmio_mmc_host *host, int addr,
-		u16 val)
-{
-	writew(val, host->cnf + (addr << host->bus_shift));
-}
-
-static inline void sd_config_write32(struct tmio_mmc_host *host, int addr,
-		u32 val)
-{
-	writew(val, host->cnf + (addr << host->bus_shift));
-	writew(val >> 16, host->cnf + ((addr + 2) << host->bus_shift));
-}
-
 #include <linux/scatterlist.h>
 #include <linux/blkdev.h>
 
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 6b9c5d06690c..9cb1834deffa 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -2,6 +2,8 @@
 #define MFD_TMIO_H
 
 #include <linux/fb.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
 
 #define tmio_ioread8(addr) readb(addr)
 #define tmio_ioread16(addr) readw(addr)
@@ -18,11 +20,48 @@
 	writew((val) >> 16, (addr) + 2); \
 	} while (0)
 
+#define CNF_CMD     0x04
+#define CNF_CTL_BASE   0x10
+#define CNF_INT_PIN  0x3d
+#define CNF_STOP_CLK_CTL 0x40
+#define CNF_GCLK_CTL 0x41
+#define CNF_SD_CLK_MODE 0x42
+#define CNF_PIN_STATUS 0x44
+#define CNF_PWR_CTL_1 0x48
+#define CNF_PWR_CTL_2 0x49
+#define CNF_PWR_CTL_3 0x4a
+#define CNF_CARD_DETECT_MODE 0x4c
+#define CNF_SD_SLOT 0x50
+#define CNF_EXT_GCLK_CTL_1 0xf0
+#define CNF_EXT_GCLK_CTL_2 0xf1
+#define CNF_EXT_GCLK_CTL_3 0xf9
+#define CNF_SD_LED_EN_1 0xfa
+#define CNF_SD_LED_EN_2 0xfe
+
+#define   SDCREN 0x2   /* Enable access to MMC CTL regs. (flag in COMMAND_REG)*/
+
+#define sd_config_write8(base, shift, reg, val) \
+	tmio_iowrite8((val), (base) + ((reg) << (shift)))
+#define sd_config_write16(base, shift, reg, val) \
+	tmio_iowrite16((val), (base) + ((reg) << (shift)))
+#define sd_config_write32(base, shift, reg, val) \
+	do { \
+		tmio_iowrite16((val), (base) + ((reg) << (shift)));   \
+		tmio_iowrite16((val) >> 16, (base) + ((reg + 2) << (shift))); \
+	} while (0)
+
+int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base);
+int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base);
+void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state);
+void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state);
+
 /*
  * data for the MMC controller
  */
 struct tmio_mmc_data {
 	const unsigned int		hclk;
+	void (*set_pwr)(struct platform_device *host, int state);
+	void (*set_clk_div)(struct platform_device *host, int state);
 };
 
 /*
-- 
cgit v1.2.3


From ec51b7f538c440bfa5a4d538133c659071c02155 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Tue, 19 Jan 2010 00:27:58 -0800
Subject: Input: ad7879 - support auxiliary GPIOs via gpiolib

Drop the simple fancy sysfs hooks for the aux GPIOs and expose these via
the gpiolib interface so that other drivers can use them.

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/ad7879.c | 197 ++++++++++++++++++++++++++-----------
 include/linux/spi/ad7879.h         |  12 ++-
 2 files changed, 149 insertions(+), 60 deletions(-)

(limited to 'include')

diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c
index c21e6d3a8844..794d070c6900 100644
--- a/drivers/input/touchscreen/ad7879.c
+++ b/drivers/input/touchscreen/ad7879.c
@@ -47,6 +47,7 @@
 #include <linux/workqueue.h>
 #include <linux/spi/spi.h>
 #include <linux/i2c.h>
+#include <linux/gpio.h>
 
 #include <linux/spi/ad7879.h>
 
@@ -132,7 +133,9 @@ struct ad7879 {
 	struct input_dev	*input;
 	struct work_struct	work;
 	struct timer_list	timer;
-
+#ifdef CONFIG_GPIOLIB
+	struct gpio_chip	gc;
+#endif
 	struct mutex		mutex;
 	unsigned		disabled:1;	/* P: mutex */
 
@@ -150,11 +153,9 @@ struct ad7879 {
 	u8			median;
 	u16			x_plate_ohms;
 	u16			pressure_max;
-	u16			gpio_init;
 	u16			cmd_crtl1;
 	u16			cmd_crtl2;
 	u16			cmd_crtl3;
-	unsigned		gpio:1;
 };
 
 static int ad7879_read(bus_device *, u8);
@@ -237,24 +238,6 @@ static irqreturn_t ad7879_irq(int irq, void *handle)
 
 static void ad7879_setup(struct ad7879 *ts)
 {
-	ts->cmd_crtl3 = AD7879_YPLUS_BIT |
-			AD7879_XPLUS_BIT |
-			AD7879_Z2_BIT |
-			AD7879_Z1_BIT |
-			AD7879_TEMPMASK_BIT |
-			AD7879_AUXVBATMASK_BIT |
-			AD7879_GPIOALERTMASK_BIT;
-
-	ts->cmd_crtl2 = AD7879_PM(AD7879_PM_DYN) | AD7879_DFR |
-			AD7879_AVG(ts->averaging) |
-			AD7879_MFS(ts->median) |
-			AD7879_FCD(ts->first_conversion_delay) |
-			ts->gpio_init;
-
-	ts->cmd_crtl1 = AD7879_MODE_INT | AD7879_MODE_SEQ1 |
-			AD7879_ACQ(ts->acquisition_time) |
-			AD7879_TMR(ts->pen_down_acc_interval);
-
 	ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2);
 	ad7879_write(ts->bus, AD7879_REG_CTRL3, ts->cmd_crtl3);
 	ad7879_write(ts->bus, AD7879_REG_CTRL1, ts->cmd_crtl1);
@@ -324,48 +307,132 @@ static ssize_t ad7879_disable_store(struct device *dev,
 
 static DEVICE_ATTR(disable, 0664, ad7879_disable_show, ad7879_disable_store);
 
-static ssize_t ad7879_gpio_show(struct device *dev,
-				     struct device_attribute *attr, char *buf)
+static struct attribute *ad7879_attributes[] = {
+	&dev_attr_disable.attr,
+	NULL
+};
+
+static const struct attribute_group ad7879_attr_group = {
+	.attrs = ad7879_attributes,
+};
+
+#ifdef CONFIG_GPIOLIB
+static int ad7879_gpio_direction_input(struct gpio_chip *chip,
+					unsigned gpio)
 {
-	struct ad7879 *ts = dev_get_drvdata(dev);
+	struct ad7879 *ts = container_of(chip, struct ad7879, gc);
+	int err;
 
-	return sprintf(buf, "%u\n", ts->gpio);
+	mutex_lock(&ts->mutex);
+	ts->cmd_crtl2 |= AD7879_GPIO_EN | AD7879_GPIODIR | AD7879_GPIOPOL;
+	err = ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2);
+	mutex_unlock(&ts->mutex);
+
+	return err;
 }
 
-static ssize_t ad7879_gpio_store(struct device *dev,
-				     struct device_attribute *attr,
-				     const char *buf, size_t count)
+static int ad7879_gpio_direction_output(struct gpio_chip *chip,
+					unsigned gpio, int level)
 {
-	struct ad7879 *ts = dev_get_drvdata(dev);
-	unsigned long val;
-	int error;
+	struct ad7879 *ts = container_of(chip, struct ad7879, gc);
+	int err;
 
-	error = strict_strtoul(buf, 10, &val);
-	if (error)
-		return error;
+	mutex_lock(&ts->mutex);
+	ts->cmd_crtl2 &= ~AD7879_GPIODIR;
+	ts->cmd_crtl2 |= AD7879_GPIO_EN | AD7879_GPIOPOL;
+	if (level)
+		ts->cmd_crtl2 |= AD7879_GPIO_DATA;
+	else
+		ts->cmd_crtl2 &= ~AD7879_GPIO_DATA;
+
+	err = ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2);
+	mutex_unlock(&ts->mutex);
+
+	return err;
+}
+
+static int ad7879_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
+{
+	struct ad7879 *ts = container_of(chip, struct ad7879, gc);
+	u16 val;
 
 	mutex_lock(&ts->mutex);
-	ts->gpio = !!val;
-	error = ad7879_write(ts->bus, AD7879_REG_CTRL2,
-			   ts->gpio ?
-				ts->cmd_crtl2 & ~AD7879_GPIO_DATA :
-				ts->cmd_crtl2 | AD7879_GPIO_DATA);
+	val = ad7879_read(ts->bus, AD7879_REG_CTRL2);
 	mutex_unlock(&ts->mutex);
 
-	return error ? : count;
+	return !!(val & AD7879_GPIO_DATA);
 }
 
-static DEVICE_ATTR(gpio, 0664, ad7879_gpio_show, ad7879_gpio_store);
+static void ad7879_gpio_set_value(struct gpio_chip *chip,
+				  unsigned gpio, int value)
+{
+	struct ad7879 *ts = container_of(chip, struct ad7879, gc);
 
-static struct attribute *ad7879_attributes[] = {
-	&dev_attr_disable.attr,
-	&dev_attr_gpio.attr,
-	NULL
-};
+	mutex_lock(&ts->mutex);
+	if (value)
+		ts->cmd_crtl2 |= AD7879_GPIO_DATA;
+	else
+		ts->cmd_crtl2 &= ~AD7879_GPIO_DATA;
 
-static const struct attribute_group ad7879_attr_group = {
-	.attrs = ad7879_attributes,
-};
+	ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2);
+	mutex_unlock(&ts->mutex);
+}
+
+static int __devinit ad7879_gpio_add(struct device *dev)
+{
+	struct ad7879 *ts = dev_get_drvdata(dev);
+	struct ad7879_platform_data *pdata = dev->platform_data;
+	int ret = 0;
+
+	if (pdata->gpio_export) {
+		ts->gc.direction_input = ad7879_gpio_direction_input;
+		ts->gc.direction_output = ad7879_gpio_direction_output;
+		ts->gc.get = ad7879_gpio_get_value;
+		ts->gc.set = ad7879_gpio_set_value;
+		ts->gc.can_sleep = 1;
+		ts->gc.base = pdata->gpio_base;
+		ts->gc.ngpio = 1;
+		ts->gc.label = "AD7879-GPIO";
+		ts->gc.owner = THIS_MODULE;
+		ts->gc.dev = dev;
+
+		ret = gpiochip_add(&ts->gc);
+		if (ret)
+			dev_err(dev, "failed to register gpio %d\n",
+				ts->gc.base);
+	}
+
+	return ret;
+}
+
+/*
+ * We mark ad7879_gpio_remove inline so there is a chance the code
+ * gets discarded when not needed. We can't do __devinit/__devexit
+ * markup since it is used in both probe and remove methods.
+ */
+static inline void ad7879_gpio_remove(struct device *dev)
+{
+	struct ad7879 *ts = dev_get_drvdata(dev);
+	struct ad7879_platform_data *pdata = dev->platform_data;
+	int ret;
+
+	if (pdata->gpio_export) {
+		ret = gpiochip_remove(&ts->gc);
+		if (ret)
+			dev_err(dev, "failed to remove gpio %d\n",
+				ts->gc.base);
+	}
+}
+#else
+static inline int ad7879_gpio_add(struct device *dev)
+{
+	return 0;
+}
+
+static inline void ad7879_gpio_remove(struct device *dev)
+{
+}
+#endif
 
 static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts)
 {
@@ -403,12 +470,6 @@ static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts)
 	ts->pen_down_acc_interval = pdata->pen_down_acc_interval;
 	ts->median = pdata->median;
 
-	if (pdata->gpio_output)
-		ts->gpio_init = AD7879_GPIO_EN |
-				(pdata->gpio_default ? 0 : AD7879_GPIO_DATA);
-	else
-		ts->gpio_init = AD7879_GPIO_EN | AD7879_GPIODIR;
-
 	snprintf(ts->phys, sizeof(ts->phys), "%s/input0", dev_name(&bus->dev));
 
 	input_dev->name = "AD7879 Touchscreen";
@@ -446,6 +507,23 @@ static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts)
 		goto err_free_mem;
 	}
 
+	ts->cmd_crtl3 = AD7879_YPLUS_BIT |
+			AD7879_XPLUS_BIT |
+			AD7879_Z2_BIT |
+			AD7879_Z1_BIT |
+			AD7879_TEMPMASK_BIT |
+			AD7879_AUXVBATMASK_BIT |
+			AD7879_GPIOALERTMASK_BIT;
+
+	ts->cmd_crtl2 = AD7879_PM(AD7879_PM_DYN) | AD7879_DFR |
+			AD7879_AVG(ts->averaging) |
+			AD7879_MFS(ts->median) |
+			AD7879_FCD(ts->first_conversion_delay);
+
+	ts->cmd_crtl1 = AD7879_MODE_INT | AD7879_MODE_SEQ1 |
+			AD7879_ACQ(ts->acquisition_time) |
+			AD7879_TMR(ts->pen_down_acc_interval);
+
 	ad7879_setup(ts);
 
 	err = request_irq(bus->irq, ad7879_irq,
@@ -460,15 +538,21 @@ static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts)
 	if (err)
 		goto err_free_irq;
 
-	err = input_register_device(input_dev);
+	err = ad7879_gpio_add(&bus->dev);
 	if (err)
 		goto err_remove_attr;
 
+	err = input_register_device(input_dev);
+	if (err)
+		goto err_remove_gpio;
+
 	dev_info(&bus->dev, "Rev.%d touchscreen, irq %d\n",
 		 revid >> 8, bus->irq);
 
 	return 0;
 
+err_remove_gpio:
+	ad7879_gpio_remove(&bus->dev);
 err_remove_attr:
 	sysfs_remove_group(&bus->dev.kobj, &ad7879_attr_group);
 err_free_irq:
@@ -481,6 +565,7 @@ err_free_mem:
 
 static int __devexit ad7879_destroy(bus_device *bus, struct ad7879 *ts)
 {
+	ad7879_gpio_remove(&bus->dev);
 	ad7879_disable(ts);
 	sysfs_remove_group(&ts->bus->dev.kobj, &ad7879_attr_group);
 	free_irq(ts->bus->irq, ts);
diff --git a/include/linux/spi/ad7879.h b/include/linux/spi/ad7879.h
index 4231104c9afa..6334cee1a3be 100644
--- a/include/linux/spi/ad7879.h
+++ b/include/linux/spi/ad7879.h
@@ -28,8 +28,12 @@ struct ad7879_platform_data {
 	 * 1 = 4, 2 = 8, 3 = 16 (median > averaging)
 	 */
 	u8	median;
-	/* 1 = AUX/VBAT/GPIO set to GPIO Output */
-	u8	gpio_output;
-	/* Initial GPIO pin state (valid if gpio_output = 1) */
-	u8	gpio_default;
+	/* 1 = AUX/VBAT/GPIO export GPIO to gpiolib
+	 * requires CONFIG_GPIOLIB
+	 */
+	bool	gpio_export;
+	/* identifies the first GPIO number handled by this chip;
+	 * or, if negative, requests dynamic ID allocation.
+	 */
+	s32	gpio_base;
 };
-- 
cgit v1.2.3


From 4f9c85a1b03bfa5c0a0d8488a3a7766f3c9fb756 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Mon, 18 Jan 2010 05:37:16 +0000
Subject: phylib: Move workqueue initialization to a proper place

commit 541cd3ee00a4fe975b22fac6a3bc846bacef37f7 ("phylib: Fix deadlock
on resume") caused TI DaVinci EMAC ethernet driver to oops upon resume:

 PM: resume of devices complete after 237.098 msecs
 Restarting tasks ... done.
 kernel BUG at kernel/workqueue.c:354!
 Unable to handle kernel NULL pointer dereference at virtual address 00000000
 [...]
 Backtrace:
 [<c002c598>] (__bug+0x0/0x2c) from [<c0052a54>] (queue_delayed_work_on+0x74/0xf8)
 [<c00529e0>] (queue_delayed_work_on+0x0/0xf8) from [<c0052b30>] (queue_delayed_work+0x2c/0x30)

The oops pops up because TI DaVinci EMAC driver detaches PHY on
suspend and attaches it back on resume. Attaching makes phylib call
phy_start_machine() that initializes a workqueue. On the other hand,
PHY's resume routine will call phy_start_machine() again, and that
will cause the oops since we just destroyed the already scheduled
workqueue.

This patch fixes the issue by moving workqueue initialization to
phy_device_create().

p.s. We don't see this oops with ucc_geth and gianfar drivers because
they perform a fine-grained suspend, i.e. they just stop the PHYs
without detaching.

Reported-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Tested-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c        | 4 +---
 drivers/net/phy/phy_device.c | 1 +
 include/linux/phy.h          | 1 +
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index b0e9f9c51721..0295097d6c44 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -410,7 +410,6 @@ EXPORT_SYMBOL(phy_start_aneg);
 
 
 static void phy_change(struct work_struct *work);
-static void phy_state_machine(struct work_struct *work);
 
 /**
  * phy_start_machine - start PHY state machine tracking
@@ -430,7 +429,6 @@ void phy_start_machine(struct phy_device *phydev,
 {
 	phydev->adjust_state = handler;
 
-	INIT_DELAYED_WORK(&phydev->state_queue, phy_state_machine);
 	schedule_delayed_work(&phydev->state_queue, HZ);
 }
 
@@ -761,7 +759,7 @@ EXPORT_SYMBOL(phy_start);
  * phy_state_machine - Handle the state machine
  * @work: work_struct that describes the work to be done
  */
-static void phy_state_machine(struct work_struct *work)
+void phy_state_machine(struct work_struct *work)
 {
 	struct delayed_work *dwork = to_delayed_work(work);
 	struct phy_device *phydev =
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 8212b2b93422..adbc0fded130 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -177,6 +177,7 @@ struct phy_device* phy_device_create(struct mii_bus *bus, int addr, int phy_id)
 	dev->state = PHY_DOWN;
 
 	mutex_init(&dev->lock);
+	INIT_DELAYED_WORK(&dev->state_queue, phy_state_machine);
 
 	return dev;
 }
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 7968defd2fa7..6a7eb402165d 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -485,6 +485,7 @@ void phy_driver_unregister(struct phy_driver *drv);
 int phy_driver_register(struct phy_driver *new_driver);
 void phy_prepare_link(struct phy_device *phydev,
 		void (*adjust_link)(struct net_device *));
+void phy_state_machine(struct work_struct *work);
 void phy_start_machine(struct phy_device *phydev,
 		void (*handler)(struct net_device *));
 void phy_stop_machine(struct phy_device *phydev);
-- 
cgit v1.2.3


From 04a723ea9c53ba608b0411aa36948bb57c51a08e Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Wed, 6 Jan 2010 10:16:51 -0800
Subject: USB: Fix duplicate sysfs problem after device reset.

Borislav Petkov reports issues with duplicate sysfs endpoint files after a
resume from a hibernate.  It turns out that the code to support alternate
settings under xHCI has issues when a device with a non-default alternate
setting is reset during the hibernate:

[  427.681810] Restarting tasks ...
[  427.681995] hub 1-0:1.0: state 7 ports 6 chg 0004 evt 0000
[  427.682019] usb usb3: usb resume
[  427.682030] ohci_hcd 0000:00:12.0: wakeup root hub
[  427.682191] hub 1-0:1.0: port 2, status 0501, change 0000, 480 Mb/s
[  427.682205] usb 1-2: usb wakeup-resume
[  427.682226] usb 1-2: finish reset-resume
[  427.682886] done.
[  427.734658] ehci_hcd 0000:00:12.2: port 2 high speed
[  427.734663] ehci_hcd 0000:00:12.2: GetStatus port 2 status 001005 POWER sig=se0 PE CONNECT
[  427.746682] hub 3-0:1.0: hub_reset_resume
[  427.746693] hub 3-0:1.0: trying to enable port power on non-switchable hub
[  427.786715] usb 1-2: reset high speed USB device using ehci_hcd and address 2
[  427.839653] ehci_hcd 0000:00:12.2: port 2 high speed
[  427.839666] ehci_hcd 0000:00:12.2: GetStatus port 2 status 001005 POWER sig=se0 PE CONNECT
[  427.847717] ohci_hcd 0000:00:12.0: GetStatus roothub.portstatus [1] = 0x00010100 CSC PPS
[  427.915497] hub 1-2:1.0: remove_intf_ep_devs: if: ffff88022f9e8800 ->ep_devs_created: 1
[  427.915774] hub 1-2:1.0: remove_intf_ep_devs: bNumEndpoints: 1
[  427.915934] hub 1-2:1.0: if: ffff88022f9e8800: endpoint devs removed.
[  427.916158] hub 1-2:1.0: create_intf_ep_devs: if: ffff88022f9e8800 ->ep_devs_created: 0, ->unregistering: 0
[  427.916434] hub 1-2:1.0: create_intf_ep_devs: bNumEndpoints: 1
[  427.916609]  ep_81: create, parent hub
[  427.916632] ------------[ cut here ]------------
[  427.916644] WARNING: at fs/sysfs/dir.c:477 sysfs_add_one+0x82/0x96()
[  427.916649] Hardware name: System Product Name
[  427.916653] sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:12.2/usb1/1-2/1-2:1.0/ep_81'
[  427.916658] Modules linked in: binfmt_misc kvm_amd kvm powernow_k8 cpufreq_ondemand cpufreq_powersave cpufreq_userspace freq_table cpufreq_conservative ipv6 vfat fat
+8250_pnp 8250 pcspkr ohci_hcd serial_core k10temp edac_core
[  427.916694] Pid: 278, comm: khubd Not tainted 2.6.33-rc2-00187-g08d869a-dirty #13
[  427.916699] Call Trace:

The problem is caused by a mismatch between the USB core's view of the
device state and the USB device and xHCI host's view of the device state.

After the device reset and re-configuration, the device and the xHCI host
think they are using alternate setting 0 of all interfaces.  However, the
USB core keeps track of the old state, which may include non-zero
alternate settings.  It uses intf->cur_altsetting to keep the endpoint
sysfs files for the old state across the reset.

The bandwidth allocation functions need to know what the xHCI host thinks
the current alternate settings are, so original patch set
intf->cur_altsetting to the alternate setting 0.  This caused duplicate
endpoint files to be created.

The solution is to not set intf->cur_altsetting before calling
usb_set_interface() in usb_reset_and_verify_device().  Instead, we add a
new flag to struct usb_interface to tell usb_hcd_alloc_bandwidth() to use
alternate setting 0 as the currently installed alternate setting.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Tested-by: Borislav Petkov <petkovbb@googlemail.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hcd.c | 18 ++++++++++++++++++
 drivers/usb/core/hub.c | 15 +++++----------
 include/linux/usb.h    |  1 +
 3 files changed, 24 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 0495fa651225..80995ef0868c 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1684,6 +1684,24 @@ int usb_hcd_alloc_bandwidth(struct usb_device *udev,
 		}
 	}
 	if (cur_alt && new_alt) {
+		struct usb_interface *iface = usb_ifnum_to_if(udev,
+				cur_alt->desc.bInterfaceNumber);
+
+		if (iface->resetting_device) {
+			/*
+			 * The USB core just reset the device, so the xHCI host
+			 * and the device will think alt setting 0 is installed.
+			 * However, the USB core will pass in the alternate
+			 * setting installed before the reset as cur_alt.  Dig
+			 * out the alternate setting 0 structure, or the first
+			 * alternate setting if a broken device doesn't have alt
+			 * setting 0.
+			 */
+			cur_alt = usb_altnum_to_altsetting(iface, 0);
+			if (!cur_alt)
+				cur_alt = &iface->altsetting[0];
+		}
+
 		/* Drop all the endpoints in the current alt setting */
 		for (i = 0; i < cur_alt->desc.bNumEndpoints; i++) {
 			ret = hcd->driver->drop_endpoint(hcd, udev,
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index b9f5fcd713e2..35cc8b9ba1f5 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -3695,19 +3695,14 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
 			usb_enable_interface(udev, intf, true);
 			ret = 0;
 		} else {
-			/* We've just reset the device, so it will think alt
-			 * setting 0 is installed.  For usb_set_interface() to
-			 * work properly, we need to set the current alternate
-			 * interface setting to 0 (or the first alt setting, if
-			 * the device doesn't have alt setting 0).
+			/* Let the bandwidth allocation function know that this
+			 * device has been reset, and it will have to use
+			 * alternate setting 0 as the current alternate setting.
 			 */
-			intf->cur_altsetting =
-				usb_find_alt_setting(config, i, 0);
-			if (!intf->cur_altsetting)
-				intf->cur_altsetting =
-					&config->intf_cache[i]->altsetting[0];
+			intf->resetting_device = 1;
 			ret = usb_set_interface(udev, desc->bInterfaceNumber,
 					desc->bAlternateSetting);
+			intf->resetting_device = 0;
 		}
 		if (ret < 0) {
 			dev_err(&udev->dev, "failed to restore interface %d "
diff --git a/include/linux/usb.h b/include/linux/usb.h
index e101a2d04d75..d7ace1b80f09 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -192,6 +192,7 @@ struct usb_interface {
 	unsigned needs_altsetting0:1;	/* switch to altsetting 0 is pending */
 	unsigned needs_binding:1;	/* needs delayed unbind/rebind */
 	unsigned reset_running:1;
+	unsigned resetting_device:1;	/* true: bandwidth alloc after reset */
 
 	struct device dev;		/* interface specific device info */
 	struct device *usb_dev;
-- 
cgit v1.2.3


From 50b926e439620c469565e8be0f28be78f5fca1ce Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Mon, 4 Jan 2010 14:44:56 +0100
Subject: sched: Fix vmark regression on big machines

SD_PREFER_SIBLING is set at the CPU domain level if power saving isn't
enabled, leading to many cache misses on large machines as we traverse
looking for an idle shared cache to wake to.  Change the enabler of
select_idle_sibling() to SD_SHARE_PKG_RESOURCES, and enable same at the
sibling domain level.

Reported-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1262612696.15495.15.camel@marge.simson.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/topology.h | 2 +-
 kernel/sched_fair.c      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/topology.h b/include/linux/topology.h
index 57e63579bfdd..5b81156780b1 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -99,7 +99,7 @@ int arch_update_cpu_topology(void);
 				| 1*SD_WAKE_AFFINE			\
 				| 1*SD_SHARE_CPUPOWER			\
 				| 0*SD_POWERSAVINGS_BALANCE		\
-				| 0*SD_SHARE_PKG_RESOURCES		\
+				| 1*SD_SHARE_PKG_RESOURCES		\
 				| 0*SD_SERIALIZE			\
 				| 0*SD_PREFER_SIBLING			\
 				,					\
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 42ac3c9f66f6..8fe7ee81c552 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1508,7 +1508,7 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
 			 * If there's an idle sibling in this domain, make that
 			 * the wake_affine target instead of the current cpu.
 			 */
-			if (tmp->flags & SD_PREFER_SIBLING)
+			if (tmp->flags & SD_SHARE_PKG_RESOURCES)
 				target = select_idle_sibling(p, tmp, target);
 
 			if (target >= 0) {
-- 
cgit v1.2.3


From 92b6759857ea3ad19bc6871044e373f6251841d3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 18 Jan 2010 14:02:16 +0100
Subject: perf: Change the is_software_event() definition

The is_software_event() definition always confuses me because its an
exclusive expression, make it an inclusive one.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c66b34f75eea..8fa71874113f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -814,9 +814,14 @@ extern int perf_event_overflow(struct perf_event *event, int nmi,
  */
 static inline int is_software_event(struct perf_event *event)
 {
-	return (event->attr.type != PERF_TYPE_RAW) &&
-		(event->attr.type != PERF_TYPE_HARDWARE) &&
-		(event->attr.type != PERF_TYPE_HW_CACHE);
+	switch (event->attr.type) {
+	case PERF_TYPE_SOFTWARE:
+	case PERF_TYPE_TRACEPOINT:
+	/* for now the breakpoint stuff also works as software event */
+	case PERF_TYPE_BREAKPOINT:
+		return 1;
+	}
+	return 0;
 }
 
 extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
-- 
cgit v1.2.3


From e071041be037eca208b62b84469a06bdfc692bea Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 23 Jan 2010 13:37:10 +0000
Subject: netns xfrm: fix "ip xfrm state|policy count" misreport

"ip xfrm state|policy count" report SA/SP count from init_net,
not from netns of caller process.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     |  4 ++--
 net/xfrm/xfrm_policy.c | 16 ++++++++--------
 net/xfrm/xfrm_state.c  |  6 +++---
 net/xfrm/xfrm_user.c   | 14 ++++++++------
 4 files changed, 21 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 6d85861ab990..60c27706e7b9 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1367,8 +1367,8 @@ struct xfrmk_spdinfo {
 extern struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 seq);
 extern int xfrm_state_delete(struct xfrm_state *x);
 extern int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info);
-extern void xfrm_sad_getinfo(struct xfrmk_sadinfo *si);
-extern void xfrm_spd_getinfo(struct xfrmk_spdinfo *si);
+extern void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
+extern void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
 extern int xfrm_replay_check(struct xfrm_state *x,
 			     struct sk_buff *skb, __be32 seq);
 extern void xfrm_replay_advance(struct xfrm_state *x, __be32 seq);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 4725a549ad4d..d2c8cb57ee4c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -469,16 +469,16 @@ static inline int xfrm_byidx_should_resize(struct net *net, int total)
 	return 0;
 }
 
-void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
+void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
 {
 	read_lock_bh(&xfrm_policy_lock);
-	si->incnt = init_net.xfrm.policy_count[XFRM_POLICY_IN];
-	si->outcnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT];
-	si->fwdcnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD];
-	si->inscnt = init_net.xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
-	si->outscnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
-	si->fwdscnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
-	si->spdhcnt = init_net.xfrm.policy_idx_hmask;
+	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
+	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
+	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
+	si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
+	si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
+	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
+	si->spdhcnt = net->xfrm.policy_idx_hmask;
 	si->spdhmcnt = xfrm_policy_hashmax;
 	read_unlock_bh(&xfrm_policy_lock);
 }
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index d847f1a52b44..b36cc344474b 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -641,11 +641,11 @@ out:
 }
 EXPORT_SYMBOL(xfrm_state_flush);
 
-void xfrm_sad_getinfo(struct xfrmk_sadinfo *si)
+void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
 {
 	spin_lock_bh(&xfrm_state_lock);
-	si->sadcnt = init_net.xfrm.state_num;
-	si->sadhcnt = init_net.xfrm.state_hmask;
+	si->sadcnt = net->xfrm.state_num;
+	si->sadhcnt = net->xfrm.state_hmask;
 	si->sadhmcnt = xfrm_state_hashmax;
 	spin_unlock_bh(&xfrm_state_lock);
 }
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 1ada6186933c..d5a712976004 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -781,7 +781,8 @@ static inline size_t xfrm_spdinfo_msgsize(void)
 	       + nla_total_size(sizeof(struct xfrmu_spdhinfo));
 }
 
-static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags)
+static int build_spdinfo(struct sk_buff *skb, struct net *net,
+			 u32 pid, u32 seq, u32 flags)
 {
 	struct xfrmk_spdinfo si;
 	struct xfrmu_spdinfo spc;
@@ -795,7 +796,7 @@ static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags)
 
 	f = nlmsg_data(nlh);
 	*f = flags;
-	xfrm_spd_getinfo(&si);
+	xfrm_spd_getinfo(net, &si);
 	spc.incnt = si.incnt;
 	spc.outcnt = si.outcnt;
 	spc.fwdcnt = si.fwdcnt;
@@ -828,7 +829,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (r_skb == NULL)
 		return -ENOMEM;
 
-	if (build_spdinfo(r_skb, spid, seq, *flags) < 0)
+	if (build_spdinfo(r_skb, net, spid, seq, *flags) < 0)
 		BUG();
 
 	return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid);
@@ -841,7 +842,8 @@ static inline size_t xfrm_sadinfo_msgsize(void)
 	       + nla_total_size(4); /* XFRMA_SAD_CNT */
 }
 
-static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags)
+static int build_sadinfo(struct sk_buff *skb, struct net *net,
+			 u32 pid, u32 seq, u32 flags)
 {
 	struct xfrmk_sadinfo si;
 	struct xfrmu_sadhinfo sh;
@@ -854,7 +856,7 @@ static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags)
 
 	f = nlmsg_data(nlh);
 	*f = flags;
-	xfrm_sad_getinfo(&si);
+	xfrm_sad_getinfo(net, &si);
 
 	sh.sadhmcnt = si.sadhmcnt;
 	sh.sadhcnt = si.sadhcnt;
@@ -882,7 +884,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (r_skb == NULL)
 		return -ENOMEM;
 
-	if (build_sadinfo(r_skb, spid, seq, *flags) < 0)
+	if (build_sadinfo(r_skb, net, spid, seq, *flags) < 0)
 		BUG();
 
 	return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid);
-- 
cgit v1.2.3


From d7c7544c3d5f59033d1bf3236bc7b289f5f26b75 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sun, 24 Jan 2010 22:47:53 -0800
Subject: netns xfrm: deal with dst entries in netns

GC is non-existent in netns, so after you hit GC threshold, no new
dst entries will be created until someone triggers cleanup in init_net.

Make xfrm4_dst_ops and xfrm6_dst_ops per-netns.
This is not done in a generic way, because it woule waste
(AF_MAX - 2) * sizeof(struct dst_ops) bytes per-netns.

Reorder GC threshold initialization so it'd be done before registering
XFRM policies.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h |  6 +++++
 net/ipv4/xfrm4_policy.c  | 14 +++++++-----
 net/ipv6/xfrm6_policy.c  | 25 +++++++++++---------
 net/xfrm/xfrm_policy.c   | 59 +++++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 84 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 56f8e5585df7..74f119a2829a 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -5,6 +5,7 @@
 #include <linux/wait.h>
 #include <linux/workqueue.h>
 #include <linux/xfrm.h>
+#include <net/dst_ops.h>
 
 struct ctl_table_header;
 
@@ -42,6 +43,11 @@ struct netns_xfrm {
 	unsigned int		policy_count[XFRM_POLICY_MAX * 2];
 	struct work_struct	policy_hash_work;
 
+	struct dst_ops		xfrm4_dst_ops;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct dst_ops		xfrm6_dst_ops;
+#endif
+
 	struct sock		*nlsk;
 	struct sock		*nlsk_stash;
 
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 8c08a28d8f83..67107d63c1cd 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -15,7 +15,6 @@
 #include <net/xfrm.h>
 #include <net/ip.h>
 
-static struct dst_ops xfrm4_dst_ops;
 static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
 
 static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
@@ -190,8 +189,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 
 static inline int xfrm4_garbage_collect(struct dst_ops *ops)
 {
-	xfrm4_policy_afinfo.garbage_collect(&init_net);
-	return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2);
+	struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops);
+
+	xfrm4_policy_afinfo.garbage_collect(net);
+	return (atomic_read(&ops->entries) > ops->gc_thresh * 2);
 }
 
 static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -268,7 +269,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 static struct ctl_table xfrm4_policy_table[] = {
 	{
 		.procname       = "xfrm4_gc_thresh",
-		.data           = &xfrm4_dst_ops.gc_thresh,
+		.data           = &init_net.xfrm.xfrm4_dst_ops.gc_thresh,
 		.maxlen         = sizeof(int),
 		.mode           = 0644,
 		.proc_handler   = proc_dointvec,
@@ -295,8 +296,6 @@ static void __exit xfrm4_policy_fini(void)
 
 void __init xfrm4_init(int rt_max_size)
 {
-	xfrm4_state_init();
-	xfrm4_policy_init();
 	/*
 	 * Select a default value for the gc_thresh based on the main route
 	 * table hash size.  It seems to me the worst case scenario is when
@@ -308,6 +307,9 @@ void __init xfrm4_init(int rt_max_size)
 	 * and start cleaning when were 1/2 full
 	 */
 	xfrm4_dst_ops.gc_thresh = rt_max_size/2;
+
+	xfrm4_state_init();
+	xfrm4_policy_init();
 #ifdef CONFIG_SYSCTL
 	sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path,
 						xfrm4_policy_table);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 7254e3f899a7..dbdc696f5fc5 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -24,7 +24,6 @@
 #include <net/mip6.h>
 #endif
 
-static struct dst_ops xfrm6_dst_ops;
 static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
 
 static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos,
@@ -224,8 +223,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 
 static inline int xfrm6_garbage_collect(struct dst_ops *ops)
 {
-	xfrm6_policy_afinfo.garbage_collect(&init_net);
-	return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2);
+	struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
+
+	xfrm6_policy_afinfo.garbage_collect(net);
+	return (atomic_read(&ops->entries) > ops->gc_thresh * 2);
 }
 
 static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -310,7 +311,7 @@ static void xfrm6_policy_fini(void)
 static struct ctl_table xfrm6_policy_table[] = {
 	{
 		.procname       = "xfrm6_gc_thresh",
-		.data	   	= &xfrm6_dst_ops.gc_thresh,
+		.data	   	= &init_net.xfrm.xfrm6_dst_ops.gc_thresh,
 		.maxlen	 	= sizeof(int),
 		.mode	   	= 0644,
 		.proc_handler   = proc_dointvec,
@@ -326,13 +327,6 @@ int __init xfrm6_init(void)
 	int ret;
 	unsigned int gc_thresh;
 
-	ret = xfrm6_policy_init();
-	if (ret)
-		goto out;
-
-	ret = xfrm6_state_init();
-	if (ret)
-		goto out_policy;
 	/*
 	 * We need a good default value for the xfrm6 gc threshold.
 	 * In ipv4 we set it to the route hash table size * 8, which
@@ -346,6 +340,15 @@ int __init xfrm6_init(void)
 	 */
 	gc_thresh = FIB6_TABLE_HASHSZ * 8;
 	xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh;
+
+	ret = xfrm6_policy_init();
+	if (ret)
+		goto out;
+
+	ret = xfrm6_state_init();
+	if (ret)
+		goto out_policy;
+
 #ifdef CONFIG_SYSCTL
 	sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv6_ctl_path,
 						xfrm6_policy_table);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d2c8cb57ee4c..0ecb16a9a883 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1309,15 +1309,28 @@ static inline int xfrm_get_tos(struct flowi *fl, int family)
 	return tos;
 }
 
-static inline struct xfrm_dst *xfrm_alloc_dst(int family)
+static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 {
 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	struct dst_ops *dst_ops;
 	struct xfrm_dst *xdst;
 
 	if (!afinfo)
 		return ERR_PTR(-EINVAL);
 
-	xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS);
+	switch (family) {
+	case AF_INET:
+		dst_ops = &net->xfrm.xfrm4_dst_ops;
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		dst_ops = &net->xfrm.xfrm6_dst_ops;
+		break;
+#endif
+	default:
+		BUG();
+	}
+	xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS);
 
 	xfrm_policy_put_afinfo(afinfo);
 
@@ -1366,6 +1379,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 					    struct flowi *fl,
 					    struct dst_entry *dst)
 {
+	struct net *net = xp_net(policy);
 	unsigned long now = jiffies;
 	struct net_device *dev;
 	struct dst_entry *dst_prev = NULL;
@@ -1389,7 +1403,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 	dst_hold(dst);
 
 	for (; i < nx; i++) {
-		struct xfrm_dst *xdst = xfrm_alloc_dst(family);
+		struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
 		struct dst_entry *dst1 = &xdst->u.dst;
 
 		err = PTR_ERR(xdst);
@@ -2279,6 +2293,7 @@ EXPORT_SYMBOL(xfrm_bundle_ok);
 
 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 {
+	struct net *net;
 	int err = 0;
 	if (unlikely(afinfo == NULL))
 		return -EINVAL;
@@ -2302,6 +2317,27 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 		xfrm_policy_afinfo[afinfo->family] = afinfo;
 	}
 	write_unlock_bh(&xfrm_policy_afinfo_lock);
+
+	rtnl_lock();
+	for_each_net(net) {
+		struct dst_ops *xfrm_dst_ops;
+
+		switch (afinfo->family) {
+		case AF_INET:
+			xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
+			break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		case AF_INET6:
+			xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
+			break;
+#endif
+		default:
+			BUG();
+		}
+		*xfrm_dst_ops = *afinfo->dst_ops;
+	}
+	rtnl_unlock();
+
 	return err;
 }
 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
@@ -2332,6 +2368,22 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
 }
 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
 
+static void __net_init xfrm_dst_ops_init(struct net *net)
+{
+	struct xfrm_policy_afinfo *afinfo;
+
+	read_lock_bh(&xfrm_policy_afinfo_lock);
+	afinfo = xfrm_policy_afinfo[AF_INET];
+	if (afinfo)
+		net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	afinfo = xfrm_policy_afinfo[AF_INET6];
+	if (afinfo)
+		net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
+#endif
+	read_unlock_bh(&xfrm_policy_afinfo_lock);
+}
+
 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
 {
 	struct xfrm_policy_afinfo *afinfo;
@@ -2494,6 +2546,7 @@ static int __net_init xfrm_net_init(struct net *net)
 	rv = xfrm_policy_init(net);
 	if (rv < 0)
 		goto out_policy;
+	xfrm_dst_ops_init(net);
 	rv = xfrm_sysctl_init(net);
 	if (rv < 0)
 		goto out_sysctl;
-- 
cgit v1.2.3


From cb289d6244a37cf932c571d6deb0daa8030f931b Mon Sep 17 00:00:00 2001
From: Davide Libenzi <davidel@xmailserver.org>
Date: Wed, 13 Jan 2010 09:34:36 -0800
Subject: eventfd - allow atomic read and waitqueue remove

KVM needs a wait to atomically remove themselves from the eventfd ->poll()
wait queue head, in order to handle correctly their IRQfd deassign
operation.

This patch introduces such API, plus a way to read an eventfd from its
context.

Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 fs/eventfd.c            | 89 ++++++++++++++++++++++++++++++++++++++++---------
 include/linux/eventfd.h | 16 +++++++++
 2 files changed, 90 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/fs/eventfd.c b/fs/eventfd.c
index d26402ff06ea..7758cc382ef0 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -135,26 +135,71 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait)
 	return events;
 }
 
-static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
-			    loff_t *ppos)
+static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
+{
+	*cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
+	ctx->count -= *cnt;
+}
+
+/**
+ * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
+ * @ctx: [in] Pointer to eventfd context.
+ * @wait: [in] Wait queue to be removed.
+ * @cnt: [out] Pointer to the 64bit conter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN      : The operation would have blocked.
+ *
+ * This is used to atomically remove a wait queue entry from the eventfd wait
+ * queue head, and read/reset the counter value.
+ */
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+				  __u64 *cnt)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->wqh.lock, flags);
+	eventfd_ctx_do_read(ctx, cnt);
+	__remove_wait_queue(&ctx->wqh, wait);
+	if (*cnt != 0 && waitqueue_active(&ctx->wqh))
+		wake_up_locked_poll(&ctx->wqh, POLLOUT);
+	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+
+	return *cnt != 0 ? 0 : -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
+
+/**
+ * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
+ * @ctx: [in] Pointer to eventfd context.
+ * @no_wait: [in] Different from zero if the operation should not block.
+ * @cnt: [out] Pointer to the 64bit conter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN      : The operation would have blocked but @no_wait was nonzero.
+ * -ERESTARTSYS : A signal interrupted the wait operation.
+ *
+ * If @no_wait is zero, the function might sleep until the eventfd internal
+ * counter becomes greater than zero.
+ */
+ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt)
 {
-	struct eventfd_ctx *ctx = file->private_data;
 	ssize_t res;
-	__u64 ucnt = 0;
 	DECLARE_WAITQUEUE(wait, current);
 
-	if (count < sizeof(ucnt))
-		return -EINVAL;
 	spin_lock_irq(&ctx->wqh.lock);
+	*cnt = 0;
 	res = -EAGAIN;
 	if (ctx->count > 0)
-		res = sizeof(ucnt);
-	else if (!(file->f_flags & O_NONBLOCK)) {
+		res = 0;
+	else if (!no_wait) {
 		__add_wait_queue(&ctx->wqh, &wait);
-		for (res = 0;;) {
+		for (;;) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			if (ctx->count > 0) {
-				res = sizeof(ucnt);
+				res = 0;
 				break;
 			}
 			if (signal_pending(current)) {
@@ -168,18 +213,32 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
 		__remove_wait_queue(&ctx->wqh, &wait);
 		__set_current_state(TASK_RUNNING);
 	}
-	if (likely(res > 0)) {
-		ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
-		ctx->count -= ucnt;
+	if (likely(res == 0)) {
+		eventfd_ctx_do_read(ctx, cnt);
 		if (waitqueue_active(&ctx->wqh))
 			wake_up_locked_poll(&ctx->wqh, POLLOUT);
 	}
 	spin_unlock_irq(&ctx->wqh.lock);
-	if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
-		return -EFAULT;
 
 	return res;
 }
+EXPORT_SYMBOL_GPL(eventfd_ctx_read);
+
+static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
+			    loff_t *ppos)
+{
+	struct eventfd_ctx *ctx = file->private_data;
+	ssize_t res;
+	__u64 cnt;
+
+	if (count < sizeof(cnt))
+		return -EINVAL;
+	res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt);
+	if (res < 0)
+		return res;
+
+	return put_user(cnt, (__u64 __user *) buf) ? -EFAULT : sizeof(cnt);
+}
 
 static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
 			     loff_t *ppos)
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index 94dd10366a78..91bb4f27238c 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -10,6 +10,7 @@
 
 #include <linux/fcntl.h>
 #include <linux/file.h>
+#include <linux/wait.h>
 
 /*
  * CAREFUL: Check include/asm-generic/fcntl.h when defining
@@ -34,6 +35,9 @@ struct file *eventfd_fget(int fd);
 struct eventfd_ctx *eventfd_ctx_fdget(int fd);
 struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
 int eventfd_signal(struct eventfd_ctx *ctx, int n);
+ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt);
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+				  __u64 *cnt);
 
 #else /* CONFIG_EVENTFD */
 
@@ -61,6 +65,18 @@ static inline void eventfd_ctx_put(struct eventfd_ctx *ctx)
 
 }
 
+static inline ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait,
+				       __u64 *cnt)
+{
+	return -ENOSYS;
+}
+
+static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx,
+						wait_queue_t *wait, __u64 *cnt)
+{
+	return -ENOSYS;
+}
+
 #endif
 
 #endif /* _LINUX_EVENTFD_H */
-- 
cgit v1.2.3


From 6d3faf6f431bafb25f4b9926c50a7e5c267738c6 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 24 Jan 2010 14:48:00 +0100
Subject: firewire: cdev: add_descriptor documentation fix

struct fw_cdev_add_descriptor.length is in quadlets, not in bytes.
Also remove any doubts about the endianess of descriptor data.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 include/linux/firewire-cdev.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 1f716d9f714b..520ecf86cbb3 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -380,7 +380,7 @@ struct fw_cdev_initiate_bus_reset {
  * @immediate:	If non-zero, immediate key to insert before pointer
  * @key:	Upper 8 bits of root directory pointer
  * @data:	Userspace pointer to contents of descriptor block
- * @length:	Length of descriptor block data, in bytes
+ * @length:	Length of descriptor block data, in quadlets
  * @handle:	Handle to the descriptor, written by the kernel
  *
  * Add a descriptor block and optionally a preceding immediate key to the local
@@ -394,6 +394,8 @@ struct fw_cdev_initiate_bus_reset {
  * If not 0, the @immediate field specifies an immediate key which will be
  * inserted before the root directory pointer.
  *
+ * @immediate, @key, and @data array elements are CPU-endian quadlets.
+ *
  * If successful, the kernel adds the descriptor and writes back a handle to the
  * kernel-side object to be used for later removal of the descriptor block and
  * immediate key.
-- 
cgit v1.2.3


From 0531b2aac59c2296570ac52bfc032ef2ace7d5e1 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 27 Jan 2010 09:20:03 -0800
Subject: mm: add new 'read_cache_page_gfp()' helper function

It's a simplified 'read_cache_page()' which takes a page allocation
flag, so that different paths can control how aggressive the memory
allocations are that populate a address space.

In particular, the intel GPU object mapping code wants to be able to do
a certain amount of own internal memory management by automatically
shrinking the address space when memory starts getting tight.  This
allows it to dynamically use different memory allocation policies on a
per-allocation basis, rather than depend on the (static) address space
gfp policy.

The actual new function is a one-liner, but re-organizing the helper
functions to the point where you can do this with a single line of code
is what most of the patch is all about.

Tested-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h |   2 +
 mm/filemap.c            | 100 ++++++++++++++++++++++++++++++++----------------
 2 files changed, 70 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ed5d7501e181..3c62ed408492 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -253,6 +253,8 @@ extern struct page * read_cache_page_async(struct address_space *mapping,
 extern struct page * read_cache_page(struct address_space *mapping,
 				pgoff_t index, filler_t *filler,
 				void *data);
+extern struct page * read_cache_page_gfp(struct address_space *mapping,
+				pgoff_t index, gfp_t gfp_mask);
 extern int read_cache_pages(struct address_space *mapping,
 		struct list_head *pages, filler_t *filler, void *data);
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 96ac6b0eb6cb..e3736923220e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1634,14 +1634,15 @@ EXPORT_SYMBOL(generic_file_readonly_mmap);
 static struct page *__read_cache_page(struct address_space *mapping,
 				pgoff_t index,
 				int (*filler)(void *,struct page*),
-				void *data)
+				void *data,
+				gfp_t gfp)
 {
 	struct page *page;
 	int err;
 repeat:
 	page = find_get_page(mapping, index);
 	if (!page) {
-		page = page_cache_alloc_cold(mapping);
+		page = __page_cache_alloc(gfp | __GFP_COLD);
 		if (!page)
 			return ERR_PTR(-ENOMEM);
 		err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
@@ -1661,31 +1662,18 @@ repeat:
 	return page;
 }
 
-/**
- * read_cache_page_async - read into page cache, fill it if needed
- * @mapping:	the page's address_space
- * @index:	the page index
- * @filler:	function to perform the read
- * @data:	destination for read data
- *
- * Same as read_cache_page, but don't wait for page to become unlocked
- * after submitting it to the filler.
- *
- * Read into the page cache. If a page already exists, and PageUptodate() is
- * not set, try to fill the page but don't wait for it to become unlocked.
- *
- * If the page does not get brought uptodate, return -EIO.
- */
-struct page *read_cache_page_async(struct address_space *mapping,
+static struct page *do_read_cache_page(struct address_space *mapping,
 				pgoff_t index,
 				int (*filler)(void *,struct page*),
-				void *data)
+				void *data,
+				gfp_t gfp)
+
 {
 	struct page *page;
 	int err;
 
 retry:
-	page = __read_cache_page(mapping, index, filler, data);
+	page = __read_cache_page(mapping, index, filler, data, gfp);
 	if (IS_ERR(page))
 		return page;
 	if (PageUptodate(page))
@@ -1710,8 +1698,67 @@ out:
 	mark_page_accessed(page);
 	return page;
 }
+
+/**
+ * read_cache_page_async - read into page cache, fill it if needed
+ * @mapping:	the page's address_space
+ * @index:	the page index
+ * @filler:	function to perform the read
+ * @data:	destination for read data
+ *
+ * Same as read_cache_page, but don't wait for page to become unlocked
+ * after submitting it to the filler.
+ *
+ * Read into the page cache. If a page already exists, and PageUptodate() is
+ * not set, try to fill the page but don't wait for it to become unlocked.
+ *
+ * If the page does not get brought uptodate, return -EIO.
+ */
+struct page *read_cache_page_async(struct address_space *mapping,
+				pgoff_t index,
+				int (*filler)(void *,struct page*),
+				void *data)
+{
+	return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
+}
 EXPORT_SYMBOL(read_cache_page_async);
 
+static struct page *wait_on_page_read(struct page *page)
+{
+	if (!IS_ERR(page)) {
+		wait_on_page_locked(page);
+		if (!PageUptodate(page)) {
+			page_cache_release(page);
+			page = ERR_PTR(-EIO);
+		}
+	}
+	return page;
+}
+
+/**
+ * read_cache_page_gfp - read into page cache, using specified page allocation flags.
+ * @mapping:	the page's address_space
+ * @index:	the page index
+ * @gfp:	the page allocator flags to use if allocating
+ *
+ * This is the same as "read_mapping_page(mapping, index, NULL)", but with
+ * any new page allocations done using the specified allocation flags. Note
+ * that the Radix tree operations will still use GFP_KERNEL, so you can't
+ * expect to do this atomically or anything like that - but you can pass in
+ * other page requirements.
+ *
+ * If the page does not get brought uptodate, return -EIO.
+ */
+struct page *read_cache_page_gfp(struct address_space *mapping,
+				pgoff_t index,
+				gfp_t gfp)
+{
+	filler_t *filler = (filler_t *)mapping->a_ops->readpage;
+
+	return wait_on_page_read(do_read_cache_page(mapping, index, filler, NULL, gfp));
+}
+EXPORT_SYMBOL(read_cache_page_gfp);
+
 /**
  * read_cache_page - read into page cache, fill it if needed
  * @mapping:	the page's address_space
@@ -1729,18 +1776,7 @@ struct page *read_cache_page(struct address_space *mapping,
 				int (*filler)(void *,struct page*),
 				void *data)
 {
-	struct page *page;
-
-	page = read_cache_page_async(mapping, index, filler, data);
-	if (IS_ERR(page))
-		goto out;
-	wait_on_page_locked(page);
-	if (!PageUptodate(page)) {
-		page_cache_release(page);
-		page = ERR_PTR(-EIO);
-	}
- out:
-	return page;
+	return wait_on_page_read(read_cache_page_async(mapping, index, filler, data));
 }
 EXPORT_SYMBOL(read_cache_page);
 
-- 
cgit v1.2.3


From bb209c8287d2d55ec4a67e3933346e0a3ee0da76 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 26 Jan 2010 17:10:03 +0000
Subject: powerpc/pci: Add calls to set_pcie_port_type() and
 set_pcie_hotplug_bridge()

We are missing these when building the pci_dev from scratch off
the Open Firmware device-tree

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/powerpc/kernel/pci_of_scan.c | 2 ++
 drivers/pci/probe.c               | 4 ++--
 include/linux/pci.h               | 4 ++++
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 7311fdfb9bf8..693eb9a25bfa 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -140,6 +140,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 	dev->devfn = devfn;
 	dev->multifunction = 0;		/* maybe a lie? */
 	dev->needs_freset = 0;		/* pcie fundamental reset required */
+	set_pcie_port_type(dev);
 
 	dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
 	dev->device = get_int_prop(node, "device-id", 0xffff);
@@ -164,6 +165,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 		/* a PCI-PCI bridge */
 		dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
 		dev->rom_base_reg = PCI_ROM_ADDRESS1;
+		set_pcie_hotplug_bridge(dev);
 	} else if (!strcmp(type, "cardbus")) {
 		dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
 	} else {
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 98ffb2de22e9..446e4a94d7d3 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -681,7 +681,7 @@ static void pci_read_irq(struct pci_dev *dev)
 	dev->irq = irq;
 }
 
-static void set_pcie_port_type(struct pci_dev *pdev)
+void set_pcie_port_type(struct pci_dev *pdev)
 {
 	int pos;
 	u16 reg16;
@@ -695,7 +695,7 @@ static void set_pcie_port_type(struct pci_dev *pdev)
 	pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
 }
 
-static void set_pcie_hotplug_bridge(struct pci_dev *pdev)
+void set_pcie_hotplug_bridge(struct pci_dev *pdev)
 {
 	int pos;
 	u16 reg16;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 174e5392e51e..c1968f464c38 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -756,6 +756,10 @@ pci_power_t pci_target_state(struct pci_dev *dev);
 int pci_prepare_to_sleep(struct pci_dev *dev);
 int pci_back_from_sleep(struct pci_dev *dev);
 
+/* For use by arch with custom probe code */
+void set_pcie_port_type(struct pci_dev *pdev);
+void set_pcie_hotplug_bridge(struct pci_dev *pdev);
+
 /* Functions for PCI Hotplug drivers to use */
 int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap);
 #ifdef CONFIG_HOTPLUG
-- 
cgit v1.2.3


From cb6ecf6f7afece066265e243657b0ac28150a7b2 Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Thu, 28 Jan 2010 22:28:27 -0800
Subject: Input: add the ABS_MT_PRESSURE event

For pressure-based multi-touch devices, a direct way to send sensor
intensity data per finger is needed. This patch adds the ABS_MT_PRESSURE
event to the MT protocol.

Requested-by: Yoonyoung Shim <jy0922.shim@samsung.com>
Requested-by: Mika Kuoppala <mika.kuoppala@nokia.com>
Requested-by: Peter Hutterer <peter.hutterer@who-t.net>
Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/input.c | 1 +
 include/linux/input.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/drivers/input/input.c b/drivers/input/input.c
index 30b503b8d67b..86cb2d2196ff 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -46,6 +46,7 @@ static unsigned int input_abs_bypass_init_data[] __initdata = {
 	ABS_MT_TOOL_TYPE,
 	ABS_MT_BLOB_ID,
 	ABS_MT_TRACKING_ID,
+	ABS_MT_PRESSURE,
 	0
 };
 static unsigned long input_abs_bypass[BITS_TO_LONGS(ABS_CNT)];
diff --git a/include/linux/input.h b/include/linux/input.h
index 7be8a6537b57..735ceaf1bc2d 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -660,6 +660,7 @@ struct input_absinfo {
 #define ABS_MT_TOOL_TYPE	0x37	/* Type of touching device */
 #define ABS_MT_BLOB_ID		0x38	/* Group a set of packets as a blob */
 #define ABS_MT_TRACKING_ID	0x39	/* Unique ID of initiated contact */
+#define ABS_MT_PRESSURE		0x3a	/* Pressure on contact area */
 
 #define ABS_MAX			0x3f
 #define ABS_CNT			(ABS_MAX+1)
-- 
cgit v1.2.3


From 221af7f87b97431e3ee21ce4b0e77d5411cf1549 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 28 Jan 2010 22:14:42 -0800
Subject: Split 'flush_old_exec' into two functions

'flush_old_exec()' is the point of no return when doing an execve(), and
it is pretty badly misnamed.  It doesn't just flush the old executable
environment, it also starts up the new one.

Which is very inconvenient for things like setting up the new
personality, because we want the new personality to affect the starting
of the new environment, but at the same time we do _not_ want the new
personality to take effect if flushing the old one fails.

As a result, the x86-64 '32-bit' personality is actually done using this
insane "I'm going to change the ABI, but I haven't done it yet" bit
(TIF_ABI_PENDING), with SET_PERSONALITY() not actually setting the
personality, but just the "pending" bit, so that "flush_thread()" can do
the actual personality magic.

This patch in no way changes any of that insanity, but it does split the
'flush_old_exec()' function up into a preparatory part that can fail
(still called flush_old_exec()), and a new part that will actually set
up the new exec environment (setup_new_exec()).  All callers are changed
to trivially comply with the new world order.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sh/kernel/process_64.c |  2 +-
 arch/x86/ia32/ia32_aout.c   | 10 ++++++----
 fs/binfmt_aout.c            |  1 +
 fs/binfmt_elf.c             | 27 ++-------------------------
 fs/binfmt_elf_fdpic.c       |  3 +++
 fs/binfmt_flat.c            |  1 +
 fs/binfmt_som.c             |  1 +
 fs/exec.c                   | 26 ++++++++++++++++----------
 include/linux/binfmts.h     |  1 +
 include/linux/sched.h       |  2 +-
 10 files changed, 33 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index 31f80c61b031..ec79faf6f021 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c
@@ -368,7 +368,7 @@ void exit_thread(void)
 void flush_thread(void)
 {
 
-	/* Called by fs/exec.c (flush_old_exec) to remove traces of a
+	/* Called by fs/exec.c (setup_new_exec) to remove traces of a
 	 * previously running executable. */
 #ifdef CONFIG_SH_FPU
 	if (last_task_used_math == current) {
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 2a4d073d2cf1..435d2a5323da 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -308,15 +308,17 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	if (retval)
 		return retval;
 
-	regs->cs = __USER32_CS;
-	regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
-		regs->r13 = regs->r14 = regs->r15 = 0;
-
 	/* OK, This is the point of no return */
 	set_personality(PER_LINUX);
 	set_thread_flag(TIF_IA32);
 	clear_thread_flag(TIF_ABI_PENDING);
 
+	setup_new_exec(bprm);
+
+	regs->cs = __USER32_CS;
+	regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
+		regs->r13 = regs->r14 = regs->r15 = 0;
+
 	current->mm->end_code = ex.a_text +
 		(current->mm->start_code = N_TXTADDR(ex));
 	current->mm->end_data = ex.a_data +
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 346b69405363..fdd397099172 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -264,6 +264,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 #else
 	set_personality(PER_LINUX);
 #endif
+	setup_new_exec(bprm);
 
 	current->mm->end_code = ex.a_text +
 		(current->mm->start_code = N_TXTADDR(ex));
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index edd90c49003c..fd5b2ea5d299 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -662,27 +662,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
 				goto out_free_interp;
 
-			/*
-			 * The early SET_PERSONALITY here is so that the lookup
-			 * for the interpreter happens in the namespace of the 
-			 * to-be-execed image.  SET_PERSONALITY can select an
-			 * alternate root.
-			 *
-			 * However, SET_PERSONALITY is NOT allowed to switch
-			 * this task into the new images's memory mapping
-			 * policy - that is, TASK_SIZE must still evaluate to
-			 * that which is appropriate to the execing application.
-			 * This is because exit_mmap() needs to have TASK_SIZE
-			 * evaluate to the size of the old image.
-			 *
-			 * So if (say) a 64-bit application is execing a 32-bit
-			 * application it is the architecture's responsibility
-			 * to defer changing the value of TASK_SIZE until the
-			 * switch really is going to happen - do this in
-			 * flush_thread().	- akpm
-			 */
-			SET_PERSONALITY(loc->elf_ex);
-
 			interpreter = open_exec(elf_interpreter);
 			retval = PTR_ERR(interpreter);
 			if (IS_ERR(interpreter))
@@ -730,9 +709,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		/* Verify the interpreter has a valid arch */
 		if (!elf_check_arch(&loc->interp_elf_ex))
 			goto out_free_dentry;
-	} else {
-		/* Executables without an interpreter also need a personality  */
-		SET_PERSONALITY(loc->elf_ex);
 	}
 
 	/* Flush all traces of the currently running executable */
@@ -752,7 +728,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 
 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 		current->flags |= PF_RANDOMIZE;
-	arch_pick_mmap_layout(current->mm);
+
+	setup_new_exec(bprm);
 
 	/* Do this so that we can load the interpreter, if need be.  We will
 	   change some of these later */
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index c57d9ce5ff7e..18d77297ccc8 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -321,6 +321,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
 	set_personality(PER_LINUX_FDPIC);
 	if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
 		current->personality |= READ_IMPLIES_EXEC;
+
+	setup_new_exec(bprm);
+
 	set_binfmt(&elf_fdpic_format);
 
 	current->mm->start_code = 0;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index d4a00ea1054c..42c6b4a54445 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -519,6 +519,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 
 		/* OK, This is the point of no return */
 		set_personality(PER_LINUX_32BIT);
+		setup_new_exec(bprm);
 	}
 
 	/*
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index 2a9b5330cc5e..cc8560f6c9b0 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -227,6 +227,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	/* OK, This is the point of no return */
 	current->flags &= ~PF_FORKNOEXEC;
 	current->personality = PER_HPUX;
+	setup_new_exec(bprm);
 
 	/* Set the task size for HP-UX processes such that
 	 * the gateway page is outside the address space.
diff --git a/fs/exec.c b/fs/exec.c
index 632b02e34ec7..675c3f44c2ea 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -941,9 +941,7 @@ void set_task_comm(struct task_struct *tsk, char *buf)
 
 int flush_old_exec(struct linux_binprm * bprm)
 {
-	char * name;
-	int i, ch, retval;
-	char tcomm[sizeof(current->comm)];
+	int retval;
 
 	/*
 	 * Make sure we have a private signal table and that
@@ -963,6 +961,20 @@ int flush_old_exec(struct linux_binprm * bprm)
 		goto out;
 
 	bprm->mm = NULL;		/* We're using it now */
+	return 0;
+
+out:
+	return retval;
+}
+EXPORT_SYMBOL(flush_old_exec);
+
+void setup_new_exec(struct linux_binprm * bprm)
+{
+	int i, ch;
+	char * name;
+	char tcomm[sizeof(current->comm)];
+
+	arch_pick_mmap_layout(current->mm);
 
 	/* This is the point of no return */
 	current->sas_ss_sp = current->sas_ss_size = 0;
@@ -1019,14 +1031,8 @@ int flush_old_exec(struct linux_binprm * bprm)
 			
 	flush_signal_handlers(current, 0);
 	flush_old_files(current->files);
-
-	return 0;
-
-out:
-	return retval;
 }
-
-EXPORT_SYMBOL(flush_old_exec);
+EXPORT_SYMBOL(setup_new_exec);
 
 /*
  * Prepare credentials and lock ->cred_guard_mutex.
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index cd4349bdc34e..89c6249fc561 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -109,6 +109,7 @@ extern int prepare_binprm(struct linux_binprm *);
 extern int __must_check remove_arg_zero(struct linux_binprm *);
 extern int search_binary_handler(struct linux_binprm *,struct pt_regs *);
 extern int flush_old_exec(struct linux_binprm * bprm);
+extern void setup_new_exec(struct linux_binprm * bprm);
 
 extern int suid_dumpable;
 #define SUID_DUMP_DISABLE	0	/* No setuid dumping */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6f7bba93929b..abdfacc58653 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1369,7 +1369,7 @@ struct task_struct {
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
 				     - access with [gs]et_task_comm (which lock
 				       it with task_lock())
-				     - initialized normally by flush_old_exec */
+				     - initialized normally by setup_new_exec */
 /* file system info */
 	int link_count, total_link_count;
 #ifdef CONFIG_SYSVIPC
-- 
cgit v1.2.3


From 5352ae638e2d7d5c9b2e4d528676bbf2af6fd6f3 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 28 Jan 2010 17:04:43 -0600
Subject: perf, hw_breakpoint, kgdb: Do not take mutex for kernel debugger

This patch fixes the regression in functionality where the
kernel debugger and the perf API do not nicely share hw
breakpoint reservations.

The kernel debugger cannot use any mutex_lock() calls because it
can start the kernel running from an invalid context.

A mutex free version of the reservation API needed to get
created for the kernel debugger to safely update hw breakpoint
reservations.

The possibility for a breakpoint reservation to be concurrently
processed at the time that kgdb interrupts the system is
improbable. Should this corner case occur the end user is
warned, and the kernel debugger will prohibit updating the
hardware breakpoint reservations.

Any time the kernel debugger reserves a hardware breakpoint it
will be a system wide reservation.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: kgdb-bugreport@lists.sourceforge.net
Cc: K.Prasad <prasad@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: torvalds@linux-foundation.org
LKML-Reference: <1264719883-7285-3-git-send-email-jason.wessel@windriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/kgdb.c        | 51 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/hw_breakpoint.h |  2 ++
 kernel/hw_breakpoint.c        | 52 ++++++++++++++++++++++++++++++++++---------
 3 files changed, 95 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 62bea7307eaa..bfba6019d762 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -239,6 +239,49 @@ static void kgdb_correct_hw_break(void)
 	hw_breakpoint_restore();
 }
 
+static int hw_break_reserve_slot(int breakno)
+{
+	int cpu;
+	int cnt = 0;
+	struct perf_event **pevent;
+
+	for_each_online_cpu(cpu) {
+		cnt++;
+		pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+		if (dbg_reserve_bp_slot(*pevent))
+			goto fail;
+	}
+
+	return 0;
+
+fail:
+	for_each_online_cpu(cpu) {
+		cnt--;
+		if (!cnt)
+			break;
+		pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+		dbg_release_bp_slot(*pevent);
+	}
+	return -1;
+}
+
+static int hw_break_release_slot(int breakno)
+{
+	struct perf_event **pevent;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+		if (dbg_release_bp_slot(*pevent))
+			/*
+			 * The debugger is responisble for handing the retry on
+			 * remove failure.
+			 */
+			return -1;
+	}
+	return 0;
+}
+
 static int
 kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
 {
@@ -250,6 +293,10 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
 	if (i == 4)
 		return -1;
 
+	if (hw_break_release_slot(i)) {
+		printk(KERN_ERR "Cannot remove hw breakpoint at %lx\n", addr);
+		return -1;
+	}
 	breakinfo[i].enabled = 0;
 
 	return 0;
@@ -316,6 +363,10 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
 		return -1;
 	}
 	breakinfo[i].addr = addr;
+	if (hw_break_reserve_slot(i)) {
+		breakinfo[i].addr = 0;
+		return -1;
+	}
 	breakinfo[i].enabled = 1;
 
 	return 0;
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 41235c93e4e9..070ba0621738 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -75,6 +75,8 @@ extern int __register_perf_hw_breakpoint(struct perf_event *bp);
 extern void unregister_hw_breakpoint(struct perf_event *bp);
 extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events);
 
+extern int dbg_reserve_bp_slot(struct perf_event *bp);
+extern int dbg_release_bp_slot(struct perf_event *bp);
 extern int reserve_bp_slot(struct perf_event *bp);
 extern void release_bp_slot(struct perf_event *bp);
 
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index c030ae657f20..8a5c7d55ac9f 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -243,38 +243,70 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
  *       ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
  *            + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
  */
-int reserve_bp_slot(struct perf_event *bp)
+static int __reserve_bp_slot(struct perf_event *bp)
 {
 	struct bp_busy_slots slots = {0};
-	int ret = 0;
-
-	mutex_lock(&nr_bp_mutex);
 
 	fetch_bp_busy_slots(&slots, bp);
 
 	/* Flexible counters need to keep at least one slot */
-	if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
-		ret = -ENOSPC;
-		goto end;
-	}
+	if (slots.pinned + (!!slots.flexible) == HBP_NUM)
+		return -ENOSPC;
 
 	toggle_bp_slot(bp, true);
 
-end:
+	return 0;
+}
+
+int reserve_bp_slot(struct perf_event *bp)
+{
+	int ret;
+
+	mutex_lock(&nr_bp_mutex);
+
+	ret = __reserve_bp_slot(bp);
+
 	mutex_unlock(&nr_bp_mutex);
 
 	return ret;
 }
 
+static void __release_bp_slot(struct perf_event *bp)
+{
+	toggle_bp_slot(bp, false);
+}
+
 void release_bp_slot(struct perf_event *bp)
 {
 	mutex_lock(&nr_bp_mutex);
 
-	toggle_bp_slot(bp, false);
+	__release_bp_slot(bp);
 
 	mutex_unlock(&nr_bp_mutex);
 }
 
+/*
+ * Allow the kernel debugger to reserve breakpoint slots without
+ * taking a lock using the dbg_* variant of for the reserve and
+ * release breakpoint slots.
+ */
+int dbg_reserve_bp_slot(struct perf_event *bp)
+{
+	if (mutex_is_locked(&nr_bp_mutex))
+		return -1;
+
+	return __reserve_bp_slot(bp);
+}
+
+int dbg_release_bp_slot(struct perf_event *bp)
+{
+	if (mutex_is_locked(&nr_bp_mutex))
+		return -1;
+
+	__release_bp_slot(bp);
+
+	return 0;
+}
 
 int register_perf_hw_breakpoint(struct perf_event *bp)
 {
-- 
cgit v1.2.3


From fa5829b36539067f3c675f5d437531dedcfc4ad8 Mon Sep 17 00:00:00 2001
From: Marcin Kościelnicki <koriakin@0x04.net>
Date: Sat, 23 Jan 2010 10:25:28 +1000
Subject: drm/kms: Remove incorrect comment in struct drm_mode_modeinfo

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drm_mode.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h
index bc4fdf27bd2e..c5ba1636613c 100644
--- a/include/drm/drm_mode.h
+++ b/include/drm/drm_mode.h
@@ -85,7 +85,7 @@ struct drm_mode_modeinfo {
 	__u16 hdisplay, hsync_start, hsync_end, htotal, hskew;
 	__u16 vdisplay, vsync_start, vsync_end, vtotal, vscan;
 
-	__u32 vrefresh; /* vertical refresh * 1000 */
+	__u32 vrefresh;
 
 	__u32 flags;
 	__u32 type;
-- 
cgit v1.2.3


From d6ad3e286d2c075a60b9f11075a2c55aeeeca2ad Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Wed, 27 Jan 2010 16:25:22 -0600
Subject: softlockup: Add sched_clock_tick() to avoid kernel warning on kgdb
 resume

When CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is set, sched_clock() gets
the time from hardware such as the TSC on x86. In this
configuration kgdb will report a softlock warning message on
resuming or detaching from a debug session.

Sequence of events in the problem case:

 1) "cpu sched clock" and "hardware time" are at 100 sec prior
    to a call to kgdb_handle_exception()

 2) Debugger waits in kgdb_handle_exception() for 80 sec and on
    exit the following is called ...  touch_softlockup_watchdog() -->
    __raw_get_cpu_var(touch_timestamp) = 0;

 3) "cpu sched clock" = 100s (it was not updated, because the
    interrupt was disabled in kgdb) but the "hardware time" = 180 sec

 4) The first timer interrupt after resuming from
    kgdb_handle_exception updates the watchdog from the "cpu sched clock"

update_process_times() { ...  run_local_timers() -->
softlockup_tick() --> check (touch_timestamp == 0) (it is "YES"
here, we have set "touch_timestamp = 0" at kgdb) -->
__touch_softlockup_watchdog() ***(A)--> reset "touch_timestamp"
to "get_timestamp()" (Here, the "touch_timestamp" will still be
set to 100s.)  ...

    scheduler_tick() ***(B)--> sched_clock_tick() (update "cpu sched
    clock" to "hardware time" = 180s) ...  }

 5) The Second timer interrupt handler appears to have a large
    jump and trips the softlockup warning.

update_process_times() { ...  run_local_timers() -->
softlockup_tick() --> "cpu sched clock" - "touch_timestamp" =
180s-100s > 60s --> printk "soft lockup error messages" ...  }

note: ***(A) reset "touch_timestamp" to
"get_timestamp(this_cpu)"

Why is "touch_timestamp" 100 sec, instead of 180 sec?

When CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is set, the call trace of
get_timestamp() is:

get_timestamp(this_cpu)
 -->cpu_clock(this_cpu)
 -->sched_clock_cpu(this_cpu)
 -->__update_sched_clock(sched_clock_data, now)

The __update_sched_clock() function uses the GTOD tick value to
create a window to normalize the "now" values.  So if "now"
value is too big for sched_clock_data, it will be ignored.

The fix is to invoke sched_clock_tick() to update "cpu sched
clock" in order to recover from this state.  This is done by
introducing the function touch_softlockup_watchdog_sync(). This
allows kgdb to request that the sched clock is updated when the
watchdog thread runs the first time after a resume from kgdb.

[yong.zhang0@gmail.com: Use per cpu instead of an array]
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Dongdong Deng <Dongdong.Deng@windriver.com>
Cc: kgdb-bugreport@lists.sourceforge.net
Cc: peterz@infradead.org
LKML-Reference: <1264631124-4837-2-git-send-email-jason.wessel@windriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  4 ++++
 kernel/kgdb.c         |  6 +++---
 kernel/softlockup.c   | 15 +++++++++++++++
 3 files changed, 22 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6f7bba93929b..89232151a9d0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -310,6 +310,7 @@ extern void sched_show_task(struct task_struct *p);
 #ifdef CONFIG_DETECT_SOFTLOCKUP
 extern void softlockup_tick(void);
 extern void touch_softlockup_watchdog(void);
+extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
 				    void __user *buffer,
@@ -323,6 +324,9 @@ static inline void softlockup_tick(void)
 static inline void touch_softlockup_watchdog(void)
 {
 }
+static inline void touch_softlockup_watchdog_sync(void)
+{
+}
 static inline void touch_all_softlockup_watchdogs(void)
 {
 }
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 2eb517e23514..87f2cc557553 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -596,7 +596,7 @@ static void kgdb_wait(struct pt_regs *regs)
 
 	/* Signal the primary CPU that we are done: */
 	atomic_set(&cpu_in_kgdb[cpu], 0);
-	touch_softlockup_watchdog();
+	touch_softlockup_watchdog_sync();
 	clocksource_touch_watchdog();
 	local_irq_restore(flags);
 }
@@ -1450,7 +1450,7 @@ acquirelock:
 	    (kgdb_info[cpu].task &&
 	     kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
 		atomic_set(&kgdb_active, -1);
-		touch_softlockup_watchdog();
+		touch_softlockup_watchdog_sync();
 		clocksource_touch_watchdog();
 		local_irq_restore(flags);
 
@@ -1550,7 +1550,7 @@ kgdb_restore:
 	}
 	/* Free kgdb_active */
 	atomic_set(&kgdb_active, -1);
-	touch_softlockup_watchdog();
+	touch_softlockup_watchdog_sync();
 	clocksource_touch_watchdog();
 	local_irq_restore(flags);
 
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index d22579087e27..0d4c7898ab80 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -25,6 +25,7 @@ static DEFINE_SPINLOCK(print_lock);
 static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
 static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
+static DEFINE_PER_CPU(bool, softlock_touch_sync);
 
 static int __read_mostly did_panic;
 int __read_mostly softlockup_thresh = 60;
@@ -79,6 +80,12 @@ void touch_softlockup_watchdog(void)
 }
 EXPORT_SYMBOL(touch_softlockup_watchdog);
 
+void touch_softlockup_watchdog_sync(void)
+{
+	__raw_get_cpu_var(softlock_touch_sync) = true;
+	__raw_get_cpu_var(softlockup_touch_ts) = 0;
+}
+
 void touch_all_softlockup_watchdogs(void)
 {
 	int cpu;
@@ -118,6 +125,14 @@ void softlockup_tick(void)
 	}
 
 	if (touch_ts == 0) {
+		if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) {
+			/*
+			 * If the time stamp was touched atomically
+			 * make sure the scheduler tick is up to date.
+			 */
+			per_cpu(softlock_touch_sync, this_cpu) = false;
+			sched_clock_tick();
+		}
 		__touch_softlockup_watchdog();
 		return;
 	}
-- 
cgit v1.2.3


From f98bfbd78c37c5946cc53089da32a5f741efdeb7 Mon Sep 17 00:00:00 2001
From: Evgeniy Polyakov <zbr@ioremap.net>
Date: Tue, 2 Feb 2010 15:58:48 -0800
Subject: connector: Delete buggy notification code.

On Tue, Feb 02, 2010 at 02:57:14PM -0800, Greg KH (gregkh@suse.de) wrote:
> > There are at least two ways to fix it: using a big cannon and a small
> > one. The former way is to disable notification registration, since it is
> > not used by anyone at all. Second way is to check whether calling
> > process is root and its destination group is -1 (kind of priveledged
> > one) before command is dispatched to workqueue.
>
> Well if no one is using it, removing it makes the most sense, right?
>
> No objection from me, care to make up a patch either way for this?

Getting it is not used, let's drop support for notifications about
(un)registered events from connector.
Another option was to check credentials on receiving, but we can always
restore it without bugs if needed, but genetlink has a wider code base
and none complained, that userspace can not get notification when some
other clients were (un)registered.

Kudos for Sebastian Krahmer <krahmer@suse.de>, who found a bug in the
code.

Signed-off-by: Evgeniy Polyakov <zbr@ioremap.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/connector/connector.c | 175 ------------------------------------------
 include/linux/connector.h     |  32 --------
 2 files changed, 207 deletions(-)

(limited to 'include')

diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index f06024668f99..537c29ac4487 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -36,17 +36,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("Generic userspace <-> kernelspace connector.");
 
-static u32 cn_idx = CN_IDX_CONNECTOR;
-static u32 cn_val = CN_VAL_CONNECTOR;
-
-module_param(cn_idx, uint, 0);
-module_param(cn_val, uint, 0);
-MODULE_PARM_DESC(cn_idx, "Connector's main device idx.");
-MODULE_PARM_DESC(cn_val, "Connector's main device val.");
-
-static DEFINE_MUTEX(notify_lock);
-static LIST_HEAD(notify_list);
-
 static struct cn_dev cdev;
 
 static int cn_already_initialized;
@@ -209,54 +198,6 @@ static void cn_rx_skb(struct sk_buff *__skb)
 	}
 }
 
-/*
- * Notification routing.
- *
- * Gets id and checks if there are notification request for it's idx
- * and val.  If there are such requests notify the listeners with the
- * given notify event.
- *
- */
-static void cn_notify(struct cb_id *id, u32 notify_event)
-{
-	struct cn_ctl_entry *ent;
-
-	mutex_lock(&notify_lock);
-	list_for_each_entry(ent, &notify_list, notify_entry) {
-		int i;
-		struct cn_notify_req *req;
-		struct cn_ctl_msg *ctl = ent->msg;
-		int idx_found, val_found;
-
-		idx_found = val_found = 0;
-
-		req = (struct cn_notify_req *)ctl->data;
-		for (i = 0; i < ctl->idx_notify_num; ++i, ++req) {
-			if (id->idx >= req->first &&
-					id->idx < req->first + req->range) {
-				idx_found = 1;
-				break;
-			}
-		}
-
-		for (i = 0; i < ctl->val_notify_num; ++i, ++req) {
-			if (id->val >= req->first &&
-					id->val < req->first + req->range) {
-				val_found = 1;
-				break;
-			}
-		}
-
-		if (idx_found && val_found) {
-			struct cn_msg m = { .ack = notify_event, };
-
-			memcpy(&m.id, id, sizeof(m.id));
-			cn_netlink_send(&m, ctl->group, GFP_KERNEL);
-		}
-	}
-	mutex_unlock(&notify_lock);
-}
-
 /*
  * Callback add routing - adds callback with given ID and name.
  * If there is registered callback with the same ID it will not be added.
@@ -276,8 +217,6 @@ int cn_add_callback(struct cb_id *id, char *name,
 	if (err)
 		return err;
 
-	cn_notify(id, 0);
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(cn_add_callback);
@@ -295,111 +234,9 @@ void cn_del_callback(struct cb_id *id)
 	struct cn_dev *dev = &cdev;
 
 	cn_queue_del_callback(dev->cbdev, id);
-	cn_notify(id, 1);
 }
 EXPORT_SYMBOL_GPL(cn_del_callback);
 
-/*
- * Checks two connector's control messages to be the same.
- * Returns 1 if they are the same or if the first one is corrupted.
- */
-static int cn_ctl_msg_equals(struct cn_ctl_msg *m1, struct cn_ctl_msg *m2)
-{
-	int i;
-	struct cn_notify_req *req1, *req2;
-
-	if (m1->idx_notify_num != m2->idx_notify_num)
-		return 0;
-
-	if (m1->val_notify_num != m2->val_notify_num)
-		return 0;
-
-	if (m1->len != m2->len)
-		return 0;
-
-	if ((m1->idx_notify_num + m1->val_notify_num) * sizeof(*req1) !=
-	    m1->len)
-		return 1;
-
-	req1 = (struct cn_notify_req *)m1->data;
-	req2 = (struct cn_notify_req *)m2->data;
-
-	for (i = 0; i < m1->idx_notify_num; ++i) {
-		if (req1->first != req2->first || req1->range != req2->range)
-			return 0;
-		req1++;
-		req2++;
-	}
-
-	for (i = 0; i < m1->val_notify_num; ++i) {
-		if (req1->first != req2->first || req1->range != req2->range)
-			return 0;
-		req1++;
-		req2++;
-	}
-
-	return 1;
-}
-
-/*
- * Main connector device's callback.
- *
- * Used for notification of a request's processing.
- */
-static void cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
-{
-	struct cn_ctl_msg *ctl;
-	struct cn_ctl_entry *ent;
-	u32 size;
-
-	if (msg->len < sizeof(*ctl))
-		return;
-
-	ctl = (struct cn_ctl_msg *)msg->data;
-
-	size = (sizeof(*ctl) + ((ctl->idx_notify_num +
-				 ctl->val_notify_num) *
-				sizeof(struct cn_notify_req)));
-
-	if (msg->len != size)
-		return;
-
-	if (ctl->len + sizeof(*ctl) != msg->len)
-		return;
-
-	/*
-	 * Remove notification.
-	 */
-	if (ctl->group == 0) {
-		struct cn_ctl_entry *n;
-
-		mutex_lock(&notify_lock);
-		list_for_each_entry_safe(ent, n, &notify_list, notify_entry) {
-			if (cn_ctl_msg_equals(ent->msg, ctl)) {
-				list_del(&ent->notify_entry);
-				kfree(ent);
-			}
-		}
-		mutex_unlock(&notify_lock);
-
-		return;
-	}
-
-	size += sizeof(*ent);
-
-	ent = kzalloc(size, GFP_KERNEL);
-	if (!ent)
-		return;
-
-	ent->msg = (struct cn_ctl_msg *)(ent + 1);
-
-	memcpy(ent->msg, ctl, size - sizeof(*ent));
-
-	mutex_lock(&notify_lock);
-	list_add(&ent->notify_entry, &notify_list);
-	mutex_unlock(&notify_lock);
-}
-
 static int cn_proc_show(struct seq_file *m, void *v)
 {
 	struct cn_queue_dev *dev = cdev.cbdev;
@@ -437,11 +274,8 @@ static const struct file_operations cn_file_ops = {
 static int __devinit cn_init(void)
 {
 	struct cn_dev *dev = &cdev;
-	int err;
 
 	dev->input = cn_rx_skb;
-	dev->id.idx = cn_idx;
-	dev->id.val = cn_val;
 
 	dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR,
 					 CN_NETLINK_USERS + 0xf,
@@ -457,14 +291,6 @@ static int __devinit cn_init(void)
 
 	cn_already_initialized = 1;
 
-	err = cn_add_callback(&dev->id, "connector", &cn_callback);
-	if (err) {
-		cn_already_initialized = 0;
-		cn_queue_free_dev(dev->cbdev);
-		netlink_kernel_release(dev->nls);
-		return -EINVAL;
-	}
-
 	proc_net_fops_create(&init_net, "connector", S_IRUGO, &cn_file_ops);
 
 	return 0;
@@ -478,7 +304,6 @@ static void __devexit cn_fini(void)
 
 	proc_net_remove(&init_net, "connector");
 
-	cn_del_callback(&dev->id);
 	cn_queue_free_dev(dev->cbdev);
 	netlink_kernel_release(dev->nls);
 }
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 72ba63eb83c5..3a779ffba60b 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -24,9 +24,6 @@
 
 #include <linux/types.h>
 
-#define CN_IDX_CONNECTOR		0xffffffff
-#define CN_VAL_CONNECTOR		0xffffffff
-
 /*
  * Process Events connector unique ids -- used for message routing
  */
@@ -75,30 +72,6 @@ struct cn_msg {
 	__u8 data[0];
 };
 
-/*
- * Notify structure - requests notification about
- * registering/unregistering idx/val in range [first, first+range].
- */
-struct cn_notify_req {
-	__u32 first;
-	__u32 range;
-};
-
-/*
- * Main notification control message
- * *_notify_num 	- number of appropriate cn_notify_req structures after 
- *				this struct.
- * group 		- notification receiver's idx.
- * len 			- total length of the attached data.
- */
-struct cn_ctl_msg {
-	__u32 idx_notify_num;
-	__u32 val_notify_num;
-	__u32 group;
-	__u32 len;
-	__u8 data[0];
-};
-
 #ifdef __KERNEL__
 
 #include <asm/atomic.h>
@@ -151,11 +124,6 @@ struct cn_callback_entry {
 	u32 seq, group;
 };
 
-struct cn_ctl_entry {
-	struct list_head notify_entry;
-	struct cn_ctl_msg *msg;
-};
-
 struct cn_dev {
 	struct cb_id id;
 
-- 
cgit v1.2.3


From cd757645fbdc34a8343c04bb0e74e06fccc2cb10 Mon Sep 17 00:00:00 2001
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Date: Sat, 30 Jan 2010 10:25:18 +0530
Subject: perf: Make bp_len type to u64 generic across the arch

Change 'bp_len' type to __u64 to make it work across archs as
the s390 architecture watch point length can be upto 2^64.

reference:
	http://lkml.org/lkml/2010/1/25/212

This is an ABI change that is not backward compatible with
the previous hardware breakpoint info layout integrated in this
development cycle, a rebuilt of perf tools is necessary for
versions based on 2.6.33-rc1 - 2.6.33-rc6 to work with a
kernel based on this patch.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: "K. Prasad" <prasad@linux.vnet.ibm.com>
Cc: Maneesh Soni <maneesh@in.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin <schwidefsky@de.ibm.com>
LKML-Reference: <20100130045518.GA20776@in.ibm.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/hw_breakpoint.h | 2 +-
 include/linux/perf_event.h    | 6 ++----
 kernel/hw_breakpoint.c        | 2 +-
 kernel/perf_event.c           | 2 +-
 4 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 070ba0621738..5977b724f7c6 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -44,7 +44,7 @@ static inline int hw_breakpoint_type(struct perf_event *bp)
 	return bp->attr.bp_type;
 }
 
-static inline int hw_breakpoint_len(struct perf_event *bp)
+static inline unsigned long hw_breakpoint_len(struct perf_event *bp)
 {
 	return bp->attr.bp_len;
 }
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8fa71874113f..a177698d95e2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -211,11 +211,9 @@ struct perf_event_attr {
 		__u32		wakeup_watermark; /* bytes before wakeup   */
 	};
 
-	__u32			__reserved_2;
-
-	__u64			bp_addr;
 	__u32			bp_type;
-	__u32			bp_len;
+	__u64			bp_addr;
+	__u64			bp_len;
 };
 
 /*
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 8a5c7d55ac9f..967e66143e11 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -360,8 +360,8 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 {
 	u64 old_addr = bp->attr.bp_addr;
+	u64 old_len = bp->attr.bp_len;
 	int old_type = bp->attr.bp_type;
-	int old_len = bp->attr.bp_len;
 	int err = 0;
 
 	perf_event_disable(bp);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index d27746bd3a06..2b19297742cb 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4580,7 +4580,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 	if (attr->type >= PERF_TYPE_MAX)
 		return -EINVAL;
 
-	if (attr->__reserved_1 || attr->__reserved_2)
+	if (attr->__reserved_1)
 		return -EINVAL;
 
 	if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
-- 
cgit v1.2.3


From f7acede65d6b65919aee5b6a360a17cedb11f2f7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 Jan 2010 13:30:11 +0100
Subject: libata: fix ata_id_logical_per_physical_sectors

The value we get from the low byte of the ATA_ID_SECTOR_SIZE word is not not
a plain multiple, but the log of it, so fix the helper to give the correct
answer.  Without this we'll get an incorrect minimal I/O size in the block
limits VPD page for 4k sector drives.

Also change the return value of ata_id_logical_per_physical_sectors to u16
for the unlikely case of very large logical sectors.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 include/linux/ata.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ata.h b/include/linux/ata.h
index 38a6948ce0c2..20f31567ccee 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -647,9 +647,9 @@ static inline int ata_id_has_large_logical_sectors(const u16 *id)
 	return id[ATA_ID_SECTOR_SIZE] & (1 << 13);
 }
 
-static inline u8 ata_id_logical_per_physical_sectors(const u16 *id)
+static inline u16 ata_id_logical_per_physical_sectors(const u16 *id)
 {
-	return id[ATA_ID_SECTOR_SIZE] & 0xf;
+	return 1 << (id[ATA_ID_SECTOR_SIZE] & 0xf);
 }
 
 static inline int ata_id_has_lba48(const u16 *id)
-- 
cgit v1.2.3


From 2938429501b73f6aeb312236eac7ed0416a07cd5 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Fri, 5 Feb 2010 16:09:11 +1100
Subject: percpu: add __percpu for sparse

This is to make the annotation of percpu variables during the next merge
window less painfull.

Extracted from a patch by Rusty Russell.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 5be3dab4a695..188fcae10a99 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -15,6 +15,7 @@
 # define __acquire(x)	__context__(x,1)
 # define __release(x)	__context__(x,-1)
 # define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
+# define __percpu	__attribute__((noderef, address_space(3)))
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
 #else
@@ -32,6 +33,7 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __acquire(x) (void)0
 # define __release(x) (void)0
 # define __cond_lock(x,c) (c)
+# define __percpu
 #endif
 
 #ifdef __KERNEL__
-- 
cgit v1.2.3


From 8eb988c70e7709b7bd1a69f0ec53d19ac20dea84 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Wed, 20 Jan 2010 15:35:41 -0500
Subject: fix ima breakage

The "Untangling ima mess, part 2 with counters" patch messed
up the counters.  Based on conversations with Al Viro, this patch
streamlines ima_path_check() by removing the counter maintaince.
The counters are now updated independently, from measuring the file,
in __dentry_open() and alloc_file() by calling ima_counts_get().
ima_path_check() is called from nfsd and do_filp_open().
It also did not measure all files that should have been measured.
Reason: ima_path_check() got bogus value passed as mask.
[AV: mea culpa]
[AV: add missing nfsd bits]

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c                        |   6 +-
 fs/nfsd/vfs.c                     |   3 +-
 include/linux/ima.h               |   4 +-
 security/integrity/ima/ima_main.c | 236 +++++++++++++++-----------------------
 4 files changed, 97 insertions(+), 152 deletions(-)

(limited to 'include')

diff --git a/fs/namei.c b/fs/namei.c
index 94a5e60779f9..cd77b6375efd 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1736,8 +1736,7 @@ do_last:
 		if (nd.root.mnt)
 			path_put(&nd.root);
 		if (!IS_ERR(filp)) {
-			error = ima_path_check(&filp->f_path, filp->f_mode &
-				       (MAY_READ | MAY_WRITE | MAY_EXEC));
+			error = ima_path_check(filp, acc_mode);
 			if (error) {
 				fput(filp);
 				filp = ERR_PTR(error);
@@ -1797,8 +1796,7 @@ ok:
 	}
 	filp = nameidata_to_filp(&nd);
 	if (!IS_ERR(filp)) {
-		error = ima_path_check(&filp->f_path, filp->f_mode &
-			       (MAY_READ | MAY_WRITE | MAY_EXEC));
+		error = ima_path_check(filp, acc_mode);
 		if (error) {
 			fput(filp);
 			filp = ERR_PTR(error);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 325959e264ce..32477e3a645c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -752,8 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 			    flags, current_cred());
 	if (IS_ERR(*filp))
 		host_err = PTR_ERR(*filp);
-	host_err = ima_path_check(&(*filp)->f_path,
-				  access & (MAY_READ | MAY_WRITE | MAY_EXEC));
+	host_err = ima_path_check(*filp, access);
 out_nfserr:
 	err = nfserrno(host_err);
 out:
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 99dc6d5cf7e5..aa55a8f1f5b9 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -17,7 +17,7 @@ struct linux_binprm;
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_inode_alloc(struct inode *inode);
 extern void ima_inode_free(struct inode *inode);
-extern int ima_path_check(struct path *path, int mask);
+extern int ima_path_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern void ima_counts_get(struct file *file);
@@ -38,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode)
 	return;
 }
 
-static inline int ima_path_check(struct path *path, int mask)
+static inline int ima_path_check(struct file *file, int mask)
 {
 	return 0;
 }
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index a89f44d5e030..75aee18f6163 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -84,6 +84,36 @@ out:
 	return found;
 }
 
+/* ima_read_write_check - reflect possible reading/writing errors in the PCR.
+ *
+ * When opening a file for read, if the file is already open for write,
+ * the file could change, resulting in a file measurement error.
+ *
+ * Opening a file for write, if the file is already open for read, results
+ * in a time of measure, time of use (ToMToU) error.
+ *
+ * In either case invalidate the PCR.
+ */
+enum iint_pcr_error { TOMTOU, OPEN_WRITERS };
+static void ima_read_write_check(enum iint_pcr_error error,
+				 struct ima_iint_cache *iint,
+				 struct inode *inode,
+				 const unsigned char *filename)
+{
+	switch (error) {
+	case TOMTOU:
+		if (iint->readcount > 0)
+			ima_add_violation(inode, filename, "invalid_pcr",
+					  "ToMToU");
+		break;
+	case OPEN_WRITERS:
+		if (iint->writecount > 0)
+			ima_add_violation(inode, filename, "invalid_pcr",
+					  "open_writers");
+		break;
+	}
+}
+
 /*
  * Update the counts given an fmode_t
  */
@@ -98,6 +128,47 @@ static void ima_inc_counts(struct ima_iint_cache *iint, fmode_t mode)
 		iint->writecount++;
 }
 
+/*
+ * ima_counts_get - increment file counts
+ *
+ * Maintain read/write counters for all files, but only
+ * invalidate the PCR for measured files:
+ * 	- Opening a file for write when already open for read,
+ *	  results in a time of measure, time of use (ToMToU) error.
+ *	- Opening a file for read when already open for write,
+ * 	  could result in a file measurement error.
+ *
+ */
+void ima_counts_get(struct file *file)
+{
+	struct dentry *dentry = file->f_path.dentry;
+	struct inode *inode = dentry->d_inode;
+	fmode_t mode = file->f_mode;
+	struct ima_iint_cache *iint;
+	int rc;
+
+	if (!ima_initialized || !S_ISREG(inode->i_mode))
+		return;
+	iint = ima_iint_find_get(inode);
+	if (!iint)
+		return;
+	mutex_lock(&iint->mutex);
+	rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK);
+	if (rc < 0)
+		goto out;
+
+	if (mode & FMODE_WRITE) {
+		ima_read_write_check(TOMTOU, iint, inode, dentry->d_name.name);
+		goto out;
+	}
+	ima_read_write_check(OPEN_WRITERS, iint, inode, dentry->d_name.name);
+out:
+	ima_inc_counts(iint, file->f_mode);
+	mutex_unlock(&iint->mutex);
+
+	kref_put(&iint->refcount, iint_free);
+}
+
 /*
  * Decrement ima counts
  */
@@ -153,123 +224,6 @@ void ima_file_free(struct file *file)
 	kref_put(&iint->refcount, iint_free);
 }
 
-/* ima_read_write_check - reflect possible reading/writing errors in the PCR.
- *
- * When opening a file for read, if the file is already open for write,
- * the file could change, resulting in a file measurement error.
- *
- * Opening a file for write, if the file is already open for read, results
- * in a time of measure, time of use (ToMToU) error.
- *
- * In either case invalidate the PCR.
- */
-enum iint_pcr_error { TOMTOU, OPEN_WRITERS };
-static void ima_read_write_check(enum iint_pcr_error error,
-				 struct ima_iint_cache *iint,
-				 struct inode *inode,
-				 const unsigned char *filename)
-{
-	switch (error) {
-	case TOMTOU:
-		if (iint->readcount > 0)
-			ima_add_violation(inode, filename, "invalid_pcr",
-					  "ToMToU");
-		break;
-	case OPEN_WRITERS:
-		if (iint->writecount > 0)
-			ima_add_violation(inode, filename, "invalid_pcr",
-					  "open_writers");
-		break;
-	}
-}
-
-static int get_path_measurement(struct ima_iint_cache *iint, struct file *file,
-				const unsigned char *filename)
-{
-	int rc = 0;
-
-	ima_inc_counts(iint, file->f_mode);
-
-	rc = ima_collect_measurement(iint, file);
-	if (!rc)
-		ima_store_measurement(iint, file, filename);
-	return rc;
-}
-
-/**
- * ima_path_check - based on policy, collect/store measurement.
- * @path: contains a pointer to the path to be measured
- * @mask: contains MAY_READ, MAY_WRITE or MAY_EXECUTE
- *
- * Measure the file being open for readonly, based on the
- * ima_must_measure() policy decision.
- *
- * Keep read/write counters for all files, but only
- * invalidate the PCR for measured files:
- * 	- Opening a file for write when already open for read,
- *	  results in a time of measure, time of use (ToMToU) error.
- *	- Opening a file for read when already open for write,
- * 	  could result in a file measurement error.
- *
- * Always return 0 and audit dentry_open failures.
- * (Return code will be based upon measurement appraisal.)
- */
-int ima_path_check(struct path *path, int mask)
-{
-	struct inode *inode = path->dentry->d_inode;
-	struct ima_iint_cache *iint;
-	struct file *file = NULL;
-	int rc;
-
-	if (!ima_initialized || !S_ISREG(inode->i_mode))
-		return 0;
-	iint = ima_iint_find_get(inode);
-	if (!iint)
-		return 0;
-
-	mutex_lock(&iint->mutex);
-
-	rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK);
-	if (rc < 0)
-		goto out;
-
-	if ((mask & MAY_WRITE) || (mask == 0))
-		ima_read_write_check(TOMTOU, iint, inode,
-				     path->dentry->d_name.name);
-
-	if ((mask & (MAY_WRITE | MAY_READ | MAY_EXEC)) != MAY_READ)
-		goto out;
-
-	ima_read_write_check(OPEN_WRITERS, iint, inode,
-			     path->dentry->d_name.name);
-	if (!(iint->flags & IMA_MEASURED)) {
-		struct dentry *dentry = dget(path->dentry);
-		struct vfsmount *mnt = mntget(path->mnt);
-
-		file = dentry_open(dentry, mnt, O_RDONLY | O_LARGEFILE,
-				   current_cred());
-		if (IS_ERR(file)) {
-			int audit_info = 0;
-
-			integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode,
-					    dentry->d_name.name,
-					    "add_measurement",
-					    "dentry_open failed",
-					    1, audit_info);
-			file = NULL;
-			goto out;
-		}
-		rc = get_path_measurement(iint, file, dentry->d_name.name);
-	}
-out:
-	mutex_unlock(&iint->mutex);
-	if (file)
-		fput(file);
-	kref_put(&iint->refcount, iint_free);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ima_path_check);
-
 static int process_measurement(struct file *file, const unsigned char *filename,
 			       int mask, int function)
 {
@@ -297,33 +251,6 @@ out:
 	return rc;
 }
 
-/*
- * ima_counts_get - increment file counts
- *
- * - for IPC shm and shmat file.
- * - for nfsd exported files.
- *
- * Increment the counts for these files to prevent unnecessary
- * imbalance messages.
- */
-void ima_counts_get(struct file *file)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct ima_iint_cache *iint;
-
-	if (!ima_initialized || !S_ISREG(inode->i_mode))
-		return;
-	iint = ima_iint_find_get(inode);
-	if (!iint)
-		return;
-	mutex_lock(&iint->mutex);
-	ima_inc_counts(iint, file->f_mode);
-	mutex_unlock(&iint->mutex);
-
-	kref_put(&iint->refcount, iint_free);
-}
-EXPORT_SYMBOL_GPL(ima_counts_get);
-
 /**
  * ima_file_mmap - based on policy, collect/store measurement.
  * @file: pointer to the file to be measured (May be NULL)
@@ -369,6 +296,27 @@ int ima_bprm_check(struct linux_binprm *bprm)
 	return 0;
 }
 
+/**
+ * ima_path_check - based on policy, collect/store measurement.
+ * @file: pointer to the file to be measured
+ * @mask: contains MAY_READ, MAY_WRITE or MAY_EXECUTE
+ *
+ * Measure files based on the ima_must_measure() policy decision.
+ *
+ * Always return 0 and audit dentry_open failures.
+ * (Return code will be based upon measurement appraisal.)
+ */
+int ima_path_check(struct file *file, int mask)
+{
+	int rc;
+
+	rc = process_measurement(file, file->f_dentry->d_name.name,
+				 mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
+				 PATH_CHECK);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ima_path_check);
+
 static int __init init_ima(void)
 {
 	int error;
-- 
cgit v1.2.3


From 9bbb6cad0173e6220f3ac609e26beb48dab3b7cd Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 26 Jan 2010 17:02:40 -0500
Subject: ima: rename ima_path_check to ima_file_check

ima_path_check actually deals with files!  call it ima_file_check instead.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c                        | 4 ++--
 fs/nfsd/vfs.c                     | 2 +-
 include/linux/ima.h               | 4 ++--
 security/integrity/ima/ima_main.c | 6 +++---
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/fs/namei.c b/fs/namei.c
index cd77b6375efd..d62fdc875f22 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1736,7 +1736,7 @@ do_last:
 		if (nd.root.mnt)
 			path_put(&nd.root);
 		if (!IS_ERR(filp)) {
-			error = ima_path_check(filp, acc_mode);
+			error = ima_file_check(filp, acc_mode);
 			if (error) {
 				fput(filp);
 				filp = ERR_PTR(error);
@@ -1796,7 +1796,7 @@ ok:
 	}
 	filp = nameidata_to_filp(&nd);
 	if (!IS_ERR(filp)) {
-		error = ima_path_check(filp, acc_mode);
+		error = ima_file_check(filp, acc_mode);
 		if (error) {
 			fput(filp);
 			filp = ERR_PTR(error);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 32477e3a645c..97d79eff6b7f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -752,7 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 			    flags, current_cred());
 	if (IS_ERR(*filp))
 		host_err = PTR_ERR(*filp);
-	host_err = ima_path_check(*filp, access);
+	host_err = ima_file_check(*filp, access);
 out_nfserr:
 	err = nfserrno(host_err);
 out:
diff --git a/include/linux/ima.h b/include/linux/ima.h
index aa55a8f1f5b9..975837e7d6c0 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -17,7 +17,7 @@ struct linux_binprm;
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_inode_alloc(struct inode *inode);
 extern void ima_inode_free(struct inode *inode);
-extern int ima_path_check(struct file *file, int mask);
+extern int ima_file_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern void ima_counts_get(struct file *file);
@@ -38,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode)
 	return;
 }
 
-static inline int ima_path_check(struct file *file, int mask)
+static inline int ima_file_check(struct file *file, int mask)
 {
 	return 0;
 }
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index eb1cf6498cc9..b76e1f03ea2b 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -14,7 +14,7 @@
  *
  * File: ima_main.c
  *	implements the IMA hooks: ima_bprm_check, ima_file_mmap,
- *	and ima_path_check.
+ *	and ima_file_check.
  */
 #include <linux/module.h>
 #include <linux/file.h>
@@ -306,7 +306,7 @@ int ima_bprm_check(struct linux_binprm *bprm)
  * Always return 0 and audit dentry_open failures.
  * (Return code will be based upon measurement appraisal.)
  */
-int ima_path_check(struct file *file, int mask)
+int ima_file_check(struct file *file, int mask)
 {
 	int rc;
 
@@ -315,7 +315,7 @@ int ima_path_check(struct file *file, int mask)
 				 PATH_CHECK);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(ima_path_check);
+EXPORT_SYMBOL_GPL(ima_file_check);
 
 static int __init init_ima(void)
 {
-- 
cgit v1.2.3


From 5b3501faa8741d50617ce4191c20061c6ef36cb3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 8 Feb 2010 11:16:56 -0800
Subject: netfilter: nf_conntrack: per netns nf_conntrack_cachep

nf_conntrack_cachep is currently shared by all netns instances, but
because of SLAB_DESTROY_BY_RCU special semantics, this is wrong.

If we use a shared slab cache, one object can instantly flight between
one hash table (netns ONE) to another one (netns TWO), and concurrent
reader (doing a lookup in netns ONE, 'finding' an object of netns TWO)
can be fooled without notice, because no RCU grace period has to be
observed between object freeing and its reuse.

We dont have this problem with UDP/TCP slab caches because TCP/UDP
hashtables are global to the machine (and each object has a pointer to
its netns).

If we use per netns conntrack hash tables, we also *must* use per netns
conntrack slab caches, to guarantee an object can not escape from one
namespace to another one.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
[Patrick: added unique slab name allocation]
Cc: stable@kernel.org
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netns/conntrack.h     |  2 ++
 net/netfilter/nf_conntrack_core.c | 39 +++++++++++++++++++++++----------------
 2 files changed, 25 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index ba1ba0c5efd1..aed23b6c8478 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -11,6 +11,7 @@ struct nf_conntrack_ecache;
 struct netns_ct {
 	atomic_t		count;
 	unsigned int		expect_count;
+	struct kmem_cache	*nf_conntrack_cachep;
 	struct hlist_nulls_head	*hash;
 	struct hlist_head	*expect_hash;
 	struct hlist_nulls_head	unconfirmed;
@@ -28,5 +29,6 @@ struct netns_ct {
 #endif
 	int			hash_vmalloc;
 	int			expect_vmalloc;
+	char			*slabname;
 };
 #endif
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 37e2b88313f2..9de4bd4c0dd7 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -63,8 +63,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
 struct nf_conn nf_conntrack_untracked __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
 
-static struct kmem_cache *nf_conntrack_cachep __read_mostly;
-
 static int nf_conntrack_hash_rnd_initted;
 static unsigned int nf_conntrack_hash_rnd;
 
@@ -572,7 +570,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
 	 * Do not use kmem_cache_zalloc(), as this cache uses
 	 * SLAB_DESTROY_BY_RCU.
 	 */
-	ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
+	ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp);
 	if (ct == NULL) {
 		pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n");
 		atomic_dec(&net->ct.count);
@@ -611,7 +609,7 @@ void nf_conntrack_free(struct nf_conn *ct)
 	nf_ct_ext_destroy(ct);
 	atomic_dec(&net->ct.count);
 	nf_ct_ext_free(ct);
-	kmem_cache_free(nf_conntrack_cachep, ct);
+	kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_free);
 
@@ -1119,7 +1117,6 @@ static void nf_conntrack_cleanup_init_net(void)
 
 	nf_conntrack_helper_fini();
 	nf_conntrack_proto_fini();
-	kmem_cache_destroy(nf_conntrack_cachep);
 }
 
 static void nf_conntrack_cleanup_net(struct net *net)
@@ -1137,6 +1134,8 @@ static void nf_conntrack_cleanup_net(struct net *net)
 	nf_conntrack_ecache_fini(net);
 	nf_conntrack_acct_fini(net);
 	nf_conntrack_expect_fini(net);
+	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
+	kfree(net->ct.slabname);
 	free_percpu(net->ct.stat);
 }
 
@@ -1272,15 +1271,6 @@ static int nf_conntrack_init_init_net(void)
 	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
 	       nf_conntrack_max);
 
-	nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
-						sizeof(struct nf_conn),
-						0, SLAB_DESTROY_BY_RCU, NULL);
-	if (!nf_conntrack_cachep) {
-		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
-		ret = -ENOMEM;
-		goto err_cache;
-	}
-
 	ret = nf_conntrack_proto_init();
 	if (ret < 0)
 		goto err_proto;
@@ -1302,8 +1292,6 @@ static int nf_conntrack_init_init_net(void)
 err_helper:
 	nf_conntrack_proto_fini();
 err_proto:
-	kmem_cache_destroy(nf_conntrack_cachep);
-err_cache:
 	return ret;
 }
 
@@ -1325,6 +1313,21 @@ static int nf_conntrack_init_net(struct net *net)
 		ret = -ENOMEM;
 		goto err_stat;
 	}
+
+	net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
+	if (!net->ct.slabname) {
+		ret = -ENOMEM;
+		goto err_slabname;
+	}
+
+	net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
+							sizeof(struct nf_conn), 0,
+							SLAB_DESTROY_BY_RCU, NULL);
+	if (!net->ct.nf_conntrack_cachep) {
+		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
+		ret = -ENOMEM;
+		goto err_cache;
+	}
 	net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
 					     &net->ct.hash_vmalloc, 1);
 	if (!net->ct.hash) {
@@ -1352,6 +1355,10 @@ err_expect:
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
 err_hash:
+	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
+err_cache:
+	kfree(net->ct.slabname);
+err_slabname:
 	free_percpu(net->ct.stat);
 err_stat:
 	return ret;
-- 
cgit v1.2.3


From d696c7bdaa55e2208e56c6f98e6bc1599f34286d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 8 Feb 2010 11:18:07 -0800
Subject: netfilter: nf_conntrack: fix hash resizing with namespaces

As noticed by Jon Masters <jonathan@jonmasters.org>, the conntrack hash
size is global and not per namespace, but modifiable at runtime through
/sys/module/nf_conntrack/hashsize. Changing the hash size will only
resize the hash in the current namespace however, so other namespaces
will use an invalid hash size. This can cause crashes when enlarging
the hashsize, or false negative lookups when shrinking it.

Move the hash size into the per-namespace data and only use the global
hash size to initialize the per-namespace value when instanciating a
new namespace. Additionally restrict hash resizing to init_net for
now as other namespaces are not handled currently.

Cc: stable@kernel.org
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/conntrack.h                      |  1 +
 include/net/netns/ipv4.h                           |  1 +
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c     |  2 +-
 .../netfilter/nf_conntrack_l3proto_ipv4_compat.c   |  4 +-
 net/ipv4/netfilter/nf_nat_core.c                   | 22 ++++-----
 net/netfilter/nf_conntrack_core.c                  | 53 ++++++++++++----------
 net/netfilter/nf_conntrack_expect.c                |  2 +-
 net/netfilter/nf_conntrack_helper.c                |  2 +-
 net/netfilter/nf_conntrack_netlink.c               |  2 +-
 net/netfilter/nf_conntrack_standalone.c            |  7 +--
 10 files changed, 49 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index aed23b6c8478..63d449807d9b 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -11,6 +11,7 @@ struct nf_conntrack_ecache;
 struct netns_ct {
 	atomic_t		count;
 	unsigned int		expect_count;
+	unsigned int		htable_size;
 	struct kmem_cache	*nf_conntrack_cachep;
 	struct hlist_nulls_head	*hash;
 	struct hlist_head	*expect_hash;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 2eb3814d6258..9a4b8b714079 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -40,6 +40,7 @@ struct netns_ipv4 {
 	struct xt_table		*iptable_security;
 	struct xt_table		*nat_table;
 	struct hlist_head	*nat_bysource;
+	unsigned int		nat_htable_size;
 	int			nat_vmalloced;
 #endif
 
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index d171b123a656..d1ea38a7c490 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -210,7 +210,7 @@ static ctl_table ip_ct_sysctl_table[] = {
 	},
 	{
 		.procname	= "ip_conntrack_buckets",
-		.data		= &nf_conntrack_htable_size,
+		.data		= &init_net.ct.htable_size,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0444,
 		.proc_handler	= proc_dointvec,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 8668a3defda6..2fb7b76da94f 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -32,7 +32,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 	struct hlist_nulls_node *n;
 
 	for (st->bucket = 0;
-	     st->bucket < nf_conntrack_htable_size;
+	     st->bucket < net->ct.htable_size;
 	     st->bucket++) {
 		n = rcu_dereference(net->ct.hash[st->bucket].first);
 		if (!is_a_nulls(n))
@@ -50,7 +50,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
 	head = rcu_dereference(head->next);
 	while (is_a_nulls(head)) {
 		if (likely(get_nulls_value(head) == st->bucket)) {
-			if (++st->bucket >= nf_conntrack_htable_size)
+			if (++st->bucket >= net->ct.htable_size)
 				return NULL;
 		}
 		head = rcu_dereference(net->ct.hash[st->bucket].first);
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index fe1a64479dd0..26066a2327ad 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -35,9 +35,6 @@ static DEFINE_SPINLOCK(nf_nat_lock);
 
 static struct nf_conntrack_l3proto *l3proto __read_mostly;
 
-/* Calculated at init based on memory size */
-static unsigned int nf_nat_htable_size __read_mostly;
-
 #define MAX_IP_NAT_PROTO 256
 static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]
 						__read_mostly;
@@ -72,7 +69,7 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put);
 
 /* We keep an extra hash for each conntrack, for fast searching. */
 static inline unsigned int
-hash_by_src(const struct nf_conntrack_tuple *tuple)
+hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	unsigned int hash;
 
@@ -80,7 +77,7 @@ hash_by_src(const struct nf_conntrack_tuple *tuple)
 	hash = jhash_3words((__force u32)tuple->src.u3.ip,
 			    (__force u32)tuple->src.u.all,
 			    tuple->dst.protonum, 0);
-	return ((u64)hash * nf_nat_htable_size) >> 32;
+	return ((u64)hash * net->ipv4.nat_htable_size) >> 32;
 }
 
 /* Is this tuple already taken? (not by us) */
@@ -147,7 +144,7 @@ find_appropriate_src(struct net *net,
 		     struct nf_conntrack_tuple *result,
 		     const struct nf_nat_range *range)
 {
-	unsigned int h = hash_by_src(tuple);
+	unsigned int h = hash_by_src(net, tuple);
 	const struct nf_conn_nat *nat;
 	const struct nf_conn *ct;
 	const struct hlist_node *n;
@@ -330,7 +327,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 	if (have_to_hash) {
 		unsigned int srchash;
 
-		srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+		srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 		spin_lock_bh(&nf_nat_lock);
 		/* nf_conntrack_alter_reply might re-allocate exntension aera */
 		nat = nfct_nat(ct);
@@ -679,8 +676,10 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
 
 static int __net_init nf_nat_net_init(struct net *net)
 {
-	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
-						      &net->ipv4.nat_vmalloced, 0);
+	/* Leave them the same for the moment. */
+	net->ipv4.nat_htable_size = net->ct.htable_size;
+	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size,
+						       &net->ipv4.nat_vmalloced, 0);
 	if (!net->ipv4.nat_bysource)
 		return -ENOMEM;
 	return 0;
@@ -703,7 +702,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
 	nf_ct_iterate_cleanup(net, &clean_nat, NULL);
 	synchronize_rcu();
 	nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
-			     nf_nat_htable_size);
+			     net->ipv4.nat_htable_size);
 }
 
 static struct pernet_operations nf_nat_net_ops = {
@@ -724,9 +723,6 @@ static int __init nf_nat_init(void)
 		return ret;
 	}
 
-	/* Leave them the same for the moment. */
-	nf_nat_htable_size = nf_conntrack_htable_size;
-
 	ret = register_pernet_subsys(&nf_nat_net_ops);
 	if (ret < 0)
 		goto cleanup_extend;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9de4bd4c0dd7..4d79e3c1616c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -30,6 +30,7 @@
 #include <linux/netdevice.h>
 #include <linux/socket.h>
 #include <linux/mm.h>
+#include <linux/nsproxy.h>
 #include <linux/rculist_nulls.h>
 
 #include <net/netfilter/nf_conntrack.h>
@@ -84,9 +85,10 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
 	return ((u64)h * size) >> 32;
 }
 
-static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
+static inline u_int32_t hash_conntrack(const struct net *net,
+				       const struct nf_conntrack_tuple *tuple)
 {
-	return __hash_conntrack(tuple, nf_conntrack_htable_size,
+	return __hash_conntrack(tuple, net->ct.htable_size,
 				nf_conntrack_hash_rnd);
 }
 
@@ -294,7 +296,7 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_node *n;
-	unsigned int hash = hash_conntrack(tuple);
+	unsigned int hash = hash_conntrack(net, tuple);
 
 	/* Disable BHs the entire time since we normally need to disable them
 	 * at least once for the stats anyway.
@@ -364,10 +366,11 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
 
 void nf_conntrack_hash_insert(struct nf_conn *ct)
 {
+	struct net *net = nf_ct_net(ct);
 	unsigned int hash, repl_hash;
 
-	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
 	__nf_conntrack_hash_insert(ct, hash, repl_hash);
 }
@@ -395,8 +398,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
 		return NF_ACCEPT;
 
-	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
 	/* We're not in hash table, and we refuse to set up related
 	   connections for unconfirmed conns.  But packet copies and
@@ -466,7 +469,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 	struct net *net = nf_ct_net(ignored_conntrack);
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_node *n;
-	unsigned int hash = hash_conntrack(tuple);
+	unsigned int hash = hash_conntrack(net, tuple);
 
 	/* Disable BHs the entire time since we need to disable them at
 	 * least once for the stats anyway.
@@ -501,7 +504,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
 	int dropped = 0;
 
 	rcu_read_lock();
-	for (i = 0; i < nf_conntrack_htable_size; i++) {
+	for (i = 0; i < net->ct.htable_size; i++) {
 		hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
 					 hnnode) {
 			tmp = nf_ct_tuplehash_to_ctrack(h);
@@ -521,7 +524,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
 		if (cnt >= NF_CT_EVICTION_RANGE)
 			break;
 
-		hash = (hash + 1) % nf_conntrack_htable_size;
+		hash = (hash + 1) % net->ct.htable_size;
 	}
 	rcu_read_unlock();
 
@@ -555,7 +558,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
 
 	if (nf_conntrack_max &&
 	    unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
-		unsigned int hash = hash_conntrack(orig);
+		unsigned int hash = hash_conntrack(net, orig);
 		if (!early_drop(net, hash)) {
 			atomic_dec(&net->ct.count);
 			if (net_ratelimit())
@@ -1012,7 +1015,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
 	struct hlist_nulls_node *n;
 
 	spin_lock_bh(&nf_conntrack_lock);
-	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
+	for (; *bucket < net->ct.htable_size; (*bucket)++) {
 		hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
 			ct = nf_ct_tuplehash_to_ctrack(h);
 			if (iter(ct, data))
@@ -1130,7 +1133,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
 	}
 
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
-			     nf_conntrack_htable_size);
+			     net->ct.htable_size);
 	nf_conntrack_ecache_fini(net);
 	nf_conntrack_acct_fini(net);
 	nf_conntrack_expect_fini(net);
@@ -1190,10 +1193,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 {
 	int i, bucket, vmalloced, old_vmalloced;
 	unsigned int hashsize, old_size;
-	int rnd;
 	struct hlist_nulls_head *hash, *old_hash;
 	struct nf_conntrack_tuple_hash *h;
 
+	if (current->nsproxy->net_ns != &init_net)
+		return -EOPNOTSUPP;
+
 	/* On boot, we can set this without any fancy locking. */
 	if (!nf_conntrack_htable_size)
 		return param_set_uint(val, kp);
@@ -1206,33 +1211,29 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 	if (!hash)
 		return -ENOMEM;
 
-	/* We have to rehahs for the new table anyway, so we also can
-	 * use a newrandom seed */
-	get_random_bytes(&rnd, sizeof(rnd));
-
 	/* Lookups in the old hash might happen in parallel, which means we
 	 * might get false negatives during connection lookup. New connections
 	 * created because of a false negative won't make it into the hash
 	 * though since that required taking the lock.
 	 */
 	spin_lock_bh(&nf_conntrack_lock);
-	for (i = 0; i < nf_conntrack_htable_size; i++) {
+	for (i = 0; i < init_net.ct.htable_size; i++) {
 		while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
 			h = hlist_nulls_entry(init_net.ct.hash[i].first,
 					struct nf_conntrack_tuple_hash, hnnode);
 			hlist_nulls_del_rcu(&h->hnnode);
-			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
+			bucket = __hash_conntrack(&h->tuple, hashsize,
+						  nf_conntrack_hash_rnd);
 			hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
 		}
 	}
-	old_size = nf_conntrack_htable_size;
+	old_size = init_net.ct.htable_size;
 	old_vmalloced = init_net.ct.hash_vmalloc;
 	old_hash = init_net.ct.hash;
 
-	nf_conntrack_htable_size = hashsize;
+	init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
 	init_net.ct.hash_vmalloc = vmalloced;
 	init_net.ct.hash = hash;
-	nf_conntrack_hash_rnd = rnd;
 	spin_unlock_bh(&nf_conntrack_lock);
 
 	nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
@@ -1328,7 +1329,9 @@ static int nf_conntrack_init_net(struct net *net)
 		ret = -ENOMEM;
 		goto err_cache;
 	}
-	net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
+
+	net->ct.htable_size = nf_conntrack_htable_size;
+	net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size,
 					     &net->ct.hash_vmalloc, 1);
 	if (!net->ct.hash) {
 		ret = -ENOMEM;
@@ -1353,7 +1356,7 @@ err_acct:
 	nf_conntrack_expect_fini(net);
 err_expect:
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
-			     nf_conntrack_htable_size);
+			     net->ct.htable_size);
 err_hash:
 	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
 err_cache:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 4ad7d1d809af..2f25ff610982 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -577,7 +577,7 @@ int nf_conntrack_expect_init(struct net *net)
 
 	if (net_eq(net, &init_net)) {
 		if (!nf_ct_expect_hsize) {
-			nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
+			nf_ct_expect_hsize = net->ct.htable_size / 256;
 			if (!nf_ct_expect_hsize)
 				nf_ct_expect_hsize = 1;
 		}
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 65c2a7bc3afc..4b1a56bd074c 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -192,7 +192,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
 	/* Get rid of expecteds, set helpers to NULL. */
 	hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode)
 		unhelp(h, me);
-	for (i = 0; i < nf_conntrack_htable_size; i++) {
+	for (i = 0; i < net->ct.htable_size; i++) {
 		hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
 			unhelp(h, me);
 	}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 42f21c01a93e..0ffe689dfe97 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -594,7 +594,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 
 	rcu_read_lock();
 	last = (struct nf_conn *)cb->args[1];
-	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
+	for (; cb->args[0] < init_net.ct.htable_size; cb->args[0]++) {
 restart:
 		hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]],
 					 hnnode) {
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 028aba667ef7..e310f1561bb2 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -51,7 +51,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 	struct hlist_nulls_node *n;
 
 	for (st->bucket = 0;
-	     st->bucket < nf_conntrack_htable_size;
+	     st->bucket < net->ct.htable_size;
 	     st->bucket++) {
 		n = rcu_dereference(net->ct.hash[st->bucket].first);
 		if (!is_a_nulls(n))
@@ -69,7 +69,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
 	head = rcu_dereference(head->next);
 	while (is_a_nulls(head)) {
 		if (likely(get_nulls_value(head) == st->bucket)) {
-			if (++st->bucket >= nf_conntrack_htable_size)
+			if (++st->bucket >= net->ct.htable_size)
 				return NULL;
 		}
 		head = rcu_dereference(net->ct.hash[st->bucket].first);
@@ -355,7 +355,7 @@ static ctl_table nf_ct_sysctl_table[] = {
 	},
 	{
 		.procname       = "nf_conntrack_buckets",
-		.data           = &nf_conntrack_htable_size,
+		.data           = &init_net.ct.htable_size,
 		.maxlen         = sizeof(unsigned int),
 		.mode           = 0444,
 		.proc_handler   = proc_dointvec,
@@ -421,6 +421,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
 		goto out_kmemdup;
 
 	table[1].data = &net->ct.count;
+	table[2].data = &net->ct.htable_size;
 	table[3].data = &net->ct.sysctl_checksum;
 	table[4].data = &net->ct.sysctl_log_invalid;
 
-- 
cgit v1.2.3


From 69c9700b544e496dc3ccf472a4f3a76dcf4abaf7 Mon Sep 17 00:00:00 2001
From: Marcin Kościelnicki <koriakin@0x04.net>
Date: Tue, 26 Jan 2010 18:39:20 +0000
Subject: drm/nouveau: Add getparam to get available PGRAPH units.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On nv50, this will be needed by applications using CUDA to know
how much stack/local memory to allocate.

Signed-off-by: Marcin Kościelnicki <koriakin@0x04.net>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_reg.h   | 1 +
 drivers/gpu/drm/nouveau/nouveau_state.c | 9 +++++++++
 include/drm/nouveau_drm.h               | 1 +
 3 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h
index 251f1b3b38b9..aa9b310e41be 100644
--- a/drivers/gpu/drm/nouveau/nouveau_reg.h
+++ b/drivers/gpu/drm/nouveau/nouveau_reg.h
@@ -99,6 +99,7 @@
  * the card will hang early on in the X init process.
  */
 #    define NV_PMC_ENABLE_UNK13                               (1<<13)
+#define NV40_PMC_GRAPH_UNITS				   0x00001540
 #define NV40_PMC_BACKLIGHT				   0x000015f0
 #	define NV40_PMC_BACKLIGHT_MASK			   0x001f0000
 #define NV40_PMC_1700                                      0x00001700
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 241e24d60eb4..fcd7610817a1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -825,6 +825,15 @@ int nouveau_ioctl_getparam(struct drm_device *dev, void *data,
 	case NOUVEAU_GETPARAM_VM_VRAM_BASE:
 		getparam->value = dev_priv->vm_vram_base;
 		break;
+	case NOUVEAU_GETPARAM_GRAPH_UNITS:
+		/* NV40 and NV50 versions are quite different, but register
+		 * address is the same. User is supposed to know the card
+		 * family anyway... */
+		if (dev_priv->chipset >= 0x40) {
+			getparam->value = nv_rd32(dev, NV40_PMC_GRAPH_UNITS);
+			break;
+		}
+		/* FALLTHRU */
 	default:
 		NV_ERROR(dev, "unknown parameter %lld\n", getparam->param);
 		return -EINVAL;
diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h
index 1e67c441ea82..f745948b61e4 100644
--- a/include/drm/nouveau_drm.h
+++ b/include/drm/nouveau_drm.h
@@ -77,6 +77,7 @@ struct drm_nouveau_gpuobj_free {
 #define NOUVEAU_GETPARAM_PCI_PHYSICAL    10
 #define NOUVEAU_GETPARAM_CHIPSET_ID      11
 #define NOUVEAU_GETPARAM_VM_VRAM_BASE    12
+#define NOUVEAU_GETPARAM_GRAPH_UNITS     13
 struct drm_nouveau_getparam {
 	uint64_t param;
 	uint64_t value;
-- 
cgit v1.2.3


From f77cef3db357aeea22d82a2aa4f0ef8fbae41d47 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Tue, 9 Feb 2010 19:41:55 +0000
Subject: drm/vmwgfx: Update the user-space interface.

When time-based throttling is implemented, we need to bump minor.
When the old way of detecting scanout is removed, we need to bump major.
In the meantime, this change should not break existing user-space.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Jakob Bornecrantz <jakob@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h      |  6 +++---
 drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c    |  6 ++++++
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c |  5 +++--
 include/drm/vmwgfx_drm.h                 | 12 +++++++++---
 4 files changed, 21 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 135be9688c90..0eaf68273eaf 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -39,10 +39,10 @@
 #include "ttm/ttm_execbuf_util.h"
 #include "ttm/ttm_module.h"
 
-#define VMWGFX_DRIVER_DATE "20090724"
+#define VMWGFX_DRIVER_DATE "20100118"
 #define VMWGFX_DRIVER_MAJOR 0
-#define VMWGFX_DRIVER_MINOR 1
-#define VMWGFX_DRIVER_PATCHLEVEL 2
+#define VMWGFX_DRIVER_MINOR 9
+#define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
 #define VMWGFX_MAX_RELOCATIONS 2048
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index 778851f9f1d6..1c7a316454d8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -48,6 +48,12 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 	case DRM_VMW_PARAM_FIFO_OFFSET:
 		param->value = dev_priv->mmio_start;
 		break;
+	case DRM_VMW_PARAM_HW_CAPS:
+		param->value = dev_priv->capabilities;
+		break;
+	case DRM_VMW_PARAM_FIFO_CAPS:
+		param->value = dev_priv->fifo.capabilities;
+		break;
 	default:
 		DRM_ERROR("Illegal vmwgfx get param request: %d\n",
 			  param->param);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index c7efbd47ab84..933e90d82866 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -610,9 +610,10 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 		 */
 		srf->flags &= ~SVGA3D_SURFACE_HINT_SCANOUT;
 		srf->scanout = true;
-	} else {
+	} else if (req->scanout)
+		srf->scanout = true;
+	else
 		srf->scanout = false;
-	}
 
 	if (srf->scanout &&
 	    srf->num_sizes == 1 &&
diff --git a/include/drm/vmwgfx_drm.h b/include/drm/vmwgfx_drm.h
index 2be7e1249b6f..dfaf3c2d2c8e 100644
--- a/include/drm/vmwgfx_drm.h
+++ b/include/drm/vmwgfx_drm.h
@@ -68,7 +68,8 @@
 #define DRM_VMW_PARAM_NUM_FREE_STREAMS 1
 #define DRM_VMW_PARAM_3D               2
 #define DRM_VMW_PARAM_FIFO_OFFSET      3
-
+#define DRM_VMW_PARAM_HW_CAPS          4
+#define DRM_VMW_PARAM_FIFO_CAPS        5
 
 /**
  * struct drm_vmw_getparam_arg
@@ -181,6 +182,8 @@ struct drm_vmw_context_arg {
  * The size of the array should equal the total number of mipmap levels.
  * @shareable: Boolean whether other clients (as identified by file descriptors)
  * may reference this surface.
+ * @scanout: Boolean whether the surface is intended to be used as a
+ * scanout.
  *
  * Input data to the DRM_VMW_CREATE_SURFACE Ioctl.
  * Output data from the DRM_VMW_REF_SURFACE Ioctl.
@@ -192,7 +195,7 @@ struct drm_vmw_surface_create_req {
 	uint32_t mip_levels[DRM_VMW_MAX_SURFACE_FACES];
 	uint64_t size_addr;
 	int32_t shareable;
-	uint32_t pad64;
+	int32_t scanout;
 };
 
 /**
@@ -295,6 +298,9 @@ union drm_vmw_surface_reference_arg {
  *
  * @commands: User-space address of a command buffer cast to an uint64_t.
  * @command-size: Size in bytes of the command buffer.
+ * @throttle-us: Sleep until software is less than @throttle_us
+ * microseconds ahead of hardware. The driver may round this value
+ * to the nearest kernel tick.
  * @fence_rep: User-space address of a struct drm_vmw_fence_rep cast to an
  * uint64_t.
  *
@@ -304,7 +310,7 @@ union drm_vmw_surface_reference_arg {
 struct drm_vmw_execbuf_arg {
 	uint64_t commands;
 	uint32_t command_size;
-	uint32_t pad64;
+	uint32_t throttle_us;
 	uint64_t fence_rep;
 };
 
-- 
cgit v1.2.3


From a87897edbae2d60db7bcb6bb0a75e82013d68305 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Tue, 9 Feb 2010 21:29:47 +0000
Subject: drm/vmwgfx: Drop scanout flag compat and add execbuf ioctl parameter
 members. Bumps major.

Even if this bumps the version to 1 it does not mean the driver is
out of staging. From what we know this is the last backwards
incompatible change to the driver.

Signed-off-by: Jakob Bornecrantz <jakob@vmware.com>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h      |  6 +++---
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 17 +----------------
 include/drm/vmwgfx_drm.h                 |  8 ++++++++
 3 files changed, 12 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 0eaf68273eaf..3e4e670d3216 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -39,9 +39,9 @@
 #include "ttm/ttm_execbuf_util.h"
 #include "ttm/ttm_module.h"
 
-#define VMWGFX_DRIVER_DATE "20100118"
-#define VMWGFX_DRIVER_MAJOR 0
-#define VMWGFX_DRIVER_MINOR 9
+#define VMWGFX_DRIVER_DATE "20100209"
+#define VMWGFX_DRIVER_MAJOR 1
+#define VMWGFX_DRIVER_MINOR 0
 #define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 933e90d82866..f8fbbc67a406 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -35,11 +35,6 @@
 #define VMW_RES_SURFACE ttm_driver_type1
 #define VMW_RES_STREAM ttm_driver_type2
 
-/* XXX: This isn't a real hardware flag, but just a hack for kernel to
- * know about primary surfaces. Find a better way to accomplish this.
- */
-#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9)
-
 struct vmw_user_context {
 	struct ttm_base_object base;
 	struct vmw_resource res;
@@ -579,6 +574,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 
 	srf->flags = req->flags;
 	srf->format = req->format;
+	srf->scanout = req->scanout;
 	memcpy(srf->mip_levels, req->mip_levels, sizeof(srf->mip_levels));
 	srf->num_sizes = 0;
 	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
@@ -604,17 +600,6 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 	if (unlikely(ret != 0))
 		goto out_err1;
 
-	if (srf->flags & SVGA3D_SURFACE_HINT_SCANOUT) {
-		/* we should not send this flag down to hardware since
-		 * its not a official one
-		 */
-		srf->flags &= ~SVGA3D_SURFACE_HINT_SCANOUT;
-		srf->scanout = true;
-	} else if (req->scanout)
-		srf->scanout = true;
-	else
-		srf->scanout = false;
-
 	if (srf->scanout &&
 	    srf->num_sizes == 1 &&
 	    srf->sizes[0].width == 64 &&
diff --git a/include/drm/vmwgfx_drm.h b/include/drm/vmwgfx_drm.h
index dfaf3c2d2c8e..c7645f480d12 100644
--- a/include/drm/vmwgfx_drm.h
+++ b/include/drm/vmwgfx_drm.h
@@ -303,15 +303,23 @@ union drm_vmw_surface_reference_arg {
  * to the nearest kernel tick.
  * @fence_rep: User-space address of a struct drm_vmw_fence_rep cast to an
  * uint64_t.
+ * @version: Allows expanding the execbuf ioctl parameters without breaking
+ * backwards compatibility, since user-space will always tell the kernel
+ * which version it uses.
+ * @flags: Execbuf flags. None currently.
  *
  * Argument to the DRM_VMW_EXECBUF Ioctl.
  */
 
+#define DRM_VMW_EXECBUF_VERSION 0
+
 struct drm_vmw_execbuf_arg {
 	uint64_t commands;
 	uint32_t command_size;
 	uint32_t throttle_us;
 	uint64_t fence_rep;
+	 uint32_t version;
+	 uint32_t flags;
 };
 
 /**
-- 
cgit v1.2.3


From 5a5e0f4c7038168e38d1db6af09d1ac715ee9888 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Wed, 27 Jan 2010 17:09:38 +0300
Subject: kfifo: Don't use integer as NULL pointer

This patch fixes following sparse warnings:

include/linux/kfifo.h:127:25: warning: Using plain integer as NULL pointer
kernel/kfifo.c:83:21: warning: Using plain integer as NULL pointer

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kfifo.h | 2 +-
 kernel/kfifo.c        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 6f6c5f300af6..bc0fc795bd35 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -124,7 +124,7 @@ extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo,
  */
 static inline bool kfifo_initialized(struct kfifo *fifo)
 {
-	return fifo->buffer != 0;
+	return fifo->buffer != NULL;
 }
 
 /**
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 559fb5582b60..35edbe22e9a9 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -80,7 +80,7 @@ int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask)
 
 	buffer = kmalloc(size, gfp_mask);
 	if (!buffer) {
-		_kfifo_init(fifo, 0, 0);
+		_kfifo_init(fifo, NULL, 0);
 		return -ENOMEM;
 	}
 
-- 
cgit v1.2.3


From 3b77fd8ee6a8ae34e349651e9d5f5000d1cc206e Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Wed, 17 Feb 2010 12:21:45 -0800
Subject: Input: add KEY_RFKILL

Most laptops have keys that are intended to toggle all device state, not
just wifi. These are currently generally mapped to KEY_WLAN. As a result,
rfkill will only kill or enable wifi in response to the key press. This
confuses users and can make it difficult for them to enable bluetooth
and wwan devices.

This patch adds a new keycode, KEY_RFKILL. It indicates that the system
should toggle the state of all rfkillable devices.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/input.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/input.h b/include/linux/input.h
index 735ceaf1bc2d..663208afb64c 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -376,6 +376,7 @@ struct input_absinfo {
 #define KEY_DISPLAY_OFF		245	/* display device to off state */
 
 #define KEY_WIMAX		246
+#define KEY_RFKILL		247	/* Key that controls all radios */
 
 /* Range 248 - 255 is reserved for special needs of AT keyboard driver */
 
-- 
cgit v1.2.3


From 4e70af56319e56423d6eb1ce25fc321cdf8cd41d Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Mon, 15 Feb 2010 11:16:11 +0000
Subject: fs: inode - remove 8 bytes of padding on 64bits allowing 1 more
 objects/slab under slub

This removes 8 bytes of padding from struct inode on 64bit builds, and
so allows 1 more object/slab in the inode_cache when using slub.

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>
----
patch against 2.6.33-rc8
compiled & tested on x86_64 AMDX2

I've been running this patch for over a week with no obvious problems
regards
Richard
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index b1bcb275b596..ebb1cd5bc241 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -729,6 +729,7 @@ struct inode {
 	uid_t			i_uid;
 	gid_t			i_gid;
 	dev_t			i_rdev;
+	unsigned int		i_blkbits;
 	u64			i_version;
 	loff_t			i_size;
 #ifdef __NEED_I_SIZE_ORDERED
@@ -738,7 +739,6 @@ struct inode {
 	struct timespec		i_mtime;
 	struct timespec		i_ctime;
 	blkcnt_t		i_blocks;
-	unsigned int		i_blkbits;
 	unsigned short          i_bytes;
 	umode_t			i_mode;
 	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
-- 
cgit v1.2.3


From 4e10ae11317b238609fc3ec9d50a5dee9473e045 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Sat, 20 Feb 2010 09:41:30 +0100
Subject: ARM: 5951/1: ARM: fix documentation of the PrimeCell bus

This fixes the filepath encoded in <linux/amba/bus.h> and adds
some documentation as to what this bus really means.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/amba/bus.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index ab94335b4bb9..6816be6c3f77 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -1,5 +1,9 @@
 /*
- *  linux/include/asm-arm/hardware/amba.h
+ *  linux/include/amba/bus.h
+ *
+ *  This device type deals with ARM PrimeCells and anything else that
+ *  presents a proper CID (0xB105F00D) at the end of the I/O register
+ *  region or that is derived from a PrimeCell.
  *
  *  Copyright (C) 2003 Deep Blue Solutions Ltd, All Rights Reserved.
  *
-- 
cgit v1.2.3


From 79da0644a8e0838522828f106e4049639eea6baf Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Tue, 23 Feb 2010 08:40:43 +0100
Subject: Revert "block: improve queue_should_plug() by looking at IO depths"

This reverts commit fb1e75389bd06fd5987e9cda1b4e0305c782f854.

"Benjamin S." <sbenni@gmx.de> reports that the patch in question
causes a big drop in sequential throughput for him, dropping from
200MB/sec down to only 70MB/sec.

Needs to be investigated more fully, for now lets just revert the
offending commit.

Conflicts:

	include/linux/blkdev.h

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c       | 11 ++---------
 include/linux/blkdev.h |  4 +---
 2 files changed, 3 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 718897e6d37f..d1a9a0a64f95 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1147,7 +1147,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
  */
 static inline bool queue_should_plug(struct request_queue *q)
 {
-	return !(blk_queue_nonrot(q) && blk_queue_queuing(q));
+	return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
 }
 
 static int __make_request(struct request_queue *q, struct bio *bio)
@@ -1859,15 +1859,8 @@ void blk_dequeue_request(struct request *rq)
 	 * and to it is freed is accounted as io that is in progress at
 	 * the driver side.
 	 */
-	if (blk_account_rq(rq)) {
+	if (blk_account_rq(rq))
 		q->in_flight[rq_is_sync(rq)]++;
-		/*
-		 * Mark this device as supporting hardware queuing, if
-		 * we have more IOs in flight than 4.
-		 */
-		if (!blk_queue_queuing(q) && queue_in_flight(q) > 4)
-			set_bit(QUEUE_FLAG_CQ, &q->queue_flags);
-	}
 }
 
 /**
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5c8018977efa..1896e868854f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -461,8 +461,7 @@ struct request_queue
 #define QUEUE_FLAG_NONROT      14	/* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
 #define QUEUE_FLAG_IO_STAT     15	/* do IO stats */
-#define QUEUE_FLAG_CQ	       16	/* hardware does queuing */
-#define QUEUE_FLAG_DISCARD     17	/* supports DISCARD */
+#define QUEUE_FLAG_DISCARD     16	/* supports DISCARD */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_CLUSTER) |		\
@@ -586,7 +585,6 @@ enum {
 
 #define blk_queue_plugged(q)	test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
-#define blk_queue_queuing(q)	test_bit(QUEUE_FLAG_CQ, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_nonrot(q)	test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
-- 
cgit v1.2.3