From 308a878210cde6ab19df9f392c24db53ad6f56bf Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@efs.americas.sgi.com>
Date: Tue, 18 Apr 2006 11:26:34 -0500
Subject: [IA64] Remove unused variable in sn_sal.h

cnodeid was being set but not used.  The dead code was
left over from a previous version that grabbed a per node lock.

Signed-off-by: Russ Anderson (rja@sgi.com)
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/asm-ia64/sn/sn_sal.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h
index bf4cc867a69..7ddce80e191 100644
--- a/include/asm-ia64/sn/sn_sal.h
+++ b/include/asm-ia64/sn/sn_sal.h
@@ -8,7 +8,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.  All rights reserved.
  */
 
 
@@ -705,10 +705,8 @@ static inline int
 sn_change_memprotect(u64 paddr, u64 len, u64 perms, u64 *nasid_array)
 {
 	struct ia64_sal_retval ret_stuff;
-	int cnodeid;
 	unsigned long irq_flags;
 
-	cnodeid = nasid_to_cnodeid(get_node_number(paddr));
 	local_irq_save(irq_flags);
 	ia64_sal_oemcall_nolock(&ret_stuff, SN_SAL_MEMPROTECT, paddr, len,
 				(u64)nasid_array, perms, 0, 0, 0);
-- 
cgit v1.2.3


From 0d9adec525b87d8ab7e64efeabffb5b3f293056e Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 18 Apr 2006 15:00:45 -0500
Subject: [IA64] - Fix MAX_PXM_DOMAINS for systems with > 256 nodes

Correctly size the PXM-related arrays for systems that have more than
256 nodes.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/asm-ia64/acpi.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-ia64/acpi.h b/include/asm-ia64/acpi.h
index d734585a23c..09a5dd0e44a 100644
--- a/include/asm-ia64/acpi.h
+++ b/include/asm-ia64/acpi.h
@@ -110,9 +110,8 @@ extern void prefill_possible_map(void);
 extern int additional_cpus;
 
 #ifdef CONFIG_ACPI_NUMA
-/* Proximity bitmap length; _PXM is at most 255 (8 bit)*/
-#ifdef CONFIG_IA64_NR_NODES
-#define MAX_PXM_DOMAINS CONFIG_IA64_NR_NODES
+#if MAX_NUMNODES > 256
+#define MAX_PXM_DOMAINS MAX_NUMNODES
 #else
 #define MAX_PXM_DOMAINS (256)
 #endif
-- 
cgit v1.2.3


From 86db2f4239e2556cd37b853c2307aa9d43041458 Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@efs.americas.sgi.com>
Date: Thu, 20 Apr 2006 17:05:43 -0700
Subject: [IA64-SGI] SN SAL call to inject memory errors

The SGI Altix SAL provides an interface for modifying
the ECC on memory to create memory errors.  The SAL call
can be used to inject memory errors for testing MCA recovery
code.

Signed-off-by: Russ Anderson (rja@sgi.com)
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/asm-ia64/sn/sn_sal.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h
index 7ddce80e191..51aca022cf3 100644
--- a/include/asm-ia64/sn/sn_sal.h
+++ b/include/asm-ia64/sn/sn_sal.h
@@ -85,6 +85,7 @@
 
 #define  SN_SAL_GET_PROM_FEATURE_SET		   0x02000065
 #define  SN_SAL_SET_OS_FEATURE_SET		   0x02000066
+#define  SN_SAL_INJECT_ERROR			   0x02000067
 
 /*
  * Service-specific constants
@@ -1138,4 +1139,16 @@ ia64_sn_set_os_feature(int feature)
 	return rv.status;
 }
 
+static inline int
+sn_inject_error(u64 paddr, u64 *data, u64 *ecc)
+{
+	struct ia64_sal_retval ret_stuff;
+	unsigned long irq_flags;
+
+	local_irq_save(irq_flags);
+	ia64_sal_oemcall_nolock(&ret_stuff, SN_SAL_INJECT_ERROR, paddr, (u64)data,
+				(u64)ecc, 0, 0, 0, 0);
+	local_irq_restore(irq_flags);
+	return ret_stuff.status;
+}
 #endif /* _ASM_IA64_SN_SN_SAL_H */
-- 
cgit v1.2.3


From a72391e42f0a13116995045b3d492d660f96697d Mon Sep 17 00:00:00 2001
From: Satoru Takeuchi <takeuchi_satoru@jp.fujitsu.com>
Date: Thu, 20 Apr 2006 18:49:48 +0900
Subject: [IA64] eliminate compile time warnings

This patch removes following compile time warnings:

drivers/pci/pci-sysfs.c: In function `pci_read_legacy_io':
drivers/pci/pci-sysfs.c:257: warning: implicit declaration of function `ia64_pci_legacy_read'
drivers/pci/pci-sysfs.c: In function `pci_write_legacy_io':
drivers/pci/pci-sysfs.c:280: warning: implicit declaration of function `ia64_pci_legacy_write'

It also fixes wrong definition of ia64_pci_legacy_write (type of `bus' is not
`pci_dev', but `pci_bus').

Signed-Off-By: Satoru Takeuchi <takeuchi_satoru@jp.fujitsu.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/pci/pci.c        | 3 +--
 include/asm-ia64/machvec.h | 2 ++
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 9ba32b2d96d..ab829a22f8a 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -31,7 +31,6 @@
 #include <asm/irq.h>
 #include <asm/hw_irq.h>
 
-
 /*
  * Low-level SAL-based PCI configuration access functions. Note that SAL
  * calls are already serialized (via sal_lock), so we don't need another
@@ -707,7 +706,7 @@ int ia64_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size)
  *
  * Simply writes @size bytes of @val to @port.
  */
-int ia64_pci_legacy_write(struct pci_dev *bus, u16 port, u32 val, u8 size)
+int ia64_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size)
 {
 	int ret = size;
 
diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h
index c3e4ed8a3e1..a9c995a86c2 100644
--- a/include/asm-ia64/machvec.h
+++ b/include/asm-ia64/machvec.h
@@ -347,9 +347,11 @@ extern ia64_mv_dma_supported		swiotlb_dma_supported;
 #endif
 #ifndef platform_pci_legacy_read
 # define platform_pci_legacy_read	ia64_pci_legacy_read
+extern int ia64_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size);
 #endif
 #ifndef platform_pci_legacy_write
 # define platform_pci_legacy_write	ia64_pci_legacy_write
+extern int ia64_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size);
 #endif
 #ifndef platform_inb
 # define platform_inb		__ia64_inb
-- 
cgit v1.2.3


From e5ecc192dfc5e0b325dd8c99ce4c755714c9acbf Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Thu, 13 Apr 2006 18:23:53 -0700
Subject: [IA64] Setup an IA64 specific reclaim distance

RECLAIM_DISTANCE is checked on bootup against the SLIT table distances.
Zone reclaim is important for system that have higher latencies but not for
systems that have multiple nodes on one motherboard and therefore low latencies.

We found that on motherboard latencies are typically 1 to 1.4 of local memory
access speed whereas multinode systems which benefit from zone reclaim have
usually more than 1.5 times the latency of a local access.

Set the reclaim distance for IA64 to 1.5 times.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/asm-ia64/topology.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/asm-ia64/topology.h b/include/asm-ia64/topology.h
index 3ee19dfa46d..616b5ed2aa7 100644
--- a/include/asm-ia64/topology.h
+++ b/include/asm-ia64/topology.h
@@ -22,6 +22,11 @@
 /* Nodes w/o CPUs are preferred for memory allocations, see build_zonelists */
 #define PENALTY_FOR_NODE_WITH_CPUS 255
 
+/*
+ * Distance above which we begin to use zone reclaim
+ */
+#define RECLAIM_DISTANCE 15
+
 /*
  * Returns the number of the node containing CPU 'cpu'
  */
-- 
cgit v1.2.3


From 818667f7c40dd0bd14029b5ac1d0f5282e12310e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 20 Apr 2006 20:02:03 +0200
Subject: [PATCH] softmac: fix SIOCSIWAP

There are some bugs in the current implementation of the SIOCSIWAP wext,
for example that when you do it twice and it fails, it may still try
another access point for some reason. This patch fixes this by introducing
a new flag that tells the association code that the bssid that is in use
was fixed by the user and shouldn't be deviated from.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/ieee80211softmac.h                  |  5 ++++-
 net/ieee80211/softmac/ieee80211softmac_assoc.c  | 20 ++++++++++++++----
 net/ieee80211/softmac/ieee80211softmac_module.c |  2 ++
 net/ieee80211/softmac/ieee80211softmac_wx.c     | 27 ++++++++++++++++---------
 4 files changed, 39 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/net/ieee80211softmac.h b/include/net/ieee80211softmac.h
index 6b3693f05ca..b1ebfbae397 100644
--- a/include/net/ieee80211softmac.h
+++ b/include/net/ieee80211softmac.h
@@ -96,10 +96,13 @@ struct ieee80211softmac_assoc_info {
 	 *
 	 * bssvalid is true if we found a matching network
 	 * and saved it's BSSID into the bssid above.
+	 *
+	 * bssfixed is used for SIOCSIWAP.
 	 */
 	u8 static_essid:1,
 	   associating:1,
-	   bssvalid:1;
+	   bssvalid:1,
+	   bssfixed:1;
 
 	/* Scan retries remaining */
 	int scan_retry;
diff --git a/net/ieee80211/softmac/ieee80211softmac_assoc.c b/net/ieee80211/softmac/ieee80211softmac_assoc.c
index 4498023841d..fb79ce7d643 100644
--- a/net/ieee80211/softmac/ieee80211softmac_assoc.c
+++ b/net/ieee80211/softmac/ieee80211softmac_assoc.c
@@ -144,6 +144,12 @@ network_matches_request(struct ieee80211softmac_device *mac, struct ieee80211_ne
 	if (!we_support_all_basic_rates(mac, net->rates_ex, net->rates_ex_len))
 		return 0;
 
+	/* assume that users know what they're doing ...
+	 * (note we don't let them select a net we're incompatible with) */
+	if (mac->associnfo.bssfixed) {
+		return !memcmp(mac->associnfo.bssid, net->bssid, ETH_ALEN);
+	}
+
 	/* if 'ANY' network requested, take any that doesn't have privacy enabled */
 	if (mac->associnfo.req_essid.len == 0 
 	    && !(net->capability & WLAN_CAPABILITY_PRIVACY))
@@ -176,7 +182,7 @@ ieee80211softmac_assoc_work(void *d)
 		ieee80211softmac_disassoc(mac, WLAN_REASON_DISASSOC_STA_HAS_LEFT);
 
 	/* try to find the requested network in our list, if we found one already */
-	if (mac->associnfo.bssvalid)
+	if (mac->associnfo.bssvalid || mac->associnfo.bssfixed)
 		found = ieee80211softmac_get_network_by_bssid(mac, mac->associnfo.bssid);	
 	
 	/* Search the ieee80211 networks for this network if we didn't find it by bssid,
@@ -241,19 +247,25 @@ ieee80211softmac_assoc_work(void *d)
 			if (ieee80211softmac_start_scan(mac))
 				dprintk(KERN_INFO PFX "Associate: failed to initiate scan. Is device up?\n");
 			return;
-		}
-		else {
+		} else {
 			spin_lock_irqsave(&mac->lock, flags);
 			mac->associnfo.associating = 0;
 			mac->associated = 0;
 			spin_unlock_irqrestore(&mac->lock, flags);
 
 			dprintk(KERN_INFO PFX "Unable to find matching network after scan!\n");
+			/* reset the retry counter for the next user request since we
+			 * break out and don't reschedule ourselves after this point. */
+			mac->associnfo.scan_retry = IEEE80211SOFTMAC_ASSOC_SCAN_RETRY_LIMIT;
 			ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_ASSOCIATE_NET_NOT_FOUND, NULL);
 			return;
 		}
 	}
-	
+
+	/* reset the retry counter for the next user request since we
+	 * now found a net and will try to associate to it, but not
+	 * schedule this function again. */
+	mac->associnfo.scan_retry = IEEE80211SOFTMAC_ASSOC_SCAN_RETRY_LIMIT;
 	mac->associnfo.bssvalid = 1;
 	memcpy(mac->associnfo.bssid, found->bssid, ETH_ALEN);
 	/* copy the ESSID for displaying it */
diff --git a/net/ieee80211/softmac/ieee80211softmac_module.c b/net/ieee80211/softmac/ieee80211softmac_module.c
index 60f06a31f0d..be83bdc1644 100644
--- a/net/ieee80211/softmac/ieee80211softmac_module.c
+++ b/net/ieee80211/softmac/ieee80211softmac_module.c
@@ -45,6 +45,8 @@ struct net_device *alloc_ieee80211softmac(int sizeof_priv)
 	softmac->ieee->handle_disassoc = ieee80211softmac_handle_disassoc;
 	softmac->scaninfo = NULL;
 
+	softmac->associnfo.scan_retry = IEEE80211SOFTMAC_ASSOC_SCAN_RETRY_LIMIT;
+
 	/* TODO: initialise all the other callbacks in the ieee struct
 	 *	 (once they're written)
 	 */
diff --git a/net/ieee80211/softmac/ieee80211softmac_wx.c b/net/ieee80211/softmac/ieee80211softmac_wx.c
index 00f0d4f7189..27edb2b5581 100644
--- a/net/ieee80211/softmac/ieee80211softmac_wx.c
+++ b/net/ieee80211/softmac/ieee80211softmac_wx.c
@@ -27,7 +27,8 @@
 #include "ieee80211softmac_priv.h"
 
 #include <net/iw_handler.h>
-
+/* for is_broadcast_ether_addr and is_zero_ether_addr */
+#include <linux/etherdevice.h>
 
 int
 ieee80211softmac_wx_trigger_scan(struct net_device *net_dev,
@@ -83,7 +84,6 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev,
 			sm->associnfo.static_essid = 1;
 		}
 	}
-	sm->associnfo.scan_retry = IEEE80211SOFTMAC_ASSOC_SCAN_RETRY_LIMIT;
 
 	/* set our requested ESSID length.
 	 * If applicable, we have already copied the data in */
@@ -310,8 +310,6 @@ ieee80211softmac_wx_set_wap(struct net_device *net_dev,
 			    char *extra)
 {
 	struct ieee80211softmac_device *mac = ieee80211_priv(net_dev);
-	static const unsigned char any[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
-	static const unsigned char off[] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
 	unsigned long flags;
 
 	/* sanity check */
@@ -320,10 +318,17 @@ ieee80211softmac_wx_set_wap(struct net_device *net_dev,
 	}
 
 	spin_lock_irqsave(&mac->lock, flags);
-	if (!memcmp(any, data->ap_addr.sa_data, ETH_ALEN) ||
-	    !memcmp(off, data->ap_addr.sa_data, ETH_ALEN)) {
-		schedule_work(&mac->associnfo.work);
-		goto out;
+	if (is_broadcast_ether_addr(data->ap_addr.sa_data)) {
+		/* the bssid we have is not to be fixed any longer,
+		 * and we should reassociate to the best AP. */
+		mac->associnfo.bssfixed = 0;
+		/* force reassociation */
+		mac->associnfo.bssvalid = 0;
+		if (mac->associated)
+			schedule_work(&mac->associnfo.work);
+	} else if (is_zero_ether_addr(data->ap_addr.sa_data)) {
+		/* the bssid we have is no longer fixed */
+		mac->associnfo.bssfixed = 0;
         } else {
 		if (!memcmp(mac->associnfo.bssid, data->ap_addr.sa_data, ETH_ALEN)) {
 			if (mac->associnfo.associating || mac->associated) {
@@ -333,12 +338,14 @@ ieee80211softmac_wx_set_wap(struct net_device *net_dev,
 		} else {
 			/* copy new value in data->ap_addr.sa_data to bssid */
 			memcpy(mac->associnfo.bssid, data->ap_addr.sa_data, ETH_ALEN);
-		}	
+		}
+		/* tell the other code that this bssid should be used no matter what */
+		mac->associnfo.bssfixed = 1;
 		/* queue associate if new bssid or (old one again and not associated) */
 		schedule_work(&mac->associnfo.work);
         }
 
-out:
+ out:
 	spin_unlock_irqrestore(&mac->lock, flags);
 	return 0;
 }
-- 
cgit v1.2.3


From 912d35f86781e64d73be1ef358f703c08905ac37 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Wed, 26 Apr 2006 10:59:21 +0200
Subject: [PATCH] Add support for the sys_vmsplice syscall

sys_splice() moves data to/from pipes with a file input/output. sys_vmsplice()
moves data to a pipe, with the input being a user address range instead.

This uses an approach suggested by Linus, where we can hold partial ranges
inside the pages[] map. Hopefully this will be useful for network
receive support as well.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 arch/ia64/kernel/entry.S                    |   1 +
 arch/powerpc/kernel/systbl.S                |   1 +
 arch/powerpc/platforms/cell/spu_callbacks.c |   1 +
 fs/splice.c                                 | 292 ++++++++++++++++++++++++----
 include/asm-i386/unistd.h                   |   3 +-
 include/asm-ia64/unistd.h                   |   3 +-
 include/asm-powerpc/unistd.h                |   3 +-
 include/asm-x86_64/unistd.h                 |   4 +-
 include/linux/syscalls.h                    |   3 +
 9 files changed, 268 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index e3079881121..bcb80ca5cf4 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1610,5 +1610,6 @@ sys_call_table:
 	data8 sys_get_robust_list
 	data8 sys_sync_file_range		// 1300
 	data8 sys_tee
+	data8 sys_vmsplice
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 8d152269050..0b98eea73c5 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -324,6 +324,7 @@ COMPAT_SYS(ppoll)
 SYSCALL(unshare)
 SYSCALL(splice)
 SYSCALL(tee)
+SYSCALL(vmsplice)
 
 /*
  * please add new calls to arch/powerpc/platforms/cell/spu_callbacks.c
diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c
index deb3afb9448..b283380a2a1 100644
--- a/arch/powerpc/platforms/cell/spu_callbacks.c
+++ b/arch/powerpc/platforms/cell/spu_callbacks.c
@@ -318,6 +318,7 @@ void *spu_syscall_table[] = {
 	[__NR_unshare]			sys_unshare,
 	[__NR_splice]			sys_splice,
 	[__NR_tee]			sys_tee,
+	[__NR_vmsplice]			sys_vmsplice,
 };
 
 long spu_sys_callback(struct spu_syscall_block *s)
diff --git a/fs/splice.c b/fs/splice.c
index 8c6030c762e..0b2c1f060ca 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -27,6 +27,7 @@
 #include <linux/buffer_head.h>
 #include <linux/module.h>
 #include <linux/syscalls.h>
+#include <linux/uio.h>
 
 /*
  * Passed to the actors
@@ -38,6 +39,22 @@ struct splice_desc {
 	loff_t pos;			/* file position */
 };
 
+struct partial_page {
+	unsigned int offset;
+	unsigned int len;
+};
+
+/*
+ * Passed to move_to_pipe
+ */
+struct splice_pipe_desc {
+	struct page **pages;		/* page map */
+	struct partial_page *partial;	/* pages[] may not be contig */
+	int nr_pages;			/* number of pages in map */
+	unsigned int flags;		/* splice flags */
+	struct pipe_buf_operations *ops;/* ops associated with output pipe */
+};
+
 /*
  * Attempt to steal a page from a pipe buffer. This should perhaps go into
  * a vm helper function, it's already simplified quite a bit by the
@@ -128,6 +145,19 @@ static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
 	kunmap(buf->page);
 }
 
+static void *user_page_pipe_buf_map(struct file *file,
+				    struct pipe_inode_info *pipe,
+				    struct pipe_buffer *buf)
+{
+	return kmap(buf->page);
+}
+
+static void user_page_pipe_buf_unmap(struct pipe_inode_info *pipe,
+				     struct pipe_buffer *buf)
+{
+	kunmap(buf->page);
+}
+
 static void page_cache_pipe_buf_get(struct pipe_inode_info *info,
 				    struct pipe_buffer *buf)
 {
@@ -143,19 +173,33 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = {
 	.get = page_cache_pipe_buf_get,
 };
 
+static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
+				    struct pipe_buffer *buf)
+{
+	return 1;
+}
+
+static struct pipe_buf_operations user_page_pipe_buf_ops = {
+	.can_merge = 0,
+	.map = user_page_pipe_buf_map,
+	.unmap = user_page_pipe_buf_unmap,
+	.release = page_cache_pipe_buf_release,
+	.steal = user_page_pipe_buf_steal,
+	.get = page_cache_pipe_buf_get,
+};
+
 /*
  * Pipe output worker. This sets up our pipe format with the page cache
  * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
  */
-static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
-			    int nr_pages, unsigned long len,
-			    unsigned int offset, unsigned int flags)
+static ssize_t move_to_pipe(struct pipe_inode_info *pipe,
+			    struct splice_pipe_desc *spd)
 {
-	int ret, do_wakeup, i;
+	int ret, do_wakeup, page_nr;
 
 	ret = 0;
 	do_wakeup = 0;
-	i = 0;
+	page_nr = 0;
 
 	if (pipe->inode)
 		mutex_lock(&pipe->inode->i_mutex);
@@ -171,27 +215,19 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
 		if (pipe->nrbufs < PIPE_BUFFERS) {
 			int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
 			struct pipe_buffer *buf = pipe->bufs + newbuf;
-			struct page *page = pages[i++];
-			unsigned long this_len;
 
-			this_len = PAGE_CACHE_SIZE - offset;
-			if (this_len > len)
-				this_len = len;
-
-			buf->page = page;
-			buf->offset = offset;
-			buf->len = this_len;
-			buf->ops = &page_cache_pipe_buf_ops;
+			buf->page = spd->pages[page_nr];
+			buf->offset = spd->partial[page_nr].offset;
+			buf->len = spd->partial[page_nr].len;
+			buf->ops = spd->ops;
 			pipe->nrbufs++;
+			page_nr++;
+			ret += buf->len;
+
 			if (pipe->inode)
 				do_wakeup = 1;
 
-			ret += this_len;
-			len -= this_len;
-			offset = 0;
-			if (!--nr_pages)
-				break;
-			if (!len)
+			if (!--spd->nr_pages)
 				break;
 			if (pipe->nrbufs < PIPE_BUFFERS)
 				continue;
@@ -199,7 +235,7 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
 			break;
 		}
 
-		if (flags & SPLICE_F_NONBLOCK) {
+		if (spd->flags & SPLICE_F_NONBLOCK) {
 			if (!ret)
 				ret = -EAGAIN;
 			break;
@@ -234,8 +270,8 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 	}
 
-	while (i < nr_pages)
-		page_cache_release(pages[i++]);
+	while (page_nr < spd->nr_pages)
+		page_cache_release(spd->pages[page_nr++]);
 
 	return ret;
 }
@@ -246,17 +282,24 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 			   unsigned int flags)
 {
 	struct address_space *mapping = in->f_mapping;
-	unsigned int loff, offset, nr_pages;
+	unsigned int loff, nr_pages;
 	struct page *pages[PIPE_BUFFERS];
+	struct partial_page partial[PIPE_BUFFERS];
 	struct page *page;
 	pgoff_t index, end_index;
 	loff_t isize;
-	size_t bytes;
-	int i, error;
+	size_t total_len;
+	int error;
+	struct splice_pipe_desc spd = {
+		.pages = pages,
+		.partial = partial,
+		.flags = flags,
+		.ops = &page_cache_pipe_buf_ops,
+	};
 
 	index = *ppos >> PAGE_CACHE_SHIFT;
-	loff = offset = *ppos & ~PAGE_CACHE_MASK;
-	nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	loff = *ppos & ~PAGE_CACHE_MASK;
+	nr_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
 	if (nr_pages > PIPE_BUFFERS)
 		nr_pages = PIPE_BUFFERS;
@@ -266,15 +309,15 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 	 * read-ahead if this is a non-zero offset (we are likely doing small
 	 * chunk splice and the page is already there) for a single page.
 	 */
-	if (!offset || nr_pages > 1)
-		do_page_cache_readahead(mapping, in, index, nr_pages);
+	if (!loff || spd.nr_pages > 1)
+		do_page_cache_readahead(mapping, in, index, spd.nr_pages);
 
 	/*
 	 * Now fill in the holes:
 	 */
 	error = 0;
-	bytes = 0;
-	for (i = 0; i < nr_pages; i++, index++) {
+	total_len = 0;
+	for (spd.nr_pages = 0; spd.nr_pages < nr_pages; spd.nr_pages++, index++) {
 		unsigned int this_len;
 
 		if (!len)
@@ -367,26 +410,29 @@ readpage:
 			 */
 			if (end_index == index) {
 				loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK);
-				if (bytes + loff > isize) {
+				if (total_len + loff > isize) {
 					page_cache_release(page);
 					break;
 				}
 				/*
 				 * force quit after adding this page
 				 */
-				nr_pages = i;
+				nr_pages = spd.nr_pages;
 				this_len = min(this_len, loff);
+				loff = 0;
 			}
 		}
 fill_it:
-		pages[i] = page;
-		bytes += this_len;
+		pages[spd.nr_pages] = page;
+		partial[spd.nr_pages].offset = loff;
+		partial[spd.nr_pages].len = this_len;
 		len -= this_len;
+		total_len += this_len;
 		loff = 0;
 	}
 
-	if (i)
-		return move_to_pipe(pipe, pages, i, bytes, offset, flags);
+	if (spd.nr_pages)
+		return move_to_pipe(pipe, &spd);
 
 	return error;
 }
@@ -1018,6 +1064,174 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 	return -EINVAL;
 }
 
+/*
+ * Map an iov into an array of pages and offset/length tupples. With the
+ * partial_page structure, we can map several non-contiguous ranges into
+ * our ones pages[] map instead of splitting that operation into pieces.
+ * Could easily be exported as a generic helper for other users, in which
+ * case one would probably want to add a 'max_nr_pages' parameter as well.
+ */
+static int get_iovec_page_array(const struct iovec __user *iov,
+				unsigned int nr_vecs, struct page **pages,
+				struct partial_page *partial)
+{
+	int buffers = 0, error = 0;
+
+	/*
+	 * It's ok to take the mmap_sem for reading, even
+	 * across a "get_user()".
+	 */
+	down_read(&current->mm->mmap_sem);
+
+	while (nr_vecs) {
+		unsigned long off, npages;
+		void __user *base;
+		size_t len;
+		int i;
+
+		/*
+		 * Get user address base and length for this iovec.
+		 */
+		error = get_user(base, &iov->iov_base);
+		if (unlikely(error))
+			break;
+		error = get_user(len, &iov->iov_len);
+		if (unlikely(error))
+			break;
+
+		/*
+		 * Sanity check this iovec. 0 read succeeds.
+		 */
+		if (unlikely(!len))
+			break;
+		error = -EFAULT;
+		if (unlikely(!base))
+			break;
+
+		/*
+		 * Get this base offset and number of pages, then map
+		 * in the user pages.
+		 */
+		off = (unsigned long) base & ~PAGE_MASK;
+		npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		if (npages > PIPE_BUFFERS - buffers)
+			npages = PIPE_BUFFERS - buffers;
+
+		error = get_user_pages(current, current->mm,
+				       (unsigned long) base, npages, 0, 0,
+				       &pages[buffers], NULL);
+
+		if (unlikely(error <= 0))
+			break;
+
+		/*
+		 * Fill this contiguous range into the partial page map.
+		 */
+		for (i = 0; i < error; i++) {
+			const int plen = min_t(size_t, len, PAGE_SIZE) - off;
+
+			partial[buffers].offset = off;
+			partial[buffers].len = plen;
+
+			off = 0;
+			len -= plen;
+			buffers++;
+		}
+
+		/*
+		 * We didn't complete this iov, stop here since it probably
+		 * means we have to move some of this into a pipe to
+		 * be able to continue.
+		 */
+		if (len)
+			break;
+
+		/*
+		 * Don't continue if we mapped fewer pages than we asked for,
+		 * or if we mapped the max number of pages that we have
+		 * room for.
+		 */
+		if (error < npages || buffers == PIPE_BUFFERS)
+			break;
+
+		nr_vecs--;
+		iov++;
+	}
+
+	up_read(&current->mm->mmap_sem);
+
+	if (buffers)
+		return buffers;
+
+	return error;
+}
+
+/*
+ * vmsplice splices a user address range into a pipe. It can be thought of
+ * as splice-from-memory, where the regular splice is splice-from-file (or
+ * to file). In both cases the output is a pipe, naturally.
+ *
+ * Note that vmsplice only supports splicing _from_ user memory to a pipe,
+ * not the other way around. Splicing from user memory is a simple operation
+ * that can be supported without any funky alignment restrictions or nasty
+ * vm tricks. We simply map in the user memory and fill them into a pipe.
+ * The reverse isn't quite as easy, though. There are two possible solutions
+ * for that:
+ *
+ *	- memcpy() the data internally, at which point we might as well just
+ *	  do a regular read() on the buffer anyway.
+ *	- Lots of nasty vm tricks, that are neither fast nor flexible (it
+ *	  has restriction limitations on both ends of the pipe).
+ *
+ * Alas, it isn't here.
+ *
+ */
+static long do_vmsplice(struct file *file, const struct iovec __user *iov,
+			unsigned long nr_segs, unsigned int flags)
+{
+	struct pipe_inode_info *pipe = file->f_dentry->d_inode->i_pipe;
+	struct page *pages[PIPE_BUFFERS];
+	struct partial_page partial[PIPE_BUFFERS];
+	struct splice_pipe_desc spd = {
+		.pages = pages,
+		.partial = partial,
+		.flags = flags,
+		.ops = &user_page_pipe_buf_ops,
+	};
+
+	if (unlikely(!pipe))
+		return -EBADF;
+	if (unlikely(nr_segs > UIO_MAXIOV))
+		return -EINVAL;
+	else if (unlikely(!nr_segs))
+		return 0;
+
+	spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial);
+	if (spd.nr_pages <= 0)
+		return spd.nr_pages;
+
+	return move_to_pipe(pipe, &spd);
+}
+
+asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
+			     unsigned long nr_segs, unsigned int flags)
+{
+	struct file *file;
+	long error;
+	int fput;
+
+	error = -EBADF;
+	file = fget_light(fd, &fput);
+	if (file) {
+		if (file->f_mode & FMODE_WRITE)
+			error = do_vmsplice(file, iov, nr_segs, flags);
+
+		fput_light(file, fput);
+	}
+
+	return error;
+}
+
 asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
 			   int fd_out, loff_t __user *off_out,
 			   size_t len, unsigned int flags)
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index d81d6cfc1bb..eb4b152c82f 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -321,8 +321,9 @@
 #define __NR_splice		313
 #define __NR_sync_file_range	314
 #define __NR_tee		315
+#define __NR_vmsplice		316
 
-#define NR_syscalls 316
+#define NR_syscalls 317
 
 /*
  * user-visible error numbers are in the range -1 - -128: see
diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h
index a40ebec6aee..7107763168b 100644
--- a/include/asm-ia64/unistd.h
+++ b/include/asm-ia64/unistd.h
@@ -290,12 +290,13 @@
 #define __NR_get_robust_list		1299
 #define __NR_sync_file_range		1300
 #define __NR_tee			1301
+#define __NR_vmsplice			1302
 
 #ifdef __KERNEL__
 
 #include <linux/config.h>
 
-#define NR_syscalls			278 /* length of syscall table */
+#define NR_syscalls			279 /* length of syscall table */
 
 #define __ARCH_WANT_SYS_RT_SIGACTION
 
diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h
index c612f1a6277..34325e29259 100644
--- a/include/asm-powerpc/unistd.h
+++ b/include/asm-powerpc/unistd.h
@@ -303,8 +303,9 @@
 #define __NR_unshare		282
 #define __NR_splice		283
 #define __NR_tee		284
+#define __NR_vmsplice		285
 
-#define __NR_syscalls		285
+#define __NR_syscalls		286
 
 #ifdef __KERNEL__
 #define __NR__exit __NR_exit
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index 98c36eae567..feb77cb8c04 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -615,8 +615,10 @@ __SYSCALL(__NR_splice, sys_splice)
 __SYSCALL(__NR_tee, sys_tee)
 #define __NR_sync_file_range	277
 __SYSCALL(__NR_sync_file_range, sys_sync_file_range)
+#define __NR_vmsplice		278
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
 
-#define __NR_syscall_max __NR_sync_file_range
+#define __NR_syscall_max __NR_vmsplice
 
 #ifndef __NO_STUBS
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index d3ebc0e68b2..3996960fc56 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -574,6 +574,9 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
 			   int fd_out, loff_t __user *off_out,
 			   size_t len, unsigned int flags);
 
+asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
+			     unsigned long nr_segs, unsigned int flags);
+
 asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags);
 
 asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
-- 
cgit v1.2.3


From 734cbc363b159caee158d5a83408c72d98bcacf0 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Tue, 25 Apr 2006 10:58:50 -0700
Subject: [PATCH] sky2: reschedule if irq still pending

This is a workaround for the case edge-triggered irq's. Several users
seem to have broken configurations sharing edge-triggered irq's. To avoid
losing IRQ's, reshedule if more work arrives.

The changes to netdevice.h are to extract the part that puts device
back in list into separate inline.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 drivers/net/sky2.c        | 20 ++++++++++++++++----
 include/linux/netdevice.h | 18 ++++++++++--------
 2 files changed, 26 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 67b0eab1658..618fde8622c 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -2093,6 +2093,7 @@ static int sky2_poll(struct net_device *dev0, int *budget)
 	int work_done = 0;
 	u32 status = sky2_read32(hw, B0_Y2_SP_EISR);
 
+ restart_poll:
 	if (unlikely(status & ~Y2_IS_STAT_BMU)) {
 		if (status & Y2_IS_HW_ERR)
 			sky2_hw_intr(hw);
@@ -2123,7 +2124,7 @@ static int sky2_poll(struct net_device *dev0, int *budget)
 	}
 
 	if (status & Y2_IS_STAT_BMU) {
-		work_done = sky2_status_intr(hw, work_limit);
+		work_done += sky2_status_intr(hw, work_limit - work_done);
 		*budget -= work_done;
 		dev0->quota -= work_done;
 
@@ -2133,9 +2134,22 @@ static int sky2_poll(struct net_device *dev0, int *budget)
 		sky2_write32(hw, STAT_CTRL, SC_STAT_CLR_IRQ);
 	}
 
-	netif_rx_complete(dev0);
+	local_irq_disable();
+	__netif_rx_complete(dev0);
 
 	status = sky2_read32(hw, B0_Y2_SP_LISR);
+
+	if (unlikely(status)) {
+		/* More work pending, try and keep going */
+		if (__netif_rx_schedule_prep(dev0)) {
+			__netif_rx_reschedule(dev0, work_done);
+			status = sky2_read32(hw, B0_Y2_SP_EISR);
+			local_irq_enable();
+			goto restart_poll;
+		}
+	}
+
+	local_irq_enable();
 	return 0;
 }
 
@@ -2153,8 +2167,6 @@ static irqreturn_t sky2_intr(int irq, void *dev_id, struct pt_regs *regs)
 	prefetch(&hw->st_le[hw->st_idx]);
 	if (likely(__netif_rx_schedule_prep(dev0)))
 		__netif_rx_schedule(dev0);
-	else
-		printk(KERN_DEBUG PFX "irq race detected\n");
 
 	return IRQ_HANDLED;
 }
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 40ccf8cc423..01db7b88a2b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -829,19 +829,21 @@ static inline void netif_rx_schedule(struct net_device *dev)
 		__netif_rx_schedule(dev);
 }
 
-/* Try to reschedule poll. Called by dev->poll() after netif_rx_complete().
- * Do not inline this?
- */
+
+static inline void  __netif_rx_reschedule(struct net_device *dev, int undo)
+{
+	dev->quota += undo;
+	list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
+	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+}
+
+/* Try to reschedule poll. Called by dev->poll() after netif_rx_complete(). */
 static inline int netif_rx_reschedule(struct net_device *dev, int undo)
 {
 	if (netif_rx_schedule_prep(dev)) {
 		unsigned long flags;
-
-		dev->quota += undo;
-
 		local_irq_save(flags);
-		list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
-		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+		__netif_rx_reschedule(dev, undo);
 		local_irq_restore(flags);
 		return 1;
 	}
-- 
cgit v1.2.3


From 00522fb41a2a9bf0f98a007c0e2b516a3873148c Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Wed, 26 Apr 2006 14:39:29 +0200
Subject: [PATCH] splice: rearrange moving to/from pipe helpers

We need these for people writing their own ->splice_read/write hooks.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 fs/splice.c               | 35 +++++++++++------------------------
 include/linux/pipe_fs_i.h | 17 +++++++++++++++++
 2 files changed, 28 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/fs/splice.c b/fs/splice.c
index 0b2c1f060ca..447ebc0a37f 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -29,23 +29,13 @@
 #include <linux/syscalls.h>
 #include <linux/uio.h>
 
-/*
- * Passed to the actors
- */
-struct splice_desc {
-	unsigned int len, total_len;	/* current and remaining length */
-	unsigned int flags;		/* splice flags */
-	struct file *file;		/* file to read/write */
-	loff_t pos;			/* file position */
-};
-
 struct partial_page {
 	unsigned int offset;
 	unsigned int len;
 };
 
 /*
- * Passed to move_to_pipe
+ * Passed to splice_to_pipe
  */
 struct splice_pipe_desc {
 	struct page **pages;		/* page map */
@@ -192,8 +182,8 @@ static struct pipe_buf_operations user_page_pipe_buf_ops = {
  * Pipe output worker. This sets up our pipe format with the page cache
  * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
  */
-static ssize_t move_to_pipe(struct pipe_inode_info *pipe,
-			    struct splice_pipe_desc *spd)
+static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
+			      struct splice_pipe_desc *spd)
 {
 	int ret, do_wakeup, page_nr;
 
@@ -432,7 +422,7 @@ fill_it:
 	}
 
 	if (spd.nr_pages)
-		return move_to_pipe(pipe, &spd);
+		return splice_to_pipe(pipe, &spd);
 
 	return error;
 }
@@ -666,17 +656,14 @@ out_nomem:
 	return ret;
 }
 
-typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
-			   struct splice_desc *);
-
 /*
  * Pipe input worker. Most of this logic works like a regular pipe, the
  * key here is the 'actor' worker passed in that actually moves the data
  * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
  */
-static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
-			      loff_t *ppos, size_t len, unsigned int flags,
-			      splice_actor *actor)
+ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
+			 loff_t *ppos, size_t len, unsigned int flags,
+			 splice_actor *actor)
 {
 	int ret, do_wakeup, err;
 	struct splice_desc sd;
@@ -795,7 +782,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 	struct address_space *mapping = out->f_mapping;
 	ssize_t ret;
 
-	ret = move_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
+	ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
 	if (ret > 0) {
 		struct inode *inode = mapping->host;
 
@@ -837,7 +824,7 @@ EXPORT_SYMBOL(generic_file_splice_write);
 ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
 				loff_t *ppos, size_t len, unsigned int flags)
 {
-	return move_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage);
+	return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage);
 }
 
 EXPORT_SYMBOL(generic_splice_sendpage);
@@ -924,7 +911,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 
 		/*
 		 * We don't have an immediate reader, but we'll read the stuff
-		 * out of the pipe right after the move_to_pipe(). So set
+		 * out of the pipe right after the splice_to_pipe(). So set
 		 * PIPE_READERS appropriately.
 		 */
 		pipe->readers = 1;
@@ -1210,7 +1197,7 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
 	if (spd.nr_pages <= 0)
 		return spd.nr_pages;
 
-	return move_to_pipe(pipe, &spd);
+	return splice_to_pipe(pipe, &spd);
 }
 
 asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index ef7f33c0be1..0008d4bd405 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -61,4 +61,21 @@ void __free_pipe_info(struct pipe_inode_info *);
 				 /* from/to, of course */
 #define SPLICE_F_MORE	(0x04)	/* expect more data */
 
+/*
+ * Passed to the actors
+ */
+struct splice_desc {
+	unsigned int len, total_len;	/* current and remaining length */
+	unsigned int flags;		/* splice flags */
+	struct file *file;		/* file to read/write */
+	loff_t pos;			/* file position */
+};
+
+typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
+			   struct splice_desc *);
+
+extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
+				loff_t *, size_t, unsigned int,
+				splice_actor *);
+
 #endif
-- 
cgit v1.2.3


From ebf43500ef148a380bd132743c3fc530111ac620 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Thu, 27 Apr 2006 08:46:01 +0200
Subject: [PATCH] Add find_get_pages_contig(): contiguous variant of
 find_get_pages()

find_get_pages_contig() will break out if we hit a hole in the page cache.
From Andrew Morton, small modifications and documentation by me.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 include/linux/pagemap.h |  2 ++
 mm/filemap.c            | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 9539efd4f7e..7a1af574ded 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -78,6 +78,8 @@ extern struct page * find_or_create_page(struct address_space *mapping,
 				unsigned long index, gfp_t gfp_mask);
 unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 			unsigned int nr_pages, struct page **pages);
+unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
+			       unsigned int nr_pages, struct page **pages);
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 			int tag, unsigned int nr_pages, struct page **pages);
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 3ef20739e72..fd57442186c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -697,6 +697,38 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 	return ret;
 }
 
+/**
+ * find_get_pages_contig - gang contiguous pagecache lookup
+ * @mapping:	The address_space to search
+ * @index:	The starting page index
+ * @nr_pages:	The maximum number of pages
+ * @pages:	Where the resulting pages are placed
+ *
+ * find_get_pages_contig() works exactly like find_get_pages(), except
+ * that the returned number of pages are guaranteed to be contiguous.
+ *
+ * find_get_pages_contig() returns the number of pages which were found.
+ */
+unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
+			       unsigned int nr_pages, struct page **pages)
+{
+	unsigned int i;
+	unsigned int ret;
+
+	read_lock_irq(&mapping->tree_lock);
+	ret = radix_tree_gang_lookup(&mapping->page_tree,
+				(void **)pages, index, nr_pages);
+	for (i = 0; i < ret; i++) {
+		if (pages[i]->mapping == NULL || pages[i]->index != index)
+			break;
+
+		page_cache_get(pages[i]);
+		index++;
+	}
+	read_unlock_irq(&mapping->tree_lock);
+	return i;
+}
+
 /*
  * Like find_get_pages, except we only return pages which are tagged with
  * `tag'.   We update *index to index the next page for the traversal.
-- 
cgit v1.2.3


From bc818247203a7bfc40296a3f5b760de84fb8e0d1 Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Mon, 17 Apr 2006 21:19:12 +0900
Subject: [MIPS] Fix bitops for MIPS32/MIPS64 CPUs.

With recent rewrite for generic bitops, fls() for 32bit kernel with
MIPS64_CPU is broken.  Also, ffs(), fls() should be defined the same
way as the libc and compiler built-in routines (returns int instead of
unsigned long).

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 include/asm-mips/bitops.h | 56 ++++++++++++++++++++---------------------------
 1 file changed, 24 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/asm-mips/bitops.h b/include/asm-mips/bitops.h
index a1728f8c070..d2f444537e4 100644
--- a/include/asm-mips/bitops.h
+++ b/include/asm-mips/bitops.h
@@ -467,64 +467,56 @@ static inline unsigned long __ffs(unsigned long word)
 }
 
 /*
- * ffs - find first bit set.
+ * fls - find last bit set.
  * @word: The word to search
  *
- * Returns 1..SZLONG
- * Returns 0 if no bit exists
+ * This is defined the same way as ffs.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
  */
-
-static inline unsigned long ffs(unsigned long word)
+static inline int fls(int word)
 {
-	if (!word)
-		return 0;
+	__asm__ ("clz %0, %1" : "=r" (word) : "r" (word));
 
-	return __ffs(word) + 1;
+	return 32 - word;
 }
 
-/*
- * ffz - find first zero in word.
- * @word: The word to search
- *
- * Undefined if no zero exists, so code should check against ~0UL first.
- */
-static inline unsigned long ffz(unsigned long word)
+#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPS64)
+static inline int fls64(__u64 word)
 {
-	return __ffs (~word);
+	__asm__ ("dclz %0, %1" : "=r" (word) : "r" (word));
+
+	return 64 - word;
 }
+#else
+#include <asm-generic/bitops/fls64.h>
+#endif
 
 /*
- * fls - find last bit set.
+ * ffs - find first bit set.
  * @word: The word to search
  *
- * Returns 1..SZLONG
- * Returns 0 if no bit exists
+ * This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
  */
-static inline unsigned long fls(unsigned long word)
+static inline int ffs(int word)
 {
-#ifdef CONFIG_CPU_MIPS32
-	__asm__ ("clz %0, %1" : "=r" (word) : "r" (word));
-
-	return 32 - word;
-#endif
-
-#ifdef CONFIG_CPU_MIPS64
-	__asm__ ("dclz %0, %1" : "=r" (word) : "r" (word));
+	if (!word)
+		return 0;
 
-	return 64 - word;
-#endif
+	return fls(word & -word);
 }
 
 #else
 
 #include <asm-generic/bitops/__ffs.h>
 #include <asm-generic/bitops/ffs.h>
-#include <asm-generic/bitops/ffz.h>
 #include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/fls64.h>
 
 #endif /*defined(CONFIG_CPU_MIPS32) || defined(CONFIG_CPU_MIPS64) */
 
-#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/ffz.h>
 #include <asm-generic/bitops/find.h>
 
 #ifdef __KERNEL__
-- 
cgit v1.2.3


From 7a8341969fe0df4a1fffa141435e742456270ffd Mon Sep 17 00:00:00 2001
From: Chris Dearman <chris@mips.com>
Date: Sat, 15 Apr 2006 00:31:16 +0100
Subject: [MIPS] 24K LV: Add core card id.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mips-boards/generic/init.c    | 1 +
 arch/mips/mips-boards/generic/pci.c     | 1 +
 arch/mips/mips-boards/malta/malta_int.c | 3 +++
 include/asm-mips/mips-boards/generic.h  | 1 +
 4 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/arch/mips/mips-boards/generic/init.c b/arch/mips/mips-boards/generic/init.c
index 17dfe6a8cab..df4e9473560 100644
--- a/arch/mips/mips-boards/generic/init.c
+++ b/arch/mips/mips-boards/generic/init.c
@@ -337,6 +337,7 @@ void __init prom_init(void)
 	case MIPS_REVISION_CORID_CORE_MSC:
 	case MIPS_REVISION_CORID_CORE_FPGA2:
 	case MIPS_REVISION_CORID_CORE_FPGA3:
+	case MIPS_REVISION_CORID_CORE_24K:
 	case MIPS_REVISION_CORID_CORE_EMUL_MSC:
 		_pcictrl_msc = (unsigned long)ioremap(MIPS_MSC01_PCI_REG_BASE, 0x2000);
 
diff --git a/arch/mips/mips-boards/generic/pci.c b/arch/mips/mips-boards/generic/pci.c
index 1f6f9df74ab..9337f6c8873 100644
--- a/arch/mips/mips-boards/generic/pci.c
+++ b/arch/mips/mips-boards/generic/pci.c
@@ -198,6 +198,7 @@ void __init mips_pcibios_init(void)
 	case MIPS_REVISION_CORID_CORE_MSC:
 	case MIPS_REVISION_CORID_CORE_FPGA2:
 	case MIPS_REVISION_CORID_CORE_FPGA3:
+	case MIPS_REVISION_CORID_CORE_24K:
 	case MIPS_REVISION_CORID_CORE_EMUL_MSC:
 		/* Set up resource ranges from the controller's registers.  */
 		MSC_READ(MSC01_PCI_SC2PMBASL, start);
diff --git a/arch/mips/mips-boards/malta/malta_int.c b/arch/mips/mips-boards/malta/malta_int.c
index 64db07d4dbe..7cc0ba4f553 100644
--- a/arch/mips/mips-boards/malta/malta_int.c
+++ b/arch/mips/mips-boards/malta/malta_int.c
@@ -57,6 +57,7 @@ static inline int mips_pcibios_iack(void)
 	case MIPS_REVISION_CORID_CORE_MSC:
 	case MIPS_REVISION_CORID_CORE_FPGA2:
 	case MIPS_REVISION_CORID_CORE_FPGA3:
+	case MIPS_REVISION_CORID_CORE_24K:
 	case MIPS_REVISION_CORID_CORE_EMUL_MSC:
 	        MSC_READ(MSC01_PCI_IACK, irq);
 		irq &= 0xff;
@@ -143,6 +144,7 @@ void corehi_irqdispatch(struct pt_regs *regs)
         case MIPS_REVISION_CORID_CORE_MSC:
         case MIPS_REVISION_CORID_CORE_FPGA2:
         case MIPS_REVISION_CORID_CORE_FPGA3:
+        case MIPS_REVISION_CORID_CORE_24K:
         case MIPS_REVISION_CORID_CORE_EMUL_MSC:
                 ll_msc_irq(regs);
                 break;
@@ -309,6 +311,7 @@ void __init arch_init_irq(void)
         case MIPS_REVISION_CORID_CORE_MSC:
         case MIPS_REVISION_CORID_CORE_FPGA2:
         case MIPS_REVISION_CORID_CORE_FPGA3:
+        case MIPS_REVISION_CORID_CORE_24K:
         case MIPS_REVISION_CORID_CORE_EMUL_MSC:
 		if (cpu_has_veic)
 			init_msc_irqs (MSC01E_INT_BASE, msc_eicirqmap, msc_nr_eicirqs);
diff --git a/include/asm-mips/mips-boards/generic.h b/include/asm-mips/mips-boards/generic.h
index 25b6ffc2662..fa8b913cc3e 100644
--- a/include/asm-mips/mips-boards/generic.h
+++ b/include/asm-mips/mips-boards/generic.h
@@ -67,6 +67,7 @@
 #define MIPS_REVISION_CORID_CORE_FPGA2     7
 #define MIPS_REVISION_CORID_CORE_FPGAR2    8
 #define MIPS_REVISION_CORID_CORE_FPGA3     9
+#define MIPS_REVISION_CORID_CORE_24K       10
 
 /**** Artificial corid defines ****/
 /*
-- 
cgit v1.2.3


From 6e5882cfa24e1456702e463f6920fc0ca3c3d2b8 Mon Sep 17 00:00:00 2001
From: Zachary Amsden <zach@vmware.com>
Date: Thu, 27 Apr 2006 11:32:29 -0700
Subject: [PATCH] x86/PAE: Fix pte_clear for the >4GB RAM case

Proposed fix for ptep_get_and_clear_full PAE bug.  Pte_clear had the same bug,
so use the same fix for both.  Turns out pmd_clear had it as well, but pgds
are not affected.

The problem is rather intricate.  Page table entries in PAE mode are 64-bits
wide, but the only atomic 8-byte write operation available in 32-bit mode is
cmpxchg8b, which is expensive (at least on P4), and thus avoided.  But it can
happen that the processor may prefetch entries into the TLB in the middle of an
operation which clears a page table entry.  So one must always clear the P-bit
in the low word of the page table entry first when clearing it.

Since the sequence *ptep = __pte(0) leaves the order of the write dependent on
the compiler, it must be coded explicitly as a clear of the low word followed
by a clear of the high word.  Further, there must be a write memory barrier
here to enforce proper ordering by the compiler (and, in the future, by the
processor as well).

On > 4GB memory machines, the implementation of pte_clear for PAE was clearly
deficient, as it could leave virtual mappings of physical memory above 4GB
aliased to memory below 4GB in the TLB.  The implementation of
ptep_get_and_clear_full has a similar bug, although not nearly as likely to
occur, since the mappings being cleared are in the process of being destroyed,
and should never be dereferenced again.

But, as luck would have it, it is possible to trigger bugs even without ever
dereferencing these bogus TLB mappings, even if the clear is followed fairly
soon after with a TLB flush or invalidation.  The problem is that memory above
4GB may now be aliased into the first 4GB of memory, and in fact, may hit a
region of memory with non-memory semantics.  These regions include AGP and PCI
space.  As such, these memory regions are not cached by the processor.  This
introduces the bug.

The processor can speculate memory operations, including memory writes, as long
as they are committed with the proper ordering.  Speculating a memory write to
a linear address that has a bogus TLB mapping is possible.  Normally, the
speculation is harmless.  But for cached memory, it does leave the falsely
speculated cacheline unmodified, but in a dirty state.  This cache line will be
eventually written back.  If this cacheline happens to intersect a region of
memory that is not protected by the cache coherency protocol, it can corrupt
data in I/O memory, which is generally a very bad thing to do, and can cause
total system failure or just plain undefined behavior.

These bugs are extremely unlikely, but the severity is of such magnitude, and
the fix so simple that I think fixing them immediately is justified.  Also,
they are nearly impossible to debug.

Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/pgtable-2level.h |  3 +++
 include/asm-i386/pgtable-3level.h | 20 ++++++++++++++++++++
 include/asm-i386/pgtable.h        |  4 +---
 3 files changed, 24 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h
index 27bde973abc..2756d4b04c2 100644
--- a/include/asm-i386/pgtable-2level.h
+++ b/include/asm-i386/pgtable-2level.h
@@ -18,6 +18,9 @@
 #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
 #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
 
+#define pte_clear(mm,addr,xp)	do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
+#define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
+
 #define ptep_get_and_clear(mm,addr,xp)	__pte(xchg(&(xp)->pte_low, 0))
 #define pte_same(a, b)		((a).pte_low == (b).pte_low)
 #define pte_page(x)		pfn_to_page(pte_pfn(x))
diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h
index 36a5aa63cbb..dccb1b3337a 100644
--- a/include/asm-i386/pgtable-3level.h
+++ b/include/asm-i386/pgtable-3level.h
@@ -85,6 +85,26 @@ static inline void pud_clear (pud_t * pud) { }
 #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
 			pmd_index(address))
 
+/*
+ * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
+ * entry, so clear the bottom half first and enforce ordering with a compiler
+ * barrier.
+ */
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	ptep->pte_low = 0;
+	smp_wmb();
+	ptep->pte_high = 0;
+}
+
+static inline void pmd_clear(pmd_t *pmd)
+{
+	u32 *tmp = (u32 *)pmd;
+	*tmp = 0;
+	smp_wmb();
+	*(tmp + 1) = 0;
+}
+
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	pte_t res;
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index ee056c41a9f..672c3f76b9d 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -204,12 +204,10 @@ extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
 extern unsigned long pg0[];
 
 #define pte_present(x)	((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
-#define pte_clear(mm,addr,xp)	do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
 
 /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
 #define pmd_none(x)	(!(unsigned long)pmd_val(x))
 #define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
-#define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
 #define	pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
 
 
@@ -268,7 +266,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
 	pte_t pte;
 	if (full) {
 		pte = *ptep;
-		*ptep = __pte(0);
+		pte_clear(mm, addr, ptep);
 	} else {
 		pte = ptep_get_and_clear(mm, addr, ptep);
 	}
-- 
cgit v1.2.3


From 4d17ffda331ba6030bb8c233c73d6a87954d8ea7 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Tue, 25 Apr 2006 15:37:26 +0200
Subject: [PATCH] Kobject: fix build error

This fixes a build error for various odd combinations of CONFIG_HOTPLUG
and CONFIG_NET.

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Cc: Nigel Cunningham <ncunningham@cyclades.com>
Cc: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kobject.h | 2 +-
 lib/kobject_uevent.c    | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index dcd0623be89..e38bb357282 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -259,7 +259,7 @@ struct subsys_attribute {
 extern int subsys_create_file(struct subsystem * , struct subsys_attribute *);
 extern void subsys_remove_file(struct subsystem * , struct subsys_attribute *);
 
-#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
+#if defined(CONFIG_HOTPLUG)
 void kobject_uevent(struct kobject *kobj, enum kobject_action action);
 
 int add_uevent_var(char **envp, int num_envp, int *cur_index,
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 982226daf93..7f20e7b857c 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -25,11 +25,13 @@
 #define BUFFER_SIZE	2048	/* buffer for the variables */
 #define NUM_ENVP	32	/* number of env pointers */
 
-#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
+#if defined(CONFIG_HOTPLUG)
 u64 uevent_seqnum;
 char uevent_helper[UEVENT_HELPER_PATH_LEN] = "/sbin/hotplug";
 static DEFINE_SPINLOCK(sequence_lock);
+#if defined(CONFIG_NET)
 static struct sock *uevent_sock;
+#endif
 
 static char *action_to_string(enum kobject_action action)
 {
@@ -155,6 +157,7 @@ void kobject_uevent(struct kobject *kobj, enum kobject_action action)
 	spin_unlock(&sequence_lock);
 	sprintf(seq_buff, "SEQNUM=%llu", (unsigned long long)seq);
 
+#if defined(CONFIG_NET)
 	/* send netlink message */
 	if (uevent_sock) {
 		struct sk_buff *skb;
@@ -179,6 +182,7 @@ void kobject_uevent(struct kobject *kobj, enum kobject_action action)
 			netlink_broadcast(uevent_sock, skb, 0, 1, GFP_KERNEL);
 		}
 	}
+#endif
 
 	/* call uevent_helper, usually only enabled during early boot */
 	if (uevent_helper[0]) {
@@ -249,6 +253,7 @@ int add_uevent_var(char **envp, int num_envp, int *cur_index,
 }
 EXPORT_SYMBOL_GPL(add_uevent_var);
 
+#if defined(CONFIG_NET)
 static int __init kobject_uevent_init(void)
 {
 	uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL,
@@ -264,5 +269,6 @@ static int __init kobject_uevent_init(void)
 }
 
 postcore_initcall(kobject_uevent_init);
+#endif
 
 #endif /* CONFIG_HOTPLUG */
-- 
cgit v1.2.3


From bde11d794206ae8d72defd0e8a481181200f7dc4 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Tue, 18 Apr 2006 21:30:22 -0700
Subject: [PATCH] Fix OCFS2 warning when DEBUG_FS is not enabled

Fix the following warning which happens when OCFS2_FS is enabled but
DEBUG_FS isn't:

fs/ocfs2/dlmglue.c: In function `ocfs2_dlm_init_debug':
fs/ocfs2/dlmglue.c:2036: warning: passing arg 5 of `debugfs_create_file' discards qualifiers from pointer target type

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Joel Becker <Joel.Becker@oracle.com>
Acked-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/debugfs.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 176e2d37157..047567d34ca 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -58,9 +58,8 @@ struct dentry *debugfs_create_blob(const char *name, mode_t mode,
  */
 
 static inline struct dentry *debugfs_create_file(const char *name, mode_t mode,
-						 struct dentry *parent,
-						 void *data,
-						 struct file_operations *fops)
+					struct dentry *parent, void *data,
+					const struct file_operations *fops)
 {
 	return ERR_PTR(-ENODEV);
 }
-- 
cgit v1.2.3


From 5b3ef14e3e9d745a512d65fcb4ef9be541226d80 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sat, 22 Apr 2006 12:14:44 +0200
Subject: [PATCH] Kobject: possible cleanups

This patch contains the following possible cleanups:
- #if 0 the following unused global function:
  - subsys_remove_file()
- remove the following unused EXPORT_SYMBOL's:
  - kset_find_obj
  - subsystem_init
- remove the following unused EXPORT_SYMBOL_GPL:
  - kobject_add_dir

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kobject.h | 1 -
 lib/kobject.c           | 7 ++-----
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index e38bb357282..c187c53cecd 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -257,7 +257,6 @@ struct subsys_attribute {
 };
 
 extern int subsys_create_file(struct subsystem * , struct subsys_attribute *);
-extern void subsys_remove_file(struct subsystem * , struct subsys_attribute *);
 
 #if defined(CONFIG_HOTPLUG)
 void kobject_uevent(struct kobject *kobj, enum kobject_action action);
diff --git a/lib/kobject.c b/lib/kobject.c
index 01d95751394..b46350c2783 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -422,7 +422,6 @@ struct kobject *kobject_add_dir(struct kobject *parent, const char *name)
 
 	return k;
 }
-EXPORT_SYMBOL_GPL(kobject_add_dir);
 
 /**
  *	kset_init - initialize a kset for use
@@ -569,7 +568,7 @@ int subsys_create_file(struct subsystem * s, struct subsys_attribute * a)
  *	@s:	subsystem.
  *	@a:	attribute desciptor.
  */
-
+#if 0
 void subsys_remove_file(struct subsystem * s, struct subsys_attribute * a)
 {
 	if (subsys_get(s)) {
@@ -577,6 +576,7 @@ void subsys_remove_file(struct subsystem * s, struct subsys_attribute * a)
 		subsys_put(s);
 	}
 }
+#endif  /*  0  */
 
 EXPORT_SYMBOL(kobject_init);
 EXPORT_SYMBOL(kobject_register);
@@ -588,10 +588,7 @@ EXPORT_SYMBOL(kobject_del);
 
 EXPORT_SYMBOL(kset_register);
 EXPORT_SYMBOL(kset_unregister);
-EXPORT_SYMBOL(kset_find_obj);
 
-EXPORT_SYMBOL(subsystem_init);
 EXPORT_SYMBOL(subsystem_register);
 EXPORT_SYMBOL(subsystem_unregister);
 EXPORT_SYMBOL(subsys_create_file);
-EXPORT_SYMBOL(subsys_remove_file);
-- 
cgit v1.2.3


From f0fe253c4719faf76d40f581cdc0e8aef77273bb Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Sat, 22 Apr 2006 09:36:07 -0500
Subject: [IA64-SGI] - Fix discover of nearest cpu node to IO node

Fix a bug that causes discovery of the nearest node/cpu to
a TIO (IO node) to fail.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/sn2/sn_hwperf.c | 4 ++--
 include/asm-ia64/sn/sn2/sn_hwperf.h | 6 +++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index d917afa30b2..7ec65bc0ccf 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -284,6 +284,8 @@ static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objb
 	/* find nearest node with cpus and nearest memory */
 	for (router=NULL, j=0; j < op->ports; j++) {
 		dest = sn_hwperf_findobj_id(objbuf, nobj, ptdata[j].conn_id);
+		if (dest && SN_HWPERF_IS_ROUTER(dest))
+			router = dest;
 		if (!dest || SN_HWPERF_FOREIGN(dest) ||
 		    !SN_HWPERF_IS_NODE(dest) || SN_HWPERF_IS_IONODE(dest)) {
 			continue;
@@ -299,8 +301,6 @@ static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objb
 				*near_mem_node = c;
 			found_mem++;
 		}
-		if (SN_HWPERF_IS_ROUTER(dest))
-			router = dest;
 	}
 
 	if (router && (!found_cpu || !found_mem)) {
diff --git a/include/asm-ia64/sn/sn2/sn_hwperf.h b/include/asm-ia64/sn/sn2/sn_hwperf.h
index 291ef3d69da..e61ebac38cd 100644
--- a/include/asm-ia64/sn/sn2/sn_hwperf.h
+++ b/include/asm-ia64/sn/sn2/sn_hwperf.h
@@ -45,8 +45,12 @@ struct sn_hwperf_object_info {
 #define SN_HWPERF_IS_NODE(x)		((x) && strstr((x)->name, "SHub"))
 #define SN_HWPERF_IS_NODE_SHUB2(x)	((x) && strstr((x)->name, "SHub 2."))
 #define SN_HWPERF_IS_IONODE(x)		((x) && strstr((x)->name, "TIO"))
-#define SN_HWPERF_IS_ROUTER(x)		((x) && strstr((x)->name, "Router"))
 #define SN_HWPERF_IS_NL3ROUTER(x)	((x) && strstr((x)->name, "NL3Router"))
+#define SN_HWPERF_IS_NL4ROUTER(x)	((x) && strstr((x)->name, "NL4Router"))
+#define SN_HWPERF_IS_OLDROUTER(x)	((x) && strstr((x)->name, "Router"))
+#define SN_HWPERF_IS_ROUTER(x)		(SN_HWPERF_IS_NL3ROUTER(x) || 		\
+					 	SN_HWPERF_IS_NL4ROUTER(x) || 	\
+					 	SN_HWPERF_IS_OLDROUTER(x))
 #define SN_HWPERF_FOREIGN(x)		((x) && !(x)->sn_hwp_this_part && !(x)->sn_hwp_is_shared)
 #define SN_HWPERF_SAME_OBJTYPE(x,y)	((SN_HWPERF_IS_NODE(x) && SN_HWPERF_IS_NODE(y)) ||\
 					(SN_HWPERF_IS_IONODE(x) && SN_HWPERF_IS_IONODE(y)) ||\
-- 
cgit v1.2.3


From 1df57c0c21c92a6d4fcfe5304c84151ed9beb7a2 Mon Sep 17 00:00:00 2001
From: Cliff Wickman <cpw@sgi.com>
Date: Tue, 25 Apr 2006 10:47:48 -0500
Subject: [IA64] enable dumps to capture second page of kernel stack

In SLES10 (2.6.16) crash dumping (in my experience, LKCD) is unable to
capture the second page of the 2-page task/stack allocation.
This is particularly troublesome for dump analysis, as the stack traceback
cannot be done.
  (A similar convention is probably needed throughout the kernel to make
   kernel multi-page allocations detectable for dumping)

Multi-page kernel allocations are represented by the single page structure
associated with the first page of the allocation.  The page structures
associated with the other pages are unintialized.

If the dumper is selecting only kernel pages it has no way to identify
any but the first page of the allocation.

The fix is to make the task/stack allocation a compound page.

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/asm-ia64/thread_info.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h
index 56394a2c705..e5392c4d30c 100644
--- a/include/asm-ia64/thread_info.h
+++ b/include/asm-ia64/thread_info.h
@@ -67,7 +67,7 @@ struct thread_info {
 #define end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET)
 
 #define __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
-#define alloc_task_struct()	((task_t *)__get_free_pages(GFP_KERNEL, KERNEL_STACK_SIZE_ORDER))
+#define alloc_task_struct()	((task_t *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER))
 #define free_task_struct(tsk)	free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER)
 
 #endif /* !__ASSEMBLY */
-- 
cgit v1.2.3


From 13e87ec68641fd54f3fa04eef3419d034ed2115a Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Thu, 27 Apr 2006 18:39:18 -0700
Subject: [PATCH] request_irq(): remove warnings from irq probing

- Add new SA_PROBEIRQ which suppresses the new sharing-mismatch warning.
  Some drivers like to use request_irq() to find an unused interrupt slot.

- Use it in i82365.c

- Kill unused SA_PROBE.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/pcmcia/i82365.c     | 7 ++++---
 include/asm-xtensa/signal.h | 2 +-
 include/linux/signal.h      | 4 +++-
 kernel/irq/manage.c         | 6 ++++--
 4 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/pcmcia/i82365.c b/drivers/pcmcia/i82365.c
index bd0308e8981..a2f05f48515 100644
--- a/drivers/pcmcia/i82365.c
+++ b/drivers/pcmcia/i82365.c
@@ -509,7 +509,8 @@ static irqreturn_t i365_count_irq(int irq, void *dev, struct pt_regs *regs)
 static u_int __init test_irq(u_short sock, int irq)
 {
     debug(2, "  testing ISA irq %d\n", irq);
-    if (request_irq(irq, i365_count_irq, 0, "scan", i365_count_irq) != 0)
+    if (request_irq(irq, i365_count_irq, SA_PROBEIRQ, "scan",
+			i365_count_irq) != 0)
 	return 1;
     irq_hits = 0; irq_sock = sock;
     msleep(10);
@@ -561,7 +562,7 @@ static u_int __init isa_scan(u_short sock, u_int mask0)
     } else {
 	/* Fallback: just find interrupts that aren't in use */
 	for (i = 0; i < 16; i++)
-	    if ((mask0 & (1 << i)) && (_check_irq(i, 0) == 0))
+	    if ((mask0 & (1 << i)) && (_check_irq(i, SA_PROBEIRQ) == 0))
 		mask1 |= (1 << i);
 	printk("default");
 	/* If scan failed, default to polled status */
@@ -725,7 +726,7 @@ static void __init add_pcic(int ns, int type)
 	u_int cs_mask = mask & ((cs_irq) ? (1<<cs_irq) : ~(1<<12));
 	for (cs_irq = 15; cs_irq > 0; cs_irq--)
 	    if ((cs_mask & (1 << cs_irq)) &&
-		(_check_irq(cs_irq, 0) == 0))
+		(_check_irq(cs_irq, SA_PROBEIRQ) == 0))
 		break;
 	if (cs_irq) {
 	    grab_irq = 1;
diff --git a/include/asm-xtensa/signal.h b/include/asm-xtensa/signal.h
index 5d6fc9cdf58..a99c9aec64e 100644
--- a/include/asm-xtensa/signal.h
+++ b/include/asm-xtensa/signal.h
@@ -118,9 +118,9 @@ typedef struct {
  * SA_INTERRUPT is also used by the irq handling routines.
  * SA_SHIRQ is for shared interrupt support on PCI and EISA.
  */
-#define SA_PROBE		SA_ONESHOT
 #define SA_SAMPLE_RANDOM	SA_RESTART
 #define SA_SHIRQ		0x04000000
+#define SA_PROBEIRQ		0x08000000
 #endif
 
 #define SIG_BLOCK          0	/* for blocking signals */
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 162a8fd10b2..70739f51a09 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -14,10 +14,12 @@
  *
  * SA_INTERRUPT is also used by the irq handling routines.
  * SA_SHIRQ is for shared interrupt support on PCI and EISA.
+ * SA_PROBEIRQ is set by callers when they expect sharing mismatches to occur
  */
-#define SA_PROBE		SA_ONESHOT
 #define SA_SAMPLE_RANDOM	SA_RESTART
 #define SA_SHIRQ		0x04000000
+#define SA_PROBEIRQ		0x08000000
+
 /*
  * As above, these correspond to the IORESOURCE_IRQ_* defines in
  * linux/ioport.h to select the interrupt line behaviour.  When
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ac766ad573e..1279e349953 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -246,8 +246,10 @@ int setup_irq(unsigned int irq, struct irqaction * new)
 
 mismatch:
 	spin_unlock_irqrestore(&desc->lock, flags);
-	printk(KERN_ERR "%s: irq handler mismatch\n", __FUNCTION__);
-	dump_stack();
+	if (!(new->flags & SA_PROBEIRQ)) {
+		printk(KERN_ERR "%s: irq handler mismatch\n", __FUNCTION__);
+		dump_stack();
+	}
 	return -EBUSY;
 }
 
-- 
cgit v1.2.3


From 3363fbdd6fb4992ffe6c17c0dd7388ffa22d99e6 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Thu, 27 Apr 2006 18:40:12 -0700
Subject: [PATCH] s390: futex atomic operations

Add support for atomic futex operations.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-s390/futex.h | 123 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 119 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/asm-s390/futex.h b/include/asm-s390/futex.h
index 6a332a9f099..40c25e166a9 100644
--- a/include/asm-s390/futex.h
+++ b/include/asm-s390/futex.h
@@ -1,6 +1,121 @@
-#ifndef _ASM_FUTEX_H
-#define _ASM_FUTEX_H
+#ifndef _ASM_S390_FUTEX_H
+#define _ASM_S390_FUTEX_H
 
-#include <asm-generic/futex.h>
+#ifdef __KERNEL__
 
-#endif
+#include <linux/futex.h>
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+#ifndef __s390x__
+#define __futex_atomic_fixup \
+		     ".section __ex_table,\"a\"\n"			\
+		     "   .align 4\n"					\
+		     "   .long  0b,2b,1b,2b\n"				\
+		     ".previous"
+#else /* __s390x__ */
+#define __futex_atomic_fixup \
+		     ".section __ex_table,\"a\"\n"			\
+		     "   .align 8\n"					\
+		     "   .quad  0b,2b,1b,2b\n"				\
+		     ".previous"
+#endif /* __s390x__ */
+
+#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg)	\
+	asm volatile("   l   %1,0(%6)\n"				\
+		     "0: " insn						\
+		     "   cs  %1,%2,0(%6)\n"				\
+		     "1: jl  0b\n"					\
+		     "   lhi %0,0\n"					\
+		     "2:\n"						\
+		     __futex_atomic_fixup				\
+		     : "=d" (ret), "=&d" (oldval), "=&d" (newval),	\
+		       "=m" (*uaddr)					\
+		     : "0" (-EFAULT), "d" (oparg), "a" (uaddr),		\
+		       "m" (*uaddr) : "cc" );
+
+static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+{
+	int op = (encoded_op >> 28) & 7;
+	int cmp = (encoded_op >> 24) & 15;
+	int oparg = (encoded_op << 8) >> 20;
+	int cmparg = (encoded_op << 20) >> 20;
+	int oldval = 0, newval, ret;
+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+		oparg = 1 << oparg;
+
+	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+		return -EFAULT;
+
+	inc_preempt_count();
+
+	switch (op) {
+	case FUTEX_OP_SET:
+		__futex_atomic_op("lr %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ADD:
+		__futex_atomic_op("lr %2,%1\nar %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		__futex_atomic_op("lr %2,%1\nor %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		__futex_atomic_op("lr %2,%1\nnr %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_XOR:
+		__futex_atomic_op("lr %2,%1\nxr %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+
+	dec_preempt_count();
+
+	if (!ret) {
+		switch (cmp) {
+		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
+		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
+		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
+		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
+		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
+		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
+		default: ret = -ENOSYS;
+		}
+	}
+	return ret;
+}
+
+static inline int
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+{
+	int ret;
+
+	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+		return -EFAULT;
+	asm volatile("   cs   %1,%4,0(%5)\n"
+		     "0: lr   %0,%1\n"
+		     "1:\n"
+#ifndef __s390x__
+		     ".section __ex_table,\"a\"\n"
+		     "   .align 4\n"
+		     "   .long  0b,1b\n"
+		     ".previous"
+#else /* __s390x__ */
+		     ".section __ex_table,\"a\"\n"
+		     "   .align 8\n"
+		     "   .quad  0b,1b\n"
+		     ".previous"
+#endif /* __s390x__ */
+		     : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
+		     : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
+		     : "cc", "memory" );
+	return oldval;
+}
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_S390_FUTEX_H */
-- 
cgit v1.2.3


From 58268b97f679108d32a882a7fc029585da801975 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <cborntra@de.ibm.com>
Date: Thu, 27 Apr 2006 18:40:24 -0700
Subject: [PATCH] s390: add read_mostly optimization

Add a read_mostly section and define __read_mostly to prevent cache line
pollution due to writes for mostly read variables.  In addition fix the
incorrect alignment of the cache_line_aligned data section.  s390 has a
cacheline size of 256 bytes.

Signed-off-by: Christian Borntraeger <cborntra@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/s390/kernel/vmlinux.lds.S | 4 +++-
 include/asm-s390/cache.h       | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 9289face302..9f34bb54c05 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -58,9 +58,11 @@ SECTIONS
   . = ALIGN(4096);
   .data.page_aligned : { *(.data.idt) }
 
-  . = ALIGN(32);
+  . = ALIGN(256);
   .data.cacheline_aligned : { *(.data.cacheline_aligned) }
 
+  . = ALIGN(256);
+  .data.read_mostly : { *(.data.read_mostly) }
   _edata = .;			/* End of data section */
 
   . = ALIGN(8192);		/* init_task */
diff --git a/include/asm-s390/cache.h b/include/asm-s390/cache.h
index e20cdd9074d..cdf431b061b 100644
--- a/include/asm-s390/cache.h
+++ b/include/asm-s390/cache.h
@@ -16,4 +16,6 @@
 
 #define ARCH_KMALLOC_MINALIGN	8
 
+#define __read_mostly __attribute__((__section__(".data.read_mostly")))
+
 #endif
-- 
cgit v1.2.3