-rw-r--r--  arch/i386/kernel/head.S         |  38
-rw-r--r--  arch/i386/kernel/paravirt.c     |   1
-rw-r--r--  arch/i386/kernel/vmlinux.lds.S  |   6
-rw-r--r--  arch/x86_64/kernel/traps.c      |   1
-rw-r--r--  drivers/hwmon/ams/ams-input.c   |   2
-rw-r--r--  drivers/hwmon/applesmc.c        |   2
-rw-r--r--  drivers/hwmon/hdaps.c           |   2
-rw-r--r--  drivers/i2c/busses/i2c-at91.c   |   7
-rw-r--r--  drivers/i2c/busses/i2c-pxa.c    |   2
-rw-r--r--  drivers/md/raid1.c              |  33
-rw-r--r--  fs/afs/afs_fs.h                 |   2
-rw-r--r--  fs/afs/fsclient.c               | 217
-rw-r--r--  fs/afs/rxrpc.c                  |   2
-rw-r--r--  fs/afs/write.c                  |  18
-rw-r--r--  fs/ocfs2/cluster/masklog.c      |   5
-rw-r--r--  include/asm-i386/paravirt.h     |   5
-rw-r--r--  include/linux/kernel.h          |  10
-rw-r--r--  include/linux/mm_types.h        |   7
-rw-r--r--  init/Kconfig                    |   1
-rw-r--r--  kernel/timer.c                  |  10
-rw-r--r--  mm/page_alloc.c                 |   2
-rw-r--r--  mm/slub.c                       | 159
-rw-r--r--  net/rxrpc/ar-peer.c             |   4
23 files changed, 396 insertions, 140 deletions
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index 9b10af65faa..f74dfc419b5 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -71,12 +71,6 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_
 .section .text.head,"ax",@progbits
 ENTRY(startup_32)
 
-#ifdef CONFIG_PARAVIRT
-        movl %cs, %eax
-        testl $0x3, %eax
-        jnz startup_paravirt
-#endif
-
 /*
  * Set segments to known values.
  */
@@ -501,38 +495,6 @@ ignore_int:
         iret
 
 .section .text
-#ifdef CONFIG_PARAVIRT
-startup_paravirt:
-        cld
-        movl $(init_thread_union+THREAD_SIZE),%esp
-
-        /* We take pains to preserve all the regs. */
-        pushl   %edx
-        pushl   %ecx
-        pushl   %eax
-
-        pushl   $__start_paravirtprobe
-1:
-        movl    0(%esp), %eax
-        cmpl    $__stop_paravirtprobe, %eax
-        je      unhandled_paravirt
-        pushl   (%eax)
-        movl    8(%esp), %eax
-        call    *(%esp)
-        popl    %eax
-
-        movl    4(%esp), %eax
-        movl    8(%esp), %ecx
-        movl    12(%esp), %edx
-
-        addl    $4, (%esp)
-        jmp     1b
-
-unhandled_paravirt:
-        /* Nothing wanted us: we're screwed. */
-        ud2
-#endif
-
 /*
  * Real beginning of normal "text" segment
  */
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index 5c10f376bce..faab09abca5 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -19,7 +19,6 @@
 #include <linux/module.h>
 #include <linux/efi.h>
 #include <linux/bcd.h>
-#include <linux/start_kernel.h>
 #include <linux/highmem.h>
 
 #include <asm/bug.h>
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 23e8614edee..80bec664023 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -78,12 +78,6 @@ SECTIONS
         CONSTRUCTORS
         } :data
 
-  .paravirtprobe : AT(ADDR(.paravirtprobe) - LOAD_OFFSET) {
-        __start_paravirtprobe = .;
-        *(.paravirtprobe)
-        __stop_paravirtprobe = .;
-  }
-
   . = ALIGN(4096);
   .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
         __nosave_begin = .;
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 8c2ac41187c..d28f01379b9 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -778,6 +778,7 @@ asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs)
                         return;
                 if (notify_die(DIE_NMI_POST, "nmi_post", regs, reason, 2, 0)
                                                                 == NOTIFY_STOP)
+                        return;
                 if (!do_nmi_callback(regs,cpu))
                         unknown_nmi_error(reason, regs);
diff --git a/drivers/hwmon/ams/ams-input.c b/drivers/hwmon/ams/ams-input.c
index 18210164e30..ca7095d96ad 100644
--- a/drivers/hwmon/ams/ams-input.c
+++ b/drivers/hwmon/ams/ams-input.c
@@ -87,7 +87,7 @@ static void ams_input_enable(void)
         ams_info.idev->id.vendor = 0;
         ams_info.idev->open = ams_input_open;
         ams_info.idev->close = ams_input_close;
-        ams_info.idev->cdev.dev = &ams_info.of_dev->dev;
+        ams_info.idev->dev.parent = &ams_info.of_dev->dev;
 
         input_set_abs_params(ams_info.idev, ABS_X, -50, 50, 3, 0);
         input_set_abs_params(ams_info.idev, ABS_Y, -50, 50, 3, 0);
diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index b51c104a28a..0c160675b3a 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -1100,7 +1100,7 @@ static int applesmc_create_accelerometer(void)
         /* initialize the input class */
         applesmc_idev->name = "applesmc";
         applesmc_idev->id.bustype = BUS_HOST;
-        applesmc_idev->cdev.dev = &pdev->dev;
+        applesmc_idev->dev.parent = &pdev->dev;
         applesmc_idev->evbit[0] = BIT(EV_ABS);
         applesmc_idev->open = applesmc_idev_open;
         applesmc_idev->close = applesmc_idev_close;
diff --git a/drivers/hwmon/hdaps.c b/drivers/hwmon/hdaps.c
index f82fa2d23f9..e0cf5e6fe5b 100644
--- a/drivers/hwmon/hdaps.c
+++ b/drivers/hwmon/hdaps.c
@@ -574,7 +574,7 @@ static int __init hdaps_init(void)
 
         /* initialize the input class */
         hdaps_idev->name = "hdaps";
-        hdaps_idev->cdev.dev = &pdev->dev;
+        hdaps_idev->dev.parent = &pdev->dev;
         hdaps_idev->evbit[0] = BIT(EV_ABS);
         input_set_abs_params(hdaps_idev, ABS_X,
                         -256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT);
diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c
index f35156c5892..9c8b6d5eaec 100644
--- a/drivers/i2c/busses/i2c-at91.c
+++ b/drivers/i2c/busses/i2c-at91.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/version.h>
 #include <linux/kernel.h>
+#include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/delay.h>
@@ -226,13 +227,14 @@ static int __devinit at91_i2c_probe(struct platform_device *pdev)
         adapter->algo = &at91_algorithm;
         adapter->class = I2C_CLASS_HWMON;
         adapter->dev.parent = &pdev->dev;
+        /* adapter->id == 0 ... only one TWI controller for now */
 
         platform_set_drvdata(pdev, adapter);
 
         clk_enable(twi_clk);            /* enable peripheral clock */
         at91_twi_hwinit();              /* initialize TWI controller */
 
-        rc = i2c_add_adapter(adapter);
+        rc = i2c_add_numbered_adapter(adapter);
         if (rc) {
                 dev_err(&pdev->dev, "Adapter %s registration failed\n",
                                 adapter->name);
@@ -295,6 +297,9 @@ static int at91_i2c_resume(struct platform_device *pdev)
 #define at91_i2c_resume         NULL
 #endif
 
+/* work with "modprobe at91_i2c" from hotplugging or coldplugging */
+MODULE_ALIAS("at91_i2c");
+
 static struct platform_driver at91_i2c_driver = {
         .probe          = at91_i2c_probe,
         .remove         = __devexit_p(at91_i2c_remove),
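[Annotation] The i2c-at91 hunk above switches to i2c_add_numbered_adapter(), which registers the adapter under the fixed bus number carried in the adapter structure instead of allocating one dynamically. A minimal board-code sketch of why that matters follows; the device name and address are hypothetical, and this assumes the 2.6.22-era new-style I2C board-info interface:

    #include <linux/kernel.h>
    #include <linux/init.h>
    #include <linux/i2c.h>

    /* Hypothetical devices hanging off the single AT91 TWI bus (bus 0). */
    static struct i2c_board_info __initdata board_i2c_devices[] = {
            { I2C_BOARD_INFO("example-rtc", 0x68), },  /* illustrative name/addr */
    };

    static int __init board_declare_i2c(void)
    {
            /* Declared before the adapter probes; these are matched to bus 0
             * once the at91 adapter registers with that fixed number. */
            return i2c_register_board_info(0, board_i2c_devices,
                                           ARRAY_SIZE(board_i2c_devices));
    }
    arch_initcall(board_declare_i2c);

With a dynamically numbered adapter the bus number is unknown until probe time, so static declarations like this could not name it.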
diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c
index 873544ab598..8a0a99b9364 100644
--- a/drivers/i2c/busses/i2c-pxa.c
+++ b/drivers/i2c/busses/i2c-pxa.c
@@ -548,7 +548,7 @@ static inline void i2c_pxa_stop_message(struct pxa_i2c *i2c)
          */
         icr = readl(_ICR(i2c));
         icr &= ~(ICR_STOP | ICR_ACKNAK);
-        writel(icr, _IRC(i2c));
+        writel(icr, _ICR(i2c));
 }
 
 /*
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 97ee870b265..3a95cc5e029 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -271,21 +271,25 @@ static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int
          */
         update_head_pos(mirror, r1_bio);
 
-        if (uptodate || (conf->raid_disks - conf->mddev->degraded) <= 1) {
-                /*
-                 * Set R1BIO_Uptodate in our master bio, so that
-                 * we will return a good error code for to the higher
-                 * levels even if IO on some other mirrored buffer fails.
-                 *
-                 * The 'master' represents the composite IO operation to
-                 * user-side. So if something waits for IO, then it will
-                 * wait for the 'master' bio.
+        if (uptodate)
+                set_bit(R1BIO_Uptodate, &r1_bio->state);
+        else {
+                /* If all other devices have failed, we want to return
+                 * the error upwards rather than fail the last device.
+                 * Here we redefine "uptodate" to mean "Don't want to retry"
                  */
-                if (uptodate)
-                        set_bit(R1BIO_Uptodate, &r1_bio->state);
+                unsigned long flags;
+                spin_lock_irqsave(&conf->device_lock, flags);
+                if (r1_bio->mddev->degraded == conf->raid_disks ||
+                    (r1_bio->mddev->degraded == conf->raid_disks-1 &&
+                     !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags)))
+                        uptodate = 1;
+                spin_unlock_irqrestore(&conf->device_lock, flags);
+        }
 
+        if (uptodate)
                 raid_end_bio_io(r1_bio);
-        } else {
+        else {
                 /*
                  * oops, read error:
                  */
@@ -992,13 +996,14 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
                 unsigned long flags;
                 spin_lock_irqsave(&conf->device_lock, flags);
                 mddev->degraded++;
+                set_bit(Faulty, &rdev->flags);
                 spin_unlock_irqrestore(&conf->device_lock, flags);
                 /*
                  * if recovery is running, make sure it aborts.
                  */
                 set_bit(MD_RECOVERY_ERR, &mddev->recovery);
-        }
-        set_bit(Faulty, &rdev->flags);
+        } else
+                set_bit(Faulty, &rdev->flags);
         set_bit(MD_CHANGE_DEVS, &mddev->flags);
         printk(KERN_ALERT "raid1: Disk failure on %s, disabling device.\n"
                 "       Operation continuing on %d devices\n",
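[Annotation] The new read-error logic above only retries when another usable mirror remains; otherwise it reports the read as successful upwards so the last working device is not failed. A compact model of that predicate, illustrative only, with the mddev/conf structures reduced to plain counters:

    #include <stdbool.h>

    /* Model: 'degraded' counts failed members, 'raid_disks' is the array
     * size, and 'this_disk_faulty' says whether the erroring disk is
     * already marked Faulty.  Mirrors the raid1_end_read_request() test. */
    static bool dont_retry_read(int degraded, int raid_disks,
                                bool this_disk_faulty)
    {
            if (degraded == raid_disks)             /* nothing left at all */
                    return true;
            /* all failures are elsewhere: this is the one working disk */
            if (degraded == raid_disks - 1 && !this_disk_faulty)
                    return true;
            return false;                   /* retry on another mirror */
    }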
\n" " Operation continuing on %d devices\n", diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h index 2198006d2d0..d963ef4daee 100644 --- a/fs/afs/afs_fs.h +++ b/fs/afs/afs_fs.h @@ -31,6 +31,8 @@ enum AFS_FS_Operations { FSGETVOLUMEINFO = 148, /* AFS Get root volume information */ FSGETROOTVOLUME = 151, /* AFS Get root volume name */ FSLOOKUP = 161, /* AFS lookup file in directory */ + FSFETCHDATA64 = 65537, /* AFS Fetch file data */ + FSSTOREDATA64 = 65538, /* AFS Store file data */ }; enum AFS_FS_Errors { diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 025b1903d9e..56cc0efa2a0 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -293,9 +293,33 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, case 0: call->offset = 0; call->unmarshall++; + if (call->operation_ID != FSFETCHDATA64) { + call->unmarshall++; + goto no_msw; + } - /* extract the returned data length */ + /* extract the upper part of the returned data length of an + * FSFETCHDATA64 op (which should always be 0 using this + * client) */ case 1: + _debug("extract data length (MSW)"); + ret = afs_extract_data(call, skb, last, &call->tmp, 4); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + + call->count = ntohl(call->tmp); + _debug("DATA length MSW: %u", call->count); + if (call->count > 0) + return -EBADMSG; + call->offset = 0; + call->unmarshall++; + + no_msw: + /* extract the returned data length */ + case 2: _debug("extract data length"); ret = afs_extract_data(call, skb, last, &call->tmp, 4); switch (ret) { @@ -312,7 +336,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, call->unmarshall++; /* extract the returned data */ - case 2: + case 3: _debug("extract data"); if (call->count > 0) { page = call->reply3; @@ -331,7 +355,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, call->unmarshall++; /* extract the metadata */ - case 3: + case 4: ret = afs_extract_data(call, skb, last, call->buffer, (21 + 3 + 6) * 4); switch (ret) { @@ -349,7 +373,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, call->offset = 0; call->unmarshall++; - case 4: + case 5: _debug("trailer"); if (skb->len != 0) return -EBADMSG; @@ -381,6 +405,56 @@ static const struct afs_call_type afs_RXFSFetchData = { .destructor = afs_flat_call_destructor, }; +static const struct afs_call_type afs_RXFSFetchData64 = { + .name = "FS.FetchData64", + .deliver = afs_deliver_fs_fetch_data, + .abort_to_error = afs_abort_to_error, + .destructor = afs_flat_call_destructor, +}; + +/* + * fetch data from a very large file + */ +static int afs_fs_fetch_data64(struct afs_server *server, + struct key *key, + struct afs_vnode *vnode, + off_t offset, size_t length, + struct page *buffer, + const struct afs_wait_mode *wait_mode) +{ + struct afs_call *call; + __be32 *bp; + + _enter(""); + + ASSERTCMP(length, <, ULONG_MAX); + + call = afs_alloc_flat_call(&afs_RXFSFetchData64, 32, (21 + 3 + 6) * 4); + if (!call) + return -ENOMEM; + + call->key = key; + call->reply = vnode; + call->reply2 = NULL; /* volsync */ + call->reply3 = buffer; + call->service_id = FS_SERVICE; + call->port = htons(AFS_FS_PORT); + call->operation_ID = FSFETCHDATA64; + + /* marshall the parameters */ + bp = call->request; + bp[0] = htonl(FSFETCHDATA64); + bp[1] = htonl(vnode->fid.vid); + bp[2] = htonl(vnode->fid.vnode); + bp[3] = htonl(vnode->fid.unique); + bp[4] = htonl(upper_32_bits(offset)); + bp[5] = htonl((u32) offset); + bp[6] = 0; + bp[7] = htonl((u32) length); + + return afs_make_call(&server->addr, call, 
+
 /*
  * fetch data from a file
  */
@@ -394,6 +468,10 @@ int afs_fs_fetch_data(struct afs_server *server,
         struct afs_call *call;
         __be32 *bp;
 
+        if (upper_32_bits(offset) || upper_32_bits(offset + length))
+                return afs_fs_fetch_data64(server, key, vnode, offset, length,
+                                           buffer, wait_mode);
+
         _enter("");
 
         call = afs_alloc_flat_call(&afs_RXFSFetchData, 24, (21 + 3 + 6) * 4);
@@ -406,6 +484,7 @@ int afs_fs_fetch_data(struct afs_server *server,
         call->reply3 = buffer;
         call->service_id = FS_SERVICE;
         call->port = htons(AFS_FS_PORT);
+        call->operation_ID = FSFETCHDATA;
 
         /* marshall the parameters */
         bp = call->request;
@@ -1032,6 +1111,73 @@ static const struct afs_call_type afs_RXFSStoreData = {
         .destructor     = afs_flat_call_destructor,
 };
 
+static const struct afs_call_type afs_RXFSStoreData64 = {
+        .name           = "FS.StoreData64",
+        .deliver        = afs_deliver_fs_store_data,
+        .abort_to_error = afs_abort_to_error,
+        .destructor     = afs_flat_call_destructor,
+};
+
+/*
+ * store a set of pages to a very large file
+ */
+static int afs_fs_store_data64(struct afs_server *server,
+                               struct afs_writeback *wb,
+                               pgoff_t first, pgoff_t last,
+                               unsigned offset, unsigned to,
+                               loff_t size, loff_t pos, loff_t i_size,
+                               const struct afs_wait_mode *wait_mode)
+{
+        struct afs_vnode *vnode = wb->vnode;
+        struct afs_call *call;
+        __be32 *bp;
+
+        _enter(",%x,{%x:%u},,",
+               key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
+
+        call = afs_alloc_flat_call(&afs_RXFSStoreData64,
+                                   (4 + 6 + 3 * 2) * 4,
+                                   (21 + 6) * 4);
+        if (!call)
+                return -ENOMEM;
+
+        call->wb = wb;
+        call->key = wb->key;
+        call->reply = vnode;
+        call->service_id = FS_SERVICE;
+        call->port = htons(AFS_FS_PORT);
+        call->mapping = vnode->vfs_inode.i_mapping;
+        call->first = first;
+        call->last = last;
+        call->first_offset = offset;
+        call->last_to = to;
+        call->send_pages = true;
+        call->store_version = vnode->status.data_version + 1;
+
+        /* marshall the parameters */
+        bp = call->request;
+        *bp++ = htonl(FSSTOREDATA64);
+        *bp++ = htonl(vnode->fid.vid);
+        *bp++ = htonl(vnode->fid.vnode);
+        *bp++ = htonl(vnode->fid.unique);
+
+        *bp++ = 0; /* mask */
+        *bp++ = 0; /* mtime */
+        *bp++ = 0; /* owner */
+        *bp++ = 0; /* group */
+        *bp++ = 0; /* unix mode */
+        *bp++ = 0; /* segment size */
+
+        *bp++ = htonl(pos >> 32);
+        *bp++ = htonl((u32) pos);
+        *bp++ = htonl(size >> 32);
+        *bp++ = htonl((u32) size);
+        *bp++ = htonl(i_size >> 32);
+        *bp++ = htonl((u32) i_size);
+
+        return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+}
+
 /*
  * store a set of pages
  */
@@ -1062,7 +1208,9 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
                (unsigned long long) size, (unsigned long long) pos,
                (unsigned long long) i_size);
 
-        BUG_ON(i_size > 0xffffffff); // TODO: use 64-bit store
+        if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32)
+                return afs_fs_store_data64(server, wb, first, last, offset, to,
+                                           size, pos, i_size, wait_mode);
 
         call = afs_alloc_flat_call(&afs_RXFSStoreData,
                                    (4 + 6 + 3) * 4,
@@ -1158,6 +1306,61 @@ static const struct afs_call_type afs_RXFSStoreData_as_Status = {
         .destructor     = afs_flat_call_destructor,
 };
 
+static const struct afs_call_type afs_RXFSStoreData64_as_Status = {
+        .name           = "FS.StoreData64",
+        .deliver        = afs_deliver_fs_store_status,
+        .abort_to_error = afs_abort_to_error,
+        .destructor     = afs_flat_call_destructor,
+};
+
+/*
+ * set the attributes on a very large file, using FS.StoreData rather than
+ * FS.StoreStatus so as to alter the file size also
+ */
+static int afs_fs_setattr_size64(struct afs_server *server, struct key *key,
+                                 struct afs_vnode *vnode, struct iattr *attr,
+                                 const struct afs_wait_mode *wait_mode)
+{
+        struct afs_call *call;
+        __be32 *bp;
+
+        _enter(",%x,{%x:%u},,",
+               key_serial(key), vnode->fid.vid, vnode->fid.vnode);
+
+        ASSERT(attr->ia_valid & ATTR_SIZE);
+
+        call = afs_alloc_flat_call(&afs_RXFSStoreData64_as_Status,
+                                   (4 + 6 + 3 * 2) * 4,
+                                   (21 + 6) * 4);
+        if (!call)
+                return -ENOMEM;
+
+        call->key = key;
+        call->reply = vnode;
+        call->service_id = FS_SERVICE;
+        call->port = htons(AFS_FS_PORT);
+        call->store_version = vnode->status.data_version + 1;
+        call->operation_ID = FSSTOREDATA;
+
+        /* marshall the parameters */
+        bp = call->request;
+        *bp++ = htonl(FSSTOREDATA64);
+        *bp++ = htonl(vnode->fid.vid);
+        *bp++ = htonl(vnode->fid.vnode);
+        *bp++ = htonl(vnode->fid.unique);
+
+        xdr_encode_AFS_StoreStatus(&bp, attr);
+
+        *bp++ = 0;                              /* position of start of write */
+        *bp++ = 0;
+        *bp++ = 0;                              /* size of write */
+        *bp++ = 0;
+        *bp++ = htonl(attr->ia_size >> 32);     /* new file length */
+        *bp++ = htonl((u32) attr->ia_size);
+
+        return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+}
+
 /*
  * set the attributes on a file, using FS.StoreData rather than FS.StoreStatus
  * so as to alter the file size also
  */
@@ -1173,7 +1376,9 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key,
                key_serial(key), vnode->fid.vid, vnode->fid.vnode);
 
         ASSERT(attr->ia_valid & ATTR_SIZE);
-        ASSERTCMP(attr->ia_size, <=, 0xffffffff); // TODO: use 64-bit store
+        if (attr->ia_size >> 32)
+                return afs_fs_setattr_size64(server, key, vnode, attr,
+                                             wait_mode);
 
         call = afs_alloc_flat_call(&afs_RXFSStoreData_as_Status,
                                    (4 + 6 + 3) * 4,
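[Annotation] In the marshalling above, each 64-bit position or length goes on the wire as a pair of big-endian 32-bit XDR words, most significant word first (bp[4]/bp[5] in afs_fs_fetch_data64, and the pos/size/i_size pairs in afs_fs_store_data64). A standalone userspace sketch of the same split and recombination, illustrative only and not part of the patch:

    #include <stdint.h>
    #include <arpa/inet.h>

    /* Split a 64-bit value into the two network-order 32-bit XDR words
     * used by the FetchData64/StoreData64 requests. */
    static void xdr_put_u64(uint32_t xdr[2], uint64_t v)
    {
            xdr[0] = htonl((uint32_t)(v >> 32));    /* MSW first, per XDR */
            xdr[1] = htonl((uint32_t)v);            /* then LSW */
    }

    /* Recombine the two words on receipt. */
    static uint64_t xdr_get_u64(const uint32_t xdr[2])
    {
            return ((uint64_t)ntohl(xdr[0]) << 32) | ntohl(xdr[1]);
    }

This is also why the delivery routine gains an extra unmarshalling state: an FSFETCHDATA64 reply carries a length MSW that the 32-bit opcode's reply does not.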
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 04189c47d6a..1b36f45076a 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -294,7 +294,7 @@ int afs_send_pages(struct afs_call *call, struct msghdr *msg, struct kvec *iov)
                         put_page(pages[loop]);
                 if (ret < 0)
                         break;
-        } while (first < last);
+        } while (first <= last);
 
         _leave(" = %d", ret);
         return ret;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 83ff2926281..67ae4dbf66b 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -122,7 +122,7 @@ static int afs_prepare_page(struct afs_vnode *vnode, struct page *page,
         if (offset == 0 && to == PAGE_SIZE)
                 return 0;
 
-        p = kmap(page);
+        p = kmap_atomic(page, KM_USER0);
 
         i_size = i_size_read(&vnode->vfs_inode);
         pos = (loff_t) page->index << PAGE_SHIFT;
@@ -133,7 +133,7 @@ static int afs_prepare_page(struct afs_vnode *vnode, struct page *page,
                         memset(p, 0, offset);
                 if (to < PAGE_SIZE)
                         memset(p + to, 0, PAGE_SIZE - to);
-                kunmap(page);
+                kunmap_atomic(p, KM_USER0);
                 return 0;
         }
 
@@ -152,7 +152,7 @@ static int afs_prepare_page(struct afs_vnode *vnode, struct page *page,
                         memset(p + eof, 0, PAGE_SIZE - eof);
         }
 
-        kunmap(p);
+        kunmap_atomic(p, KM_USER0);
 
         ret = 0;
         if (offset > 0 || eof > to) {
@@ -489,14 +489,6 @@ int afs_writepage(struct page *page, struct writeback_control *wbc)
 
         _enter("{%lx},", page->index);
 
-        if (wbc->sync_mode != WB_SYNC_NONE)
-                wait_on_page_writeback(page);
-
-        if (PageWriteback(page) || !PageDirty(page)) {
-                unlock_page(page);
-                return 0;
-        }
-
         wb = (struct afs_writeback *) page_private(page);
         ASSERT(wb != NULL);
 
@@ -677,7 +669,7 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
         pagevec_init(&pv, 0);
 
         do {
-                _debug("attach %lx-%lx", first, last);
+                _debug("done %lx-%lx", first, last);
 
                 count = last - first + 1;
                 if (count > PAGEVEC_SIZE)
@@ -709,7 +701,7 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
                 }
                 __pagevec_release(&pv);
 
-        } while (first < last);
+        } while (first <= last);
 
         _leave("");
 }
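[Annotation] The two `first <= last` fixes above (afs_send_pages and afs_pages_written_back) correct the same off-by-one: first and last delimit an inclusive page range, so a `<` test exits one batch early and the final page is never processed when first catches up to last. An illustrative userspace model of the batching loop, with a hypothetical batch size of 2 standing in for PAGEVEC_SIZE:

    #include <stdio.h>

    int main(void)
    {
            unsigned long first = 3, last = 5;

            do {
                    unsigned long count = last - first + 1;
                    if (count > 2)
                            count = 2;      /* PAGEVEC_SIZE-style batching */
                    printf("process %lu..%lu\n", first, first + count - 1);
                    first += count;
            } while (first <= last);        /* "<" would silently skip page 5 */
            return 0;
    }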
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 2e975c0a35e..a93620ce4ac 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -144,7 +144,8 @@ static struct kobj_type mlog_ktype = {
 };
 
 static struct kset mlog_kset = {
-        .kobj  = {.name = "logmask", .ktype = &mlog_ktype},
+        .kobj  = {.name = "logmask"},
+        .ktype = &mlog_ktype
 };
 
 int mlog_sys_init(struct kset *o2cb_subsys)
@@ -157,7 +158,7 @@ int mlog_sys_init(struct kset *o2cb_subsys)
         }
         mlog_attr_ptrs[i] = NULL;
 
-        kobj_set_kset_s(&mlog_kset, o2cb_subsys);
+        kobj_set_kset_s(&mlog_kset, *o2cb_subsys);
 
         return kset_register(&mlog_kset);
 }
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h
index bc5c12c1358..d7a0512f88e 100644
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -222,11 +222,6 @@ struct paravirt_ops
         void (*iret)(void);
 };
 
-/* Mark a paravirt probe function. */
-#define paravirt_probe(fn)                                              \
- static asmlinkage void (*__paravirtprobe_##fn)(void) __attribute_used__ \
-                __attribute__((__section__(".paravirtprobe"))) = fn
-
 extern struct paravirt_ops paravirt_ops;
 
 #define PARAVIRT_PATCH(x)                                       \
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 144b615f3a8..8645181fca0 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -41,6 +41,16 @@ extern const char linux_proc_banner[];
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
 
+/**
+ * upper_32_bits - return bits 32-63 of a number
+ * @n: the number we're accessing
+ *
+ * A basic shift-right of a 64- or 32-bit quantity.  Use this to suppress
+ * the "right shift count >= width of type" warning when that quantity is
+ * 32-bits.
+ */
+#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16))
+
 #define KERN_EMERG      "<0>"   /* system is unusable                   */
 #define KERN_ALERT      "<1>"   /* action must be taken immediately     */
 #define KERN_CRIT       "<2>"   /* critical conditions                  */
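[Annotation] The double shift in upper_32_bits() is deliberate: shifting a 32-bit value right by 32 is undefined behaviour in C and draws exactly the warning the kerneldoc mentions, whereas two 16-bit shifts are always well defined and collapse to 0 for a 32-bit argument. A small userspace demonstration, illustrative only:

    #include <stdint.h>
    #include <stdio.h>

    #define upper_32_bits(n) ((uint32_t)(((n) >> 16) >> 16))

    int main(void)
    {
            uint64_t big = 0x123456789abcdef0ULL;
            uint32_t small = 0xdeadbeef;

            printf("%x\n", upper_32_bits(big));     /* 12345678 */
            printf("%x\n", upper_32_bits(small));   /* 0: no UB, no warning */
            return 0;
    }

This is what lets afs_fs_fetch_data() test a loff_t/off_t argument for 64-bit-ness in a single expression regardless of the type's width.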
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e30687bad07..d5bb1796e12 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -50,13 +50,16 @@ struct page {
             spinlock_t ptl;
 #endif
             struct {                    /* SLUB uses */
-                struct page *first_page;        /* Compound pages */
+                void **lockless_freelist;
                 struct kmem_cache *slab;        /* Pointer to slab */
             };
+            struct {
+                struct page *first_page;        /* Compound pages */
+            };
         };
         union {
                 pgoff_t index;          /* Our offset within mapping. */
-                void *freelist;         /* SLUB: pointer to free object */
+                void *freelist;         /* SLUB: freelist req. slab lock */
         };
         struct list_head lru;           /* Pageout list, eg. active_list
                                          * protected by zone->lru_lock !
diff --git a/init/Kconfig b/init/Kconfig
index e63a017c391..322b1f8c21b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -505,6 +505,7 @@ config VM_EVENT_COUNTERS
 config SLUB_DEBUG
         default y
         bool "Enable SLUB debugging support" if EMBEDDED
+        depends on SLUB
         help
           SLUB has extensive debug support features. Disabling these can
           result in significant savings in code size.  This also disables
diff --git a/kernel/timer.c b/kernel/timer.c
index 59a28b1752f..a6c580ac084 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -92,24 +92,24 @@ static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
 /* Functions below help us manage 'deferrable' flag */
 static inline unsigned int tbase_get_deferrable(tvec_base_t *base)
 {
-        return (unsigned int)((unsigned long)base & TBASE_DEFERRABLE_FLAG);
+        return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
 }
 
 static inline tvec_base_t *tbase_get_base(tvec_base_t *base)
 {
-        return (tvec_base_t *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG);
+        return ((tvec_base_t *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
 }
 
 static inline void timer_set_deferrable(struct timer_list *timer)
 {
-        timer->base = (tvec_base_t *)((unsigned long)timer->base |
-                                      TBASE_DEFERRABLE_FLAG);
+        timer->base = ((tvec_base_t *)((unsigned long)(timer->base) |
+                                       TBASE_DEFERRABLE_FLAG));
 }
 
 static inline void
 timer_set_base(struct timer_list *timer, tvec_base_t *new_base)
 {
-        timer->base = (tvec_base_t *)((unsigned long)new_base |
+        timer->base = (tvec_base_t *)((unsigned long)(new_base) |
                                       tbase_get_deferrable(timer->base));
 }
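[Annotation] The timer.c helpers above keep TBASE_DEFERRABLE_FLAG in bit 0 of the tvec_base_t pointer, which is free because the base structure is always at least word-aligned. A generic userspace sketch of this pointer-tagging idiom, with hypothetical names:

    #include <assert.h>
    #include <stdint.h>

    #define FLAG_DEFERRABLE 0x1UL

    struct base { int dummy; };     /* assume >= 4-byte alignment */

    /* Strip the flag to recover the real pointer. */
    static inline struct base *ptr_untag(struct base *p)
    {
            return (struct base *)((uintptr_t)p & ~FLAG_DEFERRABLE);
    }

    /* Read the flag carried in the low bit. */
    static inline unsigned int ptr_flag(struct base *p)
    {
            return (uintptr_t)p & FLAG_DEFERRABLE;
    }

    /* Set the flag; valid only because aligned pointers have a zero bit 0. */
    static inline struct base *ptr_tag(struct base *p)
    {
            assert(((uintptr_t)p & FLAG_DEFERRABLE) == 0);
            return (struct base *)((uintptr_t)p | FLAG_DEFERRABLE);
    }

The extra parentheses the patch adds do not change behaviour; they make the cast/mask precedence explicit.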
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f9b5d6d5f4d..ae96dd84443 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2284,7 +2284,7 @@ static int __meminit next_active_region_index_in_nid(int index, int nid)
  * was used and there are no special requirements, this is a convenient
  * alternative
  */
-int __init early_pfn_to_nid(unsigned long pfn)
+int __meminit early_pfn_to_nid(unsigned long pfn)
 {
         int i;
 
diff --git a/mm/slub.c b/mm/slub.c
index bd2efae02bc..b39c8a69a4f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -81,10 +81,14 @@
  * PageActive           The slab is used as a cpu cache. Allocations
  *                      may be performed from the slab. The slab is not
  *                      on any slab list and cannot be moved onto one.
+ *                      The cpu slab may be equipped with an additional
+ *                      lockless_freelist that allows lockless access to
+ *                      free objects in addition to the regular freelist
+ *                      that requires the slab lock.
  *
  * PageError            Slab requires special handling due to debug
  *                      options set. This moves slab handling out of
- *                      the fast path.
+ *                      the fast path and disables lockless freelists.
  */
 
 static inline int SlabDebug(struct page *page)
@@ -1014,6 +1018,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
         set_freepointer(s, last, NULL);
 
         page->freelist = start;
+        page->lockless_freelist = NULL;
         page->inuse = 0;
 out:
         if (flags & __GFP_WAIT)
@@ -1276,6 +1281,23 @@ static void putback_slab(struct kmem_cache *s, struct page *page)
  */
 static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu)
 {
+        /*
+         * Merge cpu freelist into freelist. Typically we get here
+         * because both freelists are empty. So this is unlikely
+         * to occur.
+         */
+        while (unlikely(page->lockless_freelist)) {
+                void **object;
+
+                /* Retrieve object from cpu_freelist */
+                object = page->lockless_freelist;
+                page->lockless_freelist = page->lockless_freelist[page->offset];
+
+                /* And put onto the regular freelist */
+                object[page->offset] = page->freelist;
+                page->freelist = object;
+                page->inuse--;
+        }
         s->cpu_slab[cpu] = NULL;
         ClearPageActive(page);
 
@@ -1322,47 +1344,46 @@ static void flush_all(struct kmem_cache *s)
 }
 
 /*
- * slab_alloc is optimized to only modify two cachelines on the fast path
- * (aside from the stack):
+ * Slow path. The lockless freelist is empty or we need to perform
+ * debugging duties.
+ *
+ * Interrupts are disabled.
  *
- * 1. The page struct
- * 2. The first cacheline of the object to be allocated.
+ * Processing is still very fast if new objects have been freed to the
+ * regular freelist. In that case we simply take over the regular freelist
+ * as the lockless freelist and zap the regular freelist.
  *
- * The only other cache lines that are read (apart from code) is the
- * per cpu array in the kmem_cache struct.
+ * If that is not working then we fall back to the partial lists. We take the
+ * first element of the freelist as the object to allocate now and move the
+ * rest of the freelist to the lockless freelist.
  *
- * Fastpath is not possible if we need to get a new slab or have
- * debugging enabled (which means all slabs are marked with SlabDebug)
+ * And if we were unable to get a new slab from the partial slab lists then
+ * we need to allocate a new slab. This is the slowest path since we may
+ * sleep.
  */
-static void *slab_alloc(struct kmem_cache *s,
-                                gfp_t gfpflags, int node, void *addr)
+static void *__slab_alloc(struct kmem_cache *s,
+                gfp_t gfpflags, int node, void *addr, struct page *page)
 {
-        struct page *page;
         void **object;
-        unsigned long flags;
-        int cpu;
+        int cpu = smp_processor_id();
 
-        local_irq_save(flags);
-        cpu = smp_processor_id();
-        page = s->cpu_slab[cpu];
         if (!page)
                 goto new_slab;
 
         slab_lock(page);
         if (unlikely(node != -1 && page_to_nid(page) != node))
                 goto another_slab;
-redo:
+load_freelist:
         object = page->freelist;
         if (unlikely(!object))
                 goto another_slab;
         if (unlikely(SlabDebug(page)))
                 goto debug;
 
-have_object:
-        page->inuse++;
-        page->freelist = object[page->offset];
+        object = page->freelist;
+        page->lockless_freelist = object[page->offset];
+        page->inuse = s->objects;
+        page->freelist = NULL;
         slab_unlock(page);
-        local_irq_restore(flags);
         return object;
 
 another_slab:
@@ -1370,11 +1391,11 @@ another_slab:
 
 new_slab:
         page = get_partial(s, gfpflags, node);
-        if (likely(page)) {
+        if (page) {
 have_slab:
                 s->cpu_slab[cpu] = page;
                 SetPageActive(page);
-                goto redo;
+                goto load_freelist;
         }
 
         page = new_slab(s, gfpflags, node);
@@ -1397,7 +1418,7 @@ have_slab:
                         discard_slab(s, page);
                         page = s->cpu_slab[cpu];
                         slab_lock(page);
-                        goto redo;
+                        goto load_freelist;
                 }
                 /* New slab does not fit our expectations */
                 flush_slab(s, s->cpu_slab[cpu], cpu);
@@ -1405,16 +1426,52 @@ have_slab:
                 slab_lock(page);
                 goto have_slab;
         }
-        local_irq_restore(flags);
         return NULL;
 debug:
+        object = page->freelist;
         if (!alloc_object_checks(s, page, object))
                 goto another_slab;
         if (s->flags & SLAB_STORE_USER)
                 set_track(s, object, TRACK_ALLOC, addr);
         trace(s, page, object, 1);
         init_object(s, object, 1);
-        goto have_object;
+
+        page->inuse++;
+        page->freelist = object[page->offset];
+        slab_unlock(page);
+        return object;
+}
+
+/*
+ * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
+ * have the fastpath folded into their functions. So no function call
+ * overhead for requests that can be satisfied on the fastpath.
+ *
+ * The fastpath works by first checking if the lockless freelist can be used.
+ * If not then __slab_alloc is called for slow processing.
+ *
+ * Otherwise we can simply pick the next object from the lockless free list.
+ */
+static void __always_inline *slab_alloc(struct kmem_cache *s,
+                gfp_t gfpflags, int node, void *addr)
+{
+        struct page *page;
+        void **object;
+        unsigned long flags;
+
+        local_irq_save(flags);
+        page = s->cpu_slab[smp_processor_id()];
+        if (unlikely(!page || !page->lockless_freelist ||
+                        (node != -1 && page_to_nid(page) != node)))
+
+                object = __slab_alloc(s, gfpflags, node, addr, page);
+
+        else {
+                object = page->lockless_freelist;
+                page->lockless_freelist = object[page->offset];
+        }
+        local_irq_restore(flags);
+        return object;
 }
 
 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
@@ -1432,20 +1489,19 @@ EXPORT_SYMBOL(kmem_cache_alloc_node);
 #endif
 
 /*
- * The fastpath only writes the cacheline of the page struct and the first
- * cacheline of the object.
+ * Slow path handling. This may still be called frequently since objects
+ * have a longer lifetime than the cpu slabs in most processing loads.
  *
- * We read the cpu_slab cacheline to check if the slab is the per cpu
- * slab for this processor.
+ * So we still attempt to reduce cache line usage. Just take the slab
+ * lock and free the item. If there is no additional partial page
+ * handling required then we can return immediately.
  */
-static void slab_free(struct kmem_cache *s, struct page *page,
+static void __slab_free(struct kmem_cache *s, struct page *page,
                                         void *x, void *addr)
 {
         void *prior;
         void **object = (void *)x;
-        unsigned long flags;
 
-        local_irq_save(flags);
         slab_lock(page);
 
         if (unlikely(SlabDebug(page)))
@@ -1475,7 +1531,6 @@ checks_ok:
 
 out_unlock:
         slab_unlock(page);
-        local_irq_restore(flags);
         return;
 
 slab_empty:
@@ -1487,7 +1542,6 @@ slab_empty:
 
         slab_unlock(page);
         discard_slab(s, page);
-        local_irq_restore(flags);
         return;
 
 debug:
@@ -1502,6 +1556,34 @@ debug:
         goto checks_ok;
 }
 
+/*
+ * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
+ * can perform fastpath freeing without additional function calls.
+ *
+ * The fastpath is only possible if we are freeing to the current cpu slab
+ * of this processor. This is typically the case if we have just allocated
+ * the item before.
+ *
+ * If fastpath is not possible then fall back to __slab_free where we deal
+ * with all sorts of special processing.
+ */
+static void __always_inline slab_free(struct kmem_cache *s,
+                        struct page *page, void *x, void *addr)
+{
+        void **object = (void *)x;
+        unsigned long flags;
+
+        local_irq_save(flags);
+        if (likely(page == s->cpu_slab[smp_processor_id()] &&
+                        !SlabDebug(page))) {
+                object[page->offset] = page->lockless_freelist;
+                page->lockless_freelist = object;
+        } else
+                __slab_free(s, page, x, addr);
+
+        local_irq_restore(flags);
+}
+
 void kmem_cache_free(struct kmem_cache *s, void *x)
 {
         struct page *page;
@@ -2363,9 +2445,8 @@ void __init kmem_cache_init(void)
         register_cpu_notifier(&slab_notifier);
 #endif
 
-        if (nr_cpu_ids) /* Remove when nr_cpu_ids is fixed upstream ! */
-                kmem_size = offsetof(struct kmem_cache, cpu_slab)
-                         + nr_cpu_ids * sizeof(struct page *);
+        kmem_size = offsetof(struct kmem_cache, cpu_slab) +
+                                nr_cpu_ids * sizeof(struct page *);
 
         printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d,"
                 " MinObjects=%d, Processors=%d, Nodes=%d\n",
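[Annotation] The core of the SLUB change above is that a free object stores the link to the next free object inside itself, at word offset page->offset, so both the regular and the lockless freelist are just chains threaded through the free objects and a push or pop touches only the object's first cacheline. A minimal single-threaded userspace model of that linking, with hypothetical names and SLUB's per-CPU, locking, and page-struct details omitted:

    #include <stddef.h>

    struct slab_model {
            void *freelist;         /* head of the chain of free objects */
            unsigned long offset;   /* word offset of the embedded "next" link */
    };

    /* Allocate: unlink the head object and follow its embedded link. */
    static void *pop_object(struct slab_model *s)
    {
            void **object = s->freelist;

            if (object)
                    s->freelist = object[s->offset];
            return object;
    }

    /* Free: write the old head into the object, make it the new head. */
    static void push_object(struct slab_model *s, void *x)
    {
            void **object = x;

            object[s->offset] = s->freelist;
            s->freelist = object;
    }

In the real patch the cpu slab keeps a second such chain (lockless_freelist) that only its owning CPU touches with interrupts disabled, which is why the fastpaths need no slab lock; deactivate_slab() drains it back into the locked freelist using exactly this walk.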
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index ce08b78647c..90fa107a8af 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -59,14 +59,14 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
 
         ret = ip_route_output_key(&rt, &fl);
         if (ret < 0) {
-                kleave(" [route err %d]", ret);
+                _leave(" [route err %d]", ret);
                 return;
         }
 
         peer->if_mtu = dst_mtu(&rt->u.dst);
         dst_release(&rt->u.dst);
 
-        kleave(" [if_mtu %u]", peer->if_mtu);
+        _leave(" [if_mtu %u]", peer->if_mtu);
 }
 
 /*