From dc61b1d65e353d638b2445f71fb8e5b5630f2415 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Jun 2010 11:40:42 +0200 Subject: sched: Fix PROVE_RCU vs cpu_cgroup PROVE_RCU has a few issues with the cpu_cgroup because the scheduler typically holds rq->lock around the css rcu derefs but the generic cgroup code doesn't (and can't) know about that lock. Provide means to add extra checks to the css dereference and use that in the scheduler to annotate its users. The addition of rq->lock to these checks is correct because the cgroup_subsys::attach() method takes the rq->lock for each task it moves, therefore by holding that lock, we ensure the task is pinned to the current cgroup and the RCU dereference is valid. That leaves one genuine race in __sched_setscheduler() where we used task_group() without holding any of the required locks and thus raced with the cgroup code. Solve this by moving the check under the appropriate lock. Signed-off-by: Peter Zijlstra Cc: "Paul E. McKenney" LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/cgroup.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 0c621604baa..e3d00fdb858 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -525,13 +525,21 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state( return cgrp->subsys[subsys_id]; } -static inline struct cgroup_subsys_state *task_subsys_state( - struct task_struct *task, int subsys_id) +/* + * function to get the cgroup_subsys_state which allows for extra + * rcu_dereference_check() conditions, such as locks used during the + * cgroup_subsys::attach() methods.
+ */ +#define task_subsys_state_check(task, subsys_id, __c) \ + rcu_dereference_check(task->cgroups->subsys[subsys_id], \ + rcu_read_lock_held() || \ + lockdep_is_held(&task->alloc_lock) || \ + cgroup_lock_is_held() || (__c)) + +static inline struct cgroup_subsys_state * +task_subsys_state(struct task_struct *task, int subsys_id) { - return rcu_dereference_check(task->cgroups->subsys[subsys_id], - rcu_read_lock_held() || - lockdep_is_held(&task->alloc_lock) || - cgroup_lock_is_held()); + return task_subsys_state_check(task, subsys_id, false); } static inline struct cgroup* task_cgroup(struct task_struct *task, -- cgit v1.2.3 From dd4c4f17d722ffeb2515bf781400675a30fcead7 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Fri, 28 May 2010 16:32:14 -0400 Subject: suspend: Move NVS save/restore code to generic suspend functionality Saving platform non-volatile state may be required for suspend to RAM as well as hibernation. Move it to more generic code. Signed-off-by: Matthew Garrett Acked-by: Rafael J. 
Wysocki Tested-by: Maxim Levitsky Signed-off-by: Len Brown --- arch/x86/kernel/e820.c | 2 +- drivers/acpi/sleep.c | 12 ++-- include/linux/suspend.h | 26 ++++----- kernel/power/Kconfig | 9 +-- kernel/power/Makefile | 2 +- kernel/power/hibernate_nvs.c | 136 ------------------------------------------- kernel/power/nvs.c | 136 +++++++++++++++++++++++++++++++++++++++++++ kernel/power/suspend.c | 6 ++ 8 files changed, 168 insertions(+), 161 deletions(-) delete mode 100644 kernel/power/hibernate_nvs.c create mode 100644 kernel/power/nvs.c (limited to 'include') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 7bca3c6a02f..0d6fc71bedb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -729,7 +729,7 @@ static int __init e820_mark_nvs_memory(void) struct e820entry *ei = &e820.map[i]; if (ei->type == E820_NVS) - hibernate_nvs_register(ei->addr, ei->size); + suspend_nvs_register(ei->addr, ei->size); } return 0; diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 4ab2275b446..bcaa6efa813 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -393,7 +393,7 @@ static int acpi_hibernation_begin(void) { int error; - error = s4_no_nvs ? 0 : hibernate_nvs_alloc(); + error = s4_no_nvs ? 
0 : suspend_nvs_alloc(); if (!error) { acpi_target_sleep_state = ACPI_STATE_S4; acpi_sleep_tts_switch(acpi_target_sleep_state); @@ -407,7 +407,7 @@ static int acpi_hibernation_pre_snapshot(void) int error = acpi_pm_prepare(); if (!error) - hibernate_nvs_save(); + suspend_nvs_save(); return error; } @@ -432,7 +432,7 @@ static int acpi_hibernation_enter(void) static void acpi_hibernation_finish(void) { - hibernate_nvs_free(); + suspend_nvs_free(); acpi_pm_finish(); } @@ -452,7 +452,7 @@ static void acpi_hibernation_leave(void) panic("ACPI S4 hardware signature mismatch"); } /* Restore the NVS memory area */ - hibernate_nvs_restore(); + suspend_nvs_restore(); } static int acpi_pm_pre_restore(void) @@ -501,7 +501,7 @@ static int acpi_hibernation_begin_old(void) if (!error) { if (!s4_no_nvs) - error = hibernate_nvs_alloc(); + error = suspend_nvs_alloc(); if (!error) acpi_target_sleep_state = ACPI_STATE_S4; } @@ -513,7 +513,7 @@ static int acpi_hibernation_pre_snapshot_old(void) int error = acpi_pm_disable_gpes(); if (!error) - hibernate_nvs_save(); + suspend_nvs_save(); return error; } diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 5e781d824e6..bc7d6bb4cd8 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -256,22 +256,22 @@ static inline int hibernate(void) { return -ENOSYS; } static inline bool system_entering_hibernation(void) { return false; } #endif /* CONFIG_HIBERNATION */ -#ifdef CONFIG_HIBERNATION_NVS -extern int hibernate_nvs_register(unsigned long start, unsigned long size); -extern int hibernate_nvs_alloc(void); -extern void hibernate_nvs_free(void); -extern void hibernate_nvs_save(void); -extern void hibernate_nvs_restore(void); -#else /* CONFIG_HIBERNATION_NVS */ -static inline int hibernate_nvs_register(unsigned long a, unsigned long b) +#ifdef CONFIG_SUSPEND_NVS +extern int suspend_nvs_register(unsigned long start, unsigned long size); +extern int suspend_nvs_alloc(void); +extern void suspend_nvs_free(void); 
+extern void suspend_nvs_save(void); +extern void suspend_nvs_restore(void); +#else /* CONFIG_SUSPEND_NVS */ +static inline int suspend_nvs_register(unsigned long a, unsigned long b) { return 0; } -static inline int hibernate_nvs_alloc(void) { return 0; } -static inline void hibernate_nvs_free(void) {} -static inline void hibernate_nvs_save(void) {} -static inline void hibernate_nvs_restore(void) {} -#endif /* CONFIG_HIBERNATION_NVS */ +static inline int suspend_nvs_alloc(void) { return 0; } +static inline void suspend_nvs_free(void) {} +static inline void suspend_nvs_save(void) {} +static inline void suspend_nvs_restore(void) {} +#endif /* CONFIG_SUSPEND_NVS */ #ifdef CONFIG_PM_SLEEP void save_processor_state(void); diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 5c36ea9d55d..ca6066a6952 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -99,9 +99,13 @@ config PM_SLEEP_ADVANCED_DEBUG depends on PM_ADVANCED_DEBUG default n +config SUSPEND_NVS + bool + config SUSPEND bool "Suspend to RAM and standby" depends on PM && ARCH_SUSPEND_POSSIBLE + select SUSPEND_NVS if HAS_IOMEM default y ---help--- Allow the system to enter sleep states in which main memory is @@ -130,13 +134,10 @@ config SUSPEND_FREEZER Turning OFF this setting is NOT recommended! If in doubt, say Y. -config HIBERNATION_NVS - bool - config HIBERNATION bool "Hibernation (aka 'suspend to disk')" depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE - select HIBERNATION_NVS if HAS_IOMEM + select SUSPEND_NVS if HAS_IOMEM ---help--- Enable the suspend to disk (STD) functionality, which is usually called "hibernation" in user interfaces. 
STD checkpoints the diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 524e058dcf0..f9063c6b185 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -10,6 +10,6 @@ obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ block_io.o -obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o +obj-$(CONFIG_SUSPEND_NVS) += nvs.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/hibernate_nvs.c b/kernel/power/hibernate_nvs.c deleted file mode 100644 index fdcad9ed5a7..00000000000 --- a/kernel/power/hibernate_nvs.c +++ /dev/null @@ -1,136 +0,0 @@ -/* - * linux/kernel/power/hibernate_nvs.c - Routines for handling NVS memory - * - * Copyright (C) 2008,2009 Rafael J. Wysocki , Novell Inc. - * - * This file is released under the GPLv2. - */ - -#include -#include -#include -#include -#include -#include - -/* - * Platforms, like ACPI, may want us to save some memory used by them during - * hibernation and to restore the contents of this memory during the subsequent - * resume. The code below implements a mechanism allowing us to do that. - */ - -struct nvs_page { - unsigned long phys_start; - unsigned int size; - void *kaddr; - void *data; - struct list_head node; -}; - -static LIST_HEAD(nvs_list); - -/** - * hibernate_nvs_register - register platform NVS memory region to save - * @start - physical address of the region - * @size - size of the region - * - * The NVS region need not be page-aligned (both ends) and we arrange - * things so that the data from page-aligned addresses in this region will - * be copied into separate RAM pages. 
- */ -int hibernate_nvs_register(unsigned long start, unsigned long size) -{ - struct nvs_page *entry, *next; - - while (size > 0) { - unsigned int nr_bytes; - - entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL); - if (!entry) - goto Error; - - list_add_tail(&entry->node, &nvs_list); - entry->phys_start = start; - nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK); - entry->size = (size < nr_bytes) ? size : nr_bytes; - - start += entry->size; - size -= entry->size; - } - return 0; - - Error: - list_for_each_entry_safe(entry, next, &nvs_list, node) { - list_del(&entry->node); - kfree(entry); - } - return -ENOMEM; -} - -/** - * hibernate_nvs_free - free data pages allocated for saving NVS regions - */ -void hibernate_nvs_free(void) -{ - struct nvs_page *entry; - - list_for_each_entry(entry, &nvs_list, node) - if (entry->data) { - free_page((unsigned long)entry->data); - entry->data = NULL; - if (entry->kaddr) { - iounmap(entry->kaddr); - entry->kaddr = NULL; - } - } -} - -/** - * hibernate_nvs_alloc - allocate memory necessary for saving NVS regions - */ -int hibernate_nvs_alloc(void) -{ - struct nvs_page *entry; - - list_for_each_entry(entry, &nvs_list, node) { - entry->data = (void *)__get_free_page(GFP_KERNEL); - if (!entry->data) { - hibernate_nvs_free(); - return -ENOMEM; - } - } - return 0; -} - -/** - * hibernate_nvs_save - save NVS memory regions - */ -void hibernate_nvs_save(void) -{ - struct nvs_page *entry; - - printk(KERN_INFO "PM: Saving platform NVS memory\n"); - - list_for_each_entry(entry, &nvs_list, node) - if (entry->data) { - entry->kaddr = ioremap(entry->phys_start, entry->size); - memcpy(entry->data, entry->kaddr, entry->size); - } -} - -/** - * hibernate_nvs_restore - restore NVS memory regions - * - * This function is going to be called with interrupts disabled, so it - * cannot iounmap the virtual addresses used to access the NVS region. 
- */ -void hibernate_nvs_restore(void) -{ - struct nvs_page *entry; - - printk(KERN_INFO "PM: Restoring platform NVS memory\n"); - - list_for_each_entry(entry, &nvs_list, node) - if (entry->data) - memcpy(entry->kaddr, entry->data, entry->size); -} diff --git a/kernel/power/nvs.c b/kernel/power/nvs.c new file mode 100644 index 00000000000..1836db60bbb --- /dev/null +++ b/kernel/power/nvs.c @@ -0,0 +1,136 @@ +/* + * linux/kernel/power/hibernate_nvs.c - Routines for handling NVS memory + * + * Copyright (C) 2008,2009 Rafael J. Wysocki , Novell Inc. + * + * This file is released under the GPLv2. + */ + +#include +#include +#include +#include +#include +#include + +/* + * Platforms, like ACPI, may want us to save some memory used by them during + * suspend and to restore the contents of this memory during the subsequent + * resume. The code below implements a mechanism allowing us to do that. + */ + +struct nvs_page { + unsigned long phys_start; + unsigned int size; + void *kaddr; + void *data; + struct list_head node; +}; + +static LIST_HEAD(nvs_list); + +/** + * suspend_nvs_register - register platform NVS memory region to save + * @start - physical address of the region + * @size - size of the region + * + * The NVS region need not be page-aligned (both ends) and we arrange + * things so that the data from page-aligned addresses in this region will + * be copied into separate RAM pages. + */ +int suspend_nvs_register(unsigned long start, unsigned long size) +{ + struct nvs_page *entry, *next; + + while (size > 0) { + unsigned int nr_bytes; + + entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL); + if (!entry) + goto Error; + + list_add_tail(&entry->node, &nvs_list); + entry->phys_start = start; + nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK); + entry->size = (size < nr_bytes) ? 
size : nr_bytes; + + start += entry->size; + size -= entry->size; + } + return 0; + + Error: + list_for_each_entry_safe(entry, next, &nvs_list, node) { + list_del(&entry->node); + kfree(entry); + } + return -ENOMEM; +} + +/** + * suspend_nvs_free - free data pages allocated for saving NVS regions + */ +void suspend_nvs_free(void) +{ + struct nvs_page *entry; + + list_for_each_entry(entry, &nvs_list, node) + if (entry->data) { + free_page((unsigned long)entry->data); + entry->data = NULL; + if (entry->kaddr) { + iounmap(entry->kaddr); + entry->kaddr = NULL; + } + } +} + +/** + * suspend_nvs_alloc - allocate memory necessary for saving NVS regions + */ +int suspend_nvs_alloc(void) +{ + struct nvs_page *entry; + + list_for_each_entry(entry, &nvs_list, node) { + entry->data = (void *)__get_free_page(GFP_KERNEL); + if (!entry->data) { + suspend_nvs_free(); + return -ENOMEM; + } + } + return 0; +} + +/** + * suspend_nvs_save - save NVS memory regions + */ +void suspend_nvs_save(void) +{ + struct nvs_page *entry; + + printk(KERN_INFO "PM: Saving platform NVS memory\n"); + + list_for_each_entry(entry, &nvs_list, node) + if (entry->data) { + entry->kaddr = ioremap(entry->phys_start, entry->size); + memcpy(entry->data, entry->kaddr, entry->size); + } +} + +/** + * suspend_nvs_restore - restore NVS memory regions + * + * This function is going to be called with interrupts disabled, so it + * cannot iounmap the virtual addresses used to access the NVS region. 
+ */ +void suspend_nvs_restore(void) +{ + struct nvs_page *entry; + + printk(KERN_INFO "PM: Restoring platform NVS memory\n"); + + list_for_each_entry(entry, &nvs_list, node) + if (entry->data) + memcpy(entry->kaddr, entry->data, entry->size); +} diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 56e7dbb8b99..f37cb7dd440 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -16,6 +16,12 @@ #include #include #include +#include +#include +#include +#include +#include +#include #include "power.h" -- cgit v1.2.3 From c5444198ca210498e8ac0ba121b4cd3537aa12f7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Jun 2010 18:15:15 +0200 Subject: writeback: simplify and split bdi_start_writeback bdi_start_writeback now never gets a superblock passed, so we can just remove that case. And to further untangle the code and flatten the call stack split it into two trivial helpers for its two callers. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 32 ++++++++++++++++++++------------ include/linux/backing-dev.h | 4 ++-- mm/page-writeback.c | 5 ++--- 3 files changed, 24 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 4fcca4f7494..0079bf59b58 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -200,7 +200,6 @@ static void bdi_queue_work_onstack(struct wb_writeback_args *args) /** * bdi_start_writeback - start writeback * @bdi: the backing device to write from - * @sb: write inodes from this super_block * @nr_pages: the number of pages to write * * Description: @@ -209,25 +208,34 @@ static void bdi_queue_work_onstack(struct wb_writeback_args *args) * completion. Caller need not hold sb s_umount semaphore.
* */ -void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, - long nr_pages) +void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) { struct wb_writeback_args args = { - .sb = sb, .sync_mode = WB_SYNC_NONE, .nr_pages = nr_pages, .range_cyclic = 1, }; - /* - * We treat @nr_pages=0 as the special case to do background writeback, - * ie. to sync pages until the background dirty threshold is reached. - */ - if (!nr_pages) { - args.nr_pages = LONG_MAX; - args.for_background = 1; - } + bdi_alloc_queue_work(bdi, &args); +} +/** + * bdi_start_background_writeback - start background writeback + * @bdi: the backing device to write from + * + * Description: + * This does WB_SYNC_NONE background writeback. The IO is only + * started when this function returns, we make no guarentees on + * completion. Caller need not hold sb s_umount semaphore. + */ +void bdi_start_background_writeback(struct backing_dev_info *bdi) +{ + struct wb_writeback_args args = { + .sync_mode = WB_SYNC_NONE, + .nr_pages = LONG_MAX, + .for_background = 1, + .range_cyclic = 1, + }; bdi_alloc_queue_work(bdi, &args); } diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index aee5f6ce166..9ae2889096b 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -105,8 +105,8 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); void bdi_unregister(struct backing_dev_info *bdi); int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); -void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, - long nr_pages); +void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages); +void bdi_start_background_writeback(struct backing_dev_info *bdi); int bdi_writeback_task(struct bdi_writeback *wb); int bdi_has_dirty_io(struct backing_dev_info *bdi); void bdi_arm_supers_timer(void); diff --git a/mm/page-writeback.c 
b/mm/page-writeback.c index bbd396ac954..54f28bd493d 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -597,7 +597,7 @@ static void balance_dirty_pages(struct address_space *mapping, (!laptop_mode && ((global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS)) > background_thresh))) - bdi_start_writeback(bdi, NULL, 0); + bdi_start_background_writeback(bdi); } void set_page_dirty_balance(struct page *page, int page_mkwrite) @@ -705,9 +705,8 @@ void laptop_mode_timer_fn(unsigned long data) * We want to write everything out, not just down to the dirty * threshold */ - if (bdi_has_dirty_io(&q->backing_dev_info)) - bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages); + bdi_start_writeback(&q->backing_dev_info, nr_pages); } /* -- cgit v1.2.3 From fd247447c1d94a79d5cfc647430784306b3a8323 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 8 Jun 2010 10:49:08 +0200 Subject: ACPI / ACPICA: Fix low-level GPE manipulation code ACPICA uses acpi_ev_enable_gpe() for enabling GPEs at the low level, which is incorrect, because this function only enables the GPE if the corresponding bit in its enable register's enable_for_run mask is set. This causes acpi_set_gpe() to work incorrectly if used for enabling GPEs that were not previously enabled with acpi_enable_gpe(). As a result, among other things, wakeup-only GPEs are never enabled by acpi_enable_wakeup_device(), so the devices that use them are unable to wake up the system. To fix this issue remove acpi_ev_enable_gpe() and its counterpart acpi_ev_disable_gpe() and replace acpi_hw_low_disable_gpe() with acpi_hw_low_set_gpe() that will be used instead to manipulate GPE enable bits at the low level. Make the users of acpi_ev_enable_gpe() and acpi_ev_disable_gpe() call acpi_hw_low_set_gpe() instead and make sure that GPE enable masks are only updated by acpi_enable_gpe() and acpi_disable_gpe() when GPE reference counters change from 0 to 1 and from 1 to 0, respectively. Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Len Brown --- drivers/acpi/acpica/acevents.h | 4 -- drivers/acpi/acpica/achware.h | 3 +- drivers/acpi/acpica/evgpe.c | 108 +---------------------------------------- drivers/acpi/acpica/evxfevnt.c | 59 +++++++++++++++++++--- drivers/acpi/acpica/hwgpe.c | 26 ++++++++-- include/acpi/actypes.h | 2 +- 6 files changed, 79 insertions(+), 123 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h index 5e094a28cf5..138bbb52193 100644 --- a/drivers/acpi/acpica/acevents.h +++ b/drivers/acpi/acpica/acevents.h @@ -78,10 +78,6 @@ acpi_ev_queue_notify_request(struct acpi_namespace_node *node, acpi_status acpi_ev_update_gpe_enable_masks(struct acpi_gpe_event_info *gpe_event_info); -acpi_status acpi_ev_enable_gpe(struct acpi_gpe_event_info *gpe_event_info); - -acpi_status acpi_ev_disable_gpe(struct acpi_gpe_event_info *gpe_event_info); - struct acpi_gpe_event_info *acpi_ev_get_gpe_event_info(acpi_handle gpe_device, u32 gpe_number); diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h index c46277d179f..32391588e16 100644 --- a/drivers/acpi/acpica/achware.h +++ b/drivers/acpi/acpica/achware.h @@ -93,7 +93,8 @@ acpi_status acpi_hw_write_port(acpi_io_address address, u32 value, u32 width); u32 acpi_hw_gpe_register_bit(struct acpi_gpe_event_info *gpe_event_info, struct acpi_gpe_register_info *gpe_register_info); -acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info); +acpi_status +acpi_hw_low_set_gpe(struct acpi_gpe_event_info *gpe_event_info, u8 action); acpi_status acpi_hw_write_gpe_enable_reg(struct acpi_gpe_event_info *gpe_event_info); diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c index 57eeb3bde41..66cd03835d6 100644 --- a/drivers/acpi/acpica/evgpe.c +++ b/drivers/acpi/acpica/evgpe.c @@ -99,106 +99,6 @@ acpi_ev_update_gpe_enable_masks(struct acpi_gpe_event_info *gpe_event_info) return_ACPI_STATUS(AE_OK); } 
-/******************************************************************************* - * - * FUNCTION: acpi_ev_enable_gpe - * - * PARAMETERS: gpe_event_info - GPE to enable - * - * RETURN: Status - * - * DESCRIPTION: Hardware-enable a GPE. Always enables the GPE, regardless - * of type or number of references. - * - * Note: The GPE lock should be already acquired when this function is called. - * - ******************************************************************************/ - -acpi_status acpi_ev_enable_gpe(struct acpi_gpe_event_info *gpe_event_info) -{ - acpi_status status; - - - ACPI_FUNCTION_TRACE(ev_enable_gpe); - - - /* - * We will only allow a GPE to be enabled if it has either an - * associated method (_Lxx/_Exx) or a handler. Otherwise, the - * GPE will be immediately disabled by acpi_ev_gpe_dispatch the - * first time it fires. - */ - if (!(gpe_event_info->flags & ACPI_GPE_DISPATCH_MASK)) { - return_ACPI_STATUS(AE_NO_HANDLER); - } - - /* Ensure the HW enable masks are current */ - - status = acpi_ev_update_gpe_enable_masks(gpe_event_info); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - /* Clear the GPE (of stale events) */ - - status = acpi_hw_clear_gpe(gpe_event_info); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - /* Enable the requested GPE */ - - status = acpi_hw_write_gpe_enable_reg(gpe_event_info); - return_ACPI_STATUS(status); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ev_disable_gpe - * - * PARAMETERS: gpe_event_info - GPE to disable - * - * RETURN: Status - * - * DESCRIPTION: Hardware-disable a GPE. Always disables the requested GPE, - * regardless of the type or number of references. - * - * Note: The GPE lock should be already acquired when this function is called. 
- * - ******************************************************************************/ - -acpi_status acpi_ev_disable_gpe(struct acpi_gpe_event_info *gpe_event_info) -{ - acpi_status status; - - ACPI_FUNCTION_TRACE(ev_disable_gpe); - - - /* - * Note: Always disable the GPE, even if we think that that it is already - * disabled. It is possible that the AML or some other code has enabled - * the GPE behind our back. - */ - - /* Ensure the HW enable masks are current */ - - status = acpi_ev_update_gpe_enable_masks(gpe_event_info); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - /* - * Always H/W disable this GPE, even if we don't know the GPE type. - * Simply clear the enable bit for this particular GPE, but do not - * write out the current GPE enable mask since this may inadvertently - * enable GPEs too early. An example is a rogue GPE that has arrived - * during ACPICA initialization - possibly because AML or other code - * has enabled the GPE. - */ - status = acpi_hw_low_disable_gpe(gpe_event_info); - return_ACPI_STATUS(status); -} - /******************************************************************************* * @@ -450,10 +350,6 @@ static void ACPI_SYSTEM_XFACE acpi_ev_asynch_execute_gpe_method(void *context) return_VOID; } - /* Update the GPE register masks for return to enabled state */ - - (void)acpi_ev_update_gpe_enable_masks(gpe_event_info); - /* * Take a snapshot of the GPE info for this level - we copy the info to * prevent a race condition with remove_handler/remove_block. @@ -606,7 +502,7 @@ acpi_ev_gpe_dispatch(struct acpi_gpe_event_info *gpe_event_info, u32 gpe_number) * Disable the GPE, so it doesn't keep firing before the method has a * chance to run (it runs asynchronously with interrupts enabled). 
*/ - status = acpi_ev_disable_gpe(gpe_event_info); + status = acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_DISABLE); if (ACPI_FAILURE(status)) { ACPI_EXCEPTION((AE_INFO, status, "Unable to disable GPE[0x%2X]", @@ -643,7 +539,7 @@ acpi_ev_gpe_dispatch(struct acpi_gpe_event_info *gpe_event_info, u32 gpe_number) * Disable the GPE. The GPE will remain disabled a handler * is installed or ACPICA is restarted. */ - status = acpi_ev_disable_gpe(gpe_event_info); + status = acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_DISABLE); if (ACPI_FAILURE(status)) { ACPI_EXCEPTION((AE_INFO, status, "Unable to disable GPE[0x%2X]", diff --git a/drivers/acpi/acpica/evxfevnt.c b/drivers/acpi/acpica/evxfevnt.c index 7c7bbb4d402..e3d9f5c8e53 100644 --- a/drivers/acpi/acpica/evxfevnt.c +++ b/drivers/acpi/acpica/evxfevnt.c @@ -199,6 +199,44 @@ acpi_status acpi_enable_event(u32 event, u32 flags) ACPI_EXPORT_SYMBOL(acpi_enable_event) +/******************************************************************************* + * + * FUNCTION: acpi_clear_and_enable_gpe + * + * PARAMETERS: gpe_event_info - GPE to enable + * + * RETURN: Status + * + * DESCRIPTION: Clear the given GPE from stale events and enable it. + * + ******************************************************************************/ +static acpi_status +acpi_clear_and_enable_gpe(struct acpi_gpe_event_info *gpe_event_info) +{ + acpi_status status; + + /* + * We will only allow a GPE to be enabled if it has either an + * associated method (_Lxx/_Exx) or a handler. Otherwise, the + * GPE will be immediately disabled by acpi_ev_gpe_dispatch the + * first time it fires. 
+ */ + if (!(gpe_event_info->flags & ACPI_GPE_DISPATCH_MASK)) { + return_ACPI_STATUS(AE_NO_HANDLER); + } + + /* Clear the GPE (of stale events) */ + status = acpi_hw_clear_gpe(gpe_event_info); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* Enable the requested GPE */ + status = acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_ENABLE); + + return_ACPI_STATUS(status); +} + /******************************************************************************* * * FUNCTION: acpi_set_gpe @@ -240,11 +278,11 @@ acpi_status acpi_set_gpe(acpi_handle gpe_device, u32 gpe_number, u8 action) switch (action) { case ACPI_GPE_ENABLE: - status = acpi_ev_enable_gpe(gpe_event_info); + status = acpi_clear_and_enable_gpe(gpe_event_info); break; case ACPI_GPE_DISABLE: - status = acpi_ev_disable_gpe(gpe_event_info); + status = acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_DISABLE); break; default: @@ -307,7 +345,11 @@ acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number, u8 gpe_type) gpe_event_info->runtime_count++; if (gpe_event_info->runtime_count == 1) { - status = acpi_ev_enable_gpe(gpe_event_info); + status = acpi_ev_update_gpe_enable_masks(gpe_event_info); + if (ACPI_SUCCESS(status)) { + status = acpi_clear_and_enable_gpe(gpe_event_info); + } + if (ACPI_FAILURE(status)) { gpe_event_info->runtime_count--; goto unlock_and_exit; @@ -334,7 +376,7 @@ acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number, u8 gpe_type) */ gpe_event_info->wakeup_count++; if (gpe_event_info->wakeup_count == 1) { - (void)acpi_ev_update_gpe_enable_masks(gpe_event_info); + status = acpi_ev_update_gpe_enable_masks(gpe_event_info); } } @@ -394,7 +436,12 @@ acpi_status acpi_disable_gpe(acpi_handle gpe_device, u32 gpe_number, u8 gpe_type gpe_event_info->runtime_count--; if (!gpe_event_info->runtime_count) { - status = acpi_ev_disable_gpe(gpe_event_info); + status = acpi_ev_update_gpe_enable_masks(gpe_event_info); + if (ACPI_SUCCESS(status)) { + status = 
acpi_hw_low_set_gpe(gpe_event_info, + ACPI_GPE_DISABLE); + } + if (ACPI_FAILURE(status)) { gpe_event_info->runtime_count++; goto unlock_and_exit; @@ -415,7 +462,7 @@ acpi_status acpi_disable_gpe(acpi_handle gpe_device, u32 gpe_number, u8 gpe_type gpe_event_info->wakeup_count--; if (!gpe_event_info->wakeup_count) { - (void)acpi_ev_update_gpe_enable_masks(gpe_event_info); + status = acpi_ev_update_gpe_enable_masks(gpe_event_info); } } diff --git a/drivers/acpi/acpica/hwgpe.c b/drivers/acpi/acpica/hwgpe.c index d989b8e786c..40388e23e10 100644 --- a/drivers/acpi/acpica/hwgpe.c +++ b/drivers/acpi/acpica/hwgpe.c @@ -78,23 +78,27 @@ u32 acpi_hw_gpe_register_bit(struct acpi_gpe_event_info *gpe_event_info, /****************************************************************************** * - * FUNCTION: acpi_hw_low_disable_gpe + * FUNCTION: acpi_hw_low_set_gpe * * PARAMETERS: gpe_event_info - Info block for the GPE to be disabled + * action - Enable or disable * * RETURN: Status * - * DESCRIPTION: Disable a single GPE in the enable register. + * DESCRIPTION: Enable or disable a single GPE in its enable register. 
* ******************************************************************************/ -acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info) +acpi_status +acpi_hw_low_set_gpe(struct acpi_gpe_event_info *gpe_event_info, u8 action) { struct acpi_gpe_register_info *gpe_register_info; acpi_status status; u32 enable_mask; u32 register_bit; + ACPI_FUNCTION_ENTRY(); + /* Get the info block for the entire GPE register */ gpe_register_info = gpe_event_info->register_info; @@ -109,11 +113,23 @@ acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info) return (status); } - /* Clear just the bit that corresponds to this GPE */ + /* Set or clear just the bit that corresponds to this GPE */ register_bit = acpi_hw_gpe_register_bit(gpe_event_info, gpe_register_info); - ACPI_CLEAR_BIT(enable_mask, register_bit); + switch (action) { + case ACPI_GPE_ENABLE: + ACPI_SET_BIT(enable_mask, register_bit); + break; + + case ACPI_GPE_DISABLE: + ACPI_CLEAR_BIT(enable_mask, register_bit); + break; + + default: + ACPI_ERROR((AE_INFO, "Invalid action\n")); + return (AE_BAD_PARAMETER); + } /* Write the updated enable mask */ diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index de5e99a9953..6881f5b7b7b 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -663,7 +663,7 @@ typedef u32 acpi_event_status; #define ACPI_GPE_MAX 0xFF #define ACPI_NUM_GPE 256 -/* Actions for acpi_set_gpe */ +/* Actions for acpi_set_gpe and acpi_hw_low_set_gpe */ #define ACPI_GPE_ENABLE 0 #define ACPI_GPE_DISABLE 1 -- cgit v1.2.3 From c9a8bbb7704cbf515c0fc68970abbe4e91d68521 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 8 Jun 2010 10:49:45 +0200 Subject: ACPI / ACPICA: Avoid writing full enable masks to GPE registers ACPICA uses acpi_hw_write_gpe_enable_reg() to re-enable a GPE after an event signaled by it has been handled. However, this function writes the entire GPE enable mask to the GPE's enable register which may not be correct.
Namely, if one of the other GPEs in the same register was previously enabled by acpi_enable_gpe() and subsequently disabled using acpi_set_gpe(), acpi_hw_write_gpe_enable_reg() will re-enable it along with the target GPE. To fix this issue rework acpi_hw_write_gpe_enable_reg() so that it calls acpi_hw_low_set_gpe() with a special action value, ACPI_GPE_COND_ENABLE, that will make it only enable the GPE if the corresponding bit in its register's enable_for_run mask is set. Signed-off-by: Rafael J. Wysocki Signed-off-by: Len Brown --- drivers/acpi/acpica/hwgpe.c | 18 +++++------------- include/acpi/actypes.h | 1 + 2 files changed, 6 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/hwgpe.c b/drivers/acpi/acpica/hwgpe.c index 40388e23e10..3450309c278 100644 --- a/drivers/acpi/acpica/hwgpe.c +++ b/drivers/acpi/acpica/hwgpe.c @@ -118,6 +118,10 @@ acpi_hw_low_set_gpe(struct acpi_gpe_event_info *gpe_event_info, u8 action) register_bit = acpi_hw_gpe_register_bit(gpe_event_info, gpe_register_info); switch (action) { + case ACPI_GPE_COND_ENABLE: + if (!(register_bit & gpe_register_info->enable_for_run)) + return (AE_BAD_PARAMETER); + case ACPI_GPE_ENABLE: ACPI_SET_BIT(enable_mask, register_bit); break; @@ -154,23 +158,11 @@ acpi_hw_low_set_gpe(struct acpi_gpe_event_info *gpe_event_info, u8 action) acpi_status acpi_hw_write_gpe_enable_reg(struct acpi_gpe_event_info * gpe_event_info) { - struct acpi_gpe_register_info *gpe_register_info; acpi_status status; ACPI_FUNCTION_ENTRY(); - /* Get the info block for the entire GPE register */ - - gpe_register_info = gpe_event_info->register_info; - if (!gpe_register_info) { - return (AE_NOT_EXIST); - } - - /* Write the entire GPE (runtime) enable register */ - - status = acpi_hw_write(gpe_register_info->enable_for_run, - &gpe_register_info->enable_address); - + status = acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_COND_ENABLE); return (status); } diff --git a/include/acpi/actypes.h 
b/include/acpi/actypes.h index 6881f5b7b7b..15a4c68fad3 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -667,6 +667,7 @@ typedef u32 acpi_event_status; #define ACPI_GPE_ENABLE 0 #define ACPI_GPE_DISABLE 1 +#define ACPI_GPE_COND_ENABLE 2 /* gpe_types for acpi_enable_gpe and acpi_disable_gpe */ -- cgit v1.2.3 From d70326689b70b35527765bd3decbb1229459e928 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 26 May 2010 11:06:12 +0800 Subject: ACPICA: Fix namestring associated with AE_NO_HANDLER exception Was incorrectly AE_WAKE_ONLY_GPE. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acexcep.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h index 5958d7845bd..17714beb868 100644 --- a/include/acpi/acexcep.h +++ b/include/acpi/acexcep.h @@ -212,7 +212,7 @@ char const *acpi_gbl_exception_names_env[] = { "AE_NO_GLOBAL_LOCK", "AE_ABORT_METHOD", "AE_SAME_HANDLER", - "AE_WAKE_ONLY_GPE", + "AE_NO_HANDLER", "AE_OWNER_ID_LIMIT" }; -- cgit v1.2.3 From b681f7d9ab4d697a214fa4428795790c3a937a89 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 26 May 2010 11:50:48 +0800 Subject: ACPICA: Truncate I/O addresses to 16 bits for Windows compatibility This feature is optional and is enabled if the BIOS requests any Windows OSI strings. It can also be enabled by the host OS. 
Signed-off-by: Matthew Garrett Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/acglobal.h | 8 ++++++++ drivers/acpi/acpica/hwvalid.c | 12 ++++++++++++ drivers/acpi/acpica/nsinit.c | 9 +++++++++ include/acpi/acpixf.h | 1 + 4 files changed, 30 insertions(+) (limited to 'include') diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 9070f1fe8f1..899d68afc3c 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -125,6 +125,14 @@ u8 ACPI_INIT_GLOBAL(acpi_gbl_enable_aml_debug_object, FALSE); */ u8 ACPI_INIT_GLOBAL(acpi_gbl_copy_dsdt_locally, FALSE); +/* + * Optionally truncate I/O addresses to 16 bits. Provides compatibility + * with other ACPI implementations. NOTE: During ACPICA initialization, + * this value is set to TRUE if any Windows OSI strings have been + * requested by the BIOS. + */ +u8 ACPI_INIT_GLOBAL(acpi_gbl_truncate_io_addresses, FALSE); + /* acpi_gbl_FADT is a local copy of the FADT, converted to a common format. 
*/ struct acpi_table_fadt acpi_gbl_FADT; diff --git a/drivers/acpi/acpica/hwvalid.c b/drivers/acpi/acpica/hwvalid.c index c10d587c164..e1d9c777b21 100644 --- a/drivers/acpi/acpica/hwvalid.c +++ b/drivers/acpi/acpica/hwvalid.c @@ -222,6 +222,12 @@ acpi_status acpi_hw_read_port(acpi_io_address address, u32 *value, u32 width) u32 one_byte; u32 i; + /* Truncate address to 16 bits if requested */ + + if (acpi_gbl_truncate_io_addresses) { + address &= ACPI_UINT16_MAX; + } + /* Validate the entire request and perform the I/O */ status = acpi_hw_validate_io_request(address, width); @@ -279,6 +285,12 @@ acpi_status acpi_hw_write_port(acpi_io_address address, u32 value, u32 width) acpi_status status; u32 i; + /* Truncate address to 16 bits if requested */ + + if (acpi_gbl_truncate_io_addresses) { + address &= ACPI_UINT16_MAX; + } + /* Validate the entire request and perform the I/O */ status = acpi_hw_validate_io_request(address, width); diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c index 9bd6f050f29..4e5272c313e 100644 --- a/drivers/acpi/acpica/nsinit.c +++ b/drivers/acpi/acpica/nsinit.c @@ -193,6 +193,15 @@ acpi_status acpi_ns_initialize_devices(void) acpi_ns_init_one_device, NULL, &info, NULL); + /* + * Any _OSI requests should be completed by now. If the BIOS has + * requested any Windows OSI strings, we will always truncate + * I/O addresses to 16 bits -- for Windows compatibility. 
+ */ + if (acpi_gbl_osi_data >= ACPI_OSI_WIN_2000) { + acpi_gbl_truncate_io_addresses = TRUE; + } + ACPI_FREE(info.evaluate_info); if (ACPI_FAILURE(status)) { goto error_exit; diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 0e4ab1fe596..1371cc99739 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -69,6 +69,7 @@ extern acpi_name acpi_gbl_trace_method_name; extern u32 acpi_gbl_trace_flags; extern u8 acpi_gbl_enable_aml_debug_object; extern u8 acpi_gbl_copy_dsdt_locally; +extern u8 acpi_gbl_truncate_io_addresses; extern u32 acpi_current_gpe_count; extern struct acpi_table_fadt acpi_gbl_FADT; -- cgit v1.2.3 From dc66c74de6f4238020db3e2041d4aca5c5b3e9bc Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 2 Jun 2010 14:31:29 +0200 Subject: drbd: Fixed a race between disk-attach and unexpected state changes This was a very hard to trigger race condition. If we got a state packet from the peer, after drbd_nl_disk() has already changed the disk state to D_NEGOTIATING but after_state_ch() was not yet run by the worker, then receive_state() might call drbd_sync_handshake(), which in turn crashed when accessing p_uuid. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 2 -- drivers/block/drbd/drbd_nl.c | 6 ++++++ include/linux/drbd.h | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 6b077f93acc..7258c95e895 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1236,8 +1236,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Last part of the attaching process ... */ if (ns.conn >= C_CONNECTED && os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { - kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon.
*/ - mdev->p_uuid = NULL; /* ...to not use the old ones in the mean time */ drbd_send_sizes(mdev, 0, 0); /* to start sync... */ drbd_send_uuids(mdev); drbd_send_state(mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 632e3245d1b..2151f18b21d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1114,6 +1114,12 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp mdev->new_state_tmp.i = ns.i; ns.i = os.i; ns.disk = D_NEGOTIATING; + + /* We expect to receive up-to-date UUIDs soon. + To avoid a race in receive_state, free p_uuid while + holding req_lock. I.e. atomic with the state change */ + kfree(mdev->p_uuid); + mdev->p_uuid = NULL; } rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 30da4ae4897..b8d2516668a 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.8rc2" +#define REL_VERSION "8.3.8" #define API_VERSION 88 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 94 -- cgit v1.2.3 From da931a931da85218add949266238c54b5fecd37f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 15 Jun 2010 09:52:37 +1000 Subject: agp: drop vmalloc flag. Since the code that was too ugly to live is upstream, we can use it now, instead of rolling our own. 
Signed-off-by: Dave Airlie --- drivers/char/agp/generic.c | 4 +--- include/linux/agp_backend.h | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index 4b51982fd23..4e414417730 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c @@ -97,20 +97,18 @@ EXPORT_SYMBOL(agp_flush_chipset); void agp_alloc_page_array(size_t size, struct agp_memory *mem) { mem->pages = NULL; - mem->vmalloc_flag = false; if (size <= 2*PAGE_SIZE) mem->pages = kmalloc(size, GFP_KERNEL | __GFP_NORETRY); if (mem->pages == NULL) { mem->pages = vmalloc(size); - mem->vmalloc_flag = true; } } EXPORT_SYMBOL(agp_alloc_page_array); void agp_free_page_array(struct agp_memory *mem) { - if (mem->vmalloc_flag) { + if (is_vmalloc_addr(mem->pages)) { vfree(mem->pages); } else { kfree(mem->pages); diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h index 9101ed64f80..09ea4a1e950 100644 --- a/include/linux/agp_backend.h +++ b/include/linux/agp_backend.h @@ -79,7 +79,6 @@ struct agp_memory { u32 physical; bool is_bound; bool is_flushed; - bool vmalloc_flag; /* list of agp_memory mapped to the aperture */ struct list_head mapped_list; /* DMA-mapped addresses */ -- cgit v1.2.3 From b70e4f0529c089b00d0a6da13106db4de1ada4c7 Mon Sep 17 00:00:00 2001 From: Wu Zhangjin Date: Mon, 21 Jun 2010 19:09:09 +0800 Subject: tracing: Fix undeclared ENOSYS in include/linux/tracepoint.h The header file include/linux/tracepoint.h may be included without include/linux/errno.h and then the compiler will fail on building for undeclared ENOSYS. This patch fixes this problem by including <linux/errno.h> in include/linux/tracepoint.h.
Signed-off-by: Wu Zhangjin LKML-Reference: <1277118549-622-1-git-send-email-wuzhangjin@gmail.com> Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 9a59d1f98cd..103d1b61aac 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -14,6 +14,7 @@ * See the file COPYING for more details. */ +#include #include #include -- cgit v1.2.3 From 8f1c14b2e3b1805d3e9e6a306d07f5371ea703a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Jun 2010 00:32:03 +0000 Subject: snmp: fix SNMP_ADD_STATS() commit aa2ea0586d9d (tcp: fix outsegs stat for TSO segments) incorrectly assumed SNMP_ADD_STATS() was used from BH context. Fix this using mib[!in_softirq()] instead of mib[0] Signed-off-by: Eric Dumazet CC: Tom Herbert Signed-off-by: David S. Miller --- include/net/snmp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/snmp.h b/include/net/snmp.h index 92456f1035f..899003d18db 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -134,7 +134,7 @@ struct linux_xfrm_mib { #define SNMP_ADD_STATS_USER(mib, field, addend) \ this_cpu_add(mib[1]->mibs[field], addend) #define SNMP_ADD_STATS(mib, field, addend) \ - this_cpu_add(mib[0]->mibs[field], addend) + this_cpu_add(mib[!in_softirq()]->mibs[field], addend) /* * Use "__typeof__(*mib[0]) *ptr" instead of "__typeof__(mib[0]) ptr" * to make @ptr a non-percpu pointer. -- cgit v1.2.3 From bf988435bd5b53529f4408a8efb1f433f6ddfda9 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 28 Jun 2010 08:45:58 +0000 Subject: ethtool: Fix potential user buffer overflow for ETHTOOL_{G, S}RXFH struct ethtool_rxnfc was originally defined in 2.6.27 for the ETHTOOL_{G,S}RXFH command with only the cmd, flow_type and data fields. It was then extended in 2.6.30 to support various additional commands. 
These commands should have been defined to use a new structure, but it is too late to change that now. Since user-space may still be using the old structure definition for the ETHTOOL_{G,S}RXFH commands, and since they do not need the additional fields, only copy the originally defined fields to and from user-space. Signed-off-by: Ben Hutchings Cc: stable@kernel.org Signed-off-by: David S. Miller --- include/linux/ethtool.h | 2 ++ net/core/ethtool.c | 36 +++++++++++++++++++++++++++--------- 2 files changed, 29 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 276b40a1683..b4207ca3ad5 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -379,6 +379,8 @@ struct ethtool_rxnfc { __u32 flow_type; /* The rx flow hash value or the rule DB size */ __u64 data; + /* The following fields are not valid and must not be used for + * the ETHTOOL_{G,S}RXFH commands. */ struct ethtool_rx_flow_spec fs; __u32 rule_cnt; __u32 rule_locs[0]; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index a3a7e9a48df..75e4ffeb8cc 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -318,23 +318,33 @@ out: } static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, - void __user *useraddr) + u32 cmd, void __user *useraddr) { - struct ethtool_rxnfc cmd; + struct ethtool_rxnfc info; + size_t info_size = sizeof(info); if (!dev->ethtool_ops->set_rxnfc) return -EOPNOTSUPP; - if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + /* struct ethtool_rxnfc was originally defined for + * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data + * members. User-space might still be using that + * definition.
*/ + if (cmd == ETHTOOL_SRXFH) + info_size = (offsetof(struct ethtool_rxnfc, data) + + sizeof(info.data)); + + if (copy_from_user(&info, useraddr, info_size)) return -EFAULT; - return dev->ethtool_ops->set_rxnfc(dev, &cmd); + return dev->ethtool_ops->set_rxnfc(dev, &info); } static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, - void __user *useraddr) + u32 cmd, void __user *useraddr) { struct ethtool_rxnfc info; + size_t info_size = sizeof(info); const struct ethtool_ops *ops = dev->ethtool_ops; int ret; void *rule_buf = NULL; @@ -342,7 +352,15 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, if (!ops->get_rxnfc) return -EOPNOTSUPP; - if (copy_from_user(&info, useraddr, sizeof(info))) + /* struct ethtool_rxnfc was originally defined for + * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data + * members. User-space might still be using that + * definition. */ + if (cmd == ETHTOOL_GRXFH) + info_size = (offsetof(struct ethtool_rxnfc, data) + + sizeof(info.data)); + + if (copy_from_user(&info, useraddr, info_size)) return -EFAULT; if (info.cmd == ETHTOOL_GRXCLSRLALL) { @@ -360,7 +378,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, goto err_out; ret = -EFAULT; - if (copy_to_user(useraddr, &info, sizeof(info))) + if (copy_to_user(useraddr, &info, info_size)) goto err_out; if (rule_buf) { @@ -1517,12 +1535,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCLSRLCNT: case ETHTOOL_GRXCLSRULE: case ETHTOOL_GRXCLSRLALL: - rc = ethtool_get_rxnfc(dev, useraddr); + rc = ethtool_get_rxnfc(dev, ethcmd, useraddr); break; case ETHTOOL_SRXFH: case ETHTOOL_SRXCLSRLDEL: case ETHTOOL_SRXCLSRLINS: - rc = ethtool_set_rxnfc(dev, useraddr); + rc = ethtool_set_rxnfc(dev, ethcmd, useraddr); break; case ETHTOOL_GGRO: rc = ethtool_get_gro(dev, useraddr); -- cgit v1.2.3 From 57439f878afafefad8836ebf5c49da2a0a746105 Mon Sep 17 00:00:00 2001 From: "npiggin@suse.de" Date: Thu, 24 Jun 2010 13:02:14 
+1000 Subject: fs: fix superblock iteration race list_for_each_entry_safe is not suitable to protect against concurrent modification of the list. 6754af6 introduced a race in sb walking. list_for_each_entry can use the trick of pinning the current entry in the list before we drop and retake the lock because it subsequently follows cur->next. However list_for_each_entry_safe saves n=cur->next for following before entering the loop body, so when the lock is dropped, n may be deleted. Signed-off-by: Nick Piggin Cc: Christoph Hellwig Cc: John Stultz Cc: Frank Mayhar Cc: Al Viro Signed-off-by: Linus Torvalds --- fs/dcache.c | 2 ++ fs/super.c | 6 ++++++ include/linux/list.h | 15 +++++++++++++++ 3 files changed, 23 insertions(+) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index d96047b4a63..c8c78ba0782 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -590,6 +590,8 @@ static void prune_dcache(int count) up_read(&sb->s_umount); } spin_lock(&sb_lock); + /* lock was dropped, must reset next */ + list_safe_reset_next(sb, n, s_list); count -= pruned; __put_super(sb); /* more work left to do? 
*/ diff --git a/fs/super.c b/fs/super.c index 5c35bc7a499..938119ab8dc 100644 --- a/fs/super.c +++ b/fs/super.c @@ -374,6 +374,8 @@ void sync_supers(void) up_read(&sb->s_umount); spin_lock(&sb_lock); + /* lock was dropped, must reset next */ + list_safe_reset_next(sb, n, s_list); __put_super(sb); } } @@ -405,6 +407,8 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg) up_read(&sb->s_umount); spin_lock(&sb_lock); + /* lock was dropped, must reset next */ + list_safe_reset_next(sb, n, s_list); __put_super(sb); } spin_unlock(&sb_lock); @@ -585,6 +589,8 @@ static void do_emergency_remount(struct work_struct *work) } up_write(&sb->s_umount); spin_lock(&sb_lock); + /* lock was dropped, must reset next */ + list_safe_reset_next(sb, n, s_list); __put_super(sb); } spin_unlock(&sb_lock); diff --git a/include/linux/list.h b/include/linux/list.h index 8392884a297..5d57a3a1fa1 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -544,6 +544,21 @@ static inline void list_splice_tail_init(struct list_head *list, &pos->member != (head); \ pos = n, n = list_entry(n->member.prev, typeof(*n), member)) +/** + * list_safe_reset_next - reset a stale list_for_each_entry_safe loop + * @pos: the loop cursor used in the list_for_each_entry_safe loop + * @n: temporary storage used in list_for_each_entry_safe + * @member: the name of the list_struct within the struct. + * + * list_safe_reset_next is not safe to use in general if the list may be + * modified concurrently (eg. the lock is dropped in the loop body). An + * exception to this is if the cursor element (pos) is pinned in the list, + * and list_safe_reset_next is called after re-taking the lock and before + * completing the current iteration of the loop body. + */ +#define list_safe_reset_next(pos, n, member) \ + n = list_entry(pos->member.next, typeof(*pos), member) + /* * Double linked lists with a single pointer list head. 
* Mostly useful for hash tables where the two pointer list head is -- cgit v1.2.3 From 9c695203a7ddbe49dba5f22f4c941d24f47475df Mon Sep 17 00:00:00 2001 From: Mikael Pettersson Date: Tue, 29 Jun 2010 15:05:25 -0700 Subject: compiler-gcc.h: gcc-4.5 needs noclone and noinline on __naked functions A __naked function is defined in C but with a body completely implemented by asm(), including any prologue and epilogue. These asm() bodies expect standard calling conventions for parameter passing. Older GCCs implement that correctly, but 4.[56] currently do not, see GCC PR44290. In the Linux kernel this breaks ARM, causing most arch/arm/mm/copypage-*.c modules to get miscompiled, resulting in kernel crashes during bootup. Part of the kernel fix is to augment the __naked function attribute to also imply noinline and noclone. This patch implements that, and has been verified to fix boot failures with gcc-4.5 compiled 2.6.34 and 2.6.35-rc1 kernels. The patch is a no-op with older GCCs. Signed-off-by: Mikael Pettersson Signed-off-by: Khem Raj Cc: Russell King Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 10 +++++++++- include/linux/compiler-gcc4.h | 4 ++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 73dcf804bc9..0da5b187f12 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -58,8 +58,12 @@ * naked functions because then mcount is called without stack and frame pointer * being set up and there is no chance to restore the lr register to the value * before mcount was called. + * + * The asm() bodies of naked functions often depend on standard calling conventions, + * therefore they must be noinline and noclone. GCC 4.[56] currently fail to enforce + * this, so we must do so ourselves. See GCC PR44290. 
*/ -#define __naked __attribute__((naked)) notrace +#define __naked __attribute__((naked)) noinline __noclone notrace #define __noreturn __attribute__((noreturn)) @@ -85,3 +89,7 @@ #define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h) #define gcc_header(x) _gcc_header(x) #include gcc_header(__GNUC__) + +#if !defined(__noclone) +#define __noclone /* not needed */ +#endif diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 94dea3ffbfa..fcfa5b9a431 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -48,6 +48,10 @@ * unreleased. Really, we need to have autoconf for the kernel. */ #define unreachable() __builtin_unreachable() + +/* Mark a function definition as prohibited from being cloned. */ +#define __noclone __attribute__((__noclone__)) + #endif #endif -- cgit v1.2.3 From c59690fa484c04ab96fe932241b569a09755a4d2 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 30 Jun 2010 00:53:53 -0700 Subject: Input: i8042 - mark stubs in i8042.h "static inline" Otherwise we may run into following: drivers/platform/built-in.o: In function `i8042_lock_chip': /home/test/ws2/projects/linux-2.6/include/linux/i8042.h:50: multiple definition of `i8042_lock_chip' drivers/input/serio/built-in.o:/home/test/ws2/projects/linux-2.6/include/linux/i8042.h:50: first defined here ... 
make[1]: *** [drivers/built-in.o] Error 1 make: *** [drivers] Error 2 Signed-off-by: Feng Tang Signed-off-by: Dmitry Torokhov --- include/linux/i8042.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/i8042.h b/include/linux/i8042.h index 9bf6870ee5f..a986ff58894 100644 --- a/include/linux/i8042.h +++ b/include/linux/i8042.h @@ -46,31 +46,31 @@ int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str, #else -void i8042_lock_chip(void) +static inline void i8042_lock_chip(void) { } -void i8042_unlock_chip(void) +static inline void i8042_unlock_chip(void) { } -int i8042_command(unsigned char *param, int command) +static inline int i8042_command(unsigned char *param, int command) { return -ENODEV; } -bool i8042_check_port_owner(const struct serio *serio) +static inline bool i8042_check_port_owner(const struct serio *serio) { return false; } -int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str, +static inline int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str, struct serio *serio)) { return -ENODEV; } -int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str, +static inline int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str, struct serio *serio)) { return -ENODEV; -- cgit v1.2.3 From 4efd7e833591721bec21cc4730a7f6261417840f Mon Sep 17 00:00:00 2001 From: Andreas Steffen Date: Wed, 30 Jun 2010 10:41:15 -0700 Subject: xfrm: fix XFRMA_MARK extraction in xfrm_mark_get Determine the size of the xfrm_mark struct, not of its pointer. Signed-off-by: Andreas Steffen Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1913af67c43..fc8f36dd0f5 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1586,7 +1586,7 @@ static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb) static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m) { if (attrs[XFRMA_MARK]) - memcpy(m, nla_data(attrs[XFRMA_MARK]), sizeof(m)); + memcpy(m, nla_data(attrs[XFRMA_MARK]), sizeof(struct xfrm_mark)); else m->v = m->m = 0; -- cgit v1.2.3 From 9b2c2ff7a1c04e69842254dd4afe0f8ad4efa439 Mon Sep 17 00:00:00 2001 From: Saeed Bishara Date: Sun, 27 Jun 2010 00:26:43 +0000 Subject: mv643xx_eth: use sw csum for big packets Some controllers (KW, Dove) limits the TX IP/layer4 checksum offloading to a max size. Signed-off-by: Saeed Bishara Acked-by: Lennert Buytenhek Signed-off-by: David S. Miller --- drivers/net/mv643xx_eth.c | 9 +++++++-- include/linux/mv643xx_eth.h | 5 +++++ 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index e345ec8cb47..73bb8ea6f54 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -289,6 +289,7 @@ struct mv643xx_eth_shared_private { unsigned int t_clk; int extended_rx_coal_limit; int tx_bw_control; + int tx_csum_limit; }; #define TX_BW_CONTROL_ABSENT 0 @@ -776,13 +777,16 @@ static int txq_submit_skb(struct tx_queue *txq, struct sk_buff *skb) l4i_chk = 0; if (skb->ip_summed == CHECKSUM_PARTIAL) { + int hdr_len; int tag_bytes; BUG_ON(skb->protocol != htons(ETH_P_IP) && skb->protocol != htons(ETH_P_8021Q)); - tag_bytes = (void *)ip_hdr(skb) - (void *)skb->data - ETH_HLEN; - if (unlikely(tag_bytes & ~12)) { + hdr_len = (void *)ip_hdr(skb) - (void *)skb->data; + tag_bytes = hdr_len - ETH_HLEN; + if (skb->len - hdr_len > mp->shared->tx_csum_limit || + unlikely(tag_bytes & ~12)) { if 
(skb_checksum_help(skb) == 0) goto no_csum; kfree_skb(skb); @@ -2666,6 +2670,7 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev) * Detect hardware parameters. */ msp->t_clk = (pd != NULL && pd->t_clk != 0) ? pd->t_clk : 133000000; + msp->tx_csum_limit = pd->tx_csum_limit ? pd->tx_csum_limit : 9 * 1024; infer_hw_params(msp); platform_set_drvdata(pdev, msp); diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index cbbbe9bfeca..30b0c4e78f9 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -19,6 +19,11 @@ struct mv643xx_eth_shared_platform_data { struct mbus_dram_target_info *dram; struct platform_device *shared_smi; unsigned int t_clk; + /* + * Max packet size for Tx IP/Layer 4 checksum, when set to 0, default + * limit of 9KiB will be used. + */ + int tx_csum_limit; }; #define MV643XX_ETH_PHY_ADDR_DEFAULT 0 -- cgit v1.2.3 From b26c949755c06ec79e55a75817210083bd78fc9a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 23 Jun 2010 11:35:41 +1000 Subject: fb: fix colliding defines for fb flags. When I added the flags I must have been using a 25 line terminal and missed the following flags. The collided with flag has one user in staging despite being in-tree for 5 years. I'm happy to push this via my drm tree unless someone really wants to do it. 
Signed-off-by: Dave Airlie Cc: stable@kernel.org --- include/linux/fb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index 907ace3a64c..8e5a9dfb76b 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -786,8 +786,6 @@ struct fb_tile_ops { #define FBINFO_MISC_USEREVENT 0x10000 /* event request from userspace */ #define FBINFO_MISC_TILEBLITTING 0x20000 /* use tile blitting */ -#define FBINFO_MISC_FIRMWARE 0x40000 /* a replaceable firmware - inited framebuffer */ /* A driver may set this flag to indicate that it does want a set_par to be * called every time when fbcon_switch is executed. The advantage is that with @@ -801,6 +799,8 @@ struct fb_tile_ops { */ #define FBINFO_MISC_ALWAYS_SETPAR 0x40000 +/* where the fb is a firmware driver, and can be replaced with a proper one */ +#define FBINFO_MISC_FIRMWARE 0x80000 /* * Host and GPU endianness differ. */ -- cgit v1.2.3 From 8c215bd3890c347dfb6a2db4779755f8b9c298a9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 1 Jul 2010 09:07:17 +0200 Subject: sched: Cure nr_iowait_cpu() users Commit 0224cf4c5e (sched: Intoduce get_cpu_iowait_time_us()) broke things by not making sure preemption was indeed disabled by the callers of nr_iowait_cpu() which took the iowait value of the current cpu. This resulted in a heap of preempt warnings. Cure this by making nr_iowait_cpu() take a cpu number and fix up the callers to pass in the right number. Signed-off-by: Peter Zijlstra Cc: Arjan van de Ven Cc: Sergey Senozhatsky Cc: Rafael J. 
Wysocki Cc: Maxim Levitsky Cc: Len Brown Cc: Pavel Machek Cc: Jiri Slaby Cc: linux-pm@lists.linux-foundation.org LKML-Reference: <1277968037.1868.120.camel@laptop> Signed-off-by: Ingo Molnar --- drivers/cpuidle/governors/menu.c | 4 ++-- include/linux/sched.h | 2 +- kernel/sched.c | 4 ++-- kernel/time/tick-sched.c | 16 ++++++++-------- 4 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 52ff8aa63f8..1b128702d30 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -143,7 +143,7 @@ static inline int which_bucket(unsigned int duration) * This allows us to calculate * E(duration)|iowait */ - if (nr_iowait_cpu()) + if (nr_iowait_cpu(smp_processor_id())) bucket = BUCKETS/2; if (duration < 10) @@ -175,7 +175,7 @@ static inline int performance_multiplier(void) mult += 2 * get_loadavg(); /* for IO wait tasks (per cpu!) we add 5x each */ - mult += 10 * nr_iowait_cpu(); + mult += 10 * nr_iowait_cpu(smp_processor_id()); return mult; } diff --git a/include/linux/sched.h b/include/linux/sched.h index f118809c953..747fcaedddb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -139,7 +139,7 @@ extern int nr_processes(void); extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_iowait(void); -extern unsigned long nr_iowait_cpu(void); +extern unsigned long nr_iowait_cpu(int cpu); extern unsigned long this_cpu_load(void); diff --git a/kernel/sched.c b/kernel/sched.c index a24d6d5d83f..f87abe3b017 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2864,9 +2864,9 @@ unsigned long nr_iowait(void) return sum; } -unsigned long nr_iowait_cpu(void) +unsigned long nr_iowait_cpu(int cpu) { - struct rq *this = this_rq(); + struct rq *this = cpu_rq(cpu); return atomic_read(&this->nr_iowait); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 
1d7b9bc1c03..1a6f828e57a 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -154,14 +154,14 @@ static void tick_nohz_update_jiffies(ktime_t now) * Updates the per cpu time idle statistics counters */ static void -update_ts_time_stats(struct tick_sched *ts, ktime_t now, u64 *last_update_time) +update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time) { ktime_t delta; if (ts->idle_active) { delta = ktime_sub(now, ts->idle_entrytime); ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); - if (nr_iowait_cpu() > 0) + if (nr_iowait_cpu(cpu) > 0) ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); ts->idle_entrytime = now; } @@ -175,19 +175,19 @@ static void tick_nohz_stop_idle(int cpu, ktime_t now) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); - update_ts_time_stats(ts, now, NULL); + update_ts_time_stats(cpu, ts, now, NULL); ts->idle_active = 0; sched_clock_idle_wakeup_event(0); } -static ktime_t tick_nohz_start_idle(struct tick_sched *ts) +static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) { ktime_t now; now = ktime_get(); - update_ts_time_stats(ts, now, NULL); + update_ts_time_stats(cpu, ts, now, NULL); ts->idle_entrytime = now; ts->idle_active = 1; @@ -216,7 +216,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) if (!tick_nohz_enabled) return -1; - update_ts_time_stats(ts, ktime_get(), last_update_time); + update_ts_time_stats(cpu, ts, ktime_get(), last_update_time); return ktime_to_us(ts->idle_sleeptime); } @@ -242,7 +242,7 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) if (!tick_nohz_enabled) return -1; - update_ts_time_stats(ts, ktime_get(), last_update_time); + update_ts_time_stats(cpu, ts, ktime_get(), last_update_time); return ktime_to_us(ts->iowait_sleeptime); } @@ -284,7 +284,7 @@ void tick_nohz_stop_sched_tick(int inidle) */ ts->inidle = 1; - now = tick_nohz_start_idle(ts); + now = tick_nohz_start_idle(cpu, ts); /* * If this cpu is 
offline and it is the one which updates -- cgit v1.2.3 From c6353b4520788e34098bbf61c73fb9618ca7fdd6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 17 Jun 2010 11:42:22 +0200 Subject: ahci,ata_generic: let ata_generic handle new MBP w/ MCP89 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For yet unknown reason, MCP89 on MBP 7,1 doesn't work w/ ahci under linux but the controller doesn't require explicit mode setting and works fine with ata_generic. Make ahci ignore the controller on MBP 7,1 and let ata_generic take it for now. Reported in bko#15923. https://bugzilla.kernel.org/show_bug.cgi?id=15923 NVIDIA is investigating why ahci mode doesn't work. Signed-off-by: Tejun Heo Cc: Peer Chen Cc: stable@kernel.org Reported-by: Anders Østhus Reported-by: Andreas Graf Reported-by: Benoit Gschwind Reported-by: Damien Cassou Reported-by: tixetsal@juno.com Signed-off-by: Jeff Garzik --- drivers/ata/ahci.c | 10 ++++++++++ drivers/ata/ata_generic.c | 6 ++++++ include/linux/pci_ids.h | 1 + 3 files changed, 17 insertions(+) (limited to 'include') diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 8ca16f54e1e..f2522534ae6 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1053,6 +1053,16 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (pdev->vendor == PCI_VENDOR_ID_MARVELL && !marvell_enable) return -ENODEV; + /* + * For some reason, MCP89 on MacBook 7,1 doesn't work with + * ahci, use ata_generic instead. + */ + if (pdev->vendor == PCI_VENDOR_ID_NVIDIA && + pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP89_SATA && + pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE && + pdev->subsystem_device == 0xcb89) + return -ENODEV; + /* Promise's PDC42819 is a SAS/SATA controller that has an AHCI mode. * At the moment, we can only use the AHCI mode. Let the users know * that for SAS drives they're out of luck. 
diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c index 573158a9668..d4ccf74c4c9 100644 --- a/drivers/ata/ata_generic.c +++ b/drivers/ata/ata_generic.c @@ -168,6 +168,12 @@ static struct pci_device_id ata_generic[] = { { PCI_DEVICE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C561), }, { PCI_DEVICE(PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C558), }, { PCI_DEVICE(PCI_VENDOR_ID_CENATEK,PCI_DEVICE_ID_CENATEK_IDE), }, + /* + * For some reason, MCP89 on MacBook 7,1 doesn't work with + * ahci, use ata_generic instead. + */ + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP89_SATA, + PCI_VENDOR_ID_APPLE, 0xcb89, }, #if !defined(CONFIG_PATA_TOSHIBA) && !defined(CONFIG_PATA_TOSHIBA_MODULE) { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_1), }, { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_2), }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 4eb467910a4..3bedcc149c8 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1261,6 +1261,7 @@ #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP77_IDE 0x0759 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_SMBUS 0x07D8 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP79_SMBUS 0x0AA2 +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP89_SATA 0x0D85 #define PCI_VENDOR_ID_IMS 0x10e0 #define PCI_DEVICE_ID_IMS_TT128 0x9128 -- cgit v1.2.3 From 4ef6acff83222f4496ceef7d1f0ee9e50a5bb403 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 1 Jul 2010 13:21:35 +0000 Subject: sched: qdisc_reset_all_tx is calling qdisc_reset without qdisc_lock When calling qdisc_reset() the qdisc lock needs to be held. In this case there is at least one driver i4l which is using this without holding the lock. Add the locking here. Signed-off-by: John Fastabend Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- include/net/sch_generic.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 03ca5d82675..ba749be1e35 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -317,8 +317,16 @@ extern void tcf_destroy_chain(struct tcf_proto **fl); static inline void qdisc_reset_all_tx(struct net_device *dev) { unsigned int i; - for (i = 0; i < dev->num_tx_queues; i++) - qdisc_reset(netdev_get_tx_queue(dev, i)->qdisc); + struct Qdisc *qdisc; + + for (i = 0; i < dev->num_tx_queues; i++) { + qdisc = netdev_get_tx_queue(dev, i)->qdisc; + if (qdisc) { + spin_lock_bh(qdisc_lock(qdisc)); + qdisc_reset(qdisc); + spin_unlock_bh(qdisc_lock(qdisc)); + } + } } /* Are all TX queues of the device empty? */ -- cgit v1.2.3 From f0796d5c73e59786d09a1e617689d1d415f2db44 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 1 Jul 2010 13:21:57 +0000 Subject: net: decreasing real_num_tx_queues needs to flush qdisc Reducing real_num_tx_queues needs to flush the qdisc otherwise skbs with queue_mappings greater than real_num_tx_queues can be sent to the underlying driver. The flow for this is, dev_queue_xmit() dev_pick_tx() skb_tx_hash() => hash using real_num_tx_queues skb_set_queue_mapping() ... qdisc_enqueue_root() => enqueue skb on txq from hash ... dev->real_num_tx_queues -= n ... sch_direct_xmit() dev_hard_start_xmit() ndo_start_xmit(skb,dev) => skb queue set with old hash skbs are enqueued on the qdisc with skb->queue_mapping set 0 < queue_mappings < real_num_tx_queues. When the driver decreases real_num_tx_queues skbs may be dequeued from the qdisc with a queue_mapping greater than real_num_tx_queues. This fixes a case in ixgbe where this was occurring with DCB and FCoE. Because the driver is using queue_mapping to map skbs to tx descriptor rings we can potentially map skbs to rings that no longer exist. 
Signed-off-by: John Fastabend Tested-by: Ross Brattain Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_main.c | 2 +- include/linux/netdevice.h | 3 +++ include/net/sch_generic.h | 12 ++++++++---- net/core/dev.c | 18 ++++++++++++++++++ 4 files changed, 30 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index a0b33165b98..7b5d9764f31 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -4001,7 +4001,7 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter) done: /* Notify the stack of the (possibly) reduced Tx Queue count. */ - adapter->netdev->real_num_tx_queues = adapter->num_tx_queues; + netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues); } static void ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 40291f37502..5e6188d9f01 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1656,6 +1656,9 @@ static inline int netif_is_multiqueue(const struct net_device *dev) return (dev->num_tx_queues > 1); } +extern void netif_set_real_num_tx_queues(struct net_device *dev, + unsigned int txq); + /* Use this variant when it is known for sure that it * is executing from hardware interrupt context or with hardware interrupts * disabled. diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index ba749be1e35..433604bb3fe 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -313,13 +313,12 @@ extern void qdisc_calculate_pkt_len(struct sk_buff *skb, extern void tcf_destroy(struct tcf_proto *tp); extern void tcf_destroy_chain(struct tcf_proto **fl); -/* Reset all TX qdiscs of a device. */ -static inline void qdisc_reset_all_tx(struct net_device *dev) +/* Reset all TX qdiscs greater then index of a device. 
*/ +static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) { - unsigned int i; struct Qdisc *qdisc; - for (i = 0; i < dev->num_tx_queues; i++) { + for (; i < dev->num_tx_queues; i++) { qdisc = netdev_get_tx_queue(dev, i)->qdisc; if (qdisc) { spin_lock_bh(qdisc_lock(qdisc)); @@ -329,6 +328,11 @@ static inline void qdisc_reset_all_tx(struct net_device *dev) } } +static inline void qdisc_reset_all_tx(struct net_device *dev) +{ + qdisc_reset_all_tx_gt(dev, 0); +} + /* Are all TX queues of the device empty? */ static inline bool qdisc_all_tx_empty(const struct net_device *dev) { diff --git a/net/core/dev.c b/net/core/dev.c index 2b3bf53bc68..723a34710ad 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1553,6 +1553,24 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) rcu_read_unlock(); } +/* + * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues + * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. 
+ */ +void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) +{ + unsigned int real_num = dev->real_num_tx_queues; + + if (unlikely(txq > dev->num_tx_queues)) + ; + else if (txq > real_num) + dev->real_num_tx_queues = txq; + else if (txq < real_num) { + dev->real_num_tx_queues = txq; + qdisc_reset_all_tx_gt(dev, txq); + } +} +EXPORT_SYMBOL(netif_set_real_num_tx_queues); static inline void __netif_reschedule(struct Qdisc *q) { -- cgit v1.2.3 From e2aec372ff4b7e78e79c308104a860ae0ed20950 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 1 Jul 2010 13:18:58 +0000 Subject: linux/net.h: fix kernel-doc warnings Fix kernel-doc warnings in linux/net.h: Warning(include/linux/net.h:151): No description found for parameter 'wq' Warning(include/linux/net.h:151): Excess struct/union/enum/typedef member 'fasync_list' description in 'socket' Warning(include/linux/net.h:151): Excess struct/union/enum/typedef member 'wait' description in 'socket' Signed-off-by: Randy Dunlap Signed-off-by: David S. 
Miller --- include/linux/net.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 2b4deeeb864..dee0b11a875 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -129,10 +129,9 @@ struct socket_wq { * @type: socket type (%SOCK_STREAM, etc) * @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc) * @ops: protocol specific socket operations - * @fasync_list: Asynchronous wake up list * @file: File back pointer for gc * @sk: internal networking protocol agnostic socket representation - * @wait: wait queue for several uses + * @wq: wait queue for several uses */ struct socket { socket_state state; -- cgit v1.2.3 From ff49d74ad383f54041378144ca1a229ee9aeaa59 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Sat, 3 Jul 2010 13:07:35 +1000 Subject: module: initialize module dynamic debug later We should initialize the module dynamic debug data structures only after determining that the module is not loaded yet. This fixes a bug that was introduced in 2.6.35-rc2 where, when trying to load a module twice, we also load its dynamic printing data twice, which causes all sorts of nasty issues. Also handle the dynamic debug cleanup later on failure. 
Signed-off-by: Yehuda Sadeh Signed-off-by: Rusty Russell (removed a #ifdef) Signed-off-by: Linus Torvalds --- include/linux/dynamic_debug.h | 4 ++-- kernel/module.c | 23 +++++++++++++++-------- lib/dynamic_debug.c | 2 +- 3 files changed, 18 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index b3cd4de9432..52c0da4bdd1 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -40,7 +40,7 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, const char *modname); #if defined(CONFIG_DYNAMIC_DEBUG) -extern int ddebug_remove_module(char *mod_name); +extern int ddebug_remove_module(const char *mod_name); #define __dynamic_dbg_enabled(dd) ({ \ int __ret = 0; \ @@ -73,7 +73,7 @@ extern int ddebug_remove_module(char *mod_name); #else -static inline int ddebug_remove_module(char *mod) +static inline int ddebug_remove_module(const char *mod) { return 0; } diff --git a/kernel/module.c b/kernel/module.c index 8c6b42840dd..5d2d28197c8 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2062,6 +2062,12 @@ static void dynamic_debug_setup(struct _ddebug *debug, unsigned int num) #endif } +static void dynamic_debug_remove(struct _ddebug *debug) +{ + if (debug) + ddebug_remove_module(debug->modname); +} + static void *module_alloc_update_bounds(unsigned long size) { void *ret = module_alloc(size); @@ -2124,6 +2130,8 @@ static noinline struct module *load_module(void __user *umod, void *ptr = NULL; /* Stops spurious gcc warning */ unsigned long symoffs, stroffs, *strmap; void __percpu *percpu; + struct _ddebug *debug = NULL; + unsigned int num_debug = 0; mm_segment_t old_fs; @@ -2476,15 +2484,9 @@ static noinline struct module *load_module(void __user *umod, kfree(strmap); strmap = NULL; - if (!mod->taints) { - struct _ddebug *debug; - unsigned int num_debug; - + if (!mod->taints) debug = section_objs(hdr, sechdrs, secstrings, "__verbose", sizeof(*debug), &num_debug); - 
if (debug) - dynamic_debug_setup(debug, num_debug); - } err = module_finalize(hdr, sechdrs, mod); if (err < 0) @@ -2526,10 +2528,13 @@ static noinline struct module *load_module(void __user *umod, goto unlock; } + if (debug) + dynamic_debug_setup(debug, num_debug); + /* Find duplicate symbols */ err = verify_export_symbols(mod); if (err < 0) - goto unlock; + goto ddebug; list_add_rcu(&mod->list, &modules); mutex_unlock(&module_mutex); @@ -2557,6 +2562,8 @@ static noinline struct module *load_module(void __user *umod, mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + ddebug: + dynamic_debug_remove(debug); unlock: mutex_unlock(&module_mutex); synchronize_sched(); diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 3df8eb17a60..02afc253372 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -692,7 +692,7 @@ static void ddebug_table_free(struct ddebug_table *dt) * Called in response to a module being unloaded. Removes * any ddebug_table's which point at the module. */ -int ddebug_remove_module(char *mod_name) +int ddebug_remove_module(const char *mod_name) { struct ddebug_table *dt, *nextdt; int ret = -ENOENT; -- cgit v1.2.3 From b945d6b2554d550fe95caadc61e521c0ad71fb9c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 29 May 2010 15:31:43 +0200 Subject: rbtree: Undo augmented trees performance damage and regression Reimplement augmented RB-trees without sprinkling extra branches all over the RB-tree code (which lives in the scheduler hot path). This approach is 'borrowed' from Fabio's BFQ implementation and relies on traversing the rebalance path after the RB-tree-op to correct the heap property for insertion/removal and make up for the damage done by the tree rotations. 
For insertion the rebalance path is trivially that from the new node upwards to the root, for removal it is that from the deepest node in the path from the to be removed node that will still be around after the removal. [ This patch also fixes a video driver regression reported by Ali Gholami Rudi - the memtype->subtree_max_end was updated incorrectly. ] Acked-by: Suresh Siddha Acked-by: Venkatesh Pallipadi Signed-off-by: Peter Zijlstra Tested-by: Ali Gholami Rudi Cc: Fabio Checconi Cc: "H. Peter Anvin" Cc: Andrew Morton Cc: Linus Torvalds LKML-Reference: <1275414172.27810.27961.camel@twins> Signed-off-by: Ingo Molnar --- arch/x86/mm/pat_rbtree.c | 34 +++----------- include/linux/rbtree.h | 13 ++++-- lib/rbtree.c | 116 +++++++++++++++++++++++++++++------------------ 3 files changed, 87 insertions(+), 76 deletions(-) (limited to 'include') diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c index f20eeec85a8..8acaddd0fb2 100644 --- a/arch/x86/mm/pat_rbtree.c +++ b/arch/x86/mm/pat_rbtree.c @@ -34,8 +34,7 @@ * memtype_lock protects the rbtree. 
*/ -static void memtype_rb_augment_cb(struct rb_node *node); -static struct rb_root memtype_rbroot = RB_AUGMENT_ROOT(&memtype_rb_augment_cb); +static struct rb_root memtype_rbroot = RB_ROOT; static int is_node_overlap(struct memtype *node, u64 start, u64 end) { @@ -56,7 +55,7 @@ static u64 get_subtree_max_end(struct rb_node *node) } /* Update 'subtree_max_end' for a node, based on node and its children */ -static void update_node_max_end(struct rb_node *node) +static void memtype_rb_augment_cb(struct rb_node *node, void *__unused) { struct memtype *data; u64 max_end, child_max_end; @@ -78,25 +77,6 @@ static void update_node_max_end(struct rb_node *node) data->subtree_max_end = max_end; } -/* Update 'subtree_max_end' for a node and all its ancestors */ -static void update_path_max_end(struct rb_node *node) -{ - u64 old_max_end, new_max_end; - - while (node) { - struct memtype *data = container_of(node, struct memtype, rb); - - old_max_end = data->subtree_max_end; - update_node_max_end(node); - new_max_end = data->subtree_max_end; - - if (new_max_end == old_max_end) - break; - - node = rb_parent(node); - } -} - /* Find the first (lowest start addr) overlapping range from rb tree */ static struct memtype *memtype_rb_lowest_match(struct rb_root *root, u64 start, u64 end) @@ -190,12 +170,6 @@ failure: return -EBUSY; } -static void memtype_rb_augment_cb(struct rb_node *node) -{ - if (node) - update_path_max_end(node); -} - static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) { struct rb_node **node = &(root->rb_node); @@ -213,6 +187,7 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) rb_link_node(&newdata->rb, parent, node); rb_insert_color(&newdata->rb, root); + rb_augment_insert(&newdata->rb, memtype_rb_augment_cb, NULL); } int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) @@ -234,13 +209,16 @@ int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) struct memtype 
*rbt_memtype_erase(u64 start, u64 end) { + struct rb_node *deepest; struct memtype *data; data = memtype_rb_exact_match(&memtype_rbroot, start, end); if (!data) goto out; + deepest = rb_augment_erase_begin(&data->rb); rb_erase(&data->rb, &memtype_rbroot); + rb_augment_erase_end(deepest, memtype_rb_augment_cb, NULL); out: return data; } diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index fe1872e5b37..7066acb2c53 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -110,7 +110,6 @@ struct rb_node struct rb_root { struct rb_node *rb_node; - void (*augment_cb)(struct rb_node *node); }; @@ -130,9 +129,7 @@ static inline void rb_set_color(struct rb_node *rb, int color) rb->rb_parent_color = (rb->rb_parent_color & ~1) | color; } -#define RB_ROOT (struct rb_root) { NULL, NULL, } -#define RB_AUGMENT_ROOT(x) (struct rb_root) { NULL, x} - +#define RB_ROOT (struct rb_root) { NULL, } #define rb_entry(ptr, type, member) container_of(ptr, type, member) #define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) @@ -142,6 +139,14 @@ static inline void rb_set_color(struct rb_node *rb, int color) extern void rb_insert_color(struct rb_node *, struct rb_root *); extern void rb_erase(struct rb_node *, struct rb_root *); +typedef void (*rb_augment_f)(struct rb_node *node, void *data); + +extern void rb_augment_insert(struct rb_node *node, + rb_augment_f func, void *data); +extern struct rb_node *rb_augment_erase_begin(struct rb_node *node); +extern void rb_augment_erase_end(struct rb_node *node, + rb_augment_f func, void *data); + /* Find logical next and previous nodes in a tree */ extern struct rb_node *rb_next(const struct rb_node *); extern struct rb_node *rb_prev(const struct rb_node *); diff --git a/lib/rbtree.c b/lib/rbtree.c index 15e10b1afdd..4693f79195d 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -44,11 +44,6 @@ static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) else root->rb_node = right; rb_set_parent(node, right); - - if 
(root->augment_cb) { - root->augment_cb(node); - root->augment_cb(right); - } } static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) @@ -72,20 +67,12 @@ static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) else root->rb_node = left; rb_set_parent(node, left); - - if (root->augment_cb) { - root->augment_cb(node); - root->augment_cb(left); - } } void rb_insert_color(struct rb_node *node, struct rb_root *root) { struct rb_node *parent, *gparent; - if (root->augment_cb) - root->augment_cb(node); - while ((parent = rb_parent(node)) && rb_is_red(parent)) { gparent = rb_parent(parent); @@ -240,15 +227,12 @@ void rb_erase(struct rb_node *node, struct rb_root *root) else { struct rb_node *old = node, *left; - int old_parent_cb = 0; - int successor_parent_cb = 0; node = node->rb_right; while ((left = node->rb_left) != NULL) node = left; if (rb_parent(old)) { - old_parent_cb = 1; if (rb_parent(old)->rb_left == old) rb_parent(old)->rb_left = node; else @@ -263,10 +247,8 @@ void rb_erase(struct rb_node *node, struct rb_root *root) if (parent == old) { parent = node; } else { - successor_parent_cb = 1; if (child) rb_set_parent(child, parent); - parent->rb_left = child; node->rb_right = old->rb_right; @@ -277,24 +259,6 @@ void rb_erase(struct rb_node *node, struct rb_root *root) node->rb_left = old->rb_left; rb_set_parent(old->rb_left, node); - if (root->augment_cb) { - /* - * Here, three different nodes can have new children. - * The parent of the successor node that was selected - * to replace the node to be erased. - * The node that is getting erased and is now replaced - * by its successor. - * The parent of the node getting erased-replaced. 
- */ - if (successor_parent_cb) - root->augment_cb(parent); - - root->augment_cb(node); - - if (old_parent_cb) - root->augment_cb(rb_parent(old)); - } - goto color; } @@ -303,19 +267,15 @@ void rb_erase(struct rb_node *node, struct rb_root *root) if (child) rb_set_parent(child, parent); - - if (parent) { + if (parent) + { if (parent->rb_left == node) parent->rb_left = child; else parent->rb_right = child; - - if (root->augment_cb) - root->augment_cb(parent); - - } else { - root->rb_node = child; } + else + root->rb_node = child; color: if (color == RB_BLACK) @@ -323,6 +283,74 @@ void rb_erase(struct rb_node *node, struct rb_root *root) } EXPORT_SYMBOL(rb_erase); +static void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data) +{ + struct rb_node *parent; + +up: + func(node, data); + parent = rb_parent(node); + if (!parent) + return; + + if (node == parent->rb_left && parent->rb_right) + func(parent->rb_right, data); + else if (parent->rb_left) + func(parent->rb_left, data); + + node = parent; + goto up; +} + +/* + * after inserting @node into the tree, update the tree to account for + * both the new entry and any damage done by rebalance + */ +void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data) +{ + if (node->rb_left) + node = node->rb_left; + else if (node->rb_right) + node = node->rb_right; + + rb_augment_path(node, func, data); +} + +/* + * before removing the node, find the deepest node on the rebalance path + * that will still be there after @node gets removed + */ +struct rb_node *rb_augment_erase_begin(struct rb_node *node) +{ + struct rb_node *deepest; + + if (!node->rb_right && !node->rb_left) + deepest = rb_parent(node); + else if (!node->rb_right) + deepest = node->rb_left; + else if (!node->rb_left) + deepest = node->rb_right; + else { + deepest = rb_next(node); + if (deepest->rb_right) + deepest = deepest->rb_right; + else if (rb_parent(deepest) != node) + deepest = rb_parent(deepest); + } + + return deepest; +} 
+ +/* + * after removal, update the tree to account for the removed entry + * and any rebalance damage. + */ +void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data) +{ + if (node) + rb_augment_path(node, func, data); +} + /* * This function returns the first node (in sort order) of the tree. */ -- cgit v1.2.3 From bcfcc450baaaa44afc1d3c51ef96a53338ff0eb2 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 2 Jul 2010 07:08:44 +0000 Subject: net: Fix definition of netif_vdbg() when VERBOSE_DEBUG is defined netif_vdbg() was originally defined as entirely equivalent to netdev_vdbg(), but I assume that it was intended to take the same parameters as netif_dbg() etc. (Currently it is only used by the sfc driver, in which I worked on that assumption.) In commit a4ed89c I changed the definition used when VERBOSE_DEBUG is not defined, but I failed to notice that the definition used when VERBOSE_DEBUG is defined was also not as I expected. Change that to match netif_dbg() as well. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5e6188d9f01..b21e4054c12 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2332,7 +2332,7 @@ do { \ #endif #if defined(VERBOSE_DEBUG) -#define netif_vdbg netdev_dbg +#define netif_vdbg netif_dbg #else #define netif_vdbg(priv, type, dev, format, args...) \ ({ \ -- cgit v1.2.3 From 9c3a8ee8a1d72c5c0d7fbdf426d80e270ddfa54c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jun 2010 12:07:27 +0200 Subject: writeback: remove writeback_inodes_wbc This was just an odd wrapper around writeback_inodes_wb. Removing this also allows to get rid of the bdi member of struct writeback_control which was rather out of place there. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/afs/write.c | 1 - fs/btrfs/extent_io.c | 2 -- fs/fs-writeback.c | 12 ++---------- include/linux/writeback.h | 5 ++--- mm/backing-dev.c | 3 +-- mm/page-writeback.c | 3 +-- 6 files changed, 6 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/fs/afs/write.c b/fs/afs/write.c index 3dab9e9948d..722743b152d 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode) { struct address_space *mapping = vnode->vfs_inode.i_mapping; struct writeback_control wbc = { - .bdi = mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .range_cyclic = 1, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a4080c21ec5..d74e6af9b53 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; struct writeback_control wbc_writepages = { - .bdi = wbc->bdi, .sync_mode = wbc->sync_mode, .older_than_this = NULL, .nr_to_write = 64, @@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, .sync_io = mode == WB_SYNC_ALL, }; struct writeback_control wbc_writepages = { - .bdi = inode->i_mapping->backing_dev_info, .sync_mode = mode, .older_than_this = NULL, .nr_to_write = nr_pages * 2, diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 6981e4b7c14..94a602e98bb 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -614,8 +614,8 @@ static int writeback_sb_inodes(struct super_block *sb, return 1; } -static void writeback_inodes_wb(struct bdi_writeback *wb, - struct writeback_control *wbc) +void writeback_inodes_wb(struct bdi_writeback *wb, + struct writeback_control *wbc) { int ret = 0; @@ -660,13 +660,6 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, /* Leave any unwritten inodes on b_io */ } -void 
writeback_inodes_wbc(struct writeback_control *wbc) -{ - struct backing_dev_info *bdi = wbc->bdi; - - writeback_inodes_wb(&bdi->wb, wbc); -} - /* * The maximum number of pages to writeout in a single bdi flush/kupdate * operation. We do this so we don't hold I_SYNC against an inode for @@ -705,7 +698,6 @@ static long wb_writeback(struct bdi_writeback *wb, struct wb_writeback_args *args) { struct writeback_control wbc = { - .bdi = wb->bdi, .sb = args->sb, .sync_mode = args->sync_mode, .older_than_this = NULL, diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d63ef8f9609..f6756f6a610 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -27,8 +27,6 @@ enum writeback_sync_modes { * in a manner such that unspecified fields are set to zero. */ struct writeback_control { - struct backing_dev_info *bdi; /* If !NULL, only write back this - queue */ struct super_block *sb; /* if !NULL, only write inodes from this super_block */ enum writeback_sync_modes sync_mode; @@ -66,7 +64,8 @@ int inode_wait(void *); void writeback_inodes_sb(struct super_block *); int writeback_inodes_sb_if_idle(struct super_block *); void sync_inodes_sb(struct super_block *); -void writeback_inodes_wbc(struct writeback_control *wbc); +void writeback_inodes_wb(struct bdi_writeback *wb, + struct writeback_control *wbc); long wb_do_writeback(struct bdi_writeback *wb, int force_wait); void wakeup_flusher_threads(long nr_pages); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 660a87a2251..6e0b09a1ec2 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -340,14 +340,13 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi) static void bdi_flush_io(struct backing_dev_info *bdi) { struct writeback_control wbc = { - .bdi = bdi, .sync_mode = WB_SYNC_NONE, .older_than_this = NULL, .range_cyclic = 1, .nr_to_write = 1024, }; - writeback_inodes_wbc(&wbc); + writeback_inodes_wb(&bdi->wb, &wbc); } /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 
54f28bd493d..37498ef6154 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -495,7 +495,6 @@ static void balance_dirty_pages(struct address_space *mapping, for (;;) { struct writeback_control wbc = { - .bdi = bdi, .sync_mode = WB_SYNC_NONE, .older_than_this = NULL, .nr_to_write = write_chunk, @@ -537,7 +536,7 @@ static void balance_dirty_pages(struct address_space *mapping, * up. */ if (bdi_nr_reclaimable > bdi_thresh) { - writeback_inodes_wbc(&wbc); + writeback_inodes_wb(&bdi->wb, &wbc); pages_written += write_chunk - wbc.nr_to_write; get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); -- cgit v1.2.3 From edadfb10ba35da7253541e4155aa92eff758ebe6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jun 2010 12:07:54 +0200 Subject: writeback: split writeback_inodes_wb The case where we have a superblock doesn't require a loop here as we scan over all inodes in writeback_sb_inodes. Split it out into a separate helper to make the code simpler. This also allows to get rid of the sb member in struct writeback_control, which was rather out of place there. Also update the comments in writeback_sb_inodes that explain the handling of inodes from wrong superblocks. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 82 ++++++++++++++++++++++++++--------------------- include/linux/writeback.h | 2 -- 2 files changed, 46 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 94a602e98bb..8cc06d5432b 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -554,29 +554,41 @@ static bool pin_sb_for_writeback(struct super_block *sb) /* * Write a portion of b_io inodes which belong to @sb. - * If @wbc->sb != NULL, then find and write all such + * + * If @only_this_sb is true, then find and write all such * inodes. Otherwise write only ones which go sequentially * in reverse order. + * * Return 1, if the caller writeback routine should be * interrupted. 
Otherwise return 0. */ -static int writeback_sb_inodes(struct super_block *sb, - struct bdi_writeback *wb, - struct writeback_control *wbc) +static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, + struct writeback_control *wbc, bool only_this_sb) { while (!list_empty(&wb->b_io)) { long pages_skipped; struct inode *inode = list_entry(wb->b_io.prev, struct inode, i_list); - if (wbc->sb && sb != inode->i_sb) { - /* super block given and doesn't - match, skip this inode */ - redirty_tail(inode); - continue; - } - if (sb != inode->i_sb) - /* finish with this superblock */ + + if (inode->i_sb != sb) { + if (only_this_sb) { + /* + * We only want to write back data for this + * superblock, move all inodes not belonging + * to it back onto the dirty list. + */ + redirty_tail(inode); + continue; + } + + /* + * The inode belongs to a different superblock. + * Bounce back to the caller to unpin this and + * pin the next superblock. + */ return 0; + } + if (inode->i_state & (I_NEW | I_WILL_FREE)) { requeue_io(inode); continue; @@ -629,29 +641,12 @@ void writeback_inodes_wb(struct bdi_writeback *wb, struct inode, i_list); struct super_block *sb = inode->i_sb; - if (wbc->sb) { - /* - * We are requested to write out inodes for a specific - * superblock. This means we already have s_umount - * taken by the caller which also waits for us to - * complete the writeout. 
- */ - if (sb != wbc->sb) { - redirty_tail(inode); - continue; - } - - WARN_ON(!rwsem_is_locked(&sb->s_umount)); - - ret = writeback_sb_inodes(sb, wb, wbc); - } else { - if (!pin_sb_for_writeback(sb)) { - requeue_io(inode); - continue; - } - ret = writeback_sb_inodes(sb, wb, wbc); - drop_super(sb); + if (!pin_sb_for_writeback(sb)) { + requeue_io(inode); + continue; } + ret = writeback_sb_inodes(sb, wb, wbc, false); + drop_super(sb); if (ret) break; @@ -660,6 +655,19 @@ void writeback_inodes_wb(struct bdi_writeback *wb, /* Leave any unwritten inodes on b_io */ } +static void __writeback_inodes_sb(struct super_block *sb, + struct bdi_writeback *wb, struct writeback_control *wbc) +{ + WARN_ON(!rwsem_is_locked(&sb->s_umount)); + + wbc->wb_start = jiffies; /* livelock avoidance */ + spin_lock(&inode_lock); + if (!wbc->for_kupdate || list_empty(&wb->b_io)) + queue_io(wb, wbc->older_than_this); + writeback_sb_inodes(sb, wb, wbc, true); + spin_unlock(&inode_lock); +} + /* * The maximum number of pages to writeout in a single bdi flush/kupdate * operation. 
We do this so we don't hold I_SYNC against an inode for @@ -698,7 +706,6 @@ static long wb_writeback(struct bdi_writeback *wb, struct wb_writeback_args *args) { struct writeback_control wbc = { - .sb = args->sb, .sync_mode = args->sync_mode, .older_than_this = NULL, .for_kupdate = args->for_kupdate, @@ -736,7 +743,10 @@ static long wb_writeback(struct bdi_writeback *wb, wbc.more_io = 0; wbc.nr_to_write = MAX_WRITEBACK_PAGES; wbc.pages_skipped = 0; - writeback_inodes_wb(wb, &wbc); + if (args->sb) + __writeback_inodes_sb(args->sb, wb, &wbc); + else + writeback_inodes_wb(wb, &wbc); args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f6756f6a610..c24eca71e80 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -27,8 +27,6 @@ enum writeback_sync_modes { * in a manner such that unspecified fields are set to zero. */ struct writeback_control { - struct super_block *sb; /* if !NULL, only write inodes from - this super_block */ enum writeback_sync_modes sync_mode; unsigned long *older_than_this; /* If !NULL, only write back inodes older than this */ -- cgit v1.2.3 From 83ba7b071f30f7c01f72518ad72d5cd203c27502 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 Jul 2010 08:59:53 +0200 Subject: writeback: simplify the write back thread queue First remove items from work_list as soon as we start working on them. This means we don't have to track any pending or visited state and can get rid of all the RCU magic freeing the work items - we can simply free them once the operation has finished. Second use a real completion for tracking synchronous requests - if the caller sets the completion pointer we complete it, otherwise use it as a boolean indicator that we can free the work item directly. Third unify struct wb_writeback_args and struct bdi_work into a single data structure, wb_writeback_work. 
Previously we set all parameters into a struct wb_writeback_args, copied it into struct bdi_work, and then copied it again onto the stack to use it there. Instead, just allocate one structure dynamically or on the stack and use it all the way through the stack. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 253 ++++++++++++-------------------------------- include/linux/backing-dev.h | 2 - mm/backing-dev.c | 14 +-- 3 files changed, 72 insertions(+), 197 deletions(-) (limited to 'include') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 8cc06d5432b..d5be1693ac9 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -38,43 +38,18 @@ int nr_pdflush_threads; /* * Passed into wb_writeback(), essentially a subset of writeback_control */ -struct wb_writeback_args { +struct wb_writeback_work { long nr_pages; struct super_block *sb; enum writeback_sync_modes sync_mode; unsigned int for_kupdate:1; unsigned int range_cyclic:1; unsigned int for_background:1; -}; -/* - * Work items for the bdi_writeback threads - */ -struct bdi_work { struct list_head list; /* pending work list */ - struct rcu_head rcu_head; /* for RCU free/clear of work */ - - unsigned long seen; /* threads that have seen this work */ - atomic_t pending; /* number of threads still to do work */ - - struct wb_writeback_args args; /* writeback arguments */ - - unsigned long state; /* flag bits, see WS_* */ -}; - -enum { - WS_INPROGRESS = 0, - WS_ONSTACK, + struct completion *done; /* set if the caller waits */ }; -static inline void bdi_work_init(struct bdi_work *work, - struct wb_writeback_args *args) -{ - INIT_RCU_HEAD(&work->rcu_head); - work->args = *args; - __set_bit(WS_INPROGRESS, &work->state); -} - /** * writeback_in_progress - determine whether there is writeback in progress * @bdi: the device's backing_dev_info structure.
@@ -87,49 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi) return !list_empty(&bdi->work_list); } -static void bdi_work_free(struct rcu_head *head) -{ - struct bdi_work *work = container_of(head, struct bdi_work, rcu_head); - - clear_bit(WS_INPROGRESS, &work->state); - smp_mb__after_clear_bit(); - wake_up_bit(&work->state, WS_INPROGRESS); - - if (!test_bit(WS_ONSTACK, &work->state)) - kfree(work); -} - -static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work) -{ - /* - * The caller has retrieved the work arguments from this work, - * drop our reference. If this is the last ref, delete and free it - */ - if (atomic_dec_and_test(&work->pending)) { - struct backing_dev_info *bdi = wb->bdi; - - spin_lock(&bdi->wb_lock); - list_del_rcu(&work->list); - spin_unlock(&bdi->wb_lock); - - call_rcu(&work->rcu_head, bdi_work_free); - } -} - -static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) +static void bdi_queue_work(struct backing_dev_info *bdi, + struct wb_writeback_work *work) { - work->seen = bdi->wb_mask; - BUG_ON(!work->seen); - atomic_set(&work->pending, bdi->wb_cnt); - BUG_ON(!bdi->wb_cnt); - - /* - * list_add_tail_rcu() contains the necessary barriers to - * make sure the above stores are seen before the item is - * noticed on the list - */ spin_lock(&bdi->wb_lock); - list_add_tail_rcu(&work->list, &bdi->work_list); + list_add_tail(&work->list, &bdi->work_list); spin_unlock(&bdi->wb_lock); /* @@ -146,55 +83,29 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) } } -/* - * Used for on-stack allocated work items. The caller needs to wait until - * the wb threads have acked the work before it's safe to continue. 
- */ -static void bdi_wait_on_work_done(struct bdi_work *work) -{ - wait_on_bit(&work->state, WS_INPROGRESS, bdi_sched_wait, - TASK_UNINTERRUPTIBLE); -} - -static void bdi_alloc_queue_work(struct backing_dev_info *bdi, - struct wb_writeback_args *args) +static void +__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, + bool range_cyclic, bool for_background) { - struct bdi_work *work; + struct wb_writeback_work *work; /* * This is WB_SYNC_NONE writeback, so if allocation fails just * wakeup the thread for old dirty data writeback */ - work = kmalloc(sizeof(*work), GFP_ATOMIC); - if (work) { - bdi_work_init(work, args); - bdi_queue_work(bdi, work); - } else { - struct bdi_writeback *wb = &bdi->wb; - - if (wb->task) - wake_up_process(wb->task); + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) { + if (bdi->wb.task) + wake_up_process(bdi->wb.task); + return; } -} -/** - * bdi_queue_work_onstack - start and wait for writeback - * @args: parameters to control the work queue writeback - * - * Description: - * This function initiates writeback and waits for the operation to - * complete. Callers must hold the sb s_umount semaphore for - * reading, to avoid having the super disappear before we are done. 
- */ -static void bdi_queue_work_onstack(struct wb_writeback_args *args) -{ - struct bdi_work work; + work->sync_mode = WB_SYNC_NONE; + work->nr_pages = nr_pages; + work->range_cyclic = range_cyclic; + work->for_background = for_background; - bdi_work_init(&work, args); - __set_bit(WS_ONSTACK, &work.state); - - bdi_queue_work(args->sb->s_bdi, &work); - bdi_wait_on_work_done(&work); + bdi_queue_work(bdi, work); } /** @@ -210,13 +121,7 @@ static void bdi_queue_work_onstack(struct wb_writeback_args *args) */ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) { - struct wb_writeback_args args = { - .sync_mode = WB_SYNC_NONE, - .nr_pages = nr_pages, - .range_cyclic = 1, - }; - - bdi_alloc_queue_work(bdi, &args); + __bdi_start_writeback(bdi, nr_pages, true, false); } /** @@ -230,13 +135,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) */ void bdi_start_background_writeback(struct backing_dev_info *bdi) { - struct wb_writeback_args args = { - .sync_mode = WB_SYNC_NONE, - .nr_pages = LONG_MAX, - .for_background = 1, - .range_cyclic = 1, - }; - bdi_alloc_queue_work(bdi, &args); + __bdi_start_writeback(bdi, LONG_MAX, true, true); } /* @@ -703,14 +602,14 @@ static inline bool over_bground_thresh(void) * all dirty pages if they are all attached to "old" mappings. 
*/ static long wb_writeback(struct bdi_writeback *wb, - struct wb_writeback_args *args) + struct wb_writeback_work *work) { struct writeback_control wbc = { - .sync_mode = args->sync_mode, + .sync_mode = work->sync_mode, .older_than_this = NULL, - .for_kupdate = args->for_kupdate, - .for_background = args->for_background, - .range_cyclic = args->range_cyclic, + .for_kupdate = work->for_kupdate, + .for_background = work->for_background, + .range_cyclic = work->range_cyclic, }; unsigned long oldest_jif; long wrote = 0; @@ -730,24 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb, /* * Stop writeback when nr_pages has been consumed */ - if (args->nr_pages <= 0) + if (work->nr_pages <= 0) break; /* * For background writeout, stop when we are below the * background dirty threshold */ - if (args->for_background && !over_bground_thresh()) + if (work->for_background && !over_bground_thresh()) break; wbc.more_io = 0; wbc.nr_to_write = MAX_WRITEBACK_PAGES; wbc.pages_skipped = 0; - if (args->sb) - __writeback_inodes_sb(args->sb, wb, &wbc); + if (work->sb) + __writeback_inodes_sb(work->sb, wb, &wbc); else writeback_inodes_wb(wb, &wbc); - args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; + work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; /* @@ -783,31 +682,21 @@ static long wb_writeback(struct bdi_writeback *wb, } /* - * Return the next bdi_work struct that hasn't been processed by this - * wb thread yet. ->seen is initially set for each thread that exists - * for this device, when a thread first notices a piece of work it - * clears its bit. Depending on writeback type, the thread will notify - * completion on either receiving the work (WB_SYNC_NONE) or after - * it is done (WB_SYNC_ALL). + * Return the next wb_writeback_work struct that hasn't been processed yet. 
*/ -static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi, - struct bdi_writeback *wb) +static struct wb_writeback_work * +get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb) { - struct bdi_work *work, *ret = NULL; - - rcu_read_lock(); - - list_for_each_entry_rcu(work, &bdi->work_list, list) { - if (!test_bit(wb->nr, &work->seen)) - continue; - clear_bit(wb->nr, &work->seen); + struct wb_writeback_work *work = NULL; - ret = work; - break; + spin_lock(&bdi->wb_lock); + if (!list_empty(&bdi->work_list)) { + work = list_entry(bdi->work_list.next, + struct wb_writeback_work, list); + list_del_init(&work->list); } - - rcu_read_unlock(); - return ret; + spin_unlock(&bdi->wb_lock); + return work; } static long wb_check_old_data_flush(struct bdi_writeback *wb) @@ -832,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) (inodes_stat.nr_inodes - inodes_stat.nr_unused); if (nr_pages) { - struct wb_writeback_args args = { + struct wb_writeback_work work = { .nr_pages = nr_pages, .sync_mode = WB_SYNC_NONE, .for_kupdate = 1, .range_cyclic = 1, }; - return wb_writeback(wb, &args); + return wb_writeback(wb, &work); } return 0; @@ -851,33 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) long wb_do_writeback(struct bdi_writeback *wb, int force_wait) { struct backing_dev_info *bdi = wb->bdi; - struct bdi_work *work; + struct wb_writeback_work *work; long wrote = 0; while ((work = get_next_work_item(bdi, wb)) != NULL) { - struct wb_writeback_args args = work->args; - /* * Override sync mode, in case we must wait for completion + * because this thread is exiting now. */ if (force_wait) - work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; + work->sync_mode = WB_SYNC_ALL; - /* - * If this isn't a data integrity operation, just notify - * that we have seen this work and we are now starting it. 
- */ - if (!test_bit(WS_ONSTACK, &work->state)) - wb_clear_pending(wb, work); - - wrote += wb_writeback(wb, &args); + wrote += wb_writeback(wb, work); /* - * This is a data integrity writeback, so only do the - * notification when we have completed the work. + * Notify the caller of completion if this is a synchronous + * work item, otherwise just free it. */ - if (test_bit(WS_ONSTACK, &work->state)) - wb_clear_pending(wb, work); + if (work->done) + complete(work->done); + else + kfree(work); } /* @@ -940,14 +823,9 @@ int bdi_writeback_task(struct bdi_writeback *wb) void wakeup_flusher_threads(long nr_pages) { struct backing_dev_info *bdi; - struct wb_writeback_args args = { - .sync_mode = WB_SYNC_NONE, - }; - if (nr_pages) { - args.nr_pages = nr_pages; - } else { - args.nr_pages = global_page_state(NR_FILE_DIRTY) + + if (!nr_pages) { + nr_pages = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS); } @@ -955,7 +833,7 @@ void wakeup_flusher_threads(long nr_pages) list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { if (!bdi_has_dirty_io(bdi)) continue; - bdi_alloc_queue_work(bdi, &args); + __bdi_start_writeback(bdi, nr_pages, false, false); } rcu_read_unlock(); } @@ -1164,17 +1042,20 @@ void writeback_inodes_sb(struct super_block *sb) { unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); - struct wb_writeback_args args = { + DECLARE_COMPLETION_ONSTACK(done); + struct wb_writeback_work work = { .sb = sb, .sync_mode = WB_SYNC_NONE, + .done = &done, }; WARN_ON(!rwsem_is_locked(&sb->s_umount)); - args.nr_pages = nr_dirty + nr_unstable + + work.nr_pages = nr_dirty + nr_unstable + (inodes_stat.nr_inodes - inodes_stat.nr_unused); - bdi_queue_work_onstack(&args); + bdi_queue_work(sb->s_bdi, &work); + wait_for_completion(&done); } EXPORT_SYMBOL(writeback_inodes_sb); @@ -1206,16 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle); */ void sync_inodes_sb(struct super_block *sb) { - 
struct wb_writeback_args args = { + DECLARE_COMPLETION_ONSTACK(done); + struct wb_writeback_work work = { .sb = sb, .sync_mode = WB_SYNC_ALL, .nr_pages = LONG_MAX, .range_cyclic = 0, + .done = &done, }; WARN_ON(!rwsem_is_locked(&sb->s_umount)); - bdi_queue_work_onstack(&args); + bdi_queue_work(sb->s_bdi, &work); + wait_for_completion(&done); + wait_sb_inodes(sb); } EXPORT_SYMBOL(sync_inodes_sb); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 9ae2889096b..e9aec0d099d 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -82,8 +82,6 @@ struct backing_dev_info { struct bdi_writeback wb; /* default writeback info for this bdi */ spinlock_t wb_lock; /* protects update side of wb_list */ struct list_head wb_list; /* the flusher threads hanging off this bdi */ - unsigned long wb_mask; /* bitmask of registered tasks */ - unsigned int wb_cnt; /* number of registered tasks */ struct list_head work_list; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 6e0b09a1ec2..123bcef13e5 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -104,15 +104,13 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) "b_more_io: %8lu\n" "bdi_list: %8u\n" "state: %8lx\n" - "wb_mask: %8lx\n" - "wb_list: %8u\n" - "wb_cnt: %8u\n", + "wb_list: %8u\n", (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)), (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)), K(bdi_thresh), K(dirty_thresh), K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io, - !list_empty(&bdi->bdi_list), bdi->state, bdi->wb_mask, - !list_empty(&bdi->wb_list), bdi->wb_cnt); + !list_empty(&bdi->bdi_list), bdi->state, + !list_empty(&bdi->wb_list)); #undef K return 0; @@ -674,12 +672,6 @@ int bdi_init(struct backing_dev_info *bdi) bdi_wb_init(&bdi->wb, bdi); - /* - * Just one thread support for now, hard code mask and count - */ - bdi->wb_mask = 1; - bdi->wb_cnt = 1; - for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { err = percpu_counter_init(&bdi->bdi_stat[i], 0); if (err) 
-- cgit v1.2.3 From 140236b4b1c749c9b795ea3d11558a0eb5a3a080 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 10 Jun 2010 13:56:33 +0300 Subject: VFS: introduce s_dirty accessors This patch introduces 3 VFS accessors: 'sb_mark_dirty()', 'sb_mark_clean()', and 'sb_is_dirty()'. They simply set 'sb->s_dirt' or test 'sb->s_dirt'. The plan is to make every FS use these accessors later instead of manipulating the 'sb->s_dirt' flag directly. Ultimately, this change is a preparation for the periodic superblock synchronization optimization which is about preventing the "sync_supers" kernel thread from waking up even if there is nothing to synchronize. This patch does not do any functional change, just adds accessor functions. Signed-off-by: Artem Bityutskiy Signed-off-by: Linus Torvalds --- include/linux/fs.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 471e1ff5079..68ca1b0491a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1783,6 +1783,19 @@ extern int get_sb_pseudo(struct file_system_type *, char *, struct vfsmount *mnt); extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); +static inline void sb_mark_dirty(struct super_block *sb) +{ + sb->s_dirt = 1; +} +static inline void sb_mark_clean(struct super_block *sb) +{ + sb->s_dirt = 0; +} +static inline int sb_is_dirty(struct super_block *sb) +{ + return sb->s_dirt; +} + /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ #define fops_get(fops) \ (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) -- cgit v1.2.3 From 5870a4d97da136908ca477e3a21bc9f4c2705161 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sun, 4 Jul 2010 04:03:07 +0200 Subject: drm/ttm: Allocate the page pool manager in the heap. 
Repeated ttm_page_alloc_init/fini fails noisily because the pool manager kobj isn't zeroed out between uses (we could do just that but statically allocated kobjects are generally considered a bad thing). Move it to kzalloc'ed memory. Note that this patch drops the refcounting behavior of the pool allocator init/fini functions: it would have led to a race condition in its current form, and anyway it was never exploited. This fixes a regression with reloading kms modules at runtime, since page allocator was introduced. Signed-off-by: Francisco Jerez Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 68 +++++++++++++++++------------------- include/drm/ttm/ttm_page_alloc.h | 4 --- 2 files changed, 33 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 2f047577b1e..b1d67dc973d 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -104,7 +104,6 @@ struct ttm_pool_opts { struct ttm_pool_manager { struct kobject kobj; struct shrinker mm_shrink; - atomic_t page_alloc_inited; struct ttm_pool_opts options; union { @@ -142,7 +141,7 @@ static void ttm_pool_kobj_release(struct kobject *kobj) { struct ttm_pool_manager *m = container_of(kobj, struct ttm_pool_manager, kobj); - (void)m; + kfree(m); } static ssize_t ttm_pool_store(struct kobject *kobj, @@ -214,9 +213,7 @@ static struct kobj_type ttm_pool_kobj_type = { .default_attrs = ttm_pool_attrs, }; -static struct ttm_pool_manager _manager = { - .page_alloc_inited = ATOMIC_INIT(0) -}; +static struct ttm_pool_manager *_manager; #ifndef CONFIG_X86 static int set_pages_array_wb(struct page **pages, int addrinarray) @@ -271,7 +268,7 @@ static struct ttm_page_pool *ttm_get_pool(int flags, if (flags & TTM_PAGE_FLAG_DMA32) pool_index |= 0x2; - return &_manager.pools[pool_index]; + return &_manager->pools[pool_index]; } /* set memory back to wb and free the pages. 
*/ @@ -387,7 +384,7 @@ static int ttm_pool_get_num_unused_pages(void) unsigned i; int total = 0; for (i = 0; i < NUM_POOLS; ++i) - total += _manager.pools[i].npages; + total += _manager->pools[i].npages; return total; } @@ -408,7 +405,7 @@ static int ttm_pool_mm_shrink(int shrink_pages, gfp_t gfp_mask) unsigned nr_free = shrink_pages; if (shrink_pages == 0) break; - pool = &_manager.pools[(i + pool_offset)%NUM_POOLS]; + pool = &_manager->pools[(i + pool_offset)%NUM_POOLS]; shrink_pages = ttm_page_pool_free(pool, nr_free); } /* return estimated number of unused pages in pool */ @@ -576,10 +573,10 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, /* If allocation request is small and there is not enough * pages in pool we fill the pool first */ - if (count < _manager.options.small + if (count < _manager->options.small && count > pool->npages) { struct list_head new_pages; - unsigned alloc_size = _manager.options.alloc_size; + unsigned alloc_size = _manager->options.alloc_size; /** * Can't change page caching if in irqsave context. 
We have to @@ -759,8 +756,8 @@ void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags, pool->npages += page_count; /* Check that we don't go over the pool limit */ page_count = 0; - if (pool->npages > _manager.options.max_size) { - page_count = pool->npages - _manager.options.max_size; + if (pool->npages > _manager->options.max_size) { + page_count = pool->npages - _manager->options.max_size; /* free at least NUM_PAGES_TO_ALLOC number of pages * to reduce calls to set_memory_wb */ if (page_count < NUM_PAGES_TO_ALLOC) @@ -785,33 +782,36 @@ static void ttm_page_pool_init_locked(struct ttm_page_pool *pool, int flags, int ttm_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages) { int ret; - if (atomic_add_return(1, &_manager.page_alloc_inited) > 1) - return 0; + + WARN_ON(_manager); printk(KERN_INFO TTM_PFX "Initializing pool allocator.\n"); - ttm_page_pool_init_locked(&_manager.wc_pool, GFP_HIGHUSER, "wc"); + _manager = kzalloc(sizeof(*_manager), GFP_KERNEL); - ttm_page_pool_init_locked(&_manager.uc_pool, GFP_HIGHUSER, "uc"); + ttm_page_pool_init_locked(&_manager->wc_pool, GFP_HIGHUSER, "wc"); - ttm_page_pool_init_locked(&_manager.wc_pool_dma32, GFP_USER | GFP_DMA32, - "wc dma"); + ttm_page_pool_init_locked(&_manager->uc_pool, GFP_HIGHUSER, "uc"); - ttm_page_pool_init_locked(&_manager.uc_pool_dma32, GFP_USER | GFP_DMA32, - "uc dma"); + ttm_page_pool_init_locked(&_manager->wc_pool_dma32, + GFP_USER | GFP_DMA32, "wc dma"); - _manager.options.max_size = max_pages; - _manager.options.small = SMALL_ALLOCATION; - _manager.options.alloc_size = NUM_PAGES_TO_ALLOC; + ttm_page_pool_init_locked(&_manager->uc_pool_dma32, + GFP_USER | GFP_DMA32, "uc dma"); - kobject_init(&_manager.kobj, &ttm_pool_kobj_type); - ret = kobject_add(&_manager.kobj, &glob->kobj, "pool"); + _manager->options.max_size = max_pages; + _manager->options.small = SMALL_ALLOCATION; + _manager->options.alloc_size = NUM_PAGES_TO_ALLOC; + + ret = 
kobject_init_and_add(&_manager->kobj, &ttm_pool_kobj_type, + &glob->kobj, "pool"); if (unlikely(ret != 0)) { - kobject_put(&_manager.kobj); + kobject_put(&_manager->kobj); + _manager = NULL; return ret; } - ttm_pool_mm_shrink_init(&_manager); + ttm_pool_mm_shrink_init(_manager); return 0; } @@ -820,16 +820,14 @@ void ttm_page_alloc_fini() { int i; - if (atomic_sub_return(1, &_manager.page_alloc_inited) > 0) - return; - printk(KERN_INFO TTM_PFX "Finalizing pool allocator.\n"); - ttm_pool_mm_shrink_fini(&_manager); + ttm_pool_mm_shrink_fini(_manager); for (i = 0; i < NUM_POOLS; ++i) - ttm_page_pool_free(&_manager.pools[i], FREE_ALL_PAGES); + ttm_page_pool_free(&_manager->pools[i], FREE_ALL_PAGES); - kobject_put(&_manager.kobj); + kobject_put(&_manager->kobj); + _manager = NULL; } int ttm_page_alloc_debugfs(struct seq_file *m, void *data) @@ -837,14 +835,14 @@ int ttm_page_alloc_debugfs(struct seq_file *m, void *data) struct ttm_page_pool *p; unsigned i; char *h[] = {"pool", "refills", "pages freed", "size"}; - if (atomic_read(&_manager.page_alloc_inited) == 0) { + if (!_manager) { seq_printf(m, "No pool allocator running.\n"); return 0; } seq_printf(m, "%6s %12s %13s %8s\n", h[0], h[1], h[2], h[3]); for (i = 0; i < NUM_POOLS; ++i) { - p = &_manager.pools[i]; + p = &_manager->pools[i]; seq_printf(m, "%6s %12ld %13ld %8d\n", p->name, p->nrefills, diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h index 8bb4de567b2..116821448c3 100644 --- a/include/drm/ttm/ttm_page_alloc.h +++ b/include/drm/ttm/ttm_page_alloc.h @@ -56,10 +56,6 @@ void ttm_put_pages(struct list_head *pages, enum ttm_caching_state cstate); /** * Initialize pool allocator. - * - * Pool allocator is internaly reference counted so it can be initialized - * multiple times but ttm_page_alloc_fini has to be called same number of - * times. 
*/ int ttm_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages); /** -- cgit v1.2.3 From 095c24710aa508a303edff86709637007113fbbf Mon Sep 17 00:00:00 2001 From: Andy Walls Date: Sat, 12 Jun 2010 20:20:36 -0300 Subject: V4L/DVB: tuner: Add a definition for the Philips FQ1236 MK5 NTSC tuner Signed-off-by: Andy Walls Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/tuners/tuner-simple.c | 1 + drivers/media/common/tuners/tuner-types.c | 16 ++++++++++++++++ include/media/tuner.h | 1 + 3 files changed, 18 insertions(+) (limited to 'include') diff --git a/drivers/media/common/tuners/tuner-simple.c b/drivers/media/common/tuners/tuner-simple.c index 8abbcc5fcf9..8cf2ab609d5 100644 --- a/drivers/media/common/tuners/tuner-simple.c +++ b/drivers/media/common/tuners/tuner-simple.c @@ -524,6 +524,7 @@ static int simple_radio_bandswitch(struct dvb_frontend *fe, u8 *buffer) buffer[3] = 0x39; break; case TUNER_PHILIPS_FQ1216LME_MK3: + case TUNER_PHILIPS_FQ1236_MK5: tuner_err("This tuner doesn't have FM\n"); /* Set the low band for sanity, since it covers 88-108 MHz */ buffer[3] = 0x01; diff --git a/drivers/media/common/tuners/tuner-types.c b/drivers/media/common/tuners/tuner-types.c index d9aaaca620c..58a513bcd74 100644 --- a/drivers/media/common/tuners/tuner-types.c +++ b/drivers/media/common/tuners/tuner-types.c @@ -1353,6 +1353,17 @@ static struct tuner_params tuner_sony_btf_pxn01z_params[] = { }, }; +/* ------------ TUNER_PHILIPS_FQ1236_MK5 - Philips NTSC ------------ */ + +static struct tuner_params tuner_philips_fq1236_mk5_params[] = { + { + .type = TUNER_PARAM_TYPE_NTSC, + .ranges = tuner_fm1236_mk3_ntsc_ranges, + .count = ARRAY_SIZE(tuner_fm1236_mk3_ntsc_ranges), + .has_tda9887 = 1, /* TDA9885, no FM radio */ + }, +}; + /* --------------------------------------------------------------------- */ struct tunertype tuners[] = { @@ -1826,6 +1837,11 @@ struct tunertype tuners[] = { .params = tuner_sony_btf_pxn01z_params, .count = 
ARRAY_SIZE(tuner_sony_btf_pxn01z_params), }, + [TUNER_PHILIPS_FQ1236_MK5] = { /* NTSC, TDA9885, no FM radio */ + .name = "Philips FQ1236 MK5", + .params = tuner_philips_fq1236_mk5_params, + .count = ARRAY_SIZE(tuner_philips_fq1236_mk5_params), + }, }; EXPORT_SYMBOL(tuners); diff --git a/include/media/tuner.h b/include/media/tuner.h index 5505c5360ca..51811eac46f 100644 --- a/include/media/tuner.h +++ b/include/media/tuner.h @@ -130,6 +130,7 @@ #define TUNER_PHILIPS_CU1216L 82 #define TUNER_NXP_TDA18271 83 #define TUNER_SONY_BTF_PXN01Z 84 +#define TUNER_PHILIPS_FQ1236_MK5 85 /* NTSC, TDA9885, no FM radio */ /* tv card specific */ #define TDA9887_PRESENT (1<<0) -- cgit v1.2.3 From 44a54f787c0abcf75a2ed49b8ec8b2b512468f73 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 9 Jul 2010 15:41:44 -0400 Subject: tracing: Add alignment to syscall metadata declarations For some reason if we declare a static variable and then assign it later, and the assignment contains a __attribute__((__aligned__(#))), some versions of gcc will ignore it. This caused the syscall meta data to not be compact in its section and caused a kernel oops when the section was being read. The fix for these versions of gcc seems to be to add the aligned attribute to the declaration as well. 
This fixes the BZ regression: https://bugzilla.kernel.org/show_bug.cgi?id=16353 Reported-by: Zeev Tarantov Tested-by: Zeev Tarantov Acked-by: Frederic Weisbecker LKML-Reference: Signed-off-by: Steven Rostedt --- include/linux/syscalls.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7f614ce274a..13ebb5413a7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -124,7 +124,8 @@ extern struct trace_event_functions enter_syscall_print_funcs; extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ - static struct syscall_metadata __syscall_meta_##sname; \ + static struct syscall_metadata \ + __attribute__((__aligned__(4))) __syscall_meta_##sname; \ static struct ftrace_event_call \ __attribute__((__aligned__(4))) event_enter_##sname; \ static struct ftrace_event_call __used \ @@ -138,7 +139,8 @@ extern struct trace_event_functions exit_syscall_print_funcs; } #define SYSCALL_TRACE_EXIT_EVENT(sname) \ - static struct syscall_metadata __syscall_meta_##sname; \ + static struct syscall_metadata \ + __attribute__((__aligned__(4))) __syscall_meta_##sname; \ static struct ftrace_event_call \ __attribute__((__aligned__(4))) event_exit_##sname; \ static struct ftrace_event_call __used \ -- cgit v1.2.3 From 95f72d1ed41a66f1c1c29c24d479de81a0bea36f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 12 Jul 2010 14:36:09 +1000 Subject: lmb: rename to memblock via following scripts FILES=$(find * -type f | grep -vE 'oprofile|[^K]config') sed -i \ -e 's/lmb/memblock/g' \ -e 's/LMB/MEMBLOCK/g' \ $FILES for N in $(find . -name lmb.[ch]); do M=$(echo $N | sed 's/lmb/memblock/g') mv $N $M done and remove some wrong change like lmbench and dlmb etc. also move memblock.c from lib/ to mm/ Suggested-by: Ingo Molnar Acked-by: "H. 
Peter Anvin" Acked-by: Benjamin Herrenschmidt Acked-by: Linus Torvalds Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- Documentation/kernel-parameters.txt | 2 +- arch/microblaze/Kconfig | 2 +- arch/microblaze/include/asm/lmb.h | 17 - arch/microblaze/include/asm/memblock.h | 17 + arch/microblaze/kernel/prom.c | 14 +- arch/microblaze/mm/init.c | 40 +- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/abs_addr.h | 2 +- arch/powerpc/include/asm/lmb.h | 15 - arch/powerpc/include/asm/memblock.h | 15 + arch/powerpc/kernel/btext.c | 2 +- arch/powerpc/kernel/crash.c | 2 +- arch/powerpc/kernel/crash_dump.c | 4 +- arch/powerpc/kernel/dma-swiotlb.c | 2 +- arch/powerpc/kernel/dma.c | 4 +- arch/powerpc/kernel/machine_kexec.c | 12 +- arch/powerpc/kernel/paca.c | 8 +- arch/powerpc/kernel/prom.c | 62 +-- arch/powerpc/kernel/rtas.c | 6 +- arch/powerpc/kernel/setup-common.c | 2 +- arch/powerpc/kernel/setup_32.c | 16 +- arch/powerpc/kernel/setup_64.c | 20 +- arch/powerpc/kernel/vdso.c | 4 +- arch/powerpc/mm/40x_mmu.c | 2 +- arch/powerpc/mm/hash_utils_64.c | 26 +- arch/powerpc/mm/init_32.c | 16 +- arch/powerpc/mm/init_64.c | 2 +- arch/powerpc/mm/mem.c | 78 ++-- arch/powerpc/mm/numa.c | 84 ++-- arch/powerpc/mm/pgtable_32.c | 6 +- arch/powerpc/mm/pgtable_64.c | 4 +- arch/powerpc/mm/ppc_mmu_32.c | 4 +- arch/powerpc/mm/stab.c | 4 +- arch/powerpc/mm/tlb_nohash.c | 4 +- arch/powerpc/platforms/85xx/corenet_ds.c | 4 +- arch/powerpc/platforms/85xx/mpc8536_ds.c | 4 +- arch/powerpc/platforms/85xx/mpc85xx_ds.c | 4 +- arch/powerpc/platforms/85xx/mpc85xx_mds.c | 4 +- arch/powerpc/platforms/86xx/mpc86xx_hpcn.c | 4 +- arch/powerpc/platforms/cell/iommu.c | 10 +- arch/powerpc/platforms/embedded6xx/wii.c | 12 +- arch/powerpc/platforms/maple/setup.c | 2 +- arch/powerpc/platforms/pasemi/iommu.c | 4 +- arch/powerpc/platforms/powermac/setup.c | 4 +- arch/powerpc/platforms/ps3/htab.c | 2 +- arch/powerpc/platforms/ps3/mm.c | 6 +- arch/powerpc/platforms/ps3/os-area.c | 4 +- 
arch/powerpc/platforms/pseries/hotplug-memory.c | 38 +- arch/powerpc/platforms/pseries/iommu.c | 2 +- arch/powerpc/platforms/pseries/phyp_dump.c | 4 +- arch/powerpc/sysdev/dart_iommu.c | 8 +- arch/powerpc/sysdev/fsl_pci.c | 4 +- arch/sh/Kconfig | 2 +- arch/sh/include/asm/lmb.h | 6 - arch/sh/include/asm/memblock.h | 6 + arch/sh/kernel/machine_kexec.c | 18 +- arch/sh/kernel/setup.c | 8 +- arch/sh/mm/init.c | 40 +- arch/sh/mm/numa.c | 8 +- arch/sparc/Kconfig | 2 +- arch/sparc/include/asm/lmb.h | 10 - arch/sparc/include/asm/memblock.h | 10 + arch/sparc/kernel/mdesc.c | 16 +- arch/sparc/kernel/prom_64.c | 4 +- arch/sparc/mm/init_64.c | 54 +-- include/linux/lmb.h | 89 ---- include/linux/memblock.h | 89 ++++ lib/Kconfig | 3 - lib/Makefile | 2 - lib/lmb.c | 541 ------------------------ mm/Kconfig | 3 + mm/Makefile | 2 + mm/memblock.c | 541 ++++++++++++++++++++++++ 73 files changed, 1037 insertions(+), 1037 deletions(-) delete mode 100644 arch/microblaze/include/asm/lmb.h create mode 100644 arch/microblaze/include/asm/memblock.h delete mode 100644 arch/powerpc/include/asm/lmb.h create mode 100644 arch/powerpc/include/asm/memblock.h delete mode 100644 arch/sh/include/asm/lmb.h create mode 100644 arch/sh/include/asm/memblock.h delete mode 100644 arch/sparc/include/asm/lmb.h create mode 100644 arch/sparc/include/asm/memblock.h delete mode 100644 include/linux/lmb.h create mode 100644 include/linux/memblock.h delete mode 100644 lib/lmb.c create mode 100644 mm/memblock.c (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 82d6aeb5228..4ddb58df081 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1265,7 +1265,7 @@ and is between 256 and 4096 characters. It is defined in the file If there are multiple matching configurations changing the same attribute, the last one is used. - lmb=debug [KNL] Enable lmb debug messages. + memblock=debug [KNL] Enable memblock debug messages. 
load_ramdisk= [RAM] List of ramdisks to load from floppy See Documentation/blockdev/ramdisk.txt. diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 76818f92653..505a0859242 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -5,7 +5,7 @@ mainmenu "Linux/Microblaze Kernel Configuration" config MICROBLAZE def_bool y - select HAVE_LMB + select HAVE_MEMBLOCK select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_FUNCTION_GRAPH_TRACER diff --git a/arch/microblaze/include/asm/lmb.h b/arch/microblaze/include/asm/lmb.h deleted file mode 100644 index a0a0a929c29..00000000000 --- a/arch/microblaze/include/asm/lmb.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (C) 2008 Michal Simek - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#ifndef _ASM_MICROBLAZE_LMB_H -#define _ASM_MICROBLAZE_LMB_H - -/* LMB limit is OFF */ -#define LMB_REAL_LIMIT 0xFFFFFFFF - -#endif /* _ASM_MICROBLAZE_LMB_H */ - - diff --git a/arch/microblaze/include/asm/memblock.h b/arch/microblaze/include/asm/memblock.h new file mode 100644 index 00000000000..f9c2fa331d2 --- /dev/null +++ b/arch/microblaze/include/asm/memblock.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2008 Michal Simek + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ */ + +#ifndef _ASM_MICROBLAZE_MEMBLOCK_H +#define _ASM_MICROBLAZE_MEMBLOCK_H + +/* MEMBLOCK limit is OFF */ +#define MEMBLOCK_REAL_LIMIT 0xFFFFFFFF + +#endif /* _ASM_MICROBLAZE_MEMBLOCK_H */ + + diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index a15ef6d67ca..427b13b4740 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include @@ -49,12 +49,12 @@ void __init early_init_dt_scan_chosen_arch(unsigned long node) void __init early_init_dt_add_memory_arch(u64 base, u64 size) { - lmb_add(base, size); + memblock_add(base, size); } u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align) { - return lmb_alloc(size, align); + return memblock_alloc(size, align); } #ifdef CONFIG_EARLY_PRINTK @@ -104,8 +104,8 @@ void __init early_init_devtree(void *params) */ of_scan_flat_dt(early_init_dt_scan_chosen, NULL); - /* Scan memory nodes and rebuild LMBs */ - lmb_init(); + /* Scan memory nodes and rebuild MEMBLOCKs */ + memblock_init(); of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory, NULL); @@ -113,9 +113,9 @@ void __init early_init_devtree(void *params) strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); parse_early_param(); - lmb_analyze(); + memblock_analyze(); - pr_debug("Phys. mem: %lx\n", (unsigned long) lmb_phys_mem_size()); + pr_debug("Phys. 
mem: %lx\n", (unsigned long) memblock_phys_mem_size()); pr_debug(" <- early_init_devtree()\n"); } diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index cca3579d426..db593498992 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include /* mem_init */ #include #include @@ -76,10 +76,10 @@ void __init setup_memory(void) u32 kernel_align_start, kernel_align_size; /* Find main memory where is the kernel */ - for (i = 0; i < lmb.memory.cnt; i++) { - memory_start = (u32) lmb.memory.region[i].base; - memory_end = (u32) lmb.memory.region[i].base - + (u32) lmb.memory.region[i].size; + for (i = 0; i < memblock.memory.cnt; i++) { + memory_start = (u32) memblock.memory.region[i].base; + memory_end = (u32) memblock.memory.region[i].base + + (u32) memblock.memory.region[i].size; if ((memory_start <= (u32)_text) && ((u32)_text <= memory_end)) { memory_size = memory_end - memory_start; @@ -100,7 +100,7 @@ void __init setup_memory(void) kernel_align_start = PAGE_DOWN((u32)_text); /* ALIGN can be remove because _end in vmlinux.lds.S is align */ kernel_align_size = PAGE_UP((u32)klimit) - kernel_align_start; - lmb_reserve(kernel_align_start, kernel_align_size); + memblock_reserve(kernel_align_start, kernel_align_size); printk(KERN_INFO "%s: kernel addr=0x%08x-0x%08x size=0x%08x\n", __func__, kernel_align_start, kernel_align_start + kernel_align_size, kernel_align_size); @@ -141,18 +141,18 @@ void __init setup_memory(void) map_size = init_bootmem_node(&contig_page_data, PFN_UP(TOPHYS((u32)klimit)), min_low_pfn, max_low_pfn); #endif - lmb_reserve(PFN_UP(TOPHYS((u32)klimit)) << PAGE_SHIFT, map_size); + memblock_reserve(PFN_UP(TOPHYS((u32)klimit)) << PAGE_SHIFT, map_size); /* free bootmem is whole main memory */ free_bootmem(memory_start, memory_size); /* reserve allocate blocks */ - for (i = 0; i < lmb.reserved.cnt; i++) { + for (i = 0; i < memblock.reserved.cnt; i++) { 
pr_debug("reserved %d - 0x%08x-0x%08x\n", i, - (u32) lmb.reserved.region[i].base, - (u32) lmb_size_bytes(&lmb.reserved, i)); - reserve_bootmem(lmb.reserved.region[i].base, - lmb_size_bytes(&lmb.reserved, i) - 1, BOOTMEM_DEFAULT); + (u32) memblock.reserved.region[i].base, + (u32) memblock_size_bytes(&memblock.reserved, i)); + reserve_bootmem(memblock.reserved.region[i].base, + memblock_size_bytes(&memblock.reserved, i) - 1, BOOTMEM_DEFAULT); } #ifdef CONFIG_MMU init_bootmem_done = 1; @@ -235,7 +235,7 @@ static void mm_cmdline_setup(void) if (maxmem && memory_size > maxmem) { memory_size = maxmem; memory_end = memory_start + memory_size; - lmb.memory.region[0].size = memory_size; + memblock.memory.region[0].size = memory_size; } } } @@ -273,19 +273,19 @@ asmlinkage void __init mmu_init(void) { unsigned int kstart, ksize; - if (!lmb.reserved.cnt) { + if (!memblock.reserved.cnt) { printk(KERN_EMERG "Error memory count\n"); machine_restart(NULL); } - if ((u32) lmb.memory.region[0].size < 0x1000000) { + if ((u32) memblock.memory.region[0].size < 0x1000000) { printk(KERN_EMERG "Memory must be greater than 16MB\n"); machine_restart(NULL); } /* Find main memory where the kernel is */ - memory_start = (u32) lmb.memory.region[0].base; - memory_end = (u32) lmb.memory.region[0].base + - (u32) lmb.memory.region[0].size; + memory_start = (u32) memblock.memory.region[0].base; + memory_end = (u32) memblock.memory.region[0].base + + (u32) memblock.memory.region[0].size; memory_size = memory_end - memory_start; mm_cmdline_setup(); /* FIXME parse args from command line - not used */ @@ -297,7 +297,7 @@ asmlinkage void __init mmu_init(void) kstart = __pa(CONFIG_KERNEL_START); /* kernel start */ /* kernel size */ ksize = PAGE_ALIGN(((u32)_end - (u32)CONFIG_KERNEL_START)); - lmb_reserve(kstart, ksize); + memblock_reserve(kstart, ksize); #if defined(CONFIG_BLK_DEV_INITRD) /* Remove the init RAM disk from the available memory. 
*/ @@ -335,7 +335,7 @@ void __init *early_get_page(void) * Mem start + 32MB -> here is limit * because of mem mapping from head.S */ - p = __va(lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, + p = __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, memory_start + 0x2000000)); } return p; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 6506bf4fbff..2031a284686 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -132,7 +132,7 @@ config PPC select HAVE_ARCH_KGDB select HAVE_KRETPROBES select HAVE_ARCH_TRACEHOOK - select HAVE_LMB + select HAVE_MEMBLOCK select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG select USE_GENERIC_SMP_HELPERS if SMP diff --git a/arch/powerpc/include/asm/abs_addr.h b/arch/powerpc/include/asm/abs_addr.h index 98324c5a828..9a846efe638 100644 --- a/arch/powerpc/include/asm/abs_addr.h +++ b/arch/powerpc/include/asm/abs_addr.h @@ -12,7 +12,7 @@ * 2 of the License, or (at your option) any later version. */ -#include +#include #include #include diff --git a/arch/powerpc/include/asm/lmb.h b/arch/powerpc/include/asm/lmb.h deleted file mode 100644 index 6f5fdf0a19a..00000000000 --- a/arch/powerpc/include/asm/lmb.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _ASM_POWERPC_LMB_H -#define _ASM_POWERPC_LMB_H - -#include - -#define LMB_DBG(fmt...) udbg_printf(fmt) - -#ifdef CONFIG_PPC32 -extern phys_addr_t lowmem_end_addr; -#define LMB_REAL_LIMIT lowmem_end_addr -#else -#define LMB_REAL_LIMIT 0 -#endif - -#endif /* _ASM_POWERPC_LMB_H */ diff --git a/arch/powerpc/include/asm/memblock.h b/arch/powerpc/include/asm/memblock.h new file mode 100644 index 00000000000..3c29728b56b --- /dev/null +++ b/arch/powerpc/include/asm/memblock.h @@ -0,0 +1,15 @@ +#ifndef _ASM_POWERPC_MEMBLOCK_H +#define _ASM_POWERPC_MEMBLOCK_H + +#include + +#define MEMBLOCK_DBG(fmt...) 
udbg_printf(fmt) + +#ifdef CONFIG_PPC32 +extern phys_addr_t lowmem_end_addr; +#define MEMBLOCK_REAL_LIMIT lowmem_end_addr +#else +#define MEMBLOCK_REAL_LIMIT 0 +#endif + +#endif /* _ASM_POWERPC_MEMBLOCK_H */ diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 26e58630ed7..625942ae558 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 29df48f2b61..417f7b05a9c 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 5fb667a6089..40f524643ba 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include #include @@ -33,7 +33,7 @@ unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; #ifndef CONFIG_RELOCATABLE void __init reserve_kdump_trampoline(void) { - lmb_reserve(0, KDUMP_RESERVE_LIMIT); + memblock_reserve(0, KDUMP_RESERVE_LIMIT); } static void __init create_trampoline(unsigned long addr) diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index e7fe218b869..02f724f3675 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -71,7 +71,7 @@ static int ppc_swiotlb_bus_notify(struct notifier_block *nb, sd->max_direct_dma_addr = 0; /* May need to bounce if the device can't address all of DRAM */ - if ((dma_get_mask(dev) + 1) < lmb_end_of_DRAM()) + if ((dma_get_mask(dev) + 1) < memblock_end_of_DRAM()) set_dma_ops(dev, &swiotlb_dma_ops); return NOTIFY_DONE; diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 8d1de6f31d5..84d6367ec00 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c 
@@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include @@ -89,7 +89,7 @@ static int dma_direct_dma_supported(struct device *dev, u64 mask) /* Could be improved so platforms can set the limit in case * they have limited DMA windows */ - return mask >= (lmb_end_of_DRAM() - 1); + return mask >= (memblock_end_of_DRAM() - 1); #else return 1; #endif diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index bb3d893a835..89f005116aa 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include @@ -66,11 +66,11 @@ void __init reserve_crashkernel(void) unsigned long long crash_size, crash_base; int ret; - /* this is necessary because of lmb_phys_mem_size() */ - lmb_analyze(); + /* this is necessary because of memblock_phys_mem_size() */ + memblock_analyze(); /* use common parsing */ - ret = parse_crashkernel(boot_command_line, lmb_phys_mem_size(), + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base); if (ret == 0 && crash_size > 0) { crashk_res.start = crash_base; @@ -133,9 +133,9 @@ void __init reserve_crashkernel(void) "for crashkernel (System RAM: %ldMB)\n", (unsigned long)(crash_size >> 20), (unsigned long)(crashk_res.start >> 20), - (unsigned long)(lmb_phys_mem_size() >> 20)); + (unsigned long)(memblock_phys_mem_size() >> 20)); - lmb_reserve(crashk_res.start, crash_size); + memblock_reserve(crashk_res.start, crash_size); } int overlaps_crashkernel(unsigned long start, unsigned long size) diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index f88acf0218d..139a773853f 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include @@ -117,7 +117,7 @@ void __init allocate_pacas(void) * the first segment. 
On iSeries they must be within the area mapped * by the HV, which is HvPagesToMap * HVPAGESIZE bytes. */ - limit = min(0x10000000ULL, lmb.rmo_size); + limit = min(0x10000000ULL, memblock.rmo_size); if (firmware_has_feature(FW_FEATURE_ISERIES)) limit = min(limit, HvPagesToMap * HVPAGESIZE); @@ -128,7 +128,7 @@ void __init allocate_pacas(void) paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpus); - paca = __va(lmb_alloc_base(paca_size, PAGE_SIZE, limit)); + paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit)); memset(paca, 0, paca_size); printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n", @@ -148,7 +148,7 @@ void __init free_unused_pacas(void) if (new_size >= paca_size) return; - lmb_free(__pa(paca) + new_size, paca_size - new_size); + memblock_free(__pa(paca) + new_size, paca_size - new_size); printk(KERN_DEBUG "Freed %u bytes for unused pacas\n", paca_size - new_size); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 05131d634e7..9d3953983fb 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include @@ -98,7 +98,7 @@ static void __init move_device_tree(void) if ((memory_limit && (start + size) > memory_limit) || overlaps_crashkernel(start, size)) { - p = __va(lmb_alloc_base(size, PAGE_SIZE, lmb.rmo_size)); + p = __va(memblock_alloc_base(size, PAGE_SIZE, memblock.rmo_size)); memcpy(p, initial_boot_params, size); initial_boot_params = (struct boot_param_header *)p; DBG("Moved device tree to 0x%p\n", p); @@ -411,13 +411,13 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node) { __be32 *dm, *ls, *usm; unsigned long l, n, flags; - u64 base, size, lmb_size; + u64 base, size, memblock_size; unsigned int is_kexec_kdump = 0, rngs; - ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l); + ls = of_get_flat_dt_prop(node, "ibm,memblock-size", &l); if (ls == NULL || l < dt_root_size_cells * sizeof(__be32)) return 
0; - lmb_size = dt_mem_next_cell(dt_root_size_cells, &ls); + memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls); dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l); if (dm == NULL || l < sizeof(__be32)) @@ -442,11 +442,11 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node) or if the block is not assigned to this partition (0x8) */ if ((flags & 0x80) || !(flags & 0x8)) continue; - size = lmb_size; + size = memblock_size; rngs = 1; if (is_kexec_kdump) { /* - * For each lmb in ibm,dynamic-memory, a corresponding + * For each memblock in ibm,dynamic-memory, a corresponding * entry in linux,drconf-usable-memory property contains * a counter 'p' followed by 'p' (base, size) duple. * Now read the counter from @@ -469,10 +469,10 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node) if ((base + size) > 0x80000000ul) size = 0x80000000ul - base; } - lmb_add(base, size); + memblock_add(base, size); } while (--rngs); } - lmb_dump_all(); + memblock_dump_all(); return 0; } #else @@ -501,14 +501,14 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) } #endif - lmb_add(base, size); + memblock_add(base, size); memstart_addr = min((u64)memstart_addr, base); } u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align) { - return lmb_alloc(size, align); + return memblock_alloc(size, align); } #ifdef CONFIG_BLK_DEV_INITRD @@ -534,12 +534,12 @@ static void __init early_reserve_mem(void) /* before we do anything, lets reserve the dt blob */ self_base = __pa((unsigned long)initial_boot_params); self_size = initial_boot_params->totalsize; - lmb_reserve(self_base, self_size); + memblock_reserve(self_base, self_size); #ifdef CONFIG_BLK_DEV_INITRD /* then reserve the initrd, if any */ if (initrd_start && (initrd_end > initrd_start)) - lmb_reserve(__pa(initrd_start), initrd_end - initrd_start); + memblock_reserve(__pa(initrd_start), initrd_end - initrd_start); #endif /* CONFIG_BLK_DEV_INITRD */ #ifdef CONFIG_PPC32 @@ 
-560,7 +560,7 @@ static void __init early_reserve_mem(void) if (base_32 == self_base && size_32 == self_size) continue; DBG("reserving: %x -> %x\n", base_32, size_32); - lmb_reserve(base_32, size_32); + memblock_reserve(base_32, size_32); } return; } @@ -571,7 +571,7 @@ static void __init early_reserve_mem(void) if (size == 0) break; DBG("reserving: %llx -> %llx\n", base, size); - lmb_reserve(base, size); + memblock_reserve(base, size); } } @@ -594,7 +594,7 @@ static inline unsigned long phyp_dump_calculate_reserve_size(void) return phyp_dump_info->reserve_bootvar; /* divide by 20 to get 5% of value */ - tmp = lmb_end_of_DRAM(); + tmp = memblock_end_of_DRAM(); do_div(tmp, 20); /* round it down in multiples of 256 */ @@ -633,11 +633,11 @@ static void __init phyp_dump_reserve_mem(void) if (phyp_dump_info->phyp_dump_is_active) { /* Reserve *everything* above RMR.Area freed by userland tools*/ base = variable_reserve_size; - size = lmb_end_of_DRAM() - base; + size = memblock_end_of_DRAM() - base; /* XXX crashed_ram_end is wrong, since it may be beyond * the memory_limit, it will need to be adjusted. 
*/ - lmb_reserve(base, size); + memblock_reserve(base, size); phyp_dump_info->init_reserve_start = base; phyp_dump_info->init_reserve_size = size; @@ -645,8 +645,8 @@ static void __init phyp_dump_reserve_mem(void) size = phyp_dump_info->cpu_state_size + phyp_dump_info->hpte_region_size + variable_reserve_size; - base = lmb_end_of_DRAM() - size; - lmb_reserve(base, size); + base = memblock_end_of_DRAM() - size; + memblock_reserve(base, size); phyp_dump_info->init_reserve_start = base; phyp_dump_info->init_reserve_size = size; } @@ -681,8 +681,8 @@ void __init early_init_devtree(void *params) */ of_scan_flat_dt(early_init_dt_scan_chosen, NULL); - /* Scan memory nodes and rebuild LMBs */ - lmb_init(); + /* Scan memory nodes and rebuild MEMBLOCKs */ + memblock_init(); of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); @@ -690,11 +690,11 @@ void __init early_init_devtree(void *params) strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); parse_early_param(); - /* Reserve LMB regions used by kernel, initrd, dt, etc... */ - lmb_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START); + /* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */ + memblock_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START); /* If relocatable, reserve first 32k for interrupt vectors etc. */ if (PHYSICAL_START > MEMORY_START) - lmb_reserve(MEMORY_START, 0x8000); + memblock_reserve(MEMORY_START, 0x8000); reserve_kdump_trampoline(); reserve_crashkernel(); early_reserve_mem(); @@ -706,17 +706,17 @@ void __init early_init_devtree(void *params) /* Ensure that total memory size is page-aligned, because * otherwise mark_bootmem() gets upset. 
*/ - lmb_analyze(); - memsize = lmb_phys_mem_size(); + memblock_analyze(); + memsize = memblock_phys_mem_size(); if ((memsize & PAGE_MASK) != memsize) limit = memsize & PAGE_MASK; } - lmb_enforce_memory_limit(limit); + memblock_enforce_memory_limit(limit); - lmb_analyze(); - lmb_dump_all(); + memblock_analyze(); + memblock_dump_all(); - DBG("Phys. mem: %llx\n", lmb_phys_mem_size()); + DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); /* We may need to relocate the flat tree, do it now. * FIXME .. and the initrd too? */ diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 0e1ec6f746f..d0516dbee76 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include @@ -934,11 +934,11 @@ void __init rtas_initialize(void) */ #ifdef CONFIG_PPC64 if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) { - rtas_region = min(lmb.rmo_size, RTAS_INSTANTIATE_MAX); + rtas_region = min(memblock.rmo_size, RTAS_INSTANTIATE_MAX); ibm_suspend_me_token = rtas_token("ibm,suspend-me"); } #endif - rtas_rmo_buf = lmb_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region); + rtas_rmo_buf = memblock_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region); #ifdef CONFIG_RTAS_ERROR_LOGGING rtas_last_error_token = rtas_token("rtas-last-error"); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 5e4d852f640..b7e6c7e193a 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 7d84b210f16..a10ffc85ada 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include @@ -246,12 +246,12 @@ static void __init irqstack_early_init(void) unsigned 
int i; /* interrupt stacks must be in lowmem, we get that for free on ppc32 - * as the lmb is limited to lowmem by LMB_REAL_LIMIT */ + * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */ for_each_possible_cpu(i) { softirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); hardirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); } } @@ -261,15 +261,15 @@ static void __init exc_lvl_early_init(void) unsigned int i; /* interrupt stacks must be in lowmem, we get that for free on ppc32 - * as the lmb is limited to lowmem by LMB_REAL_LIMIT */ + * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */ for_each_possible_cpu(i) { critirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); #ifdef CONFIG_BOOKE dbgirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); mcheckirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); #endif } } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 643dcac40fc..d135f93cb0f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include @@ -158,7 +158,7 @@ static void __init setup_paca(struct paca_struct *new_paca) * the CPU that ignores the top 2 bits of the address in real * mode so we can access kernel globals normally provided we * only toy with things in the RMO region. 
From here, we do - * some early parsing of the device-tree to setup out LMB + * some early parsing of the device-tree to setup out MEMBLOCK * data structures, and allocate & initialize the hash table * and segment tables so we can start running with translation * enabled. @@ -404,7 +404,7 @@ void __init setup_system(void) printk("-----------------------------------------------------\n"); printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); - printk("physicalMemorySize = 0x%llx\n", lmb_phys_mem_size()); + printk("physicalMemorySize = 0x%llx\n", memblock_phys_mem_size()); if (ppc64_caches.dline_size != 0x80) printk("ppc64_caches.dcache_line_size = 0x%x\n", ppc64_caches.dline_size); @@ -443,10 +443,10 @@ static void __init irqstack_early_init(void) */ for_each_possible_cpu(i) { softirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc_base(THREAD_SIZE, + __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); hardirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc_base(THREAD_SIZE, + __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); } } @@ -458,11 +458,11 @@ static void __init exc_lvl_early_init(void) for_each_possible_cpu(i) { critirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); dbgirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); mcheckirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); } } #else @@ -487,11 +487,11 @@ static void __init emergency_stack_init(void) * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. 
*/ - limit = min(slb0_limit(), lmb.rmo_size); + limit = min(slb0_limit(), memblock.rmo_size); for_each_possible_cpu(i) { unsigned long sp; - sp = lmb_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); + sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); sp += THREAD_SIZE; paca[i].emergency_sp = __va(sp); } diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index d84d19224a9..13002fe206e 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include @@ -734,7 +734,7 @@ static int __init vdso_init(void) vdso_data->platform = machine_is(iseries) ? 0x200 : 0x100; if (firmware_has_feature(FW_FEATURE_LPAR)) vdso_data->platform |= 1; - vdso_data->physicalMemorySize = lmb_phys_mem_size(); + vdso_data->physicalMemorySize = memblock_phys_mem_size(); vdso_data->dcache_size = ppc64_caches.dsize; vdso_data->dcache_line_size = ppc64_caches.dline_size; vdso_data->icache_size = ppc64_caches.isize; diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c index 65abfcfaaa9..1dc2fa5ce1b 100644 --- a/arch/powerpc/mm/40x_mmu.c +++ b/arch/powerpc/mm/40x_mmu.c @@ -135,7 +135,7 @@ unsigned long __init mmu_mapin_ram(unsigned long top) /* If the size of RAM is not an exact power of two, we may not * have covered RAM in its entirety with 16 and 4 MiB * pages. Consequently, restrict the top end of RAM currently - * allocable so that calls to the LMB to allocate PTEs for "tail" + * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail" * coverage with normal-sized pages (or other reasons) do not * attempt to allocate outside the allowed range. 
*/ diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 3ecdcec0a39..98f262de558 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include @@ -384,8 +384,8 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node, printk(KERN_INFO "Huge page(16GB) memory: " "addr = 0x%lX size = 0x%lX pages = %d\n", phys_addr, block_size, expected_pages); - if (phys_addr + (16 * GB) <= lmb_end_of_DRAM()) { - lmb_reserve(phys_addr, block_size * expected_pages); + if (phys_addr + (16 * GB) <= memblock_end_of_DRAM()) { + memblock_reserve(phys_addr, block_size * expected_pages); add_gpage(phys_addr, block_size, expected_pages); } return 0; @@ -458,7 +458,7 @@ static void __init htab_init_page_sizes(void) * and we have at least 1G of RAM at boot */ if (mmu_psize_defs[MMU_PAGE_16M].shift && - lmb_phys_mem_size() >= 0x40000000) + memblock_phys_mem_size() >= 0x40000000) mmu_vmemmap_psize = MMU_PAGE_16M; else if (mmu_psize_defs[MMU_PAGE_64K].shift) mmu_vmemmap_psize = MMU_PAGE_64K; @@ -520,7 +520,7 @@ static unsigned long __init htab_get_table_size(void) return 1UL << ppc64_pft_size; /* round mem_size up to next power of 2 */ - mem_size = lmb_phys_mem_size(); + mem_size = memblock_phys_mem_size(); rnd_mem_size = 1UL << __ilog2(mem_size); if (rnd_mem_size < mem_size) rnd_mem_size <<= 1; @@ -627,7 +627,7 @@ static void __init htab_initialize(void) else limit = 0; - table = lmb_alloc_base(htab_size_bytes, htab_size_bytes, limit); + table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit); DBG("Hash table allocated at %lx, size: %lx\n", table, htab_size_bytes); @@ -647,9 +647,9 @@ static void __init htab_initialize(void) prot = pgprot_val(PAGE_KERNEL); #ifdef CONFIG_DEBUG_PAGEALLOC - linear_map_hash_count = lmb_end_of_DRAM() >> PAGE_SHIFT; - linear_map_hash_slots = __va(lmb_alloc_base(linear_map_hash_count, - 1, lmb.rmo_size)); + 
linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; + linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count, + 1, memblock.rmo_size)); memset(linear_map_hash_slots, 0, linear_map_hash_count); #endif /* CONFIG_DEBUG_PAGEALLOC */ @@ -659,16 +659,16 @@ static void __init htab_initialize(void) */ /* create bolted the linear mapping in the hash table */ - for (i=0; i < lmb.memory.cnt; i++) { - base = (unsigned long)__va(lmb.memory.region[i].base); - size = lmb.memory.region[i].size; + for (i=0; i < memblock.memory.cnt; i++) { + base = (unsigned long)__va(memblock.memory.region[i].base); + size = memblock.memory.region[i].size; DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", base, size, prot); #ifdef CONFIG_U3_DART /* Do not map the DART space. Fortunately, it will be aligned - * in such a way that it will not cross two lmb regions and + * in such a way that it will not cross two memblock regions and * will fit within a single 16Mb page. * The DART space is assumed to be a full 16Mb region even if * we only use 2Mb of that space. 
We will use more of it later diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 767333005eb..6a6975dc265 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include @@ -136,17 +136,17 @@ void __init MMU_init(void) /* parse args from command line */ MMU_setup(); - if (lmb.memory.cnt > 1) { + if (memblock.memory.cnt > 1) { #ifndef CONFIG_WII - lmb.memory.cnt = 1; - lmb_analyze(); + memblock.memory.cnt = 1; + memblock_analyze(); printk(KERN_WARNING "Only using first contiguous memory region"); #else wii_memory_fixups(); #endif } - total_lowmem = total_memory = lmb_end_of_DRAM() - memstart_addr; + total_lowmem = total_memory = memblock_end_of_DRAM() - memstart_addr; lowmem_end_addr = memstart_addr + total_lowmem; #ifdef CONFIG_FSL_BOOKE @@ -161,8 +161,8 @@ void __init MMU_init(void) lowmem_end_addr = memstart_addr + total_lowmem; #ifndef CONFIG_HIGHMEM total_memory = total_lowmem; - lmb_enforce_memory_limit(lowmem_end_addr); - lmb_analyze(); + memblock_enforce_memory_limit(lowmem_end_addr); + memblock_analyze(); #endif /* CONFIG_HIGHMEM */ } @@ -200,7 +200,7 @@ void __init *early_get_page(void) if (init_bootmem_done) { p = alloc_bootmem_pages(PAGE_SIZE); } else { - p = __va(lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, + p = __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, __initial_memory_limit_addr)); } return p; diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index e267f223fdf..71f1415e247 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 0f594d774bf..1a84a8d0000 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include @@ -83,13 +83,13 @@ int page_is_ram(unsigned long pfn) #else unsigned 
long paddr = (pfn << PAGE_SHIFT); int i; - for (i=0; i < lmb.memory.cnt; i++) { + for (i=0; i < memblock.memory.cnt; i++) { unsigned long base; - base = lmb.memory.region[i].base; + base = memblock.memory.region[i].base; if ((paddr >= base) && - (paddr < (base + lmb.memory.region[i].size))) { + (paddr < (base + memblock.memory.region[i].size))) { return 1; } } @@ -142,14 +142,14 @@ int arch_add_memory(int nid, u64 start, u64 size) /* * walk_memory_resource() needs to make sure there is no holes in a given * memory range. PPC64 does not maintain the memory layout in /proc/iomem. - * Instead it maintains it in lmb.memory structures. Walk through the + * Instead it maintains it in memblock.memory structures. Walk through the * memory regions, find holes and callback for contiguous regions. */ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)) { - struct lmb_property res; + struct memblock_property res; unsigned long pfn, len; u64 end; int ret = -1; @@ -158,7 +158,7 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, res.size = (u64) nr_pages << PAGE_SHIFT; end = res.base + res.size - 1; - while ((res.base < end) && (lmb_find(&res) >= 0)) { + while ((res.base < end) && (memblock_find(&res) >= 0)) { pfn = (unsigned long)(res.base >> PAGE_SHIFT); len = (unsigned long)(res.size >> PAGE_SHIFT); ret = (*func)(pfn, len, arg); @@ -184,8 +184,8 @@ void __init do_init_bootmem(void) unsigned long total_pages; int boot_mapsize; - max_low_pfn = max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; - total_pages = (lmb_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT; + max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; + total_pages = (memblock_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT; #ifdef CONFIG_HIGHMEM total_pages = total_lowmem >> PAGE_SHIFT; max_low_pfn = lowmem_end_addr >> PAGE_SHIFT; @@ -198,16 +198,16 @@ void __init do_init_bootmem(void) */ bootmap_pages = 
bootmem_bootmap_pages(total_pages); - start = lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE); + start = memblock_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE); min_low_pfn = MEMORY_START >> PAGE_SHIFT; boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn); /* Add active regions with valid PFNs */ - for (i = 0; i < lmb.memory.cnt; i++) { + for (i = 0; i < memblock.memory.cnt; i++) { unsigned long start_pfn, end_pfn; - start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT; - end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i); + start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; + end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); add_active_range(0, start_pfn, end_pfn); } @@ -218,17 +218,17 @@ void __init do_init_bootmem(void) free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT); /* reserve the sections we're already using */ - for (i = 0; i < lmb.reserved.cnt; i++) { - unsigned long addr = lmb.reserved.region[i].base + - lmb_size_bytes(&lmb.reserved, i) - 1; + for (i = 0; i < memblock.reserved.cnt; i++) { + unsigned long addr = memblock.reserved.region[i].base + + memblock_size_bytes(&memblock.reserved, i) - 1; if (addr < lowmem_end_addr) - reserve_bootmem(lmb.reserved.region[i].base, - lmb_size_bytes(&lmb.reserved, i), + reserve_bootmem(memblock.reserved.region[i].base, + memblock_size_bytes(&memblock.reserved, i), BOOTMEM_DEFAULT); - else if (lmb.reserved.region[i].base < lowmem_end_addr) { + else if (memblock.reserved.region[i].base < lowmem_end_addr) { unsigned long adjusted_size = lowmem_end_addr - - lmb.reserved.region[i].base; - reserve_bootmem(lmb.reserved.region[i].base, + memblock.reserved.region[i].base; + reserve_bootmem(memblock.reserved.region[i].base, adjusted_size, BOOTMEM_DEFAULT); } } @@ -236,9 +236,9 @@ void __init do_init_bootmem(void) free_bootmem_with_active_regions(0, max_pfn); /* reserve the sections we're already using */ - for (i = 0; i < lmb.reserved.cnt; i++) - 
reserve_bootmem(lmb.reserved.region[i].base, - lmb_size_bytes(&lmb.reserved, i), + for (i = 0; i < memblock.reserved.cnt; i++) + reserve_bootmem(memblock.reserved.region[i].base, + memblock_size_bytes(&memblock.reserved, i), BOOTMEM_DEFAULT); #endif @@ -251,20 +251,20 @@ void __init do_init_bootmem(void) /* mark pages that don't exist as nosave */ static int __init mark_nonram_nosave(void) { - unsigned long lmb_next_region_start_pfn, - lmb_region_max_pfn; + unsigned long memblock_next_region_start_pfn, + memblock_region_max_pfn; int i; - for (i = 0; i < lmb.memory.cnt - 1; i++) { - lmb_region_max_pfn = - (lmb.memory.region[i].base >> PAGE_SHIFT) + - (lmb.memory.region[i].size >> PAGE_SHIFT); - lmb_next_region_start_pfn = - lmb.memory.region[i+1].base >> PAGE_SHIFT; + for (i = 0; i < memblock.memory.cnt - 1; i++) { + memblock_region_max_pfn = + (memblock.memory.region[i].base >> PAGE_SHIFT) + + (memblock.memory.region[i].size >> PAGE_SHIFT); + memblock_next_region_start_pfn = + memblock.memory.region[i+1].base >> PAGE_SHIFT; - if (lmb_region_max_pfn < lmb_next_region_start_pfn) - register_nosave_region(lmb_region_max_pfn, - lmb_next_region_start_pfn); + if (memblock_region_max_pfn < memblock_next_region_start_pfn) + register_nosave_region(memblock_region_max_pfn, + memblock_next_region_start_pfn); } return 0; @@ -275,8 +275,8 @@ static int __init mark_nonram_nosave(void) */ void __init paging_init(void) { - unsigned long total_ram = lmb_phys_mem_size(); - phys_addr_t top_of_ram = lmb_end_of_DRAM(); + unsigned long total_ram = memblock_phys_mem_size(); + phys_addr_t top_of_ram = memblock_end_of_DRAM(); unsigned long max_zone_pfns[MAX_NR_ZONES]; #ifdef CONFIG_PPC32 @@ -327,7 +327,7 @@ void __init mem_init(void) swiotlb_init(1); #endif - num_physpages = lmb.memory.size >> PAGE_SHIFT; + num_physpages = memblock.memory.size >> PAGE_SHIFT; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); #ifdef CONFIG_NEED_MULTIPLE_NODES @@ -364,7 +364,7 @@ void __init mem_init(void) 
highmem_mapnr = lowmem_end_addr >> PAGE_SHIFT; for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { struct page *page = pfn_to_page(pfn); - if (lmb_is_reserved(pfn << PAGE_SHIFT)) + if (memblock_is_reserved(pfn << PAGE_SHIFT)) continue; ClearPageReserved(page); init_page_count(page); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 80d110635d2..f47364585ec 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -351,7 +351,7 @@ struct of_drconf_cell { #define DRCONF_MEM_RESERVED 0x00000080 /* - * Read the next lmb list entry from the ibm,dynamic-memory property + * Read the next memblock list entry from the ibm,dynamic-memory property * and return the information in the provided of_drconf_cell structure. */ static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp) @@ -372,8 +372,8 @@ static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp) /* * Retreive and validate the ibm,dynamic-memory property of the device tree. * - * The layout of the ibm,dynamic-memory property is a number N of lmb - * list entries followed by N lmb list entries. Each lmb list entry + * The layout of the ibm,dynamic-memory property is a number N of memblock + * list entries followed by N memblock list entries. Each memblock list entry * contains information as layed out in the of_drconf_cell struct above. */ static int of_get_drconf_memory(struct device_node *memory, const u32 **dm) @@ -398,15 +398,15 @@ static int of_get_drconf_memory(struct device_node *memory, const u32 **dm) } /* - * Retreive and validate the ibm,lmb-size property for drconf memory + * Retreive and validate the ibm,memblock-size property for drconf memory * from the device tree. 
*/ -static u64 of_get_lmb_size(struct device_node *memory) +static u64 of_get_memblock_size(struct device_node *memory) { const u32 *prop; u32 len; - prop = of_get_property(memory, "ibm,lmb-size", &len); + prop = of_get_property(memory, "ibm,memblock-size", &len); if (!prop || len < sizeof(unsigned int)) return 0; @@ -540,19 +540,19 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start, unsigned long size) { /* - * We use lmb_end_of_DRAM() in here instead of memory_limit because + * We use memblock_end_of_DRAM() in here instead of memory_limit because * we've already adjusted it for the limit and it takes care of * having memory holes below the limit. Also, in the case of * iommu_is_off, memory_limit is not set but is implicitly enforced. */ - if (start + size <= lmb_end_of_DRAM()) + if (start + size <= memblock_end_of_DRAM()) return size; - if (start >= lmb_end_of_DRAM()) + if (start >= memblock_end_of_DRAM()) return 0; - return lmb_end_of_DRAM() - start; + return memblock_end_of_DRAM() - start; } /* @@ -562,7 +562,7 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start, static inline int __init read_usm_ranges(const u32 **usm) { /* - * For each lmb in ibm,dynamic-memory a corresponding + * For each memblock in ibm,dynamic-memory a corresponding * entry in linux,drconf-usable-memory property contains * a counter followed by that many (base, size) duple. 
* read the counter from linux,drconf-usable-memory @@ -578,7 +578,7 @@ static void __init parse_drconf_memory(struct device_node *memory) { const u32 *dm, *usm; unsigned int n, rc, ranges, is_kexec_kdump = 0; - unsigned long lmb_size, base, size, sz; + unsigned long memblock_size, base, size, sz; int nid; struct assoc_arrays aa; @@ -586,8 +586,8 @@ static void __init parse_drconf_memory(struct device_node *memory) if (!n) return; - lmb_size = of_get_lmb_size(memory); - if (!lmb_size) + memblock_size = of_get_memblock_size(memory); + if (!memblock_size) return; rc = of_get_assoc_arrays(memory, &aa); @@ -611,7 +611,7 @@ static void __init parse_drconf_memory(struct device_node *memory) continue; base = drmem.base_addr; - size = lmb_size; + size = memblock_size; ranges = 1; if (is_kexec_kdump) { @@ -731,7 +731,7 @@ new_range: } /* - * Now do the same thing for each LMB listed in the ibm,dynamic-memory + * Now do the same thing for each MEMBLOCK listed in the ibm,dynamic-memory * property in the ibm,dynamic-reconfiguration-memory node. 
*/ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); @@ -743,8 +743,8 @@ new_range: static void __init setup_nonnuma(void) { - unsigned long top_of_ram = lmb_end_of_DRAM(); - unsigned long total_ram = lmb_phys_mem_size(); + unsigned long top_of_ram = memblock_end_of_DRAM(); + unsigned long total_ram = memblock_phys_mem_size(); unsigned long start_pfn, end_pfn; unsigned int i, nid = 0; @@ -753,9 +753,9 @@ static void __init setup_nonnuma(void) printk(KERN_DEBUG "Memory hole size: %ldMB\n", (top_of_ram - total_ram) >> 20); - for (i = 0; i < lmb.memory.cnt; ++i) { - start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT; - end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i); + for (i = 0; i < memblock.memory.cnt; ++i) { + start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; + end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); fake_numa_create_new_node(end_pfn, &nid); add_active_range(nid, start_pfn, end_pfn); @@ -813,7 +813,7 @@ static void __init dump_numa_memory_topology(void) count = 0; - for (i = 0; i < lmb_end_of_DRAM(); + for (i = 0; i < memblock_end_of_DRAM(); i += (1 << SECTION_SIZE_BITS)) { if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) { if (count == 0) @@ -833,7 +833,7 @@ static void __init dump_numa_memory_topology(void) } /* - * Allocate some memory, satisfying the lmb or bootmem allocator where + * Allocate some memory, satisfying the memblock or bootmem allocator where * required. nid is the preferred node and end is the physical address of * the highest address in the node. 
* @@ -847,11 +847,11 @@ static void __init *careful_zallocation(int nid, unsigned long size, int new_nid; unsigned long ret_paddr; - ret_paddr = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT); + ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT); /* retry over all memory */ if (!ret_paddr) - ret_paddr = __lmb_alloc_base(size, align, lmb_end_of_DRAM()); + ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM()); if (!ret_paddr) panic("numa.c: cannot allocate %lu bytes for node %d", @@ -861,14 +861,14 @@ static void __init *careful_zallocation(int nid, unsigned long size, /* * We initialize the nodes in numeric order: 0, 1, 2... - * and hand over control from the LMB allocator to the + * and hand over control from the MEMBLOCK allocator to the * bootmem allocator. If this function is called for * node 5, then we know that all nodes <5 are using the - * bootmem allocator instead of the LMB allocator. + * bootmem allocator instead of the MEMBLOCK allocator. * * So, check the nid from which this allocation came * and double check to see if we need to use bootmem - * instead of the LMB. We don't free the LMB memory + * instead of the MEMBLOCK. We don't free the MEMBLOCK memory * since it would be useless. 
*/ new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT); @@ -893,9 +893,9 @@ static void mark_reserved_regions_for_nid(int nid) struct pglist_data *node = NODE_DATA(nid); int i; - for (i = 0; i < lmb.reserved.cnt; i++) { - unsigned long physbase = lmb.reserved.region[i].base; - unsigned long size = lmb.reserved.region[i].size; + for (i = 0; i < memblock.reserved.cnt; i++) { + unsigned long physbase = memblock.reserved.region[i].base; + unsigned long size = memblock.reserved.region[i].size; unsigned long start_pfn = physbase >> PAGE_SHIFT; unsigned long end_pfn = PFN_UP(physbase + size); struct node_active_region node_ar; @@ -903,7 +903,7 @@ static void mark_reserved_regions_for_nid(int nid) node->node_spanned_pages; /* - * Check to make sure that this lmb.reserved area is + * Check to make sure that this memblock.reserved area is * within the bounds of the node that we care about. * Checking the nid of the start and end points is not * sufficient because the reserved area could span the @@ -961,7 +961,7 @@ void __init do_init_bootmem(void) int nid; min_low_pfn = 0; - max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; + max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; max_pfn = max_low_pfn; if (parse_numa_properties()) @@ -1038,7 +1038,7 @@ void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA] = lmb_end_of_DRAM() >> PAGE_SHIFT; + max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT; free_area_init_nodes(max_zone_pfns); } @@ -1072,7 +1072,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, { const u32 *dm; unsigned int drconf_cell_cnt, rc; - unsigned long lmb_size; + unsigned long memblock_size; struct assoc_arrays aa; int nid = -1; @@ -1080,8 +1080,8 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, if (!drconf_cell_cnt) return -1; - lmb_size = of_get_lmb_size(memory); - if (!lmb_size) + memblock_size = 
of_get_memblock_size(memory); + if (!memblock_size) return -1; rc = of_get_assoc_arrays(memory, &aa); @@ -1100,7 +1100,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, continue; if ((scn_addr < drmem.base_addr) - || (scn_addr >= (drmem.base_addr + lmb_size))) + || (scn_addr >= (drmem.base_addr + memblock_size))) continue; nid = of_drconf_to_nid_single(&drmem, &aa); @@ -1113,7 +1113,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, /* * Find the node associated with a hot added memory section for memory * represented in the device tree as a node (i.e. memory@XXXX) for - * each lmb. + * each memblock. */ int hot_add_node_scn_to_nid(unsigned long scn_addr) { @@ -1154,8 +1154,8 @@ int hot_add_node_scn_to_nid(unsigned long scn_addr) /* * Find the node associated with a hot added memory section. Section - * corresponds to a SPARSEMEM section, not an LMB. It is assumed that - * sections are fully contained within a single LMB. + * corresponds to a SPARSEMEM section, not an MEMBLOCK. It is assumed that + * sections are fully contained within a single MEMBLOCK. */ int hot_add_scn_to_nid(unsigned long scn_addr) { diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 34347b2e7e3..a87ead0138b 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include @@ -198,7 +198,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags, * mem_init() sets high_memory so only do the check after that. 
*/ if (mem_init_done && (p < virt_to_phys(high_memory)) && - !(__allow_ioremap_reserved && lmb_is_region_reserved(p, size))) { + !(__allow_ioremap_reserved && memblock_is_region_reserved(p, size))) { printk("__ioremap(): phys addr 0x%llx is RAM lr %p\n", (unsigned long long)p, __builtin_return_address(0)); return NULL; @@ -331,7 +331,7 @@ void __init mapin_ram(void) s = mmu_mapin_ram(top); __mapin_ram_chunk(s, top); - top = lmb_end_of_DRAM(); + top = memblock_end_of_DRAM(); s = wii_mmu_mapin_mem2(top); __mapin_ram_chunk(s, top); } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index d050fc8d971..21d6dfab794 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include @@ -67,7 +67,7 @@ static void *early_alloc_pgtable(unsigned long size) if (init_bootmem_done) pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS)); else - pt = __va(lmb_alloc_base(size, size, + pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS))); memset(pt, 0, size); diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index f11c2cdcb0f..f8a01829d64 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include @@ -223,7 +223,7 @@ void __init MMU_init_hw(void) * Find some memory for the hash table. */ if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); - Hash = __va(lmb_alloc_base(Hash_size, Hash_size, + Hash = __va(memblock_alloc_base(Hash_size, Hash_size, __initial_memory_limit_addr)); cacheable_memzero(Hash, Hash_size); _SDR1 = __pa(Hash) | SDR1_LOW_BITS; diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 687fddaa24c..446a01842a7 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -12,7 +12,7 @@ * 2 of the License, or (at your option) any later version. 
*/ -#include +#include #include #include @@ -252,7 +252,7 @@ void __init stabs_alloc(void) if (cpu == 0) continue; /* stab for CPU 0 is statically allocated */ - newstab = lmb_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE, + newstab = memblock_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE, 1< #include #include -#include +#include #include #include @@ -426,7 +426,7 @@ static void __early_init_mmu(int boot_cpu) /* Set the global containing the top of the linear mapping * for use by the TLB miss code */ - linear_map_top = lmb_end_of_DRAM(); + linear_map_top = memblock_end_of_DRAM(); /* A sync won't hurt us after mucking around with * the MMU configuration diff --git a/arch/powerpc/platforms/85xx/corenet_ds.c b/arch/powerpc/platforms/85xx/corenet_ds.c index 534c2ecc89d..2ab338c9ac3 100644 --- a/arch/powerpc/platforms/85xx/corenet_ds.c +++ b/arch/powerpc/platforms/85xx/corenet_ds.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include @@ -100,7 +100,7 @@ void __init corenet_ds_setup_arch(void) #endif #ifdef CONFIG_SWIOTLB - if (lmb_end_of_DRAM() > max) { + if (memblock_end_of_DRAM() > max) { ppc_swiotlb_enable = 1; set_pci_dma_ops(&swiotlb_dma_ops); ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb; diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c index 004b7d36cdb..f79f2f10214 100644 --- a/arch/powerpc/platforms/85xx/mpc8536_ds.c +++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include @@ -94,7 +94,7 @@ static void __init mpc8536_ds_setup_arch(void) #endif #ifdef CONFIG_SWIOTLB - if (lmb_end_of_DRAM() > max) { + if (memblock_end_of_DRAM() > max) { ppc_swiotlb_enable = 1; set_pci_dma_ops(&swiotlb_dma_ops); ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c index 544011a562f..8190bc25bf2 100644 --- 
a/arch/powerpc/platforms/85xx/mpc85xx_ds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include @@ -190,7 +190,7 @@ static void __init mpc85xx_ds_setup_arch(void) #endif #ifdef CONFIG_SWIOTLB - if (lmb_end_of_DRAM() > max) { + if (memblock_end_of_DRAM() > max) { ppc_swiotlb_enable = 1; set_pci_dma_ops(&swiotlb_dma_ops); ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c index 8fe87fc6148..494513682d7 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include @@ -325,7 +325,7 @@ static void __init mpc85xx_mds_setup_arch(void) #endif /* CONFIG_QUICC_ENGINE */ #ifdef CONFIG_SWIOTLB - if (lmb_end_of_DRAM() > max) { + if (memblock_end_of_DRAM() > max) { ppc_swiotlb_enable = 1; set_pci_dma_ops(&swiotlb_dma_ops); ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb; diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index 2aa69a69bcc..b11c3535f35 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include @@ -103,7 +103,7 @@ mpc86xx_hpcn_setup_arch(void) #endif #ifdef CONFIG_SWIOTLB - if (lmb_end_of_DRAM() > max) { + if (memblock_end_of_DRAM() > max) { ppc_swiotlb_enable = 1; set_pci_dma_ops(&swiotlb_dma_ops); ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb; diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 4326b737d91..3712900471b 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include @@ -845,10 +845,10 @@ static int __init 
cell_iommu_init_disabled(void) /* If we found a DMA window, we check if it's big enough to enclose * all of physical memory. If not, we force enable IOMMU */ - if (np && size < lmb_end_of_DRAM()) { + if (np && size < memblock_end_of_DRAM()) { printk(KERN_WARNING "iommu: force-enabled, dma window" " (%ldMB) smaller than total memory (%lldMB)\n", - size >> 20, lmb_end_of_DRAM() >> 20); + size >> 20, memblock_end_of_DRAM() >> 20); return -ENODEV; } @@ -1064,7 +1064,7 @@ static int __init cell_iommu_fixed_mapping_init(void) } fbase = _ALIGN_UP(fbase, 1 << IO_SEGMENT_SHIFT); - fsize = lmb_phys_mem_size(); + fsize = memblock_phys_mem_size(); if ((fbase + fsize) <= 0x800000000ul) hbase = 0; /* use the device tree window */ @@ -1169,7 +1169,7 @@ static int __init cell_iommu_init(void) * Note: should we make sure we have the IOMMU actually disabled ? */ if (iommu_is_off || - (!iommu_force_on && lmb_end_of_DRAM() <= 0x80000000ull)) + (!iommu_force_on && memblock_end_of_DRAM() <= 0x80000000ull)) if (cell_iommu_init_disabled() == 0) goto bail; diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 174a04ac480..5cdcc7c8d97 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include @@ -65,7 +65,7 @@ static int __init page_aligned(unsigned long x) void __init wii_memory_fixups(void) { - struct lmb_property *p = lmb.memory.region; + struct memblock_property *p = memblock.memory.region; /* * This is part of a workaround to allow the use of two @@ -77,7 +77,7 @@ void __init wii_memory_fixups(void) * between both ranges. 
*/ - BUG_ON(lmb.memory.cnt != 2); + BUG_ON(memblock.memory.cnt != 2); BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base)); p[0].size = _ALIGN_DOWN(p[0].size, PAGE_SIZE); @@ -92,11 +92,11 @@ void __init wii_memory_fixups(void) p[0].size += wii_hole_size + p[1].size; - lmb.memory.cnt = 1; - lmb_analyze(); + memblock.memory.cnt = 1; + memblock_analyze(); /* reserve the hole */ - lmb_reserve(wii_hole_start, wii_hole_size); + memblock_reserve(wii_hole_start, wii_hole_size); /* allow ioremapping the address space in the hole */ __allow_ioremap_reserved = 1; diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 39df70529d2..3fff8d979b4 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index 7b1d608ea3c..1f9fb2c5776 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -204,7 +204,7 @@ int __init iob_init(struct device_node *dn) pr_debug(" -> %s\n", __func__); /* Allocate a spare page to map all invalid IOTLB pages. 
*/ - tmp = lmb_alloc(IOBMAP_PAGE_SIZE, IOBMAP_PAGE_SIZE); + tmp = memblock_alloc(IOBMAP_PAGE_SIZE, IOBMAP_PAGE_SIZE); if (!tmp) panic("IOBMAP: Cannot allocate spare page!"); /* Empty l1 is marked invalid */ @@ -275,7 +275,7 @@ void __init alloc_iobmap_l2(void) return; #endif /* For 2G space, 8x64 pages (2^21 bytes) is max total l2 size */ - iob_l2_base = (u32 *)abs_to_virt(lmb_alloc_base(1UL<<21, 1UL<<21, 0x80000000)); + iob_l2_base = (u32 *)abs_to_virt(memblock_alloc_base(1UL<<21, 1UL<<21, 0x80000000)); printk(KERN_INFO "IOBMAP L2 allocated at: %p\n", iob_l2_base); } diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index f1d0132ebcc..9deb274841f 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -51,7 +51,7 @@ #include #include #include -#include +#include #include #include @@ -619,7 +619,7 @@ static int __init pmac_probe(void) * driver needs that. We have to allocate it now. We allocate 4k * (1 small page) for now. 
*/ - smu_cmdbuf_abs = lmb_alloc_base(4096, 4096, 0x80000000UL); + smu_cmdbuf_abs = memblock_alloc_base(4096, 4096, 0x80000000UL); #endif /* CONFIG_PMAC_SMU */ return 1; diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index 1e8a1e39dfe..2c0ed87f202 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -19,7 +19,7 @@ */ #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 7925751e464..c2045880e67 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include @@ -318,8 +318,8 @@ static int __init ps3_mm_add_memory(void) return result; } - lmb_add(start_addr, map.r1.size); - lmb_analyze(); + memblock_add(start_addr, map.r1.size); + memblock_analyze(); result = online_pages(start_pfn, nr_pages); diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c index dd521a181f2..5b759b66959 100644 --- a/arch/powerpc/platforms/ps3/os-area.c +++ b/arch/powerpc/platforms/ps3/os-area.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include @@ -723,7 +723,7 @@ static void os_area_queue_work(void) * flash to a high address in the boot memory region and then puts that RAM * address and the byte count into the repository for retrieval by the guest. * We copy the data we want into a static variable and allow the memory setup - * by the HV to be claimed by the lmb manager. + * by the HV to be claimed by the memblock manager. * * The os area mirror will not be available to a second stage kernel, and * the header verify will fail. 
In this case, the saved_params values will diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 01e7b5bb3c1..deab5f94609 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -10,14 +10,14 @@ */ #include -#include +#include #include #include #include #include #include -static int pseries_remove_lmb(unsigned long base, unsigned int lmb_size) +static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) { unsigned long start, start_pfn; struct zone *zone; @@ -26,7 +26,7 @@ static int pseries_remove_lmb(unsigned long base, unsigned int lmb_size) start_pfn = base >> PAGE_SHIFT; if (!pfn_valid(start_pfn)) { - lmb_remove(base, lmb_size); + memblock_remove(base, memblock_size); return 0; } @@ -41,20 +41,20 @@ static int pseries_remove_lmb(unsigned long base, unsigned int lmb_size) * to sysfs "state" file and we can't remove sysfs entries * while writing to it. So we have to defer it to here. 
*/ - ret = __remove_pages(zone, start_pfn, lmb_size >> PAGE_SHIFT); + ret = __remove_pages(zone, start_pfn, memblock_size >> PAGE_SHIFT); if (ret) return ret; /* * Update memory regions for memory remove */ - lmb_remove(base, lmb_size); + memblock_remove(base, memblock_size); /* * Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(base); - ret = remove_section_mapping(start, start + lmb_size); + ret = remove_section_mapping(start, start + memblock_size); /* Ensure all vmalloc mappings are flushed in case they also * hit that section of memory @@ -69,7 +69,7 @@ static int pseries_remove_memory(struct device_node *np) const char *type; const unsigned int *regs; unsigned long base; - unsigned int lmb_size; + unsigned int memblock_size; int ret = -EINVAL; /* @@ -80,16 +80,16 @@ static int pseries_remove_memory(struct device_node *np) return 0; /* - * Find the bae address and size of the lmb + * Find the bae address and size of the memblock */ regs = of_get_property(np, "reg", NULL); if (!regs) return ret; base = *(unsigned long *)regs; - lmb_size = regs[3]; + memblock_size = regs[3]; - ret = pseries_remove_lmb(base, lmb_size); + ret = pseries_remove_memblock(base, memblock_size); return ret; } @@ -98,7 +98,7 @@ static int pseries_add_memory(struct device_node *np) const char *type; const unsigned int *regs; unsigned long base; - unsigned int lmb_size; + unsigned int memblock_size; int ret = -EINVAL; /* @@ -109,43 +109,43 @@ static int pseries_add_memory(struct device_node *np) return 0; /* - * Find the base and size of the lmb + * Find the base and size of the memblock */ regs = of_get_property(np, "reg", NULL); if (!regs) return ret; base = *(unsigned long *)regs; - lmb_size = regs[3]; + memblock_size = regs[3]; /* * Update memory region to represent the memory add */ - ret = lmb_add(base, lmb_size); + ret = memblock_add(base, memblock_size); return (ret < 0) ? 
-EINVAL : 0; } static int pseries_drconf_memory(unsigned long *base, unsigned int action) { struct device_node *np; - const unsigned long *lmb_size; + const unsigned long *memblock_size; int rc; np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (!np) return -EINVAL; - lmb_size = of_get_property(np, "ibm,lmb-size", NULL); - if (!lmb_size) { + memblock_size = of_get_property(np, "ibm,memblock-size", NULL); + if (!memblock_size) { of_node_put(np); return -EINVAL; } if (action == PSERIES_DRCONF_MEM_ADD) { - rc = lmb_add(*base, *lmb_size); + rc = memblock_add(*base, *memblock_size); rc = (rc < 0) ? -EINVAL : 0; } else if (action == PSERIES_DRCONF_MEM_REMOVE) { - rc = pseries_remove_lmb(*base, *lmb_size); + rc = pseries_remove_memblock(*base, *memblock_size); } else { rc = -EINVAL; } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index d26182d42cb..395848e30c5 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -66,7 +66,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, tcep = ((u64 *)tbl->it_base) + index; while (npages--) { - /* can't move this out since we might cross LMB boundary */ + /* can't move this out since we might cross MEMBLOCK boundary */ rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; *tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; diff --git a/arch/powerpc/platforms/pseries/phyp_dump.c b/arch/powerpc/platforms/pseries/phyp_dump.c index 7ebd9e88d36..6e7742da007 100644 --- a/arch/powerpc/platforms/pseries/phyp_dump.c +++ b/arch/powerpc/platforms/pseries/phyp_dump.c @@ -255,12 +255,12 @@ void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr) /* ------------------------------------------------- */ /** - * release_memory_range -- release memory previously lmb_reserved + * release_memory_range -- release memory previously memblock_reserved * @start_pfn: starting physical frame number * @nr_pages: number of 
pages to free. * * This routine will release memory that had been previously - * lmb_reserved in early boot. The released memory becomes + * memblock_reserved in early boot. The released memory becomes * available for genreal use. */ static void release_memory_range(unsigned long start_pfn, diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index c8b96ed7c01..559db2b846a 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include @@ -232,7 +232,7 @@ static int __init dart_init(struct device_node *dart_node) * that to work around what looks like a problem with the HT bridge * prefetching into invalid pages and corrupting data */ - tmp = lmb_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE); + tmp = memblock_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE); dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) & DARTMAP_RPNMASK); @@ -407,7 +407,7 @@ void __init alloc_dart_table(void) if (iommu_is_off) return; - if (!iommu_force_on && lmb_end_of_DRAM() <= 0x40000000ull) + if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull) return; /* 512 pages (2MB) is max DART tablesize. 
*/ @@ -416,7 +416,7 @@ void __init alloc_dart_table(void) * will blow up an entire large page anyway in the kernel mapping */ dart_tablebase = (unsigned long) - abs_to_virt(lmb_alloc_base(1UL<<24, 1UL<<24, 0x80000000L)); + abs_to_virt(memblock_alloc_base(1UL<<24, 1UL<<24, 0x80000000L)); printk(KERN_INFO "DART table allocated at: %lx\n", dart_tablebase); } diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index a14760fe513..356c6a0e1b2 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include @@ -190,7 +190,7 @@ static void __init setup_pci_atmu(struct pci_controller *hose, pr_info("%s: PCICSRBAR @ 0x%x\n", name, pcicsrbar); /* Setup inbound mem window */ - mem = lmb_end_of_DRAM(); + mem = memblock_end_of_DRAM(); sz = min(mem, paddr_lo); mem_log = __ilog2_u64(sz); diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 573fca1fbd9..82868fee21f 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -10,7 +10,7 @@ config SUPERH select EMBEDDED select HAVE_CLK select HAVE_IDE if HAS_IOPORT - select HAVE_LMB + select HAVE_MEMBLOCK select HAVE_OPROFILE select HAVE_GENERIC_DMA_COHERENT select HAVE_ARCH_TRACEHOOK diff --git a/arch/sh/include/asm/lmb.h b/arch/sh/include/asm/lmb.h deleted file mode 100644 index 9b437f657ff..00000000000 --- a/arch/sh/include/asm/lmb.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_SH_LMB_H -#define __ASM_SH_LMB_H - -#define LMB_REAL_LIMIT 0 - -#endif /* __ASM_SH_LMB_H */ diff --git a/arch/sh/include/asm/memblock.h b/arch/sh/include/asm/memblock.h new file mode 100644 index 00000000000..dfe683b8807 --- /dev/null +++ b/arch/sh/include/asm/memblock.h @@ -0,0 +1,6 @@ +#ifndef __ASM_SH_MEMBLOCK_H +#define __ASM_SH_MEMBLOCK_H + +#define MEMBLOCK_REAL_LIMIT 0 + +#endif /* __ASM_SH_MEMBLOCK_H */ diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c index 5a559e666eb..e2a3af31ff9 100644 --- 
a/arch/sh/kernel/machine_kexec.c +++ b/arch/sh/kernel/machine_kexec.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -157,10 +157,10 @@ void __init reserve_crashkernel(void) unsigned long long crash_size, crash_base; int ret; - /* this is necessary because of lmb_phys_mem_size() */ - lmb_analyze(); + /* this is necessary because of memblock_phys_mem_size() */ + memblock_analyze(); - ret = parse_crashkernel(boot_command_line, lmb_phys_mem_size(), + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base); if (ret == 0 && crash_size > 0) { crashk_res.start = crash_base; @@ -172,14 +172,14 @@ void __init reserve_crashkernel(void) crash_size = PAGE_ALIGN(crashk_res.end - crashk_res.start + 1); if (!crashk_res.start) { - unsigned long max = lmb_end_of_DRAM() - memory_limit; - crashk_res.start = __lmb_alloc_base(crash_size, PAGE_SIZE, max); + unsigned long max = memblock_end_of_DRAM() - memory_limit; + crashk_res.start = __memblock_alloc_base(crash_size, PAGE_SIZE, max); if (!crashk_res.start) { pr_err("crashkernel allocation failed\n"); goto disable; } } else { - ret = lmb_reserve(crashk_res.start, crash_size); + ret = memblock_reserve(crashk_res.start, crash_size); if (unlikely(ret < 0)) { pr_err("crashkernel reservation failed - " "memory is in use\n"); @@ -192,7 +192,7 @@ void __init reserve_crashkernel(void) /* * Crash kernel trumps memory limit */ - if ((lmb_end_of_DRAM() - memory_limit) <= crashk_res.end) { + if ((memblock_end_of_DRAM() - memory_limit) <= crashk_res.end) { memory_limit = 0; pr_info("Disabled memory limit for crashkernel\n"); } @@ -201,7 +201,7 @@ void __init reserve_crashkernel(void) "for crashkernel (System RAM: %ldMB)\n", (unsigned long)(crash_size >> 20), (unsigned long)(crashk_res.start), - (unsigned long)(lmb_phys_mem_size() >> 20)); + (unsigned long)(memblock_phys_mem_size() >> 20)); return; diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 
272734681d2..e769401a78b 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include @@ -141,10 +141,10 @@ void __init check_for_initrd(void) goto disable; } - if (unlikely(end > lmb_end_of_DRAM())) { + if (unlikely(end > memblock_end_of_DRAM())) { pr_err("initrd extends beyond end of memory " "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - end, (unsigned long)lmb_end_of_DRAM()); + end, (unsigned long)memblock_end_of_DRAM()); goto disable; } @@ -161,7 +161,7 @@ void __init check_for_initrd(void) initrd_start = (unsigned long)__va(__pa(start)); initrd_end = initrd_start + INITRD_SIZE; - lmb_reserve(__pa(initrd_start), INITRD_SIZE); + memblock_reserve(__pa(initrd_start), INITRD_SIZE); return; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 46f84de6246..d0e249100e9 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include @@ -33,7 +33,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD]; void __init generic_mem_init(void) { - lmb_add(__MEMORY_START, __MEMORY_SIZE); + memblock_add(__MEMORY_START, __MEMORY_SIZE); } void __init __weak plat_mem_setup(void) @@ -176,12 +176,12 @@ void __init allocate_pgdat(unsigned int nid) get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); #ifdef CONFIG_NEED_MULTIPLE_NODES - phys = __lmb_alloc_base(sizeof(struct pglist_data), + phys = __memblock_alloc_base(sizeof(struct pglist_data), SMP_CACHE_BYTES, end_pfn << PAGE_SHIFT); /* Retry with all of system memory */ if (!phys) - phys = __lmb_alloc_base(sizeof(struct pglist_data), - SMP_CACHE_BYTES, lmb_end_of_DRAM()); + phys = __memblock_alloc_base(sizeof(struct pglist_data), + SMP_CACHE_BYTES, memblock_end_of_DRAM()); if (!phys) panic("Can't allocate pgdat for node %d\n", nid); @@ -212,7 +212,7 @@ static void __init bootmem_init_one_node(unsigned int nid) total_pages = bootmem_bootmap_pages(p->node_spanned_pages); - 
paddr = lmb_alloc(total_pages << PAGE_SHIFT, PAGE_SIZE); + paddr = memblock_alloc(total_pages << PAGE_SHIFT, PAGE_SIZE); if (!paddr) panic("Can't allocate bootmap for nid[%d]\n", nid); @@ -227,9 +227,9 @@ static void __init bootmem_init_one_node(unsigned int nid) */ if (nid == 0) { /* Reserve the sections we're already using. */ - for (i = 0; i < lmb.reserved.cnt; i++) - reserve_bootmem(lmb.reserved.region[i].base, - lmb_size_bytes(&lmb.reserved, i), + for (i = 0; i < memblock.reserved.cnt; i++) + reserve_bootmem(memblock.reserved.region[i].base, + memblock_size_bytes(&memblock.reserved, i), BOOTMEM_DEFAULT); } @@ -241,10 +241,10 @@ static void __init do_init_bootmem(void) int i; /* Add active regions with valid PFNs. */ - for (i = 0; i < lmb.memory.cnt; i++) { + for (i = 0; i < memblock.memory.cnt; i++) { unsigned long start_pfn, end_pfn; - start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT; - end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i); + start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; + end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); __add_active_range(0, start_pfn, end_pfn); } @@ -276,7 +276,7 @@ static void __init early_reserve_mem(void) * this catches the (definitely buggy) case of us accidentally * initializing the bootmem allocator with an invalid RAM area. */ - lmb_reserve(__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET, + memblock_reserve(__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET, (PFN_PHYS(start_pfn) + PAGE_SIZE - 1) - (__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET)); @@ -284,7 +284,7 @@ static void __init early_reserve_mem(void) * Reserve physical pages below CONFIG_ZERO_PAGE_OFFSET. 
*/ if (CONFIG_ZERO_PAGE_OFFSET != 0) - lmb_reserve(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET); + memblock_reserve(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET); /* * Handle additional early reservations @@ -299,27 +299,27 @@ void __init paging_init(void) unsigned long vaddr, end; int nid; - lmb_init(); + memblock_init(); sh_mv.mv_mem_init(); early_reserve_mem(); - lmb_enforce_memory_limit(memory_limit); - lmb_analyze(); + memblock_enforce_memory_limit(memory_limit); + memblock_analyze(); - lmb_dump_all(); + memblock_dump_all(); /* * Determine low and high memory ranges: */ - max_low_pfn = max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; + max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; min_low_pfn = __MEMORY_START >> PAGE_SHIFT; nodes_clear(node_online_map); memory_start = (unsigned long)__va(__MEMORY_START); - memory_end = memory_start + (memory_limit ?: lmb_phys_mem_size()); + memory_end = memory_start + (memory_limit ?: memblock_phys_mem_size()); uncached_init(); pmb_init(); diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c index a2e645f64a3..3d85225b9e9 100644 --- a/arch/sh/mm/numa.c +++ b/arch/sh/mm/numa.c @@ -9,7 +9,7 @@ */ #include #include -#include +#include #include #include #include @@ -39,12 +39,12 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end) pmb_bolt_mapping((unsigned long)__va(start), start, end - start, PAGE_KERNEL); - lmb_add(start, end - start); + memblock_add(start, end - start); __add_active_range(nid, start_pfn, end_pfn); /* Node-local pgdat */ - NODE_DATA(nid) = __va(lmb_alloc_base(sizeof(struct pglist_data), + NODE_DATA(nid) = __va(memblock_alloc_base(sizeof(struct pglist_data), SMP_CACHE_BYTES, end)); memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); @@ -54,7 +54,7 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end) /* Node-local bootmap */ bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); - bootmem_paddr = lmb_alloc_base(bootmap_pages << PAGE_SHIFT, + 
bootmem_paddr = memblock_alloc_base(bootmap_pages << PAGE_SHIFT, PAGE_SIZE, end); init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT, start_pfn, end_pfn); diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 6f1470baa31..c0015db247b 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -42,7 +42,7 @@ config SPARC64 select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_KRETPROBES select HAVE_KPROBES - select HAVE_LMB + select HAVE_MEMBLOCK select HAVE_SYSCALL_WRAPPERS select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/sparc/include/asm/lmb.h b/arch/sparc/include/asm/lmb.h deleted file mode 100644 index 6a352cbcf52..00000000000 --- a/arch/sparc/include/asm/lmb.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _SPARC64_LMB_H -#define _SPARC64_LMB_H - -#include - -#define LMB_DBG(fmt...) prom_printf(fmt) - -#define LMB_REAL_LIMIT 0 - -#endif /* !(_SPARC64_LMB_H) */ diff --git a/arch/sparc/include/asm/memblock.h b/arch/sparc/include/asm/memblock.h new file mode 100644 index 00000000000..f12af880649 --- /dev/null +++ b/arch/sparc/include/asm/memblock.h @@ -0,0 +1,10 @@ +#ifndef _SPARC64_MEMBLOCK_H +#define _SPARC64_MEMBLOCK_H + +#include + +#define MEMBLOCK_DBG(fmt...) 
prom_printf(fmt) + +#define MEMBLOCK_REAL_LIMIT 0 + +#endif /* !(_SPARC64_MEMBLOCK_H) */ diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index cdc91d919e9..83e85c2e802 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -4,7 +4,7 @@ */ #include #include -#include +#include #include #include #include @@ -86,7 +86,7 @@ static void mdesc_handle_init(struct mdesc_handle *hp, hp->handle_size = handle_size; } -static struct mdesc_handle * __init mdesc_lmb_alloc(unsigned int mdesc_size) +static struct mdesc_handle * __init mdesc_memblock_alloc(unsigned int mdesc_size) { unsigned int handle_size, alloc_size; struct mdesc_handle *hp; @@ -97,7 +97,7 @@ static struct mdesc_handle * __init mdesc_lmb_alloc(unsigned int mdesc_size) mdesc_size); alloc_size = PAGE_ALIGN(handle_size); - paddr = lmb_alloc(alloc_size, PAGE_SIZE); + paddr = memblock_alloc(alloc_size, PAGE_SIZE); hp = NULL; if (paddr) { @@ -107,7 +107,7 @@ static struct mdesc_handle * __init mdesc_lmb_alloc(unsigned int mdesc_size) return hp; } -static void mdesc_lmb_free(struct mdesc_handle *hp) +static void mdesc_memblock_free(struct mdesc_handle *hp) { unsigned int alloc_size; unsigned long start; @@ -120,9 +120,9 @@ static void mdesc_lmb_free(struct mdesc_handle *hp) free_bootmem_late(start, alloc_size); } -static struct mdesc_mem_ops lmb_mdesc_ops = { - .alloc = mdesc_lmb_alloc, - .free = mdesc_lmb_free, +static struct mdesc_mem_ops memblock_mdesc_ops = { + .alloc = mdesc_memblock_alloc, + .free = mdesc_memblock_free, }; static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size) @@ -914,7 +914,7 @@ void __init sun4v_mdesc_init(void) printk("MDESC: Size is %lu bytes.\n", len); - hp = mdesc_alloc(len, &lmb_mdesc_ops); + hp = mdesc_alloc(len, &memblock_mdesc_ops); if (hp == NULL) { prom_printf("MDESC: alloc of %lu bytes failed.\n", len); prom_halt(); diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c index fb06ac2bd38..466a32763ea 100644 --- 
a/arch/sparc/kernel/prom_64.c +++ b/arch/sparc/kernel/prom_64.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include @@ -34,7 +34,7 @@ void * __init prom_early_alloc(unsigned long size) { - unsigned long paddr = lmb_alloc(size, SMP_CACHE_BYTES); + unsigned long paddr = memblock_alloc(size, SMP_CACHE_BYTES); void *ret; if (!paddr) { diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index b2831dc3c12..f0434513df1 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include @@ -726,7 +726,7 @@ static void __init find_ramdisk(unsigned long phys_base) initrd_start = ramdisk_image; initrd_end = ramdisk_image + sparc_ramdisk_size; - lmb_reserve(initrd_start, sparc_ramdisk_size); + memblock_reserve(initrd_start, sparc_ramdisk_size); initrd_start += PAGE_OFFSET; initrd_end += PAGE_OFFSET; @@ -822,7 +822,7 @@ static void __init allocate_node_data(int nid) struct pglist_data *p; #ifdef CONFIG_NEED_MULTIPLE_NODES - paddr = lmb_alloc_nid(sizeof(struct pglist_data), + paddr = memblock_alloc_nid(sizeof(struct pglist_data), SMP_CACHE_BYTES, nid, nid_range); if (!paddr) { prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid); @@ -843,7 +843,7 @@ static void __init allocate_node_data(int nid) if (p->node_spanned_pages) { num_pages = bootmem_bootmap_pages(p->node_spanned_pages); - paddr = lmb_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid, + paddr = memblock_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid, nid_range); if (!paddr) { prom_printf("Cannot allocate bootmap for nid[%d]\n", @@ -974,11 +974,11 @@ static void __init add_node_ranges(void) { int i; - for (i = 0; i < lmb.memory.cnt; i++) { - unsigned long size = lmb_size_bytes(&lmb.memory, i); + for (i = 0; i < memblock.memory.cnt; i++) { + unsigned long size = memblock_size_bytes(&memblock.memory, i); unsigned long start, end; - start = lmb.memory.region[i].base; + start = 
memblock.memory.region[i].base; end = start + size; while (start < end) { unsigned long this_end; @@ -1010,7 +1010,7 @@ static int __init grab_mlgroups(struct mdesc_handle *md) if (!count) return -ENOENT; - paddr = lmb_alloc(count * sizeof(struct mdesc_mlgroup), + paddr = memblock_alloc(count * sizeof(struct mdesc_mlgroup), SMP_CACHE_BYTES); if (!paddr) return -ENOMEM; @@ -1051,7 +1051,7 @@ static int __init grab_mblocks(struct mdesc_handle *md) if (!count) return -ENOENT; - paddr = lmb_alloc(count * sizeof(struct mdesc_mblock), + paddr = memblock_alloc(count * sizeof(struct mdesc_mblock), SMP_CACHE_BYTES); if (!paddr) return -ENOMEM; @@ -1279,8 +1279,8 @@ static int bootmem_init_numa(void) static void __init bootmem_init_nonnuma(void) { - unsigned long top_of_ram = lmb_end_of_DRAM(); - unsigned long total_ram = lmb_phys_mem_size(); + unsigned long top_of_ram = memblock_end_of_DRAM(); + unsigned long total_ram = memblock_phys_mem_size(); unsigned int i; numadbg("bootmem_init_nonnuma()\n"); @@ -1292,15 +1292,15 @@ static void __init bootmem_init_nonnuma(void) init_node_masks_nonnuma(); - for (i = 0; i < lmb.memory.cnt; i++) { - unsigned long size = lmb_size_bytes(&lmb.memory, i); + for (i = 0; i < memblock.memory.cnt; i++) { + unsigned long size = memblock_size_bytes(&memblock.memory, i); unsigned long start_pfn, end_pfn; if (!size) continue; - start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT; - end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i); + start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; + end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); add_active_range(0, start_pfn, end_pfn); } @@ -1338,9 +1338,9 @@ static void __init trim_reserved_in_node(int nid) numadbg(" trim_reserved_in_node(%d)\n", nid); - for (i = 0; i < lmb.reserved.cnt; i++) { - unsigned long start = lmb.reserved.region[i].base; - unsigned long size = lmb_size_bytes(&lmb.reserved, i); + for (i = 0; i < memblock.reserved.cnt; i++) { + unsigned long start = 
memblock.reserved.region[i].base; + unsigned long size = memblock_size_bytes(&memblock.reserved, i); unsigned long end = start + size; reserve_range_in_node(nid, start, end); @@ -1384,7 +1384,7 @@ static unsigned long __init bootmem_init(unsigned long phys_base) unsigned long end_pfn; int nid; - end_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; + end_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; max_pfn = max_low_pfn = end_pfn; min_low_pfn = (phys_base >> PAGE_SHIFT); @@ -1734,7 +1734,7 @@ void __init paging_init(void) sun4v_ktsb_init(); } - lmb_init(); + memblock_init(); /* Find available physical memory... * @@ -1752,17 +1752,17 @@ void __init paging_init(void) phys_base = 0xffffffffffffffffUL; for (i = 0; i < pavail_ents; i++) { phys_base = min(phys_base, pavail[i].phys_addr); - lmb_add(pavail[i].phys_addr, pavail[i].reg_size); + memblock_add(pavail[i].phys_addr, pavail[i].reg_size); } - lmb_reserve(kern_base, kern_size); + memblock_reserve(kern_base, kern_size); find_ramdisk(phys_base); - lmb_enforce_memory_limit(cmdline_memory_size); + memblock_enforce_memory_limit(cmdline_memory_size); - lmb_analyze(); - lmb_dump_all(); + memblock_analyze(); + memblock_dump_all(); set_bit(0, mmu_context_bmap); @@ -1816,8 +1816,8 @@ void __init paging_init(void) */ for_each_possible_cpu(i) { /* XXX Use node local allocations... XXX */ - softirq_stack[i] = __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); - hardirq_stack[i] = __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + softirq_stack[i] = __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); + hardirq_stack[i] = __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); } /* Setup bootmem... */ diff --git a/include/linux/lmb.h b/include/linux/lmb.h deleted file mode 100644 index f3d14333ebe..00000000000 --- a/include/linux/lmb.h +++ /dev/null @@ -1,89 +0,0 @@ -#ifndef _LINUX_LMB_H -#define _LINUX_LMB_H -#ifdef __KERNEL__ - -/* - * Logical memory blocks. - * - * Copyright (C) 2001 Peter Bergner, IBM Corp. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include - -#define MAX_LMB_REGIONS 128 - -struct lmb_property { - u64 base; - u64 size; -}; - -struct lmb_region { - unsigned long cnt; - u64 size; - struct lmb_property region[MAX_LMB_REGIONS+1]; -}; - -struct lmb { - unsigned long debug; - u64 rmo_size; - struct lmb_region memory; - struct lmb_region reserved; -}; - -extern struct lmb lmb; - -extern void __init lmb_init(void); -extern void __init lmb_analyze(void); -extern long lmb_add(u64 base, u64 size); -extern long lmb_remove(u64 base, u64 size); -extern long __init lmb_free(u64 base, u64 size); -extern long __init lmb_reserve(u64 base, u64 size); -extern u64 __init lmb_alloc_nid(u64 size, u64 align, int nid, - u64 (*nid_range)(u64, u64, int *)); -extern u64 __init lmb_alloc(u64 size, u64 align); -extern u64 __init lmb_alloc_base(u64 size, - u64, u64 max_addr); -extern u64 __init __lmb_alloc_base(u64 size, - u64 align, u64 max_addr); -extern u64 __init lmb_phys_mem_size(void); -extern u64 lmb_end_of_DRAM(void); -extern void __init lmb_enforce_memory_limit(u64 memory_limit); -extern int __init lmb_is_reserved(u64 addr); -extern int lmb_is_region_reserved(u64 base, u64 size); -extern int lmb_find(struct lmb_property *res); - -extern void lmb_dump_all(void); - -static inline u64 -lmb_size_bytes(struct lmb_region *type, unsigned long region_nr) -{ - return type->region[region_nr].size; -} -static inline u64 -lmb_size_pages(struct lmb_region *type, unsigned long region_nr) -{ - return lmb_size_bytes(type, region_nr) >> PAGE_SHIFT; -} -static inline u64 -lmb_start_pfn(struct lmb_region *type, unsigned long region_nr) -{ - return type->region[region_nr].base >> PAGE_SHIFT; -} -static inline u64 -lmb_end_pfn(struct lmb_region *type, unsigned 
long region_nr) -{ - return lmb_start_pfn(type, region_nr) + - lmb_size_pages(type, region_nr); -} - -#include - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_LMB_H */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h new file mode 100644 index 00000000000..a59faf2b5ed --- /dev/null +++ b/include/linux/memblock.h @@ -0,0 +1,89 @@ +#ifndef _LINUX_MEMBLOCK_H +#define _LINUX_MEMBLOCK_H +#ifdef __KERNEL__ + +/* + * Logical memory blocks. + * + * Copyright (C) 2001 Peter Bergner, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include + +#define MAX_MEMBLOCK_REGIONS 128 + +struct memblock_property { + u64 base; + u64 size; +}; + +struct memblock_region { + unsigned long cnt; + u64 size; + struct memblock_property region[MAX_MEMBLOCK_REGIONS+1]; +}; + +struct memblock { + unsigned long debug; + u64 rmo_size; + struct memblock_region memory; + struct memblock_region reserved; +}; + +extern struct memblock memblock; + +extern void __init memblock_init(void); +extern void __init memblock_analyze(void); +extern long memblock_add(u64 base, u64 size); +extern long memblock_remove(u64 base, u64 size); +extern long __init memblock_free(u64 base, u64 size); +extern long __init memblock_reserve(u64 base, u64 size); +extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, + u64 (*nid_range)(u64, u64, int *)); +extern u64 __init memblock_alloc(u64 size, u64 align); +extern u64 __init memblock_alloc_base(u64 size, + u64, u64 max_addr); +extern u64 __init __memblock_alloc_base(u64 size, + u64 align, u64 max_addr); +extern u64 __init memblock_phys_mem_size(void); +extern u64 memblock_end_of_DRAM(void); +extern void __init memblock_enforce_memory_limit(u64 memory_limit); +extern int __init memblock_is_reserved(u64 addr); 
+extern int memblock_is_region_reserved(u64 base, u64 size); +extern int memblock_find(struct memblock_property *res); + +extern void memblock_dump_all(void); + +static inline u64 +memblock_size_bytes(struct memblock_region *type, unsigned long region_nr) +{ + return type->region[region_nr].size; +} +static inline u64 +memblock_size_pages(struct memblock_region *type, unsigned long region_nr) +{ + return memblock_size_bytes(type, region_nr) >> PAGE_SHIFT; +} +static inline u64 +memblock_start_pfn(struct memblock_region *type, unsigned long region_nr) +{ + return type->region[region_nr].base >> PAGE_SHIFT; +} +static inline u64 +memblock_end_pfn(struct memblock_region *type, unsigned long region_nr) +{ + return memblock_start_pfn(type, region_nr) + + memblock_size_pages(type, region_nr); +} + +#include + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_MEMBLOCK_H */ diff --git a/lib/Kconfig b/lib/Kconfig index 170d8ca901d..5b916bc0fba 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -181,9 +181,6 @@ config HAS_DMA config CHECK_SIGNATURE bool -config HAVE_LMB - boolean - config CPUMASK_OFFSTACK bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS help diff --git a/lib/Makefile b/lib/Makefile index 3f1062cbbff..0bfabba1bb3 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -89,8 +89,6 @@ obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o lib-$(CONFIG_GENERIC_BUG) += bug.o -obj-$(CONFIG_HAVE_LMB) += lmb.o - obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o diff --git a/lib/lmb.c b/lib/lmb.c deleted file mode 100644 index b1fc5260652..00000000000 --- a/lib/lmb.c +++ /dev/null @@ -1,541 +0,0 @@ -/* - * Procedures for maintaining information about logical memory blocks. - * - * Peter Bergner, IBM Corp. June 2001. - * Copyright (C) 2001 Peter Bergner. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include - -#define LMB_ALLOC_ANYWHERE 0 - -struct lmb lmb; - -static int lmb_debug; - -static int __init early_lmb(char *p) -{ - if (p && strstr(p, "debug")) - lmb_debug = 1; - return 0; -} -early_param("lmb", early_lmb); - -static void lmb_dump(struct lmb_region *region, char *name) -{ - unsigned long long base, size; - int i; - - pr_info(" %s.cnt = 0x%lx\n", name, region->cnt); - - for (i = 0; i < region->cnt; i++) { - base = region->region[i].base; - size = region->region[i].size; - - pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n", - name, i, base, base + size - 1, size); - } -} - -void lmb_dump_all(void) -{ - if (!lmb_debug) - return; - - pr_info("LMB configuration:\n"); - pr_info(" rmo_size = 0x%llx\n", (unsigned long long)lmb.rmo_size); - pr_info(" memory.size = 0x%llx\n", (unsigned long long)lmb.memory.size); - - lmb_dump(&lmb.memory, "memory"); - lmb_dump(&lmb.reserved, "reserved"); -} - -static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2, - u64 size2) -{ - return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); -} - -static long lmb_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2) -{ - if (base2 == base1 + size1) - return 1; - else if (base1 == base2 + size2) - return -1; - - return 0; -} - -static long lmb_regions_adjacent(struct lmb_region *rgn, - unsigned long r1, unsigned long r2) -{ - u64 base1 = rgn->region[r1].base; - u64 size1 = rgn->region[r1].size; - u64 base2 = rgn->region[r2].base; - u64 size2 = rgn->region[r2].size; - - return lmb_addrs_adjacent(base1, size1, base2, size2); -} - -static void lmb_remove_region(struct lmb_region *rgn, unsigned long r) -{ - unsigned long i; - - for (i = r; i < rgn->cnt 
- 1; i++) { - rgn->region[i].base = rgn->region[i + 1].base; - rgn->region[i].size = rgn->region[i + 1].size; - } - rgn->cnt--; -} - -/* Assumption: base addr of region 1 < base addr of region 2 */ -static void lmb_coalesce_regions(struct lmb_region *rgn, - unsigned long r1, unsigned long r2) -{ - rgn->region[r1].size += rgn->region[r2].size; - lmb_remove_region(rgn, r2); -} - -void __init lmb_init(void) -{ - /* Create a dummy zero size LMB which will get coalesced away later. - * This simplifies the lmb_add() code below... - */ - lmb.memory.region[0].base = 0; - lmb.memory.region[0].size = 0; - lmb.memory.cnt = 1; - - /* Ditto. */ - lmb.reserved.region[0].base = 0; - lmb.reserved.region[0].size = 0; - lmb.reserved.cnt = 1; -} - -void __init lmb_analyze(void) -{ - int i; - - lmb.memory.size = 0; - - for (i = 0; i < lmb.memory.cnt; i++) - lmb.memory.size += lmb.memory.region[i].size; -} - -static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size) -{ - unsigned long coalesced = 0; - long adjacent, i; - - if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) { - rgn->region[0].base = base; - rgn->region[0].size = size; - return 0; - } - - /* First try and coalesce this LMB with another. */ - for (i = 0; i < rgn->cnt; i++) { - u64 rgnbase = rgn->region[i].base; - u64 rgnsize = rgn->region[i].size; - - if ((rgnbase == base) && (rgnsize == size)) - /* Already have this region, so we're done */ - return 0; - - adjacent = lmb_addrs_adjacent(base, size, rgnbase, rgnsize); - if (adjacent > 0) { - rgn->region[i].base -= size; - rgn->region[i].size += size; - coalesced++; - break; - } else if (adjacent < 0) { - rgn->region[i].size += size; - coalesced++; - break; - } - } - - if ((i < rgn->cnt - 1) && lmb_regions_adjacent(rgn, i, i+1)) { - lmb_coalesce_regions(rgn, i, i+1); - coalesced++; - } - - if (coalesced) - return coalesced; - if (rgn->cnt >= MAX_LMB_REGIONS) - return -1; - - /* Couldn't coalesce the LMB, so add it to the sorted table. 
*/ - for (i = rgn->cnt - 1; i >= 0; i--) { - if (base < rgn->region[i].base) { - rgn->region[i+1].base = rgn->region[i].base; - rgn->region[i+1].size = rgn->region[i].size; - } else { - rgn->region[i+1].base = base; - rgn->region[i+1].size = size; - break; - } - } - - if (base < rgn->region[0].base) { - rgn->region[0].base = base; - rgn->region[0].size = size; - } - rgn->cnt++; - - return 0; -} - -long lmb_add(u64 base, u64 size) -{ - struct lmb_region *_rgn = &lmb.memory; - - /* On pSeries LPAR systems, the first LMB is our RMO region. */ - if (base == 0) - lmb.rmo_size = size; - - return lmb_add_region(_rgn, base, size); - -} - -static long __lmb_remove(struct lmb_region *rgn, u64 base, u64 size) -{ - u64 rgnbegin, rgnend; - u64 end = base + size; - int i; - - rgnbegin = rgnend = 0; /* supress gcc warnings */ - - /* Find the region where (base, size) belongs to */ - for (i=0; i < rgn->cnt; i++) { - rgnbegin = rgn->region[i].base; - rgnend = rgnbegin + rgn->region[i].size; - - if ((rgnbegin <= base) && (end <= rgnend)) - break; - } - - /* Didn't find the region */ - if (i == rgn->cnt) - return -1; - - /* Check to see if we are removing entire region */ - if ((rgnbegin == base) && (rgnend == end)) { - lmb_remove_region(rgn, i); - return 0; - } - - /* Check to see if region is matching at the front */ - if (rgnbegin == base) { - rgn->region[i].base = end; - rgn->region[i].size -= size; - return 0; - } - - /* Check to see if the region is matching at the end */ - if (rgnend == end) { - rgn->region[i].size -= size; - return 0; - } - - /* - * We need to split the entry - adjust the current one to the - * beginging of the hole and add the region after hole. 
- */ - rgn->region[i].size = base - rgn->region[i].base; - return lmb_add_region(rgn, end, rgnend - end); -} - -long lmb_remove(u64 base, u64 size) -{ - return __lmb_remove(&lmb.memory, base, size); -} - -long __init lmb_free(u64 base, u64 size) -{ - return __lmb_remove(&lmb.reserved, base, size); -} - -long __init lmb_reserve(u64 base, u64 size) -{ - struct lmb_region *_rgn = &lmb.reserved; - - BUG_ON(0 == size); - - return lmb_add_region(_rgn, base, size); -} - -long lmb_overlaps_region(struct lmb_region *rgn, u64 base, u64 size) -{ - unsigned long i; - - for (i = 0; i < rgn->cnt; i++) { - u64 rgnbase = rgn->region[i].base; - u64 rgnsize = rgn->region[i].size; - if (lmb_addrs_overlap(base, size, rgnbase, rgnsize)) - break; - } - - return (i < rgn->cnt) ? i : -1; -} - -static u64 lmb_align_down(u64 addr, u64 size) -{ - return addr & ~(size - 1); -} - -static u64 lmb_align_up(u64 addr, u64 size) -{ - return (addr + (size - 1)) & ~(size - 1); -} - -static u64 __init lmb_alloc_nid_unreserved(u64 start, u64 end, - u64 size, u64 align) -{ - u64 base, res_base; - long j; - - base = lmb_align_down((end - size), align); - while (start <= base) { - j = lmb_overlaps_region(&lmb.reserved, base, size); - if (j < 0) { - /* this area isn't reserved, take it */ - if (lmb_add_region(&lmb.reserved, base, size) < 0) - base = ~(u64)0; - return base; - } - res_base = lmb.reserved.region[j].base; - if (res_base < size) - break; - base = lmb_align_down(res_base - size, align); - } - - return ~(u64)0; -} - -static u64 __init lmb_alloc_nid_region(struct lmb_property *mp, - u64 (*nid_range)(u64, u64, int *), - u64 size, u64 align, int nid) -{ - u64 start, end; - - start = mp->base; - end = start + mp->size; - - start = lmb_align_up(start, align); - while (start < end) { - u64 this_end; - int this_nid; - - this_end = nid_range(start, end, &this_nid); - if (this_nid == nid) { - u64 ret = lmb_alloc_nid_unreserved(start, this_end, - size, align); - if (ret != ~(u64)0) - return ret; - } - 
start = this_end; - } - - return ~(u64)0; -} - -u64 __init lmb_alloc_nid(u64 size, u64 align, int nid, - u64 (*nid_range)(u64 start, u64 end, int *nid)) -{ - struct lmb_region *mem = &lmb.memory; - int i; - - BUG_ON(0 == size); - - size = lmb_align_up(size, align); - - for (i = 0; i < mem->cnt; i++) { - u64 ret = lmb_alloc_nid_region(&mem->region[i], - nid_range, - size, align, nid); - if (ret != ~(u64)0) - return ret; - } - - return lmb_alloc(size, align); -} - -u64 __init lmb_alloc(u64 size, u64 align) -{ - return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE); -} - -u64 __init lmb_alloc_base(u64 size, u64 align, u64 max_addr) -{ - u64 alloc; - - alloc = __lmb_alloc_base(size, align, max_addr); - - if (alloc == 0) - panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n", - (unsigned long long) size, (unsigned long long) max_addr); - - return alloc; -} - -u64 __init __lmb_alloc_base(u64 size, u64 align, u64 max_addr) -{ - long i, j; - u64 base = 0; - u64 res_base; - - BUG_ON(0 == size); - - size = lmb_align_up(size, align); - - /* On some platforms, make sure we allocate lowmem */ - /* Note that LMB_REAL_LIMIT may be LMB_ALLOC_ANYWHERE */ - if (max_addr == LMB_ALLOC_ANYWHERE) - max_addr = LMB_REAL_LIMIT; - - for (i = lmb.memory.cnt - 1; i >= 0; i--) { - u64 lmbbase = lmb.memory.region[i].base; - u64 lmbsize = lmb.memory.region[i].size; - - if (lmbsize < size) - continue; - if (max_addr == LMB_ALLOC_ANYWHERE) - base = lmb_align_down(lmbbase + lmbsize - size, align); - else if (lmbbase < max_addr) { - base = min(lmbbase + lmbsize, max_addr); - base = lmb_align_down(base - size, align); - } else - continue; - - while (base && lmbbase <= base) { - j = lmb_overlaps_region(&lmb.reserved, base, size); - if (j < 0) { - /* this area isn't reserved, take it */ - if (lmb_add_region(&lmb.reserved, base, size) < 0) - return 0; - return base; - } - res_base = lmb.reserved.region[j].base; - if (res_base < size) - break; - base = lmb_align_down(res_base - size, align); - 
} - } - return 0; -} - -/* You must call lmb_analyze() before this. */ -u64 __init lmb_phys_mem_size(void) -{ - return lmb.memory.size; -} - -u64 lmb_end_of_DRAM(void) -{ - int idx = lmb.memory.cnt - 1; - - return (lmb.memory.region[idx].base + lmb.memory.region[idx].size); -} - -/* You must call lmb_analyze() after this. */ -void __init lmb_enforce_memory_limit(u64 memory_limit) -{ - unsigned long i; - u64 limit; - struct lmb_property *p; - - if (!memory_limit) - return; - - /* Truncate the lmb regions to satisfy the memory limit. */ - limit = memory_limit; - for (i = 0; i < lmb.memory.cnt; i++) { - if (limit > lmb.memory.region[i].size) { - limit -= lmb.memory.region[i].size; - continue; - } - - lmb.memory.region[i].size = limit; - lmb.memory.cnt = i + 1; - break; - } - - if (lmb.memory.region[0].size < lmb.rmo_size) - lmb.rmo_size = lmb.memory.region[0].size; - - memory_limit = lmb_end_of_DRAM(); - - /* And truncate any reserves above the limit also. */ - for (i = 0; i < lmb.reserved.cnt; i++) { - p = &lmb.reserved.region[i]; - - if (p->base > memory_limit) - p->size = 0; - else if ((p->base + p->size) > memory_limit) - p->size = memory_limit - p->base; - - if (p->size == 0) { - lmb_remove_region(&lmb.reserved, i); - i--; - } - } -} - -int __init lmb_is_reserved(u64 addr) -{ - int i; - - for (i = 0; i < lmb.reserved.cnt; i++) { - u64 upper = lmb.reserved.region[i].base + - lmb.reserved.region[i].size - 1; - if ((addr >= lmb.reserved.region[i].base) && (addr <= upper)) - return 1; - } - return 0; -} - -int lmb_is_region_reserved(u64 base, u64 size) -{ - return lmb_overlaps_region(&lmb.reserved, base, size); -} - -/* - * Given a , find which memory regions belong to this range. - * Adjust the request and return a contiguous chunk. 
- */ -int lmb_find(struct lmb_property *res) -{ - int i; - u64 rstart, rend; - - rstart = res->base; - rend = rstart + res->size - 1; - - for (i = 0; i < lmb.memory.cnt; i++) { - u64 start = lmb.memory.region[i].base; - u64 end = start + lmb.memory.region[i].size - 1; - - if (start > rend) - return -1; - - if ((end >= rstart) && (start < rend)) { - /* adjust the request */ - if (rstart < start) - rstart = start; - if (rend > end) - rend = end; - res->base = rstart; - res->size = rend - rstart + 1; - return 0; - } - } - return -1; -} diff --git a/mm/Kconfig b/mm/Kconfig index 527136b2238..f4e516e9c37 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -128,6 +128,9 @@ config SPARSEMEM_VMEMMAP pfn_to_page and page_to_pfn operations. This is the most efficient option when sufficient kernel resources are available. +config HAVE_MEMBLOCK + boolean + # eventually, we can have this option just 'select SPARSEMEM' config MEMORY_HOTPLUG bool "Allow for memory hot-add" diff --git a/mm/Makefile b/mm/Makefile index 8982504bd03..34b2546a9e3 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -15,6 +15,8 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ $(mmu-y) obj-y += init-mm.o +obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o + obj-$(CONFIG_BOUNCE) += bounce.o obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o obj-$(CONFIG_HAS_DMA) += dmapool.o diff --git a/mm/memblock.c b/mm/memblock.c new file mode 100644 index 00000000000..3024eb30fc2 --- /dev/null +++ b/mm/memblock.c @@ -0,0 +1,541 @@ +/* + * Procedures for maintaining information about logical memory blocks. + * + * Peter Bergner, IBM Corp. June 2001. + * Copyright (C) 2001 Peter Bergner. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include + +#define MEMBLOCK_ALLOC_ANYWHERE 0 + +struct memblock memblock; + +static int memblock_debug; + +static int __init early_memblock(char *p) +{ + if (p && strstr(p, "debug")) + memblock_debug = 1; + return 0; +} +early_param("memblock", early_memblock); + +static void memblock_dump(struct memblock_region *region, char *name) +{ + unsigned long long base, size; + int i; + + pr_info(" %s.cnt = 0x%lx\n", name, region->cnt); + + for (i = 0; i < region->cnt; i++) { + base = region->region[i].base; + size = region->region[i].size; + + pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n", + name, i, base, base + size - 1, size); + } +} + +void memblock_dump_all(void) +{ + if (!memblock_debug) + return; + + pr_info("MEMBLOCK configuration:\n"); + pr_info(" rmo_size = 0x%llx\n", (unsigned long long)memblock.rmo_size); + pr_info(" memory.size = 0x%llx\n", (unsigned long long)memblock.memory.size); + + memblock_dump(&memblock.memory, "memory"); + memblock_dump(&memblock.reserved, "reserved"); +} + +static unsigned long memblock_addrs_overlap(u64 base1, u64 size1, u64 base2, + u64 size2) +{ + return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); +} + +static long memblock_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2) +{ + if (base2 == base1 + size1) + return 1; + else if (base1 == base2 + size2) + return -1; + + return 0; +} + +static long memblock_regions_adjacent(struct memblock_region *rgn, + unsigned long r1, unsigned long r2) +{ + u64 base1 = rgn->region[r1].base; + u64 size1 = rgn->region[r1].size; + u64 base2 = rgn->region[r2].base; + u64 size2 = rgn->region[r2].size; + + return memblock_addrs_adjacent(base1, size1, base2, size2); +} + +static void memblock_remove_region(struct memblock_region *rgn, unsigned long r) +{ + unsigned long i; + + for (i = r; i < rgn->cnt - 1; i++) { + rgn->region[i].base = rgn->region[i + 1].base; + rgn->region[i].size = rgn->region[i + 1].size; + } + rgn->cnt--; +} 
+ +/* Assumption: base addr of region 1 < base addr of region 2 */ +static void memblock_coalesce_regions(struct memblock_region *rgn, + unsigned long r1, unsigned long r2) +{ + rgn->region[r1].size += rgn->region[r2].size; + memblock_remove_region(rgn, r2); +} + +void __init memblock_init(void) +{ + /* Create a dummy zero size MEMBLOCK which will get coalesced away later. + * This simplifies the memblock_add() code below... + */ + memblock.memory.region[0].base = 0; + memblock.memory.region[0].size = 0; + memblock.memory.cnt = 1; + + /* Ditto. */ + memblock.reserved.region[0].base = 0; + memblock.reserved.region[0].size = 0; + memblock.reserved.cnt = 1; +} + +void __init memblock_analyze(void) +{ + int i; + + memblock.memory.size = 0; + + for (i = 0; i < memblock.memory.cnt; i++) + memblock.memory.size += memblock.memory.region[i].size; +} + +static long memblock_add_region(struct memblock_region *rgn, u64 base, u64 size) +{ + unsigned long coalesced = 0; + long adjacent, i; + + if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) { + rgn->region[0].base = base; + rgn->region[0].size = size; + return 0; + } + + /* First try and coalesce this MEMBLOCK with another. */ + for (i = 0; i < rgn->cnt; i++) { + u64 rgnbase = rgn->region[i].base; + u64 rgnsize = rgn->region[i].size; + + if ((rgnbase == base) && (rgnsize == size)) + /* Already have this region, so we're done */ + return 0; + + adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize); + if (adjacent > 0) { + rgn->region[i].base -= size; + rgn->region[i].size += size; + coalesced++; + break; + } else if (adjacent < 0) { + rgn->region[i].size += size; + coalesced++; + break; + } + } + + if ((i < rgn->cnt - 1) && memblock_regions_adjacent(rgn, i, i+1)) { + memblock_coalesce_regions(rgn, i, i+1); + coalesced++; + } + + if (coalesced) + return coalesced; + if (rgn->cnt >= MAX_MEMBLOCK_REGIONS) + return -1; + + /* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. 
*/ + for (i = rgn->cnt - 1; i >= 0; i--) { + if (base < rgn->region[i].base) { + rgn->region[i+1].base = rgn->region[i].base; + rgn->region[i+1].size = rgn->region[i].size; + } else { + rgn->region[i+1].base = base; + rgn->region[i+1].size = size; + break; + } + } + + if (base < rgn->region[0].base) { + rgn->region[0].base = base; + rgn->region[0].size = size; + } + rgn->cnt++; + + return 0; +} + +long memblock_add(u64 base, u64 size) +{ + struct memblock_region *_rgn = &memblock.memory; + + /* On pSeries LPAR systems, the first MEMBLOCK is our RMO region. */ + if (base == 0) + memblock.rmo_size = size; + + return memblock_add_region(_rgn, base, size); + +} + +static long __memblock_remove(struct memblock_region *rgn, u64 base, u64 size) +{ + u64 rgnbegin, rgnend; + u64 end = base + size; + int i; + + rgnbegin = rgnend = 0; /* supress gcc warnings */ + + /* Find the region where (base, size) belongs to */ + for (i=0; i < rgn->cnt; i++) { + rgnbegin = rgn->region[i].base; + rgnend = rgnbegin + rgn->region[i].size; + + if ((rgnbegin <= base) && (end <= rgnend)) + break; + } + + /* Didn't find the region */ + if (i == rgn->cnt) + return -1; + + /* Check to see if we are removing entire region */ + if ((rgnbegin == base) && (rgnend == end)) { + memblock_remove_region(rgn, i); + return 0; + } + + /* Check to see if region is matching at the front */ + if (rgnbegin == base) { + rgn->region[i].base = end; + rgn->region[i].size -= size; + return 0; + } + + /* Check to see if the region is matching at the end */ + if (rgnend == end) { + rgn->region[i].size -= size; + return 0; + } + + /* + * We need to split the entry - adjust the current one to the + * beginging of the hole and add the region after hole. 
+ */ + rgn->region[i].size = base - rgn->region[i].base; + return memblock_add_region(rgn, end, rgnend - end); +} + +long memblock_remove(u64 base, u64 size) +{ + return __memblock_remove(&memblock.memory, base, size); +} + +long __init memblock_free(u64 base, u64 size) +{ + return __memblock_remove(&memblock.reserved, base, size); +} + +long __init memblock_reserve(u64 base, u64 size) +{ + struct memblock_region *_rgn = &memblock.reserved; + + BUG_ON(0 == size); + + return memblock_add_region(_rgn, base, size); +} + +long memblock_overlaps_region(struct memblock_region *rgn, u64 base, u64 size) +{ + unsigned long i; + + for (i = 0; i < rgn->cnt; i++) { + u64 rgnbase = rgn->region[i].base; + u64 rgnsize = rgn->region[i].size; + if (memblock_addrs_overlap(base, size, rgnbase, rgnsize)) + break; + } + + return (i < rgn->cnt) ? i : -1; +} + +static u64 memblock_align_down(u64 addr, u64 size) +{ + return addr & ~(size - 1); +} + +static u64 memblock_align_up(u64 addr, u64 size) +{ + return (addr + (size - 1)) & ~(size - 1); +} + +static u64 __init memblock_alloc_nid_unreserved(u64 start, u64 end, + u64 size, u64 align) +{ + u64 base, res_base; + long j; + + base = memblock_align_down((end - size), align); + while (start <= base) { + j = memblock_overlaps_region(&memblock.reserved, base, size); + if (j < 0) { + /* this area isn't reserved, take it */ + if (memblock_add_region(&memblock.reserved, base, size) < 0) + base = ~(u64)0; + return base; + } + res_base = memblock.reserved.region[j].base; + if (res_base < size) + break; + base = memblock_align_down(res_base - size, align); + } + + return ~(u64)0; +} + +static u64 __init memblock_alloc_nid_region(struct memblock_property *mp, + u64 (*nid_range)(u64, u64, int *), + u64 size, u64 align, int nid) +{ + u64 start, end; + + start = mp->base; + end = start + mp->size; + + start = memblock_align_up(start, align); + while (start < end) { + u64 this_end; + int this_nid; + + this_end = nid_range(start, end, &this_nid); + if 
(this_nid == nid) { + u64 ret = memblock_alloc_nid_unreserved(start, this_end, + size, align); + if (ret != ~(u64)0) + return ret; + } + start = this_end; + } + + return ~(u64)0; +} + +u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, + u64 (*nid_range)(u64 start, u64 end, int *nid)) +{ + struct memblock_region *mem = &memblock.memory; + int i; + + BUG_ON(0 == size); + + size = memblock_align_up(size, align); + + for (i = 0; i < mem->cnt; i++) { + u64 ret = memblock_alloc_nid_region(&mem->region[i], + nid_range, + size, align, nid); + if (ret != ~(u64)0) + return ret; + } + + return memblock_alloc(size, align); +} + +u64 __init memblock_alloc(u64 size, u64 align) +{ + return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE); +} + +u64 __init memblock_alloc_base(u64 size, u64 align, u64 max_addr) +{ + u64 alloc; + + alloc = __memblock_alloc_base(size, align, max_addr); + + if (alloc == 0) + panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n", + (unsigned long long) size, (unsigned long long) max_addr); + + return alloc; +} + +u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr) +{ + long i, j; + u64 base = 0; + u64 res_base; + + BUG_ON(0 == size); + + size = memblock_align_up(size, align); + + /* On some platforms, make sure we allocate lowmem */ + /* Note that MEMBLOCK_REAL_LIMIT may be MEMBLOCK_ALLOC_ANYWHERE */ + if (max_addr == MEMBLOCK_ALLOC_ANYWHERE) + max_addr = MEMBLOCK_REAL_LIMIT; + + for (i = memblock.memory.cnt - 1; i >= 0; i--) { + u64 memblockbase = memblock.memory.region[i].base; + u64 memblocksize = memblock.memory.region[i].size; + + if (memblocksize < size) + continue; + if (max_addr == MEMBLOCK_ALLOC_ANYWHERE) + base = memblock_align_down(memblockbase + memblocksize - size, align); + else if (memblockbase < max_addr) { + base = min(memblockbase + memblocksize, max_addr); + base = memblock_align_down(base - size, align); + } else + continue; + + while (base && memblockbase <= base) { + j = 
memblock_overlaps_region(&memblock.reserved, base, size); + if (j < 0) { + /* this area isn't reserved, take it */ + if (memblock_add_region(&memblock.reserved, base, size) < 0) + return 0; + return base; + } + res_base = memblock.reserved.region[j].base; + if (res_base < size) + break; + base = memblock_align_down(res_base - size, align); + } + } + return 0; +} + +/* You must call memblock_analyze() before this. */ +u64 __init memblock_phys_mem_size(void) +{ + return memblock.memory.size; +} + +u64 memblock_end_of_DRAM(void) +{ + int idx = memblock.memory.cnt - 1; + + return (memblock.memory.region[idx].base + memblock.memory.region[idx].size); +} + +/* You must call memblock_analyze() after this. */ +void __init memblock_enforce_memory_limit(u64 memory_limit) +{ + unsigned long i; + u64 limit; + struct memblock_property *p; + + if (!memory_limit) + return; + + /* Truncate the memblock regions to satisfy the memory limit. */ + limit = memory_limit; + for (i = 0; i < memblock.memory.cnt; i++) { + if (limit > memblock.memory.region[i].size) { + limit -= memblock.memory.region[i].size; + continue; + } + + memblock.memory.region[i].size = limit; + memblock.memory.cnt = i + 1; + break; + } + + if (memblock.memory.region[0].size < memblock.rmo_size) + memblock.rmo_size = memblock.memory.region[0].size; + + memory_limit = memblock_end_of_DRAM(); + + /* And truncate any reserves above the limit also. 
*/ + for (i = 0; i < memblock.reserved.cnt; i++) { + p = &memblock.reserved.region[i]; + + if (p->base > memory_limit) + p->size = 0; + else if ((p->base + p->size) > memory_limit) + p->size = memory_limit - p->base; + + if (p->size == 0) { + memblock_remove_region(&memblock.reserved, i); + i--; + } + } +} + +int __init memblock_is_reserved(u64 addr) +{ + int i; + + for (i = 0; i < memblock.reserved.cnt; i++) { + u64 upper = memblock.reserved.region[i].base + + memblock.reserved.region[i].size - 1; + if ((addr >= memblock.reserved.region[i].base) && (addr <= upper)) + return 1; + } + return 0; +} + +int memblock_is_region_reserved(u64 base, u64 size) +{ + return memblock_overlaps_region(&memblock.reserved, base, size); +} + +/* + * Given a , find which memory regions belong to this range. + * Adjust the request and return a contiguous chunk. + */ +int memblock_find(struct memblock_property *res) +{ + int i; + u64 rstart, rend; + + rstart = res->base; + rend = rstart + res->size - 1; + + for (i = 0; i < memblock.memory.cnt; i++) { + u64 start = memblock.memory.region[i].base; + u64 end = start + memblock.memory.region[i].size - 1; + + if (start > rend) + return -1; + + if ((end >= rstart) && (start < rend)) { + /* adjust the request */ + if (rstart < start) + rstart = start; + if (rend > end) + rend = end; + res->base = rstart; + res->size = rend - rstart + 1; + return 0; + } + } + return -1; +} -- cgit v1.2.3 From b0f77d0eae0c58a5a9691a067ada112ceeae2d00 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 14 Jul 2010 20:50:29 -0700 Subject: net: fix problem in reading sock TX queue Fix problem in reading the tx_queue recorded in a socket. In dev_pick_tx, the TX queue is read by doing a check with sk_tx_queue_recorded on the socket, followed by a sk_tx_queue_get. 
The problem is that there is no mutual exclusion across these calls in the socket so it is possible that the queue in the sock can be invalidated after sk_tx_queue_recorded is called so that sk_tx_queue_get returns -1, which sets 65535 in queue_index and thus dev_pick_tx returns 65536 which is a bogus queue and can cause crash in dev_queue_xmit. We fix this by only calling sk_tx_queue_get which does the proper checks. The interface is that sk_tx_queue_get returns the TX queue if the sock argument is non-NULL and TX queue is recorded, else it returns -1. sk_tx_queue_recorded is no longer used so it can be completely removed. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/sock.h | 7 +------ net/core/dev.c | 7 +++---- 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 731150d5279..0a691ea7654 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1224,12 +1224,7 @@ static inline void sk_tx_queue_clear(struct sock *sk) static inline int sk_tx_queue_get(const struct sock *sk) { - return sk->sk_tx_queue_mapping; -} - -static inline bool sk_tx_queue_recorded(const struct sock *sk) -{ - return (sk && sk->sk_tx_queue_mapping >= 0); + return sk ? 
sk->sk_tx_queue_mapping : -1; } static inline void sk_set_socket(struct sock *sk, struct socket *sock) diff --git a/net/core/dev.c b/net/core/dev.c index 4b05fdf762a..0ea10f849be 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2029,12 +2029,11 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { - u16 queue_index; + int queue_index; struct sock *sk = skb->sk; - if (sk_tx_queue_recorded(sk)) { - queue_index = sk_tx_queue_get(sk); - } else { + queue_index = sk_tx_queue_get(sk); + if (queue_index < 0) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_select_queue) { -- cgit v1.2.3 From 13ceef099edd2b70c5a6f3a9ef5d6d97cda2e096 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Jul 2010 07:56:33 +0200 Subject: jbd2/ocfs2: Fix block checksumming when a buffer is used in several transactions OCFS2 uses t_commit trigger to compute and store checksum of the just committed blocks. When a buffer has b_frozen_data, checksum is computed for it instead of b_data but this can result in an old checksum being written to the filesystem in the following scenario: 1) transaction1 is opened 2) handle1 is opened 3) journal_access(handle1, bh) - This sets jh->b_transaction to transaction1 4) modify(bh) 5) journal_dirty(handle1, bh) 6) handle1 is closed 7) start committing transaction1, opening transaction2 8) handle2 is opened 9) journal_access(handle2, bh) - This copies off b_frozen_data to make it safe for transaction1 to commit. jh->b_next_transaction is set to transaction2. 10) jbd2_journal_write_metadata() checksums b_frozen_data 11) the journal correctly writes b_frozen_data to the disk journal 12) handle2 is closed - There was no dirty call for the bh on handle2, so it is never queued for any more journal operation 13) Checkpointing finally happens, and it just spools the bh via normal buffer writeback. 
This will write b_data, which was never triggered on and thus contains a wrong (old) checksum. This patch fixes the problem by calling the trigger at the moment data is frozen for journal commit - i.e., either when b_frozen_data is created by do_get_write_access or just before we write a buffer to the log if b_frozen_data does not exist. We also rename the trigger to t_frozen as that better describes when it is called. Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh Signed-off-by: Joel Becker --- fs/jbd2/journal.c | 15 +++++++-------- fs/jbd2/transaction.c | 9 ++++++--- fs/ocfs2/journal.c | 24 ++++++++++++------------ include/linux/jbd2.h | 11 ++++++----- 4 files changed, 31 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index bc2ff593276..036880895bf 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -297,7 +297,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct page *new_page; unsigned int new_offset; struct buffer_head *bh_in = jh2bh(jh_in); - struct jbd2_buffer_trigger_type *triggers; journal_t *journal = transaction->t_journal; /* @@ -328,21 +327,21 @@ repeat: done_copy_out = 1; new_page = virt_to_page(jh_in->b_frozen_data); new_offset = offset_in_page(jh_in->b_frozen_data); - triggers = jh_in->b_frozen_triggers; } else { new_page = jh2bh(jh_in)->b_page; new_offset = offset_in_page(jh2bh(jh_in)->b_data); - triggers = jh_in->b_triggers; } mapped_data = kmap_atomic(new_page, KM_USER0); /* - * Fire any commit trigger. Do this before checking for escaping, - * as the trigger may modify the magic offset. If a copy-out - * happens afterwards, it will have the correct data in the buffer. + * Fire data frozen trigger if data already wasn't frozen. Do this + * before checking for escaping, as the trigger may modify the magic + * offset. If a copy-out happens afterwards, it will have the correct + * data in the buffer. 
*/ - jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset, - triggers); + if (!done_copy_out) + jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset, + jh_in->b_triggers); /* * Check for escaping diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e214d68620a..b8e0806681b 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -725,6 +725,9 @@ done: page = jh2bh(jh)->b_page; offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK; source = kmap_atomic(page, KM_USER0); + /* Fire data frozen trigger just before we copy the data */ + jbd2_buffer_frozen_trigger(jh, source + offset, + jh->b_triggers); memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); kunmap_atomic(source, KM_USER0); @@ -963,15 +966,15 @@ void jbd2_journal_set_triggers(struct buffer_head *bh, jh->b_triggers = type; } -void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data, +void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data, struct jbd2_buffer_trigger_type *triggers) { struct buffer_head *bh = jh2bh(jh); - if (!triggers || !triggers->t_commit) + if (!triggers || !triggers->t_frozen) return; - triggers->t_commit(triggers, bh, mapped_data, bh->b_size); + triggers->t_frozen(triggers, bh, mapped_data, bh->b_size); } void jbd2_buffer_abort_trigger(struct journal_head *jh, diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 39113b5e79e..625de9d7088 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -472,7 +472,7 @@ static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger return container_of(triggers, struct ocfs2_triggers, ot_triggers); } -static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers, +static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, struct buffer_head *bh, void *data, size_t size) { @@ -491,7 +491,7 @@ static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers, * Quota blocks have their own trigger because 
the struct ocfs2_block_check * offset depends on the blocksize. */ -static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers, +static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, struct buffer_head *bh, void *data, size_t size) { @@ -511,7 +511,7 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers, * Directory blocks also have their own trigger because the * struct ocfs2_block_check offset depends on the blocksize. */ -static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers, +static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, struct buffer_head *bh, void *data, size_t size) { @@ -544,7 +544,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers, static struct ocfs2_triggers di_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_dinode, i_check), @@ -552,7 +552,7 @@ static struct ocfs2_triggers di_triggers = { static struct ocfs2_triggers eb_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_extent_block, h_check), @@ -560,7 +560,7 @@ static struct ocfs2_triggers eb_triggers = { static struct ocfs2_triggers rb_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check), @@ -568,7 +568,7 @@ static struct ocfs2_triggers rb_triggers = { static struct ocfs2_triggers gd_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_group_desc, bg_check), @@ -576,14 +576,14 @@ static struct ocfs2_triggers gd_triggers = { static struct 
ocfs2_triggers db_triggers = { .ot_triggers = { - .t_commit = ocfs2_db_commit_trigger, + .t_frozen = ocfs2_db_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, }; static struct ocfs2_triggers xb_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check), @@ -591,14 +591,14 @@ static struct ocfs2_triggers xb_triggers = { static struct ocfs2_triggers dq_triggers = { .ot_triggers = { - .t_commit = ocfs2_dq_commit_trigger, + .t_frozen = ocfs2_dq_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, }; static struct ocfs2_triggers dr_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check), @@ -606,7 +606,7 @@ static struct ocfs2_triggers dr_triggers = { static struct ocfs2_triggers dl_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check), diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index a4d2e9f7088..adf832dec3f 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1026,11 +1026,12 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *); struct jbd2_buffer_trigger_type { /* - * Fired just before a buffer is written to the journal. - * mapped_data is a mapped buffer that is the frozen data for - * commit. + * Fired a the moment data to write to the journal are known to be + * stable - so either at the moment b_frozen_data is created or just + * before a buffer is written to the journal. mapped_data is a mapped + * buffer that is the frozen data for commit. 
*/ - void (*t_commit)(struct jbd2_buffer_trigger_type *type, + void (*t_frozen)(struct jbd2_buffer_trigger_type *type, struct buffer_head *bh, void *mapped_data, size_t size); @@ -1042,7 +1043,7 @@ struct jbd2_buffer_trigger_type { struct buffer_head *bh); }; -extern void jbd2_buffer_commit_trigger(struct journal_head *jh, +extern void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data, struct jbd2_buffer_trigger_type *triggers); extern void jbd2_buffer_abort_trigger(struct journal_head *jh, -- cgit v1.2.3 From 58c84eda07560a6b75b03e8d3b26d6eddfc14011 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 15 Jul 2010 09:41:42 -0600 Subject: PCI: fall back to original BIOS BAR addresses If we fail to assign resources to a PCI BAR, this patch makes us try the original address from BIOS rather than leaving it disabled. Linux tries to make sure all PCI device BARs are inside the upstream PCI host bridge or P2P bridge apertures, reassigning BARs if necessary. Windows does similar reassignment. Before this patch, if we could not move a BAR into an aperture, we left the resource unassigned, i.e., at address zero. Windows leaves such BARs at the original BIOS addresses, and this patch makes Linux do the same. This is a bit ugly because we disable the resource long before we try to reassign it, so we have to keep track of the BIOS BAR address somewhere. For lack of a better place, I put it in the struct pci_dev. I think it would be cleaner to attempt the assignment immediately when the claim fails, so we could easily remember the original address. But we currently claim motherboard resources in the middle, after attempting to claim PCI resources and before assigning new PCI resources, and changing that is a fairly big job. 
Addresses https://bugzilla.kernel.org/show_bug.cgi?id=16263 Reported-by: Andrew Tested-by: Andrew Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/i386.c | 1 + drivers/pci/setup-res.c | 32 ++++++++++++++++++++++++++++++++ include/linux/pci.h | 1 + 3 files changed, 34 insertions(+) (limited to 'include') diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 6fdb3ec30c3..55253095be8 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -184,6 +184,7 @@ static void __init pcibios_allocate_resources(int pass) idx, r, disabled, pass); if (pci_claim_resource(dev, idx) < 0) { /* We'll assign a new address later */ + dev->fw_addr[idx] = r->start; r->end -= r->start; r->start = 0; } diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 92379e2d37e..2aaa13150de 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -156,6 +156,38 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, pcibios_align_resource, dev); } + if (ret < 0 && dev->fw_addr[resno]) { + struct resource *root, *conflict; + resource_size_t start, end; + + /* + * If we failed to assign anything, let's try the address + * where firmware left it. That at least has a chance of + * working, which is better than just leaving it disabled. 
+ */ + + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + root = &iomem_resource; + + start = res->start; + end = res->end; + res->start = dev->fw_addr[resno]; + res->end = res->start + size - 1; + dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n", + resno, res); + conflict = request_resource_conflict(root, res); + if (conflict) { + dev_info(&dev->dev, + "BAR %d: %pR conflicts with %s %pR\n", resno, + res, conflict->name, conflict); + res->start = start; + res->end = end; + } else + ret = 0; + } + if (!ret) { res->flags &= ~IORESOURCE_STARTALIGN; dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res); diff --git a/include/linux/pci.h b/include/linux/pci.h index 7cb00845f15..f26fda76b87 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -288,6 +288,7 @@ struct pci_dev { */ unsigned int irq; struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ + resource_size_t fw_addr[DEVICE_COUNT_RESOURCE]; /* FW-assigned addr */ /* These fields are used by common fixups */ unsigned int transparent:1; /* Transparent PCI bridge */ -- cgit v1.2.3 From 2f495c398edca50ac251c134f1995a2fb3c06cb7 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 21 Jun 2010 13:20:46 +1000 Subject: net/phy/marvell: Expose IDs and flags in a .h and add dns323 LEDs setup flag This moves the various known Marvell PHY IDs to include/linux/marvell_phy.h along with dev_flags definitions for use by the driver. I then added a flag that changes the PHY init code to setup the LEDs config to the values needed to operate a dns323 rev C1 NAS. I moved the existing "resistance" flag to the .h as well, though I've been unable to find whoever sets this to convert it to use that constant. Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Wolfram Sang Acked-by: David S. 
Miller Signed-off-by: Nicolas Pitre --- drivers/net/phy/marvell.c | 38 ++++++++++++++++++++------------------ include/linux/marvell_phy.h | 20 ++++++++++++++++++++ 2 files changed, 40 insertions(+), 18 deletions(-) create mode 100644 include/linux/marvell_phy.h (limited to 'include') diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 78b74e83ce5..5a1bd5db2a9 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -48,8 +49,6 @@ #define MII_M1145_RGMII_RX_DELAY 0x0080 #define MII_M1145_RGMII_TX_DELAY 0x0002 -#define M1145_DEV_FLAGS_RESISTANCE 0x00000001 - #define MII_M1111_PHY_LED_CONTROL 0x18 #define MII_M1111_PHY_LED_DIRECT 0x4100 #define MII_M1111_PHY_LED_COMBINE 0x411c @@ -350,7 +349,10 @@ static int m88e1118_config_init(struct phy_device *phydev) return err; /* Adjust LED Control */ - err = phy_write(phydev, 0x10, 0x021e); + if (phydev->dev_flags & MARVELL_PHY_M1118_DNS323_LEDS) + err = phy_write(phydev, 0x10, 0x1100); + else + err = phy_write(phydev, 0x10, 0x021e); if (err < 0) return err; @@ -398,7 +400,7 @@ static int m88e1145_config_init(struct phy_device *phydev) if (err < 0) return err; - if (phydev->dev_flags & M1145_DEV_FLAGS_RESISTANCE) { + if (phydev->dev_flags & MARVELL_PHY_M1145_FLAGS_RESISTANCE) { err = phy_write(phydev, 0x1d, 0x0012); if (err < 0) return err; @@ -529,8 +531,8 @@ static int m88e1121_did_interrupt(struct phy_device *phydev) static struct phy_driver marvell_drivers[] = { { - .phy_id = 0x01410c60, - .phy_id_mask = 0xfffffff0, + .phy_id = MARVELL_PHY_ID_88E1101, + .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1101", .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, @@ -541,8 +543,8 @@ static struct phy_driver marvell_drivers[] = { .driver = { .owner = THIS_MODULE }, }, { - .phy_id = 0x01410c90, - .phy_id_mask = 0xfffffff0, + .phy_id = MARVELL_PHY_ID_88E1112, + .phy_id_mask = MARVELL_PHY_ID_MASK, .name = 
"Marvell 88E1112", .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, @@ -554,8 +556,8 @@ static struct phy_driver marvell_drivers[] = { .driver = { .owner = THIS_MODULE }, }, { - .phy_id = 0x01410cc0, - .phy_id_mask = 0xfffffff0, + .phy_id = MARVELL_PHY_ID_88E1111, + .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1111", .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, @@ -567,8 +569,8 @@ static struct phy_driver marvell_drivers[] = { .driver = { .owner = THIS_MODULE }, }, { - .phy_id = 0x01410e10, - .phy_id_mask = 0xfffffff0, + .phy_id = MARVELL_PHY_ID_88E1118, + .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1118", .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, @@ -580,8 +582,8 @@ static struct phy_driver marvell_drivers[] = { .driver = {.owner = THIS_MODULE,}, }, { - .phy_id = 0x01410cb0, - .phy_id_mask = 0xfffffff0, + .phy_id = MARVELL_PHY_ID_88E1121R, + .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1121R", .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, @@ -593,8 +595,8 @@ static struct phy_driver marvell_drivers[] = { .driver = { .owner = THIS_MODULE }, }, { - .phy_id = 0x01410cd0, - .phy_id_mask = 0xfffffff0, + .phy_id = MARVELL_PHY_ID_88E1145, + .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1145", .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, @@ -606,8 +608,8 @@ static struct phy_driver marvell_drivers[] = { .driver = { .owner = THIS_MODULE }, }, { - .phy_id = 0x01410e30, - .phy_id_mask = 0xfffffff0, + .phy_id = MARVELL_PHY_ID_88E1240, + .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1240", .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h new file mode 100644 index 00000000000..2ed4fb8bbd5 --- /dev/null +++ b/include/linux/marvell_phy.h @@ -0,0 +1,20 @@ +#ifndef _MARVELL_PHY_H +#define _MARVELL_PHY_H + +/* Mask used for ID comparisons */ +#define MARVELL_PHY_ID_MASK 0xfffffff0 + 
+/* Known PHY IDs */ +#define MARVELL_PHY_ID_88E1101 0x01410c60 +#define MARVELL_PHY_ID_88E1112 0x01410c90 +#define MARVELL_PHY_ID_88E1111 0x01410cc0 +#define MARVELL_PHY_ID_88E1118 0x01410e10 +#define MARVELL_PHY_ID_88E1121R 0x01410cb0 +#define MARVELL_PHY_ID_88E1145 0x01410cd0 +#define MARVELL_PHY_ID_88E1240 0x01410e30 + +/* struct phy_device dev_flags definitions */ +#define MARVELL_PHY_M1145_FLAGS_RESISTANCE 0x00000001 +#define MARVELL_PHY_M1118_DNS323_LEDS 0x00000002 + +#endif /* _MARVELL_PHY_H */ -- cgit v1.2.3 From 7f8275d0d660c146de6ee3017e1e2e594c49e820 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 19 Jul 2010 14:56:17 +1000 Subject: mm: add context argument to shrinker callback The current shrinker implementation requires the registered callback to have global state to work from. This makes it difficult to shrink caches that are not global (e.g. per-filesystem caches). Pass the shrinker structure to the callback so that users can embed the shrinker structure in the context the shrinker needs to operate on and get back to it in the callback via container_of(). 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- arch/x86/kvm/mmu.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 2 +- fs/dcache.c | 2 +- fs/gfs2/glock.c | 2 +- fs/gfs2/quota.c | 2 +- fs/gfs2/quota.h | 2 +- fs/inode.c | 2 +- fs/mbcache.c | 5 +++-- fs/nfs/dir.c | 2 +- fs/nfs/internal.h | 3 ++- fs/quota/dquot.c | 2 +- fs/ubifs/shrinker.c | 2 +- fs/ubifs/ubifs.h | 2 +- fs/xfs/linux-2.6/xfs_buf.c | 5 +++-- fs/xfs/linux-2.6/xfs_sync.c | 1 + fs/xfs/quota/xfs_qm.c | 7 +++++-- include/linux/mm.h | 2 +- mm/vmscan.c | 8 +++++--- 18 files changed, 31 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3699613e883..b1ed0a1a591 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2926,7 +2926,7 @@ static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm) return kvm_mmu_zap_page(kvm, page) + 1; } -static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) +static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) { struct kvm *kvm; struct kvm *kvm_freed = NULL; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8757ecf6e96..e7018708cc3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4978,7 +4978,7 @@ i915_gpu_is_active(struct drm_device *dev) } static int -i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask) +i915_gem_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) { drm_i915_private_t *dev_priv, *next_dev; struct drm_i915_gem_object *obj_priv, *next_obj; diff --git a/fs/dcache.c b/fs/dcache.c index c8c78ba0782..86d4db15473 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -896,7 +896,7 @@ EXPORT_SYMBOL(shrink_dcache_parent); * * In this case we return -1 to tell the caller that we baled. 
*/ -static int shrink_dcache_memory(int nr, gfp_t gfp_mask) +static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) { if (nr) { if (!(gfp_mask & __GFP_FS)) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index dbab3fdc258..0898f3ec821 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1358,7 +1358,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) } -static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) +static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) { struct gfs2_glock *gl; int may_demote; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index b256d6f2428..8f02d3db8f4 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -77,7 +77,7 @@ static LIST_HEAD(qd_lru_list); static atomic_t qd_lru_count = ATOMIC_INIT(0); static DEFINE_SPINLOCK(qd_lru_lock); -int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask) +int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) { struct gfs2_quota_data *qd; struct gfs2_sbd *sdp; diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 195f60c8bd1..e7d236ca48b 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -51,7 +51,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) return ret; } -extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask); +extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask); extern const struct quotactl_ops gfs2_quotactl_ops; #endif /* __QUOTA_DOT_H__ */ diff --git a/fs/inode.c b/fs/inode.c index 2bee20ae3d6..722860b323a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -512,7 +512,7 @@ static void prune_icache(int nr_to_scan) * This function is passed the number of inodes to scan, and it returns the * total number of remaining possibly-reclaimable inodes. 
*/ -static int shrink_icache_memory(int nr, gfp_t gfp_mask) +static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) { if (nr) { /* diff --git a/fs/mbcache.c b/fs/mbcache.c index ec88ff3d04a..e28f21b9534 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -115,7 +115,7 @@ mb_cache_indexes(struct mb_cache *cache) * What the mbcache registers as to get shrunk dynamically. */ -static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask); +static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); static struct shrinker mb_cache_shrinker = { .shrink = mb_cache_shrink_fn, @@ -191,13 +191,14 @@ forget: * This function is called by the kernel memory management when memory * gets low. * + * @shrink: (ignored) * @nr_to_scan: Number of objects to scan * @gfp_mask: (ignored) * * Returns the number of objects which are present in the cache. */ static int -mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask) +mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) { LIST_HEAD(free_list); struct list_head *l, *ltmp; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 782b431ef91..e60416d3f81 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1710,7 +1710,7 @@ static void nfs_access_free_list(struct list_head *head) } } -int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) +int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) { LIST_HEAD(head); struct nfs_inode *nfsi; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d8bd619e386..e70f44b9b3f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -205,7 +205,8 @@ extern struct rpc_procinfo nfs4_procedures[]; void nfs_close_context(struct nfs_open_context *ctx, int is_sync); /* dir.c */ -extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); +extern int nfs_access_cache_shrinker(struct shrinker *shrink, + int nr_to_scan, gfp_t gfp_mask); /* inode.c */ extern struct workqueue_struct *nfsiod_workqueue; 
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 12c233da1b6..437d2ca2de9 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -676,7 +676,7 @@ static void prune_dqcache(int count) * This is called from kswapd when we think we need some * more memory */ -static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) +static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) { if (nr) { spin_lock(&dq_list_lock); diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index 02feb59cefc..0b201114a5a 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -277,7 +277,7 @@ static int kick_a_thread(void) return 0; } -int ubifs_shrinker(int nr, gfp_t gfp_mask) +int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask) { int freed, contention = 0; long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 2eef553d50c..04310878f44 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1575,7 +1575,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); int ubifs_tnc_end_commit(struct ubifs_info *c); /* shrinker.c */ -int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask); +int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); /* commit.c */ int ubifs_bg_thread(void *info); diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 649ade8ef59..2ee3f7a6016 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -45,7 +45,7 @@ static kmem_zone_t *xfs_buf_zone; STATIC int xfsbufd(void *); -STATIC int xfsbufd_wakeup(int, gfp_t); +STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t); STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); static struct shrinker xfs_buf_shake = { .shrink = xfsbufd_wakeup, @@ -340,7 +340,7 @@ _xfs_buf_lookup_pages( __func__, gfp_mask); XFS_STATS_INC(xb_page_retries); - xfsbufd_wakeup(0, gfp_mask); + xfsbufd_wakeup(NULL, 0, gfp_mask); congestion_wait(BLK_RW_ASYNC, 
HZ/50); goto retry; } @@ -1762,6 +1762,7 @@ xfs_buf_runall_queues( STATIC int xfsbufd_wakeup( + struct shrinker *shrink, int priority, gfp_t mask) { diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index ef7f0218bcc..be375827af9 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -838,6 +838,7 @@ static struct rw_semaphore xfs_mount_list_lock; static int xfs_reclaim_inode_shrink( + struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) { diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 8c117ff2e3a..67c018392d6 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -69,7 +69,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); STATIC int xfs_qm_init_quotainos(xfs_mount_t *); STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); -STATIC int xfs_qm_shake(int, gfp_t); +STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t); static struct shrinker xfs_qm_shaker = { .shrink = xfs_qm_shake, @@ -2117,7 +2117,10 @@ xfs_qm_shake_freelist( */ /* ARGSUSED */ STATIC int -xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask) +xfs_qm_shake( + struct shrinker *shrink, + int nr_to_scan, + gfp_t gfp_mask) { int ndqused, nfree, n; diff --git a/include/linux/mm.h b/include/linux/mm.h index b969efb0378..a2b48041b91 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -999,7 +999,7 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm) * querying the cache size, so a fastpath for that case is appropriate. 
*/ struct shrinker { - int (*shrink)(int nr_to_scan, gfp_t gfp_mask); + int (*shrink)(struct shrinker *, int nr_to_scan, gfp_t gfp_mask); int seeks; /* seeks to recreate an obj */ /* These are for internal use */ diff --git a/mm/vmscan.c b/mm/vmscan.c index 9c7e57cc63a..199fa436c0d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -213,8 +213,9 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, list_for_each_entry(shrinker, &shrinker_list, list) { unsigned long long delta; unsigned long total_scan; - unsigned long max_pass = (*shrinker->shrink)(0, gfp_mask); + unsigned long max_pass; + max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask); delta = (4 * scanned) / shrinker->seeks; delta *= max_pass; do_div(delta, lru_pages + 1); @@ -242,8 +243,9 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, int shrink_ret; int nr_before; - nr_before = (*shrinker->shrink)(0, gfp_mask); - shrink_ret = (*shrinker->shrink)(this_scan, gfp_mask); + nr_before = (*shrinker->shrink)(shrinker, 0, gfp_mask); + shrink_ret = (*shrinker->shrink)(shrinker, this_scan, + gfp_mask); if (shrink_ret == -1) break; if (shrink_ret < nr_before) -- cgit v1.2.3 From 772a2f9b488f4d27c314da5eeabde750b9ead41b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 15 Jul 2010 10:39:47 +0200 Subject: fb: handle allocation failure in alloc_apertures() If the kzalloc() fails we should return NULL. All the places that call alloc_apertures() check for this already. 
Signed-off-by: Dan Carpenter Acked-by: James Simmons Acked-by: Marcin Slusarz Signed-off-by: Dave Airlie --- include/linux/fb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index 8e5a9dfb76b..e7445df44d6 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -873,6 +873,8 @@ struct fb_info { static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { struct apertures_struct *a = kzalloc(sizeof(struct apertures_struct) + max_num * sizeof(struct aperture), GFP_KERNEL); + if (!a) + return NULL; a->count = max_num; return a; } -- cgit v1.2.3 From 07fca0e57fca925032526349f4370f97ed580cc9 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 10 Jul 2010 08:35:00 +0200 Subject: tracing: Properly align linker defined symbols We define a number of symbols in the linker script like this: __start_syscalls_metadata = .; *(__syscalls_metadata) But we do not know the alignment of "." when we assign the __start_syscalls_metadata symbol. gcc started to use bigger alignment for structs (32 bytes), so we saw situations where the linker due to alignment constraints increased the value of "." after the symbol assignment. This resulted in boot failures. Fix this by forcing a 32 byte alignment of "." before the assignment. This patch introduces the forced alignment for ftrace_events and syscalls_metadata. It may be required in more places. Reported-by: Zeev Tarantov Signed-off-by: Sam Ravnborg LKML-Reference: <20100710063459.GA14596@merkur.ravnborg.org> Cc: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/asm-generic/vmlinux.lds.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 48c5299cbf2..4b5902ad0d5 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -63,6 +63,12 @@ /* Align . 
to a 8 byte boundary equals to maximum function alignment. */ #define ALIGN_FUNCTION() . = ALIGN(8) +/* + * Align to a 32 byte boundary equal to the + * alignment gcc 4.5 uses for a struct + */ +#define STRUCT_ALIGN() . = ALIGN(32) + /* The actual configuration determine if the init/exit sections * are handled as text/data or they can be discarded (which * often happens at runtime) @@ -166,7 +172,11 @@ LIKELY_PROFILE() \ BRANCH_PROFILE() \ TRACE_PRINTKS() \ + \ + STRUCT_ALIGN(); \ FTRACE_EVENTS() \ + \ + STRUCT_ALIGN(); \ TRACE_SYSCALLS() /* -- cgit v1.2.3 From 844b9a8707f1fcf0482e0c52f44a555e799ccda6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 20 Jul 2010 13:24:34 -0700 Subject: vfs: fix RCU-lockdep false positive due to /proc If a single-threaded process does a file-descriptor operation, and some other process accesses that same file descriptor via /proc, the current rcu_dereference_check_fdtable() can give a false-positive RCU-lockdep splat due to the reference count being increased by the /proc access after the reference-count check in fget_light() but before the check in rcu_dereference_check_fdtable(). This commit prevents this false positive by checking for a single-threaded process. To avoid #include hell, this commit uses the wrapper for thread_group_empty(current) defined by rcu_my_thread_group_empty() provided in a separate commit. Located-by: Miles Lane Located-by: Eric Dumazet Signed-off-by: Paul E. 
McKenney Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fdtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 013dc529e95..d147461bc27 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -61,7 +61,8 @@ struct files_struct { (rcu_dereference_check((fdtfd), \ rcu_read_lock_held() || \ lockdep_is_held(&(files)->file_lock) || \ - atomic_read(&(files)->count) == 1)) + atomic_read(&(files)->count) == 1 || \ + rcu_my_thread_group_empty())) #define files_fdtable(files) \ (rcu_dereference_check_fdtable((files), (files)->fdt)) -- cgit v1.2.3 From a6a1a095ec8ace2912fc280d371eee8ff5da5736 Mon Sep 17 00:00:00 2001 From: Doug Goldstein Date: Tue, 20 Jul 2010 15:22:25 -0700 Subject: include/linux/vgaarb.h: add missing part of include guard vgaarb.h was missing the #define of the #ifndef at the top for the guard to prevent multiple #include's from causing re-define errors Signed-off-by: Doug Goldstein Cc: Dave Airlie Cc: Jesse Barnes Signed-off-by: Andrew Morton Signed-off-by: Dave Airlie --- include/linux/vgaarb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index c9a97597699..814f294d4cd 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -29,6 +29,7 @@ */ #ifndef LINUX_VGA_H +#define LINUX_VGA_H #include -- cgit v1.2.3 From f8324e20f8289dffc646d64366332e05eaacab25 Mon Sep 17 00:00:00 2001 From: Mikael Pettersson Date: Tue, 20 Jul 2010 18:45:14 -0700 Subject: math-emu: correct test for downshifting fraction in _FP_FROM_INT() The kernel's math-emu code contains a macro _FP_FROM_INT() which is used to convert an integer to a raw normalized floating-point value. It does this basically in three steps: 1. Compute the exponent from the number of leading zero bits. 2. 
Downshift large fractions to put the MSB in the right position for normalized fractions. 3. Upshift small fractions to put the MSB in the right position. There is a boundary error in step 2, causing a fraction with its MSB exactly one bit above the normalized MSB position to not be downshifted. This results in a non-normalized raw float, which when packed becomes a massively inaccurate representation for that input. The impact of this depends on a number of arch-specific factors, but it is known to have broken emulation of FXTOD instructions on UltraSPARC III, which was originally reported as GCC bug 44631. Any arch which uses math-emu to emulate conversions from integers to same-size floats may be affected. The fix is simple: the exponent comparison used to determine if the fraction should be downshifted must be "<=" not "<". I'm sending a kernel module to test this as a reply to this message. There are also SPARC user-space test cases in the GCC bug entry. Signed-off-by: Mikael Pettersson Signed-off-by: David S. 
Miller --- include/math-emu/op-common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/math-emu/op-common.h b/include/math-emu/op-common.h index fd882261225..9696a5e2c43 100644 --- a/include/math-emu/op-common.h +++ b/include/math-emu/op-common.h @@ -799,7 +799,7 @@ do { \ X##_e -= (_FP_W_TYPE_SIZE - rsize); \ X##_e = rsize - X##_e - 1; \ \ - if (_FP_FRACBITS_##fs < rsize && _FP_WFRACBITS_##fs < X##_e) \ + if (_FP_FRACBITS_##fs < rsize && _FP_WFRACBITS_##fs <= X##_e) \ __FP_FRAC_SRS_1(ur_, (X##_e - _FP_WFRACBITS_##fs + 1), rsize);\ _FP_FRAC_DISASSEMBLE_##wc(X, ur_, rsize); \ if ((_FP_WFRACBITS_##fs - X##_e - 1) > 0) \ -- cgit v1.2.3 From edd63cb6b91024332d6983fc51058ac1ef0c081e Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Wed, 21 Jul 2010 19:27:07 -0500 Subject: sysrq,kdb: Use __handle_sysrq() for kdb's sysrq function The kdb code should not toggle the sysrq state in case an end user wants to try and resume the normal kernel execution. 
Signed-off-by: Jason Wessel Acked-by: Dmitry Torokhov --- drivers/char/sysrq.c | 2 +- include/linux/sysrq.h | 1 + kernel/debug/kdb/kdb_main.c | 3 +-- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 5d64e3acb00..878ac0c2cc6 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -493,7 +493,7 @@ static void __sysrq_put_key_op(int key, struct sysrq_key_op *op_p) sysrq_key_table[i] = op_p; } -static void __handle_sysrq(int key, struct tty_struct *tty, int check_mask) +void __handle_sysrq(int key, struct tty_struct *tty, int check_mask) { struct sysrq_key_op *op_p; int orig_log_level; diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h index 4496322e28d..609e8ca5f53 100644 --- a/include/linux/sysrq.h +++ b/include/linux/sysrq.h @@ -45,6 +45,7 @@ struct sysrq_key_op { */ void handle_sysrq(int key, struct tty_struct *tty); +void __handle_sysrq(int key, struct tty_struct *tty, int check_mask); int register_sysrq_key(int key, struct sysrq_key_op *op); int unregister_sysrq_key(int key, struct sysrq_key_op *op); struct sysrq_key_op *__sysrq_get_key_op(int key); diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 7e9bfd54a0d..ebe4a287419 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1820,9 +1820,8 @@ static int kdb_sr(int argc, const char **argv) { if (argc != 1) return KDB_ARGCOUNT; - sysrq_toggle_support(1); kdb_trap_printk++; - handle_sysrq(*argv[1], NULL); + __handle_sysrq(*argv[1], NULL, 0); kdb_trap_printk--; return 0; -- cgit v1.2.3 From 8a35747a5d13b99e076b0222729e0caa48cb69b6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 21 Jul 2010 21:44:31 +0000 Subject: macvtap: Limit packet queue length Mark Wagner reported OOM symptoms when sending UDP traffic over a macvtap link to a kvm receiver. This appears to be caused by the fact that macvtap packet queues are unlimited in length. 
This means that if the receiver can't keep up with the rate of flow, then we will hit OOM. Of course it gets worse if the OOM killer then decides to kill the receiver. This patch imposes a cap on the packet queue length, in the same way as the tuntap driver, using the device TX queue length. Please note that macvtap currently has no way of giving congestion notification, that means the software device TX queue cannot be used and packets will always be dropped once the macvtap driver queue fills up. This shouldn't be a great problem for the scenario where macvtap is used to feed a kvm receiver, as the traffic is most likely external in origin so congestion notification can't be applied anyway. Of course, if anybody decides to complain about guest-to-guest UDP packet loss down the track, then we may have to revisit this. Incidentally, this patch also fixes a real memory leak when macvtap_get_queue fails. Chris Wright noticed that for this patch to work, we need a non-zero TX queue length. This patch includes his work to change the default macvtap TX queue length to 500. Reported-by: Mark Wagner Signed-off-by: Herbert Xu Acked-by: Chris Wright Acked-by: Arnd Bergmann Signed-off-by: David S. 
Miller --- drivers/net/macvlan.c | 10 ++++++++-- drivers/net/macvtap.c | 18 ++++++++++++++++-- include/linux/if_macvlan.h | 2 ++ 3 files changed, 26 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 87e8d4cb405..f15fe2cf72a 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -499,7 +499,7 @@ static const struct net_device_ops macvlan_netdev_ops = { .ndo_validate_addr = eth_validate_addr, }; -static void macvlan_setup(struct net_device *dev) +void macvlan_common_setup(struct net_device *dev) { ether_setup(dev); @@ -508,6 +508,12 @@ static void macvlan_setup(struct net_device *dev) dev->destructor = free_netdev; dev->header_ops = &macvlan_hard_header_ops, dev->ethtool_ops = &macvlan_ethtool_ops; +} +EXPORT_SYMBOL_GPL(macvlan_common_setup); + +static void macvlan_setup(struct net_device *dev) +{ + macvlan_common_setup(dev); dev->tx_queue_len = 0; } @@ -705,7 +711,6 @@ int macvlan_link_register(struct rtnl_link_ops *ops) /* common fields */ ops->priv_size = sizeof(struct macvlan_dev); ops->get_tx_queues = macvlan_get_tx_queues; - ops->setup = macvlan_setup; ops->validate = macvlan_validate; ops->maxtype = IFLA_MACVLAN_MAX; ops->policy = macvlan_policy; @@ -719,6 +724,7 @@ EXPORT_SYMBOL_GPL(macvlan_link_register); static struct rtnl_link_ops macvlan_link_ops = { .kind = "macvlan", + .setup = macvlan_setup, .newlink = macvlan_newlink, .dellink = macvlan_dellink, }; diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index a8a94e2f6dd..ff02b836c3c 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -180,11 +180,18 @@ static int macvtap_forward(struct net_device *dev, struct sk_buff *skb) { struct macvtap_queue *q = macvtap_get_queue(dev, skb); if (!q) - return -ENOLINK; + goto drop; + + if (skb_queue_len(&q->sk.sk_receive_queue) >= dev->tx_queue_len) + goto drop; skb_queue_tail(&q->sk.sk_receive_queue, skb); wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | 
POLLRDNORM | POLLRDBAND); - return 0; + return NET_RX_SUCCESS; + +drop: + kfree_skb(skb); + return NET_RX_DROP; } /* @@ -235,8 +242,15 @@ static void macvtap_dellink(struct net_device *dev, macvlan_dellink(dev, head); } +static void macvtap_setup(struct net_device *dev) +{ + macvlan_common_setup(dev); + dev->tx_queue_len = TUN_READQ_SIZE; +} + static struct rtnl_link_ops macvtap_link_ops __read_mostly = { .kind = "macvtap", + .setup = macvtap_setup, .newlink = macvtap_newlink, .dellink = macvtap_dellink, }; diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 9ea047aca79..1ffaeffeff7 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -67,6 +67,8 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, } } +extern void macvlan_common_setup(struct net_device *dev); + extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], int (*receive)(struct sk_buff *skb), -- cgit v1.2.3 From 718be4aaf3613cf7c2d097f925abc3d3553c0605 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 22 Jul 2010 16:54:27 -0400 Subject: ACPI: skip checking BM_STS if the BIOS doesn't ask for it It turns out that there is a bit in the _CST for Intel FFH C3 that tells the OS if we should be checking BM_STS or not. Linux has been unconditionally checking BM_STS. 
If the chip-set is configured to enable BM_STS, it can retard or completely prevent entry into deep C-states -- as illustrated by turbostat: http://userweb.kernel.org/~lenb/acpi/utils/pmtools/turbostat/ ref: Intel Processor Vendor-Specific ACPI Interface Specification table 4 "_CST FFH GAS Field Encoding" Bit 1: Set to 1 if OSPM should use Bus Master avoidance for this C-state https://bugzilla.kernel.org/show_bug.cgi?id=15886 Signed-off-by: Len Brown --- arch/x86/kernel/acpi/cstate.c | 9 +++++++++ drivers/acpi/processor_idle.c | 2 +- include/acpi/processor.h | 3 ++- 3 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 2e837f5080f..fb7a5f052e2 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -145,6 +145,15 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, percpu_entry->states[cx->index].eax = cx->address; percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK; } + + /* + * For _CST FFH on Intel, if GAS.access_size bit 1 is cleared, + * then we should skip checking BM_STS for this C-state. 
+ * ref: "Intel Processor Vendor-Specific ACPI Interface Specification" + */ + if ((c->x86_vendor == X86_VENDOR_INTEL) && !(reg->access_size & 0x2)) + cx->bm_sts_skip = 1; + return retval; } EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index b1b385692f4..b351342f1fa 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -947,7 +947,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, if (acpi_idle_suspend) return(acpi_idle_enter_c1(dev, state)); - if (acpi_idle_bm_check()) { + if (!cx->bm_sts_skip && acpi_idle_bm_check()) { if (dev->safe_state) { dev->last_state = dev->safe_state; return dev->safe_state->enter(dev, dev->safe_state); diff --git a/include/acpi/processor.h b/include/acpi/processor.h index da565a48240..a68ca8a11a5 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -48,7 +48,7 @@ struct acpi_power_register { u8 space_id; u8 bit_width; u8 bit_offset; - u8 reserved; + u8 access_size; u64 address; } __attribute__ ((packed)); @@ -63,6 +63,7 @@ struct acpi_processor_cx { u32 power; u32 usage; u64 time; + u8 bm_sts_skip; char desc[ACPI_CX_DESC_LEN]; }; -- cgit v1.2.3 From da5e37efe8704fc2b354626467f80f73c5e3c020 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Tue, 13 Jul 2010 11:39:42 +0200 Subject: vmlinux.lds: fix .data..init_task output section (fix powerpc boot) The .data..init_task output section was missing a load offset causing a powerpc target to fail to boot. Sean MacLennan tracked it down to the definition of INIT_TASK_DATA_SECTION(). There are only two users of INIT_TASK_DATA_SECTION() in the kernel today: cris and powerpc. cris does not support relocatable kernels and is thus not impacted by this change. Fix INIT_TASK_DATA_SECTION() to specify load offset like all other output sections. 
Reported-by: Sean MacLennan Signed-off-by: Sam Ravnborg Signed-off-by: Benjamin Herrenschmidt --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 48c5299cbf2..cdfff74e973 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -435,7 +435,7 @@ */ #define INIT_TASK_DATA_SECTION(align) \ . = ALIGN(align); \ - .data..init_task : { \ + .data..init_task : AT(ADDR(.data..init_task) - LOAD_OFFSET) { \ INIT_TASK_DATA(align) \ } -- cgit v1.2.3 From 72ad5d77fb981963edae15eee8196c80238f5ed0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Jul 2010 22:59:09 +0200 Subject: ACPI / Sleep: Allow the NVS saving to be skipped during suspend to RAM Commit 2a6b69765ad794389f2fc3e14a0afa1a995221c2 (ACPI: Store NVS state even when entering suspend to RAM) caused the ACPI suspend code to save the NVS area during suspend and restore it during resume unconditionally, although it is known that some systems need to use acpi_sleep=s4_nonvs for hibernation to work. To allow the affected systems to avoid saving and restoring the NVS area during suspend to RAM and resume, introduce the kernel command line option acpi_sleep=nonvs and make acpi_sleep=s4_nonvs work as its alias temporarily (add acpi_sleep=s4_nonvs to the feature removal file). Addresses https://bugzilla.kernel.org/show_bug.cgi?id=16396 . Signed-off-by: Rafael J. 
Wysocki Reported-and-tested-by: tomas m Signed-off-by: Len Brown --- Documentation/feature-removal-schedule.txt | 7 ++++++ Documentation/kernel-parameters.txt | 4 ++-- arch/x86/kernel/acpi/sleep.c | 9 ++++++-- drivers/acpi/sleep.c | 35 +++++++++++++++--------------- include/linux/acpi.h | 2 +- 5 files changed, 34 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index c268783bc4e..1571c0c83db 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -647,3 +647,10 @@ Who: Stefan Richter ---------------------------- +What: The acpi_sleep=s4_nonvs command line option +When: 2.6.37 +Files: arch/x86/kernel/acpi/sleep.c +Why: superseded by acpi_sleep=nonvs +Who: Rafael J. Wysocki + +---------------------------- diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4ddb58df081..2b2407d9a6d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -254,8 +254,8 @@ and is between 256 and 4096 characters. It is defined in the file control method, with respect to putting devices into low power states, to be enforced (the ACPI 2.0 ordering of _PTS is used by default). - s4_nonvs prevents the kernel from saving/restoring the - ACPI NVS memory during hibernation. + nonvs prevents the kernel from saving/restoring the + ACPI NVS memory during suspend/hibernation and resume. sci_force_enable causes the kernel to set SCI_EN directly on resume from S1/S3 (which is against the ACPI spec, but some broken systems don't work without it). 
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 82e508677b9..fcc3c61fdec 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -157,9 +157,14 @@ static int __init acpi_sleep_setup(char *str) #ifdef CONFIG_HIBERNATION if (strncmp(str, "s4_nohwsig", 10) == 0) acpi_no_s4_hw_signature(); - if (strncmp(str, "s4_nonvs", 8) == 0) - acpi_s4_no_nvs(); + if (strncmp(str, "s4_nonvs", 8) == 0) { + pr_warning("ACPI: acpi_sleep=s4_nonvs is deprecated, " + "please use acpi_sleep=nonvs instead"); + acpi_nvs_nosave(); + } #endif + if (strncmp(str, "nonvs", 5) == 0) + acpi_nvs_nosave(); if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); str = strchr(str, ','); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 5b7c52e4a00..2862c781b37 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -81,6 +81,20 @@ static int acpi_sleep_prepare(u32 acpi_state) #ifdef CONFIG_ACPI_SLEEP static u32 acpi_target_sleep_state = ACPI_STATE_S0; +/* + * The ACPI specification wants us to save NVS memory regions during hibernation + * and to restore them during the subsequent resume. Windows does that also for + * suspend to RAM. However, it is known that this mechanism does not work on + * all machines, so we allow the user to disable it with the help of the + * 'acpi_sleep=nonvs' kernel command line option. + */ +static bool nvs_nosave; + +void __init acpi_nvs_nosave(void) +{ + nvs_nosave = true; +} + /* * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the * user to request that behavior by using the 'acpi_old_suspend_ordering' @@ -197,8 +211,7 @@ static int acpi_suspend_begin(suspend_state_t pm_state) u32 acpi_state = acpi_suspend_states[pm_state]; int error = 0; - error = suspend_nvs_alloc(); - + error = nvs_nosave ? 
0 : suspend_nvs_alloc(); if (error) return error; @@ -388,20 +401,6 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATION -/* - * The ACPI specification wants us to save NVS memory regions during hibernation - * and to restore them during the subsequent resume. However, it is not certain - * if this mechanism is going to work on all machines, so we allow the user to - * disable this mechanism using the 'acpi_sleep=s4_nonvs' kernel command line - * option. - */ -static bool s4_no_nvs; - -void __init acpi_s4_no_nvs(void) -{ - s4_no_nvs = true; -} - static unsigned long s4_hardware_signature; static struct acpi_table_facs *facs; static bool nosigcheck; @@ -415,7 +414,7 @@ static int acpi_hibernation_begin(void) { int error; - error = s4_no_nvs ? 0 : suspend_nvs_alloc(); + error = nvs_nosave ? 0 : suspend_nvs_alloc(); if (!error) { acpi_target_sleep_state = ACPI_STATE_S4; acpi_sleep_tts_switch(acpi_target_sleep_state); @@ -510,7 +509,7 @@ static int acpi_hibernation_begin_old(void) error = acpi_sleep_prepare(ACPI_STATE_S4); if (!error) { - if (!s4_no_nvs) + if (!nvs_nosave) error = suspend_nvs_alloc(); if (!error) acpi_target_sleep_state = ACPI_STATE_S4; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 224a38c960d..ccf94dc5acd 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -253,7 +253,7 @@ int acpi_resources_are_enforced(void); #ifdef CONFIG_PM_SLEEP void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); -void __init acpi_s4_no_nvs(void); +void __init acpi_nvs_nosave(void); #endif /* CONFIG_PM_SLEEP */ struct acpi_osc_context { -- cgit v1.2.3 From 3b87956ea645fb4de7e59c7d0aa94de04be72615 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 22 Jul 2010 18:45:04 +0000 Subject: net sched: fix race in mirred device removal This fixes hang when target device of mirred packet classifier action is removed. 
If a mirror or redirection action is configured to cause packets to go to another device, the classifier holds a ref count, but was assuming the administrator cleaned up all redirections before removing. The fix is to add a notifier and clean up during unregister. The new list is implicitly protected by the RTNL mutex because it is held during filter add/delete as well as notifier. Signed-off-by: Stephen Hemminger Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/tc_act/tc_mirred.h | 1 + net/sched/act_mirred.c | 43 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 41 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index ceac661cdfd..cfe2943690f 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -9,6 +9,7 @@ struct tcf_mirred { int tcfm_ifindex; int tcfm_ok_push; struct net_device *tcfm_dev; + struct list_head tcfm_list; }; #define to_mirred(pc) \ container_of(pc, struct tcf_mirred, common) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index c0b6863e3b8..1980b71c283 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -33,6 +33,7 @@ static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1]; static u32 mirred_idx_gen; static DEFINE_RWLOCK(mirred_lock); +static LIST_HEAD(mirred_list); static struct tcf_hashinfo mirred_hash_info = { .htab = tcf_mirred_ht, @@ -47,7 +48,9 @@ static inline int tcf_mirred_release(struct tcf_mirred *m, int bind) m->tcf_bindcnt--; m->tcf_refcnt--; if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { - dev_put(m->tcfm_dev); + list_del(&m->tcfm_list); + if (m->tcfm_dev) + dev_put(m->tcfm_dev); tcf_hash_destroy(&m->common, &mirred_hash_info); return 1; } @@ -134,8 +137,10 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est, m->tcfm_ok_push = ok_push; } spin_unlock_bh(&m->tcf_lock); - if (ret == ACT_P_CREATED) + if (ret == ACT_P_CREATED) { + list_add(&m->tcfm_list, 
&mirred_list); tcf_hash_insert(pc, &mirred_hash_info); + } return ret; } @@ -162,9 +167,14 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, m->tcf_tm.lastuse = jiffies; dev = m->tcfm_dev; + if (!dev) { + printk_once(KERN_NOTICE "tc mirred: target device is gone\n"); + goto out; + } + if (!(dev->flags & IFF_UP)) { if (net_ratelimit()) - pr_notice("tc mirred to Houston: device %s is gone!\n", + pr_notice("tc mirred to Houston: device %s is down\n", dev->name); goto out; } @@ -232,6 +242,28 @@ nla_put_failure: return -1; } +static int mirred_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + struct tcf_mirred *m; + + if (event == NETDEV_UNREGISTER) + list_for_each_entry(m, &mirred_list, tcfm_list) { + if (m->tcfm_dev == dev) { + dev_put(dev); + m->tcfm_dev = NULL; + } + } + + return NOTIFY_DONE; +} + +static struct notifier_block mirred_device_notifier = { + .notifier_call = mirred_device_event, +}; + + static struct tc_action_ops act_mirred_ops = { .kind = "mirred", .hinfo = &mirred_hash_info, @@ -252,12 +284,17 @@ MODULE_LICENSE("GPL"); static int __init mirred_init_module(void) { + int err = register_netdevice_notifier(&mirred_device_notifier); + if (err) + return err; + pr_info("Mirror/redirect action on\n"); return tcf_register_action(&act_mirred_ops); } static void __exit mirred_cleanup_module(void) { + unregister_netdevice_notifier(&mirred_device_notifier); tcf_unregister_action(&act_mirred_ops); } -- cgit v1.2.3 From 40e2e97316af6e62affab7a392e792494b8d9dde Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 18 Jul 2010 21:17:09 +0000 Subject: direct-io: move aio_complete into ->end_io Filesystems with unwritten extent support must not complete an AIO request until the transaction to convert the extent has been committed. That means the aio_complete call needs to be moved into the ->end_io callback so that the filesystem can control when to call it exactly. 
This makes a bit of a mess out of dio_complete and the ->end_io callback prototype even more complicated. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Alex Elder --- fs/direct-io.c | 26 ++++++++++++++------------ fs/ext4/inode.c | 10 +++++++--- fs/ocfs2/aops.c | 7 ++++++- fs/xfs/linux-2.6/xfs_aops.c | 7 ++++++- fs/xfs/linux-2.6/xfs_aops.h | 2 ++ include/linux/fs.h | 3 ++- 6 files changed, 37 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/direct-io.c b/fs/direct-io.c index 7600aacf531..a10cb91cade 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -218,7 +218,7 @@ static struct page *dio_get_page(struct dio *dio) * filesystems can use it to hold additional state between get_block calls and * dio_complete. */ -static int dio_complete(struct dio *dio, loff_t offset, int ret) +static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async) { ssize_t transferred = 0; @@ -239,14 +239,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret) transferred = dio->i_size - offset; } - if (dio->end_io && dio->result) - dio->end_io(dio->iocb, offset, transferred, - dio->map_bh.b_private); - - if (dio->flags & DIO_LOCKING) - /* lockdep: non-owner release */ - up_read_non_owner(&dio->inode->i_alloc_sem); - if (ret == 0) ret = dio->page_errors; if (ret == 0) @@ -254,6 +246,17 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret) if (ret == 0) ret = transferred; + if (dio->end_io && dio->result) { + dio->end_io(dio->iocb, offset, transferred, + dio->map_bh.b_private, ret, is_async); + } else if (is_async) { + aio_complete(dio->iocb, ret, 0); + } + + if (dio->flags & DIO_LOCKING) + /* lockdep: non-owner release */ + up_read_non_owner(&dio->inode->i_alloc_sem); + return ret; } @@ -277,8 +280,7 @@ static void dio_bio_end_aio(struct bio *bio, int error) spin_unlock_irqrestore(&dio->bio_lock, flags); if (remaining == 0) { - int ret = dio_complete(dio, dio->iocb->ki_pos, 0); - 
aio_complete(dio->iocb, ret, 0); + dio_complete(dio, dio->iocb->ki_pos, 0, true); kfree(dio); } } @@ -1126,7 +1128,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, spin_unlock_irqrestore(&dio->bio_lock, flags); if (ret2 == 0) { - ret = dio_complete(dio, offset, ret); + ret = dio_complete(dio, offset, ret, false); kfree(dio); } else BUG_ON(ret != -EIOCBQUEUED); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 42272d67955..0afc8c1d8cf 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3775,7 +3775,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) } static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, - ssize_t size, void *private) + ssize_t size, void *private, int ret, + bool is_async) { ext4_io_end_t *io_end = iocb->private; struct workqueue_struct *wq; @@ -3784,7 +3785,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, /* if not async direct IO or dio with 0 bytes write, just return */ if (!io_end || !size) - return; + goto out; ext_debug("ext4_end_io_dio(): io_end 0x%p" "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", @@ -3795,7 +3796,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, if (io_end->flag != EXT4_IO_UNWRITTEN){ ext4_free_io_end(io_end); iocb->private = NULL; - return; + goto out; } io_end->offset = offset; @@ -3812,6 +3813,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, list_add_tail(&io_end->list, &ei->i_completed_io_list); spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); iocb->private = NULL; +out: + if (is_async) + aio_complete(iocb, ret, 0); } static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 356e976772b..96337a4fbbd 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -578,7 +578,9 @@ bail: static void ocfs2_dio_end_io(struct kiocb *iocb, loff_t offset, ssize_t bytes, - void *private) + void *private, + int ret, + bool is_async) { 
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; int level; @@ -592,6 +594,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, if (!level) up_read(&inode->i_alloc_sem); ocfs2_rw_unlock(inode, level); + + if (is_async) + aio_complete(iocb, ret, 0); } /* diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 8abbf0532ea..95d1e2695c3 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1406,7 +1406,9 @@ xfs_end_io_direct( struct kiocb *iocb, loff_t offset, ssize_t size, - void *private) + void *private, + int ret, + bool is_async) { xfs_ioend_t *ioend = iocb->private; @@ -1452,6 +1454,9 @@ xfs_end_io_direct( * against double-freeing. */ iocb->private = NULL; + + if (is_async) + aio_complete(iocb, ret, 0); } STATIC ssize_t diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 319da173cc1..c5057fb6237 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -37,6 +37,8 @@ typedef struct xfs_ioend { size_t io_size; /* size of the extent */ xfs_off_t io_offset; /* offset in the file */ struct work_struct io_work; /* xfsdatad work queue */ + struct kiocb *io_iocb; + int io_result; } xfs_ioend_t; extern const struct address_space_operations xfs_address_space_operations; diff --git a/include/linux/fs.h b/include/linux/fs.h index 68ca1b0491a..f91affb7d53 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -415,7 +415,8 @@ struct buffer_head; typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, - ssize_t bytes, void *private); + ssize_t bytes, void *private, int ret, + bool is_async); /* * Attribute flags. 
These should be or-ed together to figure out what -- cgit v1.2.3 From 98864ff58dd2b8ef9e72b0d2c70f34e7ff24a2ee Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 22 May 2010 23:59:11 +0100 Subject: ARM: OMAP: Convert OMAPFB and VRAM SDRAM reservation to LMB Signed-off-by: Russell King --- arch/arm/plat-omap/common.c | 4 ++-- arch/arm/plat-omap/fb.c | 30 +++++++++++++++++++----------- arch/arm/plat-omap/include/plat/vram.h | 4 ++-- drivers/video/omap2/vram.c | 33 +++++++++++++++------------------ include/linux/omapfb.h | 2 +- 5 files changed, 39 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/arch/arm/plat-omap/common.c b/arch/arm/plat-omap/common.c index 9f6bbc178a7..ebed82699eb 100644 --- a/arch/arm/plat-omap/common.c +++ b/arch/arm/plat-omap/common.c @@ -85,8 +85,8 @@ EXPORT_SYMBOL(omap_get_var_config); void __init omap_reserve(void) { - omapfb_reserve_sdram(); - omap_vram_reserve_sdram(); + omapfb_reserve_sdram_memblock(); + omap_vram_reserve_sdram_memblock(); } /* diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/plat-omap/fb.c index 97db493904f..0054b9501a5 100644 --- a/arch/arm/plat-omap/fb.c +++ b/arch/arm/plat-omap/fb.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include @@ -173,25 +173,27 @@ static int check_fbmem_region(int region_idx, struct omapfb_mem_region *rg, static int valid_sdram(unsigned long addr, unsigned long size) { - struct bootmem_data *bdata = NODE_DATA(0)->bdata; - unsigned long sdram_start, sdram_end; + struct memblock_property res; - sdram_start = bdata->node_min_pfn << PAGE_SHIFT; - sdram_end = bdata->node_low_pfn << PAGE_SHIFT; - - return addr >= sdram_start && sdram_end - addr >= size; + res.base = addr; + res.size = size; + return !memblock_find(&res) && res.base == addr && res.size == size; } static int reserve_sdram(unsigned long addr, unsigned long size) { - return reserve_bootmem(addr, size, BOOTMEM_EXCLUSIVE); + if (memblock_is_region_reserved(addr, size)) + return -EBUSY; + 
if (memblock_reserve(addr, size)) + return -ENOMEM; + return 0; } /* * Called from map_io. We need to call to this early enough so that we * can reserve the fixed SDRAM regions before VM could get hold of them. */ -void __init omapfb_reserve_sdram(void) +void __init omapfb_reserve_sdram_memblock(void) { unsigned long reserved = 0; int i; @@ -386,7 +388,10 @@ static inline int omap_init_fb(void) arch_initcall(omap_init_fb); -void omapfb_reserve_sdram(void) {} +void omapfb_reserve_sdram_memblock(void) +{ +} + unsigned long omapfb_reserve_sram(unsigned long sram_pstart, unsigned long sram_vstart, unsigned long sram_size, @@ -402,7 +407,10 @@ void omapfb_set_platform_data(struct omapfb_platform_data *data) { } -void omapfb_reserve_sdram(void) {} +void omapfb_reserve_sdram_memblock(void) +{ +} + unsigned long omapfb_reserve_sram(unsigned long sram_pstart, unsigned long sram_vstart, unsigned long sram_size, diff --git a/arch/arm/plat-omap/include/plat/vram.h b/arch/arm/plat-omap/include/plat/vram.h index edd4987758a..0aa4ecd12c7 100644 --- a/arch/arm/plat-omap/include/plat/vram.h +++ b/arch/arm/plat-omap/include/plat/vram.h @@ -38,7 +38,7 @@ extern void omap_vram_get_info(unsigned long *vram, unsigned long *free_vram, extern void omap_vram_set_sdram_vram(u32 size, u32 start); extern void omap_vram_set_sram_vram(u32 size, u32 start); -extern void omap_vram_reserve_sdram(void); +extern void omap_vram_reserve_sdram_memblock(void); extern unsigned long omap_vram_reserve_sram(unsigned long sram_pstart, unsigned long sram_vstart, unsigned long sram_size, @@ -48,7 +48,7 @@ extern unsigned long omap_vram_reserve_sram(unsigned long sram_pstart, static inline void omap_vram_set_sdram_vram(u32 size, u32 start) { } static inline void omap_vram_set_sram_vram(u32 size, u32 start) { } -static inline void omap_vram_reserve_sdram(void) { } +static inline void omap_vram_reserve_sdram_memblock(void) { } static inline unsigned long omap_vram_reserve_sram(unsigned long sram_pstart, unsigned 
long sram_vstart, unsigned long sram_size, diff --git a/drivers/video/omap2/vram.c b/drivers/video/omap2/vram.c index 3b1237ad85e..f6fdc2085f3 100644 --- a/drivers/video/omap2/vram.c +++ b/drivers/video/omap2/vram.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include @@ -525,10 +525,8 @@ early_param("vram", omap_vram_early_vram); * Called from map_io. We need to call to this early enough so that we * can reserve the fixed SDRAM regions before VM could get hold of them. */ -void __init omap_vram_reserve_sdram(void) +void __init omap_vram_reserve_sdram_memblock(void) { - struct bootmem_data *bdata; - unsigned long sdram_start, sdram_size; u32 paddr; u32 size = 0; @@ -555,29 +553,28 @@ void __init omap_vram_reserve_sdram(void) size = PAGE_ALIGN(size); - bdata = NODE_DATA(0)->bdata; - sdram_start = bdata->node_min_pfn << PAGE_SHIFT; - sdram_size = (bdata->node_low_pfn << PAGE_SHIFT) - sdram_start; - if (paddr) { - if ((paddr & ~PAGE_MASK) || paddr < sdram_start || - paddr + size > sdram_start + sdram_size) { + struct memblock_property res; + + res.base = paddr; + res.size = size; + if ((paddr & ~PAGE_MASK) || memblock_find(&res) || + res.base != paddr || res.size != size) { pr_err("Illegal SDRAM region for VRAM\n"); return; } - if (reserve_bootmem(paddr, size, BOOTMEM_EXCLUSIVE) < 0) { - pr_err("FB: failed to reserve VRAM\n"); + if (memblock_is_region_reserved(paddr, size)) { + pr_err("FB: failed to reserve VRAM - busy\n"); return; } - } else { - if (size > sdram_size) { - pr_err("Illegal SDRAM size for VRAM\n"); + + if (memblock_reserve(paddr, size) < 0) { + pr_err("FB: failed to reserve VRAM - no memory\n"); return; } - - paddr = virt_to_phys(alloc_bootmem_pages(size)); - BUG_ON(paddr & ~PAGE_MASK); + } else { + paddr = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_REAL_LIMIT); } omap_vram_add_region(paddr, size); diff --git a/include/linux/omapfb.h b/include/linux/omapfb.h index 9bdd91486b4..7e4cd616bcb 100644 --- 
a/include/linux/omapfb.h +++ b/include/linux/omapfb.h @@ -253,7 +253,7 @@ struct omapfb_platform_data { /* in arch/arm/plat-omap/fb.c */ extern void omapfb_set_platform_data(struct omapfb_platform_data *data); extern void omapfb_set_ctrl_platform_data(void *pdata); -extern void omapfb_reserve_sdram(void); +extern void omapfb_reserve_sdram_memblock(void); #endif -- cgit v1.2.3 From ec489aa8f993f8d2ec962ce113071faac482aa27 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 2 Jun 2010 08:13:52 +0100 Subject: ARM: 6157/2: PL011 TX/RX split of LCR for ST-Ericssons derivative In the ST-Ericsson version of the PL011 the TX and RX have different control registers. Cc: Alessandro Rubini Signed-off-by: Marcin Mielczarczyk Signed-off-by: Linus Walleij Signed-off-by: Russell King --- drivers/serial/amba-pl011.c | 61 +++++++++++++++++++++++++++++++++++++-------- include/linux/amba/serial.h | 2 ++ 2 files changed, 52 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c index eb4cb480b93..5644cf2385b 100644 --- a/drivers/serial/amba-pl011.c +++ b/drivers/serial/amba-pl011.c @@ -69,9 +69,11 @@ struct uart_amba_port { struct uart_port port; struct clk *clk; - unsigned int im; /* interrupt mask */ + unsigned int im; /* interrupt mask */ unsigned int old_status; - unsigned int ifls; /* vendor-specific */ + unsigned int ifls; /* vendor-specific */ + unsigned int lcrh_tx; /* vendor-specific */ + unsigned int lcrh_rx; /* vendor-specific */ bool autorts; }; @@ -79,16 +81,22 @@ struct uart_amba_port { struct vendor_data { unsigned int ifls; unsigned int fifosize; + unsigned int lcrh_tx; + unsigned int lcrh_rx; }; static struct vendor_data vendor_arm = { .ifls = UART011_IFLS_RX4_8|UART011_IFLS_TX4_8, .fifosize = 16, + .lcrh_tx = UART011_LCRH, + .lcrh_rx = UART011_LCRH, }; static struct vendor_data vendor_st = { .ifls = UART011_IFLS_RX_HALF|UART011_IFLS_TX_HALF, .fifosize = 64, + .lcrh_tx = ST_UART011_LCRH_TX, + 
.lcrh_rx = ST_UART011_LCRH_RX, }; static void pl011_stop_tx(struct uart_port *port) @@ -327,12 +335,12 @@ static void pl011_break_ctl(struct uart_port *port, int break_state) unsigned int lcr_h; spin_lock_irqsave(&uap->port.lock, flags); - lcr_h = readw(uap->port.membase + UART011_LCRH); + lcr_h = readw(uap->port.membase + uap->lcrh_tx); if (break_state == -1) lcr_h |= UART01x_LCRH_BRK; else lcr_h &= ~UART01x_LCRH_BRK; - writew(lcr_h, uap->port.membase + UART011_LCRH); + writew(lcr_h, uap->port.membase + uap->lcrh_tx); spin_unlock_irqrestore(&uap->port.lock, flags); } @@ -393,7 +401,17 @@ static int pl011_startup(struct uart_port *port) writew(cr, uap->port.membase + UART011_CR); writew(0, uap->port.membase + UART011_FBRD); writew(1, uap->port.membase + UART011_IBRD); - writew(0, uap->port.membase + UART011_LCRH); + writew(0, uap->port.membase + uap->lcrh_rx); + if (uap->lcrh_tx != uap->lcrh_rx) { + int i; + /* + * Wait 10 PCLKs before writing LCRH_TX register, + * to get this delay write read only register 10 times + */ + for (i = 0; i < 10; ++i) + writew(0xff, uap->port.membase + UART011_MIS); + writew(0, uap->port.membase + uap->lcrh_tx); + } writew(0, uap->port.membase + UART01x_DR); while (readw(uap->port.membase + UART01x_FR) & UART01x_FR_BUSY) barrier(); @@ -422,10 +440,19 @@ static int pl011_startup(struct uart_port *port) return retval; } +static void pl011_shutdown_channel(struct uart_amba_port *uap, + unsigned int lcrh) +{ + unsigned long val; + + val = readw(uap->port.membase + lcrh); + val &= ~(UART01x_LCRH_BRK | UART01x_LCRH_FEN); + writew(val, uap->port.membase + lcrh); +} + static void pl011_shutdown(struct uart_port *port) { struct uart_amba_port *uap = (struct uart_amba_port *)port; - unsigned long val; /* * disable all interrupts @@ -450,9 +477,9 @@ static void pl011_shutdown(struct uart_port *port) /* * disable break condition and fifos */ - val = readw(uap->port.membase + UART011_LCRH); - val &= ~(UART01x_LCRH_BRK | UART01x_LCRH_FEN); - 
writew(val, uap->port.membase + UART011_LCRH); + pl011_shutdown_channel(uap, uap->lcrh_rx); + if (uap->lcrh_rx != uap->lcrh_tx) + pl011_shutdown_channel(uap, uap->lcrh_tx); /* * Shut down the clock producer @@ -561,7 +588,17 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios, * NOTE: MUST BE WRITTEN AFTER UARTLCR_M & UARTLCR_L * ----------^----------^----------^----------^----- */ - writew(lcr_h, port->membase + UART011_LCRH); + writew(lcr_h, port->membase + uap->lcrh_rx); + if (uap->lcrh_rx != uap->lcrh_tx) { + int i; + /* + * Wait 10 PCLKs before writing LCRH_TX register, + * to get this delay write read only register 10 times + */ + for (i = 0; i < 10; ++i) + writew(0xff, uap->port.membase + UART011_MIS); + writew(lcr_h, port->membase + uap->lcrh_tx); + } writew(old_cr, port->membase + UART011_CR); spin_unlock_irqrestore(&port->lock, flags); @@ -688,7 +725,7 @@ pl011_console_get_options(struct uart_amba_port *uap, int *baud, if (readw(uap->port.membase + UART011_CR) & UART01x_CR_UARTEN) { unsigned int lcr_h, ibrd, fbrd; - lcr_h = readw(uap->port.membase + UART011_LCRH); + lcr_h = readw(uap->port.membase + uap->lcrh_tx); *parity = 'n'; if (lcr_h & UART01x_LCRH_PEN) { @@ -800,6 +837,8 @@ static int pl011_probe(struct amba_device *dev, struct amba_id *id) } uap->ifls = vendor->ifls; + uap->lcrh_rx = vendor->lcrh_rx; + uap->lcrh_tx = vendor->lcrh_tx; uap->port.dev = &dev->dev; uap->port.mapbase = dev->res.start; uap->port.membase = base; diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index 5a5a7fd6249..93c96a66c51 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -38,10 +38,12 @@ #define UART01x_FR 0x18 /* Flag register (Read only). */ #define UART010_IIR 0x1C /* Interrupt indentification register (Read). */ #define UART010_ICR 0x1C /* Interrupt clear register (Write). */ +#define ST_UART011_LCRH_RX 0x1C /* Rx line control register. */ #define UART01x_ILPR 0x20 /* IrDA low power counter register. 
*/ #define UART011_IBRD 0x24 /* Integer baud rate divisor register. */ #define UART011_FBRD 0x28 /* Fractional baud rate divisor register. */ #define UART011_LCRH 0x2c /* Line control register. */ +#define ST_UART011_LCRH_TX 0x2c /* Tx Line control register. */ #define UART011_CR 0x30 /* Control register. */ #define UART011_IFLS 0x34 /* Interrupt fifo level select. */ #define UART011_IMSC 0x38 /* Interrupt mask. */ -- cgit v1.2.3 From ac3e3fb424d44109dda3b1a3459e1b30fa60ac4a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 2 Jun 2010 20:40:22 +0100 Subject: ARM: 6158/2: PL011 baudrate extension for ST-Ericssons derivative Implementation of the ST-Ericsson baudrate extension in the PL011 block. In this modified variant it is possible to change the sampling factor from 16 to 8, and thanks to this we can get higher baudrates while still using the same peripheral clock. Also replace the simple division to determine the baud divisor with DIV_ROUND_CLOSEST() rather than a simple integer division. 
Cc: Alessandro Rubini Cc: Jerzy Kasenberg Signed-off-by: Marcin Mielczarczyk Signed-off-by: Linus Walleij Signed-off-by: Russell King --- drivers/serial/amba-pl011.c | 27 +++++++++++++++++++++++++-- include/linux/amba/serial.h | 1 + 2 files changed, 26 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c index 5644cf2385b..f67e09da6d3 100644 --- a/drivers/serial/amba-pl011.c +++ b/drivers/serial/amba-pl011.c @@ -74,6 +74,7 @@ struct uart_amba_port { unsigned int ifls; /* vendor-specific */ unsigned int lcrh_tx; /* vendor-specific */ unsigned int lcrh_rx; /* vendor-specific */ + bool oversampling; /* vendor-specific */ bool autorts; }; @@ -83,6 +84,7 @@ struct vendor_data { unsigned int fifosize; unsigned int lcrh_tx; unsigned int lcrh_rx; + bool oversampling; }; static struct vendor_data vendor_arm = { @@ -90,6 +92,7 @@ static struct vendor_data vendor_arm = { .fifosize = 16, .lcrh_tx = UART011_LCRH, .lcrh_rx = UART011_LCRH, + .oversampling = false, }; static struct vendor_data vendor_st = { @@ -97,6 +100,7 @@ static struct vendor_data vendor_st = { .fifosize = 64, .lcrh_tx = ST_UART011_LCRH_TX, .lcrh_rx = ST_UART011_LCRH_RX, + .oversampling = true, }; static void pl011_stop_tx(struct uart_port *port) @@ -499,8 +503,13 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios, /* * Ask the core to calculate the divisor for us. */ - baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16); - quot = port->uartclk * 4 / baud; + baud = uart_get_baud_rate(port, termios, old, 0, + port->uartclk/(uap->oversampling ? 
8 : 16)); + + if (baud > port->uartclk/16) + quot = DIV_ROUND_CLOSEST(port->uartclk * 8, baud); + else + quot = DIV_ROUND_CLOSEST(port->uartclk * 4, baud); switch (termios->c_cflag & CSIZE) { case CS5: @@ -579,6 +588,13 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios, uap->autorts = false; } + if (uap->oversampling) { + if (baud > port->uartclk/16) + old_cr |= ST_UART011_CR_OVSFACT; + else + old_cr &= ~ST_UART011_CR_OVSFACT; + } + /* Set baud rate */ writew(quot & 0x3f, port->membase + UART011_FBRD); writew(quot >> 6, port->membase + UART011_IBRD); @@ -744,6 +760,12 @@ pl011_console_get_options(struct uart_amba_port *uap, int *baud, fbrd = readw(uap->port.membase + UART011_FBRD); *baud = uap->port.uartclk * 4 / (64 * ibrd + fbrd); + + if (uap->oversampling) { + if (readw(uap->port.membase + UART011_CR) + & ST_UART011_CR_OVSFACT) + *baud *= 2; + } } } @@ -839,6 +861,7 @@ static int pl011_probe(struct amba_device *dev, struct amba_id *id) uap->ifls = vendor->ifls; uap->lcrh_rx = vendor->lcrh_rx; uap->lcrh_tx = vendor->lcrh_tx; + uap->oversampling = vendor->oversampling; uap->port.dev = &dev->dev; uap->port.mapbase = dev->res.start; uap->port.membase = base; diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index 93c96a66c51..e1b634b635f 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -86,6 +86,7 @@ #define UART010_CR_TIE 0x0020 #define UART010_CR_RIE 0x0010 #define UART010_CR_MSIE 0x0008 +#define ST_UART011_CR_OVSFACT 0x0008 /* Oversampling factor */ #define UART01x_CR_IIRLP 0x0004 /* SIR low power mode */ #define UART01x_CR_SIREN 0x0002 /* SIR enable */ #define UART01x_CR_UARTEN 0x0001 /* UART enable */ -- cgit v1.2.3 From 7d14831e21060fbfbfe8453460ac19205f4ce1c2 Mon Sep 17 00:00:00 2001 From: Anuj Aggarwal Date: Mon, 12 Jul 2010 17:54:06 +0530 Subject: regulator: tps6507x: allow driver to use DEFDCDC{2,3}_HIGH register Acked-by: Mark Brown In TPS6507x, depending on the status of DEFDCDC{2,3} 
pin either DEFDCDC{2,3}_LOW or DEFDCDC{2,3}_HIGH register needs to be read or programmed to change the output voltage. The current driver assumes DEFDCDC{2,3} pins are always tied low and thus operates only on DEFDCDC{2,3}_LOW register. This need not always be the case (as is found on OMAP-L138 EVM). Unfortunately, software cannot read the status of DEFDCDC{2,3} pins. So, this information is passed through platform data depending on how the board is wired. Signed-off-by: Anuj Aggarwal Signed-off-by: Sekhar Nori Signed-off-by: Liam Girdwood --- drivers/regulator/tps6507x-regulator.c | 36 +++++++++++++++++++++++++++------- include/linux/regulator/tps6507x.h | 32 ++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 7 deletions(-) create mode 100644 include/linux/regulator/tps6507x.h (limited to 'include') diff --git a/drivers/regulator/tps6507x-regulator.c b/drivers/regulator/tps6507x-regulator.c index 14b4576281c..8152d65220f 100644 --- a/drivers/regulator/tps6507x-regulator.c +++ b/drivers/regulator/tps6507x-regulator.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -101,9 +102,12 @@ struct tps_info { unsigned max_uV; u8 table_len; const u16 *table; + + /* Does DCDC high or the low register defines output voltage? 
*/ + bool defdcdc_default; }; -static const struct tps_info tps6507x_pmic_regs[] = { +static struct tps_info tps6507x_pmic_regs[] = { { .name = "VDCDC1", .min_uV = 725000, @@ -145,7 +149,7 @@ struct tps6507x_pmic { struct regulator_desc desc[TPS6507X_NUM_REGULATOR]; struct tps6507x_dev *mfd; struct regulator_dev *rdev[TPS6507X_NUM_REGULATOR]; - const struct tps_info *info[TPS6507X_NUM_REGULATOR]; + struct tps_info *info[TPS6507X_NUM_REGULATOR]; struct mutex io_lock; }; static inline int tps6507x_pmic_read(struct tps6507x_pmic *tps, u8 reg) @@ -341,10 +345,16 @@ static int tps6507x_pmic_dcdc_get_voltage(struct regulator_dev *dev) reg = TPS6507X_REG_DEFDCDC1; break; case TPS6507X_DCDC_2: - reg = TPS6507X_REG_DEFDCDC2_LOW; + if (tps->info[dcdc]->defdcdc_default) + reg = TPS6507X_REG_DEFDCDC2_HIGH; + else + reg = TPS6507X_REG_DEFDCDC2_LOW; break; case TPS6507X_DCDC_3: - reg = TPS6507X_REG_DEFDCDC3_LOW; + if (tps->info[dcdc]->defdcdc_default) + reg = TPS6507X_REG_DEFDCDC3_HIGH; + else + reg = TPS6507X_REG_DEFDCDC3_LOW; break; default: return -EINVAL; @@ -370,10 +380,16 @@ static int tps6507x_pmic_dcdc_set_voltage(struct regulator_dev *dev, reg = TPS6507X_REG_DEFDCDC1; break; case TPS6507X_DCDC_2: - reg = TPS6507X_REG_DEFDCDC2_LOW; + if (tps->info[dcdc]->defdcdc_default) + reg = TPS6507X_REG_DEFDCDC2_HIGH; + else + reg = TPS6507X_REG_DEFDCDC2_LOW; break; case TPS6507X_DCDC_3: - reg = TPS6507X_REG_DEFDCDC3_LOW; + if (tps->info[dcdc]->defdcdc_default) + reg = TPS6507X_REG_DEFDCDC3_HIGH; + else + reg = TPS6507X_REG_DEFDCDC3_LOW; break; default: return -EINVAL; @@ -532,7 +548,7 @@ int tps6507x_pmic_probe(struct platform_device *pdev) { struct tps6507x_dev *tps6507x_dev = dev_get_drvdata(pdev->dev.parent); static int desc_id; - const struct tps_info *info = &tps6507x_pmic_regs[0]; + struct tps_info *info = &tps6507x_pmic_regs[0]; struct regulator_init_data *init_data; struct regulator_dev *rdev; struct tps6507x_pmic *tps; @@ -569,6 +585,12 @@ int tps6507x_pmic_probe(struct 
platform_device *pdev) for (i = 0; i < TPS6507X_NUM_REGULATOR; i++, info++, init_data++) { /* Register the regulators */ tps->info[i] = info; + if (init_data->driver_data) { + struct tps6507x_reg_platform_data *data = + init_data->driver_data; + tps->info[i]->defdcdc_default = data->defdcdc_default; + } + tps->desc[i].name = info->name; tps->desc[i].id = desc_id++; tps->desc[i].n_voltages = num_voltages[i]; diff --git a/include/linux/regulator/tps6507x.h b/include/linux/regulator/tps6507x.h new file mode 100644 index 00000000000..4892f591bab --- /dev/null +++ b/include/linux/regulator/tps6507x.h @@ -0,0 +1,32 @@ +/* + * tps6507x.h -- Voltage regulation for the Texas Instruments TPS6507X + * + * Copyright (C) 2010 Texas Instruments, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef REGULATOR_TPS6507X +#define REGULATOR_TPS6507X + +/** + * tps6507x_reg_platform_data - platform data for tps6507x + * @defdcdc_default: Defines whether DCDC high or the low register controls + * output voltage by default. Valid for DCDC2 and DCDC3 outputs only. 
+ */ +struct tps6507x_reg_platform_data { + bool defdcdc_default; +}; + +#endif -- cgit v1.2.3 From bb8f563c848faa113059973f68c24a3bb6a9585e Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Wed, 21 Jul 2010 12:53:57 +0100 Subject: ARM: 6243/1: mmci: pass power_mode to the translate_vdd callback Platforms may have some external power control which need to be controlled from board specific code. Rename the translate_vdd() callback to vdd_handler() and pass it the power mode. Acked-by: Linus Walleij Signed-off-by: Rabin Vincent Signed-off-by: Russell King --- drivers/mmc/host/mmci.c | 13 +++---------- include/linux/amba/mmci.h | 10 ++++++---- 2 files changed, 9 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 3eaa0e9373c..7ae3eeeefc2 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -493,16 +493,9 @@ static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) /* This implicitly enables the regulator */ mmc_regulator_set_ocr(host->vcc, ios->vdd); #endif - /* - * The translate_vdd function is not used if you have - * an external regulator, or your design is really weird. - * Using it would mean sending in power control BOTH using - * a regulator AND the 4 MMCIPWR bits. If we don't have - * a regulator, we might have some other platform specific - * power control behind this translate function. 
- */ - if (!host->vcc && host->plat->translate_vdd) - pwr |= host->plat->translate_vdd(mmc_dev(mmc), ios->vdd); + if (host->plat->vdd_handler) + pwr |= host->plat->vdd_handler(mmc_dev(mmc), ios->vdd, + ios->power_mode); /* The ST version does not have this, fall through to POWER_ON */ if (host->hw_designer != AMBA_VENDOR_ST) { pwr |= MCI_PWR_UP; diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h index 7e466fe7202..ca84ce70d5d 100644 --- a/include/linux/amba/mmci.h +++ b/include/linux/amba/mmci.h @@ -15,9 +15,10 @@ * @ocr_mask: available voltages on the 4 pins from the block, this * is ignored if a regulator is used, see the MMC_VDD_* masks in * mmc/host.h - * @translate_vdd: a callback function to translate a MMC_VDD_* - * mask into a value to be binary or:ed and written into the - * MMCIPWR register of the block + * @vdd_handler: a callback function to translate a MMC_VDD_* + * mask into a value to be binary (or set some other custom bits + * in MMCIPWR) or:ed and written into the MMCIPWR register of the + * block. May also control external power based on the power_mode. * @status: if no GPIO read function was given to the block in * gpio_wp (below) this function will be called to determine * whether a card is present in the MMC slot or not @@ -29,7 +30,8 @@ struct mmci_platform_data { unsigned int f_max; unsigned int ocr_mask; - u32 (*translate_vdd)(struct device *, unsigned int); + u32 (*vdd_handler)(struct device *, unsigned int vdd, + unsigned char power_mode); unsigned int (*status)(struct device *); int gpio_wp; int gpio_cd; -- cgit v1.2.3 From de09a9771a5346029f4d11e4ac886be7f9bfdd75 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Jul 2010 12:45:49 +0100 Subject: CRED: Fix get_task_cred() and task_state() to not resurrect dead credentials It's possible for get_task_cred() as it currently stands to 'corrupt' a set of credentials by incrementing their usage count after their replacement by the task being accessed. 
What happens is that get_task_cred() can race with commit_creds(): TASK_1 TASK_2 RCU_CLEANER -->get_task_cred(TASK_2) rcu_read_lock() __cred = __task_cred(TASK_2) -->commit_creds() old_cred = TASK_2->real_cred TASK_2->real_cred = ... put_cred(old_cred) call_rcu(old_cred) [__cred->usage == 0] get_cred(__cred) [__cred->usage == 1] rcu_read_unlock() -->put_cred_rcu() [__cred->usage == 1] panic() However, since a task's credentials are generally not changed very often, we can reasonably make use of a loop involving reading the creds pointer and using atomic_inc_not_zero() to attempt to increment it if it hasn't already hit zero. If successful, we can safely return the credentials in the knowledge that, even if the task we're accessing has released them, they haven't gone to the RCU cleanup code. We then change task_state() in procfs to use get_task_cred() rather than calling get_cred() on the result of __task_cred(), as that suffers from the same problem. Without this change, a BUG_ON in __put_cred() or in put_cred_rcu() can be tripped when it is noticed that the usage count is not zero as it ought to be, for example: kernel BUG at kernel/cred.c:168!
invalid opcode: 0000 [#1] SMP last sysfs file: /sys/kernel/mm/ksm/run CPU 0 Pid: 2436, comm: master Not tainted 2.6.33.3-85.fc13.x86_64 #1 0HR330/OptiPlex 745 RIP: 0010:[] [] __put_cred+0xc/0x45 RSP: 0018:ffff88019e7e9eb8 EFLAGS: 00010202 RAX: 0000000000000001 RBX: ffff880161514480 RCX: 00000000ffffffff RDX: 00000000ffffffff RSI: ffff880140c690c0 RDI: ffff880140c690c0 RBP: ffff88019e7e9eb8 R08: 00000000000000d0 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000040 R12: ffff880140c690c0 R13: ffff88019e77aea0 R14: 00007fff336b0a5c R15: 0000000000000001 FS: 00007f12f50d97c0(0000) GS:ffff880007400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f8f461bc000 CR3: 00000001b26ce000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process master (pid: 2436, threadinfo ffff88019e7e8000, task ffff88019e77aea0) Stack: ffff88019e7e9ec8 ffffffff810698cd ffff88019e7e9ef8 ffffffff81069b45 <0> ffff880161514180 ffff880161514480 ffff880161514180 0000000000000000 <0> ffff88019e7e9f28 ffffffff8106aace 0000000000000001 0000000000000246 Call Trace: [] put_cred+0x13/0x15 [] commit_creds+0x16b/0x175 [] set_current_groups+0x47/0x4e [] sys_setgroups+0xf6/0x105 [] system_call_fastpath+0x16/0x1b Code: 48 8d 71 ff e8 7e 4e 15 00 85 c0 78 0b 8b 75 ec 48 89 df e8 ef 4a 15 00 48 83 c4 18 5b c9 c3 55 8b 07 8b 07 48 89 e5 85 c0 74 04 <0f> 0b eb fe 65 48 8b 04 25 00 cc 00 00 48 3b b8 58 04 00 00 75 RIP [] __put_cred+0xc/0x45 RSP ---[ end trace df391256a100ebdd ]--- Signed-off-by: David Howells Acked-by: Jiri Olsa Signed-off-by: Linus Torvalds --- fs/proc/array.c | 2 +- include/linux/cred.h | 21 +-------------------- kernel/cred.c | 25 +++++++++++++++++++++++++ 3 files changed, 27 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/fs/proc/array.c b/fs/proc/array.c index 9b58d38bc91..fff6572676a 100644 --- a/fs/proc/array.c +++ 
b/fs/proc/array.c @@ -176,7 +176,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, if (tracer) tpid = task_pid_nr_ns(tracer, ns); } - cred = get_cred((struct cred *) __task_cred(p)); + cred = get_task_cred(p); seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" diff --git a/include/linux/cred.h b/include/linux/cred.h index 75c0fa88130..ce40cbc791e 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -153,6 +153,7 @@ struct cred { extern void __put_cred(struct cred *); extern void exit_creds(struct task_struct *); extern int copy_creds(struct task_struct *, unsigned long); +extern const struct cred *get_task_cred(struct task_struct *); extern struct cred *cred_alloc_blank(void); extern struct cred *prepare_creds(void); extern struct cred *prepare_exec_creds(void); @@ -281,26 +282,6 @@ static inline void put_cred(const struct cred *_cred) #define __task_cred(task) \ ((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_tasklist_lock_is_held()))) -/** - * get_task_cred - Get another task's objective credentials - * @task: The task to query - * - * Get the objective credentials of a task, pinning them so that they can't go - * away. Accessing a task's credentials directly is not permitted. - * - * The caller must make sure task doesn't go away, either by holding a ref on - * task or by holding tasklist_lock to prevent it from being unlinked. 
- */ -#define get_task_cred(task) \ -({ \ - struct cred *__cred; \ - rcu_read_lock(); \ - __cred = (struct cred *) __task_cred((task)); \ - get_cred(__cred); \ - rcu_read_unlock(); \ - __cred; \ -}) - /** * get_current_cred - Get the current task's subjective credentials * diff --git a/kernel/cred.c b/kernel/cred.c index a2d5504fbcc..60bc8b1e32e 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -209,6 +209,31 @@ void exit_creds(struct task_struct *tsk) } } +/** + * get_task_cred - Get another task's objective credentials + * @task: The task to query + * + * Get the objective credentials of a task, pinning them so that they can't go + * away. Accessing a task's credentials directly is not permitted. + * + * The caller must also make sure task doesn't get deleted, either by holding a + * ref on task or by holding tasklist_lock to prevent it from being unlinked. + */ +const struct cred *get_task_cred(struct task_struct *task) +{ + const struct cred *cred; + + rcu_read_lock(); + + do { + cred = __task_cred((task)); + BUG_ON(!cred); + } while (!atomic_inc_not_zero(&((struct cred *)cred)->usage)); + + rcu_read_unlock(); + return cred; +} + /* * Allocate blank credentials, such that the credentials can be filled in at a * later date without risk of ENOMEM. -- cgit v1.2.3 From 8f92054e7ca1d3a3ae50fb42d2253ac8730d9b2a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Jul 2010 12:45:55 +0100 Subject: CRED: Fix __task_cred()'s lockdep check and banner comment Fix __task_cred()'s lockdep check by removing the following validation condition: lockdep_tasklist_lock_is_held() as commit_creds() does not take the tasklist_lock, and nor do most of the functions that call it, so this check is pointless and it can prevent detection of the RCU lock not being held if the tasklist_lock is held. Instead, add the following validation condition: task->exit_state >= 0 to permit the access if the target task is dead and therefore unable to change its own credentials. 
Fix __task_cred()'s comment to: (1) discard the bit that says that the caller must prevent the target task from being deleted. That shouldn't need saying. (2) Add a comment indicating the result of __task_cred() should not be passed directly to get_cred(), but rather that get_task_cred() should be used instead. Also put a note into the documentation to enforce this point there too. Signed-off-by: David Howells Acked-by: Jiri Olsa Cc: Paul E. McKenney Signed-off-by: Linus Torvalds --- Documentation/credentials.txt | 3 +++ include/linux/cred.h | 15 ++++++++++----- include/linux/sched.h | 1 + 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/Documentation/credentials.txt b/Documentation/credentials.txt index a2db3528700..995baf379c0 100644 --- a/Documentation/credentials.txt +++ b/Documentation/credentials.txt @@ -417,6 +417,9 @@ reference on them using: This does all the RCU magic inside of it. The caller must call put_cred() on the credentials so obtained when they're finished with. + [*] Note: The result of __task_cred() should not be passed directly to + get_cred() as this may race with commit_cred(). + There are a couple of convenience functions to access bits of another task's credentials, hiding the RCU magic from the caller: diff --git a/include/linux/cred.h b/include/linux/cred.h index ce40cbc791e..4d2c39573f3 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -274,13 +274,18 @@ static inline void put_cred(const struct cred *_cred) * @task: The task to query * * Access the objective credentials of a task. The caller must hold the RCU - * readlock. + * readlock or the task must be dead and unable to change its own credentials. * - * The caller must make sure task doesn't go away, either by holding a ref on - * task or by holding tasklist_lock to prevent it from being unlinked. + * The result of this function should not be passed directly to get_cred(); + * rather get_task_cred() should be used instead.
*/ -#define __task_cred(task) \ - ((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_tasklist_lock_is_held()))) +#define __task_cred(task) \ + ({ \ + const struct task_struct *__t = (task); \ + rcu_dereference_check(__t->real_cred, \ + rcu_read_lock_held() || \ + task_is_dead(__t)); \ + }) /** * get_current_cred - Get the current task's subjective credentials diff --git a/include/linux/sched.h b/include/linux/sched.h index 747fcaedddb..0478888c689 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -214,6 +214,7 @@ extern char ___assert_task_state[1 - 2*!!( #define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) +#define task_is_dead(task) ((task)->exit_state != 0) #define task_is_stopped_or_traced(task) \ ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) #define task_contributes_to_load(task) \ -- cgit v1.2.3 From b608b283a962caaa280756bc8563016a71712acf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 30 Jul 2010 15:31:54 -0400 Subject: NFS: kswapd must not block in nfs_release_page See https://bugzilla.kernel.org/show_bug.cgi?id=16056 If other processes are blocked waiting for kswapd to free up some memory so that they can make progress, then we cannot allow kswapd to block on those processes. 
Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/nfs/file.c | 13 +++++++++++-- fs/nfs/write.c | 4 ++-- include/linux/nfs_fs.h | 1 + 3 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 36a5e74f51b..f036153d9f5 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -493,11 +494,19 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) */ static int nfs_release_page(struct page *page, gfp_t gfp) { + struct address_space *mapping = page->mapping; + dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); /* Only do I/O if gfp is a superset of GFP_KERNEL */ - if ((gfp & GFP_KERNEL) == GFP_KERNEL) - nfs_wb_page(page->mapping->host, page); + if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL) { + int how = FLUSH_SYNC; + + /* Don't let kswapd deadlock waiting for OOM RPC calls */ + if (current_is_kswapd()) + how = 0; + nfs_commit_inode(mapping->host, how); + } /* If PagePrivate() is set, then the page is not freeable */ if (PagePrivate(page)) return 0; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 91679e2631e..0a6c65a1f9d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1379,7 +1379,7 @@ static const struct rpc_call_ops nfs_commit_ops = { .rpc_release = nfs_commit_release, }; -static int nfs_commit_inode(struct inode *inode, int how) +int nfs_commit_inode(struct inode *inode, int how) { LIST_HEAD(head); int may_wait = how & FLUSH_SYNC; @@ -1443,7 +1443,7 @@ out_mark_dirty: return ret; } #else -static int nfs_commit_inode(struct inode *inode, int how) +int nfs_commit_inode(struct inode *inode, int how) { return 0; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 77c2ae53431..f6e2455f13d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -493,6 +493,7 @@ extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern 
int nfs_wb_page_cancel(struct inode *inode, struct page* page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +extern int nfs_commit_inode(struct inode *, int); extern struct nfs_write_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_write_data *wdata); #endif -- cgit v1.2.3 From 7cfe249475fdd82ad3c2767a9b906cc775dab868 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 15 Jul 2010 10:47:14 +0100 Subject: ARM: AMBA: Add pclk support to AMBA bus infrastructure Some platforms gate the pclk (APB - the bus - clock) to the peripherals for power saving, along with the functional clock. When devices are accessed without pclk enabled, the kernel will oops. This gives them two options: 1. Leave all clocks on all the time. 2. Attempt to gate pclk along with the functional clock. (With some hardware, pclk and the functional clock are gated by a single bit in a register.) (1) has the disadvantage that it causes increased power usage, which is bad news for battery operated devices. (2) can lead to kernel oops if registers are accessed without the functional clock being enabled. So, introduce the apb_pclk signal in such a way existing drivers don't need to be updated. Essentially, this means we guarantee that: 1. pclk will be enabled whenever the driver is bound to a device - from probe() to remove() time. 2. pclk will also be enabled when reading the primecell IDs from the device. In order to allow drivers to be incrementally updated to achieve greater power savings, we provide two additional calls to allow drivers to manage the pclk - amba_pclk_enable()/amba_pclk_disable(). 
Signed-off-by: Russell King --- drivers/amba/bus.c | 88 +++++++++++++++++++++++++++++++++++++----------- include/linux/amba/bus.h | 11 ++++++ 2 files changed, 80 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index f60b2b6a093..d31590e7011 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -122,6 +122,31 @@ static int __init amba_init(void) postcore_initcall(amba_init); +static int amba_get_enable_pclk(struct amba_device *pcdev) +{ + struct clk *pclk = clk_get(&pcdev->dev, "apb_pclk"); + int ret; + + pcdev->pclk = pclk; + + if (IS_ERR(pclk)) + return PTR_ERR(pclk); + + ret = clk_enable(pclk); + if (ret) + clk_put(pclk); + + return ret; +} + +static void amba_put_disable_pclk(struct amba_device *pcdev) +{ + struct clk *pclk = pcdev->pclk; + + clk_disable(pclk); + clk_put(pclk); +} + /* * These are the device model conversion veneers; they convert the * device model structures to our more specific structures. @@ -130,17 +155,33 @@ static int amba_probe(struct device *dev) { struct amba_device *pcdev = to_amba_device(dev); struct amba_driver *pcdrv = to_amba_driver(dev->driver); - struct amba_id *id; + struct amba_id *id = amba_lookup(pcdrv->id_table, pcdev); + int ret; - id = amba_lookup(pcdrv->id_table, pcdev); + do { + ret = amba_get_enable_pclk(pcdev); + if (ret) + break; + + ret = pcdrv->probe(pcdev, id); + if (ret == 0) + break; - return pcdrv->probe(pcdev, id); + amba_put_disable_pclk(pcdev); + } while (0); + + return ret; } static int amba_remove(struct device *dev) { + struct amba_device *pcdev = to_amba_device(dev); struct amba_driver *drv = to_amba_driver(dev->driver); - return drv->remove(to_amba_device(dev)); + int ret = drv->remove(pcdev); + + amba_put_disable_pclk(pcdev); + + return ret; } static void amba_shutdown(struct device *dev) @@ -203,7 +244,6 @@ static void amba_device_release(struct device *dev) */ int amba_device_register(struct amba_device *dev, struct resource *parent) { - 
u32 pid, cid; u32 size; void __iomem *tmp; int i, ret; @@ -241,25 +281,35 @@ int amba_device_register(struct amba_device *dev, struct resource *parent) goto err_release; } - /* - * Read pid and cid based on size of resource - * they are located at end of region - */ - for (pid = 0, i = 0; i < 4; i++) - pid |= (readl(tmp + size - 0x20 + 4 * i) & 255) << (i * 8); - for (cid = 0, i = 0; i < 4; i++) - cid |= (readl(tmp + size - 0x10 + 4 * i) & 255) << (i * 8); + ret = amba_get_enable_pclk(dev); + if (ret == 0) { + u32 pid, cid; - iounmap(tmp); + /* + * Read pid and cid based on size of resource + * they are located at end of region + */ + for (pid = 0, i = 0; i < 4; i++) + pid |= (readl(tmp + size - 0x20 + 4 * i) & 255) << + (i * 8); + for (cid = 0, i = 0; i < 4; i++) + cid |= (readl(tmp + size - 0x10 + 4 * i) & 255) << + (i * 8); - if (cid == 0xb105f00d) - dev->periphid = pid; + amba_put_disable_pclk(dev); - if (!dev->periphid) { - ret = -ENODEV; - goto err_release; + if (cid == 0xb105f00d) + dev->periphid = pid; + + if (!dev->periphid) + ret = -ENODEV; } + iounmap(tmp); + + if (ret) + goto err_release; + ret = device_add(&dev->dev); if (ret) goto err_release; diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 8b103860783..b0c17401243 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -14,14 +14,19 @@ #ifndef ASMARM_AMBA_H #define ASMARM_AMBA_H +#include #include +#include #include #define AMBA_NR_IRQS 2 +struct clk; + struct amba_device { struct device dev; struct resource res; + struct clk *pclk; u64 dma_mask; unsigned int periphid; unsigned int irq[AMBA_NR_IRQS]; @@ -59,6 +64,12 @@ struct amba_device *amba_find_device(const char *, struct device *, unsigned int int amba_request_regions(struct amba_device *, const char *); void amba_release_regions(struct amba_device *); +#define amba_pclk_enable(d) \ + (IS_ERR((d)->pclk) ? 
0 : clk_enable((d)->pclk)) + +#define amba_pclk_disable(d) \ + do { if (!IS_ERR((d)->pclk)) clk_disable((d)->pclk); } while (0) + #define amba_config(d) (((d)->periphid >> 24) & 0xff) #define amba_rev(d) (((d)->periphid >> 20) & 0x0f) #define amba_manf(d) (((d)->periphid >> 12) & 0xff) -- cgit v1.2.3 From 77a63f3d1e0a3e7ede8d10f569e8481b13ff47c5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Aug 2010 13:40:40 -0400 Subject: NFS: Fix a typo in include/linux/nfs_fs.h nfs_commit_inode() needs to be defined irrespectively of whether or not we are supporting NFSv3 and NFSv4. Allow the compiler to optimise away code in the NFSv2-only case by converting it into an inlined stub function. Reported-and-tested-by: Ingo Molnar Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/write.c | 5 ----- include/linux/nfs_fs.h | 6 ++++++ 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index bb72ad34d51..9f81bdd91c5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1454,11 +1454,6 @@ out_mark_dirty: return ret; } #else -int nfs_commit_inode(struct inode *inode, int how) -{ - return 0; -} - static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) { return 0; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f6e2455f13d..bad4d121b16 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -496,6 +496,12 @@ extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_write_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_write_data *wdata); +#else +static inline int +nfs_commit_inode(struct inode *inode, int how) +{ + return 0; +} #endif static inline int -- cgit v1.2.3 From b126468e08d92aaeffa58ef04d70e417241dadc1 Mon Sep 17 00:00:00 2001 From: Fang Wenqi Date: Tue, 1 Jun 2010 02:43:06 +0000 Subject: virtio_9p.h needs Found 
with makes headers_check: include/linux/virtio_9p.h:15: found __[us]{8,16,32,64} type without #include Signed-off-by: Fang Wenqi Signed-off-by: Eric Van Hensbergen --- include/linux/virtio_9p.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index 5cf11765146..395c38a47ad 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -4,6 +4,7 @@ * compatible drivers/servers. */ #include #include +#include /* The feature bitmap for virtio 9P */ -- cgit v1.2.3 From 7751bdb3a095ad32dd4fcff3443cf8dd4cb1e748 Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Fri, 4 Jun 2010 13:41:26 +0000 Subject: 9p: readdir implementation for 9p2000.L This patch implements the kernel part of readdir() implementation for 9p2000.L Change from V3: Instead of inode, server now sends qids for each dirent SYNOPSIS size[4] Treaddir tag[2] fid[4] offset[8] count[4] size[4] Rreaddir tag[2] count[4] data[count] DESCRIPTION The readdir request asks the server to read the directory specified by 'fid' at an offset specified by 'offset' and return as many dirent structures as possible that fit into count bytes. Each dirent structure is laid out as follows. qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file offset[8] offset into the next dirent. type[1] type of this directory entry. name[256] name of this directory entry. This patch adds v9fs_dir_readdir_dotl() as the readdir() call for 9p2000.L. This function sends P9_TREADDIR command to the server. In response the server sends a buffer filled with dirent structures. This is different from the existing v9fs_dir_readdir() call which receives stat structures from the server. This results in significant speedup of readdir() on large directories. 
For example, doing 'ls >/dev/null' on a directory with 10000 files on my laptop takes 1.088 seconds with the existing code, but only takes 0.339 seconds with the new readdir. Signed-off-by: Sripathi Kodi Reviewed-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_dir.c | 134 ++++++++++++++++++++++++++++++++++++++++++------ include/net/9p/9p.h | 17 ++++++ include/net/9p/client.h | 18 +++++++ net/9p/client.c | 47 +++++++++++++++++ net/9p/protocol.c | 27 ++++++++++ 5 files changed, 227 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 36d961f342a..16c8a2a98c1 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -87,29 +87,19 @@ static void p9stat_init(struct p9_wstat *stbuf) } /** - * v9fs_dir_readdir - read a directory + * v9fs_alloc_rdir_buf - Allocate buffer used for read and readdir * @filp: opened file structure - * @dirent: directory structure ??? - * @filldir: function to populate directory structure ??? + * @buflen: Length in bytes of buffer to allocate * */ -static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) +static int v9fs_alloc_rdir_buf(struct file *filp, int buflen) { - int over; - struct p9_wstat st; - int err = 0; - struct p9_fid *fid; - int buflen; - int reclen = 0; struct p9_rdir *rdir; + struct p9_fid *fid; + int err = 0; - P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); fid = filp->private_data; - - buflen = fid->clnt->msize - P9_IOHDRSZ; - - /* allocate rdir on demand */ if (!fid->rdir) { rdir = kmalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL); @@ -128,6 +118,36 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) spin_unlock(&filp->f_dentry->d_lock); kfree(rdir); } +exit: + return err; +} + +/** + * v9fs_dir_readdir - read a directory + * @filp: opened file structure + * @dirent: directory structure ??? + * @filldir: function to populate directory structure ??? 
+ * + */ + +static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + int over; + struct p9_wstat st; + int err = 0; + struct p9_fid *fid; + int buflen; + int reclen = 0; + struct p9_rdir *rdir; + + P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); + fid = filp->private_data; + + buflen = fid->clnt->msize - P9_IOHDRSZ; + + err = v9fs_alloc_rdir_buf(filp, buflen); + if (err) + goto exit; rdir = (struct p9_rdir *) fid->rdir; err = mutex_lock_interruptible(&rdir->mutex); @@ -176,6 +196,88 @@ exit: return err; } +/** + * v9fs_dir_readdir_dotl - read a directory + * @filp: opened file structure + * @dirent: buffer to fill dirent structures + * @filldir: function to populate dirent structures + * + */ +static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent, + filldir_t filldir) +{ + int over; + int err = 0; + struct p9_fid *fid; + int buflen; + struct p9_rdir *rdir; + struct p9_dirent curdirent; + u64 oldoffset = 0; + + P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); + fid = filp->private_data; + + buflen = fid->clnt->msize - P9_READDIRHDRSZ; + + err = v9fs_alloc_rdir_buf(filp, buflen); + if (err) + goto exit; + rdir = (struct p9_rdir *) fid->rdir; + + err = mutex_lock_interruptible(&rdir->mutex); + if (err) + return err; + + while (err == 0) { + if (rdir->tail == rdir->head) { + err = p9_client_readdir(fid, rdir->buf, buflen, + filp->f_pos); + if (err <= 0) + goto unlock_and_exit; + + rdir->head = 0; + rdir->tail = err; + } + + while (rdir->head < rdir->tail) { + + err = p9dirent_read(rdir->buf + rdir->head, + buflen - rdir->head, &curdirent, + fid->clnt->proto_version); + if (err < 0) { + P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err); + err = -EIO; + goto unlock_and_exit; + } + + /* d_off in dirent structure tracks the offset into + * the next dirent in the dir. However, filldir() + * expects offset into the current dirent. 
Hence + * while calling filldir send the offset from the + * previous dirent structure. + */ + over = filldir(dirent, curdirent.d_name, + strlen(curdirent.d_name), + oldoffset, v9fs_qid2ino(&curdirent.qid), + curdirent.d_type); + oldoffset = curdirent.d_off; + + if (over) { + err = 0; + goto unlock_and_exit; + } + + filp->f_pos = curdirent.d_off; + rdir->head += err; + } + } + +unlock_and_exit: + mutex_unlock(&rdir->mutex); +exit: + return err; +} + /** * v9fs_dir_release - close a directory @@ -207,7 +309,7 @@ const struct file_operations v9fs_dir_operations = { const struct file_operations v9fs_dir_operations_dotl = { .read = generic_read_dir, .llseek = generic_file_llseek, - .readdir = v9fs_dir_readdir, + .readdir = v9fs_dir_readdir_dotl, .open = v9fs_file_open, .release = v9fs_dir_release, }; diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 156c26bb8bd..f1b0b310265 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -133,6 +133,8 @@ enum p9_msg_t { P9_RSTATFS, P9_TRENAME = 20, P9_RRENAME, + P9_TREADDIR = 40, + P9_RREADDIR, P9_TVERSION = 100, P9_RVERSION, P9_TAUTH = 102, @@ -275,6 +277,9 @@ enum p9_qid_t { /* ample room for Twrite/Rread header */ #define P9_IOHDRSZ 24 +/* Room for readdir header */ +#define P9_READDIRHDRSZ 24 + /** * struct p9_str - length prefixed string type * @len: length of the string @@ -485,6 +490,18 @@ struct p9_rwrite { u32 count; }; +struct p9_treaddir { + u32 fid; + u64 offset; + u32 count; +}; + +struct p9_rreaddir { + u32 count; + u8 *data; +}; + + struct p9_tclunk { u32 fid; }; diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 7dd3ed85c78..2ec93685e6d 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -195,6 +195,21 @@ struct p9_fid { struct list_head dlist; /* list of all fids attached to a dentry */ }; +/** + * struct p9_dirent - directory entry structure + * @qid: The p9 server qid for this dirent + * @d_off: offset to the next dirent + * @d_type: type of file + * 
@d_name: file name + */ + +struct p9_dirent { + struct p9_qid qid; + u64 d_off; + unsigned char d_type; + char d_name[256]; +}; + int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb); int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name); int p9_client_version(struct p9_client *); @@ -217,6 +232,9 @@ int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, u32 count); int p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, u64 offset, u32 count); +int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset); +int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, + int proto_version); struct p9_wstat *p9_client_stat(struct p9_fid *fid); int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst); diff --git a/net/9p/client.c b/net/9p/client.c index 37c8da07a80..a80357483a4 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1432,3 +1432,50 @@ error: } EXPORT_SYMBOL(p9_client_rename); +int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) +{ + int err, rsize, total; + struct p9_client *clnt; + struct p9_req_t *req; + char *dataptr; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", + fid->fid, (long long unsigned) offset, count); + + err = 0; + clnt = fid->clnt; + total = 0; + + rsize = fid->iounit; + if (!rsize || rsize > clnt->msize-P9_READDIRHDRSZ) + rsize = clnt->msize - P9_READDIRHDRSZ; + + if (count < rsize) + rsize = count; + + req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); + + if (data) + memmove(data, dataptr, count); + + p9_free_req(clnt, req); + return count; + +free_and_error: + p9_free_req(clnt, req); +error: + 
return err; +} +EXPORT_SYMBOL(p9_client_readdir); diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 149f8216013..b645c826353 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -580,3 +580,30 @@ void p9pdu_reset(struct p9_fcall *pdu) pdu->offset = 0; pdu->size = 0; } + +int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, + int proto_version) +{ + struct p9_fcall fake_pdu; + int ret; + char *nameptr; + + fake_pdu.size = len; + fake_pdu.capacity = len; + fake_pdu.sdata = buf; + fake_pdu.offset = 0; + + ret = p9pdu_readf(&fake_pdu, proto_version, "Qqbs", &dirent->qid, + &dirent->d_off, &dirent->d_type, &nameptr); + if (ret) { + P9_DPRINTK(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); + p9pdu_dump(1, &fake_pdu); + goto out; + } + + strcpy(dirent->d_name, nameptr); + +out: + return fake_pdu.offset; +} +EXPORT_SYMBOL(p9dirent_read); -- cgit v1.2.3 From f085312204f384a0277a66c3c48ba8f9edcd58f2 Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Mon, 12 Jul 2010 20:07:23 +0530 Subject: 9p: getattr client implementation for 9P2000.L protocol. SYNOPSIS size[4] Tgetattr tag[2] fid[4] request_mask[8] size[4] Rgetattr tag[2] lstat[n] DESCRIPTION The getattr transaction inquires about the file identified by fid. request_mask is a bit mask that specifies which fields of the stat structure is the client interested in. The reply will contain a machine-independent directory entry, laid out as follows: st_result_mask[8] Bit mask that indicates which fields in the stat structure have been populated by the server qid.type[1] the type of the file (directory, etc.), represented as a bit vector corresponding to the high 8 bits of the file's mode word. 
qid.vers[4] version number for given path qid.path[8] the file server's unique identification for the file st_mode[4] Permission and flags st_uid[4] User id of owner st_gid[4] Group ID of owner st_nlink[8] Number of hard links st_rdev[8] Device ID (if special file) st_size[8] Size, in bytes st_blksize[8] Block size for file system IO st_blocks[8] Number of file system blocks allocated st_atime_sec[8] Time of last access, seconds st_atime_nsec[8] Time of last access, nanoseconds st_mtime_sec[8] Time of last modification, seconds st_mtime_nsec[8] Time of last modification, nanoseconds st_ctime_sec[8] Time of last status change, seconds st_ctime_nsec[8] Time of last status change, nanoseconds st_btime_sec[8] Time of creation (birth) of file, seconds st_btime_nsec[8] Time of creation (birth) of file, nanoseconds st_gen[8] Inode generation st_data_version[8] Data version number request_mask and result_mask bit masks contain the following bits #define P9_STATS_MODE 0x00000001ULL #define P9_STATS_NLINK 0x00000002ULL #define P9_STATS_UID 0x00000004ULL #define P9_STATS_GID 0x00000008ULL #define P9_STATS_RDEV 0x00000010ULL #define P9_STATS_ATIME 0x00000020ULL #define P9_STATS_MTIME 0x00000040ULL #define P9_STATS_CTIME 0x00000080ULL #define P9_STATS_INO 0x00000100ULL #define P9_STATS_SIZE 0x00000200ULL #define P9_STATS_BLOCKS 0x00000400ULL #define P9_STATS_BTIME 0x00000800ULL #define P9_STATS_GEN 0x00001000ULL #define P9_STATS_DATA_VERSION 0x00002000ULL #define P9_STATS_BASIC 0x000007ffULL #define P9_STATS_ALL 0x00003fffULL This patch implements the client side of getattr implementation for 9P2000.L. It introduces a new structure p9_stat_dotl for getting Linux stat information along with QID. The data layout is similar to stat structure in Linux user space with the following major differences: inode (st_ino) is not part of data. Instead qid is. device (st_dev) is not part of data because this doesn't make sense on the client. All time variables are 64 bit wide on the wire. 
The kernel seems to use 32 bit variables for these variables. However, some of the architectures have used 64 bit variables and glibc exposes 64 bit variables to user space on some architectures. Hence to be on the safer side we have made these 64 bit in the protocol. Refer to the comments in include/asm-generic/stat.h There are some additional fields: st_btime_sec, st_btime_nsec, st_gen, st_data_version apart from the bitmask, st_result_mask. The bit mask is filled by the server to indicate which stat fields have been populated by the server. Currently there is no clean way for the server to obtain these additional fields, so it sends back just the basic fields. Signed-off-by: Sripathi Kodi Signed-off-by: Eric Van Hensbegren --- fs/9p/v9fs_vfs.h | 1 + fs/9p/vfs_inode.c | 177 +++++++++++++++++++++++++++++++++++++++++++----- fs/9p/vfs_super.c | 43 +++++++----- include/net/9p/9p.h | 44 ++++++++++++ include/net/9p/client.h | 3 + net/9p/client.c | 59 ++++++++++++++++ net/9p/protocol.c | 28 ++++++++ 7 files changed, 321 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 32ef4009d03..f47c6bbb01b 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -55,6 +55,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode); void v9fs_clear_inode(struct inode *inode); ino_t v9fs_qid2ino(struct p9_qid *qid); void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); +void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *); int v9fs_dir_release(struct inode *inode, struct file *filp); int v9fs_file_open(struct inode *inode, struct file *file); void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 4331b3b5ee1..afcb8d88938 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -396,23 +396,14 @@ void v9fs_clear_inode(struct inode *inode) #endif } -/** - * v9fs_inode_from_fid - populate an inode by issuing a 
attribute request - * @v9ses: session information - * @fid: fid to issue attribute request for - * @sb: superblock on which to create inode - * - */ - static struct inode * -v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, +v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb) { int err, umode; - struct inode *ret; + struct inode *ret = NULL; struct p9_wstat *st; - ret = NULL; st = p9_client_stat(fid); if (IS_ERR(st)) return ERR_CAST(st); @@ -433,15 +424,62 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, #endif p9stat_free(st); kfree(st); - return ret; - error: p9stat_free(st); kfree(st); return ERR_PTR(err); } +static struct inode * +v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid, + struct super_block *sb) +{ + struct inode *ret = NULL; + int err; + struct p9_stat_dotl *st; + + st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); + if (IS_ERR(st)) + return ERR_CAST(st); + + ret = v9fs_get_inode(sb, st->st_mode); + if (IS_ERR(ret)) { + err = PTR_ERR(ret); + goto error; + } + + v9fs_stat2inode_dotl(st, ret); + ret->i_ino = v9fs_qid2ino(&st->qid); +#ifdef CONFIG_9P_FSCACHE + v9fs_vcookie_set_qid(ret, &st->qid); + v9fs_cache_inode_get_cookie(ret); +#endif + kfree(st); + return ret; +error: + kfree(st); + return ERR_PTR(err); +} + +/** + * v9fs_inode_from_fid - Helper routine to populate an inode by + * issuing a attribute request + * @v9ses: session information + * @fid: fid to issue attribute request for + * @sb: superblock on which to create inode + * + */ +static inline struct inode * +v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, + struct super_block *sb) +{ + if (v9fs_proto_dotl(v9ses)) + return v9fs_inode_dotl(v9ses, fid, sb); + else + return v9fs_inode(v9ses, fid, sb); +} + /** * v9fs_remove - helper function to remove files and directories * @dir: directory inode that is being deleted @@ -853,6 +891,42 @@ v9fs_vfs_getattr(struct 
vfsmount *mnt, struct dentry *dentry, return 0; } +static int +v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + int err; + struct v9fs_session_info *v9ses; + struct p9_fid *fid; + struct p9_stat_dotl *st; + + P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); + err = -EPERM; + v9ses = v9fs_inode2v9ses(dentry->d_inode); + if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) + return simple_getattr(mnt, dentry, stat); + + fid = v9fs_fid_lookup(dentry); + if (IS_ERR(fid)) + return PTR_ERR(fid); + + /* Ask for all the fields in stat structure. Server will return + * whatever it supports + */ + + st = p9_client_getattr_dotl(fid, P9_STATS_ALL); + if (IS_ERR(st)) + return PTR_ERR(st); + + v9fs_stat2inode_dotl(st, dentry->d_inode); + generic_fillattr(dentry->d_inode, stat); + /* Change block size to what the server returned */ + stat->blksize = st->st_blksize; + + kfree(st); + return 0; +} + /** * v9fs_vfs_setattr - set file metadata * @dentry: file whose metadata to set @@ -979,6 +1053,77 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; } +/** + * v9fs_stat2inode_dotl - populate an inode structure with stat info + * @stat: stat structure + * @inode: inode to populate + * @sb: superblock of filesystem + * + */ + +void +v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) +{ + + if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) { + inode->i_atime.tv_sec = stat->st_atime_sec; + inode->i_atime.tv_nsec = stat->st_atime_nsec; + inode->i_mtime.tv_sec = stat->st_mtime_sec; + inode->i_mtime.tv_nsec = stat->st_mtime_nsec; + inode->i_ctime.tv_sec = stat->st_ctime_sec; + inode->i_ctime.tv_nsec = stat->st_ctime_nsec; + inode->i_uid = stat->st_uid; + inode->i_gid = stat->st_gid; + inode->i_nlink = stat->st_nlink; + inode->i_mode = stat->st_mode; + inode->i_rdev = new_decode_dev(stat->st_rdev); + + if ((S_ISBLK(inode->i_mode)) || 
(S_ISCHR(inode->i_mode))) + init_special_inode(inode, inode->i_mode, inode->i_rdev); + + i_size_write(inode, stat->st_size); + inode->i_blocks = stat->st_blocks; + } else { + if (stat->st_result_mask & P9_STATS_ATIME) { + inode->i_atime.tv_sec = stat->st_atime_sec; + inode->i_atime.tv_nsec = stat->st_atime_nsec; + } + if (stat->st_result_mask & P9_STATS_MTIME) { + inode->i_mtime.tv_sec = stat->st_mtime_sec; + inode->i_mtime.tv_nsec = stat->st_mtime_nsec; + } + if (stat->st_result_mask & P9_STATS_CTIME) { + inode->i_ctime.tv_sec = stat->st_ctime_sec; + inode->i_ctime.tv_nsec = stat->st_ctime_nsec; + } + if (stat->st_result_mask & P9_STATS_UID) + inode->i_uid = stat->st_uid; + if (stat->st_result_mask & P9_STATS_GID) + inode->i_gid = stat->st_gid; + if (stat->st_result_mask & P9_STATS_NLINK) + inode->i_nlink = stat->st_nlink; + if (stat->st_result_mask & P9_STATS_MODE) { + inode->i_mode = stat->st_mode; + if ((S_ISBLK(inode->i_mode)) || + (S_ISCHR(inode->i_mode))) + init_special_inode(inode, inode->i_mode, + inode->i_rdev); + } + if (stat->st_result_mask & P9_STATS_RDEV) + inode->i_rdev = new_decode_dev(stat->st_rdev); + if (stat->st_result_mask & P9_STATS_SIZE) + i_size_write(inode, stat->st_size); + if (stat->st_result_mask & P9_STATS_BLOCKS) + inode->i_blocks = stat->st_blocks; + } + if (stat->st_result_mask & P9_STATS_GEN) + inode->i_generation = stat->st_gen; + + /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION + * because the inode structure does not have fields for them. 
+ */ +} + /** * v9fs_qid2ino - convert qid into inode number * @qid: qid to hash @@ -1254,7 +1399,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotl = { .rmdir = v9fs_vfs_rmdir, .mknod = v9fs_vfs_mknod, .rename = v9fs_vfs_rename, - .getattr = v9fs_vfs_getattr, + .getattr = v9fs_vfs_getattr_dotl, .setattr = v9fs_vfs_setattr, }; @@ -1276,7 +1421,7 @@ static const struct inode_operations v9fs_file_inode_operations = { }; static const struct inode_operations v9fs_file_inode_operations_dotl = { - .getattr = v9fs_vfs_getattr, + .getattr = v9fs_vfs_getattr_dotl, .setattr = v9fs_vfs_setattr, }; @@ -1292,6 +1437,6 @@ static const struct inode_operations v9fs_symlink_inode_operations_dotl = { .readlink = generic_readlink, .follow_link = v9fs_vfs_follow_link, .put_link = v9fs_vfs_put_link, - .getattr = v9fs_vfs_getattr, + .getattr = v9fs_vfs_getattr_dotl, .setattr = v9fs_vfs_setattr, }; diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index be74d020436..3623f692b44 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -107,7 +107,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, struct inode *inode = NULL; struct dentry *root = NULL; struct v9fs_session_info *v9ses = NULL; - struct p9_wstat *st = NULL; int mode = S_IRWXUGO | S_ISVTX; struct p9_fid *fid; int retval = 0; @@ -124,16 +123,10 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, goto close_session; } - st = p9_client_stat(fid); - if (IS_ERR(st)) { - retval = PTR_ERR(st); - goto clunk_fid; - } - sb = sget(fs_type, NULL, v9fs_set_super, v9ses); if (IS_ERR(sb)) { retval = PTR_ERR(sb); - goto free_stat; + goto clunk_fid; } v9fs_fill_super(sb, v9ses, flags, data); @@ -151,22 +144,38 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, } sb->s_root = root; - root->d_inode->i_ino = v9fs_qid2ino(&st->qid); - v9fs_stat2inode(st, root->d_inode, sb); + if (v9fs_proto_dotl(v9ses)) { + struct p9_stat_dotl *st = NULL; + st = 
p9_client_getattr_dotl(fid, P9_STATS_BASIC); + if (IS_ERR(st)) { + retval = PTR_ERR(st); + goto clunk_fid; + } + + v9fs_stat2inode_dotl(st, root->d_inode); + kfree(st); + } else { + struct p9_wstat *st = NULL; + st = p9_client_stat(fid); + if (IS_ERR(st)) { + retval = PTR_ERR(st); + goto clunk_fid; + } + + root->d_inode->i_ino = v9fs_qid2ino(&st->qid); + v9fs_stat2inode(st, root->d_inode, sb); + + p9stat_free(st); + kfree(st); + } v9fs_fid_add(root, fid); - p9stat_free(st); - kfree(st); P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); simple_set_mnt(mnt, sb); return 0; -free_stat: - p9stat_free(st); - kfree(st); - clunk_fid: p9_client_clunk(fid); @@ -176,8 +185,6 @@ close_session: return retval; release_sb: - p9stat_free(st); - kfree(st); deactivate_locked_super(sb); return retval; } diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index f1b0b310265..ab12e1c9cc7 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -133,6 +133,8 @@ enum p9_msg_t { P9_RSTATFS, P9_TRENAME = 20, P9_RRENAME, + P9_TGETATTR = 24, + P9_RGETATTR, P9_TREADDIR = 40, P9_RREADDIR, P9_TVERSION = 100, @@ -362,6 +364,48 @@ struct p9_wstat { u32 n_muid; /* 9p2000.u extensions */ }; +struct p9_stat_dotl { + u64 st_result_mask; + struct p9_qid qid; + u32 st_mode; + u32 st_uid; + u32 st_gid; + u64 st_nlink; + u64 st_rdev; + u64 st_size; + u64 st_blksize; + u64 st_blocks; + u64 st_atime_sec; + u64 st_atime_nsec; + u64 st_mtime_sec; + u64 st_mtime_nsec; + u64 st_ctime_sec; + u64 st_ctime_nsec; + u64 st_btime_sec; + u64 st_btime_nsec; + u64 st_gen; + u64 st_data_version; +}; + +#define P9_STATS_MODE 0x00000001ULL +#define P9_STATS_NLINK 0x00000002ULL +#define P9_STATS_UID 0x00000004ULL +#define P9_STATS_GID 0x00000008ULL +#define P9_STATS_RDEV 0x00000010ULL +#define P9_STATS_ATIME 0x00000020ULL +#define P9_STATS_MTIME 0x00000040ULL +#define P9_STATS_CTIME 0x00000080ULL +#define P9_STATS_INO 0x00000100ULL +#define P9_STATS_SIZE 0x00000200ULL +#define P9_STATS_BLOCKS 0x00000400ULL 
+ +#define P9_STATS_BTIME 0x00000800ULL +#define P9_STATS_GEN 0x00001000ULL +#define P9_STATS_DATA_VERSION 0x00002000ULL + +#define P9_STATS_BASIC 0x000007ffULL /* Mask for fields up to BLOCKS */ +#define P9_STATS_ALL 0x00003fffULL /* Mask for All fields above */ + /* Structures for Protocol Operations */ struct p9_tstatfs { u32 fid; diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 2ec93685e6d..6462eec435b 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -238,6 +238,9 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, struct p9_wstat *p9_client_stat(struct p9_fid *fid); int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst); +struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, + u64 request_mask); + struct p9_req_t *p9_tag_lookup(struct p9_client *, u16); void p9_client_cb(struct p9_client *c, struct p9_req_t *req); diff --git a/net/9p/client.c b/net/9p/client.c index 4ff068e98f7..5e97118da3b 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1303,6 +1303,65 @@ error: } EXPORT_SYMBOL(p9_client_stat); +struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, + u64 request_mask) +{ + int err; + struct p9_client *clnt; + struct p9_stat_dotl *ret = kmalloc(sizeof(struct p9_stat_dotl), + GFP_KERNEL); + struct p9_req_t *req; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n", + fid->fid, request_mask); + + if (!ret) + return ERR_PTR(-ENOMEM); + + err = 0; + clnt = fid->clnt; + + req = p9_client_rpc(clnt, P9_TGETATTR, "dq", fid->fid, request_mask); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret); + if (err) { + p9pdu_dump(1, req->rc); + p9_free_req(clnt, req); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, + "<<< RGETATTR st_result_mask=%lld\n" + "<<< qid=%x.%llx.%x\n" + "<<< st_mode=%8.8x st_nlink=%llu\n" + "<<< st_uid=%d st_gid=%d\n" + "<<< st_rdev=%llx st_size=%llx st_blksize=%llu 
st_blocks=%llu\n" + "<<< st_atime_sec=%lld st_atime_nsec=%lld\n" + "<<< st_mtime_sec=%lld st_mtime_nsec=%lld\n" + "<<< st_ctime_sec=%lld st_ctime_nsec=%lld\n" + "<<< st_btime_sec=%lld st_btime_nsec=%lld\n" + "<<< st_gen=%lld st_data_version=%lld", + ret->st_result_mask, ret->qid.type, ret->qid.path, + ret->qid.version, ret->st_mode, ret->st_nlink, ret->st_uid, + ret->st_gid, ret->st_rdev, ret->st_size, ret->st_blksize, + ret->st_blocks, ret->st_atime_sec, ret->st_atime_nsec, + ret->st_mtime_sec, ret->st_mtime_nsec, ret->st_ctime_sec, + ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec, + ret->st_gen, ret->st_data_version); + + p9_free_req(clnt, req); + return ret; + +error: + kfree(ret); + return ERR_PTR(err); +} +EXPORT_SYMBOL(p9_client_getattr_dotl); + static int p9_client_statsize(struct p9_wstat *wst, int proto_version) { int ret; diff --git a/net/9p/protocol.c b/net/9p/protocol.c index b645c826353..3e4f7769589 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -141,6 +141,7 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) D - data blob (int32_t size followed by void *, results are not freed) T - array of strings (int16_t count, followed by strings) R - array of qids (int16_t count, followed by qids) + A - stat for 9p2000.L (p9_stat_dotl) ? 
- if optional = 1, continue parsing */ @@ -340,6 +341,33 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, } } break; + case 'A': { + struct p9_stat_dotl *stbuf = + va_arg(ap, struct p9_stat_dotl *); + + memset(stbuf, 0, sizeof(struct p9_stat_dotl)); + errcode = + p9pdu_readf(pdu, proto_version, + "qQdddqqqqqqqqqqqqqqq", + &stbuf->st_result_mask, + &stbuf->qid, + &stbuf->st_mode, + &stbuf->st_uid, &stbuf->st_gid, + &stbuf->st_nlink, + &stbuf->st_rdev, &stbuf->st_size, + &stbuf->st_blksize, &stbuf->st_blocks, + &stbuf->st_atime_sec, + &stbuf->st_atime_nsec, + &stbuf->st_mtime_sec, + &stbuf->st_mtime_nsec, + &stbuf->st_ctime_sec, + &stbuf->st_ctime_nsec, + &stbuf->st_btime_sec, + &stbuf->st_btime_nsec, + &stbuf->st_gen, + &stbuf->st_data_version); + } + break; case '?': if ((proto_version != p9_proto_2000u) && (proto_version != p9_proto_2000L)) -- cgit v1.2.3 From 87d7845aa0b157a62448dd3e339856f28befe1f4 Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Fri, 18 Jun 2010 11:50:10 +0530 Subject: 9p: Implement client side of setattr for 9P2000.L protocol. SYNOPSIS size[4] Tsetattr tag[2] attr[n] size[4] Rsetattr tag[2] DESCRIPTION The setattr command changes some of the file status information. attr resembles the iattr structure used in Linux kernel. It specifies which status parameter is to be changed and to what value. It is laid out as follows: valid[4] specifies which status information is to be changed. Possible values are: ATTR_MODE (1 << 0) ATTR_UID (1 << 1) ATTR_GID (1 << 2) ATTR_SIZE (1 << 3) ATTR_ATIME (1 << 4) ATTR_MTIME (1 << 5) ATTR_ATIME_SET (1 << 7) ATTR_MTIME_SET (1 << 8) The last two bits represent whether the time information is being sent by the client's user space. In the absence of these bits the server always uses server's time.
mode[4] File permission bits uid[4] Owner id of file gid[4] Group id of the file size[8] File size atime_sec[8] Time of last file access, seconds atime_nsec[8] Time of last file access, nanoseconds mtime_sec[8] Time of last file modification, seconds mtime_nsec[8] Time of last file modification, nanoseconds Explanation of the patches: -------------------------- *) The kernel just copies relevant contents of iattr structure to p9_iattr_dotl structure and passes it down to the client. The only check it has is calling inode_change_ok() *) The p9_iattr_dotl structure does not have ctime and ia_file parameters because I don't think these are needed in our case. The client user space can request updating just ctime by calling chown(fd, -1, -1). This is handled on server side without a need for putting ctime on the wire. *) The server currently supports changing mode, time, ownership and size of the file. *) 9P RFC says "Either all the changes in wstat request happen, or none of them does: if the request succeeds, all changes were made; if it fails, none were." I have not done anything to implement this specifically because I don't see a reason.
Signed-off-by: Sripathi Kodi Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_inode.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++--- include/net/9p/9p.h | 28 ++++++++++++++++++++++++++++ include/net/9p/client.h | 1 + net/9p/client.c | 30 ++++++++++++++++++++++++++++++ net/9p/protocol.c | 17 +++++++++++++++++ 5 files changed, 122 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index afcb8d88938..a90324f4546 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -976,6 +976,49 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) return retval; } +/** + * v9fs_vfs_setattr_dotl - set file metadata + * @dentry: file whose metadata to set + * @iattr: metadata assignment structure + * + */ + +static int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) +{ + int retval; + struct v9fs_session_info *v9ses; + struct p9_fid *fid; + struct p9_iattr_dotl p9attr; + + P9_DPRINTK(P9_DEBUG_VFS, "\n"); + + retval = inode_change_ok(dentry->d_inode, iattr); + if (retval) + return retval; + + p9attr.valid = iattr->ia_valid; + p9attr.mode = iattr->ia_mode; + p9attr.uid = iattr->ia_uid; + p9attr.gid = iattr->ia_gid; + p9attr.size = iattr->ia_size; + p9attr.atime_sec = iattr->ia_atime.tv_sec; + p9attr.atime_nsec = iattr->ia_atime.tv_nsec; + p9attr.mtime_sec = iattr->ia_mtime.tv_sec; + p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec; + + retval = -EPERM; + v9ses = v9fs_inode2v9ses(dentry->d_inode); + fid = v9fs_fid_lookup(dentry); + if (IS_ERR(fid)) + return PTR_ERR(fid); + + retval = p9_client_setattr(fid, &p9attr); + if (retval >= 0) + retval = inode_setattr(dentry->d_inode, iattr); + + return retval; +} + /** * v9fs_stat2inode - populate an inode structure with mistat info * @stat: Plan 9 metadata (mistat) structure @@ -1400,7 +1443,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotl = { .mknod = v9fs_vfs_mknod, .rename = 
v9fs_vfs_rename, .getattr = v9fs_vfs_getattr_dotl, - .setattr = v9fs_vfs_setattr, + .setattr = v9fs_vfs_setattr_dotl, }; static const struct inode_operations v9fs_dir_inode_operations = { @@ -1422,7 +1465,7 @@ static const struct inode_operations v9fs_file_inode_operations = { static const struct inode_operations v9fs_file_inode_operations_dotl = { .getattr = v9fs_vfs_getattr_dotl, - .setattr = v9fs_vfs_setattr, + .setattr = v9fs_vfs_setattr_dotl, }; static const struct inode_operations v9fs_symlink_inode_operations = { @@ -1438,5 +1481,5 @@ static const struct inode_operations v9fs_symlink_inode_operations_dotl = { .follow_link = v9fs_vfs_follow_link, .put_link = v9fs_vfs_put_link, .getattr = v9fs_vfs_getattr_dotl, - .setattr = v9fs_vfs_setattr, + .setattr = v9fs_vfs_setattr_dotl, }; diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index ab12e1c9cc7..7f64d72f6c6 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -135,6 +135,8 @@ enum p9_msg_t { P9_RRENAME, P9_TGETATTR = 24, P9_RGETATTR, + P9_TSETATTR = 26, + P9_RSETATTR, P9_TREADDIR = 40, P9_RREADDIR, P9_TVERSION = 100, @@ -406,6 +408,32 @@ struct p9_stat_dotl { #define P9_STATS_BASIC 0x000007ffULL /* Mask for fields up to BLOCKS */ #define P9_STATS_ALL 0x00003fffULL /* Mask for All fields above */ +/** + * struct p9_iattr_dotl - P9 inode attribute for setattr + * @valid: bitfield specifying which fields are valid + * same as in struct iattr + * @mode: File permission bits + * @uid: user id of owner + * @gid: group id + * @size: File size + * @atime_sec: Last access time, seconds + * @atime_nsec: Last access time, nanoseconds + * @mtime_sec: Last modification time, seconds + * @mtime_nsec: Last modification time, nanoseconds + */ + +struct p9_iattr_dotl { + u32 valid; + u32 mode; + u32 uid; + u32 gid; + u64 size; + u64 atime_sec; + u64 atime_nsec; + u64 mtime_sec; + u64 mtime_nsec; +}; + /* Structures for Protocol Operations */ struct p9_tstatfs { u32 fid; diff --git a/include/net/9p/client.h 
b/include/net/9p/client.h index 6462eec435b..afdc385152f 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -237,6 +237,7 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, int proto_version); struct p9_wstat *p9_client_stat(struct p9_fid *fid); int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst); +int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *attr); struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, u64 request_mask); diff --git a/net/9p/client.c b/net/9p/client.c index 5e97118da3b..b2f70ec889c 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1426,6 +1426,36 @@ error: } EXPORT_SYMBOL(p9_client_wstat); +int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + + err = 0; + clnt = fid->clnt; + P9_DPRINTK(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid); + P9_DPRINTK(P9_DEBUG_9P, + " valid=%x mode=%x uid=%d gid=%d size=%lld\n" + " atime_sec=%lld atime_nsec=%lld\n" + " mtime_sec=%lld mtime_nsec=%lld\n", + p9attr->valid, p9attr->mode, p9attr->uid, p9attr->gid, + p9attr->size, p9attr->atime_sec, p9attr->atime_nsec, + p9attr->mtime_sec, p9attr->mtime_nsec); + + req = p9_client_rpc(clnt, P9_TSETATTR, "dI", fid->fid, p9attr); + + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid); + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_setattr); + int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) { int err; diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 3e4f7769589..3acd3afb20c 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -516,6 +516,23 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, } } break; + case 'I':{ + struct p9_iattr_dotl *p9attr = va_arg(ap, + struct p9_iattr_dotl *); + + errcode = p9pdu_writef(pdu, proto_version, + "ddddqqqqq", + p9attr->valid, + 
p9attr->mode, + p9attr->uid, + p9attr->gid, + p9attr->size, + p9attr->atime_sec, + p9attr->atime_nsec, + p9attr->mtime_sec, + p9attr->mtime_nsec); + } + break; case '?': if ((proto_version != p9_proto_2000u) && (proto_version != p9_proto_2000L)) -- cgit v1.2.3 From 652df9a7fd03cb47a3f663f0c08a2bd086505e9b Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Thu, 3 Jun 2010 15:16:59 -0700 Subject: 9p: Define and implement TLINK for 9P2000.L This patch adds a helper function to get the dentry from inode and uses it in creating a Hardlink SYNOPSIS size[4] Tlink tag[2] dfid[4] oldfid[4] newpath[s] size[4] Rlink tag[2] DESCRIPTION Create a link 'newpath' in directory pointed by dfid linking to oldfid path. [sripathik@in.ibm.com : p9_client_link should not free req structure if p9_client_rpc has returned an error.] Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- include/net/9p/9p.h | 2 ++ include/net/9p/client.h | 1 + net/9p/client.c | 19 +++++++++++++++++++ 3 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 7f64d72f6c6..5985c0f83db 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -139,6 +139,8 @@ enum p9_msg_t { P9_RSETATTR, P9_TREADDIR = 40, P9_RREADDIR, + P9_TLINK = 70, + P9_RLINK, P9_TVERSION = 100, P9_RVERSION, P9_TAUTH = 102, diff --git a/include/net/9p/client.h b/include/net/9p/client.h index afdc385152f..e36f11650e9 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -226,6 +226,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, int p9_client_open(struct p9_fid *fid, int mode); int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, char *extension); +int p9_client_link(struct p9_fid *fid, struct p9_fid *oldfid, char *newname); int p9_client_clunk(struct p9_fid *fid); int p9_client_remove(struct p9_fid *fid); int p9_client_read(struct p9_fid *fid, char *data, char __user 
*udata, diff --git a/net/9p/client.c b/net/9p/client.c index b2f70ec889c..ad1c4489ab4 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1095,6 +1095,25 @@ error: } EXPORT_SYMBOL(p9_client_fcreate); +int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname) +{ + struct p9_client *clnt; + struct p9_req_t *req; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n", + dfid->fid, oldfid->fid, newname); + clnt = dfid->clnt; + req = p9_client_rpc(clnt, P9_TLINK, "dds", dfid->fid, oldfid->fid, + newname); + if (IS_ERR(req)) + return PTR_ERR(req); + + P9_DPRINTK(P9_DEBUG_9P, "<<< RLINK\n"); + p9_free_req(clnt, req); + return 0; +} +EXPORT_SYMBOL(p9_client_link); + int p9_client_clunk(struct p9_fid *fid) { int err; -- cgit v1.2.3 From 50cc42ff3d7bc48a436c5a0413459ca7841b505f Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Wed, 9 Jun 2010 15:59:31 -0700 Subject: 9p: Define and implement TSYMLINK for 9P2000.L Create a symbolic link SYNOPSIS size[4] Tsymlink tag[2] fid[4] name[s] symtgt[s] gid[4] size[4] Rsymlink tag[2] qid[13] DESCRIPTION Create a symbolic link named 'name' pointing to 'symtgt'. gid represents the effective group id of the caller. The permissions of a symbolic link are irrelevant hence it is omitted from the protocol. 
Signed-off-by: Venkateswararao Jujjuri Reviewed-by: Sripathi Kodi Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_inode.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++-- include/net/9p/9p.h | 4 ++ include/net/9p/client.h | 2 + net/9p/client.c | 34 ++++++++++++++++ 4 files changed, 137 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index e6ece237241..a7319364544 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1245,7 +1245,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) if (IS_ERR(fid)) return PTR_ERR(fid); - if (!v9fs_proto_dotu(v9ses)) + if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) return -EBADF; st = p9_client_stat(fid); @@ -1350,6 +1350,99 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, return 0; } +/** + * v9fs_vfs_symlink_dotl - helper function to create symlinks + * @dir: directory inode containing symlink + * @dentry: dentry for symlink + * @symname: symlink data + * + * See Also: 9P2000.L RFC for more information + * + */ + +static int +v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + struct v9fs_session_info *v9ses; + struct p9_fid *dfid; + struct p9_fid *fid = NULL; + struct inode *inode; + struct p9_qid qid; + char *name; + int err; + gid_t gid; + + name = (char *) dentry->d_name.name; + P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n", + dir->i_ino, name, symname); + v9ses = v9fs_inode2v9ses(dir); + + dfid = v9fs_fid_lookup(dentry->d_parent); + if (IS_ERR(dfid)) { + err = PTR_ERR(dfid); + P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); + return err; + } + + gid = v9fs_get_fsgid_for_create(dir); + + if (gid < 0) { + P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_egid failed %d\n", gid); + goto error; + } + + /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. 
*/ + err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid); + + if (err < 0) { + P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err); + goto error; + } + + if (v9ses->cache) { + /* Now walk from the parent so we can get an unopened fid. */ + fid = p9_client_walk(dfid, 1, &name, 1); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", + err); + fid = NULL; + goto error; + } + + /* instantiate inode and assign the unopened fid to dentry */ + inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", + err); + goto error; + } + dentry->d_op = &v9fs_cached_dentry_operations; + d_instantiate(dentry, inode); + err = v9fs_fid_add(dentry, fid); + if (err < 0) + goto error; + fid = NULL; + } else { + /* Not in cached mode. No need to populate inode with stat */ + inode = v9fs_get_inode(dir->i_sb, S_IFLNK); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto error; + } + dentry->d_op = &v9fs_dentry_operations; + d_instantiate(dentry, inode); + } + +error: + if (fid) + p9_client_clunk(fid); + + return err; +} + /** * v9fs_vfs_symlink - helper function to create symlinks * @dir: directory inode containing symlink @@ -1527,7 +1620,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = { .create = v9fs_vfs_create, .lookup = v9fs_vfs_lookup, .symlink = v9fs_vfs_symlink, - .link = v9fs_vfs_link_dotl, + .link = v9fs_vfs_link, .unlink = v9fs_vfs_unlink, .mkdir = v9fs_vfs_mkdir, .rmdir = v9fs_vfs_rmdir, @@ -1540,8 +1633,8 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = { static const struct inode_operations v9fs_dir_inode_operations_dotl = { .create = v9fs_vfs_create, .lookup = v9fs_vfs_lookup, - .symlink = v9fs_vfs_symlink, - .link = v9fs_vfs_link, + .link = v9fs_vfs_link_dotl, + .symlink = v9fs_vfs_symlink_dotl, .unlink = v9fs_vfs_unlink, .mkdir = v9fs_vfs_mkdir, .rmdir = 
v9fs_vfs_rmdir, diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 5985c0f83db..44a6883d714 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -88,6 +88,8 @@ do { \ * enum p9_msg_t - 9P message types * @P9_TSTATFS: file system status request * @P9_RSTATFS: file system status response + * @P9_TSYMLINK: make symlink request + * @P9_RSYMLINK: make symlink response * @P9_TRENAME: rename request * @P9_RRENAME: rename response * @P9_TVERSION: version handshake request @@ -131,6 +133,8 @@ do { \ enum p9_msg_t { P9_TSTATFS = 8, P9_RSTATFS, + P9_TSYMLINK = 16, + P9_RSYMLINK, P9_TRENAME = 20, P9_RRENAME, P9_TGETATTR = 24, diff --git a/include/net/9p/client.h b/include/net/9p/client.h index e36f11650e9..2e039730920 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -227,6 +227,8 @@ int p9_client_open(struct p9_fid *fid, int mode); int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, char *extension); int p9_client_link(struct p9_fid *fid, struct p9_fid *oldfid, char *newname); +int p9_client_symlink(struct p9_fid *fid, char *name, char *symname, gid_t gid, + struct p9_qid *qid); int p9_client_clunk(struct p9_fid *fid); int p9_client_remove(struct p9_fid *fid); int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, diff --git a/net/9p/client.c b/net/9p/client.c index ad1c4489ab4..e37e64cb939 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1095,6 +1095,40 @@ error: } EXPORT_SYMBOL(p9_client_fcreate); +int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, + struct p9_qid *qid) +{ + int err = 0; + struct p9_client *clnt; + struct p9_req_t *req; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s symtgt %s\n", + dfid->fid, name, symtgt); + clnt = dfid->clnt; + + req = p9_client_rpc(clnt, P9_TSYMLINK, "dssd", dfid->fid, name, symtgt, + gid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); + if 
(err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n", + qid->type, (unsigned long long)qid->path, qid->version); + +free_and_error: + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_symlink); + int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname) { struct p9_client *clnt; -- cgit v1.2.3 From 4b43516ab19b748b48322937fd9307af17541c4d Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Wed, 16 Jun 2010 14:27:01 +0530 Subject: 9p: Implement TMKNOD Synopsis size[4] Tmknod tag[2] fid[4] name[s] mode[4] major[4] minor[4] gid[4] size[4] Rmknod tag[2] qid[13] Description mknod asks the file server to create a device node with given major and minor number, mode and gid. The qid for the new device node is returned with the mknod reply message. [sripathik@in.ibm.com: Fix error handling code] Signed-off-by: M. Mohan Kumar Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_inode.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++-- include/net/9p/9p.h | 4 ++ include/net/9p/client.h | 2 + net/9p/client.c | 31 ++++++++++++++ 4 files changed, 140 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index a7319364544..4d9f45ec612 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -302,7 +302,13 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) case S_IFBLK: case S_IFCHR: case S_IFSOCK: - if (!v9fs_proto_dotu(v9ses)) { + if (v9fs_proto_dotl(v9ses)) { + inode->i_op = &v9fs_file_inode_operations_dotl; + inode->i_fop = &v9fs_file_operations_dotl; + } else if (v9fs_proto_dotu(v9ses)) { + inode->i_op = &v9fs_file_inode_operations; + inode->i_fop = &v9fs_file_operations; + } else { P9_DPRINTK(P9_DEBUG_ERROR, "special files without extended mode\n"); err = -EINVAL; @@ -1616,6 +1622,100 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t 
rdev) return retval; } +/** + * v9fs_vfs_mknod_dotl - create a special file + * @dir: inode destination for new link + * @dentry: dentry for file + * @mode: mode for creation + * @rdev: device associated with special file + * + */ +static int +v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode, + dev_t rdev) +{ + int err; + char *name; + struct v9fs_session_info *v9ses; + struct p9_fid *fid = NULL, *dfid = NULL; + struct inode *inode; + gid_t gid; + struct p9_qid qid; + struct dentry *dir_entry; + + P9_DPRINTK(P9_DEBUG_VFS, + " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, + dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); + + if (!new_valid_dev(rdev)) + return -EINVAL; + + v9ses = v9fs_inode2v9ses(dir); + dir_dentry = v9fs_dentry_from_dir_inode(dir); + dfid = v9fs_fid_lookup(dir_entry); + if (IS_ERR(dfid)) { + err = PTR_ERR(dfid); + P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); + dfid = NULL; + goto error; + } + + gid = v9fs_get_fsgid_for_create(dir); + if (gid < 0) { + P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n"); + goto error; + } + + name = (char *) dentry->d_name.name; + + err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid); + if (err < 0) + goto error; + + /* instantiate inode and assign the unopened fid to the dentry */ + if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { + fid = p9_client_walk(dfid, 1, &name, 1); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", + err); + fid = NULL; + goto error; + } + + inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", + err); + goto error; + } + dentry->d_op = &v9fs_cached_dentry_operations; + d_instantiate(dentry, inode); + err = v9fs_fid_add(dentry, fid); + if (err < 0) + goto error; + fid = NULL; + } else { + /* + * Not in cached mode. No need to populate inode with stat. 
+ * socket syscall returns a fd, so we need instantiate + */ + inode = v9fs_get_inode(dir->i_sb, mode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto error; + } + dentry->d_op = &v9fs_dentry_operations; + d_instantiate(dentry, inode); + } + +error: + if (fid) + p9_client_clunk(fid); + return err; +} + static const struct inode_operations v9fs_dir_inode_operations_dotu = { .create = v9fs_vfs_create, .lookup = v9fs_vfs_lookup, @@ -1624,7 +1724,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = { .unlink = v9fs_vfs_unlink, .mkdir = v9fs_vfs_mkdir, .rmdir = v9fs_vfs_rmdir, - .mknod = v9fs_vfs_mknod, + .mknod = v9fs_vfs_mknod_dotl, .rename = v9fs_vfs_rename, .getattr = v9fs_vfs_getattr, .setattr = v9fs_vfs_setattr, @@ -1638,7 +1738,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotl = { .unlink = v9fs_vfs_unlink, .mkdir = v9fs_vfs_mkdir, .rmdir = v9fs_vfs_rmdir, - .mknod = v9fs_vfs_mknod, + .mknod = v9fs_vfs_mknod_dotl, .rename = v9fs_vfs_rename, .getattr = v9fs_vfs_getattr_dotl, .setattr = v9fs_vfs_setattr_dotl, diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 44a6883d714..ff32091d806 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -90,6 +90,8 @@ do { \ * @P9_RSTATFS: file system status response * @P9_TSYMLINK: make symlink request * @P9_RSYMLINK: make symlink response + * @P9_TMKNOD: create a special file object request + * @P9_RMKNOD: create a special file object response * @P9_TRENAME: rename request * @P9_RRENAME: rename response * @P9_TVERSION: version handshake request @@ -135,6 +137,8 @@ enum p9_msg_t { P9_RSTATFS, P9_TSYMLINK = 16, P9_RSYMLINK, + P9_TMKNOD = 18, + P9_RMKNOD, P9_TRENAME = 20, P9_RRENAME, P9_TGETATTR = 24, diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 2e039730920..6e70358c71d 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -245,6 +245,8 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *attr); struct 
p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, u64 request_mask); +int p9_client_mknod_dotl(struct p9_fid *oldfid, char *name, int mode, + dev_t rdev, gid_t gid, struct p9_qid *); struct p9_req_t *p9_tag_lookup(struct p9_client *, u16); void p9_client_cb(struct p9_client *c, struct p9_req_t *req); diff --git a/net/9p/client.c b/net/9p/client.c index e37e64cb939..cdfbd674079 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1622,3 +1622,34 @@ error: return err; } EXPORT_SYMBOL(p9_client_readdir); + +int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, + dev_t rdev, gid_t gid, struct p9_qid *qid) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + err = 0; + clnt = fid->clnt; + P9_DPRINTK(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d " + "minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev)); + req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddd", fid->fid, name, mode, + MAJOR(rdev), MINOR(rdev), gid); + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); + if (err) { + p9pdu_dump(1, req->rc); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type, + (unsigned long long)qid->path, qid->version); + +error: + p9_free_req(clnt, req); + return err; + +} +EXPORT_SYMBOL(p9_client_mknod_dotl); -- cgit v1.2.3 From 01a622bd7409bb7af38e784cff814e5e723f7951 Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Wed, 16 Jun 2010 14:27:22 +0530 Subject: 9p: Implement TMKDIR Implement TMKDIR as part of 2000.L Work Synopsis size[4] Tmkdir tag[2] fid[4] name[s] mode[4] gid[4] size[4] Rmkdir tag[2] qid[13] Description mkdir asks the file server to create a directory with given name, mode and gid. The qid for the new directory is returned with the mkdir reply message. Note: 72 is selected as the opcode for TMKDIR from the reserved list. Signed-off-by: M. 
Mohan Kumar Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_inode.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++-- include/net/9p/9p.h | 4 +++ include/net/9p/client.h | 2 ++ net/9p/client.c | 31 ++++++++++++++++++ 4 files changed, 117 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 4d9f45ec612..39dc7956732 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -731,6 +731,83 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) return err; } + +/** + * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory + * @dir: inode that is being unlinked + * @dentry: dentry that is being unlinked + * @mode: mode for new directory + * + */ + +static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry, + int mode) +{ + int err; + struct v9fs_session_info *v9ses; + struct p9_fid *fid = NULL, *dfid = NULL; + gid_t gid; + char *name; + struct inode *inode; + struct p9_qid qid; + struct dentry *dir_dentry; + + P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); + err = 0; + v9ses = v9fs_inode2v9ses(dir); + + mode |= S_IFDIR; + dir_dentry = v9fs_dentry_from_dir_inode(dir); + dfid = v9fs_fid_lookup(dir_dentry); + if (IS_ERR(dfid)) { + err = PTR_ERR(dfid); + P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); + dfid = NULL; + goto error; + } + + gid = v9fs_get_fsgid_for_create(dir); + if (gid < 0) { + P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n"); + goto error; + } + + name = (char *) dentry->d_name.name; + err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid); + if (err < 0) + goto error; + + /* instantiate inode and assign the unopened fid to the dentry */ + if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { + fid = p9_client_walk(dfid, 1, &name, 1); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", + err); + fid = NULL; + goto 
error; + } + + inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", + err); + goto error; + } + dentry->d_op = &v9fs_cached_dentry_operations; + d_instantiate(dentry, inode); + err = v9fs_fid_add(dentry, fid); + if (err < 0) + goto error; + fid = NULL; + } +error: + if (fid) + p9_client_clunk(fid); + return err; +} + /** * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode * @dir: inode that is being walked from @@ -1641,7 +1718,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode, struct inode *inode; gid_t gid; struct p9_qid qid; - struct dentry *dir_entry; + struct dentry *dir_dentry; P9_DPRINTK(P9_DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, @@ -1652,7 +1729,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode, v9ses = v9fs_inode2v9ses(dir); dir_dentry = v9fs_dentry_from_dir_inode(dir); - dfid = v9fs_fid_lookup(dir_entry); + dfid = v9fs_fid_lookup(dir_dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); @@ -1736,7 +1813,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotl = { .link = v9fs_vfs_link_dotl, .symlink = v9fs_vfs_symlink_dotl, .unlink = v9fs_vfs_unlink, - .mkdir = v9fs_vfs_mkdir, + .mkdir = v9fs_vfs_mkdir_dotl, .rmdir = v9fs_vfs_rmdir, .mknod = v9fs_vfs_mknod_dotl, .rename = v9fs_vfs_rename, diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index ff32091d806..091b471d8f0 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -94,6 +94,8 @@ do { \ * @P9_RMKNOD: create a special file object response * @P9_TRENAME: rename request * @P9_RRENAME: rename response + * @P9_TMKDIR: create a directory request + * @P9_RMKDIR: create a directory response * @P9_TVERSION: version handshake request * @P9_RVERSION: version handshake response * @P9_TAUTH: request to establish authentication channel @@ -149,6 
+151,8 @@ enum p9_msg_t { P9_RREADDIR, P9_TLINK = 70, P9_RLINK, + P9_TMKDIR = 72, + P9_RMKDIR, P9_TVERSION = 100, P9_RVERSION, P9_TAUTH = 102, diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 6e70358c71d..55d913a9b79 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -247,6 +247,8 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, int p9_client_mknod_dotl(struct p9_fid *oldfid, char *name, int mode, dev_t rdev, gid_t gid, struct p9_qid *); +int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, + gid_t gid, struct p9_qid *); struct p9_req_t *p9_tag_lookup(struct p9_client *, u16); void p9_client_cb(struct p9_client *c, struct p9_req_t *req); diff --git a/net/9p/client.c b/net/9p/client.c index cdfbd674079..a3bdd341f2a 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1653,3 +1653,34 @@ error: } EXPORT_SYMBOL(p9_client_mknod_dotl); + +int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, + gid_t gid, struct p9_qid *qid) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + err = 0; + clnt = fid->clnt; + P9_DPRINTK(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n", + fid->fid, name, mode, gid); + req = p9_client_rpc(clnt, P9_TMKDIR, "dsdd", fid->fid, name, mode, + gid); + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); + if (err) { + p9pdu_dump(1, req->rc); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, + (unsigned long long)qid->path, qid->version); + +error: + p9_free_req(clnt, req); + return err; + +} +EXPORT_SYMBOL(p9_client_mkdir_dotl); -- cgit v1.2.3 From 5643135a28464e7c19d8d23a9e0804697a62c84b Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Thu, 17 Jun 2010 18:27:46 -0700 Subject: fs/9p: This patch implements TLCREATE for 9p2000.L protocol. 
SYNOPSIS size[4] Tlcreate tag[2] fid[4] name[s] flags[4] mode[4] gid[4] size[4] Rlcreate tag[2] qid[13] iounit[4] DESCRIPTION The Tlcreate request asks the file server to create a new regular file with the name supplied, in the directory (dir) represented by fid. The mode argument specifies the permissions to use. The new file is created with the uid of the fid and with the supplied gid. The flags argument represents the Linux access mode flags with which the caller is requesting to open the file. The protocol allows all the Linux access modes, but it is up to the server to allow/disallow any of these access modes. If the server doesn't support one of the requested access modes, it is expected to return an error. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_inode.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++- include/net/9p/9p.h | 4 ++ include/net/9p/client.h | 2 + net/9p/client.c | 44 +++++++++++++++++++ 4 files changed, 163 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 39dc7956732..2ac245902a4 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -641,6 +641,118 @@ error: return ERR_PTR(err); } +/** + * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol. 
+ * @dir: directory inode that is being created + * @dentry: dentry that is being deleted + * @mode: create permissions + * @nd: path information + * + */ + +static int +v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + int err = 0; + char *name = NULL; + gid_t gid; + int flags; + struct v9fs_session_info *v9ses; + struct p9_fid *fid = NULL; + struct p9_fid *dfid, *ofid; + struct file *filp; + struct p9_qid qid; + struct inode *inode; + + v9ses = v9fs_inode2v9ses(dir); + if (nd && nd->flags & LOOKUP_OPEN) + flags = nd->intent.open.flags - 1; + else + flags = O_RDWR; + + name = (char *) dentry->d_name.name; + P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x " + "mode:0x%x\n", name, flags, mode); + + dfid = v9fs_fid_lookup(dentry->d_parent); + if (IS_ERR(dfid)) { + err = PTR_ERR(dfid); + P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); + return err; + } + + /* clone a fid to use for creation */ + ofid = p9_client_walk(dfid, 0, NULL, 1); + if (IS_ERR(ofid)) { + err = PTR_ERR(ofid); + P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); + return err; + } + + gid = v9fs_get_fsgid_for_create(dir); + err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid); + if (err < 0) { + P9_DPRINTK(P9_DEBUG_VFS, + "p9_client_open_dotl failed in creat %d\n", + err); + goto error; + } + + /* No need to populate the inode if we are not opening the file AND + * not in cached mode. + */ + if (!v9ses->cache && !(nd && nd->flags & LOOKUP_OPEN)) { + /* Not in cached mode. No need to populate inode with stat */ + dentry->d_op = &v9fs_dentry_operations; + p9_client_clunk(ofid); + d_instantiate(dentry, NULL); + return 0; + } + + /* Now walk from the parent so we can get an unopened fid. 
*/ + fid = p9_client_walk(dfid, 1, &name, 1); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); + fid = NULL; + goto error; + } + + /* instantiate inode and assign the unopened fid to dentry */ + inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); + goto error; + } + dentry->d_op = &v9fs_cached_dentry_operations; + d_instantiate(dentry, inode); + err = v9fs_fid_add(dentry, fid); + if (err < 0) + goto error; + + /* if we are opening a file, assign the open fid to the file */ + if (nd && nd->flags & LOOKUP_OPEN) { + filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created); + if (IS_ERR(filp)) { + p9_client_clunk(ofid); + return PTR_ERR(filp); + } + filp->private_data = ofid; + } else + p9_client_clunk(ofid); + + return 0; + +error: + if (ofid) + p9_client_clunk(ofid); + if (fid) + p9_client_clunk(fid); + return err; +} + /** * v9fs_vfs_create - VFS hook to create files * @dir: directory inode that is being created @@ -1808,7 +1920,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = { }; static const struct inode_operations v9fs_dir_inode_operations_dotl = { - .create = v9fs_vfs_create, + .create = v9fs_vfs_create_dotl, .lookup = v9fs_vfs_lookup, .link = v9fs_vfs_link_dotl, .symlink = v9fs_vfs_symlink_dotl, diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 091b471d8f0..06d111d6103 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -92,6 +92,8 @@ do { \ * @P9_RSYMLINK: make symlink response * @P9_TMKNOD: create a special file object request * @P9_RMKNOD: create a special file object response + * @P9_TLCREATE: prepare a handle for I/O on an new file for 9P2000.L + * @P9_RLCREATE: response with file access information for 9P2000.L * @P9_TRENAME: rename request * @P9_RRENAME: rename response * @P9_TMKDIR: create a directory request @@ -137,6 +139,8 @@ do { \ 
enum p9_msg_t { P9_TSTATFS = 8, P9_RSTATFS, + P9_TLCREATE = 14, + P9_RLCREATE, P9_TSYMLINK = 16, P9_RSYMLINK, P9_TMKNOD = 18, diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 55d913a9b79..d755c0ed675 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -229,6 +229,8 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, int p9_client_link(struct p9_fid *fid, struct p9_fid *oldfid, char *newname); int p9_client_symlink(struct p9_fid *fid, char *name, char *symname, gid_t gid, struct p9_qid *qid); +int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, + gid_t gid, struct p9_qid *qid); int p9_client_clunk(struct p9_fid *fid); int p9_client_remove(struct p9_fid *fid); int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, diff --git a/net/9p/client.c b/net/9p/client.c index a3bdd341f2a..e580409b105 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1050,6 +1050,50 @@ error: } EXPORT_SYMBOL(p9_client_open); +int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, + gid_t gid, struct p9_qid *qid) +{ + int err = 0; + struct p9_client *clnt; + struct p9_req_t *req; + int iounit; + + P9_DPRINTK(P9_DEBUG_9P, + ">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n", + ofid->fid, name, flags, mode, gid); + clnt = ofid->clnt; + + if (ofid->mode != -1) + return -EINVAL; + + req = p9_client_rpc(clnt, P9_TLCREATE, "dsddd", ofid->fid, name, flags, + mode, gid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n", + qid->type, + (unsigned long long)qid->path, + qid->version, iounit); + + ofid->mode = mode; + ofid->iounit = iounit; + +free_and_error: + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_create_dotl); + 
int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, char *extension) { -- cgit v1.2.3 From ef56547efa3c88609069e2a91f46e25c31dd536e Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Tue, 22 Jun 2010 19:47:50 +0530 Subject: 9p: Implement LOPEN Implement the 9p2000.L version of the open (LOPEN) interface in the 9p client. For LOPEN, there is no need to convert the flags between 9p mode and VFS mode. Synopsis: size[4] Tlopen tag[2] fid[4] mode[4] size[4] Rlopen tag[2] qid[13] iounit[4] [Fix mode bit format - jvrao@linux.vnet.ibm.com] Signed-off-by: M. Mohan Kumar Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_file.c | 13 +++++++++---- include/net/9p/9p.h | 2 ++ net/9p/client.c | 17 ++++++++++------- 3 files changed, 21 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 2d686ec322a..e97c92bd6f1 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -59,9 +59,13 @@ int v9fs_file_open(struct inode *inode, struct file *file) struct p9_fid *fid; int omode; - P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file); + P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file); v9ses = v9fs_inode2v9ses(inode); - omode = v9fs_uflags2omode(file->f_flags, v9fs_proto_dotu(v9ses)); + if (v9fs_proto_dotl(v9ses)) + omode = file->f_flags; + else + omode = v9fs_uflags2omode(file->f_flags, + v9fs_proto_dotu(v9ses)); fid = file->private_data; if (!fid) { fid = v9fs_fid_clone(file->f_path.dentry); @@ -73,11 +77,12 @@ int v9fs_file_open(struct inode *inode, struct file *file) p9_client_clunk(fid); return err; } - if (omode & P9_OTRUNC) { + if (file->f_flags & O_TRUNC) { i_size_write(inode, 0); inode->i_blocks = 0; } - if ((file->f_flags & O_APPEND) && (!v9fs_proto_dotu(v9ses))) + if ((file->f_flags & O_APPEND) && + (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses))) generic_file_llseek(file, 0, SEEK_END); } diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index
06d111d6103..cf580a40e29 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -139,6 +139,8 @@ do { \ enum p9_msg_t { P9_TSTATFS = 8, P9_RSTATFS, + P9_TLOPEN = 12, + P9_RLOPEN, P9_TLCREATE = 14, P9_RLCREATE, P9_TSYMLINK = 16, diff --git a/net/9p/client.c b/net/9p/client.c index e580409b105..c458e042d38 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1016,14 +1016,18 @@ int p9_client_open(struct p9_fid *fid, int mode) struct p9_qid qid; int iounit; - P9_DPRINTK(P9_DEBUG_9P, ">>> TOPEN fid %d mode %d\n", fid->fid, mode); - err = 0; clnt = fid->clnt; + P9_DPRINTK(P9_DEBUG_9P, ">>> %s fid %d mode %d\n", + p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode); + err = 0; if (fid->mode != -1) return -EINVAL; - req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode); + if (p9_is_proto_dotl(clnt)) + req = p9_client_rpc(clnt, P9_TLOPEN, "dd", fid->fid, mode); + else + req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1035,10 +1039,9 @@ int p9_client_open(struct p9_fid *fid, int mode) goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n", - qid.type, - (unsigned long long)qid.path, - qid.version, iounit); + P9_DPRINTK(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n", + p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN", qid.type, + (unsigned long long)qid.path, qid.version, iounit); fid->mode = mode; fid->iounit = iounit; -- cgit v1.2.3 From 0ef63f345c48afe5896c5cffcba57f0457d409b9 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 31 May 2010 13:22:45 +0530 Subject: net/9p: Implement attrwalk 9p call TXATTRWALK: Descend an ATTR namespace size[4] TXATTRWALK tag[2] fid[4] newfid[4] name[s] size[4] RXATTRWALK tag[2] size[8] txattrwalk gets a fid pointing to the xattr. This fid can later be used to read the xattr value. If name is NULL, the fid returned can be used to get the list of extended attributes associated with the file system object.
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- include/net/9p/9p.h | 2 ++ include/net/9p/client.h | 1 + net/9p/client.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+) (limited to 'include') diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index cf580a40e29..6fabb5e559b 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -153,6 +153,8 @@ enum p9_msg_t { P9_RGETATTR, P9_TSETATTR = 26, P9_RSETATTR, + P9_TXATTRWALK = 30, + P9_RXATTRWALK, P9_TREADDIR = 40, P9_RREADDIR, P9_TLINK = 70, diff --git a/include/net/9p/client.h b/include/net/9p/client.h index d755c0ed675..60398b1a3f7 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -260,5 +260,6 @@ void p9stat_free(struct p9_wstat *); int p9_is_proto_dotu(struct p9_client *clnt); int p9_is_proto_dotl(struct p9_client *clnt); +struct p9_fid *p9_client_xattrwalk(struct p9_fid *, const char *, u64 *); #endif /* NET_9P_CLIENT_H */ diff --git a/net/9p/client.c b/net/9p/client.c index c458e042d38..ec80ee71d45 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1622,6 +1622,56 @@ error: } EXPORT_SYMBOL(p9_client_rename); +/* + * An xattrwalk without @attr_name gives the fid for the lisxattr namespace + */ +struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, + const char *attr_name, u64 *attr_size) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + struct p9_fid *attr_fid; + + err = 0; + clnt = file_fid->clnt; + attr_fid = p9_fid_create(clnt); + if (IS_ERR(attr_fid)) { + err = PTR_ERR(attr_fid); + attr_fid = NULL; + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, + ">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n", + file_fid->fid, attr_fid->fid, attr_name); + + req = p9_client_rpc(clnt, P9_TXATTRWALK, "dds", + file_fid->fid, attr_fid->fid, attr_name); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + err = p9pdu_readf(req->rc, clnt->proto_version, "q", 
attr_size); + if (err) { + p9pdu_dump(1, req->rc); + p9_free_req(clnt, req); + goto clunk_fid; + } + p9_free_req(clnt, req); + P9_DPRINTK(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n", + attr_fid->fid, *attr_size); + return attr_fid; +clunk_fid: + p9_client_clunk(attr_fid); + attr_fid = NULL; +error: + if (attr_fid && (attr_fid != file_fid)) + p9_fid_destroy(attr_fid); + + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(p9_client_xattrwalk); + int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) { int err, rsize, total; -- cgit v1.2.3 From eda25e46161527845572131b37706a458d9270ef Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 31 May 2010 13:22:50 +0530 Subject: net/9p: Implement TXATTRCREATE 9p call TXATTRCREATE: Prepare a fid for setting an xattr value on a file system object. size[4] TXATTRCREATE tag[2] fid[4] name[s] attr_size[8] flags[4] size[4] RXATTRCREATE tag[2] txattrcreate gets a fid pointing to the xattr. This fid can later be used to set the xattr value. The flag value is derived from the Linux setxattr call. The manpage says "The flags parameter can be used to refine the semantics of the operation. XATTR_CREATE specifies a pure create, which fails if the named attribute exists already. XATTR_REPLACE specifies a pure replace operation, which fails if the named attribute does not already exist. By default (no flags), the extended attribute will be created if need be, or will simply replace the value if the attribute exists." The actual setxattr operation happens when the fid is clunked. At that point, the written byte count and the attr_size specified in TXATTRCREATE should be the same; otherwise an error will be returned.
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- include/net/9p/9p.h | 2 ++ include/net/9p/client.h | 1 + net/9p/client.c | 25 +++++++++++++++++++++++++ 3 files changed, 28 insertions(+) (limited to 'include') diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 6fabb5e559b..a8de812ccbc 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -155,6 +155,8 @@ enum p9_msg_t { P9_RSETATTR, P9_TXATTRWALK = 30, P9_RXATTRWALK, + P9_TXATTRCREATE = 32, + P9_RXATTRCREATE, P9_TREADDIR = 40, P9_RREADDIR, P9_TLINK = 70, diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 60398b1a3f7..d1aa2cfb30f 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -261,5 +261,6 @@ void p9stat_free(struct p9_wstat *); int p9_is_proto_dotu(struct p9_client *clnt); int p9_is_proto_dotl(struct p9_client *clnt); struct p9_fid *p9_client_xattrwalk(struct p9_fid *, const char *, u64 *); +int p9_client_xattrcreate(struct p9_fid *, const char *, u64, int); #endif /* NET_9P_CLIENT_H */ diff --git a/net/9p/client.c b/net/9p/client.c index ec80ee71d45..43396acd714 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1672,6 +1672,31 @@ error: } EXPORT_SYMBOL_GPL(p9_client_xattrwalk); +int p9_client_xattrcreate(struct p9_fid *fid, const char *name, + u64 attr_size, int flags) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, + ">>> TXATTRCREATE fid %d name %s size %lld flag %d\n", + fid->fid, name, (long long)attr_size, flags); + err = 0; + clnt = fid->clnt; + req = p9_client_rpc(clnt, P9_TXATTRCREATE, "dsqd", + fid->fid, name, attr_size, flags); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid); + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL_GPL(p9_client_xattrcreate); + int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) { int err, 
rsize, total; -- cgit v1.2.3