diff options
Diffstat (limited to 'init')
-rw-r--r-- | init/Kconfig | 149 | ||||
-rw-r--r-- | init/do_mounts.c | 70 | ||||
-rw-r--r-- | init/do_mounts_initrd.c | 7 | ||||
-rw-r--r-- | init/main.c | 77 |
4 files changed, 187 insertions, 116 deletions
diff --git a/init/Kconfig b/init/Kconfig index 5cff9a980c39..fdfd97efe0e0 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -21,6 +21,13 @@ config CONSTRUCTORS depends on !UML default y +config HAVE_IRQ_WORK + bool + +config IRQ_WORK + bool + depends on HAVE_IRQ_WORK + menu "General setup" config EXPERIMENTAL @@ -64,7 +71,7 @@ config BROKEN_ON_SMP config LOCK_KERNEL bool - depends on SMP || PREEMPT + depends on (SMP || PREEMPT) && BKL default y config INIT_ENV_ARG_LIMIT @@ -320,13 +327,19 @@ config AUDITSYSCALL help Enable low-overhead system-call auditing infrastructure that can be used independently or with another kernel subsystem, - such as SELinux. To use audit's filesystem watch feature, please - ensure that INOTIFY is configured. + such as SELinux. + +config AUDIT_WATCH + def_bool y + depends on AUDITSYSCALL + select FSNOTIFY config AUDIT_TREE def_bool y depends on AUDITSYSCALL - select INOTIFY + select FSNOTIFY + +source "kernel/irq/Kconfig" menu "RCU Subsystem" @@ -336,6 +349,7 @@ choice config TREE_RCU bool "Tree-based hierarchical RCU" + depends on !PREEMPT && SMP help This option selects the RCU implementation that is designed for very large SMP system with hundreds or @@ -343,7 +357,7 @@ config TREE_RCU smaller systems. config TREE_PREEMPT_RCU - bool "Preemptable tree-based hierarchical RCU" + bool "Preemptible tree-based hierarchical RCU" depends on PREEMPT help This option selects the RCU implementation that is @@ -361,8 +375,22 @@ config TINY_RCU is not required. This option greatly reduces the memory footprint of RCU. +config TINY_PREEMPT_RCU + bool "Preemptible UP-only small-memory-footprint RCU" + depends on !SMP && PREEMPT + help + This option selects the RCU implementation that is designed + for real-time UP systems. This option greatly reduces the + memory footprint of RCU. 
+ endchoice +config PREEMPT_RCU + def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU ) + help + This option enables preemptible-RCU code that is common between + the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. + config RCU_TRACE bool "Enable tracing for RCU" depends on TREE_RCU || TREE_PREEMPT_RCU @@ -383,9 +411,12 @@ config RCU_FANOUT help This option controls the fanout of hierarchical implementations of RCU, allowing RCU to work efficiently on machines with - large numbers of CPUs. This value must be at least the cube - root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit - systems and up to 262,144 for 64-bit systems. + large numbers of CPUs. This value must be at least the fourth + root of NR_CPUS, which allows NR_CPUS to be insanely large. + The default value of RCU_FANOUT should be used for production + systems, but if you are stress-testing the RCU implementation + itself, small RCU_FANOUT values allow you to test large-system + code paths on small(er) systems. Select a specific number if testing RCU itself. Take the default if unsure. @@ -573,8 +604,8 @@ config CGROUP_MEM_RES_CTLR could in turn add some fork/exit overhead. config CGROUP_MEM_RES_CTLR_SWAP - bool "Memory Resource Controller Swap Extension(EXPERIMENTAL)" - depends on CGROUP_MEM_RES_CTLR && SWAP && EXPERIMENTAL + bool "Memory Resource Controller Swap Extension" + depends on CGROUP_MEM_RES_CTLR && SWAP help Add swap management feature to memory resource controller. When you enable this, you can limit mem+swap usage per cgroup. In other words, @@ -630,11 +661,14 @@ config BLK_CGROUP Currently, CFQ IO scheduler uses it to recognize task groups and control disk bandwidth allocation (proportional time slice allocation) - to such task groups. + to such task groups. It is also used by bio throttling logic in + block layer to implement upper limit in IO rates on a device. This option only enables generic Block IO controller infrastructure. 
- One needs to also enable actual IO controlling logic in CFQ for it - to take effect. (CONFIG_CFQ_GROUP_IOSCHED=y). + One needs to also enable actual IO controlling logic/policy. For + enabling proportional weight division of disk bandwidth in CFQ set + CONFIG_CFQ_GROUP_IOSCHED=y and for enabling throttling policy set + CONFIG_BLK_THROTTLE=y. See Documentation/cgroups/blkio-controller.txt for more information. @@ -652,40 +686,42 @@ config MM_OWNER bool config SYSFS_DEPRECATED - bool - -config SYSFS_DEPRECATED_V2 bool "enable deprecated sysfs features to support old userspace tools" depends on SYSFS default n - select SYSFS_DEPRECATED - help - This option switches the layout of sysfs to the deprecated - version. Do not use it on recent distributions. - - The current sysfs layout features a unified device tree at - /sys/devices/, which is able to express a hierarchy between - class devices. If the deprecated option is set to Y, the - unified device tree is split into a bus device tree at - /sys/devices/ and several individual class device trees at - /sys/class/. The class and bus devices will be connected by - "<subsystem>:<name>" and the "device" links. The "block" - class devices, will not show up in /sys/class/block/. Some - subsystems will suppress the creation of some devices which - depend on the unified device tree. - - This option is not a pure compatibility option that can - be safely enabled on newer distributions. It will change the - layout of sysfs to the non-extensible deprecated version, - and disable some features, which can not be exported without - confusing older userspace tools. Since 2007/2008 all major - distributions do not enable this option, and ship no tools which - depend on the deprecated layout or this option. - - If you are using a new kernel on an older distribution, or use - older userspace tools, you might need to say Y here. Do not say Y, - if the original kernel, that came with your distribution, has - this option set to N. 
+ help + This option adds code that switches the layout of the "block" class + devices, to not show up in /sys/class/block/, but only in + /sys/block/. + + This switch is only active when the sysfs.deprecated=1 boot option is + passed or the SYSFS_DEPRECATED_V2 option is set. + + This option allows new kernels to run on old distributions and tools, + which might get confused by /sys/class/block/. Since 2007/2008 all + major distributions and tools handle this just fine. + + Recent distributions and userspace tools after 2009/2010 depend on + the existence of /sys/class/block/, and will not work with this + option enabled. + + Only if you are using a new kernel on an old distribution, you might + need to say Y here. + +config SYSFS_DEPRECATED_V2 + bool "enabled deprecated sysfs features by default" + default n + depends on SYSFS + depends on SYSFS_DEPRECATED + help + Enable deprecated sysfs by default. + + See the CONFIG_SYSFS_DEPRECATED option for more details about this + option. + + Only if you are using a new kernel on an old distribution, you might + need to say Y here. Even then, odds are you would not need it + enabled, you can always pass the boot option if absolutely necessary. config RELAY bool "Kernel->user space relay support (formerly relayfs)" @@ -983,6 +1019,7 @@ config PERF_EVENTS default y if (PROFILING || PERF_COUNTERS) depends on HAVE_PERF_EVENTS select ANON_INODES + select IRQ_WORK help Enable kernel support for various performance events provided by software and hardware. @@ -1143,30 +1180,6 @@ config TRACEPOINTS source "arch/Kconfig" -config SLOW_WORK - default n - bool - help - The slow work thread pool provides a number of dynamically allocated - threads that can be used by the kernel to perform operations that - take a relatively long time. - - An example of this would be CacheFiles doing a path lookup followed - by a series of mkdirs and a create call, all of which have to touch - disk. - - See Documentation/slow-work.txt. 
- -config SLOW_WORK_DEBUG - bool "Slow work debugging through debugfs" - default n - depends on SLOW_WORK && DEBUG_FS - help - Display the contents of the slow work run queue through debugfs, - including items currently executing. - - See Documentation/slow-work.txt. - endmenu # General setup config HAVE_GENERIC_DMA_COHERENT diff --git a/init/do_mounts.c b/init/do_mounts.c index 02e3ca4fc527..42db0551c3aa 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -58,6 +58,62 @@ static int __init readwrite(char *str) __setup("ro", readonly); __setup("rw", readwrite); +#ifdef CONFIG_BLOCK +/** + * match_dev_by_uuid - callback for finding a partition using its uuid + * @dev: device passed in by the caller + * @data: opaque pointer to the packed binary UUID to match + * + * Returns 1 if the device matches, and 0 otherwise. + */ +static int match_dev_by_uuid(struct device *dev, void *data) +{ + u8 *uuid = data; + struct hd_struct *part = dev_to_part(dev); + + if (!part->info) + goto no_match; + + if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid))) + goto no_match; + + return 1; +no_match: + return 0; +} + + +/** + * devt_from_partuuid - looks up the dev_t of a partition by its UUID + * @uuid_str: 36 byte char array containing a hex ascii UUID + * + * The function will return the first partition which contains a matching + * UUID value in its partition_meta_info struct. This does not search + * by filesystem UUIDs. + * + * Returns the matching dev_t on success or 0 on failure. + */ +static dev_t __init devt_from_partuuid(char *uuid_str) +{ + dev_t res = 0; + struct device *dev = NULL; + u8 uuid[16]; + + /* Pack the requested UUID in the expected format. */ + part_pack_uuid(uuid_str, uuid); + + dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid); + if (!dev) + goto done; + + res = dev->devt; + put_device(dev); + +done: + return res; +} +#endif + /* * Convert a name into device number. 
We accept the following variants: * @@ -68,6 +124,8 @@ __setup("rw", readwrite); * of partition - device number of disk plus the partition number * 5) /dev/<disk_name>p<decimal> - same as the above, that form is * used when disk name of partitioned disk ends on a digit. + * 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the + * unique id of a partition if the partition table provides it. * * If name doesn't have fall into the categories above, we return (0,0). * block_class is used to check if something is a disk name. If the disk @@ -82,6 +140,18 @@ dev_t name_to_dev_t(char *name) dev_t res = 0; int part; +#ifdef CONFIG_BLOCK + if (strncmp(name, "PARTUUID=", 9) == 0) { + name += 9; + if (strlen(name) != 36) + goto fail; + res = devt_from_partuuid(name); + if (!res) + goto fail; + goto done; + } +#endif + if (strncmp(name, "/dev/", 5) != 0) { unsigned maj, min; diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 2b108538d0d9..3098a38f3ae1 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -24,10 +24,11 @@ static int __init no_initrd(char *str) __setup("noinitrd", no_initrd); -static int __init do_linuxrc(void * shell) +static int __init do_linuxrc(void *_shell) { - static char *argv[] = { "linuxrc", NULL, }; - extern char * envp_init[]; + static const char *argv[] = { "linuxrc", NULL, }; + extern const char *envp_init[]; + const char *shell = _shell; sys_close(old_fd);sys_close(root_fd); sys_setsid(); diff --git a/init/main.c b/init/main.c index 4ddb53f04f2a..e59af24a0b7c 100644 --- a/init/main.c +++ b/init/main.c @@ -32,7 +32,6 @@ #include <linux/start_kernel.h> #include <linux/security.h> #include <linux/smp.h> -#include <linux/workqueue.h> #include <linux/profile.h> #include <linux/rcupdate.h> #include <linux/moduleparam.h> @@ -66,11 +65,9 @@ #include <linux/ftrace.h> #include <linux/async.h> #include <linux/kmemcheck.h> -#include <linux/kmemtrace.h> #include <linux/sfi.h> #include <linux/shmem_fs.h> #include 
<linux/slab.h> -#include <trace/boot.h> #include <asm/io.h> #include <asm/bugs.h> @@ -200,15 +197,15 @@ static int __init set_reset_devices(char *str) __setup("reset_devices", set_reset_devices); -static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; -char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; +static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; +const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; static const char *panic_later, *panic_param; -extern struct obs_kernel_param __setup_start[], __setup_end[]; +extern const struct obs_kernel_param __setup_start[], __setup_end[]; static int __init obsolete_checksetup(char *line) { - struct obs_kernel_param *p; + const struct obs_kernel_param *p; int had_early_param = 0; p = __setup_start; @@ -427,7 +424,6 @@ static void __init setup_command_line(char *command_line) static __initdata DECLARE_COMPLETION(kthreadd_done); static noinline void __init_refok rest_init(void) - __releases(kernel_lock) { int pid; @@ -444,7 +440,6 @@ static noinline void __init_refok rest_init(void) kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns); rcu_read_unlock(); complete(&kthreadd_done); - unlock_kernel(); /* * The boot idle thread must execute schedule() @@ -462,7 +457,7 @@ static noinline void __init_refok rest_init(void) /* Check for early params. 
*/ static int __init do_early_param(char *param, char *val) { - struct obs_kernel_param *p; + const struct obs_kernel_param *p; for (p = __setup_start; p < __setup_end; p++) { if ((p->early && strcmp(param, p->str) == 0) || @@ -540,7 +535,7 @@ static void __init mm_init(void) asmlinkage void __init start_kernel(void) { char * command_line; - extern struct kernel_param __start___param[], __stop___param[]; + extern const struct kernel_param __start___param[], __stop___param[]; smp_setup_processor_id(); @@ -560,13 +555,11 @@ asmlinkage void __init start_kernel(void) local_irq_disable(); early_boot_irqs_off(); - early_init_irq_lock_class(); /* * Interrupts are still disabled. Do necessary setups, then * enable them */ - lock_kernel(); tick_init(); boot_cpu_init(); page_address_init(); @@ -664,7 +657,6 @@ asmlinkage void __init start_kernel(void) #endif page_cgroup_init(); enable_debug_pagealloc(); - kmemtrace_init(); kmemleak_init(); debug_objects_mem_init(); idr_init_cache(); @@ -726,38 +718,39 @@ int initcall_debug; core_param(initcall_debug, initcall_debug, bool, 0644); static char msgbuf[64]; -static struct boot_trace_call call; -static struct boot_trace_ret ret; -int do_one_initcall(initcall_t fn) +static int __init_or_module do_one_initcall_debug(initcall_t fn) { - int count = preempt_count(); ktime_t calltime, delta, rettime; + unsigned long long duration; + int ret; - if (initcall_debug) { - call.caller = task_pid_nr(current); - printk("calling %pF @ %i\n", fn, call.caller); - calltime = ktime_get(); - trace_boot_call(&call, fn); - enable_boot_trace(); - } + printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current)); + calltime = ktime_get(); + ret = fn(); + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + duration = (unsigned long long) ktime_to_ns(delta) >> 10; + printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n", fn, + ret, duration); - ret.result = fn(); + return ret; +} - if (initcall_debug) { - disable_boot_trace(); - 
rettime = ktime_get(); - delta = ktime_sub(rettime, calltime); - ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10; - trace_boot_ret(&ret, fn); - printk("initcall %pF returned %d after %Ld usecs\n", fn, - ret.result, ret.duration); - } +int __init_or_module do_one_initcall(initcall_t fn) +{ + int count = preempt_count(); + int ret; + + if (initcall_debug) + ret = do_one_initcall_debug(fn); + else + ret = fn(); msgbuf[0] = 0; - if (ret.result && ret.result != -ENODEV && initcall_debug) - sprintf(msgbuf, "error code %d ", ret.result); + if (ret && ret != -ENODEV && initcall_debug) + sprintf(msgbuf, "error code %d ", ret); if (preempt_count() != count) { strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf)); @@ -771,7 +764,7 @@ int do_one_initcall(initcall_t fn) printk("initcall %pF returned with %s\n", fn, msgbuf); } - return ret.result; + return ret; } @@ -797,7 +790,6 @@ static void __init do_initcalls(void) */ static void __init do_basic_setup(void) { - init_workqueues(); cpuset_init_smp(); usermodehelper_init(); init_tmpfs(); @@ -815,7 +807,7 @@ static void __init do_pre_smp_initcalls(void) do_one_initcall(*fn); } -static void run_init_process(char *init_filename) +static void run_init_process(const char *init_filename) { argv_init[0] = init_filename; kernel_execve(init_filename, argv_init, envp_init); @@ -825,12 +817,10 @@ static void run_init_process(char *init_filename) * makes it inline to init() and it becomes part of init.text section */ static noinline int init_post(void) - __releases(kernel_lock) { /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); free_initmem(); - unlock_kernel(); mark_rodata_ro(); system_state = SYSTEM_RUNNING; numa_default_policy(); @@ -870,8 +860,6 @@ static int __init kernel_init(void * unused) * Wait until kthreadd is all set-up. 
*/ wait_for_completion(&kthreadd_done); - lock_kernel(); - /* * init can allocate pages on any node */ @@ -895,7 +883,6 @@ static int __init kernel_init(void * unused) smp_prepare_cpus(setup_max_cpus); do_pre_smp_initcalls(); - start_boot_trace(); smp_init(); sched_init_smp(); |