summaryrefslogtreecommitdiff
path: root/init
diff options
context:
space:
mode:
Diffstat (limited to 'init')
-rw-r--r--init/Kconfig149
-rw-r--r--init/do_mounts.c70
-rw-r--r--init/do_mounts_initrd.c7
-rw-r--r--init/main.c77
4 files changed, 187 insertions, 116 deletions
diff --git a/init/Kconfig b/init/Kconfig
index 5cff9a980c39..fdfd97efe0e0 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -21,6 +21,13 @@ config CONSTRUCTORS
depends on !UML
default y
+config HAVE_IRQ_WORK
+ bool
+
+config IRQ_WORK
+ bool
+ depends on HAVE_IRQ_WORK
+
menu "General setup"
config EXPERIMENTAL
@@ -64,7 +71,7 @@ config BROKEN_ON_SMP
config LOCK_KERNEL
bool
- depends on SMP || PREEMPT
+ depends on (SMP || PREEMPT) && BKL
default y
config INIT_ENV_ARG_LIMIT
@@ -320,13 +327,19 @@ config AUDITSYSCALL
help
Enable low-overhead system-call auditing infrastructure that
can be used independently or with another kernel subsystem,
- such as SELinux. To use audit's filesystem watch feature, please
- ensure that INOTIFY is configured.
+ such as SELinux.
+
+config AUDIT_WATCH
+ def_bool y
+ depends on AUDITSYSCALL
+ select FSNOTIFY
config AUDIT_TREE
def_bool y
depends on AUDITSYSCALL
- select INOTIFY
+ select FSNOTIFY
+
+source "kernel/irq/Kconfig"
menu "RCU Subsystem"
@@ -336,6 +349,7 @@ choice
config TREE_RCU
bool "Tree-based hierarchical RCU"
+ depends on !PREEMPT && SMP
help
This option selects the RCU implementation that is
designed for very large SMP system with hundreds or
@@ -343,7 +357,7 @@ config TREE_RCU
smaller systems.
config TREE_PREEMPT_RCU
- bool "Preemptable tree-based hierarchical RCU"
+ bool "Preemptible tree-based hierarchical RCU"
depends on PREEMPT
help
This option selects the RCU implementation that is
@@ -361,8 +375,22 @@ config TINY_RCU
is not required. This option greatly reduces the
memory footprint of RCU.
+config TINY_PREEMPT_RCU
+ bool "Preemptible UP-only small-memory-footprint RCU"
+ depends on !SMP && PREEMPT
+ help
+ This option selects the RCU implementation that is designed
+ for real-time UP systems. This option greatly reduces the
+ memory footprint of RCU.
+
endchoice
+config PREEMPT_RCU
+ def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU )
+ help
+ This option enables preemptible-RCU code that is common between
+ the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
+
config RCU_TRACE
bool "Enable tracing for RCU"
depends on TREE_RCU || TREE_PREEMPT_RCU
@@ -383,9 +411,12 @@ config RCU_FANOUT
help
This option controls the fanout of hierarchical implementations
of RCU, allowing RCU to work efficiently on machines with
- large numbers of CPUs. This value must be at least the cube
- root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit
- systems and up to 262,144 for 64-bit systems.
+ large numbers of CPUs. This value must be at least the fourth
+ root of NR_CPUS, which allows NR_CPUS to be insanely large.
+ The default value of RCU_FANOUT should be used for production
+ systems, but if you are stress-testing the RCU implementation
+ itself, small RCU_FANOUT values allow you to test large-system
+ code paths on small(er) systems.
Select a specific number if testing RCU itself.
Take the default if unsure.
@@ -573,8 +604,8 @@ config CGROUP_MEM_RES_CTLR
could in turn add some fork/exit overhead.
config CGROUP_MEM_RES_CTLR_SWAP
- bool "Memory Resource Controller Swap Extension(EXPERIMENTAL)"
- depends on CGROUP_MEM_RES_CTLR && SWAP && EXPERIMENTAL
+ bool "Memory Resource Controller Swap Extension"
+ depends on CGROUP_MEM_RES_CTLR && SWAP
help
Add swap management feature to memory resource controller. When you
enable this, you can limit mem+swap usage per cgroup. In other words,
@@ -630,11 +661,14 @@ config BLK_CGROUP
Currently, CFQ IO scheduler uses it to recognize task groups and
control disk bandwidth allocation (proportional time slice allocation)
- to such task groups.
+ to such task groups. It is also used by bio throttling logic in
+ block layer to implement upper limit in IO rates on a device.
This option only enables generic Block IO controller infrastructure.
- One needs to also enable actual IO controlling logic in CFQ for it
- to take effect. (CONFIG_CFQ_GROUP_IOSCHED=y).
+ One needs to also enable actual IO controlling logic/policy. For
+ enabling proportional weight division of disk bandwidth in CFQ seti
+ CONFIG_CFQ_GROUP_IOSCHED=y and for enabling throttling policy set
+ CONFIG_BLK_THROTTLE=y.
See Documentation/cgroups/blkio-controller.txt for more information.
@@ -652,40 +686,42 @@ config MM_OWNER
bool
config SYSFS_DEPRECATED
- bool
-
-config SYSFS_DEPRECATED_V2
bool "enable deprecated sysfs features to support old userspace tools"
depends on SYSFS
default n
- select SYSFS_DEPRECATED
- help
- This option switches the layout of sysfs to the deprecated
- version. Do not use it on recent distributions.
-
- The current sysfs layout features a unified device tree at
- /sys/devices/, which is able to express a hierarchy between
- class devices. If the deprecated option is set to Y, the
- unified device tree is split into a bus device tree at
- /sys/devices/ and several individual class device trees at
- /sys/class/. The class and bus devices will be connected by
- "<subsystem>:<name>" and the "device" links. The "block"
- class devices, will not show up in /sys/class/block/. Some
- subsystems will suppress the creation of some devices which
- depend on the unified device tree.
-
- This option is not a pure compatibility option that can
- be safely enabled on newer distributions. It will change the
- layout of sysfs to the non-extensible deprecated version,
- and disable some features, which can not be exported without
- confusing older userspace tools. Since 2007/2008 all major
- distributions do not enable this option, and ship no tools which
- depend on the deprecated layout or this option.
-
- If you are using a new kernel on an older distribution, or use
- older userspace tools, you might need to say Y here. Do not say Y,
- if the original kernel, that came with your distribution, has
- this option set to N.
+ help
+ This option adds code that switches the layout of the "block" class
+ devices, to not show up in /sys/class/block/, but only in
+ /sys/block/.
+
+ This switch is only active when the sysfs.deprecated=1 boot option is
+ passed or the SYSFS_DEPRECATED_V2 option is set.
+
+ This option allows new kernels to run on old distributions and tools,
+ which might get confused by /sys/class/block/. Since 2007/2008 all
+ major distributions and tools handle this just fine.
+
+ Recent distributions and userspace tools after 2009/2010 depend on
+ the existence of /sys/class/block/, and will not work with this
+ option enabled.
+
+ Only if you are using a new kernel on an old distribution, you might
+ need to say Y here.
+
+config SYSFS_DEPRECATED_V2
+ bool "enabled deprecated sysfs features by default"
+ default n
+ depends on SYSFS
+ depends on SYSFS_DEPRECATED
+ help
+ Enable deprecated sysfs by default.
+
+ See the CONFIG_SYSFS_DEPRECATED option for more details about this
+ option.
+
+ Only if you are using a new kernel on an old distribution, you might
+ need to say Y here. Even then, odds are you would not need it
+ enabled, you can always pass the boot option if absolutely necessary.
config RELAY
bool "Kernel->user space relay support (formerly relayfs)"
@@ -983,6 +1019,7 @@ config PERF_EVENTS
default y if (PROFILING || PERF_COUNTERS)
depends on HAVE_PERF_EVENTS
select ANON_INODES
+ select IRQ_WORK
help
Enable kernel support for various performance events provided
by software and hardware.
@@ -1143,30 +1180,6 @@ config TRACEPOINTS
source "arch/Kconfig"
-config SLOW_WORK
- default n
- bool
- help
- The slow work thread pool provides a number of dynamically allocated
- threads that can be used by the kernel to perform operations that
- take a relatively long time.
-
- An example of this would be CacheFiles doing a path lookup followed
- by a series of mkdirs and a create call, all of which have to touch
- disk.
-
- See Documentation/slow-work.txt.
-
-config SLOW_WORK_DEBUG
- bool "Slow work debugging through debugfs"
- default n
- depends on SLOW_WORK && DEBUG_FS
- help
- Display the contents of the slow work run queue through debugfs,
- including items currently executing.
-
- See Documentation/slow-work.txt.
-
endmenu # General setup
config HAVE_GENERIC_DMA_COHERENT
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 02e3ca4fc527..42db0551c3aa 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -58,6 +58,62 @@ static int __init readwrite(char *str)
__setup("ro", readonly);
__setup("rw", readwrite);
+#ifdef CONFIG_BLOCK
+/**
+ * match_dev_by_uuid - callback for finding a partition using its uuid
+ * @dev: device passed in by the caller
+ * @data: opaque pointer to a 36 byte char array with a UUID
+ *
+ * Returns 1 if the device matches, and 0 otherwise.
+ */
+static int match_dev_by_uuid(struct device *dev, void *data)
+{
+ u8 *uuid = data;
+ struct hd_struct *part = dev_to_part(dev);
+
+ if (!part->info)
+ goto no_match;
+
+ if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid)))
+ goto no_match;
+
+ return 1;
+no_match:
+ return 0;
+}
+
+
+/**
+ * devt_from_partuuid - looks up the dev_t of a partition by its UUID
+ * @uuid: 36 byte char array containing a hex ascii UUID
+ *
+ * The function will return the first partition which contains a matching
+ * UUID value in its partition_meta_info struct. This does not search
+ * by filesystem UUIDs.
+ *
+ * Returns the matching dev_t on success or 0 on failure.
+ */
+static dev_t __init devt_from_partuuid(char *uuid_str)
+{
+ dev_t res = 0;
+ struct device *dev = NULL;
+ u8 uuid[16];
+
+ /* Pack the requested UUID in the expected format. */
+ part_pack_uuid(uuid_str, uuid);
+
+ dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid);
+ if (!dev)
+ goto done;
+
+ res = dev->devt;
+ put_device(dev);
+
+done:
+ return res;
+}
+#endif
+
/*
* Convert a name into device number. We accept the following variants:
*
@@ -68,6 +124,8 @@ __setup("rw", readwrite);
* of partition - device number of disk plus the partition number
* 5) /dev/<disk_name>p<decimal> - same as the above, that form is
* used when disk name of partitioned disk ends on a digit.
+ * 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
+ * unique id of a partition if the partition table provides it.
*
* If name doesn't have fall into the categories above, we return (0,0).
* block_class is used to check if something is a disk name. If the disk
@@ -82,6 +140,18 @@ dev_t name_to_dev_t(char *name)
dev_t res = 0;
int part;
+#ifdef CONFIG_BLOCK
+ if (strncmp(name, "PARTUUID=", 9) == 0) {
+ name += 9;
+ if (strlen(name) != 36)
+ goto fail;
+ res = devt_from_partuuid(name);
+ if (!res)
+ goto fail;
+ goto done;
+ }
+#endif
+
if (strncmp(name, "/dev/", 5) != 0) {
unsigned maj, min;
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 2b108538d0d9..3098a38f3ae1 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -24,10 +24,11 @@ static int __init no_initrd(char *str)
__setup("noinitrd", no_initrd);
-static int __init do_linuxrc(void * shell)
+static int __init do_linuxrc(void *_shell)
{
- static char *argv[] = { "linuxrc", NULL, };
- extern char * envp_init[];
+ static const char *argv[] = { "linuxrc", NULL, };
+ extern const char *envp_init[];
+ const char *shell = _shell;
sys_close(old_fd);sys_close(root_fd);
sys_setsid();
diff --git a/init/main.c b/init/main.c
index 4ddb53f04f2a..e59af24a0b7c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -32,7 +32,6 @@
#include <linux/start_kernel.h>
#include <linux/security.h>
#include <linux/smp.h>
-#include <linux/workqueue.h>
#include <linux/profile.h>
#include <linux/rcupdate.h>
#include <linux/moduleparam.h>
@@ -66,11 +65,9 @@
#include <linux/ftrace.h>
#include <linux/async.h>
#include <linux/kmemcheck.h>
-#include <linux/kmemtrace.h>
#include <linux/sfi.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
-#include <trace/boot.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -200,15 +197,15 @@ static int __init set_reset_devices(char *str)
__setup("reset_devices", set_reset_devices);
-static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
-char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
+static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
+const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
static const char *panic_later, *panic_param;
-extern struct obs_kernel_param __setup_start[], __setup_end[];
+extern const struct obs_kernel_param __setup_start[], __setup_end[];
static int __init obsolete_checksetup(char *line)
{
- struct obs_kernel_param *p;
+ const struct obs_kernel_param *p;
int had_early_param = 0;
p = __setup_start;
@@ -427,7 +424,6 @@ static void __init setup_command_line(char *command_line)
static __initdata DECLARE_COMPLETION(kthreadd_done);
static noinline void __init_refok rest_init(void)
- __releases(kernel_lock)
{
int pid;
@@ -444,7 +440,6 @@ static noinline void __init_refok rest_init(void)
kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
rcu_read_unlock();
complete(&kthreadd_done);
- unlock_kernel();
/*
* The boot idle thread must execute schedule()
@@ -462,7 +457,7 @@ static noinline void __init_refok rest_init(void)
/* Check for early params. */
static int __init do_early_param(char *param, char *val)
{
- struct obs_kernel_param *p;
+ const struct obs_kernel_param *p;
for (p = __setup_start; p < __setup_end; p++) {
if ((p->early && strcmp(param, p->str) == 0) ||
@@ -540,7 +535,7 @@ static void __init mm_init(void)
asmlinkage void __init start_kernel(void)
{
char * command_line;
- extern struct kernel_param __start___param[], __stop___param[];
+ extern const struct kernel_param __start___param[], __stop___param[];
smp_setup_processor_id();
@@ -560,13 +555,11 @@ asmlinkage void __init start_kernel(void)
local_irq_disable();
early_boot_irqs_off();
- early_init_irq_lock_class();
/*
* Interrupts are still disabled. Do necessary setups, then
* enable them
*/
- lock_kernel();
tick_init();
boot_cpu_init();
page_address_init();
@@ -664,7 +657,6 @@ asmlinkage void __init start_kernel(void)
#endif
page_cgroup_init();
enable_debug_pagealloc();
- kmemtrace_init();
kmemleak_init();
debug_objects_mem_init();
idr_init_cache();
@@ -726,38 +718,39 @@ int initcall_debug;
core_param(initcall_debug, initcall_debug, bool, 0644);
static char msgbuf[64];
-static struct boot_trace_call call;
-static struct boot_trace_ret ret;
-int do_one_initcall(initcall_t fn)
+static int __init_or_module do_one_initcall_debug(initcall_t fn)
{
- int count = preempt_count();
ktime_t calltime, delta, rettime;
+ unsigned long long duration;
+ int ret;
- if (initcall_debug) {
- call.caller = task_pid_nr(current);
- printk("calling %pF @ %i\n", fn, call.caller);
- calltime = ktime_get();
- trace_boot_call(&call, fn);
- enable_boot_trace();
- }
+ printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current));
+ calltime = ktime_get();
+ ret = fn();
+ rettime = ktime_get();
+ delta = ktime_sub(rettime, calltime);
+ duration = (unsigned long long) ktime_to_ns(delta) >> 10;
+ printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n", fn,
+ ret, duration);
- ret.result = fn();
+ return ret;
+}
- if (initcall_debug) {
- disable_boot_trace();
- rettime = ktime_get();
- delta = ktime_sub(rettime, calltime);
- ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10;
- trace_boot_ret(&ret, fn);
- printk("initcall %pF returned %d after %Ld usecs\n", fn,
- ret.result, ret.duration);
- }
+int __init_or_module do_one_initcall(initcall_t fn)
+{
+ int count = preempt_count();
+ int ret;
+
+ if (initcall_debug)
+ ret = do_one_initcall_debug(fn);
+ else
+ ret = fn();
msgbuf[0] = 0;
- if (ret.result && ret.result != -ENODEV && initcall_debug)
- sprintf(msgbuf, "error code %d ", ret.result);
+ if (ret && ret != -ENODEV && initcall_debug)
+ sprintf(msgbuf, "error code %d ", ret);
if (preempt_count() != count) {
strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -771,7 +764,7 @@ int do_one_initcall(initcall_t fn)
printk("initcall %pF returned with %s\n", fn, msgbuf);
}
- return ret.result;
+ return ret;
}
@@ -797,7 +790,6 @@ static void __init do_initcalls(void)
*/
static void __init do_basic_setup(void)
{
- init_workqueues();
cpuset_init_smp();
usermodehelper_init();
init_tmpfs();
@@ -815,7 +807,7 @@ static void __init do_pre_smp_initcalls(void)
do_one_initcall(*fn);
}
-static void run_init_process(char *init_filename)
+static void run_init_process(const char *init_filename)
{
argv_init[0] = init_filename;
kernel_execve(init_filename, argv_init, envp_init);
@@ -825,12 +817,10 @@ static void run_init_process(char *init_filename)
* makes it inline to init() and it becomes part of init.text section
*/
static noinline int init_post(void)
- __releases(kernel_lock)
{
/* need to finish all async __init code before freeing the memory */
async_synchronize_full();
free_initmem();
- unlock_kernel();
mark_rodata_ro();
system_state = SYSTEM_RUNNING;
numa_default_policy();
@@ -870,8 +860,6 @@ static int __init kernel_init(void * unused)
* Wait until kthreadd is all set-up.
*/
wait_for_completion(&kthreadd_done);
- lock_kernel();
-
/*
* init can allocate pages on any node
*/
@@ -895,7 +883,6 @@ static int __init kernel_init(void * unused)
smp_prepare_cpus(setup_max_cpus);
do_pre_smp_initcalls();
- start_boot_trace();
smp_init();
sched_init_smp();