summaryrefslogtreecommitdiff
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/ABI/testing/sysfs-class-backlight-driver-adp887056
-rw-r--r--Documentation/Changes43
-rw-r--r--Documentation/CodingStyle4
-rw-r--r--Documentation/accounting/cgroupstats.txt4
-rw-r--r--Documentation/arm/kernel_user_helpers.txt267
-rw-r--r--Documentation/cgroups/blkio-controller.txt41
-rw-r--r--Documentation/cgroups/cgroups.txt60
-rw-r--r--Documentation/cgroups/cpuacct.txt21
-rw-r--r--Documentation/cgroups/cpusets.txt28
-rw-r--r--Documentation/cgroups/devices.txt6
-rw-r--r--Documentation/cgroups/freezer-subsystem.txt20
-rw-r--r--Documentation/cgroups/memory.txt58
-rw-r--r--Documentation/devicetree/bindings/arm/arm-boards20
-rw-r--r--Documentation/devicetree/bindings/arm/freescale.txt7
-rw-r--r--Documentation/devicetree/bindings/arm/genesi.txt8
-rw-r--r--Documentation/devicetree/bindings/arm/primecell.txt23
-rw-r--r--Documentation/devicetree/bindings/arm/samsung.txt9
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-sec2.txt (renamed from Documentation/devicetree/bindings/powerpc/fsl/sec.txt)2
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio.txt46
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio_nvidia.txt7
-rw-r--r--Documentation/devicetree/bindings/i2c/arm-versatile.txt10
-rw-r--r--Documentation/devicetree/bindings/mtd/arm-versatile.txt8
-rw-r--r--Documentation/devicetree/bindings/net/smsc-lan91c111.txt10
-rw-r--r--Documentation/devicetree/bindings/spi/spi_nvidia.txt5
-rw-r--r--Documentation/devicetree/bindings/tty/serial/of-serial.txt36
-rw-r--r--Documentation/devicetree/usage-model403
-rw-r--r--Documentation/feature-removal-schedule.txt39
-rw-r--r--Documentation/filesystems/caching/netfs-api.txt16
-rw-r--r--Documentation/filesystems/nilfs2.txt1
-rw-r--r--Documentation/filesystems/proc.txt1
-rw-r--r--Documentation/hwmon/f71882fg4
-rw-r--r--Documentation/hwmon/k10temp8
-rw-r--r--Documentation/kernel-parameters.txt9
-rw-r--r--Documentation/kmemleak.txt4
-rw-r--r--Documentation/laptops/thinkpad-acpi.txt5
-rw-r--r--Documentation/md.txt2
-rw-r--r--Documentation/networking/ip-sysctl.txt2
-rw-r--r--Documentation/power/devices.txt67
-rw-r--r--Documentation/power/runtime_pm.txt31
-rw-r--r--Documentation/printk-formats.txt119
-rw-r--r--Documentation/scheduler/sched-design-CFS.txt7
-rw-r--r--Documentation/scheduler/sched-rt-group.txt7
-rw-r--r--Documentation/spinlocks.txt45
-rw-r--r--Documentation/usb/error-codes.txt9
-rw-r--r--Documentation/virtual/lguest/Makefile2
-rw-r--r--Documentation/virtual/lguest/lguest.c22
-rw-r--r--Documentation/vm/hwpoison.txt6
-rw-r--r--Documentation/x86/boot.txt2
48 files changed, 1311 insertions, 299 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 b/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870
new file mode 100644
index 00000000000..aa11dbdd794
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870
@@ -0,0 +1,56 @@
+What: /sys/class/backlight/<backlight>/<ambient light zone>_max
+What: /sys/class/backlight/<backlight>/l1_daylight_max
+What: /sys/class/backlight/<backlight>/l2_bright_max
+What: /sys/class/backlight/<backlight>/l3_office_max
+What: /sys/class/backlight/<backlight>/l4_indoor_max
+What: /sys/class/backlight/<backlight>/l5_dark_max
+Date: Mai 2011
+KernelVersion: 2.6.40
+Contact: device-drivers-devel@blackfin.uclinux.org
+Description:
+ Control the maximum brightness for <ambient light zone>
+ on this <backlight>. Values are between 0 and 127. This file
+ will also show the brightness level stored for this
+ <ambient light zone>.
+
+What: /sys/class/backlight/<backlight>/<ambient light zone>_dim
+What: /sys/class/backlight/<backlight>/l2_bright_dim
+What: /sys/class/backlight/<backlight>/l3_office_dim
+What: /sys/class/backlight/<backlight>/l4_indoor_dim
+What: /sys/class/backlight/<backlight>/l5_dark_dim
+Date: Mai 2011
+KernelVersion: 2.6.40
+Contact: device-drivers-devel@blackfin.uclinux.org
+Description:
+ Control the dim brightness for <ambient light zone>
+ on this <backlight>. Values are between 0 and 127, typically
+ set to 0. Full off when the backlight is disabled.
+ This file will also show the dim brightness level stored for
+ this <ambient light zone>.
+
+What: /sys/class/backlight/<backlight>/ambient_light_level
+Date: Mai 2011
+KernelVersion: 2.6.40
+Contact: device-drivers-devel@blackfin.uclinux.org
+Description:
+ Get conversion value of the light sensor.
+ This value is updated every 80 ms (when the light sensor
+ is enabled). Returns integer between 0 (dark) and
+ 8000 (max ambient brightness)
+
+What: /sys/class/backlight/<backlight>/ambient_light_zone
+Date: Mai 2011
+KernelVersion: 2.6.40
+Contact: device-drivers-devel@blackfin.uclinux.org
+Description:
+ Get/Set current ambient light zone. Reading returns
+ integer between 1..5 (1 = daylight, 2 = bright, ..., 5 = dark).
+ Writing a value between 1..5 forces the backlight controller
+ to enter the corresponding ambient light zone.
+ Writing 0 returns to normal/automatic ambient light level
+ operation. The ambient light sensing feature on these devices
+ is an extension to the API documented in
+ Documentation/ABI/stable/sysfs-class-backlight.
+ It can be enabled by writing the value stored in
+ /sys/class/backlight/<backlight>/max_brightness to
+ /sys/class/backlight/<backlight>/brightness. \ No newline at end of file
diff --git a/Documentation/Changes b/Documentation/Changes
index 5f4828a034e..b1758088527 100644
--- a/Documentation/Changes
+++ b/Documentation/Changes
@@ -2,13 +2,7 @@ Intro
=====
This document is designed to provide a list of the minimum levels of
-software necessary to run the 2.6 kernels, as well as provide brief
-instructions regarding any other "Gotchas" users may encounter when
-trying life on the Bleeding Edge. If upgrading from a pre-2.4.x
-kernel, please consult the Changes file included with 2.4.x kernels for
-additional information; most of that information will not be repeated
-here. Basically, this document assumes that your system is already
-functional and running at least 2.4.x kernels.
+software necessary to run the 3.0 kernels.
This document is originally based on my "Changes" file for 2.0.x kernels
and therefore owes credit to the same people as that file (Jared Mauch,
@@ -22,11 +16,10 @@ Upgrade to at *least* these software revisions before thinking you've
encountered a bug! If you're unsure what version you're currently
running, the suggested command should tell you.
-Again, keep in mind that this list assumes you are already
-functionally running a Linux 2.4 kernel. Also, not all tools are
-necessary on all systems; obviously, if you don't have any ISDN
-hardware, for example, you probably needn't concern yourself with
-isdn4k-utils.
+Again, keep in mind that this list assumes you are already functionally
+running a Linux kernel. Also, not all tools are necessary on all
+systems; obviously, if you don't have any ISDN hardware, for example,
+you probably needn't concern yourself with isdn4k-utils.
o Gnu C 3.2 # gcc --version
o Gnu make 3.80 # make --version
@@ -114,12 +107,12 @@ Ksymoops
If the unthinkable happens and your kernel oopses, you may need the
ksymoops tool to decode it, but in most cases you don't.
-In the 2.6 kernel it is generally preferred to build the kernel with
-CONFIG_KALLSYMS so that it produces readable dumps that can be used as-is
-(this also produces better output than ksymoops).
-If for some reason your kernel is not build with CONFIG_KALLSYMS and
-you have no way to rebuild and reproduce the Oops with that option, then
-you can still decode that Oops with ksymoops.
+It is generally preferred to build the kernel with CONFIG_KALLSYMS so
+that it produces readable dumps that can be used as-is (this also
+produces better output than ksymoops). If for some reason your kernel
+is not build with CONFIG_KALLSYMS and you have no way to rebuild and
+reproduce the Oops with that option, then you can still decode that Oops
+with ksymoops.
Module-Init-Tools
-----------------
@@ -261,8 +254,8 @@ needs to be recompiled or (preferably) upgraded.
NFS-utils
---------
-In 2.4 and earlier kernels, the nfs server needed to know about any
-client that expected to be able to access files via NFS. This
+In ancient (2.4 and earlier) kernels, the nfs server needed to know
+about any client that expected to be able to access files via NFS. This
information would be given to the kernel by "mountd" when the client
mounted the filesystem, or by "exportfs" at system startup. exportfs
would take information about active clients from /var/lib/nfs/rmtab.
@@ -272,11 +265,11 @@ which is not always easy, particularly when trying to implement
fail-over. Even when the system is working well, rmtab suffers from
getting lots of old entries that never get removed.
-With 2.6 we have the option of having the kernel tell mountd when it
-gets a request from an unknown host, and mountd can give appropriate
-export information to the kernel. This removes the dependency on
-rmtab and means that the kernel only needs to know about currently
-active clients.
+With modern kernels we have the option of having the kernel tell mountd
+when it gets a request from an unknown host, and mountd can give
+appropriate export information to the kernel. This removes the
+dependency on rmtab and means that the kernel only needs to know about
+currently active clients.
To enable this new functionality, you need to:
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index 58b0bf91783..fa6e25b94a5 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -680,8 +680,8 @@ ones already enabled by DEBUG.
Chapter 14: Allocating memory
The kernel provides the following general purpose memory allocators:
-kmalloc(), kzalloc(), kcalloc(), and vmalloc(). Please refer to the API
-documentation for further information about them.
+kmalloc(), kzalloc(), kcalloc(), vmalloc(), and vzalloc(). Please refer to
+the API documentation for further information about them.
The preferred form for passing a size of a struct is the following:
diff --git a/Documentation/accounting/cgroupstats.txt b/Documentation/accounting/cgroupstats.txt
index eda40fd39ca..d16a9849e60 100644
--- a/Documentation/accounting/cgroupstats.txt
+++ b/Documentation/accounting/cgroupstats.txt
@@ -21,7 +21,7 @@ information will not be available.
To extract cgroup statistics a utility very similar to getdelays.c
has been developed, the sample output of the utility is shown below
-~/balbir/cgroupstats # ./getdelays -C "/cgroup/a"
+~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup/a"
sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0
-~/balbir/cgroupstats # ./getdelays -C "/cgroup"
+~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup"
sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2
diff --git a/Documentation/arm/kernel_user_helpers.txt b/Documentation/arm/kernel_user_helpers.txt
new file mode 100644
index 00000000000..a17df9f91d1
--- /dev/null
+++ b/Documentation/arm/kernel_user_helpers.txt
@@ -0,0 +1,267 @@
+Kernel-provided User Helpers
+============================
+
+These are segment of kernel provided user code reachable from user space
+at a fixed address in kernel memory. This is used to provide user space
+with some operations which require kernel help because of unimplemented
+native feature and/or instructions in many ARM CPUs. The idea is for this
+code to be executed directly in user mode for best efficiency but which is
+too intimate with the kernel counter part to be left to user libraries.
+In fact this code might even differ from one CPU to another depending on
+the available instruction set, or whether it is a SMP systems. In other
+words, the kernel reserves the right to change this code as needed without
+warning. Only the entry points and their results as documented here are
+guaranteed to be stable.
+
+This is different from (but doesn't preclude) a full blown VDSO
+implementation, however a VDSO would prevent some assembly tricks with
+constants that allows for efficient branching to those code segments. And
+since those code segments only use a few cycles before returning to user
+code, the overhead of a VDSO indirect far call would add a measurable
+overhead to such minimalistic operations.
+
+User space is expected to bypass those helpers and implement those things
+inline (either in the code emitted directly by the compiler, or part of
+the implementation of a library call) when optimizing for a recent enough
+processor that has the necessary native support, but only if resulting
+binaries are already to be incompatible with earlier ARM processors due to
+useage of similar native instructions for other things. In other words
+don't make binaries unable to run on earlier processors just for the sake
+of not using these kernel helpers if your compiled code is not going to
+use new instructions for other purpose.
+
+New helpers may be added over time, so an older kernel may be missing some
+helpers present in a newer kernel. For this reason, programs must check
+the value of __kuser_helper_version (see below) before assuming that it is
+safe to call any particular helper. This check should ideally be
+performed only once at process startup time, and execution aborted early
+if the required helpers are not provided by the kernel version that
+process is running on.
+
+kuser_helper_version
+--------------------
+
+Location: 0xffff0ffc
+
+Reference declaration:
+
+ extern int32_t __kuser_helper_version;
+
+Definition:
+
+ This field contains the number of helpers being implemented by the
+ running kernel. User space may read this to determine the availability
+ of a particular helper.
+
+Usage example:
+
+#define __kuser_helper_version (*(int32_t *)0xffff0ffc)
+
+void check_kuser_version(void)
+{
+ if (__kuser_helper_version < 2) {
+ fprintf(stderr, "can't do atomic operations, kernel too old\n");
+ abort();
+ }
+}
+
+Notes:
+
+ User space may assume that the value of this field never changes
+ during the lifetime of any single process. This means that this
+ field can be read once during the initialisation of a library or
+ startup phase of a program.
+
+kuser_get_tls
+-------------
+
+Location: 0xffff0fe0
+
+Reference prototype:
+
+ void * __kuser_get_tls(void);
+
+Input:
+
+ lr = return address
+
+Output:
+
+ r0 = TLS value
+
+Clobbered registers:
+
+ none
+
+Definition:
+
+ Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
+
+Usage example:
+
+typedef void * (__kuser_get_tls_t)(void);
+#define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0)
+
+void foo()
+{
+ void *tls = __kuser_get_tls();
+ printf("TLS = %p\n", tls);
+}
+
+Notes:
+
+ - Valid only if __kuser_helper_version >= 1 (from kernel version 2.6.12).
+
+kuser_cmpxchg
+-------------
+
+Location: 0xffff0fc0
+
+Reference prototype:
+
+ int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr);
+
+Input:
+
+ r0 = oldval
+ r1 = newval
+ r2 = ptr
+ lr = return address
+
+Output:
+
+ r0 = success code (zero or non-zero)
+ C flag = set if r0 == 0, clear if r0 != 0
+
+Clobbered registers:
+
+ r3, ip, flags
+
+Definition:
+
+ Atomically store newval in *ptr only if *ptr is equal to oldval.
+ Return zero if *ptr was changed or non-zero if no exchange happened.
+ The C flag is also set if *ptr was changed to allow for assembly
+ optimization in the calling code.
+
+Usage example:
+
+typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
+#define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)
+
+int atomic_add(volatile int *ptr, int val)
+{
+ int old, new;
+
+ do {
+ old = *ptr;
+ new = old + val;
+ } while(__kuser_cmpxchg(old, new, ptr));
+
+ return new;
+}
+
+Notes:
+
+ - This routine already includes memory barriers as needed.
+
+ - Valid only if __kuser_helper_version >= 2 (from kernel version 2.6.12).
+
+kuser_memory_barrier
+--------------------
+
+Location: 0xffff0fa0
+
+Reference prototype:
+
+ void __kuser_memory_barrier(void);
+
+Input:
+
+ lr = return address
+
+Output:
+
+ none
+
+Clobbered registers:
+
+ none
+
+Definition:
+
+ Apply any needed memory barrier to preserve consistency with data modified
+ manually and __kuser_cmpxchg usage.
+
+Usage example:
+
+typedef void (__kuser_dmb_t)(void);
+#define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0)
+
+Notes:
+
+ - Valid only if __kuser_helper_version >= 3 (from kernel version 2.6.15).
+
+kuser_cmpxchg64
+---------------
+
+Location: 0xffff0f60
+
+Reference prototype:
+
+ int __kuser_cmpxchg64(const int64_t *oldval,
+ const int64_t *newval,
+ volatile int64_t *ptr);
+
+Input:
+
+ r0 = pointer to oldval
+ r1 = pointer to newval
+ r2 = pointer to target value
+ lr = return address
+
+Output:
+
+ r0 = success code (zero or non-zero)
+ C flag = set if r0 == 0, clear if r0 != 0
+
+Clobbered registers:
+
+ r3, lr, flags
+
+Definition:
+
+ Atomically store the 64-bit value pointed by *newval in *ptr only if *ptr
+ is equal to the 64-bit value pointed by *oldval. Return zero if *ptr was
+ changed or non-zero if no exchange happened.
+
+ The C flag is also set if *ptr was changed to allow for assembly
+ optimization in the calling code.
+
+Usage example:
+
+typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval,
+ const int64_t *newval,
+ volatile int64_t *ptr);
+#define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60)
+
+int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
+{
+ int64_t old, new;
+
+ do {
+ old = *ptr;
+ new = old + val;
+ } while(__kuser_cmpxchg64(&old, &new, ptr));
+
+ return new;
+}
+
+Notes:
+
+ - This routine already includes memory barriers as needed.
+
+ - Due to the length of this sequence, this spans 2 conventional kuser
+ "slots", therefore 0xffff0f80 is not used as a valid entry point.
+
+ - Valid only if __kuser_helper_version >= 5 (from kernel version 3.1).
diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
index 465351d4cf8..84f0a15fc21 100644
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -28,16 +28,19 @@ cgroups. Here is what you can do.
- Enable group scheduling in CFQ
CONFIG_CFQ_GROUP_IOSCHED=y
-- Compile and boot into kernel and mount IO controller (blkio).
+- Compile and boot into kernel and mount IO controller (blkio); see
+ cgroups.txt, Why are cgroups needed?.
- mount -t cgroup -o blkio none /cgroup
+ mount -t tmpfs cgroup_root /sys/fs/cgroup
+ mkdir /sys/fs/cgroup/blkio
+ mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
- Create two cgroups
- mkdir -p /cgroup/test1/ /cgroup/test2
+ mkdir -p /sys/fs/cgroup/blkio/test1/ /sys/fs/cgroup/blkio/test2
- Set weights of group test1 and test2
- echo 1000 > /cgroup/test1/blkio.weight
- echo 500 > /cgroup/test2/blkio.weight
+ echo 1000 > /sys/fs/cgroup/blkio/test1/blkio.weight
+ echo 500 > /sys/fs/cgroup/blkio/test2/blkio.weight
- Create two same size files (say 512MB each) on same disk (file1, file2) and
launch two dd threads in different cgroup to read those files.
@@ -46,12 +49,12 @@ cgroups. Here is what you can do.
echo 3 > /proc/sys/vm/drop_caches
dd if=/mnt/sdb/zerofile1 of=/dev/null &
- echo $! > /cgroup/test1/tasks
- cat /cgroup/test1/tasks
+ echo $! > /sys/fs/cgroup/blkio/test1/tasks
+ cat /sys/fs/cgroup/blkio/test1/tasks
dd if=/mnt/sdb/zerofile2 of=/dev/null &
- echo $! > /cgroup/test2/tasks
- cat /cgroup/test2/tasks
+ echo $! > /sys/fs/cgroup/blkio/test2/tasks
+ cat /sys/fs/cgroup/blkio/test2/tasks
- At macro level, first dd should finish first. To get more precise data, keep
on looking at (with the help of script), at blkio.disk_time and
@@ -68,13 +71,13 @@ Throttling/Upper Limit policy
- Enable throttling in block layer
CONFIG_BLK_DEV_THROTTLING=y
-- Mount blkio controller
- mount -t cgroup -o blkio none /cgroup/blkio
+- Mount blkio controller (see cgroups.txt, Why are cgroups needed?)
+ mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
- Specify a bandwidth rate on particular device for root group. The format
for policy is "<major>:<minor> <byes_per_second>".
- echo "8:16 1048576" > /cgroup/blkio/blkio.read_bps_device
+ echo "8:16 1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device
Above will put a limit of 1MB/second on reads happening for root group
on device having major/minor number 8:16.
@@ -87,7 +90,7 @@ Throttling/Upper Limit policy
1024+0 records out
4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s
- Limits for writes can be put using blkio.write_bps_device file.
+ Limits for writes can be put using blkio.throttle.write_bps_device file.
Hierarchical Cgroups
====================
@@ -108,7 +111,7 @@ Hierarchical Cgroups
CFQ and throttling will practically treat all groups at same level.
pivot
- / | \ \
+ / / \ \
root test1 test2 test3
Down the line we can implement hierarchical accounting/control support
@@ -149,7 +152,7 @@ Proportional weight policy files
Following is the format.
- #echo dev_maj:dev_minor weight > /path/to/cgroup/blkio.weight_device
+ # echo dev_maj:dev_minor weight > blkio.weight_device
Configure weight=300 on /dev/sdb (8:16) in this cgroup
# echo 8:16 300 > blkio.weight_device
# cat blkio.weight_device
@@ -283,28 +286,28 @@ Throttling/Upper limit policy files
specified in bytes per second. Rules are per deivce. Following is
the format.
- echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.read_bps_device
+ echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.read_bps_device
- blkio.throttle.write_bps_device
- Specifies upper limit on WRITE rate to the device. IO rate is
specified in bytes per second. Rules are per deivce. Following is
the format.
- echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.write_bps_device
+ echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.write_bps_device
- blkio.throttle.read_iops_device
- Specifies upper limit on READ rate from the device. IO rate is
specified in IO per second. Rules are per deivce. Following is
the format.
- echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.read_iops_device
+ echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.read_iops_device
- blkio.throttle.write_iops_device
- Specifies upper limit on WRITE rate to the device. IO rate is
specified in io per second. Rules are per deivce. Following is
the format.
- echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.write_iops_device
+ echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.write_iops_device
Note: If both BW and IOPS rules are specified for a device, then IO is
subjectd to both the constraints.
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 0ed99f08f1f..cd67e90003c 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -138,11 +138,11 @@ With the ability to classify tasks differently for different resources
the admin can easily set up a script which receives exec notifications
and depending on who is launching the browser he can
- # echo browser_pid > /mnt/<restype>/<userclass>/tasks
+ # echo browser_pid > /sys/fs/cgroup/<restype>/<userclass>/tasks
With only a single hierarchy, he now would potentially have to create
a separate cgroup for every browser launched and associate it with
-approp network and other resource class. This may lead to
+appropriate network and other resource class. This may lead to
proliferation of such cgroups.
Also lets say that the administrator would like to give enhanced network
@@ -153,9 +153,9 @@ apps enhanced CPU power,
With ability to write pids directly to resource classes, it's just a
matter of :
- # echo pid > /mnt/network/<new_class>/tasks
+ # echo pid > /sys/fs/cgroup/network/<new_class>/tasks
(after some time)
- # echo pid > /mnt/network/<orig_class>/tasks
+ # echo pid > /sys/fs/cgroup/network/<orig_class>/tasks
Without this ability, he would have to split the cgroup into
multiple separate ones and then associate the new cgroups with the
@@ -310,21 +310,24 @@ subsystem, this is the case for the cpuset.
To start a new job that is to be contained within a cgroup, using
the "cpuset" cgroup subsystem, the steps are something like:
- 1) mkdir /dev/cgroup
- 2) mount -t cgroup -ocpuset cpuset /dev/cgroup
- 3) Create the new cgroup by doing mkdir's and write's (or echo's) in
- the /dev/cgroup virtual file system.
- 4) Start a task that will be the "founding father" of the new job.
- 5) Attach that task to the new cgroup by writing its pid to the
- /dev/cgroup tasks file for that cgroup.
- 6) fork, exec or clone the job tasks from this founding father task.
+ 1) mount -t tmpfs cgroup_root /sys/fs/cgroup
+ 2) mkdir /sys/fs/cgroup/cpuset
+ 3) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
+ 4) Create the new cgroup by doing mkdir's and write's (or echo's) in
+ the /sys/fs/cgroup virtual file system.
+ 5) Start a task that will be the "founding father" of the new job.
+ 6) Attach that task to the new cgroup by writing its pid to the
+ /sys/fs/cgroup/cpuset/tasks file for that cgroup.
+ 7) fork, exec or clone the job tasks from this founding father task.
For example, the following sequence of commands will setup a cgroup
named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
and then start a subshell 'sh' in that cgroup:
- mount -t cgroup cpuset -ocpuset /dev/cgroup
- cd /dev/cgroup
+ mount -t tmpfs cgroup_root /sys/fs/cgroup
+ mkdir /sys/fs/cgroup/cpuset
+ mount -t cgroup cpuset -ocpuset /sys/fs/cgroup/cpuset
+ cd /sys/fs/cgroup/cpuset
mkdir Charlie
cd Charlie
/bin/echo 2-3 > cpuset.cpus
@@ -345,7 +348,7 @@ Creating, modifying, using the cgroups can be done through the cgroup
virtual filesystem.
To mount a cgroup hierarchy with all available subsystems, type:
-# mount -t cgroup xxx /dev/cgroup
+# mount -t cgroup xxx /sys/fs/cgroup
The "xxx" is not interpreted by the cgroup code, but will appear in
/proc/mounts so may be any useful identifying string that you like.
@@ -354,23 +357,32 @@ Note: Some subsystems do not work without some user input first. For instance,
if cpusets are enabled the user will have to populate the cpus and mems files
for each new cgroup created before that group can be used.
+As explained in section `1.2 Why are cgroups needed?' you should create
+different hierarchies of cgroups for each single resource or group of
+resources you want to control. Therefore, you should mount a tmpfs on
+/sys/fs/cgroup and create directories for each cgroup resource or resource
+group.
+
+# mount -t tmpfs cgroup_root /sys/fs/cgroup
+# mkdir /sys/fs/cgroup/rg1
+
To mount a cgroup hierarchy with just the cpuset and memory
subsystems, type:
-# mount -t cgroup -o cpuset,memory hier1 /dev/cgroup
+# mount -t cgroup -o cpuset,memory hier1 /sys/fs/cgroup/rg1
To change the set of subsystems bound to a mounted hierarchy, just
remount with different options:
-# mount -o remount,cpuset,blkio hier1 /dev/cgroup
+# mount -o remount,cpuset,blkio hier1 /sys/fs/cgroup/rg1
Now memory is removed from the hierarchy and blkio is added.
Note this will add blkio to the hierarchy but won't remove memory or
cpuset, because the new options are appended to the old ones:
-# mount -o remount,blkio /dev/cgroup
+# mount -o remount,blkio /sys/fs/cgroup/rg1
To Specify a hierarchy's release_agent:
# mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \
- xxx /dev/cgroup
+ xxx /sys/fs/cgroup/rg1
Note that specifying 'release_agent' more than once will return failure.
@@ -379,17 +391,17 @@ when the hierarchy consists of a single (root) cgroup. Supporting
the ability to arbitrarily bind/unbind subsystems from an existing
cgroup hierarchy is intended to be implemented in the future.
-Then under /dev/cgroup you can find a tree that corresponds to the
-tree of the cgroups in the system. For instance, /dev/cgroup
+Then under /sys/fs/cgroup/rg1 you can find a tree that corresponds to the
+tree of the cgroups in the system. For instance, /sys/fs/cgroup/rg1
is the cgroup that holds the whole system.
If you want to change the value of release_agent:
-# echo "/sbin/new_release_agent" > /dev/cgroup/release_agent
+# echo "/sbin/new_release_agent" > /sys/fs/cgroup/rg1/release_agent
It can also be changed via remount.
-If you want to create a new cgroup under /dev/cgroup:
-# cd /dev/cgroup
+If you want to create a new cgroup under /sys/fs/cgroup/rg1:
+# cd /sys/fs/cgroup/rg1
# mkdir my_cgroup
Now you want to do something with this cgroup.
diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroups/cpuacct.txt
index 8b930946c52..9ad85df4b98 100644
--- a/Documentation/cgroups/cpuacct.txt
+++ b/Documentation/cgroups/cpuacct.txt
@@ -10,26 +10,25 @@ directly present in its group.
Accounting groups can be created by first mounting the cgroup filesystem.
-# mkdir /cgroups
-# mount -t cgroup -ocpuacct none /cgroups
-
-With the above step, the initial or the parent accounting group
-becomes visible at /cgroups. At bootup, this group includes all the
-tasks in the system. /cgroups/tasks lists the tasks in this cgroup.
-/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by
-this group which is essentially the CPU time obtained by all the tasks
+# mount -t cgroup -ocpuacct none /sys/fs/cgroup
+
+With the above step, the initial or the parent accounting group becomes
+visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
+the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
+/sys/fs/cgroup/cpuacct.usage gives the CPU time (in nanoseconds) obtained
+by this group which is essentially the CPU time obtained by all the tasks
in the system.
-New accounting groups can be created under the parent group /cgroups.
+New accounting groups can be created under the parent group /sys/fs/cgroup.
-# cd /cgroups
+# cd /sys/fs/cgroup
# mkdir g1
# echo $$ > g1
The above steps create a new group g1 and move the current shell
process (bash) into it. CPU time consumed by this bash and its children
can be obtained from g1/cpuacct.usage and the same is accumulated in
-/cgroups/cpuacct.usage also.
+/sys/fs/cgroup/cpuacct.usage also.
cpuacct.stat file lists a few statistics which further divide the
CPU time obtained by the cgroup into user and system times. Currently
diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt
index 98a30829af7..5b0d78e55cc 100644
--- a/Documentation/cgroups/cpusets.txt
+++ b/Documentation/cgroups/cpusets.txt
@@ -661,21 +661,21 @@ than stress the kernel.
To start a new job that is to be contained within a cpuset, the steps are:
- 1) mkdir /dev/cpuset
- 2) mount -t cgroup -ocpuset cpuset /dev/cpuset
+ 1) mkdir /sys/fs/cgroup/cpuset
+ 2) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
3) Create the new cpuset by doing mkdir's and write's (or echo's) in
- the /dev/cpuset virtual file system.
+ the /sys/fs/cgroup/cpuset virtual file system.
4) Start a task that will be the "founding father" of the new job.
5) Attach that task to the new cpuset by writing its pid to the
- /dev/cpuset tasks file for that cpuset.
+ /sys/fs/cgroup/cpuset tasks file for that cpuset.
6) fork, exec or clone the job tasks from this founding father task.
For example, the following sequence of commands will setup a cpuset
named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
and then start a subshell 'sh' in that cpuset:
- mount -t cgroup -ocpuset cpuset /dev/cpuset
- cd /dev/cpuset
+ mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
+ cd /sys/fs/cgroup/cpuset
mkdir Charlie
cd Charlie
/bin/echo 2-3 > cpuset.cpus
@@ -710,14 +710,14 @@ Creating, modifying, using the cpusets can be done through the cpuset
virtual filesystem.
To mount it, type:
-# mount -t cgroup -o cpuset cpuset /dev/cpuset
+# mount -t cgroup -o cpuset cpuset /sys/fs/cgroup/cpuset
-Then under /dev/cpuset you can find a tree that corresponds to the
-tree of the cpusets in the system. For instance, /dev/cpuset
+Then under /sys/fs/cgroup/cpuset you can find a tree that corresponds to the
+tree of the cpusets in the system. For instance, /sys/fs/cgroup/cpuset
is the cpuset that holds the whole system.
-If you want to create a new cpuset under /dev/cpuset:
-# cd /dev/cpuset
+If you want to create a new cpuset under /sys/fs/cgroup/cpuset:
+# cd /sys/fs/cgroup/cpuset
# mkdir my_cpuset
Now you want to do something with this cpuset.
@@ -765,12 +765,12 @@ wrapper around the cgroup filesystem.
The command
-mount -t cpuset X /dev/cpuset
+mount -t cpuset X /sys/fs/cgroup/cpuset
is equivalent to
-mount -t cgroup -ocpuset,noprefix X /dev/cpuset
-echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent
+mount -t cgroup -ocpuset,noprefix X /sys/fs/cgroup/cpuset
+echo "/sbin/cpuset_release_agent" > /sys/fs/cgroup/cpuset/release_agent
2.2 Adding/removing cpus
------------------------
diff --git a/Documentation/cgroups/devices.txt b/Documentation/cgroups/devices.txt
index 57ca4c89fe5..16624a7f822 100644
--- a/Documentation/cgroups/devices.txt
+++ b/Documentation/cgroups/devices.txt
@@ -22,16 +22,16 @@ removed from the child(ren).
An entry is added using devices.allow, and removed using
devices.deny. For instance
- echo 'c 1:3 mr' > /cgroups/1/devices.allow
+ echo 'c 1:3 mr' > /sys/fs/cgroup/1/devices.allow
allows cgroup 1 to read and mknod the device usually known as
/dev/null. Doing
- echo a > /cgroups/1/devices.deny
+ echo a > /sys/fs/cgroup/1/devices.deny
will remove the default 'a *:* rwm' entry. Doing
- echo a > /cgroups/1/devices.allow
+ echo a > /sys/fs/cgroup/1/devices.allow
will add the 'a *:* rwm' entry to the whitelist.
diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroups/freezer-subsystem.txt
index 41f37fea127..c21d77742a0 100644
--- a/Documentation/cgroups/freezer-subsystem.txt
+++ b/Documentation/cgroups/freezer-subsystem.txt
@@ -59,28 +59,28 @@ is non-freezable.
* Examples of usage :
- # mkdir /containers
- # mount -t cgroup -ofreezer freezer /containers
- # mkdir /containers/0
- # echo $some_pid > /containers/0/tasks
+ # mkdir /sys/fs/cgroup/freezer
+ # mount -t cgroup -ofreezer freezer /sys/fs/cgroup/freezer
+ # mkdir /sys/fs/cgroup/freezer/0
+ # echo $some_pid > /sys/fs/cgroup/freezer/0/tasks
to get status of the freezer subsystem :
- # cat /containers/0/freezer.state
+ # cat /sys/fs/cgroup/freezer/0/freezer.state
THAWED
to freeze all tasks in the container :
- # echo FROZEN > /containers/0/freezer.state
- # cat /containers/0/freezer.state
+ # echo FROZEN > /sys/fs/cgroup/freezer/0/freezer.state
+ # cat /sys/fs/cgroup/freezer/0/freezer.state
FREEZING
- # cat /containers/0/freezer.state
+ # cat /sys/fs/cgroup/freezer/0/freezer.state
FROZEN
to unfreeze all tasks in the container :
- # echo THAWED > /containers/0/freezer.state
- # cat /containers/0/freezer.state
+ # echo THAWED > /sys/fs/cgroup/freezer/0/freezer.state
+ # cat /sys/fs/cgroup/freezer/0/freezer.state
THAWED
This is the basic mechanism which should do the right thing for user space task
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 7c163477fcd..06eb6d957c8 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -1,8 +1,8 @@
Memory Resource Controller
-NOTE: The Memory Resource Controller has been generically been referred
- to as the memory controller in this document. Do not confuse memory
- controller used here with the memory controller that is used in hardware.
+NOTE: The Memory Resource Controller has generically been referred to as the
+ memory controller in this document. Do not confuse memory controller
+ used here with the memory controller that is used in hardware.
(For editors)
In this document:
@@ -70,6 +70,7 @@ Brief summary of control files.
(See sysctl's vm.swappiness)
memory.move_charge_at_immigrate # set/show controls of moving charges
memory.oom_control # set/show oom controls.
+ memory.numa_stat # show the number of memory usage per numa node
1. History
@@ -181,7 +182,7 @@ behind this approach is that a cgroup that aggressively uses a shared
page will eventually get charged for it (once it is uncharged from
the cgroup that brought it in -- this will happen on memory pressure).
-Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used..
+Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used.
When you do swapoff and make swapped-out pages of shmem(tmpfs) to
be backed into memory in force, charges for pages are accounted against the
caller of swapoff rather than the users of shmem.
@@ -213,7 +214,7 @@ affecting global LRU, memory+swap limit is better than just limiting swap from
OS point of view.
* What happens when a cgroup hits memory.memsw.limit_in_bytes
-When a cgroup his memory.memsw.limit_in_bytes, it's useless to do swap-out
+When a cgroup hits memory.memsw.limit_in_bytes, it's useless to do swap-out
in this cgroup. Then, swap-out will not be done by cgroup routine and file
caches are dropped. But as mentioned above, global LRU can do swapout memory
from it for sanity of the system's memory management state. You can't forbid
@@ -263,16 +264,17 @@ b. Enable CONFIG_RESOURCE_COUNTERS
c. Enable CONFIG_CGROUP_MEM_RES_CTLR
d. Enable CONFIG_CGROUP_MEM_RES_CTLR_SWAP (to use swap extension)
-1. Prepare the cgroups
-# mkdir -p /cgroups
-# mount -t cgroup none /cgroups -o memory
+1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?)
+# mount -t tmpfs none /sys/fs/cgroup
+# mkdir /sys/fs/cgroup/memory
+# mount -t cgroup none /sys/fs/cgroup/memory -o memory
2. Make the new group and move bash into it
-# mkdir /cgroups/0
-# echo $$ > /cgroups/0/tasks
+# mkdir /sys/fs/cgroup/memory/0
+# echo $$ > /sys/fs/cgroup/memory/0/tasks
Since now we're in the 0 cgroup, we can alter the memory limit:
-# echo 4M > /cgroups/0/memory.limit_in_bytes
+# echo 4M > /sys/fs/cgroup/memory/0/memory.limit_in_bytes
NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.)
@@ -280,11 +282,11 @@ mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.)
NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited).
NOTE: We cannot set limits on the root cgroup any more.
-# cat /cgroups/0/memory.limit_in_bytes
+# cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes
4194304
We can check the usage:
-# cat /cgroups/0/memory.usage_in_bytes
+# cat /sys/fs/cgroup/memory/0/memory.usage_in_bytes
1216512
A successful write to this file does not guarantee a successful set of
@@ -464,6 +466,24 @@ value for efficient access. (Of course, when necessary, it's synchronized.)
If you want to know more exact memory usage, you should use RSS+CACHE(+SWAP)
value in memory.stat(see 5.2).
+5.6 numa_stat
+
+This is similar to numa_maps but operates on a per-memcg basis. This is
+useful for providing visibility into the numa locality information within
+an memcg since the pages are allowed to be allocated from any physical
+node. One of the usecases is evaluating application performance by
+combining this information with the application's cpu allocation.
+
+We export "total", "file", "anon" and "unevictable" pages per-node for
+each memcg. The ouput format of memory.numa_stat is:
+
+total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
+file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
+anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
+unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
+
+And we have total = file + anon + unevictable.
+
6. Hierarchy support
The memory controller supports a deep hierarchy and hierarchical accounting.
@@ -471,13 +491,13 @@ The hierarchy is created by creating the appropriate cgroups in the
cgroup filesystem. Consider for example, the following cgroup filesystem
hierarchy
- root
+ root
/ | \
- / | \
- a b c
- | \
- | \
- d e
+ / | \
+ a b c
+ | \
+ | \
+ d e
In the diagram above, with hierarchical accounting enabled, all memory
usage of e, is accounted to its ancestors up until the root (i.e, c and root),
diff --git a/Documentation/devicetree/bindings/arm/arm-boards b/Documentation/devicetree/bindings/arm/arm-boards
new file mode 100644
index 00000000000..91f26148af7
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm-boards
@@ -0,0 +1,20 @@
+ARM Versatile Application and Platform Baseboards
+-------------------------------------------------
+ARM's development hardware platform with connectors for customizable
+core tiles. The hardware configuration of the Versatile boards is
+highly customizable.
+
+Required properties (in root node):
+ compatible = "arm,versatile-ab"; /* Application baseboard */
+ compatible = "arm,versatile-pb"; /* Platform baseboard */
+
+Interrupt controllers:
+- VIC required properties:
+ compatible = "arm,versatile-vic";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+- SIC required properties:
+ compatible = "arm,versatile-sic";
+ interrupt-controller;
+ #interrupt-cells = <1>;
diff --git a/Documentation/devicetree/bindings/arm/freescale.txt b/Documentation/devicetree/bindings/arm/freescale.txt
new file mode 100644
index 00000000000..8c52102b225
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/freescale.txt
@@ -0,0 +1,7 @@
+mx51 "Babbage" evalutation board
+Required root node properties:
+ - compatible = "fsl,mx51-babbage", "fsl,mx51";
+
+mx53 "Loco" evaluation board
+Required root node properties:
+ - compatible = "fsl,mx53-loco", "fsl,mx53";
diff --git a/Documentation/devicetree/bindings/arm/genesi.txt b/Documentation/devicetree/bindings/arm/genesi.txt
new file mode 100644
index 00000000000..b353489acd4
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/genesi.txt
@@ -0,0 +1,8 @@
+Genesi EfikaMX based on Freescale mx51
+Required root node properties:
+ - compatible = "genesi,efikamx", "fsl,mx51";
+
+Genesi EfikaMX Smartbook based on Freescale mx51
+Required root node properties:
+ - compatible = "genesi,efikasb", "fsl,mx51";
+
diff --git a/Documentation/devicetree/bindings/arm/primecell.txt b/Documentation/devicetree/bindings/arm/primecell.txt
new file mode 100644
index 00000000000..951ca46789d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/primecell.txt
@@ -0,0 +1,23 @@
+* ARM Primecell Peripherals
+
+ARM, Ltd. Primecell peripherals have a standard id register that can be used to
+identify the peripheral type, vendor, and revision. This value can be used for
+driver matching.
+
+Required properties:
+
+- compatible : should be a specific name for the peripheral and
+ "arm,primecell". The specific name will match the ARM
+ engineering name for the logic block in the form: "arm,pl???"
+
+Optional properties:
+
+- arm,primecell-periphid : Value to override the h/w value with
+
+Example:
+
+serial@fff36000 {
+ compatible = "arm,pl011", "arm,primecell";
+ arm,primecell-periphid = <0x00341011>;
+};
+
diff --git a/Documentation/devicetree/bindings/arm/samsung.txt b/Documentation/devicetree/bindings/arm/samsung.txt
new file mode 100644
index 00000000000..594cb97e3d8
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/samsung.txt
@@ -0,0 +1,9 @@
+Samsung Exynos4 S5PV310 SoC based SMDKV310 eval board
+
+ SMDKV310 eval board is based on S5PV310 SoC which belongs to
+ Samsung's Exynos4 family of application processors.
+
+Required root node properties:
+ - compatible = "samsung,smdkv310","samsung,s5pv310"
+ (a) "samsung,smdkv310" - for Samsung's SMDKV310 eval board.
+ (b) "samsung,s5pv310" - for boards based on S5PV310 SoC.
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/sec.txt b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt
index 2b6f2d45c45..38988ef1336 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/sec.txt
+++ b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt
@@ -1,4 +1,4 @@
-Freescale SoC SEC Security Engines
+Freescale SoC SEC Security Engines versions 2.x-3.x
Required properties:
diff --git a/Documentation/devicetree/bindings/gpio/gpio.txt b/Documentation/devicetree/bindings/gpio/gpio.txt
index edaa84d288a..4e16ba4feab 100644
--- a/Documentation/devicetree/bindings/gpio/gpio.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio.txt
@@ -4,17 +4,45 @@ Specifying GPIO information for devices
1) gpios property
-----------------
-Nodes that makes use of GPIOs should define them using `gpios' property,
-format of which is: <&gpio-controller1-phandle gpio1-specifier
- &gpio-controller2-phandle gpio2-specifier
- 0 /* holes are permitted, means no GPIO 3 */
- &gpio-controller4-phandle gpio4-specifier
- ...>;
+Nodes that makes use of GPIOs should specify them using one or more
+properties, each containing a 'gpio-list':
-Note that gpio-specifier length is controller dependent.
+ gpio-list ::= <single-gpio> [gpio-list]
+ single-gpio ::= <gpio-phandle> <gpio-specifier>
+ gpio-phandle : phandle to gpio controller node
+ gpio-specifier : Array of #gpio-cells specifying specific gpio
+ (controller specific)
+
+GPIO properties should be named "[<name>-]gpios". Exact
+meaning of each gpios property must be documented in the device tree
+binding for each device.
+
+For example, the following could be used to describe gpios pins to use
+as chip select lines; with chip selects 0, 1 and 3 populated, and chip
+select 2 left empty:
+
+ gpio1: gpio1 {
+ gpio-controller
+ #gpio-cells = <2>;
+ };
+ gpio2: gpio2 {
+ gpio-controller
+ #gpio-cells = <1>;
+ };
+ [...]
+ chipsel-gpios = <&gpio1 12 0>,
+ <&gpio1 13 0>,
+ <0>, /* holes are permitted, means no GPIO 2 */
+ <&gpio2 2>;
+
+Note that gpio-specifier length is controller dependent. In the
+above example, &gpio1 uses 2 cells to specify a gpio, while &gpio2
+only uses one.
gpio-specifier may encode: bank, pin position inside the bank,
whether pin is open-drain and whether pin is logically inverted.
+Exact meaning of each specifier cell is controller specific, and must
+be documented in the device tree binding for the device.
Example of the node using GPIOs:
@@ -28,8 +56,8 @@ and empty GPIO flags as accepted by the "qe_pio_e" gpio-controller.
2) gpio-controller nodes
------------------------
-Every GPIO controller node must have #gpio-cells property defined,
-this information will be used to translate gpio-specifiers.
+Every GPIO controller node must both an empty "gpio-controller"
+property, and have #gpio-cells contain the size of the gpio-specifier.
Example of two SOC GPIO banks defined as gpio-controller nodes:
diff --git a/Documentation/devicetree/bindings/gpio/gpio_nvidia.txt b/Documentation/devicetree/bindings/gpio/gpio_nvidia.txt
new file mode 100644
index 00000000000..64aac39e6ed
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio_nvidia.txt
@@ -0,0 +1,7 @@
+NVIDIA Tegra 2 GPIO controller
+
+Required properties:
+- compatible : "nvidia,tegra20-gpio"
+- #gpio-cells : Should be two. The first cell is the pin number and the
+ second cell is used to specify optional parameters (currently unused).
+- gpio-controller : Marks the device node as a GPIO controller.
diff --git a/Documentation/devicetree/bindings/i2c/arm-versatile.txt b/Documentation/devicetree/bindings/i2c/arm-versatile.txt
new file mode 100644
index 00000000000..361d31c51b6
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/arm-versatile.txt
@@ -0,0 +1,10 @@
+i2c Controller on ARM Versatile platform:
+
+Required properties:
+- compatible : Must be "arm,versatile-i2c";
+- reg
+- #address-cells = <1>;
+- #size-cells = <0>;
+
+Optional properties:
+- Child nodes conforming to i2c bus binding
diff --git a/Documentation/devicetree/bindings/mtd/arm-versatile.txt b/Documentation/devicetree/bindings/mtd/arm-versatile.txt
new file mode 100644
index 00000000000..476845db94d
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/arm-versatile.txt
@@ -0,0 +1,8 @@
+Flash device on ARM Versatile board
+
+Required properties:
+- compatible : must be "arm,versatile-flash";
+- bank-width : width in bytes of flash interface.
+
+Optional properties:
+- Subnode partition map from mtd flash binding
diff --git a/Documentation/devicetree/bindings/net/smsc-lan91c111.txt b/Documentation/devicetree/bindings/net/smsc-lan91c111.txt
new file mode 100644
index 00000000000..953049b4248
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/smsc-lan91c111.txt
@@ -0,0 +1,10 @@
+SMSC LAN91c111 Ethernet mac
+
+Required properties:
+- compatible = "smsc,lan91c111";
+- reg : physical address and size of registers
+- interrupts : interrupt connection
+
+Optional properties:
+- phy-device : phandle to Ethernet phy
+- local-mac-address : Ethernet mac address to use
diff --git a/Documentation/devicetree/bindings/spi/spi_nvidia.txt b/Documentation/devicetree/bindings/spi/spi_nvidia.txt
new file mode 100644
index 00000000000..6b9e5189669
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi_nvidia.txt
@@ -0,0 +1,5 @@
+NVIDIA Tegra 2 SPI device
+
+Required properties:
+- compatible : should be "nvidia,tegra20-spi".
+- gpios : should specify GPIOs used for chipselect.
diff --git a/Documentation/devicetree/bindings/tty/serial/of-serial.txt b/Documentation/devicetree/bindings/tty/serial/of-serial.txt
new file mode 100644
index 00000000000..b8b27b0aca1
--- /dev/null
+++ b/Documentation/devicetree/bindings/tty/serial/of-serial.txt
@@ -0,0 +1,36 @@
+* UART (Universal Asynchronous Receiver/Transmitter)
+
+Required properties:
+- compatible : one of:
+ - "ns8250"
+ - "ns16450"
+ - "ns16550a"
+ - "ns16550"
+ - "ns16750"
+ - "ns16850"
+ - "nvidia,tegra20-uart"
+ - "ibm,qpace-nwp-serial"
+ - "serial" if the port type is unknown.
+- reg : offset and length of the register set for the device.
+- interrupts : should contain uart interrupt.
+- clock-frequency : the input clock frequency for the UART.
+
+Optional properties:
+- current-speed : the current active speed of the UART.
+- reg-offset : offset to apply to the mapbase from the start of the registers.
+- reg-shift : quantity to shift the register offsets by.
+- reg-io-width : the size (in bytes) of the IO accesses that should be
+ performed on the device. There are some systems that require 32-bit
+ accesses to the UART (e.g. TI davinci).
+- used-by-rtas : set to indicate that the port is in use by the OpenFirmware
+ RTAS and should not be registered.
+
+Example:
+
+ uart@80230000 {
+ compatible = "ns8250";
+ reg = <0x80230000 0x100>;
+ clock-frequency = <3686400>;
+ interrupts = <10>;
+ reg-shift = <2>;
+ };
diff --git a/Documentation/devicetree/usage-model b/Documentation/devicetree/usage-model
new file mode 100644
index 00000000000..45e03b8dd04
--- /dev/null
+++ b/Documentation/devicetree/usage-model
@@ -0,0 +1,403 @@
+Linux and the Device Tree
+The Linux usage model for device tree data
+
+Author: Grant Likely <grant.likely@secretlab.ca>
+
+This article describes how Linux uses the device tree. An overview of
+the device tree data format can be found at the <a
+href="http://devicetree.org/Device_Tree_Usage">Device Tree Usage</a>
+page on <a href="http://devicetree.org">devicetree.org</a>.
+
+
+ All the cool architectures are using device tree. I want to
+ use device tree too!
+
+The "Open Firmware Device Tree", or simply Device Tree (DT), is a data
+structure and language for describing hardware. More specifically, it
+is a description of hardware that is readable by an operating system
+so that the operating system doesn't need to hard code details of the
+machine.
+
+Structurally, the DT is a tree, or acyclic graph with named nodes, and
+nodes may have an arbitrary number of named properties encapsulating
+arbitrary data. A mechanism also exists to create arbitrary
+links from one node to another outside of the natural tree structure.
+
+Conceptually, a common set of usage conventions, called 'bindings',
+is defined for how data should appear in the tree to describe typical
+hardware characteristics including data busses, interrupt lines, GPIO
+connections, and peripheral devices.
+
+As much as possible, hardware is described using existing bindings to
+maximize use of existing support code, but since property and node
+names are simply text strings, it is easy to extend existing bindings
+or create new ones by defining new nodes and properties.
+
+<h2>History</h2>
+The DT was originally created by Open Firmware as part of the
+communication method for passing data from Open Firmware to a client
+program (like to an operating system). An operating system used the
+Device Tree to discover the topology of the hardware at runtime, and
+thereby support a majority of available hardware without hard coded
+information (assuming drivers were available for all devices).
+
+Since Open Firmware is commonly used on PowerPC and SPARC platforms,
+the Linux support for those architectures has for a long time used the
+Device Tree.
+
+In 2005, when PowerPC Linux began a major cleanup and to merge 32-bit
+and 64-bit support, the decision was made to require DT support on all
+powerpc platforms, regardless of whether or not they used Open
+Firmware. To do this, a DT representation called the Flattened Device
+Tree (FDT) was created which could be passed to the kernel as a binary
+blob without requiring a real Open Firmware implementation. U-Boot,
+kexec, and other bootloaders were modified to support both passing a
+Device Tree Binary (dtb) and to modify a dtb at boot time.
+
+Some time later, FDT infrastructure was generalized to be usable by
+all architectures. At the time of this writing, 6 mainlined
+architectures (arm, microblaze, mips, powerpc, sparc, and x86) and 1
+out of mainline (nios) have some level of DT support.
+
+<h2>Data Model</h2>
+If you haven't already read the
+href="http://devicetree.org/Device_Tree_Usage">Device Tree Usage</a>
+page, then go read it now. It's okay, I'll wait....
+
+<h3>High Level View</h3>
+The most important thing to understand is that the DT is simply a data
+structure that describes the hardware. There is nothing magical about
+it, and it doesn't magically make all hardware configuration problems
+go away. What it does do is provide a language for decoupling the
+hardware configuration from the board and device driver support in the
+Linux kernel (or any other operating system for that matter). Using
+it allows board and device support to become data driven; to make
+setup decisions based on data passed into the kernel instead of on
+per-machine hard coded selections.
+
+Ideally, data driven platform setup should result in less code
+duplication and make it easier to support a wide range of hardware
+with a single kernel image.
+
+Linux uses DT data for three major purposes:
+1) platform identification,
+2) runtime configuration, and
+3) device population.
+
+<h4>Platform Identification</h4>
+First and foremost, the kernel will use data in the DT to identify the
+specific machine. In a perfect world, the specific platform shouldn't
+matter to the kernel because all platform details would be described
+perfectly by the device tree in a consistent and reliable manner.
+Hardware is not perfect though, and so the kernel must identify the
+machine during early boot so that it has the opportunity to run
+machine-specific fixups.
+
+In the majority of cases, the machine identity is irrelevant, and the
+kernel will instead select setup code based on the machine's core
+CPU or SoC. On ARM for example, setup_arch() in
+arch/arm/kernel/setup.c will call setup_machine_fdt() in
+arch/arm/kernel/devicetree.c which searches through the machine_desc
+table and selects the machine_desc which best matches the device tree
+data. It determines the best match by looking at the 'compatible'
+property in the root device tree node, and comparing it with the
+dt_compat list in struct machine_desc.
+
+The 'compatible' property contains a sorted list of strings starting
+with the exact name of the machine, followed by an optional list of
+boards it is compatible with sorted from most compatible to least. For
+example, the root compatible properties for the TI BeagleBoard and its
+successor, the BeagleBoard xM board might look like:
+
+ compatible = "ti,omap3-beagleboard", "ti,omap3450", "ti,omap3";
+ compatible = "ti,omap3-beagleboard-xm", "ti,omap3450", "ti,omap3";
+
+Where "ti,omap3-beagleboard-xm" specifies the exact model, it also
+claims that it compatible with the OMAP 3450 SoC, and the omap3 family
+of SoCs in general. You'll notice that the list is sorted from most
+specific (exact board) to least specific (SoC family).
+
+Astute readers might point out that the Beagle xM could also claim
+compatibility with the original Beagle board. However, one should be
+cautioned about doing so at the board level since there is typically a
+high level of change from one board to another, even within the same
+product line, and it is hard to nail down exactly what is meant when one
+board claims to be compatible with another. For the top level, it is
+better to err on the side of caution and not claim one board is
+compatible with another. The notable exception would be when one
+board is a carrier for another, such as a CPU module attached to a
+carrier board.
+
+One more note on compatible values. Any string used in a compatible
+property must be documented as to what it indicates. Add
+documentation for compatible strings in Documentation/devicetree/bindings.
+
+Again on ARM, for each machine_desc, the kernel looks to see if
+any of the dt_compat list entries appear in the compatible property.
+If one does, then that machine_desc is a candidate for driving the
+machine. After searching the entire table of machine_descs,
+setup_machine_fdt() returns the 'most compatible' machine_desc based
+on which entry in the compatible property each machine_desc matches
+against. If no matching machine_desc is found, then it returns NULL.
+
+The reasoning behind this scheme is the observation that in the majority
+of cases, a single machine_desc can support a large number of boards
+if they all use the same SoC, or same family of SoCs. However,
+invariably there will be some exceptions where a specific board will
+require special setup code that is not useful in the generic case.
+Special cases could be handled by explicitly checking for the
+troublesome board(s) in generic setup code, but doing so very quickly
+becomes ugly and/or unmaintainable if it is more than just a couple of
+cases.
+
+Instead, the compatible list allows a generic machine_desc to provide
+support for a wide common set of boards by specifying "less
+compatible" value in the dt_compat list. In the example above,
+generic board support can claim compatibility with "ti,omap3" or
+"ti,omap3450". If a bug was discovered on the original beagleboard
+that required special workaround code during early boot, then a new
+machine_desc could be added which implements the workarounds and only
+matches on "ti,omap3-beagleboard".
+
+PowerPC uses a slightly different scheme where it calls the .probe()
+hook from each machine_desc, and the first one returning TRUE is used.
+However, this approach does not take into account the priority of the
+compatible list, and probably should be avoided for new architecture
+support.
+
+<h4>Runtime configuration</h4>
+In most cases, a DT will be the sole method of communicating data from
+firmware to the kernel, so also gets used to pass in runtime and
+configuration data like the kernel parameters string and the location
+of an initrd image.
+
+Most of this data is contained in the /chosen node, and when booting
+Linux it will look something like this:
+
+ chosen {
+ bootargs = "console=ttyS0,115200 loglevel=8";
+ initrd-start = &lt;0xc8000000&gt;;
+ initrd-end = &lt;0xc8200000&gt;;
+ };
+
+The bootargs property contains the kernel arguments, and the initrd-*
+properties define the address and size of an initrd blob. The
+chosen node may also optionally contain an arbitrary number of
+additional properties for platform-specific configuration data.
+
+During early boot, the architecture setup code calls of_scan_flat_dt()
+several times with different helper callbacks to parse device tree
+data before paging is setup. The of_scan_flat_dt() code scans through
+the device tree and uses the helpers to extract information required
+during early boot. Typically the early_init_dt_scan_chosen() helper
+is used to parse the chosen node including kernel parameters,
+early_init_dt_scan_root() to initialize the DT address space model,
+and early_init_dt_scan_memory() to determine the size and
+location of usable RAM.
+
+On ARM, the function setup_machine_fdt() is responsible for early
+scanning of the device tree after selecting the correct machine_desc
+that supports the board.
+
+<h4>Device population</h4>
+After the board has been identified, and after the early configuration data
+has been parsed, then kernel initialization can proceed in the normal
+way. At some point in this process, unflatten_device_tree() is called
+to convert the data into a more efficient runtime representation.
+This is also when machine-specific setup hooks will get called, like
+the machine_desc .init_early(), .init_irq() and .init_machine() hooks
+on ARM. The remainder of this section uses examples from the ARM
+implementation, but all architectures will do pretty much the same
+thing when using a DT.
+
+As can be guessed by the names, .init_early() is used for any machine-
+specific setup that needs to be executed early in the boot process,
+and .init_irq() is used to set up interrupt handling. Using a DT
+doesn't materially change the behaviour of either of these functions.
+If a DT is provided, then both .init_early() and .init_irq() are able
+to call any of the DT query functions (of_* in include/linux/of*.h) to
+get additional data about the platform.
+
+The most interesting hook in the DT context is .init_machine() which
+is primarily responsible for populating the Linux device model with
+data about the platform. Historically this has been implemented on
+embedded platforms by defining a set of static clock structures,
+platform_devices, and other data in the board support .c file, and
+registering it en-masse in .init_machine(). When DT is used, then
+instead of hard coding static devices for each platform, the list of
+devices can be obtained by parsing the DT, and allocating device
+structures dynamically.
+
+The simplest case is when .init_machine() is only responsible for
+registering a block of platform_devices. A platform_device is a concept
+used by Linux for memory or I/O mapped devices which cannot be detected
+by hardware, and for 'composite' or 'virtual' devices (more on those
+later). While there is no 'platform device' terminology for the DT,
+platform devices roughly correspond to device nodes at the root of the
+tree and children of simple memory mapped bus nodes.
+
+About now is a good time to lay out an example. Here is part of the
+device tree for the NVIDIA Tegra board.
+
+/{
+ compatible = "nvidia,harmony", "nvidia,tegra20";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-parent = <&intc>;
+
+ chosen { };
+ aliases { };
+
+ memory {
+ device_type = "memory";
+ reg = <0x00000000 0x40000000>;
+ };
+
+ soc {
+ compatible = "nvidia,tegra20-soc", "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ intc: interrupt-controller@50041000 {
+ compatible = "nvidia,tegra20-gic";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ reg = <0x50041000 0x1000>, < 0x50040100 0x0100 >;
+ };
+
+ serial@70006300 {
+ compatible = "nvidia,tegra20-uart";
+ reg = <0x70006300 0x100>;
+ interrupts = <122>;
+ };
+
+ i2s-1: i2s@70002800 {
+ compatible = "nvidia,tegra20-i2s";
+ reg = <0x70002800 0x100>;
+ interrupts = <77>;
+ codec = <&wm8903>;
+ };
+
+ i2c@7000c000 {
+ compatible = "nvidia,tegra20-i2c";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x7000c000 0x100>;
+ interrupts = <70>;
+
+ wm8903: codec@1a {
+ compatible = "wlf,wm8903";
+ reg = <0x1a>;
+ interrupts = <347>;
+ };
+ };
+ };
+
+ sound {
+ compatible = "nvidia,harmony-sound";
+ i2s-controller = <&i2s-1>;
+ i2s-codec = <&wm8903>;
+ };
+};
+
+At .machine_init() time, Tegra board support code will need to look at
+this DT and decide which nodes to create platform_devices for.
+However, looking at the tree, it is not immediately obvious what kind
+of device each node represents, or even if a node represents a device
+at all. The /chosen, /aliases, and /memory nodes are informational
+nodes that don't describe devices (although arguably memory could be
+considered a device). The children of the /soc node are memory mapped
+devices, but the codec@1a is an i2c device, and the sound node
+represents not a device, but rather how other devices are connected
+together to create the audio subsystem. I know what each device is
+because I'm familiar with the board design, but how does the kernel
+know what to do with each node?
+
+The trick is that the kernel starts at the root of the tree and looks
+for nodes that have a 'compatible' property. First, it is generally
+assumed that any node with a 'compatible' property represents a device
+of some kind, and second, it can be assumed that any node at the root
+of the tree is either directly attached to the processor bus, or is a
+miscellaneous system device that cannot be described any other way.
+For each of these nodes, Linux allocates and registers a
+platform_device, which in turn may get bound to a platform_driver.
+
+Why is using a platform_device for these nodes a safe assumption?
+Well, for the way that Linux models devices, just about all bus_types
+assume that its devices are children of a bus controller. For
+example, each i2c_client is a child of an i2c_master. Each spi_device
+is a child of an SPI bus. Similarly for USB, PCI, MDIO, etc. The
+same hierarchy is also found in the DT, where I2C device nodes only
+ever appear as children of an I2C bus node. Ditto for SPI, MDIO, USB,
+etc. The only devices which do not require a specific type of parent
+device are platform_devices (and amba_devices, but more on that
+later), which will happily live at the base of the Linux /sys/devices
+tree. Therefore, if a DT node is at the root of the tree, then it
+really probably is best registered as a platform_device.
+
+Linux board support code calls of_platform_populate(NULL, NULL, NULL)
+to kick off discovery of devices at the root of the tree. The
+parameters are all NULL because when starting from the root of the
+tree, there is no need to provide a starting node (the first NULL), a
+parent struct device (the last NULL), and we're not using a match
+table (yet). For a board that only needs to register devices,
+.init_machine() can be completely empty except for the
+of_platform_populate() call.
+
+In the Tegra example, this accounts for the /soc and /sound nodes, but
+what about the children of the SoC node? Shouldn't they be registered
+as platform devices too? For Linux DT support, the generic behaviour
+is for child devices to be registered by the parent's device driver at
+driver .probe() time. So, an i2c bus device driver will register a
+i2c_client for each child node, an SPI bus driver will register
+its spi_device children, and similarly for other bus_types.
+According to that model, a driver could be written that binds to the
+SoC node and simply registers platform_devices for each of its
+children. The board support code would allocate and register an SoC
+device, an SoC device driver would bind to the SoC device, and
+register platform_devices for /soc/interrupt-controller, /soc/serial,
+/soc/i2s, and /soc/i2c in its .probe() hook. Easy, right? Although
+it is a lot of mucking about for just registering platform devices.
+
+It turns out that registering children of certain platform_devices as
+more platform_devices is a common pattern, and the device tree support
+code reflects that. The second argument to of_platform_populate() is
+an of_device_id table, and any node that matches an entry in that
+table will also get its child nodes registered. In the tegra case,
+the code can look something like this:
+
+static struct of_device_id harmony_bus_ids[] __initdata = {
+ { .compatible = "simple-bus", },
+ {}
+};
+
+static void __init harmony_init_machine(void)
+{
+ /* ... */
+ of_platform_populate(NULL, harmony_bus_ids, NULL);
+}
+
+"simple-bus" is defined in the ePAPR 1.0 specification as a property
+meaning a simple memory mapped bus, so the of_platform_populate() code
+could be written to just assume simple-bus compatible nodes will
+always be traversed. However, we pass it in as an argument so that
+board support code can always override the default behaviour.
+
+<h2>Appendix A: AMBA devices</h2>
+
+ARM Primecells are a certain kind of device attached to the ARM AMBA
+bus which include some support for hardware detection and power
+management. In Linux, struct amba_device and the amba_bus_type is
+used to represent Primecell devices. However, the fiddly bit is that
+not all devices on an AMBA bus are Primecells, and for Linux it is
+typical for both amba_device and platform_device instances to be
+siblings of the same bus segment.
+
+When using the DT, this creates problems for of_platform_populate()
+because it must decide whether to register each node as either a
+platform_device or an amba_device. This unfortunately complicates the
+device creation model a little bit, but the solution turns out not to
+be too invasive. If a node is compatible with "arm,amba-primecell", then
+of_platform_populate() will register it as an amba_device instead of a
+platform_device.
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 1a9446b5915..b1c921c2751 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -481,23 +481,6 @@ Who: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
----------------------------
-What: namespace cgroup (ns_cgroup)
-When: 2.6.38
-Why: The ns_cgroup leads to some problems:
- * cgroup creation is out-of-control
- * cgroup name can conflict when pids are looping
- * it is not possible to have a single process handling
- a lot of namespaces without falling in a exponential creation time
- * we may want to create a namespace without creating a cgroup
-
- The ns_cgroup is replaced by a compatibility flag 'clone_children',
- where a newly created cgroup will copy the parent cgroup values.
- The userspace has to manually create a cgroup and add a task to
- the 'tasks' file.
-Who: Daniel Lezcano <daniel.lezcano@free.fr>
-
-----------------------------
-
What: iwlwifi disable_hw_scan module parameters
When: 2.6.40
Why: Hareware scan is the prefer method for iwlwifi devices for
@@ -600,3 +583,25 @@ Why: Superseded by the UVCIOC_CTRL_QUERY ioctl.
Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
----------------------------
+
+What: For VIDIOC_S_FREQUENCY the type field must match the device node's type.
+ If not, return -EINVAL.
+When: 3.2
+Why: It makes no sense to switch the tuner to radio mode by calling
+ VIDIOC_S_FREQUENCY on a video node, or to switch the tuner to tv mode by
+ calling VIDIOC_S_FREQUENCY on a radio node. This is the first step of a
+ move to more consistent handling of tv and radio tuners.
+Who: Hans Verkuil <hans.verkuil@cisco.com>
+
+----------------------------
+
+What: Opening a radio device node will no longer automatically switch the
+ tuner mode from tv to radio.
+When: 3.3
+Why: Just opening a V4L device should not change the state of the hardware
+ like that. It's very unexpected and against the V4L spec. Instead, you
+ switch to radio mode by calling VIDIOC_S_FREQUENCY. This is the second
+ and last step of the move to consistent handling of tv and radio tuners.
+Who: Hans Verkuil <hans.verkuil@cisco.com>
+
+----------------------------
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt
index a167ab876c3..7cc6bf2871e 100644
--- a/Documentation/filesystems/caching/netfs-api.txt
+++ b/Documentation/filesystems/caching/netfs-api.txt
@@ -673,6 +673,22 @@ storage request to complete, or it may attempt to cancel the storage request -
in which case the page will not be stored in the cache this time.
+BULK INODE PAGE UNCACHE
+-----------------------
+
+A convenience routine is provided to perform an uncache on all the pages
+attached to an inode. This assumes that the pages on the inode correspond on a
+1:1 basis with the pages in the cache.
+
+ void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
+ struct inode *inode);
+
+This takes the netfs cookie that the pages were cached with and the inode that
+the pages are attached to. This function will wait for pages to finish being
+written to the cache and for the cache to finish with the page generally. No
+error is returned.
+
+
==========================
INDEX AND DATA FILE UPDATE
==========================
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt
index d5c0cef38a7..873a2ab2e9f 100644
--- a/Documentation/filesystems/nilfs2.txt
+++ b/Documentation/filesystems/nilfs2.txt
@@ -40,7 +40,6 @@ Features which NILFS2 does not support yet:
- POSIX ACLs
- quotas
- fsck
- - resize
- defragmentation
Mount options
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index f4817802406..db3b1aba32a 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -843,6 +843,7 @@ Provides counts of softirq handlers serviced since boot time, for each cpu.
TASKLET: 0 0 0 290
SCHED: 27035 26983 26971 26746
HRTIMER: 0 0 0 0
+ RCU: 1678 1769 2178 2250
1.3 IDE devices in /proc/ide
diff --git a/Documentation/hwmon/f71882fg b/Documentation/hwmon/f71882fg
index 84d2623810f..de91c0db584 100644
--- a/Documentation/hwmon/f71882fg
+++ b/Documentation/hwmon/f71882fg
@@ -22,6 +22,10 @@ Supported chips:
Prefix: 'f71869'
Addresses scanned: none, address read from Super I/O config space
Datasheet: Available from the Fintek website
+ * Fintek F71869A
+ Prefix: 'f71869a'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Not public
* Fintek F71882FG and F71883FG
Prefix: 'f71882fg'
Addresses scanned: none, address read from Super I/O config space
diff --git a/Documentation/hwmon/k10temp b/Documentation/hwmon/k10temp
index 0393c89277c..a10f73624ad 100644
--- a/Documentation/hwmon/k10temp
+++ b/Documentation/hwmon/k10temp
@@ -9,8 +9,8 @@ Supported chips:
Socket S1G3: Athlon II, Sempron, Turion II
* AMD Family 11h processors:
Socket S1G2: Athlon (X2), Sempron (X2), Turion X2 (Ultra)
-* AMD Family 12h processors: "Llano"
-* AMD Family 14h processors: "Brazos" (C/E/G-Series)
+* AMD Family 12h processors: "Llano" (E2/A4/A6/A8-Series)
+* AMD Family 14h processors: "Brazos" (C/E/G/Z-Series)
* AMD Family 15h processors: "Bulldozer"
Prefix: 'k10temp'
@@ -20,12 +20,16 @@ Supported chips:
http://support.amd.com/us/Processor_TechDocs/31116.pdf
BIOS and Kernel Developer's Guide (BKDG) for AMD Family 11h Processors:
http://support.amd.com/us/Processor_TechDocs/41256.pdf
+ BIOS and Kernel Developer's Guide (BKDG) for AMD Family 12h Processors:
+ http://support.amd.com/us/Processor_TechDocs/41131.pdf
BIOS and Kernel Developer's Guide (BKDG) for AMD Family 14h Models 00h-0Fh Processors:
http://support.amd.com/us/Processor_TechDocs/43170.pdf
Revision Guide for AMD Family 10h Processors:
http://support.amd.com/us/Processor_TechDocs/41322.pdf
Revision Guide for AMD Family 11h Processors:
http://support.amd.com/us/Processor_TechDocs/41788.pdf
+ Revision Guide for AMD Family 12h Processors:
+ http://support.amd.com/us/Processor_TechDocs/44739.pdf
Revision Guide for AMD Family 14h Models 00h-0Fh Processors:
http://support.amd.com/us/Processor_TechDocs/47534.pdf
AMD Family 11h Processor Power and Thermal Data Sheet for Notebooks:
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5438a2d7907..aa47be71df4 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -999,7 +999,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
With this option on every unmap_single operation will
result in a hardware IOTLB flush operation as opposed
to batching them for performance.
-
+ sp_off [Default Off]
+ By default, super page will be supported if Intel IOMMU
+ has the capability. With this option, super page will
+ not be supported.
intremap= [X86-64, Intel-IOMMU]
Format: { on (default) | off | nosid }
on enable Interrupt Remapping (default)
@@ -2012,6 +2015,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
the default.
off: Turn ECRC off
on: Turn ECRC on.
+ realloc reallocate PCI resources if allocations done by BIOS
+ are erroneous.
pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power
Management.
@@ -2595,6 +2600,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
unlock ejectable media);
m = MAX_SECTORS_64 (don't transfer more
than 64 sectors = 32 KB at a time);
+ n = INITIAL_READ10 (force a retry of the
+ initial READ(10) command);
o = CAPACITY_OK (accept the capacity
reported by the device);
r = IGNORE_RESIDUE (the device reports
diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt
index 090e6ee0453..51063e681ca 100644
--- a/Documentation/kmemleak.txt
+++ b/Documentation/kmemleak.txt
@@ -11,7 +11,9 @@ with the difference that the orphan objects are not freed but only
reported via /sys/kernel/debug/kmemleak. A similar method is used by the
Valgrind tool (memcheck --leak-check) to detect the memory leaks in
user-space applications.
-Kmemleak is supported on x86, arm, powerpc, sparc, sh, microblaze and tile.
+
+Please check DEBUG_KMEMLEAK dependencies in lib/Kconfig.debug for supported
+architectures.
Usage
-----
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index 1565eefd6fd..61815483efa 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -534,6 +534,8 @@ Events that are never propagated by the driver:
0x2404 System is waking up from hibernation to undock
0x2405 System is waking up from hibernation to eject bay
0x5010 Brightness level changed/control event
+0x6000 KEYBOARD: Numlock key pressed
+0x6005 KEYBOARD: Fn key pressed (TO BE VERIFIED)
Events that are propagated by the driver to userspace:
@@ -545,6 +547,8 @@ Events that are propagated by the driver to userspace:
0x3006 Bay hotplug request (hint to power up SATA link when
the optical drive tray is ejected)
0x4003 Undocked (see 0x2x04), can sleep again
+0x4010 Docked into hotplug port replicator (non-ACPI dock)
+0x4011 Undocked from hotplug port replicator (non-ACPI dock)
0x500B Tablet pen inserted into its storage bay
0x500C Tablet pen removed from its storage bay
0x6011 ALARM: battery is too hot
@@ -552,6 +556,7 @@ Events that are propagated by the driver to userspace:
0x6021 ALARM: a sensor is too hot
0x6022 ALARM: a sensor is extremely hot
0x6030 System thermal table changed
+0x6040 Nvidia Optimus/AC adapter related (TO BE VERIFIED)
Battery nearly empty alarms are a last resort attempt to get the
operating system to hibernate or shutdown cleanly (0x2313), or shutdown
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 2366b1c8cf1..f0eee83ff78 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -555,7 +555,7 @@ also have
sync_min
sync_max
The two values, given as numbers of sectors, indicate a range
- withing the array where 'check'/'repair' will operate. Must be
+ within the array where 'check'/'repair' will operate. Must be
a multiple of chunk_size. When it reaches "sync_max" it will
pause, rather than complete.
You can use 'select' or 'poll' on "sync_completed" to wait for
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index d3d653a5f9b..bfe924217f2 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -346,7 +346,7 @@ tcp_orphan_retries - INTEGER
when RTO retransmissions remain unacknowledged.
See tcp_retries2 for more details.
- The default value is 7.
+ The default value is 8.
If your machine is a loaded WEB server,
you should think about lowering this value, such sockets
may consume significant resources. Cf. tcp_max_orphans.
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
index 88880839ece..64565aac6e4 100644
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@@ -520,59 +520,20 @@ Support for power domains is provided through the pwr_domain field of struct
device. This field is a pointer to an object of type struct dev_power_domain,
defined in include/linux/pm.h, providing a set of power management callbacks
analogous to the subsystem-level and device driver callbacks that are executed
-for the given device during all power transitions, in addition to the respective
-subsystem-level callbacks. Specifically, the power domain "suspend" callbacks
-(i.e. ->runtime_suspend(), ->suspend(), ->freeze(), ->poweroff(), etc.) are
-executed after the analogous subsystem-level callbacks, while the power domain
-"resume" callbacks (i.e. ->runtime_resume(), ->resume(), ->thaw(), ->restore,
-etc.) are executed before the analogous subsystem-level callbacks. Error codes
-returned by the "suspend" and "resume" power domain callbacks are ignored.
-
-Power domain ->runtime_idle() callback is executed before the subsystem-level
-->runtime_idle() callback and the result returned by it is not ignored. Namely,
-if it returns error code, the subsystem-level ->runtime_idle() callback will not
-be called and the helper function rpm_idle() executing it will return error
-code. This mechanism is intended to help platforms where saving device state
-is a time consuming operation and should only be carried out if all devices
-in the power domain are idle, before turning off the shared power resource(s).
-Namely, the power domain ->runtime_idle() callback may return error code until
-the pm_runtime_idle() helper (or its asychronous version) has been called for
-all devices in the power domain (it is recommended that the returned error code
-be -EBUSY in those cases), preventing the subsystem-level ->runtime_idle()
-callback from being run prematurely.
-
-The support for device power domains is only relevant to platforms needing to
-use the same subsystem-level (e.g. platform bus type) and device driver power
-management callbacks in many different power domain configurations and wanting
-to avoid incorporating the support for power domains into the subsystem-level
-callbacks. The other platforms need not implement it or take it into account
-in any way.
-
-
-System Devices
---------------
-System devices (sysdevs) follow a slightly different API, which can be found in
-
- include/linux/sysdev.h
- drivers/base/sys.c
-
-System devices will be suspended with interrupts disabled, and after all other
-devices have been suspended. On resume, they will be resumed before any other
-devices, and also with interrupts disabled. These things occur in special
-"sysdev_driver" phases, which affect only system devices.
-
-Thus, after the suspend_noirq (or freeze_noirq or poweroff_noirq) phase, when
-the non-boot CPUs are all offline and IRQs are disabled on the remaining online
-CPU, then a sysdev_driver.suspend phase is carried out, and the system enters a
-sleep state (or a system image is created). During resume (or after the image
-has been created or loaded) a sysdev_driver.resume phase is carried out, IRQs
-are enabled on the only online CPU, the non-boot CPUs are enabled, and the
-resume_noirq (or thaw_noirq or restore_noirq) phase begins.
-
-Code to actually enter and exit the system-wide low power state sometimes
-involves hardware details that are only known to the boot firmware, and
-may leave a CPU running software (from SRAM or flash memory) that monitors
-the system and manages its wakeup sequence.
+for the given device during all power transitions, instead of the respective
+subsystem-level callbacks. Specifically, if a device's pm_domain pointer is
+not NULL, the ->suspend() callback from the object pointed to by it will be
+executed instead of its subsystem's (e.g. bus type's) ->suspend() callback and
+anlogously for all of the remaining callbacks. In other words, power management
+domain callbacks, if defined for the given device, always take precedence over
+the callbacks provided by the device's subsystem (e.g. bus type).
+
+The support for device power management domains is only relevant to platforms
+needing to use the same device driver power management callbacks in many
+different power domain configurations and wanting to avoid incorporating the
+support for power domains into subsystem-level callbacks, for example by
+modifying the platform bus type. Other platforms need not implement it or take
+it into account in any way.
Device Low Power (suspend) States
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 654097b130b..b24875b1ced 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -501,13 +501,29 @@ helper functions described in Section 4. In that case, pm_runtime_resume()
should be used. Of course, for this purpose the device's run-time PM has to be
enabled earlier by calling pm_runtime_enable().
-If the device bus type's or driver's ->probe() or ->remove() callback runs
+If the device bus type's or driver's ->probe() callback runs
pm_runtime_suspend() or pm_runtime_idle() or their asynchronous counterparts,
they will fail returning -EAGAIN, because the device's usage counter is
-incremented by the core before executing ->probe() and ->remove(). Still, it
-may be desirable to suspend the device as soon as ->probe() or ->remove() has
-finished, so the PM core uses pm_runtime_idle_sync() to invoke the
-subsystem-level idle callback for the device at that time.
+incremented by the driver core before executing ->probe(). Still, it may be
+desirable to suspend the device as soon as ->probe() has finished, so the driver
+core uses pm_runtime_put_sync() to invoke the subsystem-level idle callback for
+the device at that time.
+
+Moreover, the driver core prevents runtime PM callbacks from racing with the bus
+notifier callback in __device_release_driver(), which is necessary, because the
+notifier is used by some subsystems to carry out operations affecting the
+runtime PM functionality. It does so by calling pm_runtime_get_sync() before
+driver_sysfs_remove() and the BUS_NOTIFY_UNBIND_DRIVER notifications. This
+resumes the device if it's in the suspended state and prevents it from
+being suspended again while those routines are being executed.
+
+To allow bus types and drivers to put devices into the suspended state by
+calling pm_runtime_suspend() from their ->remove() routines, the driver core
+executes pm_runtime_put_sync() after running the BUS_NOTIFY_UNBIND_DRIVER
+notifications in __device_release_driver(). This requires bus types and
+drivers to make their ->remove() callbacks avoid races with runtime PM directly,
+but also it allows of more flexibility in the handling of devices during the
+removal of their drivers.
The user space can effectively disallow the driver of the device to power manage
it at run time by changing the value of its /sys/devices/.../power/control
@@ -566,11 +582,6 @@ to do this is:
pm_runtime_set_active(dev);
pm_runtime_enable(dev);
-The PM core always increments the run-time usage counter before calling the
-->prepare() callback and decrements it after calling the ->complete() callback.
-Hence disabling run-time PM temporarily like this will not cause any run-time
-suspend callbacks to be lost.
-
7. Generic subsystem callbacks
Subsystems may wish to conserve code space by using the set of generic power
diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt
index 1b5a5ddbc3e..5df176ed59b 100644
--- a/Documentation/printk-formats.txt
+++ b/Documentation/printk-formats.txt
@@ -9,7 +9,121 @@ If variable is of Type, use printk format specifier:
size_t %zu or %zx
ssize_t %zd or %zx
-Raw pointer value SHOULD be printed with %p.
+Raw pointer value SHOULD be printed with %p. The kernel supports
+the following extended format specifiers for pointer types:
+
+Symbols/Function Pointers:
+
+ %pF versatile_init+0x0/0x110
+ %pf versatile_init
+ %pS versatile_init+0x0/0x110
+ %ps versatile_init
+ %pB prev_fn_of_versatile_init+0x88/0x88
+
+ For printing symbols and function pointers. The 'S' and 's' specifiers
+ result in the symbol name with ('S') or without ('s') offsets. Where
+ this is used on a kernel without KALLSYMS - the symbol address is
+ printed instead.
+
+ The 'B' specifier results in the symbol name with offsets and should be
+ used when printing stack backtraces. The specifier takes into
+ consideration the effect of compiler optimisations which may occur
+ when tail-call's are used and marked with the noreturn GCC attribute.
+
+ On ia64, ppc64 and parisc64 architectures function pointers are
+ actually function descriptors which must first be resolved. The 'F' and
+ 'f' specifiers perform this resolution and then provide the same
+ functionality as the 'S' and 's' specifiers.
+
+Kernel Pointers:
+
+ %pK 0x01234567 or 0x0123456789abcdef
+
+ For printing kernel pointers which should be hidden from unprivileged
+ users. The behaviour of %pK depends on the kptr_restrict sysctl - see
+ Documentation/sysctl/kernel.txt for more details.
+
+Struct Resources:
+
+ %pr [mem 0x60000000-0x6fffffff flags 0x2200] or
+ [mem 0x0000000060000000-0x000000006fffffff flags 0x2200]
+ %pR [mem 0x60000000-0x6fffffff pref] or
+ [mem 0x0000000060000000-0x000000006fffffff pref]
+
+ For printing struct resources. The 'R' and 'r' specifiers result in a
+ printed resource with ('R') or without ('r') a decoded flags member.
+
+MAC/FDDI addresses:
+
+ %pM 00:01:02:03:04:05
+ %pMF 00-01-02-03-04-05
+ %pm 000102030405
+
+ For printing 6-byte MAC/FDDI addresses in hex notation. The 'M' and 'm'
+ specifiers result in a printed address with ('M') or without ('m') byte
+ separators. The default byte separator is the colon (':').
+
+ Where FDDI addresses are concerned the 'F' specifier can be used after
+ the 'M' specifier to use dash ('-') separators instead of the default
+ separator.
+
+IPv4 addresses:
+
+ %pI4 1.2.3.4
+ %pi4 001.002.003.004
+ %p[Ii][hnbl]
+
+ For printing IPv4 dot-separated decimal addresses. The 'I4' and 'i4'
+ specifiers result in a printed address with ('i4') or without ('I4')
+ leading zeros.
+
+ The additional 'h', 'n', 'b', and 'l' specifiers are used to specify
+ host, network, big or little endian order addresses respectively. Where
+ no specifier is provided the default network/big endian order is used.
+
+IPv6 addresses:
+
+ %pI6 0001:0002:0003:0004:0005:0006:0007:0008
+ %pi6 00010002000300040005000600070008
+ %pI6c 1:2:3:4:5:6:7:8
+
+ For printing IPv6 network-order 16-bit hex addresses. The 'I6' and 'i6'
+ specifiers result in a printed address with ('I6') or without ('i6')
+ colon-separators. Leading zeros are always used.
+
+ The additional 'c' specifier can be used with the 'I' specifier to
+ print a compressed IPv6 address as described by
+ http://tools.ietf.org/html/rfc5952
+
+UUID/GUID addresses:
+
+ %pUb 00010203-0405-0607-0809-0a0b0c0d0e0f
+ %pUB 00010203-0405-0607-0809-0A0B0C0D0E0F
+ %pUl 03020100-0504-0706-0809-0a0b0c0e0e0f
+ %pUL 03020100-0504-0706-0809-0A0B0C0E0E0F
+
+ For printing 16-byte UUID/GUIDs addresses. The additional 'l', 'L',
+ 'b' and 'B' specifiers are used to specify a little endian order in
+ lower ('l') or upper case ('L') hex characters - and big endian order
+ in lower ('b') or upper case ('B') hex characters.
+
+ Where no additional specifiers are used the default little endian
+ order with lower case hex characters will be printed.
+
+struct va_format:
+
+ %pV
+
+ For printing struct va_format structures. These contain a format string
+ and va_list as follows:
+
+ struct va_format {
+ const char *fmt;
+ va_list *va;
+ };
+
+ Do not use this feature without some mechanism to verify the
+ correctness of the format string and va_list arguments.
u64 SHOULD be printed with %llu/%llx, (unsigned long long):
@@ -32,4 +146,5 @@ Reminder: sizeof() result is of type size_t.
Thank you for your cooperation and attention.
-By Randy Dunlap <rdunlap@xenotime.net>
+By Randy Dunlap <rdunlap@xenotime.net> and
+Andrew Murray <amurray@mpc-data.co.uk>
diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt
index 99961993257..91ecff07ced 100644
--- a/Documentation/scheduler/sched-design-CFS.txt
+++ b/Documentation/scheduler/sched-design-CFS.txt
@@ -223,9 +223,10 @@ When CONFIG_FAIR_GROUP_SCHED is defined, a "cpu.shares" file is created for each
group created using the pseudo filesystem. See example steps below to create
task groups and modify their CPU share using the "cgroups" pseudo filesystem.
- # mkdir /dev/cpuctl
- # mount -t cgroup -ocpu none /dev/cpuctl
- # cd /dev/cpuctl
+ # mount -t tmpfs cgroup_root /sys/fs/cgroup
+ # mkdir /sys/fs/cgroup/cpu
+ # mount -t cgroup -ocpu none /sys/fs/cgroup/cpu
+ # cd /sys/fs/cgroup/cpu
# mkdir multimedia # create "multimedia" group of tasks
# mkdir browser # create "browser" group of tasks
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt
index 605b0d40329..71b54d54998 100644
--- a/Documentation/scheduler/sched-rt-group.txt
+++ b/Documentation/scheduler/sched-rt-group.txt
@@ -129,9 +129,8 @@ priority!
Enabling CONFIG_RT_GROUP_SCHED lets you explicitly allocate real
CPU bandwidth to task groups.
-This uses the /cgroup virtual file system and
-"/cgroup/<cgroup>/cpu.rt_runtime_us" to control the CPU time reserved for each
-control group.
+This uses the cgroup virtual file system and "<cgroup>/cpu.rt_runtime_us"
+to control the CPU time reserved for each control group.
For more information on working with control groups, you should read
Documentation/cgroups/cgroups.txt as well.
@@ -150,7 +149,7 @@ For now, this can be simplified to just the following (but see Future plans):
===============
There is work in progress to make the scheduling period for each group
-("/cgroup/<cgroup>/cpu.rt_period_us") configurable as well.
+("<cgroup>/cpu.rt_period_us") configurable as well.
The constraint on the period is that a subgroup must have a smaller or
equal period to its parent. But realistically its not very useful _yet_
diff --git a/Documentation/spinlocks.txt b/Documentation/spinlocks.txt
index 2e3c64b1a6a..9dbe885ecd8 100644
--- a/Documentation/spinlocks.txt
+++ b/Documentation/spinlocks.txt
@@ -13,18 +13,8 @@ static DEFINE_SPINLOCK(xxx_lock);
The above is always safe. It will disable interrupts _locally_, but the
spinlock itself will guarantee the global lock, so it will guarantee that
there is only one thread-of-control within the region(s) protected by that
-lock. This works well even under UP. The above sequence under UP
-essentially is just the same as doing
-
- unsigned long flags;
-
- save_flags(flags); cli();
- ... critical section ...
- restore_flags(flags);
-
-so the code does _not_ need to worry about UP vs SMP issues: the spinlocks
-work correctly under both (and spinlocks are actually more efficient on
-architectures that allow doing the "save_flags + cli" in one operation).
+lock. This works well even under UP also, so the code does _not_ need to
+worry about UP vs SMP issues: the spinlocks work correctly under both.
NOTE! Implications of spin_locks for memory are further described in:
@@ -36,27 +26,7 @@ The above is usually pretty simple (you usually need and want only one
spinlock for most things - using more than one spinlock can make things a
lot more complex and even slower and is usually worth it only for
sequences that you _know_ need to be split up: avoid it at all cost if you
-aren't sure). HOWEVER, it _does_ mean that if you have some code that does
-
- cli();
- .. critical section ..
- sti();
-
-and another sequence that does
-
- spin_lock_irqsave(flags);
- .. critical section ..
- spin_unlock_irqrestore(flags);
-
-then they are NOT mutually exclusive, and the critical regions can happen
-at the same time on two different CPU's. That's fine per se, but the
-critical regions had better be critical for different things (ie they
-can't stomp on each other).
-
-The above is a problem mainly if you end up mixing code - for example the
-routines in ll_rw_block() tend to use cli/sti to protect the atomicity of
-their actions, and if a driver uses spinlocks instead then you should
-think about issues like the above.
+aren't sure).
This is really the only really hard part about spinlocks: once you start
using spinlocks they tend to expand to areas you might not have noticed
@@ -120,11 +90,10 @@ Lesson 3: spinlocks revisited.
The single spin-lock primitives above are by no means the only ones. They
are the most safe ones, and the ones that work under all circumstances,
-but partly _because_ they are safe they are also fairly slow. They are
-much faster than a generic global cli/sti pair, but slower than they'd
-need to be, because they do have to disable interrupts (which is just a
-single instruction on a x86, but it's an expensive one - and on other
-architectures it can be worse).
+but partly _because_ they are safe they are also fairly slow. They are slower
+than they'd need to be, because they do have to disable interrupts
+(which is just a single instruction on a x86, but it's an expensive one -
+and on other architectures it can be worse).
If you have a case where you have to protect a data structure across
several CPU's and you want to use spinlocks you can potentially use
diff --git a/Documentation/usb/error-codes.txt b/Documentation/usb/error-codes.txt
index d83703ea74b..b3f606b81a0 100644
--- a/Documentation/usb/error-codes.txt
+++ b/Documentation/usb/error-codes.txt
@@ -76,6 +76,13 @@ A transfer's actual_length may be positive even when an error has been
reported. That's because transfers often involve several packets, so that
one or more packets could finish before an error stops further endpoint I/O.
+For isochronous URBs, the urb status value is non-zero only if the URB is
+unlinked, the device is removed, the host controller is disabled, or the total
+transferred length is less than the requested length and the URB_SHORT_NOT_OK
+flag is set. Completion handlers for isochronous URBs should only see
+urb->status set to zero, -ENOENT, -ECONNRESET, -ESHUTDOWN, or -EREMOTEIO.
+Individual frame descriptor status fields may report more status codes.
+
0 Transfer completed successfully
@@ -132,7 +139,7 @@ one or more packets could finish before an error stops further endpoint I/O.
device removal events immediately.
-EXDEV ISO transfer only partially completed
- look at individual frame status for details
+ (only set in iso_frame_desc[n].status, not urb->status)
-EINVAL ISO madness, if this happens: Log off and go home
diff --git a/Documentation/virtual/lguest/Makefile b/Documentation/virtual/lguest/Makefile
index bebac6b4f33..0ac34206f7a 100644
--- a/Documentation/virtual/lguest/Makefile
+++ b/Documentation/virtual/lguest/Makefile
@@ -1,5 +1,5 @@
# This creates the demonstration utility "lguest" which runs a Linux guest.
-# Missing headers? Add "-I../../include -I../../arch/x86/include"
+# Missing headers? Add "-I../../../include -I../../../arch/x86/include"
CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE
all: lguest
diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c
index d9da7e14853..cd9d6af61d0 100644
--- a/Documentation/virtual/lguest/lguest.c
+++ b/Documentation/virtual/lguest/lguest.c
@@ -49,7 +49,7 @@
#include <linux/virtio_rng.h>
#include <linux/virtio_ring.h>
#include <asm/bootparam.h>
-#include "../../include/linux/lguest_launcher.h"
+#include "../../../include/linux/lguest_launcher.h"
/*L:110
* We can ignore the 42 include files we need for this program, but I do want
* to draw attention to the use of kernel-style types.
@@ -135,9 +135,6 @@ struct device {
/* Is it operational */
bool running;
- /* Does Guest want an intrrupt on empty? */
- bool irq_on_empty;
-
/* Device-specific data. */
void *priv;
};
@@ -637,10 +634,7 @@ static void trigger_irq(struct virtqueue *vq)
/* If they don't want an interrupt, don't send one... */
if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
- /* ... unless they've asked us to force one on empty. */
- if (!vq->dev->irq_on_empty
- || lg_last_avail(vq) != vq->vring.avail->idx)
- return;
+ return;
}
/* Send the Guest an interrupt tell them we used something up. */
@@ -1057,15 +1051,6 @@ static void create_thread(struct virtqueue *vq)
close(vq->eventfd);
}
-static bool accepted_feature(struct device *dev, unsigned int bit)
-{
- const u8 *features = get_feature_bits(dev) + dev->feature_len;
-
- if (dev->feature_len < bit / CHAR_BIT)
- return false;
- return features[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT));
-}
-
static void start_device(struct device *dev)
{
unsigned int i;
@@ -1079,8 +1064,6 @@ static void start_device(struct device *dev)
verbose(" %02x", get_feature_bits(dev)
[dev->feature_len+i]);
- dev->irq_on_empty = accepted_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
-
for (vq = dev->vq; vq; vq = vq->next) {
if (vq->service)
create_thread(vq);
@@ -1564,7 +1547,6 @@ static void setup_tun_net(char *arg)
/* Set up the tun device. */
configure_device(ipfd, tapif, ip);
- add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
/* Expect Guest to handle everything except UFO */
add_feature(dev, VIRTIO_NET_F_CSUM);
add_feature(dev, VIRTIO_NET_F_GUEST_CSUM);
diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt
index 12f9ba20ccb..55006846660 100644
--- a/Documentation/vm/hwpoison.txt
+++ b/Documentation/vm/hwpoison.txt
@@ -129,12 +129,12 @@ Limit injection to pages owned by memgroup. Specified by inode number
of the memcg.
Example:
- mkdir /cgroup/hwpoison
+ mkdir /sys/fs/cgroup/mem/hwpoison
usemem -m 100 -s 1000 &
- echo `jobs -p` > /cgroup/hwpoison/tasks
+ echo `jobs -p` > /sys/fs/cgroup/mem/hwpoison/tasks
- memcg_ino=$(ls -id /cgroup/hwpoison | cut -f1 -d' ')
+ memcg_ino=$(ls -id /sys/fs/cgroup/mem/hwpoison | cut -f1 -d' ')
echo $memcg_ino > /debug/hwpoison/corrupt-filter-memcg
page-types -p `pidof init` --hwpoison # shall do nothing
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index 9b7221a86df..7c3a8801b7c 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -674,7 +674,7 @@ Protocol: 2.10+
Field name: init_size
Type: read
-Offset/size: 0x25c/4
+Offset/size: 0x260/4
This field indicates the amount of linear contiguous memory starting
at the kernel runtime start address that the kernel needs before it