summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2017-02-17 15:53:02 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2017-02-17 15:53:04 +1100
commit6641ce8e95e9cb5c678cf882f9eb7c7632fa1a2c (patch)
treee36b518353cf1b36c20a8b315de17ea8ee577c39
parent200445685b48a1f4eb8e4abb4ae46f22236b1197 (diff)
parent1224d22e9f1c57618e6db3737c17f8a1e0927d18 (diff)
Merge branch 'akpm-current/current'
-rw-r--r--Documentation/ABI/obsolete/sysfs-block-zram119
-rw-r--r--Documentation/ABI/testing/sysfs-block-zram101
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt8
-rw-r--r--Documentation/blockdev/zram.txt74
-rw-r--r--Documentation/filesystems/autofs4-mount-control.txt1
-rw-r--r--Documentation/filesystems/autofs4.txt39
-rw-r--r--Documentation/sysctl/vm.txt4
-rw-r--r--Documentation/trace/postprocess/trace-vmscan-postprocess.pl26
-rw-r--r--Documentation/vm/ksm.txt18
-rw-r--r--Documentation/vm/transhuge.txt8
-rw-r--r--MAINTAINERS6
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/alpha/include/asm/Kbuild2
-rw-r--r--arch/alpha/include/asm/current.h9
-rw-r--r--arch/arc/include/asm/kprobes.h6
-rw-r--r--arch/arm/include/asm/kprobes.h4
-rw-r--r--arch/arm/include/asm/page.h2
-rw-r--r--arch/arm/mm/dma-mapping.c16
-rw-r--r--arch/arm/probes/decode.h1
-rw-r--r--arch/arm64/include/asm/kprobes.h4
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c1
-rw-r--r--arch/arm64/kernel/insn.c1
-rw-r--r--arch/arm64/kernel/probes/decode-insn.h2
-rw-r--r--arch/arm64/mm/dma-mapping.c4
-rw-r--r--arch/avr32/include/asm/kprobes.h7
-rw-r--r--arch/blackfin/include/asm/Kbuild1
-rw-r--r--arch/c6x/include/asm/Kbuild1
-rw-r--r--arch/cris/include/asm/Kbuild2
-rw-r--r--arch/cris/include/asm/current.h15
-rw-r--r--arch/frv/include/asm/Kbuild1
-rw-r--r--arch/frv/mb93090-mb00/pci-frv.c11
-rw-r--r--arch/h8300/include/asm/Kbuild1
-rw-r--r--arch/hexagon/include/asm/Kbuild1
-rw-r--r--arch/ia64/include/asm/kprobes.h12
-rw-r--r--arch/ia64/mm/init.c48
-rw-r--r--arch/m32r/include/asm/Kbuild2
-rw-r--r--arch/m32r/include/asm/cmpxchg.h15
-rw-r--r--arch/m32r/include/asm/current.h15
-rw-r--r--arch/m68k/68000/bootlogo-vz.h4
-rw-r--r--arch/m68k/68000/bootlogo.h4
-rw-r--r--arch/m68k/include/asm/Kbuild1
-rw-r--r--arch/m68k/include/asm/MC68328.h3
-rw-r--r--arch/m68k/include/asm/MC68EZ328.h3
-rw-r--r--arch/m68k/include/asm/MC68VZ328.h2
-rw-r--r--arch/m68k/include/asm/natfeat.h3
-rw-r--r--arch/m68k/lib/ashldi3.c8
-rw-r--r--arch/m68k/lib/ashrdi3.c8
-rw-r--r--arch/m68k/lib/lshrdi3.c8
-rw-r--r--arch/m68k/lib/muldi3.c8
-rw-r--r--arch/metag/include/asm/Kbuild1
-rw-r--r--arch/microblaze/include/asm/Kbuild1
-rw-r--r--arch/mips/include/asm/kprobes.h6
-rw-r--r--arch/mips/kernel/vdso.c2
-rw-r--r--arch/mips/mm/dma-default.c4
-rw-r--r--arch/mn10300/include/asm/kprobes.h7
-rw-r--r--arch/nios2/include/asm/Kbuild1
-rw-r--r--arch/openrisc/include/asm/Kbuild1
-rw-r--r--arch/parisc/include/asm/Kbuild2
-rw-r--r--arch/parisc/include/asm/current.h15
-rw-r--r--arch/parisc/mm/init.c49
-rw-r--r--arch/powerpc/include/asm/kprobes.h3
-rw-r--r--arch/powerpc/include/asm/page.h4
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c3
-rw-r--r--arch/powerpc/lib/code-patching.c1
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c39
-rw-r--r--arch/powerpc/xmon/xmon.c2
-rw-r--r--arch/s390/include/asm/kprobes.h7
-rw-r--r--arch/s390/kernel/crash_dump.c1
-rw-r--r--arch/s390/mm/gmap.c2
-rw-r--r--arch/score/include/asm/Kbuild2
-rw-r--r--arch/score/include/asm/current.h6
-rw-r--r--arch/sh/include/asm/kprobes.h5
-rw-r--r--arch/sparc/include/asm/kprobes.h10
-rw-r--r--arch/sparc/kernel/setup_32.c2
-rw-r--r--arch/sparc/mm/init_32.c11
-rw-r--r--arch/tile/include/asm/kprobes.h6
-rw-r--r--arch/tile/mm/elf.c2
-rw-r--r--arch/tile/mm/pgtable.c45
-rw-r--r--arch/um/include/asm/Kbuild1
-rw-r--r--arch/unicore32/include/asm/Kbuild1
-rw-r--r--arch/unicore32/mm/init.c44
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/Kconfig.debug8
-rw-r--r--arch/x86/entry/vdso/vma.c2
-rw-r--r--arch/x86/include/asm/cacheflush.h10
-rw-r--r--arch/x86/include/asm/kprobes.h9
-rw-r--r--arch/x86/include/asm/paravirt.h11
-rw-r--r--arch/x86/include/asm/paravirt_types.h2
-rw-r--r--arch/x86/include/asm/pgtable-2level.h17
-rw-r--r--arch/x86/include/asm/pgtable-3level.h28
-rw-r--r--arch/x86/include/asm/pgtable.h140
-rw-r--r--arch/x86/include/asm/pgtable_64.h15
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/machine_kexec_64.c1
-rw-r--r--arch/x86/kernel/paravirt.c1
-rw-r--r--arch/x86/kernel/pci-dma.c3
-rw-r--r--arch/x86/kernel/test_rodata.c75
-rw-r--r--arch/x86/mm/init_32.c4
-rw-r--r--arch/x86/mm/init_64.c22
-rw-r--r--arch/x86/mm/mpx.c6
-rw-r--r--arch/x86/mm/pgtable.c31
-rw-r--r--arch/xtensa/include/asm/Kbuild1
-rw-r--r--arch/xtensa/kernel/pci-dma.c3
-rw-r--r--block/genhd.c2
-rw-r--r--block/ioprio.c8
-rw-r--r--crypto/lz4.c23
-rw-r--r--crypto/lz4hc.c23
-rw-r--r--crypto/testmgr.h142
-rw-r--r--drivers/android/binder.c4
-rw-r--r--drivers/base/dma-contiguous.c5
-rw-r--r--drivers/base/memory.c26
-rw-r--r--drivers/block/zram/zram_drv.c228
-rw-r--r--drivers/block/zram/zram_drv.h12
-rw-r--r--drivers/char/agp/alpha-agp.c5
-rw-r--r--drivers/char/mspec.c6
-rw-r--r--drivers/dax/dax.c110
-rw-r--r--drivers/gpu/drm/armada/armada_gem.c9
-rw-r--r--drivers/gpu/drm/drm_vm.c36
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_drv.h2
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem.c3
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_gem.c3
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_gem.h2
-rw-r--r--drivers/gpu/drm/gma500/framebuffer.c3
-rw-r--r--drivers/gpu/drm/gma500/gem.c3
-rw-r--r--drivers/gpu/drm/gma500/psb_drv.h2
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h2
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c4
-rw-r--r--drivers/gpu/drm/msm/msm_drv.h2
-rw-r--r--drivers/gpu/drm/msm/msm_gem.c3
-rw-r--r--drivers/gpu/drm/omapdrm/omap_drv.h2
-rw-r--r--drivers/gpu/drm/omapdrm/omap_gem.c4
-rw-r--r--drivers/gpu/drm/qxl/qxl_ttm.c6
-rw-r--r--drivers/gpu/drm/radeon/radeon_ttm.c6
-rw-r--r--drivers/gpu/drm/tegra/gem.c3
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_vm.c8
-rw-r--r--drivers/gpu/drm/udl/udl_drv.h2
-rw-r--r--drivers/gpu/drm/udl/udl_gem.c3
-rw-r--r--drivers/gpu/drm/vgem/vgem_drv.c3
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_ttm.c7
-rw-r--r--drivers/hsi/clients/cmt_speech.c4
-rw-r--r--drivers/hwtracing/intel_th/msu.c6
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c4
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c2
-rw-r--r--drivers/iommu/amd_iommu.c2
-rw-r--r--drivers/iommu/intel-iommu.c2
-rw-r--r--drivers/media/v4l2-core/videobuf-dma-sg.c3
-rw-r--r--drivers/misc/cxl/context.c3
-rw-r--r--drivers/misc/sgi-gru/grumain.c3
-rw-r--r--drivers/misc/sgi-gru/grutables.h2
-rw-r--r--drivers/net/ethernet/sgi/ioc3-eth.c2
-rw-r--r--drivers/nvdimm/pfn_devs.c42
-rw-r--r--drivers/rapidio/devices/rio_mport_cdev.c11
-rw-r--r--drivers/scsi/cxlflash/superpipe.c6
-rw-r--r--drivers/scsi/sg.c3
-rw-r--r--drivers/staging/android/ion/ion.c9
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_mmap.c7
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_io.c2
-rw-r--r--drivers/target/target_core_user.c6
-rw-r--r--drivers/tty/sysrq.c2
-rw-r--r--drivers/tty/vt/keyboard.c2
-rw-r--r--drivers/uio/uio.c6
-rw-r--r--drivers/usb/mon/mon_bin.c4
-rw-r--r--drivers/video/fbdev/core/fb_defio.c16
-rw-r--r--drivers/xen/privcmd.c4
-rw-r--r--fs/9p/vfs_file.c4
-rw-r--r--fs/affs/affs.h22
-rw-r--r--fs/affs/amigaffs.c42
-rw-r--r--fs/affs/inode.c9
-rw-r--r--fs/affs/namei.c95
-rw-r--r--fs/affs/super.c3
-rw-r--r--fs/aio.c2
-rw-r--r--fs/autofs4/dev-ioctl.c2
-rw-r--r--fs/autofs4/root.c17
-rw-r--r--fs/binfmt_elf.c30
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/inode.c6
-rw-r--r--fs/ceph/addr.c8
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/dax.c159
-rw-r--r--fs/ext2/file.c19
-rw-r--r--fs/ext4/ext4.h4
-rw-r--r--fs/ext4/file.c41
-rw-r--r--fs/ext4/inode.c9
-rw-r--r--fs/f2fs/file.c7
-rw-r--r--fs/fuse/file.c6
-rw-r--r--fs/gfs2/file.c8
-rw-r--r--fs/hfs/dir.c2
-rw-r--r--fs/hfs/mdb.c2
-rw-r--r--fs/hfsplus/wrapper.c2
-rw-r--r--fs/iomap.c5
-rw-r--r--fs/kernfs/file.c13
-rw-r--r--fs/ncpfs/mmap.c7
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nilfs2/file.c3
-rw-r--r--fs/ocfs2/acl.c29
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c66
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c40
-rw-r--r--fs/ocfs2/dlmglue.c105
-rw-r--r--fs/ocfs2/dlmglue.h18
-rw-r--r--fs/ocfs2/file.c58
-rw-r--r--fs/ocfs2/mmap.c15
-rw-r--r--fs/ocfs2/ocfs2.h1
-rw-r--r--fs/proc/base.c144
-rw-r--r--fs/proc/generic.c11
-rw-r--r--fs/proc/kcore.c5
-rw-r--r--fs/proc/vmcore.c8
-rw-r--r--fs/pstore/platform.c22
-rw-r--r--fs/reiserfs/super.c2
-rw-r--r--fs/squashfs/lz4_wrapper.c12
-rw-r--r--fs/ubifs/file.c5
-rw-r--r--fs/userfaultfd.c556
-rw-r--r--fs/xfs/xfs_file.c49
-rw-r--r--fs/xfs/xfs_trace.h2
-rw-r--r--include/asm-generic/kprobes.h25
-rw-r--r--include/asm-generic/pgtable.h80
-rw-r--r--include/asm-generic/tlb.h14
-rw-r--r--include/linux/bug.h12
-rw-r--r--include/linux/cma.h3
-rw-r--r--include/linux/compat.h4
-rw-r--r--include/linux/compiler-gcc.h1
-rw-r--r--include/linux/compiler.h8
-rw-r--r--include/linux/dax.h14
-rw-r--r--include/linux/dma-contiguous.h4
-rw-r--r--include/linux/gfp.h2
-rw-r--r--include/linux/huge_mm.h84
-rw-r--r--include/linux/hugetlb.h12
-rw-r--r--include/linux/iomap.h3
-rw-r--r--include/linux/iopoll.h2
-rw-r--r--include/linux/kasan.h4
-rw-r--r--include/linux/kernel.h10
-rw-r--r--include/linux/kexec.h2
-rw-r--r--include/linux/kprobes.h19
-rw-r--r--include/linux/lz4.h701
-rw-r--r--include/linux/memblock.h2
-rw-r--r--include/linux/memcontrol.h2
-rw-r--r--include/linux/memory.h4
-rw-r--r--include/linux/memory_hotplug.h6
-rw-r--r--include/linux/migrate.h4
-rw-r--r--include/linux/mm.h129
-rw-r--r--include/linux/mm_inline.h7
-rw-r--r--include/linux/mmu_notifier.h14
-rw-r--r--include/linux/mmzone.h38
-rw-r--r--include/linux/pagemap.h13
-rw-r--r--include/linux/pfn_t.h18
-rw-r--r--include/linux/pid.h4
-rw-r--r--include/linux/rbtree_augmented.h4
-rw-r--r--include/linux/rmap.h52
-rw-r--r--include/linux/rodata_test.h24
-rw-r--r--include/linux/sem.h2
-rw-r--r--include/linux/shmem_fs.h11
-rw-r--r--include/linux/slab.h45
-rw-r--r--include/linux/slub_def.h4
-rw-r--r--include/linux/swap.h30
-rw-r--r--include/linux/swap_slots.h30
-rw-r--r--include/linux/trace_events.h4
-rw-r--r--include/linux/userfaultfd_k.h67
-rw-r--r--include/linux/vm_event_item.h1
-rw-r--r--include/linux/writeback.h2
-rw-r--r--include/trace/events/compaction.h60
-rw-r--r--include/trace/events/fs_dax.h156
-rw-r--r--include/trace/events/mmflags.h98
-rw-r--r--include/trace/events/oom.h81
-rw-r--r--include/trace/events/vmscan.h150
-rw-r--r--include/trace/events/writeback.h2
-rw-r--r--include/trace/trace_events.h11
-rw-r--r--include/uapi/asm-generic/ioctl.h10
-rw-r--r--include/uapi/linux/auto_dev-ioctl.h10
-rw-r--r--include/uapi/linux/auto_fs.h25
-rw-r--r--include/uapi/linux/auto_fs4.h16
-rw-r--r--include/uapi/linux/mqueue.h2
-rw-r--r--include/uapi/linux/userfaultfd.h73
-rw-r--r--init/Kconfig14
-rw-r--r--init/initramfs.c2
-rw-r--r--init/main.c9
-rw-r--r--ipc/mqueue.c1
-rw-r--r--ipc/sem.c109
-rw-r--r--ipc/shm.c27
-rw-r--r--kernel/configs/android-base.config2
-rw-r--r--kernel/configs/android-recommended.config1
-rw-r--r--kernel/events/core.c6
-rw-r--r--kernel/events/uprobes.c26
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c10
-rw-r--r--kernel/memremap.c74
-rw-r--r--kernel/notifier.c2
-rw-r--r--kernel/relay.c4
-rw-r--r--kernel/signal.c11
-rw-r--r--kernel/trace/trace_output.c38
-rw-r--r--kernel/watchdog_hld.c25
-rw-r--r--lib/Kconfig6
-rw-r--r--lib/Kconfig.debug13
-rw-r--r--lib/Makefile3
-rw-r--r--lib/atomic64_test.c10
-rw-r--r--lib/crc32.c824
-rw-r--r--lib/crc32test.c856
-rw-r--r--lib/decompress_unlz4.c13
-rw-r--r--lib/dma-debug.c5
-rw-r--r--lib/find_bit.c4
-rw-r--r--lib/fonts/Kconfig16
-rw-r--r--lib/glob.c164
-rw-r--r--lib/globtest.c167
-rw-r--r--lib/list_debug.c45
-rw-r--r--lib/lz4/lz4_compress.c1094
-rw-r--r--lib/lz4/lz4_decompress.c656
-rw-r--r--lib/lz4/lz4defs.h334
-rw-r--r--lib/lz4/lz4hc_compress.c846
-rw-r--r--lib/rbtree.c4
-rw-r--r--lib/scatterlist.c28
-rw-r--r--lib/show_mem.c4
-rw-r--r--lib/sort.c41
-rw-r--r--lib/test_kasan.c34
-rw-r--r--lib/test_sort.c44
-rw-r--r--mm/Kconfig1
-rw-r--r--mm/Kconfig.debug7
-rw-r--r--mm/Makefile9
-rw-r--r--mm/backing-dev.c4
-rw-r--r--mm/bootmem.c2
-rw-r--r--mm/cma.c40
-rw-r--r--mm/cma_debug.c2
-rw-r--r--mm/compaction.c31
-rw-r--r--mm/filemap.c31
-rw-r--r--mm/gup.c9
-rw-r--r--mm/huge_memory.c420
-rw-r--r--mm/hugetlb.c167
-rw-r--r--mm/internal.h20
-rw-r--r--mm/kasan/kasan.c2
-rw-r--r--mm/kasan/quarantine.c1
-rw-r--r--mm/ksm.c102
-rw-r--r--mm/madvise.c60
-rw-r--r--mm/memblock.c118
-rw-r--r--mm/memcontrol.c23
-rw-r--r--mm/memory-failure.c26
-rw-r--r--mm/memory.c231
-rw-r--r--mm/memory_hotplug.c133
-rw-r--r--mm/migrate.c108
-rw-r--r--mm/mmap.c90
-rw-r--r--mm/mmzone.c2
-rw-r--r--mm/mprotect.c44
-rw-r--r--mm/mremap.c30
-rw-r--r--mm/nommu.c17
-rw-r--r--mm/oom_kill.c31
-rw-r--r--mm/page-writeback.c2
-rw-r--r--mm/page_alloc.c671
-rw-r--r--mm/page_idle.c34
-rw-r--r--mm/page_isolation.c10
-rw-r--r--mm/page_owner.c4
-rw-r--r--mm/page_vma_mapped.c218
-rw-r--r--mm/pagewalk.c20
-rw-r--r--mm/pgtable-generic.c14
-rw-r--r--mm/rmap.c574
-rw-r--r--mm/rodata_test.c56
-rw-r--r--mm/shmem.c154
-rw-r--r--mm/slab.c10
-rw-r--r--mm/slab.h28
-rw-r--r--mm/slab_common.c303
-rw-r--r--mm/slub.c85
-rw-r--r--mm/sparse-vmemmap.c24
-rw-r--r--mm/sparse.c466
-rw-r--r--mm/swap.c15
-rw-r--r--mm/swap_slots.c342
-rw-r--r--mm/swap_state.c80
-rw-r--r--mm/swapfile.c521
-rw-r--r--mm/userfaultfd.c245
-rw-r--r--mm/util.c5
-rw-r--r--mm/vmalloc.c11
-rw-r--r--mm/vmscan.c327
-rw-r--r--mm/vmstat.c2
-rw-r--r--mm/workingset.c2
-rw-r--r--mm/z3fold.c389
-rw-r--r--mm/zsmalloc.c6
-rw-r--r--mm/zswap.c109
-rwxr-xr-xscripts/Lindent14
-rwxr-xr-xscripts/checkincludes.pl8
-rwxr-xr-xscripts/checkpatch.pl31
-rwxr-xr-xscripts/checkstack.pl3
-rw-r--r--scripts/gdb/linux/constants.py.in7
-rw-r--r--scripts/gdb/linux/proc.py73
-rw-r--r--scripts/spelling.txt43
-rwxr-xr-xscripts/tags.sh2
-rw-r--r--security/selinux/selinuxfs.c5
-rw-r--r--sound/core/pcm_native.c15
-rw-r--r--sound/usb/usx2y/us122l.c5
-rw-r--r--sound/usb/usx2y/usX2Yhwdep.c7
-rw-r--r--sound/usb/usx2y/usx2yhwdeppcm.c5
-rw-r--r--tools/lib/find_bit.c2
-rw-r--r--tools/testing/selftests/sigaltstack/sas.c7
-rw-r--r--tools/testing/selftests/vm/Makefile4
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests24
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c484
-rw-r--r--tools/vm/Makefile8
-rw-r--r--virt/kvm/kvm_main.c4
393 files changed, 12652 insertions, 6250 deletions
diff --git a/Documentation/ABI/obsolete/sysfs-block-zram b/Documentation/ABI/obsolete/sysfs-block-zram
deleted file mode 100644
index 720ea92cfb2e..000000000000
--- a/Documentation/ABI/obsolete/sysfs-block-zram
+++ /dev/null
@@ -1,119 +0,0 @@
-What: /sys/block/zram<id>/num_reads
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The num_reads file is read-only and specifies the number of
- reads (failed or successful) done on this device.
- Now accessible via zram<id>/stat node.
-
-What: /sys/block/zram<id>/num_writes
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The num_writes file is read-only and specifies the number of
- writes (failed or successful) done on this device.
- Now accessible via zram<id>/stat node.
-
-What: /sys/block/zram<id>/invalid_io
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The invalid_io file is read-only and specifies the number of
- non-page-size-aligned I/O requests issued to this device.
- Now accessible via zram<id>/io_stat node.
-
-What: /sys/block/zram<id>/failed_reads
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The failed_reads file is read-only and specifies the number of
- failed reads happened on this device.
- Now accessible via zram<id>/io_stat node.
-
-What: /sys/block/zram<id>/failed_writes
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The failed_writes file is read-only and specifies the number of
- failed writes happened on this device.
- Now accessible via zram<id>/io_stat node.
-
-What: /sys/block/zram<id>/notify_free
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The notify_free file is read-only. Depending on device usage
- scenario it may account a) the number of pages freed because
- of swap slot free notifications or b) the number of pages freed
- because of REQ_DISCARD requests sent by bio. The former ones
- are sent to a swap block device when a swap slot is freed, which
- implies that this disk is being used as a swap disk. The latter
- ones are sent by filesystem mounted with discard option,
- whenever some data blocks are getting discarded.
- Now accessible via zram<id>/io_stat node.
-
-What: /sys/block/zram<id>/zero_pages
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The zero_pages file is read-only and specifies number of zero
- filled pages written to this disk. No memory is allocated for
- such pages.
- Now accessible via zram<id>/mm_stat node.
-
-What: /sys/block/zram<id>/orig_data_size
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The orig_data_size file is read-only and specifies uncompressed
- size of data stored in this disk. This excludes zero-filled
- pages (zero_pages) since no memory is allocated for them.
- Unit: bytes
- Now accessible via zram<id>/mm_stat node.
-
-What: /sys/block/zram<id>/compr_data_size
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The compr_data_size file is read-only and specifies compressed
- size of data stored in this disk. So, compression ratio can be
- calculated using orig_data_size and this statistic.
- Unit: bytes
- Now accessible via zram<id>/mm_stat node.
-
-What: /sys/block/zram<id>/mem_used_total
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The mem_used_total file is read-only and specifies the amount
- of memory, including allocator fragmentation and metadata
- overhead, allocated for this disk. So, allocator space
- efficiency can be calculated using compr_data_size and this
- statistic.
- Unit: bytes
- Now accessible via zram<id>/mm_stat node.
-
-What: /sys/block/zram<id>/mem_used_max
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The mem_used_max file is read/write and specifies the amount
- of maximum memory zram have consumed to store compressed data.
- For resetting the value, you should write "0". Otherwise,
- you could see -EINVAL.
- Unit: bytes
- Downgraded to write-only node: so it's possible to set new
- value only; its current value is stored in zram<id>/mm_stat
- node.
-
-What: /sys/block/zram<id>/mem_limit
-Date: August 2015
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The mem_limit file is read/write and specifies the maximum
- amount of memory ZRAM can use to store the compressed data.
- The limit could be changed in run time and "0" means disable
- the limit. No limit is the initial state. Unit: bytes
- Downgraded to write-only node: so it's possible to set new
- value only; its current value is stored in zram<id>/mm_stat
- node.
diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram
index 4518d30b8c2e..451b6d882b2c 100644
--- a/Documentation/ABI/testing/sysfs-block-zram
+++ b/Documentation/ABI/testing/sysfs-block-zram
@@ -22,41 +22,6 @@ Description:
device. The reset operation frees all the memory associated
with this device.
-What: /sys/block/zram<id>/num_reads
-Date: August 2010
-Contact: Nitin Gupta <ngupta@vflare.org>
-Description:
- The num_reads file is read-only and specifies the number of
- reads (failed or successful) done on this device.
-
-What: /sys/block/zram<id>/num_writes
-Date: August 2010
-Contact: Nitin Gupta <ngupta@vflare.org>
-Description:
- The num_writes file is read-only and specifies the number of
- writes (failed or successful) done on this device.
-
-What: /sys/block/zram<id>/invalid_io
-Date: August 2010
-Contact: Nitin Gupta <ngupta@vflare.org>
-Description:
- The invalid_io file is read-only and specifies the number of
- non-page-size-aligned I/O requests issued to this device.
-
-What: /sys/block/zram<id>/failed_reads
-Date: February 2014
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The failed_reads file is read-only and specifies the number of
- failed reads happened on this device.
-
-What: /sys/block/zram<id>/failed_writes
-Date: February 2014
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The failed_writes file is read-only and specifies the number of
- failed writes happened on this device.
-
What: /sys/block/zram<id>/max_comp_streams
Date: February 2014
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
@@ -73,74 +38,24 @@ Description:
available and selected compression algorithms, change
compression algorithm selection.
-What: /sys/block/zram<id>/notify_free
-Date: August 2010
-Contact: Nitin Gupta <ngupta@vflare.org>
-Description:
- The notify_free file is read-only. Depending on device usage
- scenario it may account a) the number of pages freed because
- of swap slot free notifications or b) the number of pages freed
- because of REQ_DISCARD requests sent by bio. The former ones
- are sent to a swap block device when a swap slot is freed, which
- implies that this disk is being used as a swap disk. The latter
- ones are sent by filesystem mounted with discard option,
- whenever some data blocks are getting discarded.
-
-What: /sys/block/zram<id>/zero_pages
-Date: August 2010
-Contact: Nitin Gupta <ngupta@vflare.org>
-Description:
- The zero_pages file is read-only and specifies number of zero
- filled pages written to this disk. No memory is allocated for
- such pages.
-
-What: /sys/block/zram<id>/orig_data_size
-Date: August 2010
-Contact: Nitin Gupta <ngupta@vflare.org>
-Description:
- The orig_data_size file is read-only and specifies uncompressed
- size of data stored in this disk. This excludes zero-filled
- pages (zero_pages) since no memory is allocated for them.
- Unit: bytes
-
-What: /sys/block/zram<id>/compr_data_size
-Date: August 2010
-Contact: Nitin Gupta <ngupta@vflare.org>
-Description:
- The compr_data_size file is read-only and specifies compressed
- size of data stored in this disk. So, compression ratio can be
- calculated using orig_data_size and this statistic.
- Unit: bytes
-
-What: /sys/block/zram<id>/mem_used_total
-Date: August 2010
-Contact: Nitin Gupta <ngupta@vflare.org>
-Description:
- The mem_used_total file is read-only and specifies the amount
- of memory, including allocator fragmentation and metadata
- overhead, allocated for this disk. So, allocator space
- efficiency can be calculated using compr_data_size and this
- statistic.
- Unit: bytes
-
What: /sys/block/zram<id>/mem_used_max
Date: August 2014
Contact: Minchan Kim <minchan@kernel.org>
Description:
- The mem_used_max file is read/write and specifies the amount
- of maximum memory zram have consumed to store compressed data.
- For resetting the value, you should write "0". Otherwise,
- you could see -EINVAL.
+ The mem_used_max file is write-only and is used to reset
+ the counter of maximum memory zram have consumed to store
+ compressed data. For resetting the value, you should write
+ "0". Otherwise, you could see -EINVAL.
Unit: bytes
What: /sys/block/zram<id>/mem_limit
Date: August 2014
Contact: Minchan Kim <minchan@kernel.org>
Description:
- The mem_limit file is read/write and specifies the maximum
- amount of memory ZRAM can use to store the compressed data. The
- limit could be changed in run time and "0" means disable the
- limit. No limit is the initial state. Unit: bytes
+ The mem_limit file is write-only and specifies the maximum
+ amount of memory ZRAM can use to store the compressed data.
+ The limit could be changed in run time and "0" means disable
+ the limit. No limit is the initial state. Unit: bytes
What: /sys/block/zram<id>/compact
Date: August 2015
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index d05533e6120b..986e44387dad 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3694,6 +3694,14 @@
last alloc / free. For more information see
Documentation/vm/slub.txt.
+ slub_memcg_sysfs= [MM, SLUB]
+ Determines whether to enable sysfs directories for
+ memory cgroup sub-caches. 1 to enable, 0 to disable.
+ The default is determined by CONFIG_SLUB_MEMCG_SYSFS_ON.
+ Enabling this can lead to a very high number of debug
+ directories and files being created under
+ /sys/kernel/slub.
+
slub_max_order= [MM, SLUB]
Determines the maximum allowed order for slabs.
A high setting may cause OOMs due to memory
diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
index 0535ae1f73e5..4fced8a21307 100644
--- a/Documentation/blockdev/zram.txt
+++ b/Documentation/blockdev/zram.txt
@@ -161,42 +161,14 @@ Name access description
disksize RW show and set the device's disk size
initstate RO shows the initialization state of the device
reset WO trigger device reset
-num_reads RO the number of reads
-failed_reads RO the number of failed reads
-num_write RO the number of writes
-failed_writes RO the number of failed writes
-invalid_io RO the number of non-page-size-aligned I/O requests
+mem_used_max WO reset the `mem_used_max' counter (see later)
+mem_limit WO specifies the maximum amount of memory ZRAM can use
+ to store the compressed data
max_comp_streams RW the number of possible concurrent compress operations
comp_algorithm RW show and change the compression algorithm
-notify_free RO the number of notifications to free pages (either
- slot free notifications or REQ_DISCARD requests)
-zero_pages RO the number of zero filled pages written to this disk
-orig_data_size RO uncompressed size of data stored in this disk
-compr_data_size RO compressed size of data stored in this disk
-mem_used_total RO the amount of memory allocated for this disk
-mem_used_max RW the maximum amount of memory zram have consumed to
- store the data (to reset this counter to the actual
- current value, write 1 to this attribute)
-mem_limit RW the maximum amount of memory ZRAM can use to store
- the compressed data
-pages_compacted RO the number of pages freed during compaction
- (available only via zram<id>/mm_stat node)
compact WO trigger memory compaction
debug_stat RO this file is used for zram debugging purposes
-WARNING
-=======
-per-stat sysfs attributes are considered to be deprecated.
-The basic strategy is:
--- the existing RW nodes will be downgraded to WO nodes (in linux 4.11)
--- deprecated RO sysfs nodes will eventually be removed (in linux 4.11)
-
-The list of deprecated attributes can be found here:
-Documentation/ABI/obsolete/sysfs-block-zram
-
-Basically, every attribute that has its own read accessible sysfs node
-(e.g. num_reads) *AND* is accessible via one of the stat files (zram<id>/stat
-or zram<id>/io_stat or zram<id>/mm_stat) is considered to be deprecated.
User space is advised to use the following files to read the device statistics.
@@ -211,22 +183,40 @@ The stat file represents device's I/O statistics not accounted by block
layer and, thus, not available in zram<id>/stat file. It consists of a
single line of text and contains the following stats separated by
whitespace:
- failed_reads
- failed_writes
- invalid_io
- notify_free
+ failed_reads the number of failed reads
+ failed_writes the number of failed writes
+ invalid_io the number of non-page-size-aligned I/O requests
+ notify_free Depending on device usage scenario it may account
+ a) the number of pages freed because of swap slot free
+ notifications or b) the number of pages freed because of
+ REQ_DISCARD requests sent by bio. The former ones are
+ sent to a swap block device when a swap slot is freed,
+ which implies that this disk is being used as a swap disk.
+ The latter ones are sent by filesystem mounted with
+ discard option, whenever some data blocks are getting
+ discarded.
File /sys/block/zram<id>/mm_stat
The stat file represents device's mm statistics. It consists of a single
line of text and contains the following stats separated by whitespace:
- orig_data_size
- compr_data_size
- mem_used_total
- mem_limit
- mem_used_max
- zero_pages
- num_migrated
+ orig_data_size uncompressed size of data stored in this disk.
+ This excludes same-element-filled pages (same_pages) since
+ no memory is allocated for them.
+ Unit: bytes
+ compr_data_size compressed size of data stored in this disk
+ mem_used_total the amount of memory allocated for this disk. This
+ includes allocator fragmentation and metadata overhead,
+ allocated for this disk. So, allocator space efficiency
+ can be calculated using compr_data_size and this statistic.
+ Unit: bytes
+ mem_limit the maximum amount of memory ZRAM can use to store
+ the compressed data
+ mem_used_max the maximum amount of memory zram have consumed to
+ store the data
+ same_pages the number of same element filled pages written to this disk.
+ No memory is allocated for such pages.
+ pages_compacted the number of pages freed during compaction
9) Deactivate:
swapoff /dev/zram0
diff --git a/Documentation/filesystems/autofs4-mount-control.txt b/Documentation/filesystems/autofs4-mount-control.txt
index 50a3e01a36f8..e5177cb31a04 100644
--- a/Documentation/filesystems/autofs4-mount-control.txt
+++ b/Documentation/filesystems/autofs4-mount-control.txt
@@ -179,6 +179,7 @@ struct autofs_dev_ioctl {
* including this struct */
__s32 ioctlfd; /* automount command fd */
+ /* Command parameters */
union {
struct args_protover protover;
struct args_protosubver protosubver;
diff --git a/Documentation/filesystems/autofs4.txt b/Documentation/filesystems/autofs4.txt
index 8fac3fe7b8c9..f10dd590f69f 100644
--- a/Documentation/filesystems/autofs4.txt
+++ b/Documentation/filesystems/autofs4.txt
@@ -65,7 +65,7 @@ directory is a mount trap only if the filesystem is mounted *direct*
and the root is empty.
Directories created in the root directory are mount traps only if the
-filesystem is mounted *indirect* and they are empty.
+filesystem is mounted *indirect* and they are empty.
Directories further down the tree depend on the *maxproto* mount
option and particularly whether it is less than five or not.
@@ -352,7 +352,7 @@ Communicating with autofs: root directory ioctls
------------------------------------------------
The root directory of an autofs filesystem will respond to a number of
-ioctls. The process issuing the ioctl must have the CAP_SYS_ADMIN
+ioctls. The process issuing the ioctl must have the CAP_SYS_ADMIN
capability, or must be the automount daemon.
The available ioctl commands are:
@@ -425,8 +425,20 @@ Each ioctl is passed a pointer to an `autofs_dev_ioctl` structure:
* including this struct */
__s32 ioctlfd; /* automount command fd */
- __u32 arg1; /* Command parameters */
- __u32 arg2;
+ /* Command parameters */
+ union {
+ struct args_protover protover;
+ struct args_protosubver protosubver;
+ struct args_openmount openmount;
+ struct args_ready ready;
+ struct args_fail fail;
+ struct args_setpipefd setpipefd;
+ struct args_timeout timeout;
+ struct args_requester requester;
+ struct args_expire expire;
+ struct args_askumount askumount;
+ struct args_ismountpoint ismountpoint;
+ };
char path[0];
};
@@ -446,25 +458,22 @@ Commands are:
set version numbers.
- **AUTOFS_DEV_IOCTL_OPENMOUNT_CMD**: return an open file descriptor
on the root of an autofs filesystem. The filesystem is identified
- by name and device number, which is stored in `arg1`. Device
- numbers for existing filesystems can be found in
+ by name and device number, which is stored in `openmount.devid`.
+ Device numbers for existing filesystems can be found in
`/proc/self/mountinfo`.
- **AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD**: same as `close(ioctlfd)`.
- **AUTOFS_DEV_IOCTL_SETPIPEFD_CMD**: if the filesystem is in
catatonic mode, this can provide the write end of a new pipe
- in `arg1` to re-establish communication with a daemon. The
- process group of the calling process is used to identify the
+ in `setpipefd.pipefd` to re-establish communication with a daemon.
+ The process group of the calling process is used to identify the
daemon.
- **AUTOFS_DEV_IOCTL_REQUESTER_CMD**: `path` should be a
name within the filesystem that has been auto-mounted on.
- arg1 is the dev number of the underlying autofs. On successful
- return, `arg1` and `arg2` will be the UID and GID of the process
- which triggered that mount.
-
+ On successful return, `requester.uid` and `requester.gid` will be
+ the UID and GID of the process which triggered that mount.
- **AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD**: Check if path is a
mountpoint of a particular type - see separate documentation for
details.
-
- **AUTOFS_DEV_IOCTL_PROTOVER_CMD**:
- **AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD**:
- **AUTOFS_DEV_IOCTL_READY_CMD**:
@@ -474,7 +483,7 @@ Commands are:
- **AUTOFS_DEV_IOCTL_EXPIRE_CMD**:
- **AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD**: These all have the same
function as the similarly named **AUTOFS_IOC** ioctls, except
- that **FAIL** can be given an explicit error number in `arg1`
+ that **FAIL** can be given an explicit error number in `fail.status`
instead of assuming `ENOENT`, and this **EXPIRE** command
corresponds to **AUTOFS_IOC_EXPIRE_MULTI**.
@@ -512,7 +521,7 @@ always be mounted "shared". e.g.
> `mount --make-shared /autofs/mount/point`
-The automount daemon is only able to mange a single mount location for
+The automount daemon is only able to manage a single mount location for
an autofs filesystem and if mounts on that are not 'shared', other
locations will not behave as expected. In particular access to those
other locations will likely result in the `ELOOP` error
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 95ccbe6d79ce..b4ad97f10b8e 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -376,8 +376,8 @@ max_map_count:
This file contains the maximum number of memory map areas a process
may have. Memory map areas are used as a side-effect of calling
-malloc, directly by mmap and mprotect, and also when loading shared
-libraries.
+malloc, directly by mmap, mprotect, and madvise, and also when loading
+shared libraries.
While most applications need less than a thousand maps, certain
programs, particularly malloc debuggers, may consume lots of them,
diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
index 8f961ef2b457..ba976805853a 100644
--- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
+++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
@@ -112,8 +112,8 @@ my $regex_direct_end_default = 'nr_reclaimed=([0-9]*)';
my $regex_kswapd_wake_default = 'nid=([0-9]*) order=([0-9]*)';
my $regex_kswapd_sleep_default = 'nid=([0-9]*)';
my $regex_wakeup_kswapd_default = 'nid=([0-9]*) zid=([0-9]*) order=([0-9]*)';
-my $regex_lru_isolate_default = 'isolate_mode=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_taken=([0-9]*) file=([0-9]*)';
-my $regex_lru_shrink_inactive_default = 'nid=([0-9]*) zid=([0-9]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)';
+my $regex_lru_isolate_default = 'isolate_mode=([0-9]*) classzone_idx=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_skipped=([0-9]*) nr_taken=([0-9]*) lru=([a-z_]*)';
+my $regex_lru_shrink_inactive_default = 'nid=([0-9]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) nr_dirty=([0-9]*) nr_writeback=([0-9]*) nr_congested=([0-9]*) nr_immediate=([0-9]*) nr_activate=([0-9]*) nr_ref_keep=([0-9]*) nr_unmap_fail=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)';
my $regex_lru_shrink_active_default = 'lru=([A-Z_]*) nr_scanned=([0-9]*) nr_rotated=([0-9]*) priority=([0-9]*)';
my $regex_writepage_default = 'page=([0-9a-f]*) pfn=([0-9]*) flags=([A-Z_|]*)';
@@ -205,15 +205,15 @@ $regex_wakeup_kswapd = generate_traceevent_regex(
$regex_lru_isolate = generate_traceevent_regex(
"vmscan/mm_vmscan_lru_isolate",
$regex_lru_isolate_default,
- "isolate_mode", "order",
- "nr_requested", "nr_scanned", "nr_taken",
- "file");
+ "isolate_mode", "classzone_idx", "order",
+ "nr_requested", "nr_scanned", "nr_skipped", "nr_taken",
+ "lru");
$regex_lru_shrink_inactive = generate_traceevent_regex(
"vmscan/mm_vmscan_lru_shrink_inactive",
$regex_lru_shrink_inactive_default,
- "nid", "zid",
- "nr_scanned", "nr_reclaimed", "priority",
- "flags");
+ "nid", "nr_scanned", "nr_reclaimed", "nr_dirty", "nr_writeback",
+ "nr_congested", "nr_immediate", "nr_activate", "nr_ref_keep",
+ "nr_unmap_fail", "priority", "flags");
$regex_lru_shrink_active = generate_traceevent_regex(
"vmscan/mm_vmscan_lru_shrink_active",
$regex_lru_shrink_active_default,
@@ -381,8 +381,8 @@ EVENT_PROCESS:
next;
}
my $isolate_mode = $1;
- my $nr_scanned = $4;
- my $file = $6;
+ my $nr_scanned = $5;
+ my $file = $8;
# To closer match vmstat scanning statistics, only count isolate_both
# and isolate_inactive as scanning. isolate_active is rotation
@@ -391,7 +391,7 @@ EVENT_PROCESS:
# isolate_both == 3
if ($isolate_mode != 2) {
$perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned;
- if ($file == 1) {
+ if ($file =~ /_file/) {
$perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED} += $nr_scanned;
} else {
$perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED} += $nr_scanned;
@@ -406,8 +406,8 @@ EVENT_PROCESS:
next;
}
- my $nr_reclaimed = $4;
- my $flags = $6;
+ my $nr_reclaimed = $3;
+ my $flags = $12;
my $file = 0;
if ($flags =~ /RECLAIM_WB_FILE/) {
$file = 1;
diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt
index f34a8ee6f860..6b0ca7feb135 100644
--- a/Documentation/vm/ksm.txt
+++ b/Documentation/vm/ksm.txt
@@ -38,6 +38,10 @@ the range for whenever the KSM daemon is started; even if the range
cannot contain any pages which KSM could actually merge; even if
MADV_UNMERGEABLE is applied to a range which was never MADV_MERGEABLE.
+If a region of memory must be split into at least one new MADV_MERGEABLE
+or MADV_UNMERGEABLE region, the madvise may return ENOMEM if the process
+will exceed vm.max_map_count (see Documentation/sysctl/vm.txt).
+
Like other madvise calls, they are intended for use on mapped areas of
the user address space: they will report ENOMEM if the specified range
includes unmapped gaps (though working on the intervening mapped areas),
@@ -80,6 +84,20 @@ run - set 0 to stop ksmd from running but keep merged pages,
Default: 0 (must be changed to 1 to activate KSM,
except if CONFIG_SYSFS is disabled)
+use_zero_pages - specifies whether empty pages (i.e. allocated pages
+ that only contain zeroes) should be treated specially.
+ When set to 1, empty pages are merged with the kernel
+ zero page(s) instead of with each other as it would
+ happen normally. This can improve the performance on
+ architectures with coloured zero pages, depending on
+ the workload. Care should be taken when enabling this
+ setting, as it can potentially degrade the performance
+ of KSM for some workloads, for example if the checksums
+ of pages candidate for merging match the checksum of
+ an empty page. This setting can be changed at any time,
+ it is only effective for pages merged after the change.
+ Default: 0 (normal KSM behaviour as in earlier releases)
+
The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/:
pages_shared - how many shared pages are being used
diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt
index f2e739545e74..cd28d5ee5273 100644
--- a/Documentation/vm/transhuge.txt
+++ b/Documentation/vm/transhuge.txt
@@ -110,6 +110,7 @@ MADV_HUGEPAGE region.
echo always >/sys/kernel/mm/transparent_hugepage/defrag
echo defer >/sys/kernel/mm/transparent_hugepage/defrag
+echo defer+madvise >/sys/kernel/mm/transparent_hugepage/defrag
echo madvise >/sys/kernel/mm/transparent_hugepage/defrag
echo never >/sys/kernel/mm/transparent_hugepage/defrag
@@ -120,10 +121,15 @@ that benefit heavily from THP use and are willing to delay the VM start
to utilise them.
"defer" means that an application will wake kswapd in the background
-to reclaim pages and wake kcompact to compact memory so that THP is
+to reclaim pages and wake kcompactd to compact memory so that THP is
available in the near future. It's the responsibility of khugepaged
to then install the THP pages later.
+"defer+madvise" will enter direct reclaim and compaction like "always", but
+only for regions that have used madvise(MADV_HUGEPAGE); all other regions
+will wake kswapd in the background to reclaim pages and wake kcompactd to
+compact memory so that THP is available in the near future.
+
"madvise" will enter direct reclaim like "always" but only for regions
that are have used madvise(MADV_HUGEPAGE). This is the default behaviour.
diff --git a/MAINTAINERS b/MAINTAINERS
index fa08166d617f..3723e50e2ca2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3967,10 +3967,13 @@ S: Maintained
F: drivers/i2c/busses/i2c-diolan-u2c.c
DIRECT ACCESS (DAX)
-M: Matthew Wilcox <willy@linux.intel.com>
+M: Matthew Wilcox <mawilcox@microsoft.com>
+M: Ross Zwisler <ross.zwisler@linux.intel.com>
L: linux-fsdevel@vger.kernel.org
S: Supported
F: fs/dax.c
+F: include/linux/dax.h
+F: include/trace/events/fs_dax.h
DIRECTORY NOTIFICATION (DNOTIFY)
M: Eric Paris <eparis@parisplace.org>
@@ -7281,6 +7284,7 @@ M: Masami Hiramatsu <mhiramat@kernel.org>
S: Maintained
F: Documentation/kprobes.txt
F: include/linux/kprobes.h
+F: include/asm-generic/kprobes.h
F: kernel/kprobes.c
KS0108 LCD CONTROLLER DRIVER
diff --git a/arch/Kconfig b/arch/Kconfig
index b6e78dc56935..995be50704d7 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -607,6 +607,9 @@ config HAVE_IRQ_TIME_ACCOUNTING
config HAVE_ARCH_TRANSPARENT_HUGEPAGE
bool
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+ bool
+
config HAVE_ARCH_HUGE_VMAP
bool
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index baa152b9348e..d103db5af5ff 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -9,3 +9,5 @@ generic-y += mm-arch-hooks.h
generic-y += preempt.h
generic-y += sections.h
generic-y += trace_clock.h
+generic-y += current.h
+generic-y += kprobes.h
diff --git a/arch/alpha/include/asm/current.h b/arch/alpha/include/asm/current.h
deleted file mode 100644
index 094d285a1b34..000000000000
--- a/arch/alpha/include/asm/current.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _ALPHA_CURRENT_H
-#define _ALPHA_CURRENT_H
-
-#include <linux/thread_info.h>
-
-#define get_current() (current_thread_info()->task)
-#define current get_current()
-
-#endif /* _ALPHA_CURRENT_H */
diff --git a/arch/arc/include/asm/kprobes.h b/arch/arc/include/asm/kprobes.h
index 944dbedb38b5..00bdbe167615 100644
--- a/arch/arc/include/asm/kprobes.h
+++ b/arch/arc/include/asm/kprobes.h
@@ -9,6 +9,8 @@
#ifndef _ARC_KPROBES_H
#define _ARC_KPROBES_H
+#include <asm-generic/kprobes.h>
+
#ifdef CONFIG_KPROBES
typedef u16 kprobe_opcode_t;
@@ -55,6 +57,6 @@ void trap_is_kprobe(unsigned long address, struct pt_regs *regs);
static void trap_is_kprobe(unsigned long address, struct pt_regs *regs)
{
}
-#endif
+#endif /* CONFIG_KPROBES */
-#endif
+#endif /* _ARC_KPROBES_H */
diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h
index 3ea9be559726..59655459da59 100644
--- a/arch/arm/include/asm/kprobes.h
+++ b/arch/arm/include/asm/kprobes.h
@@ -16,6 +16,9 @@
#ifndef _ARM_KPROBES_H
#define _ARM_KPROBES_H
+#include <asm-generic/kprobes.h>
+
+#ifdef CONFIG_KPROBES
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
@@ -83,4 +86,5 @@ struct arch_optimized_insn {
*/
};
+#endif /* CONFIG_KPROBES */
#endif /* _ARM_KPROBES_H */
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 4355f0ec44d6..f98baaec0a15 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -17,6 +17,8 @@
#ifndef __ASSEMBLY__
+#include <linux/personality.h> /* For READ_IMPLIES_EXEC */
+
#ifndef CONFIG_MMU
#include <asm/page-nommu.h>
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ec91733e2580..fc4a4eaa9934 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -349,7 +349,7 @@ static void __dma_free_buffer(struct page *page, size_t size)
static void *__alloc_from_contiguous(struct device *dev, size_t size,
pgprot_t prot, struct page **ret_page,
const void *caller, bool want_vaddr,
- int coherent_flag);
+ int coherent_flag, gfp_t gfp);
static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
pgprot_t prot, struct page **ret_page,
@@ -420,7 +420,8 @@ static int __init atomic_pool_init(void)
*/
if (dev_get_cma_area(NULL))
ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot,
- &page, atomic_pool_init, true, NORMAL);
+ &page, atomic_pool_init, true, NORMAL,
+ GFP_KERNEL);
else
ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot,
&page, atomic_pool_init, true);
@@ -594,14 +595,14 @@ static int __free_from_pool(void *start, size_t size)
static void *__alloc_from_contiguous(struct device *dev, size_t size,
pgprot_t prot, struct page **ret_page,
const void *caller, bool want_vaddr,
- int coherent_flag)
+ int coherent_flag, gfp_t gfp)
{
unsigned long order = get_order(size);
size_t count = size >> PAGE_SHIFT;
struct page *page;
void *ptr = NULL;
- page = dma_alloc_from_contiguous(dev, count, order);
+ page = dma_alloc_from_contiguous(dev, count, order, gfp);
if (!page)
return NULL;
@@ -655,7 +656,7 @@ static inline pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot)
#define __get_dma_pgprot(attrs, prot) __pgprot(0)
#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv) NULL
#define __alloc_from_pool(size, ret_page) NULL
-#define __alloc_from_contiguous(dev, size, prot, ret, c, wv, coherent_flag) NULL
+#define __alloc_from_contiguous(dev, size, prot, ret, c, wv, coherent_flag, gfp) NULL
#define __free_from_pool(cpu_addr, size) do { } while (0)
#define __free_from_contiguous(dev, page, cpu_addr, size, wv) do { } while (0)
#define __dma_free_remap(cpu_addr, size) do { } while (0)
@@ -697,7 +698,8 @@ static void *cma_allocator_alloc(struct arm_dma_alloc_args *args,
{
return __alloc_from_contiguous(args->dev, args->size, args->prot,
ret_page, args->caller,
- args->want_vaddr, args->coherent_flag);
+ args->want_vaddr, args->coherent_flag,
+ args->gfp);
}
static void cma_allocator_free(struct arm_dma_free_args *args)
@@ -1315,7 +1317,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
unsigned long order = get_order(size);
struct page *page;
- page = dma_alloc_from_contiguous(dev, count, order);
+ page = dma_alloc_from_contiguous(dev, count, order, gfp);
if (!page)
goto error;
diff --git a/arch/arm/probes/decode.h b/arch/arm/probes/decode.h
index f9b08ba7fe73..548d622a3159 100644
--- a/arch/arm/probes/decode.h
+++ b/arch/arm/probes/decode.h
@@ -22,6 +22,7 @@
#include <linux/types.h>
#include <linux/stddef.h>
#include <asm/probes.h>
+#include <asm/kprobes.h>
void __init arm_probes_decode_init(void);
diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
index 1737aecfcc5e..6deb8d726041 100644
--- a/arch/arm64/include/asm/kprobes.h
+++ b/arch/arm64/include/asm/kprobes.h
@@ -16,6 +16,9 @@
#ifndef _ARM_KPROBES_H
#define _ARM_KPROBES_H
+#include <asm-generic/kprobes.h>
+
+#ifdef CONFIG_KPROBES
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
@@ -57,4 +60,5 @@ int kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr);
void kretprobe_trampoline(void);
void __kprobes *trampoline_probe_handler(struct pt_regs *regs);
+#endif /* CONFIG_KPROBES */
#endif /* _ARM_KPROBES_H */
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 86032a012388..657977e77ec8 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -19,6 +19,7 @@
#include <asm/sysreg.h>
#include <asm/system_misc.h>
#include <asm/traps.h>
+#include <asm/kprobes.h>
#include <linux/uaccess.h>
#include <asm/cpufeature.h>
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index b6badff5a151..3a63954a8b14 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -31,6 +31,7 @@
#include <asm/debug-monitors.h>
#include <asm/fixmap.h>
#include <asm/insn.h>
+#include <asm/kprobes.h>
#define AARCH64_INSN_SF_BIT BIT(31)
#define AARCH64_INSN_N_BIT BIT(22)
diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h
index 76d3f315407f..192ab007bacb 100644
--- a/arch/arm64/kernel/probes/decode-insn.h
+++ b/arch/arm64/kernel/probes/decode-insn.h
@@ -16,6 +16,8 @@
#ifndef _ARM_KERNEL_KPROBES_ARM64_H
#define _ARM_KERNEL_KPROBES_ARM64_H
+#include <asm/kprobes.h>
+
/*
* ARM strongly recommends a limit of 128 bytes between LoadExcl and
* StoreExcl instructions in a single thread of execution. So keep the
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 351f7595cb3e..aff1d0afeb1e 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -107,7 +107,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
void *addr;
page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
- get_order(size));
+ get_order(size), flags);
if (!page)
return NULL;
@@ -390,7 +390,7 @@ static int __init atomic_pool_init(void)
if (dev_get_cma_area(NULL))
page = dma_alloc_from_contiguous(NULL, nr_pages,
- pool_size_order);
+ pool_size_order, GFP_KERNEL);
else
page = alloc_pages(GFP_DMA, pool_size_order);
diff --git a/arch/avr32/include/asm/kprobes.h b/arch/avr32/include/asm/kprobes.h
index 45f563ed73fd..28dfc61ad384 100644
--- a/arch/avr32/include/asm/kprobes.h
+++ b/arch/avr32/include/asm/kprobes.h
@@ -11,10 +11,14 @@
#ifndef __ASM_AVR32_KPROBES_H
#define __ASM_AVR32_KPROBES_H
+#include <asm-generic/kprobes.h>
+
+#define BREAKPOINT_INSTRUCTION 0xd673 /* breakpoint */
+
+#ifdef CONFIG_KPROBES
#include <linux/types.h>
typedef u16 kprobe_opcode_t;
-#define BREAKPOINT_INSTRUCTION 0xd673 /* breakpoint */
#define MAX_INSN_SIZE 2
#define MAX_STACK_SIZE 64 /* 32 would probably be OK */
@@ -46,4 +50,5 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
#define flush_insn_slot(p) do { } while (0)
+#endif /* CONFIG_KPROBES */
#endif /* __ASM_AVR32_KPROBES_H */
diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild
index d6fa60b158be..625db8ac815e 100644
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@@ -46,3 +46,4 @@ generic-y += unaligned.h
generic-y += user.h
generic-y += word-at-a-time.h
generic-y += xor.h
+generic-y += kprobes.h
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 4e9f57433f3a..82619c32d25b 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -61,3 +61,4 @@ generic-y += user.h
generic-y += vga.h
generic-y += word-at-a-time.h
generic-y += xor.h
+generic-y += kprobes.h
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index 9f19e19bff9d..0f5132b08896 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -4,6 +4,7 @@ generic-y += barrier.h
generic-y += bitsperlong.h
generic-y += clkdev.h
generic-y += cmpxchg.h
+generic-y += current.h
generic-y += device.h
generic-y += div64.h
generic-y += errno.h
@@ -44,3 +45,4 @@ generic-y += types.h
generic-y += vga.h
generic-y += word-at-a-time.h
generic-y += xor.h
+generic-y += kprobes.h
diff --git a/arch/cris/include/asm/current.h b/arch/cris/include/asm/current.h
deleted file mode 100644
index 5f5c0efd00be..000000000000
--- a/arch/cris/include/asm/current.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef _CRIS_CURRENT_H
-#define _CRIS_CURRENT_H
-
-#include <linux/thread_info.h>
-
-struct task_struct;
-
-static inline struct task_struct * get_current(void)
-{
- return current_thread_info()->task;
-}
-
-#define current get_current()
-
-#endif /* !(_CRIS_CURRENT_H) */
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 0f5b0d5d313c..c33b46715f65 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -7,3 +7,4 @@ generic-y += mm-arch-hooks.h
generic-y += preempt.h
generic-y += trace_clock.h
generic-y += word-at-a-time.h
+generic-y += kprobes.h
diff --git a/arch/frv/mb93090-mb00/pci-frv.c b/arch/frv/mb93090-mb00/pci-frv.c
index 34bb4b13e079..c452ddb5620f 100644
--- a/arch/frv/mb93090-mb00/pci-frv.c
+++ b/arch/frv/mb93090-mb00/pci-frv.c
@@ -147,7 +147,7 @@ static void __init pcibios_allocate_resources(int pass)
static void __init pcibios_assign_resources(void)
{
struct pci_dev *dev = NULL;
- int idx;
+ int idx, err;
struct resource *r;
for_each_pci_dev(dev) {
@@ -172,8 +172,13 @@ static void __init pcibios_assign_resources(void)
* the BIOS forgot to do so or because we have decided the old
* address was unusable for some reason.
*/
- if (!r->start && r->end)
- pci_assign_resource(dev, idx);
+ if (!r->start && r->end) {
+ err = pci_assign_resource(dev, idx);
+ if (err)
+ dev_err(&dev->dev,
+ "Failed to assign new address to %d\n",
+ idx);
+ }
}
}
}
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 5efd0c87f3c0..341740c3581c 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -74,3 +74,4 @@ generic-y += unaligned.h
generic-y += vga.h
generic-y += word-at-a-time.h
generic-y += xor.h
+generic-y += kprobes.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index a43a7c90e4af..797b64a4b80b 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -59,3 +59,4 @@ generic-y += unaligned.h
generic-y += vga.h
generic-y += word-at-a-time.h
generic-y += xor.h
+generic-y += kprobes.h
diff --git a/arch/ia64/include/asm/kprobes.h b/arch/ia64/include/asm/kprobes.h
index d5505d6f2382..0302b3664789 100644
--- a/arch/ia64/include/asm/kprobes.h
+++ b/arch/ia64/include/asm/kprobes.h
@@ -23,14 +23,19 @@
* 2005-Apr Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy
* <anil.s.keshavamurthy@intel.com> adapted from i386
*/
+#include <asm-generic/kprobes.h>
+#include <asm/break.h>
+
+#define BREAK_INST (long)(__IA64_BREAK_KPROBE << 6)
+
+#ifdef CONFIG_KPROBES
+
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
-#include <asm/break.h>
#define __ARCH_WANT_KPROBES_INSN_SLOT
#define MAX_INSN_SIZE 2 /* last half is for kprobe-booster */
-#define BREAK_INST (long)(__IA64_BREAK_KPROBE << 6)
#define NOP_M_INST (long)(1<<27)
#define BRL_INST(i1, i2) ((long)((0xcL << 37) | /* brl */ \
(0x1L << 12) | /* many */ \
@@ -124,4 +129,5 @@ extern void invalidate_stacked_regs(void);
extern void flush_register_stack(void);
extern void arch_remove_kprobe(struct kprobe *p);
-#endif /* _ASM_KPROBES_H */
+#endif /* CONFIG_KPROBES */
+#endif /* _ASM_KPROBES_H */
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index bb4610faca84..06cdaef54b2e 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -684,51 +684,3 @@ int arch_remove_memory(u64 start, u64 size)
}
#endif
#endif
-
-/**
- * show_mem - give short summary of memory stats
- *
- * Shows a simple page count of reserved and used pages in the system.
- * For discontig machines, it does this on a per-pgdat basis.
- */
-void show_mem(unsigned int filter)
-{
- int total_reserved = 0;
- unsigned long total_present = 0;
- pg_data_t *pgdat;
-
- printk(KERN_INFO "Mem-info:\n");
- show_free_areas(filter);
- printk(KERN_INFO "Node memory in pages:\n");
- for_each_online_pgdat(pgdat) {
- unsigned long present;
- unsigned long flags;
- int reserved = 0;
- int nid = pgdat->node_id;
- int zoneid;
-
- if (skip_free_areas_node(filter, nid))
- continue;
- pgdat_resize_lock(pgdat, &flags);
-
- for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
- struct zone *zone = &pgdat->node_zones[zoneid];
- if (!populated_zone(zone))
- continue;
-
- reserved += zone->present_pages - zone->managed_pages;
- }
- present = pgdat->node_present_pages;
-
- pgdat_resize_unlock(pgdat, &flags);
- total_present += present;
- total_reserved += reserved;
- printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, ",
- nid, present, reserved);
- }
- printk(KERN_INFO "%ld pages of RAM\n", total_present);
- printk(KERN_INFO "%d reserved pages\n", total_reserved);
- printk(KERN_INFO "Total of %ld pages in page table cache\n",
- quicklist_total_size());
- printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages());
-}
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index 652100b64a71..deb298777df2 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -1,5 +1,6 @@
generic-y += clkdev.h
+generic-y += current.h
generic-y += exec.h
generic-y += irq_work.h
generic-y += kvm_para.h
@@ -10,3 +11,4 @@ generic-y += preempt.h
generic-y += sections.h
generic-y += trace_clock.h
generic-y += word-at-a-time.h
+generic-y += kprobes.h
diff --git a/arch/m32r/include/asm/cmpxchg.h b/arch/m32r/include/asm/cmpxchg.h
index 14bf9b739dd2..23c9d0537201 100644
--- a/arch/m32r/include/asm/cmpxchg.h
+++ b/arch/m32r/include/asm/cmpxchg.h
@@ -64,8 +64,10 @@ __xchg(unsigned long x, volatile void *ptr, int size)
return (tmp);
}
-#define xchg(ptr, x) \
- ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
+#define xchg(ptr, x) ({ \
+ ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), \
+ sizeof(*(ptr)))); \
+})
static __always_inline unsigned long
__xchg_local(unsigned long x, volatile void *ptr, int size)
@@ -187,9 +189,12 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
return old;
}
-#define cmpxchg(ptr, o, n) \
- ((__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)(o), \
- (unsigned long)(n), sizeof(*(ptr))))
+#define cmpxchg(ptr, o, n) ({ \
+ ((__typeof__(*(ptr))) \
+ __cmpxchg((ptr), (unsigned long)(o), \
+ (unsigned long)(n), \
+ sizeof(*(ptr)))); \
+})
#include <asm-generic/cmpxchg-local.h>
diff --git a/arch/m32r/include/asm/current.h b/arch/m32r/include/asm/current.h
deleted file mode 100644
index 7859d864f2c2..000000000000
--- a/arch/m32r/include/asm/current.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef _ASM_M32R_CURRENT_H
-#define _ASM_M32R_CURRENT_H
-
-#include <linux/thread_info.h>
-
-struct task_struct;
-
-static __inline__ struct task_struct *get_current(void)
-{
- return current_thread_info()->task;
-}
-
-#define current (get_current())
-
-#endif /* _ASM_M32R_CURRENT_H */
diff --git a/arch/m68k/68000/bootlogo-vz.h b/arch/m68k/68000/bootlogo-vz.h
index b38e2b255142..6ff09beba1ba 100644
--- a/arch/m68k/68000/bootlogo-vz.h
+++ b/arch/m68k/68000/bootlogo-vz.h
@@ -1,6 +1,8 @@
+#include <linux/compiler.h>
+
#define splash_width 640
#define splash_height 480
-unsigned char __attribute__ ((aligned(16))) bootlogo_bits[] = {
+unsigned char __aligned(16) bootlogo_bits[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
diff --git a/arch/m68k/68000/bootlogo.h b/arch/m68k/68000/bootlogo.h
index b896c933fafc..c466db3ca3a8 100644
--- a/arch/m68k/68000/bootlogo.h
+++ b/arch/m68k/68000/bootlogo.h
@@ -1,6 +1,8 @@
+#include <linux/compiler.h>
+
#define bootlogo_width 160
#define bootlogo_height 160
-unsigned char __attribute__ ((aligned(16))) bootlogo_bits[] = {
+unsigned char __aligned(16) bootlogo_bits[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x40, 0x55, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 6c76d6c24b3d..d4f9ccbfa85c 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -33,3 +33,4 @@ generic-y += trace_clock.h
generic-y += types.h
generic-y += word-at-a-time.h
generic-y += xor.h
+generic-y += kprobes.h
diff --git a/arch/m68k/include/asm/MC68328.h b/arch/m68k/include/asm/MC68328.h
index 1a8080c4cc40..b61230e74e63 100644
--- a/arch/m68k/include/asm/MC68328.h
+++ b/arch/m68k/include/asm/MC68328.h
@@ -8,6 +8,7 @@
* Copyright (C) 1998 Kenneth Albanowski <kjahds@kjahds.com>,
*
*/
+#include <linux/compiler.h>
#ifndef _MC68328_H_
#define _MC68328_H_
@@ -993,7 +994,7 @@ typedef volatile struct {
volatile unsigned short int pad1;
volatile unsigned short int pad2;
volatile unsigned short int pad3;
-} __attribute__((packed)) m68328_uart;
+} __packed m68328_uart;
/**********
diff --git a/arch/m68k/include/asm/MC68EZ328.h b/arch/m68k/include/asm/MC68EZ328.h
index fedac87c5d13..703331ece328 100644
--- a/arch/m68k/include/asm/MC68EZ328.h
+++ b/arch/m68k/include/asm/MC68EZ328.h
@@ -9,6 +9,7 @@
* The Silver Hammer Group, Ltd.
*
*/
+#include <linux/compiler.h>
#ifndef _MC68EZ328_H_
#define _MC68EZ328_H_
@@ -815,7 +816,7 @@ typedef volatile struct {
volatile unsigned short int nipr;
volatile unsigned short int pad1;
volatile unsigned short int pad2;
-} __attribute__((packed)) m68328_uart;
+} __packed m68328_uart;
/**********
diff --git a/arch/m68k/include/asm/MC68VZ328.h b/arch/m68k/include/asm/MC68VZ328.h
index 34a51b2c784f..fbaed7ddfb41 100644
--- a/arch/m68k/include/asm/MC68VZ328.h
+++ b/arch/m68k/include/asm/MC68VZ328.h
@@ -909,7 +909,7 @@ typedef struct {
volatile unsigned short int nipr;
volatile unsigned short int hmark;
volatile unsigned short int unused;
-} __attribute__((packed)) m68328_uart;
+} __packed m68328_uart;
diff --git a/arch/m68k/include/asm/natfeat.h b/arch/m68k/include/asm/natfeat.h
index a3521b80c3b9..2d2424de1d65 100644
--- a/arch/m68k/include/asm/natfeat.h
+++ b/arch/m68k/include/asm/natfeat.h
@@ -6,6 +6,7 @@
* This software may be used and distributed according to the terms of
* the GNU General Public License (GPL), incorporated herein by reference.
*/
+#include <linux/compiler.h>
#ifndef _NATFEAT_H
#define _NATFEAT_H
@@ -17,6 +18,6 @@ void nf_init(void);
void nf_shutdown(void);
void nfprint(const char *fmt, ...)
- __attribute__ ((format (printf, 1, 2)));
+ __printf(1, 2);
# endif /* _NATFEAT_H */
diff --git a/arch/m68k/lib/ashldi3.c b/arch/m68k/lib/ashldi3.c
index 8dffd36ec4f2..ac08f8141390 100644
--- a/arch/m68k/lib/ashldi3.c
+++ b/arch/m68k/lib/ashldi3.c
@@ -18,10 +18,10 @@ GNU General Public License for more details. */
#define BITS_PER_UNIT 8
-typedef int SItype __attribute__ ((mode (SI)));
-typedef unsigned int USItype __attribute__ ((mode (SI)));
-typedef int DItype __attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
+typedef int SItype __mode(SI);
+typedef unsigned int USItype __mode(SI);
+typedef int DItype __mode(DI);
+typedef int word_type __mode(__word__);
struct DIstruct {SItype high, low;};
diff --git a/arch/m68k/lib/ashrdi3.c b/arch/m68k/lib/ashrdi3.c
index e6565a3ee2c3..5837b1dd3334 100644
--- a/arch/m68k/lib/ashrdi3.c
+++ b/arch/m68k/lib/ashrdi3.c
@@ -18,10 +18,10 @@ GNU General Public License for more details. */
#define BITS_PER_UNIT 8
-typedef int SItype __attribute__ ((mode (SI)));
-typedef unsigned int USItype __attribute__ ((mode (SI)));
-typedef int DItype __attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
+typedef int SItype __mode(SI);
+typedef unsigned int USItype __mode(SI);
+typedef int DItype __mode(DI);
+typedef int word_type __mode(__word__);
struct DIstruct {SItype high, low;};
diff --git a/arch/m68k/lib/lshrdi3.c b/arch/m68k/lib/lshrdi3.c
index 039779737c7d..7f40566be6c8 100644
--- a/arch/m68k/lib/lshrdi3.c
+++ b/arch/m68k/lib/lshrdi3.c
@@ -18,10 +18,10 @@ GNU General Public License for more details. */
#define BITS_PER_UNIT 8
-typedef int SItype __attribute__ ((mode (SI)));
-typedef unsigned int USItype __attribute__ ((mode (SI)));
-typedef int DItype __attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
+typedef int SItype __mode(SI);
+typedef unsigned int USItype __mode(SI);
+typedef int DItype __mode(DI);
+typedef int word_type __mode(__word__);
struct DIstruct {SItype high, low;};
diff --git a/arch/m68k/lib/muldi3.c b/arch/m68k/lib/muldi3.c
index 6459af5b2af0..3fb05c698c41 100644
--- a/arch/m68k/lib/muldi3.c
+++ b/arch/m68k/lib/muldi3.c
@@ -65,10 +65,10 @@ GNU General Public License for more details. */
umul_ppmm (__w.s.high, __w.s.low, u, v); \
__w.ll; })
-typedef int SItype __attribute__ ((mode (SI)));
-typedef unsigned int USItype __attribute__ ((mode (SI)));
-typedef int DItype __attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
+typedef int SItype __mode(SI);
+typedef unsigned int USItype __mode(SI);
+typedef int DItype __mode(DI);
+typedef int word_type __mode(__word__);
struct DIstruct {SItype high, low;};
diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild
index d3731f0db73b..f9b9df5d6de9 100644
--- a/arch/metag/include/asm/Kbuild
+++ b/arch/metag/include/asm/Kbuild
@@ -54,3 +54,4 @@ generic-y += user.h
generic-y += vga.h
generic-y += word-at-a-time.h
generic-y += xor.h
+generic-y += kprobes.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 6275eb051801..1732ec13b211 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -10,3 +10,4 @@ generic-y += preempt.h
generic-y += syscalls.h
generic-y += trace_clock.h
generic-y += word-at-a-time.h
+generic-y += kprobes.h
diff --git a/arch/mips/include/asm/kprobes.h b/arch/mips/include/asm/kprobes.h
index daba1f9a4f79..291846d9ba83 100644
--- a/arch/mips/include/asm/kprobes.h
+++ b/arch/mips/include/asm/kprobes.h
@@ -22,6 +22,9 @@
#ifndef _ASM_KPROBES_H
#define _ASM_KPROBES_H
+#include <asm-generic/kprobes.h>
+
+#ifdef CONFIG_KPROBES
#include <linux/ptrace.h>
#include <linux/types.h>
@@ -94,4 +97,5 @@ struct kprobe_ctlblk {
extern int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
-#endif /* _ASM_KPROBES_H */
+#endif /* CONFIG_KPROBES */
+#endif /* _ASM_KPROBES_H */
diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index f9dbfb14af33..093517e85a6c 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -111,7 +111,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
VM_READ|VM_WRITE|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- 0);
+ 0, NULL);
if (IS_ERR_VALUE(base)) {
ret = base;
goto out;
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index a39c36af97ad..1895a692efd4 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -148,8 +148,8 @@ static void *mips_dma_alloc_coherent(struct device *dev, size_t size,
gfp = massage_gfp_flags(dev, gfp);
if (IS_ENABLED(CONFIG_DMA_CMA) && gfpflags_allow_blocking(gfp))
- page = dma_alloc_from_contiguous(dev,
- count, get_order(size));
+ page = dma_alloc_from_contiguous(dev, count, get_order(size),
+ gfp);
if (!page)
page = alloc_pages(gfp, get_order(size));
diff --git a/arch/mn10300/include/asm/kprobes.h b/arch/mn10300/include/asm/kprobes.h
index c800b590183a..7abea0bdb549 100644
--- a/arch/mn10300/include/asm/kprobes.h
+++ b/arch/mn10300/include/asm/kprobes.h
@@ -21,13 +21,17 @@
#ifndef _ASM_KPROBES_H
#define _ASM_KPROBES_H
+#include <asm-generic/kprobes.h>
+
+#define BREAKPOINT_INSTRUCTION 0xff
+
+#ifdef CONFIG_KPROBES
#include <linux/types.h>
#include <linux/ptrace.h>
struct kprobe;
typedef unsigned char kprobe_opcode_t;
-#define BREAKPOINT_INSTRUCTION 0xff
#define MAX_INSN_SIZE 8
#define MAX_STACK_SIZE 128
@@ -47,4 +51,5 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
extern void arch_remove_kprobe(struct kprobe *p);
+#endif /* CONFIG_KPROBES */
#endif /* _ASM_KPROBES_H */
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 35b0e883761a..aaa3c218b56c 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -62,3 +62,4 @@ generic-y += user.h
generic-y += vga.h
generic-y += word-at-a-time.h
generic-y += xor.h
+generic-y += kprobes.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index fb241757f7f0..fb01873a5aad 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -67,3 +67,4 @@ generic-y += user.h
generic-y += vga.h
generic-y += word-at-a-time.h
generic-y += xor.h
+generic-y += kprobes.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 4e179d770d69..a9909c2d04c5 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -2,6 +2,7 @@
generic-y += auxvec.h
generic-y += barrier.h
generic-y += clkdev.h
+generic-y += current.h
generic-y += device.h
generic-y += div64.h
generic-y += emergency-restart.h
@@ -27,3 +28,4 @@ generic-y += user.h
generic-y += vga.h
generic-y += word-at-a-time.h
generic-y += xor.h
+generic-y += kprobes.h
diff --git a/arch/parisc/include/asm/current.h b/arch/parisc/include/asm/current.h
deleted file mode 100644
index 0fb9338e3bf2..000000000000
--- a/arch/parisc/include/asm/current.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef _PARISC_CURRENT_H
-#define _PARISC_CURRENT_H
-
-#include <linux/thread_info.h>
-
-struct task_struct;
-
-static inline struct task_struct * get_current(void)
-{
- return current_thread_info()->task;
-}
-
-#define current get_current()
-
-#endif /* !(_PARISC_CURRENT_H) */
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index a055e5b6b380..66f3a6345105 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -653,55 +653,6 @@ void __init mem_init(void)
unsigned long *empty_zero_page __read_mostly;
EXPORT_SYMBOL(empty_zero_page);
-void show_mem(unsigned int filter)
-{
- int total = 0,reserved = 0;
- pg_data_t *pgdat;
-
- printk(KERN_INFO "Mem-info:\n");
- show_free_areas(filter);
-
- for_each_online_pgdat(pgdat) {
- unsigned long flags;
- int zoneid;
-
- pgdat_resize_lock(pgdat, &flags);
- for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
- struct zone *zone = &pgdat->node_zones[zoneid];
- if (!populated_zone(zone))
- continue;
-
- total += zone->present_pages;
- reserved = zone->present_pages - zone->managed_pages;
- }
- pgdat_resize_unlock(pgdat, &flags);
- }
-
- printk(KERN_INFO "%d pages of RAM\n", total);
- printk(KERN_INFO "%d reserved pages\n", reserved);
-
-#ifdef CONFIG_DISCONTIGMEM
- {
- struct zonelist *zl;
- int i, j;
-
- for (i = 0; i < npmem_ranges; i++) {
- zl = node_zonelist(i, 0);
- for (j = 0; j < MAX_NR_ZONES; j++) {
- struct zoneref *z;
- struct zone *zone;
-
- printk("Zone list for zone %d on node %d: ", j, i);
- for_each_zone_zonelist(zone, z, zl, j)
- printk("[%d/%s] ", zone_to_nid(zone),
- zone->name);
- printk("\n");
- }
- }
- }
-#endif
-}
-
/*
* pagetable_init() sets up the page tables
*
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index d821835ade86..0503c98b2117 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -1,5 +1,8 @@
#ifndef _ASM_POWERPC_KPROBES_H
#define _ASM_POWERPC_KPROBES_H
+
+#include <asm-generic/kprobes.h>
+
#ifdef __KERNEL__
/*
* Kernel Probes (KProbes)
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 47120bf2670c..2a32483c7b6c 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -230,7 +230,9 @@ extern long long virt_phys_offset;
* and needs to be executable. This means the whole heap ends
* up being executable.
*/
-#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \
+#define VM_DATA_DEFAULT_FLAGS32 \
+ (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
+ VM_READ | VM_WRITE | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
#define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 491c5d8120f7..ab9d14c0e460 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -102,9 +102,9 @@ static void release_spapr_tce_table(struct rcu_head *head)
kfree(stt);
}
-static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int kvm_spapr_tce_fault(struct vm_fault *vmf)
{
- struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
+ struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
struct page *page;
if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index c42a7e63b39e..4d6c64b3041c 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -56,7 +56,8 @@ struct page *kvm_alloc_hpt_cma(unsigned long nr_pages)
{
VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
- return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES));
+ return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES),
+ GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(kvm_alloc_hpt_cma);
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 0899315e1434..0d3002b7e2b4 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -14,6 +14,7 @@
#include <asm/page.h>
#include <asm/code-patching.h>
#include <linux/uaccess.h>
+#include <linux/kprobes.h>
int patch_instruction(unsigned int *addr, unsigned int instr)
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 03f2cdfabf23..ae2f740a82f1 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -233,8 +233,9 @@ spufs_mem_write(struct file *file, const char __user *buffer,
}
static int
-spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+spufs_mem_mmap_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct spu_context *ctx = vma->vm_file->private_data;
unsigned long pfn, offset;
@@ -311,12 +312,11 @@ static const struct file_operations spufs_mem_fops = {
.mmap = spufs_mem_mmap,
};
-static int spufs_ps_fault(struct vm_area_struct *vma,
- struct vm_fault *vmf,
+static int spufs_ps_fault(struct vm_fault *vmf,
unsigned long ps_offs,
unsigned long ps_size)
{
- struct spu_context *ctx = vma->vm_file->private_data;
+ struct spu_context *ctx = vmf->vma->vm_file->private_data;
unsigned long area, offset = vmf->pgoff << PAGE_SHIFT;
int ret = 0;
@@ -354,7 +354,7 @@ static int spufs_ps_fault(struct vm_area_struct *vma,
down_read(&current->mm->mmap_sem);
} else {
area = ctx->spu->problem_phys + ps_offs;
- vm_insert_pfn(vma, vmf->address, (area + offset) >> PAGE_SHIFT);
+ vm_insert_pfn(vmf->vma, vmf->address, (area + offset) >> PAGE_SHIFT);
spu_context_trace(spufs_ps_fault__insert, ctx, ctx->spu);
}
@@ -367,10 +367,9 @@ refault:
}
#if SPUFS_MMAP_4K
-static int spufs_cntl_mmap_fault(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int spufs_cntl_mmap_fault(struct vm_fault *vmf)
{
- return spufs_ps_fault(vma, vmf, 0x4000, SPUFS_CNTL_MAP_SIZE);
+ return spufs_ps_fault(vmf, 0x4000, SPUFS_CNTL_MAP_SIZE);
}
static const struct vm_operations_struct spufs_cntl_mmap_vmops = {
@@ -1042,15 +1041,15 @@ static ssize_t spufs_signal1_write(struct file *file, const char __user *buf,
}
static int
-spufs_signal1_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+spufs_signal1_mmap_fault(struct vm_fault *vmf)
{
#if SPUFS_SIGNAL_MAP_SIZE == 0x1000
- return spufs_ps_fault(vma, vmf, 0x14000, SPUFS_SIGNAL_MAP_SIZE);
+ return spufs_ps_fault(vmf, 0x14000, SPUFS_SIGNAL_MAP_SIZE);
#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000
/* For 64k pages, both signal1 and signal2 can be used to mmap the whole
* signal 1 and 2 area
*/
- return spufs_ps_fault(vma, vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE);
+ return spufs_ps_fault(vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE);
#else
#error unsupported page size
#endif
@@ -1180,15 +1179,15 @@ static ssize_t spufs_signal2_write(struct file *file, const char __user *buf,
#if SPUFS_MMAP_4K
static int
-spufs_signal2_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+spufs_signal2_mmap_fault(struct vm_fault *vmf)
{
#if SPUFS_SIGNAL_MAP_SIZE == 0x1000
- return spufs_ps_fault(vma, vmf, 0x1c000, SPUFS_SIGNAL_MAP_SIZE);
+ return spufs_ps_fault(vmf, 0x1c000, SPUFS_SIGNAL_MAP_SIZE);
#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000
/* For 64k pages, both signal1 and signal2 can be used to mmap the whole
* signal 1 and 2 area
*/
- return spufs_ps_fault(vma, vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE);
+ return spufs_ps_fault(vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE);
#else
#error unsupported page size
#endif
@@ -1309,9 +1308,9 @@ DEFINE_SPUFS_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get,
#if SPUFS_MMAP_4K
static int
-spufs_mss_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+spufs_mss_mmap_fault(struct vm_fault *vmf)
{
- return spufs_ps_fault(vma, vmf, 0x0000, SPUFS_MSS_MAP_SIZE);
+ return spufs_ps_fault(vmf, 0x0000, SPUFS_MSS_MAP_SIZE);
}
static const struct vm_operations_struct spufs_mss_mmap_vmops = {
@@ -1371,9 +1370,9 @@ static const struct file_operations spufs_mss_fops = {
};
static int
-spufs_psmap_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+spufs_psmap_mmap_fault(struct vm_fault *vmf)
{
- return spufs_ps_fault(vma, vmf, 0x0000, SPUFS_PS_MAP_SIZE);
+ return spufs_ps_fault(vmf, 0x0000, SPUFS_PS_MAP_SIZE);
}
static const struct vm_operations_struct spufs_psmap_mmap_vmops = {
@@ -1431,9 +1430,9 @@ static const struct file_operations spufs_psmap_fops = {
#if SPUFS_MMAP_4K
static int
-spufs_mfc_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+spufs_mfc_mmap_fault(struct vm_fault *vmf)
{
- return spufs_ps_fault(vma, vmf, 0x3000, SPUFS_MFC_MAP_SIZE);
+ return spufs_ps_fault(vmf, 0x3000, SPUFS_MFC_MAP_SIZE);
}
static const struct vm_operations_struct spufs_mfc_mmap_vmops = {
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 1be0499f5397..5720236d0266 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -916,7 +916,7 @@ cmds(struct pt_regs *excp)
memzcan();
break;
case 'i':
- show_mem(0);
+ show_mem(0, NULL);
break;
default:
termch = cmd;
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index 591e5a5279b0..84c0f9086483 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -27,6 +27,11 @@
* 2005-Dec Used as a template for s390 by Mike Grundy
* <grundym@us.ibm.com>
*/
+#include <asm-generic/kprobes.h>
+
+#define BREAKPOINT_INSTRUCTION 0x0002
+
+#ifdef CONFIG_KPROBES
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
@@ -37,7 +42,6 @@ struct pt_regs;
struct kprobe;
typedef u16 kprobe_opcode_t;
-#define BREAKPOINT_INSTRUCTION 0x0002
/* Maximum instruction size is 3 (16bit) halfwords: */
#define MAX_INSN_SIZE 0x0003
@@ -91,4 +95,5 @@ int probe_is_insn_relative_long(u16 *insn);
#define flush_insn_slot(p) do { } while (0)
+#endif /* CONFIG_KPROBES */
#endif /* _ASM_S390_KPROBES_H */
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index f9293bfefb7f..9c9440dc253a 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -31,6 +31,7 @@ static struct memblock_type oldmem_type = {
.max = 1,
.total_size = 0,
.regions = &oldmem_region,
+ .name = "oldmem",
};
struct save_area {
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index ec1f0dedb948..59ac93714fa4 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -687,7 +687,7 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
/* Find vma in the parent mm */
vma = find_vma(gmap->mm, vmaddr);
size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
- zap_page_range(vma, vmaddr, size, NULL);
+ zap_page_range(vma, vmaddr, size);
}
up_read(&gmap->mm->mmap_sem);
}
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index 51970bb6c4fe..3ed3206ccf5e 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -4,6 +4,7 @@ header-y +=
generic-y += barrier.h
generic-y += clkdev.h
+generic-y += current.h
generic-y += irq_work.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
@@ -13,3 +14,4 @@ generic-y += trace_clock.h
generic-y += xor.h
generic-y += serial.h
generic-y += word-at-a-time.h
+generic-y += kprobes.h
diff --git a/arch/score/include/asm/current.h b/arch/score/include/asm/current.h
deleted file mode 100644
index 16eae9cbaf1a..000000000000
--- a/arch/score/include/asm/current.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_SCORE_CURRENT_H
-#define _ASM_SCORE_CURRENT_H
-
-#include <asm-generic/current.h>
-
-#endif /* _ASM_SCORE_CURRENT_H */
diff --git a/arch/sh/include/asm/kprobes.h b/arch/sh/include/asm/kprobes.h
index 134f3980e44a..f0986f9b3844 100644
--- a/arch/sh/include/asm/kprobes.h
+++ b/arch/sh/include/asm/kprobes.h
@@ -1,13 +1,16 @@
#ifndef __ASM_SH_KPROBES_H
#define __ASM_SH_KPROBES_H
+#include <asm-generic/kprobes.h>
+
+#define BREAKPOINT_INSTRUCTION 0xc33a
+
#ifdef CONFIG_KPROBES
#include <linux/types.h>
#include <linux/ptrace.h>
typedef insn_size_t kprobe_opcode_t;
-#define BREAKPOINT_INSTRUCTION 0xc33a
#define MAX_INSN_SIZE 16
#define MAX_STACK_SIZE 64
diff --git a/arch/sparc/include/asm/kprobes.h b/arch/sparc/include/asm/kprobes.h
index a145d798e112..49f8402035d7 100644
--- a/arch/sparc/include/asm/kprobes.h
+++ b/arch/sparc/include/asm/kprobes.h
@@ -1,13 +1,17 @@
#ifndef _SPARC64_KPROBES_H
#define _SPARC64_KPROBES_H
+#include <asm-generic/kprobes.h>
+
+#define BREAKPOINT_INSTRUCTION 0x91d02070 /* ta 0x70 */
+#define BREAKPOINT_INSTRUCTION_2 0x91d02071 /* ta 0x71 */
+
+#ifdef CONFIG_KPROBES
#include <linux/types.h>
#include <linux/percpu.h>
typedef u32 kprobe_opcode_t;
-#define BREAKPOINT_INSTRUCTION 0x91d02070 /* ta 0x70 */
-#define BREAKPOINT_INSTRUCTION_2 0x91d02071 /* ta 0x71 */
#define MAX_INSN_SIZE 2
#define kretprobe_blacklist_size 0
@@ -48,4 +52,6 @@ int kprobe_exceptions_notify(struct notifier_block *self,
int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
asmlinkage void __kprobes kprobe_trap(unsigned long trap_level,
struct pt_regs *regs);
+
+#endif /* CONFIG_KPROBES */
#endif /* _SPARC64_KPROBES_H */
diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
index c4e65cb3280f..6f06058c5ae7 100644
--- a/arch/sparc/kernel/setup_32.c
+++ b/arch/sparc/kernel/setup_32.c
@@ -82,7 +82,7 @@ static void prom_sync_me(void)
"nop\n\t" : : "r" (&trapbase));
prom_printf("PROM SYNC COMMAND...\n");
- show_free_areas(0);
+ show_free_areas(0, NULL);
if (!is_idle_task(current)) {
local_irq_enable();
sys_sync();
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index eb8287155279..c6afe98de4d9 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -55,17 +55,6 @@ extern unsigned int sparc_ramdisk_size;
unsigned long highstart_pfn, highend_pfn;
-void show_mem(unsigned int filter)
-{
- printk("Mem-info:\n");
- show_free_areas(filter);
- printk("Free swap: %6ldkB\n",
- get_nr_swap_pages() << (PAGE_SHIFT-10));
- printk("%ld pages of RAM\n", totalram_pages);
- printk("%ld free pages\n", nr_free_pages());
-}
-
-
unsigned long last_valid_pfn;
unsigned long calc_highpages(void)
diff --git a/arch/tile/include/asm/kprobes.h b/arch/tile/include/asm/kprobes.h
index d8f9a83943b1..4a8b1cadca24 100644
--- a/arch/tile/include/asm/kprobes.h
+++ b/arch/tile/include/asm/kprobes.h
@@ -17,10 +17,13 @@
#ifndef _ASM_TILE_KPROBES_H
#define _ASM_TILE_KPROBES_H
+#include <asm-generic/kprobes.h>
+
+#ifdef CONFIG_KPROBES
+
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
-
#include <arch/opcode.h>
#define __ARCH_WANT_KPROBES_INSN_SLOT
@@ -76,4 +79,5 @@ void arch_remove_kprobe(struct kprobe *);
extern int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
+#endif /* CONFIG_KPROBES */
#endif /* _ASM_TILE_KPROBES_H */
diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c
index 6225cc998db1..889901824400 100644
--- a/arch/tile/mm/elf.c
+++ b/arch/tile/mm/elf.c
@@ -143,7 +143,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
unsigned long addr = MEM_USER_INTRPT;
addr = mmap_region(NULL, addr, INTRPT_SIZE,
VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0);
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0, NULL);
if (addr > (unsigned long) -PAGE_SIZE)
retval = (int) addr;
}
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 7cc6ee7f1a58..492a7361e58e 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -36,51 +36,6 @@
#define K(x) ((x) << (PAGE_SHIFT-10))
-/*
- * The normal show_free_areas() is too verbose on Tile, with dozens
- * of processors and often four NUMA zones each with high and lowmem.
- */
-void show_mem(unsigned int filter)
-{
- struct zone *zone;
-
- pr_err("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu pagecache:%lu swap:%lu\n",
- (global_node_page_state(NR_ACTIVE_ANON) +
- global_node_page_state(NR_ACTIVE_FILE)),
- (global_node_page_state(NR_INACTIVE_ANON) +
- global_node_page_state(NR_INACTIVE_FILE)),
- global_node_page_state(NR_FILE_DIRTY),
- global_node_page_state(NR_WRITEBACK),
- global_node_page_state(NR_UNSTABLE_NFS),
- global_page_state(NR_FREE_PAGES),
- (global_page_state(NR_SLAB_RECLAIMABLE) +
- global_page_state(NR_SLAB_UNRECLAIMABLE)),
- global_node_page_state(NR_FILE_MAPPED),
- global_page_state(NR_PAGETABLE),
- global_page_state(NR_BOUNCE),
- global_node_page_state(NR_FILE_PAGES),
- get_nr_swap_pages());
-
- for_each_zone(zone) {
- unsigned long flags, order, total = 0, largest_order = -1;
-
- if (!populated_zone(zone))
- continue;
-
- spin_lock_irqsave(&zone->lock, flags);
- for (order = 0; order < MAX_ORDER; order++) {
- int nr = zone->free_area[order].nr_free;
- total += nr << order;
- if (nr)
- largest_order = order;
- }
- spin_unlock_irqrestore(&zone->lock, flags);
- pr_err("Node %d %7s: %lukB (largest %luKb)\n",
- zone_to_nid(zone), zone->name,
- K(total), largest_order ? K(1UL) << largest_order : 0);
- }
-}
-
/**
* shatter_huge_page() - ensure a given address is mapped by a small page.
*
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 90c281cd7e1d..e9d42aab76dc 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -25,3 +25,4 @@ generic-y += topology.h
generic-y += trace_clock.h
generic-y += word-at-a-time.h
generic-y += xor.h
+generic-y += kprobes.h
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index f2fa4c64932c..1397b867906f 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -60,3 +60,4 @@ generic-y += user.h
generic-y += vga.h
generic-y += word-at-a-time.h
generic-y += xor.h
+generic-y += kprobes.h
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index be2bde9b07cf..f4950fbfe574 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -57,50 +57,6 @@ early_param("initrd", early_initrd);
*/
struct meminfo meminfo;
-void show_mem(unsigned int filter)
-{
- int free = 0, total = 0, reserved = 0;
- int shared = 0, cached = 0, slab = 0, i;
- struct meminfo *mi = &meminfo;
-
- printk(KERN_DEFAULT "Mem-info:\n");
- show_free_areas(filter);
-
- for_each_bank(i, mi) {
- struct membank *bank = &mi->bank[i];
- unsigned int pfn1, pfn2;
- struct page *page, *end;
-
- pfn1 = bank_pfn_start(bank);
- pfn2 = bank_pfn_end(bank);
-
- page = pfn_to_page(pfn1);
- end = pfn_to_page(pfn2 - 1) + 1;
-
- do {
- total++;
- if (PageReserved(page))
- reserved++;
- else if (PageSwapCache(page))
- cached++;
- else if (PageSlab(page))
- slab++;
- else if (!page_count(page))
- free++;
- else
- shared += page_count(page) - 1;
- page++;
- } while (page < end);
- }
-
- printk(KERN_DEFAULT "%d pages of RAM\n", total);
- printk(KERN_DEFAULT "%d free pages\n", free);
- printk(KERN_DEFAULT "%d reserved pages\n", reserved);
- printk(KERN_DEFAULT "%d slab pages\n", slab);
- printk(KERN_DEFAULT "%d pages shared\n", shared);
- printk(KERN_DEFAULT "%d pages swap cached\n", cached);
-}
-
static void __init find_limits(unsigned long *min, unsigned long *max_low,
unsigned long *max_high)
{
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 37f4373aa166..d6ec989a73c4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -108,6 +108,7 @@ config X86
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
select HAVE_ARCH_VMAP_STACK if X86_64
select HAVE_ARCH_WITHIN_STACK_FRAMES
select HAVE_CC_STACKPROTECTOR
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index c4cba00dbdee..63c1d13aaf9f 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -74,14 +74,6 @@ config EFI_PGT_DUMP
issues with the mapping of the EFI runtime regions into that
table.
-config DEBUG_RODATA_TEST
- bool "Testcase for the marking rodata read-only"
- default y
- ---help---
- This option enables a testcase for the setting rodata read-only
- as well as for the change_page_attr() infrastructure.
- If in doubt, say "N"
-
config DEBUG_WX
bool "Warn on W+X mappings at boot"
select X86_PTDUMP_CORE
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10820f6cefbf..572cee3fccff 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -186,7 +186,7 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
- do_munmap(mm, text_start, image->size);
+ do_munmap(mm, text_start, image->size, NULL);
} else {
current->mm->context.vdso = (void __user *)text_start;
current->mm->context.vdso_image = image;
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 872877d930de..e7e1942edff7 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -90,18 +90,8 @@ void clflush_cache_range(void *addr, unsigned int size);
#define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
-extern const int rodata_test_data;
extern int kernel_set_to_readonly;
void set_kernel_text_rw(void);
void set_kernel_text_ro(void);
-#ifdef CONFIG_DEBUG_RODATA_TEST
-int rodata_test(void);
-#else
-static inline int rodata_test(void)
-{
- return 0;
-}
-#endif
-
#endif /* _ASM_X86_CACHEFLUSH_H */
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index d1d1e5094c28..200581691c6e 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -21,6 +21,12 @@
*
* See arch/x86/kernel/kprobes.c for x86 kprobes history.
*/
+
+#include <asm-generic/kprobes.h>
+
+#define BREAKPOINT_INSTRUCTION 0xcc
+
+#ifdef CONFIG_KPROBES
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
@@ -32,7 +38,6 @@ struct pt_regs;
struct kprobe;
typedef u8 kprobe_opcode_t;
-#define BREAKPOINT_INSTRUCTION 0xcc
#define RELATIVEJUMP_OPCODE 0xe9
#define RELATIVEJUMP_SIZE 5
#define RELATIVECALL_OPCODE 0xe8
@@ -116,4 +121,6 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
extern int kprobe_int3_handler(struct pt_regs *regs);
extern int kprobe_debug_handler(struct pt_regs *regs);
+
+#endif /* CONFIG_KPROBES */
#endif /* _ASM_X86_KPROBES_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 1eea6ca40694..1b5c1725c630 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -475,6 +475,17 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
native_pmd_val(pmd));
}
+static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud)
+{
+ if (sizeof(pudval_t) > sizeof(long))
+ /* 5 arg words */
+ pv_mmu_ops.set_pud_at(mm, addr, pudp, pud);
+ else
+ PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp,
+ native_pud_val(pud));
+}
+
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
pmdval_t val = native_pmd_val(pmd);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index bb2de45a60f2..b060f962d581 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -249,6 +249,8 @@ struct pv_mmu_ops {
void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmdval);
+ void (*set_pud_at)(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pudval);
void (*pte_update)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep);
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index fd74a11959de..a8b96e708c2b 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -21,6 +21,10 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
*pmdp = pmd;
}
+static inline void native_set_pud(pud_t *pudp, pud_t pud)
+{
+}
+
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
{
native_set_pte(ptep, pte);
@@ -31,6 +35,10 @@ static inline void native_pmd_clear(pmd_t *pmdp)
native_set_pmd(pmdp, __pmd(0));
}
+static inline void native_pud_clear(pud_t *pudp)
+{
+}
+
static inline void native_pte_clear(struct mm_struct *mm,
unsigned long addr, pte_t *xp)
{
@@ -55,6 +63,15 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
#endif
+#ifdef CONFIG_SMP
+static inline pud_t native_pudp_get_and_clear(pud_t *xp)
+{
+ return __pud(xchg((pudval_t *)xp, 0));
+}
+#else
+#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
+#endif
+
/* Bit manipulation helper on pte/pgoff entry */
static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshift,
unsigned long mask, unsigned int leftshift)
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index cdaa58c9b39e..50d35e3185f5 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -121,6 +121,10 @@ static inline void native_pmd_clear(pmd_t *pmd)
*(tmp + 1) = 0;
}
+static inline void native_pud_clear(pud_t *pudp)
+{
+}
+
static inline void pud_clear(pud_t *pudp)
{
set_pud(pudp, __pud(0));
@@ -176,6 +180,30 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
#endif
+#ifdef CONFIG_SMP
+union split_pud {
+ struct {
+ u32 pud_low;
+ u32 pud_high;
+ };
+ pud_t pud;
+};
+
+static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
+{
+ union split_pud res, *orig = (union split_pud *)pudp;
+
+ /* xchg acts as a barrier before setting of the high bits */
+ res.pud_low = xchg(&orig->pud_low, 0);
+ res.pud_high = orig->pud_high;
+ orig->pud_high = 0;
+
+ return res.pud;
+}
+#else
+#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
+#endif
+
/* Encode and de-code a swap entry */
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
#define __swp_type(x) (((x).val) & 0x1f)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 437feb436efa..1cfb36b8c024 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -46,6 +46,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
#define set_pte(ptep, pte) native_set_pte(ptep, pte)
#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
#define set_pmd_at(mm, addr, pmdp, pmd) native_set_pmd_at(mm, addr, pmdp, pmd)
+#define set_pud_at(mm, addr, pudp, pud) native_set_pud_at(mm, addr, pudp, pud)
#define set_pte_atomic(ptep, pte) \
native_set_pte_atomic(ptep, pte)
@@ -128,6 +129,16 @@ static inline int pmd_young(pmd_t pmd)
return pmd_flags(pmd) & _PAGE_ACCESSED;
}
+static inline int pud_dirty(pud_t pud)
+{
+ return pud_flags(pud) & _PAGE_DIRTY;
+}
+
+static inline int pud_young(pud_t pud)
+{
+ return pud_flags(pud) & _PAGE_ACCESSED;
+}
+
static inline int pte_write(pte_t pte)
{
return pte_flags(pte) & _PAGE_RW;
@@ -181,6 +192,13 @@ static inline int pmd_trans_huge(pmd_t pmd)
return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
}
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline int pud_trans_huge(pud_t pud)
+{
+ return (pud_val(pud) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
+}
+#endif
+
#define has_transparent_hugepage has_transparent_hugepage
static inline int has_transparent_hugepage(void)
{
@@ -192,6 +210,18 @@ static inline int pmd_devmap(pmd_t pmd)
{
return !!(pmd_val(pmd) & _PAGE_DEVMAP);
}
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline int pud_devmap(pud_t pud)
+{
+ return !!(pud_val(pud) & _PAGE_DEVMAP);
+}
+#else
+static inline int pud_devmap(pud_t pud)
+{
+ return 0;
+}
+#endif
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -333,6 +363,65 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd)
return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE);
}
+static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
+{
+ pudval_t v = native_pud_val(pud);
+
+ return __pud(v | set);
+}
+
+static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
+{
+ pudval_t v = native_pud_val(pud);
+
+ return __pud(v & ~clear);
+}
+
+static inline pud_t pud_mkold(pud_t pud)
+{
+ return pud_clear_flags(pud, _PAGE_ACCESSED);
+}
+
+static inline pud_t pud_mkclean(pud_t pud)
+{
+ return pud_clear_flags(pud, _PAGE_DIRTY);
+}
+
+static inline pud_t pud_wrprotect(pud_t pud)
+{
+ return pud_clear_flags(pud, _PAGE_RW);
+}
+
+static inline pud_t pud_mkdirty(pud_t pud)
+{
+ return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+}
+
+static inline pud_t pud_mkdevmap(pud_t pud)
+{
+ return pud_set_flags(pud, _PAGE_DEVMAP);
+}
+
+static inline pud_t pud_mkhuge(pud_t pud)
+{
+ return pud_set_flags(pud, _PAGE_PSE);
+}
+
+static inline pud_t pud_mkyoung(pud_t pud)
+{
+ return pud_set_flags(pud, _PAGE_ACCESSED);
+}
+
+static inline pud_t pud_mkwrite(pud_t pud)
+{
+ return pud_set_flags(pud, _PAGE_RW);
+}
+
+static inline pud_t pud_mknotpresent(pud_t pud)
+{
+ return pud_clear_flags(pud, _PAGE_PRESENT | _PAGE_PROTNONE);
+}
+
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline int pte_soft_dirty(pte_t pte)
{
@@ -344,6 +433,11 @@ static inline int pmd_soft_dirty(pmd_t pmd)
return pmd_flags(pmd) & _PAGE_SOFT_DIRTY;
}
+static inline int pud_soft_dirty(pud_t pud)
+{
+ return pud_flags(pud) & _PAGE_SOFT_DIRTY;
+}
+
static inline pte_t pte_mksoft_dirty(pte_t pte)
{
return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
@@ -354,6 +448,11 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
}
+static inline pud_t pud_mksoft_dirty(pud_t pud)
+{
+ return pud_set_flags(pud, _PAGE_SOFT_DIRTY);
+}
+
static inline pte_t pte_clear_soft_dirty(pte_t pte)
{
return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
@@ -364,6 +463,11 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY);
}
+static inline pud_t pud_clear_soft_dirty(pud_t pud)
+{
+ return pud_clear_flags(pud, _PAGE_SOFT_DIRTY);
+}
+
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
/*
@@ -392,6 +496,12 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
massage_pgprot(pgprot));
}
+static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
+{
+ return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) |
+ massage_pgprot(pgprot));
+}
+
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
pteval_t val = pte_val(pte);
@@ -771,6 +881,14 @@ static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp)
return res;
}
+static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
+{
+ pud_t res = *pudp;
+
+ native_pud_clear(pudp);
+ return res;
+}
+
static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep , pte_t pte)
{
@@ -783,6 +901,12 @@ static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr,
native_set_pmd(pmdp, pmd);
}
+static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud)
+{
+ native_set_pud(pudp, pud);
+}
+
#ifndef CONFIG_PARAVIRT
/*
* Rules for using pte_update - it must be called after any PTE update which
@@ -861,10 +985,15 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp,
pmd_t entry, int dirty);
+extern int pudp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp,
+ pud_t entry, int dirty);
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp);
+extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pudp);
#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
@@ -884,6 +1013,13 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long
return native_pmdp_get_and_clear(pmdp);
}
+#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
+static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ return native_pudp_get_and_clear(pudp);
+}
+
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
@@ -932,6 +1068,10 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmd)
{
}
+static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pud)
+{
+}
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 62b775926045..73c7ccc38912 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -106,6 +106,21 @@ static inline void native_pud_clear(pud_t *pud)
native_set_pud(pud, native_make_pud(0));
}
+static inline pud_t native_pudp_get_and_clear(pud_t *xp)
+{
+#ifdef CONFIG_SMP
+ return native_make_pud(xchg(&xp->pud, 0));
+#else
+ /* native_local_pudp_get_and_clear,
+ * but duplicated because of cyclic dependency
+ */
+ pud_t ret = *xp;
+
+ native_pud_clear(xp);
+ return ret;
+#endif
+}
+
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
*pgdp = pgd;
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index bdcdb3b3a219..84c00592d359 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,7 +100,6 @@ obj-$(CONFIG_HPET_TIMER) += hpet.o
obj-$(CONFIG_APB_TIMER) += apb_timer.o
obj-$(CONFIG_AMD_NB) += amd_nb.o
-obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 307b1f4543de..2e3c34b1df37 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -338,6 +338,7 @@ void arch_crash_save_vmcoreinfo(void)
vmcoreinfo_append_str("KERNELOFFSET=%lx\n",
kaslr_offset());
VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
+ VMCOREINFO_PHYS_BASE(phys_base);
}
/* arch-dependent functionality related to kexec file-based syscall */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index a1bfba0f7234..4797e87b0fb6 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -425,6 +425,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
.pmd_clear = native_pmd_clear,
#endif
.set_pud = native_set_pud,
+ .set_pud_at = native_set_pud_at,
.pmd_val = PTE_IDENT,
.make_pmd = PTE_IDENT,
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index d30c37750765..d5c223c9cf11 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -91,7 +91,8 @@ again:
page = NULL;
/* CMA can be used only in the context which permits sleeping */
if (gfpflags_allow_blocking(flag)) {
- page = dma_alloc_from_contiguous(dev, count, get_order(size));
+ page = dma_alloc_from_contiguous(dev, count, get_order(size),
+ flag);
if (page && page_to_phys(page) + size > dma_mask) {
dma_release_from_contiguous(dev, page, count);
page = NULL;
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
deleted file mode 100644
index 222e84e2432e..000000000000
--- a/arch/x86/kernel/test_rodata.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * test_rodata.c: functional test for mark_rodata_ro function
- *
- * (C) Copyright 2008 Intel Corporation
- * Author: Arjan van de Ven <arjan@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-#include <asm/cacheflush.h>
-#include <asm/sections.h>
-#include <asm/asm.h>
-
-int rodata_test(void)
-{
- unsigned long result;
- unsigned long start, end;
-
- /* test 1: read the value */
- /* If this test fails, some previous testrun has clobbered the state */
- if (!rodata_test_data) {
- printk(KERN_ERR "rodata_test: test 1 fails (start data)\n");
- return -ENODEV;
- }
-
- /* test 2: write to the variable; this should fault */
- /*
- * If this test fails, we managed to overwrite the data
- *
- * This is written in assembly to be able to catch the
- * exception that is supposed to happen in the correct
- * case
- */
-
- result = 1;
- asm volatile(
- "0: mov %[zero],(%[rodata_test])\n"
- " mov %[zero], %[rslt]\n"
- "1:\n"
- ".section .fixup,\"ax\"\n"
- "2: jmp 1b\n"
- ".previous\n"
- _ASM_EXTABLE(0b,2b)
- : [rslt] "=r" (result)
- : [rodata_test] "r" (&rodata_test_data), [zero] "r" (0UL)
- );
-
-
- if (!result) {
- printk(KERN_ERR "rodata_test: test data was not read only\n");
- return -ENODEV;
- }
-
- /* test 3: check the value hasn't changed */
- /* If this test fails, we managed to overwrite the data */
- if (!rodata_test_data) {
- printk(KERN_ERR "rodata_test: Test 3 fails (end data)\n");
- return -ENODEV;
- }
- /* test 4: check if the rodata section is 4Kb aligned */
- start = (unsigned long)__start_rodata;
- end = (unsigned long)__end_rodata;
- if (start & (PAGE_SIZE - 1)) {
- printk(KERN_ERR "rodata_test: .rodata is not 4k aligned\n");
- return -ENODEV;
- }
- if (end & (PAGE_SIZE - 1)) {
- printk(KERN_ERR "rodata_test: .rodata end is not 4k aligned\n");
- return -ENODEV;
- }
-
- return 0;
-}
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 928d657de829..2b4b53e6793f 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -864,9 +864,6 @@ static noinline int do_test_wp_bit(void)
return flag;
}
-const int rodata_test_data = 0xC3;
-EXPORT_SYMBOL_GPL(rodata_test_data);
-
int kernel_set_to_readonly __read_mostly;
void set_kernel_text_rw(void)
@@ -939,7 +936,6 @@ void mark_rodata_ro(void)
set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
size >> 10);
- rodata_test();
#ifdef CONFIG_CPA_DEBUG
printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, start + size);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index af85b686a7b0..a4880d8143c7 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -650,6 +650,17 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;
+ /*
+ * Only allow partial section hotplug for ZONE_DEVICE ranges,
+ * since register_new_memory() requires section alignment, and
+ * CONFIG_SPARSEMEM_VMEMMAP=n requires sections to be fully
+ * populated.
+ */
+ if ((!IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP) || !for_device)
+ && ((start & ~PA_SECTION_MASK)
+ || (size & ~PA_SECTION_MASK)))
+ return -EINVAL;
+
init_memory_mapping(start, start + size);
ret = __add_pages(nid, zone, start_pfn, nr_pages);
@@ -679,7 +690,7 @@ static void __meminit free_pagetable(struct page *page, int order)
if (PageReserved(page)) {
__ClearPageReserved(page);
- magic = (unsigned long)page->lru.next;
+ magic = (unsigned long)page->freelist;
if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
while (nr_pages--)
put_page_bootmem(page++);
@@ -1000,9 +1011,6 @@ void __init mem_init(void)
mem_init_print_info(NULL);
}
-const int rodata_test_data = 0xC3;
-EXPORT_SYMBOL_GPL(rodata_test_data);
-
int kernel_set_to_readonly;
void set_kernel_text_rw(void)
@@ -1071,8 +1079,6 @@ void mark_rodata_ro(void)
all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT);
- rodata_test();
-
#ifdef CONFIG_CPA_DEBUG
printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
set_memory_rw(start, (end-start) >> PAGE_SHIFT);
@@ -1220,7 +1226,9 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
struct vmem_altmap *altmap = to_vmem_altmap(start);
int err;
- if (boot_cpu_has(X86_FEATURE_PSE))
+ if (end - start < PAGES_PER_SECTION * sizeof(struct page))
+ err = vmemmap_populate_basepages(start, end, node);
+ else if (boot_cpu_has(X86_FEATURE_PSE))
err = vmemmap_populate_hugepages(start, end, node, altmap);
else if (altmap) {
pr_err_once("%s: no cpu support for altmap allocations\n",
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index af59f808742f..c98079684bdb 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -51,7 +51,7 @@ static unsigned long mpx_mmap(unsigned long len)
down_write(&mm->mmap_sem);
addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate);
+ MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate, NULL);
up_write(&mm->mmap_sem);
if (populate)
mm_populate(addr, populate);
@@ -796,7 +796,7 @@ static noinline int zap_bt_entries_mapping(struct mm_struct *mm,
return -EINVAL;
len = min(vma->vm_end, end) - addr;
- zap_page_range(vma, addr, len, NULL);
+ zap_page_range(vma, addr, len);
trace_mpx_unmap_zap(addr, addr+len);
vma = vma->vm_next;
@@ -893,7 +893,7 @@ static int unmap_entire_bt(struct mm_struct *mm,
* avoid recursion, do_munmap() will check whether it comes
* from one bounds table through VM_MPX flag.
*/
- return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm));
+ return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm), NULL);
}
static int try_unmap_single_bt(struct mm_struct *mm,
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 3feec5af4e67..6cbdff26bb96 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -445,6 +445,26 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
return changed;
}
+
+int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp, pud_t entry, int dirty)
+{
+ int changed = !pud_same(*pudp, entry);
+
+ VM_BUG_ON(address & ~HPAGE_PUD_MASK);
+
+ if (changed && dirty) {
+ *pudp = entry;
+ /*
+ * We had a write-protection fault here and changed the pud
+ * to to more permissive. No need to flush the TLB for that,
+ * #PF is architecturally guaranteed to do that and in the
+ * worst-case we'll generate a spurious fault.
+ */
+ }
+
+ return changed;
+}
#endif
int ptep_test_and_clear_young(struct vm_area_struct *vma,
@@ -474,6 +494,17 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
return ret;
}
+int pudp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pudp)
+{
+ int ret = 0;
+
+ if (pud_young(*pudp))
+ ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
+ (unsigned long *)pudp);
+
+ return ret;
+}
#endif
int ptep_clear_flush_young(struct vm_area_struct *vma,
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 9e9760b20be5..f41408c53fe1 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -31,3 +31,4 @@ generic-y += topology.h
generic-y += trace_clock.h
generic-y += word-at-a-time.h
generic-y += xor.h
+generic-y += kprobes.h
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index 70e362e6038e..34c1f9fa6acc 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -158,7 +158,8 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
flag |= GFP_DMA;
if (gfpflags_allow_blocking(flag))
- page = dma_alloc_from_contiguous(dev, count, get_order(size));
+ page = dma_alloc_from_contiguous(dev, count, get_order(size),
+ flag);
if (!page)
page = alloc_pages(flag, get_order(size));
diff --git a/block/genhd.c b/block/genhd.c
index 3631cd480295..412659daeed6 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -901,7 +901,7 @@ static int show_partition(struct seq_file *seqf, void *v)
char buf[BDEVNAME_SIZE];
/* Don't show non-partitionable removeable devices or empty devices */
- if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
+ if (!get_capacity(sgp) || (!(disk_max_parts(sgp) > 1) &&
(sgp->flags & GENHD_FL_REMOVABLE)))
return 0;
if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
diff --git a/block/ioprio.c b/block/ioprio.c
index 01b8116298a1..3790669232ff 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -122,14 +122,14 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
if (!user)
break;
- do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
if (!uid_eq(task_uid(p), uid) ||
!task_pid_vnr(p))
continue;
ret = set_task_ioprio(p, ioprio);
if (ret)
goto free_uid;
- } while_each_thread(g, p);
+ }
free_uid:
if (who)
free_uid(user);
@@ -222,7 +222,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
if (!user)
break;
- do_each_thread(g, p) {
+ for_each_process_thread(g, p) {
if (!uid_eq(task_uid(p), user->uid) ||
!task_pid_vnr(p))
continue;
@@ -233,7 +233,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
ret = tmpio;
else
ret = ioprio_best(ret, tmpio);
- } while_each_thread(g, p);
+ }
if (who)
free_uid(user);
diff --git a/crypto/lz4.c b/crypto/lz4.c
index 99c1b2cc2976..71eff9b01b12 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -66,15 +66,13 @@ static void lz4_exit(struct crypto_tfm *tfm)
static int __lz4_compress_crypto(const u8 *src, unsigned int slen,
u8 *dst, unsigned int *dlen, void *ctx)
{
- size_t tmp_len = *dlen;
- int err;
+ int out_len = LZ4_compress_default(src, dst,
+ slen, *dlen, ctx);
- err = lz4_compress(src, slen, dst, &tmp_len, ctx);
-
- if (err < 0)
+ if (!out_len)
return -EINVAL;
- *dlen = tmp_len;
+ *dlen = out_len;
return 0;
}
@@ -96,16 +94,13 @@ static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
static int __lz4_decompress_crypto(const u8 *src, unsigned int slen,
u8 *dst, unsigned int *dlen, void *ctx)
{
- int err;
- size_t tmp_len = *dlen;
- size_t __slen = slen;
+ int out_len = LZ4_decompress_safe(src, dst, slen, *dlen);
- err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
- if (err < 0)
- return -EINVAL;
+ if (out_len < 0)
+ return out_len;
- *dlen = tmp_len;
- return err;
+ *dlen = out_len;
+ return 0;
}
static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c
index 75ffc4a3f786..03a34a8109c0 100644
--- a/crypto/lz4hc.c
+++ b/crypto/lz4hc.c
@@ -65,15 +65,13 @@ static void lz4hc_exit(struct crypto_tfm *tfm)
static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen,
u8 *dst, unsigned int *dlen, void *ctx)
{
- size_t tmp_len = *dlen;
- int err;
+ int out_len = LZ4_compress_HC(src, dst, slen,
+ *dlen, LZ4HC_DEFAULT_CLEVEL, ctx);
- err = lz4hc_compress(src, slen, dst, &tmp_len, ctx);
-
- if (err < 0)
+ if (!out_len)
return -EINVAL;
- *dlen = tmp_len;
+ *dlen = out_len;
return 0;
}
@@ -97,16 +95,13 @@ static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen,
u8 *dst, unsigned int *dlen, void *ctx)
{
- int err;
- size_t tmp_len = *dlen;
- size_t __slen = slen;
+ int out_len = LZ4_decompress_safe(src, dst, slen, *dlen);
- err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
- if (err < 0)
- return -EINVAL;
+ if (out_len < 0)
+ return out_len;
- *dlen = tmp_len;
- return err;
+ *dlen = out_len;
+ return 0;
}
static int lz4hc_sdecompress(struct crypto_scomp *tfm, const u8 *src,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index f85e51cf7dcc..006ecc434351 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -34293,61 +34293,123 @@ static struct hash_testvec bfin_crc_tv_template[] = {
static struct comp_testvec lz4_comp_tv_template[] = {
{
- .inlen = 70,
- .outlen = 45,
- .input = "Join us now and share the software "
- "Join us now and share the software ",
- .output = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75"
- "\x73\x20\x6e\x6f\x77\x20\x61\x6e"
- "\x64\x20\x73\x68\x61\x72\x65\x20"
- "\x74\x68\x65\x20\x73\x6f\x66\x74"
- "\x77\x0d\x00\x0f\x23\x00\x0b\x50"
- "\x77\x61\x72\x65\x20",
+ .inlen = 255,
+ .outlen = 218,
+ .input = "LZ4 is lossless compression algorithm, providing"
+ " compression speed at 400 MB/s per core, scalable "
+ "with multi-cores CPU. It features an extremely fast "
+ "decoder, with speed in multiple GB/s per core, "
+ "typically reaching RAM speed limits on multi-core "
+ "systems.",
+ .output = "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73"
+ "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73"
+ "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d"
+ "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00"
+ "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30"
+ "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f"
+ "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20"
+ "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00"
+ "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66"
+ "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78"
+ "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20"
+ "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00"
+ "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00"
+ "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72"
+ "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x83"
+ "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x3f\x00\x01\x85\x00"
+ "\x90\x20\x73\x79\x73\x74\x65\x6d\x73\x2e",
+
},
};
static struct comp_testvec lz4_decomp_tv_template[] = {
{
- .inlen = 45,
- .outlen = 70,
- .input = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75"
- "\x73\x20\x6e\x6f\x77\x20\x61\x6e"
- "\x64\x20\x73\x68\x61\x72\x65\x20"
- "\x74\x68\x65\x20\x73\x6f\x66\x74"
- "\x77\x0d\x00\x0f\x23\x00\x0b\x50"
- "\x77\x61\x72\x65\x20",
- .output = "Join us now and share the software "
- "Join us now and share the software ",
+ .inlen = 218,
+ .outlen = 255,
+ .input = "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73"
+ "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73"
+ "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d"
+ "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00"
+ "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30"
+ "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f"
+ "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20"
+ "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00"
+ "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66"
+ "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78"
+ "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20"
+ "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00"
+ "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00"
+ "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72"
+ "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x83"
+ "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x3f\x00\x01\x85\x00"
+ "\x90\x20\x73\x79\x73\x74\x65\x6d\x73\x2e",
+ .output = "LZ4 is lossless compression algorithm, providing"
+ " compression speed at 400 MB/s per core, scalable "
+ "with multi-cores CPU. It features an extremely fast "
+ "decoder, with speed in multiple GB/s per core, "
+ "typically reaching RAM speed limits on multi-core "
+ "systems.",
},
};
static struct comp_testvec lz4hc_comp_tv_template[] = {
{
- .inlen = 70,
- .outlen = 45,
- .input = "Join us now and share the software "
- "Join us now and share the software ",
- .output = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75"
- "\x73\x20\x6e\x6f\x77\x20\x61\x6e"
- "\x64\x20\x73\x68\x61\x72\x65\x20"
- "\x74\x68\x65\x20\x73\x6f\x66\x74"
- "\x77\x0d\x00\x0f\x23\x00\x0b\x50"
- "\x77\x61\x72\x65\x20",
+ .inlen = 255,
+ .outlen = 216,
+ .input = "LZ4 is lossless compression algorithm, providing"
+ " compression speed at 400 MB/s per core, scalable "
+ "with multi-cores CPU. It features an extremely fast "
+ "decoder, with speed in multiple GB/s per core, "
+ "typically reaching RAM speed limits on multi-core "
+ "systems.",
+ .output = "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73"
+ "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73"
+ "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d"
+ "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00"
+ "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30"
+ "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f"
+ "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20"
+ "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00"
+ "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66"
+ "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78"
+ "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20"
+ "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00"
+ "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00"
+ "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72"
+ "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x97"
+ "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x6e\x85\x00\x90\x20"
+ "\x73\x79\x73\x74\x65\x6d\x73\x2e",
+
},
};
static struct comp_testvec lz4hc_decomp_tv_template[] = {
{
- .inlen = 45,
- .outlen = 70,
- .input = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75"
- "\x73\x20\x6e\x6f\x77\x20\x61\x6e"
- "\x64\x20\x73\x68\x61\x72\x65\x20"
- "\x74\x68\x65\x20\x73\x6f\x66\x74"
- "\x77\x0d\x00\x0f\x23\x00\x0b\x50"
- "\x77\x61\x72\x65\x20",
- .output = "Join us now and share the software "
- "Join us now and share the software ",
+ .inlen = 216,
+ .outlen = 255,
+ .input = "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73"
+ "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73"
+ "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d"
+ "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00"
+ "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30"
+ "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f"
+ "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20"
+ "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00"
+ "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66"
+ "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78"
+ "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20"
+ "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00"
+ "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00"
+ "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72"
+ "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x97"
+ "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x6e\x85\x00\x90\x20"
+ "\x73\x79\x73\x74\x65\x6d\x73\x2e",
+ .output = "LZ4 is lossless compression algorithm, providing"
+ " compression speed at 400 MB/s per core, scalable "
+ "with multi-cores CPU. It features an extremely fast "
+ "decoder, with speed in multiple GB/s per core, "
+ "typically reaching RAM speed limits on multi-core "
+ "systems.",
},
};
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 9451b762fa1c..2bbcdc6fdfee 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -657,7 +657,7 @@ free_range:
page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE];
if (vma)
zap_page_range(vma, (uintptr_t)page_addr +
- proc->user_buffer_offset, PAGE_SIZE, NULL);
+ proc->user_buffer_offset, PAGE_SIZE);
err_vm_insert_page_failed:
unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE);
err_map_kernel_failed:
@@ -3342,7 +3342,7 @@ static void binder_vma_close(struct vm_area_struct *vma)
binder_defer_work(proc, BINDER_DEFERRED_PUT_FILES);
}
-static int binder_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int binder_vm_fault(struct vm_fault *vmf)
{
return VM_FAULT_SIGBUS;
}
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
index e167a1e1bccb..b55804cac4c4 100644
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
@@ -181,6 +181,7 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
* @dev: Pointer to device for which the allocation is performed.
* @count: Requested number of pages.
* @align: Requested alignment of pages (in PAGE_SIZE order).
+ * @gfp_mask: GFP flags to use for this allocation.
*
* This function allocates memory buffer for specified device. It uses
* device specific contiguous memory area if available or the default
@@ -188,12 +189,12 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
* function.
*/
struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
- unsigned int align)
+ unsigned int align, gfp_t gfp_mask)
{
if (align > CONFIG_CMA_ALIGNMENT)
align = CONFIG_CMA_ALIGNMENT;
- return cma_alloc(dev_get_cma_area(dev), count, align);
+ return cma_alloc(dev_get_cma_area(dev), count, align, gfp_mask);
}
/**
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index fa26ffd25fa6..fb381464e9cb 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -685,24 +685,16 @@ static int add_memory_block(int base_section_nr)
return 0;
}
-static bool is_zone_device_section(struct mem_section *ms)
-{
- struct page *page;
-
- page = sparse_decode_mem_map(ms->section_mem_map, __section_nr(ms));
- return is_zone_device_page(page);
-}
-
/*
* need an interface for the VM to add new memory regions,
* but without onlining it.
*/
-int register_new_memory(int nid, struct mem_section *section)
+int register_new_memory(struct zone *zone, int nid, struct mem_section *section)
{
int ret = 0;
struct memory_block *mem;
- if (is_zone_device_section(section))
+ if (is_dev_zone(zone))
return 0;
mutex_lock(&mem_sysfs_mutex);
@@ -736,14 +728,11 @@ unregister_memory(struct memory_block *memory)
device_unregister(&memory->dev);
}
-static int remove_memory_section(unsigned long node_id,
- struct mem_section *section, int phys_device)
+static int remove_memory_section(struct zone *zone, unsigned long node_id,
+ struct mem_section *section, int phys_device)
{
struct memory_block *mem;
- if (is_zone_device_section(section))
- return 0;
-
mutex_lock(&mem_sysfs_mutex);
mem = find_memory_block(section);
unregister_mem_sect_under_nodes(mem, __section_nr(section));
@@ -758,12 +747,15 @@ static int remove_memory_section(unsigned long node_id,
return 0;
}
-int unregister_memory_section(struct mem_section *section)
+int unregister_memory_section(struct zone *zone, struct mem_section *section)
{
+ if (is_dev_zone(zone))
+ return 0;
+
if (!present_section(section))
return -EINVAL;
- return remove_memory_section(0, section, 0);
+ return remove_memory_section(zone, 0, section, 0);
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 3cd7856156b4..e27d89a36c34 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -45,27 +45,6 @@ static const char *default_compressor = "lzo";
/* Module params (documentation at end) */
static unsigned int num_devices = 1;
-static inline void deprecated_attr_warn(const char *name)
-{
- pr_warn_once("%d (%s) Attribute %s (and others) will be removed. %s\n",
- task_pid_nr(current),
- current->comm,
- name,
- "See zram documentation.");
-}
-
-#define ZRAM_ATTR_RO(name) \
-static ssize_t name##_show(struct device *d, \
- struct device_attribute *attr, char *b) \
-{ \
- struct zram *zram = dev_to_zram(d); \
- \
- deprecated_attr_warn(__stringify(name)); \
- return scnprintf(b, PAGE_SIZE, "%llu\n", \
- (u64)atomic64_read(&zram->stats.name)); \
-} \
-static DEVICE_ATTR_RO(name);
-
static inline bool init_done(struct zram *zram)
{
return zram->disksize;
@@ -95,6 +74,17 @@ static void zram_clear_flag(struct zram_meta *meta, u32 index,
meta->table[index].value &= ~BIT(flag);
}
+static inline void zram_set_element(struct zram_meta *meta, u32 index,
+ unsigned long element)
+{
+ meta->table[index].element = element;
+}
+
+static inline void zram_clear_element(struct zram_meta *meta, u32 index)
+{
+ meta->table[index].element = 0;
+}
+
static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
{
return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
@@ -167,31 +157,46 @@ static inline void update_used_max(struct zram *zram,
} while (old_max != cur_max);
}
-static bool page_zero_filled(void *ptr)
+static inline void zram_fill_page(char *ptr, unsigned long len,
+ unsigned long value)
+{
+ int i;
+ unsigned long *page = (unsigned long *)ptr;
+
+ WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
+
+ if (likely(value == 0)) {
+ memset(ptr, 0, len);
+ } else {
+ for (i = 0; i < len / sizeof(*page); i++)
+ page[i] = value;
+ }
+}
+
+static bool page_same_filled(void *ptr, unsigned long *element)
{
unsigned int pos;
unsigned long *page;
page = (unsigned long *)ptr;
- for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
- if (page[pos])
+ for (pos = 0; pos < PAGE_SIZE / sizeof(*page) - 1; pos++) {
+ if (page[pos] != page[pos + 1])
return false;
}
+ *element = page[pos];
+
return true;
}
-static void handle_zero_page(struct bio_vec *bvec)
+static void handle_same_page(struct bio_vec *bvec, unsigned long element)
{
struct page *page = bvec->bv_page;
void *user_mem;
user_mem = kmap_atomic(page);
- if (is_partial_io(bvec))
- memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
- else
- clear_page(user_mem);
+ zram_fill_page(user_mem + bvec->bv_offset, bvec->bv_len, element);
kunmap_atomic(user_mem);
flush_dcache_page(page);
@@ -218,47 +223,6 @@ static ssize_t disksize_show(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}
-static ssize_t orig_data_size_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- deprecated_attr_warn("orig_data_size");
- return scnprintf(buf, PAGE_SIZE, "%llu\n",
- (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
-}
-
-static ssize_t mem_used_total_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- u64 val = 0;
- struct zram *zram = dev_to_zram(dev);
-
- deprecated_attr_warn("mem_used_total");
- down_read(&zram->init_lock);
- if (init_done(zram)) {
- struct zram_meta *meta = zram->meta;
- val = zs_get_total_pages(meta->mem_pool);
- }
- up_read(&zram->init_lock);
-
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
-}
-
-static ssize_t mem_limit_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- u64 val;
- struct zram *zram = dev_to_zram(dev);
-
- deprecated_attr_warn("mem_limit");
- down_read(&zram->init_lock);
- val = zram->limit_pages;
- up_read(&zram->init_lock);
-
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
-}
-
static ssize_t mem_limit_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
@@ -277,21 +241,6 @@ static ssize_t mem_limit_store(struct device *dev,
return len;
}
-static ssize_t mem_used_max_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- u64 val = 0;
- struct zram *zram = dev_to_zram(dev);
-
- deprecated_attr_warn("mem_used_max");
- down_read(&zram->init_lock);
- if (init_done(zram))
- val = atomic_long_read(&zram->stats.max_used_pages);
- up_read(&zram->init_lock);
-
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
-}
-
static ssize_t mem_used_max_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
@@ -440,7 +389,7 @@ static ssize_t mm_stat_show(struct device *dev,
mem_used << PAGE_SHIFT,
zram->limit_pages << PAGE_SHIFT,
max_used << PAGE_SHIFT,
- (u64)atomic64_read(&zram->stats.zero_pages),
+ (u64)atomic64_read(&zram->stats.same_pages),
pool_stats.pages_compacted);
up_read(&zram->init_lock);
@@ -467,26 +416,6 @@ static ssize_t debug_stat_show(struct device *dev,
static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);
-ZRAM_ATTR_RO(num_reads);
-ZRAM_ATTR_RO(num_writes);
-ZRAM_ATTR_RO(failed_reads);
-ZRAM_ATTR_RO(failed_writes);
-ZRAM_ATTR_RO(invalid_io);
-ZRAM_ATTR_RO(notify_free);
-ZRAM_ATTR_RO(zero_pages);
-ZRAM_ATTR_RO(compr_data_size);
-
-static inline bool zram_meta_get(struct zram *zram)
-{
- if (atomic_inc_not_zero(&zram->refcount))
- return true;
- return false;
-}
-
-static inline void zram_meta_put(struct zram *zram)
-{
- atomic_dec(&zram->refcount);
-}
static void zram_meta_free(struct zram_meta *meta, u64 disksize)
{
@@ -496,8 +425,11 @@ static void zram_meta_free(struct zram_meta *meta, u64 disksize)
/* Free all pages that are still in this zram device */
for (index = 0; index < num_pages; index++) {
unsigned long handle = meta->table[index].handle;
-
- if (!handle)
+ /*
+ * No memory is allocated for same element filled pages.
+ * Simply clear same page flag.
+ */
+ if (!handle || zram_test_flag(meta, index, ZRAM_SAME))
continue;
zs_free(meta->mem_pool, handle);
@@ -547,18 +479,20 @@ static void zram_free_page(struct zram *zram, size_t index)
struct zram_meta *meta = zram->meta;
unsigned long handle = meta->table[index].handle;
- if (unlikely(!handle)) {
- /*
- * No memory is allocated for zero filled pages.
- * Simply clear zero page flag.
- */
- if (zram_test_flag(meta, index, ZRAM_ZERO)) {
- zram_clear_flag(meta, index, ZRAM_ZERO);
- atomic64_dec(&zram->stats.zero_pages);
- }
+ /*
+ * No memory is allocated for same element filled pages.
+ * Simply clear same page flag.
+ */
+ if (zram_test_flag(meta, index, ZRAM_SAME)) {
+ zram_clear_flag(meta, index, ZRAM_SAME);
+ zram_clear_element(meta, index);
+ atomic64_dec(&zram->stats.same_pages);
return;
}
+ if (!handle)
+ return;
+
zs_free(meta->mem_pool, handle);
atomic64_sub(zram_get_obj_size(meta, index),
@@ -581,9 +515,9 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
handle = meta->table[index].handle;
size = zram_get_obj_size(meta, index);
- if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
+ if (!handle || zram_test_flag(meta, index, ZRAM_SAME)) {
bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
- clear_page(mem);
+ zram_fill_page(mem, PAGE_SIZE, meta->table[index].element);
return 0;
}
@@ -619,9 +553,9 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
if (unlikely(!meta->table[index].handle) ||
- zram_test_flag(meta, index, ZRAM_ZERO)) {
+ zram_test_flag(meta, index, ZRAM_SAME)) {
bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
- handle_zero_page(bvec);
+ handle_same_page(bvec, meta->table[index].element);
return 0;
}
bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
@@ -669,6 +603,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
struct zram_meta *meta = zram->meta;
struct zcomp_strm *zstrm = NULL;
unsigned long alloced_pages;
+ unsigned long element;
page = bvec->bv_page;
if (is_partial_io(bvec)) {
@@ -697,16 +632,17 @@ compress_again:
uncmem = user_mem;
}
- if (page_zero_filled(uncmem)) {
+ if (page_same_filled(uncmem, &element)) {
if (user_mem)
kunmap_atomic(user_mem);
/* Free memory associated with this sector now. */
bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
zram_free_page(zram, index);
- zram_set_flag(meta, index, ZRAM_ZERO);
+ zram_set_flag(meta, index, ZRAM_SAME);
+ zram_set_element(meta, index, element);
bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
- atomic64_inc(&zram->stats.zero_pages);
+ atomic64_inc(&zram->stats.same_pages);
ret = 0;
goto out;
}
@@ -944,22 +880,17 @@ static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
{
struct zram *zram = queue->queuedata;
- if (unlikely(!zram_meta_get(zram)))
- goto error;
-
blk_queue_split(queue, &bio, queue->bio_split);
if (!valid_io_request(zram, bio->bi_iter.bi_sector,
bio->bi_iter.bi_size)) {
atomic64_inc(&zram->stats.invalid_io);
- goto put_zram;
+ goto error;
}
__zram_make_request(zram, bio);
- zram_meta_put(zram);
return BLK_QC_T_NONE;
-put_zram:
- zram_meta_put(zram);
+
error:
bio_io_error(bio);
return BLK_QC_T_NONE;
@@ -989,13 +920,11 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
struct bio_vec bv;
zram = bdev->bd_disk->private_data;
- if (unlikely(!zram_meta_get(zram)))
- goto out;
if (!valid_io_request(zram, sector, PAGE_SIZE)) {
atomic64_inc(&zram->stats.invalid_io);
err = -EINVAL;
- goto put_zram;
+ goto out;
}
index = sector >> SECTORS_PER_PAGE_SHIFT;
@@ -1006,8 +935,6 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
bv.bv_offset = 0;
err = zram_bvec_rw(zram, &bv, index, offset, is_write);
-put_zram:
- zram_meta_put(zram);
out:
/*
* If I/O fails, just return error(ie, non-zero) without
@@ -1040,17 +967,6 @@ static void zram_reset_device(struct zram *zram)
meta = zram->meta;
comp = zram->comp;
disksize = zram->disksize;
- /*
- * Refcount will go down to 0 eventually and r/w handler
- * cannot handle further I/O so it will bail out by
- * check zram_meta_get.
- */
- zram_meta_put(zram);
- /*
- * We want to free zram_meta in process context to avoid
- * deadlock between reclaim path and any other locks.
- */
- wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);
/* Reset stats */
memset(&zram->stats, 0, sizeof(zram->stats));
@@ -1098,8 +1014,6 @@ static ssize_t disksize_store(struct device *dev,
goto out_destroy_comp;
}
- init_waitqueue_head(&zram->io_done);
- atomic_set(&zram->refcount, 1);
zram->meta = meta;
zram->comp = comp;
zram->disksize = disksize;
@@ -1188,10 +1102,8 @@ static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
-static DEVICE_ATTR_RO(orig_data_size);
-static DEVICE_ATTR_RO(mem_used_total);
-static DEVICE_ATTR_RW(mem_limit);
-static DEVICE_ATTR_RW(mem_used_max);
+static DEVICE_ATTR_WO(mem_limit);
+static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
@@ -1199,17 +1111,7 @@ static struct attribute *zram_disk_attrs[] = {
&dev_attr_disksize.attr,
&dev_attr_initstate.attr,
&dev_attr_reset.attr,
- &dev_attr_num_reads.attr,
- &dev_attr_num_writes.attr,
- &dev_attr_failed_reads.attr,
- &dev_attr_failed_writes.attr,
&dev_attr_compact.attr,
- &dev_attr_invalid_io.attr,
- &dev_attr_notify_free.attr,
- &dev_attr_zero_pages.attr,
- &dev_attr_orig_data_size.attr,
- &dev_attr_compr_data_size.attr,
- &dev_attr_mem_used_total.attr,
&dev_attr_mem_limit.attr,
&dev_attr_mem_used_max.attr,
&dev_attr_max_comp_streams.attr,
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 74fcf10da374..caeff51f1571 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -61,7 +61,7 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
/* Flags for zram pages (table[page_no].value) */
enum zram_pageflags {
/* Page consists entirely of zeros */
- ZRAM_ZERO = ZRAM_FLAG_SHIFT,
+ ZRAM_SAME = ZRAM_FLAG_SHIFT,
ZRAM_ACCESS, /* page is now accessed */
__NR_ZRAM_PAGEFLAGS,
@@ -71,7 +71,10 @@ enum zram_pageflags {
/* Allocated for each disk page */
struct zram_table_entry {
- unsigned long handle;
+ union {
+ unsigned long handle;
+ unsigned long element;
+ };
unsigned long value;
};
@@ -83,7 +86,7 @@ struct zram_stats {
atomic64_t failed_writes; /* can happen when memory is too low */
atomic64_t invalid_io; /* non-page-aligned I/O requests */
atomic64_t notify_free; /* no. of swap slot free notifications */
- atomic64_t zero_pages; /* no. of zero filled pages */
+ atomic64_t same_pages; /* no. of same element filled pages */
atomic64_t pages_stored; /* no. of pages currently stored */
atomic_long_t max_used_pages; /* no. of maximum pages stored */
atomic64_t writestall; /* no. of write slow paths */
@@ -106,9 +109,6 @@ struct zram {
unsigned long limit_pages;
struct zram_stats stats;
- atomic_t refcount; /* refcount for zram_meta */
- /* wait all IO under all of cpu are done */
- wait_queue_head_t io_done;
/*
* This is the limit on amount of *uncompressed* worth of data
* we can store in a disk.
diff --git a/drivers/char/agp/alpha-agp.c b/drivers/char/agp/alpha-agp.c
index 737187865269..53fe633df1e8 100644
--- a/drivers/char/agp/alpha-agp.c
+++ b/drivers/char/agp/alpha-agp.c
@@ -11,15 +11,14 @@
#include "agp.h"
-static int alpha_core_agp_vm_fault(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int alpha_core_agp_vm_fault(struct vm_fault *vmf)
{
alpha_agp_info *agp = agp_bridge->dev_private_data;
dma_addr_t dma_addr;
unsigned long pa;
struct page *page;
- dma_addr = vmf->address - vma->vm_start + agp->aperture.bus_base;
+ dma_addr = vmf->address - vmf->vma->vm_start + agp->aperture.bus_base;
pa = agp->ops->translate(agp, dma_addr);
if (pa == (unsigned long)-EINVAL)
diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c
index a697ca0cab1e..a9c2fa3c81e5 100644
--- a/drivers/char/mspec.c
+++ b/drivers/char/mspec.c
@@ -191,12 +191,12 @@ mspec_close(struct vm_area_struct *vma)
* Creates a mspec page and maps it to user space.
*/
static int
-mspec_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+mspec_fault(struct vm_fault *vmf)
{
unsigned long paddr, maddr;
unsigned long pfn;
pgoff_t index = vmf->pgoff;
- struct vma_data *vdata = vma->vm_private_data;
+ struct vma_data *vdata = vmf->vma->vm_private_data;
maddr = (volatile unsigned long) vdata->maddr[index];
if (maddr == 0) {
@@ -227,7 +227,7 @@ mspec_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
* be because another thread has installed the pte first, so it
* is no problem.
*/
- vm_insert_pfn(vma, vmf->address, pfn);
+ vm_insert_pfn(vmf->vma, vmf->address, pfn);
return VM_FAULT_NOPAGE;
}
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index ed758b74ddf0..b75c77254fdb 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -419,8 +419,7 @@ static phys_addr_t pgoff_to_phys(struct dax_dev *dax_dev, pgoff_t pgoff,
return -1;
}
-static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
{
struct device *dev = &dax_dev->dev;
struct dax_region *dax_region;
@@ -428,7 +427,7 @@ static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma,
phys_addr_t phys;
pfn_t pfn;
- if (check_vma(dax_dev, vma, __func__))
+ if (check_vma(dax_dev, vmf->vma, __func__))
return VM_FAULT_SIGBUS;
dax_region = dax_dev->region;
@@ -446,7 +445,7 @@ static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma,
pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
- rc = vm_insert_mixed(vma, vmf->address, pfn);
+ rc = vm_insert_mixed(vmf->vma, vmf->address, pfn);
if (rc == -ENOMEM)
return VM_FAULT_OOM;
@@ -456,50 +455,71 @@ static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma,
return VM_FAULT_NOPAGE;
}
-static int dax_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
{
- int rc;
- struct file *filp = vma->vm_file;
- struct dax_dev *dax_dev = filp->private_data;
+ unsigned long pmd_addr = vmf->address & PMD_MASK;
+ struct device *dev = &dax_dev->dev;
+ struct dax_region *dax_region;
+ phys_addr_t phys;
+ pgoff_t pgoff;
+ pfn_t pfn;
- dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
- current->comm, (vmf->flags & FAULT_FLAG_WRITE)
- ? "write" : "read", vma->vm_start, vma->vm_end);
- rcu_read_lock();
- rc = __dax_dev_fault(dax_dev, vma, vmf);
- rcu_read_unlock();
+ if (check_vma(dax_dev, vmf->vma, __func__))
+ return VM_FAULT_SIGBUS;
- return rc;
+ dax_region = dax_dev->region;
+ if (dax_region->align > PMD_SIZE) {
+ dev_dbg(dev, "%s: alignment > fault size\n", __func__);
+ return VM_FAULT_SIGBUS;
+ }
+
+ /* dax pmd mappings require pfn_t_devmap() */
+ if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
+ dev_dbg(dev, "%s: alignment > fault size\n", __func__);
+ return VM_FAULT_SIGBUS;
+ }
+
+ pgoff = linear_page_index(vmf->vma, pmd_addr);
+ phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE);
+ if (phys == -1) {
+ dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
+ pgoff);
+ return VM_FAULT_SIGBUS;
+ }
+
+ pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
+
+ return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, pfn,
+ vmf->flags & FAULT_FLAG_WRITE);
}
-static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
- struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd,
- unsigned int flags)
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
{
- unsigned long pmd_addr = addr & PMD_MASK;
+ unsigned long pud_addr = vmf->address & PUD_MASK;
struct device *dev = &dax_dev->dev;
struct dax_region *dax_region;
phys_addr_t phys;
pgoff_t pgoff;
pfn_t pfn;
- if (check_vma(dax_dev, vma, __func__))
+ if (check_vma(dax_dev, vmf->vma, __func__))
return VM_FAULT_SIGBUS;
dax_region = dax_dev->region;
- if (dax_region->align > PMD_SIZE) {
+ if (dax_region->align > PUD_SIZE) {
dev_dbg(dev, "%s: alignment > fault size\n", __func__);
return VM_FAULT_SIGBUS;
}
- /* dax pmd mappings require pfn_t_devmap() */
+ /* dax pud mappings require pfn_t_devmap() */
if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
dev_dbg(dev, "%s: alignment > fault size\n", __func__);
return VM_FAULT_SIGBUS;
}
- pgoff = linear_page_index(vma, pmd_addr);
- phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE);
+ pgoff = linear_page_index(vmf->vma, pud_addr);
+ phys = pgoff_to_phys(dax_dev, pgoff, PUD_SIZE);
if (phys == -1) {
dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
pgoff);
@@ -508,31 +528,55 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
- return vmf_insert_pfn_pmd(vma, addr, pmd, pfn,
- flags & FAULT_FLAG_WRITE);
+ return vmf_insert_pfn_pud(vmf->vma, vmf->address, vmf->pud, pfn,
+ vmf->flags & FAULT_FLAG_WRITE);
+}
+#else
+static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
+{
+ return VM_FAULT_FALLBACK;
}
+#endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
-static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd, unsigned int flags)
+static int dax_dev_huge_fault(struct vm_fault *vmf,
+ enum page_entry_size pe_size)
{
int rc;
- struct file *filp = vma->vm_file;
+ struct file *filp = vmf->vma->vm_file;
struct dax_dev *dax_dev = filp->private_data;
dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
- current->comm, (flags & FAULT_FLAG_WRITE)
- ? "write" : "read", vma->vm_start, vma->vm_end);
+ current->comm, (vmf->flags & FAULT_FLAG_WRITE)
+ ? "write" : "read",
+ vmf->vma->vm_start, vmf->vma->vm_end);
rcu_read_lock();
- rc = __dax_dev_pmd_fault(dax_dev, vma, addr, pmd, flags);
+ switch (pe_size) {
+ case PE_SIZE_PTE:
+ rc = __dax_dev_pte_fault(dax_dev, vmf);
+ break;
+ case PE_SIZE_PMD:
+ rc = __dax_dev_pmd_fault(dax_dev, vmf);
+ break;
+ case PE_SIZE_PUD:
+ rc = __dax_dev_pud_fault(dax_dev, vmf);
+ break;
+ default:
+ return VM_FAULT_FALLBACK;
+ }
rcu_read_unlock();
return rc;
}
+static int dax_dev_fault(struct vm_fault *vmf)
+{
+ return dax_dev_huge_fault(vmf, PE_SIZE_PTE);
+}
+
static const struct vm_operations_struct dax_dev_vm_ops = {
.fault = dax_dev_fault,
- .pmd_fault = dax_dev_pmd_fault,
+ .huge_fault = dax_dev_huge_fault,
};
static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
index 560d416deab2..1597458d884e 100644
--- a/drivers/gpu/drm/armada/armada_gem.c
+++ b/drivers/gpu/drm/armada/armada_gem.c
@@ -14,14 +14,15 @@
#include <drm/armada_drm.h>
#include "armada_ioctlP.h"
-static int armada_gem_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int armada_gem_vm_fault(struct vm_fault *vmf)
{
- struct armada_gem_object *obj = drm_to_armada_gem(vma->vm_private_data);
+ struct drm_gem_object *gobj = vmf->vma->vm_private_data;
+ struct armada_gem_object *obj = drm_to_armada_gem(gobj);
unsigned long pfn = obj->phys_addr >> PAGE_SHIFT;
int ret;
- pfn += (vmf->address - vma->vm_start) >> PAGE_SHIFT;
- ret = vm_insert_pfn(vma, vmf->address, pfn);
+ pfn += (vmf->address - vmf->vma->vm_start) >> PAGE_SHIFT;
+ ret = vm_insert_pfn(vmf->vma, vmf->address, pfn);
switch (ret) {
case 0:
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index bd311c77c254..1170b3209a12 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -96,8 +96,9 @@ static pgprot_t drm_dma_prot(uint32_t map_type, struct vm_area_struct *vma)
* map, get the page, increment the use count and return it.
*/
#if IS_ENABLED(CONFIG_AGP)
-static int drm_do_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int drm_vm_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct drm_file *priv = vma->vm_file->private_data;
struct drm_device *dev = priv->minor->dev;
struct drm_local_map *map = NULL;
@@ -168,7 +169,7 @@ vm_fault_error:
return VM_FAULT_SIGBUS; /* Disallow mremap */
}
#else
-static int drm_do_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int drm_vm_fault(struct vm_fault *vmf)
{
return VM_FAULT_SIGBUS;
}
@@ -184,8 +185,9 @@ static int drm_do_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
* Get the mapping, find the real physical page to map, get the page, and
* return it.
*/
-static int drm_do_vm_shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int drm_vm_shm_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct drm_local_map *map = vma->vm_private_data;
unsigned long offset;
unsigned long i;
@@ -280,14 +282,14 @@ static void drm_vm_shm_close(struct vm_area_struct *vma)
/**
* \c fault method for DMA virtual memory.
*
- * \param vma virtual memory area.
* \param address access address.
* \return pointer to the page structure.
*
* Determine the page number from the page offset and get it from drm_device_dma::pagelist.
*/
-static int drm_do_vm_dma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int drm_vm_dma_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct drm_file *priv = vma->vm_file->private_data;
struct drm_device *dev = priv->minor->dev;
struct drm_device_dma *dma = dev->dma;
@@ -315,14 +317,14 @@ static int drm_do_vm_dma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
/**
* \c fault method for scatter-gather virtual memory.
*
- * \param vma virtual memory area.
* \param address access address.
* \return pointer to the page structure.
*
* Determine the map offset from the page offset and get it from drm_sg_mem::pagelist.
*/
-static int drm_do_vm_sg_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int drm_vm_sg_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct drm_local_map *map = vma->vm_private_data;
struct drm_file *priv = vma->vm_file->private_data;
struct drm_device *dev = priv->minor->dev;
@@ -347,26 +349,6 @@ static int drm_do_vm_sg_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return 0;
}
-static int drm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- return drm_do_vm_fault(vma, vmf);
-}
-
-static int drm_vm_shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- return drm_do_vm_shm_fault(vma, vmf);
-}
-
-static int drm_vm_dma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- return drm_do_vm_dma_fault(vma, vmf);
-}
-
-static int drm_vm_sg_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- return drm_do_vm_sg_fault(vma, vmf);
-}
-
/** AGP virtual memory operations */
static const struct vm_operations_struct drm_vm_ops = {
.fault = drm_vm_fault,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index c255eda40526..e41f38667c1c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -73,7 +73,7 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
struct drm_file *file);
int etnaviv_gem_mmap(struct file *filp, struct vm_area_struct *vma);
-int etnaviv_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int etnaviv_gem_fault(struct vm_fault *vmf);
int etnaviv_gem_mmap_offset(struct drm_gem_object *obj, u64 *offset);
struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj);
void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index aa6e35ddc87f..e78f1406885d 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -175,8 +175,9 @@ int etnaviv_gem_mmap(struct file *filp, struct vm_area_struct *vma)
return obj->ops->mmap(obj, vma);
}
-int etnaviv_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int etnaviv_gem_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct drm_gem_object *obj = vma->vm_private_data;
struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
struct page **pages, *page;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 57b81460fec8..4c28f7ffcc4d 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -447,8 +447,9 @@ int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv,
return ret;
}
-int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int exynos_drm_gem_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct drm_gem_object *obj = vma->vm_private_data;
struct exynos_drm_gem *exynos_gem = to_exynos_gem(obj);
unsigned long pfn;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h
index df7c543d6558..85457255fcd1 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h
@@ -116,7 +116,7 @@ int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv,
uint64_t *offset);
/* page fault handler and mmap fault address(virtual) to physical memory. */
-int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int exynos_drm_gem_fault(struct vm_fault *vmf);
/* set vm_flags and we can change the vm attribute to other one at here. */
int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
index da42d2e1d397..ffe6b4ffa1a8 100644
--- a/drivers/gpu/drm/gma500/framebuffer.c
+++ b/drivers/gpu/drm/gma500/framebuffer.c
@@ -111,8 +111,9 @@ static int psbfb_pan(struct fb_var_screeninfo *var, struct fb_info *info)
return 0;
}
-static int psbfb_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int psbfb_vm_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct psb_framebuffer *psbfb = vma->vm_private_data;
struct drm_device *dev = psbfb->base.dev;
struct drm_psb_private *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/gma500/gem.c b/drivers/gpu/drm/gma500/gem.c
index 527c62917660..7da061aab729 100644
--- a/drivers/gpu/drm/gma500/gem.c
+++ b/drivers/gpu/drm/gma500/gem.c
@@ -164,8 +164,9 @@ int psb_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
* vma->vm_private_data points to the GEM object that is backing this
* mapping.
*/
-int psb_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int psb_gem_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct drm_gem_object *obj;
struct gtt_range *r;
int ret;
diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h
index 05d7aaf47eea..83e22fd4cfc0 100644
--- a/drivers/gpu/drm/gma500/psb_drv.h
+++ b/drivers/gpu/drm/gma500/psb_drv.h
@@ -752,7 +752,7 @@ extern int psb_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
struct drm_mode_create_dumb *args);
extern int psb_gem_dumb_map_gtt(struct drm_file *file, struct drm_device *dev,
uint32_t handle, uint64_t *offset);
-extern int psb_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int psb_gem_fault(struct vm_fault *vmf);
/* psb_device.c */
extern const struct psb_ops psb_chip_ops;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bcc81912b5e5..0a4b42d31391 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3352,7 +3352,7 @@ int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
unsigned int flags);
int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv);
void i915_gem_resume(struct drm_i915_private *dev_priv);
-int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int i915_gem_fault(struct vm_fault *vmf);
int i915_gem_object_wait(struct drm_i915_gem_object *obj,
unsigned int flags,
long timeout,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 88f3628b4e29..6908123162d1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1772,7 +1772,6 @@ compute_partial_view(struct drm_i915_gem_object *obj,
/**
* i915_gem_fault - fault a page into the GTT
- * @area: CPU VMA in question
* @vmf: fault info
*
* The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
@@ -1789,9 +1788,10 @@ compute_partial_view(struct drm_i915_gem_object *obj,
* The current feature set supported by i915_gem_fault() and thus GTT mmaps
* is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
*/
-int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
+int i915_gem_fault(struct vm_fault *vmf)
{
#define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
+ struct vm_area_struct *area = vmf->vma;
struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index cdd7b2f8e977..c3b14876edaa 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -206,7 +206,7 @@ void msm_gem_shrinker_cleanup(struct drm_device *dev);
int msm_gem_mmap_obj(struct drm_gem_object *obj,
struct vm_area_struct *vma);
int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
-int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int msm_gem_fault(struct vm_fault *vmf);
uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj);
int msm_gem_get_iova_locked(struct drm_gem_object *obj, int id,
uint64_t *iova);
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index e140b05af134..59811f29607d 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -191,8 +191,9 @@ int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
return msm_gem_mmap_obj(vma->vm_private_data, vma);
}
-int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int msm_gem_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct drm_gem_object *obj = vma->vm_private_data;
struct drm_device *dev = obj->dev;
struct msm_drm_private *priv = dev->dev_private;
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h
index 36d93ce84a29..65977982f15f 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.h
+++ b/drivers/gpu/drm/omapdrm/omap_drv.h
@@ -188,7 +188,7 @@ int omap_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
int omap_gem_mmap(struct file *filp, struct vm_area_struct *vma);
int omap_gem_mmap_obj(struct drm_gem_object *obj,
struct vm_area_struct *vma);
-int omap_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int omap_gem_fault(struct vm_fault *vmf);
int omap_gem_op_start(struct drm_gem_object *obj, enum omap_gem_op op);
int omap_gem_op_finish(struct drm_gem_object *obj, enum omap_gem_op op);
int omap_gem_op_sync(struct drm_gem_object *obj, enum omap_gem_op op);
diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c
index 74a9968df421..5d5a9f517c30 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem.c
@@ -518,7 +518,6 @@ static int fault_2d(struct drm_gem_object *obj,
/**
* omap_gem_fault - pagefault handler for GEM objects
- * @vma: the VMA of the GEM object
* @vmf: fault detail
*
* Invoked when a fault occurs on an mmap of a GEM managed area. GEM
@@ -529,8 +528,9 @@ static int fault_2d(struct drm_gem_object *obj,
* vma->vm_private_data points to the GEM object that is backing this
* mapping.
*/
-int omap_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int omap_gem_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct drm_gem_object *obj = vma->vm_private_data;
struct omap_gem_object *omap_obj = to_omap_bo(obj);
struct drm_device *dev = obj->dev;
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 4e1a40389964..7d1cab57c89e 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -105,15 +105,15 @@ static void qxl_ttm_global_fini(struct qxl_device *qdev)
static struct vm_operations_struct qxl_ttm_vm_ops;
static const struct vm_operations_struct *ttm_vm_ops;
-static int qxl_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int qxl_ttm_fault(struct vm_fault *vmf)
{
struct ttm_buffer_object *bo;
int r;
- bo = (struct ttm_buffer_object *)vma->vm_private_data;
+ bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data;
if (bo == NULL)
return VM_FAULT_NOPAGE;
- r = ttm_vm_ops->fault(vma, vmf);
+ r = ttm_vm_ops->fault(vmf);
return r;
}
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 7a10b3852970..684f1703aa5c 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -979,19 +979,19 @@ void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size)
static struct vm_operations_struct radeon_ttm_vm_ops;
static const struct vm_operations_struct *ttm_vm_ops = NULL;
-static int radeon_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int radeon_ttm_fault(struct vm_fault *vmf)
{
struct ttm_buffer_object *bo;
struct radeon_device *rdev;
int r;
- bo = (struct ttm_buffer_object *)vma->vm_private_data;
+ bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data;
if (bo == NULL) {
return VM_FAULT_NOPAGE;
}
rdev = radeon_get_rdev(bo->bdev);
down_read(&rdev->pm.mclk_lock);
- r = ttm_vm_ops->fault(vma, vmf);
+ r = ttm_vm_ops->fault(vmf);
up_read(&rdev->pm.mclk_lock);
return r;
}
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index b523a5d4a38c..17e62ecb5d4d 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -441,8 +441,9 @@ int tegra_bo_dumb_map_offset(struct drm_file *file, struct drm_device *drm,
return 0;
}
-static int tegra_bo_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int tegra_bo_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct drm_gem_object *gem = vma->vm_private_data;
struct tegra_bo *bo = to_tegra_bo(gem);
struct page *page;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 68ef993ab431..247986a1d097 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -43,7 +43,6 @@
#define TTM_BO_VM_NUM_PREFAULT 16
static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
- struct vm_area_struct *vma,
struct vm_fault *vmf)
{
int ret = 0;
@@ -66,7 +65,7 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
goto out_unlock;
- up_read(&vma->vm_mm->mmap_sem);
+ up_read(&vmf->vma->vm_mm->mmap_sem);
(void) dma_fence_wait(bo->moving, true);
goto out_unlock;
}
@@ -89,8 +88,9 @@ out_unlock:
return ret;
}
-static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ttm_bo_vm_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
vma->vm_private_data;
struct ttm_bo_device *bdev = bo->bdev;
@@ -163,7 +163,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
* Wait for buffer data in transit, due to a pipelined
* move.
*/
- ret = ttm_bo_vm_fault_idle(bo, vma, vmf);
+ ret = ttm_bo_vm_fault_idle(bo, vmf);
if (unlikely(ret != 0)) {
retval = ret;
goto out_unlock;
diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h
index 6c4286e57362..2a75ab80527a 100644
--- a/drivers/gpu/drm/udl/udl_drv.h
+++ b/drivers/gpu/drm/udl/udl_drv.h
@@ -134,7 +134,7 @@ void udl_gem_put_pages(struct udl_gem_object *obj);
int udl_gem_vmap(struct udl_gem_object *obj);
void udl_gem_vunmap(struct udl_gem_object *obj);
int udl_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
-int udl_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int udl_gem_fault(struct vm_fault *vmf);
int udl_handle_damage(struct udl_framebuffer *fb, int x, int y,
int width, int height);
diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c
index 3c0c4bd3f750..775c50e4f02c 100644
--- a/drivers/gpu/drm/udl/udl_gem.c
+++ b/drivers/gpu/drm/udl/udl_gem.c
@@ -100,8 +100,9 @@ int udl_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
return ret;
}
-int udl_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int udl_gem_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct udl_gem_object *obj = to_udl_bo(vma->vm_private_data);
struct page *page;
unsigned int page_offset;
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 477e07f0ecb6..7ccbb03e98de 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -50,8 +50,9 @@ static void vgem_gem_free_object(struct drm_gem_object *obj)
kfree(vgem_obj);
}
-static int vgem_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int vgem_gem_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct drm_vgem_gem_object *obj = vma->vm_private_data;
/* We don't use vmf->pgoff since that has the fake offset */
unsigned long vaddr = vmf->address;
diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c
index 9cc7079f7aca..70ec8ca8d9b1 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ttm.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c
@@ -114,18 +114,17 @@ static void virtio_gpu_ttm_global_fini(struct virtio_gpu_device *vgdev)
static struct vm_operations_struct virtio_gpu_ttm_vm_ops;
static const struct vm_operations_struct *ttm_vm_ops;
-static int virtio_gpu_ttm_fault(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int virtio_gpu_ttm_fault(struct vm_fault *vmf)
{
struct ttm_buffer_object *bo;
struct virtio_gpu_device *vgdev;
int r;
- bo = (struct ttm_buffer_object *)vma->vm_private_data;
+ bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data;
if (bo == NULL)
return VM_FAULT_NOPAGE;
vgdev = virtio_gpu_get_vgdev(bo->bdev);
- r = ttm_vm_ops->fault(vma, vmf);
+ r = ttm_vm_ops->fault(vmf);
return r;
}
#endif
diff --git a/drivers/hsi/clients/cmt_speech.c b/drivers/hsi/clients/cmt_speech.c
index 3deef6cc7d7c..7175e6bedf21 100644
--- a/drivers/hsi/clients/cmt_speech.c
+++ b/drivers/hsi/clients/cmt_speech.c
@@ -1098,9 +1098,9 @@ static void cs_hsi_stop(struct cs_hsi_iface *hi)
kfree(hi);
}
-static int cs_char_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int cs_char_vma_fault(struct vm_fault *vmf)
{
- struct cs_char *csdata = vma->vm_private_data;
+ struct cs_char *csdata = vmf->vma->vm_private_data;
struct page *page;
page = virt_to_page(csdata->mmap_base);
diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c
index e8d55a153a65..e88afe1a435c 100644
--- a/drivers/hwtracing/intel_th/msu.c
+++ b/drivers/hwtracing/intel_th/msu.c
@@ -1188,9 +1188,9 @@ static void msc_mmap_close(struct vm_area_struct *vma)
mutex_unlock(&msc->buf_mutex);
}
-static int msc_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int msc_mmap_fault(struct vm_fault *vmf)
{
- struct msc_iter *iter = vma->vm_file->private_data;
+ struct msc_iter *iter = vmf->vma->vm_file->private_data;
struct msc *msc = iter->msc;
vmf->page = msc_buffer_get_page(msc, vmf->pgoff);
@@ -1198,7 +1198,7 @@ static int msc_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
get_page(vmf->page);
- vmf->page->mapping = vma->vm_file->f_mapping;
+ vmf->page->mapping = vmf->vma->vm_file->f_mapping;
vmf->page->index = vmf->pgoff;
return 0;
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index bd786b7bd30b..f46033984d07 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -92,7 +92,7 @@ static unsigned int poll_next(struct file *, struct poll_table_struct *);
static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long);
static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
-static int vma_fault(struct vm_area_struct *, struct vm_fault *);
+static int vma_fault(struct vm_fault *);
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
unsigned long arg);
@@ -695,7 +695,7 @@ done:
* Local (non-chip) user memory is not mapped right away but as it is
* accessed by the user-level code.
*/
-static int vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int vma_fault(struct vm_fault *vmf)
{
struct page *page;
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 2d1eacf1dfed..9396c1807cc3 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -893,7 +893,7 @@ bail:
/*
* qib_file_vma_fault - handle a VMA page fault.
*/
-static int qib_file_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int qib_file_vma_fault(struct vm_fault *vmf)
{
struct page *page;
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 1b5b8c5361c5..09bd3b290bb8 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2672,7 +2672,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
return NULL;
page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
- get_order(size));
+ get_order(size), flag);
if (!page)
return NULL;
}
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index f5e02f8e7371..a8f7ae0eb7a4 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3829,7 +3829,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
if (gfpflags_allow_blocking(flags)) {
unsigned int count = size >> PAGE_SHIFT;
- page = dma_alloc_from_contiguous(dev, count, order);
+ page = dma_alloc_from_contiguous(dev, count, order, flags);
if (page && iommu_no_mapping(dev) &&
page_to_phys(page) + size > dev->coherent_dma_mask) {
dma_release_from_contiguous(dev, page, count);
diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c
index ba63ca57ed7e..36bd904946bd 100644
--- a/drivers/media/v4l2-core/videobuf-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf-dma-sg.c
@@ -434,8 +434,9 @@ static void videobuf_vm_close(struct vm_area_struct *vma)
* now ...). Bounce buffers don't work very well for the data rates
* video capture has.
*/
-static int videobuf_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int videobuf_vm_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct page *page;
dprintk(3, "fault: fault @ %08lx [vma %08lx-%08lx]\n",
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 3907387b6d15..062bf6ca2625 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -121,8 +121,9 @@ void cxl_context_set_mapping(struct cxl_context *ctx,
mutex_unlock(&ctx->mapping_lock);
}
-static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int cxl_mmap_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct cxl_context *ctx = vma->vm_file->private_data;
u64 area, offset;
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
index af2e077da4b8..3641f1334cf0 100644
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -926,8 +926,9 @@ again:
*
* Note: gru segments alway mmaped on GRU_GSEG_PAGESIZE boundaries.
*/
-int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int gru_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct gru_thread_state *gts;
unsigned long paddr, vaddr;
unsigned long expires;
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
index 5c3ce2459675..b5e308b50ed1 100644
--- a/drivers/misc/sgi-gru/grutables.h
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -665,7 +665,7 @@ extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
int cbr_au_count, char *cbmap);
extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
int dsr_au_count, char *dsmap);
-extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
+extern int gru_fault(struct vm_fault *vmf);
extern struct gru_mm_struct *gru_register_mmu_notifier(void);
extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c
index d390b9663dc3..57e6cef81ebe 100644
--- a/drivers/net/ethernet/sgi/ioc3-eth.c
+++ b/drivers/net/ethernet/sgi/ioc3-eth.c
@@ -914,7 +914,7 @@ static void ioc3_alloc_rings(struct net_device *dev)
skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC);
if (!skb) {
- show_free_areas(0);
+ show_free_areas(0, NULL);
continue;
}
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 6c033c9a2f06..00b4071cab8d 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -538,7 +538,7 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
altmap = NULL;
} else if (nd_pfn->mode == PFN_MODE_PMEM) {
- nd_pfn->npfns = (resource_size(res) - offset) / PAGE_SIZE;
+ nd_pfn->npfns = PHYS_PFN((resource_size(res) - offset));
if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
dev_info(&nd_pfn->dev,
"number of pfns truncated from %lld to %ld\n",
@@ -557,7 +557,6 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
{
u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
struct nd_namespace_common *ndns = nd_pfn->ndns;
- u32 start_pad = 0, end_trunc = 0;
resource_size_t start, size;
struct nd_namespace_io *nsio;
struct nd_region *nd_region;
@@ -590,42 +589,16 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
return -ENXIO;
}
- memset(pfn_sb, 0, sizeof(*pfn_sb));
-
- /*
- * Check if pmem collides with 'System RAM' when section aligned and
- * trim it accordingly
- */
- nsio = to_nd_namespace_io(&ndns->dev);
- start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start);
- size = resource_size(&nsio->res);
- if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
- IORES_DESC_NONE) == REGION_MIXED) {
- start = nsio->res.start;
- start_pad = PHYS_SECTION_ALIGN_UP(start) - start;
- }
-
- start = nsio->res.start;
- size = PHYS_SECTION_ALIGN_UP(start + size) - start;
- if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
- IORES_DESC_NONE) == REGION_MIXED) {
- size = resource_size(&nsio->res);
- end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
- }
-
- if (start_pad + end_trunc)
- dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
- dev_name(&ndns->dev), start_pad + end_trunc);
-
/*
* Note, we use 64 here for the standard size of struct page,
* debugging options may cause it to be larger in which case the
* implementation will limit the pfns advertised through
* ->direct_access() to those that are included in the memmap.
*/
- start += start_pad;
+ nsio = to_nd_namespace_io(&ndns->dev);
+ start = nsio->res.start;
size = resource_size(&nsio->res);
- npfns = (size - start_pad - end_trunc - SZ_8K) / SZ_4K;
+ npfns = PHYS_PFN(size - SZ_8K);
if (nd_pfn->mode == PFN_MODE_PMEM) {
/*
* vmemmap_populate_hugepages() allocates the memmap array in
@@ -639,13 +612,14 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
else
return -ENXIO;
- if (offset + start_pad + end_trunc >= size) {
+ if (offset >= size) {
dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n",
dev_name(&ndns->dev));
return -ENXIO;
}
- npfns = (size - offset - start_pad - end_trunc) / SZ_4K;
+ memset(pfn_sb, 0, sizeof(*pfn_sb));
+ npfns = PHYS_PFN(size - offset);
pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
pfn_sb->dataoff = cpu_to_le64(offset);
pfn_sb->npfns = cpu_to_le64(npfns);
@@ -654,8 +628,6 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
pfn_sb->version_major = cpu_to_le16(1);
pfn_sb->version_minor = cpu_to_le16(2);
- pfn_sb->start_pad = cpu_to_le32(start_pad);
- pfn_sb->end_trunc = cpu_to_le32(end_trunc);
pfn_sb->align = cpu_to_le32(nd_pfn->align);
checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
pfn_sb->checksum = cpu_to_le64(checksum);
diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c
index 9013a585507e..50b617af81bd 100644
--- a/drivers/rapidio/devices/rio_mport_cdev.c
+++ b/drivers/rapidio/devices/rio_mport_cdev.c
@@ -889,17 +889,16 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode,
goto err_req;
}
- down_read(&current->mm->mmap_sem);
- pinned = get_user_pages(
+ pinned = get_user_pages_unlocked(
(unsigned long)xfer->loc_addr & PAGE_MASK,
nr_pages,
- dir == DMA_FROM_DEVICE ? FOLL_WRITE : 0,
- page_list, NULL);
- up_read(&current->mm->mmap_sem);
+ page_list,
+ dir == DMA_FROM_DEVICE ? FOLL_WRITE : 0);
if (pinned != nr_pages) {
if (pinned < 0) {
- rmcd_error("get_user_pages err=%ld", pinned);
+ rmcd_error("get_user_pages_unlocked err=%ld",
+ pinned);
nr_pages = 0;
} else
rmcd_error("pinned %ld out of %ld pages",
diff --git a/drivers/scsi/cxlflash/superpipe.c b/drivers/scsi/cxlflash/superpipe.c
index 90869cee2b20..ef5bf55f08a4 100644
--- a/drivers/scsi/cxlflash/superpipe.c
+++ b/drivers/scsi/cxlflash/superpipe.c
@@ -1053,7 +1053,6 @@ out:
/**
* cxlflash_mmap_fault() - mmap fault handler for adapter file descriptor
- * @vma: VM area associated with mapping.
* @vmf: VM fault associated with current fault.
*
* To support error notification via MMIO, faults are 'caught' by this routine
@@ -1067,8 +1066,9 @@ out:
*
* Return: 0 on success, VM_FAULT_SIGBUS on failure
*/
-static int cxlflash_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int cxlflash_mmap_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct file *file = vma->vm_file;
struct cxl_context *ctx = cxl_fops_get_context(file);
struct cxlflash_cfg *cfg = container_of(file->f_op, struct cxlflash_cfg,
@@ -1097,7 +1097,7 @@ static int cxlflash_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
if (likely(!ctxi->err_recovery_active)) {
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- rc = ctxi->cxl_mmap_vmops->fault(vma, vmf);
+ rc = ctxi->cxl_mmap_vmops->fault(vmf);
} else {
dev_dbg(dev, "%s: err recovery active, use err_page\n",
__func__);
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index e0e308b7e01a..f12e520923ea 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1185,8 +1185,9 @@ sg_fasync(int fd, struct file *filp, int mode)
}
static int
-sg_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+sg_vma_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
Sg_fd *sfp;
unsigned long offset, len, sa;
Sg_scatter_hold *rsv_schp;
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index 937c2d5d7ec3..2c3ffbcbd621 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -865,15 +865,14 @@ static void ion_buffer_sync_for_device(struct ion_buffer *buffer,
list_for_each_entry(vma_list, &buffer->vmas, list) {
struct vm_area_struct *vma = vma_list->vma;
- zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start,
- NULL);
+ zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start);
}
mutex_unlock(&buffer->lock);
}
-static int ion_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ion_vm_fault(struct vm_fault *vmf)
{
- struct ion_buffer *buffer = vma->vm_private_data;
+ struct ion_buffer *buffer = vmf->vma->vm_private_data;
unsigned long pfn;
int ret;
@@ -882,7 +881,7 @@ static int ion_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
BUG_ON(!buffer->pages || !buffer->pages[vmf->pgoff]);
pfn = page_to_pfn(ion_buffer_page(buffer->pages[vmf->pgoff]));
- ret = vm_insert_pfn(vma, vmf->address, pfn);
+ ret = vm_insert_pfn(vmf->vma, vmf->address, pfn);
mutex_unlock(&buffer->lock);
if (ret)
return VM_FAULT_ERROR;
diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c
index 9afa6bec3e6f..896196c74cd2 100644
--- a/drivers/staging/lustre/lustre/llite/llite_mmap.c
+++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c
@@ -321,7 +321,7 @@ out:
return fault_ret;
}
-static int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ll_fault(struct vm_fault *vmf)
{
int count = 0;
bool printed = false;
@@ -335,7 +335,7 @@ static int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));
restart:
- result = ll_fault0(vma, vmf);
+ result = ll_fault0(vmf->vma, vmf);
LASSERT(!(result & VM_FAULT_LOCKED));
if (result == 0) {
struct page *vmpage = vmf->page;
@@ -362,8 +362,9 @@ restart:
return result;
}
-static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ll_page_mkwrite(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
int count = 0;
bool printed = false;
bool retry;
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
index 3e9cf710501b..4c57755e06e7 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c
@@ -1014,7 +1014,7 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
{
struct vm_fault *vmf = cfio->ft_vmf;
- cfio->ft_flags = filemap_fault(cfio->ft_vma, vmf);
+ cfio->ft_flags = filemap_fault(vmf);
cfio->ft_flags_valid = 1;
if (vmf->page) {
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 18f0ec2e1f9c..c3adefe95e50 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -781,15 +781,15 @@ static int tcmu_find_mem_index(struct vm_area_struct *vma)
return -1;
}
-static int tcmu_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int tcmu_vma_fault(struct vm_fault *vmf)
{
- struct tcmu_dev *udev = vma->vm_private_data;
+ struct tcmu_dev *udev = vmf->vma->vm_private_data;
struct uio_info *info = &udev->uio_info;
struct page *page;
unsigned long offset;
void *addr;
- int mi = tcmu_find_mem_index(vma);
+ int mi = tcmu_find_mem_index(vmf->vma);
if (mi < 0)
return VM_FAULT_SIGBUS;
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 701c085bb19b..71136742e606 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -317,7 +317,7 @@ static struct sysrq_key_op sysrq_ftrace_dump_op = {
static void sysrq_handle_showmem(int key)
{
- show_mem(0);
+ show_mem(0, NULL);
}
static struct sysrq_key_op sysrq_showmem_op = {
.handler = sysrq_handle_showmem,
diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index 3dd6a491cdba..397e1509fe51 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -572,7 +572,7 @@ static void fn_scroll_back(struct vc_data *vc)
static void fn_show_mem(struct vc_data *vc)
{
- show_mem(0);
+ show_mem(0, NULL);
}
static void fn_show_state(struct vc_data *vc)
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index fba021f5736a..31d95dc9c202 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -597,14 +597,14 @@ static int uio_find_mem_index(struct vm_area_struct *vma)
return -1;
}
-static int uio_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int uio_vma_fault(struct vm_fault *vmf)
{
- struct uio_device *idev = vma->vm_private_data;
+ struct uio_device *idev = vmf->vma->vm_private_data;
struct page *page;
unsigned long offset;
void *addr;
- int mi = uio_find_mem_index(vma);
+ int mi = uio_find_mem_index(vmf->vma);
if (mi < 0)
return VM_FAULT_SIGBUS;
diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
index 91c22276c03b..9fb8b1e6ecc2 100644
--- a/drivers/usb/mon/mon_bin.c
+++ b/drivers/usb/mon/mon_bin.c
@@ -1223,9 +1223,9 @@ static void mon_bin_vma_close(struct vm_area_struct *vma)
/*
* Map ring pages to user space.
*/
-static int mon_bin_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int mon_bin_vma_fault(struct vm_fault *vmf)
{
- struct mon_reader_bin *rp = vma->vm_private_data;
+ struct mon_reader_bin *rp = vmf->vma->vm_private_data;
unsigned long offset, chunk_idx;
struct page *pageptr;
diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c
index 74b5bcac8bf2..37f69c061210 100644
--- a/drivers/video/fbdev/core/fb_defio.c
+++ b/drivers/video/fbdev/core/fb_defio.c
@@ -37,12 +37,11 @@ static struct page *fb_deferred_io_page(struct fb_info *info, unsigned long offs
}
/* this is to find and return the vmalloc-ed fb pages */
-static int fb_deferred_io_fault(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int fb_deferred_io_fault(struct vm_fault *vmf)
{
unsigned long offset;
struct page *page;
- struct fb_info *info = vma->vm_private_data;
+ struct fb_info *info = vmf->vma->vm_private_data;
offset = vmf->pgoff << PAGE_SHIFT;
if (offset >= info->fix.smem_len)
@@ -54,8 +53,8 @@ static int fb_deferred_io_fault(struct vm_area_struct *vma,
get_page(page);
- if (vma->vm_file)
- page->mapping = vma->vm_file->f_mapping;
+ if (vmf->vma->vm_file)
+ page->mapping = vmf->vma->vm_file->f_mapping;
else
printk(KERN_ERR "no mapping available\n");
@@ -91,11 +90,10 @@ int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasy
EXPORT_SYMBOL_GPL(fb_deferred_io_fsync);
/* vm_ops->page_mkwrite handler */
-static int fb_deferred_io_mkwrite(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int fb_deferred_io_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct fb_info *info = vma->vm_private_data;
+ struct fb_info *info = vmf->vma->vm_private_data;
struct fb_deferred_io *fbdefio = info->fbdefio;
struct page *cur;
@@ -105,7 +103,7 @@ static int fb_deferred_io_mkwrite(struct vm_area_struct *vma,
deferred framebuffer IO. then if userspace touches a page
again, we repeat the same scheme */
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
/* protect against the workqueue changing the page list */
mutex_lock(&fbdefio->lock);
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 2077a3ac7c0c..7a92a5e1d40c 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -804,10 +804,10 @@ static void privcmd_close(struct vm_area_struct *vma)
kfree(pages);
}
-static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int privcmd_fault(struct vm_fault *vmf)
{
printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
- vma, vma->vm_start, vma->vm_end,
+ vmf->vma, vmf->vma->vm_start, vmf->vma->vm_end,
vmf->pgoff, (void *)vmf->address);
return VM_FAULT_SIGBUS;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 6a0f3fa85ef7..3de3b4a89d89 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -534,11 +534,11 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma)
}
static int
-v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+v9fs_vm_page_mkwrite(struct vm_fault *vmf)
{
struct v9fs_inode *v9inode;
struct page *page = vmf->page;
- struct file *filp = vma->vm_file;
+ struct file *filp = vmf->vma->vm_file;
struct inode *inode = file_inode(filp);
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 2f088773f1c0..2f8bab390d13 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -138,9 +138,9 @@ extern int affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh);
extern int affs_remove_header(struct dentry *dentry);
extern u32 affs_checksum_block(struct super_block *sb, struct buffer_head *bh);
extern void affs_fix_checksum(struct super_block *sb, struct buffer_head *bh);
-extern void secs_to_datestamp(time64_t secs, struct affs_date *ds);
-extern umode_t prot_to_mode(u32 prot);
-extern void mode_to_prot(struct inode *inode);
+extern void affs_secs_to_datestamp(time64_t secs, struct affs_date *ds);
+extern umode_t affs_prot_to_mode(u32 prot);
+extern void affs_mode_to_prot(struct inode *inode);
__printf(3, 4)
extern void affs_error(struct super_block *sb, const char *function,
const char *fmt, ...);
@@ -162,6 +162,7 @@ extern void affs_free_bitmap(struct super_block *sb);
/* namei.c */
+extern const struct export_operations affs_export_ops;
extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len);
extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int);
extern int affs_unlink(struct inode *dir, struct dentry *dentry);
@@ -178,7 +179,6 @@ extern int affs_rename(struct inode *old_dir, struct dentry *old_dentry,
/* inode.c */
-extern unsigned long affs_parent_ino(struct inode *dir);
extern struct inode *affs_new_inode(struct inode *dir);
extern int affs_notify_change(struct dentry *dentry, struct iattr *attr);
extern void affs_evict_inode(struct inode *inode);
@@ -213,6 +213,12 @@ extern const struct address_space_operations affs_aops_ofs;
extern const struct dentry_operations affs_dentry_operations;
extern const struct dentry_operations affs_intl_dentry_operations;
+static inline bool affs_validblock(struct super_block *sb, int block)
+{
+ return(block >= AFFS_SB(sb)->s_reserved &&
+ block < AFFS_SB(sb)->s_partition_size);
+}
+
static inline void
affs_set_blocksize(struct super_block *sb, int size)
{
@@ -222,7 +228,7 @@ static inline struct buffer_head *
affs_bread(struct super_block *sb, int block)
{
pr_debug("%s: %d\n", __func__, block);
- if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size)
+ if (affs_validblock(sb, block))
return sb_bread(sb, block);
return NULL;
}
@@ -230,7 +236,7 @@ static inline struct buffer_head *
affs_getblk(struct super_block *sb, int block)
{
pr_debug("%s: %d\n", __func__, block);
- if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size)
+ if (affs_validblock(sb, block))
return sb_getblk(sb, block);
return NULL;
}
@@ -239,7 +245,7 @@ affs_getzeroblk(struct super_block *sb, int block)
{
struct buffer_head *bh;
pr_debug("%s: %d\n", __func__, block);
- if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) {
+ if (affs_validblock(sb, block)) {
bh = sb_getblk(sb, block);
lock_buffer(bh);
memset(bh->b_data, 0 , sb->s_blocksize);
@@ -254,7 +260,7 @@ affs_getemptyblk(struct super_block *sb, int block)
{
struct buffer_head *bh;
pr_debug("%s: %d\n", __func__, block);
- if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) {
+ if (affs_validblock(sb, block)) {
bh = sb_getblk(sb, block);
wait_on_buffer(bh);
set_buffer_uptodate(bh);
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 0ec65c133b93..b573c3b9a328 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -367,7 +367,7 @@ affs_fix_checksum(struct super_block *sb, struct buffer_head *bh)
}
void
-secs_to_datestamp(time64_t secs, struct affs_date *ds)
+affs_secs_to_datestamp(time64_t secs, struct affs_date *ds)
{
u32 days;
u32 minute;
@@ -386,55 +386,55 @@ secs_to_datestamp(time64_t secs, struct affs_date *ds)
}
umode_t
-prot_to_mode(u32 prot)
+affs_prot_to_mode(u32 prot)
{
umode_t mode = 0;
if (!(prot & FIBF_NOWRITE))
- mode |= S_IWUSR;
+ mode |= 0200;
if (!(prot & FIBF_NOREAD))
- mode |= S_IRUSR;
+ mode |= 0400;
if (!(prot & FIBF_NOEXECUTE))
- mode |= S_IXUSR;
+ mode |= 0100;
if (prot & FIBF_GRP_WRITE)
- mode |= S_IWGRP;
+ mode |= 0020;
if (prot & FIBF_GRP_READ)
- mode |= S_IRGRP;
+ mode |= 0040;
if (prot & FIBF_GRP_EXECUTE)
- mode |= S_IXGRP;
+ mode |= 0010;
if (prot & FIBF_OTR_WRITE)
- mode |= S_IWOTH;
+ mode |= 0002;
if (prot & FIBF_OTR_READ)
- mode |= S_IROTH;
+ mode |= 0004;
if (prot & FIBF_OTR_EXECUTE)
- mode |= S_IXOTH;
+ mode |= 0001;
return mode;
}
void
-mode_to_prot(struct inode *inode)
+affs_mode_to_prot(struct inode *inode)
{
u32 prot = AFFS_I(inode)->i_protect;
umode_t mode = inode->i_mode;
- if (!(mode & S_IXUSR))
+ if (!(mode & 0100))
prot |= FIBF_NOEXECUTE;
- if (!(mode & S_IRUSR))
+ if (!(mode & 0400))
prot |= FIBF_NOREAD;
- if (!(mode & S_IWUSR))
+ if (!(mode & 0200))
prot |= FIBF_NOWRITE;
- if (mode & S_IXGRP)
+ if (mode & 0010)
prot |= FIBF_GRP_EXECUTE;
- if (mode & S_IRGRP)
+ if (mode & 0040)
prot |= FIBF_GRP_READ;
- if (mode & S_IWGRP)
+ if (mode & 0020)
prot |= FIBF_GRP_WRITE;
- if (mode & S_IXOTH)
+ if (mode & 0001)
prot |= FIBF_OTR_EXECUTE;
- if (mode & S_IROTH)
+ if (mode & 0004)
prot |= FIBF_OTR_READ;
- if (mode & S_IWOTH)
+ if (mode & 0002)
prot |= FIBF_OTR_WRITE;
AFFS_I(inode)->i_protect = prot;
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index fe4e1290dbb5..a5e6097eb5a9 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -69,7 +69,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
if (affs_test_opt(sbi->s_flags, SF_SETMODE))
inode->i_mode = sbi->s_mode;
else
- inode->i_mode = prot_to_mode(prot);
+ inode->i_mode = affs_prot_to_mode(prot);
id = be16_to_cpu(tail->uid);
if (id == 0 || affs_test_opt(sbi->s_flags, SF_SETUID))
@@ -184,11 +184,12 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
}
tail = AFFS_TAIL(sb, bh);
if (tail->stype == cpu_to_be32(ST_ROOT)) {
- secs_to_datestamp(inode->i_mtime.tv_sec,&AFFS_ROOT_TAIL(sb, bh)->root_change);
+ affs_secs_to_datestamp(inode->i_mtime.tv_sec,
+ &AFFS_ROOT_TAIL(sb, bh)->root_change);
} else {
tail->protect = cpu_to_be32(AFFS_I(inode)->i_protect);
tail->size = cpu_to_be32(inode->i_size);
- secs_to_datestamp(inode->i_mtime.tv_sec,&tail->change);
+ affs_secs_to_datestamp(inode->i_mtime.tv_sec, &tail->change);
if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) {
uid = i_uid_read(inode);
gid = i_gid_read(inode);
@@ -249,7 +250,7 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)
mark_inode_dirty(inode);
if (attr->ia_valid & ATTR_MODE)
- mode_to_prot(inode);
+ affs_mode_to_prot(inode);
out:
return error;
}
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 29186d29a3b6..96dd1d09a273 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -9,29 +9,10 @@
*/
#include "affs.h"
+#include <linux/exportfs.h>
typedef int (*toupper_t)(int);
-static int affs_toupper(int ch);
-static int affs_hash_dentry(const struct dentry *, struct qstr *);
-static int affs_compare_dentry(const struct dentry *dentry,
- unsigned int len, const char *str, const struct qstr *name);
-static int affs_intl_toupper(int ch);
-static int affs_intl_hash_dentry(const struct dentry *, struct qstr *);
-static int affs_intl_compare_dentry(const struct dentry *dentry,
- unsigned int len, const char *str, const struct qstr *name);
-
-const struct dentry_operations affs_dentry_operations = {
- .d_hash = affs_hash_dentry,
- .d_compare = affs_compare_dentry,
-};
-
-const struct dentry_operations affs_intl_dentry_operations = {
- .d_hash = affs_intl_hash_dentry,
- .d_compare = affs_intl_compare_dentry,
-};
-
-
/* Simple toupper() for DOS\1 */
static int
@@ -271,7 +252,7 @@ affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
return -ENOSPC;
inode->i_mode = mode;
- mode_to_prot(inode);
+ affs_mode_to_prot(inode);
mark_inode_dirty(inode);
inode->i_op = &affs_file_inode_operations;
@@ -301,7 +282,7 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
return -ENOSPC;
inode->i_mode = S_IFDIR | mode;
- mode_to_prot(inode);
+ affs_mode_to_prot(inode);
inode->i_op = &affs_dir_inode_operations;
inode->i_fop = &affs_dir_operations;
@@ -347,7 +328,7 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
inode_nohighmem(inode);
inode->i_data.a_ops = &affs_symlink_aops;
inode->i_mode = S_IFLNK | 0777;
- mode_to_prot(inode);
+ affs_mode_to_prot(inode);
error = -EIO;
bh = affs_bread(sb, inode->i_ino);
@@ -465,3 +446,71 @@ done:
affs_brelse(bh);
return retval;
}
+
+static struct dentry *affs_get_parent(struct dentry *child)
+{
+ struct inode *parent;
+ struct buffer_head *bh;
+
+ bh = affs_bread(child->d_sb, d_inode(child)->i_ino);
+ if (!bh)
+ return ERR_PTR(-EIO);
+
+ parent = affs_iget(child->d_sb,
+ be32_to_cpu(AFFS_TAIL(child->d_sb, bh)->parent));
+ brelse(bh);
+ if (IS_ERR(parent))
+ return ERR_CAST(parent);
+
+ return d_obtain_alias(parent);
+}
+
+static struct inode *affs_nfs_get_inode(struct super_block *sb, u64 ino,
+ u32 generation)
+{
+ struct inode *inode;
+
+ if (!affs_validblock(sb, ino))
+ return ERR_PTR(-ESTALE);
+
+ inode = affs_iget(sb, ino);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+
+ if (generation && inode->i_generation != generation) {
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
+
+ return inode;
+}
+
+static struct dentry *affs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
+ affs_nfs_get_inode);
+}
+
+static struct dentry *affs_fh_to_parent(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ return generic_fh_to_parent(sb, fid, fh_len, fh_type,
+ affs_nfs_get_inode);
+}
+
+const struct export_operations affs_export_ops = {
+ .fh_to_dentry = affs_fh_to_dentry,
+ .fh_to_parent = affs_fh_to_parent,
+ .get_parent = affs_get_parent,
+};
+
+const struct dentry_operations affs_dentry_operations = {
+ .d_hash = affs_hash_dentry,
+ .d_compare = affs_compare_dentry,
+};
+
+const struct dentry_operations affs_intl_dentry_operations = {
+ .d_hash = affs_intl_hash_dentry,
+ .d_compare = affs_intl_compare_dentry,
+};
diff --git a/fs/affs/super.c b/fs/affs/super.c
index d6384863192c..37532538e8ab 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -32,7 +32,7 @@ affs_commit_super(struct super_block *sb, int wait)
struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh);
lock_buffer(bh);
- secs_to_datestamp(ktime_get_real_seconds(), &tail->disk_change);
+ affs_secs_to_datestamp(ktime_get_real_seconds(), &tail->disk_change);
affs_fix_checksum(sb, bh);
unlock_buffer(bh);
@@ -507,6 +507,7 @@ got_root:
return -ENOMEM;
}
+ sb->s_export_op = &affs_export_ops;
pr_debug("s_flags=%lX\n", sb->s_flags);
return 0;
}
diff --git a/fs/aio.c b/fs/aio.c
index 873b4ca82ccb..7e2ab9c8e39c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -512,7 +512,7 @@ static int aio_setup_ring(struct kioctx *ctx)
ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
PROT_READ | PROT_WRITE,
- MAP_SHARED, 0, &unused);
+ MAP_SHARED, 0, &unused, NULL);
up_write(&mm->mmap_sem);
if (IS_ERR((void *)ctx->mmap_base)) {
ctx->mmap_size = 0;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 6f48d670c941..806df746f1a9 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -38,8 +38,6 @@
* which have been left busy at at service shutdown.
*/
-#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl)
-
typedef int (*ioctl_fn)(struct file *, struct autofs_sb_info *,
struct autofs_dev_ioctl *);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 82e8f6edfb48..d79ced925861 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -281,8 +281,8 @@ static int autofs4_mount_wait(const struct path *path, bool rcu_walk)
pr_debug("waiting for mount name=%pd\n", path->dentry);
status = autofs4_wait(sbi, path, NFY_MOUNT);
pr_debug("mount wait done status=%d\n", status);
+ ino->last_used = jiffies;
}
- ino->last_used = jiffies;
return status;
}
@@ -321,16 +321,21 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)
*/
if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
struct dentry *parent = dentry->d_parent;
- struct autofs_info *ino;
struct dentry *new;
new = d_lookup(parent, &dentry->d_name);
if (!new)
return NULL;
- ino = autofs4_dentry_ino(new);
- ino->last_used = jiffies;
- dput(path->dentry);
- path->dentry = new;
+ if (new == dentry)
+ dput(new);
+ else {
+ struct autofs_info *ino;
+
+ ino = autofs4_dentry_ino(new);
+ ino->last_used = jiffies;
+ dput(path->dentry);
+ path->dentry = new;
+ }
}
return path->dentry;
}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index e7bf01373bc4..443a6f537d56 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -91,12 +91,18 @@ static struct linux_binfmt elf_format = {
#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
-static int set_brk(unsigned long start, unsigned long end)
+static int set_brk(unsigned long start, unsigned long end, int prot)
{
start = ELF_PAGEALIGN(start);
end = ELF_PAGEALIGN(end);
if (end > start) {
- int error = vm_brk(start, end - start);
+ /*
+ * Map the last of the bss segment.
+ * If the header is requesting these pages to be
+ * executable, honour that (ppc32 needs this).
+ */
+ int error = vm_brk_flags(start, end - start,
+ prot & PROT_EXEC ? VM_EXEC : 0);
if (error)
return error;
}
@@ -524,6 +530,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
unsigned long load_addr = 0;
int load_addr_set = 0;
unsigned long last_bss = 0, elf_bss = 0;
+ int bss_prot = 0;
unsigned long error = ~0UL;
unsigned long total_size;
int i;
@@ -606,8 +613,10 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
* elf_bss and last_bss is the bss section.
*/
k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
- if (k > last_bss)
+ if (k > last_bss) {
last_bss = k;
+ bss_prot = elf_prot;
+ }
}
}
@@ -623,13 +632,14 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
/*
* Next, align both the file and mem bss up to the page size,
* since this is where elf_bss was just zeroed up to, and where
- * last_bss will end after the vm_brk() below.
+ * last_bss will end after the vm_brk_flags() below.
*/
elf_bss = ELF_PAGEALIGN(elf_bss);
last_bss = ELF_PAGEALIGN(last_bss);
/* Finally, if there is still more bss to allocate, do it. */
if (last_bss > elf_bss) {
- error = vm_brk(elf_bss, last_bss - elf_bss);
+ error = vm_brk_flags(elf_bss, last_bss - elf_bss,
+ bss_prot & PROT_EXEC ? VM_EXEC : 0);
if (error)
goto out;
}
@@ -674,6 +684,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
unsigned long error;
struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
unsigned long elf_bss, elf_brk;
+ int bss_prot = 0;
int retval, i;
unsigned long elf_entry;
unsigned long interp_load_addr = 0;
@@ -882,7 +893,8 @@ static int load_elf_binary(struct linux_binprm *bprm)
before this one. Map anonymous pages, if needed,
and clear the area. */
retval = set_brk(elf_bss + load_bias,
- elf_brk + load_bias);
+ elf_brk + load_bias,
+ bss_prot);
if (retval)
goto out_free_dentry;
nbyte = ELF_PAGEOFFSET(elf_bss);
@@ -976,8 +988,10 @@ static int load_elf_binary(struct linux_binprm *bprm)
if (end_data < k)
end_data = k;
k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
- if (k > elf_brk)
+ if (k > elf_brk) {
+ bss_prot = elf_prot;
elf_brk = k;
+ }
}
loc->elf_ex.e_entry += load_bias;
@@ -993,7 +1007,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
* mapping in the interpreter, to make sure it doesn't wind
* up getting placed where the bss needs to go.
*/
- retval = set_brk(elf_bss, elf_brk);
+ retval = set_brk(elf_bss, elf_brk, bss_prot);
if (retval)
goto out_free_dentry;
if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ad23a73ac7e7..105d4d43993e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3154,7 +3154,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
size_t size, struct bio *bio,
unsigned long bio_flags);
-int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+int btrfs_page_mkwrite(struct vm_fault *vmf);
int btrfs_readpage(struct file *file, struct page *page);
void btrfs_evict_inode(struct inode *inode);
int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 956df76dc235..23cab16751f2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8893,10 +8893,10 @@ again:
* beyond EOF, then the page is guaranteed safe against truncation until we
* unlock the page.
*/
-int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+int btrfs_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_ordered_extent *ordered;
@@ -8929,7 +8929,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = btrfs_delalloc_reserve_space(inode, page_start,
reserved_space);
if (!ret) {
- ret = file_update_time(vma->vm_file);
+ ret = file_update_time(vmf->vma->vm_file);
reserved = 1;
}
if (ret) {
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index e4b066cd912a..09860c0ec7c1 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1386,8 +1386,9 @@ static void ceph_restore_sigs(sigset_t *oldset)
/*
* vm ops
*/
-static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ceph_filemap_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct inode *inode = file_inode(vma->vm_file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
@@ -1416,7 +1417,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
ci->i_inline_version == CEPH_INLINE_NONE) {
current->journal_info = vma->vm_file;
- ret = filemap_fault(vma, vmf);
+ ret = filemap_fault(vmf);
current->journal_info = NULL;
} else
ret = -EAGAIN;
@@ -1477,8 +1478,9 @@ out_restore:
/*
* Reuse write_begin here for simplicity.
*/
-static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ceph_page_mkwrite(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct inode *inode = file_inode(vma->vm_file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 98dc842e7245..aa3debbba826 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3282,7 +3282,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
* sure that it doesn't change while being written back.
*/
static int
-cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+cifs_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
diff --git a/fs/dax.c b/fs/dax.c
index 99b5b4458a78..3bbbde5da06c 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -35,6 +35,9 @@
#include <linux/iomap.h>
#include "internal.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/fs_dax.h>
+
/* We choose 4096 entries - same as per-zone page wait tables */
#define DAX_WAIT_TABLE_BITS 12
#define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)
@@ -922,12 +925,11 @@ static int dax_insert_mapping(struct address_space *mapping,
/**
* dax_pfn_mkwrite - handle first write to DAX page
- * @vma: The virtual memory area where the fault occurred
* @vmf: The description of the fault
*/
-int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+int dax_pfn_mkwrite(struct vm_fault *vmf)
{
- struct file *file = vma->vm_file;
+ struct file *file = vmf->vma->vm_file;
struct address_space *mapping = file->f_mapping;
void *entry, **slot;
pgoff_t index = vmf->pgoff;
@@ -1116,20 +1118,9 @@ static int dax_fault_return(int error)
return VM_FAULT_SIGBUS;
}
-/**
- * dax_iomap_fault - handle a page fault on a DAX file
- * @vma: The virtual memory area where the fault occurred
- * @vmf: The description of the fault
- * @ops: iomap ops passed from the file system
- *
- * When a page fault occurs, filesystems may call this helper in their fault
- * or mkwrite handler for DAX files. Assumes the caller has done all the
- * necessary locking for the page fault to proceed successfully.
- */
-int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
- const struct iomap_ops *ops)
+static int dax_iomap_pte_fault(struct vm_fault *vmf, const struct iomap_ops *ops)
{
- struct address_space *mapping = vma->vm_file->f_mapping;
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
struct inode *inode = mapping->host;
unsigned long vaddr = vmf->address;
loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
@@ -1202,11 +1193,11 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
case IOMAP_MAPPED:
if (iomap.flags & IOMAP_F_NEW) {
count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+ mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
major = VM_FAULT_MAJOR;
}
error = dax_insert_mapping(mapping, iomap.bdev, sector,
- PAGE_SIZE, &entry, vma, vmf);
+ PAGE_SIZE, &entry, vmf->vma, vmf);
/* -EBUSY is fine, somebody else faulted on the same PTE */
if (error == -EBUSY)
error = 0;
@@ -1244,7 +1235,6 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
}
return vmf_ret;
}
-EXPORT_SYMBOL_GPL(dax_iomap_fault);
#ifdef CONFIG_FS_DAX_PMD
/*
@@ -1253,21 +1243,21 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault);
*/
#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
-static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
- struct vm_fault *vmf, unsigned long address,
- struct iomap *iomap, loff_t pos, bool write, void **entryp)
+static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap,
+ loff_t pos, void **entryp)
{
- struct address_space *mapping = vma->vm_file->f_mapping;
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
struct block_device *bdev = iomap->bdev;
+ struct inode *inode = mapping->host;
struct blk_dax_ctl dax = {
.sector = dax_iomap_sector(iomap, pos),
.size = PMD_SIZE,
};
long length = dax_map_atomic(bdev, &dax);
- void *ret;
+ void *ret = NULL;
if (length < 0) /* dax_map_atomic() failed */
- return VM_FAULT_FALLBACK;
+ goto fallback;
if (length < PMD_SIZE)
goto unmap_fallback;
if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR)
@@ -1280,67 +1270,86 @@ static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
ret = dax_insert_mapping_entry(mapping, vmf, *entryp, dax.sector,
RADIX_DAX_PMD);
if (IS_ERR(ret))
- return VM_FAULT_FALLBACK;
+ goto fallback;
*entryp = ret;
- return vmf_insert_pfn_pmd(vma, address, pmd, dax.pfn, write);
+ trace_dax_pmd_insert_mapping(inode, vmf, length, dax.pfn, ret);
+ return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
+ dax.pfn, vmf->flags & FAULT_FLAG_WRITE);
unmap_fallback:
dax_unmap_atomic(bdev, &dax);
+fallback:
+ trace_dax_pmd_insert_mapping_fallback(inode, vmf, length,
+ dax.pfn, ret);
return VM_FAULT_FALLBACK;
}
-static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
- struct vm_fault *vmf, unsigned long address,
- struct iomap *iomap, void **entryp)
+static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
+ void **entryp)
{
- struct address_space *mapping = vma->vm_file->f_mapping;
- unsigned long pmd_addr = address & PMD_MASK;
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
+ unsigned long pmd_addr = vmf->address & PMD_MASK;
+ struct inode *inode = mapping->host;
struct page *zero_page;
+ void *ret = NULL;
spinlock_t *ptl;
pmd_t pmd_entry;
- void *ret;
- zero_page = mm_get_huge_zero_page(vma->vm_mm);
+ zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);
if (unlikely(!zero_page))
- return VM_FAULT_FALLBACK;
+ goto fallback;
ret = dax_insert_mapping_entry(mapping, vmf, *entryp, 0,
RADIX_DAX_PMD | RADIX_DAX_HZP);
if (IS_ERR(ret))
- return VM_FAULT_FALLBACK;
+ goto fallback;
*entryp = ret;
- ptl = pmd_lock(vma->vm_mm, pmd);
- if (!pmd_none(*pmd)) {
+ ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
+ if (!pmd_none(*(vmf->pmd))) {
spin_unlock(ptl);
- return VM_FAULT_FALLBACK;
+ goto fallback;
}
- pmd_entry = mk_pmd(zero_page, vma->vm_page_prot);
+ pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
pmd_entry = pmd_mkhuge(pmd_entry);
- set_pmd_at(vma->vm_mm, pmd_addr, pmd, pmd_entry);
+ set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
spin_unlock(ptl);
+ trace_dax_pmd_load_hole(inode, vmf, zero_page, ret);
return VM_FAULT_NOPAGE;
+
+fallback:
+ trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, ret);
+ return VM_FAULT_FALLBACK;
}
-int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmd, unsigned int flags, const struct iomap_ops *ops)
+static int dax_iomap_pmd_fault(struct vm_fault *vmf, const struct iomap_ops *ops)
{
+ struct vm_area_struct *vma = vmf->vma;
struct address_space *mapping = vma->vm_file->f_mapping;
- unsigned long pmd_addr = address & PMD_MASK;
- bool write = flags & FAULT_FLAG_WRITE;
+ unsigned long pmd_addr = vmf->address & PMD_MASK;
+ bool write = vmf->flags & FAULT_FLAG_WRITE;
unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
struct inode *inode = mapping->host;
int result = VM_FAULT_FALLBACK;
struct iomap iomap = { 0 };
pgoff_t max_pgoff, pgoff;
- struct vm_fault vmf;
void *entry;
loff_t pos;
int error;
+ /*
+ * Check whether offset isn't beyond end of file now. Caller is
+ * supposed to hold locks serializing us with truncate / punch hole so
+ * this is a reliable test.
+ */
+ pgoff = linear_page_index(vma, pmd_addr);
+ max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
+
+ trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);
+
/* Fall back to PTEs if we're going to COW */
if (write && !(vma->vm_flags & VM_SHARED))
goto fallback;
@@ -1351,16 +1360,10 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
if ((pmd_addr + PMD_SIZE) > vma->vm_end)
goto fallback;
- /*
- * Check whether offset isn't beyond end of file now. Caller is
- * supposed to hold locks serializing us with truncate / punch hole so
- * this is a reliable test.
- */
- pgoff = linear_page_index(vma, pmd_addr);
- max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
-
- if (pgoff > max_pgoff)
- return VM_FAULT_SIGBUS;
+ if (pgoff > max_pgoff) {
+ result = VM_FAULT_SIGBUS;
+ goto out;
+ }
/* If the PMD would extend beyond the file size */
if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
@@ -1389,21 +1392,15 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
if (IS_ERR(entry))
goto finish_iomap;
- vmf.pgoff = pgoff;
- vmf.flags = flags;
- vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO;
-
switch (iomap.type) {
case IOMAP_MAPPED:
- result = dax_pmd_insert_mapping(vma, pmd, &vmf, address,
- &iomap, pos, write, &entry);
+ result = dax_pmd_insert_mapping(vmf, &iomap, pos, &entry);
break;
case IOMAP_UNWRITTEN:
case IOMAP_HOLE:
if (WARN_ON_ONCE(write))
goto unlock_entry;
- result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap,
- &entry);
+ result = dax_pmd_load_hole(vmf, &iomap, &entry);
break;
default:
WARN_ON_ONCE(1);
@@ -1429,10 +1426,40 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
}
fallback:
if (result == VM_FAULT_FALLBACK) {
- split_huge_pmd(vma, pmd, address);
+ split_huge_pmd(vma, vmf->pmd, vmf->address);
count_vm_event(THP_FAULT_FALLBACK);
}
+out:
+ trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result);
return result;
}
-EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault);
+#else
+static int dax_iomap_pmd_fault(struct vm_fault *vmf, const struct iomap_ops *ops)
+{
+ return VM_FAULT_FALLBACK;
+}
#endif /* CONFIG_FS_DAX_PMD */
+
+/**
+ * dax_iomap_fault - handle a page fault on a DAX file
+ * @vmf: The description of the fault
+ * @ops: iomap ops passed from the file system
+ *
+ * When a page fault occurs, filesystems may call this helper in
+ * their fault handler for DAX files. dax_iomap_fault() assumes the caller
+ * has done all the necessary locking for page fault to proceed
+ * successfully.
+ */
+int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
+ const struct iomap_ops *ops)
+{
+ switch (pe_size) {
+ case PE_SIZE_PTE:
+ return dax_iomap_pte_fault(vmf, ops);
+ case PE_SIZE_PMD:
+ return dax_iomap_pmd_fault(vmf, ops);
+ default:
+ return VM_FAULT_FALLBACK;
+ }
+}
+EXPORT_SYMBOL_GPL(dax_iomap_fault);
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index b0f241528a30..b21891a6bfca 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -87,19 +87,19 @@ out_unlock:
* The default page_lock and i_size verification done by non-DAX fault paths
* is sufficient because ext2 doesn't support hole punching.
*/
-static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ext2_dax_fault(struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct ext2_inode_info *ei = EXT2_I(inode);
int ret;
if (vmf->flags & FAULT_FLAG_WRITE) {
sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
}
down_read(&ei->dax_sem);
- ret = dax_iomap_fault(vma, vmf, &ext2_iomap_ops);
+ ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &ext2_iomap_ops);
up_read(&ei->dax_sem);
if (vmf->flags & FAULT_FLAG_WRITE)
@@ -107,16 +107,15 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return ret;
}
-static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int ext2_dax_pfn_mkwrite(struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct ext2_inode_info *ei = EXT2_I(inode);
loff_t size;
int ret;
sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
down_read(&ei->dax_sem);
/* check that the faulting page hasn't raced with truncate */
@@ -124,7 +123,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
else
- ret = dax_pfn_mkwrite(vma, vmf);
+ ret = dax_pfn_mkwrite(vmf);
up_read(&ei->dax_sem);
sb_end_pagefault(inode->i_sb);
@@ -134,7 +133,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
static const struct vm_operations_struct ext2_dax_vm_ops = {
.fault = ext2_dax_fault,
/*
- * .pmd_fault is not supported for DAX because allocation in ext2
+ * .huge_fault is not supported for DAX because allocation in ext2
* cannot be reliably aligned to huge page sizes and so pmd faults
* will always fail and fail back to regular faults.
*/
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 212ceb5cd83e..2bef0bd01ab2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2483,8 +2483,8 @@ extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
loff_t lstart, loff_t lend);
-extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
-extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int ext4_page_mkwrite(struct vm_fault *vmf);
+extern int ext4_filemap_fault(struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
extern void ext4_da_update_reserve_space(struct inode *inode,
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 87e11dfe3cde..8210c1f43556 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -253,19 +253,20 @@ out:
}
#ifdef CONFIG_FS_DAX
-static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ext4_dax_huge_fault(struct vm_fault *vmf,
+ enum page_entry_size pe_size)
{
int result;
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct super_block *sb = inode->i_sb;
bool write = vmf->flags & FAULT_FLAG_WRITE;
if (write) {
sb_start_pagefault(sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
}
down_read(&EXT4_I(inode)->i_mmap_sem);
- result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
+ result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops);
up_read(&EXT4_I(inode)->i_mmap_sem);
if (write)
sb_end_pagefault(sb);
@@ -273,26 +274,9 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return result;
}
-static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd, unsigned int flags)
+static int ext4_dax_fault(struct vm_fault *vmf)
{
- int result;
- struct inode *inode = file_inode(vma->vm_file);
- struct super_block *sb = inode->i_sb;
- bool write = flags & FAULT_FLAG_WRITE;
-
- if (write) {
- sb_start_pagefault(sb);
- file_update_time(vma->vm_file);
- }
- down_read(&EXT4_I(inode)->i_mmap_sem);
- result = dax_iomap_pmd_fault(vma, addr, pmd, flags,
- &ext4_iomap_ops);
- up_read(&EXT4_I(inode)->i_mmap_sem);
- if (write)
- sb_end_pagefault(sb);
-
- return result;
+ return ext4_dax_huge_fault(vmf, PE_SIZE_PTE);
}
/*
@@ -304,22 +288,21 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
* wp_pfn_shared() fails. Thus fault gets retried and things work out as
* desired.
*/
-static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int ext4_dax_pfn_mkwrite(struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct super_block *sb = inode->i_sb;
loff_t size;
int ret;
sb_start_pagefault(sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
down_read(&EXT4_I(inode)->i_mmap_sem);
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
else
- ret = dax_pfn_mkwrite(vma, vmf);
+ ret = dax_pfn_mkwrite(vmf);
up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb);
@@ -328,7 +311,7 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
static const struct vm_operations_struct ext4_dax_vm_ops = {
.fault = ext4_dax_fault,
- .pmd_fault = ext4_dax_pmd_fault,
+ .huge_fault = ext4_dax_huge_fault,
.page_mkwrite = ext4_dax_fault,
.pfn_mkwrite = ext4_dax_pfn_mkwrite,
};
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 75212a6e69f8..41d8e53e5a7f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5821,8 +5821,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
return !buffer_mapped(bh);
}
-int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+int ext4_page_mkwrite(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct page *page = vmf->page;
loff_t size;
unsigned long len;
@@ -5912,13 +5913,13 @@ out:
return ret;
}
-int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int ext4_filemap_fault(struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
int err;
down_read(&EXT4_I(inode)->i_mmap_sem);
- err = filemap_fault(vma, vmf);
+ err = filemap_fault(vmf);
up_read(&EXT4_I(inode)->i_mmap_sem);
return err;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index a3808c49e326..29b366b5c9cf 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -33,11 +33,10 @@
#include "trace.h"
#include <trace/events/f2fs.h>
-static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int f2fs_vm_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct dnode_of_data dn;
int err;
@@ -59,7 +58,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
f2fs_balance_fs(sbi, dn.node_changed);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
lock_page(page);
if (unlikely(page->mapping != inode->i_mapping ||
page_offset(page) > i_size_read(inode) ||
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2401c5dabb2a..e80bfd06daf5 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2043,12 +2043,12 @@ static void fuse_vma_close(struct vm_area_struct *vma)
* - sync(2)
* - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
*/
-static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int fuse_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
lock_page(page);
if (page->mapping != inode->i_mapping) {
unlock_page(page);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 016c11eaca7c..6fe2a59c6a9a 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -379,10 +379,10 @@ static int gfs2_allocate_page_backing(struct page *page)
* blocks allocated on disk to back that page.
*/
-static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int gfs2_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_alloc_parms ap = { .aflags = 0, };
@@ -399,7 +399,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
if (ret)
goto out;
- gfs2_size_hint(vma->vm_file, pos, PAGE_SIZE);
+ gfs2_size_hint(vmf->vma->vm_file, pos, PAGE_SIZE);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh);
@@ -407,7 +407,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
goto out_uninit;
/* Update file times before taking page lock */
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
set_bit(GIF_SW_PAGED, &ip->i_flags);
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 5de5c48b418d..75b254280ff6 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -169,7 +169,7 @@ static int hfs_readdir(struct file *file, struct dir_context *ctx)
* Can be done after the list insertion; exclusion with
* hfs_delete_cat() is provided by directory lock.
*/
- memcpy(&rd->key, &fd.key, sizeof(struct hfs_cat_key));
+ memcpy(&rd->key, &fd.key->cat, sizeof(struct hfs_cat_key));
out:
hfs_find_exit(&fd);
return err;
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index a3ec3ae7d347..482081bcdf70 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -38,7 +38,7 @@ static int hfs_get_last_session(struct super_block *sb,
/* default values */
*start = 0;
- *size = sb->s_bdev->bd_inode->i_size >> 9;
+ *size = i_size_read(sb->s_bdev->bd_inode) >> 9;
if (HFS_SB(sb)->session >= 0) {
te.cdte_track = HFS_SB(sb)->session;
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index ebb85e5f6549..e254fa0f0697 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -132,7 +132,7 @@ static int hfsplus_get_last_session(struct super_block *sb,
/* default values */
*start = 0;
- *size = sb->s_bdev->bd_inode->i_size >> 9;
+ *size = i_size_read(sb->s_bdev->bd_inode) >> 9;
if (HFSPLUS_SB(sb)->session >= 0) {
te.cdte_track = HFSPLUS_SB(sb)->session;
diff --git a/fs/iomap.c b/fs/iomap.c
index d89f70bbb952..d209f42cdcb8 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -445,11 +445,10 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
return length;
}
-int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
- const struct iomap_ops *ops)
+int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
{
struct page *page = vmf->page;
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
unsigned long length;
loff_t offset, size;
ssize_t ret;
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 20c396291ac1..fb3ab62b9c54 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -348,9 +348,9 @@ static void kernfs_vma_open(struct vm_area_struct *vma)
kernfs_put_active(of->kn);
}
-static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int kernfs_vma_fault(struct vm_fault *vmf)
{
- struct file *file = vma->vm_file;
+ struct file *file = vmf->vma->vm_file;
struct kernfs_open_file *of = kernfs_of(file);
int ret;
@@ -362,16 +362,15 @@ static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = VM_FAULT_SIGBUS;
if (of->vm_ops->fault)
- ret = of->vm_ops->fault(vma, vmf);
+ ret = of->vm_ops->fault(vmf);
kernfs_put_active(of->kn);
return ret;
}
-static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int kernfs_vma_page_mkwrite(struct vm_fault *vmf)
{
- struct file *file = vma->vm_file;
+ struct file *file = vmf->vma->vm_file;
struct kernfs_open_file *of = kernfs_of(file);
int ret;
@@ -383,7 +382,7 @@ static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma,
ret = 0;
if (of->vm_ops->page_mkwrite)
- ret = of->vm_ops->page_mkwrite(vma, vmf);
+ ret = of->vm_ops->page_mkwrite(vmf);
else
file_update_time(file);
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 39f57bef8531..0c3905e0542e 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -27,10 +27,9 @@
* XXX: how are we excluding truncate/invalidate here? Maybe need to lock
* page?
*/
-static int ncp_file_mmap_fault(struct vm_area_struct *area,
- struct vm_fault *vmf)
+static int ncp_file_mmap_fault(struct vm_fault *vmf)
{
- struct inode *inode = file_inode(area->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
char *pg_addr;
unsigned int already_read;
unsigned int count;
@@ -90,7 +89,7 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area,
* -- nyc
*/
count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(area->vm_mm, PGMAJFAULT);
+ mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
return VM_FAULT_MAJOR;
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 26dbe8b0c10d..668213984d68 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -528,10 +528,10 @@ const struct address_space_operations nfs_file_aops = {
* writable, implying that someone is about to modify the page through a
* shared-writable mapping
*/
-static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int nfs_vm_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct file *filp = vma->vm_file;
+ struct file *filp = vmf->vma->vm_file;
struct inode *inode = file_inode(filp);
unsigned pagelen;
int ret = VM_FAULT_NOPAGE;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b00d53d13d47..006068526542 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -728,8 +728,6 @@ static void nfs_inode_remove_request(struct nfs_page *req)
if (likely(head->wb_page && !PageSwapCache(head->wb_page))) {
set_page_private(head->wb_page, 0);
ClearPagePrivate(head->wb_page);
- smp_mb__after_atomic();
- wake_up_page(head->wb_page, PG_private);
clear_bit(PG_MAPPED, &head->wb_flags);
}
nfsi->nrequests--;
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 547381f3ce13..c5fa3dee72fc 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -51,8 +51,9 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
return err;
}
-static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int nilfs_page_mkwrite(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct page *page = vmf->page;
struct inode *inode = file_inode(vma->vm_file);
struct nilfs_transaction_info ti;
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index bed1fcb63088..dc22ba8c710f 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -283,16 +283,14 @@ int ocfs2_set_acl(handle_t *handle,
int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
struct buffer_head *bh = NULL;
- int status = 0;
+ int status, had_lock;
+ struct ocfs2_lock_holder oh;
- status = ocfs2_inode_lock(inode, &bh, 1);
- if (status < 0) {
- if (status != -ENOENT)
- mlog_errno(status);
- return status;
- }
+ had_lock = ocfs2_inode_lock_tracker(inode, &bh, 1, &oh);
+ if (had_lock < 0)
+ return had_lock;
status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL);
- ocfs2_inode_unlock(inode, 1);
+ ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
brelse(bh);
return status;
}
@@ -302,21 +300,20 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type)
struct ocfs2_super *osb;
struct buffer_head *di_bh = NULL;
struct posix_acl *acl;
- int ret;
+ int had_lock;
+ struct ocfs2_lock_holder oh;
osb = OCFS2_SB(inode->i_sb);
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return NULL;
- ret = ocfs2_inode_lock(inode, &di_bh, 0);
- if (ret < 0) {
- if (ret != -ENOENT)
- mlog_errno(ret);
- return ERR_PTR(ret);
- }
+
+ had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh);
+ if (had_lock < 0)
+ return ERR_PTR(had_lock);
acl = ocfs2_get_acl_nolock(inode, type, di_bh);
- ocfs2_inode_unlock(inode, 0);
+ ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
brelse(di_bh);
return acl;
}
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 7025d8c27999..d5af152b94e0 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2612,20 +2612,48 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
spin_lock(&dlm->master_lock);
ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name,
namelen, target, dlm->node_num);
+ if (ret == -EEXIST) {
+ if (oldmle)
+ __dlm_put_mle(oldmle);
+
+ spin_unlock(&dlm->master_lock);
+ spin_unlock(&dlm->spinlock);
+ mlog(0, "another process is already migrating it\n");
+ goto fail;
+ }
+
+ /*
+ * If an old mle is found, it should be put. If its type is BLOCK,
+ * it should be put again. Because it has been unhasded from the map
+ * in the function dlm_add_migration_mle.
+ * Otherwise the memory will be leaked. It will not be found again from
+ * the hash map.
+ */
+ if (oldmle) {
+ /* master is known, detach if not already detached */
+ __dlm_mle_detach_hb_events(dlm, oldmle);
+ __dlm_put_mle(oldmle);
+
+ /*
+ * If the type of the mle is BLOCK, it should be put once for
+ * release. Otherwise a memory leak may be caused because
+ * oldmle has been unhashed from the hash map and it will not
+ * be found any more.
+ */
+ if (oldmle->type == DLM_MLE_BLOCK)
+ __dlm_put_mle(oldmle);
+ }
+
/* get an extra reference on the mle.
* otherwise the assert_master from the new
* master will destroy this.
*/
dlm_get_mle_inuse(mle);
+ mle_added = 1;
+
spin_unlock(&dlm->master_lock);
spin_unlock(&dlm->spinlock);
- if (ret == -EEXIST) {
- mlog(0, "another process is already migrating it\n");
- goto fail;
- }
- mle_added = 1;
-
/*
* set the MIGRATING flag and flush asts
* if we fail after this we need to re-dirty the lockres
@@ -2642,12 +2670,6 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
}
fail:
- if (ret != -EEXIST && oldmle) {
- /* master is known, detach if not already detached */
- dlm_mle_detach_hb_events(dlm, oldmle);
- dlm_put_mle(oldmle);
- }
-
if (ret < 0) {
if (mle_added) {
dlm_mle_detach_hb_events(dlm, mle);
@@ -3182,16 +3204,24 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
if (ret < 0)
kmem_cache_free(dlm_mle_cache, mle);
+ /*
+ * If an old mle is found, it should be put. If its type is BLOCK,
+ * it should be put again because it has been unhashed from the map
+ * in the dlm_add_migration_mle().
+ * Otherwise the memory will be leaked. It will not be found again from
+ * the hash map.
+ */
+ if (oldmle) {
+ __dlm_mle_detach_hb_events(dlm, oldmle);
+ __dlm_put_mle(oldmle);
+ if (ret >= 0 && oldmle->type == DLM_MLE_BLOCK)
+ __dlm_put_mle(oldmle);
+ }
+
spin_unlock(&dlm->master_lock);
unlock:
spin_unlock(&dlm->spinlock);
- if (oldmle) {
- /* master is known, detach if not already detached */
- dlm_mle_detach_hb_events(dlm, oldmle);
- dlm_put_mle(oldmle);
- }
-
if (res)
dlm_lockres_put(res);
leave:
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 74407c6dd592..908b05942282 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2268,6 +2268,8 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
{
struct dlm_lock *lock, *next;
unsigned int freed = 0;
+ struct list_head *queue = NULL;
+ int i;
/* this node is the lockres master:
* 1) remove any stale locks for the dead node
@@ -2280,31 +2282,19 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
* to force the DLM_UNLOCK_FREE_LOCK action so as to free the locks */
/* TODO: check pending_asts, pending_basts here */
- list_for_each_entry_safe(lock, next, &res->granted, list) {
- if (lock->ml.node == dead_node) {
- list_del_init(&lock->list);
- dlm_lock_put(lock);
- /* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
- dlm_lock_put(lock);
- freed++;
- }
- }
- list_for_each_entry_safe(lock, next, &res->converting, list) {
- if (lock->ml.node == dead_node) {
- list_del_init(&lock->list);
- dlm_lock_put(lock);
- /* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
- dlm_lock_put(lock);
- freed++;
- }
- }
- list_for_each_entry_safe(lock, next, &res->blocked, list) {
- if (lock->ml.node == dead_node) {
- list_del_init(&lock->list);
- dlm_lock_put(lock);
- /* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
- dlm_lock_put(lock);
- freed++;
+ for (i = DLM_GRANTED_LIST; i <= DLM_BLOCKED_LIST; i++) {
+ queue = dlm_list_idx_to_ptr(res, i);
+ list_for_each_entry_safe(lock, next, queue, list) {
+ if (lock->ml.node == dead_node) {
+ list_del_init(&lock->list);
+ dlm_lock_put(lock);
+ /*
+ * Can't schedule DLM_UNLOCK_FREE_LOCK: do
+ * manually
+ */
+ dlm_lock_put(lock);
+ freed++;
+ }
}
}
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 77d1632e905d..8dce4099a6ca 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -532,6 +532,7 @@ void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
init_waitqueue_head(&res->l_event);
INIT_LIST_HEAD(&res->l_blocked_list);
INIT_LIST_HEAD(&res->l_mask_waiters);
+ INIT_LIST_HEAD(&res->l_holders);
}
void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
@@ -749,6 +750,50 @@ void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
res->l_flags = 0UL;
}
+/*
+ * Keep a list of processes who have interest in a lockres.
+ * Note: this is now only uesed for check recursive cluster locking.
+ */
+static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres,
+ struct ocfs2_lock_holder *oh)
+{
+ INIT_LIST_HEAD(&oh->oh_list);
+ oh->oh_owner_pid = get_pid(task_pid(current));
+
+ spin_lock(&lockres->l_lock);
+ list_add_tail(&oh->oh_list, &lockres->l_holders);
+ spin_unlock(&lockres->l_lock);
+}
+
+static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres,
+ struct ocfs2_lock_holder *oh)
+{
+ spin_lock(&lockres->l_lock);
+ list_del(&oh->oh_list);
+ spin_unlock(&lockres->l_lock);
+
+ put_pid(oh->oh_owner_pid);
+}
+
+static inline int ocfs2_is_locked_by_me(struct ocfs2_lock_res *lockres)
+{
+ struct ocfs2_lock_holder *oh;
+ struct pid *pid;
+
+ /* look in the list of holders for one with the current task as owner */
+ spin_lock(&lockres->l_lock);
+ pid = task_pid(current);
+ list_for_each_entry(oh, &lockres->l_holders, oh_list) {
+ if (oh->oh_owner_pid == pid) {
+ spin_unlock(&lockres->l_lock);
+ return 1;
+ }
+ }
+ spin_unlock(&lockres->l_lock);
+
+ return 0;
+}
+
static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
int level)
{
@@ -2333,8 +2378,9 @@ int ocfs2_inode_lock_full_nested(struct inode *inode,
goto getbh;
}
- if (ocfs2_mount_local(osb))
- goto local;
+ if ((arg_flags & OCFS2_META_LOCK_GETBH) ||
+ ocfs2_mount_local(osb))
+ goto update;
if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
ocfs2_wait_for_recovery(osb);
@@ -2363,7 +2409,7 @@ int ocfs2_inode_lock_full_nested(struct inode *inode,
if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
ocfs2_wait_for_recovery(osb);
-local:
+update:
/*
* We only see this flag if we're being called from
* ocfs2_read_locked_inode(). It means we're locking an inode
@@ -2497,6 +2543,59 @@ void ocfs2_inode_unlock(struct inode *inode,
ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
}
+/*
+ * This _tracker variantes are introduced to deal with the recursive cluster
+ * locking issue. The idea is to keep track of a lock holder on the stack of
+ * the current process. If there's a lock holder on the stack, we know the
+ * task context is already protected by cluster locking. Currently, they're
+ * used in some VFS entry routines.
+ *
+ * return < 0 on error, return == 0 if there's no lock holder on the stack
+ * before this call, return == 1 if this call would be a recursive locking.
+ */
+int ocfs2_inode_lock_tracker(struct inode *inode,
+ struct buffer_head **ret_bh,
+ int ex,
+ struct ocfs2_lock_holder *oh)
+{
+ int status;
+ int arg_flags = 0, has_locked;
+ struct ocfs2_lock_res *lockres;
+
+ lockres = &OCFS2_I(inode)->ip_inode_lockres;
+ has_locked = ocfs2_is_locked_by_me(lockres);
+ /* Just get buffer head if the cluster lock has been taken */
+ if (has_locked)
+ arg_flags = OCFS2_META_LOCK_GETBH;
+
+ if (likely(!has_locked || ret_bh)) {
+ status = ocfs2_inode_lock_full(inode, ret_bh, ex, arg_flags);
+ if (status < 0) {
+ if (status != -ENOENT)
+ mlog_errno(status);
+ return status;
+ }
+ }
+ if (!has_locked)
+ ocfs2_add_holder(lockres, oh);
+
+ return has_locked;
+}
+
+void ocfs2_inode_unlock_tracker(struct inode *inode,
+ int ex,
+ struct ocfs2_lock_holder *oh,
+ int had_lock)
+{
+ struct ocfs2_lock_res *lockres;
+
+ lockres = &OCFS2_I(inode)->ip_inode_lockres;
+ if (!had_lock) {
+ ocfs2_remove_holder(lockres, oh);
+ ocfs2_inode_unlock(inode, ex);
+ }
+}
+
int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
{
struct ocfs2_lock_res *lockres;
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index d293a22c32c5..a7fc18ba0dc1 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -70,6 +70,11 @@ struct ocfs2_orphan_scan_lvb {
__be32 lvb_os_seqno;
};
+struct ocfs2_lock_holder {
+ struct list_head oh_list;
+ struct pid *oh_owner_pid;
+};
+
/* ocfs2_inode_lock_full() 'arg_flags' flags */
/* don't wait on recovery. */
#define OCFS2_META_LOCK_RECOVERY (0x01)
@@ -77,6 +82,8 @@ struct ocfs2_orphan_scan_lvb {
#define OCFS2_META_LOCK_NOQUEUE (0x02)
/* don't block waiting for the downconvert thread, instead return -EAGAIN */
#define OCFS2_LOCK_NONBLOCK (0x04)
+/* just get back disk inode bh if we've got cluster lock. */
+#define OCFS2_META_LOCK_GETBH (0x08)
/* Locking subclasses of inode cluster lock */
enum {
@@ -170,4 +177,15 @@ void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug);
/* To set the locking protocol on module initialization */
void ocfs2_set_locking_protocol(void);
+
+/* The _tracker pair is used to avoid cluster recursive locking */
+int ocfs2_inode_lock_tracker(struct inode *inode,
+ struct buffer_head **ret_bh,
+ int ex,
+ struct ocfs2_lock_holder *oh);
+void ocfs2_inode_unlock_tracker(struct inode *inode,
+ int ex,
+ struct ocfs2_lock_holder *oh,
+ int had_lock);
+
#endif /* DLMGLUE_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index c4889655d32b..7b6a146327d7 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1138,6 +1138,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
handle_t *handle = NULL;
struct dquot *transfer_to[MAXQUOTAS] = { };
int qtype;
+ int had_lock;
+ struct ocfs2_lock_holder oh;
trace_ocfs2_setattr(inode, dentry,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -1173,11 +1175,30 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
}
}
- status = ocfs2_inode_lock(inode, &bh, 1);
- if (status < 0) {
- if (status != -ENOENT)
- mlog_errno(status);
+ had_lock = ocfs2_inode_lock_tracker(inode, &bh, 1, &oh);
+ if (had_lock < 0) {
+ status = had_lock;
goto bail_unlock_rw;
+ } else if (had_lock) {
+ /*
+ * As far as we know, ocfs2_setattr() could only be the first
+ * VFS entry point in the call chain of recursive cluster
+ * locking issue.
+ *
+ * For instance:
+ * chmod_common()
+ * notify_change()
+ * ocfs2_setattr()
+ * posix_acl_chmod()
+ * ocfs2_iop_get_acl()
+ *
+ * But, we're not 100% sure if it's always true, because the
+ * ordering of the VFS entry points in the call chain is out
+ * of our control. So, we'd better dump the stack here to
+ * catch the other cases of recursive locking.
+ */
+ mlog(ML_ERROR, "Another case of recursive locking:\n");
+ dump_stack();
}
inode_locked = 1;
@@ -1260,8 +1281,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
bail_commit:
ocfs2_commit_trans(osb, handle);
bail_unlock:
- if (status) {
- ocfs2_inode_unlock(inode, 1);
+ if (status && inode_locked) {
+ ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
inode_locked = 0;
}
bail_unlock_rw:
@@ -1279,7 +1300,7 @@ bail:
mlog_errno(status);
}
if (inode_locked)
- ocfs2_inode_unlock(inode, 1);
+ ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
brelse(bh);
return status;
@@ -1320,21 +1341,32 @@ bail:
int ocfs2_permission(struct inode *inode, int mask)
{
- int ret;
+ int ret, had_lock;
+ struct ocfs2_lock_holder oh;
if (mask & MAY_NOT_BLOCK)
return -ECHILD;
- ret = ocfs2_inode_lock(inode, NULL, 0);
- if (ret) {
- if (ret != -ENOENT)
- mlog_errno(ret);
+ had_lock = ocfs2_inode_lock_tracker(inode, NULL, 0, &oh);
+ if (had_lock < 0) {
+ ret = had_lock;
goto out;
+ } else if (had_lock) {
+ /* See comments in ocfs2_setattr() for details.
+ * The call chain of this case could be:
+ * do_sys_open()
+ * may_open()
+ * inode_permission()
+ * ocfs2_permission()
+ * ocfs2_iop_get_acl()
+ */
+ mlog(ML_ERROR, "Another case of recursive locking:\n");
+ dump_stack();
}
ret = generic_permission(inode, mask);
- ocfs2_inode_unlock(inode, 0);
+ ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
out:
return ret;
}
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 429088786e93..098f5c712569 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -44,17 +44,18 @@
#include "ocfs2_trace.h"
-static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
+static int ocfs2_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
sigset_t oldset;
int ret;
ocfs2_block_signals(&oldset);
- ret = filemap_fault(area, vmf);
+ ret = filemap_fault(vmf);
ocfs2_unblock_signals(&oldset);
- trace_ocfs2_fault(OCFS2_I(area->vm_file->f_mapping->host)->ip_blkno,
- area, vmf->page, vmf->pgoff);
+ trace_ocfs2_fault(OCFS2_I(vma->vm_file->f_mapping->host)->ip_blkno,
+ vma, vmf->page, vmf->pgoff);
return ret;
}
@@ -127,10 +128,10 @@ out:
return ret;
}
-static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ocfs2_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct buffer_head *di_bh = NULL;
sigset_t oldset;
int ret;
@@ -160,7 +161,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
*/
down_write(&OCFS2_I(inode)->ip_alloc_sem);
- ret = __ocfs2_page_mkwrite(vma->vm_file, di_bh, page);
+ ret = __ocfs2_page_mkwrite(vmf->vma->vm_file, di_bh, page);
up_write(&OCFS2_I(inode)->ip_alloc_sem);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 7e5958b0be6b..0c39d71c67a1 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -172,6 +172,7 @@ struct ocfs2_lock_res {
struct list_head l_blocked_list;
struct list_head l_mask_waiters;
+ struct list_head l_holders;
unsigned long l_flags;
char l_name[OCFS2_LOCK_ID_MAX_LEN];
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b73b4de8fb36..4ecb5edc3c61 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -292,101 +292,69 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
}
} else {
/*
- * Command line (1 string) occupies ARGV and maybe
- * extends into ENVP.
- */
- if (len1 + len2 <= *pos)
- goto skip_argv_envp;
- if (len1 <= *pos)
- goto skip_argv;
-
- p = arg_start + *pos;
- len = len1 - *pos;
- while (count > 0 && len > 0) {
- unsigned int _count, l;
- int nr_read;
- bool final;
-
- _count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
- if (nr_read < 0)
- rv = nr_read;
- if (nr_read <= 0)
- goto out_free_page;
-
- /*
- * Command line can be shorter than whole ARGV
- * even if last "marker" byte says it is not.
- */
- final = false;
- l = strnlen(page, nr_read);
- if (l < nr_read) {
- nr_read = l;
- final = true;
- }
-
- if (copy_to_user(buf, page, nr_read)) {
- rv = -EFAULT;
- goto out_free_page;
- }
-
- p += nr_read;
- len -= nr_read;
- buf += nr_read;
- count -= nr_read;
- rv += nr_read;
-
- if (final)
- goto out_free_page;
- }
-skip_argv:
- /*
* Command line (1 string) occupies ARGV and
* extends into ENVP.
*/
- if (len1 <= *pos) {
- p = env_start + *pos - len1;
- len = len1 + len2 - *pos;
- } else {
- p = env_start;
- len = len2;
+ struct {
+ unsigned long p;
+ unsigned long len;
+ } cmdline[2] = {
+ { .p = arg_start, .len = len1 },
+ { .p = env_start, .len = len2 },
+ };
+ loff_t pos1 = *pos;
+ unsigned int i;
+
+ i = 0;
+ while (i < 2 && pos1 >= cmdline[i].len) {
+ pos1 -= cmdline[i].len;
+ i++;
}
- while (count > 0 && len > 0) {
- unsigned int _count, l;
- int nr_read;
- bool final;
-
- _count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
- if (nr_read < 0)
- rv = nr_read;
- if (nr_read <= 0)
- goto out_free_page;
-
- /* Find EOS. */
- final = false;
- l = strnlen(page, nr_read);
- if (l < nr_read) {
- nr_read = l;
- final = true;
- }
-
- if (copy_to_user(buf, page, nr_read)) {
- rv = -EFAULT;
- goto out_free_page;
+ while (i < 2) {
+ p = cmdline[i].p + pos1;
+ len = cmdline[i].len - pos1;
+ while (count > 0 && len > 0) {
+ unsigned int _count, l;
+ int nr_read;
+ bool final;
+
+ _count = min3(count, len, PAGE_SIZE);
+ nr_read = access_remote_vm(mm, p, page, _count, 0);
+ if (nr_read < 0)
+ rv = nr_read;
+ if (nr_read <= 0)
+ goto out_free_page;
+
+ /*
+ * Command line can be shorter than whole ARGV
+ * even if last "marker" byte says it is not.
+ */
+ final = false;
+ l = strnlen(page, nr_read);
+ if (l < nr_read) {
+ nr_read = l;
+ final = true;
+ }
+
+ if (copy_to_user(buf, page, nr_read)) {
+ rv = -EFAULT;
+ goto out_free_page;
+ }
+
+ p += nr_read;
+ len -= nr_read;
+ buf += nr_read;
+ count -= nr_read;
+ rv += nr_read;
+
+ if (final)
+ goto out_free_page;
}
- p += nr_read;
- len -= nr_read;
- buf += nr_read;
- count -= nr_read;
- rv += nr_read;
-
- if (final)
- goto out_free_page;
+ /* Only first chunk can be read partially. */
+ pos1 = 0;
+ i++;
}
-skip_argv_envp:
- ;
}
out_free_page:
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index f6a01f09f79d..06c73904d497 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -57,9 +57,9 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
struct rb_node *node = dir->subdir.rb_node;
while (node) {
- struct proc_dir_entry *de = container_of(node,
- struct proc_dir_entry,
- subdir_node);
+ struct proc_dir_entry *de = rb_entry(node,
+ struct proc_dir_entry,
+ subdir_node);
int result = proc_match(len, name, de);
if (result < 0)
@@ -80,8 +80,9 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir,
/* Figure out where to put new node */
while (*new) {
- struct proc_dir_entry *this =
- container_of(*new, struct proc_dir_entry, subdir_node);
+ struct proc_dir_entry *this = rb_entry(*new,
+ struct proc_dir_entry,
+ subdir_node);
int result = proc_match(de->namelen, de->name, this);
parent = *new;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 0b80ad87b4d6..ea9f3d1ae830 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -373,7 +373,10 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
phdr->p_flags = PF_R|PF_W|PF_X;
phdr->p_offset = kc_vaddr_to_offset(m->addr) + dataoff;
phdr->p_vaddr = (size_t)m->addr;
- phdr->p_paddr = 0;
+ if (m->type == KCORE_RAM || m->type == KCORE_TEXT)
+ phdr->p_paddr = __pa(m->addr);
+ else
+ phdr->p_paddr = (elf_addr_t)-1;
phdr->p_filesz = phdr->p_memsz = m->size;
phdr->p_align = PAGE_SIZE;
}
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 5105b1599981..885d445afa0d 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -265,10 +265,10 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
* On s390 the fault handler is used for memory regions that can't be mapped
* directly with remap_pfn_range().
*/
-static int mmap_vmcore_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int mmap_vmcore_fault(struct vm_fault *vmf)
{
#ifdef CONFIG_S390
- struct address_space *mapping = vma->vm_file->f_mapping;
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
pgoff_t index = vmf->pgoff;
struct page *page;
loff_t offset;
@@ -388,7 +388,7 @@ static int remap_oldmem_pfn_checked(struct vm_area_struct *vma,
}
return 0;
fail:
- do_munmap(vma->vm_mm, from, len);
+ do_munmap(vma->vm_mm, from, len, NULL);
return -EAGAIN;
}
@@ -481,7 +481,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
return 0;
fail:
- do_munmap(vma->vm_mm, vma->vm_start, len);
+ do_munmap(vma->vm_mm, vma->vm_start, len, NULL);
return -EAGAIN;
}
#else
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 729677e18e36..efab7b64925b 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -342,31 +342,35 @@ static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen)
{
int ret;
- ret = lz4_compress(in, inlen, out, &outlen, workspace);
- if (ret) {
- pr_err("lz4_compress error, ret = %d!\n", ret);
+ ret = LZ4_compress_default(in, out, inlen, outlen, workspace);
+ if (!ret) {
+ pr_err("LZ4_compress_default error; compression failed!\n");
return -EIO;
}
- return outlen;
+ return ret;
}
static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
{
int ret;
- ret = lz4_decompress_unknownoutputsize(in, inlen, out, &outlen);
- if (ret) {
- pr_err("lz4_decompress error, ret = %d!\n", ret);
+ ret = LZ4_decompress_safe(in, out, inlen, outlen);
+ if (ret < 0) {
+ /*
+ * LZ4_decompress_safe will return an error code
+ * (< 0) if decompression failed
+ */
+ pr_err("LZ4_decompress_safe error, ret = %d!\n", ret);
return -EIO;
}
- return outlen;
+ return ret;
}
static void allocate_lz4(void)
{
- big_oops_buf_sz = lz4_compressbound(psinfo->bufsize);
+ big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize);
big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
if (big_oops_buf) {
workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index e314cb30a181..feabcde0290d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1166,7 +1166,7 @@ static int reiserfs_parse_options(struct super_block *s,
if (!strcmp(arg, "auto")) {
/* From JFS code, to auto-get the size. */
*blocks =
- s->s_bdev->bd_inode->i_size >> s->
+ i_size_read(s->s_bdev->bd_inode) >> s->
s_blocksize_bits;
} else {
*blocks = simple_strtoul(arg, &p, 0);
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index ff4468bd18b0..95da65366548 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c
@@ -97,7 +97,6 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
struct squashfs_lz4 *stream = strm;
void *buff = stream->input, *data;
int avail, i, bytes = length, res;
- size_t dest_len = output->length;
for (i = 0; i < b; i++) {
avail = min(bytes, msblk->devblksize - offset);
@@ -108,12 +107,13 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
put_bh(bh[i]);
}
- res = lz4_decompress_unknownoutputsize(stream->input, length,
- stream->output, &dest_len);
- if (res)
+ res = LZ4_decompress_safe(stream->input, stream->output,
+ length, output->length);
+
+ if (res < 0)
return -EIO;
- bytes = dest_len;
+ bytes = res;
data = squashfs_first_page(output);
buff = stream->output;
while (data) {
@@ -128,7 +128,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
}
squashfs_finish_page(output);
- return dest_len;
+ return res;
}
const struct squashfs_decompressor squashfs_lz4_comp_ops = {
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index b0d783774c96..d9ae86f96df7 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1506,11 +1506,10 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
* mmap()d file has taken write protection fault and is being made writable.
* UBIFS must ensure page is budgeted for.
*/
-static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int ubifs_vm_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct timespec now = ubifs_current_time(inode);
struct ubifs_budget_req req = { .new_page = 1 };
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 43953e03c356..625b7285a37b 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -12,6 +12,7 @@
* mm/ksm.c (mm hashing).
*/
+#include <linux/list.h>
#include <linux/hashtable.h>
#include <linux/sched.h>
#include <linux/mm.h>
@@ -26,6 +27,7 @@
#include <linux/mempolicy.h>
#include <linux/ioctl.h>
#include <linux/security.h>
+#include <linux/hugetlb.h>
static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
@@ -45,12 +47,16 @@ struct userfaultfd_ctx {
wait_queue_head_t fault_wqh;
/* waitqueue head for the pseudo fd to wakeup poll/read */
wait_queue_head_t fd_wqh;
+ /* waitqueue head for events */
+ wait_queue_head_t event_wqh;
/* a refile sequence protected by fault_pending_wqh lock */
struct seqcount refile_seq;
/* pseudo fd refcounting */
atomic_t refcount;
/* userfaultfd syscall flags */
unsigned int flags;
+ /* features requested from the userspace */
+ unsigned int features;
/* state machine */
enum userfaultfd_state state;
/* released */
@@ -59,6 +65,19 @@ struct userfaultfd_ctx {
struct mm_struct *mm;
};
+struct userfaultfd_fork_ctx {
+ struct userfaultfd_ctx *orig;
+ struct userfaultfd_ctx *new;
+ struct list_head list;
+};
+
+struct userfaultfd_unmap_ctx {
+ struct userfaultfd_ctx *ctx;
+ unsigned long start;
+ unsigned long end;
+ struct list_head list;
+};
+
struct userfaultfd_wait_queue {
struct uffd_msg msg;
wait_queue_t wq;
@@ -142,6 +161,8 @@ static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx)
VM_BUG_ON(waitqueue_active(&ctx->fault_pending_wqh));
VM_BUG_ON(spin_is_locked(&ctx->fault_wqh.lock));
VM_BUG_ON(waitqueue_active(&ctx->fault_wqh));
+ VM_BUG_ON(spin_is_locked(&ctx->event_wqh.lock));
+ VM_BUG_ON(waitqueue_active(&ctx->event_wqh));
VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock));
VM_BUG_ON(waitqueue_active(&ctx->fd_wqh));
mmdrop(ctx->mm);
@@ -169,7 +190,7 @@ static inline struct uffd_msg userfault_msg(unsigned long address,
msg.arg.pagefault.address = address;
if (flags & FAULT_FLAG_WRITE)
/*
- * If UFFD_FEATURE_PAGEFAULT_FLAG_WRITE was set in the
+ * If UFFD_FEATURE_PAGEFAULT_FLAG_WP was set in the
* uffdio_api.features and UFFD_PAGEFAULT_FLAG_WRITE
* was not set in a UFFD_EVENT_PAGEFAULT, it means it
* was a read fault, otherwise if set it means it's
@@ -188,6 +209,49 @@ static inline struct uffd_msg userfault_msg(unsigned long address,
return msg;
}
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * Same functionality as userfaultfd_must_wait below with modifications for
+ * hugepmd ranges.
+ */
+static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
+ unsigned long address,
+ unsigned long flags,
+ unsigned long reason)
+{
+ struct mm_struct *mm = ctx->mm;
+ pte_t *pte;
+ bool ret = true;
+
+ VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
+
+ pte = huge_pte_offset(mm, address);
+ if (!pte)
+ goto out;
+
+ ret = false;
+
+ /*
+ * Lockless access: we're in a wait_event so it's ok if it
+ * changes under us.
+ */
+ if (huge_pte_none(*pte))
+ ret = true;
+ if (!huge_pte_write(*pte) && (reason & VM_UFFD_WP))
+ ret = true;
+out:
+ return ret;
+}
+#else
+static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
+ unsigned long address,
+ unsigned long flags,
+ unsigned long reason)
+{
+ return false; /* should never get here */
+}
+#endif /* CONFIG_HUGETLB_PAGE */
+
/*
* Verify the pagetables are still not ok after having reigstered into
* the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any
@@ -364,8 +428,12 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
set_current_state(blocking_state);
spin_unlock(&ctx->fault_pending_wqh.lock);
- must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
- reason);
+ if (!is_vm_hugetlb_page(vmf->vma))
+ must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
+ reason);
+ else
+ must_wait = userfaultfd_huge_must_wait(ctx, vmf->address,
+ vmf->flags, reason);
up_read(&mm->mmap_sem);
if (likely(must_wait && !ACCESS_ONCE(ctx->released) &&
@@ -458,6 +526,282 @@ out:
return ret;
}
+static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
+ struct userfaultfd_wait_queue *ewq)
+{
+ int ret = 0;
+
+ ewq->ctx = ctx;
+ init_waitqueue_entry(&ewq->wq, current);
+
+ spin_lock(&ctx->event_wqh.lock);
+ /*
+ * After the __add_wait_queue the uwq is visible to userland
+ * through poll/read().
+ */
+ __add_wait_queue(&ctx->event_wqh, &ewq->wq);
+ for (;;) {
+ set_current_state(TASK_KILLABLE);
+ if (ewq->msg.event == 0)
+ break;
+ if (ACCESS_ONCE(ctx->released) ||
+ fatal_signal_pending(current)) {
+ ret = -1;
+ __remove_wait_queue(&ctx->event_wqh, &ewq->wq);
+ break;
+ }
+
+ spin_unlock(&ctx->event_wqh.lock);
+
+ wake_up_poll(&ctx->fd_wqh, POLLIN);
+ schedule();
+
+ spin_lock(&ctx->event_wqh.lock);
+ }
+ __set_current_state(TASK_RUNNING);
+ spin_unlock(&ctx->event_wqh.lock);
+
+ /*
+ * ctx may go away after this if the userfault pseudo fd is
+ * already released.
+ */
+
+ userfaultfd_ctx_put(ctx);
+ return ret;
+}
+
+static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx,
+ struct userfaultfd_wait_queue *ewq)
+{
+ ewq->msg.event = 0;
+ wake_up_locked(&ctx->event_wqh);
+ __remove_wait_queue(&ctx->event_wqh, &ewq->wq);
+}
+
+int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
+{
+ struct userfaultfd_ctx *ctx = NULL, *octx;
+ struct userfaultfd_fork_ctx *fctx;
+
+ octx = vma->vm_userfaultfd_ctx.ctx;
+ if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) {
+ vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+ vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING);
+ return 0;
+ }
+
+ list_for_each_entry(fctx, fcs, list)
+ if (fctx->orig == octx) {
+ ctx = fctx->new;
+ break;
+ }
+
+ if (!ctx) {
+ fctx = kmalloc(sizeof(*fctx), GFP_KERNEL);
+ if (!fctx)
+ return -ENOMEM;
+
+ ctx = kmem_cache_alloc(userfaultfd_ctx_cachep, GFP_KERNEL);
+ if (!ctx) {
+ kfree(fctx);
+ return -ENOMEM;
+ }
+
+ atomic_set(&ctx->refcount, 1);
+ ctx->flags = octx->flags;
+ ctx->state = UFFD_STATE_RUNNING;
+ ctx->features = octx->features;
+ ctx->released = false;
+ ctx->mm = vma->vm_mm;
+ atomic_inc(&ctx->mm->mm_count);
+
+ userfaultfd_ctx_get(octx);
+ fctx->orig = octx;
+ fctx->new = ctx;
+ list_add_tail(&fctx->list, fcs);
+ }
+
+ vma->vm_userfaultfd_ctx.ctx = ctx;
+ return 0;
+}
+
+static int dup_fctx(struct userfaultfd_fork_ctx *fctx)
+{
+ struct userfaultfd_ctx *ctx = fctx->orig;
+ struct userfaultfd_wait_queue ewq;
+
+ msg_init(&ewq.msg);
+
+ ewq.msg.event = UFFD_EVENT_FORK;
+ ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new;
+
+ return userfaultfd_event_wait_completion(ctx, &ewq);
+}
+
+void dup_userfaultfd_complete(struct list_head *fcs)
+{
+ int ret = 0;
+ struct userfaultfd_fork_ctx *fctx, *n;
+
+ list_for_each_entry_safe(fctx, n, fcs, list) {
+ if (!ret)
+ ret = dup_fctx(fctx);
+ list_del(&fctx->list);
+ kfree(fctx);
+ }
+}
+
+void mremap_userfaultfd_prep(struct vm_area_struct *vma,
+ struct vm_userfaultfd_ctx *vm_ctx)
+{
+ struct userfaultfd_ctx *ctx;
+
+ ctx = vma->vm_userfaultfd_ctx.ctx;
+ if (ctx && (ctx->features & UFFD_FEATURE_EVENT_REMAP)) {
+ vm_ctx->ctx = ctx;
+ userfaultfd_ctx_get(ctx);
+ }
+}
+
+void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx,
+ unsigned long from, unsigned long to,
+ unsigned long len)
+{
+ struct userfaultfd_ctx *ctx = vm_ctx->ctx;
+ struct userfaultfd_wait_queue ewq;
+
+ if (!ctx)
+ return;
+
+ if (to & ~PAGE_MASK) {
+ userfaultfd_ctx_put(ctx);
+ return;
+ }
+
+ msg_init(&ewq.msg);
+
+ ewq.msg.event = UFFD_EVENT_REMAP;
+ ewq.msg.arg.remap.from = from;
+ ewq.msg.arg.remap.to = to;
+ ewq.msg.arg.remap.len = len;
+
+ userfaultfd_event_wait_completion(ctx, &ewq);
+}
+
+void userfaultfd_remove(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start, unsigned long end)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ struct userfaultfd_ctx *ctx;
+ struct userfaultfd_wait_queue ewq;
+
+ ctx = vma->vm_userfaultfd_ctx.ctx;
+ if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE))
+ return;
+
+ userfaultfd_ctx_get(ctx);
+ up_read(&mm->mmap_sem);
+
+ *prev = NULL; /* We wait for ACK w/o the mmap semaphore */
+
+ msg_init(&ewq.msg);
+
+ ewq.msg.event = UFFD_EVENT_REMOVE;
+ ewq.msg.arg.remove.start = start;
+ ewq.msg.arg.remove.end = end;
+
+ userfaultfd_event_wait_completion(ctx, &ewq);
+
+ down_read(&mm->mmap_sem);
+}
+
+static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
+ unsigned long start, unsigned long end)
+{
+ struct userfaultfd_unmap_ctx *unmap_ctx;
+
+ list_for_each_entry(unmap_ctx, unmaps, list)
+ if (unmap_ctx->ctx == ctx && unmap_ctx->start == start &&
+ unmap_ctx->end == end)
+ return true;
+
+ return false;
+}
+
+int userfaultfd_unmap_prep(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct list_head *unmaps)
+{
+ for ( ; vma && vma->vm_start < end; vma = vma->vm_next) {
+ struct userfaultfd_unmap_ctx *unmap_ctx;
+ struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
+
+ if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
+ has_unmap_ctx(ctx, unmaps, start, end))
+ continue;
+
+ unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
+ if (!unmap_ctx)
+ return -ENOMEM;
+
+ userfaultfd_ctx_get(ctx);
+ unmap_ctx->ctx = ctx;
+ unmap_ctx->start = start;
+ unmap_ctx->end = end;
+ list_add_tail(&unmap_ctx->list, unmaps);
+ }
+
+ return 0;
+}
+
+void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf)
+{
+ struct userfaultfd_unmap_ctx *ctx, *n;
+ struct userfaultfd_wait_queue ewq;
+
+ list_for_each_entry_safe(ctx, n, uf, list) {
+ msg_init(&ewq.msg);
+
+ ewq.msg.event = UFFD_EVENT_UNMAP;
+ ewq.msg.arg.remove.start = ctx->start;
+ ewq.msg.arg.remove.end = ctx->end;
+
+ userfaultfd_event_wait_completion(ctx->ctx, &ewq);
+
+ list_del(&ctx->list);
+ kfree(ctx);
+ }
+}
+
+void userfaultfd_exit(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma = mm->mmap;
+
+ /*
+ * We can do the vma walk without locking because the caller
+ * (exit_mm) knows it now has exclusive access
+ */
+ while (vma) {
+ struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
+
+ if (ctx && (ctx->features & UFFD_FEATURE_EVENT_EXIT)) {
+ struct userfaultfd_wait_queue ewq;
+
+ userfaultfd_ctx_get(ctx);
+
+ msg_init(&ewq.msg);
+ ewq.msg.event = UFFD_EVENT_EXIT;
+
+ userfaultfd_event_wait_completion(ctx, &ewq);
+
+ ctx->features &= ~UFFD_FEATURE_EVENT_EXIT;
+ }
+
+ vma = vma->vm_next;
+ }
+}
+
static int userfaultfd_release(struct inode *inode, struct file *file)
{
struct userfaultfd_ctx *ctx = file->private_data;
@@ -522,25 +866,36 @@ wakeup:
}
/* fault_pending_wqh.lock must be hold by the caller */
-static inline struct userfaultfd_wait_queue *find_userfault(
- struct userfaultfd_ctx *ctx)
+static inline struct userfaultfd_wait_queue *find_userfault_in(
+ wait_queue_head_t *wqh)
{
wait_queue_t *wq;
struct userfaultfd_wait_queue *uwq;
- VM_BUG_ON(!spin_is_locked(&ctx->fault_pending_wqh.lock));
+ VM_BUG_ON(!spin_is_locked(&wqh->lock));
uwq = NULL;
- if (!waitqueue_active(&ctx->fault_pending_wqh))
+ if (!waitqueue_active(wqh))
goto out;
/* walk in reverse to provide FIFO behavior to read userfaults */
- wq = list_last_entry(&ctx->fault_pending_wqh.task_list,
- typeof(*wq), task_list);
+ wq = list_last_entry(&wqh->task_list, typeof(*wq), task_list);
uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
out:
return uwq;
}
+static inline struct userfaultfd_wait_queue *find_userfault(
+ struct userfaultfd_ctx *ctx)
+{
+ return find_userfault_in(&ctx->fault_pending_wqh);
+}
+
+static inline struct userfaultfd_wait_queue *find_userfault_evt(
+ struct userfaultfd_ctx *ctx)
+{
+ return find_userfault_in(&ctx->event_wqh);
+}
+
static unsigned int userfaultfd_poll(struct file *file, poll_table *wait)
{
struct userfaultfd_ctx *ctx = file->private_data;
@@ -572,10 +927,42 @@ static unsigned int userfaultfd_poll(struct file *file, poll_table *wait)
smp_mb();
if (waitqueue_active(&ctx->fault_pending_wqh))
ret = POLLIN;
+ else if (waitqueue_active(&ctx->event_wqh))
+ ret = POLLIN;
+
return ret;
default:
- BUG();
+ WARN_ON_ONCE(1);
+ return POLLERR;
+ }
+}
+
+static const struct file_operations userfaultfd_fops;
+
+static int resolve_userfault_fork(struct userfaultfd_ctx *ctx,
+ struct userfaultfd_ctx *new,
+ struct uffd_msg *msg)
+{
+ int fd;
+ struct file *file;
+ unsigned int flags = new->flags & UFFD_SHARED_FCNTL_FLAGS;
+
+ fd = get_unused_fd_flags(flags);
+ if (fd < 0)
+ return fd;
+
+ file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, new,
+ O_RDWR | flags);
+ if (IS_ERR(file)) {
+ put_unused_fd(fd);
+ return PTR_ERR(file);
}
+
+ fd_install(fd, file);
+ msg->arg.reserved.reserved1 = 0;
+ msg->arg.fork.ufd = fd;
+
+ return 0;
}
static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
@@ -584,6 +971,15 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
ssize_t ret;
DECLARE_WAITQUEUE(wait, current);
struct userfaultfd_wait_queue *uwq;
+ /*
+ * Handling fork event requires sleeping operations, so
+ * we drop the event_wqh lock, then do these ops, then
+ * lock it back and wake up the waiter. While the lock is
+ * dropped the ewq may go away so we keep track of it
+ * carefully.
+ */
+ LIST_HEAD(fork_event);
+ struct userfaultfd_ctx *fork_nctx = NULL;
/* always take the fd_wqh lock before the fault_pending_wqh lock */
spin_lock(&ctx->fd_wqh.lock);
@@ -635,6 +1031,29 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
break;
}
spin_unlock(&ctx->fault_pending_wqh.lock);
+
+ spin_lock(&ctx->event_wqh.lock);
+ uwq = find_userfault_evt(ctx);
+ if (uwq) {
+ *msg = uwq->msg;
+
+ if (uwq->msg.event == UFFD_EVENT_FORK) {
+ fork_nctx = (struct userfaultfd_ctx *)
+ (unsigned long)
+ uwq->msg.arg.reserved.reserved1;
+ list_move(&uwq->wq.task_list, &fork_event);
+ spin_unlock(&ctx->event_wqh.lock);
+ ret = 0;
+ break;
+ }
+
+ userfaultfd_event_complete(ctx, uwq);
+ spin_unlock(&ctx->event_wqh.lock);
+ ret = 0;
+ break;
+ }
+ spin_unlock(&ctx->event_wqh.lock);
+
if (signal_pending(current)) {
ret = -ERESTARTSYS;
break;
@@ -651,6 +1070,23 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
__set_current_state(TASK_RUNNING);
spin_unlock(&ctx->fd_wqh.lock);
+ if (!ret && msg->event == UFFD_EVENT_FORK) {
+ ret = resolve_userfault_fork(ctx, fork_nctx, msg);
+
+ if (!ret) {
+ spin_lock(&ctx->event_wqh.lock);
+ if (!list_empty(&fork_event)) {
+ uwq = list_first_entry(&fork_event,
+ typeof(*uwq),
+ wq.task_list);
+ list_del(&uwq->wq.task_list);
+ __add_wait_queue(&ctx->event_wqh, &uwq->wq);
+ userfaultfd_event_complete(ctx, uwq);
+ }
+ spin_unlock(&ctx->event_wqh.lock);
+ }
+ }
+
return ret;
}
@@ -753,6 +1189,12 @@ static __always_inline int validate_range(struct mm_struct *mm,
return 0;
}
+static inline bool vma_can_userfault(struct vm_area_struct *vma)
+{
+ return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) ||
+ vma_is_shmem(vma);
+}
+
static int userfaultfd_register(struct userfaultfd_ctx *ctx,
unsigned long arg)
{
@@ -763,6 +1205,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
struct uffdio_register __user *user_uffdio_register;
unsigned long vm_flags, new_flags;
bool found;
+ bool non_anon_pages;
unsigned long start, end, vma_end;
user_uffdio_register = (struct uffdio_register __user *) arg;
@@ -814,13 +1257,21 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
goto out_unlock;
/*
+ * If the first vma contains huge pages, make sure start address
+ * is aligned to huge page size.
+ */
+ if (is_vm_hugetlb_page(vma)) {
+ unsigned long vma_hpagesize = vma_kernel_pagesize(vma);
+
+ if (start & (vma_hpagesize - 1))
+ goto out_unlock;
+ }
+
+ /*
* Search for not compatible vmas.
- *
- * FIXME: this shall be relaxed later so that it doesn't fail
- * on tmpfs backed vmas (in addition to the current allowance
- * on anonymous vmas).
*/
found = false;
+ non_anon_pages = false;
for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) {
cond_resched();
@@ -829,8 +1280,21 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
/* check not compatible vmas */
ret = -EINVAL;
- if (cur->vm_ops)
+ if (!vma_can_userfault(cur))
goto out_unlock;
+ /*
+ * If this vma contains ending address, and huge pages
+ * check alignment.
+ */
+ if (is_vm_hugetlb_page(cur) && end <= cur->vm_end &&
+ end > cur->vm_start) {
+ unsigned long vma_hpagesize = vma_kernel_pagesize(cur);
+
+ ret = -EINVAL;
+
+ if (end & (vma_hpagesize - 1))
+ goto out_unlock;
+ }
/*
* Check that this vma isn't already owned by a
@@ -843,6 +1307,12 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
cur->vm_userfaultfd_ctx.ctx != ctx)
goto out_unlock;
+ /*
+ * Note vmas containing huge pages
+ */
+ if (is_vm_hugetlb_page(cur) || vma_is_shmem(cur))
+ non_anon_pages = true;
+
found = true;
}
BUG_ON(!found);
@@ -854,7 +1324,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
do {
cond_resched();
- BUG_ON(vma->vm_ops);
+ BUG_ON(!vma_can_userfault(vma));
BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
vma->vm_userfaultfd_ctx.ctx != ctx);
@@ -912,7 +1382,8 @@ out_unlock:
* userland which ioctls methods are guaranteed to
* succeed on this range.
*/
- if (put_user(UFFD_API_RANGE_IOCTLS,
+ if (put_user(non_anon_pages ? UFFD_API_RANGE_IOCTLS_BASIC :
+ UFFD_API_RANGE_IOCTLS,
&user_uffdio_register->ioctls))
ret = -EFAULT;
}
@@ -959,11 +1430,18 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
goto out_unlock;
/*
+ * If the first vma contains huge pages, make sure start address
+ * is aligned to huge page size.
+ */
+ if (is_vm_hugetlb_page(vma)) {
+ unsigned long vma_hpagesize = vma_kernel_pagesize(vma);
+
+ if (start & (vma_hpagesize - 1))
+ goto out_unlock;
+ }
+
+ /*
* Search for not compatible vmas.
- *
- * FIXME: this shall be relaxed later so that it doesn't fail
- * on tmpfs backed vmas (in addition to the current allowance
- * on anonymous vmas).
*/
found = false;
ret = -EINVAL;
@@ -980,7 +1458,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
* provides for more strict behavior to notice
* unregistration errors.
*/
- if (cur->vm_ops)
+ if (!vma_can_userfault(cur))
goto out_unlock;
found = true;
@@ -994,7 +1472,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
do {
cond_resched();
- BUG_ON(vma->vm_ops);
+ BUG_ON(!vma_can_userfault(vma));
/*
* Nothing to do: this vma is already registered into this
@@ -1007,6 +1485,19 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
start = vma->vm_start;
vma_end = min(end, vma->vm_end);
+ if (userfaultfd_missing(vma)) {
+ /*
+ * Wake any concurrent pending userfault while
+ * we unregister, so they will not hang
+ * permanently and it avoids userland to call
+ * UFFDIO_WAKE explicitly.
+ */
+ struct userfaultfd_wake_range range;
+ range.start = start;
+ range.len = vma_end - start;
+ wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range);
+ }
+
new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP);
prev = vma_merge(mm, prev, start, vma_end, new_flags,
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
@@ -1116,6 +1607,8 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
uffdio_copy.len);
mmput(ctx->mm);
+ } else {
+ return -ENOSPC;
}
if (unlikely(put_user(ret, &user_uffdio_copy->copy)))
return -EFAULT;
@@ -1178,6 +1671,14 @@ out:
return ret;
}
+static inline unsigned int uffd_ctx_features(__u64 user_features)
+{
+ /*
+ * For the current set of features the bits just coincide
+ */
+ return (unsigned int)user_features;
+}
+
/*
* userland asks for a certain API version and we return which bits
* and ioctl commands are implemented in this kernel for such API
@@ -1189,6 +1690,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
struct uffdio_api uffdio_api;
void __user *buf = (void __user *)arg;
int ret;
+ __u64 features;
ret = -EINVAL;
if (ctx->state != UFFD_STATE_WAIT_API)
@@ -1196,19 +1698,23 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
ret = -EFAULT;
if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api)))
goto out;
- if (uffdio_api.api != UFFD_API || uffdio_api.features) {
+ features = uffdio_api.features;
+ if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES)) {
memset(&uffdio_api, 0, sizeof(uffdio_api));
if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
goto out;
ret = -EINVAL;
goto out;
}
+ /* report all available features and ioctls to userland */
uffdio_api.features = UFFD_API_FEATURES;
uffdio_api.ioctls = UFFD_API_IOCTLS;
ret = -EFAULT;
if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
goto out;
ctx->state = UFFD_STATE_RUNNING;
+ /* only enable the requested features for this uffd context */
+ ctx->features = uffd_ctx_features(features);
ret = 0;
out:
return ret;
@@ -1295,6 +1801,7 @@ static void init_once_userfaultfd_ctx(void *mem)
init_waitqueue_head(&ctx->fault_pending_wqh);
init_waitqueue_head(&ctx->fault_wqh);
+ init_waitqueue_head(&ctx->event_wqh);
init_waitqueue_head(&ctx->fd_wqh);
seqcount_init(&ctx->refile_seq);
}
@@ -1335,6 +1842,7 @@ static struct file *userfaultfd_file_create(int flags)
atomic_set(&ctx->refcount, 1);
ctx->flags = flags;
+ ctx->features = 0;
ctx->state = UFFD_STATE_WAIT_API;
ctx->released = false;
ctx->mm = current->mm;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 086440e79b86..a50eca676670 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1379,22 +1379,21 @@ xfs_file_llseek(
*/
STATIC int
xfs_filemap_page_mkwrite(
- struct vm_area_struct *vma,
struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
int ret;
trace_xfs_filemap_page_mkwrite(XFS_I(inode));
sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode)) {
- ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
+ ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
} else {
- ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
+ ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
ret = block_page_mkwrite_return(ret);
}
@@ -1406,23 +1405,22 @@ xfs_filemap_page_mkwrite(
STATIC int
xfs_filemap_fault(
- struct vm_area_struct *vma,
struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
int ret;
trace_xfs_filemap_fault(XFS_I(inode));
/* DAX can shortcut the normal fault path on write faults! */
if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode))
- return xfs_filemap_page_mkwrite(vma, vmf);
+ return xfs_filemap_page_mkwrite(vmf);
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode))
- ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
+ ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
else
- ret = filemap_fault(vma, vmf);
+ ret = filemap_fault(vmf);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
return ret;
@@ -1431,36 +1429,34 @@ xfs_filemap_fault(
/*
* Similar to xfs_filemap_fault(), the DAX fault path can call into here on
* both read and write faults. Hence we need to handle both cases. There is no
- * ->pmd_mkwrite callout for huge pages, so we have a single function here to
+ * ->huge_mkwrite callout for huge pages, so we have a single function here to
* handle both cases here. @flags carries the information on the type of fault
* occuring.
*/
STATIC int
-xfs_filemap_pmd_fault(
- struct vm_area_struct *vma,
- unsigned long addr,
- pmd_t *pmd,
- unsigned int flags)
+xfs_filemap_huge_fault(
+ struct vm_fault *vmf,
+ enum page_entry_size pe_size)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct xfs_inode *ip = XFS_I(inode);
int ret;
if (!IS_DAX(inode))
return VM_FAULT_FALLBACK;
- trace_xfs_filemap_pmd_fault(ip);
+ trace_xfs_filemap_huge_fault(ip);
- if (flags & FAULT_FLAG_WRITE) {
+ if (vmf->flags & FAULT_FLAG_WRITE) {
sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
}
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- ret = dax_iomap_pmd_fault(vma, addr, pmd, flags, &xfs_iomap_ops);
+ ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- if (flags & FAULT_FLAG_WRITE)
+ if (vmf->flags & FAULT_FLAG_WRITE)
sb_end_pagefault(inode->i_sb);
return ret;
@@ -1474,11 +1470,10 @@ xfs_filemap_pmd_fault(
*/
static int
xfs_filemap_pfn_mkwrite(
- struct vm_area_struct *vma,
struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct xfs_inode *ip = XFS_I(inode);
int ret = VM_FAULT_NOPAGE;
loff_t size;
@@ -1486,7 +1481,7 @@ xfs_filemap_pfn_mkwrite(
trace_xfs_filemap_pfn_mkwrite(ip);
sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
/* check if the faulting page hasn't raced with truncate */
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
@@ -1494,7 +1489,7 @@ xfs_filemap_pfn_mkwrite(
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
else if (IS_DAX(inode))
- ret = dax_pfn_mkwrite(vma, vmf);
+ ret = dax_pfn_mkwrite(vmf);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
sb_end_pagefault(inode->i_sb);
return ret;
@@ -1503,7 +1498,7 @@ xfs_filemap_pfn_mkwrite(
static const struct vm_operations_struct xfs_file_vm_ops = {
.fault = xfs_filemap_fault,
- .pmd_fault = xfs_filemap_pmd_fault,
+ .huge_fault = xfs_filemap_huge_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = xfs_filemap_page_mkwrite,
.pfn_mkwrite = xfs_filemap_pfn_mkwrite,
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index fb7555e73a62..383ac227ce2c 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -687,7 +687,7 @@ DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag);
DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid);
DEFINE_INODE_EVENT(xfs_filemap_fault);
-DEFINE_INODE_EVENT(xfs_filemap_pmd_fault);
+DEFINE_INODE_EVENT(xfs_filemap_huge_fault);
DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite);
diff --git a/include/asm-generic/kprobes.h b/include/asm-generic/kprobes.h
new file mode 100644
index 000000000000..57af9f21d148
--- /dev/null
+++ b/include/asm-generic/kprobes.h
@@ -0,0 +1,25 @@
+#ifndef _ASM_GENERIC_KPROBES_H
+#define _ASM_GENERIC_KPROBES_H
+
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#ifdef CONFIG_KPROBES
+/*
+ * Blacklist ganerating macro. Specify functions which is not probed
+ * by using this macro.
+ */
+# define __NOKPROBE_SYMBOL(fname) \
+static unsigned long __used \
+ __attribute__((__section__("_kprobe_blacklist"))) \
+ _kbl_addr_##fname = (unsigned long)fname;
+# define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname)
+/* Use this to forbid a kprobes attach on very low level functions */
+# define __kprobes __attribute__((__section__(".kprobes.text")))
+# define nokprobe_inline __always_inline
+#else
+# define NOKPROBE_SYMBOL(fname)
+# define __kprobes
+# define nokprobe_inline inline
+#endif
+#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
+
+#endif /* _ASM_GENERIC_KPROBES_H */
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 18af2bcefe6a..a0aba0f9c57b 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -36,6 +36,9 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma,
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp,
pmd_t entry, int dirty);
+extern int pudp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp,
+ pud_t entry, int dirty);
#else
static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp,
@@ -44,6 +47,13 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
BUILD_BUG();
return 0;
}
+static inline int pudp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp,
+ pud_t entry, int dirty)
+{
+ BUILD_BUG();
+ return 0;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
@@ -121,8 +131,8 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
}
#endif
-#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long address,
pmd_t *pmdp)
@@ -131,20 +141,40 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
pmd_clear(pmdp);
return pmd;
}
+#endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */
+#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
+static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long address,
+ pud_t *pudp)
+{
+ pud_t pud = *pudp;
+
+ pud_clear(pudp);
+ return pud;
+}
+#endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-#endif
-#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp,
int full)
{
return pmdp_huge_get_and_clear(mm, address, pmdp);
}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
+#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
+static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm,
+ unsigned long address, pud_t *pudp,
+ int full)
+{
+ return pudp_huge_get_and_clear(mm, address, pudp);
+}
+#endif
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
unsigned long address, pte_t *ptep,
@@ -181,6 +211,9 @@ extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
unsigned long address,
pmd_t *pmdp);
+extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
+ unsigned long address,
+ pud_t *pudp);
#endif
#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
@@ -208,6 +241,23 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
+#ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline void pudp_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pud_t *pudp)
+{
+ pud_t old_pud = *pudp;
+
+ set_pud_at(mm, address, pudp, pud_wrprotect(old_pud));
+}
+#else
+static inline void pudp_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pud_t *pudp)
+{
+ BUILD_BUG();
+}
+#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
+#endif
#ifndef pmdp_collapse_flush
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -273,12 +323,23 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
return pmd_val(pmd_a) == pmd_val(pmd_b);
}
+
+static inline int pud_same(pud_t pud_a, pud_t pud_b)
+{
+ return pud_val(pud_a) == pud_val(pud_b);
+}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
BUILD_BUG();
return 0;
}
+
+static inline int pud_same(pud_t pud_a, pud_t pud_b)
+{
+ BUILD_BUG();
+ return 0;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
@@ -640,6 +701,15 @@ static inline int pmd_write(pmd_t pmd)
#endif /* __HAVE_ARCH_PMD_WRITE */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
+ (defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+ !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
+static inline int pud_trans_huge(pud_t pud)
+{
+ return 0;
+}
+#endif
+
#ifndef pmd_read_atomic
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
@@ -785,8 +855,10 @@ static inline int pmd_clear_huge(pmd_t *pmd)
* e.g. see arch/arc: flush_pmd_tlb_range
*/
#define flush_pmd_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
+#define flush_pud_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
#else
#define flush_pmd_tlb_range(vma, addr, end) BUILD_BUG()
+#define flush_pud_tlb_range(vma, addr, end) BUILD_BUG()
#endif
#endif
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 7eed8cf3130a..4329bc6ef04b 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -232,6 +232,20 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
__tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \
} while (0)
+/**
+ * tlb_remove_pud_tlb_entry - remember a pud mapping for later tlb
+ * invalidation. This is a nop so far, because only x86 needs it.
+ */
+#ifndef __tlb_remove_pud_tlb_entry
+#define __tlb_remove_pud_tlb_entry(tlb, pudp, address) do {} while (0)
+#endif
+
+#define tlb_remove_pud_tlb_entry(tlb, pudp, address) \
+ do { \
+ __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE); \
+ __tlb_remove_pud_tlb_entry(tlb, pudp, address); \
+ } while (0)
+
/*
* For things like page tables caches (ie caching addresses "inside" the
* page tables, like x86 does), for legacy reasons, flushing an
diff --git a/include/linux/bug.h b/include/linux/bug.h
index baff2e8fc8a8..5828489309bb 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -124,18 +124,20 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr,
/*
* Since detected data corruption should stop operation on the affected
- * structures, this returns false if the corruption condition is found.
+ * structures. Return value must be checked and sanely acted on by caller.
*/
+static inline __must_check bool check_data_corruption(bool v) { return v; }
#define CHECK_DATA_CORRUPTION(condition, fmt, ...) \
- do { \
- if (unlikely(condition)) { \
+ check_data_corruption(({ \
+ bool corruption = unlikely(condition); \
+ if (corruption) { \
if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \
pr_err(fmt, ##__VA_ARGS__); \
BUG(); \
} else \
WARN(1, fmt, ##__VA_ARGS__); \
- return false; \
} \
- } while (0)
+ corruption; \
+ }))
#endif /* _LINUX_BUG_H */
diff --git a/include/linux/cma.h b/include/linux/cma.h
index 6f0a91b37f68..03f32d0bd1d8 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -29,6 +29,7 @@ extern int __init cma_declare_contiguous(phys_addr_t base,
extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
unsigned int order_per_bit,
struct cma **res_cma);
-extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align);
+extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
+ gfp_t gfp_mask);
extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count);
#endif
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 9e40be522793..aef47be2a5c1 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -711,8 +711,10 @@ int __compat_save_altstack(compat_stack_t __user *, unsigned long);
compat_stack_t __user *__uss = uss; \
struct task_struct *t = current; \
put_user_ex(ptr_to_compat((void __user *)t->sas_ss_sp), &__uss->ss_sp); \
- put_user_ex(sas_ss_flags(sp), &__uss->ss_flags); \
+ put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \
put_user_ex(t->sas_ss_size, &__uss->ss_size); \
+ if (t->sas_ss_flags & SS_AUTODISARM) \
+ sas_ss_reset(t); \
} while (0);
asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index cad999386d61..d5e1fedbad24 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -122,6 +122,7 @@
#define __attribute_const__ __attribute__((__const__))
#define __maybe_unused __attribute__((unused))
#define __always_unused __attribute__((unused))
+#define __mode(x) __attribute__((mode(x)))
/* gcc version specific checks */
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index d93221e4849f..6e8e160b1e4b 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -585,12 +585,4 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
(_________p1); \
})
-/* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */
-#ifdef CONFIG_KPROBES
-# define __kprobes __attribute__((__section__(".kprobes.text")))
-# define nokprobe_inline __always_inline
-#else
-# define __kprobes
-# define nokprobe_inline inline
-#endif
#endif /* __LINUX_COMPILER_H */
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 2983e52efd07..c5c1bf29efeb 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -38,8 +38,8 @@ static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops);
-int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
- const struct iomap_ops *ops);
+int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
+ const struct iomap_ops *ops);
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
@@ -71,21 +71,13 @@ static inline unsigned int dax_radix_order(void *entry)
return PMD_SHIFT - PAGE_SHIFT;
return 0;
}
-int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmd, unsigned int flags, const struct iomap_ops *ops);
#else
static inline unsigned int dax_radix_order(void *entry)
{
return 0;
}
-static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmd, unsigned int flags,
- const struct iomap_ops *ops)
-{
- return VM_FAULT_FALLBACK;
-}
#endif
-int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
+int dax_pfn_mkwrite(struct vm_fault *vmf);
static inline bool vma_is_dax(struct vm_area_struct *vma)
{
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index fec734df1524..b67bf6ac907d 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -112,7 +112,7 @@ static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size,
}
struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
- unsigned int order);
+ unsigned int order, gfp_t gfp_mask);
bool dma_release_from_contiguous(struct device *dev, struct page *pages,
int count);
@@ -145,7 +145,7 @@ int dma_declare_contiguous(struct device *dev, phys_addr_t size,
static inline
struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
- unsigned int order)
+ unsigned int order, gfp_t gfp_mask)
{
return NULL;
}
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0fe0b6295ab5..db373b9d3223 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -541,7 +541,7 @@ static inline bool pm_suspended_storage(void)
#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
/* The below functions must be run on a range from a single zone. */
extern int alloc_contig_range(unsigned long start, unsigned long end,
- unsigned migratetype);
+ unsigned migratetype, gfp_t gfp_mask);
extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
#endif
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 97e478d6b690..a3762d49ba39 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -6,6 +6,18 @@ extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
struct vm_area_struct *vma);
extern void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd);
+extern int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
+ struct vm_area_struct *vma);
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+extern void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
+#else
+static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
+{
+}
+#endif
+
extern int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
unsigned long addr,
@@ -17,6 +29,9 @@ extern bool madvise_free_huge_pmd(struct mmu_gather *tlb,
extern int zap_huge_pmd(struct mmu_gather *tlb,
struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr);
+extern int zap_huge_pud(struct mmu_gather *tlb,
+ struct vm_area_struct *vma,
+ pud_t *pud, unsigned long addr);
extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
unsigned char *vec);
@@ -26,13 +41,16 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, pgprot_t newprot,
int prot_numa);
-int vmf_insert_pfn_pmd(struct vm_area_struct *, unsigned long addr, pmd_t *,
- pfn_t pfn, bool write);
+int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd, pfn_t pfn, bool write);
+int vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
+ pud_t *pud, pfn_t pfn, bool write);
enum transparent_hugepage_flag {
TRANSPARENT_HUGEPAGE_FLAG,
TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
+ TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG,
TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG,
@@ -57,13 +75,14 @@ extern struct kobj_attribute shmem_enabled_attr;
#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd, int flags);
-
#define HPAGE_PMD_SHIFT PMD_SHIFT
#define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT)
#define HPAGE_PMD_MASK (~(HPAGE_PMD_SIZE - 1))
+#define HPAGE_PUD_SHIFT PUD_SHIFT
+#define HPAGE_PUD_SIZE ((1UL) << HPAGE_PUD_SHIFT)
+#define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1))
+
extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
#define transparent_hugepage_enabled(__vma) \
@@ -117,6 +136,17 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
bool freeze, struct page *page);
+void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
+ unsigned long address);
+
+#define split_huge_pud(__vma, __pud, __address) \
+ do { \
+ pud_t *____pud = (__pud); \
+ if (pud_trans_huge(*____pud) \
+ || pud_devmap(*____pud)) \
+ __split_huge_pud(__vma, __pud, __address); \
+ } while (0)
+
extern int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice);
extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
@@ -125,6 +155,8 @@ extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
long adjust_next);
extern spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd,
struct vm_area_struct *vma);
+extern spinlock_t *__pud_trans_huge_lock(pud_t *pud,
+ struct vm_area_struct *vma);
/* mmap_sem must be held on entry */
static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
struct vm_area_struct *vma)
@@ -135,6 +167,15 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
else
return NULL;
}
+static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
+ struct vm_area_struct *vma)
+{
+ VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
+ if (pud_trans_huge(*pud) || pud_devmap(*pud))
+ return __pud_trans_huge_lock(pud, vma);
+ else
+ return NULL;
+}
static inline int hpage_nr_pages(struct page *page)
{
if (unlikely(PageTransHuge(page)))
@@ -142,6 +183,11 @@ static inline int hpage_nr_pages(struct page *page)
return 1;
}
+struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd, int flags);
+struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
+ pud_t *pud, int flags);
+
extern int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
extern struct page *huge_zero_page;
@@ -156,6 +202,11 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
return is_huge_zero_page(pmd_page(pmd));
}
+static inline bool is_huge_zero_pud(pud_t pud)
+{
+ return false;
+}
+
struct page *mm_get_huge_zero_page(struct mm_struct *mm);
void mm_put_huge_zero_page(struct mm_struct *mm);
@@ -166,6 +217,10 @@ void mm_put_huge_zero_page(struct mm_struct *mm);
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
#define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; })
+#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; })
+#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
+#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
+
#define hpage_nr_pages(x) 1
#define transparent_hugepage_enabled(__vma) 0
@@ -194,6 +249,9 @@ static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
static inline void split_huge_pmd_address(struct vm_area_struct *vma,
unsigned long address, bool freeze, struct page *page) {}
+#define split_huge_pud(__vma, __pmd, __address) \
+ do { } while (0)
+
static inline int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice)
{
@@ -211,6 +269,11 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
{
return NULL;
}
+static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
+ struct vm_area_struct *vma)
+{
+ return NULL;
+}
static inline int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd)
{
@@ -222,6 +285,11 @@ static inline bool is_huge_zero_page(struct page *page)
return false;
}
+static inline bool is_huge_zero_pud(pud_t pud)
+{
+ return false;
+}
+
static inline void mm_put_huge_zero_page(struct mm_struct *mm)
{
return;
@@ -232,6 +300,12 @@ static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
{
return NULL;
}
+
+static inline struct page *follow_devmap_pud(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pud, int flags)
+{
+ return NULL;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* _LINUX_HUGE_MM_H */
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 48c76d612d40..503099d8aada 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -65,7 +65,8 @@ int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int,
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
struct page **, struct vm_area_struct **,
- unsigned long *, unsigned long *, long, unsigned int);
+ unsigned long *, unsigned long *, long, unsigned int,
+ int *);
void unmap_hugepage_range(struct vm_area_struct *,
unsigned long, unsigned long, struct page *);
void __unmap_hugepage_range_final(struct mmu_gather *tlb,
@@ -81,6 +82,11 @@ void hugetlb_show_meminfo(void);
unsigned long hugetlb_total_pages(void);
int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags);
+int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr,
+ unsigned long src_addr,
+ struct page **pagep);
int hugetlb_reserve_pages(struct inode *inode, long from, long to,
struct vm_area_struct *vma,
vm_flags_t vm_flags);
@@ -131,7 +137,7 @@ static inline unsigned long hugetlb_total_pages(void)
return 0;
}
-#define follow_hugetlb_page(m,v,p,vs,a,b,i,w) ({ BUG(); 0; })
+#define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n) ({ BUG(); 0; })
#define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL)
#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
static inline void hugetlb_report_meminfo(struct seq_file *m)
@@ -149,6 +155,8 @@ static inline void hugetlb_show_meminfo(void)
#define is_hugepage_only_range(mm, addr, len) 0
#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
#define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; })
+#define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
+ src_addr, pagep) ({ BUG(); 0; })
#define huge_pte_offset(mm, address) 0
static inline int dequeue_hwpoisoned_huge_page(struct page *page)
{
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 891459caa278..7291810067eb 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -79,8 +79,7 @@ int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
bool *did_zero, const struct iomap_ops *ops);
int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
const struct iomap_ops *ops);
-int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
- const struct iomap_ops *ops);
+int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops);
int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
loff_t start, loff_t len, const struct iomap_ops *ops);
diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
index 1c30014ed176..d29e1e21bf3f 100644
--- a/include/linux/iopoll.h
+++ b/include/linux/iopoll.h
@@ -17,7 +17,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
-#include <linux/hrtimer.h>
+#include <linux/ktime.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/io.h>
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 820c0ad54a01..c908b25bf5a5 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -52,7 +52,7 @@ void kasan_free_pages(struct page *page, unsigned int order);
void kasan_cache_create(struct kmem_cache *cache, size_t *size,
unsigned long *flags);
void kasan_cache_shrink(struct kmem_cache *cache);
-void kasan_cache_destroy(struct kmem_cache *cache);
+void kasan_cache_shutdown(struct kmem_cache *cache);
void kasan_poison_slab(struct page *page);
void kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
@@ -98,7 +98,7 @@ static inline void kasan_cache_create(struct kmem_cache *cache,
size_t *size,
unsigned long *flags) {}
static inline void kasan_cache_shrink(struct kmem_cache *cache) {}
-static inline void kasan_cache_destroy(struct kmem_cache *cache) {}
+static inline void kasan_cache_shutdown(struct kmem_cache *cache) {}
static inline void kasan_poison_slab(struct page *page) {}
static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index cb09238f6d32..4c26dc3a8295 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -100,16 +100,18 @@
)
/*
- * Divide positive or negative dividend by positive divisor and round
- * to closest integer. Result is undefined for negative divisors and
- * for negative dividends if the divisor variable type is unsigned.
+ * Divide positive or negative dividend by positive or negative divisor
+ * and round to closest integer. Result is undefined for negative
+ * divisors if he dividend variable type is unsigned and for negative
+ * dividends if the divisor variable type is unsigned.
*/
#define DIV_ROUND_CLOSEST(x, divisor)( \
{ \
typeof(x) __x = x; \
typeof(divisor) __d = divisor; \
(((typeof(x))-1) > 0 || \
- ((typeof(divisor))-1) > 0 || (__x) > 0) ? \
+ ((typeof(divisor))-1) > 0 || \
+ (((__x) > 0) == ((__d) > 0))) ? \
(((__x) + ((__d) / 2)) / (__d)) : \
(((__x) - ((__d) / 2)) / (__d)); \
} \
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index d419d0e51fe5..e98e546b543c 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -283,6 +283,8 @@ phys_addr_t paddr_vmcoreinfo_note(void);
vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name)
#define VMCOREINFO_CONFIG(name) \
vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
+#define VMCOREINFO_PHYS_BASE(value) \
+ vmcoreinfo_append_str("PHYS_BASE=%lx\n", (unsigned long)value)
extern struct kimage *kexec_image;
extern struct kimage *kexec_crash_image;
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 16ddfb8b304a..c328e4f7dcad 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -29,7 +29,7 @@
* <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
* <prasanna@in.ibm.com> added function-return probes.
*/
-#include <linux/compiler.h> /* for __kprobes */
+#include <linux/compiler.h>
#include <linux/linkage.h>
#include <linux/list.h>
#include <linux/notifier.h>
@@ -40,9 +40,9 @@
#include <linux/rcupdate.h>
#include <linux/mutex.h>
#include <linux/ftrace.h>
+#include <asm/kprobes.h>
#ifdef CONFIG_KPROBES
-#include <asm/kprobes.h>
/* kprobe_status settings */
#define KPROBE_HIT_ACTIVE 0x00000001
@@ -51,6 +51,7 @@
#define KPROBE_HIT_SSDONE 0x00000008
#else /* CONFIG_KPROBES */
+#include <asm-generic/kprobes.h>
typedef int kprobe_opcode_t;
struct arch_specific_insn {
int dummy;
@@ -509,18 +510,4 @@ static inline bool is_kprobe_optinsn_slot(unsigned long addr)
}
#endif
-#ifdef CONFIG_KPROBES
-/*
- * Blacklist ganerating macro. Specify functions which is not probed
- * by using this macro.
- */
-#define __NOKPROBE_SYMBOL(fname) \
-static unsigned long __used \
- __attribute__((section("_kprobe_blacklist"))) \
- _kbl_addr_##fname = (unsigned long)fname;
-#define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname)
-#else
-#define NOKPROBE_SYMBOL(fname)
-#endif
-
#endif /* _LINUX_KPROBES_H */
diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index 6b784c59f321..a3912d7984b5 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -1,87 +1,648 @@
-#ifndef __LZ4_H__
-#define __LZ4_H__
-/*
- * LZ4 Kernel Interface
+/* LZ4 Kernel Interface
*
* Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
+ *
+ * This file is based on the original header file
+ * for LZ4 - Fast LZ compression algorithm.
+ *
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ * - LZ4 homepage : http://www.lz4.org
+ * - LZ4 source repository : https://github.com/lz4/lz4
*/
-#define LZ4_MEM_COMPRESS (16384)
-#define LZ4HC_MEM_COMPRESS (262144 + (2 * sizeof(unsigned char *)))
+#ifndef __LZ4_H__
+#define __LZ4_H__
+
+#include <linux/types.h>
+#include <linux/string.h> /* memset, memcpy */
+
+/*-************************************************************************
+ * CONSTANTS
+ **************************************************************************/
/*
- * lz4_compressbound()
- * Provides the maximum size that LZ4 may output in a "worst case" scenario
- * (input data not compressible)
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes
+ * (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
*/
-static inline size_t lz4_compressbound(size_t isize)
-{
- return isize + (isize / 255) + 16;
-}
+#define LZ4_MEMORY_USAGE 14
+
+#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize) (\
+ (unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE \
+ ? 0 \
+ : (isize) + ((isize)/255) + 16)
+
+#define LZ4_ACCELERATION_DEFAULT 1
+#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)
+
+#define LZ4HC_MIN_CLEVEL 3
+#define LZ4HC_DEFAULT_CLEVEL 9
+#define LZ4HC_MAX_CLEVEL 16
+
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+#define LZ4HC_HASH_LOG (LZ4HC_DICTIONARY_LOGSIZE - 1)
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+/*-************************************************************************
+ * STREAMING CONSTANTS AND STRUCTURES
+ **************************************************************************/
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
+
+#define LZ4_STREAMHCSIZE 262192
+#define LZ4_STREAMHCSIZE_SIZET (262192 / sizeof(size_t))
+
+#define LZ4_STREAMDECODESIZE_U64 4
+#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * \
+ sizeof(unsigned long long))
/*
- * lz4_compress()
- * src : source address of the original data
- * src_len : size of the original data
- * dst : output buffer address of the compressed data
- * This requires 'dst' of size LZ4_COMPRESSBOUND.
- * dst_len : is the output size, which is returned after compress done
- * workmem : address of the working memory.
- * This requires 'workmem' of size LZ4_MEM_COMPRESS.
- * return : Success if return 0
- * Error if return (< 0)
- * note : Destination buffer and workmem must be already allocated with
- * the defined size.
- */
-int lz4_compress(const unsigned char *src, size_t src_len,
- unsigned char *dst, size_t *dst_len, void *wrkmem);
-
- /*
- * lz4hc_compress()
- * src : source address of the original data
- * src_len : size of the original data
- * dst : output buffer address of the compressed data
- * This requires 'dst' of size LZ4_COMPRESSBOUND.
- * dst_len : is the output size, which is returned after compress done
- * workmem : address of the working memory.
- * This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
- * return : Success if return 0
- * Error if return (< 0)
- * note : Destination buffer and workmem must be already allocated with
- * the defined size.
- */
-int lz4hc_compress(const unsigned char *src, size_t src_len,
- unsigned char *dst, size_t *dst_len, void *wrkmem);
+ * LZ4_stream_t - information structure to track an LZ4 stream.
+ */
+typedef struct {
+ uint32_t hashTable[LZ4_HASH_SIZE_U32];
+ uint32_t currentOffset;
+ uint32_t initCheck;
+ const uint8_t *dictionary;
+ uint8_t *bufferStart;
+ uint32_t dictSize;
+} LZ4_stream_t_internal;
+typedef union {
+ unsigned long long table[LZ4_STREAMSIZE_U64];
+ LZ4_stream_t_internal internal_donotuse;
+} LZ4_stream_t;
/*
- * lz4_decompress()
- * src : source address of the compressed data
- * src_len : is the input size, whcih is returned after decompress done
- * dest : output buffer address of the decompressed data
- * actual_dest_len: is the size of uncompressed data, supposing it's known
- * return : Success if return 0
- * Error if return (< 0)
- * note : Destination buffer must be already allocated.
- * slightly faster than lz4_decompress_unknownoutputsize()
- */
-int lz4_decompress(const unsigned char *src, size_t *src_len,
- unsigned char *dest, size_t actual_dest_len);
+ * LZ4_streamHC_t - information structure to track an LZ4HC stream.
+ */
+typedef struct {
+ unsigned int hashTable[LZ4HC_HASHTABLESIZE];
+ unsigned short chainTable[LZ4HC_MAXD];
+ /* next block to continue on current prefix */
+ const unsigned char *end;
+ /* All index relative to this position */
+ const unsigned char *base;
+ /* alternate base for extDict */
+ const unsigned char *dictBase;
+ /* below that point, need extDict */
+ unsigned int dictLimit;
+ /* below that point, no more dict */
+ unsigned int lowLimit;
+ /* index from which to continue dict update */
+ unsigned int nextToUpdate;
+ unsigned int compressionLevel;
+} LZ4HC_CCtx_internal;
+typedef union {
+ size_t table[LZ4_STREAMHCSIZE_SIZET];
+ LZ4HC_CCtx_internal internal_donotuse;
+} LZ4_streamHC_t;
/*
- * lz4_decompress_unknownoutputsize()
- * src : source address of the compressed data
- * src_len : is the input size, therefore the compressed size
- * dest : output buffer address of the decompressed data
- * dest_len: is the max size of the destination buffer, which is
- * returned with actual size of decompressed data after
- * decompress done
- * return : Success if return 0
- * Error if return (< 0)
- * note : Destination buffer must be already allocated.
- */
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
- unsigned char *dest, size_t *dest_len);
+ * LZ4_streamDecode_t - information structure to track an
+ * LZ4 stream during decompression.
+ *
+ * init this structure using LZ4_setStreamDecode (or memset()) before first use
+ */
+typedef struct {
+ const uint8_t *externalDict;
+ size_t extDictSize;
+ const uint8_t *prefixEnd;
+ size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+typedef union {
+ unsigned long long table[LZ4_STREAMDECODESIZE_U64];
+ LZ4_streamDecode_t_internal internal_donotuse;
+} LZ4_streamDecode_t;
+
+/*-************************************************************************
+ * SIZE OF STATE
+ **************************************************************************/
+#define LZ4_MEM_COMPRESS LZ4_STREAMSIZE
+#define LZ4HC_MEM_COMPRESS LZ4_STREAMHCSIZE
+
+/*-************************************************************************
+ * Compression Functions
+ **************************************************************************/
+
+/**
+ * LZ4_compressBound() - Max. output size in worst case szenarios
+ * @isize: Size of the input data
+ *
+ * Return: Max. size LZ4 may output in a "worst case" szenario
+ * (data not compressible)
+ */
+static inline int LZ4_compressBound(size_t isize)
+{
+ return LZ4_COMPRESSBOUND(isize);
+}
+
+/**
+ * LZ4_compress_default() - Compress data from source to dest
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxOutputSize: full or partial size of buffer 'dest'
+ * which must be already allocated
+ * @wrkmem: address of the working memory.
+ * This requires 'workmem' of LZ4_MEM_COMPRESS.
+ *
+ * Compresses 'sourceSize' bytes from buffer 'source'
+ * into already allocated 'dest' buffer of size 'maxOutputSize'.
+ * Compression is guaranteed to succeed if
+ * 'maxOutputSize' >= LZ4_compressBound(inputSize).
+ * It also runs faster, so it's a recommended setting.
+ * If the function cannot compress 'source' into a more limited 'dest' budget,
+ * compression stops *immediately*, and the function result is zero.
+ * As a consequence, 'dest' content is not valid.
+ *
+ * Return: Number of bytes written into buffer 'dest'
+ * (necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+ int maxOutputSize, void *wrkmem);
+
+/**
+ * LZ4_compress_fast() - As LZ4_compress_default providing an acceleration param
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxOutputSize: full or partial size of buffer 'dest'
+ * which must be already allocated
+ * @acceleration: acceleration factor
+ * @wrkmem: address of the working memory.
+ * This requires 'workmem' of LZ4_MEM_COMPRESS.
+ *
+ * Same as LZ4_compress_default(), but allows to select an "acceleration"
+ * factor. The larger the acceleration value, the faster the algorithm,
+ * but also the lesser the compression. It's a trade-off. It can be fine tuned,
+ * with each successive value providing roughly +~3% to speed.
+ * An acceleration value of "1" is the same as regular LZ4_compress_default()
+ * Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT, which is 1.
+ *
+ * Return: Number of bytes written into buffer 'dest'
+ * (necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+ int maxOutputSize, int acceleration, void *wrkmem);
+
+/**
+ * LZ4_compress_destSize() - Compress as much data as possible
+ * from source to dest
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @sourceSizePtr: will be modified to indicate how many bytes where read
+ * from 'source' to fill 'dest'. New value is necessarily <= old value.
+ * @targetDestSize: Size of buffer 'dest' which must be already allocated
+ * @wrkmem: address of the working memory.
+ * This requires 'workmem' of LZ4_MEM_COMPRESS.
+ *
+ * Reverse the logic, by compressing as much data as possible
+ * from 'source' buffer into already allocated buffer 'dest'
+ * of size 'targetDestSize'.
+ * This function either compresses the entire 'source' content into 'dest'
+ * if it's large enough, or fill 'dest' buffer completely with as much data as
+ * possible from 'source'.
+ *
+ * Return: Number of bytes written into 'dest' (necessarily <= targetDestSize)
+ * or 0 if compression fails
+ */
+int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr,
+ int targetDestSize, void *wrkmem);
+
+/*-************************************************************************
+ * Decompression Functions
+ **************************************************************************/
+
+/**
+ * LZ4_decompress_fast() - Decompresses data from 'source' into 'dest'
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ * which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ *
+ * Decompresses data from 'source' into 'dest'.
+ * This function fully respect memory boundaries for properly formed
+ * compressed data.
+ * It is a bit faster than LZ4_decompress_safe().
+ * However, it does not provide any protection against intentionally
+ * modified data stream (malicious input).
+ * Use this function in trusted environment only
+ * (data to decode comes from a trusted source).
+ *
+ * Return: number of bytes read from the source buffer
+ * or a negative result if decompression fails.
+ */
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize);
+
+/**
+ * LZ4_decompress_safe() - Decompression protected against buffer overflow
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ * which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ *
+ * Decompresses data fom 'source' into 'dest'.
+ * If the source stream is detected malformed, the function will
+ * stop decoding and return a negative result.
+ * This function is protected against buffer overflow exploits,
+ * including malicious data packets. It never writes outside output buffer,
+ * nor reads outside input buffer.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ * (necessarily <= maxDecompressedSize)
+ * or a negative result in case of error
+ */
+int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
+ int maxDecompressedSize);
+
+/**
+ * LZ4_decompress_safe_partial() - Decompress a block of size 'compressedSize'
+ * at position 'source' into buffer 'dest'
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the decompressed data which must be
+ * already allocated
+ * @compressedSize: is the precise full size of the compressed block.
+ * @targetOutputSize: the decompression operation will try
+ * to stop as soon as 'targetOutputSize' has been reached
+ * @maxDecompressedSize: is the size of destination buffer
+ *
+ * This function decompresses a compressed block of size 'compressedSize'
+ * at position 'source' into destination buffer 'dest'
+ * of size 'maxDecompressedSize'.
+ * The function tries to stop decompressing operation as soon as
+ * 'targetOutputSize' has been reached, reducing decompression time.
+ * This function never writes outside of output buffer,
+ * and never reads outside of input buffer.
+ * It is therefore protected against malicious data packets.
+ *
+ * Return: the number of bytes decoded in the destination buffer
+ * (necessarily <= maxDecompressedSize)
+ * or a negative result in case of error
+ *
+ */
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+ int compressedSize, int targetOutputSize, int maxDecompressedSize);
+
+/*-************************************************************************
+ * LZ4 HC Compression
+ **************************************************************************/
+
+/**
+ * LZ4_compress_HC() - Compress data from `src` into `dst`, using HC algorithm
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @dstCapacity: full or partial size of buffer 'dst',
+ * which must be already allocated
+ * @compressionLevel: Recommended values are between 4 and 9, although any
+ * value between 1 and LZ4HC_MAX_CLEVEL will work.
+ * Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ * @wrkmem: address of the working memory.
+ * This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS.
+ *
+ * Compress data from 'src' into 'dst', using the more powerful
+ * but slower "HC" algorithm. Compression is guaranteed to succeed if
+ * `dstCapacity >= LZ4_compressBound(srcSize)
+ *
+ * Return : the number of bytes written into 'dst' or 0 if compression fails.
+ */
+int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity,
+ int compressionLevel, void *wrkmem);
+
+/**
+ * LZ4_resetStreamHC() - Init an allocated 'LZ4_streamHC_t' structure
+ * @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure
+ * @compressionLevel: Recommended values are between 4 and 9, although any
+ * value between 1 and LZ4HC_MAX_CLEVEL will work.
+ * Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ *
+ * An LZ4_streamHC_t structure can be allocated once
+ * and re-used multiple times.
+ * Use this function to init an allocated `LZ4_streamHC_t` structure
+ * and start a new compression.
+ */
+void LZ4_resetStreamHC(LZ4_streamHC_t *streamHCPtr, int compressionLevel);
+
+/**
+ * LZ4_loadDictHC() - Load a static dictionary into LZ4_streamHC
+ * @streamHCPtr: pointer to the LZ4HC_stream_t
+ * @dictionary: dictionary to load
+ * @dictSize: size of dictionary
+ *
+ * Use this function to load a static dictionary into LZ4HC_stream.
+ * Any previous data will be forgotten, only 'dictionary'
+ * will remain in memory.
+ * Loading a size of 0 is allowed.
+ *
+ * Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr, const char *dictionary,
+ int dictSize);
+
+/**
+ * LZ4_compress_HC_continue() - Compress 'src' using data from previously
+ * compressed blocks as a dictionary using the HC algorithm
+ * @streamHCPtr: Pointer to the previous 'LZ4_streamHC_t' structure
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data,
+ * which must be already allocated
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxDstSize: full or partial size of buffer 'dest'
+ * which must be already allocated
+ *
+ * These functions compress data in successive blocks of any size, using
+ * previous blocks as dictionary. One key assumption is that previous
+ * blocks (up to 64 KB) remain read-accessible while
+ * compressing next blocks. There is an exception for ring buffers,
+ * which can be smaller than 64 KB.
+ * Ring buffers scenario is automatically detected and handled by
+ * LZ4_compress_HC_continue().
+ * Before starting compression, state must be properly initialized,
+ * using LZ4_resetStreamHC().
+ * A first "fictional block" can then be designated as
+ * initial dictionary, using LZ4_loadDictHC() (Optional).
+ * Then, use LZ4_compress_HC_continue()
+ * to compress each successive block. Previous memory blocks
+ * (including initial dictionary when present) must remain accessible
+ * and unmodified during compression.
+ * 'dst' buffer should be sized to handle worst case scenarios, using
+ * LZ4_compressBound(), to ensure operation success.
+ * If, for any reason, previous data blocks can't be preserved unmodified
+ * in memory during next compression block,
+ * you must save it to a safer memory space, using LZ4_saveDictHC().
+ * Return value of LZ4_saveDictHC() is the size of dictionary
+ * effectively saved into 'safeBuffer'.
+ *
+ * Return: Number of bytes written into buffer 'dst' or 0 if compression fails
+ */
+int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr, const char *src,
+ char *dst, int srcSize, int maxDstSize);
+
+/**
+ * LZ4_saveDictHC() - Save static dictionary from LZ4HC_stream
+ * @streamHCPtr: pointer to the 'LZ4HC_stream_t' structure
+ * @safeBuffer: buffer to save dictionary to, must be already allocated
+ * @maxDictSize: size of 'safeBuffer'
+ *
+ * If previously compressed data block is not guaranteed
+ * to remain available at its memory location,
+ * save it into a safer place (char *safeBuffer).
+ * Note : you don't need to call LZ4_loadDictHC() afterwards,
+ * dictionary is immediately usable, you can therefore call
+ * LZ4_compress_HC_continue().
+ *
+ * Return : saved dictionary size in bytes (necessarily <= maxDictSize),
+ * or 0 if error.
+ */
+int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer,
+ int maxDictSize);
+
+/*-*********************************************
+ * Streaming Compression Functions
+ ***********************************************/
+
+/**
+ * LZ4_resetStream() - Init an allocated 'LZ4_stream_t' structure
+ * @LZ4_stream: pointer to the 'LZ4_stream_t' structure
+ *
+ * An LZ4_stream_t structure can be allocated once
+ * and re-used multiple times.
+ * Use this function to init an allocated `LZ4_stream_t` structure
+ * and start a new compression.
+ */
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream);
+
+/**
+ * LZ4_loadDict() - Load a static dictionary into LZ4_stream
+ * @streamPtr: pointer to the LZ4_stream_t
+ * @dictionary: dictionary to load
+ * @dictSize: size of dictionary
+ *
+ * Use this function to load a static dictionary into LZ4_stream.
+ * Any previous data will be forgotten, only 'dictionary'
+ * will remain in memory.
+ * Loading a size of 0 is allowed.
+ *
+ * Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary,
+ int dictSize);
+
+/**
+ * LZ4_saveDict() - Save static dictionary from LZ4_stream
+ * @streamPtr: pointer to the 'LZ4_stream_t' structure
+ * @safeBuffer: buffer to save dictionary to, must be already allocated
+ * @dictSize: size of 'safeBuffer'
+ *
+ * If previously compressed data block is not guaranteed
+ * to remain available at its memory location,
+ * save it into a safer place (char *safeBuffer).
+ * Note : you don't need to call LZ4_loadDict() afterwards,
+ * dictionary is immediately usable, you can therefore call
+ * LZ4_compress_fast_continue().
+ *
+ * Return : saved dictionary size in bytes (necessarily <= dictSize),
+ * or 0 if error.
+ */
+int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer, int dictSize);
+
+/**
+ * LZ4_compress_fast_continue() - Compress 'src' using data from previously
+ * compressed blocks as a dictionary
+ * @streamPtr: Pointer to the previous 'LZ4_stream_t' structure
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data,
+ * which must be already allocated
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxDstSize: full or partial size of buffer 'dest'
+ * which must be already allocated
+ * @acceleration: acceleration factor
+ *
+ * Compress buffer content 'src', using data from previously compressed blocks
+ * as dictionary to improve compression ratio.
+ * Important : Previous data blocks are assumed to still
+ * be present and unmodified !
+ * If maxDstSize >= LZ4_compressBound(srcSize),
+ * compression is guaranteed to succeed, and runs faster.
+ *
+ * Return: Number of bytes written into buffer 'dst' or 0 if compression fails
+ */
+int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr, const char *src,
+ char *dst, int srcSize, int maxDstSize, int acceleration);
+
+/**
+ * LZ4_setStreamDecode() - Instruct where to find dictionary
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @dictionary: dictionary to use
+ * @dictSize: size of dictionary
+ *
+ * Use this function to instruct where to find the dictionary.
+ * Setting a size of 0 is allowed (same effect as reset).
+ *
+ * Return: 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+ const char *dictionary, int dictSize);
+
+/**
+ * LZ4_decompress_fast_continue() - Decompress blocks in streaming mode
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ * which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ *
+ * These decoding function allows decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks *must* remain available at the memory position
+ * where they were decoded (up to 64 KB)
+ * In the case of a ring buffers, decoding buffer must be either :
+ * - Exactly same size as encoding buffer, with same update rule
+ * (block boundaries at same positions) In which case,
+ * the decoding & encoding ring buffer can have any size,
+ * including very small ones ( < 64 KB).
+ * - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ * maxBlockSize is implementation dependent.
+ * It's the maximum size you intend to compress into a single block.
+ * In which case, encoding and decoding buffers do not need
+ * to be synchronized, and encoding ring buffer can have any size,
+ * including small ones ( < 64 KB).
+ * - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ * In which case, encoding and decoding buffers do not need to be
+ * synchronized, and encoding ring buffer can have any size,
+ * including larger than decoding buffer. W
+ * Whenever these conditions are not possible, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode()
+ *
+ * Return: number of bytes decompressed into destination buffer
+ * (necessarily <= maxDecompressedSize)
+ * or a negative result in case of error
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+ const char *source, char *dest, int compressedSize,
+ int maxDecompressedSize);
+
+/**
+ * LZ4_decompress_fast_continue() - Decompress blocks in streaming mode
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ * which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ *
+ * These decoding function allows decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks *must* remain available at the memory position
+ * where they were decoded (up to 64 KB)
+ * In the case of a ring buffers, decoding buffer must be either :
+ * - Exactly same size as encoding buffer, with same update rule
+ * (block boundaries at same positions) In which case,
+ * the decoding & encoding ring buffer can have any size,
+ * including very small ones ( < 64 KB).
+ * - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ * maxBlockSize is implementation dependent.
+ * It's the maximum size you intend to compress into a single block.
+ * In which case, encoding and decoding buffers do not need
+ * to be synchronized, and encoding ring buffer can have any size,
+ * including small ones ( < 64 KB).
+ * - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ * In which case, encoding and decoding buffers do not need to be
+ * synchronized, and encoding ring buffer can have any size,
+ * including larger than decoding buffer. W
+ * Whenever these conditions are not possible, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode()
+ *
+ * Return: number of bytes decompressed into destination buffer
+ * (necessarily <= maxDecompressedSize)
+ * or a negative result in case of error
+ */
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+ const char *source, char *dest, int originalSize);
+
+/**
+ * LZ4_decompress_safe_usingDict() - Same as LZ4_setStreamDecode()
+ * followed by LZ4_decompress_safe_continue()
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ * which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ * @dictStart: pointer to the start of the dictionary in memory
+ * @dictSize: size of dictionary
+ *
+ * These decoding function works the same as
+ * a combination of LZ4_setStreamDecode() followed by
+ * LZ4_decompress_safe_continue()
+ * It is stand-alone, and don'tn eed a LZ4_streamDecode_t structure.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ * (necessarily <= maxDecompressedSize)
+ * or a negative result in case of error
+ */
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+ int compressedSize, int maxDecompressedSize, const char *dictStart,
+ int dictSize);
+
+/**
+ * LZ4_decompress_fast_usingDict() - Same as LZ4_setStreamDecode()
+ * followed by LZ4_decompress_fast_continue()
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ * which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ * @dictStart: pointer to the start of the dictionary in memory
+ * @dictSize: size of dictionary
+ *
+ * These decoding function works the same as
+ * a combination of LZ4_setStreamDecode() followed by
+ * LZ4_decompress_safe_continue()
+ * It is stand-alone, and don'tn eed a LZ4_streamDecode_t structure.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ * (necessarily <= maxDecompressedSize)
+ * or a negative result in case of error
+ */
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+ int originalSize, const char *dictStart, int dictSize);
+
#endif
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 5b759c9acf97..bdfc65af4152 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -42,6 +42,7 @@ struct memblock_type {
unsigned long max; /* size of the allocated array */
phys_addr_t total_size; /* size of all regions */
struct memblock_region *regions;
+ char *name;
};
struct memblock {
@@ -203,6 +204,7 @@ int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
unsigned long *end_pfn);
void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
unsigned long *out_end_pfn, int *out_nid);
+unsigned long memblock_next_valid_pfn(unsigned long pfn, unsigned long max_pfn);
/**
* for_each_mem_pfn_range - early memory pfn range iterator
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 254698856b8f..5af377303880 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -253,6 +253,7 @@ struct mem_cgroup {
/* Index in the kmem_cache->memcg_params.memcg_caches array */
int kmemcg_id;
enum memcg_kmem_state kmem_state;
+ struct list_head kmem_caches;
#endif
int last_scanned_node;
@@ -829,6 +830,7 @@ void memcg_kmem_uncharge(struct page *page, int order);
#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
extern struct static_key_false memcg_kmem_enabled_key;
+extern struct workqueue_struct *memcg_kmem_cache_wq;
extern int memcg_nr_cache_ids;
void memcg_get_cache_ids(void);
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 093607f90b91..301dfb03ecb7 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -108,12 +108,12 @@ extern int register_memory_notifier(struct notifier_block *nb);
extern void unregister_memory_notifier(struct notifier_block *nb);
extern int register_memory_isolate_notifier(struct notifier_block *nb);
extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
-extern int register_new_memory(int, struct mem_section *);
+extern int register_new_memory(struct zone *, int, struct mem_section *);
extern int memory_block_change_state(struct memory_block *mem,
unsigned long to_state,
unsigned long from_state_req);
#ifdef CONFIG_MEMORY_HOTREMOVE
-extern int unregister_memory_section(struct mem_section *);
+extern int unregister_memory_section(struct zone *, struct mem_section *);
#endif
extern int memory_dev_init(void);
extern int memory_notify(unsigned long val, void *v);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 134a2f69c21a..2f2c0d1290a1 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -280,8 +280,10 @@ extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device);
extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
extern bool is_memblock_offlined(struct memory_block *mem);
extern void remove_memory(int nid, u64 start, u64 size);
-extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn);
-extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
+extern int sparse_add_section(struct zone *zone, unsigned long pfn,
+ unsigned long nr_pages);
+extern void sparse_remove_section(struct zone *zone, struct mem_section *ms,
+ unsigned long pfn, unsigned long nr_pages,
unsigned long map_offset);
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
unsigned long pnum);
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index ae8d475a9385..fa76b516fa47 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -37,7 +37,7 @@ extern int migrate_page(struct address_space *,
struct page *, struct page *, enum migrate_mode);
extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
unsigned long private, enum migrate_mode mode, int reason);
-extern bool isolate_movable_page(struct page *page, isolate_mode_t mode);
+extern int isolate_movable_page(struct page *page, isolate_mode_t mode);
extern void putback_movable_page(struct page *page);
extern int migrate_prep(void);
@@ -56,6 +56,8 @@ static inline int migrate_pages(struct list_head *l, new_page_t new,
free_page_t free, unsigned long private, enum migrate_mode mode,
int reason)
{ return -ENOSYS; }
+static inline int isolate_movable_page(struct page *page, isolate_mode_t mode)
+ { return -EBUSY; }
static inline int migrate_prep(void) { return -ENOSYS; }
static inline int migrate_prep_local(void) { return -ENOSYS; }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6ff66d6fe8e2..7b11431124c3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -285,6 +285,17 @@ extern pgprot_t protection_map[16];
#define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */
#define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */
+#define FAULT_FLAG_TRACE \
+ { FAULT_FLAG_WRITE, "WRITE" }, \
+ { FAULT_FLAG_MKWRITE, "MKWRITE" }, \
+ { FAULT_FLAG_ALLOW_RETRY, "ALLOW_RETRY" }, \
+ { FAULT_FLAG_RETRY_NOWAIT, "RETRY_NOWAIT" }, \
+ { FAULT_FLAG_KILLABLE, "KILLABLE" }, \
+ { FAULT_FLAG_TRIED, "TRIED" }, \
+ { FAULT_FLAG_USER, "USER" }, \
+ { FAULT_FLAG_REMOTE, "REMOTE" }, \
+ { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }
+
/*
* vm_fault is filled by the the pagefault handler and passed to the vma's
* ->fault function. The vma's ->fault is responsible for returning a bitmask
@@ -303,6 +314,9 @@ struct vm_fault {
unsigned long address; /* Faulting virtual address */
pmd_t *pmd; /* Pointer to pmd entry matching
* the 'address' */
+ pud_t *pud; /* Pointer to pud entry matching
+ * the 'address'
+ */
pte_t orig_pte; /* Value of PTE at the time of fault */
struct page *cow_page; /* Page handler may use for COW fault */
@@ -330,6 +344,13 @@ struct vm_fault {
*/
};
+/* page entry size for vm->huge_fault() */
+enum page_entry_size {
+ PE_SIZE_PTE = 0,
+ PE_SIZE_PMD,
+ PE_SIZE_PUD,
+};
+
/*
* These are the virtual MM functions - opening of an area, closing and
* unmapping it (needed to keep files on disk up-to-date etc), pointer
@@ -339,18 +360,17 @@ struct vm_operations_struct {
void (*open)(struct vm_area_struct * area);
void (*close)(struct vm_area_struct * area);
int (*mremap)(struct vm_area_struct * area);
- int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
- int (*pmd_fault)(struct vm_area_struct *, unsigned long address,
- pmd_t *, unsigned int flags);
+ int (*fault)(struct vm_fault *vmf);
+ int (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size);
void (*map_pages)(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
/* notification that a previously read-only page is about to become
* writable, if an error is returned it will cause a SIGBUS */
- int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);
+ int (*page_mkwrite)(struct vm_fault *vmf);
/* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */
- int (*pfn_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);
+ int (*pfn_mkwrite)(struct vm_fault *vmf);
/* called by access_process_vm when get_user_pages() fails, typically
* for use by special VMAs that can switch between memory and hardware
@@ -406,6 +426,10 @@ static inline int pmd_devmap(pmd_t pmd)
{
return 0;
}
+static inline int pud_devmap(pud_t pud)
+{
+ return 0;
+}
#endif
/*
@@ -1111,6 +1135,20 @@ static inline void clear_page_pfmemalloc(struct page *page)
VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \
VM_FAULT_FALLBACK)
+#define VM_FAULT_RESULT_TRACE \
+ { VM_FAULT_OOM, "OOM" }, \
+ { VM_FAULT_SIGBUS, "SIGBUS" }, \
+ { VM_FAULT_MAJOR, "MAJOR" }, \
+ { VM_FAULT_WRITE, "WRITE" }, \
+ { VM_FAULT_HWPOISON, "HWPOISON" }, \
+ { VM_FAULT_HWPOISON_LARGE, "HWPOISON_LARGE" }, \
+ { VM_FAULT_SIGSEGV, "SIGSEGV" }, \
+ { VM_FAULT_NOPAGE, "NOPAGE" }, \
+ { VM_FAULT_LOCKED, "LOCKED" }, \
+ { VM_FAULT_RETRY, "RETRY" }, \
+ { VM_FAULT_FALLBACK, "FALLBACK" }, \
+ { VM_FAULT_DONE_COW, "DONE_COW" }
+
/* Encode hstate index for a hwpoisoned large page */
#define VM_FAULT_SET_HINDEX(x) ((x) << 12)
#define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf)
@@ -1128,8 +1166,7 @@ extern void pagefault_out_of_memory(void);
*/
#define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */
-extern void show_free_areas(unsigned int flags);
-extern bool skip_free_areas_node(unsigned int flags, int nid);
+extern void show_free_areas(unsigned int flags, nodemask_t *nodemask);
int shmem_zero_setup(struct vm_area_struct *);
#ifdef CONFIG_SHMEM
@@ -1152,8 +1189,6 @@ struct zap_details {
struct address_space *check_mapping; /* Check page->mapping if set */
pgoff_t first_index; /* Lowest page->index to unmap */
pgoff_t last_index; /* Highest page->index to unmap */
- bool ignore_dirty; /* Ignore dirty pages */
- bool check_swap_entries; /* Check also swap entries */
};
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
@@ -1164,12 +1199,16 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
unsigned long size);
void zap_page_range(struct vm_area_struct *vma, unsigned long address,
- unsigned long size, struct zap_details *);
+ unsigned long size);
void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
unsigned long start, unsigned long end);
/**
* mm_walk - callbacks for walk_page_range
+ * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
+ * this handler should only handle pud_trans_huge() puds.
+ * the pmd_entry or pte_entry callbacks will be used for
+ * regular PUDs.
* @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
* this handler is required to be able to handle
* pmd_trans_huge() pmds. They may simply choose to
@@ -1189,6 +1228,8 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
* (see the comment on walk_page_range() for more details)
*/
struct mm_walk {
+ int (*pud_entry)(pud_t *pud, unsigned long addr,
+ unsigned long next, struct mm_walk *walk);
int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
unsigned long next, struct mm_walk *walk);
int (*pte_entry)(pte_t *pte, unsigned long addr,
@@ -1359,6 +1400,16 @@ static inline bool vma_is_anonymous(struct vm_area_struct *vma)
return !vma->vm_ops;
}
+#ifdef CONFIG_SHMEM
+/*
+ * The vma_is_shmem is not inline because it is used only by slow
+ * paths in userfault.
+ */
+bool vma_is_shmem(struct vm_area_struct *vma);
+#else
+static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
+#endif
+
static inline int stack_guard_page_start(struct vm_area_struct *vma,
unsigned long addr)
{
@@ -1762,8 +1813,26 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
return ptl;
}
-extern void __init pagecache_init(void);
+/*
+ * No scalability reason to split PUD locks yet, but follow the same pattern
+ * as the PMD locks to make it easier if we decide to. The VM should not be
+ * considered ready to switch to split PUD locks yet; there may be places
+ * which need to be converted from page_table_lock.
+ */
+static inline spinlock_t *pud_lockptr(struct mm_struct *mm, pud_t *pud)
+{
+ return &mm->page_table_lock;
+}
+static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
+{
+ spinlock_t *ptl = pud_lockptr(mm, pud);
+
+ spin_lock(ptl);
+ return ptl;
+}
+
+extern void __init pagecache_init(void);
extern void free_area_init(unsigned long * zones_size);
extern void free_area_init_node(int nid, unsigned long * zones_size,
unsigned long zone_start_pfn, unsigned long *zholes_size);
@@ -1900,7 +1969,7 @@ extern void setup_per_zone_wmarks(void);
extern int __meminit init_per_zone_wmark_min(void);
extern void mem_init(void);
extern void __init mmap_init(void);
-extern void show_mem(unsigned int flags);
+extern void show_mem(unsigned int flags, nodemask_t *nodemask);
extern long si_mem_available(void);
extern void si_meminfo(struct sysinfo * val);
extern void si_meminfo_node(struct sysinfo *val, int nid);
@@ -1908,8 +1977,8 @@ extern void si_meminfo_node(struct sysinfo *val, int nid);
extern unsigned long arch_reserved_kernel_pages(void);
#endif
-extern __printf(2, 3)
-void warn_alloc(gfp_t gfp_mask, const char *fmt, ...);
+extern __printf(3, 4)
+void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...);
extern void setup_per_cpu_pageset(void);
@@ -1972,8 +2041,10 @@ extern struct vm_area_struct *vma_merge(struct mm_struct *,
unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
struct mempolicy *, struct vm_userfaultfd_ctx);
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
-extern int split_vma(struct mm_struct *,
- struct vm_area_struct *, unsigned long addr, int new_below);
+extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
+ unsigned long addr, int new_below);
+extern int split_vma(struct mm_struct *, struct vm_area_struct *,
+ unsigned long addr, int new_below);
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
struct rb_node **, struct rb_node *);
@@ -2021,18 +2092,22 @@ extern int install_special_mapping(struct mm_struct *mm,
extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
extern unsigned long mmap_region(struct file *file, unsigned long addr,
- unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);
+ unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
+ struct list_head *uf);
extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
- vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate);
-extern int do_munmap(struct mm_struct *, unsigned long, size_t);
+ vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
+ struct list_head *uf);
+extern int do_munmap(struct mm_struct *, unsigned long, size_t,
+ struct list_head *uf);
static inline unsigned long
do_mmap_pgoff(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
- unsigned long pgoff, unsigned long *populate)
+ unsigned long pgoff, unsigned long *populate,
+ struct list_head *uf)
{
- return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate);
+ return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate, uf);
}
#ifdef CONFIG_MMU
@@ -2049,6 +2124,7 @@ static inline void mm_populate(unsigned long addr, unsigned long len) {}
/* These take the mm semaphore themselves */
extern int __must_check vm_brk(unsigned long, unsigned long);
+extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long);
extern int vm_munmap(unsigned long, size_t);
extern unsigned long __must_check vm_mmap(struct file *, unsigned long,
unsigned long, unsigned long,
@@ -2092,10 +2168,10 @@ extern void truncate_inode_pages_range(struct address_space *,
extern void truncate_inode_pages_final(struct address_space *);
/* generic vm_area_ops exported for stackable file systems */
-extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
+extern int filemap_fault(struct vm_fault *vmf);
extern void filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
-extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int filemap_page_mkwrite(struct vm_fault *vmf);
/* mm/page-writeback.c */
int write_one_page(struct page *page, int wait);
@@ -2317,7 +2393,8 @@ void sparse_mem_maps_populate_node(struct page **map_map,
unsigned long map_count,
int nodeid);
-struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
+struct page *__populate_section_memmap(unsigned long pfn,
+ unsigned long nr_pages, int nid);
pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node);
pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
@@ -2400,6 +2477,10 @@ extern void clear_huge_page(struct page *page,
extern void copy_user_huge_page(struct page *dst, struct page *src,
unsigned long addr, struct vm_area_struct *vma,
unsigned int pages_per_huge_page);
+extern long copy_huge_page_from_user(struct page *dst_page,
+ const void __user *usr_src,
+ unsigned int pages_per_huge_page,
+ bool allow_pagefault);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
extern struct page_ext_operations debug_guardpage_ops;
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 41d376e7116d..e030a68ead7e 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -50,6 +50,13 @@ static __always_inline void add_page_to_lru_list(struct page *page,
list_add(&page->lru, &lruvec->lists[lru]);
}
+static __always_inline void add_page_to_lru_list_tail(struct page *page,
+ struct lruvec *lruvec, enum lru_list lru)
+{
+ update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
+ list_add_tail(&page->lru, &lruvec->lists[lru]);
+}
+
static __always_inline void del_page_from_lru_list(struct page *page,
struct lruvec *lruvec, enum lru_list lru)
{
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index a1a210d59961..51891fb0d3ce 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -381,6 +381,19 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
___pmd; \
})
+#define pudp_huge_clear_flush_notify(__vma, __haddr, __pud) \
+({ \
+ unsigned long ___haddr = __haddr & HPAGE_PUD_MASK; \
+ struct mm_struct *___mm = (__vma)->vm_mm; \
+ pud_t ___pud; \
+ \
+ ___pud = pudp_huge_clear_flush(__vma, __haddr, __pud); \
+ mmu_notifier_invalidate_range(___mm, ___haddr, \
+ ___haddr + HPAGE_PUD_SIZE); \
+ \
+ ___pud; \
+})
+
#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd) \
({ \
unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \
@@ -475,6 +488,7 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
#define pmdp_clear_young_notify pmdp_test_and_clear_young
#define ptep_clear_flush_notify ptep_clear_flush
#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
+#define pudp_huge_clear_flush_notify pudp_huge_clear_flush
#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
#define set_pte_at_notify set_pte_at
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f4aac87adcc3..338a786a993f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -236,8 +236,6 @@ struct lruvec {
#define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
#define LRU_ALL ((1 << NR_LRU_LISTS) - 1)
-/* Isolate clean file */
-#define ISOLATE_CLEAN ((__force isolate_mode_t)0x1)
/* Isolate unmapped file */
#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2)
/* Isolate for asynchronous migration */
@@ -779,7 +777,7 @@ static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
#endif
}
-extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru);
+extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx);
#ifdef CONFIG_HAVE_MEMORY_PRESENT
void memory_present(int nid, unsigned long start, unsigned long end);
@@ -1052,6 +1050,8 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
* PFN_SECTION_SHIFT pfn to/from section number
*/
#define PA_SECTION_SHIFT (SECTION_SIZE_BITS)
+#define PA_SECTION_SIZE (1UL << PA_SECTION_SHIFT)
+#define PA_SECTION_MASK (~(PA_SECTION_SIZE-1))
#define PFN_SECTION_SHIFT (SECTION_SIZE_BITS - PAGE_SHIFT)
#define NR_MEM_SECTIONS (1UL << SECTIONS_SHIFT)
@@ -1066,12 +1066,27 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
#error Allocator MAX_ORDER exceeds SECTION_SIZE
#endif
-#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
-#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)
+#define pfn_to_section_nr(pfn) ((unsigned long)(pfn) >> PFN_SECTION_SHIFT)
+#define section_nr_to_pfn(sec) ((unsigned long)(sec) << PFN_SECTION_SHIFT)
#define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)
#define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK)
+#define SECTION_ACTIVE_SIZE ((1UL << SECTION_SIZE_BITS) / BITS_PER_LONG)
+#define SECTION_ACTIVE_MASK (~(SECTION_ACTIVE_SIZE - 1))
+
+struct mem_section_usage {
+ /*
+ * SECTION_ACTIVE_SIZE portions of the section that are populated in
+ * the memmap
+ */
+ unsigned long map_active;
+ /* See declaration of similar field in struct zone */
+ unsigned long pageblock_flags[0];
+};
+
+void __init section_active_init(unsigned long pfn, unsigned long nr_pages);
+
struct page;
struct page_ext;
struct mem_section {
@@ -1089,8 +1104,7 @@ struct mem_section {
*/
unsigned long section_mem_map;
- /* See declaration of similar field in struct zone */
- unsigned long *pageblock_flags;
+ struct mem_section_usage *usage;
#ifdef CONFIG_PAGE_EXTENSION
/*
* If SPARSEMEM, pgdat doesn't have page_ext pointer. We use
@@ -1121,6 +1135,11 @@ extern struct mem_section *mem_section[NR_SECTION_ROOTS];
extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
#endif
+static inline unsigned long *section_to_usemap(struct mem_section *ms)
+{
+ return ms->usage->pageblock_flags;
+}
+
static inline struct mem_section *__nr_to_section(unsigned long nr)
{
if (!mem_section[SECTION_NR_TO_ROOT(nr)])
@@ -1209,6 +1228,11 @@ void sparse_init(void);
#else
#define sparse_init() do {} while (0)
#define sparse_index_init(_sec, _nid) do {} while (0)
+static inline void section_active_init(unsigned long pfn,
+ unsigned long nr_pages)
+{
+}
+#define section_active_init(_pfn, _nr_pages) do {} while (0)
#endif /* CONFIG_SPARSEMEM */
/*
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 324c8dbad1e1..84943e8057ef 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -266,7 +266,6 @@ static inline struct page *find_get_page_flags(struct address_space *mapping,
/**
* find_lock_page - locate, pin and lock a pagecache page
- * pagecache_get_page - find and get a page reference
* @mapping: the address_space to search
* @offset: the page index
*
@@ -482,19 +481,11 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
}
/*
- * This is exported only for wait_on_page_locked/wait_on_page_writeback,
- * and for filesystems which need to wait on PG_private.
+ * This is exported only for wait_on_page_locked/wait_on_page_writeback, etc.,
+ * and should not be used directly.
*/
extern void wait_on_page_bit(struct page *page, int bit_nr);
extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
-extern void wake_up_page_bit(struct page *page, int bit_nr);
-
-static inline void wake_up_page(struct page *page, int bit)
-{
- if (!PageWaiters(page))
- return;
- wake_up_page_bit(page, bit);
-}
/*
* Wait for a page to be unlocked.
diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
index a3d90b9da18d..a49b3259cad7 100644
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -15,6 +15,12 @@
#define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))
#define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4))
+#define PFN_FLAGS_TRACE \
+ { PFN_SG_CHAIN, "SG_CHAIN" }, \
+ { PFN_SG_LAST, "SG_LAST" }, \
+ { PFN_DEV, "DEV" }, \
+ { PFN_MAP, "MAP" }
+
static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags)
{
pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), };
@@ -84,6 +90,13 @@ static inline pmd_t pfn_t_pmd(pfn_t pfn, pgprot_t pgprot)
{
return pfn_pmd(pfn_t_to_pfn(pfn), pgprot);
}
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline pud_t pfn_t_pud(pfn_t pfn, pgprot_t pgprot)
+{
+ return pfn_pud(pfn_t_to_pfn(pfn), pgprot);
+}
+#endif
#endif
#ifdef __HAVE_ARCH_PTE_DEVMAP
@@ -100,5 +113,10 @@ static inline bool pfn_t_devmap(pfn_t pfn)
}
pte_t pte_mkdevmap(pte_t pte);
pmd_t pmd_mkdevmap(pmd_t pmd);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+pud_t pud_mkdevmap(pud_t pud);
#endif
+#endif /* __HAVE_ARCH_PTE_DEVMAP */
+
#endif /* _LINUX_PFN_T_H_ */
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 23705a53abba..298ead5512e5 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -191,10 +191,10 @@ pid_t pid_vnr(struct pid *pid);
#define do_each_pid_thread(pid, type, task) \
do_each_pid_task(pid, type, task) { \
struct task_struct *tg___ = task; \
- do {
+ for_each_thread(tg___, task) {
#define while_each_pid_thread(pid, type, task) \
- } while_each_thread(tg___, task); \
+ } \
task = tg___; \
} while_each_pid_task(pid, type, task)
#endif /* _LINUX_PID_H */
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index d076183e49be..9702b6e183bc 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -90,7 +90,9 @@ rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \
old->rbaugmented = rbcompute(old); \
} \
rbstatic const struct rb_augment_callbacks rbname = { \
- rbname ## _propagate, rbname ## _copy, rbname ## _rotate \
+ .propagate = rbname ## _propagate, \
+ .copy = rbname ## _copy, \
+ .rotate = rbname ## _rotate \
};
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 15321fb1df6b..8c89e902df3e 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -9,6 +9,7 @@
#include <linux/mm.h>
#include <linux/rwsem.h>
#include <linux/memcontrol.h>
+#include <linux/highmem.h>
/*
* The anon_vma heads a list of private "related" vmas, to scan if
@@ -196,41 +197,30 @@ int page_referenced(struct page *, int is_locked,
int try_to_unmap(struct page *, enum ttu_flags flags);
-/*
- * Used by uprobes to replace a userspace page safely
- */
-pte_t *__page_check_address(struct page *, struct mm_struct *,
- unsigned long, spinlock_t **, int);
-
-static inline pte_t *page_check_address(struct page *page, struct mm_struct *mm,
- unsigned long address,
- spinlock_t **ptlp, int sync)
-{
- pte_t *ptep;
+/* Avoid racy checks */
+#define PVMW_SYNC (1 << 0)
+/* Look for migarion entries rather than present PTEs */
+#define PVMW_MIGRATION (1 << 1)
- __cond_lock(*ptlp, ptep = __page_check_address(page, mm, address,
- ptlp, sync));
- return ptep;
-}
+struct page_vma_mapped_walk {
+ struct page *page;
+ struct vm_area_struct *vma;
+ unsigned long address;
+ pmd_t *pmd;
+ pte_t *pte;
+ spinlock_t *ptl;
+ unsigned int flags;
+};
-/*
- * Used by idle page tracking to check if a page was referenced via page
- * tables.
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-bool page_check_address_transhuge(struct page *page, struct mm_struct *mm,
- unsigned long address, pmd_t **pmdp,
- pte_t **ptep, spinlock_t **ptlp);
-#else
-static inline bool page_check_address_transhuge(struct page *page,
- struct mm_struct *mm, unsigned long address,
- pmd_t **pmdp, pte_t **ptep, spinlock_t **ptlp)
+static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
{
- *ptep = page_check_address(page, mm, address, ptlp, 0);
- *pmdp = NULL;
- return !!*ptep;
+ if (pvmw->pte)
+ pte_unmap(pvmw->pte);
+ if (pvmw->ptl)
+ spin_unlock(pvmw->ptl);
}
-#endif
+
+bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);
/*
* Used by swapoff to help locate where page is expected in vma.
diff --git a/include/linux/rodata_test.h b/include/linux/rodata_test.h
new file mode 100644
index 000000000000..562537f85a28
--- /dev/null
+++ b/include/linux/rodata_test.h
@@ -0,0 +1,24 @@
+/*
+ * rodata_test.h: functional test for mark_rodata_ro function
+ *
+ * (C) Copyright 2008 Intel Corporation
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#ifndef _RODATA_TEST_H
+#define _RODATA_TEST_H
+
+#ifdef CONFIG_DEBUG_RODATA_TEST
+extern const int rodata_test_data;
+void rodata_test(void);
+#else
+static inline void rodata_test(void) {}
+#endif
+
+#endif /* _RODATA_TEST_H */
+
diff --git a/include/linux/sem.h b/include/linux/sem.h
index d0efd6e6c20a..4fc222f8755d 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -21,7 +21,7 @@ struct sem_array {
struct list_head list_id; /* undo requests on this array */
int sem_nsems; /* no. of semaphores in array */
int complex_count; /* pending complex operations */
- bool complex_mode; /* no parallel simple ops */
+ unsigned int use_global_lock;/* >0: global lock required */
};
#ifdef CONFIG_SYSVIPC
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index ff078e7043b6..fdaac9d4d46d 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -124,4 +124,15 @@ static inline bool shmem_huge_enabled(struct vm_area_struct *vma)
}
#endif
+#ifdef CONFIG_SHMEM
+extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr,
+ unsigned long src_addr,
+ struct page **pagep);
+#else
+#define shmem_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
+ src_addr, pagep) ({ BUG(); 0; })
+#endif
+
#endif
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 4c5363566815..3c37a8c51921 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -545,22 +545,49 @@ struct memcg_cache_array {
* array to be accessed without taking any locks, on relocation we free the old
* version only after a grace period.
*
- * Child caches will hold extra metadata needed for its operation. Fields are:
+ * Root and child caches hold different metadata.
*
- * @memcg: pointer to the memcg this cache belongs to
- * @root_cache: pointer to the global, root cache, this cache was derived from
+ * @root_cache: Common to root and child caches. NULL for root, pointer to
+ * the root cache for children.
*
- * Both root and child caches of the same kind are linked into a list chained
- * through @list.
+ * The following fields are specific to root caches.
+ *
+ * @memcg_caches: kmemcg ID indexed table of child caches. This table is
+ * used to index child cachces during allocation and cleared
+ * early during shutdown.
+ *
+ * @root_caches_node: List node for slab_root_caches list.
+ *
+ * @children: List of all child caches. While the child caches are also
+ * reachable through @memcg_caches, a child cache remains on
+ * this list until it is actually destroyed.
+ *
+ * The following fields are specific to child caches.
+ *
+ * @memcg: Pointer to the memcg this cache belongs to.
+ *
+ * @children_node: List node for @root_cache->children list.
+ *
+ * @kmem_caches_node: List node for @memcg->kmem_caches list.
*/
struct memcg_cache_params {
- bool is_root_cache;
- struct list_head list;
+ struct kmem_cache *root_cache;
union {
- struct memcg_cache_array __rcu *memcg_caches;
+ struct {
+ struct memcg_cache_array __rcu *memcg_caches;
+ struct list_head __root_caches_node;
+ struct list_head children;
+ };
struct {
struct mem_cgroup *memcg;
- struct kmem_cache *root_cache;
+ struct list_head children_node;
+ struct list_head kmem_caches_node;
+
+ void (*deact_fn)(struct kmem_cache *);
+ union {
+ struct rcu_head deact_rcu_head;
+ struct work_struct deact_work;
+ };
};
};
};
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 75f56c2ef2d4..07ef550c6627 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -113,9 +113,9 @@ struct kmem_cache {
#ifdef CONFIG_SYSFS
#define SLAB_SUPPORTS_SYSFS
-void sysfs_slab_remove(struct kmem_cache *);
+void sysfs_slab_release(struct kmem_cache *);
#else
-static inline void sysfs_slab_remove(struct kmem_cache *s)
+static inline void sysfs_slab_release(struct kmem_cache *s)
{
}
#endif
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7f47b7098b1b..45e91dd6716d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -27,6 +27,7 @@ struct bio;
#define SWAP_FLAGS_VALID (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \
SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \
SWAP_FLAG_DISCARD_PAGES)
+#define SWAP_BATCH 64
static inline int current_is_kswapd(void)
{
@@ -176,6 +177,12 @@ enum {
* protected by swap_info_struct.lock.
*/
struct swap_cluster_info {
+ spinlock_t lock; /*
+ * Protect swap_cluster_info fields
+ * and swap_info_struct->swap_map
+ * elements correspond to the swap
+ * cluster
+ */
unsigned int data:24;
unsigned int flags:8;
};
@@ -337,8 +344,13 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *,
sector_t *);
/* linux/mm/swap_state.c */
-extern struct address_space swapper_spaces[];
-#define swap_address_space(entry) (&swapper_spaces[swp_type(entry)])
+/* One swap address space for each 64M swap space */
+#define SWAP_ADDRESS_SPACE_SHIFT 14
+#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT)
+extern struct address_space *swapper_spaces[];
+#define swap_address_space(entry) \
+ (&swapper_spaces[swp_type(entry)][swp_offset(entry) \
+ >> SWAP_ADDRESS_SPACE_SHIFT])
extern unsigned long total_swapcache_pages(void);
extern void show_swap_cache_info(void);
extern int add_to_swap(struct page *, struct list_head *list);
@@ -360,6 +372,7 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t,
/* linux/mm/swapfile.c */
extern atomic_long_t nr_swap_pages;
extern long total_swap_pages;
+extern bool has_usable_swap(void);
/* Swap 50% full? Release swapcache more aggressively.. */
static inline bool vm_swap_full(void)
@@ -375,23 +388,31 @@ static inline long get_nr_swap_pages(void)
extern void si_swapinfo(struct sysinfo *);
extern swp_entry_t get_swap_page(void);
extern swp_entry_t get_swap_page_of_type(int);
+extern int get_swap_pages(int n, swp_entry_t swp_entries[]);
extern int add_swap_count_continuation(swp_entry_t, gfp_t);
extern void swap_shmem_alloc(swp_entry_t);
extern int swap_duplicate(swp_entry_t);
extern int swapcache_prepare(swp_entry_t);
extern void swap_free(swp_entry_t);
extern void swapcache_free(swp_entry_t);
+extern void swapcache_free_entries(swp_entry_t *entries, int n);
extern int free_swap_and_cache(swp_entry_t);
extern int swap_type_of(dev_t, sector_t, struct block_device **);
extern unsigned int count_swap_pages(int, int);
extern sector_t map_swap_page(struct page *, struct block_device **);
extern sector_t swapdev_block(int, pgoff_t);
extern int page_swapcount(struct page *);
+extern int __swp_swapcount(swp_entry_t entry);
extern int swp_swapcount(swp_entry_t entry);
extern struct swap_info_struct *page_swap_info(struct page *);
extern bool reuse_swap_page(struct page *, int *);
extern int try_to_free_swap(struct page *);
struct backing_dev_info;
+extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
+extern void exit_swap_address_space(unsigned int type);
+
+extern int get_swap_slots(int n, swp_entry_t *slots);
+extern void swapcache_free_batch(swp_entry_t *entries, int n);
#else /* CONFIG_SWAP */
@@ -479,6 +500,11 @@ static inline int page_swapcount(struct page *page)
return 0;
}
+static inline int __swp_swapcount(swp_entry_t entry)
+{
+ return 0;
+}
+
static inline int swp_swapcount(swp_entry_t entry)
{
return 0;
diff --git a/include/linux/swap_slots.h b/include/linux/swap_slots.h
new file mode 100644
index 000000000000..6ef92d17633d
--- /dev/null
+++ b/include/linux/swap_slots.h
@@ -0,0 +1,30 @@
+#ifndef _LINUX_SWAP_SLOTS_H
+#define _LINUX_SWAP_SLOTS_H
+
+#include <linux/swap.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+
+#define SWAP_SLOTS_CACHE_SIZE SWAP_BATCH
+#define THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE (5*SWAP_SLOTS_CACHE_SIZE)
+#define THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE (2*SWAP_SLOTS_CACHE_SIZE)
+
+struct swap_slots_cache {
+ bool lock_initialized;
+ struct mutex alloc_lock; /* protects slots, nr, cur */
+ swp_entry_t *slots;
+ int nr;
+ int cur;
+ spinlock_t free_lock; /* protects slots_ret, n_ret */
+ swp_entry_t *slots_ret;
+ int n_ret;
+};
+
+void disable_swap_slots_cache_lock(void);
+void reenable_swap_slots_cache_unlock(void);
+int enable_swap_slots_cache(void);
+int free_swap_slot(swp_entry_t entry);
+
+extern bool swap_slot_cache_enabled;
+
+#endif /* _LINUX_SWAP_SLOTS_H */
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 0f165507495c..0af63c4381b9 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -23,6 +23,10 @@ const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
const struct trace_print_flags *symbol_array);
#if BITS_PER_LONG == 32
+const char *trace_print_flags_seq_u64(struct trace_seq *p, const char *delim,
+ unsigned long long flags,
+ const struct trace_print_flags_u64 *flag_array);
+
const char *trace_print_symbols_seq_u64(struct trace_seq *p,
unsigned long long val,
const struct trace_print_flags_u64
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 11b92b047a1e..0468548acebf 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -52,6 +52,28 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
return vma->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP);
}
+extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
+extern void dup_userfaultfd_complete(struct list_head *);
+
+extern void mremap_userfaultfd_prep(struct vm_area_struct *,
+ struct vm_userfaultfd_ctx *);
+extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *,
+ unsigned long from, unsigned long to,
+ unsigned long len);
+
+extern void userfaultfd_remove(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start,
+ unsigned long end);
+
+extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct list_head *uf);
+extern void userfaultfd_unmap_complete(struct mm_struct *mm,
+ struct list_head *uf);
+
+extern void userfaultfd_exit(struct mm_struct *mm);
+
#else /* CONFIG_USERFAULTFD */
/* mm helpers */
@@ -76,6 +98,51 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
return false;
}
+static inline int dup_userfaultfd(struct vm_area_struct *vma,
+ struct list_head *l)
+{
+ return 0;
+}
+
+static inline void dup_userfaultfd_complete(struct list_head *l)
+{
+}
+
+static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma,
+ struct vm_userfaultfd_ctx *ctx)
+{
+}
+
+static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx,
+ unsigned long from,
+ unsigned long to,
+ unsigned long len)
+{
+}
+
+static inline void userfaultfd_remove(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start,
+ unsigned long end)
+{
+}
+
+static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct list_head *uf)
+{
+ return 0;
+}
+
+static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
+ struct list_head *uf)
+{
+}
+
+static inline void userfaultfd_exit(struct mm_struct *mm)
+{
+}
+
#endif /* CONFIG_USERFAULTFD */
#endif /* _LINUX_USERFAULTFD_K_H */
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 4d6ec58a8d45..6aa1b6cb5828 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -56,6 +56,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
COMPACTISOLATED,
COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
KCOMPACTD_WAKE,
+ KCOMPACTD_MIGRATE_SCANNED, KCOMPACTD_FREE_SCANNED,
#endif
#ifdef CONFIG_HUGETLB_PAGE
HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 5527d910ba3d..a3c0cbd7c888 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -46,7 +46,7 @@ enum writeback_sync_modes {
*/
enum wb_reason {
WB_REASON_BACKGROUND,
- WB_REASON_TRY_TO_FREE_PAGES,
+ WB_REASON_VMSCAN,
WB_REASON_SYNC,
WB_REASON_PERIODIC,
WB_REASON_LAPTOP_TIMER,
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index cbdb90b6b308..0a18ab6483ff 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -9,62 +9,6 @@
#include <linux/tracepoint.h>
#include <trace/events/mmflags.h>
-#define COMPACTION_STATUS \
- EM( COMPACT_SKIPPED, "skipped") \
- EM( COMPACT_DEFERRED, "deferred") \
- EM( COMPACT_CONTINUE, "continue") \
- EM( COMPACT_SUCCESS, "success") \
- EM( COMPACT_PARTIAL_SKIPPED, "partial_skipped") \
- EM( COMPACT_COMPLETE, "complete") \
- EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \
- EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \
- EMe(COMPACT_CONTENDED, "contended")
-
-#ifdef CONFIG_ZONE_DMA
-#define IFDEF_ZONE_DMA(X) X
-#else
-#define IFDEF_ZONE_DMA(X)
-#endif
-
-#ifdef CONFIG_ZONE_DMA32
-#define IFDEF_ZONE_DMA32(X) X
-#else
-#define IFDEF_ZONE_DMA32(X)
-#endif
-
-#ifdef CONFIG_HIGHMEM
-#define IFDEF_ZONE_HIGHMEM(X) X
-#else
-#define IFDEF_ZONE_HIGHMEM(X)
-#endif
-
-#define ZONE_TYPE \
- IFDEF_ZONE_DMA( EM (ZONE_DMA, "DMA")) \
- IFDEF_ZONE_DMA32( EM (ZONE_DMA32, "DMA32")) \
- EM (ZONE_NORMAL, "Normal") \
- IFDEF_ZONE_HIGHMEM( EM (ZONE_HIGHMEM,"HighMem")) \
- EMe(ZONE_MOVABLE,"Movable")
-
-/*
- * First define the enums in the above macros to be exported to userspace
- * via TRACE_DEFINE_ENUM().
- */
-#undef EM
-#undef EMe
-#define EM(a, b) TRACE_DEFINE_ENUM(a);
-#define EMe(a, b) TRACE_DEFINE_ENUM(a);
-
-COMPACTION_STATUS
-ZONE_TYPE
-
-/*
- * Now redefine the EM() and EMe() macros to map the enums to the strings
- * that will be printed in the output.
- */
-#undef EM
-#undef EMe
-#define EM(a, b) {a, b},
-#define EMe(a, b) {a, b}
DECLARE_EVENT_CLASS(mm_compaction_isolate_template,
@@ -187,6 +131,7 @@ TRACE_EVENT(mm_compaction_begin,
__entry->sync ? "sync" : "async")
);
+#ifdef CONFIG_COMPACTION
TRACE_EVENT(mm_compaction_end,
TP_PROTO(unsigned long zone_start, unsigned long migrate_pfn,
unsigned long free_pfn, unsigned long zone_end, bool sync,
@@ -220,6 +165,7 @@ TRACE_EVENT(mm_compaction_end,
__entry->sync ? "sync" : "async",
__print_symbolic(__entry->status, COMPACTION_STATUS))
);
+#endif
TRACE_EVENT(mm_compaction_try_to_compact_pages,
@@ -248,6 +194,7 @@ TRACE_EVENT(mm_compaction_try_to_compact_pages,
__entry->prio)
);
+#ifdef CONFIG_COMPACTION
DECLARE_EVENT_CLASS(mm_compaction_suitable_template,
TP_PROTO(struct zone *zone,
@@ -295,7 +242,6 @@ DEFINE_EVENT(mm_compaction_suitable_template, mm_compaction_suitable,
TP_ARGS(zone, order, ret)
);
-#ifdef CONFIG_COMPACTION
DECLARE_EVENT_CLASS(mm_compaction_defer_template,
TP_PROTO(struct zone *zone, int order),
diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h
new file mode 100644
index 000000000000..c566ddc87f73
--- /dev/null
+++ b/include/trace/events/fs_dax.h
@@ -0,0 +1,156 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM fs_dax
+
+#if !defined(_TRACE_FS_DAX_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FS_DAX_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(dax_pmd_fault_class,
+ TP_PROTO(struct inode *inode, struct vm_fault *vmf,
+ pgoff_t max_pgoff, int result),
+ TP_ARGS(inode, vmf, max_pgoff, result),
+ TP_STRUCT__entry(
+ __field(unsigned long, ino)
+ __field(unsigned long, vm_start)
+ __field(unsigned long, vm_end)
+ __field(unsigned long, vm_flags)
+ __field(unsigned long, address)
+ __field(pgoff_t, pgoff)
+ __field(pgoff_t, max_pgoff)
+ __field(dev_t, dev)
+ __field(unsigned int, flags)
+ __field(int, result)
+ ),
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->vm_start = vmf->vma->vm_start;
+ __entry->vm_end = vmf->vma->vm_end;
+ __entry->vm_flags = vmf->vma->vm_flags;
+ __entry->address = vmf->address;
+ __entry->flags = vmf->flags;
+ __entry->pgoff = vmf->pgoff;
+ __entry->max_pgoff = max_pgoff;
+ __entry->result = result;
+ ),
+ TP_printk("dev %d:%d ino %#lx %s %s address %#lx vm_start "
+ "%#lx vm_end %#lx pgoff %#lx max_pgoff %#lx %s",
+ MAJOR(__entry->dev),
+ MINOR(__entry->dev),
+ __entry->ino,
+ __entry->vm_flags & VM_SHARED ? "shared" : "private",
+ __print_flags(__entry->flags, "|", FAULT_FLAG_TRACE),
+ __entry->address,
+ __entry->vm_start,
+ __entry->vm_end,
+ __entry->pgoff,
+ __entry->max_pgoff,
+ __print_flags(__entry->result, "|", VM_FAULT_RESULT_TRACE)
+ )
+)
+
+#define DEFINE_PMD_FAULT_EVENT(name) \
+DEFINE_EVENT(dax_pmd_fault_class, name, \
+ TP_PROTO(struct inode *inode, struct vm_fault *vmf, \
+ pgoff_t max_pgoff, int result), \
+ TP_ARGS(inode, vmf, max_pgoff, result))
+
+DEFINE_PMD_FAULT_EVENT(dax_pmd_fault);
+DEFINE_PMD_FAULT_EVENT(dax_pmd_fault_done);
+
+DECLARE_EVENT_CLASS(dax_pmd_load_hole_class,
+ TP_PROTO(struct inode *inode, struct vm_fault *vmf,
+ struct page *zero_page,
+ void *radix_entry),
+ TP_ARGS(inode, vmf, zero_page, radix_entry),
+ TP_STRUCT__entry(
+ __field(unsigned long, ino)
+ __field(unsigned long, vm_flags)
+ __field(unsigned long, address)
+ __field(struct page *, zero_page)
+ __field(void *, radix_entry)
+ __field(dev_t, dev)
+ ),
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->vm_flags = vmf->vma->vm_flags;
+ __entry->address = vmf->address;
+ __entry->zero_page = zero_page;
+ __entry->radix_entry = radix_entry;
+ ),
+ TP_printk("dev %d:%d ino %#lx %s address %#lx zero_page %p "
+ "radix_entry %#lx",
+ MAJOR(__entry->dev),
+ MINOR(__entry->dev),
+ __entry->ino,
+ __entry->vm_flags & VM_SHARED ? "shared" : "private",
+ __entry->address,
+ __entry->zero_page,
+ (unsigned long)__entry->radix_entry
+ )
+)
+
+#define DEFINE_PMD_LOAD_HOLE_EVENT(name) \
+DEFINE_EVENT(dax_pmd_load_hole_class, name, \
+ TP_PROTO(struct inode *inode, struct vm_fault *vmf, \
+ struct page *zero_page, void *radix_entry), \
+ TP_ARGS(inode, vmf, zero_page, radix_entry))
+
+DEFINE_PMD_LOAD_HOLE_EVENT(dax_pmd_load_hole);
+DEFINE_PMD_LOAD_HOLE_EVENT(dax_pmd_load_hole_fallback);
+
+DECLARE_EVENT_CLASS(dax_pmd_insert_mapping_class,
+ TP_PROTO(struct inode *inode, struct vm_fault *vmf,
+ long length, pfn_t pfn, void *radix_entry),
+ TP_ARGS(inode, vmf, length, pfn, radix_entry),
+ TP_STRUCT__entry(
+ __field(unsigned long, ino)
+ __field(unsigned long, vm_flags)
+ __field(unsigned long, address)
+ __field(long, length)
+ __field(u64, pfn_val)
+ __field(void *, radix_entry)
+ __field(dev_t, dev)
+ __field(int, write)
+ ),
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->vm_flags = vmf->vma->vm_flags;
+ __entry->address = vmf->address;
+ __entry->write = vmf->flags & FAULT_FLAG_WRITE;
+ __entry->length = length;
+ __entry->pfn_val = pfn.val;
+ __entry->radix_entry = radix_entry;
+ ),
+ TP_printk("dev %d:%d ino %#lx %s %s address %#lx length %#lx "
+ "pfn %#llx %s radix_entry %#lx",
+ MAJOR(__entry->dev),
+ MINOR(__entry->dev),
+ __entry->ino,
+ __entry->vm_flags & VM_SHARED ? "shared" : "private",
+ __entry->write ? "write" : "read",
+ __entry->address,
+ __entry->length,
+ __entry->pfn_val & ~PFN_FLAGS_MASK,
+ __print_flags_u64(__entry->pfn_val & PFN_FLAGS_MASK, "|",
+ PFN_FLAGS_TRACE),
+ (unsigned long)__entry->radix_entry
+ )
+)
+
+#define DEFINE_PMD_INSERT_MAPPING_EVENT(name) \
+DEFINE_EVENT(dax_pmd_insert_mapping_class, name, \
+ TP_PROTO(struct inode *inode, struct vm_fault *vmf, \
+ long length, pfn_t pfn, void *radix_entry), \
+ TP_ARGS(inode, vmf, length, pfn, radix_entry))
+
+DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping);
+DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping_fallback);
+
+#endif /* _TRACE_FS_DAX_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 15bf875d0e4a..12cd88c86b66 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -1,3 +1,6 @@
+#include <linux/node.h>
+#include <linux/mmzone.h>
+#include <linux/compaction.h>
/*
* The order of these masks is important. Matching masks will be seen
* first and the left over flags will end up showing by themselves.
@@ -171,3 +174,98 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
(flags) ? __print_flags(flags, "|", \
__def_vmaflag_names \
) : "none"
+
+#ifdef CONFIG_COMPACTION
+#define COMPACTION_STATUS \
+ EM( COMPACT_SKIPPED, "skipped") \
+ EM( COMPACT_DEFERRED, "deferred") \
+ EM( COMPACT_CONTINUE, "continue") \
+ EM( COMPACT_SUCCESS, "success") \
+ EM( COMPACT_PARTIAL_SKIPPED, "partial_skipped") \
+ EM( COMPACT_COMPLETE, "complete") \
+ EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \
+ EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \
+ EMe(COMPACT_CONTENDED, "contended")
+
+/* High-level compaction status feedback */
+#define COMPACTION_FAILED 1
+#define COMPACTION_WITHDRAWN 2
+#define COMPACTION_PROGRESS 3
+
+#define compact_result_to_feedback(result) \
+({ \
+ enum compact_result __result = result; \
+ (compaction_failed(__result)) ? COMPACTION_FAILED : \
+ (compaction_withdrawn(__result)) ? COMPACTION_WITHDRAWN : COMPACTION_PROGRESS; \
+})
+
+#define COMPACTION_FEEDBACK \
+ EM(COMPACTION_FAILED, "failed") \
+ EM(COMPACTION_WITHDRAWN, "withdrawn") \
+ EMe(COMPACTION_PROGRESS, "progress")
+
+#define COMPACTION_PRIORITY \
+ EM(COMPACT_PRIO_SYNC_FULL, "COMPACT_PRIO_SYNC_FULL") \
+ EM(COMPACT_PRIO_SYNC_LIGHT, "COMPACT_PRIO_SYNC_LIGHT") \
+ EMe(COMPACT_PRIO_ASYNC, "COMPACT_PRIO_ASYNC")
+#else
+#define COMPACTION_STATUS
+#define COMPACTION_PRIORITY
+#define COMPACTION_FEEDBACK
+#endif
+
+#ifdef CONFIG_ZONE_DMA
+#define IFDEF_ZONE_DMA(X) X
+#else
+#define IFDEF_ZONE_DMA(X)
+#endif
+
+#ifdef CONFIG_ZONE_DMA32
+#define IFDEF_ZONE_DMA32(X) X
+#else
+#define IFDEF_ZONE_DMA32(X)
+#endif
+
+#ifdef CONFIG_HIGHMEM
+#define IFDEF_ZONE_HIGHMEM(X) X
+#else
+#define IFDEF_ZONE_HIGHMEM(X)
+#endif
+
+#define ZONE_TYPE \
+ IFDEF_ZONE_DMA( EM (ZONE_DMA, "DMA")) \
+ IFDEF_ZONE_DMA32( EM (ZONE_DMA32, "DMA32")) \
+ EM (ZONE_NORMAL, "Normal") \
+ IFDEF_ZONE_HIGHMEM( EM (ZONE_HIGHMEM,"HighMem")) \
+ EMe(ZONE_MOVABLE,"Movable")
+
+#define LRU_NAMES \
+ EM (LRU_INACTIVE_ANON, "inactive_anon") \
+ EM (LRU_ACTIVE_ANON, "active_anon") \
+ EM (LRU_INACTIVE_FILE, "inactive_file") \
+ EM (LRU_ACTIVE_FILE, "active_file") \
+ EMe(LRU_UNEVICTABLE, "unevictable")
+
+/*
+ * First define the enums in the above macros to be exported to userspace
+ * via TRACE_DEFINE_ENUM().
+ */
+#undef EM
+#undef EMe
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define EMe(a, b) TRACE_DEFINE_ENUM(a);
+
+COMPACTION_STATUS
+COMPACTION_PRIORITY
+COMPACTION_FEEDBACK
+ZONE_TYPE
+LRU_NAMES
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings
+ * that will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a, b) {a, b},
+#define EMe(a, b) {a, b}
diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 1e974983757e..38baeb27221a 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -4,6 +4,7 @@
#if !defined(_TRACE_OOM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_OOM_H
#include <linux/tracepoint.h>
+#include <trace/events/mmflags.h>
TRACE_EVENT(oom_score_adj_update,
@@ -27,6 +28,86 @@ TRACE_EVENT(oom_score_adj_update,
__entry->pid, __entry->comm, __entry->oom_score_adj)
);
+TRACE_EVENT(reclaim_retry_zone,
+
+ TP_PROTO(struct zoneref *zoneref,
+ int order,
+ unsigned long reclaimable,
+ unsigned long available,
+ unsigned long min_wmark,
+ int no_progress_loops,
+ bool wmark_check),
+
+ TP_ARGS(zoneref, order, reclaimable, available, min_wmark, no_progress_loops, wmark_check),
+
+ TP_STRUCT__entry(
+ __field( int, node)
+ __field( int, zone_idx)
+ __field( int, order)
+ __field( unsigned long, reclaimable)
+ __field( unsigned long, available)
+ __field( unsigned long, min_wmark)
+ __field( int, no_progress_loops)
+ __field( bool, wmark_check)
+ ),
+
+ TP_fast_assign(
+ __entry->node = zone_to_nid(zoneref->zone);
+ __entry->zone_idx = zoneref->zone_idx;
+ __entry->order = order;
+ __entry->reclaimable = reclaimable;
+ __entry->available = available;
+ __entry->min_wmark = min_wmark;
+ __entry->no_progress_loops = no_progress_loops;
+ __entry->wmark_check = wmark_check;
+ ),
+
+ TP_printk("node=%d zone=%-8s order=%d reclaimable=%lu available=%lu min_wmark=%lu no_progress_loops=%d wmark_check=%d",
+ __entry->node, __print_symbolic(__entry->zone_idx, ZONE_TYPE),
+ __entry->order,
+ __entry->reclaimable, __entry->available, __entry->min_wmark,
+ __entry->no_progress_loops,
+ __entry->wmark_check)
+);
+
+#ifdef CONFIG_COMPACTION
+TRACE_EVENT(compact_retry,
+
+ TP_PROTO(int order,
+ enum compact_priority priority,
+ enum compact_result result,
+ int retries,
+ int max_retries,
+ bool ret),
+
+ TP_ARGS(order, priority, result, retries, max_retries, ret),
+
+ TP_STRUCT__entry(
+ __field( int, order)
+ __field( int, priority)
+ __field( int, result)
+ __field( int, retries)
+ __field( int, max_retries)
+ __field( bool, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->order = order;
+ __entry->priority = priority;
+ __entry->result = compact_result_to_feedback(result);
+ __entry->retries = retries;
+ __entry->max_retries = max_retries;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("order=%d priority=%s compaction_result=%s retries=%d max_retries=%d should_retry=%d",
+ __entry->order,
+ __print_symbolic(__entry->priority, COMPACTION_PRIORITY),
+ __print_symbolic(__entry->result, COMPACTION_FEEDBACK),
+ __entry->retries, __entry->max_retries,
+ __entry->ret)
+);
+#endif /* CONFIG_COMPACTION */
#endif
/* This part must be outside protection */
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index c88fd0934e7e..27e8a5c77579 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -15,6 +15,7 @@
#define RECLAIM_WB_MIXED 0x0010u
#define RECLAIM_WB_SYNC 0x0004u /* Unused, all reclaim async */
#define RECLAIM_WB_ASYNC 0x0008u
+#define RECLAIM_WB_LRU (RECLAIM_WB_ANON|RECLAIM_WB_FILE)
#define show_reclaim_flags(flags) \
(flags) ? __print_flags(flags, "|", \
@@ -269,26 +270,27 @@ TRACE_EVENT(mm_shrink_slab_end,
__entry->retval)
);
-DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
-
+TRACE_EVENT(mm_vmscan_lru_isolate,
TP_PROTO(int classzone_idx,
int order,
unsigned long nr_requested,
unsigned long nr_scanned,
+ unsigned long nr_skipped,
unsigned long nr_taken,
isolate_mode_t isolate_mode,
- int file),
+ int lru),
- TP_ARGS(classzone_idx, order, nr_requested, nr_scanned, nr_taken, isolate_mode, file),
+ TP_ARGS(classzone_idx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, isolate_mode, lru),
TP_STRUCT__entry(
__field(int, classzone_idx)
__field(int, order)
__field(unsigned long, nr_requested)
__field(unsigned long, nr_scanned)
+ __field(unsigned long, nr_skipped)
__field(unsigned long, nr_taken)
__field(isolate_mode_t, isolate_mode)
- __field(int, file)
+ __field(int, lru)
),
TP_fast_assign(
@@ -296,47 +298,21 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
__entry->order = order;
__entry->nr_requested = nr_requested;
__entry->nr_scanned = nr_scanned;
+ __entry->nr_skipped = nr_skipped;
__entry->nr_taken = nr_taken;
__entry->isolate_mode = isolate_mode;
- __entry->file = file;
+ __entry->lru = lru;
),
- TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu file=%d",
+ TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s",
__entry->isolate_mode,
__entry->classzone_idx,
__entry->order,
__entry->nr_requested,
__entry->nr_scanned,
+ __entry->nr_skipped,
__entry->nr_taken,
- __entry->file)
-);
-
-DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate,
-
- TP_PROTO(int classzone_idx,
- int order,
- unsigned long nr_requested,
- unsigned long nr_scanned,
- unsigned long nr_taken,
- isolate_mode_t isolate_mode,
- int file),
-
- TP_ARGS(classzone_idx, order, nr_requested, nr_scanned, nr_taken, isolate_mode, file)
-
-);
-
-DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate,
-
- TP_PROTO(int classzone_idx,
- int order,
- unsigned long nr_requested,
- unsigned long nr_scanned,
- unsigned long nr_taken,
- isolate_mode_t isolate_mode,
- int file),
-
- TP_ARGS(classzone_idx, order, nr_requested, nr_scanned, nr_taken, isolate_mode, file)
-
+ __print_symbolic(__entry->lru, LRU_NAMES))
);
TRACE_EVENT(mm_vmscan_writepage,
@@ -365,14 +341,27 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
TP_PROTO(int nid,
unsigned long nr_scanned, unsigned long nr_reclaimed,
+ unsigned long nr_dirty, unsigned long nr_writeback,
+ unsigned long nr_congested, unsigned long nr_immediate,
+ unsigned long nr_activate, unsigned long nr_ref_keep,
+ unsigned long nr_unmap_fail,
int priority, int file),
- TP_ARGS(nid, nr_scanned, nr_reclaimed, priority, file),
+ TP_ARGS(nid, nr_scanned, nr_reclaimed, nr_dirty, nr_writeback,
+ nr_congested, nr_immediate, nr_activate, nr_ref_keep,
+ nr_unmap_fail, priority, file),
TP_STRUCT__entry(
__field(int, nid)
__field(unsigned long, nr_scanned)
__field(unsigned long, nr_reclaimed)
+ __field(unsigned long, nr_dirty)
+ __field(unsigned long, nr_writeback)
+ __field(unsigned long, nr_congested)
+ __field(unsigned long, nr_immediate)
+ __field(unsigned long, nr_activate)
+ __field(unsigned long, nr_ref_keep)
+ __field(unsigned long, nr_unmap_fail)
__field(int, priority)
__field(int, reclaim_flags)
),
@@ -381,17 +370,102 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
__entry->nid = nid;
__entry->nr_scanned = nr_scanned;
__entry->nr_reclaimed = nr_reclaimed;
+ __entry->nr_dirty = nr_dirty;
+ __entry->nr_writeback = nr_writeback;
+ __entry->nr_congested = nr_congested;
+ __entry->nr_immediate = nr_immediate;
+ __entry->nr_activate = nr_activate;
+ __entry->nr_ref_keep = nr_ref_keep;
+ __entry->nr_unmap_fail = nr_unmap_fail;
__entry->priority = priority;
__entry->reclaim_flags = trace_shrink_flags(file);
),
- TP_printk("nid=%d nr_scanned=%ld nr_reclaimed=%ld priority=%d flags=%s",
+ TP_printk("nid=%d nr_scanned=%ld nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate=%ld nr_ref_keep=%ld nr_unmap_fail=%ld priority=%d flags=%s",
__entry->nid,
__entry->nr_scanned, __entry->nr_reclaimed,
+ __entry->nr_dirty, __entry->nr_writeback,
+ __entry->nr_congested, __entry->nr_immediate,
+ __entry->nr_activate, __entry->nr_ref_keep,
+ __entry->nr_unmap_fail, __entry->priority,
+ show_reclaim_flags(__entry->reclaim_flags))
+);
+
+TRACE_EVENT(mm_vmscan_lru_shrink_active,
+
+ TP_PROTO(int nid, unsigned long nr_taken,
+ unsigned long nr_active, unsigned long nr_deactivated,
+ unsigned long nr_referenced, int priority, int file),
+
+ TP_ARGS(nid, nr_taken, nr_active, nr_deactivated, nr_referenced, priority, file),
+
+ TP_STRUCT__entry(
+ __field(int, nid)
+ __field(unsigned long, nr_taken)
+ __field(unsigned long, nr_active)
+ __field(unsigned long, nr_deactivated)
+ __field(unsigned long, nr_referenced)
+ __field(int, priority)
+ __field(int, reclaim_flags)
+ ),
+
+ TP_fast_assign(
+ __entry->nid = nid;
+ __entry->nr_taken = nr_taken;
+ __entry->nr_active = nr_active;
+ __entry->nr_deactivated = nr_deactivated;
+ __entry->nr_referenced = nr_referenced;
+ __entry->priority = priority;
+ __entry->reclaim_flags = trace_shrink_flags(file);
+ ),
+
+ TP_printk("nid=%d nr_taken=%ld nr_active=%ld nr_deactivated=%ld nr_referenced=%ld priority=%d flags=%s",
+ __entry->nid,
+ __entry->nr_taken,
+ __entry->nr_active, __entry->nr_deactivated, __entry->nr_referenced,
__entry->priority,
show_reclaim_flags(__entry->reclaim_flags))
);
+TRACE_EVENT(mm_vmscan_inactive_list_is_low,
+
+ TP_PROTO(int nid, int reclaim_idx,
+ unsigned long total_inactive, unsigned long inactive,
+ unsigned long total_active, unsigned long active,
+ unsigned long ratio, int file),
+
+ TP_ARGS(nid, reclaim_idx, total_inactive, inactive, total_active, active, ratio, file),
+
+ TP_STRUCT__entry(
+ __field(int, nid)
+ __field(int, reclaim_idx)
+ __field(unsigned long, total_inactive)
+ __field(unsigned long, inactive)
+ __field(unsigned long, total_active)
+ __field(unsigned long, active)
+ __field(unsigned long, ratio)
+ __field(int, reclaim_flags)
+ ),
+
+ TP_fast_assign(
+ __entry->nid = nid;
+ __entry->reclaim_idx = reclaim_idx;
+ __entry->total_inactive = total_inactive;
+ __entry->inactive = inactive;
+ __entry->total_active = total_active;
+ __entry->active = active;
+ __entry->ratio = ratio;
+ __entry->reclaim_flags = trace_shrink_flags(file) & RECLAIM_WB_LRU;
+ ),
+
+ TP_printk("nid=%d reclaim_idx=%d total_inactive=%ld inactive=%ld total_active=%ld active=%ld ratio=%ld flags=%s",
+ __entry->nid,
+ __entry->reclaim_idx,
+ __entry->total_inactive, __entry->inactive,
+ __entry->total_active, __entry->active,
+ __entry->ratio,
+ show_reclaim_flags(__entry->reclaim_flags))
+);
#endif /* _TRACE_VMSCAN_H */
/* This part must be outside protection */
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 2ccd9ccbf9ef..7bd8783a590f 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -31,7 +31,7 @@
#define WB_WORK_REASON \
EM( WB_REASON_BACKGROUND, "background") \
- EM( WB_REASON_TRY_TO_FREE_PAGES, "try_to_free_pages") \
+ EM( WB_REASON_VMSCAN, "vmscan") \
EM( WB_REASON_SYNC, "sync") \
EM( WB_REASON_PERIODIC, "periodic") \
EM( WB_REASON_LAPTOP_TIMER, "laptop_timer") \
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 5c06f4af8323..00f643164ca2 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -283,8 +283,16 @@ TRACE_MAKE_SYSTEM_STR();
trace_print_symbols_seq(p, value, symbols); \
})
+#undef __print_flags_u64
#undef __print_symbolic_u64
#if BITS_PER_LONG == 32
+#define __print_flags_u64(flag, delim, flag_array...) \
+ ({ \
+ static const struct trace_print_flags_u64 __flags[] = \
+ { flag_array, { -1, NULL } }; \
+ trace_print_flags_seq_u64(p, delim, flag, __flags); \
+ })
+
#define __print_symbolic_u64(value, symbol_array...) \
({ \
static const struct trace_print_flags_u64 symbols[] = \
@@ -292,6 +300,9 @@ TRACE_MAKE_SYSTEM_STR();
trace_print_symbols_seq_u64(p, value, symbols); \
})
#else
+#define __print_flags_u64(flag, delim, flag_array...) \
+ __print_flags(flag, delim, flag_array)
+
#define __print_symbolic_u64(value, symbol_array...) \
__print_symbolic(value, symbol_array)
#endif
diff --git a/include/uapi/asm-generic/ioctl.h b/include/uapi/asm-generic/ioctl.h
index 7e7c11b52143..749b32fe5623 100644
--- a/include/uapi/asm-generic/ioctl.h
+++ b/include/uapi/asm-generic/ioctl.h
@@ -48,6 +48,9 @@
/*
* Direction bits, which any architecture can choose to override
* before including this file.
+ *
+ * NOTE: _IOC_WRITE means userland is writing and kernel is
+ * reading. _IOC_READ means userland is reading and kernel is writing.
*/
#ifndef _IOC_NONE
@@ -72,7 +75,12 @@
#define _IOC_TYPECHECK(t) (sizeof(t))
#endif
-/* used to create numbers */
+/*
+ * Used to create numbers.
+ *
+ * NOTE: _IOW means userland is writing and kernel is reading. _IOR
+ * means userland is reading and kernel is writing.
+ */
#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0)
#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(size)))
#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size)))
diff --git a/include/uapi/linux/auto_dev-ioctl.h b/include/uapi/linux/auto_dev-ioctl.h
index 021ed331dd71..744b3d060968 100644
--- a/include/uapi/linux/auto_dev-ioctl.h
+++ b/include/uapi/linux/auto_dev-ioctl.h
@@ -113,17 +113,13 @@ struct autofs_dev_ioctl {
static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in)
{
- memset(in, 0, sizeof(struct autofs_dev_ioctl));
+ memset(in, 0, AUTOFS_DEV_IOCTL_SIZE);
in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
- in->size = sizeof(struct autofs_dev_ioctl);
+ in->size = AUTOFS_DEV_IOCTL_SIZE;
in->ioctlfd = -1;
}
-/*
- * If you change this make sure you make the corresponding change
- * to autofs-dev-ioctl.c:lookup_ioctl()
- */
enum {
/* Get various version info */
AUTOFS_DEV_IOCTL_VERSION_CMD = 0x71,
@@ -160,8 +156,6 @@ enum {
AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD,
};
-#define AUTOFS_IOCTL 0x93
-
#define AUTOFS_DEV_IOCTL_VERSION \
_IOWR(AUTOFS_IOCTL, \
AUTOFS_DEV_IOCTL_VERSION_CMD, struct autofs_dev_ioctl)
diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h
index 1bfc3ed8b284..aa63451ef20a 100644
--- a/include/uapi/linux/auto_fs.h
+++ b/include/uapi/linux/auto_fs.h
@@ -61,12 +61,23 @@ struct autofs_packet_expire {
char name[NAME_MAX+1];
};
-#define AUTOFS_IOC_READY _IO(0x93, 0x60)
-#define AUTOFS_IOC_FAIL _IO(0x93, 0x61)
-#define AUTOFS_IOC_CATATONIC _IO(0x93, 0x62)
-#define AUTOFS_IOC_PROTOVER _IOR(0x93, 0x63, int)
-#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93, 0x64, compat_ulong_t)
-#define AUTOFS_IOC_SETTIMEOUT _IOWR(0x93, 0x64, unsigned long)
-#define AUTOFS_IOC_EXPIRE _IOR(0x93, 0x65, struct autofs_packet_expire)
+#define AUTOFS_IOCTL 0x93
+
+enum {
+ AUTOFS_IOC_READY_CMD = 0x60,
+ AUTOFS_IOC_FAIL_CMD,
+ AUTOFS_IOC_CATATONIC_CMD,
+ AUTOFS_IOC_PROTOVER_CMD,
+ AUTOFS_IOC_SETTIMEOUT_CMD,
+ AUTOFS_IOC_EXPIRE_CMD,
+};
+
+#define AUTOFS_IOC_READY _IO(AUTOFS_IOCTL, AUTOFS_IOC_READY_CMD)
+#define AUTOFS_IOC_FAIL _IO(AUTOFS_IOCTL, AUTOFS_IOC_FAIL_CMD)
+#define AUTOFS_IOC_CATATONIC _IO(AUTOFS_IOCTL, AUTOFS_IOC_CATATONIC_CMD)
+#define AUTOFS_IOC_PROTOVER _IOR(AUTOFS_IOCTL, AUTOFS_IOC_PROTOVER_CMD, int)
+#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(AUTOFS_IOCTL, AUTOFS_IOC_SETTIMEOUT_CMD, compat_ulong_t)
+#define AUTOFS_IOC_SETTIMEOUT _IOWR(AUTOFS_IOCTL, AUTOFS_IOC_SETTIMEOUT_CMD, unsigned long)
+#define AUTOFS_IOC_EXPIRE _IOR(AUTOFS_IOCTL, AUTOFS_IOC_EXPIRE_CMD, struct autofs_packet_expire)
#endif /* _UAPI_LINUX_AUTO_FS_H */
diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h
index 8f8f1bdcca8c..7c6da423d54e 100644
--- a/include/uapi/linux/auto_fs4.h
+++ b/include/uapi/linux/auto_fs4.h
@@ -148,10 +148,16 @@ union autofs_v5_packet_union {
autofs_packet_expire_direct_t expire_direct;
};
-#define AUTOFS_IOC_EXPIRE_MULTI _IOW(0x93, 0x66, int)
-#define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI
-#define AUTOFS_IOC_EXPIRE_DIRECT AUTOFS_IOC_EXPIRE_MULTI
-#define AUTOFS_IOC_PROTOSUBVER _IOR(0x93, 0x67, int)
-#define AUTOFS_IOC_ASKUMOUNT _IOR(0x93, 0x70, int)
+enum {
+ AUTOFS_IOC_EXPIRE_MULTI_CMD = 0x66, /* AUTOFS_IOC_EXPIRE_CMD + 1 */
+ AUTOFS_IOC_PROTOSUBVER_CMD,
+ AUTOFS_IOC_ASKUMOUNT_CMD = 0x70, /* AUTOFS_DEV_IOCTL_VERSION_CMD - 1 */
+};
+
+#define AUTOFS_IOC_EXPIRE_MULTI _IOW(AUTOFS_IOCTL, AUTOFS_IOC_EXPIRE_MULTI_CMD, int)
+#define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI
+#define AUTOFS_IOC_EXPIRE_DIRECT AUTOFS_IOC_EXPIRE_MULTI
+#define AUTOFS_IOC_PROTOSUBVER _IOR(AUTOFS_IOCTL, AUTOFS_IOC_PROTOSUBVER_CMD, int)
+#define AUTOFS_IOC_ASKUMOUNT _IOR(AUTOFS_IOCTL, AUTOFS_IOC_ASKUMOUNT_CMD, int)
#endif /* _LINUX_AUTO_FS4_H */
diff --git a/include/uapi/linux/mqueue.h b/include/uapi/linux/mqueue.h
index d0a2b8e89813..bbd5116ea739 100644
--- a/include/uapi/linux/mqueue.h
+++ b/include/uapi/linux/mqueue.h
@@ -18,6 +18,8 @@
#ifndef _LINUX_MQUEUE_H
#define _LINUX_MQUEUE_H
+#include <linux/types.h>
+
#define MQ_PRIO_MAX 32768
/* per-uid limit of kernel memory used by mqueue, in bytes */
#define MQ_BYTES_MAX 819200
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 9057d7af3ae1..c055947c5c98 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -11,13 +11,20 @@
#include <linux/types.h>
-#define UFFD_API ((__u64)0xAA)
/*
- * After implementing the respective features it will become:
- * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
- * UFFD_FEATURE_EVENT_FORK)
+ * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and
+ * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In
+ * userfaultfd.h we assumed the kernel was reading (instead _IOC_READ
+ * means the userland is reading).
*/
-#define UFFD_API_FEATURES (0)
+#define UFFD_API ((__u64)0xAA)
+#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_EXIT | \
+ UFFD_FEATURE_EVENT_FORK | \
+ UFFD_FEATURE_EVENT_REMAP | \
+ UFFD_FEATURE_EVENT_REMOVE | \
+ UFFD_FEATURE_EVENT_UNMAP | \
+ UFFD_FEATURE_MISSING_HUGETLBFS | \
+ UFFD_FEATURE_MISSING_SHMEM)
#define UFFD_API_IOCTLS \
((__u64)1 << _UFFDIO_REGISTER | \
(__u64)1 << _UFFDIO_UNREGISTER | \
@@ -26,6 +33,9 @@
((__u64)1 << _UFFDIO_WAKE | \
(__u64)1 << _UFFDIO_COPY | \
(__u64)1 << _UFFDIO_ZEROPAGE)
+#define UFFD_API_RANGE_IOCTLS_BASIC \
+ ((__u64)1 << _UFFDIO_WAKE | \
+ (__u64)1 << _UFFDIO_COPY)
/*
* Valid ioctl command number range with this API is from 0x00 to
@@ -72,6 +82,21 @@ struct uffd_msg {
} pagefault;
struct {
+ __u32 ufd;
+ } fork;
+
+ struct {
+ __u64 from;
+ __u64 to;
+ __u64 len;
+ } remap;
+
+ struct {
+ __u64 start;
+ __u64 end;
+ } remove;
+
+ struct {
/* unused reserved fields */
__u64 reserved1;
__u64 reserved2;
@@ -84,9 +109,11 @@ struct uffd_msg {
* Start at 0x12 and not at 0 to be more strict against bugs.
*/
#define UFFD_EVENT_PAGEFAULT 0x12
-#if 0 /* not available yet */
#define UFFD_EVENT_FORK 0x13
-#endif
+#define UFFD_EVENT_REMAP 0x14
+#define UFFD_EVENT_REMOVE 0x15
+#define UFFD_EVENT_UNMAP 0x16
+#define UFFD_EVENT_EXIT 0x17
/* flags for UFFD_EVENT_PAGEFAULT */
#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */
@@ -104,11 +131,39 @@ struct uffdio_api {
* Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
* are to be considered implicitly always enabled in all kernels as
* long as the uffdio_api.api requested matches UFFD_API.
+ *
+ * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER
+ * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on
+ * hugetlbfs virtual memory ranges. Adding or not adding
+ * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has
+ * no real functional effect after UFFDIO_API returns, but
+ * it's only useful for an initial feature set probe at
+ * UFFDIO_API time. There are two ways to use it:
+ *
+ * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the
+ * uffdio_api.features before calling UFFDIO_API, an error
+ * will be returned by UFFDIO_API on a kernel without
+ * hugetlbfs missing support
+ *
+ * 2) the UFFD_FEATURE_MISSING_HUGETLBFS can not be added in
+ * uffdio_api.features and instead it will be set by the
+ * kernel in the uffdio_api.features if the kernel supports
+ * it, so userland can later check if the feature flag is
+ * present in uffdio_api.features after UFFDIO_API
+ * succeeded.
+ *
+ * UFFD_FEATURE_MISSING_SHMEM works the same as
+ * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem
+ * (i.e. tmpfs and other shmem based APIs).
*/
-#if 0 /* not available yet */
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
#define UFFD_FEATURE_EVENT_FORK (1<<1)
-#endif
+#define UFFD_FEATURE_EVENT_REMAP (1<<2)
+#define UFFD_FEATURE_EVENT_REMOVE (1<<3)
+#define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4)
+#define UFFD_FEATURE_MISSING_SHMEM (1<<5)
+#define UFFD_FEATURE_EVENT_UNMAP (1<<6)
+#define UFFD_FEATURE_EVENT_EXIT (1<<7)
__u64 features;
__u64 ioctls;
diff --git a/init/Kconfig b/init/Kconfig
index c57764fbd8b4..a92f27da4a27 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1791,6 +1791,20 @@ config SLUB_DEBUG
SLUB sysfs support. /sys/slab will not exist and there will be
no support for cache validation etc.
+config SLUB_MEMCG_SYSFS_ON
+ default n
+ bool "Enable memcg SLUB sysfs support by default" if EXPERT
+ depends on SLUB && SYSFS && MEMCG
+ help
+ SLUB creates a directory under /sys/kernel/slab for each
+ allocation cache to host info and debug files. If memory
+ cgroup is enabled, each cache can have per memory cgroup
+ caches. SLUB can create the same sysfs directories for these
+ caches under /sys/kernel/slab/CACHE/cgroup but it can lead
+ to a very high number of debug files being created. This is
+ controlled by slub_memcg_sysfs boot parameter and this
+ config option determines the parameter's default value.
+
config COMPAT_BRK
bool "Disable heap randomization"
default y
diff --git a/init/initramfs.c b/init/initramfs.c
index b32ad7d97ac9..981f286c1d16 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -18,6 +18,7 @@
#include <linux/dirent.h>
#include <linux/syscalls.h>
#include <linux/utime.h>
+#include <linux/file.h>
static ssize_t __init xwrite(int fd, const char *p, size_t count)
{
@@ -647,6 +648,7 @@ static int __init populate_rootfs(void)
printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
free_initrd();
#endif
+ flush_delayed_fput();
/*
* Try loading default modules from initramfs. This gives
* us a chance to load before device_initcalls.
diff --git a/init/main.c b/init/main.c
index 33a6773d7246..63d640ef439b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -71,7 +71,6 @@
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
-#include <linux/file.h>
#include <linux/ptrace.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
@@ -83,6 +82,7 @@
#include <linux/proc_ns.h>
#include <linux/io.h>
#include <linux/cache.h>
+#include <linux/rodata_test.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -934,9 +934,10 @@ __setup("rodata=", set_debug_rodata);
#ifdef CONFIG_STRICT_KERNEL_RWX
static void mark_readonly(void)
{
- if (rodata_enabled)
+ if (rodata_enabled) {
mark_rodata_ro();
- else
+ rodata_test();
+ } else
pr_info("Kernel memory protection disabled.\n");
}
#else
@@ -958,8 +959,6 @@ static int __ref kernel_init(void *unused)
system_state = SYSTEM_RUNNING;
numa_default_policy();
- flush_delayed_fput();
-
rcu_end_inkernel_boot();
if (ramdisk_execute_command) {
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 7a2d8f0c8ae5..4fdd97031431 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -558,6 +558,7 @@ static void wq_add(struct mqueue_inode_info *info, int sr,
*/
static int wq_sleep(struct mqueue_inode_info *info, int sr,
ktime_t *timeout, struct ext_wait_queue *ewp)
+ __releases(&info->lock)
{
int retval;
signed long time;
diff --git a/ipc/sem.c b/ipc/sem.c
index 3ec5742b5640..e468cd1c12f0 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -159,22 +159,42 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#define SEMOPM_FAST 64 /* ~ 372 bytes on stack */
/*
+ * Switching from the mode suitable for simple ops
+ * to the mode for complex ops is costly. Therefore:
+ * use some hysteresis
+ */
+#define USE_GLOBAL_LOCK_HYSTERESIS 10
+
+/*
* Locking:
* a) global sem_lock() for read/write
* sem_undo.id_next,
* sem_array.complex_count,
- * sem_array.complex_mode
* sem_array.pending{_alter,_const},
* sem_array.sem_undo
*
* b) global or semaphore sem_lock() for read/write:
* sem_array.sem_base[i].pending_{const,alter}:
- * sem_array.complex_mode (for read)
*
* c) special:
* sem_undo_list.list_proc:
* * undo_list->lock for write
* * rcu for read
+ * use_global_lock:
+ * * global sem_lock() for write
+ * * either local or global sem_lock() for read.
+ *
+ * Memory ordering:
+ * Most ordering is enforced by using spin_lock() and spin_unlock().
+ * The special case is use_global_lock:
+ * Setting it from non-zero to 0 is a RELEASE, this is ensured by
+ * using smp_store_release().
+ * Testing if it is non-zero is an ACQUIRE, this is ensured by using
+ * smp_load_acquire().
+ * Setting it from 0 to non-zero must be ordered with regards to
+ * this smp_load_acquire(), this is guaranteed because the smp_load_acquire()
+ * is inside a spin_lock() and after a write from 0 to non-zero a
+ * spin_lock()+spin_unlock() is done.
*/
#define sc_semmsl sem_ctls[0]
@@ -273,29 +293,22 @@ static void complexmode_enter(struct sem_array *sma)
int i;
struct sem *sem;
- if (sma->complex_mode) {
- /* We are already in complex_mode. Nothing to do */
+ if (sma->use_global_lock > 0) {
+ /*
+ * We are already in global lock mode.
+ * Nothing to do, just reset the
+ * counter until we return to simple mode.
+ */
+ sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
return;
}
-
- /* We need a full barrier after seting complex_mode:
- * The write to complex_mode must be visible
- * before we read the first sem->lock spinlock state.
- */
- smp_store_mb(sma->complex_mode, true);
+ sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
for (i = 0; i < sma->sem_nsems; i++) {
sem = sma->sem_base + i;
- spin_unlock_wait(&sem->lock);
+ spin_lock(&sem->lock);
+ spin_unlock(&sem->lock);
}
- /*
- * spin_unlock_wait() is not a memory barriers, it is only a
- * control barrier. The code must pair with spin_unlock(&sem->lock),
- * thus just the control barrier is insufficient.
- *
- * smp_rmb() is sufficient, as writes cannot pass the control barrier.
- */
- smp_rmb();
}
/*
@@ -310,13 +323,17 @@ static void complexmode_tryleave(struct sem_array *sma)
*/
return;
}
- /*
- * Immediately after setting complex_mode to false,
- * a simple op can start. Thus: all memory writes
- * performed by the current operation must be visible
- * before we set complex_mode to false.
- */
- smp_store_release(&sma->complex_mode, false);
+ if (sma->use_global_lock == 1) {
+ /*
+ * Immediately after setting use_global_lock to 0,
+ * a simple op can start. Thus: all memory writes
+ * performed by the current operation must be visible
+ * before we set use_global_lock to 0.
+ */
+ smp_store_release(&sma->use_global_lock, 0);
+ } else {
+ sma->use_global_lock--;
+ }
}
#define SEM_GLOBAL_LOCK (-1)
@@ -346,30 +363,23 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
* Optimized locking is possible if no complex operation
* is either enqueued or processed right now.
*
- * Both facts are tracked by complex_mode.
+ * Both facts are tracked by use_global_mode.
*/
sem = sma->sem_base + sops->sem_num;
/*
- * Initial check for complex_mode. Just an optimization,
+ * Initial check for use_global_lock. Just an optimization,
* no locking, no memory barrier.
*/
- if (!sma->complex_mode) {
+ if (!sma->use_global_lock) {
/*
* It appears that no complex operation is around.
* Acquire the per-semaphore lock.
*/
spin_lock(&sem->lock);
- /*
- * See 51d7d5205d33
- * ("powerpc: Add smp_mb() to arch_spin_is_locked()"):
- * A full barrier is required: the write of sem->lock
- * must be visible before the read is executed
- */
- smp_mb();
-
- if (!smp_load_acquire(&sma->complex_mode)) {
+ /* pairs with smp_store_release() */
+ if (!smp_load_acquire(&sma->use_global_lock)) {
/* fast path successful! */
return sops->sem_num;
}
@@ -379,19 +389,26 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
/* slow path: acquire the full lock */
ipc_lock_object(&sma->sem_perm);
- if (sma->complex_count == 0) {
- /* False alarm:
- * There is no complex operation, thus we can switch
- * back to the fast path.
+ if (sma->use_global_lock == 0) {
+ /*
+ * The use_global_lock mode ended while we waited for
+ * sma->sem_perm.lock. Thus we must switch to locking
+ * with sem->lock.
+ * Unlike in the fast path, there is no need to recheck
+ * sma->use_global_lock after we have acquired sem->lock:
+ * We own sma->sem_perm.lock, thus use_global_lock cannot
+ * change.
*/
spin_lock(&sem->lock);
+
ipc_unlock_object(&sma->sem_perm);
return sops->sem_num;
} else {
- /* Not a false alarm, thus complete the sequence for a
- * full lock.
+ /*
+ * Not a false alarm, thus continue to use the global lock
+ * mode. No need for complexmode_enter(), this was done by
+ * the caller that has set use_global_mode to non-zero.
*/
- complexmode_enter(sma);
return SEM_GLOBAL_LOCK;
}
}
@@ -495,7 +512,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
}
sma->complex_count = 0;
- sma->complex_mode = true; /* dropped by sem_unlock below */
+ sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
INIT_LIST_HEAD(&sma->pending_alter);
INIT_LIST_HEAD(&sma->pending_const);
INIT_LIST_HEAD(&sma->list_id);
diff --git a/ipc/shm.c b/ipc/shm.c
index 81203e8ba013..06ea9ef7f54a 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -374,12 +374,12 @@ void exit_shm(struct task_struct *task)
up_write(&shm_ids(ns).rwsem);
}
-static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int shm_fault(struct vm_fault *vmf)
{
- struct file *file = vma->vm_file;
+ struct file *file = vmf->vma->vm_file;
struct shm_file_data *sfd = shm_file_data(file);
- return sfd->vm_ops->fault(vma, vmf);
+ return sfd->vm_ops->fault(vmf);
}
#ifdef CONFIG_NUMA
@@ -1091,8 +1091,8 @@ out_unlock1:
* "raddr" thing points to kernel space, and there has to be a wrapper around
* this.
*/
-long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
- unsigned long shmlba)
+long do_shmat(int shmid, char __user *shmaddr, int shmflg,
+ ulong *raddr, unsigned long shmlba)
{
struct shmid_kernel *shp;
unsigned long addr;
@@ -1113,8 +1113,13 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
goto out;
else if ((addr = (ulong)shmaddr)) {
if (addr & (shmlba - 1)) {
- if (shmflg & SHM_RND)
- addr &= ~(shmlba - 1); /* round down */
+ /*
+ * Round down to the nearest multiple of shmlba.
+ * For sane do_mmap_pgoff() parameters, avoid
+ * round downs that trigger nil-page and MAP_FIXED.
+ */
+ if ((shmflg & SHM_RND) && addr >= shmlba)
+ addr &= ~(shmlba - 1);
else
#ifndef __ARCH_FORCE_SHMLBA
if (addr & ~PAGE_MASK)
@@ -1222,7 +1227,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
goto invalid;
}
- addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
+ addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL);
*raddr = addr;
err = 0;
if (IS_ERR_VALUE(addr))
@@ -1329,7 +1334,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
*/
file = vma->vm_file;
size = i_size_read(file_inode(vma->vm_file));
- do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
+ do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
/*
* We discovered the size of the shm segment, so
* break out of here and fall through to the next
@@ -1356,7 +1361,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
if ((vma->vm_ops == &shm_vm_ops) &&
((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
(vma->vm_file == file))
- do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
+ do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
vma = next;
}
@@ -1365,7 +1370,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
* given
*/
if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
- do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
+ do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
retval = 0;
}
diff --git a/kernel/configs/android-base.config b/kernel/configs/android-base.config
index 1a8f34f63601..26a06e09a5bd 100644
--- a/kernel/configs/android-base.config
+++ b/kernel/configs/android-base.config
@@ -21,6 +21,7 @@ CONFIG_CP15_BARRIER_EMULATION=y
CONFIG_DEFAULT_SECURITY_SELINUX=y
CONFIG_EMBEDDED=y
CONFIG_FB=y
+CONFIG_HARDENED_USERCOPY=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_INET6_AH=y
CONFIG_INET6_ESP=y
@@ -129,6 +130,7 @@ CONFIG_PPP_DEFLATE=y
CONFIG_PPP_MPPE=y
CONFIG_PREEMPT=y
CONFIG_QUOTA=y
+CONFIG_RANDOMIZE_BASE=y
CONFIG_RTC_CLASS=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_SECCOMP=y
diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config
index 99127edc5204..28ee064b6744 100644
--- a/kernel/configs/android-recommended.config
+++ b/kernel/configs/android-recommended.config
@@ -1,4 +1,5 @@
# KEEP ALPHABETICALLY SORTED
+# CONFIG_AIO is not set
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_LEGACY_PTYS is not set
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 890d2bfc3855..5b4e0b98f4eb 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4925,9 +4925,9 @@ unlock:
rcu_read_unlock();
}
-static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int perf_mmap_fault(struct vm_fault *vmf)
{
- struct perf_event *event = vma->vm_file->private_data;
+ struct perf_event *event = vmf->vma->vm_file->private_data;
struct ring_buffer *rb;
int ret = VM_FAULT_SIGBUS;
@@ -4950,7 +4950,7 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
goto unlock;
get_page(vmf->page);
- vmf->page->mapping = vma->vm_file->f_mapping;
+ vmf->page->mapping = vmf->vma->vm_file->f_mapping;
vmf->page->index = vmf->pgoff;
ret = 0;
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index d416f3baf392..18c6b23edd3c 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -153,14 +153,19 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
struct page *old_page, struct page *new_page)
{
struct mm_struct *mm = vma->vm_mm;
- spinlock_t *ptl;
- pte_t *ptep;
+ struct page_vma_mapped_walk pvmw = {
+ .page = old_page,
+ .vma = vma,
+ .address = addr,
+ };
int err;
/* For mmu_notifiers */
const unsigned long mmun_start = addr;
const unsigned long mmun_end = addr + PAGE_SIZE;
struct mem_cgroup *memcg;
+ VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
+
err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
false);
if (err)
@@ -171,11 +176,11 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
err = -EAGAIN;
- ptep = page_check_address(old_page, mm, addr, &ptl, 0);
- if (!ptep) {
+ if (!page_vma_mapped_walk(&pvmw)) {
mem_cgroup_cancel_charge(new_page, memcg, false);
goto unlock;
}
+ VM_BUG_ON_PAGE(addr != pvmw.address, old_page);
get_page(new_page);
page_add_new_anon_rmap(new_page, vma, addr, false);
@@ -187,14 +192,15 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
inc_mm_counter(mm, MM_ANONPAGES);
}
- flush_cache_page(vma, addr, pte_pfn(*ptep));
- ptep_clear_flush_notify(vma, addr, ptep);
- set_pte_at_notify(mm, addr, ptep, mk_pte(new_page, vma->vm_page_prot));
+ flush_cache_page(vma, addr, pte_pfn(*pvmw.pte));
+ ptep_clear_flush_notify(vma, addr, pvmw.pte);
+ set_pte_at_notify(mm, addr, pvmw.pte,
+ mk_pte(new_page, vma->vm_page_prot));
page_remove_rmap(old_page, false);
if (!page_mapped(old_page))
try_to_free_swap(old_page);
- pte_unmap_unlock(ptep, ptl);
+ page_vma_mapped_walk_done(&pvmw);
if (vma->vm_flags & VM_LOCKED)
munlock_vma_page(old_page);
@@ -300,8 +306,8 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
retry:
/* Read the page with vaddr into memory */
- ret = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &old_page,
- &vma, NULL);
+ ret = get_user_pages_remote(NULL, mm, vaddr, 1,
+ FOLL_FORCE | FOLL_SPLIT, &old_page, &vma, NULL);
if (ret <= 0)
return ret;
diff --git a/kernel/exit.c b/kernel/exit.c
index 9960accbf2ab..90b09ca35c84 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -45,6 +45,7 @@
#include <linux/task_io_accounting_ops.h>
#include <linux/tracehook.h>
#include <linux/fs_struct.h>
+#include <linux/userfaultfd_k.h>
#include <linux/init_task.h>
#include <linux/perf_event.h>
#include <trace/events/sched.h>
@@ -547,6 +548,7 @@ static void exit_mm(void)
enter_lazy_tlb(mm, current);
task_unlock(current);
mm_update_next_owner(mm);
+ userfaultfd_exit(mm);
mmput(mm);
if (test_thread_flag(TIF_MEMDIE))
exit_oom_victim();
diff --git a/kernel/fork.c b/kernel/fork.c
index 043a2312a09a..613f51a55a85 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -55,6 +55,7 @@
#include <linux/rmap.h>
#include <linux/ksm.h>
#include <linux/acct.h>
+#include <linux/userfaultfd_k.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/freezer.h>
@@ -561,6 +562,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
struct rb_node **rb_link, *rb_parent;
int retval;
unsigned long charge;
+ LIST_HEAD(uf);
uprobe_start_dup_mmap();
if (down_write_killable(&oldmm->mmap_sem)) {
@@ -617,12 +619,13 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
if (retval)
goto fail_nomem_policy;
tmp->vm_mm = mm;
+ retval = dup_userfaultfd(tmp, &uf);
+ if (retval)
+ goto fail_nomem_anon_vma_fork;
if (anon_vma_fork(tmp, mpnt))
goto fail_nomem_anon_vma_fork;
- tmp->vm_flags &=
- ~(VM_LOCKED|VM_LOCKONFAULT|VM_UFFD_MISSING|VM_UFFD_WP);
+ tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
tmp->vm_next = tmp->vm_prev = NULL;
- tmp->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
file = tmp->vm_file;
if (file) {
struct inode *inode = file_inode(file);
@@ -678,6 +681,7 @@ out:
up_write(&mm->mmap_sem);
flush_tlb_mm(oldmm);
up_write(&oldmm->mmap_sem);
+ dup_userfaultfd_complete(&uf);
fail_uprobe_end:
uprobe_end_dup_mmap();
return retval;
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 9ecedc28b928..e6476a8e8b6a 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -172,8 +172,6 @@ EXPORT_SYMBOL(devm_memunmap);
#ifdef CONFIG_ZONE_DEVICE
static DEFINE_MUTEX(pgmap_lock);
static RADIX_TREE(pgmap_radix, GFP_KERNEL);
-#define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
-#define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
struct page_map {
struct resource res;
@@ -194,18 +192,41 @@ void put_zone_device_page(struct page *page)
}
EXPORT_SYMBOL(put_zone_device_page);
-static void pgmap_radix_release(struct resource *res)
+static unsigned long order_at(struct resource *res, unsigned long pgoff)
{
- resource_size_t key, align_start, align_size, align_end;
+ unsigned long phys_pgoff = PHYS_PFN(res->start) + pgoff;
+ unsigned long nr_pages, mask;
+
+ nr_pages = PHYS_PFN(resource_size(res));
+ if (nr_pages == pgoff)
+ return ULONG_MAX;
+
+ /*
+ * What is the largest aligned power-of-2 range available from
+ * this resource pgoff to the end of the resource range,
+ * considering the alignment of the current pgoff?
+ */
+ mask = phys_pgoff | rounddown_pow_of_two(nr_pages - pgoff);
+ if (!mask)
+ return ULONG_MAX;
+
+ return find_first_bit(&mask, BITS_PER_LONG);
+}
- align_start = res->start & ~(SECTION_SIZE - 1);
- align_size = ALIGN(resource_size(res), SECTION_SIZE);
- align_end = align_start + align_size - 1;
+#define foreach_order_pgoff(res, order, pgoff) \
+ for (pgoff = 0, order = order_at((res), pgoff); order < ULONG_MAX; \
+ pgoff += 1UL << order, order = order_at((res), pgoff))
+
+static void pgmap_radix_release(struct resource *res)
+{
+ unsigned long pgoff, order;
mutex_lock(&pgmap_lock);
- for (key = res->start; key <= res->end; key += SECTION_SIZE)
- radix_tree_delete(&pgmap_radix, key >> PA_SECTION_SHIFT);
+ foreach_order_pgoff(res, order, pgoff)
+ radix_tree_delete(&pgmap_radix, PHYS_PFN(res->start) + pgoff);
mutex_unlock(&pgmap_lock);
+
+ synchronize_rcu();
}
static unsigned long pfn_first(struct page_map *page_map)
@@ -235,7 +256,6 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
{
struct page_map *page_map = data;
struct resource *res = &page_map->res;
- resource_size_t align_start, align_size;
struct dev_pagemap *pgmap = &page_map->pgmap;
if (percpu_ref_tryget_live(pgmap->ref)) {
@@ -244,12 +264,10 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
}
/* pages are dead and unused, undo the arch mapping */
- align_start = res->start & ~(SECTION_SIZE - 1);
- align_size = ALIGN(resource_size(res), SECTION_SIZE);
mem_hotplug_begin();
- arch_remove_memory(align_start, align_size);
+ arch_remove_memory(res->start, resource_size(res));
mem_hotplug_done();
- untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
+ untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res));
pgmap_radix_release(res);
dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
"%s: failed to free all reserved pages\n", __func__);
@@ -262,7 +280,7 @@ struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
WARN_ON_ONCE(!rcu_read_lock_held());
- page_map = radix_tree_lookup(&pgmap_radix, phys >> PA_SECTION_SHIFT);
+ page_map = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys));
return page_map ? &page_map->pgmap : NULL;
}
@@ -284,17 +302,13 @@ struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
void *devm_memremap_pages(struct device *dev, struct resource *res,
struct percpu_ref *ref, struct vmem_altmap *altmap)
{
- resource_size_t key, align_start, align_size, align_end;
+ unsigned long pfn, pgoff, order;
pgprot_t pgprot = PAGE_KERNEL;
struct dev_pagemap *pgmap;
struct page_map *page_map;
int error, nid, is_ram;
- unsigned long pfn;
- align_start = res->start & ~(SECTION_SIZE - 1);
- align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
- - align_start;
- is_ram = region_intersects(align_start, align_size,
+ is_ram = region_intersects(res->start, resource_size(res),
IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
if (is_ram == REGION_MIXED) {
@@ -327,12 +341,12 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
mutex_lock(&pgmap_lock);
error = 0;
- align_end = align_start + align_size - 1;
- for (key = align_start; key <= align_end; key += SECTION_SIZE) {
+
+ foreach_order_pgoff(res, order, pgoff) {
struct dev_pagemap *dup;
rcu_read_lock();
- dup = find_dev_pagemap(key);
+ dup = find_dev_pagemap(res->start + PFN_PHYS(pgoff));
rcu_read_unlock();
if (dup) {
dev_err(dev, "%s: %pr collides with mapping for %s\n",
@@ -340,8 +354,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
error = -EBUSY;
break;
}
- error = radix_tree_insert(&pgmap_radix, key >> PA_SECTION_SHIFT,
- page_map);
+ error = __radix_tree_insert(&pgmap_radix,
+ PHYS_PFN(res->start) + pgoff, order, page_map);
if (error) {
dev_err(dev, "%s: failed: %d\n", __func__, error);
break;
@@ -355,13 +369,13 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
if (nid < 0)
nid = numa_mem_id();
- error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(align_start), 0,
- align_size);
+ error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(res->start), 0,
+ resource_size(res));
if (error)
goto err_pfn_remap;
mem_hotplug_begin();
- error = arch_add_memory(nid, align_start, align_size, true);
+ error = arch_add_memory(nid, res->start, resource_size(res), true);
mem_hotplug_done();
if (error)
goto err_add_memory;
@@ -382,7 +396,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
return __va(res->start);
err_add_memory:
- untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
+ untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res));
err_pfn_remap:
err_radix:
pgmap_radix_release(res);
diff --git a/kernel/notifier.c b/kernel/notifier.c
index fd2c9acbcc19..6196af8a8223 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -95,7 +95,7 @@ static int notifier_call_chain(struct notifier_block **nl,
if (nr_calls)
(*nr_calls)++;
- if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
+ if (ret & NOTIFY_STOP_MASK)
break;
nb = next_nb;
nr_to_call--;
diff --git a/kernel/relay.c b/kernel/relay.c
index 9b48284eac56..8a7d57ec1a4d 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -39,10 +39,10 @@ static void relay_file_mmap_close(struct vm_area_struct *vma)
/*
* fault() vm_op implementation for relay file mapping.
*/
-static int relay_buf_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int relay_buf_fault(struct vm_fault *vmf)
{
struct page *page;
- struct rchan_buf *buf = vma->vm_private_data;
+ struct rchan_buf *buf = vmf->vma->vm_private_data;
pgoff_t pgoff = vmf->pgoff;
if (!buf)
diff --git a/kernel/signal.c b/kernel/signal.c
index 13f9def8b24a..214a8feeb771 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3239,10 +3239,17 @@ int compat_restore_altstack(const compat_stack_t __user *uss)
int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp)
{
+ int err;
struct task_struct *t = current;
- return __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), &uss->ss_sp) |
- __put_user(sas_ss_flags(sp), &uss->ss_flags) |
+ err = __put_user(ptr_to_compat((void __user *)t->sas_ss_sp),
+ &uss->ss_sp) |
+ __put_user(t->sas_ss_flags, &uss->ss_flags) |
__put_user(t->sas_ss_size, &uss->ss_size);
+ if (err)
+ return err;
+ if (t->sas_ss_flags & SS_AUTODISARM)
+ sas_ss_reset(t);
+ return 0;
}
#endif
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index aea6a1218c7d..070866c32eb9 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -124,6 +124,44 @@ EXPORT_SYMBOL(trace_print_symbols_seq);
#if BITS_PER_LONG == 32
const char *
+trace_print_flags_seq_u64(struct trace_seq *p, const char *delim,
+ unsigned long long flags,
+ const struct trace_print_flags_u64 *flag_array)
+{
+ unsigned long long mask;
+ const char *str;
+ const char *ret = trace_seq_buffer_ptr(p);
+ int i, first = 1;
+
+ for (i = 0; flag_array[i].name && flags; i++) {
+
+ mask = flag_array[i].mask;
+ if ((flags & mask) != mask)
+ continue;
+
+ str = flag_array[i].name;
+ flags &= ~mask;
+ if (!first && delim)
+ trace_seq_puts(p, delim);
+ else
+ first = 0;
+ trace_seq_puts(p, str);
+ }
+
+ /* check for left over flags */
+ if (flags) {
+ if (!first && delim)
+ trace_seq_puts(p, delim);
+ trace_seq_printf(p, "0x%llx", flags);
+ }
+
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+EXPORT_SYMBOL(trace_print_flags_seq_u64);
+
+const char *
trace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
const struct trace_print_flags_u64 *symbol_array)
{
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 12b8dd640786..b5de262a9eb9 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -137,12 +137,14 @@ static void watchdog_overflow_callback(struct perf_event *event,
* Reduce the watchdog noise by only printing messages
* that are different from what cpu0 displayed.
*/
-static unsigned long cpu0_err;
+static unsigned long firstcpu_err;
+static atomic_t watchdog_cpus;
int watchdog_nmi_enable(unsigned int cpu)
{
struct perf_event_attr *wd_attr;
struct perf_event *event = per_cpu(watchdog_ev, cpu);
+ int firstcpu = 0;
/* nothing to do if the hard lockup detector is disabled */
if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
@@ -156,19 +158,22 @@ int watchdog_nmi_enable(unsigned int cpu)
if (event != NULL)
goto out_enable;
+ if (atomic_inc_return(&watchdog_cpus) == 1)
+ firstcpu = 1;
+
wd_attr = &wd_hw_attr;
wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
/* Try to register using hardware perf events */
event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
- /* save cpu0 error for future comparision */
- if (cpu == 0 && IS_ERR(event))
- cpu0_err = PTR_ERR(event);
+ /* save the first cpu's error for future comparision */
+ if (firstcpu && IS_ERR(event))
+ firstcpu_err = PTR_ERR(event);
if (!IS_ERR(event)) {
- /* only print for cpu0 or different than cpu0 */
- if (cpu == 0 || cpu0_err)
+ /* only print for the first cpu initialized */
+ if (firstcpu || firstcpu_err)
pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
goto out_save;
}
@@ -186,7 +191,7 @@ int watchdog_nmi_enable(unsigned int cpu)
smp_mb__after_atomic();
/* skip displaying the same error again */
- if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
+ if (!firstcpu && (PTR_ERR(event) == firstcpu_err))
return PTR_ERR(event);
/* vary the KERN level based on the returned errno */
@@ -222,9 +227,9 @@ void watchdog_nmi_disable(unsigned int cpu)
/* should be in cleanup, but blocks oprofile */
perf_event_release_kernel(event);
- }
- if (cpu == 0) {
+
/* watchdog_nmi_enable() expects this to be zero initially. */
- cpu0_err = 0;
+ if (atomic_dec_and_test(&watchdog_cpus))
+ firstcpu_err = 0;
}
}
diff --git a/lib/Kconfig b/lib/Kconfig
index a28604ea1429..c4d3db5cfce1 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -103,8 +103,7 @@ config CRC32
functions require M here.
config CRC32_SELFTEST
- bool "CRC32 perform self test on init"
- default n
+ tristate "CRC32 perform self test on init"
depends on CRC32
help
This option enables the CRC32 library functions to perform a
@@ -432,8 +431,7 @@ config GLOB
depends on this.
config GLOB_SELFTEST
- bool "glob self-test on init"
- default n
+ tristate "glob self-test on init"
depends on GLOB
help
This option enables a simple self-test of the glob_match
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 66fb4389f05c..55735c9bdb75 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1739,6 +1739,14 @@ config TEST_LIST_SORT
If unsure, say N.
+config TEST_SORT
+ bool "Array-based sort test"
+ depends on DEBUG_KERNEL
+ help
+ This option enables the self-test function of 'sort()' at boot.
+
+ If unsure, say N.
+
config KPROBES_SANITY_TEST
bool "Kprobes sanity tests"
depends on DEBUG_KERNEL
@@ -1790,9 +1798,10 @@ config PERCPU_TEST
If unsure, say N.
config ATOMIC64_SELFTEST
- bool "Perform an atomic64_t self-test at boot"
+ tristate "Perform an atomic64_t self-test"
help
- Enable this option to test the atomic64_t functions at boot.
+ Enable this option to test the atomic64_t functions at boot or
+ at module load time.
If unsure, say N.
diff --git a/lib/Makefile b/lib/Makefile
index f1a0364af377..445a39c21f46 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_TEST_KASAN) += test_kasan.o
obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
obj-$(CONFIG_TEST_LKM) += test_module.o
obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o
+obj-$(CONFIG_TEST_SORT) += test_sort.o
obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
@@ -92,6 +93,7 @@ obj-$(CONFIG_CRC16) += crc16.o
obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
obj-$(CONFIG_CRC_ITU_T) += crc-itu-t.o
obj-$(CONFIG_CRC32) += crc32.o
+obj-$(CONFIG_CRC32_SELFTEST) += crc32test.o
obj-$(CONFIG_CRC7) += crc7.o
obj-$(CONFIG_LIBCRC32C) += libcrc32c.o
obj-$(CONFIG_CRC8) += crc8.o
@@ -161,6 +163,7 @@ obj-$(CONFIG_CORDIC) += cordic.o
obj-$(CONFIG_DQL) += dynamic_queue_limits.o
obj-$(CONFIG_GLOB) += glob.o
+obj-$(CONFIG_GLOB_SELFTEST) += globtest.o
obj-$(CONFIG_MPILIB) += mpi/
obj-$(CONFIG_SIGNATURE) += digsig.o
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 46042901130f..fd70c0e0e673 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -15,6 +15,7 @@
#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/atomic.h>
+#include <linux/module.h>
#ifdef CONFIG_X86
#include <asm/cpufeature.h> /* for boot_cpu_has below */
@@ -241,7 +242,7 @@ static __init void test_atomic64(void)
BUG_ON(v.counter != r);
}
-static __init int test_atomics(void)
+static __init int test_atomics_init(void)
{
test_atomic();
test_atomic64();
@@ -264,4 +265,9 @@ static __init int test_atomics(void)
return 0;
}
-core_initcall(test_atomics);
+static __exit void test_atomics_exit(void) {}
+
+module_init(test_atomics_init);
+module_exit(test_atomics_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/lib/crc32.c b/lib/crc32.c
index 7fbd1a112b9d..6ddc92bc1460 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -340,827 +340,3 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
}
#endif
EXPORT_SYMBOL(crc32_be);
-
-#ifdef CONFIG_CRC32_SELFTEST
-
-/* 4096 random bytes */
-static u8 const __aligned(8) test_buf[] __initconst =
-{
- 0x5b, 0x85, 0x21, 0xcb, 0x09, 0x68, 0x7d, 0x30,
- 0xc7, 0x69, 0xd7, 0x30, 0x92, 0xde, 0x59, 0xe4,
- 0xc9, 0x6e, 0x8b, 0xdb, 0x98, 0x6b, 0xaa, 0x60,
- 0xa8, 0xb5, 0xbc, 0x6c, 0xa9, 0xb1, 0x5b, 0x2c,
- 0xea, 0xb4, 0x92, 0x6a, 0x3f, 0x79, 0x91, 0xe4,
- 0xe9, 0x70, 0x51, 0x8c, 0x7f, 0x95, 0x6f, 0x1a,
- 0x56, 0xa1, 0x5c, 0x27, 0x03, 0x67, 0x9f, 0x3a,
- 0xe2, 0x31, 0x11, 0x29, 0x6b, 0x98, 0xfc, 0xc4,
- 0x53, 0x24, 0xc5, 0x8b, 0xce, 0x47, 0xb2, 0xb9,
- 0x32, 0xcb, 0xc1, 0xd0, 0x03, 0x57, 0x4e, 0xd4,
- 0xe9, 0x3c, 0xa1, 0x63, 0xcf, 0x12, 0x0e, 0xca,
- 0xe1, 0x13, 0xd1, 0x93, 0xa6, 0x88, 0x5c, 0x61,
- 0x5b, 0xbb, 0xf0, 0x19, 0x46, 0xb4, 0xcf, 0x9e,
- 0xb6, 0x6b, 0x4c, 0x3a, 0xcf, 0x60, 0xf9, 0x7a,
- 0x8d, 0x07, 0x63, 0xdb, 0x40, 0xe9, 0x0b, 0x6f,
- 0xad, 0x97, 0xf1, 0xed, 0xd0, 0x1e, 0x26, 0xfd,
- 0xbf, 0xb7, 0xc8, 0x04, 0x94, 0xf8, 0x8b, 0x8c,
- 0xf1, 0xab, 0x7a, 0xd4, 0xdd, 0xf3, 0xe8, 0x88,
- 0xc3, 0xed, 0x17, 0x8a, 0x9b, 0x40, 0x0d, 0x53,
- 0x62, 0x12, 0x03, 0x5f, 0x1b, 0x35, 0x32, 0x1f,
- 0xb4, 0x7b, 0x93, 0x78, 0x0d, 0xdb, 0xce, 0xa4,
- 0xc0, 0x47, 0xd5, 0xbf, 0x68, 0xe8, 0x5d, 0x74,
- 0x8f, 0x8e, 0x75, 0x1c, 0xb2, 0x4f, 0x9a, 0x60,
- 0xd1, 0xbe, 0x10, 0xf4, 0x5c, 0xa1, 0x53, 0x09,
- 0xa5, 0xe0, 0x09, 0x54, 0x85, 0x5c, 0xdc, 0x07,
- 0xe7, 0x21, 0x69, 0x7b, 0x8a, 0xfd, 0x90, 0xf1,
- 0x22, 0xd0, 0xb4, 0x36, 0x28, 0xe6, 0xb8, 0x0f,
- 0x39, 0xde, 0xc8, 0xf3, 0x86, 0x60, 0x34, 0xd2,
- 0x5e, 0xdf, 0xfd, 0xcf, 0x0f, 0xa9, 0x65, 0xf0,
- 0xd5, 0x4d, 0x96, 0x40, 0xe3, 0xdf, 0x3f, 0x95,
- 0x5a, 0x39, 0x19, 0x93, 0xf4, 0x75, 0xce, 0x22,
- 0x00, 0x1c, 0x93, 0xe2, 0x03, 0x66, 0xf4, 0x93,
- 0x73, 0x86, 0x81, 0x8e, 0x29, 0x44, 0x48, 0x86,
- 0x61, 0x7c, 0x48, 0xa3, 0x43, 0xd2, 0x9c, 0x8d,
- 0xd4, 0x95, 0xdd, 0xe1, 0x22, 0x89, 0x3a, 0x40,
- 0x4c, 0x1b, 0x8a, 0x04, 0xa8, 0x09, 0x69, 0x8b,
- 0xea, 0xc6, 0x55, 0x8e, 0x57, 0xe6, 0x64, 0x35,
- 0xf0, 0xc7, 0x16, 0x9f, 0x5d, 0x5e, 0x86, 0x40,
- 0x46, 0xbb, 0xe5, 0x45, 0x88, 0xfe, 0xc9, 0x63,
- 0x15, 0xfb, 0xf5, 0xbd, 0x71, 0x61, 0xeb, 0x7b,
- 0x78, 0x70, 0x07, 0x31, 0x03, 0x9f, 0xb2, 0xc8,
- 0xa7, 0xab, 0x47, 0xfd, 0xdf, 0xa0, 0x78, 0x72,
- 0xa4, 0x2a, 0xe4, 0xb6, 0xba, 0xc0, 0x1e, 0x86,
- 0x71, 0xe6, 0x3d, 0x18, 0x37, 0x70, 0xe6, 0xff,
- 0xe0, 0xbc, 0x0b, 0x22, 0xa0, 0x1f, 0xd3, 0xed,
- 0xa2, 0x55, 0x39, 0xab, 0xa8, 0x13, 0x73, 0x7c,
- 0x3f, 0xb2, 0xd6, 0x19, 0xac, 0xff, 0x99, 0xed,
- 0xe8, 0xe6, 0xa6, 0x22, 0xe3, 0x9c, 0xf1, 0x30,
- 0xdc, 0x01, 0x0a, 0x56, 0xfa, 0xe4, 0xc9, 0x99,
- 0xdd, 0xa8, 0xd8, 0xda, 0x35, 0x51, 0x73, 0xb4,
- 0x40, 0x86, 0x85, 0xdb, 0x5c, 0xd5, 0x85, 0x80,
- 0x14, 0x9c, 0xfd, 0x98, 0xa9, 0x82, 0xc5, 0x37,
- 0xff, 0x32, 0x5d, 0xd0, 0x0b, 0xfa, 0xdc, 0x04,
- 0x5e, 0x09, 0xd2, 0xca, 0x17, 0x4b, 0x1a, 0x8e,
- 0x15, 0xe1, 0xcc, 0x4e, 0x52, 0x88, 0x35, 0xbd,
- 0x48, 0xfe, 0x15, 0xa0, 0x91, 0xfd, 0x7e, 0x6c,
- 0x0e, 0x5d, 0x79, 0x1b, 0x81, 0x79, 0xd2, 0x09,
- 0x34, 0x70, 0x3d, 0x81, 0xec, 0xf6, 0x24, 0xbb,
- 0xfb, 0xf1, 0x7b, 0xdf, 0x54, 0xea, 0x80, 0x9b,
- 0xc7, 0x99, 0x9e, 0xbd, 0x16, 0x78, 0x12, 0x53,
- 0x5e, 0x01, 0xa7, 0x4e, 0xbd, 0x67, 0xe1, 0x9b,
- 0x4c, 0x0e, 0x61, 0x45, 0x97, 0xd2, 0xf0, 0x0f,
- 0xfe, 0x15, 0x08, 0xb7, 0x11, 0x4c, 0xe7, 0xff,
- 0x81, 0x53, 0xff, 0x91, 0x25, 0x38, 0x7e, 0x40,
- 0x94, 0xe5, 0xe0, 0xad, 0xe6, 0xd9, 0x79, 0xb6,
- 0x92, 0xc9, 0xfc, 0xde, 0xc3, 0x1a, 0x23, 0xbb,
- 0xdd, 0xc8, 0x51, 0x0c, 0x3a, 0x72, 0xfa, 0x73,
- 0x6f, 0xb7, 0xee, 0x61, 0x39, 0x03, 0x01, 0x3f,
- 0x7f, 0x94, 0x2e, 0x2e, 0xba, 0x3a, 0xbb, 0xb4,
- 0xfa, 0x6a, 0x17, 0xfe, 0xea, 0xef, 0x5e, 0x66,
- 0x97, 0x3f, 0x32, 0x3d, 0xd7, 0x3e, 0xb1, 0xf1,
- 0x6c, 0x14, 0x4c, 0xfd, 0x37, 0xd3, 0x38, 0x80,
- 0xfb, 0xde, 0xa6, 0x24, 0x1e, 0xc8, 0xca, 0x7f,
- 0x3a, 0x93, 0xd8, 0x8b, 0x18, 0x13, 0xb2, 0xe5,
- 0xe4, 0x93, 0x05, 0x53, 0x4f, 0x84, 0x66, 0xa7,
- 0x58, 0x5c, 0x7b, 0x86, 0x52, 0x6d, 0x0d, 0xce,
- 0xa4, 0x30, 0x7d, 0xb6, 0x18, 0x9f, 0xeb, 0xff,
- 0x22, 0xbb, 0x72, 0x29, 0xb9, 0x44, 0x0b, 0x48,
- 0x1e, 0x84, 0x71, 0x81, 0xe3, 0x6d, 0x73, 0x26,
- 0x92, 0xb4, 0x4d, 0x2a, 0x29, 0xb8, 0x1f, 0x72,
- 0xed, 0xd0, 0xe1, 0x64, 0x77, 0xea, 0x8e, 0x88,
- 0x0f, 0xef, 0x3f, 0xb1, 0x3b, 0xad, 0xf9, 0xc9,
- 0x8b, 0xd0, 0xac, 0xc6, 0xcc, 0xa9, 0x40, 0xcc,
- 0x76, 0xf6, 0x3b, 0x53, 0xb5, 0x88, 0xcb, 0xc8,
- 0x37, 0xf1, 0xa2, 0xba, 0x23, 0x15, 0x99, 0x09,
- 0xcc, 0xe7, 0x7a, 0x3b, 0x37, 0xf7, 0x58, 0xc8,
- 0x46, 0x8c, 0x2b, 0x2f, 0x4e, 0x0e, 0xa6, 0x5c,
- 0xea, 0x85, 0x55, 0xba, 0x02, 0x0e, 0x0e, 0x48,
- 0xbc, 0xe1, 0xb1, 0x01, 0x35, 0x79, 0x13, 0x3d,
- 0x1b, 0xc0, 0x53, 0x68, 0x11, 0xe7, 0x95, 0x0f,
- 0x9d, 0x3f, 0x4c, 0x47, 0x7b, 0x4d, 0x1c, 0xae,
- 0x50, 0x9b, 0xcb, 0xdd, 0x05, 0x8d, 0x9a, 0x97,
- 0xfd, 0x8c, 0xef, 0x0c, 0x1d, 0x67, 0x73, 0xa8,
- 0x28, 0x36, 0xd5, 0xb6, 0x92, 0x33, 0x40, 0x75,
- 0x0b, 0x51, 0xc3, 0x64, 0xba, 0x1d, 0xc2, 0xcc,
- 0xee, 0x7d, 0x54, 0x0f, 0x27, 0x69, 0xa7, 0x27,
- 0x63, 0x30, 0x29, 0xd9, 0xc8, 0x84, 0xd8, 0xdf,
- 0x9f, 0x68, 0x8d, 0x04, 0xca, 0xa6, 0xc5, 0xc7,
- 0x7a, 0x5c, 0xc8, 0xd1, 0xcb, 0x4a, 0xec, 0xd0,
- 0xd8, 0x20, 0x69, 0xc5, 0x17, 0xcd, 0x78, 0xc8,
- 0x75, 0x23, 0x30, 0x69, 0xc9, 0xd4, 0xea, 0x5c,
- 0x4f, 0x6b, 0x86, 0x3f, 0x8b, 0xfe, 0xee, 0x44,
- 0xc9, 0x7c, 0xb7, 0xdd, 0x3e, 0xe5, 0xec, 0x54,
- 0x03, 0x3e, 0xaa, 0x82, 0xc6, 0xdf, 0xb2, 0x38,
- 0x0e, 0x5d, 0xb3, 0x88, 0xd9, 0xd3, 0x69, 0x5f,
- 0x8f, 0x70, 0x8a, 0x7e, 0x11, 0xd9, 0x1e, 0x7b,
- 0x38, 0xf1, 0x42, 0x1a, 0xc0, 0x35, 0xf5, 0xc7,
- 0x36, 0x85, 0xf5, 0xf7, 0xb8, 0x7e, 0xc7, 0xef,
- 0x18, 0xf1, 0x63, 0xd6, 0x7a, 0xc6, 0xc9, 0x0e,
- 0x4d, 0x69, 0x4f, 0x84, 0xef, 0x26, 0x41, 0x0c,
- 0xec, 0xc7, 0xe0, 0x7e, 0x3c, 0x67, 0x01, 0x4c,
- 0x62, 0x1a, 0x20, 0x6f, 0xee, 0x47, 0x4d, 0xc0,
- 0x99, 0x13, 0x8d, 0x91, 0x4a, 0x26, 0xd4, 0x37,
- 0x28, 0x90, 0x58, 0x75, 0x66, 0x2b, 0x0a, 0xdf,
- 0xda, 0xee, 0x92, 0x25, 0x90, 0x62, 0x39, 0x9e,
- 0x44, 0x98, 0xad, 0xc1, 0x88, 0xed, 0xe4, 0xb4,
- 0xaf, 0xf5, 0x8c, 0x9b, 0x48, 0x4d, 0x56, 0x60,
- 0x97, 0x0f, 0x61, 0x59, 0x9e, 0xa6, 0x27, 0xfe,
- 0xc1, 0x91, 0x15, 0x38, 0xb8, 0x0f, 0xae, 0x61,
- 0x7d, 0x26, 0x13, 0x5a, 0x73, 0xff, 0x1c, 0xa3,
- 0x61, 0x04, 0x58, 0x48, 0x55, 0x44, 0x11, 0xfe,
- 0x15, 0xca, 0xc3, 0xbd, 0xca, 0xc5, 0xb4, 0x40,
- 0x5d, 0x1b, 0x7f, 0x39, 0xb5, 0x9c, 0x35, 0xec,
- 0x61, 0x15, 0x32, 0x32, 0xb8, 0x4e, 0x40, 0x9f,
- 0x17, 0x1f, 0x0a, 0x4d, 0xa9, 0x91, 0xef, 0xb7,
- 0xb0, 0xeb, 0xc2, 0x83, 0x9a, 0x6c, 0xd2, 0x79,
- 0x43, 0x78, 0x5e, 0x2f, 0xe5, 0xdd, 0x1a, 0x3c,
- 0x45, 0xab, 0x29, 0x40, 0x3a, 0x37, 0x5b, 0x6f,
- 0xd7, 0xfc, 0x48, 0x64, 0x3c, 0x49, 0xfb, 0x21,
- 0xbe, 0xc3, 0xff, 0x07, 0xfb, 0x17, 0xe9, 0xc9,
- 0x0c, 0x4c, 0x5c, 0x15, 0x9e, 0x8e, 0x22, 0x30,
- 0x0a, 0xde, 0x48, 0x7f, 0xdb, 0x0d, 0xd1, 0x2b,
- 0x87, 0x38, 0x9e, 0xcc, 0x5a, 0x01, 0x16, 0xee,
- 0x75, 0x49, 0x0d, 0x30, 0x01, 0x34, 0x6a, 0xb6,
- 0x9a, 0x5a, 0x2a, 0xec, 0xbb, 0x48, 0xac, 0xd3,
- 0x77, 0x83, 0xd8, 0x08, 0x86, 0x4f, 0x48, 0x09,
- 0x29, 0x41, 0x79, 0xa1, 0x03, 0x12, 0xc4, 0xcd,
- 0x90, 0x55, 0x47, 0x66, 0x74, 0x9a, 0xcc, 0x4f,
- 0x35, 0x8c, 0xd6, 0x98, 0xef, 0xeb, 0x45, 0xb9,
- 0x9a, 0x26, 0x2f, 0x39, 0xa5, 0x70, 0x6d, 0xfc,
- 0xb4, 0x51, 0xee, 0xf4, 0x9c, 0xe7, 0x38, 0x59,
- 0xad, 0xf4, 0xbc, 0x46, 0xff, 0x46, 0x8e, 0x60,
- 0x9c, 0xa3, 0x60, 0x1d, 0xf8, 0x26, 0x72, 0xf5,
- 0x72, 0x9d, 0x68, 0x80, 0x04, 0xf6, 0x0b, 0xa1,
- 0x0a, 0xd5, 0xa7, 0x82, 0x3a, 0x3e, 0x47, 0xa8,
- 0x5a, 0xde, 0x59, 0x4f, 0x7b, 0x07, 0xb3, 0xe9,
- 0x24, 0x19, 0x3d, 0x34, 0x05, 0xec, 0xf1, 0xab,
- 0x6e, 0x64, 0x8f, 0xd3, 0xe6, 0x41, 0x86, 0x80,
- 0x70, 0xe3, 0x8d, 0x60, 0x9c, 0x34, 0x25, 0x01,
- 0x07, 0x4d, 0x19, 0x41, 0x4e, 0x3d, 0x5c, 0x7e,
- 0xa8, 0xf5, 0xcc, 0xd5, 0x7b, 0xe2, 0x7d, 0x3d,
- 0x49, 0x86, 0x7d, 0x07, 0xb7, 0x10, 0xe3, 0x35,
- 0xb8, 0x84, 0x6d, 0x76, 0xab, 0x17, 0xc6, 0x38,
- 0xb4, 0xd3, 0x28, 0x57, 0xad, 0xd3, 0x88, 0x5a,
- 0xda, 0xea, 0xc8, 0x94, 0xcc, 0x37, 0x19, 0xac,
- 0x9c, 0x9f, 0x4b, 0x00, 0x15, 0xc0, 0xc8, 0xca,
- 0x1f, 0x15, 0xaa, 0xe0, 0xdb, 0xf9, 0x2f, 0x57,
- 0x1b, 0x24, 0xc7, 0x6f, 0x76, 0x29, 0xfb, 0xed,
- 0x25, 0x0d, 0xc0, 0xfe, 0xbd, 0x5a, 0xbf, 0x20,
- 0x08, 0x51, 0x05, 0xec, 0x71, 0xa3, 0xbf, 0xef,
- 0x5e, 0x99, 0x75, 0xdb, 0x3c, 0x5f, 0x9a, 0x8c,
- 0xbb, 0x19, 0x5c, 0x0e, 0x93, 0x19, 0xf8, 0x6a,
- 0xbc, 0xf2, 0x12, 0x54, 0x2f, 0xcb, 0x28, 0x64,
- 0x88, 0xb3, 0x92, 0x0d, 0x96, 0xd1, 0xa6, 0xe4,
- 0x1f, 0xf1, 0x4d, 0xa4, 0xab, 0x1c, 0xee, 0x54,
- 0xf2, 0xad, 0x29, 0x6d, 0x32, 0x37, 0xb2, 0x16,
- 0x77, 0x5c, 0xdc, 0x2e, 0x54, 0xec, 0x75, 0x26,
- 0xc6, 0x36, 0xd9, 0x17, 0x2c, 0xf1, 0x7a, 0xdc,
- 0x4b, 0xf1, 0xe2, 0xd9, 0x95, 0xba, 0xac, 0x87,
- 0xc1, 0xf3, 0x8e, 0x58, 0x08, 0xd8, 0x87, 0x60,
- 0xc9, 0xee, 0x6a, 0xde, 0xa4, 0xd2, 0xfc, 0x0d,
- 0xe5, 0x36, 0xc4, 0x5c, 0x52, 0xb3, 0x07, 0x54,
- 0x65, 0x24, 0xc1, 0xb1, 0xd1, 0xb1, 0x53, 0x13,
- 0x31, 0x79, 0x7f, 0x05, 0x76, 0xeb, 0x37, 0x59,
- 0x15, 0x2b, 0xd1, 0x3f, 0xac, 0x08, 0x97, 0xeb,
- 0x91, 0x98, 0xdf, 0x6c, 0x09, 0x0d, 0x04, 0x9f,
- 0xdc, 0x3b, 0x0e, 0x60, 0x68, 0x47, 0x23, 0x15,
- 0x16, 0xc6, 0x0b, 0x35, 0xf8, 0x77, 0xa2, 0x78,
- 0x50, 0xd4, 0x64, 0x22, 0x33, 0xff, 0xfb, 0x93,
- 0x71, 0x46, 0x50, 0x39, 0x1b, 0x9c, 0xea, 0x4e,
- 0x8d, 0x0c, 0x37, 0xe5, 0x5c, 0x51, 0x3a, 0x31,
- 0xb2, 0x85, 0x84, 0x3f, 0x41, 0xee, 0xa2, 0xc1,
- 0xc6, 0x13, 0x3b, 0x54, 0x28, 0xd2, 0x18, 0x37,
- 0xcc, 0x46, 0x9f, 0x6a, 0x91, 0x3d, 0x5a, 0x15,
- 0x3c, 0x89, 0xa3, 0x61, 0x06, 0x7d, 0x2e, 0x78,
- 0xbe, 0x7d, 0x40, 0xba, 0x2f, 0x95, 0xb1, 0x2f,
- 0x87, 0x3b, 0x8a, 0xbe, 0x6a, 0xf4, 0xc2, 0x31,
- 0x74, 0xee, 0x91, 0xe0, 0x23, 0xaa, 0x5d, 0x7f,
- 0xdd, 0xf0, 0x44, 0x8c, 0x0b, 0x59, 0x2b, 0xfc,
- 0x48, 0x3a, 0xdf, 0x07, 0x05, 0x38, 0x6c, 0xc9,
- 0xeb, 0x18, 0x24, 0x68, 0x8d, 0x58, 0x98, 0xd3,
- 0x31, 0xa3, 0xe4, 0x70, 0x59, 0xb1, 0x21, 0xbe,
- 0x7e, 0x65, 0x7d, 0xb8, 0x04, 0xab, 0xf6, 0xe4,
- 0xd7, 0xda, 0xec, 0x09, 0x8f, 0xda, 0x6d, 0x24,
- 0x07, 0xcc, 0x29, 0x17, 0x05, 0x78, 0x1a, 0xc1,
- 0xb1, 0xce, 0xfc, 0xaa, 0x2d, 0xe7, 0xcc, 0x85,
- 0x84, 0x84, 0x03, 0x2a, 0x0c, 0x3f, 0xa9, 0xf8,
- 0xfd, 0x84, 0x53, 0x59, 0x5c, 0xf0, 0xd4, 0x09,
- 0xf0, 0xd2, 0x6c, 0x32, 0x03, 0xb0, 0xa0, 0x8c,
- 0x52, 0xeb, 0x23, 0x91, 0x88, 0x43, 0x13, 0x46,
- 0xf6, 0x1e, 0xb4, 0x1b, 0xf5, 0x8e, 0x3a, 0xb5,
- 0x3d, 0x00, 0xf6, 0xe5, 0x08, 0x3d, 0x5f, 0x39,
- 0xd3, 0x21, 0x69, 0xbc, 0x03, 0x22, 0x3a, 0xd2,
- 0x5c, 0x84, 0xf8, 0x15, 0xc4, 0x80, 0x0b, 0xbc,
- 0x29, 0x3c, 0xf3, 0x95, 0x98, 0xcd, 0x8f, 0x35,
- 0xbc, 0xa5, 0x3e, 0xfc, 0xd4, 0x13, 0x9e, 0xde,
- 0x4f, 0xce, 0x71, 0x9d, 0x09, 0xad, 0xf2, 0x80,
- 0x6b, 0x65, 0x7f, 0x03, 0x00, 0x14, 0x7c, 0x15,
- 0x85, 0x40, 0x6d, 0x70, 0xea, 0xdc, 0xb3, 0x63,
- 0x35, 0x4f, 0x4d, 0xe0, 0xd9, 0xd5, 0x3c, 0x58,
- 0x56, 0x23, 0x80, 0xe2, 0x36, 0xdd, 0x75, 0x1d,
- 0x94, 0x11, 0x41, 0x8e, 0xe0, 0x81, 0x8e, 0xcf,
- 0xe0, 0xe5, 0xf6, 0xde, 0xd1, 0xe7, 0x04, 0x12,
- 0x79, 0x92, 0x2b, 0x71, 0x2a, 0x79, 0x8b, 0x7c,
- 0x44, 0x79, 0x16, 0x30, 0x4e, 0xf4, 0xf6, 0x9b,
- 0xb7, 0x40, 0xa3, 0x5a, 0xa7, 0x69, 0x3e, 0xc1,
- 0x3a, 0x04, 0xd0, 0x88, 0xa0, 0x3b, 0xdd, 0xc6,
- 0x9e, 0x7e, 0x1e, 0x1e, 0x8f, 0x44, 0xf7, 0x73,
- 0x67, 0x1e, 0x1a, 0x78, 0xfa, 0x62, 0xf4, 0xa9,
- 0xa8, 0xc6, 0x5b, 0xb8, 0xfa, 0x06, 0x7d, 0x5e,
- 0x38, 0x1c, 0x9a, 0x39, 0xe9, 0x39, 0x98, 0x22,
- 0x0b, 0xa7, 0xac, 0x0b, 0xf3, 0xbc, 0xf1, 0xeb,
- 0x8c, 0x81, 0xe3, 0x48, 0x8a, 0xed, 0x42, 0xc2,
- 0x38, 0xcf, 0x3e, 0xda, 0xd2, 0x89, 0x8d, 0x9c,
- 0x53, 0xb5, 0x2f, 0x41, 0x01, 0x26, 0x84, 0x9c,
- 0xa3, 0x56, 0xf6, 0x49, 0xc7, 0xd4, 0x9f, 0x93,
- 0x1b, 0x96, 0x49, 0x5e, 0xad, 0xb3, 0x84, 0x1f,
- 0x3c, 0xa4, 0xe0, 0x9b, 0xd1, 0x90, 0xbc, 0x38,
- 0x6c, 0xdd, 0x95, 0x4d, 0x9d, 0xb1, 0x71, 0x57,
- 0x2d, 0x34, 0xe8, 0xb8, 0x42, 0xc7, 0x99, 0x03,
- 0xc7, 0x07, 0x30, 0x65, 0x91, 0x55, 0xd5, 0x90,
- 0x70, 0x97, 0x37, 0x68, 0xd4, 0x11, 0xf9, 0xe8,
- 0xce, 0xec, 0xdc, 0x34, 0xd5, 0xd3, 0xb7, 0xc4,
- 0xb8, 0x97, 0x05, 0x92, 0xad, 0xf8, 0xe2, 0x36,
- 0x64, 0x41, 0xc9, 0xc5, 0x41, 0x77, 0x52, 0xd7,
- 0x2c, 0xa5, 0x24, 0x2f, 0xd9, 0x34, 0x0b, 0x47,
- 0x35, 0xa7, 0x28, 0x8b, 0xc5, 0xcd, 0xe9, 0x46,
- 0xac, 0x39, 0x94, 0x3c, 0x10, 0xc6, 0x29, 0x73,
- 0x0e, 0x0e, 0x5d, 0xe0, 0x71, 0x03, 0x8a, 0x72,
- 0x0e, 0x26, 0xb0, 0x7d, 0x84, 0xed, 0x95, 0x23,
- 0x49, 0x5a, 0x45, 0x83, 0x45, 0x60, 0x11, 0x4a,
- 0x46, 0x31, 0xd4, 0xd8, 0x16, 0x54, 0x98, 0x58,
- 0xed, 0x6d, 0xcc, 0x5d, 0xd6, 0x50, 0x61, 0x9f,
- 0x9d, 0xc5, 0x3e, 0x9d, 0x32, 0x47, 0xde, 0x96,
- 0xe1, 0x5d, 0xd8, 0xf8, 0xb4, 0x69, 0x6f, 0xb9,
- 0x15, 0x90, 0x57, 0x7a, 0xf6, 0xad, 0xb0, 0x5b,
- 0xf5, 0xa6, 0x36, 0x94, 0xfd, 0x84, 0xce, 0x1c,
- 0x0f, 0x4b, 0xd0, 0xc2, 0x5b, 0x6b, 0x56, 0xef,
- 0x73, 0x93, 0x0b, 0xc3, 0xee, 0xd9, 0xcf, 0xd3,
- 0xa4, 0x22, 0x58, 0xcd, 0x50, 0x6e, 0x65, 0xf4,
- 0xe9, 0xb7, 0x71, 0xaf, 0x4b, 0xb3, 0xb6, 0x2f,
- 0x0f, 0x0e, 0x3b, 0xc9, 0x85, 0x14, 0xf5, 0x17,
- 0xe8, 0x7a, 0x3a, 0xbf, 0x5f, 0x5e, 0xf8, 0x18,
- 0x48, 0xa6, 0x72, 0xab, 0x06, 0x95, 0xe9, 0xc8,
- 0xa7, 0xf4, 0x32, 0x44, 0x04, 0x0c, 0x84, 0x98,
- 0x73, 0xe3, 0x89, 0x8d, 0x5f, 0x7e, 0x4a, 0x42,
- 0x8f, 0xc5, 0x28, 0xb1, 0x82, 0xef, 0x1c, 0x97,
- 0x31, 0x3b, 0x4d, 0xe0, 0x0e, 0x10, 0x10, 0x97,
- 0x93, 0x49, 0x78, 0x2f, 0x0d, 0x86, 0x8b, 0xa1,
- 0x53, 0xa9, 0x81, 0x20, 0x79, 0xe7, 0x07, 0x77,
- 0xb6, 0xac, 0x5e, 0xd2, 0x05, 0xcd, 0xe9, 0xdb,
- 0x8a, 0x94, 0x82, 0x8a, 0x23, 0xb9, 0x3d, 0x1c,
- 0xa9, 0x7d, 0x72, 0x4a, 0xed, 0x33, 0xa3, 0xdb,
- 0x21, 0xa7, 0x86, 0x33, 0x45, 0xa5, 0xaa, 0x56,
- 0x45, 0xb5, 0x83, 0x29, 0x40, 0x47, 0x79, 0x04,
- 0x6e, 0xb9, 0x95, 0xd0, 0x81, 0x77, 0x2d, 0x48,
- 0x1e, 0xfe, 0xc3, 0xc2, 0x1e, 0xe5, 0xf2, 0xbe,
- 0xfd, 0x3b, 0x94, 0x9f, 0xc4, 0xc4, 0x26, 0x9d,
- 0xe4, 0x66, 0x1e, 0x19, 0xee, 0x6c, 0x79, 0x97,
- 0x11, 0x31, 0x4b, 0x0d, 0x01, 0xcb, 0xde, 0xa8,
- 0xf6, 0x6d, 0x7c, 0x39, 0x46, 0x4e, 0x7e, 0x3f,
- 0x94, 0x17, 0xdf, 0xa1, 0x7d, 0xd9, 0x1c, 0x8e,
- 0xbc, 0x7d, 0x33, 0x7d, 0xe3, 0x12, 0x40, 0xca,
- 0xab, 0x37, 0x11, 0x46, 0xd4, 0xae, 0xef, 0x44,
- 0xa2, 0xb3, 0x6a, 0x66, 0x0e, 0x0c, 0x90, 0x7f,
- 0xdf, 0x5c, 0x66, 0x5f, 0xf2, 0x94, 0x9f, 0xa6,
- 0x73, 0x4f, 0xeb, 0x0d, 0xad, 0xbf, 0xc0, 0x63,
- 0x5c, 0xdc, 0x46, 0x51, 0xe8, 0x8e, 0x90, 0x19,
- 0xa8, 0xa4, 0x3c, 0x91, 0x79, 0xfa, 0x7e, 0x58,
- 0x85, 0x13, 0x55, 0xc5, 0x19, 0x82, 0x37, 0x1b,
- 0x0a, 0x02, 0x1f, 0x99, 0x6b, 0x18, 0xf1, 0x28,
- 0x08, 0xa2, 0x73, 0xb8, 0x0f, 0x2e, 0xcd, 0xbf,
- 0xf3, 0x86, 0x7f, 0xea, 0xef, 0xd0, 0xbb, 0xa6,
- 0x21, 0xdf, 0x49, 0x73, 0x51, 0xcc, 0x36, 0xd3,
- 0x3e, 0xa0, 0xf8, 0x44, 0xdf, 0xd3, 0xa6, 0xbe,
- 0x8a, 0xd4, 0x57, 0xdd, 0x72, 0x94, 0x61, 0x0f,
- 0x82, 0xd1, 0x07, 0xb8, 0x7c, 0x18, 0x83, 0xdf,
- 0x3a, 0xe5, 0x50, 0x6a, 0x82, 0x20, 0xac, 0xa9,
- 0xa8, 0xff, 0xd9, 0xf3, 0x77, 0x33, 0x5a, 0x9e,
- 0x7f, 0x6d, 0xfe, 0x5d, 0x33, 0x41, 0x42, 0xe7,
- 0x6c, 0x19, 0xe0, 0x44, 0x8a, 0x15, 0xf6, 0x70,
- 0x98, 0xb7, 0x68, 0x4d, 0xfa, 0x97, 0x39, 0xb0,
- 0x8e, 0xe8, 0x84, 0x8b, 0x75, 0x30, 0xb7, 0x7d,
- 0x92, 0x69, 0x20, 0x9c, 0x81, 0xfb, 0x4b, 0xf4,
- 0x01, 0x50, 0xeb, 0xce, 0x0c, 0x1c, 0x6c, 0xb5,
- 0x4a, 0xd7, 0x27, 0x0c, 0xce, 0xbb, 0xe5, 0x85,
- 0xf0, 0xb6, 0xee, 0xd5, 0x70, 0xdd, 0x3b, 0xfc,
- 0xd4, 0x99, 0xf1, 0x33, 0xdd, 0x8b, 0xc4, 0x2f,
- 0xae, 0xab, 0x74, 0x96, 0x32, 0xc7, 0x4c, 0x56,
- 0x3c, 0x89, 0x0f, 0x96, 0x0b, 0x42, 0xc0, 0xcb,
- 0xee, 0x0f, 0x0b, 0x8c, 0xfb, 0x7e, 0x47, 0x7b,
- 0x64, 0x48, 0xfd, 0xb2, 0x00, 0x80, 0x89, 0xa5,
- 0x13, 0x55, 0x62, 0xfc, 0x8f, 0xe2, 0x42, 0x03,
- 0xb7, 0x4e, 0x2a, 0x79, 0xb4, 0x82, 0xea, 0x23,
- 0x49, 0xda, 0xaf, 0x52, 0x63, 0x1e, 0x60, 0x03,
- 0x89, 0x06, 0x44, 0x46, 0x08, 0xc3, 0xc4, 0x87,
- 0x70, 0x2e, 0xda, 0x94, 0xad, 0x6b, 0xe0, 0xe4,
- 0xd1, 0x8a, 0x06, 0xc2, 0xa8, 0xc0, 0xa7, 0x43,
- 0x3c, 0x47, 0x52, 0x0e, 0xc3, 0x77, 0x81, 0x11,
- 0x67, 0x0e, 0xa0, 0x70, 0x04, 0x47, 0x29, 0x40,
- 0x86, 0x0d, 0x34, 0x56, 0xa7, 0xc9, 0x35, 0x59,
- 0x68, 0xdc, 0x93, 0x81, 0x70, 0xee, 0x86, 0xd9,
- 0x80, 0x06, 0x40, 0x4f, 0x1a, 0x0d, 0x40, 0x30,
- 0x0b, 0xcb, 0x96, 0x47, 0xc1, 0xb7, 0x52, 0xfd,
- 0x56, 0xe0, 0x72, 0x4b, 0xfb, 0xbd, 0x92, 0x45,
- 0x61, 0x71, 0xc2, 0x33, 0x11, 0xbf, 0x52, 0x83,
- 0x79, 0x26, 0xe0, 0x49, 0x6b, 0xb7, 0x05, 0x8b,
- 0xe8, 0x0e, 0x87, 0x31, 0xd7, 0x9d, 0x8a, 0xf5,
- 0xc0, 0x5f, 0x2e, 0x58, 0x4a, 0xdb, 0x11, 0xb3,
- 0x6c, 0x30, 0x2a, 0x46, 0x19, 0xe3, 0x27, 0x84,
- 0x1f, 0x63, 0x6e, 0xf6, 0x57, 0xc7, 0xc9, 0xd8,
- 0x5e, 0xba, 0xb3, 0x87, 0xd5, 0x83, 0x26, 0x34,
- 0x21, 0x9e, 0x65, 0xde, 0x42, 0xd3, 0xbe, 0x7b,
- 0xbc, 0x91, 0x71, 0x44, 0x4d, 0x99, 0x3b, 0x31,
- 0xe5, 0x3f, 0x11, 0x4e, 0x7f, 0x13, 0x51, 0x3b,
- 0xae, 0x79, 0xc9, 0xd3, 0x81, 0x8e, 0x25, 0x40,
- 0x10, 0xfc, 0x07, 0x1e, 0xf9, 0x7b, 0x9a, 0x4b,
- 0x6c, 0xe3, 0xb3, 0xad, 0x1a, 0x0a, 0xdd, 0x9e,
- 0x59, 0x0c, 0xa2, 0xcd, 0xae, 0x48, 0x4a, 0x38,
- 0x5b, 0x47, 0x41, 0x94, 0x65, 0x6b, 0xbb, 0xeb,
- 0x5b, 0xe3, 0xaf, 0x07, 0x5b, 0xd4, 0x4a, 0xa2,
- 0xc9, 0x5d, 0x2f, 0x64, 0x03, 0xd7, 0x3a, 0x2c,
- 0x6e, 0xce, 0x76, 0x95, 0xb4, 0xb3, 0xc0, 0xf1,
- 0xe2, 0x45, 0x73, 0x7a, 0x5c, 0xab, 0xc1, 0xfc,
- 0x02, 0x8d, 0x81, 0x29, 0xb3, 0xac, 0x07, 0xec,
- 0x40, 0x7d, 0x45, 0xd9, 0x7a, 0x59, 0xee, 0x34,
- 0xf0, 0xe9, 0xd5, 0x7b, 0x96, 0xb1, 0x3d, 0x95,
- 0xcc, 0x86, 0xb5, 0xb6, 0x04, 0x2d, 0xb5, 0x92,
- 0x7e, 0x76, 0xf4, 0x06, 0xa9, 0xa3, 0x12, 0x0f,
- 0xb1, 0xaf, 0x26, 0xba, 0x7c, 0xfc, 0x7e, 0x1c,
- 0xbc, 0x2c, 0x49, 0x97, 0x53, 0x60, 0x13, 0x0b,
- 0xa6, 0x61, 0x83, 0x89, 0x42, 0xd4, 0x17, 0x0c,
- 0x6c, 0x26, 0x52, 0xc3, 0xb3, 0xd4, 0x67, 0xf5,
- 0xe3, 0x04, 0xb7, 0xf4, 0xcb, 0x80, 0xb8, 0xcb,
- 0x77, 0x56, 0x3e, 0xaa, 0x57, 0x54, 0xee, 0xb4,
- 0x2c, 0x67, 0xcf, 0xf2, 0xdc, 0xbe, 0x55, 0xf9,
- 0x43, 0x1f, 0x6e, 0x22, 0x97, 0x67, 0x7f, 0xc4,
- 0xef, 0xb1, 0x26, 0x31, 0x1e, 0x27, 0xdf, 0x41,
- 0x80, 0x47, 0x6c, 0xe2, 0xfa, 0xa9, 0x8c, 0x2a,
- 0xf6, 0xf2, 0xab, 0xf0, 0x15, 0xda, 0x6c, 0xc8,
- 0xfe, 0xb5, 0x23, 0xde, 0xa9, 0x05, 0x3f, 0x06,
- 0x54, 0x4c, 0xcd, 0xe1, 0xab, 0xfc, 0x0e, 0x62,
- 0x33, 0x31, 0x73, 0x2c, 0x76, 0xcb, 0xb4, 0x47,
- 0x1e, 0x20, 0xad, 0xd8, 0xf2, 0x31, 0xdd, 0xc4,
- 0x8b, 0x0c, 0x77, 0xbe, 0xe1, 0x8b, 0x26, 0x00,
- 0x02, 0x58, 0xd6, 0x8d, 0xef, 0xad, 0x74, 0x67,
- 0xab, 0x3f, 0xef, 0xcb, 0x6f, 0xb0, 0xcc, 0x81,
- 0x44, 0x4c, 0xaf, 0xe9, 0x49, 0x4f, 0xdb, 0xa0,
- 0x25, 0xa4, 0xf0, 0x89, 0xf1, 0xbe, 0xd8, 0x10,
- 0xff, 0xb1, 0x3b, 0x4b, 0xfa, 0x98, 0xf5, 0x79,
- 0x6d, 0x1e, 0x69, 0x4d, 0x57, 0xb1, 0xc8, 0x19,
- 0x1b, 0xbd, 0x1e, 0x8c, 0x84, 0xb7, 0x7b, 0xe8,
- 0xd2, 0x2d, 0x09, 0x41, 0x41, 0x37, 0x3d, 0xb1,
- 0x6f, 0x26, 0x5d, 0x71, 0x16, 0x3d, 0xb7, 0x83,
- 0x27, 0x2c, 0xa7, 0xb6, 0x50, 0xbd, 0x91, 0x86,
- 0xab, 0x24, 0xa1, 0x38, 0xfd, 0xea, 0x71, 0x55,
- 0x7e, 0x9a, 0x07, 0x77, 0x4b, 0xfa, 0x61, 0x66,
- 0x20, 0x1e, 0x28, 0x95, 0x18, 0x1b, 0xa4, 0xa0,
- 0xfd, 0xc0, 0x89, 0x72, 0x43, 0xd9, 0x3b, 0x49,
- 0x5a, 0x3f, 0x9d, 0xbf, 0xdb, 0xb4, 0x46, 0xea,
- 0x42, 0x01, 0x77, 0x23, 0x68, 0x95, 0xb6, 0x24,
- 0xb3, 0xa8, 0x6c, 0x28, 0x3b, 0x11, 0x40, 0x7e,
- 0x18, 0x65, 0x6d, 0xd8, 0x24, 0x42, 0x7d, 0x88,
- 0xc0, 0x52, 0xd9, 0x05, 0xe4, 0x95, 0x90, 0x87,
- 0x8c, 0xf4, 0xd0, 0x6b, 0xb9, 0x83, 0x99, 0x34,
- 0x6d, 0xfe, 0x54, 0x40, 0x94, 0x52, 0x21, 0x4f,
- 0x14, 0x25, 0xc5, 0xd6, 0x5e, 0x95, 0xdc, 0x0a,
- 0x2b, 0x89, 0x20, 0x11, 0x84, 0x48, 0xd6, 0x3a,
- 0xcd, 0x5c, 0x24, 0xad, 0x62, 0xe3, 0xb1, 0x93,
- 0x25, 0x8d, 0xcd, 0x7e, 0xfc, 0x27, 0xa3, 0x37,
- 0xfd, 0x84, 0xfc, 0x1b, 0xb2, 0xf1, 0x27, 0x38,
- 0x5a, 0xb7, 0xfc, 0xf2, 0xfa, 0x95, 0x66, 0xd4,
- 0xfb, 0xba, 0xa7, 0xd7, 0xa3, 0x72, 0x69, 0x48,
- 0x48, 0x8c, 0xeb, 0x28, 0x89, 0xfe, 0x33, 0x65,
- 0x5a, 0x36, 0x01, 0x7e, 0x06, 0x79, 0x0a, 0x09,
- 0x3b, 0x74, 0x11, 0x9a, 0x6e, 0xbf, 0xd4, 0x9e,
- 0x58, 0x90, 0x49, 0x4f, 0x4d, 0x08, 0xd4, 0xe5,
- 0x4a, 0x09, 0x21, 0xef, 0x8b, 0xb8, 0x74, 0x3b,
- 0x91, 0xdd, 0x36, 0x85, 0x60, 0x2d, 0xfa, 0xd4,
- 0x45, 0x7b, 0x45, 0x53, 0xf5, 0x47, 0x87, 0x7e,
- 0xa6, 0x37, 0xc8, 0x78, 0x7a, 0x68, 0x9d, 0x8d,
- 0x65, 0x2c, 0x0e, 0x91, 0x5c, 0xa2, 0x60, 0xf0,
- 0x8e, 0x3f, 0xe9, 0x1a, 0xcd, 0xaa, 0xe7, 0xd5,
- 0x77, 0x18, 0xaf, 0xc9, 0xbc, 0x18, 0xea, 0x48,
- 0x1b, 0xfb, 0x22, 0x48, 0x70, 0x16, 0x29, 0x9e,
- 0x5b, 0xc1, 0x2c, 0x66, 0x23, 0xbc, 0xf0, 0x1f,
- 0xef, 0xaf, 0xe4, 0xd6, 0x04, 0x19, 0x82, 0x7a,
- 0x0b, 0xba, 0x4b, 0x46, 0xb1, 0x6a, 0x85, 0x5d,
- 0xb4, 0x73, 0xd6, 0x21, 0xa1, 0x71, 0x60, 0x14,
- 0xee, 0x0a, 0x77, 0xc4, 0x66, 0x2e, 0xf9, 0x69,
- 0x30, 0xaf, 0x41, 0x0b, 0xc8, 0x83, 0x3c, 0x53,
- 0x99, 0x19, 0x27, 0x46, 0xf7, 0x41, 0x6e, 0x56,
- 0xdc, 0x94, 0x28, 0x67, 0x4e, 0xb7, 0x25, 0x48,
- 0x8a, 0xc2, 0xe0, 0x60, 0x96, 0xcc, 0x18, 0xf4,
- 0x84, 0xdd, 0xa7, 0x5e, 0x3e, 0x05, 0x0b, 0x26,
- 0x26, 0xb2, 0x5c, 0x1f, 0x57, 0x1a, 0x04, 0x7e,
- 0x6a, 0xe3, 0x2f, 0xb4, 0x35, 0xb6, 0x38, 0x40,
- 0x40, 0xcd, 0x6f, 0x87, 0x2e, 0xef, 0xa3, 0xd7,
- 0xa9, 0xc2, 0xe8, 0x0d, 0x27, 0xdf, 0x44, 0x62,
- 0x99, 0xa0, 0xfc, 0xcf, 0x81, 0x78, 0xcb, 0xfe,
- 0xe5, 0xa0, 0x03, 0x4e, 0x6c, 0xd7, 0xf4, 0xaf,
- 0x7a, 0xbb, 0x61, 0x82, 0xfe, 0x71, 0x89, 0xb2,
- 0x22, 0x7c, 0x8e, 0x83, 0x04, 0xce, 0xf6, 0x5d,
- 0x84, 0x8f, 0x95, 0x6a, 0x7f, 0xad, 0xfd, 0x32,
- 0x9c, 0x5e, 0xe4, 0x9c, 0x89, 0x60, 0x54, 0xaa,
- 0x96, 0x72, 0xd2, 0xd7, 0x36, 0x85, 0xa9, 0x45,
- 0xd2, 0x2a, 0xa1, 0x81, 0x49, 0x6f, 0x7e, 0x04,
- 0xfa, 0xe2, 0xfe, 0x90, 0x26, 0x77, 0x5a, 0x33,
- 0xb8, 0x04, 0x9a, 0x7a, 0xe6, 0x4c, 0x4f, 0xad,
- 0x72, 0x96, 0x08, 0x28, 0x58, 0x13, 0xf8, 0xc4,
- 0x1c, 0xf0, 0xc3, 0x45, 0x95, 0x49, 0x20, 0x8c,
- 0x9f, 0x39, 0x70, 0xe1, 0x77, 0xfe, 0xd5, 0x4b,
- 0xaf, 0x86, 0xda, 0xef, 0x22, 0x06, 0x83, 0x36,
- 0x29, 0x12, 0x11, 0x40, 0xbc, 0x3b, 0x86, 0xaa,
- 0xaa, 0x65, 0x60, 0xc3, 0x80, 0xca, 0xed, 0xa9,
- 0xf3, 0xb0, 0x79, 0x96, 0xa2, 0x55, 0x27, 0x28,
- 0x55, 0x73, 0x26, 0xa5, 0x50, 0xea, 0x92, 0x4b,
- 0x3c, 0x5c, 0x82, 0x33, 0xf0, 0x01, 0x3f, 0x03,
- 0xc1, 0x08, 0x05, 0xbf, 0x98, 0xf4, 0x9b, 0x6d,
- 0xa5, 0xa8, 0xb4, 0x82, 0x0c, 0x06, 0xfa, 0xff,
- 0x2d, 0x08, 0xf3, 0x05, 0x4f, 0x57, 0x2a, 0x39,
- 0xd4, 0x83, 0x0d, 0x75, 0x51, 0xd8, 0x5b, 0x1b,
- 0xd3, 0x51, 0x5a, 0x32, 0x2a, 0x9b, 0x32, 0xb2,
- 0xf2, 0xa4, 0x96, 0x12, 0xf2, 0xae, 0x40, 0x34,
- 0x67, 0xa8, 0xf5, 0x44, 0xd5, 0x35, 0x53, 0xfe,
- 0xa3, 0x60, 0x96, 0x63, 0x0f, 0x1f, 0x6e, 0xb0,
- 0x5a, 0x42, 0xa6, 0xfc, 0x51, 0x0b, 0x60, 0x27,
- 0xbc, 0x06, 0x71, 0xed, 0x65, 0x5b, 0x23, 0x86,
- 0x4a, 0x07, 0x3b, 0x22, 0x07, 0x46, 0xe6, 0x90,
- 0x3e, 0xf3, 0x25, 0x50, 0x1b, 0x4c, 0x7f, 0x03,
- 0x08, 0xa8, 0x36, 0x6b, 0x87, 0xe5, 0xe3, 0xdb,
- 0x9a, 0x38, 0x83, 0xff, 0x9f, 0x1a, 0x9f, 0x57,
- 0xa4, 0x2a, 0xf6, 0x37, 0xbc, 0x1a, 0xff, 0xc9,
- 0x1e, 0x35, 0x0c, 0xc3, 0x7c, 0xa3, 0xb2, 0xe5,
- 0xd2, 0xc6, 0xb4, 0x57, 0x47, 0xe4, 0x32, 0x16,
- 0x6d, 0xa9, 0xae, 0x64, 0xe6, 0x2d, 0x8d, 0xc5,
- 0x8d, 0x50, 0x8e, 0xe8, 0x1a, 0x22, 0x34, 0x2a,
- 0xd9, 0xeb, 0x51, 0x90, 0x4a, 0xb1, 0x41, 0x7d,
- 0x64, 0xf9, 0xb9, 0x0d, 0xf6, 0x23, 0x33, 0xb0,
- 0x33, 0xf4, 0xf7, 0x3f, 0x27, 0x84, 0xc6, 0x0f,
- 0x54, 0xa5, 0xc0, 0x2e, 0xec, 0x0b, 0x3a, 0x48,
- 0x6e, 0x80, 0x35, 0x81, 0x43, 0x9b, 0x90, 0xb1,
- 0xd0, 0x2b, 0xea, 0x21, 0xdc, 0xda, 0x5b, 0x09,
- 0xf4, 0xcc, 0x10, 0xb4, 0xc7, 0xfe, 0x79, 0x51,
- 0xc3, 0xc5, 0xac, 0x88, 0x74, 0x84, 0x0b, 0x4b,
- 0xca, 0x79, 0x16, 0x29, 0xfb, 0x69, 0x54, 0xdf,
- 0x41, 0x7e, 0xe9, 0xc7, 0x8e, 0xea, 0xa5, 0xfe,
- 0xfc, 0x76, 0x0e, 0x90, 0xc4, 0x92, 0x38, 0xad,
- 0x7b, 0x48, 0xe6, 0x6e, 0xf7, 0x21, 0xfd, 0x4e,
- 0x93, 0x0a, 0x7b, 0x41, 0x83, 0x68, 0xfb, 0x57,
- 0x51, 0x76, 0x34, 0xa9, 0x6c, 0x00, 0xaa, 0x4f,
- 0x66, 0x65, 0x98, 0x4a, 0x4f, 0xa3, 0xa0, 0xef,
- 0x69, 0x3f, 0xe3, 0x1c, 0x92, 0x8c, 0xfd, 0xd8,
- 0xe8, 0xde, 0x7c, 0x7f, 0x3e, 0x84, 0x8e, 0x69,
- 0x3c, 0xf1, 0xf2, 0x05, 0x46, 0xdc, 0x2f, 0x9d,
- 0x5e, 0x6e, 0x4c, 0xfb, 0xb5, 0x99, 0x2a, 0x59,
- 0x63, 0xc1, 0x34, 0xbc, 0x57, 0xc0, 0x0d, 0xb9,
- 0x61, 0x25, 0xf3, 0x33, 0x23, 0x51, 0xb6, 0x0d,
- 0x07, 0xa6, 0xab, 0x94, 0x4a, 0xb7, 0x2a, 0xea,
- 0xee, 0xac, 0xa3, 0xc3, 0x04, 0x8b, 0x0e, 0x56,
- 0xfe, 0x44, 0xa7, 0x39, 0xe2, 0xed, 0xed, 0xb4,
- 0x22, 0x2b, 0xac, 0x12, 0x32, 0x28, 0x91, 0xd8,
- 0xa5, 0xab, 0xff, 0x5f, 0xe0, 0x4b, 0xda, 0x78,
- 0x17, 0xda, 0xf1, 0x01, 0x5b, 0xcd, 0xe2, 0x5f,
- 0x50, 0x45, 0x73, 0x2b, 0xe4, 0x76, 0x77, 0xf4,
- 0x64, 0x1d, 0x43, 0xfb, 0x84, 0x7a, 0xea, 0x91,
- 0xae, 0xf9, 0x9e, 0xb7, 0xb4, 0xb0, 0x91, 0x5f,
- 0x16, 0x35, 0x9a, 0x11, 0xb8, 0xc7, 0xc1, 0x8c,
- 0xc6, 0x10, 0x8d, 0x2f, 0x63, 0x4a, 0xa7, 0x57,
- 0x3a, 0x51, 0xd6, 0x32, 0x2d, 0x64, 0x72, 0xd4,
- 0x66, 0xdc, 0x10, 0xa6, 0x67, 0xd6, 0x04, 0x23,
- 0x9d, 0x0a, 0x11, 0x77, 0xdd, 0x37, 0x94, 0x17,
- 0x3c, 0xbf, 0x8b, 0x65, 0xb0, 0x2e, 0x5e, 0x66,
- 0x47, 0x64, 0xac, 0xdd, 0xf0, 0x84, 0xfd, 0x39,
- 0xfa, 0x15, 0x5d, 0xef, 0xae, 0xca, 0xc1, 0x36,
- 0xa7, 0x5c, 0xbf, 0xc7, 0x08, 0xc2, 0x66, 0x00,
- 0x74, 0x74, 0x4e, 0x27, 0x3f, 0x55, 0x8a, 0xb7,
- 0x38, 0x66, 0x83, 0x6d, 0xcf, 0x99, 0x9e, 0x60,
- 0x8f, 0xdd, 0x2e, 0x62, 0x22, 0x0e, 0xef, 0x0c,
- 0x98, 0xa7, 0x85, 0x74, 0x3b, 0x9d, 0xec, 0x9e,
- 0xa9, 0x19, 0x72, 0xa5, 0x7f, 0x2c, 0x39, 0xb7,
- 0x7d, 0xb7, 0xf1, 0x12, 0x65, 0x27, 0x4b, 0x5a,
- 0xde, 0x17, 0xfe, 0xad, 0x44, 0xf3, 0x20, 0x4d,
- 0xfd, 0xe4, 0x1f, 0xb5, 0x81, 0xb0, 0x36, 0x37,
- 0x08, 0x6f, 0xc3, 0x0c, 0xe9, 0x85, 0x98, 0x82,
- 0xa9, 0x62, 0x0c, 0xc4, 0x97, 0xc0, 0x50, 0xc8,
- 0xa7, 0x3c, 0x50, 0x9f, 0x43, 0xb9, 0xcd, 0x5e,
- 0x4d, 0xfa, 0x1c, 0x4b, 0x0b, 0xa9, 0x98, 0x85,
- 0x38, 0x92, 0xac, 0x8d, 0xe4, 0xad, 0x9b, 0x98,
- 0xab, 0xd9, 0x38, 0xac, 0x62, 0x52, 0xa3, 0x22,
- 0x63, 0x0f, 0xbf, 0x95, 0x48, 0xdf, 0x69, 0xe7,
- 0x8b, 0x33, 0xd5, 0xb2, 0xbd, 0x05, 0x49, 0x49,
- 0x9d, 0x57, 0x73, 0x19, 0x33, 0xae, 0xfa, 0x33,
- 0xf1, 0x19, 0xa8, 0x80, 0xce, 0x04, 0x9f, 0xbc,
- 0x1d, 0x65, 0x82, 0x1b, 0xe5, 0x3a, 0x51, 0xc8,
- 0x1c, 0x21, 0xe3, 0x5d, 0xf3, 0x7d, 0x9b, 0x2f,
- 0x2c, 0x1d, 0x4a, 0x7f, 0x9b, 0x68, 0x35, 0xa3,
- 0xb2, 0x50, 0xf7, 0x62, 0x79, 0xcd, 0xf4, 0x98,
- 0x4f, 0xe5, 0x63, 0x7c, 0x3e, 0x45, 0x31, 0x8c,
- 0x16, 0xa0, 0x12, 0xc8, 0x58, 0xce, 0x39, 0xa6,
- 0xbc, 0x54, 0xdb, 0xc5, 0xe0, 0xd5, 0xba, 0xbc,
- 0xb9, 0x04, 0xf4, 0x8d, 0xe8, 0x2f, 0x15, 0x9d,
-};
-
-/* 100 test cases */
-static struct crc_test {
- u32 crc; /* random starting crc */
- u32 start; /* random 6 bit offset in buf */
- u32 length; /* random 11 bit length of test */
- u32 crc_le; /* expected crc32_le result */
- u32 crc_be; /* expected crc32_be result */
- u32 crc32c_le; /* expected crc32c_le result */
-} const test[] __initconst =
-{
- {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 0xf6e93d6c},
- {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 0x0fe92aca},
- {0x496da28e, 0x00000039, 0x000005af, 0xd933660f, 0x5d57e81f, 0x52e1ebb8},
- {0x09a9b90e, 0x00000027, 0x000001f8, 0xb45fe007, 0xf45fca9a, 0x0798af9a},
- {0xdc97e5a9, 0x00000025, 0x000003b6, 0xf81a3562, 0xe0126ba2, 0x18eb3152},
- {0x47c58900, 0x0000000a, 0x000000b9, 0x8e58eccf, 0xf3afc793, 0xd00d08c7},
- {0x292561e8, 0x0000000c, 0x00000403, 0xa2ba8aaf, 0x0b797aed, 0x8ba966bc},
- {0x415037f6, 0x00000003, 0x00000676, 0xa17d52e8, 0x7f0fdf35, 0x11d694a2},
- {0x3466e707, 0x00000026, 0x00000042, 0x258319be, 0x75c484a2, 0x6ab3208d},
- {0xafd1281b, 0x00000023, 0x000002ee, 0x4428eaf8, 0x06c7ad10, 0xba4603c5},
- {0xd3857b18, 0x00000028, 0x000004a2, 0x5c430821, 0xb062b7cb, 0xe6071c6f},
- {0x1d825a8f, 0x0000002b, 0x0000050b, 0xd2c45f0c, 0xd68634e0, 0x179ec30a},
- {0x5033e3bc, 0x0000000b, 0x00000078, 0xa3ea4113, 0xac6d31fb, 0x0903beb8},
- {0x94f1fb5e, 0x0000000f, 0x000003a2, 0xfbfc50b1, 0x3cfe50ed, 0x6a7cb4fa},
- {0xc9a0fe14, 0x00000009, 0x00000473, 0x5fb61894, 0x87070591, 0xdb535801},
- {0x88a034b1, 0x0000001c, 0x000005ad, 0xc1b16053, 0x46f95c67, 0x92bed597},
- {0xf0f72239, 0x00000020, 0x0000026d, 0xa6fa58f3, 0xf8c2c1dd, 0x192a3f1b},
- {0xcc20a5e3, 0x0000003b, 0x0000067a, 0x7740185a, 0x308b979a, 0xccbaec1a},
- {0xce589c95, 0x0000002b, 0x00000641, 0xd055e987, 0x40aae25b, 0x7eabae4d},
- {0x78edc885, 0x00000035, 0x000005be, 0xa39cb14b, 0x035b0d1f, 0x28c72982},
- {0x9d40a377, 0x0000003b, 0x00000038, 0x1f47ccd2, 0x197fbc9d, 0xc3cd4d18},
- {0x703d0e01, 0x0000003c, 0x000006f1, 0x88735e7c, 0xfed57c5a, 0xbca8f0e7},
- {0x776bf505, 0x0000000f, 0x000005b2, 0x5cc4fc01, 0xf32efb97, 0x713f60b3},
- {0x4a3e7854, 0x00000027, 0x000004b8, 0x8d923c82, 0x0cbfb4a2, 0xebd08fd5},
- {0x209172dd, 0x0000003b, 0x00000356, 0xb89e9c2b, 0xd7868138, 0x64406c59},
- {0x3ba4cc5b, 0x0000002f, 0x00000203, 0xe51601a9, 0x5b2a1032, 0x7421890e},
- {0xfc62f297, 0x00000000, 0x00000079, 0x71a8e1a2, 0x5d88685f, 0xe9347603},
- {0x64280b8b, 0x00000016, 0x000007ab, 0x0fa7a30c, 0xda3a455f, 0x1bef9060},
- {0x97dd724b, 0x00000033, 0x000007ad, 0x5788b2f4, 0xd7326d32, 0x34720072},
- {0x61394b52, 0x00000035, 0x00000571, 0xc66525f1, 0xcabe7fef, 0x48310f59},
- {0x29b4faff, 0x00000024, 0x0000006e, 0xca13751e, 0x993648e0, 0x783a4213},
- {0x29bfb1dc, 0x0000000b, 0x00000244, 0x436c43f7, 0x429f7a59, 0x9e8efd41},
- {0x86ae934b, 0x00000035, 0x00000104, 0x0760ec93, 0x9cf7d0f4, 0xfc3d34a5},
- {0xc4c1024e, 0x0000002e, 0x000006b1, 0x6516a3ec, 0x19321f9c, 0x17a52ae2},
- {0x3287a80a, 0x00000026, 0x00000496, 0x0b257eb1, 0x754ebd51, 0x886d935a},
- {0xa4db423e, 0x00000023, 0x0000045d, 0x9b3a66dc, 0x873e9f11, 0xeaaeaeb2},
- {0x7a1078df, 0x00000015, 0x0000014a, 0x8c2484c5, 0x6a628659, 0x8e900a4b},
- {0x6048bd5b, 0x00000006, 0x0000006a, 0x897e3559, 0xac9961af, 0xd74662b1},
- {0xd8f9ea20, 0x0000003d, 0x00000277, 0x60eb905b, 0xed2aaf99, 0xd26752ba},
- {0xea5ec3b4, 0x0000002a, 0x000004fe, 0x869965dc, 0x6c1f833b, 0x8b1fcd62},
- {0x2dfb005d, 0x00000016, 0x00000345, 0x6a3b117e, 0xf05e8521, 0xf54342fe},
- {0x5a214ade, 0x00000020, 0x000005b6, 0x467f70be, 0xcb22ccd3, 0x5b95b988},
- {0xf0ab9cca, 0x00000032, 0x00000515, 0xed223df3, 0x7f3ef01d, 0x2e1176be},
- {0x91b444f9, 0x0000002e, 0x000007f8, 0x84e9a983, 0x5676756f, 0x66120546},
- {0x1b5d2ddb, 0x0000002e, 0x0000012c, 0xba638c4c, 0x3f42047b, 0xf256a5cc},
- {0xd824d1bb, 0x0000003a, 0x000007b5, 0x6288653b, 0x3a3ebea0, 0x4af1dd69},
- {0x0470180c, 0x00000034, 0x000001f0, 0x9d5b80d6, 0x3de08195, 0x56f0a04a},
- {0xffaa3a3f, 0x00000036, 0x00000299, 0xf3a82ab8, 0x53e0c13d, 0x74f6b6b2},
- {0x6406cfeb, 0x00000023, 0x00000600, 0xa920b8e8, 0xe4e2acf4, 0x085951fd},
- {0xb24aaa38, 0x0000003e, 0x000004a1, 0x657cc328, 0x5077b2c3, 0xc65387eb},
- {0x58b2ab7c, 0x00000039, 0x000002b4, 0x3a17ee7e, 0x9dcb3643, 0x1ca9257b},
- {0x3db85970, 0x00000006, 0x000002b6, 0x95268b59, 0xb9812c10, 0xfd196d76},
- {0x857830c5, 0x00000003, 0x00000590, 0x4ef439d5, 0xf042161d, 0x5ef88339},
- {0xe1fcd978, 0x0000003e, 0x000007d8, 0xae8d8699, 0xce0a1ef5, 0x2c3714d9},
- {0xb982a768, 0x00000016, 0x000006e0, 0x62fad3df, 0x5f8a067b, 0x58576548},
- {0x1d581ce8, 0x0000001e, 0x0000058b, 0xf0f5da53, 0x26e39eee, 0xfd7c57de},
- {0x2456719b, 0x00000025, 0x00000503, 0x4296ac64, 0xd50e4c14, 0xd5fedd59},
- {0xfae6d8f2, 0x00000000, 0x0000055d, 0x057fdf2e, 0x2a31391a, 0x1cc3b17b},
- {0xcba828e3, 0x00000039, 0x000002ce, 0xe3f22351, 0x8f00877b, 0x270eed73},
- {0x13d25952, 0x0000000a, 0x0000072d, 0x76d4b4cc, 0x5eb67ec3, 0x91ecbb11},
- {0x0342be3f, 0x00000015, 0x00000599, 0xec75d9f1, 0x9d4d2826, 0x05ed8d0c},
- {0xeaa344e0, 0x00000014, 0x000004d8, 0x72a4c981, 0x2064ea06, 0x0b09ad5b},
- {0xbbb52021, 0x0000003b, 0x00000272, 0x04af99fc, 0xaf042d35, 0xf8d511fb},
- {0xb66384dc, 0x0000001d, 0x000007fc, 0xd7629116, 0x782bd801, 0x5ad832cc},
- {0x616c01b6, 0x00000022, 0x000002c8, 0x5b1dab30, 0x783ce7d2, 0x1214d196},
- {0xce2bdaad, 0x00000016, 0x0000062a, 0x932535c8, 0x3f02926d, 0x5747218a},
- {0x00fe84d7, 0x00000005, 0x00000205, 0x850e50aa, 0x753d649c, 0xde8f14de},
- {0xbebdcb4c, 0x00000006, 0x0000055d, 0xbeaa37a2, 0x2d8c9eba, 0x3563b7b9},
- {0xd8b1a02a, 0x00000010, 0x00000387, 0x5017d2fc, 0x503541a5, 0x071475d0},
- {0x3b96cad2, 0x00000036, 0x00000347, 0x1d2372ae, 0x926cd90b, 0x54c79d60},
- {0xc94c1ed7, 0x00000005, 0x0000038b, 0x9e9fdb22, 0x144a9178, 0x4c53eee6},
- {0x1aad454e, 0x00000025, 0x000002b2, 0xc3f6315c, 0x5c7a35b3, 0x10137a3c},
- {0xa4fec9a6, 0x00000000, 0x000006d6, 0x90be5080, 0xa4107605, 0xaa9d6c73},
- {0x1bbe71e2, 0x0000001f, 0x000002fd, 0x4e504c3b, 0x284ccaf1, 0xb63d23e7},
- {0x4201c7e4, 0x00000002, 0x000002b7, 0x7822e3f9, 0x0cc912a9, 0x7f53e9cf},
- {0x23fddc96, 0x00000003, 0x00000627, 0x8a385125, 0x07767e78, 0x13c1cd83},
- {0xd82ba25c, 0x00000016, 0x0000063e, 0x98e4148a, 0x283330c9, 0x49ff5867},
- {0x786f2032, 0x0000002d, 0x0000060f, 0xf201600a, 0xf561bfcd, 0x8467f211},
- {0xfebe4e1f, 0x0000002a, 0x000004f2, 0x95e51961, 0xfd80dcab, 0x3f9683b2},
- {0x1a6e0a39, 0x00000008, 0x00000672, 0x8af6c2a5, 0x78dd84cb, 0x76a3f874},
- {0x56000ab8, 0x0000000e, 0x000000e5, 0x36bacb8f, 0x22ee1f77, 0x863b702f},
- {0x4717fe0c, 0x00000000, 0x000006ec, 0x8439f342, 0x5c8e03da, 0xdc6c58ff},
- {0xd5d5d68e, 0x0000003c, 0x000003a3, 0x46fff083, 0x177d1b39, 0x0622cc95},
- {0xc25dd6c6, 0x00000024, 0x000006c0, 0x5ceb8eb4, 0x892b0d16, 0xe85605cd},
- {0xe9b11300, 0x00000023, 0x00000683, 0x07a5d59a, 0x6c6a3208, 0x31da5f06},
- {0x95cd285e, 0x00000001, 0x00000047, 0x7b3a4368, 0x0202c07e, 0xa1f2e784},
- {0xd9245a25, 0x0000001e, 0x000003a6, 0xd33c1841, 0x1936c0d5, 0xb07cc616},
- {0x103279db, 0x00000006, 0x0000039b, 0xca09b8a0, 0x77d62892, 0xbf943b6c},
- {0x1cba3172, 0x00000027, 0x000001c8, 0xcb377194, 0xebe682db, 0x2c01af1c},
- {0x8f613739, 0x0000000c, 0x000001df, 0xb4b0bc87, 0x7710bd43, 0x0fe5f56d},
- {0x1c6aa90d, 0x0000001b, 0x0000053c, 0x70559245, 0xda7894ac, 0xf8943b2d},
- {0xaabe5b93, 0x0000003d, 0x00000715, 0xcdbf42fa, 0x0c3b99e7, 0xe4d89272},
- {0xf15dd038, 0x00000006, 0x000006db, 0x6e104aea, 0x8d5967f2, 0x7c2f6bbb},
- {0x584dd49c, 0x00000020, 0x000007bc, 0x36b6cfd6, 0xad4e23b2, 0xabbf388b},
- {0x5d8c9506, 0x00000020, 0x00000470, 0x4c62378e, 0x31d92640, 0x1dca1f4e},
- {0xb80d17b0, 0x00000032, 0x00000346, 0x22a5bb88, 0x9a7ec89f, 0x5c170e23},
- {0xdaf0592e, 0x00000023, 0x000007b0, 0x3cab3f99, 0x9b1fdd99, 0xc0e9d672},
- {0x4793cc85, 0x0000000d, 0x00000706, 0xe82e04f6, 0xed3db6b7, 0xc18bdc86},
- {0x82ebf64e, 0x00000009, 0x000007c3, 0x69d590a9, 0x9efa8499, 0xa874fcdd},
- {0xb18a0319, 0x00000026, 0x000007db, 0x1cf98dcc, 0x8fa9ad6a, 0x9dc0bb48},
-};
-
-#include <linux/time.h>
-
-static int __init crc32c_test(void)
-{
- int i;
- int errors = 0;
- int bytes = 0;
- u64 nsec;
- unsigned long flags;
-
- /* keep static to prevent cache warming code from
- * getting eliminated by the compiler */
- static u32 crc;
-
- /* pre-warm the cache */
- for (i = 0; i < 100; i++) {
- bytes += 2*test[i].length;
-
- crc ^= __crc32c_le(test[i].crc, test_buf +
- test[i].start, test[i].length);
- }
-
- /* reduce OS noise */
- local_irq_save(flags);
- local_irq_disable();
-
- nsec = ktime_get_ns();
- for (i = 0; i < 100; i++) {
- if (test[i].crc32c_le != __crc32c_le(test[i].crc, test_buf +
- test[i].start, test[i].length))
- errors++;
- }
- nsec = ktime_get_ns() - nsec;
-
- local_irq_restore(flags);
- local_irq_enable();
-
- pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS);
-
- if (errors)
- pr_warn("crc32c: %d self tests failed\n", errors);
- else {
- pr_info("crc32c: self tests passed, processed %d bytes in %lld nsec\n",
- bytes, nsec);
- }
-
- return 0;
-}
-
-static int __init crc32c_combine_test(void)
-{
- int i, j;
- int errors = 0, runs = 0;
-
- for (i = 0; i < 10; i++) {
- u32 crc_full;
-
- crc_full = __crc32c_le(test[i].crc, test_buf + test[i].start,
- test[i].length);
- for (j = 0; j <= test[i].length; ++j) {
- u32 crc1, crc2;
- u32 len1 = j, len2 = test[i].length - j;
-
- crc1 = __crc32c_le(test[i].crc, test_buf +
- test[i].start, len1);
- crc2 = __crc32c_le(0, test_buf + test[i].start +
- len1, len2);
-
- if (!(crc_full == __crc32c_le_combine(crc1, crc2, len2) &&
- crc_full == test[i].crc32c_le))
- errors++;
- runs++;
- cond_resched();
- }
- }
-
- if (errors)
- pr_warn("crc32c_combine: %d/%d self tests failed\n", errors, runs);
- else
- pr_info("crc32c_combine: %d self tests passed\n", runs);
-
- return 0;
-}
-
-static int __init crc32_test(void)
-{
- int i;
- int errors = 0;
- int bytes = 0;
- u64 nsec;
- unsigned long flags;
-
- /* keep static to prevent cache warming code from
- * getting eliminated by the compiler */
- static u32 crc;
-
- /* pre-warm the cache */
- for (i = 0; i < 100; i++) {
- bytes += 2*test[i].length;
-
- crc ^= crc32_le(test[i].crc, test_buf +
- test[i].start, test[i].length);
-
- crc ^= crc32_be(test[i].crc, test_buf +
- test[i].start, test[i].length);
- }
-
- /* reduce OS noise */
- local_irq_save(flags);
- local_irq_disable();
-
- nsec = ktime_get_ns();
- for (i = 0; i < 100; i++) {
- if (test[i].crc_le != crc32_le(test[i].crc, test_buf +
- test[i].start, test[i].length))
- errors++;
-
- if (test[i].crc_be != crc32_be(test[i].crc, test_buf +
- test[i].start, test[i].length))
- errors++;
- }
- nsec = ktime_get_ns() - nsec;
-
- local_irq_restore(flags);
- local_irq_enable();
-
- pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n",
- CRC_LE_BITS, CRC_BE_BITS);
-
- if (errors)
- pr_warn("crc32: %d self tests failed\n", errors);
- else {
- pr_info("crc32: self tests passed, processed %d bytes in %lld nsec\n",
- bytes, nsec);
- }
-
- return 0;
-}
-
-static int __init crc32_combine_test(void)
-{
- int i, j;
- int errors = 0, runs = 0;
-
- for (i = 0; i < 10; i++) {
- u32 crc_full;
-
- crc_full = crc32_le(test[i].crc, test_buf + test[i].start,
- test[i].length);
- for (j = 0; j <= test[i].length; ++j) {
- u32 crc1, crc2;
- u32 len1 = j, len2 = test[i].length - j;
-
- crc1 = crc32_le(test[i].crc, test_buf +
- test[i].start, len1);
- crc2 = crc32_le(0, test_buf + test[i].start +
- len1, len2);
-
- if (!(crc_full == crc32_le_combine(crc1, crc2, len2) &&
- crc_full == test[i].crc_le))
- errors++;
- runs++;
- cond_resched();
- }
- }
-
- if (errors)
- pr_warn("crc32_combine: %d/%d self tests failed\n", errors, runs);
- else
- pr_info("crc32_combine: %d self tests passed\n", runs);
-
- return 0;
-}
-
-static int __init crc32test_init(void)
-{
- crc32_test();
- crc32c_test();
-
- crc32_combine_test();
- crc32c_combine_test();
-
- return 0;
-}
-
-static void __exit crc32_exit(void)
-{
-}
-
-module_init(crc32test_init);
-module_exit(crc32_exit);
-#endif /* CONFIG_CRC32_SELFTEST */
diff --git a/lib/crc32test.c b/lib/crc32test.c
new file mode 100644
index 000000000000..97d6a57cefcc
--- /dev/null
+++ b/lib/crc32test.c
@@ -0,0 +1,856 @@
+/*
+ * Aug 8, 2011 Bob Pearson with help from Joakim Tjernlund and George Spelvin
+ * cleaned up code to current version of sparse and added the slicing-by-8
+ * algorithm to the closely similar existing slicing-by-4 algorithm.
+ *
+ * Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com>
+ * Nicer crc32 functions/docs submitted by linux@horizon.com. Thanks!
+ * Code was from the public domain, copyright abandoned. Code was
+ * subsequently included in the kernel, thus was re-licensed under the
+ * GNU GPL v2.
+ *
+ * Oct 12, 2000 Matt Domsch <Matt_Domsch@dell.com>
+ * Same crc32 function was used in 5 other places in the kernel.
+ * I made one version, and deleted the others.
+ * There are various incantations of crc32(). Some use a seed of 0 or ~0.
+ * Some xor at the end with ~0. The generic crc32() function takes
+ * seed as an argument, and doesn't xor at the end. Then individual
+ * users can do whatever they need.
+ * drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0.
+ * fs/jffs2 uses seed 0, doesn't xor with ~0.
+ * fs/partitions/efi.c uses seed ~0, xor's with ~0.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/crc32.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+
+#include "crc32defs.h"
+
+/* 4096 random bytes */
+static u8 const __aligned(8) test_buf[] __initconst =
+{
+ 0x5b, 0x85, 0x21, 0xcb, 0x09, 0x68, 0x7d, 0x30,
+ 0xc7, 0x69, 0xd7, 0x30, 0x92, 0xde, 0x59, 0xe4,
+ 0xc9, 0x6e, 0x8b, 0xdb, 0x98, 0x6b, 0xaa, 0x60,
+ 0xa8, 0xb5, 0xbc, 0x6c, 0xa9, 0xb1, 0x5b, 0x2c,
+ 0xea, 0xb4, 0x92, 0x6a, 0x3f, 0x79, 0x91, 0xe4,
+ 0xe9, 0x70, 0x51, 0x8c, 0x7f, 0x95, 0x6f, 0x1a,
+ 0x56, 0xa1, 0x5c, 0x27, 0x03, 0x67, 0x9f, 0x3a,
+ 0xe2, 0x31, 0x11, 0x29, 0x6b, 0x98, 0xfc, 0xc4,
+ 0x53, 0x24, 0xc5, 0x8b, 0xce, 0x47, 0xb2, 0xb9,
+ 0x32, 0xcb, 0xc1, 0xd0, 0x03, 0x57, 0x4e, 0xd4,
+ 0xe9, 0x3c, 0xa1, 0x63, 0xcf, 0x12, 0x0e, 0xca,
+ 0xe1, 0x13, 0xd1, 0x93, 0xa6, 0x88, 0x5c, 0x61,
+ 0x5b, 0xbb, 0xf0, 0x19, 0x46, 0xb4, 0xcf, 0x9e,
+ 0xb6, 0x6b, 0x4c, 0x3a, 0xcf, 0x60, 0xf9, 0x7a,
+ 0x8d, 0x07, 0x63, 0xdb, 0x40, 0xe9, 0x0b, 0x6f,
+ 0xad, 0x97, 0xf1, 0xed, 0xd0, 0x1e, 0x26, 0xfd,
+ 0xbf, 0xb7, 0xc8, 0x04, 0x94, 0xf8, 0x8b, 0x8c,
+ 0xf1, 0xab, 0x7a, 0xd4, 0xdd, 0xf3, 0xe8, 0x88,
+ 0xc3, 0xed, 0x17, 0x8a, 0x9b, 0x40, 0x0d, 0x53,
+ 0x62, 0x12, 0x03, 0x5f, 0x1b, 0x35, 0x32, 0x1f,
+ 0xb4, 0x7b, 0x93, 0x78, 0x0d, 0xdb, 0xce, 0xa4,
+ 0xc0, 0x47, 0xd5, 0xbf, 0x68, 0xe8, 0x5d, 0x74,
+ 0x8f, 0x8e, 0x75, 0x1c, 0xb2, 0x4f, 0x9a, 0x60,
+ 0xd1, 0xbe, 0x10, 0xf4, 0x5c, 0xa1, 0x53, 0x09,
+ 0xa5, 0xe0, 0x09, 0x54, 0x85, 0x5c, 0xdc, 0x07,
+ 0xe7, 0x21, 0x69, 0x7b, 0x8a, 0xfd, 0x90, 0xf1,
+ 0x22, 0xd0, 0xb4, 0x36, 0x28, 0xe6, 0xb8, 0x0f,
+ 0x39, 0xde, 0xc8, 0xf3, 0x86, 0x60, 0x34, 0xd2,
+ 0x5e, 0xdf, 0xfd, 0xcf, 0x0f, 0xa9, 0x65, 0xf0,
+ 0xd5, 0x4d, 0x96, 0x40, 0xe3, 0xdf, 0x3f, 0x95,
+ 0x5a, 0x39, 0x19, 0x93, 0xf4, 0x75, 0xce, 0x22,
+ 0x00, 0x1c, 0x93, 0xe2, 0x03, 0x66, 0xf4, 0x93,
+ 0x73, 0x86, 0x81, 0x8e, 0x29, 0x44, 0x48, 0x86,
+ 0x61, 0x7c, 0x48, 0xa3, 0x43, 0xd2, 0x9c, 0x8d,
+ 0xd4, 0x95, 0xdd, 0xe1, 0x22, 0x89, 0x3a, 0x40,
+ 0x4c, 0x1b, 0x8a, 0x04, 0xa8, 0x09, 0x69, 0x8b,
+ 0xea, 0xc6, 0x55, 0x8e, 0x57, 0xe6, 0x64, 0x35,
+ 0xf0, 0xc7, 0x16, 0x9f, 0x5d, 0x5e, 0x86, 0x40,
+ 0x46, 0xbb, 0xe5, 0x45, 0x88, 0xfe, 0xc9, 0x63,
+ 0x15, 0xfb, 0xf5, 0xbd, 0x71, 0x61, 0xeb, 0x7b,
+ 0x78, 0x70, 0x07, 0x31, 0x03, 0x9f, 0xb2, 0xc8,
+ 0xa7, 0xab, 0x47, 0xfd, 0xdf, 0xa0, 0x78, 0x72,
+ 0xa4, 0x2a, 0xe4, 0xb6, 0xba, 0xc0, 0x1e, 0x86,
+ 0x71, 0xe6, 0x3d, 0x18, 0x37, 0x70, 0xe6, 0xff,
+ 0xe0, 0xbc, 0x0b, 0x22, 0xa0, 0x1f, 0xd3, 0xed,
+ 0xa2, 0x55, 0x39, 0xab, 0xa8, 0x13, 0x73, 0x7c,
+ 0x3f, 0xb2, 0xd6, 0x19, 0xac, 0xff, 0x99, 0xed,
+ 0xe8, 0xe6, 0xa6, 0x22, 0xe3, 0x9c, 0xf1, 0x30,
+ 0xdc, 0x01, 0x0a, 0x56, 0xfa, 0xe4, 0xc9, 0x99,
+ 0xdd, 0xa8, 0xd8, 0xda, 0x35, 0x51, 0x73, 0xb4,
+ 0x40, 0x86, 0x85, 0xdb, 0x5c, 0xd5, 0x85, 0x80,
+ 0x14, 0x9c, 0xfd, 0x98, 0xa9, 0x82, 0xc5, 0x37,
+ 0xff, 0x32, 0x5d, 0xd0, 0x0b, 0xfa, 0xdc, 0x04,
+ 0x5e, 0x09, 0xd2, 0xca, 0x17, 0x4b, 0x1a, 0x8e,
+ 0x15, 0xe1, 0xcc, 0x4e, 0x52, 0x88, 0x35, 0xbd,
+ 0x48, 0xfe, 0x15, 0xa0, 0x91, 0xfd, 0x7e, 0x6c,
+ 0x0e, 0x5d, 0x79, 0x1b, 0x81, 0x79, 0xd2, 0x09,
+ 0x34, 0x70, 0x3d, 0x81, 0xec, 0xf6, 0x24, 0xbb,
+ 0xfb, 0xf1, 0x7b, 0xdf, 0x54, 0xea, 0x80, 0x9b,
+ 0xc7, 0x99, 0x9e, 0xbd, 0x16, 0x78, 0x12, 0x53,
+ 0x5e, 0x01, 0xa7, 0x4e, 0xbd, 0x67, 0xe1, 0x9b,
+ 0x4c, 0x0e, 0x61, 0x45, 0x97, 0xd2, 0xf0, 0x0f,
+ 0xfe, 0x15, 0x08, 0xb7, 0x11, 0x4c, 0xe7, 0xff,
+ 0x81, 0x53, 0xff, 0x91, 0x25, 0x38, 0x7e, 0x40,
+ 0x94, 0xe5, 0xe0, 0xad, 0xe6, 0xd9, 0x79, 0xb6,
+ 0x92, 0xc9, 0xfc, 0xde, 0xc3, 0x1a, 0x23, 0xbb,
+ 0xdd, 0xc8, 0x51, 0x0c, 0x3a, 0x72, 0xfa, 0x73,
+ 0x6f, 0xb7, 0xee, 0x61, 0x39, 0x03, 0x01, 0x3f,
+ 0x7f, 0x94, 0x2e, 0x2e, 0xba, 0x3a, 0xbb, 0xb4,
+ 0xfa, 0x6a, 0x17, 0xfe, 0xea, 0xef, 0x5e, 0x66,
+ 0x97, 0x3f, 0x32, 0x3d, 0xd7, 0x3e, 0xb1, 0xf1,
+ 0x6c, 0x14, 0x4c, 0xfd, 0x37, 0xd3, 0x38, 0x80,
+ 0xfb, 0xde, 0xa6, 0x24, 0x1e, 0xc8, 0xca, 0x7f,
+ 0x3a, 0x93, 0xd8, 0x8b, 0x18, 0x13, 0xb2, 0xe5,
+ 0xe4, 0x93, 0x05, 0x53, 0x4f, 0x84, 0x66, 0xa7,
+ 0x58, 0x5c, 0x7b, 0x86, 0x52, 0x6d, 0x0d, 0xce,
+ 0xa4, 0x30, 0x7d, 0xb6, 0x18, 0x9f, 0xeb, 0xff,
+ 0x22, 0xbb, 0x72, 0x29, 0xb9, 0x44, 0x0b, 0x48,
+ 0x1e, 0x84, 0x71, 0x81, 0xe3, 0x6d, 0x73, 0x26,
+ 0x92, 0xb4, 0x4d, 0x2a, 0x29, 0xb8, 0x1f, 0x72,
+ 0xed, 0xd0, 0xe1, 0x64, 0x77, 0xea, 0x8e, 0x88,
+ 0x0f, 0xef, 0x3f, 0xb1, 0x3b, 0xad, 0xf9, 0xc9,
+ 0x8b, 0xd0, 0xac, 0xc6, 0xcc, 0xa9, 0x40, 0xcc,
+ 0x76, 0xf6, 0x3b, 0x53, 0xb5, 0x88, 0xcb, 0xc8,
+ 0x37, 0xf1, 0xa2, 0xba, 0x23, 0x15, 0x99, 0x09,
+ 0xcc, 0xe7, 0x7a, 0x3b, 0x37, 0xf7, 0x58, 0xc8,
+ 0x46, 0x8c, 0x2b, 0x2f, 0x4e, 0x0e, 0xa6, 0x5c,
+ 0xea, 0x85, 0x55, 0xba, 0x02, 0x0e, 0x0e, 0x48,
+ 0xbc, 0xe1, 0xb1, 0x01, 0x35, 0x79, 0x13, 0x3d,
+ 0x1b, 0xc0, 0x53, 0x68, 0x11, 0xe7, 0x95, 0x0f,
+ 0x9d, 0x3f, 0x4c, 0x47, 0x7b, 0x4d, 0x1c, 0xae,
+ 0x50, 0x9b, 0xcb, 0xdd, 0x05, 0x8d, 0x9a, 0x97,
+ 0xfd, 0x8c, 0xef, 0x0c, 0x1d, 0x67, 0x73, 0xa8,
+ 0x28, 0x36, 0xd5, 0xb6, 0x92, 0x33, 0x40, 0x75,
+ 0x0b, 0x51, 0xc3, 0x64, 0xba, 0x1d, 0xc2, 0xcc,
+ 0xee, 0x7d, 0x54, 0x0f, 0x27, 0x69, 0xa7, 0x27,
+ 0x63, 0x30, 0x29, 0xd9, 0xc8, 0x84, 0xd8, 0xdf,
+ 0x9f, 0x68, 0x8d, 0x04, 0xca, 0xa6, 0xc5, 0xc7,
+ 0x7a, 0x5c, 0xc8, 0xd1, 0xcb, 0x4a, 0xec, 0xd0,
+ 0xd8, 0x20, 0x69, 0xc5, 0x17, 0xcd, 0x78, 0xc8,
+ 0x75, 0x23, 0x30, 0x69, 0xc9, 0xd4, 0xea, 0x5c,
+ 0x4f, 0x6b, 0x86, 0x3f, 0x8b, 0xfe, 0xee, 0x44,
+ 0xc9, 0x7c, 0xb7, 0xdd, 0x3e, 0xe5, 0xec, 0x54,
+ 0x03, 0x3e, 0xaa, 0x82, 0xc6, 0xdf, 0xb2, 0x38,
+ 0x0e, 0x5d, 0xb3, 0x88, 0xd9, 0xd3, 0x69, 0x5f,
+ 0x8f, 0x70, 0x8a, 0x7e, 0x11, 0xd9, 0x1e, 0x7b,
+ 0x38, 0xf1, 0x42, 0x1a, 0xc0, 0x35, 0xf5, 0xc7,
+ 0x36, 0x85, 0xf5, 0xf7, 0xb8, 0x7e, 0xc7, 0xef,
+ 0x18, 0xf1, 0x63, 0xd6, 0x7a, 0xc6, 0xc9, 0x0e,
+ 0x4d, 0x69, 0x4f, 0x84, 0xef, 0x26, 0x41, 0x0c,
+ 0xec, 0xc7, 0xe0, 0x7e, 0x3c, 0x67, 0x01, 0x4c,
+ 0x62, 0x1a, 0x20, 0x6f, 0xee, 0x47, 0x4d, 0xc0,
+ 0x99, 0x13, 0x8d, 0x91, 0x4a, 0x26, 0xd4, 0x37,
+ 0x28, 0x90, 0x58, 0x75, 0x66, 0x2b, 0x0a, 0xdf,
+ 0xda, 0xee, 0x92, 0x25, 0x90, 0x62, 0x39, 0x9e,
+ 0x44, 0x98, 0xad, 0xc1, 0x88, 0xed, 0xe4, 0xb4,
+ 0xaf, 0xf5, 0x8c, 0x9b, 0x48, 0x4d, 0x56, 0x60,
+ 0x97, 0x0f, 0x61, 0x59, 0x9e, 0xa6, 0x27, 0xfe,
+ 0xc1, 0x91, 0x15, 0x38, 0xb8, 0x0f, 0xae, 0x61,
+ 0x7d, 0x26, 0x13, 0x5a, 0x73, 0xff, 0x1c, 0xa3,
+ 0x61, 0x04, 0x58, 0x48, 0x55, 0x44, 0x11, 0xfe,
+ 0x15, 0xca, 0xc3, 0xbd, 0xca, 0xc5, 0xb4, 0x40,
+ 0x5d, 0x1b, 0x7f, 0x39, 0xb5, 0x9c, 0x35, 0xec,
+ 0x61, 0x15, 0x32, 0x32, 0xb8, 0x4e, 0x40, 0x9f,
+ 0x17, 0x1f, 0x0a, 0x4d, 0xa9, 0x91, 0xef, 0xb7,
+ 0xb0, 0xeb, 0xc2, 0x83, 0x9a, 0x6c, 0xd2, 0x79,
+ 0x43, 0x78, 0x5e, 0x2f, 0xe5, 0xdd, 0x1a, 0x3c,
+ 0x45, 0xab, 0x29, 0x40, 0x3a, 0x37, 0x5b, 0x6f,
+ 0xd7, 0xfc, 0x48, 0x64, 0x3c, 0x49, 0xfb, 0x21,
+ 0xbe, 0xc3, 0xff, 0x07, 0xfb, 0x17, 0xe9, 0xc9,
+ 0x0c, 0x4c, 0x5c, 0x15, 0x9e, 0x8e, 0x22, 0x30,
+ 0x0a, 0xde, 0x48, 0x7f, 0xdb, 0x0d, 0xd1, 0x2b,
+ 0x87, 0x38, 0x9e, 0xcc, 0x5a, 0x01, 0x16, 0xee,
+ 0x75, 0x49, 0x0d, 0x30, 0x01, 0x34, 0x6a, 0xb6,
+ 0x9a, 0x5a, 0x2a, 0xec, 0xbb, 0x48, 0xac, 0xd3,
+ 0x77, 0x83, 0xd8, 0x08, 0x86, 0x4f, 0x48, 0x09,
+ 0x29, 0x41, 0x79, 0xa1, 0x03, 0x12, 0xc4, 0xcd,
+ 0x90, 0x55, 0x47, 0x66, 0x74, 0x9a, 0xcc, 0x4f,
+ 0x35, 0x8c, 0xd6, 0x98, 0xef, 0xeb, 0x45, 0xb9,
+ 0x9a, 0x26, 0x2f, 0x39, 0xa5, 0x70, 0x6d, 0xfc,
+ 0xb4, 0x51, 0xee, 0xf4, 0x9c, 0xe7, 0x38, 0x59,
+ 0xad, 0xf4, 0xbc, 0x46, 0xff, 0x46, 0x8e, 0x60,
+ 0x9c, 0xa3, 0x60, 0x1d, 0xf8, 0x26, 0x72, 0xf5,
+ 0x72, 0x9d, 0x68, 0x80, 0x04, 0xf6, 0x0b, 0xa1,
+ 0x0a, 0xd5, 0xa7, 0x82, 0x3a, 0x3e, 0x47, 0xa8,
+ 0x5a, 0xde, 0x59, 0x4f, 0x7b, 0x07, 0xb3, 0xe9,
+ 0x24, 0x19, 0x3d, 0x34, 0x05, 0xec, 0xf1, 0xab,
+ 0x6e, 0x64, 0x8f, 0xd3, 0xe6, 0x41, 0x86, 0x80,
+ 0x70, 0xe3, 0x8d, 0x60, 0x9c, 0x34, 0x25, 0x01,
+ 0x07, 0x4d, 0x19, 0x41, 0x4e, 0x3d, 0x5c, 0x7e,
+ 0xa8, 0xf5, 0xcc, 0xd5, 0x7b, 0xe2, 0x7d, 0x3d,
+ 0x49, 0x86, 0x7d, 0x07, 0xb7, 0x10, 0xe3, 0x35,
+ 0xb8, 0x84, 0x6d, 0x76, 0xab, 0x17, 0xc6, 0x38,
+ 0xb4, 0xd3, 0x28, 0x57, 0xad, 0xd3, 0x88, 0x5a,
+ 0xda, 0xea, 0xc8, 0x94, 0xcc, 0x37, 0x19, 0xac,
+ 0x9c, 0x9f, 0x4b, 0x00, 0x15, 0xc0, 0xc8, 0xca,
+ 0x1f, 0x15, 0xaa, 0xe0, 0xdb, 0xf9, 0x2f, 0x57,
+ 0x1b, 0x24, 0xc7, 0x6f, 0x76, 0x29, 0xfb, 0xed,
+ 0x25, 0x0d, 0xc0, 0xfe, 0xbd, 0x5a, 0xbf, 0x20,
+ 0x08, 0x51, 0x05, 0xec, 0x71, 0xa3, 0xbf, 0xef,
+ 0x5e, 0x99, 0x75, 0xdb, 0x3c, 0x5f, 0x9a, 0x8c,
+ 0xbb, 0x19, 0x5c, 0x0e, 0x93, 0x19, 0xf8, 0x6a,
+ 0xbc, 0xf2, 0x12, 0x54, 0x2f, 0xcb, 0x28, 0x64,
+ 0x88, 0xb3, 0x92, 0x0d, 0x96, 0xd1, 0xa6, 0xe4,
+ 0x1f, 0xf1, 0x4d, 0xa4, 0xab, 0x1c, 0xee, 0x54,
+ 0xf2, 0xad, 0x29, 0x6d, 0x32, 0x37, 0xb2, 0x16,
+ 0x77, 0x5c, 0xdc, 0x2e, 0x54, 0xec, 0x75, 0x26,
+ 0xc6, 0x36, 0xd9, 0x17, 0x2c, 0xf1, 0x7a, 0xdc,
+ 0x4b, 0xf1, 0xe2, 0xd9, 0x95, 0xba, 0xac, 0x87,
+ 0xc1, 0xf3, 0x8e, 0x58, 0x08, 0xd8, 0x87, 0x60,
+ 0xc9, 0xee, 0x6a, 0xde, 0xa4, 0xd2, 0xfc, 0x0d,
+ 0xe5, 0x36, 0xc4, 0x5c, 0x52, 0xb3, 0x07, 0x54,
+ 0x65, 0x24, 0xc1, 0xb1, 0xd1, 0xb1, 0x53, 0x13,
+ 0x31, 0x79, 0x7f, 0x05, 0x76, 0xeb, 0x37, 0x59,
+ 0x15, 0x2b, 0xd1, 0x3f, 0xac, 0x08, 0x97, 0xeb,
+ 0x91, 0x98, 0xdf, 0x6c, 0x09, 0x0d, 0x04, 0x9f,
+ 0xdc, 0x3b, 0x0e, 0x60, 0x68, 0x47, 0x23, 0x15,
+ 0x16, 0xc6, 0x0b, 0x35, 0xf8, 0x77, 0xa2, 0x78,
+ 0x50, 0xd4, 0x64, 0x22, 0x33, 0xff, 0xfb, 0x93,
+ 0x71, 0x46, 0x50, 0x39, 0x1b, 0x9c, 0xea, 0x4e,
+ 0x8d, 0x0c, 0x37, 0xe5, 0x5c, 0x51, 0x3a, 0x31,
+ 0xb2, 0x85, 0x84, 0x3f, 0x41, 0xee, 0xa2, 0xc1,
+ 0xc6, 0x13, 0x3b, 0x54, 0x28, 0xd2, 0x18, 0x37,
+ 0xcc, 0x46, 0x9f, 0x6a, 0x91, 0x3d, 0x5a, 0x15,
+ 0x3c, 0x89, 0xa3, 0x61, 0x06, 0x7d, 0x2e, 0x78,
+ 0xbe, 0x7d, 0x40, 0xba, 0x2f, 0x95, 0xb1, 0x2f,
+ 0x87, 0x3b, 0x8a, 0xbe, 0x6a, 0xf4, 0xc2, 0x31,
+ 0x74, 0xee, 0x91, 0xe0, 0x23, 0xaa, 0x5d, 0x7f,
+ 0xdd, 0xf0, 0x44, 0x8c, 0x0b, 0x59, 0x2b, 0xfc,
+ 0x48, 0x3a, 0xdf, 0x07, 0x05, 0x38, 0x6c, 0xc9,
+ 0xeb, 0x18, 0x24, 0x68, 0x8d, 0x58, 0x98, 0xd3,
+ 0x31, 0xa3, 0xe4, 0x70, 0x59, 0xb1, 0x21, 0xbe,
+ 0x7e, 0x65, 0x7d, 0xb8, 0x04, 0xab, 0xf6, 0xe4,
+ 0xd7, 0xda, 0xec, 0x09, 0x8f, 0xda, 0x6d, 0x24,
+ 0x07, 0xcc, 0x29, 0x17, 0x05, 0x78, 0x1a, 0xc1,
+ 0xb1, 0xce, 0xfc, 0xaa, 0x2d, 0xe7, 0xcc, 0x85,
+ 0x84, 0x84, 0x03, 0x2a, 0x0c, 0x3f, 0xa9, 0xf8,
+ 0xfd, 0x84, 0x53, 0x59, 0x5c, 0xf0, 0xd4, 0x09,
+ 0xf0, 0xd2, 0x6c, 0x32, 0x03, 0xb0, 0xa0, 0x8c,
+ 0x52, 0xeb, 0x23, 0x91, 0x88, 0x43, 0x13, 0x46,
+ 0xf6, 0x1e, 0xb4, 0x1b, 0xf5, 0x8e, 0x3a, 0xb5,
+ 0x3d, 0x00, 0xf6, 0xe5, 0x08, 0x3d, 0x5f, 0x39,
+ 0xd3, 0x21, 0x69, 0xbc, 0x03, 0x22, 0x3a, 0xd2,
+ 0x5c, 0x84, 0xf8, 0x15, 0xc4, 0x80, 0x0b, 0xbc,
+ 0x29, 0x3c, 0xf3, 0x95, 0x98, 0xcd, 0x8f, 0x35,
+ 0xbc, 0xa5, 0x3e, 0xfc, 0xd4, 0x13, 0x9e, 0xde,
+ 0x4f, 0xce, 0x71, 0x9d, 0x09, 0xad, 0xf2, 0x80,
+ 0x6b, 0x65, 0x7f, 0x03, 0x00, 0x14, 0x7c, 0x15,
+ 0x85, 0x40, 0x6d, 0x70, 0xea, 0xdc, 0xb3, 0x63,
+ 0x35, 0x4f, 0x4d, 0xe0, 0xd9, 0xd5, 0x3c, 0x58,
+ 0x56, 0x23, 0x80, 0xe2, 0x36, 0xdd, 0x75, 0x1d,
+ 0x94, 0x11, 0x41, 0x8e, 0xe0, 0x81, 0x8e, 0xcf,
+ 0xe0, 0xe5, 0xf6, 0xde, 0xd1, 0xe7, 0x04, 0x12,
+ 0x79, 0x92, 0x2b, 0x71, 0x2a, 0x79, 0x8b, 0x7c,
+ 0x44, 0x79, 0x16, 0x30, 0x4e, 0xf4, 0xf6, 0x9b,
+ 0xb7, 0x40, 0xa3, 0x5a, 0xa7, 0x69, 0x3e, 0xc1,
+ 0x3a, 0x04, 0xd0, 0x88, 0xa0, 0x3b, 0xdd, 0xc6,
+ 0x9e, 0x7e, 0x1e, 0x1e, 0x8f, 0x44, 0xf7, 0x73,
+ 0x67, 0x1e, 0x1a, 0x78, 0xfa, 0x62, 0xf4, 0xa9,
+ 0xa8, 0xc6, 0x5b, 0xb8, 0xfa, 0x06, 0x7d, 0x5e,
+ 0x38, 0x1c, 0x9a, 0x39, 0xe9, 0x39, 0x98, 0x22,
+ 0x0b, 0xa7, 0xac, 0x0b, 0xf3, 0xbc, 0xf1, 0xeb,
+ 0x8c, 0x81, 0xe3, 0x48, 0x8a, 0xed, 0x42, 0xc2,
+ 0x38, 0xcf, 0x3e, 0xda, 0xd2, 0x89, 0x8d, 0x9c,
+ 0x53, 0xb5, 0x2f, 0x41, 0x01, 0x26, 0x84, 0x9c,
+ 0xa3, 0x56, 0xf6, 0x49, 0xc7, 0xd4, 0x9f, 0x93,
+ 0x1b, 0x96, 0x49, 0x5e, 0xad, 0xb3, 0x84, 0x1f,
+ 0x3c, 0xa4, 0xe0, 0x9b, 0xd1, 0x90, 0xbc, 0x38,
+ 0x6c, 0xdd, 0x95, 0x4d, 0x9d, 0xb1, 0x71, 0x57,
+ 0x2d, 0x34, 0xe8, 0xb8, 0x42, 0xc7, 0x99, 0x03,
+ 0xc7, 0x07, 0x30, 0x65, 0x91, 0x55, 0xd5, 0x90,
+ 0x70, 0x97, 0x37, 0x68, 0xd4, 0x11, 0xf9, 0xe8,
+ 0xce, 0xec, 0xdc, 0x34, 0xd5, 0xd3, 0xb7, 0xc4,
+ 0xb8, 0x97, 0x05, 0x92, 0xad, 0xf8, 0xe2, 0x36,
+ 0x64, 0x41, 0xc9, 0xc5, 0x41, 0x77, 0x52, 0xd7,
+ 0x2c, 0xa5, 0x24, 0x2f, 0xd9, 0x34, 0x0b, 0x47,
+ 0x35, 0xa7, 0x28, 0x8b, 0xc5, 0xcd, 0xe9, 0x46,
+ 0xac, 0x39, 0x94, 0x3c, 0x10, 0xc6, 0x29, 0x73,
+ 0x0e, 0x0e, 0x5d, 0xe0, 0x71, 0x03, 0x8a, 0x72,
+ 0x0e, 0x26, 0xb0, 0x7d, 0x84, 0xed, 0x95, 0x23,
+ 0x49, 0x5a, 0x45, 0x83, 0x45, 0x60, 0x11, 0x4a,
+ 0x46, 0x31, 0xd4, 0xd8, 0x16, 0x54, 0x98, 0x58,
+ 0xed, 0x6d, 0xcc, 0x5d, 0xd6, 0x50, 0x61, 0x9f,
+ 0x9d, 0xc5, 0x3e, 0x9d, 0x32, 0x47, 0xde, 0x96,
+ 0xe1, 0x5d, 0xd8, 0xf8, 0xb4, 0x69, 0x6f, 0xb9,
+ 0x15, 0x90, 0x57, 0x7a, 0xf6, 0xad, 0xb0, 0x5b,
+ 0xf5, 0xa6, 0x36, 0x94, 0xfd, 0x84, 0xce, 0x1c,
+ 0x0f, 0x4b, 0xd0, 0xc2, 0x5b, 0x6b, 0x56, 0xef,
+ 0x73, 0x93, 0x0b, 0xc3, 0xee, 0xd9, 0xcf, 0xd3,
+ 0xa4, 0x22, 0x58, 0xcd, 0x50, 0x6e, 0x65, 0xf4,
+ 0xe9, 0xb7, 0x71, 0xaf, 0x4b, 0xb3, 0xb6, 0x2f,
+ 0x0f, 0x0e, 0x3b, 0xc9, 0x85, 0x14, 0xf5, 0x17,
+ 0xe8, 0x7a, 0x3a, 0xbf, 0x5f, 0x5e, 0xf8, 0x18,
+ 0x48, 0xa6, 0x72, 0xab, 0x06, 0x95, 0xe9, 0xc8,
+ 0xa7, 0xf4, 0x32, 0x44, 0x04, 0x0c, 0x84, 0x98,
+ 0x73, 0xe3, 0x89, 0x8d, 0x5f, 0x7e, 0x4a, 0x42,
+ 0x8f, 0xc5, 0x28, 0xb1, 0x82, 0xef, 0x1c, 0x97,
+ 0x31, 0x3b, 0x4d, 0xe0, 0x0e, 0x10, 0x10, 0x97,
+ 0x93, 0x49, 0x78, 0x2f, 0x0d, 0x86, 0x8b, 0xa1,
+ 0x53, 0xa9, 0x81, 0x20, 0x79, 0xe7, 0x07, 0x77,
+ 0xb6, 0xac, 0x5e, 0xd2, 0x05, 0xcd, 0xe9, 0xdb,
+ 0x8a, 0x94, 0x82, 0x8a, 0x23, 0xb9, 0x3d, 0x1c,
+ 0xa9, 0x7d, 0x72, 0x4a, 0xed, 0x33, 0xa3, 0xdb,
+ 0x21, 0xa7, 0x86, 0x33, 0x45, 0xa5, 0xaa, 0x56,
+ 0x45, 0xb5, 0x83, 0x29, 0x40, 0x47, 0x79, 0x04,
+ 0x6e, 0xb9, 0x95, 0xd0, 0x81, 0x77, 0x2d, 0x48,
+ 0x1e, 0xfe, 0xc3, 0xc2, 0x1e, 0xe5, 0xf2, 0xbe,
+ 0xfd, 0x3b, 0x94, 0x9f, 0xc4, 0xc4, 0x26, 0x9d,
+ 0xe4, 0x66, 0x1e, 0x19, 0xee, 0x6c, 0x79, 0x97,
+ 0x11, 0x31, 0x4b, 0x0d, 0x01, 0xcb, 0xde, 0xa8,
+ 0xf6, 0x6d, 0x7c, 0x39, 0x46, 0x4e, 0x7e, 0x3f,
+ 0x94, 0x17, 0xdf, 0xa1, 0x7d, 0xd9, 0x1c, 0x8e,
+ 0xbc, 0x7d, 0x33, 0x7d, 0xe3, 0x12, 0x40, 0xca,
+ 0xab, 0x37, 0x11, 0x46, 0xd4, 0xae, 0xef, 0x44,
+ 0xa2, 0xb3, 0x6a, 0x66, 0x0e, 0x0c, 0x90, 0x7f,
+ 0xdf, 0x5c, 0x66, 0x5f, 0xf2, 0x94, 0x9f, 0xa6,
+ 0x73, 0x4f, 0xeb, 0x0d, 0xad, 0xbf, 0xc0, 0x63,
+ 0x5c, 0xdc, 0x46, 0x51, 0xe8, 0x8e, 0x90, 0x19,
+ 0xa8, 0xa4, 0x3c, 0x91, 0x79, 0xfa, 0x7e, 0x58,
+ 0x85, 0x13, 0x55, 0xc5, 0x19, 0x82, 0x37, 0x1b,
+ 0x0a, 0x02, 0x1f, 0x99, 0x6b, 0x18, 0xf1, 0x28,
+ 0x08, 0xa2, 0x73, 0xb8, 0x0f, 0x2e, 0xcd, 0xbf,
+ 0xf3, 0x86, 0x7f, 0xea, 0xef, 0xd0, 0xbb, 0xa6,
+ 0x21, 0xdf, 0x49, 0x73, 0x51, 0xcc, 0x36, 0xd3,
+ 0x3e, 0xa0, 0xf8, 0x44, 0xdf, 0xd3, 0xa6, 0xbe,
+ 0x8a, 0xd4, 0x57, 0xdd, 0x72, 0x94, 0x61, 0x0f,
+ 0x82, 0xd1, 0x07, 0xb8, 0x7c, 0x18, 0x83, 0xdf,
+ 0x3a, 0xe5, 0x50, 0x6a, 0x82, 0x20, 0xac, 0xa9,
+ 0xa8, 0xff, 0xd9, 0xf3, 0x77, 0x33, 0x5a, 0x9e,
+ 0x7f, 0x6d, 0xfe, 0x5d, 0x33, 0x41, 0x42, 0xe7,
+ 0x6c, 0x19, 0xe0, 0x44, 0x8a, 0x15, 0xf6, 0x70,
+ 0x98, 0xb7, 0x68, 0x4d, 0xfa, 0x97, 0x39, 0xb0,
+ 0x8e, 0xe8, 0x84, 0x8b, 0x75, 0x30, 0xb7, 0x7d,
+ 0x92, 0x69, 0x20, 0x9c, 0x81, 0xfb, 0x4b, 0xf4,
+ 0x01, 0x50, 0xeb, 0xce, 0x0c, 0x1c, 0x6c, 0xb5,
+ 0x4a, 0xd7, 0x27, 0x0c, 0xce, 0xbb, 0xe5, 0x85,
+ 0xf0, 0xb6, 0xee, 0xd5, 0x70, 0xdd, 0x3b, 0xfc,
+ 0xd4, 0x99, 0xf1, 0x33, 0xdd, 0x8b, 0xc4, 0x2f,
+ 0xae, 0xab, 0x74, 0x96, 0x32, 0xc7, 0x4c, 0x56,
+ 0x3c, 0x89, 0x0f, 0x96, 0x0b, 0x42, 0xc0, 0xcb,
+ 0xee, 0x0f, 0x0b, 0x8c, 0xfb, 0x7e, 0x47, 0x7b,
+ 0x64, 0x48, 0xfd, 0xb2, 0x00, 0x80, 0x89, 0xa5,
+ 0x13, 0x55, 0x62, 0xfc, 0x8f, 0xe2, 0x42, 0x03,
+ 0xb7, 0x4e, 0x2a, 0x79, 0xb4, 0x82, 0xea, 0x23,
+ 0x49, 0xda, 0xaf, 0x52, 0x63, 0x1e, 0x60, 0x03,
+ 0x89, 0x06, 0x44, 0x46, 0x08, 0xc3, 0xc4, 0x87,
+ 0x70, 0x2e, 0xda, 0x94, 0xad, 0x6b, 0xe0, 0xe4,
+ 0xd1, 0x8a, 0x06, 0xc2, 0xa8, 0xc0, 0xa7, 0x43,
+ 0x3c, 0x47, 0x52, 0x0e, 0xc3, 0x77, 0x81, 0x11,
+ 0x67, 0x0e, 0xa0, 0x70, 0x04, 0x47, 0x29, 0x40,
+ 0x86, 0x0d, 0x34, 0x56, 0xa7, 0xc9, 0x35, 0x59,
+ 0x68, 0xdc, 0x93, 0x81, 0x70, 0xee, 0x86, 0xd9,
+ 0x80, 0x06, 0x40, 0x4f, 0x1a, 0x0d, 0x40, 0x30,
+ 0x0b, 0xcb, 0x96, 0x47, 0xc1, 0xb7, 0x52, 0xfd,
+ 0x56, 0xe0, 0x72, 0x4b, 0xfb, 0xbd, 0x92, 0x45,
+ 0x61, 0x71, 0xc2, 0x33, 0x11, 0xbf, 0x52, 0x83,
+ 0x79, 0x26, 0xe0, 0x49, 0x6b, 0xb7, 0x05, 0x8b,
+ 0xe8, 0x0e, 0x87, 0x31, 0xd7, 0x9d, 0x8a, 0xf5,
+ 0xc0, 0x5f, 0x2e, 0x58, 0x4a, 0xdb, 0x11, 0xb3,
+ 0x6c, 0x30, 0x2a, 0x46, 0x19, 0xe3, 0x27, 0x84,
+ 0x1f, 0x63, 0x6e, 0xf6, 0x57, 0xc7, 0xc9, 0xd8,
+ 0x5e, 0xba, 0xb3, 0x87, 0xd5, 0x83, 0x26, 0x34,
+ 0x21, 0x9e, 0x65, 0xde, 0x42, 0xd3, 0xbe, 0x7b,
+ 0xbc, 0x91, 0x71, 0x44, 0x4d, 0x99, 0x3b, 0x31,
+ 0xe5, 0x3f, 0x11, 0x4e, 0x7f, 0x13, 0x51, 0x3b,
+ 0xae, 0x79, 0xc9, 0xd3, 0x81, 0x8e, 0x25, 0x40,
+ 0x10, 0xfc, 0x07, 0x1e, 0xf9, 0x7b, 0x9a, 0x4b,
+ 0x6c, 0xe3, 0xb3, 0xad, 0x1a, 0x0a, 0xdd, 0x9e,
+ 0x59, 0x0c, 0xa2, 0xcd, 0xae, 0x48, 0x4a, 0x38,
+ 0x5b, 0x47, 0x41, 0x94, 0x65, 0x6b, 0xbb, 0xeb,
+ 0x5b, 0xe3, 0xaf, 0x07, 0x5b, 0xd4, 0x4a, 0xa2,
+ 0xc9, 0x5d, 0x2f, 0x64, 0x03, 0xd7, 0x3a, 0x2c,
+ 0x6e, 0xce, 0x76, 0x95, 0xb4, 0xb3, 0xc0, 0xf1,
+ 0xe2, 0x45, 0x73, 0x7a, 0x5c, 0xab, 0xc1, 0xfc,
+ 0x02, 0x8d, 0x81, 0x29, 0xb3, 0xac, 0x07, 0xec,
+ 0x40, 0x7d, 0x45, 0xd9, 0x7a, 0x59, 0xee, 0x34,
+ 0xf0, 0xe9, 0xd5, 0x7b, 0x96, 0xb1, 0x3d, 0x95,
+ 0xcc, 0x86, 0xb5, 0xb6, 0x04, 0x2d, 0xb5, 0x92,
+ 0x7e, 0x76, 0xf4, 0x06, 0xa9, 0xa3, 0x12, 0x0f,
+ 0xb1, 0xaf, 0x26, 0xba, 0x7c, 0xfc, 0x7e, 0x1c,
+ 0xbc, 0x2c, 0x49, 0x97, 0x53, 0x60, 0x13, 0x0b,
+ 0xa6, 0x61, 0x83, 0x89, 0x42, 0xd4, 0x17, 0x0c,
+ 0x6c, 0x26, 0x52, 0xc3, 0xb3, 0xd4, 0x67, 0xf5,
+ 0xe3, 0x04, 0xb7, 0xf4, 0xcb, 0x80, 0xb8, 0xcb,
+ 0x77, 0x56, 0x3e, 0xaa, 0x57, 0x54, 0xee, 0xb4,
+ 0x2c, 0x67, 0xcf, 0xf2, 0xdc, 0xbe, 0x55, 0xf9,
+ 0x43, 0x1f, 0x6e, 0x22, 0x97, 0x67, 0x7f, 0xc4,
+ 0xef, 0xb1, 0x26, 0x31, 0x1e, 0x27, 0xdf, 0x41,
+ 0x80, 0x47, 0x6c, 0xe2, 0xfa, 0xa9, 0x8c, 0x2a,
+ 0xf6, 0xf2, 0xab, 0xf0, 0x15, 0xda, 0x6c, 0xc8,
+ 0xfe, 0xb5, 0x23, 0xde, 0xa9, 0x05, 0x3f, 0x06,
+ 0x54, 0x4c, 0xcd, 0xe1, 0xab, 0xfc, 0x0e, 0x62,
+ 0x33, 0x31, 0x73, 0x2c, 0x76, 0xcb, 0xb4, 0x47,
+ 0x1e, 0x20, 0xad, 0xd8, 0xf2, 0x31, 0xdd, 0xc4,
+ 0x8b, 0x0c, 0x77, 0xbe, 0xe1, 0x8b, 0x26, 0x00,
+ 0x02, 0x58, 0xd6, 0x8d, 0xef, 0xad, 0x74, 0x67,
+ 0xab, 0x3f, 0xef, 0xcb, 0x6f, 0xb0, 0xcc, 0x81,
+ 0x44, 0x4c, 0xaf, 0xe9, 0x49, 0x4f, 0xdb, 0xa0,
+ 0x25, 0xa4, 0xf0, 0x89, 0xf1, 0xbe, 0xd8, 0x10,
+ 0xff, 0xb1, 0x3b, 0x4b, 0xfa, 0x98, 0xf5, 0x79,
+ 0x6d, 0x1e, 0x69, 0x4d, 0x57, 0xb1, 0xc8, 0x19,
+ 0x1b, 0xbd, 0x1e, 0x8c, 0x84, 0xb7, 0x7b, 0xe8,
+ 0xd2, 0x2d, 0x09, 0x41, 0x41, 0x37, 0x3d, 0xb1,
+ 0x6f, 0x26, 0x5d, 0x71, 0x16, 0x3d, 0xb7, 0x83,
+ 0x27, 0x2c, 0xa7, 0xb6, 0x50, 0xbd, 0x91, 0x86,
+ 0xab, 0x24, 0xa1, 0x38, 0xfd, 0xea, 0x71, 0x55,
+ 0x7e, 0x9a, 0x07, 0x77, 0x4b, 0xfa, 0x61, 0x66,
+ 0x20, 0x1e, 0x28, 0x95, 0x18, 0x1b, 0xa4, 0xa0,
+ 0xfd, 0xc0, 0x89, 0x72, 0x43, 0xd9, 0x3b, 0x49,
+ 0x5a, 0x3f, 0x9d, 0xbf, 0xdb, 0xb4, 0x46, 0xea,
+ 0x42, 0x01, 0x77, 0x23, 0x68, 0x95, 0xb6, 0x24,
+ 0xb3, 0xa8, 0x6c, 0x28, 0x3b, 0x11, 0x40, 0x7e,
+ 0x18, 0x65, 0x6d, 0xd8, 0x24, 0x42, 0x7d, 0x88,
+ 0xc0, 0x52, 0xd9, 0x05, 0xe4, 0x95, 0x90, 0x87,
+ 0x8c, 0xf4, 0xd0, 0x6b, 0xb9, 0x83, 0x99, 0x34,
+ 0x6d, 0xfe, 0x54, 0x40, 0x94, 0x52, 0x21, 0x4f,
+ 0x14, 0x25, 0xc5, 0xd6, 0x5e, 0x95, 0xdc, 0x0a,
+ 0x2b, 0x89, 0x20, 0x11, 0x84, 0x48, 0xd6, 0x3a,
+ 0xcd, 0x5c, 0x24, 0xad, 0x62, 0xe3, 0xb1, 0x93,
+ 0x25, 0x8d, 0xcd, 0x7e, 0xfc, 0x27, 0xa3, 0x37,
+ 0xfd, 0x84, 0xfc, 0x1b, 0xb2, 0xf1, 0x27, 0x38,
+ 0x5a, 0xb7, 0xfc, 0xf2, 0xfa, 0x95, 0x66, 0xd4,
+ 0xfb, 0xba, 0xa7, 0xd7, 0xa3, 0x72, 0x69, 0x48,
+ 0x48, 0x8c, 0xeb, 0x28, 0x89, 0xfe, 0x33, 0x65,
+ 0x5a, 0x36, 0x01, 0x7e, 0x06, 0x79, 0x0a, 0x09,
+ 0x3b, 0x74, 0x11, 0x9a, 0x6e, 0xbf, 0xd4, 0x9e,
+ 0x58, 0x90, 0x49, 0x4f, 0x4d, 0x08, 0xd4, 0xe5,
+ 0x4a, 0x09, 0x21, 0xef, 0x8b, 0xb8, 0x74, 0x3b,
+ 0x91, 0xdd, 0x36, 0x85, 0x60, 0x2d, 0xfa, 0xd4,
+ 0x45, 0x7b, 0x45, 0x53, 0xf5, 0x47, 0x87, 0x7e,
+ 0xa6, 0x37, 0xc8, 0x78, 0x7a, 0x68, 0x9d, 0x8d,
+ 0x65, 0x2c, 0x0e, 0x91, 0x5c, 0xa2, 0x60, 0xf0,
+ 0x8e, 0x3f, 0xe9, 0x1a, 0xcd, 0xaa, 0xe7, 0xd5,
+ 0x77, 0x18, 0xaf, 0xc9, 0xbc, 0x18, 0xea, 0x48,
+ 0x1b, 0xfb, 0x22, 0x48, 0x70, 0x16, 0x29, 0x9e,
+ 0x5b, 0xc1, 0x2c, 0x66, 0x23, 0xbc, 0xf0, 0x1f,
+ 0xef, 0xaf, 0xe4, 0xd6, 0x04, 0x19, 0x82, 0x7a,
+ 0x0b, 0xba, 0x4b, 0x46, 0xb1, 0x6a, 0x85, 0x5d,
+ 0xb4, 0x73, 0xd6, 0x21, 0xa1, 0x71, 0x60, 0x14,
+ 0xee, 0x0a, 0x77, 0xc4, 0x66, 0x2e, 0xf9, 0x69,
+ 0x30, 0xaf, 0x41, 0x0b, 0xc8, 0x83, 0x3c, 0x53,
+ 0x99, 0x19, 0x27, 0x46, 0xf7, 0x41, 0x6e, 0x56,
+ 0xdc, 0x94, 0x28, 0x67, 0x4e, 0xb7, 0x25, 0x48,
+ 0x8a, 0xc2, 0xe0, 0x60, 0x96, 0xcc, 0x18, 0xf4,
+ 0x84, 0xdd, 0xa7, 0x5e, 0x3e, 0x05, 0x0b, 0x26,
+ 0x26, 0xb2, 0x5c, 0x1f, 0x57, 0x1a, 0x04, 0x7e,
+ 0x6a, 0xe3, 0x2f, 0xb4, 0x35, 0xb6, 0x38, 0x40,
+ 0x40, 0xcd, 0x6f, 0x87, 0x2e, 0xef, 0xa3, 0xd7,
+ 0xa9, 0xc2, 0xe8, 0x0d, 0x27, 0xdf, 0x44, 0x62,
+ 0x99, 0xa0, 0xfc, 0xcf, 0x81, 0x78, 0xcb, 0xfe,
+ 0xe5, 0xa0, 0x03, 0x4e, 0x6c, 0xd7, 0xf4, 0xaf,
+ 0x7a, 0xbb, 0x61, 0x82, 0xfe, 0x71, 0x89, 0xb2,
+ 0x22, 0x7c, 0x8e, 0x83, 0x04, 0xce, 0xf6, 0x5d,
+ 0x84, 0x8f, 0x95, 0x6a, 0x7f, 0xad, 0xfd, 0x32,
+ 0x9c, 0x5e, 0xe4, 0x9c, 0x89, 0x60, 0x54, 0xaa,
+ 0x96, 0x72, 0xd2, 0xd7, 0x36, 0x85, 0xa9, 0x45,
+ 0xd2, 0x2a, 0xa1, 0x81, 0x49, 0x6f, 0x7e, 0x04,
+ 0xfa, 0xe2, 0xfe, 0x90, 0x26, 0x77, 0x5a, 0x33,
+ 0xb8, 0x04, 0x9a, 0x7a, 0xe6, 0x4c, 0x4f, 0xad,
+ 0x72, 0x96, 0x08, 0x28, 0x58, 0x13, 0xf8, 0xc4,
+ 0x1c, 0xf0, 0xc3, 0x45, 0x95, 0x49, 0x20, 0x8c,
+ 0x9f, 0x39, 0x70, 0xe1, 0x77, 0xfe, 0xd5, 0x4b,
+ 0xaf, 0x86, 0xda, 0xef, 0x22, 0x06, 0x83, 0x36,
+ 0x29, 0x12, 0x11, 0x40, 0xbc, 0x3b, 0x86, 0xaa,
+ 0xaa, 0x65, 0x60, 0xc3, 0x80, 0xca, 0xed, 0xa9,
+ 0xf3, 0xb0, 0x79, 0x96, 0xa2, 0x55, 0x27, 0x28,
+ 0x55, 0x73, 0x26, 0xa5, 0x50, 0xea, 0x92, 0x4b,
+ 0x3c, 0x5c, 0x82, 0x33, 0xf0, 0x01, 0x3f, 0x03,
+ 0xc1, 0x08, 0x05, 0xbf, 0x98, 0xf4, 0x9b, 0x6d,
+ 0xa5, 0xa8, 0xb4, 0x82, 0x0c, 0x06, 0xfa, 0xff,
+ 0x2d, 0x08, 0xf3, 0x05, 0x4f, 0x57, 0x2a, 0x39,
+ 0xd4, 0x83, 0x0d, 0x75, 0x51, 0xd8, 0x5b, 0x1b,
+ 0xd3, 0x51, 0x5a, 0x32, 0x2a, 0x9b, 0x32, 0xb2,
+ 0xf2, 0xa4, 0x96, 0x12, 0xf2, 0xae, 0x40, 0x34,
+ 0x67, 0xa8, 0xf5, 0x44, 0xd5, 0x35, 0x53, 0xfe,
+ 0xa3, 0x60, 0x96, 0x63, 0x0f, 0x1f, 0x6e, 0xb0,
+ 0x5a, 0x42, 0xa6, 0xfc, 0x51, 0x0b, 0x60, 0x27,
+ 0xbc, 0x06, 0x71, 0xed, 0x65, 0x5b, 0x23, 0x86,
+ 0x4a, 0x07, 0x3b, 0x22, 0x07, 0x46, 0xe6, 0x90,
+ 0x3e, 0xf3, 0x25, 0x50, 0x1b, 0x4c, 0x7f, 0x03,
+ 0x08, 0xa8, 0x36, 0x6b, 0x87, 0xe5, 0xe3, 0xdb,
+ 0x9a, 0x38, 0x83, 0xff, 0x9f, 0x1a, 0x9f, 0x57,
+ 0xa4, 0x2a, 0xf6, 0x37, 0xbc, 0x1a, 0xff, 0xc9,
+ 0x1e, 0x35, 0x0c, 0xc3, 0x7c, 0xa3, 0xb2, 0xe5,
+ 0xd2, 0xc6, 0xb4, 0x57, 0x47, 0xe4, 0x32, 0x16,
+ 0x6d, 0xa9, 0xae, 0x64, 0xe6, 0x2d, 0x8d, 0xc5,
+ 0x8d, 0x50, 0x8e, 0xe8, 0x1a, 0x22, 0x34, 0x2a,
+ 0xd9, 0xeb, 0x51, 0x90, 0x4a, 0xb1, 0x41, 0x7d,
+ 0x64, 0xf9, 0xb9, 0x0d, 0xf6, 0x23, 0x33, 0xb0,
+ 0x33, 0xf4, 0xf7, 0x3f, 0x27, 0x84, 0xc6, 0x0f,
+ 0x54, 0xa5, 0xc0, 0x2e, 0xec, 0x0b, 0x3a, 0x48,
+ 0x6e, 0x80, 0x35, 0x81, 0x43, 0x9b, 0x90, 0xb1,
+ 0xd0, 0x2b, 0xea, 0x21, 0xdc, 0xda, 0x5b, 0x09,
+ 0xf4, 0xcc, 0x10, 0xb4, 0xc7, 0xfe, 0x79, 0x51,
+ 0xc3, 0xc5, 0xac, 0x88, 0x74, 0x84, 0x0b, 0x4b,
+ 0xca, 0x79, 0x16, 0x29, 0xfb, 0x69, 0x54, 0xdf,
+ 0x41, 0x7e, 0xe9, 0xc7, 0x8e, 0xea, 0xa5, 0xfe,
+ 0xfc, 0x76, 0x0e, 0x90, 0xc4, 0x92, 0x38, 0xad,
+ 0x7b, 0x48, 0xe6, 0x6e, 0xf7, 0x21, 0xfd, 0x4e,
+ 0x93, 0x0a, 0x7b, 0x41, 0x83, 0x68, 0xfb, 0x57,
+ 0x51, 0x76, 0x34, 0xa9, 0x6c, 0x00, 0xaa, 0x4f,
+ 0x66, 0x65, 0x98, 0x4a, 0x4f, 0xa3, 0xa0, 0xef,
+ 0x69, 0x3f, 0xe3, 0x1c, 0x92, 0x8c, 0xfd, 0xd8,
+ 0xe8, 0xde, 0x7c, 0x7f, 0x3e, 0x84, 0x8e, 0x69,
+ 0x3c, 0xf1, 0xf2, 0x05, 0x46, 0xdc, 0x2f, 0x9d,
+ 0x5e, 0x6e, 0x4c, 0xfb, 0xb5, 0x99, 0x2a, 0x59,
+ 0x63, 0xc1, 0x34, 0xbc, 0x57, 0xc0, 0x0d, 0xb9,
+ 0x61, 0x25, 0xf3, 0x33, 0x23, 0x51, 0xb6, 0x0d,
+ 0x07, 0xa6, 0xab, 0x94, 0x4a, 0xb7, 0x2a, 0xea,
+ 0xee, 0xac, 0xa3, 0xc3, 0x04, 0x8b, 0x0e, 0x56,
+ 0xfe, 0x44, 0xa7, 0x39, 0xe2, 0xed, 0xed, 0xb4,
+ 0x22, 0x2b, 0xac, 0x12, 0x32, 0x28, 0x91, 0xd8,
+ 0xa5, 0xab, 0xff, 0x5f, 0xe0, 0x4b, 0xda, 0x78,
+ 0x17, 0xda, 0xf1, 0x01, 0x5b, 0xcd, 0xe2, 0x5f,
+ 0x50, 0x45, 0x73, 0x2b, 0xe4, 0x76, 0x77, 0xf4,
+ 0x64, 0x1d, 0x43, 0xfb, 0x84, 0x7a, 0xea, 0x91,
+ 0xae, 0xf9, 0x9e, 0xb7, 0xb4, 0xb0, 0x91, 0x5f,
+ 0x16, 0x35, 0x9a, 0x11, 0xb8, 0xc7, 0xc1, 0x8c,
+ 0xc6, 0x10, 0x8d, 0x2f, 0x63, 0x4a, 0xa7, 0x57,
+ 0x3a, 0x51, 0xd6, 0x32, 0x2d, 0x64, 0x72, 0xd4,
+ 0x66, 0xdc, 0x10, 0xa6, 0x67, 0xd6, 0x04, 0x23,
+ 0x9d, 0x0a, 0x11, 0x77, 0xdd, 0x37, 0x94, 0x17,
+ 0x3c, 0xbf, 0x8b, 0x65, 0xb0, 0x2e, 0x5e, 0x66,
+ 0x47, 0x64, 0xac, 0xdd, 0xf0, 0x84, 0xfd, 0x39,
+ 0xfa, 0x15, 0x5d, 0xef, 0xae, 0xca, 0xc1, 0x36,
+ 0xa7, 0x5c, 0xbf, 0xc7, 0x08, 0xc2, 0x66, 0x00,
+ 0x74, 0x74, 0x4e, 0x27, 0x3f, 0x55, 0x8a, 0xb7,
+ 0x38, 0x66, 0x83, 0x6d, 0xcf, 0x99, 0x9e, 0x60,
+ 0x8f, 0xdd, 0x2e, 0x62, 0x22, 0x0e, 0xef, 0x0c,
+ 0x98, 0xa7, 0x85, 0x74, 0x3b, 0x9d, 0xec, 0x9e,
+ 0xa9, 0x19, 0x72, 0xa5, 0x7f, 0x2c, 0x39, 0xb7,
+ 0x7d, 0xb7, 0xf1, 0x12, 0x65, 0x27, 0x4b, 0x5a,
+ 0xde, 0x17, 0xfe, 0xad, 0x44, 0xf3, 0x20, 0x4d,
+ 0xfd, 0xe4, 0x1f, 0xb5, 0x81, 0xb0, 0x36, 0x37,
+ 0x08, 0x6f, 0xc3, 0x0c, 0xe9, 0x85, 0x98, 0x82,
+ 0xa9, 0x62, 0x0c, 0xc4, 0x97, 0xc0, 0x50, 0xc8,
+ 0xa7, 0x3c, 0x50, 0x9f, 0x43, 0xb9, 0xcd, 0x5e,
+ 0x4d, 0xfa, 0x1c, 0x4b, 0x0b, 0xa9, 0x98, 0x85,
+ 0x38, 0x92, 0xac, 0x8d, 0xe4, 0xad, 0x9b, 0x98,
+ 0xab, 0xd9, 0x38, 0xac, 0x62, 0x52, 0xa3, 0x22,
+ 0x63, 0x0f, 0xbf, 0x95, 0x48, 0xdf, 0x69, 0xe7,
+ 0x8b, 0x33, 0xd5, 0xb2, 0xbd, 0x05, 0x49, 0x49,
+ 0x9d, 0x57, 0x73, 0x19, 0x33, 0xae, 0xfa, 0x33,
+ 0xf1, 0x19, 0xa8, 0x80, 0xce, 0x04, 0x9f, 0xbc,
+ 0x1d, 0x65, 0x82, 0x1b, 0xe5, 0x3a, 0x51, 0xc8,
+ 0x1c, 0x21, 0xe3, 0x5d, 0xf3, 0x7d, 0x9b, 0x2f,
+ 0x2c, 0x1d, 0x4a, 0x7f, 0x9b, 0x68, 0x35, 0xa3,
+ 0xb2, 0x50, 0xf7, 0x62, 0x79, 0xcd, 0xf4, 0x98,
+ 0x4f, 0xe5, 0x63, 0x7c, 0x3e, 0x45, 0x31, 0x8c,
+ 0x16, 0xa0, 0x12, 0xc8, 0x58, 0xce, 0x39, 0xa6,
+ 0xbc, 0x54, 0xdb, 0xc5, 0xe0, 0xd5, 0xba, 0xbc,
+ 0xb9, 0x04, 0xf4, 0x8d, 0xe8, 0x2f, 0x15, 0x9d,
+};
+
+/* 100 test cases */
+static struct crc_test {
+ u32 crc; /* random starting crc */
+ u32 start; /* random 6 bit offset in buf */
+ u32 length; /* random 11 bit length of test */
+ u32 crc_le; /* expected crc32_le result */
+ u32 crc_be; /* expected crc32_be result */
+ u32 crc32c_le; /* expected crc32c_le result */
+} const test[] __initconst =
+{
+ {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 0xf6e93d6c},
+ {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 0x0fe92aca},
+ {0x496da28e, 0x00000039, 0x000005af, 0xd933660f, 0x5d57e81f, 0x52e1ebb8},
+ {0x09a9b90e, 0x00000027, 0x000001f8, 0xb45fe007, 0xf45fca9a, 0x0798af9a},
+ {0xdc97e5a9, 0x00000025, 0x000003b6, 0xf81a3562, 0xe0126ba2, 0x18eb3152},
+ {0x47c58900, 0x0000000a, 0x000000b9, 0x8e58eccf, 0xf3afc793, 0xd00d08c7},
+ {0x292561e8, 0x0000000c, 0x00000403, 0xa2ba8aaf, 0x0b797aed, 0x8ba966bc},
+ {0x415037f6, 0x00000003, 0x00000676, 0xa17d52e8, 0x7f0fdf35, 0x11d694a2},
+ {0x3466e707, 0x00000026, 0x00000042, 0x258319be, 0x75c484a2, 0x6ab3208d},
+ {0xafd1281b, 0x00000023, 0x000002ee, 0x4428eaf8, 0x06c7ad10, 0xba4603c5},
+ {0xd3857b18, 0x00000028, 0x000004a2, 0x5c430821, 0xb062b7cb, 0xe6071c6f},
+ {0x1d825a8f, 0x0000002b, 0x0000050b, 0xd2c45f0c, 0xd68634e0, 0x179ec30a},
+ {0x5033e3bc, 0x0000000b, 0x00000078, 0xa3ea4113, 0xac6d31fb, 0x0903beb8},
+ {0x94f1fb5e, 0x0000000f, 0x000003a2, 0xfbfc50b1, 0x3cfe50ed, 0x6a7cb4fa},
+ {0xc9a0fe14, 0x00000009, 0x00000473, 0x5fb61894, 0x87070591, 0xdb535801},
+ {0x88a034b1, 0x0000001c, 0x000005ad, 0xc1b16053, 0x46f95c67, 0x92bed597},
+ {0xf0f72239, 0x00000020, 0x0000026d, 0xa6fa58f3, 0xf8c2c1dd, 0x192a3f1b},
+ {0xcc20a5e3, 0x0000003b, 0x0000067a, 0x7740185a, 0x308b979a, 0xccbaec1a},
+ {0xce589c95, 0x0000002b, 0x00000641, 0xd055e987, 0x40aae25b, 0x7eabae4d},
+ {0x78edc885, 0x00000035, 0x000005be, 0xa39cb14b, 0x035b0d1f, 0x28c72982},
+ {0x9d40a377, 0x0000003b, 0x00000038, 0x1f47ccd2, 0x197fbc9d, 0xc3cd4d18},
+ {0x703d0e01, 0x0000003c, 0x000006f1, 0x88735e7c, 0xfed57c5a, 0xbca8f0e7},
+ {0x776bf505, 0x0000000f, 0x000005b2, 0x5cc4fc01, 0xf32efb97, 0x713f60b3},
+ {0x4a3e7854, 0x00000027, 0x000004b8, 0x8d923c82, 0x0cbfb4a2, 0xebd08fd5},
+ {0x209172dd, 0x0000003b, 0x00000356, 0xb89e9c2b, 0xd7868138, 0x64406c59},
+ {0x3ba4cc5b, 0x0000002f, 0x00000203, 0xe51601a9, 0x5b2a1032, 0x7421890e},
+ {0xfc62f297, 0x00000000, 0x00000079, 0x71a8e1a2, 0x5d88685f, 0xe9347603},
+ {0x64280b8b, 0x00000016, 0x000007ab, 0x0fa7a30c, 0xda3a455f, 0x1bef9060},
+ {0x97dd724b, 0x00000033, 0x000007ad, 0x5788b2f4, 0xd7326d32, 0x34720072},
+ {0x61394b52, 0x00000035, 0x00000571, 0xc66525f1, 0xcabe7fef, 0x48310f59},
+ {0x29b4faff, 0x00000024, 0x0000006e, 0xca13751e, 0x993648e0, 0x783a4213},
+ {0x29bfb1dc, 0x0000000b, 0x00000244, 0x436c43f7, 0x429f7a59, 0x9e8efd41},
+ {0x86ae934b, 0x00000035, 0x00000104, 0x0760ec93, 0x9cf7d0f4, 0xfc3d34a5},
+ {0xc4c1024e, 0x0000002e, 0x000006b1, 0x6516a3ec, 0x19321f9c, 0x17a52ae2},
+ {0x3287a80a, 0x00000026, 0x00000496, 0x0b257eb1, 0x754ebd51, 0x886d935a},
+ {0xa4db423e, 0x00000023, 0x0000045d, 0x9b3a66dc, 0x873e9f11, 0xeaaeaeb2},
+ {0x7a1078df, 0x00000015, 0x0000014a, 0x8c2484c5, 0x6a628659, 0x8e900a4b},
+ {0x6048bd5b, 0x00000006, 0x0000006a, 0x897e3559, 0xac9961af, 0xd74662b1},
+ {0xd8f9ea20, 0x0000003d, 0x00000277, 0x60eb905b, 0xed2aaf99, 0xd26752ba},
+ {0xea5ec3b4, 0x0000002a, 0x000004fe, 0x869965dc, 0x6c1f833b, 0x8b1fcd62},
+ {0x2dfb005d, 0x00000016, 0x00000345, 0x6a3b117e, 0xf05e8521, 0xf54342fe},
+ {0x5a214ade, 0x00000020, 0x000005b6, 0x467f70be, 0xcb22ccd3, 0x5b95b988},
+ {0xf0ab9cca, 0x00000032, 0x00000515, 0xed223df3, 0x7f3ef01d, 0x2e1176be},
+ {0x91b444f9, 0x0000002e, 0x000007f8, 0x84e9a983, 0x5676756f, 0x66120546},
+ {0x1b5d2ddb, 0x0000002e, 0x0000012c, 0xba638c4c, 0x3f42047b, 0xf256a5cc},
+ {0xd824d1bb, 0x0000003a, 0x000007b5, 0x6288653b, 0x3a3ebea0, 0x4af1dd69},
+ {0x0470180c, 0x00000034, 0x000001f0, 0x9d5b80d6, 0x3de08195, 0x56f0a04a},
+ {0xffaa3a3f, 0x00000036, 0x00000299, 0xf3a82ab8, 0x53e0c13d, 0x74f6b6b2},
+ {0x6406cfeb, 0x00000023, 0x00000600, 0xa920b8e8, 0xe4e2acf4, 0x085951fd},
+ {0xb24aaa38, 0x0000003e, 0x000004a1, 0x657cc328, 0x5077b2c3, 0xc65387eb},
+ {0x58b2ab7c, 0x00000039, 0x000002b4, 0x3a17ee7e, 0x9dcb3643, 0x1ca9257b},
+ {0x3db85970, 0x00000006, 0x000002b6, 0x95268b59, 0xb9812c10, 0xfd196d76},
+ {0x857830c5, 0x00000003, 0x00000590, 0x4ef439d5, 0xf042161d, 0x5ef88339},
+ {0xe1fcd978, 0x0000003e, 0x000007d8, 0xae8d8699, 0xce0a1ef5, 0x2c3714d9},
+ {0xb982a768, 0x00000016, 0x000006e0, 0x62fad3df, 0x5f8a067b, 0x58576548},
+ {0x1d581ce8, 0x0000001e, 0x0000058b, 0xf0f5da53, 0x26e39eee, 0xfd7c57de},
+ {0x2456719b, 0x00000025, 0x00000503, 0x4296ac64, 0xd50e4c14, 0xd5fedd59},
+ {0xfae6d8f2, 0x00000000, 0x0000055d, 0x057fdf2e, 0x2a31391a, 0x1cc3b17b},
+ {0xcba828e3, 0x00000039, 0x000002ce, 0xe3f22351, 0x8f00877b, 0x270eed73},
+ {0x13d25952, 0x0000000a, 0x0000072d, 0x76d4b4cc, 0x5eb67ec3, 0x91ecbb11},
+ {0x0342be3f, 0x00000015, 0x00000599, 0xec75d9f1, 0x9d4d2826, 0x05ed8d0c},
+ {0xeaa344e0, 0x00000014, 0x000004d8, 0x72a4c981, 0x2064ea06, 0x0b09ad5b},
+ {0xbbb52021, 0x0000003b, 0x00000272, 0x04af99fc, 0xaf042d35, 0xf8d511fb},
+ {0xb66384dc, 0x0000001d, 0x000007fc, 0xd7629116, 0x782bd801, 0x5ad832cc},
+ {0x616c01b6, 0x00000022, 0x000002c8, 0x5b1dab30, 0x783ce7d2, 0x1214d196},
+ {0xce2bdaad, 0x00000016, 0x0000062a, 0x932535c8, 0x3f02926d, 0x5747218a},
+ {0x00fe84d7, 0x00000005, 0x00000205, 0x850e50aa, 0x753d649c, 0xde8f14de},
+ {0xbebdcb4c, 0x00000006, 0x0000055d, 0xbeaa37a2, 0x2d8c9eba, 0x3563b7b9},
+ {0xd8b1a02a, 0x00000010, 0x00000387, 0x5017d2fc, 0x503541a5, 0x071475d0},
+ {0x3b96cad2, 0x00000036, 0x00000347, 0x1d2372ae, 0x926cd90b, 0x54c79d60},
+ {0xc94c1ed7, 0x00000005, 0x0000038b, 0x9e9fdb22, 0x144a9178, 0x4c53eee6},
+ {0x1aad454e, 0x00000025, 0x000002b2, 0xc3f6315c, 0x5c7a35b3, 0x10137a3c},
+ {0xa4fec9a6, 0x00000000, 0x000006d6, 0x90be5080, 0xa4107605, 0xaa9d6c73},
+ {0x1bbe71e2, 0x0000001f, 0x000002fd, 0x4e504c3b, 0x284ccaf1, 0xb63d23e7},
+ {0x4201c7e4, 0x00000002, 0x000002b7, 0x7822e3f9, 0x0cc912a9, 0x7f53e9cf},
+ {0x23fddc96, 0x00000003, 0x00000627, 0x8a385125, 0x07767e78, 0x13c1cd83},
+ {0xd82ba25c, 0x00000016, 0x0000063e, 0x98e4148a, 0x283330c9, 0x49ff5867},
+ {0x786f2032, 0x0000002d, 0x0000060f, 0xf201600a, 0xf561bfcd, 0x8467f211},
+ {0xfebe4e1f, 0x0000002a, 0x000004f2, 0x95e51961, 0xfd80dcab, 0x3f9683b2},
+ {0x1a6e0a39, 0x00000008, 0x00000672, 0x8af6c2a5, 0x78dd84cb, 0x76a3f874},
+ {0x56000ab8, 0x0000000e, 0x000000e5, 0x36bacb8f, 0x22ee1f77, 0x863b702f},
+ {0x4717fe0c, 0x00000000, 0x000006ec, 0x8439f342, 0x5c8e03da, 0xdc6c58ff},
+ {0xd5d5d68e, 0x0000003c, 0x000003a3, 0x46fff083, 0x177d1b39, 0x0622cc95},
+ {0xc25dd6c6, 0x00000024, 0x000006c0, 0x5ceb8eb4, 0x892b0d16, 0xe85605cd},
+ {0xe9b11300, 0x00000023, 0x00000683, 0x07a5d59a, 0x6c6a3208, 0x31da5f06},
+ {0x95cd285e, 0x00000001, 0x00000047, 0x7b3a4368, 0x0202c07e, 0xa1f2e784},
+ {0xd9245a25, 0x0000001e, 0x000003a6, 0xd33c1841, 0x1936c0d5, 0xb07cc616},
+ {0x103279db, 0x00000006, 0x0000039b, 0xca09b8a0, 0x77d62892, 0xbf943b6c},
+ {0x1cba3172, 0x00000027, 0x000001c8, 0xcb377194, 0xebe682db, 0x2c01af1c},
+ {0x8f613739, 0x0000000c, 0x000001df, 0xb4b0bc87, 0x7710bd43, 0x0fe5f56d},
+ {0x1c6aa90d, 0x0000001b, 0x0000053c, 0x70559245, 0xda7894ac, 0xf8943b2d},
+ {0xaabe5b93, 0x0000003d, 0x00000715, 0xcdbf42fa, 0x0c3b99e7, 0xe4d89272},
+ {0xf15dd038, 0x00000006, 0x000006db, 0x6e104aea, 0x8d5967f2, 0x7c2f6bbb},
+ {0x584dd49c, 0x00000020, 0x000007bc, 0x36b6cfd6, 0xad4e23b2, 0xabbf388b},
+ {0x5d8c9506, 0x00000020, 0x00000470, 0x4c62378e, 0x31d92640, 0x1dca1f4e},
+ {0xb80d17b0, 0x00000032, 0x00000346, 0x22a5bb88, 0x9a7ec89f, 0x5c170e23},
+ {0xdaf0592e, 0x00000023, 0x000007b0, 0x3cab3f99, 0x9b1fdd99, 0xc0e9d672},
+ {0x4793cc85, 0x0000000d, 0x00000706, 0xe82e04f6, 0xed3db6b7, 0xc18bdc86},
+ {0x82ebf64e, 0x00000009, 0x000007c3, 0x69d590a9, 0x9efa8499, 0xa874fcdd},
+ {0xb18a0319, 0x00000026, 0x000007db, 0x1cf98dcc, 0x8fa9ad6a, 0x9dc0bb48},
+};
+
+#include <linux/time.h>
+
+static int __init crc32c_test(void)
+{
+ int i;
+ int errors = 0;
+ int bytes = 0;
+ u64 nsec;
+ unsigned long flags;
+
+ /* keep static to prevent cache warming code from
+ * getting eliminated by the compiler */
+ static u32 crc;
+
+ /* pre-warm the cache */
+ for (i = 0; i < 100; i++) {
+ bytes += 2*test[i].length;
+
+ crc ^= __crc32c_le(test[i].crc, test_buf +
+ test[i].start, test[i].length);
+ }
+
+ /* reduce OS noise */
+ local_irq_save(flags);
+ local_irq_disable();
+
+ nsec = ktime_get_ns();
+ for (i = 0; i < 100; i++) {
+ if (test[i].crc32c_le != __crc32c_le(test[i].crc, test_buf +
+ test[i].start, test[i].length))
+ errors++;
+ }
+ nsec = ktime_get_ns() - nsec;
+
+ local_irq_restore(flags);
+ local_irq_enable();
+
+ pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS);
+
+ if (errors)
+ pr_warn("crc32c: %d self tests failed\n", errors);
+ else {
+ pr_info("crc32c: self tests passed, processed %d bytes in %lld nsec\n",
+ bytes, nsec);
+ }
+
+ return 0;
+}
+
+static int __init crc32c_combine_test(void)
+{
+ int i, j;
+ int errors = 0, runs = 0;
+
+ for (i = 0; i < 10; i++) {
+ u32 crc_full;
+
+ crc_full = __crc32c_le(test[i].crc, test_buf + test[i].start,
+ test[i].length);
+ for (j = 0; j <= test[i].length; ++j) {
+ u32 crc1, crc2;
+ u32 len1 = j, len2 = test[i].length - j;
+
+ crc1 = __crc32c_le(test[i].crc, test_buf +
+ test[i].start, len1);
+ crc2 = __crc32c_le(0, test_buf + test[i].start +
+ len1, len2);
+
+ if (!(crc_full == __crc32c_le_combine(crc1, crc2, len2) &&
+ crc_full == test[i].crc32c_le))
+ errors++;
+ runs++;
+ cond_resched();
+ }
+ }
+
+ if (errors)
+ pr_warn("crc32c_combine: %d/%d self tests failed\n", errors, runs);
+ else
+ pr_info("crc32c_combine: %d self tests passed\n", runs);
+
+ return 0;
+}
+
+static int __init crc32_test(void)
+{
+ int i;
+ int errors = 0;
+ int bytes = 0;
+ u64 nsec;
+ unsigned long flags;
+
+ /* keep static to prevent cache warming code from
+ * getting eliminated by the compiler */
+ static u32 crc;
+
+ /* pre-warm the cache */
+ for (i = 0; i < 100; i++) {
+ bytes += 2*test[i].length;
+
+ crc ^= crc32_le(test[i].crc, test_buf +
+ test[i].start, test[i].length);
+
+ crc ^= crc32_be(test[i].crc, test_buf +
+ test[i].start, test[i].length);
+ }
+
+ /* reduce OS noise */
+ local_irq_save(flags);
+ local_irq_disable();
+
+ nsec = ktime_get_ns();
+ for (i = 0; i < 100; i++) {
+ if (test[i].crc_le != crc32_le(test[i].crc, test_buf +
+ test[i].start, test[i].length))
+ errors++;
+
+ if (test[i].crc_be != crc32_be(test[i].crc, test_buf +
+ test[i].start, test[i].length))
+ errors++;
+ }
+ nsec = ktime_get_ns() - nsec;
+
+ local_irq_restore(flags);
+ local_irq_enable();
+
+ pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n",
+ CRC_LE_BITS, CRC_BE_BITS);
+
+ if (errors)
+ pr_warn("crc32: %d self tests failed\n", errors);
+ else {
+ pr_info("crc32: self tests passed, processed %d bytes in %lld nsec\n",
+ bytes, nsec);
+ }
+
+ return 0;
+}
+
+static int __init crc32_combine_test(void)
+{
+ int i, j;
+ int errors = 0, runs = 0;
+
+ for (i = 0; i < 10; i++) {
+ u32 crc_full;
+
+ crc_full = crc32_le(test[i].crc, test_buf + test[i].start,
+ test[i].length);
+ for (j = 0; j <= test[i].length; ++j) {
+ u32 crc1, crc2;
+ u32 len1 = j, len2 = test[i].length - j;
+
+ crc1 = crc32_le(test[i].crc, test_buf +
+ test[i].start, len1);
+ crc2 = crc32_le(0, test_buf + test[i].start +
+ len1, len2);
+
+ if (!(crc_full == crc32_le_combine(crc1, crc2, len2) &&
+ crc_full == test[i].crc_le))
+ errors++;
+ runs++;
+ cond_resched();
+ }
+ }
+
+ if (errors)
+ pr_warn("crc32_combine: %d/%d self tests failed\n", errors, runs);
+ else
+ pr_info("crc32_combine: %d self tests passed\n", runs);
+
+ return 0;
+}
+
+static int __init crc32test_init(void)
+{
+ crc32_test();
+ crc32c_test();
+
+ crc32_combine_test();
+ crc32c_combine_test();
+
+ return 0;
+}
+
+static void __exit crc32_exit(void)
+{
+}
+
+module_init(crc32test_init);
+module_exit(crc32_exit);
+
+MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
+MODULE_DESCRIPTION("CRC32 selftest");
+MODULE_LICENSE("GPL");
diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
index 036fc882cd72..1b0baf3008ea 100644
--- a/lib/decompress_unlz4.c
+++ b/lib/decompress_unlz4.c
@@ -72,7 +72,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
error("NULL input pointer and missing fill function");
goto exit_1;
} else {
- inp = large_malloc(lz4_compressbound(uncomp_chunksize));
+ inp = large_malloc(LZ4_compressBound(uncomp_chunksize));
if (!inp) {
error("Could not allocate input buffer");
goto exit_1;
@@ -136,7 +136,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
inp += 4;
size -= 4;
} else {
- if (chunksize > lz4_compressbound(uncomp_chunksize)) {
+ if (chunksize > LZ4_compressBound(uncomp_chunksize)) {
error("chunk length is longer than allocated");
goto exit_2;
}
@@ -152,11 +152,14 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
out_len -= dest_len;
} else
dest_len = out_len;
- ret = lz4_decompress(inp, &chunksize, outp, dest_len);
+
+ ret = LZ4_decompress_fast(inp, outp, dest_len);
+ chunksize = ret;
#else
dest_len = uncomp_chunksize;
- ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp,
- &dest_len);
+
+ ret = LZ4_decompress_safe(inp, outp, chunksize, dest_len);
+ dest_len = ret;
#endif
if (ret < 0) {
error("Decoding failed");
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 8971370bfb16..60c57ec936db 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -1155,6 +1155,11 @@ static void check_unmap(struct dma_debug_entry *ref)
dir2name[ref->direction]);
}
+ /*
+ * Drivers should use dma_mapping_error() to check the returned
+ * addresses of dma_map_single() and dma_map_page().
+ * If not, print this warning message. See Documentation/DMA-API.txt.
+ */
if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
err_printk(ref->dev, entry,
"DMA-API: device driver failed to check map error"
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 18072ea9c20e..6ed74f78380c 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -33,7 +33,7 @@ static unsigned long _find_next_bit(const unsigned long *addr,
{
unsigned long tmp;
- if (!nbits || start >= nbits)
+ if (unlikely(start >= nbits))
return nbits;
tmp = addr[start / BITS_PER_LONG] ^ invert;
@@ -151,7 +151,7 @@ static unsigned long _find_next_bit_le(const unsigned long *addr,
{
unsigned long tmp;
- if (!nbits || start >= nbits)
+ if (unlikely(start >= nbits))
return nbits;
tmp = addr[start / BITS_PER_LONG] ^ invert;
diff --git a/lib/fonts/Kconfig b/lib/fonts/Kconfig
index e77dfe00de36..8fa0791e8a1e 100644
--- a/lib/fonts/Kconfig
+++ b/lib/fonts/Kconfig
@@ -87,6 +87,14 @@ config FONT_6x10
embedded devices with a 320x240 screen, to get a reasonable number
of characters (53x24) that are still at a readable size.
+config FONT_10x18
+ bool "console 10x18 font (not supported by all drivers)" if FONTS
+ depends on FRAMEBUFFER_CONSOLE
+ help
+ This is a high resolution console font for machines with very
+ big letters. It fits between the sun 12x22 and the normal 8x16 font.
+ If other fonts are too big or too small for you, say Y, otherwise say N.
+
config FONT_SUN8x16
bool "Sparc console 8x16 font"
depends on FRAMEBUFFER_CONSOLE && (!SPARC && FONTS || SPARC)
@@ -101,14 +109,6 @@ config FONT_SUN12x22
big letters (like the letters used in the SPARC PROM). If the
standard font is unreadable for you, say Y, otherwise say N.
-config FONT_10x18
- bool "console 10x18 font (not supported by all drivers)" if FONTS
- depends on FRAMEBUFFER_CONSOLE
- help
- This is a high resolution console font for machines with very
- big letters. It fits between the sun 12x22 and the normal 8x16 font.
- If other fonts are too big or too small for you, say Y, otherwise say N.
-
config FONT_AUTOSELECT
def_bool y
depends on !FONT_8x8
diff --git a/lib/glob.c b/lib/glob.c
index 500fc80d23e1..0ba3ea86b546 100644
--- a/lib/glob.c
+++ b/lib/glob.c
@@ -121,167 +121,3 @@ backtrack:
}
}
EXPORT_SYMBOL(glob_match);
-
-
-#ifdef CONFIG_GLOB_SELFTEST
-
-#include <linux/printk.h>
-#include <linux/moduleparam.h>
-
-/* Boot with "glob.verbose=1" to show successful tests, too */
-static bool verbose = false;
-module_param(verbose, bool, 0);
-
-struct glob_test {
- char const *pat, *str;
- bool expected;
-};
-
-static bool __pure __init test(char const *pat, char const *str, bool expected)
-{
- bool match = glob_match(pat, str);
- bool success = match == expected;
-
- /* Can't get string literals into a particular section, so... */
- static char const msg_error[] __initconst =
- KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";
- static char const msg_ok[] __initconst =
- KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";
- static char const mismatch[] __initconst = "mismatch";
- char const *message;
-
- if (!success)
- message = msg_error;
- else if (verbose)
- message = msg_ok;
- else
- return success;
-
- printk(message, pat, str, mismatch + 3*match);
- return success;
-}
-
-/*
- * The tests are all jammed together in one array to make it simpler
- * to place that array in the .init.rodata section. The obvious
- * "array of structures containing char *" has no way to force the
- * pointed-to strings to be in a particular section.
- *
- * Anyway, a test consists of:
- * 1. Expected glob_match result: '1' or '0'.
- * 2. Pattern to match: null-terminated string
- * 3. String to match against: null-terminated string
- *
- * The list of tests is terminated with a final '\0' instead of
- * a glob_match result character.
- */
-static char const glob_tests[] __initconst =
- /* Some basic tests */
- "1" "a\0" "a\0"
- "0" "a\0" "b\0"
- "0" "a\0" "aa\0"
- "0" "a\0" "\0"
- "1" "\0" "\0"
- "0" "\0" "a\0"
- /* Simple character class tests */
- "1" "[a]\0" "a\0"
- "0" "[a]\0" "b\0"
- "0" "[!a]\0" "a\0"
- "1" "[!a]\0" "b\0"
- "1" "[ab]\0" "a\0"
- "1" "[ab]\0" "b\0"
- "0" "[ab]\0" "c\0"
- "1" "[!ab]\0" "c\0"
- "1" "[a-c]\0" "b\0"
- "0" "[a-c]\0" "d\0"
- /* Corner cases in character class parsing */
- "1" "[a-c-e-g]\0" "-\0"
- "0" "[a-c-e-g]\0" "d\0"
- "1" "[a-c-e-g]\0" "f\0"
- "1" "[]a-ceg-ik[]\0" "a\0"
- "1" "[]a-ceg-ik[]\0" "]\0"
- "1" "[]a-ceg-ik[]\0" "[\0"
- "1" "[]a-ceg-ik[]\0" "h\0"
- "0" "[]a-ceg-ik[]\0" "f\0"
- "0" "[!]a-ceg-ik[]\0" "h\0"
- "0" "[!]a-ceg-ik[]\0" "]\0"
- "1" "[!]a-ceg-ik[]\0" "f\0"
- /* Simple wild cards */
- "1" "?\0" "a\0"
- "0" "?\0" "aa\0"
- "0" "??\0" "a\0"
- "1" "?x?\0" "axb\0"
- "0" "?x?\0" "abx\0"
- "0" "?x?\0" "xab\0"
- /* Asterisk wild cards (backtracking) */
- "0" "*??\0" "a\0"
- "1" "*??\0" "ab\0"
- "1" "*??\0" "abc\0"
- "1" "*??\0" "abcd\0"
- "0" "??*\0" "a\0"
- "1" "??*\0" "ab\0"
- "1" "??*\0" "abc\0"
- "1" "??*\0" "abcd\0"
- "0" "?*?\0" "a\0"
- "1" "?*?\0" "ab\0"
- "1" "?*?\0" "abc\0"
- "1" "?*?\0" "abcd\0"
- "1" "*b\0" "b\0"
- "1" "*b\0" "ab\0"
- "0" "*b\0" "ba\0"
- "1" "*b\0" "bb\0"
- "1" "*b\0" "abb\0"
- "1" "*b\0" "bab\0"
- "1" "*bc\0" "abbc\0"
- "1" "*bc\0" "bc\0"
- "1" "*bc\0" "bbc\0"
- "1" "*bc\0" "bcbc\0"
- /* Multiple asterisks (complex backtracking) */
- "1" "*ac*\0" "abacadaeafag\0"
- "1" "*ac*ae*ag*\0" "abacadaeafag\0"
- "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
- "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
- "1" "*abcd*\0" "abcabcabcabcdefg\0"
- "1" "*ab*cd*\0" "abcabcabcabcdefg\0"
- "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
- "0" "*abcd*\0" "abcabcabcabcefg\0"
- "0" "*ab*cd*\0" "abcabcabcabcefg\0";
-
-static int __init glob_init(void)
-{
- unsigned successes = 0;
- unsigned n = 0;
- char const *p = glob_tests;
- static char const message[] __initconst =
- KERN_INFO "glob: %u self-tests passed, %u failed\n";
-
- /*
- * Tests are jammed together in a string. The first byte is '1'
- * or '0' to indicate the expected outcome, or '\0' to indicate the
- * end of the tests. Then come two null-terminated strings: the
- * pattern and the string to match it against.
- */
- while (*p) {
- bool expected = *p++ & 1;
- char const *pat = p;
-
- p += strlen(p) + 1;
- successes += test(pat, p, expected);
- p += strlen(p) + 1;
- n++;
- }
-
- n -= successes;
- printk(message, successes, n);
-
- /* What's the errno for "kernel bug detected"? Guess... */
- return n ? -ECANCELED : 0;
-}
-
-/* We need a dummy exit function to allow unload */
-static void __exit glob_fini(void) { }
-
-module_init(glob_init);
-module_exit(glob_fini);
-
-#endif /* CONFIG_GLOB_SELFTEST */
diff --git a/lib/globtest.c b/lib/globtest.c
new file mode 100644
index 000000000000..d8e97d43b905
--- /dev/null
+++ b/lib/globtest.c
@@ -0,0 +1,167 @@
+/*
+ * Extracted fronm glob.c
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/glob.h>
+#include <linux/printk.h>
+
+/* Boot with "glob.verbose=1" to show successful tests, too */
+static bool verbose = false;
+module_param(verbose, bool, 0);
+
+struct glob_test {
+ char const *pat, *str;
+ bool expected;
+};
+
+static bool __pure __init test(char const *pat, char const *str, bool expected)
+{
+ bool match = glob_match(pat, str);
+ bool success = match == expected;
+
+ /* Can't get string literals into a particular section, so... */
+ static char const msg_error[] __initconst =
+ KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";
+ static char const msg_ok[] __initconst =
+ KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";
+ static char const mismatch[] __initconst = "mismatch";
+ char const *message;
+
+ if (!success)
+ message = msg_error;
+ else if (verbose)
+ message = msg_ok;
+ else
+ return success;
+
+ printk(message, pat, str, mismatch + 3*match);
+ return success;
+}
+
+/*
+ * The tests are all jammed together in one array to make it simpler
+ * to place that array in the .init.rodata section. The obvious
+ * "array of structures containing char *" has no way to force the
+ * pointed-to strings to be in a particular section.
+ *
+ * Anyway, a test consists of:
+ * 1. Expected glob_match result: '1' or '0'.
+ * 2. Pattern to match: null-terminated string
+ * 3. String to match against: null-terminated string
+ *
+ * The list of tests is terminated with a final '\0' instead of
+ * a glob_match result character.
+ */
+static char const glob_tests[] __initconst =
+ /* Some basic tests */
+ "1" "a\0" "a\0"
+ "0" "a\0" "b\0"
+ "0" "a\0" "aa\0"
+ "0" "a\0" "\0"
+ "1" "\0" "\0"
+ "0" "\0" "a\0"
+ /* Simple character class tests */
+ "1" "[a]\0" "a\0"
+ "0" "[a]\0" "b\0"
+ "0" "[!a]\0" "a\0"
+ "1" "[!a]\0" "b\0"
+ "1" "[ab]\0" "a\0"
+ "1" "[ab]\0" "b\0"
+ "0" "[ab]\0" "c\0"
+ "1" "[!ab]\0" "c\0"
+ "1" "[a-c]\0" "b\0"
+ "0" "[a-c]\0" "d\0"
+ /* Corner cases in character class parsing */
+ "1" "[a-c-e-g]\0" "-\0"
+ "0" "[a-c-e-g]\0" "d\0"
+ "1" "[a-c-e-g]\0" "f\0"
+ "1" "[]a-ceg-ik[]\0" "a\0"
+ "1" "[]a-ceg-ik[]\0" "]\0"
+ "1" "[]a-ceg-ik[]\0" "[\0"
+ "1" "[]a-ceg-ik[]\0" "h\0"
+ "0" "[]a-ceg-ik[]\0" "f\0"
+ "0" "[!]a-ceg-ik[]\0" "h\0"
+ "0" "[!]a-ceg-ik[]\0" "]\0"
+ "1" "[!]a-ceg-ik[]\0" "f\0"
+ /* Simple wild cards */
+ "1" "?\0" "a\0"
+ "0" "?\0" "aa\0"
+ "0" "??\0" "a\0"
+ "1" "?x?\0" "axb\0"
+ "0" "?x?\0" "abx\0"
+ "0" "?x?\0" "xab\0"
+ /* Asterisk wild cards (backtracking) */
+ "0" "*??\0" "a\0"
+ "1" "*??\0" "ab\0"
+ "1" "*??\0" "abc\0"
+ "1" "*??\0" "abcd\0"
+ "0" "??*\0" "a\0"
+ "1" "??*\0" "ab\0"
+ "1" "??*\0" "abc\0"
+ "1" "??*\0" "abcd\0"
+ "0" "?*?\0" "a\0"
+ "1" "?*?\0" "ab\0"
+ "1" "?*?\0" "abc\0"
+ "1" "?*?\0" "abcd\0"
+ "1" "*b\0" "b\0"
+ "1" "*b\0" "ab\0"
+ "0" "*b\0" "ba\0"
+ "1" "*b\0" "bb\0"
+ "1" "*b\0" "abb\0"
+ "1" "*b\0" "bab\0"
+ "1" "*bc\0" "abbc\0"
+ "1" "*bc\0" "bc\0"
+ "1" "*bc\0" "bbc\0"
+ "1" "*bc\0" "bcbc\0"
+ /* Multiple asterisks (complex backtracking) */
+ "1" "*ac*\0" "abacadaeafag\0"
+ "1" "*ac*ae*ag*\0" "abacadaeafag\0"
+ "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
+ "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
+ "1" "*abcd*\0" "abcabcabcabcdefg\0"
+ "1" "*ab*cd*\0" "abcabcabcabcdefg\0"
+ "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
+ "0" "*abcd*\0" "abcabcabcabcefg\0"
+ "0" "*ab*cd*\0" "abcabcabcabcefg\0";
+
+static int __init glob_init(void)
+{
+ unsigned successes = 0;
+ unsigned n = 0;
+ char const *p = glob_tests;
+ static char const message[] __initconst =
+ KERN_INFO "glob: %u self-tests passed, %u failed\n";
+
+ /*
+ * Tests are jammed together in a string. The first byte is '1'
+ * or '0' to indicate the expected outcome, or '\0' to indicate the
+ * end of the tests. Then come two null-terminated strings: the
+ * pattern and the string to match it against.
+ */
+ while (*p) {
+ bool expected = *p++ & 1;
+ char const *pat = p;
+
+ p += strlen(p) + 1;
+ successes += test(pat, p, expected);
+ p += strlen(p) + 1;
+ n++;
+ }
+
+ n -= successes;
+ printk(message, successes, n);
+
+ /* What's the errno for "kernel bug detected"? Guess... */
+ return n ? -ECANCELED : 0;
+}
+
+/* We need a dummy exit function to allow unload */
+static void __exit glob_fini(void) { }
+
+module_init(glob_init);
+module_exit(glob_fini);
+
+MODULE_DESCRIPTION("glob(7) matching tests");
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 7f7bfa55eb6d..a34db8d27667 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -20,15 +20,16 @@
bool __list_add_valid(struct list_head *new, struct list_head *prev,
struct list_head *next)
{
- CHECK_DATA_CORRUPTION(next->prev != prev,
- "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
- prev, next->prev, next);
- CHECK_DATA_CORRUPTION(prev->next != next,
- "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
- next, prev->next, prev);
- CHECK_DATA_CORRUPTION(new == prev || new == next,
- "list_add double add: new=%p, prev=%p, next=%p.\n",
- new, prev, next);
+ if (CHECK_DATA_CORRUPTION(next->prev != prev,
+ "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
+ prev, next->prev, next) ||
+ CHECK_DATA_CORRUPTION(prev->next != next,
+ "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
+ next, prev->next, prev) ||
+ CHECK_DATA_CORRUPTION(new == prev || new == next,
+ "list_add double add: new=%p, prev=%p, next=%p.\n",
+ new, prev, next))
+ return false;
return true;
}
@@ -41,18 +42,20 @@ bool __list_del_entry_valid(struct list_head *entry)
prev = entry->prev;
next = entry->next;
- CHECK_DATA_CORRUPTION(next == LIST_POISON1,
- "list_del corruption, %p->next is LIST_POISON1 (%p)\n",
- entry, LIST_POISON1);
- CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
- "list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
- entry, LIST_POISON2);
- CHECK_DATA_CORRUPTION(prev->next != entry,
- "list_del corruption. prev->next should be %p, but was %p\n",
- entry, prev->next);
- CHECK_DATA_CORRUPTION(next->prev != entry,
- "list_del corruption. next->prev should be %p, but was %p\n",
- entry, next->prev);
+ if (CHECK_DATA_CORRUPTION(next == LIST_POISON1,
+ "list_del corruption, %p->next is LIST_POISON1 (%p)\n",
+ entry, LIST_POISON1) ||
+ CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
+ "list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
+ entry, LIST_POISON2) ||
+ CHECK_DATA_CORRUPTION(prev->next != entry,
+ "list_del corruption. prev->next should be %p, but was %p\n",
+ entry, prev->next) ||
+ CHECK_DATA_CORRUPTION(next->prev != entry,
+ "list_del corruption. next->prev should be %p, but was %p\n",
+ entry, next->prev))
+ return false;
+
return true;
}
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index 28321d8f75ef..697dbda34408 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -1,19 +1,16 @@
/*
* LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
- *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -25,419 +22,866 @@
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
* You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ * - LZ4 homepage : http://www.lz4.org
+ * - LZ4 source repository : https://github.com/lz4/lz4
*
- * Changed for kernel use by:
- * Chanho Min <chanho.min@lge.com>
+ * Changed for kernel usage by:
+ * Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
*/
+/*-************************************
+ * Dependencies
+ **************************************/
+#include <linux/lz4.h>
+#include "lz4defs.h"
#include <linux/module.h>
#include <linux/kernel.h>
-#include <linux/lz4.h>
#include <asm/unaligned.h>
-#include "lz4defs.h"
-/*
- * LZ4_compressCtx :
- * -----------------
- * Compress 'isize' bytes from 'source' into an output buffer 'dest' of
- * maximum size 'maxOutputSize'. * If it cannot achieve it, compression
- * will stop, and result of the function will be zero.
- * return : the number of bytes written in buffer 'dest', or 0 if the
- * compression fails
- */
-static inline int lz4_compressctx(void *ctx,
- const char *source,
- char *dest,
- int isize,
- int maxoutputsize)
+/*-******************************
+ * Compression functions
+ ********************************/
+static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
{
- HTYPE *hashtable = (HTYPE *)ctx;
- const u8 *ip = (u8 *)source;
+ if (tableType == byU16)
+ return ((sequence * 2654435761U)
+ >> ((MINMATCH*8) - (LZ4_HASHLOG + 1)));
+ else
+ return ((sequence * 2654435761U)
+ >> ((MINMATCH*8) - LZ4_HASHLOG));
+}
+
#if LZ4_ARCH64
- const BYTE * const base = ip;
+static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
+{
+ const U32 hashLog = (tableType == byU16)
+ ? LZ4_HASHLOG + 1
+ : LZ4_HASHLOG;
+
+#ifdef __LITTLE_ENDIAN__
+ static const U64 prime5bytes = 889523592379ULL;
+
+ return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
#else
- const int base = 0;
+ static const U64 prime8bytes = 11400714785074694791ULL;
+
+ return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
#endif
- const u8 *anchor = ip;
- const u8 *const iend = ip + isize;
- const u8 *const mflimit = iend - MFLIMIT;
- #define MATCHLIMIT (iend - LASTLITERALS)
-
- u8 *op = (u8 *) dest;
- u8 *const oend = op + maxoutputsize;
- int length;
- const int skipstrength = SKIPSTRENGTH;
- u32 forwardh;
- int lastrun;
-
- /* Init */
- if (isize < MINLENGTH)
- goto _last_literals;
+}
+#endif
+
+static U32 LZ4_hashPosition(const void *p, tableType_t tableType)
+{
+#if LZ4_ARCH64
+ if (tableType == byU32)
+ return LZ4_hash5(LZ4_read_ARCH(p), tableType);
+#endif
+
+ return LZ4_hash4(LZ4_read32(p), tableType);
+}
+
+static void LZ4_putPositionOnHash(const BYTE *p, U32 h, void *tableBase,
+ tableType_t const tableType, const BYTE *srcBase)
+{
+ switch (tableType) {
+ case byPtr:
+ {
+ const BYTE **hashTable = (const BYTE **)tableBase;
+
+ hashTable[h] = p;
+ return;
+ }
+ case byU32:
+ {
+ U32 *hashTable = (U32 *) tableBase;
+
+ hashTable[h] = (U32)(p - srcBase);
+ return;
+ }
+ case byU16:
+ {
+ U16 *hashTable = (U16 *) tableBase;
+
+ hashTable[h] = (U16)(p - srcBase);
+ return;
+ }
+ }
+}
+
+static inline void LZ4_putPosition(const BYTE *p, void *tableBase,
+ tableType_t tableType, const BYTE *srcBase)
+{
+ U32 const h = LZ4_hashPosition(p, tableType);
+
+ LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+}
+
+static const BYTE *LZ4_getPositionOnHash(U32 h, void *tableBase,
+ tableType_t tableType, const BYTE *srcBase)
+{
+ if (tableType == byPtr) {
+ const BYTE **hashTable = (const BYTE **) tableBase;
+
+ return hashTable[h];
+ }
+
+ if (tableType == byU32) {
+ const U32 * const hashTable = (U32 *) tableBase;
+
+ return hashTable[h] + srcBase;
+ }
+
+ {
+ /* default, to ensure a return */
+ const U16 * const hashTable = (U16 *) tableBase;
+ return hashTable[h] + srcBase;
+ }
+}
+
+static inline const BYTE *LZ4_getPosition(const BYTE *p, void *tableBase,
+ tableType_t tableType, const BYTE *srcBase)
+{
+ U32 const h = LZ4_hashPosition(p, tableType);
+
+ return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+}
- memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+
+/*
+ * LZ4_compress_generic() :
+ * inlined, to ensure branches are decided at compilation time
+ */
+static inline int LZ4_compress_generic(
+ LZ4_stream_t_internal * const dictPtr,
+ const char * const source,
+ char * const dest,
+ const int inputSize,
+ const int maxOutputSize,
+ const limitedOutput_directive outputLimited,
+ const tableType_t tableType,
+ const dict_directive dict,
+ const dictIssue_directive dictIssue,
+ const U32 acceleration)
+{
+ const BYTE *ip = (const BYTE *) source;
+ const BYTE *base;
+ const BYTE *lowLimit;
+ const BYTE * const lowRefLimit = ip - dictPtr->dictSize;
+ const BYTE * const dictionary = dictPtr->dictionary;
+ const BYTE * const dictEnd = dictionary + dictPtr->dictSize;
+ const size_t dictDelta = dictEnd - (const BYTE *)source;
+ const BYTE *anchor = (const BYTE *) source;
+ const BYTE * const iend = ip + inputSize;
+ const BYTE * const mflimit = iend - MFLIMIT;
+ const BYTE * const matchlimit = iend - LASTLITERALS;
+
+ BYTE *op = (BYTE *) dest;
+ BYTE * const olimit = op + maxOutputSize;
+
+ U32 forwardH;
+ size_t refDelta = 0;
+
+ /* Init conditions */
+ if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) {
+ /* Unsupported inputSize, too large (or negative) */
+ return 0;
+ }
+ switch (dict) {
+ case noDict:
+ default:
+ base = (const BYTE *)source;
+ lowLimit = (const BYTE *)source;
+ break;
+ case withPrefix64k:
+ base = (const BYTE *)source - dictPtr->currentOffset;
+ lowLimit = (const BYTE *)source - dictPtr->dictSize;
+ break;
+ case usingExtDict:
+ base = (const BYTE *)source - dictPtr->currentOffset;
+ lowLimit = (const BYTE *)source;
+ break;
+ }
+
+ if ((tableType == byU16)
+ && (inputSize >= LZ4_64Klimit)) {
+ /* Size too large (not within 64K limit) */
+ return 0;
+ }
+
+ if (inputSize < LZ4_minLength) {
+ /* Input too small, no compression (all literals) */
+ goto _last_literals;
+ }
/* First Byte */
- hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
- ip++;
- forwardh = LZ4_HASH_VALUE(ip);
+ LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+ ip++; forwardH = LZ4_hashPosition(ip, tableType);
/* Main Loop */
- for (;;) {
- int findmatchattempts = (1U << skipstrength) + 3;
- const u8 *forwardip = ip;
- const u8 *ref;
- u8 *token;
+ for ( ; ; ) {
+ const BYTE *match;
+ BYTE *token;
/* Find a match */
- do {
- u32 h = forwardh;
- int step = findmatchattempts++ >> skipstrength;
- ip = forwardip;
- forwardip = ip + step;
-
- if (unlikely(forwardip > mflimit))
- goto _last_literals;
-
- forwardh = LZ4_HASH_VALUE(forwardip);
- ref = base + hashtable[h];
- hashtable[h] = ip - base;
- } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
+ {
+ const BYTE *forwardIp = ip;
+ unsigned int step = 1;
+ unsigned int searchMatchNb = acceleration
+ << LZ4_skipTrigger;
+
+ do {
+ U32 const h = forwardH;
+
+ ip = forwardIp;
+ forwardIp += step;
+ step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+ if (unlikely(forwardIp > mflimit))
+ goto _last_literals;
+
+ match = LZ4_getPositionOnHash(h,
+ dictPtr->hashTable,
+ tableType, base);
+ if (dict == usingExtDict) {
+ if (match < (const BYTE *)source) {
+ refDelta = dictDelta;
+ lowLimit = dictionary;
+ } else {
+ refDelta = 0;
+ lowLimit = (const BYTE *)source;
+ } }
+ forwardH = LZ4_hashPosition(forwardIp,
+ tableType);
+ LZ4_putPositionOnHash(ip, h, dictPtr->hashTable,
+ tableType, base);
+
+ } while (((dictIssue == dictSmall)
+ ? (match < lowRefLimit)
+ : 0)
+ || ((tableType == byU16)
+ ? 0
+ : (match + MAX_DISTANCE < ip))
+ || (LZ4_read32(match + refDelta)
+ != LZ4_read32(ip)));
+ }
/* Catch up */
- while ((ip > anchor) && (ref > (u8 *)source) &&
- unlikely(ip[-1] == ref[-1])) {
+ while (((ip > anchor) & (match + refDelta > lowLimit))
+ && (unlikely(ip[-1] == match[refDelta - 1]))) {
ip--;
- ref--;
- }
+ match--;
+ }
- /* Encode Literal length */
- length = (int)(ip - anchor);
- token = op++;
- /* check output limit */
- if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
- (length >> 8) > oend))
- return 0;
+ /* Encode Literals */
+ {
+ unsigned const int litLength = (unsigned int)(ip - anchor);
- if (length >= (int)RUN_MASK) {
- int len;
- *token = (RUN_MASK << ML_BITS);
- len = length - RUN_MASK;
- for (; len > 254 ; len -= 255)
- *op++ = 255;
- *op++ = (u8)len;
- } else
- *token = (length << ML_BITS);
+ token = op++;
+ if ((outputLimited) &&
+ /* Check output buffer overflow */
+ (unlikely(op + litLength +
+ (2 + 1 + LASTLITERALS) +
+ (litLength/255) > olimit)))
+ return 0;
+ if (litLength >= RUN_MASK) {
+ int len = (int)litLength - RUN_MASK;
+
+ *token = (RUN_MASK<<ML_BITS);
+ for (; len >= 255 ; len -= 255)
+ *op++ = 255;
+ *op++ = (BYTE)len;
+ } else
+ *token = (BYTE)(litLength<<ML_BITS);
+
+ /* Copy Literals */
+ LZ4_wildCopy(op, anchor, op + litLength);
+ op += litLength;
+ }
- /* Copy Literals */
- LZ4_BLINDCOPY(anchor, op, length);
_next_match:
/* Encode Offset */
- LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+ LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
- /* Start Counting */
- ip += MINMATCH;
- /* MinMatch verified */
- ref += MINMATCH;
- anchor = ip;
- while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) {
- #if LZ4_ARCH64
- u64 diff = A64(ref) ^ A64(ip);
- #else
- u32 diff = A32(ref) ^ A32(ip);
- #endif
- if (!diff) {
- ip += STEPSIZE;
- ref += STEPSIZE;
- continue;
- }
- ip += LZ4_NBCOMMONBYTES(diff);
- goto _endcount;
- }
- #if LZ4_ARCH64
- if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
- ip += 4;
- ref += 4;
- }
- #endif
- if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
- ip += 2;
- ref += 2;
- }
- if ((ip < MATCHLIMIT) && (*ref == *ip))
- ip++;
-_endcount:
/* Encode MatchLength */
- length = (int)(ip - anchor);
- /* Check output limit */
- if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend))
- return 0;
- if (length >= (int)ML_MASK) {
- *token += ML_MASK;
- length -= ML_MASK;
- for (; length > 509 ; length -= 510) {
- *op++ = 255;
- *op++ = 255;
- }
- if (length > 254) {
- length -= 255;
- *op++ = 255;
+ {
+ unsigned int matchCode;
+
+ if ((dict == usingExtDict)
+ && (lowLimit == dictionary)) {
+ const BYTE *limit;
+
+ match += refDelta;
+ limit = ip + (dictEnd - match);
+ if (limit > matchlimit)
+ limit = matchlimit;
+ matchCode = LZ4_count(ip + MINMATCH,
+ match + MINMATCH, limit);
+ ip += MINMATCH + matchCode;
+ if (ip == limit) {
+ unsigned const int more = LZ4_count(ip,
+ (const BYTE *)source,
+ matchlimit);
+
+ matchCode += more;
+ ip += more;
+ }
+ } else {
+ matchCode = LZ4_count(ip + MINMATCH,
+ match + MINMATCH, matchlimit);
+ ip += MINMATCH + matchCode;
}
- *op++ = (u8)length;
- } else
- *token += length;
+
+ if (outputLimited &&
+ /* Check output buffer overflow */
+ (unlikely(op +
+ (1 + LASTLITERALS) +
+ (matchCode>>8) > olimit)))
+ return 0;
+ if (matchCode >= ML_MASK) {
+ *token += ML_MASK;
+ matchCode -= ML_MASK;
+ LZ4_write32(op, 0xFFFFFFFF);
+ while (matchCode >= 4*255) {
+ op += 4;
+ LZ4_write32(op, 0xFFFFFFFF);
+ matchCode -= 4*255;
+ }
+ op += matchCode / 255;
+ *op++ = (BYTE)(matchCode % 255);
+ } else
+ *token += (BYTE)(matchCode);
+ }
+
+ anchor = ip;
/* Test end of chunk */
- if (ip > mflimit) {
- anchor = ip;
+ if (ip > mflimit)
break;
- }
/* Fill table */
- hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base;
+ LZ4_putPosition(ip - 2, dictPtr->hashTable, tableType, base);
/* Test next position */
- ref = base + hashtable[LZ4_HASH_VALUE(ip)];
- hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
- if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
+ match = LZ4_getPosition(ip, dictPtr->hashTable,
+ tableType, base);
+ if (dict == usingExtDict) {
+ if (match < (const BYTE *)source) {
+ refDelta = dictDelta;
+ lowLimit = dictionary;
+ } else {
+ refDelta = 0;
+ lowLimit = (const BYTE *)source;
+ }
+ }
+ LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+ if (((dictIssue == dictSmall) ? (match >= lowRefLimit) : 1)
+ && (match + MAX_DISTANCE >= ip)
+ && (LZ4_read32(match + refDelta) == LZ4_read32(ip))) {
token = op++;
*token = 0;
goto _next_match;
}
/* Prepare next loop */
- anchor = ip++;
- forwardh = LZ4_HASH_VALUE(ip);
+ forwardH = LZ4_hashPosition(++ip, tableType);
}
_last_literals:
/* Encode Last Literals */
- lastrun = (int)(iend - anchor);
- if (((char *)op - dest) + lastrun + 1
- + ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize)
- return 0;
+ {
+ size_t const lastRun = (size_t)(iend - anchor);
- if (lastrun >= (int)RUN_MASK) {
- *op++ = (RUN_MASK << ML_BITS);
- lastrun -= RUN_MASK;
- for (; lastrun > 254 ; lastrun -= 255)
- *op++ = 255;
- *op++ = (u8)lastrun;
- } else
- *op++ = (lastrun << ML_BITS);
- memcpy(op, anchor, iend - anchor);
- op += iend - anchor;
+ if ((outputLimited) &&
+ /* Check output buffer overflow */
+ ((op - (BYTE *)dest) + lastRun + 1 +
+ ((lastRun + 255 - RUN_MASK)/255) > (U32)maxOutputSize))
+ return 0;
+ if (lastRun >= RUN_MASK) {
+ size_t accumulator = lastRun - RUN_MASK;
+ *op++ = RUN_MASK << ML_BITS;
+ for (; accumulator >= 255 ; accumulator -= 255)
+ *op++ = 255;
+ *op++ = (BYTE) accumulator;
+ } else {
+ *op++ = (BYTE)(lastRun<<ML_BITS);
+ }
+ memcpy(op, anchor, lastRun);
+ op += lastRun;
+ }
/* End */
- return (int)(((char *)op) - dest);
+ return (int) (((char *)op) - dest);
}
-static inline int lz4_compress64kctx(void *ctx,
- const char *source,
- char *dest,
- int isize,
- int maxoutputsize)
+static int LZ4_compress_fast_extState(void *state, const char *source, char *dest,
+ int inputSize, int maxOutputSize, int acceleration)
{
- u16 *hashtable = (u16 *)ctx;
- const u8 *ip = (u8 *) source;
- const u8 *anchor = ip;
- const u8 *const base = ip;
- const u8 *const iend = ip + isize;
- const u8 *const mflimit = iend - MFLIMIT;
- #define MATCHLIMIT (iend - LASTLITERALS)
-
- u8 *op = (u8 *) dest;
- u8 *const oend = op + maxoutputsize;
- int len, length;
- const int skipstrength = SKIPSTRENGTH;
- u32 forwardh;
- int lastrun;
-
- /* Init */
- if (isize < MINLENGTH)
- goto _last_literals;
+ #if LZ4_ARCH64
+ tableType_t tableType = byU32;
+ #else
+ tableType_t tableType = byPtr;
+ #endif
+
+ LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse;
+
+ LZ4_resetStream((LZ4_stream_t *)state);
+
+ if (acceleration < 1)
+ acceleration = LZ4_ACCELERATION_DEFAULT;
+
+ if (maxOutputSize >= LZ4_compressBound(inputSize)) {
+ if (inputSize < LZ4_64Klimit)
+ return LZ4_compress_generic(ctx, source,
+ dest, inputSize, 0,
+ noLimit, byU16, noDict,
+ noDictIssue, acceleration);
+ else
+ return LZ4_compress_generic(ctx, source,
+ dest, inputSize, 0,
+ noLimit, tableType, noDict,
+ noDictIssue, acceleration);
+ } else {
+ if (inputSize < LZ4_64Klimit)
+ return LZ4_compress_generic(ctx, source,
+ dest, inputSize,
+ maxOutputSize, limitedOutput, byU16, noDict,
+ noDictIssue, acceleration);
+ else
+ return LZ4_compress_generic(ctx, source,
+ dest, inputSize,
+ maxOutputSize, limitedOutput, tableType, noDict,
+ noDictIssue, acceleration);
+ }
+}
- memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+ int maxOutputSize, int acceleration, void *wrkmem)
+{
+ return LZ4_compress_fast_extState(wrkmem, source, dest, inputSize,
+ maxOutputSize, acceleration);
+}
+EXPORT_SYMBOL(LZ4_compress_fast);
+
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+ int maxOutputSize, void *wrkmem)
+{
+ return LZ4_compress_fast(source, dest, inputSize,
+ maxOutputSize, LZ4_ACCELERATION_DEFAULT, wrkmem);
+}
+EXPORT_SYMBOL(LZ4_compress_default);
+
+/*-******************************
+ * *_destSize() variant
+ ********************************/
+
+static int LZ4_compress_destSize_generic(
+ LZ4_stream_t_internal * const ctx,
+ const char * const src,
+ char * const dst,
+ int * const srcSizePtr,
+ const int targetDstSize,
+ const tableType_t tableType)
+{
+ const BYTE *ip = (const BYTE *) src;
+ const BYTE *base = (const BYTE *) src;
+ const BYTE *lowLimit = (const BYTE *) src;
+ const BYTE *anchor = ip;
+ const BYTE * const iend = ip + *srcSizePtr;
+ const BYTE * const mflimit = iend - MFLIMIT;
+ const BYTE * const matchlimit = iend - LASTLITERALS;
+
+ BYTE *op = (BYTE *) dst;
+ BYTE * const oend = op + targetDstSize;
+ BYTE * const oMaxLit = op + targetDstSize - 2 /* offset */
+ - 8 /* because 8 + MINMATCH == MFLIMIT */ - 1 /* token */;
+ BYTE * const oMaxMatch = op + targetDstSize
+ - (LASTLITERALS + 1 /* token */);
+ BYTE * const oMaxSeq = oMaxLit - 1 /* token */;
+
+ U32 forwardH;
+
+ /* Init conditions */
+ /* Impossible to store anything */
+ if (targetDstSize < 1)
+ return 0;
+ /* Unsupported input size, too large (or negative) */
+ if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE)
+ return 0;
+ /* Size too large (not within 64K limit) */
+ if ((tableType == byU16) && (*srcSizePtr >= LZ4_64Klimit))
+ return 0;
+ /* Input too small, no compression (all literals) */
+ if (*srcSizePtr < LZ4_minLength)
+ goto _last_literals;
/* First Byte */
- ip++;
- forwardh = LZ4_HASH64K_VALUE(ip);
+ *srcSizePtr = 0;
+ LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+ ip++; forwardH = LZ4_hashPosition(ip, tableType);
/* Main Loop */
- for (;;) {
- int findmatchattempts = (1U << skipstrength) + 3;
- const u8 *forwardip = ip;
- const u8 *ref;
- u8 *token;
+ for ( ; ; ) {
+ const BYTE *match;
+ BYTE *token;
/* Find a match */
- do {
- u32 h = forwardh;
- int step = findmatchattempts++ >> skipstrength;
- ip = forwardip;
- forwardip = ip + step;
-
- if (forwardip > mflimit)
- goto _last_literals;
-
- forwardh = LZ4_HASH64K_VALUE(forwardip);
- ref = base + hashtable[h];
- hashtable[h] = (u16)(ip - base);
- } while (A32(ref) != A32(ip));
+ {
+ const BYTE *forwardIp = ip;
+ unsigned int step = 1;
+ unsigned int searchMatchNb = 1 << LZ4_skipTrigger;
+
+ do {
+ U32 h = forwardH;
+
+ ip = forwardIp;
+ forwardIp += step;
+ step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+ if (unlikely(forwardIp > mflimit))
+ goto _last_literals;
+
+ match = LZ4_getPositionOnHash(h, ctx->hashTable,
+ tableType, base);
+ forwardH = LZ4_hashPosition(forwardIp,
+ tableType);
+ LZ4_putPositionOnHash(ip, h,
+ ctx->hashTable, tableType,
+ base);
+
+ } while (((tableType == byU16)
+ ? 0
+ : (match + MAX_DISTANCE < ip))
+ || (LZ4_read32(match) != LZ4_read32(ip)));
+ }
/* Catch up */
- while ((ip > anchor) && (ref > (u8 *)source)
- && (ip[-1] == ref[-1])) {
- ip--;
- ref--;
- }
+ while ((ip > anchor)
+ && (match > lowLimit)
+ && (unlikely(ip[-1] == match[-1]))) {
+ ip--; match--;
+ }
/* Encode Literal length */
- length = (int)(ip - anchor);
- token = op++;
- /* Check output limit */
- if (unlikely(op + length + (2 + 1 + LASTLITERALS)
- + (length >> 8) > oend))
- return 0;
- if (length >= (int)RUN_MASK) {
- *token = (RUN_MASK << ML_BITS);
- len = length - RUN_MASK;
- for (; len > 254 ; len -= 255)
- *op++ = 255;
- *op++ = (u8)len;
- } else
- *token = (length << ML_BITS);
+ {
+ unsigned int litLength = (unsigned int)(ip - anchor);
- /* Copy Literals */
- LZ4_BLINDCOPY(anchor, op, length);
+ token = op++;
+ if (op + ((litLength + 240)/255)
+ + litLength > oMaxLit) {
+ /* Not enough space for a last match */
+ op--;
+ goto _last_literals;
+ }
+ if (litLength >= RUN_MASK) {
+ unsigned int len = litLength - RUN_MASK;
+ *token = (RUN_MASK<<ML_BITS);
+ for (; len >= 255 ; len -= 255)
+ *op++ = 255;
+ *op++ = (BYTE)len;
+ } else
+ *token = (BYTE)(litLength<<ML_BITS);
+
+ /* Copy Literals */
+ LZ4_wildCopy(op, anchor, op + litLength);
+ op += litLength;
+ }
_next_match:
/* Encode Offset */
- LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+ LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
- /* Start Counting */
- ip += MINMATCH;
- /* MinMatch verified */
- ref += MINMATCH;
- anchor = ip;
+ /* Encode MatchLength */
+ {
+ size_t matchLength = LZ4_count(ip + MINMATCH,
+ match + MINMATCH, matchlimit);
- while (ip < MATCHLIMIT - (STEPSIZE - 1)) {
- #if LZ4_ARCH64
- u64 diff = A64(ref) ^ A64(ip);
- #else
- u32 diff = A32(ref) ^ A32(ip);
- #endif
-
- if (!diff) {
- ip += STEPSIZE;
- ref += STEPSIZE;
- continue;
+ if (op + ((matchLength + 240)/255) > oMaxMatch) {
+ /* Match description too long : reduce it */
+ matchLength = (15 - 1) + (oMaxMatch - op) * 255;
}
- ip += LZ4_NBCOMMONBYTES(diff);
- goto _endcount;
- }
- #if LZ4_ARCH64
- if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
- ip += 4;
- ref += 4;
+ ip += MINMATCH + matchLength;
+
+ if (matchLength >= ML_MASK) {
+ *token += ML_MASK;
+ matchLength -= ML_MASK;
+ while (matchLength >= 255) {
+ matchLength -= 255; *op++ = 255;
+ }
+ *op++ = (BYTE)matchLength;
+ } else
+ *token += (BYTE)(matchLength);
}
- #endif
- if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
- ip += 2;
- ref += 2;
- }
- if ((ip < MATCHLIMIT) && (*ref == *ip))
- ip++;
-_endcount:
- /* Encode MatchLength */
- len = (int)(ip - anchor);
- /* Check output limit */
- if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
- return 0;
- if (len >= (int)ML_MASK) {
- *token += ML_MASK;
- len -= ML_MASK;
- for (; len > 509 ; len -= 510) {
- *op++ = 255;
- *op++ = 255;
- }
- if (len > 254) {
- len -= 255;
- *op++ = 255;
- }
- *op++ = (u8)len;
- } else
- *token += len;
+ anchor = ip;
- /* Test end of chunk */
- if (ip > mflimit) {
- anchor = ip;
+ /* Test end of block */
+ if (ip > mflimit)
+ break;
+ if (op > oMaxSeq)
break;
- }
/* Fill table */
- hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base);
+ LZ4_putPosition(ip - 2, ctx->hashTable, tableType, base);
/* Test next position */
- ref = base + hashtable[LZ4_HASH64K_VALUE(ip)];
- hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base);
- if (A32(ref) == A32(ip)) {
- token = op++;
- *token = 0;
+ match = LZ4_getPosition(ip, ctx->hashTable, tableType, base);
+ LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+
+ if ((match + MAX_DISTANCE >= ip)
+ && (LZ4_read32(match) == LZ4_read32(ip))) {
+ token = op++; *token = 0;
goto _next_match;
}
/* Prepare next loop */
- anchor = ip++;
- forwardh = LZ4_HASH64K_VALUE(ip);
+ forwardH = LZ4_hashPosition(++ip, tableType);
}
_last_literals:
/* Encode Last Literals */
- lastrun = (int)(iend - anchor);
- if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend)
- return 0;
- if (lastrun >= (int)RUN_MASK) {
- *op++ = (RUN_MASK << ML_BITS);
- lastrun -= RUN_MASK;
- for (; lastrun > 254 ; lastrun -= 255)
- *op++ = 255;
- *op++ = (u8)lastrun;
- } else
- *op++ = (lastrun << ML_BITS);
- memcpy(op, anchor, iend - anchor);
- op += iend - anchor;
+ {
+ size_t lastRunSize = (size_t)(iend - anchor);
+
+ if (op + 1 /* token */
+ + ((lastRunSize + 240)/255) /* litLength */
+ + lastRunSize /* literals */ > oend) {
+ /* adapt lastRunSize to fill 'dst' */
+ lastRunSize = (oend - op) - 1;
+ lastRunSize -= (lastRunSize + 240)/255;
+ }
+ ip = anchor + lastRunSize;
+
+ if (lastRunSize >= RUN_MASK) {
+ size_t accumulator = lastRunSize - RUN_MASK;
+
+ *op++ = RUN_MASK << ML_BITS;
+ for (; accumulator >= 255 ; accumulator -= 255)
+ *op++ = 255;
+ *op++ = (BYTE) accumulator;
+ } else {
+ *op++ = (BYTE)(lastRunSize<<ML_BITS);
+ }
+ memcpy(op, anchor, lastRunSize);
+ op += lastRunSize;
+ }
+
/* End */
- return (int)(((char *)op) - dest);
+ *srcSizePtr = (int) (((const char *)ip) - src);
+ return (int) (((char *)op) - dst);
}
-int lz4_compress(const unsigned char *src, size_t src_len,
- unsigned char *dst, size_t *dst_len, void *wrkmem)
+static int LZ4_compress_destSize_extState(LZ4_stream_t *state, const char *src,
+ char *dst, int *srcSizePtr, int targetDstSize)
{
- int ret = -1;
- int out_len = 0;
+ #if LZ4_ARCH64
+ tableType_t tableType = byU32;
+ #else
+ tableType_t tableType = byPtr;
+ #endif
+
+ LZ4_resetStream(state);
+
+ if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {
+ /* compression success is guaranteed */
+ return LZ4_compress_fast_extState(
+ state, src, dst, *srcSizePtr,
+ targetDstSize, 1);
+ } else {
+ if (*srcSizePtr < LZ4_64Klimit)
+ return LZ4_compress_destSize_generic(
+ &state->internal_donotuse,
+ src, dst, srcSizePtr,
+ targetDstSize, byU16);
+ else
+ return LZ4_compress_destSize_generic(
+ &state->internal_donotuse,
+ src, dst, srcSizePtr,
+ targetDstSize, tableType);
+ }
+}
- if (src_len < LZ4_64KLIMIT)
- out_len = lz4_compress64kctx(wrkmem, src, dst, src_len,
- lz4_compressbound(src_len));
- else
- out_len = lz4_compressctx(wrkmem, src, dst, src_len,
- lz4_compressbound(src_len));
- if (out_len < 0)
- goto exit;
+int LZ4_compress_destSize(const char *src, char *dst, int *srcSizePtr,
+ int targetDstSize, void *wrkmem)
+{
+ return LZ4_compress_destSize_extState(wrkmem, src, dst, srcSizePtr,
+ targetDstSize);
+}
+EXPORT_SYMBOL(LZ4_compress_destSize);
+
+/*-******************************
+ * Streaming functions
+ ********************************/
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream)
+{
+ memset(LZ4_stream, 0, sizeof(LZ4_stream_t));
+}
+
+#define HASH_UNIT sizeof(size_t)
+int LZ4_loadDict(LZ4_stream_t *LZ4_dict,
+ const char *dictionary, int dictSize)
+{
+ LZ4_stream_t_internal *dict = &LZ4_dict->internal_donotuse;
+ const BYTE *p = (const BYTE *)dictionary;
+ const BYTE * const dictEnd = p + dictSize;
+ const BYTE *base;
+
+ if ((dict->initCheck)
+ || (dict->currentOffset > 1 * GB)) {
+ /* Uninitialized structure, or reuse overflow */
+ LZ4_resetStream(LZ4_dict);
+ }
+
+ if (dictSize < (int)HASH_UNIT) {
+ dict->dictionary = NULL;
+ dict->dictSize = 0;
+ return 0;
+ }
+
+ if ((dictEnd - p) > 64 * KB)
+ p = dictEnd - 64 * KB;
+ dict->currentOffset += 64 * KB;
+ base = p - dict->currentOffset;
+ dict->dictionary = p;
+ dict->dictSize = (U32)(dictEnd - p);
+ dict->currentOffset += dict->dictSize;
+
+ while (p <= dictEnd - HASH_UNIT) {
+ LZ4_putPosition(p, dict->hashTable, byU32, base);
+ p += 3;
+ }
+
+ return dict->dictSize;
+}
+EXPORT_SYMBOL(LZ4_loadDict);
+
+static void LZ4_renormDictT(LZ4_stream_t_internal *LZ4_dict,
+ const BYTE *src)
+{
+ if ((LZ4_dict->currentOffset > 0x80000000) ||
+ ((uptrval)LZ4_dict->currentOffset > (uptrval)src)) {
+ /* address space overflow */
+ /* rescale hash table */
+ U32 const delta = LZ4_dict->currentOffset - 64 * KB;
+ const BYTE *dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+ int i;
+
+ for (i = 0; i < LZ4_HASH_SIZE_U32; i++) {
+ if (LZ4_dict->hashTable[i] < delta)
+ LZ4_dict->hashTable[i] = 0;
+ else
+ LZ4_dict->hashTable[i] -= delta;
+ }
+ LZ4_dict->currentOffset = 64 * KB;
+ if (LZ4_dict->dictSize > 64 * KB)
+ LZ4_dict->dictSize = 64 * KB;
+ LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+ }
+}
+
+int LZ4_saveDict(LZ4_stream_t *LZ4_dict, char *safeBuffer, int dictSize)
+{
+ LZ4_stream_t_internal * const dict = &LZ4_dict->internal_donotuse;
+ const BYTE * const previousDictEnd = dict->dictionary + dict->dictSize;
+
+ if ((U32)dictSize > 64 * KB) {
+ /* useless to define a dictionary > 64 * KB */
+ dictSize = 64 * KB;
+ }
+ if ((U32)dictSize > dict->dictSize)
+ dictSize = dict->dictSize;
- *dst_len = out_len;
+ memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
+
+ dict->dictionary = (const BYTE *)safeBuffer;
+ dict->dictSize = (U32)dictSize;
+
+ return dictSize;
+}
+EXPORT_SYMBOL(LZ4_saveDict);
+
+int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
+ char *dest, int inputSize, int maxOutputSize, int acceleration)
+{
+ LZ4_stream_t_internal *streamPtr = &LZ4_stream->internal_donotuse;
+ const BYTE * const dictEnd = streamPtr->dictionary
+ + streamPtr->dictSize;
+
+ const BYTE *smallest = (const BYTE *) source;
+
+ if (streamPtr->initCheck) {
+ /* Uninitialized structure detected */
+ return 0;
+ }
- return 0;
-exit:
- return ret;
+ if ((streamPtr->dictSize > 0) && (smallest > dictEnd))
+ smallest = dictEnd;
+
+ LZ4_renormDictT(streamPtr, smallest);
+
+ if (acceleration < 1)
+ acceleration = LZ4_ACCELERATION_DEFAULT;
+
+ /* Check overlapping input/dictionary space */
+ {
+ const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+
+ if ((sourceEnd > streamPtr->dictionary)
+ && (sourceEnd < dictEnd)) {
+ streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+ if (streamPtr->dictSize > 64 * KB)
+ streamPtr->dictSize = 64 * KB;
+ if (streamPtr->dictSize < 4)
+ streamPtr->dictSize = 0;
+ streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+ }
+ }
+
+ /* prefix mode : source data follows dictionary */
+ if (dictEnd == (const BYTE *)source) {
+ int result;
+
+ if ((streamPtr->dictSize < 64 * KB) &&
+ (streamPtr->dictSize < streamPtr->currentOffset)) {
+ result = LZ4_compress_generic(
+ streamPtr, source, dest, inputSize,
+ maxOutputSize, limitedOutput, byU32,
+ withPrefix64k, dictSmall, acceleration);
+ } else {
+ result = LZ4_compress_generic(
+ streamPtr, source, dest, inputSize,
+ maxOutputSize, limitedOutput, byU32,
+ withPrefix64k, noDictIssue, acceleration);
+ }
+ streamPtr->dictSize += (U32)inputSize;
+ streamPtr->currentOffset += (U32)inputSize;
+ return result;
+ }
+
+ /* external dictionary mode */
+ {
+ int result;
+
+ if ((streamPtr->dictSize < 64 * KB) &&
+ (streamPtr->dictSize < streamPtr->currentOffset)) {
+ result = LZ4_compress_generic(
+ streamPtr, source, dest, inputSize,
+ maxOutputSize, limitedOutput, byU32,
+ usingExtDict, dictSmall, acceleration);
+ } else {
+ result = LZ4_compress_generic(
+ streamPtr, source, dest, inputSize,
+ maxOutputSize, limitedOutput, byU32,
+ usingExtDict, noDictIssue, acceleration);
+ }
+ streamPtr->dictionary = (const BYTE *)source;
+ streamPtr->dictSize = (U32)inputSize;
+ streamPtr->currentOffset += (U32)inputSize;
+ return result;
+ }
}
-EXPORT_SYMBOL(lz4_compress);
+EXPORT_SYMBOL(LZ4_compress_fast_continue);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("LZ4 compressor");
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index 6d940c72b5fc..9bf918233749 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -1,25 +1,16 @@
/*
- * LZ4 Decompressor for Linux kernel
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
- *
- * Based on LZ4 implementation by Yann Collet.
- *
* LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
- *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -31,313 +22,466 @@
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ * - LZ4 homepage : http://www.lz4.org
+ * - LZ4 source repository : https://github.com/lz4/lz4
*
- * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ * Changed for kernel usage by:
+ * Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
*/
-#ifndef STATIC
+/*-************************************
+ * Dependencies
+ **************************************/
+#include <linux/lz4.h>
+#include "lz4defs.h"
+#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
-#endif
-#include <linux/lz4.h>
-
#include <asm/unaligned.h>
-#include "lz4defs.h"
-
-static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
-#endif
-
-static int lz4_uncompress(const char *source, char *dest, int osize)
+/*-*****************************
+ * Decompression functions
+ *******************************/
+/* LZ4_decompress_generic() :
+ * This generic decompression function cover all use cases.
+ * It shall be instantiated several times, using different sets of directives
+ * Note that it is important this generic function is really inlined,
+ * in order to remove useless branches during compilation optimization.
+ */
+static inline int LZ4_decompress_generic(
+ const char *const source,
+ char * const dest,
+ int inputSize,
+ /*
+ * If endOnInput == endOnInputSize,
+ * this value is the max size of Output Buffer.
+ */
+ int outputSize,
+ /* endOnOutputSize, endOnInputSize */
+ int endOnInput,
+ /* full, partial */
+ int partialDecoding,
+ /* only used if partialDecoding == partial */
+ int targetOutputSize,
+ /* noDict, withPrefix64k, usingExtDict */
+ int dict,
+ /* == dest when no prefix */
+ const BYTE * const lowPrefix,
+ /* only if dict == usingExtDict */
+ const BYTE * const dictStart,
+ /* note : = 0 if noDict */
+ const size_t dictSize
+ )
{
+ /* Local Variables */
const BYTE *ip = (const BYTE *) source;
- const BYTE *ref;
+ const BYTE * const iend = ip + inputSize;
+
BYTE *op = (BYTE *) dest;
- BYTE * const oend = op + osize;
+ BYTE * const oend = op + outputSize;
BYTE *cpy;
- unsigned token;
- size_t length;
+ BYTE *oexit = op + targetOutputSize;
+ const BYTE * const lowLimit = lowPrefix - dictSize;
+
+ const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize;
+ const unsigned int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};
+ const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+ const int safeDecode = (endOnInput == endOnInputSize);
+ const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB)));
+
+ /* Special cases */
+ /* targetOutputSize too high => decode everything */
+ if ((partialDecoding) && (oexit > oend - MFLIMIT))
+ oexit = oend - MFLIMIT;
+
+ /* Empty output buffer */
+ if ((endOnInput) && (unlikely(outputSize == 0)))
+ return ((inputSize == 1) && (*ip == 0)) ? 0 : -1;
+
+ if ((!endOnInput) && (unlikely(outputSize == 0)))
+ return (*ip == 0 ? 1 : -1);
+
+ /* Main Loop : decode sequences */
while (1) {
+ size_t length;
+ const BYTE *match;
+ size_t offset;
+
+ /* get literal length */
+ unsigned int const token = *ip++;
+
+ length = token>>ML_BITS;
- /* get runlength */
- token = *ip++;
- length = (token >> ML_BITS);
if (length == RUN_MASK) {
- size_t len;
+ unsigned int s;
- len = *ip++;
- for (; len == 255; length += 255)
- len = *ip++;
- if (unlikely(length > (size_t)(length + len)))
+ do {
+ s = *ip++;
+ length += s;
+ } while (likely(endOnInput
+ ? ip < iend - RUN_MASK
+ : 1) & (s == 255));
+
+ if ((safeDecode)
+ && unlikely(
+ (size_t)(op + length) < (size_t)(op))) {
+ /* overflow detection */
+ goto _output_error;
+ }
+ if ((safeDecode)
+ && unlikely(
+ (size_t)(ip + length) < (size_t)(ip))) {
+ /* overflow detection */
goto _output_error;
- length += len;
+ }
}
/* copy literals */
cpy = op + length;
- if (unlikely(cpy > oend - COPYLENGTH)) {
- /*
- * Error: not enough place for another match
- * (min 4) + 5 literals
- */
- if (cpy != oend)
- goto _output_error;
-
+ if (((endOnInput) && ((cpy > (partialDecoding ? oexit : oend - MFLIMIT))
+ || (ip + length > iend - (2 + 1 + LASTLITERALS))))
+ || ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) {
+ if (partialDecoding) {
+ if (cpy > oend) {
+ /*
+ * Error :
+ * write attempt beyond end of output buffer
+ */
+ goto _output_error;
+ }
+ if ((endOnInput)
+ && (ip + length > iend)) {
+ /*
+ * Error :
+ * read attempt beyond
+ * end of input buffer
+ */
+ goto _output_error;
+ }
+ } else {
+ if ((!endOnInput)
+ && (cpy != oend)) {
+ /*
+ * Error :
+ * block decoding must
+ * stop exactly there
+ */
+ goto _output_error;
+ }
+ if ((endOnInput)
+ && ((ip + length != iend)
+ || (cpy > oend))) {
+ /*
+ * Error :
+ * input must be consumed
+ */
+ goto _output_error;
+ }
+ }
memcpy(op, ip, length);
ip += length;
- break; /* EOF */
+ op += length;
+ /* Necessarily EOF, due to parsing restrictions */
+ break;
}
- LZ4_WILDCOPY(ip, op, cpy);
- ip -= (op - cpy);
- op = cpy;
+ LZ4_wildCopy(op, ip, cpy);
+ ip += length; op = cpy;
/* get offset */
- LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
- ip += 2;
-
- /* Error: offset create reference outside destination buffer */
- if (unlikely(ref < (BYTE *const) dest))
+ offset = LZ4_readLE16(ip); ip += 2;
+ match = op - offset;
+ if ((checkOffset) && (unlikely(match < lowLimit))) {
+ /* Error : offset outside buffers */
goto _output_error;
+ }
+ /* costs ~1%; silence an msan warning when offset == 0 */
+ LZ4_write32(op, (U32)offset);
/* get matchlength */
length = token & ML_MASK;
if (length == ML_MASK) {
- for (; *ip == 255; length += 255)
- ip++;
- if (unlikely(length > (size_t)(length + *ip)))
+ unsigned int s;
+
+ do {
+ s = *ip++;
+ if ((endOnInput) && (ip > iend - LASTLITERALS))
goto _output_error;
- length += *ip++;
+ length += s;
+ } while (s == 255);
+ if ((safeDecode)
+ && unlikely(
+ (size_t)(op + length) < (size_t)op)) {
+ /* overflow detection */
+ goto _output_error;
+ }
}
+ length += MINMATCH;
- /* copy repeated sequence */
- if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
- int dec64 = dec64table[op - ref];
-#else
- const int dec64 = 0;
-#endif
- op[0] = ref[0];
- op[1] = ref[1];
- op[2] = ref[2];
- op[3] = ref[3];
- op += 4;
- ref += 4;
- ref -= dec32table[op-ref];
- PUT4(ref, op);
- op += STEPSIZE - 4;
- ref -= dec64;
+ /* check external dictionary */
+ if ((dict == usingExtDict) && (match < lowPrefix)) {
+ if (unlikely(op + length > oend - LASTLITERALS)) {
+ /* doesn't respect parsing restriction */
+ goto _output_error;
+ }
+
+ if (length <= (size_t)(lowPrefix - match)) {
+ /*
+ * match can be copied as a single segment
+ * from external dictionary
+ */
+ memmove(op, dictEnd - (lowPrefix - match), length);
+ op += length;
+ } else {
+ /*
+ * match encompass external
+ * dictionary and current block
+ */
+ size_t const copySize = (size_t)(lowPrefix - match);
+ size_t const restSize = length - copySize;
+
+ memcpy(op, dictEnd - copySize, copySize);
+ op += copySize;
+
+ if (restSize > (size_t)(op - lowPrefix)) {
+ /* overlap copy */
+ BYTE * const endOfMatch = op + restSize;
+ const BYTE *copyFrom = lowPrefix;
+
+ while (op < endOfMatch)
+ *op++ = *copyFrom++;
+ } else {
+ memcpy(op, lowPrefix, restSize);
+ op += restSize;
+ }
+ }
+ continue;
+ }
+
+ /* copy match within block */
+ cpy = op + length;
+ if (unlikely(offset < 8)) {
+ const int dec64 = dec64table[offset];
+
+ op[0] = match[0];
+ op[1] = match[1];
+ op[2] = match[2];
+ op[3] = match[3];
+ match += dec32table[offset];
+ memcpy(op + 4, match, 4);
+ match -= dec64;
} else {
- LZ4_COPYSTEP(ref, op);
+ LZ4_copy8(op, match); match += 8;
}
- cpy = op + length - (STEPSIZE - 4);
- if (cpy > (oend - COPYLENGTH)) {
- /* Error: request to write beyond destination buffer */
- if (cpy > oend)
- goto _output_error;
-#if LZ4_ARCH64
- if ((ref + COPYLENGTH) > oend)
-#else
- if ((ref + COPYLENGTH) > oend ||
- (op + COPYLENGTH) > oend)
-#endif
+ op += 8;
+
+ if (unlikely(cpy > oend - 12)) {
+ BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
+
+ if (cpy > oend - LASTLITERALS) {
+ /*
+ * Error : last LASTLITERALS bytes
+ * must be literals (uncompressed)
+ */
goto _output_error;
- LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
+ }
+ if (op < oCopyLimit) {
+ LZ4_wildCopy(op, match, oCopyLimit);
+ match += oCopyLimit - op;
+ op = oCopyLimit;
+ }
while (op < cpy)
- *op++ = *ref++;
- op = cpy;
- /*
- * Check EOF (should never happen, since last 5 bytes
- * are supposed to be literals)
- */
- if (op == oend)
- goto _output_error;
- continue;
+ *op++ = *match++;
+ } else {
+ LZ4_copy8(op, match);
+ if (length > 16)
+ LZ4_wildCopy(op + 8, match + 8, cpy);
}
- LZ4_SECURECOPY(ref, op, cpy);
op = cpy; /* correction */
}
+
/* end of decoding */
- return (int) (((char *)ip) - source);
+ if (endOnInput) {
+ /* Nb of output bytes decoded */
+ return (int) (((char *)op) - dest);
+ } else {
+ /* Nb of input bytes read */
+ return (int) (((const char *)ip) - source);
+ }
- /* write overflow error detected */
+ /* Overflow error detected */
_output_error:
return -1;
}
-static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
- int isize, size_t maxoutputsize)
+int LZ4_decompress_safe(const char *source, char *dest,
+ int compressedSize, int maxDecompressedSize)
{
- const BYTE *ip = (const BYTE *) source;
- const BYTE *const iend = ip + isize;
- const BYTE *ref;
-
+ return LZ4_decompress_generic(source, dest, compressedSize,
+ maxDecompressedSize, endOnInputSize, full, 0,
+ noDict, (BYTE *)dest, NULL, 0);
+}
+EXPORT_SYMBOL(LZ4_decompress_safe);
- BYTE *op = (BYTE *) dest;
- BYTE * const oend = op + maxoutputsize;
- BYTE *cpy;
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+ int compressedSize, int targetOutputSize, int maxDecompressedSize)
+{
+ return LZ4_decompress_generic(source, dest, compressedSize,
+ maxDecompressedSize, endOnInputSize, partial,
+ targetOutputSize, noDict, (BYTE *)dest, NULL, 0);
+}
+EXPORT_SYMBOL(LZ4_decompress_safe_partial);
- /* Main Loop */
- while (ip < iend) {
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize)
+{
+ return LZ4_decompress_generic(source, dest, 0, originalSize,
+ endOnOutputSize, full, 0, withPrefix64k,
+ (BYTE *)(dest - 64 * KB), NULL, 64 * KB);
+}
+EXPORT_SYMBOL(LZ4_decompress_fast);
- unsigned token;
- size_t length;
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+ const char *dictionary, int dictSize)
+{
+ LZ4_streamDecode_t_internal *lz4sd = (LZ4_streamDecode_t_internal *) LZ4_streamDecode;
- /* get runlength */
- token = *ip++;
- length = (token >> ML_BITS);
- if (length == RUN_MASK) {
- int s = 255;
- while ((ip < iend) && (s == 255)) {
- s = *ip++;
- if (unlikely(length > (size_t)(length + s)))
- goto _output_error;
- length += s;
- }
- }
- /* copy literals */
- cpy = op + length;
- if ((cpy > oend - COPYLENGTH) ||
- (ip + length > iend - COPYLENGTH)) {
-
- if (cpy > oend)
- goto _output_error;/* writes beyond buffer */
-
- if (ip + length != iend)
- goto _output_error;/*
- * Error: LZ4 format requires
- * to consume all input
- * at this stage
- */
- memcpy(op, ip, length);
- op += length;
- break;/* Necessarily EOF, due to parsing restrictions */
- }
- LZ4_WILDCOPY(ip, op, cpy);
- ip -= (op - cpy);
- op = cpy;
+ lz4sd->prefixSize = (size_t) dictSize;
+ lz4sd->prefixEnd = (const BYTE *) dictionary + dictSize;
+ lz4sd->externalDict = NULL;
+ lz4sd->extDictSize = 0;
+ return 1;
+}
+EXPORT_SYMBOL(LZ4_setStreamDecode);
- /* get offset */
- LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
- ip += 2;
- if (ref < (BYTE * const) dest)
- goto _output_error;
- /*
- * Error : offset creates reference
- * outside of destination buffer
- */
+/*
+ * *_continue() :
+ * These decoding functions allow decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks must still be available at the memory
+ * position where they were decoded.
+ * If it's not possible, save the relevant part of
+ * decoded data into a safe buffer,
+ * and indicate where it stands using LZ4_setStreamDecode()
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+ const char *source, char *dest, int compressedSize, int maxOutputSize)
+{
+ LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+ int result;
+
+ if (lz4sd->prefixEnd == (BYTE *)dest) {
+ result = LZ4_decompress_generic(source, dest,
+ compressedSize,
+ maxOutputSize,
+ endOnInputSize, full, 0,
+ usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize,
+ lz4sd->externalDict,
+ lz4sd->extDictSize);
+
+ if (result <= 0)
+ return result;
+
+ lz4sd->prefixSize += result;
+ lz4sd->prefixEnd += result;
+ } else {
+ lz4sd->extDictSize = lz4sd->prefixSize;
+ lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+ result = LZ4_decompress_generic(source, dest,
+ compressedSize, maxOutputSize,
+ endOnInputSize, full, 0,
+ usingExtDict, (BYTE *)dest,
+ lz4sd->externalDict, lz4sd->extDictSize);
+ if (result <= 0)
+ return result;
+ lz4sd->prefixSize = result;
+ lz4sd->prefixEnd = (BYTE *)dest + result;
+ }
- /* get matchlength */
- length = (token & ML_MASK);
- if (length == ML_MASK) {
- while (ip < iend) {
- int s = *ip++;
- if (unlikely(length > (size_t)(length + s)))
- goto _output_error;
- length += s;
- if (s == 255)
- continue;
- break;
- }
- }
+ return result;
+}
+EXPORT_SYMBOL(LZ4_decompress_safe_continue);
- /* copy repeated sequence */
- if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
- int dec64 = dec64table[op - ref];
-#else
- const int dec64 = 0;
-#endif
- op[0] = ref[0];
- op[1] = ref[1];
- op[2] = ref[2];
- op[3] = ref[3];
- op += 4;
- ref += 4;
- ref -= dec32table[op - ref];
- PUT4(ref, op);
- op += STEPSIZE - 4;
- ref -= dec64;
- } else {
- LZ4_COPYSTEP(ref, op);
- }
- cpy = op + length - (STEPSIZE-4);
- if (cpy > oend - COPYLENGTH) {
- if (cpy > oend)
- goto _output_error; /* write outside of buf */
-#if LZ4_ARCH64
- if ((ref + COPYLENGTH) > oend)
-#else
- if ((ref + COPYLENGTH) > oend ||
- (op + COPYLENGTH) > oend)
-#endif
- goto _output_error;
- LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
- while (op < cpy)
- *op++ = *ref++;
- op = cpy;
- /*
- * Check EOF (should never happen, since last 5 bytes
- * are supposed to be literals)
- */
- if (op == oend)
- goto _output_error;
- continue;
- }
- LZ4_SECURECOPY(ref, op, cpy);
- op = cpy; /* correction */
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+ const char *source, char *dest, int originalSize)
+{
+ LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+ int result;
+
+ if (lz4sd->prefixEnd == (BYTE *)dest) {
+ result = LZ4_decompress_generic(source, dest, 0, originalSize,
+ endOnOutputSize, full, 0,
+ usingExtDict,
+ lz4sd->prefixEnd - lz4sd->prefixSize,
+ lz4sd->externalDict, lz4sd->extDictSize);
+
+ if (result <= 0)
+ return result;
+
+ lz4sd->prefixSize += originalSize;
+ lz4sd->prefixEnd += originalSize;
+ } else {
+ lz4sd->extDictSize = lz4sd->prefixSize;
+ lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+ result = LZ4_decompress_generic(source, dest, 0, originalSize,
+ endOnOutputSize, full, 0,
+ usingExtDict, (BYTE *)dest,
+ lz4sd->externalDict, lz4sd->extDictSize);
+ if (result <= 0)
+ return result;
+ lz4sd->prefixSize = originalSize;
+ lz4sd->prefixEnd = (BYTE *)dest + originalSize;
}
- /* end of decoding */
- return (int) (((char *) op) - dest);
- /* write overflow error detected */
-_output_error:
- return -1;
+ return result;
}
+EXPORT_SYMBOL(LZ4_decompress_fast_continue);
-int lz4_decompress(const unsigned char *src, size_t *src_len,
- unsigned char *dest, size_t actual_dest_len)
+/*
+ * Advanced decoding functions :
+ * *_usingDict() :
+ * These decoding functions work the same as "_continue" ones,
+ * the dictionary must be explicitly provided within parameters
+ */
+static inline int LZ4_decompress_usingDict_generic(const char *source,
+ char *dest, int compressedSize, int maxOutputSize, int safe,
+ const char *dictStart, int dictSize)
{
- int ret = -1;
- int input_len = 0;
-
- input_len = lz4_uncompress(src, dest, actual_dest_len);
- if (input_len < 0)
- goto exit_0;
- *src_len = input_len;
+ if (dictSize == 0)
+ return LZ4_decompress_generic(source, dest,
+ compressedSize, maxOutputSize, safe, full, 0,
+ noDict, (BYTE *)dest, NULL, 0);
+ if (dictStart + dictSize == dest) {
+ if (dictSize >= (int)(64 * KB - 1))
+ return LZ4_decompress_generic(source, dest,
+ compressedSize, maxOutputSize, safe, full, 0,
+ withPrefix64k, (BYTE *)dest - 64 * KB, NULL, 0);
+ return LZ4_decompress_generic(source, dest, compressedSize,
+ maxOutputSize, safe, full, 0, noDict,
+ (BYTE *)dest - dictSize, NULL, 0);
+ }
+ return LZ4_decompress_generic(source, dest, compressedSize,
+ maxOutputSize, safe, full, 0, usingExtDict,
+ (BYTE *)dest, (const BYTE *)dictStart, dictSize);
+}
- return 0;
-exit_0:
- return ret;
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+ int compressedSize, int maxOutputSize,
+ const char *dictStart, int dictSize)
+{
+ return LZ4_decompress_usingDict_generic(source, dest,
+ compressedSize, maxOutputSize, 1, dictStart, dictSize);
}
-#ifndef STATIC
-EXPORT_SYMBOL(lz4_decompress);
-#endif
+EXPORT_SYMBOL(LZ4_decompress_safe_usingDict);
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
- unsigned char *dest, size_t *dest_len)
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+ int originalSize, const char *dictStart, int dictSize)
{
- int ret = -1;
- int out_len = 0;
-
- out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len,
- *dest_len);
- if (out_len < 0)
- goto exit_0;
- *dest_len = out_len;
-
- return 0;
-exit_0:
- return ret;
+ return LZ4_decompress_usingDict_generic(source, dest, 0,
+ originalSize, 0, dictStart, dictSize);
}
-#ifndef STATIC
-EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
+EXPORT_SYMBOL(LZ4_decompress_fast_usingDict);
MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4 Decompressor");
-#endif
+MODULE_DESCRIPTION("LZ4 decompressor");
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index c79d7ea8a38e..23e1a1bd0d8d 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -1,157 +1,229 @@
+#ifndef __LZ4DEFS_H__
+#define __LZ4DEFS_H__
+
/*
- * lz4defs.h -- architecture specific defines
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * lz4defs.h -- common and architecture specific defines for the kernel usage
+
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ * - LZ4 homepage : http://www.lz4.org
+ * - LZ4 source repository : https://github.com/lz4/lz4
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Changed for kernel usage by:
+ * Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
*/
+#include <asm/unaligned.h>
+#include <linux/string.h> /* memset, memcpy */
+
/*
* Detects 64 bits mode
- */
+*/
#if defined(CONFIG_64BIT)
#define LZ4_ARCH64 1
#else
#define LZ4_ARCH64 0
#endif
-/*
- * Architecture-specific macros
- */
-#define BYTE u8
-typedef struct _U16_S { u16 v; } U16_S;
-typedef struct _U32_S { u32 v; } U32_S;
-typedef struct _U64_S { u64 v; } U64_S;
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-
-#define A16(x) (((U16_S *)(x))->v)
-#define A32(x) (((U32_S *)(x))->v)
-#define A64(x) (((U64_S *)(x))->v)
-
-#define PUT4(s, d) (A32(d) = A32(s))
-#define PUT8(s, d) (A64(d) = A64(s))
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
- (d = s - A16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \
- do { \
- A16(p) = v; \
- p += 2; \
- } while (0)
-#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
-
-#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v))
-#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v))
-#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v))
-
-#define PUT4(s, d) \
- put_unaligned(get_unaligned((const u32 *) s), (u32 *) d)
-#define PUT8(s, d) \
- put_unaligned(get_unaligned((const u64 *) s), (u64 *) d)
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
- (d = s - get_unaligned_le16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \
- do { \
- put_unaligned_le16(v, (u16 *)(p)); \
- p += 2; \
- } while (0)
-#endif
+/*-************************************
+ * Basic Types
+ **************************************/
+#include <linux/types.h>
-#define COPYLENGTH 8
-#define ML_BITS 4
-#define ML_MASK ((1U << ML_BITS) - 1)
-#define RUN_BITS (8 - ML_BITS)
-#define RUN_MASK ((1U << RUN_BITS) - 1)
-#define MEMORY_USAGE 14
-#define MINMATCH 4
-#define SKIPSTRENGTH 6
-#define LASTLITERALS 5
-#define MFLIMIT (COPYLENGTH + MINMATCH)
-#define MINLENGTH (MFLIMIT + 1)
-#define MAXD_LOG 16
-#define MAXD (1 << MAXD_LOG)
-#define MAXD_MASK (u32)(MAXD - 1)
-#define MAX_DISTANCE (MAXD - 1)
-#define HASH_LOG (MAXD_LOG - 1)
-#define HASHTABLESIZE (1 << HASH_LOG)
-#define MAX_NB_ATTEMPTS 256
-#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
-#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT - 1))
-#define HASHLOG64K ((MEMORY_USAGE - 2) + 1)
-#define HASH64KTABLESIZE (1U << HASHLOG64K)
-#define LZ4_HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \
- ((MINMATCH * 8) - (MEMORY_USAGE-2)))
-#define LZ4_HASH64K_VALUE(p) (((A32(p)) * 2654435761U) >> \
- ((MINMATCH * 8) - HASHLOG64K))
-#define HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \
- ((MINMATCH * 8) - HASH_LOG))
-
-#if LZ4_ARCH64/* 64-bit */
-#define STEPSIZE 8
-
-#define LZ4_COPYSTEP(s, d) \
- do { \
- PUT8(s, d); \
- d += 8; \
- s += 8; \
- } while (0)
-
-#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d)
-
-#define LZ4_SECURECOPY(s, d, e) \
- do { \
- if (d < e) { \
- LZ4_WILDCOPY(s, d, e); \
- } \
- } while (0)
-#define HTYPE u32
-
-#ifdef __BIG_ENDIAN
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+typedef uint8_t BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef int32_t S32;
+typedef uint64_t U64;
+typedef uintptr_t uptrval;
+
+/*-************************************
+ * Constants
+ **************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB (1<<10)
+#define MB (1<<20)
+#define GB (1U<<30)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1<<MAXD_LOG) - 1)
+#define STEPSIZE sizeof(size_t)
+
+#define ML_BITS 4
+#define ML_MASK ((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT-1));
+static const U32 LZ4_skipTrigger = 6;
+
+/*-************************************
+ * Reading and writing into memory
+ **************************************/
+
+static inline U16 LZ4_read16(const void *memPtr)
+{
+ U16 val;
+
+ memcpy(&val, memPtr, sizeof(val));
+
+ return val;
+}
+
+static inline U32 LZ4_read32(const void *memPtr)
+{
+ U32 val;
+
+ memcpy(&val, memPtr, sizeof(val));
+
+ return val;
+}
+
+static inline size_t LZ4_read_ARCH(const void *memPtr)
+{
+ size_t val;
+
+ memcpy(&val, memPtr, sizeof(val));
+
+ return val;
+}
+
+static inline void LZ4_write16(void *memPtr, U16 value)
+{
+ memcpy(memPtr, &value, sizeof(value));
+}
+
+static inline void LZ4_write32(void *memPtr, U32 value)
+{
+ memcpy(memPtr, &value, sizeof(value));
+}
+
+static inline U16 LZ4_readLE16(const void *memPtr)
+{
+#ifdef __LITTLE_ENDIAN__
+ return LZ4_read16(memPtr);
#else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
-#endif
+ const BYTE *p = (const BYTE *)memPtr;
-#else /* 32-bit */
-#define STEPSIZE 4
+ return (U16)((U16)p[0] + (p[1] << 8));
+#endif
+}
-#define LZ4_COPYSTEP(s, d) \
- do { \
- PUT4(s, d); \
- d += 4; \
- s += 4; \
- } while (0)
+static inline void LZ4_writeLE16(void *memPtr, U16 value)
+{
+#ifdef __LITTLE_ENDIAN__
+ LZ4_write16(memPtr, value);
+#else
+ BYTE *p = (BYTE *)memPtr;
-#define LZ4_COPYPACKET(s, d) \
- do { \
- LZ4_COPYSTEP(s, d); \
- LZ4_COPYSTEP(s, d); \
- } while (0)
+ p[0] = (BYTE) value;
+ p[1] = (BYTE)(value>>8);
+#endif
+}
-#define LZ4_SECURECOPY LZ4_WILDCOPY
-#define HTYPE const u8*
+static inline void LZ4_copy8(void *dst, const void *src)
+{
+ memcpy(dst, src, 8);
+}
-#ifdef __BIG_ENDIAN
+/*
+ * customized variant of memcpy,
+ * which can overwrite up to 7 bytes beyond dstEnd
+ */
+static inline void LZ4_wildCopy(void *dstPtr, const void *srcPtr, void *dstEnd)
+{
+ BYTE *d = (BYTE *)dstPtr;
+ const BYTE *s = (const BYTE *)srcPtr;
+ BYTE *const e = (BYTE *)dstEnd;
+
+ do {
+ LZ4_copy8(d, s);
+ d += 8;
+ s += 8;
+ } while (d < e);
+}
+
+#if LZ4_ARCH64
+#ifdef __BIG_ENDIAN__
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+#else
+#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
+#endif
+#else
+#ifdef __BIG_ENDIAN__
#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
#else
#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
#endif
+#endif
+static inline unsigned int LZ4_count(const BYTE *pIn, const BYTE *pMatch,
+ const BYTE *pInLimit)
+{
+ const BYTE *const pStart = pIn;
+
+ while (likely(pIn < pInLimit-(STEPSIZE-1))) {
+ size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+
+ if (!diff) {
+ pIn += STEPSIZE;
+ pMatch += STEPSIZE;
+ continue;
+ }
+ pIn += LZ4_NBCOMMONBYTES(diff);
+ return (unsigned int)(pIn - pStart);
+ }
+
+#ifdef LZ4_ARCH64
+ if ((pIn < (pInLimit-3))
+ && (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
+ pIn += 4; pMatch += 4;
+ }
#endif
+ if ((pIn < (pInLimit-1))
+ && (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
+ pIn += 2; pMatch += 2;
+ }
+ if ((pIn < pInLimit) && (*pMatch == *pIn))
+ pIn++;
+ return (unsigned int)(pIn - pStart);
+}
+
+typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
+typedef enum { byPtr, byU32, byU16 } tableType_t;
+
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
-#define LZ4_WILDCOPY(s, d, e) \
- do { \
- LZ4_COPYPACKET(s, d); \
- } while (d < e)
-
-#define LZ4_BLINDCOPY(s, d, l) \
- do { \
- u8 *e = (d) + l; \
- LZ4_WILDCOPY(s, d, e); \
- d = e; \
- } while (0)
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { full = 0, partial = 1 } earlyEnd_directive;
+
+#endif
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index f344f76b6559..836329219181 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -1,19 +1,17 @@
/*
* LZ4 HC - High Compression Mode of LZ4
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Copyright (C) 2011-2015, Yann Collet.
*
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
- *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -25,323 +23,361 @@
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
* You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ * - LZ4 homepage : http://www.lz4.org
+ * - LZ4 source repository : https://github.com/lz4/lz4
*
- * Changed for kernel use by:
- * Chanho Min <chanho.min@lge.com>
+ * Changed for kernel usage by:
+ * Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
*/
-#include <linux/module.h>
-#include <linux/kernel.h>
+/*-************************************
+ * Dependencies
+ **************************************/
#include <linux/lz4.h>
-#include <asm/unaligned.h>
#include "lz4defs.h"
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h> /* memset */
-struct lz4hc_data {
- const u8 *base;
- HTYPE hashtable[HASHTABLESIZE];
- u16 chaintable[MAXD];
- const u8 *nexttoupdate;
-} __attribute__((__packed__));
+/* *************************************
+ * Local Constants and types
+ ***************************************/
-static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
+#define OPTIMAL_ML (int)((ML_MASK - 1) + MINMATCH)
+
+#define HASH_FUNCTION(i) (((i) * 2654435761U) \
+ >> ((MINMATCH*8) - LZ4HC_HASH_LOG))
+#define DELTANEXTU16(p) chainTable[(U16)(p)] /* faster */
+
+static U32 LZ4HC_hashPtr(const void *ptr)
{
- memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
- memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
-
-#if LZ4_ARCH64
- hc4->nexttoupdate = base + 1;
-#else
- hc4->nexttoupdate = base;
-#endif
- hc4->base = base;
- return 1;
+ return HASH_FUNCTION(LZ4_read32(ptr));
+}
+
+/**************************************
+ * HC Compression
+ **************************************/
+static void LZ4HC_init(LZ4HC_CCtx_internal *hc4, const BYTE *start)
+{
+ memset((void *)hc4->hashTable, 0, sizeof(hc4->hashTable));
+ memset(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+ hc4->nextToUpdate = 64 * KB;
+ hc4->base = start - 64 * KB;
+ hc4->end = start;
+ hc4->dictBase = start - 64 * KB;
+ hc4->dictLimit = 64 * KB;
+ hc4->lowLimit = 64 * KB;
}
/* Update chains up to ip (excluded) */
-static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
+static inline void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
+ const BYTE *ip)
{
- u16 *chaintable = hc4->chaintable;
- HTYPE *hashtable = hc4->hashtable;
-#if LZ4_ARCH64
+ U16 * const chainTable = hc4->chainTable;
+ U32 * const hashTable = hc4->hashTable;
const BYTE * const base = hc4->base;
-#else
- const int base = 0;
-#endif
+ U32 const target = (U32)(ip - base);
+ U32 idx = hc4->nextToUpdate;
+
+ while (idx < target) {
+ U32 const h = LZ4HC_hashPtr(base + idx);
+ size_t delta = idx - hashTable[h];
- while (hc4->nexttoupdate < ip) {
- const u8 *p = hc4->nexttoupdate;
- size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
if (delta > MAX_DISTANCE)
delta = MAX_DISTANCE;
- chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
- hashtable[HASH_VALUE(p)] = (p) - base;
- hc4->nexttoupdate++;
- }
-}
-static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2,
- const u8 *const matchlimit)
-{
- const u8 *p1t = p1;
-
- while (p1t < matchlimit - (STEPSIZE - 1)) {
-#if LZ4_ARCH64
- u64 diff = A64(p2) ^ A64(p1t);
-#else
- u32 diff = A32(p2) ^ A32(p1t);
-#endif
- if (!diff) {
- p1t += STEPSIZE;
- p2 += STEPSIZE;
- continue;
- }
- p1t += LZ4_NBCOMMONBYTES(diff);
- return p1t - p1;
- }
-#if LZ4_ARCH64
- if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) {
- p1t += 4;
- p2 += 4;
- }
-#endif
+ DELTANEXTU16(idx) = (U16)delta;
- if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) {
- p1t += 2;
- p2 += 2;
+ hashTable[h] = idx;
+ idx++;
}
- if ((p1t < matchlimit) && (*p2 == *p1t))
- p1t++;
- return p1t - p1;
+
+ hc4->nextToUpdate = target;
}
-static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
- const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
+static inline int LZ4HC_InsertAndFindBestMatch(
+ LZ4HC_CCtx_internal *hc4, /* Index table will be updated */
+ const BYTE *ip,
+ const BYTE * const iLimit,
+ const BYTE **matchpos,
+ const int maxNbAttempts)
{
- u16 *const chaintable = hc4->chaintable;
- HTYPE *const hashtable = hc4->hashtable;
- const u8 *ref;
-#if LZ4_ARCH64
+ U16 * const chainTable = hc4->chainTable;
+ U32 * const HashTable = hc4->hashTable;
const BYTE * const base = hc4->base;
-#else
- const int base = 0;
-#endif
- int nbattempts = MAX_NB_ATTEMPTS;
- size_t repl = 0, ml = 0;
- u16 delta;
+ const BYTE * const dictBase = hc4->dictBase;
+ const U32 dictLimit = hc4->dictLimit;
+ const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+ ? hc4->lowLimit
+ : (U32)(ip - base) - (64 * KB - 1);
+ U32 matchIndex;
+ int nbAttempts = maxNbAttempts;
+ size_t ml = 0;
/* HC4 match finder */
- lz4hc_insert(hc4, ip);
- ref = hashtable[HASH_VALUE(ip)] + base;
-
- /* potential repetition */
- if (ref >= ip-4) {
- /* confirmed */
- if (A32(ref) == A32(ip)) {
- delta = (u16)(ip-ref);
- repl = ml = lz4hc_commonlength(ip + MINMATCH,
- ref + MINMATCH, matchlimit) + MINMATCH;
- *matchpos = ref;
- }
- ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
- }
+ LZ4HC_Insert(hc4, ip);
+ matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+ while ((matchIndex >= lowLimit)
+ && (nbAttempts)) {
+ nbAttempts--;
+ if (matchIndex >= dictLimit) {
+ const BYTE * const match = base + matchIndex;
+
+ if (*(match + ml) == *(ip + ml)
+ && (LZ4_read32(match) == LZ4_read32(ip))) {
+ size_t const mlt = LZ4_count(ip + MINMATCH,
+ match + MINMATCH, iLimit) + MINMATCH;
- while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
- nbattempts--;
- if (*(ref + ml) == *(ip + ml)) {
- if (A32(ref) == A32(ip)) {
- size_t mlt =
- lz4hc_commonlength(ip + MINMATCH,
- ref + MINMATCH, matchlimit) + MINMATCH;
if (mlt > ml) {
ml = mlt;
- *matchpos = ref;
+ *matchpos = match;
+ }
+ }
+ } else {
+ const BYTE * const match = dictBase + matchIndex;
+
+ if (LZ4_read32(match) == LZ4_read32(ip)) {
+ size_t mlt;
+ const BYTE *vLimit = ip
+ + (dictLimit - matchIndex);
+
+ if (vLimit > iLimit)
+ vLimit = iLimit;
+ mlt = LZ4_count(ip + MINMATCH,
+ match + MINMATCH, vLimit) + MINMATCH;
+ if ((ip + mlt == vLimit)
+ && (vLimit < iLimit))
+ mlt += LZ4_count(ip + mlt,
+ base + dictLimit,
+ iLimit);
+ if (mlt > ml) {
+ /* virtual matchpos */
+ ml = mlt;
+ *matchpos = base + matchIndex;
}
}
}
- ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
- }
-
- /* Complete table */
- if (repl) {
- const BYTE *ptr = ip;
- const BYTE *end;
- end = ip + repl - (MINMATCH-1);
- /* Pre-Load */
- while (ptr < end - delta) {
- chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
- ptr++;
- }
- do {
- chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
- /* Head of chain */
- hashtable[HASH_VALUE(ptr)] = (ptr) - base;
- ptr++;
- } while (ptr < end);
- hc4->nexttoupdate = end;
+ matchIndex -= DELTANEXTU16(matchIndex);
}
return (int)ml;
}
-static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
- const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
- const u8 **matchpos, const u8 **startpos)
+static inline int LZ4HC_InsertAndGetWiderMatch(
+ LZ4HC_CCtx_internal *hc4,
+ const BYTE * const ip,
+ const BYTE * const iLowLimit,
+ const BYTE * const iHighLimit,
+ int longest,
+ const BYTE **matchpos,
+ const BYTE **startpos,
+ const int maxNbAttempts)
{
- u16 *const chaintable = hc4->chaintable;
- HTYPE *const hashtable = hc4->hashtable;
-#if LZ4_ARCH64
+ U16 * const chainTable = hc4->chainTable;
+ U32 * const HashTable = hc4->hashTable;
const BYTE * const base = hc4->base;
-#else
- const int base = 0;
-#endif
- const u8 *ref;
- int nbattempts = MAX_NB_ATTEMPTS;
- int delta = (int)(ip - startlimit);
+ const U32 dictLimit = hc4->dictLimit;
+ const BYTE * const lowPrefixPtr = base + dictLimit;
+ const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+ ? hc4->lowLimit
+ : (U32)(ip - base) - (64 * KB - 1);
+ const BYTE * const dictBase = hc4->dictBase;
+ U32 matchIndex;
+ int nbAttempts = maxNbAttempts;
+ int delta = (int)(ip - iLowLimit);
/* First Match */
- lz4hc_insert(hc4, ip);
- ref = hashtable[HASH_VALUE(ip)] + base;
-
- while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
- && (nbattempts)) {
- nbattempts--;
- if (*(startlimit + longest) == *(ref - delta + longest)) {
- if (A32(ref) == A32(ip)) {
- const u8 *reft = ref + MINMATCH;
- const u8 *ipt = ip + MINMATCH;
- const u8 *startt = ip;
-
- while (ipt < matchlimit-(STEPSIZE - 1)) {
- #if LZ4_ARCH64
- u64 diff = A64(reft) ^ A64(ipt);
- #else
- u32 diff = A32(reft) ^ A32(ipt);
- #endif
-
- if (!diff) {
- ipt += STEPSIZE;
- reft += STEPSIZE;
- continue;
+ LZ4HC_Insert(hc4, ip);
+ matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+ while ((matchIndex >= lowLimit)
+ && (nbAttempts)) {
+ nbAttempts--;
+ if (matchIndex >= dictLimit) {
+ const BYTE *matchPtr = base + matchIndex;
+
+ if (*(iLowLimit + longest)
+ == *(matchPtr - delta + longest)) {
+ if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+ int mlt = MINMATCH + LZ4_count(
+ ip + MINMATCH,
+ matchPtr + MINMATCH,
+ iHighLimit);
+ int back = 0;
+
+ while ((ip + back > iLowLimit)
+ && (matchPtr + back > lowPrefixPtr)
+ && (ip[back - 1] == matchPtr[back - 1]))
+ back--;
+
+ mlt -= back;
+
+ if (mlt > longest) {
+ longest = (int)mlt;
+ *matchpos = matchPtr + back;
+ *startpos = ip + back;
}
- ipt += LZ4_NBCOMMONBYTES(diff);
- goto _endcount;
- }
- #if LZ4_ARCH64
- if ((ipt < (matchlimit - 3))
- && (A32(reft) == A32(ipt))) {
- ipt += 4;
- reft += 4;
- }
- ipt += 2;
- #endif
- if ((ipt < (matchlimit - 1))
- && (A16(reft) == A16(ipt))) {
- reft += 2;
}
- if ((ipt < matchlimit) && (*reft == *ipt))
- ipt++;
-_endcount:
- reft = ref;
-
- while ((startt > startlimit)
- && (reft > hc4->base)
- && (startt[-1] == reft[-1])) {
- startt--;
- reft--;
- }
-
- if ((ipt - startt) > longest) {
- longest = (int)(ipt - startt);
- *matchpos = reft;
- *startpos = startt;
+ }
+ } else {
+ const BYTE * const matchPtr = dictBase + matchIndex;
+
+ if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+ size_t mlt;
+ int back = 0;
+ const BYTE *vLimit = ip + (dictLimit - matchIndex);
+
+ if (vLimit > iHighLimit)
+ vLimit = iHighLimit;
+
+ mlt = LZ4_count(ip + MINMATCH,
+ matchPtr + MINMATCH, vLimit) + MINMATCH;
+
+ if ((ip + mlt == vLimit) && (vLimit < iHighLimit))
+ mlt += LZ4_count(ip + mlt, base + dictLimit,
+ iHighLimit);
+ while ((ip + back > iLowLimit)
+ && (matchIndex + back > lowLimit)
+ && (ip[back - 1] == matchPtr[back - 1]))
+ back--;
+
+ mlt -= back;
+
+ if ((int)mlt > longest) {
+ longest = (int)mlt;
+ *matchpos = base + matchIndex + back;
+ *startpos = ip + back;
}
}
}
- ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
+
+ matchIndex -= DELTANEXTU16(matchIndex);
}
+
return longest;
}
-static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
- int ml, const u8 *ref)
+static inline int LZ4HC_encodeSequence(
+ const BYTE **ip,
+ BYTE **op,
+ const BYTE **anchor,
+ int matchLength,
+ const BYTE * const match,
+ limitedOutput_directive limitedOutputBuffer,
+ BYTE *oend)
{
- int length, len;
- u8 *token;
+ int length;
+ BYTE *token;
/* Encode Literal length */
length = (int)(*ip - *anchor);
token = (*op)++;
+
+ if ((limitedOutputBuffer)
+ && ((*op + (length>>8)
+ + length + (2 + 1 + LASTLITERALS)) > oend)) {
+ /* Check output limit */
+ return 1;
+ }
if (length >= (int)RUN_MASK) {
- *token = (RUN_MASK << ML_BITS);
+ int len;
+
+ *token = (RUN_MASK<<ML_BITS);
len = length - RUN_MASK;
for (; len > 254 ; len -= 255)
*(*op)++ = 255;
- *(*op)++ = (u8)len;
+ *(*op)++ = (BYTE)len;
} else
- *token = (length << ML_BITS);
+ *token = (BYTE)(length<<ML_BITS);
/* Copy Literals */
- LZ4_BLINDCOPY(*anchor, *op, length);
+ LZ4_wildCopy(*op, *anchor, (*op) + length);
+ *op += length;
/* Encode Offset */
- LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref));
+ LZ4_writeLE16(*op, (U16)(*ip - match));
+ *op += 2;
/* Encode MatchLength */
- len = (int)(ml - MINMATCH);
- if (len >= (int)ML_MASK) {
+ length = (int)(matchLength - MINMATCH);
+
+ if ((limitedOutputBuffer)
+ && (*op + (length>>8)
+ + (1 + LASTLITERALS) > oend)) {
+ /* Check output limit */
+ return 1;
+ }
+
+ if (length >= (int)ML_MASK) {
*token += ML_MASK;
- len -= ML_MASK;
- for (; len > 509 ; len -= 510) {
+ length -= ML_MASK;
+
+ for (; length > 509 ; length -= 510) {
*(*op)++ = 255;
*(*op)++ = 255;
}
- if (len > 254) {
- len -= 255;
+
+ if (length > 254) {
+ length -= 255;
*(*op)++ = 255;
}
- *(*op)++ = (u8)len;
+
+ *(*op)++ = (BYTE)length;
} else
- *token += len;
+ *token += (BYTE)(length);
/* Prepare next loop */
- *ip += ml;
+ *ip += matchLength;
*anchor = *ip;
return 0;
}
-static int lz4_compresshcctx(struct lz4hc_data *ctx,
- const char *source,
- char *dest,
- int isize)
+static int LZ4HC_compress_generic(
+ LZ4HC_CCtx_internal *const ctx,
+ const char * const source,
+ char * const dest,
+ int const inputSize,
+ int const maxOutputSize,
+ int compressionLevel,
+ limitedOutput_directive limit
+ )
{
- const u8 *ip = (const u8 *)source;
- const u8 *anchor = ip;
- const u8 *const iend = ip + isize;
- const u8 *const mflimit = iend - MFLIMIT;
- const u8 *const matchlimit = (iend - LASTLITERALS);
+ const BYTE *ip = (const BYTE *) source;
+ const BYTE *anchor = ip;
+ const BYTE * const iend = ip + inputSize;
+ const BYTE * const mflimit = iend - MFLIMIT;
+ const BYTE * const matchlimit = (iend - LASTLITERALS);
- u8 *op = (u8 *)dest;
+ BYTE *op = (BYTE *) dest;
+ BYTE * const oend = op + maxOutputSize;
+ unsigned int maxNbAttempts;
int ml, ml2, ml3, ml0;
- const u8 *ref = NULL;
- const u8 *start2 = NULL;
- const u8 *ref2 = NULL;
- const u8 *start3 = NULL;
- const u8 *ref3 = NULL;
- const u8 *start0;
- const u8 *ref0;
- int lastrun;
+ const BYTE *ref = NULL;
+ const BYTE *start2 = NULL;
+ const BYTE *ref2 = NULL;
+ const BYTE *start3 = NULL;
+ const BYTE *ref3 = NULL;
+ const BYTE *start0;
+ const BYTE *ref0;
+
+ /* init */
+ if (compressionLevel > LZ4HC_MAX_CLEVEL)
+ compressionLevel = LZ4HC_MAX_CLEVEL;
+ if (compressionLevel < 1)
+ compressionLevel = LZ4HC_DEFAULT_CLEVEL;
+ maxNbAttempts = 1 << (compressionLevel - 1);
+ ctx->end += inputSize;
ip++;
/* Main Loop */
while (ip < mflimit) {
- ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
+ ml = LZ4HC_InsertAndFindBestMatch(ctx, ip,
+ matchlimit, (&ref), maxNbAttempts);
if (!ml) {
ip++;
continue;
@@ -351,51 +387,59 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
start0 = ip;
ref0 = ref;
ml0 = ml;
-_search2:
- if (ip+ml < mflimit)
- ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
- ip + 1, matchlimit, ml, &ref2, &start2);
+
+_Search2:
+ if (ip + ml < mflimit)
+ ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+ ip + ml - 2, ip + 0,
+ matchlimit, ml, &ref2,
+ &start2, maxNbAttempts);
else
ml2 = ml;
- /* No better match */
+
if (ml2 == ml) {
- lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+ /* No better match */
+ if (LZ4HC_encodeSequence(&ip, &op,
+ &anchor, ml, ref, limit, oend))
+ return 0;
continue;
}
if (start0 < ip) {
- /* empirical */
if (start2 < ip + ml0) {
+ /* empirical */
ip = start0;
ref = ref0;
ml = ml0;
}
}
- /*
- * Here, start0==ip
- * First Match too small : removed
- */
+
+ /* Here, start0 == ip */
if ((start2 - ip) < 3) {
+ /* First Match too small : removed */
ml = ml2;
ip = start2;
ref = ref2;
- goto _search2;
+ goto _Search2;
}
-_search3:
+_Search3:
/*
- * Currently we have :
- * ml2 > ml1, and
- * ip1+3 <= ip2 (usually < ip1+ml1)
- */
+ * Currently we have :
+ * ml2 > ml1, and
+ * ip1 + 3 <= ip2 (usually < ip1 + ml1)
+ */
if ((start2 - ip) < OPTIMAL_ML) {
int correction;
int new_ml = ml;
+
if (new_ml > OPTIMAL_ML)
new_ml = OPTIMAL_ML;
if (ip + new_ml > start2 + ml2 - MINMATCH)
new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+
correction = new_ml - (int)(start2 - ip);
+
if (correction > 0) {
start2 += correction;
ref2 += correction;
@@ -403,39 +447,44 @@ _search3:
}
}
/*
- * Now, we have start2 = ip+new_ml,
- * with new_ml=min(ml, OPTIMAL_ML=18)
+ * Now, we have start2 = ip + new_ml,
+ * with new_ml = min(ml, OPTIMAL_ML = 18)
*/
+
if (start2 + ml2 < mflimit)
- ml3 = lz4hc_insertandgetwidermatch(ctx,
- start2 + ml2 - 3, start2, matchlimit,
- ml2, &ref3, &start3);
+ ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+ start2 + ml2 - 3, start2,
+ matchlimit, ml2, &ref3, &start3,
+ maxNbAttempts);
else
ml3 = ml2;
- /* No better match : 2 sequences to encode */
if (ml3 == ml2) {
+ /* No better match : 2 sequences to encode */
/* ip & ref are known; Now for ml */
- if (start2 < ip+ml)
+ if (start2 < ip + ml)
ml = (int)(start2 - ip);
-
/* Now, encode 2 sequences */
- lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+ if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+ ml, ref, limit, oend))
+ return 0;
ip = start2;
- lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
+ if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+ ml2, ref2, limit, oend))
+ return 0;
continue;
}
- /* Not enough space for match 2 : remove it */
if (start3 < ip + ml + 3) {
- /*
- * can write Seq1 immediately ==> Seq2 is removed,
- * so Seq3 becomes Seq1
- */
+ /* Not enough space for match 2 : remove it */
if (start3 >= (ip + ml)) {
+ /* can write Seq1 immediately
+ * ==> Seq2 is removed,
+ * so Seq3 becomes Seq1
+ */
if (start2 < ip + ml) {
- int correction =
- (int)(ip + ml - start2);
+ int correction = (int)(ip + ml - start2);
+
start2 += correction;
ref2 += correction;
ml2 -= correction;
@@ -446,35 +495,38 @@ _search3:
}
}
- lz4_encodesequence(&ip, &op, &anchor, ml, ref);
- ip = start3;
+ if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+ ml, ref, limit, oend))
+ return 0;
+ ip = start3;
ref = ref3;
- ml = ml3;
+ ml = ml3;
start0 = start2;
ref0 = ref2;
ml0 = ml2;
- goto _search2;
+ goto _Search2;
}
start2 = start3;
ref2 = ref3;
ml2 = ml3;
- goto _search3;
+ goto _Search3;
}
/*
- * OK, now we have 3 ascending matches; let's write at least
- * the first one ip & ref are known; Now for ml
- */
+ * OK, now we have 3 ascending matches;
+ * let's write at least the first one
+ * ip & ref are known; Now for ml
+ */
if (start2 < ip + ml) {
if ((start2 - ip) < (int)ML_MASK) {
int correction;
+
if (ml > OPTIMAL_ML)
ml = OPTIMAL_ML;
if (ip + ml > start2 + ml2 - MINMATCH)
- ml = (int)(start2 - ip) + ml2
- - MINMATCH;
+ ml = (int)(start2 - ip) + ml2 - MINMATCH;
correction = ml - (int)(start2 - ip);
if (correction > 0) {
start2 += correction;
@@ -484,7 +536,9 @@ _search3:
} else
ml = (int)(start2 - ip);
}
- lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+ if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml,
+ ref, limit, oend))
+ return 0;
ip = start2;
ref = ref2;
@@ -494,46 +548,222 @@ _search3:
ref2 = ref3;
ml2 = ml3;
- goto _search3;
+ goto _Search3;
}
/* Encode Last Literals */
- lastrun = (int)(iend - anchor);
- if (lastrun >= (int)RUN_MASK) {
- *op++ = (RUN_MASK << ML_BITS);
- lastrun -= RUN_MASK;
- for (; lastrun > 254 ; lastrun -= 255)
- *op++ = 255;
- *op++ = (u8) lastrun;
- } else
- *op++ = (lastrun << ML_BITS);
- memcpy(op, anchor, iend - anchor);
- op += iend - anchor;
+ {
+ int lastRun = (int)(iend - anchor);
+
+ if ((limit)
+ && (((char *)op - dest) + lastRun + 1
+ + ((lastRun + 255 - RUN_MASK)/255)
+ > (U32)maxOutputSize)) {
+ /* Check output limit */
+ return 0;
+ }
+ if (lastRun >= (int)RUN_MASK) {
+ *op++ = (RUN_MASK<<ML_BITS);
+ lastRun -= RUN_MASK;
+ for (; lastRun > 254 ; lastRun -= 255)
+ *op++ = 255;
+ *op++ = (BYTE) lastRun;
+ } else
+ *op++ = (BYTE)(lastRun<<ML_BITS);
+ memcpy(op, anchor, iend - anchor);
+ op += iend - anchor;
+ }
+
/* End */
return (int) (((char *)op) - dest);
}
-int lz4hc_compress(const unsigned char *src, size_t src_len,
- unsigned char *dst, size_t *dst_len, void *wrkmem)
+static int LZ4_compress_HC_extStateHC(
+ void *state,
+ const char *src,
+ char *dst,
+ int srcSize,
+ int maxDstSize,
+ int compressionLevel)
{
- int ret = -1;
- int out_len = 0;
+ LZ4HC_CCtx_internal *ctx = &((LZ4_streamHC_t *)state)->internal_donotuse;
- struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
- lz4hc_init(hc4, (const u8 *)src);
- out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
- (char *)dst, (int)src_len);
+ if (((size_t)(state)&(sizeof(void *) - 1)) != 0) {
+ /* Error : state is not aligned
+ * for pointers (32 or 64 bits)
+ */
+ return 0;
+ }
- if (out_len < 0)
- goto exit;
+ LZ4HC_init(ctx, (const BYTE *)src);
- *dst_len = out_len;
- return 0;
+ if (maxDstSize < LZ4_compressBound(srcSize))
+ return LZ4HC_compress_generic(ctx, src, dst,
+ srcSize, maxDstSize, compressionLevel, limitedOutput);
+ else
+ return LZ4HC_compress_generic(ctx, src, dst,
+ srcSize, maxDstSize, compressionLevel, noLimit);
+}
+
+int LZ4_compress_HC(const char *src, char *dst, int srcSize,
+ int maxDstSize, int compressionLevel, void *wrkmem)
+{
+ return LZ4_compress_HC_extStateHC(wrkmem, src, dst,
+ srcSize, maxDstSize, compressionLevel);
+}
+EXPORT_SYMBOL(LZ4_compress_HC);
+
+/**************************************
+ * Streaming Functions
+ **************************************/
+void LZ4_resetStreamHC(LZ4_streamHC_t *LZ4_streamHCPtr, int compressionLevel)
+{
+ LZ4_streamHCPtr->internal_donotuse.base = NULL;
+ LZ4_streamHCPtr->internal_donotuse.compressionLevel = (unsigned int)compressionLevel;
+}
+
+int LZ4_loadDictHC(LZ4_streamHC_t *LZ4_streamHCPtr,
+ const char *dictionary,
+ int dictSize)
+{
+ LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+ if (dictSize > 64 * KB) {
+ dictionary += dictSize - 64 * KB;
+ dictSize = 64 * KB;
+ }
+ LZ4HC_init(ctxPtr, (const BYTE *)dictionary);
+ if (dictSize >= 4)
+ LZ4HC_Insert(ctxPtr, (const BYTE *)dictionary + (dictSize - 3));
+ ctxPtr->end = (const BYTE *)dictionary + dictSize;
+ return dictSize;
+}
+EXPORT_SYMBOL(LZ4_loadDictHC);
-exit:
- return ret;
+/* compression */
+
+static void LZ4HC_setExternalDict(
+ LZ4HC_CCtx_internal *ctxPtr,
+ const BYTE *newBlock)
+{
+ if (ctxPtr->end >= ctxPtr->base + 4) {
+ /* Referencing remaining dictionary content */
+ LZ4HC_Insert(ctxPtr, ctxPtr->end - 3);
+ }
+
+ /*
+ * Only one memory segment for extDict,
+ * so any previous extDict is lost at this stage
+ */
+ ctxPtr->lowLimit = ctxPtr->dictLimit;
+ ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
+ ctxPtr->dictBase = ctxPtr->base;
+ ctxPtr->base = newBlock - ctxPtr->dictLimit;
+ ctxPtr->end = newBlock;
+ /* match referencing will resume from there */
+ ctxPtr->nextToUpdate = ctxPtr->dictLimit;
+}
+EXPORT_SYMBOL(LZ4HC_setExternalDict);
+
+static int LZ4_compressHC_continue_generic(
+ LZ4_streamHC_t *LZ4_streamHCPtr,
+ const char *source,
+ char *dest,
+ int inputSize,
+ int maxOutputSize,
+ limitedOutput_directive limit)
+{
+ LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+ /* auto - init if forgotten */
+ if (ctxPtr->base == NULL)
+ LZ4HC_init(ctxPtr, (const BYTE *) source);
+
+ /* Check overflow */
+ if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 * GB) {
+ size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base)
+ - ctxPtr->dictLimit;
+ if (dictSize > 64 * KB)
+ dictSize = 64 * KB;
+ LZ4_loadDictHC(LZ4_streamHCPtr,
+ (const char *)(ctxPtr->end) - dictSize, (int)dictSize);
+ }
+
+ /* Check if blocks follow each other */
+ if ((const BYTE *)source != ctxPtr->end)
+ LZ4HC_setExternalDict(ctxPtr, (const BYTE *)source);
+
+ /* Check overlapping input/dictionary space */
+ {
+ const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+ const BYTE * const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+ const BYTE * const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;
+
+ if ((sourceEnd > dictBegin)
+ && ((const BYTE *)source < dictEnd)) {
+ if (sourceEnd > dictEnd)
+ sourceEnd = dictEnd;
+ ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
+
+ if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4)
+ ctxPtr->lowLimit = ctxPtr->dictLimit;
+ }
+ }
+
+ return LZ4HC_compress_generic(ctxPtr, source, dest,
+ inputSize, maxOutputSize, ctxPtr->compressionLevel, limit);
+}
+
+int LZ4_compress_HC_continue(
+ LZ4_streamHC_t *LZ4_streamHCPtr,
+ const char *source,
+ char *dest,
+ int inputSize,
+ int maxOutputSize)
+{
+ if (maxOutputSize < LZ4_compressBound(inputSize))
+ return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+ source, dest, inputSize, maxOutputSize, limitedOutput);
+ else
+ return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+ source, dest, inputSize, maxOutputSize, noLimit);
+}
+EXPORT_SYMBOL(LZ4_compress_HC_continue);
+
+/* dictionary saving */
+
+int LZ4_saveDictHC(
+ LZ4_streamHC_t *LZ4_streamHCPtr,
+ char *safeBuffer,
+ int dictSize)
+{
+ LZ4HC_CCtx_internal *const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
+ int const prefixSize = (int)(streamPtr->end
+ - (streamPtr->base + streamPtr->dictLimit));
+
+ if (dictSize > 64 * KB)
+ dictSize = 64 * KB;
+ if (dictSize < 4)
+ dictSize = 0;
+ if (dictSize > prefixSize)
+ dictSize = prefixSize;
+
+ memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
+
+ {
+ U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
+
+ streamPtr->end = (const BYTE *)safeBuffer + dictSize;
+ streamPtr->base = streamPtr->end - endIndex;
+ streamPtr->dictLimit = endIndex - dictSize;
+ streamPtr->lowLimit = endIndex - dictSize;
+
+ if (streamPtr->nextToUpdate < streamPtr->dictLimit)
+ streamPtr->nextToUpdate = streamPtr->dictLimit;
+ }
+ return dictSize;
}
-EXPORT_SYMBOL(lz4hc_compress);
+EXPORT_SYMBOL(LZ4_saveDictHC);
MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4HC compressor");
+MODULE_DESCRIPTION("LZ4 HC compressor");
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 1f8b112a7c35..4ba2828a67c0 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -427,7 +427,9 @@ static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {}
static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {}
static const struct rb_augment_callbacks dummy_callbacks = {
- dummy_propagate, dummy_copy, dummy_rotate
+ .propagate = dummy_propagate,
+ .copy = dummy_copy,
+ .rotate = dummy_rotate
};
void rb_insert_color(struct rb_node *node, struct rb_root *root)
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 004fc70fc56a..77df28b21597 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -394,17 +394,26 @@ int sg_alloc_table_from_pages(struct sg_table *sgt,
unsigned long offset, unsigned long size,
gfp_t gfp_mask)
{
+ unsigned int chunk_pages;
unsigned int chunks;
unsigned int i;
unsigned int cur_page;
int ret;
struct scatterlist *s;
+ BUILD_BUG_ON(!typecheck(typeof(s->length), unsigned int));
+
/* compute number of contiguous chunks */
chunks = 1;
- for (i = 1; i < n_pages; ++i)
- if (page_to_pfn(pages[i]) != page_to_pfn(pages[i - 1]) + 1)
+ chunk_pages = 1;
+ for (i = 1; i < n_pages; ++i) {
+ if (page_to_pfn(pages[i]) != page_to_pfn(pages[i - 1]) + 1 ||
+ chunk_pages >= UINT_MAX >> PAGE_SHIFT) {
++chunks;
+ chunk_pages = 0;
+ }
+ ++chunk_pages;
+ }
ret = sg_alloc_table(sgt, chunks, gfp_mask);
if (unlikely(ret))
@@ -417,10 +426,15 @@ int sg_alloc_table_from_pages(struct sg_table *sgt,
unsigned int j;
/* look for the end of the current chunk */
- for (j = cur_page + 1; j < n_pages; ++j)
+ chunk_pages = 1;
+ for (j = cur_page + 1; j < n_pages; ++j) {
if (page_to_pfn(pages[j]) !=
- page_to_pfn(pages[j - 1]) + 1)
+ page_to_pfn(pages[j - 1]) + 1 ||
+ chunk_pages >= UINT_MAX >> PAGE_SHIFT) {
break;
+ }
+ ++chunk_pages;
+ }
chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
sg_set_page(s, pages[cur_page], min(size, chunk_size), offset);
@@ -651,7 +665,6 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
{
unsigned int offset = 0;
struct sg_mapping_iter miter;
- unsigned long flags;
unsigned int sg_flags = SG_MITER_ATOMIC;
if (to_buffer)
@@ -664,9 +677,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
if (!sg_miter_skip(&miter, skip))
return false;
- local_irq_save(flags);
-
- while (sg_miter_next(&miter) && offset < buflen) {
+ while ((offset < buflen) && sg_miter_next(&miter)) {
unsigned int len;
len = min(miter.length, buflen - offset);
@@ -681,7 +692,6 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
sg_miter_stop(&miter);
- local_irq_restore(flags);
return offset;
}
EXPORT_SYMBOL(sg_copy_buffer);
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 1feed6a2b12a..0beaa1d899aa 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -9,13 +9,13 @@
#include <linux/quicklist.h>
#include <linux/cma.h>
-void show_mem(unsigned int filter)
+void show_mem(unsigned int filter, nodemask_t *nodemask)
{
pg_data_t *pgdat;
unsigned long total = 0, reserved = 0, highmem = 0;
printk("Mem-Info:\n");
- show_free_areas(filter);
+ show_free_areas(filter, nodemask);
for_each_online_pgdat(pgdat) {
unsigned long flags;
diff --git a/lib/sort.c b/lib/sort.c
index fc20df42aa6f..975c6ef6fec7 100644
--- a/lib/sort.c
+++ b/lib/sort.c
@@ -4,6 +4,8 @@
* Jan 23 2005 Matt Mackall <mpm@selenic.com>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/types.h>
#include <linux/export.h>
#include <linux/sort.h>
@@ -101,42 +103,3 @@ void sort(void *base, size_t num, size_t size,
}
EXPORT_SYMBOL(sort);
-
-#if 0
-#include <linux/slab.h>
-/* a simple boot-time regression test */
-
-int cmpint(const void *a, const void *b)
-{
- return *(int *)a - *(int *)b;
-}
-
-static int sort_test(void)
-{
- int *a, i, r = 1;
-
- a = kmalloc(1000 * sizeof(int), GFP_KERNEL);
- BUG_ON(!a);
-
- printk("testing sort()\n");
-
- for (i = 0; i < 1000; i++) {
- r = (r * 725861) % 6599;
- a[i] = r;
- }
-
- sort(a, 1000, sizeof(int), cmpint, NULL);
-
- for (i = 0; i < 999; i++)
- if (a[i] > a[i+1]) {
- printk("sort() failed!\n");
- break;
- }
-
- kfree(a);
-
- return 0;
-}
-
-module_init(sort_test);
-#endif
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index fbdf87920093..0b1d3140fbb8 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -11,6 +11,7 @@
#define pr_fmt(fmt) "kasan test: %s " fmt, __func__
+#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/mman.h>
#include <linux/mm.h>
@@ -331,6 +332,38 @@ static noinline void __init kmem_cache_oob(void)
kmem_cache_destroy(cache);
}
+static noinline void __init memcg_accounted_kmem_cache(void)
+{
+ int i;
+ char *p;
+ size_t size = 200;
+ struct kmem_cache *cache;
+
+ cache = kmem_cache_create("test_cache", size, 0, SLAB_ACCOUNT, NULL);
+ if (!cache) {
+ pr_err("Cache allocation failed\n");
+ return;
+ }
+
+ pr_info("allocate memcg accounted object\n");
+ /*
+ * Several allocations with a delay to allow for lazy per memcg kmem
+ * cache creation.
+ */
+ for (i = 0; i < 5; i++) {
+ p = kmem_cache_alloc(cache, GFP_KERNEL);
+ if (!p) {
+ pr_err("Allocation failed\n");
+ goto free_cache;
+ }
+ kmem_cache_free(cache, p);
+ msleep(100);
+ }
+
+free_cache:
+ kmem_cache_destroy(cache);
+}
+
static char global_array[10];
static noinline void __init kasan_global_oob(void)
@@ -460,6 +493,7 @@ static int __init kmalloc_tests_init(void)
kmalloc_uaf_memset();
kmalloc_uaf2();
kmem_cache_oob();
+ memcg_accounted_kmem_cache();
kasan_stack_oob();
kasan_global_oob();
ksize_unpoisons_memory();
diff --git a/lib/test_sort.c b/lib/test_sort.c
new file mode 100644
index 000000000000..4db3911db50a
--- /dev/null
+++ b/lib/test_sort.c
@@ -0,0 +1,44 @@
+#include <linux/sort.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+
+/*
+ * A simple boot-time regression test
+ * License: GPL
+ */
+
+#define TEST_LEN 1000
+
+static int __init cmpint(const void *a, const void *b)
+{
+ return *(int *)a - *(int *)b;
+}
+
+static int __init test_sort_init(void)
+{
+ int *a, i, r = 1, err = -ENOMEM;
+
+ a = kmalloc_array(TEST_LEN, sizeof(*a), GFP_KERNEL);
+ if (!a)
+ return err;
+
+ for (i = 0; i < TEST_LEN; i++) {
+ r = (r * 725861) % 6599;
+ a[i] = r;
+ }
+
+ sort(a, TEST_LEN, sizeof(*a), cmpint, NULL);
+
+ err = -EINVAL;
+ for (i = 0; i < TEST_LEN-1; i++)
+ if (a[i] > a[i+1]) {
+ pr_err("test has failed\n");
+ goto exit;
+ }
+ err = 0;
+ pr_info("test passed\n");
+exit:
+ kfree(a);
+ return err;
+}
+subsys_initcall(test_sort_init);
diff --git a/mm/Kconfig b/mm/Kconfig
index 9b8fccb969dc..4aff67e042f1 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -690,6 +690,7 @@ config ZONE_DEVICE
depends on MEMORY_HOTREMOVE
depends on SPARSEMEM_VMEMMAP
depends on X86_64 #arch_add_memory() comprehends device memory
+ select RADIX_TREE_MULTIORDER
help
Device memory hotplug support allows for establishing pmem,
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index afcc550877ff..3e5eada506df 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -90,3 +90,10 @@ config DEBUG_PAGE_REF
careful when enabling this feature because it adds about 30 KB to the
kernel code. However the runtime performance overhead is virtually
nil until the tracepoints are actually enabled.
+
+config DEBUG_RODATA_TEST
+ bool "Testcase for the marking rodata read-only"
+ depends on DEBUG_RODATA
+ ---help---
+ This option enables a testcase for the setting rodata read-only.
+
diff --git a/mm/Makefile b/mm/Makefile
index 295bd7a9f76b..026f6a828a50 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -23,8 +23,10 @@ KCOV_INSTRUMENT_vmstat.o := n
mmu-y := nommu.o
mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \
- mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
- vmalloc.o pagewalk.o pgtable-generic.o
+ mlock.o mmap.o mprotect.o mremap.o msync.o \
+ page_vma_mapped.o pagewalk.o pgtable-generic.o \
+ rmap.o vmalloc.o
+
ifdef CONFIG_CROSS_MEMORY_ATTACH
mmu-$(CONFIG_MMU) += process_vm_access.o
@@ -35,7 +37,7 @@ obj-y := filemap.o mempool.o oom_kill.o \
readahead.o swap.o truncate.o vmscan.o shmem.o \
util.o mmzone.o vmstat.o backing-dev.o \
mm_init.o mmu_context.o percpu.o slab_common.o \
- compaction.o vmacache.o \
+ compaction.o vmacache.o swap_slots.o \
interval_tree.o list_lru.o workingset.o \
debug.o $(mmu-y)
@@ -83,6 +85,7 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
+obj-$(CONFIG_DEBUG_RODATA_TEST) += rodata_test.o
obj-$(CONFIG_PAGE_OWNER) += page_owner.o
obj-$(CONFIG_CLEANCACHE) += cleancache.o
obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 39ce616a9d71..6d861d090e9f 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -411,8 +411,8 @@ retry:
while (*node != NULL) {
parent = *node;
- congested = container_of(parent, struct bdi_writeback_congested,
- rb_node);
+ congested = rb_entry(parent, struct bdi_writeback_congested,
+ rb_node);
if (congested->blkcg_id < blkcg_id)
node = &parent->rb_left;
else if (congested->blkcg_id > blkcg_id)
diff --git a/mm/bootmem.c b/mm/bootmem.c
index e8a55a3c9feb..9fedb27c6451 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -53,7 +53,7 @@ early_param("bootmem_debug", bootmem_debug_setup);
static unsigned long __init bootmap_bytes(unsigned long pages)
{
- unsigned long bytes = DIV_ROUND_UP(pages, 8);
+ unsigned long bytes = DIV_ROUND_UP(pages, BITS_PER_BYTE);
return ALIGN(bytes, sizeof(long));
}
diff --git a/mm/cma.c b/mm/cma.c
index 94b3460cd608..a6033e344430 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -348,6 +348,32 @@ err:
return ret;
}
+#ifdef CONFIG_CMA_DEBUG
+static void cma_debug_show_areas(struct cma *cma)
+{
+ unsigned long next_zero_bit, next_set_bit;
+ unsigned long start = 0;
+ unsigned int nr_zero, nr_total = 0;
+
+ mutex_lock(&cma->lock);
+ pr_info("number of available pages: ");
+ for (;;) {
+ next_zero_bit = find_next_zero_bit(cma->bitmap, cma->count, start);
+ if (next_zero_bit >= cma->count)
+ break;
+ next_set_bit = find_next_bit(cma->bitmap, cma->count, next_zero_bit);
+ nr_zero = next_set_bit - next_zero_bit;
+ pr_cont("%s%u@%lu", nr_total ? "+" : "", nr_zero, next_zero_bit);
+ nr_total += nr_zero;
+ start = next_zero_bit + nr_zero;
+ }
+ pr_cont("=> %u free of %lu total pages\n", nr_total, cma->count);
+ mutex_unlock(&cma->lock);
+}
+#else
+static inline void cma_debug_show_areas(struct cma *cma) { }
+#endif
+
/**
* cma_alloc() - allocate pages from contiguous area
* @cma: Contiguous memory region for which the allocation is performed.
@@ -357,14 +383,15 @@ err:
* This function allocates part of contiguous memory on specific
* contiguous memory area.
*/
-struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align)
+struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
+ gfp_t gfp_mask)
{
unsigned long mask, offset;
unsigned long pfn = -1;
unsigned long start = 0;
unsigned long bitmap_maxno, bitmap_no, bitmap_count;
struct page *page = NULL;
- int ret;
+ int ret = -ENOMEM;
if (!cma || !cma->count)
return NULL;
@@ -402,7 +429,8 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align)
pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
mutex_lock(&cma_mutex);
- ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
+ ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA,
+ gfp_mask);
mutex_unlock(&cma_mutex);
if (ret == 0) {
page = pfn_to_page(pfn);
@@ -421,6 +449,12 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align)
trace_cma_alloc(pfn, page, count, align);
+ if (ret) {
+ pr_info("%s: alloc failed, req-size: %zu pages, ret: %d\n",
+ __func__, count, ret);
+ cma_debug_show_areas(cma);
+ }
+
pr_debug("%s(): returned %p\n", __func__, page);
return page;
}
diff --git a/mm/cma_debug.c b/mm/cma_debug.c
index f8e4b60db167..ffc0c3d0ae64 100644
--- a/mm/cma_debug.c
+++ b/mm/cma_debug.c
@@ -138,7 +138,7 @@ static int cma_alloc_mem(struct cma *cma, int count)
if (!mem)
return -ENOMEM;
- p = cma_alloc(cma, count, 0);
+ p = cma_alloc(cma, count, 0, GFP_KERNEL);
if (!p) {
kfree(mem);
return -ENOMEM;
diff --git a/mm/compaction.c b/mm/compaction.c
index 949198d01260..0fdfde016ee2 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -548,7 +548,7 @@ isolate_fail:
if (blockpfn == end_pfn)
update_pageblock_skip(cc, valid_page, total_isolated, false);
- count_compact_events(COMPACTFREE_SCANNED, nr_scanned);
+ cc->total_free_scanned += nr_scanned;
if (total_isolated)
count_compact_events(COMPACTISOLATED, total_isolated);
return total_isolated;
@@ -802,7 +802,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
locked = false;
}
- if (isolate_movable_page(page, isolate_mode))
+ if (!isolate_movable_page(page, isolate_mode))
goto isolate_success;
}
@@ -931,7 +931,7 @@ isolate_fail:
trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn,
nr_scanned, nr_isolated);
- count_compact_events(COMPACTMIGRATE_SCANNED, nr_scanned);
+ cc->total_migrate_scanned += nr_scanned;
if (nr_isolated)
count_compact_events(COMPACTISOLATED, nr_isolated);
@@ -1631,6 +1631,9 @@ out:
zone->compact_cached_free_pfn = free_pfn;
}
+ count_compact_events(COMPACTMIGRATE_SCANNED, cc->total_migrate_scanned);
+ count_compact_events(COMPACTFREE_SCANNED, cc->total_free_scanned);
+
trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
cc->free_pfn, end_pfn, sync, ret);
@@ -1645,6 +1648,8 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
struct compact_control cc = {
.nr_freepages = 0,
.nr_migratepages = 0,
+ .total_migrate_scanned = 0,
+ .total_free_scanned = 0,
.order = order,
.gfp_mask = gfp_mask,
.zone = zone,
@@ -1757,6 +1762,8 @@ static void compact_node(int nid)
struct zone *zone;
struct compact_control cc = {
.order = -1,
+ .total_migrate_scanned = 0,
+ .total_free_scanned = 0,
.mode = MIGRATE_SYNC,
.ignore_skip_hint = true,
.whole_zone = true,
@@ -1883,6 +1890,8 @@ static void kcompactd_do_work(pg_data_t *pgdat)
struct zone *zone;
struct compact_control cc = {
.order = pgdat->kcompactd_max_order,
+ .total_migrate_scanned = 0,
+ .total_free_scanned = 0,
.classzone_idx = pgdat->kcompactd_classzone_idx,
.mode = MIGRATE_SYNC_LIGHT,
.ignore_skip_hint = true,
@@ -1891,7 +1900,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
};
trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
cc.classzone_idx);
- count_vm_event(KCOMPACTD_WAKE);
+ count_compact_event(KCOMPACTD_WAKE);
for (zoneid = 0; zoneid <= cc.classzone_idx; zoneid++) {
int status;
@@ -1909,6 +1918,8 @@ static void kcompactd_do_work(pg_data_t *pgdat)
cc.nr_freepages = 0;
cc.nr_migratepages = 0;
+ cc.total_migrate_scanned = 0;
+ cc.total_free_scanned = 0;
cc.zone = zone;
INIT_LIST_HEAD(&cc.freepages);
INIT_LIST_HEAD(&cc.migratepages);
@@ -1927,6 +1938,11 @@ static void kcompactd_do_work(pg_data_t *pgdat)
defer_compaction(zone, cc.order);
}
+ count_compact_events(KCOMPACTD_MIGRATE_SCANNED,
+ cc.total_migrate_scanned);
+ count_compact_events(KCOMPACTD_FREE_SCANNED,
+ cc.total_free_scanned);
+
VM_BUG_ON(!list_empty(&cc.freepages));
VM_BUG_ON(!list_empty(&cc.migratepages));
}
@@ -1950,6 +1966,13 @@ void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)
if (pgdat->kcompactd_max_order < order)
pgdat->kcompactd_max_order = order;
+ /*
+ * Pairs with implicit barrier in wait_event_freezable()
+ * such that wakeups are not missed in the lockless
+ * waitqueue_active() call.
+ */
+ smp_acquire__after_ctrl_dep();
+
if (pgdat->kcompactd_classzone_idx > classzone_idx)
pgdat->kcompactd_classzone_idx = classzone_idx;
diff --git a/mm/filemap.c b/mm/filemap.c
index 3f9afded581b..2ba46f410c7c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -788,7 +788,7 @@ static int wake_page_function(wait_queue_t *wait, unsigned mode, int sync, void
return autoremove_wake_function(wait, mode, sync, key);
}
-void wake_up_page_bit(struct page *page, int bit_nr)
+static void wake_up_page_bit(struct page *page, int bit_nr)
{
wait_queue_head_t *q = page_waitqueue(page);
struct wait_page_key key;
@@ -821,7 +821,13 @@ void wake_up_page_bit(struct page *page, int bit_nr)
}
spin_unlock_irqrestore(&q->lock, flags);
}
-EXPORT_SYMBOL(wake_up_page_bit);
+
+static void wake_up_page(struct page *page, int bit)
+{
+ if (!PageWaiters(page))
+ return;
+ wake_up_page_bit(page, bit);
+}
static inline int wait_on_page_bit_common(wait_queue_head_t *q,
struct page *page, int bit_nr, int state, bool lock)
@@ -1013,7 +1019,7 @@ EXPORT_SYMBOL_GPL(page_endio);
/**
* __lock_page - get a lock on the page, assuming we need to sleep to get it
- * @page: the page to lock
+ * @__page: the page to lock
*/
void __lock_page(struct page *__page)
{
@@ -2163,7 +2169,6 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
/**
* filemap_fault - read in file data for page fault handling
- * @vma: vma in which the fault was taken
* @vmf: struct vm_fault containing details of the fault
*
* filemap_fault() is invoked via the vma operations vector for a
@@ -2185,10 +2190,10 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
*
* We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
*/
-int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int filemap_fault(struct vm_fault *vmf)
{
int error;
- struct file *file = vma->vm_file;
+ struct file *file = vmf->vma->vm_file;
struct address_space *mapping = file->f_mapping;
struct file_ra_state *ra = &file->f_ra;
struct inode *inode = mapping->host;
@@ -2210,12 +2215,12 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
* We found the page, so try async readahead before
* waiting for the lock.
*/
- do_async_mmap_readahead(vma, ra, file, page, offset);
+ do_async_mmap_readahead(vmf->vma, ra, file, page, offset);
} else if (!page) {
/* No page in the page cache at all */
- do_sync_mmap_readahead(vma, ra, file, offset);
+ do_sync_mmap_readahead(vmf->vma, ra, file, offset);
count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+ mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
ret = VM_FAULT_MAJOR;
retry_find:
page = find_get_page(mapping, offset);
@@ -2223,7 +2228,7 @@ retry_find:
goto no_cached_page;
}
- if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
+ if (!lock_page_or_retry(page, vmf->vma->vm_mm, vmf->flags)) {
put_page(page);
return ret | VM_FAULT_RETRY;
}
@@ -2390,14 +2395,14 @@ next:
}
EXPORT_SYMBOL(filemap_map_pages);
-int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+int filemap_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
int ret = VM_FAULT_LOCKED;
sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
lock_page(page);
if (page->mapping != inode->i_mapping) {
unlock_page(page);
diff --git a/mm/gup.c b/mm/gup.c
index 55315555489d..1e67461b2733 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -253,6 +253,13 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
return page;
return no_page_table(vma, flags);
}
+ if (pud_devmap(*pud)) {
+ ptl = pud_lock(mm, pud);
+ page = follow_devmap_pud(vma, address, pud, flags);
+ spin_unlock(ptl);
+ if (page)
+ return page;
+ }
if (unlikely(pud_bad(*pud)))
return no_page_table(vma, flags);
@@ -572,7 +579,7 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
if (is_vm_hugetlb_page(vma)) {
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &nr_pages, i,
- gup_flags);
+ gup_flags, nonblocking);
continue;
}
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5f3ad65c85de..efddd02141a8 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -142,42 +142,6 @@ static struct shrinker huge_zero_page_shrinker = {
};
#ifdef CONFIG_SYSFS
-
-static ssize_t triple_flag_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t count,
- enum transparent_hugepage_flag enabled,
- enum transparent_hugepage_flag deferred,
- enum transparent_hugepage_flag req_madv)
-{
- if (!memcmp("defer", buf,
- min(sizeof("defer")-1, count))) {
- if (enabled == deferred)
- return -EINVAL;
- clear_bit(enabled, &transparent_hugepage_flags);
- clear_bit(req_madv, &transparent_hugepage_flags);
- set_bit(deferred, &transparent_hugepage_flags);
- } else if (!memcmp("always", buf,
- min(sizeof("always")-1, count))) {
- clear_bit(deferred, &transparent_hugepage_flags);
- clear_bit(req_madv, &transparent_hugepage_flags);
- set_bit(enabled, &transparent_hugepage_flags);
- } else if (!memcmp("madvise", buf,
- min(sizeof("madvise")-1, count))) {
- clear_bit(enabled, &transparent_hugepage_flags);
- clear_bit(deferred, &transparent_hugepage_flags);
- set_bit(req_madv, &transparent_hugepage_flags);
- } else if (!memcmp("never", buf,
- min(sizeof("never")-1, count))) {
- clear_bit(enabled, &transparent_hugepage_flags);
- clear_bit(req_madv, &transparent_hugepage_flags);
- clear_bit(deferred, &transparent_hugepage_flags);
- } else
- return -EINVAL;
-
- return count;
-}
-
static ssize_t enabled_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -193,19 +157,28 @@ static ssize_t enabled_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
- ssize_t ret;
+ ssize_t ret = count;
- ret = triple_flag_store(kobj, attr, buf, count,
- TRANSPARENT_HUGEPAGE_FLAG,
- TRANSPARENT_HUGEPAGE_FLAG,
- TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG);
+ if (!memcmp("always", buf,
+ min(sizeof("always")-1, count))) {
+ clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags);
+ set_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags);
+ } else if (!memcmp("madvise", buf,
+ min(sizeof("madvise")-1, count))) {
+ clear_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags);
+ set_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags);
+ } else if (!memcmp("never", buf,
+ min(sizeof("never")-1, count))) {
+ clear_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags);
+ } else
+ ret = -EINVAL;
if (ret > 0) {
int err = start_stop_khugepaged();
if (err)
ret = err;
}
-
return ret;
}
static struct kobj_attribute enabled_attr =
@@ -241,32 +214,58 @@ ssize_t single_hugepage_flag_store(struct kobject *kobj,
return count;
}
-/*
- * Currently defrag only disables __GFP_NOWAIT for allocation. A blind
- * __GFP_REPEAT is too aggressive, it's never worth swapping tons of
- * memory just to allocate one more hugepage.
- */
static ssize_t defrag_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
- return sprintf(buf, "[always] defer madvise never\n");
+ return sprintf(buf, "[always] defer defer+madvise madvise never\n");
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
- return sprintf(buf, "always [defer] madvise never\n");
- else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
- return sprintf(buf, "always defer [madvise] never\n");
- else
- return sprintf(buf, "always defer madvise [never]\n");
-
+ return sprintf(buf, "always [defer] defer+madvise madvise never\n");
+ if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
+ return sprintf(buf, "always defer [defer+madvise] madvise never\n");
+ if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
+ return sprintf(buf, "always defer defer+madvise [madvise] never\n");
+ return sprintf(buf, "always defer defer+madvise madvise [never]\n");
}
+
static ssize_t defrag_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
- return triple_flag_store(kobj, attr, buf, count,
- TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
- TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
- TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG);
+ if (!memcmp("always", buf,
+ min(sizeof("always")-1, count))) {
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
+ set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
+ } else if (!memcmp("defer", buf,
+ min(sizeof("defer")-1, count))) {
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
+ set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
+ } else if (!memcmp("defer+madvise", buf,
+ min(sizeof("defer+madvise")-1, count))) {
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
+ set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
+ } else if (!memcmp("madvise", buf,
+ min(sizeof("madvise")-1, count))) {
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
+ set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
+ } else if (!memcmp("never", buf,
+ min(sizeof("never")-1, count))) {
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
+ } else
+ return -EINVAL;
+
+ return count;
}
static struct kobj_attribute defrag_attr =
__ATTR(defrag, 0644, defrag_show, defrag_store);
@@ -612,25 +611,28 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
}
/*
- * If THP defrag is set to always then directly reclaim/compact as necessary
- * If set to defer then do only background reclaim/compact and defer to khugepaged
- * If set to madvise and the VMA is flagged then directly reclaim/compact
- * When direct reclaim/compact is allowed, don't retry except for flagged VMA's
+ * always: directly stall for all thp allocations
+ * defer: wake kswapd and fail if not immediately available
+ * defer+madvise: wake kswapd and directly stall for MADV_HUGEPAGE, otherwise
+ * fail if not immediately available
+ * madvise: directly stall for MADV_HUGEPAGE, otherwise fail if not immediately
+ * available
+ * never: never stall for any thp allocation
*/
static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
{
- bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
+ const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
- if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
- &transparent_hugepage_flags) && vma_madvised)
- return GFP_TRANSHUGE;
- else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
- &transparent_hugepage_flags))
- return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
- else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
- &transparent_hugepage_flags))
+ if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
-
+ if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
+ return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
+ if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
+ return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
+ __GFP_KSWAPD_RECLAIM);
+ if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
+ return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
+ 0);
return GFP_TRANSHUGE_LIGHT;
}
@@ -755,6 +757,60 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
}
EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
+{
+ if (likely(vma->vm_flags & VM_WRITE))
+ pud = pud_mkwrite(pud);
+ return pud;
+}
+
+static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
+ pud_t *pud, pfn_t pfn, pgprot_t prot, bool write)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ pud_t entry;
+ spinlock_t *ptl;
+
+ ptl = pud_lock(mm, pud);
+ entry = pud_mkhuge(pfn_t_pud(pfn, prot));
+ if (pfn_t_devmap(pfn))
+ entry = pud_mkdevmap(entry);
+ if (write) {
+ entry = pud_mkyoung(pud_mkdirty(entry));
+ entry = maybe_pud_mkwrite(entry, vma);
+ }
+ set_pud_at(mm, addr, pud, entry);
+ update_mmu_cache_pud(vma, addr, pud);
+ spin_unlock(ptl);
+}
+
+int vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
+ pud_t *pud, pfn_t pfn, bool write)
+{
+ pgprot_t pgprot = vma->vm_page_prot;
+ /*
+ * If we had pud_special, we could avoid all these restrictions,
+ * but we need to be consistent with PTEs and architectures that
+ * can't support a 'special' bit.
+ */
+ BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
+ BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) ==
+ (VM_PFNMAP|VM_MIXEDMAP));
+ BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));
+ BUG_ON(!pfn_t_devmap(pfn));
+
+ if (addr < vma->vm_start || addr >= vma->vm_end)
+ return VM_FAULT_SIGBUS;
+
+ track_pfn_insert(vma, &pgprot, pfn);
+
+ insert_pfn_pud(vma, addr, pud, pfn, pgprot, write);
+ return VM_FAULT_NOPAGE;
+}
+EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud);
+#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
+
static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd)
{
@@ -885,6 +941,123 @@ out:
return ret;
}
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static void touch_pud(struct vm_area_struct *vma, unsigned long addr,
+ pud_t *pud)
+{
+ pud_t _pud;
+
+ /*
+ * We should set the dirty bit only for FOLL_WRITE but for now
+ * the dirty bit in the pud is meaningless. And if the dirty
+ * bit will become meaningful and we'll only set it with
+ * FOLL_WRITE, an atomic set_bit will be required on the pud to
+ * set the young bit, instead of the current set_pud_at.
+ */
+ _pud = pud_mkyoung(pud_mkdirty(*pud));
+ if (pudp_set_access_flags(vma, addr & HPAGE_PUD_MASK,
+ pud, _pud, 1))
+ update_mmu_cache_pud(vma, addr, pud);
+}
+
+struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
+ pud_t *pud, int flags)
+{
+ unsigned long pfn = pud_pfn(*pud);
+ struct mm_struct *mm = vma->vm_mm;
+ struct dev_pagemap *pgmap;
+ struct page *page;
+
+ assert_spin_locked(pud_lockptr(mm, pud));
+
+ if (flags & FOLL_WRITE && !pud_write(*pud))
+ return NULL;
+
+ if (pud_present(*pud) && pud_devmap(*pud))
+ /* pass */;
+ else
+ return NULL;
+
+ if (flags & FOLL_TOUCH)
+ touch_pud(vma, addr, pud);
+
+ /*
+ * device mapped pages can only be returned if the
+ * caller will manage the page reference count.
+ */
+ if (!(flags & FOLL_GET))
+ return ERR_PTR(-EEXIST);
+
+ pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT;
+ pgmap = get_dev_pagemap(pfn, NULL);
+ if (!pgmap)
+ return ERR_PTR(-EFAULT);
+ page = pfn_to_page(pfn);
+ get_page(page);
+ put_dev_pagemap(pgmap);
+
+ return page;
+}
+
+int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
+ struct vm_area_struct *vma)
+{
+ spinlock_t *dst_ptl, *src_ptl;
+ pud_t pud;
+ int ret;
+
+ dst_ptl = pud_lock(dst_mm, dst_pud);
+ src_ptl = pud_lockptr(src_mm, src_pud);
+ spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+
+ ret = -EAGAIN;
+ pud = *src_pud;
+ if (unlikely(!pud_trans_huge(pud) && !pud_devmap(pud)))
+ goto out_unlock;
+
+ /*
+ * When page table lock is held, the huge zero pud should not be
+ * under splitting since we don't split the page itself, only pud to
+ * a page table.
+ */
+ if (is_huge_zero_pud(pud)) {
+ /* No huge zero pud yet */
+ }
+
+ pudp_set_wrprotect(src_mm, addr, src_pud);
+ pud = pud_mkold(pud_wrprotect(pud));
+ set_pud_at(dst_mm, addr, dst_pud, pud);
+
+ ret = 0;
+out_unlock:
+ spin_unlock(src_ptl);
+ spin_unlock(dst_ptl);
+ return ret;
+}
+
+void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
+{
+ pud_t entry;
+ unsigned long haddr;
+ bool write = vmf->flags & FAULT_FLAG_WRITE;
+
+ vmf->ptl = pud_lock(vmf->vma->vm_mm, vmf->pud);
+ if (unlikely(!pud_same(*vmf->pud, orig_pud)))
+ goto unlock;
+
+ entry = pud_mkyoung(orig_pud);
+ if (write)
+ entry = pud_mkdirty(entry);
+ haddr = vmf->address & HPAGE_PUD_MASK;
+ if (pudp_set_access_flags(vmf->vma, haddr, vmf->pud, entry, write))
+ update_mmu_cache_pud(vmf->vma, vmf->address, vmf->pud);
+
+unlock:
+ spin_unlock(vmf->ptl);
+}
+#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
+
void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd)
{
pmd_t entry;
@@ -1599,6 +1772,84 @@ spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
return NULL;
}
+/*
+ * Returns true if a given pud maps a thp, false otherwise.
+ *
+ * Note that if it returns true, this routine returns without unlocking page
+ * table lock. So callers must unlock it.
+ */
+spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma)
+{
+ spinlock_t *ptl;
+
+ ptl = pud_lock(vma->vm_mm, pud);
+ if (likely(pud_trans_huge(*pud) || pud_devmap(*pud)))
+ return ptl;
+ spin_unlock(ptl);
+ return NULL;
+}
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ pud_t *pud, unsigned long addr)
+{
+ pud_t orig_pud;
+ spinlock_t *ptl;
+
+ ptl = __pud_trans_huge_lock(pud, vma);
+ if (!ptl)
+ return 0;
+ /*
+ * For architectures like ppc64 we look at deposited pgtable
+ * when calling pudp_huge_get_and_clear. So do the
+ * pgtable_trans_huge_withdraw after finishing pudp related
+ * operations.
+ */
+ orig_pud = pudp_huge_get_and_clear_full(tlb->mm, addr, pud,
+ tlb->fullmm);
+ tlb_remove_pud_tlb_entry(tlb, pud, addr);
+ if (vma_is_dax(vma)) {
+ spin_unlock(ptl);
+ /* No zero page support yet */
+ } else {
+ /* No support for anonymous PUD pages yet */
+ BUG();
+ }
+ return 1;
+}
+
+static void __split_huge_pud_locked(struct vm_area_struct *vma, pud_t *pud,
+ unsigned long haddr)
+{
+ VM_BUG_ON(haddr & ~HPAGE_PUD_MASK);
+ VM_BUG_ON_VMA(vma->vm_start > haddr, vma);
+ VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PUD_SIZE, vma);
+ VM_BUG_ON(!pud_trans_huge(*pud) && !pud_devmap(*pud));
+
+ count_vm_event(THP_SPLIT_PMD);
+
+ pudp_huge_clear_flush_notify(vma, haddr, pud);
+}
+
+void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
+ unsigned long address)
+{
+ spinlock_t *ptl;
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long haddr = address & HPAGE_PUD_MASK;
+
+ mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PUD_SIZE);
+ ptl = pud_lock(mm, pud);
+ if (unlikely(!pud_trans_huge(*pud) && !pud_devmap(*pud)))
+ goto out;
+ __split_huge_pud_locked(vma, pud, haddr);
+
+out:
+ spin_unlock(ptl);
+ mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PUD_SIZE);
+}
+#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
+
static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
unsigned long haddr, pmd_t *pmd)
{
@@ -1855,32 +2106,27 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
static void freeze_page(struct page *page)
{
enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
- TTU_RMAP_LOCKED;
- int i, ret;
+ TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
+ int ret;
VM_BUG_ON_PAGE(!PageHead(page), page);
if (PageAnon(page))
ttu_flags |= TTU_MIGRATION;
- /* We only need TTU_SPLIT_HUGE_PMD once */
- ret = try_to_unmap(page, ttu_flags | TTU_SPLIT_HUGE_PMD);
- for (i = 1; !ret && i < HPAGE_PMD_NR; i++) {
- /* Cut short if the page is unmapped */
- if (page_count(page) == 1)
- return;
-
- ret = try_to_unmap(page + i, ttu_flags);
- }
- VM_BUG_ON_PAGE(ret, page + i - 1);
+ ret = try_to_unmap(page, ttu_flags);
+ VM_BUG_ON_PAGE(ret, page);
}
static void unfreeze_page(struct page *page)
{
int i;
-
- for (i = 0; i < HPAGE_PMD_NR; i++)
- remove_migration_ptes(page + i, page + i, true);
+ if (PageTransHuge(page)) {
+ remove_migration_ptes(page, page, true);
+ } else {
+ for (i = 0; i < HPAGE_PMD_NR; i++)
+ remove_migration_ptes(page + i, page + i, true);
+ }
}
static void __split_huge_page_tail(struct page *head, int tail,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c7025c132670..d0d1d083c432 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -32,6 +32,7 @@
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/node.h>
+#include <linux/userfaultfd_k.h>
#include "internal.h"
int hugepages_treat_as_movable;
@@ -1051,7 +1052,8 @@ static int __alloc_gigantic_page(unsigned long start_pfn,
unsigned long nr_pages)
{
unsigned long end_pfn = start_pfn + nr_pages;
- return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
+ return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE,
+ GFP_KERNEL);
}
static bool pfn_range_valid_gigantic(struct zone *z,
@@ -3141,7 +3143,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
* hugegpage VMA. do_page_fault() is supposed to trap this, so BUG is we get
* this far.
*/
-static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int hugetlb_vm_op_fault(struct vm_fault *vmf)
{
BUG();
return 0;
@@ -3680,6 +3682,38 @@ retry:
size = i_size_read(mapping->host) >> huge_page_shift(h);
if (idx >= size)
goto out;
+
+ /*
+ * Check for page in userfault range
+ */
+ if (userfaultfd_missing(vma)) {
+ u32 hash;
+ struct vm_fault vmf = {
+ .vma = vma,
+ .address = address,
+ .flags = flags,
+ /*
+ * Hard to debug if it ends up being
+ * used by a callee that assumes
+ * something about the other
+ * uninitialized fields... same as in
+ * memory.c
+ */
+ };
+
+ /*
+ * hugetlb_fault_mutex must be dropped before
+ * handling userfault. Reacquire after handling
+ * fault to make calling code simpler.
+ */
+ hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping,
+ idx, address);
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ ret = handle_userfault(&vmf, VM_UFFD_MISSING);
+ mutex_lock(&hugetlb_fault_mutex_table[hash]);
+ goto out;
+ }
+
page = alloc_huge_page(vma, address, 0);
if (IS_ERR(page)) {
ret = PTR_ERR(page);
@@ -3948,10 +3982,91 @@ out_mutex:
return ret;
}
+/*
+ * Used by userfaultfd UFFDIO_COPY. Based on mcopy_atomic_pte with
+ * modifications for huge pages.
+ */
+int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
+ pte_t *dst_pte,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr,
+ unsigned long src_addr,
+ struct page **pagep)
+{
+ struct hstate *h = hstate_vma(dst_vma);
+ pte_t _dst_pte;
+ spinlock_t *ptl;
+ int ret;
+ struct page *page;
+
+ if (!*pagep) {
+ ret = -ENOMEM;
+ page = alloc_huge_page(dst_vma, dst_addr, 0);
+ if (IS_ERR(page))
+ goto out;
+
+ ret = copy_huge_page_from_user(page,
+ (const void __user *) src_addr,
+ pages_per_huge_page(h), false);
+
+ /* fallback to copy_from_user outside mmap_sem */
+ if (unlikely(ret)) {
+ ret = -EFAULT;
+ *pagep = page;
+ /* don't free the page */
+ goto out;
+ }
+ } else {
+ page = *pagep;
+ *pagep = NULL;
+ }
+
+ /*
+ * The memory barrier inside __SetPageUptodate makes sure that
+ * preceding stores to the page contents become visible before
+ * the set_pte_at() write.
+ */
+ __SetPageUptodate(page);
+ set_page_huge_active(page);
+
+ ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
+ spin_lock(ptl);
+
+ ret = -EEXIST;
+ if (!huge_pte_none(huge_ptep_get(dst_pte)))
+ goto out_release_unlock;
+
+ ClearPagePrivate(page);
+ hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
+
+ _dst_pte = make_huge_pte(dst_vma, page, dst_vma->vm_flags & VM_WRITE);
+ if (dst_vma->vm_flags & VM_WRITE)
+ _dst_pte = huge_pte_mkdirty(_dst_pte);
+ _dst_pte = pte_mkyoung(_dst_pte);
+
+ set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
+
+ (void)huge_ptep_set_access_flags(dst_vma, dst_addr, dst_pte, _dst_pte,
+ dst_vma->vm_flags & VM_WRITE);
+ hugetlb_count_add(pages_per_huge_page(h), dst_mm);
+
+ /* No need to invalidate - it was non-present before */
+ update_mmu_cache(dst_vma, dst_addr, dst_pte);
+
+ spin_unlock(ptl);
+ ret = 0;
+out:
+ return ret;
+out_release_unlock:
+ spin_unlock(ptl);
+ put_page(page);
+ goto out;
+}
+
long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page **pages, struct vm_area_struct **vmas,
unsigned long *position, unsigned long *nr_pages,
- long i, unsigned int flags)
+ long i, unsigned int flags, int *nonblocking)
{
unsigned long pfn_offset;
unsigned long vaddr = *position;
@@ -4014,16 +4129,43 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
((flags & FOLL_WRITE) &&
!huge_pte_write(huge_ptep_get(pte)))) {
int ret;
+ unsigned int fault_flags = 0;
if (pte)
spin_unlock(ptl);
- ret = hugetlb_fault(mm, vma, vaddr,
- (flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
- if (!(ret & VM_FAULT_ERROR))
- continue;
-
- remainder = 0;
- break;
+ if (flags & FOLL_WRITE)
+ fault_flags |= FAULT_FLAG_WRITE;
+ if (nonblocking)
+ fault_flags |= FAULT_FLAG_ALLOW_RETRY;
+ if (flags & FOLL_NOWAIT)
+ fault_flags |= FAULT_FLAG_ALLOW_RETRY |
+ FAULT_FLAG_RETRY_NOWAIT;
+ if (flags & FOLL_TRIED) {
+ VM_WARN_ON_ONCE(fault_flags &
+ FAULT_FLAG_ALLOW_RETRY);
+ fault_flags |= FAULT_FLAG_TRIED;
+ }
+ ret = hugetlb_fault(mm, vma, vaddr, fault_flags);
+ if (ret & VM_FAULT_ERROR) {
+ remainder = 0;
+ break;
+ }
+ if (ret & VM_FAULT_RETRY) {
+ if (nonblocking)
+ *nonblocking = 0;
+ *nr_pages = 0;
+ /*
+ * VM_FAULT_RETRY must not return an
+ * error, it will return zero
+ * instead.
+ *
+ * No need to update "position" as the
+ * caller will not check it after
+ * *nr_pages is set to 0.
+ */
+ return i;
+ }
+ continue;
}
pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
@@ -4052,6 +4194,11 @@ same_page:
spin_unlock(ptl);
}
*nr_pages = remainder;
+ /*
+ * setting position is actually required only if remainder is
+ * not zero but it's faster not to add a "if (remainder)"
+ * branch.
+ */
*position = vaddr;
return i ? i : -EFAULT;
diff --git a/mm/internal.h b/mm/internal.h
index 7aa2ea0a8623..ccfc2a2969f4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -43,6 +43,11 @@ int do_swap_page(struct vm_fault *vmf);
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
unsigned long floor, unsigned long ceiling);
+static inline bool can_madv_dontneed_vma(struct vm_area_struct *vma)
+{
+ return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP));
+}
+
void unmap_page_range(struct mmu_gather *tlb,
struct vm_area_struct *vma,
unsigned long addr, unsigned long end,
@@ -133,9 +138,9 @@ struct alloc_context {
* Assumption: *_mem_map is contiguous at least up to MAX_ORDER
*/
static inline unsigned long
-__find_buddy_index(unsigned long page_idx, unsigned int order)
+__find_buddy_pfn(unsigned long page_pfn, unsigned int order)
{
- return page_idx ^ (1 << order);
+ return page_pfn ^ (1 << order);
}
extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
@@ -175,6 +180,8 @@ struct compact_control {
struct list_head migratepages; /* List of pages being migrated */
unsigned long nr_freepages; /* Number of isolated free pages */
unsigned long nr_migratepages; /* Number of pages to migrate */
+ unsigned long total_migrate_scanned;
+ unsigned long total_free_scanned;
unsigned long free_pfn; /* isolate_freepages search base */
unsigned long migrate_pfn; /* isolate_migratepages search base */
unsigned long last_migrated_pfn;/* Not yet flushed page being freed */
@@ -328,12 +335,15 @@ __vma_address(struct page *page, struct vm_area_struct *vma)
static inline unsigned long
vma_address(struct page *page, struct vm_area_struct *vma)
{
- unsigned long address = __vma_address(page, vma);
+ unsigned long start, end;
+
+ start = __vma_address(page, vma);
+ end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1);
/* page should be within @vma mapping range */
- VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
+ VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma);
- return address;
+ return max(start, vma->vm_start);
}
#else /* !CONFIG_MMU */
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 82e27d52d67a..a25dc7e82593 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -437,7 +437,7 @@ void kasan_cache_shrink(struct kmem_cache *cache)
quarantine_remove_cache(cache);
}
-void kasan_cache_destroy(struct kmem_cache *cache)
+void kasan_cache_shutdown(struct kmem_cache *cache)
{
quarantine_remove_cache(cache);
}
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index dae929c02bbb..6f1ed1630873 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -274,6 +274,7 @@ static void per_cpu_remove_cache(void *arg)
qlist_free_all(&to_free, cache);
}
+/* Free all quarantined objects belonging to cache. */
void quarantine_remove_cache(struct kmem_cache *cache)
{
unsigned long flags, i;
diff --git a/mm/ksm.c b/mm/ksm.c
index 9ae6011a41f8..8960f6ecbc12 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -223,6 +223,12 @@ static unsigned int ksm_thread_pages_to_scan = 100;
/* Milliseconds ksmd should sleep between batches */
static unsigned int ksm_thread_sleep_millisecs = 20;
+/* Checksum of an empty (zeroed) page */
+static unsigned int zero_checksum __read_mostly;
+
+/* Whether to merge empty (zeroed) pages with actual zero pages */
+static bool ksm_use_zero_pages __read_mostly;
+
#ifdef CONFIG_NUMA
/* Zeroed when merging across nodes is not allowed */
static unsigned int ksm_merge_across_nodes = 1;
@@ -850,33 +856,35 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
pte_t *orig_pte)
{
struct mm_struct *mm = vma->vm_mm;
- unsigned long addr;
- pte_t *ptep;
- spinlock_t *ptl;
+ struct page_vma_mapped_walk pvmw = {
+ .page = page,
+ .vma = vma,
+ };
int swapped;
int err = -EFAULT;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
- addr = page_address_in_vma(page, vma);
- if (addr == -EFAULT)
+ pvmw.address = page_address_in_vma(page, vma);
+ if (pvmw.address == -EFAULT)
goto out;
BUG_ON(PageTransCompound(page));
- mmun_start = addr;
- mmun_end = addr + PAGE_SIZE;
+ mmun_start = pvmw.address;
+ mmun_end = pvmw.address + PAGE_SIZE;
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
- ptep = page_check_address(page, mm, addr, &ptl, 0);
- if (!ptep)
+ if (!page_vma_mapped_walk(&pvmw))
goto out_mn;
+ if (WARN_ONCE(!pvmw.pte, "Unexpected PMD mapping?"))
+ goto out_unlock;
- if (pte_write(*ptep) || pte_dirty(*ptep)) {
+ if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte)) {
pte_t entry;
swapped = PageSwapCache(page);
- flush_cache_page(vma, addr, page_to_pfn(page));
+ flush_cache_page(vma, pvmw.address, page_to_pfn(page));
/*
* Ok this is tricky, when get_user_pages_fast() run it doesn't
* take any lock, therefore the check that we are going to make
@@ -886,25 +894,25 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
* this assure us that no O_DIRECT can happen after the check
* or in the middle of the check.
*/
- entry = ptep_clear_flush_notify(vma, addr, ptep);
+ entry = ptep_clear_flush_notify(vma, pvmw.address, pvmw.pte);
/*
* Check that no O_DIRECT or similar I/O is in progress on the
* page
*/
if (page_mapcount(page) + 1 + swapped != page_count(page)) {
- set_pte_at(mm, addr, ptep, entry);
+ set_pte_at(mm, pvmw.address, pvmw.pte, entry);
goto out_unlock;
}
if (pte_dirty(entry))
set_page_dirty(page);
entry = pte_mkclean(pte_wrprotect(entry));
- set_pte_at_notify(mm, addr, ptep, entry);
+ set_pte_at_notify(mm, pvmw.address, pvmw.pte, entry);
}
- *orig_pte = *ptep;
+ *orig_pte = *pvmw.pte;
err = 0;
out_unlock:
- pte_unmap_unlock(ptep, ptl);
+ page_vma_mapped_walk_done(&pvmw);
out_mn:
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
out:
@@ -926,6 +934,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
struct mm_struct *mm = vma->vm_mm;
pmd_t *pmd;
pte_t *ptep;
+ pte_t newpte;
spinlock_t *ptl;
unsigned long addr;
int err = -EFAULT;
@@ -950,12 +959,22 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
goto out_mn;
}
- get_page(kpage);
- page_add_anon_rmap(kpage, vma, addr, false);
+ /*
+ * No need to check ksm_use_zero_pages here: we can only have a
+ * zero_page here if ksm_use_zero_pages was enabled alreaady.
+ */
+ if (!is_zero_pfn(page_to_pfn(kpage))) {
+ get_page(kpage);
+ page_add_anon_rmap(kpage, vma, addr, false);
+ newpte = mk_pte(kpage, vma->vm_page_prot);
+ } else {
+ newpte = pte_mkspecial(pfn_pte(page_to_pfn(kpage),
+ vma->vm_page_prot));
+ }
flush_cache_page(vma, addr, pte_pfn(*ptep));
ptep_clear_flush_notify(vma, addr, ptep);
- set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
+ set_pte_at_notify(mm, addr, ptep, newpte);
page_remove_rmap(page, false);
if (!page_mapped(page))
@@ -1467,6 +1486,23 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
return;
}
+ /*
+ * Same checksum as an empty page. We attempt to merge it with the
+ * appropriate zero page if the user enabled this via sysfs.
+ */
+ if (ksm_use_zero_pages && (checksum == zero_checksum)) {
+ struct vm_area_struct *vma;
+
+ vma = find_mergeable_vma(rmap_item->mm, rmap_item->address);
+ err = try_to_merge_one_page(vma, page,
+ ZERO_PAGE(rmap_item->address));
+ /*
+ * In case of failure, the page was not really empty, so we
+ * need to continue. Otherwise we're done.
+ */
+ if (!err)
+ return;
+ }
tree_rmap_item =
unstable_tree_search_insert(rmap_item, page, &tree_page);
if (tree_rmap_item) {
@@ -2233,6 +2269,28 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj,
KSM_ATTR(merge_across_nodes);
#endif
+static ssize_t use_zero_pages_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", ksm_use_zero_pages);
+}
+static ssize_t use_zero_pages_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int err;
+ bool value;
+
+ err = kstrtobool(buf, &value);
+ if (err)
+ return -EINVAL;
+
+ ksm_use_zero_pages = value;
+
+ return count;
+}
+KSM_ATTR(use_zero_pages);
+
static ssize_t pages_shared_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -2290,6 +2348,7 @@ static struct attribute *ksm_attrs[] = {
#ifdef CONFIG_NUMA
&merge_across_nodes_attr.attr,
#endif
+ &use_zero_pages_attr.attr,
NULL,
};
@@ -2304,6 +2363,11 @@ static int __init ksm_init(void)
struct task_struct *ksm_thread;
int err;
+ /* The correct value depends on page size and endianness */
+ zero_checksum = calc_checksum(ZERO_PAGE(0));
+ /* Default to false for backwards compatibility */
+ ksm_use_zero_pages = false;
+
err = ksm_slab_init();
if (err)
goto out;
diff --git a/mm/madvise.c b/mm/madvise.c
index 0e3828eae9f8..11fc65f81ecd 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -10,6 +10,7 @@
#include <linux/syscalls.h>
#include <linux/mempolicy.h>
#include <linux/page-isolation.h>
+#include <linux/userfaultfd_k.h>
#include <linux/hugetlb.h>
#include <linux/falloc.h>
#include <linux/sched.h>
@@ -24,6 +25,8 @@
#include <asm/tlb.h>
+#include "internal.h"
+
/*
* Any behaviour which results in changes to the vma->vm_flags needs to
* take mmap_sem for writing. Others, which simply traverse vmas, need
@@ -89,14 +92,28 @@ static long madvise_behavior(struct vm_area_struct *vma,
case MADV_MERGEABLE:
case MADV_UNMERGEABLE:
error = ksm_madvise(vma, start, end, behavior, &new_flags);
- if (error)
+ if (error) {
+ /*
+ * madvise() returns EAGAIN if kernel resources, such as
+ * slab, are temporarily unavailable.
+ */
+ if (error == -ENOMEM)
+ error = -EAGAIN;
goto out;
+ }
break;
case MADV_HUGEPAGE:
case MADV_NOHUGEPAGE:
error = hugepage_madvise(vma, &new_flags, behavior);
- if (error)
+ if (error) {
+ /*
+ * madvise() returns EAGAIN if kernel resources, such as
+ * slab, are temporarily unavailable.
+ */
+ if (error == -ENOMEM)
+ error = -EAGAIN;
goto out;
+ }
break;
}
@@ -117,15 +134,37 @@ static long madvise_behavior(struct vm_area_struct *vma,
*prev = vma;
if (start != vma->vm_start) {
- error = split_vma(mm, vma, start, 1);
- if (error)
+ if (unlikely(mm->map_count >= sysctl_max_map_count)) {
+ error = -ENOMEM;
goto out;
+ }
+ error = __split_vma(mm, vma, start, 1);
+ if (error) {
+ /*
+ * madvise() returns EAGAIN if kernel resources, such as
+ * slab, are temporarily unavailable.
+ */
+ if (error == -ENOMEM)
+ error = -EAGAIN;
+ goto out;
+ }
}
if (end != vma->vm_end) {
- error = split_vma(mm, vma, end, 0);
- if (error)
+ if (unlikely(mm->map_count >= sysctl_max_map_count)) {
+ error = -ENOMEM;
goto out;
+ }
+ error = __split_vma(mm, vma, end, 0);
+ if (error) {
+ /*
+ * madvise() returns EAGAIN if kernel resources, such as
+ * slab, are temporarily unavailable.
+ */
+ if (error == -ENOMEM)
+ error = -EAGAIN;
+ goto out;
+ }
}
success:
@@ -133,10 +172,7 @@ success:
* vm_flags is protected by the mmap_sem held in write mode.
*/
vma->vm_flags = new_flags;
-
out:
- if (error == -ENOMEM)
- error = -EAGAIN;
return error;
}
@@ -473,10 +509,11 @@ static long madvise_dontneed(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
*prev = vma;
- if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
+ if (!can_madv_dontneed_vma(vma))
return -EINVAL;
- zap_page_range(vma, start, end - start, NULL);
+ userfaultfd_remove(vma, prev, start, end);
+ zap_page_range(vma, start, end - start);
return 0;
}
@@ -516,6 +553,7 @@ static long madvise_remove(struct vm_area_struct *vma,
* mmap_sem.
*/
get_file(f);
+ userfaultfd_remove(vma, prev, start, end);
up_read(&current->mm->mmap_sem);
error = vfs_fallocate(f,
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
diff --git a/mm/memblock.c b/mm/memblock.c
index 7608bc305936..b64b47803e52 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -35,15 +35,18 @@ struct memblock memblock __initdata_memblock = {
.memory.regions = memblock_memory_init_regions,
.memory.cnt = 1, /* empty dummy entry */
.memory.max = INIT_MEMBLOCK_REGIONS,
+ .memory.name = "memory",
.reserved.regions = memblock_reserved_init_regions,
.reserved.cnt = 1, /* empty dummy entry */
.reserved.max = INIT_MEMBLOCK_REGIONS,
+ .reserved.name = "reserved",
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
.physmem.regions = memblock_physmem_init_regions,
.physmem.cnt = 1, /* empty dummy entry */
.physmem.max = INIT_PHYSMEM_REGIONS,
+ .physmem.name = "physmem",
#endif
.bottom_up = false,
@@ -64,18 +67,6 @@ ulong __init_memblock choose_memblock_flags(void)
return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE;
}
-/* inline so we don't get a warning when pr_debug is compiled out */
-static __init_memblock const char *
-memblock_type_name(struct memblock_type *type)
-{
- if (type == &memblock.memory)
- return "memory";
- else if (type == &memblock.reserved)
- return "reserved";
- else
- return "unknown";
-}
-
/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */
static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
{
@@ -402,12 +393,12 @@ static int __init_memblock memblock_double_array(struct memblock_type *type,
}
if (!addr) {
pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
- memblock_type_name(type), type->max, type->max * 2);
+ type->name, type->max, type->max * 2);
return -1;
}
memblock_dbg("memblock: %s is doubled to %ld at [%#010llx-%#010llx]",
- memblock_type_name(type), type->max * 2, (u64)addr,
+ type->name, type->max * 2, (u64)addr,
(u64)addr + new_size - 1);
/*
@@ -611,10 +602,10 @@ int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
{
- memblock_dbg("memblock_add: [%#016llx-%#016llx] flags %#02lx %pF\n",
- (unsigned long long)base,
- (unsigned long long)base + size - 1,
- 0UL, (void *)_RET_IP_);
+ phys_addr_t end = base + size - 1;
+
+ memblock_dbg("memblock_add: [%pa-%pa] %pF\n",
+ &base, &end, (void *)_RET_IP_);
return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0);
}
@@ -718,10 +709,10 @@ int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
{
- memblock_dbg(" memblock_free: [%#016llx-%#016llx] %pF\n",
- (unsigned long long)base,
- (unsigned long long)base + size - 1,
- (void *)_RET_IP_);
+ phys_addr_t end = base + size - 1;
+
+ memblock_dbg(" memblock_free: [%pa-%pa] %pF\n",
+ &base, &end, (void *)_RET_IP_);
kmemleak_free_part_phys(base, size);
return memblock_remove_range(&memblock.reserved, base, size);
@@ -729,10 +720,10 @@ int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
{
- memblock_dbg("memblock_reserve: [%#016llx-%#016llx] flags %#02lx %pF\n",
- (unsigned long long)base,
- (unsigned long long)base + size - 1,
- 0UL, (void *)_RET_IP_);
+ phys_addr_t end = base + size - 1;
+
+ memblock_dbg("memblock_reserve: [%pa-%pa] %pF\n",
+ &base, &end, (void *)_RET_IP_);
return memblock_add_range(&memblock.reserved, base, size, MAX_NUMNODES, 0);
}
@@ -1105,6 +1096,31 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
*out_nid = r->nid;
}
+unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn,
+ unsigned long max_pfn)
+{
+ struct memblock_type *type = &memblock.memory;
+ unsigned int right = type->cnt;
+ unsigned int mid, left = 0;
+ phys_addr_t addr = PFN_PHYS(pfn + 1);
+
+ do {
+ mid = (right + left) / 2;
+
+ if (addr < type->regions[mid].base)
+ right = mid;
+ else if (addr >= (type->regions[mid].base +
+ type->regions[mid].size))
+ left = mid + 1;
+ else {
+ /* addr is within the region, so pfn + 1 is valid */
+ return min(pfn + 1, max_pfn);
+ }
+ } while (left < right);
+
+ return min(PHYS_PFN(type->regions[right].base), max_pfn);
+}
+
/**
* memblock_set_node - set node ID on memblock regions
* @base: base of area to set node ID for
@@ -1202,8 +1218,8 @@ phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys
alloc = __memblock_alloc_base(size, align, max_addr);
if (alloc == 0)
- panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
- (unsigned long long) size, (unsigned long long) max_addr);
+ panic("ERROR: Failed to allocate %pa bytes below %pa.\n",
+ &size, &max_addr);
return alloc;
}
@@ -1274,18 +1290,17 @@ static void * __init memblock_virt_alloc_internal(
if (max_addr > memblock.current_limit)
max_addr = memblock.current_limit;
-
again:
alloc = memblock_find_in_range_node(size, align, min_addr, max_addr,
nid, flags);
- if (alloc)
+ if (alloc && !memblock_reserve(alloc, size))
goto done;
if (nid != NUMA_NO_NODE) {
alloc = memblock_find_in_range_node(size, align, min_addr,
max_addr, NUMA_NO_NODE,
flags);
- if (alloc)
+ if (alloc && !memblock_reserve(alloc, size))
goto done;
}
@@ -1303,7 +1318,6 @@ again:
return NULL;
done:
- memblock_reserve(alloc, size);
ptr = phys_to_virt(alloc);
memset(ptr, 0, size);
@@ -1615,8 +1629,7 @@ int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size
if (idx == -1)
return 0;
- return memblock.memory.regions[idx].base <= base &&
- (memblock.memory.regions[idx].base +
+ return (memblock.memory.regions[idx].base +
memblock.memory.regions[idx].size) >= end;
}
@@ -1671,40 +1684,44 @@ phys_addr_t __init_memblock memblock_get_current_limit(void)
return memblock.current_limit;
}
-static void __init_memblock memblock_dump(struct memblock_type *type, char *name)
+static void __init_memblock memblock_dump(struct memblock_type *type)
{
- unsigned long long base, size;
+ phys_addr_t base, end, size;
unsigned long flags;
int idx;
struct memblock_region *rgn;
- pr_info(" %s.cnt = 0x%lx\n", name, type->cnt);
+ pr_info(" %s.cnt = 0x%lx\n", type->name, type->cnt);
for_each_memblock_type(type, rgn) {
char nid_buf[32] = "";
base = rgn->base;
size = rgn->size;
+ end = base + size - 1;
flags = rgn->flags;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
if (memblock_get_region_node(rgn) != MAX_NUMNODES)
snprintf(nid_buf, sizeof(nid_buf), " on node %d",
memblock_get_region_node(rgn));
#endif
- pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes%s flags: %#lx\n",
- name, idx, base, base + size - 1, size, nid_buf, flags);
+ pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#lx\n",
+ type->name, idx, &base, &end, &size, nid_buf, flags);
}
}
void __init_memblock __memblock_dump_all(void)
{
pr_info("MEMBLOCK configuration:\n");
- pr_info(" memory size = %#llx reserved size = %#llx\n",
- (unsigned long long)memblock.memory.total_size,
- (unsigned long long)memblock.reserved.total_size);
+ pr_info(" memory size = %pa reserved size = %pa\n",
+ &memblock.memory.total_size,
+ &memblock.reserved.total_size);
- memblock_dump(&memblock.memory, "memory");
- memblock_dump(&memblock.reserved, "reserved");
+ memblock_dump(&memblock.memory);
+ memblock_dump(&memblock.reserved);
+#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
+ memblock_dump(&memblock.physmem);
+#endif
}
void __init memblock_allow_resize(void)
@@ -1727,19 +1744,14 @@ static int memblock_debug_show(struct seq_file *m, void *private)
struct memblock_type *type = m->private;
struct memblock_region *reg;
int i;
+ phys_addr_t end;
for (i = 0; i < type->cnt; i++) {
reg = &type->regions[i];
- seq_printf(m, "%4d: ", i);
- if (sizeof(phys_addr_t) == 4)
- seq_printf(m, "0x%08lx..0x%08lx\n",
- (unsigned long)reg->base,
- (unsigned long)(reg->base + reg->size - 1));
- else
- seq_printf(m, "0x%016llx..0x%016llx\n",
- (unsigned long long)reg->base,
- (unsigned long long)(reg->base + reg->size - 1));
+ end = reg->base + reg->size - 1;
+ seq_printf(m, "%4d: ", i);
+ seq_printf(m, "%pa..%pa\n", &reg->base, &end);
}
return 0;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b822e158b319..1fd6affcdde7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -317,6 +317,8 @@ void memcg_put_cache_ids(void)
DEFINE_STATIC_KEY_FALSE(memcg_kmem_enabled_key);
EXPORT_SYMBOL(memcg_kmem_enabled_key);
+struct workqueue_struct *memcg_kmem_cache_wq;
+
#endif /* !CONFIG_SLOB */
/**
@@ -2143,8 +2145,6 @@ struct memcg_kmem_cache_create_work {
struct work_struct work;
};
-static struct workqueue_struct *memcg_kmem_cache_create_wq;
-
static void memcg_kmem_cache_create_func(struct work_struct *w)
{
struct memcg_kmem_cache_create_work *cw =
@@ -2176,7 +2176,7 @@ static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
cw->cachep = cachep;
INIT_WORK(&cw->work, memcg_kmem_cache_create_func);
- queue_work(memcg_kmem_cache_create_wq, &cw->work);
+ queue_work(memcg_kmem_cache_wq, &cw->work);
}
static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
@@ -2837,6 +2837,7 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)
*/
memcg->kmemcg_id = memcg_id;
memcg->kmem_state = KMEM_ONLINE;
+ INIT_LIST_HEAD(&memcg->kmem_caches);
return 0;
}
@@ -4002,9 +4003,9 @@ static struct cftype mem_cgroup_legacy_files[] = {
#ifdef CONFIG_SLABINFO
{
.name = "kmem.slabinfo",
- .seq_start = slab_start,
- .seq_next = slab_next,
- .seq_stop = slab_stop,
+ .seq_start = memcg_slab_start,
+ .seq_next = memcg_slab_next,
+ .seq_stop = memcg_slab_stop,
.seq_show = memcg_slab_show,
},
#endif
@@ -5777,12 +5778,12 @@ static int __init mem_cgroup_init(void)
#ifndef CONFIG_SLOB
/*
* Kmem cache creation is mostly done with the slab_mutex held,
- * so use a special workqueue to avoid stalling all worker
- * threads in case lots of cgroups are created simultaneously.
+ * so use a workqueue with limited concurrency to avoid stalling
+ * all worker threads in case lots of cgroups are created and
+ * destroyed simultaneously.
*/
- memcg_kmem_cache_create_wq =
- alloc_ordered_workqueue("memcg_kmem_cache_create", 0);
- BUG_ON(!memcg_kmem_cache_create_wq);
+ memcg_kmem_cache_wq = alloc_workqueue("memcg_kmem_cache", 0, 1);
+ BUG_ON(!memcg_kmem_cache_wq);
#endif
cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL,
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index f283c7e0a2a3..3d0f2fd4bf73 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1527,7 +1527,8 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags)
{
int ret = __get_any_page(page, pfn, flags);
- if (ret == 1 && !PageHuge(page) && !PageLRU(page)) {
+ if (ret == 1 && !PageHuge(page) &&
+ !PageLRU(page) && !__PageMovable(page)) {
/*
* Try to free it.
*/
@@ -1649,7 +1650,10 @@ static int __soft_offline_page(struct page *page, int flags)
* Try to migrate to a new page instead. migrate.c
* handles a large number of cases for us.
*/
- ret = isolate_lru_page(page);
+ if (PageLRU(page))
+ ret = isolate_lru_page(page);
+ else
+ ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
/*
* Drop page reference which is came from get_any_page()
* successful isolate_lru_page() already took another one.
@@ -1657,18 +1661,20 @@ static int __soft_offline_page(struct page *page, int flags)
put_hwpoison_page(page);
if (!ret) {
LIST_HEAD(pagelist);
- inc_node_page_state(page, NR_ISOLATED_ANON +
- page_is_file_cache(page));
+ /*
+ * After isolated lru page, the PageLRU will be cleared,
+ * so use !__PageMovable instead for LRU page's mapping
+ * cannot have PAGE_MAPPING_MOVABLE.
+ */
+ if (!__PageMovable(page))
+ inc_node_page_state(page, NR_ISOLATED_ANON +
+ page_is_file_cache(page));
list_add(&page->lru, &pagelist);
ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
MIGRATE_SYNC, MR_MEMORY_FAILURE);
if (ret) {
- if (!list_empty(&pagelist)) {
- list_del(&page->lru);
- dec_node_page_state(page, NR_ISOLATED_ANON +
- page_is_file_cache(page));
- putback_lru_page(page);
- }
+ if (!list_empty(&pagelist))
+ putback_movable_pages(&pagelist);
pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
pfn, ret, page->flags);
diff --git a/mm/memory.c b/mm/memory.c
index 6bf2b471e30c..0c759ba122e1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -30,7 +30,7 @@
/*
* 05.04.94 - Multi-page memory management added for v1.1.
- * Idea by Alex Bligh (alex@cconcepts.co.uk)
+ * Idea by Alex Bligh (alex@cconcepts.co.uk)
*
* 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG
* (Gerhard.Wichert@pdb.siemens.de)
@@ -82,9 +82,9 @@
#ifndef CONFIG_NEED_MULTIPLE_NODES
/* use the per-pgdat data instead for discontigmem - mbligh */
unsigned long max_mapnr;
-struct page *mem_map;
-
EXPORT_SYMBOL(max_mapnr);
+
+struct page *mem_map;
EXPORT_SYMBOL(mem_map);
#endif
@@ -95,8 +95,7 @@ EXPORT_SYMBOL(mem_map);
* highstart_pfn must be the same; there must be no gap between ZONE_NORMAL
* and ZONE_HIGHMEM.
*/
-void * high_memory;
-
+void *high_memory;
EXPORT_SYMBOL(high_memory);
/*
@@ -120,10 +119,10 @@ static int __init disable_randmaps(char *s)
__setup("norandmaps", disable_randmaps);
unsigned long zero_pfn __read_mostly;
-unsigned long highest_memmap_pfn __read_mostly;
-
EXPORT_SYMBOL(zero_pfn);
+unsigned long highest_memmap_pfn __read_mostly;
+
/*
* CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
*/
@@ -556,7 +555,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
if (is_vm_hugetlb_page(vma)) {
hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
- floor, next? next->vm_start: ceiling);
+ floor, next ? next->vm_start : ceiling);
} else {
/*
* Optimization: gather nearby vmas into one call down
@@ -569,7 +568,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
unlink_file_vma(vma);
}
free_pgd_range(tlb, addr, vma->vm_end,
- floor, next? next->vm_start: ceiling);
+ floor, next ? next->vm_start : ceiling);
}
vma = next;
}
@@ -1001,7 +1000,7 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
next = pmd_addr_end(addr, end);
if (pmd_trans_huge(*src_pmd) || pmd_devmap(*src_pmd)) {
int err;
- VM_BUG_ON(next-addr != HPAGE_PMD_SIZE);
+ VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, vma);
err = copy_huge_pmd(dst_mm, src_mm,
dst_pmd, src_pmd, addr, vma);
if (err == -ENOMEM)
@@ -1032,6 +1031,18 @@ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src
src_pud = pud_offset(src_pgd, addr);
do {
next = pud_addr_end(addr, end);
+ if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) {
+ int err;
+
+ VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, vma);
+ err = copy_huge_pud(dst_mm, src_mm,
+ dst_pud, src_pud, addr, vma);
+ if (err == -ENOMEM)
+ return -ENOMEM;
+ if (!err)
+ continue;
+ /* fall through */
+ }
if (pud_none_or_clear_bad(src_pud))
continue;
if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
@@ -1129,9 +1140,8 @@ again:
arch_enter_lazy_mmu_mode();
do {
pte_t ptent = *pte;
- if (pte_none(ptent)) {
+ if (pte_none(ptent))
continue;
- }
if (pte_present(ptent)) {
struct page *page;
@@ -1155,12 +1165,6 @@ again:
if (!PageAnon(page)) {
if (pte_dirty(ptent)) {
- /*
- * oom_reaper cannot tear down dirty
- * pages
- */
- if (unlikely(details && details->ignore_dirty))
- continue;
force_flush = 1;
set_page_dirty(page);
}
@@ -1179,8 +1183,8 @@ again:
}
continue;
}
- /* only check swap_entries if explicitly asked for in details */
- if (unlikely(details && !details->check_swap_entries))
+ /* If details->check_mapping, we leave swap entries. */
+ if (unlikely(details))
continue;
entry = pte_to_swp_entry(ptent);
@@ -1269,9 +1273,19 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
pud = pud_offset(pgd, addr);
do {
next = pud_addr_end(addr, end);
+ if (pud_trans_huge(*pud) || pud_devmap(*pud)) {
+ if (next - addr != HPAGE_PUD_SIZE) {
+ VM_BUG_ON_VMA(!rwsem_is_locked(&tlb->mm->mmap_sem), vma);
+ split_huge_pud(vma, pud, addr);
+ } else if (zap_huge_pud(tlb, vma, pud, addr))
+ goto next;
+ /* fall through */
+ }
if (pud_none_or_clear_bad(pud))
continue;
next = zap_pmd_range(tlb, vma, pud, addr, next, details);
+next:
+ cond_resched();
} while (pud++, addr = next, addr != end);
return addr;
@@ -1376,12 +1390,11 @@ void unmap_vmas(struct mmu_gather *tlb,
* @vma: vm_area_struct holding the applicable pages
* @start: starting address of pages to zap
* @size: number of bytes to zap
- * @details: details of shared cache invalidation
*
* Caller must protect the VMA list
*/
void zap_page_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long size, struct zap_details *details)
+ unsigned long size)
{
struct mm_struct *mm = vma->vm_mm;
struct mmu_gather tlb;
@@ -1392,7 +1405,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
update_hiwater_rss(mm);
mmu_notifier_invalidate_range_start(mm, start, end);
for ( ; vma && vma->vm_start < end; vma = vma->vm_next)
- unmap_single_vma(&tlb, vma, start, end, details);
+ unmap_single_vma(&tlb, vma, start, end, NULL);
mmu_notifier_invalidate_range_end(mm, start, end);
tlb_finish_mmu(&tlb, start, end);
}
@@ -1448,10 +1461,10 @@ EXPORT_SYMBOL_GPL(zap_vma_ptes);
pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
spinlock_t **ptl)
{
- pgd_t * pgd = pgd_offset(mm, addr);
- pud_t * pud = pud_alloc(mm, pgd, addr);
+ pgd_t *pgd = pgd_offset(mm, addr);
+ pud_t *pud = pud_alloc(mm, pgd, addr);
if (pud) {
- pmd_t * pmd = pmd_alloc(mm, pud, addr);
+ pmd_t *pmd = pmd_alloc(mm, pud, addr);
if (pmd) {
VM_BUG_ON(pmd_trans_huge(*pmd));
return pte_alloc_map_lock(mm, pmd, addr, ptl);
@@ -2042,7 +2055,7 @@ static int do_page_mkwrite(struct vm_fault *vmf)
vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
- ret = vmf->vma->vm_ops->page_mkwrite(vmf->vma, vmf);
+ ret = vmf->vma->vm_ops->page_mkwrite(vmf);
/* Restore original flags so that caller is not surprised */
vmf->flags = old_flags;
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
@@ -2314,7 +2327,7 @@ static int wp_pfn_shared(struct vm_fault *vmf)
pte_unmap_unlock(vmf->pte, vmf->ptl);
vmf->flags |= FAULT_FLAG_MKWRITE;
- ret = vma->vm_ops->pfn_mkwrite(vma, vmf);
+ ret = vma->vm_ops->pfn_mkwrite(vmf);
if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))
return ret;
return finish_mkwrite_fault(vmf);
@@ -2510,7 +2523,7 @@ void unmap_mapping_range(struct address_space *mapping,
hlen = ULONG_MAX - hba + 1;
}
- details.check_mapping = even_cows? NULL: mapping;
+ details.check_mapping = even_cows ? NULL : mapping;
details.first_index = hba;
details.last_index = hba + hlen - 1;
if (details.last_index < details.first_index)
@@ -2868,7 +2881,7 @@ static int __do_fault(struct vm_fault *vmf)
struct vm_area_struct *vma = vmf->vma;
int ret;
- ret = vma->vm_ops->fault(vma, vmf);
+ ret = vma->vm_ops->fault(vmf);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
VM_FAULT_DONE_COW)))
return ret;
@@ -2905,7 +2918,7 @@ static int pte_alloc_one_map(struct vm_fault *vmf)
atomic_long_inc(&vma->vm_mm->nr_ptes);
pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
spin_unlock(vmf->ptl);
- vmf->prealloc_pte = 0;
+ vmf->prealloc_pte = NULL;
} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
return VM_FAULT_OOM;
}
@@ -2953,7 +2966,7 @@ static void deposit_prealloc_pte(struct vm_fault *vmf)
* count that as nr_ptes.
*/
atomic_long_inc(&vma->vm_mm->nr_ptes);
- vmf->prealloc_pte = 0;
+ vmf->prealloc_pte = NULL;
}
static int do_set_pmd(struct vm_fault *vmf, struct page *page)
@@ -3359,7 +3372,7 @@ static int do_fault(struct vm_fault *vmf)
/* preallocated pagetable is unused: free it */
if (vmf->prealloc_pte) {
pte_free(vma->vm_mm, vmf->prealloc_pte);
- vmf->prealloc_pte = 0;
+ vmf->prealloc_pte = NULL;
}
return ret;
}
@@ -3387,32 +3400,32 @@ static int do_numa_page(struct vm_fault *vmf)
int last_cpupid;
int target_nid;
bool migrated = false;
- pte_t pte = vmf->orig_pte;
- bool was_writable = pte_write(pte);
+ pte_t pte;
+ bool was_writable = pte_write(vmf->orig_pte);
int flags = 0;
/*
- * The "pte" at this point cannot be used safely without
- * validation through pte_unmap_same(). It's of NUMA type but
- * the pfn may be screwed if the read is non atomic.
- *
- * We can safely just do a "set_pte_at()", because the old
- * page table entry is not accessible, so there would be no
- * concurrent hardware modifications to the PTE.
- */
+ * The "pte" at this point cannot be used safely without
+ * validation through pte_unmap_same(). It's of NUMA type but
+ * the pfn may be screwed if the read is non atomic.
+ */
vmf->ptl = pte_lockptr(vma->vm_mm, vmf->pmd);
spin_lock(vmf->ptl);
- if (unlikely(!pte_same(*vmf->pte, pte))) {
+ if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
goto out;
}
- /* Make it present again */
+ /*
+ * Make it present again, Depending on how arch implementes non
+ * accessible ptes, some can allow access by kernel mode.
+ */
+ pte = ptep_modify_prot_start(vma->vm_mm, vmf->address, vmf->pte);
pte = pte_modify(pte, vma->vm_page_prot);
pte = pte_mkyoung(pte);
if (was_writable)
pte = pte_mkwrite(pte);
- set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
+ ptep_modify_prot_commit(vma->vm_mm, vmf->address, vmf->pte, pte);
update_mmu_cache(vma, vmf->address, vmf->pte);
page = vm_normal_page(vma, vmf->address, pte);
@@ -3471,12 +3484,10 @@ out:
static int create_huge_pmd(struct vm_fault *vmf)
{
- struct vm_area_struct *vma = vmf->vma;
- if (vma_is_anonymous(vma))
+ if (vma_is_anonymous(vmf->vma))
return do_huge_pmd_anonymous_page(vmf);
- if (vma->vm_ops->pmd_fault)
- return vma->vm_ops->pmd_fault(vma, vmf->address, vmf->pmd,
- vmf->flags);
+ if (vmf->vma->vm_ops->huge_fault)
+ return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
return VM_FAULT_FALLBACK;
}
@@ -3484,9 +3495,8 @@ static int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
{
if (vma_is_anonymous(vmf->vma))
return do_huge_pmd_wp_page(vmf, orig_pmd);
- if (vmf->vma->vm_ops->pmd_fault)
- return vmf->vma->vm_ops->pmd_fault(vmf->vma, vmf->address,
- vmf->pmd, vmf->flags);
+ if (vmf->vma->vm_ops->huge_fault)
+ return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
/* COW handled on pte level: split pmd */
VM_BUG_ON_VMA(vmf->vma->vm_flags & VM_SHARED, vmf->vma);
@@ -3500,6 +3510,30 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE);
}
+static int create_huge_pud(struct vm_fault *vmf)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ /* No support for anonymous transparent PUD pages yet */
+ if (vma_is_anonymous(vmf->vma))
+ return VM_FAULT_FALLBACK;
+ if (vmf->vma->vm_ops->huge_fault)
+ return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+ return VM_FAULT_FALLBACK;
+}
+
+static int wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ /* No support for anonymous transparent PUD pages yet */
+ if (vma_is_anonymous(vmf->vma))
+ return VM_FAULT_FALLBACK;
+ if (vmf->vma->vm_ops->huge_fault)
+ return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+ return VM_FAULT_FALLBACK;
+}
+
/*
* These routines also need to handle stuff like marking pages dirty
* and/or accessed for architectures that don't do it in hardware (most
@@ -3615,22 +3649,46 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
};
struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd;
- pud_t *pud;
+ int ret;
pgd = pgd_offset(mm, address);
- pud = pud_alloc(mm, pgd, address);
- if (!pud)
+
+ vmf.pud = pud_alloc(mm, pgd, address);
+ if (!vmf.pud)
return VM_FAULT_OOM;
- vmf.pmd = pmd_alloc(mm, pud, address);
+ if (pud_none(*vmf.pud) && transparent_hugepage_enabled(vma)) {
+ ret = create_huge_pud(&vmf);
+ if (!(ret & VM_FAULT_FALLBACK))
+ return ret;
+ } else {
+ pud_t orig_pud = *vmf.pud;
+
+ barrier();
+ if (pud_trans_huge(orig_pud) || pud_devmap(orig_pud)) {
+ unsigned int dirty = flags & FAULT_FLAG_WRITE;
+
+ /* NUMA case for anonymous PUDs would go here */
+
+ if (dirty && !pud_write(orig_pud)) {
+ ret = wp_huge_pud(&vmf, orig_pud);
+ if (!(ret & VM_FAULT_FALLBACK))
+ return ret;
+ } else {
+ huge_pud_set_accessed(&vmf, orig_pud);
+ return 0;
+ }
+ }
+ }
+
+ vmf.pmd = pmd_alloc(mm, vmf.pud, address);
if (!vmf.pmd)
return VM_FAULT_OOM;
if (pmd_none(*vmf.pmd) && transparent_hugepage_enabled(vma)) {
- int ret = create_huge_pmd(&vmf);
+ ret = create_huge_pmd(&vmf);
if (!(ret & VM_FAULT_FALLBACK))
return ret;
} else {
pmd_t orig_pmd = *vmf.pmd;
- int ret;
barrier();
if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
@@ -3690,14 +3748,14 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
if (flags & FAULT_FLAG_USER) {
mem_cgroup_oom_disable();
- /*
- * The task may have entered a memcg OOM situation but
- * if the allocation error was handled gracefully (no
- * VM_FAULT_OOM), there is no need to kill anything.
- * Just clean up the OOM state peacefully.
- */
- if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))
- mem_cgroup_oom_synchronize(false);
+ /*
+ * The task may have entered a memcg OOM situation but
+ * if the allocation error was handled gracefully (no
+ * VM_FAULT_OOM), there is no need to kill anything.
+ * Just clean up the OOM state peacefully.
+ */
+ if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))
+ mem_cgroup_oom_synchronize(false);
}
/*
@@ -3747,13 +3805,14 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
*/
int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
+ spinlock_t *ptl;
pmd_t *new = pmd_alloc_one(mm, address);
if (!new)
return -ENOMEM;
smp_wmb(); /* See comment in __pte_alloc */
- spin_lock(&mm->page_table_lock);
+ ptl = pud_lock(mm, pud);
#ifndef __ARCH_HAS_4LEVEL_HACK
if (!pud_present(*pud)) {
mm_inc_nr_pmds(mm);
@@ -3767,7 +3826,7 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
} else /* Another has populated it */
pmd_free(mm, new);
#endif /* __ARCH_HAS_4LEVEL_HACK */
- spin_unlock(&mm->page_table_lock);
+ spin_unlock(ptl);
return 0;
}
#endif /* __PAGETABLE_PMD_FOLDED */
@@ -4155,6 +4214,38 @@ void copy_user_huge_page(struct page *dst, struct page *src,
copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma);
}
}
+
+long copy_huge_page_from_user(struct page *dst_page,
+ const void __user *usr_src,
+ unsigned int pages_per_huge_page,
+ bool allow_pagefault)
+{
+ void *src = (void *)usr_src;
+ void *page_kaddr;
+ unsigned long i, rc = 0;
+ unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
+
+ for (i = 0; i < pages_per_huge_page; i++) {
+ if (allow_pagefault)
+ page_kaddr = kmap(dst_page + i);
+ else
+ page_kaddr = kmap_atomic(dst_page + i);
+ rc = copy_from_user(page_kaddr,
+ (const void __user *)(src + i * PAGE_SIZE),
+ PAGE_SIZE);
+ if (allow_pagefault)
+ kunmap(dst_page + i);
+ else
+ kunmap_atomic(page_kaddr);
+
+ ret_val -= (PAGE_SIZE - rc);
+ if (rc)
+ break;
+
+ cond_resched();
+ }
+ return ret_val;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
#if USE_SPLIT_PTE_PTLOCKS && ALLOC_SPLIT_PTLOCKS
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b8c11e063ff0..11581f4cfbb4 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -179,7 +179,7 @@ static void release_memory_resource(struct resource *res)
void get_page_bootmem(unsigned long info, struct page *page,
unsigned long type)
{
- page->lru.next = (struct list_head *) type;
+ page->freelist = (void *)type;
SetPagePrivate(page);
set_page_private(page, info);
page_ref_inc(page);
@@ -189,11 +189,12 @@ void put_page_bootmem(struct page *page)
{
unsigned long type;
- type = (unsigned long) page->lru.next;
+ type = (unsigned long) page->freelist;
BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
if (page_ref_dec_return(page) == 1) {
+ page->freelist = NULL;
ClearPagePrivate(page);
set_page_private(page, 0);
INIT_LIST_HEAD(&page->lru);
@@ -227,7 +228,7 @@ static void register_page_bootmem_info_section(unsigned long start_pfn)
for (i = 0; i < mapsize; i++, page++)
get_page_bootmem(section_nr, page, SECTION_INFO);
- usemap = __nr_to_section(section_nr)->pageblock_flags;
+ usemap = section_to_usemap(__nr_to_section(section_nr));
page = virt_to_page(usemap);
mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
@@ -253,7 +254,7 @@ static void register_page_bootmem_info_section(unsigned long start_pfn)
register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION);
- usemap = __nr_to_section(section_nr)->pageblock_flags;
+ usemap = section_to_usemap(__nr_to_section(section_nr));
page = virt_to_page(usemap);
mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
@@ -466,10 +467,10 @@ static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long s
pgdat->node_start_pfn;
}
-static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
+static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn,
+ unsigned long nr_pages)
{
struct pglist_data *pgdat = zone->zone_pgdat;
- int nr_pages = PAGES_PER_SECTION;
int nid = pgdat->node_id;
int zone_type;
unsigned long flags, pfn;
@@ -499,24 +500,21 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
}
static int __meminit __add_section(int nid, struct zone *zone,
- unsigned long phys_start_pfn)
+ unsigned long pfn, unsigned long nr_pages)
{
int ret;
- if (pfn_valid(phys_start_pfn))
- return -EEXIST;
-
- ret = sparse_add_one_section(zone, phys_start_pfn);
+ ret = sparse_add_section(zone, pfn, nr_pages);
if (ret < 0)
return ret;
- ret = __add_zone(zone, phys_start_pfn);
+ ret = __add_zone(zone, pfn, nr_pages);
if (ret < 0)
return ret;
- return register_new_memory(nid, __pfn_to_section(phys_start_pfn));
+ return register_new_memory(zone, nid, __pfn_to_section(pfn));
}
/*
@@ -525,26 +523,20 @@ static int __meminit __add_section(int nid, struct zone *zone,
* call this function after deciding the zone to which to
* add the new pages.
*/
-int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
+int __ref __add_pages(int nid, struct zone *zone, unsigned long pfn,
unsigned long nr_pages)
{
- unsigned long i;
- int err = 0;
- int start_sec, end_sec;
+ int err = 0, i, start_sec, end_sec;
struct vmem_altmap *altmap;
clear_zone_contiguous(zone);
- /* during initialize mem_map, align hot-added range to section */
- start_sec = pfn_to_section_nr(phys_start_pfn);
- end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
-
- altmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn));
+ altmap = to_vmem_altmap((unsigned long) pfn_to_page(pfn));
if (altmap) {
/*
* Validate altmap is within bounds of the total request
*/
- if (altmap->base_pfn != phys_start_pfn
+ if (altmap->base_pfn != pfn
|| vmem_altmap_offset(altmap) > nr_pages) {
pr_warn_once("memory add fail, invalid altmap\n");
err = -EINVAL;
@@ -553,8 +545,16 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
altmap->alloc = 0;
}
+ start_sec = pfn_to_section_nr(pfn);
+ end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
for (i = start_sec; i <= end_sec; i++) {
- err = __add_section(nid, zone, section_nr_to_pfn(i));
+ unsigned long pfns;
+
+ pfns = min(nr_pages, PAGES_PER_SECTION
+ - (pfn & ~PAGE_SECTION_MASK));
+ err = __add_section(nid, zone, pfn, pfns);
+ pfn += pfns;
+ nr_pages -= pfns;
/*
* EEXIST is finally dealt with by ioresource collision
@@ -760,10 +760,10 @@ static void shrink_pgdat_span(struct pglist_data *pgdat,
pgdat->node_spanned_pages = 0;
}
-static void __remove_zone(struct zone *zone, unsigned long start_pfn)
+static void __remove_zone(struct zone *zone, unsigned long start_pfn,
+ unsigned long nr_pages)
{
struct pglist_data *pgdat = zone->zone_pgdat;
- int nr_pages = PAGES_PER_SECTION;
int zone_type;
unsigned long flags;
@@ -775,25 +775,22 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn)
pgdat_resize_unlock(zone->zone_pgdat, &flags);
}
-static int __remove_section(struct zone *zone, struct mem_section *ms,
- unsigned long map_offset)
+static int __remove_section(struct zone *zone, unsigned long pfn,
+ unsigned long nr_pages, unsigned long map_offset)
{
- unsigned long start_pfn;
- int scn_nr;
+ struct mem_section *ms = __nr_to_section(pfn_to_section_nr(pfn));
int ret = -EINVAL;
if (!valid_section(ms))
return ret;
- ret = unregister_memory_section(ms);
+ ret = unregister_memory_section(zone, ms);
if (ret)
return ret;
- scn_nr = __section_nr(ms);
- start_pfn = section_nr_to_pfn(scn_nr);
- __remove_zone(zone, start_pfn);
+ __remove_zone(zone, pfn, nr_pages);
- sparse_remove_one_section(zone, ms, map_offset);
+ sparse_remove_section(zone, ms, pfn, nr_pages, map_offset);
return 0;
}
@@ -808,16 +805,15 @@ static int __remove_section(struct zone *zone, struct mem_section *ms,
* sure that pages are marked reserved and zones are adjust properly by
* calling offline_pages().
*/
-int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
+int __remove_pages(struct zone *zone, unsigned long pfn,
unsigned long nr_pages)
{
- unsigned long i;
unsigned long map_offset = 0;
- int sections_to_remove, ret = 0;
+ int i, start_sec, end_sec, ret = 0;
/* In the ZONE_DEVICE case device driver owns the memory region */
if (is_dev_zone(zone)) {
- struct page *page = pfn_to_page(phys_start_pfn);
+ struct page *page = pfn_to_page(pfn);
struct vmem_altmap *altmap;
altmap = to_vmem_altmap((unsigned long) page);
@@ -826,7 +822,7 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
} else {
resource_size_t start, size;
- start = phys_start_pfn << PAGE_SHIFT;
+ start = pfn << PAGE_SHIFT;
size = nr_pages * PAGE_SIZE;
ret = release_mem_region_adjustable(&iomem_resource, start,
@@ -842,16 +838,26 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
clear_zone_contiguous(zone);
/*
- * We can only remove entire sections
+ * Only ZONE_DEVICE memory is enabled to remove
+ * section-unaligned ranges. See register_new_memory() which
+ * assumes section alignment and is skipped for ZONE_DEVICE
+ * ranges.
*/
- BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
- BUG_ON(nr_pages % PAGES_PER_SECTION);
+ if (!is_dev_zone(zone) && ((pfn | nr_pages) & ~PAGE_SECTION_MASK)) {
+ WARN(1, "section unaligned removal not supported\n");
+ return -EINVAL;
+ }
- sections_to_remove = nr_pages / PAGES_PER_SECTION;
- for (i = 0; i < sections_to_remove; i++) {
- unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
+ start_sec = pfn_to_section_nr(pfn);
+ end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
+ for (i = start_sec; i <= end_sec; i++) {
+ unsigned long pfns;
- ret = __remove_section(zone, __pfn_to_section(pfn), map_offset);
+ pfns = min(nr_pages, PAGES_PER_SECTION
+ - (pfn & ~PAGE_SECTION_MASK));
+ ret = __remove_section(zone, pfn, pfns, map_offset);
+ pfn += pfns;
+ nr_pages -= pfns;
map_offset = 0;
if (ret)
break;
@@ -861,7 +867,6 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
return ret;
}
-EXPORT_SYMBOL_GPL(__remove_pages);
#endif /* CONFIG_MEMORY_HOTREMOVE */
int set_online_page_callback(online_page_callback_t callback)
@@ -1507,7 +1512,7 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
while ((i < MAX_ORDER_NR_PAGES) &&
!pfn_valid_within(pfn + i))
i++;
- if (i == MAX_ORDER_NR_PAGES)
+ if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
continue;
page = pfn_to_page(pfn + i);
if (zone && page_zone(page) != zone)
@@ -1521,7 +1526,7 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
if (zone) {
*valid_start = start;
- *valid_end = end;
+ *valid_end = min(end, end_pfn);
return 1;
} else {
return 0;
@@ -1529,10 +1534,10 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
}
/*
- * Scan pfn range [start,end) to find movable/migratable pages (LRU pages
- * and hugepages). We scan pfn because it's much easier than scanning over
- * linked list. This function returns the pfn of the first found movable
- * page if it's found, otherwise 0.
+ * Scan pfn range [start,end) to find movable/migratable pages (LRU pages,
+ * non-lru movable pages and hugepages). We scan pfn because it's much
+ * easier than scanning over linked list. This function returns the pfn
+ * of the first found movable page if it's found, otherwise 0.
*/
static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
{
@@ -1543,6 +1548,8 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
page = pfn_to_page(pfn);
if (PageLRU(page))
return pfn;
+ if (__PageMovable(page))
+ return pfn;
if (PageHuge(page)) {
if (page_huge_active(page))
return pfn;
@@ -1619,21 +1626,25 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
if (!get_page_unless_zero(page))
continue;
/*
- * We can skip free pages. And we can only deal with pages on
- * LRU.
+ * We can skip free pages. And we can deal with pages on
+ * LRU and non-lru movable pages.
*/
- ret = isolate_lru_page(page);
+ if (PageLRU(page))
+ ret = isolate_lru_page(page);
+ else
+ ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
if (!ret) { /* Success */
put_page(page);
list_add_tail(&page->lru, &source);
move_pages--;
- inc_node_page_state(page, NR_ISOLATED_ANON +
- page_is_file_cache(page));
+ if (!__PageMovable(page))
+ inc_node_page_state(page, NR_ISOLATED_ANON +
+ page_is_file_cache(page));
} else {
#ifdef CONFIG_DEBUG_VM
- pr_alert("removing pfn %lx from LRU failed\n", pfn);
- dump_page(page, "failed to remove from LRU");
+ pr_alert("failed to isolate pfn %lx\n", pfn);
+ dump_page(page, "isolation failed");
#endif
put_page(page);
/* Because we don't have big zone->lock. we should
diff --git a/mm/migrate.c b/mm/migrate.c
index 87f4d0f81819..2c63ac06791b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -74,7 +74,7 @@ int migrate_prep_local(void)
return 0;
}
-bool isolate_movable_page(struct page *page, isolate_mode_t mode)
+int isolate_movable_page(struct page *page, isolate_mode_t mode)
{
struct address_space *mapping;
@@ -125,14 +125,14 @@ bool isolate_movable_page(struct page *page, isolate_mode_t mode)
__SetPageIsolated(page);
unlock_page(page);
- return true;
+ return 0;
out_no_isolated:
unlock_page(page);
out_putpage:
put_page(page);
out:
- return false;
+ return -EBUSY;
}
/* It should be called on page which is PG_movable */
@@ -193,82 +193,62 @@ void putback_movable_pages(struct list_head *l)
/*
* Restore a potential migration pte to a working pte entry
*/
-static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
+static int remove_migration_pte(struct page *page, struct vm_area_struct *vma,
unsigned long addr, void *old)
{
- struct mm_struct *mm = vma->vm_mm;
+ struct page_vma_mapped_walk pvmw = {
+ .page = old,
+ .vma = vma,
+ .address = addr,
+ .flags = PVMW_SYNC | PVMW_MIGRATION,
+ };
+ struct page *new;
+ pte_t pte;
swp_entry_t entry;
- pmd_t *pmd;
- pte_t *ptep, pte;
- spinlock_t *ptl;
- if (unlikely(PageHuge(new))) {
- ptep = huge_pte_offset(mm, addr);
- if (!ptep)
- goto out;
- ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep);
- } else {
- pmd = mm_find_pmd(mm, addr);
- if (!pmd)
- goto out;
+ VM_BUG_ON_PAGE(PageTail(page), page);
+ while (page_vma_mapped_walk(&pvmw)) {
+ new = page - pvmw.page->index +
+ linear_page_index(vma, pvmw.address);
- ptep = pte_offset_map(pmd, addr);
+ get_page(new);
+ pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot)));
+ if (pte_swp_soft_dirty(*pvmw.pte))
+ pte = pte_mksoft_dirty(pte);
/*
- * Peek to check is_swap_pte() before taking ptlock? No, we
- * can race mremap's move_ptes(), which skips anon_vma lock.
+ * Recheck VMA as permissions can change since migration started
*/
-
- ptl = pte_lockptr(mm, pmd);
- }
-
- spin_lock(ptl);
- pte = *ptep;
- if (!is_swap_pte(pte))
- goto unlock;
-
- entry = pte_to_swp_entry(pte);
-
- if (!is_migration_entry(entry) ||
- migration_entry_to_page(entry) != old)
- goto unlock;
-
- get_page(new);
- pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot)));
- if (pte_swp_soft_dirty(*ptep))
- pte = pte_mksoft_dirty(pte);
-
- /* Recheck VMA as permissions can change since migration started */
- if (is_write_migration_entry(entry))
- pte = maybe_mkwrite(pte, vma);
+ entry = pte_to_swp_entry(*pvmw.pte);
+ if (is_write_migration_entry(entry))
+ pte = maybe_mkwrite(pte, vma);
#ifdef CONFIG_HUGETLB_PAGE
- if (PageHuge(new)) {
- pte = pte_mkhuge(pte);
- pte = arch_make_huge_pte(pte, vma, new, 0);
- }
+ if (PageHuge(new)) {
+ pte = pte_mkhuge(pte);
+ pte = arch_make_huge_pte(pte, vma, new, 0);
+ }
#endif
- flush_dcache_page(new);
- set_pte_at(mm, addr, ptep, pte);
+ flush_dcache_page(new);
+ set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
- if (PageHuge(new)) {
- if (PageAnon(new))
- hugepage_add_anon_rmap(new, vma, addr);
+ if (PageHuge(new)) {
+ if (PageAnon(new))
+ hugepage_add_anon_rmap(new, vma, pvmw.address);
+ else
+ page_dup_rmap(new, true);
+ } else if (PageAnon(new))
+ page_add_anon_rmap(new, vma, pvmw.address, false);
else
- page_dup_rmap(new, true);
- } else if (PageAnon(new))
- page_add_anon_rmap(new, vma, addr, false);
- else
- page_add_file_rmap(new, false);
+ page_add_file_rmap(new, false);
- if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
- mlock_vma_page(new);
+ if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
+ mlock_vma_page(new);
+
+ /* No need to invalidate - it was non-present before */
+ update_mmu_cache(vma, pvmw.address, pvmw.pte);
+ }
- /* No need to invalidate - it was non-present before */
- update_mmu_cache(vma, addr, ptep);
-unlock:
- pte_unmap_unlock(ptep, ptl);
-out:
return SWAP_AGAIN;
}
diff --git a/mm/mmap.c b/mm/mmap.c
index dc4291dcc99b..499b988b1639 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -176,7 +176,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
return next;
}
-static int do_brk(unsigned long addr, unsigned long len);
+static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf);
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
@@ -185,6 +185,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
struct mm_struct *mm = current->mm;
unsigned long min_brk;
bool populate;
+ LIST_HEAD(uf);
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
@@ -222,7 +223,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
/* Always allow shrinking brk. */
if (brk <= mm->brk) {
- if (!do_munmap(mm, newbrk, oldbrk-newbrk))
+ if (!do_munmap(mm, newbrk, oldbrk-newbrk, &uf))
goto set_brk;
goto out;
}
@@ -232,13 +233,14 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
goto out;
/* Ok, looks good - let it rip. */
- if (do_brk(oldbrk, newbrk-oldbrk) < 0)
+ if (do_brk(oldbrk, newbrk-oldbrk, &uf) < 0)
goto out;
set_brk:
mm->brk = brk;
populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
up_write(&mm->mmap_sem);
+ userfaultfd_unmap_complete(mm, &uf);
if (populate)
mm_populate(oldbrk, newbrk - oldbrk);
return brk;
@@ -1304,7 +1306,8 @@ static inline int mlock_future_check(struct mm_struct *mm,
unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
unsigned long flags, vm_flags_t vm_flags,
- unsigned long pgoff, unsigned long *populate)
+ unsigned long pgoff, unsigned long *populate,
+ struct list_head *uf)
{
struct mm_struct *mm = current->mm;
int pkey = 0;
@@ -1447,7 +1450,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
vm_flags |= VM_NORESERVE;
}
- addr = mmap_region(file, addr, len, vm_flags, pgoff);
+ addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
if (!IS_ERR_VALUE(addr) &&
((vm_flags & VM_LOCKED) ||
(flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
@@ -1583,7 +1586,8 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
}
unsigned long mmap_region(struct file *file, unsigned long addr,
- unsigned long len, vm_flags_t vm_flags, unsigned long pgoff)
+ unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
+ struct list_head *uf)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev;
@@ -1609,7 +1613,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
/* Clear old maps */
while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
&rb_parent)) {
- if (do_munmap(mm, addr, len))
+ if (do_munmap(mm, addr, len, uf))
return -ENOMEM;
}
@@ -2495,11 +2499,11 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
}
/*
- * __split_vma() bypasses sysctl_max_map_count checking. We use this on the
- * munmap path where it doesn't make sense to fail.
+ * __split_vma() bypasses sysctl_max_map_count checking. We use this where it
+ * has already been checked or doesn't make sense to fail.
*/
-static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, int new_below)
+int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, int new_below)
{
struct vm_area_struct *new;
int err;
@@ -2579,7 +2583,8 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
* work. This now handles partial unmappings.
* Jeremy Fitzhardinge <jeremy@goop.org>
*/
-int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
+int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
+ struct list_head *uf)
{
unsigned long end;
struct vm_area_struct *vma, *prev, *last;
@@ -2603,6 +2608,13 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
if (vma->vm_start >= end)
return 0;
+ if (uf) {
+ int error = userfaultfd_unmap_prep(vma, start, end, uf);
+
+ if (error)
+ return error;
+ }
+
/*
* If we need to split any vma, do it now to save pain later.
*
@@ -2668,27 +2680,22 @@ int vm_munmap(unsigned long start, size_t len)
{
int ret;
struct mm_struct *mm = current->mm;
+ LIST_HEAD(uf);
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
- ret = do_munmap(mm, start, len);
+ ret = do_munmap(mm, start, len, &uf);
up_write(&mm->mmap_sem);
+ userfaultfd_unmap_complete(mm, &uf);
return ret;
}
EXPORT_SYMBOL(vm_munmap);
SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
{
- int ret;
- struct mm_struct *mm = current->mm;
-
profile_munmap(addr);
- if (down_write_killable(&mm->mmap_sem))
- return -EINTR;
- ret = do_munmap(mm, addr, len);
- up_write(&mm->mmap_sem);
- return ret;
+ return vm_munmap(addr, len);
}
@@ -2780,7 +2787,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
file = get_file(vma->vm_file);
ret = do_mmap_pgoff(vma->vm_file, start, size,
- prot, flags, pgoff, &populate);
+ prot, flags, pgoff, &populate, NULL);
fput(file);
out:
up_write(&mm->mmap_sem);
@@ -2806,11 +2813,11 @@ static inline void verify_mm_writelocked(struct mm_struct *mm)
* anonymous maps. eventually we may be able to do some
* brk-specific accounting here.
*/
-static int do_brk(unsigned long addr, unsigned long request)
+static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, struct list_head *uf)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev;
- unsigned long flags, len;
+ unsigned long len;
struct rb_node **rb_link, *rb_parent;
pgoff_t pgoff = addr >> PAGE_SHIFT;
int error;
@@ -2821,7 +2828,10 @@ static int do_brk(unsigned long addr, unsigned long request)
if (!len)
return 0;
- flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
+ /* Until we need other flags, refuse anything except VM_EXEC. */
+ if ((flags & (~VM_EXEC)) != 0)
+ return -EINVAL;
+ flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
if (offset_in_page(error))
@@ -2842,7 +2852,7 @@ static int do_brk(unsigned long addr, unsigned long request)
*/
while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
&rb_parent)) {
- if (do_munmap(mm, addr, len))
+ if (do_munmap(mm, addr, len, uf))
return -ENOMEM;
}
@@ -2889,22 +2899,35 @@ out:
return 0;
}
-int vm_brk(unsigned long addr, unsigned long len)
+static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf)
+{
+ return do_brk_flags(addr, len, 0, uf);
+}
+
+int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags)
{
struct mm_struct *mm = current->mm;
int ret;
bool populate;
+ LIST_HEAD(uf);
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
- ret = do_brk(addr, len);
+ ret = do_brk_flags(addr, len, flags, &uf);
populate = ((mm->def_flags & VM_LOCKED) != 0);
up_write(&mm->mmap_sem);
+ userfaultfd_unmap_complete(mm, &uf);
if (populate && !ret)
mm_populate(addr, len);
return ret;
}
+EXPORT_SYMBOL(vm_brk_flags);
+
+int vm_brk(unsigned long addr, unsigned long len)
+{
+ return vm_brk_flags(addr, len, 0);
+}
EXPORT_SYMBOL(vm_brk);
/* Release all mmaps. */
@@ -3111,8 +3134,7 @@ void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
mm->data_vm += npages;
}
-static int special_mapping_fault(struct vm_area_struct *vma,
- struct vm_fault *vmf);
+static int special_mapping_fault(struct vm_fault *vmf);
/*
* Having a close hook prevents vma merging regardless of flags.
@@ -3147,9 +3169,9 @@ static const struct vm_operations_struct legacy_special_mapping_vmops = {
.fault = special_mapping_fault,
};
-static int special_mapping_fault(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int special_mapping_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
pgoff_t pgoff;
struct page **pages;
@@ -3159,7 +3181,7 @@ static int special_mapping_fault(struct vm_area_struct *vma,
struct vm_special_mapping *sm = vma->vm_private_data;
if (sm->fault)
- return sm->fault(sm, vma, vmf);
+ return sm->fault(sm, vmf->vma, vmf);
pages = sm->pages;
}
@@ -3433,7 +3455,7 @@ void mm_drop_all_locks(struct mm_struct *mm)
}
/*
- * initialise the VMA slab
+ * initialise the percpu counter for VM
*/
void __init mmap_init(void)
{
diff --git a/mm/mmzone.c b/mm/mmzone.c
index 5652be858e5e..a51c0a67ea3d 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -60,7 +60,7 @@ struct zoneref *__next_zones_zonelist(struct zoneref *z,
* Find the next suitable zone to use for the allocation.
* Only filter based on nodemask if it's set
*/
- if (likely(nodes == NULL))
+ if (unlikely(nodes == NULL))
while (zonelist_zone_idx(z) > highest_zoneidx)
z++;
else
diff --git a/mm/mprotect.c b/mm/mprotect.c
index f9c07f54dd62..77115bb7c953 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -33,34 +33,6 @@
#include "internal.h"
-/*
- * For a prot_numa update we only hold mmap_sem for read so there is a
- * potential race with faulting where a pmd was temporarily none. This
- * function checks for a transhuge pmd under the appropriate lock. It
- * returns a pte if it was successfully locked or NULL if it raced with
- * a transhuge insertion.
- */
-static pte_t *lock_pte_protection(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, int prot_numa, spinlock_t **ptl)
-{
- pte_t *pte;
- spinlock_t *pmdl;
-
- /* !prot_numa is protected by mmap_sem held for write */
- if (!prot_numa)
- return pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);
-
- pmdl = pmd_lock(vma->vm_mm, pmd);
- if (unlikely(pmd_trans_huge(*pmd) || pmd_none(*pmd))) {
- spin_unlock(pmdl);
- return NULL;
- }
-
- pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);
- spin_unlock(pmdl);
- return pte;
-}
-
static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end, pgprot_t newprot,
int dirty_accountable, int prot_numa)
@@ -71,7 +43,21 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long pages = 0;
int target_node = NUMA_NO_NODE;
- pte = lock_pte_protection(vma, pmd, addr, prot_numa, &ptl);
+ /*
+ * Can be called with only the mmap_sem for reading by
+ * prot_numa so we must check the pmd isn't constantly
+ * changing from under us from pmd_none to pmd_trans_huge
+ * and/or the other way around.
+ */
+ if (pmd_trans_unstable(pmd))
+ return 0;
+
+ /*
+ * The pmd points to a regular pte so the pmd can't change
+ * from under us even if the mmap_sem is only hold for
+ * reading.
+ */
+ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
if (!pte)
return 0;
diff --git a/mm/mremap.c b/mm/mremap.c
index 30d7d2482eea..8233b0105c82 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -22,6 +22,7 @@
#include <linux/mmu_notifier.h>
#include <linux/uaccess.h>
#include <linux/mm-arch-hooks.h>
+#include <linux/userfaultfd_k.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
@@ -250,7 +251,9 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
static unsigned long move_vma(struct vm_area_struct *vma,
unsigned long old_addr, unsigned long old_len,
- unsigned long new_len, unsigned long new_addr, bool *locked)
+ unsigned long new_len, unsigned long new_addr,
+ bool *locked, struct vm_userfaultfd_ctx *uf,
+ struct list_head *uf_unmap)
{
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *new_vma;
@@ -309,6 +312,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
old_addr = new_addr;
new_addr = err;
} else {
+ mremap_userfaultfd_prep(new_vma, uf);
arch_remap(mm, old_addr, old_addr + old_len,
new_addr, new_addr + new_len);
}
@@ -338,7 +342,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
if (unlikely(vma->vm_flags & VM_PFNMAP))
untrack_pfn_moved(vma);
- if (do_munmap(mm, old_addr, old_len) < 0) {
+ if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
/* OOM: unable to split vma, just get accounts right */
vm_unacct_memory(excess >> PAGE_SHIFT);
excess = 0;
@@ -413,7 +417,9 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
}
static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
- unsigned long new_addr, unsigned long new_len, bool *locked)
+ unsigned long new_addr, unsigned long new_len, bool *locked,
+ struct vm_userfaultfd_ctx *uf,
+ struct list_head *uf_unmap)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
@@ -431,12 +437,12 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
if (addr + old_len > new_addr && new_addr + new_len > addr)
goto out;
- ret = do_munmap(mm, new_addr, new_len);
+ ret = do_munmap(mm, new_addr, new_len, NULL);
if (ret)
goto out;
if (old_len >= new_len) {
- ret = do_munmap(mm, addr+new_len, old_len - new_len);
+ ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap);
if (ret && old_len != new_len)
goto out;
old_len = new_len;
@@ -458,7 +464,8 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
if (offset_in_page(ret))
goto out1;
- ret = move_vma(vma, addr, old_len, new_len, new_addr, locked);
+ ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf,
+ uf_unmap);
if (!(offset_in_page(ret)))
goto out;
out1:
@@ -497,6 +504,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
unsigned long ret = -EINVAL;
unsigned long charged = 0;
bool locked = false;
+ struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
+ LIST_HEAD(uf_unmap);
if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
return ret;
@@ -523,7 +532,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
if (flags & MREMAP_FIXED) {
ret = mremap_to(addr, old_len, new_addr, new_len,
- &locked);
+ &locked, &uf, &uf_unmap);
goto out;
}
@@ -533,7 +542,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
* do_munmap does all the needed commit accounting
*/
if (old_len >= new_len) {
- ret = do_munmap(mm, addr+new_len, old_len - new_len);
+ ret = do_munmap(mm, addr+new_len, old_len - new_len, &uf_unmap);
if (ret && old_len != new_len)
goto out;
ret = addr;
@@ -592,7 +601,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
goto out;
}
- ret = move_vma(vma, addr, old_len, new_len, new_addr, &locked);
+ ret = move_vma(vma, addr, old_len, new_len, new_addr,
+ &locked, &uf, &uf_unmap);
}
out:
if (offset_in_page(ret)) {
@@ -602,5 +612,7 @@ out:
up_write(&current->mm->mmap_sem);
if (locked && new_len > old_len)
mm_populate(new_addr + old_len, new_len - old_len);
+ mremap_userfaultfd_complete(&uf, addr, new_addr, old_len);
+ userfaultfd_unmap_complete(mm, &uf_unmap);
return ret;
}
diff --git a/mm/nommu.c b/mm/nommu.c
index 24f9f5f39145..fe9f4fa4a7a7 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -517,7 +517,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
}
/*
- * initialise the VMA and region record slabs
+ * initialise the percpu counter for VM and region record slabs
*/
void __init mmap_init(void)
{
@@ -1191,7 +1191,7 @@ error_free:
enomem:
pr_err("Allocation of length %lu from process %d (%s) failed\n",
len, current->pid, current->comm);
- show_free_areas(0);
+ show_free_areas(0, NULL);
return -ENOMEM;
}
@@ -1205,7 +1205,8 @@ unsigned long do_mmap(struct file *file,
unsigned long flags,
vm_flags_t vm_flags,
unsigned long pgoff,
- unsigned long *populate)
+ unsigned long *populate,
+ struct list_head *uf)
{
struct vm_area_struct *vma;
struct vm_region *region;
@@ -1412,13 +1413,13 @@ error_getting_vma:
kmem_cache_free(vm_region_jar, region);
pr_warn("Allocation of vma for %lu byte allocation from process %d failed\n",
len, current->pid);
- show_free_areas(0);
+ show_free_areas(0, NULL);
return -ENOMEM;
error_getting_region:
pr_warn("Allocation of vm region for %lu byte allocation from process %d failed\n",
len, current->pid);
- show_free_areas(0);
+ show_free_areas(0, NULL);
return -ENOMEM;
}
@@ -1577,7 +1578,7 @@ static int shrink_vma(struct mm_struct *mm,
* - under NOMMU conditions the chunk to be unmapped must be backed by a single
* VMA, though it need not cover the whole VMA
*/
-int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
+int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf)
{
struct vm_area_struct *vma;
unsigned long end;
@@ -1643,7 +1644,7 @@ int vm_munmap(unsigned long addr, size_t len)
int ret;
down_write(&mm->mmap_sem);
- ret = do_munmap(mm, addr, len);
+ ret = do_munmap(mm, addr, len, NULL);
up_write(&mm->mmap_sem);
return ret;
}
@@ -1794,7 +1795,7 @@ void unmap_mapping_range(struct address_space *mapping,
}
EXPORT_SYMBOL(unmap_mapping_range);
-int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int filemap_fault(struct vm_fault *vmf)
{
BUG();
return 0;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index ec9f11d4f094..578321f1c070 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -403,12 +403,14 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
static void dump_header(struct oom_control *oc, struct task_struct *p)
{
- nodemask_t *nm = (oc->nodemask) ? oc->nodemask : &cpuset_current_mems_allowed;
-
- pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), nodemask=%*pbl, order=%d, oom_score_adj=%hd\n",
- current->comm, oc->gfp_mask, &oc->gfp_mask,
- nodemask_pr_args(nm), oc->order,
- current->signal->oom_score_adj);
+ pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), nodemask=",
+ current->comm, oc->gfp_mask, &oc->gfp_mask);
+ if (oc->nodemask)
+ pr_cont("%*pbl", nodemask_pr_args(oc->nodemask));
+ else
+ pr_cont("(null)");
+ pr_cont(", order=%d, oom_score_adj=%hd\n",
+ oc->order, current->signal->oom_score_adj);
if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order)
pr_warn("COMPACTION is disabled!!!\n");
@@ -417,7 +419,7 @@ static void dump_header(struct oom_control *oc, struct task_struct *p)
if (oc->memcg)
mem_cgroup_print_oom_info(oc->memcg, p);
else
- show_mem(SHOW_MEM_FILTER_NODES);
+ show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask);
if (sysctl_oom_dump_tasks)
dump_tasks(oc->memcg, oc->nodemask);
}
@@ -465,8 +467,6 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
{
struct mmu_gather tlb;
struct vm_area_struct *vma;
- struct zap_details details = {.check_swap_entries = true,
- .ignore_dirty = true};
bool ret = true;
/*
@@ -510,14 +510,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
tlb_gather_mmu(&tlb, mm, 0, -1);
for (vma = mm->mmap ; vma; vma = vma->vm_next) {
- if (is_vm_hugetlb_page(vma))
- continue;
-
- /*
- * mlocked VMAs require explicit munlocking before unmap.
- * Let's keep it simple here and skip such VMAs.
- */
- if (vma->vm_flags & VM_LOCKED)
+ if (!can_madv_dontneed_vma(vma))
continue;
/*
@@ -532,7 +525,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
*/
if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED))
unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end,
- &details);
+ NULL);
}
tlb_finish_mmu(&tlb, 0, -1);
pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
@@ -1013,7 +1006,7 @@ bool out_of_memory(struct oom_control *oc)
* make sure exclude 0 mask - all other users should have at least
* ___GFP_DIRECT_RECLAIM to get here.
*/
- if (oc->gfp_mask && !(oc->gfp_mask & (__GFP_FS|__GFP_NOFAIL)))
+ if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS))
return true;
/*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 216449825859..ae6e601f0a58 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -580,7 +580,7 @@ static void wb_domain_writeout_inc(struct wb_domain *dom,
__fprop_inc_percpu_max(&dom->completions, completions,
max_prop_frac);
/* First event after period switching was turned off? */
- if (!unlikely(dom->period_time)) {
+ if (unlikely(!dom->period_time)) {
/*
* We can race with other __bdi_writeout_inc calls here but
* it does not cause any harm since the resulting time when
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f3e0c69a97b7..0aba4dfc0e57 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -55,10 +55,10 @@
#include <linux/kmemleak.h>
#include <linux/compaction.h>
#include <trace/events/kmem.h>
+#include <trace/events/oom.h>
#include <linux/prefetch.h>
#include <linux/mm_inline.h>
#include <linux/migrate.h>
-#include <linux/page_ext.h>
#include <linux/hugetlb.h>
#include <linux/sched/rt.h>
#include <linux/page_owner.h>
@@ -91,6 +91,10 @@ EXPORT_PER_CPU_SYMBOL(_numa_mem_);
int _node_numa_mem_[MAX_NUMNODES];
#endif
+/* work_structs for global per-cpu drains */
+DEFINE_MUTEX(pcpu_drain_mutex);
+DEFINE_PER_CPU(struct work_struct, pcpu_drain);
+
#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
volatile unsigned long latent_entropy __latent_entropy;
EXPORT_SYMBOL(latent_entropy);
@@ -352,7 +356,7 @@ static inline unsigned long *get_pageblock_bitmap(struct page *page,
unsigned long pfn)
{
#ifdef CONFIG_SPARSEMEM
- return __pfn_to_section(pfn)->pageblock_flags;
+ return section_to_usemap(__pfn_to_section(pfn));
#else
return page_zone(page)->pageblock_flags;
#endif /* CONFIG_SPARSEMEM */
@@ -714,7 +718,7 @@ static inline void rmv_page_order(struct page *page)
/*
* This function checks whether a page is free && is the buddy
* we can do coalesce a page and its buddy if
- * (a) the buddy is not in a hole &&
+ * (a) the buddy is not in a hole (check before calling!) &&
* (b) the buddy is in the buddy system &&
* (c) a page and its buddy have the same order &&
* (d) a page and its buddy are in the same zone.
@@ -729,9 +733,6 @@ static inline void rmv_page_order(struct page *page)
static inline int page_is_buddy(struct page *page, struct page *buddy,
unsigned int order)
{
- if (!pfn_valid_within(page_to_pfn(buddy)))
- return 0;
-
if (page_is_guard(buddy) && page_order(buddy) == order) {
if (page_zone_id(page) != page_zone_id(buddy))
return 0;
@@ -787,9 +788,8 @@ static inline void __free_one_page(struct page *page,
struct zone *zone, unsigned int order,
int migratetype)
{
- unsigned long page_idx;
- unsigned long combined_idx;
- unsigned long uninitialized_var(buddy_idx);
+ unsigned long combined_pfn;
+ unsigned long uninitialized_var(buddy_pfn);
struct page *buddy;
unsigned int max_order;
@@ -802,15 +802,16 @@ static inline void __free_one_page(struct page *page,
if (likely(!is_migrate_isolate(migratetype)))
__mod_zone_freepage_state(zone, 1 << order, migratetype);
- page_idx = pfn & ((1 << MAX_ORDER) - 1);
-
- VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
+ VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page);
VM_BUG_ON_PAGE(bad_range(zone, page), page);
continue_merging:
while (order < max_order - 1) {
- buddy_idx = __find_buddy_index(page_idx, order);
- buddy = page + (buddy_idx - page_idx);
+ buddy_pfn = __find_buddy_pfn(pfn, order);
+ buddy = page + (buddy_pfn - pfn);
+
+ if (!pfn_valid_within(buddy_pfn))
+ goto done_merging;
if (!page_is_buddy(page, buddy, order))
goto done_merging;
/*
@@ -824,9 +825,9 @@ continue_merging:
zone->free_area[order].nr_free--;
rmv_page_order(buddy);
}
- combined_idx = buddy_idx & page_idx;
- page = page + (combined_idx - page_idx);
- page_idx = combined_idx;
+ combined_pfn = buddy_pfn & pfn;
+ page = page + (combined_pfn - pfn);
+ pfn = combined_pfn;
order++;
}
if (max_order < MAX_ORDER) {
@@ -841,8 +842,8 @@ continue_merging:
if (unlikely(has_isolate_pageblock(zone))) {
int buddy_mt;
- buddy_idx = __find_buddy_index(page_idx, order);
- buddy = page + (buddy_idx - page_idx);
+ buddy_pfn = __find_buddy_pfn(pfn, order);
+ buddy = page + (buddy_pfn - pfn);
buddy_mt = get_pageblock_migratetype(buddy);
if (migratetype != buddy_mt
@@ -865,12 +866,12 @@ done_merging:
* so it's less likely to be used soon and more likely to be merged
* as a higher order page
*/
- if ((order < MAX_ORDER-2) && pfn_valid_within(page_to_pfn(buddy))) {
+ if ((order < MAX_ORDER-2) && pfn_valid_within(buddy_pfn)) {
struct page *higher_page, *higher_buddy;
- combined_idx = buddy_idx & page_idx;
- higher_page = page + (combined_idx - page_idx);
- buddy_idx = __find_buddy_index(combined_idx, order + 1);
- higher_buddy = higher_page + (buddy_idx - combined_idx);
+ combined_pfn = buddy_pfn & pfn;
+ higher_page = page + (combined_pfn - pfn);
+ buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1);
+ higher_buddy = higher_page + (buddy_pfn - combined_pfn);
if (page_is_buddy(higher_page, higher_buddy, order + 1)) {
list_add_tail(&page->lru,
&zone->free_area[order].free_list[migratetype]);
@@ -1087,10 +1088,10 @@ static void free_pcppages_bulk(struct zone *zone, int count,
{
int migratetype = 0;
int batch_free = 0;
- unsigned long nr_scanned;
+ unsigned long nr_scanned, flags;
bool isolated_pageblocks;
- spin_lock(&zone->lock);
+ spin_lock_irqsave(&zone->lock, flags);
isolated_pageblocks = has_isolate_pageblock(zone);
nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
if (nr_scanned)
@@ -1139,7 +1140,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
trace_mm_page_pcpu_drain(page, 0, mt);
} while (--count && --batch_free && !list_empty(list));
}
- spin_unlock(&zone->lock);
+ spin_unlock_irqrestore(&zone->lock, flags);
}
static void free_one_page(struct zone *zone,
@@ -1147,8 +1148,9 @@ static void free_one_page(struct zone *zone,
unsigned int order,
int migratetype)
{
- unsigned long nr_scanned;
- spin_lock(&zone->lock);
+ unsigned long nr_scanned, flags;
+ spin_lock_irqsave(&zone->lock, flags);
+ __count_vm_events(PGFREE, 1 << order);
nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
if (nr_scanned)
__mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
@@ -1158,7 +1160,7 @@ static void free_one_page(struct zone *zone,
migratetype = get_pfnblock_migratetype(page, pfn);
}
__free_one_page(page, pfn, zone, order, migratetype);
- spin_unlock(&zone->lock);
+ spin_unlock_irqrestore(&zone->lock, flags);
}
static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -1236,7 +1238,6 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
static void __free_pages_ok(struct page *page, unsigned int order)
{
- unsigned long flags;
int migratetype;
unsigned long pfn = page_to_pfn(page);
@@ -1244,10 +1245,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
return;
migratetype = get_pfnblock_migratetype(page, pfn);
- local_irq_save(flags);
- __count_vm_events(PGFREE, 1 << order);
free_one_page(page_zone(page), page, pfn, order, migratetype);
- local_irq_restore(flags);
}
static void __init __free_pages_boot_core(struct page *page, unsigned int order)
@@ -2219,8 +2217,9 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
int migratetype, bool cold)
{
int i, alloced = 0;
+ unsigned long flags;
- spin_lock(&zone->lock);
+ spin_lock_irqsave(&zone->lock, flags);
for (i = 0; i < count; ++i) {
struct page *page = __rmqueue(zone, order, migratetype);
if (unlikely(page == NULL))
@@ -2256,7 +2255,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
* pages added to the pcp list.
*/
__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
- spin_unlock(&zone->lock);
+ spin_unlock_irqrestore(&zone->lock, flags);
return alloced;
}
@@ -2341,16 +2340,26 @@ void drain_local_pages(struct zone *zone)
drain_pages(cpu);
}
+static void drain_local_pages_wq(struct work_struct *work)
+{
+ /*
+ * drain_all_pages doesn't use proper cpu hotplug protection so
+ * we can race with cpu offline when the WQ can move this from
+ * a cpu pinned worker to an unbound one. We can operate on a different
+ * cpu which is allright but we also have to make sure to not move to
+ * a different one.
+ */
+ preempt_disable();
+ drain_local_pages(NULL);
+ preempt_enable();
+}
+
/*
* Spill all the per-cpu pages from all CPUs back into the buddy allocator.
*
* When zone parameter is non-NULL, spill just the single zone's pages.
*
- * Note that this code is protected against sending an IPI to an offline
- * CPU but does not guarantee sending an IPI to newly hotplugged CPUs:
- * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but
- * nothing keeps CPUs from showing up after we populated the cpumask and
- * before the call to on_each_cpu_mask().
+ * Note that this can be extremely slow as the draining happens in a workqueue.
*/
void drain_all_pages(struct zone *zone)
{
@@ -2362,6 +2371,21 @@ void drain_all_pages(struct zone *zone)
*/
static cpumask_t cpus_with_pcps;
+ /* Workqueues cannot recurse */
+ if (current->flags & PF_WQ_WORKER)
+ return;
+
+ /*
+ * Do not drain if one is already in progress unless it's specific to
+ * a zone. Such callers are primarily CMA and memory hotplug and need
+ * the drain to be complete when the call returns.
+ */
+ if (unlikely(!mutex_trylock(&pcpu_drain_mutex))) {
+ if (!zone)
+ return;
+ mutex_lock(&pcpu_drain_mutex);
+ }
+
/*
* We don't care about racing with CPU hotplug event
* as offline notification will cause the notified
@@ -2392,8 +2416,16 @@ void drain_all_pages(struct zone *zone)
else
cpumask_clear_cpu(cpu, &cpus_with_pcps);
}
- on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages,
- zone, 1);
+
+ for_each_cpu(cpu, &cpus_with_pcps) {
+ struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
+ INIT_WORK(work, drain_local_pages_wq);
+ schedule_work_on(cpu, work);
+ }
+ for_each_cpu(cpu, &cpus_with_pcps)
+ flush_work(per_cpu_ptr(&pcpu_drain, cpu));
+
+ mutex_unlock(&pcpu_drain_mutex);
}
#ifdef CONFIG_HIBERNATION
@@ -2444,17 +2476,20 @@ void free_hot_cold_page(struct page *page, bool cold)
{
struct zone *zone = page_zone(page);
struct per_cpu_pages *pcp;
- unsigned long flags;
unsigned long pfn = page_to_pfn(page);
int migratetype;
+ if (in_interrupt()) {
+ __free_pages_ok(page, 0);
+ return;
+ }
+
if (!free_pcp_prepare(page))
return;
migratetype = get_pfnblock_migratetype(page, pfn);
set_pcppage_migratetype(page, migratetype);
- local_irq_save(flags);
- __count_vm_event(PGFREE);
+ preempt_disable();
/*
* We only track unmovable, reclaimable and movable on pcp lists.
@@ -2471,6 +2506,7 @@ void free_hot_cold_page(struct page *page, bool cold)
migratetype = MIGRATE_MOVABLE;
}
+ __count_vm_event(PGFREE);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
if (!cold)
list_add(&page->lru, &pcp->lists[migratetype]);
@@ -2484,7 +2520,7 @@ void free_hot_cold_page(struct page *page, bool cold)
}
out:
- local_irq_restore(flags);
+ preempt_enable_no_resched();
}
/*
@@ -2602,74 +2638,105 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
#endif
}
+/* Remove page from the per-cpu list, caller must protect the list */
+static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
+ bool cold, struct per_cpu_pages *pcp,
+ struct list_head *list)
+{
+ struct page *page;
+
+ VM_BUG_ON(in_interrupt());
+
+ do {
+ if (list_empty(list)) {
+ pcp->count += rmqueue_bulk(zone, 0,
+ pcp->batch, list,
+ migratetype, cold);
+ if (unlikely(list_empty(list)))
+ return NULL;
+ }
+
+ if (cold)
+ page = list_last_entry(list, struct page, lru);
+ else
+ page = list_first_entry(list, struct page, lru);
+
+ list_del(&page->lru);
+ pcp->count--;
+ } while (check_new_pcp(page));
+
+ return page;
+}
+
+/* Lock and remove page from the per-cpu list */
+static struct page *rmqueue_pcplist(struct zone *preferred_zone,
+ struct zone *zone, unsigned int order,
+ gfp_t gfp_flags, int migratetype)
+{
+ struct per_cpu_pages *pcp;
+ struct list_head *list;
+ bool cold = ((gfp_flags & __GFP_COLD) != 0);
+ struct page *page;
+
+ preempt_disable();
+ pcp = &this_cpu_ptr(zone->pageset)->pcp;
+ list = &pcp->lists[migratetype];
+ page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list);
+ if (page) {
+ __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
+ zone_statistics(preferred_zone, zone);
+ }
+ preempt_enable_no_resched();
+ return page;
+}
+
/*
* Allocate a page from the given zone. Use pcplists for order-0 allocations.
*/
static inline
-struct page *buffered_rmqueue(struct zone *preferred_zone,
+struct page *rmqueue(struct zone *preferred_zone,
struct zone *zone, unsigned int order,
gfp_t gfp_flags, unsigned int alloc_flags,
int migratetype)
{
unsigned long flags;
struct page *page;
- bool cold = ((gfp_flags & __GFP_COLD) != 0);
-
- if (likely(order == 0)) {
- struct per_cpu_pages *pcp;
- struct list_head *list;
- local_irq_save(flags);
- do {
- pcp = &this_cpu_ptr(zone->pageset)->pcp;
- list = &pcp->lists[migratetype];
- if (list_empty(list)) {
- pcp->count += rmqueue_bulk(zone, 0,
- pcp->batch, list,
- migratetype, cold);
- if (unlikely(list_empty(list)))
- goto failed;
- }
-
- if (cold)
- page = list_last_entry(list, struct page, lru);
- else
- page = list_first_entry(list, struct page, lru);
-
- list_del(&page->lru);
- pcp->count--;
+ if (likely(order == 0) && !in_interrupt()) {
+ page = rmqueue_pcplist(preferred_zone, zone, order,
+ gfp_flags, migratetype);
+ goto out;
+ }
- } while (check_new_pcp(page));
- } else {
- /*
- * We most definitely don't want callers attempting to
- * allocate greater than order-1 page units with __GFP_NOFAIL.
- */
- WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
- spin_lock_irqsave(&zone->lock, flags);
+ /*
+ * We most definitely don't want callers attempting to
+ * allocate greater than order-1 page units with __GFP_NOFAIL.
+ */
+ WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
+ spin_lock_irqsave(&zone->lock, flags);
- do {
- page = NULL;
- if (alloc_flags & ALLOC_HARDER) {
- page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
- if (page)
- trace_mm_page_alloc_zone_locked(page, order, migratetype);
- }
- if (!page)
- page = __rmqueue(zone, order, migratetype);
- } while (page && check_new_pages(page, order));
- spin_unlock(&zone->lock);
+ do {
+ page = NULL;
+ if (alloc_flags & ALLOC_HARDER) {
+ page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+ if (page)
+ trace_mm_page_alloc_zone_locked(page, order, migratetype);
+ }
if (!page)
- goto failed;
- __mod_zone_freepage_state(zone, -(1 << order),
- get_pcppage_migratetype(page));
- }
+ page = __rmqueue(zone, order, migratetype);
+ } while (page && check_new_pages(page, order));
+ spin_unlock(&zone->lock);
+ if (!page)
+ goto failed;
+ __mod_zone_freepage_state(zone, -(1 << order),
+ get_pcppage_migratetype(page));
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
zone_statistics(preferred_zone, zone);
local_irq_restore(flags);
- VM_BUG_ON_PAGE(bad_range(zone, page), page);
+out:
+ VM_BUG_ON_PAGE(page && bad_range(zone, page), page);
return page;
failed:
@@ -2974,7 +3041,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
}
try_this_zone:
- page = buffered_rmqueue(ac->preferred_zoneref->zone, zone, order,
+ page = rmqueue(ac->preferred_zoneref->zone, zone, order,
gfp_mask, alloc_flags, ac->migratetype);
if (page) {
prep_new_page(page, order, gfp_mask, alloc_flags);
@@ -3007,18 +3074,12 @@ static inline bool should_suppress_show_mem(void)
return ret;
}
-static DEFINE_RATELIMIT_STATE(nopage_rs,
- DEFAULT_RATELIMIT_INTERVAL,
- DEFAULT_RATELIMIT_BURST);
-
-void warn_alloc(gfp_t gfp_mask, const char *fmt, ...)
+static void warn_alloc_show_mem(gfp_t gfp_mask, nodemask_t *nodemask)
{
unsigned int filter = SHOW_MEM_FILTER_NODES;
- struct va_format vaf;
- va_list args;
+ static DEFINE_RATELIMIT_STATE(show_mem_rs, HZ, 1);
- if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
- debug_guardpage_minorder() > 0)
+ if (should_suppress_show_mem() || !__ratelimit(&show_mem_rs))
return;
/*
@@ -3033,6 +3094,20 @@ void warn_alloc(gfp_t gfp_mask, const char *fmt, ...)
if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
filter &= ~SHOW_MEM_FILTER_NODES;
+ show_mem(filter, nodemask);
+}
+
+void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+ static DEFINE_RATELIMIT_STATE(nopage_rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+
+ if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
+ debug_guardpage_minorder() > 0)
+ return;
+
pr_warn("%s: ", current->comm);
va_start(args, fmt);
@@ -3041,11 +3116,36 @@ void warn_alloc(gfp_t gfp_mask, const char *fmt, ...)
pr_cont("%pV", &vaf);
va_end(args);
- pr_cont(", mode:%#x(%pGg)\n", gfp_mask, &gfp_mask);
+ pr_cont(", mode:%#x(%pGg), nodemask=", gfp_mask, &gfp_mask);
+ if (nodemask)
+ pr_cont("%*pbl\n", nodemask_pr_args(nodemask));
+ else
+ pr_cont("(null)\n");
+
+ cpuset_print_current_mems_allowed();
dump_stack();
- if (!should_suppress_show_mem())
- show_mem(filter);
+ warn_alloc_show_mem(gfp_mask, nodemask);
+}
+
+static inline struct page *
+__alloc_pages_cpuset_fallback(gfp_t gfp_mask, unsigned int order,
+ unsigned int alloc_flags,
+ const struct alloc_context *ac)
+{
+ struct page *page;
+
+ page = get_page_from_freelist(gfp_mask, order,
+ alloc_flags|ALLOC_CPUSET, ac);
+ /*
+ * fallback to ignore cpuset restriction if our nodes
+ * are depleted
+ */
+ if (!page)
+ page = get_page_from_freelist(gfp_mask, order,
+ alloc_flags, ac);
+
+ return page;
}
static inline struct page *
@@ -3083,47 +3183,42 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
if (page)
goto out;
- if (!(gfp_mask & __GFP_NOFAIL)) {
- /* Coredumps can quickly deplete all memory reserves */
- if (current->flags & PF_DUMPCORE)
- goto out;
- /* The OOM killer will not help higher order allocs */
- if (order > PAGE_ALLOC_COSTLY_ORDER)
- goto out;
- /* The OOM killer does not needlessly kill tasks for lowmem */
- if (ac->high_zoneidx < ZONE_NORMAL)
- goto out;
- if (pm_suspended_storage())
- goto out;
- /*
- * XXX: GFP_NOFS allocations should rather fail than rely on
- * other request to make a forward progress.
- * We are in an unfortunate situation where out_of_memory cannot
- * do much for this context but let's try it to at least get
- * access to memory reserved if the current task is killed (see
- * out_of_memory). Once filesystems are ready to handle allocation
- * failures more gracefully we should just bail out here.
- */
+ /* Coredumps can quickly deplete all memory reserves */
+ if (current->flags & PF_DUMPCORE)
+ goto out;
+ /* The OOM killer will not help higher order allocs */
+ if (order > PAGE_ALLOC_COSTLY_ORDER)
+ goto out;
+ /* The OOM killer does not needlessly kill tasks for lowmem */
+ if (ac->high_zoneidx < ZONE_NORMAL)
+ goto out;
+ if (pm_suspended_storage())
+ goto out;
+ /*
+ * XXX: GFP_NOFS allocations should rather fail than rely on
+ * other request to make a forward progress.
+ * We are in an unfortunate situation where out_of_memory cannot
+ * do much for this context but let's try it to at least get
+ * access to memory reserved if the current task is killed (see
+ * out_of_memory). Once filesystems are ready to handle allocation
+ * failures more gracefully we should just bail out here.
+ */
+
+ /* The OOM killer may not free memory on a specific node */
+ if (gfp_mask & __GFP_THISNODE)
+ goto out;
- /* The OOM killer may not free memory on a specific node */
- if (gfp_mask & __GFP_THISNODE)
- goto out;
- }
/* Exhausted what can be done so it's blamo time */
if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
*did_some_progress = 1;
- if (gfp_mask & __GFP_NOFAIL) {
- page = get_page_from_freelist(gfp_mask, order,
- ALLOC_NO_WATERMARKS|ALLOC_CPUSET, ac);
- /*
- * fallback to ignore cpuset restriction if our nodes
- * are depleted
- */
- if (!page)
- page = get_page_from_freelist(gfp_mask, order,
+ /*
+ * Help non-failing allocations by giving them access to memory
+ * reserves
+ */
+ if (gfp_mask & __GFP_NOFAIL)
+ page = __alloc_pages_cpuset_fallback(gfp_mask, order,
ALLOC_NO_WATERMARKS, ac);
- }
}
out:
mutex_unlock(&oom_lock);
@@ -3192,6 +3287,9 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
{
int max_retries = MAX_COMPACT_RETRIES;
int min_priority;
+ bool ret = false;
+ int retries = *compaction_retries;
+ enum compact_priority priority = *compact_priority;
if (!order)
return false;
@@ -3213,8 +3311,10 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
* But do not retry if the given zonelist is not suitable for
* compaction.
*/
- if (compaction_withdrawn(compact_result))
- return compaction_zonelist_suitable(ac, order, alloc_flags);
+ if (compaction_withdrawn(compact_result)) {
+ ret = compaction_zonelist_suitable(ac, order, alloc_flags);
+ goto out;
+ }
/*
* !costly requests are much more important than __GFP_REPEAT
@@ -3226,8 +3326,10 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
*/
if (order > PAGE_ALLOC_COSTLY_ORDER)
max_retries /= 4;
- if (*compaction_retries <= max_retries)
- return true;
+ if (*compaction_retries <= max_retries) {
+ ret = true;
+ goto out;
+ }
/*
* Make sure there are attempts at the highest priority if we exhausted
@@ -3236,12 +3338,15 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
check_priority:
min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ?
MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY;
+
if (*compact_priority > min_priority) {
(*compact_priority)--;
*compaction_retries = 0;
- return true;
+ ret = true;
}
- return false;
+out:
+ trace_compact_retry(order, priority, compact_result, retries, max_retries, ret);
+ return ret;
}
#else
static inline struct page *
@@ -3464,6 +3569,8 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
ac->nodemask) {
unsigned long available;
unsigned long reclaimable;
+ unsigned long min_wmark = min_wmark_pages(zone);
+ bool wmark;
available = reclaimable = zone_reclaimable_pages(zone);
available -= DIV_ROUND_UP((*no_progress_loops) * available,
@@ -3474,8 +3581,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
* Would the allocation succeed if we reclaimed the whole
* available?
*/
- if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
- ac_classzone_idx(ac), alloc_flags, available)) {
+ wmark = __zone_watermark_ok(zone, order, min_wmark,
+ ac_classzone_idx(ac), alloc_flags, available);
+ trace_reclaim_retry_zone(z, order, reclaimable,
+ available, min_wmark, *no_progress_loops, wmark);
+ if (wmark) {
/*
* If we didn't make any progress and have a lot of
* dirty + writeback pages then we should wait for
@@ -3555,6 +3665,14 @@ retry_cpuset:
no_progress_loops = 0;
compact_priority = DEF_COMPACT_PRIORITY;
cpuset_mems_cookie = read_mems_allowed_begin();
+
+ /*
+ * The fast path uses conservative alloc_flags to succeed only until
+ * kswapd needs to be woken up, and to avoid the cost of setting up
+ * alloc_flags precisely. So we do that now.
+ */
+ alloc_flags = gfp_to_alloc_flags(gfp_mask);
+
/*
* We need to recalculate the starting point for the zonelist iterator
* because we might have used different nodemask in the fast path, or
@@ -3566,14 +3684,6 @@ retry_cpuset:
if (!ac->preferred_zoneref->zone)
goto nopage;
-
- /*
- * The fast path uses conservative alloc_flags to succeed only until
- * kswapd needs to be woken up, and to avoid the cost of setting up
- * alloc_flags precisely. So we do that now.
- */
- alloc_flags = gfp_to_alloc_flags(gfp_mask);
-
if (gfp_mask & __GFP_KSWAPD_RECLAIM)
wake_all_kswapds(order, ac);
@@ -3650,35 +3760,21 @@ retry:
goto got_pg;
/* Caller is not willing to reclaim, we can't balance anything */
- if (!can_direct_reclaim) {
- /*
- * All existing users of the __GFP_NOFAIL are blockable, so warn
- * of any new users that actually allow this type of allocation
- * to fail.
- */
- WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL);
+ if (!can_direct_reclaim)
goto nopage;
- }
- /* Avoid recursion of direct reclaim */
- if (current->flags & PF_MEMALLOC) {
- /*
- * __GFP_NOFAIL request from this context is rather bizarre
- * because we cannot reclaim anything and only can loop waiting
- * for somebody to do a work for us.
- */
- if (WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
- cond_resched();
- goto retry;
- }
- goto nopage;
+ /* Make sure we know about allocations which stall for too long */
+ if (time_after(jiffies, alloc_start + stall_timeout)) {
+ warn_alloc(gfp_mask, ac->nodemask,
+ "page allocation stalls for %ums, order:%u",
+ jiffies_to_msecs(jiffies-alloc_start), order);
+ stall_timeout += 10 * HZ;
}
- /* Avoid allocations with no watermarks from looping endlessly */
- if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
+ /* Avoid recursion of direct reclaim */
+ if (current->flags & PF_MEMALLOC)
goto nopage;
-
/* Try direct reclaim and then allocating */
page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
&did_some_progress);
@@ -3702,14 +3798,6 @@ retry:
if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
goto nopage;
- /* Make sure we know about allocations which stall for too long */
- if (time_after(jiffies, alloc_start + stall_timeout)) {
- warn_alloc(gfp_mask,
- "page allocation stalls for %ums, order:%u",
- jiffies_to_msecs(jiffies-alloc_start), order);
- stall_timeout += 10 * HZ;
- }
-
if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
did_some_progress > 0, &no_progress_loops))
goto retry;
@@ -3738,6 +3826,10 @@ retry:
if (page)
goto got_pg;
+ /* Avoid allocations with no watermarks from looping endlessly */
+ if (test_thread_flag(TIF_MEMDIE))
+ goto nopage;
+
/* Retry as long as the OOM killer is making progress */
if (did_some_progress) {
no_progress_loops = 0;
@@ -3755,82 +3847,123 @@ nopage:
if (read_mems_allowed_retry(cpuset_mems_cookie))
goto retry_cpuset;
- warn_alloc(gfp_mask,
+ /*
+ * Make sure that __GFP_NOFAIL request doesn't leak out and make sure
+ * we always retry
+ */
+ if (gfp_mask & __GFP_NOFAIL) {
+ /*
+ * All existing users of the __GFP_NOFAIL are blockable, so warn
+ * of any new users that actually require GFP_NOWAIT
+ */
+ if (WARN_ON_ONCE(!can_direct_reclaim))
+ goto fail;
+
+ /*
+ * PF_MEMALLOC request from this context is rather bizarre
+ * because we cannot reclaim anything and only can loop waiting
+ * for somebody to do a work for us
+ */
+ WARN_ON_ONCE(current->flags & PF_MEMALLOC);
+
+ /*
+ * non failing costly orders are a hard requirement which we
+ * are not prepared for much so let's warn about these users
+ * so that we can identify them and convert them to something
+ * else.
+ */
+ WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
+
+ /*
+ * Help non-failing allocations by giving them access to memory
+ * reserves but do not use ALLOC_NO_WATERMARKS because this
+ * could deplete whole memory reserves which would just make
+ * the situation worse
+ */
+ page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac);
+ if (page)
+ goto got_pg;
+
+ cond_resched();
+ goto retry;
+ }
+fail:
+ warn_alloc(gfp_mask, ac->nodemask,
"page allocation failure: order:%u", order);
got_pg:
return page;
}
-/*
- * This is the 'heart' of the zoned buddy allocator.
- */
-struct page *
-__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
- struct zonelist *zonelist, nodemask_t *nodemask)
+static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
+ struct zonelist *zonelist, nodemask_t *nodemask,
+ struct alloc_context *ac, gfp_t *alloc_mask,
+ unsigned int *alloc_flags)
{
- struct page *page;
- unsigned int alloc_flags = ALLOC_WMARK_LOW;
- gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
- struct alloc_context ac = {
- .high_zoneidx = gfp_zone(gfp_mask),
- .zonelist = zonelist,
- .nodemask = nodemask,
- .migratetype = gfpflags_to_migratetype(gfp_mask),
- };
+ ac->high_zoneidx = gfp_zone(gfp_mask);
+ ac->zonelist = zonelist;
+ ac->nodemask = nodemask;
+ ac->migratetype = gfpflags_to_migratetype(gfp_mask);
if (cpusets_enabled()) {
- alloc_mask |= __GFP_HARDWALL;
- alloc_flags |= ALLOC_CPUSET;
- if (!ac.nodemask)
- ac.nodemask = &cpuset_current_mems_allowed;
+ *alloc_mask |= __GFP_HARDWALL;
+ if (!ac->nodemask)
+ ac->nodemask = &cpuset_current_mems_allowed;
+ else
+ *alloc_flags |= ALLOC_CPUSET;
}
- gfp_mask &= gfp_allowed_mask;
-
lockdep_trace_alloc(gfp_mask);
might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
if (should_fail_alloc_page(gfp_mask, order))
- return NULL;
+ return false;
- /*
- * Check the zones suitable for the gfp_mask contain at least one
- * valid zone. It's possible to have an empty zonelist as a result
- * of __GFP_THISNODE and a memoryless node
- */
- if (unlikely(!zonelist->_zonerefs->zone))
- return NULL;
+ if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
+ *alloc_flags |= ALLOC_CMA;
- if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE)
- alloc_flags |= ALLOC_CMA;
+ return true;
+}
+/* Determine whether to spread dirty pages and what the first usable zone */
+static inline void finalise_ac(gfp_t gfp_mask,
+ unsigned int order, struct alloc_context *ac)
+{
/* Dirty zone balancing only done in the fast path */
- ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE);
+ ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE);
/*
* The preferred zone is used for statistics but crucially it is
* also used as the starting point for the zonelist iterator. It
* may get reset for allocations that ignore memory policies.
*/
- ac.preferred_zoneref = first_zones_zonelist(ac.zonelist,
- ac.high_zoneidx, ac.nodemask);
- if (!ac.preferred_zoneref->zone) {
- page = NULL;
- /*
- * This might be due to race with cpuset_current_mems_allowed
- * update, so make sure we retry with original nodemask in the
- * slow path.
- */
- goto no_zone;
- }
+ ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
+ ac->high_zoneidx, ac->nodemask);
+}
+
+/*
+ * This is the 'heart' of the zoned buddy allocator.
+ */
+struct page *
+__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
+ struct zonelist *zonelist, nodemask_t *nodemask)
+{
+ struct page *page;
+ unsigned int alloc_flags = ALLOC_WMARK_LOW;
+ gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
+ struct alloc_context ac = { };
+
+ gfp_mask &= gfp_allowed_mask;
+ if (!prepare_alloc_pages(gfp_mask, order, zonelist, nodemask, &ac, &alloc_mask, &alloc_flags))
+ return NULL;
+
+ finalise_ac(gfp_mask, order, &ac);
/* First allocation attempt */
page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac);
if (likely(page))
goto out;
-no_zone:
/*
* Runtime PM, block IO and its error handling path can deadlock
* because I/O on the device might not complete.
@@ -4252,20 +4385,20 @@ void si_meminfo_node(struct sysinfo *val, int nid)
* Determine whether the node should be displayed or not, depending on whether
* SHOW_MEM_FILTER_NODES was passed to show_free_areas().
*/
-bool skip_free_areas_node(unsigned int flags, int nid)
+static bool show_mem_node_skip(unsigned int flags, int nid, nodemask_t *nodemask)
{
- bool ret = false;
- unsigned int cpuset_mems_cookie;
-
if (!(flags & SHOW_MEM_FILTER_NODES))
- goto out;
+ return false;
- do {
- cpuset_mems_cookie = read_mems_allowed_begin();
- ret = !node_isset(nid, cpuset_current_mems_allowed);
- } while (read_mems_allowed_retry(cpuset_mems_cookie));
-out:
- return ret;
+ /*
+ * no node mask - aka implicit memory numa policy. Do not bother with
+ * the synchronization - read_mems_allowed_begin - because we do not
+ * have to be precise here.
+ */
+ if (!nodemask)
+ nodemask = &cpuset_current_mems_allowed;
+
+ return !node_isset(nid, *nodemask);
}
#define K(x) ((x) << (PAGE_SHIFT-10))
@@ -4306,7 +4439,7 @@ static void show_migration_types(unsigned char type)
* SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's
* cpuset.
*/
-void show_free_areas(unsigned int filter)
+void show_free_areas(unsigned int filter, nodemask_t *nodemask)
{
unsigned long free_pcp = 0;
int cpu;
@@ -4314,7 +4447,7 @@ void show_free_areas(unsigned int filter)
pg_data_t *pgdat;
for_each_populated_zone(zone) {
- if (skip_free_areas_node(filter, zone_to_nid(zone)))
+ if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
continue;
for_each_online_cpu(cpu)
@@ -4348,6 +4481,9 @@ void show_free_areas(unsigned int filter)
global_page_state(NR_FREE_CMA_PAGES));
for_each_online_pgdat(pgdat) {
+ if (show_mem_node_skip(filter, pgdat->node_id, nodemask))
+ continue;
+
printk("Node %d"
" active_anon:%lukB"
" inactive_anon:%lukB"
@@ -4397,7 +4533,7 @@ void show_free_areas(unsigned int filter)
for_each_populated_zone(zone) {
int i;
- if (skip_free_areas_node(filter, zone_to_nid(zone)))
+ if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
continue;
free_pcp = 0;
@@ -4462,7 +4598,7 @@ void show_free_areas(unsigned int filter)
unsigned long nr[MAX_ORDER], flags, total = 0;
unsigned char types[MAX_ORDER];
- if (skip_free_areas_node(filter, zone_to_nid(zone)))
+ if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
continue;
show_node(zone);
printk(KERN_CONT "%s: ", zone->name);
@@ -5083,8 +5219,17 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
if (context != MEMMAP_EARLY)
goto not_early;
- if (!early_pfn_valid(pfn))
+ if (!early_pfn_valid(pfn)) {
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+ /*
+ * Skip to the pfn preceding the next valid one (or
+ * end_pfn), such that we hit a valid pfn (or end_pfn)
+ * on our next iteration of the loop.
+ */
+ pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1;
+#endif
continue;
+ }
if (!early_pfn_in_nid(pfn, nid))
continue;
if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
@@ -5591,6 +5736,11 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
adjust_zone_range_for_zone_movable(nid, zone_type,
node_start_pfn, node_end_pfn,
&zone_start_pfn, &zone_end_pfn);
+
+ /* If this node has no page within this zone, return 0. */
+ if (zone_start_pfn == zone_end_pfn)
+ return 0;
+
nr_absent = __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
/*
@@ -6378,10 +6528,12 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
/* Print out the early node map */
pr_info("Early memory node ranges\n");
- for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
pr_info(" node %3d: [mem %#018Lx-%#018Lx]\n", nid,
(u64)start_pfn << PAGE_SHIFT,
((u64)end_pfn << PAGE_SHIFT) - 1);
+ section_active_init(start_pfn, end_pfn - start_pfn);
+ }
/* Initialise every node */
mminit_verify_pageflags_layout();
@@ -7081,8 +7233,9 @@ void *__init alloc_large_system_hash(const char *tablename,
* If @count is not zero, it is okay to include less @count unmovable pages
*
* PageLRU check without isolation or lru_lock could race so that
- * MIGRATE_MOVABLE block might include unmovable pages. It means you can't
- * expect this function should be exact.
+ * MIGRATE_MOVABLE block might include unmovable pages. And __PageMovable
+ * check without lock_page also may miss some movable non-lru pages at
+ * race condition. So you can't expect this function should be exact.
*/
bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
bool skip_hwpoisoned_pages)
@@ -7138,6 +7291,9 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
if (skip_hwpoisoned_pages && PageHWPoison(page))
continue;
+ if (__PageMovable(page))
+ continue;
+
if (!PageLRU(page))
found++;
/*
@@ -7249,6 +7405,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
* #MIGRATE_MOVABLE or #MIGRATE_CMA). All pageblocks
* in range must have the same migratetype and it must
* be either of the two.
+ * @gfp_mask: GFP mask to use during compaction
*
* The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES
* aligned, however it's the caller's responsibility to guarantee that
@@ -7262,7 +7419,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
* need to be freed with free_contig_range().
*/
int alloc_contig_range(unsigned long start, unsigned long end,
- unsigned migratetype)
+ unsigned migratetype, gfp_t gfp_mask)
{
unsigned long outer_start, outer_end;
unsigned int order;
@@ -7274,7 +7431,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
.zone = page_zone(pfn_to_page(start)),
.mode = MIGRATE_SYNC,
.ignore_skip_hint = true,
- .gfp_mask = GFP_KERNEL,
+ .gfp_mask = memalloc_noio_flags(gfp_mask),
};
INIT_LIST_HEAD(&cc.migratepages);
diff --git a/mm/page_idle.c b/mm/page_idle.c
index ae11aa914e55..b0ee56c56b58 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -54,27 +54,27 @@ static int page_idle_clear_pte_refs_one(struct page *page,
struct vm_area_struct *vma,
unsigned long addr, void *arg)
{
- struct mm_struct *mm = vma->vm_mm;
- pmd_t *pmd;
- pte_t *pte;
- spinlock_t *ptl;
+ struct page_vma_mapped_walk pvmw = {
+ .page = page,
+ .vma = vma,
+ .address = addr,
+ };
bool referenced = false;
- if (!page_check_address_transhuge(page, mm, addr, &pmd, &pte, &ptl))
- return SWAP_AGAIN;
-
- if (pte) {
- referenced = ptep_clear_young_notify(vma, addr, pte);
- pte_unmap(pte);
- } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
- referenced = pmdp_clear_young_notify(vma, addr, pmd);
- } else {
- /* unexpected pmd-mapped page? */
- WARN_ON_ONCE(1);
+ while (page_vma_mapped_walk(&pvmw)) {
+ addr = pvmw.address;
+ if (pvmw.pte) {
+ referenced = ptep_clear_young_notify(vma, addr,
+ pvmw.pte);
+ } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
+ referenced = pmdp_clear_young_notify(vma, addr,
+ pvmw.pmd);
+ } else {
+ /* unexpected pmd-mapped page? */
+ WARN_ON_ONCE(1);
+ }
}
- spin_unlock(ptl);
-
if (referenced) {
clear_page_idle(page);
/*
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index a5594bfcc5ed..f4e17a57926a 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -83,7 +83,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
unsigned long flags, nr_pages;
bool isolated_page = false;
unsigned int order;
- unsigned long page_idx, buddy_idx;
+ unsigned long pfn, buddy_pfn;
struct page *buddy;
zone = page_zone(page);
@@ -102,11 +102,11 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
if (PageBuddy(page)) {
order = page_order(page);
if (order >= pageblock_order) {
- page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
- buddy_idx = __find_buddy_index(page_idx, order);
- buddy = page + (buddy_idx - page_idx);
+ pfn = page_to_pfn(page);
+ buddy_pfn = __find_buddy_pfn(pfn, order);
+ buddy = page + (buddy_pfn - pfn);
- if (pfn_valid_within(page_to_pfn(buddy)) &&
+ if (pfn_valid_within(buddy_pfn) &&
!is_migrate_isolate_page(buddy)) {
__isolate_free_page(page, order);
isolated_page = true;
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 60634dc53a88..c3cee247f2e6 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -261,7 +261,7 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
*/
for (; pfn < end_pfn; ) {
if (!pfn_valid(pfn)) {
- pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
+ pfn = ALIGN(pfn + 1, pageblock_nr_pages);
continue;
}
@@ -527,7 +527,7 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
*/
for (; pfn < end_pfn; ) {
if (!pfn_valid(pfn)) {
- pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
+ pfn = ALIGN(pfn + 1, pageblock_nr_pages);
continue;
}
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
new file mode 100644
index 000000000000..a23001a22c15
--- /dev/null
+++ b/mm/page_vma_mapped.c
@@ -0,0 +1,218 @@
+#include <linux/mm.h>
+#include <linux/rmap.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+
+#include "internal.h"
+
+static inline bool check_pmd(struct page_vma_mapped_walk *pvmw)
+{
+ pmd_t pmde;
+ /*
+ * Make sure we don't re-load pmd between present and !trans_huge check.
+ * We need a consistent view.
+ */
+ pmde = READ_ONCE(*pvmw->pmd);
+ return pmd_present(pmde) && !pmd_trans_huge(pmde);
+}
+
+static inline bool not_found(struct page_vma_mapped_walk *pvmw)
+{
+ page_vma_mapped_walk_done(pvmw);
+ return false;
+}
+
+static bool map_pte(struct page_vma_mapped_walk *pvmw)
+{
+ pvmw->pte = pte_offset_map(pvmw->pmd, pvmw->address);
+ if (!(pvmw->flags & PVMW_SYNC)) {
+ if (pvmw->flags & PVMW_MIGRATION) {
+ if (!is_swap_pte(*pvmw->pte))
+ return false;
+ } else {
+ if (!pte_present(*pvmw->pte))
+ return false;
+ }
+ }
+ pvmw->ptl = pte_lockptr(pvmw->vma->vm_mm, pvmw->pmd);
+ spin_lock(pvmw->ptl);
+ return true;
+}
+
+static bool check_pte(struct page_vma_mapped_walk *pvmw)
+{
+ if (pvmw->flags & PVMW_MIGRATION) {
+#ifdef CONFIG_MIGRATION
+ swp_entry_t entry;
+ if (!is_swap_pte(*pvmw->pte))
+ return false;
+ entry = pte_to_swp_entry(*pvmw->pte);
+ if (!is_migration_entry(entry))
+ return false;
+ if (migration_entry_to_page(entry) - pvmw->page >=
+ hpage_nr_pages(pvmw->page)) {
+ return false;
+ }
+ if (migration_entry_to_page(entry) < pvmw->page)
+ return false;
+#else
+ WARN_ON_ONCE(1);
+#endif
+ } else {
+ if (!pte_present(*pvmw->pte))
+ return false;
+
+ /* THP can be referenced by any subpage */
+ if (pte_page(*pvmw->pte) - pvmw->page >=
+ hpage_nr_pages(pvmw->page)) {
+ return false;
+ }
+ if (pte_page(*pvmw->pte) < pvmw->page)
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
+ * @pvmw->address
+ * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags
+ * must be set. pmd, pte and ptl must be NULL.
+ *
+ * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte point
+ * to relevant page table entries. @pvmw->ptl is locked. @pvmw->address is
+ * adjusted if needed (for PTE-mapped THPs).
+ *
+ * If @pvmw->pmd is set but @pvmw->pte is not, you have found PMD-mapped page
+ * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in
+ * a loop to find all PTEs that map the THP.
+ *
+ * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
+ * regardless of which page table level the page is mapped at. @pvmw->pmd is
+ * NULL.
+ *
+ * Retruns false if there are no more page table entries for the page in
+ * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
+ *
+ * If you need to stop the walk before page_vma_mapped_walk() returned false,
+ * use page_vma_mapped_walk_done(). It will do the housekeeping.
+ */
+bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+{
+ struct mm_struct *mm = pvmw->vma->vm_mm;
+ struct page *page = pvmw->page;
+ pgd_t *pgd;
+ pud_t *pud;
+
+ /* The only possible pmd mapping has been handled on last iteration */
+ if (pvmw->pmd && !pvmw->pte)
+ return not_found(pvmw);
+
+ /* Only for THP, seek to next pte entry makes sense */
+ if (pvmw->pte) {
+ if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
+ return not_found(pvmw);
+ goto next_pte;
+ }
+
+ if (unlikely(PageHuge(pvmw->page))) {
+ /* when pud is not present, pte will be NULL */
+ pvmw->pte = huge_pte_offset(mm, pvmw->address);
+ if (!pvmw->pte)
+ return false;
+
+ pvmw->ptl = huge_pte_lockptr(page_hstate(page), mm, pvmw->pte);
+ spin_lock(pvmw->ptl);
+ if (!check_pte(pvmw))
+ return not_found(pvmw);
+ return true;
+ }
+restart:
+ pgd = pgd_offset(mm, pvmw->address);
+ if (!pgd_present(*pgd))
+ return false;
+ pud = pud_offset(pgd, pvmw->address);
+ if (!pud_present(*pud))
+ return false;
+ pvmw->pmd = pmd_offset(pud, pvmw->address);
+ if (pmd_trans_huge(*pvmw->pmd)) {
+ pvmw->ptl = pmd_lock(mm, pvmw->pmd);
+ if (!pmd_present(*pvmw->pmd))
+ return not_found(pvmw);
+ if (likely(pmd_trans_huge(*pvmw->pmd))) {
+ if (pvmw->flags & PVMW_MIGRATION)
+ return not_found(pvmw);
+ if (pmd_page(*pvmw->pmd) != page)
+ return not_found(pvmw);
+ return true;
+ } else {
+ /* THP pmd was split under us: handle on pte level */
+ spin_unlock(pvmw->ptl);
+ pvmw->ptl = NULL;
+ }
+ } else {
+ if (!check_pmd(pvmw))
+ return false;
+ }
+ if (!map_pte(pvmw))
+ goto next_pte;
+ while (1) {
+ if (check_pte(pvmw))
+ return true;
+next_pte: do {
+ pvmw->address += PAGE_SIZE;
+ if (pvmw->address >=
+ __vma_address(pvmw->page, pvmw->vma) +
+ hpage_nr_pages(pvmw->page) * PAGE_SIZE)
+ return not_found(pvmw);
+ /* Did we cross page table boundary? */
+ if (pvmw->address % PMD_SIZE == 0) {
+ pte_unmap(pvmw->pte);
+ if (pvmw->ptl) {
+ spin_unlock(pvmw->ptl);
+ pvmw->ptl = NULL;
+ }
+ goto restart;
+ } else {
+ pvmw->pte++;
+ }
+ } while (pte_none(*pvmw->pte));
+
+ if (!pvmw->ptl) {
+ pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
+ spin_lock(pvmw->ptl);
+ }
+ }
+}
+
+/**
+ * page_mapped_in_vma - check whether a page is really mapped in a VMA
+ * @page: the page to test
+ * @vma: the VMA to test
+ *
+ * Returns 1 if the page is mapped into the page tables of the VMA, 0
+ * if the page is not mapped into the page tables of this VMA. Only
+ * valid for normal file or anonymous VMAs.
+ */
+int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+{
+ struct page_vma_mapped_walk pvmw = {
+ .page = page,
+ .vma = vma,
+ .flags = PVMW_SYNC,
+ };
+ unsigned long start, end;
+
+ start = __vma_address(page, vma);
+ end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1);
+
+ if (unlikely(end < vma->vm_start || start >= vma->vm_end))
+ return 0;
+ pvmw.address = max(start, vma->vm_start);
+ if (!page_vma_mapped_walk(&pvmw))
+ return 0;
+ page_vma_mapped_walk_done(&pvmw);
+ return 1;
+}
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 207244489a68..03761577ae86 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -78,14 +78,32 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
pud = pud_offset(pgd, addr);
do {
+ again:
next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud)) {
+ if (pud_none(*pud) || !walk->vma) {
if (walk->pte_hole)
err = walk->pte_hole(addr, next, walk);
if (err)
break;
continue;
}
+
+ if (walk->pud_entry) {
+ spinlock_t *ptl = pud_trans_huge_lock(pud, walk->vma);
+
+ if (ptl) {
+ err = walk->pud_entry(pud, addr, next, walk);
+ spin_unlock(ptl);
+ if (err)
+ break;
+ continue;
+ }
+ }
+
+ split_huge_pud(walk->vma, pud, addr);
+ if (pud_none(*pud))
+ goto again;
+
if (walk->pmd_entry || walk->pte_entry)
err = walk_pmd_range(pud, addr, next, walk);
if (err)
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 71c5f9109f2a..4ed5908c65b0 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -123,6 +123,20 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
return pmd;
}
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+pud_t pudp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp)
+{
+ pud_t pud;
+
+ VM_BUG_ON(address & ~HPAGE_PUD_MASK);
+ VM_BUG_ON(!pud_trans_huge(*pudp) && !pud_devmap(*pudp));
+ pud = pudp_huge_get_and_clear(vma->vm_mm, address, pudp);
+ flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE);
+ return pud;
+}
+#endif
#endif
#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
diff --git a/mm/rmap.c b/mm/rmap.c
index 91619fd70939..8774791e2809 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -607,8 +607,7 @@ void try_to_unmap_flush_dirty(void)
try_to_unmap_flush();
}
-static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
- struct page *page, bool writable)
+static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
{
struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
@@ -643,8 +642,7 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
return should_defer;
}
#else
-static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
- struct page *page, bool writable)
+static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
{
}
@@ -710,170 +708,6 @@ out:
return pmd;
}
-/*
- * Check that @page is mapped at @address into @mm.
- *
- * If @sync is false, page_check_address may perform a racy check to avoid
- * the page table lock when the pte is not present (helpful when reclaiming
- * highly shared pages).
- *
- * On success returns with pte mapped and locked.
- */
-pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
- unsigned long address, spinlock_t **ptlp, int sync)
-{
- pmd_t *pmd;
- pte_t *pte;
- spinlock_t *ptl;
-
- if (unlikely(PageHuge(page))) {
- /* when pud is not present, pte will be NULL */
- pte = huge_pte_offset(mm, address);
- if (!pte)
- return NULL;
-
- ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
- goto check;
- }
-
- pmd = mm_find_pmd(mm, address);
- if (!pmd)
- return NULL;
-
- pte = pte_offset_map(pmd, address);
- /* Make a quick check before getting the lock */
- if (!sync && !pte_present(*pte)) {
- pte_unmap(pte);
- return NULL;
- }
-
- ptl = pte_lockptr(mm, pmd);
-check:
- spin_lock(ptl);
- if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
- *ptlp = ptl;
- return pte;
- }
- pte_unmap_unlock(pte, ptl);
- return NULL;
-}
-
-/**
- * page_mapped_in_vma - check whether a page is really mapped in a VMA
- * @page: the page to test
- * @vma: the VMA to test
- *
- * Returns 1 if the page is mapped into the page tables of the VMA, 0
- * if the page is not mapped into the page tables of this VMA. Only
- * valid for normal file or anonymous VMAs.
- */
-int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
-{
- unsigned long address;
- pte_t *pte;
- spinlock_t *ptl;
-
- address = __vma_address(page, vma);
- if (unlikely(address < vma->vm_start || address >= vma->vm_end))
- return 0;
- pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
- if (!pte) /* the page is not in this mm */
- return 0;
- pte_unmap_unlock(pte, ptl);
-
- return 1;
-}
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-/*
- * Check that @page is mapped at @address into @mm. In contrast to
- * page_check_address(), this function can handle transparent huge pages.
- *
- * On success returns true with pte mapped and locked. For PMD-mapped
- * transparent huge pages *@ptep is set to NULL.
- */
-bool page_check_address_transhuge(struct page *page, struct mm_struct *mm,
- unsigned long address, pmd_t **pmdp,
- pte_t **ptep, spinlock_t **ptlp)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- spinlock_t *ptl;
-
- if (unlikely(PageHuge(page))) {
- /* when pud is not present, pte will be NULL */
- pte = huge_pte_offset(mm, address);
- if (!pte)
- return false;
-
- ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
- pmd = NULL;
- goto check_pte;
- }
-
- pgd = pgd_offset(mm, address);
- if (!pgd_present(*pgd))
- return false;
- pud = pud_offset(pgd, address);
- if (!pud_present(*pud))
- return false;
- pmd = pmd_offset(pud, address);
-
- if (pmd_trans_huge(*pmd)) {
- ptl = pmd_lock(mm, pmd);
- if (!pmd_present(*pmd))
- goto unlock_pmd;
- if (unlikely(!pmd_trans_huge(*pmd))) {
- spin_unlock(ptl);
- goto map_pte;
- }
-
- if (pmd_page(*pmd) != page)
- goto unlock_pmd;
-
- pte = NULL;
- goto found;
-unlock_pmd:
- spin_unlock(ptl);
- return false;
- } else {
- pmd_t pmde = *pmd;
-
- barrier();
- if (!pmd_present(pmde) || pmd_trans_huge(pmde))
- return false;
- }
-map_pte:
- pte = pte_offset_map(pmd, address);
- if (!pte_present(*pte)) {
- pte_unmap(pte);
- return false;
- }
-
- ptl = pte_lockptr(mm, pmd);
-check_pte:
- spin_lock(ptl);
-
- if (!pte_present(*pte)) {
- pte_unmap_unlock(pte, ptl);
- return false;
- }
-
- /* THP can be referenced by any subpage */
- if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
- pte_unmap_unlock(pte, ptl);
- return false;
- }
-found:
- *ptep = pte;
- *pmdp = pmd;
- *ptlp = ptl;
- return true;
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
struct page_referenced_arg {
int mapcount;
int referenced;
@@ -886,45 +720,48 @@ struct page_referenced_arg {
static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
unsigned long address, void *arg)
{
- struct mm_struct *mm = vma->vm_mm;
struct page_referenced_arg *pra = arg;
- pmd_t *pmd;
- pte_t *pte;
- spinlock_t *ptl;
+ struct page_vma_mapped_walk pvmw = {
+ .page = page,
+ .vma = vma,
+ .address = address,
+ };
int referenced = 0;
- if (!page_check_address_transhuge(page, mm, address, &pmd, &pte, &ptl))
- return SWAP_AGAIN;
+ while (page_vma_mapped_walk(&pvmw)) {
+ address = pvmw.address;
- if (vma->vm_flags & VM_LOCKED) {
- if (pte)
- pte_unmap(pte);
- spin_unlock(ptl);
- pra->vm_flags |= VM_LOCKED;
- return SWAP_FAIL; /* To break the loop */
- }
+ if (vma->vm_flags & VM_LOCKED) {
+ page_vma_mapped_walk_done(&pvmw);
+ pra->vm_flags |= VM_LOCKED;
+ return SWAP_FAIL; /* To break the loop */
+ }
- if (pte) {
- if (ptep_clear_flush_young_notify(vma, address, pte)) {
- /*
- * Don't treat a reference through a sequentially read
- * mapping as such. If the page has been used in
- * another mapping, we will catch it; if this other
- * mapping is already gone, the unmap path will have
- * set PG_referenced or activated the page.
- */
- if (likely(!(vma->vm_flags & VM_SEQ_READ)))
+ if (pvmw.pte) {
+ if (ptep_clear_flush_young_notify(vma, address,
+ pvmw.pte)) {
+ /*
+ * Don't treat a reference through
+ * a sequentially read mapping as such.
+ * If the page has been used in another mapping,
+ * we will catch it; if this other mapping is
+ * already gone, the unmap path will have set
+ * PG_referenced or activated the page.
+ */
+ if (likely(!(vma->vm_flags & VM_SEQ_READ)))
+ referenced++;
+ }
+ } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
+ if (pmdp_clear_flush_young_notify(vma, address,
+ pvmw.pmd))
referenced++;
+ } else {
+ /* unexpected pmd-mapped page? */
+ WARN_ON_ONCE(1);
}
- pte_unmap(pte);
- } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
- if (pmdp_clear_flush_young_notify(vma, address, pmd))
- referenced++;
- } else {
- /* unexpected pmd-mapped page? */
- WARN_ON_ONCE(1);
+
+ pra->mapcount--;
}
- spin_unlock(ptl);
if (referenced)
clear_page_idle(page);
@@ -936,7 +773,6 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
pra->vm_flags |= vma->vm_flags;
}
- pra->mapcount--;
if (!pra->mapcount)
return SWAP_SUCCESS; /* To break the loop */
@@ -1015,34 +851,56 @@ int page_referenced(struct page *page,
static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
unsigned long address, void *arg)
{
- struct mm_struct *mm = vma->vm_mm;
- pte_t *pte;
- spinlock_t *ptl;
- int ret = 0;
+ struct page_vma_mapped_walk pvmw = {
+ .page = page,
+ .vma = vma,
+ .address = address,
+ .flags = PVMW_SYNC,
+ };
int *cleaned = arg;
- pte = page_check_address(page, mm, address, &ptl, 1);
- if (!pte)
- goto out;
-
- if (pte_dirty(*pte) || pte_write(*pte)) {
- pte_t entry;
+ while (page_vma_mapped_walk(&pvmw)) {
+ int ret = 0;
+ address = pvmw.address;
+ if (pvmw.pte) {
+ pte_t entry;
+ pte_t *pte = pvmw.pte;
+
+ if (!pte_dirty(*pte) && !pte_write(*pte))
+ continue;
+
+ flush_cache_page(vma, address, pte_pfn(*pte));
+ entry = ptep_clear_flush(vma, address, pte);
+ entry = pte_wrprotect(entry);
+ entry = pte_mkclean(entry);
+ set_pte_at(vma->vm_mm, address, pte, entry);
+ ret = 1;
+ } else {
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
+ pmd_t *pmd = pvmw.pmd;
+ pmd_t entry;
+
+ if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
+ continue;
+
+ flush_cache_page(vma, address, page_to_pfn(page));
+ entry = pmdp_huge_clear_flush(vma, address, pmd);
+ entry = pmd_wrprotect(entry);
+ entry = pmd_mkclean(entry);
+ set_pmd_at(vma->vm_mm, address, pmd, entry);
+ ret = 1;
+#else
+ /* unexpected pmd-mapped page? */
+ WARN_ON_ONCE(1);
+#endif
+ }
- flush_cache_page(vma, address, pte_pfn(*pte));
- entry = ptep_clear_flush(vma, address, pte);
- entry = pte_wrprotect(entry);
- entry = pte_mkclean(entry);
- set_pte_at(mm, address, pte, entry);
- ret = 1;
+ if (ret) {
+ mmu_notifier_invalidate_page(vma->vm_mm, address);
+ (*cleaned)++;
+ }
}
- pte_unmap_unlock(pte, ptl);
-
- if (ret) {
- mmu_notifier_invalidate_page(mm, address);
- (*cleaned)++;
- }
-out:
return SWAP_AGAIN;
}
@@ -1435,155 +1293,163 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
unsigned long address, void *arg)
{
struct mm_struct *mm = vma->vm_mm;
- pte_t *pte;
+ struct page_vma_mapped_walk pvmw = {
+ .page = page,
+ .vma = vma,
+ .address = address,
+ };
pte_t pteval;
- spinlock_t *ptl;
+ struct page *subpage;
int ret = SWAP_AGAIN;
struct rmap_private *rp = arg;
enum ttu_flags flags = rp->flags;
/* munlock has nothing to gain from examining un-locked vmas */
if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
- goto out;
+ return SWAP_AGAIN;
if (flags & TTU_SPLIT_HUGE_PMD) {
split_huge_pmd_address(vma, address,
flags & TTU_MIGRATION, page);
- /* check if we have anything to do after split */
- if (page_mapcount(page) == 0)
- goto out;
}
- pte = page_check_address(page, mm, address, &ptl,
- PageTransCompound(page));
- if (!pte)
- goto out;
+ while (page_vma_mapped_walk(&pvmw)) {
+ subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+ address = pvmw.address;
- /*
- * If the page is mlock()d, we cannot swap it out.
- * If it's recently referenced (perhaps page_referenced
- * skipped over this mm) then we should reactivate it.
- */
- if (!(flags & TTU_IGNORE_MLOCK)) {
- if (vma->vm_flags & VM_LOCKED) {
- /* PTE-mapped THP are never mlocked */
- if (!PageTransCompound(page)) {
- /*
- * Holding pte lock, we do *not* need
- * mmap_sem here
- */
- mlock_vma_page(page);
- }
- ret = SWAP_MLOCK;
- goto out_unmap;
- }
- if (flags & TTU_MUNLOCK)
- goto out_unmap;
- }
- if (!(flags & TTU_IGNORE_ACCESS)) {
- if (ptep_clear_flush_young_notify(vma, address, pte)) {
- ret = SWAP_FAIL;
- goto out_unmap;
- }
- }
+ /* Unexpected PMD-mapped THP? */
+ VM_BUG_ON_PAGE(!pvmw.pte, page);
- /* Nuke the page table entry. */
- flush_cache_page(vma, address, page_to_pfn(page));
- if (should_defer_flush(mm, flags)) {
/*
- * We clear the PTE but do not flush so potentially a remote
- * CPU could still be writing to the page. If the entry was
- * previously clean then the architecture must guarantee that
- * a clear->dirty transition on a cached TLB entry is written
- * through and traps if the PTE is unmapped.
+ * If the page is mlock()d, we cannot swap it out.
+ * If it's recently referenced (perhaps page_referenced
+ * skipped over this mm) then we should reactivate it.
*/
- pteval = ptep_get_and_clear(mm, address, pte);
-
- set_tlb_ubc_flush_pending(mm, page, pte_dirty(pteval));
- } else {
- pteval = ptep_clear_flush(vma, address, pte);
- }
+ if (!(flags & TTU_IGNORE_MLOCK)) {
+ if (vma->vm_flags & VM_LOCKED) {
+ /* PTE-mapped THP are never mlocked */
+ if (!PageTransCompound(page)) {
+ /*
+ * Holding pte lock, we do *not* need
+ * mmap_sem here
+ */
+ mlock_vma_page(page);
+ }
+ ret = SWAP_MLOCK;
+ page_vma_mapped_walk_done(&pvmw);
+ break;
+ }
+ if (flags & TTU_MUNLOCK)
+ continue;
+ }
- /* Move the dirty bit to the physical page now the pte is gone. */
- if (pte_dirty(pteval))
- set_page_dirty(page);
+ if (!(flags & TTU_IGNORE_ACCESS)) {
+ if (ptep_clear_flush_young_notify(vma, address,
+ pvmw.pte)) {
+ ret = SWAP_FAIL;
+ page_vma_mapped_walk_done(&pvmw);
+ break;
+ }
+ }
- /* Update high watermark before we lower rss */
- update_hiwater_rss(mm);
+ /* Nuke the page table entry. */
+ flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+ if (should_defer_flush(mm, flags)) {
+ /*
+ * We clear the PTE but do not flush so potentially
+ * a remote CPU could still be writing to the page.
+ * If the entry was previously clean then the
+ * architecture must guarantee that a clear->dirty
+ * transition on a cached TLB entry is written through
+ * and traps if the PTE is unmapped.
+ */
+ pteval = ptep_get_and_clear(mm, address, pvmw.pte);
- if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
- if (PageHuge(page)) {
- hugetlb_count_sub(1 << compound_order(page), mm);
+ set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
} else {
- dec_mm_counter(mm, mm_counter(page));
+ pteval = ptep_clear_flush(vma, address, pvmw.pte);
}
- set_pte_at(mm, address, pte,
- swp_entry_to_pte(make_hwpoison_entry(page)));
- } else if (pte_unused(pteval)) {
- /*
- * The guest indicated that the page content is of no
- * interest anymore. Simply discard the pte, vmscan
- * will take care of the rest.
- */
- dec_mm_counter(mm, mm_counter(page));
- } else if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION)) {
- swp_entry_t entry;
- pte_t swp_pte;
- /*
- * Store the pfn of the page in a special migration
- * pte. do_swap_page() will wait until the migration
- * pte is removed and then restart fault handling.
- */
- entry = make_migration_entry(page, pte_write(pteval));
- swp_pte = swp_entry_to_pte(entry);
- if (pte_soft_dirty(pteval))
- swp_pte = pte_swp_mksoft_dirty(swp_pte);
- set_pte_at(mm, address, pte, swp_pte);
- } else if (PageAnon(page)) {
- swp_entry_t entry = { .val = page_private(page) };
- pte_t swp_pte;
- /*
- * Store the swap location in the pte.
- * See handle_pte_fault() ...
- */
- VM_BUG_ON_PAGE(!PageSwapCache(page), page);
- if (!PageDirty(page) && (flags & TTU_LZFREE)) {
- /* It's a freeable page by MADV_FREE */
- dec_mm_counter(mm, MM_ANONPAGES);
- rp->lazyfreed++;
- goto discard;
- }
+ /* Move the dirty bit to the page. Now the pte is gone. */
+ if (pte_dirty(pteval))
+ set_page_dirty(page);
- if (swap_duplicate(entry) < 0) {
- set_pte_at(mm, address, pte, pteval);
- ret = SWAP_FAIL;
- goto out_unmap;
- }
- if (list_empty(&mm->mmlist)) {
- spin_lock(&mmlist_lock);
- if (list_empty(&mm->mmlist))
- list_add(&mm->mmlist, &init_mm.mmlist);
- spin_unlock(&mmlist_lock);
- }
- dec_mm_counter(mm, MM_ANONPAGES);
- inc_mm_counter(mm, MM_SWAPENTS);
- swp_pte = swp_entry_to_pte(entry);
- if (pte_soft_dirty(pteval))
- swp_pte = pte_swp_mksoft_dirty(swp_pte);
- set_pte_at(mm, address, pte, swp_pte);
- } else
- dec_mm_counter(mm, mm_counter_file(page));
+ /* Update high watermark before we lower rss */
+ update_hiwater_rss(mm);
-discard:
- page_remove_rmap(page, PageHuge(page));
- put_page(page);
+ if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
+ if (PageHuge(page)) {
+ int nr = 1 << compound_order(page);
+ hugetlb_count_sub(nr, mm);
+ } else {
+ dec_mm_counter(mm, mm_counter(page));
+ }
+
+ pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
+ set_pte_at(mm, address, pvmw.pte, pteval);
+ } else if (pte_unused(pteval)) {
+ /*
+ * The guest indicated that the page content is of no
+ * interest anymore. Simply discard the pte, vmscan
+ * will take care of the rest.
+ */
+ dec_mm_counter(mm, mm_counter(page));
+ } else if (IS_ENABLED(CONFIG_MIGRATION) &&
+ (flags & TTU_MIGRATION)) {
+ swp_entry_t entry;
+ pte_t swp_pte;
+ /*
+ * Store the pfn of the page in a special migration
+ * pte. do_swap_page() will wait until the migration
+ * pte is removed and then restart fault handling.
+ */
+ entry = make_migration_entry(subpage,
+ pte_write(pteval));
+ swp_pte = swp_entry_to_pte(entry);
+ if (pte_soft_dirty(pteval))
+ swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ set_pte_at(mm, address, pvmw.pte, swp_pte);
+ } else if (PageAnon(page)) {
+ swp_entry_t entry = { .val = page_private(subpage) };
+ pte_t swp_pte;
+ /*
+ * Store the swap location in the pte.
+ * See handle_pte_fault() ...
+ */
+ VM_BUG_ON_PAGE(!PageSwapCache(page), page);
+
+ if (!PageDirty(page) && (flags & TTU_LZFREE)) {
+ /* It's a freeable page by MADV_FREE */
+ dec_mm_counter(mm, MM_ANONPAGES);
+ rp->lazyfreed++;
+ goto discard;
+ }
-out_unmap:
- pte_unmap_unlock(pte, ptl);
- if (ret != SWAP_FAIL && ret != SWAP_MLOCK && !(flags & TTU_MUNLOCK))
+ if (swap_duplicate(entry) < 0) {
+ set_pte_at(mm, address, pvmw.pte, pteval);
+ ret = SWAP_FAIL;
+ page_vma_mapped_walk_done(&pvmw);
+ break;
+ }
+ if (list_empty(&mm->mmlist)) {
+ spin_lock(&mmlist_lock);
+ if (list_empty(&mm->mmlist))
+ list_add(&mm->mmlist, &init_mm.mmlist);
+ spin_unlock(&mmlist_lock);
+ }
+ dec_mm_counter(mm, MM_ANONPAGES);
+ inc_mm_counter(mm, MM_SWAPENTS);
+ swp_pte = swp_entry_to_pte(entry);
+ if (pte_soft_dirty(pteval))
+ swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ set_pte_at(mm, address, pvmw.pte, swp_pte);
+ } else
+ dec_mm_counter(mm, mm_counter_file(page));
+discard:
+ page_remove_rmap(subpage, PageHuge(page));
+ put_page(page);
mmu_notifier_invalidate_page(mm, address);
-out:
+ }
return ret;
}
@@ -1608,7 +1474,7 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
static int page_mapcount_is_zero(struct page *page)
{
- return !page_mapcount(page);
+ return !total_mapcount(page);
}
/**
@@ -1755,7 +1621,7 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
bool locked)
{
struct anon_vma *anon_vma;
- pgoff_t pgoff;
+ pgoff_t pgoff_start, pgoff_end;
struct anon_vma_chain *avc;
int ret = SWAP_AGAIN;
@@ -1769,8 +1635,10 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
if (!anon_vma)
return ret;
- pgoff = page_to_pgoff(page);
- anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
+ pgoff_start = page_to_pgoff(page);
+ pgoff_end = pgoff_start + hpage_nr_pages(page) - 1;
+ anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
+ pgoff_start, pgoff_end) {
struct vm_area_struct *vma = avc->vma;
unsigned long address = vma_address(page, vma);
@@ -1808,7 +1676,7 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
bool locked)
{
struct address_space *mapping = page_mapping(page);
- pgoff_t pgoff;
+ pgoff_t pgoff_start, pgoff_end;
struct vm_area_struct *vma;
int ret = SWAP_AGAIN;
@@ -1823,10 +1691,12 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
if (!mapping)
return ret;
- pgoff = page_to_pgoff(page);
+ pgoff_start = page_to_pgoff(page);
+ pgoff_end = pgoff_start + hpage_nr_pages(page) - 1;
if (!locked)
i_mmap_lock_read(mapping);
- vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
+ vma_interval_tree_foreach(vma, &mapping->i_mmap,
+ pgoff_start, pgoff_end) {
unsigned long address = vma_address(page, vma);
cond_resched();
diff --git a/mm/rodata_test.c b/mm/rodata_test.c
new file mode 100644
index 000000000000..0fd21670b513
--- /dev/null
+++ b/mm/rodata_test.c
@@ -0,0 +1,56 @@
+/*
+ * rodata_test.c: functional test for mark_rodata_ro function
+ *
+ * (C) Copyright 2008 Intel Corporation
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/uaccess.h>
+#include <asm/sections.h>
+
+const int rodata_test_data = 0xC3;
+EXPORT_SYMBOL_GPL(rodata_test_data);
+
+void rodata_test(void)
+{
+ unsigned long start, end;
+ int zero = 0;
+
+ /* test 1: read the value */
+ /* If this test fails, some previous testrun has clobbered the state */
+ if (!rodata_test_data) {
+ pr_err("rodata_test: test 1 fails (start data)\n");
+ return;
+ }
+
+ /* test 2: write to the variable; this should fault */
+ if (!probe_kernel_write((void *)&rodata_test_data,
+ (void *)&zero, sizeof(zero))) {
+ pr_err("rodata_test: test data was not read only\n");
+ return;
+ }
+
+ /* test 3: check the value hasn't changed */
+ if (rodata_test_data == zero) {
+ pr_err("rodata_test: test data was changed\n");
+ return;
+ }
+
+ /* test 4: check if the rodata section is PAGE_SIZE aligned */
+ start = (unsigned long)__start_rodata;
+ end = (unsigned long)__end_rodata;
+ if (start & (PAGE_SIZE - 1)) {
+ pr_err("rodata_test: start of .rodata is not page size aligned\n");
+ return;
+ }
+ if (end & (PAGE_SIZE - 1)) {
+ pr_err("rodata_test: end of .rodata is not page size aligned\n");
+ return;
+ }
+
+ pr_info("rodata_test: all tests were successful\n");
+}
diff --git a/mm/shmem.c b/mm/shmem.c
index 3a7587a0314d..a26649a6633f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -34,6 +34,8 @@
#include <linux/uio.h>
#include <linux/khugepaged.h>
+#include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */
+
static struct vfsmount *shm_mnt;
#ifdef CONFIG_SHMEM
@@ -70,6 +72,8 @@ static struct vfsmount *shm_mnt;
#include <linux/syscalls.h>
#include <linux/fcntl.h>
#include <uapi/linux/memfd.h>
+#include <linux/userfaultfd_k.h>
+#include <linux/rmap.h>
#include <linux/uaccess.h>
#include <asm/pgtable.h>
@@ -115,13 +119,14 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
struct shmem_inode_info *info, pgoff_t index);
static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
struct page **pagep, enum sgp_type sgp,
- gfp_t gfp, struct mm_struct *fault_mm, int *fault_type);
+ gfp_t gfp, struct vm_area_struct *vma,
+ struct vm_fault *vmf, int *fault_type);
int shmem_getpage(struct inode *inode, pgoff_t index,
struct page **pagep, enum sgp_type sgp)
{
return shmem_getpage_gfp(inode, index, pagep, sgp,
- mapping_gfp_mask(inode->i_mapping), NULL, NULL);
+ mapping_gfp_mask(inode->i_mapping), NULL, NULL, NULL);
}
static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
@@ -190,6 +195,11 @@ static const struct inode_operations shmem_special_inode_operations;
static const struct vm_operations_struct shmem_vm_ops;
static struct file_system_type shmem_fs_type;
+bool vma_is_shmem(struct vm_area_struct *vma)
+{
+ return vma->vm_ops == &shmem_vm_ops;
+}
+
static LIST_HEAD(shmem_swaplist);
static DEFINE_MUTEX(shmem_swaplist_mutex);
@@ -1570,7 +1580,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
*/
static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
struct page **pagep, enum sgp_type sgp, gfp_t gfp,
- struct mm_struct *fault_mm, int *fault_type)
+ struct vm_area_struct *vma, struct vm_fault *vmf, int *fault_type)
{
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
@@ -1624,7 +1634,7 @@ repeat:
* bring it back from swap or allocate.
*/
sbinfo = SHMEM_SB(inode->i_sb);
- charge_mm = fault_mm ? : current->mm;
+ charge_mm = vma ? vma->vm_mm : current->mm;
if (swap.val) {
/* Look it up and read it in.. */
@@ -1634,7 +1644,8 @@ repeat:
if (fault_type) {
*fault_type |= VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(fault_mm, PGMAJFAULT);
+ mem_cgroup_count_vm_event(charge_mm,
+ PGMAJFAULT);
}
/* Here we actually start the io */
page = shmem_swapin(swap, gfp, info, index);
@@ -1703,6 +1714,11 @@ repeat:
swap_free(swap);
} else {
+ if (vma && userfaultfd_missing(vma)) {
+ *fault_type = handle_userfault(vmf, VM_UFFD_MISSING);
+ return 0;
+ }
+
/* shmem_symlink() */
if (mapping->a_ops != &shmem_aops)
goto alloc_nohuge;
@@ -1892,8 +1908,9 @@ static int synchronous_wake_function(wait_queue_t *wait, unsigned mode, int sync
return ret;
}
-static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int shmem_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct inode *inode = file_inode(vma->vm_file);
gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
enum sgp_type sgp;
@@ -1965,7 +1982,7 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
sgp = SGP_NOHUGE;
error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp,
- gfp, vma->vm_mm, &ret);
+ gfp, vma, vmf, &ret);
if (error)
return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
return ret;
@@ -2175,10 +2192,123 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
bool shmem_mapping(struct address_space *mapping)
{
- if (!mapping->host)
- return false;
+ return mapping->a_ops == &shmem_aops;
+}
- return mapping->host->i_sb->s_op == &shmem_ops;
+int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm,
+ pmd_t *dst_pmd,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr,
+ unsigned long src_addr,
+ struct page **pagep)
+{
+ struct inode *inode = file_inode(dst_vma->vm_file);
+ struct shmem_inode_info *info = SHMEM_I(inode);
+ struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ struct address_space *mapping = inode->i_mapping;
+ gfp_t gfp = mapping_gfp_mask(mapping);
+ pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
+ struct mem_cgroup *memcg;
+ spinlock_t *ptl;
+ void *page_kaddr;
+ struct page *page;
+ pte_t _dst_pte, *dst_pte;
+ int ret;
+
+ ret = -ENOMEM;
+ if (shmem_acct_block(info->flags, 1))
+ goto out;
+ if (sbinfo->max_blocks) {
+ if (percpu_counter_compare(&sbinfo->used_blocks,
+ sbinfo->max_blocks) >= 0)
+ goto out_unacct_blocks;
+ percpu_counter_inc(&sbinfo->used_blocks);
+ }
+
+ if (!*pagep) {
+ page = shmem_alloc_page(gfp, info, pgoff);
+ if (!page)
+ goto out_dec_used_blocks;
+
+ page_kaddr = kmap_atomic(page);
+ ret = copy_from_user(page_kaddr, (const void __user *)src_addr,
+ PAGE_SIZE);
+ kunmap_atomic(page_kaddr);
+
+ /* fallback to copy_from_user outside mmap_sem */
+ if (unlikely(ret)) {
+ *pagep = page;
+ if (sbinfo->max_blocks)
+ percpu_counter_add(&sbinfo->used_blocks, -1);
+ shmem_unacct_blocks(info->flags, 1);
+ /* don't free the page */
+ return -EFAULT;
+ }
+ } else {
+ page = *pagep;
+ *pagep = NULL;
+ }
+
+ VM_BUG_ON(PageLocked(page) || PageSwapBacked(page));
+ __SetPageLocked(page);
+ __SetPageSwapBacked(page);
+ __SetPageUptodate(page);
+
+ ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false);
+ if (ret)
+ goto out_release;
+
+ ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
+ if (!ret) {
+ ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL);
+ radix_tree_preload_end();
+ }
+ if (ret)
+ goto out_release_uncharge;
+
+ mem_cgroup_commit_charge(page, memcg, false, false);
+
+ _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
+ if (dst_vma->vm_flags & VM_WRITE)
+ _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
+
+ ret = -EEXIST;
+ dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
+ if (!pte_none(*dst_pte))
+ goto out_release_uncharge_unlock;
+
+ lru_cache_add_anon(page);
+
+ spin_lock(&info->lock);
+ info->alloced++;
+ inode->i_blocks += BLOCKS_PER_PAGE;
+ shmem_recalc_inode(inode);
+ spin_unlock(&info->lock);
+
+ inc_mm_counter(dst_mm, mm_counter_file(page));
+ page_add_file_rmap(page, false);
+ set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
+
+ /* No need to invalidate - it was non-present before */
+ update_mmu_cache(dst_vma, dst_addr, dst_pte);
+ unlock_page(page);
+ pte_unmap_unlock(dst_pte, ptl);
+ ret = 0;
+out:
+ return ret;
+out_release_uncharge_unlock:
+ pte_unmap_unlock(dst_pte, ptl);
+out_release_uncharge:
+ mem_cgroup_cancel_charge(page, memcg, false);
+out_release:
+ unlock_page(page);
+ put_page(page);
+out_dec_used_blocks:
+ if (sbinfo->max_blocks)
+ percpu_counter_add(&sbinfo->used_blocks, -1);
+out_unacct_blocks:
+ shmem_unacct_blocks(info->flags, 1);
+ goto out;
}
#ifdef CONFIG_TMPFS
@@ -2201,7 +2331,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index = pos >> PAGE_SHIFT;
/* i_mutex is held by caller */
- if (unlikely(info->seals)) {
+ if (unlikely(info->seals & (F_SEAL_WRITE | F_SEAL_GROW))) {
if (info->seals & F_SEAL_WRITE)
return -EPERM;
if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
@@ -4140,7 +4270,7 @@ struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
BUG_ON(mapping->a_ops != &shmem_aops);
error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
- gfp, NULL, NULL);
+ gfp, NULL, NULL, NULL);
if (error)
page = ERR_PTR(error);
else
diff --git a/mm/slab.c b/mm/slab.c
index 4f2ec6bb46eb..31335e713ffb 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1288,7 +1288,8 @@ void __init kmem_cache_init(void)
* Initialize the caches that provide memory for the kmem_cache_node
* structures first. Without this, further allocations will bug.
*/
- kmalloc_caches[INDEX_NODE] = create_kmalloc_cache("kmalloc-node",
+ kmalloc_caches[INDEX_NODE] = create_kmalloc_cache(
+ get_kmalloc_cache_name(INDEX_NODE),
kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS);
slab_state = PARTIAL_NODE;
setup_kmalloc_cache_index_table();
@@ -2332,6 +2333,13 @@ int __kmem_cache_shrink(struct kmem_cache *cachep)
return (ret ? 1 : 0);
}
+#ifdef CONFIG_MEMCG
+void __kmemcg_cache_deactivate(struct kmem_cache *cachep)
+{
+ __kmem_cache_shrink(cachep);
+}
+#endif
+
int __kmem_cache_shutdown(struct kmem_cache *cachep)
{
return __kmem_cache_shrink(cachep);
diff --git a/mm/slab.h b/mm/slab.h
index de6579dc362c..360f157b1da2 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -78,6 +78,7 @@ unsigned long calculate_alignment(unsigned long flags,
/* Kmalloc array related functions */
void setup_kmalloc_cache_index_table(void);
void create_kmalloc_caches(unsigned long);
+const char *get_kmalloc_cache_name(int index);
/* Find the kmalloc slab corresponding for a certain size */
struct kmem_cache *kmalloc_slab(size_t, gfp_t);
@@ -162,6 +163,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size,
int __kmem_cache_shutdown(struct kmem_cache *);
void __kmem_cache_release(struct kmem_cache *);
int __kmem_cache_shrink(struct kmem_cache *);
+void __kmemcg_cache_deactivate(struct kmem_cache *s);
void slab_kmem_cache_release(struct kmem_cache *);
struct seq_file;
@@ -195,17 +197,22 @@ void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+
+/* List of all root caches. */
+extern struct list_head slab_root_caches;
+#define root_caches_node memcg_params.__root_caches_node
+
/*
* Iterate over all memcg caches of the given root cache. The caller must hold
* slab_mutex.
*/
#define for_each_memcg_cache(iter, root) \
- list_for_each_entry(iter, &(root)->memcg_params.list, \
- memcg_params.list)
+ list_for_each_entry(iter, &(root)->memcg_params.children, \
+ memcg_params.children_node)
static inline bool is_root_cache(struct kmem_cache *s)
{
- return s->memcg_params.is_root_cache;
+ return !s->memcg_params.root_cache;
}
static inline bool slab_equal_or_root(struct kmem_cache *s,
@@ -294,9 +301,16 @@ static __always_inline void memcg_uncharge_slab(struct page *page, int order,
}
extern void slab_init_memcg_params(struct kmem_cache *);
+extern void memcg_link_cache(struct kmem_cache *s);
+extern void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
+ void (*deact_fn)(struct kmem_cache *));
#else /* CONFIG_MEMCG && !CONFIG_SLOB */
+/* If !memcg, all caches are root. */
+#define slab_root_caches slab_caches
+#define root_caches_node list
+
#define for_each_memcg_cache(iter, root) \
for ((void)(iter), (void)(root); 0; )
@@ -341,6 +355,11 @@ static inline void memcg_uncharge_slab(struct page *page, int order,
static inline void slab_init_memcg_params(struct kmem_cache *s)
{
}
+
+static inline void memcg_link_cache(struct kmem_cache *s)
+{
+}
+
#endif /* CONFIG_MEMCG && !CONFIG_SLOB */
static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
@@ -488,6 +507,9 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
void *slab_start(struct seq_file *m, loff_t *pos);
void *slab_next(struct seq_file *m, void *p, loff_t *pos);
void slab_stop(struct seq_file *m, void *p);
+void *memcg_slab_start(struct seq_file *m, loff_t *pos);
+void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos);
+void memcg_slab_stop(struct seq_file *m, void *p);
int memcg_slab_show(struct seq_file *m, void *p);
void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index ae323841adb1..16b39eeb208a 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -30,6 +30,11 @@ LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
struct kmem_cache *kmem_cache;
+static LIST_HEAD(slab_caches_to_rcu_destroy);
+static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work);
+static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
+ slab_caches_to_rcu_destroy_workfn);
+
/*
* Set of flags that will prevent slab merging
*/
@@ -133,11 +138,14 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
}
#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+
+LIST_HEAD(slab_root_caches);
+
void slab_init_memcg_params(struct kmem_cache *s)
{
- s->memcg_params.is_root_cache = true;
- INIT_LIST_HEAD(&s->memcg_params.list);
+ s->memcg_params.root_cache = NULL;
RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
+ INIT_LIST_HEAD(&s->memcg_params.children);
}
static int init_memcg_params(struct kmem_cache *s,
@@ -145,10 +153,11 @@ static int init_memcg_params(struct kmem_cache *s,
{
struct memcg_cache_array *arr;
- if (memcg) {
- s->memcg_params.is_root_cache = false;
- s->memcg_params.memcg = memcg;
+ if (root_cache) {
s->memcg_params.root_cache = root_cache;
+ s->memcg_params.memcg = memcg;
+ INIT_LIST_HEAD(&s->memcg_params.children_node);
+ INIT_LIST_HEAD(&s->memcg_params.kmem_caches_node);
return 0;
}
@@ -177,9 +186,6 @@ static int update_memcg_params(struct kmem_cache *s, int new_array_size)
{
struct memcg_cache_array *old, *new;
- if (!is_root_cache(s))
- return 0;
-
new = kzalloc(sizeof(struct memcg_cache_array) +
new_array_size * sizeof(void *), GFP_KERNEL);
if (!new)
@@ -203,7 +209,7 @@ int memcg_update_all_caches(int num_memcgs)
int ret = 0;
mutex_lock(&slab_mutex);
- list_for_each_entry(s, &slab_caches, list) {
+ list_for_each_entry(s, &slab_root_caches, root_caches_node) {
ret = update_memcg_params(s, num_memcgs);
/*
* Instead of freeing the memory, we'll just leave the caches
@@ -215,6 +221,28 @@ int memcg_update_all_caches(int num_memcgs)
mutex_unlock(&slab_mutex);
return ret;
}
+
+void memcg_link_cache(struct kmem_cache *s)
+{
+ if (is_root_cache(s)) {
+ list_add(&s->root_caches_node, &slab_root_caches);
+ } else {
+ list_add(&s->memcg_params.children_node,
+ &s->memcg_params.root_cache->memcg_params.children);
+ list_add(&s->memcg_params.kmem_caches_node,
+ &s->memcg_params.memcg->kmem_caches);
+ }
+}
+
+static void memcg_unlink_cache(struct kmem_cache *s)
+{
+ if (is_root_cache(s)) {
+ list_del(&s->root_caches_node);
+ } else {
+ list_del(&s->memcg_params.children_node);
+ list_del(&s->memcg_params.kmem_caches_node);
+ }
+}
#else
static inline int init_memcg_params(struct kmem_cache *s,
struct mem_cgroup *memcg, struct kmem_cache *root_cache)
@@ -225,6 +253,10 @@ static inline int init_memcg_params(struct kmem_cache *s,
static inline void destroy_memcg_params(struct kmem_cache *s)
{
}
+
+static inline void memcg_unlink_cache(struct kmem_cache *s)
+{
+}
#endif /* CONFIG_MEMCG && !CONFIG_SLOB */
/*
@@ -255,7 +287,7 @@ struct kmem_cache *find_mergeable(size_t size, size_t align,
{
struct kmem_cache *s;
- if (slab_nomerge || (flags & SLAB_NEVER_MERGE))
+ if (slab_nomerge)
return NULL;
if (ctor)
@@ -266,7 +298,10 @@ struct kmem_cache *find_mergeable(size_t size, size_t align,
size = ALIGN(size, align);
flags = kmem_cache_flags(size, flags, name, NULL);
- list_for_each_entry_reverse(s, &slab_caches, list) {
+ if (flags & SLAB_NEVER_MERGE)
+ return NULL;
+
+ list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) {
if (slab_unmergeable(s))
continue;
@@ -350,6 +385,7 @@ static struct kmem_cache *create_cache(const char *name,
s->refcount = 1;
list_add(&s->list, &slab_caches);
+ memcg_link_cache(s);
out:
if (err)
return ERR_PTR(err);
@@ -458,33 +494,61 @@ out_unlock:
}
EXPORT_SYMBOL(kmem_cache_create);
-static int shutdown_cache(struct kmem_cache *s,
- struct list_head *release, bool *need_rcu_barrier)
+static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
{
- if (__kmem_cache_shutdown(s) != 0)
- return -EBUSY;
+ LIST_HEAD(to_destroy);
+ struct kmem_cache *s, *s2;
- if (s->flags & SLAB_DESTROY_BY_RCU)
- *need_rcu_barrier = true;
+ /*
+ * On destruction, SLAB_DESTROY_BY_RCU kmem_caches are put on the
+ * @slab_caches_to_rcu_destroy list. The slab pages are freed
+ * through RCU and and the associated kmem_cache are dereferenced
+ * while freeing the pages, so the kmem_caches should be freed only
+ * after the pending RCU operations are finished. As rcu_barrier()
+ * is a pretty slow operation, we batch all pending destructions
+ * asynchronously.
+ */
+ mutex_lock(&slab_mutex);
+ list_splice_init(&slab_caches_to_rcu_destroy, &to_destroy);
+ mutex_unlock(&slab_mutex);
- list_move(&s->list, release);
- return 0;
+ if (list_empty(&to_destroy))
+ return;
+
+ rcu_barrier();
+
+ list_for_each_entry_safe(s, s2, &to_destroy, list) {
+#ifdef SLAB_SUPPORTS_SYSFS
+ sysfs_slab_release(s);
+#else
+ slab_kmem_cache_release(s);
+#endif
+ }
}
-static void release_caches(struct list_head *release, bool need_rcu_barrier)
+static int shutdown_cache(struct kmem_cache *s)
{
- struct kmem_cache *s, *s2;
+ /* free asan quarantined objects */
+ kasan_cache_shutdown(s);
+
+ if (__kmem_cache_shutdown(s) != 0)
+ return -EBUSY;
- if (need_rcu_barrier)
- rcu_barrier();
+ memcg_unlink_cache(s);
+ list_del(&s->list);
- list_for_each_entry_safe(s, s2, release, list) {
+ if (s->flags & SLAB_DESTROY_BY_RCU) {
+ list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
+ schedule_work(&slab_caches_to_rcu_destroy_work);
+ } else {
#ifdef SLAB_SUPPORTS_SYSFS
- sysfs_slab_remove(s);
+ sysfs_slab_release(s);
#else
slab_kmem_cache_release(s);
#endif
}
+
+ return 0;
}
#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
@@ -551,8 +615,6 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
goto out_unlock;
}
- list_add(&s->memcg_params.list, &root_cache->memcg_params.list);
-
/*
* Since readers won't lock (see cache_from_memcg_idx()), we need a
* barrier here to ensure nobody will see the kmem_cache partially
@@ -568,6 +630,66 @@ out_unlock:
put_online_cpus();
}
+static void kmemcg_deactivate_workfn(struct work_struct *work)
+{
+ struct kmem_cache *s = container_of(work, struct kmem_cache,
+ memcg_params.deact_work);
+
+ get_online_cpus();
+ get_online_mems();
+
+ mutex_lock(&slab_mutex);
+
+ s->memcg_params.deact_fn(s);
+
+ mutex_unlock(&slab_mutex);
+
+ put_online_mems();
+ put_online_cpus();
+
+ /* done, put the ref from slab_deactivate_memcg_cache_rcu_sched() */
+ css_put(&s->memcg_params.memcg->css);
+}
+
+static void kmemcg_deactivate_rcufn(struct rcu_head *head)
+{
+ struct kmem_cache *s = container_of(head, struct kmem_cache,
+ memcg_params.deact_rcu_head);
+
+ /*
+ * We need to grab blocking locks. Bounce to ->deact_work. The
+ * work item shares the space with the RCU head and can't be
+ * initialized eariler.
+ */
+ INIT_WORK(&s->memcg_params.deact_work, kmemcg_deactivate_workfn);
+ queue_work(memcg_kmem_cache_wq, &s->memcg_params.deact_work);
+}
+
+/**
+ * slab_deactivate_memcg_cache_rcu_sched - schedule deactivation after a
+ * sched RCU grace period
+ * @s: target kmem_cache
+ * @deact_fn: deactivation function to call
+ *
+ * Schedule @deact_fn to be invoked with online cpus, mems and slab_mutex
+ * held after a sched RCU grace period. The slab is guaranteed to stay
+ * alive until @deact_fn is finished. This is to be used from
+ * __kmemcg_cache_deactivate().
+ */
+void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
+ void (*deact_fn)(struct kmem_cache *))
+{
+ if (WARN_ON_ONCE(is_root_cache(s)) ||
+ WARN_ON_ONCE(s->memcg_params.deact_fn))
+ return;
+
+ /* pin memcg so that @s doesn't get destroyed in the middle */
+ css_get(&s->memcg_params.memcg->css);
+
+ s->memcg_params.deact_fn = deact_fn;
+ call_rcu_sched(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn);
+}
+
void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
{
int idx;
@@ -579,41 +701,15 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
get_online_cpus();
get_online_mems();
-#ifdef CONFIG_SLUB
- /*
- * In case of SLUB, we need to disable empty slab caching to
- * avoid pinning the offline memory cgroup by freeable kmem
- * pages charged to it. SLAB doesn't need this, as it
- * periodically purges unused slabs.
- */
mutex_lock(&slab_mutex);
- list_for_each_entry(s, &slab_caches, list) {
- c = is_root_cache(s) ? cache_from_memcg_idx(s, idx) : NULL;
- if (c) {
- c->cpu_partial = 0;
- c->min_partial = 0;
- }
- }
- mutex_unlock(&slab_mutex);
- /*
- * kmem_cache->cpu_partial is checked locklessly (see
- * put_cpu_partial()). Make sure the change is visible.
- */
- synchronize_sched();
-#endif
-
- mutex_lock(&slab_mutex);
- list_for_each_entry(s, &slab_caches, list) {
- if (!is_root_cache(s))
- continue;
-
+ list_for_each_entry(s, &slab_root_caches, root_caches_node) {
arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
lockdep_is_held(&slab_mutex));
c = arr->entries[idx];
if (!c)
continue;
- __kmem_cache_shrink(c);
+ __kmemcg_cache_deactivate(c);
arr->entries[idx] = NULL;
}
mutex_unlock(&slab_mutex);
@@ -622,47 +718,29 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
put_online_cpus();
}
-static int __shutdown_memcg_cache(struct kmem_cache *s,
- struct list_head *release, bool *need_rcu_barrier)
-{
- BUG_ON(is_root_cache(s));
-
- if (shutdown_cache(s, release, need_rcu_barrier))
- return -EBUSY;
-
- list_del(&s->memcg_params.list);
- return 0;
-}
-
void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
{
- LIST_HEAD(release);
- bool need_rcu_barrier = false;
struct kmem_cache *s, *s2;
get_online_cpus();
get_online_mems();
mutex_lock(&slab_mutex);
- list_for_each_entry_safe(s, s2, &slab_caches, list) {
- if (is_root_cache(s) || s->memcg_params.memcg != memcg)
- continue;
+ list_for_each_entry_safe(s, s2, &memcg->kmem_caches,
+ memcg_params.kmem_caches_node) {
/*
* The cgroup is about to be freed and therefore has no charges
* left. Hence, all its caches must be empty by now.
*/
- BUG_ON(__shutdown_memcg_cache(s, &release, &need_rcu_barrier));
+ BUG_ON(shutdown_cache(s));
}
mutex_unlock(&slab_mutex);
put_online_mems();
put_online_cpus();
-
- release_caches(&release, need_rcu_barrier);
}
-static int shutdown_memcg_caches(struct kmem_cache *s,
- struct list_head *release, bool *need_rcu_barrier)
+static int shutdown_memcg_caches(struct kmem_cache *s)
{
struct memcg_cache_array *arr;
struct kmem_cache *c, *c2;
@@ -681,13 +759,13 @@ static int shutdown_memcg_caches(struct kmem_cache *s,
c = arr->entries[i];
if (!c)
continue;
- if (__shutdown_memcg_cache(c, release, need_rcu_barrier))
+ if (shutdown_cache(c))
/*
* The cache still has objects. Move it to a temporary
* list so as not to try to destroy it for a second
* time while iterating over inactive caches below.
*/
- list_move(&c->memcg_params.list, &busy);
+ list_move(&c->memcg_params.children_node, &busy);
else
/*
* The cache is empty and will be destroyed soon. Clear
@@ -702,23 +780,22 @@ static int shutdown_memcg_caches(struct kmem_cache *s,
* Second, shutdown all caches left from memory cgroups that are now
* offline.
*/
- list_for_each_entry_safe(c, c2, &s->memcg_params.list,
- memcg_params.list)
- __shutdown_memcg_cache(c, release, need_rcu_barrier);
+ list_for_each_entry_safe(c, c2, &s->memcg_params.children,
+ memcg_params.children_node)
+ shutdown_cache(c);
- list_splice(&busy, &s->memcg_params.list);
+ list_splice(&busy, &s->memcg_params.children);
/*
* A cache being destroyed must be empty. In particular, this means
* that all per memcg caches attached to it must be empty too.
*/
- if (!list_empty(&s->memcg_params.list))
+ if (!list_empty(&s->memcg_params.children))
return -EBUSY;
return 0;
}
#else
-static inline int shutdown_memcg_caches(struct kmem_cache *s,
- struct list_head *release, bool *need_rcu_barrier)
+static inline int shutdown_memcg_caches(struct kmem_cache *s)
{
return 0;
}
@@ -734,8 +811,6 @@ void slab_kmem_cache_release(struct kmem_cache *s)
void kmem_cache_destroy(struct kmem_cache *s)
{
- LIST_HEAD(release);
- bool need_rcu_barrier = false;
int err;
if (unlikely(!s))
@@ -744,16 +819,15 @@ void kmem_cache_destroy(struct kmem_cache *s)
get_online_cpus();
get_online_mems();
- kasan_cache_destroy(s);
mutex_lock(&slab_mutex);
s->refcount--;
if (s->refcount)
goto out_unlock;
- err = shutdown_memcg_caches(s, &release, &need_rcu_barrier);
+ err = shutdown_memcg_caches(s);
if (!err)
- err = shutdown_cache(s, &release, &need_rcu_barrier);
+ err = shutdown_cache(s);
if (err) {
pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
@@ -765,8 +839,6 @@ out_unlock:
put_online_mems();
put_online_cpus();
-
- release_caches(&release, need_rcu_barrier);
}
EXPORT_SYMBOL(kmem_cache_destroy);
@@ -828,6 +900,7 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
create_boot_cache(s, name, size, flags);
list_add(&s->list, &slab_caches);
+ memcg_link_cache(s);
s->refcount = 1;
return s;
}
@@ -932,6 +1005,11 @@ static struct {
{"kmalloc-67108864", 67108864}
};
+const char * __init get_kmalloc_cache_name(int index)
+{
+ return kmalloc_info[index].name;
+}
+
/*
* Patch up the size_index table if we have strange large alignment
* requirements for the kmalloc array. This is only the case for
@@ -1138,12 +1216,12 @@ static void print_slabinfo_header(struct seq_file *m)
void *slab_start(struct seq_file *m, loff_t *pos)
{
mutex_lock(&slab_mutex);
- return seq_list_start(&slab_caches, *pos);
+ return seq_list_start(&slab_root_caches, *pos);
}
void *slab_next(struct seq_file *m, void *p, loff_t *pos)
{
- return seq_list_next(p, &slab_caches, pos);
+ return seq_list_next(p, &slab_root_caches, pos);
}
void slab_stop(struct seq_file *m, void *p)
@@ -1195,25 +1273,44 @@ static void cache_show(struct kmem_cache *s, struct seq_file *m)
static int slab_show(struct seq_file *m, void *p)
{
- struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
+ struct kmem_cache *s = list_entry(p, struct kmem_cache, root_caches_node);
- if (p == slab_caches.next)
+ if (p == slab_root_caches.next)
print_slabinfo_header(m);
- if (is_root_cache(s))
- cache_show(s, m);
+ cache_show(s, m);
return 0;
}
#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+void *memcg_slab_start(struct seq_file *m, loff_t *pos)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+
+ mutex_lock(&slab_mutex);
+ return seq_list_start(&memcg->kmem_caches, *pos);
+}
+
+void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+
+ return seq_list_next(p, &memcg->kmem_caches, pos);
+}
+
+void memcg_slab_stop(struct seq_file *m, void *p)
+{
+ mutex_unlock(&slab_mutex);
+}
+
int memcg_slab_show(struct seq_file *m, void *p)
{
- struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
+ struct kmem_cache *s = list_entry(p, struct kmem_cache,
+ memcg_params.kmem_caches_node);
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
- if (p == slab_caches.next)
+ if (p == memcg->kmem_caches.next)
print_slabinfo_header(m);
- if (!is_root_cache(s) && s->memcg_params.memcg == memcg)
- cache_show(s, m);
+ cache_show(s, m);
return 0;
}
#endif
diff --git a/mm/slub.c b/mm/slub.c
index 7ec0a965c6a3..7f4bc7027ed5 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -214,11 +214,13 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void memcg_propagate_slab_attrs(struct kmem_cache *s);
+static void sysfs_slab_remove(struct kmem_cache *s);
#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
{ return 0; }
static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
+static inline void sysfs_slab_remove(struct kmem_cache *s) { }
#endif
static inline void stat(const struct kmem_cache *s, enum stat_item si)
@@ -1630,6 +1632,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
flags &= ~GFP_SLAB_BUG_MASK;
pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
invalid_mask, &invalid_mask, flags, &flags);
+ dump_stack();
}
return allocate_slab(s,
@@ -3686,6 +3689,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
if (n->nr_partial || slabs_node(s, node))
return 1;
}
+ sysfs_slab_remove(s);
return 0;
}
@@ -3952,6 +3956,42 @@ int __kmem_cache_shrink(struct kmem_cache *s)
return ret;
}
+#ifdef CONFIG_MEMCG
+static void kmemcg_cache_deact_after_rcu(struct kmem_cache *s)
+{
+ /*
+ * Called with all the locks held after a sched RCU grace period.
+ * Even if @s becomes empty after shrinking, we can't know that @s
+ * doesn't have allocations already in-flight and thus can't
+ * destroy @s until the associated memcg is released.
+ *
+ * However, let's remove the sysfs files for empty caches here.
+ * Each cache has a lot of interface files which aren't
+ * particularly useful for empty draining caches; otherwise, we can
+ * easily end up with millions of unnecessary sysfs files on
+ * systems which have a lot of memory and transient cgroups.
+ */
+ if (!__kmem_cache_shrink(s))
+ sysfs_slab_remove(s);
+}
+
+void __kmemcg_cache_deactivate(struct kmem_cache *s)
+{
+ /*
+ * Disable empty slabs caching. Used to avoid pinning offline
+ * memory cgroups by kmem pages that can be freed.
+ */
+ s->cpu_partial = 0;
+ s->min_partial = 0;
+
+ /*
+ * s->cpu_partial is checked locklessly (see put_cpu_partial), so
+ * we have to make sure the change is visible before shrinking.
+ */
+ slab_deactivate_memcg_cache_rcu_sched(s, kmemcg_cache_deact_after_rcu);
+}
+#endif
+
static int slab_mem_going_offline_callback(void *arg)
{
struct kmem_cache *s;
@@ -4108,6 +4148,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
}
slab_init_memcg_params(s);
list_add(&s->list, &slab_caches);
+ memcg_link_cache(s);
return s;
}
@@ -4667,6 +4708,22 @@ enum slab_stat_type {
#define SO_OBJECTS (1 << SL_OBJECTS)
#define SO_TOTAL (1 << SL_TOTAL)
+#ifdef CONFIG_MEMCG
+static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON);
+
+static int __init setup_slub_memcg_sysfs(char *str)
+{
+ int v;
+
+ if (get_option(&str, &v) > 0)
+ memcg_sysfs_enabled = v;
+
+ return 1;
+}
+
+__setup("slub_memcg_sysfs=", setup_slub_memcg_sysfs);
+#endif
+
static ssize_t show_slab_objects(struct kmem_cache *s,
char *buf, unsigned long flags)
{
@@ -5570,8 +5627,14 @@ static int sysfs_slab_add(struct kmem_cache *s)
{
int err;
const char *name;
+ struct kset *kset = cache_kset(s);
int unmergeable = slab_unmergeable(s);
+ if (!kset) {
+ kobject_init(&s->kobj, &slab_ktype);
+ return 0;
+ }
+
if (unmergeable) {
/*
* Slabcache can never be merged so we can use the name proper.
@@ -5588,7 +5651,7 @@ static int sysfs_slab_add(struct kmem_cache *s)
name = create_unique_id(s);
}
- s->kobj.kset = cache_kset(s);
+ s->kobj.kset = kset;
err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
if (err)
goto out;
@@ -5598,7 +5661,7 @@ static int sysfs_slab_add(struct kmem_cache *s)
goto out_del_kobj;
#ifdef CONFIG_MEMCG
- if (is_root_cache(s)) {
+ if (is_root_cache(s) && memcg_sysfs_enabled) {
s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
if (!s->memcg_kset) {
err = -ENOMEM;
@@ -5621,7 +5684,7 @@ out_del_kobj:
goto out;
}
-void sysfs_slab_remove(struct kmem_cache *s)
+static void sysfs_slab_remove(struct kmem_cache *s)
{
if (slab_state < FULL)
/*
@@ -5630,12 +5693,26 @@ void sysfs_slab_remove(struct kmem_cache *s)
*/
return;
+ if (!s->kobj.state_in_sysfs)
+ /*
+ * For a memcg cache, this may be called during
+ * deactivation and again on shutdown. Remove only once.
+ * A cache is never shut down before deactivation is
+ * complete, so no need to worry about synchronization.
+ */
+ return;
+
#ifdef CONFIG_MEMCG
kset_unregister(s->memcg_kset);
#endif
kobject_uevent(&s->kobj, KOBJ_REMOVE);
kobject_del(&s->kobj);
- kobject_put(&s->kobj);
+}
+
+void sysfs_slab_release(struct kmem_cache *s)
+{
+ if (slab_state >= FULL)
+ kobject_put(&s->kobj);
}
/*
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 574c67b663fe..8679d4a81b98 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -248,20 +248,28 @@ int __meminit vmemmap_populate_basepages(unsigned long start,
return 0;
}
-struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
+struct page * __meminit __populate_section_memmap(unsigned long pfn,
+ unsigned long nr_pages, int nid)
{
unsigned long start;
unsigned long end;
- struct page *map;
- map = pfn_to_page(pnum * PAGES_PER_SECTION);
- start = (unsigned long)map;
- end = (unsigned long)(map + PAGES_PER_SECTION);
+ /*
+ * The minimum granularity of memmap extensions is
+ * SECTION_ACTIVE_SIZE as allocations are tracked in the
+ * 'map_active' bitmap of the section.
+ */
+ end = ALIGN(pfn + nr_pages, PHYS_PFN(SECTION_ACTIVE_SIZE));
+ pfn &= PHYS_PFN(SECTION_ACTIVE_MASK);
+ nr_pages = end - pfn;
+
+ start = (unsigned long) pfn_to_page(pfn);
+ end = start + nr_pages * sizeof(struct page);
if (vmemmap_populate(start, end, nid))
return NULL;
- return map;
+ return pfn_to_page(pfn);
}
void __init sparse_mem_maps_populate_node(struct page **map_map,
@@ -284,11 +292,13 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
struct mem_section *ms;
+ unsigned long pfn = section_nr_to_pfn(pnum);
if (!present_section_nr(pnum))
continue;
- map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
+ map_map[pnum] = __populate_section_memmap(pfn,
+ PAGES_PER_SECTION, nodeid);
if (map_map[pnum])
continue;
ms = __nr_to_section(pnum);
diff --git a/mm/sparse.c b/mm/sparse.c
index 1e168bf2779a..3e0c1c73f814 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -24,6 +24,7 @@
#ifdef CONFIG_SPARSEMEM_EXTREME
struct mem_section *mem_section[NR_SECTION_ROOTS]
____cacheline_internodealigned_in_smp;
+static DEFINE_SPINLOCK(mem_section_lock); /* atomically instantiate new entries */
#else
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
____cacheline_internodealigned_in_smp;
@@ -89,7 +90,22 @@ static int __meminit sparse_index_init(unsigned long section_nr, int nid)
if (!section)
return -ENOMEM;
- mem_section[root] = section;
+ spin_lock(&mem_section_lock);
+ if (mem_section[root] == NULL) {
+ mem_section[root] = section;
+ section = NULL;
+ }
+ spin_unlock(&mem_section_lock);
+
+ /*
+ * The only time we expect adding a section may race is during
+ * post-meminit hotplug. So, there is no expectation that 'section'
+ * leaks in the !slab_is_available() case.
+ */
+ if (section && slab_is_available()) {
+ kfree(section);
+ return -EEXIST;
+ }
return 0;
}
@@ -168,6 +184,66 @@ void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
}
}
+static int __init section_active_index(phys_addr_t phys)
+{
+ return (phys & ~(PA_SECTION_MASK)) / SECTION_ACTIVE_SIZE;
+}
+
+static unsigned long section_active_mask(unsigned long pfn,
+ unsigned long nr_pages)
+{
+ int idx_start, idx_size;
+ phys_addr_t start, size;
+
+ WARN_ON((pfn & ~PAGE_SECTION_MASK) + nr_pages > PAGES_PER_SECTION);
+ if (!nr_pages)
+ return 0;
+
+ /*
+ * The size is the number of pages left in the section or
+ * nr_pages, whichever is smaller. The size will be rounded up
+ * to the next SECTION_ACTIVE_SIZE boundary, the start will be
+ * rounded down.
+ */
+ start = PFN_PHYS(pfn);
+ size = PFN_PHYS(min_not_zero(nr_pages, PAGES_PER_SECTION
+ - (pfn & ~PAGE_SECTION_MASK)));
+ size = ALIGN(size, SECTION_ACTIVE_SIZE);
+
+ idx_start = section_active_index(start);
+ idx_size = section_active_index(size);
+
+ if (idx_size == 0)
+ return ULONG_MAX; /* full section */
+ return ((1UL << idx_size) - 1) << idx_start;
+}
+
+void __init section_active_init(unsigned long pfn, unsigned long nr_pages)
+{
+ int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
+ int i, start_sec = pfn_to_section_nr(pfn);
+
+ if (!nr_pages)
+ return;
+
+ for (i = start_sec; i <= end_sec; i++) {
+ struct mem_section *ms;
+ unsigned long mask;
+ unsigned long pfns;
+
+ pfns = min(nr_pages, PAGES_PER_SECTION
+ - (pfn & ~PAGE_SECTION_MASK));
+ mask = section_active_mask(pfn, pfns);
+
+ ms = __nr_to_section(i);
+ pr_debug("%s: sec: %d mask: %#018lx\n", __func__, i, mask);
+ ms->usage->map_active = mask;
+
+ pfn += pfns;
+ nr_pages -= pfns;
+ }
+}
+
/* Record a memory area against a node. */
void __init memory_present(int nid, unsigned long start, unsigned long end)
{
@@ -231,19 +307,23 @@ struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pn
return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
}
-static int __meminit sparse_init_one_section(struct mem_section *ms,
+static void __meminit sparse_init_one_section(struct mem_section *ms,
unsigned long pnum, struct page *mem_map,
- unsigned long *pageblock_bitmap)
+ struct mem_section_usage *usage)
{
- if (!present_section(ms))
- return -EINVAL;
+ /*
+ * Given that SPARSEMEM_VMEMMAP=y supports sub-section hotplug,
+ * ->section_mem_map can not be guaranteed to point to a full
+ * section's worth of memory. The field is only valid / used
+ * in the SPARSEMEM_VMEMMAP=n case.
+ */
+ if (IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))
+ mem_map = NULL;
ms->section_mem_map &= ~SECTION_MAP_MASK;
ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
- SECTION_HAS_MEM_MAP;
- ms->pageblock_flags = pageblock_bitmap;
-
- return 1;
+ SECTION_HAS_MEM_MAP;
+ ms->usage = usage;
}
unsigned long usemap_size(void)
@@ -255,9 +335,13 @@ unsigned long usemap_size(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-static unsigned long *__kmalloc_section_usemap(void)
+static struct mem_section_usage *__alloc_section_usage(void)
{
- return kmalloc(usemap_size(), GFP_KERNEL);
+ struct mem_section_usage *usage;
+
+ usage = kzalloc(sizeof(*usage) + usemap_size(), GFP_KERNEL);
+ /* TODO: allocate the map_active bitmap */
+ return usage;
}
#endif /* CONFIG_MEMORY_HOTPLUG */
@@ -293,7 +377,8 @@ again:
return p;
}
-static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
+static void __init check_usemap_section_nr(int nid,
+ struct mem_section_usage *usage)
{
unsigned long usemap_snr, pgdat_snr;
static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
@@ -301,7 +386,7 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
struct pglist_data *pgdat = NODE_DATA(nid);
int usemap_nid;
- usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
+ usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT);
pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
if (usemap_snr == pgdat_snr)
return;
@@ -336,7 +421,8 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
return memblock_virt_alloc_node_nopanic(size, pgdat->node_id);
}
-static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
+static void __init check_usemap_section_nr(int nid,
+ struct mem_section_usage *usage)
{
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
@@ -344,31 +430,33 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
static void __init sparse_early_usemaps_alloc_node(void *data,
unsigned long pnum_begin,
unsigned long pnum_end,
- unsigned long usemap_count, int nodeid)
+ unsigned long usage_count, int nodeid)
{
- void *usemap;
+ void *usage;
unsigned long pnum;
- unsigned long **usemap_map = (unsigned long **)data;
- int size = usemap_size();
+ struct mem_section_usage **usage_map = data;
+ int size = sizeof(struct mem_section_usage) + usemap_size();
- usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
- size * usemap_count);
- if (!usemap) {
+ usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
+ size * usage_count);
+ if (!usage) {
pr_warn("%s: allocation failed\n", __func__);
return;
}
+ memset(usage, 0, size * usage_count);
for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
if (!present_section_nr(pnum))
continue;
- usemap_map[pnum] = usemap;
- usemap += size;
- check_usemap_section_nr(nodeid, usemap_map[pnum]);
+ usage_map[pnum] = usage;
+ usage += size;
+ check_usemap_section_nr(nodeid, usage_map[pnum]);
}
}
#ifndef CONFIG_SPARSEMEM_VMEMMAP
-struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
+struct page __init *__populate_section_memmap(unsigned long pfn,
+ unsigned long nr_pages, int nid)
{
struct page *map;
unsigned long size;
@@ -420,10 +508,12 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
/* fallback */
for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
struct mem_section *ms;
+ unsigned long pfn = section_nr_to_pfn(pnum);
if (!present_section_nr(pnum))
continue;
- map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
+ map_map[pnum] = __populate_section_memmap(pfn,
+ PAGES_PER_SECTION, nodeid);
if (map_map[pnum])
continue;
ms = __nr_to_section(pnum);
@@ -451,7 +541,8 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
struct mem_section *ms = __nr_to_section(pnum);
int nid = sparse_early_nid(ms);
- map = sparse_mem_map_populate(pnum, nid);
+ map = __populate_section_memmap(section_nr_to_pfn(pnum),
+ PAGES_PER_SECTION, nid);
if (map)
return map;
@@ -468,7 +559,7 @@ void __weak __meminit vmemmap_populate_print_last(void)
/**
* alloc_usemap_and_memmap - memory alloction for pageblock flags and vmemmap
- * @map: usemap_map for pageblock flags or mmap_map for vmemmap
+ * @map: usage_map for mem_section_usage or mmap_map for vmemmap
*/
static void __init alloc_usemap_and_memmap(void (*alloc_func)
(void *, unsigned long, unsigned long,
@@ -521,10 +612,9 @@ static void __init alloc_usemap_and_memmap(void (*alloc_func)
*/
void __init sparse_init(void)
{
+ struct mem_section_usage *usage, **usage_map;
unsigned long pnum;
struct page *map;
- unsigned long *usemap;
- unsigned long **usemap_map;
int size;
#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
int size2;
@@ -539,21 +629,21 @@ void __init sparse_init(void)
/*
* map is using big page (aka 2M in x86 64 bit)
- * usemap is less one page (aka 24 bytes)
+ * usage is less one page (aka 24 bytes)
* so alloc 2M (with 2M align) and 24 bytes in turn will
* make next 2M slip to one more 2M later.
* then in big system, the memory will have a lot of holes...
* here try to allocate 2M pages continuously.
*
* powerpc need to call sparse_init_one_section right after each
- * sparse_early_mem_map_alloc, so allocate usemap_map at first.
+ * sparse_early_mem_map_alloc, so allocate usage_map at first.
*/
- size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
- usemap_map = memblock_virt_alloc(size, 0);
- if (!usemap_map)
- panic("can not allocate usemap_map\n");
+ size = sizeof(struct mem_section_usage *) * NR_MEM_SECTIONS;
+ usage_map = memblock_virt_alloc(size, 0);
+ if (!usage_map)
+ panic("can not allocate usage_map\n");
alloc_usemap_and_memmap(sparse_early_usemaps_alloc_node,
- (void *)usemap_map);
+ (void *)usage_map);
#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
@@ -568,8 +658,8 @@ void __init sparse_init(void)
if (!present_section_nr(pnum))
continue;
- usemap = usemap_map[pnum];
- if (!usemap)
+ usage = usage_map[pnum];
+ if (!usage)
continue;
#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
@@ -581,7 +671,7 @@ void __init sparse_init(void)
continue;
sparse_init_one_section(__nr_to_section(pnum), pnum, map,
- usemap);
+ usage);
}
vmemmap_populate_print_last();
@@ -589,20 +679,21 @@ void __init sparse_init(void)
#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
memblock_free_early(__pa(map_map), size2);
#endif
- memblock_free_early(__pa(usemap_map), size);
+ memblock_free_early(__pa(usage_map), size);
}
#ifdef CONFIG_MEMORY_HOTPLUG
#ifdef CONFIG_SPARSEMEM_VMEMMAP
-static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
+static struct page *populate_section_memmap(unsigned long pfn,
+ unsigned long nr_pages, int nid)
{
- /* This will make the necessary allocations eventually. */
- return sparse_mem_map_populate(pnum, nid);
+ return __populate_section_memmap(pfn, nr_pages, nid);
}
-static void __kfree_section_memmap(struct page *memmap)
+
+static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages)
{
- unsigned long start = (unsigned long)memmap;
- unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
+ unsigned long start = (unsigned long) pfn_to_page(pfn);
+ unsigned long end = start + nr_pages * sizeof(struct page);
vmemmap_free(start, end);
}
@@ -616,11 +707,18 @@ static void free_map_bootmem(struct page *memmap)
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
#else
-static struct page *__kmalloc_section_memmap(void)
+struct page *populate_section_memmap(unsigned long pfn,
+ unsigned long nr_pages, int nid)
{
struct page *page, *ret;
unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
+ if ((pfn & ~PAGE_SECTION_MASK) || nr_pages != PAGES_PER_SECTION) {
+ WARN(1, "%s: called with section unaligned parameters\n",
+ __func__);
+ return NULL;
+ }
+
page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
if (page)
goto got_map_page;
@@ -637,13 +735,16 @@ got_map_ptr:
return ret;
}
-static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
+static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages)
{
- return __kmalloc_section_memmap();
-}
+ struct page *memmap = pfn_to_page(pfn);
+
+ if ((pfn & ~PAGE_SECTION_MASK) || nr_pages != PAGES_PER_SECTION) {
+ WARN(1, "%s: called with section unaligned parameters\n",
+ __func__);
+ return;
+ }
-static void __kfree_section_memmap(struct page *memmap)
-{
if (is_vmalloc_addr(memmap))
vfree(memmap);
else
@@ -662,12 +763,12 @@ static void free_map_bootmem(struct page *memmap)
>> PAGE_SHIFT;
for (i = 0; i < nr_pages; i++, page++) {
- magic = (unsigned long) page->lru.next;
+ magic = (unsigned long) page->freelist;
BUG_ON(magic == NODE_INFO);
maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
- removing_section_nr = page->private;
+ removing_section_nr = page_private(page);
/*
* When this function is called, the removing section is
@@ -684,18 +785,179 @@ static void free_map_bootmem(struct page *memmap)
#endif /* CONFIG_MEMORY_HOTREMOVE */
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-/*
- * returns the number of sections whose mem_maps were properly
- * set. If this is <=0, then that means that the passed-in
- * map was not consumed and must be freed.
+static bool is_early_section(struct mem_section *ms)
+{
+ struct page *usemap_page;
+
+ usemap_page = virt_to_page(ms->usage->pageblock_flags);
+ if (PageSlab(usemap_page) || PageCompound(usemap_page))
+ return false;
+ else
+ return true;
+
+}
+
+#ifndef CONFIG_MEMORY_HOTREMOVE
+static void free_map_bootmem(struct page *memmap)
+{
+}
+#endif
+
+static void section_deactivate(struct pglist_data *pgdat, unsigned long pfn,
+ unsigned long nr_pages)
+{
+ bool early_section;
+ struct page *memmap = NULL;
+ struct mem_section_usage *usage = NULL;
+ int section_nr = pfn_to_section_nr(pfn);
+ struct mem_section *ms = __nr_to_section(section_nr);
+ unsigned long mask = section_active_mask(pfn, nr_pages), flags;
+
+ pgdat_resize_lock(pgdat, &flags);
+ if (!ms->usage ||
+ WARN((ms->usage->map_active & mask) != mask,
+ "section already deactivated active: %#lx mask: %#lx\n",
+ ms->usage->map_active, mask)) {
+ pgdat_resize_unlock(pgdat, &flags);
+ return;
+ }
+
+ early_section = is_early_section(ms);
+ ms->usage->map_active ^= mask;
+ if (ms->usage->map_active == 0) {
+ usage = ms->usage;
+ ms->usage = NULL;
+ memmap = sparse_decode_mem_map(ms->section_mem_map,
+ section_nr);
+ ms->section_mem_map = 0;
+ }
+
+ pgdat_resize_unlock(pgdat, &flags);
+
+ /*
+ * There are 3 cases to handle across two configurations
+ * (SPARSEMEM_VMEMMAP={y,n}):
+ *
+ * 1/ deactivation of a partial hot-added section (only possible
+ * in the SPARSEMEM_VMEMMAP=y case).
+ * a/ section was present at memory init
+ * b/ section was hot-added post memory init
+ * 2/ deactivation of a complete hot-added section
+ * 3/ deactivation of a complete section from memory init
+ *
+ * For 1/, when map_active does not go to zero we will not be
+ * freeing the usage map, but still need to free the vmemmap
+ * range.
+ *
+ * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
+ */
+ if (!mask)
+ return;
+ if (nr_pages < PAGES_PER_SECTION) {
+ if (!IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) {
+ WARN(1, "partial memory section removal not supported\n");
+ return;
+ }
+ if (!early_section)
+ depopulate_section_memmap(pfn, nr_pages);
+ memmap = 0;
+ }
+
+ if (usage) {
+ if (!early_section) {
+ /*
+ * 'memmap' may be zero in the SPARSEMEM_VMEMMAP=y case
+ * (see sparse_init_one_section()), so we can't rely on
+ * it to determine if we need to depopulate the memmap.
+ * Instead, we uncoditionally depopulate due to 'usage'
+ * being valid.
+ */
+ if (memmap || (nr_pages >= PAGES_PER_SECTION
+ && IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)))
+ depopulate_section_memmap(pfn, nr_pages);
+ kfree(usage);
+ return;
+ }
+ }
+
+ /*
+ * The usemap came from bootmem. This is packed with other usemaps
+ * on the section which has pgdat at boot time. Just keep it as is now.
+ */
+ if (memmap)
+ free_map_bootmem(memmap);
+}
+
+static struct page * __meminit section_activate(struct pglist_data *pgdat,
+ unsigned long pfn, unsigned nr_pages)
+{
+ struct mem_section *ms = __nr_to_section(pfn_to_section_nr(pfn));
+ unsigned long mask = section_active_mask(pfn, nr_pages), flags;
+ struct mem_section_usage *usage;
+ bool early_section = false;
+ struct page *memmap;
+ int rc = 0;
+
+ usage = __alloc_section_usage();
+ if (!usage)
+ return ERR_PTR(-ENOMEM);
+
+ pgdat_resize_lock(pgdat, &flags);
+ if (!ms->usage) {
+ ms->usage = usage;
+ usage = NULL;
+ } else
+ early_section = is_early_section(ms);
+
+ if (!mask)
+ rc = -EINVAL;
+ else if (mask & ms->usage->map_active)
+ rc = -EBUSY;
+ else
+ ms->usage->map_active |= mask;
+ pgdat_resize_unlock(pgdat, &flags);
+
+ kfree(usage);
+
+ if (rc)
+ return ERR_PTR(rc);
+
+
+ /*
+ * The early init code does not consider partially populated
+ * initial sections, it simply assumes that memory will never be
+ * referenced. If we hot-add memory into such a section then we
+ * do not need to populate the memmap and can simply reuse what
+ * is already there.
+ */
+ if (nr_pages < PAGES_PER_SECTION && early_section)
+ return pfn_to_page(pfn);
+
+ memmap = populate_section_memmap(pfn, nr_pages, pgdat->node_id);
+ if (!memmap) {
+ section_deactivate(pgdat, pfn, nr_pages);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return memmap;
+}
+
+/**
+ * sparse_add_section() - create a new memmap section, or populate an
+ * existing one
+ * @zone: host zone for the new memory mapping
+ * @start_pfn: first pfn to add (section aligned if zone != ZONE_DEVICE)
+ * @nr_pages: number of new pages to add
+ *
+ * Returns 0 on success.
*/
-int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn)
+int __meminit sparse_add_section(struct zone *zone, unsigned long start_pfn,
+ unsigned long nr_pages)
{
unsigned long section_nr = pfn_to_section_nr(start_pfn);
struct pglist_data *pgdat = zone->zone_pgdat;
struct mem_section *ms;
struct page *memmap;
- unsigned long *usemap;
unsigned long flags;
int ret;
@@ -706,36 +968,28 @@ int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn)
ret = sparse_index_init(section_nr, pgdat->node_id);
if (ret < 0 && ret != -EEXIST)
return ret;
- memmap = kmalloc_section_memmap(section_nr, pgdat->node_id);
- if (!memmap)
- return -ENOMEM;
- usemap = __kmalloc_section_usemap();
- if (!usemap) {
- __kfree_section_memmap(memmap);
- return -ENOMEM;
- }
- pgdat_resize_lock(pgdat, &flags);
+ memmap = section_activate(pgdat, start_pfn, nr_pages);
+ if (IS_ERR(memmap))
+ return PTR_ERR(memmap);
+ pgdat_resize_lock(pgdat, &flags);
ms = __pfn_to_section(start_pfn);
- if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
- ret = -EEXIST;
+ if (nr_pages == PAGES_PER_SECTION && (ms->section_mem_map
+ & SECTION_MARKED_PRESENT)) {
+ ret = -EBUSY;
goto out;
}
-
- memset(memmap, 0, sizeof(struct page) * PAGES_PER_SECTION);
-
ms->section_mem_map |= SECTION_MARKED_PRESENT;
-
- ret = sparse_init_one_section(ms, section_nr, memmap, usemap);
-
+ sparse_init_one_section(ms, section_nr, memmap, ms->usage);
out:
pgdat_resize_unlock(pgdat, &flags);
- if (ret <= 0) {
- kfree(usemap);
- __kfree_section_memmap(memmap);
+ if (nr_pages == PAGES_PER_SECTION && ret < 0 && ret != -EEXIST) {
+ section_deactivate(pgdat, start_pfn, nr_pages);
+ return ret;
}
- return ret;
+ memset(memmap, 0, sizeof(struct page) * nr_pages);
+ return 0;
}
#ifdef CONFIG_MEMORY_HOTREMOVE
@@ -760,53 +1014,15 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
}
#endif
-static void free_section_usemap(struct page *memmap, unsigned long *usemap)
-{
- struct page *usemap_page;
-
- if (!usemap)
- return;
-
- usemap_page = virt_to_page(usemap);
- /*
- * Check to see if allocation came from hot-plug-add
- */
- if (PageSlab(usemap_page) || PageCompound(usemap_page)) {
- kfree(usemap);
- if (memmap)
- __kfree_section_memmap(memmap);
- return;
- }
-
- /*
- * The usemap came from bootmem. This is packed with other usemaps
- * on the section which has pgdat at boot time. Just keep it as is now.
- */
-
- if (memmap)
- free_map_bootmem(memmap);
-}
-
-void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
+void sparse_remove_section(struct zone *zone, struct mem_section *ms,
+ unsigned long pfn, unsigned long nr_pages,
unsigned long map_offset)
{
- struct page *memmap = NULL;
- unsigned long *usemap = NULL, flags;
struct pglist_data *pgdat = zone->zone_pgdat;
- pgdat_resize_lock(pgdat, &flags);
- if (ms->section_mem_map) {
- usemap = ms->pageblock_flags;
- memmap = sparse_decode_mem_map(ms->section_mem_map,
- __section_nr(ms));
- ms->section_mem_map = 0;
- ms->pageblock_flags = NULL;
- }
- pgdat_resize_unlock(pgdat, &flags);
-
- clear_hwpoisoned_pages(memmap + map_offset,
- PAGES_PER_SECTION - map_offset);
- free_section_usemap(memmap, usemap);
+ clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset,
+ nr_pages - map_offset);
+ section_deactivate(pgdat, pfn, nr_pages);
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/mm/swap.c b/mm/swap.c
index 844baedd2429..c4910f14f957 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -209,9 +209,10 @@ static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec,
{
int *pgmoved = arg;
- if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
- enum lru_list lru = page_lru_base_type(page);
- list_move_tail(&page->lru, &lruvec->lists[lru]);
+ if (PageLRU(page) && !PageUnevictable(page)) {
+ del_page_from_lru_list(page, lruvec, page_lru(page));
+ ClearPageActive(page);
+ add_page_to_lru_list_tail(page, lruvec, page_lru(page));
(*pgmoved)++;
}
}
@@ -235,7 +236,7 @@ static void pagevec_move_tail(struct pagevec *pvec)
*/
void rotate_reclaimable_page(struct page *page)
{
- if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
+ if (!PageLocked(page) && !PageDirty(page) &&
!PageUnevictable(page) && PageLRU(page)) {
struct pagevec *pvec;
unsigned long flags;
@@ -971,12 +972,6 @@ EXPORT_SYMBOL(pagevec_lookup_tag);
void __init swap_setup(void)
{
unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
-#ifdef CONFIG_SWAP
- int i;
-
- for (i = 0; i < MAX_SWAPFILES; i++)
- spin_lock_init(&swapper_spaces[i].tree_lock);
-#endif
/* Use a smaller cluster for small-memory machines */
if (megs < 16)
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
new file mode 100644
index 000000000000..9b5bc86f96ad
--- /dev/null
+++ b/mm/swap_slots.c
@@ -0,0 +1,342 @@
+/*
+ * Manage cache of swap slots to be used for and returned from
+ * swap.
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * We allocate the swap slots from the global pool and put
+ * it into local per cpu caches. This has the advantage
+ * of no needing to acquire the swap_info lock every time
+ * we need a new slot.
+ *
+ * There is also opportunity to simply return the slot
+ * to local caches without needing to acquire swap_info
+ * lock. We do not reuse the returned slots directly but
+ * move them back to the global pool in a batch. This
+ * allows the slots to coaellesce and reduce fragmentation.
+ *
+ * The swap entry allocated is marked with SWAP_HAS_CACHE
+ * flag in map_count that prevents it from being allocated
+ * again from the global pool.
+ *
+ * The swap slots cache is protected by a mutex instead of
+ * a spin lock as when we search for slots with scan_swap_map,
+ * we can possibly sleep.
+ */
+
+#include <linux/swap_slots.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/vmalloc.h>
+#include <linux/mutex.h>
+
+#ifdef CONFIG_SWAP
+
+static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots);
+static bool swap_slot_cache_active;
+bool swap_slot_cache_enabled;
+static bool swap_slot_cache_initialized;
+DEFINE_MUTEX(swap_slots_cache_mutex);
+/* Serialize swap slots cache enable/disable operations */
+DEFINE_MUTEX(swap_slots_cache_enable_mutex);
+
+static void __drain_swap_slots_cache(unsigned int type);
+static void deactivate_swap_slots_cache(void);
+static void reactivate_swap_slots_cache(void);
+
+#define use_swap_slot_cache (swap_slot_cache_active && \
+ swap_slot_cache_enabled && swap_slot_cache_initialized)
+#define SLOTS_CACHE 0x1
+#define SLOTS_CACHE_RET 0x2
+
+static void deactivate_swap_slots_cache(void)
+{
+ mutex_lock(&swap_slots_cache_mutex);
+ swap_slot_cache_active = false;
+ __drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
+ mutex_unlock(&swap_slots_cache_mutex);
+}
+
+static void reactivate_swap_slots_cache(void)
+{
+ mutex_lock(&swap_slots_cache_mutex);
+ swap_slot_cache_active = true;
+ mutex_unlock(&swap_slots_cache_mutex);
+}
+
+/* Must not be called with cpu hot plug lock */
+void disable_swap_slots_cache_lock(void)
+{
+ mutex_lock(&swap_slots_cache_enable_mutex);
+ swap_slot_cache_enabled = false;
+ if (swap_slot_cache_initialized) {
+ /* serialize with cpu hotplug operations */
+ get_online_cpus();
+ __drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
+ put_online_cpus();
+ }
+}
+
+static void __reenable_swap_slots_cache(void)
+{
+ swap_slot_cache_enabled = has_usable_swap();
+}
+
+void reenable_swap_slots_cache_unlock(void)
+{
+ __reenable_swap_slots_cache();
+ mutex_unlock(&swap_slots_cache_enable_mutex);
+}
+
+static bool check_cache_active(void)
+{
+ long pages;
+
+ if (!swap_slot_cache_enabled || !swap_slot_cache_initialized)
+ return false;
+
+ pages = get_nr_swap_pages();
+ if (!swap_slot_cache_active) {
+ if (pages > num_online_cpus() *
+ THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE)
+ reactivate_swap_slots_cache();
+ goto out;
+ }
+
+ /* if global pool of slot caches too low, deactivate cache */
+ if (pages < num_online_cpus() * THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE)
+ deactivate_swap_slots_cache();
+out:
+ return swap_slot_cache_active;
+}
+
+static int alloc_swap_slot_cache(unsigned int cpu)
+{
+ struct swap_slots_cache *cache;
+ swp_entry_t *slots, *slots_ret;
+
+ /*
+ * Do allocation outside swap_slots_cache_mutex
+ * as vzalloc could trigger reclaim and get_swap_page,
+ * which can lock swap_slots_cache_mutex.
+ */
+ slots = vzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE);
+ if (!slots)
+ return -ENOMEM;
+
+ slots_ret = vzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE);
+ if (!slots_ret) {
+ vfree(slots);
+ return -ENOMEM;
+ }
+
+ mutex_lock(&swap_slots_cache_mutex);
+ cache = &per_cpu(swp_slots, cpu);
+ if (cache->slots || cache->slots_ret)
+ /* cache already allocated */
+ goto out;
+ if (!cache->lock_initialized) {
+ mutex_init(&cache->alloc_lock);
+ spin_lock_init(&cache->free_lock);
+ cache->lock_initialized = true;
+ }
+ cache->nr = 0;
+ cache->cur = 0;
+ cache->n_ret = 0;
+ cache->slots = slots;
+ slots = NULL;
+ cache->slots_ret = slots_ret;
+ slots_ret = NULL;
+out:
+ mutex_unlock(&swap_slots_cache_mutex);
+ if (slots)
+ vfree(slots);
+ if (slots_ret)
+ vfree(slots_ret);
+ return 0;
+}
+
+static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
+ bool free_slots)
+{
+ struct swap_slots_cache *cache;
+ swp_entry_t *slots = NULL;
+
+ cache = &per_cpu(swp_slots, cpu);
+ if ((type & SLOTS_CACHE) && cache->slots) {
+ mutex_lock(&cache->alloc_lock);
+ swapcache_free_entries(cache->slots + cache->cur, cache->nr);
+ cache->cur = 0;
+ cache->nr = 0;
+ if (free_slots && cache->slots) {
+ vfree(cache->slots);
+ cache->slots = NULL;
+ }
+ mutex_unlock(&cache->alloc_lock);
+ }
+ if ((type & SLOTS_CACHE_RET) && cache->slots_ret) {
+ spin_lock_irq(&cache->free_lock);
+ swapcache_free_entries(cache->slots_ret, cache->n_ret);
+ cache->n_ret = 0;
+ if (free_slots && cache->slots_ret) {
+ slots = cache->slots_ret;
+ cache->slots_ret = NULL;
+ }
+ spin_unlock_irq(&cache->free_lock);
+ if (slots)
+ vfree(slots);
+ }
+}
+
+static void __drain_swap_slots_cache(unsigned int type)
+{
+ unsigned int cpu;
+
+ /*
+ * This function is called during
+ * 1) swapoff, when we have to make sure no
+ * left over slots are in cache when we remove
+ * a swap device;
+ * 2) disabling of swap slot cache, when we run low
+ * on swap slots when allocating memory and need
+ * to return swap slots to global pool.
+ *
+ * We cannot acquire cpu hot plug lock here as
+ * this function can be invoked in the cpu
+ * hot plug path:
+ * cpu_up -> lock cpu_hotplug -> cpu hotplug state callback
+ * -> memory allocation -> direct reclaim -> get_swap_page
+ * -> drain_swap_slots_cache
+ *
+ * Hence the loop over current online cpu below could miss cpu that
+ * is being brought online but not yet marked as online.
+ * That is okay as we do not schedule and run anything on a
+ * cpu before it has been marked online. Hence, we will not
+ * fill any swap slots in slots cache of such cpu.
+ * There are no slots on such cpu that need to be drained.
+ */
+ for_each_online_cpu(cpu)
+ drain_slots_cache_cpu(cpu, type, false);
+}
+
+static int free_slot_cache(unsigned int cpu)
+{
+ mutex_lock(&swap_slots_cache_mutex);
+ drain_slots_cache_cpu(cpu, SLOTS_CACHE | SLOTS_CACHE_RET, true);
+ mutex_unlock(&swap_slots_cache_mutex);
+ return 0;
+}
+
+int enable_swap_slots_cache(void)
+{
+ int ret = 0;
+
+ mutex_lock(&swap_slots_cache_enable_mutex);
+ if (swap_slot_cache_initialized) {
+ __reenable_swap_slots_cache();
+ goto out_unlock;
+ }
+
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "swap_slots_cache",
+ alloc_swap_slot_cache, free_slot_cache);
+ if (ret < 0)
+ goto out_unlock;
+ swap_slot_cache_initialized = true;
+ __reenable_swap_slots_cache();
+out_unlock:
+ mutex_unlock(&swap_slots_cache_enable_mutex);
+ return 0;
+}
+
+/* called with swap slot cache's alloc lock held */
+static int refill_swap_slots_cache(struct swap_slots_cache *cache)
+{
+ if (!use_swap_slot_cache || cache->nr)
+ return 0;
+
+ cache->cur = 0;
+ if (swap_slot_cache_active)
+ cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE, cache->slots);
+
+ return cache->nr;
+}
+
+int free_swap_slot(swp_entry_t entry)
+{
+ struct swap_slots_cache *cache;
+
+ BUG_ON(!swap_slot_cache_initialized);
+
+ cache = &get_cpu_var(swp_slots);
+ if (use_swap_slot_cache && cache->slots_ret) {
+ spin_lock_irq(&cache->free_lock);
+ /* Swap slots cache may be deactivated before acquiring lock */
+ if (!use_swap_slot_cache) {
+ spin_unlock_irq(&cache->free_lock);
+ goto direct_free;
+ }
+ if (cache->n_ret >= SWAP_SLOTS_CACHE_SIZE) {
+ /*
+ * Return slots to global pool.
+ * The current swap_map value is SWAP_HAS_CACHE.
+ * Set it to 0 to indicate it is available for
+ * allocation in global pool
+ */
+ swapcache_free_entries(cache->slots_ret, cache->n_ret);
+ cache->n_ret = 0;
+ }
+ cache->slots_ret[cache->n_ret++] = entry;
+ spin_unlock_irq(&cache->free_lock);
+ } else {
+direct_free:
+ swapcache_free_entries(&entry, 1);
+ }
+ put_cpu_var(swp_slots);
+
+ return 0;
+}
+
+swp_entry_t get_swap_page(void)
+{
+ swp_entry_t entry, *pentry;
+ struct swap_slots_cache *cache;
+
+ /*
+ * Preemption is allowed here, because we may sleep
+ * in refill_swap_slots_cache(). But it is safe, because
+ * accesses to the per-CPU data structure are protected by the
+ * mutex cache->alloc_lock.
+ *
+ * The alloc path here does not touch cache->slots_ret
+ * so cache->free_lock is not taken.
+ */
+ cache = raw_cpu_ptr(&swp_slots);
+
+ entry.val = 0;
+ if (check_cache_active()) {
+ mutex_lock(&cache->alloc_lock);
+ if (cache->slots) {
+repeat:
+ if (cache->nr) {
+ pentry = &cache->slots[cache->cur++];
+ entry = *pentry;
+ pentry->val = 0;
+ cache->nr--;
+ } else {
+ if (refill_swap_slots_cache(cache))
+ goto repeat;
+ }
+ }
+ mutex_unlock(&cache->alloc_lock);
+ if (entry.val)
+ return entry;
+ }
+
+ get_swap_pages(1, &entry);
+
+ return entry;
+}
+
+#endif /* CONFIG_SWAP */
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 35d7e0ee1c77..2126e9ba23b2 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -17,6 +17,8 @@
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/migrate.h>
+#include <linux/vmalloc.h>
+#include <linux/swap_slots.h>
#include <asm/pgtable.h>
@@ -32,15 +34,8 @@ static const struct address_space_operations swap_aops = {
#endif
};
-struct address_space swapper_spaces[MAX_SWAPFILES] = {
- [0 ... MAX_SWAPFILES - 1] = {
- .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
- .i_mmap_writable = ATOMIC_INIT(0),
- .a_ops = &swap_aops,
- /* swap cache doesn't use writeback related tags */
- .flags = 1 << AS_NO_WRITEBACK_TAGS,
- }
-};
+struct address_space *swapper_spaces[MAX_SWAPFILES];
+static unsigned int nr_swapper_spaces[MAX_SWAPFILES];
#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0)
@@ -53,11 +48,26 @@ static struct {
unsigned long total_swapcache_pages(void)
{
- int i;
+ unsigned int i, j, nr;
unsigned long ret = 0;
+ struct address_space *spaces;
- for (i = 0; i < MAX_SWAPFILES; i++)
- ret += swapper_spaces[i].nrpages;
+ rcu_read_lock();
+ for (i = 0; i < MAX_SWAPFILES; i++) {
+ /*
+ * The corresponding entries in nr_swapper_spaces and
+ * swapper_spaces will be reused only after at least
+ * one grace period. So it is impossible for them
+ * belongs to different usage.
+ */
+ nr = nr_swapper_spaces[i];
+ spaces = rcu_dereference(swapper_spaces[i]);
+ if (!nr || !spaces)
+ continue;
+ for (j = 0; j < nr; j++)
+ ret += spaces[j].nrpages;
+ }
+ rcu_read_unlock();
return ret;
}
@@ -315,6 +325,17 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
break;
/*
+ * Just skip read ahead for unused swap slot.
+ * During swap_off when swap_slot_cache is disabled,
+ * we have to handle the race between putting
+ * swap entry in swap cache and marking swap slot
+ * as SWAP_HAS_CACHE. That's done in later part of code or
+ * else swap_off will be aborted if we return NULL.
+ */
+ if (!__swp_swapcount(entry) && swap_slot_cache_enabled)
+ return NULL;
+
+ /*
* Get a new page to read into from swap.
*/
if (!new_page) {
@@ -505,3 +526,38 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
skip:
return read_swap_cache_async(entry, gfp_mask, vma, addr);
}
+
+int init_swap_address_space(unsigned int type, unsigned long nr_pages)
+{
+ struct address_space *spaces, *space;
+ unsigned int i, nr;
+
+ nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
+ spaces = vzalloc(sizeof(struct address_space) * nr);
+ if (!spaces)
+ return -ENOMEM;
+ for (i = 0; i < nr; i++) {
+ space = spaces + i;
+ INIT_RADIX_TREE(&space->page_tree, GFP_ATOMIC|__GFP_NOWARN);
+ atomic_set(&space->i_mmap_writable, 0);
+ space->a_ops = &swap_aops;
+ /* swap cache doesn't use writeback related tags */
+ mapping_set_no_writeback_tags(space);
+ spin_lock_init(&space->tree_lock);
+ }
+ nr_swapper_spaces[type] = nr;
+ rcu_assign_pointer(swapper_spaces[type], spaces);
+
+ return 0;
+}
+
+void exit_swap_address_space(unsigned int type)
+{
+ struct address_space *spaces;
+
+ spaces = swapper_spaces[type];
+ nr_swapper_spaces[type] = 0;
+ rcu_assign_pointer(swapper_spaces[type], NULL);
+ synchronize_rcu();
+ kvfree(spaces);
+}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4761701d1721..50b64f6dd580 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -34,6 +34,7 @@
#include <linux/frontswap.h>
#include <linux/swapfile.h>
#include <linux/export.h>
+#include <linux/swap_slots.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -257,6 +258,52 @@ static inline void cluster_set_null(struct swap_cluster_info *info)
info->data = 0;
}
+static inline void __lock_cluster(struct swap_cluster_info *ci)
+{
+ spin_lock(&ci->lock);
+}
+
+static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
+ unsigned long offset)
+{
+ struct swap_cluster_info *ci;
+
+ ci = si->cluster_info;
+ if (ci) {
+ ci += offset / SWAPFILE_CLUSTER;
+ __lock_cluster(ci);
+ }
+ return ci;
+}
+
+static inline void unlock_cluster(struct swap_cluster_info *ci)
+{
+ if (ci)
+ spin_unlock(&ci->lock);
+}
+
+static inline struct swap_cluster_info *lock_cluster_or_swap_info(
+ struct swap_info_struct *si,
+ unsigned long offset)
+{
+ struct swap_cluster_info *ci;
+
+ ci = lock_cluster(si, offset);
+ if (!ci)
+ spin_lock(&si->lock);
+
+ return ci;
+}
+
+static inline void unlock_cluster_or_swap_info(struct swap_info_struct *si,
+ struct swap_cluster_info *ci)
+{
+ if (ci)
+ unlock_cluster(ci);
+ else
+ spin_unlock(&si->lock);
+}
+
static inline bool cluster_list_empty(struct swap_cluster_list *list)
{
return cluster_is_null(&list->head);
@@ -281,9 +328,17 @@ static void cluster_list_add_tail(struct swap_cluster_list *list,
cluster_set_next_flag(&list->head, idx, 0);
cluster_set_next_flag(&list->tail, idx, 0);
} else {
+ struct swap_cluster_info *ci_tail;
unsigned int tail = cluster_next(&list->tail);
- cluster_set_next(&ci[tail], idx);
+ /*
+ * Nested cluster lock, but both cluster locks are
+ * only acquired when we held swap_info_struct->lock
+ */
+ ci_tail = ci + tail;
+ __lock_cluster(ci_tail);
+ cluster_set_next(ci_tail, idx);
+ unlock_cluster(ci_tail);
cluster_set_next_flag(&list->tail, idx, 0);
}
}
@@ -328,7 +383,7 @@ static void swap_cluster_schedule_discard(struct swap_info_struct *si,
*/
static void swap_do_scheduled_discard(struct swap_info_struct *si)
{
- struct swap_cluster_info *info;
+ struct swap_cluster_info *info, *ci;
unsigned int idx;
info = si->cluster_info;
@@ -341,10 +396,14 @@ static void swap_do_scheduled_discard(struct swap_info_struct *si)
SWAPFILE_CLUSTER);
spin_lock(&si->lock);
- cluster_set_flag(&info[idx], CLUSTER_FLAG_FREE);
+ ci = lock_cluster(si, idx * SWAPFILE_CLUSTER);
+ cluster_set_flag(ci, CLUSTER_FLAG_FREE);
+ unlock_cluster(ci);
cluster_list_add_tail(&si->free_clusters, info, idx);
+ ci = lock_cluster(si, idx * SWAPFILE_CLUSTER);
memset(si->swap_map + idx * SWAPFILE_CLUSTER,
0, SWAPFILE_CLUSTER);
+ unlock_cluster(ci);
}
}
@@ -443,12 +502,13 @@ scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
* Try to get a swap entry from current cpu's swap entry pool (a cluster). This
* might involve allocating a new cluster for current CPU too.
*/
-static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
+static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
unsigned long *offset, unsigned long *scan_base)
{
struct percpu_cluster *cluster;
+ struct swap_cluster_info *ci;
bool found_free;
- unsigned long tmp;
+ unsigned long tmp, max;
new_cluster:
cluster = this_cpu_ptr(si->percpu_cluster);
@@ -466,7 +526,7 @@ new_cluster:
*scan_base = *offset = si->cluster_next;
goto new_cluster;
} else
- return;
+ return false;
}
found_free = false;
@@ -476,14 +536,21 @@ new_cluster:
* check if there is still free entry in the cluster
*/
tmp = cluster->next;
- while (tmp < si->max && tmp < (cluster_next(&cluster->index) + 1) *
- SWAPFILE_CLUSTER) {
+ max = min_t(unsigned long, si->max,
+ (cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER);
+ if (tmp >= max) {
+ cluster_set_null(&cluster->index);
+ goto new_cluster;
+ }
+ ci = lock_cluster(si, tmp);
+ while (tmp < max) {
if (!si->swap_map[tmp]) {
found_free = true;
break;
}
tmp++;
}
+ unlock_cluster(ci);
if (!found_free) {
cluster_set_null(&cluster->index);
goto new_cluster;
@@ -491,15 +558,22 @@ new_cluster:
cluster->next = tmp + 1;
*offset = tmp;
*scan_base = tmp;
+ return found_free;
}
-static unsigned long scan_swap_map(struct swap_info_struct *si,
- unsigned char usage)
+static int scan_swap_map_slots(struct swap_info_struct *si,
+ unsigned char usage, int nr,
+ swp_entry_t slots[])
{
+ struct swap_cluster_info *ci;
unsigned long offset;
unsigned long scan_base;
unsigned long last_in_cluster = 0;
int latency_ration = LATENCY_LIMIT;
+ int n_ret = 0;
+
+ if (nr > SWAP_BATCH)
+ nr = SWAP_BATCH;
/*
* We try to cluster swap pages by allocating them sequentially
@@ -517,8 +591,10 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
/* SSD algorithm */
if (si->cluster_info) {
- scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
- goto checks;
+ if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
+ goto checks;
+ else
+ goto scan;
}
if (unlikely(!si->cluster_nr--)) {
@@ -562,8 +638,14 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
checks:
if (si->cluster_info) {
- while (scan_swap_map_ssd_cluster_conflict(si, offset))
- scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
+ while (scan_swap_map_ssd_cluster_conflict(si, offset)) {
+ /* take a break if we already got some slots */
+ if (n_ret)
+ goto done;
+ if (!scan_swap_map_try_ssd_cluster(si, &offset,
+ &scan_base))
+ goto scan;
+ }
}
if (!(si->flags & SWP_WRITEOK))
goto no_page;
@@ -572,9 +654,11 @@ checks:
if (offset > si->highest_bit)
scan_base = offset = si->lowest_bit;
+ ci = lock_cluster(si, offset);
/* reuse swap entry of cache-only swap if not busy. */
if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
int swap_was_freed;
+ unlock_cluster(ci);
spin_unlock(&si->lock);
swap_was_freed = __try_to_reclaim_swap(si, offset);
spin_lock(&si->lock);
@@ -584,8 +668,13 @@ checks:
goto scan; /* check next one */
}
- if (si->swap_map[offset])
- goto scan;
+ if (si->swap_map[offset]) {
+ unlock_cluster(ci);
+ if (!n_ret)
+ goto scan;
+ else
+ goto done;
+ }
if (offset == si->lowest_bit)
si->lowest_bit++;
@@ -601,10 +690,45 @@ checks:
}
si->swap_map[offset] = usage;
inc_cluster_info_page(si, si->cluster_info, offset);
+ unlock_cluster(ci);
si->cluster_next = offset + 1;
- si->flags -= SWP_SCANNING;
+ slots[n_ret++] = swp_entry(si->type, offset);
+
+ /* got enough slots or reach max slots? */
+ if ((n_ret == nr) || (offset >= si->highest_bit))
+ goto done;
+
+ /* search for next available slot */
+
+ /* time to take a break? */
+ if (unlikely(--latency_ration < 0)) {
+ if (n_ret)
+ goto done;
+ spin_unlock(&si->lock);
+ cond_resched();
+ spin_lock(&si->lock);
+ latency_ration = LATENCY_LIMIT;
+ }
+
+ /* try to get more slots in cluster */
+ if (si->cluster_info) {
+ if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
+ goto checks;
+ else
+ goto done;
+ }
+ /* non-ssd case */
+ ++offset;
- return offset;
+ /* non-ssd case, still more slots in cluster? */
+ if (si->cluster_nr && !si->swap_map[offset]) {
+ --si->cluster_nr;
+ goto checks;
+ }
+
+done:
+ si->flags -= SWP_SCANNING;
+ return n_ret;
scan:
spin_unlock(&si->lock);
@@ -642,17 +766,41 @@ scan:
no_page:
si->flags -= SWP_SCANNING;
- return 0;
+ return n_ret;
}
-swp_entry_t get_swap_page(void)
+static unsigned long scan_swap_map(struct swap_info_struct *si,
+ unsigned char usage)
+{
+ swp_entry_t entry;
+ int n_ret;
+
+ n_ret = scan_swap_map_slots(si, usage, 1, &entry);
+
+ if (n_ret)
+ return swp_offset(entry);
+ else
+ return 0;
+
+}
+
+int get_swap_pages(int n_goal, swp_entry_t swp_entries[])
{
struct swap_info_struct *si, *next;
- pgoff_t offset;
+ long avail_pgs;
+ int n_ret = 0;
- if (atomic_long_read(&nr_swap_pages) <= 0)
+ avail_pgs = atomic_long_read(&nr_swap_pages);
+ if (avail_pgs <= 0)
goto noswap;
- atomic_long_dec(&nr_swap_pages);
+
+ if (n_goal > SWAP_BATCH)
+ n_goal = SWAP_BATCH;
+
+ if (n_goal > avail_pgs)
+ n_goal = avail_pgs;
+
+ atomic_long_sub(n_goal, &nr_swap_pages);
spin_lock(&swap_avail_lock);
@@ -678,14 +826,14 @@ start_over:
spin_unlock(&si->lock);
goto nextsi;
}
-
- /* This is called for allocating swap entry for cache */
- offset = scan_swap_map(si, SWAP_HAS_CACHE);
+ n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
+ n_goal, swp_entries);
spin_unlock(&si->lock);
- if (offset)
- return swp_entry(si->type, offset);
+ if (n_ret)
+ goto check_out;
pr_debug("scan_swap_map of si %d failed to find offset\n",
- si->type);
+ si->type);
+
spin_lock(&swap_avail_lock);
nextsi:
/*
@@ -696,7 +844,8 @@ nextsi:
* up between us dropping swap_avail_lock and taking si->lock.
* Since we dropped the swap_avail_lock, the swap_avail_head
* list may have been modified; so if next is still in the
- * swap_avail_head list then try it, otherwise start over.
+ * swap_avail_head list then try it, otherwise start over
+ * if we have not gotten any slots.
*/
if (plist_node_empty(&next->avail_list))
goto start_over;
@@ -704,9 +853,11 @@ nextsi:
spin_unlock(&swap_avail_lock);
- atomic_long_inc(&nr_swap_pages);
+check_out:
+ if (n_ret < n_goal)
+ atomic_long_add((long) (n_goal-n_ret), &nr_swap_pages);
noswap:
- return (swp_entry_t) {0};
+ return n_ret;
}
/* The only caller of this function is now suspend routine */
@@ -731,7 +882,7 @@ swp_entry_t get_swap_page_of_type(int type)
return (swp_entry_t) {0};
}
-static struct swap_info_struct *swap_info_get(swp_entry_t entry)
+static struct swap_info_struct *__swap_info_get(swp_entry_t entry)
{
struct swap_info_struct *p;
unsigned long offset, type;
@@ -747,34 +898,76 @@ static struct swap_info_struct *swap_info_get(swp_entry_t entry)
offset = swp_offset(entry);
if (offset >= p->max)
goto bad_offset;
- if (!p->swap_map[offset])
- goto bad_free;
- spin_lock(&p->lock);
return p;
-bad_free:
- pr_err("swap_free: %s%08lx\n", Unused_offset, entry.val);
- goto out;
bad_offset:
- pr_err("swap_free: %s%08lx\n", Bad_offset, entry.val);
+ pr_err("swap_info_get: %s%08lx\n", Bad_offset, entry.val);
goto out;
bad_device:
- pr_err("swap_free: %s%08lx\n", Unused_file, entry.val);
+ pr_err("swap_info_get: %s%08lx\n", Unused_file, entry.val);
goto out;
bad_nofile:
- pr_err("swap_free: %s%08lx\n", Bad_file, entry.val);
+ pr_err("swap_info_get: %s%08lx\n", Bad_file, entry.val);
+out:
+ return NULL;
+}
+
+static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
+{
+ struct swap_info_struct *p;
+
+ p = __swap_info_get(entry);
+ if (!p)
+ goto out;
+ if (!p->swap_map[swp_offset(entry)])
+ goto bad_free;
+ return p;
+
+bad_free:
+ pr_err("swap_info_get: %s%08lx\n", Unused_offset, entry.val);
+ goto out;
out:
return NULL;
}
-static unsigned char swap_entry_free(struct swap_info_struct *p,
- swp_entry_t entry, unsigned char usage)
+static struct swap_info_struct *swap_info_get(swp_entry_t entry)
{
+ struct swap_info_struct *p;
+
+ p = _swap_info_get(entry);
+ if (p)
+ spin_lock(&p->lock);
+ return p;
+}
+
+static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry,
+ struct swap_info_struct *q)
+{
+ struct swap_info_struct *p;
+
+ p = _swap_info_get(entry);
+
+ if (p != q) {
+ if (q != NULL)
+ spin_unlock(&q->lock);
+ if (p != NULL)
+ spin_lock(&p->lock);
+ }
+ return p;
+}
+
+static unsigned char __swap_entry_free(struct swap_info_struct *p,
+ swp_entry_t entry, unsigned char usage)
+{
+ struct swap_cluster_info *ci;
unsigned long offset = swp_offset(entry);
unsigned char count;
unsigned char has_cache;
+ ci = lock_cluster_or_swap_info(p, offset);
+
count = p->swap_map[offset];
+
has_cache = count & SWAP_HAS_CACHE;
count &= ~SWAP_HAS_CACHE;
@@ -798,38 +991,52 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
}
usage = count | has_cache;
- p->swap_map[offset] = usage;
-
- /* free if no reference */
- if (!usage) {
- mem_cgroup_uncharge_swap(entry);
- dec_cluster_info_page(p, p->cluster_info, offset);
- if (offset < p->lowest_bit)
- p->lowest_bit = offset;
- if (offset > p->highest_bit) {
- bool was_full = !p->highest_bit;
- p->highest_bit = offset;
- if (was_full && (p->flags & SWP_WRITEOK)) {
- spin_lock(&swap_avail_lock);
- WARN_ON(!plist_node_empty(&p->avail_list));
- if (plist_node_empty(&p->avail_list))
- plist_add(&p->avail_list,
- &swap_avail_head);
- spin_unlock(&swap_avail_lock);
- }
- }
- atomic_long_inc(&nr_swap_pages);
- p->inuse_pages--;
- frontswap_invalidate_page(p->type, offset);
- if (p->flags & SWP_BLKDEV) {
- struct gendisk *disk = p->bdev->bd_disk;
- if (disk->fops->swap_slot_free_notify)
- disk->fops->swap_slot_free_notify(p->bdev,
- offset);
+ p->swap_map[offset] = usage ? : SWAP_HAS_CACHE;
+
+ unlock_cluster_or_swap_info(p, ci);
+
+ return usage;
+}
+
+static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry)
+{
+ struct swap_cluster_info *ci;
+ unsigned long offset = swp_offset(entry);
+ unsigned char count;
+
+ ci = lock_cluster(p, offset);
+ count = p->swap_map[offset];
+ VM_BUG_ON(count != SWAP_HAS_CACHE);
+ p->swap_map[offset] = 0;
+ dec_cluster_info_page(p, p->cluster_info, offset);
+ unlock_cluster(ci);
+
+ mem_cgroup_uncharge_swap(entry);
+ if (offset < p->lowest_bit)
+ p->lowest_bit = offset;
+ if (offset > p->highest_bit) {
+ bool was_full = !p->highest_bit;
+
+ p->highest_bit = offset;
+ if (was_full && (p->flags & SWP_WRITEOK)) {
+ spin_lock(&swap_avail_lock);
+ WARN_ON(!plist_node_empty(&p->avail_list));
+ if (plist_node_empty(&p->avail_list))
+ plist_add(&p->avail_list,
+ &swap_avail_head);
+ spin_unlock(&swap_avail_lock);
}
}
+ atomic_long_inc(&nr_swap_pages);
+ p->inuse_pages--;
+ frontswap_invalidate_page(p->type, offset);
+ if (p->flags & SWP_BLKDEV) {
+ struct gendisk *disk = p->bdev->bd_disk;
- return usage;
+ if (disk->fops->swap_slot_free_notify)
+ disk->fops->swap_slot_free_notify(p->bdev,
+ offset);
+ }
}
/*
@@ -840,10 +1047,10 @@ void swap_free(swp_entry_t entry)
{
struct swap_info_struct *p;
- p = swap_info_get(entry);
+ p = _swap_info_get(entry);
if (p) {
- swap_entry_free(p, entry, 1);
- spin_unlock(&p->lock);
+ if (!__swap_entry_free(p, entry, 1))
+ free_swap_slot(entry);
}
}
@@ -854,11 +1061,33 @@ void swapcache_free(swp_entry_t entry)
{
struct swap_info_struct *p;
- p = swap_info_get(entry);
+ p = _swap_info_get(entry);
if (p) {
- swap_entry_free(p, entry, SWAP_HAS_CACHE);
- spin_unlock(&p->lock);
+ if (!__swap_entry_free(p, entry, SWAP_HAS_CACHE))
+ free_swap_slot(entry);
+ }
+}
+
+void swapcache_free_entries(swp_entry_t *entries, int n)
+{
+ struct swap_info_struct *p, *prev;
+ int i;
+
+ if (n <= 0)
+ return;
+
+ prev = NULL;
+ p = NULL;
+ for (i = 0; i < n; ++i) {
+ p = swap_info_get_cont(entries[i], prev);
+ if (p)
+ swap_entry_free(p, entries[i]);
+ else
+ break;
+ prev = p;
}
+ if (p)
+ spin_unlock(&p->lock);
}
/*
@@ -870,13 +1099,39 @@ int page_swapcount(struct page *page)
{
int count = 0;
struct swap_info_struct *p;
+ struct swap_cluster_info *ci;
swp_entry_t entry;
+ unsigned long offset;
entry.val = page_private(page);
- p = swap_info_get(entry);
+ p = _swap_info_get(entry);
if (p) {
- count = swap_count(p->swap_map[swp_offset(entry)]);
- spin_unlock(&p->lock);
+ offset = swp_offset(entry);
+ ci = lock_cluster_or_swap_info(p, offset);
+ count = swap_count(p->swap_map[offset]);
+ unlock_cluster_or_swap_info(p, ci);
+ }
+ return count;
+}
+
+/*
+ * How many references to @entry are currently swapped out?
+ * This does not give an exact answer when swap count is continued,
+ * but does include the high COUNT_CONTINUED flag to allow for that.
+ */
+int __swp_swapcount(swp_entry_t entry)
+{
+ int count = 0;
+ pgoff_t offset;
+ struct swap_info_struct *si;
+ struct swap_cluster_info *ci;
+
+ si = __swap_info_get(entry);
+ if (si) {
+ offset = swp_offset(entry);
+ ci = lock_cluster_or_swap_info(si, offset);
+ count = swap_count(si->swap_map[offset]);
+ unlock_cluster_or_swap_info(si, ci);
}
return count;
}
@@ -889,22 +1144,26 @@ int swp_swapcount(swp_entry_t entry)
{
int count, tmp_count, n;
struct swap_info_struct *p;
+ struct swap_cluster_info *ci;
struct page *page;
pgoff_t offset;
unsigned char *map;
- p = swap_info_get(entry);
+ p = _swap_info_get(entry);
if (!p)
return 0;
- count = swap_count(p->swap_map[swp_offset(entry)]);
+ offset = swp_offset(entry);
+
+ ci = lock_cluster_or_swap_info(p, offset);
+
+ count = swap_count(p->swap_map[offset]);
if (!(count & COUNT_CONTINUED))
goto out;
count &= ~COUNT_CONTINUED;
n = SWAP_MAP_MAX + 1;
- offset = swp_offset(entry);
page = vmalloc_to_page(p->swap_map + offset);
offset &= ~PAGE_MASK;
VM_BUG_ON(page_private(page) != SWP_CONTINUED);
@@ -919,7 +1178,7 @@ int swp_swapcount(swp_entry_t entry)
n *= (SWAP_CONT_MAX + 1);
} while (tmp_count & COUNT_CONTINUED);
out:
- spin_unlock(&p->lock);
+ unlock_cluster_or_swap_info(p, ci);
return count;
}
@@ -1011,21 +1270,23 @@ int free_swap_and_cache(swp_entry_t entry)
{
struct swap_info_struct *p;
struct page *page = NULL;
+ unsigned char count;
if (non_swap_entry(entry))
return 1;
- p = swap_info_get(entry);
+ p = _swap_info_get(entry);
if (p) {
- if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) {
+ count = __swap_entry_free(p, entry, 1);
+ if (count == SWAP_HAS_CACHE) {
page = find_get_page(swap_address_space(entry),
swp_offset(entry));
if (page && !trylock_page(page)) {
put_page(page);
page = NULL;
}
- }
- spin_unlock(&p->lock);
+ } else if (!count)
+ free_swap_slot(entry);
}
if (page) {
/*
@@ -1853,6 +2114,17 @@ static void reinsert_swap_info(struct swap_info_struct *p)
spin_unlock(&swap_lock);
}
+bool has_usable_swap(void)
+{
+ bool ret = true;
+
+ spin_lock(&swap_lock);
+ if (plist_head_empty(&swap_active_head))
+ ret = false;
+ spin_unlock(&swap_lock);
+ return ret;
+}
+
SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
{
struct swap_info_struct *p = NULL;
@@ -1923,6 +2195,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
spin_unlock(&p->lock);
spin_unlock(&swap_lock);
+ disable_swap_slots_cache_lock();
+
set_current_oom_origin();
err = try_to_unuse(p->type, false, 0); /* force unuse all pages */
clear_current_oom_origin();
@@ -1930,9 +2204,12 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
if (err) {
/* re-insert swap space back into swap_list */
reinsert_swap_info(p);
+ reenable_swap_slots_cache_unlock();
goto out_dput;
}
+ reenable_swap_slots_cache_unlock();
+
flush_work(&p->discard_work);
destroy_swap_extents(p);
@@ -1975,6 +2252,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
vfree(frontswap_map);
/* Destroy swap account information */
swap_cgroup_swapoff(p->type);
+ exit_swap_address_space(p->type);
inode = mapping->host;
if (S_ISBLK(inode->i_mode)) {
@@ -2298,6 +2576,13 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
return maxpages;
}
+#define SWAP_CLUSTER_INFO_COLS \
+ DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info))
+#define SWAP_CLUSTER_SPACE_COLS \
+ DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER)
+#define SWAP_CLUSTER_COLS \
+ max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS)
+
static int setup_swap_map_and_extents(struct swap_info_struct *p,
union swap_header *swap_header,
unsigned char *swap_map,
@@ -2305,11 +2590,12 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
unsigned long maxpages,
sector_t *span)
{
- int i;
+ unsigned int j, k;
unsigned int nr_good_pages;
int nr_extents;
unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
- unsigned long idx = p->cluster_next / SWAPFILE_CLUSTER;
+ unsigned long col = p->cluster_next / SWAPFILE_CLUSTER % SWAP_CLUSTER_COLS;
+ unsigned long i, idx;
nr_good_pages = maxpages - 1; /* omit header page */
@@ -2357,15 +2643,23 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
if (!cluster_info)
return nr_extents;
- for (i = 0; i < nr_clusters; i++) {
- if (!cluster_count(&cluster_info[idx])) {
+
+ /*
+ * Reduce false cache line sharing between cluster_info and
+ * sharing same address space.
+ */
+ for (k = 0; k < SWAP_CLUSTER_COLS; k++) {
+ j = (k + col) % SWAP_CLUSTER_COLS;
+ for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) {
+ idx = i * SWAP_CLUSTER_COLS + j;
+ if (idx >= nr_clusters)
+ continue;
+ if (cluster_count(&cluster_info[idx]))
+ continue;
cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
cluster_list_add_tail(&p->free_clusters, cluster_info,
idx);
}
- idx++;
- if (idx == nr_clusters)
- idx = 0;
}
return nr_extents;
}
@@ -2468,6 +2762,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
int cpu;
+ unsigned long ci, nr_cluster;
p->flags |= SWP_SOLIDSTATE;
/*
@@ -2475,13 +2770,17 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
* SSD
*/
p->cluster_next = 1 + (prandom_u32() % p->highest_bit);
+ nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
- cluster_info = vzalloc(DIV_ROUND_UP(maxpages,
- SWAPFILE_CLUSTER) * sizeof(*cluster_info));
+ cluster_info = vzalloc(nr_cluster * sizeof(*cluster_info));
if (!cluster_info) {
error = -ENOMEM;
goto bad_swap;
}
+
+ for (ci = 0; ci < nr_cluster; ci++)
+ spin_lock_init(&((cluster_info + ci)->lock));
+
p->percpu_cluster = alloc_percpu(struct percpu_cluster);
if (!p->percpu_cluster) {
error = -ENOMEM;
@@ -2538,6 +2837,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
}
}
+ error = init_swap_address_space(p->type, maxpages);
+ if (error)
+ goto bad_swap;
+
mutex_lock(&swapon_mutex);
prio = -1;
if (swap_flags & SWAP_FLAG_PREFER)
@@ -2593,6 +2896,8 @@ out:
putname(name);
if (inode && S_ISREG(inode->i_mode))
inode_unlock(inode);
+ if (!error)
+ enable_swap_slots_cache();
return error;
}
@@ -2627,6 +2932,7 @@ void si_swapinfo(struct sysinfo *val)
static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
{
struct swap_info_struct *p;
+ struct swap_cluster_info *ci;
unsigned long offset, type;
unsigned char count;
unsigned char has_cache;
@@ -2640,10 +2946,10 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
goto bad_file;
p = swap_info[type];
offset = swp_offset(entry);
-
- spin_lock(&p->lock);
if (unlikely(offset >= p->max))
- goto unlock_out;
+ goto out;
+
+ ci = lock_cluster_or_swap_info(p, offset);
count = p->swap_map[offset];
@@ -2686,7 +2992,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
p->swap_map[offset] = count | has_cache;
unlock_out:
- spin_unlock(&p->lock);
+ unlock_cluster_or_swap_info(p, ci);
out:
return err;
@@ -2775,6 +3081,7 @@ EXPORT_SYMBOL_GPL(__page_file_index);
int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
{
struct swap_info_struct *si;
+ struct swap_cluster_info *ci;
struct page *head;
struct page *page;
struct page *list_page;
@@ -2798,6 +3105,9 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
}
offset = swp_offset(entry);
+
+ ci = lock_cluster(si, offset);
+
count = si->swap_map[offset] & ~SWAP_HAS_CACHE;
if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) {
@@ -2810,6 +3120,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
}
if (!page) {
+ unlock_cluster(ci);
spin_unlock(&si->lock);
return -ENOMEM;
}
@@ -2858,6 +3169,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
list_add_tail(&page->lru, &head->lru);
page = NULL; /* now it's attached, don't free it */
out:
+ unlock_cluster(ci);
spin_unlock(&si->lock);
outer:
if (page)
@@ -2871,7 +3183,8 @@ outer:
* into, carry if so, or else fail until a new continuation page is allocated;
* when the original swap_map count is decremented from 0 with continuation,
* borrow from the continuation and report whether it still holds more.
- * Called while __swap_duplicate() or swap_entry_free() holds swap_lock.
+ * Called while __swap_duplicate() or swap_entry_free() holds swap or cluster
+ * lock.
*/
static bool swap_count_continued(struct swap_info_struct *si,
pgoff_t offset, unsigned char count)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index af817e5060fb..cf4345603490 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -14,6 +14,9 @@
#include <linux/swapops.h>
#include <linux/userfaultfd_k.h>
#include <linux/mmu_notifier.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/shmem_fs.h>
#include <asm/tlbflush.h>
#include "internal.h"
@@ -139,6 +142,200 @@ static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
return pmd;
}
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * __mcopy_atomic processing for HUGETLB vmas. Note that this routine is
+ * called with mmap_sem held, it will release mmap_sem before returning.
+ */
+static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_start,
+ unsigned long src_start,
+ unsigned long len,
+ bool zeropage)
+{
+ ssize_t err;
+ pte_t *dst_pte;
+ unsigned long src_addr, dst_addr;
+ long copied;
+ struct page *page;
+ struct hstate *h;
+ unsigned long vma_hpagesize;
+ pgoff_t idx;
+ u32 hash;
+ struct address_space *mapping;
+
+ /*
+ * There is no default zero huge page for all huge page sizes as
+ * supported by hugetlb. A PMD_SIZE huge pages may exist as used
+ * by THP. Since we can not reliably insert a zero page, this
+ * feature is not supported.
+ */
+ if (zeropage) {
+ up_read(&dst_mm->mmap_sem);
+ return -EINVAL;
+ }
+
+ src_addr = src_start;
+ dst_addr = dst_start;
+ copied = 0;
+ page = NULL;
+ vma_hpagesize = vma_kernel_pagesize(dst_vma);
+
+ /*
+ * Validate alignment based on huge page size
+ */
+ err = -EINVAL;
+ if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
+ goto out_unlock;
+
+retry:
+ /*
+ * On routine entry dst_vma is set. If we had to drop mmap_sem and
+ * retry, dst_vma will be set to NULL and we must lookup again.
+ */
+ if (!dst_vma) {
+ err = -ENOENT;
+ dst_vma = find_vma(dst_mm, dst_start);
+ if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
+ goto out_unlock;
+ /*
+ * Only allow __mcopy_atomic_hugetlb on userfaultfd
+ * registered ranges.
+ */
+ if (!dst_vma->vm_userfaultfd_ctx.ctx)
+ goto out_unlock;
+
+ if (dst_start < dst_vma->vm_start ||
+ dst_start + len > dst_vma->vm_end)
+ goto out_unlock;
+
+ err = -EINVAL;
+ if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
+ goto out_unlock;
+
+ /*
+ * Make sure the vma is not shared, that the remaining dst
+ * range is both valid and fully within a single existing vma.
+ */
+ if (dst_vma->vm_flags & VM_SHARED)
+ goto out_unlock;
+ }
+
+ if (WARN_ON(dst_addr & (vma_hpagesize - 1) ||
+ (len - copied) & (vma_hpagesize - 1)))
+ goto out_unlock;
+
+ /*
+ * Ensure the dst_vma has a anon_vma.
+ */
+ err = -ENOMEM;
+ if (unlikely(anon_vma_prepare(dst_vma)))
+ goto out_unlock;
+
+ h = hstate_vma(dst_vma);
+
+ while (src_addr < src_start + len) {
+ pte_t dst_pteval;
+
+ BUG_ON(dst_addr >= dst_start + len);
+ VM_BUG_ON(dst_addr & ~huge_page_mask(h));
+
+ /*
+ * Serialize via hugetlb_fault_mutex
+ */
+ idx = linear_page_index(dst_vma, dst_addr);
+ mapping = dst_vma->vm_file->f_mapping;
+ hash = hugetlb_fault_mutex_hash(h, dst_mm, dst_vma, mapping,
+ idx, dst_addr);
+ mutex_lock(&hugetlb_fault_mutex_table[hash]);
+
+ err = -ENOMEM;
+ dst_pte = huge_pte_alloc(dst_mm, dst_addr, huge_page_size(h));
+ if (!dst_pte) {
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ goto out_unlock;
+ }
+
+ err = -EEXIST;
+ dst_pteval = huge_ptep_get(dst_pte);
+ if (!huge_pte_none(dst_pteval)) {
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ goto out_unlock;
+ }
+
+ err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
+ dst_addr, src_addr, &page);
+
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+
+ cond_resched();
+
+ if (unlikely(err == -EFAULT)) {
+ up_read(&dst_mm->mmap_sem);
+ BUG_ON(!page);
+
+ err = copy_huge_page_from_user(page,
+ (const void __user *)src_addr,
+ pages_per_huge_page(h), true);
+ if (unlikely(err)) {
+ err = -EFAULT;
+ goto out;
+ }
+ down_read(&dst_mm->mmap_sem);
+
+ dst_vma = NULL;
+ goto retry;
+ } else
+ BUG_ON(page);
+
+ if (!err) {
+ dst_addr += vma_hpagesize;
+ src_addr += vma_hpagesize;
+ copied += vma_hpagesize;
+
+ if (fatal_signal_pending(current))
+ err = -EINTR;
+ }
+ if (err)
+ break;
+ }
+
+out_unlock:
+ up_read(&dst_mm->mmap_sem);
+out:
+ if (page) {
+ /*
+ * We encountered an error and are about to free a newly
+ * allocated huge page. It is possible that there was a
+ * reservation associated with the page that has been
+ * consumed. See the routine restore_reserve_on_error
+ * for details. Unfortunately, we can not call
+ * restore_reserve_on_error now as it would require holding
+ * mmap_sem. Clear the PagePrivate flag so that the global
+ * reserve count will not be incremented in free_huge_page.
+ * The reservation map will still indicate the reservation
+ * was consumed and possibly prevent later page allocation.
+ * This is better than leaking a global reservation.
+ */
+ ClearPagePrivate(page);
+ put_page(page);
+ }
+ BUG_ON(copied < 0);
+ BUG_ON(err > 0);
+ BUG_ON(!copied && !err);
+ return copied ? copied : err;
+}
+#else /* !CONFIG_HUGETLB_PAGE */
+/* fail at build time if gcc attempts to use this */
+extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_start,
+ unsigned long src_start,
+ unsigned long len,
+ bool zeropage);
+#endif /* CONFIG_HUGETLB_PAGE */
+
static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
unsigned long dst_start,
unsigned long src_start,
@@ -173,14 +370,10 @@ retry:
* Make sure the vma is not shared, that the dst range is
* both valid and fully within a single existing vma.
*/
- err = -EINVAL;
+ err = -ENOENT;
dst_vma = find_vma(dst_mm, dst_start);
- if (!dst_vma || (dst_vma->vm_flags & VM_SHARED))
+ if (!dst_vma)
goto out_unlock;
- if (dst_start < dst_vma->vm_start ||
- dst_start + len > dst_vma->vm_end)
- goto out_unlock;
-
/*
* Be strict and only allow __mcopy_atomic on userfaultfd
* registered ranges to prevent userland errors going
@@ -193,11 +386,22 @@ retry:
if (!dst_vma->vm_userfaultfd_ctx.ctx)
goto out_unlock;
+ if (dst_start < dst_vma->vm_start ||
+ dst_start + len > dst_vma->vm_end)
+ goto out_unlock;
+
+ err = -EINVAL;
+ if (!vma_is_shmem(dst_vma) && dst_vma->vm_flags & VM_SHARED)
+ goto out_unlock;
+
/*
- * FIXME: only allow copying on anonymous vmas, tmpfs should
- * be added.
+ * If this is a HUGETLB vma, pass off to appropriate routine
*/
- if (dst_vma->vm_ops)
+ if (is_vm_hugetlb_page(dst_vma))
+ return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
+ src_start, len, zeropage);
+
+ if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
goto out_unlock;
/*
@@ -206,7 +410,7 @@ retry:
* dst_vma.
*/
err = -ENOMEM;
- if (unlikely(anon_vma_prepare(dst_vma)))
+ if (vma_is_anonymous(dst_vma) && unlikely(anon_vma_prepare(dst_vma)))
goto out_unlock;
while (src_addr < src_start + len) {
@@ -243,12 +447,21 @@ retry:
BUG_ON(pmd_none(*dst_pmd));
BUG_ON(pmd_trans_huge(*dst_pmd));
- if (!zeropage)
- err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
- dst_addr, src_addr, &page);
- else
- err = mfill_zeropage_pte(dst_mm, dst_pmd, dst_vma,
- dst_addr);
+ if (vma_is_anonymous(dst_vma)) {
+ if (!zeropage)
+ err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
+ dst_addr, src_addr,
+ &page);
+ else
+ err = mfill_zeropage_pte(dst_mm, dst_pmd,
+ dst_vma, dst_addr);
+ } else {
+ err = -EINVAL; /* if zeropage is true return -EINVAL */
+ if (likely(!zeropage))
+ err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd,
+ dst_vma, dst_addr,
+ src_addr, &page);
+ }
cond_resched();
diff --git a/mm/util.c b/mm/util.c
index e91250dabf49..69bf18206e08 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -11,6 +11,7 @@
#include <linux/mman.h>
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
+#include <linux/userfaultfd_k.h>
#include <asm/sections.h>
#include <linux/uaccess.h>
@@ -297,14 +298,16 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
unsigned long ret;
struct mm_struct *mm = current->mm;
unsigned long populate;
+ LIST_HEAD(uf);
ret = security_mmap_file(file, prot, flag);
if (!ret) {
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
- &populate);
+ &populate, &uf);
up_write(&mm->mmap_sem);
+ userfaultfd_unmap_complete(mm, &uf);
if (populate)
mm_populate(ret, populate);
}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 3ca82d44edd3..011b446f8758 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1642,6 +1642,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
for (i = 0; i < area->nr_pages; i++) {
struct page *page;
+ if (fatal_signal_pending(current)) {
+ area->nr_pages = i;
+ goto fail;
+ }
+
if (node == NUMA_NO_NODE)
page = alloc_page(alloc_mask);
else
@@ -1662,7 +1667,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
return area->addr;
fail:
- warn_alloc(gfp_mask,
+ warn_alloc(gfp_mask, NULL,
"vmalloc: allocation failure, allocated %ld of %ld bytes",
(area->nr_pages*PAGE_SIZE), area->size);
vfree(area->addr);
@@ -1724,7 +1729,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
return addr;
fail:
- warn_alloc(gfp_mask,
+ warn_alloc(gfp_mask, NULL,
"vmalloc: allocation failure: %lu bytes", real_size);
return NULL;
}
@@ -2309,7 +2314,7 @@ EXPORT_SYMBOL_GPL(free_vm_area);
#ifdef CONFIG_SMP
static struct vmap_area *node_to_va(struct rb_node *n)
{
- return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
+ return rb_entry_safe(n, struct vmap_area, rb_node);
}
/**
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 532a2a750952..26c3b405ef34 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -87,6 +87,7 @@ struct scan_control {
/* The highest zone to isolate pages for reclaim from */
enum zone_type reclaim_idx;
+ /* Writepage batching in laptop mode; RECLAIM_WRITE */
unsigned int may_writepage:1;
/* Can mapped pages be reclaimed? */
@@ -234,22 +235,39 @@ bool pgdat_reclaimable(struct pglist_data *pgdat)
pgdat_reclaimable_pages(pgdat) * 6;
}
-unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru)
+/**
+ * lruvec_lru_size - Returns the number of pages on the given LRU list.
+ * @lruvec: lru vector
+ * @lru: lru to use
+ * @zone_idx: zones to consider (use MAX_NR_ZONES for the whole LRU list)
+ */
+unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx)
{
+ unsigned long lru_size;
+ int zid;
+
if (!mem_cgroup_disabled())
- return mem_cgroup_get_lru_size(lruvec, lru);
+ lru_size = mem_cgroup_get_lru_size(lruvec, lru);
+ else
+ lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
- return node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
-}
+ for (zid = zone_idx + 1; zid < MAX_NR_ZONES; zid++) {
+ struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid];
+ unsigned long size;
-unsigned long lruvec_zone_lru_size(struct lruvec *lruvec, enum lru_list lru,
- int zone_idx)
-{
- if (!mem_cgroup_disabled())
- return mem_cgroup_get_zone_lru_size(lruvec, lru, zone_idx);
+ if (!managed_zone(zone))
+ continue;
+
+ if (!mem_cgroup_disabled())
+ size = mem_cgroup_get_zone_lru_size(lruvec, lru, zid);
+ else
+ size = zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zid],
+ NR_ZONE_LRU_BASE + lru);
+ lru_size -= min(size, lru_size);
+ }
+
+ return lru_size;
- return zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zone_idx],
- NR_ZONE_LRU_BASE + lru);
}
/*
@@ -912,6 +930,17 @@ static void page_check_dirty_writeback(struct page *page,
mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
}
+struct reclaim_stat {
+ unsigned nr_dirty;
+ unsigned nr_unqueued_dirty;
+ unsigned nr_congested;
+ unsigned nr_writeback;
+ unsigned nr_immediate;
+ unsigned nr_activate;
+ unsigned nr_ref_keep;
+ unsigned nr_unmap_fail;
+};
+
/*
* shrink_page_list() returns the number of reclaimed pages
*/
@@ -919,22 +948,20 @@ static unsigned long shrink_page_list(struct list_head *page_list,
struct pglist_data *pgdat,
struct scan_control *sc,
enum ttu_flags ttu_flags,
- unsigned long *ret_nr_dirty,
- unsigned long *ret_nr_unqueued_dirty,
- unsigned long *ret_nr_congested,
- unsigned long *ret_nr_writeback,
- unsigned long *ret_nr_immediate,
+ struct reclaim_stat *stat,
bool force_reclaim)
{
LIST_HEAD(ret_pages);
LIST_HEAD(free_pages);
int pgactivate = 0;
- unsigned long nr_unqueued_dirty = 0;
- unsigned long nr_dirty = 0;
- unsigned long nr_congested = 0;
- unsigned long nr_reclaimed = 0;
- unsigned long nr_writeback = 0;
- unsigned long nr_immediate = 0;
+ unsigned nr_unqueued_dirty = 0;
+ unsigned nr_dirty = 0;
+ unsigned nr_congested = 0;
+ unsigned nr_reclaimed = 0;
+ unsigned nr_writeback = 0;
+ unsigned nr_immediate = 0;
+ unsigned nr_ref_keep = 0;
+ unsigned nr_unmap_fail = 0;
cond_resched();
@@ -1029,6 +1056,15 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* throttling so we could easily OOM just because too many
* pages are in writeback and there is nothing else to
* reclaim. Wait for the writeback to complete.
+ *
+ * In cases 1) and 2) we activate the pages to get them out of
+ * the way while we continue scanning for clean pages on the
+ * inactive list and refilling from the active list. The
+ * observation here is that waiting for disk writes is more
+ * expensive than potentially causing reloads down the line.
+ * Since they're marked for immediate reclaim, they won't put
+ * memory pressure on the cache working set any longer than it
+ * takes to write them to disk.
*/
if (PageWriteback(page)) {
/* Case 1 above */
@@ -1036,7 +1072,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
PageReclaim(page) &&
test_bit(PGDAT_WRITEBACK, &pgdat->flags)) {
nr_immediate++;
- goto keep_locked;
+ goto activate_locked;
/* Case 2 above */
} else if (sane_reclaim(sc) ||
@@ -1054,7 +1090,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
*/
SetPageReclaim(page);
nr_writeback++;
- goto keep_locked;
+ goto activate_locked;
/* Case 3 above */
} else {
@@ -1073,6 +1109,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
case PAGEREF_ACTIVATE:
goto activate_locked;
case PAGEREF_KEEP:
+ nr_ref_keep++;
goto keep_locked;
case PAGEREF_RECLAIM:
case PAGEREF_RECLAIM_CLEAN:
@@ -1110,6 +1147,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
(ttu_flags | TTU_BATCH_FLUSH | TTU_LZFREE) :
(ttu_flags | TTU_BATCH_FLUSH))) {
case SWAP_FAIL:
+ nr_unmap_fail++;
goto activate_locked;
case SWAP_AGAIN:
goto keep_locked;
@@ -1124,13 +1162,18 @@ static unsigned long shrink_page_list(struct list_head *page_list,
if (PageDirty(page)) {
/*
- * Only kswapd can writeback filesystem pages to
- * avoid risk of stack overflow but only writeback
- * if many dirty pages have been encountered.
+ * Only kswapd can writeback filesystem pages
+ * to avoid risk of stack overflow. But avoid
+ * injecting inefficient single-page IO into
+ * flusher writeback as much as possible: only
+ * write pages when we've encountered many
+ * dirty pages, and when we've already scanned
+ * the rest of the LRU for clean pages and see
+ * the same dirty pages again (PageReclaim).
*/
if (page_is_file_cache(page) &&
- (!current_is_kswapd() ||
- !test_bit(PGDAT_DIRTY, &pgdat->flags))) {
+ (!current_is_kswapd() || !PageReclaim(page) ||
+ !test_bit(PGDAT_DIRTY, &pgdat->flags))) {
/*
* Immediately reclaim when written back.
* Similar in principal to deactivate_page()
@@ -1140,7 +1183,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
inc_node_page_state(page, NR_VMSCAN_IMMEDIATE);
SetPageReclaim(page);
- goto keep_locked;
+ goto activate_locked;
}
if (references == PAGEREF_RECLAIM_CLEAN)
@@ -1276,11 +1319,16 @@ keep:
list_splice(&ret_pages, page_list);
count_vm_events(PGACTIVATE, pgactivate);
- *ret_nr_dirty += nr_dirty;
- *ret_nr_congested += nr_congested;
- *ret_nr_unqueued_dirty += nr_unqueued_dirty;
- *ret_nr_writeback += nr_writeback;
- *ret_nr_immediate += nr_immediate;
+ if (stat) {
+ stat->nr_dirty = nr_dirty;
+ stat->nr_congested = nr_congested;
+ stat->nr_unqueued_dirty = nr_unqueued_dirty;
+ stat->nr_writeback = nr_writeback;
+ stat->nr_immediate = nr_immediate;
+ stat->nr_activate = pgactivate;
+ stat->nr_ref_keep = nr_ref_keep;
+ stat->nr_unmap_fail = nr_unmap_fail;
+ }
return nr_reclaimed;
}
@@ -1292,7 +1340,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
.priority = DEF_PRIORITY,
.may_unmap = 1,
};
- unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
+ unsigned long ret;
struct page *page, *next;
LIST_HEAD(clean_pages);
@@ -1305,8 +1353,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
}
ret = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc,
- TTU_UNMAP|TTU_IGNORE_ACCESS,
- &dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true);
+ TTU_UNMAP|TTU_IGNORE_ACCESS, NULL, true);
list_splice(&clean_pages, page_list);
mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -ret);
return ret;
@@ -1341,13 +1388,10 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
* wants to isolate pages it will be able to operate on without
* blocking - clean pages for the most part.
*
- * ISOLATE_CLEAN means that only clean pages should be isolated. This
- * is used by reclaim when it is cannot write to backing storage
- *
* ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages
* that it is possible to migrate without blocking
*/
- if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) {
+ if (mode & ISOLATE_ASYNC_MIGRATE) {
/* All the caller can do on PageWriteback is block */
if (PageWriteback(page))
return ret;
@@ -1355,10 +1399,6 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
if (PageDirty(page)) {
struct address_space *mapping;
- /* ISOLATE_CLEAN means only clean pages */
- if (mode & ISOLATE_CLEAN)
- return ret;
-
/*
* Only pages without mappings or that have a
* ->migratepage callback are possible to migrate
@@ -1437,6 +1477,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
unsigned long nr_taken = 0;
unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };
unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
+ unsigned long skipped = 0, total_skipped = 0;
unsigned long scan, nr_pages;
LIST_HEAD(pages_skipped);
@@ -1488,14 +1529,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
*/
if (!list_empty(&pages_skipped)) {
int zid;
- unsigned long total_skipped = 0;
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
if (!nr_skipped[zid])
continue;
__count_zid_vm_events(PGSCAN_SKIP, zid, nr_skipped[zid]);
- total_skipped += nr_skipped[zid];
+ skipped += nr_skipped[zid];
}
/*
@@ -1503,13 +1543,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
* close to unreclaimable. If the LRU list is empty, account
* skipped pages as a full scan.
*/
- scan += list_empty(src) ? total_skipped : total_skipped >> 2;
+ total_skipped = list_empty(src) ? skipped : skipped >> 2;
list_splice(&pages_skipped, src);
}
- *nr_scanned = scan;
- trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, scan,
- nr_taken, mode, is_file_lru(lru));
+ *nr_scanned = scan + total_skipped;
+ trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
+ scan, skipped, nr_taken, mode, lru);
update_lru_sizes(lruvec, lru, nr_zone_taken);
return nr_taken;
}
@@ -1669,30 +1709,6 @@ static int current_may_throttle(void)
bdi_write_congested(current->backing_dev_info);
}
-static bool inactive_reclaimable_pages(struct lruvec *lruvec,
- struct scan_control *sc, enum lru_list lru)
-{
- int zid;
- struct zone *zone;
- int file = is_file_lru(lru);
- struct pglist_data *pgdat = lruvec_pgdat(lruvec);
-
- if (!global_reclaim(sc))
- return true;
-
- for (zid = sc->reclaim_idx; zid >= 0; zid--) {
- zone = &pgdat->node_zones[zid];
- if (!managed_zone(zone))
- continue;
-
- if (zone_page_state_snapshot(zone, NR_ZONE_LRU_BASE +
- LRU_FILE * file) >= SWAP_CLUSTER_MAX)
- return true;
- }
-
- return false;
-}
-
/*
* shrink_inactive_list() is a helper for shrink_node(). It returns the number
* of reclaimed pages
@@ -1705,19 +1721,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
unsigned long nr_scanned;
unsigned long nr_reclaimed = 0;
unsigned long nr_taken;
- unsigned long nr_dirty = 0;
- unsigned long nr_congested = 0;
- unsigned long nr_unqueued_dirty = 0;
- unsigned long nr_writeback = 0;
- unsigned long nr_immediate = 0;
+ struct reclaim_stat stat = {};
isolate_mode_t isolate_mode = 0;
int file = is_file_lru(lru);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
- if (!inactive_reclaimable_pages(lruvec, sc, lru))
- return 0;
-
while (unlikely(too_many_isolated(pgdat, file, sc))) {
congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -1730,8 +1739,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
if (!sc->may_unmap)
isolate_mode |= ISOLATE_UNMAPPED;
- if (!sc->may_writepage)
- isolate_mode |= ISOLATE_CLEAN;
spin_lock_irq(&pgdat->lru_lock);
@@ -1754,9 +1761,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
return 0;
nr_reclaimed = shrink_page_list(&page_list, pgdat, sc, TTU_UNMAP,
- &nr_dirty, &nr_unqueued_dirty, &nr_congested,
- &nr_writeback, &nr_immediate,
- false);
+ &stat, false);
spin_lock_irq(&pgdat->lru_lock);
@@ -1790,7 +1795,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* of pages under pages flagged for immediate reclaim and stall if any
* are encountered in the nr_immediate check below.
*/
- if (nr_writeback && nr_writeback == nr_taken)
+ if (stat.nr_writeback && stat.nr_writeback == nr_taken)
set_bit(PGDAT_WRITEBACK, &pgdat->flags);
/*
@@ -1802,17 +1807,25 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* Tag a zone as congested if all the dirty pages scanned were
* backed by a congested BDI and wait_iff_congested will stall.
*/
- if (nr_dirty && nr_dirty == nr_congested)
+ if (stat.nr_dirty && stat.nr_dirty == stat.nr_congested)
set_bit(PGDAT_CONGESTED, &pgdat->flags);
/*
* If dirty pages are scanned that are not queued for IO, it
- * implies that flushers are not keeping up. In this case, flag
- * the pgdat PGDAT_DIRTY and kswapd will start writing pages from
- * reclaim context.
+ * implies that flushers are not doing their job. This can
+ * happen when memory pressure pushes dirty pages to the end of
+ * the LRU before the dirty limits are breached and the dirty
+ * data has expired. It can also happen when the proportion of
+ * dirty pages grows not through writes but through memory
+ * pressure reclaiming all the clean cache. And in some cases,
+ * the flushers simply cannot keep up with the allocation
+ * rate. Nudge the flusher threads in case they are asleep, but
+ * also allow kswapd to start writing pages during reclaim.
*/
- if (nr_unqueued_dirty == nr_taken)
+ if (stat.nr_unqueued_dirty == nr_taken) {
+ wakeup_flusher_threads(0, WB_REASON_VMSCAN);
set_bit(PGDAT_DIRTY, &pgdat->flags);
+ }
/*
* If kswapd scans pages marked marked for immediate
@@ -1820,7 +1833,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* that pages are cycling through the LRU faster than
* they are written so also forcibly stall.
*/
- if (nr_immediate && current_may_throttle())
+ if (stat.nr_immediate && current_may_throttle())
congestion_wait(BLK_RW_ASYNC, HZ/10);
}
@@ -1835,6 +1848,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
nr_scanned, nr_reclaimed,
+ stat.nr_dirty, stat.nr_writeback,
+ stat.nr_congested, stat.nr_immediate,
+ stat.nr_activate, stat.nr_ref_keep,
+ stat.nr_unmap_fail,
sc->priority, file);
return nr_reclaimed;
}
@@ -1855,17 +1872,19 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
*
* The downside is that we have to touch page->_refcount against each page.
* But we had to alter page->flags anyway.
+ *
+ * Returns the number of pages moved to the given lru.
*/
-static void move_active_pages_to_lru(struct lruvec *lruvec,
+static unsigned move_active_pages_to_lru(struct lruvec *lruvec,
struct list_head *list,
struct list_head *pages_to_free,
enum lru_list lru)
{
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
- unsigned long pgmoved = 0;
struct page *page;
int nr_pages;
+ int nr_moved = 0;
while (!list_empty(list)) {
page = lru_to_page(list);
@@ -1877,7 +1896,6 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
nr_pages = hpage_nr_pages(page);
update_lru_size(lruvec, lru, page_zonenum(page), nr_pages);
list_move(&page->lru, &lruvec->lists[lru]);
- pgmoved += nr_pages;
if (put_page_testzero(page)) {
__ClearPageLRU(page);
@@ -1891,11 +1909,15 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
spin_lock_irq(&pgdat->lru_lock);
} else
list_add(&page->lru, pages_to_free);
+ } else {
+ nr_moved += nr_pages;
}
}
if (!is_active_lru(lru))
- __count_vm_events(PGDEACTIVATE, pgmoved);
+ __count_vm_events(PGDEACTIVATE, nr_moved);
+
+ return nr_moved;
}
static void shrink_active_list(unsigned long nr_to_scan,
@@ -1911,7 +1933,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
LIST_HEAD(l_inactive);
struct page *page;
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
- unsigned long nr_rotated = 0;
+ unsigned nr_deactivate, nr_activate;
+ unsigned nr_rotated = 0;
isolate_mode_t isolate_mode = 0;
int file = is_file_lru(lru);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
@@ -1920,8 +1943,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
if (!sc->may_unmap)
isolate_mode |= ISOLATE_UNMAPPED;
- if (!sc->may_writepage)
- isolate_mode |= ISOLATE_CLEAN;
spin_lock_irq(&pgdat->lru_lock);
@@ -1989,13 +2010,15 @@ static void shrink_active_list(unsigned long nr_to_scan,
*/
reclaim_stat->recent_rotated[file] += nr_rotated;
- move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru);
- move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE);
+ nr_activate = move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru);
+ nr_deactivate = move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE);
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&pgdat->lru_lock);
mem_cgroup_uncharge_list(&l_hold);
free_hot_cold_page_list(&l_hold, true);
+ trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate,
+ nr_deactivate, nr_rotated, sc->priority, file);
}
/*
@@ -2025,14 +2048,13 @@ static void shrink_active_list(unsigned long nr_to_scan,
* 10TB 320 32GB
*/
static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
- struct scan_control *sc)
+ struct scan_control *sc, bool trace)
{
unsigned long inactive_ratio;
- unsigned long inactive;
- unsigned long active;
+ unsigned long inactive, active;
+ enum lru_list inactive_lru = file * LRU_FILE;
+ enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
unsigned long gb;
- struct pglist_data *pgdat = lruvec_pgdat(lruvec);
- int zid;
/*
* If we don't have swap space, anonymous page deactivation
@@ -2041,27 +2063,8 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
if (!file && !total_swap_pages)
return false;
- inactive = lruvec_lru_size(lruvec, file * LRU_FILE);
- active = lruvec_lru_size(lruvec, file * LRU_FILE + LRU_ACTIVE);
-
- /*
- * For zone-constrained allocations, it is necessary to check if
- * deactivations are required for lowmem to be reclaimed. This
- * calculates the inactive/active pages available in eligible zones.
- */
- for (zid = sc->reclaim_idx + 1; zid < MAX_NR_ZONES; zid++) {
- struct zone *zone = &pgdat->node_zones[zid];
- unsigned long inactive_zone, active_zone;
-
- if (!managed_zone(zone))
- continue;
-
- inactive_zone = lruvec_zone_lru_size(lruvec, file * LRU_FILE, zid);
- active_zone = lruvec_zone_lru_size(lruvec, (file * LRU_FILE) + LRU_ACTIVE, zid);
-
- inactive -= min(inactive, inactive_zone);
- active -= min(active, active_zone);
- }
+ inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
+ active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
gb = (inactive + active) >> (30 - PAGE_SHIFT);
if (gb)
@@ -2069,6 +2072,13 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
else
inactive_ratio = 1;
+ if (trace)
+ trace_mm_vmscan_inactive_list_is_low(lruvec_pgdat(lruvec)->node_id,
+ sc->reclaim_idx,
+ lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive,
+ lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active,
+ inactive_ratio, file);
+
return inactive * inactive_ratio < active;
}
@@ -2076,7 +2086,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
struct lruvec *lruvec, struct scan_control *sc)
{
if (is_active_lru(lru)) {
- if (inactive_list_is_low(lruvec, is_file_lru(lru), sc))
+ if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true))
shrink_active_list(nr_to_scan, lruvec, sc, lru);
return 0;
}
@@ -2207,8 +2217,8 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
* lruvec even if it has plenty of old anonymous pages unless the
* system is under heavy pressure.
*/
- if (!inactive_list_is_low(lruvec, true, sc) &&
- lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) {
+ if (!inactive_list_is_low(lruvec, true, sc, false) &&
+ lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
scan_balance = SCAN_FILE;
goto out;
}
@@ -2234,10 +2244,10 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
* anon in [0], file in [1]
*/
- anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON) +
- lruvec_lru_size(lruvec, LRU_INACTIVE_ANON);
- file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE) +
- lruvec_lru_size(lruvec, LRU_INACTIVE_FILE);
+ anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) +
+ lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES);
+ file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES) +
+ lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES);
spin_lock_irq(&pgdat->lru_lock);
if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
@@ -2275,7 +2285,7 @@ out:
unsigned long size;
unsigned long scan;
- size = lruvec_lru_size(lruvec, lru);
+ size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
scan = size >> sc->priority;
if (!scan && pass && force_scan)
@@ -2432,7 +2442,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
* Even if we did not try to evict anon pages at all, we want to
* rebalance the anon lru active/inactive ratio.
*/
- if (inactive_list_is_low(lruvec, false, sc))
+ if (inactive_list_is_low(lruvec, false, sc, true))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
}
@@ -2593,16 +2603,23 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
sc->nr_scanned - nr_scanned,
node_lru_pages);
+ /*
+ * Record the subtree's reclaim efficiency. The reclaimed
+ * pages from slab is excluded here because the corresponding
+ * scanned pages is not accounted. Moreover, freeing a page
+ * by slab shrinking depends on each slab's object population,
+ * making the cost model (i.e. scan:free) different from that
+ * of LRU.
+ */
+ vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
+ sc->nr_scanned - nr_scanned,
+ sc->nr_reclaimed - nr_reclaimed);
+
if (reclaim_state) {
sc->nr_reclaimed += reclaim_state->reclaimed_slab;
reclaim_state->reclaimed_slab = 0;
}
- /* Record the subtree's reclaim efficiency */
- vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
- sc->nr_scanned - nr_scanned,
- sc->nr_reclaimed - nr_reclaimed);
-
if (sc->nr_reclaimed - nr_reclaimed)
reclaimable = true;
@@ -2761,8 +2778,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
struct scan_control *sc)
{
int initial_priority = sc->priority;
- unsigned long total_scanned = 0;
- unsigned long writeback_threshold;
retry:
delayacct_freepages_start();
@@ -2775,7 +2790,6 @@ retry:
sc->nr_scanned = 0;
shrink_zones(zonelist, sc);
- total_scanned += sc->nr_scanned;
if (sc->nr_reclaimed >= sc->nr_to_reclaim)
break;
@@ -2788,20 +2802,6 @@ retry:
*/
if (sc->priority < DEF_PRIORITY - 2)
sc->may_writepage = 1;
-
- /*
- * Try to write back as many pages as we just scanned. This
- * tends to cause slow streaming writers to write data to the
- * disk smoothly, at the dirtying rate, which is nice. But
- * that's undesirable in laptop mode, where we *want* lumpy
- * writeout. So in laptop mode, write out the whole world.
- */
- writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2;
- if (total_scanned > writeback_threshold) {
- wakeup_flusher_threads(laptop_mode ? 0 : total_scanned,
- WB_REASON_TRY_TO_FREE_PAGES);
- sc->may_writepage = 1;
- }
} while (--sc->priority >= 0);
delayacct_freepages_end();
@@ -3082,7 +3082,7 @@ static void age_active_anon(struct pglist_data *pgdat,
do {
struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
- if (inactive_list_is_low(lruvec, false, sc))
+ if (inactive_list_is_low(lruvec, false, sc, true))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
@@ -3103,6 +3103,7 @@ static bool zone_balanced(struct zone *zone, int order, int classzone_idx)
*/
clear_bit(PGDAT_CONGESTED, &zone->zone_pgdat->flags);
clear_bit(PGDAT_DIRTY, &zone->zone_pgdat->flags);
+ clear_bit(PGDAT_WRITEBACK, &zone->zone_pgdat->flags);
return true;
}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 7c28df36f50f..69f9aff39a2e 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1038,6 +1038,8 @@ const char * const vmstat_text[] = {
"compact_fail",
"compact_success",
"compact_daemon_wake",
+ "compact_daemon_migrate_scanned",
+ "compact_daemon_free_scanned",
#endif
#ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/workingset.c b/mm/workingset.c
index 80c913c89f11..c573cb24d1c5 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -267,7 +267,7 @@ bool workingset_refault(void *shadow)
}
lruvec = mem_cgroup_lruvec(pgdat, memcg);
refault = atomic_long_read(&lruvec->inactive_age);
- active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE);
+ active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES);
rcu_read_unlock();
/*
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 8f9e89ca1d31..8970a2fd3b1a 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -34,28 +34,61 @@
/*****************
* Structures
*****************/
+struct z3fold_pool;
+struct z3fold_ops {
+ int (*evict)(struct z3fold_pool *pool, unsigned long handle);
+};
+
+enum buddy {
+ HEADLESS = 0,
+ FIRST,
+ MIDDLE,
+ LAST,
+ BUDDIES_MAX
+};
+
+/*
+ * struct z3fold_header - z3fold page metadata occupying the first chunk of each
+ * z3fold page, except for HEADLESS pages
+ * @buddy: links the z3fold page into the relevant list in the pool
+ * @page_lock: per-page lock
+ * @refcount: reference cound for the z3fold page
+ * @first_chunks: the size of the first buddy in chunks, 0 if free
+ * @middle_chunks: the size of the middle buddy in chunks, 0 if free
+ * @last_chunks: the size of the last buddy in chunks, 0 if free
+ * @first_num: the starting number (for the first handle)
+ */
+struct z3fold_header {
+ struct list_head buddy;
+ spinlock_t page_lock;
+ struct kref refcount;
+ unsigned short first_chunks;
+ unsigned short middle_chunks;
+ unsigned short last_chunks;
+ unsigned short start_middle;
+ unsigned short first_num:2;
+};
+
/*
* NCHUNKS_ORDER determines the internal allocation granularity, effectively
* adjusting internal fragmentation. It also determines the number of
* freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
- * allocation granularity will be in chunks of size PAGE_SIZE/64. As one chunk
- * in allocated page is occupied by z3fold header, NCHUNKS will be calculated
- * to 63 which shows the max number of free chunks in z3fold page, also there
- * will be 63 freelists per pool.
+ * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks
+ * in the beginning of an allocated page are occupied by z3fold header, so
+ * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y),
+ * which shows the max number of free chunks in z3fold page, also there will
+ * be 63, or 62, respectively, freelists per pool.
*/
#define NCHUNKS_ORDER 6
#define CHUNK_SHIFT (PAGE_SHIFT - NCHUNKS_ORDER)
#define CHUNK_SIZE (1 << CHUNK_SHIFT)
-#define ZHDR_SIZE_ALIGNED CHUNK_SIZE
+#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
+#define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
+#define TOTAL_CHUNKS (PAGE_SIZE >> CHUNK_SHIFT)
#define NCHUNKS ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)
-#define BUDDY_MASK ((1 << NCHUNKS_ORDER) - 1)
-
-struct z3fold_pool;
-struct z3fold_ops {
- int (*evict)(struct z3fold_pool *pool, unsigned long handle);
-};
+#define BUDDY_MASK (0x3)
/**
* struct z3fold_pool - stores metadata for each z3fold pool
@@ -64,8 +97,6 @@ struct z3fold_ops {
* @unbuddied: array of lists tracking z3fold pages that contain 2- buddies;
* the lists each z3fold page is added to depends on the size of
* its free region.
- * @buddied: list tracking the z3fold pages that contain 3 buddies;
- * these z3fold pages are full
* @lru: list tracking the z3fold pages in LRU order by most recently
* added buddy.
* @pages_nr: number of z3fold pages in the pool.
@@ -78,49 +109,22 @@ struct z3fold_ops {
struct z3fold_pool {
spinlock_t lock;
struct list_head unbuddied[NCHUNKS];
- struct list_head buddied;
struct list_head lru;
- u64 pages_nr;
+ atomic64_t pages_nr;
const struct z3fold_ops *ops;
struct zpool *zpool;
const struct zpool_ops *zpool_ops;
};
-enum buddy {
- HEADLESS = 0,
- FIRST,
- MIDDLE,
- LAST,
- BUDDIES_MAX
-};
-
-/*
- * struct z3fold_header - z3fold page metadata occupying the first chunk of each
- * z3fold page, except for HEADLESS pages
- * @buddy: links the z3fold page into the relevant list in the pool
- * @first_chunks: the size of the first buddy in chunks, 0 if free
- * @middle_chunks: the size of the middle buddy in chunks, 0 if free
- * @last_chunks: the size of the last buddy in chunks, 0 if free
- * @first_num: the starting number (for the first handle)
- */
-struct z3fold_header {
- struct list_head buddy;
- unsigned short first_chunks;
- unsigned short middle_chunks;
- unsigned short last_chunks;
- unsigned short start_middle;
- unsigned short first_num:NCHUNKS_ORDER;
-};
-
/*
* Internal z3fold page flags
*/
enum z3fold_page_flags {
- UNDER_RECLAIM = 0,
- PAGE_HEADLESS,
+ PAGE_HEADLESS = 0,
MIDDLE_CHUNK_MAPPED,
};
+
/*****************
* Helpers
*****************/
@@ -140,10 +144,11 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
struct z3fold_header *zhdr = page_address(page);
INIT_LIST_HEAD(&page->lru);
- clear_bit(UNDER_RECLAIM, &page->private);
clear_bit(PAGE_HEADLESS, &page->private);
clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
+ spin_lock_init(&zhdr->page_lock);
+ kref_init(&zhdr->refcount);
zhdr->first_chunks = 0;
zhdr->middle_chunks = 0;
zhdr->last_chunks = 0;
@@ -154,9 +159,36 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
}
/* Resets the struct page fields and frees the page */
-static void free_z3fold_page(struct z3fold_header *zhdr)
+static void free_z3fold_page(struct page *page)
+{
+ __free_page(page);
+}
+
+static void release_z3fold_page(struct kref *ref)
+{
+ struct z3fold_header *zhdr;
+ struct page *page;
+
+ zhdr = container_of(ref, struct z3fold_header, refcount);
+ page = virt_to_page(zhdr);
+
+ if (!list_empty(&zhdr->buddy))
+ list_del(&zhdr->buddy);
+ if (!list_empty(&page->lru))
+ list_del(&page->lru);
+ free_z3fold_page(page);
+}
+
+/* Lock a z3fold page */
+static inline void z3fold_page_lock(struct z3fold_header *zhdr)
{
- __free_page(virt_to_page(zhdr));
+ spin_lock(&zhdr->page_lock);
+}
+
+/* Unlock a z3fold page */
+static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
+{
+ spin_unlock(&zhdr->page_lock);
}
/*
@@ -179,7 +211,11 @@ static struct z3fold_header *handle_to_z3fold_header(unsigned long handle)
return (struct z3fold_header *)(handle & PAGE_MASK);
}
-/* Returns buddy number */
+/*
+ * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle
+ * but that doesn't matter. because the masking will result in the
+ * correct buddy number.
+ */
static enum buddy handle_to_buddy(unsigned long handle)
{
struct z3fold_header *zhdr = handle_to_z3fold_header(handle);
@@ -200,9 +236,10 @@ static int num_free_chunks(struct z3fold_header *zhdr)
*/
if (zhdr->middle_chunks != 0) {
int nfree_before = zhdr->first_chunks ?
- 0 : zhdr->start_middle - 1;
+ 0 : zhdr->start_middle - ZHDR_CHUNKS;
int nfree_after = zhdr->last_chunks ?
- 0 : NCHUNKS - zhdr->start_middle - zhdr->middle_chunks;
+ 0 : TOTAL_CHUNKS -
+ (zhdr->start_middle + zhdr->middle_chunks);
nfree = max(nfree_before, nfree_after);
} else
nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
@@ -232,9 +269,8 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
spin_lock_init(&pool->lock);
for_each_unbuddied_list(i, 0)
INIT_LIST_HEAD(&pool->unbuddied[i]);
- INIT_LIST_HEAD(&pool->buddied);
INIT_LIST_HEAD(&pool->lru);
- pool->pages_nr = 0;
+ atomic64_set(&pool->pages_nr, 0);
pool->ops = ops;
return pool;
}
@@ -250,25 +286,58 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool)
kfree(pool);
}
+static inline void *mchunk_memmove(struct z3fold_header *zhdr,
+ unsigned short dst_chunk)
+{
+ void *beg = zhdr;
+ return memmove(beg + (dst_chunk << CHUNK_SHIFT),
+ beg + (zhdr->start_middle << CHUNK_SHIFT),
+ zhdr->middle_chunks << CHUNK_SHIFT);
+}
+
+#define BIG_CHUNK_GAP 3
/* Has to be called with lock held */
static int z3fold_compact_page(struct z3fold_header *zhdr)
{
struct page *page = virt_to_page(zhdr);
- void *beg = zhdr;
+ if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private))
+ return 0; /* can't move middle chunk, it's used */
+
+ if (zhdr->middle_chunks == 0)
+ return 0; /* nothing to compact */
- if (!test_bit(MIDDLE_CHUNK_MAPPED, &page->private) &&
- zhdr->middle_chunks != 0 &&
- zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
- memmove(beg + ZHDR_SIZE_ALIGNED,
- beg + (zhdr->start_middle << CHUNK_SHIFT),
- zhdr->middle_chunks << CHUNK_SHIFT);
+ if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
+ /* move to the beginning */
+ mchunk_memmove(zhdr, ZHDR_CHUNKS);
zhdr->first_chunks = zhdr->middle_chunks;
zhdr->middle_chunks = 0;
zhdr->start_middle = 0;
zhdr->first_num++;
return 1;
}
+
+ /*
+ * moving data is expensive, so let's only do that if
+ * there's substantial gain (at least BIG_CHUNK_GAP chunks)
+ */
+ if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
+ zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
+ BIG_CHUNK_GAP) {
+ mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS);
+ zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
+ return 1;
+ } else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
+ TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
+ + zhdr->middle_chunks) >=
+ BIG_CHUNK_GAP) {
+ unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks -
+ zhdr->middle_chunks;
+ mchunk_memmove(zhdr, new_start);
+ zhdr->start_middle = new_start;
+ return 1;
+ }
+
return 0;
}
@@ -309,50 +378,63 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
bud = HEADLESS;
else {
chunks = size_to_chunks(size);
- spin_lock(&pool->lock);
/* First, try to find an unbuddied z3fold page. */
zhdr = NULL;
for_each_unbuddied_list(i, chunks) {
- if (!list_empty(&pool->unbuddied[i])) {
- zhdr = list_first_entry(&pool->unbuddied[i],
+ spin_lock(&pool->lock);
+ zhdr = list_first_entry_or_null(&pool->unbuddied[i],
struct z3fold_header, buddy);
- page = virt_to_page(zhdr);
- if (zhdr->first_chunks == 0) {
- if (zhdr->middle_chunks != 0 &&
- chunks >= zhdr->start_middle)
- bud = LAST;
- else
- bud = FIRST;
- } else if (zhdr->last_chunks == 0)
+ if (!zhdr) {
+ spin_unlock(&pool->lock);
+ continue;
+ }
+ kref_get(&zhdr->refcount);
+ list_del_init(&zhdr->buddy);
+ spin_unlock(&pool->lock);
+
+ page = virt_to_page(zhdr);
+ z3fold_page_lock(zhdr);
+ if (zhdr->first_chunks == 0) {
+ if (zhdr->middle_chunks != 0 &&
+ chunks >= zhdr->start_middle)
bud = LAST;
- else if (zhdr->middle_chunks == 0)
- bud = MIDDLE;
- else {
- pr_err("No free chunks in unbuddied\n");
- WARN_ON(1);
- continue;
- }
- list_del(&zhdr->buddy);
- goto found;
+ else
+ bud = FIRST;
+ } else if (zhdr->last_chunks == 0)
+ bud = LAST;
+ else if (zhdr->middle_chunks == 0)
+ bud = MIDDLE;
+ else {
+ z3fold_page_unlock(zhdr);
+ spin_lock(&pool->lock);
+ if (kref_put(&zhdr->refcount,
+ release_z3fold_page))
+ atomic64_dec(&pool->pages_nr);
+ spin_unlock(&pool->lock);
+ pr_err("No free chunks in unbuddied\n");
+ WARN_ON(1);
+ continue;
}
+ goto found;
}
bud = FIRST;
- spin_unlock(&pool->lock);
}
/* Couldn't find unbuddied z3fold page, create new one */
page = alloc_page(gfp);
if (!page)
return -ENOMEM;
- spin_lock(&pool->lock);
- pool->pages_nr++;
+
+ atomic64_inc(&pool->pages_nr);
zhdr = init_z3fold_page(page);
if (bud == HEADLESS) {
set_bit(PAGE_HEADLESS, &page->private);
+ spin_lock(&pool->lock);
goto headless;
}
+ z3fold_page_lock(zhdr);
found:
if (bud == FIRST)
@@ -361,17 +443,15 @@ found:
zhdr->last_chunks = chunks;
else {
zhdr->middle_chunks = chunks;
- zhdr->start_middle = zhdr->first_chunks + 1;
+ zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
}
+ spin_lock(&pool->lock);
if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
zhdr->middle_chunks == 0) {
/* Add to unbuddied list */
freechunks = num_free_chunks(zhdr);
list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
- } else {
- /* Add to buddied list */
- list_add(&zhdr->buddy, &pool->buddied);
}
headless:
@@ -383,6 +463,8 @@ headless:
*handle = encode_handle(zhdr, bud);
spin_unlock(&pool->lock);
+ if (bud != HEADLESS)
+ z3fold_page_unlock(zhdr);
return 0;
}
@@ -404,7 +486,6 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
struct page *page;
enum buddy bud;
- spin_lock(&pool->lock);
zhdr = handle_to_z3fold_header(handle);
page = virt_to_page(zhdr);
@@ -412,6 +493,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
/* HEADLESS page stored */
bud = HEADLESS;
} else {
+ z3fold_page_lock(zhdr);
bud = handle_to_buddy(handle);
switch (bud) {
@@ -428,38 +510,36 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
default:
pr_err("%s: unknown bud %d\n", __func__, bud);
WARN_ON(1);
- spin_unlock(&pool->lock);
+ z3fold_page_unlock(zhdr);
return;
}
}
- if (test_bit(UNDER_RECLAIM, &page->private)) {
- /* z3fold page is under reclaim, reclaim will free */
- spin_unlock(&pool->lock);
- return;
- }
-
- if (bud != HEADLESS) {
- /* Remove from existing buddy list */
- list_del(&zhdr->buddy);
- }
-
- if (bud == HEADLESS ||
- (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
- zhdr->last_chunks == 0)) {
- /* z3fold page is empty, free */
+ if (bud == HEADLESS) {
+ spin_lock(&pool->lock);
list_del(&page->lru);
- clear_bit(PAGE_HEADLESS, &page->private);
- free_z3fold_page(zhdr);
- pool->pages_nr--;
+ spin_unlock(&pool->lock);
+ free_z3fold_page(page);
+ atomic64_dec(&pool->pages_nr);
} else {
- z3fold_compact_page(zhdr);
- /* Add to the unbuddied list */
- freechunks = num_free_chunks(zhdr);
- list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+ if (zhdr->first_chunks != 0 || zhdr->middle_chunks != 0 ||
+ zhdr->last_chunks != 0) {
+ z3fold_compact_page(zhdr);
+ /* Add to the unbuddied list */
+ spin_lock(&pool->lock);
+ if (!list_empty(&zhdr->buddy))
+ list_del(&zhdr->buddy);
+ freechunks = num_free_chunks(zhdr);
+ list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+ spin_unlock(&pool->lock);
+ }
+ z3fold_page_unlock(zhdr);
+ spin_lock(&pool->lock);
+ if (kref_put(&zhdr->refcount, release_z3fold_page))
+ atomic64_dec(&pool->pages_nr);
+ spin_unlock(&pool->lock);
}
- spin_unlock(&pool->lock);
}
/**
@@ -506,20 +586,25 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
spin_lock(&pool->lock);
- if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
- retries == 0) {
+ if (!pool->ops || !pool->ops->evict || retries == 0) {
spin_unlock(&pool->lock);
return -EINVAL;
}
for (i = 0; i < retries; i++) {
+ if (list_empty(&pool->lru)) {
+ spin_unlock(&pool->lock);
+ return -EINVAL;
+ }
page = list_last_entry(&pool->lru, struct page, lru);
- list_del(&page->lru);
+ list_del_init(&page->lru);
- /* Protect z3fold page against free */
- set_bit(UNDER_RECLAIM, &page->private);
zhdr = page_address(page);
if (!test_bit(PAGE_HEADLESS, &page->private)) {
- list_del(&zhdr->buddy);
+ if (!list_empty(&zhdr->buddy))
+ list_del_init(&zhdr->buddy);
+ kref_get(&zhdr->refcount);
+ spin_unlock(&pool->lock);
+ z3fold_page_lock(zhdr);
/*
* We need encode the handles before unlocking, since
* we can race with free that will set
@@ -534,13 +619,13 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
middle_handle = encode_handle(zhdr, MIDDLE);
if (zhdr->last_chunks)
last_handle = encode_handle(zhdr, LAST);
+ z3fold_page_unlock(zhdr);
} else {
first_handle = encode_handle(zhdr, HEADLESS);
last_handle = middle_handle = 0;
+ spin_unlock(&pool->lock);
}
- spin_unlock(&pool->lock);
-
/* Issue the eviction callback(s) */
if (middle_handle) {
ret = pool->ops->evict(pool, middle_handle);
@@ -558,36 +643,40 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
goto next;
}
next:
- spin_lock(&pool->lock);
- clear_bit(UNDER_RECLAIM, &page->private);
- if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
- (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
- zhdr->middle_chunks == 0)) {
- /*
- * All buddies are now free, free the z3fold page and
- * return success.
- */
- clear_bit(PAGE_HEADLESS, &page->private);
- free_z3fold_page(zhdr);
- pool->pages_nr--;
- spin_unlock(&pool->lock);
- return 0;
- } else if (!test_bit(PAGE_HEADLESS, &page->private)) {
- if (zhdr->first_chunks != 0 &&
- zhdr->last_chunks != 0 &&
- zhdr->middle_chunks != 0) {
- /* Full, add to buddied list */
- list_add(&zhdr->buddy, &pool->buddied);
+ if (test_bit(PAGE_HEADLESS, &page->private)) {
+ if (ret == 0) {
+ free_z3fold_page(page);
+ return 0;
} else {
+ spin_lock(&pool->lock);
+ }
+ } else {
+ z3fold_page_lock(zhdr);
+ if ((zhdr->first_chunks || zhdr->last_chunks ||
+ zhdr->middle_chunks) &&
+ !(zhdr->first_chunks && zhdr->last_chunks &&
+ zhdr->middle_chunks)) {
z3fold_compact_page(zhdr);
/* add to unbuddied list */
+ spin_lock(&pool->lock);
freechunks = num_free_chunks(zhdr);
list_add(&zhdr->buddy,
&pool->unbuddied[freechunks]);
+ spin_unlock(&pool->lock);
+ }
+ z3fold_page_unlock(zhdr);
+ spin_lock(&pool->lock);
+ if (kref_put(&zhdr->refcount, release_z3fold_page)) {
+ atomic64_dec(&pool->pages_nr);
+ return 0;
}
}
- /* add to beginning of LRU */
+ /*
+ * Add to the beginning of LRU.
+ * Pool lock has to be kept here to ensure the page has
+ * not already been released
+ */
list_add(&page->lru, &pool->lru);
}
spin_unlock(&pool->lock);
@@ -611,7 +700,6 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
void *addr;
enum buddy buddy;
- spin_lock(&pool->lock);
zhdr = handle_to_z3fold_header(handle);
addr = zhdr;
page = virt_to_page(zhdr);
@@ -619,6 +707,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
if (test_bit(PAGE_HEADLESS, &page->private))
goto out;
+ z3fold_page_lock(zhdr);
buddy = handle_to_buddy(handle);
switch (buddy) {
case FIRST:
@@ -637,8 +726,9 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
addr = NULL;
break;
}
+
+ z3fold_page_unlock(zhdr);
out:
- spin_unlock(&pool->lock);
return addr;
}
@@ -653,31 +743,28 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
struct page *page;
enum buddy buddy;
- spin_lock(&pool->lock);
zhdr = handle_to_z3fold_header(handle);
page = virt_to_page(zhdr);
- if (test_bit(PAGE_HEADLESS, &page->private)) {
- spin_unlock(&pool->lock);
+ if (test_bit(PAGE_HEADLESS, &page->private))
return;
- }
+ z3fold_page_lock(zhdr);
buddy = handle_to_buddy(handle);
if (buddy == MIDDLE)
clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
- spin_unlock(&pool->lock);
+ z3fold_page_unlock(zhdr);
}
/**
* z3fold_get_pool_size() - gets the z3fold pool size in pages
* @pool: pool whose size is being queried
*
- * Returns: size in pages of the given pool. The pool lock need not be
- * taken to access pages_nr.
+ * Returns: size in pages of the given pool.
*/
static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
{
- return pool->pages_nr;
+ return atomic64_read(&pool->pages_nr);
}
/*****************
@@ -776,8 +863,8 @@ MODULE_ALIAS("zpool-z3fold");
static int __init init_z3fold(void)
{
- /* Make sure the z3fold header will fit in one chunk */
- BUILD_BUG_ON(sizeof(struct z3fold_header) > ZHDR_SIZE_ALIGNED);
+ /* Make sure the z3fold header is not larger than the page size */
+ BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE);
zpool_register_driver(&z3fold_zpool_driver);
return 0;
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 9cc3c0b2c2c1..a1f24989ac23 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -25,7 +25,7 @@
* Usage of struct page flags:
* PG_private: identifies the first component page
* PG_private2: identifies the last component page
- * PG_owner_priv_1: indentifies the huge component page
+ * PG_owner_priv_1: identifies the huge component page
*
*/
@@ -364,7 +364,7 @@ static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
{
return kmem_cache_alloc(pool->zspage_cachep,
flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
-};
+}
static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
{
@@ -2383,7 +2383,7 @@ struct zs_pool *zs_create_pool(const char *name)
goto err;
/*
- * Iterate reversly, because, size of size_class that we want to use
+ * Iterate reversely, because, size of size_class that we want to use
* for merging should be larger or equal to current size.
*/
for (i = zs_size_classes - 1; i >= 0; i--) {
diff --git a/mm/zswap.c b/mm/zswap.c
index cabf09e0128b..eedc27894b10 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -76,6 +76,8 @@ static u64 zswap_duplicate_entry;
* tunables
**********************************/
+#define ZSWAP_PARAM_UNSET ""
+
/* Enable/disable zswap (disabled by default) */
static bool zswap_enabled;
static int zswap_enabled_param_set(const char *,
@@ -185,6 +187,9 @@ static bool zswap_init_started;
/* fatal error during init */
static bool zswap_init_failed;
+/* init completed, but couldn't create the initial pool */
+static bool zswap_has_pool;
+
/*********************************
* helpers and fwd declarations
**********************************/
@@ -424,7 +429,8 @@ static struct zswap_pool *__zswap_pool_current(void)
struct zswap_pool *pool;
pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
- WARN_ON(!pool);
+ WARN_ONCE(!pool && zswap_has_pool,
+ "%s: no page storage pool!\n", __func__);
return pool;
}
@@ -443,7 +449,7 @@ static struct zswap_pool *zswap_pool_current_get(void)
rcu_read_lock();
pool = __zswap_pool_current();
- if (!pool || !zswap_pool_get(pool))
+ if (!zswap_pool_get(pool))
pool = NULL;
rcu_read_unlock();
@@ -459,7 +465,9 @@ static struct zswap_pool *zswap_pool_last_get(void)
list_for_each_entry_rcu(pool, &zswap_pools, list)
last = pool;
- if (!WARN_ON(!last) && !zswap_pool_get(last))
+ WARN_ONCE(!last && zswap_has_pool,
+ "%s: no page storage pool!\n", __func__);
+ if (!zswap_pool_get(last))
last = NULL;
rcu_read_unlock();
@@ -495,6 +503,17 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
int ret;
+ if (!zswap_has_pool) {
+ /* if either are unset, pool initialization failed, and we
+ * need both params to be set correctly before trying to
+ * create a pool.
+ */
+ if (!strcmp(type, ZSWAP_PARAM_UNSET))
+ return NULL;
+ if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
+ return NULL;
+ }
+
pool = kzalloc(sizeof(*pool), GFP_KERNEL);
if (!pool) {
pr_err("pool alloc failed\n");
@@ -544,29 +563,41 @@ error:
static __init struct zswap_pool *__zswap_pool_create_fallback(void)
{
- if (!crypto_has_comp(zswap_compressor, 0, 0)) {
- if (!strcmp(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT)) {
- pr_err("default compressor %s not available\n",
- zswap_compressor);
- return NULL;
- }
+ bool has_comp, has_zpool;
+
+ has_comp = crypto_has_comp(zswap_compressor, 0, 0);
+ if (!has_comp && strcmp(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT)) {
pr_err("compressor %s not available, using default %s\n",
zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
param_free_charp(&zswap_compressor);
zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
+ has_comp = crypto_has_comp(zswap_compressor, 0, 0);
}
- if (!zpool_has_pool(zswap_zpool_type)) {
- if (!strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
- pr_err("default zpool %s not available\n",
- zswap_zpool_type);
- return NULL;
- }
+ if (!has_comp) {
+ pr_err("default compressor %s not available\n",
+ zswap_compressor);
+ param_free_charp(&zswap_compressor);
+ zswap_compressor = ZSWAP_PARAM_UNSET;
+ }
+
+ has_zpool = zpool_has_pool(zswap_zpool_type);
+ if (!has_zpool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
pr_err("zpool %s not available, using default %s\n",
zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
param_free_charp(&zswap_zpool_type);
zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
+ has_zpool = zpool_has_pool(zswap_zpool_type);
+ }
+ if (!has_zpool) {
+ pr_err("default zpool %s not available\n",
+ zswap_zpool_type);
+ param_free_charp(&zswap_zpool_type);
+ zswap_zpool_type = ZSWAP_PARAM_UNSET;
}
+ if (!has_comp || !has_zpool)
+ return NULL;
+
return zswap_pool_create(zswap_zpool_type, zswap_compressor);
}
@@ -582,6 +613,9 @@ static void zswap_pool_destroy(struct zswap_pool *pool)
static int __must_check zswap_pool_get(struct zswap_pool *pool)
{
+ if (!pool)
+ return 0;
+
return kref_get_unless_zero(&pool->kref);
}
@@ -639,7 +673,7 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp,
}
/* no change required */
- if (!strcmp(s, *(char **)kp->arg))
+ if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
return 0;
/* if this is load-time (pre-init) param setting,
@@ -670,21 +704,26 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp,
pool = zswap_pool_find_get(type, compressor);
if (pool) {
zswap_pool_debug("using existing", pool);
+ WARN_ON(pool == zswap_pool_current());
list_del_rcu(&pool->list);
- } else {
- spin_unlock(&zswap_pools_lock);
- pool = zswap_pool_create(type, compressor);
- spin_lock(&zswap_pools_lock);
}
+ spin_unlock(&zswap_pools_lock);
+
+ if (!pool)
+ pool = zswap_pool_create(type, compressor);
+
if (pool)
ret = param_set_charp(s, kp);
else
ret = -EINVAL;
+ spin_lock(&zswap_pools_lock);
+
if (!ret) {
put_pool = zswap_pool_current();
list_add_rcu(&pool->list, &zswap_pools);
+ zswap_has_pool = true;
} else if (pool) {
/* add the possibly pre-existing pool to the end of the pools
* list; if it's new (and empty) then it'll be removed and
@@ -696,6 +735,17 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp,
spin_unlock(&zswap_pools_lock);
+ if (!zswap_has_pool && !pool) {
+ /* if initial pool creation failed, and this pool creation also
+ * failed, maybe both compressor and zpool params were bad.
+ * Allow changing this param, so pool creation will succeed
+ * when the other param is changed. We already verified this
+ * param is ok in the zpool_has_pool() or crypto_has_comp()
+ * checks above.
+ */
+ ret = param_set_charp(s, kp);
+ }
+
/* drop the ref from either the old current pool,
* or the new pool we failed to add
*/
@@ -724,6 +774,10 @@ static int zswap_enabled_param_set(const char *val,
pr_err("can't enable, initialization failed\n");
return -ENODEV;
}
+ if (!zswap_has_pool && zswap_init_started) {
+ pr_err("can't enable, no pool configured\n");
+ return -ENODEV;
+ }
return param_set_bool(val, kp);
}
@@ -1205,22 +1259,21 @@ static int __init init_zswap(void)
goto hp_fail;
pool = __zswap_pool_create_fallback();
- if (!pool) {
+ if (pool) {
+ pr_info("loaded using pool %s/%s\n", pool->tfm_name,
+ zpool_get_type(pool->zpool));
+ list_add(&pool->list, &zswap_pools);
+ zswap_has_pool = true;
+ } else {
pr_err("pool creation failed\n");
- goto pool_fail;
+ zswap_enabled = false;
}
- pr_info("loaded using pool %s/%s\n", pool->tfm_name,
- zpool_get_type(pool->zpool));
-
- list_add(&pool->list, &zswap_pools);
frontswap_register_ops(&zswap_frontswap_ops);
if (zswap_debugfs_init())
pr_warn("debugfs initialization failed\n");
return 0;
-pool_fail:
- cpuhp_remove_state_nocalls(CPUHP_MM_ZSWP_POOL_PREPARE);
hp_fail:
cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
dstmem_fail:
diff --git a/scripts/Lindent b/scripts/Lindent
index 6d889de4e70b..57b564c24d61 100755
--- a/scripts/Lindent
+++ b/scripts/Lindent
@@ -1,21 +1,25 @@
#!/bin/sh
+
PARAM="-npro -kr -i8 -ts8 -sob -l80 -ss -ncs -cp1"
-RES=`indent --version`
+
+RES=`indent --version | cut -d' ' -f3`
if [ "$RES" = "" ]; then
exit 1
fi
-V1=`echo $RES | cut -d' ' -f3 | cut -d'.' -f1`
-V2=`echo $RES | cut -d' ' -f3 | cut -d'.' -f2`
-V3=`echo $RES | cut -d' ' -f3 | cut -d'.' -f3`
+V1=`echo $RES | cut -d'.' -f1`
+V2=`echo $RES | cut -d'.' -f2`
+V3=`echo $RES | cut -d'.' -f3`
+
if [ $V1 -gt 2 ]; then
PARAM="$PARAM -il0"
elif [ $V1 -eq 2 ]; then
if [ $V2 -gt 2 ]; then
- PARAM="$PARAM -il0";
+ PARAM="$PARAM -il0"
elif [ $V2 -eq 2 ]; then
if [ $V3 -ge 10 ]; then
PARAM="$PARAM -il0"
fi
fi
fi
+
indent $PARAM "$@"
diff --git a/scripts/checkincludes.pl b/scripts/checkincludes.pl
index 97b2c6143fe4..381c018a4612 100755
--- a/scripts/checkincludes.pl
+++ b/scripts/checkincludes.pl
@@ -37,6 +37,8 @@ if ($#ARGV >= 1) {
}
}
+my $dup_counter = 0;
+
foreach my $file (@ARGV) {
open(my $f, '<', $file)
or die "Cannot open $file: $!.\n";
@@ -57,6 +59,7 @@ foreach my $file (@ARGV) {
foreach my $filename (keys %includedfiles) {
if ($includedfiles{$filename} > 1) {
print "$file: $filename is included more than once.\n";
+ ++$dup_counter;
}
}
next;
@@ -73,6 +76,7 @@ foreach my $file (@ARGV) {
if ($includedfiles{$filename} > 1) {
$includedfiles{$filename}--;
$dups++;
+ ++$dup_counter;
} else {
print {$f} $_;
}
@@ -87,3 +91,7 @@ foreach my $file (@ARGV) {
}
close($f);
}
+
+if ($dup_counter == 0) {
+ print "No duplicate includes found.\n";
+}
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 982c52ca6473..12db1483e6c2 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -424,7 +424,7 @@ our $typeTypedefs = qr{(?x:
our $zero_initializer = qr{(?:(?:0[xX])?0+$Int_type?|NULL|false)\b};
our $logFunctions = qr{(?x:
- printk(?:_ratelimited|_once|)|
+ printk(?:_ratelimited|_once|_deferred_once|_deferred|)|
(?:[a-z0-9]+_){1,2}(?:printk|emerg|alert|crit|err|warning|warn|notice|info|debug|dbg|vdbg|devel|cont|WARN)(?:_ratelimited|_once|)|
WARN(?:_RATELIMIT|_ONCE|)|
panic|
@@ -2134,7 +2134,7 @@ sub process {
my $in_header_lines = $file ? 0 : 1;
my $in_commit_log = 0; #Scanning lines before patch
my $has_commit_log = 0; #Encountered lines before patch
- my $commit_log_possible_stack_dump = 0;
+ my $commit_log_possible_stack_dump = 0;
my $commit_log_long_line = 0;
my $commit_log_has_diff = 0;
my $reported_maintainer_file = 0;
@@ -2154,6 +2154,7 @@ sub process {
my $realline = 0;
my $realcnt = 0;
my $here = '';
+ my $context_function; #undef'd unless there's a known function
my $in_comment = 0;
my $comment_edge = 0;
my $first_line = 0;
@@ -2192,7 +2193,8 @@ sub process {
}
#next;
}
- if ($rawline=~/^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) {
+ if ($rawline=~/^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@(.*)/) {
+ my $context = $4;
$realline=$1-1;
if (defined $2) {
$realcnt=$3+1;
@@ -2201,6 +2203,12 @@ sub process {
}
$in_comment = 0;
+ if ($context =~ /\b(\w+)\s*\(/) {
+ $context_function = $1;
+ } else {
+ undef $context_function;
+ }
+
# Guestimate if this is a continuing comment. Run
# the context looking for a comment "edge". If this
# edge is a close comment then we must be in a comment
@@ -2695,6 +2703,7 @@ sub process {
# Check for FSF mailing addresses.
if ($rawline =~ /\bwrite to the Free/i ||
+ $rawline =~ /\b675\s+Mass\s+Ave/i ||
$rawline =~ /\b59\s+Temple\s+Pl/i ||
$rawline =~ /\b51\s+Franklin\s+St/i) {
my $herevet = "$here\n" . cat_vet($rawline) . "\n";
@@ -5157,6 +5166,16 @@ sub process {
"break quoted strings at a space character\n" . $hereprev);
}
+#check for an embedded function name in a string when the function is known
+# as part of a diff. This does not work for -f --file checking as it
+#depends on patch context providing the function name
+ if ($line =~ /^\+.*$String/ &&
+ defined($context_function) &&
+ get_quoted_string($line, $rawline) =~ /\b$context_function\b/) {
+ WARN("EMBEDDED_FUNCTION_NAME",
+ "Prefer using \"%s\", __func__ to embedded function names\n" . $herecurr);
+ }
+
# check for spaces before a quoted newline
if ($rawline =~ /^.*\".*\s\\n/) {
if (WARN("QUOTED_WHITESPACE_BEFORE_NEWLINE",
@@ -5269,6 +5288,12 @@ sub process {
}
}
+# check for logging continuations
+ if ($line =~ /\bprintk\s*\(\s*KERN_CONT\b|\bpr_cont\s*\(/) {
+ WARN("LOGGING_CONTINUATION",
+ "Avoid logging continuation uses where feasible\n" . $herecurr);
+ }
+
# check for mask then right shift without a parentheses
if ($^V && $^V ge 5.10.0 &&
$line =~ /$LvalOrFunc\s*\&\s*($LvalOrFunc)\s*>>/ &&
diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index eea5b782a3b7..36a8a788f5c5 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -78,6 +78,9 @@ my (@stack, $re, $dre, $x, $xs, $funcre);
} elsif ($arch eq 'mips') {
#88003254: 27bdffe0 addiu sp,sp,-32
$re = qr/.*addiu.*sp,sp,-(([0-9]{2}|[3-9])[0-9]{2})/o;
+ } elsif ($arch eq 'nios2') {
+ #25a8: defffb04 addi sp,sp,-20
+ $re = qr/.*addi.*sp,sp,-(([0-9]{2}|[3-9])[0-9]{2})/o;
} elsif ($arch eq 'parisc' || $arch eq 'parisc64') {
$re = qr/.*ldo ($x{1,8})\(sp\),sp/o;
} elsif ($arch eq 'ppc') {
diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in
index 7986f4e0da12..7aad82406422 100644
--- a/scripts/gdb/linux/constants.py.in
+++ b/scripts/gdb/linux/constants.py.in
@@ -14,6 +14,7 @@
#include <linux/fs.h>
#include <linux/mount.h>
+#include <linux/of_fdt.h>
/* We need to stringify expanded macros so that they can be parsed */
@@ -50,3 +51,9 @@ LX_VALUE(MNT_NOEXEC)
LX_VALUE(MNT_NOATIME)
LX_VALUE(MNT_NODIRATIME)
LX_VALUE(MNT_RELATIME)
+
+/* linux/of_fdt.h> */
+LX_VALUE(OF_DT_HEADER)
+
+/* Kernel Configs */
+LX_CONFIG(CONFIG_OF)
diff --git a/scripts/gdb/linux/proc.py b/scripts/gdb/linux/proc.py
index 38b1f09d1cd9..086d27223c0c 100644
--- a/scripts/gdb/linux/proc.py
+++ b/scripts/gdb/linux/proc.py
@@ -16,6 +16,7 @@ from linux import constants
from linux import utils
from linux import tasks
from linux import lists
+from struct import *
class LxCmdLine(gdb.Command):
@@ -195,3 +196,75 @@ values of that process namespace"""
info_opts(MNT_INFO, m_flags)))
LxMounts()
+
+
+class LxFdtDump(gdb.Command):
+ """Output Flattened Device Tree header and dump FDT blob to the filename
+ specified as the command argument. Equivalent to
+ 'cat /proc/fdt > fdtdump.dtb' on a running target"""
+
+ def __init__(self):
+ super(LxFdtDump, self).__init__("lx-fdtdump", gdb.COMMAND_DATA,
+ gdb.COMPLETE_FILENAME)
+
+ def fdthdr_to_cpu(self, fdt_header):
+
+ fdt_header_be = ">IIIIIII"
+ fdt_header_le = "<IIIIIII"
+
+ if utils.get_target_endianness() == 1:
+ output_fmt = fdt_header_le
+ else:
+ output_fmt = fdt_header_be
+
+ return unpack(output_fmt, pack(fdt_header_be,
+ fdt_header['magic'],
+ fdt_header['totalsize'],
+ fdt_header['off_dt_struct'],
+ fdt_header['off_dt_strings'],
+ fdt_header['off_mem_rsvmap'],
+ fdt_header['version'],
+ fdt_header['last_comp_version']))
+
+ def invoke(self, arg, from_tty):
+
+ if not constants.LX_CONFIG_OF:
+ raise gdb.GdbError("Kernel not compiled with CONFIG_OF\n")
+
+ if len(arg) == 0:
+ filename = "fdtdump.dtb"
+ else:
+ filename = arg
+
+ py_fdt_header_ptr = gdb.parse_and_eval(
+ "(const struct fdt_header *) initial_boot_params")
+ py_fdt_header = py_fdt_header_ptr.dereference()
+
+ fdt_header = self.fdthdr_to_cpu(py_fdt_header)
+
+ if fdt_header[0] != constants.LX_OF_DT_HEADER:
+ raise gdb.GdbError("No flattened device tree magic found\n")
+
+ gdb.write("fdt_magic: 0x{:02X}\n".format(fdt_header[0]))
+ gdb.write("fdt_totalsize: 0x{:02X}\n".format(fdt_header[1]))
+ gdb.write("off_dt_struct: 0x{:02X}\n".format(fdt_header[2]))
+ gdb.write("off_dt_strings: 0x{:02X}\n".format(fdt_header[3]))
+ gdb.write("off_mem_rsvmap: 0x{:02X}\n".format(fdt_header[4]))
+ gdb.write("version: {}\n".format(fdt_header[5]))
+ gdb.write("last_comp_version: {}\n".format(fdt_header[6]))
+
+ inf = gdb.inferiors()[0]
+ fdt_buf = utils.read_memoryview(inf, py_fdt_header_ptr,
+ fdt_header[1]).tobytes()
+
+ try:
+ f = open(filename, 'wb')
+ except:
+ raise gdb.GdbError("Could not open file to dump fdt")
+
+ f.write(fdt_buf)
+ f.close()
+
+ gdb.write("Dumped fdt blob to " + filename + "\n")
+
+LxFdtDump()
diff --git a/scripts/spelling.txt b/scripts/spelling.txt
index 163c720d3f2b..b3a1994b5df7 100644
--- a/scripts/spelling.txt
+++ b/scripts/spelling.txt
@@ -16,6 +16,7 @@ absense||absence
absolut||absolute
absoulte||absolute
acccess||access
+acceess||access
acceleratoin||acceleration
accelleration||acceleration
accesing||accessing
@@ -39,13 +40,14 @@ achitecture||architecture
acient||ancient
acitions||actions
acitve||active
-acknowldegement||acknowldegement
+acknowldegement||acknowledgment
acknowledgement||acknowledgment
ackowledge||acknowledge
ackowledged||acknowledged
acording||according
activete||activate
acumulating||accumulating
+acumulator||accumulator
adapater||adapter
addional||additional
additionaly||additionally
@@ -184,6 +186,7 @@ cacluated||calculated
caculation||calculation
calender||calendar
calle||called
+callibration||calibration
calucate||calculate
calulate||calculate
cancelation||cancellation
@@ -244,6 +247,7 @@ compatiblity||compatibility
competion||completion
compilant||compliant
compleatly||completely
+completition||completion
completly||completely
complient||compliant
componnents||components
@@ -257,6 +261,7 @@ conected||connected
configuratoin||configuration
configuraton||configuration
configuretion||configuration
+configutation||configuration
conider||consider
conjuction||conjunction
connectinos||connections
@@ -317,6 +322,7 @@ dependant||dependent
depreacted||deprecated
depreacte||deprecate
desactivate||deactivate
+desciptor||descriptor
desciptors||descriptors
descripton||description
descrition||description
@@ -417,9 +423,12 @@ extention||extension
extracter||extractor
faild||failed
faill||fail
+failied||failed
+faillure||failure
failue||failure
failuer||failure
faireness||fairness
+falied||failed
faliure||failure
familar||familiar
fatser||faster
@@ -441,6 +450,7 @@ forseeable||foreseeable
forse||force
fortan||fortran
forwardig||forwarding
+framming||framing
framwork||framework
frequncy||frequency
frome||from
@@ -482,6 +492,7 @@ howver||however
hsould||should
hypter||hyper
identidier||identifier
+illigal||illegal
imblance||imbalance
immeadiately||immediately
immedaite||immediate
@@ -520,6 +531,7 @@ informtion||information
infromation||information
ingore||ignore
inital||initial
+initalized||initialized
initalised||initialized
initalise||initialize
initalize||initialize
@@ -532,6 +544,7 @@ initilize||initialize
inofficial||unofficial
insititute||institute
instal||install
+instanciated||instantiated
inteface||interface
integreated||integrated
integrety||integrity
@@ -557,9 +570,10 @@ intialized||initialized
intialize||initialize
intregral||integral
intrrupt||interrupt
+intterrupt||interrupt
intuative||intuitive
invaid||invalid
-invalde||invald
+invalde||invalid
invalide||invalid
invididual||individual
invokation||invocation
@@ -567,6 +581,8 @@ invokations||invocations
irrelevent||irrelevant
isnt||isn't
isssue||issue
+iternations||iterations
+itertation||iteration
itslef||itself
jave||java
jeffies||jiffies
@@ -621,6 +637,7 @@ messsage||message
messsages||messages
microprocesspr||microprocessor
milliseonds||milliseconds
+minium||minimum
minumum||minimum
miscelleneous||miscellaneous
misformed||malformed
@@ -668,6 +685,7 @@ occurances||occurrences
occured||occurred
occurence||occurrence
occure||occurred
+occured||occurred
occuring||occurring
offet||offset
omitt||omit
@@ -681,8 +699,10 @@ optionnal||optional
optmizations||optimizations
orientatied||orientated
orientied||oriented
+orignal||original
otherise||otherwise
ouput||output
+oustanding||outstanding
overaall||overall
overhread||overhead
overlaping||overlapping
@@ -705,6 +725,7 @@ paramter||parameter
paramters||parameters
particuarly||particularly
particularily||particularly
+partiton||partition
pased||passed
passin||passing
pathes||paths
@@ -724,6 +745,7 @@ pleaes||please
ploting||plotting
plugable||pluggable
poinnter||pointer
+pointeur||pointer
poiter||pointer
posible||possible
positon||position
@@ -752,6 +774,7 @@ procceed||proceed
proccesors||processors
procesed||processed
proces||process
+procesing||processing
processessing||processing
processess||processes
processpr||processor
@@ -780,6 +803,7 @@ protable||portable
protcol||protocol
protecion||protection
protocoll||protocol
+promixity||proximity
psudo||pseudo
psuedo||pseudo
psychadelic||psychedelic
@@ -801,6 +825,7 @@ recommanded||recommended
recyle||recycle
redircet||redirect
redirectrion||redirection
+reename||rename
refcounf||refcount
refence||reference
refered||referred
@@ -944,7 +969,9 @@ suble||subtle
substract||subtract
succesfully||successfully
succesful||successful
+successed||succeeded
successfull||successful
+successfuly||successfully
sucessfully||successfully
sucess||success
superflous||superfluous
@@ -960,6 +987,7 @@ suppport||support
supress||suppress
surpresses||suppresses
susbsystem||subsystem
+suspeneded||suspended
suspicously||suspiciously
swaping||swapping
switchs||switches
@@ -967,6 +995,7 @@ symetric||symmetric
synax||syntax
synchonized||synchronized
syncronize||synchronize
+syncronized||synchronized
syncronizing||synchronizing
syncronus||synchronous
syste||system
@@ -991,7 +1020,7 @@ tramsmitted||transmitted
tramsmit||transmit
tranfer||transfer
transciever||transceiver
-transferd||transferrd
+transferd||transferred
transfered||transferred
transfering||transferring
transision||transition
@@ -1005,22 +1034,30 @@ ture||true
tyep||type
udpate||update
uesd||used
+uncommited||uncommitted
unconditionaly||unconditionally
underun||underrun
unecessary||unnecessary
unexecpted||unexpected
+unexpcted||unexpected
unexpectd||unexpected
unexpeted||unexpected
+unexpexted||unexpected
unfortunatelly||unfortunately
unifiy||unify
unintialized||uninitialized
+unkmown||unknown
unknonw||unknown
unknow||unknown
unkown||unknown
unneedingly||unnecessarily
+unnsupported||unsupported
+unmached||unmatched
unresgister||unregister
+unrgesiter||unregister
unsinged||unsigned
unstabel||unstable
+unsolicitied||unsolicited
unsuccessfull||unsuccessful
unsuported||unsupported
untill||until
diff --git a/scripts/tags.sh b/scripts/tags.sh
index df5fa777d300..d661f2f3ef61 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -128,6 +128,8 @@ all_target_sources()
all_kconfigs()
{
+ find ${tree}arch/ -maxdepth 1 $ignore \
+ -name "Kconfig*" -not -type l -print;
for arch in $ALLSOURCE_ARCHS; do
find_sources $arch 'Kconfig*'
done
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index c354807381c1..c9e8a9898ce4 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -424,10 +424,9 @@ out:
return ret;
}
-static int sel_mmap_policy_fault(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int sel_mmap_policy_fault(struct vm_fault *vmf)
{
- struct policy_load_memory *plm = vma->vm_file->private_data;
+ struct policy_load_memory *plm = vmf->vma->vm_file->private_data;
unsigned long offset;
struct page *page;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 9d33c1e85c79..aec9c92250fd 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -3245,10 +3245,9 @@ static unsigned int snd_pcm_capture_poll(struct file *file, poll_table * wait)
/*
* mmap status record
*/
-static int snd_pcm_mmap_status_fault(struct vm_area_struct *area,
- struct vm_fault *vmf)
+static int snd_pcm_mmap_status_fault(struct vm_fault *vmf)
{
- struct snd_pcm_substream *substream = area->vm_private_data;
+ struct snd_pcm_substream *substream = vmf->vma->vm_private_data;
struct snd_pcm_runtime *runtime;
if (substream == NULL)
@@ -3282,10 +3281,9 @@ static int snd_pcm_mmap_status(struct snd_pcm_substream *substream, struct file
/*
* mmap control record
*/
-static int snd_pcm_mmap_control_fault(struct vm_area_struct *area,
- struct vm_fault *vmf)
+static int snd_pcm_mmap_control_fault(struct vm_fault *vmf)
{
- struct snd_pcm_substream *substream = area->vm_private_data;
+ struct snd_pcm_substream *substream = vmf->vma->vm_private_data;
struct snd_pcm_runtime *runtime;
if (substream == NULL)
@@ -3341,10 +3339,9 @@ snd_pcm_default_page_ops(struct snd_pcm_substream *substream, unsigned long ofs)
/*
* fault callback for mmapping a RAM page
*/
-static int snd_pcm_mmap_data_fault(struct vm_area_struct *area,
- struct vm_fault *vmf)
+static int snd_pcm_mmap_data_fault(struct vm_fault *vmf)
{
- struct snd_pcm_substream *substream = area->vm_private_data;
+ struct snd_pcm_substream *substream = vmf->vma->vm_private_data;
struct snd_pcm_runtime *runtime;
unsigned long offset;
struct page * page;
diff --git a/sound/usb/usx2y/us122l.c b/sound/usb/usx2y/us122l.c
index cf5dc33f4a6d..cf45bf1f7ee0 100644
--- a/sound/usb/usx2y/us122l.c
+++ b/sound/usb/usx2y/us122l.c
@@ -137,13 +137,12 @@ static void usb_stream_hwdep_vm_open(struct vm_area_struct *area)
snd_printdd(KERN_DEBUG "%i\n", atomic_read(&us122l->mmap_count));
}
-static int usb_stream_hwdep_vm_fault(struct vm_area_struct *area,
- struct vm_fault *vmf)
+static int usb_stream_hwdep_vm_fault(struct vm_fault *vmf)
{
unsigned long offset;
struct page *page;
void *vaddr;
- struct us122l *us122l = area->vm_private_data;
+ struct us122l *us122l = vmf->vma->vm_private_data;
struct usb_stream *s;
mutex_lock(&us122l->mutex);
diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c
index 0b34dbc8f302..605e1047c01d 100644
--- a/sound/usb/usx2y/usX2Yhwdep.c
+++ b/sound/usb/usx2y/usX2Yhwdep.c
@@ -31,19 +31,18 @@
#include "usbusx2y.h"
#include "usX2Yhwdep.h"
-static int snd_us428ctls_vm_fault(struct vm_area_struct *area,
- struct vm_fault *vmf)
+static int snd_us428ctls_vm_fault(struct vm_fault *vmf)
{
unsigned long offset;
struct page * page;
void *vaddr;
snd_printdd("ENTER, start %lXh, pgoff %ld\n",
- area->vm_start,
+ vmf->vma->vm_start,
vmf->pgoff);
offset = vmf->pgoff << PAGE_SHIFT;
- vaddr = (char*)((struct usX2Ydev *)area->vm_private_data)->us428ctls_sharedmem + offset;
+ vaddr = (char *)((struct usX2Ydev *)vmf->vma->vm_private_data)->us428ctls_sharedmem + offset;
page = virt_to_page(vaddr);
get_page(page);
vmf->page = page;
diff --git a/sound/usb/usx2y/usx2yhwdeppcm.c b/sound/usb/usx2y/usx2yhwdeppcm.c
index 90766a92e7fd..f95164b91152 100644
--- a/sound/usb/usx2y/usx2yhwdeppcm.c
+++ b/sound/usb/usx2y/usx2yhwdeppcm.c
@@ -652,14 +652,13 @@ static void snd_usX2Y_hwdep_pcm_vm_close(struct vm_area_struct *area)
}
-static int snd_usX2Y_hwdep_pcm_vm_fault(struct vm_area_struct *area,
- struct vm_fault *vmf)
+static int snd_usX2Y_hwdep_pcm_vm_fault(struct vm_fault *vmf)
{
unsigned long offset;
void *vaddr;
offset = vmf->pgoff << PAGE_SHIFT;
- vaddr = (char*)((struct usX2Ydev *)area->vm_private_data)->hwdep_pcm_shm + offset;
+ vaddr = (char *)((struct usX2Ydev *)vmf->vma->vm_private_data)->hwdep_pcm_shm + offset;
vmf->page = virt_to_page(vaddr);
get_page(vmf->page);
return 0;
diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c
index 6d8b8f22cf55..42c15f906aac 100644
--- a/tools/lib/find_bit.c
+++ b/tools/lib/find_bit.c
@@ -34,7 +34,7 @@ static unsigned long _find_next_bit(const unsigned long *addr,
{
unsigned long tmp;
- if (!nbits || start >= nbits)
+ if (unlikely(start >= nbits))
return nbits;
tmp = addr[start / BITS_PER_LONG] ^ invert;
diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
index 1bb01258e559..ccd07343d418 100644
--- a/tools/testing/selftests/sigaltstack/sas.c
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -57,7 +57,7 @@ void my_usr1(int sig, siginfo_t *si, void *u)
exit(EXIT_FAILURE);
}
if (stk.ss_flags != SS_DISABLE)
- printf("[FAIL]\tss_flags=%i, should be SS_DISABLE\n",
+ printf("[FAIL]\tss_flags=%x, should be SS_DISABLE\n",
stk.ss_flags);
else
printf("[OK]\tsigaltstack is disabled in sighandler\n");
@@ -122,7 +122,8 @@ int main(void)
if (stk.ss_flags == SS_DISABLE) {
printf("[OK]\tInitial sigaltstack state was SS_DISABLE\n");
} else {
- printf("[FAIL]\tInitial sigaltstack state was %i; should have been SS_DISABLE\n", stk.ss_flags);
+ printf("[FAIL]\tInitial sigaltstack state was %x; "
+ "should have been SS_DISABLE\n", stk.ss_flags);
return EXIT_FAILURE;
}
@@ -165,7 +166,7 @@ int main(void)
exit(EXIT_FAILURE);
}
if (stk.ss_flags != SS_AUTODISARM) {
- printf("[FAIL]\tss_flags=%i, should be SS_AUTODISARM\n",
+ printf("[FAIL]\tss_flags=%x, should be SS_AUTODISARM\n",
stk.ss_flags);
exit(EXIT_FAILURE);
}
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 983140e68661..222ee457febf 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -11,6 +11,8 @@ TEST_GEN_FILES += on-fault-limit
TEST_GEN_FILES += thuge-gen
TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += userfaultfd
+TEST_GEN_FILES += userfaultfd_hugetlb
+TEST_GEN_FILES += userfaultfd_shmem
TEST_GEN_FILES += mlock-random-test
TEST_PROGS := run_vmtests
@@ -18,6 +20,8 @@ TEST_PROGS := run_vmtests
include ../lib.mk
$(OUTPUT)/userfaultfd: LDLIBS += -lpthread ../../../../usr/include/linux/kernel.h
+$(OUTPUT)/userfaultfd_hugetlb: LDLIBS += -lpthread ../../../../usr/include/linux/kernel.h
+$(OUTPUT)/userfaultfd_shmem: LDLIBS += -lpthread ../../../../usr/include/linux/kernel.h
$(OUTPUT)/mlock-random-test: LDLIBS += -lcap
../../../../usr/include/linux/kernel.h:
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index e11968b3677e..c92f6cf31d0a 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -103,6 +103,30 @@ else
echo "[PASS]"
fi
+echo "----------------------------"
+echo "running userfaultfd_hugetlb"
+echo "----------------------------"
+# 258MB total huge pages == 128MB src and 128MB dst
+./userfaultfd_hugetlb 128 32 $mnt/ufd_test_file
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+rm -f $mnt/ufd_test_file
+
+echo "----------------------------"
+echo "running userfaultfd_shmem"
+echo "----------------------------"
+./userfaultfd_shmem 128 32
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
#cleanup
umount $mnt
rm -rf $mnt
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index d77ed41b2094..e9449c801888 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -63,6 +63,7 @@
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
+#include <sys/wait.h>
#include <pthread.h>
#include <linux/userfaultfd.h>
@@ -76,8 +77,12 @@ static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
#define BOUNCE_POLL (1<<3)
static int bounces;
+#ifdef HUGETLB_TEST
+static int huge_fd;
+static char *huge_fd_off0;
+#endif
static unsigned long long *count_verify;
-static int uffd, finished, *pipefd;
+static int uffd, uffd_flags, finished, *pipefd;
static char *area_src, *area_dst;
static char *zeropage;
pthread_attr_t attr;
@@ -97,6 +102,102 @@ pthread_attr_t attr;
~(unsigned long)(sizeof(unsigned long long) \
- 1)))
+#if !defined(HUGETLB_TEST) && !defined(SHMEM_TEST)
+
+/* Anonymous memory */
+#define EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \
+ (1 << _UFFDIO_COPY) | \
+ (1 << _UFFDIO_ZEROPAGE))
+
+static int release_pages(char *rel_area)
+{
+ int ret = 0;
+
+ if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ ret = 1;
+ }
+
+ return ret;
+}
+
+static void allocate_area(void **alloc_area)
+{
+ if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) {
+ fprintf(stderr, "out of memory\n");
+ *alloc_area = NULL;
+ }
+}
+
+#else /* HUGETLB_TEST or SHMEM_TEST */
+
+#define EXPECTED_IOCTLS UFFD_API_RANGE_IOCTLS_BASIC
+
+#ifdef HUGETLB_TEST
+
+/* HugeTLB memory */
+static int release_pages(char *rel_area)
+{
+ int ret = 0;
+
+ if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ rel_area == huge_fd_off0 ? 0 :
+ nr_pages * page_size,
+ nr_pages * page_size)) {
+ perror("fallocate");
+ ret = 1;
+ }
+
+ return ret;
+}
+
+
+static void allocate_area(void **alloc_area)
+{
+ *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_HUGETLB, huge_fd,
+ *alloc_area == area_src ? 0 :
+ nr_pages * page_size);
+ if (*alloc_area == MAP_FAILED) {
+ fprintf(stderr, "mmap of hugetlbfs file failed\n");
+ *alloc_area = NULL;
+ }
+
+ if (*alloc_area == area_src)
+ huge_fd_off0 = *alloc_area;
+}
+
+#elif defined(SHMEM_TEST)
+
+/* Shared memory */
+static int release_pages(char *rel_area)
+{
+ int ret = 0;
+
+ if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) {
+ perror("madvise");
+ ret = 1;
+ }
+
+ return ret;
+}
+
+static void allocate_area(void **alloc_area)
+{
+ *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+ if (*alloc_area == MAP_FAILED) {
+ fprintf(stderr, "shared memory mmap failed\n");
+ *alloc_area = NULL;
+ }
+}
+
+#else /* SHMEM_TEST */
+#error "Undefined test type"
+#endif /* HUGETLB_TEST */
+
+#endif /* !defined(HUGETLB_TEST) && !defined(SHMEM_TEST) */
+
static int my_bcmp(char *str1, char *str2, size_t n)
{
unsigned long i;
@@ -217,7 +318,7 @@ static void *locking_thread(void *arg)
return NULL;
}
-static int copy_page(unsigned long offset)
+static int copy_page(int ufd, unsigned long offset)
{
struct uffdio_copy uffdio_copy;
@@ -229,7 +330,7 @@ static int copy_page(unsigned long offset)
uffdio_copy.len = page_size;
uffdio_copy.mode = 0;
uffdio_copy.copy = 0;
- if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy)) {
+ if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
/* real retval in ufdio_copy.copy */
if (uffdio_copy.copy != -EEXIST)
fprintf(stderr, "UFFDIO_COPY error %Ld\n",
@@ -247,6 +348,7 @@ static void *uffd_poll_thread(void *arg)
unsigned long cpu = (unsigned long) arg;
struct pollfd pollfd[2];
struct uffd_msg msg;
+ struct uffdio_register uffd_reg;
int ret;
unsigned long offset;
char tmp_chr;
@@ -278,16 +380,35 @@ static void *uffd_poll_thread(void *arg)
continue;
perror("nonblocking read error"), exit(1);
}
- if (msg.event != UFFD_EVENT_PAGEFAULT)
+ switch (msg.event) {
+ default:
fprintf(stderr, "unexpected msg event %u\n",
msg.event), exit(1);
- if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
- fprintf(stderr, "unexpected write fault\n"), exit(1);
- offset = (char *)(unsigned long)msg.arg.pagefault.address -
- area_dst;
- offset &= ~(page_size-1);
- if (copy_page(offset))
- userfaults++;
+ break;
+ case UFFD_EVENT_PAGEFAULT:
+ if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+ fprintf(stderr, "unexpected write fault\n"), exit(1);
+ offset = (char *)(unsigned long)msg.arg.pagefault.address -
+ area_dst;
+ offset &= ~(page_size-1);
+ if (copy_page(uffd, offset))
+ userfaults++;
+ break;
+ case UFFD_EVENT_FORK:
+ uffd = msg.arg.fork.ufd;
+ pollfd[0].fd = uffd;
+ break;
+ case UFFD_EVENT_REMOVE:
+ uffd_reg.range.start = msg.arg.remove.start;
+ uffd_reg.range.len = msg.arg.remove.end -
+ msg.arg.remove.start;
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
+ fprintf(stderr, "remove failure\n"), exit(1);
+ break;
+ case UFFD_EVENT_REMAP:
+ area_dst = (char *)(unsigned long)msg.arg.remap.to;
+ break;
+ }
}
return (void *)userfaults;
}
@@ -324,7 +445,7 @@ static void *uffd_read_thread(void *arg)
offset = (char *)(unsigned long)msg.arg.pagefault.address -
area_dst;
offset &= ~(page_size-1);
- if (copy_page(offset))
+ if (copy_page(uffd, offset))
(*this_cpu_userfaults)++;
}
return (void *)NULL;
@@ -338,7 +459,7 @@ static void *background_thread(void *arg)
for (page_nr = cpu * nr_pages_per_cpu;
page_nr < (cpu+1) * nr_pages_per_cpu;
page_nr++)
- copy_page(page_nr * page_size);
+ copy_page(uffd, page_nr * page_size);
return NULL;
}
@@ -384,10 +505,8 @@ static int stress(unsigned long *userfaults)
* UFFDIO_COPY without writing zero pages into area_dst
* because the background threads already completed).
*/
- if (madvise(area_src, nr_pages * page_size, MADV_DONTNEED)) {
- perror("madvise");
+ if (release_pages(area_src))
return 1;
- }
for (cpu = 0; cpu < nr_cpus; cpu++) {
char c;
@@ -414,27 +533,9 @@ static int stress(unsigned long *userfaults)
return 0;
}
-static int userfaultfd_stress(void)
+static int userfaultfd_open(int features)
{
- void *area;
- char *tmp_area;
- unsigned long nr;
- struct uffdio_register uffdio_register;
struct uffdio_api uffdio_api;
- unsigned long cpu;
- int uffd_flags, err;
- unsigned long userfaults[nr_cpus];
-
- if (posix_memalign(&area, page_size, nr_pages * page_size)) {
- fprintf(stderr, "out of memory\n");
- return 1;
- }
- area_src = area;
- if (posix_memalign(&area, page_size, nr_pages * page_size)) {
- fprintf(stderr, "out of memory\n");
- return 1;
- }
- area_dst = area;
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
if (uffd < 0) {
@@ -445,7 +546,7 @@ static int userfaultfd_stress(void)
uffd_flags = fcntl(uffd, F_GETFD, NULL);
uffdio_api.api = UFFD_API;
- uffdio_api.features = 0;
+ uffdio_api.features = features;
if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
fprintf(stderr, "UFFDIO_API\n");
return 1;
@@ -455,6 +556,233 @@ static int userfaultfd_stress(void)
return 1;
}
+ return 0;
+}
+
+/*
+ * For non-cooperative userfaultfd test we fork() a process that will
+ * generate pagefaults, will mremap the area monitored by the
+ * userfaultfd and at last this process will release the monitored
+ * area.
+ * For the anonymous and shared memory the area is divided into two
+ * parts, the first part is accessed before mremap, and the second
+ * part is accessed after mremap. Since hugetlbfs does not support
+ * mremap, the entire monitored area is accessed in a single pass for
+ * HUGETLB_TEST.
+ * The release of the pages currently generates event for shmem and
+ * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
+ * for hugetlb.
+ */
+static int faulting_process(void)
+{
+ unsigned long nr;
+ unsigned long long count;
+
+#ifndef HUGETLB_TEST
+ unsigned long split_nr_pages = (nr_pages + 1) / 2;
+#else
+ unsigned long split_nr_pages = nr_pages;
+#endif
+
+ for (nr = 0; nr < split_nr_pages; nr++) {
+ count = *area_count(area_dst, nr);
+ if (count != count_verify[nr]) {
+ fprintf(stderr,
+ "nr %lu memory corruption %Lu %Lu\n",
+ nr, count,
+ count_verify[nr]), exit(1);
+ }
+ }
+
+#ifndef HUGETLB_TEST
+ area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
+ if (area_dst == MAP_FAILED)
+ perror("mremap"), exit(1);
+
+ for (; nr < nr_pages; nr++) {
+ count = *area_count(area_dst, nr);
+ if (count != count_verify[nr]) {
+ fprintf(stderr,
+ "nr %lu memory corruption %Lu %Lu\n",
+ nr, count,
+ count_verify[nr]), exit(1);
+ }
+ }
+
+ if (release_pages(area_dst))
+ return 1;
+
+ for (nr = 0; nr < nr_pages; nr++) {
+ if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
+ fprintf(stderr, "nr %lu is not zero\n", nr), exit(1);
+ }
+
+#endif /* HUGETLB_TEST */
+
+ return 0;
+}
+
+static int uffdio_zeropage(int ufd, unsigned long offset)
+{
+ struct uffdio_zeropage uffdio_zeropage;
+ int ret;
+ unsigned long has_zeropage = EXPECTED_IOCTLS & (1 << _UFFDIO_ZEROPAGE);
+
+ if (offset >= nr_pages * page_size)
+ fprintf(stderr, "unexpected offset %lu\n",
+ offset), exit(1);
+ uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
+ uffdio_zeropage.range.len = page_size;
+ uffdio_zeropage.mode = 0;
+ ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
+ if (ret) {
+ /* real retval in ufdio_zeropage.zeropage */
+ if (has_zeropage) {
+ if (uffdio_zeropage.zeropage == -EEXIST)
+ fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"),
+ exit(1);
+ else
+ fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n",
+ uffdio_zeropage.zeropage), exit(1);
+ } else {
+ if (uffdio_zeropage.zeropage != -EINVAL)
+ fprintf(stderr,
+ "UFFDIO_ZEROPAGE not -EINVAL %Ld\n",
+ uffdio_zeropage.zeropage), exit(1);
+ }
+ } else if (has_zeropage) {
+ if (uffdio_zeropage.zeropage != page_size) {
+ fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
+ uffdio_zeropage.zeropage), exit(1);
+ } else
+ return 1;
+ } else {
+ fprintf(stderr,
+ "UFFDIO_ZEROPAGE succeeded %Ld\n",
+ uffdio_zeropage.zeropage), exit(1);
+ }
+
+ return 0;
+}
+
+/* exercise UFFDIO_ZEROPAGE */
+static int userfaultfd_zeropage_test(void)
+{
+ struct uffdio_register uffdio_register;
+ unsigned long expected_ioctls;
+
+ printf("testing UFFDIO_ZEROPAGE: ");
+ fflush(stdout);
+
+ if (release_pages(area_dst))
+ return 1;
+
+ if (userfaultfd_open(0) < 0)
+ return 1;
+ uffdio_register.range.start = (unsigned long) area_dst;
+ uffdio_register.range.len = nr_pages * page_size;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ fprintf(stderr, "register failure\n"), exit(1);
+
+ expected_ioctls = EXPECTED_IOCTLS;
+ if ((uffdio_register.ioctls & expected_ioctls) !=
+ expected_ioctls)
+ fprintf(stderr,
+ "unexpected missing ioctl for anon memory\n"),
+ exit(1);
+
+ if (uffdio_zeropage(uffd, 0)) {
+ if (my_bcmp(area_dst, zeropage, page_size))
+ fprintf(stderr, "zeropage is not zero\n"), exit(1);
+ }
+
+ close(uffd);
+ printf("done.\n");
+ return 0;
+}
+
+static int userfaultfd_events_test(void)
+{
+ struct uffdio_register uffdio_register;
+ unsigned long expected_ioctls;
+ unsigned long userfaults;
+ pthread_t uffd_mon;
+ int err, features;
+ pid_t pid;
+ char c;
+
+ printf("testing events (fork, remap, remove): ");
+ fflush(stdout);
+
+ if (release_pages(area_dst))
+ return 1;
+
+ features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
+ UFFD_FEATURE_EVENT_REMOVE;
+ if (userfaultfd_open(features) < 0)
+ return 1;
+ fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+
+ uffdio_register.range.start = (unsigned long) area_dst;
+ uffdio_register.range.len = nr_pages * page_size;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ fprintf(stderr, "register failure\n"), exit(1);
+
+ expected_ioctls = EXPECTED_IOCTLS;
+ if ((uffdio_register.ioctls & expected_ioctls) !=
+ expected_ioctls)
+ fprintf(stderr,
+ "unexpected missing ioctl for anon memory\n"),
+ exit(1);
+
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
+ perror("uffd_poll_thread create"), exit(1);
+
+ pid = fork();
+ if (pid < 0)
+ perror("fork"), exit(1);
+
+ if (!pid)
+ return faulting_process();
+
+ waitpid(pid, &err, 0);
+ if (err)
+ fprintf(stderr, "faulting process failed\n"), exit(1);
+
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+ perror("pipe write"), exit(1);
+ if (pthread_join(uffd_mon, (void **)&userfaults))
+ return 1;
+
+ close(uffd);
+ printf("userfaults: %ld\n", userfaults);
+
+ return userfaults != nr_pages;
+}
+
+static int userfaultfd_stress(void)
+{
+ void *area;
+ char *tmp_area;
+ unsigned long nr;
+ struct uffdio_register uffdio_register;
+ unsigned long cpu;
+ int err;
+ unsigned long userfaults[nr_cpus];
+
+ allocate_area((void **)&area_src);
+ if (!area_src)
+ return 1;
+ allocate_area((void **)&area_dst);
+ if (!area_dst)
+ return 1;
+
+ if (userfaultfd_open(0) < 0)
+ return 1;
+
count_verify = malloc(nr_pages * sizeof(unsigned long long));
if (!count_verify) {
perror("count_verify");
@@ -528,9 +856,7 @@ static int userfaultfd_stress(void)
fprintf(stderr, "register failure\n");
return 1;
}
- expected_ioctls = (1 << _UFFDIO_WAKE) |
- (1 << _UFFDIO_COPY) |
- (1 << _UFFDIO_ZEROPAGE);
+ expected_ioctls = EXPECTED_IOCTLS;
if ((uffdio_register.ioctls & expected_ioctls) !=
expected_ioctls) {
fprintf(stderr,
@@ -562,10 +888,8 @@ static int userfaultfd_stress(void)
* MADV_DONTNEED only after the UFFDIO_REGISTER, so it's
* required to MADV_DONTNEED here.
*/
- if (madvise(area_dst, nr_pages * page_size, MADV_DONTNEED)) {
- perror("madvise 2");
+ if (release_pages(area_dst))
return 1;
- }
/* bounce pass */
if (stress(userfaults))
@@ -603,9 +927,15 @@ static int userfaultfd_stress(void)
printf("\n");
}
- return err;
+ if (err)
+ return err;
+
+ close(uffd);
+ return userfaultfd_zeropage_test() || userfaultfd_events_test();
}
+#ifndef HUGETLB_TEST
+
int main(int argc, char **argv)
{
if (argc < 3)
@@ -632,6 +962,74 @@ int main(int argc, char **argv)
return userfaultfd_stress();
}
+#else /* HUGETLB_TEST */
+
+/*
+ * Copied from mlock2-tests.c
+ */
+unsigned long default_huge_page_size(void)
+{
+ unsigned long hps = 0;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f = fopen("/proc/meminfo", "r");
+
+ if (!f)
+ return 0;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
+ hps <<= 10;
+ break;
+ }
+ }
+
+ free(line);
+ fclose(f);
+ return hps;
+}
+
+int main(int argc, char **argv)
+{
+ if (argc < 4)
+ fprintf(stderr, "Usage: <MiB> <bounces> <hugetlbfs_file>\n"),
+ exit(1);
+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ page_size = default_huge_page_size();
+ if (!page_size)
+ fprintf(stderr, "Unable to determine huge page size\n"),
+ exit(2);
+ if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
+ > page_size)
+ fprintf(stderr, "Impossible to run this test\n"), exit(2);
+ nr_pages_per_cpu = atol(argv[1]) * 1024*1024 / page_size /
+ nr_cpus;
+ if (!nr_pages_per_cpu) {
+ fprintf(stderr, "invalid MiB\n");
+ fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+ }
+ bounces = atoi(argv[2]);
+ if (bounces <= 0) {
+ fprintf(stderr, "invalid bounces\n");
+ fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+ }
+ nr_pages = nr_pages_per_cpu * nr_cpus;
+ huge_fd = open(argv[3], O_CREAT | O_RDWR, 0755);
+ if (huge_fd < 0) {
+ fprintf(stderr, "Open of %s failed", argv[3]);
+ perror("open");
+ exit(1);
+ }
+ if (ftruncate(huge_fd, 0)) {
+ fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]);
+ perror("ftruncate");
+ exit(1);
+ }
+ printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
+ nr_pages, nr_pages_per_cpu);
+ return userfaultfd_stress();
+}
+
+#endif
#else /* __NR_userfaultfd */
#warning "missing __NR_userfaultfd definition"
diff --git a/tools/vm/Makefile b/tools/vm/Makefile
index 93aadaf7ff63..006029456988 100644
--- a/tools/vm/Makefile
+++ b/tools/vm/Makefile
@@ -9,6 +9,8 @@ CC = $(CROSS_COMPILE)gcc
CFLAGS = -Wall -Wextra -I../lib/
LDFLAGS = $(LIBS)
+all: $(TARGETS)
+
$(TARGETS): $(LIBS)
$(LIBS):
@@ -20,3 +22,9 @@ $(LIBS):
clean:
$(RM) page-types slabinfo page_owner_sort
make -C $(LIB_DIR) clean
+
+sbindir ?= /usr/sbin
+
+install: all
+ install -d $(DESTDIR)$(sbindir)
+ install -m 755 -p $(TARGETS) $(DESTDIR)$(sbindir)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 482612b4e496..7556e1d56708 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2348,9 +2348,9 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
}
EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
-static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int kvm_vcpu_fault(struct vm_fault *vmf)
{
- struct kvm_vcpu *vcpu = vma->vm_file->private_data;
+ struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data;
struct page *page;
if (vmf->pgoff == 0)