summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author: Stephen Rothwell <sfr@canb.auug.org.au> 2017-02-17 15:53:02 +1100
committer: Stephen Rothwell <sfr@canb.auug.org.au> 2017-02-17 15:53:04 +1100
commit: 6641ce8e95e9cb5c678cf882f9eb7c7632fa1a2c (patch)
tree: e36b518353cf1b36c20a8b315de17ea8ee577c39 /include
parent: 200445685b48a1f4eb8e4abb4ae46f22236b1197 (diff)
parent: 1224d22e9f1c57618e6db3737c17f8a1e0927d18 (diff)
Merge branch 'akpm-current/current'
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/kprobes.h25
-rw-r--r--include/asm-generic/pgtable.h80
-rw-r--r--include/asm-generic/tlb.h14
-rw-r--r--include/linux/bug.h12
-rw-r--r--include/linux/cma.h3
-rw-r--r--include/linux/compat.h4
-rw-r--r--include/linux/compiler-gcc.h1
-rw-r--r--include/linux/compiler.h8
-rw-r--r--include/linux/dax.h14
-rw-r--r--include/linux/dma-contiguous.h4
-rw-r--r--include/linux/gfp.h2
-rw-r--r--include/linux/huge_mm.h84
-rw-r--r--include/linux/hugetlb.h12
-rw-r--r--include/linux/iomap.h3
-rw-r--r--include/linux/iopoll.h2
-rw-r--r--include/linux/kasan.h4
-rw-r--r--include/linux/kernel.h10
-rw-r--r--include/linux/kexec.h2
-rw-r--r--include/linux/kprobes.h19
-rw-r--r--include/linux/lz4.h701
-rw-r--r--include/linux/memblock.h2
-rw-r--r--include/linux/memcontrol.h2
-rw-r--r--include/linux/memory.h4
-rw-r--r--include/linux/memory_hotplug.h6
-rw-r--r--include/linux/migrate.h4
-rw-r--r--include/linux/mm.h129
-rw-r--r--include/linux/mm_inline.h7
-rw-r--r--include/linux/mmu_notifier.h14
-rw-r--r--include/linux/mmzone.h38
-rw-r--r--include/linux/pagemap.h13
-rw-r--r--include/linux/pfn_t.h18
-rw-r--r--include/linux/pid.h4
-rw-r--r--include/linux/rbtree_augmented.h4
-rw-r--r--include/linux/rmap.h52
-rw-r--r--include/linux/rodata_test.h24
-rw-r--r--include/linux/sem.h2
-rw-r--r--include/linux/shmem_fs.h11
-rw-r--r--include/linux/slab.h45
-rw-r--r--include/linux/slub_def.h4
-rw-r--r--include/linux/swap.h30
-rw-r--r--include/linux/swap_slots.h30
-rw-r--r--include/linux/trace_events.h4
-rw-r--r--include/linux/userfaultfd_k.h67
-rw-r--r--include/linux/vm_event_item.h1
-rw-r--r--include/linux/writeback.h2
-rw-r--r--include/trace/events/compaction.h60
-rw-r--r--include/trace/events/fs_dax.h156
-rw-r--r--include/trace/events/mmflags.h98
-rw-r--r--include/trace/events/oom.h81
-rw-r--r--include/trace/events/vmscan.h150
-rw-r--r--include/trace/events/writeback.h2
-rw-r--r--include/trace/trace_events.h11
-rw-r--r--include/uapi/asm-generic/ioctl.h10
-rw-r--r--include/uapi/linux/auto_dev-ioctl.h10
-rw-r--r--include/uapi/linux/auto_fs.h25
-rw-r--r--include/uapi/linux/auto_fs4.h16
-rw-r--r--include/uapi/linux/mqueue.h2
-rw-r--r--include/uapi/linux/userfaultfd.h73
58 files changed, 1858 insertions, 357 deletions
diff --git a/include/asm-generic/kprobes.h b/include/asm-generic/kprobes.h
new file mode 100644
index 000000000000..57af9f21d148
--- /dev/null
+++ b/include/asm-generic/kprobes.h
@@ -0,0 +1,25 @@
+#ifndef _ASM_GENERIC_KPROBES_H
+#define _ASM_GENERIC_KPROBES_H
+
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#ifdef CONFIG_KPROBES
+/*
+ * Blacklist generating macro. Specify functions which are not to be
+ * probed by using this macro.
+ */
+# define __NOKPROBE_SYMBOL(fname) \
+static unsigned long __used \
+ __attribute__((__section__("_kprobe_blacklist"))) \
+ _kbl_addr_##fname = (unsigned long)fname;
+# define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname)
+/* Use this to forbid a kprobes attach on very low level functions */
+# define __kprobes __attribute__((__section__(".kprobes.text")))
+# define nokprobe_inline __always_inline
+#else
+# define NOKPROBE_SYMBOL(fname)
+# define __kprobes
+# define nokprobe_inline inline
+#endif
+#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
+
+#endif /* _ASM_GENERIC_KPROBES_H */
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 18af2bcefe6a..a0aba0f9c57b 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -36,6 +36,9 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma,
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp,
pmd_t entry, int dirty);
+extern int pudp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp,
+ pud_t entry, int dirty);
#else
static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp,
@@ -44,6 +47,13 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
BUILD_BUG();
return 0;
}
+static inline int pudp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp,
+ pud_t entry, int dirty)
+{
+ BUILD_BUG();
+ return 0;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
@@ -121,8 +131,8 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
}
#endif
-#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long address,
pmd_t *pmdp)
@@ -131,20 +141,40 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
pmd_clear(pmdp);
return pmd;
}
+#endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */
+#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
+static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long address,
+ pud_t *pudp)
+{
+ pud_t pud = *pudp;
+
+ pud_clear(pudp);
+ return pud;
+}
+#endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-#endif
-#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp,
int full)
{
return pmdp_huge_get_and_clear(mm, address, pmdp);
}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
+#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
+static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm,
+ unsigned long address, pud_t *pudp,
+ int full)
+{
+ return pudp_huge_get_and_clear(mm, address, pudp);
+}
+#endif
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
unsigned long address, pte_t *ptep,
@@ -181,6 +211,9 @@ extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
unsigned long address,
pmd_t *pmdp);
+extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
+ unsigned long address,
+ pud_t *pudp);
#endif
#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
@@ -208,6 +241,23 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
+#ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline void pudp_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pud_t *pudp)
+{
+ pud_t old_pud = *pudp;
+
+ set_pud_at(mm, address, pudp, pud_wrprotect(old_pud));
+}
+#else
+static inline void pudp_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pud_t *pudp)
+{
+ BUILD_BUG();
+}
+#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
+#endif
#ifndef pmdp_collapse_flush
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -273,12 +323,23 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
return pmd_val(pmd_a) == pmd_val(pmd_b);
}
+
+static inline int pud_same(pud_t pud_a, pud_t pud_b)
+{
+ return pud_val(pud_a) == pud_val(pud_b);
+}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
BUILD_BUG();
return 0;
}
+
+static inline int pud_same(pud_t pud_a, pud_t pud_b)
+{
+ BUILD_BUG();
+ return 0;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
@@ -640,6 +701,15 @@ static inline int pmd_write(pmd_t pmd)
#endif /* __HAVE_ARCH_PMD_WRITE */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
+ (defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+ !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
+static inline int pud_trans_huge(pud_t pud)
+{
+ return 0;
+}
+#endif
+
#ifndef pmd_read_atomic
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
@@ -785,8 +855,10 @@ static inline int pmd_clear_huge(pmd_t *pmd)
* e.g. see arch/arc: flush_pmd_tlb_range
*/
#define flush_pmd_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
+#define flush_pud_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
#else
#define flush_pmd_tlb_range(vma, addr, end) BUILD_BUG()
+#define flush_pud_tlb_range(vma, addr, end) BUILD_BUG()
#endif
#endif
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 7eed8cf3130a..4329bc6ef04b 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -232,6 +232,20 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
__tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \
} while (0)
+/**
+ * tlb_remove_pud_tlb_entry - remember a pud mapping for later tlb
+ * invalidation. This is a nop so far, because only x86 needs it.
+ */
+#ifndef __tlb_remove_pud_tlb_entry
+#define __tlb_remove_pud_tlb_entry(tlb, pudp, address) do {} while (0)
+#endif
+
+#define tlb_remove_pud_tlb_entry(tlb, pudp, address) \
+ do { \
+ __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE); \
+ __tlb_remove_pud_tlb_entry(tlb, pudp, address); \
+ } while (0)
+
/*
* For things like page tables caches (ie caching addresses "inside" the
* page tables, like x86 does), for legacy reasons, flushing an
diff --git a/include/linux/bug.h b/include/linux/bug.h
index baff2e8fc8a8..5828489309bb 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -124,18 +124,20 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr,
/*
* Since detected data corruption should stop operation on the affected
- * structures, this returns false if the corruption condition is found.
+ * structures, the return value must be checked and sanely acted on by caller.
*/
+static inline __must_check bool check_data_corruption(bool v) { return v; }
#define CHECK_DATA_CORRUPTION(condition, fmt, ...) \
- do { \
- if (unlikely(condition)) { \
+ check_data_corruption(({ \
+ bool corruption = unlikely(condition); \
+ if (corruption) { \
if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \
pr_err(fmt, ##__VA_ARGS__); \
BUG(); \
} else \
WARN(1, fmt, ##__VA_ARGS__); \
- return false; \
} \
- } while (0)
+ corruption; \
+ }))
#endif /* _LINUX_BUG_H */
diff --git a/include/linux/cma.h b/include/linux/cma.h
index 6f0a91b37f68..03f32d0bd1d8 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -29,6 +29,7 @@ extern int __init cma_declare_contiguous(phys_addr_t base,
extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
unsigned int order_per_bit,
struct cma **res_cma);
-extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align);
+extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
+ gfp_t gfp_mask);
extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count);
#endif
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 9e40be522793..aef47be2a5c1 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -711,8 +711,10 @@ int __compat_save_altstack(compat_stack_t __user *, unsigned long);
compat_stack_t __user *__uss = uss; \
struct task_struct *t = current; \
put_user_ex(ptr_to_compat((void __user *)t->sas_ss_sp), &__uss->ss_sp); \
- put_user_ex(sas_ss_flags(sp), &__uss->ss_flags); \
+ put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \
put_user_ex(t->sas_ss_size, &__uss->ss_size); \
+ if (t->sas_ss_flags & SS_AUTODISARM) \
+ sas_ss_reset(t); \
} while (0);
asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index cad999386d61..d5e1fedbad24 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -122,6 +122,7 @@
#define __attribute_const__ __attribute__((__const__))
#define __maybe_unused __attribute__((unused))
#define __always_unused __attribute__((unused))
+#define __mode(x) __attribute__((mode(x)))
/* gcc version specific checks */
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index d93221e4849f..6e8e160b1e4b 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -585,12 +585,4 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
(_________p1); \
})
-/* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */
-#ifdef CONFIG_KPROBES
-# define __kprobes __attribute__((__section__(".kprobes.text")))
-# define nokprobe_inline __always_inline
-#else
-# define __kprobes
-# define nokprobe_inline inline
-#endif
#endif /* __LINUX_COMPILER_H */
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 2983e52efd07..c5c1bf29efeb 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -38,8 +38,8 @@ static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops);
-int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
- const struct iomap_ops *ops);
+int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
+ const struct iomap_ops *ops);
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
@@ -71,21 +71,13 @@ static inline unsigned int dax_radix_order(void *entry)
return PMD_SHIFT - PAGE_SHIFT;
return 0;
}
-int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmd, unsigned int flags, const struct iomap_ops *ops);
#else
static inline unsigned int dax_radix_order(void *entry)
{
return 0;
}
-static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmd, unsigned int flags,
- const struct iomap_ops *ops)
-{
- return VM_FAULT_FALLBACK;
-}
#endif
-int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
+int dax_pfn_mkwrite(struct vm_fault *vmf);
static inline bool vma_is_dax(struct vm_area_struct *vma)
{
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index fec734df1524..b67bf6ac907d 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -112,7 +112,7 @@ static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size,
}
struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
- unsigned int order);
+ unsigned int order, gfp_t gfp_mask);
bool dma_release_from_contiguous(struct device *dev, struct page *pages,
int count);
@@ -145,7 +145,7 @@ int dma_declare_contiguous(struct device *dev, phys_addr_t size,
static inline
struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
- unsigned int order)
+ unsigned int order, gfp_t gfp_mask)
{
return NULL;
}
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0fe0b6295ab5..db373b9d3223 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -541,7 +541,7 @@ static inline bool pm_suspended_storage(void)
#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
/* The below functions must be run on a range from a single zone. */
extern int alloc_contig_range(unsigned long start, unsigned long end,
- unsigned migratetype);
+ unsigned migratetype, gfp_t gfp_mask);
extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
#endif
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 97e478d6b690..a3762d49ba39 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -6,6 +6,18 @@ extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
struct vm_area_struct *vma);
extern void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd);
+extern int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
+ struct vm_area_struct *vma);
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+extern void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
+#else
+static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
+{
+}
+#endif
+
extern int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
unsigned long addr,
@@ -17,6 +29,9 @@ extern bool madvise_free_huge_pmd(struct mmu_gather *tlb,
extern int zap_huge_pmd(struct mmu_gather *tlb,
struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr);
+extern int zap_huge_pud(struct mmu_gather *tlb,
+ struct vm_area_struct *vma,
+ pud_t *pud, unsigned long addr);
extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
unsigned char *vec);
@@ -26,13 +41,16 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, pgprot_t newprot,
int prot_numa);
-int vmf_insert_pfn_pmd(struct vm_area_struct *, unsigned long addr, pmd_t *,
- pfn_t pfn, bool write);
+int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd, pfn_t pfn, bool write);
+int vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
+ pud_t *pud, pfn_t pfn, bool write);
enum transparent_hugepage_flag {
TRANSPARENT_HUGEPAGE_FLAG,
TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
+ TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG,
TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG,
@@ -57,13 +75,14 @@ extern struct kobj_attribute shmem_enabled_attr;
#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd, int flags);
-
#define HPAGE_PMD_SHIFT PMD_SHIFT
#define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT)
#define HPAGE_PMD_MASK (~(HPAGE_PMD_SIZE - 1))
+#define HPAGE_PUD_SHIFT PUD_SHIFT
+#define HPAGE_PUD_SIZE ((1UL) << HPAGE_PUD_SHIFT)
+#define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1))
+
extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
#define transparent_hugepage_enabled(__vma) \
@@ -117,6 +136,17 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
bool freeze, struct page *page);
+void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
+ unsigned long address);
+
+#define split_huge_pud(__vma, __pud, __address) \
+ do { \
+ pud_t *____pud = (__pud); \
+ if (pud_trans_huge(*____pud) \
+ || pud_devmap(*____pud)) \
+ __split_huge_pud(__vma, __pud, __address); \
+ } while (0)
+
extern int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice);
extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
@@ -125,6 +155,8 @@ extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
long adjust_next);
extern spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd,
struct vm_area_struct *vma);
+extern spinlock_t *__pud_trans_huge_lock(pud_t *pud,
+ struct vm_area_struct *vma);
/* mmap_sem must be held on entry */
static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
struct vm_area_struct *vma)
@@ -135,6 +167,15 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
else
return NULL;
}
+static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
+ struct vm_area_struct *vma)
+{
+ VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
+ if (pud_trans_huge(*pud) || pud_devmap(*pud))
+ return __pud_trans_huge_lock(pud, vma);
+ else
+ return NULL;
+}
static inline int hpage_nr_pages(struct page *page)
{
if (unlikely(PageTransHuge(page)))
@@ -142,6 +183,11 @@ static inline int hpage_nr_pages(struct page *page)
return 1;
}
+struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd, int flags);
+struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
+ pud_t *pud, int flags);
+
extern int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
extern struct page *huge_zero_page;
@@ -156,6 +202,11 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
return is_huge_zero_page(pmd_page(pmd));
}
+static inline bool is_huge_zero_pud(pud_t pud)
+{
+ return false;
+}
+
struct page *mm_get_huge_zero_page(struct mm_struct *mm);
void mm_put_huge_zero_page(struct mm_struct *mm);
@@ -166,6 +217,10 @@ void mm_put_huge_zero_page(struct mm_struct *mm);
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
#define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; })
+#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; })
+#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
+#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
+
#define hpage_nr_pages(x) 1
#define transparent_hugepage_enabled(__vma) 0
@@ -194,6 +249,9 @@ static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
static inline void split_huge_pmd_address(struct vm_area_struct *vma,
unsigned long address, bool freeze, struct page *page) {}
+#define split_huge_pud(__vma, __pmd, __address) \
+ do { } while (0)
+
static inline int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice)
{
@@ -211,6 +269,11 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
{
return NULL;
}
+static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
+ struct vm_area_struct *vma)
+{
+ return NULL;
+}
static inline int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd)
{
@@ -222,6 +285,11 @@ static inline bool is_huge_zero_page(struct page *page)
return false;
}
+static inline bool is_huge_zero_pud(pud_t pud)
+{
+ return false;
+}
+
static inline void mm_put_huge_zero_page(struct mm_struct *mm)
{
return;
@@ -232,6 +300,12 @@ static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
{
return NULL;
}
+
+static inline struct page *follow_devmap_pud(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pud, int flags)
+{
+ return NULL;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* _LINUX_HUGE_MM_H */
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 48c76d612d40..503099d8aada 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -65,7 +65,8 @@ int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int,
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
struct page **, struct vm_area_struct **,
- unsigned long *, unsigned long *, long, unsigned int);
+ unsigned long *, unsigned long *, long, unsigned int,
+ int *);
void unmap_hugepage_range(struct vm_area_struct *,
unsigned long, unsigned long, struct page *);
void __unmap_hugepage_range_final(struct mmu_gather *tlb,
@@ -81,6 +82,11 @@ void hugetlb_show_meminfo(void);
unsigned long hugetlb_total_pages(void);
int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags);
+int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr,
+ unsigned long src_addr,
+ struct page **pagep);
int hugetlb_reserve_pages(struct inode *inode, long from, long to,
struct vm_area_struct *vma,
vm_flags_t vm_flags);
@@ -131,7 +137,7 @@ static inline unsigned long hugetlb_total_pages(void)
return 0;
}
-#define follow_hugetlb_page(m,v,p,vs,a,b,i,w) ({ BUG(); 0; })
+#define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n) ({ BUG(); 0; })
#define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL)
#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
static inline void hugetlb_report_meminfo(struct seq_file *m)
@@ -149,6 +155,8 @@ static inline void hugetlb_show_meminfo(void)
#define is_hugepage_only_range(mm, addr, len) 0
#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
#define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; })
+#define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
+ src_addr, pagep) ({ BUG(); 0; })
#define huge_pte_offset(mm, address) 0
static inline int dequeue_hwpoisoned_huge_page(struct page *page)
{
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 891459caa278..7291810067eb 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -79,8 +79,7 @@ int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
bool *did_zero, const struct iomap_ops *ops);
int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
const struct iomap_ops *ops);
-int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
- const struct iomap_ops *ops);
+int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops);
int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
loff_t start, loff_t len, const struct iomap_ops *ops);
diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
index 1c30014ed176..d29e1e21bf3f 100644
--- a/include/linux/iopoll.h
+++ b/include/linux/iopoll.h
@@ -17,7 +17,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
-#include <linux/hrtimer.h>
+#include <linux/ktime.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/io.h>
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 820c0ad54a01..c908b25bf5a5 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -52,7 +52,7 @@ void kasan_free_pages(struct page *page, unsigned int order);
void kasan_cache_create(struct kmem_cache *cache, size_t *size,
unsigned long *flags);
void kasan_cache_shrink(struct kmem_cache *cache);
-void kasan_cache_destroy(struct kmem_cache *cache);
+void kasan_cache_shutdown(struct kmem_cache *cache);
void kasan_poison_slab(struct page *page);
void kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
@@ -98,7 +98,7 @@ static inline void kasan_cache_create(struct kmem_cache *cache,
size_t *size,
unsigned long *flags) {}
static inline void kasan_cache_shrink(struct kmem_cache *cache) {}
-static inline void kasan_cache_destroy(struct kmem_cache *cache) {}
+static inline void kasan_cache_shutdown(struct kmem_cache *cache) {}
static inline void kasan_poison_slab(struct page *page) {}
static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index cb09238f6d32..4c26dc3a8295 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -100,16 +100,18 @@
)
/*
- * Divide positive or negative dividend by positive divisor and round
- * to closest integer. Result is undefined for negative divisors and
- * for negative dividends if the divisor variable type is unsigned.
+ * Divide positive or negative dividend by positive or negative divisor
+ * and round to closest integer. Result is undefined for negative
+ * divisors if the dividend variable type is unsigned and for negative
+ * dividends if the divisor variable type is unsigned.
*/
#define DIV_ROUND_CLOSEST(x, divisor)( \
{ \
typeof(x) __x = x; \
typeof(divisor) __d = divisor; \
(((typeof(x))-1) > 0 || \
- ((typeof(divisor))-1) > 0 || (__x) > 0) ? \
+ ((typeof(divisor))-1) > 0 || \
+ (((__x) > 0) == ((__d) > 0))) ? \
(((__x) + ((__d) / 2)) / (__d)) : \
(((__x) - ((__d) / 2)) / (__d)); \
} \
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index d419d0e51fe5..e98e546b543c 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -283,6 +283,8 @@ phys_addr_t paddr_vmcoreinfo_note(void);
vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name)
#define VMCOREINFO_CONFIG(name) \
vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
+#define VMCOREINFO_PHYS_BASE(value) \
+ vmcoreinfo_append_str("PHYS_BASE=%lx\n", (unsigned long)value)
extern struct kimage *kexec_image;
extern struct kimage *kexec_crash_image;
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 16ddfb8b304a..c328e4f7dcad 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -29,7 +29,7 @@
* <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
* <prasanna@in.ibm.com> added function-return probes.
*/
-#include <linux/compiler.h> /* for __kprobes */
+#include <linux/compiler.h>
#include <linux/linkage.h>
#include <linux/list.h>
#include <linux/notifier.h>
@@ -40,9 +40,9 @@
#include <linux/rcupdate.h>
#include <linux/mutex.h>
#include <linux/ftrace.h>
+#include <asm/kprobes.h>
#ifdef CONFIG_KPROBES
-#include <asm/kprobes.h>
/* kprobe_status settings */
#define KPROBE_HIT_ACTIVE 0x00000001
@@ -51,6 +51,7 @@
#define KPROBE_HIT_SSDONE 0x00000008
#else /* CONFIG_KPROBES */
+#include <asm-generic/kprobes.h>
typedef int kprobe_opcode_t;
struct arch_specific_insn {
int dummy;
@@ -509,18 +510,4 @@ static inline bool is_kprobe_optinsn_slot(unsigned long addr)
}
#endif
-#ifdef CONFIG_KPROBES
-/*
- * Blacklist ganerating macro. Specify functions which is not probed
- * by using this macro.
- */
-#define __NOKPROBE_SYMBOL(fname) \
-static unsigned long __used \
- __attribute__((section("_kprobe_blacklist"))) \
- _kbl_addr_##fname = (unsigned long)fname;
-#define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname)
-#else
-#define NOKPROBE_SYMBOL(fname)
-#endif
-
#endif /* _LINUX_KPROBES_H */
diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index 6b784c59f321..a3912d7984b5 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -1,87 +1,648 @@
-#ifndef __LZ4_H__
-#define __LZ4_H__
-/*
- * LZ4 Kernel Interface
+/* LZ4 Kernel Interface
*
* Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
+ *
+ * This file is based on the original header file
+ * for LZ4 - Fast LZ compression algorithm.
+ *
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ * - LZ4 homepage : http://www.lz4.org
+ * - LZ4 source repository : https://github.com/lz4/lz4
*/
-#define LZ4_MEM_COMPRESS (16384)
-#define LZ4HC_MEM_COMPRESS (262144 + (2 * sizeof(unsigned char *)))
+#ifndef __LZ4_H__
+#define __LZ4_H__
+
+#include <linux/types.h>
+#include <linux/string.h> /* memset, memcpy */
+
+/*-************************************************************************
+ * CONSTANTS
+ **************************************************************************/
/*
- * lz4_compressbound()
- * Provides the maximum size that LZ4 may output in a "worst case" scenario
- * (input data not compressible)
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes
+ * (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
*/
-static inline size_t lz4_compressbound(size_t isize)
-{
- return isize + (isize / 255) + 16;
-}
+#define LZ4_MEMORY_USAGE 14
+
+#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize) (\
+ (unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE \
+ ? 0 \
+ : (isize) + ((isize)/255) + 16)
+
+#define LZ4_ACCELERATION_DEFAULT 1
+#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)
+
+#define LZ4HC_MIN_CLEVEL 3
+#define LZ4HC_DEFAULT_CLEVEL 9
+#define LZ4HC_MAX_CLEVEL 16
+
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+#define LZ4HC_HASH_LOG (LZ4HC_DICTIONARY_LOGSIZE - 1)
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+/*-************************************************************************
+ * STREAMING CONSTANTS AND STRUCTURES
+ **************************************************************************/
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
+
+#define LZ4_STREAMHCSIZE 262192
+#define LZ4_STREAMHCSIZE_SIZET (262192 / sizeof(size_t))
+
+#define LZ4_STREAMDECODESIZE_U64 4
+#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * \
+ sizeof(unsigned long long))
/*
- * lz4_compress()
- * src : source address of the original data
- * src_len : size of the original data
- * dst : output buffer address of the compressed data
- * This requires 'dst' of size LZ4_COMPRESSBOUND.
- * dst_len : is the output size, which is returned after compress done
- * workmem : address of the working memory.
- * This requires 'workmem' of size LZ4_MEM_COMPRESS.
- * return : Success if return 0
- * Error if return (< 0)
- * note : Destination buffer and workmem must be already allocated with
- * the defined size.
- */
-int lz4_compress(const unsigned char *src, size_t src_len,
- unsigned char *dst, size_t *dst_len, void *wrkmem);
-
- /*
- * lz4hc_compress()
- * src : source address of the original data
- * src_len : size of the original data
- * dst : output buffer address of the compressed data
- * This requires 'dst' of size LZ4_COMPRESSBOUND.
- * dst_len : is the output size, which is returned after compress done
- * workmem : address of the working memory.
- * This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
- * return : Success if return 0
- * Error if return (< 0)
- * note : Destination buffer and workmem must be already allocated with
- * the defined size.
- */
-int lz4hc_compress(const unsigned char *src, size_t src_len,
- unsigned char *dst, size_t *dst_len, void *wrkmem);
+ * LZ4_stream_t - information structure to track an LZ4 stream.
+ */
+typedef struct {
+ uint32_t hashTable[LZ4_HASH_SIZE_U32];
+ uint32_t currentOffset;
+ uint32_t initCheck;
+ const uint8_t *dictionary;
+ uint8_t *bufferStart;
+ uint32_t dictSize;
+} LZ4_stream_t_internal;
+typedef union {
+ unsigned long long table[LZ4_STREAMSIZE_U64];
+ LZ4_stream_t_internal internal_donotuse;
+} LZ4_stream_t;
/*
- * lz4_decompress()
- * src : source address of the compressed data
- * src_len : is the input size, whcih is returned after decompress done
- * dest : output buffer address of the decompressed data
- * actual_dest_len: is the size of uncompressed data, supposing it's known
- * return : Success if return 0
- * Error if return (< 0)
- * note : Destination buffer must be already allocated.
- * slightly faster than lz4_decompress_unknownoutputsize()
- */
-int lz4_decompress(const unsigned char *src, size_t *src_len,
- unsigned char *dest, size_t actual_dest_len);
+ * LZ4_streamHC_t - information structure to track an LZ4HC stream.
+ */
+typedef struct {
+ unsigned int hashTable[LZ4HC_HASHTABLESIZE];
+ unsigned short chainTable[LZ4HC_MAXD];
+ /* next block to continue on current prefix */
+ const unsigned char *end;
+ /* All index relative to this position */
+ const unsigned char *base;
+ /* alternate base for extDict */
+ const unsigned char *dictBase;
+ /* below that point, need extDict */
+ unsigned int dictLimit;
+ /* below that point, no more dict */
+ unsigned int lowLimit;
+ /* index from which to continue dict update */
+ unsigned int nextToUpdate;
+ unsigned int compressionLevel;
+} LZ4HC_CCtx_internal;
+typedef union {
+ size_t table[LZ4_STREAMHCSIZE_SIZET];
+ LZ4HC_CCtx_internal internal_donotuse;
+} LZ4_streamHC_t;
/*
- * lz4_decompress_unknownoutputsize()
- * src : source address of the compressed data
- * src_len : is the input size, therefore the compressed size
- * dest : output buffer address of the decompressed data
- * dest_len: is the max size of the destination buffer, which is
- * returned with actual size of decompressed data after
- * decompress done
- * return : Success if return 0
- * Error if return (< 0)
- * note : Destination buffer must be already allocated.
- */
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
- unsigned char *dest, size_t *dest_len);
+ * LZ4_streamDecode_t - information structure to track an
+ * LZ4 stream during decompression.
+ *
+ * init this structure using LZ4_setStreamDecode (or memset()) before first use
+ */
+typedef struct {
+ const uint8_t *externalDict;
+ size_t extDictSize;
+ const uint8_t *prefixEnd;
+ size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+typedef union {
+ unsigned long long table[LZ4_STREAMDECODESIZE_U64];
+ LZ4_streamDecode_t_internal internal_donotuse;
+} LZ4_streamDecode_t;
+
+/*-************************************************************************
+ * SIZE OF STATE
+ **************************************************************************/
+#define LZ4_MEM_COMPRESS LZ4_STREAMSIZE
+#define LZ4HC_MEM_COMPRESS LZ4_STREAMHCSIZE
+
+/*-************************************************************************
+ * Compression Functions
+ **************************************************************************/
+
+/**
+ * LZ4_compressBound() - Max. output size in worst case scenarios
+ * @isize: Size of the input data
+ *
+ * Return: Max. size LZ4 may output in a "worst case" scenario
+ * (data not compressible)
+ */
+static inline int LZ4_compressBound(size_t isize)
+{
+ return LZ4_COMPRESSBOUND(isize);
+}
+
+/**
+ * LZ4_compress_default() - Compress data from source to dest
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxOutputSize: full or partial size of buffer 'dest'
+ * which must be already allocated
+ * @wrkmem: address of the working memory.
+ * This requires 'wrkmem' of size LZ4_MEM_COMPRESS.
+ *
+ * Compresses 'inputSize' bytes from buffer 'source'
+ * into already allocated 'dest' buffer of size 'maxOutputSize'.
+ * Compression is guaranteed to succeed if
+ * 'maxOutputSize' >= LZ4_compressBound(inputSize).
+ * It also runs faster, so it's a recommended setting.
+ * If the function cannot compress 'source' into a more limited 'dest' budget,
+ * compression stops *immediately*, and the function result is zero.
+ * As a consequence, 'dest' content is not valid.
+ *
+ * Return: Number of bytes written into buffer 'dest'
+ * (necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+ int maxOutputSize, void *wrkmem);
+
+/**
+ * LZ4_compress_fast() - As LZ4_compress_default providing an acceleration param
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxOutputSize: full or partial size of buffer 'dest'
+ * which must be already allocated
+ * @acceleration: acceleration factor
+ * @wrkmem: address of the working memory.
+ * This requires 'wrkmem' of size LZ4_MEM_COMPRESS.
+ *
+ * Same as LZ4_compress_default(), but allows to select an "acceleration"
+ * factor. The larger the acceleration value, the faster the algorithm,
+ * but also the lesser the compression. It's a trade-off. It can be fine tuned,
+ * with each successive value providing roughly +~3% to speed.
+ * An acceleration value of "1" is the same as regular LZ4_compress_default()
+ * Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT, which is 1.
+ *
+ * Return: Number of bytes written into buffer 'dest'
+ * (necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+ int maxOutputSize, int acceleration, void *wrkmem);
+
+/**
+ * LZ4_compress_destSize() - Compress as much data as possible
+ * from source to dest
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @sourceSizePtr: will be modified to indicate how many bytes were read
+ * from 'source' to fill 'dest'. New value is necessarily <= old value.
+ * @targetDestSize: Size of buffer 'dest' which must be already allocated
+ * @wrkmem: address of the working memory.
+ * This requires 'wrkmem' of size LZ4_MEM_COMPRESS.
+ *
+ * Reverse the logic, by compressing as much data as possible
+ * from 'source' buffer into already allocated buffer 'dest'
+ * of size 'targetDestSize'.
+ * This function either compresses the entire 'source' content into 'dest'
+ * if it's large enough, or fill 'dest' buffer completely with as much data as
+ * possible from 'source'.
+ *
+ * Return: Number of bytes written into 'dest' (necessarily <= targetDestSize)
+ * or 0 if compression fails
+ */
+int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr,
+ int targetDestSize, void *wrkmem);
+
+/*-************************************************************************
+ * Decompression Functions
+ **************************************************************************/
+
+/**
+ * LZ4_decompress_fast() - Decompresses data from 'source' into 'dest'
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ * which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ *
+ * Decompresses data from 'source' into 'dest'.
+ * This function fully respects memory boundaries for properly formed
+ * compressed data.
+ * It is a bit faster than LZ4_decompress_safe().
+ * However, it does not provide any protection against intentionally
+ * modified data stream (malicious input).
+ * Use this function in trusted environment only
+ * (data to decode comes from a trusted source).
+ *
+ * Return: number of bytes read from the source buffer
+ * or a negative result if decompression fails.
+ */
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize);
+
+/**
+ * LZ4_decompress_safe() - Decompression protected against buffer overflow
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ * which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ *
+ * Decompresses data from 'source' into 'dest'.
+ * If the source stream is detected malformed, the function will
+ * stop decoding and return a negative result.
+ * This function is protected against buffer overflow exploits,
+ * including malicious data packets. It never writes outside output buffer,
+ * nor reads outside input buffer.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ * (necessarily <= maxDecompressedSize)
+ * or a negative result in case of error
+ */
+int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
+ int maxDecompressedSize);
+
+/**
+ * LZ4_decompress_safe_partial() - Decompress a block of size 'compressedSize'
+ * at position 'source' into buffer 'dest'
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the decompressed data which must be
+ * already allocated
+ * @compressedSize: is the precise full size of the compressed block.
+ * @targetOutputSize: the decompression operation will try
+ * to stop as soon as 'targetOutputSize' has been reached
+ * @maxDecompressedSize: is the size of destination buffer
+ *
+ * This function decompresses a compressed block of size 'compressedSize'
+ * at position 'source' into destination buffer 'dest'
+ * of size 'maxDecompressedSize'.
+ * The function tries to stop decompressing operation as soon as
+ * 'targetOutputSize' has been reached, reducing decompression time.
+ * This function never writes outside of output buffer,
+ * and never reads outside of input buffer.
+ * It is therefore protected against malicious data packets.
+ *
+ * Return: the number of bytes decoded in the destination buffer
+ * (necessarily <= maxDecompressedSize)
+ * or a negative result in case of error
+ *
+ */
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+ int compressedSize, int targetOutputSize, int maxDecompressedSize);
+
+/*-************************************************************************
+ * LZ4 HC Compression
+ **************************************************************************/
+
+/**
+ * LZ4_compress_HC() - Compress data from `src` into `dst`, using HC algorithm
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @dstCapacity: full or partial size of buffer 'dst',
+ * which must be already allocated
+ * @compressionLevel: Recommended values are between 4 and 9, although any
+ * value between 1 and LZ4HC_MAX_CLEVEL will work.
+ * Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ * @wrkmem: address of the working memory.
+ * This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS.
+ *
+ * Compress data from 'src' into 'dst', using the more powerful
+ * but slower "HC" algorithm. Compression is guaranteed to succeed if
+ * `dstCapacity >= LZ4_compressBound(srcSize)`.
+ *
+ * Return : the number of bytes written into 'dst' or 0 if compression fails.
+ */
+int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity,
+ int compressionLevel, void *wrkmem);
+
+/**
+ * LZ4_resetStreamHC() - Init an allocated 'LZ4_streamHC_t' structure
+ * @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure
+ * @compressionLevel: Recommended values are between 4 and 9, although any
+ * value between 1 and LZ4HC_MAX_CLEVEL will work.
+ * Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ *
+ * An LZ4_streamHC_t structure can be allocated once
+ * and re-used multiple times.
+ * Use this function to init an allocated `LZ4_streamHC_t` structure
+ * and start a new compression.
+ */
+void LZ4_resetStreamHC(LZ4_streamHC_t *streamHCPtr, int compressionLevel);
+
+/**
+ * LZ4_loadDictHC() - Load a static dictionary into LZ4_streamHC
+ * @streamHCPtr: pointer to the LZ4_streamHC_t
+ * @dictionary: dictionary to load
+ * @dictSize: size of dictionary
+ *
+ * Use this function to load a static dictionary into LZ4HC_stream.
+ * Any previous data will be forgotten, only 'dictionary'
+ * will remain in memory.
+ * Loading a size of 0 is allowed.
+ *
+ * Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr, const char *dictionary,
+ int dictSize);
+
+/**
+ * LZ4_compress_HC_continue() - Compress 'src' using data from previously
+ * compressed blocks as a dictionary using the HC algorithm
+ * @streamHCPtr: Pointer to the previous 'LZ4_streamHC_t' structure
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data,
+ * which must be already allocated
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxDstSize: full or partial size of buffer 'dst'
+ * which must be already allocated
+ *
+ * These functions compress data in successive blocks of any size, using
+ * previous blocks as dictionary. One key assumption is that previous
+ * blocks (up to 64 KB) remain read-accessible while
+ * compressing next blocks. There is an exception for ring buffers,
+ * which can be smaller than 64 KB.
+ * Ring buffers scenario is automatically detected and handled by
+ * LZ4_compress_HC_continue().
+ * Before starting compression, state must be properly initialized,
+ * using LZ4_resetStreamHC().
+ * A first "fictional block" can then be designated as
+ * initial dictionary, using LZ4_loadDictHC() (Optional).
+ * Then, use LZ4_compress_HC_continue()
+ * to compress each successive block. Previous memory blocks
+ * (including initial dictionary when present) must remain accessible
+ * and unmodified during compression.
+ * 'dst' buffer should be sized to handle worst case scenarios, using
+ * LZ4_compressBound(), to ensure operation success.
+ * If, for any reason, previous data blocks can't be preserved unmodified
+ * in memory during next compression block,
+ * you must save it to a safer memory space, using LZ4_saveDictHC().
+ * Return value of LZ4_saveDictHC() is the size of dictionary
+ * effectively saved into 'safeBuffer'.
+ *
+ * Return: Number of bytes written into buffer 'dst' or 0 if compression fails
+ */
+int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr, const char *src,
+ char *dst, int srcSize, int maxDstSize);
+
+/**
+ * LZ4_saveDictHC() - Save static dictionary from LZ4HC_stream
+ * @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure
+ * @safeBuffer: buffer to save dictionary to, must be already allocated
+ * @maxDictSize: size of 'safeBuffer'
+ *
+ * If previously compressed data block is not guaranteed
+ * to remain available at its memory location,
+ * save it into a safer place (char *safeBuffer).
+ * Note : you don't need to call LZ4_loadDictHC() afterwards,
+ * dictionary is immediately usable, you can therefore call
+ * LZ4_compress_HC_continue().
+ *
+ * Return : saved dictionary size in bytes (necessarily <= maxDictSize),
+ * or 0 if error.
+ */
+int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer,
+ int maxDictSize);
+
+/*-*********************************************
+ * Streaming Compression Functions
+ ***********************************************/
+
+/**
+ * LZ4_resetStream() - Init an allocated 'LZ4_stream_t' structure
+ * @LZ4_stream: pointer to the 'LZ4_stream_t' structure
+ *
+ * An LZ4_stream_t structure can be allocated once
+ * and re-used multiple times.
+ * Use this function to init an allocated `LZ4_stream_t` structure
+ * and start a new compression.
+ */
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream);
+
+/**
+ * LZ4_loadDict() - Load a static dictionary into LZ4_stream
+ * @streamPtr: pointer to the LZ4_stream_t
+ * @dictionary: dictionary to load
+ * @dictSize: size of dictionary
+ *
+ * Use this function to load a static dictionary into LZ4_stream.
+ * Any previous data will be forgotten, only 'dictionary'
+ * will remain in memory.
+ * Loading a size of 0 is allowed.
+ *
+ * Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary,
+ int dictSize);
+
+/**
+ * LZ4_saveDict() - Save static dictionary from LZ4_stream
+ * @streamPtr: pointer to the 'LZ4_stream_t' structure
+ * @safeBuffer: buffer to save dictionary to, must be already allocated
+ * @dictSize: size of 'safeBuffer'
+ *
+ * If previously compressed data block is not guaranteed
+ * to remain available at its memory location,
+ * save it into a safer place (char *safeBuffer).
+ * Note : you don't need to call LZ4_loadDict() afterwards,
+ * dictionary is immediately usable, you can therefore call
+ * LZ4_compress_fast_continue().
+ *
+ * Return : saved dictionary size in bytes (necessarily <= dictSize),
+ * or 0 if error.
+ */
+int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer, int dictSize);
+
+/**
+ * LZ4_compress_fast_continue() - Compress 'src' using data from previously
+ * compressed blocks as a dictionary
+ * @streamPtr: Pointer to the previous 'LZ4_stream_t' structure
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data,
+ * which must be already allocated
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxDstSize: full or partial size of buffer 'dst'
+ * which must be already allocated
+ * @acceleration: acceleration factor
+ *
+ * Compress buffer content 'src', using data from previously compressed blocks
+ * as dictionary to improve compression ratio.
+ * Important : Previous data blocks are assumed to still
+ * be present and unmodified !
+ * If maxDstSize >= LZ4_compressBound(srcSize),
+ * compression is guaranteed to succeed, and runs faster.
+ *
+ * Return: Number of bytes written into buffer 'dst' or 0 if compression fails
+ */
+int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr, const char *src,
+ char *dst, int srcSize, int maxDstSize, int acceleration);
+
+/**
+ * LZ4_setStreamDecode() - Instruct where to find dictionary
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @dictionary: dictionary to use
+ * @dictSize: size of dictionary
+ *
+ * Use this function to instruct where to find the dictionary.
+ * Setting a size of 0 is allowed (same effect as reset).
+ *
+ * Return: 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+ const char *dictionary, int dictSize);
+
+/**
+ * LZ4_decompress_safe_continue() - Decompress blocks in streaming mode
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ * which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ *
+ * This decoding function allows decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks *must* remain available at the memory position
+ * where they were decoded (up to 64 KB)
+ * In the case of ring buffers, decoding buffer must be either :
+ * - Exactly same size as encoding buffer, with same update rule
+ * (block boundaries at same positions) In which case,
+ * the decoding & encoding ring buffer can have any size,
+ * including very small ones ( < 64 KB).
+ * - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ * maxBlockSize is implementation dependent.
+ * It's the maximum size you intend to compress into a single block.
+ * In which case, encoding and decoding buffers do not need
+ * to be synchronized, and encoding ring buffer can have any size,
+ * including small ones ( < 64 KB).
+ * - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ * In which case, encoding and decoding buffers do not need to be
+ * synchronized, and encoding ring buffer can have any size,
+ * including larger than decoding buffer.
+ * Whenever these conditions are not possible, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode()
+ *
+ * Return: number of bytes decompressed into destination buffer
+ * (necessarily <= maxDecompressedSize)
+ * or a negative result in case of error
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+ const char *source, char *dest, int compressedSize,
+ int maxDecompressedSize);
+
+/**
+ * LZ4_decompress_fast_continue() - Decompress blocks in streaming mode
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ * which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ *
+ * This decoding function allows decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks *must* remain available at the memory position
+ * where they were decoded (up to 64 KB)
+ * In the case of ring buffers, decoding buffer must be either :
+ * - Exactly same size as encoding buffer, with same update rule
+ * (block boundaries at same positions) In which case,
+ * the decoding & encoding ring buffer can have any size,
+ * including very small ones ( < 64 KB).
+ * - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ * maxBlockSize is implementation dependent.
+ * It's the maximum size you intend to compress into a single block.
+ * In which case, encoding and decoding buffers do not need
+ * to be synchronized, and encoding ring buffer can have any size,
+ * including small ones ( < 64 KB).
+ * - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ * In which case, encoding and decoding buffers do not need to be
+ * synchronized, and encoding ring buffer can have any size,
+ * including larger than decoding buffer.
+ * Whenever these conditions are not possible, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode()
+ *
+ * Return: number of bytes read from the source buffer
+ * (there is no 'maxDecompressedSize' bound for this function)
+ * or a negative result if decompression fails
+ */
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+ const char *source, char *dest, int originalSize);
+
+/**
+ * LZ4_decompress_safe_usingDict() - Same as LZ4_setStreamDecode()
+ * followed by LZ4_decompress_safe_continue()
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ * which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ * @dictStart: pointer to the start of the dictionary in memory
+ * @dictSize: size of dictionary
+ *
+ * This decoding function works the same as
+ * a combination of LZ4_setStreamDecode() followed by
+ * LZ4_decompress_safe_continue()
+ * It is stand-alone and doesn't need an LZ4_streamDecode_t structure.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ * (necessarily <= maxDecompressedSize)
+ * or a negative result in case of error
+ */
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+ int compressedSize, int maxDecompressedSize, const char *dictStart,
+ int dictSize);
+
+/**
+ * LZ4_decompress_fast_usingDict() - Same as LZ4_setStreamDecode()
+ * followed by LZ4_decompress_fast_continue()
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ * which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ * @dictStart: pointer to the start of the dictionary in memory
+ * @dictSize: size of dictionary
+ *
+ * This decoding function works the same as
+ * a combination of LZ4_setStreamDecode() followed by
+ * LZ4_decompress_fast_continue()
+ * It is stand-alone and doesn't need an LZ4_streamDecode_t structure.
+ *
+ * Return: number of bytes read from the source buffer
+ * (there is no 'maxDecompressedSize' bound for this function)
+ * or a negative result if decompression fails
+ */
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+ int originalSize, const char *dictStart, int dictSize);
+
#endif
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 5b759c9acf97..bdfc65af4152 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -42,6 +42,7 @@ struct memblock_type {
unsigned long max; /* size of the allocated array */
phys_addr_t total_size; /* size of all regions */
struct memblock_region *regions;
+ char *name;
};
struct memblock {
@@ -203,6 +204,7 @@ int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
unsigned long *end_pfn);
void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
unsigned long *out_end_pfn, int *out_nid);
+unsigned long memblock_next_valid_pfn(unsigned long pfn, unsigned long max_pfn);
/**
* for_each_mem_pfn_range - early memory pfn range iterator
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 254698856b8f..5af377303880 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -253,6 +253,7 @@ struct mem_cgroup {
/* Index in the kmem_cache->memcg_params.memcg_caches array */
int kmemcg_id;
enum memcg_kmem_state kmem_state;
+ struct list_head kmem_caches;
#endif
int last_scanned_node;
@@ -829,6 +830,7 @@ void memcg_kmem_uncharge(struct page *page, int order);
#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
extern struct static_key_false memcg_kmem_enabled_key;
+extern struct workqueue_struct *memcg_kmem_cache_wq;
extern int memcg_nr_cache_ids;
void memcg_get_cache_ids(void);
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 093607f90b91..301dfb03ecb7 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -108,12 +108,12 @@ extern int register_memory_notifier(struct notifier_block *nb);
extern void unregister_memory_notifier(struct notifier_block *nb);
extern int register_memory_isolate_notifier(struct notifier_block *nb);
extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
-extern int register_new_memory(int, struct mem_section *);
+extern int register_new_memory(struct zone *, int, struct mem_section *);
extern int memory_block_change_state(struct memory_block *mem,
unsigned long to_state,
unsigned long from_state_req);
#ifdef CONFIG_MEMORY_HOTREMOVE
-extern int unregister_memory_section(struct mem_section *);
+extern int unregister_memory_section(struct zone *, struct mem_section *);
#endif
extern int memory_dev_init(void);
extern int memory_notify(unsigned long val, void *v);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 134a2f69c21a..2f2c0d1290a1 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -280,8 +280,10 @@ extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device);
extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
extern bool is_memblock_offlined(struct memory_block *mem);
extern void remove_memory(int nid, u64 start, u64 size);
-extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn);
-extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
+extern int sparse_add_section(struct zone *zone, unsigned long pfn,
+ unsigned long nr_pages);
+extern void sparse_remove_section(struct zone *zone, struct mem_section *ms,
+ unsigned long pfn, unsigned long nr_pages,
unsigned long map_offset);
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
unsigned long pnum);
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index ae8d475a9385..fa76b516fa47 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -37,7 +37,7 @@ extern int migrate_page(struct address_space *,
struct page *, struct page *, enum migrate_mode);
extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
unsigned long private, enum migrate_mode mode, int reason);
-extern bool isolate_movable_page(struct page *page, isolate_mode_t mode);
+extern int isolate_movable_page(struct page *page, isolate_mode_t mode);
extern void putback_movable_page(struct page *page);
extern int migrate_prep(void);
@@ -56,6 +56,8 @@ static inline int migrate_pages(struct list_head *l, new_page_t new,
free_page_t free, unsigned long private, enum migrate_mode mode,
int reason)
{ return -ENOSYS; }
+static inline int isolate_movable_page(struct page *page, isolate_mode_t mode)
+ { return -EBUSY; }
static inline int migrate_prep(void) { return -ENOSYS; }
static inline int migrate_prep_local(void) { return -ENOSYS; }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6ff66d6fe8e2..7b11431124c3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -285,6 +285,17 @@ extern pgprot_t protection_map[16];
#define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */
#define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */
+#define FAULT_FLAG_TRACE \
+ { FAULT_FLAG_WRITE, "WRITE" }, \
+ { FAULT_FLAG_MKWRITE, "MKWRITE" }, \
+ { FAULT_FLAG_ALLOW_RETRY, "ALLOW_RETRY" }, \
+ { FAULT_FLAG_RETRY_NOWAIT, "RETRY_NOWAIT" }, \
+ { FAULT_FLAG_KILLABLE, "KILLABLE" }, \
+ { FAULT_FLAG_TRIED, "TRIED" }, \
+ { FAULT_FLAG_USER, "USER" }, \
+ { FAULT_FLAG_REMOTE, "REMOTE" }, \
+ { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }
+
/*
* vm_fault is filled by the the pagefault handler and passed to the vma's
* ->fault function. The vma's ->fault is responsible for returning a bitmask
@@ -303,6 +314,9 @@ struct vm_fault {
unsigned long address; /* Faulting virtual address */
pmd_t *pmd; /* Pointer to pmd entry matching
* the 'address' */
+ pud_t *pud; /* Pointer to pud entry matching
+ * the 'address'
+ */
pte_t orig_pte; /* Value of PTE at the time of fault */
struct page *cow_page; /* Page handler may use for COW fault */
@@ -330,6 +344,13 @@ struct vm_fault {
*/
};
+/* page entry size for vm->huge_fault() */
+enum page_entry_size {
+ PE_SIZE_PTE = 0,
+ PE_SIZE_PMD,
+ PE_SIZE_PUD,
+};
+
/*
* These are the virtual MM functions - opening of an area, closing and
* unmapping it (needed to keep files on disk up-to-date etc), pointer
@@ -339,18 +360,17 @@ struct vm_operations_struct {
void (*open)(struct vm_area_struct * area);
void (*close)(struct vm_area_struct * area);
int (*mremap)(struct vm_area_struct * area);
- int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
- int (*pmd_fault)(struct vm_area_struct *, unsigned long address,
- pmd_t *, unsigned int flags);
+ int (*fault)(struct vm_fault *vmf);
+ int (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size);
void (*map_pages)(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
/* notification that a previously read-only page is about to become
* writable, if an error is returned it will cause a SIGBUS */
- int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);
+ int (*page_mkwrite)(struct vm_fault *vmf);
/* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */
- int (*pfn_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);
+ int (*pfn_mkwrite)(struct vm_fault *vmf);
/* called by access_process_vm when get_user_pages() fails, typically
* for use by special VMAs that can switch between memory and hardware
@@ -406,6 +426,10 @@ static inline int pmd_devmap(pmd_t pmd)
{
return 0;
}
+static inline int pud_devmap(pud_t pud)
+{
+ return 0;
+}
#endif
/*
@@ -1111,6 +1135,20 @@ static inline void clear_page_pfmemalloc(struct page *page)
VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \
VM_FAULT_FALLBACK)
+#define VM_FAULT_RESULT_TRACE \
+ { VM_FAULT_OOM, "OOM" }, \
+ { VM_FAULT_SIGBUS, "SIGBUS" }, \
+ { VM_FAULT_MAJOR, "MAJOR" }, \
+ { VM_FAULT_WRITE, "WRITE" }, \
+ { VM_FAULT_HWPOISON, "HWPOISON" }, \
+ { VM_FAULT_HWPOISON_LARGE, "HWPOISON_LARGE" }, \
+ { VM_FAULT_SIGSEGV, "SIGSEGV" }, \
+ { VM_FAULT_NOPAGE, "NOPAGE" }, \
+ { VM_FAULT_LOCKED, "LOCKED" }, \
+ { VM_FAULT_RETRY, "RETRY" }, \
+ { VM_FAULT_FALLBACK, "FALLBACK" }, \
+ { VM_FAULT_DONE_COW, "DONE_COW" }
+
/* Encode hstate index for a hwpoisoned large page */
#define VM_FAULT_SET_HINDEX(x) ((x) << 12)
#define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf)
@@ -1128,8 +1166,7 @@ extern void pagefault_out_of_memory(void);
*/
#define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */
-extern void show_free_areas(unsigned int flags);
-extern bool skip_free_areas_node(unsigned int flags, int nid);
+extern void show_free_areas(unsigned int flags, nodemask_t *nodemask);
int shmem_zero_setup(struct vm_area_struct *);
#ifdef CONFIG_SHMEM
@@ -1152,8 +1189,6 @@ struct zap_details {
struct address_space *check_mapping; /* Check page->mapping if set */
pgoff_t first_index; /* Lowest page->index to unmap */
pgoff_t last_index; /* Highest page->index to unmap */
- bool ignore_dirty; /* Ignore dirty pages */
- bool check_swap_entries; /* Check also swap entries */
};
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
@@ -1164,12 +1199,16 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
unsigned long size);
void zap_page_range(struct vm_area_struct *vma, unsigned long address,
- unsigned long size, struct zap_details *);
+ unsigned long size);
void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
unsigned long start, unsigned long end);
/**
* mm_walk - callbacks for walk_page_range
+ * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
+ * this handler should only handle pud_trans_huge() puds.
+ * the pmd_entry or pte_entry callbacks will be used for
+ * regular PUDs.
* @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
* this handler is required to be able to handle
* pmd_trans_huge() pmds. They may simply choose to
@@ -1189,6 +1228,8 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
* (see the comment on walk_page_range() for more details)
*/
struct mm_walk {
+ int (*pud_entry)(pud_t *pud, unsigned long addr,
+ unsigned long next, struct mm_walk *walk);
int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
unsigned long next, struct mm_walk *walk);
int (*pte_entry)(pte_t *pte, unsigned long addr,
@@ -1359,6 +1400,16 @@ static inline bool vma_is_anonymous(struct vm_area_struct *vma)
return !vma->vm_ops;
}
+#ifdef CONFIG_SHMEM
+/*
+ * The vma_is_shmem is not inline because it is used only by slow
+ * paths in userfault.
+ */
+bool vma_is_shmem(struct vm_area_struct *vma);
+#else
+static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
+#endif
+
static inline int stack_guard_page_start(struct vm_area_struct *vma,
unsigned long addr)
{
@@ -1762,8 +1813,26 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
return ptl;
}
-extern void __init pagecache_init(void);
+/*
+ * No scalability reason to split PUD locks yet, but follow the same pattern
+ * as the PMD locks to make it easier if we decide to. The VM should not be
+ * considered ready to switch to split PUD locks yet; there may be places
+ * which need to be converted from page_table_lock.
+ */
+static inline spinlock_t *pud_lockptr(struct mm_struct *mm, pud_t *pud)
+{
+ return &mm->page_table_lock;
+}
+static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
+{
+ spinlock_t *ptl = pud_lockptr(mm, pud);
+
+ spin_lock(ptl);
+ return ptl;
+}
+
+extern void __init pagecache_init(void);
extern void free_area_init(unsigned long * zones_size);
extern void free_area_init_node(int nid, unsigned long * zones_size,
unsigned long zone_start_pfn, unsigned long *zholes_size);
@@ -1900,7 +1969,7 @@ extern void setup_per_zone_wmarks(void);
extern int __meminit init_per_zone_wmark_min(void);
extern void mem_init(void);
extern void __init mmap_init(void);
-extern void show_mem(unsigned int flags);
+extern void show_mem(unsigned int flags, nodemask_t *nodemask);
extern long si_mem_available(void);
extern void si_meminfo(struct sysinfo * val);
extern void si_meminfo_node(struct sysinfo *val, int nid);
@@ -1908,8 +1977,8 @@ extern void si_meminfo_node(struct sysinfo *val, int nid);
extern unsigned long arch_reserved_kernel_pages(void);
#endif
-extern __printf(2, 3)
-void warn_alloc(gfp_t gfp_mask, const char *fmt, ...);
+extern __printf(3, 4)
+void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...);
extern void setup_per_cpu_pageset(void);
@@ -1972,8 +2041,10 @@ extern struct vm_area_struct *vma_merge(struct mm_struct *,
unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
struct mempolicy *, struct vm_userfaultfd_ctx);
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
-extern int split_vma(struct mm_struct *,
- struct vm_area_struct *, unsigned long addr, int new_below);
+extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
+ unsigned long addr, int new_below);
+extern int split_vma(struct mm_struct *, struct vm_area_struct *,
+ unsigned long addr, int new_below);
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
struct rb_node **, struct rb_node *);
@@ -2021,18 +2092,22 @@ extern int install_special_mapping(struct mm_struct *mm,
extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
extern unsigned long mmap_region(struct file *file, unsigned long addr,
- unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);
+ unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
+ struct list_head *uf);
extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
- vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate);
-extern int do_munmap(struct mm_struct *, unsigned long, size_t);
+ vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
+ struct list_head *uf);
+extern int do_munmap(struct mm_struct *, unsigned long, size_t,
+ struct list_head *uf);
static inline unsigned long
do_mmap_pgoff(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
- unsigned long pgoff, unsigned long *populate)
+ unsigned long pgoff, unsigned long *populate,
+ struct list_head *uf)
{
- return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate);
+ return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate, uf);
}
#ifdef CONFIG_MMU
@@ -2049,6 +2124,7 @@ static inline void mm_populate(unsigned long addr, unsigned long len) {}
/* These take the mm semaphore themselves */
extern int __must_check vm_brk(unsigned long, unsigned long);
+extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long);
extern int vm_munmap(unsigned long, size_t);
extern unsigned long __must_check vm_mmap(struct file *, unsigned long,
unsigned long, unsigned long,
@@ -2092,10 +2168,10 @@ extern void truncate_inode_pages_range(struct address_space *,
extern void truncate_inode_pages_final(struct address_space *);
/* generic vm_area_ops exported for stackable file systems */
-extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
+extern int filemap_fault(struct vm_fault *vmf);
extern void filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
-extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int filemap_page_mkwrite(struct vm_fault *vmf);
/* mm/page-writeback.c */
int write_one_page(struct page *page, int wait);
@@ -2317,7 +2393,8 @@ void sparse_mem_maps_populate_node(struct page **map_map,
unsigned long map_count,
int nodeid);
-struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
+struct page *__populate_section_memmap(unsigned long pfn,
+ unsigned long nr_pages, int nid);
pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node);
pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
@@ -2400,6 +2477,10 @@ extern void clear_huge_page(struct page *page,
extern void copy_user_huge_page(struct page *dst, struct page *src,
unsigned long addr, struct vm_area_struct *vma,
unsigned int pages_per_huge_page);
+extern long copy_huge_page_from_user(struct page *dst_page,
+ const void __user *usr_src,
+ unsigned int pages_per_huge_page,
+ bool allow_pagefault);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
extern struct page_ext_operations debug_guardpage_ops;
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 41d376e7116d..e030a68ead7e 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -50,6 +50,13 @@ static __always_inline void add_page_to_lru_list(struct page *page,
list_add(&page->lru, &lruvec->lists[lru]);
}
+static __always_inline void add_page_to_lru_list_tail(struct page *page,
+ struct lruvec *lruvec, enum lru_list lru)
+{
+ update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
+ list_add_tail(&page->lru, &lruvec->lists[lru]);
+}
+
static __always_inline void del_page_from_lru_list(struct page *page,
struct lruvec *lruvec, enum lru_list lru)
{
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index a1a210d59961..51891fb0d3ce 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -381,6 +381,19 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
___pmd; \
})
+#define pudp_huge_clear_flush_notify(__vma, __haddr, __pud) \
+({ \
+ unsigned long ___haddr = __haddr & HPAGE_PUD_MASK; \
+ struct mm_struct *___mm = (__vma)->vm_mm; \
+ pud_t ___pud; \
+ \
+ ___pud = pudp_huge_clear_flush(__vma, __haddr, __pud); \
+ mmu_notifier_invalidate_range(___mm, ___haddr, \
+ ___haddr + HPAGE_PUD_SIZE); \
+ \
+ ___pud; \
+})
+
#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd) \
({ \
unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \
@@ -475,6 +488,7 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
#define pmdp_clear_young_notify pmdp_test_and_clear_young
#define ptep_clear_flush_notify ptep_clear_flush
#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
+#define pudp_huge_clear_flush_notify pudp_huge_clear_flush
#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
#define set_pte_at_notify set_pte_at
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f4aac87adcc3..338a786a993f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -236,8 +236,6 @@ struct lruvec {
#define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
#define LRU_ALL ((1 << NR_LRU_LISTS) - 1)
-/* Isolate clean file */
-#define ISOLATE_CLEAN ((__force isolate_mode_t)0x1)
/* Isolate unmapped file */
#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2)
/* Isolate for asynchronous migration */
@@ -779,7 +777,7 @@ static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
#endif
}
-extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru);
+extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx);
#ifdef CONFIG_HAVE_MEMORY_PRESENT
void memory_present(int nid, unsigned long start, unsigned long end);
@@ -1052,6 +1050,8 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
* PFN_SECTION_SHIFT pfn to/from section number
*/
#define PA_SECTION_SHIFT (SECTION_SIZE_BITS)
+#define PA_SECTION_SIZE (1UL << PA_SECTION_SHIFT)
+#define PA_SECTION_MASK (~(PA_SECTION_SIZE-1))
#define PFN_SECTION_SHIFT (SECTION_SIZE_BITS - PAGE_SHIFT)
#define NR_MEM_SECTIONS (1UL << SECTIONS_SHIFT)
@@ -1066,12 +1066,27 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
#error Allocator MAX_ORDER exceeds SECTION_SIZE
#endif
-#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
-#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)
+#define pfn_to_section_nr(pfn) ((unsigned long)(pfn) >> PFN_SECTION_SHIFT)
+#define section_nr_to_pfn(sec) ((unsigned long)(sec) << PFN_SECTION_SHIFT)
#define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)
#define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK)
+#define SECTION_ACTIVE_SIZE ((1UL << SECTION_SIZE_BITS) / BITS_PER_LONG)
+#define SECTION_ACTIVE_MASK (~(SECTION_ACTIVE_SIZE - 1))
+
+struct mem_section_usage {
+ /*
+ * SECTION_ACTIVE_SIZE portions of the section that are populated in
+ * the memmap
+ */
+ unsigned long map_active;
+ /* See declaration of similar field in struct zone */
+ unsigned long pageblock_flags[0];
+};
+
+void __init section_active_init(unsigned long pfn, unsigned long nr_pages);
+
struct page;
struct page_ext;
struct mem_section {
@@ -1089,8 +1104,7 @@ struct mem_section {
*/
unsigned long section_mem_map;
- /* See declaration of similar field in struct zone */
- unsigned long *pageblock_flags;
+ struct mem_section_usage *usage;
#ifdef CONFIG_PAGE_EXTENSION
/*
* If SPARSEMEM, pgdat doesn't have page_ext pointer. We use
@@ -1121,6 +1135,11 @@ extern struct mem_section *mem_section[NR_SECTION_ROOTS];
extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
#endif
+static inline unsigned long *section_to_usemap(struct mem_section *ms)
+{
+ return ms->usage->pageblock_flags;
+}
+
static inline struct mem_section *__nr_to_section(unsigned long nr)
{
if (!mem_section[SECTION_NR_TO_ROOT(nr)])
@@ -1209,6 +1228,11 @@ void sparse_init(void);
#else
#define sparse_init() do {} while (0)
#define sparse_index_init(_sec, _nid) do {} while (0)
+static inline void section_active_init(unsigned long pfn,
+ unsigned long nr_pages)
+{
+}
+#define section_active_init(_pfn, _nr_pages) do {} while (0)
#endif /* CONFIG_SPARSEMEM */
/*
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 324c8dbad1e1..84943e8057ef 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -266,7 +266,6 @@ static inline struct page *find_get_page_flags(struct address_space *mapping,
/**
* find_lock_page - locate, pin and lock a pagecache page
- * pagecache_get_page - find and get a page reference
* @mapping: the address_space to search
* @offset: the page index
*
@@ -482,19 +481,11 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
}
/*
- * This is exported only for wait_on_page_locked/wait_on_page_writeback,
- * and for filesystems which need to wait on PG_private.
+ * This is exported only for wait_on_page_locked/wait_on_page_writeback, etc.,
+ * and should not be used directly.
*/
extern void wait_on_page_bit(struct page *page, int bit_nr);
extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
-extern void wake_up_page_bit(struct page *page, int bit_nr);
-
-static inline void wake_up_page(struct page *page, int bit)
-{
- if (!PageWaiters(page))
- return;
- wake_up_page_bit(page, bit);
-}
/*
* Wait for a page to be unlocked.
diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
index a3d90b9da18d..a49b3259cad7 100644
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -15,6 +15,12 @@
#define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))
#define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4))
+#define PFN_FLAGS_TRACE \
+ { PFN_SG_CHAIN, "SG_CHAIN" }, \
+ { PFN_SG_LAST, "SG_LAST" }, \
+ { PFN_DEV, "DEV" }, \
+ { PFN_MAP, "MAP" }
+
static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags)
{
pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), };
@@ -84,6 +90,13 @@ static inline pmd_t pfn_t_pmd(pfn_t pfn, pgprot_t pgprot)
{
return pfn_pmd(pfn_t_to_pfn(pfn), pgprot);
}
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline pud_t pfn_t_pud(pfn_t pfn, pgprot_t pgprot)
+{
+ return pfn_pud(pfn_t_to_pfn(pfn), pgprot);
+}
+#endif
#endif
#ifdef __HAVE_ARCH_PTE_DEVMAP
@@ -100,5 +113,10 @@ static inline bool pfn_t_devmap(pfn_t pfn)
}
pte_t pte_mkdevmap(pte_t pte);
pmd_t pmd_mkdevmap(pmd_t pmd);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+pud_t pud_mkdevmap(pud_t pud);
#endif
+#endif /* __HAVE_ARCH_PTE_DEVMAP */
+
#endif /* _LINUX_PFN_T_H_ */
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 23705a53abba..298ead5512e5 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -191,10 +191,10 @@ pid_t pid_vnr(struct pid *pid);
#define do_each_pid_thread(pid, type, task) \
do_each_pid_task(pid, type, task) { \
struct task_struct *tg___ = task; \
- do {
+ for_each_thread(tg___, task) {
#define while_each_pid_thread(pid, type, task) \
- } while_each_thread(tg___, task); \
+ } \
task = tg___; \
} while_each_pid_task(pid, type, task)
#endif /* _LINUX_PID_H */
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index d076183e49be..9702b6e183bc 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -90,7 +90,9 @@ rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \
old->rbaugmented = rbcompute(old); \
} \
rbstatic const struct rb_augment_callbacks rbname = { \
- rbname ## _propagate, rbname ## _copy, rbname ## _rotate \
+ .propagate = rbname ## _propagate, \
+ .copy = rbname ## _copy, \
+ .rotate = rbname ## _rotate \
};
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 15321fb1df6b..8c89e902df3e 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -9,6 +9,7 @@
#include <linux/mm.h>
#include <linux/rwsem.h>
#include <linux/memcontrol.h>
+#include <linux/highmem.h>
/*
* The anon_vma heads a list of private "related" vmas, to scan if
@@ -196,41 +197,30 @@ int page_referenced(struct page *, int is_locked,
int try_to_unmap(struct page *, enum ttu_flags flags);
-/*
- * Used by uprobes to replace a userspace page safely
- */
-pte_t *__page_check_address(struct page *, struct mm_struct *,
- unsigned long, spinlock_t **, int);
-
-static inline pte_t *page_check_address(struct page *page, struct mm_struct *mm,
- unsigned long address,
- spinlock_t **ptlp, int sync)
-{
- pte_t *ptep;
+/* Avoid racy checks */
+#define PVMW_SYNC (1 << 0)
+/* Look for migration entries rather than present PTEs */
+#define PVMW_MIGRATION (1 << 1)
- __cond_lock(*ptlp, ptep = __page_check_address(page, mm, address,
- ptlp, sync));
- return ptep;
-}
+struct page_vma_mapped_walk {
+ struct page *page;
+ struct vm_area_struct *vma;
+ unsigned long address;
+ pmd_t *pmd;
+ pte_t *pte;
+ spinlock_t *ptl;
+ unsigned int flags;
+};
-/*
- * Used by idle page tracking to check if a page was referenced via page
- * tables.
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-bool page_check_address_transhuge(struct page *page, struct mm_struct *mm,
- unsigned long address, pmd_t **pmdp,
- pte_t **ptep, spinlock_t **ptlp);
-#else
-static inline bool page_check_address_transhuge(struct page *page,
- struct mm_struct *mm, unsigned long address,
- pmd_t **pmdp, pte_t **ptep, spinlock_t **ptlp)
+static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
{
- *ptep = page_check_address(page, mm, address, ptlp, 0);
- *pmdp = NULL;
- return !!*ptep;
+ if (pvmw->pte)
+ pte_unmap(pvmw->pte);
+ if (pvmw->ptl)
+ spin_unlock(pvmw->ptl);
}
-#endif
+
+bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);
/*
* Used by swapoff to help locate where page is expected in vma.
diff --git a/include/linux/rodata_test.h b/include/linux/rodata_test.h
new file mode 100644
index 000000000000..562537f85a28
--- /dev/null
+++ b/include/linux/rodata_test.h
@@ -0,0 +1,24 @@
+/*
+ * rodata_test.h: functional test for mark_rodata_ro function
+ *
+ * (C) Copyright 2008 Intel Corporation
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#ifndef _RODATA_TEST_H
+#define _RODATA_TEST_H
+
+#ifdef CONFIG_DEBUG_RODATA_TEST
+extern const int rodata_test_data;
+void rodata_test(void);
+#else
+static inline void rodata_test(void) {}
+#endif
+
+#endif /* _RODATA_TEST_H */
+
diff --git a/include/linux/sem.h b/include/linux/sem.h
index d0efd6e6c20a..4fc222f8755d 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -21,7 +21,7 @@ struct sem_array {
struct list_head list_id; /* undo requests on this array */
int sem_nsems; /* no. of semaphores in array */
int complex_count; /* pending complex operations */
- bool complex_mode; /* no parallel simple ops */
+ unsigned int use_global_lock;/* >0: global lock required */
};
#ifdef CONFIG_SYSVIPC
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index ff078e7043b6..fdaac9d4d46d 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -124,4 +124,15 @@ static inline bool shmem_huge_enabled(struct vm_area_struct *vma)
}
#endif
+#ifdef CONFIG_SHMEM
+extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr,
+ unsigned long src_addr,
+ struct page **pagep);
+#else
+#define shmem_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
+ src_addr, pagep) ({ BUG(); 0; })
+#endif
+
#endif
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 4c5363566815..3c37a8c51921 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -545,22 +545,49 @@ struct memcg_cache_array {
* array to be accessed without taking any locks, on relocation we free the old
* version only after a grace period.
*
- * Child caches will hold extra metadata needed for its operation. Fields are:
+ * Root and child caches hold different metadata.
*
- * @memcg: pointer to the memcg this cache belongs to
- * @root_cache: pointer to the global, root cache, this cache was derived from
+ * @root_cache: Common to root and child caches. NULL for root, pointer to
+ * the root cache for children.
*
- * Both root and child caches of the same kind are linked into a list chained
- * through @list.
+ * The following fields are specific to root caches.
+ *
+ * @memcg_caches: kmemcg ID indexed table of child caches. This table is
+ * used to index child caches during allocation and cleared
+ * early during shutdown.
+ *
+ * @__root_caches_node: List node for slab_root_caches list.
+ *
+ * @children: List of all child caches. While the child caches are also
+ * reachable through @memcg_caches, a child cache remains on
+ * this list until it is actually destroyed.
+ *
+ * The following fields are specific to child caches.
+ *
+ * @memcg: Pointer to the memcg this cache belongs to.
+ *
+ * @children_node: List node for @root_cache->children list.
+ *
+ * @kmem_caches_node: List node for @memcg->kmem_caches list.
*/
struct memcg_cache_params {
- bool is_root_cache;
- struct list_head list;
+ struct kmem_cache *root_cache;
union {
- struct memcg_cache_array __rcu *memcg_caches;
+ struct {
+ struct memcg_cache_array __rcu *memcg_caches;
+ struct list_head __root_caches_node;
+ struct list_head children;
+ };
struct {
struct mem_cgroup *memcg;
- struct kmem_cache *root_cache;
+ struct list_head children_node;
+ struct list_head kmem_caches_node;
+
+ void (*deact_fn)(struct kmem_cache *);
+ union {
+ struct rcu_head deact_rcu_head;
+ struct work_struct deact_work;
+ };
};
};
};
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 75f56c2ef2d4..07ef550c6627 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -113,9 +113,9 @@ struct kmem_cache {
#ifdef CONFIG_SYSFS
#define SLAB_SUPPORTS_SYSFS
-void sysfs_slab_remove(struct kmem_cache *);
+void sysfs_slab_release(struct kmem_cache *);
#else
-static inline void sysfs_slab_remove(struct kmem_cache *s)
+static inline void sysfs_slab_release(struct kmem_cache *s)
{
}
#endif
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7f47b7098b1b..45e91dd6716d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -27,6 +27,7 @@ struct bio;
#define SWAP_FLAGS_VALID (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \
SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \
SWAP_FLAG_DISCARD_PAGES)
+#define SWAP_BATCH 64
static inline int current_is_kswapd(void)
{
@@ -176,6 +177,12 @@ enum {
* protected by swap_info_struct.lock.
*/
struct swap_cluster_info {
+ spinlock_t lock; /*
+ * Protect swap_cluster_info fields
+ * and swap_info_struct->swap_map
+ * elements corresponding to the swap
+ * cluster
+ */
unsigned int data:24;
unsigned int flags:8;
};
@@ -337,8 +344,13 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *,
sector_t *);
/* linux/mm/swap_state.c */
-extern struct address_space swapper_spaces[];
-#define swap_address_space(entry) (&swapper_spaces[swp_type(entry)])
+/* One swap address space for each 64M swap space */
+#define SWAP_ADDRESS_SPACE_SHIFT 14
+#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT)
+extern struct address_space *swapper_spaces[];
+#define swap_address_space(entry) \
+ (&swapper_spaces[swp_type(entry)][swp_offset(entry) \
+ >> SWAP_ADDRESS_SPACE_SHIFT])
extern unsigned long total_swapcache_pages(void);
extern void show_swap_cache_info(void);
extern int add_to_swap(struct page *, struct list_head *list);
@@ -360,6 +372,7 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t,
/* linux/mm/swapfile.c */
extern atomic_long_t nr_swap_pages;
extern long total_swap_pages;
+extern bool has_usable_swap(void);
/* Swap 50% full? Release swapcache more aggressively.. */
static inline bool vm_swap_full(void)
@@ -375,23 +388,31 @@ static inline long get_nr_swap_pages(void)
extern void si_swapinfo(struct sysinfo *);
extern swp_entry_t get_swap_page(void);
extern swp_entry_t get_swap_page_of_type(int);
+extern int get_swap_pages(int n, swp_entry_t swp_entries[]);
extern int add_swap_count_continuation(swp_entry_t, gfp_t);
extern void swap_shmem_alloc(swp_entry_t);
extern int swap_duplicate(swp_entry_t);
extern int swapcache_prepare(swp_entry_t);
extern void swap_free(swp_entry_t);
extern void swapcache_free(swp_entry_t);
+extern void swapcache_free_entries(swp_entry_t *entries, int n);
extern int free_swap_and_cache(swp_entry_t);
extern int swap_type_of(dev_t, sector_t, struct block_device **);
extern unsigned int count_swap_pages(int, int);
extern sector_t map_swap_page(struct page *, struct block_device **);
extern sector_t swapdev_block(int, pgoff_t);
extern int page_swapcount(struct page *);
+extern int __swp_swapcount(swp_entry_t entry);
extern int swp_swapcount(swp_entry_t entry);
extern struct swap_info_struct *page_swap_info(struct page *);
extern bool reuse_swap_page(struct page *, int *);
extern int try_to_free_swap(struct page *);
struct backing_dev_info;
+extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
+extern void exit_swap_address_space(unsigned int type);
+
+extern int get_swap_slots(int n, swp_entry_t *slots);
+extern void swapcache_free_batch(swp_entry_t *entries, int n);
#else /* CONFIG_SWAP */
@@ -479,6 +500,11 @@ static inline int page_swapcount(struct page *page)
return 0;
}
+static inline int __swp_swapcount(swp_entry_t entry)
+{
+ return 0;
+}
+
static inline int swp_swapcount(swp_entry_t entry)
{
return 0;
diff --git a/include/linux/swap_slots.h b/include/linux/swap_slots.h
new file mode 100644
index 000000000000..6ef92d17633d
--- /dev/null
+++ b/include/linux/swap_slots.h
@@ -0,0 +1,30 @@
+#ifndef _LINUX_SWAP_SLOTS_H
+#define _LINUX_SWAP_SLOTS_H
+
+#include <linux/swap.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+
+#define SWAP_SLOTS_CACHE_SIZE SWAP_BATCH
+#define THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE (5*SWAP_SLOTS_CACHE_SIZE)
+#define THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE (2*SWAP_SLOTS_CACHE_SIZE)
+
+struct swap_slots_cache {
+ bool lock_initialized;
+ struct mutex alloc_lock; /* protects slots, nr, cur */
+ swp_entry_t *slots;
+ int nr;
+ int cur;
+ spinlock_t free_lock; /* protects slots_ret, n_ret */
+ swp_entry_t *slots_ret;
+ int n_ret;
+};
+
+void disable_swap_slots_cache_lock(void);
+void reenable_swap_slots_cache_unlock(void);
+int enable_swap_slots_cache(void);
+int free_swap_slot(swp_entry_t entry);
+
+extern bool swap_slot_cache_enabled;
+
+#endif /* _LINUX_SWAP_SLOTS_H */
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 0f165507495c..0af63c4381b9 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -23,6 +23,10 @@ const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
const struct trace_print_flags *symbol_array);
#if BITS_PER_LONG == 32
+const char *trace_print_flags_seq_u64(struct trace_seq *p, const char *delim,
+ unsigned long long flags,
+ const struct trace_print_flags_u64 *flag_array);
+
const char *trace_print_symbols_seq_u64(struct trace_seq *p,
unsigned long long val,
const struct trace_print_flags_u64
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 11b92b047a1e..0468548acebf 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -52,6 +52,28 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
return vma->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP);
}
+extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
+extern void dup_userfaultfd_complete(struct list_head *);
+
+extern void mremap_userfaultfd_prep(struct vm_area_struct *,
+ struct vm_userfaultfd_ctx *);
+extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *,
+ unsigned long from, unsigned long to,
+ unsigned long len);
+
+extern void userfaultfd_remove(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start,
+ unsigned long end);
+
+extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct list_head *uf);
+extern void userfaultfd_unmap_complete(struct mm_struct *mm,
+ struct list_head *uf);
+
+extern void userfaultfd_exit(struct mm_struct *mm);
+
#else /* CONFIG_USERFAULTFD */
/* mm helpers */
@@ -76,6 +98,51 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
return false;
}
+static inline int dup_userfaultfd(struct vm_area_struct *vma,
+ struct list_head *l)
+{
+ return 0;
+}
+
+static inline void dup_userfaultfd_complete(struct list_head *l)
+{
+}
+
+static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma,
+ struct vm_userfaultfd_ctx *ctx)
+{
+}
+
+static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx,
+ unsigned long from,
+ unsigned long to,
+ unsigned long len)
+{
+}
+
+static inline void userfaultfd_remove(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start,
+ unsigned long end)
+{
+}
+
+static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct list_head *uf)
+{
+ return 0;
+}
+
+static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
+ struct list_head *uf)
+{
+}
+
+static inline void userfaultfd_exit(struct mm_struct *mm)
+{
+}
+
#endif /* CONFIG_USERFAULTFD */
#endif /* _LINUX_USERFAULTFD_K_H */
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 4d6ec58a8d45..6aa1b6cb5828 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -56,6 +56,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
COMPACTISOLATED,
COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
KCOMPACTD_WAKE,
+ KCOMPACTD_MIGRATE_SCANNED, KCOMPACTD_FREE_SCANNED,
#endif
#ifdef CONFIG_HUGETLB_PAGE
HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 5527d910ba3d..a3c0cbd7c888 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -46,7 +46,7 @@ enum writeback_sync_modes {
*/
enum wb_reason {
WB_REASON_BACKGROUND,
- WB_REASON_TRY_TO_FREE_PAGES,
+ WB_REASON_VMSCAN,
WB_REASON_SYNC,
WB_REASON_PERIODIC,
WB_REASON_LAPTOP_TIMER,
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index cbdb90b6b308..0a18ab6483ff 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -9,62 +9,6 @@
#include <linux/tracepoint.h>
#include <trace/events/mmflags.h>
-#define COMPACTION_STATUS \
- EM( COMPACT_SKIPPED, "skipped") \
- EM( COMPACT_DEFERRED, "deferred") \
- EM( COMPACT_CONTINUE, "continue") \
- EM( COMPACT_SUCCESS, "success") \
- EM( COMPACT_PARTIAL_SKIPPED, "partial_skipped") \
- EM( COMPACT_COMPLETE, "complete") \
- EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \
- EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \
- EMe(COMPACT_CONTENDED, "contended")
-
-#ifdef CONFIG_ZONE_DMA
-#define IFDEF_ZONE_DMA(X) X
-#else
-#define IFDEF_ZONE_DMA(X)
-#endif
-
-#ifdef CONFIG_ZONE_DMA32
-#define IFDEF_ZONE_DMA32(X) X
-#else
-#define IFDEF_ZONE_DMA32(X)
-#endif
-
-#ifdef CONFIG_HIGHMEM
-#define IFDEF_ZONE_HIGHMEM(X) X
-#else
-#define IFDEF_ZONE_HIGHMEM(X)
-#endif
-
-#define ZONE_TYPE \
- IFDEF_ZONE_DMA( EM (ZONE_DMA, "DMA")) \
- IFDEF_ZONE_DMA32( EM (ZONE_DMA32, "DMA32")) \
- EM (ZONE_NORMAL, "Normal") \
- IFDEF_ZONE_HIGHMEM( EM (ZONE_HIGHMEM,"HighMem")) \
- EMe(ZONE_MOVABLE,"Movable")
-
-/*
- * First define the enums in the above macros to be exported to userspace
- * via TRACE_DEFINE_ENUM().
- */
-#undef EM
-#undef EMe
-#define EM(a, b) TRACE_DEFINE_ENUM(a);
-#define EMe(a, b) TRACE_DEFINE_ENUM(a);
-
-COMPACTION_STATUS
-ZONE_TYPE
-
-/*
- * Now redefine the EM() and EMe() macros to map the enums to the strings
- * that will be printed in the output.
- */
-#undef EM
-#undef EMe
-#define EM(a, b) {a, b},
-#define EMe(a, b) {a, b}
DECLARE_EVENT_CLASS(mm_compaction_isolate_template,
@@ -187,6 +131,7 @@ TRACE_EVENT(mm_compaction_begin,
__entry->sync ? "sync" : "async")
);
+#ifdef CONFIG_COMPACTION
TRACE_EVENT(mm_compaction_end,
TP_PROTO(unsigned long zone_start, unsigned long migrate_pfn,
unsigned long free_pfn, unsigned long zone_end, bool sync,
@@ -220,6 +165,7 @@ TRACE_EVENT(mm_compaction_end,
__entry->sync ? "sync" : "async",
__print_symbolic(__entry->status, COMPACTION_STATUS))
);
+#endif
TRACE_EVENT(mm_compaction_try_to_compact_pages,
@@ -248,6 +194,7 @@ TRACE_EVENT(mm_compaction_try_to_compact_pages,
__entry->prio)
);
+#ifdef CONFIG_COMPACTION
DECLARE_EVENT_CLASS(mm_compaction_suitable_template,
TP_PROTO(struct zone *zone,
@@ -295,7 +242,6 @@ DEFINE_EVENT(mm_compaction_suitable_template, mm_compaction_suitable,
TP_ARGS(zone, order, ret)
);
-#ifdef CONFIG_COMPACTION
DECLARE_EVENT_CLASS(mm_compaction_defer_template,
TP_PROTO(struct zone *zone, int order),
diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h
new file mode 100644
index 000000000000..c566ddc87f73
--- /dev/null
+++ b/include/trace/events/fs_dax.h
@@ -0,0 +1,156 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM fs_dax
+
+#if !defined(_TRACE_FS_DAX_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FS_DAX_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(dax_pmd_fault_class,
+ TP_PROTO(struct inode *inode, struct vm_fault *vmf,
+ pgoff_t max_pgoff, int result),
+ TP_ARGS(inode, vmf, max_pgoff, result),
+ TP_STRUCT__entry(
+ __field(unsigned long, ino)
+ __field(unsigned long, vm_start)
+ __field(unsigned long, vm_end)
+ __field(unsigned long, vm_flags)
+ __field(unsigned long, address)
+ __field(pgoff_t, pgoff)
+ __field(pgoff_t, max_pgoff)
+ __field(dev_t, dev)
+ __field(unsigned int, flags)
+ __field(int, result)
+ ),
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->vm_start = vmf->vma->vm_start;
+ __entry->vm_end = vmf->vma->vm_end;
+ __entry->vm_flags = vmf->vma->vm_flags;
+ __entry->address = vmf->address;
+ __entry->flags = vmf->flags;
+ __entry->pgoff = vmf->pgoff;
+ __entry->max_pgoff = max_pgoff;
+ __entry->result = result;
+ ),
+ TP_printk("dev %d:%d ino %#lx %s %s address %#lx vm_start "
+ "%#lx vm_end %#lx pgoff %#lx max_pgoff %#lx %s",
+ MAJOR(__entry->dev),
+ MINOR(__entry->dev),
+ __entry->ino,
+ __entry->vm_flags & VM_SHARED ? "shared" : "private",
+ __print_flags(__entry->flags, "|", FAULT_FLAG_TRACE),
+ __entry->address,
+ __entry->vm_start,
+ __entry->vm_end,
+ __entry->pgoff,
+ __entry->max_pgoff,
+ __print_flags(__entry->result, "|", VM_FAULT_RESULT_TRACE)
+ )
+)
+
+#define DEFINE_PMD_FAULT_EVENT(name) \
+DEFINE_EVENT(dax_pmd_fault_class, name, \
+ TP_PROTO(struct inode *inode, struct vm_fault *vmf, \
+ pgoff_t max_pgoff, int result), \
+ TP_ARGS(inode, vmf, max_pgoff, result))
+
+DEFINE_PMD_FAULT_EVENT(dax_pmd_fault);
+DEFINE_PMD_FAULT_EVENT(dax_pmd_fault_done);
+
+DECLARE_EVENT_CLASS(dax_pmd_load_hole_class,
+ TP_PROTO(struct inode *inode, struct vm_fault *vmf,
+ struct page *zero_page,
+ void *radix_entry),
+ TP_ARGS(inode, vmf, zero_page, radix_entry),
+ TP_STRUCT__entry(
+ __field(unsigned long, ino)
+ __field(unsigned long, vm_flags)
+ __field(unsigned long, address)
+ __field(struct page *, zero_page)
+ __field(void *, radix_entry)
+ __field(dev_t, dev)
+ ),
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->vm_flags = vmf->vma->vm_flags;
+ __entry->address = vmf->address;
+ __entry->zero_page = zero_page;
+ __entry->radix_entry = radix_entry;
+ ),
+ TP_printk("dev %d:%d ino %#lx %s address %#lx zero_page %p "
+ "radix_entry %#lx",
+ MAJOR(__entry->dev),
+ MINOR(__entry->dev),
+ __entry->ino,
+ __entry->vm_flags & VM_SHARED ? "shared" : "private",
+ __entry->address,
+ __entry->zero_page,
+ (unsigned long)__entry->radix_entry
+ )
+)
+
+#define DEFINE_PMD_LOAD_HOLE_EVENT(name) \
+DEFINE_EVENT(dax_pmd_load_hole_class, name, \
+ TP_PROTO(struct inode *inode, struct vm_fault *vmf, \
+ struct page *zero_page, void *radix_entry), \
+ TP_ARGS(inode, vmf, zero_page, radix_entry))
+
+DEFINE_PMD_LOAD_HOLE_EVENT(dax_pmd_load_hole);
+DEFINE_PMD_LOAD_HOLE_EVENT(dax_pmd_load_hole_fallback);
+
+DECLARE_EVENT_CLASS(dax_pmd_insert_mapping_class,
+ TP_PROTO(struct inode *inode, struct vm_fault *vmf,
+ long length, pfn_t pfn, void *radix_entry),
+ TP_ARGS(inode, vmf, length, pfn, radix_entry),
+ TP_STRUCT__entry(
+ __field(unsigned long, ino)
+ __field(unsigned long, vm_flags)
+ __field(unsigned long, address)
+ __field(long, length)
+ __field(u64, pfn_val)
+ __field(void *, radix_entry)
+ __field(dev_t, dev)
+ __field(int, write)
+ ),
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->vm_flags = vmf->vma->vm_flags;
+ __entry->address = vmf->address;
+ __entry->write = vmf->flags & FAULT_FLAG_WRITE;
+ __entry->length = length;
+ __entry->pfn_val = pfn.val;
+ __entry->radix_entry = radix_entry;
+ ),
+ TP_printk("dev %d:%d ino %#lx %s %s address %#lx length %#lx "
+ "pfn %#llx %s radix_entry %#lx",
+ MAJOR(__entry->dev),
+ MINOR(__entry->dev),
+ __entry->ino,
+ __entry->vm_flags & VM_SHARED ? "shared" : "private",
+ __entry->write ? "write" : "read",
+ __entry->address,
+ __entry->length,
+ __entry->pfn_val & ~PFN_FLAGS_MASK,
+ __print_flags_u64(__entry->pfn_val & PFN_FLAGS_MASK, "|",
+ PFN_FLAGS_TRACE),
+ (unsigned long)__entry->radix_entry
+ )
+)
+
+#define DEFINE_PMD_INSERT_MAPPING_EVENT(name) \
+DEFINE_EVENT(dax_pmd_insert_mapping_class, name, \
+ TP_PROTO(struct inode *inode, struct vm_fault *vmf, \
+ long length, pfn_t pfn, void *radix_entry), \
+ TP_ARGS(inode, vmf, length, pfn, radix_entry))
+
+DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping);
+DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping_fallback);
+
+#endif /* _TRACE_FS_DAX_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 15bf875d0e4a..12cd88c86b66 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -1,3 +1,6 @@
+#include <linux/node.h>
+#include <linux/mmzone.h>
+#include <linux/compaction.h>
/*
* The order of these masks is important. Matching masks will be seen
* first and the left over flags will end up showing by themselves.
@@ -171,3 +174,98 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
(flags) ? __print_flags(flags, "|", \
__def_vmaflag_names \
) : "none"
+
+#ifdef CONFIG_COMPACTION
+#define COMPACTION_STATUS \
+ EM( COMPACT_SKIPPED, "skipped") \
+ EM( COMPACT_DEFERRED, "deferred") \
+ EM( COMPACT_CONTINUE, "continue") \
+ EM( COMPACT_SUCCESS, "success") \
+ EM( COMPACT_PARTIAL_SKIPPED, "partial_skipped") \
+ EM( COMPACT_COMPLETE, "complete") \
+ EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \
+ EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \
+ EMe(COMPACT_CONTENDED, "contended")
+
+/* High-level compaction status feedback */
+#define COMPACTION_FAILED 1
+#define COMPACTION_WITHDRAWN 2
+#define COMPACTION_PROGRESS 3
+
+#define compact_result_to_feedback(result) \
+({ \
+ enum compact_result __result = result; \
+ (compaction_failed(__result)) ? COMPACTION_FAILED : \
+ (compaction_withdrawn(__result)) ? COMPACTION_WITHDRAWN : COMPACTION_PROGRESS; \
+})
+
+#define COMPACTION_FEEDBACK \
+ EM(COMPACTION_FAILED, "failed") \
+ EM(COMPACTION_WITHDRAWN, "withdrawn") \
+ EMe(COMPACTION_PROGRESS, "progress")
+
+#define COMPACTION_PRIORITY \
+ EM(COMPACT_PRIO_SYNC_FULL, "COMPACT_PRIO_SYNC_FULL") \
+ EM(COMPACT_PRIO_SYNC_LIGHT, "COMPACT_PRIO_SYNC_LIGHT") \
+ EMe(COMPACT_PRIO_ASYNC, "COMPACT_PRIO_ASYNC")
+#else
+#define COMPACTION_STATUS
+#define COMPACTION_PRIORITY
+#define COMPACTION_FEEDBACK
+#endif
+
+#ifdef CONFIG_ZONE_DMA
+#define IFDEF_ZONE_DMA(X) X
+#else
+#define IFDEF_ZONE_DMA(X)
+#endif
+
+#ifdef CONFIG_ZONE_DMA32
+#define IFDEF_ZONE_DMA32(X) X
+#else
+#define IFDEF_ZONE_DMA32(X)
+#endif
+
+#ifdef CONFIG_HIGHMEM
+#define IFDEF_ZONE_HIGHMEM(X) X
+#else
+#define IFDEF_ZONE_HIGHMEM(X)
+#endif
+
+#define ZONE_TYPE \
+ IFDEF_ZONE_DMA( EM (ZONE_DMA, "DMA")) \
+ IFDEF_ZONE_DMA32( EM (ZONE_DMA32, "DMA32")) \
+ EM (ZONE_NORMAL, "Normal") \
+ IFDEF_ZONE_HIGHMEM( EM (ZONE_HIGHMEM,"HighMem")) \
+ EMe(ZONE_MOVABLE,"Movable")
+
+#define LRU_NAMES \
+ EM (LRU_INACTIVE_ANON, "inactive_anon") \
+ EM (LRU_ACTIVE_ANON, "active_anon") \
+ EM (LRU_INACTIVE_FILE, "inactive_file") \
+ EM (LRU_ACTIVE_FILE, "active_file") \
+ EMe(LRU_UNEVICTABLE, "unevictable")
+
+/*
+ * First define the enums in the above macros to be exported to userspace
+ * via TRACE_DEFINE_ENUM().
+ */
+#undef EM
+#undef EMe
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define EMe(a, b) TRACE_DEFINE_ENUM(a);
+
+COMPACTION_STATUS
+COMPACTION_PRIORITY
+COMPACTION_FEEDBACK
+ZONE_TYPE
+LRU_NAMES
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings
+ * that will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a, b) {a, b},
+#define EMe(a, b) {a, b}
diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 1e974983757e..38baeb27221a 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -4,6 +4,7 @@
#if !defined(_TRACE_OOM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_OOM_H
#include <linux/tracepoint.h>
+#include <trace/events/mmflags.h>
TRACE_EVENT(oom_score_adj_update,
@@ -27,6 +28,86 @@ TRACE_EVENT(oom_score_adj_update,
__entry->pid, __entry->comm, __entry->oom_score_adj)
);
+TRACE_EVENT(reclaim_retry_zone,
+
+ TP_PROTO(struct zoneref *zoneref,
+ int order,
+ unsigned long reclaimable,
+ unsigned long available,
+ unsigned long min_wmark,
+ int no_progress_loops,
+ bool wmark_check),
+
+ TP_ARGS(zoneref, order, reclaimable, available, min_wmark, no_progress_loops, wmark_check),
+
+ TP_STRUCT__entry(
+ __field( int, node)
+ __field( int, zone_idx)
+ __field( int, order)
+ __field( unsigned long, reclaimable)
+ __field( unsigned long, available)
+ __field( unsigned long, min_wmark)
+ __field( int, no_progress_loops)
+ __field( bool, wmark_check)
+ ),
+
+ TP_fast_assign(
+ __entry->node = zone_to_nid(zoneref->zone);
+ __entry->zone_idx = zoneref->zone_idx;
+ __entry->order = order;
+ __entry->reclaimable = reclaimable;
+ __entry->available = available;
+ __entry->min_wmark = min_wmark;
+ __entry->no_progress_loops = no_progress_loops;
+ __entry->wmark_check = wmark_check;
+ ),
+
+ TP_printk("node=%d zone=%-8s order=%d reclaimable=%lu available=%lu min_wmark=%lu no_progress_loops=%d wmark_check=%d",
+ __entry->node, __print_symbolic(__entry->zone_idx, ZONE_TYPE),
+ __entry->order,
+ __entry->reclaimable, __entry->available, __entry->min_wmark,
+ __entry->no_progress_loops,
+ __entry->wmark_check)
+);
+
+#ifdef CONFIG_COMPACTION
+TRACE_EVENT(compact_retry,
+
+ TP_PROTO(int order,
+ enum compact_priority priority,
+ enum compact_result result,
+ int retries,
+ int max_retries,
+ bool ret),
+
+ TP_ARGS(order, priority, result, retries, max_retries, ret),
+
+ TP_STRUCT__entry(
+ __field( int, order)
+ __field( int, priority)
+ __field( int, result)
+ __field( int, retries)
+ __field( int, max_retries)
+ __field( bool, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->order = order;
+ __entry->priority = priority;
+ __entry->result = compact_result_to_feedback(result);
+ __entry->retries = retries;
+ __entry->max_retries = max_retries;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("order=%d priority=%s compaction_result=%s retries=%d max_retries=%d should_retry=%d",
+ __entry->order,
+ __print_symbolic(__entry->priority, COMPACTION_PRIORITY),
+ __print_symbolic(__entry->result, COMPACTION_FEEDBACK),
+ __entry->retries, __entry->max_retries,
+ __entry->ret)
+);
+#endif /* CONFIG_COMPACTION */
#endif
/* This part must be outside protection */
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index c88fd0934e7e..27e8a5c77579 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -15,6 +15,7 @@
#define RECLAIM_WB_MIXED 0x0010u
#define RECLAIM_WB_SYNC 0x0004u /* Unused, all reclaim async */
#define RECLAIM_WB_ASYNC 0x0008u
+#define RECLAIM_WB_LRU (RECLAIM_WB_ANON|RECLAIM_WB_FILE)
#define show_reclaim_flags(flags) \
(flags) ? __print_flags(flags, "|", \
@@ -269,26 +270,27 @@ TRACE_EVENT(mm_shrink_slab_end,
__entry->retval)
);
-DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
-
+TRACE_EVENT(mm_vmscan_lru_isolate,
TP_PROTO(int classzone_idx,
int order,
unsigned long nr_requested,
unsigned long nr_scanned,
+ unsigned long nr_skipped,
unsigned long nr_taken,
isolate_mode_t isolate_mode,
- int file),
+ int lru),
- TP_ARGS(classzone_idx, order, nr_requested, nr_scanned, nr_taken, isolate_mode, file),
+ TP_ARGS(classzone_idx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, isolate_mode, lru),
TP_STRUCT__entry(
__field(int, classzone_idx)
__field(int, order)
__field(unsigned long, nr_requested)
__field(unsigned long, nr_scanned)
+ __field(unsigned long, nr_skipped)
__field(unsigned long, nr_taken)
__field(isolate_mode_t, isolate_mode)
- __field(int, file)
+ __field(int, lru)
),
TP_fast_assign(
@@ -296,47 +298,21 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
__entry->order = order;
__entry->nr_requested = nr_requested;
__entry->nr_scanned = nr_scanned;
+ __entry->nr_skipped = nr_skipped;
__entry->nr_taken = nr_taken;
__entry->isolate_mode = isolate_mode;
- __entry->file = file;
+ __entry->lru = lru;
),
- TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu file=%d",
+ TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s",
__entry->isolate_mode,
__entry->classzone_idx,
__entry->order,
__entry->nr_requested,
__entry->nr_scanned,
+ __entry->nr_skipped,
__entry->nr_taken,
- __entry->file)
-);
-
-DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate,
-
- TP_PROTO(int classzone_idx,
- int order,
- unsigned long nr_requested,
- unsigned long nr_scanned,
- unsigned long nr_taken,
- isolate_mode_t isolate_mode,
- int file),
-
- TP_ARGS(classzone_idx, order, nr_requested, nr_scanned, nr_taken, isolate_mode, file)
-
-);
-
-DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate,
-
- TP_PROTO(int classzone_idx,
- int order,
- unsigned long nr_requested,
- unsigned long nr_scanned,
- unsigned long nr_taken,
- isolate_mode_t isolate_mode,
- int file),
-
- TP_ARGS(classzone_idx, order, nr_requested, nr_scanned, nr_taken, isolate_mode, file)
-
+ __print_symbolic(__entry->lru, LRU_NAMES))
);
TRACE_EVENT(mm_vmscan_writepage,
@@ -365,14 +341,27 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
TP_PROTO(int nid,
unsigned long nr_scanned, unsigned long nr_reclaimed,
+ unsigned long nr_dirty, unsigned long nr_writeback,
+ unsigned long nr_congested, unsigned long nr_immediate,
+ unsigned long nr_activate, unsigned long nr_ref_keep,
+ unsigned long nr_unmap_fail,
int priority, int file),
- TP_ARGS(nid, nr_scanned, nr_reclaimed, priority, file),
+ TP_ARGS(nid, nr_scanned, nr_reclaimed, nr_dirty, nr_writeback,
+ nr_congested, nr_immediate, nr_activate, nr_ref_keep,
+ nr_unmap_fail, priority, file),
TP_STRUCT__entry(
__field(int, nid)
__field(unsigned long, nr_scanned)
__field(unsigned long, nr_reclaimed)
+ __field(unsigned long, nr_dirty)
+ __field(unsigned long, nr_writeback)
+ __field(unsigned long, nr_congested)
+ __field(unsigned long, nr_immediate)
+ __field(unsigned long, nr_activate)
+ __field(unsigned long, nr_ref_keep)
+ __field(unsigned long, nr_unmap_fail)
__field(int, priority)
__field(int, reclaim_flags)
),
@@ -381,17 +370,102 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
__entry->nid = nid;
__entry->nr_scanned = nr_scanned;
__entry->nr_reclaimed = nr_reclaimed;
+ __entry->nr_dirty = nr_dirty;
+ __entry->nr_writeback = nr_writeback;
+ __entry->nr_congested = nr_congested;
+ __entry->nr_immediate = nr_immediate;
+ __entry->nr_activate = nr_activate;
+ __entry->nr_ref_keep = nr_ref_keep;
+ __entry->nr_unmap_fail = nr_unmap_fail;
__entry->priority = priority;
__entry->reclaim_flags = trace_shrink_flags(file);
),
- TP_printk("nid=%d nr_scanned=%ld nr_reclaimed=%ld priority=%d flags=%s",
+ TP_printk("nid=%d nr_scanned=%ld nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate=%ld nr_ref_keep=%ld nr_unmap_fail=%ld priority=%d flags=%s",
__entry->nid,
__entry->nr_scanned, __entry->nr_reclaimed,
+ __entry->nr_dirty, __entry->nr_writeback,
+ __entry->nr_congested, __entry->nr_immediate,
+ __entry->nr_activate, __entry->nr_ref_keep,
+ __entry->nr_unmap_fail, __entry->priority,
+ show_reclaim_flags(__entry->reclaim_flags))
+);
+
+TRACE_EVENT(mm_vmscan_lru_shrink_active,
+
+ TP_PROTO(int nid, unsigned long nr_taken,
+ unsigned long nr_active, unsigned long nr_deactivated,
+ unsigned long nr_referenced, int priority, int file),
+
+ TP_ARGS(nid, nr_taken, nr_active, nr_deactivated, nr_referenced, priority, file),
+
+ TP_STRUCT__entry(
+ __field(int, nid)
+ __field(unsigned long, nr_taken)
+ __field(unsigned long, nr_active)
+ __field(unsigned long, nr_deactivated)
+ __field(unsigned long, nr_referenced)
+ __field(int, priority)
+ __field(int, reclaim_flags)
+ ),
+
+ TP_fast_assign(
+ __entry->nid = nid;
+ __entry->nr_taken = nr_taken;
+ __entry->nr_active = nr_active;
+ __entry->nr_deactivated = nr_deactivated;
+ __entry->nr_referenced = nr_referenced;
+ __entry->priority = priority;
+ __entry->reclaim_flags = trace_shrink_flags(file);
+ ),
+
+ TP_printk("nid=%d nr_taken=%ld nr_active=%ld nr_deactivated=%ld nr_referenced=%ld priority=%d flags=%s",
+ __entry->nid,
+ __entry->nr_taken,
+ __entry->nr_active, __entry->nr_deactivated, __entry->nr_referenced,
__entry->priority,
show_reclaim_flags(__entry->reclaim_flags))
);
+TRACE_EVENT(mm_vmscan_inactive_list_is_low,
+
+ TP_PROTO(int nid, int reclaim_idx,
+ unsigned long total_inactive, unsigned long inactive,
+ unsigned long total_active, unsigned long active,
+ unsigned long ratio, int file),
+
+ TP_ARGS(nid, reclaim_idx, total_inactive, inactive, total_active, active, ratio, file),
+
+ TP_STRUCT__entry(
+ __field(int, nid)
+ __field(int, reclaim_idx)
+ __field(unsigned long, total_inactive)
+ __field(unsigned long, inactive)
+ __field(unsigned long, total_active)
+ __field(unsigned long, active)
+ __field(unsigned long, ratio)
+ __field(int, reclaim_flags)
+ ),
+
+ TP_fast_assign(
+ __entry->nid = nid;
+ __entry->reclaim_idx = reclaim_idx;
+ __entry->total_inactive = total_inactive;
+ __entry->inactive = inactive;
+ __entry->total_active = total_active;
+ __entry->active = active;
+ __entry->ratio = ratio;
+ __entry->reclaim_flags = trace_shrink_flags(file) & RECLAIM_WB_LRU;
+ ),
+
+ TP_printk("nid=%d reclaim_idx=%d total_inactive=%ld inactive=%ld total_active=%ld active=%ld ratio=%ld flags=%s",
+ __entry->nid,
+ __entry->reclaim_idx,
+ __entry->total_inactive, __entry->inactive,
+ __entry->total_active, __entry->active,
+ __entry->ratio,
+ show_reclaim_flags(__entry->reclaim_flags))
+);
#endif /* _TRACE_VMSCAN_H */
/* This part must be outside protection */
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 2ccd9ccbf9ef..7bd8783a590f 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -31,7 +31,7 @@
#define WB_WORK_REASON \
EM( WB_REASON_BACKGROUND, "background") \
- EM( WB_REASON_TRY_TO_FREE_PAGES, "try_to_free_pages") \
+ EM( WB_REASON_VMSCAN, "vmscan") \
EM( WB_REASON_SYNC, "sync") \
EM( WB_REASON_PERIODIC, "periodic") \
EM( WB_REASON_LAPTOP_TIMER, "laptop_timer") \
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 5c06f4af8323..00f643164ca2 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -283,8 +283,16 @@ TRACE_MAKE_SYSTEM_STR();
trace_print_symbols_seq(p, value, symbols); \
})
+#undef __print_flags_u64
#undef __print_symbolic_u64
#if BITS_PER_LONG == 32
+#define __print_flags_u64(flag, delim, flag_array...) \
+ ({ \
+ static const struct trace_print_flags_u64 __flags[] = \
+ { flag_array, { -1, NULL } }; \
+ trace_print_flags_seq_u64(p, delim, flag, __flags); \
+ })
+
#define __print_symbolic_u64(value, symbol_array...) \
({ \
static const struct trace_print_flags_u64 symbols[] = \
@@ -292,6 +300,9 @@ TRACE_MAKE_SYSTEM_STR();
trace_print_symbols_seq_u64(p, value, symbols); \
})
#else
+#define __print_flags_u64(flag, delim, flag_array...) \
+ __print_flags(flag, delim, flag_array)
+
#define __print_symbolic_u64(value, symbol_array...) \
__print_symbolic(value, symbol_array)
#endif
diff --git a/include/uapi/asm-generic/ioctl.h b/include/uapi/asm-generic/ioctl.h
index 7e7c11b52143..749b32fe5623 100644
--- a/include/uapi/asm-generic/ioctl.h
+++ b/include/uapi/asm-generic/ioctl.h
@@ -48,6 +48,9 @@
/*
* Direction bits, which any architecture can choose to override
* before including this file.
+ *
+ * NOTE: _IOC_WRITE means userland is writing and kernel is
+ * reading. _IOC_READ means userland is reading and kernel is writing.
*/
#ifndef _IOC_NONE
@@ -72,7 +75,12 @@
#define _IOC_TYPECHECK(t) (sizeof(t))
#endif
-/* used to create numbers */
+/*
+ * Used to create numbers.
+ *
+ * NOTE: _IOW means userland is writing and kernel is reading. _IOR
+ * means userland is reading and kernel is writing.
+ */
#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0)
#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(size)))
#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size)))
diff --git a/include/uapi/linux/auto_dev-ioctl.h b/include/uapi/linux/auto_dev-ioctl.h
index 021ed331dd71..744b3d060968 100644
--- a/include/uapi/linux/auto_dev-ioctl.h
+++ b/include/uapi/linux/auto_dev-ioctl.h
@@ -113,17 +113,13 @@ struct autofs_dev_ioctl {
static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in)
{
- memset(in, 0, sizeof(struct autofs_dev_ioctl));
+ memset(in, 0, AUTOFS_DEV_IOCTL_SIZE);
in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
- in->size = sizeof(struct autofs_dev_ioctl);
+ in->size = AUTOFS_DEV_IOCTL_SIZE;
in->ioctlfd = -1;
}
-/*
- * If you change this make sure you make the corresponding change
- * to autofs-dev-ioctl.c:lookup_ioctl()
- */
enum {
/* Get various version info */
AUTOFS_DEV_IOCTL_VERSION_CMD = 0x71,
@@ -160,8 +156,6 @@ enum {
AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD,
};
-#define AUTOFS_IOCTL 0x93
-
#define AUTOFS_DEV_IOCTL_VERSION \
_IOWR(AUTOFS_IOCTL, \
AUTOFS_DEV_IOCTL_VERSION_CMD, struct autofs_dev_ioctl)
diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h
index 1bfc3ed8b284..aa63451ef20a 100644
--- a/include/uapi/linux/auto_fs.h
+++ b/include/uapi/linux/auto_fs.h
@@ -61,12 +61,23 @@ struct autofs_packet_expire {
char name[NAME_MAX+1];
};
-#define AUTOFS_IOC_READY _IO(0x93, 0x60)
-#define AUTOFS_IOC_FAIL _IO(0x93, 0x61)
-#define AUTOFS_IOC_CATATONIC _IO(0x93, 0x62)
-#define AUTOFS_IOC_PROTOVER _IOR(0x93, 0x63, int)
-#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93, 0x64, compat_ulong_t)
-#define AUTOFS_IOC_SETTIMEOUT _IOWR(0x93, 0x64, unsigned long)
-#define AUTOFS_IOC_EXPIRE _IOR(0x93, 0x65, struct autofs_packet_expire)
+#define AUTOFS_IOCTL 0x93
+
+enum {
+ AUTOFS_IOC_READY_CMD = 0x60,
+ AUTOFS_IOC_FAIL_CMD,
+ AUTOFS_IOC_CATATONIC_CMD,
+ AUTOFS_IOC_PROTOVER_CMD,
+ AUTOFS_IOC_SETTIMEOUT_CMD,
+ AUTOFS_IOC_EXPIRE_CMD,
+};
+
+#define AUTOFS_IOC_READY _IO(AUTOFS_IOCTL, AUTOFS_IOC_READY_CMD)
+#define AUTOFS_IOC_FAIL _IO(AUTOFS_IOCTL, AUTOFS_IOC_FAIL_CMD)
+#define AUTOFS_IOC_CATATONIC _IO(AUTOFS_IOCTL, AUTOFS_IOC_CATATONIC_CMD)
+#define AUTOFS_IOC_PROTOVER _IOR(AUTOFS_IOCTL, AUTOFS_IOC_PROTOVER_CMD, int)
+#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(AUTOFS_IOCTL, AUTOFS_IOC_SETTIMEOUT_CMD, compat_ulong_t)
+#define AUTOFS_IOC_SETTIMEOUT _IOWR(AUTOFS_IOCTL, AUTOFS_IOC_SETTIMEOUT_CMD, unsigned long)
+#define AUTOFS_IOC_EXPIRE _IOR(AUTOFS_IOCTL, AUTOFS_IOC_EXPIRE_CMD, struct autofs_packet_expire)
#endif /* _UAPI_LINUX_AUTO_FS_H */
diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h
index 8f8f1bdcca8c..7c6da423d54e 100644
--- a/include/uapi/linux/auto_fs4.h
+++ b/include/uapi/linux/auto_fs4.h
@@ -148,10 +148,16 @@ union autofs_v5_packet_union {
autofs_packet_expire_direct_t expire_direct;
};
-#define AUTOFS_IOC_EXPIRE_MULTI _IOW(0x93, 0x66, int)
-#define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI
-#define AUTOFS_IOC_EXPIRE_DIRECT AUTOFS_IOC_EXPIRE_MULTI
-#define AUTOFS_IOC_PROTOSUBVER _IOR(0x93, 0x67, int)
-#define AUTOFS_IOC_ASKUMOUNT _IOR(0x93, 0x70, int)
+enum {
+ AUTOFS_IOC_EXPIRE_MULTI_CMD = 0x66, /* AUTOFS_IOC_EXPIRE_CMD + 1 */
+ AUTOFS_IOC_PROTOSUBVER_CMD,
+ AUTOFS_IOC_ASKUMOUNT_CMD = 0x70, /* AUTOFS_DEV_IOCTL_VERSION_CMD - 1 */
+};
+
+#define AUTOFS_IOC_EXPIRE_MULTI _IOW(AUTOFS_IOCTL, AUTOFS_IOC_EXPIRE_MULTI_CMD, int)
+#define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI
+#define AUTOFS_IOC_EXPIRE_DIRECT AUTOFS_IOC_EXPIRE_MULTI
+#define AUTOFS_IOC_PROTOSUBVER _IOR(AUTOFS_IOCTL, AUTOFS_IOC_PROTOSUBVER_CMD, int)
+#define AUTOFS_IOC_ASKUMOUNT _IOR(AUTOFS_IOCTL, AUTOFS_IOC_ASKUMOUNT_CMD, int)
#endif /* _LINUX_AUTO_FS4_H */
diff --git a/include/uapi/linux/mqueue.h b/include/uapi/linux/mqueue.h
index d0a2b8e89813..bbd5116ea739 100644
--- a/include/uapi/linux/mqueue.h
+++ b/include/uapi/linux/mqueue.h
@@ -18,6 +18,8 @@
#ifndef _LINUX_MQUEUE_H
#define _LINUX_MQUEUE_H
+#include <linux/types.h>
+
#define MQ_PRIO_MAX 32768
/* per-uid limit of kernel memory used by mqueue, in bytes */
#define MQ_BYTES_MAX 819200
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 9057d7af3ae1..c055947c5c98 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -11,13 +11,20 @@
#include <linux/types.h>
-#define UFFD_API ((__u64)0xAA)
/*
- * After implementing the respective features it will become:
- * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
- * UFFD_FEATURE_EVENT_FORK)
+ * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and
+ * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In
+ * userfaultfd.h we assumed the kernel was reading, whereas _IOC_READ
+ * actually means that userland is reading.
*/
-#define UFFD_API_FEATURES (0)
+#define UFFD_API ((__u64)0xAA)
+#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_EXIT | \
+ UFFD_FEATURE_EVENT_FORK | \
+ UFFD_FEATURE_EVENT_REMAP | \
+ UFFD_FEATURE_EVENT_REMOVE | \
+ UFFD_FEATURE_EVENT_UNMAP | \
+ UFFD_FEATURE_MISSING_HUGETLBFS | \
+ UFFD_FEATURE_MISSING_SHMEM)
#define UFFD_API_IOCTLS \
((__u64)1 << _UFFDIO_REGISTER | \
(__u64)1 << _UFFDIO_UNREGISTER | \
@@ -26,6 +33,9 @@
((__u64)1 << _UFFDIO_WAKE | \
(__u64)1 << _UFFDIO_COPY | \
(__u64)1 << _UFFDIO_ZEROPAGE)
+#define UFFD_API_RANGE_IOCTLS_BASIC \
+ ((__u64)1 << _UFFDIO_WAKE | \
+ (__u64)1 << _UFFDIO_COPY)
/*
* Valid ioctl command number range with this API is from 0x00 to
@@ -72,6 +82,21 @@ struct uffd_msg {
} pagefault;
struct {
+ __u32 ufd;
+ } fork;
+
+ struct {
+ __u64 from;
+ __u64 to;
+ __u64 len;
+ } remap;
+
+ struct {
+ __u64 start;
+ __u64 end;
+ } remove;
+
+ struct {
/* unused reserved fields */
__u64 reserved1;
__u64 reserved2;
@@ -84,9 +109,11 @@ struct uffd_msg {
* Start at 0x12 and not at 0 to be more strict against bugs.
*/
#define UFFD_EVENT_PAGEFAULT 0x12
-#if 0 /* not available yet */
#define UFFD_EVENT_FORK 0x13
-#endif
+#define UFFD_EVENT_REMAP 0x14
+#define UFFD_EVENT_REMOVE 0x15
+#define UFFD_EVENT_UNMAP 0x16
+#define UFFD_EVENT_EXIT 0x17
/* flags for UFFD_EVENT_PAGEFAULT */
#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */
@@ -104,11 +131,39 @@ struct uffdio_api {
* Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
* are to be considered implicitly always enabled in all kernels as
* long as the uffdio_api.api requested matches UFFD_API.
+ *
+ * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER
+ * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on
+ * hugetlbfs virtual memory ranges. Adding or not adding
+ * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has
+ * no real functional effect after UFFDIO_API returns; it
+ * is only useful for an initial feature set probe at
+ * UFFDIO_API time. There are two ways to use it:
+ *
+ * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the
+ * uffdio_api.features before calling UFFDIO_API, an error
+ * will be returned by UFFDIO_API on a kernel without
+ * missing-mode support for hugetlbfs
+ *
+ * 2) UFFD_FEATURE_MISSING_HUGETLBFS can be left out of
+ * uffdio_api.features; the kernel will instead set it in
+ * uffdio_api.features if the kernel supports it, so
+ * userland can check whether the feature flag is present
+ * in uffdio_api.features after UFFDIO_API has
+ * succeeded.
+ *
+ * UFFD_FEATURE_MISSING_SHMEM works the same as
+ * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem
+ * (i.e. tmpfs and other shmem based APIs).
*/
-#if 0 /* not available yet */
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
#define UFFD_FEATURE_EVENT_FORK (1<<1)
-#endif
+#define UFFD_FEATURE_EVENT_REMAP (1<<2)
+#define UFFD_FEATURE_EVENT_REMOVE (1<<3)
+#define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4)
+#define UFFD_FEATURE_MISSING_SHMEM (1<<5)
+#define UFFD_FEATURE_EVENT_UNMAP (1<<6)
+#define UFFD_FEATURE_EVENT_EXIT (1<<7)
__u64 features;
__u64 ioctls;