author    Michael Ellerman <mpe@ellerman.id.au>  2016-03-11 11:20:15 +1100
committer Michael Ellerman <mpe@ellerman.id.au>  2016-03-11 11:20:15 +1100
commit    d8c0282f4da6d5335fee56141ca92284026f4818 (patch)
tree      264153c2762ef1682cc685700139b6948bbeb1f6
parent    58bffb5bbb238d56e8818acb463cce990021fa32 (diff)
parent    8c50b72a3b4f1f7cdfdfebd233b1cbd121262e65 (diff)
Merge branch 'topic/mprofile-kernel' into next
Merge the ftrace changes to support -mprofile-kernel on ppc64le. This is a prerequisite for live patching, the support for which will be merged via the livepatch tree based on this topic branch.
-rw-r--r--  arch/powerpc/Kconfig                                 19
-rw-r--r--  arch/powerpc/Makefile                                15
-rw-r--r--  arch/powerpc/include/asm/code-patching.h             21
-rw-r--r--  arch/powerpc/include/asm/ftrace.h                     5
-rw-r--r--  arch/powerpc/include/asm/module.h                    12
-rw-r--r--  arch/powerpc/include/asm/sections.h                  12
-rw-r--r--  arch/powerpc/kernel/Makefile                         12
-rw-r--r--  arch/powerpc/kernel/entry_64.S                      166
-rw-r--r--  arch/powerpc/kernel/ftrace.c                        132
-rw-r--r--  arch/powerpc/kernel/module.c                          5
-rw-r--r--  arch/powerpc/kernel/module_32.c                      20
-rw-r--r--  arch/powerpc/kernel/module_64.c                     214
-rw-r--r--  arch/powerpc/kernel/paca.c                           11
-rw-r--r--  arch/powerpc/lib/Makefile                             4
-rw-r--r--  arch/powerpc/platforms/powermac/Makefile              2
-rwxr-xr-x  arch/powerpc/scripts/gcc-check-mprofile-kernel.sh    23
16 files changed, 546 insertions, 127 deletions
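Background for the diffs that follow: with -mprofile-kernel the _mcount call lands at the very top of the function, before a stack frame is built, so parameter registers are still live. A sketch of the call-site shapes the series handles, restated from the checks in the diff below; which prologue the compiler emits depends on the toolchain version, and the bl offset here is only a placeholder:

#include <stdio.h>
#include <stdint.h>

/* Older toolchains emit: mflr r0; std r0,16(r1); bl _mcount
 * Newer ones emit just:  mflr r0; bl _mcount
 * The bl displacement is filled in by a relocation; 0 is a placeholder.
 */
static const uint32_t mprofile_callsite[] = {
	0x7c0802a6,	/* mflr r0 */
	0xf8010010,	/* std  r0,16(r1)  (older GCC only) */
	0x48000001,	/* bl   _mcount    (LI relocated, placeholder) */
};

int main(void)
{
	unsigned int i;

	for (i = 0; i < sizeof(mprofile_callsite) / sizeof(mprofile_callsite[0]); i++)
		printf("0x%08x\n", mprofile_callsite[i]);
	return 0;
}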
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 7efddd18d700..5130fa166a93 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -94,6 +94,7 @@ config PPC
select OF_RESERVED_MEM
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE
+ select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select SYSCTL_EXCEPTION_TRACE
@@ -373,6 +374,24 @@ config PPC_TRANSACTIONAL_MEM
---help---
Support user-mode Transactional Memory on POWERPC.
+config DISABLE_MPROFILE_KERNEL
+ bool "Disable use of mprofile-kernel for kernel tracing"
+ depends on PPC64 && CPU_LITTLE_ENDIAN
+ default y
+ help
+ Selecting this option disables use of the mprofile-kernel ABI for
+ kernel tracing. That will also cause options which depend on
+ CONFIG_DYNAMIC_FTRACE_WITH_REGS, such as live patching
+ (CONFIG_LIVEPATCH), to be disabled.
+
+ If you have a toolchain which supports mprofile-kernel, then you can
+ disable this. Otherwise leave it enabled. If you're not sure, say
+ "Y".
+
+config MPROFILE_KERNEL
+ depends on PPC64 && CPU_LITTLE_ENDIAN
+ def_bool !DISABLE_MPROFILE_KERNEL
+
config IOMMU_HELPER
def_bool PPC64
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 96efd8213c1c..f4e49a4153cb 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -133,6 +133,21 @@ else
CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64
endif
+ifdef CONFIG_MPROFILE_KERNEL
+ ifeq ($(shell $(srctree)/arch/powerpc/scripts/gcc-check-mprofile-kernel.sh $(CC) -I$(srctree)/include -D__KERNEL__),OK)
+ CC_FLAGS_FTRACE := -pg -mprofile-kernel
+ KBUILD_CPPFLAGS += -DCC_USING_MPROFILE_KERNEL
+ else
+ # If the user asked for mprofile-kernel but the toolchain doesn't
+ # support it, emit a warning and deliberately break the build later
+ # with mprofile-kernel-not-supported. We would prefer to make this an
+ # error right here, but then the user would never be able to run
+ # oldconfig to change their configuration.
+ $(warning Compiler does not support mprofile-kernel, set CONFIG_DISABLE_MPROFILE_KERNEL)
+ CC_FLAGS_FTRACE := -mprofile-kernel-not-supported
+ endif
+endif
+
CFLAGS-$(CONFIG_CELL_CPU) += $(call cc-option,-mcpu=cell)
CFLAGS-$(CONFIG_POWER4_CPU) += $(call cc-option,-mcpu=power4)
CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5)
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 840a5509b3f1..994c60a857ce 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -99,4 +99,25 @@ static inline unsigned long ppc_global_function_entry(void *func)
#endif
}
+#ifdef CONFIG_PPC64
+/*
+ * Some instruction encodings commonly used in dynamic ftracing
+ * and function live patching.
+ */
+
+/* This must match the definition of STK_GOT in <asm/ppc_asm.h> */
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define R2_STACK_OFFSET 24
+#else
+#define R2_STACK_OFFSET 40
+#endif
+
+#define PPC_INST_LD_TOC (PPC_INST_LD | ___PPC_RT(__REG_R2) | \
+ ___PPC_RA(__REG_R1) | R2_STACK_OFFSET)
+
+/* usually preceded by a mflr r0 */
+#define PPC_INST_STD_LR (PPC_INST_STD | ___PPC_RS(__REG_R0) | \
+ ___PPC_RA(__REG_R1) | PPC_LR_STKOFF)
+#endif /* CONFIG_PPC64 */
+
#endif /* _ASM_POWERPC_CODE_PATCHING_H */
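As a concrete check of the encodings above, a small standalone sketch (not kernel code; the base opcode and register-field shifts mirror asm/ppc-opcode.h and are restated here as assumptions) composing the ELFv2 'ld r2,24(r1)' word:

#include <stdio.h>
#include <stdint.h>

#define PPC_INST_LD     0xe8000000u           /* ld rt,ds(ra) base opcode */
#define ___PPC_RT(r)    ((uint32_t)(r) << 21) /* destination register     */
#define ___PPC_RA(r)    ((uint32_t)(r) << 16) /* base register            */
#define R2_STACK_OFFSET 24                    /* ELFv2 TOC save slot      */

int main(void)
{
	/* ld r2,24(r1): the TOC restore expected after a cross-module call */
	uint32_t insn = PPC_INST_LD | ___PPC_RT(2) | ___PPC_RA(1) | R2_STACK_OFFSET;

	printf("PPC_INST_LD_TOC = 0x%08x\n", insn); /* prints 0xe8410018 */
	return 0;
}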
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index ef89b1465573..50ca7585abe2 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -46,6 +46,8 @@
extern void _mcount(void);
#ifdef CONFIG_DYNAMIC_FTRACE
+# define FTRACE_ADDR ((unsigned long)ftrace_caller)
+# define FTRACE_REGS_ADDR FTRACE_ADDR
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
/* relocation of the mcount call site is the same as the address */
@@ -58,6 +60,9 @@ struct dyn_arch_ftrace {
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* __ASSEMBLY__ */
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+#endif
#endif
#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__)
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index 5f1526fddccf..cd4ffd86765f 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -78,10 +78,18 @@ struct mod_arch_specific {
# endif /* MODULE */
#endif
-bool is_module_trampoline(u32 *insns);
-int module_trampoline_target(struct module *mod, u32 *trampoline,
+int module_trampoline_target(struct module *mod, unsigned long trampoline,
unsigned long *target);
+#ifdef CONFIG_DYNAMIC_FTRACE
+int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs);
+#else
+static inline int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs)
+{
+ return 0;
+}
+#endif
+
struct exception_table_entry;
void sort_ex_table(struct exception_table_entry *start,
struct exception_table_entry *finish);
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index a5e930aca804..abf5866e08c6 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -22,6 +22,18 @@ static inline int in_kernel_text(unsigned long addr)
return 0;
}
+static inline unsigned long kernel_toc_addr(void)
+{
+ /* Defined by the linker, see vmlinux.lds.S */
+ extern unsigned long __toc_start;
+
+ /*
+ * The TOC register (r2) points 32kB into the TOC, so that 64kB of
+ * the TOC can be addressed using a single machine instruction.
+ */
+ return (unsigned long)(&__toc_start) + 0x8000UL;
+}
+
static inline int overlaps_interrupt_vector_text(unsigned long start,
unsigned long end)
{
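A quick standalone illustration of the 0x8000 bias in kernel_toc_addr(): a D-form load takes a signed 16-bit displacement, so pointing r2 32kB past __toc_start puts the whole 64kB TOC in reach of one instruction. The addresses below are invented for the demo:

#include <stdio.h>

int main(void)
{
	unsigned long toc_start = 0x10000000UL;  /* hypothetical __toc_start */
	unsigned long r2 = toc_start + 0x8000UL; /* as in kernel_toc_addr()  */

	/* span reachable with a signed 16-bit displacement from r2 */
	printf("low  = 0x%lx\n", r2 - 0x8000);   /* == toc_start             */
	printf("high = 0x%lx\n", r2 + 0x7fff);   /* toc_start + 64kB - 1     */
	return 0;
}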
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 794f22adf99d..2da380fcc34c 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -16,14 +16,14 @@ endif
ifdef CONFIG_FUNCTION_TRACER
# Do not trace early boot code
-CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog
-CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog
-CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog
-CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog
+CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_prom_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_btext.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_prom.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
# do not trace tracer code
-CFLAGS_REMOVE_ftrace.o = -pg -mno-sched-epilog
+CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
# timers used by tracing
-CFLAGS_REMOVE_time.o = -pg -mno-sched-epilog
+CFLAGS_REMOVE_time.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
endif
obj-y := cputable.o ptrace.o syscalls.o \
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 038e0a1425e7..b9b125327f27 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -1158,8 +1158,12 @@ _GLOBAL(enter_prom)
#ifdef CONFIG_DYNAMIC_FTRACE
_GLOBAL(mcount)
_GLOBAL(_mcount)
- blr
+ mflr r12
+ mtctr r12
+ mtlr r0
+ bctr
+#ifndef CC_USING_MPROFILE_KERNEL
_GLOBAL_TOC(ftrace_caller)
/* Taken from output of objdump from lib64/glibc */
mflr r3
@@ -1181,6 +1185,115 @@ _GLOBAL(ftrace_graph_stub)
ld r0, 128(r1)
mtlr r0
addi r1, r1, 112
+
+#else /* CC_USING_MPROFILE_KERNEL */
+/*
+ * ftrace_caller() is the function that replaces _mcount() when ftrace is
+ * active.
+ *
+ * We arrive here after a function A calls function B, and we are the trace
+ * function for B. When we enter, r1 points to A's stack frame; B has not
+ * yet had a chance to allocate one.
+ *
+ * Additionally, r2 may point to the TOC for either A or B, depending on
+ * whether B did a TOC setup sequence before calling us.
+ *
+ * On entry the LR points back to the _mcount() call site, and r0 holds the
+ * saved LR as it was on entry to B, ie. the original return address at the
+ * call site in A.
+ *
+ * Our job is to save the register state into a struct pt_regs (on the stack)
+ * and then arrange for the ftrace function to be called.
+ */
+_GLOBAL(ftrace_caller)
+ /* Save the original return address in A's stack frame */
+ std r0,LRSAVE(r1)
+
+ /* Create our stack frame + pt_regs */
+ stdu r1,-SWITCH_FRAME_SIZE(r1)
+
+ /* Save all gprs to pt_regs */
+ SAVE_8GPRS(0,r1)
+ SAVE_8GPRS(8,r1)
+ SAVE_8GPRS(16,r1)
+ SAVE_8GPRS(24,r1)
+
+ /* Load special regs for save below */
+ mfmsr r8
+ mfctr r9
+ mfxer r10
+ mfcr r11
+
+ /* Get the _mcount() call site out of LR */
+ mflr r7
+ /* Save it as pt_regs->nip & pt_regs->link */
+ std r7, _NIP(r1)
+ std r7, _LINK(r1)
+
+ /* Save callee's TOC in the ABI compliant location */
+ std r2, 24(r1)
+ ld r2,PACATOC(r13) /* get kernel TOC in r2 */
+
+ addis r3,r2,function_trace_op@toc@ha
+ addi r3,r3,function_trace_op@toc@l
+ ld r5,0(r3)
+
+ /* Calculate ip from nip-4 into r3 for call below */
+ subi r3, r7, MCOUNT_INSN_SIZE
+
+ /* Put the original return address in r4 as parent_ip */
+ mr r4, r0
+
+ /* Save special regs */
+ std r8, _MSR(r1)
+ std r9, _CTR(r1)
+ std r10, _XER(r1)
+ std r11, _CCR(r1)
+
+ /* Load &pt_regs in r6 for call below */
+ addi r6, r1, STACK_FRAME_OVERHEAD
+
+ /* ftrace_call(r3, r4, r5, r6) */
+.globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ nop
+
+ /* Load ctr with the possibly modified NIP */
+ ld r3, _NIP(r1)
+ mtctr r3
+
+ /* Restore gprs */
+ REST_8GPRS(0,r1)
+ REST_8GPRS(8,r1)
+ REST_8GPRS(16,r1)
+ REST_8GPRS(24,r1)
+
+ /* Restore callee's TOC */
+ ld r2, 24(r1)
+
+ /* Pop our stack frame */
+ addi r1, r1, SWITCH_FRAME_SIZE
+
+ /* Restore original LR for return to B */
+ ld r0, LRSAVE(r1)
+ mtlr r0
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ stdu r1, -112(r1)
+.globl ftrace_graph_call
+ftrace_graph_call:
+ b ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+ addi r1, r1, 112
+#endif
+
+ ld r0,LRSAVE(r1) /* restore callee's lr at _mcount site */
+ mtlr r0
+ bctr /* jump after _mcount site */
+#endif /* CC_USING_MPROFILE_KERNEL */
+
_GLOBAL(ftrace_stub)
blr
#else
@@ -1213,6 +1326,7 @@ _GLOBAL(ftrace_stub)
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+#ifndef CC_USING_MPROFILE_KERNEL
_GLOBAL(ftrace_graph_caller)
/* load r4 with local address */
ld r4, 128(r1)
@@ -1237,6 +1351,56 @@ _GLOBAL(ftrace_graph_caller)
addi r1, r1, 112
blr
+#else /* CC_USING_MPROFILE_KERNEL */
+_GLOBAL(ftrace_graph_caller)
+ /* with -mprofile-kernel, parameter regs are still alive at _mcount */
+ std r10, 104(r1)
+ std r9, 96(r1)
+ std r8, 88(r1)
+ std r7, 80(r1)
+ std r6, 72(r1)
+ std r5, 64(r1)
+ std r4, 56(r1)
+ std r3, 48(r1)
+
+ /* Save callee's TOC in the ABI compliant location */
+ std r2, 24(r1)
+ ld r2, PACATOC(r13) /* get kernel TOC in r2 */
+
+ mfctr r4 /* ftrace_caller has moved local addr here */
+ std r4, 40(r1)
+ mflr r3 /* ftrace_caller has restored LR from stack */
+ subi r4, r4, MCOUNT_INSN_SIZE
+
+ bl prepare_ftrace_return
+ nop
+
+ /*
+ * prepare_ftrace_return gives us the address we divert to.
+ * Change the LR to this.
+ */
+ mtlr r3
+
+ ld r0, 40(r1)
+ mtctr r0
+ ld r10, 104(r1)
+ ld r9, 96(r1)
+ ld r8, 88(r1)
+ ld r7, 80(r1)
+ ld r6, 72(r1)
+ ld r5, 64(r1)
+ ld r4, 56(r1)
+ ld r3, 48(r1)
+
+ /* Restore callee's TOC */
+ ld r2, 24(r1)
+
+ addi r1, r1, 112
+ mflr r0
+ std r0, LRSAVE(r1)
+ bctr
+#endif /* CC_USING_MPROFILE_KERNEL */
+
_GLOBAL(return_to_handler)
/* need to save return values */
std r4, -32(r1)
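What the new ftrace_caller makes possible, restated in C: a tracer now receives the full register state and may redirect execution by rewriting the saved NIP, which is the hook live patching builds on. A minimal userspace sketch with simplified stand-in types (the real callback also takes a struct ftrace_ops argument):

#include <stdio.h>

struct pt_regs_sketch {
	unsigned long nip;	/* saved from LR at the _mcount site */
	unsigned long link;
	unsigned long gpr[32];
};

static void my_tracer(unsigned long ip, unsigned long parent_ip,
		      struct pt_regs_sketch *regs)
{
	printf("traced 0x%lx (called from 0x%lx)\n", ip, parent_ip);
	/* a live-patch style handler could set regs->nip to a replacement
	 * function; ftrace_caller loads the possibly-modified NIP into ctr
	 * and branches there */
}

int main(void)
{
	struct pt_regs_sketch regs = { .nip = 0x1000, .link = 0x1000 };

	my_tracer(regs.nip - 4, 0x2000, &regs); /* ip = nip - MCOUNT_INSN_SIZE */
	return 0;
}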
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 44d4d8eb3c85..9dac18dabd03 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -61,8 +61,11 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
return -EFAULT;
/* Make sure it is what we expect it to be */
- if (replaced != old)
+ if (replaced != old) {
+ pr_err("%p: replaced (%#x) != old (%#x)",
+ (void *)ip, replaced, old);
return -EINVAL;
+ }
/* replace the text with the new text */
if (patch_instruction((unsigned int *)ip, new))
@@ -106,14 +109,15 @@ static int
__ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
- unsigned int op;
- unsigned long entry, ptr;
+ unsigned long entry, ptr, tramp;
unsigned long ip = rec->ip;
- void *tramp;
+ unsigned int op, pop;
/* read where this goes */
- if (probe_kernel_read(&op, (void *)ip, sizeof(int)))
+ if (probe_kernel_read(&op, (void *)ip, sizeof(int))) {
+ pr_err("Fetching opcode failed.\n");
return -EFAULT;
+ }
/* Make sure that this is still a 24-bit jump */
if (!is_bl_op(op)) {
@@ -122,14 +126,9 @@ __ftrace_make_nop(struct module *mod,
}
/* lets find where the pointer goes */
- tramp = (void *)find_bl_target(ip, op);
-
- pr_devel("ip:%lx jumps to %p", ip, tramp);
+ tramp = find_bl_target(ip, op);
- if (!is_module_trampoline(tramp)) {
- pr_err("Not a trampoline\n");
- return -EINVAL;
- }
+ pr_devel("ip:%lx jumps to %lx", ip, tramp);
if (module_trampoline_target(mod, tramp, &ptr)) {
pr_err("Failed to get trampoline target\n");
@@ -158,10 +157,42 @@ __ftrace_make_nop(struct module *mod,
*
* Use a b +8 to jump over the load.
*/
- op = 0x48000008; /* b +8 */
- if (patch_instruction((unsigned int *)ip, op))
+ pop = PPC_INST_BRANCH | 8; /* b +8 */
+
+ /*
+ * Check what is in the next instruction. We can see ld r2,40(r1), but
+ * on the first pass after boot we will see mflr r0.
+ */
+ if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE)) {
+ pr_err("Fetching op failed.\n");
+ return -EFAULT;
+ }
+
+ if (op != PPC_INST_LD_TOC) {
+ unsigned int inst;
+
+ if (probe_kernel_read(&inst, (void *)(ip - 4), 4)) {
+ pr_err("Fetching instruction at %lx failed.\n", ip - 4);
+ return -EFAULT;
+ }
+
+ /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
+ if (inst != PPC_INST_MFLR && inst != PPC_INST_STD_LR) {
+ pr_err("Unexpected instructions around bl _mcount\n"
+ "when enabling dynamic ftrace!\t"
+ "(%08x,bl,%08x)\n", inst, op);
+ return -EINVAL;
+ }
+
+ /* When using -mprofile-kernel there is no load to jump over */
+ pop = PPC_INST_NOP;
+ }
+
+ if (patch_instruction((unsigned int *)ip, pop)) {
+ pr_err("Patching NOP failed.\n");
return -EPERM;
+ }
return 0;
}
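__ftrace_make_nop() above relies on the is_bl_op()/find_bl_target() helpers; a hedged userspace sketch of that decoding, with the mask and opcode following the Power ISA I-form branch (simplified from the kernel's helpers):

#include <stdio.h>
#include <stdint.h>

static int is_bl_op(uint32_t op)
{
	return (op & 0xfc000003) == 0x48000001;   /* opcode 18, AA=0, LK=1 */
}

static uint64_t find_bl_target(uint64_t ip, uint32_t op)
{
	int32_t offset = op & 0x03fffffc;         /* LI field, word aligned */

	if (offset & 0x02000000)                  /* sign-extend 26 bits    */
		offset -= 0x04000000;
	return ip + offset;
}

int main(void)
{
	uint64_t ip = 0xc000000000001000ULL;      /* made-up call site */
	uint32_t op = 0x48000009;                 /* bl +8 */

	if (is_bl_op(op))
		printf("target = 0x%llx\n",
		       (unsigned long long)find_bl_target(ip, op));
	return 0;
}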
@@ -287,16 +318,15 @@ int ftrace_make_nop(struct module *mod,
#ifdef CONFIG_MODULES
#ifdef CONFIG_PPC64
+/*
+ * Examine the existing instructions for __ftrace_make_call.
+ * They should effectively be a NOP, and follow formal constraints,
+ * depending on the ABI. Return false if they don't.
+ */
+#ifndef CC_USING_MPROFILE_KERNEL
static int
-__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1)
{
- unsigned int op[2];
- void *ip = (void *)rec->ip;
-
- /* read where this goes */
- if (probe_kernel_read(op, ip, sizeof(op)))
- return -EFAULT;
-
/*
* We expect to see:
*
@@ -306,8 +336,34 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
* The load offset is different depending on the ABI. For simplicity
* just mask it out when doing the compare.
*/
- if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
- pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);
+ if ((op0 != 0x48000008) || ((op1 & 0xffff0000) != 0xe8410000))
+ return 0;
+ return 1;
+}
+#else
+static int
+expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1)
+{
+ /* look for patched "NOP" on ppc64 with -mprofile-kernel */
+ if (op0 != PPC_INST_NOP)
+ return 0;
+ return 1;
+}
+#endif
+
+static int
+__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned int op[2];
+ void *ip = (void *)rec->ip;
+
+ /* read where this goes */
+ if (probe_kernel_read(op, ip, sizeof(op)))
+ return -EFAULT;
+
+ if (!expected_nop_sequence(ip, op[0], op[1])) {
+ pr_err("Unexpected call sequence at %p: %x %x\n",
+ ip, op[0], op[1]);
return -EINVAL;
}
@@ -330,7 +386,16 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
return 0;
}
-#else
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ return ftrace_make_call(rec, addr);
+}
+#endif
+
+#else /* !CONFIG_PPC64: */
static int
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
@@ -455,20 +520,13 @@ void ftrace_replace_code(int enable)
}
}
+/*
+ * Use the default ftrace_modify_all_code, but without
+ * stop_machine().
+ */
void arch_ftrace_update_code(int command)
{
- if (command & FTRACE_UPDATE_CALLS)
- ftrace_replace_code(1);
- else if (command & FTRACE_DISABLE_CALLS)
- ftrace_replace_code(0);
-
- if (command & FTRACE_UPDATE_TRACE_FUNC)
- ftrace_update_ftrace_func(ftrace_trace_function);
-
- if (command & FTRACE_START_FUNC_RET)
- ftrace_enable_ftrace_graph_caller();
- else if (command & FTRACE_STOP_FUNC_RET)
- ftrace_disable_ftrace_graph_caller();
+ ftrace_modify_all_code(command);
}
int __init ftrace_dyn_arch_init(void)
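The expected_nop_sequence() split introduced above reduces to two accepted shapes at a disabled call site. A compact standalone restatement (instruction constants copied from the hunks above; the configuration macro is this sketch's own):

#include <stdio.h>
#include <stdint.h>

#define PPC_INST_NOP 0x60000000u

#ifdef SKETCH_MPROFILE_KERNEL
/* -mprofile-kernel: the disabled call site is a single patched nop */
static int expected_nop_sequence(uint32_t op0, uint32_t op1)
{
	(void)op1;
	return op0 == PPC_INST_NOP;
}
#else
/* classic case: 'b +8' skipping a 'ld r2,<offset>(r1)' TOC restore;
 * the offset is ABI-dependent, so it is masked out of the compare */
static int expected_nop_sequence(uint32_t op0, uint32_t op1)
{
	return op0 == 0x48000008 && (op1 & 0xffff0000) == 0xe8410000;
}
#endif

int main(void)
{
	/* prints 1 then 0 in the default (non-mprofile) build */
	printf("%d\n", expected_nop_sequence(0x48000008, 0xe8410018));
	printf("%d\n", expected_nop_sequence(PPC_INST_NOP, PPC_INST_NOP));
	return 0;
}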
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index 9547381b631a..d1f1b35bf0c7 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -47,6 +47,11 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs, struct module *me)
{
const Elf_Shdr *sect;
+ int rc;
+
+ rc = module_finalize_ftrace(me, sechdrs);
+ if (rc)
+ return rc;
/* Apply feature fixups */
sect = find_section(hdr, sechdrs, "__ftr_fixup");
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index 2c01665eb410..5a7a78f12562 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -181,7 +181,7 @@ static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val)
/* Set up a trampoline in the PLT to bounce us to the distant function */
static uint32_t do_plt_call(void *location,
Elf32_Addr val,
- Elf32_Shdr *sechdrs,
+ const Elf32_Shdr *sechdrs,
struct module *mod)
{
struct ppc_plt_entry *entry;
@@ -294,11 +294,19 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
return -ENOEXEC;
}
}
+
+ return 0;
+}
+
#ifdef CONFIG_DYNAMIC_FTRACE
- module->arch.tramp =
- do_plt_call(module->core_layout.base,
- (unsigned long)ftrace_caller,
- sechdrs, module);
-#endif
+int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs)
+{
+ module->arch.tramp = do_plt_call(module->core_layout.base,
+ (unsigned long)ftrace_caller,
+ sechdrs, module);
+ if (!module->arch.tramp)
+ return -ENOENT;
+
return 0;
}
+#endif
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 08b7a40de5f8..9ce9a25f58b5 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -31,6 +31,7 @@
#include <asm/code-patching.h>
#include <linux/sort.h>
#include <asm/setup.h>
+#include <asm/sections.h>
/* FIXME: We don't do .init separately. To do this, we'd need to have
a separate r2 value in the init and core section, and stub between
@@ -41,7 +42,6 @@
--RR. */
#if defined(_CALL_ELF) && _CALL_ELF == 2
-#define R2_STACK_OFFSET 24
/* An address is simply the address of the function. */
typedef unsigned long func_desc_t;
@@ -73,7 +73,6 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym)
return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
}
#else
-#define R2_STACK_OFFSET 40
/* An address is address of the OPD entry, which contains address of fn. */
typedef struct ppc64_opd_entry func_desc_t;
@@ -96,6 +95,8 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym)
}
#endif
+#define STUB_MAGIC 0x73747562 /* stub */
+
/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
the kernel itself). But on PPC64, these need to be used for every
jump, actually, to reset r2 (TOC+0x8000). */
@@ -105,7 +106,8 @@ struct ppc64_stub_entry
* need 6 instructions on ABIv2 but we always allocate 7 so
* we don't have to modify the trampoline load instruction. */
u32 jump[7];
- u32 unused;
+ /* Used by ftrace to identify stubs */
+ u32 magic;
/* Data for the above code */
func_desc_t funcdata;
};
@@ -139,70 +141,39 @@ static u32 ppc64_stub_insns[] = {
};
#ifdef CONFIG_DYNAMIC_FTRACE
-
-static u32 ppc64_stub_mask[] = {
- 0xffff0000,
- 0xffff0000,
- 0xffffffff,
- 0xffffffff,
-#if !defined(_CALL_ELF) || _CALL_ELF != 2
- 0xffffffff,
-#endif
- 0xffffffff,
- 0xffffffff
-};
-
-bool is_module_trampoline(u32 *p)
+int module_trampoline_target(struct module *mod, unsigned long addr,
+ unsigned long *target)
{
- unsigned int i;
- u32 insns[ARRAY_SIZE(ppc64_stub_insns)];
-
- BUILD_BUG_ON(sizeof(ppc64_stub_insns) != sizeof(ppc64_stub_mask));
+ struct ppc64_stub_entry *stub;
+ func_desc_t funcdata;
+ u32 magic;
- if (probe_kernel_read(insns, p, sizeof(insns)))
+ if (!within_module_core(addr, mod)) {
+ pr_err("%s: stub %lx not in module %s\n", __func__, addr, mod->name);
return -EFAULT;
-
- for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) {
- u32 insna = insns[i];
- u32 insnb = ppc64_stub_insns[i];
- u32 mask = ppc64_stub_mask[i];
-
- if ((insna & mask) != (insnb & mask))
- return false;
}
- return true;
-}
-
-int module_trampoline_target(struct module *mod, u32 *trampoline,
- unsigned long *target)
-{
- u32 buf[2];
- u16 upper, lower;
- long offset;
- void *toc_entry;
+ stub = (struct ppc64_stub_entry *)addr;
- if (probe_kernel_read(buf, trampoline, sizeof(buf)))
+ if (probe_kernel_read(&magic, &stub->magic, sizeof(magic))) {
+ pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name);
return -EFAULT;
+ }
- upper = buf[0] & 0xffff;
- lower = buf[1] & 0xffff;
-
- /* perform the addis/addi, both signed */
- offset = ((short)upper << 16) + (short)lower;
+ if (magic != STUB_MAGIC) {
+ pr_err("%s: bad magic for stub %lx for %s\n", __func__, addr, mod->name);
+ return -EFAULT;
+ }
- /*
- * Now get the address this trampoline jumps to. This
- * is always 32 bytes into our trampoline stub.
- */
- toc_entry = (void *)mod->arch.toc + offset + 32;
+ if (probe_kernel_read(&funcdata, &stub->funcdata, sizeof(funcdata))) {
+ pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name);
+ return -EFAULT;
+ }
- if (probe_kernel_read(target, toc_entry, sizeof(*target)))
- return -EFAULT;
+ *target = stub_func_addr(funcdata);
return 0;
}
-
#endif
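The rewritten module_trampoline_target() swaps instruction pattern-matching for a magic-word check plus a direct funcdata read. A simplified userspace model of that flow (the struct layout and types are stand-ins, not the kernel's exact definitions; funcdata holds the target address directly only on ELFv2):

#include <stdio.h>
#include <stdint.h>

#define STUB_MAGIC 0x73747562u			/* "stub" */

struct stub_entry {				/* simplified ppc64_stub_entry */
	uint32_t jump[7];
	uint32_t magic;				/* identifies ftrace stubs */
	uint64_t funcdata;			/* ELFv2: the target address */
};

static int trampoline_target(const struct stub_entry *stub, uint64_t *target)
{
	if (stub->magic != STUB_MAGIC)
		return -1;			/* not one of our stubs */
	*target = stub->funcdata;		/* stub_func_addr() on ELFv2 */
	return 0;
}

int main(void)
{
	struct stub_entry s = {
		.magic = STUB_MAGIC,
		.funcdata = 0xc000000000001234ULL,	/* made-up target */
	};
	uint64_t target;

	if (!trampoline_target(&s, &target))
		printf("target = 0x%llx\n", (unsigned long long)target);
	return 0;
}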
/* Count how many different 24-bit relocations (different symbol,
@@ -413,7 +384,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
gives the value the maximum span reachable by an instruction which uses
a signed offset) */
-static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me)
+static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me)
{
return sechdrs[me->arch.toc_section].sh_addr + 0x8000;
}
@@ -426,7 +397,7 @@ static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me)
#define PPC_HA(v) PPC_HI ((v) + 0x8000)
/* Patch stub to reference function and correct r2 value. */
-static inline int create_stub(Elf64_Shdr *sechdrs,
+static inline int create_stub(const Elf64_Shdr *sechdrs,
struct ppc64_stub_entry *entry,
unsigned long addr,
struct module *me)
@@ -447,12 +418,14 @@ static inline int create_stub(Elf64_Shdr *sechdrs,
entry->jump[0] |= PPC_HA(reladdr);
entry->jump[1] |= PPC_LO(reladdr);
entry->funcdata = func_desc(addr);
+ entry->magic = STUB_MAGIC;
+
return 1;
}
/* Create stub to jump to function described in this OPD/ptr: we need the
stub to set up the TOC ptr (r2) for the function. */
-static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
+static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs,
unsigned long addr,
struct module *me)
{
@@ -476,17 +449,60 @@ static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
return (unsigned long)&stubs[i];
}
+#ifdef CC_USING_MPROFILE_KERNEL
+static bool is_early_mcount_callsite(u32 *instruction)
+{
+ /*
+ * Check if this is one of the -mprofile-kernel sequences.
+ */
+ if (instruction[-1] == PPC_INST_STD_LR &&
+ instruction[-2] == PPC_INST_MFLR)
+ return true;
+
+ if (instruction[-1] == PPC_INST_MFLR)
+ return true;
+
+ return false;
+}
+
+/*
+ * In the case of _mcount calls, do not save the current callee's TOC (in r2)
+ * into the original caller's stack frame. If we did, we would clobber the
+ * saved TOC value of the original caller.
+ */
+static void squash_toc_save_inst(const char *name, unsigned long addr)
+{
+ struct ppc64_stub_entry *stub = (struct ppc64_stub_entry *)addr;
+
+ /* Only for calls to _mcount */
+ if (strcmp("_mcount", name) != 0)
+ return;
+
+ stub->jump[2] = PPC_INST_NOP;
+}
+#else
+static void squash_toc_save_inst(const char *name, unsigned long addr) { }
+
+/* without -mprofile-kernel, mcount calls are never early */
+static bool is_early_mcount_callsite(u32 *instruction)
+{
+ return false;
+}
+#endif
+
/* We expect a noop next: if it is, replace it with an instruction to
restore r2. */
static int restore_r2(u32 *instruction, struct module *me)
{
if (*instruction != PPC_INST_NOP) {
+ if (is_early_mcount_callsite(instruction - 1))
+ return 1;
pr_err("%s: Expect noop after relocate, got %08x\n",
me->name, *instruction);
return 0;
}
/* ld r2,R2_STACK_OFFSET(r1) */
- *instruction = 0xe8410000 | R2_STACK_OFFSET;
+ *instruction = PPC_INST_LD_TOC;
return 1;
}
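restore_r2() now tolerates "early" _mcount call sites, found by looking backwards for one of the -mprofile-kernel prologues. A standalone sketch of that classification, using the same instruction words PPC_INST_MFLR and PPC_INST_STD_LR encode above:

#include <stdio.h>
#include <stdint.h>

#define PPC_INST_MFLR   0x7c0802a6u	/* mflr r0       */
#define PPC_INST_STD_LR 0xf8010010u	/* std r0,16(r1) */

static int is_early_mcount_callsite(const uint32_t *insn)
{
	if (insn[-1] == PPC_INST_STD_LR && insn[-2] == PPC_INST_MFLR)
		return 1;	/* mflr r0; std r0,16(r1); bl _mcount */
	if (insn[-1] == PPC_INST_MFLR)
		return 1;	/* mflr r0; bl _mcount */
	return 0;
}

int main(void)
{
	uint32_t seq[3] = { PPC_INST_MFLR, PPC_INST_STD_LR, 0 /* bl slot */ };

	printf("%d\n", is_early_mcount_callsite(&seq[2]));	/* prints 1 */
	return 0;
}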
@@ -611,6 +627,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
return -ENOENT;
if (!restore_r2((u32 *)location + 1, me))
return -ENOEXEC;
+
+ squash_toc_save_inst(strtab + sym->st_name, value);
} else
value += local_entry_offset(sym);
@@ -693,12 +711,84 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
}
}
+ return 0;
+}
+
#ifdef CONFIG_DYNAMIC_FTRACE
- me->arch.toc = my_r2(sechdrs, me);
- me->arch.tramp = stub_for_addr(sechdrs,
- (unsigned long)ftrace_caller,
- me);
+
+#ifdef CC_USING_MPROFILE_KERNEL
+
+#define PACATOC offsetof(struct paca_struct, kernel_toc)
+
+/*
+ * For mprofile-kernel we use a special stub for ftrace_caller() because we
+ * can't rely on r2 containing this module's TOC when we enter the stub.
+ *
+ * That can happen if the function calling us didn't need to use the toc. In
+ * that case it won't have set up r2, and the r2 value will be either the
+ * kernel's toc, or possibly another module's toc.
+ *
+ * To deal with that, this stub uses the kernel toc, which is always accessible
+ * via the paca (in r13). The target (ftrace_caller()) is responsible for
+ * saving and restoring the toc before returning.
+ */
+static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module *me)
+{
+ struct ppc64_stub_entry *entry;
+ unsigned int i, num_stubs;
+ static u32 stub_insns[] = {
+ 0xe98d0000 | PACATOC, /* ld r12,PACATOC(r13) */
+ 0x3d8c0000, /* addis r12,r12,<high> */
+ 0x398c0000, /* addi r12,r12,<low> */
+ 0x7d8903a6, /* mtctr r12 */
+ 0x4e800420, /* bctr */
+ };
+ long reladdr;
+
+ num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*entry);
+
+ /* Find the next available stub entry */
+ entry = (void *)sechdrs[me->arch.stubs_section].sh_addr;
+ for (i = 0; i < num_stubs && stub_func_addr(entry->funcdata); i++, entry++);
+
+ if (i >= num_stubs) {
+ pr_err("%s: Unable to find a free slot for ftrace stub.\n", me->name);
+ return 0;
+ }
+
+ memcpy(entry->jump, stub_insns, sizeof(stub_insns));
+
+ /* Stub uses address relative to kernel toc (from the paca) */
+ reladdr = (unsigned long)ftrace_caller - kernel_toc_addr();
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+ pr_err("%s: Address of ftrace_caller out of range of kernel_toc.\n", me->name);
+ return 0;
+ }
+
+ entry->jump[1] |= PPC_HA(reladdr);
+ entry->jump[2] |= PPC_LO(reladdr);
+
+ /* Even though we don't use funcdata in the stub, it's needed elsewhere. */
+ entry->funcdata = func_desc((unsigned long)ftrace_caller);
+ entry->magic = STUB_MAGIC;
+
+ return (unsigned long)entry;
+}
+#else
+static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module *me)
+{
+ return stub_for_addr(sechdrs, (unsigned long)ftrace_caller, me);
+}
#endif
+int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs)
+{
+ mod->arch.toc = my_r2(sechdrs, mod);
+ mod->arch.tramp = create_ftrace_stub(sechdrs, mod);
+
+ if (!mod->arch.tramp)
+ return -ENOENT;
+
return 0;
}
+#endif
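create_ftrace_stub() fills in the addis/addi pair with PPC_HA()/PPC_LO(). A self-contained demo of that immediate split, showing why the +0x8000 bias compensates for addi's sign extension (the offset value is arbitrary; right-shifting negative values is assumed to be arithmetic, as on the compilers the kernel supports):

#include <stdio.h>
#include <stdint.h>

#define PPC_LO(v) ((v) & 0xffff)
#define PPC_HI(v) (((v) >> 16) & 0xffff)
#define PPC_HA(v) PPC_HI((v) + 0x8000)

int main(void)
{
	long reladdr = -0x12345678;	/* made-up ftrace_caller - kernel TOC */
	uint32_t addis = 0x3d8c0000 | PPC_HA(reladdr);	/* addis r12,r12,<ha> */
	uint32_t addi  = 0x398c0000 | PPC_LO(reladdr);	/* addi  r12,r12,<lo> */

	printf("addis = 0x%08x, addi = 0x%08x\n", addis, addi);

	/* reassemble: (ha << 16) + sign-extended lo should equal reladdr */
	long check = ((long)(int16_t)PPC_HA(reladdr) << 16)
		   + (int16_t)PPC_LO(reladdr);
	printf("round trip %s\n", check == reladdr ? "ok" : "BAD");
	return 0;
}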
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 01ea0edf0579..93dae296b6be 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -17,10 +17,6 @@
#include <asm/pgtable.h>
#include <asm/kexec.h>
-/* This symbol is provided by the linker - let it fill in the paca
- * field correctly */
-extern unsigned long __toc_start;
-
#ifdef CONFIG_PPC_BOOK3S
/*
@@ -149,11 +145,6 @@ EXPORT_SYMBOL(paca);
void __init initialise_paca(struct paca_struct *new_paca, int cpu)
{
- /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB
- * of the TOC can be addressed using a single machine instruction.
- */
- unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL;
-
#ifdef CONFIG_PPC_BOOK3S
new_paca->lppaca_ptr = new_lppaca(cpu);
#else
@@ -161,7 +152,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
#endif
new_paca->lock_token = 0x8000;
new_paca->paca_index = cpu;
- new_paca->kernel_toc = kernel_toc;
+ new_paca->kernel_toc = kernel_toc_addr();
new_paca->kernelbase = (unsigned long) _stext;
/* Only set MSR:IR/DR when MMU is initialized */
new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR);
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index a47e14277fd8..4513d1a706be 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -6,8 +6,8 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-CFLAGS_REMOVE_code-patching.o = -pg
-CFLAGS_REMOVE_feature-fixups.o = -pg
+CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE)
obj-y += string.o alloc.o crtsavres.o ppc_ksyms.o code-patching.o \
feature-fixups.o
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index 52c6ce1cc985..1eb7b45e017d 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -2,7 +2,7 @@ CFLAGS_bootx_init.o += -fPIC
ifdef CONFIG_FUNCTION_TRACER
# Do not trace early boot code
-CFLAGS_REMOVE_bootx_init.o = -pg -mno-sched-epilog
+CFLAGS_REMOVE_bootx_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
endif
obj-y += pic.o setup.o time.o feature.o pci.o \
diff --git a/arch/powerpc/scripts/gcc-check-mprofile-kernel.sh b/arch/powerpc/scripts/gcc-check-mprofile-kernel.sh
new file mode 100755
index 000000000000..c658d8cf760b
--- /dev/null
+++ b/arch/powerpc/scripts/gcc-check-mprofile-kernel.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+set -e
+set -o pipefail
+
+# To debug, uncomment the following line
+# set -x
+
+# Test whether the compile option -mprofile-kernel exists and generates
+# profiling code (i.e. a call to _mcount()).
+echo "int func() { return 0; }" | \
+ $* -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \
+ grep -q "_mcount"
+
+# Test whether the notrace attribute correctly suppresses calls to _mcount().
+
+echo -e "#include <linux/compiler.h>\nnotrace int func() { return 0; }" | \
+ $* -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \
+ grep -q "_mcount" && \
+ exit 1
+
+echo "OK"
+exit 0