diff options
Diffstat (limited to 'arch/powerpc')
248 files changed, 2857 insertions, 7445 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 107bb4319e0e..386ae12d8523 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -196,7 +196,6 @@ config PPC select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13) select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2) select HAVE_CONTEXT_TRACKING if PPC64 - select HAVE_TIF_NOHZ if PPC64 select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE @@ -226,7 +225,6 @@ config PPC select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) select HAVE_HARDLOCKUP_DETECTOR_ARCH if (PPC64 && PPC_BOOK3S) - select HAVE_OPROFILE select HAVE_OPTPROBES if PPC64 select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI if PPC64 @@ -237,6 +235,7 @@ config PPC select MMU_GATHER_PAGE_SIZE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN + select HAVE_SOFTIRQ_ON_OWN_STACK select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select HAVE_IRQ_TIME_ACCOUNTING @@ -282,7 +281,6 @@ config COMPAT bool "Enable support for 32bit binaries" depends on PPC64 default y if !CPU_LITTLE_ENDIAN - select COMPAT_BINFMT_ELF select ARCH_WANT_OLD_COMPAT_IPC select COMPAT_OLD_SIGACTION @@ -505,18 +503,14 @@ config HOTPLUG_CPU Say N if you are unsure. config PPC_QUEUED_SPINLOCKS - bool "Queued spinlocks" + bool "Queued spinlocks" if EXPERT depends on SMP + default PPC_BOOK3S_64 help Say Y here to use queued spinlocks which give better scalability and fairness on large SMP and NUMA systems without harming single threaded performance. - This option is currently experimental, the code is more complex and - less tested so it defaults to "N" for the moment. - - If unsure, say "N". - config ARCH_CPU_PROBE_RELEASE def_bool y depends on HOTPLUG_CPU @@ -720,18 +714,6 @@ config ARCH_MEMORY_PROBE def_bool y depends on MEMORY_HOTPLUG -config STDBINUTILS - bool "Using standard binutils settings" - depends on 44x - default y - help - Turning this option off allows you to select 256KB PAGE_SIZE on 44x. - Note, that kernel will be able to run only those applications, - which had been compiled using binutils later than 2.17.50.0.3 with - '-zmax-page-size' set to 256K (the default is 64K). Or, if using - the older binutils, you can patch them with a trivial patch, which - changes the ELF_MAXPAGESIZE definition from 0x10000 to 0x40000. - choice prompt "Page size" default PPC_4K_PAGES @@ -771,17 +753,15 @@ config PPC_64K_PAGES select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64 config PPC_256K_PAGES - bool "256k page size" - depends on 44x && !STDBINUTILS + bool "256k page size (Requires non-standard binutils settings)" + depends on 44x && !PPC_47x help Make the page size 256k. - As the ELF standard only requires alignment to support page - sizes up to 64k, you will need to compile all of your user - space applications with a non-standard binutils settings - (see the STDBINUTILS description for details). - - Say N unless you know what you are doing. + The kernel will only be able to run applications that have been + compiled with '-zmax-page-size' set to 256K (the default is 64K) using + binutils later than 2.17.50.0.3, or by patching the ELF_MAXPAGESIZE + definition from 0x10000 to 0x40000 in older versions. endchoice diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index b88900f4832f..ae084357994e 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -88,6 +88,7 @@ config PPC_IRQ_SOFT_MASK_DEBUG config XMON bool "Include xmon kernel debugger" depends on DEBUG_KERNEL + select CONSOLE_POLL if SERIAL_CPM_CONSOLE help Include in-kernel hooks for the xmon kernel monitor/debugger. Unless you are intending to debug the kernel, say N here. diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 08cf0eade56a..5f8544cf724a 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -65,7 +65,7 @@ UTS_MACHINE := $(subst $(space),,$(machine-y)) ifdef CONFIG_PPC32 KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o else -ifeq ($(call ld-ifversion, -ge, 225000000, y),y) +ifeq ($(call ld-ifversion, -ge, 22500, y),y) # Have the linker provide sfpr if possible. # There is a corresponding test in arch/powerpc/lib/Makefile KBUILD_LDFLAGS_MODULE += --save-restore-funcs @@ -276,8 +276,6 @@ head-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += arch/powerpc/kernel/prom_init.o # See arch/powerpc/Kbuild for content of core part of the kernel core-y += arch/powerpc/ -drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ - # Default to zImage, override when needed all: zImage diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig index 3894ba8f8ffc..4bc549c6edc5 100644 --- a/arch/powerpc/configs/44x/akebono_defconfig +++ b/arch/powerpc/configs/44x/akebono_defconfig @@ -8,7 +8,6 @@ CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y # CONFIG_SLUB_CPU_PARTIAL is not set CONFIG_PROFILING=y -CONFIG_OPROFILE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set @@ -21,6 +20,7 @@ CONFIG_IRQ_ALL_CPUS=y # CONFIG_COMPACTION is not set # CONFIG_SUSPEND is not set CONFIG_NET=y +CONFIG_NETDEVICES=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y @@ -41,7 +41,9 @@ CONFIG_BLK_DEV_RAM_SIZE=35000 # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y # CONFIG_SATA_PMP is not set +CONFIG_SATA_AHCI_PLATFORM=y # CONFIG_ATA_SFF is not set # CONFIG_NET_VENDOR_3COM is not set # CONFIG_NET_VENDOR_ADAPTEC is not set @@ -98,6 +100,8 @@ CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OHCI_HCD_PCI is not set CONFIG_USB_STORAGE=y CONFIG_MMC=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_M41T80=y CONFIG_EXT2_FS=y diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig index 34c86b3abecb..717827219921 100644 --- a/arch/powerpc/configs/44x/currituck_defconfig +++ b/arch/powerpc/configs/44x/currituck_defconfig @@ -6,7 +6,6 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_PROFILING=y -CONFIG_OPROFILE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig index 30845ce0885a..8da316e61a08 100644 --- a/arch/powerpc/configs/44x/fsp2_defconfig +++ b/arch/powerpc/configs/44x/fsp2_defconfig @@ -17,7 +17,6 @@ CONFIG_KALLSYMS_ALL=y CONFIG_BPF_SYSCALL=y CONFIG_EMBEDDED=y CONFIG_PROFILING=y -CONFIG_OPROFILE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig index 2c3834eebca3..c11e777b2f3d 100644 --- a/arch/powerpc/configs/44x/iss476-smp_defconfig +++ b/arch/powerpc/configs/44x/iss476-smp_defconfig @@ -7,7 +7,6 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_PROFILING=y -CONFIG_OPROFILE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index 42fbc70cec33..cc2c0d51f493 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -14,7 +14,6 @@ CONFIG_CPUSETS=y CONFIG_BLK_DEV_INITRD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y -CONFIG_OPROFILE=m CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_PARTITION_ADVANCED=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 1de0dbf6cbba..63d611cc160f 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -12,7 +12,6 @@ CONFIG_CGROUPS=y CONFIG_BLK_DEV_INITRD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y -CONFIG_OPROFILE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index 161351a18517..9424c1e67e1c 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -9,7 +9,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y -CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 15ed8d0aa014..78606b7e42df 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -7,7 +7,6 @@ CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y CONFIG_PROFILING=y -CONFIG_OPROFILE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 665a8d7cded0..7aefac5afab0 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -10,7 +10,6 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y -CONFIG_OPROFILE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 60a30fffeda0..2c87e856d839 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -30,7 +30,6 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_BPF_SYSCALL=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y -CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 48759656a067..4f05a6652478 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -62,7 +62,6 @@ CONFIG_VIRTUALIZATION=y CONFIG_KVM_BOOK3S_64=m CONFIG_KVM_BOOK3S_64_HV=m CONFIG_VHOST_NET=m -CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 33a01a9e86be..5cf49a515f8e 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -14,7 +14,6 @@ CONFIG_CPUSETS=y CONFIG_BLK_DEV_INITRD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y -CONFIG_OPROFILE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index ef09f3cce1fa..6677ac0da45a 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -19,7 +19,6 @@ CONFIG_USER_NS=y CONFIG_BLK_DEV_INITRD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y -CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -1072,7 +1071,6 @@ CONFIG_NLS_ISO8859_15=m CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_DEBUG_INFO=y -CONFIG_UNUSED_SYMBOLS=y CONFIG_HEADERS_INSTALL=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 142f1321fa58..f300dcb937cc 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -13,7 +13,6 @@ CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set CONFIG_SLAB=y CONFIG_PROFILING=y -CONFIG_OPROFILE=m CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_PPC_POWERNV is not set diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index d5dece981c02..777221775c83 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -29,7 +29,6 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_BPF_SYSCALL=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y -CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_MODULES=y diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c index a6e650a97d8f..ffedea7e4bef 100644 --- a/arch/powerpc/crypto/sha256-spe-glue.c +++ b/arch/powerpc/crypto/sha256-spe-glue.c @@ -129,7 +129,7 @@ static int ppc_spe_sha256_update(struct shash_desc *desc, const u8 *data, src += bytes; len -= bytes; - }; + } memcpy((char *)sctx->buf, src, len); return 0; diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index d0b832cbbec8..939f3c94c8f3 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -56,35 +56,6 @@ int exit_vmx_usercopy(void); int enter_vmx_ops(void); void *exit_vmx_ops(void *dest); -/* Traps */ -long machine_check_early(struct pt_regs *regs); -long hmi_exception_realmode(struct pt_regs *regs); -void SMIException(struct pt_regs *regs); -void handle_hmi_exception(struct pt_regs *regs); -void instruction_breakpoint_exception(struct pt_regs *regs); -void RunModeException(struct pt_regs *regs); -void single_step_exception(struct pt_regs *regs); -void program_check_exception(struct pt_regs *regs); -void alignment_exception(struct pt_regs *regs); -void StackOverflow(struct pt_regs *regs); -void stack_overflow_exception(struct pt_regs *regs); -void kernel_fp_unavailable_exception(struct pt_regs *regs); -void altivec_unavailable_exception(struct pt_regs *regs); -void vsx_unavailable_exception(struct pt_regs *regs); -void fp_unavailable_tm(struct pt_regs *regs); -void altivec_unavailable_tm(struct pt_regs *regs); -void vsx_unavailable_tm(struct pt_regs *regs); -void facility_unavailable_exception(struct pt_regs *regs); -void TAUException(struct pt_regs *regs); -void altivec_assist_exception(struct pt_regs *regs); -void unrecoverable_exception(struct pt_regs *regs); -void kernel_bad_stack(struct pt_regs *regs); -void system_reset_exception(struct pt_regs *regs); -void machine_check_exception(struct pt_regs *regs); -void emulation_assist_interrupt(struct pt_regs *regs); -long do_slb_fault(struct pt_regs *regs, unsigned long ea); -void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err); - /* signals, syscalls and interrupts */ long sys_swapcontext(struct ucontext __user *old_ctx, struct ucontext __user *new_ctx, diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index a0117a9d5b06..73bc5d2c431d 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -95,12 +95,12 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) addr &= 0xf0000000; /* align addr to start of segment */ barrier(); /* make sure thread.kuap is updated before playing with SRs */ while (addr < end) { - mtsrin(sr, addr); + mtsr(sr, addr); sr += 0x111; /* next VSID */ sr &= 0xf0ffffff; /* clear VSID overflow */ addr += 0x10000000; /* address of next segment */ } - isync(); /* Context sync required after mtsrin() */ + isync(); /* Context sync required after mtsr() */ } static __always_inline void allow_user_access(void __user *to, const void __user *from, @@ -122,7 +122,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user end = min(addr + size, TASK_SIZE); current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf); - kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */ + kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end); /* Clear Ks */ } static __always_inline void prevent_user_access(void __user *to, const void __user *from, @@ -151,7 +151,7 @@ static __always_inline void prevent_user_access(void __user *to, const void __us } current->thread.kuap = 0; - kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */ + kuap_update_sr(mfsr(addr) | SR_KS, addr, end); /* set Ks */ } static inline unsigned long prevent_user_access_return(void) diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 685c589e723f..b85f8e114a9c 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -94,7 +94,7 @@ typedef struct { } mm_context_t; void update_bats(void); -static inline void cleanup_cpu_mmu_context(void) { }; +static inline void cleanup_cpu_mmu_context(void) { } /* patch sites */ extern s32 patch__hash_page_A0, patch__hash_page_A1, patch__hash_page_A2; diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index f50f72e535aa..8bd905050896 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -199,25 +199,31 @@ DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); #ifdef CONFIG_PPC_PKEY +extern u64 __ro_after_init default_uamor; +extern u64 __ro_after_init default_amr; +extern u64 __ro_after_init default_iamr; + #include <asm/mmu.h> #include <asm/ptrace.h> -/* - * For kernel thread that doesn't have thread.regs return - * default AMR/IAMR values. +/* usage of kthread_use_mm() should inherit the + * AMR value of the operating address space. But, the AMR value is + * thread-specific and we inherit the address space and not thread + * access restrictions. Because of this ignore AMR value when accessing + * userspace via kernel thread. */ static inline u64 current_thread_amr(void) { if (current->thread.regs) return current->thread.regs->amr; - return AMR_KUAP_BLOCKED; + return default_amr; } static inline u64 current_thread_iamr(void) { if (current->thread.regs) return current->thread.regs->iamr; - return AMR_KUEP_BLOCKED; + return default_iamr; } #endif /* CONFIG_PPC_PKEY */ @@ -333,7 +339,7 @@ static inline unsigned long get_kuap(void) * This has no effect in terms of actually blocking things on hash, * so it doesn't break anything. */ - if (!early_mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) return AMR_KUAP_BLOCKED; return mfspr(SPRN_AMR); @@ -341,7 +347,7 @@ static inline unsigned long get_kuap(void) static inline void set_kuap(unsigned long value) { - if (!early_mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) return; /* diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 066b1d34c7bc..f911bdb68d8b 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -454,6 +454,8 @@ static inline unsigned long hpt_hash(unsigned long vpn, #define HPTE_NOHPTE_UPDATE 0x2 #define HPTE_USE_KERNEL_KEY 0x4 +long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long pa, + unsigned long rlags, unsigned long vflags, int psize, int ssize); extern int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, int subpage_prot); @@ -467,6 +469,8 @@ extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long flags); extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap, unsigned long dsisr); +void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc); +int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr, unsigned long msr); int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, unsigned int shift, unsigned int mmu_psize); @@ -521,6 +525,7 @@ void slb_dump_contents(struct slb_entry *slb_ptr); extern void slb_vmalloc_update(void); extern void slb_set_size(u16 size); +void preload_new_slb_context(unsigned long start, unsigned long sp); #endif /* __ASSEMBLY__ */ /* diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 995bbcdd0ef8..eace8c3f7b0a 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -239,7 +239,7 @@ static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base, #ifdef CONFIG_PPC_PSERIES extern void radix_init_pseries(void); #else -static inline void radix_init_pseries(void) { }; +static inline void radix_init_pseries(void) { } #endif #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index a39886681629..058601efbc8a 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -388,11 +388,28 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG #define ptep_test_and_clear_young(__vma, __addr, __ptep) \ ({ \ - int __r; \ - __r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \ - __r; \ + __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \ }) +/* + * On Book3S CPUs, clearing the accessed bit without a TLB flush + * doesn't cause data corruption. [ It could cause incorrect + * page aging and the (mistaken) reclaim of hot pages, but the + * chance of that should be relatively low. ] + * + * So as a performance optimization don't flush the TLB when + * clearing the accessed bit, it will eventually be flushed by + * a context switch or a VM operation anyway. [ In the rare + * event of it not getting flushed for a long time the delay + * shouldn't really matter because there's no real memory + * pressure for swapout to react to. ] + */ +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +#define ptep_clear_flush_young ptep_test_and_clear_young + +#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH +#define pmdp_clear_flush_young pmdp_test_and_clear_young + static inline int __pte_write(pte_t pte) { return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); diff --git a/arch/powerpc/include/asm/book3s/64/pkeys.h b/arch/powerpc/include/asm/book3s/64/pkeys.h index 3b8640498f5b..5b178139f3c0 100644 --- a/arch/powerpc/include/asm/book3s/64/pkeys.h +++ b/arch/powerpc/include/asm/book3s/64/pkeys.h @@ -5,10 +5,6 @@ #include <asm/book3s/64/hash-pkey.h> -extern u64 __ro_after_init default_uamor; -extern u64 __ro_after_init default_amr; -extern u64 __ro_after_init default_iamr; - static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags) { if (!mmu_has_feature(MMU_FTR_PKEY)) diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 94439e0cefc9..8b33601cdb9d 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -35,7 +35,7 @@ extern void radix__flush_pwc_lpid(unsigned int lpid); extern void radix__flush_all_lpid(unsigned int lpid); extern void radix__flush_all_lpid_guest(unsigned int lpid); #else -static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); }; +static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); } static inline void radix__flush_tlb_lpid_page(unsigned int lpid, unsigned long addr, unsigned long page_size) diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index dcb5c3839d2f..215973b4cb26 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -31,7 +31,7 @@ static inline void tlbiel_all(void) hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL); } #else -static inline void tlbiel_all(void) { BUG(); }; +static inline void tlbiel_all(void) { BUG(); } #endif static inline void tlbiel_all_lpid(bool radix) diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 464f8ca8a5c9..d1635ffbb179 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -111,12 +111,15 @@ #ifndef __ASSEMBLY__ struct pt_regs; -extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); -extern void bad_page_fault(struct pt_regs *, unsigned long, int); -void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig); +long do_page_fault(struct pt_regs *); +long hash__do_page_fault(struct pt_regs *); +void bad_page_fault(struct pt_regs *, int); +void __bad_page_fault(struct pt_regs *regs, int sig); +void do_bad_page_fault_segv(struct pt_regs *regs); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void _exception_pkey(struct pt_regs *, unsigned long, int); extern void die(const char *, struct pt_regs *, long); +void die_mce(const char *str, struct pt_regs *regs, long err); extern bool die_will_crash(void); extern void panic_flush_kmsg_start(void); extern void panic_flush_kmsg_end(void); diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 138e46d8c04e..f63495109f63 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -8,6 +8,12 @@ #include <asm/cputable.h> #include <asm/cpu_has_feature.h> +/* + * This flag is used to indicate that the page pointed to by a pte is clean + * and does not require cleaning before returning it to the user. + */ +#define PG_dcache_clean PG_arch_1 + #ifdef CONFIG_PPC_BOOK3S_64 /* * Book3s has no ptesync after setting a pte, so without this ptesync it's diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 5f21a5bab467..e85c849214a2 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -17,16 +17,6 @@ struct cpu_spec; typedef void (*cpu_setup_t)(unsigned long offset, struct cpu_spec* spec); typedef void (*cpu_restore_t)(void); -enum powerpc_oprofile_type { - PPC_OPROFILE_INVALID = 0, - PPC_OPROFILE_RS64 = 1, - PPC_OPROFILE_POWER4 = 2, - PPC_OPROFILE_G4 = 3, - PPC_OPROFILE_FSL_EMB = 4, - PPC_OPROFILE_CELL = 5, - PPC_OPROFILE_PA6T = 6, -}; - enum powerpc_pmc_type { PPC_PMC_DEFAULT = 0, PPC_PMC_IBM = 1, @@ -83,16 +73,6 @@ struct cpu_spec { /* Used by oprofile userspace to select the right counters */ char *oprofile_cpu_type; - /* Processor specific oprofile operations */ - enum powerpc_oprofile_type oprofile_type; - - /* Bit locations inside the mmcra change */ - unsigned long oprofile_mmcra_sihv; - unsigned long oprofile_mmcra_sipr; - - /* Bits to clear during an oprofile exception */ - unsigned long oprofile_mmcra_clear; - /* Name of processor class, for the ELF AT_PLATFORM entry */ char *platform; diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index ed75d1c318e3..504f7fe6711a 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -87,6 +87,17 @@ static notrace inline void account_cpu_user_exit(void) acct->starttime_user = tb; } +static notrace inline void account_stolen_time(void) +{ +#ifdef CONFIG_PPC_SPLPAR + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + struct lppaca *lp = local_paca->lppaca_ptr; + + if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx))) + accumulate_stolen_time(); + } +#endif +} #endif /* __KERNEL__ */ #else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ @@ -96,5 +107,8 @@ static inline void account_cpu_user_entry(void) static inline void account_cpu_user_exit(void) { } +static notrace inline void account_stolen_time(void) +{ +} #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #endif /* __POWERPC_CPUTIME_H */ diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index ec57daf87f40..86a14736c76c 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -50,10 +50,6 @@ bool ppc_breakpoint_available(void); #ifdef CONFIG_PPC_ADV_DEBUG_REGS extern void do_send_trap(struct pt_regs *regs, unsigned long address, unsigned long error_code, int brkpt); -#else - -extern void do_break(struct pt_regs *regs, unsigned long address, - unsigned long error_code); #endif #endif /* _ASM_POWERPC_DEBUG_H */ diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index aa6a5ef5d483..7604673787d6 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -137,7 +137,7 @@ extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup; #ifdef CONFIG_PPC_PSERIES void pseries_probe_fw_features(void); #else -static inline void pseries_probe_fw_features(void) { }; +static inline void pseries_probe_fw_features(void) { } #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 013165e62618..f18c543bc01d 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -17,8 +17,6 @@ extern bool hugetlb_disabled; void hugetlbpage_init_default(void); -void flush_dcache_icache_hugepage(struct page *page); - int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len); diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index c98f5141e3fc..ed6086d57b22 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -535,9 +535,12 @@ struct h_cpu_char_result { u64 behaviour; }; -/* Register state for entering a nested guest with H_ENTER_NESTED */ +/* + * Register state for entering a nested guest with H_ENTER_NESTED. + * New member must be added at the end. + */ struct hv_guest_state { - u64 version; /* version of this structure layout */ + u64 version; /* version of this structure layout, must be first */ u32 lpid; u32 vcpu_token; /* These registers are hypervisor privileged (at least for writing) */ @@ -566,10 +569,26 @@ struct hv_guest_state { u64 pidr; u64 cfar; u64 ppr; + /* Version 1 ends here */ + u64 dawr1; + u64 dawrx1; + /* Version 2 ends here */ }; /* Latest version of hv_guest_state structure */ -#define HV_GUEST_STATE_VERSION 1 +#define HV_GUEST_STATE_VERSION 2 + +static inline int hv_guest_state_size(unsigned int version) +{ + switch (version) { + case 1: + return offsetofend(struct hv_guest_state, ppr); + case 2: + return offsetofend(struct hv_guest_state, dawrx1); + default: + return -1; + } +} /* * From the document "H_GetPerformanceCounterInfo Interface" v1.07 diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 0363734ff56e..56a98936a6a9 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -38,6 +38,8 @@ #define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE) #endif +#endif /* CONFIG_PPC64 */ + /* * flags for paca->irq_soft_mask */ @@ -46,18 +48,56 @@ #define IRQS_PMI_DISABLED 2 #define IRQS_ALL_DISABLED (IRQS_DISABLED | IRQS_PMI_DISABLED) -#endif /* CONFIG_PPC64 */ - #ifndef __ASSEMBLY__ -extern void replay_system_reset(void); -extern void replay_soft_interrupts(void); +static inline void __hard_irq_enable(void) +{ + if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) + wrtee(MSR_EE); + else if (IS_ENABLED(CONFIG_PPC_8xx)) + wrtspr(SPRN_EIE); + else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + __mtmsrd(MSR_EE | MSR_RI, 1); + else + mtmsr(mfmsr() | MSR_EE); +} -extern void timer_interrupt(struct pt_regs *); -extern void timer_broadcast_interrupt(void); -extern void performance_monitor_exception(struct pt_regs *regs); -extern void WatchdogException(struct pt_regs *regs); -extern void unknown_exception(struct pt_regs *regs); +static inline void __hard_irq_disable(void) +{ + if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) + wrtee(0); + else if (IS_ENABLED(CONFIG_PPC_8xx)) + wrtspr(SPRN_EID); + else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + __mtmsrd(MSR_RI, 1); + else + mtmsr(mfmsr() & ~MSR_EE); +} + +static inline void __hard_EE_RI_disable(void) +{ + if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) + wrtee(0); + else if (IS_ENABLED(CONFIG_PPC_8xx)) + wrtspr(SPRN_NRI); + else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + __mtmsrd(0, 1); + else + mtmsr(mfmsr() & ~(MSR_EE | MSR_RI)); +} + +static inline void __hard_RI_enable(void) +{ + if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) + return; + + if (IS_ENABLED(CONFIG_PPC_8xx)) + wrtspr(SPRN_EID); + else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + __mtmsrd(MSR_RI, 1); + else + mtmsr(mfmsr() | MSR_RI); +} #ifdef CONFIG_PPC64 #include <asm/paca.h> @@ -221,18 +261,6 @@ static inline bool arch_irqs_disabled(void) #endif /* CONFIG_PPC_BOOK3S */ -#ifdef CONFIG_PPC_BOOK3E -#define __hard_irq_enable() wrtee(MSR_EE) -#define __hard_irq_disable() wrtee(0) -#define __hard_EE_RI_disable() wrtee(0) -#define __hard_RI_enable() do { } while (0) -#else -#define __hard_irq_enable() __mtmsrd(MSR_EE|MSR_RI, 1) -#define __hard_irq_disable() __mtmsrd(MSR_RI, 1) -#define __hard_EE_RI_disable() __mtmsrd(0, 1) -#define __hard_RI_enable() __mtmsrd(MSR_RI, 1) -#endif - #define hard_irq_disable() do { \ unsigned long flags; \ __hard_irq_disable(); \ @@ -296,8 +324,17 @@ extern void irq_set_pending_from_srr1(unsigned long srr1); extern void force_external_irq_replay(void); +static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val) +{ + regs->softe = val; +} #else /* CONFIG_PPC64 */ +static inline notrace unsigned long irq_soft_mask_return(void) +{ + return 0; +} + static inline unsigned long arch_local_save_flags(void) { return mfmsr(); @@ -327,22 +364,12 @@ static inline unsigned long arch_local_irq_save(void) static inline void arch_local_irq_disable(void) { - if (IS_ENABLED(CONFIG_BOOKE)) - wrtee(0); - else if (IS_ENABLED(CONFIG_PPC_8xx)) - wrtspr(SPRN_EID); - else - mtmsr(mfmsr() & ~MSR_EE); + __hard_irq_disable(); } static inline void arch_local_irq_enable(void) { - if (IS_ENABLED(CONFIG_BOOKE)) - wrtee(MSR_EE); - else if (IS_ENABLED(CONFIG_PPC_8xx)) - wrtspr(SPRN_EIE); - else - mtmsr(mfmsr() | MSR_EE); + __hard_irq_enable(); } static inline bool arch_irqs_disabled_flags(unsigned long flags) @@ -364,6 +391,9 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs) static inline void may_hard_irq_enable(void) { } +static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val) +{ +} #endif /* CONFIG_PPC64 */ #define ARCH_IRQ_INIT_FLAGS IRQ_NOREQUEST diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h new file mode 100644 index 000000000000..aedfba29e43a --- /dev/null +++ b/arch/powerpc/include/asm/interrupt.h @@ -0,0 +1,449 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_INTERRUPT_H +#define _ASM_POWERPC_INTERRUPT_H + +#include <linux/context_tracking.h> +#include <linux/hardirq.h> +#include <asm/cputime.h> +#include <asm/ftrace.h> +#include <asm/kprobes.h> +#include <asm/runlatch.h> + +struct interrupt_state { +#ifdef CONFIG_PPC_BOOK3E_64 + enum ctx_state ctx_state; +#endif +}; + +static inline void booke_restore_dbcr0(void) +{ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + unsigned long dbcr0 = current->thread.debug.dbcr0; + + if (IS_ENABLED(CONFIG_PPC32) && unlikely(dbcr0 & DBCR0_IDM)) { + mtspr(SPRN_DBSR, -1); + mtspr(SPRN_DBCR0, global_dbcr0[smp_processor_id()]); + } +#endif +} + +static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) +{ + /* + * Book3E reconciles irq soft mask in asm + */ +#ifdef CONFIG_PPC_BOOK3S_64 + if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED) + trace_hardirqs_off(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + if (user_mode(regs)) { + CT_WARN_ON(ct_state() != CONTEXT_USER); + user_exit_irqoff(); + + account_cpu_user_entry(); + account_stolen_time(); + } else { + /* + * CT_WARN_ON comes here via program_check_exception, + * so avoid recursion. + */ + if (TRAP(regs) != 0x700) + CT_WARN_ON(ct_state() != CONTEXT_KERNEL); + } +#endif + +#ifdef CONFIG_PPC_BOOK3E_64 + state->ctx_state = exception_enter(); + if (user_mode(regs)) + account_cpu_user_entry(); +#endif +} + +/* + * Care should be taken to note that interrupt_exit_prepare and + * interrupt_async_exit_prepare do not necessarily return immediately to + * regs context (e.g., if regs is usermode, we don't necessarily return to + * user mode). Other interrupts might be taken between here and return, + * context switch / preemption may occur in the exit path after this, or a + * signal may be delivered, etc. + * + * The real interrupt exit code is platform specific, e.g., + * interrupt_exit_user_prepare / interrupt_exit_kernel_prepare for 64s. + * + * However interrupt_nmi_exit_prepare does return directly to regs, because + * NMIs do not do "exit work" or replay soft-masked interrupts. + */ +static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) +{ +#ifdef CONFIG_PPC_BOOK3E_64 + exception_exit(state->ctx_state); +#endif + + /* + * Book3S exits to user via interrupt_exit_user_prepare(), which does + * context tracking, which is a cleaner way to handle PREEMPT=y + * and avoid context entry/exit in e.g., preempt_schedule_irq()), + * which is likely to be where the core code wants to end up. + * + * The above comment explains why we can't do the + * + * if (user_mode(regs)) + * user_exit_irqoff(); + * + * sequence here. + */ +} + +static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_CTRL) && + !test_thread_local_flags(_TLF_RUNLATCH)) + __ppc64_runlatch_on(); +#endif + + interrupt_enter_prepare(regs, state); + irq_enter(); +} + +static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) +{ + irq_exit(); + interrupt_exit_prepare(regs, state); +} + +struct interrupt_nmi_state { +#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 + u8 irq_soft_mask; + u8 irq_happened; +#endif + u8 ftrace_enabled; +#endif +}; + +static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state) +{ +#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 + state->irq_soft_mask = local_paca->irq_soft_mask; + state->irq_happened = local_paca->irq_happened; + + /* + * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does + * the right thing, and set IRQ_HARD_DIS. We do not want to reconcile + * because that goes through irq tracing which we don't want in NMI. + */ + local_paca->irq_soft_mask = IRQS_ALL_DISABLED; + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + /* Don't do any per-CPU operations until interrupt state is fixed */ +#endif + /* Allow DEC and PMI to be traced when they are soft-NMI */ + if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260) { + state->ftrace_enabled = this_cpu_get_ftrace_enabled(); + this_cpu_set_ftrace_enabled(0); + } +#endif + + /* + * Do not use nmi_enter() for pseries hash guest taking a real-mode + * NMI because not everything it touches is within the RMA limit. + */ + if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) || + !firmware_has_feature(FW_FEATURE_LPAR) || + radix_enabled() || (mfmsr() & MSR_DR)) + nmi_enter(); +} + +static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state) +{ + if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) || + !firmware_has_feature(FW_FEATURE_LPAR) || + radix_enabled() || (mfmsr() & MSR_DR)) + nmi_exit(); + +#ifdef CONFIG_PPC64 + if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260) + this_cpu_set_ftrace_enabled(state->ftrace_enabled); + +#ifdef CONFIG_PPC_BOOK3S_64 + /* Check we didn't change the pending interrupt mask. */ + WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened); + local_paca->irq_happened = state->irq_happened; + local_paca->irq_soft_mask = state->irq_soft_mask; +#endif +#endif +} + +/* + * Don't use noinstr here like x86, but rather add NOKPROBE_SYMBOL to each + * function definition. The reason for this is the noinstr section is placed + * after the main text section, i.e., very far away from the interrupt entry + * asm. That creates problems with fitting linker stubs when building large + * kernels. + */ +#define interrupt_handler __visible noinline notrace __no_kcsan __no_sanitize_address + +/** + * DECLARE_INTERRUPT_HANDLER_RAW - Declare raw interrupt handler function + * @func: Function name of the entry point + * @returns: Returns a value back to asm caller + */ +#define DECLARE_INTERRUPT_HANDLER_RAW(func) \ + __visible long func(struct pt_regs *regs) + +/** + * DEFINE_INTERRUPT_HANDLER_RAW - Define raw interrupt handler function + * @func: Function name of the entry point + * @returns: Returns a value back to asm caller + * + * @func is called from ASM entry code. + * + * This is a plain function which does no tracing, reconciling, etc. + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + * + * raw interrupt handlers must not enable or disable interrupts, or + * schedule, tracing and instrumentation (ftrace, lockdep, etc) would + * not be advisable either, although may be possible in a pinch, the + * trace will look odd at least. + * + * A raw handler may call one of the other interrupt handler functions + * to be converted into that interrupt context without these restrictions. + * + * On PPC64, _RAW handlers may return with fast_interrupt_return. + * + * Specific handlers may have additional restrictions. + */ +#define DEFINE_INTERRUPT_HANDLER_RAW(func) \ +static __always_inline long ____##func(struct pt_regs *regs); \ + \ +interrupt_handler long func(struct pt_regs *regs) \ +{ \ + long ret; \ + \ + ret = ____##func (regs); \ + \ + return ret; \ +} \ +NOKPROBE_SYMBOL(func); \ + \ +static __always_inline long ____##func(struct pt_regs *regs) + +/** + * DECLARE_INTERRUPT_HANDLER - Declare synchronous interrupt handler function + * @func: Function name of the entry point + */ +#define DECLARE_INTERRUPT_HANDLER(func) \ + __visible void func(struct pt_regs *regs) + +/** + * DEFINE_INTERRUPT_HANDLER - Define synchronous interrupt handler function + * @func: Function name of the entry point + * + * @func is called from ASM entry code. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + */ +#define DEFINE_INTERRUPT_HANDLER(func) \ +static __always_inline void ____##func(struct pt_regs *regs); \ + \ +interrupt_handler void func(struct pt_regs *regs) \ +{ \ + struct interrupt_state state; \ + \ + interrupt_enter_prepare(regs, &state); \ + \ + ____##func (regs); \ + \ + interrupt_exit_prepare(regs, &state); \ +} \ +NOKPROBE_SYMBOL(func); \ + \ +static __always_inline void ____##func(struct pt_regs *regs) + +/** + * DECLARE_INTERRUPT_HANDLER_RET - Declare synchronous interrupt handler function + * @func: Function name of the entry point + * @returns: Returns a value back to asm caller + */ +#define DECLARE_INTERRUPT_HANDLER_RET(func) \ + __visible long func(struct pt_regs *regs) + +/** + * DEFINE_INTERRUPT_HANDLER_RET - Define synchronous interrupt handler function + * @func: Function name of the entry point + * @returns: Returns a value back to asm caller + * + * @func is called from ASM entry code. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + */ +#define DEFINE_INTERRUPT_HANDLER_RET(func) \ +static __always_inline long ____##func(struct pt_regs *regs); \ + \ +interrupt_handler long func(struct pt_regs *regs) \ +{ \ + struct interrupt_state state; \ + long ret; \ + \ + interrupt_enter_prepare(regs, &state); \ + \ + ret = ____##func (regs); \ + \ + interrupt_exit_prepare(regs, &state); \ + \ + return ret; \ +} \ +NOKPROBE_SYMBOL(func); \ + \ +static __always_inline long ____##func(struct pt_regs *regs) + +/** + * DECLARE_INTERRUPT_HANDLER_ASYNC - Declare asynchronous interrupt handler function + * @func: Function name of the entry point + */ +#define DECLARE_INTERRUPT_HANDLER_ASYNC(func) \ + __visible void func(struct pt_regs *regs) + +/** + * DEFINE_INTERRUPT_HANDLER_ASYNC - Define asynchronous interrupt handler function + * @func: Function name of the entry point + * + * @func is called from ASM entry code. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + */ +#define DEFINE_INTERRUPT_HANDLER_ASYNC(func) \ +static __always_inline void ____##func(struct pt_regs *regs); \ + \ +interrupt_handler void func(struct pt_regs *regs) \ +{ \ + struct interrupt_state state; \ + \ + interrupt_async_enter_prepare(regs, &state); \ + \ + ____##func (regs); \ + \ + interrupt_async_exit_prepare(regs, &state); \ +} \ +NOKPROBE_SYMBOL(func); \ + \ +static __always_inline void ____##func(struct pt_regs *regs) + +/** + * DECLARE_INTERRUPT_HANDLER_NMI - Declare NMI interrupt handler function + * @func: Function name of the entry point + * @returns: Returns a value back to asm caller + */ +#define DECLARE_INTERRUPT_HANDLER_NMI(func) \ + __visible long func(struct pt_regs *regs) + +/** + * DEFINE_INTERRUPT_HANDLER_NMI - Define NMI interrupt handler function + * @func: Function name of the entry point + * @returns: Returns a value back to asm caller + * + * @func is called from ASM entry code. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + */ +#define DEFINE_INTERRUPT_HANDLER_NMI(func) \ +static __always_inline long ____##func(struct pt_regs *regs); \ + \ +interrupt_handler long func(struct pt_regs *regs) \ +{ \ + struct interrupt_nmi_state state; \ + long ret; \ + \ + interrupt_nmi_enter_prepare(regs, &state); \ + \ + ret = ____##func (regs); \ + \ + interrupt_nmi_exit_prepare(regs, &state); \ + \ + return ret; \ +} \ +NOKPROBE_SYMBOL(func); \ + \ +static __always_inline long ____##func(struct pt_regs *regs) + + +/* Interrupt handlers */ +/* kernel/traps.c */ +DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception); +#ifdef CONFIG_PPC_BOOK3S_64 +DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception); +#else +DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception); +#endif +DECLARE_INTERRUPT_HANDLER(SMIException); +DECLARE_INTERRUPT_HANDLER(handle_hmi_exception); +DECLARE_INTERRUPT_HANDLER(unknown_exception); +DECLARE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception); +DECLARE_INTERRUPT_HANDLER(instruction_breakpoint_exception); +DECLARE_INTERRUPT_HANDLER(RunModeException); +DECLARE_INTERRUPT_HANDLER(single_step_exception); +DECLARE_INTERRUPT_HANDLER(program_check_exception); +DECLARE_INTERRUPT_HANDLER(emulation_assist_interrupt); +DECLARE_INTERRUPT_HANDLER(alignment_exception); +DECLARE_INTERRUPT_HANDLER(StackOverflow); +DECLARE_INTERRUPT_HANDLER(stack_overflow_exception); +DECLARE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception); +DECLARE_INTERRUPT_HANDLER(altivec_unavailable_exception); +DECLARE_INTERRUPT_HANDLER(vsx_unavailable_exception); +DECLARE_INTERRUPT_HANDLER(facility_unavailable_exception); +DECLARE_INTERRUPT_HANDLER(fp_unavailable_tm); +DECLARE_INTERRUPT_HANDLER(altivec_unavailable_tm); +DECLARE_INTERRUPT_HANDLER(vsx_unavailable_tm); +DECLARE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi); +DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async); +DECLARE_INTERRUPT_HANDLER_RAW(performance_monitor_exception); +DECLARE_INTERRUPT_HANDLER(DebugException); +DECLARE_INTERRUPT_HANDLER(altivec_assist_exception); +DECLARE_INTERRUPT_HANDLER(CacheLockingException); +DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException); +DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException); +DECLARE_INTERRUPT_HANDLER(unrecoverable_exception); +DECLARE_INTERRUPT_HANDLER(WatchdogException); +DECLARE_INTERRUPT_HANDLER(kernel_bad_stack); + +/* slb.c */ +DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault); +DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault); + +/* hash_utils.c */ +DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault); + +/* fault.c */ +DECLARE_INTERRUPT_HANDLER_RET(do_page_fault); +DECLARE_INTERRUPT_HANDLER(do_bad_page_fault_segv); + +/* process.c */ +DECLARE_INTERRUPT_HANDLER(do_break); + +/* time.c */ +DECLARE_INTERRUPT_HANDLER_ASYNC(timer_interrupt); + +/* mce.c */ +DECLARE_INTERRUPT_HANDLER_NMI(machine_check_early); +DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode); + +DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException); + +void replay_system_reset(void); +void replay_soft_interrupts(void); + +static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs) +{ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); +} + +#endif /* _ASM_POWERPC_INTERRUPT_H */ diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 4f983ca4030a..f3f264e441a7 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -37,8 +37,6 @@ extern int distribute_irqs; struct pt_regs; -#define __ARCH_HAS_DO_SOFTIRQ - #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) /* * Per-cpu stacks for handling critical, debug and machine check diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 55d6ede30c19..9ab344d29a54 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -136,6 +136,7 @@ int load_crashdump_segments_ppc64(struct kimage *image, int setup_purgatory_ppc64(struct kimage *image, const void *slave_code, const void *fdt, unsigned long kernel_load_addr, unsigned long fdt_load_addr); +unsigned int kexec_fdt_totalsize_ppc64(struct kimage *image); int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, unsigned long initrd_load_addr, unsigned long initrd_len, const char *cmdline); diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index bf221a2a523e..7ec21af49a45 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -91,6 +91,7 @@ static __always_inline void setup_kup(void) static inline void allow_read_from_user(const void __user *from, unsigned long size) { + barrier_nospec(); allow_user_access(NULL, from, size, KUAP_READ); } @@ -102,6 +103,7 @@ static inline void allow_write_to_user(void __user *to, unsigned long size) static inline void allow_read_write_user(void __user *to, const void __user *from, unsigned long size) { + barrier_nospec(); allow_user_access(to, from, size, KUAP_READ_WRITE); } diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index d32ec9ae73bd..2f5f919f6cd3 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -277,6 +277,13 @@ extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd); extern void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu); extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu); +long kvmppc_read_intr(void); +void kvmppc_bad_interrupt(struct pt_regs *regs); +void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip); +void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip); +void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr); +void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags); + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu); void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 078f4648ea27..b6d31bff5209 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -74,16 +74,6 @@ struct kvm_split_mode { u8 do_nap; u8 napped[MAX_SMT_THREADS]; struct kvmppc_vcore *vc[MAX_SUBCORES]; - /* Bits for changing lpcr on P9 */ - unsigned long lpcr_req; - unsigned long lpidr_req; - unsigned long host_lpcr; - u32 do_set; - u32 do_restore; - union { - u32 allphases; - u8 phase[4]; - } lpcr_sync; }; /* @@ -110,7 +100,6 @@ struct kvmppc_host_state { u8 hwthread_state; u8 host_ipi; u8 ptid; /* thread number within subcore when split */ - u8 tid; /* thread number within whole core */ u8 fake_suspend; struct kvm_vcpu *kvm_vcpu; struct kvmppc_vcore *kvm_vcore; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d67a470e95a3..05fb00d37609 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -28,7 +28,6 @@ #define KVM_MAX_VCPUS NR_CPUS #define KVM_MAX_VCORES NR_CPUS -#define KVM_USER_MEM_SLOTS 512 #include <asm/cputhreads.h> @@ -307,6 +306,7 @@ struct kvm_arch { u8 svm_enabled; bool threads_indep; bool nested_enable; + bool dawr1_enabled; pgd_t *pgtable; u64 process_table; struct dentry *debugfs_dir; @@ -584,8 +584,10 @@ struct kvm_vcpu_arch { u32 ctrl; u32 dabrx; ulong dabr; - ulong dawr; - ulong dawrx; + ulong dawr0; + ulong dawrx0; + ulong dawr1; + ulong dawrx1; ulong ciabr; ulong cfar; ulong ppr; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 0a056c64c317..8aacd76bb702 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -314,6 +314,8 @@ struct kvmppc_ops { int size); int (*enable_svm)(struct kvm *kvm); int (*svm_off)(struct kvm *kvm); + int (*enable_dawr1)(struct kvm *kvm); + bool (*hash_v3_possible)(void); }; extern struct kvmppc_ops *kvmppc_hv_ops; @@ -627,9 +629,9 @@ extern int h_ipi_redirect; static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap( struct kvm *kvm) { return NULL; } -static inline void kvmppc_alloc_host_rm_ops(void) {}; -static inline void kvmppc_free_host_rm_ops(void) {}; -static inline void kvmppc_free_pimap(struct kvm *kvm) {}; +static inline void kvmppc_alloc_host_rm_ops(void) {} +static inline void kvmppc_free_host_rm_ops(void) {} +static inline void kvmppc_free_pimap(struct kvm *kvm) {} static inline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) { return 0; } static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) @@ -881,9 +883,9 @@ static inline void kvmppc_mmu_flush_icache(kvm_pfn_t pfn) /* Clear i-cache for new pages */ page = pfn_to_page(pfn); - if (!test_bit(PG_arch_1, &page->flags)) { + if (!test_bit(PG_dcache_clean, &page->flags)) { flush_dcache_icache_page(page); - set_bit(PG_arch_1, &page->flags); + set_bit(PG_dcache_clean, &page->flags); } } diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index cf6ebbc16cb4..764f2732a821 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -59,6 +59,9 @@ struct machdep_calls { int (*pcibios_root_bridge_prepare)(struct pci_host_bridge *bridge); + /* finds all the pci_controllers present at boot */ + void (*discover_phbs)(void); + /* To setup PHBs when using automatic OF platform driver for PCI */ int (*pci_setup_phb)(struct pci_controller *host); diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index e6c27ae843dc..331d944280b8 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -204,7 +204,18 @@ struct mce_error_info { bool ignore_event; }; -#define MAX_MC_EVT 100 +#define MAX_MC_EVT 10 + +struct mce_info { + int mce_nest_count; + struct machine_check_event mce_event[MAX_MC_EVT]; + /* Queue for delayed MCE events. */ + int mce_queue_count; + struct machine_check_event mce_event_queue[MAX_MC_EVT]; + /* Queue for delayed MCE UE events. */ + int mce_ue_count; + struct machine_check_event mce_ue_event_queue[MAX_MC_EVT]; +}; /* Release flags for get_mce_event() */ #define MCE_EVENT_RELEASE true @@ -234,4 +245,11 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs); long __machine_check_early_realmode_p9(struct pt_regs *regs); long __machine_check_early_realmode_p10(struct pt_regs *regs); #endif /* CONFIG_PPC_BOOK3S_64 */ + +#ifdef CONFIG_PPC_BOOK3S_64 +void mce_init(void); +#else +static inline void mce_init(void) { }; +#endif /* CONFIG_PPC_BOOK3S_64 */ + #endif /* __ASM_PPC64_MCE_H__ */ diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index d5821834dba9..652ce85f9410 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -282,9 +282,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, } #define pkey_mm_init(mm) -#define thread_pkey_regs_save(thread) -#define thread_pkey_regs_restore(new_thread, old_thread) -#define thread_pkey_regs_init(thread) #define arch_dup_pkeys(oldmm, mm) static inline u64 pte_to_hpte_pkey_bits(u64 pteflags, unsigned long flags) diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index 84b4cfe73edd..160abcb8e9fa 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -4,6 +4,7 @@ #ifdef CONFIG_PPC_WATCHDOG extern void arch_touch_nmi_watchdog(void); +long soft_nmi_interrupt(struct pt_regs *regs); #else static inline void arch_touch_nmi_watchdog(void) {} #endif diff --git a/arch/powerpc/include/asm/oprofile_impl.h b/arch/powerpc/include/asm/oprofile_impl.h deleted file mode 100644 index 2a166c297f97..000000000000 --- a/arch/powerpc/include/asm/oprofile_impl.h +++ /dev/null @@ -1,135 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM - * - * Based on alpha version. - */ - -#ifndef _ASM_POWERPC_OPROFILE_IMPL_H -#define _ASM_POWERPC_OPROFILE_IMPL_H -#ifdef __KERNEL__ - -#define OP_MAX_COUNTER 8 - -/* Per-counter configuration as set via oprofilefs. */ -struct op_counter_config { - unsigned long enabled; - unsigned long event; - unsigned long count; - /* Classic doesn't support per-counter user/kernel selection */ - unsigned long kernel; - unsigned long user; - unsigned long unit_mask; -}; - -/* System-wide configuration as set via oprofilefs. */ -struct op_system_config { -#ifdef CONFIG_PPC64 - unsigned long mmcr0; - unsigned long mmcr1; - unsigned long mmcra; -#ifdef CONFIG_OPROFILE_CELL - /* Register for oprofile user tool to check cell kernel profiling - * support. - */ - unsigned long cell_support; -#endif -#endif - unsigned long enable_kernel; - unsigned long enable_user; -}; - -/* Per-arch configuration */ -struct op_powerpc_model { - int (*reg_setup) (struct op_counter_config *, - struct op_system_config *, - int num_counters); - int (*cpu_setup) (struct op_counter_config *); - int (*start) (struct op_counter_config *); - int (*global_start) (struct op_counter_config *); - void (*stop) (void); - void (*global_stop) (void); - int (*sync_start)(void); - int (*sync_stop)(void); - void (*handle_interrupt) (struct pt_regs *, - struct op_counter_config *); - int num_counters; -}; - -extern struct op_powerpc_model op_model_fsl_emb; -extern struct op_powerpc_model op_model_power4; -extern struct op_powerpc_model op_model_7450; -extern struct op_powerpc_model op_model_cell; -extern struct op_powerpc_model op_model_pa6t; - - -/* All the classic PPC parts use these */ -static inline unsigned int classic_ctr_read(unsigned int i) -{ - switch(i) { - case 0: - return mfspr(SPRN_PMC1); - case 1: - return mfspr(SPRN_PMC2); - case 2: - return mfspr(SPRN_PMC3); - case 3: - return mfspr(SPRN_PMC4); - case 4: - return mfspr(SPRN_PMC5); - case 5: - return mfspr(SPRN_PMC6); - -/* No PPC32 chip has more than 6 so far */ -#ifdef CONFIG_PPC64 - case 6: - return mfspr(SPRN_PMC7); - case 7: - return mfspr(SPRN_PMC8); -#endif - default: - return 0; - } -} - -static inline void classic_ctr_write(unsigned int i, unsigned int val) -{ - switch(i) { - case 0: - mtspr(SPRN_PMC1, val); - break; - case 1: - mtspr(SPRN_PMC2, val); - break; - case 2: - mtspr(SPRN_PMC3, val); - break; - case 3: - mtspr(SPRN_PMC4, val); - break; - case 4: - mtspr(SPRN_PMC5, val); - break; - case 5: - mtspr(SPRN_PMC6, val); - break; - -/* No PPC32 chip has more than 6, yet */ -#ifdef CONFIG_PPC64 - case 6: - mtspr(SPRN_PMC7, val); - break; - case 7: - mtspr(SPRN_PMC8, val); - break; -#endif - default: - break; - } -} - - -extern void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth); - -#endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_OPROFILE_IMPL_H */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 9454d29ff4b4..ec18ac818e3a 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -29,6 +29,7 @@ #include <asm/hmi.h> #include <asm/cpuidle.h> #include <asm/atomic.h> +#include <asm/mce.h> #include <asm-generic/mmiowb_types.h> @@ -108,8 +109,7 @@ struct paca_struct { */ /* used for most interrupts/exceptions */ u64 exgen[EX_SIZE] __attribute__((aligned(0x80))); - u64 exslb[EX_SIZE]; /* used for SLB/segment table misses - * on the linear mapping */ + /* SLB related definitions */ u16 vmalloc_sllp; u8 slb_cache_ptr; @@ -273,6 +273,9 @@ struct paca_struct { #ifdef CONFIG_MMIOWB struct mmiowb_state mmiowb_state; #endif +#ifdef CONFIG_PPC_BOOK3S_64 + struct mce_info *mce_info; +#endif /* CONFIG_PPC_BOOK3S_64 */ } ____cacheline_aligned; extern void copy_mm_to_paca(struct mm_struct *mm); @@ -285,9 +288,9 @@ extern void free_unused_pacas(void); #else /* CONFIG_PPC64 */ -static inline void allocate_paca_ptrs(void) { }; -static inline void allocate_paca(int cpu) { }; -static inline void free_unused_pacas(void) { }; +static inline void allocate_paca_ptrs(void) { } +static inline void allocate_paca(int cpu) { } +static inline void free_unused_pacas(void) { } #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h index edc08f04aef7..5d1726bb28e7 100644 --- a/arch/powerpc/include/asm/paravirt.h +++ b/arch/powerpc/include/asm/paravirt.h @@ -10,6 +10,7 @@ #endif #ifdef CONFIG_PPC_SPLPAR +#include <linux/smp.h> #include <asm/kvm_guest.h> #include <asm/cputhreads.h> diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h index daec64d41b44..164e910bf654 100644 --- a/arch/powerpc/include/asm/perf_event.h +++ b/arch/powerpc/include/asm/perf_event.h @@ -14,6 +14,7 @@ #include <asm/perf_event_server.h> #else static inline bool is_sier_available(void) { return false; } +static inline unsigned long get_pmcs_ext_regs(int idx) { return 0; } #endif #ifdef CONFIG_FSL_EMB_PERF_EVENT @@ -40,6 +41,7 @@ static inline bool is_sier_available(void) { return false; } /* To support perf_regs sier update */ extern bool is_sier_available(void); +extern unsigned long get_pmcs_ext_regs(int idx); /* To define perf extended regs mask value */ extern u64 PERF_REG_EXTENDED_MASK; #define PERF_REG_EXTENDED_MASK PERF_REG_EXTENDED_MASK diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 3b7baba01c92..00e7e671bb4b 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -36,9 +36,9 @@ struct power_pmu { unsigned long test_adder; int (*compute_mmcr)(u64 events[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]); + struct perf_event *pevents[], u32 flags); int (*get_constraint)(u64 event_id, unsigned long *mskp, - unsigned long *valp); + unsigned long *valp, u64 event_config1); int (*get_alternatives)(u64 event_id, unsigned int flags, u64 alt[]); void (*get_mem_data_src)(union perf_mem_data_src *dsrc, @@ -83,6 +83,7 @@ struct power_pmu { #define PPMU_NO_SIAR 0x00000100 /* Do not use SIAR */ #define PPMU_ARCH_31 0x00000200 /* Has MMCR3, SIER2 and SIER3 */ #define PPMU_P10_DD1 0x00000400 /* Is power10 DD1 processor version */ +#define PPMU_HAS_ATTR_CONFIG1 0x00000800 /* Using config1 attribute */ /* * Values for flags to get_alternatives() diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index f7613f43c9cf..4eed82172e33 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -162,6 +162,9 @@ static inline bool is_ioremap_addr(const void *x) return addr >= IOREMAP_BASE && addr < IOREMAP_END; } + +struct seq_file; +void arch_report_meminfo(struct seq_file *m); #endif /* CONFIG_PPC64 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index a7951049e129..59a2c7dbc78f 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -169,10 +169,4 @@ static inline bool arch_pkeys_enabled(void) } extern void pkey_mm_init(struct mm_struct *mm); -extern bool arch_supports_pkeys(int cap); -extern unsigned int arch_usable_pkeys(void); -extern void thread_pkey_regs_save(struct thread_struct *thread); -extern void thread_pkey_regs_restore(struct thread_struct *new_thread, - struct thread_struct *old_thread); -extern void thread_pkey_regs_init(struct thread_struct *thread); #endif /*_ASM_POWERPC_KEYS_H */ diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 7f4be5a05eb3..2b9edbf6e929 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -13,10 +13,6 @@ extern unsigned long isa_io_base; -extern void pci_setup_phb_io(struct pci_controller *hose, int primary); -extern void pci_setup_phb_io_dynamic(struct pci_controller *hose, int primary); - - extern struct list_head hose_list; extern struct pci_dev *isa_bridge_pcidev; /* may be NULL if no ISA bus */ @@ -32,9 +28,6 @@ struct pci_dn; void *pci_traverse_device_nodes(struct device_node *start, void *(*fn)(struct device_node *, void *), void *data); -void *traverse_pci_dn(struct pci_dn *root, - void *(*fn)(struct pci_dn *, void *), - void *data); extern void pci_devs_phb_init_dynamic(struct pci_controller *phb); /* From rtas_pci.h */ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index cc1bca571332..3dceb64fc9af 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -25,7 +25,6 @@ #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb) #define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb) -#define ACCOUNT_STOLEN_TIME #else #define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb) \ MFTB(ra); /* get timebase */ \ @@ -44,29 +43,6 @@ PPC_LL ra, ACCOUNT_SYSTEM_TIME(ptr); \ add ra,ra,rb; /* add on to system time */ \ PPC_STL ra, ACCOUNT_SYSTEM_TIME(ptr) - -#ifdef CONFIG_PPC_SPLPAR -#define ACCOUNT_STOLEN_TIME \ -BEGIN_FW_FTR_SECTION; \ - beq 33f; \ - /* from user - see if there are any DTL entries to process */ \ - ld r10,PACALPPACAPTR(r13); /* get ptr to VPA */ \ - ld r11,PACA_DTL_RIDX(r13); /* get log read index */ \ - addi r10,r10,LPPACA_DTLIDX; \ - LDX_BE r10,0,r10; /* get log write index */ \ - cmpd cr1,r11,r10; \ - beq+ cr1,33f; \ - bl accumulate_stolen_time; \ - ld r12,_MSR(r1); \ - andi. r10,r12,MSR_PR; /* Restore cr0 (coming from user) */ \ -33: \ -END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) - -#else /* CONFIG_PPC_SPLPAR */ -#define ACCOUNT_STOLEN_TIME - -#endif /* CONFIG_PPC_SPLPAR */ - #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ /* diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 58f9dc060a7b..975ba260006a 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -70,6 +70,9 @@ struct pt_regs }; #endif + +#define STACK_FRAME_WITH_PT_REGS (STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)) + #ifdef __powerpc64__ /* @@ -229,6 +232,11 @@ static inline bool trap_is_scv(struct pt_regs *regs) return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x3000); } +static inline bool trap_is_unsupported_scv(struct pt_regs *regs) +{ + return IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x7ff0; +} + static inline bool trap_is_syscall(struct pt_regs *regs) { return (trap_is_scv(regs) || TRAP(regs) == 0xc00); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index e40a921d78f9..da103e92c112 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1375,6 +1375,7 @@ #define mtmsr(v) asm volatile("mtmsr %0" : \ : "r" ((unsigned long)(v)) \ : "memory") +#define __mtmsrd(v, l) BUILD_BUG() #define __MTMSR "mtmsr" #endif @@ -1413,13 +1414,24 @@ static inline void msr_check_and_clear(unsigned long bits) } #ifdef CONFIG_PPC32 -#define mfsrin(v) ({unsigned int rval; \ - asm volatile("mfsrin %0,%1" : "=r" (rval) : "r" (v)); \ - rval;}) +static inline u32 mfsr(u32 idx) +{ + u32 val; + + if (__builtin_constant_p(idx)) + asm volatile("mfsr %0, %1" : "=r" (val): "i" (idx >> 28)); + else + asm volatile("mfsrin %0, %1" : "=r" (val): "r" (idx)); -static inline void mtsrin(u32 val, u32 idx) + return val; +} + +static inline void mtsr(u32 val, u32 idx) { - asm volatile("mtsrin %0, %1" : : "r" (val), "r" (idx)); + if (__builtin_constant_p(idx)) + asm volatile("mtsr %1, %0" : : "r" (val), "i" (idx >> 28)); + else + asm volatile("mtsrin %0, %1" : : "r" (val), "r" (idx)); } #endif diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 262782f08fd4..17b8dcd9a40d 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -691,6 +691,9 @@ #define mttmr(rn, v) asm volatile(MTTMR(rn, %0) : \ : "r" ((unsigned long)(v)) \ : "memory") + +extern unsigned long global_dbcr0[]; + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_POWERPC_REG_BOOKE_H__ */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 332e1000ca0f..658448ca5b8a 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -369,7 +369,7 @@ void rtas_initialize(void); #else static inline int page_is_rtas_user_buf(unsigned long pfn) { return 0;} static inline void pSeries_coalesce_init(void) { } -static inline void rtas_initialize(void) { }; +static inline void rtas_initialize(void) { } #endif extern int call_rtas(const char *, int, int, unsigned long *, ...); diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index a466749703f1..e89bfebd4e00 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -58,7 +58,7 @@ void do_rfi_flush_fixups(enum l1d_flush_type types); #ifdef CONFIG_PPC_BARRIER_NOSPEC void setup_barrier_nospec(void); #else -static inline void setup_barrier_nospec(void) { }; +static inline void setup_barrier_nospec(void) { } #endif void do_uaccess_flush_fixups(enum l1d_flush_type types); void do_entry_flush_fixups(enum l1d_flush_type types); @@ -68,13 +68,13 @@ extern bool barrier_nospec_enabled; #ifdef CONFIG_PPC_BARRIER_NOSPEC void do_barrier_nospec_fixups_range(bool enable, void *start, void *end); #else -static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { }; +static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { } #endif #ifdef CONFIG_PPC_FSL_BOOK3E void setup_spectre_v2(void); #else -static inline void setup_spectre_v2(void) {}; +static inline void setup_spectre_v2(void) {} #endif void do_btb_flush_fixups(void); diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h index 9c3c30534333..5b862de29dff 100644 --- a/arch/powerpc/include/asm/simple_spinlock.h +++ b/arch/powerpc/include/asm/simple_spinlock.h @@ -90,8 +90,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) void splpar_spin_yield(arch_spinlock_t *lock); void splpar_rw_yield(arch_rwlock_t *lock); #else /* SPLPAR */ -static inline void splpar_spin_yield(arch_spinlock_t *lock) {}; -static inline void splpar_rw_yield(arch_rwlock_t *lock) {}; +static inline void splpar_spin_yield(arch_spinlock_t *lock) {} +static inline void splpar_rw_yield(arch_rwlock_t *lock) {} #endif static inline void spin_yield(arch_spinlock_t *lock) diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index c4e2d53acd2b..7a13bc20f0a0 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -236,7 +236,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys) #if defined(CONFIG_PPC64) && (defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)) extern void smp_release_cpus(void); #else -static inline void smp_release_cpus(void) { }; +static inline void smp_release_cpus(void) { } #endif extern int smt_enabled_at_boot; diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h index 9666491bcb8a..8a2d11ba0dae 100644 --- a/arch/powerpc/include/asm/spu.h +++ b/arch/powerpc/include/asm/spu.h @@ -201,20 +201,6 @@ int spu_64k_pages_available(void); struct mm_struct; extern void spu_flush_all_slbs(struct mm_struct *mm); -/* This interface allows a profiler (e.g., OProfile) to store a ref - * to spu context information that it creates. This caching technique - * avoids the need to recreate this information after a save/restore operation. - * - * Assumes the caller has already incremented the ref count to - * profile_info; then spu_context_destroy must call kref_put - * on prof_info_kref. - */ -void spu_set_profile_private_kref(struct spu_context *ctx, - struct kref *prof_info_kref, - void ( * prof_info_release) (struct kref *kref)); - -void *spu_get_profile_private_kref(struct spu_context *ctx); - /* system callbacks from the SPU */ struct spu_syscall_block { u64 nr_ret; @@ -266,25 +252,6 @@ void spu_remove_dev_attr(struct device_attribute *attr); int spu_add_dev_attr_group(struct attribute_group *attrs); void spu_remove_dev_attr_group(struct attribute_group *attrs); -/* - * Notifier blocks: - * - * oprofile can get notified when a context switch is performed - * on an spe. The notifer function that gets called is passed - * a pointer to the SPU structure as well as the object-id that - * identifies the binary running on that SPU now. - * - * For a context save, the object-id that is passed is zero, - * identifying that the kernel will run from that moment on. - * - * For a context restore, the object-id is the value written - * to object-id spufs file from user space and the notifer - * function can assume that spu->ctx is valid. - */ -struct notifier_block; -int spu_switch_event_register(struct notifier_block * n); -int spu_switch_event_unregister(struct notifier_block * n); - extern void notify_spus_active(void); extern void do_notify_spus_active(void); diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 3d8a47af7a25..386d576673a1 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -94,7 +94,6 @@ void arch_setup_new_exec(void); #define TIF_PATCH_PENDING 6 /* pending live patching update */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SINGLESTEP 8 /* singlestepping active */ -#define TIF_NOHZ 9 /* in adaptive nohz mode */ #define TIF_SECCOMP 10 /* secure computing */ #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ #define TIF_NOERROR 12 /* Force successful syscall return */ @@ -128,11 +127,10 @@ void arch_setup_new_exec(void); #define _TIF_UPROBE (1<<TIF_UPROBE) #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) -#define _TIF_NOHZ (1<<TIF_NOHZ) #define _TIF_SYSCALL_EMU (1<<TIF_SYSCALL_EMU) #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ - _TIF_NOHZ | _TIF_SYSCALL_EMU) + _TIF_SYSCALL_EMU) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 8f789b597bae..8dd3cdb25338 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -102,6 +102,8 @@ DECLARE_PER_CPU(u64, decrementers_next_tb); /* Convert timebase ticks to nanoseconds */ unsigned long long tb_to_ns(unsigned long long tb_ticks); +void timer_broadcast_interrupt(void); + /* SPLPAR */ void accumulate_stolen_time(void); diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 501c9a79038c..78e2a3990eab 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -52,8 +52,6 @@ static inline bool __access_ok(unsigned long addr, unsigned long size) __get_user_nocheck((x), (ptr), sizeof(*(ptr)), true) #define __put_user(x, ptr) \ __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) -#define __put_user_goto(x, ptr, label) \ - __put_user_nocheck_goto((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label) #define __get_user_allowed(x, ptr) \ __get_user_nocheck((x), (ptr), sizeof(*(ptr)), false) @@ -110,22 +108,18 @@ static inline bool __access_ok(unsigned long addr, unsigned long size) extern long __put_user_bad(void); -#define __put_user_size_allowed(x, ptr, size, retval) \ +#define __put_user_size(x, ptr, size, retval) \ do { \ __label__ __pu_failed; \ \ retval = 0; \ + allow_write_to_user(ptr, size); \ __put_user_size_goto(x, ptr, size, __pu_failed); \ + prevent_write_to_user(ptr, size); \ break; \ \ __pu_failed: \ retval = -EFAULT; \ -} while (0) - -#define __put_user_size(x, ptr, size, retval) \ -do { \ - allow_write_to_user(ptr, size); \ - __put_user_size_allowed(x, ptr, size, retval); \ prevent_write_to_user(ptr, size); \ } while (0) @@ -213,11 +207,9 @@ do { \ } \ } while (0) -#define __put_user_nocheck_goto(x, ptr, size, label) \ +#define __unsafe_put_user_goto(x, ptr, size, label) \ do { \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - if (!is_kernel_addr((unsigned long)__pu_addr)) \ - might_fault(); \ __chk_user_ptr(ptr); \ __put_user_size_goto((x), __pu_addr, (size), label); \ } while (0) @@ -313,9 +305,8 @@ do { \ __typeof__(size) __gu_size = (size); \ \ __chk_user_ptr(__gu_addr); \ - if (!is_kernel_addr((unsigned long)__gu_addr)) \ + if (do_allow && !is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ - barrier_nospec(); \ if (do_allow) \ __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ else \ @@ -333,10 +324,8 @@ do { \ __typeof__(size) __gu_size = (size); \ \ might_fault(); \ - if (access_ok(__gu_addr, __gu_size)) { \ - barrier_nospec(); \ + if (access_ok(__gu_addr, __gu_size)) \ __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ - } \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ \ __gu_err; \ @@ -350,7 +339,6 @@ do { \ __typeof__(size) __gu_size = (size); \ \ __chk_user_ptr(__gu_addr); \ - barrier_nospec(); \ __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ \ @@ -395,7 +383,6 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { unsigned long ret; - barrier_nospec(); allow_read_write_user(to, from, n); ret = __copy_tofrom_user(to, from, n); prevent_read_write_user(to, from, n); @@ -407,32 +394,7 @@ static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long ret; - if (__builtin_constant_p(n) && (n <= 8)) { - ret = 1; - - switch (n) { - case 1: - barrier_nospec(); - __get_user_size(*(u8 *)to, from, 1, ret); - break; - case 2: - barrier_nospec(); - __get_user_size(*(u16 *)to, from, 2, ret); - break; - case 4: - barrier_nospec(); - __get_user_size(*(u32 *)to, from, 4, ret); - break; - case 8: - barrier_nospec(); - __get_user_size(*(u64 *)to, from, 8, ret); - break; - } - if (ret == 0) - return 0; - } - barrier_nospec(); allow_read_from_user(from, n); ret = __copy_tofrom_user((__force void __user *)to, from, n); prevent_read_from_user(from, n); @@ -440,39 +402,12 @@ static inline unsigned long raw_copy_from_user(void *to, } static inline unsigned long -raw_copy_to_user_allowed(void __user *to, const void *from, unsigned long n) -{ - if (__builtin_constant_p(n) && (n <= 8)) { - unsigned long ret = 1; - - switch (n) { - case 1: - __put_user_size_allowed(*(u8 *)from, (u8 __user *)to, 1, ret); - break; - case 2: - __put_user_size_allowed(*(u16 *)from, (u16 __user *)to, 2, ret); - break; - case 4: - __put_user_size_allowed(*(u32 *)from, (u32 __user *)to, 4, ret); - break; - case 8: - __put_user_size_allowed(*(u64 *)from, (u64 __user *)to, 8, ret); - break; - } - if (ret == 0) - return 0; - } - - return __copy_tofrom_user(to, (__force const void __user *)from, n); -} - -static inline unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { unsigned long ret; allow_write_to_user(to, n); - ret = raw_copy_to_user_allowed(to, from, n); + ret = __copy_tofrom_user(to, (__force const void __user *)from, n); prevent_write_to_user(to, n); return ret; } @@ -508,6 +443,9 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t { if (unlikely(!access_ok(ptr, len))) return false; + + might_fault(); + allow_read_write_user((void __user *)ptr, ptr, len); return true; } @@ -521,6 +459,9 @@ user_read_access_begin(const void __user *ptr, size_t len) { if (unlikely(!access_ok(ptr, len))) return false; + + might_fault(); + allow_read_from_user(ptr, len); return true; } @@ -532,6 +473,9 @@ user_write_access_begin(const void __user *ptr, size_t len) { if (unlikely(!access_ok(ptr, len))) return false; + + might_fault(); + allow_write_to_user((void __user *)ptr, len); return true; } @@ -540,7 +484,8 @@ user_write_access_begin(const void __user *ptr, size_t len) #define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0) #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e) -#define unsafe_put_user(x, p, e) __put_user_goto(x, p, e) +#define unsafe_put_user(x, p, e) \ + __unsafe_put_user_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e) #define unsafe_copy_to_user(d, s, l, e) \ do { \ @@ -550,17 +495,17 @@ do { \ int _i; \ \ for (_i = 0; _i < (_len & ~(sizeof(long) - 1)); _i += sizeof(long)) \ - __put_user_goto(*(long*)(_src + _i), (long __user *)(_dst + _i), e);\ + unsafe_put_user(*(long*)(_src + _i), (long __user *)(_dst + _i), e); \ if (IS_ENABLED(CONFIG_PPC64) && (_len & 4)) { \ - __put_user_goto(*(u32*)(_src + _i), (u32 __user *)(_dst + _i), e); \ + unsafe_put_user(*(u32*)(_src + _i), (u32 __user *)(_dst + _i), e); \ _i += 4; \ } \ if (_len & 2) { \ - __put_user_goto(*(u16*)(_src + _i), (u16 __user *)(_dst + _i), e); \ + unsafe_put_user(*(u16*)(_src + _i), (u16 __user *)(_dst + _i), e); \ _i += 2; \ } \ if (_len & 1) \ - __put_user_goto(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e);\ + unsafe_put_user(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e); \ } while (0) #define HAVE_GET_KERNEL_NOFAULT diff --git a/arch/powerpc/include/asm/vdso/timebase.h b/arch/powerpc/include/asm/vdso/timebase.h index 881f655caa0a..891c9d5eaabe 100644 --- a/arch/powerpc/include/asm/vdso/timebase.h +++ b/arch/powerpc/include/asm/vdso/timebase.h @@ -43,12 +43,6 @@ #define mttbl(v) asm volatile("mttbl %0":: "r"(v)) #define mttbu(v) asm volatile("mttbu %0":: "r"(v)) -/* For compatibility, get_tbl() is defined as get_tb() on ppc64 */ -static inline unsigned long get_tbl(void) -{ - return mftb(); -} - static __always_inline u64 get_tb(void) { unsigned int tbhi, tblo, tbhi2; diff --git a/arch/powerpc/include/asm/xmon.h b/arch/powerpc/include/asm/xmon.h index 454a7fc6113b..68bfb2361f03 100644 --- a/arch/powerpc/include/asm/xmon.h +++ b/arch/powerpc/include/asm/xmon.h @@ -17,8 +17,8 @@ struct pt_regs; extern int xmon(struct pt_regs *excp); extern irqreturn_t xmon_irq(int, void *); #else -static inline void xmon_setup(void) { }; -static inline void xmon_register_spus(struct list_head *list) { }; +static inline void xmon_setup(void) { } +static inline void xmon_register_spus(struct list_head *list) { } #endif #if defined(CONFIG_XMON) && defined(CONFIG_SMP) diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index c3af3f324c5a..9f18fa090f1f 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -644,6 +644,8 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1) #define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2) #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) +#define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) +#define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h index bdf5f10f8b9f..578b3ee86105 100644 --- a/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/arch/powerpc/include/uapi/asm/perf_regs.h @@ -55,17 +55,33 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_MMCR3, PERF_REG_POWERPC_SIER2, PERF_REG_POWERPC_SIER3, + PERF_REG_POWERPC_PMC1, + PERF_REG_POWERPC_PMC2, + PERF_REG_POWERPC_PMC3, + PERF_REG_POWERPC_PMC4, + PERF_REG_POWERPC_PMC5, + PERF_REG_POWERPC_PMC6, /* Max regs without the extended regs */ PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1, }; #define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) -/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */ -#define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK) -/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */ -#define PERF_REG_PMU_MASK_31 (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK) +/* Exclude MMCR3, SIER2, SIER3 for CPU_FTR_ARCH_300 */ +#define PERF_EXCLUDE_REG_EXT_300 (7ULL << PERF_REG_POWERPC_MMCR3) -#define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1) -#define PERF_REG_MAX_ISA_31 (PERF_REG_POWERPC_SIER3 + 1) +/* + * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 + * includes 9 SPRS from MMCR0 to PMC6 excluding the + * unsupported SPRS in PERF_EXCLUDE_REG_EXT_300. + */ +#define PERF_REG_PMU_MASK_300 ((0xfffULL << PERF_REG_POWERPC_MMCR0) - PERF_EXCLUDE_REG_EXT_300) + +/* + * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 + * includes 12 SPRs from MMCR0 to PMC6. + */ +#define PERF_REG_PMU_MASK_31 (0xfffULL << PERF_REG_POWERPC_MMCR0) + +#define PERF_REG_EXTENDED_MAX (PERF_REG_POWERPC_PMC6 + 1) #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index fe2ef598e2ea..6084fa499aa3 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -46,12 +46,12 @@ obj-y := cputable.o syscalls.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ of_platform.o prom_parse.o firmware.o \ - hw_breakpoint_constraints.o + hw_breakpoint_constraints.o interrupt.o obj-y += ptrace/ obj-$(CONFIG_PPC64) += setup_64.o \ - paca.o nvram_64.o note.o syscall_64.o + paca.o nvram_64.o note.o obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o -obj-$(CONFIG_VDSO32) += vdso32/ +obj-$(CONFIG_VDSO32) += vdso32_wrapper.o obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_DAWR) += dawr.o @@ -60,7 +60,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o -obj-$(CONFIG_PPC64) += vdso64/ +obj-$(CONFIG_PPC64) += vdso64_wrapper.o obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_BOOK3S_IDLE) += idle_book3s.o procfs-y := proc_powerpc.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index b12d7c049bfe..f3a662201a9f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -255,7 +255,6 @@ int main(void) #endif /* CONFIG_PPC_MM_SLICES */ OFFSET(PACA_EXGEN, paca_struct, exgen); OFFSET(PACA_EXMC, paca_struct, exmc); - OFFSET(PACA_EXSLB, paca_struct, exslb); OFFSET(PACA_EXNMI, paca_struct, exnmi); #ifdef CONFIG_PPC_PSERIES OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr); @@ -309,7 +308,7 @@ int main(void) /* Interrupt register frame */ DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); - DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); + DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS); STACK_PT_REGS_OFFSET(GPR0, gpr[0]); STACK_PT_REGS_OFFSET(GPR1, gpr[1]); STACK_PT_REGS_OFFSET(GPR2, gpr[2]); @@ -526,8 +525,10 @@ int main(void) OFFSET(VCPU_CTRL, kvm_vcpu, arch.ctrl); OFFSET(VCPU_DABR, kvm_vcpu, arch.dabr); OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx); - OFFSET(VCPU_DAWR, kvm_vcpu, arch.dawr); - OFFSET(VCPU_DAWRX, kvm_vcpu, arch.dawrx); + OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0); + OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0); + OFFSET(VCPU_DAWR1, kvm_vcpu, arch.dawr1); + OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1); OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr); OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags); OFFSET(VCPU_DEC, kvm_vcpu, arch.dec); @@ -668,7 +669,6 @@ int main(void) HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_PTID, ptid); - HSTATE_FIELD(HSTATE_TID, tid); HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend); HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]); HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]); @@ -698,8 +698,6 @@ int main(void) OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar); OFFSET(KVM_SPLIT_DO_NAP, kvm_split_mode, do_nap); OFFSET(KVM_SPLIT_NAPPED, kvm_split_mode, napped); - OFFSET(KVM_SPLIT_DO_SET, kvm_split_mode, do_set); - OFFSET(KVM_SPLIT_DO_RESTORE, kvm_split_mode, do_restore); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 65f35ec052d4..ae0fdef0ac11 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -13,7 +13,6 @@ #include <linux/export.h> #include <linux/jump_label.h> -#include <asm/oprofile_impl.h> #include <asm/cputable.h> #include <asm/prom.h> /* for PTRRELOC on ARCH=ppc */ #include <asm/mce.h> @@ -151,7 +150,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_ppc970, .cpu_restore = __restore_cpu_ppc970, .oprofile_cpu_type = "ppc64/970", - .oprofile_type = PPC_OPROFILE_POWER4, .platform = "ppc970", }, { /* PPC970FX */ @@ -169,7 +167,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_ppc970, .cpu_restore = __restore_cpu_ppc970, .oprofile_cpu_type = "ppc64/970", - .oprofile_type = PPC_OPROFILE_POWER4, .platform = "ppc970", }, { /* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */ @@ -187,7 +184,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_ppc970, .cpu_restore = __restore_cpu_ppc970, .oprofile_cpu_type = "ppc64/970MP", - .oprofile_type = PPC_OPROFILE_POWER4, .platform = "ppc970", }, { /* PPC970MP */ @@ -205,7 +201,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_ppc970MP, .cpu_restore = __restore_cpu_ppc970, .oprofile_cpu_type = "ppc64/970MP", - .oprofile_type = PPC_OPROFILE_POWER4, .platform = "ppc970", }, { /* PPC970GX */ @@ -222,7 +217,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .cpu_setup = __setup_cpu_ppc970, .oprofile_cpu_type = "ppc64/970", - .oprofile_type = PPC_OPROFILE_POWER4, .platform = "ppc970", }, { /* Power5 GR */ @@ -237,12 +231,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power5", - .oprofile_type = PPC_OPROFILE_POWER4, - /* SIHV / SIPR bits are implemented on POWER4+ (GQ) - * and above but only works on POWER5 and above - */ - .oprofile_mmcra_sihv = MMCRA_SIHV, - .oprofile_mmcra_sipr = MMCRA_SIPR, .platform = "power5", }, { /* Power5++ */ @@ -256,9 +244,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .oprofile_cpu_type = "ppc64/power5++", - .oprofile_type = PPC_OPROFILE_POWER4, - .oprofile_mmcra_sihv = MMCRA_SIHV, - .oprofile_mmcra_sipr = MMCRA_SIPR, .platform = "power5+", }, { /* Power5 GS */ @@ -273,9 +258,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power5+", - .oprofile_type = PPC_OPROFILE_POWER4, - .oprofile_mmcra_sihv = MMCRA_SIHV, - .oprofile_mmcra_sipr = MMCRA_SIPR, .platform = "power5+", }, { /* POWER6 in P5+ mode; 2.04-compliant processor */ @@ -288,7 +270,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .oprofile_cpu_type = "ppc64/ibm-compat-v1", - .oprofile_type = PPC_OPROFILE_POWER4, .platform = "power5+", }, { /* Power6 */ @@ -304,11 +285,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power6", - .oprofile_type = PPC_OPROFILE_POWER4, - .oprofile_mmcra_sihv = POWER6_MMCRA_SIHV, - .oprofile_mmcra_sipr = POWER6_MMCRA_SIPR, - .oprofile_mmcra_clear = POWER6_MMCRA_THRM | - POWER6_MMCRA_OTHER, .platform = "power6x", }, { /* 2.05-compliant processor, i.e. Power6 "architected" mode */ @@ -321,7 +297,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .oprofile_cpu_type = "ppc64/ibm-compat-v1", - .oprofile_type = PPC_OPROFILE_POWER4, .platform = "power6", }, { /* 2.06-compliant processor, i.e. Power7 "architected" mode */ @@ -334,7 +309,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER7, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_type = PPC_OPROFILE_POWER4, .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, @@ -351,7 +325,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER8, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_type = PPC_OPROFILE_INVALID, .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, @@ -368,7 +341,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER9, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_type = PPC_OPROFILE_INVALID, .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, @@ -384,7 +356,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER10, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_type = PPC_OPROFILE_INVALID, .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power10, .cpu_restore = __restore_cpu_power10, @@ -403,7 +374,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power7", - .oprofile_type = PPC_OPROFILE_POWER4, .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, .machine_check_early = __machine_check_early_realmode_p7, @@ -422,7 +392,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power7", - .oprofile_type = PPC_OPROFILE_POWER4, .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, .machine_check_early = __machine_check_early_realmode_p7, @@ -441,7 +410,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power8", - .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .machine_check_early = __machine_check_early_realmode_p8, @@ -460,7 +428,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power8", - .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .machine_check_early = __machine_check_early_realmode_p8, @@ -479,7 +446,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power8", - .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .machine_check_early = __machine_check_early_realmode_p8, @@ -498,7 +464,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power9", - .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .machine_check_early = __machine_check_early_realmode_p9, @@ -517,7 +482,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power9", - .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .machine_check_early = __machine_check_early_realmode_p9, @@ -536,7 +500,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power9", - .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .machine_check_early = __machine_check_early_realmode_p9, @@ -555,7 +518,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power10", - .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power10, .cpu_restore = __restore_cpu_power10, .machine_check_early = __machine_check_early_realmode_p10, @@ -575,7 +537,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 4, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/cell-be", - .oprofile_type = PPC_OPROFILE_CELL, .platform = "ppc-cell-be", }, { /* PA Semi PA6T */ @@ -592,7 +553,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_pa6t, .cpu_restore = __restore_cpu_pa6t, .oprofile_cpu_type = "ppc64/pa6t", - .oprofile_type = PPC_OPROFILE_PA6T, .platform = "pa6t", }, { /* default match */ @@ -757,7 +717,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc750", .oprofile_cpu_type = "ppc/750", - .oprofile_type = PPC_OPROFILE_G4, }, { /* 745/755 */ .pvr_mask = 0xfffff000, @@ -789,7 +748,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc750", .oprofile_cpu_type = "ppc/750", - .oprofile_type = PPC_OPROFILE_G4, }, { /* 750FX rev 2.0 must disable HID0[DPM] */ .pvr_mask = 0xffffffff, @@ -806,7 +764,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc750", .oprofile_cpu_type = "ppc/750", - .oprofile_type = PPC_OPROFILE_G4, }, { /* 750FX (All revs except 2.0) */ .pvr_mask = 0xffff0000, @@ -823,7 +780,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc750", .oprofile_cpu_type = "ppc/750", - .oprofile_type = PPC_OPROFILE_G4, }, { /* 750GX */ .pvr_mask = 0xffff0000, @@ -840,7 +796,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc750", .oprofile_cpu_type = "ppc/750", - .oprofile_type = PPC_OPROFILE_G4, }, { /* 740/750 (L2CR bit need fixup for 740) */ .pvr_mask = 0xffff0000, @@ -919,7 +874,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, .oprofile_cpu_type = "ppc/7450", - .oprofile_type = PPC_OPROFILE_G4, .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -937,7 +891,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, .oprofile_cpu_type = "ppc/7450", - .oprofile_type = PPC_OPROFILE_G4, .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -955,7 +908,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, .oprofile_cpu_type = "ppc/7450", - .oprofile_type = PPC_OPROFILE_G4, .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -973,7 +925,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, .oprofile_cpu_type = "ppc/7450", - .oprofile_type = PPC_OPROFILE_G4, .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -991,7 +942,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, .oprofile_cpu_type = "ppc/7450", - .oprofile_type = PPC_OPROFILE_G4, .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1009,7 +959,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, .oprofile_cpu_type = "ppc/7450", - .oprofile_type = PPC_OPROFILE_G4, .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1027,7 +976,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, .oprofile_cpu_type = "ppc/7450", - .oprofile_type = PPC_OPROFILE_G4, .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1045,7 +993,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, .oprofile_cpu_type = "ppc/7450", - .oprofile_type = PPC_OPROFILE_G4, .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1062,7 +1009,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, .oprofile_cpu_type = "ppc/7450", - .oprofile_type = PPC_OPROFILE_G4, .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1080,7 +1026,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, .oprofile_cpu_type = "ppc/7450", - .oprofile_type = PPC_OPROFILE_G4, .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1098,7 +1043,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, .oprofile_cpu_type = "ppc/7450", - .oprofile_type = PPC_OPROFILE_G4, .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1211,7 +1155,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_83xx, .num_pmcs = 4, .oprofile_cpu_type = "ppc/e300", - .oprofile_type = PPC_OPROFILE_FSL_EMB, .platform = "ppc603", }, { /* e300c4 (e300c1, plus one IU) */ @@ -1228,7 +1171,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_83xx, .num_pmcs = 4, .oprofile_cpu_type = "ppc/e300", - .oprofile_type = PPC_OPROFILE_FSL_EMB, .platform = "ppc603", }, #endif @@ -1925,7 +1867,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 32, .num_pmcs = 4, .oprofile_cpu_type = "ppc/e500", - .oprofile_type = PPC_OPROFILE_FSL_EMB, .cpu_setup = __setup_cpu_e500v1, .machine_check = machine_check_e500, .platform = "ppc8540", @@ -1945,7 +1886,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 32, .num_pmcs = 4, .oprofile_cpu_type = "ppc/e500", - .oprofile_type = PPC_OPROFILE_FSL_EMB, .cpu_setup = __setup_cpu_e500v2, .machine_check = machine_check_e500, .platform = "ppc8548", @@ -1965,7 +1905,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 64, .num_pmcs = 4, .oprofile_cpu_type = "ppc/e500mc", - .oprofile_type = PPC_OPROFILE_FSL_EMB, .cpu_setup = __setup_cpu_e500mc, .machine_check = machine_check_e500mc, .platform = "ppce500mc", @@ -1987,7 +1926,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 64, .num_pmcs = 4, .oprofile_cpu_type = "ppc/e500mc", - .oprofile_type = PPC_OPROFILE_FSL_EMB, .cpu_setup = __setup_cpu_e5500, #ifndef CONFIG_PPC32 .cpu_restore = __restore_cpu_e5500, @@ -2010,7 +1948,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 64, .num_pmcs = 6, .oprofile_cpu_type = "ppc/e6500", - .oprofile_type = PPC_OPROFILE_FSL_EMB, .cpu_setup = __setup_cpu_e6500, #ifndef CONFIG_PPC32 .cpu_restore = __restore_cpu_e6500, @@ -2076,10 +2013,6 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, if (old.num_pmcs && !s->num_pmcs) { t->num_pmcs = old.num_pmcs; t->pmc_type = old.pmc_type; - t->oprofile_type = old.oprofile_type; - t->oprofile_mmcra_sihv = old.oprofile_mmcra_sihv; - t->oprofile_mmcra_sipr = old.oprofile_mmcra_sipr; - t->oprofile_mmcra_clear = old.oprofile_mmcra_clear; /* * If we have passed through this logic once before and diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 52680cf07c9d..5545c9cd17c1 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -12,17 +12,17 @@ #include <linux/hardirq.h> #include <asm/dbell.h> +#include <asm/interrupt.h> #include <asm/irq_regs.h> #include <asm/kvm_ppc.h> #include <asm/trace.h> #ifdef CONFIG_SMP -void doorbell_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception) { struct pt_regs *old_regs = set_irq_regs(regs); - irq_enter(); trace_doorbell_entry(regs); ppc_msgsync(); @@ -35,13 +35,12 @@ void doorbell_exception(struct pt_regs *regs) smp_ipi_demux_relaxed(); /* already performed the barrier */ trace_doorbell_exit(regs); - irq_exit(); + set_irq_regs(old_regs); } #else /* CONFIG_SMP */ -void doorbell_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception) { printk(KERN_WARNING "Received doorbell on non-smp system\n"); } #endif /* CONFIG_SMP */ - diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index b5478b72c08c..358aee7c2d79 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -19,7 +19,6 @@ #include <asm/dt_cpu_ftrs.h> #include <asm/mce.h> #include <asm/mmu.h> -#include <asm/oprofile_impl.h> #include <asm/prom.h> #include <asm/setup.h> @@ -103,7 +102,6 @@ static struct cpu_spec __initdata base_cpu_spec = { .num_pmcs = 0, .pmc_type = PPC_PMC_DEFAULT, .oprofile_cpu_type = NULL, - .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = NULL, .cpu_restore = __restore_cpu_cpufeatures, .machine_check_early = NULL, diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 813713c9120c..cd60bc1c8701 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1596,6 +1596,35 @@ static int proc_eeh_show(struct seq_file *m, void *v) } #ifdef CONFIG_DEBUG_FS + + +static struct pci_dev *eeh_debug_lookup_pdev(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + uint32_t domain, bus, dev, fn; + struct pci_dev *pdev; + char buf[20]; + int ret; + + memset(buf, 0, sizeof(buf)); + ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count); + if (!ret) + return ERR_PTR(-EFAULT); + + ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn); + if (ret != 4) { + pr_err("%s: expected 4 args, got %d\n", __func__, ret); + return ERR_PTR(-EINVAL); + } + + pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn); + if (!pdev) + return ERR_PTR(-ENODEV); + + return pdev; +} + static int eeh_enable_dbgfs_set(void *data, u64 val) { if (val) @@ -1688,26 +1717,13 @@ static ssize_t eeh_dev_check_write(struct file *filp, const char __user *user_buf, size_t count, loff_t *ppos) { - uint32_t domain, bus, dev, fn; struct pci_dev *pdev; struct eeh_dev *edev; - char buf[20]; int ret; - memset(buf, 0, sizeof(buf)); - ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count); - if (!ret) - return -EFAULT; - - ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn); - if (ret != 4) { - pr_err("%s: expected 4 args, got %d\n", __func__, ret); - return -EINVAL; - } - - pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn); - if (!pdev) - return -ENODEV; + pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); edev = pci_dev_to_eeh_dev(pdev); if (!edev) { @@ -1717,8 +1733,8 @@ static ssize_t eeh_dev_check_write(struct file *filp, } ret = eeh_dev_check_failure(edev); - pci_info(pdev, "eeh_dev_check_failure(%04x:%02x:%02x.%01x) = %d\n", - domain, bus, dev, fn, ret); + pci_info(pdev, "eeh_dev_check_failure(%s) = %d\n", + pci_name(pdev), ret); pci_dev_put(pdev); @@ -1829,25 +1845,12 @@ static ssize_t eeh_dev_break_write(struct file *filp, const char __user *user_buf, size_t count, loff_t *ppos) { - uint32_t domain, bus, dev, fn; struct pci_dev *pdev; - char buf[20]; int ret; - memset(buf, 0, sizeof(buf)); - ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count); - if (!ret) - return -EFAULT; - - ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn); - if (ret != 4) { - pr_err("%s: expected 4 args, got %d\n", __func__, ret); - return -EINVAL; - } - - pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn); - if (!pdev) - return -ENODEV; + pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); ret = eeh_debugfs_break_device(pdev); pci_dev_put(pdev); @@ -1865,6 +1868,53 @@ static const struct file_operations eeh_dev_break_fops = { .read = eeh_debugfs_dev_usage, }; +static ssize_t eeh_dev_can_recover(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct pci_driver *drv; + struct pci_dev *pdev; + size_t ret; + + pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + /* + * In order for error recovery to work the driver needs to implement + * .error_detected(), so it can quiesce IO to the device, and + * .slot_reset() so it can re-initialise the device after a reset. + * + * Ideally they'd implement .resume() too, but some drivers which + * we need to support (notably IPR) don't so I guess we can tolerate + * that. + * + * .mmio_enabled() is mostly there as a work-around for devices which + * take forever to re-init after a hot reset. Implementing that is + * strictly optional. + */ + drv = pci_dev_driver(pdev); + if (drv && + drv->err_handler && + drv->err_handler->error_detected && + drv->err_handler->slot_reset) { + ret = count; + } else { + ret = -EOPNOTSUPP; + } + + pci_dev_put(pdev); + + return ret; +} + +static const struct file_operations eeh_dev_can_recover_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = eeh_dev_can_recover, + .read = eeh_debugfs_dev_usage, +}; + #endif static int __init eeh_init_proc(void) @@ -1889,6 +1939,9 @@ static int __init eeh_init_proc(void) debugfs_create_file_unsafe("eeh_force_recover", 0600, powerpc_debugfs_root, NULL, &eeh_force_recover_fops); + debugfs_create_file_unsafe("eeh_dev_can_recover", 0600, + powerpc_debugfs_root, NULL, + &eeh_dev_can_recover_fops); eeh_cache_debugfs_init(); #endif } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 1c9b0ccc2172..78c430b7f9d9 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -175,14 +175,11 @@ transfer_to_handler: addi r11,r11,global_dbcr0@l #ifdef CONFIG_SMP lwz r9,TASK_CPU(r2) - slwi r9,r9,3 + slwi r9,r9,2 add r11,r11,r9 #endif lwz r12,0(r11) mtspr SPRN_DBCR0,r12 - lwz r12,4(r11) - addi r12,r12,-1 - stw r12,4(r11) #endif b 3f @@ -276,8 +273,7 @@ reenable_mmu: * We save a bunch of GPRs, * r3 can be different from GPR3(r1) at this point, r9 and r11 * contains the old MSR and handler address respectively, - * r4 & r5 can contain page fault arguments that need to be passed - * along as well. r0, r6-r8, r12, CCR, CTR, XER etc... are left + * r0, r4-r8, r12, CCR, CTR, XER etc... are left * clobbered as they aren't useful past this point. */ @@ -285,15 +281,11 @@ reenable_mmu: stw r9,8(r1) stw r11,12(r1) stw r3,16(r1) - stw r4,20(r1) - stw r5,24(r1) /* If we are disabling interrupts (normal case), simply log it with * lockdep */ 1: bl trace_hardirqs_off - lwz r5,24(r1) - lwz r4,20(r1) lwz r3,16(r1) lwz r11,12(r1) lwz r9,8(r1) @@ -334,132 +326,29 @@ stack_ovf: _ASM_NOKPROBE_SYMBOL(stack_ovf) #endif -#ifdef CONFIG_TRACE_IRQFLAGS -trace_syscall_entry_irq_off: - /* - * Syscall shouldn't happen while interrupts are disabled, - * so let's do a warning here. - */ -0: trap - EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING - bl trace_hardirqs_on - - /* Now enable for real */ - LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE) - mtmsr r10 - - REST_GPR(0, r1) - REST_4GPRS(3, r1) - REST_2GPRS(7, r1) - b DoSyscall -#endif /* CONFIG_TRACE_IRQFLAGS */ - .globl transfer_to_syscall transfer_to_syscall: -#ifdef CONFIG_TRACE_IRQFLAGS - andi. r12,r9,MSR_EE - beq- trace_syscall_entry_irq_off -#endif /* CONFIG_TRACE_IRQFLAGS */ + SAVE_NVGPRS(r1) +#ifdef CONFIG_PPC_BOOK3S_32 + kuep_lock r11, r12 +#endif -/* - * Handle a system call. - */ - .stabs "arch/powerpc/kernel/",N_SO,0,0,0f - .stabs "entry_32.S",N_SO,0,0,0f -0: - -_GLOBAL(DoSyscall) - stw r3,ORIG_GPR3(r1) - li r12,0 - stw r12,RESULT(r1) -#ifdef CONFIG_TRACE_IRQFLAGS - /* Make sure interrupts are enabled */ - mfmsr r11 - andi. r12,r11,MSR_EE - /* We came in with interrupts disabled, we WARN and mark them enabled - * for lockdep now */ -0: tweqi r12, 0 - EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING -#endif /* CONFIG_TRACE_IRQFLAGS */ - lwz r11,TI_FLAGS(r2) - andi. r11,r11,_TIF_SYSCALL_DOTRACE - bne- syscall_dotrace -syscall_dotrace_cont: - cmplwi 0,r0,NR_syscalls - lis r10,sys_call_table@h - ori r10,r10,sys_call_table@l - slwi r0,r0,2 - bge- 66f - - barrier_nospec_asm - /* - * Prevent the load of the handler below (based on the user-passed - * system call number) being speculatively executed until the test - * against NR_syscalls and branch to .66f above has - * committed. - */ + /* Calling convention has r9 = orig r0, r10 = regs */ + addi r10,r1,STACK_FRAME_OVERHEAD + mr r9,r0 + stw r10,THREAD+PT_REGS(r2) + bl system_call_exception - lwzx r10,r10,r0 /* Fetch system call handler [ptr] */ - mtlr r10 - addi r9,r1,STACK_FRAME_OVERHEAD - PPC440EP_ERR42 - blrl /* Call handler */ - .globl ret_from_syscall ret_from_syscall: -#ifdef CONFIG_DEBUG_RSEQ - /* Check whether the syscall is issued inside a restartable sequence */ - stw r3,GPR3(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl rseq_syscall - lwz r3,GPR3(r1) -#endif - mr r6,r3 - /* disable interrupts so current_thread_info()->flags can't change */ - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */ - /* Note: We don't bother telling lockdep about it */ - mtmsr r10 - lwz r9,TI_FLAGS(r2) - li r8,-MAX_ERRNO - andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) - bne- syscall_exit_work - cmplw 0,r3,r8 - blt+ syscall_exit_cont - lwz r11,_CCR(r1) /* Load CR */ - neg r3,r3 - oris r11,r11,0x1000 /* Set SO bit in CR */ - stw r11,_CCR(r1) -syscall_exit_cont: - lwz r8,_MSR(r1) -#ifdef CONFIG_TRACE_IRQFLAGS - /* If we are going to return from the syscall with interrupts - * off, we trace that here. It shouldn't normally happen. - */ - andi. r10,r8,MSR_EE - bne+ 1f - stw r3,GPR3(r1) - bl trace_hardirqs_off - lwz r3,GPR3(r1) -1: -#endif /* CONFIG_TRACE_IRQFLAGS */ -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - /* If the process has its own DBCR0 value, load it up. The internal - debug mode bit tells us that dbcr0 should be loaded. */ - lwz r0,THREAD+THREAD_DBCR0(r2) - andis. r10,r0,DBCR0_IDM@h - bnel- load_dbcr0 -#endif + addi r4,r1,STACK_FRAME_OVERHEAD + li r5,0 + bl syscall_exit_prepare #ifdef CONFIG_PPC_47x lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 bne- 2f #endif /* CONFIG_PPC_47x */ -1: -BEGIN_FTR_SECTION - lwarx r7,0,r1 -END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) - stwcx. r0,0,r1 /* to clear the reservation */ - ACCOUNT_CPU_USER_EXIT(r2, r5, r7) #ifdef CONFIG_PPC_BOOK3S_32 kuep_unlock r5, r7 #endif @@ -467,21 +356,36 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) lwz r4,_LINK(r1) lwz r5,_CCR(r1) mtlr r4 - mtcr r5 lwz r7,_NIP(r1) - lwz r2,GPR2(r1) - lwz r1,GPR1(r1) + lwz r8,_MSR(r1) + cmpwi r3,0 + lwz r3,GPR3(r1) syscall_exit_finish: -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) - mtspr SPRN_NRI, r0 -#endif mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 + + bne 3f + mtcr r5 + +1: lwz r2,GPR2(r1) + lwz r1,GPR1(r1) rfi #ifdef CONFIG_40x b . /* Prevent prefetch past rfi */ #endif -_ASM_NOKPROBE_SYMBOL(syscall_exit_finish) + +3: mtcr r5 + lwz r4,_CTR(r1) + lwz r5,_XER(r1) + REST_NVGPRS(r1) + mtctr r4 + mtxer r5 + lwz r0,GPR0(r1) + lwz r3,GPR3(r1) + REST_8GPRS(4,r1) + lwz r12,GPR12(r1) + b 1b + #ifdef CONFIG_44x 2: li r7,0 iccci r0,r0 @@ -489,9 +393,6 @@ _ASM_NOKPROBE_SYMBOL(syscall_exit_finish) b 1b #endif /* CONFIG_44x */ -66: li r3,-ENOSYS - b ret_from_syscall - .globl ret_from_fork ret_from_fork: REST_NVGPRS(r1) @@ -510,157 +411,6 @@ ret_from_kernel_thread: li r3,0 b ret_from_syscall -/* Traced system call support */ -syscall_dotrace: - SAVE_NVGPRS(r1) - li r0,0xc00 - stw r0,_TRAP(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_syscall_trace_enter - /* - * Restore argument registers possibly just changed. - * We use the return value of do_syscall_trace_enter - * for call number to look up in the table (r0). - */ - mr r0,r3 - lwz r3,GPR3(r1) - lwz r4,GPR4(r1) - lwz r5,GPR5(r1) - lwz r6,GPR6(r1) - lwz r7,GPR7(r1) - lwz r8,GPR8(r1) - REST_NVGPRS(r1) - - cmplwi r0,NR_syscalls - /* Return code is already in r3 thanks to do_syscall_trace_enter() */ - bge- ret_from_syscall - b syscall_dotrace_cont - -syscall_exit_work: - andi. r0,r9,_TIF_RESTOREALL - beq+ 0f - REST_NVGPRS(r1) - b 2f -0: cmplw 0,r3,r8 - blt+ 1f - andi. r0,r9,_TIF_NOERROR - bne- 1f - lwz r11,_CCR(r1) /* Load CR */ - neg r3,r3 - oris r11,r11,0x1000 /* Set SO bit in CR */ - stw r11,_CCR(r1) - -1: stw r6,RESULT(r1) /* Save result */ - stw r3,GPR3(r1) /* Update return value */ -2: andi. r0,r9,(_TIF_PERSYSCALL_MASK) - beq 4f - - /* Clear per-syscall TIF flags if any are set. */ - - li r11,_TIF_PERSYSCALL_MASK - addi r12,r2,TI_FLAGS -3: lwarx r8,0,r12 - andc r8,r8,r11 - stwcx. r8,0,r12 - bne- 3b - -4: /* Anything which requires enabling interrupts? */ - andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP) - beq ret_from_except - - /* Re-enable interrupts. There is no need to trace that with - * lockdep as we are supposed to have IRQs on at this point - */ - ori r10,r10,MSR_EE - mtmsr r10 - - /* Save NVGPRS if they're not saved already */ - lwz r4,_TRAP(r1) - andi. r4,r4,1 - beq 5f - SAVE_NVGPRS(r1) - li r4,0xc00 - stw r4,_TRAP(r1) -5: - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_syscall_trace_leave - b ret_from_except_full - - /* - * System call was called from kernel. We get here with SRR1 in r9. - * Mark the exception as recoverable once we have retrieved SRR0, - * trap a warning and return ENOSYS with CR[SO] set. - */ - .globl ret_from_kernel_syscall -ret_from_kernel_syscall: - mfspr r9, SPRN_SRR0 - mfspr r10, SPRN_SRR1 -#if !defined(CONFIG_4xx) && !defined(CONFIG_BOOKE) - LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR|MSR_DR)) - mtmsr r11 -#endif - -0: trap - EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING - - li r3, ENOSYS - crset so -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) - mtspr SPRN_NRI, r0 -#endif - mtspr SPRN_SRR0, r9 - mtspr SPRN_SRR1, r10 - rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif -_ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall) - -/* - * The fork/clone functions need to copy the full register set into - * the child process. Therefore we need to save all the nonvolatile - * registers (r13 - r31) before calling the C code. - */ - .globl ppc_fork -ppc_fork: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ - stw r0,_TRAP(r1) /* register set saved */ - b sys_fork - - .globl ppc_vfork -ppc_vfork: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ - stw r0,_TRAP(r1) /* register set saved */ - b sys_vfork - - .globl ppc_clone -ppc_clone: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ - stw r0,_TRAP(r1) /* register set saved */ - b sys_clone - - .globl ppc_clone3 -ppc_clone3: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ - stw r0,_TRAP(r1) /* register set saved */ - b sys_clone3 - - .globl ppc_swapcontext -ppc_swapcontext: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ - stw r0,_TRAP(r1) /* register set saved */ - b sys_swapcontext - /* * Top-level page fault handling. * This is in assembler because if do_page_fault tells us that @@ -670,10 +420,6 @@ ppc_swapcontext: .globl handle_page_fault handle_page_fault: addi r3,r1,STACK_FRAME_OVERHEAD -#ifdef CONFIG_PPC_BOOK3S_32 - andis. r0,r5,DSISR_DABRMATCH@h - bne- handle_dabr_fault -#endif bl do_page_fault cmpwi r3,0 beq+ ret_from_except @@ -681,23 +427,11 @@ handle_page_fault: lwz r0,_TRAP(r1) clrrwi r0,r0,1 stw r0,_TRAP(r1) - mr r5,r3 + mr r4,r3 /* err arg for bad_page_fault */ addi r3,r1,STACK_FRAME_OVERHEAD - lwz r4,_DAR(r1) bl __bad_page_fault b ret_from_except_full -#ifdef CONFIG_PPC_BOOK3S_32 - /* We have a data breakpoint exception - handle it */ -handle_dabr_fault: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - clrrwi r0,r0,1 - stw r0,_TRAP(r1) - bl do_break - b ret_from_except_full -#endif - /* * This routine switches between two different tasks. The process * state of one is saved on its kernel stack. Then the state @@ -1237,14 +971,11 @@ load_dbcr0: addi r11,r11,global_dbcr0@l #ifdef CONFIG_SMP lwz r9,TASK_CPU(r2) - slwi r9,r9,3 + slwi r9,r9,2 add r11,r11,r9 #endif stw r10,0(r11) mtspr SPRN_DBCR0,r0 - lwz r10,4(r11) - addi r10,r10,1 - stw r10,4(r11) li r11,-1 mtspr SPRN_DBSR,r11 /* clear all pending debug events */ blr @@ -1253,7 +984,7 @@ load_dbcr0: .align 4 .global global_dbcr0 global_dbcr0: - .space 8*NR_CPUS + .space 4*NR_CPUS .previous #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 33ddfeef4fe9..6c4d9e276c4d 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -108,7 +108,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) li r11,\trapnr std r11,_TRAP(r1) std r12,_CCR(r1) - std r3,ORIG_GPR3(r1) addi r10,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r10) /* "regshere" marker */ @@ -226,6 +225,12 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate) #endif .balign IFETCH_ALIGN_BYTES + .globl system_call_common_real +system_call_common_real: + ld r10,PACAKMSR(r13) /* get MSR value for kernel */ + mtmsrd r10 + + .balign IFETCH_ALIGN_BYTES .globl system_call_common system_call_common: _ASM_NOKPROBE_SYMBOL(system_call_common) @@ -278,7 +283,6 @@ END_BTB_FLUSH_SECTION std r10,_LINK(r1) std r11,_TRAP(r1) std r12,_CCR(r1) - std r3,ORIG_GPR3(r1) addi r10,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r10) /* "regshere" marker */ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 74d07dc0bb48..e8eb9992a270 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -398,7 +398,6 @@ exc_##n##_common: \ std r10,_NIP(r1); /* save SRR0 to stackframe */ \ std r11,_MSR(r1); /* save SRR1 to stackframe */ \ beq 2f; /* if from kernel mode */ \ - ACCOUNT_CPU_USER_ENTRY(r13,r10,r11);/* accounting (uses cr0+eq) */ \ 2: ld r3,excf+EX_R10(r13); /* get back r10 */ \ ld r4,excf+EX_R11(r13); /* get back r11 */ \ mfspr r5,scratch; /* get back r13 */ \ @@ -791,7 +790,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) EXCEPTION_COMMON_CRIT(0xd00) std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - mr r4,r14 ld r14,PACA_EXCRIT+EX_R14(r13) ld r15,PACA_EXCRIT+EX_R15(r13) bl save_nvgprs @@ -864,7 +862,6 @@ kernel_dbg_exc: INTS_DISABLE std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - mr r4,r14 ld r14,PACA_EXDBG+EX_R14(r13) ld r15,PACA_EXDBG+EX_R15(r13) bl save_nvgprs @@ -1011,8 +1008,6 @@ storage_fault_common: std r14,_DAR(r1) std r15,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - mr r4,r14 - mr r5,r15 ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) bl do_page_fault @@ -1020,9 +1015,8 @@ storage_fault_common: bne- 1f b ret_from_except_lite 1: bl save_nvgprs - mr r5,r3 + mr r4,r3 addi r3,r1,STACK_FRAME_OVERHEAD - ld r4,_DAR(r1) bl __bad_page_fault b ret_from_except diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6e53f7638737..60d3051a8bc8 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -139,7 +139,6 @@ name: #define IKVM_VIRT .L_IKVM_VIRT_\name\() /* Virt entry tests KVM */ #define ISTACK .L_ISTACK_\name\() /* Set regular kernel stack */ #define __ISTACK(name) .L_ISTACK_ ## name -#define IRECONCILE .L_IRECONCILE_\name\() /* Do RECONCILE_IRQ_STATE */ #define IKUAP .L_IKUAP_\name\() /* Do KUAP lock */ #define INT_DEFINE_BEGIN(n) \ @@ -203,9 +202,6 @@ do_define_int n .ifndef ISTACK ISTACK=1 .endif - .ifndef IRECONCILE - IRECONCILE=1 - .endif .ifndef IKUAP IKUAP=1 .endif @@ -581,7 +577,6 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) kuap_save_amr_and_lock r9, r10, cr1, cr0 .endif beq 101f /* if from kernel mode */ - ACCOUNT_CPU_USER_ENTRY(r13, r9, r10) BEGIN_FTR_SECTION ld r9,IAREA+EX_PPR(r13) /* Read PPR from paca */ std r9,_PPR(r1) @@ -649,14 +644,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r11,exception_marker@toc(r2) std r10,RESULT(r1) /* clear regs->result */ std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */ - - .if ISTACK - ACCOUNT_STOLEN_TIME - .endif - - .if IRECONCILE - RECONCILE_IRQ_STATE(r10, r11) - .endif .endm /* @@ -705,14 +692,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r1,GPR1(r1) .endm -#define RUNLATCH_ON \ -BEGIN_FTR_SECTION \ - ld r3, PACA_THREAD_INFO(r13); \ - ld r4,TI_LOCAL_FLAGS(r3); \ - andi. r0,r4,_TLF_RUNLATCH; \ - beql ppc64_runlatch_on_trampoline; \ -END_FTR_SECTION_IFSET(CPU_FTR_CTRL) - /* * When the idle code in power4_idle puts the CPU into NAP mode, * it has to do so in a loop, and relies on the external interrupt @@ -935,7 +914,6 @@ INT_DEFINE_BEGIN(system_reset) */ ISET_RI=0 ISTACK=0 - IRECONCILE=0 IKVM_REAL=1 INT_DEFINE_END(system_reset) @@ -1022,20 +1000,6 @@ EXC_COMMON_BEGIN(system_reset_common) ld r1,PACA_NMI_EMERG_SP(r13) subi r1,r1,INT_FRAME_SIZE __GEN_COMMON_BODY system_reset - /* - * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does - * the right thing. We do not want to reconcile because that goes - * through irq tracing which we don't want in NMI. - * - * Save PACAIRQHAPPENED to RESULT (otherwise unused), and set HARD_DIS - * as we are running with MSR[EE]=0. - */ - li r10,IRQS_ALL_DISABLED - stb r10,PACAIRQSOFTMASK(r13) - lbz r10,PACAIRQHAPPENED(r13) - std r10,RESULT(r1) - ori r10,r10,PACA_IRQ_HARD_DIS - stb r10,PACAIRQHAPPENED(r13) addi r3,r1,STACK_FRAME_OVERHEAD bl system_reset_exception @@ -1051,14 +1015,6 @@ EXC_COMMON_BEGIN(system_reset_common) subi r10,r10,1 sth r10,PACA_IN_NMI(r13) - /* - * Restore soft mask settings. - */ - ld r10,RESULT(r1) - stb r10,PACAIRQHAPPENED(r13) - ld r10,SOFTE(r1) - stb r10,PACAIRQSOFTMASK(r13) - kuap_kernel_restore r9, r10 EXCEPTION_RESTORE_REGS RFI_TO_USER_OR_KERNEL @@ -1123,7 +1079,6 @@ INT_DEFINE_BEGIN(machine_check_early) ISTACK=0 IDAR=1 IDSISR=1 - IRECONCILE=0 IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */ INT_DEFINE_END(machine_check_early) @@ -1205,30 +1160,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) li r10,MSR_RI mtmsrd r10,1 - /* - * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see - * system_reset_common) - */ - li r10,IRQS_ALL_DISABLED - stb r10,PACAIRQSOFTMASK(r13) - lbz r10,PACAIRQHAPPENED(r13) - std r10,RESULT(r1) - ori r10,r10,PACA_IRQ_HARD_DIS - stb r10,PACAIRQHAPPENED(r13) - addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_early std r3,RESULT(r1) /* Save result */ ld r12,_MSR(r1) - /* - * Restore soft mask settings. - */ - ld r10,RESULT(r1) - stb r10,PACAIRQHAPPENED(r13) - ld r10,SOFTE(r1) - stb r10,PACAIRQSOFTMASK(r13) - #ifdef CONFIG_PPC_P7_NAP /* * Check if thread was in power saving mode. We come here when any @@ -1401,14 +1337,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * * Handling: * - Hash MMU - * Go to do_hash_page first to see if the HPT can be filled from an entry in - * the Linux page table. Hash faults can hit in kernel mode in a fairly + * Go to do_hash_fault, which attempts to fill the HPT from an entry in the + * Linux page table. Hash faults can hit in kernel mode in a fairly * arbitrary state (e.g., interrupts disabled, locks held) when accessing * "non-bolted" regions, e.g., vmalloc space. However these should always be - * backed by Linux page tables. + * backed by Linux page table entries. * - * If none is found, do a Linux page fault. Linux page faults can happen in - * kernel mode due to user copy operations of course. + * If no entry is found the Linux page fault handler is invoked (by + * do_hash_fault). Linux page faults can happen in kernel mode due to user + * copy operations of course. * * KVM: The KVM HDSI handler may perform a load with MSR[DR]=1 in guest * MMU context, which may cause a DSI in the host, which must go to the @@ -1437,15 +1374,24 @@ EXC_VIRT_BEGIN(data_access, 0x4300, 0x80) EXC_VIRT_END(data_access, 0x4300, 0x80) EXC_COMMON_BEGIN(data_access_common) GEN_COMMON data_access - ld r4,_DAR(r1) - ld r5,_DSISR(r1) + ld r4,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + andis. r0,r4,DSISR_DABRMATCH@h + bne- 1f BEGIN_MMU_FTR_SECTION - ld r6,_MSR(r1) - li r3,0x300 - b do_hash_page /* Try to handle as hpte fault */ + bl do_hash_fault MMU_FTR_SECTION_ELSE - b handle_page_fault + bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) + b interrupt_return + +1: bl do_break + /* + * do_break() may have changed the NV GPRS while handling a breakpoint. + * If so, we need to restore them with their updated values. + */ + REST_NVGPRS(r1) + b interrupt_return GEN_KVM data_access @@ -1466,14 +1412,9 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) * on user-handler data structures. * * KVM: Same as 0x300, DSLB must test for KVM guest. - * - * A dedicated save area EXSLB is used (XXX: but it actually need not be - * these days, we could use EXGEN). */ INT_DEFINE_BEGIN(data_access_slb) IVEC=0x380 - IAREA=PACA_EXSLB - IRECONCILE=0 IDAR=1 IKVM_SKIP=1 IKVM_REAL=1 @@ -1487,10 +1428,9 @@ EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) EXC_VIRT_END(data_access_slb, 0x4380, 0x80) EXC_COMMON_BEGIN(data_access_slb_common) GEN_COMMON data_access_slb - ld r4,_DAR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ + addi r3,r1,STACK_FRAME_OVERHEAD bl do_slb_fault cmpdi r3,0 bne- 1f @@ -1501,9 +1441,6 @@ MMU_FTR_SECTION_ELSE li r3,-EFAULT ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) - RECONCILE_IRQ_STATE(r10, r11) - ld r4,_DAR(r1) - ld r5,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault b interrupt_return @@ -1538,15 +1475,13 @@ EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80) EXC_VIRT_END(instruction_access, 0x4400, 0x80) EXC_COMMON_BEGIN(instruction_access_common) GEN_COMMON instruction_access - ld r4,_DAR(r1) - ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION - ld r6,_MSR(r1) - li r3,0x400 - b do_hash_page /* Try to handle as hpte fault */ + bl do_hash_fault MMU_FTR_SECTION_ELSE - b handle_page_fault + bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) + b interrupt_return GEN_KVM instruction_access @@ -1562,8 +1497,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) */ INT_DEFINE_BEGIN(instruction_access_slb) IVEC=0x480 - IAREA=PACA_EXSLB - IRECONCILE=0 IISIDE=1 IDAR=1 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE @@ -1579,10 +1512,9 @@ EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) EXC_COMMON_BEGIN(instruction_access_slb_common) GEN_COMMON instruction_access_slb - ld r4,_DAR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ + addi r3,r1,STACK_FRAME_OVERHEAD bl do_slb_fault cmpdi r3,0 bne- 1f @@ -1593,9 +1525,6 @@ MMU_FTR_SECTION_ELSE li r3,-EFAULT ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) - RECONCILE_IRQ_STATE(r10, r11) - ld r4,_DAR(r1) - ld r5,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault b interrupt_return @@ -1643,7 +1572,6 @@ EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) EXC_COMMON_BEGIN(hardware_interrupt_common) GEN_COMMON hardware_interrupt FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ b interrupt_return @@ -1697,6 +1625,51 @@ INT_DEFINE_BEGIN(program_check) INT_DEFINE_END(program_check) EXC_REAL_BEGIN(program_check, 0x700, 0x100) + +#ifdef CONFIG_CPU_LITTLE_ENDIAN + /* + * There's a short window during boot where although the kernel is + * running little endian, any exceptions will cause the CPU to switch + * back to big endian. For example a WARN() boils down to a trap + * instruction, which will cause a program check, and we end up here but + * with the CPU in big endian mode. The first instruction of the program + * check handler (in GEN_INT_ENTRY below) is an mtsprg, which when + * executed in the wrong endian is an lhzu with a ~3GB displacement from + * r3. The content of r3 is random, so that is a load from some random + * location, and depending on the system can easily lead to a checkstop, + * or an infinitely recursive page fault. + * + * So to handle that case we have a trampoline here that can detect we + * are in the wrong endian and flip us back to the correct endian. We + * can't flip MSR[LE] using mtmsr, so we have to use rfid. That requires + * backing up SRR0/1 as well as a GPR. To do that we use SPRG0/2/3, as + * SPRG1 is already used for the paca. SPRG3 is user readable, but this + * trampoline is only active very early in boot, and SPRG3 will be + * reinitialised in vdso_getcpu_init() before userspace starts. + */ +BEGIN_FTR_SECTION + tdi 0,0,0x48 // Trap never, or in reverse endian: b . + 8 + b 1f // Skip trampoline if endian is correct + .long 0xa643707d // mtsprg 0, r11 Backup r11 + .long 0xa6027a7d // mfsrr0 r11 + .long 0xa643727d // mtsprg 2, r11 Backup SRR0 in SPRG2 + .long 0xa6027b7d // mfsrr1 r11 + .long 0xa643737d // mtsprg 3, r11 Backup SRR1 in SPRG3 + .long 0xa600607d // mfmsr r11 + .long 0x01006b69 // xori r11, r11, 1 Invert MSR[LE] + .long 0xa6037b7d // mtsrr1 r11 + .long 0x34076039 // li r11, 0x734 + .long 0xa6037a7d // mtsrr0 r11 + .long 0x2400004c // rfid + mfsprg r11, 3 + mtsrr1 r11 // Restore SRR1 + mfsprg r11, 2 + mtsrr0 r11 // Restore SRR0 + mfsprg r11, 0 // Restore r11 +1: +END_FTR_SECTION(0, 1) // nop out after boot +#endif /* CONFIG_CPU_LITTLE_ENDIAN */ + GEN_INT_ENTRY program_check, virt=0 EXC_REAL_END(program_check, 0x700, 0x100) EXC_VIRT_BEGIN(program_check, 0x4700, 0x100) @@ -1755,7 +1728,6 @@ EXC_COMMON_BEGIN(program_check_common) */ INT_DEFINE_BEGIN(fp_unavailable) IVEC=0x800 - IRECONCILE=0 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif @@ -1770,7 +1742,6 @@ EXC_VIRT_END(fp_unavailable, 0x4800, 0x100) EXC_COMMON_BEGIN(fp_unavailable_common) GEN_COMMON fp_unavailable bne 1f /* if from user, just load it up */ - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl kernel_fp_unavailable_exception 0: trap @@ -1789,7 +1760,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) b fast_interrupt_return #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl fp_unavailable_tm b interrupt_return @@ -1832,7 +1802,6 @@ EXC_VIRT_END(decrementer, 0x4900, 0x80) EXC_COMMON_BEGIN(decrementer_common) GEN_COMMON decrementer FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl timer_interrupt b interrupt_return @@ -1854,7 +1823,6 @@ INT_DEFINE_BEGIN(hdecrementer) IVEC=0x980 IHSRR=1 ISTACK=0 - IRECONCILE=0 IKVM_REAL=1 IKVM_VIRT=1 INT_DEFINE_END(hdecrementer) @@ -1919,12 +1887,11 @@ EXC_VIRT_END(doorbell_super, 0x4a00, 0x100) EXC_COMMON_BEGIN(doorbell_super_common) GEN_COMMON doorbell_super FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception #else - bl unknown_exception + bl unknown_async_exception #endif b interrupt_return @@ -2001,12 +1968,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) HMT_MEDIUM .if ! \virt - __LOAD_HANDLER(r10, system_call_common) - mtspr SPRN_SRR0,r10 - ld r10,PACAKMSR(r13) - mtspr SPRN_SRR1,r10 - RFI_TO_KERNEL - b . /* prevent speculative execution */ + __LOAD_HANDLER(r10, system_call_common_real) + mtctr r10 + bctr .else li r10,MSR_RI mtmsrd r10,1 /* Set RI (EE=0) */ @@ -2137,9 +2101,7 @@ EXC_COMMON_BEGIN(h_data_storage_common) GEN_COMMON h_data_storage addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION - ld r4,_DAR(r1) - li r5,SIGSEGV - bl bad_page_fault + bl do_bad_page_fault_segv MMU_FTR_SECTION_ELSE bl unknown_exception ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) @@ -2230,7 +2192,6 @@ INT_DEFINE_BEGIN(hmi_exception_early) IHSRR=1 IREALMODE_COMMON=1 ISTACK=0 - IRECONCILE=0 IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */ IKVM_REAL=1 INT_DEFINE_END(hmi_exception_early) @@ -2277,7 +2238,6 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl handle_hmi_exception b interrupt_return @@ -2307,12 +2267,11 @@ EXC_VIRT_END(h_doorbell, 0x4e80, 0x20) EXC_COMMON_BEGIN(h_doorbell_common) GEN_COMMON h_doorbell FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception #else - bl unknown_exception + bl unknown_async_exception #endif b interrupt_return @@ -2341,7 +2300,6 @@ EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20) EXC_COMMON_BEGIN(h_virt_irq_common) GEN_COMMON h_virt_irq FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ b interrupt_return @@ -2388,7 +2346,6 @@ EXC_VIRT_END(performance_monitor, 0x4f00, 0x20) EXC_COMMON_BEGIN(performance_monitor_common) GEN_COMMON performance_monitor FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl performance_monitor_exception b interrupt_return @@ -2404,7 +2361,6 @@ EXC_COMMON_BEGIN(performance_monitor_common) */ INT_DEFINE_BEGIN(altivec_unavailable) IVEC=0xf20 - IRECONCILE=0 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif @@ -2434,7 +2390,6 @@ BEGIN_FTR_SECTION b fast_interrupt_return #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_tm b interrupt_return @@ -2442,7 +2397,6 @@ BEGIN_FTR_SECTION 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_exception b interrupt_return @@ -2458,7 +2412,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) */ INT_DEFINE_BEGIN(vsx_unavailable) IVEC=0xf40 - IRECONCILE=0 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif @@ -2487,7 +2440,6 @@ BEGIN_FTR_SECTION b load_up_vsx #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_tm b interrupt_return @@ -2495,7 +2447,6 @@ BEGIN_FTR_SECTION 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_exception b interrupt_return @@ -2830,7 +2781,6 @@ EXC_VIRT_NONE(0x5800, 0x100) INT_DEFINE_BEGIN(soft_nmi) IVEC=0x900 ISTACK=0 - IRECONCILE=0 /* Soft-NMI may fire under local_irq_disable */ INT_DEFINE_END(soft_nmi) /* @@ -2849,17 +2799,6 @@ EXC_COMMON_BEGIN(soft_nmi_common) subi r1,r1,INT_FRAME_SIZE __GEN_COMMON_BODY soft_nmi - /* - * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see - * system_reset_common) - */ - li r10,IRQS_ALL_DISABLED - stb r10,PACAIRQSOFTMASK(r13) - lbz r10,PACAIRQHAPPENED(r13) - std r10,RESULT(r1) - ori r10,r10,PACA_IRQ_HARD_DIS - stb r10,PACAIRQHAPPENED(r13) - addi r3,r1,STACK_FRAME_OVERHEAD bl soft_nmi_interrupt @@ -2867,14 +2806,6 @@ EXC_COMMON_BEGIN(soft_nmi_common) li r9,0 mtmsrd r9,1 - /* - * Restore soft mask settings. - */ - ld r10,RESULT(r1) - stb r10,PACAIRQHAPPENED(r13) - ld r10,SOFTE(r1) - stb r10,PACAIRQSOFTMASK(r13) - kuap_kernel_restore r9, r10 EXCEPTION_RESTORE_REGS hsrr=0 RFI_TO_KERNEL @@ -3148,9 +3079,6 @@ kvmppc_skip_Hinterrupt: * come here. */ -EXC_COMMON_BEGIN(ppc64_runlatch_on_trampoline) - b __ppc64_runlatch_on - USE_FIXED_SECTION(virt_trampolines) /* * All code below __end_interrupts is treated as soft-masked. If @@ -3221,99 +3149,3 @@ disable_machine_check: RFI_TO_KERNEL 1: mtlr r0 blr - -/* - * Hash table stuff - */ - .balign IFETCH_ALIGN_BYTES -do_hash_page: -#ifdef CONFIG_PPC_BOOK3S_64 - lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h - ori r0,r0,DSISR_BAD_FAULT_64S@l - and. r0,r5,r0 /* weird error? */ - bne- handle_page_fault /* if not, try to insert a HPTE */ - - /* - * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then - * don't call hash_page, just fail the fault. This is required to - * prevent re-entrancy problems in the hash code, namely perf - * interrupts hitting while something holds H_PAGE_BUSY, and taking a - * hash fault. See the comment in hash_preload(). - */ - ld r11, PACA_THREAD_INFO(r13) - lwz r0,TI_PREEMPT(r11) - andis. r0,r0,NMI_MASK@h - bne 77f - - /* - * r3 contains the trap number - * r4 contains the faulting address - * r5 contains dsisr - * r6 msr - * - * at return r3 = 0 for success, 1 for page fault, negative for error - */ - bl __hash_page /* build HPTE if possible */ - cmpdi r3,0 /* see if __hash_page succeeded */ - - /* Success */ - beq interrupt_return /* Return from exception on success */ - - /* Error */ - blt- 13f - - /* Reload DAR/DSISR into r4/r5 for the DABR check below */ - ld r4,_DAR(r1) - ld r5,_DSISR(r1) -#endif /* CONFIG_PPC_BOOK3S_64 */ - -/* Here we have a page fault that hash_page can't handle. */ -handle_page_fault: -11: andis. r0,r5,DSISR_DABRMATCH@h - bne- handle_dabr_fault - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_page_fault - cmpdi r3,0 - beq+ interrupt_return - mr r5,r3 - addi r3,r1,STACK_FRAME_OVERHEAD - ld r4,_DAR(r1) - bl __bad_page_fault - b interrupt_return - -/* We have a data breakpoint exception - handle it */ -handle_dabr_fault: - ld r4,_DAR(r1) - ld r5,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_break - /* - * do_break() may have changed the NV GPRS while handling a breakpoint. - * If so, we need to restore them with their updated values. - */ - REST_NVGPRS(r1) - b interrupt_return - - -#ifdef CONFIG_PPC_BOOK3S_64 -/* We have a page fault that hash_page could handle but HV refused - * the PTE insertion - */ -13: mr r5,r3 - addi r3,r1,STACK_FRAME_OVERHEAD - ld r4,_DAR(r1) - bl low_hash_fault - b interrupt_return -#endif - -/* - * We come here as a result of a DSI at a point where we don't want - * to call hash_page, such as when we are accessing memory (possibly - * user memory) inside a PMU interrupt that occurred while interrupts - * were soft-disabled. We want to invoke the exception handler for - * the access, or panic if there isn't a handler. - */ -77: addi r3,r1,STACK_FRAME_OVERHEAD - li r5,SIGSEGV - bl bad_page_fault - b interrupt_return diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index a2f72c966baf..5d4706c14572 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -47,7 +47,7 @@ lwz r1,TASK_STACK-THREAD(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE 1: - mtcrf 0x7f, r1 + mtcrf 0x3f, r1 bt 32 - THREAD_ALIGN_SHIFT, stack_overflow #else subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ @@ -116,114 +116,44 @@ .endm .macro SYSCALL_ENTRY trapno - mfspr r12,SPRN_SPRG_THREAD mfspr r9, SPRN_SRR1 -#ifdef CONFIG_VMAP_STACK - mfspr r11, SPRN_SRR0 - mtctr r11 - andi. r11, r9, MSR_PR + mfspr r10, SPRN_SRR0 + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL) /* can take exceptions */ + lis r12, 1f@h + ori r12, r12, 1f@l + mtspr SPRN_SRR1, r11 + mtspr SPRN_SRR0, r12 + mfspr r12,SPRN_SPRG_THREAD mr r11, r1 lwz r1,TASK_STACK-THREAD(r12) - beq- 99f - addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE - li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ - mtmsr r10 - isync tovirt(r12, r12) + addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE + rfi +1: stw r11,GPR1(r1) stw r11,0(r1) mr r11, r1 -#else - andi. r11, r9, MSR_PR - lwz r11,TASK_STACK-THREAD(r12) - beq- 99f - addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE - tophys(r11, r11) - stw r1,GPR1(r11) - stw r1,0(r11) - tovirt(r1, r11) /* set new kernel sp */ -#endif + stw r10,_NIP(r11) mflr r10 stw r10, _LINK(r11) -#ifdef CONFIG_VMAP_STACK - mfctr r10 -#else - mfspr r10,SPRN_SRR0 -#endif - stw r10,_NIP(r11) mfcr r10 rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ stw r10,_CCR(r11) /* save registers */ #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ -#else -#ifdef CONFIG_VMAP_STACK - LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~MSR_IR) /* can take exceptions */ -#else - LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */ -#endif - mtmsr r10 /* (except for mach check in rtas) */ #endif lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ stw r2,GPR2(r11) addi r10,r10,STACK_FRAME_REGS_MARKER@l stw r9,_MSR(r11) - li r2, \trapno + 1 + li r2, \trapno stw r10,8(r11) stw r2,_TRAP(r11) SAVE_GPR(0, r11) SAVE_4GPRS(3, r11) SAVE_2GPRS(7, r11) - addi r11,r1,STACK_FRAME_OVERHEAD addi r2,r12,-THREAD - stw r11,PT_REGS(r12) -#if defined(CONFIG_40x) - /* Check to see if the dbcr0 register is set up to debug. Use the - internal debug mode bit to do this. */ - lwz r12,THREAD_DBCR0(r12) - andis. r12,r12,DBCR0_IDM@h -#endif - ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) -#if defined(CONFIG_40x) - beq+ 3f - /* From user and task is ptraced - load up global dbcr0 */ - li r12,-1 /* clear all pending debug events */ - mtspr SPRN_DBSR,r12 - lis r11,global_dbcr0@ha - tophys(r11,r11) - addi r11,r11,global_dbcr0@l - lwz r12,0(r11) - mtspr SPRN_DBCR0,r12 - lwz r12,4(r11) - addi r12,r12,-1 - stw r12,4(r11) -#endif - -3: - tovirt_novmstack r2, r2 /* set r2 to current */ - lis r11, transfer_to_syscall@h - ori r11, r11, transfer_to_syscall@l -#ifdef CONFIG_TRACE_IRQFLAGS - /* - * If MSR is changing we need to keep interrupts disabled at this point - * otherwise we might risk taking an interrupt before we tell lockdep - * they are enabled. - */ - LOAD_REG_IMMEDIATE(r10, MSR_KERNEL) - rlwimi r10, r9, 0, MSR_EE -#else - LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE) -#endif -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) - mtspr SPRN_NRI, r0 -#endif - mtspr SPRN_SRR1,r10 - mtspr SPRN_SRR0,r11 - rfi /* jump to handler, enable MMU */ -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif -99: b ret_from_kernel_syscall + b transfer_to_syscall /* jump to handler */ .endm .macro save_dar_dsisr_on_stack reg1, reg2, sp diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index a1ae00689e0f..24724a7dad49 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -179,9 +179,9 @@ _ENTRY(saved_ksp_limit) */ START_EXCEPTION(0x0300, DataStorage) EXCEPTION_PROLOG - mfspr r5, SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + mfspr r5, SPRN_ESR /* Grab the ESR, save it */ stw r5, _ESR(r11) - mfspr r4, SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + mfspr r4, SPRN_DEAR /* Grab the DEAR, save it */ stw r4, _DEAR(r11) EXC_XFER_LITE(0x300, handle_page_fault) @@ -191,9 +191,9 @@ _ENTRY(saved_ksp_limit) */ START_EXCEPTION(0x0400, InstructionAccess) EXCEPTION_PROLOG - mr r4,r12 /* Pass SRR0 as arg2 */ - stw r4, _DEAR(r11) - li r5,0 /* Pass zero as arg3 */ + li r5,0 + stw r5, _ESR(r11) /* Zero ESR */ + stw r12, _DEAR(r11) /* SRR0 as DEAR */ EXC_XFER_LITE(0x400, handle_page_fault) /* 0x0500 - External Interrupt Exception */ @@ -476,6 +476,7 @@ _ENTRY(saved_ksp_limit) /* continue normal handling for a critical exception... */ 2: mfspr r4,SPRN_DBSR + stw r4,_ESR(r11) /* DebugException takes DBSR in _ESR */ addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_TEMPLATE(DebugException, 0x2002, \ (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 8e36718f3167..813fa305c33b 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -376,7 +376,7 @@ interrupt_base: /* Load the next available TLB index */ lwz r13,tlb_44x_index@l(r10) - bne 2f /* Bail if permission mismach */ + bne 2f /* Bail if permission mismatch */ /* Increment, rollover, and store TLB index */ addi r13,r13,1 @@ -471,7 +471,7 @@ interrupt_base: /* Load the next available TLB index */ lwz r13,tlb_44x_index@l(r10) - bne 2f /* Bail if permission mismach */ + bne 2f /* Bail if permission mismatch */ /* Increment, rollover, and store TLB index */ addi r13,r13,1 diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 52702f3db6df..46dff3f9c31f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -165,7 +165,7 @@ SystemCall: /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. */ - EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD) + EXCEPTION(0x1000, SoftEmu, emulation_assist_interrupt, EXC_XFER_STD) . = 0x1100 /* @@ -312,14 +312,14 @@ DataStoreTLBMiss: . = 0x1300 InstructionTLBError: EXCEPTION_PROLOG - mr r4,r12 andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ andis. r10,r9,SRR1_ISI_NOPT@h beq+ .Litlbie - tlbie r4 + tlbie r12 /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ .Litlbie: - stw r4, _DAR(r11) + stw r12, _DAR(r11) + stw r5, _DSISR(r11) EXC_XFER_LITE(0x400, handle_page_fault) /* This is the data TLB error on the MPC8xx. This could be due to @@ -364,10 +364,9 @@ do_databreakpoint: addi r3,r1,STACK_FRAME_OVERHEAD mfspr r4,SPRN_BAR stw r4,_DAR(r11) -#ifdef CONFIG_VMAP_STACK - lwz r5,_DSISR(r11) -#else +#ifndef CONFIG_VMAP_STACK mfspr r5,SPRN_DSISR + stw r5,_DSISR(r11) #endif EXC_XFER_STD(0x1c00, do_break) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 858fbc8b19f3..727fdab557c9 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -238,8 +238,8 @@ __secondary_hold_acknowledge: /* System reset */ /* core99 pmac starts the seconary here by changing the vector, and - putting it back to what it was (unknown_exception) when done. */ - EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) + putting it back to what it was (unknown_async_exception) when done. */ + EXCEPTION(0x100, Reset, unknown_async_exception, EXC_XFER_STD) /* Machine check */ /* @@ -278,12 +278,6 @@ MachineCheck: 7: EXCEPTION_PROLOG_2 addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_CHRP -#ifdef CONFIG_VMAP_STACK - mfspr r4, SPRN_SPRG_THREAD - tovirt(r4, r4) - lwz r4, RTAS_SP(r4) - cmpwi cr1, r4, 0 -#endif beq cr1, machine_check_tramp twi 31, 0, 0 #else @@ -295,6 +289,7 @@ MachineCheck: DO_KVM 0x300 DataAccess: #ifdef CONFIG_VMAP_STACK +#ifdef CONFIG_PPC_BOOK3S_604 BEGIN_MMU_FTR_SECTION mtspr SPRN_SPRG_SCRATCH2,r10 mfspr r10, SPRN_SPRG_THREAD @@ -311,12 +306,14 @@ BEGIN_MMU_FTR_SECTION MMU_FTR_SECTION_ELSE b 1f ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) +#endif 1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1 EXCEPTION_PROLOG_1 b handle_page_fault_tramp_1 #else /* CONFIG_VMAP_STACK */ EXCEPTION_PROLOG handle_dar_dsisr=1 get_and_save_dar_dsisr_on_stack r4, r5, r11 +#ifdef CONFIG_PPC_BOOK3S_604 BEGIN_MMU_FTR_SECTION andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ @@ -324,8 +321,11 @@ BEGIN_MMU_FTR_SECTION bl hash_page b handle_page_fault_tramp_1 MMU_FTR_SECTION_ELSE +#endif b handle_page_fault_tramp_2 +#ifdef CONFIG_PPC_BOOK3S_604 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) +#endif #endif /* CONFIG_VMAP_STACK */ /* Instruction access exception. */ @@ -341,12 +341,14 @@ InstructionAccess: mfspr r11, SPRN_SRR1 /* check whether user or kernel */ stw r11, SRR1(r10) mfcr r10 +#ifdef CONFIG_PPC_BOOK3S_604 BEGIN_MMU_FTR_SECTION andis. r11, r11, SRR1_ISI_NOPT@h /* no pte found? */ bne hash_page_isi .Lhash_page_isi_cont: mfspr r11, SPRN_SRR1 /* check whether user or kernel */ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#endif andi. r11, r11, MSR_PR EXCEPTION_PROLOG_1 @@ -357,13 +359,15 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) beq 1f /* if so, try to put a PTE */ li r3,0 /* into the hash table */ mr r4,r12 /* SRR0 is fault address */ +#ifdef CONFIG_PPC_BOOK3S_604 BEGIN_MMU_FTR_SECTION bl hash_page END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#endif #endif /* CONFIG_VMAP_STACK */ -1: mr r4,r12 andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ - stw r4, _DAR(r11) + stw r5, _DSISR(r11) + stw r12, _DAR(r11) EXC_XFER_LITE(0x400, handle_page_fault) /* External interrupt */ @@ -640,7 +644,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) #endif #ifndef CONFIG_TAU_INT -#define TAUException unknown_exception +#define TAUException unknown_async_exception #endif EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD) @@ -685,13 +689,16 @@ handle_page_fault_tramp_1: #ifdef CONFIG_VMAP_STACK EXCEPTION_PROLOG_2 handle_dar_dsisr=1 #endif - lwz r4, _DAR(r11) lwz r5, _DSISR(r11) /* fall through */ handle_page_fault_tramp_2: + andis. r0, r5, DSISR_DABRMATCH@h + bne- 1f EXC_XFER_LITE(0x300, handle_page_fault) +1: EXC_XFER_STD(0x300, do_break) #ifdef CONFIG_VMAP_STACK +#ifdef CONFIG_PPC_BOOK3S_604 .macro save_regs_thread thread stw r0, THR0(\thread) stw r3, THR3(\thread) @@ -763,6 +770,7 @@ fast_hash_page_return: mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r10, SPRN_SPRG_SCRATCH0 rfi +#endif /* CONFIG_PPC_BOOK3S_604 */ stack_overflow: vmap_stack_overflow_exception diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 74e230c200fb..47857795f50a 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -106,10 +106,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #endif mfspr r9, SPRN_SRR1 BOOKE_CLEAR_BTB(r11) - andi. r11, r9, MSR_PR lwz r11, TASK_STACK - THREAD(r10) rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */ - beq- 99f ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE) stw r12, _CCR(r11) /* save various registers */ mflr r12 @@ -124,60 +122,15 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) stw r2,GPR2(r11) addi r12, r12, STACK_FRAME_REGS_MARKER@l stw r9,_MSR(r11) - li r2, \trapno + 1 + li r2, \trapno stw r12, 8(r11) stw r2,_TRAP(r11) SAVE_GPR(0, r11) SAVE_4GPRS(3, r11) SAVE_2GPRS(7, r11) - addi r11,r1,STACK_FRAME_OVERHEAD addi r2,r10,-THREAD - stw r11,PT_REGS(r10) - /* Check to see if the dbcr0 register is set up to debug. Use the - internal debug mode bit to do this. */ - lwz r12,THREAD_DBCR0(r10) - andis. r12,r12,DBCR0_IDM@h - ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) - beq+ 3f - /* From user and task is ptraced - load up global dbcr0 */ - li r12,-1 /* clear all pending debug events */ - mtspr SPRN_DBSR,r12 - lis r11,global_dbcr0@ha - tophys(r11,r11) - addi r11,r11,global_dbcr0@l -#ifdef CONFIG_SMP - lwz r10, TASK_CPU(r2) - slwi r10, r10, 3 - add r11, r11, r10 -#endif - lwz r12,0(r11) - mtspr SPRN_DBCR0,r12 - lwz r12,4(r11) - addi r12,r12,-1 - stw r12,4(r11) - -3: - tovirt(r2, r2) /* set r2 to current */ - lis r11, transfer_to_syscall@h - ori r11, r11, transfer_to_syscall@l -#ifdef CONFIG_TRACE_IRQFLAGS - /* - * If MSR is changing we need to keep interrupts disabled at this point - * otherwise we might risk taking an interrupt before we tell lockdep - * they are enabled. - */ - lis r10, MSR_KERNEL@h - ori r10, r10, MSR_KERNEL@l - rlwimi r10, r9, 0, MSR_EE -#else - lis r10, (MSR_KERNEL | MSR_EE)@h - ori r10, r10, (MSR_KERNEL | MSR_EE)@l -#endif - mtspr SPRN_SRR1,r10 - mtspr SPRN_SRR0,r11 - rfi /* jump to handler, enable MMU */ -99: b ret_from_kernel_syscall + b transfer_to_syscall /* jump to handler */ .endm /* To handle the additional exception priority levels on 40x and Book-E @@ -406,6 +359,7 @@ label: \ /* continue normal handling for a debug exception... */ \ 2: mfspr r4,SPRN_DBSR; \ + stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), debug_transfer_to_handler, ret_from_debug_exc) @@ -459,6 +413,7 @@ label: \ /* continue normal handling for a critical exception... */ \ 2: mfspr r4,SPRN_DBSR; \ + stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc) @@ -476,9 +431,7 @@ label: NORMAL_EXCEPTION_PROLOG(INST_STORAGE); \ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ - mr r4,r12; /* Pass SRR0 as arg2 */ \ - stw r4, _DEAR(r11); \ - li r5,0; /* Pass zero as arg3 */ \ + stw r12, _DEAR(r11); /* Pass SRR0 as arg2 */ \ EXC_XFER_LITE(0x0400, handle_page_fault) #define ALIGNMENT_EXCEPTION \ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index fdd4d274c245..3f4a40cccef5 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -364,12 +364,12 @@ interrupt_base: /* Data Storage Interrupt */ START_EXCEPTION(DataStorage) NORMAL_EXCEPTION_PROLOG(DATA_STORAGE) - mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + mfspr r5,SPRN_ESR /* Grab the ESR, save it */ stw r5,_ESR(r11) - mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + mfspr r4,SPRN_DEAR /* Grab the DEAR, save it */ + stw r4, _DEAR(r11) andis. r10,r5,(ESR_ILK|ESR_DLK)@h bne 1f - stw r4, _DEAR(r11) EXC_XFER_LITE(0x0300, handle_page_fault) 1: addi r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 22f249b6f58d..f9e6d83e6720 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -52,28 +52,32 @@ _GLOBAL(isa300_idle_stop_mayloss) std r1,PACAR1(r13) mflr r4 mfcr r5 - /* use stack red zone rather than a new frame for saving regs */ - std r2,-8*0(r1) - std r14,-8*1(r1) - std r15,-8*2(r1) - std r16,-8*3(r1) - std r17,-8*4(r1) - std r18,-8*5(r1) - std r19,-8*6(r1) - std r20,-8*7(r1) - std r21,-8*8(r1) - std r22,-8*9(r1) - std r23,-8*10(r1) - std r24,-8*11(r1) - std r25,-8*12(r1) - std r26,-8*13(r1) - std r27,-8*14(r1) - std r28,-8*15(r1) - std r29,-8*16(r1) - std r30,-8*17(r1) - std r31,-8*18(r1) - std r4,-8*19(r1) - std r5,-8*20(r1) + /* + * Use the stack red zone rather than a new frame for saving regs since + * in the case of no GPR loss the wakeup code branches directly back to + * the caller without deallocating the stack frame first. + */ + std r2,-8*1(r1) + std r14,-8*2(r1) + std r15,-8*3(r1) + std r16,-8*4(r1) + std r17,-8*5(r1) + std r18,-8*6(r1) + std r19,-8*7(r1) + std r20,-8*8(r1) + std r21,-8*9(r1) + std r22,-8*10(r1) + std r23,-8*11(r1) + std r24,-8*12(r1) + std r25,-8*13(r1) + std r26,-8*14(r1) + std r27,-8*15(r1) + std r28,-8*16(r1) + std r29,-8*17(r1) + std r30,-8*18(r1) + std r31,-8*19(r1) + std r4,-8*20(r1) + std r5,-8*21(r1) /* 168 bytes */ PPC_STOP b . /* catch bugs */ @@ -89,8 +93,8 @@ _GLOBAL(isa300_idle_stop_mayloss) */ _GLOBAL(idle_return_gpr_loss) ld r1,PACAR1(r13) - ld r4,-8*19(r1) - ld r5,-8*20(r1) + ld r4,-8*20(r1) + ld r5,-8*21(r1) mtlr r4 mtcr r5 /* @@ -98,25 +102,25 @@ _GLOBAL(idle_return_gpr_loss) * from PACATOC. This could be avoided for that less common case * if KVM saved its r2. */ - ld r2,-8*0(r1) - ld r14,-8*1(r1) - ld r15,-8*2(r1) - ld r16,-8*3(r1) - ld r17,-8*4(r1) - ld r18,-8*5(r1) - ld r19,-8*6(r1) - ld r20,-8*7(r1) - ld r21,-8*8(r1) - ld r22,-8*9(r1) - ld r23,-8*10(r1) - ld r24,-8*11(r1) - ld r25,-8*12(r1) - ld r26,-8*13(r1) - ld r27,-8*14(r1) - ld r28,-8*15(r1) - ld r29,-8*16(r1) - ld r30,-8*17(r1) - ld r31,-8*18(r1) + ld r2,-8*1(r1) + ld r14,-8*2(r1) + ld r15,-8*3(r1) + ld r16,-8*4(r1) + ld r17,-8*5(r1) + ld r18,-8*6(r1) + ld r19,-8*7(r1) + ld r20,-8*8(r1) + ld r21,-8*9(r1) + ld r22,-8*10(r1) + ld r23,-8*11(r1) + ld r24,-8*12(r1) + ld r25,-8*13(r1) + ld r26,-8*14(r1) + ld r27,-8*15(r1) + ld r28,-8*16(r1) + ld r29,-8*17(r1) + ld r30,-8*18(r1) + ld r31,-8*19(r1) blr /* @@ -154,28 +158,32 @@ _GLOBAL(isa206_idle_insn_mayloss) std r1,PACAR1(r13) mflr r4 mfcr r5 - /* use stack red zone rather than a new frame for saving regs */ - std r2,-8*0(r1) - std r14,-8*1(r1) - std r15,-8*2(r1) - std r16,-8*3(r1) - std r17,-8*4(r1) - std r18,-8*5(r1) - std r19,-8*6(r1) - std r20,-8*7(r1) - std r21,-8*8(r1) - std r22,-8*9(r1) - std r23,-8*10(r1) - std r24,-8*11(r1) - std r25,-8*12(r1) - std r26,-8*13(r1) - std r27,-8*14(r1) - std r28,-8*15(r1) - std r29,-8*16(r1) - std r30,-8*17(r1) - std r31,-8*18(r1) - std r4,-8*19(r1) - std r5,-8*20(r1) + /* + * Use the stack red zone rather than a new frame for saving regs since + * in the case of no GPR loss the wakeup code branches directly back to + * the caller without deallocating the stack frame first. + */ + std r2,-8*1(r1) + std r14,-8*2(r1) + std r15,-8*3(r1) + std r16,-8*4(r1) + std r17,-8*5(r1) + std r18,-8*6(r1) + std r19,-8*7(r1) + std r20,-8*8(r1) + std r21,-8*9(r1) + std r22,-8*10(r1) + std r23,-8*11(r1) + std r24,-8*12(r1) + std r25,-8*13(r1) + std r26,-8*14(r1) + std r27,-8*15(r1) + std r28,-8*16(r1) + std r29,-8*17(r1) + std r30,-8*18(r1) + std r31,-8*19(r1) + std r4,-8*20(r1) + std r5,-8*21(r1) cmpwi r3,PNV_THREAD_NAP bne 1f IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/interrupt.c index 7c85ed04a164..398cd86b6ada 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/interrupt.c @@ -1,10 +1,15 @@ // SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/context_tracking.h> #include <linux/err.h> +#include <linux/compat.h> + #include <asm/asm-prototypes.h> #include <asm/kup.h> #include <asm/cputime.h> +#include <asm/interrupt.h> #include <asm/hw_irq.h> +#include <asm/interrupt.h> #include <asm/kprobes.h> #include <asm/paca.h> #include <asm/ptrace.h> @@ -24,16 +29,21 @@ notrace long system_call_exception(long r3, long r4, long r5, { syscall_fn f; + regs->orig_gpr3 = r3; + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); + CT_WARN_ON(ct_state() == CONTEXT_KERNEL); + user_exit_irqoff(); + trace_hardirqs_off(); /* finish reconciling */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S)) + if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) BUG_ON(!(regs->msr & MSR_RI)); BUG_ON(!(regs->msr & MSR_PR)); BUG_ON(!FULL_REGS(regs)); - BUG_ON(regs->softe != IRQS_ENABLED); + BUG_ON(arch_irq_disabled_regs(regs)); #ifdef CONFIG_PPC_PKEY if (mmu_has_feature(MMU_FTR_PKEY)) { @@ -59,19 +69,15 @@ notrace long system_call_exception(long r3, long r4, long r5, isync(); } else #endif +#ifdef CONFIG_PPC64 kuap_check_amr(); +#endif - account_cpu_user_entry(); + booke_restore_dbcr0(); -#ifdef CONFIG_PPC_SPLPAR - if (IS_ENABLED(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && - firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct lppaca *lp = local_paca->lppaca_ptr; + account_cpu_user_entry(); - if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx))) - accumulate_stolen_time(); - } -#endif + account_stolen_time(); /* * This is not required for the syscall exit path, but makes the @@ -79,12 +85,12 @@ notrace long system_call_exception(long r3, long r4, long r5, * frame, or if the unwinder was taught the first stack frame always * returns to user with IRQS_ENABLED, this store could be avoided! */ - regs->softe = IRQS_ENABLED; + irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); local_irq_enable(); if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) { - if (unlikely(regs->trap == 0x7ff0)) { + if (unlikely(trap_is_unsupported_scv(regs))) { /* Unsupported scv vector */ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); return regs->gpr[3]; @@ -107,7 +113,7 @@ notrace long system_call_exception(long r3, long r4, long r5, r8 = regs->gpr[8]; } else if (unlikely(r0 >= NR_syscalls)) { - if (unlikely(regs->trap == 0x7ff0)) { + if (unlikely(trap_is_unsupported_scv(regs))) { /* Unsupported scv vector */ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); return regs->gpr[3]; @@ -118,7 +124,7 @@ notrace long system_call_exception(long r3, long r4, long r5, /* May be faster to do array_index_nospec? */ barrier_nospec(); - if (unlikely(is_32bit_task())) { + if (unlikely(is_compat_task())) { f = (void *)compat_sys_call_table[r0]; r3 &= 0x00000000ffffffffULL; @@ -138,8 +144,12 @@ notrace long system_call_exception(long r3, long r4, long r5, /* * local irqs must be disabled. Returns false if the caller must re-enable * them, check for new work, and try again. + * + * This should be called with local irqs disabled, but if they were previously + * enabled when the interrupt handler returns (indicating a process-context / + * synchronous interrupt) then irqs_enabled should be true. */ -static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri) +static notrace inline bool __prep_irq_for_enabled_exit(bool clear_ri) { /* This must be done with RI=1 because tracing may touch vmaps */ trace_hardirqs_on(); @@ -149,6 +159,7 @@ static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri) __hard_EE_RI_disable(); else __hard_irq_disable(); +#ifdef CONFIG_PPC64 if (unlikely(lazy_irq_pending_nocheck())) { /* Took an interrupt, may have more exit work to do. */ if (clear_ri) @@ -160,10 +171,63 @@ static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri) } local_paca->irq_happened = 0; irq_soft_mask_set(IRQS_ENABLED); - +#endif return true; } +static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled) +{ + if (__prep_irq_for_enabled_exit(clear_ri)) + return true; + + /* + * Must replay pending soft-masked interrupts now. Don't just + * local_irq_enabe(); local_irq_disable(); because if we are + * returning from an asynchronous interrupt here, another one + * might hit after irqs are enabled, and it would exit via this + * same path allowing another to fire, and so on unbounded. + * + * If interrupts were enabled when this interrupt exited, + * indicating a process context (synchronous) interrupt, + * local_irq_enable/disable can be used, which will enable + * interrupts rather than keeping them masked (unclear how + * much benefit this is over just replaying for all cases, + * because we immediately disable again, so all we're really + * doing is allowing hard interrupts to execute directly for + * a very small time, rather than being masked and replayed). + */ + if (irqs_enabled) { + local_irq_enable(); + local_irq_disable(); + } else { + replay_soft_interrupts(); + } + + return false; +} + +static notrace void booke_load_dbcr0(void) +{ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + unsigned long dbcr0 = current->thread.debug.dbcr0; + + if (likely(!(dbcr0 & DBCR0_IDM))) + return; + + /* + * Check to see if the dbcr0 register is set up to debug. + * Use the internal debug mode bit to do this. + */ + mtmsr(mfmsr() & ~MSR_DE); + if (IS_ENABLED(CONFIG_PPC32)) { + isync(); + global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0); + } + mtspr(SPRN_DBCR0, dbcr0); + mtspr(SPRN_DBSR, -1); +#endif +} + /* * This should be called after a syscall returns, with r3 the return value * from the syscall. If this function returns non-zero, the system call @@ -177,20 +241,24 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv) { - unsigned long *ti_flagsp = ¤t_thread_info()->flags; unsigned long ti_flags; unsigned long ret = 0; + bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv; + CT_WARN_ON(ct_state() == CONTEXT_USER); + +#ifdef CONFIG_PPC64 kuap_check_amr(); +#endif regs->result = r3; /* Check whether the syscall is issued inside a restartable sequence */ rseq_syscall(regs); - ti_flags = *ti_flagsp; + ti_flags = current_thread_info()->flags; - if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && !scv) { + if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) { if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { r3 = -r3; regs->ccr |= 0x10000000; /* Set SO bit in CR */ @@ -202,7 +270,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, ret = _TIF_RESTOREALL; else regs->gpr[3] = r3; - clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp); + clear_bits(_TIF_PERSYSCALL_MASK, ¤t_thread_info()->flags); } else { regs->gpr[3] = r3; } @@ -212,9 +280,10 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, ret |= _TIF_RESTOREALL; } -again: local_irq_disable(); - ti_flags = READ_ONCE(*ti_flagsp); + +again: + ti_flags = READ_ONCE(current_thread_info()->flags); while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); if (ti_flags & _TIF_NEED_RESCHED) { @@ -230,7 +299,7 @@ again: do_notify_resume(regs, ti_flags); } local_irq_disable(); - ti_flags = READ_ONCE(*ti_flagsp); + ti_flags = READ_ONCE(current_thread_info()->flags); } if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) { @@ -257,9 +326,13 @@ again: } } + user_enter_irqoff(); + /* scv need not set RI=0 because SRRs are not used */ - if (unlikely(!prep_irq_for_enabled_exit(!scv))) { + if (unlikely(!__prep_irq_for_enabled_exit(is_not_scv))) { + user_exit_irqoff(); local_irq_enable(); + local_irq_disable(); goto again; } @@ -267,9 +340,11 @@ again: local_paca->tm_scratch = regs->msr; #endif + booke_load_dbcr0(); + account_cpu_user_exit(); -#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */ +#ifdef CONFIG_PPC_BOOK3S_64 /* BOOK3E and ppc32 not using this */ /* * We do this at the end so that we do context switch with KERNEL AMR */ @@ -278,33 +353,32 @@ again: return ret; } -#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */ +#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr) { -#ifdef CONFIG_PPC_BOOK3E - struct thread_struct *ts = ¤t->thread; -#endif - unsigned long *ti_flagsp = ¤t_thread_info()->flags; unsigned long ti_flags; unsigned long flags; unsigned long ret = 0; - if (IS_ENABLED(CONFIG_PPC_BOOK3S)) + if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) BUG_ON(!(regs->msr & MSR_RI)); BUG_ON(!(regs->msr & MSR_PR)); BUG_ON(!FULL_REGS(regs)); - BUG_ON(regs->softe != IRQS_ENABLED); + BUG_ON(arch_irq_disabled_regs(regs)); + CT_WARN_ON(ct_state() == CONTEXT_USER); /* * We don't need to restore AMR on the way back to userspace for KUAP. * AMR can only have been unlocked if we interrupted the kernel. */ +#ifdef CONFIG_PPC64 kuap_check_amr(); +#endif local_irq_save(flags); again: - ti_flags = READ_ONCE(*ti_flagsp); + ti_flags = READ_ONCE(current_thread_info()->flags); while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); /* returning to user: may enable */ if (ti_flags & _TIF_NEED_RESCHED) { @@ -315,7 +389,7 @@ again: do_notify_resume(regs, ti_flags); } local_irq_disable(); - ti_flags = READ_ONCE(*ti_flagsp); + ti_flags = READ_ONCE(current_thread_info()->flags); } if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) { @@ -336,23 +410,16 @@ again: } } - if (unlikely(!prep_irq_for_enabled_exit(true))) { + user_enter_irqoff(); + + if (unlikely(!__prep_irq_for_enabled_exit(true))) { + user_exit_irqoff(); local_irq_enable(); local_irq_disable(); goto again; } -#ifdef CONFIG_PPC_BOOK3E - if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) { - /* - * Check to see if the dbcr0 register is set up to debug. - * Use the internal debug mode bit to do this. - */ - mtmsr(mfmsr() & ~MSR_DE); - mtspr(SPRN_DBCR0, ts->debug.dbcr0); - mtspr(SPRN_DBSR, -1); - } -#endif + booke_load_dbcr0(); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM local_paca->tm_scratch = regs->msr; @@ -363,7 +430,9 @@ again: /* * We do this at the end so that we do context switch with KERNEL AMR */ +#ifdef CONFIG_PPC64 kuap_user_restore(regs); +#endif return ret; } @@ -372,56 +441,56 @@ void preempt_schedule_irq(void); notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr) { - unsigned long *ti_flagsp = ¤t_thread_info()->flags; unsigned long flags; unsigned long ret = 0; +#ifdef CONFIG_PPC64 unsigned long amr; +#endif - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI))) + if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) && + unlikely(!(regs->msr & MSR_RI))) unrecoverable_exception(regs); BUG_ON(regs->msr & MSR_PR); BUG_ON(!FULL_REGS(regs)); + /* + * CT_WARN_ON comes here via program_check_exception, + * so avoid recursion. + */ + if (TRAP(regs) != 0x700) + CT_WARN_ON(ct_state() == CONTEXT_USER); +#ifdef CONFIG_PPC64 amr = kuap_get_and_check_amr(); +#endif - if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) { - clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp); + if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) { + clear_bits(_TIF_EMULATE_STACK_STORE, ¤t_thread_info()->flags); ret = 1; } local_irq_save(flags); - if (regs->softe == IRQS_ENABLED) { + if (!arch_irq_disabled_regs(regs)) { /* Returning to a kernel context with local irqs enabled. */ WARN_ON_ONCE(!(regs->msr & MSR_EE)); again: if (IS_ENABLED(CONFIG_PREEMPT)) { /* Return to preemptible kernel context */ - if (unlikely(*ti_flagsp & _TIF_NEED_RESCHED)) { + if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) { if (preempt_count() == 0) preempt_schedule_irq(); } } - if (unlikely(!prep_irq_for_enabled_exit(true))) { - /* - * Can't local_irq_restore to replay if we were in - * interrupt context. Must replay directly. - */ - if (irqs_disabled_flags(flags)) { - replay_soft_interrupts(); - } else { - local_irq_restore(flags); - local_irq_save(flags); - } - /* Took an interrupt, may have more exit work to do. */ + if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags)))) goto again; - } } else { /* Returning to a kernel context with local irqs disabled. */ __hard_EE_RI_disable(); +#ifdef CONFIG_PPC64 if (regs->msr & MSR_EE) local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; +#endif } @@ -434,7 +503,9 @@ again: * which would cause Read-After-Write stalls. Hence, we take the AMR * value from the check above. */ +#ifdef CONFIG_PPC64 kuap_kernel_restore(regs, amr); +#endif return ret; } diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 5b69a6a72a0e..c00214a4355c 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -25,6 +25,7 @@ #include <linux/pci.h> #include <linux/iommu.h> #include <linux/sched.h> +#include <linux/debugfs.h> #include <asm/io.h> #include <asm/prom.h> #include <asm/iommu.h> @@ -38,6 +39,47 @@ #define DBG(...) +#ifdef CONFIG_IOMMU_DEBUGFS +static int iommu_debugfs_weight_get(void *data, u64 *val) +{ + struct iommu_table *tbl = data; + *val = bitmap_weight(tbl->it_map, tbl->it_size); + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(iommu_debugfs_fops_weight, iommu_debugfs_weight_get, NULL, "%llu\n"); + +static void iommu_debugfs_add(struct iommu_table *tbl) +{ + char name[10]; + struct dentry *liobn_entry; + + sprintf(name, "%08lx", tbl->it_index); + liobn_entry = debugfs_create_dir(name, iommu_debugfs_dir); + + debugfs_create_file_unsafe("weight", 0400, liobn_entry, tbl, &iommu_debugfs_fops_weight); + debugfs_create_ulong("it_size", 0400, liobn_entry, &tbl->it_size); + debugfs_create_ulong("it_page_shift", 0400, liobn_entry, &tbl->it_page_shift); + debugfs_create_ulong("it_reserved_start", 0400, liobn_entry, &tbl->it_reserved_start); + debugfs_create_ulong("it_reserved_end", 0400, liobn_entry, &tbl->it_reserved_end); + debugfs_create_ulong("it_indirect_levels", 0400, liobn_entry, &tbl->it_indirect_levels); + debugfs_create_ulong("it_level_size", 0400, liobn_entry, &tbl->it_level_size); +} + +static void iommu_debugfs_del(struct iommu_table *tbl) +{ + char name[10]; + struct dentry *liobn_entry; + + sprintf(name, "%08lx", tbl->it_index); + liobn_entry = debugfs_lookup(name, iommu_debugfs_dir); + if (liobn_entry) + debugfs_remove(liobn_entry); +} +#else +static void iommu_debugfs_add(struct iommu_table *tbl){} +static void iommu_debugfs_del(struct iommu_table *tbl){} +#endif + static int novmerge; static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int); @@ -725,6 +767,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, welcomed = 1; } + iommu_debugfs_add(tbl); + return tbl; } @@ -744,6 +788,8 @@ static void iommu_table_free(struct kref *kref) return; } + iommu_debugfs_del(tbl); + iommu_table_release_pages(tbl); /* verify that table contains no entries */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 6b1eca53e36c..d71fd10a1dd4 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -54,6 +54,7 @@ #include <linux/pgtable.h> #include <linux/uaccess.h> +#include <asm/interrupt.h> #include <asm/io.h> #include <asm/irq.h> #include <asm/cache.h> @@ -65,6 +66,7 @@ #include <asm/livepatch.h> #include <asm/asm-prototypes.h> #include <asm/hw_irq.h> +#include <asm/softirq_stack.h> #ifdef CONFIG_PPC64 #include <asm/paca.h> @@ -180,13 +182,18 @@ void notrace restore_interrupts(void) void replay_soft_interrupts(void) { + struct pt_regs regs; + /* - * We use local_paca rather than get_paca() to avoid all - * the debug_smp_processor_id() business in this low level - * function + * Be careful here, calling these interrupt handlers can cause + * softirqs to be raised, which they may run when calling irq_exit, + * which will cause local_irq_enable() to be run, which can then + * recurse into this function. Don't keep any state across + * interrupt handler calls which may change underneath us. + * + * We use local_paca rather than get_paca() to avoid all the + * debug_smp_processor_id() business in this low level function. */ - unsigned char happened = local_paca->irq_happened; - struct pt_regs regs; ppc_save_regs(®s); regs.softe = IRQS_ENABLED; @@ -209,7 +216,7 @@ again: * This is a higher priority interrupt than the others, so * replay it first. */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (happened & PACA_IRQ_HMI)) { + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_HMI)) { local_paca->irq_happened &= ~PACA_IRQ_HMI; regs.trap = 0xe60; handle_hmi_exception(®s); @@ -217,7 +224,7 @@ again: hard_irq_disable(); } - if (happened & PACA_IRQ_DEC) { + if (local_paca->irq_happened & PACA_IRQ_DEC) { local_paca->irq_happened &= ~PACA_IRQ_DEC; regs.trap = 0x900; timer_interrupt(®s); @@ -225,7 +232,7 @@ again: hard_irq_disable(); } - if (happened & PACA_IRQ_EE) { + if (local_paca->irq_happened & PACA_IRQ_EE) { local_paca->irq_happened &= ~PACA_IRQ_EE; regs.trap = 0x500; do_IRQ(®s); @@ -233,7 +240,7 @@ again: hard_irq_disable(); } - if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (happened & PACA_IRQ_DBELL)) { + if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (local_paca->irq_happened & PACA_IRQ_DBELL)) { local_paca->irq_happened &= ~PACA_IRQ_DBELL; if (IS_ENABLED(CONFIG_PPC_BOOK3E)) regs.trap = 0x280; @@ -245,7 +252,7 @@ again: } /* Book3E does not support soft-masking PMI interrupts */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (happened & PACA_IRQ_PMI)) { + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_PMI)) { local_paca->irq_happened &= ~PACA_IRQ_PMI; regs.trap = 0xf00; performance_monitor_exception(®s); @@ -253,8 +260,7 @@ again: hard_irq_disable(); } - happened = local_paca->irq_happened; - if (happened & ~PACA_IRQ_HARD_DIS) { + if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS) { /* * We are responding to the next interrupt, so interrupt-off * latencies should be reset here. @@ -265,6 +271,31 @@ again: } } +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP) +static inline void replay_soft_interrupts_irqrestore(void) +{ + unsigned long kuap_state = get_kuap(); + + /* + * Check if anything calls local_irq_enable/restore() when KUAP is + * disabled (user access enabled). We handle that case here by saving + * and re-locking AMR but we shouldn't get here in the first place, + * hence the warning. + */ + kuap_check_amr(); + + if (kuap_state != AMR_KUAP_BLOCKED) + set_kuap(AMR_KUAP_BLOCKED); + + replay_soft_interrupts(); + + if (kuap_state != AMR_KUAP_BLOCKED) + set_kuap(kuap_state); +} +#else +#define replay_soft_interrupts_irqrestore() replay_soft_interrupts() +#endif + notrace void arch_local_irq_restore(unsigned long mask) { unsigned char irq_happened; @@ -328,7 +359,7 @@ notrace void arch_local_irq_restore(unsigned long mask) irq_soft_mask_set(IRQS_ALL_DISABLED); trace_hardirqs_off(); - replay_soft_interrupts(); + replay_soft_interrupts_irqrestore(); local_paca->irq_happened = 0; trace_hardirqs_on(); @@ -640,8 +671,6 @@ void __do_irq(struct pt_regs *regs) { unsigned int irq; - irq_enter(); - trace_irq_entry(regs); /* @@ -661,11 +690,9 @@ void __do_irq(struct pt_regs *regs) generic_handle_irq(irq); trace_irq_exit(regs); - - irq_exit(); } -void do_IRQ(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ) { struct pt_regs *old_regs = set_irq_regs(regs); void *cursp, *irqsp, *sirqsp; diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 9f3e133b57b7..11f0cae086ed 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -17,22 +17,15 @@ #include <linux/irq_work.h> #include <linux/extable.h> #include <linux/ftrace.h> +#include <linux/memblock.h> +#include <asm/interrupt.h> #include <asm/machdep.h> #include <asm/mce.h> #include <asm/nmi.h> +#include <asm/asm-prototypes.h> -static DEFINE_PER_CPU(int, mce_nest_count); -static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); - -/* Queue for delayed MCE events. */ -static DEFINE_PER_CPU(int, mce_queue_count); -static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue); - -/* Queue for delayed MCE UE events. */ -static DEFINE_PER_CPU(int, mce_ue_count); -static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], - mce_ue_event_queue); +#include "setup.h" static void machine_check_process_queued_event(struct irq_work *work); static void machine_check_ue_irq_work(struct irq_work *work); @@ -103,9 +96,10 @@ void save_mce_event(struct pt_regs *regs, long handled, struct mce_error_info *mce_err, uint64_t nip, uint64_t addr, uint64_t phys_addr) { - int index = __this_cpu_inc_return(mce_nest_count) - 1; - struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); + int index = local_paca->mce_info->mce_nest_count++; + struct machine_check_event *mce; + mce = &local_paca->mce_info->mce_event[index]; /* * Return if we don't have enough space to log mce event. * mce_nest_count may go beyond MAX_MC_EVT but that's ok, @@ -191,7 +185,7 @@ void save_mce_event(struct pt_regs *regs, long handled, */ int get_mce_event(struct machine_check_event *mce, bool release) { - int index = __this_cpu_read(mce_nest_count) - 1; + int index = local_paca->mce_info->mce_nest_count - 1; struct machine_check_event *mc_evt; int ret = 0; @@ -201,7 +195,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) /* Check if we have MCE info to process. */ if (index < MAX_MC_EVT) { - mc_evt = this_cpu_ptr(&mce_event[index]); + mc_evt = &local_paca->mce_info->mce_event[index]; /* Copy the event structure and release the original */ if (mce) *mce = *mc_evt; @@ -211,7 +205,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) } /* Decrement the count to free the slot. */ if (release) - __this_cpu_dec(mce_nest_count); + local_paca->mce_info->mce_nest_count--; return ret; } @@ -233,13 +227,14 @@ static void machine_check_ue_event(struct machine_check_event *evt) { int index; - index = __this_cpu_inc_return(mce_ue_count) - 1; + index = local_paca->mce_info->mce_ue_count++; /* If queue is full, just return for now. */ if (index >= MAX_MC_EVT) { - __this_cpu_dec(mce_ue_count); + local_paca->mce_info->mce_ue_count--; return; } - memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt)); + memcpy(&local_paca->mce_info->mce_ue_event_queue[index], + evt, sizeof(*evt)); /* Queue work to process this event later. */ irq_work_queue(&mce_ue_event_irq_work); @@ -256,13 +251,14 @@ void machine_check_queue_event(void) if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; - index = __this_cpu_inc_return(mce_queue_count) - 1; + index = local_paca->mce_info->mce_queue_count++; /* If queue is full, just return for now. */ if (index >= MAX_MC_EVT) { - __this_cpu_dec(mce_queue_count); + local_paca->mce_info->mce_queue_count--; return; } - memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt)); + memcpy(&local_paca->mce_info->mce_event_queue[index], + &evt, sizeof(evt)); /* Queue irq work to process this event later. */ irq_work_queue(&mce_event_process_work); @@ -289,9 +285,9 @@ static void machine_process_ue_event(struct work_struct *work) int index; struct machine_check_event *evt; - while (__this_cpu_read(mce_ue_count) > 0) { - index = __this_cpu_read(mce_ue_count) - 1; - evt = this_cpu_ptr(&mce_ue_event_queue[index]); + while (local_paca->mce_info->mce_ue_count > 0) { + index = local_paca->mce_info->mce_ue_count - 1; + evt = &local_paca->mce_info->mce_ue_event_queue[index]; blocking_notifier_call_chain(&mce_notifier_list, 0, evt); #ifdef CONFIG_MEMORY_FAILURE /* @@ -304,7 +300,7 @@ static void machine_process_ue_event(struct work_struct *work) */ if (evt->error_type == MCE_ERROR_TYPE_UE) { if (evt->u.ue_error.ignore_event) { - __this_cpu_dec(mce_ue_count); + local_paca->mce_info->mce_ue_count--; continue; } @@ -320,7 +316,7 @@ static void machine_process_ue_event(struct work_struct *work) "was generated\n"); } #endif - __this_cpu_dec(mce_ue_count); + local_paca->mce_info->mce_ue_count--; } } /* @@ -338,17 +334,17 @@ static void machine_check_process_queued_event(struct irq_work *work) * For now just print it to console. * TODO: log this error event to FSP or nvram. */ - while (__this_cpu_read(mce_queue_count) > 0) { - index = __this_cpu_read(mce_queue_count) - 1; - evt = this_cpu_ptr(&mce_event_queue[index]); + while (local_paca->mce_info->mce_queue_count > 0) { + index = local_paca->mce_info->mce_queue_count - 1; + evt = &local_paca->mce_info->mce_event_queue[index]; if (evt->error_type == MCE_ERROR_TYPE_UE && evt->u.ue_error.ignore_event) { - __this_cpu_dec(mce_queue_count); + local_paca->mce_info->mce_queue_count--; continue; } machine_check_print_event_info(evt, false, false); - __this_cpu_dec(mce_queue_count); + local_paca->mce_info->mce_queue_count--; } } @@ -588,15 +584,9 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info); * * regs->nip and regs->msr contains srr0 and ssr1. */ -long notrace machine_check_early(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early) { long handled = 0; - u8 ftrace_enabled = this_cpu_get_ftrace_enabled(); - - this_cpu_set_ftrace_enabled(0); - /* Do not use nmi_enter/exit for pseries hpte guest */ - if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR)) - nmi_enter(); hv_nmi_check_nonrecoverable(regs); @@ -606,11 +596,6 @@ long notrace machine_check_early(struct pt_regs *regs) if (ppc_md.machine_check_early) handled = ppc_md.machine_check_early(regs); - if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR)) - nmi_exit(); - - this_cpu_set_ftrace_enabled(ftrace_enabled); - return handled; } @@ -722,7 +707,7 @@ long hmi_handle_debugtrig(struct pt_regs *regs) /* * Return values: */ -long hmi_exception_realmode(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode) { int ret; @@ -741,3 +726,24 @@ long hmi_exception_realmode(struct pt_regs *regs) return 1; } + +void __init mce_init(void) +{ + struct mce_info *mce_info; + u64 limit; + int i; + + limit = min(ppc64_bolted_size(), ppc64_rma_size); + for_each_possible_cpu(i) { + mce_info = memblock_alloc_try_nid(sizeof(*mce_info), + __alignof__(*mce_info), + MEMBLOCK_LOW_LIMIT, + limit, cpu_to_node(i)); + if (!mce_info) + goto err; + paca_ptrs[i]->mce_info = mce_info; + } + return; +err: + panic("Failed to allocate memory for MCE event data\n"); +} diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 69bfe96884e2..7f7cdbeacd1a 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -142,29 +142,10 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) } /* - * emulate_step() requires insn to be emulated as - * second parameter. Load register 'r4' with the - * instruction. - */ -void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr) -{ - /* addis r4,0,(insn)@h */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ADDIS | ___PPC_RT(4) | - ((val >> 16) & 0xffff))); - addr++; - - /* ori r4,r4,(insn)@l */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ORI | ___PPC_RA(4) | - ___PPC_RS(4) | (val & 0xffff))); -} - -/* * Generate instructions to load provided immediate 64-bit value * to register 'reg' and patch these instructions at 'addr'. */ -void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) +static void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) { /* lis reg,(op)@highest */ patch_instruction((struct ppc_inst *)addr, diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 2b555997b295..001e90cd8948 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1699,3 +1699,13 @@ static void fixup_hide_host_resource_fsl(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl); + + +static int __init discover_phbs(void) +{ + if (ppc_md.discover_phbs) + ppc_md.discover_phbs(); + + return 0; +} +core_initcall(discover_phbs); diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index e99b7c547d7e..61571ae23953 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -443,46 +443,6 @@ void *pci_traverse_device_nodes(struct device_node *start, } EXPORT_SYMBOL_GPL(pci_traverse_device_nodes); -static struct pci_dn *pci_dn_next_one(struct pci_dn *root, - struct pci_dn *pdn) -{ - struct list_head *next = pdn->child_list.next; - - if (next != &pdn->child_list) - return list_entry(next, struct pci_dn, list); - - while (1) { - if (pdn == root) - return NULL; - - next = pdn->list.next; - if (next != &pdn->parent->child_list) - break; - - pdn = pdn->parent; - } - - return list_entry(next, struct pci_dn, list); -} - -void *traverse_pci_dn(struct pci_dn *root, - void *(*fn)(struct pci_dn *, void *), - void *data) -{ - struct pci_dn *pdn = root; - void *ret; - - /* Only scan the child nodes */ - for (pdn = pci_dn_next_one(root, pdn); pdn; - pdn = pci_dn_next_one(root, pdn)) { - ret = fn(pdn, data); - if (ret) - return ret; - } - - return NULL; -} - static void *add_pdn(struct device_node *dn, void *data) { struct pci_controller *hose = data; @@ -521,28 +481,6 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb) pci_traverse_device_nodes(dn, add_pdn, phb); } -/** - * pci_devs_phb_init - Initialize phbs and pci devs under them. - * - * This routine walks over all phb's (pci-host bridges) on the - * system, and sets up assorted pci-related structures - * (including pci info in the device node structs) for each - * pci device found underneath. This routine runs once, - * early in the boot sequence. - */ -static int __init pci_devs_phb_init(void) -{ - struct pci_controller *phb, *tmp; - - /* This must be done first so the device nodes have valid pci info! */ - list_for_each_entry_safe(phb, tmp, &hose_list, list_node) - pci_devs_phb_init_dynamic(phb); - - return 0; -} - -core_initcall(pci_devs_phb_init); - static void pci_dev_pdn_setup(struct pci_dev *pdev) { struct pci_dn *pdn; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a66f435dabbf..3231c2df9e26 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -41,6 +41,7 @@ #include <linux/pkeys.h> #include <linux/seq_buf.h> +#include <asm/interrupt.h> #include <asm/io.h> #include <asm/processor.h> #include <asm/mmu.h> @@ -659,11 +660,10 @@ static void do_break_handler(struct pt_regs *regs) } } -void do_break (struct pt_regs *regs, unsigned long address, - unsigned long error_code) +DEFINE_INTERRUPT_HANDLER(do_break) { current->thread.trap_nr = TRAP_HWBKPT; - if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, + if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, regs->dsisr, 11, SIGSEGV) == NOTIFY_STOP) return; @@ -681,7 +681,7 @@ void do_break (struct pt_regs *regs, unsigned long address, do_break_handler(regs); /* Deliver the signal to userspace */ - force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address); + force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)regs->dar); } #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ @@ -1670,7 +1670,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, /* Copy registers */ sp -= sizeof(struct pt_regs); childregs = (struct pt_regs *) sp; - if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { /* kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); childregs->gpr[1] = sp + sizeof(struct pt_regs); @@ -2047,6 +2047,9 @@ static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, unsigned long stack_page; unsigned long cpu = task_cpu(p); + if (!paca_ptrs) + return 0; + stack_page = (unsigned long)paca_ptrs[cpu]->emergency_sp - THREAD_SIZE; if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) return 1; @@ -2176,7 +2179,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack, * See if this is an exception frame. * We look for the "regshere" marker in the current frame. */ - if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE) + if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS) && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index ae3c41730367..9a4797d1d40d 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -707,7 +707,7 @@ static void __init save_fscr_to_task(void) init_task.thread.fscr = mfspr(SPRN_FSCR); } #else -static inline void save_fscr_to_task(void) {}; +static inline void save_fscr_to_task(void) {} #endif diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index e9d4eb6144e1..ccf77b985c8f 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1331,14 +1331,10 @@ static void __init prom_check_platform_support(void) if (prop_len > sizeof(vec)) prom_printf("WARNING: ibm,arch-vec-5-platform-support longer than expected (len: %d)\n", prop_len); - prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", - &vec, sizeof(vec)); - for (i = 0; i < sizeof(vec); i += 2) { - prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2 - , vec[i] - , vec[i + 1]); - prom_parse_platform_support(vec[i], vec[i + 1], - &supported); + prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", &vec, sizeof(vec)); + for (i = 0; i < prop_len; i += 2) { + prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2, vec[i], vec[i + 1]); + prom_parse_platform_support(vec[i], vec[i + 1], &supported); } } diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 3d44b73adb83..4f3d4ff3728c 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -262,8 +262,6 @@ long do_syscall_trace_enter(struct pt_regs *regs) { u32 flags; - user_exit(); - flags = READ_ONCE(current_thread_info()->flags) & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE); @@ -340,8 +338,6 @@ void do_syscall_trace_leave(struct pt_regs *regs) step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, step); - - user_enter(); } void __init pt_regs_check(void); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 71f38e9248be..bee984b1887b 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -64,6 +64,7 @@ #include <asm/mmu_context.h> #include <asm/cpu_has_feature.h> #include <asm/kasan.h> +#include <asm/mce.h> #include "setup.h" @@ -237,18 +238,17 @@ static int show_cpuinfo(struct seq_file *m, void *v) maj = (pvr >> 8) & 0xFF; min = pvr & 0xFF; - seq_printf(m, "processor\t: %lu\n", cpu_id); - seq_printf(m, "cpu\t\t: "); + seq_printf(m, "processor\t: %lu\ncpu\t\t: ", cpu_id); if (cur_cpu_spec->pvr_mask && cur_cpu_spec->cpu_name) - seq_printf(m, "%s", cur_cpu_spec->cpu_name); + seq_puts(m, cur_cpu_spec->cpu_name); else seq_printf(m, "unknown (%08x)", pvr); if (cpu_has_feature(CPU_FTR_ALTIVEC)) - seq_printf(m, ", altivec supported"); + seq_puts(m, ", altivec supported"); - seq_printf(m, "\n"); + seq_putc(m, '\n'); #ifdef CONFIG_TAU if (cpu_has_feature(CPU_FTR_TAU)) { @@ -327,7 +327,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "bogomips\t: %lu.%02lu\n", loops_per_jiffy / (500000 / HZ), (loops_per_jiffy / (5000 / HZ)) % 100); - seq_printf(m, "\n"); + seq_putc(m, '\n'); /* If this is the last cpu, print the summary */ if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids) @@ -938,6 +938,7 @@ void __init setup_arch(char **cmdline_p) exc_lvl_early_init(); emergency_stack_init(); + mce_init(); smp_release_cpus(); initmem_init(); diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 2dd0d9cb5a20..84058bbc8fe9 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -14,31 +14,31 @@ void irqstack_early_init(void); #ifdef CONFIG_PPC32 void setup_power_save(void); #else -static inline void setup_power_save(void) { }; +static inline void setup_power_save(void) { } #endif #if defined(CONFIG_PPC64) && defined(CONFIG_SMP) void check_smt_enabled(void); #else -static inline void check_smt_enabled(void) { }; +static inline void check_smt_enabled(void) { } #endif #if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) void setup_tlb_core_data(void); #else -static inline void setup_tlb_core_data(void) { }; +static inline void setup_tlb_core_data(void) { } #endif #if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_BOOKE) || defined(CONFIG_40x) void exc_lvl_early_init(void); #else -static inline void exc_lvl_early_init(void) { }; +static inline void exc_lvl_early_init(void) { } #endif #if defined(CONFIG_PPC64) || defined(CONFIG_VMAP_STACK) void emergency_stack_init(void); #else -static inline void emergency_stack_init(void) { }; +static inline void emergency_stack_init(void) { } #endif #ifdef CONFIG_PPC64 @@ -55,7 +55,7 @@ extern unsigned long spr_default_dscr; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE void kvm_cma_reserve(void); #else -static inline void kvm_cma_reserve(void) { }; +static inline void kvm_cma_reserve(void) { } #endif #ifdef CONFIG_TAU diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index c28e949cc222..560ed8b975e7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -67,6 +67,7 @@ #include <asm/kup.h> #include <asm/early_ioremap.h> #include <asm/pgalloc.h> +#include <asm/asm-prototypes.h> #include "setup.h" @@ -258,7 +259,7 @@ static void cpu_ready_for_interrupts(void) unsigned long spr_default_dscr = 0; -void __init record_spr_defaults(void) +static void __init record_spr_defaults(void) { if (early_cpu_has_feature(CPU_FTR_DSCR)) spr_default_dscr = mfspr(SPRN_DSCR); @@ -1008,7 +1009,7 @@ void rfi_flush_enable(bool enable) rfi_flush = enable; } -void entry_flush_enable(bool enable) +static void entry_flush_enable(bool enable) { if (enable) { do_entry_flush_fixups(enabled_flush_types); @@ -1020,7 +1021,7 @@ void entry_flush_enable(bool enable) entry_flush = enable; } -void uaccess_flush_enable(bool enable) +static void uaccess_flush_enable(bool enable) { if (enable) { do_uaccess_flush_fixups(enabled_flush_types); diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 53782aa60ade..9ded046edb0e 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -282,8 +282,6 @@ static void do_signal(struct task_struct *tsk) void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) { - user_exit(); - if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); @@ -299,8 +297,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) tracehook_notify_resume(regs); rseq_handle_notify_resume(NULL, regs); } - - user_enter(); } static unsigned long get_tm_stackpointer(struct task_struct *tsk) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 934cbdf6dd10..75ee918a120a 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -929,8 +929,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, regs->gpr[3] = ksig->sig; regs->gpr[4] = (unsigned long) sc; regs->nip = (unsigned long)ksig->ka.sa.sa_handler; - /* enter the signal handler in big-endian mode */ + /* enter the signal handler in native-endian mode */ regs->msr &= ~MSR_LE; + regs->msr |= (MSR_KERNEL & MSR_LE); return 0; failed: diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9e2246e80efd..5a4d59a1070d 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -34,6 +34,7 @@ #include <linux/random.h> #include <linux/stackprotector.h> #include <linux/pgtable.h> +#include <linux/clockchips.h> #include <asm/ptrace.h> #include <linux/atomic.h> @@ -576,7 +577,7 @@ void tick_broadcast(const struct cpumask *mask) #endif #ifdef CONFIG_DEBUGGER -void debugger_ipi_callback(struct pt_regs *regs) +static void debugger_ipi_callback(struct pt_regs *regs) { debugger_ipi(regs); } diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index d36c6391eaf5..16ff0399a257 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -59,57 +59,64 @@ unsigned long compat_sys_mmap2(unsigned long addr, size_t len, /* * long long munging: * The 32 bit ABI passes long longs in an odd even register pair. + * High and low parts are swapped depending on endian mode, + * so define a macro (similar to mips linux32) to handle that. */ +#ifdef __LITTLE_ENDIAN__ +#define merge_64(low, high) ((u64)high << 32) | low +#else +#define merge_64(high, low) ((u64)high << 32) | low +#endif compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, - u32 reg6, u32 poshi, u32 poslo) + u32 reg6, u32 pos1, u32 pos2) { - return ksys_pread64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo); + return ksys_pread64(fd, ubuf, count, merge_64(pos1, pos2)); } compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, - u32 reg6, u32 poshi, u32 poslo) + u32 reg6, u32 pos1, u32 pos2) { - return ksys_pwrite64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo); + return ksys_pwrite64(fd, ubuf, count, merge_64(pos1, pos2)); } -compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offhi, u32 offlo, u32 count) +compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count) { - return ksys_readahead(fd, ((loff_t)offhi << 32) | offlo, count); + return ksys_readahead(fd, merge_64(offset1, offset2), count); } asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4, - unsigned long high, unsigned long low) + unsigned long len1, unsigned long len2) { - return ksys_truncate(path, (high << 32) | low); + return ksys_truncate(path, merge_64(len1, len2)); } -asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo, - u32 lenhi, u32 lenlo) +asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2, + u32 len1, u32 len2) { - return ksys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo, - ((loff_t)lenhi << 32) | lenlo); + return ksys_fallocate(fd, mode, ((loff_t)offset1 << 32) | offset2, + merge_64(len1, len2)); } -asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high, - unsigned long low) +asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1, + unsigned long len2) { - return ksys_ftruncate(fd, (high << 32) | low); + return ksys_ftruncate(fd, merge_64(len1, len2)); } -long ppc32_fadvise64(int fd, u32 unused, u32 offset_high, u32 offset_low, +long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2, size_t len, int advice) { - return ksys_fadvise64_64(fd, (u64)offset_high << 32 | offset_low, len, + return ksys_fadvise64_64(fd, merge_64(offset1, offset2), len, advice); } asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags, - unsigned offset_hi, unsigned offset_lo, - unsigned nbytes_hi, unsigned nbytes_lo) + unsigned offset1, unsigned offset2, + unsigned nbytes1, unsigned nbytes2) { - loff_t offset = ((loff_t)offset_hi << 32) | offset_lo; - loff_t nbytes = ((loff_t)nbytes_hi << 32) | nbytes_lo; + loff_t offset = merge_64(offset1, offset2); + loff_t nbytes = merge_64(nbytes1, nbytes2); return ksys_sync_file_range(fd, offset, nbytes, flags); } diff --git a/arch/powerpc/kernel/syscalls/Makefile b/arch/powerpc/kernel/syscalls/Makefile index 27b48954808d..9e3be295dbba 100644 --- a/arch/powerpc/kernel/syscalls/Makefile +++ b/arch/powerpc/kernel/syscalls/Makefile @@ -5,7 +5,7 @@ uapi := arch/$(SRCARCH)/include/generated/uapi/asm _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \ $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') -syscall := $(srctree)/$(src)/syscall.tbl +syscall := $(src)/syscall.tbl syshdr := $(srctree)/$(src)/syscallhdr.sh systbl := $(srctree)/$(src)/syscalltbl.sh @@ -22,31 +22,31 @@ quiet_cmd_systbl = SYSTBL $@ '$(systbl_offset_$(basetarget))' syshdr_abis_unistd_32 := common,nospu,32 -$(uapi)/unistd_32.h: $(syscall) $(syshdr) +$(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE $(call if_changed,syshdr) syshdr_abis_unistd_64 := common,nospu,64 -$(uapi)/unistd_64.h: $(syscall) $(syshdr) +$(uapi)/unistd_64.h: $(syscall) $(syshdr) FORCE $(call if_changed,syshdr) systbl_abis_syscall_table_32 := common,nospu,32 systbl_abi_syscall_table_32 := 32 -$(kapi)/syscall_table_32.h: $(syscall) $(systbl) +$(kapi)/syscall_table_32.h: $(syscall) $(systbl) FORCE $(call if_changed,systbl) systbl_abis_syscall_table_64 := common,nospu,64 systbl_abi_syscall_table_64 := 64 -$(kapi)/syscall_table_64.h: $(syscall) $(systbl) +$(kapi)/syscall_table_64.h: $(syscall) $(systbl) FORCE $(call if_changed,systbl) systbl_abis_syscall_table_c32 := common,nospu,32 systbl_abi_syscall_table_c32 := c32 -$(kapi)/syscall_table_c32.h: $(syscall) $(systbl) +$(kapi)/syscall_table_c32.h: $(syscall) $(systbl) FORCE $(call if_changed,systbl) systbl_abis_syscall_table_spu := common,spu systbl_abi_syscall_table_spu := spu -$(kapi)/syscall_table_spu.h: $(syscall) $(systbl) +$(kapi)/syscall_table_spu.h: $(syscall) $(systbl) FORCE $(call if_changed,systbl) uapisyshdr-y += unistd_32.h unistd_64.h @@ -55,9 +55,10 @@ kapisyshdr-y += syscall_table_32.h \ syscall_table_c32.h \ syscall_table_spu.h -targets += $(uapisyshdr-y) $(kapisyshdr-y) +uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y)) +kapisyshdr-y := $(addprefix $(kapi)/, $(kapisyshdr-y)) +targets += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y)) PHONY += all -all: $(addprefix $(uapi)/,$(uapisyshdr-y)) -all: $(addprefix $(kapi)/,$(kapisyshdr-y)) +all: $(uapisyshdr-y) $(kapisyshdr-y) @: diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index f744eb5cba88..0b2480cf3e47 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -9,9 +9,7 @@ # 0 nospu restart_syscall sys_restart_syscall 1 nospu exit sys_exit -2 32 fork ppc_fork sys_fork -2 64 fork sys_fork -2 spu fork sys_ni_syscall +2 nospu fork sys_fork 3 common read sys_read 4 common write sys_write 5 common open sys_open compat_sys_open @@ -160,9 +158,7 @@ 119 32 sigreturn sys_sigreturn compat_sys_sigreturn 119 64 sigreturn sys_ni_syscall 119 spu sigreturn sys_ni_syscall -120 32 clone ppc_clone sys_clone -120 64 clone sys_clone -120 spu clone sys_ni_syscall +120 nospu clone sys_clone 121 common setdomainname sys_setdomainname 122 common uname sys_newuname 123 common modify_ldt sys_ni_syscall @@ -244,9 +240,7 @@ 186 spu sendfile sys_sendfile64 187 common getpmsg sys_ni_syscall 188 common putpmsg sys_ni_syscall -189 32 vfork ppc_vfork sys_vfork -189 64 vfork sys_vfork -189 spu vfork sys_ni_syscall +189 nospu vfork sys_vfork 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 common readahead sys_readahead compat_sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 @@ -322,9 +316,7 @@ 248 32 clock_nanosleep sys_clock_nanosleep_time32 248 64 clock_nanosleep sys_clock_nanosleep 248 spu clock_nanosleep sys_clock_nanosleep -249 32 swapcontext ppc_swapcontext compat_sys_swapcontext -249 64 swapcontext sys_swapcontext -249 spu swapcontext sys_ni_syscall +249 nospu swapcontext sys_swapcontext compat_sys_swapcontext 250 common tgkill sys_tgkill 251 32 utimes sys_utimes_time32 251 64 utimes sys_utimes @@ -522,12 +514,11 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open -435 32 clone3 ppc_clone3 sys_clone3 -435 64 clone3 sys_clone3 -435 spu clone3 sys_ni_syscall +435 nospu clone3 sys_clone3 436 common close_range sys_close_range 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 0b4694b8d248..6c31af7f4fa8 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -22,6 +22,7 @@ #include <linux/delay.h> #include <linux/workqueue.h> +#include <asm/interrupt.h> #include <asm/io.h> #include <asm/reg.h> #include <asm/nvram.h> @@ -100,16 +101,13 @@ static void TAUupdate(int cpu) * with interrupts disabled */ -void TAUException(struct pt_regs * regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(TAUException) { int cpu = smp_processor_id(); - irq_enter(); tau[cpu].interrupts++; TAUupdate(cpu); - - irq_exit(); } #endif /* CONFIG_TAU_INT */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 67feb3524460..b67d93a609a2 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -53,9 +53,11 @@ #include <linux/of_clk.h> #include <linux/suspend.h> #include <linux/sched/cputime.h> +#include <linux/sched/clock.h> #include <linux/processor.h> #include <asm/trace.h> +#include <asm/interrupt.h> #include <asm/io.h> #include <asm/nvram.h> #include <asm/cache.h> @@ -570,7 +572,7 @@ void arch_irq_work_raise(void) * timer_interrupt - gets called when the decrementer overflows, * with interrupts disabled. */ -void timer_interrupt(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) { struct clock_event_device *evt = this_cpu_ptr(&decrementers); u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); @@ -609,7 +611,7 @@ void timer_interrupt(struct pt_regs *regs) #endif old_regs = set_irq_regs(regs); - irq_enter(); + trace_timer_interrupt_entry(regs); if (test_irq_work_pending()) { @@ -634,7 +636,7 @@ void timer_interrupt(struct pt_regs *regs) } trace_timer_interrupt_exit(regs); - irq_exit(); + set_irq_regs(old_regs); } EXPORT_SYMBOL(timer_interrupt); @@ -1030,6 +1032,7 @@ void __init time_init(void) tick_setup_hrtimer_broadcast(); of_clk_init(NULL); + enable_sched_clock_irqtime(); } /* diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3ec7b443fe6b..1583fd1c6010 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -41,6 +41,7 @@ #include <asm/emulated_ops.h> #include <linux/uaccess.h> #include <asm/debugfs.h> +#include <asm/interrupt.h> #include <asm/io.h> #include <asm/machdep.h> #include <asm/rtas.h> @@ -342,8 +343,8 @@ static bool exception_common(int signr, struct pt_regs *regs, int code, show_signal_msg(signr, regs, code, addr); - if (arch_irqs_disabled() && !arch_irq_disabled_regs(regs)) - local_irq_enable(); + if (arch_irqs_disabled()) + interrupt_cond_local_irq_enable(regs); current->thread.trap_nr = code; @@ -430,16 +431,10 @@ nonrecoverable: regs->msr &= ~MSR_RI; #endif } - -void system_reset_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception) { unsigned long hsrr0, hsrr1; bool saved_hsrrs = false; - u8 ftrace_enabled = this_cpu_get_ftrace_enabled(); - - this_cpu_set_ftrace_enabled(0); - - nmi_enter(); /* * System reset can interrupt code where HSRRs are live and MSR[RI]=1. @@ -503,19 +498,20 @@ out: die("Unrecoverable nested System Reset", regs, SIGABRT); #endif /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) + if (!(regs->msr & MSR_RI)) { + /* For the reason explained in die_mce, nmi_exit before die */ + nmi_exit(); die("Unrecoverable System Reset", regs, SIGABRT); + } if (saved_hsrrs) { mtspr(SPRN_HSRR0, hsrr0); mtspr(SPRN_HSRR1, hsrr1); } - nmi_exit(); - - this_cpu_set_ftrace_enabled(ftrace_enabled); - /* What should we do here? We could issue a shutdown or hard reset. */ + + return 0; } /* @@ -788,23 +784,33 @@ int machine_check_generic(struct pt_regs *regs) } #endif /* everything else */ -void machine_check_exception(struct pt_regs *regs) +void die_mce(const char *str, struct pt_regs *regs, long err) { - int recover = 0; - /* - * BOOK3S_64 does not call this handler as a non-maskable interrupt - * (it uses its own early real-mode handler to handle the MCE proper - * and then raises irq_work to call this handler when interrupts are - * enabled). - * - * This is silly. The BOOK3S_64 should just call a different function - * rather than expecting semantics to magically change. Something - * like 'non_nmi_machine_check_exception()', perhaps? + * The machine check wants to kill the interrupted context, but + * do_exit() checks for in_interrupt() and panics in that case, so + * exit the irq/nmi before calling die. */ - const bool nmi = !IS_ENABLED(CONFIG_PPC_BOOK3S_64); + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + irq_exit(); + else + nmi_exit(); + die(str, regs, err); +} - if (nmi) nmi_enter(); +/* + * BOOK3S_64 does not call this handler as a non-maskable interrupt + * (it uses its own early real-mode handler to handle the MCE proper + * and then raises irq_work to call this handler when interrupts are + * enabled). + */ +#ifdef CONFIG_PPC_BOOK3S_64 +DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception) +#else +DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception) +#endif +{ + int recover = 0; __this_cpu_inc(irq_stat.mce_exceptions); @@ -830,21 +836,21 @@ void machine_check_exception(struct pt_regs *regs) if (check_io_access(regs)) goto bail; - if (nmi) nmi_exit(); - - die("Machine check", regs, SIGBUS); + die_mce("Machine check", regs, SIGBUS); +bail: /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) - die("Unrecoverable Machine check", regs, SIGBUS); + die_mce("Unrecoverable Machine check", regs, SIGBUS); +#ifdef CONFIG_PPC_BOOK3S_64 return; - -bail: - if (nmi) nmi_exit(); +#else + return 0; +#endif } -void SMIException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(SMIException) /* async? */ { die("System Management Interrupt", regs, SIGABRT); } @@ -1030,12 +1036,11 @@ static void p9_hmi_special_emu(struct pt_regs *regs) } #endif /* CONFIG_VSX */ -void handle_hmi_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception) { struct pt_regs *old_regs; old_regs = set_irq_regs(regs); - irq_enter(); #ifdef CONFIG_VSX /* Real mode flagged P9 special emu is needed */ @@ -1055,46 +1060,42 @@ void handle_hmi_exception(struct pt_regs *regs) if (ppc_md.handle_hmi_exception) ppc_md.handle_hmi_exception(regs); - irq_exit(); set_irq_regs(old_regs); } -void unknown_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(unknown_exception) { - enum ctx_state prev_state = exception_enter(); - printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", regs->nip, regs->msr, regs->trap); _exception(SIGTRAP, regs, TRAP_UNK, 0); - - exception_exit(prev_state); } -void instruction_breakpoint_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception) { - enum ctx_state prev_state = exception_enter(); + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", + regs->nip, regs->msr, regs->trap); + _exception(SIGTRAP, regs, TRAP_UNK, 0); +} + +DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception) +{ if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - goto bail; + return; if (debugger_iabr_match(regs)) - goto bail; + return; _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - -bail: - exception_exit(prev_state); } -void RunModeException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(RunModeException) { _exception(SIGTRAP, regs, TRAP_UNK, 0); } -void single_step_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(single_step_exception) { - enum ctx_state prev_state = exception_enter(); - clear_single_step(regs); clear_br_trace(regs); @@ -1103,16 +1104,12 @@ void single_step_exception(struct pt_regs *regs) if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - goto bail; + return; if (debugger_sstep(regs)) - goto bail; + return; _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); - -bail: - exception_exit(prev_state); } -NOKPROBE_SYMBOL(single_step_exception); /* * After we have successfully emulated an instruction, we have to @@ -1436,9 +1433,8 @@ static int emulate_math(struct pt_regs *regs) static inline int emulate_math(struct pt_regs *regs) { return -1; } #endif -void program_check_exception(struct pt_regs *regs) +static void do_program_check(struct pt_regs *regs) { - enum ctx_state prev_state = exception_enter(); unsigned int reason = get_reason(regs); /* We can now get here via a FP Unavailable exception if the core @@ -1447,22 +1443,22 @@ void program_check_exception(struct pt_regs *regs) if (reason & REASON_FP) { /* IEEE FP exception */ parse_fpe(regs); - goto bail; + return; } if (reason & REASON_TRAP) { unsigned long bugaddr; /* Debugger is first in line to stop recursive faults in * rcu_lock, notify_die, or atomic_notifier_call_chain */ if (debugger_bpt(regs)) - goto bail; + return; if (kprobe_handler(regs)) - goto bail; + return; /* trap exception */ if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - goto bail; + return; bugaddr = regs->nip; /* @@ -1474,10 +1470,10 @@ void program_check_exception(struct pt_regs *regs) if (!(regs->msr & MSR_PR) && /* not user-mode */ report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) { regs->nip += 4; - goto bail; + return; } _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - goto bail; + return; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (reason & REASON_TM) { @@ -1498,7 +1494,7 @@ void program_check_exception(struct pt_regs *regs) */ if (user_mode(regs)) { _exception(SIGILL, regs, ILL_ILLOPN, regs->nip); - goto bail; + return; } else { printk(KERN_EMERG "Unexpected TM Bad Thing exception " "at %lx (msr 0x%lx) tm_scratch=%llx\n", @@ -1518,9 +1514,7 @@ void program_check_exception(struct pt_regs *regs) if (!user_mode(regs)) goto sigill; - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); + interrupt_cond_local_irq_enable(regs); /* (reason & REASON_ILLEGAL) would be the obvious thing here, * but there seems to be a hardware bug on the 405GP (RevD) @@ -1531,7 +1525,7 @@ void program_check_exception(struct pt_regs *regs) * pattern to occurrences etc. -dgibson 31/Mar/2003 */ if (!emulate_math(regs)) - goto bail; + return; /* Try to emulate it if we should. */ if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) { @@ -1539,10 +1533,10 @@ void program_check_exception(struct pt_regs *regs) case 0: regs->nip += 4; emulate_single_step(regs); - goto bail; + return; case -EFAULT: _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - goto bail; + return; } } @@ -1552,34 +1546,31 @@ sigill: else _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); -bail: - exception_exit(prev_state); } -NOKPROBE_SYMBOL(program_check_exception); + +DEFINE_INTERRUPT_HANDLER(program_check_exception) +{ + do_program_check(regs); +} /* * This occurs when running in hypervisor mode on POWER6 or later * and an illegal instruction is encountered. */ -void emulation_assist_interrupt(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt) { regs->msr |= REASON_ILLEGAL; - program_check_exception(regs); + do_program_check(regs); } -NOKPROBE_SYMBOL(emulation_assist_interrupt); -void alignment_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(alignment_exception) { - enum ctx_state prev_state = exception_enter(); int sig, code, fixed = 0; unsigned long reason; - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); + interrupt_cond_local_irq_enable(regs); reason = get_reason(regs); - if (reason & REASON_BOUNDARY) { sig = SIGBUS; code = BUS_ADRALN; @@ -1587,7 +1578,7 @@ void alignment_exception(struct pt_regs *regs) } if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT)) - goto bail; + return; /* we don't implement logging of alignment exceptions */ if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS)) @@ -1597,7 +1588,7 @@ void alignment_exception(struct pt_regs *regs) /* skip over emulated instruction */ regs->nip += inst_length(reason); emulate_single_step(regs); - goto bail; + return; } /* Operand address was bad */ @@ -1612,13 +1603,10 @@ bad: if (user_mode(regs)) _exception(sig, regs, code, regs->dar); else - bad_page_fault(regs, regs->dar, sig); - -bail: - exception_exit(prev_state); + bad_page_fault(regs, sig); } -void StackOverflow(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(StackOverflow) { pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n", current->comm, task_pid_nr(current), regs->gpr[1]); @@ -1627,46 +1615,33 @@ void StackOverflow(struct pt_regs *regs) panic("kernel stack overflow"); } -void stack_overflow_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(stack_overflow_exception) { - enum ctx_state prev_state = exception_enter(); - die("Kernel stack overflow", regs, SIGSEGV); - - exception_exit(prev_state); } -void kernel_fp_unavailable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception) { - enum ctx_state prev_state = exception_enter(); - printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " "%lx at %lx\n", regs->trap, regs->nip); die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); - - exception_exit(prev_state); } -void altivec_unavailable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(altivec_unavailable_exception) { - enum ctx_state prev_state = exception_enter(); - if (user_mode(regs)) { /* A user program has executed an altivec instruction, but this kernel doesn't support altivec. */ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - goto bail; + return; } printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception " "%lx at %lx\n", regs->trap, regs->nip); die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); - -bail: - exception_exit(prev_state); } -void vsx_unavailable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception) { if (user_mode(regs)) { /* A user program has executed an vsx instruction, @@ -1697,7 +1672,7 @@ static void tm_unavailable(struct pt_regs *regs) die("Unrecoverable TM Unavailable Exception", regs, SIGABRT); } -void facility_unavailable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception) { static char *facility_strings[] = { [FSCR_FP_LG] = "FPU", @@ -1737,9 +1712,7 @@ void facility_unavailable_exception(struct pt_regs *regs) die("Unexpected facility unavailable exception", regs, SIGABRT); } - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); + interrupt_cond_local_irq_enable(regs); if (status == FSCR_DSCR_LG) { /* @@ -1817,7 +1790,7 @@ out: #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -void fp_unavailable_tm(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(fp_unavailable_tm) { /* Note: This does not handle any kind of FP laziness. */ @@ -1850,7 +1823,7 @@ void fp_unavailable_tm(struct pt_regs *regs) tm_recheckpoint(¤t->thread); } -void altivec_unavailable_tm(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(altivec_unavailable_tm) { /* See the comments in fp_unavailable_tm(). This function operates * the same way. @@ -1865,7 +1838,7 @@ void altivec_unavailable_tm(struct pt_regs *regs) current->thread.used_vr = 1; } -void vsx_unavailable_tm(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(vsx_unavailable_tm) { /* See the comments in fp_unavailable_tm(). This works similarly, * though we're loading both FP and VEC registers in here. @@ -1890,11 +1863,40 @@ void vsx_unavailable_tm(struct pt_regs *regs) } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -void performance_monitor_exception(struct pt_regs *regs) +#ifdef CONFIG_PPC64 +DECLARE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi); +DEFINE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi) { __this_cpu_inc(irq_stat.pmu_irqs); perf_irq(regs); + + return 0; +} +#endif + +DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async); +DEFINE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async) +{ + __this_cpu_inc(irq_stat.pmu_irqs); + + perf_irq(regs); +} + +DEFINE_INTERRUPT_HANDLER_RAW(performance_monitor_exception) +{ + /* + * On 64-bit, if perf interrupts hit in a local_irq_disable + * (soft-masked) region, we consider them as NMIs. This is required to + * prevent hash faults on user addresses when reading callchains (and + * looks better from an irq tracing perspective). + */ + if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs))) + performance_monitor_exception_nmi(regs); + else + performance_monitor_exception_async(regs); + + return 0; } #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -1957,8 +1959,10 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status) mtspr(SPRN_DBCR0, current->thread.debug.dbcr0); } -void DebugException(struct pt_regs *regs, unsigned long debug_status) +DEFINE_INTERRUPT_HANDLER(DebugException) { + unsigned long debug_status = regs->dsisr; + current->thread.debug.dbsr = debug_status; /* Hack alert: On BookE, Branch Taken stops on the branch itself, while @@ -2024,11 +2028,10 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status) } else handle_debug(regs, debug_status); } -NOKPROBE_SYMBOL(DebugException); #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ #ifdef CONFIG_ALTIVEC -void altivec_assist_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(altivec_assist_exception) { int err; @@ -2062,9 +2065,10 @@ void altivec_assist_exception(struct pt_regs *regs) #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_FSL_BOOKE -void CacheLockingException(struct pt_regs *regs, unsigned long address, - unsigned long error_code) +DEFINE_INTERRUPT_HANDLER(CacheLockingException) { + unsigned long error_code = regs->dsisr; + /* We treat cache locking instructions from the user * as priv ops, in the future we could try to do * something smarter @@ -2076,7 +2080,7 @@ void CacheLockingException(struct pt_regs *regs, unsigned long address, #endif /* CONFIG_FSL_BOOKE */ #ifdef CONFIG_SPE -void SPEFloatingPointException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException) { extern int do_spe_mathemu(struct pt_regs *regs); unsigned long spefscr; @@ -2084,9 +2088,7 @@ void SPEFloatingPointException(struct pt_regs *regs) int code = FPE_FLTUNK; int err; - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); + interrupt_cond_local_irq_enable(regs); flush_spe_to_thread(current); @@ -2128,14 +2130,12 @@ void SPEFloatingPointException(struct pt_regs *regs) return; } -void SPEFloatingPointRoundException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException) { extern int speround_handler(struct pt_regs *regs); int err; - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); + interrupt_cond_local_irq_enable(regs); preempt_disable(); if (regs->msr & MSR_SPE) @@ -2170,13 +2170,12 @@ void SPEFloatingPointRoundException(struct pt_regs *regs) * in the MSR is 0. This indicates that SRR0/1 are live, and that * we therefore lost state by taking this exception. */ -void unrecoverable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(unrecoverable_exception) { pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n", regs->trap, regs->nip, regs->msr); die("Unrecoverable exception", regs, SIGABRT); } -NOKPROBE_SYMBOL(unrecoverable_exception); #if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x) /* @@ -2190,7 +2189,7 @@ void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs) return; } -void WatchdogException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(WatchdogException) /* XXX NMI? async? */ { printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n"); WatchdogHandler(regs); @@ -2201,13 +2200,12 @@ void WatchdogException(struct pt_regs *regs) * We enter here if we discover during exception entry that we are * running in supervisor mode with a userspace value in the stack pointer. */ -void kernel_bad_stack(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(kernel_bad_stack) { printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n", regs->gpr[1], regs->nip); die("Bad kernel stack pointer", regs, SIGABRT); } -NOKPROBE_SYMBOL(kernel_bad_stack); void __init trap_init(void) { diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 9cb6f524854b..7d9a6fee0e3d 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -30,7 +30,7 @@ CC32FLAGS += -m32 KBUILD_CFLAGS := $(filter-out -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc,$(KBUILD_CFLAGS)) endif -targets := $(obj-vdso32) vdso32.so.dbg +targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday.o obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) GCOV_PROFILE := n @@ -46,9 +46,6 @@ obj-y += vdso32_wrapper.o targets += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -Upowerpc -# Force dependency (incbin is bad) -$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so.dbg - # link rule for the .so file, .lds has to be first $(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday.o FORCE $(call if_changed,vdso32ld_and_check) diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32_wrapper.S index 3f5ef035b0a9..3f5ef035b0a9 100644 --- a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S +++ b/arch/powerpc/kernel/vdso32_wrapper.S diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index bf363ff37152..2813e3f98db6 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -17,7 +17,7 @@ endif # Build rules -targets := $(obj-vdso64) vdso64.so.dbg +targets := $(obj-vdso64) vdso64.so.dbg vgettimeofday.o obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) GCOV_PROFILE := n @@ -29,15 +29,9 @@ ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both asflags-y := -D__VDSO64__ -s -obj-y += vdso64_wrapper.o targets += vdso64.lds CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) -$(obj)/vgettimeofday.o: %.o: %.c FORCE - -# Force dependency (incbin is bad) -$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so.dbg - # link rule for the .so file, .lds has to be first $(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday.o FORCE $(call if_changed,vdso64ld_and_check) diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S index bbf68cd01088..2d4067561293 100644 --- a/arch/powerpc/kernel/vdso64/sigtramp.S +++ b/arch/powerpc/kernel/vdso64/sigtramp.S @@ -15,11 +15,20 @@ .text +/* + * __kernel_start_sigtramp_rt64 and __kernel_sigtramp_rt64 together + * are one function split in two parts. The kernel jumps to the former + * and the signal handler indirectly (by blr) returns to the latter. + * __kernel_sigtramp_rt64 needs to point to the return address so + * glibc can correctly identify the trampoline stack frame. + */ .balign 8 .balign IFETCH_ALIGN_BYTES -V_FUNCTION_BEGIN(__kernel_sigtramp_rt64) +V_FUNCTION_BEGIN(__kernel_start_sigtramp_rt64) .Lsigrt_start: bctrl /* call the handler */ +V_FUNCTION_END(__kernel_start_sigtramp_rt64) +V_FUNCTION_BEGIN(__kernel_sigtramp_rt64) addi r1, r1, __SIGNAL_FRAMESIZE li r0,__NR_rt_sigreturn sc diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 6164d1a1ba11..2f3c359cacd3 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -131,4 +131,4 @@ VERSION /* * Make the sigreturn code visible to the kernel. */ -VDSO_sigtramp_rt64 = __kernel_sigtramp_rt64; +VDSO_sigtramp_rt64 = __kernel_start_sigtramp_rt64; diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64_wrapper.S index 1d56d81fe3b3..1d56d81fe3b3 100644 --- a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S +++ b/arch/powerpc/kernel/vdso64_wrapper.S diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index af3c15a1d41e..c9a8f4781a10 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -26,7 +26,9 @@ #include <linux/delay.h> #include <linux/smp.h> +#include <asm/interrupt.h> #include <asm/paca.h> +#include <asm/nmi.h> /* * The powerpc watchdog ensures that each CPU is able to service timers. @@ -247,16 +249,17 @@ static void watchdog_timer_interrupt(int cpu) watchdog_smp_panic(cpu, tb); } -void soft_nmi_interrupt(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) { unsigned long flags; int cpu = raw_smp_processor_id(); u64 tb; - if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) - return; + /* should only arrive from kernel, with irqs disabled */ + WARN_ON_ONCE(!arch_irq_disabled_regs(regs)); - nmi_enter(); + if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) + return 0; __this_cpu_inc(irq_stat.soft_nmi_irqs); @@ -265,7 +268,7 @@ void soft_nmi_interrupt(struct pt_regs *regs) wd_smp_lock(&flags); if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) { wd_smp_unlock(&flags); - goto out; + return 0; } set_cpu_stuck(cpu, tb); @@ -289,8 +292,7 @@ void soft_nmi_interrupt(struct pt_regs *regs) if (wd_panic_timeout_tb < 0x7fffffff) mtspr(SPRN_DEC, wd_panic_timeout_tb); -out: - nmi_exit(); + return 0; } static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c index d0e459bb2f05..9842e33533df 100644 --- a/arch/powerpc/kexec/elf_64.c +++ b/arch/powerpc/kexec/elf_64.c @@ -102,7 +102,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr); } - fdt_size = fdt_totalsize(initial_boot_params) * 2; + fdt_size = kexec_fdt_totalsize_ppc64(image); fdt = kmalloc(fdt_size, GFP_KERNEL); if (!fdt) { pr_err("Not enough memory for the device tree.\n"); diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index c69bcf9b547a..02b9e4d0dc40 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -21,6 +21,7 @@ #include <linux/memblock.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <asm/setup.h> #include <asm/drmem.h> #include <asm/kexec_ranges.h> #include <asm/crashdump-ppc64.h> @@ -926,6 +927,40 @@ out: } /** + * kexec_fdt_totalsize_ppc64 - Return the estimated size needed to setup FDT + * for kexec/kdump kernel. + * @image: kexec image being loaded. + * + * Returns the estimated size needed for kexec/kdump kernel FDT. + */ +unsigned int kexec_fdt_totalsize_ppc64(struct kimage *image) +{ + unsigned int fdt_size; + u64 usm_entries; + + /* + * The below estimate more than accounts for a typical kexec case where + * the additional space is to accommodate things like kexec cmdline, + * chosen node with properties for initrd start & end addresses and + * a property to indicate kexec boot.. + */ + fdt_size = fdt_totalsize(initial_boot_params) + (2 * COMMAND_LINE_SIZE); + if (image->type != KEXEC_TYPE_CRASH) + return fdt_size; + + /* + * For kdump kernel, also account for linux,usable-memory and + * linux,drconf-usable-memory properties. Get an approximate on the + * number of usable memory entries and use for FDT size estimation. + */ + usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) + + (2 * (resource_size(&crashk_res) / drmem_lmb_size()))); + fdt_size += (unsigned int)(usm_entries * sizeof(u64)); + + return fdt_size; +} + +/** * setup_new_fdt_ppc64 - Update the flattend device-tree of the kernel * being loaded. * @image: kexec image being loaded. diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 549591d9aaa2..e45644657d49 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -54,6 +54,7 @@ config KVM_BOOK3S_32 select KVM select KVM_BOOK3S_32_HANDLER select KVM_BOOK3S_PR_POSSIBLE + select PPC_FPU help Support running unmodified book3s_32 guest kernels in virtual machines on book3s_32 host processors. diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 38ea396a23d6..bb6773594cf8 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -27,6 +27,7 @@ #include <asm/cputable.h> #include <asm/pte-walk.h> +#include "book3s.h" #include "trace_hv.h" //#define DEBUG_RESIZE_HPT 1 @@ -590,7 +591,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, } else { /* Call KVM generic code to do the slow-path check */ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, - writing, &write_ok); + writing, &write_ok, NULL); if (is_error_noslot_pfn(pfn)) return -EFAULT; page = NULL; diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index bb35490400e9..e603de7ade52 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -822,7 +822,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, /* Call KVM generic code to do the slow-path check */ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, - writing, upgrade_p); + writing, upgrade_p, NULL); if (is_error_noslot_pfn(pfn)) return -EFAULT; page = NULL; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index b08cc15f31c7..fdb57be71aa6 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -61,10 +61,6 @@ #define SPRN_GQR6 918 #define SPRN_GQR7 919 -/* Book3S_32 defines mfsrin(v) - but that messes up our abstract - * function pointers, so let's just disable the define. */ -#undef mfsrin - enum priv_level { PRIV_PROBLEM = 0, PRIV_SUPER = 1, diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 6f612d240392..13bad6bf4c95 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -53,6 +53,7 @@ #include <asm/cputable.h> #include <asm/cacheflush.h> #include <linux/uaccess.h> +#include <asm/interrupt.h> #include <asm/io.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -134,7 +135,7 @@ static inline bool nesting_enabled(struct kvm *kvm) } /* If set, the threads on each CPU core have to be in the same MMU mode */ -static bool no_mixing_hpt_and_radix; +static bool no_mixing_hpt_and_radix __read_mostly; static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -782,8 +783,24 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, return H_UNSUPPORTED_FLAG_START; if (value2 & DABRX_HYP) return H_P4; - vcpu->arch.dawr = value1; - vcpu->arch.dawrx = value2; + vcpu->arch.dawr0 = value1; + vcpu->arch.dawrx0 = value2; + return H_SUCCESS; + case H_SET_MODE_RESOURCE_SET_DAWR1: + if (!kvmppc_power8_compatible(vcpu)) + return H_P2; + if (!ppc_breakpoint_available()) + return H_P2; + if (!cpu_has_feature(CPU_FTR_DAWR1)) + return H_P2; + if (!vcpu->kvm->arch.dawr1_enabled) + return H_FUNCTION; + if (mflags) + return H_UNSUPPORTED_FLAG_START; + if (value2 & DABRX_HYP) + return H_P4; + vcpu->arch.dawr1 = value1; + vcpu->arch.dawrx1 = value2; return H_SUCCESS; case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE: /* KVM does not support mflags=2 (AIL=2) */ @@ -1759,10 +1776,16 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.vcore->vtb); break; case KVM_REG_PPC_DAWR: - *val = get_reg_val(id, vcpu->arch.dawr); + *val = get_reg_val(id, vcpu->arch.dawr0); break; case KVM_REG_PPC_DAWRX: - *val = get_reg_val(id, vcpu->arch.dawrx); + *val = get_reg_val(id, vcpu->arch.dawrx0); + break; + case KVM_REG_PPC_DAWR1: + *val = get_reg_val(id, vcpu->arch.dawr1); + break; + case KVM_REG_PPC_DAWRX1: + *val = get_reg_val(id, vcpu->arch.dawrx1); break; case KVM_REG_PPC_CIABR: *val = get_reg_val(id, vcpu->arch.ciabr); @@ -1991,10 +2014,16 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, vcpu->arch.vcore->vtb = set_reg_val(id, *val); break; case KVM_REG_PPC_DAWR: - vcpu->arch.dawr = set_reg_val(id, *val); + vcpu->arch.dawr0 = set_reg_val(id, *val); break; case KVM_REG_PPC_DAWRX: - vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP; + vcpu->arch.dawrx0 = set_reg_val(id, *val) & ~DAWRX_HYP; + break; + case KVM_REG_PPC_DAWR1: + vcpu->arch.dawr1 = set_reg_val(id, *val); + break; + case KVM_REG_PPC_DAWRX1: + vcpu->arch.dawrx1 = set_reg_val(id, *val) & ~DAWRX_HYP; break; case KVM_REG_PPC_CIABR: vcpu->arch.ciabr = set_reg_val(id, *val); @@ -2862,11 +2891,6 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) if (one_vm_per_core && vc->kvm != cip->vc[0]->kvm) return false; - /* Some POWER9 chips require all threads to be in the same MMU mode */ - if (no_mixing_hpt_and_radix && - kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm)) - return false; - if (n_threads < cip->max_subcore_threads) n_threads = cip->max_subcore_threads; if (!subcore_config_ok(cip->n_subcores + 1, n_threads)) @@ -2905,6 +2929,9 @@ static void prepare_threads(struct kvmppc_vcore *vc) for_each_runnable_thread(i, vcpu, vc) { if (signal_pending(vcpu->arch.run_task)) vcpu->arch.ret = -EINTR; + else if (no_mixing_hpt_and_radix && + kvm_is_radix(vc->kvm) != radix_enabled()) + vcpu->arch.ret = -EINVAL; else if (vcpu->arch.vpa.update_pending || vcpu->arch.slb_shadow.update_pending || vcpu->arch.dtl.update_pending) @@ -3110,7 +3137,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) int controlled_threads; int trap; bool is_power8; - bool hpt_on_radix; /* * Remove from the list any threads that have a signal pending @@ -3143,11 +3169,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) * this is a HPT guest on a radix host machine where the * CPU threads may not be in different MMU modes. */ - hpt_on_radix = no_mixing_hpt_and_radix && radix_enabled() && - !kvm_is_radix(vc->kvm); - if (((controlled_threads > 1) && - ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) || - (hpt_on_radix && vc->kvm->arch.threads_indep)) { + if ((controlled_threads > 1) && + ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { for_each_runnable_thread(i, vcpu, vc) { vcpu->arch.ret = -EBUSY; kvmppc_remove_runnable(vc, vcpu); @@ -3215,7 +3238,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S) && !cpu_has_feature(CPU_FTR_ARCH_300); - if (split > 1 || hpt_on_radix) { + if (split > 1) { sip = &split_info; memset(&split_info, 0, sizeof(split_info)); for (sub = 0; sub < core_info.n_subcores; ++sub) @@ -3237,13 +3260,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) split_info.subcore_size = subcore_size; } else { split_info.subcore_size = 1; - if (hpt_on_radix) { - /* Use the split_info for LPCR/LPIDR changes */ - split_info.lpcr_req = vc->lpcr; - split_info.lpidr_req = vc->kvm->arch.lpid; - split_info.host_lpcr = vc->kvm->arch.host_lpcr; - split_info.do_set = 1; - } } /* order writes to split_info before kvm_split_mode pointer */ @@ -3253,7 +3269,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) for (thr = 0; thr < controlled_threads; ++thr) { struct paca_struct *paca = paca_ptrs[pcpu + thr]; - paca->kvm_hstate.tid = thr; paca->kvm_hstate.napping = 0; paca->kvm_hstate.kvm_split_mode = sip; } @@ -3327,10 +3342,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) * When doing micro-threading, poke the inactive threads as well. * This gets them to the nap instruction after kvm_do_nap, * which reduces the time taken to unsplit later. - * For POWER9 HPT guest on radix host, we need all the secondary - * threads woken up so they can do the LPCR/LPIDR change. */ - if (cmd_bit || hpt_on_radix) { + if (cmd_bit) { split_info.do_nap = 1; /* ask secondaries to nap when done */ for (thr = 1; thr < threads_per_subcore; ++thr) if (!(active & (1 << thr))) @@ -3391,24 +3404,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) cpu_relax(); ++loops; } - } else if (hpt_on_radix) { - /* Wait for all threads to have seen final sync */ - for (thr = 1; thr < controlled_threads; ++thr) { - struct paca_struct *paca = paca_ptrs[pcpu + thr]; - - while (paca->kvm_hstate.kvm_split_mode) { - HMT_low(); - barrier(); - } - HMT_medium(); - } + split_info.do_nap = 0; } - split_info.do_nap = 0; kvmppc_set_host_core(pcpu); + guest_exit_irqoff(); + local_irq_enable(); - guest_exit(); /* Let secondaries go back to the offline loop */ for (i = 0; i < controlled_threads; ++i) { @@ -3449,10 +3452,17 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, int trap; unsigned long host_hfscr = mfspr(SPRN_HFSCR); unsigned long host_ciabr = mfspr(SPRN_CIABR); - unsigned long host_dawr = mfspr(SPRN_DAWR0); - unsigned long host_dawrx = mfspr(SPRN_DAWRX0); + unsigned long host_dawr0 = mfspr(SPRN_DAWR0); + unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0); unsigned long host_psscr = mfspr(SPRN_PSSCR); unsigned long host_pidr = mfspr(SPRN_PID); + unsigned long host_dawr1 = 0; + unsigned long host_dawrx1 = 0; + + if (cpu_has_feature(CPU_FTR_DAWR1)) { + host_dawr1 = mfspr(SPRN_DAWR1); + host_dawrx1 = mfspr(SPRN_DAWRX1); + } /* * P8 and P9 suppress the HDEC exception when LPCR[HDICE] = 0, @@ -3489,8 +3499,12 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, mtspr(SPRN_SPURR, vcpu->arch.spurr); if (dawr_enabled()) { - mtspr(SPRN_DAWR0, vcpu->arch.dawr); - mtspr(SPRN_DAWRX0, vcpu->arch.dawrx); + mtspr(SPRN_DAWR0, vcpu->arch.dawr0); + mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0); + if (cpu_has_feature(CPU_FTR_DAWR1)) { + mtspr(SPRN_DAWR1, vcpu->arch.dawr1); + mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1); + } } mtspr(SPRN_CIABR, vcpu->arch.ciabr); mtspr(SPRN_IC, vcpu->arch.ic); @@ -3542,8 +3556,12 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); mtspr(SPRN_HFSCR, host_hfscr); mtspr(SPRN_CIABR, host_ciabr); - mtspr(SPRN_DAWR0, host_dawr); - mtspr(SPRN_DAWRX0, host_dawrx); + mtspr(SPRN_DAWR0, host_dawr0); + mtspr(SPRN_DAWRX0, host_dawrx0); + if (cpu_has_feature(CPU_FTR_DAWR1)) { + mtspr(SPRN_DAWR1, host_dawr1); + mtspr(SPRN_DAWRX1, host_dawrx1); + } mtspr(SPRN_PID, host_pidr); /* @@ -3595,6 +3613,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long host_tidr = mfspr(SPRN_TIDR); unsigned long host_iamr = mfspr(SPRN_IAMR); unsigned long host_amr = mfspr(SPRN_AMR); + unsigned long host_fscr = mfspr(SPRN_FSCR); s64 dec; u64 tb; int trap, save_pmu; @@ -3735,6 +3754,9 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, if (host_amr != vcpu->arch.amr) mtspr(SPRN_AMR, host_amr); + if (host_fscr != vcpu->arch.fscr) + mtspr(SPRN_FSCR, host_fscr); + msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); store_fp_state(&vcpu->arch.fp); #ifdef CONFIG_ALTIVEC @@ -4173,7 +4195,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, kvmppc_clear_host_core(pcpu); - local_paca->kvm_hstate.tid = 0; local_paca->kvm_hstate.napping = 0; local_paca->kvm_hstate.kvm_split_mode = NULL; kvmppc_start_thread(vcpu, vc); @@ -4217,8 +4238,9 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, kvmppc_set_host_core(pcpu); + guest_exit_irqoff(); + local_irq_enable(); - guest_exit(); cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest); @@ -4358,15 +4380,11 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) do { /* - * The early POWER9 chips that can't mix radix and HPT threads - * on the same core also need the workaround for the problem - * where the TLB would prefetch entries in the guest exit path - * for radix guests using the guest PIDR value and LPID 0. - * The workaround is in the old path (kvmppc_run_vcpu()) - * but not the new path (kvmhv_run_single_vcpu()). + * The TLB prefetch bug fixup is only in the kvmppc_run_vcpu + * path, which also handles hash and dependent threads mode. */ if (kvm->arch.threads_indep && kvm_is_radix(kvm) && - !no_mixing_hpt_and_radix) + !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) r = kvmhv_run_single_vcpu(vcpu, ~(u64)0, vcpu->arch.vcore->lpcr); else @@ -5599,6 +5617,26 @@ out: return ret; } +static int kvmhv_enable_dawr1(struct kvm *kvm) +{ + if (!cpu_has_feature(CPU_FTR_DAWR1)) + return -ENODEV; + + /* kvm == NULL means the caller is testing if the capability exists */ + if (kvm) + kvm->arch.dawr1_enabled = true; + return 0; +} + +static bool kvmppc_hash_v3_possible(void) +{ + if (radix_enabled() && no_mixing_hpt_and_radix) + return false; + + return cpu_has_feature(CPU_FTR_ARCH_300) && + cpu_has_feature(CPU_FTR_HVMODE); +} + static struct kvmppc_ops kvm_ops_hv = { .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, @@ -5642,6 +5680,8 @@ static struct kvmppc_ops kvm_ops_hv = { .store_to_eaddr = kvmhv_store_to_eaddr, .enable_svm = kvmhv_enable_svm, .svm_off = kvmhv_svm_off, + .enable_dawr1 = kvmhv_enable_dawr1, + .hash_v3_possible = kvmppc_hash_v3_possible, }; static int kvm_init_subcore_bitmap(void) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 8053efdf7ea7..158d309b42a3 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -17,6 +17,7 @@ #include <asm/asm-prototypes.h> #include <asm/cputable.h> +#include <asm/interrupt.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> #include <asm/archrandom.h> @@ -277,8 +278,7 @@ void kvmhv_commence_exit(int trap) struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore; int ptid = local_paca->kvm_hstate.ptid; struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode; - int me, ee, i, t; - int cpu0; + int me, ee, i; /* Set our bit in the threads-exiting-guest map in the 0xff00 bits of vcore->entry_exit_map */ @@ -320,22 +320,6 @@ void kvmhv_commence_exit(int trap) if ((ee >> 8) == 0) kvmhv_interrupt_vcore(vc, ee); } - - /* - * On POWER9 when running a HPT guest on a radix host (sip != NULL), - * we have to interrupt inactive CPU threads to get them to - * restore the host LPCR value. - */ - if (sip->lpcr_req) { - if (cmpxchg(&sip->do_restore, 0, 1) == 0) { - vc = local_paca->kvm_hstate.kvm_vcore; - cpu0 = vc->pcpu + ptid - local_paca->kvm_hstate.tid; - for (t = 1; t < threads_per_core; ++t) { - if (sip->napped[t]) - kvmhv_rm_send_ipi(cpu0 + t); - } - } - } } struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv; @@ -667,95 +651,6 @@ void kvmppc_bad_interrupt(struct pt_regs *regs) panic("Bad KVM trap"); } -/* - * Functions used to switch LPCR HR and UPRT bits on all threads - * when entering and exiting HPT guests on a radix host. - */ - -#define PHASE_REALMODE 1 /* in real mode */ -#define PHASE_SET_LPCR 2 /* have set LPCR */ -#define PHASE_OUT_OF_GUEST 4 /* have finished executing in guest */ -#define PHASE_RESET_LPCR 8 /* have reset LPCR to host value */ - -#define ALL(p) (((p) << 24) | ((p) << 16) | ((p) << 8) | (p)) - -static void wait_for_sync(struct kvm_split_mode *sip, int phase) -{ - int thr = local_paca->kvm_hstate.tid; - - sip->lpcr_sync.phase[thr] |= phase; - phase = ALL(phase); - while ((sip->lpcr_sync.allphases & phase) != phase) { - HMT_low(); - barrier(); - } - HMT_medium(); -} - -void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip) -{ - int num_sets; - unsigned long rb, set; - - /* wait for every other thread to get to real mode */ - wait_for_sync(sip, PHASE_REALMODE); - - /* Set LPCR and LPIDR */ - mtspr(SPRN_LPCR, sip->lpcr_req); - mtspr(SPRN_LPID, sip->lpidr_req); - isync(); - - /* - * P10 will flush all the congruence class with a single tlbiel - */ - if (cpu_has_feature(CPU_FTR_ARCH_31)) - num_sets = 1; - else - num_sets = POWER9_TLB_SETS_RADIX; - - /* Invalidate the TLB on thread 0 */ - if (local_paca->kvm_hstate.tid == 0) { - sip->do_set = 0; - asm volatile("ptesync" : : : "memory"); - for (set = 0; set < num_sets; ++set) { - rb = TLBIEL_INVAL_SET_LPID + - (set << TLBIEL_INVAL_SET_SHIFT); - asm volatile(PPC_TLBIEL(%0, %1, 0, 0, 0) : : - "r" (rb), "r" (0)); - } - asm volatile("ptesync" : : : "memory"); - } - - /* indicate that we have done so and wait for others */ - wait_for_sync(sip, PHASE_SET_LPCR); - /* order read of sip->lpcr_sync.allphases vs. sip->do_set */ - smp_rmb(); -} - -/* - * Called when a thread that has been in the guest needs - * to reload the host LPCR value - but only on POWER9 when - * running a HPT guest on a radix host. - */ -void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip) -{ - /* we're out of the guest... */ - wait_for_sync(sip, PHASE_OUT_OF_GUEST); - - mtspr(SPRN_LPID, 0); - mtspr(SPRN_LPCR, sip->host_lpcr); - isync(); - - if (local_paca->kvm_hstate.tid == 0) { - sip->do_restore = 0; - smp_wmb(); /* order store of do_restore vs. phase */ - } - - wait_for_sync(sip, PHASE_RESET_LPCR); - smp_mb(); - local_paca->kvm_hstate.kvm_split_mode = NULL; -} - static void kvmppc_end_cede(struct kvm_vcpu *vcpu) { vcpu->arch.ceded = 0; diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 33b58549a9aa..0cd0e7aad588 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -33,8 +33,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) hr->dpdes = vc->dpdes; hr->hfscr = vcpu->arch.hfscr; hr->tb_offset = vc->tb_offset; - hr->dawr0 = vcpu->arch.dawr; - hr->dawrx0 = vcpu->arch.dawrx; + hr->dawr0 = vcpu->arch.dawr0; + hr->dawrx0 = vcpu->arch.dawrx0; hr->ciabr = vcpu->arch.ciabr; hr->purr = vcpu->arch.purr; hr->spurr = vcpu->arch.spurr; @@ -49,6 +49,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) hr->pidr = vcpu->arch.pid; hr->cfar = vcpu->arch.cfar; hr->ppr = vcpu->arch.ppr; + hr->dawr1 = vcpu->arch.dawr1; + hr->dawrx1 = vcpu->arch.dawrx1; } static void byteswap_pt_regs(struct pt_regs *regs) @@ -91,6 +93,8 @@ static void byteswap_hv_regs(struct hv_guest_state *hr) hr->pidr = swab64(hr->pidr); hr->cfar = swab64(hr->cfar); hr->ppr = swab64(hr->ppr); + hr->dawr1 = swab64(hr->dawr1); + hr->dawrx1 = swab64(hr->dawrx1); } static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap, @@ -138,6 +142,7 @@ static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) /* Don't let data address watchpoint match in hypervisor state */ hr->dawrx0 &= ~DAWRX_HYP; + hr->dawrx1 &= ~DAWRX_HYP; /* Don't let completed instruction address breakpt match in HV state */ if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) @@ -151,8 +156,8 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) vc->pcr = hr->pcr | PCR_MASK; vc->dpdes = hr->dpdes; vcpu->arch.hfscr = hr->hfscr; - vcpu->arch.dawr = hr->dawr0; - vcpu->arch.dawrx = hr->dawrx0; + vcpu->arch.dawr0 = hr->dawr0; + vcpu->arch.dawrx0 = hr->dawrx0; vcpu->arch.ciabr = hr->ciabr; vcpu->arch.purr = hr->purr; vcpu->arch.spurr = hr->spurr; @@ -167,6 +172,8 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) vcpu->arch.pid = hr->pidr; vcpu->arch.cfar = hr->cfar; vcpu->arch.ppr = hr->ppr; + vcpu->arch.dawr1 = hr->dawr1; + vcpu->arch.dawrx1 = hr->dawrx1; } void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu, @@ -215,12 +222,51 @@ static void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr) } } +static int kvmhv_read_guest_state_and_regs(struct kvm_vcpu *vcpu, + struct hv_guest_state *l2_hv, + struct pt_regs *l2_regs, + u64 hv_ptr, u64 regs_ptr) +{ + int size; + + if (kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv->version, + sizeof(l2_hv->version))) + return -1; + + if (kvmppc_need_byteswap(vcpu)) + l2_hv->version = swab64(l2_hv->version); + + size = hv_guest_state_size(l2_hv->version); + if (size < 0) + return -1; + + return kvm_vcpu_read_guest(vcpu, hv_ptr, l2_hv, size) || + kvm_vcpu_read_guest(vcpu, regs_ptr, l2_regs, + sizeof(struct pt_regs)); +} + +static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu, + struct hv_guest_state *l2_hv, + struct pt_regs *l2_regs, + u64 hv_ptr, u64 regs_ptr) +{ + int size; + + size = hv_guest_state_size(l2_hv->version); + if (size < 0) + return -1; + + return kvm_vcpu_write_guest(vcpu, hv_ptr, l2_hv, size) || + kvm_vcpu_write_guest(vcpu, regs_ptr, l2_regs, + sizeof(struct pt_regs)); +} + long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) { long int err, r; struct kvm_nested_guest *l2; struct pt_regs l2_regs, saved_l1_regs; - struct hv_guest_state l2_hv, saved_l1_hv; + struct hv_guest_state l2_hv = {0}, saved_l1_hv; struct kvmppc_vcore *vc = vcpu->arch.vcore; u64 hv_ptr, regs_ptr; u64 hdec_exp; @@ -235,17 +281,15 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) hv_ptr = kvmppc_get_gpr(vcpu, 4); regs_ptr = kvmppc_get_gpr(vcpu, 5); vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv, - sizeof(struct hv_guest_state)) || - kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs, - sizeof(struct pt_regs)); + err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs, + hv_ptr, regs_ptr); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (err) return H_PARAMETER; if (kvmppc_need_byteswap(vcpu)) byteswap_hv_regs(&l2_hv); - if (l2_hv.version != HV_GUEST_STATE_VERSION) + if (l2_hv.version > HV_GUEST_STATE_VERSION) return H_P2; if (kvmppc_need_byteswap(vcpu)) @@ -325,10 +369,8 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) byteswap_pt_regs(&l2_regs); } vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv, - sizeof(struct hv_guest_state)) || - kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs, - sizeof(struct pt_regs)); + err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs, + hv_ptr, regs_ptr); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (err) return H_AUTHORITY; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index cd9995ee8441..5e634db4809b 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -52,11 +52,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) #define STACK_SLOT_PID (SFS-32) #define STACK_SLOT_IAMR (SFS-40) #define STACK_SLOT_CIABR (SFS-48) -#define STACK_SLOT_DAWR (SFS-56) -#define STACK_SLOT_DAWRX (SFS-64) +#define STACK_SLOT_DAWR0 (SFS-56) +#define STACK_SLOT_DAWRX0 (SFS-64) #define STACK_SLOT_HFSCR (SFS-72) #define STACK_SLOT_AMR (SFS-80) #define STACK_SLOT_UAMOR (SFS-88) +#define STACK_SLOT_DAWR1 (SFS-96) +#define STACK_SLOT_DAWRX1 (SFS-104) /* the following is used by the P9 short path */ #define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */ @@ -85,19 +87,6 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline) RFI_TO_KERNEL kvmppc_call_hv_entry: -BEGIN_FTR_SECTION - /* On P9, do LPCR setting, if necessary */ - ld r3, HSTATE_SPLIT_MODE(r13) - cmpdi r3, 0 - beq 46f - lwz r4, KVM_SPLIT_DO_SET(r3) - cmpwi r4, 0 - beq 46f - bl kvmhv_p9_set_lpcr - nop -46: -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - ld r4, HSTATE_KVM_VCPU(r13) bl kvmppc_hv_entry @@ -361,11 +350,11 @@ kvm_secondary_got_guest: LOAD_REG_ADDR(r6, decrementer_max) ld r6, 0(r6) mtspr SPRN_HDEC, r6 +BEGIN_FTR_SECTION /* and set per-LPAR registers, if doing dynamic micro-threading */ ld r6, HSTATE_SPLIT_MODE(r13) cmpdi r6, 0 beq 63f -BEGIN_FTR_SECTION ld r0, KVM_SPLIT_RPR(r6) mtspr SPRN_RPR, r0 ld r0, KVM_SPLIT_PMMAR(r6) @@ -373,16 +362,7 @@ BEGIN_FTR_SECTION ld r0, KVM_SPLIT_LDBAR(r6) mtspr SPRN_LDBAR, r0 isync -FTR_SECTION_ELSE - /* On P9 we use the split_info for coordinating LPCR changes */ - lwz r4, KVM_SPLIT_DO_SET(r6) - cmpwi r4, 0 - beq 1f - mr r3, r6 - bl kvmhv_p9_set_lpcr - nop -1: -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) 63: /* Order load of vcpu after load of vcore */ lwsync @@ -452,19 +432,15 @@ kvm_no_guest: mtcr r5 blr -53: HMT_LOW +53: +BEGIN_FTR_SECTION + HMT_LOW ld r5, HSTATE_KVM_VCORE(r13) cmpdi r5, 0 bne 60f ld r3, HSTATE_SPLIT_MODE(r13) cmpdi r3, 0 beq kvm_no_guest - lwz r0, KVM_SPLIT_DO_SET(r3) - cmpwi r0, 0 - bne kvmhv_do_set - lwz r0, KVM_SPLIT_DO_RESTORE(r3) - cmpwi r0, 0 - bne kvmhv_do_restore lbz r0, KVM_SPLIT_DO_NAP(r3) cmpwi r0, 0 beq kvm_no_guest @@ -472,24 +448,19 @@ kvm_no_guest: b kvm_unsplit_nap 60: HMT_MEDIUM b kvm_secondary_got_guest +FTR_SECTION_ELSE + HMT_LOW + ld r5, HSTATE_KVM_VCORE(r13) + cmpdi r5, 0 + beq kvm_no_guest + HMT_MEDIUM + b kvm_secondary_got_guest +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) 54: li r0, KVM_HWTHREAD_IN_KVM stb r0, HSTATE_HWTHREAD_STATE(r13) b kvm_no_guest -kvmhv_do_set: - /* Set LPCR, LPIDR etc. on P9 */ - HMT_MEDIUM - bl kvmhv_p9_set_lpcr - nop - b kvm_no_guest - -kvmhv_do_restore: - HMT_MEDIUM - bl kvmhv_p9_restore_lpcr - nop - b kvm_no_guest - /* * Here the primary thread is trying to return the core to * whole-core mode, so we need to nap. @@ -527,7 +498,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* Set kvm_split_mode.napped[tid] = 1 */ ld r3, HSTATE_SPLIT_MODE(r13) li r0, 1 - lbz r4, HSTATE_TID(r13) + lhz r4, PACAPACAINDEX(r13) + clrldi r4, r4, 61 /* micro-threading => P8 => 8 threads/core */ addi r4, r4, KVM_SPLIT_NAPPED stbx r0, r3, r4 /* Check the do_nap flag again after setting napped[] */ @@ -711,10 +683,16 @@ BEGIN_FTR_SECTION mfspr r7, SPRN_DAWRX0 mfspr r8, SPRN_IAMR std r5, STACK_SLOT_CIABR(r1) - std r6, STACK_SLOT_DAWR(r1) - std r7, STACK_SLOT_DAWRX(r1) + std r6, STACK_SLOT_DAWR0(r1) + std r7, STACK_SLOT_DAWRX0(r1) std r8, STACK_SLOT_IAMR(r1) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +BEGIN_FTR_SECTION + mfspr r6, SPRN_DAWR1 + mfspr r7, SPRN_DAWRX1 + std r6, STACK_SLOT_DAWR1(r1) + std r7, STACK_SLOT_DAWRX1(r1) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1) mfspr r5, SPRN_AMR std r5, STACK_SLOT_AMR(r1) @@ -801,10 +779,16 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) lbz r5, 0(r5) cmpdi r5, 0 beq 1f - ld r5, VCPU_DAWR(r4) - ld r6, VCPU_DAWRX(r4) + ld r5, VCPU_DAWR0(r4) + ld r6, VCPU_DAWRX0(r4) mtspr SPRN_DAWR0, r5 mtspr SPRN_DAWRX0, r6 +BEGIN_FTR_SECTION + ld r5, VCPU_DAWR1(r4) + ld r6, VCPU_DAWRX1(r4) + mtspr SPRN_DAWR1, r5 + mtspr SPRN_DAWRX1, r6 +END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) 1: ld r7, VCPU_CIABR(r4) ld r8, VCPU_TAR(r4) @@ -918,15 +902,19 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) cmpdi r3, 512 /* 1 microsecond */ blt hdec_soon - /* For hash guest, clear out and reload the SLB */ ld r6, VCPU_KVM(r4) lbz r0, KVM_RADIX(r6) cmpwi r0, 0 bne 9f + + /* For hash guest, clear out and reload the SLB */ +BEGIN_MMU_FTR_SECTION + /* Radix host won't have populated the SLB, so no need to clear */ li r6, 0 slbmte r6, r6 - slbia + PPC_SLBIA(6) ptesync +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */ lwz r5,VCPU_SLB_MAX(r4) @@ -1187,6 +1175,20 @@ EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9) mr r4, r3 b fast_guest_entry_c guest_exit_short_path: + /* + * Malicious or buggy radix guests may have inserted SLB entries + * (only 0..3 because radix always runs with UPRT=1), so these must + * be cleared here to avoid side-channels. slbmte is used rather + * than slbia, as it won't clear cached translations. + */ + li r0,0 + slbmte r0,r0 + li r4,1 + slbmte r0,r4 + li r4,2 + slbmte r0,r4 + li r4,3 + slbmte r0,r4 li r0, KVM_GUEST_MODE_NONE stb r0, HSTATE_IN_GUEST(r13) @@ -1499,7 +1501,7 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ lbz r0, KVM_RADIX(r5) li r5, 0 cmpwi r0, 0 - bne 3f /* for radix, save 0 entries */ + bne 0f /* for radix, save 0 entries */ lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ mtctr r0 li r6,0 @@ -1518,13 +1520,13 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ /* Finally clear out the SLB */ li r0,0 slbmte r0,r0 - slbia + PPC_SLBIA(6) ptesync -3: stw r5,VCPU_SLB_MAX(r9) + stw r5,VCPU_SLB_MAX(r9) /* load host SLB entries */ BEGIN_MMU_FTR_SECTION - b 0f + b guest_bypass END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) ld r8,PACA_SLBSHADOWPTR(r13) @@ -1538,7 +1540,21 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) slbmte r6,r5 1: addi r8,r8,16 .endr -0: + b guest_bypass + +0: /* + * Sanitise radix guest SLB, see guest_exit_short_path comment. + * We clear vcpu->arch.slb_max to match earlier behaviour. + */ + li r0,0 + stw r0,VCPU_SLB_MAX(r9) + slbmte r0,r0 + li r4,1 + slbmte r0,r4 + li r4,2 + slbmte r0,r4 + li r4,3 + slbmte r0,r4 guest_bypass: stw r12, STACK_SLOT_TRAP(r1) @@ -1759,8 +1775,8 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* Restore host values of some registers */ BEGIN_FTR_SECTION ld r5, STACK_SLOT_CIABR(r1) - ld r6, STACK_SLOT_DAWR(r1) - ld r7, STACK_SLOT_DAWRX(r1) + ld r6, STACK_SLOT_DAWR0(r1) + ld r7, STACK_SLOT_DAWRX0(r1) mtspr SPRN_CIABR, r5 /* * If the DAWR doesn't work, it's ok to write these here as @@ -1770,6 +1786,12 @@ BEGIN_FTR_SECTION mtspr SPRN_DAWRX0, r7 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) BEGIN_FTR_SECTION + ld r6, STACK_SLOT_DAWR1(r1) + ld r7, STACK_SLOT_DAWRX1(r1) + mtspr SPRN_DAWR1, r6 + mtspr SPRN_DAWRX1, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1) +BEGIN_FTR_SECTION ld r5, STACK_SLOT_TID(r1) ld r6, STACK_SLOT_PSSCR(r1) ld r7, STACK_SLOT_PID(r1) @@ -1938,24 +1960,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 19: lis r8,0x7fff /* MAX_INT@h */ mtspr SPRN_HDEC,r8 -16: -BEGIN_FTR_SECTION - /* On POWER9 with HPT-on-radix we need to wait for all other threads */ - ld r3, HSTATE_SPLIT_MODE(r13) - cmpdi r3, 0 - beq 47f - lwz r8, KVM_SPLIT_DO_RESTORE(r3) - cmpwi r8, 0 - beq 47f - bl kvmhv_p9_restore_lpcr - nop - b 48f -47: -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - ld r8,KVM_HOST_LPCR(r4) +16: ld r8,KVM_HOST_LPCR(r4) mtspr SPRN_LPCR,r8 isync -48: + #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING /* Finish timing, if we have a vcpu */ ld r4, HSTATE_KVM_VCPU(r13) @@ -2574,8 +2582,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW rlwimi r5, r4, 2, DAWRX_WT clrrdi r4, r4, 3 - std r4, VCPU_DAWR(r3) - std r5, VCPU_DAWRX(r3) + std r4, VCPU_DAWR0(r3) + std r5, VCPU_DAWRX0(r3) /* * If came in through the real mode hcall handler then it is necessary * to write the registers since the return path won't. Otherwise it is @@ -2779,8 +2787,10 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) beq kvm_end_cede cmpwi r0, NAPPING_NOVCPU beq kvm_novcpu_wakeup +BEGIN_FTR_SECTION cmpwi r0, NAPPING_UNSPLIT beq kvm_unsplit_wakeup +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) twi 31,0,0 /* Nap state must not be zero */ 33: mr r4, r3 @@ -3343,13 +3353,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_IAMR, r0 mtspr SPRN_CIABR, r0 mtspr SPRN_DAWRX0, r0 +BEGIN_FTR_SECTION + mtspr SPRN_DAWRX1, r0 +END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) + + /* Clear hash and radix guest SLB, see guest_exit_short_path comment. */ + slbmte r0, r0 + PPC_SLBIA(6) BEGIN_MMU_FTR_SECTION b 4f END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) - slbmte r0, r0 - slbia ptesync ld r8, PACA_SLBSHADOWPTR(r13) .rept SLB_NUM_BOLTED diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 30dfeac731c6..e7219b6f5f9a 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1813,9 +1813,9 @@ int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, return -EINVAL; if ((level == 1 && state->lsi) || level == KVM_INTERRUPT_SET_LEVEL) - state->asserted = 1; + state->asserted = true; else if (level == 0 || level == KVM_INTERRUPT_UNSET) { - state->asserted = 0; + state->asserted = false; return 0; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 288a9820ec01..7d5fe43f85c4 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -20,6 +20,7 @@ #include <asm/cputable.h> #include <linux/uaccess.h> +#include <asm/interrupt.h> #include <asm/kvm_ppc.h> #include <asm/cacheflush.h> #include <asm/dbell.h> @@ -698,7 +699,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); r = 1; - }; + } return r; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index cf52d26f49cd..a2a68a958fa0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -611,8 +611,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = !!(hv_enabled && radix_enabled()); break; case KVM_CAP_PPC_MMU_HASH_V3: - r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300) && - cpu_has_feature(CPU_FTR_HVMODE)); + r = !!(hv_enabled && kvmppc_hv_ops->hash_v3_possible && + kvmppc_hv_ops->hash_v3_possible()); break; case KVM_CAP_PPC_NESTED_HV: r = !!(hv_enabled && kvmppc_hv_ops->enable_nested && @@ -678,6 +678,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = hv_enabled && kvmppc_hv_ops->enable_svm && !kvmppc_hv_ops->enable_svm(NULL); break; + case KVM_CAP_PPC_DAWR1: + r = !!(hv_enabled && kvmppc_hv_ops->enable_dawr1 && + !kvmppc_hv_ops->enable_dawr1(NULL)); + break; #endif default: r = 0; @@ -1518,7 +1522,7 @@ int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu, return emulated; } -int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1536,7 +1540,7 @@ int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1554,7 +1558,7 @@ int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1572,7 +1576,7 @@ int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -2187,6 +2191,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, break; r = kvm->arch.kvm_ops->enable_svm(kvm); break; + case KVM_CAP_PPC_DAWR1: + r = -EINVAL; + if (!is_kvmppc_hv_enabled(kvm) || !kvm->arch.kvm_ops->enable_dawr1) + break; + r = kvm->arch.kvm_ops->enable_dawr1(kvm); + break; #endif default: r = -EINVAL; diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 69a91b571845..d4efc182662a 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -31,7 +31,7 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o # 64-bit linker creates .sfpr on demand for final link (vmlinux), # so it is only needed for modules, and only for older linkers which # do not support --save-restore-funcs -ifeq ($(call ld-ifversion, -lt, 225000000, y),y) +ifeq ($(call ld-ifversion, -lt, 22500, y),y) extra-$(CONFIG_PPC64) += crtsavres.o endif diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c index 1550e0d2513a..eb2919ddf9b9 100644 --- a/arch/powerpc/lib/pmem.c +++ b/arch/powerpc/lib/pmem.c @@ -6,6 +6,7 @@ #include <linux/string.h> #include <linux/export.h> #include <linux/uaccess.h> +#include <linux/libnvdimm.h> #include <asm/cacheflush.h> diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index bf7a7d62ae8b..bb5c20d4ca91 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -818,13 +818,15 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, break; if (rev) { /* reverse 32 bytes */ - buf.d[0] = byterev_8(reg->d[3]); - buf.d[1] = byterev_8(reg->d[2]); - buf.d[2] = byterev_8(reg->d[1]); - buf.d[3] = byterev_8(reg->d[0]); - reg = &buf; + union vsx_reg buf32[2]; + buf32[0].d[0] = byterev_8(reg[1].d[1]); + buf32[0].d[1] = byterev_8(reg[1].d[0]); + buf32[1].d[0] = byterev_8(reg[0].d[1]); + buf32[1].d[1] = byterev_8(reg[0].d[0]); + memcpy(mem, buf32, size); + } else { + memcpy(mem, reg, size); } - memcpy(mem, reg, size); break; case 16: /* stxv, stxvx, stxvl, stxvll */ @@ -1304,9 +1306,11 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, if ((word & 0xfe2) == 2) op->type = SYSCALL; else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && - (word & 0xfe3) == 1) + (word & 0xfe3) == 1) { /* scv */ op->type = SYSCALL_VECTORED_0; - else + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + } else op->type = UNKNOWN; return 0; #endif @@ -1410,7 +1414,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 1: if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; prefix_r = GET_PREFIX_R(word); ra = GET_PREFIX_RA(suffix); @@ -1443,8 +1447,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 4: + /* + * There are very many instructions with this primary opcode + * introduced in the ISA as early as v2.03. However, the ones + * we currently emulate were all introduced with ISA 3.0 + */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; switch (word & 0x3f) { case 48: /* maddhd */ @@ -1470,7 +1479,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, * There are other instructions from ISA 3.0 with the same * primary opcode which do not have emulation support yet. */ - return -1; + goto unknown_opcode; #endif case 7: /* mulli */ @@ -1530,6 +1539,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 19: if (((word >> 1) & 0x1f) == 2) { /* addpcis */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; imm = (short) (word & 0xffc1); /* d0 + d2 fields */ imm |= (word >> 15) & 0x3e; /* d1 field */ op->val = regs->nip + (imm << 16) + 4; @@ -1842,7 +1853,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 265: /* modud */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = regs->gpr[ra] % regs->gpr[rb]; goto compute_done; #endif @@ -1852,7 +1863,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 267: /* moduw */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = (unsigned int) regs->gpr[ra] % (unsigned int) regs->gpr[rb]; goto compute_done; @@ -1889,7 +1900,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif case 755: /* darn */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; switch (ra & 0x3) { case 0: /* 32-bit conditioned */ @@ -1907,18 +1918,18 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, goto compute_done; } - return -1; + goto unknown_opcode; #ifdef __powerpc64__ case 777: /* modsd */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = (long int) regs->gpr[ra] % (long int) regs->gpr[rb]; goto compute_done; #endif case 779: /* modsw */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = (int) regs->gpr[ra] % (int) regs->gpr[rb]; goto compute_done; @@ -1995,14 +2006,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif case 538: /* cnttzw */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; val = (unsigned int) regs->gpr[rd]; op->val = (val ? __builtin_ctz(val) : 32); goto logical_done; #ifdef __powerpc64__ case 570: /* cnttzd */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; val = regs->gpr[rd]; op->val = (val ? __builtin_ctzl(val) : 64); goto logical_done; @@ -2112,7 +2123,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 890: /* extswsli with sh_5 = 0 */ case 891: /* extswsli with sh_5 = 1 */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->type = COMPUTE + SETREG; sh = rb | ((word & 2) << 4); val = (signed int) regs->gpr[rd]; @@ -2439,6 +2450,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 268: /* lxvx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 16; @@ -2448,6 +2461,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 269: /* lxvl */ case 301: { /* lxvll */ int nb; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->ea = ra ? regs->gpr[ra] : 0; nb = regs->gpr[rb] & 0xff; @@ -2468,13 +2483,15 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 333: /* lxvpx */ if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; op->reg = VSX_REGISTER_XTP(rd); op->type = MKOP(LOAD_VSX, 0, 32); op->element_size = 32; break; case 364: /* lxvwsx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 4; @@ -2482,6 +2499,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 396: /* stxvx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 16; @@ -2491,6 +2510,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 397: /* stxvl */ case 429: { /* stxvll */ int nb; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->ea = ra ? regs->gpr[ra] : 0; nb = regs->gpr[rb] & 0xff; @@ -2504,7 +2525,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } case 461: /* stxvpx */ if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; op->reg = VSX_REGISTER_XTP(rd); op->type = MKOP(STORE_VSX, 0, 32); op->element_size = 32; @@ -2542,6 +2563,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 781: /* lxsibzx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 1); op->element_size = 8; @@ -2549,6 +2572,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 812: /* lxvh8x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 2; @@ -2556,6 +2581,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 813: /* lxsihzx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 2); op->element_size = 8; @@ -2569,6 +2596,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 876: /* lxvb16x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 1; @@ -2582,6 +2611,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 909: /* stxsibx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 1); op->element_size = 8; @@ -2589,6 +2620,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 940: /* stxvh8x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 2; @@ -2596,6 +2629,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 941: /* stxsihx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 2); op->element_size = 8; @@ -2609,6 +2644,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 1004: /* stxvb16x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 1; @@ -2717,12 +2754,16 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->type = MKOP(LOAD_FP, 0, 16); break; case 2: /* lxsd */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd + 32; op->type = MKOP(LOAD_VSX, 0, 8); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 3: /* lxssp */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd + 32; op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 8; @@ -2752,7 +2793,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef CONFIG_VSX case 6: if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; op->ea = dqform_ea(word, regs); op->reg = VSX_REGISTER_XTP(rd); op->element_size = 32; @@ -2775,6 +2816,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 1: /* lxv */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->ea = dqform_ea(word, regs); if (word & 8) op->reg = rd + 32; @@ -2785,6 +2828,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 2: /* stxsd with LSB of DS field = 0 */ case 6: /* stxsd with LSB of DS field = 1 */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->ea = dsform_ea(word, regs); op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 8); @@ -2794,6 +2839,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 3: /* stxssp with LSB of DS field = 0 */ case 7: /* stxssp with LSB of DS field = 1 */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->ea = dsform_ea(word, regs); op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 4); @@ -2802,6 +2849,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 5: /* stxv */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->ea = dqform_ea(word, regs); if (word & 8) op->reg = rd + 32; @@ -2831,7 +2880,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 1: /* Prefixed instructions */ if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; prefix_r = GET_PREFIX_R(word); ra = GET_PREFIX_RA(suffix); @@ -2970,6 +3019,20 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } + if (OP_IS_LOAD_STORE(op->type) && (op->type & UPDATE)) { + switch (GETTYPE(op->type)) { + case LOAD: + if (ra == rd) + goto unknown_opcode; + fallthrough; + case STORE: + case LOAD_FP: + case STORE_FP: + if (ra == 0) + goto unknown_opcode; + } + } + #ifdef CONFIG_VSX if ((GETTYPE(op->type) == LOAD_VSX || GETTYPE(op->type) == STORE_VSX) && @@ -2980,6 +3043,10 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; + unknown_opcode: + op->type = UNKNOWN; + return 0; + logical_done: if (word & 1) set_cr0(regs, op); diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile index 3f972db17761..446d9de88ce4 100644 --- a/arch/powerpc/mm/book3s32/Makefile +++ b/arch/powerpc/mm/book3s32/Makefile @@ -6,4 +6,6 @@ ifdef CONFIG_KASAN CFLAGS_mmu.o += -DDISABLE_BRANCH_PROFILING endif -obj-y += mmu.o hash_low.o mmu_context.o tlb.o nohash_low.o +obj-y += mmu.o mmu_context.o +obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o +obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 859e5bd603ac..d7eb266a3f7a 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -234,7 +234,7 @@ void mmu_mark_initmem_nx(void) if (is_module_segment(i << 28)) continue; - mtsrin(mfsrin(i << 28) | 0x10000000, i << 28); + mtsr(mfsr(i << 28) | 0x10000000, i << 28); } } diff --git a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c index b5e9fff8c217..a688e1324ae5 100644 --- a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c @@ -16,10 +16,6 @@ unsigned int hpage_shift; EXPORT_SYMBOL(hpage_shift); -extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn, - unsigned long pa, unsigned long rlags, - unsigned long vflags, int psize, int ssize); - int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, unsigned int shift, unsigned int mmu_psize) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 73b06adb6eeb..581b20a2feaf 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -38,6 +38,7 @@ #include <linux/pgtable.h> #include <asm/debugfs.h> +#include <asm/interrupt.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/mmu_context.h> @@ -1143,10 +1144,10 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) page = pte_page(pte); /* page is dirty */ - if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { + if (!test_bit(PG_dcache_clean, &page->flags) && !PageReserved(page)) { if (trap == 0x400) { flush_dcache_icache_page(page); - set_bit(PG_arch_1, &page->flags); + set_bit(PG_dcache_clean, &page->flags); } else pp |= HPTE_R_N; } @@ -1288,7 +1289,6 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long flags) { bool is_thp; - enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; unsigned long vsid; pte_t *ptep; @@ -1490,7 +1490,6 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, DBG_LOW(" -> rc=%d\n", rc); bail: - exception_exit(prev_state); return rc; } EXPORT_SYMBOL_GPL(hash_page_mm); @@ -1512,16 +1511,22 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); -int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr, - unsigned long msr) +DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault); +DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) { + unsigned long ea = regs->dar; + unsigned long dsisr = regs->dsisr; unsigned long access = _PAGE_PRESENT | _PAGE_READ; unsigned long flags = 0; - struct mm_struct *mm = current->mm; - unsigned int region_id = get_region_id(ea); + struct mm_struct *mm; + unsigned int region_id; + long err; + region_id = get_region_id(ea); if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID)) mm = &init_mm; + else + mm = current->mm; if (dsisr & DSISR_NOHPTE) flags |= HPTE_NOHPTE_UPDATE; @@ -1537,13 +1542,66 @@ int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr, * 2) user space access kernel space. */ access |= _PAGE_PRIVILEGED; - if ((msr & MSR_PR) || (region_id == USER_REGION_ID)) + if (user_mode(regs) || (region_id == USER_REGION_ID)) access &= ~_PAGE_PRIVILEGED; - if (trap == 0x400) + if (regs->trap == 0x400) access |= _PAGE_EXEC; - return hash_page_mm(mm, ea, access, trap, flags); + err = hash_page_mm(mm, ea, access, regs->trap, flags); + if (unlikely(err < 0)) { + // failed to instert a hash PTE due to an hypervisor error + if (user_mode(regs)) { + if (IS_ENABLED(CONFIG_PPC_SUBPAGE_PROT) && err == -2) + _exception(SIGSEGV, regs, SEGV_ACCERR, ea); + else + _exception(SIGBUS, regs, BUS_ADRERR, ea); + } else { + bad_page_fault(regs, SIGBUS); + } + err = 0; + } + + return err; +} + +/* + * The _RAW interrupt entry checks for the in_nmi() case before + * running the full handler. + */ +DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) +{ + unsigned long dsisr = regs->dsisr; + long err; + + if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) + goto page_fault; + + /* + * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then + * don't call hash_page, just fail the fault. This is required to + * prevent re-entrancy problems in the hash code, namely perf + * interrupts hitting while something holds H_PAGE_BUSY, and taking a + * hash fault. See the comment in hash_preload(). + * + * We come here as a result of a DSI at a point where we don't want + * to call hash_page, such as when we are accessing memory (possibly + * user memory) inside a PMU interrupt that occurred while interrupts + * were soft-disabled. We want to invoke the exception handler for + * the access, or panic if there isn't a handler. + */ + if (unlikely(in_nmi())) { + do_bad_page_fault_segv(regs); + return 0; + } + + err = __do_hash_fault(regs); + if (err) { +page_fault: + err = hash__do_page_fault(regs); + } + + return err; } #ifdef CONFIG_PPC_MM_SLICES @@ -1843,27 +1901,6 @@ void flush_hash_range(unsigned long number, int local) } } -/* - * low_hash_fault is called when we the low level hash code failed - * to instert a PTE due to an hypervisor error - */ -void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc) -{ - enum ctx_state prev_state = exception_enter(); - - if (user_mode(regs)) { -#ifdef CONFIG_PPC_SUBPAGE_PROT - if (rc == -2) - _exception(SIGSEGV, regs, SEGV_ACCERR, address); - else -#endif - _exception(SIGBUS, regs, BUS_ADRERR, address); - } else - bad_page_fault(regs, address, SIGBUS); - - exception_exit(prev_state); -} - long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long pa, unsigned long rflags, unsigned long vflags, int psize, int ssize) diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h index c12d78ee42f5..5045048ce244 100644 --- a/arch/powerpc/mm/book3s64/internal.h +++ b/arch/powerpc/mm/book3s64/internal.h @@ -15,4 +15,6 @@ static inline bool stress_slb(void) void slb_setup_new_exec(void); +void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush); + #endif /* ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H */ diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c index 685d7bb3d26f..cd18e94d0843 100644 --- a/arch/powerpc/mm/book3s64/iommu_api.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -129,7 +129,8 @@ good_exit: mutex_lock(&mem_list_mutex); - list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) { + list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next, + lockdep_is_held(&mem_list_mutex)) { /* Overlap? */ if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) && (ua < (mem2->ua + @@ -289,6 +290,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, { struct mm_iommu_table_group_mem_t *mem, *ret = NULL; + rcu_read_lock(); list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua <= ua) && (ua + size <= mem->ua + @@ -297,6 +299,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, break; } } + rcu_read_unlock(); return ret; } @@ -327,7 +330,8 @@ struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm, mutex_lock(&mem_list_mutex); - list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next, + lockdep_is_held(&mem_list_mutex)) { if ((mem->ua == ua) && (mem->entries == entries)) { ret = mem; ++mem->used; @@ -421,6 +425,7 @@ bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa, struct mm_iommu_table_group_mem_t *mem; unsigned long end; + rcu_read_lock(); list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) continue; @@ -437,6 +442,7 @@ bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa, return true; } } + rcu_read_unlock(); return false; } diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 5b3a3bae21aa..9ffa65074cb0 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -20,6 +20,8 @@ #include <mm/mmu_decl.h> #include <trace/events/thp.h> +#include "internal.h" + unsigned long __pmd_frag_nr; EXPORT_SYMBOL(__pmd_frag_nr); unsigned long __pmd_frag_size_shift; @@ -79,10 +81,15 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); } -static void do_nothing(void *unused) +static void do_serialize(void *arg) { - + /* We've taken the IPI, so try to trim the mask while here */ + if (radix_enabled()) { + struct mm_struct *mm = arg; + exit_lazy_flush_tlb(mm, false); + } } + /* * Serialize against find_current_mm_pte which does lock-less * lookup in page tables with local interrupts disabled. For huge pages @@ -96,7 +103,7 @@ static void do_nothing(void *unused) void serialize_against_pte_lookup(struct mm_struct *mm) { smp_mb(); - smp_call_function_many(mm_cpumask(mm), do_nothing, NULL, 1); + smp_call_function_many(mm_cpumask(mm), do_serialize, mm, 1); } /* diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index f1c6f264ed91..15dcc5ad91c5 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -31,6 +31,7 @@ static u32 initial_allocation_mask __ro_after_init; u64 default_amr __ro_after_init = ~0x0UL; u64 default_iamr __ro_after_init = 0x5555555555555555UL; u64 default_uamor __ro_after_init; +EXPORT_SYMBOL(default_amr); /* * Key used to implement PROT_EXEC mmap. Denies READ/WRITE * We pick key 2 because 0 is special key and 1 is reserved as per ISA. diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index fb66d154b26c..409e61210789 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -18,6 +18,8 @@ #include <asm/cputhreads.h> #include <asm/plpar_wrappers.h> +#include "internal.h" + #define RIC_FLUSH_TLB 0 #define RIC_FLUSH_PWC 1 #define RIC_FLUSH_ALL 2 @@ -627,15 +629,6 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd } EXPORT_SYMBOL(radix__local_flush_tlb_page); -static bool mm_is_singlethreaded(struct mm_struct *mm) -{ - if (atomic_read(&mm->context.copros) > 0) - return false; - if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) - return true; - return false; -} - static bool mm_needs_flush_escalation(struct mm_struct *mm) { /* @@ -648,21 +641,24 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm) return false; } -#ifdef CONFIG_SMP -static void do_exit_flush_lazy_tlb(void *arg) +/* + * If always_flush is true, then flush even if this CPU can't be removed + * from mm_cpumask. + */ +void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush) { - struct mm_struct *mm = arg; unsigned long pid = mm->context.id; + int cpu = smp_processor_id(); /* * A kthread could have done a mmget_not_zero() after the flushing CPU - * checked mm_is_singlethreaded, and be in the process of - * kthread_use_mm when interrupted here. In that case, current->mm will - * be set to mm, because kthread_use_mm() setting ->mm and switching to - * the mm is done with interrupts off. + * checked mm_cpumask, and be in the process of kthread_use_mm when + * interrupted here. In that case, current->mm will be set to mm, + * because kthread_use_mm() setting ->mm and switching to the mm is + * done with interrupts off. */ if (current->mm == mm) - goto out_flush; + goto out; if (current->active_mm == mm) { WARN_ON_ONCE(current->mm != NULL); @@ -673,11 +669,30 @@ static void do_exit_flush_lazy_tlb(void *arg) mmdrop(mm); } - atomic_dec(&mm->context.active_cpus); - cpumask_clear_cpu(smp_processor_id(), mm_cpumask(mm)); + /* + * This IPI may be initiated from any source including those not + * running the mm, so there may be a racing IPI that comes after + * this one which finds the cpumask already clear. Check and avoid + * underflowing the active_cpus count in that case. The race should + * not otherwise be a problem, but the TLB must be flushed because + * that's what the caller expects. + */ + if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { + atomic_dec(&mm->context.active_cpus); + cpumask_clear_cpu(cpu, mm_cpumask(mm)); + always_flush = true; + } -out_flush: - _tlbiel_pid(pid, RIC_FLUSH_ALL); +out: + if (always_flush) + _tlbiel_pid(pid, RIC_FLUSH_ALL); +} + +#ifdef CONFIG_SMP +static void do_exit_flush_lazy_tlb(void *arg) +{ + struct mm_struct *mm = arg; + exit_lazy_flush_tlb(mm, true); } static void exit_flush_lazy_tlbs(struct mm_struct *mm) @@ -693,9 +708,110 @@ static void exit_flush_lazy_tlbs(struct mm_struct *mm) (void *)mm, 1); } +#else /* CONFIG_SMP */ +static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { } +#endif /* CONFIG_SMP */ + +static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock); + +/* + * Interval between flushes at which we send out IPIs to check whether the + * mm_cpumask can be trimmed for the case where it's not a single-threaded + * process flushing its own mm. The intent is to reduce the cost of later + * flushes. Don't want this to be so low that it adds noticable cost to TLB + * flushing, or so high that it doesn't help reduce global TLBIEs. + */ +static unsigned long tlb_mm_cpumask_trim_timer = 1073; + +static bool tick_and_test_trim_clock(void) +{ + if (__this_cpu_inc_return(mm_cpumask_trim_clock) == + tlb_mm_cpumask_trim_timer) { + __this_cpu_write(mm_cpumask_trim_clock, 0); + return true; + } + return false; +} + +enum tlb_flush_type { + FLUSH_TYPE_NONE, + FLUSH_TYPE_LOCAL, + FLUSH_TYPE_GLOBAL, +}; + +static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm) +{ + int active_cpus = atomic_read(&mm->context.active_cpus); + int cpu = smp_processor_id(); + + if (active_cpus == 0) + return FLUSH_TYPE_NONE; + if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) { + if (current->mm != mm) { + /* + * Asynchronous flush sources may trim down to nothing + * if the process is not running, so occasionally try + * to trim. + */ + if (tick_and_test_trim_clock()) { + exit_lazy_flush_tlb(mm, true); + return FLUSH_TYPE_NONE; + } + } + return FLUSH_TYPE_LOCAL; + } + + /* Coprocessors require TLBIE to invalidate nMMU. */ + if (atomic_read(&mm->context.copros) > 0) + return FLUSH_TYPE_GLOBAL; + + /* + * In the fullmm case there's no point doing the exit_flush_lazy_tlbs + * because the mm is being taken down anyway, and a TLBIE tends to + * be faster than an IPI+TLBIEL. + */ + if (fullmm) + return FLUSH_TYPE_GLOBAL; + + /* + * If we are running the only thread of a single-threaded process, + * then we should almost always be able to trim off the rest of the + * CPU mask (except in the case of use_mm() races), so always try + * trimming the mask. + */ + if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) { + exit_flush_lazy_tlbs(mm); + /* + * use_mm() race could prevent IPIs from being able to clear + * the cpumask here, however those users are established + * after our first check (and so after the PTEs are removed), + * and the TLB still gets flushed by the IPI, so this CPU + * will only require a local flush. + */ + return FLUSH_TYPE_LOCAL; + } + + /* + * Occasionally try to trim down the cpumask. It's possible this can + * bring the mask to zero, which results in no flush. + */ + if (tick_and_test_trim_clock()) { + exit_flush_lazy_tlbs(mm); + if (current->mm == mm) + return FLUSH_TYPE_LOCAL; + if (cpumask_test_cpu(cpu, mm_cpumask(mm))) + exit_lazy_flush_tlb(mm, true); + return FLUSH_TYPE_NONE; + } + + return FLUSH_TYPE_GLOBAL; +} + +#ifdef CONFIG_SMP void radix__flush_tlb_mm(struct mm_struct *mm) { unsigned long pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -703,16 +819,15 @@ void radix__flush_tlb_mm(struct mm_struct *mm) preempt_disable(); /* - * Order loads of mm_cpumask vs previous stores to clear ptes before - * the invalidate. See barrier in switch_mm_irqs_off + * Order loads of mm_cpumask (in flush_type_needed) vs previous + * stores to clear ptes before the invalidate. See barrier in + * switch_mm_irqs_off */ smp_mb(); - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - exit_flush_lazy_tlbs(mm); - goto local; - } - + type = flush_type_needed(mm, false); + if (type == FLUSH_TYPE_LOCAL) { + _tlbiel_pid(pid, RIC_FLUSH_TLB); + } else if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt = H_RPTI_TARGET_CMMU; @@ -728,9 +843,6 @@ void radix__flush_tlb_mm(struct mm_struct *mm) } else { _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB); } - } else { -local: - _tlbiel_pid(pid, RIC_FLUSH_TLB); } preempt_enable(); } @@ -739,6 +851,7 @@ EXPORT_SYMBOL(radix__flush_tlb_mm); static void __flush_all_mm(struct mm_struct *mm, bool fullmm) { unsigned long pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -746,13 +859,10 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - if (!fullmm) { - exit_flush_lazy_tlbs(mm); - goto local; - } - } + type = flush_type_needed(mm, fullmm); + if (type == FLUSH_TYPE_LOCAL) { + _tlbiel_pid(pid, RIC_FLUSH_ALL); + } else if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt = H_RPTI_TARGET_CMMU; unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | @@ -766,9 +876,6 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) _tlbie_pid(pid, RIC_FLUSH_ALL); else _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); - } else { -local: - _tlbiel_pid(pid, RIC_FLUSH_ALL); } preempt_enable(); } @@ -783,6 +890,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize) { unsigned long pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -790,11 +898,10 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - exit_flush_lazy_tlbs(mm); - goto local; - } + type = flush_type_needed(mm, false); + if (type == FLUSH_TYPE_LOCAL) { + _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); + } else if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt, pg_sizes, size; @@ -811,9 +918,6 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB); else _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB); - } else { -local: - _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); } preempt_enable(); } @@ -828,8 +932,6 @@ void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) } EXPORT_SYMBOL(radix__flush_tlb_page); -#else /* CONFIG_SMP */ -static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { } #endif /* CONFIG_SMP */ static void do_tlbiel_kernel(void *info) @@ -893,7 +995,9 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift; unsigned long page_size = 1UL << page_shift; unsigned long nr_pages = (end - start) >> page_shift; - bool local, full; + bool fullmm = (end == TLB_FLUSH_ALL); + bool flush_pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -901,24 +1005,18 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - if (end != TLB_FLUSH_ALL) { - exit_flush_lazy_tlbs(mm); - goto is_local; - } - } - local = false; - full = (end == TLB_FLUSH_ALL || - nr_pages > tlb_single_page_flush_ceiling); - } else { -is_local: - local = true; - full = (end == TLB_FLUSH_ALL || - nr_pages > tlb_local_single_page_flush_ceiling); - } + type = flush_type_needed(mm, fullmm); + if (type == FLUSH_TYPE_NONE) + goto out; + + if (fullmm) + flush_pid = true; + else if (type == FLUSH_TYPE_GLOBAL) + flush_pid = nr_pages > tlb_single_page_flush_ceiling; + else + flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; - if (!mmu_has_feature(MMU_FTR_GTSE) && !local) { + if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { unsigned long tgt = H_RPTI_TARGET_CMMU; unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); @@ -928,8 +1026,8 @@ is_local: tgt |= H_RPTI_TARGET_NMMU; pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes, start, end); - } else if (full) { - if (local) { + } else if (flush_pid) { + if (type == FLUSH_TYPE_LOCAL) { _tlbiel_pid(pid, RIC_FLUSH_TLB); } else { if (cputlb_use_tlbie()) { @@ -952,7 +1050,7 @@ is_local: hflush = true; } - if (local) { + if (type == FLUSH_TYPE_LOCAL) { asm volatile("ptesync": : :"memory"); __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize); if (hflush) @@ -974,6 +1072,7 @@ is_local: hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false); } } +out: preempt_enable(); } @@ -1085,32 +1184,30 @@ static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned int page_shift = mmu_psize_defs[psize].shift; unsigned long page_size = 1UL << page_shift; unsigned long nr_pages = (end - start) >> page_shift; - bool local, full; + bool fullmm = (end == TLB_FLUSH_ALL); + bool flush_pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) return; + fullmm = (end == TLB_FLUSH_ALL); + preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - if (end != TLB_FLUSH_ALL) { - exit_flush_lazy_tlbs(mm); - goto is_local; - } - } - local = false; - full = (end == TLB_FLUSH_ALL || - nr_pages > tlb_single_page_flush_ceiling); - } else { -is_local: - local = true; - full = (end == TLB_FLUSH_ALL || - nr_pages > tlb_local_single_page_flush_ceiling); - } + type = flush_type_needed(mm, fullmm); + if (type == FLUSH_TYPE_NONE) + goto out; + + if (fullmm) + flush_pid = true; + else if (type == FLUSH_TYPE_GLOBAL) + flush_pid = nr_pages > tlb_single_page_flush_ceiling; + else + flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; - if (!mmu_has_feature(MMU_FTR_GTSE) && !local) { + if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { unsigned long tgt = H_RPTI_TARGET_CMMU; unsigned long type = H_RPTI_TYPE_TLB; unsigned long pg_sizes = psize_to_rpti_pgsize(psize); @@ -1120,8 +1217,8 @@ is_local: if (atomic_read(&mm->context.copros) > 0) tgt |= H_RPTI_TARGET_NMMU; pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); - } else if (full) { - if (local) { + } else if (flush_pid) { + if (type == FLUSH_TYPE_LOCAL) { _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); } else { if (cputlb_use_tlbie()) { @@ -1137,7 +1234,7 @@ is_local: } } else { - if (local) + if (type == FLUSH_TYPE_LOCAL) _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc); else if (cputlb_use_tlbie()) _tlbie_va_range(start, end, pid, page_size, psize, also_pwc); @@ -1145,6 +1242,7 @@ is_local: _tlbiel_va_range_multicast(mm, start, end, pid, page_size, psize, also_pwc); } +out: preempt_enable(); } @@ -1164,6 +1262,7 @@ static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) { unsigned long pid, end; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -1180,11 +1279,10 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) /* Otherwise first do the PWC, then iterate the pages. */ preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - exit_flush_lazy_tlbs(mm); - goto local; - } + type = flush_type_needed(mm, false); + if (type == FLUSH_TYPE_LOCAL) { + _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); + } else if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt, type, pg_sizes; @@ -1202,9 +1300,6 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) else _tlbiel_va_range_multicast(mm, addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); - } else { -local: - _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); } preempt_enable(); diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 584567970c11..c91bd85eb90e 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -10,6 +10,7 @@ */ #include <asm/asm-prototypes.h> +#include <asm/interrupt.h> #include <asm/mmu.h> #include <asm/mmu_context.h> #include <asm/paca.h> @@ -813,8 +814,9 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea) return slb_insert_entry(ea, context, flags, ssize, false); } -long do_slb_fault(struct pt_regs *regs, unsigned long ea) +DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault) { + unsigned long ea = regs->dar; unsigned long id = get_region_id(ea); /* IRQs are not reconciled here, so can't check irqs_disabled */ @@ -824,19 +826,21 @@ long do_slb_fault(struct pt_regs *regs, unsigned long ea) return -EINVAL; /* - * SLB kernel faults must be very careful not to touch anything - * that is not bolted. E.g., PACA and global variables are okay, - * mm->context stuff is not. - * - * SLB user faults can access all of kernel memory, but must be - * careful not to touch things like IRQ state because it is not - * "reconciled" here. The difficulty is that we must use - * fast_exception_return to return from kernel SLB faults without - * looking at possible non-bolted memory. We could test user vs - * kernel faults in the interrupt handler asm and do a full fault, - * reconcile, ret_from_except for user faults which would make them - * first class kernel code. But for performance it's probably nicer - * if they go via fast_exception_return too. + * SLB kernel faults must be very careful not to touch anything that is + * not bolted. E.g., PACA and global variables are okay, mm->context + * stuff is not. SLB user faults may access all of memory (and induce + * one recursive SLB kernel fault), so the kernel fault must not + * trample on the user fault state at those points. + */ + + /* + * This is a raw interrupt handler, for performance, so that + * fast_interrupt_return can be used. The handler must not touch local + * irq state, or schedule. We could test for usermode and upgrade to a + * normal process context (synchronous) interrupt for those, which + * would make them first-class kernel code and able to be traced and + * instrumented, although performance would suffer a bit, it would + * probably be a good tradeoff. */ if (id >= LINEAR_MAP_REGION_ID) { long err; @@ -865,13 +869,15 @@ long do_slb_fault(struct pt_regs *regs, unsigned long ea) } } -void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err) +DEFINE_INTERRUPT_HANDLER(do_bad_slb_fault) { + int err = regs->result; + if (err == -EFAULT) { if (user_mode(regs)) - _exception(SIGSEGV, regs, SEGV_BNDERR, ea); + _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar); else - bad_page_fault(regs, ea, SIGSEGV); + bad_page_fault(regs, SIGSEGV); } else if (err == -EINVAL) { unrecoverable_exception(regs); } else { diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 8961b44f350c..bb368257b55c 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -34,6 +34,7 @@ #include <linux/uaccess.h> #include <asm/firmware.h> +#include <asm/interrupt.h> #include <asm/page.h> #include <asm/mmu.h> #include <asm/mmu_context.h> @@ -377,18 +378,16 @@ static void sanity_check_fault(bool is_write, bool is_user, /* * For 600- and 800-family processors, the error_code parameter is DSISR - * for a data fault, SRR1 for an instruction fault. For 400-family processors - * the error_code parameter is ESR for a data fault, 0 for an instruction - * fault. - * For 64-bit processors, the error_code parameter is - * - DSISR for a non-SLB data access fault, - * - SRR1 & 0x08000000 for a non-SLB instruction access fault - * - 0 any SLB fault. + * for a data fault, SRR1 for an instruction fault. + * For 400-family processors the error_code parameter is ESR for a data fault, + * 0 for an instruction fault. + * For 64-bit processors, the error_code parameter is DSISR for a data access + * fault, SRR1 & 0x08000000 for an instruction access fault. * * The return value is 0 if the fault was handled, or the signal * number if this is a kernel fault that can't be handled here. */ -static int __do_page_fault(struct pt_regs *regs, unsigned long address, +static int ___do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { struct vm_area_struct * vma; @@ -435,9 +434,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, return bad_area_nosemaphore(regs, address); } - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); + interrupt_cond_local_irq_enable(regs); perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); @@ -540,34 +537,51 @@ retry: return 0; } -NOKPROBE_SYMBOL(__do_page_fault); +NOKPROBE_SYMBOL(___do_page_fault); -int do_page_fault(struct pt_regs *regs, unsigned long address, - unsigned long error_code) +static long __do_page_fault(struct pt_regs *regs) { const struct exception_table_entry *entry; - enum ctx_state prev_state = exception_enter(); - int rc = __do_page_fault(regs, address, error_code); - exception_exit(prev_state); - if (likely(!rc)) - return 0; + long err; + + err = ___do_page_fault(regs, regs->dar, regs->dsisr); + if (likely(!err)) + return err; entry = search_exception_tables(regs->nip); - if (unlikely(!entry)) - return rc; + if (likely(entry)) { + instruction_pointer_set(regs, extable_fixup(entry)); + return 0; + } else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { + __bad_page_fault(regs, err); + return 0; + } else { + /* 32 and 64e handle the bad page fault in asm */ + return err; + } +} +NOKPROBE_SYMBOL(__do_page_fault); - instruction_pointer_set(regs, extable_fixup(entry)); +DEFINE_INTERRUPT_HANDLER_RET(do_page_fault) +{ + return __do_page_fault(regs); +} - return 0; +#ifdef CONFIG_PPC_BOOK3S_64 +/* Same as do_page_fault but interrupt entry has already run in do_hash_fault */ +long hash__do_page_fault(struct pt_regs *regs) +{ + return __do_page_fault(regs); } -NOKPROBE_SYMBOL(do_page_fault); +NOKPROBE_SYMBOL(hash__do_page_fault); +#endif /* * bad_page_fault is called when we have a bad access from the kernel. * It is called from the DSI and ISI handlers in head.S and from some * of the procedures in traps.c. */ -void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +void __bad_page_fault(struct pt_regs *regs, int sig) { int is_write = page_fault_is_write(regs->dsisr); @@ -605,7 +619,7 @@ void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) die("Kernel access of bad area", regs, sig); } -void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +void bad_page_fault(struct pt_regs *regs, int sig) { const struct exception_table_entry *entry; @@ -614,5 +628,12 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) if (entry) instruction_pointer_set(regs, extable_fixup(entry)); else - __bad_page_fault(regs, address, sig); + __bad_page_fault(regs, sig); } + +#ifdef CONFIG_PPC_BOOK3S_64 +DEFINE_INTERRUPT_HANDLER(do_bad_page_fault_segv) +{ + bad_page_fault(regs, SIGSEGV); +} +#endif diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 8b3cc4d688e8..d142b76d507d 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -217,7 +217,7 @@ void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_p } } -int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate) +static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate) { struct huge_bootmem_page *m; if (nr_gpages == 0) @@ -663,24 +663,6 @@ static int __init hugetlbpage_init(void) arch_initcall(hugetlbpage_init); -void flush_dcache_icache_hugepage(struct page *page) -{ - int i; - void *start; - - BUG_ON(!PageCompound(page)); - - for (i = 0; i < compound_nr(page); i++) { - if (!PageHighMem(page)) { - __flush_dcache_icache(page_address(page+i)); - } else { - start = kmap_atomic(page+i); - __flush_dcache_icache(start); - kunmap_atomic(start); - } - } -} - void __init gigantic_hugetlb_cma_reserve(void) { unsigned long order = 0; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index afab328d0887..4e8ce6d85232 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -91,27 +91,6 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } -#define FLUSH_CHUNK_SIZE SZ_1G -/** - * flush_dcache_range_chunked(): Write any modified data cache blocks out to - * memory and invalidate them, in chunks of up to FLUSH_CHUNK_SIZE - * Does not invalidate the corresponding instruction cache blocks. - * - * @start: the start address - * @stop: the stop address (exclusive) - * @chunk: the max size of the chunks - */ -static void flush_dcache_range_chunked(unsigned long start, unsigned long stop, - unsigned long chunk) -{ - unsigned long i; - - for (i = start; i < stop; i += chunk) { - flush_dcache_range(i, min(stop, i + chunk)); - cond_resched(); - } -} - int __ref arch_create_linear_mapping(int nid, u64 start, u64 size, struct mhp_params *params) { @@ -136,7 +115,6 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size) /* Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(start); - flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE); mutex_lock(&linear_mapping_mutex); ret = remove_section_mapping(start, start + size); @@ -489,19 +467,35 @@ void flush_dcache_page(struct page *page) if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) return; /* avoid an atomic op if possible */ - if (test_bit(PG_arch_1, &page->flags)) - clear_bit(PG_arch_1, &page->flags); + if (test_bit(PG_dcache_clean, &page->flags)) + clear_bit(PG_dcache_clean, &page->flags); } EXPORT_SYMBOL(flush_dcache_page); -void flush_dcache_icache_page(struct page *page) +static void flush_dcache_icache_hugepage(struct page *page) { -#ifdef CONFIG_HUGETLB_PAGE - if (PageCompound(page)) { - flush_dcache_icache_hugepage(page); - return; + int i; + void *start; + + BUG_ON(!PageCompound(page)); + + for (i = 0; i < compound_nr(page); i++) { + if (!PageHighMem(page)) { + __flush_dcache_icache(page_address(page+i)); + } else { + start = kmap_atomic(page+i); + __flush_dcache_icache(start); + kunmap_atomic(start); + } } -#endif +} + +void flush_dcache_icache_page(struct page *page) +{ + + if (PageCompound(page)) + return flush_dcache_icache_hugepage(page); + #if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64) /* On 8xx there is no need to kmap since highmem is not supported */ __flush_dcache_icache(page_address(page)); diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 15555c95cebc..354611940118 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -26,6 +26,7 @@ #include <asm/tlbflush.h> #include <asm/tlb.h> #include <asm/hugetlb.h> +#include <asm/pte-walk.h> static inline int is_exec_fault(void) { @@ -81,9 +82,9 @@ static pte_t set_pte_filter_hash(pte_t pte) struct page *pg = maybe_pte_to_page(pte); if (!pg) return pte; - if (!test_bit(PG_arch_1, &pg->flags)) { + if (!test_bit(PG_dcache_clean, &pg->flags)) { flush_dcache_icache_page(pg); - set_bit(PG_arch_1, &pg->flags); + set_bit(PG_dcache_clean, &pg->flags); } } return pte; @@ -116,13 +117,13 @@ static inline pte_t set_pte_filter(pte_t pte) return pte; /* If the page clean, we move on */ - if (test_bit(PG_arch_1, &pg->flags)) + if (test_bit(PG_dcache_clean, &pg->flags)) return pte; /* If it's an exec fault, we flush the cache and make it clean */ if (is_exec_fault()) { flush_dcache_icache_page(pg); - set_bit(PG_arch_1, &pg->flags); + set_bit(PG_dcache_clean, &pg->flags); return pte; } @@ -161,12 +162,12 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, goto bail; /* If the page is already clean, we move on */ - if (test_bit(PG_arch_1, &pg->flags)) + if (test_bit(PG_dcache_clean, &pg->flags)) goto bail; - /* Clean the page and set PG_arch_1 */ + /* Clean the page and set PG_dcache_clean */ flush_dcache_icache_page(pg); - set_bit(PG_arch_1, &pg->flags); + set_bit(PG_dcache_clean, &pg->flags); bail: return pte_mkexec(pte); diff --git a/arch/powerpc/mm/ptdump/segment_regs.c b/arch/powerpc/mm/ptdump/segment_regs.c index dde2fe8de4b2..565048a0c9be 100644 --- a/arch/powerpc/mm/ptdump/segment_regs.c +++ b/arch/powerpc/mm/ptdump/segment_regs.c @@ -10,7 +10,7 @@ static void seg_show(struct seq_file *m, int i) { - u32 val = mfsrin(i << 28); + u32 val = mfsr(i << 28); seq_printf(m, "0x%01x0000000-0x%01xfffffff ", i, i); seq_printf(m, "Kern key %d ", (val >> 30) & 1); diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 022103c6a201..aaf1a887f653 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -683,10 +683,18 @@ emit_clear: break; /* - * BPF_STX XADD (atomic_add) + * BPF_STX ATOMIC (atomic ops) */ - /* *(u32 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_W: + if (insn->imm != BPF_ADD) { + pr_err_ratelimited( + "eBPF filter atomic op code %02x (@%d) unsupported\n", + code, i); + return -ENOTSUPP; + } + + /* *(u32 *)(dst + off) += src */ + /* Get EA into TMP_REG_1 */ EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); tmp_idx = ctx->idx * 4; @@ -699,8 +707,15 @@ emit_clear: /* we're done if this succeeded */ PPC_BCC_SHORT(COND_NE, tmp_idx); break; - /* *(u64 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_DW: + case BPF_STX | BPF_ATOMIC | BPF_DW: + if (insn->imm != BPF_ADD) { + pr_err_ratelimited( + "eBPF filter atomic op code %02x (@%d) unsupported\n", + code, i); + return -ENOTSUPP; + } + /* *(u64 *)(dst + off) += src */ + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); tmp_idx = ctx->idx * 4; EMIT(PPC_RAW_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0)); diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile deleted file mode 100644 index bb2d94c8cbe6..000000000000 --- a/arch/powerpc/oprofile/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) - -obj-$(CONFIG_OPROFILE) += oprofile.o - -DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \ - oprof.o cpu_buffer.o buffer_sync.o \ - event_buffer.o oprofile_files.o \ - oprofilefs.o oprofile_stats.o \ - timer_int.o ) - -oprofile-y := $(DRIVER_OBJS) common.o backtrace.o -oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \ - cell/spu_profiler.o cell/vma_map.o \ - cell/spu_task_sync.o -oprofile-$(CONFIG_PPC_BOOK3S_64) += op_model_power4.o op_model_pa6t.o -oprofile-$(CONFIG_FSL_EMB_PERFMON) += op_model_fsl_emb.o -oprofile-$(CONFIG_PPC_BOOK3S_32) += op_model_7450.o diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c deleted file mode 100644 index 9db7ada79d10..000000000000 --- a/arch/powerpc/oprofile/backtrace.c +++ /dev/null @@ -1,120 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/** - * Copyright (C) 2005 Brian Rogan <bcr6@cornell.edu>, IBM - * -**/ - -#include <linux/time.h> -#include <linux/oprofile.h> -#include <linux/sched.h> -#include <asm/processor.h> -#include <linux/uaccess.h> -#include <linux/compat.h> -#include <asm/oprofile_impl.h> - -#define STACK_SP(STACK) *(STACK) - -#define STACK_LR64(STACK) *((unsigned long *)(STACK) + 2) -#define STACK_LR32(STACK) *((unsigned int *)(STACK) + 1) - -#ifdef CONFIG_PPC64 -#define STACK_LR(STACK) STACK_LR64(STACK) -#else -#define STACK_LR(STACK) STACK_LR32(STACK) -#endif - -static unsigned int user_getsp32(unsigned int sp, int is_first) -{ - unsigned int stack_frame[2]; - void __user *p = compat_ptr(sp); - - /* - * The most likely reason for this is that we returned -EFAULT, - * which means that we've done all that we can do from - * interrupt context. - */ - if (copy_from_user_nofault(stack_frame, (void __user *)p, - sizeof(stack_frame))) - return 0; - - if (!is_first) - oprofile_add_trace(STACK_LR32(stack_frame)); - - /* - * We do not enforce increasing stack addresses here because - * we may transition to a different stack, eg a signal handler. - */ - return STACK_SP(stack_frame); -} - -#ifdef CONFIG_PPC64 -static unsigned long user_getsp64(unsigned long sp, int is_first) -{ - unsigned long stack_frame[3]; - - if (copy_from_user_nofault(stack_frame, (void __user *)sp, - sizeof(stack_frame))) - return 0; - - if (!is_first) - oprofile_add_trace(STACK_LR64(stack_frame)); - - return STACK_SP(stack_frame); -} -#endif - -static unsigned long kernel_getsp(unsigned long sp, int is_first) -{ - unsigned long *stack_frame = (unsigned long *)sp; - - if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) - return 0; - - if (!is_first) - oprofile_add_trace(STACK_LR(stack_frame)); - - /* - * We do not enforce increasing stack addresses here because - * we might be transitioning from an interrupt stack to a kernel - * stack. validate_sp() is designed to understand this, so just - * use it. - */ - return STACK_SP(stack_frame); -} - -void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth) -{ - unsigned long sp = regs->gpr[1]; - int first_frame = 1; - - /* We ditch the top stackframe so need to loop through an extra time */ - depth += 1; - - if (!user_mode(regs)) { - while (depth--) { - sp = kernel_getsp(sp, first_frame); - if (!sp) - break; - first_frame = 0; - } - } else { -#ifdef CONFIG_PPC64 - if (!is_32bit_task()) { - while (depth--) { - sp = user_getsp64(sp, first_frame); - if (!sp) - break; - first_frame = 0; - } - return; - } -#endif - - while (depth--) { - sp = user_getsp32(sp, first_frame); - if (!sp) - break; - first_frame = 0; - } - } -} diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h deleted file mode 100644 index e198efa9113a..000000000000 --- a/arch/powerpc/oprofile/cell/pr_util.h +++ /dev/null @@ -1,110 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ - /* - * Cell Broadband Engine OProfile Support - * - * (C) Copyright IBM Corporation 2006 - * - * Author: Maynard Johnson <maynardj@us.ibm.com> - */ - -#ifndef PR_UTIL_H -#define PR_UTIL_H - -#include <linux/cpumask.h> -#include <linux/oprofile.h> -#include <asm/cell-pmu.h> -#include <asm/cell-regs.h> -#include <asm/spu.h> - -/* Defines used for sync_start */ -#define SKIP_GENERIC_SYNC 0 -#define SYNC_START_ERROR -1 -#define DO_GENERIC_SYNC 1 -#define SPUS_PER_NODE 8 -#define DEFAULT_TIMER_EXPIRE (HZ / 10) - -extern struct delayed_work spu_work; -extern int spu_prof_running; - -#define TRACE_ARRAY_SIZE 1024 - -extern spinlock_t oprof_spu_smpl_arry_lck; - -struct spu_overlay_info { /* map of sections within an SPU overlay */ - unsigned int vma; /* SPU virtual memory address from elf */ - unsigned int size; /* size of section from elf */ - unsigned int offset; /* offset of section into elf file */ - unsigned int buf; -}; - -struct vma_to_fileoffset_map { /* map of sections within an SPU program */ - struct vma_to_fileoffset_map *next; /* list pointer */ - unsigned int vma; /* SPU virtual memory address from elf */ - unsigned int size; /* size of section from elf */ - unsigned int offset; /* offset of section into elf file */ - unsigned int guard_ptr; - unsigned int guard_val; - /* - * The guard pointer is an entry in the _ovly_buf_table, - * computed using ovly.buf as the index into the table. Since - * ovly.buf values begin at '1' to reference the first (or 0th) - * entry in the _ovly_buf_table, the computation subtracts 1 - * from ovly.buf. - * The guard value is stored in the _ovly_buf_table entry and - * is an index (starting at 1) back to the _ovly_table entry - * that is pointing at this _ovly_buf_table entry. So, for - * example, for an overlay scenario with one overlay segment - * and two overlay sections: - * - Section 1 points to the first entry of the - * _ovly_buf_table, which contains a guard value - * of '1', referencing the first (index=0) entry of - * _ovly_table. - * - Section 2 points to the second entry of the - * _ovly_buf_table, which contains a guard value - * of '2', referencing the second (index=1) entry of - * _ovly_table. - */ - -}; - -struct spu_buffer { - int last_guard_val; - int ctx_sw_seen; - unsigned long *buff; - unsigned int head, tail; -}; - - -/* The three functions below are for maintaining and accessing - * the vma-to-fileoffset map. - */ -struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu, - unsigned long objectid); -unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map, - unsigned int vma, const struct spu *aSpu, - int *grd_val); -void vma_map_free(struct vma_to_fileoffset_map *map); - -/* - * Entry point for SPU profiling. - * cycles_reset is the SPU_CYCLES count value specified by the user. - */ -int start_spu_profiling_cycles(unsigned int cycles_reset); -void start_spu_profiling_events(void); - -void stop_spu_profiling_cycles(void); -void stop_spu_profiling_events(void); - -/* add the necessary profiling hooks */ -int spu_sync_start(void); - -/* remove the hooks */ -int spu_sync_stop(void); - -/* Record SPU program counter samples to the oprofile event buffer. */ -void spu_sync_buffer(int spu_num, unsigned int *samples, - int num_samples); - -void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset); - -#endif /* PR_UTIL_H */ diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c deleted file mode 100644 index cdf883445a9f..000000000000 --- a/arch/powerpc/oprofile/cell/spu_profiler.c +++ /dev/null @@ -1,248 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Cell Broadband Engine OProfile Support - * - * (C) Copyright IBM Corporation 2006 - * - * Authors: Maynard Johnson <maynardj@us.ibm.com> - * Carl Love <carll@us.ibm.com> - */ - -#include <linux/hrtimer.h> -#include <linux/smp.h> -#include <linux/slab.h> -#include <asm/cell-pmu.h> -#include <asm/time.h> -#include "pr_util.h" - -#define SCALE_SHIFT 14 - -static u32 *samples; - -/* spu_prof_running is a flag used to indicate if spu profiling is enabled - * or not. It is set by the routines start_spu_profiling_cycles() and - * start_spu_profiling_events(). The flag is cleared by the routines - * stop_spu_profiling_cycles() and stop_spu_profiling_events(). These - * routines are called via global_start() and global_stop() which are called in - * op_powerpc_start() and op_powerpc_stop(). These routines are called once - * per system as a result of the user starting/stopping oprofile. Hence, only - * one CPU per user at a time will be changing the value of spu_prof_running. - * In general, OProfile does not protect against multiple users trying to run - * OProfile at a time. - */ -int spu_prof_running; -static unsigned int profiling_interval; - -#define NUM_SPU_BITS_TRBUF 16 -#define SPUS_PER_TB_ENTRY 4 - -#define SPU_PC_MASK 0xFFFF - -DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck); -static unsigned long oprof_spu_smpl_arry_lck_flags; - -void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset) -{ - unsigned long ns_per_cyc; - - if (!freq_khz) - freq_khz = ppc_proc_freq/1000; - - /* To calculate a timeout in nanoseconds, the basic - * formula is ns = cycles_reset * (NSEC_PER_SEC / cpu frequency). - * To avoid floating point math, we use the scale math - * technique as described in linux/jiffies.h. We use - * a scale factor of SCALE_SHIFT, which provides 4 decimal places - * of precision. This is close enough for the purpose at hand. - * - * The value of the timeout should be small enough that the hw - * trace buffer will not get more than about 1/3 full for the - * maximum user specified (the LFSR value) hw sampling frequency. - * This is to ensure the trace buffer will never fill even if the - * kernel thread scheduling varies under a heavy system load. - */ - - ns_per_cyc = (USEC_PER_SEC << SCALE_SHIFT)/freq_khz; - profiling_interval = (ns_per_cyc * cycles_reset) >> SCALE_SHIFT; - -} - -/* - * Extract SPU PC from trace buffer entry - */ -static void spu_pc_extract(int cpu, int entry) -{ - /* the trace buffer is 128 bits */ - u64 trace_buffer[2]; - u64 spu_mask; - int spu; - - spu_mask = SPU_PC_MASK; - - /* Each SPU PC is 16 bits; hence, four spus in each of - * the two 64-bit buffer entries that make up the - * 128-bit trace_buffer entry. Process two 64-bit values - * simultaneously. - * trace[0] SPU PC contents are: 0 1 2 3 - * trace[1] SPU PC contents are: 4 5 6 7 - */ - - cbe_read_trace_buffer(cpu, trace_buffer); - - for (spu = SPUS_PER_TB_ENTRY-1; spu >= 0; spu--) { - /* spu PC trace entry is upper 16 bits of the - * 18 bit SPU program counter - */ - samples[spu * TRACE_ARRAY_SIZE + entry] - = (spu_mask & trace_buffer[0]) << 2; - samples[(spu + SPUS_PER_TB_ENTRY) * TRACE_ARRAY_SIZE + entry] - = (spu_mask & trace_buffer[1]) << 2; - - trace_buffer[0] = trace_buffer[0] >> NUM_SPU_BITS_TRBUF; - trace_buffer[1] = trace_buffer[1] >> NUM_SPU_BITS_TRBUF; - } -} - -static int cell_spu_pc_collection(int cpu) -{ - u32 trace_addr; - int entry; - - /* process the collected SPU PC for the node */ - - entry = 0; - - trace_addr = cbe_read_pm(cpu, trace_address); - while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) { - /* there is data in the trace buffer to process */ - spu_pc_extract(cpu, entry); - - entry++; - - if (entry >= TRACE_ARRAY_SIZE) - /* spu_samples is full */ - break; - - trace_addr = cbe_read_pm(cpu, trace_address); - } - - return entry; -} - - -static enum hrtimer_restart profile_spus(struct hrtimer *timer) -{ - ktime_t kt; - int cpu, node, k, num_samples, spu_num; - - if (!spu_prof_running) - goto stop; - - for_each_online_cpu(cpu) { - if (cbe_get_hw_thread_id(cpu)) - continue; - - node = cbe_cpu_to_node(cpu); - - /* There should only be one kernel thread at a time processing - * the samples. In the very unlikely case that the processing - * is taking a very long time and multiple kernel threads are - * started to process the samples. Make sure only one kernel - * thread is working on the samples array at a time. The - * sample array must be loaded and then processed for a given - * cpu. The sample array is not per cpu. - */ - spin_lock_irqsave(&oprof_spu_smpl_arry_lck, - oprof_spu_smpl_arry_lck_flags); - num_samples = cell_spu_pc_collection(cpu); - - if (num_samples == 0) { - spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck, - oprof_spu_smpl_arry_lck_flags); - continue; - } - - for (k = 0; k < SPUS_PER_NODE; k++) { - spu_num = k + (node * SPUS_PER_NODE); - spu_sync_buffer(spu_num, - samples + (k * TRACE_ARRAY_SIZE), - num_samples); - } - - spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck, - oprof_spu_smpl_arry_lck_flags); - - } - smp_wmb(); /* insure spu event buffer updates are written */ - /* don't want events intermingled... */ - - kt = profiling_interval; - if (!spu_prof_running) - goto stop; - hrtimer_forward(timer, timer->base->get_time(), kt); - return HRTIMER_RESTART; - - stop: - printk(KERN_INFO "SPU_PROF: spu-prof timer ending\n"); - return HRTIMER_NORESTART; -} - -static struct hrtimer timer; -/* - * Entry point for SPU cycle profiling. - * NOTE: SPU profiling is done system-wide, not per-CPU. - * - * cycles_reset is the count value specified by the user when - * setting up OProfile to count SPU_CYCLES. - */ -int start_spu_profiling_cycles(unsigned int cycles_reset) -{ - ktime_t kt; - - pr_debug("timer resolution: %lu\n", TICK_NSEC); - kt = profiling_interval; - hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hrtimer_set_expires(&timer, kt); - timer.function = profile_spus; - - /* Allocate arrays for collecting SPU PC samples */ - samples = kcalloc(SPUS_PER_NODE * TRACE_ARRAY_SIZE, sizeof(u32), - GFP_KERNEL); - - if (!samples) - return -ENOMEM; - - spu_prof_running = 1; - hrtimer_start(&timer, kt, HRTIMER_MODE_REL); - schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE); - - return 0; -} - -/* - * Entry point for SPU event profiling. - * NOTE: SPU profiling is done system-wide, not per-CPU. - * - * cycles_reset is the count value specified by the user when - * setting up OProfile to count SPU_CYCLES. - */ -void start_spu_profiling_events(void) -{ - spu_prof_running = 1; - schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE); - - return; -} - -void stop_spu_profiling_cycles(void) -{ - spu_prof_running = 0; - hrtimer_cancel(&timer); - kfree(samples); - pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n"); -} - -void stop_spu_profiling_events(void) -{ - spu_prof_running = 0; -} diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c deleted file mode 100644 index 489f993100d5..000000000000 --- a/arch/powerpc/oprofile/cell/spu_task_sync.c +++ /dev/null @@ -1,657 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Cell Broadband Engine OProfile Support - * - * (C) Copyright IBM Corporation 2006 - * - * Author: Maynard Johnson <maynardj@us.ibm.com> - */ - -/* The purpose of this file is to handle SPU event task switching - * and to record SPU context information into the OProfile - * event buffer. - * - * Additionally, the spu_sync_buffer function is provided as a helper - * for recoding actual SPU program counter samples to the event buffer. - */ -#include <linux/dcookies.h> -#include <linux/kref.h> -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/module.h> -#include <linux/notifier.h> -#include <linux/numa.h> -#include <linux/oprofile.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include "pr_util.h" - -#define RELEASE_ALL 9999 - -static DEFINE_SPINLOCK(buffer_lock); -static DEFINE_SPINLOCK(cache_lock); -static int num_spu_nodes; -static int spu_prof_num_nodes; - -struct spu_buffer spu_buff[MAX_NUMNODES * SPUS_PER_NODE]; -struct delayed_work spu_work; -static unsigned max_spu_buff; - -static void spu_buff_add(unsigned long int value, int spu) -{ - /* spu buff is a circular buffer. Add entries to the - * head. Head is the index to store the next value. - * The buffer is full when there is one available entry - * in the queue, i.e. head and tail can't be equal. - * That way we can tell the difference between the - * buffer being full versus empty. - * - * ASSUMPTION: the buffer_lock is held when this function - * is called to lock the buffer, head and tail. - */ - int full = 1; - - if (spu_buff[spu].head >= spu_buff[spu].tail) { - if ((spu_buff[spu].head - spu_buff[spu].tail) - < (max_spu_buff - 1)) - full = 0; - - } else if (spu_buff[spu].tail > spu_buff[spu].head) { - if ((spu_buff[spu].tail - spu_buff[spu].head) - > 1) - full = 0; - } - - if (!full) { - spu_buff[spu].buff[spu_buff[spu].head] = value; - spu_buff[spu].head++; - - if (spu_buff[spu].head >= max_spu_buff) - spu_buff[spu].head = 0; - } else { - /* From the user's perspective make the SPU buffer - * size management/overflow look like we are using - * per cpu buffers. The user uses the same - * per cpu parameter to adjust the SPU buffer size. - * Increment the sample_lost_overflow to inform - * the user the buffer size needs to be increased. - */ - oprofile_cpu_buffer_inc_smpl_lost(); - } -} - -/* This function copies the per SPU buffers to the - * OProfile kernel buffer. - */ -static void sync_spu_buff(void) -{ - int spu; - unsigned long flags; - int curr_head; - - for (spu = 0; spu < num_spu_nodes; spu++) { - /* In case there was an issue and the buffer didn't - * get created skip it. - */ - if (spu_buff[spu].buff == NULL) - continue; - - /* Hold the lock to make sure the head/tail - * doesn't change while spu_buff_add() is - * deciding if the buffer is full or not. - * Being a little paranoid. - */ - spin_lock_irqsave(&buffer_lock, flags); - curr_head = spu_buff[spu].head; - spin_unlock_irqrestore(&buffer_lock, flags); - - /* Transfer the current contents to the kernel buffer. - * data can still be added to the head of the buffer. - */ - oprofile_put_buff(spu_buff[spu].buff, - spu_buff[spu].tail, - curr_head, max_spu_buff); - - spin_lock_irqsave(&buffer_lock, flags); - spu_buff[spu].tail = curr_head; - spin_unlock_irqrestore(&buffer_lock, flags); - } - -} - -static void wq_sync_spu_buff(struct work_struct *work) -{ - /* move data from spu buffers to kernel buffer */ - sync_spu_buff(); - - /* only reschedule if profiling is not done */ - if (spu_prof_running) - schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE); -} - -/* Container for caching information about an active SPU task. */ -struct cached_info { - struct vma_to_fileoffset_map *map; - struct spu *the_spu; /* needed to access pointer to local_store */ - struct kref cache_ref; -}; - -static struct cached_info *spu_info[MAX_NUMNODES * 8]; - -static void destroy_cached_info(struct kref *kref) -{ - struct cached_info *info; - - info = container_of(kref, struct cached_info, cache_ref); - vma_map_free(info->map); - kfree(info); - module_put(THIS_MODULE); -} - -/* Return the cached_info for the passed SPU number. - * ATTENTION: Callers are responsible for obtaining the - * cache_lock if needed prior to invoking this function. - */ -static struct cached_info *get_cached_info(struct spu *the_spu, int spu_num) -{ - struct kref *ref; - struct cached_info *ret_info; - - if (spu_num >= num_spu_nodes) { - printk(KERN_ERR "SPU_PROF: " - "%s, line %d: Invalid index %d into spu info cache\n", - __func__, __LINE__, spu_num); - ret_info = NULL; - goto out; - } - if (!spu_info[spu_num] && the_spu) { - ref = spu_get_profile_private_kref(the_spu->ctx); - if (ref) { - spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref); - kref_get(&spu_info[spu_num]->cache_ref); - } - } - - ret_info = spu_info[spu_num]; - out: - return ret_info; -} - - -/* Looks for cached info for the passed spu. If not found, the - * cached info is created for the passed spu. - * Returns 0 for success; otherwise, -1 for error. - */ -static int -prepare_cached_spu_info(struct spu *spu, unsigned long objectId) -{ - unsigned long flags; - struct vma_to_fileoffset_map *new_map; - int retval = 0; - struct cached_info *info; - - /* We won't bother getting cache_lock here since - * don't do anything with the cached_info that's returned. - */ - info = get_cached_info(spu, spu->number); - - if (info) { - pr_debug("Found cached SPU info.\n"); - goto out; - } - - /* Create cached_info and set spu_info[spu->number] to point to it. - * spu->number is a system-wide value, not a per-node value. - */ - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) { - printk(KERN_ERR "SPU_PROF: " - "%s, line %d: create vma_map failed\n", - __func__, __LINE__); - retval = -ENOMEM; - goto err_alloc; - } - new_map = create_vma_map(spu, objectId); - if (!new_map) { - printk(KERN_ERR "SPU_PROF: " - "%s, line %d: create vma_map failed\n", - __func__, __LINE__); - retval = -ENOMEM; - goto err_alloc; - } - - pr_debug("Created vma_map\n"); - info->map = new_map; - info->the_spu = spu; - kref_init(&info->cache_ref); - spin_lock_irqsave(&cache_lock, flags); - spu_info[spu->number] = info; - /* Increment count before passing off ref to SPUFS. */ - kref_get(&info->cache_ref); - - /* We increment the module refcount here since SPUFS is - * responsible for the final destruction of the cached_info, - * and it must be able to access the destroy_cached_info() - * function defined in the OProfile module. We decrement - * the module refcount in destroy_cached_info. - */ - try_module_get(THIS_MODULE); - spu_set_profile_private_kref(spu->ctx, &info->cache_ref, - destroy_cached_info); - spin_unlock_irqrestore(&cache_lock, flags); - goto out; - -err_alloc: - kfree(info); -out: - return retval; -} - -/* - * NOTE: The caller is responsible for locking the - * cache_lock prior to calling this function. - */ -static int release_cached_info(int spu_index) -{ - int index, end; - - if (spu_index == RELEASE_ALL) { - end = num_spu_nodes; - index = 0; - } else { - if (spu_index >= num_spu_nodes) { - printk(KERN_ERR "SPU_PROF: " - "%s, line %d: " - "Invalid index %d into spu info cache\n", - __func__, __LINE__, spu_index); - goto out; - } - end = spu_index + 1; - index = spu_index; - } - for (; index < end; index++) { - if (spu_info[index]) { - kref_put(&spu_info[index]->cache_ref, - destroy_cached_info); - spu_info[index] = NULL; - } - } - -out: - return 0; -} - -/* The source code for fast_get_dcookie was "borrowed" - * from drivers/oprofile/buffer_sync.c. - */ - -/* Optimisation. We can manage without taking the dcookie sem - * because we cannot reach this code without at least one - * dcookie user still being registered (namely, the reader - * of the event buffer). - */ -static inline unsigned long fast_get_dcookie(const struct path *path) -{ - unsigned long cookie; - - if (path->dentry->d_flags & DCACHE_COOKIE) - return (unsigned long)path->dentry; - get_dcookie(path, &cookie); - return cookie; -} - -/* Look up the dcookie for the task's mm->exe_file, - * which corresponds loosely to "application name". Also, determine - * the offset for the SPU ELF object. If computed offset is - * non-zero, it implies an embedded SPU object; otherwise, it's a - * separate SPU binary, in which case we retrieve it's dcookie. - * For the embedded case, we must determine if SPU ELF is embedded - * in the executable application or another file (i.e., shared lib). - * If embedded in a shared lib, we must get the dcookie and return - * that to the caller. - */ -static unsigned long -get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp, - unsigned long *spu_bin_dcookie, - unsigned long spu_ref) -{ - unsigned long app_cookie = 0; - unsigned int my_offset = 0; - struct vm_area_struct *vma; - struct file *exe_file; - struct mm_struct *mm = spu->mm; - - if (!mm) - goto out; - - exe_file = get_mm_exe_file(mm); - if (exe_file) { - app_cookie = fast_get_dcookie(&exe_file->f_path); - pr_debug("got dcookie for %pD\n", exe_file); - fput(exe_file); - } - - mmap_read_lock(mm); - for (vma = mm->mmap; vma; vma = vma->vm_next) { - if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref) - continue; - my_offset = spu_ref - vma->vm_start; - if (!vma->vm_file) - goto fail_no_image_cookie; - - pr_debug("Found spu ELF at %X(object-id:%lx) for file %pD\n", - my_offset, spu_ref, vma->vm_file); - *offsetp = my_offset; - break; - } - - *spu_bin_dcookie = fast_get_dcookie(&vma->vm_file->f_path); - pr_debug("got dcookie for %pD\n", vma->vm_file); - - mmap_read_unlock(mm); - -out: - return app_cookie; - -fail_no_image_cookie: - mmap_read_unlock(mm); - - printk(KERN_ERR "SPU_PROF: " - "%s, line %d: Cannot find dcookie for SPU binary\n", - __func__, __LINE__); - goto out; -} - - - -/* This function finds or creates cached context information for the - * passed SPU and records SPU context information into the OProfile - * event buffer. - */ -static int process_context_switch(struct spu *spu, unsigned long objectId) -{ - unsigned long flags; - int retval; - unsigned int offset = 0; - unsigned long spu_cookie = 0, app_dcookie; - - retval = prepare_cached_spu_info(spu, objectId); - if (retval) - goto out; - - /* Get dcookie first because a mutex_lock is taken in that - * code path, so interrupts must not be disabled. - */ - app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId); - if (!app_dcookie || !spu_cookie) { - retval = -ENOENT; - goto out; - } - - /* Record context info in event buffer */ - spin_lock_irqsave(&buffer_lock, flags); - spu_buff_add(ESCAPE_CODE, spu->number); - spu_buff_add(SPU_CTX_SWITCH_CODE, spu->number); - spu_buff_add(spu->number, spu->number); - spu_buff_add(spu->pid, spu->number); - spu_buff_add(spu->tgid, spu->number); - spu_buff_add(app_dcookie, spu->number); - spu_buff_add(spu_cookie, spu->number); - spu_buff_add(offset, spu->number); - - /* Set flag to indicate SPU PC data can now be written out. If - * the SPU program counter data is seen before an SPU context - * record is seen, the postprocessing will fail. - */ - spu_buff[spu->number].ctx_sw_seen = 1; - - spin_unlock_irqrestore(&buffer_lock, flags); - smp_wmb(); /* insure spu event buffer updates are written */ - /* don't want entries intermingled... */ -out: - return retval; -} - -/* - * This function is invoked on either a bind_context or unbind_context. - * If called for an unbind_context, the val arg is 0; otherwise, - * it is the object-id value for the spu context. - * The data arg is of type 'struct spu *'. - */ -static int spu_active_notify(struct notifier_block *self, unsigned long val, - void *data) -{ - int retval; - unsigned long flags; - struct spu *the_spu = data; - - pr_debug("SPU event notification arrived\n"); - if (!val) { - spin_lock_irqsave(&cache_lock, flags); - retval = release_cached_info(the_spu->number); - spin_unlock_irqrestore(&cache_lock, flags); - } else { - retval = process_context_switch(the_spu, val); - } - return retval; -} - -static struct notifier_block spu_active = { - .notifier_call = spu_active_notify, -}; - -static int number_of_online_nodes(void) -{ - u32 cpu; u32 tmp; - int nodes = 0; - for_each_online_cpu(cpu) { - tmp = cbe_cpu_to_node(cpu) + 1; - if (tmp > nodes) - nodes++; - } - return nodes; -} - -static int oprofile_spu_buff_create(void) -{ - int spu; - - max_spu_buff = oprofile_get_cpu_buffer_size(); - - for (spu = 0; spu < num_spu_nodes; spu++) { - /* create circular buffers to store the data in. - * use locks to manage accessing the buffers - */ - spu_buff[spu].head = 0; - spu_buff[spu].tail = 0; - - /* - * Create a buffer for each SPU. Can't reliably - * create a single buffer for all spus due to not - * enough contiguous kernel memory. - */ - - spu_buff[spu].buff = kzalloc((max_spu_buff - * sizeof(unsigned long)), - GFP_KERNEL); - - if (!spu_buff[spu].buff) { - printk(KERN_ERR "SPU_PROF: " - "%s, line %d: oprofile_spu_buff_create " - "failed to allocate spu buffer %d.\n", - __func__, __LINE__, spu); - - /* release the spu buffers that have been allocated */ - while (spu >= 0) { - kfree(spu_buff[spu].buff); - spu_buff[spu].buff = 0; - spu--; - } - return -ENOMEM; - } - } - return 0; -} - -/* The main purpose of this function is to synchronize - * OProfile with SPUFS by registering to be notified of - * SPU task switches. - * - * NOTE: When profiling SPUs, we must ensure that only - * spu_sync_start is invoked and not the generic sync_start - * in drivers/oprofile/oprof.c. A return value of - * SKIP_GENERIC_SYNC or SYNC_START_ERROR will - * accomplish this. - */ -int spu_sync_start(void) -{ - int spu; - int ret = SKIP_GENERIC_SYNC; - int register_ret; - unsigned long flags = 0; - - spu_prof_num_nodes = number_of_online_nodes(); - num_spu_nodes = spu_prof_num_nodes * 8; - INIT_DELAYED_WORK(&spu_work, wq_sync_spu_buff); - - /* create buffer for storing the SPU data to put in - * the kernel buffer. - */ - ret = oprofile_spu_buff_create(); - if (ret) - goto out; - - spin_lock_irqsave(&buffer_lock, flags); - for (spu = 0; spu < num_spu_nodes; spu++) { - spu_buff_add(ESCAPE_CODE, spu); - spu_buff_add(SPU_PROFILING_CODE, spu); - spu_buff_add(num_spu_nodes, spu); - } - spin_unlock_irqrestore(&buffer_lock, flags); - - for (spu = 0; spu < num_spu_nodes; spu++) { - spu_buff[spu].ctx_sw_seen = 0; - spu_buff[spu].last_guard_val = 0; - } - - /* Register for SPU events */ - register_ret = spu_switch_event_register(&spu_active); - if (register_ret) { - ret = SYNC_START_ERROR; - goto out; - } - - pr_debug("spu_sync_start -- running.\n"); -out: - return ret; -} - -/* Record SPU program counter samples to the oprofile event buffer. */ -void spu_sync_buffer(int spu_num, unsigned int *samples, - int num_samples) -{ - unsigned long long file_offset; - unsigned long flags; - int i; - struct vma_to_fileoffset_map *map; - struct spu *the_spu; - unsigned long long spu_num_ll = spu_num; - unsigned long long spu_num_shifted = spu_num_ll << 32; - struct cached_info *c_info; - - /* We need to obtain the cache_lock here because it's - * possible that after getting the cached_info, the SPU job - * corresponding to this cached_info may end, thus resulting - * in the destruction of the cached_info. - */ - spin_lock_irqsave(&cache_lock, flags); - c_info = get_cached_info(NULL, spu_num); - if (!c_info) { - /* This legitimately happens when the SPU task ends before all - * samples are recorded. - * No big deal -- so we just drop a few samples. - */ - pr_debug("SPU_PROF: No cached SPU context " - "for SPU #%d. Dropping samples.\n", spu_num); - goto out; - } - - map = c_info->map; - the_spu = c_info->the_spu; - spin_lock(&buffer_lock); - for (i = 0; i < num_samples; i++) { - unsigned int sample = *(samples+i); - int grd_val = 0; - file_offset = 0; - if (sample == 0) - continue; - file_offset = vma_map_lookup( map, sample, the_spu, &grd_val); - - /* If overlays are used by this SPU application, the guard - * value is non-zero, indicating which overlay section is in - * use. We need to discard samples taken during the time - * period which an overlay occurs (i.e., guard value changes). - */ - if (grd_val && grd_val != spu_buff[spu_num].last_guard_val) { - spu_buff[spu_num].last_guard_val = grd_val; - /* Drop the rest of the samples. */ - break; - } - - /* We must ensure that the SPU context switch has been written - * out before samples for the SPU. Otherwise, the SPU context - * information is not available and the postprocessing of the - * SPU PC will fail with no available anonymous map information. - */ - if (spu_buff[spu_num].ctx_sw_seen) - spu_buff_add((file_offset | spu_num_shifted), - spu_num); - } - spin_unlock(&buffer_lock); -out: - spin_unlock_irqrestore(&cache_lock, flags); -} - - -int spu_sync_stop(void) -{ - unsigned long flags = 0; - int ret; - int k; - - ret = spu_switch_event_unregister(&spu_active); - - if (ret) - printk(KERN_ERR "SPU_PROF: " - "%s, line %d: spu_switch_event_unregister " \ - "returned %d\n", - __func__, __LINE__, ret); - - /* flush any remaining data in the per SPU buffers */ - sync_spu_buff(); - - spin_lock_irqsave(&cache_lock, flags); - ret = release_cached_info(RELEASE_ALL); - spin_unlock_irqrestore(&cache_lock, flags); - - /* remove scheduled work queue item rather then waiting - * for every queued entry to execute. Then flush pending - * system wide buffer to event buffer. - */ - cancel_delayed_work(&spu_work); - - for (k = 0; k < num_spu_nodes; k++) { - spu_buff[k].ctx_sw_seen = 0; - - /* - * spu_sys_buff will be null if there was a problem - * allocating the buffer. Only delete if it exists. - */ - kfree(spu_buff[k].buff); - spu_buff[k].buff = 0; - } - pr_debug("spu_sync_stop -- done.\n"); - return ret; -} - diff --git a/arch/powerpc/oprofile/cell/vma_map.c b/arch/powerpc/oprofile/cell/vma_map.c deleted file mode 100644 index 7c4b19cfde88..000000000000 --- a/arch/powerpc/oprofile/cell/vma_map.c +++ /dev/null @@ -1,279 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Cell Broadband Engine OProfile Support - * - * (C) Copyright IBM Corporation 2006 - * - * Author: Maynard Johnson <maynardj@us.ibm.com> - */ - -/* The code in this source file is responsible for generating - * vma-to-fileOffset maps for both overlay and non-overlay SPU - * applications. - */ - -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/uaccess.h> -#include <linux/elf.h> -#include <linux/slab.h> -#include "pr_util.h" - - -void vma_map_free(struct vma_to_fileoffset_map *map) -{ - while (map) { - struct vma_to_fileoffset_map *next = map->next; - kfree(map); - map = next; - } -} - -unsigned int -vma_map_lookup(struct vma_to_fileoffset_map *map, unsigned int vma, - const struct spu *aSpu, int *grd_val) -{ - /* - * Default the offset to the physical address + a flag value. - * Addresses of dynamically generated code can't be found in the vma - * map. For those addresses the flagged value will be sent on to - * the user space tools so they can be reported rather than just - * thrown away. - */ - u32 offset = 0x10000000 + vma; - u32 ovly_grd; - - for (; map; map = map->next) { - if (vma < map->vma || vma >= map->vma + map->size) - continue; - - if (map->guard_ptr) { - ovly_grd = *(u32 *)(aSpu->local_store + map->guard_ptr); - if (ovly_grd != map->guard_val) - continue; - *grd_val = ovly_grd; - } - offset = vma - map->vma + map->offset; - break; - } - - return offset; -} - -static struct vma_to_fileoffset_map * -vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma, - unsigned int size, unsigned int offset, unsigned int guard_ptr, - unsigned int guard_val) -{ - struct vma_to_fileoffset_map *new = kzalloc(sizeof(*new), GFP_KERNEL); - - if (!new) { - printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n", - __func__, __LINE__); - vma_map_free(map); - return NULL; - } - - new->next = map; - new->vma = vma; - new->size = size; - new->offset = offset; - new->guard_ptr = guard_ptr; - new->guard_val = guard_val; - - return new; -} - - -/* Parse SPE ELF header and generate a list of vma_maps. - * A pointer to the first vma_map in the generated list - * of vma_maps is returned. */ -struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu, - unsigned long __spu_elf_start) -{ - static const unsigned char expected[EI_PAD] = { - [EI_MAG0] = ELFMAG0, - [EI_MAG1] = ELFMAG1, - [EI_MAG2] = ELFMAG2, - [EI_MAG3] = ELFMAG3, - [EI_CLASS] = ELFCLASS32, - [EI_DATA] = ELFDATA2MSB, - [EI_VERSION] = EV_CURRENT, - [EI_OSABI] = ELFOSABI_NONE - }; - - int grd_val; - struct vma_to_fileoffset_map *map = NULL; - void __user *spu_elf_start = (void __user *)__spu_elf_start; - struct spu_overlay_info ovly; - unsigned int overlay_tbl_offset = -1; - Elf32_Phdr __user *phdr_start; - Elf32_Shdr __user *shdr_start; - Elf32_Ehdr ehdr; - Elf32_Phdr phdr; - Elf32_Shdr shdr, shdr_str; - Elf32_Sym sym; - int i, j; - char name[32]; - - unsigned int ovly_table_sym = 0; - unsigned int ovly_buf_table_sym = 0; - unsigned int ovly_table_end_sym = 0; - unsigned int ovly_buf_table_end_sym = 0; - struct spu_overlay_info __user *ovly_table; - unsigned int n_ovlys; - - /* Get and validate ELF header. */ - - if (copy_from_user(&ehdr, spu_elf_start, sizeof (ehdr))) - goto fail; - - if (memcmp(ehdr.e_ident, expected, EI_PAD) != 0) { - printk(KERN_ERR "SPU_PROF: " - "%s, line %d: Unexpected e_ident parsing SPU ELF\n", - __func__, __LINE__); - goto fail; - } - if (ehdr.e_machine != EM_SPU) { - printk(KERN_ERR "SPU_PROF: " - "%s, line %d: Unexpected e_machine parsing SPU ELF\n", - __func__, __LINE__); - goto fail; - } - if (ehdr.e_type != ET_EXEC) { - printk(KERN_ERR "SPU_PROF: " - "%s, line %d: Unexpected e_type parsing SPU ELF\n", - __func__, __LINE__); - goto fail; - } - phdr_start = spu_elf_start + ehdr.e_phoff; - shdr_start = spu_elf_start + ehdr.e_shoff; - - /* Traverse program headers. */ - for (i = 0; i < ehdr.e_phnum; i++) { - if (copy_from_user(&phdr, phdr_start + i, sizeof(phdr))) - goto fail; - - if (phdr.p_type != PT_LOAD) - continue; - if (phdr.p_flags & (1 << 27)) - continue; - - map = vma_map_add(map, phdr.p_vaddr, phdr.p_memsz, - phdr.p_offset, 0, 0); - if (!map) - goto fail; - } - - pr_debug("SPU_PROF: Created non-overlay maps\n"); - /* Traverse section table and search for overlay-related symbols. */ - for (i = 0; i < ehdr.e_shnum; i++) { - if (copy_from_user(&shdr, shdr_start + i, sizeof(shdr))) - goto fail; - - if (shdr.sh_type != SHT_SYMTAB) - continue; - if (shdr.sh_entsize != sizeof (sym)) - continue; - - if (copy_from_user(&shdr_str, - shdr_start + shdr.sh_link, - sizeof(shdr))) - goto fail; - - if (shdr_str.sh_type != SHT_STRTAB) - goto fail; - - for (j = 0; j < shdr.sh_size / sizeof (sym); j++) { - if (copy_from_user(&sym, spu_elf_start + - shdr.sh_offset + - j * sizeof (sym), - sizeof (sym))) - goto fail; - - if (copy_from_user(name, - spu_elf_start + shdr_str.sh_offset + - sym.st_name, - 20)) - goto fail; - - if (memcmp(name, "_ovly_table", 12) == 0) - ovly_table_sym = sym.st_value; - if (memcmp(name, "_ovly_buf_table", 16) == 0) - ovly_buf_table_sym = sym.st_value; - if (memcmp(name, "_ovly_table_end", 16) == 0) - ovly_table_end_sym = sym.st_value; - if (memcmp(name, "_ovly_buf_table_end", 20) == 0) - ovly_buf_table_end_sym = sym.st_value; - } - } - - /* If we don't have overlays, we're done. */ - if (ovly_table_sym == 0 || ovly_buf_table_sym == 0 - || ovly_table_end_sym == 0 || ovly_buf_table_end_sym == 0) { - pr_debug("SPU_PROF: No overlay table found\n"); - goto out; - } else { - pr_debug("SPU_PROF: Overlay table found\n"); - } - - /* The _ovly_table symbol represents a table with one entry - * per overlay section. The _ovly_buf_table symbol represents - * a table with one entry per overlay region. - * The struct spu_overlay_info gives the structure of the _ovly_table - * entries. The structure of _ovly_table_buf is simply one - * u32 word per entry. - */ - overlay_tbl_offset = vma_map_lookup(map, ovly_table_sym, - aSpu, &grd_val); - if (overlay_tbl_offset > 0x10000000) { - printk(KERN_ERR "SPU_PROF: " - "%s, line %d: Error finding SPU overlay table\n", - __func__, __LINE__); - goto fail; - } - ovly_table = spu_elf_start + overlay_tbl_offset; - - n_ovlys = (ovly_table_end_sym - - ovly_table_sym) / sizeof (ovly); - - /* Traverse overlay table. */ - for (i = 0; i < n_ovlys; i++) { - if (copy_from_user(&ovly, ovly_table + i, sizeof (ovly))) - goto fail; - - /* The ovly.vma/size/offset arguments are analogous to the same - * arguments used above for non-overlay maps. The final two - * args are referred to as the guard pointer and the guard - * value. - * The guard pointer is an entry in the _ovly_buf_table, - * computed using ovly.buf as the index into the table. Since - * ovly.buf values begin at '1' to reference the first (or 0th) - * entry in the _ovly_buf_table, the computation subtracts 1 - * from ovly.buf. - * The guard value is stored in the _ovly_buf_table entry and - * is an index (starting at 1) back to the _ovly_table entry - * that is pointing at this _ovly_buf_table entry. So, for - * example, for an overlay scenario with one overlay segment - * and two overlay sections: - * - Section 1 points to the first entry of the - * _ovly_buf_table, which contains a guard value - * of '1', referencing the first (index=0) entry of - * _ovly_table. - * - Section 2 points to the second entry of the - * _ovly_buf_table, which contains a guard value - * of '2', referencing the second (index=1) entry of - * _ovly_table. - */ - map = vma_map_add(map, ovly.vma, ovly.size, ovly.offset, - ovly_buf_table_sym + (ovly.buf-1) * 4, i+1); - if (!map) - goto fail; - } - goto out; - - fail: - map = NULL; - out: - return map; -} diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c deleted file mode 100644 index 0fb528c2b3a1..000000000000 --- a/arch/powerpc/oprofile/common.c +++ /dev/null @@ -1,243 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * PPC 64 oprofile support: - * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM - * PPC 32 oprofile support: (based on PPC 64 support) - * Copyright (C) Freescale Semiconductor, Inc 2004 - * Author: Andy Fleming - * - * Based on alpha version. - */ - -#include <linux/oprofile.h> -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/errno.h> -#include <asm/ptrace.h> -#include <asm/pmc.h> -#include <asm/cputable.h> -#include <asm/oprofile_impl.h> -#include <asm/firmware.h> - -static struct op_powerpc_model *model; - -static struct op_counter_config ctr[OP_MAX_COUNTER]; -static struct op_system_config sys; - -static int op_per_cpu_rc; - -static void op_handle_interrupt(struct pt_regs *regs) -{ - model->handle_interrupt(regs, ctr); -} - -static void op_powerpc_cpu_setup(void *dummy) -{ - int ret; - - ret = model->cpu_setup(ctr); - - if (ret != 0) - op_per_cpu_rc = ret; -} - -static int op_powerpc_setup(void) -{ - int err; - - op_per_cpu_rc = 0; - - /* Grab the hardware */ - err = reserve_pmc_hardware(op_handle_interrupt); - if (err) - return err; - - /* Pre-compute the values to stuff in the hardware registers. */ - op_per_cpu_rc = model->reg_setup(ctr, &sys, model->num_counters); - - if (op_per_cpu_rc) - goto out; - - /* Configure the registers on all cpus. If an error occurs on one - * of the cpus, op_per_cpu_rc will be set to the error */ - on_each_cpu(op_powerpc_cpu_setup, NULL, 1); - -out: if (op_per_cpu_rc) { - /* error on setup release the performance counter hardware */ - release_pmc_hardware(); - } - - return op_per_cpu_rc; -} - -static void op_powerpc_shutdown(void) -{ - release_pmc_hardware(); -} - -static void op_powerpc_cpu_start(void *dummy) -{ - /* If any of the cpus have return an error, set the - * global flag to the error so it can be returned - * to the generic OProfile caller. - */ - int ret; - - ret = model->start(ctr); - if (ret != 0) - op_per_cpu_rc = ret; -} - -static int op_powerpc_start(void) -{ - op_per_cpu_rc = 0; - - if (model->global_start) - return model->global_start(ctr); - if (model->start) { - on_each_cpu(op_powerpc_cpu_start, NULL, 1); - return op_per_cpu_rc; - } - return -EIO; /* No start function is defined for this - power architecture */ -} - -static inline void op_powerpc_cpu_stop(void *dummy) -{ - model->stop(); -} - -static void op_powerpc_stop(void) -{ - if (model->stop) - on_each_cpu(op_powerpc_cpu_stop, NULL, 1); - if (model->global_stop) - model->global_stop(); -} - -static int op_powerpc_create_files(struct dentry *root) -{ - int i; - -#ifdef CONFIG_PPC64 - /* - * There is one mmcr0, mmcr1 and mmcra for setting the events for - * all of the counters. - */ - oprofilefs_create_ulong(root, "mmcr0", &sys.mmcr0); - oprofilefs_create_ulong(root, "mmcr1", &sys.mmcr1); - oprofilefs_create_ulong(root, "mmcra", &sys.mmcra); -#ifdef CONFIG_OPROFILE_CELL - /* create a file the user tool can check to see what level of profiling - * support exits with this kernel. Initialize bit mask to indicate - * what support the kernel has: - * bit 0 - Supports SPU event profiling in addition to PPU - * event and cycles; and SPU cycle profiling - * bits 1-31 - Currently unused. - * - * If the file does not exist, then the kernel only supports SPU - * cycle profiling, PPU event and cycle profiling. - */ - oprofilefs_create_ulong(root, "cell_support", &sys.cell_support); - sys.cell_support = 0x1; /* Note, the user OProfile tool must check - * that this bit is set before attempting to - * user SPU event profiling. Older kernels - * will not have this file, hence the user - * tool is not allowed to do SPU event - * profiling on older kernels. Older kernels - * will accept SPU events but collected data - * is garbage. - */ -#endif -#endif - - for (i = 0; i < model->num_counters; ++i) { - struct dentry *dir; - char buf[4]; - - snprintf(buf, sizeof buf, "%d", i); - dir = oprofilefs_mkdir(root, buf); - - oprofilefs_create_ulong(dir, "enabled", &ctr[i].enabled); - oprofilefs_create_ulong(dir, "event", &ctr[i].event); - oprofilefs_create_ulong(dir, "count", &ctr[i].count); - - /* - * Classic PowerPC doesn't support per-counter - * control like this, but the options are - * expected, so they remain. For Freescale - * Book-E style performance monitors, we do - * support them. - */ - oprofilefs_create_ulong(dir, "kernel", &ctr[i].kernel); - oprofilefs_create_ulong(dir, "user", &ctr[i].user); - - oprofilefs_create_ulong(dir, "unit_mask", &ctr[i].unit_mask); - } - - oprofilefs_create_ulong(root, "enable_kernel", &sys.enable_kernel); - oprofilefs_create_ulong(root, "enable_user", &sys.enable_user); - - /* Default to tracing both kernel and user */ - sys.enable_kernel = 1; - sys.enable_user = 1; - - return 0; -} - -int __init oprofile_arch_init(struct oprofile_operations *ops) -{ - if (!cur_cpu_spec->oprofile_cpu_type) - return -ENODEV; - - switch (cur_cpu_spec->oprofile_type) { -#ifdef CONFIG_PPC_BOOK3S_64 -#ifdef CONFIG_OPROFILE_CELL - case PPC_OPROFILE_CELL: - if (firmware_has_feature(FW_FEATURE_LPAR)) - return -ENODEV; - model = &op_model_cell; - ops->sync_start = model->sync_start; - ops->sync_stop = model->sync_stop; - break; -#endif - case PPC_OPROFILE_POWER4: - model = &op_model_power4; - break; - case PPC_OPROFILE_PA6T: - model = &op_model_pa6t; - break; -#endif -#ifdef CONFIG_PPC_BOOK3S_32 - case PPC_OPROFILE_G4: - model = &op_model_7450; - break; -#endif -#if defined(CONFIG_FSL_EMB_PERFMON) - case PPC_OPROFILE_FSL_EMB: - model = &op_model_fsl_emb; - break; -#endif - default: - return -ENODEV; - } - - model->num_counters = cur_cpu_spec->num_pmcs; - - ops->cpu_type = cur_cpu_spec->oprofile_cpu_type; - ops->create_files = op_powerpc_create_files; - ops->setup = op_powerpc_setup; - ops->shutdown = op_powerpc_shutdown; - ops->start = op_powerpc_start; - ops->stop = op_powerpc_stop; - ops->backtrace = op_powerpc_backtrace; - - printk(KERN_DEBUG "oprofile: using %s performance monitoring.\n", - ops->cpu_type); - - return 0; -} - -void oprofile_arch_exit(void) -{ -} diff --git a/arch/powerpc/oprofile/op_model_7450.c b/arch/powerpc/oprofile/op_model_7450.c deleted file mode 100644 index 5ebc25188a72..000000000000 --- a/arch/powerpc/oprofile/op_model_7450.c +++ /dev/null @@ -1,207 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * arch/powerpc/oprofile/op_model_7450.c - * - * Freescale 745x/744x oprofile support, based on fsl_booke support - * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM - * - * Copyright (c) 2004 Freescale Semiconductor, Inc - * - * Author: Andy Fleming - * Maintainer: Kumar Gala <galak@kernel.crashing.org> - */ - -#include <linux/oprofile.h> -#include <linux/smp.h> -#include <asm/ptrace.h> -#include <asm/processor.h> -#include <asm/cputable.h> -#include <asm/page.h> -#include <asm/pmc.h> -#include <asm/oprofile_impl.h> - -static unsigned long reset_value[OP_MAX_COUNTER]; - -static int oprofile_running; -static u32 mmcr0_val, mmcr1_val, mmcr2_val, num_pmcs; - -#define MMCR0_PMC1_SHIFT 6 -#define MMCR0_PMC2_SHIFT 0 -#define MMCR1_PMC3_SHIFT 27 -#define MMCR1_PMC4_SHIFT 22 -#define MMCR1_PMC5_SHIFT 17 -#define MMCR1_PMC6_SHIFT 11 - -#define mmcr0_event1(event) \ - ((event << MMCR0_PMC1_SHIFT) & MMCR0_PMC1SEL) -#define mmcr0_event2(event) \ - ((event << MMCR0_PMC2_SHIFT) & MMCR0_PMC2SEL) - -#define mmcr1_event3(event) \ - ((event << MMCR1_PMC3_SHIFT) & MMCR1_PMC3SEL) -#define mmcr1_event4(event) \ - ((event << MMCR1_PMC4_SHIFT) & MMCR1_PMC4SEL) -#define mmcr1_event5(event) \ - ((event << MMCR1_PMC5_SHIFT) & MMCR1_PMC5SEL) -#define mmcr1_event6(event) \ - ((event << MMCR1_PMC6_SHIFT) & MMCR1_PMC6SEL) - -#define MMCR0_INIT (MMCR0_FC | MMCR0_FCS | MMCR0_FCP | MMCR0_FCM1 | MMCR0_FCM0) - -/* Unfreezes the counters on this CPU, enables the interrupt, - * enables the counters to trigger the interrupt, and sets the - * counters to only count when the mark bit is not set. - */ -static void pmc_start_ctrs(void) -{ - u32 mmcr0 = mfspr(SPRN_MMCR0); - - mmcr0 &= ~(MMCR0_FC | MMCR0_FCM0); - mmcr0 |= (MMCR0_FCECE | MMCR0_PMC1CE | MMCR0_PMCnCE | MMCR0_PMXE); - - mtspr(SPRN_MMCR0, mmcr0); -} - -/* Disables the counters on this CPU, and freezes them */ -static void pmc_stop_ctrs(void) -{ - u32 mmcr0 = mfspr(SPRN_MMCR0); - - mmcr0 |= MMCR0_FC; - mmcr0 &= ~(MMCR0_FCECE | MMCR0_PMC1CE | MMCR0_PMCnCE | MMCR0_PMXE); - - mtspr(SPRN_MMCR0, mmcr0); -} - -/* Configures the counters on this CPU based on the global - * settings */ -static int fsl7450_cpu_setup(struct op_counter_config *ctr) -{ - /* freeze all counters */ - pmc_stop_ctrs(); - - mtspr(SPRN_MMCR0, mmcr0_val); - mtspr(SPRN_MMCR1, mmcr1_val); - if (num_pmcs > 4) - mtspr(SPRN_MMCR2, mmcr2_val); - - return 0; -} - -/* Configures the global settings for the countes on all CPUs. */ -static int fsl7450_reg_setup(struct op_counter_config *ctr, - struct op_system_config *sys, - int num_ctrs) -{ - int i; - - num_pmcs = num_ctrs; - /* Our counters count up, and "count" refers to - * how much before the next interrupt, and we interrupt - * on overflow. So we calculate the starting value - * which will give us "count" until overflow. - * Then we set the events on the enabled counters */ - for (i = 0; i < num_ctrs; ++i) - reset_value[i] = 0x80000000UL - ctr[i].count; - - /* Set events for Counters 1 & 2 */ - mmcr0_val = MMCR0_INIT | mmcr0_event1(ctr[0].event) - | mmcr0_event2(ctr[1].event); - - /* Setup user/kernel bits */ - if (sys->enable_kernel) - mmcr0_val &= ~(MMCR0_FCS); - - if (sys->enable_user) - mmcr0_val &= ~(MMCR0_FCP); - - /* Set events for Counters 3-6 */ - mmcr1_val = mmcr1_event3(ctr[2].event) - | mmcr1_event4(ctr[3].event); - if (num_ctrs > 4) - mmcr1_val |= mmcr1_event5(ctr[4].event) - | mmcr1_event6(ctr[5].event); - - mmcr2_val = 0; - - return 0; -} - -/* Sets the counters on this CPU to the chosen values, and starts them */ -static int fsl7450_start(struct op_counter_config *ctr) -{ - int i; - - mtmsr(mfmsr() | MSR_PMM); - - for (i = 0; i < num_pmcs; ++i) { - if (ctr[i].enabled) - classic_ctr_write(i, reset_value[i]); - else - classic_ctr_write(i, 0); - } - - /* Clear the freeze bit, and enable the interrupt. - * The counters won't actually start until the rfi clears - * the PMM bit */ - pmc_start_ctrs(); - - oprofile_running = 1; - - return 0; -} - -/* Stop the counters on this CPU */ -static void fsl7450_stop(void) -{ - /* freeze counters */ - pmc_stop_ctrs(); - - oprofile_running = 0; - - mb(); -} - - -/* Handle the interrupt on this CPU, and log a sample for each - * event that triggered the interrupt */ -static void fsl7450_handle_interrupt(struct pt_regs *regs, - struct op_counter_config *ctr) -{ - unsigned long pc; - int is_kernel; - int val; - int i; - - /* set the PMM bit (see comment below) */ - mtmsr(mfmsr() | MSR_PMM); - - pc = mfspr(SPRN_SIAR); - is_kernel = is_kernel_addr(pc); - - for (i = 0; i < num_pmcs; ++i) { - val = classic_ctr_read(i); - if (val < 0) { - if (oprofile_running && ctr[i].enabled) { - oprofile_add_ext_sample(pc, regs, i, is_kernel); - classic_ctr_write(i, reset_value[i]); - } else { - classic_ctr_write(i, 0); - } - } - } - - /* The freeze bit was set by the interrupt. */ - /* Clear the freeze bit, and reenable the interrupt. - * The counters won't actually start until the rfi clears - * the PM/M bit */ - pmc_start_ctrs(); -} - -struct op_powerpc_model op_model_7450= { - .reg_setup = fsl7450_reg_setup, - .cpu_setup = fsl7450_cpu_setup, - .start = fsl7450_start, - .stop = fsl7450_stop, - .handle_interrupt = fsl7450_handle_interrupt, -}; diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c deleted file mode 100644 index 7eb73070b7be..000000000000 --- a/arch/powerpc/oprofile/op_model_cell.c +++ /dev/null @@ -1,1709 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Cell Broadband Engine OProfile Support - * - * (C) Copyright IBM Corporation 2006 - * - * Author: David Erb (djerb@us.ibm.com) - * Modifications: - * Carl Love <carll@us.ibm.com> - * Maynard Johnson <maynardj@us.ibm.com> - */ - -#include <linux/cpufreq.h> -#include <linux/delay.h> -#include <linux/jiffies.h> -#include <linux/kthread.h> -#include <linux/oprofile.h> -#include <linux/percpu.h> -#include <linux/smp.h> -#include <linux/spinlock.h> -#include <linux/timer.h> -#include <asm/cell-pmu.h> -#include <asm/cputable.h> -#include <asm/firmware.h> -#include <asm/io.h> -#include <asm/oprofile_impl.h> -#include <asm/processor.h> -#include <asm/prom.h> -#include <asm/ptrace.h> -#include <asm/reg.h> -#include <asm/rtas.h> -#include <asm/cell-regs.h> - -#include "../platforms/cell/interrupt.h" -#include "cell/pr_util.h" - -#define PPU_PROFILING 0 -#define SPU_PROFILING_CYCLES 1 -#define SPU_PROFILING_EVENTS 2 - -#define SPU_EVENT_NUM_START 4100 -#define SPU_EVENT_NUM_STOP 4399 -#define SPU_PROFILE_EVENT_ADDR 4363 /* spu, address trace, decimal */ -#define SPU_PROFILE_EVENT_ADDR_MASK_A 0x146 /* sub unit set to zero */ -#define SPU_PROFILE_EVENT_ADDR_MASK_B 0x186 /* sub unit set to zero */ - -#define NUM_SPUS_PER_NODE 8 -#define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */ - -#define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ -#define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying - * PPU_CYCLES event - */ -#define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */ - -#define NUM_THREADS 2 /* number of physical threads in - * physical processor - */ -#define NUM_DEBUG_BUS_WORDS 4 -#define NUM_INPUT_BUS_WORDS 2 - -#define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */ - -/* Minimum HW interval timer setting to send value to trace buffer is 10 cycle. - * To configure counter to send value every N cycles set counter to - * 2^32 - 1 - N. - */ -#define NUM_INTERVAL_CYC 0xFFFFFFFF - 10 - -/* - * spu_cycle_reset is the number of cycles between samples. - * This variable is used for SPU profiling and should ONLY be set - * at the beginning of cell_reg_setup; otherwise, it's read-only. - */ -static unsigned int spu_cycle_reset; -static unsigned int profiling_mode; -static int spu_evnt_phys_spu_indx; - -struct pmc_cntrl_data { - unsigned long vcntr; - unsigned long evnts; - unsigned long masks; - unsigned long enabled; -}; - -/* - * ibm,cbe-perftools rtas parameters - */ -struct pm_signal { - u16 cpu; /* Processor to modify */ - u16 sub_unit; /* hw subunit this applies to (if applicable)*/ - short int signal_group; /* Signal Group to Enable/Disable */ - u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event - * Bus Word(s) (bitmask) - */ - u8 bit; /* Trigger/Event bit (if applicable) */ -}; - -/* - * rtas call arguments - */ -enum { - SUBFUNC_RESET = 1, - SUBFUNC_ACTIVATE = 2, - SUBFUNC_DEACTIVATE = 3, - - PASSTHRU_IGNORE = 0, - PASSTHRU_ENABLE = 1, - PASSTHRU_DISABLE = 2, -}; - -struct pm_cntrl { - u16 enable; - u16 stop_at_max; - u16 trace_mode; - u16 freeze; - u16 count_mode; - u16 spu_addr_trace; - u8 trace_buf_ovflw; -}; - -static struct { - u32 group_control; - u32 debug_bus_control; - struct pm_cntrl pm_cntrl; - u32 pm07_cntrl[NR_PHYS_CTRS]; -} pm_regs; - -#define GET_SUB_UNIT(x) ((x & 0x0000f000) >> 12) -#define GET_BUS_WORD(x) ((x & 0x000000f0) >> 4) -#define GET_BUS_TYPE(x) ((x & 0x00000300) >> 8) -#define GET_POLARITY(x) ((x & 0x00000002) >> 1) -#define GET_COUNT_CYCLES(x) (x & 0x00000001) -#define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2) - -static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); -static unsigned long spu_pm_cnt[MAX_NUMNODES * NUM_SPUS_PER_NODE]; -static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; - -/* - * The CELL profiling code makes rtas calls to setup the debug bus to - * route the performance signals. Additionally, SPU profiling requires - * a second rtas call to setup the hardware to capture the SPU PCs. - * The EIO error value is returned if the token lookups or the rtas - * call fail. The EIO error number is the best choice of the existing - * error numbers. The probability of rtas related error is very low. But - * by returning EIO and printing additional information to dmsg the user - * will know that OProfile did not start and dmesg will tell them why. - * OProfile does not support returning errors on Stop. Not a huge issue - * since failure to reset the debug bus or stop the SPU PC collection is - * not a fatel issue. Chances are if the Stop failed, Start doesn't work - * either. - */ - -/* - * Interpetation of hdw_thread: - * 0 - even virtual cpus 0, 2, 4,... - * 1 - odd virtual cpus 1, 3, 5, ... - * - * FIXME: this is strictly wrong, we need to clean this up in a number - * of places. It works for now. -arnd - */ -static u32 hdw_thread; - -static u32 virt_cntr_inter_mask; -static struct timer_list timer_virt_cntr; -static struct timer_list timer_spu_event_swap; - -/* - * pm_signal needs to be global since it is initialized in - * cell_reg_setup at the time when the necessary information - * is available. - */ -static struct pm_signal pm_signal[NR_PHYS_CTRS]; -static int pm_rtas_token; /* token for debug bus setup call */ -static int spu_rtas_token; /* token for SPU cycle profiling */ - -static u32 reset_value[NR_PHYS_CTRS]; -static int num_counters; -static int oprofile_running; -static DEFINE_SPINLOCK(cntr_lock); - -static u32 ctr_enabled; - -static unsigned char input_bus[NUM_INPUT_BUS_WORDS]; - -/* - * Firmware interface functions - */ -static int -rtas_ibm_cbe_perftools(int subfunc, int passthru, - void *address, unsigned long length) -{ - u64 paddr = __pa(address); - - return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, - passthru, paddr >> 32, paddr & 0xffffffff, length); -} - -static void pm_rtas_reset_signals(u32 node) -{ - int ret; - struct pm_signal pm_signal_local; - - /* - * The debug bus is being set to the passthru disable state. - * However, the FW still expects at least one legal signal routing - * entry or it will return an error on the arguments. If we don't - * supply a valid entry, we must ignore all return values. Ignoring - * all return values means we might miss an error we should be - * concerned about. - */ - - /* fw expects physical cpu #. */ - pm_signal_local.cpu = node; - pm_signal_local.signal_group = 21; - pm_signal_local.bus_word = 1; - pm_signal_local.sub_unit = 0; - pm_signal_local.bit = 0; - - ret = rtas_ibm_cbe_perftools(SUBFUNC_RESET, PASSTHRU_DISABLE, - &pm_signal_local, - sizeof(struct pm_signal)); - - if (unlikely(ret)) - /* - * Not a fatal error. For Oprofile stop, the oprofile - * functions do not support returning an error for - * failure to stop OProfile. - */ - printk(KERN_WARNING "%s: rtas returned: %d\n", - __func__, ret); -} - -static int pm_rtas_activate_signals(u32 node, u32 count) -{ - int ret; - int i, j; - struct pm_signal pm_signal_local[NR_PHYS_CTRS]; - - /* - * There is no debug setup required for the cycles event. - * Note that only events in the same group can be used. - * Otherwise, there will be conflicts in correctly routing - * the signals on the debug bus. It is the responsibility - * of the OProfile user tool to check the events are in - * the same group. - */ - i = 0; - for (j = 0; j < count; j++) { - if (pm_signal[j].signal_group != PPU_CYCLES_GRP_NUM) { - - /* fw expects physical cpu # */ - pm_signal_local[i].cpu = node; - pm_signal_local[i].signal_group - = pm_signal[j].signal_group; - pm_signal_local[i].bus_word = pm_signal[j].bus_word; - pm_signal_local[i].sub_unit = pm_signal[j].sub_unit; - pm_signal_local[i].bit = pm_signal[j].bit; - i++; - } - } - - if (i != 0) { - ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, PASSTHRU_ENABLE, - pm_signal_local, - i * sizeof(struct pm_signal)); - - if (unlikely(ret)) { - printk(KERN_WARNING "%s: rtas returned: %d\n", - __func__, ret); - return -EIO; - } - } - - return 0; -} - -/* - * PM Signal functions - */ -static void set_pm_event(u32 ctr, int event, u32 unit_mask) -{ - struct pm_signal *p; - u32 signal_bit; - u32 bus_word, bus_type, count_cycles, polarity, input_control; - int j, i; - - if (event == PPU_CYCLES_EVENT_NUM) { - /* Special Event: Count all cpu cycles */ - pm_regs.pm07_cntrl[ctr] = CBE_COUNT_ALL_CYCLES; - p = &(pm_signal[ctr]); - p->signal_group = PPU_CYCLES_GRP_NUM; - p->bus_word = 1; - p->sub_unit = 0; - p->bit = 0; - goto out; - } else { - pm_regs.pm07_cntrl[ctr] = 0; - } - - bus_word = GET_BUS_WORD(unit_mask); - bus_type = GET_BUS_TYPE(unit_mask); - count_cycles = GET_COUNT_CYCLES(unit_mask); - polarity = GET_POLARITY(unit_mask); - input_control = GET_INPUT_CONTROL(unit_mask); - signal_bit = (event % 100); - - p = &(pm_signal[ctr]); - - p->signal_group = event / 100; - p->bus_word = bus_word; - p->sub_unit = GET_SUB_UNIT(unit_mask); - - pm_regs.pm07_cntrl[ctr] = 0; - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_COUNT_CYCLES(count_cycles); - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); - - /* - * Some of the islands signal selection is based on 64 bit words. - * The debug bus words are 32 bits, the input words to the performance - * counters are defined as 32 bits. Need to convert the 64 bit island - * specification to the appropriate 32 input bit and bus word for the - * performance counter event selection. See the CELL Performance - * monitoring signals manual and the Perf cntr hardware descriptions - * for the details. - */ - if (input_control == 0) { - if (signal_bit > 31) { - signal_bit -= 32; - if (bus_word == 0x3) - bus_word = 0x2; - else if (bus_word == 0xc) - bus_word = 0x8; - } - - if ((bus_type == 0) && p->signal_group >= 60) - bus_type = 2; - if ((bus_type == 1) && p->signal_group >= 50) - bus_type = 0; - - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_MUX(signal_bit); - } else { - pm_regs.pm07_cntrl[ctr] = 0; - p->bit = signal_bit; - } - - for (i = 0; i < NUM_DEBUG_BUS_WORDS; i++) { - if (bus_word & (1 << i)) { - pm_regs.debug_bus_control |= - (bus_type << (30 - (2 * i))); - - for (j = 0; j < NUM_INPUT_BUS_WORDS; j++) { - if (input_bus[j] == 0xff) { - input_bus[j] = i; - pm_regs.group_control |= - (i << (30 - (2 * j))); - - break; - } - } - } - } -out: - ; -} - -static void write_pm_cntrl(int cpu) -{ - /* - * Oprofile will use 32 bit counters, set bits 7:10 to 0 - * pmregs.pm_cntrl is a global - */ - - u32 val = 0; - if (pm_regs.pm_cntrl.enable == 1) - val |= CBE_PM_ENABLE_PERF_MON; - - if (pm_regs.pm_cntrl.stop_at_max == 1) - val |= CBE_PM_STOP_AT_MAX; - - if (pm_regs.pm_cntrl.trace_mode != 0) - val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode); - - if (pm_regs.pm_cntrl.trace_buf_ovflw == 1) - val |= CBE_PM_TRACE_BUF_OVFLW(pm_regs.pm_cntrl.trace_buf_ovflw); - if (pm_regs.pm_cntrl.freeze == 1) - val |= CBE_PM_FREEZE_ALL_CTRS; - - val |= CBE_PM_SPU_ADDR_TRACE_SET(pm_regs.pm_cntrl.spu_addr_trace); - - /* - * Routine set_count_mode must be called previously to set - * the count mode based on the user selection of user and kernel. - */ - val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode); - cbe_write_pm(cpu, pm_control, val); -} - -static inline void -set_count_mode(u32 kernel, u32 user) -{ - /* - * The user must specify user and kernel if they want them. If - * neither is specified, OProfile will count in hypervisor mode. - * pm_regs.pm_cntrl is a global - */ - if (kernel) { - if (user) - pm_regs.pm_cntrl.count_mode = CBE_COUNT_ALL_MODES; - else - pm_regs.pm_cntrl.count_mode = - CBE_COUNT_SUPERVISOR_MODE; - } else { - if (user) - pm_regs.pm_cntrl.count_mode = CBE_COUNT_PROBLEM_MODE; - else - pm_regs.pm_cntrl.count_mode = - CBE_COUNT_HYPERVISOR_MODE; - } -} - -static inline void enable_ctr(u32 cpu, u32 ctr, u32 *pm07_cntrl) -{ - - pm07_cntrl[ctr] |= CBE_PM_CTR_ENABLE; - cbe_write_pm07_control(cpu, ctr, pm07_cntrl[ctr]); -} - -/* - * Oprofile is expected to collect data on all CPUs simultaneously. - * However, there is one set of performance counters per node. There are - * two hardware threads or virtual CPUs on each node. Hence, OProfile must - * multiplex in time the performance counter collection on the two virtual - * CPUs. The multiplexing of the performance counters is done by this - * virtual counter routine. - * - * The pmc_values used below is defined as 'per-cpu' but its use is - * more akin to 'per-node'. We need to store two sets of counter - * values per node -- one for the previous run and one for the next. - * The per-cpu[NR_PHYS_CTRS] gives us the storage we need. Each odd/even - * pair of per-cpu arrays is used for storing the previous and next - * pmc values for a given node. - * NOTE: We use the per-cpu variable to improve cache performance. - * - * This routine will alternate loading the virtual counters for - * virtual CPUs - */ -static void cell_virtual_cntr(struct timer_list *unused) -{ - int i, prev_hdw_thread, next_hdw_thread; - u32 cpu; - unsigned long flags; - - /* - * Make sure that the interrupt_hander and the virt counter are - * not both playing with the counters on the same node. - */ - - spin_lock_irqsave(&cntr_lock, flags); - - prev_hdw_thread = hdw_thread; - - /* switch the cpu handling the interrupts */ - hdw_thread = 1 ^ hdw_thread; - next_hdw_thread = hdw_thread; - - pm_regs.group_control = 0; - pm_regs.debug_bus_control = 0; - - for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) - input_bus[i] = 0xff; - - /* - * There are some per thread events. Must do the - * set event, for the thread that is being started - */ - for (i = 0; i < num_counters; i++) - set_pm_event(i, - pmc_cntrl[next_hdw_thread][i].evnts, - pmc_cntrl[next_hdw_thread][i].masks); - - /* - * The following is done only once per each node, but - * we need cpu #, not node #, to pass to the cbe_xxx functions. - */ - for_each_online_cpu(cpu) { - if (cbe_get_hw_thread_id(cpu)) - continue; - - /* - * stop counters, save counter values, restore counts - * for previous thread - */ - cbe_disable_pm(cpu); - cbe_disable_pm_interrupts(cpu); - for (i = 0; i < num_counters; i++) { - per_cpu(pmc_values, cpu + prev_hdw_thread)[i] - = cbe_read_ctr(cpu, i); - - if (per_cpu(pmc_values, cpu + next_hdw_thread)[i] - == 0xFFFFFFFF) - /* If the cntr value is 0xffffffff, we must - * reset that to 0xfffffff0 when the current - * thread is restarted. This will generate a - * new interrupt and make sure that we never - * restore the counters to the max value. If - * the counters were restored to the max value, - * they do not increment and no interrupts are - * generated. Hence no more samples will be - * collected on that cpu. - */ - cbe_write_ctr(cpu, i, 0xFFFFFFF0); - else - cbe_write_ctr(cpu, i, - per_cpu(pmc_values, - cpu + - next_hdw_thread)[i]); - } - - /* - * Switch to the other thread. Change the interrupt - * and control regs to be scheduled on the CPU - * corresponding to the thread to execute. - */ - for (i = 0; i < num_counters; i++) { - if (pmc_cntrl[next_hdw_thread][i].enabled) { - /* - * There are some per thread events. - * Must do the set event, enable_cntr - * for each cpu. - */ - enable_ctr(cpu, i, - pm_regs.pm07_cntrl); - } else { - cbe_write_pm07_control(cpu, i, 0); - } - } - - /* Enable interrupts on the CPU thread that is starting */ - cbe_enable_pm_interrupts(cpu, next_hdw_thread, - virt_cntr_inter_mask); - cbe_enable_pm(cpu); - } - - spin_unlock_irqrestore(&cntr_lock, flags); - - mod_timer(&timer_virt_cntr, jiffies + HZ / 10); -} - -static void start_virt_cntrs(void) -{ - timer_setup(&timer_virt_cntr, cell_virtual_cntr, 0); - timer_virt_cntr.expires = jiffies + HZ / 10; - add_timer(&timer_virt_cntr); -} - -static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr, - struct op_system_config *sys, int num_ctrs) -{ - spu_cycle_reset = ctr[0].count; - - /* - * Each node will need to make the rtas call to start - * and stop SPU profiling. Get the token once and store it. - */ - spu_rtas_token = rtas_token("ibm,cbe-spu-perftools"); - - if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) { - printk(KERN_ERR - "%s: rtas token ibm,cbe-spu-perftools unknown\n", - __func__); - return -EIO; - } - return 0; -} - -/* Unfortunately, the hardware will only support event profiling - * on one SPU per node at a time. Therefore, we must time slice - * the profiling across all SPUs in the node. Note, we do this - * in parallel for each node. The following routine is called - * periodically based on kernel timer to switch which SPU is - * being monitored in a round robbin fashion. - */ -static void spu_evnt_swap(struct timer_list *unused) -{ - int node; - int cur_phys_spu, nxt_phys_spu, cur_spu_evnt_phys_spu_indx; - unsigned long flags; - int cpu; - int ret; - u32 interrupt_mask; - - - /* enable interrupts on cntr 0 */ - interrupt_mask = CBE_PM_CTR_OVERFLOW_INTR(0); - - hdw_thread = 0; - - /* Make sure spu event interrupt handler and spu event swap - * don't access the counters simultaneously. - */ - spin_lock_irqsave(&cntr_lock, flags); - - cur_spu_evnt_phys_spu_indx = spu_evnt_phys_spu_indx; - - if (++(spu_evnt_phys_spu_indx) == NUM_SPUS_PER_NODE) - spu_evnt_phys_spu_indx = 0; - - pm_signal[0].sub_unit = spu_evnt_phys_spu_indx; - pm_signal[1].sub_unit = spu_evnt_phys_spu_indx; - pm_signal[2].sub_unit = spu_evnt_phys_spu_indx; - - /* switch the SPU being profiled on each node */ - for_each_online_cpu(cpu) { - if (cbe_get_hw_thread_id(cpu)) - continue; - - node = cbe_cpu_to_node(cpu); - cur_phys_spu = (node * NUM_SPUS_PER_NODE) - + cur_spu_evnt_phys_spu_indx; - nxt_phys_spu = (node * NUM_SPUS_PER_NODE) - + spu_evnt_phys_spu_indx; - - /* - * stop counters, save counter values, restore counts - * for previous physical SPU - */ - cbe_disable_pm(cpu); - cbe_disable_pm_interrupts(cpu); - - spu_pm_cnt[cur_phys_spu] - = cbe_read_ctr(cpu, 0); - - /* restore previous count for the next spu to sample */ - /* NOTE, hardware issue, counter will not start if the - * counter value is at max (0xFFFFFFFF). - */ - if (spu_pm_cnt[nxt_phys_spu] >= 0xFFFFFFFF) - cbe_write_ctr(cpu, 0, 0xFFFFFFF0); - else - cbe_write_ctr(cpu, 0, spu_pm_cnt[nxt_phys_spu]); - - pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); - - /* setup the debug bus measure the one event and - * the two events to route the next SPU's PC on - * the debug bus - */ - ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 3); - if (ret) - printk(KERN_ERR "%s: pm_rtas_activate_signals failed, " - "SPU event swap\n", __func__); - - /* clear the trace buffer, don't want to take PC for - * previous SPU*/ - cbe_write_pm(cpu, trace_address, 0); - - enable_ctr(cpu, 0, pm_regs.pm07_cntrl); - - /* Enable interrupts on the CPU thread that is starting */ - cbe_enable_pm_interrupts(cpu, hdw_thread, - interrupt_mask); - cbe_enable_pm(cpu); - } - - spin_unlock_irqrestore(&cntr_lock, flags); - - /* swap approximately every 0.1 seconds */ - mod_timer(&timer_spu_event_swap, jiffies + HZ / 25); -} - -static void start_spu_event_swap(void) -{ - timer_setup(&timer_spu_event_swap, spu_evnt_swap, 0); - timer_spu_event_swap.expires = jiffies + HZ / 25; - add_timer(&timer_spu_event_swap); -} - -static int cell_reg_setup_spu_events(struct op_counter_config *ctr, - struct op_system_config *sys, int num_ctrs) -{ - int i; - - /* routine is called once for all nodes */ - - spu_evnt_phys_spu_indx = 0; - /* - * For all events except PPU CYCLEs, each node will need to make - * the rtas cbe-perftools call to setup and reset the debug bus. - * Make the token lookup call once and store it in the global - * variable pm_rtas_token. - */ - pm_rtas_token = rtas_token("ibm,cbe-perftools"); - - if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { - printk(KERN_ERR - "%s: rtas token ibm,cbe-perftools unknown\n", - __func__); - return -EIO; - } - - /* setup the pm_control register settings, - * settings will be written per node by the - * cell_cpu_setup() function. - */ - pm_regs.pm_cntrl.trace_buf_ovflw = 1; - - /* Use the occurrence trace mode to have SPU PC saved - * to the trace buffer. Occurrence data in trace buffer - * is not used. Bit 2 must be set to store SPU addresses. - */ - pm_regs.pm_cntrl.trace_mode = 2; - - pm_regs.pm_cntrl.spu_addr_trace = 0x1; /* using debug bus - event 2 & 3 */ - - /* setup the debug bus event array with the SPU PC routing events. - * Note, pm_signal[0] will be filled in by set_pm_event() call below. - */ - pm_signal[1].signal_group = SPU_PROFILE_EVENT_ADDR / 100; - pm_signal[1].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_A); - pm_signal[1].bit = SPU_PROFILE_EVENT_ADDR % 100; - pm_signal[1].sub_unit = spu_evnt_phys_spu_indx; - - pm_signal[2].signal_group = SPU_PROFILE_EVENT_ADDR / 100; - pm_signal[2].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_B); - pm_signal[2].bit = SPU_PROFILE_EVENT_ADDR % 100; - pm_signal[2].sub_unit = spu_evnt_phys_spu_indx; - - /* Set the user selected spu event to profile on, - * note, only one SPU profiling event is supported - */ - num_counters = 1; /* Only support one SPU event at a time */ - set_pm_event(0, ctr[0].event, ctr[0].unit_mask); - - reset_value[0] = 0xFFFFFFFF - ctr[0].count; - - /* global, used by cell_cpu_setup */ - ctr_enabled |= 1; - - /* Initialize the count for each SPU to the reset value */ - for (i=0; i < MAX_NUMNODES * NUM_SPUS_PER_NODE; i++) - spu_pm_cnt[i] = reset_value[0]; - - return 0; -} - -static int cell_reg_setup_ppu(struct op_counter_config *ctr, - struct op_system_config *sys, int num_ctrs) -{ - /* routine is called once for all nodes */ - int i, j, cpu; - - num_counters = num_ctrs; - - if (unlikely(num_ctrs > NR_PHYS_CTRS)) { - printk(KERN_ERR - "%s: Oprofile, number of specified events " \ - "exceeds number of physical counters\n", - __func__); - return -EIO; - } - - set_count_mode(sys->enable_kernel, sys->enable_user); - - /* Setup the thread 0 events */ - for (i = 0; i < num_ctrs; ++i) { - - pmc_cntrl[0][i].evnts = ctr[i].event; - pmc_cntrl[0][i].masks = ctr[i].unit_mask; - pmc_cntrl[0][i].enabled = ctr[i].enabled; - pmc_cntrl[0][i].vcntr = i; - - for_each_possible_cpu(j) - per_cpu(pmc_values, j)[i] = 0; - } - - /* - * Setup the thread 1 events, map the thread 0 event to the - * equivalent thread 1 event. - */ - for (i = 0; i < num_ctrs; ++i) { - if ((ctr[i].event >= 2100) && (ctr[i].event <= 2111)) - pmc_cntrl[1][i].evnts = ctr[i].event + 19; - else if (ctr[i].event == 2203) - pmc_cntrl[1][i].evnts = ctr[i].event; - else if ((ctr[i].event >= 2200) && (ctr[i].event <= 2215)) - pmc_cntrl[1][i].evnts = ctr[i].event + 16; - else - pmc_cntrl[1][i].evnts = ctr[i].event; - - pmc_cntrl[1][i].masks = ctr[i].unit_mask; - pmc_cntrl[1][i].enabled = ctr[i].enabled; - pmc_cntrl[1][i].vcntr = i; - } - - for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) - input_bus[i] = 0xff; - - /* - * Our counters count up, and "count" refers to - * how much before the next interrupt, and we interrupt - * on overflow. So we calculate the starting value - * which will give us "count" until overflow. - * Then we set the events on the enabled counters. - */ - for (i = 0; i < num_counters; ++i) { - /* start with virtual counter set 0 */ - if (pmc_cntrl[0][i].enabled) { - /* Using 32bit counters, reset max - count */ - reset_value[i] = 0xFFFFFFFF - ctr[i].count; - set_pm_event(i, - pmc_cntrl[0][i].evnts, - pmc_cntrl[0][i].masks); - - /* global, used by cell_cpu_setup */ - ctr_enabled |= (1 << i); - } - } - - /* initialize the previous counts for the virtual cntrs */ - for_each_online_cpu(cpu) - for (i = 0; i < num_counters; ++i) { - per_cpu(pmc_values, cpu)[i] = reset_value[i]; - } - - return 0; -} - - -/* This function is called once for all cpus combined */ -static int cell_reg_setup(struct op_counter_config *ctr, - struct op_system_config *sys, int num_ctrs) -{ - int ret=0; - spu_cycle_reset = 0; - - /* initialize the spu_arr_trace value, will be reset if - * doing spu event profiling. - */ - pm_regs.group_control = 0; - pm_regs.debug_bus_control = 0; - pm_regs.pm_cntrl.stop_at_max = 1; - pm_regs.pm_cntrl.trace_mode = 0; - pm_regs.pm_cntrl.freeze = 1; - pm_regs.pm_cntrl.trace_buf_ovflw = 0; - pm_regs.pm_cntrl.spu_addr_trace = 0; - - /* - * For all events except PPU CYCLEs, each node will need to make - * the rtas cbe-perftools call to setup and reset the debug bus. - * Make the token lookup call once and store it in the global - * variable pm_rtas_token. - */ - pm_rtas_token = rtas_token("ibm,cbe-perftools"); - - if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { - printk(KERN_ERR - "%s: rtas token ibm,cbe-perftools unknown\n", - __func__); - return -EIO; - } - - if (ctr[0].event == SPU_CYCLES_EVENT_NUM) { - profiling_mode = SPU_PROFILING_CYCLES; - ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs); - } else if ((ctr[0].event >= SPU_EVENT_NUM_START) && - (ctr[0].event <= SPU_EVENT_NUM_STOP)) { - profiling_mode = SPU_PROFILING_EVENTS; - spu_cycle_reset = ctr[0].count; - - /* for SPU event profiling, need to setup the - * pm_signal array with the events to route the - * SPU PC before making the FW call. Note, only - * one SPU event for profiling can be specified - * at a time. - */ - cell_reg_setup_spu_events(ctr, sys, num_ctrs); - } else { - profiling_mode = PPU_PROFILING; - ret = cell_reg_setup_ppu(ctr, sys, num_ctrs); - } - - return ret; -} - - - -/* This function is called once for each cpu */ -static int cell_cpu_setup(struct op_counter_config *cntr) -{ - u32 cpu = smp_processor_id(); - u32 num_enabled = 0; - int i; - int ret; - - /* Cycle based SPU profiling does not use the performance - * counters. The trace array is configured to collect - * the data. - */ - if (profiling_mode == SPU_PROFILING_CYCLES) - return 0; - - /* There is one performance monitor per processor chip (i.e. node), - * so we only need to perform this function once per node. - */ - if (cbe_get_hw_thread_id(cpu)) - return 0; - - /* Stop all counters */ - cbe_disable_pm(cpu); - cbe_disable_pm_interrupts(cpu); - - cbe_write_pm(cpu, pm_start_stop, 0); - cbe_write_pm(cpu, group_control, pm_regs.group_control); - cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control); - write_pm_cntrl(cpu); - - for (i = 0; i < num_counters; ++i) { - if (ctr_enabled & (1 << i)) { - pm_signal[num_enabled].cpu = cbe_cpu_to_node(cpu); - num_enabled++; - } - } - - /* - * The pm_rtas_activate_signals will return -EIO if the FW - * call failed. - */ - if (profiling_mode == SPU_PROFILING_EVENTS) { - /* For SPU event profiling also need to setup the - * pm interval timer - */ - ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), - num_enabled+2); - /* store PC from debug bus to Trace buffer as often - * as possible (every 10 cycles) - */ - cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC); - return ret; - } else - return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), - num_enabled); -} - -#define ENTRIES 303 -#define MAXLFSR 0xFFFFFF - -/* precomputed table of 24 bit LFSR values */ -static int initial_lfsr[] = { - 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424, - 15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716, - 4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547, - 3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392, - 9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026, - 2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556, - 3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769, - 14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893, - 11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017, - 6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756, - 15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558, - 7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401, - 16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720, - 15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042, - 15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955, - 10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934, - 3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783, - 3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278, - 8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051, - 8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741, - 4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972, - 16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302, - 2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384, - 14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469, - 1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697, - 6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398, - 10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140, - 10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214, - 14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386, - 7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087, - 9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130, - 14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300, - 13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475, - 5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950, - 3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003, - 6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375, - 7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426, - 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607 -}; - -/* - * The hardware uses an LFSR counting sequence to determine when to capture - * the SPU PCs. An LFSR sequence is like a puesdo random number sequence - * where each number occurs once in the sequence but the sequence is not in - * numerical order. The SPU PC capture is done when the LFSR sequence reaches - * the last value in the sequence. Hence the user specified value N - * corresponds to the LFSR number that is N from the end of the sequence. - * - * To avoid the time to compute the LFSR, a lookup table is used. The 24 bit - * LFSR sequence is broken into four ranges. The spacing of the precomputed - * values is adjusted in each range so the error between the user specified - * number (N) of events between samples and the actual number of events based - * on the precomputed value will be les then about 6.2%. Note, if the user - * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used. - * This is to prevent the loss of samples because the trace buffer is full. - * - * User specified N Step between Index in - * precomputed values precomputed - * table - * 0 to 2^16-1 ---- 0 - * 2^16 to 2^16+2^19-1 2^12 1 to 128 - * 2^16+2^19 to 2^16+2^19+2^22-1 2^15 129 to 256 - * 2^16+2^19+2^22 to 2^24-1 2^18 257 to 302 - * - * - * For example, the LFSR values in the second range are computed for 2^16, - * 2^16+2^12, ... , 2^19-2^16, 2^19 and stored in the table at indicies - * 1, 2,..., 127, 128. - * - * The 24 bit LFSR value for the nth number in the sequence can be - * calculated using the following code: - * - * #define size 24 - * int calculate_lfsr(int n) - * { - * int i; - * unsigned int newlfsr0; - * unsigned int lfsr = 0xFFFFFF; - * unsigned int howmany = n; - * - * for (i = 2; i < howmany + 2; i++) { - * newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^ - * ((lfsr >> (size - 1 - 1)) & 1) ^ - * (((lfsr >> (size - 1 - 6)) & 1) ^ - * ((lfsr >> (size - 1 - 23)) & 1))); - * - * lfsr >>= 1; - * lfsr = lfsr | (newlfsr0 << (size - 1)); - * } - * return lfsr; - * } - */ - -#define V2_16 (0x1 << 16) -#define V2_19 (0x1 << 19) -#define V2_22 (0x1 << 22) - -static int calculate_lfsr(int n) -{ - /* - * The ranges and steps are in powers of 2 so the calculations - * can be done using shifts rather then divide. - */ - int index; - - if ((n >> 16) == 0) - index = 0; - else if (((n - V2_16) >> 19) == 0) - index = ((n - V2_16) >> 12) + 1; - else if (((n - V2_16 - V2_19) >> 22) == 0) - index = ((n - V2_16 - V2_19) >> 15 ) + 1 + 128; - else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0) - index = ((n - V2_16 - V2_19 - V2_22) >> 18 ) + 1 + 256; - else - index = ENTRIES-1; - - /* make sure index is valid */ - if ((index >= ENTRIES) || (index < 0)) - index = ENTRIES-1; - - return initial_lfsr[index]; -} - -static int pm_rtas_activate_spu_profiling(u32 node) -{ - int ret, i; - struct pm_signal pm_signal_local[NUM_SPUS_PER_NODE]; - - /* - * Set up the rtas call to configure the debug bus to - * route the SPU PCs. Setup the pm_signal for each SPU - */ - for (i = 0; i < ARRAY_SIZE(pm_signal_local); i++) { - pm_signal_local[i].cpu = node; - pm_signal_local[i].signal_group = 41; - /* spu i on word (i/2) */ - pm_signal_local[i].bus_word = 1 << i / 2; - /* spu i */ - pm_signal_local[i].sub_unit = i; - pm_signal_local[i].bit = 63; - } - - ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, - PASSTHRU_ENABLE, pm_signal_local, - (ARRAY_SIZE(pm_signal_local) - * sizeof(struct pm_signal))); - - if (unlikely(ret)) { - printk(KERN_WARNING "%s: rtas returned: %d\n", - __func__, ret); - return -EIO; - } - - return 0; -} - -#ifdef CONFIG_CPU_FREQ -static int -oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data) -{ - int ret = 0; - struct cpufreq_freqs *frq = data; - if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) || - (val == CPUFREQ_POSTCHANGE && frq->old > frq->new)) - set_spu_profiling_frequency(frq->new, spu_cycle_reset); - return ret; -} - -static struct notifier_block cpu_freq_notifier_block = { - .notifier_call = oprof_cpufreq_notify -}; -#endif - -/* - * Note the generic OProfile stop calls do not support returning - * an error on stop. Hence, will not return an error if the FW - * calls fail on stop. Failure to reset the debug bus is not an issue. - * Failure to disable the SPU profiling is not an issue. The FW calls - * to enable the performance counters and debug bus will work even if - * the hardware was not cleanly reset. - */ -static void cell_global_stop_spu_cycles(void) -{ - int subfunc, rtn_value; - unsigned int lfsr_value; - int cpu; - - oprofile_running = 0; - smp_wmb(); - -#ifdef CONFIG_CPU_FREQ - cpufreq_unregister_notifier(&cpu_freq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); -#endif - - for_each_online_cpu(cpu) { - if (cbe_get_hw_thread_id(cpu)) - continue; - - subfunc = 3; /* - * 2 - activate SPU tracing, - * 3 - deactivate - */ - lfsr_value = 0x8f100000; - - rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL, - subfunc, cbe_cpu_to_node(cpu), - lfsr_value); - - if (unlikely(rtn_value != 0)) { - printk(KERN_ERR - "%s: rtas call ibm,cbe-spu-perftools " \ - "failed, return = %d\n", - __func__, rtn_value); - } - - /* Deactivate the signals */ - pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); - } - - stop_spu_profiling_cycles(); -} - -static void cell_global_stop_spu_events(void) -{ - int cpu; - oprofile_running = 0; - - stop_spu_profiling_events(); - smp_wmb(); - - for_each_online_cpu(cpu) { - if (cbe_get_hw_thread_id(cpu)) - continue; - - cbe_sync_irq(cbe_cpu_to_node(cpu)); - /* Stop the counters */ - cbe_disable_pm(cpu); - cbe_write_pm07_control(cpu, 0, 0); - - /* Deactivate the signals */ - pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); - - /* Deactivate interrupts */ - cbe_disable_pm_interrupts(cpu); - } - del_timer_sync(&timer_spu_event_swap); -} - -static void cell_global_stop_ppu(void) -{ - int cpu; - - /* - * This routine will be called once for the system. - * There is one performance monitor per node, so we - * only need to perform this function once per node. - */ - del_timer_sync(&timer_virt_cntr); - oprofile_running = 0; - smp_wmb(); - - for_each_online_cpu(cpu) { - if (cbe_get_hw_thread_id(cpu)) - continue; - - cbe_sync_irq(cbe_cpu_to_node(cpu)); - /* Stop the counters */ - cbe_disable_pm(cpu); - - /* Deactivate the signals */ - pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); - - /* Deactivate interrupts */ - cbe_disable_pm_interrupts(cpu); - } -} - -static void cell_global_stop(void) -{ - if (profiling_mode == PPU_PROFILING) - cell_global_stop_ppu(); - else if (profiling_mode == SPU_PROFILING_EVENTS) - cell_global_stop_spu_events(); - else - cell_global_stop_spu_cycles(); -} - -static int cell_global_start_spu_cycles(struct op_counter_config *ctr) -{ - int subfunc; - unsigned int lfsr_value; - int cpu; - int ret; - int rtas_error; - unsigned int cpu_khzfreq = 0; - - /* The SPU profiling uses time-based profiling based on - * cpu frequency, so if configured with the CPU_FREQ - * option, we should detect frequency changes and react - * accordingly. - */ -#ifdef CONFIG_CPU_FREQ - ret = cpufreq_register_notifier(&cpu_freq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - if (ret < 0) - /* this is not a fatal error */ - printk(KERN_ERR "CPU freq change registration failed: %d\n", - ret); - - else - cpu_khzfreq = cpufreq_quick_get(smp_processor_id()); -#endif - - set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset); - - for_each_online_cpu(cpu) { - if (cbe_get_hw_thread_id(cpu)) - continue; - - /* - * Setup SPU cycle-based profiling. - * Set perf_mon_control bit 0 to a zero before - * enabling spu collection hardware. - */ - cbe_write_pm(cpu, pm_control, 0); - - if (spu_cycle_reset > MAX_SPU_COUNT) - /* use largest possible value */ - lfsr_value = calculate_lfsr(MAX_SPU_COUNT-1); - else - lfsr_value = calculate_lfsr(spu_cycle_reset); - - /* must use a non zero value. Zero disables data collection. */ - if (lfsr_value == 0) - lfsr_value = calculate_lfsr(1); - - lfsr_value = lfsr_value << 8; /* shift lfsr to correct - * register location - */ - - /* debug bus setup */ - ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu)); - - if (unlikely(ret)) { - rtas_error = ret; - goto out; - } - - - subfunc = 2; /* 2 - activate SPU tracing, 3 - deactivate */ - - /* start profiling */ - ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc, - cbe_cpu_to_node(cpu), lfsr_value); - - if (unlikely(ret != 0)) { - printk(KERN_ERR - "%s: rtas call ibm,cbe-spu-perftools failed, " \ - "return = %d\n", __func__, ret); - rtas_error = -EIO; - goto out; - } - } - - rtas_error = start_spu_profiling_cycles(spu_cycle_reset); - if (rtas_error) - goto out_stop; - - oprofile_running = 1; - return 0; - -out_stop: - cell_global_stop_spu_cycles(); /* clean up the PMU/debug bus */ -out: - return rtas_error; -} - -static int cell_global_start_spu_events(struct op_counter_config *ctr) -{ - int cpu; - u32 interrupt_mask = 0; - int rtn = 0; - - hdw_thread = 0; - - /* spu event profiling, uses the performance counters to generate - * an interrupt. The hardware is setup to store the SPU program - * counter into the trace array. The occurrence mode is used to - * enable storing data to the trace buffer. The bits are set - * to send/store the SPU address in the trace buffer. The debug - * bus must be setup to route the SPU program counter onto the - * debug bus. The occurrence data in the trace buffer is not used. - */ - - /* This routine gets called once for the system. - * There is one performance monitor per node, so we - * only need to perform this function once per node. - */ - - for_each_online_cpu(cpu) { - if (cbe_get_hw_thread_id(cpu)) - continue; - - /* - * Setup SPU event-based profiling. - * Set perf_mon_control bit 0 to a zero before - * enabling spu collection hardware. - * - * Only support one SPU event on one SPU per node. - */ - if (ctr_enabled & 1) { - cbe_write_ctr(cpu, 0, reset_value[0]); - enable_ctr(cpu, 0, pm_regs.pm07_cntrl); - interrupt_mask |= - CBE_PM_CTR_OVERFLOW_INTR(0); - } else { - /* Disable counter */ - cbe_write_pm07_control(cpu, 0, 0); - } - - cbe_get_and_clear_pm_interrupts(cpu); - cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask); - cbe_enable_pm(cpu); - - /* clear the trace buffer */ - cbe_write_pm(cpu, trace_address, 0); - } - - /* Start the timer to time slice collecting the event profile - * on each of the SPUs. Note, can collect profile on one SPU - * per node at a time. - */ - start_spu_event_swap(); - start_spu_profiling_events(); - oprofile_running = 1; - smp_wmb(); - - return rtn; -} - -static int cell_global_start_ppu(struct op_counter_config *ctr) -{ - u32 cpu, i; - u32 interrupt_mask = 0; - - /* This routine gets called once for the system. - * There is one performance monitor per node, so we - * only need to perform this function once per node. - */ - for_each_online_cpu(cpu) { - if (cbe_get_hw_thread_id(cpu)) - continue; - - interrupt_mask = 0; - - for (i = 0; i < num_counters; ++i) { - if (ctr_enabled & (1 << i)) { - cbe_write_ctr(cpu, i, reset_value[i]); - enable_ctr(cpu, i, pm_regs.pm07_cntrl); - interrupt_mask |= CBE_PM_CTR_OVERFLOW_INTR(i); - } else { - /* Disable counter */ - cbe_write_pm07_control(cpu, i, 0); - } - } - - cbe_get_and_clear_pm_interrupts(cpu); - cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask); - cbe_enable_pm(cpu); - } - - virt_cntr_inter_mask = interrupt_mask; - oprofile_running = 1; - smp_wmb(); - - /* - * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being - * executed which manipulates the PMU. We start the "virtual counter" - * here so that we do not need to synchronize access to the PMU in - * the above for-loop. - */ - start_virt_cntrs(); - - return 0; -} - -static int cell_global_start(struct op_counter_config *ctr) -{ - if (profiling_mode == SPU_PROFILING_CYCLES) - return cell_global_start_spu_cycles(ctr); - else if (profiling_mode == SPU_PROFILING_EVENTS) - return cell_global_start_spu_events(ctr); - else - return cell_global_start_ppu(ctr); -} - - -/* The SPU interrupt handler - * - * SPU event profiling works as follows: - * The pm_signal[0] holds the one SPU event to be measured. It is routed on - * the debug bus using word 0 or 1. The value of pm_signal[1] and - * pm_signal[2] contain the necessary events to route the SPU program - * counter for the selected SPU onto the debug bus using words 2 and 3. - * The pm_interval register is setup to write the SPU PC value into the - * trace buffer at the maximum rate possible. The trace buffer is configured - * to store the PCs, wrapping when it is full. The performance counter is - * initialized to the max hardware count minus the number of events, N, between - * samples. Once the N events have occurred, a HW counter overflow occurs - * causing the generation of a HW counter interrupt which also stops the - * writing of the SPU PC values to the trace buffer. Hence the last PC - * written to the trace buffer is the SPU PC that we want. Unfortunately, - * we have to read from the beginning of the trace buffer to get to the - * last value written. We just hope the PPU has nothing better to do then - * service this interrupt. The PC for the specific SPU being profiled is - * extracted from the trace buffer processed and stored. The trace buffer - * is cleared, interrupts are cleared, the counter is reset to max - N. - * A kernel timer is used to periodically call the routine spu_evnt_swap() - * to switch to the next physical SPU in the node to profile in round robbin - * order. This way data is collected for all SPUs on the node. It does mean - * that we need to use a relatively small value of N to ensure enough samples - * on each SPU are collected each SPU is being profiled 1/8 of the time. - * It may also be necessary to use a longer sample collection period. - */ -static void cell_handle_interrupt_spu(struct pt_regs *regs, - struct op_counter_config *ctr) -{ - u32 cpu, cpu_tmp; - u64 trace_entry; - u32 interrupt_mask; - u64 trace_buffer[2]; - u64 last_trace_buffer; - u32 sample; - u32 trace_addr; - unsigned long sample_array_lock_flags; - int spu_num; - unsigned long flags; - - /* Make sure spu event interrupt handler and spu event swap - * don't access the counters simultaneously. - */ - cpu = smp_processor_id(); - spin_lock_irqsave(&cntr_lock, flags); - - cpu_tmp = cpu; - cbe_disable_pm(cpu); - - interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); - - sample = 0xABCDEF; - trace_entry = 0xfedcba; - last_trace_buffer = 0xdeadbeaf; - - if ((oprofile_running == 1) && (interrupt_mask != 0)) { - /* disable writes to trace buff */ - cbe_write_pm(cpu, pm_interval, 0); - - /* only have one perf cntr being used, cntr 0 */ - if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(0)) - && ctr[0].enabled) - /* The SPU PC values will be read - * from the trace buffer, reset counter - */ - - cbe_write_ctr(cpu, 0, reset_value[0]); - - trace_addr = cbe_read_pm(cpu, trace_address); - - while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) { - /* There is data in the trace buffer to process - * Read the buffer until you get to the last - * entry. This is the value we want. - */ - - cbe_read_trace_buffer(cpu, trace_buffer); - trace_addr = cbe_read_pm(cpu, trace_address); - } - - /* SPU Address 16 bit count format for 128 bit - * HW trace buffer is used for the SPU PC storage - * HDR bits 0:15 - * SPU Addr 0 bits 16:31 - * SPU Addr 1 bits 32:47 - * unused bits 48:127 - * - * HDR: bit4 = 1 SPU Address 0 valid - * HDR: bit5 = 1 SPU Address 1 valid - * - unfortunately, the valid bits don't seem to work - * - * Note trace_buffer[0] holds bits 0:63 of the HW - * trace buffer, trace_buffer[1] holds bits 64:127 - */ - - trace_entry = trace_buffer[0] - & 0x00000000FFFF0000; - - /* only top 16 of the 18 bit SPU PC address - * is stored in trace buffer, hence shift right - * by 16 -2 bits */ - sample = trace_entry >> 14; - last_trace_buffer = trace_buffer[0]; - - spu_num = spu_evnt_phys_spu_indx - + (cbe_cpu_to_node(cpu) * NUM_SPUS_PER_NODE); - - /* make sure only one process at a time is calling - * spu_sync_buffer() - */ - spin_lock_irqsave(&oprof_spu_smpl_arry_lck, - sample_array_lock_flags); - spu_sync_buffer(spu_num, &sample, 1); - spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck, - sample_array_lock_flags); - - smp_wmb(); /* insure spu event buffer updates are written - * don't want events intermingled... */ - - /* The counters were frozen by the interrupt. - * Reenable the interrupt and restart the counters. - */ - cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC); - cbe_enable_pm_interrupts(cpu, hdw_thread, - virt_cntr_inter_mask); - - /* clear the trace buffer, re-enable writes to trace buff */ - cbe_write_pm(cpu, trace_address, 0); - cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC); - - /* The writes to the various performance counters only writes - * to a latch. The new values (interrupt setting bits, reset - * counter value etc.) are not copied to the actual registers - * until the performance monitor is enabled. In order to get - * this to work as desired, the performance monitor needs to - * be disabled while writing to the latches. This is a - * HW design issue. - */ - write_pm_cntrl(cpu); - cbe_enable_pm(cpu); - } - spin_unlock_irqrestore(&cntr_lock, flags); -} - -static void cell_handle_interrupt_ppu(struct pt_regs *regs, - struct op_counter_config *ctr) -{ - u32 cpu; - u64 pc; - int is_kernel; - unsigned long flags = 0; - u32 interrupt_mask; - int i; - - cpu = smp_processor_id(); - - /* - * Need to make sure the interrupt handler and the virt counter - * routine are not running at the same time. See the - * cell_virtual_cntr() routine for additional comments. - */ - spin_lock_irqsave(&cntr_lock, flags); - - /* - * Need to disable and reenable the performance counters - * to get the desired behavior from the hardware. This - * is hardware specific. - */ - - cbe_disable_pm(cpu); - - interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); - - /* - * If the interrupt mask has been cleared, then the virt cntr - * has cleared the interrupt. When the thread that generated - * the interrupt is restored, the data count will be restored to - * 0xffffff0 to cause the interrupt to be regenerated. - */ - - if ((oprofile_running == 1) && (interrupt_mask != 0)) { - pc = regs->nip; - is_kernel = is_kernel_addr(pc); - - for (i = 0; i < num_counters; ++i) { - if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(i)) - && ctr[i].enabled) { - oprofile_add_ext_sample(pc, regs, i, is_kernel); - cbe_write_ctr(cpu, i, reset_value[i]); - } - } - - /* - * The counters were frozen by the interrupt. - * Reenable the interrupt and restart the counters. - * If there was a race between the interrupt handler and - * the virtual counter routine. The virtual counter - * routine may have cleared the interrupts. Hence must - * use the virt_cntr_inter_mask to re-enable the interrupts. - */ - cbe_enable_pm_interrupts(cpu, hdw_thread, - virt_cntr_inter_mask); - - /* - * The writes to the various performance counters only writes - * to a latch. The new values (interrupt setting bits, reset - * counter value etc.) are not copied to the actual registers - * until the performance monitor is enabled. In order to get - * this to work as desired, the performance monitor needs to - * be disabled while writing to the latches. This is a - * HW design issue. - */ - cbe_enable_pm(cpu); - } - spin_unlock_irqrestore(&cntr_lock, flags); -} - -static void cell_handle_interrupt(struct pt_regs *regs, - struct op_counter_config *ctr) -{ - if (profiling_mode == PPU_PROFILING) - cell_handle_interrupt_ppu(regs, ctr); - else - cell_handle_interrupt_spu(regs, ctr); -} - -/* - * This function is called from the generic OProfile - * driver. When profiling PPUs, we need to do the - * generic sync start; otherwise, do spu_sync_start. - */ -static int cell_sync_start(void) -{ - if ((profiling_mode == SPU_PROFILING_CYCLES) || - (profiling_mode == SPU_PROFILING_EVENTS)) - return spu_sync_start(); - else - return DO_GENERIC_SYNC; -} - -static int cell_sync_stop(void) -{ - if ((profiling_mode == SPU_PROFILING_CYCLES) || - (profiling_mode == SPU_PROFILING_EVENTS)) - return spu_sync_stop(); - else - return 1; -} - -struct op_powerpc_model op_model_cell = { - .reg_setup = cell_reg_setup, - .cpu_setup = cell_cpu_setup, - .global_start = cell_global_start, - .global_stop = cell_global_stop, - .sync_start = cell_sync_start, - .sync_stop = cell_sync_stop, - .handle_interrupt = cell_handle_interrupt, -}; diff --git a/arch/powerpc/oprofile/op_model_fsl_emb.c b/arch/powerpc/oprofile/op_model_fsl_emb.c deleted file mode 100644 index 25dc6813ecee..000000000000 --- a/arch/powerpc/oprofile/op_model_fsl_emb.c +++ /dev/null @@ -1,380 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Freescale Embedded oprofile support, based on ppc64 oprofile support - * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM - * - * Copyright (c) 2004, 2010 Freescale Semiconductor, Inc - * - * Author: Andy Fleming - * Maintainer: Kumar Gala <galak@kernel.crashing.org> - */ - -#include <linux/oprofile.h> -#include <linux/smp.h> -#include <asm/ptrace.h> -#include <asm/processor.h> -#include <asm/cputable.h> -#include <asm/reg_fsl_emb.h> -#include <asm/page.h> -#include <asm/pmc.h> -#include <asm/oprofile_impl.h> - -static unsigned long reset_value[OP_MAX_COUNTER]; - -static int num_counters; -static int oprofile_running; - -static inline u32 get_pmlca(int ctr) -{ - u32 pmlca; - - switch (ctr) { - case 0: - pmlca = mfpmr(PMRN_PMLCA0); - break; - case 1: - pmlca = mfpmr(PMRN_PMLCA1); - break; - case 2: - pmlca = mfpmr(PMRN_PMLCA2); - break; - case 3: - pmlca = mfpmr(PMRN_PMLCA3); - break; - case 4: - pmlca = mfpmr(PMRN_PMLCA4); - break; - case 5: - pmlca = mfpmr(PMRN_PMLCA5); - break; - default: - panic("Bad ctr number\n"); - } - - return pmlca; -} - -static inline void set_pmlca(int ctr, u32 pmlca) -{ - switch (ctr) { - case 0: - mtpmr(PMRN_PMLCA0, pmlca); - break; - case 1: - mtpmr(PMRN_PMLCA1, pmlca); - break; - case 2: - mtpmr(PMRN_PMLCA2, pmlca); - break; - case 3: - mtpmr(PMRN_PMLCA3, pmlca); - break; - case 4: - mtpmr(PMRN_PMLCA4, pmlca); - break; - case 5: - mtpmr(PMRN_PMLCA5, pmlca); - break; - default: - panic("Bad ctr number\n"); - } -} - -static inline unsigned int ctr_read(unsigned int i) -{ - switch(i) { - case 0: - return mfpmr(PMRN_PMC0); - case 1: - return mfpmr(PMRN_PMC1); - case 2: - return mfpmr(PMRN_PMC2); - case 3: - return mfpmr(PMRN_PMC3); - case 4: - return mfpmr(PMRN_PMC4); - case 5: - return mfpmr(PMRN_PMC5); - default: - return 0; - } -} - -static inline void ctr_write(unsigned int i, unsigned int val) -{ - switch(i) { - case 0: - mtpmr(PMRN_PMC0, val); - break; - case 1: - mtpmr(PMRN_PMC1, val); - break; - case 2: - mtpmr(PMRN_PMC2, val); - break; - case 3: - mtpmr(PMRN_PMC3, val); - break; - case 4: - mtpmr(PMRN_PMC4, val); - break; - case 5: - mtpmr(PMRN_PMC5, val); - break; - default: - break; - } -} - - -static void init_pmc_stop(int ctr) -{ - u32 pmlca = (PMLCA_FC | PMLCA_FCS | PMLCA_FCU | - PMLCA_FCM1 | PMLCA_FCM0); - u32 pmlcb = 0; - - switch (ctr) { - case 0: - mtpmr(PMRN_PMLCA0, pmlca); - mtpmr(PMRN_PMLCB0, pmlcb); - break; - case 1: - mtpmr(PMRN_PMLCA1, pmlca); - mtpmr(PMRN_PMLCB1, pmlcb); - break; - case 2: - mtpmr(PMRN_PMLCA2, pmlca); - mtpmr(PMRN_PMLCB2, pmlcb); - break; - case 3: - mtpmr(PMRN_PMLCA3, pmlca); - mtpmr(PMRN_PMLCB3, pmlcb); - break; - case 4: - mtpmr(PMRN_PMLCA4, pmlca); - mtpmr(PMRN_PMLCB4, pmlcb); - break; - case 5: - mtpmr(PMRN_PMLCA5, pmlca); - mtpmr(PMRN_PMLCB5, pmlcb); - break; - default: - panic("Bad ctr number!\n"); - } -} - -static void set_pmc_event(int ctr, int event) -{ - u32 pmlca; - - pmlca = get_pmlca(ctr); - - pmlca = (pmlca & ~PMLCA_EVENT_MASK) | - ((event << PMLCA_EVENT_SHIFT) & - PMLCA_EVENT_MASK); - - set_pmlca(ctr, pmlca); -} - -static void set_pmc_user_kernel(int ctr, int user, int kernel) -{ - u32 pmlca; - - pmlca = get_pmlca(ctr); - - if(user) - pmlca &= ~PMLCA_FCU; - else - pmlca |= PMLCA_FCU; - - if(kernel) - pmlca &= ~PMLCA_FCS; - else - pmlca |= PMLCA_FCS; - - set_pmlca(ctr, pmlca); -} - -static void set_pmc_marked(int ctr, int mark0, int mark1) -{ - u32 pmlca = get_pmlca(ctr); - - if(mark0) - pmlca &= ~PMLCA_FCM0; - else - pmlca |= PMLCA_FCM0; - - if(mark1) - pmlca &= ~PMLCA_FCM1; - else - pmlca |= PMLCA_FCM1; - - set_pmlca(ctr, pmlca); -} - -static void pmc_start_ctr(int ctr, int enable) -{ - u32 pmlca = get_pmlca(ctr); - - pmlca &= ~PMLCA_FC; - - if (enable) - pmlca |= PMLCA_CE; - else - pmlca &= ~PMLCA_CE; - - set_pmlca(ctr, pmlca); -} - -static void pmc_start_ctrs(int enable) -{ - u32 pmgc0 = mfpmr(PMRN_PMGC0); - - pmgc0 &= ~PMGC0_FAC; - pmgc0 |= PMGC0_FCECE; - - if (enable) - pmgc0 |= PMGC0_PMIE; - else - pmgc0 &= ~PMGC0_PMIE; - - mtpmr(PMRN_PMGC0, pmgc0); -} - -static void pmc_stop_ctrs(void) -{ - u32 pmgc0 = mfpmr(PMRN_PMGC0); - - pmgc0 |= PMGC0_FAC; - - pmgc0 &= ~(PMGC0_PMIE | PMGC0_FCECE); - - mtpmr(PMRN_PMGC0, pmgc0); -} - -static int fsl_emb_cpu_setup(struct op_counter_config *ctr) -{ - int i; - - /* freeze all counters */ - pmc_stop_ctrs(); - - for (i = 0;i < num_counters;i++) { - init_pmc_stop(i); - - set_pmc_event(i, ctr[i].event); - - set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel); - } - - return 0; -} - -static int fsl_emb_reg_setup(struct op_counter_config *ctr, - struct op_system_config *sys, - int num_ctrs) -{ - int i; - - num_counters = num_ctrs; - - /* Our counters count up, and "count" refers to - * how much before the next interrupt, and we interrupt - * on overflow. So we calculate the starting value - * which will give us "count" until overflow. - * Then we set the events on the enabled counters */ - for (i = 0; i < num_counters; ++i) - reset_value[i] = 0x80000000UL - ctr[i].count; - - return 0; -} - -static int fsl_emb_start(struct op_counter_config *ctr) -{ - int i; - - mtmsr(mfmsr() | MSR_PMM); - - for (i = 0; i < num_counters; ++i) { - if (ctr[i].enabled) { - ctr_write(i, reset_value[i]); - /* Set each enabled counter to only - * count when the Mark bit is *not* set */ - set_pmc_marked(i, 1, 0); - pmc_start_ctr(i, 1); - } else { - ctr_write(i, 0); - - /* Set the ctr to be stopped */ - pmc_start_ctr(i, 0); - } - } - - /* Clear the freeze bit, and enable the interrupt. - * The counters won't actually start until the rfi clears - * the PMM bit */ - pmc_start_ctrs(1); - - oprofile_running = 1; - - pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(), - mfpmr(PMRN_PMGC0)); - - return 0; -} - -static void fsl_emb_stop(void) -{ - /* freeze counters */ - pmc_stop_ctrs(); - - oprofile_running = 0; - - pr_debug("stop on cpu %d, pmgc0 %x\n", smp_processor_id(), - mfpmr(PMRN_PMGC0)); - - mb(); -} - - -static void fsl_emb_handle_interrupt(struct pt_regs *regs, - struct op_counter_config *ctr) -{ - unsigned long pc; - int is_kernel; - int val; - int i; - - pc = regs->nip; - is_kernel = is_kernel_addr(pc); - - for (i = 0; i < num_counters; ++i) { - val = ctr_read(i); - if (val < 0) { - if (oprofile_running && ctr[i].enabled) { - oprofile_add_ext_sample(pc, regs, i, is_kernel); - ctr_write(i, reset_value[i]); - } else { - ctr_write(i, 0); - } - } - } - - /* The freeze bit was set by the interrupt. */ - /* Clear the freeze bit, and reenable the interrupt. The - * counters won't actually start until the rfi clears the PMM - * bit. The PMM bit should not be set until after the interrupt - * is cleared to avoid it getting lost in some hypervisor - * environments. - */ - mtmsr(mfmsr() | MSR_PMM); - pmc_start_ctrs(1); -} - -struct op_powerpc_model op_model_fsl_emb = { - .reg_setup = fsl_emb_reg_setup, - .cpu_setup = fsl_emb_cpu_setup, - .start = fsl_emb_start, - .stop = fsl_emb_stop, - .handle_interrupt = fsl_emb_handle_interrupt, -}; diff --git a/arch/powerpc/oprofile/op_model_pa6t.c b/arch/powerpc/oprofile/op_model_pa6t.c deleted file mode 100644 index d23061cf76bc..000000000000 --- a/arch/powerpc/oprofile/op_model_pa6t.c +++ /dev/null @@ -1,227 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2006-2007 PA Semi, Inc - * - * Author: Shashi Rao, PA Semi - * - * Maintained by: Olof Johansson <olof@lixom.net> - * - * Based on arch/powerpc/oprofile/op_model_power4.c - */ - -#include <linux/oprofile.h> -#include <linux/smp.h> -#include <linux/percpu.h> -#include <asm/processor.h> -#include <asm/cputable.h> -#include <asm/oprofile_impl.h> -#include <asm/reg.h> - -static unsigned char oprofile_running; - -/* mmcr values are set in pa6t_reg_setup, used in pa6t_cpu_setup */ -static u64 mmcr0_val; -static u64 mmcr1_val; - -/* inited in pa6t_reg_setup */ -static u64 reset_value[OP_MAX_COUNTER]; - -static inline u64 ctr_read(unsigned int i) -{ - switch (i) { - case 0: - return mfspr(SPRN_PA6T_PMC0); - case 1: - return mfspr(SPRN_PA6T_PMC1); - case 2: - return mfspr(SPRN_PA6T_PMC2); - case 3: - return mfspr(SPRN_PA6T_PMC3); - case 4: - return mfspr(SPRN_PA6T_PMC4); - case 5: - return mfspr(SPRN_PA6T_PMC5); - default: - printk(KERN_ERR "ctr_read called with bad arg %u\n", i); - return 0; - } -} - -static inline void ctr_write(unsigned int i, u64 val) -{ - switch (i) { - case 0: - mtspr(SPRN_PA6T_PMC0, val); - break; - case 1: - mtspr(SPRN_PA6T_PMC1, val); - break; - case 2: - mtspr(SPRN_PA6T_PMC2, val); - break; - case 3: - mtspr(SPRN_PA6T_PMC3, val); - break; - case 4: - mtspr(SPRN_PA6T_PMC4, val); - break; - case 5: - mtspr(SPRN_PA6T_PMC5, val); - break; - default: - printk(KERN_ERR "ctr_write called with bad arg %u\n", i); - break; - } -} - - -/* precompute the values to stuff in the hardware registers */ -static int pa6t_reg_setup(struct op_counter_config *ctr, - struct op_system_config *sys, - int num_ctrs) -{ - int pmc; - - /* - * adjust the mmcr0.en[0-5] and mmcr0.inten[0-5] values obtained from the - * event_mappings file by turning off the counters that the user doesn't - * care about - * - * setup user and kernel profiling - */ - for (pmc = 0; pmc < cur_cpu_spec->num_pmcs; pmc++) - if (!ctr[pmc].enabled) { - sys->mmcr0 &= ~(0x1UL << pmc); - sys->mmcr0 &= ~(0x1UL << (pmc+12)); - pr_debug("turned off counter %u\n", pmc); - } - - if (sys->enable_kernel) - sys->mmcr0 |= PA6T_MMCR0_SUPEN | PA6T_MMCR0_HYPEN; - else - sys->mmcr0 &= ~(PA6T_MMCR0_SUPEN | PA6T_MMCR0_HYPEN); - - if (sys->enable_user) - sys->mmcr0 |= PA6T_MMCR0_PREN; - else - sys->mmcr0 &= ~PA6T_MMCR0_PREN; - - /* - * The performance counter event settings are given in the mmcr0 and - * mmcr1 values passed from the user in the op_system_config - * structure (sys variable). - */ - mmcr0_val = sys->mmcr0; - mmcr1_val = sys->mmcr1; - pr_debug("mmcr0_val inited to %016lx\n", sys->mmcr0); - pr_debug("mmcr1_val inited to %016lx\n", sys->mmcr1); - - for (pmc = 0; pmc < cur_cpu_spec->num_pmcs; pmc++) { - /* counters are 40 bit. Move to cputable at some point? */ - reset_value[pmc] = (0x1UL << 39) - ctr[pmc].count; - pr_debug("reset_value for pmc%u inited to 0x%llx\n", - pmc, reset_value[pmc]); - } - - return 0; -} - -/* configure registers on this cpu */ -static int pa6t_cpu_setup(struct op_counter_config *ctr) -{ - u64 mmcr0 = mmcr0_val; - u64 mmcr1 = mmcr1_val; - - /* Default is all PMCs off */ - mmcr0 &= ~(0x3FUL); - mtspr(SPRN_PA6T_MMCR0, mmcr0); - - /* program selected programmable events in */ - mtspr(SPRN_PA6T_MMCR1, mmcr1); - - pr_debug("setup on cpu %d, mmcr0 %016lx\n", smp_processor_id(), - mfspr(SPRN_PA6T_MMCR0)); - pr_debug("setup on cpu %d, mmcr1 %016lx\n", smp_processor_id(), - mfspr(SPRN_PA6T_MMCR1)); - - return 0; -} - -static int pa6t_start(struct op_counter_config *ctr) -{ - int i; - - /* Hold off event counting until rfid */ - u64 mmcr0 = mmcr0_val | PA6T_MMCR0_HANDDIS; - - for (i = 0; i < cur_cpu_spec->num_pmcs; i++) - if (ctr[i].enabled) - ctr_write(i, reset_value[i]); - else - ctr_write(i, 0UL); - - mtspr(SPRN_PA6T_MMCR0, mmcr0); - - oprofile_running = 1; - - pr_debug("start on cpu %d, mmcr0 %llx\n", smp_processor_id(), mmcr0); - - return 0; -} - -static void pa6t_stop(void) -{ - u64 mmcr0; - - /* freeze counters */ - mmcr0 = mfspr(SPRN_PA6T_MMCR0); - mmcr0 |= PA6T_MMCR0_FCM0; - mtspr(SPRN_PA6T_MMCR0, mmcr0); - - oprofile_running = 0; - - pr_debug("stop on cpu %d, mmcr0 %llx\n", smp_processor_id(), mmcr0); -} - -/* handle the perfmon overflow vector */ -static void pa6t_handle_interrupt(struct pt_regs *regs, - struct op_counter_config *ctr) -{ - unsigned long pc = mfspr(SPRN_PA6T_SIAR); - int is_kernel = is_kernel_addr(pc); - u64 val; - int i; - u64 mmcr0; - - /* disable perfmon counting until rfid */ - mmcr0 = mfspr(SPRN_PA6T_MMCR0); - mtspr(SPRN_PA6T_MMCR0, mmcr0 | PA6T_MMCR0_HANDDIS); - - /* Record samples. We've got one global bit for whether a sample - * was taken, so add it for any counter that triggered overflow. - */ - for (i = 0; i < cur_cpu_spec->num_pmcs; i++) { - val = ctr_read(i); - if (val & (0x1UL << 39)) { /* Overflow bit set */ - if (oprofile_running && ctr[i].enabled) { - if (mmcr0 & PA6T_MMCR0_SIARLOG) - oprofile_add_ext_sample(pc, regs, i, is_kernel); - ctr_write(i, reset_value[i]); - } else { - ctr_write(i, 0UL); - } - } - } - - /* Restore mmcr0 to a good known value since the PMI changes it */ - mmcr0 = mmcr0_val | PA6T_MMCR0_HANDDIS; - mtspr(SPRN_PA6T_MMCR0, mmcr0); -} - -struct op_powerpc_model op_model_pa6t = { - .reg_setup = pa6t_reg_setup, - .cpu_setup = pa6t_cpu_setup, - .start = pa6t_start, - .stop = pa6t_stop, - .handle_interrupt = pa6t_handle_interrupt, -}; diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c deleted file mode 100644 index 2ae6b86ff97b..000000000000 --- a/arch/powerpc/oprofile/op_model_power4.c +++ /dev/null @@ -1,438 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM - * Added mmcra[slot] support: - * Copyright (C) 2006-2007 Will Schmidt <willschm@us.ibm.com>, IBM - */ - -#include <linux/oprofile.h> -#include <linux/smp.h> -#include <asm/firmware.h> -#include <asm/ptrace.h> -#include <asm/processor.h> -#include <asm/cputable.h> -#include <asm/rtas.h> -#include <asm/oprofile_impl.h> -#include <asm/reg.h> - -#define dbg(args...) -#define OPROFILE_PM_PMCSEL_MSK 0xffULL -#define OPROFILE_PM_UNIT_SHIFT 60 -#define OPROFILE_PM_UNIT_MSK 0xfULL -#define OPROFILE_MAX_PMC_NUM 3 -#define OPROFILE_PMSEL_FIELD_WIDTH 8 -#define OPROFILE_UNIT_FIELD_WIDTH 4 -#define MMCRA_SIAR_VALID_MASK 0x10000000ULL - -static unsigned long reset_value[OP_MAX_COUNTER]; - -static int oprofile_running; -static int use_slot_nums; - -/* mmcr values are set in power4_reg_setup, used in power4_cpu_setup */ -static u32 mmcr0_val; -static u64 mmcr1_val; -static u64 mmcra_val; -static u32 cntr_marked_events; - -static int power7_marked_instr_event(u64 mmcr1) -{ - u64 psel, unit; - int pmc, cntr_marked_events = 0; - - /* Given the MMCR1 value, look at the field for each counter to - * determine if it is a marked event. Code based on the function - * power7_marked_instr_event() in file arch/powerpc/perf/power7-pmu.c. - */ - for (pmc = 0; pmc < 4; pmc++) { - psel = mmcr1 & (OPROFILE_PM_PMCSEL_MSK - << (OPROFILE_MAX_PMC_NUM - pmc) - * OPROFILE_PMSEL_FIELD_WIDTH); - psel = (psel >> ((OPROFILE_MAX_PMC_NUM - pmc) - * OPROFILE_PMSEL_FIELD_WIDTH)) & ~1ULL; - unit = mmcr1 & (OPROFILE_PM_UNIT_MSK - << (OPROFILE_PM_UNIT_SHIFT - - (pmc * OPROFILE_PMSEL_FIELD_WIDTH ))); - unit = unit >> (OPROFILE_PM_UNIT_SHIFT - - (pmc * OPROFILE_PMSEL_FIELD_WIDTH)); - - switch (psel >> 4) { - case 2: - cntr_marked_events |= (pmc == 1 || pmc == 3) << pmc; - break; - case 3: - if (psel == 0x3c) { - cntr_marked_events |= (pmc == 0) << pmc; - break; - } - - if (psel == 0x3e) { - cntr_marked_events |= (pmc != 1) << pmc; - break; - } - - cntr_marked_events |= 1 << pmc; - break; - case 4: - case 5: - cntr_marked_events |= (unit == 0xd) << pmc; - break; - case 6: - if (psel == 0x64) - cntr_marked_events |= (pmc >= 2) << pmc; - break; - case 8: - cntr_marked_events |= (unit == 0xd) << pmc; - break; - } - } - return cntr_marked_events; -} - -static int power4_reg_setup(struct op_counter_config *ctr, - struct op_system_config *sys, - int num_ctrs) -{ - int i; - - /* - * The performance counter event settings are given in the mmcr0, - * mmcr1 and mmcra values passed from the user in the - * op_system_config structure (sys variable). - */ - mmcr0_val = sys->mmcr0; - mmcr1_val = sys->mmcr1; - mmcra_val = sys->mmcra; - - /* Power 7+ and newer architectures: - * Determine which counter events in the group (the group of events is - * specified by the bit settings in the MMCR1 register) are marked - * events for use in the interrupt handler. Do the calculation once - * before OProfile starts. Information is used in the interrupt - * handler. Starting with Power 7+ we only record the sample for - * marked events if the SIAR valid bit is set. For non marked events - * the sample is always recorded. - */ - if (pvr_version_is(PVR_POWER7p)) - cntr_marked_events = power7_marked_instr_event(mmcr1_val); - else - cntr_marked_events = 0; /* For older processors, set the bit map - * to zero so the sample will always be - * be recorded. - */ - - for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) - reset_value[i] = 0x80000000UL - ctr[i].count; - - /* setup user and kernel profiling */ - if (sys->enable_kernel) - mmcr0_val &= ~MMCR0_KERNEL_DISABLE; - else - mmcr0_val |= MMCR0_KERNEL_DISABLE; - - if (sys->enable_user) - mmcr0_val &= ~MMCR0_PROBLEM_DISABLE; - else - mmcr0_val |= MMCR0_PROBLEM_DISABLE; - - if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p) || - pvr_version_is(PVR_970) || pvr_version_is(PVR_970FX) || - pvr_version_is(PVR_970MP) || pvr_version_is(PVR_970GX) || - pvr_version_is(PVR_POWER5) || pvr_version_is(PVR_POWER5p)) - use_slot_nums = 1; - - return 0; -} - -extern void ppc_enable_pmcs(void); - -/* - * Older CPUs require the MMCRA sample bit to be always set, but newer - * CPUs only want it set for some groups. Eventually we will remove all - * knowledge of this bit in the kernel, oprofile userspace should be - * setting it when required. - * - * In order to keep current installations working we force the bit for - * those older CPUs. Once everyone has updated their oprofile userspace we - * can remove this hack. - */ -static inline int mmcra_must_set_sample(void) -{ - if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p) || - pvr_version_is(PVR_970) || pvr_version_is(PVR_970FX) || - pvr_version_is(PVR_970MP) || pvr_version_is(PVR_970GX)) - return 1; - - return 0; -} - -static int power4_cpu_setup(struct op_counter_config *ctr) -{ - unsigned int mmcr0 = mmcr0_val; - unsigned long mmcra = mmcra_val; - - ppc_enable_pmcs(); - - /* set the freeze bit */ - mmcr0 |= MMCR0_FC; - mtspr(SPRN_MMCR0, mmcr0); - - mmcr0 |= MMCR0_FCM1|MMCR0_PMXE|MMCR0_FCECE; - mmcr0 |= MMCR0_PMC1CE|MMCR0_PMCjCE; - mtspr(SPRN_MMCR0, mmcr0); - - mtspr(SPRN_MMCR1, mmcr1_val); - - if (mmcra_must_set_sample()) - mmcra |= MMCRA_SAMPLE_ENABLE; - mtspr(SPRN_MMCRA, mmcra); - - dbg("setup on cpu %d, mmcr0 %lx\n", smp_processor_id(), - mfspr(SPRN_MMCR0)); - dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(), - mfspr(SPRN_MMCR1)); - dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(), - mfspr(SPRN_MMCRA)); - - return 0; -} - -static int power4_start(struct op_counter_config *ctr) -{ - int i; - unsigned int mmcr0; - - /* set the PMM bit (see comment below) */ - mtmsr(mfmsr() | MSR_PMM); - - for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) { - if (ctr[i].enabled) { - classic_ctr_write(i, reset_value[i]); - } else { - classic_ctr_write(i, 0); - } - } - - mmcr0 = mfspr(SPRN_MMCR0); - - /* - * We must clear the PMAO bit on some (GQ) chips. Just do it - * all the time - */ - mmcr0 &= ~MMCR0_PMAO; - - /* - * now clear the freeze bit, counting will not start until we - * rfid from this excetion, because only at that point will - * the PMM bit be cleared - */ - mmcr0 &= ~MMCR0_FC; - mtspr(SPRN_MMCR0, mmcr0); - - oprofile_running = 1; - - dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); - return 0; -} - -static void power4_stop(void) -{ - unsigned int mmcr0; - - /* freeze counters */ - mmcr0 = mfspr(SPRN_MMCR0); - mmcr0 |= MMCR0_FC; - mtspr(SPRN_MMCR0, mmcr0); - - oprofile_running = 0; - - dbg("stop on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); - - mb(); -} - -/* Fake functions used by canonicalize_pc */ -static void __used hypervisor_bucket(void) -{ -} - -static void __used rtas_bucket(void) -{ -} - -static void __used kernel_unknown_bucket(void) -{ -} - -/* - * On GQ and newer the MMCRA stores the HV and PR bits at the time - * the SIAR was sampled. We use that to work out if the SIAR was sampled in - * the hypervisor, our exception vectors or RTAS. - * If the MMCRA_SAMPLE_ENABLE bit is set, we can use the MMCRA[slot] bits - * to more accurately identify the address of the sampled instruction. The - * mmcra[slot] bits represent the slot number of a sampled instruction - * within an instruction group. The slot will contain a value between 1 - * and 5 if MMCRA_SAMPLE_ENABLE is set, otherwise 0. - */ -static unsigned long get_pc(struct pt_regs *regs) -{ - unsigned long pc = mfspr(SPRN_SIAR); - unsigned long mmcra; - unsigned long slot; - - /* Can't do much about it */ - if (!cur_cpu_spec->oprofile_mmcra_sihv) - return pc; - - mmcra = mfspr(SPRN_MMCRA); - - if (use_slot_nums && (mmcra & MMCRA_SAMPLE_ENABLE)) { - slot = ((mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT); - if (slot > 1) - pc += 4 * (slot - 1); - } - - /* Were we in the hypervisor? */ - if (firmware_has_feature(FW_FEATURE_LPAR) && - (mmcra & cur_cpu_spec->oprofile_mmcra_sihv)) - /* function descriptor madness */ - return *((unsigned long *)hypervisor_bucket); - - /* We were in userspace, nothing to do */ - if (mmcra & cur_cpu_spec->oprofile_mmcra_sipr) - return pc; - -#ifdef CONFIG_PPC_RTAS - /* Were we in RTAS? */ - if (pc >= rtas.base && pc < (rtas.base + rtas.size)) - /* function descriptor madness */ - return *((unsigned long *)rtas_bucket); -#endif - - /* Were we in our exception vectors or SLB real mode miss handler? */ - if (pc < 0x1000000UL) - return (unsigned long)__va(pc); - - /* Not sure where we were */ - if (!is_kernel_addr(pc)) - /* function descriptor madness */ - return *((unsigned long *)kernel_unknown_bucket); - - return pc; -} - -static int get_kernel(unsigned long pc, unsigned long mmcra) -{ - int is_kernel; - - if (!cur_cpu_spec->oprofile_mmcra_sihv) { - is_kernel = is_kernel_addr(pc); - } else { - is_kernel = ((mmcra & cur_cpu_spec->oprofile_mmcra_sipr) == 0); - } - - return is_kernel; -} - -static bool pmc_overflow(unsigned long val) -{ - if ((int)val < 0) - return true; - - /* - * Events on POWER7 can roll back if a speculative event doesn't - * eventually complete. Unfortunately in some rare cases they will - * raise a performance monitor exception. We need to catch this to - * ensure we reset the PMC. In all cases the PMC will be 256 or less - * cycles from overflow. - * - * We only do this if the first pass fails to find any overflowing - * PMCs because a user might set a period of less than 256 and we - * don't want to mistakenly reset them. - */ - if (pvr_version_is(PVR_POWER7) && ((0x80000000 - val) <= 256)) - return true; - - return false; -} - -static void power4_handle_interrupt(struct pt_regs *regs, - struct op_counter_config *ctr) -{ - unsigned long pc; - int is_kernel; - int val; - int i; - unsigned int mmcr0; - unsigned long mmcra; - bool siar_valid = false; - - mmcra = mfspr(SPRN_MMCRA); - - pc = get_pc(regs); - is_kernel = get_kernel(pc, mmcra); - - /* set the PMM bit (see comment below) */ - mtmsr(mfmsr() | MSR_PMM); - - /* Check that the SIAR valid bit in MMCRA is set to 1. */ - if ((mmcra & MMCRA_SIAR_VALID_MASK) == MMCRA_SIAR_VALID_MASK) - siar_valid = true; - - for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) { - val = classic_ctr_read(i); - if (pmc_overflow(val)) { - if (oprofile_running && ctr[i].enabled) { - /* Power 7+ and newer architectures: - * If the event is a marked event, then only - * save the sample if the SIAR valid bit is - * set. If the event is not marked, then - * always save the sample. - * Note, the Sample enable bit in the MMCRA - * register must be set to 1 if the group - * contains a marked event. - */ - if ((siar_valid && - (cntr_marked_events & (1 << i))) - || !(cntr_marked_events & (1 << i))) - oprofile_add_ext_sample(pc, regs, i, - is_kernel); - - classic_ctr_write(i, reset_value[i]); - } else { - classic_ctr_write(i, 0); - } - } - } - - mmcr0 = mfspr(SPRN_MMCR0); - - /* reset the perfmon trigger */ - mmcr0 |= MMCR0_PMXE; - - /* - * We must clear the PMAO bit on some (GQ) chips. Just do it - * all the time - */ - mmcr0 &= ~MMCR0_PMAO; - - /* Clear the appropriate bits in the MMCRA */ - mmcra &= ~cur_cpu_spec->oprofile_mmcra_clear; - mtspr(SPRN_MMCRA, mmcra); - - /* - * now clear the freeze bit, counting will not start until we - * rfid from this exception, because only at that point will - * the PMM bit be cleared - */ - mmcr0 &= ~MMCR0_FC; - mtspr(SPRN_MMCR0, mmcr0); -} - -struct op_powerpc_model op_model_power4 = { - .reg_setup = power4_reg_setup, - .cpu_setup = power4_cpu_setup, - .start = power4_start, - .stop = power4_stop, - .handle_interrupt = power4_handle_interrupt, -}; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 28206b1fe172..6817331e22ff 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -54,6 +54,9 @@ struct cpu_hw_events { struct perf_branch_stack bhrb_stack; struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES]; u64 ic_init; + + /* Store the PMC values */ + unsigned long pmcs[MAX_HWEVENTS]; }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); @@ -110,10 +113,6 @@ static inline void perf_read_regs(struct pt_regs *regs) { regs->result = 0; } -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ - return 0; -} static inline int siar_valid(struct pt_regs *regs) { @@ -147,6 +146,17 @@ bool is_sier_available(void) return false; } +/* + * Return PMC value corresponding to the + * index passed. + */ +unsigned long get_pmcs_ext_regs(int idx) +{ + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + + return cpuhw->pmcs[idx]; +} + static bool regs_use_siar(struct pt_regs *regs) { /* @@ -354,15 +364,6 @@ static inline void perf_read_regs(struct pt_regs *regs) } /* - * If interrupts were soft-disabled when a PMU interrupt occurs, treat - * it as an NMI. - */ -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ - return (regs->softe & IRQS_DISABLED); -} - -/* * On processors like P7+ that have the SIAR-Valid bit, marked instructions * must be sampled only if the SIAR-valid bit is set. * @@ -915,7 +916,7 @@ void perf_event_print_debug(void) */ static int power_check_constraints(struct cpu_hw_events *cpuhw, u64 event_id[], unsigned int cflags[], - int n_ev) + int n_ev, struct perf_event **event) { unsigned long mask, value, nv; unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS]; @@ -938,7 +939,7 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw, event_id[i] = cpuhw->alternatives[i][0]; } if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0], - &cpuhw->avalues[i][0])) + &cpuhw->avalues[i][0], event[i]->attr.config1)) return -1; } value = mask = 0; @@ -973,7 +974,8 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw, for (j = 1; j < n_alt[i]; ++j) ppmu->get_constraint(cpuhw->alternatives[i][j], &cpuhw->amasks[i][j], - &cpuhw->avalues[i][j]); + &cpuhw->avalues[i][j], + event[i]->attr.config1); } /* enumerate all possibilities and see if any will work */ @@ -1391,7 +1393,7 @@ static void power_pmu_enable(struct pmu *pmu) memset(&cpuhw->mmcr, 0, sizeof(cpuhw->mmcr)); if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index, - &cpuhw->mmcr, cpuhw->event)) { + &cpuhw->mmcr, cpuhw->event, ppmu->flags)) { /* shouldn't ever get here */ printk(KERN_ERR "oops compute_mmcr failed\n"); goto out; @@ -1579,7 +1581,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) goto out; - if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) + if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1, cpuhw->event)) goto out; event->hw.config = cpuhw->events[n0]; @@ -1789,7 +1791,7 @@ static int power_pmu_commit_txn(struct pmu *pmu) n = cpuhw->n_events; if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) return -EAGAIN; - i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n); + i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n, cpuhw->event); if (i < 0) return -EAGAIN; @@ -2027,7 +2029,7 @@ static int power_pmu_event_init(struct perf_event *event) local_irq_save(irq_flags); cpuhw = this_cpu_ptr(&cpu_hw_events); - err = power_check_constraints(cpuhw, events, cflags, n + 1); + err = power_check_constraints(cpuhw, events, cflags, n + 1, ctrs); if (has_branch_stack(event)) { u64 bhrb_filter = -1; @@ -2149,7 +2151,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val, left += period; if (left <= 0) left = period; - record = siar_valid(regs); + + /* + * If address is not requested in the sample via + * PERF_SAMPLE_IP, just record that sample irrespective + * of SIAR valid check. + */ + if (event->attr.sample_type & PERF_SAMPLE_IP) + record = siar_valid(regs); + else + record = 1; + event->hw.last_period = event->hw.sample_period; } if (left < 0x80000000LL) @@ -2167,9 +2179,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * MMCR2. Check attr.exclude_kernel and address to drop the sample in * these cases. */ - if (event->attr.exclude_kernel && record) - if (is_kernel_addr(mfspr(SPRN_SIAR))) - record = 0; + if (event->attr.exclude_kernel && + (event->attr.sample_type & PERF_SAMPLE_IP) && + is_kernel_addr(mfspr(SPRN_SIAR))) + record = 0; /* * Finally record data if requested. @@ -2195,7 +2208,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_WEIGHT && ppmu->get_mem_weight) - ppmu->get_mem_weight(&data.weight); + ppmu->get_mem_weight(&data.weight.full); if (perf_event_overflow(event, &data, regs)) power_pmu_stop(event, 0); @@ -2277,9 +2290,7 @@ static void __perf_event_interrupt(struct pt_regs *regs) int i, j; struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; - unsigned long val[8]; int found, active; - int nmi; if (cpuhw->n_limited) freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), @@ -2287,26 +2298,14 @@ static void __perf_event_interrupt(struct pt_regs *regs) perf_read_regs(regs); - /* - * If perf interrupts hit in a local_irq_disable (soft-masked) region, - * we consider them as NMIs. This is required to prevent hash faults on - * user addresses when reading callchains. See the NMI test in - * do_hash_page. - */ - nmi = perf_intr_is_nmi(regs); - if (nmi) - nmi_enter(); - else - irq_enter(); - /* Read all the PMCs since we'll need them a bunch of times */ for (i = 0; i < ppmu->n_counter; ++i) - val[i] = read_pmc(i + 1); + cpuhw->pmcs[i] = read_pmc(i + 1); /* Try to find what caused the IRQ */ found = 0; for (i = 0; i < ppmu->n_counter; ++i) { - if (!pmc_overflow(val[i])) + if (!pmc_overflow(cpuhw->pmcs[i])) continue; if (is_limited_pmc(i + 1)) continue; /* these won't generate IRQs */ @@ -2321,7 +2320,7 @@ static void __perf_event_interrupt(struct pt_regs *regs) event = cpuhw->event[j]; if (event->hw.idx == (i + 1)) { active = 1; - record_and_restart(event, val[i], regs); + record_and_restart(event, cpuhw->pmcs[i], regs); break; } } @@ -2335,17 +2334,17 @@ static void __perf_event_interrupt(struct pt_regs *regs) event = cpuhw->event[i]; if (!event->hw.idx || is_limited_pmc(event->hw.idx)) continue; - if (pmc_overflow_power7(val[event->hw.idx - 1])) { + if (pmc_overflow_power7(cpuhw->pmcs[event->hw.idx - 1])) { /* event has overflowed in a buggy way*/ found = 1; record_and_restart(event, - val[event->hw.idx - 1], + cpuhw->pmcs[event->hw.idx - 1], regs); } } } - if (!found && !nmi && printk_ratelimit()) - printk(KERN_WARNING "Can't find PMC that caused IRQ\n"); + if (unlikely(!found) && !arch_irq_disabled_regs(regs)) + printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n"); /* * Reset MMCR0 to its normal value. This will set PMXE and @@ -2356,10 +2355,9 @@ static void __perf_event_interrupt(struct pt_regs *regs) */ write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0); - if (nmi) - nmi_exit(); - else - irq_exit(); + /* Clear the cpuhw->pmcs */ + memset(&cpuhw->pmcs, 0, sizeof(cpuhw->pmcs)); + } static void perf_event_interrupt(struct pt_regs *regs) diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index e0e7e276bfd2..ee721f420a7b 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -31,19 +31,6 @@ static atomic_t num_events; /* Used to avoid races in calling reserve/release_pmc_hardware */ static DEFINE_MUTEX(pmc_reserve_mutex); -/* - * If interrupts were soft-disabled when a PMU interrupt occurs, treat - * it as an NMI. - */ -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ -#ifdef __powerpc64__ - return (regs->softe & IRQS_DISABLED); -#else - return 0; -#endif -} - static void perf_event_interrupt(struct pt_regs *regs); /* @@ -659,13 +646,6 @@ static void perf_event_interrupt(struct pt_regs *regs) struct perf_event *event; unsigned long val; int found = 0; - int nmi; - - nmi = perf_intr_is_nmi(regs); - if (nmi) - nmi_enter(); - else - irq_enter(); for (i = 0; i < ppmu->n_counter; ++i) { event = cpuhw->event[i]; @@ -690,11 +670,6 @@ static void perf_event_interrupt(struct pt_regs *regs) mtmsr(mfmsr() | MSR_PMM); mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE); isync(); - - if (nmi) - nmi_exit(); - else - irq_exit(); } void hw_perf_event_setup(int cpu) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 6e7e820508df..e5eb33255066 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -764,6 +764,14 @@ static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event, return ev_len; } +/* + * Return true incase of invalid or dummy events with names like RESERVED* + */ +static bool ignore_event(const char *name) +{ + return strncmp(name, "RESERVED", 8) == 0; +} + #define MAX_4K (SIZE_MAX / 4096) static int create_events_from_catalog(struct attribute ***events_, @@ -894,6 +902,10 @@ static int create_events_from_catalog(struct attribute ***events_, name = event_name(event, &nl); + if (ignore_event(name)) { + junk_events++; + continue; + } if (event->event_group_record_len == 0) { pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n", event_idx, nl, name); @@ -955,6 +967,9 @@ static int create_events_from_catalog(struct attribute ***events_, continue; name = event_name(event, &nl); + if (ignore_event(name)) + continue; + nonce = event_uniq_add(&ev_uniq, name, nl, event->domain); ct = event_data_to_attrs(event_idx, events + event_attr_ct, event, nonce); diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 6ab5b272090a..e4f577da33d8 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -108,12 +108,57 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) *mmcra |= MMCRA_SDAR_MODE_TLB; } +static u64 p10_thresh_cmp_val(u64 value) +{ + int exp = 0; + u64 result = value; + + if (!value) + return value; + + /* + * Incase of P10, thresh_cmp value is not part of raw event code + * and provided via attr.config1 parameter. To program threshold in MMCRA, + * take a 18 bit number N and shift right 2 places and increment + * the exponent E by 1 until the upper 10 bits of N are zero. + * Write E to the threshold exponent and write the lower 8 bits of N + * to the threshold mantissa. + * The max threshold that can be written is 261120. + */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (value > 261120) + value = 261120; + while ((64 - __builtin_clzl(value)) > 8) { + exp++; + value >>= 2; + } + + /* + * Note that it is invalid to write a mantissa with the + * upper 2 bits of mantissa being zero, unless the + * exponent is also zero. + */ + if (!(value & 0xC0) && exp) + result = 0; + else + result = (exp << 8) | value; + } + return result; +} + static u64 thresh_cmp_val(u64 value) { + if (cpu_has_feature(CPU_FTR_ARCH_31)) + value = p10_thresh_cmp_val(value); + + /* + * Since location of threshold compare bits in MMCRA + * is different for p8, using different shift value. + */ if (cpu_has_feature(CPU_FTR_ARCH_300)) return value << p9_MMCRA_THR_CMP_SHIFT; - - return value << MMCRA_THR_CMP_SHIFT; + else + return value << MMCRA_THR_CMP_SHIFT; } static unsigned long combine_from_event(u64 event) @@ -141,13 +186,13 @@ static bool is_thresh_cmp_valid(u64 event) { unsigned int cmp, exp; + if (cpu_has_feature(CPU_FTR_ARCH_31)) + return p10_thresh_cmp_val(event) != 0; + /* * Check the mantissa upper two bits are not zero, unless the * exponent is also zero. See the THRESH_CMP_MANTISSA doc. - * Power10: thresh_cmp is replaced by l2_l3 event select. */ - if (cpu_has_feature(CPU_FTR_ARCH_31)) - return false; cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; exp = cmp >> 7; @@ -256,7 +301,7 @@ void isa207_get_mem_weight(u64 *weight) *weight = mantissa << (2 * exp); } -int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) +int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1) { unsigned int unit, pmc, cache, ebb; unsigned long mask, value; @@ -355,9 +400,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } if (cpu_has_feature(CPU_FTR_ARCH_31)) { - if (event_is_threshold(event)) { + if (event_is_threshold(event) && is_thresh_cmp_valid(event_config1)) { mask |= CNST_THRESH_CTL_SEL_MASK; value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT); + mask |= p10_CNST_THRESH_CMP_MASK; + value |= p10_CNST_THRESH_CMP_VAL(p10_thresh_cmp_val(event_config1)); } } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { if (event_is_threshold(event) && is_thresh_cmp_valid(event)) { @@ -411,7 +458,7 @@ ebb_bhrb: int isa207_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], u32 flags) { unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val; unsigned long mmcr3; @@ -504,6 +551,10 @@ int isa207_compute_mmcr(u64 event[], int n_ev, val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; mmcra |= thresh_cmp_val(val); + } else if (flags & PPMU_HAS_ATTR_CONFIG1) { + val = (pevents[i]->attr.config1 >> p10_EVENT_THR_CMP_SHIFT) & + p10_EVENT_THR_CMP_MASK; + mmcra |= thresh_cmp_val(val); } } diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 454b32c31440..1af0e8c97ac7 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -105,6 +105,10 @@ #define p10_EVENT_RADIX_SCOPE_QUAL_MASK 0x1 #define p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT 45 +/* Event Threshold Compare bit constant for power10 in config1 attribute */ +#define p10_EVENT_THR_CMP_SHIFT 0 +#define p10_EVENT_THR_CMP_MASK 0x3FFFFull + #define p10_EVENT_VALID_MASK \ ((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT | \ (p10_EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \ @@ -124,8 +128,8 @@ * 60 56 52 48 44 40 36 32 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | * [ fab_match ] [ thresh_cmp ] [ thresh_ctl ] [ ] - * | - * thresh_sel -* + * | | + * [ thresh_cmp bits for p10] thresh_sel -* * * 28 24 20 16 12 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | @@ -152,6 +156,9 @@ #define CNST_THRESH_CTL_SEL_VAL(v) (((v) & 0x7ffull) << 32) #define CNST_THRESH_CTL_SEL_MASK CNST_THRESH_CTL_SEL_VAL(0x7ff) +#define p10_CNST_THRESH_CMP_VAL(v) (((v) & 0x7ffull) << 43) +#define p10_CNST_THRESH_CMP_MASK p10_CNST_THRESH_CMP_VAL(0x7ff) + #define CNST_EBB_VAL(v) (((v) & EVENT_EBB_MASK) << 24) #define CNST_EBB_MASK CNST_EBB_VAL(EVENT_EBB_MASK) @@ -262,10 +269,10 @@ #define PH(a, b) (P(LVL, HIT) | P(a, b)) #define PM(a, b) (P(LVL, MISS) | P(a, b)) -int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp); +int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1); int isa207_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]); + struct perf_event *pevents[], u32 flags); void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr); int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags, const unsigned int ev_alt[][MAX_ALT]); diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c index 1919e9df9165..e39b15b79a83 100644 --- a/arch/powerpc/perf/mpc7450-pmu.c +++ b/arch/powerpc/perf/mpc7450-pmu.c @@ -148,7 +148,7 @@ static u32 classbits[N_CLASSES - 1][2] = { }; static int mpc7450_get_constraint(u64 event, unsigned long *maskp, - unsigned long *valp) + unsigned long *valp, u64 event_config1 __maybe_unused) { int pmc, class; u32 mask, value; @@ -258,7 +258,8 @@ static const u32 pmcsel_mask[N_COUNTER] = { */ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], + u32 flags __maybe_unused) { u8 event_index[N_CLASSES][N_COUNTER]; int n_classevent[N_CLASSES]; diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index 6f681b105eec..b931eed482c9 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -75,6 +75,8 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = { static u64 get_ext_regs_value(int idx) { switch (idx) { + case PERF_REG_POWERPC_PMC1 ... PERF_REG_POWERPC_PMC6: + return get_pmcs_ext_regs(idx - PERF_REG_POWERPC_PMC1); case PERF_REG_POWERPC_MMCR0: return mfspr(SPRN_MMCR0); case PERF_REG_POWERPC_MMCR1: @@ -95,13 +97,6 @@ static u64 get_ext_regs_value(int idx) u64 perf_reg_value(struct pt_regs *regs, int idx) { - u64 perf_reg_extended_max = PERF_REG_POWERPC_MAX; - - if (cpu_has_feature(CPU_FTR_ARCH_31)) - perf_reg_extended_max = PERF_REG_MAX_ISA_31; - else if (cpu_has_feature(CPU_FTR_ARCH_300)) - perf_reg_extended_max = PERF_REG_MAX_ISA_300; - if (idx == PERF_REG_POWERPC_SIER && (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) || IS_ENABLED(CONFIG_PPC32) || @@ -113,14 +108,14 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) IS_ENABLED(CONFIG_PPC32))) return 0; - if (idx >= PERF_REG_POWERPC_MAX && idx < perf_reg_extended_max) + if (idx >= PERF_REG_POWERPC_MAX && idx < PERF_REG_EXTENDED_MAX) return get_ext_regs_value(idx); /* * If the idx is referring to value beyond the * supported registers, return 0 with a warning */ - if (WARN_ON_ONCE(idx >= perf_reg_extended_max)) + if (WARN_ON_ONCE(idx >= PERF_REG_EXTENDED_MAX)) return 0; return regs_get_register(regs, pt_regs_offset[idx]); diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index 79e0206ca454..a901c1348cad 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -216,6 +216,7 @@ PMU_FORMAT_ATTR(invert_bit, "config:47"); PMU_FORMAT_ATTR(src_mask, "config:48-53"); PMU_FORMAT_ATTR(src_match, "config:54-59"); PMU_FORMAT_ATTR(radix_scope, "config:9"); +PMU_FORMAT_ATTR(thresh_cmp, "config1:0-17"); static struct attribute *power10_pmu_format_attr[] = { &format_attr_event.attr, @@ -236,6 +237,7 @@ static struct attribute *power10_pmu_format_attr[] = { &format_attr_src_mask.attr, &format_attr_src_match.attr, &format_attr_radix_scope.attr, + &format_attr_thresh_cmp.attr, NULL, }; @@ -550,7 +552,7 @@ static struct power_pmu power10_pmu = { .get_mem_weight = isa207_get_mem_weight, .disable_pmc = isa207_disable_pmc, .flags = PPMU_HAS_SIER | PPMU_ARCH_207S | - PPMU_ARCH_31, + PPMU_ARCH_31 | PPMU_HAS_ATTR_CONFIG1, .n_generic = ARRAY_SIZE(power10_generic_events), .generic_events = power10_generic_events, .cache_events = &power10_cache_events, diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c index 3e64b4a1511f..18732267993a 100644 --- a/arch/powerpc/perf/power5+-pmu.c +++ b/arch/powerpc/perf/power5+-pmu.c @@ -132,7 +132,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = { }; static int power5p_get_constraint(u64 event, unsigned long *maskp, - unsigned long *valp) + unsigned long *valp, u64 event_config1 __maybe_unused) { int pmc, byte, unit, sh; int bit, fmask; @@ -451,7 +451,8 @@ static int power5p_marked_instr_event(u64 event) static int power5p_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], + u32 flags __maybe_unused) { unsigned long mmcr1 = 0; unsigned long mmcra = 0; diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c index 017bb19b73fb..cb611c1e7abe 100644 --- a/arch/powerpc/perf/power5-pmu.c +++ b/arch/powerpc/perf/power5-pmu.c @@ -136,7 +136,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = { }; static int power5_get_constraint(u64 event, unsigned long *maskp, - unsigned long *valp) + unsigned long *valp, u64 event_config1 __maybe_unused) { int pmc, byte, unit, sh; int bit, fmask; @@ -382,7 +382,8 @@ static int power5_marked_instr_event(u64 event) static int power5_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], + u32 flags __maybe_unused) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c index 189974478e9f..69ef38216418 100644 --- a/arch/powerpc/perf/power6-pmu.c +++ b/arch/powerpc/perf/power6-pmu.c @@ -173,7 +173,8 @@ static int power6_marked_instr_event(u64 event) * Assign PMC numbers and compute MMCR1 value for a set of events */ static int p6_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[], + u32 flags __maybe_unused) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; @@ -266,7 +267,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, * 32-34 select field: nest (subunit) event selector */ static int p6_get_constraint(u64 event, unsigned long *maskp, - unsigned long *valp) + unsigned long *valp, u64 event_config1 __maybe_unused) { int pmc, byte, sh, subunit; unsigned long mask = 0, value = 0; diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index bacfab104a1a..894c17f9a762 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -81,7 +81,7 @@ enum { */ static int power7_get_constraint(u64 event, unsigned long *maskp, - unsigned long *valp) + unsigned long *valp, u64 event_config1 __maybe_unused) { int pmc, sh, unit; unsigned long mask = 0, value = 0; @@ -245,7 +245,8 @@ static int power7_marked_instr_event(u64 event) static int power7_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], + u32 flags __maybe_unused) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c index 7d78df97f272..1f8263785286 100644 --- a/arch/powerpc/perf/ppc970-pmu.c +++ b/arch/powerpc/perf/ppc970-pmu.c @@ -190,7 +190,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = { }; static int p970_get_constraint(u64 event, unsigned long *maskp, - unsigned long *valp) + unsigned long *valp, u64 event_config1 __maybe_unused) { int pmc, byte, unit, sh, spcsel; unsigned long mask = 0, value = 0; @@ -256,7 +256,8 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) static int p970_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], + u32 flags __maybe_unused) { unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; unsigned int pmc, unit, byte, psel; diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 78ac6d67a935..7d41e9264510 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -206,17 +206,10 @@ config AKEBONO select PPC4xx_HSTA_MSI select I2C select I2C_IBM_IIC - select NETDEVICES - select ETHERNET - select NET_VENDOR_IBM select IBM_EMAC_EMAC4 if IBM_EMAC select USB if USB_SUPPORT select USB_OHCI_HCD_PLATFORM if USB_OHCI_HCD select USB_EHCI_HCD_PLATFORM if USB_EHCI_HCD - select MMC_SDHCI - select MMC_SDHCI_PLTFM - select ATA - select SATA_AHCI_PLATFORM help This option enables support for the IBM Akebono (476gtr) evaluation board diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c index 6303fbfc4e4f..9d030c2e0004 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads.c @@ -24,21 +24,23 @@ static void __init mpc5121_ads_setup_arch(void) { -#ifdef CONFIG_PCI - struct device_node *np; -#endif printk(KERN_INFO "MPC5121 ADS board from Freescale Semiconductor\n"); /* * cpld regs are needed early */ mpc5121_ads_cpld_map(); + mpc512x_setup_arch(); +} + +static void __init mpc5121_ads_setup_pci(void) +{ #ifdef CONFIG_PCI + struct device_node *np; + for_each_compatible_node(np, "pci", "fsl,mpc5121-pci") mpc83xx_add_bridge(np); #endif - - mpc512x_setup_arch(); } static void __init mpc5121_ads_init_IRQ(void) @@ -64,6 +66,7 @@ define_machine(mpc5121_ads) { .name = "MPC5121 ADS", .probe = mpc5121_ads_probe, .setup_arch = mpc5121_ads_setup_arch, + .discover_phbs = mpc5121_ads_setup_pci, .init = mpc512x_init, .init_IRQ = mpc5121_ads_init_IRQ, .get_irq = ipic_get_irq, diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 4514a6f7458a..3b7d70d71692 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -185,8 +185,6 @@ static void __init efika_setup_arch(void) /* Map important registers from the internal memory map */ mpc52xx_map_common_devices(); - efika_pcisetup(); - #ifdef CONFIG_PM mpc52xx_suspend.board_suspend_prepare = efika_suspend_prepare; mpc52xx_pm_init(); @@ -218,6 +216,7 @@ define_machine(efika) .name = EFIKA_PLATFORM_NAME, .probe = efika_probe, .setup_arch = efika_setup_arch, + .discover_phbs = efika_pcisetup, .init = mpc52xx_declare_of_platform_devices, .show_cpuinfo = efika_show_cpuinfo, .init_IRQ = mpc52xx_init_irq, diff --git a/arch/powerpc/platforms/52xx/lite5200.c b/arch/powerpc/platforms/52xx/lite5200.c index 3181aac08225..04cc97397095 100644 --- a/arch/powerpc/platforms/52xx/lite5200.c +++ b/arch/powerpc/platforms/52xx/lite5200.c @@ -165,8 +165,6 @@ static void __init lite5200_setup_arch(void) mpc52xx_suspend.board_resume_finish = lite5200_resume_finish; lite5200_pm_init(); #endif - - mpc52xx_setup_pci(); } static const char * const board[] __initconst = { @@ -187,6 +185,7 @@ define_machine(lite5200) { .name = "lite5200", .probe = lite5200_probe, .setup_arch = lite5200_setup_arch, + .discover_phbs = mpc52xx_setup_pci, .init = mpc52xx_declare_of_platform_devices, .init_IRQ = mpc52xx_init_irq, .get_irq = mpc52xx_get_irq, diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index 07c5bc4ed0b5..efb8bdecbcc7 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -202,8 +202,6 @@ static void __init media5200_setup_arch(void) /* Some mpc5200 & mpc5200b related configuration */ mpc5200_setup_xlb_arbiter(); - mpc52xx_setup_pci(); - np = of_find_matching_node(NULL, mpc5200_gpio_ids); gpio = of_iomap(np, 0); of_node_put(np); @@ -244,6 +242,7 @@ define_machine(media5200_platform) { .name = "media5200-platform", .probe = media5200_probe, .setup_arch = media5200_setup_arch, + .discover_phbs = mpc52xx_setup_pci, .init = mpc52xx_declare_of_platform_devices, .init_IRQ = media5200_init_irq, .get_irq = mpc52xx_get_irq, diff --git a/arch/powerpc/platforms/52xx/mpc5200_simple.c b/arch/powerpc/platforms/52xx/mpc5200_simple.c index 2d01e9b2e779..b9f5675b0a1d 100644 --- a/arch/powerpc/platforms/52xx/mpc5200_simple.c +++ b/arch/powerpc/platforms/52xx/mpc5200_simple.c @@ -40,8 +40,6 @@ static void __init mpc5200_simple_setup_arch(void) /* Some mpc5200 & mpc5200b related configuration */ mpc5200_setup_xlb_arbiter(); - - mpc52xx_setup_pci(); } /* list of the supported boards */ @@ -73,6 +71,7 @@ define_machine(mpc5200_simple_platform) { .name = "mpc5200-simple-platform", .probe = mpc5200_simple_probe, .setup_arch = mpc5200_simple_setup_arch, + .discover_phbs = mpc52xx_setup_pci, .init = mpc52xx_declare_of_platform_devices, .init_IRQ = mpc52xx_init_irq, .get_irq = mpc52xx_get_irq, diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c index 05e19470d523..b91ebebd9ff2 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c @@ -229,7 +229,7 @@ static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id) int dma, write, poll_dma; spin_lock_irqsave(&lpbfifo.lock, flags); - ts = get_tbl(); + ts = mftb(); req = lpbfifo.req; if (!req) { @@ -307,7 +307,7 @@ static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id) if (irq != 0) /* don't increment on polled case */ req->irq_count++; - req->irq_ticks += get_tbl() - ts; + req->irq_ticks += mftb() - ts; spin_unlock_irqrestore(&lpbfifo.lock, flags); /* Spinlock is released; it is now safe to call the callback */ @@ -330,7 +330,7 @@ static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id) u32 ts; spin_lock_irqsave(&lpbfifo.lock, flags); - ts = get_tbl(); + ts = mftb(); req = lpbfifo.req; if (!req || (req->flags & MPC52XX_LPBFIFO_FLAG_NO_DMA)) { @@ -361,7 +361,7 @@ static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id) lpbfifo.req = NULL; /* Release the lock before calling out to the callback. */ - req->irq_ticks += get_tbl() - ts; + req->irq_ticks += mftb() - ts; spin_unlock_irqrestore(&lpbfifo.lock, flags); if (req->callback) diff --git a/arch/powerpc/platforms/82xx/mpc8272_ads.c b/arch/powerpc/platforms/82xx/mpc8272_ads.c index 3fe1a6593280..0b5b9dec16d5 100644 --- a/arch/powerpc/platforms/82xx/mpc8272_ads.c +++ b/arch/powerpc/platforms/82xx/mpc8272_ads.c @@ -171,7 +171,6 @@ static void __init mpc8272_ads_setup_arch(void) iounmap(bcsr); init_ioports(); - pq2_init_pci(); if (ppc_md.progress) ppc_md.progress("mpc8272_ads_setup_arch(), finish", 0); @@ -205,6 +204,7 @@ define_machine(mpc8272_ads) .name = "Freescale MPC8272 ADS", .probe = mpc8272_ads_probe, .setup_arch = mpc8272_ads_setup_arch, + .discover_phbs = pq2_init_pci, .init_IRQ = mpc8272_ads_pic_init, .get_irq = cpm2_get_irq, .calibrate_decr = generic_calibrate_decr, diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c index 096cc0d59fd8..f82f75a6085c 100644 --- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c +++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c @@ -123,20 +123,17 @@ int __init pq2ads_pci_init_irq(void) np = of_find_compatible_node(NULL, NULL, "fsl,pq2ads-pci-pic"); if (!np) { printk(KERN_ERR "No pci pic node in device tree.\n"); - of_node_put(np); goto out; } irq = irq_of_parse_and_map(np, 0); if (!irq) { printk(KERN_ERR "No interrupt in pci pic node.\n"); - of_node_put(np); - goto out; + goto out_put_node; } priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) { - of_node_put(np); ret = -ENOMEM; goto out_unmap_irq; } @@ -161,17 +158,17 @@ int __init pq2ads_pci_init_irq(void) priv->host = host; irq_set_handler_data(irq, priv); irq_set_chained_handler(irq, pq2ads_pci_irq_demux); - - of_node_put(np); - return 0; + ret = 0; + goto out_put_node; out_unmap_regs: iounmap(priv->regs); out_free_kmalloc: kfree(priv); - of_node_put(np); out_unmap_irq: irq_dispose_mapping(irq); +out_put_node: + of_node_put(np); out: return ret; } diff --git a/arch/powerpc/platforms/82xx/pq2fads.c b/arch/powerpc/platforms/82xx/pq2fads.c index a74082140718..ac9113d524af 100644 --- a/arch/powerpc/platforms/82xx/pq2fads.c +++ b/arch/powerpc/platforms/82xx/pq2fads.c @@ -150,8 +150,6 @@ static void __init pq2fads_setup_arch(void) /* Enable external IRQs */ clrbits32(&cpm2_immr->im_siu_conf.siu_82xx.sc_siumcr, 0x0c000000); - pq2_init_pci(); - if (ppc_md.progress) ppc_md.progress("pq2fads_setup_arch(), finish", 0); } @@ -184,6 +182,7 @@ define_machine(pq2fads) .name = "Freescale PQ2FADS", .probe = pq2fads_probe, .setup_arch = pq2fads_setup_arch, + .discover_phbs = pq2_init_pci, .init_IRQ = pq2fads_pic_init, .get_irq = cpm2_get_irq, .calibrate_decr = generic_calibrate_decr, diff --git a/arch/powerpc/platforms/83xx/asp834x.c b/arch/powerpc/platforms/83xx/asp834x.c index 28474876f41b..68061c2a57c1 100644 --- a/arch/powerpc/platforms/83xx/asp834x.c +++ b/arch/powerpc/platforms/83xx/asp834x.c @@ -44,6 +44,7 @@ define_machine(asp834x) { .name = "ASP8347E", .probe = asp834x_probe, .setup_arch = asp834x_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c index bcdc2c203ec9..108e1e4d2683 100644 --- a/arch/powerpc/platforms/83xx/km83xx.c +++ b/arch/powerpc/platforms/83xx/km83xx.c @@ -180,6 +180,7 @@ define_machine(mpc83xx_km) { .name = "mpc83xx-km-platform", .probe = mpc83xx_km_probe, .setup_arch = mpc83xx_km_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c index a952e91db3ee..3285dabcf923 100644 --- a/arch/powerpc/platforms/83xx/misc.c +++ b/arch/powerpc/platforms/83xx/misc.c @@ -132,8 +132,6 @@ void __init mpc83xx_setup_arch(void) setbat(-1, va, immrbase, immrsize, PAGE_KERNEL_NCG); update_bats(); } - - mpc83xx_setup_pci(); } int machine_check_83xx(struct pt_regs *regs) diff --git a/arch/powerpc/platforms/83xx/mpc830x_rdb.c b/arch/powerpc/platforms/83xx/mpc830x_rdb.c index 51426e88ec67..956d4389effa 100644 --- a/arch/powerpc/platforms/83xx/mpc830x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc830x_rdb.c @@ -48,6 +48,7 @@ define_machine(mpc830x_rdb) { .name = "MPC830x RDB", .probe = mpc830x_rdb_probe, .setup_arch = mpc830x_rdb_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc831x_rdb.c b/arch/powerpc/platforms/83xx/mpc831x_rdb.c index 5ccd57a48492..3b578f080e3b 100644 --- a/arch/powerpc/platforms/83xx/mpc831x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc831x_rdb.c @@ -48,6 +48,7 @@ define_machine(mpc831x_rdb) { .name = "MPC831x RDB", .probe = mpc831x_rdb_probe, .setup_arch = mpc831x_rdb_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c index 6fa5402ebf20..850d566ef900 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c @@ -101,6 +101,7 @@ define_machine(mpc832x_mds) { .name = "MPC832x MDS", .probe = mpc832x_sys_probe, .setup_arch = mpc832x_sys_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index 622c625d5ce4..b6133a237a70 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -219,6 +219,7 @@ define_machine(mpc832x_rdb) { .name = "MPC832x RDB", .probe = mpc832x_rdb_probe, .setup_arch = mpc832x_rdb_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c index ebfd139bca20..9630f3aa4d9c 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_itx.c +++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c @@ -70,6 +70,7 @@ define_machine(mpc834x_itx) { .name = "MPC834x ITX", .probe = mpc834x_itx_probe, .setup_arch = mpc834x_itx_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc834x_mds.c b/arch/powerpc/platforms/83xx/mpc834x_mds.c index 356228e35279..6d91bdce0a18 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc834x_mds.c @@ -91,6 +91,7 @@ define_machine(mpc834x_mds) { .name = "MPC834x MDS", .probe = mpc834x_mds_probe, .setup_arch = mpc834x_mds_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c index 90d9cbfae659..da4cf52cb55b 100644 --- a/arch/powerpc/platforms/83xx/mpc836x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c @@ -201,6 +201,7 @@ define_machine(mpc836x_mds) { .name = "MPC836x MDS", .probe = mpc836x_mds_probe, .setup_arch = mpc836x_mds_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc836x_rdk.c b/arch/powerpc/platforms/83xx/mpc836x_rdk.c index b4aac2cde849..3427ad0d9d38 100644 --- a/arch/powerpc/platforms/83xx/mpc836x_rdk.c +++ b/arch/powerpc/platforms/83xx/mpc836x_rdk.c @@ -41,6 +41,7 @@ define_machine(mpc836x_rdk) { .name = "MPC836x RDK", .probe = mpc836x_rdk_probe, .setup_arch = mpc836x_rdk_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc837x_mds.c b/arch/powerpc/platforms/83xx/mpc837x_mds.c index 9d3721c965be..f28d166ea7db 100644 --- a/arch/powerpc/platforms/83xx/mpc837x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc837x_mds.c @@ -93,6 +93,7 @@ define_machine(mpc837x_mds) { .name = "MPC837x MDS", .probe = mpc837x_mds_probe, .setup_arch = mpc837x_mds_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c index 7c45f7ac2607..7fb7684c256b 100644 --- a/arch/powerpc/platforms/83xx/mpc837x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c @@ -73,6 +73,7 @@ define_machine(mpc837x_rdb) { .name = "MPC837x RDB/WLAN", .probe = mpc837x_rdb_probe, .setup_arch = mpc837x_rdb_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h index f37d04332fc7..a30d30588cf6 100644 --- a/arch/powerpc/platforms/83xx/mpc83xx.h +++ b/arch/powerpc/platforms/83xx/mpc83xx.h @@ -76,7 +76,7 @@ extern void mpc83xx_ipic_init_IRQ(void); #ifdef CONFIG_PCI extern void mpc83xx_setup_pci(void); #else -#define mpc83xx_setup_pci() do {} while (0) +#define mpc83xx_setup_pci NULL #endif extern int mpc83xx_declare_of_platform_devices(void); diff --git a/arch/powerpc/platforms/8xx/machine_check.c b/arch/powerpc/platforms/8xx/machine_check.c index 88dedf38eccd..656365975895 100644 --- a/arch/powerpc/platforms/8xx/machine_check.c +++ b/arch/powerpc/platforms/8xx/machine_check.c @@ -26,7 +26,7 @@ int machine_check_8xx(struct pt_regs *regs) * to deal with that than having a wart in the mcheck handler. * -- BenH */ - bad_page_fault(regs, regs->dar, SIGBUS); + bad_page_fault(regs, SIGBUS); return 1; #else return 0; diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c index f5d0bf999759..9d252c554f7f 100644 --- a/arch/powerpc/platforms/amigaone/setup.c +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -66,6 +66,12 @@ static int __init amigaone_add_bridge(struct device_node *dev) void __init amigaone_setup_arch(void) { + if (ppc_md.progress) + ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0); +} + +static void __init amigaone_discover_phbs(void) +{ struct device_node *np; int phb = -ENODEV; @@ -74,9 +80,6 @@ void __init amigaone_setup_arch(void) phb = amigaone_add_bridge(np); BUG_ON(phb != 0); - - if (ppc_md.progress) - ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0); } void __init amigaone_init_IRQ(void) @@ -159,6 +162,7 @@ define_machine(amigaone) { .name = "AmigaOne", .probe = amigaone_probe, .setup_arch = amigaone_setup_arch, + .discover_phbs = amigaone_discover_phbs, .show_cpuinfo = amigaone_show_cpuinfo, .init_IRQ = amigaone_init_IRQ, .restart = amigaone_restart, diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index f2ff359041ee..e7c976bcadff 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -100,8 +100,3 @@ config CBE_CPUFREQ_SPU_GOVERNOR the minimal possible frequency. endmenu - -config OPROFILE_CELL - def_bool y - depends on PPC_CELL_NATIVE && (OPROFILE = m || OPROFILE = y) && SPU_BASE - diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index 10064a33ca96..7ea6692f67e2 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -19,7 +19,6 @@ spu-priv1-$(CONFIG_PPC_CELL_COMMON) += spu_priv1_mmio.o spu-manage-$(CONFIG_PPC_CELL_COMMON) += spu_manage.o obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \ - spu_notify.o \ spu_syscalls.o \ $(spu-priv1-y) \ $(spu-manage-y) \ diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c index 9068edef71f7..5b9a7e9f144b 100644 --- a/arch/powerpc/platforms/cell/pervasive.c +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -25,6 +25,7 @@ #include <asm/cpu_has_feature.h> #include "pervasive.h" +#include "ras.h" static void cbe_power_save(void) { diff --git a/arch/powerpc/platforms/cell/pervasive.h b/arch/powerpc/platforms/cell/pervasive.h index c6fccad6caee..0da74ab10716 100644 --- a/arch/powerpc/platforms/cell/pervasive.h +++ b/arch/powerpc/platforms/cell/pervasive.h @@ -13,9 +13,6 @@ #define PERVASIVE_H extern void cbe_pervasive_init(void); -extern void cbe_system_error_exception(struct pt_regs *regs); -extern void cbe_maintenance_exception(struct pt_regs *regs); -extern void cbe_thermal_exception(struct pt_regs *regs); #ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON extern int cbe_sysreset_hack(void); diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c index 6ea480539419..4325c05bedd9 100644 --- a/arch/powerpc/platforms/cell/ras.c +++ b/arch/powerpc/platforms/cell/ras.c @@ -49,7 +49,7 @@ static void dump_fir(int cpu) } -void cbe_system_error_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(cbe_system_error_exception) { int cpu = smp_processor_id(); @@ -58,7 +58,7 @@ void cbe_system_error_exception(struct pt_regs *regs) dump_stack(); } -void cbe_maintenance_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(cbe_maintenance_exception) { int cpu = smp_processor_id(); @@ -70,7 +70,7 @@ void cbe_maintenance_exception(struct pt_regs *regs) dump_stack(); } -void cbe_thermal_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(cbe_thermal_exception) { int cpu = smp_processor_id(); diff --git a/arch/powerpc/platforms/cell/ras.h b/arch/powerpc/platforms/cell/ras.h index 6c2e6bc0062e..226dbd48efad 100644 --- a/arch/powerpc/platforms/cell/ras.h +++ b/arch/powerpc/platforms/cell/ras.h @@ -2,9 +2,12 @@ #ifndef RAS_H #define RAS_H -extern void cbe_system_error_exception(struct pt_regs *regs); -extern void cbe_maintenance_exception(struct pt_regs *regs); -extern void cbe_thermal_exception(struct pt_regs *regs); +#include <asm/interrupt.h> + +DECLARE_INTERRUPT_HANDLER(cbe_system_error_exception); +DECLARE_INTERRUPT_HANDLER(cbe_maintenance_exception); +DECLARE_INTERRUPT_HANDLER(cbe_thermal_exception); + extern void cbe_ras_init(void); #endif /* RAS_H */ diff --git a/arch/powerpc/platforms/cell/spu_notify.c b/arch/powerpc/platforms/cell/spu_notify.c deleted file mode 100644 index 67870abf3715..000000000000 --- a/arch/powerpc/platforms/cell/spu_notify.c +++ /dev/null @@ -1,55 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Move OProfile dependencies from spufs module to the kernel so it - * can run on non-cell PPC. - * - * Copyright (C) IBM 2005 - */ - -#undef DEBUG - -#include <linux/export.h> -#include <linux/notifier.h> -#include <asm/spu.h> -#include "spufs/spufs.h" - -static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); - -void spu_switch_notify(struct spu *spu, struct spu_context *ctx) -{ - blocking_notifier_call_chain(&spu_switch_notifier, - ctx ? ctx->object_id : 0, spu); -} -EXPORT_SYMBOL_GPL(spu_switch_notify); - -int spu_switch_event_register(struct notifier_block *n) -{ - int ret; - ret = blocking_notifier_chain_register(&spu_switch_notifier, n); - if (!ret) - notify_spus_active(); - return ret; -} -EXPORT_SYMBOL_GPL(spu_switch_event_register); - -int spu_switch_event_unregister(struct notifier_block *n) -{ - return blocking_notifier_chain_unregister(&spu_switch_notifier, n); -} -EXPORT_SYMBOL_GPL(spu_switch_event_unregister); - -void spu_set_profile_private_kref(struct spu_context *ctx, - struct kref *prof_info_kref, - void (* prof_info_release) (struct kref *kref)) -{ - ctx->prof_priv_kref = prof_info_kref; - ctx->prof_priv_release = prof_info_release; -} -EXPORT_SYMBOL_GPL(spu_set_profile_private_kref); - -void *spu_get_profile_private_kref(struct spu_context *ctx) -{ - return ctx->prof_priv_kref; -} -EXPORT_SYMBOL_GPL(spu_get_profile_private_kref); - diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 25390569e24c..b83a3670bd74 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -91,14 +91,15 @@ out: } static int -spufs_setattr(struct dentry *dentry, struct iattr *attr) +spufs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, + struct iattr *attr) { struct inode *inode = d_inode(dentry); if ((attr->ia_valid & ATTR_SIZE) && (attr->ia_size != inode->i_size)) return -EINVAL; - setattr_copy(inode, attr); + setattr_copy(&init_user_ns, inode, attr); mark_inode_dirty(inode); return 0; } diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 3f2380f40f99..ce52b87496d2 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -353,7 +353,6 @@ static int spu_process_callback(struct spu_context *ctx) long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) { int ret; - struct spu *spu; u32 status; if (mutex_lock_interruptible(&ctx->run_mutex)) @@ -386,13 +385,10 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) mutex_lock(&ctx->state_mutex); break; } - spu = ctx->spu; if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags))) { - if (!(status & SPU_STATUS_STOPPED_BY_STOP)) { - spu_switch_notify(spu, ctx); + if (!(status & SPU_STATUS_STOPPED_BY_STOP)) continue; - } } spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index f18d5067cd0f..369206489895 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -72,7 +72,7 @@ static struct timer_list spuloadavg_timer; #define DEF_SPU_TIMESLICE (100 * HZ / (1000 * SPUSCHED_TICK)) #define SCALE_PRIO(x, prio) \ - max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_SPU_TIMESLICE) + max(x * (MAX_PRIO - prio) / (NICE_WIDTH / 2), MIN_SPU_TIMESLICE) /* * scale user-nice values [ -20 ... 0 ... 19 ] to time slice values: @@ -181,9 +181,6 @@ void do_notify_spus_active(void) /* * Wake up the active spu_contexts. - * - * When the awakened processes see their "notify_active" flag is set, - * they will call spu_switch_notify(). */ for_each_online_node(node) { struct spu *spu; @@ -239,7 +236,6 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) spu_switch_log_notify(spu, ctx, SWITCH_LOG_START, 0); spu_restore(&ctx->csa, spu); spu->timestamp = jiffies; - spu_switch_notify(spu, ctx); ctx->state = SPU_STATE_RUNNABLE; spuctx_switch_state(ctx, SPU_UTIL_USER); @@ -440,7 +436,6 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) */ atomic_dec_if_positive(&ctx->gang->aff_sched_count); - spu_switch_notify(spu, NULL); spu_unmap_mappings(ctx); spu_save(&ctx->csa, spu); spu_switch_log_notify(spu, ctx, SWITCH_LOG_STOP, 0); diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 1ba4d884febf..afc1d6604d12 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -281,7 +281,6 @@ void spu_del_from_rq(struct spu_context *ctx); int spu_activate(struct spu_context *ctx, unsigned long flags); void spu_deactivate(struct spu_context *ctx); void spu_yield(struct spu_context *ctx); -void spu_switch_notify(struct spu *spu, struct spu_context *ctx); void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx, u32 type, u32 val); void spu_set_timeslice(struct spu_context *ctx); diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c index b2c2bf35b76c..8c421dc78b28 100644 --- a/arch/powerpc/platforms/chrp/pci.c +++ b/arch/powerpc/platforms/chrp/pci.c @@ -314,6 +314,14 @@ chrp_find_bridges(void) } } of_node_put(root); + + /* + * "Temporary" fixes for PCI devices. + * -- Geert + */ + hydra_init(); /* Mac I/O */ + + pci_create_OF_bus_map(); } /* SL82C105 IDE Control/Status Register */ diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index c45435aa5e36..3cfc382841e5 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -334,22 +334,11 @@ static void __init chrp_setup_arch(void) /* On pegasos, enable the L2 cache if not already done by OF */ pegasos_set_l2cr(); - /* Lookup PCI host bridges */ - chrp_find_bridges(); - - /* - * Temporary fixes for PCI devices. - * -- Geert - */ - hydra_init(); /* Mac I/O */ - /* * Fix the Super I/O configuration */ sio_init(); - pci_create_OF_bus_map(); - /* * Print the banner, then scroll down so boot progress * can be printed. -- Cort @@ -582,6 +571,7 @@ define_machine(chrp) { .name = "CHRP", .probe = chrp_probe, .setup_arch = chrp_setup_arch, + .discover_phbs = chrp_find_bridges, .init = chrp_init2, .show_cpuinfo = chrp_show_cpuinfo, .init_IRQ = chrp_init_IRQ, diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index d8f2e2c737bb..53065d564161 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -108,15 +108,13 @@ static void holly_remap_bridge(void) tsi108_write_reg(TSI108_PCI_P2O_BAR2, 0x0); } -static void __init holly_setup_arch(void) +static void __init holly_init_pci(void) { struct device_node *np; if (ppc_md.progress) ppc_md.progress("holly_setup_arch():set_bridge", 0); - tsi108_csr_vir_base = get_vir_csrbase(); - /* setup PCI host bridge */ holly_remap_bridge(); @@ -127,6 +125,11 @@ static void __init holly_setup_arch(void) ppc_md.pci_exclude_device = holly_exclude_device; if (ppc_md.progress) ppc_md.progress("tsi108: resources set", 0x100); +} + +static void __init holly_setup_arch(void) +{ + tsi108_csr_vir_base = get_vir_csrbase(); printk(KERN_INFO "PPC750GX/CL Platform\n"); } @@ -259,6 +262,7 @@ define_machine(holly){ .name = "PPC750 GX/CL TSI", .probe = holly_probe, .setup_arch = holly_setup_arch, + .discover_phbs = holly_init_pci, .init_IRQ = holly_init_IRQ, .show_cpuinfo = holly_show_cpuinfo, .get_irq = mpic_get_irq, diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index f514d5d28cd4..eb8342e7f84e 100644 --- a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -64,14 +64,17 @@ static int __init linkstation_add_bridge(struct device_node *dev) static void __init linkstation_setup_arch(void) { + printk(KERN_INFO "BUFFALO Network Attached Storage Series\n"); + printk(KERN_INFO "(C) 2002-2005 BUFFALO INC.\n"); +} + +static void __init linkstation_setup_pci(void) +{ struct device_node *np; /* Lookup PCI host bridges */ for_each_compatible_node(np, "pci", "mpc10x-pci") linkstation_add_bridge(np); - - printk(KERN_INFO "BUFFALO Network Attached Storage Series\n"); - printk(KERN_INFO "(C) 2002-2005 BUFFALO INC.\n"); } /* @@ -153,6 +156,7 @@ define_machine(linkstation){ .name = "Buffalo Linkstation", .probe = linkstation_probe, .setup_arch = linkstation_setup_arch, + .discover_phbs = linkstation_setup_pci, .init_IRQ = linkstation_init_IRQ, .show_cpuinfo = linkstation_show_cpuinfo, .get_irq = mpic_get_irq, diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index b95c3380d2b5..5565647dc879 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -58,16 +58,14 @@ int mpc7448_hpc2_exclude_device(struct pci_controller *hose, return PCIBIOS_SUCCESSFUL; } -static void __init mpc7448_hpc2_setup_arch(void) +static void __init mpc7448_hpc2_setup_pci(void) { +#ifdef CONFIG_PCI struct device_node *np; if (ppc_md.progress) - ppc_md.progress("mpc7448_hpc2_setup_arch():set_bridge", 0); - - tsi108_csr_vir_base = get_vir_csrbase(); + ppc_md.progress("mpc7448_hpc2_setup_pci():set_bridge", 0); /* setup PCI host bridge */ -#ifdef CONFIG_PCI for_each_compatible_node(np, "pci", "tsi108-pci") tsi108_setup_pci(np, MPC7448HPC2_PCI_CFG_PHYS, 0); @@ -75,6 +73,11 @@ static void __init mpc7448_hpc2_setup_arch(void) if (ppc_md.progress) ppc_md.progress("tsi108: resources set", 0x100); #endif +} + +static void __init mpc7448_hpc2_setup_arch(void) +{ + tsi108_csr_vir_base = get_vir_csrbase(); printk(KERN_INFO "MPC7448HPC2 (TAIGA) Platform\n"); printk(KERN_INFO @@ -181,6 +184,7 @@ define_machine(mpc7448_hpc2){ .name = "MPC7448 HPC2", .probe = mpc7448_hpc2_probe, .setup_arch = mpc7448_hpc2_setup_arch, + .discover_phbs = mpc7448_hpc2_setup_pci, .init_IRQ = mpc7448_hpc2_init_IRQ, .show_cpuinfo = mpc7448_hpc2_show_cpuinfo, .get_irq = mpic_get_irq, diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c index 1cd488daa0bf..c06a0490d157 100644 --- a/arch/powerpc/platforms/embedded6xx/mvme5100.c +++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c @@ -154,17 +154,19 @@ static const struct of_device_id mvme5100_of_bus_ids[] __initconst = { */ static void __init mvme5100_setup_arch(void) { - struct device_node *np; - if (ppc_md.progress) ppc_md.progress("mvme5100_setup_arch()", 0); - for_each_compatible_node(np, "pci", "hawk-pci") - mvme5100_add_bridge(np); - restart = ioremap(BOARD_MODRST_REG, 4); } +static void __init mvme5100_setup_pci(void) +{ + struct device_node *np; + + for_each_compatible_node(np, "pci", "hawk-pci") + mvme5100_add_bridge(np); +} static void mvme5100_show_cpuinfo(struct seq_file *m) { @@ -205,6 +207,7 @@ define_machine(mvme5100) { .name = "MVME5100", .probe = mvme5100_probe, .setup_arch = mvme5100_setup_arch, + .discover_phbs = mvme5100_setup_pci, .init_IRQ = mvme5100_pic_init, .show_cpuinfo = mvme5100_show_cpuinfo, .get_irq = mpic_get_irq, diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c index e346ddcef45e..e188b90f7016 100644 --- a/arch/powerpc/platforms/embedded6xx/storcenter.c +++ b/arch/powerpc/platforms/embedded6xx/storcenter.c @@ -66,13 +66,16 @@ static int __init storcenter_add_bridge(struct device_node *dev) static void __init storcenter_setup_arch(void) { + printk(KERN_INFO "IOMEGA StorCenter\n"); +} + +static void __init storcenter_setup_pci(void) +{ struct device_node *np; /* Lookup PCI host bridges */ for_each_compatible_node(np, "pci", "mpc10x-pci") storcenter_add_bridge(np); - - printk(KERN_INFO "IOMEGA StorCenter\n"); } /* @@ -117,6 +120,7 @@ define_machine(storcenter){ .name = "IOMEGA StorCenter", .probe = storcenter_probe, .setup_arch = storcenter_setup_arch, + .discover_phbs = storcenter_setup_pci, .init_IRQ = storcenter_init_IRQ, .get_irq = mpic_get_irq, .restart = storcenter_restart, diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index c86a66d5e998..a20b9576de22 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -536,6 +536,9 @@ static int __init maple_add_bridge(struct device_node *dev) /* Check for legacy IOs */ isa_bridge_find_early(hose); + /* create pci_dn's for DT nodes under this PHB */ + pci_devs_phb_init_dynamic(hose); + return 0; } diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index f7e66a2005b4..4e9ad5bf3efb 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -179,9 +179,6 @@ static void __init maple_setup_arch(void) #ifdef CONFIG_SMP smp_ops = &maple_smp_ops; #endif - /* Lookup PCI hosts */ - maple_pci_init(); - maple_use_rtas_reboot_and_halt_if_present(); printk(KERN_DEBUG "Using native/NAP idle loop\n"); @@ -351,6 +348,7 @@ define_machine(maple) { .name = "Maple", .probe = maple_probe, .setup_arch = maple_setup_arch, + .discover_phbs = maple_pci_init, .init_IRQ = maple_init_IRQ, .pci_irq_fixup = maple_pci_irq_fixup, .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index b612474f8f8e..376797eb7894 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -144,8 +144,6 @@ static void __init pas_setup_arch(void) /* Setup SMP callback */ smp_ops = &pas_smp_ops; #endif - /* Lookup PCI hosts */ - pas_pci_init(); /* Remap SDC register for doing reset */ /* XXXOJN This should maybe come out of the device tree */ @@ -446,6 +444,7 @@ define_machine(pasemi) { .name = "PA Semi PWRficient", .probe = pas_probe, .setup_arch = pas_setup_arch, + .discover_phbs = pas_pci_init, .init_IRQ = pas_init_IRQ, .get_irq = mpic_get_irq, .restart = pas_restart, diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index e35eaa9cf938..e9abe0f2e7f0 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -850,6 +850,10 @@ static int __init pmac_add_bridge(struct device_node *dev) /* Fixup "bus-range" OF property */ fixup_bus_range(dev); + /* create pci_dn's for DT nodes under this PHB */ + if (IS_ENABLED(CONFIG_PPC64)) + pci_devs_phb_init_dynamic(hose); + return 0; } diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 2e2cc0c75d87..86aee3f2483f 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -298,9 +298,6 @@ static void __init pmac_setup_arch(void) of_node_put(ic); } - /* Lookup PCI hosts */ - pmac_pci_init(); - #ifdef CONFIG_PPC32 ohare_init(); l2cr_init(); @@ -600,6 +597,7 @@ define_machine(powermac) { .name = "PowerMac", .probe = pmac_probe, .setup_arch = pmac_setup_arch, + .discover_phbs = pmac_pci_init, .show_cpuinfo = pmac_show_cpuinfo, .init_IRQ = pmac_pic_init, .get_irq = NULL, /* changed later */ diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index e6f461812856..999997d9e9a9 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -14,6 +14,7 @@ #include <asm/asm-prototypes.h> #include <asm/firmware.h> +#include <asm/interrupt.h> #include <asm/machdep.h> #include <asm/opal.h> #include <asm/cputhreads.h> diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 5fc9408bb0b3..019669eb21d2 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -19,6 +19,7 @@ #include <linux/numa.h> #include <asm/machdep.h> #include <asm/debugfs.h> +#include <asm/cacheflush.h> /* This enables us to keep track of the memory removed from each node. */ struct memtrace_entry { @@ -51,6 +52,27 @@ static const struct file_operations memtrace_fops = { .open = simple_open, }; +#define FLUSH_CHUNK_SIZE SZ_1G +/** + * flush_dcache_range_chunked(): Write any modified data cache blocks out to + * memory and invalidate them, in chunks of up to FLUSH_CHUNK_SIZE + * Does not invalidate the corresponding instruction cache blocks. + * + * @start: the start address + * @stop: the stop address (exclusive) + * @chunk: the max size of the chunks + */ +static void flush_dcache_range_chunked(unsigned long start, unsigned long stop, + unsigned long chunk) +{ + unsigned long i; + + for (i = start; i < stop; i += chunk) { + flush_dcache_range(i, min(stop, i + chunk)); + cond_resched(); + } +} + static void memtrace_clear_range(unsigned long start_pfn, unsigned long nr_pages) { @@ -62,6 +84,13 @@ static void memtrace_clear_range(unsigned long start_pfn, cond_resched(); clear_page(__va(PFN_PHYS(pfn))); } + /* + * Before we go ahead and use this range as cache inhibited range + * flush the cache. + */ + flush_dcache_range_chunked(PFN_PHYS(start_pfn), + PFN_PHYS(start_pfn + nr_pages), + FLUSH_CHUNK_SIZE); } static u64 memtrace_alloc_node(u32 nid, u64 size) diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index 23571f0b555a..0d9ba70f7251 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -119,8 +119,8 @@ static void fill_prstatus(struct elf_prstatus *prstatus, int pir, * As a PIR value could also be '0', add an offset of '100' * to every PIR to avoid misinterpretations in GDB. */ - prstatus->pr_pid = cpu_to_be32(100 + pir); - prstatus->pr_ppid = cpu_to_be32(1); + prstatus->common.pr_pid = cpu_to_be32(100 + pir); + prstatus->common.pr_ppid = cpu_to_be32(1); /* * Indicate SIGUSR1 for crash initiated from kernel. @@ -130,7 +130,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus, int pir, short sig; sig = kernel_initiated ? SIGUSR1 : SIGTERM; - prstatus->pr_cursig = cpu_to_be16(sig); + prstatus->common.pr_cursig = cpu_to_be16(sig); } } diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index c61c3b62c8c6..303d7c775740 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -624,7 +624,7 @@ static int opal_recover_mce(struct pt_regs *regs, */ recovered = 0; } else { - die("Machine check", regs, SIGBUS); + die_mce("Machine check", regs, SIGBUS); recovered = 1; } } diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c index 8c739c94ed28..53172862d23b 100644 --- a/arch/powerpc/platforms/powernv/pci-cxl.c +++ b/arch/powerpc/platforms/powernv/pci-cxl.c @@ -150,25 +150,3 @@ int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, return 0; } EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup); - -#if IS_MODULE(CONFIG_CXL) -static inline int get_cxl_module(void) -{ - struct module *cxl_module; - - mutex_lock(&module_mutex); - - cxl_module = find_module("cxl"); - if (cxl_module) - __module_get(cxl_module); - - mutex_unlock(&module_mutex); - - if (!cxl_module) - return -ENODEV; - - return 0; -} -#else -static inline int get_cxl_module(void) { return 0; } -#endif diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c index 5218f5da2737..30551bbd7988 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -380,6 +380,8 @@ void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, /* Remove link to a group from table's list of attached groups */ found = false; + + rcu_read_lock(); list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { if (tgl->table_group == table_group) { list_del_rcu(&tgl->next); @@ -388,6 +390,8 @@ void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, break; } } + rcu_read_unlock(); + if (WARN_ON(!found)) return; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index c4f72cdc9b51..f0f901683a2f 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2402,9 +2402,6 @@ static void pnv_pci_ioda_create_dbgfs(void) list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->private_data; - /* Notify initialization of PHB done */ - phb->initialized = 1; - sprintf(name, "PCI%04x", hose->global_number); phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); @@ -2601,17 +2598,8 @@ static resource_size_t pnv_pci_default_alignment(void) */ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) { - struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); struct pci_dn *pdn; - /* The function is probably called while the PEs have - * not be created yet. For example, resource reassignment - * during PCI probe period. We just skip the check if - * PEs isn't ready. - */ - if (!phb->initialized) - return true; - pdn = pci_get_pdn(dev); if (!pdn || pdn->pe_number == IODA_INVALID_PE) { pci_err(dev, "pci_enable_device() blocked, no PE assigned.\n"); @@ -2623,14 +2611,9 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) static bool pnv_ocapi_enable_device_hook(struct pci_dev *dev) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; struct pci_dn *pdn; struct pnv_ioda_pe *pe; - if (!phb->initialized) - return true; - pdn = pci_get_pdn(dev); if (!pdn) return false; @@ -2938,7 +2921,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb_id = be64_to_cpup(prop64); pr_debug(" PHB-ID : 0x%016llx\n", phb_id); - phb = memblock_alloc(sizeof(*phb), SMP_CACHE_BYTES); + phb = kzalloc(sizeof(*phb), GFP_KERNEL); if (!phb) panic("%s: Failed to allocate %zu bytes\n", __func__, sizeof(*phb)); @@ -2987,7 +2970,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, else phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE; - phb->diag_data = memblock_alloc(phb->diag_data_size, SMP_CACHE_BYTES); + phb->diag_data = kzalloc(phb->diag_data_size, GFP_KERNEL); if (!phb->diag_data) panic("%s: Failed to allocate %u bytes\n", __func__, phb->diag_data_size); @@ -3049,9 +3032,10 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, } pemap_off = size; size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe); - aux = memblock_alloc(size, SMP_CACHE_BYTES); + aux = kzalloc(size, GFP_KERNEL); if (!aux) panic("%s: Failed to allocate %lu bytes\n", __func__, size); + phb->ioda.pe_alloc = aux; phb->ioda.m64_segmap = aux + m64map_off; phb->ioda.m32_segmap = aux + m32map_off; @@ -3178,6 +3162,9 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Remove M64 resource if we can't configure it successfully */ if (!phb->init_m64 || phb->init_m64(phb)) hose->mem_resources[1].flags = 0; + + /* create pci_dn's for DT nodes under this PHB */ + pci_devs_phb_init_dynamic(hose); } void __init pnv_pci_init_ioda2_phb(struct device_node *np) diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 739a0b3b72e1..36d22920f5a3 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -119,7 +119,6 @@ struct pnv_phb { int flags; void __iomem *regs; u64 regs_phys; - int initialized; spinlock_t lock; #ifdef CONFIG_DEBUG_FS diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 4426a109ec2f..aadf932c4e61 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -180,9 +180,6 @@ static void __init pnv_setup_arch(void) /* Initialize SMP */ pnv_smp_init(); - /* Setup PCI */ - pnv_pci_init(); - /* Setup RTC and NVRAM callbacks */ if (firmware_has_feature(FW_FEATURE_OPAL)) opal_nvram_init(); @@ -547,6 +544,7 @@ define_machine(powernv) { .init_IRQ = pnv_init_IRQ, .show_cpuinfo = pnv_show_cpuinfo, .get_proc_freq = pnv_get_proc_freq, + .discover_phbs = pnv_pci_init, .progress = pnv_progress, .machine_shutdown = pnv_shutdown, .power_save = NULL, diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h index c8f574d1c04a..77feee8436d4 100644 --- a/arch/powerpc/platforms/powernv/subcore.h +++ b/arch/powerpc/platforms/powernv/subcore.h @@ -15,7 +15,7 @@ void split_core_secondary_loop(u8 *state); extern void update_subcore_sibling_mask(void); #else -static inline void update_subcore_sibling_mask(void) { }; +static inline void update_subcore_sibling_mask(void) { } #endif /* CONFIG_SMP */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c index 598e4cd563fb..b65256a63e87 100644 --- a/arch/powerpc/platforms/powernv/vas.c +++ b/arch/powerpc/platforms/powernv/vas.c @@ -28,12 +28,10 @@ static DEFINE_PER_CPU(int, cpu_vas_id); static int vas_irq_fault_window_setup(struct vas_instance *vinst) { - char devname[64]; int rc = 0; - snprintf(devname, sizeof(devname), "vas-%d", vinst->vas_id); rc = request_threaded_irq(vinst->virq, vas_fault_handler, - vas_fault_thread_fn, 0, devname, vinst); + vas_fault_thread_fn, 0, vinst->name, vinst); if (rc) { pr_err("VAS[%d]: Request IRQ(%d) failed with %d\n", @@ -80,6 +78,12 @@ static int init_vas_instance(struct platform_device *pdev) if (!vinst) return -ENOMEM; + vinst->name = kasprintf(GFP_KERNEL, "vas-%d", vasid); + if (!vinst->name) { + kfree(vinst); + return -ENOMEM; + } + INIT_LIST_HEAD(&vinst->node); ida_init(&vinst->ida); mutex_init(&vinst->mutex); @@ -162,6 +166,7 @@ static int init_vas_instance(struct platform_device *pdev) return 0; free_vinst: + kfree(vinst->name); kfree(vinst); return -ENODEV; diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index 70f793e8f6cc..c7db3190baca 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -340,6 +340,7 @@ struct vas_instance { struct vas_window *rxwin[VAS_COP_TYPE_MAX]; struct vas_window *windows[VAS_WINDOWS_PER_CHIP]; + char *name; char *dbgname; struct dentry *dbgdir; }; diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 16e86ba8aa20..233503fcf8f0 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -127,7 +127,6 @@ void dlpar_free_cc_nodes(struct device_node *dn) #define NEXT_PROPERTY 3 #define PREV_PARENT 4 #define MORE_MEMORY 5 -#define CALL_AGAIN -2 #define ERR_CFG_USE -9003 struct device_node *dlpar_configure_connector(__be32 drc_index, @@ -168,6 +167,9 @@ struct device_node *dlpar_configure_connector(__be32 drc_index, spin_unlock(&rtas_data_buf_lock); + if (rtas_busy_delay(rc)) + continue; + switch (rc) { case COMPLETE: break; @@ -216,9 +218,6 @@ struct device_node *dlpar_configure_connector(__be32 drc_index, last_dn = last_dn->parent; break; - case CALL_AGAIN: - break; - case MORE_MEMORY: case ERR_CFG_USE: default: @@ -521,11 +520,8 @@ static ssize_t dlpar_store(struct class *class, struct class_attribute *attr, int rc; args = argbuf = kstrdup(buf, GFP_KERNEL); - if (!argbuf) { - pr_info("Could not allocate resources for DLPAR operation\n"); - kfree(argbuf); + if (!argbuf) return -ENOMEM; - } /* * Parse out the request from the user, this will be in the form: diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index cf024fa37bda..bc15200852b7 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -43,7 +43,7 @@ static int ibm_get_config_addr_info; static int ibm_get_config_addr_info2; static int ibm_configure_pe; -void pseries_pcibios_bus_add_device(struct pci_dev *pdev) +static void pseries_pcibios_bus_add_device(struct pci_dev *pdev) { struct pci_dn *pdn = pci_get_pdn(pdev); @@ -694,8 +694,7 @@ static int pseries_eeh_write_config(struct eeh_dev *edev, int where, int size, u } #ifdef CONFIG_PCI_IOV -int pseries_send_allow_unfreeze(struct pci_dn *pdn, - u16 *vf_pe_array, int cur_vfs) +static int pseries_send_allow_unfreeze(struct pci_dn *pdn, u16 *vf_pe_array, int cur_vfs) { int rc; int ibm_allow_unfreeze = rtas_token("ibm,open-sriov-allow-unfreeze"); diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index 8c6e509f6967..a15ab33646b3 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -355,12 +355,12 @@ static int ibmebus_bus_device_probe(struct device *dev) if (!drv->probe) return error; - of_dev_get(of_dev); + get_device(dev); if (of_driver_match_device(dev, dev->driver)) error = drv->probe(of_dev); if (error) - of_dev_put(of_dev); + put_device(dev); return error; } diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 72a4d4167849..1bffbd1c9a94 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -55,9 +55,8 @@ struct pe_map_bar_entry { __be32 reserved; /* Reserved Space */ }; -int pseries_send_map_pe(struct pci_dev *pdev, - u16 num_vfs, - struct pe_map_bar_entry *vf_pe_array) +static int pseries_send_map_pe(struct pci_dev *pdev, u16 num_vfs, + struct pe_map_bar_entry *vf_pe_array) { struct pci_dn *pdn; int rc; @@ -88,7 +87,7 @@ int pseries_send_map_pe(struct pci_dev *pdev, return rc; } -void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num) +static void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num) { struct pci_dn *pdn; @@ -102,7 +101,7 @@ void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num) pdn->pe_num_map[vf_index]); } -int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs) +static int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs) { struct pci_dn *pdn; int i, rc, vf_index; @@ -146,7 +145,7 @@ int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs) return rc; } -int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +static int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) { struct pci_dn *pdn; int rc; @@ -189,14 +188,14 @@ int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) return rc; } -int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +static int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) { /* Allocate PCI data */ add_sriov_vf_pdns(pdev); return pseries_pci_sriov_enable(pdev, num_vfs); } -int pseries_pcibios_sriov_disable(struct pci_dev *pdev) +static int pseries_pcibios_sriov_disable(struct pci_dev *pdev) { struct pci_dn *pdn; diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 593840847cd3..4fe48c04c6c2 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -33,7 +33,7 @@ int smp_query_cpu_stopped(unsigned int pcpu); #define QCSS_HARDWARE_ERROR -1 #define QCSS_HARDWARE_BUSY -2 #else -static inline void smp_init_pseries(void) { }; +static inline void smp_init_pseries(void) { } #endif extern void pseries_kexec_cpu_down(int crash_shutdown, int secondary); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 149cec2212e6..f8b390a9d9fb 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -122,7 +122,7 @@ static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) * devices or systems (e.g. hugepages) that have not been initialized at the * subsys stage. */ -int __init init_ras_hotplug_IRQ(void) +static int __init init_ras_hotplug_IRQ(void) { struct device_node *np; @@ -315,12 +315,10 @@ static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id) /* Handle environmental and power warning (EPOW) interrupts. */ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) { - int status; int state; int critical; - status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, - &state); + rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); if (state > 3) critical = 1; /* Time Critical */ @@ -329,12 +327,9 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) spin_lock(&ras_log_buf_lock); - status = rtas_call(ras_check_exception_token, 6, 1, NULL, - RTAS_VECTOR_EXTERNAL_INTERRUPT, - virq_to_hw(irq), - RTAS_EPOW_WARNING, - critical, __pa(&ras_log_buf), - rtas_get_error_log_max()); + rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT, + virq_to_hw(irq), RTAS_EPOW_WARNING, critical, __pa(&ras_log_buf), + rtas_get_error_log_max()); log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); @@ -722,6 +717,7 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) struct pseries_errorlog *pseries_log; struct pseries_mc_errorlog *mce_log = NULL; int disposition = rtas_error_disposition(errp); + unsigned long msr; u8 error_type; if (!rtas_error_extended(errp)) @@ -747,9 +743,21 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) * SLB multihit is done by now. */ out: - mtmsr(mfmsr() | MSR_IR | MSR_DR); + msr = mfmsr(); + mtmsr(msr | MSR_IR | MSR_DR); + disposition = mce_handle_err_virtmode(regs, errp, mce_log, disposition); + + /* + * Queue irq work to log this rtas event later. + * irq_work_queue uses per-cpu variables, so do this in virt + * mode as well. + */ + irq_work_queue(&mce_errlog_process_work); + + mtmsr(msr); + return disposition; } @@ -813,7 +821,7 @@ static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt) */ recovered = 0; } else { - die("Machine check", regs, SIGBUS); + die_mce("Machine check", regs, SIGBUS); recovered = 1; } } @@ -865,10 +873,8 @@ long pseries_machine_check_realmode(struct pt_regs *regs) * virtual mode. */ disposition = mce_handle_error(regs, errp); - fwnmi_release_errinfo(); - /* Queue irq work to log this rtas event later. */ - irq_work_queue(&mce_errlog_process_work); + fwnmi_release_errinfo(); if (disposition == RTAS_DISP_FULLY_RECOVERED) return 1; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 090c13f6c881..46e1540abc22 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -463,7 +463,7 @@ void pseries_little_endian_exceptions(void) } #endif -static void __init find_and_init_phbs(void) +static void __init pSeries_discover_phbs(void) { struct device_node *node; struct pci_controller *phb; @@ -481,6 +481,9 @@ static void __init find_and_init_phbs(void) pci_process_bridge_OF_ranges(phb, node, 0); isa_bridge_find_early(phb); phb->controller_ops = pseries_pci_controller_ops; + + /* create pci_dn's for DT nodes under this PHB */ + pci_devs_phb_init_dynamic(phb); } of_node_put(root); @@ -607,8 +610,8 @@ enum get_iov_fw_value_index { WDW_SIZE = 3 /* Get Window Size */ }; -resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno, - enum get_iov_fw_value_index value) +static resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno, + enum get_iov_fw_value_index value) { const int *indexes; struct device_node *dn = pci_device_to_OF_node(dev); @@ -643,7 +646,7 @@ resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno, return ret; } -void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes) +static void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes) { struct resource *res; resource_size_t base, size; @@ -665,7 +668,7 @@ void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes) } } -void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes) +static void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes) { struct resource *res, *root, *conflict; resource_size_t base, size; @@ -786,7 +789,6 @@ static void __init pSeries_setup_arch(void) /* Find and initialize PCI host bridges */ init_pci_config_tokens(); - find_and_init_phbs(); of_reconfig_notifier_register(&pci_dn_reconfig_nb); pSeries_nvram_init(); @@ -1050,6 +1052,7 @@ define_machine(pseries) { .init_IRQ = pseries_init_irq, .show_cpuinfo = pSeries_show_cpuinfo, .log_error = pSeries_log_error, + .discover_phbs = pSeries_discover_phbs, .pcibios_fixup = pSeries_final_fixup, .restart = rtas_restart, .halt = rtas_halt, diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index dcd817ca2edf..3fe37495f63d 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1383,7 +1383,6 @@ static long check_bp_loc(unsigned long addr) return 1; } -#ifndef CONFIG_PPC_8xx static int find_free_data_bpt(void) { int i; @@ -1395,7 +1394,6 @@ static int find_free_data_bpt(void) printf("Couldn't find free breakpoint register\n"); return -1; } -#endif static void print_data_bpts(void) { @@ -1435,7 +1433,6 @@ bpt_cmds(void) cmd = inchar(); switch (cmd) { -#ifndef CONFIG_PPC_8xx static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n"; int mode; case 'd': /* bd - hardware data breakpoint */ @@ -1497,7 +1494,6 @@ bpt_cmds(void) force_enable_xmon(); } break; -#endif case 'c': if (!scanhex(&a)) { @@ -3723,7 +3719,7 @@ void dump_segments(void) printf("sr0-15 ="); for (i = 0; i < 16; ++i) - printf(" %x", mfsrin(i << 28)); + printf(" %x", mfsr(i << 28)); printf("\n"); } #endif |