Diffstat (limited to 'arch/powerpc'): 352 files changed, 9539 insertions, 3279 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c7f120aaa98f..a35ab50d2eba 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -80,6 +80,7 @@ config ARCH_HAS_DMA_SET_COHERENT_MASK
 config PPC
 	bool
 	default y
+	select BUILDTIME_EXTABLE_SORT
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
 	select BINFMT_ELF
@@ -92,6 +93,7 @@ config PPC
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_GCC_PLUGINS
 	select SYSCTL_EXCEPTION_TRACE
 	select VIRT_TO_BUS if !PPC64
 	select HAVE_IDE
@@ -112,7 +114,7 @@ config PPC
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
-	select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
+	select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select SPARSE_IRQ
 	select IRQ_DOMAIN
@@ -163,9 +165,11 @@ config PPC
 	select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
 	select HAVE_ARCH_HARDENED_USERCOPY
 	select HAVE_KERNEL_GZIP
+	select HAVE_CONTEXT_TRACKING if PPC64
+	select ARCH_WEAK_RELEASE_ACQUIRE

 config GENERIC_CSUM
-	def_bool CPU_LITTLE_ENDIAN
+	def_bool n

 config EARLY_PRINTK
 	bool
@@ -396,6 +400,14 @@ config MPROFILE_KERNEL
 	depends on PPC64 && CPU_LITTLE_ENDIAN
 	def_bool !DISABLE_MPROFILE_KERNEL

+config USE_THIN_ARCHIVES
+	bool "Build the kernel using thin archives"
+	default n
+	select THIN_ARCHIVES
+	help
+	  Build the kernel using thin archives.
+	  If you're unsure say N.
+
 config IOMMU_HELPER
 	def_bool PPC64

@@ -456,10 +468,25 @@ config KEXEC
 	  interface is strongly in flux, so no good recommendation can be
 	  made.

+config KEXEC_FILE
+	bool "kexec file based system call"
+	select KEXEC_CORE
+	select HAVE_IMA_KEXEC
+	select BUILD_BIN2C
+	depends on PPC64
+	depends on CRYPTO=y
+	depends on CRYPTO_SHA256=y
+	help
+	  This is a new version of the kexec system call. This call is
+	  file based and takes in file descriptors as system call arguments
+	  for kernel and initramfs as opposed to a list of segments as is the
+	  case for the older kexec call.
+
 config RELOCATABLE
 	bool "Build a relocatable kernel"
 	depends on (PPC64 && !COMPILE_TEST) || (FLATMEM && (44x || FSL_BOOKE))
 	select NONSTATIC_KERNEL
+	select MODULE_REL_CRCS if MODVERSIONS
 	help
 	  This builds a kernel image that is capable of running at the
 	  location the kernel is loaded at. For ppc32, there is no any
@@ -479,6 +506,15 @@
 	  setting can still be useful to bootwrappers that need to know the
 	  load address of the kernel (eg. u-boot/mkimage).

+config RELOCATABLE_TEST
+	bool "Test relocatable kernel"
+	depends on (PPC64 && RELOCATABLE)
+	default n
+	help
+	  This runs the relocatable kernel at the address it was initially
+	  loaded at, which tends to be non-zero and therefore tests the
+	  relocation code.
+
 config CRASH_DUMP
 	bool "Build a kdump crash kernel"
 	depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP)
@@ -490,7 +526,7 @@ config FA_DUMP
 	bool "Firmware-assisted dump"
-	depends on PPC64 && PPC_RTAS && CRASH_DUMP && KEXEC
+	depends on PPC64 && PPC_RTAS && CRASH_DUMP && KEXEC_CORE
 	help
 	  A robust mechanism to get reliable kernel crash dump with
 	  assistance from firmware. This approach does not use kexec,
@@ -549,6 +585,13 @@ config ARCH_SPARSEMEM_DEFAULT
 config SYS_SUPPORTS_HUGETLBFS
 	bool

+config ILLEGAL_POINTER_VALUE
+	hex
+	# This is roughly half way between the top of user space and the bottom
+	# of kernel space, which seems about as good as we can get.
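For context, this hex value becomes the base of the kernel's pointer-poison constants. The snippet below shows how include/linux/poison.h consumes the option; it is paraphrased from memory, so treat the exact lines as an approximation rather than a quote of this series:

	#ifdef CONFIG_ILLEGAL_POINTER_VALUE
	# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL)
	#else
	# define POISON_POINTER_DELTA 0
	#endif

	/* Offsetting the list poisons by the delta lands them in the unmapped
	 * gap between user and kernel space, so dereferencing a poisoned
	 * list_head faults immediately instead of silently reading memory. */
	#define LIST_POISON1  ((void *) 0x100 + POISON_POINTER_DELTA)
	#define LIST_POISON2  ((void *) 0x200 + POISON_POINTER_DELTA)
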
+ default 0x5deadbeef0000000 if PPC64 + default 0 + source "mm/Kconfig" config ARCH_MEMORY_PROBE diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 63292f64b25a..c86df246339e 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -354,4 +354,19 @@ config FAIL_IOMMU If you are unsure, say N. +config PPC_PTDUMP + bool "Export kernel pagetable layout to userspace via debugfs" + depends on DEBUG_KERNEL && DEBUG_FS + help + This option exports the state of the kernel pagetables to a + debugfs file. This is only useful for kernel developers who are + working in architecture specific areas of the kernel - probably + not a good idea to enable this feature in a production kernel. + + If you are unsure, say N. + +config PPC_HTDUMP + def_bool y + depends on PPC_PTDUMP && PPC_BOOK3S + endmenu diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 617dece67924..31286fa7873c 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -23,7 +23,7 @@ CROSS32AR := $(CROSS32_COMPILE)ar ifeq ($(HAS_BIARCH),y) ifeq ($(CROSS32_COMPILE),) CROSS32CC := $(CC) -m32 -CROSS32AR := GNUTARGET=elf32-powerpc $(AR) +KBUILD_ARFLAGS += --target=elf32-powerpc endif endif @@ -85,7 +85,7 @@ ifeq ($(HAS_BIARCH),y) override AS += -a$(BITS) override LD += -m elf$(BITS)$(LDEMULATION) override CC += -m$(BITS) -override AR := GNUTARGET=elf$(BITS)-$(GNUTARGET) $(AR) +KBUILD_ARFLAGS += --target=elf$(BITS)-$(GNUTARGET) endif LDFLAGS_vmlinux-y := -Bstatic @@ -121,6 +121,7 @@ CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD) ifeq ($(CONFIG_PPC_BOOK3S_64),y) CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,-mtune=power4) +CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power4 else CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 endif @@ -249,6 +250,7 @@ core-y += arch/powerpc/kernel/ \ core-$(CONFIG_XMON) += arch/powerpc/xmon/ core-$(CONFIG_KVM) += arch/powerpc/kvm/ core-$(CONFIG_PERF_EVENTS) += arch/powerpc/perf/ +core-$(CONFIG_KEXEC_FILE) += arch/powerpc/purgatory/ drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ @@ -275,16 +277,16 @@ zImage: relocs_check endif $(BOOT_TARGETS1): vmlinux - $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@) + $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@) $(BOOT_TARGETS2): vmlinux - $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@) + $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@) bootwrapper_install: - $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@) + $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@) %.dtb: scripts - $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@) + $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@) # Used to create 'merged defconfigs' # To use it $(call) it with the first argument as the base defconfig diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore index d61c03525777..84774ccba1c2 100644 --- a/arch/powerpc/boot/.gitignore +++ b/arch/powerpc/boot/.gitignore @@ -1,4 +1,5 @@ addnote +decompress_inflate.c empty.c hack-coff inffast.c @@ -13,11 +14,13 @@ infutil.h kernel-vmlinux.strip.c kernel-vmlinux.strip.gz mktree +otheros.bld uImage cuImage.* dtbImage.* *.dtb treeImage.* +vmlinux.strip zImage zImage.initrd zImage.bin.* @@ -26,6 +29,7 @@ zImage.coff zImage.epapr zImage.holly zImage.*lds +zImage.maple zImage.miboot zImage.pmac zImage.pseries diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 9d47f2efa830..e82f333cc84a 100644 --- a/arch/powerpc/boot/Makefile +++ 
b/arch/powerpc/boot/Makefile @@ -172,10 +172,6 @@ $(addprefix $(obj)/,$(libfdt) $(libfdtheader)): $(obj)/%: $(srctree)/scripts/dtc $(obj)/empty.c: $(Q)touch $@ -$(obj)/zImage.lds: $(obj)/%: $(srctree)/$(src)/%.S - $(CROSS32CC) $(cpp_flags) -E -Wp,-MD,$(depfile) -P -Upowerpc \ - -D__ASSEMBLY__ -DLINKER_SCRIPT -o $@ $< - $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S $(Q)cp $< $@ @@ -357,17 +353,17 @@ $(addprefix $(obj)/, $(initrd-y)): $(obj)/ramdisk.image.gz # Don't put the ramdisk on the pattern rule; when its missing make will try # the pattern rule with less dependencies that also matches (even with the # hard dependency listed). -$(obj)/zImage.initrd.%: vmlinux $(wrapperbits) +$(obj)/zImage.initrd.%: vmlinux $(wrapperbits) FORCE $(call if_changed,wrap,$*,,,$(obj)/ramdisk.image.gz) -$(addprefix $(obj)/, $(sort $(filter zImage.%, $(image-y)))): vmlinux $(wrapperbits) +$(addprefix $(obj)/, $(sort $(filter zImage.%, $(image-y)))): vmlinux $(wrapperbits) FORCE $(call if_changed,wrap,$(subst $(obj)/zImage.,,$@)) # dtbImage% - a dtbImage is a zImage with an embedded device tree blob -$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/%.dtb +$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/%.dtb FORCE $(call if_changed,wrap,$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz) -$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/%.dtb +$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/%.dtb FORCE $(call if_changed,wrap,$*,,$(obj)/$*.dtb) # This cannot be in the root of $(src) as the zImage rule always adds a $(obj) @@ -375,31 +371,31 @@ $(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/%.dtb $(obj)/vmlinux.strip: vmlinux $(STRIP) -s -R .comment $< -o $@ -$(obj)/uImage: vmlinux $(wrapperbits) +$(obj)/uImage: vmlinux $(wrapperbits) FORCE $(call if_changed,wrap,uboot) -$(obj)/uImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) +$(obj)/uImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE $(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz) -$(obj)/uImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) +$(obj)/uImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE $(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb) -$(obj)/cuImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) +$(obj)/cuImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE $(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz) -$(obj)/cuImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) +$(obj)/cuImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE $(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb) -$(obj)/simpleImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) +$(obj)/simpleImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE $(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz) -$(obj)/simpleImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) +$(obj)/simpleImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE $(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb) -$(obj)/treeImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) +$(obj)/treeImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE $(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz) -$(obj)/treeImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) +$(obj)/treeImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE $(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb) # Rule to build device tree blobs diff --git a/arch/powerpc/boot/dts/fsl/kmcent2.dts b/arch/powerpc/boot/dts/fsl/kmcent2.dts new file mode 100644 index 000000000000..47afa438602e --- /dev/null +++ 
b/arch/powerpc/boot/dts/fsl/kmcent2.dts @@ -0,0 +1,303 @@ +/* + * Keymile kmcent2 Device Tree Source, based on T1040RDB DTS + * + * (C) Copyright 2016 + * Valentin Longchamp, Keymile AG, valentin.longchamp@keymile.com + * + * Copyright 2014 - 2015 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/include/ "t104xsi-pre.dtsi" + +/ { + model = "keymile,kmcent2"; + compatible = "keymile,kmcent2"; + + aliases { + front_phy = &front_phy; + }; + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + bman_fbpr: bman-fbpr { + size = <0 0x1000000>; + alignment = <0 0x1000000>; + }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; + }; + + ifc: localbus@ffe124000 { + reg = <0xf 0xfe124000 0 0x2000>; + ranges = <0 0 0xf 0xe8000000 0x04000000 + 1 0 0xf 0xfa000000 0x00010000 + 2 0 0xf 0xfb000000 0x00010000 + 4 0 0xf 0xc0000000 0x08000000 + 6 0 0xf 0xd0000000 0x08000000 + 7 0 0xf 0xd8000000 0x08000000>; + + nor@0,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x04000000>; + bank-width = <2>; + device-width = <2>; + }; + + nand@1,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,ifc-nand"; + reg = <0x1 0x0 0x10000>; + }; + + board-control@2,0 { + compatible = "keymile,qriox"; + reg = <0x2 0x0 0x80>; + }; + + chassis-mgmt@6,0 { + compatible = "keymile,bfticu"; + reg = <6 0 0x100>; + interrupt-controller; + interrupt-parent = <&mpic>; + interrupts = <11 1 0 0>; + #interrupt-cells = <1>; + }; + + }; + + memory { + device_type = "memory"; + }; + + dcsr: dcsr@f00000000 { + ranges = <0x00000000 0xf 0x00000000 0x01072000>; + }; + + bportals: bman-portals@ff4000000 { + ranges = <0x0 0xf 0xf4000000 0x2000000>; + }; + + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + + soc: soc@ffe000000 { + ranges = <0x00000000 0xf 0xfe000000 0x1000000>; + reg = <0xf 0xfe000000 0 0x00001000>; + + spi@110000 { + network-clock@1 { + compatible = "zarlink,zl30364"; + reg = <1>; + spi-max-frequency = <1000000>; + }; + }; + + sdhc@114000 { + status = "disabled"; + }; + + i2c@118000 { + clock-frequency = <100000>; + + mux@70 { + compatible = "nxp,pca9547"; + reg = <0x70>; + #address-cells = <1>; + #size-cells = <0>; + i2c-mux-idle-disconnect; + + i2c@0 { + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + + eeprom@54 { + compatible = "24c02"; + reg = <0x54>; + pagesize = <2>; + read-only; + label = "ddr3-spd"; + }; + }; + + i2c@7 { + reg = <7>; + #address-cells = <1>; + #size-cells = <0>; + + temp-sensor@48 { + compatible = "national,lm75"; + reg = <0x48>; + label = "SENSOR_0"; + }; + temp-sensor@4a { + compatible = "national,lm75"; + reg = <0x4a>; + label = "SENSOR_2"; + }; + temp-sensor@4b { + compatible = "national,lm75"; + reg = <0x4b>; + label = "SENSOR_3"; + }; + }; + }; + }; + + i2c@118100 { + clock-frequency = <100000>; + + eeprom@50 { + compatible = "atmel,24c08"; + reg = <0x50>; + pagesize = <16>; + }; + + eeprom@54 { + compatible = "atmel,24c08"; + reg = <0x54>; + pagesize = <16>; + }; + }; + + i2c@119000 { + status = "disabled"; + }; + + i2c@119100 { + status = "disabled"; + }; + + serial2: serial@11d500 { + status = "disabled"; + }; + + 
serial3: serial@11d600 { + status = "disabled"; + }; + + usb0: usb@210000 { + status = "disabled"; + }; + usb1: usb@211000 { + status = "disabled"; + }; + + display@180000 { + status = "disabled"; + }; + + sata@220000 { + status = "disabled"; + }; + sata@221000 { + status = "disabled"; + }; + + fman@400000 { + ethernet@e0000 { + fixed-link = <0 1 1000 0 0>; + phy-connection-type = "sgmii"; + }; + + ethernet@e2000 { + fixed-link = <1 1 1000 0 0>; + phy-connection-type = "sgmii"; + }; + + ethernet@e4000 { + status = "disabled"; + }; + + ethernet@e6000 { + status = "disabled"; + }; + + ethernet@e8000 { + phy-handle = <&front_phy>; + phy-connection-type = "rgmii"; + }; + + mdio0: mdio@fc000 { + front_phy: ethernet-phy@11 { + reg = <0x11>; + }; + }; + }; + }; + + + pci0: pcie@ffe240000 { + reg = <0xf 0xfe240000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000 + 0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x20000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci1: pcie@ffe250000 { + status = "disabled"; + }; + + pci2: pcie@ffe260000 { + status = "disabled"; + }; + + pci3: pcie@ffe270000 { + status = "disabled"; + }; + + qe: qe@ffe140000 { + ranges = <0x0 0xf 0xfe140000 0x40000>; + reg = <0xf 0xfe140000 0 0x480>; + brg-frequency = <0>; + bus-frequency = <0>; + + si1: si@700 { + compatible = "fsl,t1040-qe-si"; + reg = <0x700 0x80>; + }; + + siram1: siram@1000 { + compatible = "fsl,t1040-qe-siram"; + reg = <0x1000 0x800>; + }; + + ucc_hdlc: ucc@2000 { + device_type = "hdlc"; + compatible = "fsl,ucc-hdlc"; + rx-clock-name = "clk9"; + tx-clock-name = "clk9"; + fsl,tx-timeslot-mask = <0xfffffffe>; + fsl,rx-timeslot-mask = <0xfffffffe>; + fsl,siram-entry-id = <0>; + }; + }; +}; + +#include "t1040si-post.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/kmcoge4.dts b/arch/powerpc/boot/dts/fsl/kmcoge4.dts index ae70a24094b0..e103c0f3f650 100644 --- a/arch/powerpc/boot/dts/fsl/kmcoge4.dts +++ b/arch/powerpc/boot/dts/fsl/kmcoge4.dts @@ -83,6 +83,10 @@ }; }; + sdhc@114000 { + status = "disabled"; + }; + i2c@119000 { status = "disabled"; }; diff --git a/arch/powerpc/boot/dts/fsl/t1023rdb.dts b/arch/powerpc/boot/dts/fsl/t1023rdb.dts index 29757623e5ba..5ba6fbfca274 100644 --- a/arch/powerpc/boot/dts/fsl/t1023rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1023rdb.dts @@ -41,6 +41,27 @@ #size-cells = <2>; interrupt-parent = <&mpic>; + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + bman_fbpr: bman-fbpr { + size = <0 0x1000000>; + alignment = <0 0x1000000>; + }; + + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; + }; + ifc: localbus@ffe124000 { reg = <0xf 0xfe124000 0 0x2000>; ranges = <0 0 0xf 0xe8000000 0x08000000 @@ -72,6 +93,14 @@ ranges = <0x00000000 0xf 0x00000000 0x01072000>; }; + bportals: bman-portals@ff4000000 { + ranges = <0x0 0xf 0xf4000000 0x2000000>; + }; + + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi index 6e0b4892a740..da2894c59479 100644 --- a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi @@ -34,6 +34,21 @@ 
#include <dt-bindings/thermal/thermal.h> +&bman_fbpr { + compatible = "fsl,bman-fbpr"; + alloc-ranges = <0 0 0x10000 0>; +}; + +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10000 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10000 0>; +}; + &ifc { #address-cells = <2>; #size-cells = <1>; @@ -180,6 +195,92 @@ }; }; +&bportals { + #address-cells = <0x1>; + #size-cells = <0x1>; + compatible = "simple-bus"; + + bman-portal@0 { + cell-index = <0x0>; + compatible = "fsl,bman-portal"; + reg = <0x0 0x4000>, <0x1000000 0x1000>; + interrupts = <105 2 0 0>; + }; + bman-portal@4000 { + cell-index = <0x1>; + compatible = "fsl,bman-portal"; + reg = <0x4000 0x4000>, <0x1001000 0x1000>; + interrupts = <107 2 0 0>; + }; + bman-portal@8000 { + cell-index = <2>; + compatible = "fsl,bman-portal"; + reg = <0x8000 0x4000>, <0x1002000 0x1000>; + interrupts = <109 2 0 0>; + }; + bman-portal@c000 { + cell-index = <0x3>; + compatible = "fsl,bman-portal"; + reg = <0xc000 0x4000>, <0x1003000 0x1000>; + interrupts = <111 2 0 0>; + }; + bman-portal@10000 { + cell-index = <0x4>; + compatible = "fsl,bman-portal"; + reg = <0x10000 0x4000>, <0x1004000 0x1000>; + interrupts = <113 2 0 0>; + }; + bman-portal@14000 { + cell-index = <0x5>; + compatible = "fsl,bman-portal"; + reg = <0x14000 0x4000>, <0x1005000 0x1000>; + interrupts = <115 2 0 0>; + }; +}; + +&qportals { + #address-cells = <0x1>; + #size-cells = <0x1>; + compatible = "simple-bus"; + + qportal0: qman-portal@0 { + compatible = "fsl,qman-portal"; + reg = <0x0 0x4000>, <0x1000000 0x1000>; + interrupts = <104 0x2 0 0>; + cell-index = <0x0>; + }; + qportal1: qman-portal@4000 { + compatible = "fsl,qman-portal"; + reg = <0x4000 0x4000>, <0x1001000 0x1000>; + interrupts = <106 0x2 0 0>; + cell-index = <0x1>; + }; + qportal2: qman-portal@8000 { + compatible = "fsl,qman-portal"; + reg = <0x8000 0x4000>, <0x1002000 0x1000>; + interrupts = <108 0x2 0 0>; + cell-index = <0x2>; + }; + qportal3: qman-portal@c000 { + compatible = "fsl,qman-portal"; + reg = <0xc000 0x4000>, <0x1003000 0x1000>; + interrupts = <110 0x2 0 0>; + cell-index = <0x3>; + }; + qportal4: qman-portal@10000 { + compatible = "fsl,qman-portal"; + reg = <0x10000 0x4000>, <0x1004000 0x1000>; + interrupts = <112 0x2 0 0>; + cell-index = <0x4>; + }; + qportal5: qman-portal@14000 { + compatible = "fsl,qman-portal"; + reg = <0x14000 0x4000>, <0x1005000 0x1000>; + interrupts = <114 0x2 0 0>; + cell-index = <0x5>; + }; +}; + &soc { #address-cells = <1>; #size-cells = <1>; @@ -413,6 +514,8 @@ }; /include/ "qoriq-sec5.0-0.dtsi" +/include/ "qoriq-qman3.dtsi" +/include/ "qoriq-bman1.dtsi" /include/ "qoriq-fman3l-0.dtsi" /include/ "qoriq-fman3-0-10g-0-best-effort.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/t1024qds.dts b/arch/powerpc/boot/dts/fsl/t1024qds.dts index 772143da367f..d6858b7cd93f 100644 --- a/arch/powerpc/boot/dts/fsl/t1024qds.dts +++ b/arch/powerpc/boot/dts/fsl/t1024qds.dts @@ -41,6 +41,27 @@ #size-cells = <2>; interrupt-parent = <&mpic>; + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + bman_fbpr: bman-fbpr { + size = <0 0x1000000>; + alignment = <0 0x1000000>; + }; + + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; + }; + ifc: localbus@ffe124000 { reg = <0xf 0xfe124000 0 0x2000>; ranges = <0 0 0xf 0xe8000000 0x08000000 @@ -80,6 +101,14 @@ ranges = <0x00000000 0xf 0x00000000 0x01072000>; }; + bportals: 
bman-portals@ff4000000 { + ranges = <0x0 0xf 0xf4000000 0x2000000>; + }; + + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts b/arch/powerpc/boot/dts/fsl/t1024rdb.dts index 302cdd22b4bb..73a645324bc1 100644 --- a/arch/powerpc/boot/dts/fsl/t1024rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts @@ -41,6 +41,31 @@ #size-cells = <2>; interrupt-parent = <&mpic>; + aliases { + sg_2500_aqr105_phy4 = &sg_2500_aqr105_phy4; + }; + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + bman_fbpr: bman-fbpr { + size = <0 0x1000000>; + alignment = <0 0x1000000>; + }; + + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; + }; + ifc: localbus@ffe124000 { reg = <0xf 0xfe124000 0 0x2000>; ranges = <0 0 0xf 0xe8000000 0x08000000 @@ -82,6 +107,14 @@ ranges = <0x00000000 0xf 0x00000000 0x01072000>; }; + bportals: bman-portals@ff4000000 { + ranges = <0x0 0xf 0xf4000000 0x2000000>; + }; + + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts index 2a5a90dd272e..fcd2aeb5b8ac 100644 --- a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts @@ -48,6 +48,58 @@ "fsl,deepsleep-cpld"; }; }; + + soc: soc@ffe000000 { + fman0: fman@400000 { + ethernet@e0000 { + phy-handle = <&phy_sgmii_0>; + phy-connection-type = "sgmii"; + }; + + ethernet@e2000 { + phy-handle = <&phy_sgmii_1>; + phy-connection-type = "sgmii"; + }; + + ethernet@e4000 { + phy-handle = <&phy_sgmii_2>; + phy-connection-type = "sgmii"; + }; + + ethernet@e6000 { + phy-handle = <&phy_rgmii_0>; + phy-connection-type = "rgmii"; + }; + + ethernet@e8000 { + phy-handle = <&phy_rgmii_1>; + phy-connection-type = "rgmii"; + }; + + mdio0: mdio@fc000 { + phy_sgmii_0: ethernet-phy@02 { + reg = <0x02>; + }; + + phy_sgmii_1: ethernet-phy@03 { + reg = <0x03>; + }; + + phy_sgmii_2: ethernet-phy@01 { + reg = <0x01>; + }; + + phy_rgmii_0: ethernet-phy@04 { + reg = <0x04>; + }; + + phy_rgmii_1: ethernet-phy@05 { + reg = <0x05>; + }; + }; + }; + }; + }; #include "t1042si-post.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi index c744569a20e1..a97296c64eb2 100644 --- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi @@ -678,5 +678,6 @@ compatible = "fsl,t2080-l2-cache-controller"; reg = <0xc20000 0x40000>; next-level-cache = <&cpc>; + interrupts = <16 2 1 9>; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t4240rdb.dts b/arch/powerpc/boot/dts/fsl/t4240rdb.dts index cc0a264b8acb..8166c660712a 100644 --- a/arch/powerpc/boot/dts/fsl/t4240rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t4240rdb.dts @@ -125,6 +125,10 @@ }; i2c@118000 { + hwmon@2f { + compatible = "winbond,w83793"; + reg = <0x2f>; + }; eeprom@52 { compatible = "at24,24c256"; reg = <0x52>; diff --git a/arch/powerpc/boot/ps3-head.S b/arch/powerpc/boot/ps3-head.S index b6fcbaf5027b..3dc44b05fb97 100644 --- a/arch/powerpc/boot/ps3-head.S +++ b/arch/powerpc/boot/ps3-head.S @@ -57,11 +57,6 @@ __system_reset_overlay: bctr 1: - /* Save the 
value at addr zero for a null pointer write check later. */ - - li r4, 0 - lwz r3, 0(r4) - /* Primary delays then goes to _zimage_start in wrapper. */ or 31, 31, 31 /* db16cyc */ diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c index 4ec2d86d3c50..a05558a7e51a 100644 --- a/arch/powerpc/boot/ps3.c +++ b/arch/powerpc/boot/ps3.c @@ -119,13 +119,12 @@ void ps3_copy_vectors(void) flush_cache((void *)0x100, 512); } -void platform_init(unsigned long null_check) +void platform_init(void) { const u32 heapsize = 0x1000000 - (u32)_end; /* 16MiB */ void *chosen; unsigned long ft_addr; u64 rm_size; - unsigned long val; console_ops.write = ps3_console_write; platform_ops.exit = ps3_exit; @@ -153,11 +152,6 @@ void platform_init(unsigned long null_check) printf(" flat tree at 0x%lx\n\r", ft_addr); - val = *(unsigned long *)0; - - if (val != null_check) - printf("null check failed: %lx != %lx\n\r", val, null_check); - ((kernel_entry_t)0)(ft_addr, 0, NULL); ps3_exit(); diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 404b3aabdb4d..76fe3ccfd381 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -181,6 +181,28 @@ case "$elfformat" in elf32-powerpc) format=elf32ppc ;; esac +ld_version() +{ + # Poached from scripts/ld-version.sh, but we don't want to call that because + # this script (wrapper) is distributed separately from the kernel source. + # Extract linker version number from stdin and turn into single number. + awk '{ + gsub(".*\\)", ""); + gsub(".*version ", ""); + gsub("-.*", ""); + split($1,a, "."); + print a[1]*100000000 + a[2]*1000000 + a[3]*10000; + exit + }' +} + +# Do not include PT_INTERP segment when linking pie. Non-pie linking +# just ignores this option. +LD_VERSION=$(${CROSS}ld --version | ld_version) +LD_NO_DL_MIN_VERSION=$(echo 2.26 | ld_version) +if [ "$LD_VERSION" -ge "$LD_NO_DL_MIN_VERSION" ] ; then + nodl="--no-dynamic-linker" +fi platformo=$object/"$platform".o lds=$object/zImage.lds @@ -446,7 +468,7 @@ if [ "$platform" != "miboot" ]; then text_start="-Ttext $link_address" fi #link everything - ${CROSS}ld -m $format -T $lds $text_start $pie -o "$ofile" \ + ${CROSS}ld -m $format -T $lds $text_start $pie $nodl -o "$ofile" \ $platformo $tmp $object/wrapper.a rm $tmp fi diff --git a/arch/powerpc/configs/85xx/kmp204x_defconfig b/arch/powerpc/configs/85xx/kmp204x_defconfig deleted file mode 100644 index aaaaa609cd24..000000000000 --- a/arch/powerpc/configs/85xx/kmp204x_defconfig +++ /dev/null @@ -1,220 +0,0 @@ -CONFIG_PPC_85xx=y -CONFIG_SMP=y -CONFIG_NR_CPUS=8 -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_AUDIT=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_CGROUPS=y -CONFIG_CGROUP_SCHED=y -CONFIG_RELAY=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_KALLSYMS_ALL=y -CONFIG_EMBEDDED=y -CONFIG_PERF_EVENTS=y -CONFIG_SLAB=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODVERSIONS=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAC_PARTITION=y -CONFIG_CORENET_GENERIC=y -CONFIG_MPIC_MSGR=y -CONFIG_HIGHMEM=y -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_BINFMT_MISC=m -CONFIG_KEXEC=y -CONFIG_FORCE_MAX_ZONEORDER=13 -CONFIG_PCI=y -CONFIG_PCIEPORTBUS=y -# CONFIG_PCIEASPM is not set -CONFIG_PCI_MSI=y -CONFIG_ADVANCED_OPTIONS=y -CONFIG_LOWMEM_SIZE_BOOL=y -CONFIG_LOWMEM_SIZE=0x20000000 -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_XFRM_USER=y -CONFIG_XFRM_SUB_POLICY=y 
-CONFIG_XFRM_STATISTICS=y -CONFIG_NET_KEY=y -CONFIG_NET_KEY_MIGRATE=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_IP_PNP_RARP=y -CONFIG_NET_IPIP=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_INET_AH=y -CONFIG_INET_ESP=y -CONFIG_INET_IPCOMP=y -CONFIG_IPV6=y -CONFIG_IP_SCTP=m -CONFIG_TIPC=y -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=y -CONFIG_NET_SCH_HTB=y -CONFIG_NET_SCH_HFSC=y -CONFIG_NET_SCH_PRIO=y -CONFIG_NET_SCH_MULTIQ=y -CONFIG_NET_SCH_RED=y -CONFIG_NET_SCH_SFQ=y -CONFIG_NET_SCH_TEQL=y -CONFIG_NET_SCH_TBF=y -CONFIG_NET_SCH_GRED=y -CONFIG_NET_CLS_BASIC=y -CONFIG_NET_CLS_TCINDEX=y -CONFIG_NET_CLS_U32=y -CONFIG_CLS_U32_PERF=y -CONFIG_CLS_U32_MARK=y -CONFIG_NET_CLS_FLOW=y -CONFIG_NET_CLS_CGROUP=y -CONFIG_UEVENT_HELPER_PATH="/sbin/mdev" -CONFIG_DEVTMPFS=y -CONFIG_MTD=y -CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_PHRAM=y -CONFIG_MTD_NAND=y -CONFIG_MTD_NAND_ECC_BCH=y -CONFIG_MTD_NAND_FSL_ELBC=y -CONFIG_MTD_UBI=y -CONFIG_MTD_UBI_GLUEBI=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_COUNT=2 -CONFIG_BLK_DEV_RAM_SIZE=2048 -CONFIG_EEPROM_AT24=y -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=y -CONFIG_BLK_DEV_SR=y -CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_SYM53C8XX_2=y -CONFIG_NETDEVICES=y -# CONFIG_NET_VENDOR_3COM is not set -# CONFIG_NET_VENDOR_ADAPTEC is not set -# CONFIG_NET_VENDOR_ALTEON is not set -# CONFIG_NET_VENDOR_AMD is not set -# CONFIG_NET_VENDOR_ATHEROS is not set -# CONFIG_NET_VENDOR_BROADCOM is not set -# CONFIG_NET_VENDOR_BROCADE is not set -# CONFIG_NET_VENDOR_CHELSIO is not set -# CONFIG_NET_VENDOR_CISCO is not set -# CONFIG_NET_VENDOR_DEC is not set -# CONFIG_NET_VENDOR_DLINK is not set -# CONFIG_NET_VENDOR_EMULEX is not set -# CONFIG_NET_VENDOR_EXAR is not set -CONFIG_FSL_PQ_MDIO=y -CONFIG_FSL_XGMAC_MDIO=y -# CONFIG_NET_VENDOR_HP is not set -# CONFIG_NET_VENDOR_INTEL is not set -# CONFIG_NET_VENDOR_MARVELL is not set -# CONFIG_NET_VENDOR_MELLANOX is not set -# CONFIG_NET_VENDOR_MICREL is not set -# CONFIG_NET_VENDOR_MICROCHIP is not set -# CONFIG_NET_VENDOR_MYRI is not set -# CONFIG_NET_VENDOR_NATSEMI is not set -# CONFIG_NET_VENDOR_NVIDIA is not set -# CONFIG_NET_VENDOR_OKI is not set -# CONFIG_NET_PACKET_ENGINE is not set -# CONFIG_NET_VENDOR_QLOGIC is not set -# CONFIG_NET_VENDOR_REALTEK is not set -# CONFIG_NET_VENDOR_RDC is not set -# CONFIG_NET_VENDOR_SEEQ is not set -# CONFIG_NET_VENDOR_SILAN is not set -# CONFIG_NET_VENDOR_SIS is not set -# CONFIG_NET_VENDOR_SMSC is not set -# CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SUN is not set -# CONFIG_NET_VENDOR_TEHUTI is not set -# CONFIG_NET_VENDOR_TI is not set -# CONFIG_NET_VENDOR_VIA is not set -# CONFIG_NET_VENDOR_WIZNET is not set -# CONFIG_NET_VENDOR_XILINX is not set -CONFIG_MARVELL_PHY=y -CONFIG_VITESSE_PHY=y -CONFIG_FIXED_PHY=y -# CONFIG_WLAN is not set -# CONFIG_INPUT_MOUSEDEV is not set -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -CONFIG_SERIO_LIBPS2=y -# CONFIG_LEGACY_PTYS is not set -CONFIG_PPC_EPAPR_HV_BYTECHAN=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_MANY_PORTS=y -CONFIG_SERIAL_8250_DETECT_IRQ=y -CONFIG_SERIAL_8250_RSA=y -CONFIG_NVRAM=y -CONFIG_I2C=y -CONFIG_I2C_CHARDEV=y -CONFIG_I2C_MUX=y -CONFIG_I2C_MUX_PCA954x=y 
-CONFIG_I2C_MPC=y -CONFIG_SPI=y -CONFIG_SPI_FSL_SPI=y -CONFIG_SPI_FSL_ESPI=y -CONFIG_SPI_SPIDEV=m -CONFIG_PTP_1588_CLOCK=y -# CONFIG_HWMON is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y -CONFIG_EDAC_MPC85XX=y -CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_DS3232=y -CONFIG_RTC_DRV_CMOS=y -CONFIG_UIO=y -CONFIG_STAGING=y -CONFIG_CLK_QORIQ=y -CONFIG_EXT2_FS=y -CONFIG_NTFS_FS=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_JFFS2_FS=y -CONFIG_UBIFS_FS=y -CONFIG_CRAMFS=y -CONFIG_SQUASHFS=y -CONFIG_SQUASHFS_XZ=y -CONFIG_NFS_FS=y -CONFIG_NFS_V4=y -CONFIG_ROOT_NFS=y -CONFIG_NLS_ISO8859_1=y -CONFIG_NLS_UTF8=m -CONFIG_CRC_ITU_T=m -CONFIG_DEBUG_INFO=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_SHIRQ=y -CONFIG_DETECT_HUNG_TASK=y -CONFIG_SCHEDSTATS=y -CONFIG_RCU_TRACE=y -CONFIG_UPROBE_EVENT=y -CONFIG_CRYPTO_NULL=y -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_MD4=y -CONFIG_CRYPTO_SHA256=y -CONFIG_CRYPTO_SHA512=y -# CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRYPTO_DEV_FSL_CAAM=y diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig index 8b83ce8a01e7..8d3e3c41258d 100644 --- a/arch/powerpc/configs/amigaone_defconfig +++ b/arch/powerpc/configs/amigaone_defconfig @@ -45,12 +45,6 @@ CONFIG_PARPORT_PC_FIFO=y CONFIG_BLK_DEV_FD=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -# CONFIG_IDEPCI_PCIBUS_ORDER is not set -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_SIIMAGE=y -CONFIG_BLK_DEV_VIA82CXXX=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y @@ -61,6 +55,10 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 # CONFIG_SCSI_SYM53C8XX_MMIO is not set +CONFIG_ATA=y +CONFIG_PATA_SIL680=y +CONFIG_PATA_VIA=y +CONFIG_ATA_GENERIC=y CONFIG_NETDEVICES=y CONFIG_VORTEX=y CONFIG_8139CP=y diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index 7b6f30dece34..2d7fcbe047ac 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -108,16 +108,15 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 -CONFIG_IDE=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_BLK_DEV_SIIMAGE=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=m CONFIG_CHR_DEV_SG=y CONFIG_ATA=y CONFIG_SATA_PROMISE=y +CONFIG_PATA_ARTOP=y CONFIG_PATA_PDC2027X=m +CONFIG_PATA_SIL680=y +CONFIG_ATA_GENERIC=y CONFIG_MD=y CONFIG_BLK_DEV_MD=m CONFIG_MD_LINEAR=m diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig index ac9a50da2dc6..1f6f90cd8aff 100644 --- a/arch/powerpc/configs/chrp32_defconfig +++ b/arch/powerpc/configs/chrp32_defconfig @@ -42,12 +42,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_FD=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_SL82C105=y -CONFIG_BLK_DEV_VIA82CXXX=y -CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y @@ -56,6 +50,10 @@ CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 +CONFIG_ATA=y +CONFIG_PATA_VIA=y +CONFIG_PATA_WINBOND=y +CONFIG_ATA_GENERIC=y CONFIG_NETDEVICES=y CONFIG_PCNET32=y CONFIG_NET_TULIP=y diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config index 1a61e81ab0cd..cc49c95494da 100644 --- a/arch/powerpc/configs/fsl-emb-nonhw.config +++ b/arch/powerpc/configs/fsl-emb-nonhw.config @@ -44,6 +44,7 @@ 
CONFIG_FORCE_MAX_ZONEORDER=13 CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAME_WARN=1024 CONFIG_FTL=y +CONFIG_GPIO_GENERIC_PLATFORM=y CONFIG_HFS_FS=m CONFIG_HFSPLUS_FS=m CONFIG_HIGH_RES_TIMERS=y @@ -104,8 +105,13 @@ CONFIG_PACKET=y CONFIG_PARTITION_ADVANCED=y CONFIG_PERF_EVENTS=y CONFIG_POSIX_MQUEUE=y +CONFIG_POWER_SUPPLY=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_GPIO=y +CONFIG_POWER_RESET_GPIO_RESTART=y CONFIG_QNX4FS_FS=m CONFIG_RCU_TRACE=y +CONFIG_RESET_CONTROLLER=y CONFIG_ROOT_NFS=y CONFIG_SYSV_FS=m CONFIG_SYSVIPC=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 3b2511c090d8..e18f2e06553f 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -60,10 +60,6 @@ CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_CDROM_PKTCDVD=m -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_IDE_PMAC=y -CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y @@ -73,6 +69,7 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SPI_ATTRS=y CONFIG_ATA=y CONFIG_SATA_SVW=y +CONFIG_PATA_MACIO=y CONFIG_MD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=y diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index 27abfab31219..c4018179e219 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -39,16 +39,15 @@ CONFIG_IP_PNP_DHCP=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_IDE_TASK_IOCTL=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_AMD74XX=y # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_IPR=y CONFIG_ATA=y +CONFIG_PATA_AMD=y +CONFIG_ATA_GENERIC=y CONFIG_NETDEVICES=y CONFIG_AMD8111_ETH=y CONFIG_TIGON3=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 76f4edd441d3..5553c5ce4274 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -58,9 +58,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_EEPROM_LEGACY=y -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_IDE_TASK_IOCTL=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_CHR_DEV_OSST=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index e5a674d4a716..fc1e7a7388b8 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -117,15 +117,6 @@ CONFIG_CONNECTOR=y CONFIG_MAC_FLOPPY=m CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECS=m -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_PDC202XX_NEW=y -CONFIG_BLK_DEV_SL82C105=y -CONFIG_BLK_DEV_IDE_PMAC=y -CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y -CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y @@ -140,6 +131,12 @@ CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 CONFIG_SCSI_MESH=y CONFIG_SCSI_MAC53C94=y +CONFIG_ATA=y +CONFIG_PATA_MACIO=y +CONFIG_PATA_PDC2027X=y +CONFIG_PATA_WINBOND=y +CONFIG_PATA_PCMCIA=m +CONFIG_ATA_GENERIC=y CONFIG_MD=y CONFIG_BLK_DEV_MD=m CONFIG_MD_LINEAR=m diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index d98b6eb3254f..b793550fac91 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -49,6 +49,7 @@ CONFIG_BINFMT_MISC=m CONFIG_PPC_TRANSACTIONAL_MEM=y 
CONFIG_HOTPLUG_CPU=y CONFIG_KEXEC=y +CONFIG_KEXEC_FILE=y CONFIG_IRQ_ALL_CPUS=y CONFIG_NUMA=y CONFIG_MEMORY_HOTPLUG=y @@ -78,6 +79,7 @@ CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m +CONFIG_BPF_JIT=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -241,10 +243,6 @@ CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y @@ -300,7 +298,10 @@ CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPT_CRC32C_VPMSUM=m +CONFIG_CRYPTO_MD5_PPC=m CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m @@ -308,6 +309,7 @@ CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SHA1_PPC=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 58a98d40086f..0396126ba6a8 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -46,6 +46,7 @@ CONFIG_HZ_100=y CONFIG_BINFMT_MISC=m CONFIG_PPC_TRANSACTIONAL_MEM=y CONFIG_KEXEC=y +CONFIG_KEXEC_FILE=y CONFIG_CRASH_DUMP=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTREMOVE=y @@ -85,12 +86,6 @@ CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_VIRTIO_BLK=m -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_AMD74XX=y -CONFIG_BLK_DEV_IDE_PMAC=y -CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y @@ -120,6 +115,9 @@ CONFIG_SATA_AHCI=y CONFIG_SATA_SIL24=y CONFIG_SATA_MV=y CONFIG_SATA_SVW=y +CONFIG_PATA_AMD=y +CONFIG_PATA_MACIO=y +CONFIG_ATA_GENERIC=y CONFIG_MD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=y @@ -335,7 +333,10 @@ CONFIG_PPC_EARLY_DEBUG=y CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPT_CRC32C_VPMSUM=m +CONFIG_CRYPTO_MD5_PPC=m CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m @@ -343,6 +344,7 @@ CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SHA1_PPC=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index fd2edd650c20..11a3473f9e2e 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -59,10 +59,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65536 -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_AMD74XX=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y @@ -79,6 +75,8 @@ CONFIG_SCSI_DEBUG=m CONFIG_ATA=y CONFIG_SATA_SIL24=y CONFIG_SATA_SVW=y +CONFIG_PATA_AMD=y +CONFIG_ATA_GENERIC=y CONFIG_MD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 8fbf49801233..1d2d69dd6409 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -62,7 +62,6 @@ CONFIG_MPC8610_HPCD=y CONFIG_GEF_SBC610=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_STAT=m -CONFIG_CPU_FREQ_STAT_DETAILS=y 
CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_PERFORMANCE=y CONFIG_CPU_FREQ_GOV_POWERSAVE=m @@ -378,13 +377,6 @@ CONFIG_EEPROM_AT24=m CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m CONFIG_EEPROM_93CX6=m -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=m -CONFIG_IDE_TASK_IOCTL=y -# CONFIG_IDEPCI_PCIBUS_ORDER is not set -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_IDE_PMAC=y -CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y CONFIG_RAID_ATTRS=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m @@ -411,13 +403,14 @@ CONFIG_ATA=y CONFIG_SATA_FSL=m CONFIG_PDC_ADMA=m CONFIG_ATA_PIIX=m +CONFIG_PATA_MACIO=y CONFIG_PATA_MPC52xx=m CONFIG_PATA_OPTIDMA=m CONFIG_PATA_SCH=m CONFIG_PATA_VIA=m CONFIG_PATA_PLATFORM=m CONFIG_PATA_OF_PLATFORM=m -CONFIG_ATA_GENERIC=m +CONFIG_ATA_GENERIC=y CONFIG_MD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=m diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 8a3bc016b732..d99734f3b868 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -52,6 +52,7 @@ CONFIG_HZ_100=y CONFIG_BINFMT_MISC=m CONFIG_PPC_TRANSACTIONAL_MEM=y CONFIG_KEXEC=y +CONFIG_KEXEC_FILE=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y @@ -81,6 +82,7 @@ CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m +CONFIG_BPF_JIT=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -92,10 +94,6 @@ CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_VIRTIO_BLK=m -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_AMD74XX=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y @@ -122,7 +120,8 @@ CONFIG_SCSI_DH_RDAC=m CONFIG_SCSI_DH_ALUA=m CONFIG_ATA=y CONFIG_SATA_AHCI=y -# CONFIG_ATA_SFF is not set +CONFIG_PATA_AMD=y +CONFIG_ATA_GENERIC=y CONFIG_MD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=y @@ -244,10 +243,6 @@ CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y @@ -302,7 +297,10 @@ CONFIG_XMON=y CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPT_CRC32C_VPMSUM=m +CONFIG_CRYPTO_MD5_PPC=m CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m @@ -310,6 +308,7 @@ CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SHA1_PPC=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig index e9122b15e5fd..74bca2eccd0f 100644 --- a/arch/powerpc/configs/storcenter_defconfig +++ b/arch/powerpc/configs/storcenter_defconfig @@ -36,12 +36,11 @@ CONFIG_NFTL_RW=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP=y -CONFIG_IDE=y -CONFIG_BLK_DEV_VIA82CXXX=y -CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_SCSI_SPI_ATTRS=y +CONFIG_ATA=y +CONFIG_PATA_VIA=y CONFIG_MD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=y diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile index 7998c177f0a2..87f40454bad3 100644 --- a/arch/powerpc/crypto/Makefile +++ b/arch/powerpc/crypto/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += 
sha1-ppc-spe.o obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o -obj-$(CONFIG_CRYPT_CRC32C_VPMSUM) += crc32c-vpmsum.o +obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o md5-ppc-y := md5-asm.o md5-glue.o diff --git a/arch/powerpc/include/asm/accounting.h b/arch/powerpc/include/asm/accounting.h index c133246df467..3abcf98ed2e0 100644 --- a/arch/powerpc/include/asm/accounting.h +++ b/arch/powerpc/include/asm/accounting.h @@ -12,9 +12,17 @@ /* Stuff for accurate time accounting */ struct cpu_accounting_data { - unsigned long user_time; /* accumulated usermode TB ticks */ - unsigned long system_time; /* accumulated system TB ticks */ - unsigned long user_time_scaled; /* accumulated usermode SPURR ticks */ + /* Accumulated cputime values to flush on ticks*/ + unsigned long utime; + unsigned long stime; + unsigned long utime_scaled; + unsigned long stime_scaled; + unsigned long gtime; + unsigned long hardirq_time; + unsigned long softirq_time; + unsigned long steal_time; + unsigned long idle_time; + /* Internal counters */ unsigned long starttime; /* TB value snapshot */ unsigned long starttime_user; /* TB value on exit to usermode */ unsigned long startspurr; /* SPURR value snapshot */ diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index e0baba1535e6..ba47c70712f9 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -13,10 +13,9 @@ */ #include <linux/threads.h> -#include <linux/kprobes.h> #include <asm/cacheflush.h> #include <asm/checksum.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/epapr_hcalls.h> #include <uapi/asm/ucontext.h> diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index 8e21bb492dca..d310546e5d9d 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -2,14 +2,42 @@ #define _ASM_POWERPC_BOOK3S_32_PGALLOC_H #include <linux/threads.h> +#include <linux/slab.h> -/* For 32-bit, all levels of page tables are just drawn from get_free_page() */ -#define MAX_PGTABLE_INDEX_SIZE 0 +/* + * Functions that deal with pagetables that could be at any level of + * the table need to be passed an "index_size" so they know how to + * handle allocation. For PTE pages (which are linked to a struct + * page for now, and drawn from the main get_free_pages() pool), the + * allocation size will be (2^index_size * sizeof(pointer)) and + * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). + * + * The maximum index size needs to be big enough to allow any + * pagetable sizes we need, but small enough to fit in the low bits of + * any page table pointer. In other words all pagetables, even tiny + * ones, must be aligned to allow at least enough low 0 bits to + * contain this value. This value is also used as a mask, so it must + * be one less than a power of two. 
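To make the sizing rule above concrete, here is a worked example for the 32-bit hash case; the PTE_SHIFT value assumed below (PAGE_SHIFT - 2, i.e. 4-byte PTEs with 4K pages) is an illustration, not something stated in this hunk:

	/* Assumed: 4K pages (PAGE_SHIFT = 12), 4-byte PTEs, so
	 * PTE_INDEX_SIZE = PTE_SHIFT = PAGE_SHIFT - 2 = 10.
	 *
	 *   PGDIR_SHIFT    = PAGE_SHIFT + PTE_INDEX_SIZE = 12 + 10 = 22
	 *   PGD_INDEX_SIZE = 32 - PGDIR_SHIFT            = 10
	 *   PGD table size = sizeof(pgd_t) << PGD_INDEX_SIZE = 4 << 10 = 4 KiB
	 *
	 * pgd_alloc() below therefore draws a 4 KiB, suitably aligned table
	 * from PGT_CACHE(10); an index size of 10 fits under the 0xf cap and
	 * can be stashed in the low bits of an aligned table pointer. */
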
+ */ +#define MAX_PGTABLE_INDEX_SIZE 0xf extern void __bad_pte(pmd_t *pmd); -extern pgd_t *pgd_alloc(struct mm_struct *mm); -extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); +extern struct kmem_cache *pgtable_cache[]; +#define PGT_CACHE(shift) ({ \ + BUG_ON(!(shift)); \ + pgtable_cache[(shift) - 1]; \ + }) + +static inline pgd_t *pgd_alloc(struct mm_struct *mm) +{ + return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL); +} + +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); +} /* * We don't have any real pmd's, and this code never triggers because @@ -68,8 +96,12 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) static inline void pgtable_free(void *table, unsigned index_size) { - BUG_ON(index_size); /* 32-bit doesn't use this */ - free_page((unsigned long)table); + if (!index_size) { + free_page((unsigned long)table); + } else { + BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE); + kmem_cache_free(PGT_CACHE(index_size), table); + } } #define check_pgt_cache() do { } while (0) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 6b8b2d57fdc8..012223638815 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -8,6 +8,23 @@ /* And here we include common definitions */ #include <asm/pte-common.h> +#define PTE_INDEX_SIZE PTE_SHIFT +#define PMD_INDEX_SIZE 0 +#define PUD_INDEX_SIZE 0 +#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT) + +#define PMD_CACHE_INDEX PMD_INDEX_SIZE + +#ifndef __ASSEMBLY__ +#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) +#define PMD_TABLE_SIZE 0 +#define PUD_TABLE_SIZE 0 +#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) +#endif /* __ASSEMBLY__ */ + +#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) +#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) + /* * The normal case is that PTEs are 32-bits and we have a 1-page * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages. -- paulus @@ -19,14 +36,10 @@ * -Matt */ /* PGDIR_SHIFT determines what a top-level page table entry can map */ -#define PGDIR_SHIFT (PAGE_SHIFT + PTE_SHIFT) +#define PGDIR_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define PTRS_PER_PTE (1 << PTE_SHIFT) -#define PTRS_PER_PMD 1 -#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT)) - #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) /* * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary @@ -82,12 +95,8 @@ extern unsigned long ioremap_bot; -/* - * entries per page directory level: our page-table tree is two-level, so - * we don't really have any PMD directory. 
- */ -#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_SHIFT) -#define PGD_TABLE_SIZE (sizeof(pgd_t) << (32 - PGDIR_SHIFT)) +/* Bits to mask out from a PGD to get to the PUD page */ +#define PGD_MASKED_BITS 0 #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \ @@ -224,7 +233,8 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, static inline void __ptep_set_access_flags(struct mm_struct *mm, - pte_t *ptep, pte_t entry) + pte_t *ptep, pte_t entry, + unsigned long address) { unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); @@ -283,15 +293,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -#ifndef CONFIG_PPC_4K_PAGES -void pgtable_cache_init(void); -#else -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) -#endif - extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp); diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 1af837c561ba..0c4e470571ca 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -16,9 +16,6 @@ #define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE) #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) -/* With 4k base page size, hugepage PTEs go at the PMD level */ -#define MIN_HUGEPTE_SHIFT PMD_SHIFT - /* PTE flags to conserve for HPTE identification */ #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \ H_PAGE_F_SECOND | H_PAGE_F_GIX) @@ -39,12 +36,13 @@ #ifdef CONFIG_HUGETLB_PAGE static inline int hash__hugepd_ok(hugepd_t hpd) { + unsigned long hpdval = hpd_val(hpd); /* * if it is not a pte and have hugepd shift mask * set, then it is a hugepd directory pointer */ - if (!(hpd.pd & _PAGE_PTE) && - ((hpd.pd & HUGEPD_SHIFT_MASK) != 0)) + if (!(hpdval & _PAGE_PTE) && + ((hpdval & HUGEPD_SHIFT_MASK) != 0)) return true; return false; } diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 5aae4f530c21..f3dd21efa2ea 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -6,9 +6,6 @@ #define H_PUD_INDEX_SIZE 5 #define H_PGD_INDEX_SIZE 12 -/* With 4k base page size, hugepage PTEs go at the PMD level */ -#define MIN_HUGEPTE_SHIFT PAGE_SHIFT - #define H_PAGE_COMBO 0x00001000 /* this is a combo 4k page */ #define H_PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */ /* diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index f61cad3de4e6..f7b721bbf918 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -33,9 +33,9 @@ H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT) #define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE) -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_PPC_64K_PAGES) /* - * only with hash we need to use the second half of pmd page table + * only with hash 64k we need to use the second half of pmd page table * to store pointer to deposited pgtable_t */ #define H_PMD_CACHE_INDEX (H_PMD_INDEX_SIZE + 1) @@ -201,6 +201,10 @@ extern int __meminit hash__vmemmap_create_mapping(unsigned long start, unsigned long phys); extern void 
hash__vmemmap_remove_mapping(unsigned long start, unsigned long page_size); + +int hash__create_section_mapping(unsigned long start, unsigned long end); +int hash__remove_section_mapping(unsigned long start, unsigned long end); + #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */ diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index c45189aa7476..c62f14d0bec1 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -1,5 +1,5 @@ -#ifndef _ASM_POWERPC_BOOK3S_64_HUGETLB_RADIX_H -#define _ASM_POWERPC_BOOK3S_64_HUGETLB_RADIX_H +#ifndef _ASM_POWERPC_BOOK3S_64_HUGETLB_H +#define _ASM_POWERPC_BOOK3S_64_HUGETLB_H /* * For radix we want generic code to handle hugetlb. But then if we want * both hash and radix to be enabled together we need to workaround the @@ -21,9 +21,33 @@ static inline int hstate_get_psize(struct hstate *hstate) return MMU_PAGE_2M; else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift) return MMU_PAGE_1G; + else if (shift == mmu_psize_defs[MMU_PAGE_16M].shift) + return MMU_PAGE_16M; + else if (shift == mmu_psize_defs[MMU_PAGE_16G].shift) + return MMU_PAGE_16G; else { WARN(1, "Wrong huge page shift\n"); return mmu_virtual_psize; } } + +#define arch_make_huge_pte arch_make_huge_pte +static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writable) +{ + unsigned long page_shift; + + if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) + return entry; + + page_shift = huge_page_shift(hstate_vma(vma)); + /* + * We don't support 1G hugetlb pages yet. + */ + VM_WARN_ON(page_shift == mmu_psize_defs[MMU_PAGE_1G].shift); + if (page_shift == mmu_psize_defs[MMU_PAGE_2M].shift) + return __pte(pte_val(entry) | _PAGE_LARGE); + else + return entry; +} #endif diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 2e6a823fa502..823015cff149 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -525,6 +525,9 @@ extern void slb_set_size(u16 size); #define ESID_BITS 18 #define ESID_BITS_1T 6 +#define ESID_BITS_MASK ((1 << ESID_BITS) - 1) +#define ESID_BITS_1T_MASK ((1 << ESID_BITS_1T) - 1) + /* * 256MB segment * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments @@ -660,9 +663,9 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, if (ssize == MMU_SEGSIZE_256M) return vsid_scramble((context << ESID_BITS) - | (ea >> SID_SHIFT), 256M); + | ((ea >> SID_SHIFT) & ESID_BITS_MASK), 256M); return vsid_scramble((context << ESID_BITS_1T) - | (ea >> SID_SHIFT_1T), 1T); + | ((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK), 1T); } /* diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 8afb0e00f7d9..d73e9dfa5237 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -44,10 +44,20 @@ struct patb_entry { }; extern struct patb_entry *partition_tb; +/* Bits in patb0 field */ #define PATB_HR (1UL << 63) -#define PATB_GR (1UL << 63) #define RPDB_MASK 0x0ffffffffffff00fUL #define RPDB_SHIFT (1UL << 8) +#define RTS1_SHIFT 61 /* top 2 bits of radix tree size */ +#define RTS1_MASK (3UL << RTS1_SHIFT) +#define RTS2_SHIFT 5 /* bottom 3 bits of radix tree size */ +#define RTS2_MASK (7UL << RTS2_SHIFT) +#define RPDS_MASK 0x1f /* root page dir. 
size field */ + +/* Bits in patb1 field */ +#define PATB_GR (1UL << 63) /* guest uses radix; must match HR */ +#define PRTS_MASK 0x1f /* process table size field */ + /* * Limit process table to PAGE_SIZE table. This * also limit the max pid we can support. @@ -138,5 +148,11 @@ static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base, extern int (*register_process_table)(unsigned long base, unsigned long page_size, unsigned long tbl_size); +#ifdef CONFIG_PPC_PSERIES +extern void radix_init_pseries(void); +#else +static inline void radix_init_pseries(void) { }; +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h index 0d2845b44763..198aff33c380 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h @@ -35,10 +35,6 @@ static inline int pgd_huge(pgd_t pgd) } #define pgd_huge pgd_huge -#ifdef CONFIG_DEBUG_VM -extern int hugepd_ok(hugepd_t hpd); -#define is_hugepd(hpd) (hugepd_ok(hpd)) -#else /* * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't * need to setup hugepage directory for them. Our pte and page directory format @@ -49,7 +45,6 @@ static inline int hugepd_ok(hugepd_t hpd) return 0; } #define is_hugepd(pdep) 0 -#endif /* CONFIG_DEBUG_VM */ #endif /* CONFIG_HUGETLB_PAGE */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 0ebfbc8f0449..5905f0ff57d1 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -26,6 +26,11 @@ #define _RPAGE_SW1 0x00800 #define _RPAGE_SW2 0x00400 #define _RPAGE_SW3 0x00200 +#define _RPAGE_RSV1 0x1000000000000000UL +#define _RPAGE_RSV2 0x0800000000000000UL +#define _RPAGE_RSV3 0x0400000000000000UL +#define _RPAGE_RSV4 0x0200000000000000UL + #ifdef CONFIG_MEM_SOFT_DIRTY #define _PAGE_SOFT_DIRTY _RPAGE_SW3 /* software: software dirty tracking */ #else @@ -33,6 +38,11 @@ #endif #define _PAGE_SPECIAL _RPAGE_SW2 /* software: special page */ +/* + * For P9 DD1 only, we need to track whether the pte's huge. 
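As a standalone sketch (not part of the patch) of how the bit defined just below gets used: radix__pmd_mkhuge() in radix.h sets it alongside _PAGE_PTE on P9 DD1, so the TLB-flush workaround can infer the page size of the old translation.

#include <stdint.h>

#define _RPAGE_RSV1     0x1000000000000000UL
#define _PAGE_LARGE     _RPAGE_RSV1     /* P9 DD1: this pte maps >4k */
#define _PAGE_PTE       (1UL << 62)

static inline uint64_t pmd_mkhuge(uint64_t pmd, int p9_dd1)
{
        pmd |= _PAGE_PTE;
        if (p9_dd1)
                pmd |= _PAGE_LARGE;     /* only DD1 needs the marker */
        return pmd;
}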
+ */ +#define _PAGE_LARGE _RPAGE_RSV1 + #define _PAGE_PTE (1ul << 62) /* distinguishes PTEs from pointers */ #define _PAGE_PRESENT (1ul << 63) /* pte contains a translation */ @@ -568,10 +578,11 @@ static inline bool check_pte_access(unsigned long access, unsigned long ptev) */ static inline void __ptep_set_access_flags(struct mm_struct *mm, - pte_t *ptep, pte_t entry) + pte_t *ptep, pte_t entry, + unsigned long address) { if (radix_enabled()) - return radix__ptep_set_access_flags(mm, ptep, entry); + return radix__ptep_set_access_flags(mm, ptep, entry, address); return hash__ptep_set_access_flags(ptep, entry); } @@ -789,9 +800,6 @@ extern struct page *pgd_page(pgd_t pgd); #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) -void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); -void pgtable_cache_init(void); - static inline int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags) { diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 2a46dea8e1b1..0032b662284c 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -140,19 +140,20 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm, unsigned long new_pte; old_pte = __radix_pte_update(ptep, ~0, 0); - asm volatile("ptesync" : : : "memory"); /* * new value of pte */ new_pte = (old_pte | set) & ~clr; - /* - * For now let's do heavy pid flush - * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize); + * If we are trying to clear the pte, we can skip + * the below sequence and batch the tlb flush. The + * tlb flush batching is done by mmu gather code */ - radix__flush_tlb_mm(mm); - - __radix_pte_update(ptep, 0, new_pte); + if (new_pte) { + asm volatile("ptesync" : : : "memory"); + radix__flush_tlb_pte_p9_dd1(old_pte, mm, addr); + __radix_pte_update(ptep, 0, new_pte); + } } else old_pte = __radix_pte_update(ptep, clr, set); asm volatile("ptesync" : : : "memory"); @@ -167,7 +168,8 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm, * function doesn't need to invalidate tlb. 
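A plain-C model of the P9 DD1 sequence in radix__pte_update() above; flush_old() stands in for radix__flush_tlb_pte_p9_dd1(), and the real code performs the loads and stores atomically via __radix_pte_update():

#include <stdint.h>

static void flush_old(uint64_t old_pte) { /* ptesync + tlbie stand-in */ }

static uint64_t pte_update_dd1(uint64_t *ptep, uint64_t clr, uint64_t set)
{
        uint64_t old_pte = *ptep;           /* atomically swapped to 0 */
        uint64_t new_pte = (old_pte | set) & ~clr;

        *ptep = 0;
        if (new_pte) {
                flush_old(old_pte);         /* must precede the store */
                *ptep = new_pte;
        }
        /* new_pte == 0: a pure clear; leave the flush to the batched
         * mmu_gather path, as the new comment explains. */
        return old_pte;
}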
*/ static inline void radix__ptep_set_access_flags(struct mm_struct *mm, - pte_t *ptep, pte_t entry) + pte_t *ptep, pte_t entry, + unsigned long address) { unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | @@ -183,13 +185,7 @@ static inline void radix__ptep_set_access_flags(struct mm_struct *mm, * new value of pte */ new_pte = old_pte | set; - - /* - * For now let's do heavy pid flush - * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize); - */ - radix__flush_tlb_mm(mm); - + radix__flush_tlb_pte_p9_dd1(old_pte, mm, address); __radix_pte_update(ptep, 0, new_pte); } else __radix_pte_update(ptep, 0, set); @@ -243,6 +239,8 @@ static inline int radix__pmd_trans_huge(pmd_t pmd) static inline pmd_t radix__pmd_mkhuge(pmd_t pmd) { + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + return __pmd(pmd_val(pmd) | _PAGE_PTE | _PAGE_LARGE); return __pmd(pmd_val(pmd) | _PAGE_PTE); } static inline void radix__pmdp_huge_split_prepare(struct vm_area_struct *vma, @@ -293,5 +291,10 @@ static inline unsigned long radix__get_tree_size(void) } return rts_field; } + +#ifdef CONFIG_MEMORY_HOTPLUG +int radix__create_section_mapping(unsigned long start, unsigned long end); +int radix__remove_section_mapping(unsigned long start, unsigned long end); +#endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index a9e19cb2f7c5..cc7fbde4f53c 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -42,4 +42,6 @@ extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, unsigned long page_size); extern void radix__flush_tlb_lpid(unsigned long lpid); extern void radix__flush_tlb_all(void); +extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, + unsigned long address); #endif diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index ffbafbf76b19..5a90292afbad 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -20,22 +20,32 @@ #endif #else /* CONFIG_PPC64 */ #define L1_CACHE_SHIFT 7 +#define IFETCH_ALIGN_SHIFT 4 /* POWER8,9 */ #endif #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) #define SMP_CACHE_BYTES L1_CACHE_BYTES +#define IFETCH_ALIGN_BYTES (1 << IFETCH_ALIGN_SHIFT) + #if defined(__powerpc64__) && !defined(__ASSEMBLY__) + +struct ppc_cache_info { + u32 size; + u32 line_size; + u32 block_size; /* L1 only */ + u32 log_block_size; + u32 blocks_per_page; + u32 sets; + u32 assoc; +}; + struct ppc64_caches { - u32 dsize; /* L1 d-cache size */ - u32 dline_size; /* L1 d-cache line size */ - u32 log_dline_size; - u32 dlines_per_page; - u32 isize; /* L1 i-cache size */ - u32 iline_size; /* L1 i-cache line size */ - u32 log_iline_size; - u32 ilines_per_page; + struct ppc_cache_info l1d; + struct ppc_cache_info l1i; + struct ppc_cache_info l2; + struct ppc_cache_info l3; }; extern struct ppc64_caches ppc64_caches; diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 1e8fceb308a5..4e63787dc3be 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -53,17 +53,29 @@ static inline __sum16 csum_fold(__wsum sum) return (__force __sum16)(~((__force u32)sum + tmp) >> 16); } +static inline u32 from64to32(u64 x) +{ + /* add up 32-bit and 32-bit for 32+c bit */ + x = (x & 0xffffffff) + (x >> 32); + /* add up carry.. 
*/ + x = (x & 0xffffffff) + (x >> 32); + return (u32)x; +} + static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) { #ifdef __powerpc64__ - unsigned long s = (__force u32)sum; + u64 s = (__force u32)sum; s += (__force u32)saddr; s += (__force u32)daddr; +#ifdef __BIG_ENDIAN__ s += proto + len; - s += (s >> 32); - return (__force __wsum) s; +#else + s += (proto + len) << 8; +#endif + return (__force __wsum) from64to32(s); #else __asm__("\n\ addc %0,%0,%1 \n\ @@ -123,8 +135,7 @@ static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl) for (i = 0; i < ihl - 1; i++, ptr++) s += *ptr; - s += (s >> 32); - return (__force __wsum)s; + return (__force __wsum)from64to32(s); #else __wsum sum, tmp; diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index 44efe739b6b9..fc46b664c49e 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -7,6 +7,71 @@ #include <asm/asm-compat.h> #include <linux/bug.h> +#ifdef __BIG_ENDIAN +#define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE) +#else +#define BITOFF_CAL(size, off) (off * BITS_PER_BYTE) +#endif + +#define XCHG_GEN(type, sfx, cl) \ +static inline u32 __xchg_##type##sfx(volatile void *p, u32 val) \ +{ \ + unsigned int prev, prev_mask, tmp, bitoff, off; \ + \ + off = (unsigned long)p % sizeof(u32); \ + bitoff = BITOFF_CAL(sizeof(type), off); \ + p -= off; \ + val <<= bitoff; \ + prev_mask = (u32)(type)-1 << bitoff; \ + \ + __asm__ __volatile__( \ +"1: lwarx %0,0,%3\n" \ +" andc %1,%0,%5\n" \ +" or %1,%1,%4\n" \ + PPC405_ERR77(0,%3) \ +" stwcx. %1,0,%3\n" \ +" bne- 1b\n" \ + : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \ + : "r" (p), "r" (val), "r" (prev_mask) \ + : "cc", cl); \ + \ + return prev >> bitoff; \ +} + +#define CMPXCHG_GEN(type, sfx, br, br2, cl) \ +static inline \ +u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new) \ +{ \ + unsigned int prev, prev_mask, tmp, bitoff, off; \ + \ + off = (unsigned long)p % sizeof(u32); \ + bitoff = BITOFF_CAL(sizeof(type), off); \ + p -= off; \ + old <<= bitoff; \ + new <<= bitoff; \ + prev_mask = (u32)(type)-1 << bitoff; \ + \ + __asm__ __volatile__( \ + br \ +"1: lwarx %0,0,%3\n" \ +" and %1,%0,%6\n" \ +" cmpw 0,%1,%4\n" \ +" bne- 2f\n" \ +" andc %1,%0,%6\n" \ +" or %1,%1,%5\n" \ + PPC405_ERR77(0,%3) \ +" stwcx. %1,0,%3\n" \ +" bne- 1b\n" \ + br2 \ + "\n" \ +"2:" \ + : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \ + : "r" (p), "r" (old), "r" (new), "r" (prev_mask) \ + : "cc", cl); \ + \ + return prev >> bitoff; \ +} + /* * Atomic exchange * @@ -14,6 +79,11 @@ * the previous value stored there. 
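The XCHG_GEN()/CMPXCHG_GEN() macros above build 1- and 2-byte atomics out of the existing 32-bit lwarx/stwcx. primitives. A user-space model of just the shift/mask arithmetic, big-endian case, with a GCC builtin standing in for the ll/sc retry loop:

#include <stdint.h>

/* byte offset within the aligned word -> bit offset of the operand */
#define BITOFF_BE(size, off)  ((sizeof(uint32_t) - (size) - (off)) * 8)

static uint8_t xchg_u8_model(volatile uint8_t *p, uint8_t val)
{
        uintptr_t off = (uintptr_t)p % sizeof(uint32_t);
        volatile uint32_t *w = (volatile uint32_t *)((uintptr_t)p - off);
        unsigned int bitoff = BITOFF_BE(sizeof(uint8_t), off);
        uint32_t mask = 0xffu << bitoff;
        uint32_t old, upd;

        do {                    /* models the lwarx/stwcx. retry loop */
                old = *w;
                upd = (old & ~mask) | ((uint32_t)val << bitoff);
        } while (!__sync_bool_compare_and_swap(w, old, upd));

        return old >> bitoff;   /* previous value of the target byte */
}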
*/ +XCHG_GEN(u8, _local, "memory"); +XCHG_GEN(u8, _relaxed, "cc"); +XCHG_GEN(u16, _local, "memory"); +XCHG_GEN(u16, _relaxed, "cc"); + static __always_inline unsigned long __xchg_u32_local(volatile void *p, unsigned long val) { @@ -85,9 +155,13 @@ __xchg_u64_relaxed(u64 *p, unsigned long val) #endif static __always_inline unsigned long -__xchg_local(volatile void *ptr, unsigned long x, unsigned int size) +__xchg_local(void *ptr, unsigned long x, unsigned int size) { switch (size) { + case 1: + return __xchg_u8_local(ptr, x); + case 2: + return __xchg_u16_local(ptr, x); case 4: return __xchg_u32_local(ptr, x); #ifdef CONFIG_PPC64 @@ -103,6 +177,10 @@ static __always_inline unsigned long __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) { switch (size) { + case 1: + return __xchg_u8_relaxed(ptr, x); + case 2: + return __xchg_u16_relaxed(ptr, x); case 4: return __xchg_u32_relaxed(ptr, x); #ifdef CONFIG_PPC64 @@ -131,6 +209,15 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) * and return the old value of *p. */ +CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); +CMPXCHG_GEN(u8, _local, , , "memory"); +CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); +CMPXCHG_GEN(u8, _relaxed, , , "cc"); +CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); +CMPXCHG_GEN(u16, _local, , , "memory"); +CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); +CMPXCHG_GEN(u16, _relaxed, , , "cc"); + static __always_inline unsigned long __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) { @@ -316,6 +403,10 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8(ptr, old, new); + case 2: + return __cmpxchg_u16(ptr, old, new); case 4: return __cmpxchg_u32(ptr, old, new); #ifdef CONFIG_PPC64 @@ -328,10 +419,14 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, } static __always_inline unsigned long -__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, +__cmpxchg_local(void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8_local(ptr, old, new); + case 2: + return __cmpxchg_u16_local(ptr, old, new); case 4: return __cmpxchg_u32_local(ptr, old, new); #ifdef CONFIG_PPC64 @@ -348,6 +443,10 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8_relaxed(ptr, old, new); + case 2: + return __cmpxchg_u16_relaxed(ptr, old, new); case 4: return __cmpxchg_u32_relaxed(ptr, old, new); #ifdef CONFIG_PPC64 @@ -364,6 +463,10 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8_acquire(ptr, old, new); + case 2: + return __cmpxchg_u16_acquire(ptr, old, new); case 4: return __cmpxchg_u32_acquire(ptr, old, new); #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h index b312b152461b..6e834caa3720 100644 --- a/arch/powerpc/include/asm/cpu_has_feature.h +++ b/arch/powerpc/include/asm/cpu_has_feature.h @@ -23,7 +23,9 @@ static __always_inline bool cpu_has_feature(unsigned long feature) { int i; +#ifndef __clang__ /* clang can't cope with this */ BUILD_BUG_ON(!__builtin_constant_p(feature)); +#endif #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG if (!static_key_initialized) { diff --git 
a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 3919332965af..fd321eb423cb 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -10,18 +10,62 @@ #define PNV_CORE_IDLE_LOCK_BIT 0x100 #define PNV_CORE_IDLE_THREAD_BITS 0x0FF +/* + * ============================ NOTE ================================= + * The older firmware populates only the RL field in the psscr_val and + * sets the psscr_mask to 0xf. On such a firmware, the kernel sets the + * remaining PSSCR fields to default values as follows: + * + * - ESL and EC bits are to 1. So wakeup from any stop state will be + * at vector 0x100. + * + * - MTL and PSLL are set to the maximum allowed value as per the ISA, + * i.e. 15. + * + * - The Transition Rate, TR is set to the Maximum value 3. + */ +#define PSSCR_HV_DEFAULT_VAL (PSSCR_ESL | PSSCR_EC | \ + PSSCR_PSLL_MASK | PSSCR_TR_MASK | \ + PSSCR_MTL_MASK) + +#define PSSCR_HV_DEFAULT_MASK (PSSCR_ESL | PSSCR_EC | \ + PSSCR_PSLL_MASK | PSSCR_TR_MASK | \ + PSSCR_MTL_MASK | PSSCR_RL_MASK) +#define PSSCR_EC_SHIFT 20 +#define PSSCR_ESL_SHIFT 21 +#define GET_PSSCR_EC(x) (((x) & PSSCR_EC) >> PSSCR_EC_SHIFT) +#define GET_PSSCR_ESL(x) (((x) & PSSCR_ESL) >> PSSCR_ESL_SHIFT) +#define GET_PSSCR_RL(x) ((x) & PSSCR_RL_MASK) + +#define ERR_EC_ESL_MISMATCH -1 +#define ERR_DEEP_STATE_ESL_MISMATCH -2 + #ifndef __ASSEMBLY__ extern u32 pnv_fastsleep_workaround_at_entry[]; extern u32 pnv_fastsleep_workaround_at_exit[]; extern u64 pnv_first_deep_stop_state; + +int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags); +static inline void report_invalid_psscr_val(u64 psscr_val, int err) +{ + switch (err) { + case ERR_EC_ESL_MISMATCH: + pr_warn("Invalid psscr 0x%016llx : ESL,EC bits unequal", + psscr_val); + break; + case ERR_DEEP_STATE_ESL_MISMATCH: + pr_warn("Invalid psscr 0x%016llx : ESL cleared for deep stop-state", + psscr_val); + } +} #endif #endif /* Idle state entry routines */ #ifdef CONFIG_PPC_P7_NAP -#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ +#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ std r0,0(r1); \ ptesync; \ @@ -29,6 +73,9 @@ extern u64 pnv_first_deep_stop_state; 1: cmpd cr0,r0,r0; \ bne 1b; \ IDLE_INST; \ + +#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ + IDLE_STATE_ENTER_SEQ(IDLE_INST) \ b . 
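For illustration, the EC/ESL consistency rule behind ERR_EC_ESL_MISMATCH, using the accessors defined above (PSSCR_EC and PSSCR_ESL themselves live in reg.h; their bit positions are assumed here from the *_SHIFT values):

#include <stdint.h>

#define PSSCR_EC_SHIFT  20
#define PSSCR_ESL_SHIFT 21
#define PSSCR_EC        (1UL << PSSCR_EC_SHIFT)
#define PSSCR_ESL       (1UL << PSSCR_ESL_SHIFT)
#define GET_PSSCR_EC(x)  (((x) & PSSCR_EC) >> PSSCR_EC_SHIFT)
#define GET_PSSCR_ESL(x) (((x) & PSSCR_ESL) >> PSSCR_ESL_SHIFT)

/* a stop state with only one of EC/ESL set cannot take the 0x100
 * wakeup the kernel relies on, so validation rejects it */
static int psscr_ec_esl_ok(uint64_t psscr)
{
        return GET_PSSCR_EC(psscr) == GET_PSSCR_ESL(psscr);
}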
#endif /* CONFIG_PPC_P7_NAP */ diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index a954e4975049..86308f177f2d 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -10,7 +10,7 @@ struct pt_regs; extern struct dentry *powerpc_debugfs_root; -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) extern int (*__debugger)(struct pt_regs *regs); extern int (*__debugger_ipi)(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index ee46ffef608e..93b9b84568e8 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -136,4 +136,46 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #endif /* CONFIG_SPU_BASE */ +#ifdef CONFIG_PPC64 + +#define get_cache_geometry(level) \ + (ppc64_caches.level.assoc << 16 | ppc64_caches.level.line_size) + +#define ARCH_DLINFO_CACHE_GEOMETRY \ + NEW_AUX_ENT(AT_L1I_CACHESIZE, ppc64_caches.l1i.size); \ + NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, get_cache_geometry(l1i)); \ + NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1i.size); \ + NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1i)); \ + NEW_AUX_ENT(AT_L2_CACHESIZE, ppc64_caches.l2.size); \ + NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, get_cache_geometry(l2)); \ + NEW_AUX_ENT(AT_L3_CACHESIZE, ppc64_caches.l3.size); \ + NEW_AUX_ENT(AT_L3_CACHEGEOMETRY, get_cache_geometry(l3)) + +#else +#define ARCH_DLINFO_CACHE_GEOMETRY +#endif + +/* + * The requirements here are: + * - keep the final alignment of sp (sp & 0xf) + * - make sure the 32-bit value at the first 16 byte aligned position of + * AUXV is greater than 16 for glibc compatibility. + * AT_IGNOREPPC is used for that. + * - for compatibility with glibc ARCH_DLINFO must always be defined on PPC, + * even if DLINFO_ARCH_ITEMS goes to zero or is undefined. + * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes + */ +#define ARCH_DLINFO \ +do { \ + /* Handle glibc compatibility. */ \ + NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \ + NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \ + /* Cache size items */ \ + NEW_AUX_ENT(AT_DCACHEBSIZE, dcache_bsize); \ + NEW_AUX_ENT(AT_ICACHEBSIZE, icache_bsize); \ + NEW_AUX_ENT(AT_UCACHEBSIZE, ucache_bsize); \ + VDSO_AUX_ENT(AT_SYSINFO_EHDR, current->mm->context.vdso_base); \ + ARCH_DLINFO_CACHE_GEOMETRY; \ +} while (0) + #endif /* _ASM_POWERPC_ELF_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 9a3eee661297..14752eee3d0c 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -97,6 +97,15 @@ ld reg,PACAKBASE(r13); \ ori reg,reg,(ABS_ADDR(label))@l; +/* + * Branches from unrelocated code (e.g., interrupts) to labels outside + * head-y require >64K offsets. + */ +#define __LOAD_FAR_HANDLER(reg, label) \ + ld reg,PACAKBASE(r13); \ + ori reg,reg,(ABS_ADDR(label))@l; \ + addis reg,reg,(ABS_ADDR(label))@h; + /* Exception register prefixes */ #define EXC_HV H #define EXC_STD @@ -227,13 +236,49 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) mtctr reg; \ bctr +#define BRANCH_LINK_TO_FAR(reg, label) \ + __LOAD_FAR_HANDLER(reg, label); \ + mtctr reg; \ + bctrl + +/* + * KVM requires __LOAD_FAR_HANDLER. + * + * __BRANCH_TO_KVM_EXIT branches are also a special case because they + * explicitly use r9 then reload it from PACA before branching. Hence + * the double-underscore. 
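For reference, __LOAD_FAR_HANDLER is a plain 32-bit offset build: ld picks up the kernel base from the PACA, ori adds the low 16 bits of the handler's offset, addis the high 16. In C (kbase and off are illustrative values; the near variant stops after the ori, hence its 64K limit):

#include <stdint.h>

static uint64_t load_far_handler(uint64_t kbase, uint32_t off)
{
        uint64_t reg = kbase;           /* ld    reg,PACAKBASE(r13) */
        reg |= off & 0xffffu;           /* ori   reg,reg,label@l    */
        reg += off & 0xffff0000u;       /* addis reg,reg,label@h    */
        return reg;
}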
+ */ +#define __BRANCH_TO_KVM_EXIT(area, label) \ + mfctr r9; \ + std r9,HSTATE_SCRATCH1(r13); \ + __LOAD_FAR_HANDLER(r9, label); \ + mtctr r9; \ + ld r9,area+EX_R9(r13); \ + bctr + +#define BRANCH_TO_KVM(reg, label) \ + __LOAD_FAR_HANDLER(reg, label); \ + mtctr reg; \ + bctr + #else #define BRANCH_TO_COMMON(reg, label) \ b label +#define BRANCH_LINK_TO_FAR(reg, label) \ + bl label + +#define BRANCH_TO_KVM(reg, label) \ + b label + +#define __BRANCH_TO_KVM_EXIT(area, label) \ + ld r9,area+EX_R9(r13); \ + b label + #endif -#define __KVM_HANDLER_PROLOG(area, n) \ + +#define __KVM_HANDLER(area, h, n) \ BEGIN_FTR_SECTION_NESTED(947) \ ld r10,area+EX_CFAR(r13); \ std r10,HSTATE_CFAR(r13); \ @@ -243,30 +288,28 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) std r10,HSTATE_PPR(r13); \ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ ld r10,area+EX_R10(r13); \ - stw r9,HSTATE_SCRATCH1(r13); \ - ld r9,area+EX_R9(r13); \ std r12,HSTATE_SCRATCH0(r13); \ - -#define __KVM_HANDLER(area, h, n) \ - __KVM_HANDLER_PROLOG(area, n) \ - li r12,n; \ - b kvmppc_interrupt + sldi r12,r9,32; \ + ori r12,r12,(n); \ + /* This reloads r9 before branching to kvmppc_interrupt */ \ + __BRANCH_TO_KVM_EXIT(area, kvmppc_interrupt) #define __KVM_HANDLER_SKIP(area, h, n) \ cmpwi r10,KVM_GUEST_MODE_SKIP; \ - ld r10,area+EX_R10(r13); \ beq 89f; \ - stw r9,HSTATE_SCRATCH1(r13); \ BEGIN_FTR_SECTION_NESTED(948) \ - ld r9,area+EX_PPR(r13); \ - std r9,HSTATE_PPR(r13); \ + ld r10,area+EX_PPR(r13); \ + std r10,HSTATE_PPR(r13); \ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ - ld r9,area+EX_R9(r13); \ + ld r10,area+EX_R10(r13); \ std r12,HSTATE_SCRATCH0(r13); \ - li r12,n; \ - b kvmppc_interrupt; \ + sldi r12,r9,32; \ + ori r12,r12,(n); \ + /* This reloads r9 before branching to kvmppc_interrupt */ \ + __BRANCH_TO_KVM_EXIT(area, kvmppc_interrupt); \ 89: mtocrf 0x80,r9; \ ld r9,area+EX_R9(r13); \ + ld r10,area+EX_R10(r13); \ b kvmppc_skip_##h##interrupt #ifdef CONFIG_KVM_BOOK3S_64_HANDLER @@ -393,12 +436,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_STD) #define STD_RELON_EXCEPTION_HV(loc, vec, label) \ - /* No guest interrupts come through here */ \ SET_SCRATCH0(r13); /* save r13 */ \ - EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, EXC_HV, NOTEST, vec); + EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, \ + EXC_HV, KVMTEST_HV, vec); #define STD_RELON_EXCEPTION_HV_OOL(vec, label) \ - EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \ + EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec); \ EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV) /* This associate vector numbers with bits in paca->irq_happened */ @@ -475,10 +518,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label) \ _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ - EXC_HV, SOFTEN_NOTEST_HV) + EXC_HV, SOFTEN_TEST_HV) #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label) \ - EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec); \ + EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \ EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV) /* diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index 2a9cf845473b..eaada6c92344 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -23,10 +23,8 @@ "4: li %1,%3\n" \ "b 3b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - ".align 3\n" \ - PPC_LONG "1b,4b,2b,4b\n" \ - ".previous" \ + EX_TABLE(1b, 4b) \ + EX_TABLE(2b, 4b) \ : "=&r" (oldval), "=&r" (ret) \ : "b" (uaddr), "i" (-EFAULT), 
"r" (oparg) \ : "cr0", "memory") @@ -104,11 +102,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, "3: .section .fixup,\"ax\"\n\ 4: li %0,%6\n\ b 3b\n\ - .previous\n\ - .section __ex_table,\"a\"\n\ - .align 3\n\ - " PPC_LONG "1b,4b,2b,4b\n\ - .previous" \ + .previous\n" + EX_TABLE(1b, 4b) + EX_TABLE(2b, 4b) : "+r" (ret), "=&r" (prev), "+m" (*uaddr) : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT) : "cc", "memory"); diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index ab90c2fa1ea6..9bd81619d090 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -95,12 +95,12 @@ end_##sname: #define __FIXED_SECTION_ENTRY_BEGIN(sname, name, __align) \ USE_FIXED_SECTION(sname); \ - .align __align; \ + .balign __align; \ .global name; \ name: #define FIXED_SECTION_ENTRY_BEGIN(sname, name) \ - __FIXED_SECTION_ENTRY_BEGIN(sname, name, 0) + __FIXED_SECTION_ENTRY_BEGIN(sname, name, IFETCH_ALIGN_BYTES) #define FIXED_SECTION_ENTRY_BEGIN_LOCATION(sname, name, start) \ USE_FIXED_SECTION(sname); \ @@ -203,9 +203,9 @@ name: #define EXC_VIRT_END(name, start, end) \ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, end) -#define EXC_COMMON_BEGIN(name) \ +#define EXC_COMMON_BEGIN(name) \ USE_TEXT_SECTION(); \ - .align 7; \ + .balign IFETCH_ALIGN_BYTES; \ .global name; \ DEFINE_FIXED_SYMBOL(name); \ name: @@ -218,7 +218,7 @@ name: #ifdef CONFIG_KVM_BOOK3S_64_HANDLER #define TRAMP_KVM_BEGIN(name) \ - TRAMP_REAL_BEGIN(name) + TRAMP_VIRT_BEGIN(name) #else #define TRAMP_KVM_BEGIN(name) #endif diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index c5517f463ec7..7f4025a6c69e 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -9,7 +9,7 @@ extern struct kmem_cache *hugepte_cache; #ifdef CONFIG_PPC_BOOK3S_64 -#include <asm/book3s/64/hugetlb-radix.h> +#include <asm/book3s/64/hugetlb.h> /* * This should work for other subarchs too. 
But right now we use the * new format only for 64bit book3s @@ -21,12 +21,12 @@ static inline pte_t *hugepd_page(hugepd_t hpd) * We have only four bits to encode, MMU page size */ BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf); - return __va(hpd.pd & HUGEPD_ADDR_MASK); + return __va(hpd_val(hpd) & HUGEPD_ADDR_MASK); } static inline unsigned int hugepd_mmu_psize(hugepd_t hpd) { - return (hpd.pd & HUGEPD_SHIFT_MASK) >> 2; + return (hpd_val(hpd) & HUGEPD_SHIFT_MASK) >> 2; } static inline unsigned int hugepd_shift(hugepd_t hpd) @@ -51,12 +51,22 @@ static inline void __local_flush_hugetlb_page(struct vm_area_struct *vma, static inline pte_t *hugepd_page(hugepd_t hpd) { BUG_ON(!hugepd_ok(hpd)); - return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | PD_HUGE); +#ifdef CONFIG_PPC_8xx + return (pte_t *)__va(hpd_val(hpd) & + ~(_PMD_PAGE_MASK | _PMD_PRESENT_MASK)); +#else + return (pte_t *)((hpd_val(hpd) & + ~HUGEPD_SHIFT_MASK) | PD_HUGE); +#endif } static inline unsigned int hugepd_shift(hugepd_t hpd) { - return hpd.pd & HUGEPD_SHIFT_MASK; +#ifdef CONFIG_PPC_8xx + return ((hpd_val(hpd) & _PMD_PAGE_MASK) >> 1) + 17; +#else + return hpd_val(hpd) & HUGEPD_SHIFT_MASK; +#endif } #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -99,7 +109,15 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte); +#ifdef CONFIG_PPC_8xx +static inline void flush_hugetlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ + flush_tlb_page(vma, vmaddr); +} +#else void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +#endif void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, @@ -205,7 +223,8 @@ static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, * are reserved early in the boot process by memblock instead of via * the .dts as on IBM platforms. 
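The hpd.pd to hpd_val(hpd) churn through these hunks exists because hugepd_t stops being one shared struct: pgtable-be-types.h (later in this patch) stores it big-endian for book3s-64 while pgtable-types.h keeps a native unsigned long. A sketch of the accessor pair, with __builtin_bswap64 standing in for be64_to_cpu():

#include <stdint.h>

typedef struct { uint64_t pdbe; } hugepd_be_t;  /* stored big-endian */
typedef struct { unsigned long pd; } hugepd_t;  /* native byte order */

static inline unsigned long hpd_val_be(hugepd_be_t x)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        return __builtin_bswap64(x.pdbe);       /* swap on LE hosts */
#else
        return x.pdbe;
#endif
}

static inline unsigned long hpd_val_native(hugepd_t x)
{
        return x.pd;
}

Funneling every reader through hpd_val() is what lets the representation change without rewriting the hugepd_page()/hugepd_shift() logic above.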
*/ -#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_FSL_BOOK3E) +#if defined(CONFIG_HUGETLB_PAGE) && (defined(CONFIG_PPC_FSL_BOOK3E) || \ + defined(CONFIG_PPC_8xx)) extern void __init reserve_hugetlb_gpages(void); #else static inline void reserve_hugetlb_gpages(void) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 708edebcf147..54d11b3a6bf7 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -275,7 +275,10 @@ #define H_COP 0x304 #define H_GET_MPP_X 0x314 #define H_SET_MODE 0x31C -#define MAX_HCALL_OPCODE H_SET_MODE +#define H_CLEAR_HPT 0x358 +#define H_REGISTER_PROC_TBL 0x37C +#define H_SIGNAL_SYS_RESET 0x380 +#define MAX_HCALL_OPCODE H_SIGNAL_SYS_RESET /* H_VIOCTL functions */ #define H_GET_VIOA_DUMP_SIZE 0x01 @@ -306,6 +309,21 @@ #define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 #define H_SET_MODE_RESOURCE_LE 4 +/* Values for argument to H_SIGNAL_SYS_RESET */ +#define H_SIGNAL_SYS_RESET_ALL -1 +#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2 +/* >= 0 values are CPU number */ + +/* Flag values used in H_REGISTER_PROC_TBL hcall */ +#define PROC_TABLE_OP_MASK 0x18 +#define PROC_TABLE_DEREG 0x10 +#define PROC_TABLE_NEW 0x18 +#define PROC_TABLE_TYPE_MASK 0x06 +#define PROC_TABLE_HPT_SLB 0x00 +#define PROC_TABLE_HPT_PT 0x02 +#define PROC_TABLE_RADIX 0x04 +#define PROC_TABLE_GTSE 0x01 + #ifndef __ASSEMBLY__ /** @@ -412,27 +430,6 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc) } } -#ifdef CONFIG_PPC_PSERIES -extern int CMO_PrPSP; -extern int CMO_SecPSP; -extern unsigned long CMO_PageSize; - -static inline int cmo_get_primary_psp(void) -{ - return CMO_PrPSP; -} - -static inline int cmo_get_secondary_psp(void) -{ - return CMO_SecPSP; -} - -static inline unsigned long cmo_get_page_size(void) -{ - return CMO_PageSize; -} -#endif /* CONFIG_PPC_PSERIES */ - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HVCALL_H */ diff --git a/arch/powerpc/include/asm/ima.h b/arch/powerpc/include/asm/ima.h new file mode 100644 index 000000000000..2313bdface34 --- /dev/null +++ b/arch/powerpc/include/asm/ima.h @@ -0,0 +1,29 @@ +#ifndef _ASM_POWERPC_IMA_H +#define _ASM_POWERPC_IMA_H + +struct kimage; + +int ima_get_kexec_buffer(void **addr, size_t *size); +int ima_free_kexec_buffer(void); + +#ifdef CONFIG_IMA +void remove_ima_buffer(void *fdt, int chosen_node); +#else +static inline void remove_ima_buffer(void *fdt, int chosen_node) {} +#endif + +#ifdef CONFIG_IMA_KEXEC +int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr, + size_t size); + +int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node); +#else +static inline int setup_ima_buffer(const struct kimage *image, void *fdt, + int chosen_node) +{ + remove_ima_buffer(fdt, chosen_node); + return 0; +} +#endif /* CONFIG_IMA_KEXEC */ + +#endif /* _ASM_POWERPC_IMA_H */ diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index f6fda8482f60..5ed292431b5b 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -33,6 +33,7 @@ extern struct pci_dev *isa_bridge_pcidev; #include <asm/synch.h> #include <asm/delay.h> #include <asm/mmu.h> +#include <asm/ppc_asm.h> #include <asm-generic/iomap.h> @@ -458,13 +459,10 @@ static inline unsigned int name(unsigned int port) \ "5: li %0,-1\n" \ " b 4b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 2\n" \ - " .long 0b,5b\n" \ - " .long 1b,5b\n" \ - " .long 2b,5b\n" \ - " .long 3b,5b\n" \ - ".previous" \ + 
EX_TABLE(0b, 5b) \ + EX_TABLE(1b, 5b) \ + EX_TABLE(2b, 5b) \ + EX_TABLE(3b, 5b) \ : "=&r" (x) \ : "r" (port + _IO_BASE) \ : "memory"); \ @@ -479,11 +477,8 @@ static inline void name(unsigned int val, unsigned int port) \ "0:" op " %0,0,%1\n" \ "1: sync\n" \ "2:\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 2\n" \ - " .long 0b,2b\n" \ - " .long 1b,2b\n" \ - ".previous" \ + EX_TABLE(0b, 2b) \ + EX_TABLE(1b, 2b) \ : : "r" (val), "r" (port + _IO_BASE) \ : "memory"); \ } diff --git a/arch/powerpc/include/asm/isa-bridge.h b/arch/powerpc/include/asm/isa-bridge.h new file mode 100644 index 000000000000..a3a7c1d63a7c --- /dev/null +++ b/arch/powerpc/include/asm/isa-bridge.h @@ -0,0 +1,29 @@ +#ifndef __ISA_BRIDGE_H +#define __ISA_BRIDGE_H + +#ifdef CONFIG_PPC64 + +extern void isa_bridge_find_early(struct pci_controller *hose); +extern void isa_bridge_init_non_pci(struct device_node *np); + +static inline int isa_vaddr_is_ioport(void __iomem *address) +{ + /* Check if address hits the reserved legacy IO range */ + unsigned long ea = (unsigned long)address; + return ea >= ISA_IO_BASE && ea < ISA_IO_END; +} + +#else + +static inline int isa_vaddr_is_ioport(void __iomem *address) +{ + /* No specific ISA handling on ppc32 at this stage, it + * all goes through PCI + */ + return 0; +} + +#endif + +#endif /* __ISA_BRIDGE_H */ + diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index a46f5f45570c..25668bc8cb2a 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -53,7 +53,7 @@ typedef void (*crash_shutdown_t)(void); -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* * This function is responsible for capturing register states if coming @@ -91,7 +91,28 @@ static inline bool kdump_in_progress(void) return crashing_cpu >= 0; } -#else /* !CONFIG_KEXEC */ +#ifdef CONFIG_KEXEC_FILE +extern struct kexec_file_ops kexec_elf64_ops; + +#ifdef CONFIG_IMA_KEXEC +#define ARCH_HAS_KIMAGE_ARCH + +struct kimage_arch { + phys_addr_t ima_buffer_addr; + size_t ima_buffer_size; +}; +#endif + +int setup_purgatory(struct kimage *image, const void *slave_code, + const void *fdt, unsigned long kernel_load_addr, + unsigned long fdt_load_addr); +int setup_new_fdt(const struct kimage *image, void *fdt, + unsigned long initrd_load_addr, unsigned long initrd_len, + const char *cmdline); +int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size); +#endif /* CONFIG_KEXEC_FILE */ + +#else /* !CONFIG_KEXEC_CORE */ static inline void crash_kexec_secondary(struct pt_regs *regs) { } static inline int overlaps_crashkernel(unsigned long start, unsigned long size) @@ -116,7 +137,7 @@ static inline bool kdump_in_progress(void) return false; } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #endif /* ! 
__ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_KEXEC_H */ diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index 2c9759bdb63b..97b8c1f83453 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -32,6 +32,7 @@ #include <asm/probes.h> #include <asm/code-patching.h> +#ifdef CONFIG_KPROBES #define __ARCH_WANT_KPROBES_INSN_SLOT struct pt_regs; @@ -127,5 +128,11 @@ struct kprobe_ctlblk { extern int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); +extern int kprobe_handler(struct pt_regs *regs); +extern int kprobe_post_handler(struct pt_regs *regs); +#else +static inline int kprobe_handler(struct pt_regs *regs) { return 0; } +static inline int kprobe_post_handler(struct pt_regs *regs) { return 0; } +#endif /* CONFIG_KPROBES */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_KPROBES_H */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 5cf306ae0ac3..2bf35017ffc0 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -170,6 +170,8 @@ extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run, unsigned long status); extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, unsigned long valid); +extern int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned long gpa, gva_t ea, int is_store); extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte); extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu); @@ -182,6 +184,25 @@ extern void kvmppc_mmu_hpte_sysexit(void); extern int kvmppc_mmu_hv_init(void); extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); +extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run, + struct kvm_vcpu *vcpu, + unsigned long ea, unsigned long dsisr); +extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, + struct kvmppc_pte *gpte, bool data, bool iswrite); +extern int kvmppc_init_vm_radix(struct kvm *kvm); +extern void kvmppc_free_radix(struct kvm *kvm); +extern int kvmppc_radix_init(void); +extern void kvmppc_radix_exit(void); +extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long gfn); +extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long gfn); +extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long gfn); +extern long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm, + struct kvm_memory_slot *memslot, unsigned long *map); +extern int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info); + /* XXX remove this export when load_last_inst() is generic */ extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); @@ -211,8 +232,11 @@ extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, unsigned long pte_index, unsigned long avpn, unsigned long *hpret); -extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, +extern long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long *map); +extern void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa, + struct kvm_memory_slot *memslot, + unsigned 
long *map); extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask); extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 848292176908..d9b48f5bb606 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -22,6 +22,10 @@ #include <asm/book3s/64/mmu-hash.h> +/* Power architecture requires HPT is at least 256kiB, at most 64TiB */ +#define PPC_MIN_HPT_ORDER 18 +#define PPC_MAX_HPT_ORDER 46 + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu) { @@ -36,6 +40,12 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) #endif #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + +static inline bool kvm_is_radix(struct kvm *kvm) +{ + return kvm->arch.radix; +} + #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ #endif @@ -350,6 +360,18 @@ extern void kvmppc_mmu_debugfs_init(struct kvm *kvm); extern void kvmhv_rm_send_ipi(int cpu); +static inline unsigned long kvmppc_hpt_npte(struct kvm_hpt_info *hpt) +{ + /* HPTEs are 2**4 bytes long */ + return 1UL << (hpt->order - 4); +} + +static inline unsigned long kvmppc_hpt_mask(struct kvm_hpt_info *hpt) +{ + /* 128 (2**7) bytes in each HPTEG */ + return (1UL << (hpt->order - 7)) - 1; +} + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index e59b172666cd..7bba8f415627 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -241,12 +241,24 @@ struct kvm_arch_memory_slot { #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ }; +struct kvm_hpt_info { + /* Host virtual (linear mapping) address of guest HPT */ + unsigned long virt; + /* Array of reverse mapping entries for each guest HPTE */ + struct revmap_entry *rev; + /* Guest HPT size is 2**(order) bytes */ + u32 order; + /* 1 if HPT allocated with CMA, 0 otherwise */ + int cma; +}; + +struct kvm_resize_hpt; + struct kvm_arch { unsigned int lpid; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE unsigned int tlb_sets; - unsigned long hpt_virt; - struct revmap_entry *revmap; + struct kvm_hpt_info hpt; atomic64_t mmio_update; unsigned int host_lpid; unsigned long host_lpcr; @@ -256,16 +268,17 @@ struct kvm_arch { unsigned long lpcr; unsigned long vrma_slb_v; int hpte_setup_done; - u32 hpt_order; atomic_t vcpus_running; u32 online_vcores; - unsigned long hpt_npte; - unsigned long hpt_mask; atomic_t hpte_mod_interest; cpumask_t need_tlb_flush; - int hpt_cma_alloc; + cpumask_t cpu_in_guest; + u8 radix; + pgd_t *pgtable; + u64 process_table; struct dentry *debugfs_dir; struct dentry *htab_dentry; + struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */ #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE struct mutex hpt_mutex; @@ -603,6 +616,7 @@ struct kvm_vcpu_arch { ulong fault_dar; u32 fault_dsisr; unsigned long intr_msr; + ulong fault_gpa; /* guest real address of page fault (POWER9) */ #endif #ifdef CONFIG_BOOKE @@ -657,6 +671,7 @@ struct kvm_vcpu_arch { int state; int ptid; int thread_cpu; + int prev_cpu; bool timer_running; wait_queue_head_t cpu_run; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 2da67bf1f2ec..dd11c4c8c56a 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -155,9 +155,10 
@@ extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu); extern void kvmppc_map_magic(struct kvm_vcpu *vcpu); -extern long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp); -extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp); -extern void kvmppc_free_hpt(struct kvm *kvm); +extern int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order); +extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info); +extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order); +extern void kvmppc_free_hpt(struct kvm_hpt_info *info); extern long kvmppc_prepare_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem); extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, @@ -186,8 +187,8 @@ extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, unsigned long tce_value, unsigned long npages); extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba); -extern struct page *kvm_alloc_hpt(unsigned long nr_pages); -extern void kvm_release_hpt(struct page *page, unsigned long nr_pages); +extern struct page *kvm_alloc_hpt_cma(unsigned long nr_pages); +extern void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); extern void kvmppc_core_free_memslot(struct kvm *kvm, @@ -214,6 +215,10 @@ extern void kvmppc_bookehv_exit(void); extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); +extern long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, + struct kvm_ppc_resize_hpt *rhpt); +extern long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm, + struct kvm_ppc_resize_hpt *rhpt); int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); @@ -291,6 +296,8 @@ struct kvmppc_ops { struct irq_bypass_producer *); void (*irq_bypass_del_producer)(struct irq_bypass_consumer *, struct irq_bypass_producer *); + int (*configure_mmu)(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg); + int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info); }; extern struct kvmppc_ops *kvmppc_hv_ops; diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index e02cbc6a6c70..5011b69107a7 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -183,7 +183,7 @@ struct machdep_calls { */ void (*machine_shutdown)(void); -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE void (*kexec_cpu_down)(int crash_shutdown, int secondary); /* Called to do what every setup is needed on image and the @@ -198,7 +198,7 @@ struct machdep_calls { * no return. 
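Back on the HPT rework above: hpt_npte and hpt_mask disappear from struct kvm_arch because both are pure functions of the order now carried in kvm_hpt_info. The arithmetic, as in the kvm_book3s_64.h helpers:

#include <stdint.h>

static inline uint64_t hpt_npte(uint32_t order)
{
        return 1UL << (order - 4);        /* HPTEs are 16 bytes each */
}

static inline uint64_t hpt_mask(uint32_t order)
{
        return (1UL << (order - 7)) - 1;  /* 128-byte HPTEGs, minus one */
}

/* e.g. the default order-24 (16MB) HPT: 1M HPTEs, HPTEG mask 0x1ffff */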
*/ void (*machine_kexec)(struct kimage *image); -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #ifdef CONFIG_SUSPEND /* These are called to disable and enable, respectively, IRQs when diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 3e0e4927811c..798b5bf91427 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -172,6 +172,41 @@ typedef struct { #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000) #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) + +/* Page size definitions, common between 32 and 64-bit + * + * shift : is the "PAGE_SHIFT" value for that page size + * penc : is the pte encoding mask + * + */ +struct mmu_psize_def { + unsigned int shift; /* number of bits */ + unsigned int enc; /* PTE encoding */ + unsigned int ind; /* Corresponding indirect page size shift */ + unsigned int flags; +#define MMU_PAGE_SIZE_DIRECT 0x1 /* Supported as a direct size */ +#define MMU_PAGE_SIZE_INDIRECT 0x2 /* Supported as an indirect size */ +}; + +extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; + +static inline int shift_to_mmu_psize(unsigned int shift) +{ + int psize; + + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) + if (mmu_psize_defs[psize].shift == shift) + return psize; + return -1; +} + +static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) +{ + if (mmu_psize_defs[mmu_psize].shift) + return mmu_psize_defs[mmu_psize].shift; + BUG(); +} + #endif /* !__ASSEMBLY__ */ #if defined(CONFIG_PPC_4K_PAGES) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 8d1499334257..233a7e8cc8e3 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -160,7 +160,9 @@ static __always_inline bool mmu_has_feature(unsigned long feature) { int i; +#ifndef __clang__ /* clang can't cope with this */ BUILD_BUG_ON(!__builtin_constant_p(feature)); +#endif #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG if (!static_key_initialized) { @@ -275,19 +277,20 @@ static inline bool early_radix_enabled(void) #define MMU_PAGE_64K 2 #define MMU_PAGE_64K_AP 3 /* "Admixed pages" (hash64 only) */ #define MMU_PAGE_256K 4 -#define MMU_PAGE_1M 5 -#define MMU_PAGE_2M 6 -#define MMU_PAGE_4M 7 -#define MMU_PAGE_8M 8 -#define MMU_PAGE_16M 9 -#define MMU_PAGE_64M 10 -#define MMU_PAGE_256M 11 -#define MMU_PAGE_1G 12 -#define MMU_PAGE_16G 13 -#define MMU_PAGE_64G 14 +#define MMU_PAGE_512K 5 +#define MMU_PAGE_1M 6 +#define MMU_PAGE_2M 7 +#define MMU_PAGE_4M 8 +#define MMU_PAGE_8M 9 +#define MMU_PAGE_16M 10 +#define MMU_PAGE_64M 11 +#define MMU_PAGE_256M 12 +#define MMU_PAGE_1G 13 +#define MMU_PAGE_16G 14 +#define MMU_PAGE_64G 15 /* N.B. 
we need to change the type of hpte_page_sizes if this gets to be > 16 */ -#define MMU_PAGE_COUNT 15 +#define MMU_PAGE_COUNT 16 #ifdef CONFIG_PPC_BOOK3S_64 #include <asm/book3s/64/mmu.h> diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 5c451140660a..b9e3f0aca261 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -19,16 +19,18 @@ extern void destroy_context(struct mm_struct *mm); struct mm_iommu_table_group_mem_t; extern int isolate_lru_page(struct page *page); /* from internal.h */ -extern bool mm_iommu_preregistered(void); -extern long mm_iommu_get(unsigned long ua, unsigned long entries, +extern bool mm_iommu_preregistered(struct mm_struct *mm); +extern long mm_iommu_get(struct mm_struct *mm, + unsigned long ua, unsigned long entries, struct mm_iommu_table_group_mem_t **pmem); -extern long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem); -extern void mm_iommu_init(mm_context_t *ctx); -extern void mm_iommu_cleanup(mm_context_t *ctx); -extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, - unsigned long size); -extern struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, - unsigned long entries); +extern long mm_iommu_put(struct mm_struct *mm, + struct mm_iommu_table_group_mem_t *mem); +extern void mm_iommu_init(struct mm_struct *mm); +extern void mm_iommu_cleanup(struct mm_struct *mm); +extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, + unsigned long ua, unsigned long size); +extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, + unsigned long ua, unsigned long entries); extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, unsigned long ua, unsigned long *hpa); extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem); diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index cd4ffd86765f..53885512b8d3 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -90,13 +90,5 @@ static inline int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sec } #endif -struct exception_table_entry; -void sort_ex_table(struct exception_table_entry *start, - struct exception_table_entry *finish); - -#if defined(CONFIG_MODVERSIONS) && defined(CONFIG_PPC64) -#define ARCH_RELOCATES_KCRCTAB -#define reloc_start PHYSICAL_START -#endif #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_MODULE_H */ diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h index 76d6b9e0c8a9..633139291a48 100644 --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h @@ -2,14 +2,42 @@ #define _ASM_POWERPC_PGALLOC_32_H #include <linux/threads.h> +#include <linux/slab.h> -/* For 32-bit, all levels of page tables are just drawn from get_free_page() */ -#define MAX_PGTABLE_INDEX_SIZE 0 +/* + * Functions that deal with pagetables that could be at any level of + * the table need to be passed an "index_size" so they know how to + * handle allocation. For PTE pages (which are linked to a struct + * page for now, and drawn from the main get_free_pages() pool), the + * allocation size will be (2^index_size * sizeof(pointer)) and + * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). 
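(Concretely, under the allocation rule just described, and assuming a 32-bit build with 4k pages where PGD_INDEX_SIZE works out to 10:

/* bytes for a table of 2^index_size pointer-sized entries */
static unsigned long pgtable_bytes(unsigned int index_size)
{
        return sizeof(void *) << index_size;    /* 4 << 10 = 4096 */
}

so pgd_alloc() below draws its 4KB table from PGT_CACHE(10) rather than from a raw page.)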
+ * + * The maximum index size needs to be big enough to allow any + * pagetable sizes we need, but small enough to fit in the low bits of + * any page table pointer. In other words all pagetables, even tiny + * ones, must be aligned to allow at least enough low 0 bits to + * contain this value. This value is also used as a mask, so it must + * be one less than a power of two. + */ +#define MAX_PGTABLE_INDEX_SIZE 0xf extern void __bad_pte(pmd_t *pmd); -extern pgd_t *pgd_alloc(struct mm_struct *mm); -extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); +extern struct kmem_cache *pgtable_cache[]; +#define PGT_CACHE(shift) ({ \ + BUG_ON(!(shift)); \ + pgtable_cache[(shift) - 1]; \ + }) + +static inline pgd_t *pgd_alloc(struct mm_struct *mm) +{ + return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL); +} + +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); +} /* * We don't have any real pmd's, and this code never triggers because @@ -68,8 +96,12 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) static inline void pgtable_free(void *table, unsigned index_size) { - BUG_ON(index_size); /* 32-bit doesn't use this */ - free_page((unsigned long)table); + if (!index_size) { + free_page((unsigned long)table); + } else { + BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE); + kmem_cache_free(PGT_CACHE(index_size), table); + } } #define check_pgt_cache() do { } while (0) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index c219ef7be53b..ba9921bf202e 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -16,6 +16,23 @@ extern int icache_44x_need_flush; #endif /* __ASSEMBLY__ */ +#define PTE_INDEX_SIZE PTE_SHIFT +#define PMD_INDEX_SIZE 0 +#define PUD_INDEX_SIZE 0 +#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT) + +#define PMD_CACHE_INDEX PMD_INDEX_SIZE + +#ifndef __ASSEMBLY__ +#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) +#define PMD_TABLE_SIZE 0 +#define PUD_TABLE_SIZE 0 +#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) +#endif /* __ASSEMBLY__ */ + +#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) +#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) + /* * The normal case is that PTEs are 32-bits and we have a 1-page * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages. -- paulus @@ -27,22 +44,12 @@ extern int icache_44x_need_flush; * -Matt */ /* PGDIR_SHIFT determines what a top-level page table entry can map */ -#define PGDIR_SHIFT (PAGE_SHIFT + PTE_SHIFT) +#define PGDIR_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -/* - * entries per page directory level: our page-table tree is two-level, so - * we don't really have any PMD directory. 
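The replacement defines earlier in this hunk keep that same two-level geometry, just spelled via the *_INDEX_SIZE names. Checking the arithmetic for the common 4k-page case (PTE_SHIFT = 10):

#define PAGE_SHIFT      12
#define PTE_INDEX_SIZE  10
#define PGDIR_SHIFT     (PAGE_SHIFT + PTE_INDEX_SIZE)   /* 22 */

_Static_assert((1 << PTE_INDEX_SIZE) == 1024, "one 4k page of ptes");
_Static_assert((1 << (32 - PGDIR_SHIFT)) == 1024, "one 4k page of pgds");
/* each pgd entry therefore still maps 1 << 22 = 4MB */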
- */ -#ifndef __ASSEMBLY__ -#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_SHIFT) -#define PGD_TABLE_SIZE (sizeof(pgd_t) << (32 - PGDIR_SHIFT)) -#endif /* __ASSEMBLY__ */ - -#define PTRS_PER_PTE (1 << PTE_SHIFT) -#define PTRS_PER_PMD 1 -#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT)) +/* Bits to mask out from a PGD to get to the PUD page */ +#define PGD_MASKED_BITS 0 #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) #define FIRST_USER_ADDRESS 0UL @@ -268,7 +275,8 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, static inline void __ptep_set_access_flags(struct mm_struct *mm, - pte_t *ptep, pte_t entry) + pte_t *ptep, pte_t entry, + unsigned long address) { unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); @@ -328,15 +336,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -#ifndef CONFIG_PPC_4K_PAGES -void pgtable_cache_init(void); -#else -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) -#endif - extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp); diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 3742b1919661..b4df2734c078 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -49,6 +49,7 @@ #define _PMD_BAD 0x0ff0 #define _PMD_PAGE_MASK 0x000c #define _PMD_PAGE_8M 0x000c +#define _PMD_PAGE_512K 0x0004 /* Until my rework is finished, 8xx still needs atomic PTE updates */ #define PTE_ATOMIC_UPDATES 1 diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h index fc7d51753f81..d0db98793dd8 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h @@ -27,9 +27,6 @@ #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -/* With 4k base page size, hugepage PTEs go at the PMD level */ -#define MIN_HUGEPTE_SHIFT PMD_SHIFT - /* PUD_SHIFT determines what a third-level page table entry can map */ #define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) #define PUD_SIZE (1UL << PUD_SHIFT) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h index 908324574f77..55b28ef3409a 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h @@ -31,9 +31,6 @@ #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) -/* With 4k base page size, hugepage PTEs go at the PMD level */ -#define MIN_HUGEPTE_SHIFT PAGE_SHIFT - /* PMD_SHIFT determines what a second-level page table entry can map */ #define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) #define PMD_SIZE (1UL << PMD_SHIFT) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 653a1838469d..c7f927e67d14 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -26,15 +26,11 @@ #else #define PMD_CACHE_INDEX PMD_INDEX_SIZE #endif + /* * Define the address range of the kernel non-linear virtual area */ - -#ifdef CONFIG_PPC_BOOK3E #define KERN_VIRT_START ASM_CONST(0x8000000000000000) -#else -#define KERN_VIRT_START ASM_CONST(0xD000000000000000) -#endif #define KERN_VIRT_SIZE 
ASM_CONST(0x0000100000000000) /* @@ -43,11 +39,7 @@ * (we keep a quarter for the virtual memmap) */ #define VMALLOC_START KERN_VIRT_START -#ifdef CONFIG_PPC_BOOK3E #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 2) -#else -#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) -#endif #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) /* @@ -85,12 +77,8 @@ * Defines the address of the vmemap area, in its own region on * hash table CPUs and after the vmalloc space on Book3E */ -#ifdef CONFIG_PPC_BOOK3E #define VMEMMAP_BASE VMALLOC_END #define VMEMMAP_END KERN_IO_START -#else -#define VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT) -#endif #define vmemmap ((struct page *)VMEMMAP_BASE) @@ -301,7 +289,8 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, * function doesn't need to flush the hash entry */ static inline void __ptep_set_access_flags(struct mm_struct *mm, - pte_t *ptep, pte_t entry) + pte_t *ptep, pte_t entry, + unsigned long address) { unsigned long bits = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); @@ -358,8 +347,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __swp_entry_to_pte(x) __pte((x).val) -void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); -void pgtable_cache_init(void); extern int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags); extern int __meminit vmemmap_create_mapping(unsigned long start, diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 1263c22d60d8..0cd8a3852763 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -226,7 +226,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, #ifdef CONFIG_HUGETLB_PAGE static inline int hugepd_ok(hugepd_t hpd) { - return (hpd.pd > 0); +#ifdef CONFIG_PPC_8xx + return ((hpd_val(hpd) & 0x4) != 0); +#else + /* We clear the top bit to indicate hugepd */ + return ((hpd_val(hpd) & PD_HUGE) == 0); +#endif } static inline int pmd_huge(pmd_t pmd) diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 0e2e57bcab50..a0aa285869b5 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -167,7 +167,8 @@ #define OPAL_INT_EOI 124 #define OPAL_INT_SET_MFRR 125 #define OPAL_PCI_TCE_KILL 126 -#define OPAL_LAST 126 +#define OPAL_NMMU_SET_PTCR 127 +#define OPAL_LAST 127 /* Device tree flags */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 5c7db0f1a708..1ff03a6da76e 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -67,7 +67,6 @@ int64_t opal_pci_config_write_half_word(uint64_t phb_id, uint64_t bus_dev_func, int64_t opal_pci_config_write_word(uint64_t phb_id, uint64_t bus_dev_func, uint64_t offset, uint32_t data); int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority); -int64_t opal_rm_set_xive(uint32_t isn, uint16_t server, uint8_t priority); int64_t opal_get_xive(uint32_t isn, __be16 *server, uint8_t *priority); int64_t opal_register_exception_handler(uint64_t opal_exception, uint64_t handler_address, @@ -220,18 +219,13 @@ int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id, int64_t opal_pci_poll2(uint64_t id, uint64_t data); int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll); -int64_t opal_rm_int_get_xirr(__be32 *out_xirr, bool just_poll); int64_t 
opal_int_set_cppr(uint8_t cppr); int64_t opal_int_eoi(uint32_t xirr); -int64_t opal_rm_int_eoi(uint32_t xirr); int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr); -int64_t opal_rm_int_set_mfrr(uint32_t cpu, uint8_t mfrr); int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type, uint32_t pe_num, uint32_t tce_size, uint64_t dma_addr, uint32_t npages); -int64_t opal_rm_pci_tce_kill(uint64_t phb_id, uint32_t kill_type, - uint32_t pe_num, uint32_t tce_size, - uint64_t dma_addr, uint32_t npages); +int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 6a6792bb39fb..708c3e592eeb 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -187,7 +187,6 @@ struct paca_struct { /* Stuff for accurate time accounting */ struct cpu_accounting_data accounting; - u64 stolen_time; /* TB ticks taken by hypervisor */ u64 dtl_ridx; /* read index in dispatch log */ struct dtl_entry *dtl_curr; /* pointer corresponding to dtl_ridx */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 56398e7e6100..47120bf2670c 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -294,15 +294,12 @@ extern long long virt_phys_offset; #include <asm/pgtable-types.h> #endif -typedef struct { signed long pd; } hugepd_t; #ifndef CONFIG_HUGETLB_PAGE #define is_hugepd(pdep) (0) #define pgd_huge(pgd) (0) #endif /* CONFIG_HUGETLB_PAGE */ -#define __hugepd(x) ((hugepd_t) { (x) }) - struct page; extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg); extern void copy_user_page(void *to, void *from, unsigned long vaddr, diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index dd5f0712afa2..3e83d2a20b6f 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -47,14 +47,14 @@ static inline void clear_page(void *addr) unsigned long iterations; unsigned long onex, twox, fourx, eightx; - iterations = ppc64_caches.dlines_per_page / 8; + iterations = ppc64_caches.l1d.blocks_per_page / 8; /* * Some versions of gcc use multiply instructions to * calculate the offsets so let's give it a hand to * do better. 
*/ - onex = ppc64_caches.dline_size; + onex = ppc64_caches.l1d.block_size; twox = onex << 1; fourx = onex << 2; eightx = onex << 3; diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index c0309c59bed8..56c67d3f0108 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -174,14 +174,6 @@ extern int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn); extern void pci_create_OF_bus_map(void); -static inline int isa_vaddr_is_ioport(void __iomem *address) -{ - /* No specific ISA handling on ppc32 at this stage, it - * all goes through PCI - */ - return 0; -} - #else /* CONFIG_PPC64 */ /* @@ -269,16 +261,6 @@ extern void pci_hp_remove_devices(struct pci_bus *bus); /** Discover new pci devices under this bus, and add them */ extern void pci_hp_add_devices(struct pci_bus *bus); - -extern void isa_bridge_find_early(struct pci_controller *hose); - -static inline int isa_vaddr_is_ioport(void __iomem *address) -{ - /* Check if address hits the reserved legacy IO range */ - unsigned long ea = (unsigned long)address; - return ea >= ISA_IO_BASE && ea < ISA_IO_END; -} - extern int pcibios_unmap_io_space(struct pci_bus *bus); extern int pcibios_map_io_space(struct pci_bus *bus); diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index e157489ee7a1..ae0a23091a9b 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -65,6 +65,7 @@ struct power_pmu { #define PPMU_HAS_SSLOT 0x00000020 /* Has sampled slot in MMCRA */ #define PPMU_HAS_SIER 0x00000040 /* Has SIER */ #define PPMU_ARCH_207S 0x00000080 /* PMC is architecture v2.07S */ +#define PPMU_NO_SIAR 0x00000100 /* Do not use SIAR */ /* * Values for flags to get_alternatives() diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h index 49c0a5a80efa..9c0f5db5cf46 100644 --- a/arch/powerpc/include/asm/pgtable-be-types.h +++ b/arch/powerpc/include/asm/pgtable-be-types.h @@ -104,4 +104,12 @@ static inline bool pmd_xchg(pmd_t *pmdp, pmd_t old, pmd_t new) return pmd_raw(old) == prev; } +typedef struct { __be64 pdbe; } hugepd_t; +#define __hugepd(x) ((hugepd_t) { cpu_to_be64(x) }) + +static inline unsigned long hpd_val(hugepd_t x) +{ + return be64_to_cpu(x.pdbe); +} + #endif /* _ASM_POWERPC_PGTABLE_BE_TYPES_H */ diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h index e7f4f3e0fcde..8bd3b13fe2fb 100644 --- a/arch/powerpc/include/asm/pgtable-types.h +++ b/arch/powerpc/include/asm/pgtable-types.h @@ -66,4 +66,11 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new) } #endif +typedef struct { unsigned long pd; } hugepd_t; +#define __hugepd(x) ((hugepd_t) { (x) }) +static inline unsigned long hpd_val(hugepd_t x) +{ + return x.pd; +} + #endif /* _ASM_POWERPC_PGTABLE_TYPES_H */ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 9bd87f269d6d..dd01212935ac 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -78,6 +78,8 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned long vmalloc_to_phys(void *vmalloc_addr); +void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); +void pgtable_cache_init(void); #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_H */ diff --git a/arch/powerpc/include/asm/plpar_wrappers.h 
b/arch/powerpc/include/asm/plpar_wrappers.h index 1b394247afc2..0bcc75e295e3 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -93,38 +93,6 @@ static inline long register_dtl(unsigned long cpu, unsigned long vpa) return vpa_call(H_VPA_REG_DTL, cpu, vpa); } -static inline long plpar_page_set_loaned(unsigned long vpa) -{ - unsigned long cmo_page_sz = cmo_get_page_size(); - long rc = 0; - int i; - - for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) - rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0); - - for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) - plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, - vpa + i - cmo_page_sz, 0); - - return rc; -} - -static inline long plpar_page_set_active(unsigned long vpa) -{ - unsigned long cmo_page_sz = cmo_get_page_size(); - long rc = 0; - int i; - - for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) - rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0); - - for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) - plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, - vpa + i - cmo_page_sz, 0); - - return rc; -} - extern void vpa_init(int cpu); static inline long plpar_pte_enter(unsigned long flags, @@ -340,4 +308,9 @@ static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawr return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0); } +static inline long plapr_signal_sys_reset(long cpu) +{ + return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu); +} + #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h new file mode 100644 index 000000000000..0e9c2402dd20 --- /dev/null +++ b/arch/powerpc/include/asm/powernv.h @@ -0,0 +1,19 @@ +/* + * Copyright 2017 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _ASM_POWERNV_H +#define _ASM_POWERNV_H + +#ifdef CONFIG_PPC_POWERNV +extern void powernv_set_nmmu_ptcr(unsigned long ptcr); +#else +static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { } +#endif + +#endif /* _ASM_POWERNV_H */ diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index c56ea8c84abb..c4ced1d01d57 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -157,7 +157,7 @@ #define PPC_INST_MCRXR 0x7c000400 #define PPC_INST_MCRXR_MASK 0xfc0007fe #define PPC_INST_MFSPR_PVR 0x7c1f42a6 -#define PPC_INST_MFSPR_PVR_MASK 0xfc1fffff +#define PPC_INST_MFSPR_PVR_MASK 0xfc1ffffe #define PPC_INST_MFTMR 0x7c0002dc #define PPC_INST_MSGSND 0x7c00019c #define PPC_INST_MSGCLR 0x7c0001dc @@ -174,13 +174,13 @@ #define PPC_INST_RFDI 0x4c00004e #define PPC_INST_RFMCI 0x4c00004c #define PPC_INST_MFSPR_DSCR 0x7c1102a6 -#define PPC_INST_MFSPR_DSCR_MASK 0xfc1fffff +#define PPC_INST_MFSPR_DSCR_MASK 0xfc1ffffe #define PPC_INST_MTSPR_DSCR 0x7c1103a6 -#define PPC_INST_MTSPR_DSCR_MASK 0xfc1fffff +#define PPC_INST_MTSPR_DSCR_MASK 0xfc1ffffe #define PPC_INST_MFSPR_DSCR_USER 0x7c0302a6 -#define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1fffff +#define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1ffffe #define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6 -#define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1fffff +#define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1ffffe #define PPC_INST_MFVSRD 0x7c000066 #define PPC_INST_MTVSRD 0x7c000166 #define PPC_INST_SLBFEE 0x7c0007a7 diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 0f73de069f19..726288048652 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -53,7 +53,7 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev); struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr); void eeh_slot_error_detail(struct eeh_pe *pe, int severity); int eeh_pci_enable(struct eeh_pe *pe, int function); -int eeh_reset_pe(struct eeh_pe *); +int eeh_pe_reset_full(struct eeh_pe *pe); void eeh_save_bars(struct eeh_dev *edev); int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index c73750b0d9fa..359c44341761 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -10,9 +10,7 @@ #include <asm/ppc-opcode.h> #include <asm/firmware.h> -#ifndef __ASSEMBLY__ -#error __FILE__ should only be used in assembler files -#else +#ifdef __ASSEMBLY__ #define SZL (BITS_PER_LONG/8) @@ -265,10 +263,14 @@ n: * latter is for those that incidentally must be excluded from probing * and allows them to be linked at a more optimal location within text. 
*/ +#ifdef CONFIG_KPROBES #define _ASM_NOKPROBE_SYMBOL(entry) \ .pushsection "_kprobe_blacklist","aw"; \ PPC_LONG (entry) ; \ .popsection +#else +#define _ASM_NOKPROBE_SYMBOL(entry) +#endif #define FUNC_START(name) _GLOBAL(name) #define FUNC_END(name) @@ -503,7 +505,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) #define MTMSRD(r) mtmsrd r #define MTMSR_EERI(reg) mtmsrd reg,1 #else -#define FIX_SRR1(ra, rb) #ifndef CONFIG_40x #define RFI rfi #else @@ -779,5 +780,17 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) .long 0xa6037b7d; /* mtsrr1 r11 */ \ .long 0x2400004c /* rfid */ #endif /* !CONFIG_PPC_BOOK3E */ + #endif /* __ASSEMBLY__ */ + +/* + * Helper macro for exception table entries + */ +#define EX_TABLE(_fault, _target) \ + stringify_in_c(.section __ex_table,"a";)\ + stringify_in_c(.balign 4;) \ + stringify_in_c(.long (_fault) - . ;) \ + stringify_in_c(.long (_target) - . ;) \ + stringify_in_c(.previous) + #endif /* _ASM_POWERPC_PPC_ASM_H */ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index dac83fcb9445..e0fecbcea2a2 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -225,6 +225,7 @@ struct thread_struct { #ifdef CONFIG_PPC64 unsigned long start_tb; /* Start purr when proc switched in */ unsigned long accum_tb; /* Total accumulated purr for process */ +#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT struct perf_event *ptrace_bps[HBP_NUM]; /* @@ -233,7 +234,6 @@ struct thread_struct { */ struct perf_event *last_hit_ubp; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#endif struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */ unsigned long trap_nr; /* last trap # on this thread */ u8 load_fp; @@ -312,8 +312,6 @@ struct thread_struct { unsigned long mmcr2; unsigned mmcr0; unsigned used_ebb; - unsigned long lmrr; - unsigned long lmser; #endif }; @@ -456,7 +454,8 @@ extern int powersave_nap; /* set if nap mode can be used in idle loop */ extern unsigned long power7_nap(int check_irq); extern unsigned long power7_sleep(void); extern unsigned long power7_winkle(void); -extern unsigned long power9_idle_stop(unsigned long stop_level); +extern unsigned long power9_idle_stop(unsigned long stop_psscr_val, + unsigned long stop_psscr_mask); extern void flush_instruction_cache(void); extern void hard_reset_now(void); diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 7f436ba1b56f..8af2546ea593 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -121,6 +121,8 @@ struct of_drconf_cell { #define OV1_PPC_2_06 0x02 /* set if we support PowerPC 2.06 */ #define OV1_PPC_2_07 0x01 /* set if we support PowerPC 2.07 */ +#define OV1_PPC_3_00 0x80 /* set if we support PowerPC 3.00 */ + /* Option vector 2: Open Firmware options supported */ #define OV2_REAL_MODE 0x20 /* set if we want OF in real mode */ @@ -151,19 +153,20 @@ struct of_drconf_cell { #define OV5_XCMO 0x0440 /* Page Coalescing */ #define OV5_TYPE1_AFFINITY 0x0580 /* Type 1 NUMA affinity */ #define OV5_PRRN 0x0540 /* Platform Resource Reassignment */ -#define OV5_PFO_HW_RNG 0x0E80 /* PFO Random Number Generator */ -#define OV5_PFO_HW_842 0x0E40 /* PFO Compression Accelerator */ -#define OV5_PFO_HW_ENCR 0x0E20 /* PFO Encryption Accelerator */ -#define OV5_SUB_PROCESSORS 0x0F01 /* 1,2,or 4 Sub-Processors supported */ +#define OV5_PFO_HW_RNG 0x1180 /* PFO Random Number Generator */ +#define OV5_PFO_HW_842 0x1140 /* PFO Compression Accelerator */ +#define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */ 
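The OV5_* renumbering above and below follows a simple convention: each 16-bit constant appears to carry a byte index into option vector 5 in its high byte and a one-bit mask within that byte in its low byte (the wholesale change of values suggests the index now counts from the start of the vector rather than from the first data byte). Below is a minimal user-space sketch of how such a constant would be checked, assuming that encoding; OV5_INDX, OV5_FEAT and ov5_supported() are illustrative helpers rather than confirmed kernel API:

#include <stdio.h>

/* Assumed encoding of the OV5_* constants: high byte = byte index into
 * option vector 5, low byte = bit mask within that byte. */
#define OV5_INDX(x)	((x) >> 8)
#define OV5_FEAT(x)	((x) & 0xff)

#define OV5_MMU_RADIX_300	0x1880	/* taken from the hunk above */

/* Illustrative helper: does the firmware-supplied vector advertise
 * the given feature bit? */
static int ov5_supported(const unsigned char *vec5, unsigned int len,
			 unsigned int feature)
{
	unsigned int idx = OV5_INDX(feature);

	if (idx >= len)
		return 0;
	return (vec5[idx] & OV5_FEAT(feature)) != 0;
}

int main(void)
{
	unsigned char vec5[32] = { 0 };

	/* Pretend firmware set the radix bit during client-architecture-support */
	vec5[OV5_INDX(OV5_MMU_RADIX_300)] |= OV5_FEAT(OV5_MMU_RADIX_300);
	printf("radix MMU supported: %d\n",
	       ov5_supported(vec5, sizeof(vec5), OV5_MMU_RADIX_300));
	return 0;
}

Read this way, OV5_MMU_RADIX_300 (0x1880) names bit mask 0x80 of byte 0x18 of the vector.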
+#define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */ +#define OV5_XIVE_EXPLOIT 0x1701 /* XIVE exploitation supported */ +#define OV5_MMU_RADIX_300 0x1880 /* ISA v3.00 radix MMU supported */ +#define OV5_MMU_HASH_300 0x1840 /* ISA v3.00 hash MMU supported */ +#define OV5_MMU_SEGM_RADIX 0x1820 /* radix mode (no segmentation) */ +#define OV5_MMU_PROC_TBL 0x1810 /* hcall selects SLB or proc table */ +#define OV5_MMU_SLB 0x1800 /* always use SLB */ +#define OV5_MMU_GTSE 0x1808 /* Guest translation shootdown */ /* Option Vector 6: IBM PAPR hints */ #define OV6_LINUX 0x02 /* Linux is our OS */ -/* - * The architecture vector has an array of PVR mask/value pairs, - * followed by # option vectors - 1, followed by the option vectors. - */ -extern unsigned char ibm_architecture_vec[]; - #endif /* __KERNEL__ */ #endif /* _POWERPC_PROM_H */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 04aa1ee8cdb6..9c4689401298 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -274,10 +274,14 @@ #define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */ #define DSISR_NOHPTE 0x40000000 /* no translation found */ #define DSISR_PROTFAULT 0x08000000 /* protection fault */ +#define DSISR_BADACCESS 0x04000000 /* bad access to CI or G */ #define DSISR_ISSTORE 0x02000000 /* access was a store */ #define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */ #define DSISR_NOSEGMENT 0x00200000 /* SLB miss */ #define DSISR_KEYFAULT 0x00200000 /* Key fault */ +#define DSISR_UNSUPP_MMU 0x00080000 /* Unsupported MMU config */ +#define DSISR_SET_RC 0x00040000 /* Failed setting of R/C bits */ +#define DSISR_PGDIRFAULT 0x00020000 /* Fault on page directory */ #define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */ #define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ #define SPRN_CIR 0x11B /* Chip Information Register (hyper, R/0) */ @@ -295,8 +299,6 @@ #define SPRN_HRMOR 0x139 /* Real mode offset register */ #define SPRN_HSRR0 0x13A /* Hypervisor Save/Restore 0 */ #define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ -#define SPRN_LMRR 0x32D /* Load Monitor Region Register */ -#define SPRN_LMSER 0x32E /* Load Monitor Section Enable Register */ #define SPRN_ASDR 0x330 /* Access segment descriptor register */ #define SPRN_IC 0x350 /* Virtual Instruction Count */ #define SPRN_VTB 0x351 /* Virtual Time Base */ @@ -308,7 +310,6 @@ #define SPRN_PMCR 0x374 /* Power Management Control Register */ /* HFSCR and FSCR bit numbers are the same */ -#define FSCR_LM_LG 11 /* Enable Load Monitor Registers */ #define FSCR_MSGP_LG 10 /* Enable MSGP */ #define FSCR_TAR_LG 8 /* Enable Target Address Register */ #define FSCR_EBB_LG 7 /* Enable Event Based Branching */ @@ -319,12 +320,10 @@ #define FSCR_VECVSX_LG 1 /* Enable VMX/VSX */ #define FSCR_FP_LG 0 /* Enable Floating Point */ #define SPRN_FSCR 0x099 /* Facility Status & Control Register */ -#define FSCR_LM __MASK(FSCR_LM_LG) #define FSCR_TAR __MASK(FSCR_TAR_LG) #define FSCR_EBB __MASK(FSCR_EBB_LG) #define FSCR_DSCR __MASK(FSCR_DSCR_LG) #define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */ -#define HFSCR_LM __MASK(FSCR_LM_LG) #define HFSCR_MSGP __MASK(FSCR_MSGP_LG) #define HFSCR_TAR __MASK(FSCR_TAR_LG) #define HFSCR_EBB __MASK(FSCR_EBB_LG) @@ -343,7 +342,7 @@ #define LPCR_DPFD_SH 52 #define LPCR_DPFD (ASM_CONST(7) << LPCR_DPFD_SH) #define LPCR_VRMASD_SH 47 -#define LPCR_VRMASD (ASM_CONST(1) << LPCR_VRMASD_SH) +#define LPCR_VRMASD (ASM_CONST(0x1f) 
<< LPCR_VRMASD_SH) #define LPCR_VRMA_L ASM_CONST(0x0008000000000000) #define LPCR_VRMA_LP0 ASM_CONST(0x0001000000000000) #define LPCR_VRMA_LP1 ASM_CONST(0x0000800000000000) @@ -553,7 +552,9 @@ #define SPRN_IBAT7U 0x236 /* Instruction BAT 7 Upper Register */ #define SPRN_ICMP 0x3D5 /* Instruction TLB Compare Register */ #define SPRN_ICTC 0x3FB /* Instruction Cache Throttling Control Reg */ +#ifndef SPRN_ICTRL #define SPRN_ICTRL 0x3F3 /* 1011 7450 icache and interrupt ctrl */ +#endif #define ICTRL_EICE 0x08000000 /* enable icache parity errs */ #define ICTRL_EDC 0x04000000 /* enable dcache parity errs */ #define ICTRL_EICP 0x00000100 /* enable icache par. check */ diff --git a/arch/powerpc/include/asm/reg_8xx.h b/arch/powerpc/include/asm/reg_8xx.h index 0197e12f7d48..ae16fef7a4d6 100644 --- a/arch/powerpc/include/asm/reg_8xx.h +++ b/arch/powerpc/include/asm/reg_8xx.h @@ -4,7 +4,7 @@ #ifndef _ASM_POWERPC_REG_8xx_H #define _ASM_POWERPC_REG_8xx_H -#include <asm/mmu-8xx.h> +#include <asm/mmu.h> /* Cache control on the MPC8xx is provided through some additional * special purpose registers. @@ -28,6 +28,17 @@ /* Special MSR manipulation registers */ #define SPRN_EIE 80 /* External interrupt enable (EE=1, RI=1) */ #define SPRN_EID 81 /* External interrupt disable (EE=0, RI=1) */ +#define SPRN_NRI 82 /* Non recoverable interrupt (EE=0, RI=0) */ + +/* Debug registers */ +#define SPRN_CMPA 144 +#define SPRN_COUNTA 150 +#define SPRN_CMPE 152 +#define SPRN_CMPF 153 +#define SPRN_LCTRL1 156 +#define SPRN_LCTRL2 157 +#define SPRN_ICTRL 158 +#define SPRN_BAR 159 /* Commands. Only the first few are available to the instruction cache. */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 9c23baa10b81..076b89247ab5 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -318,6 +318,7 @@ struct pseries_hp_errorlog { #define PSERIES_HP_ELOG_ACTION_ADD 1 #define PSERIES_HP_ELOG_ACTION_REMOVE 2 +#define PSERIES_HP_ELOG_ACTION_READD 3 #define PSERIES_HP_ELOG_ID_DRC_NAME 1 #define PSERIES_HP_ELOG_ID_DRC_INDEX 2 diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 0d02c11dc331..32db16d2e7ad 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -176,7 +176,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys) #endif /* !CONFIG_SMP */ #endif /* !CONFIG_PPC64 */ -#if defined(CONFIG_PPC64) && (defined(CONFIG_SMP) || defined(CONFIG_KEXEC)) +#if defined(CONFIG_PPC64) && (defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)) extern void smp_release_cpus(void); #else static inline void smp_release_cpus(void) { }; diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index da3cdffca440..ba08cf461ca2 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -11,16 +11,17 @@ #define __HAVE_ARCH_MEMCMP #define __HAVE_ARCH_MEMCHR -extern char * strcpy(char *,const char *); -extern char * strncpy(char *,const char *, __kernel_size_t); -extern __kernel_size_t strlen(const char *); -extern int strcmp(const char *,const char *); -extern int strncmp(const char *, const char *, __kernel_size_t); -extern char * strcat(char *, const char *); +extern char * strcpy(char *,const char *) __nocapture(2); +extern char * strncpy(char *,const char *, __kernel_size_t) __nocapture(2); +extern __kernel_size_t strlen(const char *) __nocapture(1); +extern int strcmp(const char *,const char *) __nocapture(); +extern int +strncmp(const char *, const char *, 
__kernel_size_t) __nocapture(1, 2); +extern char * strcat(char *, const char *) __nocapture(2); extern void * memset(void *,int,__kernel_size_t); -extern void * memcpy(void *,const void *,__kernel_size_t); -extern void * memmove(void *,const void *,__kernel_size_t); -extern int memcmp(const void *,const void *,__kernel_size_t); +extern void * memcpy(void *,const void *,__kernel_size_t) __nocapture(2); +extern void * memmove(void *,const void *,__kernel_size_t) __nocapture(2); +extern int memcmp(const void *,const void *,__kernel_size_t) __nocapture(1, 2); extern void * memchr(const void *,int,__kernel_size_t); #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 2fc5d4db503c..4b369d83fe9c 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -386,3 +386,4 @@ SYSCALL(mlock2) SYSCALL(copy_file_range) COMPAT_SYS_SPU(preadv2) COMPAT_SYS_SPU(pwritev2) +SYSCALL(kexec_file_load) diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index 32e36b16773f..c05cef6ee06c 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -54,7 +54,7 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit, ); #ifdef CONFIG_PPC_PSERIES -extern void hcall_tracepoint_regfunc(void); +extern int hcall_tracepoint_regfunc(void); extern void hcall_tracepoint_unregfunc(void); TRACE_EVENT_FN_COND(hcall_entry, @@ -104,7 +104,7 @@ TRACE_EVENT_FN_COND(hcall_exit, #endif #ifdef CONFIG_PPC_POWERNV -extern void opal_tracepoint_regfunc(void); +extern int opal_tracepoint_regfunc(void); extern void opal_tracepoint_unregfunc(void); TRACE_EVENT_FN(opal_entry, diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index c266227fdd5b..0e6add3187bc 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -7,6 +7,7 @@ #include <linux/sched.h> #include <linux/errno.h> #include <asm/asm-compat.h> +#include <asm/ppc_asm.h> #include <asm/processor.h> #include <asm/page.h> @@ -63,23 +64,30 @@ __access_ok((__force unsigned long)(addr), (size), get_fs())) /* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is + * The exception table consists of pairs of relative addresses: the first is + * the address of an instruction that is allowed to fault, and the second is * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. + * modified, so it is entirely up to the continuation code to figure out what + * to do. * - * All the routines below use bits of fixup code that are out of line - * with the main instruction path. This means when everything is well, - * we don't even have to jump over them. Further, they do not intrude - * on our cache or tlb entries. + * All the routines below use bits of fixup code that are out of line with the + * main instruction path. This means when everything is well, we don't even + * have to jump over them. Further, they do not intrude on our cache or tlb + * entries. */ +#define ARCH_HAS_RELATIVE_EXTABLE + struct exception_table_entry { - unsigned long insn; - unsigned long fixup; + int insn; + int fixup; }; +static inline unsigned long extable_fixup(const struct exception_table_entry *x) +{ + return (unsigned long)&x->fixup + x->fixup; +} + /* * These are the main single-value transfer routines. 
They automatically * use the right size if we just have the right pointer type. @@ -132,10 +140,7 @@ extern long __put_user_bad(void); "3: li %0,%3\n" \ " b 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".previous" \ + EX_TABLE(1b, 3b) \ : "=r" (err) \ : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err)) @@ -152,11 +157,8 @@ extern long __put_user_bad(void); "4: li %0,%3\n" \ " b 3b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,4b\n" \ - PPC_LONG "2b,4b\n" \ - ".previous" \ + EX_TABLE(1b, 4b) \ + EX_TABLE(2b, 4b) \ : "=r" (err) \ : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err)) #endif /* __powerpc64__ */ @@ -215,10 +217,7 @@ extern long __get_user_bad(void); " li %1,0\n" \ " b 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".previous" \ + EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (x) \ : "b" (addr), "i" (-EFAULT), "0" (err)) @@ -237,11 +236,8 @@ extern long __get_user_bad(void); " li %1+1,0\n" \ " b 3b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,4b\n" \ - PPC_LONG "2b,4b\n" \ - ".previous" \ + EX_TABLE(1b, 4b) \ + EX_TABLE(2b, 4b) \ : "=r" (err), "=&r" (x) \ : "b" (addr), "i" (-EFAULT), "0" (err)) #endif /* __powerpc64__ */ @@ -265,7 +261,7 @@ do { \ ({ \ long __gu_err; \ unsigned long __gu_val; \ - __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ @@ -278,7 +274,7 @@ do { \ ({ \ long __gu_err = -EFAULT; \ unsigned long __gu_val = 0; \ - __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ might_fault(); \ if (access_ok(VERIFY_READ, __gu_addr, (size))) \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ @@ -290,7 +286,7 @@ do { \ ({ \ long __gu_err; \ unsigned long __gu_val; \ - __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index e8cdfec8d512..eb1acee91a20 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include <uapi/asm/unistd.h> -#define NR_syscalls 382 +#define NR_syscalls 383 #define __NR__exit __NR_exit diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h index 4afe66aa1400..f3f4710d4ff5 100644 --- a/arch/powerpc/include/asm/word-at-a-time.h +++ b/arch/powerpc/include/asm/word-at-a-time.h @@ -7,6 +7,7 @@ #include <linux/kernel.h> #include <asm/asm-compat.h> +#include <asm/ppc_asm.h> #ifdef __BIG_ENDIAN__ @@ -193,10 +194,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) #endif "b 2b\n" ".previous\n" - ".section __ex_table,\"a\"\n\t" - PPC_LONG_ALIGN "\n\t" - PPC_LONG "1b,3b\n" - ".previous" + EX_TABLE(1b, 3b) : [tmp] "=&b" (tmp), [offset] "=&r" (offset), [ret] "=&r" (ret) : [addr] "b" (addr), "m" (*(unsigned long *)addr)); diff --git a/arch/powerpc/include/uapi/asm/auxvec.h b/arch/powerpc/include/uapi/asm/auxvec.h index ce17d2c9eb4e..be6e94ecec42 100644 --- a/arch/powerpc/include/uapi/asm/auxvec.h +++ b/arch/powerpc/include/uapi/asm/auxvec.h @@ -16,6 +16,37 @@ */ #define AT_SYSINFO_EHDR 33 
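As a concrete illustration of the relative exception-table format adopted in the uaccess.h and word-at-a-time.h hunks above (ARCH_HAS_RELATIVE_EXTABLE, with EX_TABLE() emitting ".long (_fault) - ." pairs), the following user-space sketch resolves a 32-bit self-relative offset back to an absolute address. extable_fixup() copies the helper from the uaccess.h hunk; extable_insn() is a hypothetical mirror for the faulting-instruction side:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the relative layout from the uaccess.h hunk: each 32-bit field
 * stores its target as an offset from the field's own address. */
struct exception_table_entry {
	int insn;
	int fixup;
};

/* Same computation as the kernel helper shown above. */
static unsigned long extable_fixup(const struct exception_table_entry *x)
{
	return (unsigned long)&x->fixup + x->fixup;
}

/* Hypothetical mirror image for the faulting instruction address. */
static unsigned long extable_insn(const struct exception_table_entry *x)
{
	return (unsigned long)&x->insn + x->insn;
}

static char faulting_insn, fixup_code;
static struct exception_table_entry e;

int main(void)
{
	/* What EX_TABLE() computes with ".long (_fault) - ." at assembly time */
	e.insn  = (int)((uintptr_t)&faulting_insn - (uintptr_t)&e.insn);
	e.fixup = (int)((uintptr_t)&fixup_code - (uintptr_t)&e.fixup);

	printf("insn:  %p resolves to %#lx\n", (void *)&faulting_insn, extable_insn(&e));
	printf("fixup: %p resolves to %#lx\n", (void *)&fixup_code, extable_fixup(&e));
	return 0;
}

The usual motivation for this layout is that each entry shrinks from 16 to 8 bytes on 64-bit and, being position-independent, needs no relocation even in a relocatable kernel, which is also what makes build-time sorting of the table possible.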
-#define AT_VECTOR_SIZE_ARCH 6 /* entries in ARCH_DLINFO */ +/* + * AT_*CACHEBSIZE above represent the cache *block* size which is + * the size that is affected by the cache management instructions. + * + * It doesn't necessarily match the cache *line* size which is + * more of a performance tuning hint. Additionally, the latter can + * be different for the different cache levels. + * + * The set of entries below represent more extensive information + * about the caches, in the form of two entries per cache type, + * one entry containing the cache size in bytes, and the other + * containing the cache line size in bytes in the bottom 16 bits + * and the cache associativity in the next 16 bits. + * + * The associativity is such that if N is the 16-bit value, the + * cache is N-way set associative. A value of 0xffff means fully + * associative, a value of 1 means directly mapped. + * + * For all these fields, a value of 0 means that the information + * is not known. + */ + +#define AT_L1I_CACHESIZE 40 +#define AT_L1I_CACHEGEOMETRY 41 +#define AT_L1D_CACHESIZE 42 +#define AT_L1D_CACHEGEOMETRY 43 +#define AT_L2_CACHESIZE 44 +#define AT_L2_CACHEGEOMETRY 45 +#define AT_L3_CACHESIZE 46 +#define AT_L3_CACHEGEOMETRY 47 + +#define AT_VECTOR_SIZE_ARCH 14 /* entries in ARCH_DLINFO */ #endif diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h index 3a9e44c45c78..b2c6fdd5ac30 100644 --- a/arch/powerpc/include/uapi/asm/elf.h +++ b/arch/powerpc/include/uapi/asm/elf.h @@ -162,29 +162,6 @@ typedef elf_vrreg_t elf_vrregset_t32[ELF_NVRREG32]; typedef elf_fpreg_t elf_vsrreghalf_t32[ELF_NVSRHALFREG]; #endif - -/* - * The requirements here are: - * - keep the final alignment of sp (sp & 0xf) - * - make sure the 32-bit value at the first 16 byte aligned position of - * AUXV is greater than 16 for glibc compatibility. - * AT_IGNOREPPC is used for that. - * - for compatibility with glibc ARCH_DLINFO must always be defined on PPC, - * even if DLINFO_ARCH_ITEMS goes to zero or is undefined. - * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes - */ -#define ARCH_DLINFO \ -do { \ - /* Handle glibc compatibility. */ \ - NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \ - NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \ - /* Cache size items */ \ - NEW_AUX_ENT(AT_DCACHEBSIZE, dcache_bsize); \ - NEW_AUX_ENT(AT_ICACHEBSIZE, icache_bsize); \ - NEW_AUX_ENT(AT_UCACHEBSIZE, ucache_bsize); \ - VDSO_AUX_ENT(AT_SYSINFO_EHDR, current->mm->context.vdso_base); \ -} while (0) - /* PowerPC64 relocations defined by the ABIs */ #define R_PPC64_NONE R_PPC_NONE #define R_PPC64_ADDR32 R_PPC_ADDR32 /* 32bit absolute address. */ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 3603b6f51b11..4edbe4bb0e8b 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -413,6 +413,26 @@ struct kvm_get_htab_header { __u16 n_invalid; }; +/* For KVM_PPC_CONFIGURE_V3_MMU */ +struct kvm_ppc_mmuv3_cfg { + __u64 flags; + __u64 process_table; /* second doubleword of partition table entry */ +}; + +/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */ +#define KVM_PPC_MMUV3_RADIX 1 /* 1 = radix mode, 0 = HPT */ +#define KVM_PPC_MMUV3_GTSE 2 /* global translation shootdown enb. 
*/ + +/* For KVM_PPC_GET_RMMU_INFO */ +struct kvm_ppc_rmmu_info { + struct kvm_ppc_radix_geom { + __u8 page_shift; + __u8 level_bits[4]; + __u8 pad[3]; + } geometries[8]; + __u32 ap_encodings[8]; +}; + /* Per-vcpu XICS interrupt controller state */ #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) @@ -613,5 +633,7 @@ struct kvm_get_htab_header { #define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40) #define KVM_XICS_MASKED (1ULL << 41) #define KVM_XICS_PENDING (1ULL << 42) +#define KVM_XICS_PRESENTED (1ULL << 43) +#define KVM_XICS_QUEUED (1ULL << 44) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index e9f5f41aa55a..2f26335a3c42 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -392,5 +392,6 @@ #define __NR_copy_file_range 379 #define __NR_preadv2 380 #define __NR_pwritev2 381 +#define __NR_kexec_file_load 382 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 1925341dbb9c..b1a9805c2eef 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -15,7 +15,7 @@ CFLAGS_btext.o += -fPIC endif CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) -CFLAGS_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) +CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) @@ -58,8 +58,6 @@ obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y) obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_RTAS_PROC) += rtas-proc.o -obj-$(CONFIG_IBMVIO) += vio.o -obj-$(CONFIG_IBMEBUS) += ibmebus.o obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ eeh_driver.o eeh_event.o eeh_sysfs.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o @@ -107,8 +105,13 @@ pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \ pci-common.o pci_of_scan.o obj-$(CONFIG_PCI_MSI) += msi.o -obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \ +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o crash.o \ machine_kexec_$(BITS).o +obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file_$(BITS).o kexec_elf_$(BITS).o +ifeq ($(CONFIG_HAVE_IMA_KEXEC)$(CONFIG_IMA),yy) +obj-y += ima_kexec.o +endif + obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o @@ -128,7 +131,7 @@ obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o obj-$(CONFIG_PPC64) += $(obj64-y) obj-$(CONFIG_PPC32) += $(obj32-y) -ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),) +ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE),) obj-y += ppc_save_regs.o endif diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 033f3385fa49..cbc7c42cdb74 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -20,7 +20,7 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <asm/processor.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/cache.h> #include <asm/cputable.h> #include <asm/emulated_ops.h> @@ -204,7 +204,7 @@ static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr) int i, size; #ifdef __powerpc64__ - size = ppc64_caches.dline_size; + size = ppc64_caches.l1d.block_size; #else size = L1_CACHE_BYTES; #endif diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 195a9fc8f81c..f25239b3a06f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ 
-160,12 +160,12 @@ int main(void) DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); #ifdef CONFIG_PPC64 - DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size)); - DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size)); - DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page)); - DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size)); - DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size)); - DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); + DEFINE(DCACHEL1BLOCKSIZE, offsetof(struct ppc64_caches, l1d.block_size)); + DEFINE(DCACHEL1LOGBLOCKSIZE, offsetof(struct ppc64_caches, l1d.log_block_size)); + DEFINE(DCACHEL1BLOCKSPERPAGE, offsetof(struct ppc64_caches, l1d.blocks_per_page)); + DEFINE(ICACHEL1BLOCKSIZE, offsetof(struct ppc64_caches, l1i.block_size)); + DEFINE(ICACHEL1LOGBLOCKSIZE, offsetof(struct ppc64_caches, l1i.log_block_size)); + DEFINE(ICACHEL1BLOCKSPERPAGE, offsetof(struct ppc64_caches, l1i.blocks_per_page)); /* paca */ DEFINE(PACA_SIZE, sizeof(struct paca_struct)); DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); @@ -249,9 +249,9 @@ int main(void) DEFINE(ACCOUNT_STARTTIME_USER, offsetof(struct paca_struct, accounting.starttime_user)); DEFINE(ACCOUNT_USER_TIME, - offsetof(struct paca_struct, accounting.user_time)); + offsetof(struct paca_struct, accounting.utime)); DEFINE(ACCOUNT_SYSTEM_TIME, - offsetof(struct paca_struct, accounting.system_time)); + offsetof(struct paca_struct, accounting.stime)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost)); DEFINE(PACA_SPRG_VDSO, offsetof(struct paca_struct, sprg_vdso)); @@ -262,9 +262,9 @@ int main(void) DEFINE(ACCOUNT_STARTTIME_USER, offsetof(struct thread_info, accounting.starttime_user)); DEFINE(ACCOUNT_USER_TIME, - offsetof(struct thread_info, accounting.user_time)); + offsetof(struct thread_info, accounting.utime)); DEFINE(ACCOUNT_SYSTEM_TIME, - offsetof(struct thread_info, accounting.system_time)); + offsetof(struct thread_info, accounting.stime)); #endif #endif /* CONFIG_PPC64 */ @@ -495,6 +495,7 @@ int main(void) DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); + DEFINE(KVM_RADIX, offsetof(struct kvm, arch.radix)); DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); @@ -534,6 +535,7 @@ int main(void) DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); + DEFINE(VCPU_FAULT_GPA, offsetof(struct kvm_vcpu, arch.fault_gpa)); DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr)); DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index f3e1f5d29dce..917188615bf5 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -96,6 +96,7 @@ _GLOBAL(__setup_cpu_power9) mtlr r11 beqlr li r0,0 + mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, 
LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) @@ -116,6 +117,7 @@ _GLOBAL(__restore_cpu_power9) mtlr r11 beqlr li r0,0 + mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index cfa0f81a5bb0..d10ad258d41a 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -18,7 +18,7 @@ #include <asm/kdump.h> #include <asm/prom.h> #include <asm/firmware.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/rtas.h> #ifdef DEBUG diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index e64a6016fba7..6877e3fa95bb 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -203,6 +203,10 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, for_each_sg(sgl, sg, nents, i) { sg->dma_address = sg_phys(sg) + get_dma_offset(dev); sg->dma_length = sg->length; + + if (attrs & DMA_ATTR_SKIP_CPU_SYNC) + continue; + __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); } @@ -235,7 +239,10 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev, unsigned long attrs) { BUG_ON(dir == DMA_NONE); - __dma_sync_page(page, offset, size, dir); + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + __dma_sync_page(page, offset, size, dir); + return page_to_phys(page) + offset + get_dma_offset(dev); } diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index f25731627d7f..9de7f79e702b 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -298,9 +298,17 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) * * For pHyp, we have to enable IO for log retrieval. Otherwise, * 0xFF's is always returned from PCI config space. + * + * When the @severity is EEH_LOG_PERM, the PE is going to be + * removed. Prior to that, the drivers for devices included in + * the PE will be closed. The drivers rely on a working IO path + * to bring the devices to a quiet state. Otherwise, PCI traffic + * from those devices after they are removed is likely to cause + * another unexpected EEH error. */ if (!(pe->type & EEH_PE_PHB)) { - if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG)) + if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG) || + severity == EEH_LOG_PERM) eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); /* @@ -372,7 +380,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) /* Find the PHB PE */ phb_pe = eeh_phb_pe_get(pe->phb); if (!phb_pe) { - pr_warn("%s Can't find PE for PHB#%d\n", + pr_warn("%s Can't find PE for PHB#%x\n", __func__, pe->phb->global_number); return -EEXIST; } @@ -664,7 +672,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) rc = eeh_ops->set_option(pe, function); if (rc) pr_warn("%s: Unexpected state change %d on " - "PHB#%d-PE#%x, err=%d\n", + "PHB#%x-PE#%x, err=%d\n", __func__, function, pe->phb->global_number, pe->addr, rc); @@ -808,76 +816,67 @@ static void *eeh_set_dev_freset(void *data, void *flag) } /** - * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second + * eeh_pe_reset_full - Complete a full reset process on the indicated PE * @pe: EEH PE * - * Assert the PCI #RST line for 1/4 second. + * This function executes a full reset procedure on a PE, including setting + * the appropriate flags, performing a fundamental or hot reset, and then + * deactivating the reset status. 
It is designed to be used within the EEH + * subsystem, as opposed to eeh_pe_reset which is exported to drivers and + * only performs a single operation at a time. + * + * This function will attempt to reset a PE three times before failing. */ -static void eeh_reset_pe_once(struct eeh_pe *pe) +int eeh_pe_reset_full(struct eeh_pe *pe) { + int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED); + int type = EEH_RESET_HOT; unsigned int freset = 0; + int i, state, ret; - /* Determine type of EEH reset required for - * Partitionable Endpoint, a hot-reset (1) - * or a fundamental reset (3). - * A fundamental reset required by any device under - * Partitionable Endpoint trumps hot-reset. + /* + * Determine the type of reset to perform - hot or fundamental. + * Hot reset is the default operation, unless any device under the + * PE requires a fundamental reset. */ eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); if (freset) - eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); - else - eeh_ops->reset(pe, EEH_RESET_HOT); + type = EEH_RESET_FUNDAMENTAL; - eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); -} + /* Mark the PE as in reset state and block config space accesses */ + eeh_pe_state_mark(pe, reset_state); -/** - * eeh_reset_pe - Reset the indicated PE - * @pe: EEH PE - * - * This routine should be called to reset indicated device, including - * PE. A PE might include multiple PCI devices and sometimes PCI bridges - * might be involved as well. - */ -int eeh_reset_pe(struct eeh_pe *pe) -{ - int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); - int i, state, ret; - - /* Mark as reset and block config space */ - eeh_pe_state_mark(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); - - /* Take three shots at resetting the bus */ + /* Make three attempts at resetting the bus */ for (i = 0; i < 3; i++) { - eeh_reset_pe_once(pe); + ret = eeh_pe_reset(pe, type); + if (ret) + break; - /* - * EEH_PE_ISOLATED is expected to be removed after - * BAR restore. 
- */ + ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE); + if (ret) + break; + + /* Wait until the PE is in a functioning state */ state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if ((state & flags) == flags) { - ret = 0; - goto out; - } + if ((state & active_flags) == active_flags) + break; if (state < 0) { - pr_warn("%s: Unrecoverable slot failure on PHB#%d-PE#%x", + pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x", __func__, pe->phb->global_number, pe->addr); ret = -ENOTRECOVERABLE; - goto out; + break; } - /* We might run out of credits */ + /* Set error in case this is our last attempt */ ret = -EIO; pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n", __func__, state, pe->phb->global_number, pe->addr, (i + 1)); } -out: - eeh_pe_state_clear(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); + eeh_pe_state_clear(pe, reset_state); return ret; } @@ -1601,6 +1600,7 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe) return eeh_unfreeze_pe(pe, true); } + /** * eeh_pe_reset - Issue PE reset according to specified type * @pe: EEH PE diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 5c31369435f2..b94887165a10 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -545,7 +545,7 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) static void *__eeh_clear_pe_frozen_state(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; - bool *clear_sw_state = flag; + bool clear_sw_state = *(bool *)flag; int i, rc = 1; for (i = 0; rc && i < 3; i++) @@ -588,7 +588,7 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL); /* Issue reset */ - ret = eeh_reset_pe(pe); + ret = eeh_pe_reset_full(pe); if (ret) { eeh_pe_state_clear(pe, EEH_PE_RECOVERING); return ret; @@ -659,7 +659,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, * config accesses. So we prefer to block them. However, controlled * PCI config accesses initiated from EEH itself are allowed. */ - rc = eeh_reset_pe(pe); + rc = eeh_pe_reset_full(pe); if (rc) return rc; @@ -734,7 +734,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) frozen_bus = eeh_pe_bus_get(pe); if (!frozen_bus) { - pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n", + pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); return; } @@ -878,7 +878,7 @@ excess_failures: * are due to poorly seated PCI cards. Only 10% or so are * due to actual, failed cards. 
*/ - pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n" + pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n" "last hour and has been permanently disabled.\n" "Please try reseating or replacing it.\n", pe->phb->global_number, pe->addr, @@ -886,7 +886,7 @@ excess_failures: goto perm_error; hard_fail: - pr_err("EEH: Unable to recover from failure from PHB#%d-PE#%x.\n" + pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" "Please try reseating or replacing it\n", pe->phb->global_number, pe->addr); @@ -1000,7 +1000,7 @@ static void eeh_handle_special_event(void) bus = eeh_pe_bus_get(phb_pe); if (!bus) { pr_err("%s: Cannot find PCI bus for " - "PHB#%d-PE#%x\n", + "PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c index 82e7327e3cd0..accbf8b5fd46 100644 --- a/arch/powerpc/kernel/eeh_event.c +++ b/arch/powerpc/kernel/eeh_event.c @@ -75,11 +75,11 @@ static int eeh_event_handler(void * dummy) if (pe) { eeh_pe_state_mark(pe, EEH_PE_RECOVERING); if (pe->type & EEH_PE_PHB) - pr_info("EEH: Detected error on PHB#%d\n", + pr_info("EEH: Detected error on PHB#%x\n", pe->phb->global_number); else pr_info("EEH: Detected PCI bus error on " - "PHB#%d-PE#%x\n", + "PHB#%x-PE#%x\n", pe->phb->global_number, pe->addr); eeh_handle_event(pe); eeh_pe_state_clear(pe, EEH_PE_RECOVERING); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index de7d091c4c31..cc4b206f77e4 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -104,7 +104,7 @@ int eeh_phb_pe_create(struct pci_controller *phb) /* Put it into the list */ list_add_tail(&pe->child, &eeh_phb_pe); - pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number); + pr_debug("EEH: Add PE for PHB#%x\n", phb->global_number); return 0; } @@ -333,7 +333,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) /* Check if the PE number is valid */ if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) { - pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%d\n", + pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n", __func__, edev->config_addr, edev->phb->global_number); return -EINVAL; } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 3841d749a430..a38600949f3a 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -205,6 +205,9 @@ transfer_to_handler_cont: mflr r9 lwz r11,0(r9) /* virtual address of handler */ lwz r9,4(r9) /* where to go when done */ +#ifdef CONFIG_PPC_8xx_PERF_EVENT + mtspr SPRN_NRI, r0 +#endif #ifdef CONFIG_TRACE_IRQFLAGS lis r12,reenable_mmu@h ori r12,r12,reenable_mmu@l @@ -292,7 +295,9 @@ stack_ovf: lis r9,StackOverflow@ha addi r9,r9,StackOverflow@l LOAD_MSR_KERNEL(r10,MSR_KERNEL) - FIX_SRR1(r10,r12) +#ifdef CONFIG_PPC_8xx_PERF_EVENT + mtspr SPRN_NRI, r0 +#endif mtspr SPRN_SRR0,r9 mtspr SPRN_SRR1,r10 SYNC @@ -417,9 +422,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) mtlr r4 mtcr r5 lwz r7,_NIP(r1) - FIX_SRR1(r8, r0) lwz r2,GPR2(r1) lwz r1,GPR1(r1) +#ifdef CONFIG_PPC_8xx_PERF_EVENT + mtspr SPRN_NRI, r0 +#endif mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 SYNC @@ -699,6 +706,9 @@ fast_exception_return: lwz r10,_LINK(r11) mtlr r10 REST_GPR(10, r11) +#ifdef CONFIG_PPC_8xx_PERF_EVENT + mtspr SPRN_NRI, r0 +#endif mtspr SPRN_SRR1,r9 mtspr SPRN_SRR0,r12 REST_GPR(9, r11) @@ -947,7 +957,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) .globl exc_exit_restart exc_exit_restart: lwz r12,_NIP(r1) - FIX_SRR1(r9,r10) +#ifdef 
CONFIG_PPC_8xx_PERF_EVENT + mtspr SPRN_NRI, r0 +#endif mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r9 REST_4GPRS(9, r1) @@ -1290,7 +1302,6 @@ _GLOBAL(enter_rtas) 1: tophys(r9,r1) lwz r8,INT_FRAME_SIZE+4(r9) /* get return address */ lwz r9,8(r9) /* original msr value */ - FIX_SRR1(r9,r0) addi r1,r1,INT_FRAME_SIZE li r0,0 mtspr SPRN_SPRG_RTAS,r0 diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 38a1f96430e1..45b453e4d0c8 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -923,10 +923,10 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x340) addi r3,r1,STACK_FRAME_OVERHEAD - bl .save_nvgprs + bl save_nvgprs INTS_RESTORE_HARD - bl .unknown_exception - b .ret_from_except + bl unknown_exception + b ret_from_except /* * An interrupt came in while soft-disabled; We mark paca->irq_happened diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1ba82ea90230..f2a18432253c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -142,7 +142,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) lbz r0,HSTATE_HWTHREAD_REQ(r13) cmpwi r0,0 beq 1f - b kvm_start_guest + BRANCH_TO_KVM(r10, kvm_start_guest) 1: #endif @@ -381,12 +381,12 @@ EXC_COMMON_BEGIN(machine_check_handle_early) lbz r3,PACA_THREAD_IDLE_STATE(r13) cmpwi r3,PNV_THREAD_NAP bgt 10f - IDLE_STATE_ENTER_SEQ(PPC_NAP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) /* No return */ 10: cmpwi r3,PNV_THREAD_SLEEP bgt 2f - IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) /* No return */ 2: @@ -400,7 +400,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) */ ori r13,r13,1 SET_PACA(r13) - IDLE_STATE_ENTER_SEQ(PPC_WINKLE) + IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) /* No return */ 4: #endif @@ -717,13 +717,9 @@ hardware_interrupt_hv: BEGIN_FTR_SECTION _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV) -do_kvm_H0x500: - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502) FTR_SECTION_ELSE _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR) -do_kvm_0x500: - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) EXC_REAL_END(hardware_interrupt, 0x500, 0x600) @@ -737,6 +733,8 @@ hardware_interrupt_relon_hv: ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) EXC_VIRT_END(hardware_interrupt, 0x4500, 0x4600) +TRAMP_KVM(PACA_EXGEN, 0x500) +TRAMP_KVM_HV(PACA_EXGEN, 0x500) EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ) @@ -832,6 +830,31 @@ EXC_VIRT(trap_0b, 0x4b00, 0x4c00, 0xb00) TRAMP_KVM(PACA_EXGEN, 0xb00) EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + /* + * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems + * that support it) before changing to HMT_MEDIUM. That allows the KVM + * code to save that value into the guest state (it is the guest's PPR + * value). Otherwise just change to HMT_MEDIUM as userspace has + * already saved the PPR. 
+ */ +#define SYSCALL_KVMTEST \ + SET_SCRATCH0(r13); \ + GET_PACA(r13); \ + std r9,PACA_EXGEN+EX_R9(r13); \ + OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \ + HMT_MEDIUM; \ + std r10,PACA_EXGEN+EX_R10(r13); \ + OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); \ + mfcr r9; \ + KVMTEST_PR(0xc00); \ + GET_SCRATCH0(r13) + +#else +#define SYSCALL_KVMTEST \ + HMT_MEDIUM +#endif + #define LOAD_SYSCALL_HANDLER(reg) \ __LOAD_HANDLER(reg, system_call_common) @@ -885,34 +908,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ #endif EXC_REAL_BEGIN(system_call, 0xc00, 0xd00) - /* - * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems - * that support it) before changing to HMT_MEDIUM. That allows the KVM - * code to save that value into the guest state (it is the guest's PPR - * value). Otherwise just change to HMT_MEDIUM as userspace has - * already saved the PPR. - */ -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER - SET_SCRATCH0(r13) - GET_PACA(r13) - std r9,PACA_EXGEN+EX_R9(r13) - OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); - HMT_MEDIUM; - std r10,PACA_EXGEN+EX_R10(r13) - OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); - mfcr r9 - KVMTEST_PR(0xc00) - GET_SCRATCH0(r13) -#else - HMT_MEDIUM; -#endif + SYSCALL_KVMTEST SYSCALL_PSERIES_1 SYSCALL_PSERIES_2_RFID SYSCALL_PSERIES_3 EXC_REAL_END(system_call, 0xc00, 0xd00) EXC_VIRT_BEGIN(system_call, 0x4c00, 0x4d00) - HMT_MEDIUM + SYSCALL_KVMTEST SYSCALL_PSERIES_1 SYSCALL_PSERIES_2_DIRECT SYSCALL_PSERIES_3 @@ -927,7 +930,7 @@ TRAMP_KVM(PACA_EXGEN, 0xd00) EXC_COMMON(single_step_common, 0xd00, single_step_exception) EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0xe20) -EXC_VIRT_NONE(0x4e00, 0x4e20) +EXC_VIRT_OOL_HV(h_data_storage, 0x4e00, 0x4e20, 0xe00) TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00) EXC_COMMON_BEGIN(h_data_storage_common) mfspr r10,SPRN_HDAR @@ -943,7 +946,7 @@ EXC_COMMON_BEGIN(h_data_storage_common) EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0xe40) -EXC_VIRT_NONE(0x4e20, 0x4e40) +EXC_VIRT_OOL_HV(h_instr_storage, 0x4e20, 0x4e40, 0xe20) TRAMP_KVM_HV(PACA_EXGEN, 0xe20) EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception) @@ -979,7 +982,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early) EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) EXCEPTION_PROLOG_COMMON_3(0xe60) addi r3,r1,STACK_FRAME_OVERHEAD - bl hmi_exception_realmode + BRANCH_LINK_TO_FAR(r4, hmi_exception_realmode) /* Windup the stack. */ /* Move original HSRR0 and HSRR1 into the respective regs */ ld r9,_MSR(r1) @@ -1408,7 +1411,7 @@ USE_TEXT_SECTION() /* * Hash table stuff */ - .align 7 + .balign IFETCH_ALIGN_BYTES do_hash_page: #ifdef CONFIG_PPC_STD_MMU_64 andis. r0,r4,0xa410 /* weird error? */ diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 8f0c7c5d93f2..8ff0dd4e77a7 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -406,12 +406,35 @@ static void register_fw_dump(struct fadump_mem_struct *fdm) void crash_fadump(struct pt_regs *regs, const char *str) { struct fadump_crash_info_header *fdh = NULL; + int old_cpu, this_cpu; if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr) return; + /* + * old_cpu == -1 means this is the first CPU which has come here; + * go ahead and trigger fadump. + * + * old_cpu != -1 means some other CPU is already on its way + * to trigger fadump, just keep looping here. + */ + this_cpu = smp_processor_id(); + old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu); + + if (old_cpu != -1) { + /* + * We can't loop here indefinitely. Wait as long as fadump + * is in force. 
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index a95639b8d4ac..5c9f50c1aa99 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -47,13 +47,11 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) unsigned int replaced; /* - * Note: Due to modules and __init, code can - * disappear and change, we need to protect against faulting - * as well as code changing. We do this by using the - * probe_kernel_* functions. - * - * No real locking needed, this code is run through - * kstop_machine, or before SMP starts. + * Note: + * We are paranoid about modifying text, as if a bug were to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with probe_kernel_*(), and make + * sure what we read is what we expected it to be before modifying it. */ /* read the text we want to modify */ diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 9d963547d243..1607be7c0ef2 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -869,7 +869,6 @@ __secondary_start: /* enable MMU and jump to start_secondary */ li r4,MSR_KERNEL - FIX_SRR1(r4,r5) lis r3,start_secondary@h ori r3,r3,start_secondary@l mtspr SPRN_SRR0,r3 @@ -977,7 +976,6 @@ start_here: ori r4,r4,2f@l tophys(r4,r4) li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR) - FIX_SRR1(r3,r5) mtspr SPRN_SRR0,r4 mtspr SPRN_SRR1,r3 SYNC @@ -1001,7 +999,6 @@ start_here: /* Now turn on the MMU for real! */ li r4,MSR_KERNEL - FIX_SRR1(r4,r5) lis r3,start_kernel@h ori r3,r3,start_kernel@l mtspr SPRN_SRR0,r3 diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 04c546e20cc0..1dc5eae2ced3 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -107,12 +107,19 @@ __secondary_hold_acknowledge: * crash_kernel region. The loader is responsible for * observing the alignment requirement. */ + +#ifdef CONFIG_RELOCATABLE_TEST +#define RUN_AT_LOAD_DEFAULT 1 /* Test relocation, do not copy to 0 */ +#else +#define RUN_AT_LOAD_DEFAULT 0x72756e30 /* "run0" -- relocate to 0 by default */ +#endif + /* Do not move this variable as kexec-tools knows about it. */ . = 0x5c .globl __run_at_load __run_at_load: DEFINE_FIXED_SYMBOL(__run_at_load) - .long 0x72756e30 /* "run0" -- relocate to 0 by default */ + .long RUN_AT_LOAD_DEFAULT #endif . = 0x60 @@ -153,7 +160,7 @@ __secondary_hold: cmpdi 0,r12,0 beq 100b -#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC) +#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE) #ifdef CONFIG_PPC_BOOK3E tovirt(r12,r12) #endif @@ -214,9 +221,9 @@ booting_thread_hwid: */ _GLOBAL(book3e_start_thread) LOAD_REG_IMMEDIATE(r5, MSR_KERNEL) - cmpi 0, r3, 0 + cmpwi r3, 0 beq 10f - cmpi 0, r3, 1 + cmpwi r3, 1 beq 11f /* If the thread id is invalid, just exit. */ b 13f @@ -241,9 +248,9 @@ _GLOBAL(book3e_start_thread) * r3 = the thread physical id */ _GLOBAL(book3e_stop_thread) - cmpi 0, r3, 0 + cmpwi r3, 0 beq 10f - cmpi 0, r3, 1 + cmpwi r3, 1 beq 10f /* If the thread id is invalid, just exit.
*/ b 13f diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index fb133a163263..c032fe8c2d26 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -73,6 +73,9 @@ #define RPN_PATTERN 0x00f0 #endif +#define PAGE_SHIFT_512K 19 +#define PAGE_SHIFT_8M 23 + __HEAD _ENTRY(_stext); _ENTRY(_start); @@ -322,20 +325,28 @@ SystemCall: #endif InstructionTLBMiss: -#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) +#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) mtspr SPRN_SPRG_SCRATCH2, r3 #endif EXCEPTION_PROLOG_0 +#ifdef CONFIG_PPC_8xx_PERF_EVENT + lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha + lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) + addi r11, r11, 1 + stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) +#endif /* If we are faulting a kernel address, we have to use the * kernel page tables. */ mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) -#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ +#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) mfcr r3 +#endif +#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) IS_KERNEL(r11, r10) #endif mfspr r11, SPRN_M_TW /* Get level 1 table */ @@ -343,7 +354,6 @@ InstructionTLBMiss: BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: - mtcr r3 #endif /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 @@ -351,14 +361,25 @@ InstructionTLBMiss: /* Extract level 2 index */ rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 +#ifdef CONFIG_HUGETLB_PAGE + mtcr r11 + bt- 28, 10f /* bit 28 = Large page (8M) */ + bt- 29, 20f /* bit 29 = Large page (8M or 512k) */ +#endif rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ - +4: +#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) + mtcr r3 +#endif /* Insert the APG into the TWC from the Linux PTE. */ rlwimi r11, r10, 0, 25, 26 /* Load the MI_TWC with the attributes for this "segment." */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ +#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) + rlwimi r10, r11, 1, MI_SPS16K +#endif #ifdef CONFIG_SWAP rlwinm r11, r10, 32-5, _PAGE_PRESENT and r11, r11, r10 @@ -371,20 +392,55 @@ InstructionTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. 
*/ +#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) + rlwimi r10, r11, 0, 0x0ff0 /* Set 24-27, clear 20-23 */ +#else rlwimi r10, r11, 0, 0x0ff8 /* Set 24-27, clear 20-23,28 */ +#endif MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ -#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) +#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) mfspr r3, SPRN_SPRG_SCRATCH2 #endif EXCEPTION_EPILOG_0 rfi +#ifdef CONFIG_HUGETLB_PAGE +10: /* 8M pages */ +#ifdef CONFIG_PPC_16K_PAGES + /* Extract level 2 index */ + rlwinm r10, r10, 32 - (PAGE_SHIFT_8M - PAGE_SHIFT), 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1), 29 + /* Add level 2 base */ + rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1) - 1 +#else + /* Level 2 base */ + rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK +#endif + lwz r10, 0(r10) /* Get the pte */ + rlwinm r11, r11, 0, 0xf + b 4b + +20: /* 512k pages */ + /* Extract level 2 index */ + rlwinm r10, r10, 32 - (PAGE_SHIFT_512K - PAGE_SHIFT), 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1), 29 + /* Add level 2 base */ + rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1 + lwz r10, 0(r10) /* Get the pte */ + rlwinm r11, r11, 0, 0xf + b 4b +#endif + . = 0x1200 DataStoreTLBMiss: mtspr SPRN_SPRG_SCRATCH2, r3 EXCEPTION_PROLOG_0 +#ifdef CONFIG_PPC_8xx_PERF_EVENT + lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha + lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) + addi r11, r11, 1 + stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) +#endif mfcr r3 /* If we are faulting a kernel address, we have to use the @@ -407,7 +463,6 @@ _ENTRY(DTLBMiss_jmp) #endif blt cr7, DTLBMissLinear 3: - mtcr r3 mfspr r10, SPRN_MD_EPN /* Insert level 1 index */ @@ -418,8 +473,15 @@ _ENTRY(DTLBMiss_jmp) */ /* Extract level 2 index */ rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 +#ifdef CONFIG_HUGETLB_PAGE + mtcr r11 + bt- 28, 10f /* bit 28 = Large page (8M) */ + bt- 29, 20f /* bit 29 = Large page (8M or 512k) */ +#endif rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ +4: + mtcr r3 /* Insert the Guarded flag and APG into the TWC from the Linux PTE. * It is bit 26-27 of both the Linux PTE and the TWC (at least @@ -434,6 +496,11 @@ _ENTRY(DTLBMiss_jmp) rlwimi r11, r10, 32-5, 30, 30 MTSPR_CPU6(SPRN_MD_TWC, r11, r3) + /* In 4k pages mode, SPS (bit 28) in RPN must match PS[1] (bit 29) + * In 16k pages mode, SPS is always 1 */ +#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) + rlwimi r10, r11, 1, MD_SPS16K +#endif /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. * We also need to know if the insn is a load/store, so: * Clear _PAGE_PRESENT and load that which will @@ -455,7 +522,11 @@ _ENTRY(DTLBMiss_jmp) * of the MMU. 
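In both miss handlers above, the level-1 entry is moved into the CR with mtcr so the two large-page flags (bits 28 and 29 in IBM numbering) can be tested with bt-, and the 8M/512k paths then rebuild the level-2 lookup with the larger page shift. The dispatch, reduced to a C sketch (L1_BIT_28/L1_BIT_29 and the index helpers are illustrative names, not the kernel's):

/* Illustrative only: pick the page shift encoded in a level-1 entry. */
static unsigned int miss_page_shift(unsigned long l1_entry)
{
	if (l1_entry & L1_BIT_28)	/* bit 28: 8M page */
		return PAGE_SHIFT_8M;	/* 23 */
	if (l1_entry & L1_BIT_29)	/* bit 29 set, 28 clear: 512k page */
		return PAGE_SHIFT_512K;	/* 19 */
	return PAGE_SHIFT;		/* normal page size */
}

The exact base and index masks differ between the 4k and 16k base-page configurations, as the CONFIG_PPC_16K_PAGES conditionals in the hunks show.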
*/ li r11, RPN_PATTERN +#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) + rlwimi r10, r11, 0, 24, 27 /* Set 24-27 */ +#else rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ +#endif rlwimi r10, r11, 0, 20, 20 /* clear 20 */ MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ @@ -465,6 +536,30 @@ _ENTRY(DTLBMiss_jmp) EXCEPTION_EPILOG_0 rfi +#ifdef CONFIG_HUGETLB_PAGE +10: /* 8M pages */ + /* Extract level 2 index */ +#ifdef CONFIG_PPC_16K_PAGES + rlwinm r10, r10, 32 - (PAGE_SHIFT_8M - PAGE_SHIFT), 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1), 29 + /* Add level 2 base */ + rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1) - 1 +#else + /* Level 2 base */ + rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK +#endif + lwz r10, 0(r10) /* Get the pte */ + rlwinm r11, r11, 0, 0xf + b 4b + +20: /* 512k pages */ + /* Extract level 2 index */ + rlwinm r10, r10, 32 - (PAGE_SHIFT_512K - PAGE_SHIFT), 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1), 29 + /* Add level 2 base */ + rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1 + lwz r10, 0(r10) /* Get the pte */ + rlwinm r11, r11, 0, 0xf + b 4b +#endif /* This is an instruction TLB error on the MPC8xx. This could be due * to many reasons, such as executing guarded memory or illegal instruction @@ -478,6 +573,7 @@ InstructionTLBError: andis. r10,r5,0x4000 beq+ 1f tlbie r4 +itlbie: /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ 1: EXC_XFER_LITE(0x400, handle_page_fault) @@ -502,6 +598,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ andis. r10,r5,0x4000 beq+ 1f tlbie r4 +dtlbie: 1: li r10,RPN_PATTERN mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ /* 0x300 is DataAccess exception, needed by bad_page_fault() */ @@ -519,8 +616,43 @@ DARFixed:/* Return from dcbx instruction bug workaround */ * support of breakpoints and such. Someday I will get around to * using them. */ - EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) + . = 0x1c00 +DataBreakpoint: + EXCEPTION_PROLOG_0 + mfcr r10 + mfspr r11, SPRN_SRR0 + cmplwi cr0, r11, (dtlbie - PAGE_OFFSET)@l + cmplwi cr7, r11, (itlbie - PAGE_OFFSET)@l + beq- cr0, 11f + beq- cr7, 11f + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 + addi r3,r1,STACK_FRAME_OVERHEAD + mfspr r4,SPRN_BAR + stw r4,_DAR(r11) + mfspr r5,SPRN_DSISR + EXC_XFER_EE(0x1c00, do_break) +11: + mtcr r10 + EXCEPTION_EPILOG_0 + rfi + +#ifdef CONFIG_PPC_8xx_PERF_EVENT + . 
= 0x1d00 +InstructionBreakpoint: + EXCEPTION_PROLOG_0 + lis r10, (instruction_counter - PAGE_OFFSET)@ha + lwz r11, (instruction_counter - PAGE_OFFSET)@l(r10) + addi r11, r11, -1 + stw r11, (instruction_counter - PAGE_OFFSET)@l(r10) + lis r10, 0xffff + ori r10, r10, 0x01 + mtspr SPRN_COUNTA, r10 + EXCEPTION_EPILOG_0 + rfi +#else EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) +#endif EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) @@ -586,6 +718,9 @@ _ENTRY(FixupDAR_cmp) /* Insert level 1 index */ 3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ + mtcr r11 + bt 28,200f /* bit 28 = Large page (8M) */ + bt 29,202f /* bit 29 = Large page (8M or 512K) */ rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ /* Insert level 2 index */ rlwimi r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 @@ -611,6 +746,27 @@ _ENTRY(FixupDAR_cmp) 141: mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed /* Nope, go back to normal TLB processing */ + /* concat physical page address(r11) and page offset(r10) */ +200: +#ifdef CONFIG_PPC_16K_PAGES + rlwinm r11, r11, 0, 0, 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1) - 1 + rlwimi r11, r10, 32 - (PAGE_SHIFT_8M - 2), 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1), 29 +#else + rlwinm r11, r10, 0, ~HUGEPD_SHIFT_MASK +#endif + lwz r11, 0(r11) /* Get the pte */ + /* concat physical page address(r11) and page offset(r10) */ + rlwimi r11, r10, 0, 32 - PAGE_SHIFT_8M, 31 + b 201b + +202: + rlwinm r11, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1 + rlwimi r11, r10, 32 - (PAGE_SHIFT_512K - 2), 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1), 29 + lwz r11, 0(r11) /* Get the pte */ + /* concat physical page address(r11) and page offset(r10) */ + rlwimi r11, r10, 0, 32 - PAGE_SHIFT_512K, 31 + b 201b + 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ mtspr SPRN_DSISR, r10 @@ -870,6 +1026,14 @@ initial_mmu: lis r8, IDC_ENABLE@h mtspr SPRN_DC_CST, r8 #endif + /* Disable debug mode entry on breakpoints */ + mfspr r8, SPRN_DER +#ifdef CONFIG_PPC_8xx_PERF_EVENT + rlwinm r8, r8, 0, ~0xc +#else + rlwinm r8, r8, 0, ~0x8 +#endif + mtspr SPRN_DER, r8 blr @@ -903,3 +1067,16 @@ cpu6_errata_word: .space 16 #endif +#ifdef CONFIG_PPC_8xx_PERF_EVENT + .globl itlb_miss_counter +itlb_miss_counter: + .space 4 + + .globl dtlb_miss_counter +dtlb_miss_counter: + .space 4 + + .globl instruction_counter +instruction_counter: + .space 4 +#endif diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 03d089b3ed72..146eaa9b350e 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -33,7 +33,7 @@ #include <asm/hw_breakpoint.h> #include <asm/processor.h> #include <asm/sstep.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> /* * Stores the breakpoints currently in use on each breakpoint address @@ -211,9 +211,11 @@ int hw_breakpoint_handler(struct die_args *args) int rc = NOTIFY_STOP; struct perf_event *bp; struct pt_regs *regs = args->regs; +#ifndef CONFIG_PPC_8xx int stepped = 1; - struct arch_hw_breakpoint *info; unsigned int instr; +#endif + struct arch_hw_breakpoint *info; unsigned long dar = regs->dar; /* Disable breakpoints during exception handling */ @@ -255,6 +257,7 @@ int hw_breakpoint_handler(struct die_args *args) (dar - bp->attr.bp_addr < bp->attr.bp_len))) info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; 
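The 8xx has no conventional PMU, so the CONFIG_PPC_8xx_PERF_EVENT hunks above fake one: the TLB-miss and breakpoint prologs bump plain words in memory (itlb_miss_counter, dtlb_miss_counter, instruction_counter, defined at the bottom of head_8xx.S). The C side of such a scheme only needs to snapshot those words; a hypothetical reader, not the actual perf driver API:

extern u32 itlb_miss_counter, dtlb_miss_counter;

/*
 * Free-running counts updated with plain lwz/addi/stw in the exception
 * prolog; a racing increment can be lost, so this is a statistic, not
 * an exact count. Consumers diff successive snapshots.
 */
static u64 read_8xx_tlb_misses(bool instruction)
{
	return instruction ? READ_ONCE(itlb_miss_counter)
			   : READ_ONCE(dtlb_miss_counter);
}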
+#ifndef CONFIG_PPC_8xx /* Do not emulate user-space instructions, instead single-step them */ if (user_mode(regs)) { current->thread.last_hit_ubp = bp; @@ -278,6 +281,7 @@ int hw_breakpoint_handler(struct die_args *args) perf_event_disable_inatomic(bp); goto out; } +#endif /* * As a policy, the callback is invoked in a 'trigger-after-execute' * fashion diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 72dac0b58061..5f61cc0349c0 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -40,9 +40,7 @@ #define _WORC GPR11 #define _PTCR GPR12 -#define PSSCR_HV_TEMPLATE PSSCR_ESL | PSSCR_EC | \ - PSSCR_PSLL_MASK | PSSCR_TR_MASK | \ - PSSCR_MTL_MASK +#define PSSCR_EC_ESL_MASK_SHIFTED (PSSCR_EC | PSSCR_ESL) >> 16 .text @@ -205,7 +203,7 @@ pnv_enter_arch207_idle_mode: stb r3,PACA_THREAD_IDLE_STATE(r13) cmpwi cr3,r3,PNV_THREAD_SLEEP bge cr3,2f - IDLE_STATE_ENTER_SEQ(PPC_NAP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) /* No return */ 2: /* Sleep or winkle */ @@ -239,7 +237,7 @@ pnv_fastsleep_workaround_at_entry: common_enter: /* common code for all the threads entering sleep or winkle */ bgt cr3,enter_winkle - IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) fastsleep_workaround_at_entry: ori r15,r15,PNV_CORE_IDLE_LOCK_BIT @@ -250,7 +248,7 @@ fastsleep_workaround_at_entry: /* Fast sleep workaround */ li r3,1 li r4,1 - bl opal_rm_config_cpu_idle_state + bl opal_config_cpu_idle_state /* Clear Lock bit */ li r0,0 @@ -261,10 +259,10 @@ fastsleep_workaround_at_entry: enter_winkle: bl save_sprs_to_stack - IDLE_STATE_ENTER_SEQ(PPC_WINKLE) + IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) /* - * r3 - requested stop state + * r3 - PSSCR value corresponding to the requested stop state. */ power_enter_stop: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE @@ -274,13 +272,22 @@ power_enter_stop: stb r4,HSTATE_HWTHREAD_STATE(r13) #endif /* + * Check if we are executing the lite variant with ESL=EC=0 + */ + andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED + clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */ + bne 1f + IDLE_STATE_ENTER_SEQ(PPC_STOP) + li r3,0 /* Since we didn't lose state, return 0 */ + b pnv_wakeup_noloss +/* * Check if the requested state is a deep idle state. */ - LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) +1: LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) cmpd r3,r4 bge 2f - IDLE_STATE_ENTER_SEQ(PPC_STOP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) 2: /* * Entering deep idle state. @@ -302,7 +309,7 @@ lwarx_loop_stop: bl save_sprs_to_stack - IDLE_STATE_ENTER_SEQ(PPC_STOP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) _GLOBAL(power7_idle) /* Now check if user or arch enabled NAP mode */ @@ -353,16 +360,17 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ 20: nop; - /* - * r3 - requested stop state + * r3 - The PSSCR value corresponding to the stop state. + * r4 - The PSSCR mask corresponding to the stop state. */ _GLOBAL(power9_idle_stop) - LOAD_REG_IMMEDIATE(r4, PSSCR_HV_TEMPLATE) - or r4,r4,r3 - mtspr SPRN_PSSCR, r4 - li r4, 1 + mfspr r5,SPRN_PSSCR + andc r5,r5,r4 + or r3,r3,r5 + mtspr SPRN_PSSCR,r3 LOAD_REG_ADDR(r5,power_enter_stop) + li r4,1 b pnv_powersave_common /* No return */ /* @@ -544,7 +552,7 @@ timebase_resync: */ ble cr3,clear_lock /* Time base re-sync */ - bl opal_rm_resync_timebase; + bl opal_resync_timebase; /* * If waking up from sleep, per core state is not lost, skip to * clear_lock.
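power9_idle_stop now receives both a PSSCR value (r3) and a mask (r4), and the mfspr/andc/or/mtspr sequence above is a plain read-modify-write. Its semantics in C, assuming mfspr()/mtspr() accessors:

	u64 psscr = mfspr(SPRN_PSSCR);

	psscr = (psscr & ~psscr_mask) | psscr_val;	/* andc + or */
	mtspr(SPRN_PSSCR, psscr);

The companion change in power_enter_stop tests PSSCR_EC | PSSCR_ESL in the requested value: when both are clear this is the "lite" variant, no hypervisor state is lost, and the wakeup path can return 0 via pnv_wakeup_noloss instead of restoring SPRs.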
@@ -633,7 +641,7 @@ hypervisor_state_restored: fastsleep_workaround_at_exit: li r3,1 li r4,0 - bl opal_rm_config_cpu_idle_state + bl opal_config_cpu_idle_state b timebase_resync /* diff --git a/arch/powerpc/kernel/ima_kexec.c b/arch/powerpc/kernel/ima_kexec.c new file mode 100644 index 000000000000..5ea42c937ca9 --- /dev/null +++ b/arch/powerpc/kernel/ima_kexec.c @@ -0,0 +1,223 @@ +/* + * Copyright (C) 2016 IBM Corporation + * + * Authors: + * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/slab.h> +#include <linux/kexec.h> +#include <linux/of.h> +#include <linux/memblock.h> +#include <linux/libfdt.h> + +static int get_addr_size_cells(int *addr_cells, int *size_cells) +{ + struct device_node *root; + + root = of_find_node_by_path("/"); + if (!root) + return -EINVAL; + + *addr_cells = of_n_addr_cells(root); + *size_cells = of_n_size_cells(root); + + of_node_put(root); + + return 0; +} + +static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr, + size_t *size) +{ + int ret, addr_cells, size_cells; + + ret = get_addr_size_cells(&addr_cells, &size_cells); + if (ret) + return ret; + + if (len < 4 * (addr_cells + size_cells)) + return -ENOENT; + + *addr = of_read_number(prop, addr_cells); + *size = of_read_number(prop + 4 * addr_cells, size_cells); + + return 0; +} + +/** + * ima_get_kexec_buffer - get IMA buffer from the previous kernel + * @addr: On successful return, set to point to the buffer contents. + * @size: On successful return, set to the buffer size. + * + * Return: 0 on success, negative errno on error. + */ +int ima_get_kexec_buffer(void **addr, size_t *size) +{ + int ret, len; + unsigned long tmp_addr; + size_t tmp_size; + const void *prop; + + prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len); + if (!prop) + return -ENOENT; + + ret = do_get_kexec_buffer(prop, len, &tmp_addr, &tmp_size); + if (ret) + return ret; + + *addr = __va(tmp_addr); + *size = tmp_size; + + return 0; +} + +/** + * ima_free_kexec_buffer - free memory used by the IMA buffer + */ +int ima_free_kexec_buffer(void) +{ + int ret; + unsigned long addr; + size_t size; + struct property *prop; + + prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL); + if (!prop) + return -ENOENT; + + ret = do_get_kexec_buffer(prop->value, prop->length, &addr, &size); + if (ret) + return ret; + + ret = of_remove_property(of_chosen, prop); + if (ret) + return ret; + + return memblock_free(addr, size); + +} + +/** + * remove_ima_buffer - remove the IMA buffer property and reservation from @fdt + * + * The IMA measurement buffer is of no use to a subsequent kernel, so we always + * remove it from the device tree. 
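do_get_kexec_buffer() above decodes the linux,ima-kexec-buffer property according to the root node's #address-cells/#size-cells; the matching encoder, write_number(), appears in setup_ima_buffer() further below. With the common addr_cells = size_cells = 2 layout, the property is 16 bytes: a big-endian 64-bit address followed by a big-endian 64-bit size. A standalone sketch of the decode for that fixed layout (the in-tree helper reads the cell counts from the live device tree instead of hard-coding them):

static int decode_kexec_buffer_prop(const __be32 *prop, int len,
				    u64 *addr, u64 *size)
{
	const int addr_cells = 2, size_cells = 2;	/* assumed layout */

	if (len < 4 * (addr_cells + size_cells))
		return -ENOENT;

	*addr = of_read_number(prop, addr_cells);		/* cells 0-1 */
	*size = of_read_number(prop + addr_cells, size_cells);	/* cells 2-3 */
	return 0;
}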
+ */ +void remove_ima_buffer(void *fdt, int chosen_node) +{ + int ret, len; + unsigned long addr; + size_t size; + const void *prop; + + prop = fdt_getprop(fdt, chosen_node, "linux,ima-kexec-buffer", &len); + if (!prop) + return; + + ret = do_get_kexec_buffer(prop, len, &addr, &size); + fdt_delprop(fdt, chosen_node, "linux,ima-kexec-buffer"); + if (ret) + return; + + ret = delete_fdt_mem_rsv(fdt, addr, size); + if (!ret) + pr_debug("Removed old IMA buffer reservation.\n"); +} + +#ifdef CONFIG_IMA_KEXEC +/** + * arch_ima_add_kexec_buffer - do arch-specific steps to add the IMA buffer + * + * Architectures should use this function to pass on the IMA buffer + * information to the next kernel. + * + * Return: 0 on success, negative errno on error. + */ +int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr, + size_t size) +{ + image->arch.ima_buffer_addr = load_addr; + image->arch.ima_buffer_size = size; + + return 0; +} + +static int write_number(void *p, u64 value, int cells) +{ + if (cells == 1) { + u32 tmp; + + if (value > U32_MAX) + return -EINVAL; + + tmp = cpu_to_be32(value); + memcpy(p, &tmp, sizeof(tmp)); + } else if (cells == 2) { + u64 tmp; + + tmp = cpu_to_be64(value); + memcpy(p, &tmp, sizeof(tmp)); + } else + return -EINVAL; + + return 0; +} + +/** + * setup_ima_buffer - add IMA buffer information to the fdt + * @image: kexec image being loaded. + * @fdt: Flattened device tree for the next kernel. + * @chosen_node: Offset to the chosen node. + * + * Return: 0 on success, or negative errno on error. + */ +int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node) +{ + int ret, addr_cells, size_cells, entry_size; + u8 value[16]; + + remove_ima_buffer(fdt, chosen_node); + if (!image->arch.ima_buffer_size) + return 0; + + ret = get_addr_size_cells(&addr_cells, &size_cells); + if (ret) + return ret; + + entry_size = 4 * (addr_cells + size_cells); + + if (entry_size > sizeof(value)) + return -EINVAL; + + ret = write_number(value, image->arch.ima_buffer_addr, addr_cells); + if (ret) + return ret; + + ret = write_number(value + 4 * addr_cells, image->arch.ima_buffer_size, + size_cells); + if (ret) + return ret; + + ret = fdt_setprop(fdt, chosen_node, "linux,ima-kexec-buffer", value, + entry_size); + if (ret < 0) + return -EINVAL; + + ret = fdt_add_mem_rsv(fdt, image->arch.ima_buffer_addr, + image->arch.ima_buffer_size); + if (ret) + return -EINVAL; + + pr_debug("IMA buffer at 0x%llx, size = 0x%zx\n", + image->arch.ima_buffer_addr, image->arch.ima_buffer_size); + + return 0; +} +#endif /* CONFIG_IMA_KEXEC */ diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 3963f0b68d52..a1854d1ded8b 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -8,6 +8,7 @@ #include <linux/export.h> #include <asm/io.h> #include <asm/pci-bridge.h> +#include <asm/isa-bridge.h> /* * Here comes the ppc64 implementation of the IOMAP diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 3c05c311e35e..a018f5cae899 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -55,7 +55,7 @@ #include <linux/of.h> #include <linux/of_irq.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/io.h> #include <asm/pgtable.h> #include <asm/irq.h> diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c index ae1316106e2b..bb6f8993412e 100644 --- a/arch/powerpc/kernel/isa-bridge.c +++ b/arch/powerpc/kernel/isa-bridge.c @@ -29,6 +29,7 @@ #include <asm/pci-bridge.h> 
#include <asm/machdep.h> #include <asm/ppc-pci.h> +#include <asm/isa-bridge.h> unsigned long isa_io_base; /* NULL if no ISA bus */ EXPORT_SYMBOL(isa_io_base); @@ -167,6 +168,97 @@ void __init isa_bridge_find_early(struct pci_controller *hose) } /** + * isa_bridge_init_non_pci - Find and map the IO space of a non-PCI ISA + * bridge so that ISA IO ports can be used before + * (and independently of) PCI discovery + */ +void __init isa_bridge_init_non_pci(struct device_node *np) +{ + const __be32 *ranges, *pbasep = NULL; + int rlen, i, rs; + u32 na, ns, pna; + u64 cbase, pbase, size = 0; + + /* If we already have an ISA bridge, bail out */ + if (isa_bridge_devnode != NULL) + return; + + pna = of_n_addr_cells(np); + if (of_property_read_u32(np, "#address-cells", &na) || + of_property_read_u32(np, "#size-cells", &ns)) { + pr_warn("ISA: Non-PCI bridge %s is missing address format\n", + np->full_name); + return; + } + + /* Check it's a supported address format */ + if (na != 2 || ns != 1) { + pr_warn("ISA: Non-PCI bridge %s has unsupported address format\n", + np->full_name); + return; + } + rs = na + ns + pna; + + /* Grab the ranges property */ + ranges = of_get_property(np, "ranges", &rlen); + if (ranges == NULL || rlen < rs) { + pr_warn("ISA: Non-PCI bridge %s has absent or invalid ranges\n", + np->full_name); + return; + } + + /* Parse it. We are only looking for IO space */ + for (i = 0; (i + rs - 1) < rlen; i += rs) { + if (be32_to_cpup(ranges + i) != 1) + continue; + cbase = be32_to_cpup(ranges + i + 1); + size = of_read_number(ranges + i + na + pna, ns); + pbasep = ranges + i + na; + break; + } + + /* Got something? */ + if (!size || !pbasep) { + pr_warn("ISA: Non-PCI bridge %s has no usable IO range\n", + np->full_name); + return; + } + + /* Align size and make sure it's cropped to 64K */ + size = PAGE_ALIGN(size); + if (size > 0x10000) + size = 0x10000; + + /* Map pbase */ + pbase = of_translate_address(np, pbasep); + if (pbase == OF_BAD_ADDR) { + pr_warn("ISA: Non-PCI bridge %s failed to translate IO base\n", + np->full_name); + return; + } + + /* We need page alignment */ + if ((cbase & ~PAGE_MASK) || (pbase & ~PAGE_MASK)) { + pr_warn("ISA: Non-PCI bridge %s has non-aligned IO range\n", + np->full_name); + return; + } + + /* Got it */ + isa_bridge_devnode = np; + + /* Set the global ISA io base to indicate we have an ISA bridge + * and map it + */ + isa_io_base = ISA_IO_BASE; + __ioremap_at(pbase, (void *)ISA_IO_BASE, + size, pgprot_val(pgprot_noncached(__pgprot(0)))); + + pr_debug("ISA: Non-PCI bridge is %s\n", np->full_name); +} + +/** * isa_bridge_find_late - Find and map the ISA IO space upon discovery of * a new ISA bridge */ diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c new file mode 100644 index 000000000000..9a42309b091a --- /dev/null +++ b/arch/powerpc/kernel/kexec_elf_64.c @@ -0,0 +1,663 @@ +/* + * Load ELF vmlinux file for the kexec_file_load syscall. + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * Copyright (C) 2016 IBM Corporation + * + * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c. + * Heavily modified for the kernel by + * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) "kexec_elf: " fmt + +#include <linux/elf.h> +#include <linux/kexec.h> +#include <linux/libfdt.h> +#include <linux/module.h> +#include <linux/of_fdt.h> +#include <linux/slab.h> +#include <linux/types.h> + +#define PURGATORY_STACK_SIZE (16 * 1024) + +#define elf_addr_to_cpu elf64_to_cpu + +#ifndef Elf_Rel +#define Elf_Rel Elf64_Rel +#endif /* Elf_Rel */ + +struct elf_info { + /* + * Where the ELF binary contents are kept. + * Memory managed by the user of the struct. + */ + const char *buffer; + + const struct elfhdr *ehdr; + const struct elf_phdr *proghdrs; + struct elf_shdr *sechdrs; +}; + +static inline bool elf_is_elf_file(const struct elfhdr *ehdr) +{ + return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0; +} + +static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, uint64_t value) +{ + if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) + value = le64_to_cpu(value); + else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) + value = be64_to_cpu(value); + + return value; +} + +static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value) +{ + if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) + value = le16_to_cpu(value); + else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) + value = be16_to_cpu(value); + + return value; +} + +static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value) +{ + if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) + value = le32_to_cpu(value); + else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) + value = be32_to_cpu(value); + + return value; +} + +/** + * elf_is_ehdr_sane - check that it is safe to use the ELF header + * @buf_len: size of the buffer in which the ELF file is loaded. + */ +static bool elf_is_ehdr_sane(const struct elfhdr *ehdr, size_t buf_len) +{ + if (ehdr->e_phnum > 0 && ehdr->e_phentsize != sizeof(struct elf_phdr)) { + pr_debug("Bad program header size.\n"); + return false; + } else if (ehdr->e_shnum > 0 && + ehdr->e_shentsize != sizeof(struct elf_shdr)) { + pr_debug("Bad section header size.\n"); + return false; + } else if (ehdr->e_ident[EI_VERSION] != EV_CURRENT || + ehdr->e_version != EV_CURRENT) { + pr_debug("Unknown ELF version.\n"); + return false; + } + + if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) { + size_t phdr_size; + + /* + * e_phnum is at most 65535 so calculating the size of the + * program header cannot overflow. + */ + phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum; + + /* Sanity check the program header table location. */ + if (ehdr->e_phoff + phdr_size < ehdr->e_phoff) { + pr_debug("Program headers at invalid location.\n"); + return false; + } else if (ehdr->e_phoff + phdr_size > buf_len) { + pr_debug("Program headers truncated.\n"); + return false; + } + } + + if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) { + size_t shdr_size; + + /* + * e_shnum is at most 65536 so calculating + * the size of the section header cannot overflow. + */ + shdr_size = sizeof(struct elf_shdr) * ehdr->e_shnum; + + /* Sanity check the section header table location. 
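These location checks all lean on the same unsigned-wraparound idiom: before comparing off + size against the buffer length, first make sure the addition itself did not wrap. The idiom in isolation (a sketch):

/* True iff [off, off + size) fits inside a buffer of buf_len bytes. */
static bool range_in_buffer(size_t off, size_t size, size_t buf_len)
{
	if (off + size < off)		/* unsigned addition wrapped */
		return false;
	return off + size <= buf_len;
}

Without the first test, a huge attacker-supplied offset could wrap past zero and slip under buf_len.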
*/ + if (ehdr->e_shoff + shdr_size < ehdr->e_shoff) { + pr_debug("Section headers at invalid location.\n"); + return false; + } else if (ehdr->e_shoff + shdr_size > buf_len) { + pr_debug("Section headers truncated.\n"); + return false; + } + } + + return true; +} + +static int elf_read_ehdr(const char *buf, size_t len, struct elfhdr *ehdr) +{ + struct elfhdr *buf_ehdr; + + if (len < sizeof(*buf_ehdr)) { + pr_debug("Buffer is too small to hold ELF header.\n"); + return -ENOEXEC; + } + + memset(ehdr, 0, sizeof(*ehdr)); + memcpy(ehdr->e_ident, buf, sizeof(ehdr->e_ident)); + if (!elf_is_elf_file(ehdr)) { + pr_debug("No ELF header magic.\n"); + return -ENOEXEC; + } + + if (ehdr->e_ident[EI_CLASS] != ELF_CLASS) { + pr_debug("Not a supported ELF class.\n"); + return -ENOEXEC; + } else if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB && + ehdr->e_ident[EI_DATA] != ELFDATA2MSB) { + pr_debug("Not a supported ELF data format.\n"); + return -ENOEXEC; + } + + buf_ehdr = (struct elfhdr *) buf; + if (elf16_to_cpu(ehdr, buf_ehdr->e_ehsize) != sizeof(*buf_ehdr)) { + pr_debug("Bad ELF header size.\n"); + return -ENOEXEC; + } + + ehdr->e_type = elf16_to_cpu(ehdr, buf_ehdr->e_type); + ehdr->e_machine = elf16_to_cpu(ehdr, buf_ehdr->e_machine); + ehdr->e_version = elf32_to_cpu(ehdr, buf_ehdr->e_version); + ehdr->e_entry = elf_addr_to_cpu(ehdr, buf_ehdr->e_entry); + ehdr->e_phoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_phoff); + ehdr->e_shoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_shoff); + ehdr->e_flags = elf32_to_cpu(ehdr, buf_ehdr->e_flags); + ehdr->e_phentsize = elf16_to_cpu(ehdr, buf_ehdr->e_phentsize); + ehdr->e_phnum = elf16_to_cpu(ehdr, buf_ehdr->e_phnum); + ehdr->e_shentsize = elf16_to_cpu(ehdr, buf_ehdr->e_shentsize); + ehdr->e_shnum = elf16_to_cpu(ehdr, buf_ehdr->e_shnum); + ehdr->e_shstrndx = elf16_to_cpu(ehdr, buf_ehdr->e_shstrndx); + + return elf_is_ehdr_sane(ehdr, len) ? 0 : -ENOEXEC; +} + +/** + * elf_is_phdr_sane - check that it is safe to use the program header + * @buf_len: size of the buffer in which the ELF file is loaded. + */ +static bool elf_is_phdr_sane(const struct elf_phdr *phdr, size_t buf_len) +{ + + if (phdr->p_offset + phdr->p_filesz < phdr->p_offset) { + pr_debug("ELF segment location wraps around.\n"); + return false; + } else if (phdr->p_offset + phdr->p_filesz > buf_len) { + pr_debug("ELF segment not in file.\n"); + return false; + } else if (phdr->p_paddr + phdr->p_memsz < phdr->p_paddr) { + pr_debug("ELF segment address wraps around.\n"); + return false; + } + + return true; +} + +static int elf_read_phdr(const char *buf, size_t len, struct elf_info *elf_info, + int idx) +{ + /* Override the const in proghdrs, we are the ones doing the loading. */ + struct elf_phdr *phdr = (struct elf_phdr *) &elf_info->proghdrs[idx]; + const char *pbuf; + struct elf_phdr *buf_phdr; + + pbuf = buf + elf_info->ehdr->e_phoff + (idx * sizeof(*buf_phdr)); + buf_phdr = (struct elf_phdr *) pbuf; + + phdr->p_type = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_type); + phdr->p_offset = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_offset); + phdr->p_paddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_paddr); + phdr->p_vaddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_vaddr); + phdr->p_flags = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_flags); + + /* + * The following fields have a type equivalent to Elf_Addr + * both in 32 bit and 64 bit ELF. 
+ */ + phdr->p_filesz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_filesz); + phdr->p_memsz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_memsz); + phdr->p_align = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_align); + + return elf_is_phdr_sane(phdr, len) ? 0 : -ENOEXEC; +} + +/** + * elf_read_phdrs - read the program headers from the buffer + * + * This function assumes that the program header table was checked for sanity. + * Use elf_is_ehdr_sane() if it wasn't. + */ +static int elf_read_phdrs(const char *buf, size_t len, + struct elf_info *elf_info) +{ + size_t phdr_size, i; + const struct elfhdr *ehdr = elf_info->ehdr; + + /* + * e_phnum is at most 65535 so calculating the size of the + * program header cannot overflow. + */ + phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum; + + elf_info->proghdrs = kzalloc(phdr_size, GFP_KERNEL); + if (!elf_info->proghdrs) + return -ENOMEM; + + for (i = 0; i < ehdr->e_phnum; i++) { + int ret; + + ret = elf_read_phdr(buf, len, elf_info, i); + if (ret) { + kfree(elf_info->proghdrs); + elf_info->proghdrs = NULL; + return ret; + } + } + + return 0; +} + +/** + * elf_is_shdr_sane - check that it is safe to use the section header + * @buf_len: size of the buffer in which the ELF file is loaded. + */ +static bool elf_is_shdr_sane(const struct elf_shdr *shdr, size_t buf_len) +{ + bool size_ok; + + /* SHT_NULL headers have undefined values, so we can't check them. */ + if (shdr->sh_type == SHT_NULL) + return true; + + /* Now verify sh_entsize */ + switch (shdr->sh_type) { + case SHT_SYMTAB: + size_ok = shdr->sh_entsize == sizeof(Elf_Sym); + break; + case SHT_RELA: + size_ok = shdr->sh_entsize == sizeof(Elf_Rela); + break; + case SHT_DYNAMIC: + size_ok = shdr->sh_entsize == sizeof(Elf_Dyn); + break; + case SHT_REL: + size_ok = shdr->sh_entsize == sizeof(Elf_Rel); + break; + case SHT_NOTE: + case SHT_PROGBITS: + case SHT_HASH: + case SHT_NOBITS: + default: + /* + * This is a section whose entsize requirements + * I don't care about. If I don't know about + * the section, I can't care about its entsize + * requirements. + */ + size_ok = true; + break; + } + + if (!size_ok) { + pr_debug("ELF section with wrong entry size.\n"); + return false; + } else if (shdr->sh_addr + shdr->sh_size < shdr->sh_addr) { + pr_debug("ELF section address wraps around.\n"); + return false; + } + + if (shdr->sh_type != SHT_NOBITS) { + if (shdr->sh_offset + shdr->sh_size < shdr->sh_offset) { + pr_debug("ELF section location wraps around.\n"); + return false; + } else if (shdr->sh_offset + shdr->sh_size > buf_len) { + pr_debug("ELF section not in file.\n"); + return false; + } + } + + return true; +} + +static int elf_read_shdr(const char *buf, size_t len, struct elf_info *elf_info, + int idx) +{ + struct elf_shdr *shdr = &elf_info->sechdrs[idx]; + const struct elfhdr *ehdr = elf_info->ehdr; + const char *sbuf; + struct elf_shdr *buf_shdr; + + sbuf = buf + ehdr->e_shoff + idx * sizeof(*buf_shdr); + buf_shdr = (struct elf_shdr *) sbuf; + + shdr->sh_name = elf32_to_cpu(ehdr, buf_shdr->sh_name); + shdr->sh_type = elf32_to_cpu(ehdr, buf_shdr->sh_type); + shdr->sh_addr = elf_addr_to_cpu(ehdr, buf_shdr->sh_addr); + shdr->sh_offset = elf_addr_to_cpu(ehdr, buf_shdr->sh_offset); + shdr->sh_link = elf32_to_cpu(ehdr, buf_shdr->sh_link); + shdr->sh_info = elf32_to_cpu(ehdr, buf_shdr->sh_info); + + /* + * The following fields have a type equivalent to Elf_Addr + * both in 32 bit and 64 bit ELF.
+ */ + shdr->sh_flags = elf_addr_to_cpu(ehdr, buf_shdr->sh_flags); + shdr->sh_size = elf_addr_to_cpu(ehdr, buf_shdr->sh_size); + shdr->sh_addralign = elf_addr_to_cpu(ehdr, buf_shdr->sh_addralign); + shdr->sh_entsize = elf_addr_to_cpu(ehdr, buf_shdr->sh_entsize); + + return elf_is_shdr_sane(shdr, len) ? 0 : -ENOEXEC; +} + +/** + * elf_read_shdrs - read the section headers from the buffer + * + * This function assumes that the section header table was checked for sanity. + * Use elf_is_ehdr_sane() if it wasn't. + */ +static int elf_read_shdrs(const char *buf, size_t len, + struct elf_info *elf_info) +{ + size_t shdr_size, i; + + /* + * e_shnum is at most 65536 so calculating + * the size of the section header cannot overflow. + */ + shdr_size = sizeof(struct elf_shdr) * elf_info->ehdr->e_shnum; + + elf_info->sechdrs = kzalloc(shdr_size, GFP_KERNEL); + if (!elf_info->sechdrs) + return -ENOMEM; + + for (i = 0; i < elf_info->ehdr->e_shnum; i++) { + int ret; + + ret = elf_read_shdr(buf, len, elf_info, i); + if (ret) { + kfree(elf_info->sechdrs); + elf_info->sechdrs = NULL; + return ret; + } + } + + return 0; +} + +/** + * elf_read_from_buffer - read ELF file and set up ELF header and ELF info + * @buf: Buffer to read ELF file from. + * @len: Size of @buf. + * @ehdr: Pointer to existing struct which will be populated. + * @elf_info: Pointer to existing struct which will be populated. + * + * This function allows reading ELF files with different byte order than + * the kernel, byte-swapping the fields as needed. + * + * Return: + * On success returns 0, and the caller should call elf_free_info(elf_info) to + * free the memory allocated for the section and program headers. + */ +int elf_read_from_buffer(const char *buf, size_t len, struct elfhdr *ehdr, + struct elf_info *elf_info) +{ + int ret; + + ret = elf_read_ehdr(buf, len, ehdr); + if (ret) + return ret; + + elf_info->buffer = buf; + elf_info->ehdr = ehdr; + if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) { + ret = elf_read_phdrs(buf, len, elf_info); + if (ret) + return ret; + } + if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) { + ret = elf_read_shdrs(buf, len, elf_info); + if (ret) { + kfree(elf_info->proghdrs); + return ret; + } + } + + return 0; +} + +/** + * elf_free_info - free memory allocated by elf_read_from_buffer + */ +void elf_free_info(struct elf_info *elf_info) +{ + kfree(elf_info->proghdrs); + kfree(elf_info->sechdrs); + memset(elf_info, 0, sizeof(*elf_info)); +} +/** + * build_elf_exec_info - read ELF executable and check that we can use it + */ +static int build_elf_exec_info(const char *buf, size_t len, struct elfhdr *ehdr, + struct elf_info *elf_info) +{ + int i; + int ret; + + ret = elf_read_from_buffer(buf, len, ehdr, elf_info); + if (ret) + return ret; + + /* Big endian vmlinux has type ET_DYN. */ + if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) { + pr_err("Not an ELF executable.\n"); + goto error; + } else if (!elf_info->proghdrs) { + pr_err("No ELF program header.\n"); + goto error; + } + + for (i = 0; i < ehdr->e_phnum; i++) { + /* + * Kexec does not support loading interpreters. + * In addition, this check keeps us from attempting + * to kexec ordinary executables.
+ */ + if (elf_info->proghdrs[i].p_type == PT_INTERP) { + pr_err("Requires an ELF interpreter.\n"); + goto error; + } + } + + return 0; +error: + elf_free_info(elf_info); + return -ENOEXEC; +} + +static int elf64_probe(const char *buf, unsigned long len) +{ + struct elfhdr ehdr; + struct elf_info elf_info; + int ret; + + ret = build_elf_exec_info(buf, len, &ehdr, &elf_info); + if (ret) + return ret; + + elf_free_info(&elf_info); + + return elf_check_arch(&ehdr) ? 0 : -ENOEXEC; +} + +/** + * elf_exec_load - load ELF executable image + * @lowest_load_addr: On return, will be the address where the first PT_LOAD + * section will be loaded in memory. + * + * Return: + * 0 on success, negative value on failure. + */ +static int elf_exec_load(struct kimage *image, struct elfhdr *ehdr, + struct elf_info *elf_info, + unsigned long *lowest_load_addr) +{ + unsigned long base = 0, lowest_addr = UINT_MAX; + int ret; + size_t i; + struct kexec_buf kbuf = { .image = image, .buf_max = ppc64_rma_size, + .top_down = false }; + + /* Read in the PT_LOAD segments. */ + for (i = 0; i < ehdr->e_phnum; i++) { + unsigned long load_addr; + size_t size; + const struct elf_phdr *phdr; + + phdr = &elf_info->proghdrs[i]; + if (phdr->p_type != PT_LOAD) + continue; + + size = phdr->p_filesz; + if (size > phdr->p_memsz) + size = phdr->p_memsz; + + kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset; + kbuf.bufsz = size; + kbuf.memsz = phdr->p_memsz; + kbuf.buf_align = phdr->p_align; + kbuf.buf_min = phdr->p_paddr + base; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + load_addr = kbuf.mem; + + if (load_addr < lowest_addr) + lowest_addr = load_addr; + } + + /* Update entry point to reflect new load address. */ + ehdr->e_entry += base; + + *lowest_load_addr = lowest_addr; + ret = 0; + out: + return ret; +} + +static void *elf64_load(struct kimage *image, char *kernel_buf, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len) +{ + int ret; + unsigned int fdt_size; + unsigned long kernel_load_addr, purgatory_load_addr; + unsigned long initrd_load_addr = 0, fdt_load_addr; + void *fdt; + const void *slave_code; + struct elfhdr ehdr; + struct elf_info elf_info; + struct kexec_buf kbuf = { .image = image, .buf_min = 0, + .buf_max = ppc64_rma_size }; + + ret = build_elf_exec_info(kernel_buf, kernel_len, &ehdr, &elf_info); + if (ret) + goto out; + + ret = elf_exec_load(image, &ehdr, &elf_info, &kernel_load_addr); + if (ret) + goto out; + + pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr); + + ret = kexec_load_purgatory(image, 0, ppc64_rma_size, true, + &purgatory_load_addr); + if (ret) { + pr_err("Loading purgatory failed.\n"); + goto out; + } + + pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr); + + if (initrd != NULL) { + kbuf.buffer = initrd; + kbuf.bufsz = kbuf.memsz = initrd_len; + kbuf.buf_align = PAGE_SIZE; + kbuf.top_down = false; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + initrd_load_addr = kbuf.mem; + + pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr); + } + + fdt_size = fdt_totalsize(initial_boot_params) * 2; + fdt = kmalloc(fdt_size, GFP_KERNEL); + if (!fdt) { + pr_err("Not enough memory for the device tree.\n"); + ret = -ENOMEM; + goto out; + } + ret = fdt_open_into(initial_boot_params, fdt, fdt_size); + if (ret < 0) { + pr_err("Error setting up the new device tree.\n"); + ret = -EINVAL; + goto out; + } + + ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline); + if (ret) + goto out; + 
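Every segment loaded above - the PT_LOAD contents, purgatory, the initrd, and next the device tree - goes through the same kexec_add_buffer() pattern: describe the data and its placement constraints in a struct kexec_buf, make the call, and read the chosen physical address back from kbuf.mem. The pattern condensed (a sketch using the same fields as the code above):

	struct kexec_buf kbuf = {
		.image	   = image,		/* kimage being assembled */
		.buffer	   = data,		/* bytes to copy in */
		.bufsz	   = data_len,		/* size of .buffer */
		.memsz	   = data_len,		/* memory to reserve */
		.buf_align = PAGE_SIZE,		/* placement alignment */
		.buf_min   = 0,			/* lowest allowed address */
		.buf_max   = ppc64_rma_size,	/* keep it inside the RMA */
		.top_down  = false,		/* search memory bottom-up */
	};
	int ret = kexec_add_buffer(&kbuf);

	if (!ret)
		load_addr = kbuf.mem;		/* where the segment landed */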
+ fdt_pack(fdt); + + kbuf.buffer = fdt; + kbuf.bufsz = kbuf.memsz = fdt_size; + kbuf.buf_align = PAGE_SIZE; + kbuf.top_down = true; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + fdt_load_addr = kbuf.mem; + + pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr); + + slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset; + ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr, + fdt_load_addr); + if (ret) + pr_err("Error setting up the purgatory.\n"); + +out: + elf_free_info(&elf_info); + + /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */ + return ret ? ERR_PTR(ret) : fdt; +} + +struct kexec_file_ops kexec_elf64_ops = { + .probe = elf64_probe, + .load = elf64_load, +}; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index e785cc9e1ecd..735ff3d3f77d 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -35,7 +35,7 @@ #include <asm/code-patching.h> #include <asm/cacheflush.h> #include <asm/sstep.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -140,13 +140,16 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, regs->link = (unsigned long)kretprobe_trampoline; } -static int __kprobes kprobe_handler(struct pt_regs *regs) +int __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0; unsigned int *addr = (unsigned int *)regs->nip; struct kprobe_ctlblk *kcb; + if (user_mode(regs)) + return 0; + /* * We don't want to be preempted for the entire * duration of kprobe processing @@ -359,12 +362,12 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, * single-stepped a copy of the instruction. The address of this * copy is p->ainsn.insn. */ -static int __kprobes post_kprobe_handler(struct pt_regs *regs) +int __kprobes kprobe_post_handler(struct pt_regs *regs) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - if (!cur) + if (!cur || user_mode(regs)) return 0; /* make sure we got here for instruction we have a kprobe on */ @@ -449,7 +452,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) * zero, try to fix up. 
*/ if ((entry = search_exception_tables(regs->nip)) != NULL) { - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return 1; } @@ -470,25 +473,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) int __kprobes kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - - if (args->regs && user_mode(args->regs)) - return ret; - - switch (val) { - case DIE_BPT: - if (kprobe_handler(args->regs)) - ret = NOTIFY_STOP; - break; - case DIE_SSTEP: - if (post_kprobe_handler(args->regs)) - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; + return NOTIFY_DONE; } unsigned long arch_deref_entry_point(void *entry) diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index bc525ea0dc09..0694d20f85b6 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -233,7 +233,8 @@ static int __init add_legacy_isa_port(struct device_node *np, * * Note: Don't even try on P8 lpc, we know it's not directly mapped */ - if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc")) { + if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc") || + of_get_property(isa_brg, "ranges", NULL)) { taddr = of_translate_address(np, reg); if (taddr == OF_BAD_ADDR) taddr = 0; diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index a205fa3d9bf3..5c12e21d0d1a 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -310,7 +310,7 @@ void default_machine_kexec(struct kimage *image) if (!kdump_in_progress()) kexec_prepare_cpus(); - pr_debug("kexec: Starting switchover sequence.\n"); + printk("kexec: Starting switchover sequence.\n"); /* switch to a staticly allocated stack. Based on irq stack code. * We setup preempt_count to avoid using VMX in memcpy. diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c new file mode 100644 index 000000000000..992c0d258e5d --- /dev/null +++ b/arch/powerpc/kernel/machine_kexec_file_64.c @@ -0,0 +1,347 @@ +/* + * ppc64 code to implement the kexec_file_load syscall + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * Copyright (C) 2016 IBM Corporation + * + * Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c. + * Heavily modified for the kernel by + * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
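The kprobes hunk above stops reading entry->fixup directly and asks extable_fixup() for the landing address instead, which is what allows the exception tables to be stored as sorted, relative entries. In that scheme the entry holds signed offsets rather than absolute addresses; a sketch of how the helper is typically defined, stated as an assumption rather than quoted from this patch:

struct exception_table_entry {
	int insn;	/* signed offset to the faulting instruction */
	int fixup;	/* signed offset to the fixup code */
};

static inline unsigned long
extable_fixup(const struct exception_table_entry *x)
{
	/* resolve the offset relative to the field's own address */
	return (unsigned long)&x->fixup + x->fixup;
}

Going through the accessor keeps callers working whether fixups are stored absolutely or relatively.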
+ */ + +#include <linux/slab.h> +#include <linux/kexec.h> +#include <linux/memblock.h> +#include <linux/of_fdt.h> +#include <linux/libfdt.h> +#include <asm/ima.h> + +#define SLAVE_CODE_SIZE 256 + +static struct kexec_file_ops *kexec_file_loaders[] = { + &kexec_elf64_ops, +}; + +int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + int i, ret = -ENOEXEC; + struct kexec_file_ops *fops; + + /* We don't support crash kernels yet. */ + if (image->type == KEXEC_TYPE_CRASH) + return -ENOTSUPP; + + for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) { + fops = kexec_file_loaders[i]; + if (!fops || !fops->probe) + continue; + + ret = fops->probe(buf, buf_len); + if (!ret) { + image->fops = fops; + return ret; + } + } + + return ret; +} + +void *arch_kexec_kernel_image_load(struct kimage *image) +{ + if (!image->fops || !image->fops->load) + return ERR_PTR(-ENOEXEC); + + return image->fops->load(image, image->kernel_buf, + image->kernel_buf_len, image->initrd_buf, + image->initrd_buf_len, image->cmdline_buf, + image->cmdline_buf_len); +} + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + if (!image->fops || !image->fops->cleanup) + return 0; + + return image->fops->cleanup(image->image_loader_data); +} + +/** + * arch_kexec_walk_mem - call func(data) for each unreserved memory block + * @kbuf: Context info for the search. Also passed to @func. + * @func: Function to call for each memory block. + * + * This function is used by kexec_add_buffer and kexec_locate_mem_hole + * to find unreserved memory to load kexec segments into. + * + * Return: The memory walk will stop when func returns a non-zero value + * and that value will be returned. If all free regions are visited without + * func returning non-zero, then zero will be returned. + */ +int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *)) +{ + int ret = 0; + u64 i; + phys_addr_t mstart, mend; + + if (kbuf->top_down) { + for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0, + &mstart, &mend, NULL) { + /* + * In memblock, end points to the first byte after the + * range while in kexec, end points to the last byte + * in the range. + */ + ret = func(mstart, mend - 1, kbuf); + if (ret) + break; + } + } else { + for_each_free_mem_range(i, NUMA_NO_NODE, 0, &mstart, &mend, + NULL) { + /* + * In memblock, end points to the first byte after the + * range while in kexec, end points to the last byte + * in the range. + */ + ret = func(mstart, mend - 1, kbuf); + if (ret) + break; + } + } + + return ret; +} + +/** + * setup_purgatory - initialize the purgatory's global variables + * @image: kexec image. + * @slave_code: Slave code for the purgatory. + * @fdt: Flattened device tree for the next kernel. + * @kernel_load_addr: Address where the kernel is loaded. + * @fdt_load_addr: Address where the flattened device tree is loaded. + * + * Return: 0 on success, or negative errno on error. + */ +int setup_purgatory(struct kimage *image, const void *slave_code, + const void *fdt, unsigned long kernel_load_addr, + unsigned long fdt_load_addr) +{ + unsigned int *slave_code_buf, master_entry; + int ret; + + slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL); + if (!slave_code_buf) + return -ENOMEM; + + /* Get the slave code from the new kernel and put it in purgatory. 
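arch_kexec_walk_mem() above hands each free memblock range to func(start, end, kbuf) - note that end is converted to point at the last byte of the range - and the walk stops on the first non-zero return. A toy callback illustrating that contract (the real consumer is the generic kexec_locate_mem_hole() logic; alignment and buf_min/buf_max clipping are ignored here for brevity):

static int first_fit(u64 start, u64 end, void *data)
{
	struct kexec_buf *kbuf = data;
	u64 size = end - start + 1;	/* end is inclusive here */

	if (size < kbuf->memsz)
		return 0;		/* too small: keep walking */

	kbuf->mem = start;		/* remember the candidate... */
	return 1;			/* ...and stop the walk */
}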
*/ + ret = kexec_purgatory_get_set_symbol(image, "purgatory_start", + slave_code_buf, SLAVE_CODE_SIZE, + true); + if (ret) { + kfree(slave_code_buf); + return ret; + } + + master_entry = slave_code_buf[0]; + memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE); + slave_code_buf[0] = master_entry; + ret = kexec_purgatory_get_set_symbol(image, "purgatory_start", + slave_code_buf, SLAVE_CODE_SIZE, + false); + kfree(slave_code_buf); + + ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr, + sizeof(kernel_load_addr), false); + if (ret) + return ret; + ret = kexec_purgatory_get_set_symbol(image, "dt_offset", &fdt_load_addr, + sizeof(fdt_load_addr), false); + if (ret) + return ret; + + return 0; +} + +/** + * delete_fdt_mem_rsv - delete memory reservation with given address and size + * + * Return: 0 on success, or negative errno on error. + */ +int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size) +{ + int i, ret, num_rsvs = fdt_num_mem_rsv(fdt); + + for (i = 0; i < num_rsvs; i++) { + uint64_t rsv_start, rsv_size; + + ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size); + if (ret) { + pr_err("Malformed device tree.\n"); + return -EINVAL; + } + + if (rsv_start == start && rsv_size == size) { + ret = fdt_del_mem_rsv(fdt, i); + if (ret) { + pr_err("Error deleting device tree reservation.\n"); + return -EINVAL; + } + + return 0; + } + } + + return -ENOENT; +} + +/* + * setup_new_fdt - modify /chosen and memory reservation for the next kernel + * @image: kexec image being loaded. + * @fdt: Flattened device tree for the next kernel. + * @initrd_load_addr: Address where the next initrd will be loaded. + * @initrd_len: Size of the next initrd, or 0 if there will be none. + * @cmdline: Command line for the next kernel, or NULL if there will + * be none. + * + * Return: 0 on success, or negative errno on error. + */ +int setup_new_fdt(const struct kimage *image, void *fdt, + unsigned long initrd_load_addr, unsigned long initrd_len, + const char *cmdline) +{ + int ret, chosen_node; + const void *prop; + + /* Remove memory reservation for the current device tree. */ + ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params), + fdt_totalsize(initial_boot_params)); + if (ret == 0) + pr_debug("Removed old device tree reservation.\n"); + else if (ret != -ENOENT) + return ret; + + chosen_node = fdt_path_offset(fdt, "/chosen"); + if (chosen_node == -FDT_ERR_NOTFOUND) { + chosen_node = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), + "chosen"); + if (chosen_node < 0) { + pr_err("Error creating /chosen.\n"); + return -EINVAL; + } + } else if (chosen_node < 0) { + pr_err("Malformed device tree: error reading /chosen.\n"); + return -EINVAL; + } + + /* Did we boot using an initrd? */ + prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL); + if (prop) { + uint64_t tmp_start, tmp_end, tmp_size; + + tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop)); + + prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL); + if (!prop) { + pr_err("Malformed device tree.\n"); + return -EINVAL; + } + tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop)); + + /* + * kexec reserves exact initrd size, while firmware may + * reserve a multiple of PAGE_SIZE, so check for both. 
+ */ + tmp_size = tmp_end - tmp_start; + ret = delete_fdt_mem_rsv(fdt, tmp_start, tmp_size); + if (ret == -ENOENT) + ret = delete_fdt_mem_rsv(fdt, tmp_start, + round_up(tmp_size, PAGE_SIZE)); + if (ret == 0) + pr_debug("Removed old initrd reservation.\n"); + else if (ret != -ENOENT) + return ret; + + /* If there's no new initrd, delete the old initrd's info. */ + if (initrd_len == 0) { + ret = fdt_delprop(fdt, chosen_node, + "linux,initrd-start"); + if (ret) { + pr_err("Error deleting linux,initrd-start.\n"); + return -EINVAL; + } + + ret = fdt_delprop(fdt, chosen_node, "linux,initrd-end"); + if (ret) { + pr_err("Error deleting linux,initrd-end.\n"); + return -EINVAL; + } + } + } + + if (initrd_len) { + ret = fdt_setprop_u64(fdt, chosen_node, + "linux,initrd-start", + initrd_load_addr); + if (ret < 0) { + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; + } + + /* initrd-end is the first address after the initrd image. */ + ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end", + initrd_load_addr + initrd_len); + if (ret < 0) { + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; + } + + ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len); + if (ret) { + pr_err("Error reserving initrd memory: %s\n", + fdt_strerror(ret)); + return -EINVAL; + } + } + + if (cmdline != NULL) { + ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline); + if (ret < 0) { + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; + } + } else { + ret = fdt_delprop(fdt, chosen_node, "bootargs"); + if (ret && ret != -FDT_ERR_NOTFOUND) { + pr_err("Error deleting bootargs.\n"); + return -EINVAL; + } + } + + ret = setup_ima_buffer(image, fdt, chosen_node); + if (ret) { + pr_err("Error setting up the new device tree.\n"); + return ret; + } + + ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0); + if (ret) { + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; + } + + return 0; +} diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 5e7ece0fda9f..c6923ff45131 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -72,7 +72,6 @@ void save_mce_event(struct pt_regs *regs, long handled, struct mce_error_info *mce_err, uint64_t nip, uint64_t addr) { - uint64_t srr1; int index = __this_cpu_inc_return(mce_nest_count) - 1; struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); @@ -99,8 +98,6 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; mce->severity = MCE_SEV_ERROR_SYNC; - srr1 = regs->msr; - /* * Populate the mce error_type and type-specific error_type. */ diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 93cf7a5846a6..84db14e435f5 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -296,7 +296,7 @@ _GLOBAL(flush_instruction_cache) lis r3, KERNELBASE@h iccci 0,r3 #endif -#elif CONFIG_FSL_BOOKE +#elif defined(CONFIG_FSL_BOOKE) BEGIN_FTR_SECTION mfspr r3,SPRN_L1CSR0 ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC @@ -614,7 +614,7 @@ _GLOBAL(start_secondary_resume) _GLOBAL(__main) blr -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* * Must be relocatable PIC code callable as a C function. */ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 4f178671f230..ae179cb1bb3c 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -80,12 +80,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) * each other. 
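The one-character misc_32.S fix above (#elif CONFIG_FSL_BOOKE becoming #elif defined(CONFIG_FSL_BOOKE)) closes a classic preprocessor trap: an identifier that is not defined evaluates to 0 inside #if/#elif, silently, and the bare form also trips -Wundef. A stand-alone reproduction of the pattern, with made-up config names:

#include <stdio.h>

/* #define CONFIG_FSL_BOOKE 1 */    /* leave unset to see the trap */

int main(void)
{
#if defined(CONFIG_4xx)
    puts("4xx path");
#elif defined(CONFIG_FSL_BOOKE)    /* the fixed, explicit form */
    puts("FSL BookE path");
#else
    puts("fallback path");
#endif
    return 0;
}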
*/ ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1LINESIZE(r10)/* Get cache line size */ + lwz r7,DCACHEL1BLOCKSIZE(r10)/* Get cache block size */ addi r5,r7,-1 andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of cache line size */ + lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of cache block size */ srw. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mtctr r8 @@ -96,12 +96,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) /* Now invalidate the instruction cache */ - lwz r7,ICACHEL1LINESIZE(r10) /* Get Icache line size */ + lwz r7,ICACHEL1BLOCKSIZE(r10) /* Get Icache block size */ addi r5,r7,-1 andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 - lwz r9,ICACHEL1LOGLINESIZE(r10) /* Get log-2 of Icache line size */ + lwz r9,ICACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of Icache block size */ srw. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mtctr r8 @@ -128,12 +128,12 @@ _GLOBAL(flush_dcache_range) * Different systems have different cache line sizes */ ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ addi r5,r7,-1 andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ + lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */ srw. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mtctr r8 @@ -156,12 +156,12 @@ EXPORT_SYMBOL(flush_dcache_range) */ _GLOBAL(flush_dcache_phys_range) ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ addi r5,r7,-1 andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ + lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */ srw. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mfmsr r5 /* Disable MMU Data Relocation */ @@ -184,12 +184,12 @@ _GLOBAL(flush_dcache_phys_range) _GLOBAL(flush_inval_dcache_range) ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ addi r5,r7,-1 andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGLINESIZE(r10)/* Get log-2 of dcache line size */ + lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */ srw. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? 
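All of the flush routines in this hunk share one computation: round the start address down to a cache-block boundary, then turn the byte span into a block count using the log-2 block size. A user-space rendering of the addi/andc/subf/add/srw. sequence, assuming a 128-byte block purely for the sake of the example:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t start = 0x1234, stop = 0x1534;    /* arbitrary span */
    uint64_t bsize = 128, log_bsize = 7;       /* cache block geometry */

    uint64_t first = start & ~(bsize - 1);     /* andc: round down */
    /* add bsize-1 so a partial trailing block is still flushed */
    uint64_t nblocks = (stop - first + bsize - 1) >> log_bsize;

    printf("flush %ju blocks from 0x%jx\n",
           (uintmax_t)nblocks, (uintmax_t)first);
    return 0;
}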
*/ sync @@ -225,8 +225,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) /* Flush the dcache */ ld r7,PPC64_CACHES@toc(r2) clrrdi r3,r3,PAGE_SHIFT /* Page align */ - lwz r4,DCACHEL1LINESPERPAGE(r7) /* Get # dcache lines per page */ - lwz r5,DCACHEL1LINESIZE(r7) /* Get dcache line size */ + lwz r4,DCACHEL1BLOCKSPERPAGE(r7) /* Get # dcache blocks per page */ + lwz r5,DCACHEL1BLOCKSIZE(r7) /* Get dcache block size */ mr r6,r3 mtctr r4 0: dcbst 0,r6 @@ -236,8 +236,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) /* Now invalidate the icache */ - lwz r4,ICACHEL1LINESPERPAGE(r7) /* Get # icache lines per page */ - lwz r5,ICACHEL1LINESIZE(r7) /* Get icache line size */ + lwz r4,ICACHEL1BLOCKSPERPAGE(r7) /* Get # icache blocks per page */ + lwz r5,ICACHEL1BLOCKSIZE(r7) /* Get icache block size */ mtctr r4 1: icbi 0,r3 add r3,r3,r5 @@ -478,7 +478,7 @@ _GLOBAL(kexec_wait) addi r5,r5,kexec_flag-1b 99: HMT_LOW -#ifdef CONFIG_KEXEC /* use no memory without kexec */ +#ifdef CONFIG_KEXEC_CORE /* use no memory without kexec */ lwz r4,0(r5) cmpwi 0,r4,0 beq 99b @@ -503,7 +503,7 @@ kexec_flag: .long 0 -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE #ifdef CONFIG_PPC_BOOK3E /* * BOOK3E has no real MMU mode, so we have to setup the initial TLB @@ -716,4 +716,4 @@ _GLOBAL(kexec_sequence) mtlr 4 li r5,0 blr /* image->start(physid, image->start, 0); */ -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 30b89d5cbb03..3f7ba0f5bf29 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -22,7 +22,7 @@ #include <linux/vmalloc.h> #include <linux/bug.h> #include <asm/module.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/firmware.h> #include <linux/sort.h> #include <asm/setup.h> diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 183368e008cf..0b0f89685b67 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -286,14 +286,6 @@ static void dedotify_versions(struct modversion_info *vers, for (end = (void *)vers + size; vers < end; vers++) if (vers->name[0] == '.') { memmove(vers->name, vers->name+1, strlen(vers->name)); -#ifdef ARCH_RELOCATES_KCRCTAB - /* The TOC symbol has no CRC computed. To avoid CRC - * check failing, we must force it to the expected - * value (see CRC check in module.c). - */ - if (!strcmp(vers->name, "TOC.")) - vers->crc = -(unsigned long)reloc_start; -#endif } } @@ -652,6 +644,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, *location = value - (unsigned long)location; break; + case R_PPC64_REL32: + /* 32 bits relative (used by relative exception tables) */ + *(u32 *)location = value - (unsigned long)location; + break; + case R_PPC64_TOCSAVE: /* * Marker reloc indicates we don't have to save r2. 
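The new R_PPC64_REL32 case in apply_relocate_add() above is the standard S + A - P computation truncated to 32 bits, which is what allows an exception-table entry to stay 4 bytes wide. A hedged stand-alone model of the patching step:

#include <stdint.h>

/* Model of applying a 32-bit PC-relative relocation at 'location'. */
static void apply_rel32(uint32_t *location, uint64_t value)
{
    /* value is S + A (symbol plus addend); location is the place P */
    *location = (uint32_t)(value - (uint64_t)(uintptr_t)location);
}

int main(void)
{
    uint32_t word = 0;

    apply_rel32(&word, (uintptr_t)&word + 0x1000);    /* target 4KB ahead */
    return word == 0x1000 ? 0 : 1;    /* stored offset, not address */
}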
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 34d2c595de23..d5e2b8309939 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -28,7 +28,7 @@ #include <linux/pagemap.h> #include <linux/pstore.h> #include <linux/zlib.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/nvram.h> #include <asm/rtas.h> #include <asm/prom.h> diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index b60a67d92ebd..34aeac54f120 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -114,11 +114,6 @@ static struct platform_driver of_pci_phb_driver = { }, }; -static __init int of_pci_phb_init(void) -{ - return platform_driver_register(&of_pci_phb_driver); -} - -device_initcall(of_pci_phb_init); +builtin_platform_driver(of_pci_phb_driver); #endif /* CONFIG_PPC_OF_PLATFORM_PCI */ diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 678f87a63645..41c86c6b6e4d 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -24,7 +24,7 @@ #include <asm/pci-bridge.h> #include <asm/ppc-pci.h> #include <asm/byteorder.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/machdep.h> #undef DEBUG diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index c30612aad68e..56548bf6231f 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -24,7 +24,7 @@ #include <asm/machdep.h> #include <asm/vdso_datapage.h> #include <asm/rtas.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/prom.h> #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 49a680d5ae37..4379a079b3c2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -730,6 +730,28 @@ static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) mtspr(SPRN_DABRX, dabrx); return 0; } +#elif defined(CONFIG_PPC_8xx) +static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) +{ + unsigned long addr = dabr & ~HW_BRK_TYPE_DABR; + unsigned long lctrl1 = 0x90000000; /* compare type: equal on E & F */ + unsigned long lctrl2 = 0x8e000002; /* watchpoint 1 on cmp E | F */ + + if ((dabr & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_READ) + lctrl1 |= 0xa0000; + else if ((dabr & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_WRITE) + lctrl1 |= 0xf0000; + else if ((dabr & HW_BRK_TYPE_RDWR) == 0) + lctrl2 = 0; + + mtspr(SPRN_LCTRL2, 0); + mtspr(SPRN_CMPE, addr); + mtspr(SPRN_CMPF, addr + 4); + mtspr(SPRN_LCTRL1, lctrl1); + mtspr(SPRN_LCTRL2, lctrl2); + + return 0; +} #else static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) { @@ -1051,14 +1073,6 @@ static inline void save_sprs(struct thread_struct *t) */ t->tar = mfspr(SPRN_TAR); } - - if (cpu_has_feature(CPU_FTR_ARCH_300)) { - /* Conditionally save Load Monitor registers, if enabled */ - if (t->fscr & FSCR_LM) { - t->lmrr = mfspr(SPRN_LMRR); - t->lmser = mfspr(SPRN_LMSER); - } - } #endif } @@ -1094,16 +1108,6 @@ static inline void restore_sprs(struct thread_struct *old_thread, if (old_thread->tar != new_thread->tar) mtspr(SPRN_TAR, new_thread->tar); } - - if (cpu_has_feature(CPU_FTR_ARCH_300)) { - /* Conditionally restore Load Monitor registers, if enabled */ - if (new_thread->fscr & FSCR_LM) { - if (old_thread->lmrr != new_thread->lmrr) - mtspr(SPRN_LMRR, new_thread->lmrr); - if (old_thread->lmser != new_thread->lmser) - mtspr(SPRN_LMSER, new_thread->lmser); - } - } 
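A condensed model of the type dispatch in the 8xx __set_dabr() added above, showing only the visible branch logic; the SPR magic numbers are taken on faith from the patch's own comments, and the HW_BRK_TYPE_* values below are assumed to match asm/hw_breakpoint.h:

#include <stdint.h>
#include <stdio.h>

#define HW_BRK_TYPE_READ    0x01    /* assumed values, for illustration */
#define HW_BRK_TYPE_WRITE   0x02
#define HW_BRK_TYPE_RDWR    (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE)

static uint32_t pick_lctrl1(unsigned long type)
{
    uint32_t lctrl1 = 0x90000000;    /* compare type: equal on E & F */

    if ((type & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_READ)
        lctrl1 |= 0xa0000;
    else if ((type & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_WRITE)
        lctrl1 |= 0xf0000;
    return lctrl1;
}

int main(void)
{
    printf("read: %#x write: %#x\n",
           pick_lctrl1(HW_BRK_TYPE_READ), pick_lctrl1(HW_BRK_TYPE_WRITE));
    return 0;
}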
#endif } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index b0245bed6f54..f5d399e46193 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -156,21 +156,22 @@ static struct ibm_pa_feature { unsigned char pabit; /* bit number (big-endian) */ unsigned char invert; /* if 1, pa bit set => clear feature */ } ibm_pa_features[] __initdata = { - {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0, 0}, - {0, 0, PPC_FEATURE_HAS_FPU, 0, 0, 1, 0}, - {CPU_FTR_CTRL, 0, 0, 0, 0, 3, 0}, - {CPU_FTR_NOEXECUTE, 0, 0, 0, 0, 6, 0}, - {CPU_FTR_NODSISRALIGN, 0, 0, 0, 1, 1, 1}, - {0, MMU_FTR_CI_LARGE_PAGE, 0, 0, 1, 2, 0}, - {CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 0, 5, 0, 0}, + { .pabyte = 0, .pabit = 0, .cpu_user_ftrs = PPC_FEATURE_HAS_MMU }, + { .pabyte = 0, .pabit = 1, .cpu_user_ftrs = PPC_FEATURE_HAS_FPU }, + { .pabyte = 0, .pabit = 3, .cpu_features = CPU_FTR_CTRL }, + { .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE }, + { .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE }, + { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX }, + { .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN }, + { .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE, + .cpu_user_ftrs = PPC_FEATURE_TRUE_LE }, /* * If the kernel doesn't support TM (ie CONFIG_PPC_TRANSACTIONAL_MEM=n), * we don't want to turn on TM here, so we use the *_COMP versions * which are 0 if the kernel doesn't support TM. */ - {CPU_FTR_TM_COMP, 0, 0, - PPC_FEATURE2_HTM_COMP|PPC_FEATURE2_HTM_NOSC_COMP, 22, 0, 0}, - {0, MMU_FTR_TYPE_RADIX, 0, 0, 40, 0, 0}, + { .pabyte = 22, .pabit = 0, .cpu_features = CPU_FTR_TM_COMP, + .cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP }, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, @@ -427,7 +428,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node, tce_alloc_end = *lprop; #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL); if (lprop) crashk_res.start = *lprop; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 88ac964f4858..d3db1bcc9c1e 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -461,14 +461,14 @@ static int __init prom_next_node(phandle *nodep) } } -static int inline prom_getprop(phandle node, const char *pname, +static inline int prom_getprop(phandle node, const char *pname, void *value, size_t valuelen) { return call_prom("getprop", 4, 1, node, ADDR(pname), (u32)(unsigned long) value, (u32) valuelen); } -static int inline prom_getproplen(phandle node, const char *pname) +static inline int prom_getproplen(phandle node, const char *pname) { return call_prom("getproplen", 2, 1, node, ADDR(pname)); } @@ -635,13 +635,7 @@ static void __init early_cmdline_parse(void) * * See prom.h for the definition of the bits specified in the * architecture vector. - * - * Because the description vector contains a mix of byte and word - * values, we declare it as an unsigned char array, and use this - * macro to put word values in. 
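The pabyte/pabit pairs in the designated initializers above index the firmware's ibm,pa-features bitstring with big-endian bit numbering, where bit 0 is the most significant bit of the byte. A small runnable model of the test scan_features() performs, against a made-up payload (the real bitstring sits after a two-byte descriptor header):

#include <stdio.h>

/* Big-endian bit numbering: bit 0 is the most significant bit. */
static int pa_feature_set(const unsigned char *payload, int pabyte, int pabit)
{
    return (payload[pabyte] >> (7 - pabit)) & 1;
}

int main(void)
{
    /* Made-up payload: byte 0 = 0xc0, so bits 0 and 1 set (MMU, FPU) */
    const unsigned char payload[] = { 0xc0, 0x00 };

    printf("MMU: %d FPU: %d CTRL: %d\n",
           pa_feature_set(payload, 0, 0),
           pa_feature_set(payload, 0, 1),
           pa_feature_set(payload, 0, 3));
    return 0;
}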
*/ -#define W(x) ((x) >> 24) & 0xff, ((x) >> 16) & 0xff, \ - ((x) >> 8) & 0xff, (x) & 0xff /* Firmware expects the value to be n - 1, where n is the # of vectors */ #define NUM_VECTORS(n) ((n) - 1) @@ -652,92 +646,221 @@ static void __init early_cmdline_parse(void) */ #define VECTOR_LENGTH(n) (1 + (n) - 2) -unsigned char ibm_architecture_vec[] = { - W(0xfffe0000), W(0x003a0000), /* POWER5/POWER5+ */ - W(0xffff0000), W(0x003e0000), /* POWER6 */ - W(0xffff0000), W(0x003f0000), /* POWER7 */ - W(0xffff0000), W(0x004b0000), /* POWER8E */ - W(0xffff0000), W(0x004c0000), /* POWER8NVL */ - W(0xffff0000), W(0x004d0000), /* POWER8 */ - W(0xffffffff), W(0x0f000004), /* all 2.07-compliant */ - W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */ - W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */ - W(0xfffffffe), W(0x0f000001), /* all 2.04-compliant and earlier */ - NUM_VECTORS(6), /* 6 option vectors */ - - /* option vector 1: processor architectures supported */ - VECTOR_LENGTH(2), /* length */ - 0, /* don't ignore, don't halt */ - OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 | - OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07, +struct option_vector1 { + u8 byte1; + u8 arch_versions; + u8 arch_versions3; +} __packed; + +struct option_vector2 { + u8 byte1; + __be16 reserved; + __be32 real_base; + __be32 real_size; + __be32 virt_base; + __be32 virt_size; + __be32 load_base; + __be32 min_rma; + __be32 min_load; + u8 min_rma_percent; + u8 max_pft_size; +} __packed; + +struct option_vector3 { + u8 byte1; + u8 byte2; +} __packed; + +struct option_vector4 { + u8 byte1; + u8 min_vp_cap; +} __packed; + +struct option_vector5 { + u8 byte1; + u8 byte2; + u8 byte3; + u8 cmo; + u8 associativity; + u8 bin_opts; + u8 micro_checkpoint; + u8 reserved0; + __be32 max_cpus; + __be16 papr_level; + __be16 reserved1; + u8 platform_facilities; + u8 reserved2; + __be16 reserved3; + u8 subprocessors; + u8 byte22; + u8 intarch; + u8 mmu; +} __packed; + +struct option_vector6 { + u8 reserved; + u8 secondary_pteg; + u8 os_name; +} __packed; + +struct ibm_arch_vec { + struct { u32 mask, val; } pvrs[12]; + + u8 num_vectors; + + u8 vec1_len; + struct option_vector1 vec1; + + u8 vec2_len; + struct option_vector2 vec2; + + u8 vec3_len; + struct option_vector3 vec3; + + u8 vec4_len; + struct option_vector4 vec4; + + u8 vec5_len; + struct option_vector5 vec5; + + u8 vec6_len; + struct option_vector6 vec6; +} __packed; + +struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { + .pvrs = { + { + .mask = cpu_to_be32(0xfffe0000), /* POWER5/POWER5+ */ + .val = cpu_to_be32(0x003a0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER6 */ + .val = cpu_to_be32(0x003e0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER7 */ + .val = cpu_to_be32(0x003f0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER8E */ + .val = cpu_to_be32(0x004b0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER8NVL */ + .val = cpu_to_be32(0x004c0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER8 */ + .val = cpu_to_be32(0x004d0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER9 */ + .val = cpu_to_be32(0x004e0000), + }, + { + .mask = cpu_to_be32(0xffffffff), /* all 3.00-compliant */ + .val = cpu_to_be32(0x0f000005), + }, + { + .mask = cpu_to_be32(0xffffffff), /* all 2.07-compliant */ + .val = cpu_to_be32(0x0f000004), + }, + { + .mask = cpu_to_be32(0xffffffff), /* all 2.06-compliant */ + .val = cpu_to_be32(0x0f000003), + }, + { + .mask = cpu_to_be32(0xffffffff), /* all 2.05-compliant */ + .val 
= cpu_to_be32(0x0f000002), + }, + { + .mask = cpu_to_be32(0xfffffffe), /* all 2.04-compliant and earlier */ + .val = cpu_to_be32(0x0f000001), + }, + }, + + .num_vectors = NUM_VECTORS(6), + .vec1_len = VECTOR_LENGTH(sizeof(struct option_vector1)), + .vec1 = { + .byte1 = 0, + .arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 | + OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07, + .arch_versions3 = OV1_PPC_3_00, + }, + + .vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)), /* option vector 2: Open Firmware options supported */ - VECTOR_LENGTH(33), /* length */ - OV2_REAL_MODE, - 0, 0, - W(0xffffffff), /* real_base */ - W(0xffffffff), /* real_size */ - W(0xffffffff), /* virt_base */ - W(0xffffffff), /* virt_size */ - W(0xffffffff), /* load_base */ - W(256), /* 256MB min RMA */ - W(0xffffffff), /* full client load */ - 0, /* min RMA percentage of total RAM */ - 48, /* max log_2(hash table size) */ + .vec2 = { + .byte1 = OV2_REAL_MODE, + .reserved = 0, + .real_base = cpu_to_be32(0xffffffff), + .real_size = cpu_to_be32(0xffffffff), + .virt_base = cpu_to_be32(0xffffffff), + .virt_size = cpu_to_be32(0xffffffff), + .load_base = cpu_to_be32(0xffffffff), + .min_rma = cpu_to_be32(256), /* 256MB min RMA */ + .min_load = cpu_to_be32(0xffffffff), /* full client load */ + .min_rma_percent = 0, /* min RMA percentage of total RAM */ + .max_pft_size = 48, /* max log_2(hash table size) */ + }, + .vec3_len = VECTOR_LENGTH(sizeof(struct option_vector3)), /* option vector 3: processor options supported */ - VECTOR_LENGTH(2), /* length */ - 0, /* don't ignore, don't halt */ - OV3_FP | OV3_VMX | OV3_DFP, + .vec3 = { + .byte1 = 0, /* don't ignore, don't halt */ + .byte2 = OV3_FP | OV3_VMX | OV3_DFP, + }, + .vec4_len = VECTOR_LENGTH(sizeof(struct option_vector4)), /* option vector 4: IBM PAPR implementation */ - VECTOR_LENGTH(2), /* length */ - 0, /* don't halt */ - OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */ + .vec4 = { + .byte1 = 0, /* don't halt */ + .min_vp_cap = OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */ + }, + .vec5_len = VECTOR_LENGTH(sizeof(struct option_vector5)), /* option vector 5: PAPR/OF options */ - VECTOR_LENGTH(21), /* length */ - 0, /* don't ignore, don't halt */ - OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) | - OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) | + .vec5 = { + .byte1 = 0, /* don't ignore, don't halt */ + .byte2 = OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) | + OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) | #ifdef CONFIG_PCI_MSI - /* PCIe/MSI support. Without MSI full PCIe is not supported */ - OV5_FEAT(OV5_MSI), + /* PCIe/MSI support. Without MSI full PCIe is not supported */ + OV5_FEAT(OV5_MSI), #else - 0, + 0, #endif - 0, + .byte3 = 0, + .cmo = #ifdef CONFIG_PPC_SMLPAR - OV5_FEAT(OV5_CMO) | OV5_FEAT(OV5_XCMO), + OV5_FEAT(OV5_CMO) | OV5_FEAT(OV5_XCMO), #else - 0, + 0, #endif - OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN), - 0, - 0, - 0, - /* WARNING: The offset of the "number of cores" field below - * must match by the macro below. Update the definition if - * the structure layout changes. 
- */ -#define IBM_ARCH_VEC_NRCORES_OFFSET 133 - W(NR_CPUS), /* number of cores supported */ - 0, - 0, - 0, - 0, - OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) | - OV5_FEAT(OV5_PFO_HW_842), /* Byte 17 */ - 0, /* Byte 18 */ - 0, /* Byte 19 */ - 0, /* Byte 20 */ - OV5_FEAT(OV5_SUB_PROCESSORS), /* Byte 21 */ + .associativity = OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN), + .bin_opts = 0, + .micro_checkpoint = 0, + .reserved0 = 0, + .max_cpus = cpu_to_be32(NR_CPUS), /* number of cores supported */ + .papr_level = 0, + .reserved1 = 0, + .platform_facilities = OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) | OV5_FEAT(OV5_PFO_HW_842), + .reserved2 = 0, + .reserved3 = 0, + .subprocessors = 1, + .intarch = 0, + .mmu = OV5_FEAT(OV5_MMU_RADIX_300) | OV5_FEAT(OV5_MMU_HASH_300) | + OV5_FEAT(OV5_MMU_PROC_TBL) | OV5_FEAT(OV5_MMU_GTSE), + }, /* option vector 6: IBM PAPR hints */ - VECTOR_LENGTH(3), /* length */ - 0, - 0, - OV6_LINUX, + .vec6_len = VECTOR_LENGTH(sizeof(struct option_vector6)), + .vec6 = { + .reserved = 0, + .secondary_pteg = 0, + .os_name = OV6_LINUX, + }, }; /* Old method - ELF header with PT_NOTE sections only works on BE */ @@ -873,7 +996,6 @@ static void __init prom_send_capabilities(void) ihandle root; prom_arg_t ret; u32 cores; - unsigned char *ptcores; root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { @@ -884,37 +1006,18 @@ static void __init prom_send_capabilities(void) * divide NR_CPUS. */ - /* The core value may start at an odd address. If such a word - * access is made at a cache line boundary, this leads to an - * exception which may not be handled at this time. - * Forcing a per byte access to avoid exception. - */ - ptcores = &ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]; - cores = 0; - cores |= ptcores[0] << 24; - cores |= ptcores[1] << 16; - cores |= ptcores[2] << 8; - cores |= ptcores[3]; - if (cores != NR_CPUS) { - prom_printf("WARNING ! " - "ibm_architecture_vec structure inconsistent: %lu!\n", - cores); - } else { - cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads()); - prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n", - cores, NR_CPUS); - ptcores[0] = (cores >> 24) & 0xff; - ptcores[1] = (cores >> 16) & 0xff; - ptcores[2] = (cores >> 8) & 0xff; - ptcores[3] = cores & 0xff; - } + cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads()); + prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n", + cores, NR_CPUS); + + ibm_architecture_vec.vec5.max_cpus = cpu_to_be32(cores); /* try calling the ibm,client-architecture-support method */ prom_printf("Calling ibm,client-architecture-support..."); if (call_prom_ret("call-method", 3, 2, &ret, ADDR("ibm,client-architecture-support"), root, - ADDR(ibm_architecture_vec)) == 0) { + ADDR(&ibm_architecture_vec)) == 0) { /* the call exists... 
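The safety of this rewrite rests on the packed struct emitting exactly the bytes the old W() macro spelled out by hand, which is what lets prom_send_capabilities() patch vec5.max_cpus directly instead of poking a fragile byte offset. A stand-alone check of that equivalence, with shortened field names that are not the kernel's, using htonl() as a stand-in for cpu_to_be32():

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>    /* htonl() as a stand-in for cpu_to_be32() */

struct vec_head {
    uint32_t mask;
    uint32_t val;
} __attribute__((packed));

int main(void)
{
    /* Old style: big-endian bytes spelled out, as W(0xfffe0000) did. */
    const unsigned char old[] = { 0xff, 0xfe, 0x00, 0x00,
                                  0x00, 0x3a, 0x00, 0x00 };
    struct vec_head new = { htonl(0xfffe0000), htonl(0x003a0000) };

    printf("layouts %s\n",
           memcmp(old, &new, sizeof(old)) == 0 ? "match" : "differ");
    return 0;
}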
*/ if (ret) prom_printf("\nWARNING: ibm,client-architecture" @@ -2747,6 +2850,9 @@ static void __init prom_find_boot_cpu(void) cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu); + if (!PHANDLE_VALID(cpu_pkg)) + return; + prom_getprop(cpu_pkg, "reg", &rval, sizeof(rval)); prom.cpu = be32_to_cpu(rval); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index b1ec62f2cc31..925a4ef90559 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -34,7 +34,7 @@ #include <linux/perf_event.h> #include <linux/context_tracking.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/switch_to.h> @@ -463,6 +463,10 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, flush_fp_to_thread(target); + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_FPR(i); + buf[32] = target->thread.fp_state.fpscr; + /* copy to local buffer then write that out */ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); if (i) @@ -672,6 +676,9 @@ static int vsr_set(struct task_struct *target, const struct user_regset *regset, flush_altivec_to_thread(target); flush_vsx_to_thread(target); + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); if (!ret) @@ -1019,6 +1026,10 @@ static int tm_cfpr_set(struct task_struct *target, flush_fp_to_thread(target); flush_altivec_to_thread(target); + for (i = 0; i < 32; i++) + buf[i] = target->thread.TS_CKFPR(i); + buf[32] = target->thread.ckfp_state.fpscr; + /* copy to local buffer then write that out */ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); if (i) @@ -1283,6 +1294,9 @@ static int tm_cvsx_set(struct task_struct *target, flush_altivec_to_thread(target); flush_vsx_to_thread(target); + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); if (!ret) diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 010b7b310237..f37eb53de1a1 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -29,7 +29,7 @@ #include <linux/signal.h> #include <linux/compat.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/switch_to.h> @@ -73,7 +73,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) break; - copied = access_process_vm(child, (u64)addrOthers, &tmp, + copied = ptrace_access_vm(child, (u64)addrOthers, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; @@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) break; ret = 0; - if (access_process_vm(child, (u64)addrOthers, &tmp, + if (ptrace_access_vm(child, (u64)addrOthers, &tmp, sizeof(tmp), FOLL_FORCE | FOLL_WRITE) == sizeof(tmp)) break; diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index c82eed97bd22..df56dfc4b681 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -24,7 +24,7 @@ #include <linux/bitops.h> #include <linux/rtc.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/processor.h> #include <asm/io.h> #include <asm/prom.h> diff --git 
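The three-line pre-fill added to fpr_set() above (and to its VSX and TM siblings) matters because user_regset_copyin() may write only a sub-range of the local buffer before it is copied back to the thread state. A toy model with an explicit partial window shows what the seed preserves; the names here are illustrative, not the regset API:

#include <stdio.h>
#include <string.h>

#define NREGS 4

int main(void)
{
    double thread_fpr[NREGS] = { 1.0, 2.0, 3.0, 4.0 };
    double user_data[1] = { 9.0 };    /* caller updates one register */
    double buf[NREGS];

    memcpy(buf, thread_fpr, sizeof(buf));          /* the added pre-fill */
    memcpy(&buf[2], user_data, sizeof(user_data)); /* partial copyin */

    memcpy(thread_fpr, buf, sizeof(buf));          /* write-back */
    printf("fpr1 kept: %g, fpr2 updated: %g\n",
           thread_fpr[1], thread_fpr[2]);
    return 0;
}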
a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 6a3e5de544ce..b8a4987f58cf 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -35,7 +35,7 @@ #include <asm/page.h> #include <asm/param.h> #include <asm/delay.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/udbg.h> #include <asm/syscalls.h> #include <asm/smp.h> @@ -1145,31 +1145,29 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) void __init rtas_initialize(void) { unsigned long rtas_region = RTAS_INSTANTIATE_MAX; + u32 base, size, entry; + int no_base, no_size, no_entry; /* Get RTAS dev node and fill up our "rtas" structure with infos * about it. */ rtas.dev = of_find_node_by_name(NULL, "rtas"); - if (rtas.dev) { - const __be32 *basep, *entryp, *sizep; - - basep = of_get_property(rtas.dev, "linux,rtas-base", NULL); - sizep = of_get_property(rtas.dev, "rtas-size", NULL); - if (basep != NULL && sizep != NULL) { - rtas.base = __be32_to_cpu(*basep); - rtas.size = __be32_to_cpu(*sizep); - entryp = of_get_property(rtas.dev, - "linux,rtas-entry", NULL); - if (entryp == NULL) /* Ugh */ - rtas.entry = rtas.base; - else - rtas.entry = __be32_to_cpu(*entryp); - } else - rtas.dev = NULL; - } if (!rtas.dev) return; + no_base = of_property_read_u32(rtas.dev, "linux,rtas-base", &base); + no_size = of_property_read_u32(rtas.dev, "rtas-size", &size); + if (no_base || no_size) { + of_node_put(rtas.dev); + rtas.dev = NULL; + return; + } + + rtas.base = base; + rtas.size = size; + no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry); + rtas.entry = no_entry ? rtas.base : entry; + /* If RTAS was found, allocate the RMO buffer for it and look for * the stop-self token if any */ diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index db2b482af658..f6f6a8a5103a 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -19,7 +19,7 @@ #include <linux/proc_fs.h> #include <linux/reboot.h> #include <asm/delay.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/rtas.h> #define MODULE_VERS "1.0" diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index a26a02006576..3650732639ed 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -21,8 +21,9 @@ #include <linux/cpu.h> #include <linux/workqueue.h> #include <linux/slab.h> +#include <linux/topology.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/io.h> #include <asm/rtas.h> #include <asm/prom.h> @@ -282,6 +283,7 @@ static void prrn_work_fn(struct work_struct *work) * the RTAS event. */ pseries_devicetree_update(-prrn_update_scope); + arch_update_cpu_topology(); } static DECLARE_WORK(prrn_work, prrn_work_fn); @@ -434,7 +436,10 @@ static void do_event_scan(void) } if (error == 0) { - pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, 0); + if (rtas_error_type((struct rtas_error_log *)logdata) != + RTAS_TYPE_PRRN) + pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, + 0); handle_rtas_event((struct rtas_error_log *)logdata); } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 270ee30abdcf..4697da895133 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -87,6 +87,15 @@ EXPORT_SYMBOL(machine_id); int boot_cpuid = -1; EXPORT_SYMBOL_GPL(boot_cpuid); +/* + * These are used in binfmt_elf.c to put aux entries on the stack + * for each elf executable being started. 
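One readability wrinkle in the rtas_initialize() rework above: of_property_read_u32() returns 0 on success, so no_base, no_size, and no_entry are non-zero exactly when a property is missing. A runnable sketch of the same contract, with a toy lookup standing in for the device tree:

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Stand-in with of_property_read_u32()'s contract: 0 on success. */
static int read_u32(const char *prop, unsigned int *out)
{
    if (strcmp(prop, "linux,rtas-base") == 0) {
        *out = 0x2000;
        return 0;
    }
    return -EINVAL;    /* property missing */
}

int main(void)
{
    unsigned int base, size;
    int no_base = read_u32("linux,rtas-base", &base);
    int no_size = read_u32("rtas-size", &size);

    /* non-zero means "missing", matching the rtas_initialize() naming */
    printf("usable: %s\n", (!no_base && !no_size) ? "yes" : "no");
    return 0;
}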
+ */ +int dcache_bsize; +int icache_bsize; +int ucache_bsize; + + unsigned long klimit = (unsigned long) _end; /* @@ -915,7 +924,7 @@ void __init setup_arch(char **cmdline_p) init_mm.context.pte_frag = NULL; #endif #ifdef CONFIG_SPAPR_TCE_IOMMU - mm_iommu_init(&init_mm.context); + mm_iommu_init(&init_mm); #endif irqstack_early_init(); exc_lvl_early_init(); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 5fe79182f0fa..2f88f6cf1a42 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -29,7 +29,7 @@ #include <asm/bootx.h> #include <asm/btext.h> #include <asm/machdep.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/pmac_feature.h> #include <asm/sections.h> #include <asm/nvram.h> @@ -59,14 +59,6 @@ EXPORT_SYMBOL(DMA_MODE_READ); EXPORT_SYMBOL(DMA_MODE_WRITE); /* - * These are used in binfmt_elf.c to put aux entries on the stack - * for each elf executable being started. - */ -int dcache_bsize; -int icache_bsize; -int ucache_bsize; - -/* * We're called here very early in the boot. * * Note that the kernel may be running at an address which is different diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 8d586cff8a41..b9855f1b290a 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -77,25 +77,18 @@ int spinning_secondaries; u64 ppc64_pft_size; -/* Pick defaults since we might want to patch instructions - * before we've read this from the device tree. - */ struct ppc64_caches ppc64_caches = { - .dline_size = 0x40, - .log_dline_size = 6, - .iline_size = 0x40, - .log_iline_size = 6 + .l1d = { + .block_size = 0x40, + .log_block_size = 6, + }, + .l1i = { + .block_size = 0x40, + .log_block_size = 6 + }, }; EXPORT_SYMBOL_GPL(ppc64_caches); -/* - * These are used in binfmt_elf.c to put aux entries on the stack - * for each elf executable being started. - */ -int dcache_bsize; -int icache_bsize; -int ucache_bsize; - #if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) void __init setup_tlb_core_data(void) { @@ -354,7 +347,7 @@ void early_setup_secondary(void) #endif /* CONFIG_SMP */ -#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC) +#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE) static bool use_spinloop(void) { if (!IS_ENABLED(CONFIG_PPC_BOOK3E)) @@ -399,7 +392,7 @@ void smp_release_cpus(void) DBG(" <- smp_release_cpus()\n"); } -#endif /* CONFIG_SMP || CONFIG_KEXEC */ +#endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */ /* * Initialize some remaining members of the ppc64_caches and systemcfg @@ -408,74 +401,135 @@ void smp_release_cpus(void) * cache informations about the CPU that will be used by cache flush * routines and/or provided to userland */ + +static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, + u32 bsize, u32 sets) +{ + info->size = size; + info->sets = sets; + info->line_size = lsize; + info->block_size = bsize; + info->log_block_size = __ilog2(bsize); + info->blocks_per_page = PAGE_SIZE / bsize; + + if (sets == 0) + info->assoc = 0xffff; + else + info->assoc = size / (sets * lsize); +} + +static bool __init parse_cache_info(struct device_node *np, + bool icache, + struct ppc_cache_info *info) +{ + static const char *ipropnames[] __initdata = { + "i-cache-size", + "i-cache-sets", + "i-cache-block-size", + "i-cache-line-size", + }; + static const char *dpropnames[] __initdata = { + "d-cache-size", + "d-cache-sets", + "d-cache-block-size", + "d-cache-line-size", + }; + const char **propnames = icache ? 
ipropnames : dpropnames; + const __be32 *sizep, *lsizep, *bsizep, *setsp; + u32 size, lsize, bsize, sets; + bool success = true; + + size = 0; + sets = -1u; + lsize = bsize = cur_cpu_spec->dcache_bsize; + sizep = of_get_property(np, propnames[0], NULL); + if (sizep != NULL) + size = be32_to_cpu(*sizep); + setsp = of_get_property(np, propnames[1], NULL); + if (setsp != NULL) + sets = be32_to_cpu(*setsp); + bsizep = of_get_property(np, propnames[2], NULL); + lsizep = of_get_property(np, propnames[3], NULL); + if (bsizep == NULL) + bsizep = lsizep; + if (lsizep != NULL) + lsize = be32_to_cpu(*lsizep); + if (bsizep != NULL) + bsize = be32_to_cpu(*bsizep); + if (sizep == NULL || bsizep == NULL || lsizep == NULL) + success = false; + + /* + * OF is weird .. it represents fully associative caches + * as "1 way" which doesn't make much sense and doesn't + * leave room for direct mapped. We'll assume that 0 + * in OF means direct mapped for that reason. + */ + if (sets == 1) + sets = 0; + else if (sets == 0) + sets = 1; + + init_cache_info(info, size, lsize, bsize, sets); + + return success; +} + void __init initialize_cache_info(void) { - struct device_node *np; - unsigned long num_cpus = 0; + struct device_node *cpu = NULL, *l2, *l3 = NULL; + u32 pvr; DBG(" -> initialize_cache_info()\n"); - for_each_node_by_type(np, "cpu") { - num_cpus += 1; + /* + * All shipping POWER8 machines have a firmware bug that + * puts incorrect information in the device-tree. This will + * be (hopefully) fixed for future chips but for now hard + * code the values if we are running on one of these + */ + pvr = PVR_VER(mfspr(SPRN_PVR)); + if (pvr == PVR_POWER8 || pvr == PVR_POWER8E || + pvr == PVR_POWER8NVL) { + /* size lsize blk sets */ + init_cache_info(&ppc64_caches.l1i, 0x8000, 128, 128, 32); + init_cache_info(&ppc64_caches.l1d, 0x10000, 128, 128, 64); + init_cache_info(&ppc64_caches.l2, 0x80000, 128, 0, 512); + init_cache_info(&ppc64_caches.l3, 0x800000, 128, 0, 8192); + } else + cpu = of_find_node_by_type(NULL, "cpu"); + + /* + * We're assuming *all* of the CPUs have the same + * d-cache and i-cache sizes... -Peter + */ + if (cpu) { + if (!parse_cache_info(cpu, false, &ppc64_caches.l1d)) + DBG("Argh, can't find dcache properties !\n"); + + if (!parse_cache_info(cpu, true, &ppc64_caches.l1i)) + DBG("Argh, can't find icache properties !\n"); /* - * We're assuming *all* of the CPUs have the same - * d-cache and i-cache sizes... -Peter + * Try to find the L2 and L3 if any. Assume they are + * unified and use the D-side properties. */ - if (num_cpus == 1) { - const __be32 *sizep, *lsizep; - u32 size, lsize; - - size = 0; - lsize = cur_cpu_spec->dcache_bsize; - sizep = of_get_property(np, "d-cache-size", NULL); - if (sizep != NULL) - size = be32_to_cpu(*sizep); - lsizep = of_get_property(np, "d-cache-block-size", - NULL); - /* fallback if block size missing */ - if (lsizep == NULL) - lsizep = of_get_property(np, - "d-cache-line-size", - NULL); - if (lsizep != NULL) - lsize = be32_to_cpu(*lsizep); - if (sizep == NULL || lsizep == NULL) - DBG("Argh, can't find dcache properties ! 
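The sets quirk above (Open Firmware reports fully associative caches as "1 way") feeds straight into init_cache_info()'s associativity computation. A worked example with made-up geometry exercises both the remap and the size / (sets * line) arithmetic:

#include <stdio.h>
#include <stdint.h>

static unsigned int assoc(uint32_t size, uint32_t sets, uint32_t lsize)
{
    if (sets == 0)            /* 0 now means fully associative */
        return 0xffff;
    return size / (sets * lsize);
}

int main(void)
{
    uint32_t sets = 1;        /* OF's "1 way": fully associative */

    if (sets == 1)            /* the remap performed above */
        sets = 0;
    else if (sets == 0)
        sets = 1;             /* and 0 in OF means direct mapped */

    /* 64KB with 128B lines: fully associative vs. 64 sets (8-way) */
    printf("full: %#x, 8-way: %u\n", assoc(0x10000, sets, 128),
           assoc(0x10000, 64, 128));
    return 0;
}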
" - "sizep: %p, lsizep: %p\n", sizep, lsizep); - - ppc64_caches.dsize = size; - ppc64_caches.dline_size = lsize; - ppc64_caches.log_dline_size = __ilog2(lsize); - ppc64_caches.dlines_per_page = PAGE_SIZE / lsize; - - size = 0; - lsize = cur_cpu_spec->icache_bsize; - sizep = of_get_property(np, "i-cache-size", NULL); - if (sizep != NULL) - size = be32_to_cpu(*sizep); - lsizep = of_get_property(np, "i-cache-block-size", - NULL); - if (lsizep == NULL) - lsizep = of_get_property(np, - "i-cache-line-size", - NULL); - if (lsizep != NULL) - lsize = be32_to_cpu(*lsizep); - if (sizep == NULL || lsizep == NULL) - DBG("Argh, can't find icache properties ! " - "sizep: %p, lsizep: %p\n", sizep, lsizep); - - ppc64_caches.isize = size; - ppc64_caches.iline_size = lsize; - ppc64_caches.log_iline_size = __ilog2(lsize); - ppc64_caches.ilines_per_page = PAGE_SIZE / lsize; + l2 = of_find_next_cache_node(cpu); + of_node_put(cpu); + if (l2) { + parse_cache_info(l2, false, &ppc64_caches.l2); + l3 = of_find_next_cache_node(l2); + of_node_put(l2); + } + if (l3) { + parse_cache_info(l3, false, &ppc64_caches.l3); + of_node_put(l3); } } /* For use by binfmt_elf */ - dcache_bsize = ppc64_caches.dline_size; - icache_bsize = ppc64_caches.iline_size; + dcache_bsize = ppc64_caches.l1d.block_size; + icache_bsize = ppc64_caches.l1i.block_size; DBG(" <- initialize_cache_info()\n"); } diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index bbe77aed198d..3a3671172436 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -15,7 +15,7 @@ #include <linux/key.h> #include <linux/context_tracking.h> #include <asm/hw_breakpoint.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/unistd.h> #include <asm/debug.h> #include <asm/tm.h> diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 27aa913ac91d..97bb1385e771 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -37,7 +37,7 @@ #include <linux/binfmts.h> #endif -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/cacheflush.h> #include <asm/syscalls.h> #include <asm/sigcontext.h> diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 96698fdf93b4..c83c115858c1 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -27,7 +27,7 @@ #include <asm/sigcontext.h> #include <asm/ucontext.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/pgtable.h> #include <asm/unistd.h> #include <asm/cacheflush.h> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9c6f3fd58059..893bd7f79be6 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -193,7 +193,7 @@ int smp_request_message_ipi(int virq, int msg) if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) { return -EINVAL; } -#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC) +#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC_CORE) if (msg == PPC_MSG_DEBUGGER_BREAK) { return 1; } @@ -325,7 +325,7 @@ void tick_broadcast(const struct cpumask *mask) } #endif -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) void smp_send_debugger_break(void) { int cpu; @@ -340,7 +340,7 @@ void smp_send_debugger_break(void) } #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { crash_ipi_function_ptr = crash_ipi_callback; diff --git a/arch/powerpc/kernel/sys_ppc32.c 
b/arch/powerpc/kernel/sys_ppc32.c index 8a285876aef8..15f216d022e2 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -44,7 +44,7 @@ #include <asm/ptrace.h> #include <asm/types.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/unistd.h> #include <asm/time.h> #include <asm/mmu_context.h> diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 644cce3d8dce..de04c9fbb5cd 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -36,7 +36,7 @@ #include <linux/file.h> #include <linux/personality.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/syscalls.h> #include <asm/time.h> #include <asm/unistd.h> diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index be9751f1cb2a..02e97305d22b 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -64,7 +64,7 @@ #include <asm/nvram.h> #include <asm/cache.h> #include <asm/machdep.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/time.h> #include <asm/prom.h> #include <asm/irq.h> @@ -80,7 +80,7 @@ #include <linux/clockchips.h> #include <linux/timekeeper_internal.h> -static cycle_t rtc_read(struct clocksource *); +static u64 rtc_read(struct clocksource *); static struct clocksource clocksource_rtc = { .name = "rtc", .rating = 400, @@ -89,7 +89,7 @@ static struct clocksource clocksource_rtc = { .read = rtc_read, }; -static cycle_t timebase_read(struct clocksource *); +static u64 timebase_read(struct clocksource *); static struct clocksource clocksource_timebase = { .name = "timebase", .rating = 400, @@ -271,25 +271,19 @@ void accumulate_stolen_time(void) sst = scan_dispatch_log(acct->starttime_user); ust = scan_dispatch_log(acct->starttime); - acct->system_time -= sst; - acct->user_time -= ust; - local_paca->stolen_time += ust + sst; + acct->stime -= sst; + acct->utime -= ust; + acct->steal_time += ust + sst; local_paca->soft_enabled = save_soft_enabled; } static inline u64 calculate_stolen_time(u64 stop_tb) { - u64 stolen = 0; - - if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) { - stolen = scan_dispatch_log(stop_tb); - get_paca()->accounting.system_time -= stolen; - } + if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) + return scan_dispatch_log(stop_tb); - stolen += get_paca()->stolen_time; - get_paca()->stolen_time = 0; - return stolen; + return 0; } #else /* CONFIG_PPC_SPLPAR */ @@ -305,28 +299,27 @@ static inline u64 calculate_stolen_time(u64 stop_tb) * or soft irq state. 
*/ static unsigned long vtime_delta(struct task_struct *tsk, - unsigned long *sys_scaled, - unsigned long *stolen) + unsigned long *stime_scaled, + unsigned long *steal_time) { unsigned long now, nowscaled, deltascaled; - unsigned long udelta, delta, user_scaled; + unsigned long stime; + unsigned long utime, utime_scaled; struct cpu_accounting_data *acct = get_accounting(tsk); WARN_ON_ONCE(!irqs_disabled()); now = mftb(); nowscaled = read_spurr(now); - acct->system_time += now - acct->starttime; + stime = now - acct->starttime; acct->starttime = now; deltascaled = nowscaled - acct->startspurr; acct->startspurr = nowscaled; - *stolen = calculate_stolen_time(now); + *steal_time = calculate_stolen_time(now); - delta = acct->system_time; - acct->system_time = 0; - udelta = acct->user_time - acct->utime_sspurr; - acct->utime_sspurr = acct->user_time; + utime = acct->utime - acct->utime_sspurr; + acct->utime_sspurr = acct->utime; /* * Because we don't read the SPURR on every kernel entry/exit, @@ -338,62 +331,104 @@ static unsigned long vtime_delta(struct task_struct *tsk, * the user ticks get saved up in paca->user_time_scaled to be * used by account_process_tick. */ - *sys_scaled = delta; - user_scaled = udelta; - if (deltascaled != delta + udelta) { - if (udelta) { - *sys_scaled = deltascaled * delta / (delta + udelta); - user_scaled = deltascaled - *sys_scaled; + *stime_scaled = stime; + utime_scaled = utime; + if (deltascaled != stime + utime) { + if (utime) { + *stime_scaled = deltascaled * stime / (stime + utime); + utime_scaled = deltascaled - *stime_scaled; } else { - *sys_scaled = deltascaled; + *stime_scaled = deltascaled; } } - acct->user_time_scaled += user_scaled; + acct->utime_scaled += utime_scaled; - return delta; + return stime; } void vtime_account_system(struct task_struct *tsk) { - unsigned long delta, sys_scaled, stolen; + unsigned long stime, stime_scaled, steal_time; + struct cpu_accounting_data *acct = get_accounting(tsk); + + stime = vtime_delta(tsk, &stime_scaled, &steal_time); + + stime -= min(stime, steal_time); + acct->steal_time += steal_time; - delta = vtime_delta(tsk, &sys_scaled, &stolen); - account_system_time(tsk, 0, delta); - tsk->stimescaled += sys_scaled; - if (stolen) - account_steal_time(stolen); + if ((tsk->flags & PF_VCPU) && !irq_count()) { + acct->gtime += stime; + acct->utime_scaled += stime_scaled; + } else { + if (hardirq_count()) + acct->hardirq_time += stime; + else if (in_serving_softirq()) + acct->softirq_time += stime; + else + acct->stime += stime; + + acct->stime_scaled += stime_scaled; + } } EXPORT_SYMBOL_GPL(vtime_account_system); void vtime_account_idle(struct task_struct *tsk) { - unsigned long delta, sys_scaled, stolen; + unsigned long stime, stime_scaled, steal_time; + struct cpu_accounting_data *acct = get_accounting(tsk); - delta = vtime_delta(tsk, &sys_scaled, &stolen); - account_idle_time(delta + stolen); + stime = vtime_delta(tsk, &stime_scaled, &steal_time); + acct->idle_time += stime + steal_time; } /* - * Transfer the user time accumulated in the paca - * by the exception entry and exit code to the generic - * process user time records. + * Account the whole cputime accumulated in the paca * Must be called with interrupts disabled. * Assumes that vtime_account_system/idle() has been called * recently (i.e. since the last entry from usermode) so that * get_paca()->user_time_scaled is up to date. 
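The reworked vtime_account_system() above fans accumulated system time into per-context buckets on the paca instead of feeding the scheduler immediately. A compact model of the decision tree, with contexts reduced to booleans rather than preempt-count tests; the enum and helper are illustrative only:

#include <stdio.h>
#include <stdbool.h>

enum bucket { GUEST, HARDIRQ, SOFTIRQ, SYSTEM };

static enum bucket classify(bool vcpu, bool hardirq, bool softirq)
{
    if (vcpu && !hardirq && !softirq)    /* PF_VCPU outside IRQ context */
        return GUEST;
    if (hardirq)
        return HARDIRQ;
    if (softirq)
        return SOFTIRQ;
    return SYSTEM;
}

int main(void)
{
    printf("guest: %d hardirq: %d\n",
           classify(true, false, false), classify(false, true, false));
    return 0;
}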
*/ -void vtime_account_user(struct task_struct *tsk) +void vtime_flush(struct task_struct *tsk) { - cputime_t utime, utimescaled; struct cpu_accounting_data *acct = get_accounting(tsk); - utime = acct->user_time; - utimescaled = acct->user_time_scaled; - acct->user_time = 0; - acct->user_time_scaled = 0; + if (acct->utime) + account_user_time(tsk, acct->utime); + + if (acct->utime_scaled) + tsk->utimescaled += acct->utime_scaled; + + if (acct->gtime) + account_guest_time(tsk, acct->gtime); + + if (acct->steal_time) + account_steal_time(acct->steal_time); + + if (acct->idle_time) + account_idle_time(acct->idle_time); + + if (acct->stime) + account_system_index_time(tsk, acct->stime, CPUTIME_SYSTEM); + + if (acct->stime_scaled) + tsk->stimescaled += acct->stime_scaled; + + if (acct->hardirq_time) + account_system_index_time(tsk, acct->hardirq_time, CPUTIME_IRQ); + + if (acct->softirq_time) + account_system_index_time(tsk, acct->softirq_time, CPUTIME_SOFTIRQ); + + acct->utime = 0; + acct->utime_scaled = 0; acct->utime_sspurr = 0; - account_user_time(tsk, utime); - tsk->utimescaled += utimescaled; + acct->gtime = 0; + acct->steal_time = 0; + acct->idle_time = 0; + acct->stime = 0; + acct->stime_scaled = 0; + acct->hardirq_time = 0; + acct->softirq_time = 0; } #ifdef CONFIG_PPC32 @@ -407,8 +442,7 @@ void arch_vtime_task_switch(struct task_struct *prev) struct cpu_accounting_data *acct = get_accounting(current); acct->starttime = get_accounting(prev)->starttime; - acct->system_time = 0; - acct->user_time = 0; + acct->startspurr = get_accounting(prev)->startspurr; } #endif /* CONFIG_PPC32 */ @@ -802,18 +836,18 @@ void read_persistent_clock(struct timespec *ts) } /* clocksource code */ -static cycle_t rtc_read(struct clocksource *cs) +static u64 rtc_read(struct clocksource *cs) { - return (cycle_t)get_rtc(); + return (u64)get_rtc(); } -static cycle_t timebase_read(struct clocksource *cs) +static u64 timebase_read(struct clocksource *cs) { - return (cycle_t)get_tb(); + return (u64)get_tb(); } void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm, - struct clocksource *clock, u32 mult, cycle_t cycle_last) + struct clocksource *clock, u32 mult, u64 cycle_last) { u64 new_tb_to_xs, new_stamp_xsec; u32 frac_sec; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 023a462725b5..e6cc56b61d01 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -40,7 +40,7 @@ #include <asm/emulated_ops.h> #include <asm/pgtable.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/io.h> #include <asm/machdep.h> #include <asm/rtas.h> @@ -64,8 +64,9 @@ #include <asm/asm-prototypes.h> #include <asm/hmi.h> #include <sysdev/fsl_pci.h> +#include <asm/kprobes.h> -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) int (*__debugger)(struct pt_regs *regs) __read_mostly; int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly; int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly; @@ -122,9 +123,6 @@ static unsigned long oops_begin(struct pt_regs *regs) int cpu; unsigned long flags; - if (debugger(regs)) - return 1; - oops_enter(); /* racy, but better than risking deadlock. 
*/ @@ -150,14 +148,15 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, int signr) { bust_spinlocks(0); - die_owner = -1; add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); die_nest_count--; oops_exit(); printk("\n"); - if (!die_nest_count) + if (!die_nest_count) { /* Nest count reaches zero, release the lock. */ + die_owner = -1; arch_spin_unlock(&die_lock); + } raw_local_irq_restore(flags); crash_fadump(regs, "die oops"); @@ -227,8 +226,12 @@ NOKPROBE_SYMBOL(__die); void die(const char *str, struct pt_regs *regs, long err) { - unsigned long flags = oops_begin(regs); + unsigned long flags; + if (debugger(regs)) + return; + + flags = oops_begin(regs); if (__die(str, regs, err)) err = 0; oops_end(flags, regs, err); @@ -365,7 +368,7 @@ static inline int check_io_access(struct pt_regs *regs) (*nip & 0x100)? "OUT to": "IN from", regs->gpr[rb] - _IO_BASE, nip); regs->msr |= MSR_RI; - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return 1; } } @@ -824,6 +827,9 @@ void single_step_exception(struct pt_regs *regs) clear_single_step(regs); + if (kprobe_post_handler(regs)) + return; + if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) goto bail; @@ -1177,6 +1183,9 @@ void program_check_exception(struct pt_regs *regs) if (debugger_bpt(regs)) goto bail; + if (kprobe_handler(regs)) + goto bail; + /* trap exception */ if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) @@ -1430,7 +1439,6 @@ void facility_unavailable_exception(struct pt_regs *regs) [FSCR_TM_LG] = "TM", [FSCR_EBB_LG] = "EBB", [FSCR_TAR_LG] = "TAR", - [FSCR_LM_LG] = "LM", }; char *facility = "unknown"; u64 value; @@ -1488,14 +1496,6 @@ void facility_unavailable_exception(struct pt_regs *regs) emulate_single_step(regs); } return; - } else if ((status == FSCR_LM_LG) && cpu_has_feature(CPU_FTR_ARCH_300)) { - /* - * This process has touched LM, so turn it on forever - * for this process - */ - current->thread.fscr |= FSCR_LM; - mtspr(SPRN_FSCR, current->thread.fscr); - return; } if (status == FSCR_TM_LG) { @@ -1519,7 +1519,8 @@ void facility_unavailable_exception(struct pt_regs *regs) return; } - if ((status < ARRAY_SIZE(facility_strings)) && + if ((hv || status >= 2) && + (status < ARRAY_SIZE(facility_strings)) && facility_strings[status]) facility = facility_strings[status]; @@ -1527,9 +1528,8 @@ void facility_unavailable_exception(struct pt_regs *regs) if (!arch_irq_disabled_regs(regs)) local_irq_enable(); - pr_err_ratelimited( - "%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", - hv ? "Hypervisor " : "", facility, regs->nip, regs->msr); + pr_err_ratelimited("%sFacility '%s' unavailable (%d), exception at 0x%lx, MSR=%lx\n", + hv ? 
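The extable_fixup(entry) call replacing the direct entry->fixup read in check_io_access() above reflects the move to relative exception tables (the same scheme the earlier R_PPC64_REL32 hunk serves): each entry stores a 32-bit offset from its own address rather than an absolute pointer. A hedged model of the decode, not the kernel's actual definition:

#include <stdint.h>
#include <stdio.h>

struct exception_table_entry {
    int32_t insn;     /* offset of the faulting instruction */
    int32_t fixup;    /* offset of the fixup code */
};

static uintptr_t extable_fixup_addr(const struct exception_table_entry *e)
{
    /* the stored value is relative to the field's own address */
    return (uintptr_t)&e->fixup + e->fixup;
}

int main(void)
{
    struct exception_table_entry e = { 0, 0x40 };    /* fixup 64B after */

    printf("fixup at %p + 0x40 = %#jx\n", (void *)&e.fixup,
           (uintmax_t)extable_fixup_addr(&e));
    return 0;
}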
"Hypervisor " : "", facility, status, regs->nip, regs->msr); out: if (user_mode(regs)) { @@ -1754,6 +1754,9 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status) return; } + if (kprobe_post_handler(regs)) + return; + if (notify_die(DIE_SSTEP, "block_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) { return; @@ -1768,6 +1771,9 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status) /* Clear the instruction completion event */ mtspr(SPRN_DBSR, DBSR_IC); + if (kprobe_post_handler(regs)) + return; + if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) { return; diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 4111d30badfa..22b01a3962f0 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -736,16 +736,14 @@ static int __init vdso_init(void) if (firmware_has_feature(FW_FEATURE_LPAR)) vdso_data->platform |= 1; vdso_data->physicalMemorySize = memblock_phys_mem_size(); - vdso_data->dcache_size = ppc64_caches.dsize; - vdso_data->dcache_line_size = ppc64_caches.dline_size; - vdso_data->icache_size = ppc64_caches.isize; - vdso_data->icache_line_size = ppc64_caches.iline_size; - - /* XXXOJN: Blocks should be added to ppc64_caches and used instead */ - vdso_data->dcache_block_size = ppc64_caches.dline_size; - vdso_data->icache_block_size = ppc64_caches.iline_size; - vdso_data->dcache_log_block_size = ppc64_caches.log_dline_size; - vdso_data->icache_log_block_size = ppc64_caches.log_iline_size; + vdso_data->dcache_size = ppc64_caches.l1d.size; + vdso_data->dcache_line_size = ppc64_caches.l1d.line_size; + vdso_data->icache_size = ppc64_caches.l1i.size; + vdso_data->icache_line_size = ppc64_caches.l1i.line_size; + vdso_data->dcache_block_size = ppc64_caches.l1d.block_size; + vdso_data->icache_block_size = ppc64_caches.l1i.block_size; + vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size; + vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size; /* * Calculate the size of the 64 bits vDSO diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c index c4bfadb2606b..2d8f6d8ccafc 100644 --- a/arch/powerpc/kernel/vecemu.c +++ b/arch/powerpc/kernel/vecemu.c @@ -7,7 +7,7 @@ #include <linux/sched.h> #include <asm/ptrace.h> #include <asm/processor.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> /* Functions in vector.S */ extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 7dd89b79d038..b87ccde2137a 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -70,7 +70,8 @@ endif kvm-hv-y += \ book3s_hv.o \ book3s_hv_interrupts.o \ - book3s_64_mmu_hv.o + book3s_64_mmu_hv.o \ + book3s_64_mmu_radix.o kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ book3s_hv_rm_xics.o diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index b6952dd23152..b6b5c185bd92 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -25,7 +25,7 @@ #include <asm/cputable.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/io.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -239,6 +239,7 @@ void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar, kvmppc_set_dsisr(vcpu, flags); kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); } +EXPORT_SYMBOL_GPL(kvmppc_core_queue_data_storage); /* used by kvm_hv */ void kvmppc_core_queue_inst_storage(struct 
kvm_vcpu *vcpu, ulong flags) { diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index b795dd1ac2ef..013552f05182 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -40,84 +40,104 @@ #include "trace_hv.h" -/* Power architecture requires HPT is at least 256kB */ -#define PPC_MIN_HPT_ORDER 18 +//#define DEBUG_RESIZE_HPT 1 + +#ifdef DEBUG_RESIZE_HPT +#define resize_hpt_debug(resize, ...) \ + do { \ + printk(KERN_DEBUG "RESIZE HPT %p: ", resize); \ + printk(__VA_ARGS__); \ + } while (0) +#else +#define resize_hpt_debug(resize, ...) \ + do { } while (0) +#endif static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, unsigned long *pte_idx_ret); + +struct kvm_resize_hpt { + /* These fields read-only after init */ + struct kvm *kvm; + struct work_struct work; + u32 order; + + /* These fields protected by kvm->lock */ + int error; + bool prepare_done; + + /* Private to the work thread, until prepare_done is true, + * then protected by kvm->resize_hpt_sem */ + struct kvm_hpt_info hpt; +}; + static void kvmppc_rmap_reset(struct kvm *kvm); -long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) +int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order) { unsigned long hpt = 0; - struct revmap_entry *rev; + int cma = 0; struct page *page = NULL; - long order = KVM_DEFAULT_HPT_ORDER; + struct revmap_entry *rev; + unsigned long npte; - if (htab_orderp) { - order = *htab_orderp; - if (order < PPC_MIN_HPT_ORDER) - order = PPC_MIN_HPT_ORDER; - } + if ((order < PPC_MIN_HPT_ORDER) || (order > PPC_MAX_HPT_ORDER)) + return -EINVAL; - kvm->arch.hpt_cma_alloc = 0; - page = kvm_alloc_hpt(1ul << (order - PAGE_SHIFT)); + page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT)); if (page) { hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); memset((void *)hpt, 0, (1ul << order)); - kvm->arch.hpt_cma_alloc = 1; + cma = 1; } - /* Lastly try successively smaller sizes from the page allocator */ - /* Only do this if userspace didn't specify a size via ioctl */ - while (!hpt && order > PPC_MIN_HPT_ORDER && !htab_orderp) { - hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| - __GFP_NOWARN, order - PAGE_SHIFT); - if (!hpt) - --order; - } + if (!hpt) + hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT + |__GFP_NOWARN, order - PAGE_SHIFT); if (!hpt) return -ENOMEM; - kvm->arch.hpt_virt = hpt; - kvm->arch.hpt_order = order; /* HPTEs are 2**4 bytes long */ - kvm->arch.hpt_npte = 1ul << (order - 4); - /* 128 (2**7) bytes in each HPTEG */ - kvm->arch.hpt_mask = (1ul << (order - 7)) - 1; - - atomic64_set(&kvm->arch.mmio_update, 0); + npte = 1ul << (order - 4); /* Allocate reverse map array */ - rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte); + rev = vmalloc(sizeof(struct revmap_entry) * npte); if (!rev) { - pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n"); - goto out_freehpt; + pr_err("kvmppc_allocate_hpt: Couldn't alloc reverse map array\n"); + if (cma) + kvm_free_hpt_cma(page, 1 << (order - PAGE_SHIFT)); + else + free_pages(hpt, order - PAGE_SHIFT); + return -ENOMEM; } - kvm->arch.revmap = rev; - kvm->arch.sdr1 = __pa(hpt) | (order - 18); - pr_info("KVM guest htab at %lx (order %ld), LPID %x\n", - hpt, order, kvm->arch.lpid); + info->order = order; + info->virt = hpt; + info->cma = cma; + info->rev = rev; - if (htab_orderp) - *htab_orderp = order; return 0; +} - out_freehpt: - if (kvm->arch.hpt_cma_alloc) - 
kvm_release_hpt(page, 1 << (order - PAGE_SHIFT)); - else - free_pages(hpt, order - PAGE_SHIFT); - return -ENOMEM; +void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info) +{ + atomic64_set(&kvm->arch.mmio_update, 0); + kvm->arch.hpt = *info; + kvm->arch.sdr1 = __pa(info->virt) | (info->order - 18); + + pr_info("KVM guest htab at %lx (order %ld), LPID %x\n", + info->virt, (long)info->order, kvm->arch.lpid); } -long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp) +long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order) { long err = -EBUSY; - long order; + struct kvm_hpt_info info; + + if (kvm_is_radix(kvm)) + return -EINVAL; mutex_lock(&kvm->lock); if (kvm->arch.hpte_setup_done) { @@ -129,37 +149,44 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp) goto out; } } - if (kvm->arch.hpt_virt) { - order = kvm->arch.hpt_order; + if (kvm->arch.hpt.order == order) { + /* We already have a suitable HPT */ + /* Set the entire HPT to 0, i.e. invalid HPTEs */ - memset((void *)kvm->arch.hpt_virt, 0, 1ul << order); + memset((void *)kvm->arch.hpt.virt, 0, 1ul << order); /* * Reset all the reverse-mapping chains for all memslots */ kvmppc_rmap_reset(kvm); /* Ensure that each vcpu will flush its TLB on next entry. */ cpumask_setall(&kvm->arch.need_tlb_flush); - *htab_orderp = order; err = 0; - } else { - err = kvmppc_alloc_hpt(kvm, htab_orderp); - order = *htab_orderp; + goto out; } - out: + + if (kvm->arch.hpt.virt) + kvmppc_free_hpt(&kvm->arch.hpt); + + err = kvmppc_allocate_hpt(&info, order); + if (err < 0) + goto out; + kvmppc_set_hpt(kvm, &info); + +out: mutex_unlock(&kvm->lock); return err; } -void kvmppc_free_hpt(struct kvm *kvm) +void kvmppc_free_hpt(struct kvm_hpt_info *info) { - kvmppc_free_lpid(kvm->arch.lpid); - vfree(kvm->arch.revmap); - if (kvm->arch.hpt_cma_alloc) - kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt), - 1 << (kvm->arch.hpt_order - PAGE_SHIFT)); - else - free_pages(kvm->arch.hpt_virt, - kvm->arch.hpt_order - PAGE_SHIFT); + vfree(info->rev); + if (info->cma) + kvm_free_hpt_cma(virt_to_page(info->virt), + 1 << (info->order - PAGE_SHIFT)); + else if (info->virt) + free_pages(info->virt, info->order - PAGE_SHIFT); + info->virt = 0; + info->order = 0; } /* Bits in first HPTE dword for pagesize 4k, 64k or 16M */ @@ -194,8 +221,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, if (npages > 1ul << (40 - porder)) npages = 1ul << (40 - porder); /* Can't use more than 1 HPTE per HPTEG */ - if (npages > kvm->arch.hpt_mask + 1) - npages = kvm->arch.hpt_mask + 1; + if (npages > kvmppc_hpt_mask(&kvm->arch.hpt) + 1) + npages = kvmppc_hpt_mask(&kvm->arch.hpt) + 1; hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | HPTE_V_BOLTED | hpte0_pgsize_encoding(psize); @@ -205,7 +232,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, for (i = 0; i < npages; ++i) { addr = i << porder; /* can't use hpt_hash since va > 64 bits */ - hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & kvm->arch.hpt_mask; + hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) + & kvmppc_hpt_mask(&kvm->arch.hpt); /* * We assume that the hash table is empty and no * vcpus are using it at this stage. 
Since we create @@ -338,11 +366,11 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, preempt_enable(); return -ENOENT; } - hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); + hptep = (__be64 *)(kvm->arch.hpt.virt + (index << 4)); v = orig_v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; if (cpu_has_feature(CPU_FTR_ARCH_300)) v = hpte_new_to_old_v(v, be64_to_cpu(hptep[1])); - gr = kvm->arch.revmap[index].guest_rpte; + gr = kvm->arch.hpt.rev[index].guest_rpte; unlock_hpte(hptep, orig_v); preempt_enable(); @@ -392,8 +420,8 @@ static int instruction_is_store(unsigned int instr) return (instr & mask) != 0; } -static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned long gpa, gva_t ea, int is_store) +int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned long gpa, gva_t ea, int is_store) { u32 last_inst; @@ -458,6 +486,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long rcbits; long mmio_update; + if (kvm_is_radix(kvm)) + return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr); + /* * Real-mode code has already searched the HPT and found the * entry we're interested in. Lock the entry and check that @@ -480,8 +511,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, } } index = vcpu->arch.pgfault_index; - hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); - rev = &kvm->arch.revmap[index]; + hptep = (__be64 *)(kvm->arch.hpt.virt + (index << 4)); + rev = &kvm->arch.hpt.rev[index]; preempt_disable(); while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) cpu_relax(); @@ -695,12 +726,13 @@ static void kvmppc_rmap_reset(struct kvm *kvm) srcu_read_unlock(&kvm->srcu, srcu_idx); } +typedef int (*hva_handler_fn)(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long gfn); + static int kvm_handle_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, - int (*handler)(struct kvm *kvm, - unsigned long *rmapp, - unsigned long gfn)) + hva_handler_fn handler) { int ret; int retval = 0; @@ -725,9 +757,7 @@ static int kvm_handle_hva_range(struct kvm *kvm, gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); for (; gfn < gfn_end; ++gfn) { - gfn_t gfn_offset = gfn - memslot->base_gfn; - - ret = handler(kvm, &memslot->arch.rmap[gfn_offset], gfn); + ret = handler(kvm, memslot, gfn); retval |= ret; } } @@ -736,20 +766,61 @@ static int kvm_handle_hva_range(struct kvm *kvm, } static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, - int (*handler)(struct kvm *kvm, unsigned long *rmapp, - unsigned long gfn)) + hva_handler_fn handler) { return kvm_handle_hva_range(kvm, hva, hva + 1, handler); } -static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, +/* Must be called with both HPTE and rmap locked */ +static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i, + unsigned long *rmapp, unsigned long gfn) +{ + __be64 *hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4)); + struct revmap_entry *rev = kvm->arch.hpt.rev; + unsigned long j, h; + unsigned long ptel, psize, rcbits; + + j = rev[i].forw; + if (j == i) { + /* chain is now empty */ + *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX); + } else { + /* remove i from chain */ + h = rev[i].back; + rev[h].forw = j; + rev[j].back = h; + rev[i].forw = rev[i].back = i; + *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j; + } + + /* Now check and modify the HPTE */ + ptel = rev[i].guest_rpte; + psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel); + if 
((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && + hpte_rpn(ptel, psize) == gfn) { + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); + kvmppc_invalidate_hpte(kvm, hptep, i); + hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO); + /* Harvest R and C */ + rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); + *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; + if (rcbits & HPTE_R_C) + kvmppc_update_rmap_change(rmapp, psize); + if (rcbits & ~rev[i].guest_rpte) { + rev[i].guest_rpte = ptel | rcbits; + note_hpte_modification(kvm, &rev[i]); + } + } +} + +static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn) { - struct revmap_entry *rev = kvm->arch.revmap; - unsigned long h, i, j; + unsigned long i; __be64 *hptep; - unsigned long ptel, psize, rcbits; + unsigned long *rmapp; + rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; for (;;) { lock_rmap(rmapp); if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { @@ -763,7 +834,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, * rmap chain lock. */ i = *rmapp & KVMPPC_RMAP_INDEX; - hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); + hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4)); if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); @@ -771,37 +842,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, cpu_relax(); continue; } - j = rev[i].forw; - if (j == i) { - /* chain is now empty */ - *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX); - } else { - /* remove i from chain */ - h = rev[i].back; - rev[h].forw = j; - rev[j].back = h; - rev[i].forw = rev[i].back = i; - *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j; - } - /* Now check and modify the HPTE */ - ptel = rev[i].guest_rpte; - psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel); - if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && - hpte_rpn(ptel, psize) == gfn) { - hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); - kvmppc_invalidate_hpte(kvm, hptep, i); - hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO); - /* Harvest R and C */ - rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); - *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; - if (rcbits & HPTE_R_C) - kvmppc_update_rmap_change(rmapp, psize); - if (rcbits & ~rev[i].guest_rpte) { - rev[i].guest_rpte = ptel | rcbits; - note_hpte_modification(kvm, &rev[i]); - } - } + kvmppc_unmap_hpte(kvm, i, rmapp, gfn); unlock_rmap(rmapp); __unlock_hpte(hptep, be64_to_cpu(hptep[0])); } @@ -810,26 +852,36 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva) { - kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); + hva_handler_fn handler; + + handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp; + kvm_handle_hva(kvm, hva, handler); return 0; } int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end) { - kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); + hva_handler_fn handler; + + handler = kvm_is_radix(kvm) ? 
kvm_unmap_radix : kvm_unmap_rmapp; + kvm_handle_hva_range(kvm, start, end, handler); return 0; } void kvmppc_core_flush_memslot_hv(struct kvm *kvm, struct kvm_memory_slot *memslot) { - unsigned long *rmapp; unsigned long gfn; unsigned long n; + unsigned long *rmapp; - rmapp = memslot->arch.rmap; gfn = memslot->base_gfn; - for (n = memslot->npages; n; --n) { + rmapp = memslot->arch.rmap; + for (n = memslot->npages; n; --n, ++gfn) { + if (kvm_is_radix(kvm)) { + kvm_unmap_radix(kvm, memslot, gfn); + continue; + } /* * Testing the present bit without locking is OK because * the memslot has been marked invalid already, and hence @@ -837,20 +889,21 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm, * thus the present bit can't go from 0 to 1. */ if (*rmapp & KVMPPC_RMAP_PRESENT) - kvm_unmap_rmapp(kvm, rmapp, gfn); + kvm_unmap_rmapp(kvm, memslot, gfn); ++rmapp; - ++gfn; } } -static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, +static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn) { - struct revmap_entry *rev = kvm->arch.revmap; + struct revmap_entry *rev = kvm->arch.hpt.rev; unsigned long head, i, j; __be64 *hptep; int ret = 0; + unsigned long *rmapp; + rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; retry: lock_rmap(rmapp); if (*rmapp & KVMPPC_RMAP_REFERENCED) { @@ -864,7 +917,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, i = head = *rmapp & KVMPPC_RMAP_INDEX; do { - hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); + hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4)); j = rev[i].forw; /* If this HPTE isn't referenced, ignore it */ @@ -898,17 +951,22 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) { - return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp); + hva_handler_fn handler; + + handler = kvm_is_radix(kvm) ? kvm_age_radix : kvm_age_rmapp; + return kvm_handle_hva_range(kvm, start, end, handler); } -static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, +static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn) { - struct revmap_entry *rev = kvm->arch.revmap; + struct revmap_entry *rev = kvm->arch.hpt.rev; unsigned long head, i, j; unsigned long *hp; int ret = 1; + unsigned long *rmapp; + rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; if (*rmapp & KVMPPC_RMAP_REFERENCED) return 1; @@ -919,7 +977,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, if (*rmapp & KVMPPC_RMAP_PRESENT) { i = head = *rmapp & KVMPPC_RMAP_INDEX; do { - hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4)); + hp = (unsigned long *)(kvm->arch.hpt.virt + (i << 4)); j = rev[i].forw; if (be64_to_cpu(hp[1]) & HPTE_R_R) goto out; @@ -934,12 +992,18 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva) { - return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); + hva_handler_fn handler; + + handler = kvm_is_radix(kvm) ? kvm_test_age_radix : kvm_test_age_rmapp; + return kvm_handle_hva(kvm, hva, handler); } void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte) { - kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); + hva_handler_fn handler; + + handler = kvm_is_radix(kvm) ? 
kvm_unmap_radix : kvm_unmap_rmapp; + kvm_handle_hva(kvm, hva, handler); } static int vcpus_running(struct kvm *kvm) @@ -953,7 +1017,7 @@ static int vcpus_running(struct kvm *kvm) */ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) { - struct revmap_entry *rev = kvm->arch.revmap; + struct revmap_entry *rev = kvm->arch.hpt.rev; unsigned long head, i, j; unsigned long n; unsigned long v, r; @@ -978,7 +1042,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) i = head = *rmapp & KVMPPC_RMAP_INDEX; do { unsigned long hptep1; - hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); + hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4)); j = rev[i].forw; /* @@ -1040,7 +1104,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) return npages_dirty; } -static void harvest_vpa_dirty(struct kvmppc_vpa *vpa, +void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa, struct kvm_memory_slot *memslot, unsigned long *map) { @@ -1058,12 +1122,11 @@ static void harvest_vpa_dirty(struct kvmppc_vpa *vpa, __set_bit_le(gfn - memslot->base_gfn, map); } -long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, - unsigned long *map) +long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm, + struct kvm_memory_slot *memslot, unsigned long *map) { unsigned long i, j; unsigned long *rmapp; - struct kvm_vcpu *vcpu; preempt_disable(); rmapp = memslot->arch.rmap; @@ -1079,15 +1142,6 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, __set_bit_le(j, map); ++rmapp; } - - /* Harvest dirty bits from VPA and DTL updates */ - /* Note: we never modify the SLB shadow buffer areas */ - kvm_for_each_vcpu(i, vcpu, kvm) { - spin_lock(&vcpu->arch.vpa_update_lock); - harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map); - harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map); - spin_unlock(&vcpu->arch.vpa_update_lock); - } preempt_enable(); return 0; } @@ -1142,15 +1196,367 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa, srcu_idx = srcu_read_lock(&kvm->srcu); memslot = gfn_to_memslot(kvm, gfn); if (memslot) { - rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; - lock_rmap(rmap); - *rmap |= KVMPPC_RMAP_CHANGED; - unlock_rmap(rmap); + if (!kvm_is_radix(kvm)) { + rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; + lock_rmap(rmap); + *rmap |= KVMPPC_RMAP_CHANGED; + unlock_rmap(rmap); + } else if (memslot->dirty_bitmap) { + mark_page_dirty(kvm, gfn); + } } srcu_read_unlock(&kvm->srcu, srcu_idx); } /* + * HPT resizing + */ +static int resize_hpt_allocate(struct kvm_resize_hpt *resize) +{ + int rc; + + rc = kvmppc_allocate_hpt(&resize->hpt, resize->order); + if (rc < 0) + return rc; + + resize_hpt_debug(resize, "resize_hpt_allocate(): HPT @ 0x%lx\n", + resize->hpt.virt); + + return 0; +} + +static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, + unsigned long idx) +{ + struct kvm *kvm = resize->kvm; + struct kvm_hpt_info *old = &kvm->arch.hpt; + struct kvm_hpt_info *new = &resize->hpt; + unsigned long old_hash_mask = (1ULL << (old->order - 7)) - 1; + unsigned long new_hash_mask = (1ULL << (new->order - 7)) - 1; + __be64 *hptep, *new_hptep; + unsigned long vpte, rpte, guest_rpte; + int ret; + struct revmap_entry *rev; + unsigned long apsize, psize, avpn, pteg, hash; + unsigned long new_idx, new_pteg, replace_vpte; + + hptep = (__be64 *)(old->virt + (idx << 4)); + + /* Guest is stopped, so new HPTEs can't be added or faulted + * in, only unmapped or altered by host 
actions. So, it's + * safe to check this before we take the HPTE lock */ + vpte = be64_to_cpu(hptep[0]); + if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT)) + return 0; /* nothing to do */ + + while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) + cpu_relax(); + + vpte = be64_to_cpu(hptep[0]); + + ret = 0; + if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT)) + /* Nothing to do */ + goto out; + + /* Unmap */ + rev = &old->rev[idx]; + guest_rpte = rev->guest_rpte; + + ret = -EIO; + apsize = hpte_page_size(vpte, guest_rpte); + if (!apsize) + goto out; + + if (vpte & HPTE_V_VALID) { + unsigned long gfn = hpte_rpn(guest_rpte, apsize); + int srcu_idx = srcu_read_lock(&kvm->srcu); + struct kvm_memory_slot *memslot = + __gfn_to_memslot(kvm_memslots(kvm), gfn); + + if (memslot) { + unsigned long *rmapp; + rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; + + lock_rmap(rmapp); + kvmppc_unmap_hpte(kvm, idx, rmapp, gfn); + unlock_rmap(rmapp); + } + + srcu_read_unlock(&kvm->srcu, srcu_idx); + } + + /* Reload PTE after unmap */ + vpte = be64_to_cpu(hptep[0]); + + BUG_ON(vpte & HPTE_V_VALID); + BUG_ON(!(vpte & HPTE_V_ABSENT)); + + ret = 0; + if (!(vpte & HPTE_V_BOLTED)) + goto out; + + rpte = be64_to_cpu(hptep[1]); + psize = hpte_base_page_size(vpte, rpte); + avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23); + pteg = idx / HPTES_PER_GROUP; + if (vpte & HPTE_V_SECONDARY) + pteg = ~pteg; + + if (!(vpte & HPTE_V_1TB_SEG)) { + unsigned long offset, vsid; + + /* We only have 28 - 23 bits of offset in avpn */ + offset = (avpn & 0x1f) << 23; + vsid = avpn >> 5; + /* We can find more bits from the pteg value */ + if (psize < (1ULL << 23)) + offset |= ((vsid ^ pteg) & old_hash_mask) * psize; + + hash = vsid ^ (offset / psize); + } else { + unsigned long offset, vsid; + + /* We only have 40 - 23 bits of seg_off in avpn */ + offset = (avpn & 0x1ffff) << 23; + vsid = avpn >> 17; + if (psize < (1ULL << 23)) + offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) * psize; + + hash = vsid ^ (vsid << 25) ^ (offset / psize); + } + + new_pteg = hash & new_hash_mask; + if (vpte & HPTE_V_SECONDARY) { + BUG_ON(~pteg != (hash & old_hash_mask)); + new_pteg = ~new_pteg; + } else { + BUG_ON(pteg != (hash & old_hash_mask)); + } + + new_idx = new_pteg * HPTES_PER_GROUP + (idx % HPTES_PER_GROUP); + new_hptep = (__be64 *)(new->virt + (new_idx << 4)); + + replace_vpte = be64_to_cpu(new_hptep[0]); + + if (replace_vpte & (HPTE_V_VALID | HPTE_V_ABSENT)) { + BUG_ON(new->order >= old->order); + + if (replace_vpte & HPTE_V_BOLTED) { + if (vpte & HPTE_V_BOLTED) + /* Bolted collision, nothing we can do */ + ret = -ENOSPC; + /* Discard the new HPTE */ + goto out; + } + + /* Discard the previous HPTE */ + } + + new_hptep[1] = cpu_to_be64(rpte); + new->rev[new_idx].guest_rpte = guest_rpte; + /* No need for a barrier, since new HPT isn't active */ + new_hptep[0] = cpu_to_be64(vpte); + unlock_hpte(new_hptep, vpte); + +out: + unlock_hpte(hptep, vpte); + return ret; +} + +static int resize_hpt_rehash(struct kvm_resize_hpt *resize) +{ + struct kvm *kvm = resize->kvm; + unsigned long i; + int rc; + + for (i = 0; i < kvmppc_hpt_npte(&kvm->arch.hpt); i++) { + rc = resize_hpt_rehash_hpte(resize, i); + if (rc != 0) + return rc; + } + + return 0; +} + +static void resize_hpt_pivot(struct kvm_resize_hpt *resize) +{ + struct kvm *kvm = resize->kvm; + struct kvm_hpt_info hpt_tmp; + + /* Exchange the pending tables in the resize structure with + * the active tables */ + + resize_hpt_debug(resize, "resize_hpt_pivot()\n"); + + 
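+ /* The swap itself: kvmppc_set_hpt() below publishes the tentative HPT + * as kvm->arch.hpt, while the old table is parked in the resize struct + * so that resize_hpt_release() frees it. */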
spin_lock(&kvm->mmu_lock); + asm volatile("ptesync" : : : "memory"); + + hpt_tmp = kvm->arch.hpt; + kvmppc_set_hpt(kvm, &resize->hpt); + resize->hpt = hpt_tmp; + + spin_unlock(&kvm->mmu_lock); + + synchronize_srcu_expedited(&kvm->srcu); + + resize_hpt_debug(resize, "resize_hpt_pivot() done\n"); +} + +static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize) +{ + BUG_ON(kvm->arch.resize_hpt != resize); + + if (resize->hpt.virt) + kvmppc_free_hpt(&resize->hpt); + + kvm->arch.resize_hpt = NULL; + kfree(resize); +} + +static void resize_hpt_prepare_work(struct work_struct *work) +{ + struct kvm_resize_hpt *resize = container_of(work, + struct kvm_resize_hpt, + work); + struct kvm *kvm = resize->kvm; + int err; + + resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", + resize->order); + + err = resize_hpt_allocate(resize); + + mutex_lock(&kvm->lock); + + resize->error = err; + resize->prepare_done = true; + + mutex_unlock(&kvm->lock); +} + +long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, + struct kvm_ppc_resize_hpt *rhpt) +{ + unsigned long flags = rhpt->flags; + unsigned long shift = rhpt->shift; + struct kvm_resize_hpt *resize; + int ret; + + if (flags != 0) + return -EINVAL; + + if (shift && ((shift < 18) || (shift > 46))) + return -EINVAL; + + mutex_lock(&kvm->lock); + + resize = kvm->arch.resize_hpt; + + if (resize) { + if (resize->order == shift) { + /* Suitable resize in progress */ + if (resize->prepare_done) { + ret = resize->error; + if (ret != 0) + resize_hpt_release(kvm, resize); + } else { + ret = 100; /* estimated time in ms */ + } + + goto out; + } + + /* not suitable, cancel it */ + resize_hpt_release(kvm, resize); + } + + ret = 0; + if (!shift) + goto out; /* nothing to do */ + + /* start new resize */ + + resize = kzalloc(sizeof(*resize), GFP_KERNEL); + resize->order = shift; + resize->kvm = kvm; + INIT_WORK(&resize->work, resize_hpt_prepare_work); + kvm->arch.resize_hpt = resize; + + schedule_work(&resize->work); + + ret = 100; /* estimated time in ms */ + +out: + mutex_unlock(&kvm->lock); + return ret; +} + +static void resize_hpt_boot_vcpu(void *opaque) +{ + /* Nothing to do, just force a KVM exit */ +} + +long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm, + struct kvm_ppc_resize_hpt *rhpt) +{ + unsigned long flags = rhpt->flags; + unsigned long shift = rhpt->shift; + struct kvm_resize_hpt *resize; + long ret; + + if (flags != 0) + return -EINVAL; + + if (shift && ((shift < 18) || (shift > 46))) + return -EINVAL; + + mutex_lock(&kvm->lock); + + resize = kvm->arch.resize_hpt; + + /* This shouldn't be possible */ + ret = -EIO; + if (WARN_ON(!kvm->arch.hpte_setup_done)) + goto out_no_hpt; + + /* Stop VCPUs from running while we mess with the HPT */ + kvm->arch.hpte_setup_done = 0; + smp_mb(); + + /* Boot all CPUs out of the guest so they re-read + * hpte_setup_done */ + on_each_cpu(resize_hpt_boot_vcpu, NULL, 1); + + ret = -ENXIO; + if (!resize || (resize->order != shift)) + goto out; + + ret = -EBUSY; + if (!resize->prepare_done) + goto out; + + ret = resize->error; + if (ret != 0) + goto out; + + ret = resize_hpt_rehash(resize); + if (ret != 0) + goto out; + + resize_hpt_pivot(resize); + +out: + /* Let VCPUs run again */ + kvm->arch.hpte_setup_done = 1; + smp_mb(); +out_no_hpt: + resize_hpt_release(kvm, resize); + mutex_unlock(&kvm->lock); + return ret; +} + +/* * Functions for reading and writing the hash table via reads and * writes on a file descriptor. 
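The PREPARE/COMMIT pair above is designed to be driven from userspace in a simple loop: PREPARE returns a positive estimated time in milliseconds while the allocation worker is still running, then 0 (or a negative error) once prepare_done is set, and COMMIT performs the rehash and pivot. A minimal sketch of such a caller, assuming the kvm_ppc_resize_hpt uapi struct and the KVM_PPC_RESIZE_HPT_* ioctl numbers added by this series are visible via <linux/kvm.h>:

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <poll.h>

/* Resize a guest's HPT to 2^shift bytes; shift must be 0 or 18..46,
 * mirroring the checks in kvm_vm_ioctl_resize_hpt_prepare() above. */
static int resize_hpt(int vmfd, unsigned int shift)
{
	struct kvm_ppc_resize_hpt rhpt = { .flags = 0, .shift = shift };
	int ret;

	do {
		ret = ioctl(vmfd, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
		if (ret > 0)
			poll(NULL, 0, ret);	/* wait out the estimate, retry */
	} while (ret > 0);
	if (ret < 0)
		return ret;		/* PREPARE failed, errno is set */

	/* Stops the vcpus, rehashes every HPTE and pivots the tables. */
	return ioctl(vmfd, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
}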
* @@ -1290,8 +1696,8 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, flags = ctx->flags; i = ctx->index; - hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); - revp = kvm->arch.revmap + i; + hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE)); + revp = kvm->arch.hpt.rev + i; lbuf = (unsigned long __user *)buf; nb = 0; @@ -1306,7 +1712,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, /* Skip uninteresting entries, i.e. clean on not-first pass */ if (!first_pass) { - while (i < kvm->arch.hpt_npte && + while (i < kvmppc_hpt_npte(&kvm->arch.hpt) && !hpte_dirty(revp, hptp)) { ++i; hptp += 2; @@ -1316,7 +1722,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, hdr.index = i; /* Grab a series of valid entries */ - while (i < kvm->arch.hpt_npte && + while (i < kvmppc_hpt_npte(&kvm->arch.hpt) && hdr.n_valid < 0xffff && nb + HPTE_SIZE < count && record_hpte(flags, hptp, hpte, revp, 1, first_pass)) { @@ -1332,7 +1738,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, ++revp; } /* Now skip invalid entries while we can */ - while (i < kvm->arch.hpt_npte && + while (i < kvmppc_hpt_npte(&kvm->arch.hpt) && hdr.n_invalid < 0xffff && record_hpte(flags, hptp, hpte, revp, 0, first_pass)) { /* found an invalid entry */ @@ -1353,7 +1759,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, } /* Check if we've wrapped around the hash table */ - if (i >= kvm->arch.hpt_npte) { + if (i >= kvmppc_hpt_npte(&kvm->arch.hpt)) { i = 0; ctx->first_pass = 0; break; @@ -1412,11 +1818,11 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, err = -EINVAL; i = hdr.index; - if (i >= kvm->arch.hpt_npte || - i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte) + if (i >= kvmppc_hpt_npte(&kvm->arch.hpt) || + i + hdr.n_valid + hdr.n_invalid > kvmppc_hpt_npte(&kvm->arch.hpt)) break; - hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE)); lbuf = (unsigned long __user *)buf; for (j = 0; j < hdr.n_valid; ++j) { __be64 hpte_v; @@ -1603,8 +2009,9 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf, kvm = p->kvm; i = p->hpt_index; - hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); - for (; len != 0 && i < kvm->arch.hpt_npte; ++i, hptp += 2) { + hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE)); + for (; len != 0 && i < kvmppc_hpt_npte(&kvm->arch.hpt); + ++i, hptp += 2) { if (!(be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT))) continue; @@ -1614,7 +2021,7 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf, cpu_relax(); v = be64_to_cpu(hptp[0]) & ~HPTE_V_HVLOCK; hr = be64_to_cpu(hptp[1]); - gr = kvm->arch.revmap[i].guest_rpte; + gr = kvm->arch.hpt.rev[i].guest_rpte; unlock_hpte(hptp, v); preempt_enable(); @@ -1675,7 +2082,10 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */ - mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; + if (kvm_is_radix(vcpu->kvm)) + mmu->xlate = kvmppc_mmu_radix_xlate; + else + mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; vcpu->arch.hflags |= BOOK3S_HFLAG_SLB; diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c new file mode 100644 index 000000000000..4344651f408c --- /dev/null +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -0,0 +1,716 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under 
the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * Copyright 2016 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> + +/* + * Supported radix tree geometry. + * Like p9, we support either 5 or 9 bits at the first (lowest) level, + * for a page size of 64k or 4k. + */ +static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 }; + +int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, + struct kvmppc_pte *gpte, bool data, bool iswrite) +{ + struct kvm *kvm = vcpu->kvm; + u32 pid; + int ret, level, ps; + __be64 prte, rpte; + unsigned long root, pte, index; + unsigned long rts, bits, offset; + unsigned long gpa; + unsigned long proc_tbl_size; + + /* Work out effective PID */ + switch (eaddr >> 62) { + case 0: + pid = vcpu->arch.pid; + break; + case 3: + pid = 0; + break; + default: + return -EINVAL; + } + proc_tbl_size = 1 << ((kvm->arch.process_table & PRTS_MASK) + 12); + if (pid * 16 >= proc_tbl_size) + return -EINVAL; + + /* Read partition table to find root of tree for effective PID */ + ret = kvm_read_guest(kvm, kvm->arch.process_table + pid * 16, + &prte, sizeof(prte)); + if (ret) + return ret; + + root = be64_to_cpu(prte); + rts = ((root & RTS1_MASK) >> (RTS1_SHIFT - 3)) | + ((root & RTS2_MASK) >> RTS2_SHIFT); + bits = root & RPDS_MASK; + root = root & RPDB_MASK; + + /* P9 DD1 interprets RTS (radix tree size) differently */ + offset = rts + 31; + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + offset -= 3; + + /* current implementations only support 52-bit space */ + if (offset != 52) + return -EINVAL; + + for (level = 3; level >= 0; --level) { + if (level && bits != p9_supported_radix_bits[level]) + return -EINVAL; + if (level == 0 && !(bits == 5 || bits == 9)) + return -EINVAL; + offset -= bits; + index = (eaddr >> offset) & ((1UL << bits) - 1); + /* check that low bits of page table base are zero */ + if (root & ((1UL << (bits + 3)) - 1)) + return -EINVAL; + ret = kvm_read_guest(kvm, root + index * 8, + &rpte, sizeof(rpte)); + if (ret) + return ret; + pte = __be64_to_cpu(rpte); + if (!(pte & _PAGE_PRESENT)) + return -ENOENT; + if (pte & _PAGE_PTE) + break; + bits = pte & 0x1f; + root = pte & 0x0fffffffffffff00ul; + } + /* need a leaf at lowest level; 512GB pages not supported */ + if (level < 0 || level == 3) + return -EINVAL; + + /* offset is now log base 2 of the page size */ + gpa = pte & 0x01fffffffffff000ul; + if (gpa & ((1ul << offset) - 1)) + return -EINVAL; + gpa += eaddr & ((1ul << offset) - 1); + for (ps = MMU_PAGE_4K; ps < MMU_PAGE_COUNT; ++ps) + if (offset == mmu_psize_defs[ps].shift) + break; + gpte->page_size = ps; + + gpte->eaddr = eaddr; + gpte->raddr = gpa; + + /* Work out permissions */ + gpte->may_read = !!(pte & _PAGE_READ); + gpte->may_write = !!(pte & _PAGE_WRITE); + gpte->may_execute = !!(pte & _PAGE_EXEC); + if (kvmppc_get_msr(vcpu) & MSR_PR) { + if (pte & _PAGE_PRIVILEGED) { + gpte->may_read = 0; + gpte->may_write = 0; + gpte->may_execute = 0; + } + } else { + if (!(pte & _PAGE_PRIVILEGED)) { + /* Check AMR/IAMR to see if strict mode is in force */ + if (vcpu->arch.amr & (1ul << 62)) + gpte->may_read = 0; + if (vcpu->arch.amr & (1ul << 63)) + gpte->may_write = 0; + if (vcpu->arch.iamr & (1ul << 62)) + gpte->may_execute = 0; + } + } + + 
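+ /* Example geometry: bits of 13, 9, 9 and 5 consume 36 of the 52 + * address bits on the way to a level-0 leaf, leaving offset = 16, + * i.e. a 64k page; a 4k tree uses 9 bits at level 0 instead. */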
return 0; +} + +#ifdef CONFIG_PPC_64K_PAGES +#define MMU_BASE_PSIZE MMU_PAGE_64K +#else +#define MMU_BASE_PSIZE MMU_PAGE_4K +#endif + +static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, + unsigned int pshift) +{ + int psize = MMU_BASE_PSIZE; + + if (pshift >= PMD_SHIFT) + psize = MMU_PAGE_2M; + addr &= ~0xfffUL; + addr |= mmu_psize_defs[psize].ap << 5; + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) + : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); + asm volatile("ptesync": : :"memory"); +} + +unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, + unsigned long clr, unsigned long set, + unsigned long addr, unsigned int shift) +{ + unsigned long old = 0; + + if (!(clr & _PAGE_PRESENT) && cpu_has_feature(CPU_FTR_POWER9_DD1) && + pte_present(*ptep)) { + /* have to invalidate it first */ + old = __radix_pte_update(ptep, _PAGE_PRESENT, 0); + kvmppc_radix_tlbie_page(kvm, addr, shift); + set |= _PAGE_PRESENT; + old &= _PAGE_PRESENT; + } + return __radix_pte_update(ptep, clr, set) | old; +} + +void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + radix__set_pte_at(kvm->mm, addr, ptep, pte, 0); +} + +static struct kmem_cache *kvm_pte_cache; + +static pte_t *kvmppc_pte_alloc(void) +{ + return kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL); +} + +static void kvmppc_pte_free(pte_t *ptep) +{ + kmem_cache_free(kvm_pte_cache, ptep); +} + +static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, + unsigned int level, unsigned long mmu_seq) +{ + pgd_t *pgd; + pud_t *pud, *new_pud = NULL; + pmd_t *pmd, *new_pmd = NULL; + pte_t *ptep, *new_ptep = NULL; + unsigned long old; + int ret; + + /* Traverse the guest's 2nd-level tree, allocate new levels needed */ + pgd = kvm->arch.pgtable + pgd_index(gpa); + pud = NULL; + if (pgd_present(*pgd)) + pud = pud_offset(pgd, gpa); + else + new_pud = pud_alloc_one(kvm->mm, gpa); + + pmd = NULL; + if (pud && pud_present(*pud)) + pmd = pmd_offset(pud, gpa); + else + new_pmd = pmd_alloc_one(kvm->mm, gpa); + + if (level == 0 && !(pmd && pmd_present(*pmd))) + new_ptep = kvmppc_pte_alloc(); + + /* Check if we might have been invalidated; let the guest retry if so */ + spin_lock(&kvm->mmu_lock); + ret = -EAGAIN; + if (mmu_notifier_retry(kvm, mmu_seq)) + goto out_unlock; + + /* Now traverse again under the lock and change the tree */ + ret = -ENOMEM; + if (pgd_none(*pgd)) { + if (!new_pud) + goto out_unlock; + pgd_populate(kvm->mm, pgd, new_pud); + new_pud = NULL; + } + pud = pud_offset(pgd, gpa); + if (pud_none(*pud)) { + if (!new_pmd) + goto out_unlock; + pud_populate(kvm->mm, pud, new_pmd); + new_pmd = NULL; + } + pmd = pmd_offset(pud, gpa); + if (pmd_large(*pmd)) { + /* Someone else has instantiated a large page here; retry */ + ret = -EAGAIN; + goto out_unlock; + } + if (level == 1 && !pmd_none(*pmd)) { + /* + * There's a page table page here, but we wanted + * to install a large page. Tell the caller and let + * it try installing a normal page if it wants. 
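+ * (kvmppc_book3s_radix_page_fault() does exactly that when it + * sees -EBUSY: it retries with a small page.)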
+ */ + ret = -EBUSY; + goto out_unlock; + } + if (level == 0) { + if (pmd_none(*pmd)) { + if (!new_ptep) + goto out_unlock; + pmd_populate(kvm->mm, pmd, new_ptep); + new_ptep = NULL; + } + ptep = pte_offset_kernel(pmd, gpa); + if (pte_present(*ptep)) { + /* PTE was previously valid, so invalidate it */ + old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, + 0, gpa, 0); + kvmppc_radix_tlbie_page(kvm, gpa, 0); + if (old & _PAGE_DIRTY) + mark_page_dirty(kvm, gpa >> PAGE_SHIFT); + } + kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte); + } else { + kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte); + } + ret = 0; + + out_unlock: + spin_unlock(&kvm->mmu_lock); + if (new_pud) + pud_free(kvm->mm, new_pud); + if (new_pmd) + pmd_free(kvm->mm, new_pmd); + if (new_ptep) + kvmppc_pte_free(new_ptep); + return ret; +} + +int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned long ea, unsigned long dsisr) +{ + struct kvm *kvm = vcpu->kvm; + unsigned long mmu_seq, pte_size; + unsigned long gpa, gfn, hva, pfn; + struct kvm_memory_slot *memslot; + struct page *page = NULL, *pages[1]; + long ret, npages, ok; + unsigned int writing; + struct vm_area_struct *vma; + unsigned long flags; + pte_t pte, *ptep; + unsigned long pgflags; + unsigned int shift, level; + + /* Check for unusual errors */ + if (dsisr & DSISR_UNSUPP_MMU) { + pr_err("KVM: Got unsupported MMU fault\n"); + return -EFAULT; + } + if (dsisr & DSISR_BADACCESS) { + /* Reflect to the guest as DSI */ + pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr); + kvmppc_core_queue_data_storage(vcpu, ea, dsisr); + return RESUME_GUEST; + } + + /* Translate the logical address and get the page */ + gpa = vcpu->arch.fault_gpa & ~0xfffUL; + gpa &= ~0xF000000000000000ul; + gfn = gpa >> PAGE_SHIFT; + if (!(dsisr & DSISR_PGDIRFAULT)) + gpa |= ea & 0xfff; + memslot = gfn_to_memslot(kvm, gfn); + + /* No memslot means it's an emulated MMIO region */ + if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { + if (dsisr & (DSISR_PGDIRFAULT | DSISR_BADACCESS | + DSISR_SET_RC)) { + /* + * Bad address in guest page table tree, or other + * unusual error - reflect it to the guest as DSI. + */ + kvmppc_core_queue_data_storage(vcpu, ea, dsisr); + return RESUME_GUEST; + } + return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, + dsisr & DSISR_ISSTORE); + } + + /* used to check for invalidations in progress */ + mmu_seq = kvm->mmu_notifier_seq; + smp_rmb(); + + writing = (dsisr & DSISR_ISSTORE) != 0; + hva = gfn_to_hva_memslot(memslot, gfn); + if (dsisr & DSISR_SET_RC) { + /* + * Need to set an R or C bit in the 2nd-level tables; + * if the relevant bits aren't already set in the linux + * page tables, fall through to do the gup_fast to + * set them in the linux page tables too. 
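+ * When the host PTE already has the bits, only the 2nd-level + * (partition-scoped) PTE is updated, under kvm->mmu_lock.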
+ */ + ok = 0; + pgflags = _PAGE_ACCESSED; + if (writing) + pgflags |= _PAGE_DIRTY; + local_irq_save(flags); + ptep = __find_linux_pte_or_hugepte(current->mm->pgd, hva, + NULL, NULL); + if (ptep) { + pte = READ_ONCE(*ptep); + if (pte_present(pte) && + (pte_val(pte) & pgflags) == pgflags) + ok = 1; + } + local_irq_restore(flags); + if (ok) { + spin_lock(&kvm->mmu_lock); + if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) { + spin_unlock(&kvm->mmu_lock); + return RESUME_GUEST; + } + ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, + gpa, NULL, &shift); + if (ptep && pte_present(*ptep)) { + kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, + gpa, shift); + spin_unlock(&kvm->mmu_lock); + return RESUME_GUEST; + } + spin_unlock(&kvm->mmu_lock); + } + } + + ret = -EFAULT; + pfn = 0; + pte_size = PAGE_SIZE; + pgflags = _PAGE_READ | _PAGE_EXEC; + level = 0; + npages = get_user_pages_fast(hva, 1, writing, pages); + if (npages < 1) { + /* Check if it's an I/O mapping */ + down_read(&current->mm->mmap_sem); + vma = find_vma(current->mm, hva); + if (vma && vma->vm_start <= hva && hva < vma->vm_end && + (vma->vm_flags & VM_PFNMAP)) { + pfn = vma->vm_pgoff + + ((hva - vma->vm_start) >> PAGE_SHIFT); + pgflags = pgprot_val(vma->vm_page_prot); + } + up_read(&current->mm->mmap_sem); + if (!pfn) + return -EFAULT; + } else { + page = pages[0]; + pfn = page_to_pfn(page); + if (PageHuge(page)) { + page = compound_head(page); + pte_size <<= compound_order(page); + /* See if we can insert a 2MB large-page PTE here */ + if (pte_size >= PMD_SIZE && + (gpa & PMD_MASK & PAGE_MASK) == + (hva & PMD_MASK & PAGE_MASK)) { + level = 1; + pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1); + } + } + /* See if we can provide write access */ + if (writing) { + /* + * We assume gup_fast has set dirty on the host PTE. + */ + pgflags |= _PAGE_WRITE; + } else { + local_irq_save(flags); + ptep = __find_linux_pte_or_hugepte(current->mm->pgd, + hva, NULL, NULL); + if (ptep && pte_write(*ptep) && pte_dirty(*ptep)) + pgflags |= _PAGE_WRITE; + local_irq_restore(flags); + } + } + + /* + * Compute the PTE value that we need to insert. + */ + pgflags |= _PAGE_PRESENT | _PAGE_PTE | _PAGE_ACCESSED; + if (pgflags & _PAGE_WRITE) + pgflags |= _PAGE_DIRTY; + pte = pfn_pte(pfn, __pgprot(pgflags)); + + /* Allocate space in the tree and write the PTE */ + ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq); + if (ret == -EBUSY) { + /* + * There's already a PMD where we wanted to install a large page; + * for now, fall back to installing a small page.
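+ * The low bits of gfn are put back into pfn below so that the + * retry installs the one small page that actually faulted.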
+ */ + level = 0; + pfn |= gfn & ((PMD_SIZE >> PAGE_SHIFT) - 1); + pte = pfn_pte(pfn, __pgprot(pgflags)); + ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq); + } + if (ret == 0 || ret == -EAGAIN) + ret = RESUME_GUEST; + + if (page) { + /* + * We drop pages[0] here, not page because page might + * have been set to the head page of a compound, but + * we have to drop the reference on the correct tail + * page to match the get inside gup() + */ + put_page(pages[0]); + } + return ret; +} + +static void mark_pages_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long gfn, unsigned int order) +{ + unsigned long i, limit; + unsigned long *dp; + + if (!memslot->dirty_bitmap) + return; + limit = 1ul << order; + if (limit < BITS_PER_LONG) { + for (i = 0; i < limit; ++i) + mark_page_dirty(kvm, gfn + i); + return; + } + dp = memslot->dirty_bitmap + (gfn - memslot->base_gfn); + limit /= BITS_PER_LONG; + for (i = 0; i < limit; ++i) + *dp++ = ~0ul; +} + +/* Called with kvm->lock held */ +int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long gfn) +{ + pte_t *ptep; + unsigned long gpa = gfn << PAGE_SHIFT; + unsigned int shift; + unsigned long old; + + ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, + NULL, &shift); + if (ptep && pte_present(*ptep)) { + old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0, + gpa, shift); + kvmppc_radix_tlbie_page(kvm, gpa, shift); + if (old & _PAGE_DIRTY) { + if (!shift) + mark_page_dirty(kvm, gfn); + else + mark_pages_dirty(kvm, memslot, + gfn, shift - PAGE_SHIFT); + } + } + return 0; +} + +/* Called with kvm->lock held */ +int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long gfn) +{ + pte_t *ptep; + unsigned long gpa = gfn << PAGE_SHIFT; + unsigned int shift; + int ref = 0; + + ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, + NULL, &shift); + if (ptep && pte_present(*ptep) && pte_young(*ptep)) { + kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0, + gpa, shift); + /* XXX need to flush tlb here? */ + ref = 1; + } + return ref; +} + +/* Called with kvm->lock held */ +int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long gfn) +{ + pte_t *ptep; + unsigned long gpa = gfn << PAGE_SHIFT; + unsigned int shift; + int ref = 0; + + ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, + NULL, &shift); + if (ptep && pte_present(*ptep) && pte_young(*ptep)) + ref = 1; + return ref; +} + +/* Returns the number of PAGE_SIZE pages that are dirty */ +static int kvm_radix_test_clear_dirty(struct kvm *kvm, + struct kvm_memory_slot *memslot, int pagenum) +{ + unsigned long gfn = memslot->base_gfn + pagenum; + unsigned long gpa = gfn << PAGE_SHIFT; + pte_t *ptep; + unsigned int shift; + int ret = 0; + + ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, + NULL, &shift); + if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) { + ret = 1; + if (shift) + ret = 1 << (shift - PAGE_SHIFT); + kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0, + gpa, shift); + kvmppc_radix_tlbie_page(kvm, gpa, shift); + } + return ret; +} + +long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm, + struct kvm_memory_slot *memslot, unsigned long *map) +{ + unsigned long i, j; + unsigned long n, *p; + int npages; + + /* + * Radix accumulates dirty bits in the first half of the + * memslot's dirty_bitmap area, for when pages are paged + * out or modified by the host directly. Pick up these + * bits and add them to the map. 
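+ * The xchg() below transfers and clears each word atomically, so a + * bit set concurrently by mark_page_dirty() is either seen now or + * left for the next harvest, never lost.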
+ */ + n = kvm_dirty_bitmap_bytes(memslot) / sizeof(long); + p = memslot->dirty_bitmap; + for (i = 0; i < n; ++i) + map[i] |= xchg(&p[i], 0); + + for (i = 0; i < memslot->npages; i = j) { + npages = kvm_radix_test_clear_dirty(kvm, memslot, i); + + /* + * Note that if npages > 0 then i must be a multiple of npages, + * since huge pages are only used to back the guest at guest + * real addresses that are a multiple of their size. + * Since we have at most one PTE covering any given guest + * real address, if npages > 1 we can skip to i + npages. + */ + j = i + 1; + if (npages) + for (j = i; npages; ++j, --npages) + __set_bit_le(j, map); + } + return 0; +} + +static void add_rmmu_ap_encoding(struct kvm_ppc_rmmu_info *info, + int psize, int *indexp) +{ + if (!mmu_psize_defs[psize].shift) + return; + info->ap_encodings[*indexp] = mmu_psize_defs[psize].shift | + (mmu_psize_defs[psize].ap << 29); + ++(*indexp); +} + +int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info) +{ + int i; + + if (!radix_enabled()) + return -EINVAL; + memset(info, 0, sizeof(*info)); + + /* 4k page size */ + info->geometries[0].page_shift = 12; + info->geometries[0].level_bits[0] = 9; + for (i = 1; i < 4; ++i) + info->geometries[0].level_bits[i] = p9_supported_radix_bits[i]; + /* 64k page size */ + info->geometries[1].page_shift = 16; + for (i = 0; i < 4; ++i) + info->geometries[1].level_bits[i] = p9_supported_radix_bits[i]; + + i = 0; + add_rmmu_ap_encoding(info, MMU_PAGE_4K, &i); + add_rmmu_ap_encoding(info, MMU_PAGE_64K, &i); + add_rmmu_ap_encoding(info, MMU_PAGE_2M, &i); + add_rmmu_ap_encoding(info, MMU_PAGE_1G, &i); + + return 0; +} + +int kvmppc_init_vm_radix(struct kvm *kvm) +{ + kvm->arch.pgtable = pgd_alloc(kvm->mm); + if (!kvm->arch.pgtable) + return -ENOMEM; + return 0; +} + +void kvmppc_free_radix(struct kvm *kvm) +{ + unsigned long ig, iu, im; + pte_t *pte; + pmd_t *pmd; + pud_t *pud; + pgd_t *pgd; + + if (!kvm->arch.pgtable) + return; + pgd = kvm->arch.pgtable; + for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) { + if (!pgd_present(*pgd)) + continue; + pud = pud_offset(pgd, 0); + for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) { + if (!pud_present(*pud)) + continue; + pmd = pmd_offset(pud, 0); + for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) { + if (pmd_huge(*pmd)) { + pmd_clear(pmd); + continue; + } + if (!pmd_present(*pmd)) + continue; + pte = pte_offset_map(pmd, 0); + memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE); + kvmppc_pte_free(pte); + pmd_clear(pmd); + } + pmd_free(kvm->mm, pmd_offset(pud, 0)); + pud_clear(pud); + } + pud_free(kvm->mm, pud_offset(pgd, 0)); + pgd_clear(pgd); + } + pgd_free(kvm->mm, kvm->arch.pgtable); +} + +static void pte_ctor(void *addr) +{ + memset(addr, 0, PTE_TABLE_SIZE); +} + +int kvmppc_radix_init(void) +{ + unsigned long size = sizeof(void *) << PTE_INDEX_SIZE; + + kvm_pte_cache = kmem_cache_create("kvm-pte", size, size, 0, pte_ctor); + if (!kvm_pte_cache) + return -ENOMEM; + return 0; +} + +void kvmppc_radix_exit(void) +{ + kmem_cache_destroy(kvm_pte_cache); +} diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 8dcbe37a4dac..1e107ece4e37 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -39,7 +39,7 @@ #include <asm/cputable.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/io.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -182,7 +182,8 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) ++vcpu->stat.halt_wakeup; 
} - if (kvmppc_ipi_thread(vcpu->arch.thread_cpu)) + cpu = READ_ONCE(vcpu->arch.thread_cpu); + if (cpu >= 0 && kvmppc_ipi_thread(cpu)) return; /* CPU points to the first thread of the core */ @@ -773,12 +774,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) } tvcpu->arch.prodded = 1; smp_mb(); - if (vcpu->arch.ceded) { - if (swait_active(&vcpu->wq)) { - swake_up(&vcpu->wq); - vcpu->stat.halt_wakeup++; - } - } + if (tvcpu->arch.ceded) + kvmppc_fast_vcpu_kick_hv(tvcpu); break; case H_CONFER: target = kvmppc_get_gpr(vcpu, 4); @@ -1135,7 +1132,7 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, /* * Userspace can only modify DPFD (default prefetch depth), * ILE (interrupt little-endian) and TC (translation control). - * On POWER8 userspace can also modify AIL (alt. interrupt loc.) + * On POWER8 and POWER9 userspace can also modify AIL (alt. interrupt loc.). */ mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; if (cpu_has_feature(CPU_FTR_ARCH_207S)) @@ -1821,6 +1818,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, vcpu->arch.vcore = vcore; vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid; vcpu->arch.thread_cpu = -1; + vcpu->arch.prev_cpu = -1; vcpu->arch.cpu_type = KVM_CPU_3S_64; kvmppc_sanity_check(vcpu); @@ -1872,8 +1870,7 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu) } dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC / tb_ticks_per_sec; - hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec), - HRTIMER_MODE_REL); + hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL); vcpu->arch.timer_running = 1; } @@ -1951,11 +1948,33 @@ static void kvmppc_release_hwthread(int cpu) tpaca->kvm_hstate.kvm_split_mode = NULL; } +static void do_nothing(void *x) +{ +} + +static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) +{ + int i; + + cpu = cpu_first_thread_sibling(cpu); + cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush); + /* + * Make sure setting of bit in need_tlb_flush precedes + * testing of cpu_in_guest bits. The matching barrier on + * the other side is the first smp_mb() in kvmppc_run_core(). + */ + smp_mb(); + for (i = 0; i < threads_per_core; ++i) + if (cpumask_test_cpu(cpu + i, &kvm->arch.cpu_in_guest)) + smp_call_function_single(cpu + i, do_nothing, NULL, 1); +} + static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) { int cpu; struct paca_struct *tpaca; struct kvmppc_vcore *mvc = vc->master_vcore; + struct kvm *kvm = vc->kvm; cpu = vc->pcpu; if (vcpu) { @@ -1966,6 +1985,27 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) cpu += vcpu->arch.ptid; vcpu->cpu = mvc->pcpu; vcpu->arch.thread_cpu = cpu; + + /* + * With radix, the guest can do TLB invalidations itself, + * and it could choose to use the local form (tlbiel) if + * it is invalidating a translation that has only ever been + * used on one vcpu. However, that doesn't mean it has + * only ever been used on one physical cpu, since vcpus + * can move around between pcpus. To cope with this, when + * a vcpu moves from one pcpu to another, we need to tell + * any vcpus running on the same core as this vcpu previously + * ran to flush the TLB. The TLB is shared between threads, + * so we use a single bit in .need_tlb_flush for all 4 threads. 
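+ * For example, a vcpu that last ran on pcpu 13 and is starting on + * pcpu 4 makes radix_flush_cpu() set the need_tlb_flush bit for + * pcpu 12, the first thread of the old core, and kick any of + * pcpus 12-15 that are still in the guest.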
+ */ + if (kvm_is_radix(kvm) && vcpu->arch.prev_cpu != cpu) { + if (vcpu->arch.prev_cpu >= 0 && + cpu_first_thread_sibling(vcpu->arch.prev_cpu) != + cpu_first_thread_sibling(cpu)) + radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu); + vcpu->arch.prev_cpu = cpu; + } + cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest); } tpaca = &paca[cpu]; tpaca->kvm_hstate.kvm_vcpu = vcpu; @@ -2553,6 +2593,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_release_hwthread(pcpu + i); if (sip && sip->napped[i]) kvmppc_ipi_thread(pcpu + i); + cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest); } kvmppc_set_host_core(pcpu); @@ -2621,7 +2662,8 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc) int i; for_each_runnable_thread(i, vcpu, vc) { - if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) + if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded || + vcpu->arch.prodded) return 1; } @@ -2807,7 +2849,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) break; n_ceded = 0; for_each_runnable_thread(i, v, vc) { - if (!v->arch.pending_exceptions) + if (!v->arch.pending_exceptions && !v->arch.prodded) n_ceded += v->arch.ceded; else v->arch.ceded = 0; @@ -2878,7 +2920,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) smp_mb(); /* On the first time here, set up HTAB and VRMA */ - if (!vcpu->kvm->arch.hpte_setup_done) { + if (!kvm_is_radix(vcpu->kvm) && !vcpu->kvm->arch.hpte_setup_done) { r = kvmppc_hv_setup_htab_rma(vcpu); if (r) goto out; @@ -2940,6 +2982,13 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm, { struct kvm_ppc_one_seg_page_size *sps; + /* + * Since we don't yet support HPT guests on a radix host, + * return an error if the host uses radix. + */ + if (radix_enabled()) + return -EINVAL; + info->flags = KVM_PPC_PAGE_SIZES_REAL; if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) info->flags |= KVM_PPC_1T_SEGMENTS; @@ -2962,8 +3011,10 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm, { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; - int r; + int i, r; unsigned long n; + unsigned long *buf; + struct kvm_vcpu *vcpu; mutex_lock(&kvm->slots_lock); @@ -2977,15 +3028,32 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm, if (!memslot->dirty_bitmap) goto out; + /* + * Use second half of bitmap area because radix accumulates + * bits in the first half. 
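+ * (The generic KVM code allocates dirty_bitmap with room for two + * copies of kvm_dirty_bitmap_bytes(), so the two halves never + * overlap.)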
+ */ n = kvm_dirty_bitmap_bytes(memslot); - memset(memslot->dirty_bitmap, 0, n); + buf = memslot->dirty_bitmap + n / sizeof(long); + memset(buf, 0, n); - r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap); + if (kvm_is_radix(kvm)) + r = kvmppc_hv_get_dirty_log_radix(kvm, memslot, buf); + else + r = kvmppc_hv_get_dirty_log_hpt(kvm, memslot, buf); if (r) goto out; + /* Harvest dirty bits from VPA and DTL updates */ + /* Note: we never modify the SLB shadow buffer areas */ + kvm_for_each_vcpu(i, vcpu, kvm) { + spin_lock(&vcpu->arch.vpa_update_lock); + kvmppc_harvest_vpa_dirty(&vcpu->arch.vpa, memslot, buf); + kvmppc_harvest_vpa_dirty(&vcpu->arch.dtl, memslot, buf); + spin_unlock(&vcpu->arch.vpa_update_lock); + } + r = -EFAULT; - if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) + if (copy_to_user(log->dirty_bitmap, buf, n)) goto out; r = 0; @@ -3006,6 +3074,15 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free, static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, unsigned long npages) { + /* + * For now, if radix_enabled() then we only support radix guests, + * and in that case we don't need the rmap array. + */ + if (radix_enabled()) { + slot->arch.rmap = NULL; + return 0; + } + slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); if (!slot->arch.rmap) return -ENOMEM; @@ -3038,7 +3115,7 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, if (npages) atomic64_inc(&kvm->arch.mmio_update); - if (npages && old->npages) { + if (npages && old->npages && !kvm_is_radix(kvm)) { /* * If modifying a memslot, reset all the rmap dirty bits. * If this is a new memslot, we don't need to do anything @@ -3047,7 +3124,7 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, */ slots = kvm_memslots(kvm); memslot = id_to_memslot(slots, mem->slot); - kvmppc_hv_get_dirty_log(kvm, memslot, NULL); + kvmppc_hv_get_dirty_log_hpt(kvm, memslot, NULL); } } @@ -3086,14 +3163,20 @@ static void kvmppc_setup_partition_table(struct kvm *kvm) { unsigned long dw0, dw1; - /* PS field - page size for VRMA */ - dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) | - ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1); - /* HTABSIZE and HTABORG fields */ - dw0 |= kvm->arch.sdr1; + if (!kvm_is_radix(kvm)) { + /* PS field - page size for VRMA */ + dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) | + ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1); + /* HTABSIZE and HTABORG fields */ + dw0 |= kvm->arch.sdr1; - /* Second dword has GR=0; other fields are unused since UPRT=0 */ - dw1 = 0; + /* Second dword as set by userspace */ + dw1 = kvm->arch.process_table; + } else { + dw0 = PATB_HR | radix__get_tree_size() | + __pa(kvm->arch.pgtable) | RADIX_PGD_INDEX_SIZE; + dw1 = PATB_GR | kvm->arch.process_table; + } mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1); } @@ -3114,12 +3197,23 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) goto out; /* another vcpu beat us to it */ /* Allocate hashed page table (if not done already) and reset it */ - if (!kvm->arch.hpt_virt) { - err = kvmppc_alloc_hpt(kvm, NULL); - if (err) { + if (!kvm->arch.hpt.virt) { + int order = KVM_DEFAULT_HPT_ORDER; + struct kvm_hpt_info info; + + err = kvmppc_allocate_hpt(&info, order); + /* If we get here, it means userspace didn't specify a + * size explicitly. So, try successively smaller + * sizes if the default failed. 
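+ * e.g. from the 16MB KVM_DEFAULT_HPT_ORDER allocation down + * towards the 256kB PPC_MIN_HPT_ORDER floor, halving each time.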
*/ + while ((err == -ENOMEM) && --order >= PPC_MIN_HPT_ORDER) + err = kvmppc_allocate_hpt(&info, order); + + if (err < 0) { pr_err("KVM: Couldn't alloc HPT\n"); goto out; } + + kvmppc_set_hpt(kvm, &info); } /* Look up the memslot for guest physical address 0 */ @@ -3263,6 +3357,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) { unsigned long lpcr, lpid; char buf[32]; + int ret; /* Allocate the guest's logical partition ID */ @@ -3310,13 +3405,33 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) lpcr |= LPCR_HVICE; } + /* + * For now, if the host uses radix, the guest must be radix. + */ + if (radix_enabled()) { + kvm->arch.radix = 1; + lpcr &= ~LPCR_VPM1; + lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR; + ret = kvmppc_init_vm_radix(kvm); + if (ret) { + kvmppc_free_lpid(kvm->arch.lpid); + return ret; + } + kvmppc_setup_partition_table(kvm); + } + kvm->arch.lpcr = lpcr; + /* Initialization for future HPT resizes */ + kvm->arch.resize_hpt = NULL; + /* * Work out how many sets the TLB has, for the use of * the TLB invalidation loop in book3s_hv_rmhandlers.S. */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (kvm_is_radix(kvm)) + kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX; /* 128 */ + else if (cpu_has_feature(CPU_FTR_ARCH_300)) kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */ else if (cpu_has_feature(CPU_FTR_ARCH_207S)) kvm->arch.tlb_sets = POWER8_TLB_SETS; /* 512 */ @@ -3326,8 +3441,11 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) /* * Track that we now have a HV mode VM active. This blocks secondary * CPU threads from coming online. + * On POWER9, we only need to do this for HPT guests on a radix + * host, which is not yet supported. */ - kvm_hv_vm_activated(); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + kvm_hv_vm_activated(); /* * Create a debugfs directory for the VM @@ -3353,11 +3471,17 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) { debugfs_remove_recursive(kvm->arch.debugfs_dir); - kvm_hv_vm_deactivated(); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + kvm_hv_vm_deactivated(); kvmppc_free_vcores(kvm); - kvmppc_free_hpt(kvm); + kvmppc_free_lpid(kvm->arch.lpid); + + if (kvm_is_radix(kvm)) + kvmppc_free_radix(kvm); + else + kvmppc_free_hpt(&kvm->arch.hpt); kvmppc_free_pimap(kvm); } @@ -3386,11 +3510,6 @@ static int kvmppc_core_check_processor_compat_hv(void) if (!cpu_has_feature(CPU_FTR_HVMODE) || !cpu_has_feature(CPU_FTR_ARCH_206)) return -EIO; - /* - * Disable KVM for Power9 in radix mode. 
- */ - if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled()) - return -EIO; return 0; } @@ -3588,12 +3707,9 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, r = -EFAULT; if (get_user(htab_order, (u32 __user *)argp)) break; - r = kvmppc_alloc_reset_hpt(kvm, &htab_order); + r = kvmppc_alloc_reset_hpt(kvm, htab_order); if (r) break; - r = -EFAULT; - if (put_user(htab_order, (u32 __user *)argp)) - break; r = 0; break; } @@ -3608,6 +3724,28 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, break; } + case KVM_PPC_RESIZE_HPT_PREPARE: { + struct kvm_ppc_resize_hpt rhpt; + + r = -EFAULT; + if (copy_from_user(&rhpt, argp, sizeof(rhpt))) + break; + + r = kvm_vm_ioctl_resize_hpt_prepare(kvm, &rhpt); + break; + } + + case KVM_PPC_RESIZE_HPT_COMMIT: { + struct kvm_ppc_resize_hpt rhpt; + + r = -EFAULT; + if (copy_from_user(&rhpt, argp, sizeof(rhpt))) + break; + + r = kvm_vm_ioctl_resize_hpt_commit(kvm, &rhpt); + break; + } + default: r = -ENOTTY; } @@ -3658,6 +3796,41 @@ static void init_default_hcalls(void) } } +static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg) +{ + unsigned long lpcr; + int radix; + + /* If not on a POWER9, reject it */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return -ENODEV; + + /* If any unknown flags set, reject it */ + if (cfg->flags & ~(KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE)) + return -EINVAL; + + /* We can't change a guest to/from radix yet */ + radix = !!(cfg->flags & KVM_PPC_MMUV3_RADIX); + if (radix != kvm_is_radix(kvm)) + return -EINVAL; + + /* GR (guest radix) bit in process_table field must match */ + if (!!(cfg->process_table & PATB_GR) != radix) + return -EINVAL; + + /* Process table size field must be reasonable, i.e. <= 24 */ + if ((cfg->process_table & PRTS_MASK) > 24) + return -EINVAL; + + kvm->arch.process_table = cfg->process_table; + kvmppc_setup_partition_table(kvm); + + lpcr = (cfg->flags & KVM_PPC_MMUV3_GTSE) ? 
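+	/*
+	 * Editor's illustration (hypothetical userspace, not part of the
+	 * patch - vm_fd and proc_tbl are stand-ins): a VMM would drive
+	 * this path roughly as
+	 *
+	 *   struct kvm_ppc_mmuv3_cfg cfg = {
+	 *           .flags = KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE,
+	 *           .process_table = proc_tbl | PATB_GR,
+	 *   };
+	 *   ioctl(vm_fd, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
+	 *
+	 * the GTSE flag then decides whether LPCR_GTSE is set here.
+	 */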
LPCR_GTSE : 0; + kvmppc_update_lpcr(kvm, lpcr, LPCR_GTSE); + + return 0; +} + static struct kvmppc_ops kvm_ops_hv = { .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, @@ -3695,6 +3868,8 @@ static struct kvmppc_ops kvm_ops_hv = { .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv, .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv, #endif + .configure_mmu = kvmhv_configure_mmu, + .get_rmmu_info = kvmhv_get_rmmu_info, }; static int kvm_init_subcore_bitmap(void) @@ -3729,6 +3904,11 @@ static int kvm_init_subcore_bitmap(void) return 0; } +static int kvmppc_radix_possible(void) +{ + return cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled(); +} + static int kvmppc_book3s_init_hv(void) { int r; @@ -3768,12 +3948,19 @@ static int kvmppc_book3s_init_hv(void) init_vcore_lists(); r = kvmppc_mmu_hv_init(); + if (r) + return r; + + if (kvmppc_radix_possible()) + r = kvmppc_radix_init(); return r; } static void kvmppc_book3s_exit_hv(void) { kvmppc_free_host_rm_ops(); + if (kvmppc_radix_possible()) + kvmppc_radix_exit(); kvmppc_hv_ops = NULL; } diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 5bb24be0b346..c42a7e63b39e 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -52,19 +52,19 @@ static int __init early_parse_kvm_cma_resv(char *p) } early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv); -struct page *kvm_alloc_hpt(unsigned long nr_pages) +struct page *kvm_alloc_hpt_cma(unsigned long nr_pages) { VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES)); } -EXPORT_SYMBOL_GPL(kvm_alloc_hpt); +EXPORT_SYMBOL_GPL(kvm_alloc_hpt_cma); -void kvm_release_hpt(struct page *page, unsigned long nr_pages) +void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages) { cma_release(kvm_cma, page, nr_pages); } -EXPORT_SYMBOL_GPL(kvm_release_hpt); +EXPORT_SYMBOL_GPL(kvm_free_hpt_cma); /** * kvm_cma_reserve() - reserve area for kvm hash pagetable @@ -200,7 +200,6 @@ static inline void rm_writeb(unsigned long paddr, u8 val) /* * Send an interrupt or message to another CPU. - * This can only be called in real mode. * The caller needs to include any barrier needed to order writes * to memory vs. the IPI/message. */ @@ -229,8 +228,7 @@ void kvmhv_rm_send_ipi(int cpu) if (xics_phys) rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); else - opal_rm_int_set_mfrr(get_hard_smp_processor_id(cpu), - IPI_PRIORITY); + opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY); } /* @@ -412,14 +410,13 @@ static long kvmppc_read_one_intr(bool *again) /* Now read the interrupt from the ICP */ xics_phys = local_paca->kvm_hstate.xics_phys; - if (!xics_phys) { - /* Use OPAL to read the XIRR */ - rc = opal_rm_int_get_xirr(&xirr, false); - if (rc < 0) - return 1; - } else { + rc = 0; + if (!xics_phys) + rc = opal_int_get_xirr(&xirr, false); + else xirr = _lwzcix(xics_phys + XICS_XIRR); - } + if (rc < 0) + return 1; /* * Save XIRR for later. Since we get control in reverse endian @@ -445,15 +442,16 @@ static long kvmppc_read_one_intr(bool *again) * If it is an IPI, clear the MFRR and EOI it. 
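 *
 * (Editor's aside, illustration only: the branch below keeps two
 * equivalent paths - direct cache-inhibited stores when the XICS
 * registers are mapped,
 *
 *   _stbcix(xics_phys + XICS_MFRR, 0xff);   clear the IPI
 *   _stwcix(xics_phys + XICS_XIRR, xirr);   EOI it
 *
 * and OPAL calls otherwise; with the opal_rm_* variants dropped,
 * opal_int_set_mfrr()/opal_int_eoi() are assumed callable from real
 * or virtual mode, and the rc > 0 "another interrupt pending" check
 * now sits after the branch so it covers both paths.)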
*/ if (xisr == XICS_IPI) { + rc = 0; if (xics_phys) { _stbcix(xics_phys + XICS_MFRR, 0xff); _stwcix(xics_phys + XICS_XIRR, xirr); } else { - opal_rm_int_set_mfrr(hard_smp_processor_id(), 0xff); - rc = opal_rm_int_eoi(h_xirr); - /* If rc > 0, there is another interrupt pending */ - *again = rc > 0; + opal_int_set_mfrr(hard_smp_processor_id(), 0xff); + rc = opal_int_eoi(h_xirr); } + /* If rc > 0, there is another interrupt pending */ + *again = rc > 0; /* * Need to ensure side effects of above stores @@ -474,8 +472,8 @@ static long kvmppc_read_one_intr(bool *again) if (xics_phys) _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY); else - opal_rm_int_set_mfrr(hard_smp_processor_id(), - IPI_PRIORITY); + opal_int_set_mfrr(hard_smp_processor_id(), + IPI_PRIORITY); /* Let side effects complete */ smp_mb(); return 1; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 9ef3c4be952f..6fca970373ee 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -43,6 +43,7 @@ static void *real_vmalloc_addr(void *x) static int global_invalidates(struct kvm *kvm, unsigned long flags) { int global; + int cpu; /* * If there is only one vcore, and it's currently running, @@ -60,8 +61,14 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags) /* any other core might now have stale TLB entries... */ smp_wmb(); cpumask_setall(&kvm->arch.need_tlb_flush); - cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu, - &kvm->arch.need_tlb_flush); + cpu = local_paca->kvm_hstate.kvm_vcore->pcpu; + /* + * On POWER9, threads are independent but the TLB is shared, + * so use the bit for the first thread to represent the core. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + cpu = cpu_first_thread_sibling(cpu); + cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush); } return global; @@ -79,10 +86,10 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, if (*rmap & KVMPPC_RMAP_PRESENT) { i = *rmap & KVMPPC_RMAP_INDEX; - head = &kvm->arch.revmap[i]; + head = &kvm->arch.hpt.rev[i]; if (realmode) head = real_vmalloc_addr(head); - tail = &kvm->arch.revmap[head->back]; + tail = &kvm->arch.hpt.rev[head->back]; if (realmode) tail = real_vmalloc_addr(tail); rev->forw = i; @@ -147,8 +154,8 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index, lock_rmap(rmap); head = *rmap & KVMPPC_RMAP_INDEX; - next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]); - prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]); + next = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->forw]); + prev = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->back]); next->back = rev->back; prev->forw = rev->forw; if (head == pte_index) { @@ -182,6 +189,8 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, unsigned long mmu_seq; unsigned long rcbits, irq_flags = 0; + if (kvm_is_radix(kvm)) + return H_FUNCTION; psize = hpte_page_size(pteh, ptel); if (!psize) return H_PARAMETER; @@ -283,11 +292,11 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, /* Find and lock the HPTEG slot to use */ do_insert: - if (pte_index >= kvm->arch.hpt_npte) + if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) return H_PARAMETER; if (likely((flags & H_EXACT) == 0)) { pte_index &= ~7UL; - hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); for (i = 0; i < 8; ++i) { if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 && try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | @@ -318,7 +327,7 @@ long 
kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } pte_index += i; } else { - hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | HPTE_V_ABSENT)) { /* Lock the slot and check again */ @@ -335,7 +344,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } /* Save away the guest's idea of the second HPTE dword */ - rev = &kvm->arch.revmap[pte_index]; + rev = &kvm->arch.hpt.rev[pte_index]; if (realmode) rev = real_vmalloc_addr(rev); if (rev) { @@ -458,9 +467,11 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, struct revmap_entry *rev; u64 pte, orig_pte, pte_r; - if (pte_index >= kvm->arch.hpt_npte) + if (kvm_is_radix(kvm)) + return H_FUNCTION; + if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) return H_PARAMETER; - hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); pte = orig_pte = be64_to_cpu(hpte[0]); @@ -476,7 +487,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, return H_NOT_FOUND; } - rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); + rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); v = pte & ~HPTE_V_HVLOCK; if (v & HPTE_V_VALID) { hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); @@ -529,6 +540,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) struct revmap_entry *rev, *revs[4]; u64 hp0, hp1; + if (kvm_is_radix(kvm)) + return H_FUNCTION; global = global_invalidates(kvm, 0); for (i = 0; i < 4 && ret == H_SUCCESS; ) { n = 0; @@ -544,13 +557,13 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) break; } if (req != 1 || flags == 3 || - pte_index >= kvm->arch.hpt_npte) { + pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) { /* parameter error */ args[j] = ((0xa0 | flags) << 56) + pte_index; ret = H_PARAMETER; break; } - hp = (__be64 *) (kvm->arch.hpt_virt + (pte_index << 4)); + hp = (__be64 *) (kvm->arch.hpt.virt + (pte_index << 4)); /* to avoid deadlock, don't spin except for first */ if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) { if (n) @@ -587,7 +600,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) } args[j] = ((0x80 | flags) << 56) + pte_index; - rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); + rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); note_hpte_modification(kvm, rev); if (!(hp0 & HPTE_V_VALID)) { @@ -642,10 +655,12 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long v, r, rb, mask, bits; u64 pte_v, pte_r; - if (pte_index >= kvm->arch.hpt_npte) + if (kvm_is_radix(kvm)) + return H_FUNCTION; + if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) return H_PARAMETER; - hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); v = pte_v = be64_to_cpu(hpte[0]); @@ -665,7 +680,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, /* Update guest view of 2nd HPTE dword */ mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_HI | HPTE_R_KEY_LO; - rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); + rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); if (rev) { r = (rev->guest_rpte & ~mask) | bits; rev->guest_rpte = r; @@ -711,15 +726,17 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, int i, n = 1; struct revmap_entry *rev = NULL; - if (pte_index >= kvm->arch.hpt_npte) + if (kvm_is_radix(kvm)) 
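+	/* editor's note (added commentary, not in the original patch):
+	 * each HPT hypercall in this file now opens with this
+	 * kvm_is_radix() guard - a radix guest has no HPT, so the hcall
+	 * fails with H_FUNCTION ("function not supported") before any
+	 * kvm->arch.hpt state is touched */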
+ return H_FUNCTION; + if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) return H_PARAMETER; if (flags & H_READ_4) { pte_index &= ~3; n = 4; } - rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); + rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); for (i = 0; i < n; ++i, ++pte_index) { - hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; r = be64_to_cpu(hpte[1]); if (cpu_has_feature(CPU_FTR_ARCH_300)) { @@ -750,11 +767,13 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long *rmap; long ret = H_NOT_FOUND; - if (pte_index >= kvm->arch.hpt_npte) + if (kvm_is_radix(kvm)) + return H_FUNCTION; + if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) return H_PARAMETER; - rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); - hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); + rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); + hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); v = be64_to_cpu(hpte[0]); @@ -796,11 +815,13 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long *rmap; long ret = H_NOT_FOUND; - if (pte_index >= kvm->arch.hpt_npte) + if (kvm_is_radix(kvm)) + return H_FUNCTION; + if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) return H_PARAMETER; - rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); - hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); + rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); + hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); v = be64_to_cpu(hpte[0]); @@ -949,7 +970,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, somask = (1UL << 28) - 1; vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT; } - hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask; + hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvmppc_hpt_mask(&kvm->arch.hpt); avpn = slb_v & ~(somask >> 16); /* also includes B */ avpn |= (eaddr & somask) >> 16; @@ -960,7 +981,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, val |= avpn; for (;;) { - hpte = (__be64 *)(kvm->arch.hpt_virt + (hash << 7)); + hpte = (__be64 *)(kvm->arch.hpt.virt + (hash << 7)); for (i = 0; i < 16; i += 2) { /* Read the PTE racily */ @@ -996,7 +1017,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, if (val & HPTE_V_SECONDARY) break; val |= HPTE_V_SECONDARY; - hash = hash ^ kvm->arch.hpt_mask; + hash = hash ^ kvmppc_hpt_mask(&kvm->arch.hpt); } return -1; } @@ -1045,14 +1066,14 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, return status; /* there really was no HPTE */ return 0; /* for prot fault, HPTE disappeared */ } - hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); + hpte = (__be64 *)(kvm->arch.hpt.virt + (index << 4)); v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; r = be64_to_cpu(hpte[1]); if (cpu_has_feature(CPU_FTR_ARCH_300)) { v = hpte_new_to_old_v(v, r); r = hpte_new_to_old_r(r); } - rev = real_vmalloc_addr(&kvm->arch.revmap[index]); + rev = real_vmalloc_addr(&kvm->arch.hpt.rev[index]); gr = rev->guest_rpte; unlock_hpte(hpte, orig_v); diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 06edc4366639..e78542d99cd6 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ 
b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -35,8 +35,8 @@ int kvm_irq_bypass = 1; EXPORT_SYMBOL(kvm_irq_bypass); static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, - u32 new_irq); -static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu); + u32 new_irq, bool check_resend); +static int xics_opal_set_server(unsigned int hw_irq, int server_cpu); /* -- ICS routines -- */ static void ics_rm_check_resend(struct kvmppc_xics *xics, @@ -44,20 +44,12 @@ static void ics_rm_check_resend(struct kvmppc_xics *xics, { int i; - arch_spin_lock(&ics->lock); - for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { struct ics_irq_state *state = &ics->irq_state[i]; - - if (!state->resend) - continue; - - arch_spin_unlock(&ics->lock); - icp_rm_deliver_irq(xics, icp, state->number); - arch_spin_lock(&ics->lock); + if (state->resend) + icp_rm_deliver_irq(xics, icp, state->number, true); } - arch_spin_unlock(&ics->lock); } /* -- ICP routines -- */ @@ -70,11 +62,9 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) hcpu = hcore << threads_shift; kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu; smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION); - if (paca[hcpu].kvm_hstate.xics_phys) - icp_native_cause_ipi_rm(hcpu); - else - opal_rm_int_set_mfrr(get_hard_smp_processor_id(hcpu), - IPI_PRIORITY); + kvmppc_set_host_ipi(hcpu, 1); + smp_mb(); + kvmhv_rm_send_ipi(hcpu); } #else static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { } @@ -290,7 +280,7 @@ static bool icp_rm_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority, } static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, - u32 new_irq) + u32 new_irq, bool check_resend) { struct ics_irq_state *state; struct kvmppc_ics *ics; @@ -335,6 +325,10 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, } } + if (check_resend) + if (!state->resend) + goto out; + /* Clear the resend bit of that interrupt */ state->resend = 0; @@ -380,7 +374,9 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, */ if (reject && reject != XICS_IPI) { arch_spin_unlock(&ics->lock); + icp->n_reject++; new_irq = reject; + check_resend = 0; goto again; } } else { @@ -388,10 +384,16 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, * We failed to deliver the interrupt we need to set the * resend map bit and mark the ICS state as needing a resend */ - set_bit(ics->icsid, icp->resend_map); state->resend = 1; /* + * Make sure when checking resend, we don't miss the resend + * if resend_map bit is seen and cleared. + */ + smp_wmb(); + set_bit(ics->icsid, icp->resend_map); + + /* * If the need_resend flag got cleared in the ICP some time * between icp_rm_try_to_deliver() atomic update and now, then * we know it might have missed the resend_map bit. 
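 * (Editor's aside, added commentary: the smp_mb() just below is the
 * read side of a pairing - the delivery path above orders
 * state->resend = 1 before set_bit(resend_map) with the new
 * smp_wmb(), so a checker that sees and clears the map bit cannot
 * then read a stale state->resend of 0.)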
So we @@ -399,7 +401,9 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, */ smp_mb(); if (!icp->state.need_resend) { + state->resend = 0; arch_spin_unlock(&ics->lock); + check_resend = 0; goto again; } } @@ -594,7 +598,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, /* Handle reject in real mode */ if (reject && reject != XICS_IPI) { this_icp->n_reject++; - icp_rm_deliver_irq(xics, icp, reject); + icp_rm_deliver_irq(xics, icp, reject, false); } /* Handle resends in real mode */ @@ -662,59 +666,45 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) */ if (reject && reject != XICS_IPI) { icp->n_reject++; - icp_rm_deliver_irq(xics, icp, reject); + icp_rm_deliver_irq(xics, icp, reject, false); } bail: return check_too_hard(xics, icp); } -int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) +static int ics_rm_eoi(struct kvm_vcpu *vcpu, u32 irq) { struct kvmppc_xics *xics = vcpu->kvm->arch.xics; struct kvmppc_icp *icp = vcpu->arch.icp; struct kvmppc_ics *ics; struct ics_irq_state *state; - u32 irq = xirr & 0x00ffffff; u16 src; - - if (!xics || !xics->real_mode) - return H_TOO_HARD; + u32 pq_old, pq_new; /* - * ICP State: EOI - * - * Note: If EOI is incorrectly used by SW to lower the CPPR - * value (ie more favored), we do not check for rejection of - * a pending interrupt, this is a SW error and PAPR sepcifies - * that we don't have to deal with it. + * ICS EOI handling: For LSI, if P bit is still set, we need to + * resend it. * - * The sending of an EOI to the ICS is handled after the - * CPPR update - * - * ICP State: Down_CPPR which we handle - * in a separate function as it's shared with H_CPPR. + * For MSI, we move Q bit into P (and clear Q). If it is set, + * resend it. */ - icp_rm_down_cppr(xics, icp, xirr >> 24); - /* IPIs have no EOI */ - if (irq == XICS_IPI) - goto bail; - /* - * EOI handling: If the interrupt is still asserted, we need to - * resend it. We can take a lockless "peek" at the ICS state here. - * - * "Message" interrupts will never have "asserted" set - */ ics = kvmppc_xics_find_ics(xics, irq, &src); if (!ics) goto bail; + state = &ics->irq_state[src]; - /* Still asserted, resend it */ - if (state->asserted) { - icp->n_reject++; - icp_rm_deliver_irq(xics, icp, irq); - } + if (state->lsi) + pq_new = state->pq_state; + else + do { + pq_old = state->pq_state; + pq_new = pq_old >> 1; + } while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old); + + if (pq_new & PQ_PRESENTED) + icp_rm_deliver_irq(xics, NULL, irq, false); if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) { icp->rm_action |= XICS_RM_NOTIFY_EOI; @@ -730,15 +720,48 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) ++vcpu->stat.pthru_host; if (state->intr_cpu != pcpu) { ++vcpu->stat.pthru_bad_aff; - xics_opal_rm_set_server(state->host_irq, pcpu); + xics_opal_set_server(state->host_irq, pcpu); } state->intr_cpu = -1; } } + bail: return check_too_hard(xics, icp); } +int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) +{ + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp = vcpu->arch.icp; + u32 irq = xirr & 0x00ffffff; + + if (!xics || !xics->real_mode) + return H_TOO_HARD; + + /* + * ICP State: EOI + * + * Note: If EOI is incorrectly used by SW to lower the CPPR + * value (ie more favored), we do not check for rejection of + * a pending interrupt, this is a SW error and PAPR specifies + * that we don't have to deal with it. 
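+	 *
+	 * (Editor's sketch of the ICS P/Q encoding these hunks introduce,
+	 * orientation only:
+	 *
+	 *   P Q   meaning
+	 *   0 0   idle
+	 *   1 0   presented to the ICP, nothing queued behind it
+	 *   1 1   presented, and at least one further arrival queued
+	 *
+	 *   arrival:  pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED;
+	 *   EOI:      pq_new = pq_old >> 1;
+	 *
+	 * on arrival we present only when the result is exactly
+	 * PQ_PRESENTED, i.e. P=1,Q=0; on EOI we redeliver whenever P
+	 * remains set.)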
+ * + * The sending of an EOI to the ICS is handled after the + * CPPR update + * + * ICP State: Down_CPPR which we handle + * in a separate function as it's shared with H_CPPR. + */ + icp_rm_down_cppr(xics, icp, xirr >> 24); + + /* IPIs have no EOI */ + if (irq == XICS_IPI) + return check_too_hard(xics, icp); + + return ics_rm_eoi(vcpu, irq); +} + unsigned long eoi_rc; static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again) @@ -758,16 +781,16 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again) if (xics_phys) { _stwcix(xics_phys + XICS_XIRR, xirr); } else { - rc = opal_rm_int_eoi(be32_to_cpu(xirr)); + rc = opal_int_eoi(be32_to_cpu(xirr)); *again = rc > 0; } } -static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu) +static int xics_opal_set_server(unsigned int hw_irq, int server_cpu) { unsigned int mangle_cpu = get_hard_smp_processor_id(server_cpu) << 2; - return opal_rm_set_xive(hw_irq, mangle_cpu, DEFAULT_PRIORITY); + return opal_set_xive(hw_irq, mangle_cpu, DEFAULT_PRIORITY); } /* @@ -825,14 +848,33 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, { struct kvmppc_xics *xics; struct kvmppc_icp *icp; + struct kvmppc_ics *ics; + struct ics_irq_state *state; u32 irq; + u16 src; + u32 pq_old, pq_new; irq = irq_map->v_hwirq; xics = vcpu->kvm->arch.xics; icp = vcpu->arch.icp; kvmppc_rm_handle_irq_desc(irq_map->desc); - icp_rm_deliver_irq(xics, icp, irq); + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) + return 2; + + state = &ics->irq_state[src]; + + /* only MSIs register bypass producers, so it must be MSI here */ + do { + pq_old = state->pq_state; + pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED; + } while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old); + + /* Test P=1, Q=0, this is the only case where we present */ + if (pq_new == PQ_PRESENTED) + icp_rm_deliver_irq(xics, icp, irq, false); /* EOI the interrupt */ icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr, diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 9338a818e05c..47414a6fe2dd 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -148,6 +148,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) addi r1, r1, 112 ld r7, HSTATE_HOST_MSR(r13) + /* + * If we came back from the guest via a relocation-on interrupt, + * we will be in virtual mode at this point, which makes it a + * little easier to get back to the caller. + */ + mfmsr r0 + andi. r0, r0, MSR_IR /* in real mode? */ + bne .Lvirt_return + cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL beq 11f @@ -181,6 +190,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtspr SPRN_HSRR1, r7 ba 0xe80 + /* Virtual-mode return - can't get here for HMI or machine check */ +.Lvirt_return: + cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL + beq 16f + cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL + beq 17f + andi. r0, r7, MSR_EE /* were interrupts hard-enabled? 
*/ + beq 18f + mtmsrd r7, 1 /* if so then re-enable them */ +18: mtlr r8 + blr + +16: mtspr SPRN_HSRR0, r8 /* jump to reloc-on external vector */ + mtspr SPRN_HSRR1, r7 + b exc_virt_0x4500_hardware_interrupt + +17: mtspr SPRN_HSRR0, r8 + mtspr SPRN_HSRR1, r7 + b exc_virt_0x4e80_h_doorbell + kvmppc_primary_no_guest: /* We handle this much like a ceded vcpu */ /* put the HDEC into the DEC, since HDEC interrupts don't wake us */ @@ -518,6 +547,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* Stack frame offsets */ #define STACK_SLOT_TID (112-16) #define STACK_SLOT_PSSCR (112-24) +#define STACK_SLOT_PID (112-32) .global kvmppc_hv_entry kvmppc_hv_entry: @@ -530,6 +560,7 @@ kvmppc_hv_entry: * R1 = host R1 * R2 = TOC * all other volatile GPRS = free + * Does not preserve non-volatile GPRs or CR fields */ mflr r0 std r0, PPC_LR_STKOFF(r1) @@ -549,32 +580,38 @@ kvmppc_hv_entry: bl kvmhv_start_timing 1: #endif - /* Clear out SLB */ + + /* Use cr7 as an indication of radix mode */ + ld r5, HSTATE_KVM_VCORE(r13) + ld r9, VCORE_KVM(r5) /* pointer to struct kvm */ + lbz r0, KVM_RADIX(r9) + cmpwi cr7, r0, 0 + + /* Clear out SLB if hash */ + bne cr7, 2f li r6,0 slbmte r6,r6 slbia ptesync - +2: /* * POWER7/POWER8 host -> guest partition switch code. * We don't have to lock against concurrent tlbies, * but we do have to coordinate across hardware threads. */ /* Set bit in entry map iff exit map is zero. */ - ld r5, HSTATE_KVM_VCORE(r13) li r7, 1 lbz r6, HSTATE_PTID(r13) sld r7, r7, r6 - addi r9, r5, VCORE_ENTRY_EXIT -21: lwarx r3, 0, r9 + addi r8, r5, VCORE_ENTRY_EXIT +21: lwarx r3, 0, r8 cmpwi r3, 0x100 /* any threads starting to exit? */ bge secondary_too_late /* if so we're too late to the party */ or r3, r3, r7 - stwcx. r3, 0, r9 + stwcx. r3, 0, r8 bne 21b /* Primary thread switches to guest partition. */ - ld r9,VCORE_KVM(r5) /* pointer to struct kvm */ cmpwi r6,0 bne 10f lwz r7,KVM_LPID(r9) @@ -590,30 +627,44 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) /* See if we need to flush the TLB */ lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */ +BEGIN_FTR_SECTION + /* + * On POWER9, individual threads can come in here, but the + * TLB is shared between the 4 threads in a core, hence + * invalidating on one thread invalidates for all. + * Thus we make all 4 threads use the same bit here. + */ + clrrdi r6,r6,2 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) clrldi r7,r6,64-6 /* extract bit number (6 bits) */ srdi r6,r6,6 /* doubleword number */ sldi r6,r6,3 /* address offset */ add r6,r6,r9 addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */ - li r0,1 - sld r0,r0,r7 + li r8,1 + sld r8,r8,r7 ld r7,0(r6) - and. r7,r7,r0 + and. r7,r7,r8 beq 22f -23: ldarx r7,0,r6 /* if set, clear the bit */ - andc r7,r7,r0 - stdcx. r7,0,r6 - bne 23b /* Flush the TLB of any entries for this LPID */ - lwz r6,KVM_TLB_SETS(r9) - li r0,0 /* RS for P9 version of tlbiel */ - mtctr r6 + lwz r0,KVM_TLB_SETS(r9) + mtctr r0 li r7,0x800 /* IS field = 0b10 */ ptesync -28: tlbiel r7 + li r0,0 /* RS for P9 version of tlbiel */ + bne cr7, 29f +28: tlbiel r7 /* On P9, rs=0, RIC=0, PRS=0, R=0 */ addi r7,r7,0x1000 bdnz 28b - ptesync + b 30f +29: PPC_TLBIEL(7,0,2,1,1) /* for radix, RIC=2, PRS=1, R=1 */ + addi r7,r7,0x1000 + bdnz 29b +30: ptesync +23: ldarx r7,0,r6 /* clear the bit after TLB flushed */ + andc r7,r7,r8 + stdcx. 
r7,0,r6 + bne 23b /* Add timebase offset onto timebase */ 22: ld r8,VCORE_TB_OFFSET(r5) @@ -658,7 +709,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) beq kvmppc_primary_no_guest kvmppc_got_guest: - /* Load up guest SLB entries */ + /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */ lwz r5,VCPU_SLB_MAX(r4) cmpwi r5,0 beq 9f @@ -696,8 +747,10 @@ kvmppc_got_guest: BEGIN_FTR_SECTION mfspr r5, SPRN_TIDR mfspr r6, SPRN_PSSCR + mfspr r7, SPRN_PID std r5, STACK_SLOT_TID(r1) std r6, STACK_SLOT_PSSCR(r1) + std r7, STACK_SLOT_PID(r1) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) BEGIN_FTR_SECTION @@ -824,6 +877,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_PID, r7 mtspr SPRN_WORT, r8 BEGIN_FTR_SECTION + PPC_INVALIDATE_ERAT +END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) +BEGIN_FTR_SECTION /* POWER8-only registers */ ld r5, VCPU_TCSCR(r4) ld r6, VCPU_ACOP(r4) @@ -1057,13 +1113,13 @@ hdec_soon: kvmppc_interrupt_hv: /* * Register contents: - * R12 = interrupt vector + * R12 = (guest CR << 32) | interrupt vector * R13 = PACA - * guest CR, R12 saved in shadow VCPU SCRATCH1/0 + * guest R12 saved in shadow VCPU SCRATCH0 + * guest CTR saved in shadow VCPU SCRATCH1 if RELOCATABLE * guest R13 saved in SPRN_SCRATCH0 */ std r9, HSTATE_SCRATCH2(r13) - lbz r9, HSTATE_IN_GUEST(r13) cmpwi r9, KVM_GUEST_MODE_HOST_HV beq kvmppc_bad_host_intr @@ -1094,8 +1150,9 @@ kvmppc_interrupt_hv: std r10, VCPU_GPR(R10)(r9) std r11, VCPU_GPR(R11)(r9) ld r3, HSTATE_SCRATCH0(r13) - lwz r4, HSTATE_SCRATCH1(r13) std r3, VCPU_GPR(R12)(r9) + /* CR is in the high half of r12 */ + srdi r4, r12, 32 stw r4, VCPU_CR(r9) BEGIN_FTR_SECTION ld r3, HSTATE_CFAR(r13) @@ -1114,6 +1171,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) mfspr r11, SPRN_SRR1 std r10, VCPU_SRR0(r9) std r11, VCPU_SRR1(r9) + /* trap is in the low half of r12, clear CR from the high half */ + clrldi r12, r12, 32 andi. r0, r12, 2 /* need to read HSRR0/1? */ beq 1f mfspr r10, SPRN_HSRR0 @@ -1149,7 +1208,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 11: stw r3,VCPU_HEIR(r9) /* these are volatile across C function calls */ +#ifdef CONFIG_RELOCATABLE + ld r3, HSTATE_SCRATCH1(r13) + mtctr r3 +#else mfctr r3 +#endif mfxer r4 std r3, VCPU_CTR(r9) std r4, VCPU_XER(r9) @@ -1285,11 +1349,15 @@ mc_cont: mtspr SPRN_CTRLT,r6 4: /* Read the guest SLB and save it away */ + ld r5, VCPU_KVM(r9) + lbz r0, KVM_RADIX(r5) + cmpwi r0, 0 + li r5, 0 + bne 3f /* for radix, save 0 entries */ lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ mtctr r0 li r6,0 addi r7,r9,VCPU_SLB - li r5,0 1: slbmfee r8,r6 andis. r0,r8,SLB_ESID_V@h beq 2f @@ -1301,7 +1369,7 @@ mc_cont: addi r5,r5,1 2: addi r6,r6,1 bdnz 1b - stw r5,VCPU_SLB_MAX(r9) +3: stw r5,VCPU_SLB_MAX(r9) /* * Save the guest PURR/SPURR @@ -1550,9 +1618,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) BEGIN_FTR_SECTION ld r5, STACK_SLOT_TID(r1) ld r6, STACK_SLOT_PSSCR(r1) + ld r7, STACK_SLOT_PID(r1) mtspr SPRN_TIDR, r5 mtspr SPRN_PSSCR, r6 + mtspr SPRN_PID, r7 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) +BEGIN_FTR_SECTION + PPC_INVALIDATE_ERAT +END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) /* * POWER7/POWER8 guest -> host partition switch code. 
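 * (Editor's orientation note, added commentary: the exit hunks below
 * mirror the radix-aware entry path - the guest SLB is saved as zero
 * entries for radix, PID is restored alongside TIDR and PSSCR from
 * the new POWER9 stack slots, the ERAT is invalidated on POWER9 DD1,
 * and the bolted host SLB reload is branched around entirely when
 * MMU_FTR_TYPE_RADIX is set, since a radix host does not rely on
 * SLB entries.)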
@@ -1663,6 +1736,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) isync /* load host SLB entries */ +BEGIN_MMU_FTR_SECTION + b 0f +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) ld r8,PACA_SLBSHADOWPTR(r13) .rept SLB_NUM_BOLTED @@ -1675,7 +1751,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) slbmte r6,r5 1: addi r8,r8,16 .endr - +0: #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING /* Finish timing, if we have a vcpu */ ld r4, HSTATE_KVM_VCPU(r13) @@ -1702,11 +1778,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * reflect the HDSI to the guest as a DSI. */ kvmppc_hdsi: + ld r3, VCPU_KVM(r9) + lbz r0, KVM_RADIX(r3) + cmpwi r0, 0 mfspr r4, SPRN_HDAR mfspr r6, SPRN_HDSISR + bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */ /* HPTE not found fault or protection fault? */ andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h beq 1f /* if not, send it to the guest */ +BEGIN_FTR_SECTION + mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ + b 4f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) andi. r0, r11, MSR_DR /* data relocation enabled? */ beq 3f clrrdi r0, r4, 28 @@ -1776,13 +1860,29 @@ fast_interrupt_c_return: stb r0, HSTATE_IN_GUEST(r13) b guest_exit_cont +.Lradix_hdsi: + std r4, VCPU_FAULT_DAR(r9) + stw r6, VCPU_FAULT_DSISR(r9) +.Lradix_hisi: + mfspr r5, SPRN_ASDR + std r5, VCPU_FAULT_GPA(r9) + b guest_exit_cont + /* * Similarly for an HISI, reflect it to the guest as an ISI unless * it is an HPTE not found fault for a page that we have paged out. */ kvmppc_hisi: + ld r3, VCPU_KVM(r9) + lbz r0, KVM_RADIX(r3) + cmpwi r0, 0 + bne .Lradix_hisi /* for radix, just save ASDR */ andis. r0, r11, SRR1_ISI_NOPT@h beq 1f +BEGIN_FTR_SECTION + mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ + b 4f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) andi. r0, r11, MSR_IR /* instruction relocation enabled? */ beq 3f clrrdi r0, r10, 28 diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 826c541a12af..d4dfc0ca2a44 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -28,7 +28,7 @@ #include <asm/cputable.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/io.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -902,6 +902,69 @@ static void kvmppc_clear_debug(struct kvm_vcpu *vcpu) } } +static int kvmppc_exit_pr_progint(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int exit_nr) +{ + enum emulation_result er; + ulong flags; + u32 last_inst; + int emul, r; + + /* + * shadow_srr1 only contains valid flags if we came here via a program + * exception. The other exceptions (emulation assist, FP unavailable, + * etc.) do not provide flags in SRR1, so use an illegal-instruction + * exception when injecting a program interrupt into the guest. 
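+ *
+ * (Editor's note - assumption-level detail, the constants come from
+ * asm/reg.h and are not part of this diff: the 0x1f0000 mask kept
+ * below covers exactly the SRR1 program-interrupt reason bits,
+ * SRR1_PROGFPE 0x100000, SRR1_PROGILL 0x80000, SRR1_PROGPRIV
+ * 0x40000, SRR1_PROGTRAP 0x20000 and SRR1_PROGADDR 0x10000.)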
+ */ + if (exit_nr == BOOK3S_INTERRUPT_PROGRAM) + flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; + else + flags = SRR1_PROGILL; + + emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); + if (emul != EMULATE_DONE) + return RESUME_GUEST; + + if (kvmppc_get_msr(vcpu) & MSR_PR) { +#ifdef EXIT_DEBUG + pr_info("Userspace triggered 0x700 exception at\n 0x%lx (0x%x)\n", + kvmppc_get_pc(vcpu), last_inst); +#endif + if ((last_inst & 0xff0007ff) != (INS_DCBZ & 0xfffffff7)) { + kvmppc_core_queue_program(vcpu, flags); + return RESUME_GUEST; + } + } + + vcpu->stat.emulated_inst_exits++; + er = kvmppc_emulate_instruction(run, vcpu); + switch (er) { + case EMULATE_DONE: + r = RESUME_GUEST_NV; + break; + case EMULATE_AGAIN: + r = RESUME_GUEST; + break; + case EMULATE_FAIL: + pr_crit("%s: emulation at %lx failed (%08x)\n", + __func__, kvmppc_get_pc(vcpu), last_inst); + kvmppc_core_queue_program(vcpu, flags); + r = RESUME_GUEST; + break; + case EMULATE_DO_MMIO: + run->exit_reason = KVM_EXIT_MMIO; + r = RESUME_HOST_NV; + break; + case EMULATE_EXIT_USER: + r = RESUME_HOST_NV; + break; + default: + BUG(); + } + + return r; +} + int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int exit_nr) { @@ -1044,71 +1107,8 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOK3S_INTERRUPT_PROGRAM: case BOOK3S_INTERRUPT_H_EMUL_ASSIST: - { - enum emulation_result er; - ulong flags; - u32 last_inst; - int emul; - -program_interrupt: - /* - * shadow_srr1 only contains valid flags if we came here via - * a program exception. The other exceptions (emulation assist, - * FP unavailable, etc.) do not provide flags in SRR1, so use - * an illegal-instruction exception when injecting a program - * interrupt into the guest. - */ - if (exit_nr == BOOK3S_INTERRUPT_PROGRAM) - flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; - else - flags = SRR1_PROGILL; - - emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); - if (emul != EMULATE_DONE) { - r = RESUME_GUEST; - break; - } - - if (kvmppc_get_msr(vcpu) & MSR_PR) { -#ifdef EXIT_DEBUG - pr_info("Userspace triggered 0x700 exception at\n 0x%lx (0x%x)\n", - kvmppc_get_pc(vcpu), last_inst); -#endif - if ((last_inst & 0xff0007ff) != - (INS_DCBZ & 0xfffffff7)) { - kvmppc_core_queue_program(vcpu, flags); - r = RESUME_GUEST; - break; - } - } - - vcpu->stat.emulated_inst_exits++; - er = kvmppc_emulate_instruction(run, vcpu); - switch (er) { - case EMULATE_DONE: - r = RESUME_GUEST_NV; - break; - case EMULATE_AGAIN: - r = RESUME_GUEST; - break; - case EMULATE_FAIL: - printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", - __func__, kvmppc_get_pc(vcpu), last_inst); - kvmppc_core_queue_program(vcpu, flags); - r = RESUME_GUEST; - break; - case EMULATE_DO_MMIO: - run->exit_reason = KVM_EXIT_MMIO; - r = RESUME_HOST_NV; - break; - case EMULATE_EXIT_USER: - r = RESUME_HOST_NV; - break; - default: - BUG(); - } + r = kvmppc_exit_pr_progint(run, vcpu, exit_nr); break; - } case BOOK3S_INTERRUPT_SYSCALL: { u32 last_sc; @@ -1185,7 +1185,7 @@ program_interrupt: emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); if (emul == EMULATE_DONE) - goto program_interrupt; + r = kvmppc_exit_pr_progint(run, vcpu, exit_nr); else r = RESUME_GUEST; diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index 02176fd52f84..f102616febc7 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -17,7 +17,7 @@ #include <linux/anon_inodes.h> -#include <asm/uaccess.h> +#include 
<linux/uaccess.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index ef27fbd5d9c5..20528701835b 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -11,7 +11,7 @@ #include <linux/kvm.h> #include <linux/err.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/kvm_book3s.h> #include <asm/kvm_ppc.h> #include <asm/hvcall.h> diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index ca8f174289bb..2a2b96d53999 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -167,20 +167,38 @@ kvmppc_handler_trampoline_enter_end: * * *****************************************************************************/ -.global kvmppc_handler_trampoline_exit -kvmppc_handler_trampoline_exit: - .global kvmppc_interrupt_pr kvmppc_interrupt_pr: + /* 64-bit entry. Register usage at this point: + * + * SPRG_SCRATCH0 = guest R13 + * R12 = (guest CR << 32) | exit handler id + * R13 = PACA + * HSTATE.SCRATCH0 = guest R12 + * HSTATE.SCRATCH1 = guest CTR if RELOCATABLE + */ +#ifdef CONFIG_PPC64 + /* Match 32-bit entry */ +#ifdef CONFIG_RELOCATABLE + std r9, HSTATE_SCRATCH2(r13) + ld r9, HSTATE_SCRATCH1(r13) + mtctr r9 + ld r9, HSTATE_SCRATCH2(r13) +#endif + rotldi r12, r12, 32 /* Flip R12 halves for stw */ + stw r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */ + srdi r12, r12, 32 /* shift trap into low half */ +#endif +.global kvmppc_handler_trampoline_exit +kvmppc_handler_trampoline_exit: /* Register usage at this point: * - * SPRG_SCRATCH0 = guest R13 - * R12 = exit handler id - * R13 = shadow vcpu (32-bit) or PACA (64-bit) + * SPRG_SCRATCH0 = guest R13 + * R12 = exit handler id + * R13 = shadow vcpu (32-bit) or PACA (64-bit) * HSTATE.SCRATCH0 = guest R12 * HSTATE.SCRATCH1 = guest CR - * */ /* Save registers */ diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 3bdc639157c1..e48803e2918d 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -14,7 +14,7 @@ #include <linux/anon_inodes.h> #include <linux/spinlock.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/kvm_book3s.h> #include <asm/kvm_ppc.h> #include <asm/hvcall.h> @@ -63,7 +63,7 @@ /* -- ICS routines -- */ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, - u32 new_irq); + u32 new_irq, bool check_resend); /* * Return value ideally indicates how the interrupt was handled, but no @@ -75,6 +75,7 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level) struct ics_irq_state *state; struct kvmppc_ics *ics; u16 src; + u32 pq_old, pq_new; XICS_DBG("ics deliver %#x (level: %d)\n", irq, level); @@ -87,25 +88,41 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level) if (!state->exists) return -EINVAL; + if (level == KVM_INTERRUPT_SET_LEVEL || level == KVM_INTERRUPT_SET) + level = 1; + else if (level == KVM_INTERRUPT_UNSET) + level = 0; /* - * We set state->asserted locklessly. This should be fine as - * we are the only setter, thus concurrent access is undefined - * to begin with. + * Take other values the same as 1, consistent with original code. + * maybe WARN here? 
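+	 *
+	 * (Editor's gloss, added commentary: KVM_INTERRUPT_SET and
+	 * KVM_INTERRUPT_SET_LEVEL both normalise to 1 above and
+	 * KVM_INTERRUPT_UNSET to 0; below, an MSI arrival shifts the old
+	 * P into Q via
+	 *
+	 *   pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED;
+	 *
+	 * in a lockless cmpxchg loop, while an LSI simply tracks the
+	 * line level: PQ_PRESENTED while asserted, 0 once deasserted.)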
*/ - if ((level == 1 && state->lsi) || level == KVM_INTERRUPT_SET_LEVEL) - state->asserted = 1; - else if (level == 0 || level == KVM_INTERRUPT_UNSET) { - state->asserted = 0; + + if (!state->lsi && level == 0) /* noop for MSI */ return 0; - } + + do { + pq_old = state->pq_state; + if (state->lsi) { + if (level) { + if (pq_old & PQ_PRESENTED) + /* Setting already set LSI ... */ + return 0; + + pq_new = PQ_PRESENTED; + } else + pq_new = 0; + } else + pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED; + } while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old); + + /* Test P=1, Q=0, this is the only case where we present */ + if (pq_new == PQ_PRESENTED) + icp_deliver_irq(xics, NULL, irq, false); /* Record which CPU this arrived on for passed-through interrupts */ if (state->host_irq) state->intr_cpu = raw_smp_processor_id(); - /* Attempt delivery */ - icp_deliver_irq(xics, NULL, irq); - return 0; } @@ -114,29 +131,14 @@ static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, { int i; - unsigned long flags; - - local_irq_save(flags); - arch_spin_lock(&ics->lock); - for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { struct ics_irq_state *state = &ics->irq_state[i]; - - if (!state->resend) - continue; - - XICS_DBG("resend %#x prio %#x\n", state->number, - state->priority); - - arch_spin_unlock(&ics->lock); - local_irq_restore(flags); - icp_deliver_irq(xics, icp, state->number); - local_irq_save(flags); - arch_spin_lock(&ics->lock); + if (state->resend) { + XICS_DBG("resend %#x prio %#x\n", state->number, + state->priority); + icp_deliver_irq(xics, icp, state->number, true); + } } - - arch_spin_unlock(&ics->lock); - local_irq_restore(flags); } static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics, @@ -155,6 +157,7 @@ static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics, deliver = false; if ((state->masked_pending || state->resend) && priority != MASKED) { state->masked_pending = 0; + state->resend = 0; deliver = true; } @@ -189,7 +192,7 @@ int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) state->masked_pending, state->resend); if (write_xive(xics, ics, state, server, priority, priority)) - icp_deliver_irq(xics, icp, irq); + icp_deliver_irq(xics, icp, irq, false); return 0; } @@ -242,7 +245,7 @@ int kvmppc_xics_int_on(struct kvm *kvm, u32 irq) if (write_xive(xics, ics, state, state->server, state->saved_priority, state->saved_priority)) - icp_deliver_irq(xics, icp, irq); + icp_deliver_irq(xics, icp, irq, false); return 0; } @@ -376,7 +379,7 @@ static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority, } static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, - u32 new_irq) + u32 new_irq, bool check_resend) { struct ics_irq_state *state; struct kvmppc_ics *ics; @@ -422,6 +425,10 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, } } + if (check_resend) + if (!state->resend) + goto out; + /* Clear the resend bit of that interrupt */ state->resend = 0; @@ -470,6 +477,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, arch_spin_unlock(&ics->lock); local_irq_restore(flags); new_irq = reject; + check_resend = 0; goto again; } } else { @@ -477,10 +485,16 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, * We failed to deliver the interrupt we need to set the * resend map bit and mark the ICS state as needing a resend */ - set_bit(ics->icsid, icp->resend_map); state->resend = 1; /* + * Make 
sure when checking resend, we don't miss the resend + * if resend_map bit is seen and cleared. + */ + smp_wmb(); + set_bit(ics->icsid, icp->resend_map); + + /* * If the need_resend flag got cleared in the ICP some time * between icp_try_to_deliver() atomic update and now, then * we know it might have missed the resend_map bit. So we @@ -488,8 +502,10 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, */ smp_mb(); if (!icp->state.need_resend) { + state->resend = 0; arch_spin_unlock(&ics->lock); local_irq_restore(flags); + check_resend = 0; goto again; } } @@ -681,7 +697,7 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, /* Handle reject */ if (reject && reject != XICS_IPI) - icp_deliver_irq(xics, icp, reject); + icp_deliver_irq(xics, icp, reject, false); /* Handle resend */ if (resend) @@ -761,17 +777,54 @@ static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) * attempt (see comments in icp_deliver_irq). */ if (reject && reject != XICS_IPI) - icp_deliver_irq(xics, icp, reject); + icp_deliver_irq(xics, icp, reject, false); } -static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) +static int ics_eoi(struct kvm_vcpu *vcpu, u32 irq) { struct kvmppc_xics *xics = vcpu->kvm->arch.xics; struct kvmppc_icp *icp = vcpu->arch.icp; struct kvmppc_ics *ics; struct ics_irq_state *state; - u32 irq = xirr & 0x00ffffff; u16 src; + u32 pq_old, pq_new; + + /* + * ICS EOI handling: For LSI, if P bit is still set, we need to + * resend it. + * + * For MSI, we move Q bit into P (and clear Q). If it is set, + * resend it. + */ + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) { + XICS_DBG("ics_eoi: IRQ 0x%06x not found!\n", irq); + return H_PARAMETER; + } + state = &ics->irq_state[src]; + + if (state->lsi) + pq_new = state->pq_state; + else + do { + pq_old = state->pq_state; + pq_new = pq_old >> 1; + } while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old); + + if (pq_new & PQ_PRESENTED) + icp_deliver_irq(xics, icp, irq, false); + + kvm_notify_acked_irq(vcpu->kvm, 0, irq); + + return H_SUCCESS; +} + +static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) +{ + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp = vcpu->arch.icp; + u32 irq = xirr & 0x00ffffff; XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr); @@ -794,26 +847,8 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) /* IPIs have no EOI */ if (irq == XICS_IPI) return H_SUCCESS; - /* - * EOI handling: If the interrupt is still asserted, we need to - * resend it. We can take a lockless "peek" at the ICS state here.
- * - * "Message" interrupts will never have "asserted" set - */ - ics = kvmppc_xics_find_ics(xics, irq, &src); - if (!ics) { - XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq); - return H_PARAMETER; - } - state = &ics->irq_state[src]; - /* Still asserted, resend it */ - if (state->asserted) - icp_deliver_irq(xics, icp, irq); - - kvm_notify_acked_irq(vcpu->kvm, 0, irq); - - return H_SUCCESS; + return ics_eoi(vcpu, irq); } int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) @@ -832,10 +867,6 @@ int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) icp->n_rm_check_resend++; icp_check_resend(xics, icp->rm_resend_icp); } - if (icp->rm_action & XICS_RM_REJECT) { - icp->n_rm_reject++; - icp_deliver_irq(xics, icp, icp->rm_reject); - } if (icp->rm_action & XICS_RM_NOTIFY_EOI) { icp->n_rm_notify_eoi++; kvm_notify_acked_irq(vcpu->kvm, 0, icp->rm_eoied_irq); @@ -920,7 +951,7 @@ static int xics_debug_show(struct seq_file *m, void *private) int icsid, i; unsigned long flags; unsigned long t_rm_kick_vcpu, t_rm_check_resend; - unsigned long t_rm_reject, t_rm_notify_eoi; + unsigned long t_rm_notify_eoi; unsigned long t_reject, t_check_resend; if (!kvm) @@ -929,7 +960,6 @@ static int xics_debug_show(struct seq_file *m, void *private) t_rm_kick_vcpu = 0; t_rm_notify_eoi = 0; t_rm_check_resend = 0; - t_rm_reject = 0; t_check_resend = 0; t_reject = 0; @@ -952,14 +982,13 @@ static int xics_debug_show(struct seq_file *m, void *private) t_rm_kick_vcpu += icp->n_rm_kick_vcpu; t_rm_notify_eoi += icp->n_rm_notify_eoi; t_rm_check_resend += icp->n_rm_check_resend; - t_rm_reject += icp->n_rm_reject; t_check_resend += icp->n_check_resend; t_reject += icp->n_reject; } - seq_printf(m, "ICP Guest->Host totals: kick_vcpu=%lu check_resend=%lu reject=%lu notify_eoi=%lu\n", + seq_printf(m, "ICP Guest->Host totals: kick_vcpu=%lu check_resend=%lu notify_eoi=%lu\n", t_rm_kick_vcpu, t_rm_check_resend, - t_rm_reject, t_rm_notify_eoi); + t_rm_notify_eoi); seq_printf(m, "ICP Real Mode totals: check_resend=%lu resend=%lu\n", t_check_resend, t_reject); for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) { @@ -977,9 +1006,9 @@ static int xics_debug_show(struct seq_file *m, void *private) for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { struct ics_irq_state *irq = &ics->irq_state[i]; - seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n", + seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x pq_state %d resend %d masked pending %d\n", irq->number, irq->server, irq->priority, - irq->saved_priority, irq->asserted, + irq->saved_priority, irq->pq_state, irq->resend, irq->masked_pending); } @@ -1198,10 +1227,17 @@ static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr) val |= prio << KVM_XICS_PRIORITY_SHIFT; if (irqp->lsi) { val |= KVM_XICS_LEVEL_SENSITIVE; - if (irqp->asserted) + if (irqp->pq_state & PQ_PRESENTED) val |= KVM_XICS_PENDING; } else if (irqp->masked_pending || irqp->resend) val |= KVM_XICS_PENDING; + + if (irqp->pq_state & PQ_PRESENTED) + val |= KVM_XICS_PRESENTED; + + if (irqp->pq_state & PQ_QUEUED) + val |= KVM_XICS_QUEUED; + ret = 0; } arch_spin_unlock(&ics->lock); @@ -1253,18 +1289,20 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr) irqp->resend = 0; irqp->masked_pending = 0; irqp->lsi = 0; - irqp->asserted = 0; - if (val & KVM_XICS_LEVEL_SENSITIVE) { + irqp->pq_state = 0; + if (val & KVM_XICS_LEVEL_SENSITIVE) irqp->lsi = 1; - if (val & KVM_XICS_PENDING) - irqp->asserted = 1; - } + /* If PENDING, 
set P in case P is not saved because of old code */ + if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING) + irqp->pq_state |= PQ_PRESENTED; + if (val & KVM_XICS_QUEUED) + irqp->pq_state |= PQ_QUEUED; irqp->exists = 1; arch_spin_unlock(&ics->lock); local_irq_restore(flags); if (val & KVM_XICS_PENDING) - icp_deliver_irq(xics, NULL, irqp->number); + icp_deliver_irq(xics, NULL, irqp->number, false); return 0; } diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index 2a50320b55ca..ec5474cf70c6 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h @@ -31,16 +31,19 @@ /* Priority value to use for disabling an interrupt */ #define MASKED 0xff +#define PQ_PRESENTED 1 +#define PQ_QUEUED 2 + /* State for one irq source */ struct ics_irq_state { u32 number; u32 server; + u32 pq_state; u8 priority; u8 saved_priority; u8 resend; u8 masked_pending; u8 lsi; /* level-sensitive interrupt */ - u8 asserted; /* Only for LSI */ u8 exists; int intr_cpu; u32 host_irq; @@ -73,7 +76,6 @@ struct kvmppc_icp { */ #define XICS_RM_KICK_VCPU 0x1 #define XICS_RM_CHECK_RESEND 0x2 -#define XICS_RM_REJECT 0x4 #define XICS_RM_NOTIFY_EOI 0x8 u32 rm_action; struct kvm_vcpu *rm_kick_target; @@ -84,7 +86,6 @@ struct kvmppc_icp { /* Counters for each reason we exited real mode */ unsigned long n_rm_kick_vcpu; unsigned long n_rm_check_resend; - unsigned long n_rm_reject; unsigned long n_rm_notify_eoi; /* Counters for handling ICP processing in real mode */ unsigned long n_check_resend; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index df3f2706d3e5..0514cbd4e533 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -30,7 +30,7 @@ #include <linux/fs.h> #include <asm/cputable.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/kvm_ppc.h> #include <asm/cacheflush.h> #include <asm/dbell.h> diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index ed38f8114118..fe312c160d97 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -29,7 +29,7 @@ #include <linux/errno.h> #include <linux/fs.h> #include <linux/anon_inodes.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/mpic.h> #include <asm/kvm_para.h> #include <asm/kvm_host.h> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index efd1183a6b16..2b3e4e620078 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -30,7 +30,7 @@ #include <linux/irqbypass.h> #include <linux/kvm_irqfd.h> #include <asm/cputable.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/kvm_ppc.h> #include <asm/tlbflush.h> #include <asm/cputhreads.h> @@ -565,6 +565,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_PPC_HWRNG: r = kvmppc_hwrng_present(); break; + case KVM_CAP_PPC_MMU_RADIX: + r = !!(hv_enabled && radix_enabled()); + break; + case KVM_CAP_PPC_MMU_HASH_V3: + r = !!(hv_enabled && !radix_enabled() && + cpu_has_feature(CPU_FTR_ARCH_300)); + break; #endif case KVM_CAP_SYNC_MMU: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE @@ -605,6 +612,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SPAPR_MULTITCE: r = 1; break; + case KVM_CAP_SPAPR_RESIZE_HPT: + r = !!hv_enabled; + break; #endif case KVM_CAP_PPC_HTM: r = cpu_has_feature(CPU_FTR_TM_COMP) && @@ -1468,6 +1478,31 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_rtas_define_token(kvm, argp); break; } + case KVM_PPC_CONFIGURE_V3_MMU: { + struct kvm *kvm = filp->private_data; + struct 
kvm_ppc_mmuv3_cfg cfg; + + r = -EINVAL; + if (!kvm->arch.kvm_ops->configure_mmu) + goto out; + r = -EFAULT; + if (copy_from_user(&cfg, argp, sizeof(cfg))) + goto out; + r = kvm->arch.kvm_ops->configure_mmu(kvm, &cfg); + break; + } + case KVM_PPC_GET_RMMU_INFO: { + struct kvm *kvm = filp->private_data; + struct kvm_ppc_rmmu_info info; + + r = -EINVAL; + if (!kvm->arch.kvm_ops->get_rmmu_info) + goto out; + r = kvm->arch.kvm_ops->get_rmmu_info(kvm, &info); + if (r >= 0 && copy_to_user(argp, &info, sizeof(info))) + r = -EFAULT; + break; + } default: { struct kvm *kvm = filp->private_data; r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg); diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 309361e86523..0e649d72fe8d 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -21,9 +21,7 @@ obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \ obj64-$(CONFIG_SMP) += locks.o obj64-$(CONFIG_ALTIVEC) += vmx-helper.o -ifeq ($(CONFIG_GENERIC_CSUM),) obj-y += checksum_$(BITS).o checksum_wrappers.o -endif obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index ea29a5d67743..9a671c774b22 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -103,17 +103,14 @@ EXPORT_SYMBOL(__csum_partial) adde r12,r12,r10 #define CSUM_COPY_16_BYTES_EXCODE(n) \ -.section __ex_table,"a"; \ - .align 2; \ - .long 8 ## n ## 0b,src_error; \ - .long 8 ## n ## 1b,src_error; \ - .long 8 ## n ## 2b,src_error; \ - .long 8 ## n ## 3b,src_error; \ - .long 8 ## n ## 4b,dst_error; \ - .long 8 ## n ## 5b,dst_error; \ - .long 8 ## n ## 6b,dst_error; \ - .long 8 ## n ## 7b,dst_error; \ - .text + EX_TABLE(8 ## n ## 0b, src_error); \ + EX_TABLE(8 ## n ## 1b, src_error); \ + EX_TABLE(8 ## n ## 2b, src_error); \ + EX_TABLE(8 ## n ## 3b, src_error); \ + EX_TABLE(8 ## n ## 4b, dst_error); \ + EX_TABLE(8 ## n ## 5b, dst_error); \ + EX_TABLE(8 ## n ## 6b, dst_error); \ + EX_TABLE(8 ## n ## 7b, dst_error); .text .stabs "arch/powerpc/lib/",N_SO,0,0,0f @@ -263,14 +260,11 @@ dst_error: stw r0,0(r8) blr - .section __ex_table,"a" - .align 2 - .long 70b,src_error - .long 71b,dst_error - .long 72b,src_error - .long 73b,dst_error - .long 54b,dst_error - .text + EX_TABLE(70b, src_error); + EX_TABLE(71b, dst_error); + EX_TABLE(72b, src_error); + EX_TABLE(73b, dst_error); + EX_TABLE(54b, dst_error); /* * this stuff handles faults in the cacheline loop and branches to either @@ -291,12 +285,11 @@ dst_error: #endif #endif - .section __ex_table,"a" - .align 2 - .long 30b,src_error - .long 31b,dst_error - .long 40b,src_error - .long 41b,dst_error - .long 50b,src_error - .long 51b,dst_error + EX_TABLE(30b, src_error); + EX_TABLE(31b, dst_error); + EX_TABLE(40b, src_error); + EX_TABLE(41b, dst_error); + EX_TABLE(50b, src_error); + EX_TABLE(51b, dst_error); + EXPORT_SYMBOL(csum_partial_copy_generic) diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index fd9176671f9f..d7f1a966136e 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -36,7 +36,7 @@ _GLOBAL(__csum_partial) * work to calculate the correct checksum, we ignore that case * and take the potential slowdown of unaligned loads. */ - rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ + rldicl. 
r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */ beq .Lcsum_aligned li r7,4 @@ -168,8 +168,12 @@ _GLOBAL(__csum_partial) beq .Lcsum_finish lbz r6,0(r3) +#ifdef __BIG_ENDIAN__ sldi r9,r6,8 /* Pad the byte out to 16 bits */ adde r0,r0,r9 +#else + adde r0,r0,r6 +#endif .Lcsum_finish: addze r0,r0 /* add in final carry */ @@ -182,34 +186,22 @@ EXPORT_SYMBOL(__csum_partial) .macro srcnr 100: - .section __ex_table,"a" - .align 3 - .llong 100b,.Lsrc_error_nr - .previous + EX_TABLE(100b,.Lsrc_error_nr) .endm .macro source 150: - .section __ex_table,"a" - .align 3 - .llong 150b,.Lsrc_error - .previous + EX_TABLE(150b,.Lsrc_error) .endm .macro dstnr 200: - .section __ex_table,"a" - .align 3 - .llong 200b,.Ldest_error_nr - .previous + EX_TABLE(200b,.Ldest_error_nr) .endm .macro dest 250: - .section __ex_table,"a" - .align 3 - .llong 250b,.Ldest_error - .previous + EX_TABLE(250b,.Ldest_error) .endm /* @@ -236,7 +228,7 @@ _GLOBAL(csum_partial_copy_generic) * If the source and destination are relatively unaligned we only * align the source. This keeps things simple. */ - rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ + rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */ beq .Lcopy_aligned li r9,4 @@ -398,8 +390,12 @@ dstnr; sth r6,0(r4) beq .Lcopy_finish srcnr; lbz r6,0(r3) +#ifdef __BIG_ENDIAN__ sldi r9,r6,8 /* Pad the byte out to 16 bits */ adde r0,r0,r9 +#else + adde r0,r0,r6 +#endif dstnr; stb r6,0(r4) .Lcopy_finish: diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c index 08e3a3356c40..a0cb63fb76a1 100644 --- a/arch/powerpc/lib/checksum_wrappers.c +++ b/arch/powerpc/lib/checksum_wrappers.c @@ -21,7 +21,7 @@ #include <linux/compiler.h> #include <linux/types.h> #include <asm/checksum.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index d5edbeb8eb82..c1746df0f88e 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -13,7 +13,7 @@ #include <linux/mm.h> #include <asm/page.h> #include <asm/code-patching.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> int patch_instruction(unsigned int *addr, unsigned int instr) diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 40cce33b08d6..ff0d894d7ff9 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -49,17 +49,14 @@ 9 ## n ## 1: \ addi r5,r5,-(16 * n); \ b 105f; \ -.section __ex_table,"a"; \ - .align 2; \ - .long 8 ## n ## 0b,9 ## n ## 0b; \ - .long 8 ## n ## 1b,9 ## n ## 0b; \ - .long 8 ## n ## 2b,9 ## n ## 0b; \ - .long 8 ## n ## 3b,9 ## n ## 0b; \ - .long 8 ## n ## 4b,9 ## n ## 1b; \ - .long 8 ## n ## 5b,9 ## n ## 1b; \ - .long 8 ## n ## 6b,9 ## n ## 1b; \ - .long 8 ## n ## 7b,9 ## n ## 1b; \ - .text + EX_TABLE(8 ## n ## 0b,9 ## n ## 0b); \ + EX_TABLE(8 ## n ## 1b,9 ## n ## 0b); \ + EX_TABLE(8 ## n ## 2b,9 ## n ## 0b); \ + EX_TABLE(8 ## n ## 3b,9 ## n ## 0b); \ + EX_TABLE(8 ## n ## 4b,9 ## n ## 1b); \ + EX_TABLE(8 ## n ## 5b,9 ## n ## 1b); \ + EX_TABLE(8 ## n ## 6b,9 ## n ## 1b); \ + EX_TABLE(8 ## n ## 7b,9 ## n ## 1b) .text .stabs "arch/powerpc/lib/",N_SO,0,0,0f @@ -323,13 +320,10 @@ _GLOBAL(__copy_tofrom_user) 73: stwu r9,4(r6) bdnz 72b - .section __ex_table,"a" - .align 2 - .long 70b,100f - .long 71b,101f - .long 72b,102f - .long 73b,103f - .text + EX_TABLE(70b,100f) + EX_TABLE(71b,101f) + EX_TABLE(72b,102f) + EX_TABLE(73b,103f) 58: srwi. 
r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ clrlwi r5,r5,32-LG_CACHELINE_BYTES @@ -364,10 +358,7 @@ _GLOBAL(__copy_tofrom_user) 53: dcbt r3,r4 54: dcbz r11,r6 - .section __ex_table,"a" - .align 2 - .long 54b,105f - .text + EX_TABLE(54b,105f) /* the main body of the cacheline loop */ COPY_16_BYTES_WITHEX(0) #if L1_CACHE_BYTES >= 32 @@ -500,15 +491,13 @@ _GLOBAL(__copy_tofrom_user) bdnz 114b 120: blr - .section __ex_table,"a" - .align 2 - .long 30b,108b - .long 31b,109b - .long 40b,110b - .long 41b,111b - .long 130b,132b - .long 131b,120b - .long 112b,120b - .long 114b,120b - .text + EX_TABLE(30b,108b) + EX_TABLE(31b,109b) + EX_TABLE(40b,110b) + EX_TABLE(41b,111b) + EX_TABLE(130b,132b) + EX_TABLE(131b,120b) + EX_TABLE(112b,120b) + EX_TABLE(114b,120b) + EXPORT_SYMBOL(__copy_tofrom_user) diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S index 21367b3a8146..4bcc9e76fb55 100644 --- a/arch/powerpc/lib/copypage_64.S +++ b/arch/powerpc/lib/copypage_64.S @@ -26,8 +26,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) ori r5,r5,PAGE_SIZE@l BEGIN_FTR_SECTION ld r10,PPC64_CACHES@toc(r2) - lwz r11,DCACHEL1LOGLINESIZE(r10) /* log2 of cache line size */ - lwz r12,DCACHEL1LINESIZE(r10) /* get cache line size */ + lwz r11,DCACHEL1LOGBLOCKSIZE(r10) /* log2 of cache block size */ + lwz r12,DCACHEL1BLOCKSIZE(r10) /* get cache block size */ li r9,0 srd r8,r5,r11 diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index 60386b2c99bb..aee6e24e81ab 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -394,70 +394,66 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 192: blr /* #bytes not copied in r3 */ - .section __ex_table,"a" - .align 3 - .llong 20b,120b - .llong 220b,320b - .llong 21b,121b - .llong 221b,321b - .llong 70b,170b - .llong 270b,370b - .llong 22b,122b - .llong 222b,322b - .llong 71b,171b - .llong 271b,371b - .llong 72b,172b - .llong 272b,372b - .llong 244b,344b - .llong 245b,345b - .llong 23b,123b - .llong 73b,173b - .llong 44b,144b - .llong 74b,174b - .llong 45b,145b - .llong 75b,175b - .llong 24b,124b - .llong 25b,125b - .llong 26b,126b - .llong 27b,127b - .llong 28b,128b - .llong 29b,129b - .llong 30b,130b - .llong 31b,131b - .llong 32b,132b - .llong 76b,176b - .llong 33b,133b - .llong 77b,177b - .llong 78b,178b - .llong 79b,179b - .llong 80b,180b - .llong 34b,134b - .llong 94b,194b - .llong 95b,195b - .llong 96b,196b - .llong 35b,135b - .llong 81b,181b - .llong 36b,136b - .llong 82b,182b - .llong 37b,137b - .llong 83b,183b - .llong 38b,138b - .llong 39b,139b - .llong 84b,184b - .llong 85b,185b - .llong 40b,140b - .llong 86b,186b - .llong 41b,141b - .llong 87b,187b - .llong 42b,142b - .llong 88b,188b - .llong 43b,143b - .llong 89b,189b - .llong 90b,190b - .llong 91b,191b - .llong 92b,192b - - .text + EX_TABLE(20b,120b) + EX_TABLE(220b,320b) + EX_TABLE(21b,121b) + EX_TABLE(221b,321b) + EX_TABLE(70b,170b) + EX_TABLE(270b,370b) + EX_TABLE(22b,122b) + EX_TABLE(222b,322b) + EX_TABLE(71b,171b) + EX_TABLE(271b,371b) + EX_TABLE(72b,172b) + EX_TABLE(272b,372b) + EX_TABLE(244b,344b) + EX_TABLE(245b,345b) + EX_TABLE(23b,123b) + EX_TABLE(73b,173b) + EX_TABLE(44b,144b) + EX_TABLE(74b,174b) + EX_TABLE(45b,145b) + EX_TABLE(75b,175b) + EX_TABLE(24b,124b) + EX_TABLE(25b,125b) + EX_TABLE(26b,126b) + EX_TABLE(27b,127b) + EX_TABLE(28b,128b) + EX_TABLE(29b,129b) + EX_TABLE(30b,130b) + EX_TABLE(31b,131b) + EX_TABLE(32b,132b) + EX_TABLE(76b,176b) + EX_TABLE(33b,133b) + EX_TABLE(77b,177b) + EX_TABLE(78b,178b) + EX_TABLE(79b,179b) 
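+ /*
+ * Each EX_TABLE(insn, fixup) pair replaces an open-coded __ex_table
+ * entry: if the access at the first label faults, the exception
+ * handler resumes at the second label, with r3 reporting the bytes
+ * not copied (see 192: above).
+ */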
+ EX_TABLE(80b,180b) + EX_TABLE(34b,134b) + EX_TABLE(94b,194b) + EX_TABLE(95b,195b) + EX_TABLE(96b,196b) + EX_TABLE(35b,135b) + EX_TABLE(81b,181b) + EX_TABLE(36b,136b) + EX_TABLE(82b,182b) + EX_TABLE(37b,137b) + EX_TABLE(83b,183b) + EX_TABLE(38b,138b) + EX_TABLE(39b,139b) + EX_TABLE(84b,184b) + EX_TABLE(85b,185b) + EX_TABLE(40b,140b) + EX_TABLE(86b,186b) + EX_TABLE(41b,141b) + EX_TABLE(87b,187b) + EX_TABLE(42b,142b) + EX_TABLE(88b,188b) + EX_TABLE(43b,143b) + EX_TABLE(89b,189b) + EX_TABLE(90b,190b) + EX_TABLE(91b,191b) + EX_TABLE(92b,192b) /* * Routine to copy a whole page of data, optimized for POWER4. @@ -598,78 +594,77 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) li r5,4096 b .Ldst_aligned - .section __ex_table,"a" - .align 3 - .llong 20b,100b - .llong 21b,100b - .llong 22b,100b - .llong 23b,100b - .llong 24b,100b - .llong 25b,100b - .llong 26b,100b - .llong 27b,100b - .llong 28b,100b - .llong 29b,100b - .llong 30b,100b - .llong 31b,100b - .llong 32b,100b - .llong 33b,100b - .llong 34b,100b - .llong 35b,100b - .llong 36b,100b - .llong 37b,100b - .llong 38b,100b - .llong 39b,100b - .llong 40b,100b - .llong 41b,100b - .llong 42b,100b - .llong 43b,100b - .llong 44b,100b - .llong 45b,100b - .llong 46b,100b - .llong 47b,100b - .llong 48b,100b - .llong 49b,100b - .llong 50b,100b - .llong 51b,100b - .llong 52b,100b - .llong 53b,100b - .llong 54b,100b - .llong 55b,100b - .llong 56b,100b - .llong 57b,100b - .llong 58b,100b - .llong 59b,100b - .llong 60b,100b - .llong 61b,100b - .llong 62b,100b - .llong 63b,100b - .llong 64b,100b - .llong 65b,100b - .llong 66b,100b - .llong 67b,100b - .llong 68b,100b - .llong 69b,100b - .llong 70b,100b - .llong 71b,100b - .llong 72b,100b - .llong 73b,100b - .llong 74b,100b - .llong 75b,100b - .llong 76b,100b - .llong 77b,100b - .llong 78b,100b - .llong 79b,100b - .llong 80b,100b - .llong 81b,100b - .llong 82b,100b - .llong 83b,100b - .llong 84b,100b - .llong 85b,100b - .llong 86b,100b - .llong 87b,100b - .llong 88b,100b - .llong 89b,100b - .llong 90b,100b - .llong 91b,100b + EX_TABLE(20b,100b) + EX_TABLE(21b,100b) + EX_TABLE(22b,100b) + EX_TABLE(23b,100b) + EX_TABLE(24b,100b) + EX_TABLE(25b,100b) + EX_TABLE(26b,100b) + EX_TABLE(27b,100b) + EX_TABLE(28b,100b) + EX_TABLE(29b,100b) + EX_TABLE(30b,100b) + EX_TABLE(31b,100b) + EX_TABLE(32b,100b) + EX_TABLE(33b,100b) + EX_TABLE(34b,100b) + EX_TABLE(35b,100b) + EX_TABLE(36b,100b) + EX_TABLE(37b,100b) + EX_TABLE(38b,100b) + EX_TABLE(39b,100b) + EX_TABLE(40b,100b) + EX_TABLE(41b,100b) + EX_TABLE(42b,100b) + EX_TABLE(43b,100b) + EX_TABLE(44b,100b) + EX_TABLE(45b,100b) + EX_TABLE(46b,100b) + EX_TABLE(47b,100b) + EX_TABLE(48b,100b) + EX_TABLE(49b,100b) + EX_TABLE(50b,100b) + EX_TABLE(51b,100b) + EX_TABLE(52b,100b) + EX_TABLE(53b,100b) + EX_TABLE(54b,100b) + EX_TABLE(55b,100b) + EX_TABLE(56b,100b) + EX_TABLE(57b,100b) + EX_TABLE(58b,100b) + EX_TABLE(59b,100b) + EX_TABLE(60b,100b) + EX_TABLE(61b,100b) + EX_TABLE(62b,100b) + EX_TABLE(63b,100b) + EX_TABLE(64b,100b) + EX_TABLE(65b,100b) + EX_TABLE(66b,100b) + EX_TABLE(67b,100b) + EX_TABLE(68b,100b) + EX_TABLE(69b,100b) + EX_TABLE(70b,100b) + EX_TABLE(71b,100b) + EX_TABLE(72b,100b) + EX_TABLE(73b,100b) + EX_TABLE(74b,100b) + EX_TABLE(75b,100b) + EX_TABLE(76b,100b) + EX_TABLE(77b,100b) + EX_TABLE(78b,100b) + EX_TABLE(79b,100b) + EX_TABLE(80b,100b) + EX_TABLE(81b,100b) + EX_TABLE(82b,100b) + EX_TABLE(83b,100b) + EX_TABLE(84b,100b) + EX_TABLE(85b,100b) + EX_TABLE(86b,100b) + EX_TABLE(87b,100b) + EX_TABLE(88b,100b) + EX_TABLE(89b,100b) + EX_TABLE(90b,100b) + EX_TABLE(91b,100b) + 
EXPORT_SYMBOL(__copy_tofrom_user) diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index da0c568d18c4..a24b4039352c 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -29,35 +29,23 @@ .macro err1 100: - .section __ex_table,"a" - .align 3 - .llong 100b,.Ldo_err1 - .previous + EX_TABLE(100b,.Ldo_err1) .endm .macro err2 200: - .section __ex_table,"a" - .align 3 - .llong 200b,.Ldo_err2 - .previous + EX_TABLE(200b,.Ldo_err2) .endm #ifdef CONFIG_ALTIVEC .macro err3 300: - .section __ex_table,"a" - .align 3 - .llong 300b,.Ldo_err3 - .previous + EX_TABLE(300b,.Ldo_err3) .endm .macro err4 400: - .section __ex_table,"a" - .align 3 - .llong 400b,.Ldo_err4 - .previous + EX_TABLE(400b,.Ldo_err4) .endm diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S index 5d0cdbfbe3f2..a58777c1b2cb 100644 --- a/arch/powerpc/lib/ldstfp.S +++ b/arch/powerpc/lib/ldstfp.S @@ -21,18 +21,12 @@ #define STKFRM (PPC_MIN_STKFRM + 16) - .macro extab instr,handler - .section __ex_table,"a" - PPC_LONG \instr,\handler - .previous - .endm - .macro inst32 op reg = 0 .rept 32 20: \op reg,0,r4 b 3f - extab 20b,99f + EX_TABLE(20b,99f) reg = reg + 1 .endr .endm @@ -100,7 +94,7 @@ _GLOBAL(do_lfs) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) /* Load FP reg N from double at *p. N is in r3, p in r4. */ _GLOBAL(do_lfd) @@ -127,7 +121,7 @@ _GLOBAL(do_lfd) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) /* Store FP reg N to float at *p. N is in r3, p in r4. */ _GLOBAL(do_stfs) @@ -154,7 +148,7 @@ _GLOBAL(do_stfs) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) /* Store FP reg N to double at *p. N is in r3, p in r4. */ _GLOBAL(do_stfd) @@ -181,7 +175,7 @@ _GLOBAL(do_stfd) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) #ifdef CONFIG_ALTIVEC /* Get the contents of vrN into v0; N is in r3. */ @@ -248,7 +242,7 @@ _GLOBAL(do_lvx) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) /* Store vector reg N to *p. N is in r3, p in r4. */ _GLOBAL(do_stvx) @@ -276,7 +270,7 @@ _GLOBAL(do_stvx) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX @@ -344,7 +338,7 @@ _GLOBAL(do_lxvd2x) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) /* Store VSX reg N to vector doubleword *p. N is in r3, p in r4. 
*/ _GLOBAL(do_stxvd2x) @@ -372,7 +366,7 @@ _GLOBAL(do_stxvd2x) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) #endif /* CONFIG_VSX */ diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 3362299b1859..846dba2c6360 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -14,7 +14,8 @@ #include <linux/prefetch.h> #include <asm/sstep.h> #include <asm/processor.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> +#include <asm/cpu_has_feature.h> #include <asm/cputable.h> extern char system_call_common[]; @@ -493,10 +494,7 @@ static int __kprobes do_vsx_store(int rn, int (*func)(int, unsigned long), "3: li %0,%4\n" \ " b 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".previous" \ + EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (cr) \ : "r" (x), "r" (addr), "i" (-EFAULT), "0" (err)) @@ -508,10 +506,7 @@ static int __kprobes do_vsx_store(int rn, int (*func)(int, unsigned long), "3: li %0,%3\n" \ " b 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".previous" \ + EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (x) \ : "r" (addr), "i" (-EFAULT), "0" (err)) @@ -523,10 +518,7 @@ static int __kprobes do_vsx_store(int rn, int (*func)(int, unsigned long), "3: li %0,%3\n" \ " b 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".previous" \ + EX_TABLE(1b, 3b) \ : "=r" (err) \ : "r" (addr), "i" (-EFAULT), "0" (err)) @@ -1811,9 +1803,8 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ - err = -EFAULT; if (!address_ok(regs, op.ea, size)) - goto ldst_done; + return 0; err = 0; switch (size) { case 4: @@ -1836,9 +1827,8 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ - err = -EFAULT; if (!address_ok(regs, op.ea, size)) - goto ldst_done; + return 0; err = 0; switch (size) { case 4: diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S index d13e07603519..a787776822d8 100644 --- a/arch/powerpc/lib/string.S +++ b/arch/powerpc/lib/string.S @@ -13,8 +13,6 @@ #include <asm/ppc_asm.h> #include <asm/export.h> - .section __ex_table,"a" - PPC_LONG_ALIGN .text /* This clears out any unused part of the destination buffer, @@ -125,10 +123,9 @@ _GLOBAL(__clear_user) 92: mfctr r3 blr - .section __ex_table,"a" - PPC_LONG 11b,90b - PPC_LONG 1b,91b - PPC_LONG 8b,92b - .text + EX_TABLE(11b, 90b) + EX_TABLE(1b, 91b) + EX_TABLE(8b, 92b) + EXPORT_SYMBOL(__clear_user) #endif diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index 57ace356c949..d5b4d9498c54 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -19,6 +19,7 @@ */ #include <asm/ppc_asm.h> +#include <asm/linkage.h> #include <asm/asm-offsets.h> #include <asm/export.h> @@ -41,26 +42,17 @@ PPC64_CACHES: .macro err1 100: - .section __ex_table,"a" - .align 3 - .llong 100b,.Ldo_err1 - .previous + EX_TABLE(100b,.Ldo_err1) .endm .macro err2 200: - .section __ex_table,"a" - .align 3 - .llong 200b,.Ldo_err2 - .previous + EX_TABLE(200b,.Ldo_err2) .endm .macro err3 300: - .section __ex_table,"a" - .align 3 - .llong 300b,.Ldo_err3 - .previous + EX_TABLE(300b,.Ldo_err3) .endm .Ldo_err1: @@ -160,9 +152,9 @@ err2; std r0,0(r3) addi r3,r3,8 addi r4,r4,-8 - /* Destination is 16 byte aligned, need to get it cacheline aligned */ -11: 
lwz r7,DCACHEL1LOGLINESIZE(r5) - lwz r9,DCACHEL1LINESIZE(r5) + /* Destination is 16 byte aligned, need to get it cache block aligned */ +11: lwz r7,DCACHEL1LOGBLOCKSIZE(r5) + lwz r9,DCACHEL1BLOCKSIZE(r5) /* * With worst case alignment the long clear loop takes a minimum diff --git a/arch/powerpc/lib/usercopy_64.c b/arch/powerpc/lib/usercopy_64.c index 5eea6f3c1e03..9bd3a3dad78d 100644 --- a/arch/powerpc/lib/usercopy_64.c +++ b/arch/powerpc/lib/usercopy_64.c @@ -7,7 +7,7 @@ * 2 of the License, or (at your option) any later version. */ #include <linux/module.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { diff --git a/arch/powerpc/math-emu/fabs.c b/arch/powerpc/math-emu/fabs.c index 549baba5948f..a5e7ad1384ee 100644 --- a/arch/powerpc/math-emu/fabs.c +++ b/arch/powerpc/math-emu/fabs.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> int fabs(u32 *frD, u32 *frB) diff --git a/arch/powerpc/math-emu/fadd.c b/arch/powerpc/math-emu/fadd.c index 0158a16e2b82..29de37e0e0da 100644 --- a/arch/powerpc/math-emu/fadd.c +++ b/arch/powerpc/math-emu/fadd.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fadds.c b/arch/powerpc/math-emu/fadds.c index 5930f40a8687..7093c5b58002 100644 --- a/arch/powerpc/math-emu/fadds.c +++ b/arch/powerpc/math-emu/fadds.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fcmpo.c b/arch/powerpc/math-emu/fcmpo.c index 5bce011c2aec..5d644467221c 100644 --- a/arch/powerpc/math-emu/fcmpo.c +++ b/arch/powerpc/math-emu/fcmpo.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fcmpu.c b/arch/powerpc/math-emu/fcmpu.c index d4fb1babc6ad..0f9bf4864832 100644 --- a/arch/powerpc/math-emu/fcmpu.c +++ b/arch/powerpc/math-emu/fcmpu.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fctiw.c b/arch/powerpc/math-emu/fctiw.c index f694440ddc00..716d6da7f204 100644 --- a/arch/powerpc/math-emu/fctiw.c +++ b/arch/powerpc/math-emu/fctiw.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fctiwz.c b/arch/powerpc/math-emu/fctiwz.c index 71e782fd4fe3..7212fa7cfd36 100644 --- a/arch/powerpc/math-emu/fctiwz.c +++ b/arch/powerpc/math-emu/fctiwz.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fdiv.c b/arch/powerpc/math-emu/fdiv.c index a29239c05e3e..e1e452069e49 100644 --- a/arch/powerpc/math-emu/fdiv.c +++ b/arch/powerpc/math-emu/fdiv.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include 
<asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fdivs.c b/arch/powerpc/math-emu/fdivs.c index 526bc261275f..5511e2d1c3ad 100644 --- a/arch/powerpc/math-emu/fdivs.c +++ b/arch/powerpc/math-emu/fdivs.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fmadd.c b/arch/powerpc/math-emu/fmadd.c index 8c3f20aa5a95..2b6fae0bc8c2 100644 --- a/arch/powerpc/math-emu/fmadd.c +++ b/arch/powerpc/math-emu/fmadd.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fmadds.c b/arch/powerpc/math-emu/fmadds.c index 794fb31e59d1..aff35f24a236 100644 --- a/arch/powerpc/math-emu/fmadds.c +++ b/arch/powerpc/math-emu/fmadds.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fmr.c b/arch/powerpc/math-emu/fmr.c index bd55384b8196..f6347911f6a3 100644 --- a/arch/powerpc/math-emu/fmr.c +++ b/arch/powerpc/math-emu/fmr.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> int fmr(u32 *frD, u32 *frB) diff --git a/arch/powerpc/math-emu/fmsub.c b/arch/powerpc/math-emu/fmsub.c index 626f6fed84ac..1fb26cebe04e 100644 --- a/arch/powerpc/math-emu/fmsub.c +++ b/arch/powerpc/math-emu/fmsub.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fmsubs.c b/arch/powerpc/math-emu/fmsubs.c index 3425bc899760..f73965453e05 100644 --- a/arch/powerpc/math-emu/fmsubs.c +++ b/arch/powerpc/math-emu/fmsubs.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fmul.c b/arch/powerpc/math-emu/fmul.c index 2c1929779892..ffd31b549290 100644 --- a/arch/powerpc/math-emu/fmul.c +++ b/arch/powerpc/math-emu/fmul.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fmuls.c b/arch/powerpc/math-emu/fmuls.c index f5ad5c9c77d0..21aee431ca9d 100644 --- a/arch/powerpc/math-emu/fmuls.c +++ b/arch/powerpc/math-emu/fmuls.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fnabs.c b/arch/powerpc/math-emu/fnabs.c index a7d34f3d9499..af877a53d264 100644 --- a/arch/powerpc/math-emu/fnabs.c +++ b/arch/powerpc/math-emu/fnabs.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> int fnabs(u32 *frD, u32 *frB) diff --git a/arch/powerpc/math-emu/fneg.c b/arch/powerpc/math-emu/fneg.c index 1e988cd9c6cc..8417d174758c 100644 --- a/arch/powerpc/math-emu/fneg.c +++ b/arch/powerpc/math-emu/fneg.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> 
+#include <linux/uaccess.h> int fneg(u32 *frD, u32 *frB) diff --git a/arch/powerpc/math-emu/fnmadd.c b/arch/powerpc/math-emu/fnmadd.c index e817bc5453ef..6316ef0e0874 100644 --- a/arch/powerpc/math-emu/fnmadd.c +++ b/arch/powerpc/math-emu/fnmadd.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fnmadds.c b/arch/powerpc/math-emu/fnmadds.c index 4db4b7d9ba8d..9ffe037df2b9 100644 --- a/arch/powerpc/math-emu/fnmadds.c +++ b/arch/powerpc/math-emu/fnmadds.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fnmsub.c b/arch/powerpc/math-emu/fnmsub.c index f65979fa770e..f97a9cfb54ea 100644 --- a/arch/powerpc/math-emu/fnmsub.c +++ b/arch/powerpc/math-emu/fnmsub.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fnmsubs.c b/arch/powerpc/math-emu/fnmsubs.c index 9021dacc03b8..7fa1217bd930 100644 --- a/arch/powerpc/math-emu/fnmsubs.c +++ b/arch/powerpc/math-emu/fnmsubs.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fre.c b/arch/powerpc/math-emu/fre.c index 49ccf2cc6a5a..b621a790aa67 100644 --- a/arch/powerpc/math-emu/fre.c +++ b/arch/powerpc/math-emu/fre.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> int fre(void *frD, void *frB) { diff --git a/arch/powerpc/math-emu/fres.c b/arch/powerpc/math-emu/fres.c index 10ecbd08b79e..211c30d0145f 100644 --- a/arch/powerpc/math-emu/fres.c +++ b/arch/powerpc/math-emu/fres.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> int fres(void *frD, void *frB) diff --git a/arch/powerpc/math-emu/frsp.c b/arch/powerpc/math-emu/frsp.c index ddcc14664b1a..3e3bc73e27ae 100644 --- a/arch/powerpc/math-emu/frsp.c +++ b/arch/powerpc/math-emu/frsp.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/frsqrte.c b/arch/powerpc/math-emu/frsqrte.c index 1d0a3a0fd0e6..7c2ce43750dc 100644 --- a/arch/powerpc/math-emu/frsqrte.c +++ b/arch/powerpc/math-emu/frsqrte.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> int frsqrte(void *frD, void *frB) diff --git a/arch/powerpc/math-emu/frsqrtes.c b/arch/powerpc/math-emu/frsqrtes.c index 7e838e380314..269951a8c650 100644 --- a/arch/powerpc/math-emu/frsqrtes.c +++ b/arch/powerpc/math-emu/frsqrtes.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> int frsqrtes(void *frD, void *frB) { diff --git a/arch/powerpc/math-emu/fsel.c b/arch/powerpc/math-emu/fsel.c index 1b0c14498032..32b62c6c7f48 100644 --- a/arch/powerpc/math-emu/fsel.c +++ b/arch/powerpc/math-emu/fsel.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> 
+#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fsqrt.c b/arch/powerpc/math-emu/fsqrt.c index a55fc7d49983..0e2a34b616dc 100644 --- a/arch/powerpc/math-emu/fsqrt.c +++ b/arch/powerpc/math-emu/fsqrt.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fsqrts.c b/arch/powerpc/math-emu/fsqrts.c index 31dccbfc39ff..420cf19b5fd4 100644 --- a/arch/powerpc/math-emu/fsqrts.c +++ b/arch/powerpc/math-emu/fsqrts.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fsub.c b/arch/powerpc/math-emu/fsub.c index 02c5dff458ba..feedd705cf62 100644 --- a/arch/powerpc/math-emu/fsub.c +++ b/arch/powerpc/math-emu/fsub.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/fsubs.c b/arch/powerpc/math-emu/fsubs.c index 5d9b18c35e07..74190514063e 100644 --- a/arch/powerpc/math-emu/fsubs.c +++ b/arch/powerpc/math-emu/fsubs.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/lfd.c b/arch/powerpc/math-emu/lfd.c index 79ac76d596c3..d998a50740a0 100644 --- a/arch/powerpc/math-emu/lfd.c +++ b/arch/powerpc/math-emu/lfd.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/double.h> diff --git a/arch/powerpc/math-emu/lfs.c b/arch/powerpc/math-emu/lfs.c index 434ed27be8db..1ee10b83d7e3 100644 --- a/arch/powerpc/math-emu/lfs.c +++ b/arch/powerpc/math-emu/lfs.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c index ab151f040502..76ee2e5dba65 100644 --- a/arch/powerpc/math-emu/math.c +++ b/arch/powerpc/math-emu/math.c @@ -5,7 +5,7 @@ #include <linux/types.h> #include <linux/sched.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/reg.h> #include <asm/switch_to.h> diff --git a/arch/powerpc/math-emu/math_efp.c b/arch/powerpc/math-emu/math_efp.c index 28337c9709ae..581f404caa1d 100644 --- a/arch/powerpc/math-emu/math_efp.c +++ b/arch/powerpc/math-emu/math_efp.c @@ -22,7 +22,7 @@ #include <linux/types.h> #include <linux/prctl.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/reg.h> #define FP_EX_BOOKE_E500_SPE diff --git a/arch/powerpc/math-emu/mcrfs.c b/arch/powerpc/math-emu/mcrfs.c index e948d5708e2b..8e8e72397ebc 100644 --- a/arch/powerpc/math-emu/mcrfs.c +++ b/arch/powerpc/math-emu/mcrfs.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/mffs.c b/arch/powerpc/math-emu/mffs.c index 5526cf96ede5..e00fdc22a0bc 100644 --- a/arch/powerpc/math-emu/mffs.c +++ b/arch/powerpc/math-emu/mffs.c @@ -1,6 +1,6 @@ 
#include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/mtfsb0.c b/arch/powerpc/math-emu/mtfsb0.c index bc985585bca8..5ed3e7d5063e 100644 --- a/arch/powerpc/math-emu/mtfsb0.c +++ b/arch/powerpc/math-emu/mtfsb0.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/mtfsb1.c b/arch/powerpc/math-emu/mtfsb1.c index fe6ed5ac85b3..602aa16eda81 100644 --- a/arch/powerpc/math-emu/mtfsb1.c +++ b/arch/powerpc/math-emu/mtfsb1.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/mtfsf.c b/arch/powerpc/math-emu/mtfsf.c index 44b0fc8214f4..b0d5593ad357 100644 --- a/arch/powerpc/math-emu/mtfsf.c +++ b/arch/powerpc/math-emu/mtfsf.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/mtfsfi.c b/arch/powerpc/math-emu/mtfsfi.c index fd2acc26813b..5df30541a784 100644 --- a/arch/powerpc/math-emu/mtfsfi.c +++ b/arch/powerpc/math-emu/mtfsfi.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/math-emu/stfd.c b/arch/powerpc/math-emu/stfd.c index 33a165c8df0f..6baeaec134a2 100644 --- a/arch/powerpc/math-emu/stfd.c +++ b/arch/powerpc/math-emu/stfd.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> int stfd(void *frS, void *ea) diff --git a/arch/powerpc/math-emu/stfiwx.c b/arch/powerpc/math-emu/stfiwx.c index f15a35f67e2c..9da7c5d1a872 100644 --- a/arch/powerpc/math-emu/stfiwx.c +++ b/arch/powerpc/math-emu/stfiwx.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> int stfiwx(u32 *frS, void *ea) diff --git a/arch/powerpc/math-emu/stfs.c b/arch/powerpc/math-emu/stfs.c index 6122147356d1..62bd25264fb5 100644 --- a/arch/powerpc/math-emu/stfs.c +++ b/arch/powerpc/math-emu/stfs.c @@ -1,6 +1,6 @@ #include <linux/types.h> #include <linux/errno.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/sfp-machine.h> #include <math-emu/soft-fp.h> diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c index 31a5d42df8c9..61ac468c87c6 100644 --- a/arch/powerpc/mm/40x_mmu.c +++ b/arch/powerpc/mm/40x_mmu.c @@ -43,7 +43,7 @@ #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/mmu.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/smp.h> #include <asm/bootx.h> #include <asm/machdep.h> diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 1a4e570f7894..7414034df1c3 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -7,7 +7,8 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y := fault.o mem.o pgtable.o mmap.o \ - init_$(BITS).o pgtable_$(BITS).o + init_$(BITS).o pgtable_$(BITS).o \ + init-common.o obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ tlb_nohash_low.o 
obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o @@ -42,3 +43,5 @@ obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o +obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o +obj-$(CONFIG_PPC_HTDUMP) += dump_hashpagetable.o diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 362954f98029..aaa7ec6788b9 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -134,6 +134,9 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) pr_debug("%s: invalid region access at %016llx\n", __func__, ea); return 1; } + /* Bad address */ + if (!vsid) + return 1; vsid = (vsid << slb_vsid_shift(ssize)) | vsidkey; diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c new file mode 100644 index 000000000000..d979709a0239 --- /dev/null +++ b/arch/powerpc/mm/dump_hashpagetable.c @@ -0,0 +1,551 @@ +/* + * Copyright 2016, Rashmica Gupta, IBM Corp. + * + * This traverses the kernel virtual memory and dumps the pages that are in + * the hash pagetable, along with their flags to + * /sys/kernel/debug/kernel_hash_pagetable. + * + * If radix is enabled then there is no hash page table and so no debugfs file + * is generated. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include <linux/debugfs.h> +#include <linux/fs.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <asm/fixmap.h> +#include <asm/pgtable.h> +#include <linux/const.h> +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/plpar_wrappers.h> +#include <linux/memblock.h> +#include <asm/firmware.h> + +struct pg_state { + struct seq_file *seq; + const struct addr_marker *marker; + unsigned long start_address; + unsigned int level; + u64 current_flags; +}; + +struct addr_marker { + unsigned long start_address; + const char *name; +}; + +static struct addr_marker address_markers[] = { + { 0, "Start of kernel VM" }, + { 0, "vmalloc() Area" }, + { 0, "vmalloc() End" }, + { 0, "isa I/O start" }, + { 0, "isa I/O end" }, + { 0, "phb I/O start" }, + { 0, "phb I/O end" }, + { 0, "I/O remap start" }, + { 0, "I/O remap end" }, + { 0, "vmemmap start" }, + { -1, NULL }, +}; + +struct flag_info { + u64 mask; + u64 val; + const char *set; + const char *clear; + bool is_val; + int shift; +}; + +static const struct flag_info v_flag_array[] = { + { + .mask = SLB_VSID_B, + .val = SLB_VSID_B_256M, + .set = "ssize: 256M", + .clear = "ssize: 1T ", + }, { + .mask = HPTE_V_SECONDARY, + .val = HPTE_V_SECONDARY, + .set = "secondary", + .clear = "primary ", + }, { + .mask = HPTE_V_VALID, + .val = HPTE_V_VALID, + .set = "valid ", + .clear = "invalid", + }, { + .mask = HPTE_V_BOLTED, + .val = HPTE_V_BOLTED, + .set = "bolted", + .clear = "", + } +}; + +static const struct flag_info r_flag_array[] = { + { + .mask = HPTE_R_PP0 | HPTE_R_PP, + .val = PP_RWXX, + .set = "prot:RW--", + }, { + .mask = HPTE_R_PP0 | HPTE_R_PP, + .val = PP_RWRX, + .set = "prot:RWR-", + }, { + .mask = HPTE_R_PP0 | HPTE_R_PP, + .val = PP_RWRW, + .set = "prot:RWRW", + }, { + .mask = HPTE_R_PP0 | HPTE_R_PP, + .val = PP_RXRX, + .set = "prot:R-R-", + }, { + .mask = HPTE_R_PP0 | HPTE_R_PP, + .val = PP_RXXX, + .set = "prot:R---", + }, { + 
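+ /*
+ * Unlike the PP decodings above, the key is a multi-bit field
+ * (HPTE_R_KEY_HI | HPTE_R_KEY_LO), so is_val makes dump_flag_info()
+ * print its numeric value rather than a set/clear string.
+ */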
.mask = HPTE_R_KEY_HI | HPTE_R_KEY_LO, + .val = HPTE_R_KEY_HI | HPTE_R_KEY_LO, + .set = "key", + .clear = "", + .is_val = true, + }, { + .mask = HPTE_R_R, + .val = HPTE_R_R, + .set = "ref", + .clear = " ", + }, { + .mask = HPTE_R_C, + .val = HPTE_R_C, + .set = "changed", + .clear = " ", + }, { + .mask = HPTE_R_N, + .val = HPTE_R_N, + .set = "no execute", + }, { + .mask = HPTE_R_WIMG, + .val = HPTE_R_W, + .set = "writethru", + }, { + .mask = HPTE_R_WIMG, + .val = HPTE_R_I, + .set = "no cache", + }, { + .mask = HPTE_R_WIMG, + .val = HPTE_R_G, + .set = "guarded", + } +}; + +static int calculate_pagesize(struct pg_state *st, int ps, char s[]) +{ + static const char units[] = "BKMGTPE"; + const char *unit = units; + + while (ps > 9 && unit[1]) { + ps -= 10; + unit++; + } + seq_printf(st->seq, " %s_ps: %i%c\t", s, 1<<ps, *unit); + return ps; +} + +static void dump_flag_info(struct pg_state *st, const struct flag_info + *flag, u64 pte, int num) +{ + unsigned int i; + + for (i = 0; i < num; i++, flag++) { + const char *s = NULL; + u64 val; + + /* flag not defined so don't check it */ + if (flag->mask == 0) + continue; + /* Some 'flags' are actually values */ + if (flag->is_val) { + val = pte & flag->val; + if (flag->shift) + val = val >> flag->shift; + seq_printf(st->seq, " %s:%llx", flag->set, val); + } else { + if ((pte & flag->mask) == flag->val) + s = flag->set; + else + s = flag->clear; + if (s) + seq_printf(st->seq, " %s", s); + } + } +} + +static void dump_hpte_info(struct pg_state *st, unsigned long ea, u64 v, u64 r, + unsigned long rpn, int bps, int aps, unsigned long lp) +{ + int aps_index; + + while (ea >= st->marker[1].start_address) { + st->marker++; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + seq_printf(st->seq, "0x%lx:\t", ea); + seq_printf(st->seq, "AVPN:%llx\t", HPTE_V_AVPN_VAL(v)); + dump_flag_info(st, v_flag_array, v, ARRAY_SIZE(v_flag_array)); + seq_printf(st->seq, " rpn: %lx\t", rpn); + dump_flag_info(st, r_flag_array, r, ARRAY_SIZE(r_flag_array)); + + calculate_pagesize(st, bps, "base"); + aps_index = calculate_pagesize(st, aps, "actual"); + if (aps_index != 2) + seq_printf(st->seq, "LP enc: %lx", lp); + seq_puts(st->seq, "\n"); +} + + +static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64 + *r) +{ + struct hash_pte *hptep; + unsigned long hash, vsid, vpn, hpte_group, want_v, hpte_v; + int i, ssize = mmu_kernel_ssize; + unsigned long shift = mmu_psize_defs[psize].shift; + + /* calculate hash */ + vsid = get_kernel_vsid(ea, ssize); + vpn = hpt_vpn(ea, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + want_v = hpte_encode_avpn(vpn, psize, ssize); + + /* to check in the secondary hash table, we invert the hash */ + if (!primary) + hash = ~hash; + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; + for (i = 0; i < HPTES_PER_GROUP; i++) { + hptep = htab_address + hpte_group; + hpte_v = be64_to_cpu(hptep->v); + + if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) { + /* HPTE matches */ + *v = be64_to_cpu(hptep->v); + *r = be64_to_cpu(hptep->r); + return 0; + } + ++hpte_group; + } + return -1; +} + +#ifdef CONFIG_PPC_PSERIES +static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r) +{ + struct hash_pte ptes[4]; + unsigned long vsid, vpn, hash, hpte_group, want_v; + int i, j, ssize = mmu_kernel_ssize; + long lpar_rc = 0; + unsigned long shift = mmu_psize_defs[psize].shift; + + /* calculate hash */ + vsid = get_kernel_vsid(ea, ssize); + vpn = hpt_vpn(ea, vsid, ssize); + hash = hpt_hash(vpn, 
shift, ssize); + want_v = hpte_encode_avpn(vpn, psize, ssize); + + /* to check in the secondary hash table, we invert the hash */ + if (!primary) + hash = ~hash; + hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + /* see if we can find an entry in the hpte with this hash */ + for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) { + lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes); + + if (lpar_rc != H_SUCCESS) + continue; + for (j = 0; j < 4; j++) { + if (HPTE_V_COMPARE(ptes[j].v, want_v) && + (ptes[j].v & HPTE_V_VALID)) { + /* HPTE matches */ + *v = ptes[j].v; + *r = ptes[j].r; + return 0; + } + } + } + return -1; +} +#endif + +static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps, + unsigned long *lp_bits) +{ + struct mmu_psize_def entry; + unsigned long arpn, mask, lp; + int penc = -2, idx = 0, shift; + + /* + * The LP field has 8 bits. Depending on the actual page size, some of + * these bits are concatenated with the ARPN to get the RPN. The rest + * of the bits in the LP field are the LP value, an encoding of + * the base page size and the actual page size. + * + * - find the mmu entry for our base page size + * - go through all page encodings and use the associated mask to + * find an encoding that matches our encoding in the LP field. + */ + arpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT; + lp = arpn & 0xff; + + entry = mmu_psize_defs[bps]; + while (idx < MMU_PAGE_COUNT) { + penc = entry.penc[idx]; + if ((penc != -1) && (mmu_psize_defs[idx].shift)) { + shift = mmu_psize_defs[idx].shift - HPTE_R_RPN_SHIFT; + mask = (0x1 << (shift)) - 1; + if ((lp & mask) == penc) { + *aps = mmu_psize_to_shift(idx); + *lp_bits = lp & mask; + *rpn = arpn >> shift; + return; + } + } + idx++; + } +} + +static int base_hpte_find(unsigned long ea, int psize, bool primary, u64 *v, + u64 *r) +{ +#ifdef CONFIG_PPC_PSERIES + if (firmware_has_feature(FW_FEATURE_LPAR)) + return pseries_find(ea, psize, primary, v, r); +#endif + return native_find(ea, psize, primary, v, r); +} + +static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize) +{ + unsigned long slot; + u64 v = 0, r = 0; + unsigned long rpn, lp_bits; + int base_psize = 0, actual_psize = -1; + + if (ea < PAGE_OFFSET) + return -1; + + /* Look in primary table */ + slot = base_hpte_find(ea, psize, true, &v, &r); + + /* Look in secondary table */ + if (slot == -1) + slot = base_hpte_find(ea, psize, false, &v, &r); + + /* No entry found */ + if (slot == -1) + return -1; + + /* + * We found an entry in the hash page table: + * - check that this has the same base page + * - find the actual page size + * - find the RPN + */ + base_psize = mmu_psize_to_shift(psize); + + if ((v & HPTE_V_LARGE) == HPTE_V_LARGE) { + decode_r(psize, r, &rpn, &actual_psize, &lp_bits); + } else { + /* 4K actual page size */ + actual_psize = 12; + rpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT; + /* In this case there are no LP bits */ + lp_bits = -1; + } + /* + * We didn't find a matching encoding, so the PTE we found isn't for + * this address.
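+ * decode_r() only writes *aps when it finds a matching LP encoding,
+ * so actual_psize still holds its -1 sentinel here on failure.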
+ */ + if (actual_psize == -1) + return -1; + + dump_hpte_info(st, ea, v, r, rpn, base_psize, actual_psize, lp_bits); + return 0; +} + +static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) +{ + pte_t *pte = pte_offset_kernel(pmd, 0); + unsigned long addr, pteval, psize; + int i, status; + + for (i = 0; i < PTRS_PER_PTE; i++, pte++) { + addr = start + i * PAGE_SIZE; + pteval = pte_val(*pte); + + if (addr < VMALLOC_END) + psize = mmu_vmalloc_psize; + else + psize = mmu_io_psize; +#ifdef CONFIG_PPC_64K_PAGES + /* check for secret 4K mappings */ + if (((pteval & H_PAGE_COMBO) == H_PAGE_COMBO) || + ((pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN)) + psize = mmu_io_psize; +#endif + /* check for hashpte */ + status = hpte_find(st, addr, psize); + + if (((pteval & H_PAGE_HASHPTE) != H_PAGE_HASHPTE) + && (status != -1)) { + /* found a hpte that is not in the linux page tables */ + seq_printf(st->seq, "page probably bolted before linux" + " pagetables were set: addr:%lx, pteval:%lx\n", + addr, pteval); + } + } +} + +static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) +{ + pmd_t *pmd = pmd_offset(pud, 0); + unsigned long addr; + unsigned int i; + + for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { + addr = start + i * PMD_SIZE; + if (!pmd_none(*pmd)) + /* pmd exists */ + walk_pte(st, pmd, addr); + } +} + +static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +{ + pud_t *pud = pud_offset(pgd, 0); + unsigned long addr; + unsigned int i; + + for (i = 0; i < PTRS_PER_PUD; i++, pud++) { + addr = start + i * PUD_SIZE; + if (!pud_none(*pud)) + /* pud exists */ + walk_pmd(st, pud, addr); + } +} + +static void walk_pagetables(struct pg_state *st) +{ + pgd_t *pgd = pgd_offset_k(0UL); + unsigned int i; + unsigned long addr; + + /* + * Traverse the linux pagetable structure and dump pages that are in + * the hash pagetable. + */ + for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { + addr = KERN_VIRT_START + i * PGDIR_SIZE; + if (!pgd_none(*pgd)) + /* pgd exists */ + walk_pud(st, pgd, addr); + } +} + + +static void walk_linearmapping(struct pg_state *st) +{ + unsigned long addr; + + /* + * Traverse the linear mapping section of virtual memory and dump pages + * that are in the hash pagetable. + */ + unsigned long psize = 1 << mmu_psize_defs[mmu_linear_psize].shift; + + for (addr = PAGE_OFFSET; addr < PAGE_OFFSET + + memblock_phys_mem_size(); addr += psize) + hpte_find(st, addr, mmu_linear_psize); +} + +static void walk_vmemmap(struct pg_state *st) +{ +#ifdef CONFIG_SPARSEMEM_VMEMMAP + struct vmemmap_backing *ptr = vmemmap_list; + + /* + * Traverse the vmemmaped memory and dump pages that are in the hash + * pagetable. 
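+ * vmemmap_list is a singly linked list of the backing blocks, so this
+ * follows ->list pointers rather than indexing a fixed-size table
+ * like the other walkers.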
+ */ + while (ptr->list) { + hpte_find(st, ptr->virt_addr, mmu_vmemmap_psize); + ptr = ptr->list; + } + seq_puts(st->seq, "---[ vmemmap end ]---\n"); +#endif +} + +static void populate_markers(void) +{ + address_markers[0].start_address = PAGE_OFFSET; + address_markers[1].start_address = VMALLOC_START; + address_markers[2].start_address = VMALLOC_END; + address_markers[3].start_address = ISA_IO_BASE; + address_markers[4].start_address = ISA_IO_END; + address_markers[5].start_address = PHB_IO_BASE; + address_markers[6].start_address = PHB_IO_END; + address_markers[7].start_address = IOREMAP_BASE; + address_markers[8].start_address = IOREMAP_END; +#ifdef CONFIG_PPC_STD_MMU_64 + address_markers[9].start_address = H_VMEMMAP_BASE; +#else + address_markers[9].start_address = VMEMMAP_BASE; +#endif +} + +static int ptdump_show(struct seq_file *m, void *v) +{ + struct pg_state st = { + .seq = m, + .start_address = PAGE_OFFSET, + .marker = address_markers, + }; + /* + * Traverse the 0xc, 0xd and 0xf areas of the kernel virtual memory and + * dump pages that are in the hash pagetable. + */ + walk_linearmapping(&st); + walk_pagetables(&st); + walk_vmemmap(&st); + return 0; +} + +static int ptdump_open(struct inode *inode, struct file *file) +{ + return single_open(file, ptdump_show, NULL); +} + +static const struct file_operations ptdump_fops = { + .open = ptdump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ptdump_init(void) +{ + struct dentry *debugfs_file; + + if (!radix_enabled()) { + populate_markers(); + debugfs_file = debugfs_create_file("kernel_hash_pagetable", + 0400, NULL, NULL, &ptdump_fops); + return debugfs_file ? 0 : -ENOMEM; + } + return 0; +} +device_initcall(ptdump_init); diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c new file mode 100644 index 000000000000..49abaf4dc8e3 --- /dev/null +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -0,0 +1,442 @@ +/* + * Copyright 2016, Rashmica Gupta, IBM Corp. + * + * This traverses the kernel pagetables and dumps the + * information about the used sections of memory to + * /sys/kernel/debug/kernel_pagetables. + * + * Derived from the arm64 implementation: + * Copyright (c) 2014, The Linux Foundation, Laura Abbott. + * (C) Copyright 2008 Intel Corporation, Arjan van de Ven. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include <linux/debugfs.h> +#include <linux/fs.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <asm/fixmap.h> +#include <asm/pgtable.h> +#include <linux/const.h> +#include <asm/page.h> +#include <asm/pgalloc.h> + +/* + * To visualise what is happening, + * + * - PTRS_PER_P** = how many entries there are in the corresponding P** + * - P**_SHIFT = how many bits of the address we use to index into the + * corresponding P** + * - P**_SIZE is how much memory we can access through the table - not the + * size of the table itself. + * P**={PGD, PUD, PMD, PTE} + * + * + * Each entry of the PGD points to a PUD. Each entry of a PUD points to a + * PMD. Each entry of a PMD points to a PTE. And every PTE entry points to + * a page. + * + * In the case where there are only 3 levels, the PUD is folded into the + * PGD: every PUD has only one entry which points to the PMD. 
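+ *
+ * For example, resolving a single kernel address 'ea' reads one entry
+ * at each level:
+ *
+ *   pgd_t *pgd = pgd_offset_k(ea);
+ *   pud_t *pud = pud_offset(pgd, ea);
+ *   pmd_t *pmd = pmd_offset(pud, ea);
+ *   pte_t *pte = pte_offset_kernel(pmd, ea);
+ *
+ * The walkers below perform the same traversal, but iterate over
+ * every index at each level instead of deriving one index from an
+ * address.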
+ * + * The page dumper groups page table entries of the same type into a single + * description. It uses pg_state to track the range information while + * iterating over the PTE entries. When the continuity is broken it then + * dumps out a description of the range - ie PTEs that are virtually contiguous + * with the same PTE flags are chunked together. This is to make it clear how + * different areas of the kernel virtual memory are used. + * + */ +struct pg_state { + struct seq_file *seq; + const struct addr_marker *marker; + unsigned long start_address; + unsigned int level; + u64 current_flags; +}; + +struct addr_marker { + unsigned long start_address; + const char *name; +}; + +static struct addr_marker address_markers[] = { + { 0, "Start of kernel VM" }, + { 0, "vmalloc() Area" }, + { 0, "vmalloc() End" }, + { 0, "isa I/O start" }, + { 0, "isa I/O end" }, + { 0, "phb I/O start" }, + { 0, "phb I/O end" }, + { 0, "I/O remap start" }, + { 0, "I/O remap end" }, + { 0, "vmemmap start" }, + { -1, NULL }, +}; + +struct flag_info { + u64 mask; + u64 val; + const char *set; + const char *clear; + bool is_val; + int shift; +}; + +static const struct flag_info flag_array[] = { + { +#ifdef CONFIG_PPC_STD_MMU_64 + .mask = _PAGE_PRIVILEGED, + .val = 0, +#else + .mask = _PAGE_USER, + .val = _PAGE_USER, +#endif + .set = "user", + .clear = " ", + }, { + .mask = _PAGE_RW, + .val = _PAGE_RW, + .set = "rw", + .clear = "ro", + }, { + .mask = _PAGE_EXEC, + .val = _PAGE_EXEC, + .set = " X ", + .clear = " ", + }, { + .mask = _PAGE_PTE, + .val = _PAGE_PTE, + .set = "pte", + .clear = " ", + }, { + .mask = _PAGE_PRESENT, + .val = _PAGE_PRESENT, + .set = "present", + .clear = " ", + }, { +#ifdef CONFIG_PPC_STD_MMU_64 + .mask = H_PAGE_HASHPTE, + .val = H_PAGE_HASHPTE, +#else + .mask = _PAGE_HASHPTE, + .val = _PAGE_HASHPTE, +#endif + .set = "hpte", + .clear = " ", + }, { +#ifndef CONFIG_PPC_STD_MMU_64 + .mask = _PAGE_GUARDED, + .val = _PAGE_GUARDED, + .set = "guarded", + .clear = " ", + }, { +#endif + .mask = _PAGE_DIRTY, + .val = _PAGE_DIRTY, + .set = "dirty", + .clear = " ", + }, { + .mask = _PAGE_ACCESSED, + .val = _PAGE_ACCESSED, + .set = "accessed", + .clear = " ", + }, { +#ifndef CONFIG_PPC_STD_MMU_64 + .mask = _PAGE_WRITETHRU, + .val = _PAGE_WRITETHRU, + .set = "write through", + .clear = " ", + }, { +#endif + .mask = _PAGE_NO_CACHE, + .val = _PAGE_NO_CACHE, + .set = "no cache", + .clear = " ", + }, { +#ifdef CONFIG_PPC_BOOK3S_64 + .mask = H_PAGE_BUSY, + .val = H_PAGE_BUSY, + .set = "busy", + }, { +#ifdef CONFIG_PPC_64K_PAGES + .mask = H_PAGE_COMBO, + .val = H_PAGE_COMBO, + .set = "combo", + }, { + .mask = H_PAGE_4K_PFN, + .val = H_PAGE_4K_PFN, + .set = "4K_pfn", + }, { +#endif + .mask = H_PAGE_F_GIX, + .val = H_PAGE_F_GIX, + .set = "f_gix", + .is_val = true, + .shift = H_PAGE_F_GIX_SHIFT, + }, { + .mask = H_PAGE_F_SECOND, + .val = H_PAGE_F_SECOND, + .set = "f_second", + }, { +#endif + .mask = _PAGE_SPECIAL, + .val = _PAGE_SPECIAL, + .set = "special", + } +}; + +struct pgtable_level { + const struct flag_info *flag; + size_t num; + u64 mask; +}; + +static struct pgtable_level pg_level[] = { + { + }, { /* pgd */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* pud */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* pmd */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* pte */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, +}; + +static void dump_flag_info(struct pg_state *st, const struct flag_info + *flag, u64 pte, int num) +{ + unsigned int i; 
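+ /*
+ * is_val entries have their field value extracted and printed;
+ * everything else prints its set/clear string. Decoded bits are
+ * cleared from current_flags so that anything left over can be
+ * reported as unknown after the loop.
+ */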
+ + for (i = 0; i < num; i++, flag++) { + const char *s = NULL; + u64 val; + + /* flag not defined so don't check it */ + if (flag->mask == 0) + continue; + /* Some 'flags' are actually values */ + if (flag->is_val) { + val = pte & flag->val; + if (flag->shift) + val = val >> flag->shift; + seq_printf(st->seq, " %s:%llx", flag->set, val); + } else { + if ((pte & flag->mask) == flag->val) + s = flag->set; + else + s = flag->clear; + if (s) + seq_printf(st->seq, " %s", s); + } + st->current_flags &= ~flag->mask; + } + if (st->current_flags != 0) + seq_printf(st->seq, " unknown flags:%llx", st->current_flags); +} + +static void dump_addr(struct pg_state *st, unsigned long addr) +{ + static const char units[] = "KMGTPE"; + const char *unit = units; + unsigned long delta; + + seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr-1); + delta = (addr - st->start_address) >> 10; + /* Work out what appropriate unit to use */ + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; + } + seq_printf(st->seq, "%9lu%c", delta, *unit); + +} + +static void note_page(struct pg_state *st, unsigned long addr, + unsigned int level, u64 val) +{ + u64 flag = val & pg_level[level].mask; + /* At first no level is set */ + if (!st->level) { + st->level = level; + st->current_flags = flag; + st->start_address = addr; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + /* + * Dump the section of virtual memory when: + * - the PTE flags from one entry to the next differs. + * - we change levels in the tree. + * - the address is in a different section of memory and is thus + * used for a different purpose, regardless of the flags. + */ + } else if (flag != st->current_flags || level != st->level || + addr >= st->marker[1].start_address) { + + /* Check the PTE flags */ + if (st->current_flags) { + dump_addr(st, addr); + + /* Dump all the flags */ + if (pg_level[st->level].flag) + dump_flag_info(st, pg_level[st->level].flag, + st->current_flags, + pg_level[st->level].num); + + seq_puts(st->seq, "\n"); + } + + /* + * Address indicates we have passed the end of the + * current section of virtual memory + */ + while (addr >= st->marker[1].start_address) { + st->marker++; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + st->start_address = addr; + st->current_flags = flag; + st->level = level; + } +} + +static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) +{ + pte_t *pte = pte_offset_kernel(pmd, 0); + unsigned long addr; + unsigned int i; + + for (i = 0; i < PTRS_PER_PTE; i++, pte++) { + addr = start + i * PAGE_SIZE; + note_page(st, addr, 4, pte_val(*pte)); + + } +} + +static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) +{ + pmd_t *pmd = pmd_offset(pud, 0); + unsigned long addr; + unsigned int i; + + for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { + addr = start + i * PMD_SIZE; + if (!pmd_none(*pmd)) + /* pmd exists */ + walk_pte(st, pmd, addr); + else + note_page(st, addr, 3, pmd_val(*pmd)); + } +} + +static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +{ + pud_t *pud = pud_offset(pgd, 0); + unsigned long addr; + unsigned int i; + + for (i = 0; i < PTRS_PER_PUD; i++, pud++) { + addr = start + i * PUD_SIZE; + if (!pud_none(*pud)) + /* pud exists */ + walk_pmd(st, pud, addr); + else + note_page(st, addr, 2, pud_val(*pud)); + } +} + +static void walk_pagetables(struct pg_state *st) +{ + pgd_t *pgd = pgd_offset_k(0UL); + unsigned int i; + unsigned long addr; + + /* + * Traverse the linux pagetable structure and dump 
pages that are in + * the hash pagetable. + */ + for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { + addr = KERN_VIRT_START + i * PGDIR_SIZE; + if (!pgd_none(*pgd)) + /* pgd exists */ + walk_pud(st, pgd, addr); + else + note_page(st, addr, 1, pgd_val(*pgd)); + } +} + +static void populate_markers(void) +{ + address_markers[0].start_address = PAGE_OFFSET; + address_markers[1].start_address = VMALLOC_START; + address_markers[2].start_address = VMALLOC_END; + address_markers[3].start_address = ISA_IO_BASE; + address_markers[4].start_address = ISA_IO_END; + address_markers[5].start_address = PHB_IO_BASE; + address_markers[6].start_address = PHB_IO_END; + address_markers[7].start_address = IOREMAP_BASE; + address_markers[8].start_address = IOREMAP_END; +#ifdef CONFIG_PPC_STD_MMU_64 + address_markers[9].start_address = H_VMEMMAP_BASE; +#else + address_markers[9].start_address = VMEMMAP_BASE; +#endif +} + +static int ptdump_show(struct seq_file *m, void *v) +{ + struct pg_state st = { + .seq = m, + .start_address = KERN_VIRT_START, + .marker = address_markers, + }; + /* Traverse kernel page tables */ + walk_pagetables(&st); + note_page(&st, 0, 0, 0); + return 0; +} + + +static int ptdump_open(struct inode *inode, struct file *file) +{ + return single_open(file, ptdump_show, NULL); +} + +static const struct file_operations ptdump_fops = { + .open = ptdump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void build_pgtable_complete_mask(void) +{ + unsigned int i, j; + + for (i = 0; i < ARRAY_SIZE(pg_level); i++) + if (pg_level[i].flag) + for (j = 0; j < pg_level[i].num; j++) + pg_level[i].mask |= pg_level[i].flag[j].mask; +} + +static int ptdump_init(void) +{ + struct dentry *debugfs_file; + + populate_markers(); + build_pgtable_complete_mask(); + debugfs_file = debugfs_create_file("kernel_pagetables", 0400, NULL, + NULL, &ptdump_fops); + return debugfs_file ? 0 : -ENOMEM; +} +device_initcall(ptdump_init); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index d0b137d96df1..6fd30ac7d14a 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -391,6 +391,20 @@ good_area: if (is_exec) { /* + * An execution fault + no execute ? + * + * On CPUs that don't have CPU_FTR_COHERENT_ICACHE we + * deliberately create NX mappings, and use the fault to do the + * cache flush. This is usually handled in hash_page_do_lazy_icache() + * but we could end up here if that races with a concurrent PTE + * update. In that case we need to fall through here to the VMA + * check below. + */ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && + (regs->msr & SRR1_ISI_N_OR_G)) + goto bad_area; + + /* * Allow execution from readable areas if the MMU does not * provide separate controls over reading and executing. * @@ -404,6 +418,7 @@ good_area: (cpu_has_feature(CPU_FTR_NOEXECUTE) || !(vma->vm_flags & (VM_READ | VM_WRITE)))) goto bad_area; + #ifdef CONFIG_PPC_STD_MMU /* * protfault should only happen due to us @@ -512,7 +527,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) /* Are we prepared to handle this fault? 
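* If so, redirect regs->nip to the fixup address recorded in the matching exception table entry.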
*/ if ((entry = search_exception_tables(regs->nip)) != NULL) { - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return; } diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index 139dec421e57..080d49b26c3a 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -48,7 +48,7 @@ #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/mmu.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/smp.h> #include <asm/machdep.h> #include <asm/setup.h> diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index ad9fd5245be2..cc332608e656 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -123,8 +123,9 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) va |= ssize << 8; sllp = get_sllp_encoding(apsize); va |= sllp << 5; - asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)" - : : "r"(va) : "memory"); + asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,0", %1) + : : "r" (va), "i" (CPU_FTR_ARCH_206) + : "memory"); break; default: /* We need 14 to 14 + i bits of va */ @@ -141,8 +142,9 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) */ va |= (vpn & 0xfe); va |= 1; /* L */ - asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)" - : : "r"(va) : "memory"); + asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,1", %1) + : : "r" (va), "i" (CPU_FTR_ARCH_206) + : "memory"); break; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 8410b4bb36ed..67e19a0821be 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -42,7 +42,7 @@ #include <asm/mmu_context.h> #include <asm/page.h> #include <asm/types.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/machdep.h> #include <asm/prom.h> #include <asm/tlbflush.h> @@ -747,7 +747,7 @@ static unsigned long __init htab_get_table_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int create_section_mapping(unsigned long start, unsigned long end) +int hash__create_section_mapping(unsigned long start, unsigned long end) { int rc = htab_bolt_mapping(start, end, __pa(start), pgprot_val(PAGE_KERNEL), mmu_linear_psize, @@ -761,7 +761,7 @@ int create_section_mapping(unsigned long start, unsigned long end) return rc; } -int remove_section_mapping(unsigned long start, unsigned long end) +int hash__remove_section_mapping(unsigned long start, unsigned long end) { int rc = htab_remove_mapping(start, end, mmu_linear_psize, mmu_kernel_ssize); diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index d5026f3800b6..a84bb44497f9 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -116,21 +116,3 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; } - -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_DEBUG_VM) -/* - * This enables us to catch the wrong page directory format - * Moved here so that we can use WARN() in the call. - */ -int hugepd_ok(hugepd_t hpd) -{ - bool is_hugepd; - - /* - * We should not find this format in page directory, warn otherwise. 
- */ - is_hugepd = (((hpd.pd & 0x3) == 0x0) && ((hpd.pd & HUGEPD_SHIFT_MASK) != 0)); - WARN(is_hugepd, "Found wrong page directory format\n"); - return 0; -} -#endif diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index a5d3ecdabc44..8c3389cbcd12 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -26,6 +26,8 @@ #ifdef CONFIG_HUGETLB_PAGE #define PAGE_SHIFT_64K 16 +#define PAGE_SHIFT_512K 19 +#define PAGE_SHIFT_8M 23 #define PAGE_SHIFT_16M 24 #define PAGE_SHIFT_16G 34 @@ -38,7 +40,7 @@ unsigned int HPAGE_SHIFT; * implementations may have more than one gpage size, so we need multiple * arrays */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) #define MAX_NUMBER_GPAGES 128 struct psize_gpages { u64 gpage_list[MAX_NUMBER_GPAGES]; @@ -51,7 +53,7 @@ static u64 gpage_freearray[MAX_NUMBER_GPAGES]; static unsigned nr_gpages; #endif -#define hugepd_none(hpd) ((hpd).pd == 0) +#define hugepd_none(hpd) (hpd_val(hpd) == 0) pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { @@ -64,14 +66,16 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, { struct kmem_cache *cachep; pte_t *new; - -#ifdef CONFIG_PPC_FSL_BOOK3E int i; - int num_hugepd = 1 << (pshift - pdshift); - cachep = hugepte_cache; -#else - cachep = PGT_CACHE(pdshift - pshift); -#endif + int num_hugepd; + + if (pshift >= pdshift) { + cachep = hugepte_cache; + num_hugepd = 1 << (pshift - pdshift); + } else { + cachep = PGT_CACHE(pdshift - pshift); + num_hugepd = 1; + } new = kmem_cache_zalloc(cachep, GFP_KERNEL); @@ -89,7 +93,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, smp_wmb(); spin_lock(&mm->page_table_lock); -#ifdef CONFIG_PPC_FSL_BOOK3E + /* * We have multiple higher-level entries that point to the same * actual pte location. Fill in each as we go and backtrack on error. @@ -99,27 +103,26 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, for (i = 0; i < num_hugepd; i++, hpdp++) { if (unlikely(!hugepd_none(*hpdp))) break; - else + else { +#ifdef CONFIG_PPC_BOOK3S_64 + *hpdp = __hugepd(__pa(new) | + (shift_to_mmu_psize(pshift) << 2)); +#elif defined(CONFIG_PPC_8xx) + *hpdp = __hugepd(__pa(new) | + (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M : + _PMD_PAGE_512K) | _PMD_PRESENT); +#else /* We use the old format for PPC_FSL_BOOK3E */ - hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; + *hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift); +#endif + } } /* If we bailed from the for loop early, an error occurred, clean up */ if (i < num_hugepd) { for (i = i - 1 ; i >= 0; i--, hpdp--) - hpdp->pd = 0; - kmem_cache_free(cachep, new); - } -#else - if (!hugepd_none(*hpdp)) + *hpdp = __hugepd(0); kmem_cache_free(cachep, new); - else { -#ifdef CONFIG_PPC_BOOK3S_64 - hpdp->pd = __pa(new) | (shift_to_mmu_psize(pshift) << 2); -#else - hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; -#endif } -#endif spin_unlock(&mm->page_table_lock); return 0; } @@ -128,7 +131,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, * These macros define how to determine which level of the page table holds * the hpdp. 
*/ -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) #define HUGEPD_PGD_SHIFT PGDIR_SHIFT #define HUGEPD_PUD_SHIFT PUD_SHIFT #else @@ -136,7 +139,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, #define HUGEPD_PUD_SHIFT PMD_SHIFT #endif -#ifdef CONFIG_PPC_BOOK3S_64 /* * At this point we do the placement change only for BOOK3S 64. This would * possibly work on other subarchs. @@ -153,6 +155,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz addr &= ~(sz-1); pg = pgd_offset(mm, addr); +#ifdef CONFIG_PPC_BOOK3S_64 if (pshift == PGDIR_SHIFT) /* 16GB huge page */ return (pte_t *) pg; @@ -178,32 +181,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz hpdp = (hugepd_t *)pm; } } - if (!hpdp) - return NULL; - - BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); - - if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) - return NULL; - - return hugepte_offset(*hpdp, addr, pdshift); -} - #else - -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) -{ - pgd_t *pg; - pud_t *pu; - pmd_t *pm; - hugepd_t *hpdp = NULL; - unsigned pshift = __ffs(sz); - unsigned pdshift = PGDIR_SHIFT; - - addr &= ~(sz-1); - - pg = pgd_offset(mm, addr); - if (pshift >= HUGEPD_PGD_SHIFT) { hpdp = (hugepd_t *)pg; } else { @@ -217,7 +195,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz hpdp = (hugepd_t *)pm; } } - +#endif if (!hpdp) return NULL; @@ -228,9 +206,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz return hugepte_offset(*hpdp, addr, pdshift); } -#endif -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) /* Build list of addresses of gigantic pages. This function is used in early * boot before the buddy allocator is setup. 
*/ @@ -310,7 +287,11 @@ static int __init do_gpage_early_setup(char *param, char *val, npages = 0; if (npages > MAX_NUMBER_GPAGES) { pr_warn("MMU: %lu pages requested for page " +#ifdef CONFIG_PHYS_ADDR_T_64BIT "size %llu KB, limiting to " +#else + "size %u KB, limiting to " +#endif __stringify(MAX_NUMBER_GPAGES) "\n", npages, size / 1024); npages = MAX_NUMBER_GPAGES; @@ -392,7 +373,7 @@ int alloc_bootmem_huge_page(struct hstate *hstate) } #endif -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) #define HUGEPD_FREELIST_SIZE \ ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t)) @@ -442,6 +423,8 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) } put_cpu_var(hugepd_freelist_cur); } +#else +static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {} #endif static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, @@ -453,13 +436,11 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif unsigned long pdmask = ~((1UL << pdshift) - 1); unsigned int num_hugepd = 1; + unsigned int shift = hugepd_shift(*hpdp); -#ifdef CONFIG_PPC_FSL_BOOK3E /* Note: On fsl the hpdp may be the first of several */ - num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift)); -#else - unsigned int shift = hugepd_shift(*hpdp); -#endif + if (shift > pdshift) + num_hugepd = 1 << (shift - pdshift); start &= pdmask; if (start < floor) @@ -473,13 +454,12 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif return; for (i = 0; i < num_hugepd; i++, hpdp++) - hpdp->pd = 0; + *hpdp = __hugepd(0); -#ifdef CONFIG_PPC_FSL_BOOK3E - hugepd_free(tlb, hugepte); -#else - pgtable_free_tlb(tlb, hugepte, pdshift - shift); -#endif + if (shift >= pdshift) + hugepd_free(tlb, hugepte); + else + pgtable_free_tlb(tlb, hugepte, pdshift - shift); } static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, @@ -492,6 +472,8 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, start = addr; do { + unsigned long more; + pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); if (!is_hugepd(__hugepd(pmd_val(*pmd)))) { @@ -502,15 +484,16 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, WARN_ON(!pmd_none_or_clear_bad(pmd)); continue; } -#ifdef CONFIG_PPC_FSL_BOOK3E /* * Increment next by the size of the huge mapping since * there may be more than one entry at this level for a * single hugepage, but all of them point to * the same kmem cache that holds the hugepte. */ - next = addr + (1 << hugepd_shift(*(hugepd_t *)pmd)); -#endif + more = addr + (1 << hugepd_shift(*(hugepd_t *)pmd)); + if (more > next) + next = more; + free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT, addr, next, floor, ceiling); } while (addr = next, addr != end); @@ -550,15 +533,17 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); } else { -#ifdef CONFIG_PPC_FSL_BOOK3E + unsigned long more; /* * Increment next by the size of the huge mapping since * there may be more than one entry at this level for a * single hugepage, but all of them point to * the same kmem cache that holds the hugepte. 
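* (With mixed page sizes the hugepd may also end before the regular boundary, which is why the code below only advances next when the mapping extends past it.)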
*/ - next = addr + (1 << hugepd_shift(*(hugepd_t *)pud)); -#endif + more = addr + (1 << hugepd_shift(*(hugepd_t *)pud)); + if (more > next) + next = more; + free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT, addr, next, floor, ceiling); } @@ -615,15 +600,17 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, continue; hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); } else { -#ifdef CONFIG_PPC_FSL_BOOK3E + unsigned long more; /* * Increment next by the size of the huge mapping since * there may be more than one entry at the pgd level * for a single hugepage, but all of them point to the * same kmem cache that holds the hugepte. */ - next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd)); -#endif + more = addr + (1 << hugepd_shift(*(hugepd_t *)pgd)); + if (more > next) + next = more; + free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, addr, next, floor, ceiling); } @@ -753,12 +740,13 @@ static int __init add_huge_page_size(unsigned long long size) /* Check that it is a page size supported by the hardware and * that it fits within pagetable and slice limits. */ -#ifdef CONFIG_PPC_FSL_BOOK3E - if ((size < PAGE_SIZE) || !is_power_of_4(size)) + if (size <= PAGE_SIZE) return -EINVAL; -#else - if (!is_power_of_2(size) - || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT)) +#if defined(CONFIG_PPC_FSL_BOOK3E) + if (!is_power_of_4(size)) + return -EINVAL; +#elif !defined(CONFIG_PPC_8xx) + if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT)) return -EINVAL; #endif @@ -791,53 +779,15 @@ static int __init hugepage_setup_sz(char *str) } __setup("hugepagesz=", hugepage_setup_sz); -#ifdef CONFIG_PPC_FSL_BOOK3E struct kmem_cache *hugepte_cache; static int __init hugetlbpage_init(void) { int psize; - for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { - unsigned shift; - - if (!mmu_psize_defs[psize].shift) - continue; - - shift = mmu_psize_to_shift(psize); - - /* Don't treat normal page sizes as huge... */ - if (shift != PAGE_SHIFT) - if (add_huge_page_size(1ULL << shift) < 0) - continue; - } - - /* - * Create a kmem cache for hugeptes. The bottom bits in the pte have - * size information encoded in them, so align them to allow this - */ - hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t), - HUGEPD_SHIFT_MASK + 1, 0, NULL); - if (hugepte_cache == NULL) - panic("%s: Unable to create kmem cache for hugeptes\n", - __func__); - - /* Default hpage size = 4M */ - if (mmu_psize_defs[MMU_PAGE_4M].shift) - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift; - else - panic("%s: Unable to set default huge page size\n", __func__); - - - return 0; -} -#else -static int __init hugetlbpage_init(void) -{ - int psize; - +#if !defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_PPC_8xx) if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE)) return -ENODEV; - +#endif for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { unsigned shift; unsigned pdshift; @@ -850,9 +800,9 @@ static int __init hugetlbpage_init(void) if (add_huge_page_size(1ULL << shift) < 0) continue; - if (shift < PMD_SHIFT) + if (shift < HUGEPD_PUD_SHIFT) pdshift = PMD_SHIFT; - else if (shift < PUD_SHIFT) + else if (shift < HUGEPD_PGD_SHIFT) pdshift = PUD_SHIFT; else pdshift = PGDIR_SHIFT; @@ -860,14 +810,34 @@ static int __init hugetlbpage_init(void) * if we have pdshift and shift value same, we don't * use pgt cache for hugepd. 
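* When they are the same, or shift is larger, the entries come instead from the dedicated hugepte_cache created below.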
*/ - if (pdshift != shift) { + if (pdshift > shift) pgtable_cache_add(pdshift - shift, NULL); - if (!PGT_CACHE(pdshift - shift)) - panic("hugetlbpage_init(): could not create " - "pgtable cache for %d bit pagesize\n", shift); +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) + else if (!hugepte_cache) { + /* + * Create a kmem cache for hugeptes. The bottom bits in + * the pte have size information encoded in them, so + * align them to allow this + */ + hugepte_cache = kmem_cache_create("hugepte-cache", + sizeof(pte_t), + HUGEPD_SHIFT_MASK + 1, + 0, NULL); + if (hugepte_cache == NULL) + panic("%s: Unable to create kmem cache " + "for hugeptes\n", __func__); + } +#endif } +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) + /* Default hpage size = 4M on FSL_BOOK3E and 512k on 8xx */ + if (mmu_psize_defs[MMU_PAGE_4M].shift) + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift; + else if (mmu_psize_defs[MMU_PAGE_512K].shift) + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_512K].shift; +#else /* Set default large page size. Currently, we pick 16M or 1M * depending on what is available */ @@ -877,11 +847,10 @@ static int __init hugetlbpage_init(void) HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift; else if (mmu_psize_defs[MMU_PAGE_2M].shift) HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift; - - +#endif return 0; } -#endif + arch_initcall(hugetlbpage_init); void flush_dcache_icache_hugepage(struct page *page) diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c new file mode 100644 index 000000000000..eb8c6c8c4851 --- /dev/null +++ b/arch/powerpc/mm/init-common.c @@ -0,0 +1,105 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen <engebret@us.ibm.com> + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#undef DEBUG + +#include <linux/string.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> + +static void pgd_ctor(void *addr) +{ + memset(addr, 0, PGD_TABLE_SIZE); +} + +static void pud_ctor(void *addr) +{ + memset(addr, 0, PUD_TABLE_SIZE); +} + +static void pmd_ctor(void *addr) +{ + memset(addr, 0, PMD_TABLE_SIZE); +} + +struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; +EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */ + +/* + * Create a kmem_cache() for pagetables. This is not used for PTE + * pages - they're linked to struct page, come from the normal free + * pages pool and have a different entry size (see real_pte_t) to + * everything else. Caches created by this function are used for all + * the higher level pagetables, and for hugepage pagetables. + */ +void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) +{ + char *name; + unsigned long table_size = sizeof(void *) << shift; + unsigned long align = table_size; + + /* When batching pgtable pointers for RCU freeing, we store + * the index size in the low bits. Table alignment must be + * big enough to fit it. + * + * Likewise, hugepage pagetable pointers contain a (different) + * shift value in the low bits.
All tables must be aligned so + * as to leave enough 0 bits in the address to contain it. */ + unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1, + HUGEPD_SHIFT_MASK + 1); + struct kmem_cache *new; + + /* It would be nice if this was a BUILD_BUG_ON(), but at the + * moment, gcc doesn't seem to recognize is_power_of_2 as a + * constant expression, so so much for that. */ + BUG_ON(!is_power_of_2(minalign)); + BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE)); + + if (PGT_CACHE(shift)) + return; /* Already have a cache of this size */ + + align = max_t(unsigned long, align, minalign); + name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); + new = kmem_cache_create(name, table_size, align, 0, ctor); + if (!new) + panic("Could not allocate pgtable cache for order %d", shift); + + kfree(name); + pgtable_cache[shift - 1] = new; + + pr_debug("Allocated pgtable cache for order %d\n", shift); +} +EXPORT_SYMBOL_GPL(pgtable_cache_add); /* used by kvm_hv module */ + +void pgtable_cache_init(void) +{ + pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); + + if (PMD_CACHE_INDEX && !PGT_CACHE(PMD_CACHE_INDEX)) + pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor); + /* + * In all current configs, when the PUD index exists it's the + * same size as either the pgd or pmd index except with THP enabled + * on book3s 64 + */ + if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) + pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); +} diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 16ada1eb7e26..10c9a545a646 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -42,6 +42,8 @@ #include <linux/memblock.h> #include <linux/hugetlb.h> #include <linux/slab.h> +#include <linux/of_fdt.h> +#include <linux/libfdt.h> #include <asm/pgalloc.h> #include <asm/page.h> @@ -51,7 +53,7 @@ #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/mmu.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/smp.h> #include <asm/machdep.h> #include <asm/tlb.h> @@ -80,83 +82,6 @@ EXPORT_SYMBOL_GPL(memstart_addr); phys_addr_t kernstart_addr; EXPORT_SYMBOL_GPL(kernstart_addr); -static void pgd_ctor(void *addr) -{ - memset(addr, 0, PGD_TABLE_SIZE); -} - -static void pud_ctor(void *addr) -{ - memset(addr, 0, PUD_TABLE_SIZE); -} - -static void pmd_ctor(void *addr) -{ - memset(addr, 0, PMD_TABLE_SIZE); -} - -struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; - -/* - * Create a kmem_cache() for pagetables. This is not used for PTE - * pages - they're linked to struct page, come from the normal free - * pages pool and have a different entry size (see real_pte_t) to - * everything else. Caches created by this function are used for all - * the higher level pagetables, and for hugepage pagetables. - */ -void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) -{ - char *name; - unsigned long table_size = sizeof(void *) << shift; - unsigned long align = table_size; - - /* When batching pgtable pointers for RCU freeing, we store - * the index size in the low bits. Table alignment must be - * big enough to fit it. - * - * Likewise, hugeapge pagetable pointers contain a (different) - * shift value in the low bits. All tables must be aligned so - * as to leave enough 0 bits in the address to contain it. 
*/ - unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1, - HUGEPD_SHIFT_MASK + 1); - struct kmem_cache *new; - - /* It would be nice if this was a BUILD_BUG_ON(), but at the - * moment, gcc doesn't seem to recognize is_power_of_2 as a - * constant expression, so so much for that. */ - BUG_ON(!is_power_of_2(minalign)); - BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE)); - - if (PGT_CACHE(shift)) - return; /* Already have a cache of this size */ - - align = max_t(unsigned long, align, minalign); - name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); - new = kmem_cache_create(name, table_size, align, 0, ctor); - kfree(name); - pgtable_cache[shift - 1] = new; - pr_debug("Allocated pgtable cache for order %d\n", shift); -} - - -void pgtable_cache_init(void) -{ - pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); - pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor); - /* - * In all current configs, when the PUD index exists it's the - * same size as either the pgd or pmd index except with THP enabled - * on book3s 64 - */ - if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) - pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); - - if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX)) - panic("Couldn't allocate pgtable caches"); - if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) - panic("Couldn't allocate pud pgtable caches"); -} - #ifdef CONFIG_SPARSEMEM_VMEMMAP /* * Given an address within the vmemmap, determine the pfn of the page that @@ -421,12 +346,45 @@ static int __init parse_disable_radix(char *p) } early_param("disable_radix", parse_disable_radix); +/* + * If we're running under a hypervisor, we need to check the contents of + * /chosen/ibm,architecture-vec-5 to see if the hypervisor is willing to do + * radix. If not, we clear the radix feature bit so we fall back to hash. + */ +static void early_check_vec5(void) +{ + unsigned long root, chosen; + int size; + const u8 *vec5; + + root = of_get_flat_dt_root(); + chosen = of_get_flat_dt_subnode_by_name(root, "chosen"); + if (chosen == -FDT_ERR_NOTFOUND) + return; + vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size); + if (!vec5) + return; + if (size <= OV5_INDX(OV5_MMU_RADIX_300) || + !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300))) + /* Hypervisor doesn't support radix */ + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; +} + void __init mmu_early_init_devtree(void) { /* Disable radix mode based on kernel command line. */ if (disable_radix) cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + /* + * Check /chosen/ibm,architecture-vec-5 if running as a guest. + * When running bare-metal, we can use radix if we like + * even though the ibm,architecture-vec-5 property created by + * skiboot doesn't have the necessary bits set. 
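+ * The !(mfmsr() & MSR_HV) test below distinguishes the two: with MSR_HV set we are running bare-metal as the hypervisor ourselves, so no negotiation is needed.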
+ */ + if (early_radix_enabled() && !(mfmsr() & MSR_HV)) + early_check_vec5(); + if (early_radix_enabled()) radix__early_init_devtree(); else diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index b114f8b93ec9..73bf6e14c3aa 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -115,7 +115,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) mm->context.pte_frag = NULL; #endif #ifdef CONFIG_SPAPR_TCE_IOMMU - mm_iommu_init(&mm->context); + mm_iommu_init(mm); #endif return 0; } @@ -156,13 +156,11 @@ static inline void destroy_pagetable_page(struct mm_struct *mm) } #endif - void destroy_context(struct mm_struct *mm) { #ifdef CONFIG_SPAPR_TCE_IOMMU - mm_iommu_cleanup(&mm->context); + WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list)); #endif - #ifdef CONFIG_PPC_ICSWX drop_cop(mm->context.acop, mm); kfree(mm->context.cop_lockp); diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index e0f1c33601dd..104bad029ce9 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -56,7 +56,7 @@ static long mm_iommu_adjust_locked_vm(struct mm_struct *mm, } pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n", - current->pid, + current ? current->pid : 0, incr ? '+' : '-', npages << PAGE_SHIFT, mm->locked_vm << PAGE_SHIFT, @@ -66,12 +66,9 @@ static long mm_iommu_adjust_locked_vm(struct mm_struct *mm, return ret; } -bool mm_iommu_preregistered(void) +bool mm_iommu_preregistered(struct mm_struct *mm) { - if (!current || !current->mm) - return false; - - return !list_empty(¤t->mm->context.iommu_group_mem_list); + return !list_empty(&mm->context.iommu_group_mem_list); } EXPORT_SYMBOL_GPL(mm_iommu_preregistered); @@ -124,19 +121,16 @@ static int mm_iommu_move_page_from_cma(struct page *page) return 0; } -long mm_iommu_get(unsigned long ua, unsigned long entries, +long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, struct mm_iommu_table_group_mem_t **pmem) { struct mm_iommu_table_group_mem_t *mem; long i, j, ret = 0, locked_entries = 0; struct page *page = NULL; - if (!current || !current->mm) - return -ESRCH; /* process exited */ - mutex_lock(&mem_list_mutex); - list_for_each_entry_rcu(mem, ¤t->mm->context.iommu_group_mem_list, + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua == ua) && (mem->entries == entries)) { ++mem->used; @@ -154,7 +148,7 @@ long mm_iommu_get(unsigned long ua, unsigned long entries, } - ret = mm_iommu_adjust_locked_vm(current->mm, entries, true); + ret = mm_iommu_adjust_locked_vm(mm, entries, true); if (ret) goto unlock_exit; @@ -215,11 +209,11 @@ populate: mem->entries = entries; *pmem = mem; - list_add_rcu(&mem->next, ¤t->mm->context.iommu_group_mem_list); + list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list); unlock_exit: if (locked_entries && ret) - mm_iommu_adjust_locked_vm(current->mm, locked_entries, false); + mm_iommu_adjust_locked_vm(mm, locked_entries, false); mutex_unlock(&mem_list_mutex); @@ -264,17 +258,13 @@ static void mm_iommu_free(struct rcu_head *head) static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem) { list_del_rcu(&mem->next); - mm_iommu_adjust_locked_vm(current->mm, mem->entries, false); call_rcu(&mem->rcu, mm_iommu_free); } -long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem) +long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) { long ret = 0; - if 
(!current || !current->mm) - return -ESRCH; /* process exited */ - mutex_lock(&mem_list_mutex); if (mem->used == 0) { @@ -297,6 +287,8 @@ long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem) /* @mapped became 0 so now mappings are disabled, release the region */ mm_iommu_release(mem); + mm_iommu_adjust_locked_vm(mm, mem->entries, false); + unlock_exit: mutex_unlock(&mem_list_mutex); @@ -304,14 +296,12 @@ unlock_exit: } EXPORT_SYMBOL_GPL(mm_iommu_put); -struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, - unsigned long size) +struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, + unsigned long ua, unsigned long size) { struct mm_iommu_table_group_mem_t *mem, *ret = NULL; - list_for_each_entry_rcu(mem, - ¤t->mm->context.iommu_group_mem_list, - next) { + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua <= ua) && (ua + size <= mem->ua + (mem->entries << PAGE_SHIFT))) { @@ -324,14 +314,12 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, } EXPORT_SYMBOL_GPL(mm_iommu_lookup); -struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, - unsigned long entries) +struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, + unsigned long ua, unsigned long entries) { struct mm_iommu_table_group_mem_t *mem, *ret = NULL; - list_for_each_entry_rcu(mem, - ¤t->mm->context.iommu_group_mem_list, - next) { + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua == ua) && (mem->entries == entries)) { ret = mem; break; @@ -373,17 +361,7 @@ void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem) } EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec); -void mm_iommu_init(mm_context_t *ctx) +void mm_iommu_init(struct mm_struct *mm) { - INIT_LIST_HEAD_RCU(&ctx->iommu_group_mem_list); -} - -void mm_iommu_cleanup(mm_context_t *ctx) -{ - struct mm_iommu_table_group_mem_t *mem, *tmp; - - list_for_each_entry_safe(mem, tmp, &ctx->iommu_group_mem_list, next) { - list_del_rcu(&mem->next); - mm_iommu_do_free(mem); - } + INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list); } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 0cb6bd8bfccf..16267ff8c86c 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -786,14 +786,9 @@ new_range: fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid); node_set_online(nid); - if (!(size = numa_enforce_memory_limit(start, size))) { - if (--ranges) - goto new_range; - else - continue; - } - - memblock_set_node(start, size, &memblock.memory, nid); + size = numa_enforce_memory_limit(start, size); + if (size) + memblock_set_node(start, size, &memblock.memory, nid); if (--ranges) goto new_range; @@ -944,7 +939,7 @@ void __init initmem_init(void) * _nocalls() + manual invocation is used because cpuhp is not yet * initialized for the boot CPU. 
*/ - cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "POWER_NUMA_PREPARE", + cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare", ppc_numa_cpu_prepare, ppc_numa_cpu_dead); for_each_present_cpu(cpu) numa_setup_cpu(cpu); @@ -1098,7 +1093,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr) nid = hot_add_node_scn_to_nid(scn_addr); } - if (nid < 0 || !node_online(nid)) + if (nid < 0 || !node_possible(nid)) nid = first_online_node; return nid; diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index f4f437cbabf1..b798ff674fab 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -35,7 +35,8 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, #endif changed = !pmd_same(*(pmdp), entry); if (changed) { - __ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), pmd_pte(entry)); + __ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), + pmd_pte(entry), address); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } return changed; @@ -125,3 +126,21 @@ void mmu_cleanup_all(void) else if (mmu_hash_ops.hpte_clear_all) mmu_hash_ops.hpte_clear_all(); } + +#ifdef CONFIG_MEMORY_HOTPLUG +int create_section_mapping(unsigned long start, unsigned long end) +{ + if (radix_enabled()) + return radix__create_section_mapping(start, end); + + return hash__create_section_mapping(start, end); +} + +int remove_section_mapping(unsigned long start, unsigned long end) +{ + if (radix_enabled()) + return radix__remove_section_mapping(start, end); + + return hash__remove_section_mapping(start, end); +} +#endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 8d941c692eb3..feeda90cd06d 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -18,6 +18,7 @@ #include <asm/machdep.h> #include <asm/mmu.h> #include <asm/firmware.h> +#include <asm/powernv.h> #include <trace/events/thp.h> @@ -65,7 +66,7 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa, if (!pmdp) return -ENOMEM; if (map_page_size == PMD_SIZE) { - ptep = (pte_t *)pudp; + ptep = pmdp_ptep(pmdp); goto set_the_pte; } ptep = pte_alloc_kernel(pmdp, ea); @@ -90,7 +91,7 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa, } pmdp = pmd_offset(pudp, ea); if (map_page_size == PMD_SIZE) { - ptep = (pte_t *)pudp; + ptep = pmdp_ptep(pmdp); goto set_the_pte; } if (!pmd_present(*pmdp)) { @@ -107,59 +108,71 @@ set_the_pte: return 0; } +static inline void __meminit print_mapping(unsigned long start, + unsigned long end, + unsigned long size) +{ + if (end <= start) + return; + + pr_info("Mapped range 0x%lx - 0x%lx with 0x%lx\n", start, end, size); +} + +static int __meminit create_physical_mapping(unsigned long start, + unsigned long end) +{ + unsigned long addr, mapping_size = 0; + + start = _ALIGN_UP(start, PAGE_SIZE); + for (addr = start; addr < end; addr += mapping_size) { + unsigned long gap, previous_size; + int rc; + + gap = end - addr; + previous_size = mapping_size; + + if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE && + mmu_psize_defs[MMU_PAGE_1G].shift) + mapping_size = PUD_SIZE; + else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE && + mmu_psize_defs[MMU_PAGE_2M].shift) + mapping_size = PMD_SIZE; + else + mapping_size = PAGE_SIZE; + + if (mapping_size != previous_size) { + print_mapping(start, addr, previous_size); + start = addr; + } + + rc = radix__map_kernel_page((unsigned long)__va(addr), addr, + PAGE_KERNEL_X, 
mapping_size); + if (rc) + return rc; + } + + print_mapping(start, addr, mapping_size); + return 0; +} + static void __init radix_init_pgtable(void) { - int loop_count; - u64 base, end, start_addr; unsigned long rts_field; struct memblock_region *reg; - unsigned long linear_page_size; /* We don't support slb for radix */ mmu_slb_size = 0; /* * Create the linear mapping, using standard page size for now */ - loop_count = 0; - for_each_memblock(memory, reg) { - - start_addr = reg->base; - -redo: - if (loop_count < 1 && mmu_psize_defs[MMU_PAGE_1G].shift) - linear_page_size = PUD_SIZE; - else if (loop_count < 2 && mmu_psize_defs[MMU_PAGE_2M].shift) - linear_page_size = PMD_SIZE; - else - linear_page_size = PAGE_SIZE; - - base = _ALIGN_UP(start_addr, linear_page_size); - end = _ALIGN_DOWN(reg->base + reg->size, linear_page_size); - - pr_info("Mapping range 0x%lx - 0x%lx with 0x%lx\n", - (unsigned long)base, (unsigned long)end, - linear_page_size); - - while (base < end) { - radix__map_kernel_page((unsigned long)__va(base), - base, PAGE_KERNEL_X, - linear_page_size); - base += linear_page_size; - } - /* - * map the rest using lower page size - */ - if (end < reg->base + reg->size) { - start_addr = end; - loop_count++; - goto redo; - } - } + for_each_memblock(memory, reg) + WARN_ON(create_physical_mapping(reg->base, + reg->base + reg->size)); /* * Allocate Partition table and process table for the * host. */ - BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 23), "Process table size too large."); + BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 36), "Process table size too large."); process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT); /* * Fill in the process table. @@ -240,7 +253,7 @@ static int __init radix_dt_scan_page_sizes(unsigned long node, /* top 3 bit is AP encoding */ shift = be32_to_cpu(prop[0]) & ~(0xe << 28); ap = be32_to_cpu(prop[0]) >> 29; - pr_info("Page size sift = %d AP=0x%x\n", shift, ap); + pr_info("Page size shift = %d AP=0x%x\n", shift, ap); idx = get_idx_from_shift(shift); if (idx < 0) @@ -312,6 +325,38 @@ static void update_hid_for_radix(void) cpu_relax(); } +static void radix_init_amor(void) +{ + /* + * In HV mode, we init AMOR (Authority Mask Override Register) so that + * the hypervisor and guest can setup IAMR (Instruction Authority Mask + * Register), enable key 0 and set it to 1. + * + * AMOR = 0b1100 .... 0000 (Mask for key 0 is 11) + */ + mtspr(SPRN_AMOR, (3ul << 62)); +} + +static void radix_init_iamr(void) +{ + unsigned long iamr; + + /* + * The IAMR should be set to 0 on DD1. + */ + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + iamr = 0; + else + iamr = (1ul << 62); + + /* + * Radix always uses key0 of the IAMR to determine if an access is + * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction + * fetch.
+ */ + mtspr(SPRN_IAMR, iamr); +} + void __init radix__early_init_mmu(void) { unsigned long lpcr; @@ -368,10 +413,14 @@ void __init radix__early_init_mmu(void) lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); radix_init_partition_table(); + radix_init_amor(); + } else { + radix_init_pseries(); } memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); + radix_init_iamr(); radix_init_pgtable(); } @@ -391,7 +440,9 @@ void radix__early_init_mmu_secondary(void) mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); + radix_init_amor(); } + radix_init_iamr(); } void radix__mmu_cleanup_all(void) @@ -402,6 +453,7 @@ void radix__mmu_cleanup_all(void) lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT); mtspr(SPRN_PTCR, 0); + powernv_set_nmmu_ptcr(0); radix__flush_tlb_all(); } } @@ -431,6 +483,173 @@ void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, memblock_set_current_limit(first_memblock_base + first_memblock_size); } +#ifdef CONFIG_MEMORY_HOTPLUG +static void free_pte_table(pte_t *pte_start, pmd_t *pmd) +{ + pte_t *pte; + int i; + + for (i = 0; i < PTRS_PER_PTE; i++) { + pte = pte_start + i; + if (!pte_none(*pte)) + return; + } + + pte_free_kernel(&init_mm, pte_start); + pmd_clear(pmd); +} + +static void free_pmd_table(pmd_t *pmd_start, pud_t *pud) +{ + pmd_t *pmd; + int i; + + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd = pmd_start + i; + if (!pmd_none(*pmd)) + return; + } + + pmd_free(&init_mm, pmd_start); + pud_clear(pud); +} + +static void remove_pte_table(pte_t *pte_start, unsigned long addr, + unsigned long end) +{ + unsigned long next; + pte_t *pte; + + pte = pte_start + pte_index(addr); + for (; addr < end; addr = next, pte++) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + if (next > end) + next = end; + + if (!pte_present(*pte)) + continue; + + if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) { + /* + * The vmemmap_free() and remove_section_mapping() + * codepaths call us with aligned addresses. 
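+ * Anything else would clear a PTE that extends beyond the requested range, so warn once and skip the entry instead.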
+ */ + WARN_ONCE(1, "%s: unaligned range\n", __func__); + continue; + } + + pte_clear(&init_mm, addr, pte); + } +} + +static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, + unsigned long end) +{ + unsigned long next; + pte_t *pte_base; + pmd_t *pmd; + + pmd = pmd_start + pmd_index(addr); + for (; addr < end; addr = next, pmd++) { + next = pmd_addr_end(addr, end); + + if (!pmd_present(*pmd)) + continue; + + if (pmd_huge(*pmd)) { + if (!IS_ALIGNED(addr, PMD_SIZE) || + !IS_ALIGNED(next, PMD_SIZE)) { + WARN_ONCE(1, "%s: unaligned range\n", __func__); + continue; + } + + pte_clear(&init_mm, addr, (pte_t *)pmd); + continue; + } + + pte_base = (pte_t *)pmd_page_vaddr(*pmd); + remove_pte_table(pte_base, addr, next); + free_pte_table(pte_base, pmd); + } +} + +static void remove_pud_table(pud_t *pud_start, unsigned long addr, + unsigned long end) +{ + unsigned long next; + pmd_t *pmd_base; + pud_t *pud; + + pud = pud_start + pud_index(addr); + for (; addr < end; addr = next, pud++) { + next = pud_addr_end(addr, end); + + if (!pud_present(*pud)) + continue; + + if (pud_huge(*pud)) { + if (!IS_ALIGNED(addr, PUD_SIZE) || + !IS_ALIGNED(next, PUD_SIZE)) { + WARN_ONCE(1, "%s: unaligned range\n", __func__); + continue; + } + + pte_clear(&init_mm, addr, (pte_t *)pud); + continue; + } + + pmd_base = (pmd_t *)pud_page_vaddr(*pud); + remove_pmd_table(pmd_base, addr, next); + free_pmd_table(pmd_base, pud); + } +} + +static void remove_pagetable(unsigned long start, unsigned long end) +{ + unsigned long addr, next; + pud_t *pud_base; + pgd_t *pgd; + + spin_lock(&init_mm.page_table_lock); + + for (addr = start; addr < end; addr = next) { + next = pgd_addr_end(addr, end); + + pgd = pgd_offset_k(addr); + if (!pgd_present(*pgd)) + continue; + + if (pgd_huge(*pgd)) { + if (!IS_ALIGNED(addr, PGDIR_SIZE) || + !IS_ALIGNED(next, PGDIR_SIZE)) { + WARN_ONCE(1, "%s: unaligned range\n", __func__); + continue; + } + + pte_clear(&init_mm, addr, (pte_t *)pgd); + continue; + } + + pud_base = (pud_t *)pgd_page_vaddr(*pgd); + remove_pud_table(pud_base, addr, next); + } + + spin_unlock(&init_mm.page_table_lock); + radix__flush_tlb_kernel_range(start, end); +} + +int __ref radix__create_section_mapping(unsigned long start, unsigned long end) +{ + return create_physical_mapping(start, end); +} + +int radix__remove_section_mapping(unsigned long start, unsigned long end) +{ + remove_pagetable(start, end); + return 0; +} +#endif /* CONFIG_MEMORY_HOTPLUG */ + #ifdef CONFIG_SPARSEMEM_VMEMMAP int __meminit radix__vmemmap_create_mapping(unsigned long start, unsigned long page_size, @@ -446,7 +665,7 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start, #ifdef CONFIG_MEMORY_HOTPLUG void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size) { - /* FIXME!! intel does more. We should free page tables mapping vmemmap ? 
*/ + remove_pagetable(start, start + page_size); } #endif #endif diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 911fdfb63ec1..cb39c8bd2436 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -224,7 +224,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, if (changed) { if (!is_vm_hugetlb_page(vma)) assert_pte_locked(vma->vm_mm, address); - __ptep_set_access_flags(vma->vm_mm, ptep, entry); + __ptep_set_access_flags(vma->vm_mm, ptep, entry, address); flush_tlb_page(vma, address); } return changed; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 0ae0572bc239..a65c0b4c0669 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -42,43 +42,6 @@ EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ extern char etext[], _stext[], _sinittext[], _einittext[]; -#define PGDIR_ORDER (32 + PGD_T_LOG2 - PGDIR_SHIFT) - -#ifndef CONFIG_PPC_4K_PAGES -static struct kmem_cache *pgtable_cache; - -void pgtable_cache_init(void) -{ - pgtable_cache = kmem_cache_create("PGDIR cache", 1 << PGDIR_ORDER, - 1 << PGDIR_ORDER, 0, NULL); - if (pgtable_cache == NULL) - panic("Couldn't allocate pgtable caches"); -} -#endif - -pgd_t *pgd_alloc(struct mm_struct *mm) -{ - pgd_t *ret; - - /* pgdir take page or two with 4K pages and a page fraction otherwise */ -#ifndef CONFIG_PPC_4K_PAGES - ret = kmem_cache_alloc(pgtable_cache, GFP_KERNEL | __GFP_ZERO); -#else - ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, - PGDIR_ORDER - PAGE_SHIFT); -#endif - return ret; -} - -void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ -#ifndef CONFIG_PPC_4K_PAGES - kmem_cache_free(pgtable_cache, (void *)pgd); -#else - free_pages((unsigned long)pgd, PGDIR_ORDER - PAGE_SHIFT); -#endif -} - __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pte_t *pte; diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 8bca7f58afc4..db93cf747a03 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -52,6 +52,7 @@ #include <asm/sections.h> #include <asm/firmware.h> #include <asm/dma.h> +#include <asm/powernv.h> #include "mmu_decl.h" @@ -436,6 +437,7 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) void __init mmu_partition_table_init(void) { unsigned long patb_size = 1UL << PATB_SIZE_SHIFT; + unsigned long ptcr; BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large."); partition_tb = __va(memblock_alloc_base(patb_size, patb_size, @@ -448,19 +450,31 @@ void __init mmu_partition_table_init(void) * update partition table control register, * 64 K size. */ - mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); + ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12); + mtspr(SPRN_PTCR, ptcr); + powernv_set_nmmu_ptcr(ptcr); } void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, unsigned long dw1) { + unsigned long old = be64_to_cpu(partition_tb[lpid].patb0); + partition_tb[lpid].patb0 = cpu_to_be64(dw0); partition_tb[lpid].patb1 = cpu_to_be64(dw1); - /* Global flush of TLBs and partition table caches for this lpid */ + /* + * Global flush of TLBs and partition table caches for this lpid. + * The type of flush (hash or radix) depends on what the previous + * use of this partition ID was, not the new use. 
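+ * PATB_HR in the old dw0 records whether the previous owner was radix; the final operand of PPC_TLBIE_5 selects R=1 or R=0 accordingly.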
*/ asm volatile("ptesync" : : : "memory"); - asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : - "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); + if (old & PATB_HR) + asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); + else + asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index d5543514c1df..94210940112f 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -15,7 +15,7 @@ #include <linux/hugetlb.h> #include <asm/pgtable.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/tlbflush.h> /* @@ -248,9 +248,8 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) nw = (next - addr) >> PAGE_SHIFT; up_write(&mm->mmap_sem); - err = -EFAULT; if (__copy_from_user(spp, map, nw * sizeof(u32))) - goto out2; + return -EFAULT; map += nw; down_write(&mm->mmap_sem); @@ -262,6 +261,5 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) err = 0; out: up_write(&mm->mmap_sem); - out2: return err; } diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 3493cf4e0452..61b79119065f 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -428,3 +428,21 @@ void radix__flush_tlb_all(void) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } + +void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, + unsigned long address) +{ + /* + * We track page size in pte only for DD1, so we can + * call this only on DD1. + */ + if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) { + VM_WARN_ON(1); + return; + } + + if (old_pte & _PAGE_LARGE) + radix__flush_tlb_page_psize(mm, address, MMU_PAGE_2M); + else + radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize); +} diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 050badc0ebd3..ba28fcb98597 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -53,7 +53,7 @@ * other sizes not listed here. The .ind field is only used on MMUs that have * indirect page table entries.
*/ -#ifdef CONFIG_PPC_BOOK3E_MMU +#if defined(CONFIG_PPC_BOOK3E_MMU) || defined(CONFIG_PPC_8xx) #ifdef CONFIG_PPC_FSL_BOOK3E struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { [MMU_PAGE_4K] = { @@ -85,6 +85,25 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { .enc = BOOK3E_PAGESZ_1GB, }, }; +#elif defined(CONFIG_PPC_8xx) +struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { + /* we only manage 4k and 16k pages as normal pages */ +#ifdef CONFIG_PPC_4K_PAGES + [MMU_PAGE_4K] = { + .shift = 12, + }, +#else + [MMU_PAGE_16K] = { + .shift = 14, + }, +#endif + [MMU_PAGE_512K] = { + .shift = 19, + }, + [MMU_PAGE_8M] = { + .shift = 23, + }, +}; #else struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { [MMU_PAGE_4K] = { diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 7e706f36e364..f9941b3b5770 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -662,16 +662,17 @@ void bpf_jit_compile(struct bpf_prog *fp) */ bpf_jit_dump(flen, proglen, pass, code_base); - if (image) { - bpf_flush_icache(code_base, code_base + (proglen/4)); + bpf_flush_icache(code_base, code_base + (proglen/4)); + #ifdef CONFIG_PPC64 - /* Function descriptor nastiness: Address + TOC */ - ((u64 *)image)[0] = (u64)code_base; - ((u64 *)image)[1] = local_paca->kernel_toc; + /* Function descriptor nastiness: Address + TOC */ + ((u64 *)image)[0] = (u64)code_base; + ((u64 *)image)[1] = local_paca->kernel_toc; #endif - fp->bpf_func = (void *)image; - fp->jited = 1; - } + + fp->bpf_func = (void *)image; + fp->jited = 1; + out: kfree(addrs); return; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 73a5cf18fd84..d4ed7a0872b1 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -1046,16 +1046,16 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) */ bpf_jit_dump(flen, proglen, pass, code_base); - if (image) { - bpf_flush_icache(bpf_hdr, image + alloclen); #ifdef PPC64_ELF_ABI_v1 - /* Function descriptor nastiness: Address + TOC */ - ((u64 *)image)[0] = (u64)code_base; - ((u64 *)image)[1] = local_paca->kernel_toc; + /* Function descriptor nastiness: Address + TOC */ + ((u64 *)image)[0] = (u64)code_base; + ((u64 *)image)[1] = local_paca->kernel_toc; #endif - fp->bpf_func = (void *)image; - fp->jited = 1; - } + + fp->bpf_func = (void *)image; + fp->jited = 1; + + bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); out: kfree(addrs); diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c index b19265de9178..5182f2936af2 100644 --- a/arch/powerpc/oprofile/cell/spu_profiler.c +++ b/arch/powerpc/oprofile/cell/spu_profiler.c @@ -180,7 +180,7 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer) smp_wmb(); /* insure spu event buffer updates are written */ /* don't want events intermingled... 
*/ - kt = ktime_set(0, profiling_interval); + kt = profiling_interval; if (!spu_prof_running) goto stop; hrtimer_forward(timer, timer->base->get_time(), kt); @@ -204,7 +204,7 @@ int start_spu_profiling_cycles(unsigned int cycles_reset) ktime_t kt; pr_debug("timer resolution: %lu\n", TICK_NSEC); - kt = ktime_set(0, profiling_interval); + kt = profiling_interval; hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer_set_expires(&timer, kt); timer.function = profile_spus; diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c index 83d2b4ef7f0d..44d67b167e0b 100644 --- a/arch/powerpc/oprofile/cell/spu_task_sync.c +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c @@ -295,7 +295,7 @@ out: * dcookie user still being registered (namely, the reader * of the event buffer). */ -static inline unsigned long fast_get_dcookie(struct path *path) +static inline unsigned long fast_get_dcookie(const struct path *path) { unsigned long cookie; diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c new file mode 100644 index 000000000000..3c39f05f0af3 --- /dev/null +++ b/arch/powerpc/perf/8xx-pmu.c @@ -0,0 +1,173 @@ +/* + * Performance event support - PPC 8xx + * + * Copyright 2016 Christophe Leroy, CS Systemes d'Information + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/hardirq.h> +#include <asm/pmc.h> +#include <asm/machdep.h> +#include <asm/firmware.h> +#include <asm/ptrace.h> + +#define PERF_8xx_ID_CPU_CYCLES 1 +#define PERF_8xx_ID_HW_INSTRUCTIONS 2 +#define PERF_8xx_ID_ITLB_LOAD_MISS 3 +#define PERF_8xx_ID_DTLB_LOAD_MISS 4 + +#define C(x) PERF_COUNT_HW_CACHE_##x +#define DTLB_LOAD_MISS (C(DTLB) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16)) +#define ITLB_LOAD_MISS (C(ITLB) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16)) + +extern unsigned long itlb_miss_counter, dtlb_miss_counter; +extern atomic_t instruction_counter; + +static atomic_t insn_ctr_ref; + +static s64 get_insn_ctr(void) +{ + int ctr; + unsigned long counta; + + do { + ctr = atomic_read(&instruction_counter); + counta = mfspr(SPRN_COUNTA); + } while (ctr != atomic_read(&instruction_counter)); + + return ((s64)ctr << 16) | (counta >> 16); +} + +static int event_type(struct perf_event *event) +{ + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) + return PERF_8xx_ID_CPU_CYCLES; + if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) + return PERF_8xx_ID_HW_INSTRUCTIONS; + break; + case PERF_TYPE_HW_CACHE: + if (event->attr.config == ITLB_LOAD_MISS) + return PERF_8xx_ID_ITLB_LOAD_MISS; + if (event->attr.config == DTLB_LOAD_MISS) + return PERF_8xx_ID_DTLB_LOAD_MISS; + break; + case PERF_TYPE_RAW: + break; + default: + return -ENOENT; + } + return -EOPNOTSUPP; +} + +static int mpc8xx_pmu_event_init(struct perf_event *event) +{ + int type = event_type(event); + + if (type < 0) + return type; + return 0; +} + +static int mpc8xx_pmu_add(struct perf_event *event, int flags) +{ + int type = event_type(event); + s64 val = 0; + + if (type < 0) + return type; + + switch (type) { + case PERF_8xx_ID_CPU_CYCLES: + val = get_tb(); + break; + case PERF_8xx_ID_HW_INSTRUCTIONS: + if 
(atomic_inc_return(&insn_ctr_ref) == 1) + mtspr(SPRN_ICTRL, 0xc0080007); + val = get_insn_ctr(); + break; + case PERF_8xx_ID_ITLB_LOAD_MISS: + val = itlb_miss_counter; + break; + case PERF_8xx_ID_DTLB_LOAD_MISS: + val = dtlb_miss_counter; + break; + } + local64_set(&event->hw.prev_count, val); + return 0; +} + +static void mpc8xx_pmu_read(struct perf_event *event) +{ + int type = event_type(event); + s64 prev, val = 0, delta = 0; + + if (type < 0) + return; + + do { + prev = local64_read(&event->hw.prev_count); + switch (type) { + case PERF_8xx_ID_CPU_CYCLES: + val = get_tb(); + delta = 16 * (val - prev); + break; + case PERF_8xx_ID_HW_INSTRUCTIONS: + val = get_insn_ctr(); + delta = prev - val; + if (delta < 0) + delta += 0x1000000000000LL; + break; + case PERF_8xx_ID_ITLB_LOAD_MISS: + val = itlb_miss_counter; + delta = (s64)((s32)val - (s32)prev); + break; + case PERF_8xx_ID_DTLB_LOAD_MISS: + val = dtlb_miss_counter; + delta = (s64)((s32)val - (s32)prev); + break; + } + } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev); + + local64_add(delta, &event->count); +} + +static void mpc8xx_pmu_del(struct perf_event *event, int flags) +{ + mpc8xx_pmu_read(event); + if (event_type(event) != PERF_8xx_ID_HW_INSTRUCTIONS) + return; + + /* If it was the last user, stop counting to avoid useless overhead */ + if (atomic_dec_return(&insn_ctr_ref) == 0) + mtspr(SPRN_ICTRL, 7); +} + +static struct pmu mpc8xx_pmu = { + .event_init = mpc8xx_pmu_event_init, + .add = mpc8xx_pmu_add, + .del = mpc8xx_pmu_del, + .read = mpc8xx_pmu_read, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT | + PERF_PMU_CAP_NO_NMI, +}; + +static int init_mpc8xx_pmu(void) +{ + mtspr(SPRN_ICTRL, 7); + mtspr(SPRN_CMPA, 0); + mtspr(SPRN_COUNTA, 0xffff); + + return perf_pmu_register(&mpc8xx_pmu, "cpu", PERF_TYPE_RAW); +} + +early_initcall(init_mpc8xx_pmu); diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index f102d5370101..4d606b99a5cb 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -13,5 +13,7 @@ obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o +obj-$(CONFIG_PPC_8xx_PERF_EVENT) += 8xx-pmu.o + obj-$(CONFIG_PPC64) += $(obj64-y) obj-$(CONFIG_PPC32) += $(obj32-y) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 72c27b8d2cf3..270eb9b74e2e 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -295,6 +295,8 @@ static inline void perf_read_regs(struct pt_regs *regs) */ if (TRAP(regs) != 0xf00) use_siar = 0; + else if ((ppmu->flags & PPMU_NO_SIAR)) + use_siar = 0; else if (marked) use_siar = 1; else if ((ppmu->flags & PPMU_NO_CONT_SAMPLING)) @@ -2189,7 +2191,7 @@ int register_power_pmu(struct power_pmu *pmu) #endif /* CONFIG_PPC64 */ perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW); - cpuhp_setup_state(CPUHP_PERF_POWER, "PERF_POWER", + cpuhp_setup_state(CPUHP_PERF_POWER, "perf/powerpc:prepare", power_pmu_prepare_cpu, NULL); return 0; } diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 6143c99f3ec5..50e598cf644b 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -12,6 +12,40 @@ */ #include "isa207-common.h" +PMU_FORMAT_ATTR(event, "config:0-49"); +PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); +PMU_FORMAT_ATTR(mark, "config:8"); +PMU_FORMAT_ATTR(combine, "config:11"); +PMU_FORMAT_ATTR(unit, "config:12-15"); +PMU_FORMAT_ATTR(pmc, "config:16-19"); +PMU_FORMAT_ATTR(cache_sel,
"config:20-23"); +PMU_FORMAT_ATTR(sample_mode, "config:24-28"); +PMU_FORMAT_ATTR(thresh_sel, "config:29-31"); +PMU_FORMAT_ATTR(thresh_stop, "config:32-35"); +PMU_FORMAT_ATTR(thresh_start, "config:36-39"); +PMU_FORMAT_ATTR(thresh_cmp, "config:40-49"); + +struct attribute *isa207_pmu_format_attr[] = { + &format_attr_event.attr, + &format_attr_pmcxsel.attr, + &format_attr_mark.attr, + &format_attr_combine.attr, + &format_attr_unit.attr, + &format_attr_pmc.attr, + &format_attr_cache_sel.attr, + &format_attr_sample_mode.attr, + &format_attr_thresh_sel.attr, + &format_attr_thresh_stop.attr, + &format_attr_thresh_start.attr, + &format_attr_thresh_cmp.attr, + NULL, +}; + +struct attribute_group isa207_pmu_format_group = { + .name = "format", + .attrs = isa207_pmu_format_attr, +}; + static inline bool event_is_fab_match(u64 event) { /* Only check pmc, unit and pmcxsel, ignore the edge bit (0) */ @@ -21,6 +55,48 @@ static inline bool event_is_fab_match(u64 event) return (event == 0x30056 || event == 0x4f052); } +static bool is_event_valid(u64 event) +{ + u64 valid_mask = EVENT_VALID_MASK; + + if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1)) + valid_mask = p9_EVENT_VALID_MASK; + + return !(event & ~valid_mask); +} + +static u64 mmcra_sdar_mode(u64 event) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1)) + return p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; + + return MMCRA_SDAR_MODE_TLB; +} + +static u64 thresh_cmp_val(u64 value) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1)) + return value << p9_MMCRA_THR_CMP_SHIFT; + + return value << MMCRA_THR_CMP_SHIFT; +} + +static unsigned long combine_from_event(u64 event) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1)) + return p9_EVENT_COMBINE(event); + + return EVENT_COMBINE(event); +} + +static unsigned long combine_shift(unsigned long pmc) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1)) + return p9_MMCR1_COMBINE_SHIFT(pmc); + + return MMCR1_COMBINE_SHIFT(pmc); +} + int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) { unsigned int unit, pmc, cache, ebb; @@ -28,7 +104,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) mask = value = 0; - if (event & ~EVENT_VALID_MASK) + if (!is_event_valid(event)) return -1; pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; @@ -155,15 +231,13 @@ int isa207_compute_mmcr(u64 event[], int n_ev, pmc_inuse |= 1 << pmc; } - /* In continuous sampling mode, update SDAR on TLB miss */ - mmcra = MMCRA_SDAR_MODE_TLB; - mmcr1 = mmcr2 = 0; + mmcra = mmcr1 = mmcr2 = 0; /* Second pass: assign PMCs, set all MMCR1 fields */ for (i = 0; i < n_ev; ++i) { pmc = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; unit = (event[i] >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; - combine = (event[i] >> EVENT_COMBINE_SHIFT) & EVENT_COMBINE_MASK; + combine = combine_from_event(event[i]); psel = event[i] & EVENT_PSEL_MASK; if (!pmc) { @@ -177,10 +251,13 @@ int isa207_compute_mmcr(u64 event[], int n_ev, if (pmc <= 4) { mmcr1 |= unit << MMCR1_UNIT_SHIFT(pmc); - mmcr1 |= combine << MMCR1_COMBINE_SHIFT(pmc); + mmcr1 |= combine << combine_shift(pmc); mmcr1 |= psel << MMCR1_PMCSEL_SHIFT(pmc); } + /* In continuous sampling mode, update SDAR on TLB miss */ + mmcra |= mmcra_sdar_mode(event[i]); + if (event[i] & EVENT_IS_L1) { cache = event[i] >> EVENT_CACHE_SEL_SHIFT; mmcr1 |= (cache & 1) << MMCR1_IC_QUAL_SHIFT; @@ -211,7 +288,7 @@ int 
isa207_compute_mmcr(u64 event[], int n_ev, val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK; mmcra |= val << MMCRA_THR_SEL_SHIFT; val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; - mmcra |= val << MMCRA_THR_CMP_SHIFT; + mmcra |= thresh_cmp_val(val); } if (event[i] & EVENT_WANTS_BHRB) { diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 4d0a4e5017c2..90495f1580c7 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -107,6 +107,7 @@ #define EVENT_UNIT_MASK 0xf #define EVENT_COMBINE_SHIFT 11 /* Combine bit */ #define EVENT_COMBINE_MASK 0x1 +#define EVENT_COMBINE(v) (((v) >> EVENT_COMBINE_SHIFT) & EVENT_COMBINE_MASK) #define EVENT_MARKED_SHIFT 8 /* Marked bit */ #define EVENT_MARKED_MASK 0x1 #define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) @@ -134,6 +135,26 @@ PERF_SAMPLE_BRANCH_KERNEL |\ PERF_SAMPLE_BRANCH_HV) +/* Constants to support power9 raw encoding format */ +#define p9_EVENT_COMBINE_SHIFT 10 /* Combine bit */ +#define p9_EVENT_COMBINE_MASK 0x3ull +#define p9_EVENT_COMBINE(v) (((v) >> p9_EVENT_COMBINE_SHIFT) & p9_EVENT_COMBINE_MASK) +#define p9_SDAR_MODE_SHIFT 50 +#define p9_SDAR_MODE_MASK 0x3ull +#define p9_SDAR_MODE(v) (((v) >> p9_SDAR_MODE_SHIFT) & p9_SDAR_MODE_MASK) + +#define p9_EVENT_VALID_MASK \ + ((p9_SDAR_MODE_MASK << p9_SDAR_MODE_SHIFT | \ + (EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \ + (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \ + (EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \ + (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \ + (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \ + (p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT) | \ + (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ + EVENT_LINUX_MASK | \ + EVENT_PSEL_MASK)) + /* * Layout of constraint bits: * @@ -210,15 +231,22 @@ #define MMCR1_DC_QUAL_SHIFT 47 #define MMCR1_IC_QUAL_SHIFT 46 +/* MMCR1 Combine bits macro for power9 */ +#define p9_MMCR1_COMBINE_SHIFT(pmc) (38 - ((pmc - 1) * 2)) + /* Bits in MMCRA for PowerISA v2.07 */ #define MMCRA_SAMP_MODE_SHIFT 1 #define MMCRA_SAMP_ELIG_SHIFT 4 #define MMCRA_THR_CTL_SHIFT 8 #define MMCRA_THR_SEL_SHIFT 16 #define MMCRA_THR_CMP_SHIFT 32 -#define MMCRA_SDAR_MODE_TLB (1ull << 42) +#define MMCRA_SDAR_MODE_SHIFT 42 +#define MMCRA_SDAR_MODE_TLB (1ull << MMCRA_SDAR_MODE_SHIFT) #define MMCRA_IFM_SHIFT 30 +/* MMCRA Threshold Compare bit constant for power9 */ +#define p9_MMCRA_THR_CMP_SHIFT 45 + /* Bits in MMCR2 for PowerISA v2.07 */ #define MMCR2_FCS(pmc) (1ull << (63 - (((pmc) - 1) * 9))) #define MMCR2_FCP(pmc) (1ull << (62 - (((pmc) - 1) * 9))) diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index ab830d106ec5..d07186382f3a 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -30,6 +30,9 @@ enum { #define POWER8_MMCRA_IFM2 0x0000000080000000UL #define POWER8_MMCRA_IFM3 0x00000000C0000000UL +/* PowerISA v2.07 format attribute structure */ +extern struct attribute_group isa207_pmu_format_group; + /* Table of alternatives, sorted by column 0 */ static const unsigned int event_alternatives[][MAX_ALT] = { { PM_MRK_ST_CMPL, PM_MRK_ST_CMPL_ALT }, @@ -175,42 +178,8 @@ static struct attribute_group power8_pmu_events_group = { .attrs = power8_events_attr, }; -PMU_FORMAT_ATTR(event, "config:0-49"); -PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); -PMU_FORMAT_ATTR(mark, "config:8"); -PMU_FORMAT_ATTR(combine, "config:11"); -PMU_FORMAT_ATTR(unit, "config:12-15"); -PMU_FORMAT_ATTR(pmc, "config:16-19"); -PMU_FORMAT_ATTR(cache_sel,
"config:20-23"); -PMU_FORMAT_ATTR(sample_mode, "config:24-28"); -PMU_FORMAT_ATTR(thresh_sel, "config:29-31"); -PMU_FORMAT_ATTR(thresh_stop, "config:32-35"); -PMU_FORMAT_ATTR(thresh_start, "config:36-39"); -PMU_FORMAT_ATTR(thresh_cmp, "config:40-49"); - -static struct attribute *power8_pmu_format_attr[] = { - &format_attr_event.attr, - &format_attr_pmcxsel.attr, - &format_attr_mark.attr, - &format_attr_combine.attr, - &format_attr_unit.attr, - &format_attr_pmc.attr, - &format_attr_cache_sel.attr, - &format_attr_sample_mode.attr, - &format_attr_thresh_sel.attr, - &format_attr_thresh_stop.attr, - &format_attr_thresh_start.attr, - &format_attr_thresh_cmp.attr, - NULL, -}; - -static struct attribute_group power8_pmu_format_group = { - .name = "format", - .attrs = power8_pmu_format_attr, -}; - static const struct attribute_group *power8_pmu_attr_groups[] = { - &power8_pmu_format_group, + &isa207_pmu_format_group, &power8_pmu_events_group, NULL, }; diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h index 6447dc1c3d89..929b56d47ad9 100644 --- a/arch/powerpc/perf/power9-events-list.h +++ b/arch/powerpc/perf/power9-events-list.h @@ -16,7 +16,7 @@ EVENT(PM_CYC, 0x0001e) EVENT(PM_ICT_NOSLOT_CYC, 0x100f8) EVENT(PM_CMPLU_STALL, 0x1e054) EVENT(PM_INST_CMPL, 0x00002) -EVENT(PM_BRU_CMPL, 0x40060) +EVENT(PM_BRU_CMPL, 0x10012) EVENT(PM_BR_MPRED_CMPL, 0x400f6) /* All L1 D cache load references counted at finish, gated by reject */ diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 8e9a81967ff8..7332634e18c9 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -16,6 +16,78 @@ #include "isa207-common.h" /* + * Raw event encoding for Power9: + * + * 60 56 52 48 44 40 36 32 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * | | [ ] [ ] [ thresh_cmp ] [ thresh_ctl ] + * | | | | | + * | | *- IFM (Linux) | thresh start/stop OR FAB match -* + * | *- BHRB (Linux) *sm + * *- EBB (Linux) + * + * 28 24 20 16 12 8 4 0 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * [ ] [ sample ] [cache] [ pmc ] [unit ] [] m [ pmcxsel ] + * | | | | | + * | | | | *- mark + * | | *- L1/L2/L3 cache_sel | + * | | | + * | *- sampling mode for marked events *- combine + * | + * *- thresh_sel + * + * Below uses IBM bit numbering. 
+ * + * MMCR1[x:y] = unit (PMCxUNIT) + * MMCR1[24] = pmc1combine[0] + * MMCR1[25] = pmc1combine[1] + * MMCR1[26] = pmc2combine[0] + * MMCR1[27] = pmc2combine[1] + * MMCR1[28] = pmc3combine[0] + * MMCR1[29] = pmc3combine[1] + * MMCR1[30] = pmc4combine[0] + * MMCR1[31] = pmc4combine[1] + * + * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011 + * # PM_MRK_FAB_RSP_MATCH + * MMCR1[20:27] = thresh_ctl (FAB_CRESP_MATCH / FAB_TYPE_MATCH) + * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001 + * # PM_MRK_FAB_RSP_MATCH_CYC + * MMCR1[20:27] = thresh_ctl (FAB_CRESP_MATCH / FAB_TYPE_MATCH) + * else + * MMCRA[48:55] = thresh_ctl (THRESH START/END) + * + * if thresh_sel: + * MMCRA[45:47] = thresh_sel + * + * if thresh_cmp: + * MMCRA[9:11] = thresh_cmp[0:2] + * MMCRA[12:18] = thresh_cmp[3:9] + * + * if unit == 6 or unit == 7 + * MMCRC[53:55] = cache_sel[1:3] (L2EVENT_SEL) + * else if unit == 8 or unit == 9: + * if cache_sel[0] == 0: # L3 bank + * MMCRC[47:49] = cache_sel[1:3] (L3EVENT_SEL0) + * else if cache_sel[0] == 1: + * MMCRC[50:51] = cache_sel[2:3] (L3EVENT_SEL1) + * else if cache_sel[1]: # L1 event + * MMCR1[16] = cache_sel[2] + * MMCR1[17] = cache_sel[3] + * + * if mark: + * MMCRA[63] = 1 (SAMPLE_ENABLE) + * MMCRA[57:59] = sample[0:2] (RAND_SAMP_ELIG) + * MMCRA[61:62] = sample[3:4] (RAND_SAMP_MODE) + * + * if EBB and BHRB: + * MMCRA[32:33] = IFM + * + * MMCRA[SDAR_MODE] = sm + */ + +/* * Some power9 event codes. */ #define EVENT(_name, _code) _name = _code, @@ -31,6 +103,9 @@ enum { #define POWER9_MMCRA_IFM2 0x0000000080000000UL #define POWER9_MMCRA_IFM3 0x00000000C0000000UL +/* PowerISA v2.07 format attribute structure */ +extern struct attribute_group isa207_pmu_format_group; + GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC); GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_ICT_NOSLOT_CYC); GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL); @@ -90,10 +165,16 @@ static struct attribute_group power9_pmu_events_group = { .attrs = power9_events_attr, }; -PMU_FORMAT_ATTR(event, "config:0-49"); +static const struct attribute_group *power9_isa207_pmu_attr_groups[] = { + &isa207_pmu_format_group, + &power9_pmu_events_group, + NULL, +}; + +PMU_FORMAT_ATTR(event, "config:0-51"); PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); PMU_FORMAT_ATTR(mark, "config:8"); -PMU_FORMAT_ATTR(combine, "config:11"); +PMU_FORMAT_ATTR(combine, "config:10-11"); PMU_FORMAT_ATTR(unit, "config:12-15"); PMU_FORMAT_ATTR(pmc, "config:16-19"); PMU_FORMAT_ATTR(cache_sel, "config:20-23"); @@ -102,6 +183,7 @@ PMU_FORMAT_ATTR(thresh_sel, "config:29-31"); PMU_FORMAT_ATTR(thresh_stop, "config:32-35"); PMU_FORMAT_ATTR(thresh_start, "config:36-39"); PMU_FORMAT_ATTR(thresh_cmp, "config:40-49"); +PMU_FORMAT_ATTR(sdar_mode, "config:50-51"); static struct attribute *power9_pmu_format_attr[] = { &format_attr_event.attr, @@ -116,6 +198,7 @@ static struct attribute *power9_pmu_format_attr[] = { &format_attr_thresh_stop.attr, &format_attr_thresh_start.attr, &format_attr_thresh_cmp.attr, + &format_attr_sdar_mode.attr, NULL, }; @@ -291,6 +374,24 @@ static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { #undef C +static struct power_pmu power9_isa207_pmu = { + .name = "POWER9", + .n_counter = MAX_PMU_COUNTERS, + .add_fields = ISA207_ADD_FIELDS, + .test_adder = ISA207_TEST_ADDER, + .compute_mmcr = isa207_compute_mmcr, + .config_bhrb = power9_config_bhrb, + .bhrb_filter_map = power9_bhrb_filter_map, + .get_constraint = isa207_get_constraint, + .disable_pmc = isa207_disable_pmc, + .flags = PPMU_NO_SIAR | PPMU_ARCH_207S, +
.n_generic = ARRAY_SIZE(power9_generic_events), + .generic_events = power9_generic_events, + .cache_events = &power9_cache_events, + .attr_groups = power9_isa207_pmu_attr_groups, + .bhrb_nr = 32, +}; + static struct power_pmu power9_pmu = { .name = "POWER9", .n_counter = MAX_PMU_COUNTERS, @@ -311,14 +412,19 @@ static struct power_pmu power9_pmu = { static int __init init_power9_pmu(void) { - int rc; + int rc = 0; /* Comes from cpu_specs[] */ if (!cur_cpu_spec->oprofile_cpu_type || strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power9")) return -ENODEV; - rc = register_power_pmu(&power9_pmu); + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { + rc = register_power_pmu(&power9_isa207_pmu); + } else { + rc = register_power_pmu(&power9_pmu); + } + if (rc) return rc; diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig index 1d7c1b142bf4..abc24501c4c0 100644 --- a/arch/powerpc/platforms/40x/Kconfig +++ b/arch/powerpc/platforms/40x/Kconfig @@ -103,18 +103,18 @@ config 405GP bool select IBM405_ERR77 select IBM405_ERR51 - select IBM_EMAC_ZMII + select IBM_EMAC_ZMII if IBM_EMAC config 405EX bool - select IBM_EMAC_EMAC4 - select IBM_EMAC_RGMII + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC config 405EZ bool - select IBM_EMAC_NO_FLOW_CTRL - select IBM_EMAC_MAL_CLR_ICINTSTAT - select IBM_EMAC_MAL_COMMON_ERR + select IBM_EMAC_NO_FLOW_CTRL if IBM_EMAC + select IBM_EMAC_MAL_CLR_ICINTSTAT if IBM_EMAC + select IBM_EMAC_MAL_COMMON_ERR if IBM_EMAC config XILINX_VIRTEX bool diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 25b8d641ff9f..9b0afe935cc1 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -26,7 +26,7 @@ config BLUESTONE select PCI_MSI select PPC4xx_MSI select PPC4xx_PCI_EXPRESS - select IBM_EMAC_RGMII + select IBM_EMAC_RGMII if IBM_EMAC help This option enables support for the APM APM821xx Evaluation board. @@ -125,8 +125,8 @@ config CANYONLANDS select PPC4xx_PCI_EXPRESS select PCI_MSI select PPC4xx_MSI - select IBM_EMAC_RGMII - select IBM_EMAC_ZMII + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC help This option enables support for the AMCC PPC460EX evaluation board. @@ -138,8 +138,8 @@ config GLACIER select 460EX # Odd since it uses 460GT but the effects are the same select PCI select PPC4xx_PCI_EXPRESS - select IBM_EMAC_RGMII - select IBM_EMAC_ZMII + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC help This option enables support for the AMCC PPC460GT evaluation board. @@ -164,7 +164,7 @@ config EIGER select 460SX select PCI select PPC4xx_PCI_EXPRESS - select IBM_EMAC_RGMII + select IBM_EMAC_RGMII if IBM_EMAC help This option enables support for the AMCC PPC460SX evaluation board. 
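Returning to the power9 raw event format documented a few hunks up: the PMU_FORMAT_ATTR lines pin each field to fixed bits of the perf config word. A minimal, self-contained sketch of unpacking such a code, assuming only those published bit positions (the decoder itself is illustrative and not part of this series):

#include <stdint.h>
#include <stdio.h>

/* Bit positions from the power9 PMU_FORMAT_ATTR lines above:
 * pmcxsel config:0-7, mark config:8, combine config:10-11,
 * unit config:12-15, pmc config:16-19, cache_sel config:20-23,
 * sdar_mode config:50-51. */
static void decode_p9_raw(uint64_t ev)
{
    printf("pmcxsel=0x%02x mark=%u combine=%u unit=%u pmc=%u cache_sel=%u sdar_mode=%u\n",
           (unsigned)(ev & 0xff),
           (unsigned)((ev >> 8) & 0x1),
           (unsigned)((ev >> 10) & 0x3),
           (unsigned)((ev >> 12) & 0xf),
           (unsigned)((ev >> 16) & 0xf),
           (unsigned)((ev >> 20) & 0xf),
           (unsigned)((ev >> 50) & 0x3));
}

int main(void)
{
    /* PM_BRU_CMPL, 0x10012, from power9-events-list.h: pmc=1, pmcxsel=0x12 */
    decode_p9_raw(0x10012);
    return 0;
}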
@@ -213,7 +213,7 @@ config AKEBONO select NETDEVICES select ETHERNET select NET_VENDOR_IBM - select IBM_EMAC_EMAC4 + select IBM_EMAC_EMAC4 if IBM_EMAC select USB if USB_SUPPORT select USB_OHCI_HCD_PLATFORM if USB_OHCI_HCD select USB_EHCI_HCD_PLATFORM if USB_EHCI_HCD @@ -291,54 +291,54 @@ config 440EP bool select PPC_FPU select IBM440EP_ERR42 - select IBM_EMAC_ZMII + select IBM_EMAC_ZMII if IBM_EMAC config 440EPX bool select PPC_FPU - select IBM_EMAC_EMAC4 - select IBM_EMAC_RGMII - select IBM_EMAC_ZMII + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC select USB_EHCI_BIG_ENDIAN_MMIO select USB_EHCI_BIG_ENDIAN_DESC config 440GRX bool - select IBM_EMAC_EMAC4 - select IBM_EMAC_RGMII - select IBM_EMAC_ZMII + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC config 440GP bool - select IBM_EMAC_ZMII + select IBM_EMAC_ZMII if IBM_EMAC config 440GX bool - select IBM_EMAC_EMAC4 - select IBM_EMAC_RGMII - select IBM_EMAC_ZMII #test only - select IBM_EMAC_TAH #test only + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC #test only + select IBM_EMAC_TAH if IBM_EMAC #test only config 440SP bool config 440SPe bool - select IBM_EMAC_EMAC4 + select IBM_EMAC_EMAC4 if IBM_EMAC config 460EX bool select PPC_FPU - select IBM_EMAC_EMAC4 - select IBM_EMAC_TAH + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_TAH if IBM_EMAC config 460SX bool select PPC_FPU - select IBM_EMAC_EMAC4 - select IBM_EMAC_RGMII - select IBM_EMAC_ZMII - select IBM_EMAC_TAH + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC + select IBM_EMAC_TAH if IBM_EMAC config 476FPE bool @@ -347,8 +347,8 @@ config 476FPE config APM821xx bool select PPC_FPU - select IBM_EMAC_EMAC4 - select IBM_EMAC_TAH + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_TAH if IBM_EMAC config 476FPE_ERR46 depends on 476FPE diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 24717d060008..08f92f6ed228 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -441,8 +441,4 @@ static struct platform_driver pmc_driver = { .remove = pmc_remove }; -static int pmc_init(void) -{ - return platform_driver_register(&pmc_driver); -} -device_initcall(pmc_init); +builtin_platform_driver(pmc_driver); diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 9dc1d28975b9..47b389dc4938 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -253,6 +253,8 @@ endif # PPC32 config PPC_QEMU_E500 bool "QEMU generic e500 platform" select DEFAULT_UIMAGE + select E500 + select PPC_E500MC if PPC64 help This option enables support for running as a QEMU guest using QEMU's generic e500 machine. 
This is not required if you're diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile index 7bc86dae9517..fe19dad568e2 100644 --- a/arch/powerpc/platforms/85xx/Makefile +++ b/arch/powerpc/platforms/85xx/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_P1022_RDK) += p1022_rdk.o obj-$(CONFIG_P1023_RDB) += p1023_rdb.o obj-$(CONFIG_TWR_P102x) += twr_p102x.o obj-$(CONFIG_CORENET_GENERIC) += corenet_generic.o +obj-$(CONFIG_FB_FSL_DIU) += t1042rdb_diu.o obj-$(CONFIG_STX_GP3) += stx_gp3.o obj-$(CONFIG_TQM85xx) += tqm85xx.o obj-$(CONFIG_SBC8548) += sbc8548.o diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 1179115a4b5c..ac191a7a1337 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -117,9 +117,6 @@ static const struct of_device_id of_device_ids[] = { { .compatible = "fsl,qe", }, - { - .compatible = "fsl,fman", - }, /* The following two are for the Freescale hypervisor */ { .name = "hypervisor", @@ -160,6 +157,7 @@ static const char * const boards[] __initconst = { "fsl,T1040RDB", "fsl,T1042RDB", "fsl,T1042RDB_PI", + "keymile,kmcent2", "keymile,kmcoge4", "varisys,CYRUS", NULL @@ -220,7 +218,7 @@ define_machine(corenet_generic) { * * Likewise, problems have been seen with kexec when coreint is enabled. */ -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC) +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC_CORE) .get_irq = mpic_get_irq, #else .get_irq = mpic_get_coreint_irq, diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index fe9f19e5e935..a83a6d26090d 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -349,13 +349,13 @@ struct smp_ops_t smp_85xx_ops = { .cpu_disable = generic_cpu_disable, .cpu_die = generic_cpu_die, #endif -#if defined(CONFIG_KEXEC) && !defined(CONFIG_PPC64) +#if defined(CONFIG_KEXEC_CORE) && !defined(CONFIG_PPC64) .give_timebase = smp_generic_give_timebase, .take_timebase = smp_generic_take_timebase, #endif }; -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE #ifdef CONFIG_PPC32 atomic_t kexec_down_cpus = ATOMIC_INIT(0); @@ -458,7 +458,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image) default_machine_kexec(image); } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ static void smp_85xx_basic_setup(int cpu_nr) { @@ -512,7 +512,7 @@ void __init mpc85xx_smp_init(void) #endif smp_ops = &smp_85xx_ops; -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE ppc_md.kexec_cpu_down = mpc85xx_smp_kexec_cpu_down; ppc_md.machine_kexec = mpc85xx_smp_machine_kexec; #endif diff --git a/arch/powerpc/platforms/85xx/t1042rdb_diu.c b/arch/powerpc/platforms/85xx/t1042rdb_diu.c new file mode 100644 index 000000000000..58fa3d319f1c --- /dev/null +++ b/arch/powerpc/platforms/85xx/t1042rdb_diu.c @@ -0,0 +1,152 @@ +/* + * T1042 platform DIU operation + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/of_address.h> + +#include <sysdev/fsl_soc.h> + +/* DIU Pixel ClockCR offset in scfg */ +#define CCSR_SCFG_PIXCLKCR 0x28 + +/* DIU Pixel Clock bits of the PIXCLKCR */ +#define PIXCLKCR_PXCKEN 0x80000000 +#define PIXCLKCR_PXCKINV 0x40000000 +#define PIXCLKCR_PXCKDLY 0x0000FF00 +#define PIXCLKCR_PXCLK_MASK 0x00FF0000 + +/* Some CPLD register definitions */ +#define CPLD_DIUCSR 0x16 +#define CPLD_DIUCSR_DVIEN 0x80 +#define CPLD_DIUCSR_BACKLIGHT 0x0f + +struct device_node *cpld_node; + +/** + * t1042rdb_set_monitor_port: switch the output to a different monitor port + */ +static void t1042rdb_set_monitor_port(enum fsl_diu_monitor_port port) +{ + static void __iomem *cpld_base; + + cpld_base = of_iomap(cpld_node, 0); + if (!cpld_base) { + pr_err("%s: Could not map cpld registers\n", __func__); + goto exit; + } + + switch (port) { + case FSL_DIU_PORT_DVI: + /* Enable the DVI(HDMI) port, disable the DFP and + * the backlight + */ + clrbits8(cpld_base + CPLD_DIUCSR, CPLD_DIUCSR_DVIEN); + break; + case FSL_DIU_PORT_LVDS: + /* + * LVDS also needs backlight enabled, otherwise the display + * will be blank. + */ + /* Enable the DFP port, disable the DVI */ + setbits8(cpld_base + CPLD_DIUCSR, 0x01 << 8); + setbits8(cpld_base + CPLD_DIUCSR, 0x01 << 4); + setbits8(cpld_base + CPLD_DIUCSR, CPLD_DIUCSR_BACKLIGHT); + break; + default: + pr_err("%s: Unsupported monitor port %i\n", __func__, port); + } + + iounmap(cpld_base); +exit: + of_node_put(cpld_node); +} + +/** + * t1042rdb_set_pixel_clock: program the DIU's clock + * @pixclock: pixel clock in ps (picoseconds) + */ +static void t1042rdb_set_pixel_clock(unsigned int pixclock) +{ + struct device_node *scfg_np; + void __iomem *scfg; + unsigned long freq; + u64 temp; + u32 pxclk; + + scfg_np = of_find_compatible_node(NULL, NULL, "fsl,t1040-scfg"); + if (!scfg_np) { + pr_err("%s: Missing scfg node. Can not display video.\n", + __func__); + return; + } + + scfg = of_iomap(scfg_np, 0); + of_node_put(scfg_np); + if (!scfg) { + pr_err("%s: Could not map device. Can not display video.\n", + __func__); + return; + } + + /* Convert pixclock into frequency */ + temp = 1000000000000ULL; + do_div(temp, pixclock); + freq = temp; + + /* + * 'pxclk' is the ratio of the platform clock to the pixel clock. + * This number is programmed into the PIXCLKCR register, and the valid + * range of values is 2-255.
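To put numbers on the ratio described in this comment (all values below are assumed examples, not taken from this diff): a 65 MHz pixel clock corresponds to a pixclock of roughly 15384 ps, and with a 400 MHz platform clock the programmed ratio comes out to 6, well inside the 2-255 window:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t pixclock_ps = 15384;                   /* ~65 MHz pixel clock (assumed) */
    uint64_t freq = 1000000000000ULL / pixclock_ps; /* -> 65002600 Hz */
    uint64_t sys_freq = 400000000;                  /* assumed platform clock */
    uint64_t pxclk = (sys_freq + freq / 2) / freq;  /* DIV_ROUND_CLOSEST -> 6 */

    /* clamp_t(u32, pxclk, 2, 255) */
    if (pxclk < 2)
        pxclk = 2;
    else if (pxclk > 255)
        pxclk = 255;
    printf("PIXCLKCR ratio = %llu\n", (unsigned long long)pxclk);
    return 0;
}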
+ */ + pxclk = DIV_ROUND_CLOSEST(fsl_get_sys_freq(), freq); + pxclk = clamp_t(u32, pxclk, 2, 255); + + /* Disable the pixel clock, and set it to non-inverted and no delay */ + clrbits32(scfg + CCSR_SCFG_PIXCLKCR, + PIXCLKCR_PXCKEN | PIXCLKCR_PXCKDLY | PIXCLKCR_PXCLK_MASK); + + /* Enable the clock and set the pxclk */ + setbits32(scfg + CCSR_SCFG_PIXCLKCR, PIXCLKCR_PXCKEN | (pxclk << 16)); + + iounmap(scfg); +} + +/** + * t1042rdb_valid_monitor_port: validate the monitor port for sysfs + */ +static enum fsl_diu_monitor_port +t1042rdb_valid_monitor_port(enum fsl_diu_monitor_port port) +{ + switch (port) { + case FSL_DIU_PORT_DVI: + case FSL_DIU_PORT_LVDS: + return port; + default: + return FSL_DIU_PORT_DVI; /* Dual-link LVDS is not supported */ + } +} + +static int __init t1042rdb_diu_init(void) +{ + cpld_node = of_find_compatible_node(NULL, NULL, "fsl,t1042rdb-cpld"); + if (!cpld_node) + return 0; + + diu_ops.set_monitor_port = t1042rdb_set_monitor_port; + diu_ops.set_pixel_clock = t1042rdb_set_pixel_clock; + diu_ops.valid_monitor_port = t1042rdb_valid_monitor_port; + + return 0; +} + +early_initcall(t1042rdb_diu_init); diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index 564d99bb2a26..80cbcb0ad9b1 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -130,6 +130,7 @@ config 8xx_CPU6 config 8xx_CPU15 bool "CPU15 Silicon Errata" + depends on !HUGETLB_PAGE default y help This enables a workaround for erratum CPU15 on MPC8xx chips. diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index fbdae8377b71..7e3a2ebba29b 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -168,17 +168,6 @@ config MPIC_BROKEN_REGREAD well, but enabling it uses about 8KB of memory to keep copies of the register contents in software. -config IBMVIO - depends on PPC_PSERIES - bool - default y - -config IBMEBUS - depends on PPC_PSERIES - bool "Support for GX bus based adapters" - help - Bus device driver for GX bus based adapters. - config EEH bool depends on (PPC_POWERNV || PPC_PSERIES) && PCI diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index ca2da30ad2ab..99b0ae8acb78 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -34,6 +34,7 @@ config PPC_8xx select FSL_SOC select 8xx select PPC_LIB_RHEAP + select SYS_SUPPORTS_HUGETLBFS config 40x bool "AMCC 40x" @@ -171,6 +172,13 @@ config PPC_FPU bool default y if PPC64 +config PPC_8xx_PERF_EVENT + bool "PPC 8xx perf events" + depends on PPC_8xx && PERF_EVENTS + help + This is Performance Events support for PPC 8xx. The 8xx doesn't + have a PMU but some events are emulated using 8xx features.
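The emulation this help text refers to is visible in 8xx-pmu.c earlier in the diff: the 48-bit instruction count lives partly in an atomic_t bumped by the counter trap and partly in the upper 16 bits of SPRN_COUNTA. A simplified user-space sketch of the tear-free read loop (the two globals stand in for the kernel objects and are assumptions here):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_int instruction_counter;  /* high 32 bits, bumped on counter overflow */
static uint32_t counta;                 /* stand-in for mfspr(SPRN_COUNTA) */

static uint32_t read_counta(void) { return counta; }

/* Mirrors get_insn_ctr(): re-read the high half until it is stable
 * across the low-half read, so the two halves are consistent. */
static int64_t read_insn_ctr(void)
{
    int high;
    uint32_t low;

    do {
        high = atomic_load(&instruction_counter);
        low = read_counta();
    } while (high != atomic_load(&instruction_counter));

    /* The count sits in the top 16 bits of COUNTA; 32 + 16 = 48 bits total. */
    return ((int64_t)high << 16) | (low >> 16);
}

int main(void)
{
    atomic_store(&instruction_counter, 3);
    counta = 0xabcd0000;
    printf("insn count = 0x%llx\n", (unsigned long long)read_insn_ctr());
    return 0;
}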
+ config FSL_EMB_PERFMON bool "Freescale Embedded Perfmon" depends on E500 || PPC_83xx diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index d9088f0b8fcc..a4522f09d65e 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -17,10 +17,10 @@ config PPC_CELL_NATIVE select PPC_CELL_COMMON select MPIC select PPC_IO_WORKAROUNDS - select IBM_EMAC_EMAC4 - select IBM_EMAC_RGMII - select IBM_EMAC_ZMII #test only - select IBM_EMAC_TAH #test only + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC #test only + select IBM_EMAC_TAH if IBM_EMAC #test only default n config PPC_IBM_CELL_BLADE @@ -46,7 +46,6 @@ config SPU_FS default m depends on PPC_CELL select SPU_BASE - select MEMORY_HOTPLUG help The SPU file system is used to access Synergistic Processing Units on machines implementing the Broadband Processor diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index e84d8fbc2e21..96c2b8a40630 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -676,7 +676,7 @@ static ssize_t spu_stat_show(struct device *dev, static DEVICE_ATTR(stat, 0444, spu_stat_show, NULL); -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE struct crash_spu_info { struct spu *spu; diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 85c85eb3e245..e5a891ae80ee 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -30,7 +30,7 @@ #include <linux/coredump.h> #include <linux/binfmts.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "spufs.h" diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 06254467e4dd..a35e2c29d7ee 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -35,7 +35,7 @@ #include <asm/time.h> #include <asm/spu.h> #include <asm/spu_info.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "spufs.h" #include "sputrace.h" @@ -236,7 +236,6 @@ static int spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct spu_context *ctx = vma->vm_file->private_data; - unsigned long address = (unsigned long)vmf->virtual_address; unsigned long pfn, offset; offset = vmf->pgoff << PAGE_SHIFT; @@ -244,7 +243,7 @@ spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_SIGBUS; pr_debug("spufs_mem_mmap_fault address=0x%lx, offset=0x%lx\n", - address, offset); + vmf->address, offset); if (spu_acquire(ctx)) return VM_FAULT_NOPAGE; @@ -256,7 +255,7 @@ spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); pfn = (ctx->spu->local_store_phys + offset) >> PAGE_SHIFT; } - vm_insert_pfn(vma, address, pfn); + vm_insert_pfn(vma, vmf->address, pfn); spu_release(ctx); @@ -355,8 +354,7 @@ static int spufs_ps_fault(struct vm_area_struct *vma, down_read(¤t->mm->mmap_sem); } else { area = ctx->spu->problem_phys + ps_offs; - vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, - (area + offset) >> PAGE_SHIFT); + vm_insert_pfn(vma, vmf->address, (area + offset) >> PAGE_SHIFT); spu_context_trace(spufs_ps_fault__insert, ctx, ctx->spu); } diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 5364d4a54249..d8af9bc0489f 100644 --- 
a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -38,7 +38,7 @@ #include <asm/prom.h> #include <asm/spu.h> #include <asm/spu_priv1.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "spufs.h" diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c index a87200a535fa..0d290ea83dc1 100644 --- a/arch/powerpc/platforms/cell/spufs/syscalls.c +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -5,7 +5,7 @@ #include <linux/namei.h> #include <linux/slab.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "spufs.h" diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c index 9ef8cc3378d0..c3ede2c365c3 100644 --- a/arch/powerpc/platforms/chrp/nvram.c +++ b/arch/powerpc/platforms/chrp/nvram.c @@ -13,7 +13,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/spinlock.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/prom.h> #include <asm/machdep.h> #include <asm/rtas.h> diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index dfd310031549..0409714e8070 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -263,7 +263,7 @@ static int ppc750_machine_check_exception(struct pt_regs *regs) if ((entry = search_exception_tables(regs->nip)) != NULL) { tsi108_clear_pci_cfg_error(); regs->msr |= MSR_RI; - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return 1; } return 0; diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index f97bab8e37a2..9de100e22bf3 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -174,7 +174,7 @@ static int mpc7448_machine_check_exception(struct pt_regs *regs) if ((entry = search_exception_tables(regs->nip)) != NULL) { tsi108_clear_pci_cfg_error(); regs->msr |= MSR_RI; - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return 1; } return 0; diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index a0589aac4163..69794d9389c2 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -24,6 +24,7 @@ #include <asm/machdep.h> #include <asm/iommu.h> #include <asm/ppc-pci.h> +#include <asm/isa-bridge.h> #include "maple.h" diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index c8c217b7dd33..f627c9fd7b48 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -90,6 +90,7 @@ struct pmac_i2c_bus int opened; int polled; /* open mode */ struct platform_device *platform_dev; + struct lock_class_key lock_key; /* ops */ int (*open)(struct pmac_i2c_bus *bus); @@ -587,6 +588,7 @@ static void __init kw_i2c_add(struct pmac_i2c_host_kw *host, bus->close = kw_i2c_close; bus->xfer = kw_i2c_xfer; mutex_init(&bus->mutex); + lockdep_set_class(&bus->mutex, &bus->lock_key); if (controller == busnode) bus->flags = pmac_i2c_multibus; list_add(&bus->link, &pmac_i2c_busses); @@ -815,6 +817,7 @@ static void __init pmu_i2c_probe(void) bus->hostdata = bus + 1; bus->xfer = pmu_i2c_xfer; mutex_init(&bus->mutex); + lockdep_set_class(&bus->mutex, &bus->lock_key); bus->flags = pmac_i2c_multibus; list_add(&bus->link, &pmac_i2c_busses); @@ -938,6 +941,7 @@ static void __init smu_i2c_probe(void) bus->hostdata 
= bus + 1; bus->xfer = smu_i2c_xfer; mutex_init(&bus->mutex); + lockdep_set_class(&bus->mutex, &bus->lock_key); bus->flags = 0; list_add(&bus->link, &pmac_i2c_busses); diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 2354ea51e871..6fb5522acd70 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -393,7 +393,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) /* Create PE */ ret = eeh_add_to_parent_pe(edev); if (ret) { - pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%d)\n", + pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%x)\n", __func__, hose->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn), ret); return NULL; @@ -1097,7 +1097,7 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) bus = eeh_pe_bus_get(pe); if (!bus) { - pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n", + pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); return -EIO; } diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 479c25601612..4ee837e6391a 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -237,15 +237,21 @@ static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600, show_fastsleep_workaround_applyonce, store_fastsleep_workaround_applyonce); +/* + * The default stop state that will be used by ppc_md.power_save + * function on platforms that support the stop instruction. + */ +u64 pnv_default_stop_val; +u64 pnv_default_stop_mask; /* * Used for ppc_md.power_save which needs a function with no parameters */ static void power9_idle(void) { - /* Requesting stop state 0 */ - power9_idle_stop(0); + power9_idle_stop(pnv_default_stop_val, pnv_default_stop_mask); } + /* * First deep stop state. Used to figure out when to save/restore * hypervisor context. @@ -253,9 +259,11 @@ static void power9_idle(void) u64 pnv_first_deep_stop_state = MAX_STOP_STATE; /* - * Deepest stop idle state. Used when a cpu is offlined + * psscr value and mask of the deepest stop idle state. + * Used when a cpu is offlined. */ -u64 pnv_deepest_stop_state; +u64 pnv_deepest_stop_psscr_val; +u64 pnv_deepest_stop_psscr_mask; /* * Power ISA 3.0 idle initialization. @@ -292,53 +300,157 @@ u64 pnv_deepest_stop_state; * Bits 60:63 - Requested Level * Used to specify which power-saving level must be entered on executing * the stop instruction + */ + +int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) +{ + int err = 0; + + /* + * psscr_mask == 0xf indicates an older firmware. + * Set remaining fields of psscr to the default values. + * See NOTE above definition of PSSCR_HV_DEFAULT_VAL + */ + if (*psscr_mask == 0xf) { + *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL; + *psscr_mask = PSSCR_HV_DEFAULT_MASK; + return err; + } + + /* + * New firmware is expected to set the psscr_val bits correctly. + * Validate that the following invariants are correctly maintained by + * the new firmware. + * - ESL bit value matches the EC bit value. + * - ESL bit is set for all the deep stop states.
+ */ + if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) { + err = ERR_EC_ESL_MISMATCH; + } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) && + GET_PSSCR_ESL(*psscr_val) == 0) { + err = ERR_DEEP_STATE_ESL_MISMATCH; + } + + return err; +} + +/* + * pnv_power9_idle_init: Initializes the default idle state, first + * deep idle state and deepest idle state on + * ISA 3.0 CPUs. * * @np: /ibm,opal/power-mgt device node * @flags: cpu-idle-state-flags array * @dt_idle_states: Number of idle state entries * Returns 0 on success */ -static int __init pnv_arch300_idle_init(struct device_node *np, u32 *flags, +static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags, int dt_idle_states) { u64 *psscr_val = NULL; + u64 *psscr_mask = NULL; + u32 *residency_ns = NULL; + u64 max_residency_ns = 0; int rc = 0, i; + bool default_stop_found = false, deepest_stop_found = false; - psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), - GFP_KERNEL); - if (!psscr_val) { + psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL); + psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL); + residency_ns = kcalloc(dt_idle_states, sizeof(*residency_ns), + GFP_KERNEL); + + if (!psscr_val || !psscr_mask || !residency_ns) { rc = -1; goto out; } + if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr", psscr_val, dt_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-states-psscr in DT\n"); + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); + rc = -1; + goto out; + } + + if (of_property_read_u64_array(np, + "ibm,cpu-idle-state-psscr-mask", + psscr_mask, dt_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n"); + rc = -1; + goto out; + } + + if (of_property_read_u32_array(np, + "ibm,cpu-idle-state-residency-ns", + residency_ns, dt_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n"); rc = -1; goto out; } /* - * Set pnv_first_deep_stop_state and pnv_deepest_stop_state. + * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask}, + * and the pnv_default_stop_{val,mask}. + * * pnv_first_deep_stop_state should be set to the first stop * level to cause hypervisor state loss. - * pnv_deepest_stop_state should be set to the deepest stop - * stop state. + * + * pnv_deepest_stop_psscr_{val,mask} should be set to values corresponding to + * the deepest stop state. + * + * pnv_default_stop_{val,mask} should be set to values corresponding to + * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state.
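The two firmware invariants checked just above reduce to a compact standalone test. In the sketch below, the EC/ESL mask values follow the ISA 3.0 PSSCR layout as defined in the kernel headers of this period; treat them as assumptions rather than quoted from this diff:

#include <stdbool.h>
#include <stdint.h>

#define PSSCR_EC    0x00100000ULL    /* Exit Criterion (assumed bit position) */
#define PSSCR_ESL   0x00200000ULL    /* Enable State Loss (assumed bit position) */

/* Mirrors the checks in validate_psscr_val_mask(): ESL must equal EC,
 * and any state that loses full context (a deep state) must have ESL set. */
static bool psscr_val_is_valid(uint64_t psscr_val, bool loses_full_context)
{
    bool esl = psscr_val & PSSCR_ESL;
    bool ec = psscr_val & PSSCR_EC;

    if (esl != ec)
        return false;    /* ERR_EC_ESL_MISMATCH */
    if (loses_full_context && !esl)
        return false;    /* ERR_DEEP_STATE_ESL_MISMATCH */
    return true;
}

int main(void)
{
    /* A shallow state with EC == ESL == 0 passes; a deep state with
     * ESL clear is rejected, as in the kernel code above. */
    return !(psscr_val_is_valid(0, false) && !psscr_val_is_valid(0, true));
}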
*/ pnv_first_deep_stop_state = MAX_STOP_STATE; for (i = 0; i < dt_idle_states; i++) { + int err; u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK; if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) && (pnv_first_deep_stop_state > psscr_rl)) pnv_first_deep_stop_state = psscr_rl; - if (pnv_deepest_stop_state < psscr_rl) - pnv_deepest_stop_state = psscr_rl; + err = validate_psscr_val_mask(&psscr_val[i], &psscr_mask[i], + flags[i]); + if (err) { + report_invalid_psscr_val(psscr_val[i], err); + continue; + } + + if (max_residency_ns < residency_ns[i]) { + max_residency_ns = residency_ns[i]; + pnv_deepest_stop_psscr_val = psscr_val[i]; + pnv_deepest_stop_psscr_mask = psscr_mask[i]; + deepest_stop_found = true; + } + + if (!default_stop_found && + (flags[i] & OPAL_PM_STOP_INST_FAST)) { + pnv_default_stop_val = psscr_val[i]; + pnv_default_stop_mask = psscr_mask[i]; + default_stop_found = true; + } + } + + if (!default_stop_found) { + pnv_default_stop_val = PSSCR_HV_DEFAULT_VAL; + pnv_default_stop_mask = PSSCR_HV_DEFAULT_MASK; + pr_warn("Setting default stop psscr val=0x%016llx,mask=0x%016llx\n", + pnv_default_stop_val, pnv_default_stop_mask); + } + + if (!deepest_stop_found) { + pnv_deepest_stop_psscr_val = PSSCR_HV_DEFAULT_VAL; + pnv_deepest_stop_psscr_mask = PSSCR_HV_DEFAULT_MASK; + pr_warn("Setting default stop psscr val=0x%016llx,mask=0x%016llx\n", + pnv_deepest_stop_psscr_val, + pnv_deepest_stop_psscr_mask); } out: kfree(psscr_val); + kfree(psscr_mask); + kfree(residency_ns); return rc; } @@ -373,7 +485,7 @@ static void __init pnv_probe_idle_states(void) } if (cpu_has_feature(CPU_FTR_ARCH_300)) { - if (pnv_arch300_idle_init(np, flags, dt_idle_states)) + if (pnv_power9_idle_init(np, flags, dt_idle_states)) goto out; } diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index aec85e778028..73b155fd4481 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -263,7 +263,7 @@ static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe) /* Enable the bypass window */ top = roundup_pow_of_two(top); - dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n", + dev_info(&npe->pdev->dev, "Enabling bypass for PE %x\n", npe->pe_number); rc = opal_pci_map_pe_dma_window_real(phb->opal_id, npe->pe_number, npe->pe_number, diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index f2344cbd2f46..ecd6d9177d13 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -18,7 +18,7 @@ #include <linux/vmalloc.h> #include <linux/fcntl.h> #include <linux/kobject.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/opal.h> struct elog_obj { diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index e4169d68cb32..399908bd9954 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -21,8 +21,9 @@ #include <asm/xics.h> #include <asm/opal.h> #include <asm/prom.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/debug.h> +#include <asm/isa-bridge.h> static int opal_lpc_chip_id = -1; @@ -386,7 +387,7 @@ static int opal_lpc_init_debugfs(void) machine_device_initcall(powernv, opal_lpc_init_debugfs); #endif /* CONFIG_DEBUG_FS */ -void opal_lpc_init(void) +void __init opal_lpc_init(void) { struct device_node *np; @@ -406,9 +407,17 @@ void opal_lpc_init(void) if (opal_lpc_chip_id < 0) return; - /* Setup special IO ops */ - 
ppc_pci_io = opal_lpc_io; - isa_io_special = true; - - pr_info("OPAL: Power8 LPC bus found, chip ID %d\n", opal_lpc_chip_id); + /* Does it support direct mapping? */ + if (of_get_property(np, "ranges", NULL)) { + pr_info("OPAL: Found memory mapped LPC bus on chip %d\n", + opal_lpc_chip_id); + isa_bridge_init_non_pci(np); + } else { + pr_info("OPAL: Found non-mapped LPC bus on chip %d\n", + opal_lpc_chip_id); + + /* Setup special IO ops */ + ppc_pci_io = opal_lpc_io; + isa_io_special = true; + } } diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index 39d6ff9e5630..7a9cde0cfbd1 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -123,6 +123,10 @@ void __init opal_msglog_init(void) return; } + /* Report maximum size */ + opal_msglog_attr.size = be32_to_cpu(mc->ibuf_size) + + be32_to_cpu(mc->obuf_size); + opal_memcons = mc; } diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index e315e704cca7..2d6ee1c5ad85 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -29,7 +29,7 @@ #include <asm/opal-prd.h> #include <asm/opal.h> #include <asm/io.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> /** diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index 1e496b780efd..3c447002edff 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -6,9 +6,10 @@ #ifdef HAVE_JUMP_LABEL struct static_key opal_tracepoint_key = STATIC_KEY_INIT; -void opal_tracepoint_regfunc(void) +int opal_tracepoint_regfunc(void) { static_key_slow_inc(&opal_tracepoint_key); + return 0; } void opal_tracepoint_unregfunc(void) @@ -25,9 +26,10 @@ void opal_tracepoint_unregfunc(void) /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ extern long opal_tracepoint_refcount; -void opal_tracepoint_regfunc(void) +int opal_tracepoint_regfunc(void) { opal_tracepoint_refcount++; + return 0; } void opal_tracepoint_unregfunc(void) diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 3aa40f1b20f5..6849ca729c11 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -58,14 +58,16 @@ END_FTR_SECTION(0, 1); \ #define OPAL_CALL(name, token) \ _GLOBAL_TOC(name); \ + mfmsr r12; \ mflr r0; \ + andi.
r11,r12,MSR_IR|MSR_DR; \ std r0,PPC_LR_STKOFF(r1); \ li r0,token; \ + beq opal_real_call; \ OPAL_BRANCH(opal_tracepoint_entry) \ - mfcr r12; \ - stw r12,8(r1); \ + mfcr r11; \ + stw r11,8(r1); \ li r11,0; \ - mfmsr r12; \ ori r11,r11,MSR_EE; \ std r12,PACASAVEDMSR(r13); \ andc r12,r12,r11; \ @@ -98,6 +100,30 @@ opal_return: mtcr r4; rfid +opal_real_call: + mfcr r11 + stw r11,8(r1) + /* Set opal return address */ + LOAD_REG_ADDR(r11, opal_return_realmode) + mtlr r11 + li r11,MSR_LE + andc r12,r12,r11 + mtspr SPRN_HSRR1,r12 + LOAD_REG_ADDR(r11,opal) + ld r12,8(r11) + ld r2,0(r11) + mtspr SPRN_HSRR0,r12 + hrfid + +opal_return_realmode: + FIXUP_ENDIAN + ld r2,PACATOC(r13); + lwz r11,8(r1); + ld r12,PPC_LR_STKOFF(r1) + mtcr r11; + mtlr r12 + blr + #ifdef CONFIG_TRACEPOINTS opal_tracepoint_entry: stdu r1,-STACKFRAMESIZE(r1) @@ -155,36 +181,6 @@ opal_tracepoint_return: blr #endif -#define OPAL_CALL_REAL(name, token) \ - _GLOBAL_TOC(name); \ - mflr r0; \ - std r0,PPC_LR_STKOFF(r1); \ - li r0,token; \ - mfcr r12; \ - stw r12,8(r1); \ - \ - /* Set opal return address */ \ - LOAD_REG_ADDR(r11, opal_return_realmode); \ - mtlr r11; \ - mfmsr r12; \ - li r11,MSR_LE; \ - andc r12,r12,r11; \ - mtspr SPRN_HSRR1,r12; \ - LOAD_REG_ADDR(r11,opal); \ - ld r12,8(r11); \ - ld r2,0(r11); \ - mtspr SPRN_HSRR0,r12; \ - hrfid - -opal_return_realmode: - FIXUP_ENDIAN - ld r2,PACATOC(r13); - lwz r11,8(r1); - ld r12,PPC_LR_STKOFF(r1) - mtcr r11; - mtlr r12 - blr - OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL); OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE); @@ -208,7 +204,6 @@ OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE); OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD); OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD); OPAL_CALL(opal_set_xive, OPAL_SET_XIVE); -OPAL_CALL_REAL(opal_rm_set_xive, OPAL_SET_XIVE); OPAL_CALL(opal_get_xive, OPAL_GET_XIVE); OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER); OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS); @@ -264,7 +259,6 @@ OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE); OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE); OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE); OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE); -OPAL_CALL_REAL(opal_rm_resync_timebase, OPAL_RESYNC_TIMEBASE); OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN); OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT); OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO); @@ -280,9 +274,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ); OPAL_CALL(opal_get_param, OPAL_GET_PARAM); OPAL_CALL(opal_set_param, OPAL_SET_PARAM); OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); -OPAL_CALL_REAL(opal_rm_handle_hmi, OPAL_HANDLE_HMI); OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE); -OPAL_CALL_REAL(opal_rm_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE); OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); @@ -304,11 +296,8 @@ OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE); OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE); OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE); OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR); -OPAL_CALL_REAL(opal_rm_int_get_xirr, OPAL_INT_GET_XIRR); OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR); OPAL_CALL(opal_int_eoi, OPAL_INT_EOI); -OPAL_CALL_REAL(opal_rm_int_eoi, OPAL_INT_EOI); 
OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR); -OPAL_CALL_REAL(opal_rm_int_set_mfrr, OPAL_INT_SET_MFRR); OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL); -OPAL_CALL_REAL(opal_rm_pci_tce_kill, OPAL_PCI_TCE_KILL); +OPAL_CALL(opal_nmmu_set_ptcr, OPAL_NMMU_SET_PTCR); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index b3b8930ac52f..86d9fde93c17 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -632,21 +632,11 @@ static void __init opal_dump_region_init(void) "rc = %d\n", rc); } -static void opal_pdev_init(struct device_node *opal_node, - const char *compatible) +static void opal_pdev_init(const char *compatible) { struct device_node *np; - for_each_child_of_node(opal_node, np) - if (of_device_is_compatible(np, compatible)) - of_platform_device_create(np, NULL, NULL); -} - -static void opal_i2c_create_devs(void) -{ - struct device_node *np; - - for_each_compatible_node(np, NULL, "ibm,opal-i2c") + for_each_compatible_node(np, NULL, compatible) of_platform_device_create(np, NULL, NULL); } @@ -718,7 +708,7 @@ static int __init opal_init(void) opal_hmi_handler_init(); /* Create i2c platform devices */ - opal_i2c_create_devs(); + opal_pdev_init("ibm,opal-i2c"); /* Setup a heartbeat thread if requested by OPAL */ opal_init_heartbeat(); @@ -753,12 +743,12 @@ static int __init opal_init(void) } /* Initialize platform devices: IPMI backend, PRD & flash interface */ - opal_pdev_init(opal_node, "ibm,opal-ipmi"); - opal_pdev_init(opal_node, "ibm,opal-flash"); - opal_pdev_init(opal_node, "ibm,opal-prd"); + opal_pdev_init("ibm,opal-ipmi"); + opal_pdev_init("ibm,opal-flash"); + opal_pdev_init("ibm,opal-prd"); /* Initialise platform device: oppanel interface */ - opal_pdev_init(opal_node, "ibm,opal-oppanel"); + opal_pdev_init("ibm,opal-oppanel"); /* Initialise OPAL kmsg dumper for flushing console on panic */ opal_kmsg_init(); @@ -885,6 +875,17 @@ int opal_error_code(int rc) } } +void powernv_set_nmmu_ptcr(unsigned long ptcr) +{ + int rc; + + if (firmware_has_feature(FW_FEATURE_OPAL)) { + rc = opal_nmmu_set_ptcr(-1UL, ptcr); + if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED) + pr_warn("%s: Unable to set nest mmu ptcr\n", __func__); + } +} + EXPORT_SYMBOL_GPL(opal_poll_events); EXPORT_SYMBOL_GPL(opal_rtc_read); EXPORT_SYMBOL_GPL(opal_rtc_write); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d4b33dd2d9e7..8278f43ad4b8 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -83,7 +83,7 @@ void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, PCI_SLOT(pe->rid), PCI_FUNC(pe->rid)); #endif /* CONFIG_PCI_IOV*/ - printk("%spci %s: [PE# %.3d] %pV", + printk("%spci %s: [PE# %.2x] %pV", level, pfix, pe->pe_number, &vaf); va_end(args); @@ -145,8 +145,8 @@ static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) */ rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); - if (rc != OPAL_SUCCESS) - pr_warn("%s: Error %lld unfreezing PHB#%d-PE#%d\n", + if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED) + pr_warn("%s: Error %lld unfreezing PHB#%x-PE#%x\n", __func__, rc, phb->hose->global_number, pe_no); return &phb->ioda.pe_array[pe_no]; @@ -155,13 +155,13 @@ static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) { if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe_num)) { -
pr_warn("%s: Invalid PE %d on PHB#%x\n", + pr_warn("%s: Invalid PE %x on PHB#%x\n", __func__, pe_no, phb->hose->global_number); return; } if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) - pr_debug("%s: PE %d was reserved on PHB#%x\n", + pr_debug("%s: PE %x was reserved on PHB#%x\n", __func__, pe_no, phb->hose->global_number); pnv_ioda_init_pe(phb, pe_no); @@ -229,7 +229,7 @@ static int pnv_ioda2_init_m64(struct pnv_phb *phb) else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) r->end -= (2 * phb->ioda.m64_segsize); else - pr_warn(" Cannot strip M64 segment for reserved PE#%d\n", + pr_warn(" Cannot strip M64 segment for reserved PE#%x\n", phb->ioda.reserved_pe_idx); return 0; @@ -291,7 +291,7 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb) OPAL_M64_WINDOW_TYPE, index, base, 0, PNV_IODA1_M64_SEGS * segsz); if (rc != OPAL_SUCCESS) { - pr_warn(" Error %lld setting M64 PHB#%d-BAR#%d\n", + pr_warn(" Error %lld setting M64 PHB#%x-BAR#%d\n", rc, phb->hose->global_number, index); goto fail; } @@ -300,7 +300,7 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb) OPAL_M64_WINDOW_TYPE, index, OPAL_ENABLE_M64_SPLIT); if (rc != OPAL_SUCCESS) { - pr_warn(" Error %lld enabling M64 PHB#%d-BAR#%d\n", + pr_warn(" Error %lld enabling M64 PHB#%x-BAR#%d\n", rc, phb->hose->global_number, index); goto fail; } @@ -316,7 +316,7 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb) else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) r->end -= (2 * phb->ioda.m64_segsize); else - WARN(1, "Wrong reserved PE#%d on PHB#%d\n", + WARN(1, "Wrong reserved PE#%x on PHB#%x\n", phb->ioda.reserved_pe_idx, phb->hose->global_number); return 0; @@ -414,7 +414,7 @@ static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) pe->pe_number / PNV_IODA1_M64_SEGS, pe->pe_number % PNV_IODA1_M64_SEGS); if (rc != OPAL_SUCCESS) - pr_warn("%s: Error %lld mapping M64 for PHB#%d-PE#%d\n", + pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n", __func__, rc, phb->hose->global_number, pe->pe_number); } @@ -941,14 +941,14 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) pe->mve_number = pe->pe_number; rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, pe->pe_number); if (rc != OPAL_SUCCESS) { - pe_err(pe, "OPAL error %ld setting up MVE %d\n", + pe_err(pe, "OPAL error %ld setting up MVE %x\n", rc, pe->mve_number); pe->mve_number = -1; } else { rc = opal_pci_set_mve_enable(phb->opal_id, pe->mve_number, OPAL_ENABLE_MVE); if (rc) { - pe_err(pe, "OPAL error %ld enabling MVE %d\n", + pe_err(pe, "OPAL error %ld enabling MVE %x\n", rc, pe->mve_number); pe->mve_number = -1; } @@ -1159,10 +1159,10 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) pe->rid = bus->busn_res.start << 8; if (all) - pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n", + pe_info(pe, "Secondary bus %d..%d associated with PE#%x\n", bus->busn_res.start, bus->busn_res.end, pe->pe_number); else - pe_info(pe, "Secondary bus %d associated with PE#%d\n", + pe_info(pe, "Secondary bus %d associated with PE#%x\n", bus->busn_res.start, pe->pe_number); if (pnv_ioda_configure_pe(phb, pe)) { @@ -1213,7 +1213,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) * peer NPU. 
*/ dev_info(&npu_pdev->dev, - "Associating to existing PE %d\n", pe_num); + "Associating to existing PE %x\n", pe_num); pci_dev_get(npu_pdev); npu_pdn = pci_get_pdn(npu_pdev); rid = npu_pdev->bus->number << 8 | npu_pdn->devfn; @@ -1326,7 +1326,9 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) else m64_bars = 1; - pdn->m64_map = kmalloc(sizeof(*pdn->m64_map) * m64_bars, GFP_KERNEL); + pdn->m64_map = kmalloc_array(m64_bars, + sizeof(*pdn->m64_map), + GFP_KERNEL); if (!pdn->m64_map) return -ENOMEM; /* Initialize the m64_map to IODA_INVALID_M64 */ @@ -1539,7 +1541,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) | pci_iov_virtfn_devfn(pdev, vf_index); - pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%d\n", + pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n", hose->global_number, pdev->bus->number, PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)), PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num); @@ -1593,8 +1595,9 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) /* Allocating pe_num_map */ if (pdn->m64_single_mode) - pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map) * num_vfs, - GFP_KERNEL); + pdn->pe_num_map = kmalloc_array(num_vfs, + sizeof(*pdn->pe_num_map), + GFP_KERNEL); else pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL); @@ -1950,7 +1953,12 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, struct pnv_phb *phb = pe->phb; unsigned int shift = tbl->it_page_shift; - if (phb->type == PNV_PHB_NPU) { + /* + * NVLink1 can use the TCE kill register directly as + * it's the same as PHB3. NVLink2 is different and + * should go via the OPAL call. + */ + if (phb->model == PNV_PHB_MODEL_NPU) { /* * The NVLink hardware does not support TCE kill * per TCE entry so we have to invalidate @@ -1962,11 +1970,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs) pnv_pci_phb3_tce_invalidate(pe, rm, shift, index, npages); - else if (rm) - opal_rm_pci_tce_kill(phb->opal_id, - OPAL_PCI_TCE_KILL_PAGES, - pe->pe_number, 1u << shift, - index << shift, npages); else opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL_PAGES, @@ -2844,7 +2847,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, pnv_set_msi_irq_chip(phb, virq); pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," - " address=%x_%08x data=%x PE# %d\n", + " address=%x_%08x data=%x PE# %x\n", pci_name(dev), is_64 ? 
"64" : "32", hwirq, xive_num, msg->address_hi, msg->address_lo, data, pe->pe_number); @@ -2993,7 +2996,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index); if (rc != OPAL_SUCCESS) { - pr_err("%s: Error %lld mapping IO segment#%d to PE#%d\n", + pr_err("%s: Error %lld mapping IO segment#%d to PE#%x\n", __func__, rc, index, pe->pe_number); break; } @@ -3017,7 +3020,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index); if (rc != OPAL_SUCCESS) { - pr_err("%s: Error %lld mapping M32 segment#%d to PE#%d", + pr_err("%s: Error %lld mapping M32 segment#%d to PE#%x", __func__, rc, index, pe->pe_number); break; } @@ -3281,7 +3284,7 @@ static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type) pnv_pci_ioda2_setup_dma_pe(phb, pe); break; default: - pr_warn("%s: No DMA for PHB#%d (type %d)\n", + pr_warn("%s: No DMA for PHB#%x (type %d)\n", __func__, phb->hose->global_number, phb->type); } } @@ -3671,6 +3674,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->model = PNV_PHB_MODEL_PHB3; else if (of_device_is_compatible(np, "ibm,power8-npu-pciex")) phb->model = PNV_PHB_MODEL_NPU; + else if (of_device_is_compatible(np, "ibm,power9-npu-pciex")) + phb->model = PNV_PHB_MODEL_NPU2; else phb->model = PNV_PHB_MODEL_UNKNOWN; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index db7b8020f68e..eb835e977e33 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -234,7 +234,7 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose, int i; data = (struct OpalIoP7IOCPhbErrorData *)common; - pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n", + pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n", hose->global_number, be32_to_cpu(common->version)); if (data->brdgCtl) @@ -326,7 +326,7 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose, int i; data = (struct OpalIoPhb3ErrorData*)common; - pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n", + pr_info("PHB3 PHB#%x Diag-data (Version: %d)\n", hose->global_number, be32_to_cpu(common->version)); if (data->brdgCtl) pr_info("brdgCtl: %08x\n", @@ -516,7 +516,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn) } } - pr_devel(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n", + pr_devel(" -> EEH check, bdfn=%04x PE#%x fstate=%x\n", (pdn->busno << 8) | (pdn->devfn), pe_no, fstate); /* Clear the frozen state if applicable */ @@ -940,6 +940,13 @@ void __init pnv_pci_init(void) for_each_compatible_node(np, NULL, "ibm,ioda2-npu-phb") pnv_pci_init_npu_phb(np); + /* + * Look for NPU2 PHBs which we treat mostly as NPU PHBs with + * the exception of TCE kill which requires an OPAL call. 
+ */ + for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-phb") + pnv_pci_init_npu_phb(np); + /* Configure IOMMU DMA hooks */ set_pci_dma_ops(&dma_iommu_ops); } diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index e64df7894d6e..e1d3e5526b54 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -19,6 +19,7 @@ enum pnv_phb_model { PNV_PHB_MODEL_P7IOC, PNV_PHB_MODEL_PHB3, PNV_PHB_MODEL_NPU, + PNV_PHB_MODEL_NPU2, }; #define PNV_PCI_DIAG_BUF_SIZE 8192 diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index da7c843ac7f1..613052232475 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -18,7 +18,8 @@ static inline void pnv_pci_shutdown(void) { } #endif extern u32 pnv_get_supported_cpuidle_states(void); -extern u64 pnv_deepest_stop_state; +extern u64 pnv_deepest_stop_psscr_val; +extern u64 pnv_deepest_stop_psscr_mask; extern void pnv_lpc_init(void); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index efe8b6bb168b..d50c7d99baaf 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -174,7 +174,7 @@ static void pnv_shutdown(void) opal_shutdown(); } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE static void pnv_kexec_wait_secondaries_down(void) { int my_cpu, i, notified = -1; @@ -245,7 +245,7 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE); } } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE static unsigned long pnv_memory_block_size(void) @@ -311,7 +311,7 @@ define_machine(powernv) { .machine_shutdown = pnv_shutdown, .power_save = NULL, .calibrate_decr = generic_calibrate_decr, -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_cpu_down = pnv_kexec_cpu_down, #endif #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index c789258ae1e1..1c6405fb769a 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -182,15 +182,17 @@ static void pnv_smp_cpu_kill_self(void) ppc64_runlatch_off(); - if (cpu_has_feature(CPU_FTR_ARCH_300)) - srr1 = power9_idle_stop(pnv_deepest_stop_state); - else if (idle_states & OPAL_PM_WINKLE_ENABLED) + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + srr1 = power9_idle_stop(pnv_deepest_stop_psscr_val, + pnv_deepest_stop_psscr_mask); + } else if (idle_states & OPAL_PM_WINKLE_ENABLED) { srr1 = power7_winkle(); - else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || - (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) + } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || + (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { srr1 = power7_sleep(); - else + } else { srr1 = power7_nap(1); + } ppc64_runlatch_on(); diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 3a487e7f4a5e..6244bc849469 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -250,7 +250,7 @@ static int __init ps3_probe(void) return 1; } -#if defined(CONFIG_KEXEC) +#if defined(CONFIG_KEXEC_CORE) static void ps3_kexec_cpu_down(int crash_shutdown, int secondary) { int cpu = smp_processor_id(); @@ -276,7 +276,7 @@ define_machine(ps3) { .progress = ps3_progress, .restart = ps3_restart, .halt = ps3_halt, -#if defined(CONFIG_KEXEC) +#if defined(CONFIG_KEXEC_CORE) .kexec_cpu_down 
= ps3_kexec_cpu_down, #endif }; diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index bec90fb30425..30ec04f1c67c 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -17,7 +17,6 @@ config PPC_PSERIES select PPC_UDBG_16550 select PPC_NATIVE select PPC_DOORBELL - select HAVE_CONTEXT_TRACKING select HOTPLUG_CPU if SMP select ARCH_RANDOM select PPC_DOORBELL @@ -127,3 +126,14 @@ config HV_PERF_CTRS systems. 24x7 is available on Power 8 systems. If unsure, select Y. + +config IBMVIO + depends on PPC_PSERIES + bool + default y + +config IBMEBUS + depends on PPC_PSERIES && !CPU_LITTLE_ENDIAN + bool "Support for GX bus based adapters" + help + Bus device driver for GX bus based adapters. diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index fedc2ccf029d..8f4ba089e802 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -8,7 +8,7 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ pci.o pci_dlpar.o eeh_pseries.o msi.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SCANLOG) += scanlog.o -obj-$(CONFIG_KEXEC) += kexec.o +obj-$(CONFIG_KEXEC_CORE) += kexec.o obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o @@ -21,6 +21,8 @@ obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_DTL) += dtl.o obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o obj-$(CONFIG_LPARCFG) += lparcfg.o +obj-$(CONFIG_IBMVIO) += vio.o +obj-$(CONFIG_IBMEBUS) += ibmebus.o ifeq ($(CONFIG_PPC_PSERIES),y) obj-$(CONFIG_SUSPEND) += suspend.o diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 66e7227469b8..4839db385bb0 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -37,10 +37,12 @@ #include <asm/hvcall.h> #include <asm/mmu.h> #include <asm/pgalloc.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/memory.h> #include <asm/plpar_wrappers.h> +#include "pseries.h" + #define CMM_DRIVER_VERSION "1.0.0" #define CMM_DEFAULT_DELAY 1 #define CMM_HOTPLUG_DELAY 5 @@ -109,6 +111,38 @@ static int hotplug_occurred; /* protected by the hotplug mutex */ static struct task_struct *cmm_thread_ptr; +static long plpar_page_set_loaned(unsigned long vpa) +{ + unsigned long cmo_page_sz = cmo_get_page_size(); + long rc = 0; + int i; + + for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) + rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0); + + for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) + plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, + vpa + i - cmo_page_sz, 0); + + return rc; +} + +static long plpar_page_set_active(unsigned long vpa) +{ + unsigned long cmo_page_sz = cmo_get_page_size(); + long rc = 0; + int i; + + for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) + rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0); + + for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) + plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, + vpa + i - cmo_page_sz, 0); + + return rc; +} + /** * cmm_alloc_pages - Allocate pages and mark them as loaned * @nr: number of pages to allocate diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 423e450efe07..d3a81e746fc4 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -24,7 +24,7 @@ #include <asm/prom.h> #include <asm/machdep.h> -#include <asm/uaccess.h> +#include 
<linux/uaccess.h> #include <asm/rtas.h> static struct workqueue_struct *pseries_hp_wq; @@ -418,88 +418,146 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog, } } -static ssize_t dlpar_store(struct class *class, struct class_attribute *attr, - const char *buf, size_t count) +static int dlpar_parse_resource(char **cmd, struct pseries_hp_errorlog *hp_elog) { - struct pseries_hp_errorlog *hp_elog; - struct completion hotplug_done; - const char *arg; - int rc; + char *arg; - hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL); - if (!hp_elog) { - rc = -ENOMEM; - goto dlpar_store_out; - } + arg = strsep(cmd, " "); + if (!arg) + return -EINVAL; - /* Parse out the request from the user, this will be in the form - * <resource> <action> <id_type> <id> - */ - arg = buf; - if (!strncmp(arg, "memory", 6)) { + if (sysfs_streq(arg, "memory")) { hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM; - arg += strlen("memory "); - } else if (!strncmp(arg, "cpu", 3)) { + } else if (sysfs_streq(arg, "cpu")) { hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_CPU; - arg += strlen("cpu "); } else { - pr_err("Invalid resource specified: \"%s\"\n", buf); - rc = -EINVAL; - goto dlpar_store_out; + pr_err("Invalid resource specified.\n"); + return -EINVAL; } - if (!strncmp(arg, "add", 3)) { + return 0; +} + +static int dlpar_parse_action(char **cmd, struct pseries_hp_errorlog *hp_elog) +{ + char *arg; + + arg = strsep(cmd, " "); + if (!arg) + return -EINVAL; + + if (sysfs_streq(arg, "add")) { hp_elog->action = PSERIES_HP_ELOG_ACTION_ADD; - arg += strlen("add "); - } else if (!strncmp(arg, "remove", 6)) { + } else if (sysfs_streq(arg, "remove")) { hp_elog->action = PSERIES_HP_ELOG_ACTION_REMOVE; - arg += strlen("remove "); } else { - pr_err("Invalid action specified: \"%s\"\n", buf); - rc = -EINVAL; - goto dlpar_store_out; + pr_err("Invalid action specified.\n"); + return -EINVAL; } - if (!strncmp(arg, "index", 5)) { - u32 index; + return 0; +} + +static int dlpar_parse_id_type(char **cmd, struct pseries_hp_errorlog *hp_elog) +{ + char *arg; + u32 count, index; + arg = strsep(cmd, " "); + if (!arg) + return -EINVAL; + + if (sysfs_streq(arg, "index")) { hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX; - arg += strlen("index "); + arg = strsep(cmd, " "); + if (!arg) { + pr_err("No DRC Index specified.\n"); + return -EINVAL; + } + if (kstrtou32(arg, 0, &index)) { - rc = -EINVAL; - pr_err("Invalid drc_index specified: \"%s\"\n", buf); - goto dlpar_store_out; + pr_err("Invalid DRC Index specified.\n"); + return -EINVAL; } hp_elog->_drc_u.drc_index = cpu_to_be32(index); - } else if (!strncmp(arg, "count", 5)) { - u32 count; - + } else if (sysfs_streq(arg, "count")) { hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_COUNT; - arg += strlen("count "); + arg = strsep(cmd, " "); + if (!arg) { + pr_err("No DRC count specified.\n"); + return -EINVAL; + } + if (kstrtou32(arg, 0, &count)) { - rc = -EINVAL; - pr_err("Invalid count specified: \"%s\"\n", buf); - goto dlpar_store_out; + pr_err("Invalid DRC count specified.\n"); + return -EINVAL; } hp_elog->_drc_u.drc_count = cpu_to_be32(count); } else { - pr_err("Invalid id_type specified: \"%s\"\n", buf); - rc = -EINVAL; - goto dlpar_store_out; + pr_err("Invalid id_type specified.\n"); + return -EINVAL; + } + + return 0; +} + +static ssize_t dlpar_store(struct class *class, struct class_attribute *attr, + const char *buf, size_t count) +{ + struct pseries_hp_errorlog *hp_elog; + struct completion hotplug_done; + char *argbuf; + char *args; + int rc; + + args = argbuf = kstrdup(buf, 
GFP_KERNEL); + hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL); + if (!hp_elog || !argbuf) { + pr_info("Could not allocate resources for DLPAR operation\n"); + kfree(argbuf); + kfree(hp_elog); + return -ENOMEM; } + /* + * Parse out the request from the user, this will be in the form: + * <resource> <action> <id_type> <id> + */ + rc = dlpar_parse_resource(&args, hp_elog); + if (rc) + goto dlpar_store_out; + + rc = dlpar_parse_action(&args, hp_elog); + if (rc) + goto dlpar_store_out; + + rc = dlpar_parse_id_type(&args, hp_elog); + if (rc) + goto dlpar_store_out; + init_completion(&hotplug_done); queue_hotplug_event(hp_elog, &hotplug_done, &rc); wait_for_completion(&hotplug_done); dlpar_store_out: + kfree(argbuf); kfree(hp_elog); + + if (rc) + pr_err("Could not handle DLPAR request \"%s\"\n", buf); + return rc ? rc : count; } -static CLASS_ATTR(dlpar, S_IWUSR, NULL, dlpar_store); +static ssize_t dlpar_show(struct class *class, struct class_attribute *attr, + char *buf) +{ + return sprintf(buf, "%s\n", "memory,cpu"); +} + +static CLASS_ATTR(dlpar, S_IWUSR | S_IRUSR, dlpar_show, dlpar_store); static int __init pseries_dlpar_init(void) { diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 39049e4884fb..6b04e3f0f982 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -24,7 +24,7 @@ #include <linux/debugfs.h> #include <linux/spinlock.h> #include <asm/smp.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/firmware.h> #include <asm/lppaca.h> #include <asm/debug.h> diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 1c428f06b14c..1eef46d9cf30 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -270,7 +270,7 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data) eeh_add_flag(EEH_ENABLED); eeh_add_to_parent_pe(edev); - pr_debug("%s: EEH enabled on %02x:%02x.%01x PHB#%d-PE#%x\n", + pr_debug("%s: EEH enabled on %02x:%02x.%01x PHB#%x-PE#%x\n", __func__, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn), pe.phb->global_number, pe.addr); @@ -371,7 +371,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) pe->config_addr, BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid), 0); if (ret) { - pr_warn("%s: Failed to get address for PHB#%d-PE#%x\n", + pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->config_addr); return 0; } @@ -384,7 +384,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) pe->config_addr, BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid), 0); if (ret) { - pr_warn("%s: Failed to get address for PHB#%d-PE#%x\n", + pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->config_addr); return 0; } @@ -653,7 +653,7 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe) rtas_busy_delay(ret); } - pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n", + pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n", __func__, pe->phb->global_number, pe->addr, ret); return ret; } diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index ea7f09bd73b1..7d67623203b8 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -126,7 +126,7 @@ static void __init fw_vec5_feature_init(const char *vec5, unsigned long len) index = OV5_INDX(vec5_fw_features_table[i].feature); feat = 
OV5_FEAT(vec5_fw_features_table[i].feature); - if (vec5[index] & feat) + if (index < len && (vec5[index] & feat)) powerpc_firmware_features |= vec5_fw_features_table[i].val; } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 76ec104e88be..3381c20edbc0 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -446,9 +446,7 @@ static int dlpar_remove_lmb(struct of_drconf_cell *lmb) /* Update memory regions for memory remove */ memblock_remove(lmb->base_addr, block_sz); - dlpar_release_drc(lmb->drc_index); dlpar_remove_device_tree_lmb(lmb); - return 0; } @@ -472,12 +470,15 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove, /* Validate that there are enough LMBs to satisfy the request */ for (i = 0; i < num_lmbs; i++) { - if (lmbs[i].flags & DRCONF_MEM_ASSIGNED) + if (lmb_is_removable(&lmbs[i])) lmbs_available++; } - if (lmbs_available < lmbs_to_remove) + if (lmbs_available < lmbs_to_remove) { + pr_info("Not enough LMBs available (%d of %d) to satisfy request\n", + lmbs_available, lmbs_to_remove); return -EINVAL; + } for (i = 0; i < num_lmbs && lmbs_removed < lmbs_to_remove; i++) { rc = dlpar_remove_lmb(&lmbs[i]); @@ -513,6 +514,7 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove, if (!lmbs[i].reserved) continue; + dlpar_release_drc(lmbs[i].drc_index); pr_info("Memory at %llx was hot-removed\n", lmbs[i].base_addr); @@ -542,6 +544,9 @@ static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop) if (lmbs[i].drc_index == drc_index) { lmb_found = 1; rc = dlpar_remove_lmb(&lmbs[i]); + if (!rc) + dlpar_release_drc(lmbs[i].drc_index); + break; } } @@ -558,6 +563,44 @@ static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop) return rc; } +static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop) +{ + struct of_drconf_cell *lmbs; + u32 num_lmbs, *p; + int lmb_found; + int i, rc; + + pr_info("Attempting to update LMB, drc index %x\n", drc_index); + + p = prop->value; + num_lmbs = *p++; + lmbs = (struct of_drconf_cell *)p; + + lmb_found = 0; + for (i = 0; i < num_lmbs; i++) { + if (lmbs[i].drc_index == drc_index) { + lmb_found = 1; + rc = dlpar_remove_lmb(&lmbs[i]); + if (!rc) { + rc = dlpar_add_lmb(&lmbs[i]); + if (rc) + dlpar_release_drc(lmbs[i].drc_index); + } + break; + } + } + + /* Bail out before the pr_info()s below: if no LMB matched, rc is + * uninitialized and lmbs[i] is one past the end of the array. */ + if (!lmb_found) { + pr_info("Failed to update memory, no LMB with drc index %x\n", + drc_index); + return -EINVAL; + } + + if (rc) + pr_info("Failed to update memory at %llx\n", + lmbs[i].base_addr); + else + pr_info("Memory at %llx was updated\n", lmbs[i].base_addr); + + return rc; +} #else static inline int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) @@ -596,10 +639,6 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb) if (lmb->flags & DRCONF_MEM_ASSIGNED) return -EINVAL; - rc = dlpar_acquire_drc(lmb->drc_index); - if (rc) - return rc; - rc = dlpar_add_device_tree_lmb(lmb); if (rc) { pr_err("Couldn't update device tree for drc index %x\n", @@ -615,12 +654,10 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb) /* Add the memory */ rc = add_memory(nid, lmb->base_addr, block_sz); - if (rc) { + if (rc) dlpar_remove_device_tree_lmb(lmb); - dlpar_release_drc(lmb->drc_index); - } else { + else lmb->flags |= DRCONF_MEM_ASSIGNED; - } return rc; } @@ -652,10 +689,16 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop) return -EINVAL; for (i = 0; i < num_lmbs && lmbs_to_add != lmbs_added; i++) { - rc = dlpar_add_lmb(&lmbs[i]); + rc =
dlpar_acquire_drc(lmbs[i].drc_index); if (rc) continue; + rc = dlpar_add_lmb(&lmbs[i]); + if (rc) { + dlpar_release_drc(lmbs[i].drc_index); + continue; + } + lmbs_added++; /* Mark this lmb so we can remove it later if all of the @@ -675,6 +718,8 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop) if (rc) pr_err("Failed to remove LMB, drc index %x\n", be32_to_cpu(lmbs[i].drc_index)); + else + dlpar_release_drc(lmbs[i].drc_index); } rc = -EINVAL; } else { @@ -708,7 +753,13 @@ static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop) for (i = 0; i < num_lmbs; i++) { if (lmbs[i].drc_index == drc_index) { lmb_found = 1; - rc = dlpar_add_lmb(&lmbs[i]); + rc = dlpar_acquire_drc(lmbs[i].drc_index); + if (!rc) { + rc = dlpar_add_lmb(&lmbs[i]); + if (rc) + dlpar_release_drc(lmbs[i].drc_index); + } + break; } } @@ -766,6 +817,9 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) else rc = -EINVAL; break; + case PSERIES_HP_ELOG_ACTION_READD: + rc = dlpar_memory_readd_by_index(drc_index, prop); + break; default: pr_err("Invalid action (%d) specified\n", hp_elog->action); rc = -EINVAL; diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index 6ca9a2ffaac7..614c28537141 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -180,6 +180,7 @@ static int ibmebus_create_device(struct device_node *dn) static int ibmebus_create_devices(const struct of_device_id *matches) { struct device_node *root, *child; + struct device *dev; int ret = 0; root = of_find_node_by_path("/"); @@ -188,9 +189,12 @@ static int ibmebus_create_devices(const struct of_device_id *matches) if (!of_match_node(matches, child)) continue; - if (bus_find_device(&ibmebus_bus_type, NULL, child, - ibmebus_match_node)) + dev = bus_find_device(&ibmebus_bus_type, NULL, child, + ibmebus_match_node); + if (dev) { + put_device(dev); continue; + } ret = ibmebus_create_device(child); if (ret) { @@ -262,6 +266,7 @@ static ssize_t ibmebus_store_probe(struct bus_type *bus, const char *buf, size_t count) { struct device_node *dn = NULL; + struct device *dev; char *path; ssize_t rc = 0; @@ -269,8 +274,10 @@ static ssize_t ibmebus_store_probe(struct bus_type *bus, if (!path) return -ENOMEM; - if (bus_find_device(&ibmebus_bus_type, NULL, path, - ibmebus_match_path)) { + dev = bus_find_device(&ibmebus_bus_type, NULL, path, + ibmebus_match_path); + if (dev) { + put_device(dev); printk(KERN_WARNING "%s: %s has already been probed\n", __func__, path); rc = -EEXIST; @@ -307,6 +314,7 @@ static ssize_t ibmebus_store_remove(struct bus_type *bus, if ((dev = bus_find_device(&ibmebus_bus_type, NULL, path, ibmebus_match_path))) { of_device_unregister(to_platform_device(dev)); + put_device(dev); kfree(path); return count; @@ -415,303 +423,6 @@ static struct device_attribute ibmebus_bus_device_attrs[] = { __ATTR_NULL }; -#ifdef CONFIG_PM_SLEEP -static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg) -{ - struct platform_device *of_dev = to_platform_device(dev); - struct platform_driver *drv = to_platform_driver(dev->driver); - int ret = 0; - - if (dev->driver && drv->suspend) - ret = drv->suspend(of_dev, mesg); - return ret; -} - -static int ibmebus_bus_legacy_resume(struct device *dev) -{ - struct platform_device *of_dev = to_platform_device(dev); - struct platform_driver *drv = to_platform_driver(dev->driver); - int ret = 0; - - if (dev->driver && drv->resume) - ret = drv->resume(of_dev); - return ret; -} - -static int 
ibmebus_bus_pm_prepare(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (drv && drv->pm && drv->pm->prepare) - ret = drv->pm->prepare(dev); - - return ret; -} - -static void ibmebus_bus_pm_complete(struct device *dev) -{ - struct device_driver *drv = dev->driver; - - if (drv && drv->pm && drv->pm->complete) - drv->pm->complete(dev); -} - -#ifdef CONFIG_SUSPEND - -static int ibmebus_bus_pm_suspend(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->suspend) - ret = drv->pm->suspend(dev); - } else { - ret = ibmebus_bus_legacy_suspend(dev, PMSG_SUSPEND); - } - - return ret; -} - -static int ibmebus_bus_pm_suspend_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->suspend_noirq) - ret = drv->pm->suspend_noirq(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_resume(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->resume) - ret = drv->pm->resume(dev); - } else { - ret = ibmebus_bus_legacy_resume(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_resume_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->resume_noirq) - ret = drv->pm->resume_noirq(dev); - } - - return ret; -} - -#else /* !CONFIG_SUSPEND */ - -#define ibmebus_bus_pm_suspend NULL -#define ibmebus_bus_pm_resume NULL -#define ibmebus_bus_pm_suspend_noirq NULL -#define ibmebus_bus_pm_resume_noirq NULL - -#endif /* !CONFIG_SUSPEND */ - -#ifdef CONFIG_HIBERNATE_CALLBACKS - -static int ibmebus_bus_pm_freeze(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->freeze) - ret = drv->pm->freeze(dev); - } else { - ret = ibmebus_bus_legacy_suspend(dev, PMSG_FREEZE); - } - - return ret; -} - -static int ibmebus_bus_pm_freeze_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->freeze_noirq) - ret = drv->pm->freeze_noirq(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_thaw(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->thaw) - ret = drv->pm->thaw(dev); - } else { - ret = ibmebus_bus_legacy_resume(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_thaw_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->thaw_noirq) - ret = drv->pm->thaw_noirq(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_poweroff(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->poweroff) - ret = drv->pm->poweroff(dev); - } else { - ret = ibmebus_bus_legacy_suspend(dev, PMSG_HIBERNATE); - } - - return ret; -} - -static int ibmebus_bus_pm_poweroff_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->poweroff_noirq) - ret = drv->pm->poweroff_noirq(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_restore(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 
0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->restore) - ret = drv->pm->restore(dev); - } else { - ret = ibmebus_bus_legacy_resume(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_restore_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->restore_noirq) - ret = drv->pm->restore_noirq(dev); - } - - return ret; -} - -#else /* !CONFIG_HIBERNATE_CALLBACKS */ - -#define ibmebus_bus_pm_freeze NULL -#define ibmebus_bus_pm_thaw NULL -#define ibmebus_bus_pm_poweroff NULL -#define ibmebus_bus_pm_restore NULL -#define ibmebus_bus_pm_freeze_noirq NULL -#define ibmebus_bus_pm_thaw_noirq NULL -#define ibmebus_bus_pm_poweroff_noirq NULL -#define ibmebus_bus_pm_restore_noirq NULL - -#endif /* !CONFIG_HIBERNATE_CALLBACKS */ - -static struct dev_pm_ops ibmebus_bus_dev_pm_ops = { - .prepare = ibmebus_bus_pm_prepare, - .complete = ibmebus_bus_pm_complete, - .suspend = ibmebus_bus_pm_suspend, - .resume = ibmebus_bus_pm_resume, - .freeze = ibmebus_bus_pm_freeze, - .thaw = ibmebus_bus_pm_thaw, - .poweroff = ibmebus_bus_pm_poweroff, - .restore = ibmebus_bus_pm_restore, - .suspend_noirq = ibmebus_bus_pm_suspend_noirq, - .resume_noirq = ibmebus_bus_pm_resume_noirq, - .freeze_noirq = ibmebus_bus_pm_freeze_noirq, - .thaw_noirq = ibmebus_bus_pm_thaw_noirq, - .poweroff_noirq = ibmebus_bus_pm_poweroff_noirq, - .restore_noirq = ibmebus_bus_pm_restore_noirq, -}; - -#define IBMEBUS_BUS_PM_OPS_PTR (&ibmebus_bus_dev_pm_ops) - -#else /* !CONFIG_PM_SLEEP */ - -#define IBMEBUS_BUS_PM_OPS_PTR NULL - -#endif /* !CONFIG_PM_SLEEP */ - struct bus_type ibmebus_bus_type = { .name = "ibmebus", .uevent = of_device_uevent_modalias, @@ -721,7 +432,6 @@ struct bus_type ibmebus_bus_type = { .remove = ibmebus_bus_device_remove, .shutdown = ibmebus_bus_device_shutdown, .dev_attrs = ibmebus_bus_device_attrs, - .pm = IBMEBUS_BUS_PM_OPS_PTR, }; EXPORT_SYMBOL(ibmebus_bus_type); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index f2c98f6c1c9c..0587655aea69 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -221,7 +221,7 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group) return -1; } -static void pSeries_lpar_hptab_clear(void) +static void manual_hpte_clear_all(void) { unsigned long size_bytes = 1UL << ppc64_pft_size; unsigned long hpte_count = size_bytes >> 4; @@ -249,6 +249,26 @@ static void pSeries_lpar_hptab_clear(void) &(ptes[j].pteh), &(ptes[j].ptel)); } } +} + +static int hcall_hpte_clear_all(void) +{ + int rc; + + do { + rc = plpar_hcall_norets(H_CLEAR_HPT); + } while (rc == H_CONTINUE); + + return rc; +} + +static void pseries_hpte_clear_all(void) +{ + int rc; + + rc = hcall_hpte_clear_all(); + if (rc != H_SUCCESS) + manual_hpte_clear_all(); #ifdef __LITTLE_ENDIAN__ /* @@ -589,6 +609,29 @@ static int __init disable_bulk_remove(char *str) __setup("bulk_remove=", disable_bulk_remove); +/* Actually only used for radix, so far */ +static int pseries_lpar_register_process_table(unsigned long base, + unsigned long page_size, unsigned long table_size) +{ + long rc; + unsigned long flags = PROC_TABLE_NEW; + + if (radix_enabled()) + flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE; + for (;;) { + rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base, + page_size, table_size); + if (!H_IS_LONG_BUSY(rc)) + break; + mdelay(get_longbusy_msecs(rc)); + } + if (rc != H_SUCCESS) { + pr_err("Failed to register process table 
(rc=%ld)\n", rc); + BUG(); + } + return rc; +} + void __init hpte_init_pseries(void) { mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate; @@ -598,10 +641,16 @@ void __init hpte_init_pseries(void) mmu_hash_ops.hpte_remove = pSeries_lpar_hpte_remove; mmu_hash_ops.hpte_removebolted = pSeries_lpar_hpte_removebolted; mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; - mmu_hash_ops.hpte_clear_all = pSeries_lpar_hptab_clear; + mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; } +void radix_init_pseries(void) +{ + pr_info("Using radix MMU under hypervisor\n"); + register_process_table = pseries_lpar_register_process_table; +} + #ifdef CONFIG_PPC_SMLPAR #define CMO_FREE_HINT_DEFAULT 1 static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT; @@ -661,9 +710,10 @@ EXPORT_SYMBOL(arch_free_page); #ifdef HAVE_JUMP_LABEL struct static_key hcall_tracepoint_key = STATIC_KEY_INIT; -void hcall_tracepoint_regfunc(void) +int hcall_tracepoint_regfunc(void) { static_key_slow_inc(&hcall_tracepoint_key); + return 0; } void hcall_tracepoint_unregfunc(void) @@ -680,9 +730,10 @@ void hcall_tracepoint_unregfunc(void) /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ extern long hcall_tracepoint_refcount; -void hcall_tracepoint_regfunc(void) +int hcall_tracepoint_regfunc(void) { hcall_tracepoint_refcount++; + return 0; } void hcall_tracepoint_unregfunc(void) diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index afa05a2cb702..779fc2a1c8f7 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -25,7 +25,7 @@ #include <linux/init.h> #include <linux/seq_file.h> #include <linux/slab.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/lppaca.h> #include <asm/hvcall.h> #include <asm/firmware.h> @@ -37,6 +37,7 @@ #include <asm/mmu.h> #include <asm/machdep.h> +#include "pseries.h" /* * This isn't a module but we expose that to userspace diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index a560a98bcf3b..5a0c7ba429ce 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -39,6 +39,7 @@ struct update_props_workarea { #define ADD_DT_NODE 0x03000000 #define MIGRATION_SCOPE (1) +#define PRRN_SCOPE -2 static int mobility_rtas_call(int token, char *buf, s32 scope) { @@ -236,6 +237,35 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index) return rc; } +static void prrn_update_node(__be32 phandle) +{ + struct pseries_hp_errorlog *hp_elog; + struct device_node *dn; + + /* + * If a node is found from the given phandle, the phandle does not + * represent the drc index of an LMB and we can ignore it.
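+ * (PRRN reports affected LMBs by drc index in the phandle field, and a drc index never resolves to a device tree node, which is how the two cases are told apart.)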
+ */ + dn = of_find_node_by_phandle(be32_to_cpu(phandle)); + if (dn) { + of_node_put(dn); + return; + } + + hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL); + if (!hp_elog) + return; + + hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM; + hp_elog->action = PSERIES_HP_ELOG_ACTION_READD; + hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX; + hp_elog->_drc_u.drc_index = phandle; + + queue_hotplug_event(hp_elog, NULL, NULL); + + kfree(hp_elog); +} + int pseries_devicetree_update(s32 scope) { char *rtas_buf; @@ -274,6 +304,10 @@ int pseries_devicetree_update(s32 scope) break; case UPDATE_DT_NODE: update_dt_node(phandle, scope); + + if (scope == PRRN_SCOPE) + prrn_update_node(phandle); + break; case ADD_DT_NODE: drc_index = *data++; diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 79aef8c1c5b3..69cedc1b3b8a 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -18,7 +18,7 @@ #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/ctype.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/nvram.h> #include <asm/rtas.h> #include <asm/prom.h> diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index b1be7b713fe6..1361a9db534b 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -79,4 +79,23 @@ extern struct pci_controller_ops pseries_pci_controller_ops; unsigned long pseries_memory_block_size(void); +extern int CMO_PrPSP; +extern int CMO_SecPSP; +extern unsigned long CMO_PageSize; + +static inline int cmo_get_primary_psp(void) +{ + return CMO_PrPSP; +} + +static inline int cmo_get_secondary_psp(void) +{ + return CMO_SecPSP; +} + +static inline unsigned long cmo_get_page_size(void) +{ + return CMO_PageSize; +} + #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index cc66c49f07aa..e5bf1e84047f 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -19,7 +19,7 @@ #include <asm/prom.h> #include <asm/machdep.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/mmu.h> #include "of_helpers.h" diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c index 7d28cabf1206..c47585a78b69 100644 --- a/arch/powerpc/platforms/pseries/scanlog.c +++ b/arch/powerpc/platforms/pseries/scanlog.c @@ -27,7 +27,7 @@ #include <linux/init.h> #include <linux/delay.h> #include <linux/slab.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/rtas.h> #include <asm/prom.h> diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 97aa3f332f24..b4d362ed03a1 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -66,6 +66,7 @@ #include <asm/reg.h> #include <asm/plpar_wrappers.h> #include <asm/kexec.h> +#include <asm/isa-bridge.h> #include "pseries.h" @@ -367,7 +368,7 @@ void pseries_disable_reloc_on_exc(void) } EXPORT_SYMBOL(pseries_disable_reloc_on_exc); -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE static void pSeries_machine_kexec(struct kimage *image) { if (firmware_has_feature(FW_FEATURE_SET_MODE)) @@ -725,7 +726,7 @@ define_machine(pseries) { .progress = rtas_progress, .system_reset_exception = pSeries_system_reset_exception, .machine_check_exception = pSeries_machine_check_exception, -#ifdef CONFIG_KEXEC +#ifdef
CONFIG_KEXEC_CORE .machine_kexec = pSeries_machine_kexec, .kexec_cpu_down = pseries_kexec_cpu_down, #endif diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/platforms/pseries/vio.c index b3813ddb2fb4..2c8fb3ec989e 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1648,6 +1648,9 @@ static struct vio_dev *vio_find_name(const char *name) /** * vio_find_node - find an already-registered vio_dev * @vnode: device_node of the virtual device we're looking for + * + * Takes a reference to the embedded struct device which needs to be dropped + * after use. */ struct vio_dev *vio_find_node(struct device_node *vnode) { diff --git a/arch/powerpc/purgatory/.gitignore b/arch/powerpc/purgatory/.gitignore new file mode 100644 index 000000000000..e9e66f178a6d --- /dev/null +++ b/arch/powerpc/purgatory/.gitignore @@ -0,0 +1,2 @@ +kexec-purgatory.c +purgatory.ro diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile new file mode 100644 index 000000000000..ac8793c13348 --- /dev/null +++ b/arch/powerpc/purgatory/Makefile @@ -0,0 +1,15 @@ +targets += trampoline.o purgatory.ro kexec-purgatory.c + +LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined + +$(obj)/purgatory.ro: $(obj)/trampoline.o FORCE + $(call if_changed,ld) + +CMD_BIN2C = $(objtree)/scripts/basic/bin2c +quiet_cmd_bin2c = BIN2C $@ + cmd_bin2c = $(CMD_BIN2C) kexec_purgatory < $< > $@ + +$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE + $(call if_changed,bin2c) + +obj-y += kexec-purgatory.o diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S new file mode 100644 index 000000000000..f9760ccf4032 --- /dev/null +++ b/arch/powerpc/purgatory/trampoline.S @@ -0,0 +1,128 @@ +/* + * kexec trampoline + * + * Based on code taken from kexec-tools and kexec-lite. + * + * Copyright (C) 2004 - 2005, Milton D Miller II, IBM Corporation + * Copyright (C) 2006, Mohan Kumar M, IBM Corporation + * Copyright (C) 2013, Anton Blanchard, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation (version 2 of the License). + */ + +#if defined(__LITTLE_ENDIAN__) +#define STWX_BE stwbrx +#define LWZX_BE lwbrx +#elif defined(__BIG_ENDIAN__) +#define STWX_BE stwx +#define LWZX_BE lwzx +#else +#error no endianness defined! +#endif + + .machine ppc64 + .balign 256 + .globl purgatory_start +purgatory_start: + b master + + /* ABI: possible run_at_load flag at 0x5c */ + .org purgatory_start + 0x5c + .globl run_at_load +run_at_load: + .long 0 + .size run_at_load, . - run_at_load + + /* ABI: slaves start at 60 with r3=phys */ + .org purgatory_start + 0x60 +slave: + b . + /* ABI: end of copied region */ + .org purgatory_start + 0x100 + .size purgatory_start, . - purgatory_start + +/* + * The above 0x100 bytes at purgatory_start are replaced with the + * code from the kernel (or next stage) by setup_purgatory(). + */ + +master: + or %r1,%r1,%r1 /* low priority to let other threads catchup */ + isync + mr %r17,%r3 /* save cpu id to r17 */ + mr %r15,%r4 /* save physical address in reg15 */ + + or %r3,%r3,%r3 /* ok now to high priority, lets boot */ + lis %r6,0x1 + mtctr %r6 /* delay a bit for slaves to catch up */ + bdnz . 
/* before we overwrite 0-100 again */ + + bl 0f /* Work out where we're running */ +0: mflr %r18 + + /* load device-tree address */ + ld %r3, (dt_offset - 0b)(%r18) + mr %r16,%r3 /* save dt address in reg16 */ + li %r4,20 + LWZX_BE %r6,%r3,%r4 /* fetch __be32 version number at byte 20 */ + cmpwi %r0,%r6,2 /* v2 or later? */ + blt 1f + li %r4,28 + STWX_BE %r17,%r3,%r4 /* Store my cpu as __be32 at byte 28 */ +1: + /* load the kernel address */ + ld %r4,(kernel - 0b)(%r18) + + /* load the run_at_load flag */ + /* possibly patched by kexec */ + ld %r6,(run_at_load - 0b)(%r18) + /* and patch it into the kernel */ + stw %r6,(0x5c)(%r4) + + mr %r3,%r16 /* restore dt address */ + + li %r5,0 /* r5 will be 0 for kernel */ + + mfmsr %r11 + andi. %r10,%r11,1 /* test MSR_LE */ + bne .Little_endian + + mtctr %r4 /* prepare branch to */ + bctr /* start kernel */ + +.Little_endian: + mtsrr0 %r4 /* prepare branch to */ + + clrrdi %r11,%r11,1 /* clear MSR_LE */ + mtsrr1 %r11 + + rfid /* update MSR and start kernel */ + + + .balign 8 + .globl kernel +kernel: + .llong 0x0 + .size kernel, . - kernel + + .balign 8 + .globl dt_offset +dt_offset: + .llong 0x0 + .size dt_offset, . - dt_offset + + + .data + .balign 8 +.globl sha256_digest +sha256_digest: + .skip 32 + .size sha256_digest, . - sha256_digest + + .balign 8 +.globl sha_regions +sha_regions: + .skip 8 * 2 * 16 + .size sha_regions, . - sha_regions diff --git a/arch/powerpc/sysdev/fsl_lbc.c b/arch/powerpc/sysdev/fsl_lbc.c index 424b67fdb57f..5340a483cf55 100644 --- a/arch/powerpc/sysdev/fsl_lbc.c +++ b/arch/powerpc/sysdev/fsl_lbc.c @@ -31,7 +31,7 @@ #include <asm/prom.h> #include <asm/fsl_lbc.h> -static spinlock_t fsl_lbc_lock = __SPIN_LOCK_UNLOCKED(fsl_lbc_lock); +static DEFINE_SPINLOCK(fsl_lbc_lock); struct fsl_lbc_ctrl *fsl_lbc_ctrl_dev; EXPORT_SYMBOL(fsl_lbc_ctrl_dev); diff --git a/arch/powerpc/sysdev/fsl_pmc.c b/arch/powerpc/sysdev/fsl_pmc.c index 1d6fd7c59fe9..232225e7f863 100644 --- a/arch/powerpc/sysdev/fsl_pmc.c +++ b/arch/powerpc/sysdev/fsl_pmc.c @@ -85,8 +85,4 @@ static struct platform_driver pmc_driver = { .probe = pmc_probe, }; -static int __init pmc_init(void) -{ - return platform_driver_register(&pmc_driver); -} -device_initcall(pmc_init); +builtin_platform_driver(pmc_driver); diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 3cc7cace194a..1c41c51f22cb 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -80,10 +80,8 @@ "3: li %1,-1\n" \ " li %0,%3\n" \ " b 2b\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".text" \ + ".previous\n" \ + EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (x) \ : "b" (addr), "i" (-EFAULT), "0" (err)) @@ -113,7 +111,7 @@ int fsl_rio_mcheck_exception(struct pt_regs *regs) out_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR), 0); regs->msr |= MSR_RI; - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return 1; } } diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index d93056eedcb0..19101f9cfcfc 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -77,13 +77,10 @@ phys_addr_t get_immrbase(void) EXPORT_SYMBOL(get_immrbase); -static u32 sysfreq = -1; - u32 fsl_get_sys_freq(void) { + static u32 sysfreq = -1; struct device_node *soc; - const u32 *prop; - int size; if (sysfreq != -1) return sysfreq; @@ -92,12 +89,9 @@ u32 fsl_get_sys_freq(void) if (!soc) return -1; - prop = of_get_property(soc, "clock-frequency", &size); - if (!prop || size != sizeof(*prop) || *prop == 0) - 
prop = of_get_property(soc, "bus-frequency", &size); - - if (prop && size == sizeof(*prop)) - sysfreq = *prop; + of_property_read_u32(soc, "clock-frequency", &sysfreq); + if (sysfreq == -1 || !sysfreq) + of_property_read_u32(soc, "bus-frequency", &sysfreq); of_node_put(soc); return sysfreq; @@ -106,23 +100,17 @@ EXPORT_SYMBOL(fsl_get_sys_freq); #if defined(CONFIG_CPM2) || defined(CONFIG_QUICC_ENGINE) || defined(CONFIG_8xx) -static u32 brgfreq = -1; - u32 get_brgfreq(void) { + static u32 brgfreq = -1; struct device_node *node; - const unsigned int *prop; - int size; if (brgfreq != -1) return brgfreq; node = of_find_compatible_node(NULL, NULL, "fsl,cpm-brg"); if (node) { - prop = of_get_property(node, "clock-frequency", &size); - if (prop && size == 4) - brgfreq = *prop; - + of_property_read_u32(node, "clock-frequency", &brgfreq); of_node_put(node); return brgfreq; } @@ -135,15 +123,11 @@ u32 get_brgfreq(void) node = of_find_node_by_type(NULL, "qe"); if (node) { - prop = of_get_property(node, "brg-frequency", &size); - if (prop && size == 4) - brgfreq = *prop; - - if (brgfreq == -1 || brgfreq == 0) { - prop = of_get_property(node, "bus-frequency", &size); - if (prop && size == 4) - brgfreq = *prop / 2; - } + of_property_read_u32(node, "brg-frequency", &brgfreq); + if (brgfreq == -1 || !brgfreq) + if (!of_property_read_u32(node, "bus-frequency", + &brgfreq)) + brgfreq /= 2; of_node_put(node); } @@ -152,10 +136,9 @@ u32 get_brgfreq(void) EXPORT_SYMBOL(get_brgfreq); -static u32 fs_baudrate = -1; - u32 get_baudrate(void) { + static u32 fs_baudrate = -1; struct device_node *node; if (fs_baudrate != -1) @@ -163,12 +146,7 @@ u32 get_baudrate(void) node = of_find_node_by_type(NULL, "serial"); if (node) { - int size; - const unsigned int *prop = of_get_property(node, - "current-speed", &size); - - if (prop) - fs_baudrate = *prop; + of_property_read_u32(node, "current-speed", &fs_baudrate); of_node_put(node); } diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c index 6f5a8d177c42..d0e9f178a324 100644 --- a/arch/powerpc/sysdev/scom.c +++ b/arch/powerpc/sysdev/scom.c @@ -25,7 +25,7 @@ #include <asm/debug.h> #include <asm/prom.h> #include <asm/scom.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> const struct scom_controller *scom_controller; EXPORT_SYMBOL_GPL(scom_controller); diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c index 57c971b7839c..5692dd569b9b 100644 --- a/arch/powerpc/sysdev/tsi108_pci.c +++ b/arch/powerpc/sysdev/tsi108_pci.c @@ -30,7 +30,7 @@ #include <asm/byteorder.h> #include <asm/io.h> #include <asm/irq.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/machdep.h> #include <asm/pci-bridge.h> #include <asm/tsi108.h> @@ -137,10 +137,8 @@ void tsi108_clear_pci_error(u32 pci_cfg_base) ".section .fixup,\"ax\"\n" \ "3: li %0,-1\n" \ " b 2b\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 2\n" \ - " .long 1b,3b\n" \ - ".text" \ + ".previous\n" \ + EX_TABLE(1b, 3b) \ : "=r"(x) : "r"(addr)) int diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index d38e86fd5720..60c57657c772 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -20,6 +20,7 @@ #include <asm/xics.h> #include <asm/io.h> #include <asm/opal.h> +#include <asm/kvm_ppc.h> static void icp_opal_teardown_cpu(void) { @@ -39,7 +40,26 @@ static void icp_opal_flush_ipi(void) * Should we be flagging idle loop instead? * Or creating some task to be scheduled? 
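* A positive return from opal_int_eoi() indicates more interrupts are pending and must be replayed, hence the force_external_irq_replay() below.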
*/ - opal_int_eoi((0x00 << 24) | XICS_IPI); + if (opal_int_eoi((0x00 << 24) | XICS_IPI) > 0) + force_external_irq_replay(); +} + +static unsigned int icp_opal_get_xirr(void) +{ + unsigned int kvm_xirr; + __be32 hw_xirr; + int64_t rc; + + /* Handle an interrupt latched by KVM first */ + kvm_xirr = kvmppc_get_xics_latch(); + if (kvm_xirr) + return kvm_xirr; + + /* Then ask OPAL */ + rc = opal_int_get_xirr(&hw_xirr, false); + if (rc < 0) + return 0; + return be32_to_cpu(hw_xirr); } static unsigned int icp_opal_get_irq(void) @@ -47,12 +67,8 @@ static unsigned int icp_opal_get_irq(void) unsigned int xirr; unsigned int vec; unsigned int irq; - int64_t rc; - rc = opal_int_get_xirr(&xirr, false); - if (rc < 0) - return 0; - xirr = be32_to_cpu(xirr); + xirr = icp_opal_get_xirr(); vec = xirr & 0x00ffffff; if (vec == XICS_IRQ_SPURIOUS) return 0; @@ -67,7 +83,8 @@ static unsigned int icp_opal_get_irq(void) xics_mask_unknown_vec(vec); /* We might learn about it later, so EOI it */ - opal_int_eoi(xirr); + if (opal_int_eoi(xirr) > 0) + force_external_irq_replay(); return 0; } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 760545519a0b..1be0499f5397 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -10,6 +10,8 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ + +#include <linux/kernel.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/smp.h> @@ -225,6 +227,7 @@ Commands:\n\ #endif "\ dr dump stream of raw bytes\n\ + dt dump the tracing buffers (uses printk)\n\ e print exception information\n\ f flush cache\n\ la lookup symbol+offset of specified address\n\ @@ -1400,7 +1403,7 @@ static void xmon_show_stack(unsigned long sp, unsigned long lr, struct pt_regs regs; while (max_to_print--) { - if (sp < PAGE_OFFSET) { + if (!is_kernel_addr(sp)) { if (sp != 0) printf("SP (%lx) is in userspace\n", sp); break; @@ -1428,12 +1431,12 @@ static void xmon_show_stack(unsigned long sp, unsigned long lr, mread(newsp + LRSAVE_OFFSET, &nextip, sizeof(unsigned long)); if (lr == ip) { - if (lr < PAGE_OFFSET + if (!is_kernel_addr(lr) || (fnstart <= lr && lr < fnend)) printip = 0; } else if (lr == nextip) { printip = 0; - } else if (lr >= PAGE_OFFSET + } else if (is_kernel_addr(lr) && !(fnstart <= lr && lr < fnend)) { printf("[link register ] "); xmon_print_symbol(lr, " ", "\n"); @@ -1493,7 +1496,7 @@ static void print_bug_trap(struct pt_regs *regs) if (regs->msr & MSR_PR) return; /* not in kernel */ addr = regs->nip; /* address of trap instruction */ - if (addr < PAGE_OFFSET) + if (!is_kernel_addr(addr)) return; bug = find_bug(regs->nip); if (bug == NULL) @@ -2284,14 +2287,14 @@ static void dump_one_paca(int cpu) DUMP(p, subcore_sibling_mask, "x"); #endif - DUMP(p, accounting.user_time, "llx"); - DUMP(p, accounting.system_time, "llx"); - DUMP(p, accounting.user_time_scaled, "llx"); + DUMP(p, accounting.utime, "llx"); + DUMP(p, accounting.stime, "llx"); + DUMP(p, accounting.utime_scaled, "llx"); DUMP(p, accounting.starttime, "llx"); DUMP(p, accounting.starttime_user, "llx"); DUMP(p, accounting.startspurr, "llx"); DUMP(p, accounting.utime_sspurr, "llx"); - DUMP(p, stolen_time, "llx"); + DUMP(p, accounting.steal_time, "llx"); #undef DUMP catch_memory_errors = 0; @@ -2364,6 +2367,9 @@ dump(void) dump_log_buf(); } else if (c == 'o') { dump_opal_msglog(); + } else if (c == 't') { + ftrace_dump(DUMP_ALL); + tracing_on(); } else if (c == 'r') { scanhex(&ndump); if (ndump == 0)